messing with text files

cat e* >e.txt; # concatenate = combine all files starting with e... into one file e.txt

cat -n e.txt; # output to screen, number lines
1999  find: ‘/etc/sudoers.d’: Keine Berechtigung
2000  find: ‘/etc/lvm/archive’: Keine Berechtigung
2001  find: ‘/etc/lvm/backup’: Keine Berechtigung
2002  find: ‘/etc/lvm/cache’: Keine Berechtigung
2003  find: ‘/etc/audisp’: Keine Berechtigung
2004  find: ‘/etc/audit’: Keine Berechtigung
2005  find: ‘/etc/polkit-1/rules.d’: Keine Berechtigung
2006
2007  find: ‘/etc/autoinstall’: Keine Berechtigung
...

tac e.txt; # output file in reverse order

head -n3 file; # output first 3 lines of file

head -c128 /bin/cat; # output first 128 bytes of file /bin/cat (binary data, may garble the terminal)
reset; # reset console

head -n -3 file; # output anything but the last 3 lines of file

tail -n3 file; # output only last 3 lines of file

tail -f file; # follow changes of file (nice for log file real time view)

od /etc/passwd|head -n3; # dump /etc/passwd in octal format, show only the first 3 lines of the dump
0000000 072141 074072 031072 035065 032462 041072 072141 064143
0000020 065040 061157 020163 060544 066545 067157 027472 060566
0000040 027562 070163 067557 027554 072141 067552 071542 027472

od -txC /etc/passwd|head -n3; # dump /etc/passwd in hex format (one byte per column), show only the first 3 lines of the dump
0000000 61 74 3a 78 3a 32 35 3a 32 35 3a 42 61 74 63 68
0000020 20 6a 6f 62 73 20 64 61 65 6d 6f 6e 3a 2f 76 61
0000040 72 2f 73 70 6f 6f 6c 2f 61 74 6a 6f 62 73 3a 2f

od -txCz /etc/passwd|head -n3; # dump in hex format plus printable ASCII (z), show only the first 3 lines of the dump
0000000 61 74 3a 78 3a 32 35 3a 32 35 3a 42 61 74 63 68  >at:x:25:25:Batch<
0000020 20 6a 6f 62 73 20 64 61 65 6d 6f 6e 3a 2f 76 61  > jobs daemon:/va<
0000040 72 2f 73 70 6f 6f 6c 2f 61 74 6a 6f 62 73 3a 2f  >r/spool/atjobs:/<
\_offset \_ 20(HEX) = 32(DEC) = " " (SPACE)

hexdump -o -s 446 -n 64 /etc/passwd; # similar to od but more flexible in output: -o = two-byte octal, -s 446 = skip the first 446 bytes, -n 64 = dump only 64 bytes
00001be  061057  067151  063057  066141  062563  066412  067141  074072
00001ce  030472  035063  031066  046472  067141  060565  020154  060560
00001de  062547  020163  064566  073545  071145  027472  060566  027562
00001ee  060543  064143  027545  060555  035156  061057  067151  061057
00001fe

vim tab.txt; # create new file fill with content
Fritz   ist     heute   anscheinend     sehr
müde    deswegen        wird    er      nicht
mehr    lange   wach    bleiben

od -txCz tab.txt
0000000 46 72 69 74 7a 09 69 73 74 09 68 65 75 74 65 09  >Fritz.ist.heute.<
0000020 61 6e 73 63 68 65 69 6e 65 6e 64 09 73 65 68 72  >anscheinend.sehr<
0000040 0a 6d c3 bc 64 65 09 64 65 73 77 65 67 65 6e 09  >.m..de.deswegen.<
0000060 77 69 72 64 09 65 72 09 6e 69 63 68 74 0a 6d 65  >wird.er.nicht.me<
0000100 68 72 09 6c 61 6e 67 65 09 77 61 63 68 09 62 6c  >hr.lange.wach.bl<
0000120 65 69 62 65 6e 0a                                >eiben.<
0000126

expand tab.txt |od -txCz; # convert tabs to spaces
0000000 46 72 69 74 7a 20 20 20 69 73 74 20 20 20 20 20  >Fritz   ist     <
0000020 68 65 75 74 65 20 20 20 61 6e 73 63 68 65 69 6e  >heute   anschein<
0000040 65 6e 64 20 20 20 20 20 73 65 68 72 0a 6d c3 bc  >end     sehr.m..<
0000060 64 65 20 20 20 20 64 65 73 77 65 67 65 6e 20 20  >de    deswegen  <
0000100 20 20 20 20 20 20 77 69 72 64 20 20 20 20 65 72  >      wird    er<
0000120 20 20 20 20 20 20 6e 69 63 68 74 0a 6d 65 68 72  >      nicht.mehr<
0000140 20 20 20 20 6c 61 6e 67 65 20 20 20 77 61 63 68  >    lange   wach<
0000160 20 20 20 20 62 6c 65 69 62 65 6e 0a              >    bleiben.<
0000174

cat tab.txt |od -txCz
0000000 46 72 69 74 7a 09 69 73 74 09 68 65 75 74 65 09  >Fritz.ist.heute.<
0000020 61 6e 73 63 68 65 69 6e 65 6e 64 09 73 65 68 72  >anscheinend.sehr<
0000040 0a 6d c3 bc 64 65 09 64 65 73 77 65 67 65 6e 09  >.m..de.deswegen.<
0000060 77 69 72 64 09 65 72 09 6e 69 63 68 74 0a 6d 65  >wird.er.nicht.me<
0000100 68 72 09 6c 61 6e 67 65 09 77 61 63 68 09 62 6c  >hr.lange.wach.bl<
0000120 65 69 62 65 6e 0a                                >eiben.<
0000126

expand -t 18 tab.txt; # convert tabs to spaces, in this case with tab stops every 18 columns
Fritz             ist               heute             anscheinend       sehr
müde              deswegen          wird              er                nicht
mehr              lange             wach              bleiben

expand -t 18 tab.txt >x.tab;

unexpand -a x.tab |od -txCz; # convert spaces to tabs
0000000 46 72 69 74 7a 09 09 20 20 69 73 74 09 09 20 20  >Fritz..  ist..  <
0000020 20 20 68 65 75 74 65 09 20 20 20 20 20 20 61 6e  >  heute.      an<
0000040 73 63 68 65 69 6e 65 6e 64 09 73 65 68 72 0a 6d  >scheinend.sehr.m<
0000060 c3 bc 64 65 09 09 20 20 64 65 73 77 65 67 65 6e  >..de..  deswegen<
0000100 09 20 20 20 20 77 69 72 64 09 20 20 20 20 20 20  >.    wird.      <
0000120 65 72 09 09 6e 69 63 68 74 0a 6d 65 68 72 09 09  >er..nicht.mehr..<
0000140 20 20 6c 61 6e 67 65 09 09 20 20 20 20 77 61 63  >  lange..    wac<
0000160 68 09 20 20 20 20 20 20 62 6c 65 69 62 65 6e 0a  >h.      bleiben.<
0000200

tr t T <inhalt; # replace all small t with capital T

tr a-z A-Z <inhalt | grep --color T; # replace all small letters with CAPITAL LETTERS (A-Z)
INSGESAMT 200
-RW-R--R-- 1 USER USERS   150 26. APR 09:59 ARTIKEL.TXT
DRWXR-XR-X 2 USER USERS     6 24. APR 13:45 DOKUMENTE
-RW-R--R-- 1 USER USERS    33 26. APR 16:34 EDITOR_FOR_POOR.TXT
-RW-R--R-- 1 USER USERS 61288 27. APR 09:59 E.TXT
-RW-R--R-- 1 USER USERS     0 27. APR 10:02 INHALT
DRWXR-XR-X 2 USER USERS     6 24. APR 13:45 ÖFFENTLICH
DRWXR-XR-X 2 USER USERS    24 24. APR 13:01 PUBLIC_HTML
-RW-R--R-- 1 USER USERS 61254 26. APR 16:56 RESULT
...

tr -d aeoui <inhalt; # remove all lowercase vowels
nsgsmt 200
-rw-r--r-- 1 sr srs   150 26. Apr 09:59 rtkl.txt
drwxr-xr-x 2 sr srs     6 24. Apr 13:45 Bldr
drwxr-xr-x 2 sr srs     6 24. Apr 13:01 bn
drwxr-xr-x 2 sr srs     6 24. Apr 13:45 Dkmnt
drwxr-xr-x 2 sr srs     6 24. Apr 13:45 Dwnlds
...

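tr -s Ö oe <artikel.txt; # replace char Ö with oe - only works because tr maps byte by byte and Ö is 2 bytes in UTF-8; -s also squeezes repeated o/e in the output; for a robust replacement use: sed 's/Ö/Oe/g'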
oesterreich

vim LoreIpsum.txt; # prepare file fill with
Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua.

At vero eos et accusam et justo duo dolores et ea rebum.

Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.

Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua.

At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.

fmt -c -w 40 LoreIpsum.txt; # simple optimal text formatter
Lorem ipsum dolor sit amet, consetetur
sadipscing elitr, sed diam nonumy
eirmod tempor invidunt ut labore et
dolore magna aliquyam erat, sed diam
voluptua.

At vero eos et accusam et justo duo
dolores et ea rebum.
Stet clita kasd gubergren, no sea
takimata sanctus est Lorem ipsum dolor
sit amet.

Lorem ipsum dolor sit amet, consetetur
sadipscing elitr, sed diam nonumy
eirmod tempor invidunt ut labore et
dolore magna aliquyam erat, sed diam
voluptua.

At vero eos et accusam et justo duo
dolores et ea rebum. Stet clita kasd
gubergren, no sea takimata sanctus est
Lorem ipsum dolor sit amet.

fmt -c -w 34 LoreIpsum.txt |pr -h "Lore Ipsum" -2 |grep -v '^$'; # pre-format for printing: pr -h "header" adds a page header, -2 prints in two columns, grep -v '^$' removes all empty lines

2017-04-27 10:53                   Lore Ipsum                    Seite 1
Lorem ipsum dolor sit amet,
consetetur sadipscing elitr,        Lorem ipsum dolor sit amet,
sed diam nonumy eirmod tempor       consetetur sadipscing elitr,
invidunt ut labore et dolore        sed diam nonumy eirmod tempor
magna aliquyam erat, sed diam       invidunt ut labore et dolore
voluptua.                           magna aliquyam erat, sed diam
                                    voluptua.
At vero eos et accusam et justo
duo dolores et ea rebum.            At vero eos et accusam et justo
                                    duo dolores et ea rebum. Stet
Stet clita kasd gubergren, no       clita kasd gubergren, no sea
sea takimata sanctus est Lorem      takimata sanctus est Lorem ipsum
ipsum dolor sit amet.               dolor sit amet.

nl LoreIpsum.txt; # number lines (by default only non-empty lines are numbered)
     1  Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua.
     2  At vero eos et accusam et justo duo dolores et ea rebum.
     3  Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
     4  Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua.
     5  At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.

nl -b a -n rz -w 5 -v 1000 -i 10 LoreIpsum.txt; # number lines
# -b a = number all lines (also the empty ones)
# -n rz = align-right with leading zeros
# -w 5 = 5 digits
# -v 1000 = start counting lines at 1000
# -i 10 = increment the line number by 10 for every line
01000   Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua.
01010
01020   At vero eos et accusam et justo duo dolores et ea rebum.
01030
01040   Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.
01050
01060   Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua.
01070
01080   At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.

nl -b a -n rz -w 5 -v 1000 -i 10 inhalt; # again same
01000   insgesamt 200
01010   -rw-r--r-- 1 user users   150 26. Apr 09:59 artikel.txt
01020   drwxr-xr-x 2 user users     6 24. Apr 13:45 Bilder
01030   drwxr-xr-x 2 user users     6 24. Apr 13:01 bin
01040   drwxr-xr-x 2 user users     6 24. Apr 13:45 Dokumente
01050   drwxr-xr-x 2 user users     6 24. Apr 13:45 Downloads
...

wc LoreIpsum.txt; # word count, but also lines and bytes
9 100 596 LoreIpsum.txt
# 9 lines
# 100 words
# 596 bytes (use wc -m to count characters instead of bytes)

wc -l LoreIpsum.txt; # count only lines
9 LoreIpsum.txt

ls -l /usr/bin/ |wc -l; # roughly count the number of programs under /usr/bin (includes the summary line of ls -l)
2139

vim artikel.txt; # prepare file fill with
cat artikel.txt; # show the (still unsorted) content
Affenschwanzbaum
Affenbrotbaum
affenbrotbaum
Affenkletterbaum
Heute ist Dienstag
Das wichtigste ist Heute
Paul
Peter
Maier
Meier
Das ist mir wichtig
Österreich ist schön

sort artikel.txt; # sort alphabetically
affenbrotbaum
Affenbrotbaum
Affenkletterbaum
Affenschwanzbaum
Das ist mir wichtig
Das wichtigste ist Heute
Heute ist Dienstag
Maier
Meier
Österreich ist schön
Paul
Peter

suse:/home/user # sort artikel.txt; # sort order depends on the locale (LC_COLLATE): de_DE.UTF-8 and en_US.UTF-8 give the same result here, whereas LC_ALL=C would sort by byte value (uppercase before lowercase, special chars like Ö after all ASCII chars)
affenbrotbaum
Affenbrotbaum
Affenkletterbaum
Affenschwanzbaum
Das ist mir wichtig
Das wichtigste ist Heute
Heute ist Dienstag
Maier
Meier
Österreich ist schön
Paul
Peter

suse:/home/user # locale
LANG=de_DE.UTF-8
LC_CTYPE="de_DE.UTF-8"
LC_NUMERIC="de_DE.UTF-8"
LC_TIME="de_DE.UTF-8"
LC_COLLATE="de_DE.UTF-8"
LC_MONETARY="de_DE.UTF-8"
LC_MESSAGES="de_DE.UTF-8"
LC_PAPER="de_DE.UTF-8"
LC_NAME="de_DE.UTF-8"
LC_ADDRESS="de_DE.UTF-8"
LC_TELEPHONE="de_DE.UTF-8"
LC_MEASUREMENT="de_DE.UTF-8"
LC_IDENTIFICATION="de_DE.UTF-8"
LC_ALL=

user@debian:~$ sort artikel.txt
affenbrotbaum
Affenbrotbaum
Affenkletterbaum
Affenschwanzbaum
Das ist mir wichtig
Das wichtigste ist Heute
Heute ist Dienstag
Maier
Meier
Österreich ist schön
Paul
Peter

user@debian:~$ locale
LANG=en_US.UTF-8
LANGUAGE=en_US:en
LC_CTYPE="en_US.UTF-8"
LC_NUMERIC="en_US.UTF-8"
LC_TIME="en_US.UTF-8"
LC_COLLATE="en_US.UTF-8"
LC_MONETARY="en_US.UTF-8"
LC_MESSAGES="en_US.UTF-8"
LC_PAPER="en_US.UTF-8"
LC_NAME="en_US.UTF-8"
LC_ADDRESS="en_US.UTF-8"
LC_TELEPHONE="en_US.UTF-8"
LC_MEASUREMENT="en_US.UTF-8"
LC_IDENTIFICATION="en_US.UTF-8"
LC_ALL=

ls -l|sort -k 9,9; # sort by the 9th field (file name) only
insgesamt 216
-rw-r--r-- 1 user users   172 27. Apr 11:13 artikel.txt
drwxr-xr-x 2 user users     6 24. Apr 13:45 Bilder
drwxr-xr-x 2 user users     6 24. Apr 13:01 bin
drwxr-xr-x 2 user users     6 24. Apr 13:45 Dokumente
drwxr-xr-x 2 user users     6 24. Apr 13:45 Downloads
-rw-r--r-- 1 user users    33 26. Apr 16:34 editor_for_poor.txt
...

 ls -l|sort -k 9,9 -r; # sort by the 9th field (file name) only, reverse output
-rw-r--r-- 1 user users   218 27. Apr 10:41 x.tab
drwxr-xr-x 2 user users     6 24. Apr 13:45 Vorlagen
drwxr-xr-x 2 user users     6 24. Apr 13:45 Videos
-rw-r--r-- 1 user users   394 26. Apr 17:16 userliste
drwxr-xr-x 2 user users    25 26. Apr 14:28 test
...

ls -l|sort -k 5,5 -n; # sort by the 5th field (file size) only, numerically
insgesamt 216
drwxr-xr-x 2 user users     6 24. Apr 13:01 bin
drwxr-xr-x 2 user users     6 24. Apr 13:45 Bilder
drwxr-xr-x 2 user users     6 24. Apr 13:45 Dokumente
drwxr-xr-x 2 user users    24 24. Apr 13:01 public_html
drwxr-xr-x 2 user users    25 26. Apr 14:28 test
drwxr-xr-x 2 user users    32 26. Apr 12:55 scripts
-rw-r--r-- 1 user users    33 26. Apr 16:34 editor_for_poor.txt
-rw-r--r-- 1 user users    83 26. Apr 16:13 termine
-rw-r--r-- 1 user users    86 27. Apr 10:37 tab.txt
-rw-r--r-- 1 user users   172 27. Apr 11:13 artikel.txt
-rw-r--r-- 1 user users   218 27. Apr 10:41 x.tab
-rw-r--r-- 1 user users   394 26. Apr 17:16 userliste
-rw-r--r-- 1 user users   596 27. Apr 10:48 LoreIpsum.txt
-rw-r--r-- 1 user users   765 26. Apr 16:42 errors
-rw-r--r-- 1 user users  1187 27. Apr 10:11 inhalt
-rw-r--r-- 1 user users 60489 26. Apr 16:38 ergebnis
-rw-r--r-- 1 user users 61254 26. Apr 16:56 result
-rw-r--r-- 1 user users 61288 27. Apr 09:59 e.txt

cat tab.txt >> tab_duplicates.txt; # run this like 10 times
# now this file contains a lot of duplicate lines

sort tab_duplicates.txt |uniq; # uniq again filters out all duplicate lines
Fritz   ist     heute   anscheinend     sehr
mehr    lange   wach    bleiben
müde    deswegen        wird    er      nicht

sort tab_duplicates.txt |uniq -c; # report how many duplicates of which lines are in the file (9 duplicates)
      9 Fritz   ist     heute   anscheinend     sehr
      9 mehr    lange   wach    bleiben
      9 müde    deswegen        wird    er      nicht

ls -l >inhalt; # create a new file with the content of current directory listing

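cut -c 33-44,13-20 inhalt |tail -n +2; # cut out characters 13-20 and 33-44 of every line (always output in file order, not in the order given)
# tail -n +2 - start output at line 2, i.e. skip only the first line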
 user us. Apr 11:13
 user us. Apr 13:45
 user us. Apr 13:01
 user us. Apr 13:45
 user us. Apr 13:45
 user us. Apr 16:34
 user us. Apr 16:38
 user us. Apr 16:42

cut -d: -f 1 /etc/passwd >x1; # export first field (column) of the :colon:separated:passwd:file
cut -d: -f 5 /etc/passwd >x5; # export fifth field (column) of the :colon:separated:passwd:file

paste x1 x5; # combine the two files line wise
at      Batch jobs daemon
bin     bin
daemon  Daemon
ftp     FTP account
ftpsecure       Secure FTP User
games   Games account
gdm     Gnome Display Manager daemon
lp      Printing daemon
mail    Mailer daemon
...

paste x5 x1 |expand -t 35; # reformat with expand for better formatting
Batch jobs daemon                  at
bin                                bin
Daemon                             daemon
FTP account                        ftp
Secure FTP User                    ftpsecure
Games account                      games
Gnome Display Manager daemon       gdm
Printing daemon                    lp
Mailer daemon                      mail
...

vim j1; # create new file fill with
1 Hans
2 Peter
3 Fritz
4 Josef
5 Dieter

vim j2; # create new file fill with
1 230
2 20
3 47
4 88
5 20

paste j1 j2 |expand -t 35
1 Hans                             1 230
2 Peter                            2 20
3 Fritz                            3 47
4 Josef                            4 88
5 Dieter                           5 20

join j1 j2; # join two numbered (keys) files into one output
1 Hans 230
2 Peter 20
3 Fritz 47
4 Josef 88
5 Dieter 20
admin