HOWTO GNU Linux bash terminal – string text processing examples

16.May.2017

Administration / Server, Bash / Terminal / Scripts, GNU-Linux

messing with text files

test content

# test if /path/file contains SEARCH_PATTERN
if grep -q SEARCH_PATTERN "/path/file"; then
   cat /path/file; # do something
fi

paste

paste combines two files line by line: the first line of each file ends up together on the first output line, the second lines on the second output line, and so on.

# add 3 new test-users to system
for u in jim bob joe; do useradd -m $u; done

# generate list of users file
echo jim >> users
echo bob >> users
echo joe >> users

# generate password file
pwgen -B 6 3 > passwords;

paste users passwords; # will combine the two files
jim     Ethee3
bob     aitaJ4
joe     ooJoh9

# will combine the two files - with delimiter being a :colon:
# and write the output to screen and file users_and_passwords
paste -d: users passwords|tee users_and_passwords;
jim:Ethee3
bob:aitaJ4
joe:ooJoh9

# you can feed that output to chpasswd to update the passwords of all 3 users (the users need to exist)
chpasswd < users_and_passwords;

join

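similar to paste – but instead of combining lines by position, it combines the lines of two files that share the same key (by default the first field).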

# generate test data
echo "1  England" >> country;
echo "2  Germany" >> country;
echo "3  Russia" >> country;
echo "4  Brazil" >> country;

echo "3  Moscow" >> city;
echo "1  London" >> city;
echo "4  SaoPaulo" >> city;
echo "2  Berlin" >> city;

# unfortunately join needs both files sorted on the key field,
# so you will have to sort the unsorted list (city) first
sort -k1,1 city > city_sorted

# otherwise you will get "is not sorted"

join city_sorted country
1 London England
2 Berlin Germany
3 Moscow Russia
4 SaoPaulo Brazil

concatenate

cat e* >e.txt; # concatenate = combine all files starting with e... into one file e.txt

cat -n e.txt; # output to screen, number lines
1999  find: ‘/etc/sudoers.d’: Keine Berechtigung
2000  find: ‘/etc/lvm/archive’: Keine Berechtigung
2001  find: ‘/etc/lvm/backup’: Keine Berechtigung
2002  find: ‘/etc/lvm/cache’: Keine Berechtigung
2003  find: ‘/etc/audisp’: Keine Berechtigung
2004  find: ‘/etc/audit’: Keine Berechtigung
2005  find: ‘/etc/polkit-1/rules.d’: Keine Berechtigung
2006
2007  find: ‘/etc/autoinstall’: Keine Berechtigung
...

tac e.txt; # output file in reverse order

head -n3 file; # output first 3 lines of file

head -c128 /bin/cat; # output first 128 bytes of file /bin/cat
reset; # reset console

head -n -3 file; # output anything but the last 3 lines of file

tail -n3 file; # output only last 3 lines of file

tail -f file; # follow changes of file (nice for log file real time view)

od /etc/passwd|head -n3; # dump /etc/passwd in octal format, show only the first 3 lines of the dump
0000000 072141 074072 031072 035065 032462 041072 072141 064143
0000020 065040 061157 020163 060544 066545 067157 027472 060566
0000040 027562 070163 067557 027554 072141 067552 071542 027472

od -txC /etc/passwd|head -n3; # dump /etc/passwd in hex format, show only the first 3 lines of the dump
0000000 61 74 3a 78 3a 32 35 3a 32 35 3a 42 61 74 63 68
0000020 20 6a 6f 62 73 20 64 61 65 6d 6f 6e 3a 2f 76 61
0000040 72 2f 73 70 6f 6f 6c 2f 61 74 6a 6f 62 73 3a 2f

od -txCz /etc/passwd|head -n3; # dump /etc/passwd in hex format and also in ASCII, show only the first 3 lines of the dump
0000000 61 74 3a 78 3a 32 35 3a 32 35 3a 42 61 74 63 68  >at:x:25:25:Batch<
0000020 20 6a 6f 62 73 20 64 61 65 6d 6f 6e 3a 2f 76 61  > jobs daemon:/va<
0000040 72 2f 73 70 6f 6f 6c 2f 61 74 6a 6f 62 73 3a 2f  >r/spool/atjobs:/<
\_offset \_ 20(HEX) = 32(DEC) = " " (SPACE)

hexdump -o -s 446 -n 64 /etc/passwd; # similar to od but more flexible in output
00001be 061057 067151 063057 066141 062563 066412 067141 074072
00001ce 030472 035063 031066 046472 067141 060565 020154 060560
00001de 062547 020163 064566 073545 071145 027472 060566 027562
00001ee 060543 064143 027545 060555 035156 061057 067151 061057
00001fe

vim tab.txt; # create new file and fill it with this tab-separated content
Fritz   ist     heute   anscheinend     sehr
müde    deswegen        wird    er      nicht
mehr    lange   wach    bleiben

od -txCz tab.txt
0000000 46 72 69 74 7a 09 69 73 74 09 68 65 75 74 65 09  >Fritz.ist.heute.<
0000020 61 6e 73 63 68 65 69 6e 65 6e 64 09 73 65 68 72  >anscheinend.sehr<
0000040 0a 6d c3 bc 64 65 09 64 65 73 77 65 67 65 6e 09  >.m..de.deswegen.<
0000060 77 69 72 64 09 65 72 09 6e 69 63 68 74 0a 6d 65  >wird.er.nicht.me<
0000100 68 72 09 6c 61 6e 67 65 09 77 61 63 68 09 62 6c  >hr.lange.wach.bl<
0000120 65 69 62 65 6e 0a  >eiben.<
0000126

expand tab.txt |od -txCz; # convert tabs to spaces
0000000 46 72 69 74 7a 20 20 20 69 73 74 20 20 20 20 20  >Fritz   ist     <
0000020 68 65 75 74 65 20 20 20 61 6e 73 63 68 65 69 6e  >heute   anschein<
0000040 65 6e 64 20 20 20 20 20 73 65 68 72 0a 6d c3 bc  >end     sehr.m..<
0000060 64 65 20 20 20 20 64 65 73 77 65 67 65 6e 20 20  >de    deswegen  <
0000100 20 20 20 20 20 20 77 69 72 64 20 20 20 20 65 72  >      wird    er<
0000120 20 20 20 20 20 20 6e 69 63 68 74 0a 6d 65 68 72  >      nicht.mehr<
0000140 20 20 20 20 6c 61 6e 67 65 20 20 20 77 61 63 68  >    lange   wach<
0000160 20 20 20 20 62 6c 65 69 62 65 6e 0a  >    bleiben.<
0000174

cat tab.txt |od -txCz
0000000 46 72 69 74 7a 09 69 73 74 09 68 65 75 74 65 09  >Fritz.ist.heute.<
0000020 61 6e 73 63 68 65 69 6e 65 6e 64 09 73 65 68 72  >anscheinend.sehr<
0000040 0a 6d c3 bc 64 65 09 64 65 73 77 65 67 65 6e 09  >.m..de.deswegen.<
0000060 77 69 72 64 09 65 72 09 6e 69 63 68 74 0a 6d 65  >wird.er.nicht.me<
0000100 68 72 09 6c 61 6e 67 65 09 77 61 63 68 09 62 6c  >hr.lange.wach.bl<
0000120 65 69 62 65 6e 0a  >eiben.<
0000126

expand -t 18 tab.txt; # convert tabs to spaces, in this case with tab stops every 18 columns
Fritz             ist               heute             anscheinend       sehr
müde              deswegen          wird              er                nicht
mehr              lange             wach              bleiben

expand -t 18 tab.txt >x.tab;

unexpand -a x.tab |od -txCz; # convert spaces to tabs
0000000 46 72 69 74 7a 09 09 20 20 69 73 74 09 09 20 20  >Fritz..  ist..  <
0000020 20 20 68 65 75 74 65 09 20 20 20 20 20 20 61 6e  >  heute.      an<
0000040 73 63 68 65 69 6e 65 6e 64 09 73 65 68 72 0a 6d  >scheinend.sehr.m<
0000060 c3 bc 64 65 09 09 20 20 64 65 73 77 65 67 65 6e  >..de..  deswegen<
0000100 09 20 20 20 20 77 69 72 64 09 20 20 20 20 20 20  >.    wird.      <
0000120 65 72 09 09 6e 69 63 68 74 0a 6d 65 68 72 09 09  >er..nicht.mehr..<
0000140 20 20 6c 61 6e 67 65 09 09 20 20 20 20 77 61 63  >  lange..    wac<
0000160 68 09 20 20 20 20 20 20 62 6c 65 69 62 65 6e 0a  >h.      bleiben.<
0000200

tr t T <inhalt; # replace all small t with capital T

tr a-z A-Z <inhalt; # replace all lowercase letters with uppercase letters

cat tab.txt >> tab_duplicates.txt; # run this like 10 times
# now this file contains a lot of duplicate lines

sort tab_duplicates.txt |uniq; # uniq filters out duplicate lines (it only compares adjacent lines, hence the sort first)
Fritz   ist     heute   anscheinend     sehr
mehr    lange   wach    bleiben
müde    deswegen        wird    er      nicht

sort tab_duplicates.txt |uniq -c; # report how many duplicates of which lines are in the file (9 duplicates)
      9 Fritz   ist     heute   anscheinend     sehr
      9 mehr    lange   wach    bleiben
      9 müde    deswegen        wird    er      nicht

ls -l >inhalt; # create a new file with the content of current directory listing

# extract/cut out all chars from position 13-20 and 33-44 in each line
cut -c 13-20,33-44 inhalt |tail -n +2;
# tail -n +2 - start the output at line 2, i.e. do not output the first line
 user us. Apr 11:13
 user us. Apr 13:45
 user us. Apr 13:01
 user us. Apr 13:45
 user us. Apr 13:45
 user us. Apr 16:34
 user us. Apr 16:38
 user us. Apr 16:42

# export first field (column) of the :colon:separated:passwd:file
cut -d: -f 1 /etc/passwd >x1;
# export fifth field (column) of the :colon:separated:passwd:file
cut -d: -f 5 /etc/passwd >x5;

# combine the two files line wise
paste x1 x5;
at      Batch jobs daemon
bin     bin
daemon  Daemon
ftp     FTP account
ftpsecure       Secure FTP User
games   Games account
gdm     Gnome Display Manager daemon
lp      Printing daemon
mail    Mailer daemon
...

# reformat with expand for better formatting
paste x5 x1 |expand -t 35;
Batch jobs daemon                  at
bin                                bin
Daemon                             daemon
FTP account                        ftp
Secure FTP User                    ftpsecure
Games account                      games
Gnome Display Manager daemon       gdm
Printing daemon                    lp
Mailer daemon                      mail
...

# prepare test case
# create a new file and fill it with the following content
vim j1;
1 Hans
2 Peter
3 Fritz
4 Josef
5 Dieter

# create a new file and fill it with the following content
vim j2;
1 230
2 20
3 47
4 88
5 20

paste j1 j2 |expand -t 35
1 Hans                             1 230
2 Peter                            2 20
3 Fritz                            3 47
4 Josef                            4 88
5 Dieter                           5 20

# join two numbered (keys) files into one output
join j1 j2;
1 Hans 230
2 Peter 20
3 Fritz 47
4 Josef 88
5 Dieter 20

HOWTO GNU Linux bash terminal – string text processing examples

messing with text files

test content

paste

join

concatenate

Links:

liked this article?