Awk: Difference between revisions
Jump to navigation
Jump to search
(38 intermediate revisions by 3 users not shown) | |||
Line 1: | Line 1: | ||
==useful== | =basic= | ||
Print row 5 and 9. | The following simple commands are used most often. | ||
awk 'pattern' file # print lines matching pattern | |||
awk '{action}' file # take action for every line | |||
awk 'pattern {action}' file | |||
$0, $1, $2,... Field variables | |||
FILENAME Name of input file | |||
FS Field separator, by default the space character. Can be modified. | |||
NF Number of fields in the current record | |||
NR Number of the current record (line number) | |||
OFMT Output format (default: "%.6g") | |||
RS Record Separator | |||
ORS Output Record Separator. By default, both the input and output record separators are a newline, stored in the built-in variables RS and ORS, respectively. | |||
=print every 5th line= | |||
awk 'NR % 5 == 0' | |||
=awk script= | |||
awk 'BEGIN {initializations} | |||
search_pattern1 {actions} | |||
search_pattern2 {actions} | |||
... | |||
END {final actions}' file | |||
=useful= | |||
Remove 2 first lines of file. | |||
awk 'NR > 2' /tmp/tmp | |||
Get media information. Pick encoded date, replace : with -, print row 6 and 7 and add .mov. Find just once. | |||
mediainfo *.mov | awk '/Encoded date/ { gsub(/:/,"-") ; print $6 "_" $7 ".mov" ;exit }' | |||
=awk with replace= | |||
Replace systemd with andreas and print column 1. | |||
awk -F: '{gsub ("systemd","andreas")}''{print $1}' /etc/passwd | |||
=Move last column to first= | |||
| awk '{ print $NF, $0 }' | sed 's/[^ ]*$//g' | |||
=remove xml comments= | |||
<pre><nowiki> | |||
awk -v RS='<!--|-->' 'NR%2' file | |||
</nowiki></pre> | |||
=remove column 6 and 8.= | |||
awk '{$6=$8=""; print $0}' file | |||
=print something between tags= | |||
<pre><nowiki> | |||
awk -F'[<|>]' '/jndi/{print $3}' | |||
</nowiki></pre> | |||
=print lines containing both cat, dog and bird.= | |||
awk '/cat/ && /dog/ && /bird/' files | |||
=Print rows where first column does not contain ABC= | |||
awk '$1 !~ /ABC/' file # print lines whose 1st field doesn't match ABC | |||
=Print lines where 8 column does not contain a date= | |||
awk -F',' '$8 !~ /[0-9]{4,}/' list.csv | |||
=Print row 5 and 9.= | |||
awk 'NR==5 || NR==9' "file" | awk 'NR==5 || NR==9' "file" | ||
Print rows between 5 and 9. | Print rows between 5 and 9. | ||
Line 10: | Line 67: | ||
printf("\n") # CR at end of line | printf("\n") # CR at end of line | ||
} ' | } ' | ||
=Summarize size of files.= | |||
find . -type f -printf '%s\n' | awk '{ a+=$1 } END { print a }' | |||
Summarize on 5 column. MB | |||
awk '{ SUM += $5} END { print SUM/1024/1024 }' | |||
Summarize on 5 column. GB | |||
awk '{ SUM += $5} END { print SUM/1024/1024/1024 }' | |||
=Print lines containing T2 in second column.= | |||
awk '$2 ~ /T2/' file | |||
Print second column where the first column is nodev | |||
awk '$1 == "nodev" { print $2 }' | |||
=Print lines where first column value is bigger than 10= | |||
awk '$1 > 10' | |||
=length= | |||
Print length of string. | |||
awk '{ print length(), $0 }' | |||
=Print lines whose 5th column matches a regexp, with , as the delimiter= | |||
awk -F',' '$5 ~ /^10.[0-9]*.21.[0-9]*/' | |||
=Print lines where 4 column is empty= | |||
awk 'BEGIN {FS=","} $4=="" {print}' | |||
=awk one liners= | |||
<pre> | <pre> | ||
FILE SPACING: | FILE SPACING: | ||
Line 212: | Line 292: | ||
awk '/./' | awk '/./' | ||
</pre> | </pre> | ||
=run command on matched pattern= | |||
ls -la | awk '/^drwx/ { system("echo directory with certain permissions" $NF) }' | |||
=get average on a series of digits= | |||
awk '{ total += $1; count++ } END { print total/count }' | |||
[[Category:Applications]] | |||
[[Category:Unix]] | |||
[[Category:Commands]] |
Latest revision as of 13:18, 15 September 2020
basic
The following simple commands are used most often.
awk 'pattern' file # print lines matching pattern
awk '{action}' file # take action for every line
awk 'pattern {action}' file
$0, $1, $2,... Field variables
FILENAME Name of input file
FS Field separator, by default the space character. Can be modified.
NF Number of fields in the current record
NR Number of the current record (line number)
OFMT Output format (default: "%.6g")
RS Record Separator
ORS Output Record Separator. By default, both the input and output record separators are a newline, stored in the built-in variables RS and ORS, respectively.
print every 5th line
awk 'NR % 5 == 0'
awk script
awk 'BEGIN {initializations}
search_pattern1 {actions}
search_pattern2 {actions}
...
END {final actions}' file
useful
Remove the first 2 lines of a file.
awk 'NR > 2' /tmp/tmp
Get media information. Pick the encoded date, replace : with -, print fields 6 and 7 joined by _ with .mov appended. Stop after the first match.
mediainfo *.mov | awk '/Encoded date/ { gsub(/:/,"-") ; print $6 "_" $7 ".mov" ;exit }'
awk with replace
Replace systemd with andreas and print column 1.
awk -F: '{gsub ("systemd","andreas")}{print $1}' /etc/passwd
Move last column to first
| awk '{ print $NF, $0 }' | sed 's/[^ ]*$//g'
remove xml comments
awk -v RS='<!--|-->' 'NR%2' file
remove column 6 and 8.
awk '{$6=$8=""; print $0}' file
print something between tags
awk -F'[<|>]' '/jndi/{print $3}'
print lines containing all of cat, dog and bird.
awk '/cat/ && /dog/ && /bird/' files
Print rows where first column does not contain ABC
awk '$1 !~ /ABC/' file # print lines whose 1st field doesn't match ABC
Print lines where the 8th column does not contain a date
awk -F',' '$8 !~ /[0-9]{4,}/' list.csv
Print rows 5 and 9.
awk 'NR==5 || NR==9' "file"
Print rows between 5 and 9.
awk 'NR>=5&&NR<=9' "file"
Print column 4 to the end of passwd. Field separator set to :
cat /etc/passwd | awk '
BEGIN { FS=":" }
{
  for (i=4; i<=NF; i++) printf("%s ", $i)
  printf("\n") # newline at end of line
} '
Summarize size of files.
find . -type f -printf '%s\n' | awk '{ a+=$1 } END { print a }'
Sum the 5th column and print the total in MB.
awk '{ SUM += $5} END { print SUM/1024/1024 }'
Sum the 5th column and print the total in GB.
awk '{ SUM += $5} END { print SUM/1024/1024/1024 }'
Print lines containing T2 in second column.
awk '$2 ~ /T2/' file
Print second column where the first column is nodev
awk '$1 == "nodev" { print $2 }'
Print lines where first column value is bigger than 10
awk '$1 > 10'
length
Print length of string.
awk '{ print length(), $0 }'
Print lines whose 5th column matches a regexp, with , as the delimiter
awk -F',' '$5 ~ /^10.[0-9]*.21.[0-9]*/'
Print lines where the 4th column is empty
awk 'BEGIN {FS=","} $4=="" {print}'
awk one liners
FILE SPACING: # double space a file awk ‘1;{print “”}’ awk ‘BEGIN{ORS=”\n\n”};1′ # double space a file which already has blank lines in it. Output file # should contain no more than one blank line between lines of text. # NOTE: On Unix systems, DOS lines which have only CRLF (\r\n) are # often treated as non-blank, and thus ‘NF’ alone will return TRUE. awk ‘NF{print $0 “\n”}’ # triple space a file awk ‘1;{print “\n”}’ NUMBERING AND CALCULATIONS: # precede each line by its line number FOR THAT FILE (left alignment). # Using a tab (\t) instead of space will preserve margins. awk ‘{print FNR “\t” $0}’ files* # precede each line by its line number FOR ALL FILES TOGETHER, with tab. awk ‘{print NR “\t” $0}’ files* # number each line of a file (number on left, right-aligned) # Double the percent signs if typing from the DOS command prompt. awk ‘{printf(”%5d : %s\n”, NR,$0)}’ # number each line of file, but only print numbers if line is not blank # Remember caveats about Unix treatment of \r (mentioned above) awk ‘NF{$0=++a ” :” $0};{print}’ awk ‘{print (NF? 
++a ” :” :”") $0}’ # count lines (emulates “wc -l”) awk ‘END{print NR}’ # print the sums of the fields of every line awk ‘{s=0; for (i=1; i max {max=$1; maxline=$0}; END{ print max, maxline}’ # print the number of fields in each line, followed by the line awk ‘{ print NF “:” $0 } ‘ # print the last field of each line awk ‘{ print $NF }’ # print the last field of the last line awk ‘{ field = $NF }; END{ print field }’ # print every line with more than 4 fields awk ‘NF > 4′ # print every line where the value of the last field is > 4 awk ‘$NF > 4′ TEXT CONVERSION AND SUBSTITUTION: # IN UNIX ENVIRONMENT: convert DOS newlines (CR/LF) to Unix format awk ‘{sub(/\r$/,”");print}’ # assumes EACH line ends with Ctrl-M # IN UNIX ENVIRONMENT: convert Unix newlines (LF) to DOS format awk ‘{sub(/$/,”\r”);print} # IN DOS ENVIRONMENT: convert Unix newlines (LF) to DOS format awk 1 # IN DOS ENVIRONMENT: convert DOS newlines (CR/LF) to Unix format # Cannot be done with DOS versions of awk, other than gawk: gawk -v BINMODE=”w” ‘1′ infile >outfile # Use “tr” instead. 
tr -d \r outfile # GNU tr version 1.22 or higher # delete leading whitespace (spaces, tabs) from front of each line # aligns all text flush left awk ‘{sub(/^[ \t]+/, “”); print}’ # delete trailing whitespace (spaces, tabs) from end of each line awk ‘{sub(/[ \t]+$/, “”);print}’ # delete BOTH leading and trailing whitespace from each line awk ‘{gsub(/^[ \t]+|[ \t]+$/,”");print}’ awk ‘{$1=$1;print}’ # also removes extra space between fields # insert 5 blank spaces at beginning of each line (make page offset) awk ‘{sub(/^/, ” “);print}’ # align all text flush right on a 79-column width awk ‘{printf “%79s\n”, $0}’ file* # center all text on a 79-character width awk ‘{l=length();s=int((79-l)/2); printf “%”(s+l)”s\n”,$0}’ file* # substitute (find and replace) “foo” with “bar” on each line awk ‘{sub(/foo/,”bar”);print}’ # replaces only 1st instance gawk ‘{$0=gensub(/foo/,”bar”,4);print}’ # replaces only 4th instance awk ‘{gsub(/foo/,”bar”);print}’ # replaces ALL instances in a line # substitute “foo” with “bar” ONLY for lines which contain “baz” awk ‘/baz/{gsub(/foo/, “bar”)};{print}’ # substitute “foo” with “bar” EXCEPT for lines which contain “baz” awk ‘!/baz/{gsub(/foo/, “bar”)};{print}’ # change “scarlet” or “ruby” or “puce” to “red” awk ‘{gsub(/scarlet|ruby|puce/, “red”); print}’ # reverse order of lines (emulates “tac”) awk ‘{a[i++]=$0} END {for (j=i-1; j>=0;) print a[j–] }’ file* # if a line ends with a backslash, append the next line to it # (fails if there are multiple lines ending with backslash…) awk ‘/\\$/ {sub(/\\$/,”"); getline t; print $0 t; next}; 1′ file* # print and sort the login names of all users awk -F “:” ‘{ print $1 | “sort” }’ /etc/passwd # print the first 2 fields, in opposite order, of every line awk ‘{print $2, $1}’ file # switch the first 2 fields of every line awk ‘{temp = $1; $1 = $2; $2 = temp}’ file # print every line, deleting the second field of that line awk ‘{ $2 = “”; print }’ # print in reverse order the fields of every line awk ‘{for 
(i=NF; i>0; i–) printf(”%s “,i);printf (”\n”)}’ file # remove duplicate, consecutive lines (emulates “uniq”) awk ‘a !~ $0; {a=$0}’ # remove duplicate, nonconsecutive lines awk ‘! a[$0]++’ # most concise script awk ‘!($0 in a) {a[$0];print}’ # most efficient script # concatenate every 5 lines of input, using a comma separator # between fields awk ‘ORS=%NR%5?”,”:”\n”‘ file SELECTIVE PRINTING OF CERTAIN LINES: # print first 10 lines of file (emulates behavior of “head”) awk ‘NR 1{exit};1′ # print the last 2 lines of a file (emulates “tail -2″) awk ‘{y=x “\n” $0; x=$0};END{print y}’ # print the last line of a file (emulates “tail -1″) awk ‘END{print}’ # print only lines which match regular expression (emulates “grep”) awk ‘/regex/’ # print only lines which do NOT match regex (emulates “grep -v”) awk ‘!/regex/’ # print the line immediately before a regex, but not the line # containing the regex awk ‘/regex/{print x};{x=$0}’ awk ‘/regex/{print (x==”" ? “match on line 1″ : x)};{x=$0}’ # print the line immediately after a regex, but not the line # containing the regex awk ‘/regex/{getline;print}’ # grep for AAA and BBB and CCC (in any order) awk ‘/AAA/; /BBB/; /CCC/’ # grep for AAA and BBB and CCC (in that order) awk ‘/AAA.*BBB.*CCC/’ # print only lines of 65 characters or longer awk ‘length > 64′ # print only lines of less than 65 characters awk ‘length # print section of file from regular expression to end of file awk '/regex/,0' awk '/regex/,EOF' # print section of file based on line numbers (lines 8-12, inclusive) awk 'NR==8,NR==12' # print line number 52 awk 'NR==52' awk 'NR==52 {print;exit}' # more efficient on large files # print section of file between two regular expressions (inclusive) awk '/Iowa/,/Montana/' # case sensitive SELECTIVE DELETION OF CERTAIN LINES: # delete ALL blank lines from a file (same as "grep '.' ") awk NF awk '/./'
run command on matched pattern
ls -la | awk '/^drwx/ { system("echo directory with certain permissions" $NF) }'
get average on a series of digits
awk '{ total += $1; count++ } END { print total/count }'