Isolate the words and accumulate a count for each word in an associative array. Here a word is a field with punctuation marks such as ? or , removed.
# wordfreq - print number of occurrences of each word
# input: text
# output: number-word pairs sorted by number
# Count how often each whitespace-separated word occurs in the file "capitals"
# and print "count word" pairs, most frequent first.
awk '{
    # Strip punctuation so that "word," and "word" are counted as the same key.
    gsub(/[.,:;!?(){}]/, "")
    # gsub with no target edits $0, which makes awk re-split the record,
    # so NF and $i below refer to the cleaned-up text.
    for (i = 1; i <= NF; i++)
        count[$i]++
}
END {
    # The order of "for (w in count)" is unspecified; piping through
    # sort -rn orders the pairs by descending numeric count.
    for (w in count)
        print count[w], w | "sort -rn"
}' capitals
No comments:
Post a Comment
Note: Only a member of this blog may post a comment.