xref: /illumos-gate/usr/src/test/util-tests/tests/awk/gnu/longwrds.awk (revision b210e77709da8e42dfe621e10ccf4be504206058)
1# From Gawk Manual modified by bug fix and removal of punctuation
2
3# Invoker can customize sort command if necessary.
BEGIN {
	# Keep whatever sort command the invoker already placed in SORT;
	# otherwise fall back to a plain sort run in the C locale.
	SORT = SORT ? SORT : "LC_ALL=C sort"
}
7
8# Record every word which is used at least once
{
	# Visit every field of the current record in order.
	field = 0
	while (++field <= NF) {
		word = tolower($field)
		# match() returns the start of the first run of lowercase
		# letters and hyphens (0 when there is none) and sets
		# RSTART/RLENGTH; that run is what gets remembered, so any
		# surrounding punctuation or digits are dropped.
		if (match(word, /([[:lower:]]|-)+/) > 0)
			used[substr(word, RSTART, RLENGTH)] = 1
	}
}
16
# Count the distinct words longer than 10 characters and print them
END {
	num_long_words = 0
	for (word in used) {
		# Only words longer than 10 characters are reported.
		if (length(word) <= 10)
			continue
		num_long_words++
		print word | SORT
	}
	# The summary line is written to the same pipe as the words, so it
	# is ordered by the sort command together with them.
	print num_long_words, "long words" | SORT
	# Closing the pipe lets the sort command finish and emit its output.
	close(SORT)
}
28