#
# Copyright (c) 2023 Klara, Inc.
#
# SPDX-License-Identifier: BSD-2-Clause
#

#
# These tests need to run in a multibyte locale with non-localized
# error messages.
#
export LC_CTYPE=C.UTF-8
export LC_MESSAGES=C

#
# Size of wc's read buffer.
#
MAXBSIZE=65536

#
# Sample text containing multibyte characters.  The tv* values below
# are the counts expected for this text when it is written out with
# one additional trailing newline (printf "%s\n" "$tv"):
#
#   tvl   lines
#   tvw   words
#   tvc   bytes
#   tvm   characters
#   tvcL  length of the longest line, in bytes
#   tvmL  length of the longest line, in characters
#
tv="Der bode en underlig gråsprængt en
på den yderste nøgne ø; –
han gjorde visst intet menneske mén
hverken på land eller sjø;
dog stundom gnistred hans øjne stygt, –
helst mod uroligt vejr, –
og da mente folk, at han var forrykt,
og da var der få, som uden frykt
kom Terje Vigen nær.
"
tvl=10
tvw=55
tvc=300
tvm=283
tvcL=42
tvmL=39

#
# Run a series of tests using the same input file.  The first argument
# is the name of the file.  The next three are the expected line,
# word, and byte counts.  The optional fifth is the expected character
# count; if not provided, it is expected to be identical to the byte
# count.
#
# Each count is checked twice: once with the file on standard input
# (no name in the output) and once with the file named on the command
# line (name appended to the output).
#
atf_check_wc() {
	local file="$1"
	local l="$2"
	local w="$3"
	local c="$4"
	local m="${5-$4}"

	# Input on stdin: counts only.
	atf_check -o match:"^ +${l} +${w} +${c}\$" wc <"${file}"
	atf_check -o match:"^ +${l}\$" wc -l <"${file}"
	atf_check -o match:"^ +${w}\$" wc -w <"${file}"
	atf_check -o match:"^ +${c}\$" wc -c <"${file}"
	atf_check -o match:"^ +${m}\$" wc -m <"${file}"
	# Input named as an operand: counts followed by the file name.
	atf_check -o match:"^ +${l} +${w} +${c} ${file}\$" wc "${file}"
	atf_check -o match:"^ +${l} ${file}\$" wc -l "${file}"
	atf_check -o match:"^ +${w} ${file}\$" wc -w "${file}"
	atf_check -o match:"^ +${c} ${file}\$" wc -c "${file}"
	atf_check -o match:"^ +${m} ${file}\$" wc -m "${file}"
}

#
# One line, two words, four bytes.
#
atf_test_case basic
basic_head()
{
	atf_set "descr" "Basic test case"
}
basic_body()
{
	printf "a b\n" >foo
	atf_check_wc foo 1 2 4
}

#
# Three newlines and nothing else: three lines, no words.
#
atf_test_case blank
blank_head()
{
	atf_set "descr" "Input containing only blank lines"
}
blank_body()
{
	printf "\n\n\n" >foo
	atf_check_wc foo 3 0 3
}

#
# Zero-length file: every count is zero.
#
atf_test_case empty
empty_head()
{
	atf_set "descr" "Empty input"
}
empty_body()
{
	printf "" >foo
	atf_check_wc foo 0 0 0
}

#
# The byte 0377 (0xff) is written between two ASCII letters.  wc -m is
# expected to report "Illegal byte sequence" on stderr and still print
# a count of 4 for the four-byte file.
#
atf_test_case invalid
invalid_head()
{
	atf_set "descr" "Invalid multibyte input"
}
invalid_body()
{
	printf "a\377b\n" >foo
	atf_check \
	    -e match:"Illegal byte sequence" \
	    -o match:"^ +4 foo\$" \
	    wc -m foo
}

#
# The sample text, checked with the common helper and then with -L
# (longest line) combined with -c and with -m.
#
atf_test_case multiline
multiline_head()
{
	atf_set "descr" "Multiline, multibyte input"
}
multiline_body()
{
	printf "%s\n" "$tv" >foo
	atf_check_wc foo "$tvl" "$tvw" "$tvc" "$tvm"
	# longest line in bytes
	atf_check -o match:"^ +$tvc +$tvcL foo" wc -cL foo
	atf_check -o match:"^ +$tvc +$tvcL" wc -cL <foo
	# longest line in characters
	atf_check -o match:"^ +$tvm +$tvmL foo" wc -mL foo
	atf_check -o match:"^ +$tvm +$tvmL" wc -mL <foo
}

#
# The sample text repeated 1000 times (300000 bytes), which is larger
# than MAXBSIZE.
#
atf_test_case multiline_repeated
multiline_repeated_head()
{
	atf_set "descr" "Multiline input exceeding the input buffer size"
}
multiline_repeated_body()
{
	local c=0
	while [ $c -lt 1000 ] ; do
		# Five copies per iteration.  printf reuses the format
		# for each remaining operand, so this emits "$tv\n"
		# five times without relying on %n$ positional
		# conversions.
		printf "%s\n" "$tv" "$tv" "$tv" "$tv" "$tv"
		c=$((c+5))
	done >foo
	atf_check_wc foo $((tvl*c)) $((tvw*c)) $((tvc*c)) $((tvm*c))
}

#
# A NUL byte between two letters: the expected word count is 1, i.e.
# the NUL does not separate words.
#
atf_test_case nul
nul_head()
{
	atf_set "descr" "Input containing NUL"
}
nul_body()
{
	printf "a\0b\n" >foo
	atf_check_wc foo 1 1 4
}

atf_test_case poop
poop_head()
{
	atf_set "descr" "Multibyte sequence across buffer boundary"
}
poop_body()
{
	local l=0 w=0 c=0 m=0
	# One 4-byte UTF-8 sequence (U+1F4A9, bytes F0 9F 92 A9),
	# spelled as printf octal escapes so that the bytes written to
	# the test file do not depend on how this script itself is
	# encoded or transported.
	local seq='\360\237\222\251'
	# Four sequences separated by dots, then four of those groups
	# separated by dots and terminated by a newline: 16 sequences
	# and 15 dots, i.e. 16 * 4 + 15 + 1 = 80 bytes and
	# 16 + 15 + 1 = 32 characters per line.
	local quad="${seq}.${seq}.${seq}.${seq}"
	local line="${quad}.${quad}.${quad}.${quad}\n"

	# The code below produces a stream of 4-byte UTF-8 sequences
	# aligned on 5-byte boundaries, ensuring that the first full
	# read of length MAXBSIZE will end in a partial sequence —
	# unless MAXBSIZE is a multiple of 5 (not possible since it's
	# a power of 2) or one less than a multiple of 5 (e.g. 2^18 =
	# 262,144 = (52429 * 5) - 1) in which case we prepend a single
	# newline to push our sequence out of phase.
	atf_check_not_equal 0 $((MAXBSIZE % 5))
	:>foo
	if [ $((MAXBSIZE % 5)) -eq 4 ] ; then
		printf "\n"
		l=$((l + 1))
		c=$((c + 1))
		m=$((m + 1))
	fi >>foo
	# Keep appending lines until the file is larger than the read
	# buffer, tracking the expected counts as we go.
	while [ $c -le $MAXBSIZE ] ; do
		# $line is a fixed format string built above; it
		# contains only octal escapes, dots and "\n".
		printf "$line"
		l=$((l + 1))
		w=$((w + 1))
		c=$((c + 80)) # 80 bytes
		m=$((m + 32)) # 32 multibyte characters
	done >>foo
	atf_check_wc foo $l $w $c $m
}

#
# Two identical files named on the command line: the "total" line is
# expected to carry twice the per-file line, word and byte counts.
#
atf_test_case total
total_head()
{
	atf_set "descr" "Multiple inputs"
}
total_body()
{
	printf "%s\n" "$tv" >foo
	printf "%s\n" "$tv" >bar
	atf_check \
	    -o match:"^ +$((tvl*2)) +$((tvw*2)) +$((tvc*2)) total\$" \
	    wc foo bar
}

#
# No trailing newline: zero lines, but the words and bytes still count.
#
atf_test_case unterminated
unterminated_head()
{
	atf_set "descr" "Input not ending in newline"
}
unterminated_body()
{
	printf "a b" >foo
	atf_check_wc foo 0 2 3
}

#
# The option "-?" is expected to make wc exit with status 1 and print
# a message containing "usage: wc" on stderr.
#
atf_test_case usage
usage_head()
{
	atf_set "descr" "Trigger usage message"
}
usage_body()
{
	atf_check -s exit:1 -e match:"usage: wc" wc -\?
}

#
# Newlines, a space and a tab: three lines, five bytes, no words.
#
atf_test_case whitespace
whitespace_head()
{
	atf_set "descr" "Input containing only whitespace and newlines"
}
whitespace_body()
{
	printf "\n \n\t\n" >foo
	atf_check_wc foo 3 0 5
}

#
# Register every test case defined above.
#
atf_init_test_cases()
{
	atf_add_test_case basic
	atf_add_test_case blank
	atf_add_test_case empty
	atf_add_test_case invalid
	atf_add_test_case multiline
	atf_add_test_case multiline_repeated
	atf_add_test_case nul
	atf_add_test_case poop
	atf_add_test_case total
	atf_add_test_case unterminated
	atf_add_test_case usage
	atf_add_test_case whitespace
}