#!/bin/bash

# Copyright 2019 Joyent, Inc.

#
# T.misc: miscellaneous awk regression tests.
#
# Required environment:
#   AWK      - the awk command under test.  It is deliberately expanded
#              unquoted so that it may carry arguments.
#   WORKDIR  - an existing directory in which scratch files are created.
#
# Each check writes its expected and actual output to scratch files and
# compares them; a mismatch is reported on stderr through fail().  The
# script exits with 0 when every check passed and 1 otherwise.
#
# Several checks compare the exit status of awk with 139, i.e. 128 + 11:
# the status the shell reports for a command killed by SIGSEGV.
#

if [[ -z "$AWK" || -z "$WORKDIR" ]]; then
	printf '$AWK and $WORKDIR must be set\n' >&2
	exit 1
fi

TEMP0=$WORKDIR/test.temp.0
TEMP1=$WORKDIR/test.temp.1
TEMP2=$WORKDIR/test.temp.2
TEMP3=$WORKDIR/test.temp.3

RESULT=0

# Report a failed check ($1 is the message) on stderr and remember that
# the script must exit non-zero.  Testing continues after a failure.
fail() {
	printf '%s\n' "$1" >&2
	RESULT=1
}

echo T.misc: miscellaneous buglets now watched for

rm -f core

echo 'The big brown over the lazy doe
The big brown over the lazy dog
x
The big brown over the lazy dog' > "$TEMP0"
echo 'failed
succeeded
failed
succeeded' > "$TEMP1"
$AWK '{ if (match($0, /^The big brown over the lazy dog/) == 0) {
		printf("failed\n")
	} else {
		printf("succeeded\n")
	}
} ' "$TEMP0" > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc ghosh RE bug'

echo '123
1234567890
12345678901' > "$TEMP0"
echo '12345678901' > "$TEMP1"
$AWK 'length($0) > 10' "$TEMP0" > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc last number bug'

# check some \ sequences in strings (ascii)
echo HIJKL > "$TEMP1"
echo "$TEMP0" | $AWK '{ print "H\x49\x4a\x4BL" }' > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc hex string cvt'

echo 012x45 > "$TEMP1"
$AWK 'BEGIN { print "0\061\62x\0645" }' > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc oct string cvt'

# $i++ means ($i)++
echo 3 5 | $AWK '{ i = 1; print $i++ ; print $1, i }' > "$TEMP1"
echo '3
4 1' > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc bad field increment'

# makes sure that fields are recomputed even if self-assignment
# take into account that subtracting from NF now rebuilds the record
echo 'a b c
s p q r
x y z' > "$TEMP0"
echo 'a
s p
x' > "$TEMP1"
$AWK '{ NF -= 2; $1 = $1; print }' < "$TEMP0" > "$TEMP2"
diff "$TEMP1" "$TEMP2" || fail "BAD: T.misc bad field self-assignment"

echo '1
1' > "$TEMP1"
$AWK 'BEGIN {x = 1; print x; x = x; print x}' > "$TEMP2"
diff "$TEMP1" "$TEMP2" || fail "BAD: T.misc bad self-assignment"

echo 573109312 | $AWK '{print $1*4}' > "$TEMP1"
echo 2292437248 > "$TEMP2"
diff "$TEMP1" "$TEMP2" || fail "BAD: T.misc bad overflow"

# note that there are 8-bit characters in the echo
# some shells will probably screw this up.
# NOTE(review): no 8-bit characters are visible in the copy this was
# reviewed from; confirm the input below against the pristine file.
echo '#
code 1
code 2' |
$AWK '/^#/' > "$TEMP1"
echo '#' > "$TEMP2"
diff "$TEMP1" "$TEMP2" || fail "BAD: T.misc bad match of 8-bit char"

echo hello |
$AWK 'BEGIN { FILENAME = "/etc/passwd" }
	{ print $0 }' >/dev/null
if [[ $? -eq 139 ]]; then fail "BAD: T.misc /etc/passwd dropped core"; fi

echo hello |
$AWK ' function foo(foo) {
		foo = 1
		foo()
	}
	{ foo(bar) }
' >/dev/null 2>&1
if [[ $? -eq 139 ]]; then
	fail "BAD: T.misc function foo(foo) dropped core"
	rm -f core
fi

# The awk program prints a complaint only when the comparison was done on
# strings; any output is turned into a recorded failure so that it is
# reflected in the exit status of this script.
echo '2
10' |
$AWK '{ x[NR] = $0 }	# test whether $0 is NUM as well as STR
END { if (x[1] > x[2]) print "BAD: T.misc: $0 is not NUM" }' > "$TEMP0"
grep '^' "$TEMP0" >/dev/null && fail 'BAD: T.misc: $0 is not NUM'


$AWK 'BEGIN {
	npad = substr("alexander" " ",1,15)
	print npad
	}' > "$TEMP0"
grep '\\' "$TEMP0" && fail "BAD: T.misc alexander fails"

# This should give an error about function arguments
$AWK '
function foo(x) { print "x is" x }
BEGIN { foo(foo) }
' 2> "$TEMP0"
grep "can't use function foo" "$TEMP0" >/dev/null || fail "BAD: T.misc fcn args"


# gawk defref test; should give error about undefined function
$AWK 'BEGIN { foo() }' 2> "$TEMP0"
grep "calling undefined function foo" "$TEMP0" >/dev/null || fail "BAD: T.misc undefined function"


# gawk arrayparm test; should give error about function
$AWK '
BEGIN {
	foo[1]=1;
	foo[2]=2;
	bug1(foo);
}
function bug1(i) {
	for (i in foo) {
		bug2(i);
		delete foo[i];
		print i,1,bot[1];
	}
}
function bug2(arg) {
	bot[arg]=arg;
}
' 2> "$TEMP0"
grep "can.t assign to foo" "$TEMP0" >/dev/null || fail "BAD: T.misc foo bug"


# This should be a syntax error
$AWK '
!x = y
' 2> "$TEMP0"
grep "syntax error" "$TEMP0" >/dev/null || fail "BAD: T.misc syntax error !x=y fails"

# This should print bbb
$AWK '
BEGIN { up[1] = "a"
	for (i in up) gsub("a", "A", x)
	print x x "bbb"
	exit
      }
' > "$TEMP0"
grep bbb "$TEMP0" >/dev/null || fail "BAD: T.misc gsub failed"

echo yes |
$AWK '
BEGIN {
	printf "push return" >"/dev/null"
	getline ans <"/dev/null"
} '
if [[ $? -eq 139 ]]; then fail "BAD: T.misc getline ans dropped core"; fi

$AWK 'BEGIN { unireghf() }
function unireghf(hfeed) { hfeed[1] = 0 }'
if [[ $? -eq 139 ]]; then fail "BAD: T.misc unireghf dropped core"; fi

# The exit status of awk is saved before grep runs; otherwise the core
# dump check below would be looking at the status of grep/fail instead.
echo x | $AWK '/[/]/' 2> "$TEMP0"
awk_status=$?
grep 'nonterminated character class' "$TEMP0" >/dev/null || fail 'BAD: T.misc nonterminated fails'
if [[ $awk_status -eq 139 ]]; then fail "BAD: T.misc nonterminated dropped core"; fi

$AWK '
function f() { return 12345 }
BEGIN { printf "<%s>\n", f() }
' > "$TEMP0"
grep '<12345>' "$TEMP0" >/dev/null || fail 'BAD: T.misc <12345> fails'

echo 'abc
def

ghi
jkl' > "$TEMP0"
$AWK '
BEGIN {	RS = ""
	while (getline <"'"$TEMP0"'")
		print
}' > "$TEMP1"
$AWK 'END {print NR}' "$TEMP1" | grep 4 >/dev/null || fail 'BAD: T.misc abcdef fails'


# The following should not produce a warning about changing a constant
# nor about a curdled tempcell list
$AWK 'function f(x) { x = 2 }
BEGIN { f(1) }' > "$TEMP0"
grep '^' "$TEMP0" && fail 'BAD: test constant change fails'

# The following should not produce a warning about a curdled tempcell list
$AWK 'function f(x) { x }
BEGIN { f(1) }' > "$TEMP0"
grep '^' "$TEMP0" && fail 'BAD: test tempcell list fails'

$AWK 'BEGIN { print 9, a=10, 11; print a; exit }' > "$TEMP1"
echo '9 10 11
10' > "$TEMP2"
diff "$TEMP1" "$TEMP2" || fail 'BAD: T.misc (embedded expression)'

echo "abc defgh ijkl" | $AWK '
  { $1 = ""; line = $0; print line; print $0; $0 = line; print $0 }' > "$TEMP1"
echo " defgh ijkl
 defgh ijkl
 defgh ijkl" > "$TEMP2"
diff "$TEMP1" "$TEMP2" || fail 'BAD: T.misc (assignment to $0)'

$AWK '
function min(a, b)
{
	if (a < b)
		return a
	else
		return b
}
BEGIN { exit }
'
if [[ $? -eq 139 ]]; then fail "BAD: T.misc function min dropped core"; fi

# The following should not give a syntax error message:
$AWK '
function expand(chart) {
	getline chart < "CHAR.ticks"
}
' > "$TEMP0"
grep '^' "$TEMP0" >/dev/null && fail 'BAD: T.misc expand error'

$AWK 'BEGIN { print 1e40 }' >/dev/null
if [[ $? -eq 139 ]]; then fail "BAD: T.misc 1E40 dropped core"; fi

# The following syntax error should not dump core:
$AWK '
$NF==3 {first=1}
$NF==2 && first==0 && (abs($1-o1)>120||abs($2-o2)>120) {print $0}
$NF==2 {o1=%1; o2=$2; first=0}
' 2>/dev/null
if [[ $? -eq 139 ]]; then fail "BAD: T.misc first/abs dropped core"; fi

# The following syntax error should not dump core:
# (the awk status is saved first so that grep does not overwrite it)
$AWK '{ n = split($1, address, !); print address[1] }' 2> "$TEMP0"
awk_status=$?
grep 'illegal statement' "$TEMP0" >/dev/null || fail 'BAD: T.misc split error'
if [[ $awk_status -eq 139 ]]; then fail "BAD: T.misc split! dropped core"; fi

# The following should cause a syntax error message
$AWK 'BEGIN {"hello"}' 2> "$TEMP0"
grep 'illegal statement' "$TEMP0" >/dev/null || fail 'BAD: T.misc hello error'

# The following should give a syntax error message:
$AWK '
function pile(c,     r) {
	r = ++pile[c]
}

{ pile($1) }
' 2> "$TEMP0"
grep 'context is' "$TEMP0" >/dev/null || fail 'BAD: T.misc pile error'

# This should complain about missing atan2 argument:
$AWK 'BEGIN { atan2(1) }' 2> "$TEMP0"
grep 'requires two arg' "$TEMP0" >/dev/null || fail 'BAD: T.misc atan2 error'

# This should not core dump:
$AWK 'BEGIN { f() }
function f(A) { delete A[1] }
'
if [[ $? -eq 139 ]]; then fail "BAD: T.misc delete dropped core"; fi

# nasty one: should not be able to overwrite constants
$AWK 'BEGIN { gsub(/ana/,"anda","banana")
		printf "the monkey ate a %s\n", "banana" }
' >/dev/null 2> "$TEMP0"
grep 'syntax error' "$TEMP0" >/dev/null || fail 'BAD: T.misc gsub banana error'

# nasty one: should not be able to overwrite constants
$AWK 'BEGIN { sub(/ana/,"anda","banana")
		printf "the monkey ate a %s\n", "banana" }
' >/dev/null 2> "$TEMP0"
grep 'syntax error' "$TEMP0" >/dev/null || fail 'BAD: T.misc sub banana error'

# line numbers used to double-count comments
# (the awk program below must stay exactly five lines long)
$AWK '#
#
#
/x
' >/dev/null 2> "$TEMP0"
grep 'line [45]' "$TEMP0" >/dev/null || fail 'BAD: T.misc lineno'

# The expected bytes (x FF CR BS VT BEL \ y) are spelled with printf
# escapes so that no raw control characters have to live in this file.
printf 'x\f\r\b\v\a\\y\n' > "$TEMP1"
$AWK 'BEGIN { print "x\f\r\b\v\a\\y" }' > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc weird chars'

echo 0 > "$TEMP1"
$AWK '	BEGIN { exit }
	{ print }
	END { print NR }' > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc BEGIN exit'

echo 1 > "$TEMP1"
$AWK '	{ exit }
	END { print NR }' /etc/passwd > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc immmediate exit'

echo 1 > "$TEMP1"
$AWK '	{i = 1; while (i <= NF) {if (i == NF) exit; i++ } }
	END { print NR }' /etc/passwd > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc immmediate exit 2'

echo 1 > "$TEMP1"
$AWK '	function f() {
		i = 1; while (i <= NF) {if (i == NF) return NR; i++ }
	}
	{ if (f() == 1) exit }
	END { print NR }' /etc/passwd > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc while return'

echo 1 > "$TEMP1"
$AWK '	function f() {
		split("a b c", arr)
		for (i in arr) {if (i == 3) return NR; i++ }
	}
	{ if (f() == 1) exit }
	END { print NR }' /etc/passwd > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc while return'

echo 1 > "$TEMP1"
$AWK '	{i = 1; do { if (i == NF) exit; i++ } while (i <= NF) }
	END { print NR }' /etc/passwd > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc immmediate exit 3'

echo 1 > "$TEMP1"
$AWK '	function f() {
		i = 1; do { if (i == NF) return NR; i++ } while (i <= NF)
	}
	{ if (f() == 1) exit }
	END { print NR }' /etc/passwd > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc do return'

echo 1 > "$TEMP1"
$AWK '	{i = 1; do { if (i == NF) break; i++ } while (i <= NF); exit }
	END { print NR }' /etc/passwd > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc immmediate exit 4'

echo 1 > "$TEMP1"
$AWK '	{ n = split($0, x)
	  for (i in x) {
		if (i == 1)
			exit } }
	END { print NR }' /etc/passwd > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc immmediate exit 5'

echo XXXXXXXX > "$TEMP1"
$AWK 'BEGIN { s = "ab\fc\rd\be"
	t = s; gsub("[" s "]", "X", t); print t }' > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc weird escapes in char class'

$AWK '{}' /etc/passwd glop/glop > "$TEMP0" 2> "$TEMP2"
grep "can't open.*glop" "$TEMP2" >/dev/null || fail "BAD: T.misc can't open"

echo '


a
aa

b


c

' > "$TEMP0"
echo 3 > "$TEMP1"
$AWK 'BEGIN { RS = "" }; END { print NR }' "$TEMP0" > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc RS botch'

# The line layout of this program matters: the stray braces must sit on
# its fifth line for the expected "source line 5" diagnostic.
$AWK 'BEGIN \
	{
		print "hello, world"
	}
}}}' > "$TEMP1" 2> "$TEMP2"
grep 'source line 5' "$TEMP2" >/dev/null 2>&1 || fail 'BAD: T.misc continuation line number'


echo 111 222 333 > "$TEMP0"
$AWK '{ f[1]=1; f[2]=2; print $f[1], $f[1]++, $f[2], f[1], f[2] }' "$TEMP0" > "$TEMP2"
echo 111 111 222 2 2 > "$TEMP1"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc $f[1]++'


# These should be syntax errors
$AWK . 2> "$TEMP0"
grep "syntax error" "$TEMP0" >/dev/null || fail "BAD: T.misc syntax error . fails"

$AWK .. 2> "$TEMP0"
grep "syntax error" "$TEMP0" >/dev/null || fail "BAD: T.misc syntax error .. fails"

$AWK .E. 2> "$TEMP0"
grep "syntax error" "$TEMP0" >/dev/null || fail "BAD: T.misc syntax error .E. fails"

$AWK .++. 2> "$TEMP0"
grep "syntax error" "$TEMP0" >/dev/null || fail "BAD: T.misc syntax error .++. fails"



# These should be syntax errors
$AWK '$' 2> "$TEMP0"
grep "unexpected" "$TEMP0" >/dev/null || fail "BAD: T.misc syntax error $ fails"

$AWK '{print $' 2> "$TEMP0"
grep "unexpected" "$TEMP0" >/dev/null || fail "BAD: T.misc syntax error \$2 fails"

$AWK '"' 2> "$TEMP0"
grep "non-terminated" "$TEMP0" >/dev/null || fail "BAD: T.misc bare quote fails"


# %c of 0 is explicit null byte

echo '3' > "$TEMP1"
$AWK 'BEGIN {printf("%c%c\n", 0, 0) }' | wc | $AWK '{print $3}' > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc null byte'

# non-terminated RE

$AWK /xyz > "$TEMP0" 2>&1
grep "non-terminated" "$TEMP0" >/dev/null || fail "BAD: T.misc non-terminated RE"

# next several were infinite loops, found by brian tsang.
# this is his example:

$AWK 'BEGIN {
    switch (substr("x",1,1)) {
    case /ask.com/:
	break
    case "google":
	break
    }
}' > "$TEMP0" 2>&1
grep "illegal statement" "$TEMP0" >/dev/null || fail "BAD: T.misc looping syntax error 1"

$AWK 'BEGIN { s { c /./ } }' > "$TEMP0" 2>&1
grep "illegal statement" "$TEMP0" >/dev/null || fail "BAD: T.misc looping syntax error 2"

$AWK 'BEGIN { s { c /../ } }' > "$TEMP0" 2>&1
grep "illegal statement" "$TEMP0" >/dev/null || fail "BAD: T.misc looping syntax error 3"

$AWK 'BEGIN {printf "%2$s %1$s\n", "a", "b"}' > "$TEMP0" 2>&1
grep "'$' not permitted in awk formats" "$TEMP0" >/dev/null || fail "BAD: T.misc '$' not permitted in formats"

# In the next three checks the plain "awk" found on PATH (not $AWK)
# produces the expected output: its last per-record line is compared with
# what $AWK still sees in END.
echo 'a
b c
de fg hi' > "$TEMP0"
$AWK 'END { print NF, $0 }' "$TEMP0" > "$TEMP1"
awk '{ print NF, $0 }' "$TEMP0"| tail -1 > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc END must preserve $0'

echo 'fg hi' > "$TEMP0"
$AWK 'END { print NF, $0 }' "$TEMP0" > "$TEMP1"
awk '{ print NF, $0 }' "$TEMP0"| tail -1 > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc END must preserve $0'

echo '' > "$TEMP0"
$AWK 'END { print NF, $0 }' "$TEMP0" > "$TEMP1"
awk '{ print NF, $0 }' "$TEMP0"| tail -1 > "$TEMP2"
cmp -s "$TEMP1" "$TEMP2" || fail 'BAD: T.misc END must preserve $0'


LC_ALL= LC_NUMERIC=ru_RU.ISO8859-5 $AWK 'BEGIN {
	"echo 1,200" | getline;
	if ($1 == 1.2) {
		printf "good ";
	} else {
		printf "bad ";
	}
	n = 2.3;
	print ($1 + 0.1), (n + 0.1);
}' > "$TEMP1"
echo 'good 1,3 2,4' > "$TEMP2"
diff "$TEMP1" "$TEMP2" || fail 'BAD: T.misc LC_NUMERIC should change radix'

$AWK 'function foo(q) {
	return (q = q);
}
BEGIN { print foo("h"); }' > "$TEMP1"
echo 'h' > "$TEMP2"
diff "$TEMP1" "$TEMP2" || fail 'BAD: T.misc return tempcell'

$AWK -v RECSIZE=8192 'BEGIN {
	for (c = 0; c < 3; c++) {
		a = (RECSIZE % 2 > 0 ? "5" : "55");
		while (length(a) < RECSIZE + c) {
			a = a " 5";
		}
		$0 = a;
		print $2;
	}
}' > "$TEMP1"
printf '5\n5\n5\n' > "$TEMP2"
diff "$TEMP1" "$TEMP2" || fail 'BAD: T.misc initial fields overflow'

exit $RESULT