########################################################################
#                                                                      #
#               This software is part of the ast package               #
#          Copyright (c) 1982-2012 AT&T Intellectual Property          #
#                      and is licensed under the                       #
#                 Eclipse Public License, Version 1.0                  #
#                    by AT&T Intellectual Property                     #
#                                                                      #
#                A copy of the License is available at                 #
#          http://www.eclipse.org/org/documents/epl-v10.html           #
#         (with md5 checksum b35adb5213ca9657e911e9befb180842)         #
#                                                                      #
#              Information and Software Systems Research               #
#                            AT&T Research                             #
#                           Florham Park NJ                            #
#                                                                      #
#                  David Korn <dgk@research.att.com>                   #
#                                                                      #
########################################################################

# Locale regression tests.  Each check compares the output of a command
# (usually run through $SHELL) against an expected value and reports any
# mismatch through err_exit; the script's exit status is the error count.

# err_exit LINE MESSAGE...
#	Write "<tab>Command[LINE]: MESSAGE..." to stderr and increment Errors.
#	Callers use the alias below, which passes $LINENO as LINE.
function err_exit
{
	print -u2 -n "\t"
	# quoted: an unquoted name[123]: word is a bracket glob pattern
	print -u2 -r "${Command}[$1]:" "${@:2}"
	let Errors+=1
}
alias err_exit='err_exit $LINENO'

Command=${0##*/}
integer Errors=0

# start from a clean locale environment
unset LANG ${!LC_*}

# all scratch files live in a private directory that is removed on exit
tmp=$(mktemp -dt) || { err_exit mktemp -dt failed; exit 1; }
trap 'cd /; rm -rf "$tmp"' EXIT
cd "$tmp" || exit

# the error message for running "/" must be the same before and after a
# subshell that switched to the debug locale
a=$($SHELL -c '/' 2>&1 | sed -e "s,.*: *,," -e "s, *\[.*,,")
b=$($SHELL -c '(LC_ALL=debug / 2>/dev/null); /' 2>&1 | sed -e "s,.*: *,," -e "s, *\[.*,,")
[[ "$b" == "$a" ]] || err_exit "locale not restored after subshell -- expected '$a', got '$b'"
b=$($SHELL -c '(LC_ALL=debug; / 2>/dev/null); /' 2>&1 | sed -e "s,.*: *,," -e "s, *\[.*,,")
[[ "$b" == "$a" ]] || err_exit "locale not restored after subshell -- expected '$a', got '$b'"

# test shift-jis \x81\x40 ... \x81\x7E encodings
# (shift char followed by 7 bit ascii)

typeset -i16 chr
for locale in $(PATH=/bin:/usr/bin locale -a 2>/dev/null | grep -i jis)
do	export LC_ALL=$locale
	for ((chr=0x40; chr<=0x7E; chr++))
	do	c=${chr#16#}
		for s in \\x81\\x$c \\x$c
		do	b="$(printf "$s")"
			eval n=\$\'$s\'
			[[ $b == "$n" ]] || err_exit "LC_ALL=$locale printf difference for \"$s\" -- expected '$n', got '$b'"
			# $b is deliberately unquoted here: quoted and unquoted
			# expansion must print the same thing
			u=$(print -- $b)
			q=$(print -- "$b")
			[[ $u == "$q" ]] || err_exit "LC_ALL=$locale quoted print difference for \"$s\" -- $b => '$u' vs \"$b\" => '$q'"
		done
	done
done

# this locale is supported by ast on all platforms
# EU for { decimal_point="," thousands_sep="." }

locale=C_EU.UTF-8

export LC_ALL=C

# test multibyte value/trace format -- $'\303\274' is UTF-8 u-umlaut

c=$(LC_ALL=C $SHELL -c "printf $':%2s:\n' $'\303\274'")
u=$(LC_ALL=$locale $SHELL -c "printf $':%2s:\n' $'\303\274'" 2>/dev/null)
# only run the trace checks when the two locales format the value differently
if	[[ "$c" != "$u" ]]
then	LC_ALL=$locale
	x=$'+2+ typeset item.text\
+3+ item.text=\303\274\
+4+ print -- \303\274\
\303\274\
+5+ eval $\'arr[0]=(\\n\\ttext=\\303\\274\\n)\'
+2+ arr[0].text=ü\
+6+ print -- \303\274\
ü\
+7+ eval txt=$\'(\\n\\ttext=\\303\\274\\n)\'
+2+ txt.text=\303\274\
+8+ print -- \'(\' text=$\'\\303\\274\' \')\'\
( text=\303\274 )'
	# the -c script starts with a newline so that its first command is on
	# line 2, matching the +2+ ... +8+ prefixes expected above
	u=$(LC_ALL=$locale PS4='+$LINENO+ ' $SHELL -x -c "
		item=(typeset text)
		item.text=$'\303\274'
		print -- \"\${item.text}\"
		eval \"arr[0]=\$item\"
		print -- \"\${arr[0].text}\"
		eval \"txt=\${arr[0]}\"
		print -- \$txt
	" 2>&1)
	[[ "$u" == "$x" ]] || err_exit LC_ALL=$locale multibyte value/trace format failed

	x=$'00fc\n20ac'
	u=$(LC_ALL=$locale $SHELL -c $'printf "%04x\n" \$\'\"\303\274\"\' \$\'\"\xE2\x82\xAC\"\'')
	[[ $u == $x ]] || err_exit LC_ALL=$locale multibyte %04x printf format failed
fi

# substring test; skipped unless wc -m counts the 10 characters (9 plus newline)
if	(( $($SHELL -c $'export LC_ALL='$locale$'; print -r "\342\202\254\342\202\254\342\202\254\342\202\254w\342\202\254\342\202\254\342\202\254\342\202\254" | wc -m' 2>/dev/null) == 10 ))
then	LC_ALL=$locale $SHELL -c b1=$'"\342\202\254\342\202\254\342\202\254\342\202\254w\342\202\254\342\202\254\342\202\254\342\202\254"; [[ ${b1:4:1} == w ]]' || err_exit 'multibyte ${var:offset:len} not working correctly'
fi

#$SHELL -c 'export LANG='$locale'; printf "\u[20ac]\u[20ac]" > $tmp/two_euro_chars.txt'
printf $'\342\202\254\342\202\254' > $tmp/two_euro_chars.txt
# expected wc -C results with LC_ALL=C, LC_ALL unset (LANG=$locale), LC_ALL=C
exp="6 2 6"
set -- $($SHELL -c "
	unset LC_CTYPE
	export LANG=$locale
	export LC_ALL=C
	command wc -C < $tmp/two_euro_chars.txt
	unset LC_ALL
	command wc -C < $tmp/two_euro_chars.txt
	export LC_ALL=C
	command wc -C < $tmp/two_euro_chars.txt
")
got=$*
[[ $got == $exp ]] || err_exit "command wc LC_ALL default failed -- expected '$exp', got '$got'"
# same sequence with wc loaded as a builtin
set -- $($SHELL -c "
	if	builtin wc 2>/dev/null || builtin -f cmd wc 2>/dev/null
	then	unset LC_CTYPE
		export LANG=$locale
		export LC_ALL=C
		wc -C < $tmp/two_euro_chars.txt
		unset LC_ALL
		wc -C < $tmp/two_euro_chars.txt
		export LC_ALL=C
		wc -C < $tmp/two_euro_chars.txt
	fi
")
got=$*
[[ $got == $exp ]] || err_exit "builtin wc LC_ALL default failed -- expected '$exp', got '$got'"

# multibyte char straddling buffer boundary

{
	unset i
	integer i
	# 163 comment lines of 50 bytes each, then a shorter '#' line, pad the
	# script ahead of the multibyte text
	for ((i = 0; i < 163; i++))
	do	print "#234567890123456789012345678901234567890123456789"
	done
	printf $'%-.*c\n' 15 '#'
	for ((i = 0; i < 2; i++))
	do	print $': "\xe5\xae\x9f\xe8\xa1\x8c\xe6\xa9\x9f\xe8\x83\xbd\xe3\x82\x92\xe8\xa1\xa8\xe7\xa4\xba\xe3\x81\x97\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82" :'
	done
} > ko.dat

LC_ALL=$locale $SHELL < ko.dat 2> /dev/null || err_exit "script with multibyte char straddling buffer boundary fails"

# Each row: expected output of $(( exp )) followed by the values to export
# for LC_ALL, LC_NUMERIC and LANG ('' means the variable is not exported).
#	exp	LC_ALL		LC_NUMERIC	LANG
set -- \
	2,5	$locale		C		''		\
	2.5	C		$locale		''		\
	2,5	$locale		''		C		\
	2,5	''		$locale		C		\
	2.5	C		''		$locale		\
	2.5	''		C		$locale

unset a b c
unset LC_ALL LC_NUMERIC LANG
integer a b c
while	(( $# >= 4 ))
do	exp=$1
	unset H V
	typeset -A H
	typeset -a V
	[[ $2 ]] && V[0]="export LC_ALL=$2;"
	[[ $3 ]] && V[1]="export LC_NUMERIC=$3;"
	[[ $4 ]] && V[2]="export LANG=$4;"
	# try every ordering of the three export statements; H remembers the
	# command strings already run so duplicates are skipped
	for ((a = 0; a < 3; a++))
	do	for ((b = 0; b < 3; b++))
		do	if	(( b != a ))
			then	for ((c = 0; c < 3; c++))
				do	if	(( c != a && c != b ))
					then	T=${V[$a]}${V[$b]}${V[$c]}
						if	[[ ! ${H[$T]} ]]
						then	H[$T]=1
							got=$($SHELL -c "${T}print \$(( $exp ))" 2>&1)
							[[ $got == $exp ]] || err_exit "${T} sequence failed -- expected '$exp', got '$got'"
						fi
					fi
				done
			fi
		done
	done
	shift 4
done

# setlocale(LC_ALL,"") after setlocale() initialization

# the join field separator is the 3-byte sequence \357\274\240
printf 'f1\357\274\240f2\n' > input1
printf 't2\357\274\240f1\n' > input2
printf '\357\274\240\n' > delim
print "export LC_ALL=$locale
join -j1 1 -j2 2 -o 1.1 -t \$(cat delim) input1 input2 > out" > script
$SHELL -c 'unset LANG ${!LC_*}; $SHELL ./script' ||
err_exit "join test script failed -- exit code $?"
# check the output file written by the join script run above
exp="f1"
got="$(<out)"
[[ $got == "$exp" ]] || err_exit "LC_ALL test script failed -- expected '$exp', got '$got'"

# multibyte identifiers

exp=OK
got=$(LC_ALL=C.UTF-8 $SHELL -c $'\u[5929]=OK; print ${\u[5929]}' 2>&1)
[[ $got == "$exp" ]] || err_exit "multibyte variable definition/expansion failed -- expected '$exp', got '$got'"
got=$(LC_ALL=C.UTF-8 $SHELL -c $'function \u[5929]\n{\nprint OK;\n}; \u[5929]' 2>&1)
[[ $got == "$exp" ]] || err_exit "multibyte ksh function definition/execution failed -- expected '$exp', got '$got'"
got=$(LC_ALL=C.UTF-8 $SHELL -c $'\u[5929]()\n{\nprint OK;\n}; \u[5929]' 2>&1)
[[ $got == "$exp" ]] || err_exit "multibyte posix function definition/execution failed -- expected '$exp', got '$got'"

# this locale is supported by ast on all platforms
# mainly used to debug multibyte and message translation code
# however wctype is not supported but that's ok for these tests

locale=debug

# The tests below expect each <...> group to count as one character in this
# locale, so a<1z>b<2yx>c must have length 5.  The here-document body is kept
# at column 0 so it does not depend on tab stripping by <<-.
if	[[ "$(LC_ALL=$locale $SHELL <<- \+EOF+
x=a<1z>b<2yx>c
print ${#x}
+EOF+)" != 5
	]]
then	err_exit '${#x} not working with multibyte locales'
fi

# cd to a missing directory before and after switching locale: the two
# error messages must differ, i.e. two unique lines of output
dir=_not_found_
exp=2
for cmd in \
	"cd $dir; export LC_ALL=debug; cd $dir" \
	"cd $dir; LC_ALL=debug cd $dir"
do	got=$($SHELL -c "$cmd" 2>&1 | sort -u | wc -l)
	(( ${got:-0} == $exp )) || err_exit "'$cmd' sequence failed -- error message not localized"
done
# Each command sequence emits three cd errors.  Distinct message texts are
# numbered in order of first appearance, so 121 means the middle message
# differs from the first and the last matches the first again.
exp=121
for lc in LANG LC_MESSAGES LC_ALL
do	for cmd in "($lc=$locale;cd $dir)" "$lc=$locale;cd $dir;unset $lc" "function tst { typeset $lc=$locale;cd $dir; }; tst"
	do	tst="$lc=C;cd $dir;$cmd;cd $dir;:"
		$SHELL -c "unset LANG \${!LC_*}; $SHELL -c '$tst'" > out 2>&1 ||
		err_exit "'$tst' failed -- exit status $?"
		integer id=0
		unset msg
		typeset -A msg
		got=
		while	read -r line
		do	# keep only the text after the last ':'
			line=${line##*:}
			if	[[ ! ${msg[$line]} ]]
			then	msg[$line]=$((++id))
			fi
			got+=${msg[$line]}
		done < out
		[[ $got == $exp ]] || err_exit "'$tst' failed -- expected '$exp', got '$got'"
	done
done

exp=123
got=$(LC_ALL=debug $SHELL -c "a<2A@>z=$exp; print \$a<2A@>z")
[[ $got == $exp ]] || err_exit "multibyte debug locale \$a<2A@>z failed -- expected '$exp', got '$got'"

unset LC_ALL LC_MESSAGES
export LANG=debug
# message
#	Print the $"..." (translatable) form of a fixed string, so the output
#	shows which locale was in effect when the function ran.
function message
{
	print -r $"An error occurred."
}
# either debug-locale rendering is accepted
exp=$'(libshell,3,46)\nAn error occurred.\n(libshell,3,46)'
alt=$'(debug,message,libshell,An error occurred.)\nAn error occurred.\n(debug,message,libshell,An error occurred.)'
got=$(message; LANG=C message; message)
[[ $got == "$exp" || $got == "$alt" ]] || {
	EXP=$(printf %q "$exp")
	ALT=$(printf %q "$alt")
	GOT=$(printf %q "$got")
	err_exit "LANG change not seen by function -- expected $EXP or $ALT, got $GOT"
}

a_thing=fish
got=$(print -r aa$"\\ahello \" /\\${a_thing}/\\"zz)
exp='aa(debug,'$Command',libshell,\ahello " /\fish/\)zz'
[[ $got == "$exp" ]] || err_exit "$\"...\" containing expansions fails: expected $exp, got $got"

exp='(debug,'$Command',libshell,This is a string\n)'
typeset got=$"This is a string\n"
[[ $got == "$exp" ]] || err_exit "$\"...\" in assignment expansion fails: expected $exp got $got"

unset LANG

LC_ALL=C
x=$"hello"
[[ $x == hello ]] || err_exit 'assignment of message strings not working'

# tests for multibyte character at buffer boundary
{
	# generated script: a here-document of 163 filler lines, one line
	# containing debug-locale multibyte sequences, then 9 more filler lines
	print 'cat << \\EOF'
	for ((i=1; i < 164; i++))
	do	print 123456789+123456789+123456789+123456789+123456789
	done
	print $'next character is multibyte<2b|>c<3d|\>foo'
	for ((i=1; i < 10; i++))
	do	print 123456789+123456789+123456789+123456789+123456789
	done
	print EOF
} > script$$.1
chmod +x script$$.1
x=$( LC_ALL=debug $SHELL ./script$$.1)
[[ ${#x} == 8641 ]] || err_exit 'here doc contains wrong number of chars with multibyte locale'
[[ $x == *$'next character is multibyte<2b|>c<3d|\>foo'* ]] || err_exit "here_doc doesn't contain line with multibyte chars"


x=$(LC_ALL=debug $SHELL -c 'x="a<2b|>c";print -r -- ${#x}')
(( x == 3 )) || err_exit 'character length of multibyte character should be 3'
# the value is 7 bytes, so a 10-wide field leaves three pad spaces
x=$(LC_ALL=debug $SHELL -c 'typeset -R10 x="a<2b|>c";print -r -- "${x}"')
[[ $x == '   a<2b|>c' ]] || err_exit 'typeset -R10 should begin with three spaces'
x=$(LC_ALL=debug $SHELL -c 'typeset -L10 x="a<2b|>c";print -r -- "${x}"')
[[ $x == 'a<2b|>c   ' ]] || err_exit 'typeset -L10 should end in three spaces'

# only run when en_US.UTF-8 is usable: the euro sign must be one character
if	$SHELL -c "export LC_ALL=en_US.UTF-8; c=$'\342\202\254'; [[ \${#c} == 1 ]]" 2>/dev/null
then	LC_ALL=en_US.UTF-8
	unset i p1 p2 x
	# collect, comma separated, every code point that fails [[:space:]]
	for i in 9 b c d 20 1680 2000 2001 2002 2003 2004 2005 2006 2008 2009 200a 2028 2029 3000 # 1803 2007 202f 205f
	do	if	! eval "[[ \$'\\u[$i]' == [[:space:]] ]]"
		then	x+=,$i
		fi
	done
	if	[[ $x ]]
	then	# a second comma means more than one failure: use the plural form
		if	[[ $x == ,*,* ]]
		then	p1=s p2="are not space characters"
		else	p1= p2="is not a space character"
		fi
		err_exit "unicode char$p1 ${x#?} $p2 in locale $LC_ALL"
	fi
	unset x
	x=$(printf "hello\u[20ac]\xee world")
	[[ $(print -r -- "$x") == $'hello\u[20ac]\xee world' ]] || err_exit '%q with unicode and non-unicode not working'
	if	[[ $(whence od) ]]
	then	got='68 65 6c 6c 6f e2 82 ac ee 20 77 6f 72 6c 64 0a'
		[[ $(print -r -- "$x" | od -An -tx1 | xargs echo) == "$got" ]] || err_exit "incorrect string from printf %q"
	fi

fi

# exit status is the number of failed checks, capped at 125
exit $((Errors<125?Errors:125))