#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#

#
# This test checks whether the AST "cut" utility's "-d" option
# works with multibyte characters
#
# This was reported as CR #6904575 ("cut -d with multibyte character no longer works"):
# ------------ snip ------------
# cut -d with multibyte char no longer work correctly.
#
# $ echo $LANG
# ja
# $ od -tx1 mb.eucjp
# 0000000 a4 a2 a4 a4 a4 a4 a4 a6 a4 a8 0a
# 0000013
# $ od -tx1 delim
# 0000000 a4 a4 0a
# 0000003
# $ wc -m mb.eucjp
# 6 mb.eucjp
#
# It has 5 characters (2byte each).
#
# $ /usr/bin/cut -d `cat delim` -f1 mb.eucjp | od -tx1
# 0000000 0a
# 0000001
#
# correct output is
#
# 0000000 a4 a2 0a
# 0000003
#
# files are attached.
# ------------ snip ------------
#

# test setup

#
# err_exit - report a test failure on stderr and count it.
#
# Always invoked through the alias below, which inserts $LINENO as the
# first argument:
#   $1    line number of the failing check
#   $2..  message text
# Output has the form "<tab>Command[line]: message".
# "Errors" stops increasing once it reaches 127; it is used as the
# script's exit code.
#
function err_exit
{
	print -u2 -n "\t"
	# label is quoted so that "[...]" is never treated as a glob pattern
	print -u2 -r "${Command}[$1]:" "${@:2}"
	(( Errors < 127 && Errors++ ))
}
# must be defined before test1/test2 are parsed so the alias is expanded
# inside them
alias err_exit='err_exit $LINENO'

set -o nounset
Command=${0##*/}
integer Errors=0

typeset ocwd
typeset tmpdir
typeset out

# create temporary test directory
ocwd="$PWD"
# without a temporary directory nothing below can run, so bail out at once
tmpdir="$(mktemp -t -d "test_sun_solaris_cr_6904575_cut_-d_with_multibyte_character_no_longer_works.XXXXXXXX")" || { err_exit "Cannot create temporary directory" ; exit $((Errors)) ; }

cd "${tmpdir}" || { err_exit "cd ${tmpdir} failed." ; exit $((Errors)) ; }


# run tests


#
# test1 - table-driven check of "cut -f1 -d<delim>".
#
#   $1  cut command to test
#
# Each test case provides printf(1) format strings for the input, the
# cut arguments and the expected output; they are expanded in the test
# case's locale so that \u[...] escapes become multibyte characters.
# Failures are reported via err_exit; the function itself returns 0.
#
function test1
{
	typeset cut_cmd="$1"
	typeset testid
	typeset out
	typeset testname
	compound saved_locale

	# save locale information
	[[ -v LC_ALL   ]] && saved_locale.LC_ALL="${LC_ALL}"
	[[ -v LC_CTYPE ]] && saved_locale.LC_CTYPE="${LC_CTYPE}"
	[[ -v LANG     ]] && saved_locale.LANG="${LANG}"

	compound -r -a testcases=(
		(
			typeset name="ascii_plain"
			typeset locale="C"
			typeset input_format='abcdefg'
			typeset -a cut_args_format=( "-f1" "-d" "e" )
			typeset output_format='abcd'
		)
		(
			typeset name="unicode_plain"
			typeset locale="<unicode>"
			typeset input_format='abcd\u[20ac]fg'
			typeset -a cut_args_format=( '-f1' '-d' '\u[20ac]' )
			typeset output_format='abcd'
		)
		(
			typeset name="unicode_plain2"
			typeset locale="<unicode>"
			typeset input_format='abcd\u[20ac]fg'
			typeset -a cut_args_format=( '-f1' '-d' 'f' )
			typeset output_format='abcd\u[20ac]'
		)
	)

	for testid in "${!testcases[@]}" ; do
		nameref tc=testcases[${testid}]
		testname="${cut_cmd}/${tc.name}"

		if [[ "${tc.locale}" == "<unicode>" ]] ; then
			# keep an already active UTF-8 locale, otherwise fall back
			# to en_US.UTF-8 (shell pattern match, not a regex)
			if [[ ! -v LC_ALL || "${LC_ALL}" != *.UTF-8 ]] ; then
				export LC_ALL='en_US.UTF-8'
			fi
		else
			export LC_ALL="${tc.locale}"
		fi

		# build "cut_args" array with multibyte characters in the current locale
		typeset -a cut_args
		integer arg_index
		for arg_index in "${!tc.cut_args_format[@]}" ; do
			cut_args+=( "$( printf -- "${tc.cut_args_format[arg_index]}" )" )
		done

		typeset output_format="$( printf -- "${tc.output_format}" )"

		#printf "args=|%q|\n" "${cut_args[@]}"

		# the child shell receives the arguments as "$@", so multibyte
		# arguments reach cut without being re-parsed
		out="$(printf "${tc.input_format}" | ${SHELL} -c "${cut_cmd} \"\$@\"" dummy "${cut_args[@]}" 2>&1)" || err_exit "${testname}: Command returned exit code $?"
		# right-hand side is intentionally unquoted: pattern match
		[[ "${out}" == ${output_format} ]] || err_exit "${testname}: Expected match for $(printf "%q\n" "${output_format}"), got $(printf "%q\n" "${out}")"

		# cleanup and restore locale settings
		unset cut_args arg_index
		if [[ -v saved_locale.LC_ALL ]] ; then
			LC_ALL="${saved_locale.LC_ALL}"
		else
			unset LC_ALL
		fi
		if [[ -v saved_locale.LC_CTYPE ]] ; then
			LC_CTYPE="${saved_locale.LC_CTYPE}"
		else
			unset LC_CTYPE
		fi
		if [[ -v saved_locale.LANG ]] ; then
			LANG="${saved_locale.LANG}"
		else
			unset LANG
		fi
	done

	return 0
}


#
# test2 - replay the scenario from the bug report.
#
#   $1  cut command to test
#
# Writes the two byte sequences from the report into "mb.eucjp" and
# "delim" in the current directory, runs cut under LC_ALL=ja_JP.eucJP
# with the content of "delim" as delimiter and compares the "od -tx1"
# dump of the result with the output the report calls correct.
# Failures are reported via err_exit; the function itself returns 0.
#
function test2
{
	typeset cutcmd=$1
	typeset testname="${cutcmd}"
	typeset out

	# create files
	printf "\xa4\xa2\xa4\xa4\xa4\xa4\xa4\xa6\xa4\xa8\x0a" >"mb.eucjp"
	printf "\xa4\xa4\x0a" >"delim"

	# run test
	# pipefail/errexit make a failing cut visible in the exit code
	# even though od is the last command of the pipeline
	out=$( LC_ALL=ja_JP.eucJP ${SHELL} -o pipefail -o errexit -c '$1 -d $(cat delim) -f1 "mb.eucjp" | od -tx1' dummy "${cutcmd}" 2>&1 ) || err_exit "${testname}: Test failed with exit code $?"
	[[ "${out}" == $'0000000 a4 a2 0a\n0000003' ]] || err_exit "${testname}: Expected \$'0000000 a4 a2 0a\n0000003', got $(printf "%q\n" "${out}")"

	# cleanup
	rm "mb.eucjp" "delim"

	return 0
}

#for cmd in "/usr/bin/cut" "cut" ; do
for cmd in "cut" ; do
	test1 "${cmd}"
	test2 "${cmd}"
done



# leave the temporary directory before removing it
cd "${ocwd}" || err_exit "cd ${ocwd} failed."
rmdir "${tmpdir}" || err_exit "Cannot remove temporary directory ${tmpdir}."

# tests done
exit $((Errors))