#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#

#
# This test checks whether the AST "cut" utility's "-d" option
# works with multibyte characters
#
# This was reported as CR #6904575 ("cut -d with multibyte character no longer works"):
# ------------ snip ------------
# cut -d with multibyte char no longer work correctly.
#
# $ echo $LANG
# ja
# $ od -tx1 mb.eucjp
# 0000000 a4 a2 a4 a4 a4 a4 a4 a6 a4 a8 0a
# 0000013
# $ od -tx1 delim
# 0000000 a4 a4 0a
# 0000003
# $ wc -m mb.eucjp
# 6 mb.eucjp
#
# It has 5 characters (2byte each).
#
# $ /usr/bin/cut -d `cat delim` -f1 mb.eucjp | od -tx1
# 0000000 0a
# 0000001
#
# correct output is
#
# 0000000 a4 a2 0a
# 0000003
#
# files are attached.
# ------------ snip ------------
#

# test setup

#
# err_exit - report a test failure on stderr and count it.
#
# $1     line number of the failing check (supplied automatically by
#        the "err_exit" alias below, which passes $LINENO)
# $2...  failure message
#
# The counter is capped at 127 because the script finishes with
# "exit $((Errors))", i.e. the error count doubles as the exit status.
#
function err_exit
{
	print -u2 -n "\t"
	print -u2 -r ${Command}[$1]: "${@:2}"
	(( Errors < 127 && Errors++ ))
}
# NOTE: the alias must be defined before the functions that use it are
# read, so every later "err_exit ..." call carries its own line number.
alias err_exit='err_exit $LINENO'

set -o nounset
Command=${0##*/}
integer Errors=0

typeset ocwd
typeset tmpdir
typeset out

# create temporary test directory
ocwd="$PWD"
tmpdir="$(mktemp -t -d "test_sun_solaris_cr_6904575_cut_-d_with_multibyte_character_no_longer_works.XXXXXXXX")" || err_exit "Cannot create temporary directory"

cd "${tmpdir}" || { err_exit "cd ${tmpdir} failed." ; exit $((Errors)) ; }


# run tests


#
# test1 - table-driven check of "cut -f1 -d <delim>" for ASCII and
# Unicode delimiters.
#
# $1  name of the cut command to test
#
# Each test case supplies printf(1)-style format strings for the input,
# the cut arguments and the expected output; they are expanded with
# printf *after* the test locale has been selected so that "\u[...]"
# escapes are encoded in that locale.  A locale of "<unicode>" means
# "any UTF-8 locale".  Failures are reported through err_exit; the
# function itself always returns 0.
#
function test1
{
	typeset cut_cmd="$1"
	typeset testid
	typeset out
	typeset testname
	compound saved_locale

	# save locale information
	[[ -v LC_ALL   ]] && saved_locale.LC_ALL="${LC_ALL}"
	[[ -v LC_CTYPE ]] && saved_locale.LC_CTYPE="${LC_CTYPE}"
	[[ -v LANG     ]] && saved_locale.LANG="${LANG}"

	compound -r -a testcases=(
		(
			typeset name="ascii_plain"
			typeset locale="C"
			typeset input_format='abcdefg'
			typeset -a cut_args_format=( "-f1" "-d" "e" )
			typeset output_format='abcd'
		)
		(
			typeset name="unicode_plain"
			typeset locale="<unicode>"
			typeset input_format='abcd\u[20ac]fg'
			typeset -a cut_args_format=( '-f1' '-d' '\u[20ac]' )
			typeset output_format='abcd'
		)
		(
			typeset name="unicode_plain2"
			typeset locale="<unicode>"
			typeset input_format='abcd\u[20ac]fg'
			typeset -a cut_args_format=( '-f1' '-d' 'f' )
			typeset output_format='abcd\u[20ac]'
		)
	)

	for testid in "${!testcases[@]}" ; do
		nameref tc=testcases[${testid}]
		testname="${cut_cmd}/${tc.name}"

		if [[ "${tc.locale}" == "<unicode>" ]] ; then
			# Keep the caller's locale if it already is a UTF-8
			# one, otherwise fall back to en_US.UTF-8.  The
			# right-hand side is a shell (glob) pattern, not a
			# regular expression, so it has to be "*.UTF-8".
			if [[ ! -v LC_ALL || "${LC_ALL}" != *.UTF-8 ]] ; then
				export LC_ALL='en_US.UTF-8'
			else
				# make sure the child "cut" sees it, too
				export LC_ALL
			fi
		else
			export LC_ALL="${tc.locale}"
		fi

		# build "cut_args" array with multibyte characters in the current locale
		typeset -a cut_args
		integer arg_index
		for arg_index in "${!tc.cut_args_format[@]}" ; do
			cut_args+=( "$( printf -- "${tc.cut_args_format[arg_index]}" )" )
		done

		typeset output_format="$( printf -- "${tc.output_format}" )"

		#printf "args=|%q|\n" "${cut_args[@]}"

		# run cut via "${SHELL} -c" so that ${cut_cmd} is resolved
		# by a fresh shell and receives the arguments as "$@"
		out="$(printf "${tc.input_format}" | ${SHELL} -c "${cut_cmd} \"\$@\"" dummy "${cut_args[@]}" 2>&1)" || err_exit "${testname}: Command returned exit code $?"
		# right-hand side is deliberately unquoted: pattern match
		[[ "${out}" == ${output_format} ]] || err_exit "${testname}: Expected match for $(printf "%q\n" "${output_format}"), got $(printf "%q\n" "${out}")"

		# cleanup and restore locale settings
		unset cut_args arg_index
		[[ -v saved_locale.LC_ALL   ]] && LC_ALL="${saved_locale.LC_ALL}" || unset LC_ALL
		[[ -v saved_locale.LC_CTYPE ]] && LC_CTYPE="${saved_locale.LC_CTYPE}" || unset LC_CTYPE
		[[ -v saved_locale.LANG     ]] && LANG="${saved_locale.LANG}" || unset LANG
	done

	return 0
}


#
# test2 - replay the exact scenario from the bug report: an EUC-JP
# input file, a two-byte EUC-JP delimiter read from a file, and the
# result inspected with "od -tx1".
#
# $1  name of the cut command to test
#
# The files "mb.eucjp" and "delim" are created in (and removed from)
# the current directory.  Failures are reported through err_exit; the
# function itself always returns 0.
#
function test2
{
	typeset cutcmd=$1
	typeset testname="${cutcmd}"
	typeset out

	# create files
	printf "\xa4\xa2\xa4\xa4\xa4\xa4\xa4\xa6\xa4\xa8\x0a" >"mb.eucjp"
	printf "\xa4\xa4\x0a" >"delim"

	# run test
	out=$( LC_ALL=ja_JP.eucJP ${SHELL} -o pipefail -o errexit -c '$1 -d $(cat delim) -f1 "mb.eucjp" | od -tx1' dummy "${cutcmd}" 2>&1 ) || err_exit "${testname}: Test failed with exit code $?"
	[[ "${out}" == $'0000000 a4 a2 0a\n0000003' ]] || err_exit "${testname}: Expected \$'0000000 a4 a2 0a\n0000003', got $(printf "%q\n" "${out}")"

	# cleanup
	rm "mb.eucjp" "delim"

	return 0
}

#for cmd in "/usr/bin/cut" "cut" ; do
for cmd in "cut" ; do
	test1 "${cmd}"
	test2 "${cmd}"
done



cd "${ocwd}"
rmdir "${tmpdir}" || err_exit "Cannot remove temporary directory ${tmpdir}."

# tests done
exit $((Errors))