#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#

#
# This test checks whether the AST "cut" utility's "-d" option
# works with multibyte characters
#
# This was reported as CR #6904575 ("cut -d with multibyte character no longer works"):
# ------------ snip ------------
# cut -d with multibyte char no longer work correctly.
#
# $ echo $LANG
# ja
# $ od -tx1 mb.eucjp
# 0000000 a4 a2 a4 a4 a4 a4 a4 a6 a4 a8 0a
# 0000013
# $ od -tx1 delim
# 0000000 a4 a4 0a
# 0000003
# $ wc -m mb.eucjp
#        6 mb.eucjp
#
# It has 5 characters (2byte each).
#
# $ /usr/bin/cut -d `cat delim` -f1 mb.eucjp | od -tx1
# 0000000 0a
# 0000001
#
# correct output is
#
# 0000000 a4 a2 0a
# 0000003
#
# files are attached.
# ------------ snip ------------
#

# test setup

#
# err_exit - report one test failure on stderr and count it.
#
# Arguments: $1   - line number of the failing check (supplied automatically
#                   by the "err_exit" alias below via $LINENO)
#            $2.. - words of the failure message
# Globals:   Command (read)  - script name used as the message prefix
#            Errors (written) - failure counter, incremented but never
#                               raised above 127
# Outputs:   "<TAB><Command>[<line>]: <message>" on file descriptor 2
#
function err_exit
{
	print -u2 -n "\t"
	# The prefix is quoted: unquoted, "name[123]:" is a bracket glob and
	# would be replaced by any matching file name in the current directory
	# (and split on whitespace in ${Command}).  "--" keeps a prefix that
	# starts with "-" from being parsed as an option to print.
	print -u2 -r -- "${Command}[$1]:" "${@:2}"
	(( Errors < 127 && Errors++ ))
}
# Callers write "err_exit message"; the alias inserts the caller's line number.
alias err_exit='err_exit $LINENO'

# Global test state: referencing an unset variable is an error, Command is
# the script's base name (used by err_exit), Errors counts failed checks.
set -o nounset
Command=${0##*/}
integer Errors=0

typeset ocwd
typeset tmpdir
typeset out

# create temporary test directory
ocwd="$PWD"
# Without a scratch directory nothing below can run safely (the tests create
# and remove files in the current directory), so give up immediately instead
# of continuing with an empty ${tmpdir}.
tmpdir="$(mktemp -t -d "test_sun_solaris_cr_6904575_cut_-d_with_multibyte_character_no_longer_works.XXXXXXXX")" || { err_exit "Cannot create temporary directory" ; exit $((Errors)) ; }

cd "${tmpdir}" || { err_exit "cd ${tmpdir} failed." ; exit $((Errors)) ; }


# run tests


#
# test1 - table-driven check of "cut -f1 -d <delimiter>".
#
# Each test case names a locale, a printf format for the input, printf
# formats for the cut arguments, and a printf format for the expected output.
# The formats are expanded after the locale has been switched so that
# sequences such as \u[20ac] are produced in that locale.
#
# Arguments: $1 - the cut command to run (interpolated into a "${SHELL} -c"
#                 command line)
# Globals:   LC_ALL, LC_CTYPE, LANG (changed per test case and restored),
#            SHELL (read), Errors (written through err_exit)
# Returns:   always 0; failures are reported through err_exit
#
function test1
{
	typeset cut_cmd="$1"
	typeset testid
	typeset out
	typeset testname
	compound saved_locale

	# save locale information
	[[ -v LC_ALL	]] && saved_locale.LC_ALL="${LC_ALL}"
	[[ -v LC_CTYPE	]] && saved_locale.LC_CTYPE="${LC_CTYPE}"
	[[ -v LANG	]] && saved_locale.LANG="${LANG}"

	compound -r -a testcases=(
		(
			typeset name="ascii_plain"
			typeset locale="C"
			typeset input_format='abcdefg'
			typeset -a cut_args_format=( "-f1" "-d" "e" )
			typeset output_format='abcd'
		)
		(
			typeset name="unicode_plain"
			typeset locale="<unicode>"
			typeset input_format='abcd\u[20ac]fg'
			typeset -a cut_args_format=( '-f1' '-d' '\u[20ac]' )
			typeset output_format='abcd'
		)
		(
			typeset name="unicode_plain2"
			typeset locale="<unicode>"
			typeset input_format='abcd\u[20ac]fg'
			typeset -a cut_args_format=( '-f1' '-d' 'f' )
			typeset output_format='abcd\u[20ac]'
		)
	)

	for testid in "${!testcases[@]}" ; do
		nameref tc=testcases[${testid}]
		testname="${cut_cmd}/${tc.name}"

		if [[ "${tc.locale}" == "<unicode>" ]] ; then
			# Keep the caller's locale when it already is a UTF-8 one and
			# fall back to en_US.UTF-8 otherwise.  The right-hand side of
			# "!=" is a shell pattern, not a regular expression, so the
			# match has to be written "*.UTF-8".
			if [[ ! -v LC_ALL || $LC_ALL != *.UTF-8 ]]; then
				export LC_ALL='en_US.UTF-8'
			fi
		else
			export LC_ALL="${tc.locale}"
		fi

		# build "cut_args" array with multibyte characters in the current locale
		typeset -a cut_args
		integer arg_index
		for arg_index in "${!tc.cut_args_format[@]}" ; do
			cut_args+=( "$( printf -- "${tc.cut_args_format[arg_index]}" )" )
		done

		typeset output_format="$( printf -- "${tc.output_format}" )"

		#printf "args=|%q|\n" "${cut_args[@]}"

		# The arguments are handed to the child shell as positional
		# parameters ("dummy" becomes its $0) so they reach cut unmodified.
		out="$(printf "${tc.input_format}" | ${SHELL} -c "${cut_cmd} \"\$@\"" dummy "${cut_args[@]}" 2>&1)" || err_exit "${testname}: Command returned exit code $?"
		# ${output_format} is unquoted on purpose: it is matched as a pattern.
		[[ "${out}" == ${output_format} ]] || err_exit "${testname}: Expected match for $(printf "%q\n" "${output_format}"), got $(printf "%q\n" "${out}")"

		# cleanup and restore locale settings
		unset cut_args arg_index
		[[ -v saved_locale.LC_ALL	]] && LC_ALL="${saved_locale.LC_ALL}" || unset LC_ALL
		[[ -v saved_locale.LC_CTYPE	]] && LC_CTYPE="${saved_locale.LC_CTYPE}" || unset LC_CTYPE
		[[ -v saved_locale.LANG		]] && LANG="${saved_locale.LANG}" || unset LANG
	done

	return 0
}


#
# test2 - replay the reproduction steps from CR #6904575.
#
# Writes the two files from the bug report ("mb.eucjp" and "delim") into the
# current directory, runs "<cut> -d <contents of delim> -f1 mb.eucjp | od -tx1"
# with LC_ALL=ja_JP.eucJP and compares the dump with the output the report
# lists as correct.
#
# Arguments: $1 - the cut command to run
# Globals:   SHELL (read), Errors (written through err_exit)
# Returns:   always 0; failures are reported through err_exit
#
function test2
{
	typeset cutcmd=$1
	typeset testname="${cutcmd}"
	typeset out

	# create files
	printf "\xa4\xa2\xa4\xa4\xa4\xa4\xa4\xa6\xa4\xa8\x0a" >"mb.eucjp"
	printf "\xa4\xa4\x0a" >"delim"

	# run test
	# The command substitution is quoted so the delimiter always arrives as
	# exactly one argument to -d; unquoted, an empty or whitespace value
	# would vanish and "-f1" would be taken as the delimiter instead.
	# pipefail/errexit make a failing cut visible through the pipeline.
	out=$( LC_ALL=ja_JP.eucJP ${SHELL} -o pipefail -o errexit -c '$1 -d "$(cat delim)" -f1 "mb.eucjp" | od -tx1' dummy "${cutcmd}" 2>&1 ) || err_exit "${testname}: Test failed with exit code $?"
	[[ "${out}" == $'0000000 a4 a2 0a\n0000003' ]] || err_exit "${testname}: Expected \$'0000000 a4 a2 0a\n0000003', got $(printf "%q\n" "${out}")"

	# cleanup
	rm "mb.eucjp" "delim"

	return 0
}

# Run both tests for every cut implementation under test (currently only the
# "cut" found through the shell's normal command lookup).
#for cmd in "/usr/bin/cut" "cut" ; do
for cmd in "cut" ; do
	test1 "${cmd}"
	test2 "${cmd}"
done



# Leave the scratch directory before removing it, and report it if that
# fails instead of silently running rmdir from inside the directory.
cd "${ocwd}" || err_exit "cd ${ocwd} failed."
rmdir "${tmpdir}" || err_exit "Cannot remove temporary directory ${tmpdir}."

# tests done
exit $((Errors))