1#
2# CDDL HEADER START
3#
4# The contents of this file are subject to the terms of the
5# Common Development and Distribution License (the "License").
6# You may not use this file except in compliance with the License.
7#
8# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9# or http://www.opensolaris.org/os/licensing.
10# See the License for the specific language governing permissions
11# and limitations under the License.
12#
13# When distributing Covered Code, include this CDDL HEADER in each
14# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15# If applicable, add the following below this CDDL HEADER, with the
16# fields enclosed by brackets "[]" replaced with your own identifying
17# information: Portions Copyright [yyyy] [name of copyright owner]
18#
19# CDDL HEADER END
20#
21
22#
23# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24#
25
26#
# This test checks whether the "wc" builtin correctly counts the number
# of bytes and multibyte characters in different locales.
29#
30# This was reported as CR #6904557 ("wc no longer counts number of
31# bytes correctly"):
32# ------------ snip ------------
33# wc no longer count bytes.
34#
35# $ echo $LANG
36# en_US.UTF-8
37# $ ls -l mb.utf8
38# -rw-r--r--   1 nakanon  staff          7 Nov  2 14:06 mb.utf8
39# $ wc mb.utf8
40#        1       1       4 mb.utf8
41# $
42#
43# mb.utf8 is attached.
44#
45# Man page says:
46#
47#      If no option is  specified,  the  default  is  -lwc  (counts
48#      lines, words, and bytes.)
49#
50# SUS says:
51# http://www.opengroup.org/onlinepubs/000095399/utilities/wc.html
52#
53# By default, the standard output shall contain an entry for each
54# input file of the form:
55#
56# "%d %d %d %s\n", <newlines>, <words>, <bytes>, <file>
57#
58# If the -m option is specified, the number of characters shall
59# replace the <bytes> field in this format.
60# ------------ snip ------------
61#
62
63# test setup
# err_exit - report a test failure on stderr and bump the failure counter.
#
# Arguments:
#   $1   - line number of the failing check (inserted by the "err_exit"
#          alias defined right after the function, which passes $LINENO)
#   $2.. - message words, printed verbatim after the location prefix
# Globals:
#   Command (read)    - name printed in the "name[line]:" prefix
#   Errors  (written) - incremented on every call until it reaches 127
# Output:
#   A tab followed by "<Command>[<line>]: <message>" on file descriptor 2.
function err_exit
{
	print -u2 -n "\t"
	# The prefix is quoted on purpose: left unquoted, "${Command}[$1]:" is
	# field-split and its "[...]" part is treated as a glob bracket
	# expression.  "--" keeps a prefix starting with "-" from being parsed
	# as a print option.
	print -u2 -r -- "${Command}[$1]:" "${@:2}"
	# The count saturates at 127; the script exits with this value.
	(( Errors < 127 && Errors++ ))
}
alias err_exit='err_exit $LINENO'
71
# Expanding an unset variable is an error for the rest of the script.
set -o nounset
# Script name without its directory part; err_exit prints it as the prefix.
Command=${0##*/}
# Failure counter; incremented by err_exit and used as the exit status.
integer Errors=0

typeset ocwd
typeset tmpdir
typeset out

# create temporary test directory
ocwd="$PWD"
# A failed mktemp leaves ${tmpdir} empty, so stop here instead of carrying
# an empty path into the cd below and the rmdir at the end of the script.
tmpdir="$(mktemp -t -d "test_sun_solaris_cr_6904557_wc_no_longer_counts_number_of_bytes_correctly.XXXXXXXX")" || { err_exit "Cannot create temporary directory" ; exit $((Errors)) ; }

cd "${tmpdir}" || { err_exit "cd ${tmpdir} failed." ; exit $((Errors)) ; }
85
86
87# run tests
88
# test1 - run every wc counting test case against one command.
#
# Arguments:
#   $1 - command to test (e.g. "wc"); it is executed through
#        "${SHELL} -c" with the test case's options as positional parameters
# Globals:
#   SHELL (read)
#   LC_ALL, LC_CTYPE, LANG - saved on entry; LC_ALL is overridden for each
#                            test case and all three are restored after it
# Returns:
#   Always 0; failures are reported through err_exit.
function test1
{
	typeset wc_cmd="$1"
	typeset testid
	typeset out
	typeset testname
	compound saved_locale

	# save locale information (only the variables that are actually set)
	[[ -v LC_ALL	]] && saved_locale.LC_ALL="${LC_ALL}"
	[[ -v LC_CTYPE	]] && saved_locale.LC_CTYPE="${LC_CTYPE}"
	[[ -v LANG	]] && saved_locale.LANG="${LANG}"

	# Test case fields:
	#   name           - label used in failure messages
	#   locale         - locale to run under; "<unicode>" means "keep the
	#                    current LC_ALL if it ends in .UTF-8, otherwise
	#                    use en_US.UTF-8"
	#   input_format   - printf format that produces the input bytes
	#   output_pattern - pattern the whole output must match: an extended
	#                    regex anchored at both ends ("~(Elr)") expecting
	#                    1 line, 1 word and the count given last
	#   wc_args        - options passed to the command under test
	# Both inputs are 7 bytes long including the trailing newline; the
	# "-C" cases expect 4 (UTF-8 input) and 6 (eucJP input) instead.
	compound -r -a testcases=(
		(
			typeset name="unicode_plain"
			typeset locale="<unicode>"
			typeset input_format='\xc3\xa1\xc3\xa2\xc3\xa3\x0a'
			typeset output_pattern='~(Elr)[[:space:][:blank:]]*1[[:space:][:blank:]]*1[[:space:][:blank:]]*7'
			typeset -a wc_args=( )
		)
		(
			typeset name="unicode_clw"
			typeset locale="<unicode>"
			typeset input_format='\xc3\xa1\xc3\xa2\xc3\xa3\x0a'
			typeset output_pattern='~(Elr)[[:space:][:blank:]]*1[[:space:][:blank:]]*1[[:space:][:blank:]]*7'
			typeset -a wc_args=( "-c" "-l" "-w" )
		)
		(
			typeset name="unicode_widechars_lines_words"
			typeset locale="<unicode>"
			typeset input_format='\xc3\xa1\xc3\xa2\xc3\xa3\x0a'
			typeset output_pattern='~(Elr)[[:space:][:blank:]]*1[[:space:][:blank:]]*1[[:space:][:blank:]]*4'
			typeset -a wc_args=( "-C" "-l" "-w" )
		)
		(
			typeset name="ja_JP.eucJP_plain"
			typeset locale="ja_JP.eucJP"
			typeset input_format='\x74\x32\xa1\xf7\x66\x31\x0a'
			typeset output_pattern='~(Elr)[[:space:][:blank:]]*1[[:space:][:blank:]]*1[[:space:][:blank:]]*7'
			typeset -a wc_args=( )
		)
		(
			typeset name="ja_JP.eucJP_widechars_lines_words"
			typeset locale="ja_JP.eucJP"
			typeset input_format='\x74\x32\xa1\xf7\x66\x31\x0a'
			typeset output_pattern='~(Elr)[[:space:][:blank:]]*1[[:space:][:blank:]]*1[[:space:][:blank:]]*6'
			typeset -a wc_args=( "-C" "-l" "-w" )
		)
	)

	for testid in "${!testcases[@]}" ; do
		nameref tc=testcases[${testid}]
		testname="${wc_cmd}/${tc.name}"

		# select the locale for this test case
		if [[ "${tc.locale}" == "<unicode>" ]] ; then
			# LC_ALL may be unset here; the ":-" default keeps the
			# script's "set -o nounset" from aborting on the expansion.
			if [[ "${LC_ALL:-}" != *.UTF-8 ]] ; then
				export LC_ALL='en_US.UTF-8'
			fi
		else
			export LC_ALL="${tc.locale}"
		fi

		# Feed the input through a child shell so that "${wc_cmd}" is
		# resolved by that shell; stderr is captured together with stdout.
		out="$(printf "${tc.input_format}" | "${SHELL}" -c "${wc_cmd} \"\$@\"" dummy "${tc.wc_args[@]}" 2>&1)" || err_exit "${testname}: Command returned exit code $?"
		[[ "${out}" == ${tc.output_pattern} ]] || err_exit "${testname}: Expected match for $(printf "%q\n" "${tc.output_pattern}"), got $(printf "%q\n" "${out}")"

		# restore locale settings (unset what was unset on entry)
		if [[ -v saved_locale.LC_ALL ]] ; then
			LC_ALL="${saved_locale.LC_ALL}"
		else
			unset LC_ALL
		fi
		if [[ -v saved_locale.LC_CTYPE ]] ; then
			LC_CTYPE="${saved_locale.LC_CTYPE}"
		else
			unset LC_CTYPE
		fi
		if [[ -v saved_locale.LANG ]] ; then
			LANG="${saved_locale.LANG}"
		else
			unset LANG
		fi
	done

	return 0
}
163
# Commands to test; the variant that also covers /usr/bin/wc is kept
# below, disabled.
#for cmd in "wc" "/usr/bin/wc" ; do
for cmd in "wc" ; do
	test1 "${cmd}"
done


# Return to the original directory before removing the temporary one, and
# report a failed cd instead of silently attempting the rmdir from inside it.
cd "${ocwd}" || err_exit "cd ${ocwd} failed."
rmdir "${tmpdir}" || err_exit "Cannot remove temporary directory ${tmpdir}."

# tests done; the exit status is the number of failures counted by err_exit
exit $((Errors))
175