#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
#

#
# This test checks whether "wc" builtin counts the number of bytes
# and multibyte characters in different locales correctly.
#
# This was reported as CR #6904557 ("wc no longer counts number of
# bytes correctly"):
# ------------ snip ------------
# wc no longer count bytes.
#
# $ echo $LANG
# en_US.UTF-8
# $ ls -l mb.utf8
# -rw-r--r-- 1 nakanon staff 7 Nov 2 14:06 mb.utf8
# $ wc mb.utf8
# 1 1 4 mb.utf8
# $
#
# mb.utf8 is attached.
#
# Man page says:
#
# If no option is specified, the default is -lwc (counts
# lines, words, and bytes.)
#
# SUS says:
# http://www.opengroup.org/onlinepubs/000095399/utilities/wc.html
#
# By default, the standard output shall contain an entry for each
# input file of the form:
#
# "%d %d %d %s\n", <newlines>, <words>, <bytes>, <file>
#
# If the -m option is specified, the number of characters shall
# replace the <bytes> field in this format.
# ------------ snip ------------
#

# test setup

#
# err_exit - report a test failure on stderr and count it.
#
# Arguments: $1 - line number of the failing check (supplied automatically
#                 by the "err_exit" alias below via $LINENO)
#            $2.. - message words
# Globals:   Command (read), Errors (incremented, capped at 127)
#
function err_exit
{
	print -u2 -n "\t"
	# Quoted so that "[<line>]" can never be treated as a bracket glob
	# and replaced by a matching file name in the current directory.
	print -u2 -r "${Command}[$1]:" "${@:2}"
	(( Errors < 127 && Errors++ ))
}
alias err_exit='err_exit $LINENO'

set -o nounset
Command=${0##*/}
integer Errors=0

typeset ocwd
typeset tmpdir
typeset out

# create temporary test directory
ocwd="$PWD"
tmpdir="$(mktemp -t -d "test_sun_solaris_cr_6904557_wc_no_longer_counts_number_of_bytes_correctly.XXXXXXXX")" || err_exit "Cannot create temporary directory"

cd "${tmpdir}" || { err_exit "cd ${tmpdir} failed." ; exit $((Errors)) ; }


# run tests

#
# test1 - run every wc test case against one wc command.
#
# Arguments: $1 - the wc command to test (e.g. "wc")
# Globals:   SHELL (read; used to run the command),
#            LC_ALL/LC_CTYPE/LANG (saved on entry, modified per test case,
#            restored after each test case)
# Returns:   always 0; failures are recorded through err_exit.
#
function test1
{
	typeset wc_cmd="$1"
	typeset testid
	typeset out
	typeset testname
	compound saved_locale

	# save locale information
	[[ -v LC_ALL ]] && saved_locale.LC_ALL="${LC_ALL}"
	[[ -v LC_CTYPE ]] && saved_locale.LC_CTYPE="${LC_CTYPE}"
	[[ -v LANG ]] && saved_locale.LANG="${LANG}"

	# Each test case feeds "input_format" (a printf format containing
	# byte escapes) to wc and matches the output against "output_pattern".
	# The locale "<unicode>" means "any UTF-8 locale".
	compound -r -a testcases=(
		(
			typeset name="unicode_plain"
			typeset locale="<unicode>"
			typeset input_format='\xc3\xa1\xc3\xa2\xc3\xa3\x0a'
			typeset output_pattern='~(Elr)[[:space:][:blank:]]*1[[:space:][:blank:]]*1[[:space:][:blank:]]*7'
			typeset -a wc_args=( )
		)
		(
			typeset name="unicode_clw"
			typeset locale="<unicode>"
			typeset input_format='\xc3\xa1\xc3\xa2\xc3\xa3\x0a'
			typeset output_pattern='~(Elr)[[:space:][:blank:]]*1[[:space:][:blank:]]*1[[:space:][:blank:]]*7'
			typeset -a wc_args=( "-c" "-l" "-w" )
		)
		(
			typeset name="unicode_widechars_lines_words"
			typeset locale="<unicode>"
			typeset input_format='\xc3\xa1\xc3\xa2\xc3\xa3\x0a'
			typeset output_pattern='~(Elr)[[:space:][:blank:]]*1[[:space:][:blank:]]*1[[:space:][:blank:]]*4'
			typeset -a wc_args=( "-C" "-l" "-w" )
		)
		(
			typeset name="ja_JP.eucJP_plain"
			typeset locale="ja_JP.eucJP"
			typeset input_format='\x74\x32\xa1\xf7\x66\x31\x0a'
			typeset output_pattern='~(Elr)[[:space:][:blank:]]*1[[:space:][:blank:]]*1[[:space:][:blank:]]*7'
			typeset -a wc_args=( )
		)
		(
			typeset name="ja_JP.eucJP_widechars_lines_words"
			typeset locale="ja_JP.eucJP"
			typeset input_format='\x74\x32\xa1\xf7\x66\x31\x0a'
			typeset output_pattern='~(Elr)[[:space:][:blank:]]*1[[:space:][:blank:]]*1[[:space:][:blank:]]*6'
			typeset -a wc_args=( "-C" "-l" "-w" )
		)
	)

	for testid in "${!testcases[@]}" ; do
		nameref tc=testcases[${testid}]
		testname="${wc_cmd}/${tc.name}"

		if [[ "${tc.locale}" == "<unicode>" ]] ; then
			# ${LC_ALL-} rather than $LC_ALL: LC_ALL may be unset
			# (on entry, or after the restore step of a previous
			# iteration) and "nounset" is in effect.
			if [[ "${LC_ALL-}" != *.UTF-8 ]] ; then
				export LC_ALL='en_US.UTF-8'
			fi
		else
			export LC_ALL="${tc.locale}"
		fi

		# input_format is deliberately used as the printf format so
		# that its \xNN escapes are turned into raw bytes.
		out="$(printf "${tc.input_format}" | "${SHELL}" -c "${wc_cmd} \"\$@\"" dummy "${tc.wc_args[@]}" 2>&1)" || err_exit "${testname}: Command returned exit code $?"
		[[ "${out}" == ${tc.output_pattern} ]] || err_exit "${testname}: Expected match for $(printf "%q\n" "${tc.output_pattern}"), got $(printf "%q\n" "${out}")"

		# restore locale settings
		[[ -v saved_locale.LC_ALL ]] && LC_ALL="${saved_locale.LC_ALL}" || unset LC_ALL
		[[ -v saved_locale.LC_CTYPE ]] && LC_CTYPE="${saved_locale.LC_CTYPE}" || unset LC_CTYPE
		[[ -v saved_locale.LANG ]] && LANG="${saved_locale.LANG}" || unset LANG
	done

	return 0
}

#for cmd in "wc" "/usr/bin/wc" ; do
for cmd in "wc" ; do
	test1 "${cmd}"
done


cd "${ocwd}"
rmdir "${tmpdir}" || err_exit "Cannot remove temporary directory ${tmpdir}."

# tests done
exit $((Errors))