xref: /illumos-gate/usr/src/test/util-tests/tests/awk/gnu/localenl.sh (revision 5328fc53d11d7151861fa272e4fb0248b8f0e145)
1#! /bin/sh
2# From arnold@f7.net  Sun Apr 22 20:15:25 2007
3# Date: Thu, 19 Apr 2007 17:09:02 +0300
4# From: Pekka Pessi <Pekka.Pessi@nokia.com>
5# X-Face: #V(jdpv[lI!TNUU=2*oh:="#suS*ponXW"yr6G;~L}<xZn_2^0)V{jqdc4y}@2b]ffd}SY#
6#  :9||1pew85O,WjiYA"6C7bW^zt^+.{b#B{lEE+4$9lrXL(55g}dU>uZ\JfD\"IG#G{j`hZI;=DmT\H
7#  pfDMyJ`i=:M;BM3R.`[>P^ER8+]i
8# Subject: UTF-8 locale and \n in regexps
9# To: bug-gawk@gnu.org
10# Cc: Pekka.Pessi@nokia.com
11# Message-id: <pvlkgoh2wx.fsf@nokia.com>
12# MIME-version: 1.0
13# Content-type: multipart/mixed; boundary="=-=-="
14#
15# --=-=-=
16#
17# Hello,
18#
19# It looks like a regexp with \n in [^] behaves badly if the locale has
20# a UTF-8 ctype.
21#
22# It looks like if there is a \n and a range without \n, like /\n[^x\n]foo/,
23# and the first \n ends an even-numbered line within the string, the regexp
24# does not match.
25#
26# Please see the attached script for a demonstration.
27#
28# --Pekka Pessi
29#
30#
31# --=-=-=
32# Content-Disposition: inline; filename=gawk-test
33#
34#! /bin/sh
35
# AWK names the awk command this script runs; stop with status 1 and a
# message on stderr when it is empty or unset.
[ -n "$AWK" ] || {
    printf '$AWK must be set\n' >&2
    exit 1
}
40
# Regression test for the report quoted at the top of this file: a regexp of
# the form /\n[^x\n]*x/ must match even when the \n it starts on ends an
# even-numbered line of the input.  Each locale in the list is exported as
# LC_ALL and the same awk program is run under it.  One "passed" line is
# printed per locale; at the first failing run a "FAILED" line is printed
# and the script exits with status 1.
#
# April 2010: Remove UNKNOWN, causes spurious failures on some systems
for LC_ALL in C POSIX en_US.ISO8859-1 en_US.UTF-8 #UNKNOWN
do
  export LC_ALL

  # Nine lines are piped to awk.  The here-document body and its terminator
  # must stay flush-left.  $AWK is left unquoted, as it always has been.
  # NOTE(review): quote it if AWK is guaranteed to be a bare path.
  cat <<'EOF' |
line1
line2
line3
line4 
line5
line6
line7
line8
line9
EOF
  $AWK '
# RS is moved away from its newline default so that the newlines of the
# input stay inside $0, where the patterns below look for them.
BEGIN { RS="\0"; }
{
  # Checks 2-6 and 8: a newline, then a run of characters that are neither
  # the digit nor a newline, then the digit.  Excluding \n from the bracket
  # expression forces the match to begin at the newline directly before the
  # target line.
  if (match($0, /\n[^2\n]*2/)) { got2=1; } else { print "no match 2"; }
  if (match($0, /\n[^3\n]*3/)) { got3=1; } else { print "no match 3"; }
  if (match($0, /\n[^4\n]*4/)) { got4=1; } else { print "no match 4"; }
  if (match($0, /\n[^5\n]*5/)) { got5=1; } else { print "no match 5"; }
  if (match($0, /\n[^6\n]*6/)) { got6=1; } else { print "no match 6"; }
  if (match($0, /\n[a-z]*7\n/)){ got7=1; } else { print "no match 7"; }
  if (match($0, /\n[^8\n]*8/)) { got8=1; } else { print "no match 8"; }
  if (match($0, /8.[^9\n]+9/)) { got9=1; } else { print "no match 9"; }
}

# Exit status is 0 only when every one of the eight checks matched.
END { exit(!(got2 && got3 && got4 && got5 && got6 && got7 && got8 && got9)); }
' || {
    echo "LC_ALL=$LC_ALL FAILED"
    exit 1
  }
  echo "LC_ALL=$LC_ALL passed"
done
76#
77# --=-=-=--
78#
79