xref: /illumos-gate/usr/src/test/util-tests/tests/awk/gnu/gsubtst2.awk (revision d8849d7dee03b84a3fa281ec65eb9e3d86d3756b)
1#From arnold  Thu May  9 17:27:03 2002
2#Return-Path: <arnold@skeeve.com>
3#Received: (from arnold@localhost)
4#	by skeeve.com (8.11.6/8.11.6) id g49ER3K27925
5#	for arnold; Thu, 9 May 2002 17:27:03 +0300
6#Date: Thu, 9 May 2002 17:27:03 +0300
7#From: Aharon Robbins <arnold@skeeve.com>
8#Message-Id: <200205091427.g49ER3K27925@skeeve.com>
9#To: arnold@skeeve.com
10#Subject: fixme
11#X-SpamBouncer: 1.4 (10/07/01)
12#X-SBRule: Pattern Match (Other Patterns) (Score: 4850)
13#X-SBRule: Pattern Match (Spam Phone #) (Score: 0)
14#X-SBClass: Blocked
15#Status: O
16#
17#Path: ord-read.news.verio.net!dfw-artgen!iad-peer.news.verio.net!news.verio.net!fu-berlin.de!uni-berlin.de!host213-120-137-48.in-addr.btopenworld.COM!not-for-mail
18#From: laura@madonnaweb.com (laura fairhead)
19#Newsgroups: comp.lang.awk
20#Subject: bug in gawk3.1.0 regex code
21#Date: Wed, 08 May 2002 23:31:40 GMT
22#Organization: that'll be the daewooo :)
23#Lines: 211
24#Message-ID: <3cd9b0f7.29675926@NEWS.CIS.DFN.DE>
25#Reply-To: laura@madonnaweb.com
26#NNTP-Posting-Host: host213-120-137-48.in-addr.btopenworld.com (213.120.137.48)
27#X-Trace: fu-berlin.de 1020900891 18168286 213.120.137.48 (16 [53286])
28#X-Newsreader: Forte Free Agent 1.21/32.243
29#Xref: dfw-artgen comp.lang.awk:13059
30#
31#
32#I believe I've just found a bug in gawk3.1.0 implementation of
33#extended regular expressions. It seems to be down to the alternation
34#operator; when using an end anchor '$' as a subexpression in an
35#alternation and the entire matched RE is a nul-string it fails
36#to match the end of string, for example;
37#
38#gsub(/$|2/,"x")
39#print
40#
41#input           = 12345
42#expected output = 1x345x
43#actual output   = 1x345
44#
45#The start anchor '^' always works as expected;
46#
47#gsub(/^|2/,"x")
48#print
49#
50#input           = 12345
51#expected output = x1x345
52#actual output   = x1x345
53#
54#This was with POSIX compliance enabled, although that doesn't
55#affect the result.
56#
57#I checked on gawk3.0.6 and got exactly the same results however
58#gawk2.15.6 gives the expected results.
59#
60#I'm about to post a bug report about this into gnu.utils.bug
61#but I thought I'd post it here first in case anyone has
62#any input/comments/whatever ....
63#
64#Complete test results were as follows;
65#
66#input          12345
67#output         gsub(/regex/,"x",input)
68#
69#regex          output
70#(^)            x12345
71#($)            12345x
72#(^)|($)        x12345x
73#($)|(^)        x12345x
74#(2)            1x345
75#(^)|2          x1x345
76#2|(^)          x1x345
77#($)|2          1x345
78#2|($)          1x345
79#(2)|(^)        x1x345
80#(^)|(2)        x1x345
81#(2)|($)        1x345
82#($)|(2)        1x345
83#.((2)|(^))     x345
84#.((^)|(2))     x345
85#.((2)|($))     x34x
86#.(($)|(2))     x34x
87#x{0}((2)|(^))  x1x345
88#x{0}((^)|(2))  x1x345
89#x{0}((2)|($))  1x345
90#x{0}(($)|(2))  1x345
91#x*((2)|(^))    x1x345
92#x*((^)|(2))    x1x345
93#x*((2)|($))    1x345
94#x*(($)|(2))    1x345
95#
96#Here's the test program I used, a few of the cases use ERE {n[,[m]]}
97#operators so that will have to be commented out or have a check
98#added or something (should have put a conditional in I know... ;-)
99#
100#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
101#
BEGIN {
    # Runs entirely in BEGIN.  Each case reloads $0 with the same subject
    # string, applies gsub() with a regex constant to $0, and prints a
    # fixed-width label followed by the resulting record.
    subject = "12345"
    repl = "x"

    # Report header.
    print "input          " subject
    print "output         gsub(/regex/,\"x\",input)"
    print ""
    print "regex          output"

    # Anchors on their own, then alternated with each other.
    $0 = subject; gsub(/(^)/, repl); print "(^)            " $0
    $0 = subject; gsub(/($)/, repl); print "($)            " $0
    $0 = subject; gsub(/(^)|($)/, repl); print "(^)|($)        " $0
    $0 = subject; gsub(/($)|(^)/, repl); print "($)|(^)        " $0

    # Plain literal match.  The label reads "(2)" although the regex
    # itself is written without parentheses.
    $0 = subject; gsub(/2/, repl); print "(2)            " $0

    # An anchor alternated with a bare literal, in both orders.
    $0 = subject; gsub(/(^)|2/, repl); print "(^)|2          " $0
    $0 = subject; gsub(/2|(^)/, repl); print "2|(^)          " $0
    $0 = subject; gsub(/($)|2/, repl); print "($)|2          " $0
    $0 = subject; gsub(/2|($)/, repl); print "2|($)          " $0

    # An anchor alternated with a parenthesized literal, in both orders.
    $0 = subject; gsub(/(2)|(^)/, repl); print "(2)|(^)        " $0
    $0 = subject; gsub(/(^)|(2)/, repl); print "(^)|(2)        " $0
    $0 = subject; gsub(/(2)|($)/, repl); print "(2)|($)        " $0
    $0 = subject; gsub(/($)|(2)/, repl); print "($)|(2)        " $0

    # The alternation preceded by "." so the match is never empty.
    $0 = subject; gsub(/.((2)|(^))/, repl); print ".((2)|(^))     " $0
    $0 = subject; gsub(/.((^)|(2))/, repl); print ".((^)|(2))     " $0
    $0 = subject; gsub(/.((2)|($))/, repl); print ".((2)|($))     " $0
    $0 = subject; gsub(/.(($)|(2))/, repl); print ".(($)|(2))     " $0

    # Disabled: these cases use the ERE interval operator {n}; the
    # report this test comes from says such cases need to be commented
    # out or guarded.
    #
    # $0 = subject; gsub(/x{0}((2)|(^))/, repl); print "x{0}((2)|(^))  " $0
    # $0 = subject; gsub(/x{0}((^)|(2))/, repl); print "x{0}((^)|(2))  " $0
    # $0 = subject; gsub(/x{0}((2)|($))/, repl); print "x{0}((2)|($))  " $0
    # $0 = subject; gsub(/x{0}(($)|(2))/, repl); print "x{0}(($)|(2))  " $0

    # The alternation preceded by "x*", which can match the empty string.
    $0 = subject; gsub(/x*((2)|(^))/, repl); print "x*((2)|(^))    " $0
    $0 = subject; gsub(/x*((^)|(2))/, repl); print "x*((^)|(2))    " $0
    $0 = subject; gsub(/x*((2)|($))/, repl); print "x*((2)|($))    " $0
    $0 = subject; gsub(/x*(($)|(2))/, repl); print "x*(($)|(2))    " $0

    # Disabled: further interval-operator cases, same reason as above.
    #
    # $0 = subject; gsub(/x{0}^/, repl); print "x{0}^          " $0
    # $0 = subject; gsub(/x{0}$/, repl); print "x{0}$          " $0
    # $0 = subject; gsub(/(x{0}^)|2/, repl); print "(x{0}^)|2      " $0
    # $0 = subject; gsub(/(x{0}$)|2/, repl); print "(x{0}$)|2      " $0
}
229#
230#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
231#
232#byefrom
233#
234#--
235#laura fairhead  # laura@madonnaweb.com  http://lf.8k.com
236#                # if you are bored crack my sig.
237#1F8B0808CABB793C0000666667002D8E410E83300C04EF91F2877D00CA138A7A
238#EAA98F30C494480157B623C4EF1B508FDED1CEFA9152A23DE35D661593C5318E
239#630C313CD701BE92E390563326EE17A3CA818F5266E4C2461547F1F5267659CA
240#8EE2092F76C329ED02CA430C5373CC62FF94BAC6210B36D9F9BC4AB53378D978
241#80F2978A1A6E5D6F5133B67B6113178DC1059526698AFE5C17A5187E7D930492
242