1#From arnold Thu May 9 17:27:03 2002 2#Return-Path: <arnold@skeeve.com> 3#Received: (from arnold@localhost) 4# by skeeve.com (8.11.6/8.11.6) id g49ER3K27925 5# for arnold; Thu, 9 May 2002 17:27:03 +0300 6#Date: Thu, 9 May 2002 17:27:03 +0300 7#From: Aharon Robbins <arnold@skeeve.com> 8#Message-Id: <200205091427.g49ER3K27925@skeeve.com> 9#To: arnold@skeeve.com 10#Subject: fixme 11#X-SpamBouncer: 1.4 (10/07/01) 12#X-SBRule: Pattern Match (Other Patterns) (Score: 4850) 13#X-SBRule: Pattern Match (Spam Phone #) (Score: 0) 14#X-SBClass: Blocked 15#Status: O 16# 17#Path: ord-read.news.verio.net!dfw-artgen!iad-peer.news.verio.net!news.verio.net!fu-berlin.de!uni-berlin.de!host213-120-137-48.in-addr.btopenworld.COM!not-for-mail 18#From: laura@madonnaweb.com (laura fairhead) 19#Newsgroups: comp.lang.awk 20#Subject: bug in gawk3.1.0 regex code 21#Date: Wed, 08 May 2002 23:31:40 GMT 22#Organization: that'll be the daewooo :) 23#Lines: 211 24#Message-ID: <3cd9b0f7.29675926@NEWS.CIS.DFN.DE> 25#Reply-To: laura@madonnaweb.com 26#NNTP-Posting-Host: host213-120-137-48.in-addr.btopenworld.com (213.120.137.48) 27#X-Trace: fu-berlin.de 1020900891 18168286 213.120.137.48 (16 [53286]) 28#X-Newsreader: Forte Free Agent 1.21/32.243 29#Xref: dfw-artgen comp.lang.awk:13059 30# 31# 32#I believe I've just found a bug in gawk3.1.0 implementation of 33#extended regular expressions. It seems to be down to the alternation 34#operator; when using an end anchor '$' as a subexpression in an 35#alternation and the entire matched RE is a nul-string it fails 36#to match the end of string, for example; 37# 38#gsub(/$|2/,"x") 39#print 40# 41#input = 12345 42#expected output = 1x345x 43#actual output = 1x345 44# 45#The start anchor '^' always works as expected; 46# 47#gsub(/^|2/,"x") 48#print 49# 50#input = 12345 51#expected output = x1x345 52#actual output = x1x345 53# 54#This was with POSIX compliance enabled although that doesn't 55#affect the result. 
56# 57#I checked on gawk3.0.6 and got exactly the same results however 58#gawk2.15.6 gives the expected results. 59# 60#I'm about to post a bug report about this into gnu.utils.bug 61#but I thought I'd post it here first in case anyone has 62#any input/comments/whatever .... 63# 64#Complete test results were as follows; 65# 66#input 12345 67#output gsub(/regex/,"x",input) 68# 69#regex output 70#(^) x12345 71#($) 12345x 72#(^)|($) x12345x 73#($)|(^) x12345x 74#(2) 1x345 75#(^)|2 x1x345 76#2|(^) x1x345 77#($)|2 1x345 78#2|($) 1x345 79#(2)|(^) x1x345 80#(^)|(2) x1x345 81#(2)|($) 1x345 82#($)|(2) 1x345 83#.((2)|(^)) x345 84#.((^)|(2)) x345 85#.((2)|($)) x34x 86#.(($)|(2)) x34x 87#x{0}((2)|(^)) x1x345 88#x{0}((^)|(2)) x1x345 89#x{0}((2)|($)) 1x345 90#x{0}(($)|(2)) 1x345 91#x*((2)|(^)) x1x345 92#x*((^)|(2)) x1x345 93#x*((2)|($)) 1x345 94#x*(($)|(2)) 1x345 95# 96#Here's the test program I used, a few of the cases use ERE {n[,[m]]} 97#operators so that will have to be commented out or have a check 98#added or something (should have put a conditional in I know... 
# ;-)
#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#

# report(label): print one row of the results table -- the regex label
# immediately followed by the current record ($0), which the caller has
# just rewritten with gsub().
function report(label)
{
    print label $0
}

# Driver for the gsub() anchor/alternation cases.  Every case follows the
# same three steps: reload $0 from TESTSTR, replace each match of the regex
# with "x", and print the labelled result.
BEGIN {
    TESTSTR = "12345"

    # Table header.
    print "input " TESTSTR
    print "output gsub(/regex/,\"x\",input)"
    print ""

    print "regex output"

    # Bare anchors, alone and alternated with each other.
    $0 = TESTSTR; gsub(/(^)/, "x"); report("(^) ")
    $0 = TESTSTR; gsub(/($)/, "x"); report("($) ")
    $0 = TESTSTR; gsub(/(^)|($)/, "x"); report("(^)|($) ")
    $0 = TESTSTR; gsub(/($)|(^)/, "x"); report("($)|(^) ")

    # Plain literal; the label is parenthesised although the regex is not.
    $0 = TESTSTR; gsub(/2/, "x"); report("(2) ")

    # Anchor alternated with an ungrouped literal, in both orders.
    $0 = TESTSTR; gsub(/(^)|2/, "x"); report("(^)|2 ")
    $0 = TESTSTR; gsub(/2|(^)/, "x"); report("2|(^) ")
    $0 = TESTSTR; gsub(/($)|2/, "x"); report("($)|2 ")
    $0 = TESTSTR; gsub(/2|($)/, "x"); report("2|($) ")

    # Anchor alternated with a grouped literal, in both orders.
    $0 = TESTSTR; gsub(/(2)|(^)/, "x"); report("(2)|(^) ")
    $0 = TESTSTR; gsub(/(^)|(2)/, "x"); report("(^)|(2) ")
    $0 = TESTSTR; gsub(/(2)|($)/, "x"); report("(2)|($) ")
    $0 = TESTSTR; gsub(/($)|(2)/, "x"); report("($)|(2) ")

    # The alternation preceded by a single-character wildcard.
    $0 = TESTSTR; gsub(/.((2)|(^))/, "x"); report(".((2)|(^)) ")
    $0 = TESTSTR; gsub(/.((^)|(2))/, "x"); report(".((^)|(2)) ")
    $0 = TESTSTR; gsub(/.((2)|($))/, "x"); report(".((2)|($)) ")
    $0 = TESTSTR; gsub(/.(($)|(2))/, "x"); report(".(($)|(2)) ")

    # Disabled: these cases use the ERE interval operator ({0}).
    # $0 = TESTSTR; gsub(/x{0}((2)|(^))/, "x"); report("x{0}((2)|(^)) ")
    # $0 = TESTSTR; gsub(/x{0}((^)|(2))/, "x"); report("x{0}((^)|(2)) ")
    # $0 = TESTSTR; gsub(/x{0}((2)|($))/, "x"); report("x{0}((2)|($)) ")
    # $0 = TESTSTR; gsub(/x{0}(($)|(2))/, "x"); report("x{0}(($)|(2)) ")

    # The alternation preceded by a prefix that can match the empty string.
    $0 = TESTSTR; gsub(/x*((2)|(^))/, "x"); report("x*((2)|(^)) ")
    $0 = TESTSTR; gsub(/x*((^)|(2))/, "x"); report("x*((^)|(2)) ")
    $0 = TESTSTR; gsub(/x*((2)|($))/, "x"); report("x*((2)|($)) ")
    $0 = TESTSTR; gsub(/x*(($)|(2))/, "x"); report("x*(($)|(2)) ")

    # Disabled: these cases also use the ERE interval operator ({0}).
    # $0 = TESTSTR; gsub(/x{0}^/, "x"); report("x{0}^ ")
    # $0 = TESTSTR; gsub(/x{0}$/, "x"); report("x{0}$ ")
    # $0 = TESTSTR; gsub(/(x{0}^)|2/, "x"); report("(x{0}^)|2 ")
    # $0 = TESTSTR; gsub(/(x{0}$)|2/, "x"); report("(x{0}$)|2 ")
}
#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
#byefrom
#
#--
#laura fairhead # laura@madonnaweb.com http://lf.8k.com
# # if you are bored crack my sig.
#1F8B0808CABB793C0000666667002D8E410E83300C04EF91F2877D00CA138A7A
#EAA98F30C494480157B623C4EF1B508FDED1CEFA9152A23DE35D661593C5318E
#630C313CD701BE92E390563326EE17A3CA818F5266E4C2461547F1F5267659CA
#8EE2092F76C329ED02CA430C5373CC62FF94BAC6210B36D9F9BC4AB53378D978
#80F2978A1A6E5D6F5133B67B6113178DC1059526698AFE5C17A5187E7D930492