#! /bin/sh # From arnold@f7.net Sun Apr 22 20:15:25 2007 # Date: Thu, 19 Apr 2007 17:09:02 +0300 # From: Pekka Pessi # X-Face: #V(jdpv[lI!TNUU=2*oh:="#suS*ponXW"yr6G;~L}uZ\JfD\"IG#G{j`hZI;=DmT\H # pfDMyJ`i=:M;BM3R.`[>P^ER8+]i # Subject: UTF-8 locale and \n in regexps # To: bug-gawk@gnu.org # Cc: Pekka.Pessi@nokia.com # Message-id: # MIME-version: 1.0 # Content-type: multipart/mixed; boundary="=-=-=" # # --=-=-= # # Hello, # # It looks like regexp with \n in [^] behaves badly if locale has # an UTF-8 ctype. # # It looks like if there is \n and an range without \n, like /\n[^x\n]foo/, # and first \n ends an even-numbered line within the string, regexp # does not match. # # Please see the attached script for an demonstration. # # --Pekka Pessi # # # --=-=-= # Content-Disposition: inline; filename=gawk-test # #! /bin/sh if [ -z "$AWK" ]; then printf '$AWK must be set\n' >&2 exit 1 fi # April 2010: Remove UNKNOWN, causes spurious failures on some systems for LC_ALL in C POSIX en_US.ISO8859-1 en_US.UTF-8 #UNKNOWN do export LC_ALL cat <