# Regression test: after sub() lengthens a string, index() must search the
# updated value.  The report quoted below describes index() returning 2 both
# before and after the substitution when gawk 3.1.5 was built with
# multi-byte support (MBS_SUPPORT).
#
# Expected output:
#	Before <abc>  2
#	After  <abbc> 3
#
# The original bug report follows.  Quoted-printable "=3D" sequences in the
# message body have been decoded to "=" for readability.
#
# Date: Thu, 27 Apr 2006 20:59:03 +0100
# From: Lee Haywood <ljhaywood2@googlemail.com>
# Subject: gawk multi-byte support bugs, assertion bug and fix.
# To: bug-gawk@gnu.org
# Message-id: <60962be00604271259na0d8fdayb9d0c69a853216e8@mail.gmail.com>
# MIME-version: 1.0
# Content-type: multipart/alternative;
#	boundary="----=_Part_10136_920879.1146167943492"
# Status: RO
#
# ------=_Part_10136_920879.1146167943492
# Content-Type: text/plain; charset=ISO-8859-1
# Content-Transfer-Encoding: quoted-printable
# Content-Disposition: inline
#
#
# Firstly, I have been getting the following error from version 3.1.5.
#
#    awk: node.c:515: unref: Assertion `(tmp->flags & 4096) != 0' failed.
#
# In mk_number() in node.c the MBS_SUPPORT code is inside the GAWKDEBUG
# section - moving it outside explicitly clears the string values, which
# prevents the assertion error from occurring.  The corrected version is
# shown at the end of this message.
#
# As an aside, I also noticed that n->wstptr is not cleared by
# set_field() and set_record() in field.c when the flags are set to
# exclude WSTRCUR.  However, I do not have a test case to show if
# changing them makes any difference.
#
# A second problem also occurs when gawk 3.1.5 is compiled with
# multi-byte character support (MBS_SUPPORT).  The following code should
# change the index of the substring "bc" from 2 to 3, but it gets
# reported as 2 in both cases - which is obviously disastrous.
#
#    awk 'BEGIN {
#            Value = "abc"
#
#            print "Before <" Value "> ",
#                  index( Value, "bc" )
#
#            sub( /bc/, "bbc", Value )
#
#            print "After  <" Value ">",
#                  index( Value, "bc" )
#    }'
#
# Compiling with MBS_SUPPORT undefined makes these problems go away.
#
# /* mk_number --- allocate a node with defined number */
#
# NODE *
# mk_number(AWKNUM x, unsigned int flags)
# {
#         register NODE *r;
#
#         getnode(r);
#         r->type = Node_val;
#         r->numbr = x;
#         r->flags = flags;
# #if defined MBS_SUPPORT
#         r->wstptr = NULL;
#         r->wstlen = 0;
# #endif /* MBS_SUPPORT */
# #ifdef GAWKDEBUG
#         r->stref = 1;
#         r->stptr = NULL;
#         r->stlen = 0;
# #if defined MBS_SUPPORT
#         r->flags &= ~WSTRCUR;
# #endif /* MBS_SUPPORT */
# #endif /* GAWKDEBUG */
#         return r;
# }
#
# Thanks.
#
# --
# Lee Haywood.

BEGIN {
	Value = "abc"

	# "bc" starts at position 2 of "abc".
	print "Before <" Value "> ", index( Value, "bc" )

	# Replace the first "bc" with "bbc", turning Value into "abbc".
	sub( /bc/, "bbc", Value )

	# "bc" now starts at position 3; a result of 2 means index() searched
	# the value as it was before the substitution.
	print "After  <" Value ">", index( Value, "bc" )
}