#To: bug-gnu-utils@gnu.org
#cc: arnold@gnu.org
#Subject: Possible bug in GNU Awk 3.0.4
#Date: Wed, 24 Nov 1999 21:47:24 +0000
#From: Daniel Elphick <de397@ecs.soton.ac.uk>
#Message-Id: <E11qkG4-0000l0-00@cameron>
#
#This is a multipart MIME message.
#
#--==_Exmh_-11192982200
#Content-Type: text/plain; charset=us-ascii
#
#
#When I use the attached awk script unique on the attached data file, it
#reports that all 4 lines of the data are the same. Using mawk it correctly
#reports that there are no repeats.
#
#I don't know if there are limits on the size of associative array keys for the
#purposes of reliable indexing but if there is then it is not (obviously)
#documented.
#
#
#--==_Exmh_-11192982200
#Content-Type: text/plain ; name="data"; charset=us-ascii
#Content-Description: data
#Content-Disposition: attachment; filename="data"
#
#322322111111112232231111
#322322111111112213223111
#322322111111112211132231
#322322111111112211113223
#
#--==_Exmh_-11192982200
#Content-Type: text/plain ; name="unique"; charset=us-ascii
#Content-Description: unique
#Content-Disposition: attachment; filename="unique"
#
# Main rule, run for every input record.
#
# a[]          maps the full text of a record to the record number (NR) at
#              which that text was first seen.
# repeat_count number of records whose text had already been seen.
# count        total number of records read.
{
	if ($0 in a) {
		# The `in` test checks membership without creating a[$0].
		printf("line %d has been seen before at line %d\n", NR, a[$0])
		repeat_count += 1
	} else {
		# First occurrence: remember where this text first appeared.
		a[$0] = NR
	}
	count += 1
}

# Summary: number of repeated records and their share of all records, as a
# percentage.
END {
#	printf("%d %f%%\n", repeat_count, (float)repeat_count / count * 100)
	# With no input records count is still unset (numerically 0); dividing by
	# it is a fatal error in gawk, so report 0% in that case instead.
	printf("%d %f%%\n", repeat_count, count ? repeat_count / count * 100 : 0)
}
#
#--==_Exmh_-11192982200--