xref: /illumos-gate/usr/src/test/util-tests/tests/awk/gnu/numindex.awk (revision e6d6c189fa3a95d7aa27bbe0aeacf7c1a6b57c8c)
1*e6d6c189SCody Peter Mello#To: bug-gnu-utils@gnu.org
2*e6d6c189SCody Peter Mello#cc: arnold@gnu.org
3*e6d6c189SCody Peter Mello#Subject: Possible bug in GNU Awk 3.0.4
4*e6d6c189SCody Peter Mello#Date: Wed, 24 Nov 1999 21:47:24 +0000
5*e6d6c189SCody Peter Mello#From: Daniel Elphick <de397@ecs.soton.ac.uk>
6*e6d6c189SCody Peter Mello#Message-Id: <E11qkG4-0000l0-00@cameron>
7*e6d6c189SCody Peter Mello#
8*e6d6c189SCody Peter Mello#This is a multipart MIME message.
9*e6d6c189SCody Peter Mello#
10*e6d6c189SCody Peter Mello#--==_Exmh_-11192982200
11*e6d6c189SCody Peter Mello#Content-Type: text/plain; charset=us-ascii
12*e6d6c189SCody Peter Mello#
13*e6d6c189SCody Peter Mello#
14*e6d6c189SCody Peter Mello#When I use the attached awk script unique on the attached data file, it
15*e6d6c189SCody Peter Mello#reports that all 4 lines of the data are the same. Using mawk it correctly
16*e6d6c189SCody Peter Mello#reports that there are no repeats.
17*e6d6c189SCody Peter Mello#
18*e6d6c189SCody Peter Mello#I don't know if there are limits on the size of associative array keys for the
19*e6d6c189SCody Peter Mello#purposes of reliable indexing but if there is then it is not (obviously)
20*e6d6c189SCody Peter Mello#documented.
21*e6d6c189SCody Peter Mello#
22*e6d6c189SCody Peter Mello#
23*e6d6c189SCody Peter Mello#--==_Exmh_-11192982200
24*e6d6c189SCody Peter Mello#Content-Type: text/plain ; name="data"; charset=us-ascii
25*e6d6c189SCody Peter Mello#Content-Description: data
26*e6d6c189SCody Peter Mello#Content-Disposition: attachment; filename="data"
27*e6d6c189SCody Peter Mello#
28*e6d6c189SCody Peter Mello#322322111111112232231111
29*e6d6c189SCody Peter Mello#322322111111112213223111
30*e6d6c189SCody Peter Mello#322322111111112211132231
31*e6d6c189SCody Peter Mello#322322111111112211113223
32*e6d6c189SCody Peter Mello#
33*e6d6c189SCody Peter Mello#--==_Exmh_-11192982200
34*e6d6c189SCody Peter Mello#Content-Type: text/plain ; name="unique"; charset=us-ascii
35*e6d6c189SCody Peter Mello#Content-Description: unique
36*e6d6c189SCody Peter Mello#Content-Disposition: attachment; filename="unique"
37*e6d6c189SCody Peter Mello#
# Per-record rule: remember the record number at which each distinct input
# line first appears (a[$0]) and report every later record with the same text.
{
	count++				# total records read so far
	if (!($0 in a)) {
		# First sighting of this line: store its record number.
		a[$0] = NR
	} else {
		# Duplicate: a[$0] still holds the record number of the first sighting.
		repeat_count++
		printf "line %d has been seen before at line %d\n", NR, a[$0]
	}
}
# END rule: print the number of repeated lines followed by the percentage of
# all input records that were repeats.
END {
	# awk arithmetic is always floating point, so no C-style cast is needed.
	# With empty input, count is still unset (numerically 0) and the division
	# would abort with a fatal "division by zero" error; report 0% instead.
	printf("%d %f%%\n", repeat_count, (count > 0) ? repeat_count / count * 100 : 0)
}
54*e6d6c189SCody Peter Mello#
55*e6d6c189SCody Peter Mello#--==_Exmh_-11192982200--
56