xref: /illumos-gate/usr/src/cmd/refer/hunt2.c (revision 11a8fa6cb17403e630122ac19b39a323c6e64142)
1*11a8fa6cSceastha /*
2*11a8fa6cSceastha  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
3*11a8fa6cSceastha  * Use is subject to license terms.
4*11a8fa6cSceastha  */
5*11a8fa6cSceastha 
67c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
77c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
87c478bd9Sstevel@tonic-gate 
97c478bd9Sstevel@tonic-gate /*
107c478bd9Sstevel@tonic-gate  * Copyright (c) 1980 Regents of the University of California.
117c478bd9Sstevel@tonic-gate  * All rights reserved. The Berkeley software License Agreement
127c478bd9Sstevel@tonic-gate  * specifies the terms and conditions for redistribution.
137c478bd9Sstevel@tonic-gate  */
147c478bd9Sstevel@tonic-gate 
157c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
167c478bd9Sstevel@tonic-gate 
177c478bd9Sstevel@tonic-gate #include "refer..c"
187c478bd9Sstevel@tonic-gate 
197c478bd9Sstevel@tonic-gate static int *coord = 0;
207c478bd9Sstevel@tonic-gate int hh[50];
21*11a8fa6cSceastha extern int *hfreq, hfrflg;
227c478bd9Sstevel@tonic-gate extern int prfreqs;
237c478bd9Sstevel@tonic-gate union ptr {
247c478bd9Sstevel@tonic-gate 	unsigned *a;
257c478bd9Sstevel@tonic-gate 	long *b;
267c478bd9Sstevel@tonic-gate };
277c478bd9Sstevel@tonic-gate 
28*11a8fa6cSceastha extern int hash();
29*11a8fa6cSceastha extern void shell();
30*11a8fa6cSceastha extern void *zalloc();
31*11a8fa6cSceastha 
32*11a8fa6cSceastha static long getl(FILE *);
33*11a8fa6cSceastha static int hcomp(int, int);
34*11a8fa6cSceastha static void hexch(int, int);
35*11a8fa6cSceastha 
36*11a8fa6cSceastha int
37*11a8fa6cSceastha doquery(long *hpt, int nhash, FILE *fb, int nitem,
38*11a8fa6cSceastha 	    char *qitem[], unsigned *mptr)
397c478bd9Sstevel@tonic-gate {
407c478bd9Sstevel@tonic-gate 	long k;
417c478bd9Sstevel@tonic-gate 	union ptr prevdrop, master;
427c478bd9Sstevel@tonic-gate 	int nf = 0, best = 0, nterm = 0, i, g, j;
437c478bd9Sstevel@tonic-gate 	int *prevcoord;
447c478bd9Sstevel@tonic-gate 	long lp;
457c478bd9Sstevel@tonic-gate 	extern int lmaster, colevel, reached;
467c478bd9Sstevel@tonic-gate 	extern int iflong;
477c478bd9Sstevel@tonic-gate 
487c478bd9Sstevel@tonic-gate 	if (iflong) {
497c478bd9Sstevel@tonic-gate 		master.b = (long *)mptr;
50*11a8fa6cSceastha 	} else {
517c478bd9Sstevel@tonic-gate 		master.a = mptr;
527c478bd9Sstevel@tonic-gate 	}
537c478bd9Sstevel@tonic-gate 
547c478bd9Sstevel@tonic-gate #if D1
557c478bd9Sstevel@tonic-gate 	fprintf(stderr, "entering doquery nitem %d\n", nitem);
56*11a8fa6cSceastha 	fprintf(stderr, "first few hashes are %ld %ld %ld %ld %ld\n",
57*11a8fa6cSceastha 	    hpt[0], hpt[1], hpt[2], hpt[3], hpt[4]);
58*11a8fa6cSceastha 	fprintf(stderr, "and frequencies are  %d %d %d %d %d\n",
59*11a8fa6cSceastha 	    hfreq[0], hfreq[1], hfreq[2], hfreq[3], hfreq[4]);
607c478bd9Sstevel@tonic-gate #endif
617c478bd9Sstevel@tonic-gate 	assert(lmaster > 0);
627c478bd9Sstevel@tonic-gate 	if (coord == 0)
637c478bd9Sstevel@tonic-gate 		coord = (int *)zalloc(lmaster, sizeof (lmaster));
64*11a8fa6cSceastha 	if (colevel > 0) {
657c478bd9Sstevel@tonic-gate 		if (iflong)
667c478bd9Sstevel@tonic-gate 			prevdrop.b = (long *)zalloc(lmaster, sizeof (long));
677c478bd9Sstevel@tonic-gate 		else
687c478bd9Sstevel@tonic-gate 			prevdrop.a = (unsigned *)zalloc(lmaster, sizeof (int));
697c478bd9Sstevel@tonic-gate 		prevcoord = (int *)zalloc(lmaster, sizeof (lmaster));
70*11a8fa6cSceastha 	} else {
717c478bd9Sstevel@tonic-gate 		prevdrop.a = master.a;
727c478bd9Sstevel@tonic-gate 		prevcoord = coord;
737c478bd9Sstevel@tonic-gate 	}
747c478bd9Sstevel@tonic-gate #if D1
757c478bd9Sstevel@tonic-gate 	fprintf(stderr, "nitem %d\n", nitem);
767c478bd9Sstevel@tonic-gate #endif
77*11a8fa6cSceastha 	for (i = 0; i < nitem; i++) {
787c478bd9Sstevel@tonic-gate 		hh[i] = hash(qitem[i])%nhash;
797c478bd9Sstevel@tonic-gate #if D1
807c478bd9Sstevel@tonic-gate 		fprintf(stderr, "query wd X%sX has hash %d\n", qitem[i], hh[i]);
817c478bd9Sstevel@tonic-gate #endif
827c478bd9Sstevel@tonic-gate 	}
837c478bd9Sstevel@tonic-gate #if D1
847c478bd9Sstevel@tonic-gate 	fprintf(stderr, "past that loop nhash %d hpt is %lo\n", nhash, hpt);
857c478bd9Sstevel@tonic-gate #endif
867c478bd9Sstevel@tonic-gate 	if (prfreqs)
877c478bd9Sstevel@tonic-gate 		for (i = 0; i < nitem; i++)
88*11a8fa6cSceastha 			fprintf(stderr, "item %s hash %d hfreq %d\n",
89*11a8fa6cSceastha 			    qitem[i], hh[i], hfreq[hh[i]]);
907c478bd9Sstevel@tonic-gate 	/* if possible, sort query into decreasing frequency of hashes */
917c478bd9Sstevel@tonic-gate 	if (hfrflg)
927c478bd9Sstevel@tonic-gate 		shell(nitem, hcomp, hexch);
937c478bd9Sstevel@tonic-gate #if D1
947c478bd9Sstevel@tonic-gate 	for (i = 0; i < nitem; i++)
957c478bd9Sstevel@tonic-gate 		fprintf(stderr, "item hash %d frq %d\n", hh[i], hfreq[hh[i]]);
967c478bd9Sstevel@tonic-gate #endif
977c478bd9Sstevel@tonic-gate 	lp = hpt [hh[0]];
987c478bd9Sstevel@tonic-gate #if D1
997c478bd9Sstevel@tonic-gate 	fprintf(stderr, "first item hash %d lp %ld 0%lo\n", hh[0], lp, lp);
1007c478bd9Sstevel@tonic-gate #endif
1017c478bd9Sstevel@tonic-gate 	assert(fb != NULL);
1027c478bd9Sstevel@tonic-gate 	assert(fseek(fb, lp, 0) != -1);
103*11a8fa6cSceastha 	for (i = 0; i < lmaster; i++) {
1047c478bd9Sstevel@tonic-gate 		if (iflong)
1057c478bd9Sstevel@tonic-gate 			master.b[i] = getl(fb);
1067c478bd9Sstevel@tonic-gate 		else
1077c478bd9Sstevel@tonic-gate 			master.a[i] = getw(fb);
1087c478bd9Sstevel@tonic-gate 		coord[i] = 1;
1097c478bd9Sstevel@tonic-gate #if D2
1107c478bd9Sstevel@tonic-gate 		if (iflong)
1117c478bd9Sstevel@tonic-gate 			fprintf(stderr, "master has %ld\n", (master.b[i]));
1127c478bd9Sstevel@tonic-gate 		else
1137c478bd9Sstevel@tonic-gate 			fprintf(stderr, "master has %d\n", (master.a[i]));
1147c478bd9Sstevel@tonic-gate #endif
1157c478bd9Sstevel@tonic-gate 		assert(i < lmaster);
116*11a8fa6cSceastha 		if (iflong) {
1177c478bd9Sstevel@tonic-gate 			if (master.b[i] == -1L) break;
118*11a8fa6cSceastha 		} else {
1197c478bd9Sstevel@tonic-gate 			if (master.a[i] == -1) break;
1207c478bd9Sstevel@tonic-gate 		}
1217c478bd9Sstevel@tonic-gate 	}
1227c478bd9Sstevel@tonic-gate 	nf = i;
123*11a8fa6cSceastha 	for (nterm = 1; nterm < nitem; nterm++) {
1247c478bd9Sstevel@tonic-gate #ifdef D1
1257c478bd9Sstevel@tonic-gate 		fprintf(stderr, "item %d, hash %d\n", nterm, hh[nterm]);
1267c478bd9Sstevel@tonic-gate #endif
127*11a8fa6cSceastha 		if (colevel > 0) {
128*11a8fa6cSceastha 			for (j = 0; j < nf; j++) {
1297c478bd9Sstevel@tonic-gate 				if (iflong)
1307c478bd9Sstevel@tonic-gate 					prevdrop.b[j] = master.b[j];
1317c478bd9Sstevel@tonic-gate 				else
1327c478bd9Sstevel@tonic-gate 					prevdrop.a[j] = master.a[j];
1337c478bd9Sstevel@tonic-gate 				prevcoord[j] = coord[j];
1347c478bd9Sstevel@tonic-gate 			}
1357c478bd9Sstevel@tonic-gate 		}
1367c478bd9Sstevel@tonic-gate 		lp = hpt[hh[nterm]];
1377c478bd9Sstevel@tonic-gate 		assert(fseek(fb, lp, 0) != -1);
1387c478bd9Sstevel@tonic-gate #if D1
139*11a8fa6cSceastha 		fprintf(stderr, "item %d hash %d seek to %ld\n",
140*11a8fa6cSceastha 		    nterm, hh[nterm], lp);
1417c478bd9Sstevel@tonic-gate #endif
1427c478bd9Sstevel@tonic-gate 		g = j = 0;
143*11a8fa6cSceastha 		while (1) {
1447c478bd9Sstevel@tonic-gate 			if (iflong)
1457c478bd9Sstevel@tonic-gate 				k = getl(fb);
1467c478bd9Sstevel@tonic-gate 			else
1477c478bd9Sstevel@tonic-gate 				k = getw(fb);
1487c478bd9Sstevel@tonic-gate 			if (k == -1) break;
1497c478bd9Sstevel@tonic-gate #if D2
1507c478bd9Sstevel@tonic-gate 			fprintf(stderr, "next term finds %ld\n", k);
1517c478bd9Sstevel@tonic-gate #endif
1527c478bd9Sstevel@tonic-gate #if D3
1537c478bd9Sstevel@tonic-gate 			if (iflong)
154*11a8fa6cSceastha 				fprintf(stderr,
155*11a8fa6cSceastha 				    "bfwh j %d nf %d master %ld k %ld\n",
156*11a8fa6cSceastha 				    j, nf, prevdrop.b[j], (long)(k));
1577c478bd9Sstevel@tonic-gate 			else
158*11a8fa6cSceastha 				fprintf(stderr,
159*11a8fa6cSceastha 				    "bfwh j %d nf %d master %ld k %ld\n",
160*11a8fa6cSceastha 				    j, nf, prevdrop.a[j], (long)(k));
1617c478bd9Sstevel@tonic-gate #endif
162*11a8fa6cSceastha 			while (j < nf &&
163*11a8fa6cSceastha 			    (iflong ? prevdrop.b[j] : prevdrop.a[j]) < k) {
1647c478bd9Sstevel@tonic-gate #if D3
1657c478bd9Sstevel@tonic-gate 				if (iflong)
166*11a8fa6cSceastha 					fprintf(stderr, "j %d nf %d prevdrop "
167*11a8fa6cSceastha 					    "%ld prevcoord %d colevel %d nterm "
168*11a8fa6cSceastha 					    "%d k %ld\n", j, nf, prevdrop.b[j],
169*11a8fa6cSceastha 					    prevcoord[j], colevel, nterm,
170*11a8fa6cSceastha 					    (long)(k));
1717c478bd9Sstevel@tonic-gate 				else
172*11a8fa6cSceastha 					fprintf(stderr, "j %d nf %d prevdrop "
173*11a8fa6cSceastha 					    "%ld prevcoord %d colevel %d nterm "
174*11a8fa6cSceastha 					    "%d k %ld\n", j, nf, prevdrop.a[j],
175*11a8fa6cSceastha 					    prevcoord[j], colevel, nterm,
176*11a8fa6cSceastha 					    (long)(k));
1777c478bd9Sstevel@tonic-gate #endif
1787c478bd9Sstevel@tonic-gate 				if (prevcoord[j] + colevel <= nterm)
1797c478bd9Sstevel@tonic-gate 					j++;
180*11a8fa6cSceastha 				else {
1817c478bd9Sstevel@tonic-gate 					assert(g < lmaster);
1827c478bd9Sstevel@tonic-gate 					if (iflong)
1837c478bd9Sstevel@tonic-gate 						master.b[g] = prevdrop.b[j];
1847c478bd9Sstevel@tonic-gate 					else
1857c478bd9Sstevel@tonic-gate 						master.a[g] = prevdrop.a[j];
1867c478bd9Sstevel@tonic-gate 					coord[g++] = prevcoord[j++];
1877c478bd9Sstevel@tonic-gate #if D1
1887c478bd9Sstevel@tonic-gate 					if (iflong)
189*11a8fa6cSceastha 						fprintf(stderr, " not skip g "
190*11a8fa6cSceastha 						    "%d doc %d coord %d note "
191*11a8fa6cSceastha 						    "%d\n", g, master.b[g-1],
192*11a8fa6cSceastha 						    coord[g-1], master.b[j-1]);
1937c478bd9Sstevel@tonic-gate 					else
194*11a8fa6cSceastha 						fprintf(stderr, " not skip g "
195*11a8fa6cSceastha 						    "%d doc %ld coord %d nterm "
196*11a8fa6cSceastha 						    "%d\n", g, master.a[g-1],
197*11a8fa6cSceastha 						    coord[g-1], nterm);
1987c478bd9Sstevel@tonic-gate #endif
1997c478bd9Sstevel@tonic-gate 					continue;
2007c478bd9Sstevel@tonic-gate 				}
2017c478bd9Sstevel@tonic-gate 			}
2027c478bd9Sstevel@tonic-gate 			if (colevel == 0 && j >= nf) break;
203*11a8fa6cSceastha 			if (j < nf &&
204*11a8fa6cSceastha 			    (iflong ? prevdrop.b[j] : prevdrop.a[j]) == k) {
2057c478bd9Sstevel@tonic-gate 				if (iflong)
2067c478bd9Sstevel@tonic-gate 					master.b[g] = k;
2077c478bd9Sstevel@tonic-gate 				else
2087c478bd9Sstevel@tonic-gate 					master.a[g] = k;
2097c478bd9Sstevel@tonic-gate 				coord[g++] = prevcoord[j++]+1;
2107c478bd9Sstevel@tonic-gate #if D1
2117c478bd9Sstevel@tonic-gate 				if (iflong)
212*11a8fa6cSceastha 					fprintf(stderr, " at g %d item %ld "
213*11a8fa6cSceastha 					    "coord %d note %ld\n", g,
214*11a8fa6cSceastha 					    master.b[g-1], coord[g-1],
215*11a8fa6cSceastha 					    master.b[j-1]);
2167c478bd9Sstevel@tonic-gate 				else
217*11a8fa6cSceastha 					fprintf(stderr, " at g %d item %d "
218*11a8fa6cSceastha 					    "coord %d note %d\n", g,
219*11a8fa6cSceastha 					    master.a[g-1], coord[g-1],
220*11a8fa6cSceastha 					    master.a[j-1]);
2217c478bd9Sstevel@tonic-gate #endif
222*11a8fa6cSceastha 			} else
223*11a8fa6cSceastha 				if (colevel >= nterm) {
2247c478bd9Sstevel@tonic-gate 					if (iflong)
2257c478bd9Sstevel@tonic-gate 						master.b[g] = k;
2267c478bd9Sstevel@tonic-gate 					else
2277c478bd9Sstevel@tonic-gate 						master.a[g] = k;
2287c478bd9Sstevel@tonic-gate 					coord[g++] = 1;
2297c478bd9Sstevel@tonic-gate 				}
2307c478bd9Sstevel@tonic-gate 		}
2317c478bd9Sstevel@tonic-gate #if D1
2327c478bd9Sstevel@tonic-gate 		fprintf(stderr, "now have %d items\n", g);
2337c478bd9Sstevel@tonic-gate #endif
2347c478bd9Sstevel@tonic-gate 		if (colevel > 0)
2357c478bd9Sstevel@tonic-gate 			for (; j < nf; j++)
236*11a8fa6cSceastha 				if ((iflong ? prevdrop.b[j] : prevdrop.a[j]) +
237*11a8fa6cSceastha 				    colevel > nterm) {
2387c478bd9Sstevel@tonic-gate 					assert(g < lmaster);
2397c478bd9Sstevel@tonic-gate 					if (iflong)
2407c478bd9Sstevel@tonic-gate 						master.b[g] = prevdrop.b[j];
2417c478bd9Sstevel@tonic-gate 					else
2427c478bd9Sstevel@tonic-gate 						master.a[g] = prevdrop.a[j];
2437c478bd9Sstevel@tonic-gate 					coord[g++] = prevcoord[j];
2447c478bd9Sstevel@tonic-gate #if D3
2457c478bd9Sstevel@tonic-gate 					if (iflong)
246*11a8fa6cSceastha 						fprintf(stderr, "copied over "
247*11a8fa6cSceastha 						    "%ld coord %d\n",
248*11a8fa6cSceastha 						    master.b[g-1], coord[g-1]);
2497c478bd9Sstevel@tonic-gate 					else
250*11a8fa6cSceastha 						fprintf(stderr, "copied over "
251*11a8fa6cSceastha 						    "%d coord %d\n",
252*11a8fa6cSceastha 						    master.a[g-1], coord[g-1]);
2537c478bd9Sstevel@tonic-gate #endif
2547c478bd9Sstevel@tonic-gate 				}
2557c478bd9Sstevel@tonic-gate 		nf = g;
2567c478bd9Sstevel@tonic-gate 	}
257*11a8fa6cSceastha 	if (colevel > 0) {
2587c478bd9Sstevel@tonic-gate 		best = 0;
2597c478bd9Sstevel@tonic-gate 		for (j = 0; j < nf; j++)
2607c478bd9Sstevel@tonic-gate 			if (coord[j] > best) best = coord[j];
2617c478bd9Sstevel@tonic-gate #if D1
2627c478bd9Sstevel@tonic-gate 		fprintf(stderr, "colevel %d best %d\n", colevel, best);
2637c478bd9Sstevel@tonic-gate #endif
2647c478bd9Sstevel@tonic-gate 		reached = best;
2657c478bd9Sstevel@tonic-gate 		for (g = j = 0; j < nf; j++)
266*11a8fa6cSceastha 			if (coord[j] == best) {
2677c478bd9Sstevel@tonic-gate 				if (iflong)
2687c478bd9Sstevel@tonic-gate 					master.b[g++] = master.b[j];
2697c478bd9Sstevel@tonic-gate 				else
2707c478bd9Sstevel@tonic-gate 					master.a[g++] = master.a[j];
2717c478bd9Sstevel@tonic-gate 			}
2727c478bd9Sstevel@tonic-gate 		nf = g;
2737c478bd9Sstevel@tonic-gate #if D1
2747c478bd9Sstevel@tonic-gate 		fprintf(stderr, "yet got %d\n", nf);
2757c478bd9Sstevel@tonic-gate #endif
2767c478bd9Sstevel@tonic-gate 	}
2777c478bd9Sstevel@tonic-gate #ifdef D1
2787c478bd9Sstevel@tonic-gate 	fprintf(stderr, " returning with %d\n", nf);
2797c478bd9Sstevel@tonic-gate #endif
280*11a8fa6cSceastha 	if (colevel) {
2817c478bd9Sstevel@tonic-gate 		free(prevdrop, lmaster, iflong ? sizeof (long): sizeof (int));
2827c478bd9Sstevel@tonic-gate 		free(prevcoord, lmaster, sizeof (lmaster));
2837c478bd9Sstevel@tonic-gate 	}
2847c478bd9Sstevel@tonic-gate #if D3
2857c478bd9Sstevel@tonic-gate 	for (g = 0; g < nf; g++)
2867c478bd9Sstevel@tonic-gate 		if (iflong)
2877c478bd9Sstevel@tonic-gate 			fprintf(stderr, ":%ld\n", master.b[g]);
2887c478bd9Sstevel@tonic-gate 		else
2897c478bd9Sstevel@tonic-gate 			fprintf(stderr, ":%d\n", master.a[g]);
2907c478bd9Sstevel@tonic-gate #endif
2917c478bd9Sstevel@tonic-gate 	return (nf);
2927c478bd9Sstevel@tonic-gate }
2937c478bd9Sstevel@tonic-gate 
294*11a8fa6cSceastha static long
295*11a8fa6cSceastha getl(FILE *fb)
2967c478bd9Sstevel@tonic-gate {
2977c478bd9Sstevel@tonic-gate 	return (getw(fb));
2987c478bd9Sstevel@tonic-gate }
2997c478bd9Sstevel@tonic-gate 
300*11a8fa6cSceastha static int
301*11a8fa6cSceastha hcomp(int n1, int n2)
3027c478bd9Sstevel@tonic-gate {
3037c478bd9Sstevel@tonic-gate 	return (hfreq[hh[n1]] <= hfreq[hh[n2]]);
3047c478bd9Sstevel@tonic-gate }
3057c478bd9Sstevel@tonic-gate 
306*11a8fa6cSceastha static void
307*11a8fa6cSceastha hexch(int n1, int n2)
3087c478bd9Sstevel@tonic-gate {
3097c478bd9Sstevel@tonic-gate 	int t;
3107c478bd9Sstevel@tonic-gate 	t = hh[n1];
3117c478bd9Sstevel@tonic-gate 	hh[n1] = hh[n2];
3127c478bd9Sstevel@tonic-gate 	hh[n2] = t;
3137c478bd9Sstevel@tonic-gate }
314