xref: /illumos-gate/usr/src/cmd/refer/hunt2.c (revision 2a8bcb4efb45d99ac41c94a75c396b362c414f7f)
1*11a8fa6cSceastha /*
2*11a8fa6cSceastha  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
3*11a8fa6cSceastha  * Use is subject to license terms.
4*11a8fa6cSceastha  */
5*11a8fa6cSceastha 
67c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
77c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
87c478bd9Sstevel@tonic-gate 
97c478bd9Sstevel@tonic-gate /*
107c478bd9Sstevel@tonic-gate  * Copyright (c) 1980 Regents of the University of California.
117c478bd9Sstevel@tonic-gate  * All rights reserved. The Berkeley software License Agreement
127c478bd9Sstevel@tonic-gate  * specifies the terms and conditions for redistribution.
137c478bd9Sstevel@tonic-gate  */
147c478bd9Sstevel@tonic-gate 
157c478bd9Sstevel@tonic-gate #include "refer..c"
167c478bd9Sstevel@tonic-gate 
177c478bd9Sstevel@tonic-gate static int *coord = 0;
187c478bd9Sstevel@tonic-gate int hh[50];
19*11a8fa6cSceastha extern int *hfreq, hfrflg;
207c478bd9Sstevel@tonic-gate extern int prfreqs;
217c478bd9Sstevel@tonic-gate union ptr {
227c478bd9Sstevel@tonic-gate 	unsigned *a;
237c478bd9Sstevel@tonic-gate 	long *b;
247c478bd9Sstevel@tonic-gate };
257c478bd9Sstevel@tonic-gate 
26*11a8fa6cSceastha extern int hash();
27*11a8fa6cSceastha extern void shell();
28*11a8fa6cSceastha extern void *zalloc();
29*11a8fa6cSceastha 
30*11a8fa6cSceastha static long getl(FILE *);
31*11a8fa6cSceastha static int hcomp(int, int);
32*11a8fa6cSceastha static void hexch(int, int);
33*11a8fa6cSceastha 
34*11a8fa6cSceastha int
doquery(long * hpt,int nhash,FILE * fb,int nitem,char * qitem[],unsigned * mptr)35*11a8fa6cSceastha doquery(long *hpt, int nhash, FILE *fb, int nitem,
36*11a8fa6cSceastha 	    char *qitem[], unsigned *mptr)
377c478bd9Sstevel@tonic-gate {
387c478bd9Sstevel@tonic-gate 	long k;
397c478bd9Sstevel@tonic-gate 	union ptr prevdrop, master;
407c478bd9Sstevel@tonic-gate 	int nf = 0, best = 0, nterm = 0, i, g, j;
417c478bd9Sstevel@tonic-gate 	int *prevcoord;
427c478bd9Sstevel@tonic-gate 	long lp;
437c478bd9Sstevel@tonic-gate 	extern int lmaster, colevel, reached;
447c478bd9Sstevel@tonic-gate 	extern int iflong;
457c478bd9Sstevel@tonic-gate 
467c478bd9Sstevel@tonic-gate 	if (iflong) {
477c478bd9Sstevel@tonic-gate 		master.b = (long *)mptr;
48*11a8fa6cSceastha 	} else {
497c478bd9Sstevel@tonic-gate 		master.a = mptr;
507c478bd9Sstevel@tonic-gate 	}
517c478bd9Sstevel@tonic-gate 
527c478bd9Sstevel@tonic-gate #if D1
537c478bd9Sstevel@tonic-gate 	fprintf(stderr, "entering doquery nitem %d\n", nitem);
54*11a8fa6cSceastha 	fprintf(stderr, "first few hashes are %ld %ld %ld %ld %ld\n",
55*11a8fa6cSceastha 	    hpt[0], hpt[1], hpt[2], hpt[3], hpt[4]);
56*11a8fa6cSceastha 	fprintf(stderr, "and frequencies are  %d %d %d %d %d\n",
57*11a8fa6cSceastha 	    hfreq[0], hfreq[1], hfreq[2], hfreq[3], hfreq[4]);
587c478bd9Sstevel@tonic-gate #endif
597c478bd9Sstevel@tonic-gate 	assert(lmaster > 0);
607c478bd9Sstevel@tonic-gate 	if (coord == 0)
617c478bd9Sstevel@tonic-gate 		coord = (int *)zalloc(lmaster, sizeof (lmaster));
62*11a8fa6cSceastha 	if (colevel > 0) {
637c478bd9Sstevel@tonic-gate 		if (iflong)
647c478bd9Sstevel@tonic-gate 			prevdrop.b = (long *)zalloc(lmaster, sizeof (long));
657c478bd9Sstevel@tonic-gate 		else
667c478bd9Sstevel@tonic-gate 			prevdrop.a = (unsigned *)zalloc(lmaster, sizeof (int));
677c478bd9Sstevel@tonic-gate 		prevcoord = (int *)zalloc(lmaster, sizeof (lmaster));
68*11a8fa6cSceastha 	} else {
697c478bd9Sstevel@tonic-gate 		prevdrop.a = master.a;
707c478bd9Sstevel@tonic-gate 		prevcoord = coord;
717c478bd9Sstevel@tonic-gate 	}
727c478bd9Sstevel@tonic-gate #if D1
737c478bd9Sstevel@tonic-gate 	fprintf(stderr, "nitem %d\n", nitem);
747c478bd9Sstevel@tonic-gate #endif
75*11a8fa6cSceastha 	for (i = 0; i < nitem; i++) {
767c478bd9Sstevel@tonic-gate 		hh[i] = hash(qitem[i])%nhash;
777c478bd9Sstevel@tonic-gate #if D1
787c478bd9Sstevel@tonic-gate 		fprintf(stderr, "query wd X%sX has hash %d\n", qitem[i], hh[i]);
797c478bd9Sstevel@tonic-gate #endif
807c478bd9Sstevel@tonic-gate 	}
817c478bd9Sstevel@tonic-gate #if D1
827c478bd9Sstevel@tonic-gate 	fprintf(stderr, "past that loop nhash %d hpt is %lo\n", nhash, hpt);
837c478bd9Sstevel@tonic-gate #endif
847c478bd9Sstevel@tonic-gate 	if (prfreqs)
857c478bd9Sstevel@tonic-gate 		for (i = 0; i < nitem; i++)
86*11a8fa6cSceastha 			fprintf(stderr, "item %s hash %d hfreq %d\n",
87*11a8fa6cSceastha 			    qitem[i], hh[i], hfreq[hh[i]]);
887c478bd9Sstevel@tonic-gate 	/* if possible, sort query into decreasing frequency of hashes */
897c478bd9Sstevel@tonic-gate 	if (hfrflg)
907c478bd9Sstevel@tonic-gate 		shell(nitem, hcomp, hexch);
917c478bd9Sstevel@tonic-gate #if D1
927c478bd9Sstevel@tonic-gate 	for (i = 0; i < nitem; i++)
937c478bd9Sstevel@tonic-gate 		fprintf(stderr, "item hash %d frq %d\n", hh[i], hfreq[hh[i]]);
947c478bd9Sstevel@tonic-gate #endif
957c478bd9Sstevel@tonic-gate 	lp = hpt [hh[0]];
967c478bd9Sstevel@tonic-gate #if D1
977c478bd9Sstevel@tonic-gate 	fprintf(stderr, "first item hash %d lp %ld 0%lo\n", hh[0], lp, lp);
987c478bd9Sstevel@tonic-gate #endif
997c478bd9Sstevel@tonic-gate 	assert(fb != NULL);
1007c478bd9Sstevel@tonic-gate 	assert(fseek(fb, lp, 0) != -1);
101*11a8fa6cSceastha 	for (i = 0; i < lmaster; i++) {
1027c478bd9Sstevel@tonic-gate 		if (iflong)
1037c478bd9Sstevel@tonic-gate 			master.b[i] = getl(fb);
1047c478bd9Sstevel@tonic-gate 		else
1057c478bd9Sstevel@tonic-gate 			master.a[i] = getw(fb);
1067c478bd9Sstevel@tonic-gate 		coord[i] = 1;
1077c478bd9Sstevel@tonic-gate #if D2
1087c478bd9Sstevel@tonic-gate 		if (iflong)
1097c478bd9Sstevel@tonic-gate 			fprintf(stderr, "master has %ld\n", (master.b[i]));
1107c478bd9Sstevel@tonic-gate 		else
1117c478bd9Sstevel@tonic-gate 			fprintf(stderr, "master has %d\n", (master.a[i]));
1127c478bd9Sstevel@tonic-gate #endif
1137c478bd9Sstevel@tonic-gate 		assert(i < lmaster);
114*11a8fa6cSceastha 		if (iflong) {
1157c478bd9Sstevel@tonic-gate 			if (master.b[i] == -1L) break;
116*11a8fa6cSceastha 		} else {
1177c478bd9Sstevel@tonic-gate 			if (master.a[i] == -1) break;
1187c478bd9Sstevel@tonic-gate 		}
1197c478bd9Sstevel@tonic-gate 	}
1207c478bd9Sstevel@tonic-gate 	nf = i;
121*11a8fa6cSceastha 	for (nterm = 1; nterm < nitem; nterm++) {
1227c478bd9Sstevel@tonic-gate #ifdef D1
1237c478bd9Sstevel@tonic-gate 		fprintf(stderr, "item %d, hash %d\n", nterm, hh[nterm]);
1247c478bd9Sstevel@tonic-gate #endif
125*11a8fa6cSceastha 		if (colevel > 0) {
126*11a8fa6cSceastha 			for (j = 0; j < nf; j++) {
1277c478bd9Sstevel@tonic-gate 				if (iflong)
1287c478bd9Sstevel@tonic-gate 					prevdrop.b[j] = master.b[j];
1297c478bd9Sstevel@tonic-gate 				else
1307c478bd9Sstevel@tonic-gate 					prevdrop.a[j] = master.a[j];
1317c478bd9Sstevel@tonic-gate 				prevcoord[j] = coord[j];
1327c478bd9Sstevel@tonic-gate 			}
1337c478bd9Sstevel@tonic-gate 		}
1347c478bd9Sstevel@tonic-gate 		lp = hpt[hh[nterm]];
1357c478bd9Sstevel@tonic-gate 		assert(fseek(fb, lp, 0) != -1);
1367c478bd9Sstevel@tonic-gate #if D1
137*11a8fa6cSceastha 		fprintf(stderr, "item %d hash %d seek to %ld\n",
138*11a8fa6cSceastha 		    nterm, hh[nterm], lp);
1397c478bd9Sstevel@tonic-gate #endif
1407c478bd9Sstevel@tonic-gate 		g = j = 0;
141*11a8fa6cSceastha 		while (1) {
1427c478bd9Sstevel@tonic-gate 			if (iflong)
1437c478bd9Sstevel@tonic-gate 				k = getl(fb);
1447c478bd9Sstevel@tonic-gate 			else
1457c478bd9Sstevel@tonic-gate 				k = getw(fb);
1467c478bd9Sstevel@tonic-gate 			if (k == -1) break;
1477c478bd9Sstevel@tonic-gate #if D2
1487c478bd9Sstevel@tonic-gate 			fprintf(stderr, "next term finds %ld\n", k);
1497c478bd9Sstevel@tonic-gate #endif
1507c478bd9Sstevel@tonic-gate #if D3
1517c478bd9Sstevel@tonic-gate 			if (iflong)
152*11a8fa6cSceastha 				fprintf(stderr,
153*11a8fa6cSceastha 				    "bfwh j %d nf %d master %ld k %ld\n",
154*11a8fa6cSceastha 				    j, nf, prevdrop.b[j], (long)(k));
1557c478bd9Sstevel@tonic-gate 			else
156*11a8fa6cSceastha 				fprintf(stderr,
157*11a8fa6cSceastha 				    "bfwh j %d nf %d master %ld k %ld\n",
158*11a8fa6cSceastha 				    j, nf, prevdrop.a[j], (long)(k));
1597c478bd9Sstevel@tonic-gate #endif
160*11a8fa6cSceastha 			while (j < nf &&
161*11a8fa6cSceastha 			    (iflong ? prevdrop.b[j] : prevdrop.a[j]) < k) {
1627c478bd9Sstevel@tonic-gate #if D3
1637c478bd9Sstevel@tonic-gate 				if (iflong)
164*11a8fa6cSceastha 					fprintf(stderr, "j %d nf %d prevdrop "
165*11a8fa6cSceastha 					    "%ld prevcoord %d colevel %d nterm "
166*11a8fa6cSceastha 					    "%d k %ld\n", j, nf, prevdrop.b[j],
167*11a8fa6cSceastha 					    prevcoord[j], colevel, nterm,
168*11a8fa6cSceastha 					    (long)(k));
1697c478bd9Sstevel@tonic-gate 				else
170*11a8fa6cSceastha 					fprintf(stderr, "j %d nf %d prevdrop "
171*11a8fa6cSceastha 					    "%ld prevcoord %d colevel %d nterm "
172*11a8fa6cSceastha 					    "%d k %ld\n", j, nf, prevdrop.a[j],
173*11a8fa6cSceastha 					    prevcoord[j], colevel, nterm,
174*11a8fa6cSceastha 					    (long)(k));
1757c478bd9Sstevel@tonic-gate #endif
1767c478bd9Sstevel@tonic-gate 				if (prevcoord[j] + colevel <= nterm)
1777c478bd9Sstevel@tonic-gate 					j++;
178*11a8fa6cSceastha 				else {
1797c478bd9Sstevel@tonic-gate 					assert(g < lmaster);
1807c478bd9Sstevel@tonic-gate 					if (iflong)
1817c478bd9Sstevel@tonic-gate 						master.b[g] = prevdrop.b[j];
1827c478bd9Sstevel@tonic-gate 					else
1837c478bd9Sstevel@tonic-gate 						master.a[g] = prevdrop.a[j];
1847c478bd9Sstevel@tonic-gate 					coord[g++] = prevcoord[j++];
1857c478bd9Sstevel@tonic-gate #if D1
1867c478bd9Sstevel@tonic-gate 					if (iflong)
187*11a8fa6cSceastha 						fprintf(stderr, " not skip g "
188*11a8fa6cSceastha 						    "%d doc %d coord %d note "
189*11a8fa6cSceastha 						    "%d\n", g, master.b[g-1],
190*11a8fa6cSceastha 						    coord[g-1], master.b[j-1]);
1917c478bd9Sstevel@tonic-gate 					else
192*11a8fa6cSceastha 						fprintf(stderr, " not skip g "
193*11a8fa6cSceastha 						    "%d doc %ld coord %d nterm "
194*11a8fa6cSceastha 						    "%d\n", g, master.a[g-1],
195*11a8fa6cSceastha 						    coord[g-1], nterm);
1967c478bd9Sstevel@tonic-gate #endif
1977c478bd9Sstevel@tonic-gate 					continue;
1987c478bd9Sstevel@tonic-gate 				}
1997c478bd9Sstevel@tonic-gate 			}
2007c478bd9Sstevel@tonic-gate 			if (colevel == 0 && j >= nf) break;
201*11a8fa6cSceastha 			if (j < nf &&
202*11a8fa6cSceastha 			    (iflong ? prevdrop.b[j] : prevdrop.a[j]) == k) {
2037c478bd9Sstevel@tonic-gate 				if (iflong)
2047c478bd9Sstevel@tonic-gate 					master.b[g] = k;
2057c478bd9Sstevel@tonic-gate 				else
2067c478bd9Sstevel@tonic-gate 					master.a[g] = k;
2077c478bd9Sstevel@tonic-gate 				coord[g++] = prevcoord[j++]+1;
2087c478bd9Sstevel@tonic-gate #if D1
2097c478bd9Sstevel@tonic-gate 				if (iflong)
210*11a8fa6cSceastha 					fprintf(stderr, " at g %d item %ld "
211*11a8fa6cSceastha 					    "coord %d note %ld\n", g,
212*11a8fa6cSceastha 					    master.b[g-1], coord[g-1],
213*11a8fa6cSceastha 					    master.b[j-1]);
2147c478bd9Sstevel@tonic-gate 				else
215*11a8fa6cSceastha 					fprintf(stderr, " at g %d item %d "
216*11a8fa6cSceastha 					    "coord %d note %d\n", g,
217*11a8fa6cSceastha 					    master.a[g-1], coord[g-1],
218*11a8fa6cSceastha 					    master.a[j-1]);
2197c478bd9Sstevel@tonic-gate #endif
220*11a8fa6cSceastha 			} else
221*11a8fa6cSceastha 				if (colevel >= nterm) {
2227c478bd9Sstevel@tonic-gate 					if (iflong)
2237c478bd9Sstevel@tonic-gate 						master.b[g] = k;
2247c478bd9Sstevel@tonic-gate 					else
2257c478bd9Sstevel@tonic-gate 						master.a[g] = k;
2267c478bd9Sstevel@tonic-gate 					coord[g++] = 1;
2277c478bd9Sstevel@tonic-gate 				}
2287c478bd9Sstevel@tonic-gate 		}
2297c478bd9Sstevel@tonic-gate #if D1
2307c478bd9Sstevel@tonic-gate 		fprintf(stderr, "now have %d items\n", g);
2317c478bd9Sstevel@tonic-gate #endif
2327c478bd9Sstevel@tonic-gate 		if (colevel > 0)
2337c478bd9Sstevel@tonic-gate 			for (; j < nf; j++)
234*11a8fa6cSceastha 				if ((iflong ? prevdrop.b[j] : prevdrop.a[j]) +
235*11a8fa6cSceastha 				    colevel > nterm) {
2367c478bd9Sstevel@tonic-gate 					assert(g < lmaster);
2377c478bd9Sstevel@tonic-gate 					if (iflong)
2387c478bd9Sstevel@tonic-gate 						master.b[g] = prevdrop.b[j];
2397c478bd9Sstevel@tonic-gate 					else
2407c478bd9Sstevel@tonic-gate 						master.a[g] = prevdrop.a[j];
2417c478bd9Sstevel@tonic-gate 					coord[g++] = prevcoord[j];
2427c478bd9Sstevel@tonic-gate #if D3
2437c478bd9Sstevel@tonic-gate 					if (iflong)
244*11a8fa6cSceastha 						fprintf(stderr, "copied over "
245*11a8fa6cSceastha 						    "%ld coord %d\n",
246*11a8fa6cSceastha 						    master.b[g-1], coord[g-1]);
2477c478bd9Sstevel@tonic-gate 					else
248*11a8fa6cSceastha 						fprintf(stderr, "copied over "
249*11a8fa6cSceastha 						    "%d coord %d\n",
250*11a8fa6cSceastha 						    master.a[g-1], coord[g-1]);
2517c478bd9Sstevel@tonic-gate #endif
2527c478bd9Sstevel@tonic-gate 				}
2537c478bd9Sstevel@tonic-gate 		nf = g;
2547c478bd9Sstevel@tonic-gate 	}
255*11a8fa6cSceastha 	if (colevel > 0) {
2567c478bd9Sstevel@tonic-gate 		best = 0;
2577c478bd9Sstevel@tonic-gate 		for (j = 0; j < nf; j++)
2587c478bd9Sstevel@tonic-gate 			if (coord[j] > best) best = coord[j];
2597c478bd9Sstevel@tonic-gate #if D1
2607c478bd9Sstevel@tonic-gate 		fprintf(stderr, "colevel %d best %d\n", colevel, best);
2617c478bd9Sstevel@tonic-gate #endif
2627c478bd9Sstevel@tonic-gate 		reached = best;
2637c478bd9Sstevel@tonic-gate 		for (g = j = 0; j < nf; j++)
264*11a8fa6cSceastha 			if (coord[j] == best) {
2657c478bd9Sstevel@tonic-gate 				if (iflong)
2667c478bd9Sstevel@tonic-gate 					master.b[g++] = master.b[j];
2677c478bd9Sstevel@tonic-gate 				else
2687c478bd9Sstevel@tonic-gate 					master.a[g++] = master.a[j];
2697c478bd9Sstevel@tonic-gate 			}
2707c478bd9Sstevel@tonic-gate 		nf = g;
2717c478bd9Sstevel@tonic-gate #if D1
2727c478bd9Sstevel@tonic-gate 		fprintf(stderr, "yet got %d\n", nf);
2737c478bd9Sstevel@tonic-gate #endif
2747c478bd9Sstevel@tonic-gate 	}
2757c478bd9Sstevel@tonic-gate #ifdef D1
2767c478bd9Sstevel@tonic-gate 	fprintf(stderr, " returning with %d\n", nf);
2777c478bd9Sstevel@tonic-gate #endif
278*11a8fa6cSceastha 	if (colevel) {
2797c478bd9Sstevel@tonic-gate 		free(prevdrop, lmaster, iflong ? sizeof (long): sizeof (int));
2807c478bd9Sstevel@tonic-gate 		free(prevcoord, lmaster, sizeof (lmaster));
2817c478bd9Sstevel@tonic-gate 	}
2827c478bd9Sstevel@tonic-gate #if D3
2837c478bd9Sstevel@tonic-gate 	for (g = 0; g < nf; g++)
2847c478bd9Sstevel@tonic-gate 		if (iflong)
2857c478bd9Sstevel@tonic-gate 			fprintf(stderr, ":%ld\n", master.b[g]);
2867c478bd9Sstevel@tonic-gate 		else
2877c478bd9Sstevel@tonic-gate 			fprintf(stderr, ":%d\n", master.a[g]);
2887c478bd9Sstevel@tonic-gate #endif
2897c478bd9Sstevel@tonic-gate 	return (nf);
2907c478bd9Sstevel@tonic-gate }
2917c478bd9Sstevel@tonic-gate 
292*11a8fa6cSceastha static long
getl(FILE * fb)293*11a8fa6cSceastha getl(FILE *fb)
2947c478bd9Sstevel@tonic-gate {
2957c478bd9Sstevel@tonic-gate 	return (getw(fb));
2967c478bd9Sstevel@tonic-gate }
2977c478bd9Sstevel@tonic-gate 
298*11a8fa6cSceastha static int
hcomp(int n1,int n2)299*11a8fa6cSceastha hcomp(int n1, int n2)
3007c478bd9Sstevel@tonic-gate {
3017c478bd9Sstevel@tonic-gate 	return (hfreq[hh[n1]] <= hfreq[hh[n2]]);
3027c478bd9Sstevel@tonic-gate }
3037c478bd9Sstevel@tonic-gate 
304*11a8fa6cSceastha static void
hexch(int n1,int n2)305*11a8fa6cSceastha hexch(int n1, int n2)
3067c478bd9Sstevel@tonic-gate {
3077c478bd9Sstevel@tonic-gate 	int t;
3087c478bd9Sstevel@tonic-gate 	t = hh[n1];
3097c478bd9Sstevel@tonic-gate 	hh[n1] = hh[n2];
3107c478bd9Sstevel@tonic-gate 	hh[n2] = t;
3117c478bd9Sstevel@tonic-gate }
312