1*11a8fa6cSceastha /*
2*11a8fa6cSceastha * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
3*11a8fa6cSceastha * Use is subject to license terms.
4*11a8fa6cSceastha */
5*11a8fa6cSceastha
67c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
77c478bd9Sstevel@tonic-gate /* All Rights Reserved */
87c478bd9Sstevel@tonic-gate
97c478bd9Sstevel@tonic-gate /*
107c478bd9Sstevel@tonic-gate * Copyright (c) 1980 Regents of the University of California.
117c478bd9Sstevel@tonic-gate * All rights reserved. The Berkeley software License Agreement
127c478bd9Sstevel@tonic-gate * specifies the terms and conditions for redistribution.
137c478bd9Sstevel@tonic-gate */
147c478bd9Sstevel@tonic-gate
157c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI"
167c478bd9Sstevel@tonic-gate
177c478bd9Sstevel@tonic-gate #include "refer..c"
187c478bd9Sstevel@tonic-gate
197c478bd9Sstevel@tonic-gate static int *coord = 0;
207c478bd9Sstevel@tonic-gate int hh[50];
21*11a8fa6cSceastha extern int *hfreq, hfrflg;
227c478bd9Sstevel@tonic-gate extern int prfreqs;
237c478bd9Sstevel@tonic-gate union ptr {
247c478bd9Sstevel@tonic-gate unsigned *a;
257c478bd9Sstevel@tonic-gate long *b;
267c478bd9Sstevel@tonic-gate };
277c478bd9Sstevel@tonic-gate
28*11a8fa6cSceastha extern int hash();
29*11a8fa6cSceastha extern void shell();
30*11a8fa6cSceastha extern void *zalloc();
31*11a8fa6cSceastha
32*11a8fa6cSceastha static long getl(FILE *);
33*11a8fa6cSceastha static int hcomp(int, int);
34*11a8fa6cSceastha static void hexch(int, int);
35*11a8fa6cSceastha
36*11a8fa6cSceastha int
doquery(long * hpt,int nhash,FILE * fb,int nitem,char * qitem[],unsigned * mptr)37*11a8fa6cSceastha doquery(long *hpt, int nhash, FILE *fb, int nitem,
38*11a8fa6cSceastha char *qitem[], unsigned *mptr)
397c478bd9Sstevel@tonic-gate {
407c478bd9Sstevel@tonic-gate long k;
417c478bd9Sstevel@tonic-gate union ptr prevdrop, master;
427c478bd9Sstevel@tonic-gate int nf = 0, best = 0, nterm = 0, i, g, j;
437c478bd9Sstevel@tonic-gate int *prevcoord;
447c478bd9Sstevel@tonic-gate long lp;
457c478bd9Sstevel@tonic-gate extern int lmaster, colevel, reached;
467c478bd9Sstevel@tonic-gate extern int iflong;
477c478bd9Sstevel@tonic-gate
487c478bd9Sstevel@tonic-gate if (iflong) {
497c478bd9Sstevel@tonic-gate master.b = (long *)mptr;
50*11a8fa6cSceastha } else {
517c478bd9Sstevel@tonic-gate master.a = mptr;
527c478bd9Sstevel@tonic-gate }
537c478bd9Sstevel@tonic-gate
547c478bd9Sstevel@tonic-gate #if D1
557c478bd9Sstevel@tonic-gate fprintf(stderr, "entering doquery nitem %d\n", nitem);
56*11a8fa6cSceastha fprintf(stderr, "first few hashes are %ld %ld %ld %ld %ld\n",
57*11a8fa6cSceastha hpt[0], hpt[1], hpt[2], hpt[3], hpt[4]);
58*11a8fa6cSceastha fprintf(stderr, "and frequencies are %d %d %d %d %d\n",
59*11a8fa6cSceastha hfreq[0], hfreq[1], hfreq[2], hfreq[3], hfreq[4]);
607c478bd9Sstevel@tonic-gate #endif
617c478bd9Sstevel@tonic-gate assert(lmaster > 0);
627c478bd9Sstevel@tonic-gate if (coord == 0)
637c478bd9Sstevel@tonic-gate coord = (int *)zalloc(lmaster, sizeof (lmaster));
64*11a8fa6cSceastha if (colevel > 0) {
657c478bd9Sstevel@tonic-gate if (iflong)
667c478bd9Sstevel@tonic-gate prevdrop.b = (long *)zalloc(lmaster, sizeof (long));
677c478bd9Sstevel@tonic-gate else
687c478bd9Sstevel@tonic-gate prevdrop.a = (unsigned *)zalloc(lmaster, sizeof (int));
697c478bd9Sstevel@tonic-gate prevcoord = (int *)zalloc(lmaster, sizeof (lmaster));
70*11a8fa6cSceastha } else {
717c478bd9Sstevel@tonic-gate prevdrop.a = master.a;
727c478bd9Sstevel@tonic-gate prevcoord = coord;
737c478bd9Sstevel@tonic-gate }
747c478bd9Sstevel@tonic-gate #if D1
757c478bd9Sstevel@tonic-gate fprintf(stderr, "nitem %d\n", nitem);
767c478bd9Sstevel@tonic-gate #endif
77*11a8fa6cSceastha for (i = 0; i < nitem; i++) {
787c478bd9Sstevel@tonic-gate hh[i] = hash(qitem[i])%nhash;
797c478bd9Sstevel@tonic-gate #if D1
807c478bd9Sstevel@tonic-gate fprintf(stderr, "query wd X%sX has hash %d\n", qitem[i], hh[i]);
817c478bd9Sstevel@tonic-gate #endif
827c478bd9Sstevel@tonic-gate }
837c478bd9Sstevel@tonic-gate #if D1
847c478bd9Sstevel@tonic-gate fprintf(stderr, "past that loop nhash %d hpt is %lo\n", nhash, hpt);
857c478bd9Sstevel@tonic-gate #endif
867c478bd9Sstevel@tonic-gate if (prfreqs)
877c478bd9Sstevel@tonic-gate for (i = 0; i < nitem; i++)
88*11a8fa6cSceastha fprintf(stderr, "item %s hash %d hfreq %d\n",
89*11a8fa6cSceastha qitem[i], hh[i], hfreq[hh[i]]);
907c478bd9Sstevel@tonic-gate /* if possible, sort query into decreasing frequency of hashes */
917c478bd9Sstevel@tonic-gate if (hfrflg)
927c478bd9Sstevel@tonic-gate shell(nitem, hcomp, hexch);
937c478bd9Sstevel@tonic-gate #if D1
947c478bd9Sstevel@tonic-gate for (i = 0; i < nitem; i++)
957c478bd9Sstevel@tonic-gate fprintf(stderr, "item hash %d frq %d\n", hh[i], hfreq[hh[i]]);
967c478bd9Sstevel@tonic-gate #endif
977c478bd9Sstevel@tonic-gate lp = hpt [hh[0]];
987c478bd9Sstevel@tonic-gate #if D1
997c478bd9Sstevel@tonic-gate fprintf(stderr, "first item hash %d lp %ld 0%lo\n", hh[0], lp, lp);
1007c478bd9Sstevel@tonic-gate #endif
1017c478bd9Sstevel@tonic-gate assert(fb != NULL);
1027c478bd9Sstevel@tonic-gate assert(fseek(fb, lp, 0) != -1);
103*11a8fa6cSceastha for (i = 0; i < lmaster; i++) {
1047c478bd9Sstevel@tonic-gate if (iflong)
1057c478bd9Sstevel@tonic-gate master.b[i] = getl(fb);
1067c478bd9Sstevel@tonic-gate else
1077c478bd9Sstevel@tonic-gate master.a[i] = getw(fb);
1087c478bd9Sstevel@tonic-gate coord[i] = 1;
1097c478bd9Sstevel@tonic-gate #if D2
1107c478bd9Sstevel@tonic-gate if (iflong)
1117c478bd9Sstevel@tonic-gate fprintf(stderr, "master has %ld\n", (master.b[i]));
1127c478bd9Sstevel@tonic-gate else
1137c478bd9Sstevel@tonic-gate fprintf(stderr, "master has %d\n", (master.a[i]));
1147c478bd9Sstevel@tonic-gate #endif
1157c478bd9Sstevel@tonic-gate assert(i < lmaster);
116*11a8fa6cSceastha if (iflong) {
1177c478bd9Sstevel@tonic-gate if (master.b[i] == -1L) break;
118*11a8fa6cSceastha } else {
1197c478bd9Sstevel@tonic-gate if (master.a[i] == -1) break;
1207c478bd9Sstevel@tonic-gate }
1217c478bd9Sstevel@tonic-gate }
1227c478bd9Sstevel@tonic-gate nf = i;
123*11a8fa6cSceastha for (nterm = 1; nterm < nitem; nterm++) {
1247c478bd9Sstevel@tonic-gate #ifdef D1
1257c478bd9Sstevel@tonic-gate fprintf(stderr, "item %d, hash %d\n", nterm, hh[nterm]);
1267c478bd9Sstevel@tonic-gate #endif
127*11a8fa6cSceastha if (colevel > 0) {
128*11a8fa6cSceastha for (j = 0; j < nf; j++) {
1297c478bd9Sstevel@tonic-gate if (iflong)
1307c478bd9Sstevel@tonic-gate prevdrop.b[j] = master.b[j];
1317c478bd9Sstevel@tonic-gate else
1327c478bd9Sstevel@tonic-gate prevdrop.a[j] = master.a[j];
1337c478bd9Sstevel@tonic-gate prevcoord[j] = coord[j];
1347c478bd9Sstevel@tonic-gate }
1357c478bd9Sstevel@tonic-gate }
1367c478bd9Sstevel@tonic-gate lp = hpt[hh[nterm]];
1377c478bd9Sstevel@tonic-gate assert(fseek(fb, lp, 0) != -1);
1387c478bd9Sstevel@tonic-gate #if D1
139*11a8fa6cSceastha fprintf(stderr, "item %d hash %d seek to %ld\n",
140*11a8fa6cSceastha nterm, hh[nterm], lp);
1417c478bd9Sstevel@tonic-gate #endif
1427c478bd9Sstevel@tonic-gate g = j = 0;
143*11a8fa6cSceastha while (1) {
1447c478bd9Sstevel@tonic-gate if (iflong)
1457c478bd9Sstevel@tonic-gate k = getl(fb);
1467c478bd9Sstevel@tonic-gate else
1477c478bd9Sstevel@tonic-gate k = getw(fb);
1487c478bd9Sstevel@tonic-gate if (k == -1) break;
1497c478bd9Sstevel@tonic-gate #if D2
1507c478bd9Sstevel@tonic-gate fprintf(stderr, "next term finds %ld\n", k);
1517c478bd9Sstevel@tonic-gate #endif
1527c478bd9Sstevel@tonic-gate #if D3
1537c478bd9Sstevel@tonic-gate if (iflong)
154*11a8fa6cSceastha fprintf(stderr,
155*11a8fa6cSceastha "bfwh j %d nf %d master %ld k %ld\n",
156*11a8fa6cSceastha j, nf, prevdrop.b[j], (long)(k));
1577c478bd9Sstevel@tonic-gate else
158*11a8fa6cSceastha fprintf(stderr,
159*11a8fa6cSceastha "bfwh j %d nf %d master %ld k %ld\n",
160*11a8fa6cSceastha j, nf, prevdrop.a[j], (long)(k));
1617c478bd9Sstevel@tonic-gate #endif
162*11a8fa6cSceastha while (j < nf &&
163*11a8fa6cSceastha (iflong ? prevdrop.b[j] : prevdrop.a[j]) < k) {
1647c478bd9Sstevel@tonic-gate #if D3
1657c478bd9Sstevel@tonic-gate if (iflong)
166*11a8fa6cSceastha fprintf(stderr, "j %d nf %d prevdrop "
167*11a8fa6cSceastha "%ld prevcoord %d colevel %d nterm "
168*11a8fa6cSceastha "%d k %ld\n", j, nf, prevdrop.b[j],
169*11a8fa6cSceastha prevcoord[j], colevel, nterm,
170*11a8fa6cSceastha (long)(k));
1717c478bd9Sstevel@tonic-gate else
172*11a8fa6cSceastha fprintf(stderr, "j %d nf %d prevdrop "
173*11a8fa6cSceastha "%ld prevcoord %d colevel %d nterm "
174*11a8fa6cSceastha "%d k %ld\n", j, nf, prevdrop.a[j],
175*11a8fa6cSceastha prevcoord[j], colevel, nterm,
176*11a8fa6cSceastha (long)(k));
1777c478bd9Sstevel@tonic-gate #endif
1787c478bd9Sstevel@tonic-gate if (prevcoord[j] + colevel <= nterm)
1797c478bd9Sstevel@tonic-gate j++;
180*11a8fa6cSceastha else {
1817c478bd9Sstevel@tonic-gate assert(g < lmaster);
1827c478bd9Sstevel@tonic-gate if (iflong)
1837c478bd9Sstevel@tonic-gate master.b[g] = prevdrop.b[j];
1847c478bd9Sstevel@tonic-gate else
1857c478bd9Sstevel@tonic-gate master.a[g] = prevdrop.a[j];
1867c478bd9Sstevel@tonic-gate coord[g++] = prevcoord[j++];
1877c478bd9Sstevel@tonic-gate #if D1
1887c478bd9Sstevel@tonic-gate if (iflong)
189*11a8fa6cSceastha fprintf(stderr, " not skip g "
190*11a8fa6cSceastha "%d doc %d coord %d note "
191*11a8fa6cSceastha "%d\n", g, master.b[g-1],
192*11a8fa6cSceastha coord[g-1], master.b[j-1]);
1937c478bd9Sstevel@tonic-gate else
194*11a8fa6cSceastha fprintf(stderr, " not skip g "
195*11a8fa6cSceastha "%d doc %ld coord %d nterm "
196*11a8fa6cSceastha "%d\n", g, master.a[g-1],
197*11a8fa6cSceastha coord[g-1], nterm);
1987c478bd9Sstevel@tonic-gate #endif
1997c478bd9Sstevel@tonic-gate continue;
2007c478bd9Sstevel@tonic-gate }
2017c478bd9Sstevel@tonic-gate }
2027c478bd9Sstevel@tonic-gate if (colevel == 0 && j >= nf) break;
203*11a8fa6cSceastha if (j < nf &&
204*11a8fa6cSceastha (iflong ? prevdrop.b[j] : prevdrop.a[j]) == k) {
2057c478bd9Sstevel@tonic-gate if (iflong)
2067c478bd9Sstevel@tonic-gate master.b[g] = k;
2077c478bd9Sstevel@tonic-gate else
2087c478bd9Sstevel@tonic-gate master.a[g] = k;
2097c478bd9Sstevel@tonic-gate coord[g++] = prevcoord[j++]+1;
2107c478bd9Sstevel@tonic-gate #if D1
2117c478bd9Sstevel@tonic-gate if (iflong)
212*11a8fa6cSceastha fprintf(stderr, " at g %d item %ld "
213*11a8fa6cSceastha "coord %d note %ld\n", g,
214*11a8fa6cSceastha master.b[g-1], coord[g-1],
215*11a8fa6cSceastha master.b[j-1]);
2167c478bd9Sstevel@tonic-gate else
217*11a8fa6cSceastha fprintf(stderr, " at g %d item %d "
218*11a8fa6cSceastha "coord %d note %d\n", g,
219*11a8fa6cSceastha master.a[g-1], coord[g-1],
220*11a8fa6cSceastha master.a[j-1]);
2217c478bd9Sstevel@tonic-gate #endif
222*11a8fa6cSceastha } else
223*11a8fa6cSceastha if (colevel >= nterm) {
2247c478bd9Sstevel@tonic-gate if (iflong)
2257c478bd9Sstevel@tonic-gate master.b[g] = k;
2267c478bd9Sstevel@tonic-gate else
2277c478bd9Sstevel@tonic-gate master.a[g] = k;
2287c478bd9Sstevel@tonic-gate coord[g++] = 1;
2297c478bd9Sstevel@tonic-gate }
2307c478bd9Sstevel@tonic-gate }
2317c478bd9Sstevel@tonic-gate #if D1
2327c478bd9Sstevel@tonic-gate fprintf(stderr, "now have %d items\n", g);
2337c478bd9Sstevel@tonic-gate #endif
2347c478bd9Sstevel@tonic-gate if (colevel > 0)
2357c478bd9Sstevel@tonic-gate for (; j < nf; j++)
236*11a8fa6cSceastha if ((iflong ? prevdrop.b[j] : prevdrop.a[j]) +
237*11a8fa6cSceastha colevel > nterm) {
2387c478bd9Sstevel@tonic-gate assert(g < lmaster);
2397c478bd9Sstevel@tonic-gate if (iflong)
2407c478bd9Sstevel@tonic-gate master.b[g] = prevdrop.b[j];
2417c478bd9Sstevel@tonic-gate else
2427c478bd9Sstevel@tonic-gate master.a[g] = prevdrop.a[j];
2437c478bd9Sstevel@tonic-gate coord[g++] = prevcoord[j];
2447c478bd9Sstevel@tonic-gate #if D3
2457c478bd9Sstevel@tonic-gate if (iflong)
246*11a8fa6cSceastha fprintf(stderr, "copied over "
247*11a8fa6cSceastha "%ld coord %d\n",
248*11a8fa6cSceastha master.b[g-1], coord[g-1]);
2497c478bd9Sstevel@tonic-gate else
250*11a8fa6cSceastha fprintf(stderr, "copied over "
251*11a8fa6cSceastha "%d coord %d\n",
252*11a8fa6cSceastha master.a[g-1], coord[g-1]);
2537c478bd9Sstevel@tonic-gate #endif
2547c478bd9Sstevel@tonic-gate }
2557c478bd9Sstevel@tonic-gate nf = g;
2567c478bd9Sstevel@tonic-gate }
257*11a8fa6cSceastha if (colevel > 0) {
2587c478bd9Sstevel@tonic-gate best = 0;
2597c478bd9Sstevel@tonic-gate for (j = 0; j < nf; j++)
2607c478bd9Sstevel@tonic-gate if (coord[j] > best) best = coord[j];
2617c478bd9Sstevel@tonic-gate #if D1
2627c478bd9Sstevel@tonic-gate fprintf(stderr, "colevel %d best %d\n", colevel, best);
2637c478bd9Sstevel@tonic-gate #endif
2647c478bd9Sstevel@tonic-gate reached = best;
2657c478bd9Sstevel@tonic-gate for (g = j = 0; j < nf; j++)
266*11a8fa6cSceastha if (coord[j] == best) {
2677c478bd9Sstevel@tonic-gate if (iflong)
2687c478bd9Sstevel@tonic-gate master.b[g++] = master.b[j];
2697c478bd9Sstevel@tonic-gate else
2707c478bd9Sstevel@tonic-gate master.a[g++] = master.a[j];
2717c478bd9Sstevel@tonic-gate }
2727c478bd9Sstevel@tonic-gate nf = g;
2737c478bd9Sstevel@tonic-gate #if D1
2747c478bd9Sstevel@tonic-gate fprintf(stderr, "yet got %d\n", nf);
2757c478bd9Sstevel@tonic-gate #endif
2767c478bd9Sstevel@tonic-gate }
2777c478bd9Sstevel@tonic-gate #ifdef D1
2787c478bd9Sstevel@tonic-gate fprintf(stderr, " returning with %d\n", nf);
2797c478bd9Sstevel@tonic-gate #endif
280*11a8fa6cSceastha if (colevel) {
2817c478bd9Sstevel@tonic-gate free(prevdrop, lmaster, iflong ? sizeof (long): sizeof (int));
2827c478bd9Sstevel@tonic-gate free(prevcoord, lmaster, sizeof (lmaster));
2837c478bd9Sstevel@tonic-gate }
2847c478bd9Sstevel@tonic-gate #if D3
2857c478bd9Sstevel@tonic-gate for (g = 0; g < nf; g++)
2867c478bd9Sstevel@tonic-gate if (iflong)
2877c478bd9Sstevel@tonic-gate fprintf(stderr, ":%ld\n", master.b[g]);
2887c478bd9Sstevel@tonic-gate else
2897c478bd9Sstevel@tonic-gate fprintf(stderr, ":%d\n", master.a[g]);
2907c478bd9Sstevel@tonic-gate #endif
2917c478bd9Sstevel@tonic-gate return (nf);
2927c478bd9Sstevel@tonic-gate }
2937c478bd9Sstevel@tonic-gate
294*11a8fa6cSceastha static long
getl(FILE * fb)295*11a8fa6cSceastha getl(FILE *fb)
2967c478bd9Sstevel@tonic-gate {
2977c478bd9Sstevel@tonic-gate return (getw(fb));
2987c478bd9Sstevel@tonic-gate }
2997c478bd9Sstevel@tonic-gate
300*11a8fa6cSceastha static int
hcomp(int n1,int n2)301*11a8fa6cSceastha hcomp(int n1, int n2)
3027c478bd9Sstevel@tonic-gate {
3037c478bd9Sstevel@tonic-gate return (hfreq[hh[n1]] <= hfreq[hh[n2]]);
3047c478bd9Sstevel@tonic-gate }
3057c478bd9Sstevel@tonic-gate
306*11a8fa6cSceastha static void
hexch(int n1,int n2)307*11a8fa6cSceastha hexch(int n1, int n2)
3087c478bd9Sstevel@tonic-gate {
3097c478bd9Sstevel@tonic-gate int t;
3107c478bd9Sstevel@tonic-gate t = hh[n1];
3117c478bd9Sstevel@tonic-gate hh[n1] = hh[n2];
3127c478bd9Sstevel@tonic-gate hh[n2] = t;
3137c478bd9Sstevel@tonic-gate }
314