1*11a8fa6cSceastha /*
2*11a8fa6cSceastha * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
3*11a8fa6cSceastha * Use is subject to license terms.
4*11a8fa6cSceastha */
5*11a8fa6cSceastha
67c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
77c478bd9Sstevel@tonic-gate /* All Rights Reserved */
87c478bd9Sstevel@tonic-gate
97c478bd9Sstevel@tonic-gate /*
107c478bd9Sstevel@tonic-gate * Copyright (c) 1980 Regents of the University of California.
117c478bd9Sstevel@tonic-gate * All rights reserved. The Berkeley software License Agreement
127c478bd9Sstevel@tonic-gate * specifies the terms and conditions for redistribution.
137c478bd9Sstevel@tonic-gate */
147c478bd9Sstevel@tonic-gate
157c478bd9Sstevel@tonic-gate #include "refer..c"
167c478bd9Sstevel@tonic-gate
177c478bd9Sstevel@tonic-gate static int *coord = 0;
187c478bd9Sstevel@tonic-gate int hh[50];
19*11a8fa6cSceastha extern int *hfreq, hfrflg;
207c478bd9Sstevel@tonic-gate extern int prfreqs;
217c478bd9Sstevel@tonic-gate union ptr {
227c478bd9Sstevel@tonic-gate unsigned *a;
237c478bd9Sstevel@tonic-gate long *b;
247c478bd9Sstevel@tonic-gate };
257c478bd9Sstevel@tonic-gate
26*11a8fa6cSceastha extern int hash();
27*11a8fa6cSceastha extern void shell();
28*11a8fa6cSceastha extern void *zalloc();
29*11a8fa6cSceastha
30*11a8fa6cSceastha static long getl(FILE *);
31*11a8fa6cSceastha static int hcomp(int, int);
32*11a8fa6cSceastha static void hexch(int, int);
33*11a8fa6cSceastha
34*11a8fa6cSceastha int
doquery(long * hpt,int nhash,FILE * fb,int nitem,char * qitem[],unsigned * mptr)35*11a8fa6cSceastha doquery(long *hpt, int nhash, FILE *fb, int nitem,
36*11a8fa6cSceastha char *qitem[], unsigned *mptr)
377c478bd9Sstevel@tonic-gate {
387c478bd9Sstevel@tonic-gate long k;
397c478bd9Sstevel@tonic-gate union ptr prevdrop, master;
407c478bd9Sstevel@tonic-gate int nf = 0, best = 0, nterm = 0, i, g, j;
417c478bd9Sstevel@tonic-gate int *prevcoord;
427c478bd9Sstevel@tonic-gate long lp;
437c478bd9Sstevel@tonic-gate extern int lmaster, colevel, reached;
447c478bd9Sstevel@tonic-gate extern int iflong;
457c478bd9Sstevel@tonic-gate
467c478bd9Sstevel@tonic-gate if (iflong) {
477c478bd9Sstevel@tonic-gate master.b = (long *)mptr;
48*11a8fa6cSceastha } else {
497c478bd9Sstevel@tonic-gate master.a = mptr;
507c478bd9Sstevel@tonic-gate }
517c478bd9Sstevel@tonic-gate
527c478bd9Sstevel@tonic-gate #if D1
537c478bd9Sstevel@tonic-gate fprintf(stderr, "entering doquery nitem %d\n", nitem);
54*11a8fa6cSceastha fprintf(stderr, "first few hashes are %ld %ld %ld %ld %ld\n",
55*11a8fa6cSceastha hpt[0], hpt[1], hpt[2], hpt[3], hpt[4]);
56*11a8fa6cSceastha fprintf(stderr, "and frequencies are %d %d %d %d %d\n",
57*11a8fa6cSceastha hfreq[0], hfreq[1], hfreq[2], hfreq[3], hfreq[4]);
587c478bd9Sstevel@tonic-gate #endif
597c478bd9Sstevel@tonic-gate assert(lmaster > 0);
607c478bd9Sstevel@tonic-gate if (coord == 0)
617c478bd9Sstevel@tonic-gate coord = (int *)zalloc(lmaster, sizeof (lmaster));
62*11a8fa6cSceastha if (colevel > 0) {
637c478bd9Sstevel@tonic-gate if (iflong)
647c478bd9Sstevel@tonic-gate prevdrop.b = (long *)zalloc(lmaster, sizeof (long));
657c478bd9Sstevel@tonic-gate else
667c478bd9Sstevel@tonic-gate prevdrop.a = (unsigned *)zalloc(lmaster, sizeof (int));
677c478bd9Sstevel@tonic-gate prevcoord = (int *)zalloc(lmaster, sizeof (lmaster));
68*11a8fa6cSceastha } else {
697c478bd9Sstevel@tonic-gate prevdrop.a = master.a;
707c478bd9Sstevel@tonic-gate prevcoord = coord;
717c478bd9Sstevel@tonic-gate }
727c478bd9Sstevel@tonic-gate #if D1
737c478bd9Sstevel@tonic-gate fprintf(stderr, "nitem %d\n", nitem);
747c478bd9Sstevel@tonic-gate #endif
75*11a8fa6cSceastha for (i = 0; i < nitem; i++) {
767c478bd9Sstevel@tonic-gate hh[i] = hash(qitem[i])%nhash;
777c478bd9Sstevel@tonic-gate #if D1
787c478bd9Sstevel@tonic-gate fprintf(stderr, "query wd X%sX has hash %d\n", qitem[i], hh[i]);
797c478bd9Sstevel@tonic-gate #endif
807c478bd9Sstevel@tonic-gate }
817c478bd9Sstevel@tonic-gate #if D1
827c478bd9Sstevel@tonic-gate fprintf(stderr, "past that loop nhash %d hpt is %lo\n", nhash, hpt);
837c478bd9Sstevel@tonic-gate #endif
847c478bd9Sstevel@tonic-gate if (prfreqs)
857c478bd9Sstevel@tonic-gate for (i = 0; i < nitem; i++)
86*11a8fa6cSceastha fprintf(stderr, "item %s hash %d hfreq %d\n",
87*11a8fa6cSceastha qitem[i], hh[i], hfreq[hh[i]]);
887c478bd9Sstevel@tonic-gate /* if possible, sort query into decreasing frequency of hashes */
897c478bd9Sstevel@tonic-gate if (hfrflg)
907c478bd9Sstevel@tonic-gate shell(nitem, hcomp, hexch);
917c478bd9Sstevel@tonic-gate #if D1
927c478bd9Sstevel@tonic-gate for (i = 0; i < nitem; i++)
937c478bd9Sstevel@tonic-gate fprintf(stderr, "item hash %d frq %d\n", hh[i], hfreq[hh[i]]);
947c478bd9Sstevel@tonic-gate #endif
957c478bd9Sstevel@tonic-gate lp = hpt [hh[0]];
967c478bd9Sstevel@tonic-gate #if D1
977c478bd9Sstevel@tonic-gate fprintf(stderr, "first item hash %d lp %ld 0%lo\n", hh[0], lp, lp);
987c478bd9Sstevel@tonic-gate #endif
997c478bd9Sstevel@tonic-gate assert(fb != NULL);
1007c478bd9Sstevel@tonic-gate assert(fseek(fb, lp, 0) != -1);
101*11a8fa6cSceastha for (i = 0; i < lmaster; i++) {
1027c478bd9Sstevel@tonic-gate if (iflong)
1037c478bd9Sstevel@tonic-gate master.b[i] = getl(fb);
1047c478bd9Sstevel@tonic-gate else
1057c478bd9Sstevel@tonic-gate master.a[i] = getw(fb);
1067c478bd9Sstevel@tonic-gate coord[i] = 1;
1077c478bd9Sstevel@tonic-gate #if D2
1087c478bd9Sstevel@tonic-gate if (iflong)
1097c478bd9Sstevel@tonic-gate fprintf(stderr, "master has %ld\n", (master.b[i]));
1107c478bd9Sstevel@tonic-gate else
1117c478bd9Sstevel@tonic-gate fprintf(stderr, "master has %d\n", (master.a[i]));
1127c478bd9Sstevel@tonic-gate #endif
1137c478bd9Sstevel@tonic-gate assert(i < lmaster);
114*11a8fa6cSceastha if (iflong) {
1157c478bd9Sstevel@tonic-gate if (master.b[i] == -1L) break;
116*11a8fa6cSceastha } else {
1177c478bd9Sstevel@tonic-gate if (master.a[i] == -1) break;
1187c478bd9Sstevel@tonic-gate }
1197c478bd9Sstevel@tonic-gate }
1207c478bd9Sstevel@tonic-gate nf = i;
121*11a8fa6cSceastha for (nterm = 1; nterm < nitem; nterm++) {
1227c478bd9Sstevel@tonic-gate #ifdef D1
1237c478bd9Sstevel@tonic-gate fprintf(stderr, "item %d, hash %d\n", nterm, hh[nterm]);
1247c478bd9Sstevel@tonic-gate #endif
125*11a8fa6cSceastha if (colevel > 0) {
126*11a8fa6cSceastha for (j = 0; j < nf; j++) {
1277c478bd9Sstevel@tonic-gate if (iflong)
1287c478bd9Sstevel@tonic-gate prevdrop.b[j] = master.b[j];
1297c478bd9Sstevel@tonic-gate else
1307c478bd9Sstevel@tonic-gate prevdrop.a[j] = master.a[j];
1317c478bd9Sstevel@tonic-gate prevcoord[j] = coord[j];
1327c478bd9Sstevel@tonic-gate }
1337c478bd9Sstevel@tonic-gate }
1347c478bd9Sstevel@tonic-gate lp = hpt[hh[nterm]];
1357c478bd9Sstevel@tonic-gate assert(fseek(fb, lp, 0) != -1);
1367c478bd9Sstevel@tonic-gate #if D1
137*11a8fa6cSceastha fprintf(stderr, "item %d hash %d seek to %ld\n",
138*11a8fa6cSceastha nterm, hh[nterm], lp);
1397c478bd9Sstevel@tonic-gate #endif
1407c478bd9Sstevel@tonic-gate g = j = 0;
141*11a8fa6cSceastha while (1) {
1427c478bd9Sstevel@tonic-gate if (iflong)
1437c478bd9Sstevel@tonic-gate k = getl(fb);
1447c478bd9Sstevel@tonic-gate else
1457c478bd9Sstevel@tonic-gate k = getw(fb);
1467c478bd9Sstevel@tonic-gate if (k == -1) break;
1477c478bd9Sstevel@tonic-gate #if D2
1487c478bd9Sstevel@tonic-gate fprintf(stderr, "next term finds %ld\n", k);
1497c478bd9Sstevel@tonic-gate #endif
1507c478bd9Sstevel@tonic-gate #if D3
1517c478bd9Sstevel@tonic-gate if (iflong)
152*11a8fa6cSceastha fprintf(stderr,
153*11a8fa6cSceastha "bfwh j %d nf %d master %ld k %ld\n",
154*11a8fa6cSceastha j, nf, prevdrop.b[j], (long)(k));
1557c478bd9Sstevel@tonic-gate else
156*11a8fa6cSceastha fprintf(stderr,
157*11a8fa6cSceastha "bfwh j %d nf %d master %ld k %ld\n",
158*11a8fa6cSceastha j, nf, prevdrop.a[j], (long)(k));
1597c478bd9Sstevel@tonic-gate #endif
160*11a8fa6cSceastha while (j < nf &&
161*11a8fa6cSceastha (iflong ? prevdrop.b[j] : prevdrop.a[j]) < k) {
1627c478bd9Sstevel@tonic-gate #if D3
1637c478bd9Sstevel@tonic-gate if (iflong)
164*11a8fa6cSceastha fprintf(stderr, "j %d nf %d prevdrop "
165*11a8fa6cSceastha "%ld prevcoord %d colevel %d nterm "
166*11a8fa6cSceastha "%d k %ld\n", j, nf, prevdrop.b[j],
167*11a8fa6cSceastha prevcoord[j], colevel, nterm,
168*11a8fa6cSceastha (long)(k));
1697c478bd9Sstevel@tonic-gate else
170*11a8fa6cSceastha fprintf(stderr, "j %d nf %d prevdrop "
171*11a8fa6cSceastha "%ld prevcoord %d colevel %d nterm "
172*11a8fa6cSceastha "%d k %ld\n", j, nf, prevdrop.a[j],
173*11a8fa6cSceastha prevcoord[j], colevel, nterm,
174*11a8fa6cSceastha (long)(k));
1757c478bd9Sstevel@tonic-gate #endif
1767c478bd9Sstevel@tonic-gate if (prevcoord[j] + colevel <= nterm)
1777c478bd9Sstevel@tonic-gate j++;
178*11a8fa6cSceastha else {
1797c478bd9Sstevel@tonic-gate assert(g < lmaster);
1807c478bd9Sstevel@tonic-gate if (iflong)
1817c478bd9Sstevel@tonic-gate master.b[g] = prevdrop.b[j];
1827c478bd9Sstevel@tonic-gate else
1837c478bd9Sstevel@tonic-gate master.a[g] = prevdrop.a[j];
1847c478bd9Sstevel@tonic-gate coord[g++] = prevcoord[j++];
1857c478bd9Sstevel@tonic-gate #if D1
1867c478bd9Sstevel@tonic-gate if (iflong)
187*11a8fa6cSceastha fprintf(stderr, " not skip g "
188*11a8fa6cSceastha "%d doc %d coord %d note "
189*11a8fa6cSceastha "%d\n", g, master.b[g-1],
190*11a8fa6cSceastha coord[g-1], master.b[j-1]);
1917c478bd9Sstevel@tonic-gate else
192*11a8fa6cSceastha fprintf(stderr, " not skip g "
193*11a8fa6cSceastha "%d doc %ld coord %d nterm "
194*11a8fa6cSceastha "%d\n", g, master.a[g-1],
195*11a8fa6cSceastha coord[g-1], nterm);
1967c478bd9Sstevel@tonic-gate #endif
1977c478bd9Sstevel@tonic-gate continue;
1987c478bd9Sstevel@tonic-gate }
1997c478bd9Sstevel@tonic-gate }
2007c478bd9Sstevel@tonic-gate if (colevel == 0 && j >= nf) break;
201*11a8fa6cSceastha if (j < nf &&
202*11a8fa6cSceastha (iflong ? prevdrop.b[j] : prevdrop.a[j]) == k) {
2037c478bd9Sstevel@tonic-gate if (iflong)
2047c478bd9Sstevel@tonic-gate master.b[g] = k;
2057c478bd9Sstevel@tonic-gate else
2067c478bd9Sstevel@tonic-gate master.a[g] = k;
2077c478bd9Sstevel@tonic-gate coord[g++] = prevcoord[j++]+1;
2087c478bd9Sstevel@tonic-gate #if D1
2097c478bd9Sstevel@tonic-gate if (iflong)
210*11a8fa6cSceastha fprintf(stderr, " at g %d item %ld "
211*11a8fa6cSceastha "coord %d note %ld\n", g,
212*11a8fa6cSceastha master.b[g-1], coord[g-1],
213*11a8fa6cSceastha master.b[j-1]);
2147c478bd9Sstevel@tonic-gate else
215*11a8fa6cSceastha fprintf(stderr, " at g %d item %d "
216*11a8fa6cSceastha "coord %d note %d\n", g,
217*11a8fa6cSceastha master.a[g-1], coord[g-1],
218*11a8fa6cSceastha master.a[j-1]);
2197c478bd9Sstevel@tonic-gate #endif
220*11a8fa6cSceastha } else
221*11a8fa6cSceastha if (colevel >= nterm) {
2227c478bd9Sstevel@tonic-gate if (iflong)
2237c478bd9Sstevel@tonic-gate master.b[g] = k;
2247c478bd9Sstevel@tonic-gate else
2257c478bd9Sstevel@tonic-gate master.a[g] = k;
2267c478bd9Sstevel@tonic-gate coord[g++] = 1;
2277c478bd9Sstevel@tonic-gate }
2287c478bd9Sstevel@tonic-gate }
2297c478bd9Sstevel@tonic-gate #if D1
2307c478bd9Sstevel@tonic-gate fprintf(stderr, "now have %d items\n", g);
2317c478bd9Sstevel@tonic-gate #endif
2327c478bd9Sstevel@tonic-gate if (colevel > 0)
2337c478bd9Sstevel@tonic-gate for (; j < nf; j++)
234*11a8fa6cSceastha if ((iflong ? prevdrop.b[j] : prevdrop.a[j]) +
235*11a8fa6cSceastha colevel > nterm) {
2367c478bd9Sstevel@tonic-gate assert(g < lmaster);
2377c478bd9Sstevel@tonic-gate if (iflong)
2387c478bd9Sstevel@tonic-gate master.b[g] = prevdrop.b[j];
2397c478bd9Sstevel@tonic-gate else
2407c478bd9Sstevel@tonic-gate master.a[g] = prevdrop.a[j];
2417c478bd9Sstevel@tonic-gate coord[g++] = prevcoord[j];
2427c478bd9Sstevel@tonic-gate #if D3
2437c478bd9Sstevel@tonic-gate if (iflong)
244*11a8fa6cSceastha fprintf(stderr, "copied over "
245*11a8fa6cSceastha "%ld coord %d\n",
246*11a8fa6cSceastha master.b[g-1], coord[g-1]);
2477c478bd9Sstevel@tonic-gate else
248*11a8fa6cSceastha fprintf(stderr, "copied over "
249*11a8fa6cSceastha "%d coord %d\n",
250*11a8fa6cSceastha master.a[g-1], coord[g-1]);
2517c478bd9Sstevel@tonic-gate #endif
2527c478bd9Sstevel@tonic-gate }
2537c478bd9Sstevel@tonic-gate nf = g;
2547c478bd9Sstevel@tonic-gate }
255*11a8fa6cSceastha if (colevel > 0) {
2567c478bd9Sstevel@tonic-gate best = 0;
2577c478bd9Sstevel@tonic-gate for (j = 0; j < nf; j++)
2587c478bd9Sstevel@tonic-gate if (coord[j] > best) best = coord[j];
2597c478bd9Sstevel@tonic-gate #if D1
2607c478bd9Sstevel@tonic-gate fprintf(stderr, "colevel %d best %d\n", colevel, best);
2617c478bd9Sstevel@tonic-gate #endif
2627c478bd9Sstevel@tonic-gate reached = best;
2637c478bd9Sstevel@tonic-gate for (g = j = 0; j < nf; j++)
264*11a8fa6cSceastha if (coord[j] == best) {
2657c478bd9Sstevel@tonic-gate if (iflong)
2667c478bd9Sstevel@tonic-gate master.b[g++] = master.b[j];
2677c478bd9Sstevel@tonic-gate else
2687c478bd9Sstevel@tonic-gate master.a[g++] = master.a[j];
2697c478bd9Sstevel@tonic-gate }
2707c478bd9Sstevel@tonic-gate nf = g;
2717c478bd9Sstevel@tonic-gate #if D1
2727c478bd9Sstevel@tonic-gate fprintf(stderr, "yet got %d\n", nf);
2737c478bd9Sstevel@tonic-gate #endif
2747c478bd9Sstevel@tonic-gate }
2757c478bd9Sstevel@tonic-gate #ifdef D1
2767c478bd9Sstevel@tonic-gate fprintf(stderr, " returning with %d\n", nf);
2777c478bd9Sstevel@tonic-gate #endif
278*11a8fa6cSceastha if (colevel) {
2797c478bd9Sstevel@tonic-gate free(prevdrop, lmaster, iflong ? sizeof (long): sizeof (int));
2807c478bd9Sstevel@tonic-gate free(prevcoord, lmaster, sizeof (lmaster));
2817c478bd9Sstevel@tonic-gate }
2827c478bd9Sstevel@tonic-gate #if D3
2837c478bd9Sstevel@tonic-gate for (g = 0; g < nf; g++)
2847c478bd9Sstevel@tonic-gate if (iflong)
2857c478bd9Sstevel@tonic-gate fprintf(stderr, ":%ld\n", master.b[g]);
2867c478bd9Sstevel@tonic-gate else
2877c478bd9Sstevel@tonic-gate fprintf(stderr, ":%d\n", master.a[g]);
2887c478bd9Sstevel@tonic-gate #endif
2897c478bd9Sstevel@tonic-gate return (nf);
2907c478bd9Sstevel@tonic-gate }
2917c478bd9Sstevel@tonic-gate
292*11a8fa6cSceastha static long
getl(FILE * fb)293*11a8fa6cSceastha getl(FILE *fb)
2947c478bd9Sstevel@tonic-gate {
2957c478bd9Sstevel@tonic-gate return (getw(fb));
2967c478bd9Sstevel@tonic-gate }
2977c478bd9Sstevel@tonic-gate
298*11a8fa6cSceastha static int
hcomp(int n1,int n2)299*11a8fa6cSceastha hcomp(int n1, int n2)
3007c478bd9Sstevel@tonic-gate {
3017c478bd9Sstevel@tonic-gate return (hfreq[hh[n1]] <= hfreq[hh[n2]]);
3027c478bd9Sstevel@tonic-gate }
3037c478bd9Sstevel@tonic-gate
304*11a8fa6cSceastha static void
hexch(int n1,int n2)305*11a8fa6cSceastha hexch(int n1, int n2)
3067c478bd9Sstevel@tonic-gate {
3077c478bd9Sstevel@tonic-gate int t;
3087c478bd9Sstevel@tonic-gate t = hh[n1];
3097c478bd9Sstevel@tonic-gate hh[n1] = hh[n2];
3107c478bd9Sstevel@tonic-gate hh[n2] = t;
3117c478bd9Sstevel@tonic-gate }
312