1*11a8fa6cSceastha /* 2*11a8fa6cSceastha * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 3*11a8fa6cSceastha * Use is subject to license terms. 4*11a8fa6cSceastha */ 5*11a8fa6cSceastha 67c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 77c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 87c478bd9Sstevel@tonic-gate 97c478bd9Sstevel@tonic-gate /* 107c478bd9Sstevel@tonic-gate * Copyright (c) 1980 Regents of the University of California. 117c478bd9Sstevel@tonic-gate * All rights reserved. The Berkeley software License Agreement 127c478bd9Sstevel@tonic-gate * specifies the terms and conditions for redistribution. 137c478bd9Sstevel@tonic-gate */ 147c478bd9Sstevel@tonic-gate 157c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 167c478bd9Sstevel@tonic-gate 177c478bd9Sstevel@tonic-gate #include "refer..c" 187c478bd9Sstevel@tonic-gate 197c478bd9Sstevel@tonic-gate static int *coord = 0; 207c478bd9Sstevel@tonic-gate int hh[50]; 21*11a8fa6cSceastha extern int *hfreq, hfrflg; 227c478bd9Sstevel@tonic-gate extern int prfreqs; 237c478bd9Sstevel@tonic-gate union ptr { 247c478bd9Sstevel@tonic-gate unsigned *a; 257c478bd9Sstevel@tonic-gate long *b; 267c478bd9Sstevel@tonic-gate }; 277c478bd9Sstevel@tonic-gate 28*11a8fa6cSceastha extern int hash(); 29*11a8fa6cSceastha extern void shell(); 30*11a8fa6cSceastha extern void *zalloc(); 31*11a8fa6cSceastha 32*11a8fa6cSceastha static long getl(FILE *); 33*11a8fa6cSceastha static int hcomp(int, int); 34*11a8fa6cSceastha static void hexch(int, int); 35*11a8fa6cSceastha 36*11a8fa6cSceastha int 37*11a8fa6cSceastha doquery(long *hpt, int nhash, FILE *fb, int nitem, 38*11a8fa6cSceastha char *qitem[], unsigned *mptr) 397c478bd9Sstevel@tonic-gate { 407c478bd9Sstevel@tonic-gate long k; 417c478bd9Sstevel@tonic-gate union ptr prevdrop, master; 427c478bd9Sstevel@tonic-gate int nf = 0, best = 0, nterm = 0, i, g, j; 437c478bd9Sstevel@tonic-gate int *prevcoord; 447c478bd9Sstevel@tonic-gate long lp; 457c478bd9Sstevel@tonic-gate extern int lmaster, colevel, reached; 467c478bd9Sstevel@tonic-gate extern int iflong; 477c478bd9Sstevel@tonic-gate 487c478bd9Sstevel@tonic-gate if (iflong) { 497c478bd9Sstevel@tonic-gate master.b = (long *)mptr; 50*11a8fa6cSceastha } else { 517c478bd9Sstevel@tonic-gate master.a = mptr; 527c478bd9Sstevel@tonic-gate } 537c478bd9Sstevel@tonic-gate 547c478bd9Sstevel@tonic-gate #if D1 557c478bd9Sstevel@tonic-gate fprintf(stderr, "entering doquery nitem %d\n", nitem); 56*11a8fa6cSceastha fprintf(stderr, "first few hashes are %ld %ld %ld %ld %ld\n", 57*11a8fa6cSceastha hpt[0], hpt[1], hpt[2], hpt[3], hpt[4]); 58*11a8fa6cSceastha fprintf(stderr, "and frequencies are %d %d %d %d %d\n", 59*11a8fa6cSceastha hfreq[0], hfreq[1], hfreq[2], hfreq[3], hfreq[4]); 607c478bd9Sstevel@tonic-gate #endif 617c478bd9Sstevel@tonic-gate assert(lmaster > 0); 627c478bd9Sstevel@tonic-gate if (coord == 0) 637c478bd9Sstevel@tonic-gate coord = (int *)zalloc(lmaster, sizeof (lmaster)); 64*11a8fa6cSceastha if (colevel > 0) { 657c478bd9Sstevel@tonic-gate if (iflong) 667c478bd9Sstevel@tonic-gate prevdrop.b = (long *)zalloc(lmaster, sizeof (long)); 677c478bd9Sstevel@tonic-gate else 687c478bd9Sstevel@tonic-gate prevdrop.a = (unsigned *)zalloc(lmaster, sizeof (int)); 697c478bd9Sstevel@tonic-gate prevcoord = (int *)zalloc(lmaster, sizeof (lmaster)); 70*11a8fa6cSceastha } else { 717c478bd9Sstevel@tonic-gate prevdrop.a = master.a; 727c478bd9Sstevel@tonic-gate prevcoord = coord; 737c478bd9Sstevel@tonic-gate } 747c478bd9Sstevel@tonic-gate #if D1 757c478bd9Sstevel@tonic-gate fprintf(stderr, "nitem %d\n", nitem); 767c478bd9Sstevel@tonic-gate #endif 77*11a8fa6cSceastha for (i = 0; i < nitem; i++) { 787c478bd9Sstevel@tonic-gate hh[i] = hash(qitem[i])%nhash; 797c478bd9Sstevel@tonic-gate #if D1 807c478bd9Sstevel@tonic-gate fprintf(stderr, "query wd X%sX has hash %d\n", qitem[i], hh[i]); 817c478bd9Sstevel@tonic-gate #endif 827c478bd9Sstevel@tonic-gate } 837c478bd9Sstevel@tonic-gate #if D1 847c478bd9Sstevel@tonic-gate fprintf(stderr, "past that loop nhash %d hpt is %lo\n", nhash, hpt); 857c478bd9Sstevel@tonic-gate #endif 867c478bd9Sstevel@tonic-gate if (prfreqs) 877c478bd9Sstevel@tonic-gate for (i = 0; i < nitem; i++) 88*11a8fa6cSceastha fprintf(stderr, "item %s hash %d hfreq %d\n", 89*11a8fa6cSceastha qitem[i], hh[i], hfreq[hh[i]]); 907c478bd9Sstevel@tonic-gate /* if possible, sort query into decreasing frequency of hashes */ 917c478bd9Sstevel@tonic-gate if (hfrflg) 927c478bd9Sstevel@tonic-gate shell(nitem, hcomp, hexch); 937c478bd9Sstevel@tonic-gate #if D1 947c478bd9Sstevel@tonic-gate for (i = 0; i < nitem; i++) 957c478bd9Sstevel@tonic-gate fprintf(stderr, "item hash %d frq %d\n", hh[i], hfreq[hh[i]]); 967c478bd9Sstevel@tonic-gate #endif 977c478bd9Sstevel@tonic-gate lp = hpt [hh[0]]; 987c478bd9Sstevel@tonic-gate #if D1 997c478bd9Sstevel@tonic-gate fprintf(stderr, "first item hash %d lp %ld 0%lo\n", hh[0], lp, lp); 1007c478bd9Sstevel@tonic-gate #endif 1017c478bd9Sstevel@tonic-gate assert(fb != NULL); 1027c478bd9Sstevel@tonic-gate assert(fseek(fb, lp, 0) != -1); 103*11a8fa6cSceastha for (i = 0; i < lmaster; i++) { 1047c478bd9Sstevel@tonic-gate if (iflong) 1057c478bd9Sstevel@tonic-gate master.b[i] = getl(fb); 1067c478bd9Sstevel@tonic-gate else 1077c478bd9Sstevel@tonic-gate master.a[i] = getw(fb); 1087c478bd9Sstevel@tonic-gate coord[i] = 1; 1097c478bd9Sstevel@tonic-gate #if D2 1107c478bd9Sstevel@tonic-gate if (iflong) 1117c478bd9Sstevel@tonic-gate fprintf(stderr, "master has %ld\n", (master.b[i])); 1127c478bd9Sstevel@tonic-gate else 1137c478bd9Sstevel@tonic-gate fprintf(stderr, "master has %d\n", (master.a[i])); 1147c478bd9Sstevel@tonic-gate #endif 1157c478bd9Sstevel@tonic-gate assert(i < lmaster); 116*11a8fa6cSceastha if (iflong) { 1177c478bd9Sstevel@tonic-gate if (master.b[i] == -1L) break; 118*11a8fa6cSceastha } else { 1197c478bd9Sstevel@tonic-gate if (master.a[i] == -1) break; 1207c478bd9Sstevel@tonic-gate } 1217c478bd9Sstevel@tonic-gate } 1227c478bd9Sstevel@tonic-gate nf = i; 123*11a8fa6cSceastha for (nterm = 1; nterm < nitem; nterm++) { 1247c478bd9Sstevel@tonic-gate #ifdef D1 1257c478bd9Sstevel@tonic-gate fprintf(stderr, "item %d, hash %d\n", nterm, hh[nterm]); 1267c478bd9Sstevel@tonic-gate #endif 127*11a8fa6cSceastha if (colevel > 0) { 128*11a8fa6cSceastha for (j = 0; j < nf; j++) { 1297c478bd9Sstevel@tonic-gate if (iflong) 1307c478bd9Sstevel@tonic-gate prevdrop.b[j] = master.b[j]; 1317c478bd9Sstevel@tonic-gate else 1327c478bd9Sstevel@tonic-gate prevdrop.a[j] = master.a[j]; 1337c478bd9Sstevel@tonic-gate prevcoord[j] = coord[j]; 1347c478bd9Sstevel@tonic-gate } 1357c478bd9Sstevel@tonic-gate } 1367c478bd9Sstevel@tonic-gate lp = hpt[hh[nterm]]; 1377c478bd9Sstevel@tonic-gate assert(fseek(fb, lp, 0) != -1); 1387c478bd9Sstevel@tonic-gate #if D1 139*11a8fa6cSceastha fprintf(stderr, "item %d hash %d seek to %ld\n", 140*11a8fa6cSceastha nterm, hh[nterm], lp); 1417c478bd9Sstevel@tonic-gate #endif 1427c478bd9Sstevel@tonic-gate g = j = 0; 143*11a8fa6cSceastha while (1) { 1447c478bd9Sstevel@tonic-gate if (iflong) 1457c478bd9Sstevel@tonic-gate k = getl(fb); 1467c478bd9Sstevel@tonic-gate else 1477c478bd9Sstevel@tonic-gate k = getw(fb); 1487c478bd9Sstevel@tonic-gate if (k == -1) break; 1497c478bd9Sstevel@tonic-gate #if D2 1507c478bd9Sstevel@tonic-gate fprintf(stderr, "next term finds %ld\n", k); 1517c478bd9Sstevel@tonic-gate #endif 1527c478bd9Sstevel@tonic-gate #if D3 1537c478bd9Sstevel@tonic-gate if (iflong) 154*11a8fa6cSceastha fprintf(stderr, 155*11a8fa6cSceastha "bfwh j %d nf %d master %ld k %ld\n", 156*11a8fa6cSceastha j, nf, prevdrop.b[j], (long)(k)); 1577c478bd9Sstevel@tonic-gate else 158*11a8fa6cSceastha fprintf(stderr, 159*11a8fa6cSceastha "bfwh j %d nf %d master %ld k %ld\n", 160*11a8fa6cSceastha j, nf, prevdrop.a[j], (long)(k)); 1617c478bd9Sstevel@tonic-gate #endif 162*11a8fa6cSceastha while (j < nf && 163*11a8fa6cSceastha (iflong ? prevdrop.b[j] : prevdrop.a[j]) < k) { 1647c478bd9Sstevel@tonic-gate #if D3 1657c478bd9Sstevel@tonic-gate if (iflong) 166*11a8fa6cSceastha fprintf(stderr, "j %d nf %d prevdrop " 167*11a8fa6cSceastha "%ld prevcoord %d colevel %d nterm " 168*11a8fa6cSceastha "%d k %ld\n", j, nf, prevdrop.b[j], 169*11a8fa6cSceastha prevcoord[j], colevel, nterm, 170*11a8fa6cSceastha (long)(k)); 1717c478bd9Sstevel@tonic-gate else 172*11a8fa6cSceastha fprintf(stderr, "j %d nf %d prevdrop " 173*11a8fa6cSceastha "%ld prevcoord %d colevel %d nterm " 174*11a8fa6cSceastha "%d k %ld\n", j, nf, prevdrop.a[j], 175*11a8fa6cSceastha prevcoord[j], colevel, nterm, 176*11a8fa6cSceastha (long)(k)); 1777c478bd9Sstevel@tonic-gate #endif 1787c478bd9Sstevel@tonic-gate if (prevcoord[j] + colevel <= nterm) 1797c478bd9Sstevel@tonic-gate j++; 180*11a8fa6cSceastha else { 1817c478bd9Sstevel@tonic-gate assert(g < lmaster); 1827c478bd9Sstevel@tonic-gate if (iflong) 1837c478bd9Sstevel@tonic-gate master.b[g] = prevdrop.b[j]; 1847c478bd9Sstevel@tonic-gate else 1857c478bd9Sstevel@tonic-gate master.a[g] = prevdrop.a[j]; 1867c478bd9Sstevel@tonic-gate coord[g++] = prevcoord[j++]; 1877c478bd9Sstevel@tonic-gate #if D1 1887c478bd9Sstevel@tonic-gate if (iflong) 189*11a8fa6cSceastha fprintf(stderr, " not skip g " 190*11a8fa6cSceastha "%d doc %d coord %d note " 191*11a8fa6cSceastha "%d\n", g, master.b[g-1], 192*11a8fa6cSceastha coord[g-1], master.b[j-1]); 1937c478bd9Sstevel@tonic-gate else 194*11a8fa6cSceastha fprintf(stderr, " not skip g " 195*11a8fa6cSceastha "%d doc %ld coord %d nterm " 196*11a8fa6cSceastha "%d\n", g, master.a[g-1], 197*11a8fa6cSceastha coord[g-1], nterm); 1987c478bd9Sstevel@tonic-gate #endif 1997c478bd9Sstevel@tonic-gate continue; 2007c478bd9Sstevel@tonic-gate } 2017c478bd9Sstevel@tonic-gate } 2027c478bd9Sstevel@tonic-gate if (colevel == 0 && j >= nf) break; 203*11a8fa6cSceastha if (j < nf && 204*11a8fa6cSceastha (iflong ? prevdrop.b[j] : prevdrop.a[j]) == k) { 2057c478bd9Sstevel@tonic-gate if (iflong) 2067c478bd9Sstevel@tonic-gate master.b[g] = k; 2077c478bd9Sstevel@tonic-gate else 2087c478bd9Sstevel@tonic-gate master.a[g] = k; 2097c478bd9Sstevel@tonic-gate coord[g++] = prevcoord[j++]+1; 2107c478bd9Sstevel@tonic-gate #if D1 2117c478bd9Sstevel@tonic-gate if (iflong) 212*11a8fa6cSceastha fprintf(stderr, " at g %d item %ld " 213*11a8fa6cSceastha "coord %d note %ld\n", g, 214*11a8fa6cSceastha master.b[g-1], coord[g-1], 215*11a8fa6cSceastha master.b[j-1]); 2167c478bd9Sstevel@tonic-gate else 217*11a8fa6cSceastha fprintf(stderr, " at g %d item %d " 218*11a8fa6cSceastha "coord %d note %d\n", g, 219*11a8fa6cSceastha master.a[g-1], coord[g-1], 220*11a8fa6cSceastha master.a[j-1]); 2217c478bd9Sstevel@tonic-gate #endif 222*11a8fa6cSceastha } else 223*11a8fa6cSceastha if (colevel >= nterm) { 2247c478bd9Sstevel@tonic-gate if (iflong) 2257c478bd9Sstevel@tonic-gate master.b[g] = k; 2267c478bd9Sstevel@tonic-gate else 2277c478bd9Sstevel@tonic-gate master.a[g] = k; 2287c478bd9Sstevel@tonic-gate coord[g++] = 1; 2297c478bd9Sstevel@tonic-gate } 2307c478bd9Sstevel@tonic-gate } 2317c478bd9Sstevel@tonic-gate #if D1 2327c478bd9Sstevel@tonic-gate fprintf(stderr, "now have %d items\n", g); 2337c478bd9Sstevel@tonic-gate #endif 2347c478bd9Sstevel@tonic-gate if (colevel > 0) 2357c478bd9Sstevel@tonic-gate for (; j < nf; j++) 236*11a8fa6cSceastha if ((iflong ? prevdrop.b[j] : prevdrop.a[j]) + 237*11a8fa6cSceastha colevel > nterm) { 2387c478bd9Sstevel@tonic-gate assert(g < lmaster); 2397c478bd9Sstevel@tonic-gate if (iflong) 2407c478bd9Sstevel@tonic-gate master.b[g] = prevdrop.b[j]; 2417c478bd9Sstevel@tonic-gate else 2427c478bd9Sstevel@tonic-gate master.a[g] = prevdrop.a[j]; 2437c478bd9Sstevel@tonic-gate coord[g++] = prevcoord[j]; 2447c478bd9Sstevel@tonic-gate #if D3 2457c478bd9Sstevel@tonic-gate if (iflong) 246*11a8fa6cSceastha fprintf(stderr, "copied over " 247*11a8fa6cSceastha "%ld coord %d\n", 248*11a8fa6cSceastha master.b[g-1], coord[g-1]); 2497c478bd9Sstevel@tonic-gate else 250*11a8fa6cSceastha fprintf(stderr, "copied over " 251*11a8fa6cSceastha "%d coord %d\n", 252*11a8fa6cSceastha master.a[g-1], coord[g-1]); 2537c478bd9Sstevel@tonic-gate #endif 2547c478bd9Sstevel@tonic-gate } 2557c478bd9Sstevel@tonic-gate nf = g; 2567c478bd9Sstevel@tonic-gate } 257*11a8fa6cSceastha if (colevel > 0) { 2587c478bd9Sstevel@tonic-gate best = 0; 2597c478bd9Sstevel@tonic-gate for (j = 0; j < nf; j++) 2607c478bd9Sstevel@tonic-gate if (coord[j] > best) best = coord[j]; 2617c478bd9Sstevel@tonic-gate #if D1 2627c478bd9Sstevel@tonic-gate fprintf(stderr, "colevel %d best %d\n", colevel, best); 2637c478bd9Sstevel@tonic-gate #endif 2647c478bd9Sstevel@tonic-gate reached = best; 2657c478bd9Sstevel@tonic-gate for (g = j = 0; j < nf; j++) 266*11a8fa6cSceastha if (coord[j] == best) { 2677c478bd9Sstevel@tonic-gate if (iflong) 2687c478bd9Sstevel@tonic-gate master.b[g++] = master.b[j]; 2697c478bd9Sstevel@tonic-gate else 2707c478bd9Sstevel@tonic-gate master.a[g++] = master.a[j]; 2717c478bd9Sstevel@tonic-gate } 2727c478bd9Sstevel@tonic-gate nf = g; 2737c478bd9Sstevel@tonic-gate #if D1 2747c478bd9Sstevel@tonic-gate fprintf(stderr, "yet got %d\n", nf); 2757c478bd9Sstevel@tonic-gate #endif 2767c478bd9Sstevel@tonic-gate } 2777c478bd9Sstevel@tonic-gate #ifdef D1 2787c478bd9Sstevel@tonic-gate fprintf(stderr, " returning with %d\n", nf); 2797c478bd9Sstevel@tonic-gate #endif 280*11a8fa6cSceastha if (colevel) { 2817c478bd9Sstevel@tonic-gate free(prevdrop, lmaster, iflong ? sizeof (long): sizeof (int)); 2827c478bd9Sstevel@tonic-gate free(prevcoord, lmaster, sizeof (lmaster)); 2837c478bd9Sstevel@tonic-gate } 2847c478bd9Sstevel@tonic-gate #if D3 2857c478bd9Sstevel@tonic-gate for (g = 0; g < nf; g++) 2867c478bd9Sstevel@tonic-gate if (iflong) 2877c478bd9Sstevel@tonic-gate fprintf(stderr, ":%ld\n", master.b[g]); 2887c478bd9Sstevel@tonic-gate else 2897c478bd9Sstevel@tonic-gate fprintf(stderr, ":%d\n", master.a[g]); 2907c478bd9Sstevel@tonic-gate #endif 2917c478bd9Sstevel@tonic-gate return (nf); 2927c478bd9Sstevel@tonic-gate } 2937c478bd9Sstevel@tonic-gate 294*11a8fa6cSceastha static long 295*11a8fa6cSceastha getl(FILE *fb) 2967c478bd9Sstevel@tonic-gate { 2977c478bd9Sstevel@tonic-gate return (getw(fb)); 2987c478bd9Sstevel@tonic-gate } 2997c478bd9Sstevel@tonic-gate 300*11a8fa6cSceastha static int 301*11a8fa6cSceastha hcomp(int n1, int n2) 3027c478bd9Sstevel@tonic-gate { 3037c478bd9Sstevel@tonic-gate return (hfreq[hh[n1]] <= hfreq[hh[n2]]); 3047c478bd9Sstevel@tonic-gate } 3057c478bd9Sstevel@tonic-gate 306*11a8fa6cSceastha static void 307*11a8fa6cSceastha hexch(int n1, int n2) 3087c478bd9Sstevel@tonic-gate { 3097c478bd9Sstevel@tonic-gate int t; 3107c478bd9Sstevel@tonic-gate t = hh[n1]; 3117c478bd9Sstevel@tonic-gate hh[n1] = hh[n2]; 3127c478bd9Sstevel@tonic-gate hh[n2] = t; 3137c478bd9Sstevel@tonic-gate } 314