xref: /titanic_44/usr/src/cmd/refer/hunt1.c (revision a38ddfee9c8c6b6c5a2947ff52fd2338362a4444)
1 /*
2  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
7 /*	  All Rights Reserved  	*/
8 
9 /*
10  * Copyright (c) 1980 Regents of the University of California.
11  * All rights reserved. The Berkeley software License Agreement
12  * specifies the terms and conditions for redistribution.
13  */
14 
15 #pragma ident	"%Z%%M%	%I%	%E% SMI"
16 
17 #include <locale.h>
18 #include <stdio.h>
19 #include <assert.h>
/*
 * State defined in other files of the program.
 *
 * keepold is cleared by the -g option.  fgnames is a list of file names
 * handed to makefgrep()/grepcall() by main(); fgnamp points just past the
 * last entry in use.
 */
extern char refdir[];
extern int keepold;
extern char *fgnames[];
extern char **fgnamp;

/* Routines defined in other files. */
extern int baddrop();
extern int doquery();
extern void err();
extern long findline();
extern int getq();
extern void grepcall();
extern int makefgrep();
extern void result();
extern void tick();
extern void tock();
long gdate();
char *calloc();

/* Routines defined in this file. */
char *todir();
static int setfrom(char);

/* Index state filled in by main(). */
FILE *fd = NULL;		/* stream on the ".id" file */
int *hfreq;			/* table read from ".ia" after the hash table */
int hfrflg;			/* nonzero when all of hfreq was read */
int iflong = 0;			/* second word of the ".ia" header */
long indexdate = 0;		/* gdate() of the ".ib" stream */

/* Settings changed by command line options in main(). */
int lmaster = 500;		/* -l: length of internal lists */
int colevel = 0;		/* -C: coordination level */
int measure = 0;		/* -m: call tick()/tock() around each query */
int prfreqs = 0;		/* -P: print term freqs */
int soutlen = 1000;		/* length that goes with soutput */
static int full = 1000;		/* -F: value chosen by setfrom() */
static int tags = 0;		/* -T: value chosen by setfrom() */
char *sinput;			/* -i: input in argument string */
char *soutput;			/* -s, -o: text output to string */
char *tagout;			/* -t: tag output to string */

/* Directory recorded by todir(). */
char usedir[100];

/* Not referenced anywhere else in this file. */
int reached = 0;
char gfile[50];
54 
55 int
56 main(int argc, char *argv[])
57 {
58 	/* read query from stdin, expect name of indexes in argv[1] */
59 	static FILE *fa, *fb, *fc;
60 	char nma[100], nmb[100], nmc[100], *qitem[100], *rprog = NULL;
61 	char nmd[100], grepquery[256];
62 	static char oldname[30];
63 	static int was = 0;
64 	/* these pointers are unions of pointer to int and pointer to long */
65 	long *hpt;
66 	unsigned *master = 0;
67 	int falseflg, nhash, nitem, nfound, frtbl, kk;
68 
69 	/* special wart for refpart: default is tags only */
70 
71 	(void) setlocale(LC_ALL, "");
72 
73 #if !defined(TEXT_DOMAIN)
74 #define	TEXT_DOMAIN "SYS_TEST"
75 #endif
76 	(void) textdomain(TEXT_DOMAIN);
77 
78 	falseflg = 0;
79 
80 	while (argc > 1 && argv[1][0] == '-') {
81 		switch (argv[1][1]) {
82 		case 'a': /* all output, incl. false drops */
83 			falseflg = 1;
84 			break;
85 		case 'r':
86 			argc--;
87 			argv++;
88 			rprog = argv[1];
89 			break;
90 		case 'F': /* put out full text */
91 			full = setfrom(argv[1][2]);
92 			break;
93 		case 'T': /* put out tags */
94 			tags = setfrom(argv[1][2]);
95 			break;
96 		case 'i': /* input in argument string */
97 			argc--;
98 			argv++;
99 			sinput = argv[1];
100 			break;
101 		case 's': /* text output to string */
102 		case 'o':
103 			argc--;
104 			argv++;
105 			soutput = argv[1];
106 			if ((int)argv[2] < 16000) {
107 				soutlen = (int)argv[2];
108 				argc--;
109 				argv++;
110 			}
111 			break;
112 		case 't': /* tag output to string */
113 			argc--;
114 			argv++;
115 			tagout = argv[1];
116 			break;
117 		case 'l': /* length of internal lists */
118 			argc--;
119 			argv++;
120 			lmaster = atoi(argv[1]);
121 			break;
122 		case 'g': /* suppress fgrep search on old files */
123 			keepold = 0;
124 			break;
125 		case 'C': /* coordination level */
126 			colevel = atoi(argv[1]+2);
127 #if D1
128 			fprintf(stderr, "colevel set to %d\n", colevel);
129 #endif
130 			break;
131 		case 'P': /* print term freqs */
132 			prfreqs = 1;
133 			break;
134 		case 'm':
135 			measure = 1;
136 			break;
137 		}
138 		argc--;
139 		argv++;
140 	}
141 	if (argc < 2)
142 		exit(1);
143 	strcpy(nma, todir(argv[1]));
144 	if (was == 0 || strcmp(oldname, nma) != 0) {
145 		strcpy(oldname, nma);
146 		strcpy(nmb, nma);
147 		strcpy(nmc, nmb);
148 		strcpy(nmd, nma);
149 		strcat(nma, ".ia");
150 		strcat(nmb, ".ib");
151 		strcat(nmc, ".ic");
152 		strcat(nmd, ".id");
153 		if (was) {
154 			fclose(fa);
155 			fclose(fb);
156 			fclose(fc);
157 		}
158 
159 		fa = fopen(nma, "r");
160 		if (fa == NULL) {
161 			strcpy(*fgnamp++ = calloc(strlen(oldname)+2, 1),
162 			    oldname);
163 			fb = NULL;
164 			goto search;
165 		}
166 		fb = fopen(nmb, "r");
167 		fc = fopen(nmc, "r");
168 		was = 1;
169 		if (fb == NULL || fc == NULL) {
170 			err(gettext("Index incomplete %s"), nmb);
171 			exit(1);
172 		}
173 		indexdate = gdate(fb);
174 		fd = fopen(nmd, "r");
175 	}
176 	fseek(fa, 0L, 0);
177 	fread(&nhash, sizeof (nhash), 1, fa);
178 	fread(&iflong, sizeof (iflong), 1, fa);
179 	if (master == 0)
180 		master = (unsigned *)calloc(lmaster, iflong ?
181 		    sizeof (long) : sizeof (unsigned));
182 	hpt = (long *)calloc(nhash, sizeof (*hpt));
183 	kk = fread(hpt, sizeof (*hpt), nhash, fa);
184 #if D1
185 	fprintf(stderr, "read %d hashes, iflong %d, nhash %d\n",
186 	    kk, iflong, nhash);
187 #endif
188 	assert(kk == nhash);
189 	hfreq = (int *)calloc(nhash, sizeof (*hfreq));
190 	assert(hfreq != NULL);
191 	frtbl = fread(hfreq, sizeof (*hfreq), nhash, fa);
192 	hfrflg = (frtbl == nhash);
193 #if D1
194 	fprintf(stderr, "read freqs %d\n", frtbl);
195 #endif
196 
197 search:
198 	while (1) {
199 		nitem = getq(qitem);
200 		if (measure) tick();
201 		if (nitem == 0) continue;
202 		if (nitem < 0) break;
203 		if (tagout) tagout[0] = 0;
204 		if (fb != NULL) {
205 			nfound = doquery(hpt, nhash, fb, nitem, qitem, master);
206 #if D1
207 			fprintf(stderr, "after doquery nfound %d\n", nfound);
208 #endif
209 			fgnamp = fgnames;
210 			if (falseflg == 0)
211 				nfound = baddrop(master, nfound, fc,
212 				    nitem, qitem, rprog, full);
213 #if D1
214 			fprintf(stderr, "after baddrop nfound %d\n", nfound);
215 #endif
216 		}
217 		if (fgnamp > fgnames) {
218 			char **fgp, tgbuff[100];
219 			int k;
220 #if D1
221 			fprintf(stderr, "were %d bad files\n", fgnamp-fgnames);
222 #endif
223 			(void) memset(tgbuff, 0, sizeof (tgbuff));
224 			grepquery[0] = 0;
225 			for (k = 0; k < nitem; k++) {
226 				strcat(grepquery, " ");
227 				strcat(grepquery, qitem[k]);
228 			}
229 #if D1
230 			fprintf(stderr, "grepquery %s\n", grepquery);
231 #endif
232 			for (fgp = fgnames; fgp < fgnamp; fgp++) {
233 #if D1
234 				fprintf(stderr, "Now on %s query /%s/\n",
235 				    *fgp, grepquery);
236 #endif
237 				makefgrep(*fgp);
238 #if D1
239 				fprintf(stderr, "grepmade\n");
240 #endif
241 				if (tagout == 0)
242 					tagout = tgbuff;
243 				grepcall(grepquery, tagout, *fgp);
244 #if D1
245 				fprintf(stderr, "tagout now /%s/\n", tagout);
246 #endif
247 				if (full) {
248 					int nout;
249 					char *bout;
250 					char *tagp;
251 					char *oldtagp;
252 					tagp = tagout;
253 					while (*tagp) {
254 						oldtagp = tagp;
255 						while (*tagp &&
256 						    (*tagp != '\n'))
257 							tagp++;
258 						if (*tagp)
259 							tagp++;
260 						nout = findline(oldtagp, &bout,
261 						    1000, 0L);
262 						if (nout > 0) {
263 							fputs(bout, stdout);
264 							free(bout);
265 						}
266 					}
267 				}
268 			}
269 		}
270 		if (tags)
271 			result(master, nfound > tags ? tags : nfound, fc);
272 		if (measure) tock();
273 	}
274 	return (0);
275 }
276 
277 char *
278 todir(char *t)
279 {
280 	char *s;
281 	s = t;
282 	while (*s) s++;
283 	while (s >= t && *s != '/') s--;
284 	if (s < t)
285 		return (t);
286 	*s++ = 0;
287 	t = (*t ? t : "/");
288 	chdir(t);
289 	strcpy(usedir, t);
290 	return (s);
291 }
292 
/*
 * Decode the character that follows -F or -T into a count.
 *
 * 'n' and '0' give 0, the digits '1' to '9' give their numeric value,
 * and every other character (including 'y' and the NUL of a bare option)
 * gives 1000.
 */
static int
setfrom(char c)
{
	if (c == 'n' || c == '0')
		return (0);
	if (c >= '1' && c <= '9')
		return (c - '0');
	return (1000);
}
316