/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*	Copyright (c) 1988 AT&T	*/
/*	  All Rights Reserved	*/
24*7c478bd9Sstevel@tonic-gate
25*7c478bd9Sstevel@tonic-gate
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
30*7c478bd9Sstevel@tonic-gate
31*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI"
32*7c478bd9Sstevel@tonic-gate
33*7c478bd9Sstevel@tonic-gate #include <ctype.h>
34*7c478bd9Sstevel@tonic-gate #include <stdio.h>
35*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
36*7c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
37*7c478bd9Sstevel@tonic-gate #include <sys/byteorder.h>
38*7c478bd9Sstevel@tonic-gate #if SHARE
39*7c478bd9Sstevel@tonic-gate #include <sys/ipc.h>
40*7c478bd9Sstevel@tonic-gate #include <sys/shm.h>
41*7c478bd9Sstevel@tonic-gate #define ERR -1
42*7c478bd9Sstevel@tonic-gate #endif
43*7c478bd9Sstevel@tonic-gate #include "invlib.h"
44*7c478bd9Sstevel@tonic-gate #include "library.h"
45*7c478bd9Sstevel@tonic-gate
#define	DEBUG		0	/* debugging code and realloc messages */
/*
 * The expansion is parenthesized so that BLOCKSIZE behaves as a single
 * operand.  Without the parentheses "x % BLOCKSIZE" parses as
 * "(x % 2) * BUFSIZ", which breaks the block-boundary padding computed
 * in invmake().
 */
#define	BLOCKSIZE	(2 * BUFSIZ)	/* logical block size */
#define	LINEMAX		1000	/* sorted posting line max size */
#define	POSTINC		10000	/* posting buffer size increment */
#define	SEP		' '	/* sorted posting field separator */
#define	SETINC		100	/* posting set size increment */
#define	STATS		0	/* print statistics */
#define	SUPERINC	10000	/* super index size increment */
#define	TERMMAX		512	/* term max size */
#define	VERSION		1	/* inverted index format version */
#define	ZIPFSIZE	200	/* zipf curve size */
#define	FREAD	"r"		/* fopen for reading */
#define	FREADP	"r+"		/* fopen for update */
#define	FWRITE	"w"		/* fopen truncate or create for writing */
#define	FWRITEP	"w+"		/* fopen truncate or create for update */
62*7c478bd9Sstevel@tonic-gate extern char *argv0; /* command name (must be set in main function) */
63*7c478bd9Sstevel@tonic-gate
64*7c478bd9Sstevel@tonic-gate int invbreak;
65*7c478bd9Sstevel@tonic-gate
66*7c478bd9Sstevel@tonic-gate #if STATS
67*7c478bd9Sstevel@tonic-gate int showzipf; /* show postings per term distribution */
68*7c478bd9Sstevel@tonic-gate #endif
69*7c478bd9Sstevel@tonic-gate
70*7c478bd9Sstevel@tonic-gate static POSTING *item, *enditem, *item1 = NULL, *item2 = NULL;
71*7c478bd9Sstevel@tonic-gate static unsigned setsize1, setsize2;
72*7c478bd9Sstevel@tonic-gate static long numitems, totterm, zerolong;
73*7c478bd9Sstevel@tonic-gate static char *indexfile, *postingfile;
74*7c478bd9Sstevel@tonic-gate static FILE *outfile, *fpost;
75*7c478bd9Sstevel@tonic-gate static unsigned supersize = SUPERINC, supintsize;
76*7c478bd9Sstevel@tonic-gate static int numpost, numlogblk, amtused, nextpost,
77*7c478bd9Sstevel@tonic-gate lastinblk, numinvitems;
78*7c478bd9Sstevel@tonic-gate static POSTING *POST, *postptr;
79*7c478bd9Sstevel@tonic-gate static unsigned long *SUPINT, *supint, nextsupfing;
80*7c478bd9Sstevel@tonic-gate static char *SUPFING, *supfing;
81*7c478bd9Sstevel@tonic-gate static char thisterm[TERMMAX];
82*7c478bd9Sstevel@tonic-gate static union {
83*7c478bd9Sstevel@tonic-gate long invblk[BLOCKSIZE / sizeof (long)];
84*7c478bd9Sstevel@tonic-gate char chrblk[BLOCKSIZE];
85*7c478bd9Sstevel@tonic-gate } logicalblk;
86*7c478bd9Sstevel@tonic-gate
87*7c478bd9Sstevel@tonic-gate #if DEBUG || STATS
88*7c478bd9Sstevel@tonic-gate static long totpost;
89*7c478bd9Sstevel@tonic-gate #endif
90*7c478bd9Sstevel@tonic-gate
91*7c478bd9Sstevel@tonic-gate #if STATS
92*7c478bd9Sstevel@tonic-gate static int zipf[ZIPFSIZE + 1];
93*7c478bd9Sstevel@tonic-gate #endif
94*7c478bd9Sstevel@tonic-gate
95*7c478bd9Sstevel@tonic-gate static void invcannotalloc(size_t n);
96*7c478bd9Sstevel@tonic-gate static void invcannotopen(char *file);
97*7c478bd9Sstevel@tonic-gate static void invcannotwrite(char *file);
98*7c478bd9Sstevel@tonic-gate static int invnewterm(void);
99*7c478bd9Sstevel@tonic-gate static int boolready(void);
100*7c478bd9Sstevel@tonic-gate
101*7c478bd9Sstevel@tonic-gate long
invmake(char * invname,char * invpost,FILE * infile)102*7c478bd9Sstevel@tonic-gate invmake(char *invname, char *invpost, FILE *infile)
103*7c478bd9Sstevel@tonic-gate {
104*7c478bd9Sstevel@tonic-gate unsigned char *s;
105*7c478bd9Sstevel@tonic-gate long num;
106*7c478bd9Sstevel@tonic-gate int i;
107*7c478bd9Sstevel@tonic-gate long fileindex;
108*7c478bd9Sstevel@tonic-gate unsigned postsize = POSTINC * sizeof (POSTING);
109*7c478bd9Sstevel@tonic-gate unsigned long *intptr;
110*7c478bd9Sstevel@tonic-gate char line[LINEMAX];
111*7c478bd9Sstevel@tonic-gate long tlong;
112*7c478bd9Sstevel@tonic-gate PARAM param;
113*7c478bd9Sstevel@tonic-gate POSTING posting;
114*7c478bd9Sstevel@tonic-gate #if STATS
115*7c478bd9Sstevel@tonic-gate int j;
116*7c478bd9Sstevel@tonic-gate unsigned maxtermlen = 0;
117*7c478bd9Sstevel@tonic-gate #endif
118*7c478bd9Sstevel@tonic-gate /* output file */
119*7c478bd9Sstevel@tonic-gate if ((outfile = vpfopen(invname, FWRITEP)) == NULL) {
120*7c478bd9Sstevel@tonic-gate invcannotopen(invname);
121*7c478bd9Sstevel@tonic-gate return (0);
122*7c478bd9Sstevel@tonic-gate }
123*7c478bd9Sstevel@tonic-gate indexfile = invname;
124*7c478bd9Sstevel@tonic-gate (void) fseek(outfile, (long)BUFSIZ, 0);
125*7c478bd9Sstevel@tonic-gate
126*7c478bd9Sstevel@tonic-gate /* posting file */
127*7c478bd9Sstevel@tonic-gate if ((fpost = vpfopen(invpost, FWRITE)) == NULL) {
128*7c478bd9Sstevel@tonic-gate invcannotopen(invpost);
129*7c478bd9Sstevel@tonic-gate return (0);
130*7c478bd9Sstevel@tonic-gate }
131*7c478bd9Sstevel@tonic-gate postingfile = invpost;
132*7c478bd9Sstevel@tonic-gate nextpost = 0;
133*7c478bd9Sstevel@tonic-gate /* get space for the postings list */
134*7c478bd9Sstevel@tonic-gate if ((POST = (POSTING *)malloc(postsize)) == NULL) {
135*7c478bd9Sstevel@tonic-gate invcannotalloc(postsize);
136*7c478bd9Sstevel@tonic-gate return (0);
137*7c478bd9Sstevel@tonic-gate }
138*7c478bd9Sstevel@tonic-gate postptr = POST;
139*7c478bd9Sstevel@tonic-gate /* get space for the superfinger (superindex) */
140*7c478bd9Sstevel@tonic-gate if ((SUPFING = malloc(supersize)) == NULL) {
141*7c478bd9Sstevel@tonic-gate invcannotalloc(supersize);
142*7c478bd9Sstevel@tonic-gate return (0);
143*7c478bd9Sstevel@tonic-gate }
144*7c478bd9Sstevel@tonic-gate supfing = SUPFING;
145*7c478bd9Sstevel@tonic-gate supintsize = supersize / 40;
146*7c478bd9Sstevel@tonic-gate /* also for the superfinger index */
147*7c478bd9Sstevel@tonic-gate if ((SUPINT = malloc(supintsize * sizeof (long))) == NULL) {
148*7c478bd9Sstevel@tonic-gate invcannotalloc(supintsize * sizeof (long));
149*7c478bd9Sstevel@tonic-gate return (0);
150*7c478bd9Sstevel@tonic-gate }
151*7c478bd9Sstevel@tonic-gate supint = SUPINT;
152*7c478bd9Sstevel@tonic-gate supint++; /* leave first term open for a count */
153*7c478bd9Sstevel@tonic-gate /* initialize using an empty term */
154*7c478bd9Sstevel@tonic-gate (void) strcpy(thisterm, "");
155*7c478bd9Sstevel@tonic-gate *supint++ = 0;
156*7c478bd9Sstevel@tonic-gate *supfing++ = ' ';
157*7c478bd9Sstevel@tonic-gate *supfing++ = '\0';
158*7c478bd9Sstevel@tonic-gate nextsupfing = 2;
159*7c478bd9Sstevel@tonic-gate #if DEBUG || STATS
160*7c478bd9Sstevel@tonic-gate totpost = 0L;
161*7c478bd9Sstevel@tonic-gate #endif
162*7c478bd9Sstevel@tonic-gate totterm = 0L;
163*7c478bd9Sstevel@tonic-gate numpost = 1;
164*7c478bd9Sstevel@tonic-gate
165*7c478bd9Sstevel@tonic-gate /*
166*7c478bd9Sstevel@tonic-gate * set up as though a block had come and gone, i.e., set up for
167*7c478bd9Sstevel@tonic-gate * new block
168*7c478bd9Sstevel@tonic-gate */
169*7c478bd9Sstevel@tonic-gate amtused = 16; /* leave no space - init 3 words + one for luck */
170*7c478bd9Sstevel@tonic-gate numinvitems = 0;
171*7c478bd9Sstevel@tonic-gate numlogblk = 0;
172*7c478bd9Sstevel@tonic-gate lastinblk = BLOCKSIZE;
173*7c478bd9Sstevel@tonic-gate
174*7c478bd9Sstevel@tonic-gate /* now loop as long as more to read (till eof) */
175*7c478bd9Sstevel@tonic-gate while (fgets(line, LINEMAX, infile) != NULL) {
176*7c478bd9Sstevel@tonic-gate #if DEBUG || STATS
177*7c478bd9Sstevel@tonic-gate ++totpost;
178*7c478bd9Sstevel@tonic-gate #endif
179*7c478bd9Sstevel@tonic-gate s = (unsigned char *) strchr(line, SEP);
180*7c478bd9Sstevel@tonic-gate if (s == NULL) /* where did this line come from ??? */
181*7c478bd9Sstevel@tonic-gate continue; /* workaround: just skip it */
182*7c478bd9Sstevel@tonic-gate *s = '\0';
183*7c478bd9Sstevel@tonic-gate #if STATS
184*7c478bd9Sstevel@tonic-gate if ((i = strlen(line)) > maxtermlen) {
185*7c478bd9Sstevel@tonic-gate maxtermlen = i;
186*7c478bd9Sstevel@tonic-gate }
187*7c478bd9Sstevel@tonic-gate #endif
188*7c478bd9Sstevel@tonic-gate #if DEBUG
189*7c478bd9Sstevel@tonic-gate (void) printf("%ld: %s ", totpost, line);
190*7c478bd9Sstevel@tonic-gate (void) fflush(stdout);
191*7c478bd9Sstevel@tonic-gate #endif
192*7c478bd9Sstevel@tonic-gate if (strcmp(thisterm, line) == 0) {
193*7c478bd9Sstevel@tonic-gate if (postptr + 10 > POST + postsize / sizeof (POSTING)) {
194*7c478bd9Sstevel@tonic-gate i = postptr - POST;
195*7c478bd9Sstevel@tonic-gate postsize += POSTINC * sizeof (POSTING);
196*7c478bd9Sstevel@tonic-gate if ((POST = realloc(POST, postsize)) == NULL) {
197*7c478bd9Sstevel@tonic-gate invcannotalloc(postsize);
198*7c478bd9Sstevel@tonic-gate return (0);
199*7c478bd9Sstevel@tonic-gate }
200*7c478bd9Sstevel@tonic-gate postptr = i + POST;
201*7c478bd9Sstevel@tonic-gate #if DEBUG
202*7c478bd9Sstevel@tonic-gate (void) printf("reallocated post space to %u, "
203*7c478bd9Sstevel@tonic-gate "totpost=%ld\n", postsize, totpost);
204*7c478bd9Sstevel@tonic-gate #endif
205*7c478bd9Sstevel@tonic-gate }
206*7c478bd9Sstevel@tonic-gate numpost++;
207*7c478bd9Sstevel@tonic-gate } else {
208*7c478bd9Sstevel@tonic-gate /* have a new term */
209*7c478bd9Sstevel@tonic-gate if (!invnewterm()) {
210*7c478bd9Sstevel@tonic-gate return (0);
211*7c478bd9Sstevel@tonic-gate }
212*7c478bd9Sstevel@tonic-gate (void) strcpy(thisterm, line);
213*7c478bd9Sstevel@tonic-gate numpost = 1;
214*7c478bd9Sstevel@tonic-gate postptr = POST;
215*7c478bd9Sstevel@tonic-gate fileindex = 0;
216*7c478bd9Sstevel@tonic-gate }
217*7c478bd9Sstevel@tonic-gate /* get the new posting */
218*7c478bd9Sstevel@tonic-gate num = *++s - '!';
219*7c478bd9Sstevel@tonic-gate i = 1;
220*7c478bd9Sstevel@tonic-gate do {
221*7c478bd9Sstevel@tonic-gate num = BASE * num + *++s - '!';
222*7c478bd9Sstevel@tonic-gate } while (++i < PRECISION);
223*7c478bd9Sstevel@tonic-gate posting.lineoffset = num;
224*7c478bd9Sstevel@tonic-gate while (++fileindex < nsrcoffset && num > srcoffset[fileindex]) {
225*7c478bd9Sstevel@tonic-gate ;
226*7c478bd9Sstevel@tonic-gate }
227*7c478bd9Sstevel@tonic-gate posting.fileindex = --fileindex;
228*7c478bd9Sstevel@tonic-gate posting.type = *++s;
229*7c478bd9Sstevel@tonic-gate num = *++s - '!';
230*7c478bd9Sstevel@tonic-gate if (*s != '\n') {
231*7c478bd9Sstevel@tonic-gate num = *++s - '!';
232*7c478bd9Sstevel@tonic-gate while (*++s != '\n') {
233*7c478bd9Sstevel@tonic-gate num = BASE * num + *s - '!';
234*7c478bd9Sstevel@tonic-gate }
235*7c478bd9Sstevel@tonic-gate posting.fcnoffset = num;
236*7c478bd9Sstevel@tonic-gate } else {
237*7c478bd9Sstevel@tonic-gate posting.fcnoffset = 0;
238*7c478bd9Sstevel@tonic-gate }
239*7c478bd9Sstevel@tonic-gate *postptr++ = posting;
240*7c478bd9Sstevel@tonic-gate #if DEBUG
241*7c478bd9Sstevel@tonic-gate (void) printf("%ld %ld %ld %ld\n", posting.fileindex,
242*7c478bd9Sstevel@tonic-gate posting.fcnoffset, posting.lineoffset, posting.type);
243*7c478bd9Sstevel@tonic-gate (void) fflush(stdout);
244*7c478bd9Sstevel@tonic-gate #endif
245*7c478bd9Sstevel@tonic-gate }
246*7c478bd9Sstevel@tonic-gate if (!invnewterm()) {
247*7c478bd9Sstevel@tonic-gate return (0);
248*7c478bd9Sstevel@tonic-gate }
249*7c478bd9Sstevel@tonic-gate /* now clean up final block */
250*7c478bd9Sstevel@tonic-gate logicalblk.invblk[0] = numinvitems;
251*7c478bd9Sstevel@tonic-gate /* loops pointer around to start */
252*7c478bd9Sstevel@tonic-gate logicalblk.invblk[1] = 0;
253*7c478bd9Sstevel@tonic-gate logicalblk.invblk[2] = numlogblk - 1;
254*7c478bd9Sstevel@tonic-gate if (fwrite((char *)&logicalblk, BLOCKSIZE, 1, outfile) == 0) {
255*7c478bd9Sstevel@tonic-gate goto cannotwrite;
256*7c478bd9Sstevel@tonic-gate }
257*7c478bd9Sstevel@tonic-gate numlogblk++;
258*7c478bd9Sstevel@tonic-gate /* write out block to save space. what in it doesn't matter */
259*7c478bd9Sstevel@tonic-gate if (fwrite((char *)&logicalblk, BLOCKSIZE, 1, outfile) == 0) {
260*7c478bd9Sstevel@tonic-gate goto cannotwrite;
261*7c478bd9Sstevel@tonic-gate }
262*7c478bd9Sstevel@tonic-gate /* finish up the super finger */
263*7c478bd9Sstevel@tonic-gate *SUPINT = numlogblk;
264*7c478bd9Sstevel@tonic-gate /* add to the offsets the size of the offset pointers */
265*7c478bd9Sstevel@tonic-gate intptr = (SUPINT + 1);
266*7c478bd9Sstevel@tonic-gate i = (char *)supint - (char *)SUPINT;
267*7c478bd9Sstevel@tonic-gate while (intptr < supint)
268*7c478bd9Sstevel@tonic-gate *intptr++ += i;
269*7c478bd9Sstevel@tonic-gate /* write out the offsets (1 for the N at start) and the super finger */
270*7c478bd9Sstevel@tonic-gate if (fwrite((char *)SUPINT, sizeof (*SUPINT), numlogblk + 1,
271*7c478bd9Sstevel@tonic-gate outfile) == 0 ||
272*7c478bd9Sstevel@tonic-gate fwrite(SUPFING, 1, supfing - SUPFING, outfile) == 0) {
273*7c478bd9Sstevel@tonic-gate goto cannotwrite;
274*7c478bd9Sstevel@tonic-gate }
275*7c478bd9Sstevel@tonic-gate /* save the size for reference later */
276*7c478bd9Sstevel@tonic-gate nextsupfing = sizeof (long) + sizeof (long) * numlogblk +
277*7c478bd9Sstevel@tonic-gate (supfing - SUPFING);
278*7c478bd9Sstevel@tonic-gate /*
279*7c478bd9Sstevel@tonic-gate * make sure the file ends at a logical block boundary. This is
280*7c478bd9Sstevel@tonic-gate * necessary for invinsert to correctly create extended blocks
281*7c478bd9Sstevel@tonic-gate */
282*7c478bd9Sstevel@tonic-gate i = nextsupfing % BLOCKSIZE;
283*7c478bd9Sstevel@tonic-gate /* write out junk to fill log blk */
284*7c478bd9Sstevel@tonic-gate if (fwrite(SUPFING, BLOCKSIZE - i, 1, outfile) == 0 ||
285*7c478bd9Sstevel@tonic-gate fflush(outfile) == EOF) {
286*7c478bd9Sstevel@tonic-gate /* rewind doesn't check for write failure */
287*7c478bd9Sstevel@tonic-gate goto cannotwrite;
288*7c478bd9Sstevel@tonic-gate }
289*7c478bd9Sstevel@tonic-gate /* write the control area */
290*7c478bd9Sstevel@tonic-gate rewind(outfile);
291*7c478bd9Sstevel@tonic-gate param.version = VERSION;
292*7c478bd9Sstevel@tonic-gate param.filestat = 0;
293*7c478bd9Sstevel@tonic-gate param.sizeblk = BLOCKSIZE;
294*7c478bd9Sstevel@tonic-gate param.startbyte = (numlogblk + 1) * BLOCKSIZE + BUFSIZ;
295*7c478bd9Sstevel@tonic-gate param.supsize = nextsupfing;
296*7c478bd9Sstevel@tonic-gate param.cntlsize = BUFSIZ;
297*7c478bd9Sstevel@tonic-gate param.share = 0;
298*7c478bd9Sstevel@tonic-gate if (fwrite((char *)¶m, sizeof (param), 1, outfile) == 0) {
299*7c478bd9Sstevel@tonic-gate goto cannotwrite;
300*7c478bd9Sstevel@tonic-gate }
301*7c478bd9Sstevel@tonic-gate for (i = 0; i < 10; i++) /* for future use */
302*7c478bd9Sstevel@tonic-gate if (fwrite((char *)&zerolong, sizeof (zerolong),
303*7c478bd9Sstevel@tonic-gate 1, outfile) == 0) {
304*7c478bd9Sstevel@tonic-gate goto cannotwrite;
305*7c478bd9Sstevel@tonic-gate }
306*7c478bd9Sstevel@tonic-gate
307*7c478bd9Sstevel@tonic-gate /* make first block loop backwards to last block */
308*7c478bd9Sstevel@tonic-gate if (fflush(outfile) == EOF) {
309*7c478bd9Sstevel@tonic-gate /* fseek doesn't check for write failure */
310*7c478bd9Sstevel@tonic-gate goto cannotwrite;
311*7c478bd9Sstevel@tonic-gate }
312*7c478bd9Sstevel@tonic-gate /* get to second word first block */
313*7c478bd9Sstevel@tonic-gate (void) fseek(outfile, (long)BUFSIZ + 8, 0);
314*7c478bd9Sstevel@tonic-gate tlong = numlogblk - 1;
315*7c478bd9Sstevel@tonic-gate if (fwrite((char *)&tlong, sizeof (tlong), 1, outfile) == 0 ||
316*7c478bd9Sstevel@tonic-gate fclose(outfile) == EOF) {
317*7c478bd9Sstevel@tonic-gate cannotwrite:
318*7c478bd9Sstevel@tonic-gate invcannotwrite(invname);
319*7c478bd9Sstevel@tonic-gate return (0);
320*7c478bd9Sstevel@tonic-gate }
321*7c478bd9Sstevel@tonic-gate if (fclose(fpost) == EOF) {
322*7c478bd9Sstevel@tonic-gate invcannotwrite(postingfile);
323*7c478bd9Sstevel@tonic-gate return (0);
324*7c478bd9Sstevel@tonic-gate }
325*7c478bd9Sstevel@tonic-gate --totterm; /* don't count null term */
326*7c478bd9Sstevel@tonic-gate #if STATS
327*7c478bd9Sstevel@tonic-gate (void) printf("logical blocks = %d, postings = %ld, terms = %ld, "
328*7c478bd9Sstevel@tonic-gate "max term length = %d\n", numlogblk, totpost, totterm, maxtermlen);
329*7c478bd9Sstevel@tonic-gate if (showzipf) {
330*7c478bd9Sstevel@tonic-gate (void) printf(
331*7c478bd9Sstevel@tonic-gate "\n************* ZIPF curve ****************\n");
332*7c478bd9Sstevel@tonic-gate for (j = ZIPFSIZE; j > 1; j--)
333*7c478bd9Sstevel@tonic-gate if (zipf[j])
334*7c478bd9Sstevel@tonic-gate break;
335*7c478bd9Sstevel@tonic-gate for (i = 1; i < j; ++i) {
336*7c478bd9Sstevel@tonic-gate (void) printf("%3d -%6d ", i, zipf[i]);
337*7c478bd9Sstevel@tonic-gate if (i % 6 == 0) (void) putchar('\n');
338*7c478bd9Sstevel@tonic-gate }
339*7c478bd9Sstevel@tonic-gate (void) printf(">%d-%6d\n", ZIPFSIZE, zipf[0]);
340*7c478bd9Sstevel@tonic-gate }
341*7c478bd9Sstevel@tonic-gate #endif
342*7c478bd9Sstevel@tonic-gate /* free all malloc'd memory */
343*7c478bd9Sstevel@tonic-gate free(POST);
344*7c478bd9Sstevel@tonic-gate free(SUPFING);
345*7c478bd9Sstevel@tonic-gate free(SUPINT);
346*7c478bd9Sstevel@tonic-gate return (totterm);
347*7c478bd9Sstevel@tonic-gate }
348*7c478bd9Sstevel@tonic-gate
349*7c478bd9Sstevel@tonic-gate /* add a term to the data base */
350*7c478bd9Sstevel@tonic-gate
351*7c478bd9Sstevel@tonic-gate static int
invnewterm(void)352*7c478bd9Sstevel@tonic-gate invnewterm(void)
353*7c478bd9Sstevel@tonic-gate {
354*7c478bd9Sstevel@tonic-gate int backupflag, i, j, maxback, holditems, gooditems, howfar;
355*7c478bd9Sstevel@tonic-gate int len, numwilluse, wdlen;
356*7c478bd9Sstevel@tonic-gate char *tptr, *tptr2, *tptr3;
357*7c478bd9Sstevel@tonic-gate union {
358*7c478bd9Sstevel@tonic-gate unsigned long packword[2];
359*7c478bd9Sstevel@tonic-gate ENTRY e;
360*7c478bd9Sstevel@tonic-gate } iteminfo;
361*7c478bd9Sstevel@tonic-gate
362*7c478bd9Sstevel@tonic-gate totterm++;
363*7c478bd9Sstevel@tonic-gate #if STATS
364*7c478bd9Sstevel@tonic-gate /* keep zipfian info on the distribution */
365*7c478bd9Sstevel@tonic-gate if (numpost <= ZIPFSIZE)
366*7c478bd9Sstevel@tonic-gate zipf[numpost]++;
367*7c478bd9Sstevel@tonic-gate else
368*7c478bd9Sstevel@tonic-gate zipf[0]++;
369*7c478bd9Sstevel@tonic-gate #endif
370*7c478bd9Sstevel@tonic-gate len = strlen(thisterm);
371*7c478bd9Sstevel@tonic-gate wdlen = (len + (sizeof (long) - 1)) / sizeof (long);
372*7c478bd9Sstevel@tonic-gate numwilluse = (wdlen + 3) * sizeof (long);
373*7c478bd9Sstevel@tonic-gate /* new block if at least 1 item in block */
374*7c478bd9Sstevel@tonic-gate if (numinvitems && numwilluse + amtused > BLOCKSIZE) {
375*7c478bd9Sstevel@tonic-gate /* set up new block */
376*7c478bd9Sstevel@tonic-gate if (supfing + 500 > SUPFING + supersize) {
377*7c478bd9Sstevel@tonic-gate i = supfing - SUPFING;
378*7c478bd9Sstevel@tonic-gate supersize += 20000;
379*7c478bd9Sstevel@tonic-gate if ((SUPFING = realloc(SUPFING, supersize)) == NULL) {
380*7c478bd9Sstevel@tonic-gate invcannotalloc(supersize);
381*7c478bd9Sstevel@tonic-gate return (0);
382*7c478bd9Sstevel@tonic-gate }
383*7c478bd9Sstevel@tonic-gate supfing = i + SUPFING;
384*7c478bd9Sstevel@tonic-gate #if DEBUG
385*7c478bd9Sstevel@tonic-gate (void) printf("reallocated superfinger space to %d, "
386*7c478bd9Sstevel@tonic-gate "totpost=%ld\n", supersize, totpost);
387*7c478bd9Sstevel@tonic-gate #endif
388*7c478bd9Sstevel@tonic-gate }
389*7c478bd9Sstevel@tonic-gate /* check that room for the offset as well */
390*7c478bd9Sstevel@tonic-gate if ((numlogblk + 10) > supintsize) {
391*7c478bd9Sstevel@tonic-gate i = supint - SUPINT;
392*7c478bd9Sstevel@tonic-gate supintsize += SUPERINC;
393*7c478bd9Sstevel@tonic-gate if ((SUPINT = realloc((char *)SUPINT,
394*7c478bd9Sstevel@tonic-gate supintsize * sizeof (long))) == NULL) {
395*7c478bd9Sstevel@tonic-gate invcannotalloc(supintsize * sizeof (long));
396*7c478bd9Sstevel@tonic-gate return (0);
397*7c478bd9Sstevel@tonic-gate }
398*7c478bd9Sstevel@tonic-gate supint = i + SUPINT;
399*7c478bd9Sstevel@tonic-gate #if DEBUG
400*7c478bd9Sstevel@tonic-gate (void) printf("reallocated superfinger offset to %d, "
401*7c478bd9Sstevel@tonic-gate "totpost = %ld\n", supintsize * sizeof (long),
402*7c478bd9Sstevel@tonic-gate totpost);
403*7c478bd9Sstevel@tonic-gate #endif
404*7c478bd9Sstevel@tonic-gate }
405*7c478bd9Sstevel@tonic-gate /* See if backup is efficatious */
406*7c478bd9Sstevel@tonic-gate backupflag = 0;
407*7c478bd9Sstevel@tonic-gate maxback = strlen(thisterm) / 10;
408*7c478bd9Sstevel@tonic-gate holditems = numinvitems;
409*7c478bd9Sstevel@tonic-gate if (maxback > numinvitems)
410*7c478bd9Sstevel@tonic-gate maxback = numinvitems - 2;
411*7c478bd9Sstevel@tonic-gate howfar = 0;
412*7c478bd9Sstevel@tonic-gate while (--maxback > 0) {
413*7c478bd9Sstevel@tonic-gate howfar++;
414*7c478bd9Sstevel@tonic-gate iteminfo.packword[0] =
415*7c478bd9Sstevel@tonic-gate logicalblk.invblk[--holditems * 2 +
416*7c478bd9Sstevel@tonic-gate (sizeof (long) - 1)];
417*7c478bd9Sstevel@tonic-gate if ((i = iteminfo.e.size / 10) < maxback) {
418*7c478bd9Sstevel@tonic-gate maxback = i;
419*7c478bd9Sstevel@tonic-gate backupflag = howfar;
420*7c478bd9Sstevel@tonic-gate gooditems = holditems;
421*7c478bd9Sstevel@tonic-gate tptr2 = logicalblk.chrblk + iteminfo.e.offset;
422*7c478bd9Sstevel@tonic-gate }
423*7c478bd9Sstevel@tonic-gate }
424*7c478bd9Sstevel@tonic-gate /* see if backup will occur */
425*7c478bd9Sstevel@tonic-gate if (backupflag) {
426*7c478bd9Sstevel@tonic-gate numinvitems = gooditems;
427*7c478bd9Sstevel@tonic-gate }
428*7c478bd9Sstevel@tonic-gate logicalblk.invblk[0] = numinvitems;
429*7c478bd9Sstevel@tonic-gate /* set forward pointer pointing to next */
430*7c478bd9Sstevel@tonic-gate logicalblk.invblk[1] = numlogblk + 1;
431*7c478bd9Sstevel@tonic-gate /* set back pointer to last block */
432*7c478bd9Sstevel@tonic-gate logicalblk.invblk[2] = numlogblk - 1;
433*7c478bd9Sstevel@tonic-gate if (fwrite((char *)logicalblk.chrblk, 1,
434*7c478bd9Sstevel@tonic-gate BLOCKSIZE, outfile) == 0) {
435*7c478bd9Sstevel@tonic-gate invcannotwrite(indexfile);
436*7c478bd9Sstevel@tonic-gate return (0);
437*7c478bd9Sstevel@tonic-gate }
438*7c478bd9Sstevel@tonic-gate amtused = 16;
439*7c478bd9Sstevel@tonic-gate numlogblk++;
440*7c478bd9Sstevel@tonic-gate /* check if had to back up, if so do it */
441*7c478bd9Sstevel@tonic-gate if (backupflag) {
442*7c478bd9Sstevel@tonic-gate /* find out where the end of the new block is */
443*7c478bd9Sstevel@tonic-gate iteminfo.packword[0] =
444*7c478bd9Sstevel@tonic-gate logicalblk.invblk[numinvitems * 2 + 1];
445*7c478bd9Sstevel@tonic-gate tptr3 = logicalblk.chrblk + iteminfo.e.offset;
446*7c478bd9Sstevel@tonic-gate /* move the index for this block */
447*7c478bd9Sstevel@tonic-gate for (i = 3; i <= (backupflag * 2 + 2); i++) {
448*7c478bd9Sstevel@tonic-gate logicalblk.invblk[i] =
449*7c478bd9Sstevel@tonic-gate logicalblk.invblk[numinvitems * 2+i];
450*7c478bd9Sstevel@tonic-gate }
451*7c478bd9Sstevel@tonic-gate /* move the word into the super index */
452*7c478bd9Sstevel@tonic-gate iteminfo.packword[0] = logicalblk.invblk[3];
453*7c478bd9Sstevel@tonic-gate iteminfo.packword[1] = logicalblk.invblk[4];
454*7c478bd9Sstevel@tonic-gate tptr2 = logicalblk.chrblk + iteminfo.e.offset;
455*7c478bd9Sstevel@tonic-gate (void) strncpy(supfing, tptr2, (int)iteminfo.e.size);
456*7c478bd9Sstevel@tonic-gate *(supfing + iteminfo.e.size) = '\0';
457*7c478bd9Sstevel@tonic-gate #if DEBUG
458*7c478bd9Sstevel@tonic-gate (void) printf("backup %d at term=%s to term=%s\n",
459*7c478bd9Sstevel@tonic-gate backupflag, thisterm, supfing);
460*7c478bd9Sstevel@tonic-gate #endif
461*7c478bd9Sstevel@tonic-gate *supint++ = nextsupfing;
462*7c478bd9Sstevel@tonic-gate nextsupfing += strlen(supfing) + 1;
463*7c478bd9Sstevel@tonic-gate supfing += strlen(supfing) + 1;
464*7c478bd9Sstevel@tonic-gate /* now fix up the logical block */
465*7c478bd9Sstevel@tonic-gate tptr = logicalblk.chrblk + lastinblk;
466*7c478bd9Sstevel@tonic-gate lastinblk = BLOCKSIZE;
467*7c478bd9Sstevel@tonic-gate tptr2 = logicalblk.chrblk + lastinblk;
468*7c478bd9Sstevel@tonic-gate j = tptr3 - tptr;
469*7c478bd9Sstevel@tonic-gate while (tptr3 > tptr)
470*7c478bd9Sstevel@tonic-gate *--tptr2 = *--tptr3;
471*7c478bd9Sstevel@tonic-gate lastinblk -= j;
472*7c478bd9Sstevel@tonic-gate amtused += (8 * backupflag + j);
473*7c478bd9Sstevel@tonic-gate for (i = 3; i < (backupflag * 2 + 2); i += 2) {
474*7c478bd9Sstevel@tonic-gate iteminfo.packword[0] = logicalblk.invblk[i];
475*7c478bd9Sstevel@tonic-gate iteminfo.e.offset += (tptr2 - tptr3);
476*7c478bd9Sstevel@tonic-gate logicalblk.invblk[i] = iteminfo.packword[0];
477*7c478bd9Sstevel@tonic-gate }
478*7c478bd9Sstevel@tonic-gate numinvitems = backupflag;
479*7c478bd9Sstevel@tonic-gate } else { /* no backup needed */
480*7c478bd9Sstevel@tonic-gate numinvitems = 0;
481*7c478bd9Sstevel@tonic-gate lastinblk = BLOCKSIZE;
482*7c478bd9Sstevel@tonic-gate /* add new term to superindex */
483*7c478bd9Sstevel@tonic-gate (void) strcpy(supfing, thisterm);
484*7c478bd9Sstevel@tonic-gate supfing += strlen(thisterm) + 1;
485*7c478bd9Sstevel@tonic-gate *supint++ = nextsupfing;
486*7c478bd9Sstevel@tonic-gate nextsupfing += strlen(thisterm) + 1;
487*7c478bd9Sstevel@tonic-gate }
488*7c478bd9Sstevel@tonic-gate }
489*7c478bd9Sstevel@tonic-gate lastinblk -= (numwilluse - 8);
490*7c478bd9Sstevel@tonic-gate iteminfo.e.offset = lastinblk;
491*7c478bd9Sstevel@tonic-gate iteminfo.e.size = (char)len;
492*7c478bd9Sstevel@tonic-gate iteminfo.e.space = 0;
493*7c478bd9Sstevel@tonic-gate iteminfo.e.post = numpost;
494*7c478bd9Sstevel@tonic-gate (void) strncpy(logicalblk.chrblk + lastinblk, thisterm, len);
495*7c478bd9Sstevel@tonic-gate amtused += numwilluse;
496*7c478bd9Sstevel@tonic-gate logicalblk.invblk[(lastinblk/sizeof (long))+wdlen] = nextpost;
497*7c478bd9Sstevel@tonic-gate if ((i = postptr - POST) > 0) {
498*7c478bd9Sstevel@tonic-gate if (fwrite((char *)POST, sizeof (POSTING), i, fpost) == 0) {
499*7c478bd9Sstevel@tonic-gate invcannotwrite(postingfile);
500*7c478bd9Sstevel@tonic-gate return (0);
501*7c478bd9Sstevel@tonic-gate }
502*7c478bd9Sstevel@tonic-gate nextpost += i * sizeof (POSTING);
503*7c478bd9Sstevel@tonic-gate }
504*7c478bd9Sstevel@tonic-gate logicalblk.invblk[3+2*numinvitems++] = iteminfo.packword[0];
505*7c478bd9Sstevel@tonic-gate logicalblk.invblk[2+2*numinvitems] = iteminfo.packword[1];
506*7c478bd9Sstevel@tonic-gate return (1);
507*7c478bd9Sstevel@tonic-gate }
508*7c478bd9Sstevel@tonic-gate
/*
 * Byte-swap, in place, each complete 32-bit word in the sz bytes at p.
 * Trailing bytes that do not make up a whole word are left alone.
 */
static void
swap_ints(void *p, size_t sz)
{
	uint32_t *words = p;
	size_t nwords = sz / sizeof (uint32_t);
	size_t idx;

	for (idx = 0; idx < nwords; idx++)
		words[idx] = BSWAP_32(words[idx]);
}
518*7c478bd9Sstevel@tonic-gate
519*7c478bd9Sstevel@tonic-gate static void
write_param(INVCONTROL * invcntl)520*7c478bd9Sstevel@tonic-gate write_param(INVCONTROL *invcntl)
521*7c478bd9Sstevel@tonic-gate {
522*7c478bd9Sstevel@tonic-gate if (invcntl->swap)
523*7c478bd9Sstevel@tonic-gate swap_ints(&invcntl->param, sizeof (invcntl->param));
524*7c478bd9Sstevel@tonic-gate
525*7c478bd9Sstevel@tonic-gate rewind(invcntl->invfile);
526*7c478bd9Sstevel@tonic-gate (void) fwrite((char *)&invcntl->param, sizeof (invcntl->param), 1,
527*7c478bd9Sstevel@tonic-gate invcntl->invfile);
528*7c478bd9Sstevel@tonic-gate
529*7c478bd9Sstevel@tonic-gate if (invcntl->swap)
530*7c478bd9Sstevel@tonic-gate swap_ints(&invcntl->param, sizeof (invcntl->param));
531*7c478bd9Sstevel@tonic-gate }
532*7c478bd9Sstevel@tonic-gate
533*7c478bd9Sstevel@tonic-gate static void
read_superfinger(INVCONTROL * invcntl)534*7c478bd9Sstevel@tonic-gate read_superfinger(INVCONTROL *invcntl)
535*7c478bd9Sstevel@tonic-gate {
536*7c478bd9Sstevel@tonic-gate size_t count;
537*7c478bd9Sstevel@tonic-gate
538*7c478bd9Sstevel@tonic-gate (void) fseek(invcntl->invfile, invcntl->param.startbyte, SEEK_SET);
539*7c478bd9Sstevel@tonic-gate (void) fread(invcntl->iindex, (int)invcntl->param.supsize,
540*7c478bd9Sstevel@tonic-gate 1, invcntl->invfile);
541*7c478bd9Sstevel@tonic-gate
542*7c478bd9Sstevel@tonic-gate if (invcntl->swap) {
543*7c478bd9Sstevel@tonic-gate /*
544*7c478bd9Sstevel@tonic-gate * The superfinger consists of a count, followed by
545*7c478bd9Sstevel@tonic-gate * count offsets, followed by a string table (which
546*7c478bd9Sstevel@tonic-gate * the offsets reference).
547*7c478bd9Sstevel@tonic-gate *
548*7c478bd9Sstevel@tonic-gate * We need to swap the count and the offsets.
549*7c478bd9Sstevel@tonic-gate */
550*7c478bd9Sstevel@tonic-gate count = 1 + BSWAP_32(*(uint32_t *)invcntl->iindex);
551*7c478bd9Sstevel@tonic-gate swap_ints(invcntl->iindex, count * sizeof (unsigned long));
552*7c478bd9Sstevel@tonic-gate }
553*7c478bd9Sstevel@tonic-gate }
554*7c478bd9Sstevel@tonic-gate
555*7c478bd9Sstevel@tonic-gate static void
read_logblock(INVCONTROL * invcntl,int block)556*7c478bd9Sstevel@tonic-gate read_logblock(INVCONTROL *invcntl, int block)
557*7c478bd9Sstevel@tonic-gate {
558*7c478bd9Sstevel@tonic-gate /* note always fetch it if the file is busy */
559*7c478bd9Sstevel@tonic-gate if ((block != invcntl->numblk) ||
560*7c478bd9Sstevel@tonic-gate (invcntl->param.filestat >= INVBUSY)) {
561*7c478bd9Sstevel@tonic-gate (void) fseek(invcntl->invfile,
562*7c478bd9Sstevel@tonic-gate (block * invcntl->param.sizeblk) + invcntl->param.cntlsize,
563*7c478bd9Sstevel@tonic-gate SEEK_SET);
564*7c478bd9Sstevel@tonic-gate invcntl->numblk = block;
565*7c478bd9Sstevel@tonic-gate (void) fread(invcntl->logblk, (int)invcntl->param.sizeblk,
566*7c478bd9Sstevel@tonic-gate 1, invcntl->invfile);
567*7c478bd9Sstevel@tonic-gate
568*7c478bd9Sstevel@tonic-gate if (invcntl->swap) {
569*7c478bd9Sstevel@tonic-gate size_t count;
570*7c478bd9Sstevel@tonic-gate ENTRY *ecur, *eend;
571*7c478bd9Sstevel@tonic-gate uint32_t *postptr;
572*7c478bd9Sstevel@tonic-gate
573*7c478bd9Sstevel@tonic-gate /*
574*7c478bd9Sstevel@tonic-gate * A logblock consists of a count, a next block id,
575*7c478bd9Sstevel@tonic-gate * and a previous block id, followed by count
576*7c478bd9Sstevel@tonic-gate * ENTRYs, followed by alternating strings and
577*7c478bd9Sstevel@tonic-gate * offsets.
578*7c478bd9Sstevel@tonic-gate */
579*7c478bd9Sstevel@tonic-gate swap_ints(invcntl->logblk, 3 * sizeof (unsigned long));
580*7c478bd9Sstevel@tonic-gate
581*7c478bd9Sstevel@tonic-gate count = *(uint32_t *)invcntl->logblk;
582*7c478bd9Sstevel@tonic-gate
583*7c478bd9Sstevel@tonic-gate ecur = (ENTRY *)((uint32_t *)invcntl->logblk + 3);
584*7c478bd9Sstevel@tonic-gate eend = ecur + count;
585*7c478bd9Sstevel@tonic-gate
586*7c478bd9Sstevel@tonic-gate for (; ecur < eend; ecur++) {
587*7c478bd9Sstevel@tonic-gate ecur->offset = BSWAP_16(ecur->offset);
588*7c478bd9Sstevel@tonic-gate ecur->post = BSWAP_32(ecur->post);
589*7c478bd9Sstevel@tonic-gate
590*7c478bd9Sstevel@tonic-gate /*
591*7c478bd9Sstevel@tonic-gate * After the string is the posting offset.
592*7c478bd9Sstevel@tonic-gate */
593*7c478bd9Sstevel@tonic-gate postptr = (uint32_t *)(invcntl->logblk +
594*7c478bd9Sstevel@tonic-gate ecur->offset +
595*7c478bd9Sstevel@tonic-gate P2ROUNDUP(ecur->size, sizeof (long)));
596*7c478bd9Sstevel@tonic-gate
597*7c478bd9Sstevel@tonic-gate *postptr = BSWAP_32(*postptr);
598*7c478bd9Sstevel@tonic-gate }
599*7c478bd9Sstevel@tonic-gate }
600*7c478bd9Sstevel@tonic-gate }
601*7c478bd9Sstevel@tonic-gate }
602*7c478bd9Sstevel@tonic-gate
603*7c478bd9Sstevel@tonic-gate void
read_next_posting(INVCONTROL * invcntl,POSTING * posting)604*7c478bd9Sstevel@tonic-gate read_next_posting(INVCONTROL *invcntl, POSTING *posting)
605*7c478bd9Sstevel@tonic-gate {
606*7c478bd9Sstevel@tonic-gate (void) fread((char *)posting, sizeof (*posting), 1, invcntl->postfile);
607*7c478bd9Sstevel@tonic-gate if (invcntl->swap) {
608*7c478bd9Sstevel@tonic-gate posting->lineoffset = BSWAP_32(posting->lineoffset);
609*7c478bd9Sstevel@tonic-gate posting->fcnoffset = BSWAP_32(posting->fcnoffset);
610*7c478bd9Sstevel@tonic-gate /*
611*7c478bd9Sstevel@tonic-gate * fileindex is a 24-bit field, so shift it before swapping
612*7c478bd9Sstevel@tonic-gate */
613*7c478bd9Sstevel@tonic-gate posting->fileindex = BSWAP_32(posting->fileindex << 8);
614*7c478bd9Sstevel@tonic-gate }
615*7c478bd9Sstevel@tonic-gate }
616*7c478bd9Sstevel@tonic-gate
617*7c478bd9Sstevel@tonic-gate int
invopen(INVCONTROL * invcntl,char * invname,char * invpost,int stat)618*7c478bd9Sstevel@tonic-gate invopen(INVCONTROL *invcntl, char *invname, char *invpost, int stat)
619*7c478bd9Sstevel@tonic-gate {
620*7c478bd9Sstevel@tonic-gate int read_index;
621*7c478bd9Sstevel@tonic-gate
622*7c478bd9Sstevel@tonic-gate if ((invcntl->invfile = vpfopen(invname,
623*7c478bd9Sstevel@tonic-gate ((stat == 0) ? FREAD : FREADP))) == NULL) {
624*7c478bd9Sstevel@tonic-gate invcannotopen(invname);
625*7c478bd9Sstevel@tonic-gate return (-1);
626*7c478bd9Sstevel@tonic-gate }
627*7c478bd9Sstevel@tonic-gate if (fread((char *)&invcntl->param, sizeof (invcntl->param), 1,
628*7c478bd9Sstevel@tonic-gate invcntl->invfile) == 0) {
629*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "%s: empty inverted file\n", argv0);
630*7c478bd9Sstevel@tonic-gate goto closeinv;
631*7c478bd9Sstevel@tonic-gate }
632*7c478bd9Sstevel@tonic-gate if (invcntl->param.version != VERSION &&
633*7c478bd9Sstevel@tonic-gate BSWAP_32(invcntl->param.version) != VERSION) {
634*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr,
635*7c478bd9Sstevel@tonic-gate "%s: cannot read old index format; use -U option to "
636*7c478bd9Sstevel@tonic-gate "force database to rebuild\n", argv0);
637*7c478bd9Sstevel@tonic-gate goto closeinv;
638*7c478bd9Sstevel@tonic-gate }
639*7c478bd9Sstevel@tonic-gate invcntl->swap = (invcntl->param.version != VERSION);
640*7c478bd9Sstevel@tonic-gate if (invcntl->swap)
641*7c478bd9Sstevel@tonic-gate swap_ints(&invcntl->param, sizeof (invcntl->param));
642*7c478bd9Sstevel@tonic-gate
643*7c478bd9Sstevel@tonic-gate if (stat == 0 && invcntl->param.filestat == INVALONE) {
644*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "%s: inverted file is locked\n", argv0);
645*7c478bd9Sstevel@tonic-gate goto closeinv;
646*7c478bd9Sstevel@tonic-gate }
647*7c478bd9Sstevel@tonic-gate if ((invcntl->postfile = vpfopen(invpost,
648*7c478bd9Sstevel@tonic-gate ((stat == 0) ? FREAD : FREADP))) == NULL) {
649*7c478bd9Sstevel@tonic-gate invcannotopen(invpost);
650*7c478bd9Sstevel@tonic-gate goto closeinv;
651*7c478bd9Sstevel@tonic-gate }
652*7c478bd9Sstevel@tonic-gate /* allocate core for a logical block */
653*7c478bd9Sstevel@tonic-gate if ((invcntl->logblk = malloc(invcntl->param.sizeblk)) == NULL) {
654*7c478bd9Sstevel@tonic-gate invcannotalloc((unsigned)invcntl->param.sizeblk);
655*7c478bd9Sstevel@tonic-gate goto closeboth;
656*7c478bd9Sstevel@tonic-gate }
657*7c478bd9Sstevel@tonic-gate /* allocate for and read in superfinger */
658*7c478bd9Sstevel@tonic-gate read_index = 1;
659*7c478bd9Sstevel@tonic-gate invcntl->iindex = NULL;
660*7c478bd9Sstevel@tonic-gate #if SHARE
661*7c478bd9Sstevel@tonic-gate if (invcntl->param.share == 1) {
662*7c478bd9Sstevel@tonic-gate key_t ftok(), shm_key;
663*7c478bd9Sstevel@tonic-gate struct shmid_ds shm_buf;
664*7c478bd9Sstevel@tonic-gate char *shmat();
665*7c478bd9Sstevel@tonic-gate int shm_id;
666*7c478bd9Sstevel@tonic-gate
667*7c478bd9Sstevel@tonic-gate /* see if the shared segment exists */
668*7c478bd9Sstevel@tonic-gate shm_key = ftok(invname, 2);
669*7c478bd9Sstevel@tonic-gate shm_id = shmget(shm_key, 0, 0);
670*7c478bd9Sstevel@tonic-gate /*
671*7c478bd9Sstevel@tonic-gate * Failure simply means (hopefully) that segment doesn't
672*7c478bd9Sstevel@tonic-gate * exist
673*7c478bd9Sstevel@tonic-gate */
674*7c478bd9Sstevel@tonic-gate if (shm_id == -1) {
675*7c478bd9Sstevel@tonic-gate /*
676*7c478bd9Sstevel@tonic-gate * Have to give general write permission due to AMdahl
677*7c478bd9Sstevel@tonic-gate * not having protected segments
678*7c478bd9Sstevel@tonic-gate */
679*7c478bd9Sstevel@tonic-gate shm_id = shmget(shm_key,
680*7c478bd9Sstevel@tonic-gate invcntl->param.supsize + sizeof (long),
681*7c478bd9Sstevel@tonic-gate IPC_CREAT | 0666);
682*7c478bd9Sstevel@tonic-gate if (shm_id == -1)
683*7c478bd9Sstevel@tonic-gate perror("Could not create shared "
684*7c478bd9Sstevel@tonic-gate "memory segment");
685*7c478bd9Sstevel@tonic-gate } else
686*7c478bd9Sstevel@tonic-gate read_index = 0;
687*7c478bd9Sstevel@tonic-gate
688*7c478bd9Sstevel@tonic-gate if (shm_id != -1) {
689*7c478bd9Sstevel@tonic-gate invcntl->iindex = shmat(shm_id, 0,
690*7c478bd9Sstevel@tonic-gate ((read_index) ? 0 : SHM_RDONLY));
691*7c478bd9Sstevel@tonic-gate if (invcntl->iindex == (char *)ERR) {
692*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr,
693*7c478bd9Sstevel@tonic-gate "%s: shared memory link failed\n", argv0);
694*7c478bd9Sstevel@tonic-gate invcntl->iindex = NULL;
695*7c478bd9Sstevel@tonic-gate read_index = 1;
696*7c478bd9Sstevel@tonic-gate }
697*7c478bd9Sstevel@tonic-gate }
698*7c478bd9Sstevel@tonic-gate }
699*7c478bd9Sstevel@tonic-gate #endif
700*7c478bd9Sstevel@tonic-gate if (invcntl->iindex == NULL)
701*7c478bd9Sstevel@tonic-gate invcntl->iindex = malloc(invcntl->param.supsize + 16);
702*7c478bd9Sstevel@tonic-gate if (invcntl->iindex == NULL) {
703*7c478bd9Sstevel@tonic-gate invcannotalloc((unsigned)invcntl->param.supsize);
704*7c478bd9Sstevel@tonic-gate free(invcntl->logblk);
705*7c478bd9Sstevel@tonic-gate goto closeboth;
706*7c478bd9Sstevel@tonic-gate }
707*7c478bd9Sstevel@tonic-gate if (read_index) {
708*7c478bd9Sstevel@tonic-gate read_superfinger(invcntl);
709*7c478bd9Sstevel@tonic-gate }
710*7c478bd9Sstevel@tonic-gate invcntl->numblk = -1;
711*7c478bd9Sstevel@tonic-gate if (boolready() == -1) {
712*7c478bd9Sstevel@tonic-gate closeboth:
713*7c478bd9Sstevel@tonic-gate (void) fclose(invcntl->postfile);
714*7c478bd9Sstevel@tonic-gate closeinv:
715*7c478bd9Sstevel@tonic-gate (void) fclose(invcntl->invfile);
716*7c478bd9Sstevel@tonic-gate return (-1);
717*7c478bd9Sstevel@tonic-gate }
718*7c478bd9Sstevel@tonic-gate /* write back out the control block if anything changed */
719*7c478bd9Sstevel@tonic-gate invcntl->param.filestat = stat;
720*7c478bd9Sstevel@tonic-gate if (stat > invcntl->param.filestat)
721*7c478bd9Sstevel@tonic-gate write_param(invcntl);
722*7c478bd9Sstevel@tonic-gate return (1);
723*7c478bd9Sstevel@tonic-gate }
724*7c478bd9Sstevel@tonic-gate
725*7c478bd9Sstevel@tonic-gate /* invclose must be called to wrap things up and deallocate core */
726*7c478bd9Sstevel@tonic-gate void
invclose(INVCONTROL * invcntl)727*7c478bd9Sstevel@tonic-gate invclose(INVCONTROL *invcntl)
728*7c478bd9Sstevel@tonic-gate {
729*7c478bd9Sstevel@tonic-gate /* write out the control block in case anything changed */
730*7c478bd9Sstevel@tonic-gate if (invcntl->param.filestat > 0) {
731*7c478bd9Sstevel@tonic-gate invcntl->param.filestat = 0;
732*7c478bd9Sstevel@tonic-gate write_param(invcntl);
733*7c478bd9Sstevel@tonic-gate }
734*7c478bd9Sstevel@tonic-gate (void) fclose(invcntl->invfile);
735*7c478bd9Sstevel@tonic-gate (void) fclose(invcntl->postfile);
736*7c478bd9Sstevel@tonic-gate #if SHARE
737*7c478bd9Sstevel@tonic-gate if (invcntl->param.share > 0) {
738*7c478bd9Sstevel@tonic-gate shmdt(invcntl->iindex);
739*7c478bd9Sstevel@tonic-gate invcntl->iindex = NULL;
740*7c478bd9Sstevel@tonic-gate }
741*7c478bd9Sstevel@tonic-gate #endif
742*7c478bd9Sstevel@tonic-gate if (invcntl->iindex != NULL)
743*7c478bd9Sstevel@tonic-gate free(invcntl->iindex);
744*7c478bd9Sstevel@tonic-gate free(invcntl->logblk);
745*7c478bd9Sstevel@tonic-gate }
746*7c478bd9Sstevel@tonic-gate
747*7c478bd9Sstevel@tonic-gate /* invstep steps the inverted file forward one item */
748*7c478bd9Sstevel@tonic-gate void
invstep(INVCONTROL * invcntl)749*7c478bd9Sstevel@tonic-gate invstep(INVCONTROL *invcntl)
750*7c478bd9Sstevel@tonic-gate {
751*7c478bd9Sstevel@tonic-gate if (invcntl->keypnt < (*(int *)invcntl->logblk - 1)) {
752*7c478bd9Sstevel@tonic-gate invcntl->keypnt++;
753*7c478bd9Sstevel@tonic-gate return;
754*7c478bd9Sstevel@tonic-gate }
755*7c478bd9Sstevel@tonic-gate
756*7c478bd9Sstevel@tonic-gate /* move forward a block else wrap */
757*7c478bd9Sstevel@tonic-gate read_logblock(invcntl, *(int *)(invcntl->logblk + sizeof (long)));
758*7c478bd9Sstevel@tonic-gate
759*7c478bd9Sstevel@tonic-gate invcntl->keypnt = 0;
760*7c478bd9Sstevel@tonic-gate }
761*7c478bd9Sstevel@tonic-gate
762*7c478bd9Sstevel@tonic-gate /* invforward moves forward one term in the inverted file */
763*7c478bd9Sstevel@tonic-gate int
invforward(INVCONTROL * invcntl)764*7c478bd9Sstevel@tonic-gate invforward(INVCONTROL *invcntl)
765*7c478bd9Sstevel@tonic-gate {
766*7c478bd9Sstevel@tonic-gate invstep(invcntl);
767*7c478bd9Sstevel@tonic-gate /* skip things with 0 postings */
768*7c478bd9Sstevel@tonic-gate while (((ENTRY *)(invcntl->logblk + 12) + invcntl->keypnt)->post == 0) {
769*7c478bd9Sstevel@tonic-gate invstep(invcntl);
770*7c478bd9Sstevel@tonic-gate }
771*7c478bd9Sstevel@tonic-gate /* Check for having wrapped - reached start of inverted file! */
772*7c478bd9Sstevel@tonic-gate if ((invcntl->numblk == 0) && (invcntl->keypnt == 0))
773*7c478bd9Sstevel@tonic-gate return (0);
774*7c478bd9Sstevel@tonic-gate return (1);
775*7c478bd9Sstevel@tonic-gate }
776*7c478bd9Sstevel@tonic-gate
777*7c478bd9Sstevel@tonic-gate /* invterm gets the present term from the present logical block */
778*7c478bd9Sstevel@tonic-gate int
invterm(INVCONTROL * invcntl,char * term)779*7c478bd9Sstevel@tonic-gate invterm(INVCONTROL *invcntl, char *term)
780*7c478bd9Sstevel@tonic-gate {
781*7c478bd9Sstevel@tonic-gate ENTRY * entryptr;
782*7c478bd9Sstevel@tonic-gate
783*7c478bd9Sstevel@tonic-gate entryptr = (ENTRY *)(invcntl->logblk + 12) + invcntl->keypnt;
784*7c478bd9Sstevel@tonic-gate (void) strncpy(term, entryptr->offset + invcntl->logblk,
785*7c478bd9Sstevel@tonic-gate (int)entryptr->size);
786*7c478bd9Sstevel@tonic-gate *(term + entryptr->size) = '\0';
787*7c478bd9Sstevel@tonic-gate return (entryptr->post);
788*7c478bd9Sstevel@tonic-gate }
789*7c478bd9Sstevel@tonic-gate
790*7c478bd9Sstevel@tonic-gate /* invfind searches for an individual item in the inverted file */
791*7c478bd9Sstevel@tonic-gate long
invfind(INVCONTROL * invcntl,char * searchterm)792*7c478bd9Sstevel@tonic-gate invfind(INVCONTROL *invcntl, char *searchterm)
793*7c478bd9Sstevel@tonic-gate {
794*7c478bd9Sstevel@tonic-gate int imid, ilow, ihigh;
795*7c478bd9Sstevel@tonic-gate long num;
796*7c478bd9Sstevel@tonic-gate int i;
797*7c478bd9Sstevel@tonic-gate unsigned long *intptr, *intptr2;
798*7c478bd9Sstevel@tonic-gate ENTRY *entryptr;
799*7c478bd9Sstevel@tonic-gate
800*7c478bd9Sstevel@tonic-gate /* make sure it is initialized via invready */
801*7c478bd9Sstevel@tonic-gate if (invcntl->invfile == 0)
802*7c478bd9Sstevel@tonic-gate return (-1L);
803*7c478bd9Sstevel@tonic-gate
804*7c478bd9Sstevel@tonic-gate /* now search for the appropriate finger block */
805*7c478bd9Sstevel@tonic-gate intptr = (unsigned long *)invcntl->iindex;
806*7c478bd9Sstevel@tonic-gate
807*7c478bd9Sstevel@tonic-gate ilow = 0;
808*7c478bd9Sstevel@tonic-gate ihigh = *intptr++ - 1;
809*7c478bd9Sstevel@tonic-gate while (ilow <= ihigh) {
810*7c478bd9Sstevel@tonic-gate imid = (ilow + ihigh) / 2;
811*7c478bd9Sstevel@tonic-gate intptr2 = intptr + imid;
812*7c478bd9Sstevel@tonic-gate i = strcmp(searchterm, (invcntl->iindex + *intptr2));
813*7c478bd9Sstevel@tonic-gate if (i < 0)
814*7c478bd9Sstevel@tonic-gate ihigh = imid - 1;
815*7c478bd9Sstevel@tonic-gate else if (i > 0)
816*7c478bd9Sstevel@tonic-gate ilow = ++imid;
817*7c478bd9Sstevel@tonic-gate else {
818*7c478bd9Sstevel@tonic-gate ilow = imid + 1;
819*7c478bd9Sstevel@tonic-gate break;
820*7c478bd9Sstevel@tonic-gate }
821*7c478bd9Sstevel@tonic-gate }
822*7c478bd9Sstevel@tonic-gate /* be careful about case where searchterm is after last in this block */
823*7c478bd9Sstevel@tonic-gate imid = (ilow) ? ilow - 1 : 0;
824*7c478bd9Sstevel@tonic-gate
825*7c478bd9Sstevel@tonic-gate /* fetch the appropriate logical block if not in core */
826*7c478bd9Sstevel@tonic-gate read_logblock(invcntl, imid);
827*7c478bd9Sstevel@tonic-gate
828*7c478bd9Sstevel@tonic-gate srch_ext:
829*7c478bd9Sstevel@tonic-gate /* now find the term in this block. tricky this */
830*7c478bd9Sstevel@tonic-gate intptr = (unsigned long *)invcntl->logblk;
831*7c478bd9Sstevel@tonic-gate
832*7c478bd9Sstevel@tonic-gate ilow = 0;
833*7c478bd9Sstevel@tonic-gate ihigh = *intptr - 1;
834*7c478bd9Sstevel@tonic-gate intptr += 3;
835*7c478bd9Sstevel@tonic-gate num = 0;
836*7c478bd9Sstevel@tonic-gate while (ilow <= ihigh) {
837*7c478bd9Sstevel@tonic-gate imid = (ilow + ihigh) / 2;
838*7c478bd9Sstevel@tonic-gate entryptr = (ENTRY *)intptr + imid;
839*7c478bd9Sstevel@tonic-gate i = strncmp(searchterm, (invcntl->logblk + entryptr->offset),
840*7c478bd9Sstevel@tonic-gate (int)entryptr->size);
841*7c478bd9Sstevel@tonic-gate if (i == 0)
842*7c478bd9Sstevel@tonic-gate i = strlen(searchterm) - entryptr->size;
843*7c478bd9Sstevel@tonic-gate if (i < 0)
844*7c478bd9Sstevel@tonic-gate ihigh = imid - 1;
845*7c478bd9Sstevel@tonic-gate else if (i > 0)
846*7c478bd9Sstevel@tonic-gate ilow = ++imid;
847*7c478bd9Sstevel@tonic-gate else {
848*7c478bd9Sstevel@tonic-gate num = entryptr->post;
849*7c478bd9Sstevel@tonic-gate break;
850*7c478bd9Sstevel@tonic-gate }
851*7c478bd9Sstevel@tonic-gate }
852*7c478bd9Sstevel@tonic-gate /* be careful about case where searchterm is after last in this block */
853*7c478bd9Sstevel@tonic-gate if (imid >= *(long *)invcntl->logblk) {
854*7c478bd9Sstevel@tonic-gate invcntl->keypnt = *(long *)invcntl->logblk;
855*7c478bd9Sstevel@tonic-gate invstep(invcntl);
856*7c478bd9Sstevel@tonic-gate /* note if this happens the term could be in extended block */
857*7c478bd9Sstevel@tonic-gate if (invcntl->param.startbyte <
858*7c478bd9Sstevel@tonic-gate invcntl->numblk * invcntl->param.sizeblk)
859*7c478bd9Sstevel@tonic-gate goto srch_ext;
860*7c478bd9Sstevel@tonic-gate } else
861*7c478bd9Sstevel@tonic-gate invcntl->keypnt = imid;
862*7c478bd9Sstevel@tonic-gate return (num);
863*7c478bd9Sstevel@tonic-gate }
864*7c478bd9Sstevel@tonic-gate
865*7c478bd9Sstevel@tonic-gate #if DEBUG
866*7c478bd9Sstevel@tonic-gate
867*7c478bd9Sstevel@tonic-gate /* invdump dumps the block the term parameter is in */
868*7c478bd9Sstevel@tonic-gate void
invdump(INVCONTROL * invcntl,char * term)869*7c478bd9Sstevel@tonic-gate invdump(INVCONTROL *invcntl, char *term)
870*7c478bd9Sstevel@tonic-gate {
871*7c478bd9Sstevel@tonic-gate long i, j, n, *longptr;
872*7c478bd9Sstevel@tonic-gate ENTRY * entryptr;
873*7c478bd9Sstevel@tonic-gate char temp[512], *ptr;
874*7c478bd9Sstevel@tonic-gate
875*7c478bd9Sstevel@tonic-gate /* dump superindex if term is "-" */
876*7c478bd9Sstevel@tonic-gate if (*term == '-') {
877*7c478bd9Sstevel@tonic-gate j = atoi(term + 1);
878*7c478bd9Sstevel@tonic-gate longptr = (long *)invcntl->iindex;
879*7c478bd9Sstevel@tonic-gate n = *longptr++;
880*7c478bd9Sstevel@tonic-gate (void) printf("Superindex dump, num blocks=%ld\n", n);
881*7c478bd9Sstevel@tonic-gate longptr += j;
882*7c478bd9Sstevel@tonic-gate while ((longptr <= ((long *)invcntl->iindex) + n) &&
883*7c478bd9Sstevel@tonic-gate invbreak == 0) {
884*7c478bd9Sstevel@tonic-gate (void) printf("%2ld %6ld %s\n", j++, *longptr,
885*7c478bd9Sstevel@tonic-gate invcntl->iindex + *longptr);
886*7c478bd9Sstevel@tonic-gate longptr++;
887*7c478bd9Sstevel@tonic-gate }
888*7c478bd9Sstevel@tonic-gate return;
889*7c478bd9Sstevel@tonic-gate } else if (*term == '#') {
890*7c478bd9Sstevel@tonic-gate j = atoi(term + 1);
891*7c478bd9Sstevel@tonic-gate /* fetch the appropriate logical block */
892*7c478bd9Sstevel@tonic-gate read_logblock(invcntl, j);
893*7c478bd9Sstevel@tonic-gate } else
894*7c478bd9Sstevel@tonic-gate i = abs((int)invfind(invcntl, term));
895*7c478bd9Sstevel@tonic-gate longptr = (long *)invcntl->logblk;
896*7c478bd9Sstevel@tonic-gate n = *longptr++;
897*7c478bd9Sstevel@tonic-gate (void) printf("Entry term to invdump=%s, postings=%ld, "
898*7c478bd9Sstevel@tonic-gate "forward ptr=%ld, back ptr=%ld\n", term, i, *(longptr),
899*7c478bd9Sstevel@tonic-gate *(longptr + 1));
900*7c478bd9Sstevel@tonic-gate entryptr = (ENTRY *)(invcntl->logblk + 12);
901*7c478bd9Sstevel@tonic-gate (void) printf("%ld terms in this block, block=%ld\n", n,
902*7c478bd9Sstevel@tonic-gate invcntl->numblk);
903*7c478bd9Sstevel@tonic-gate (void) printf("\tterm\t\t\tposts\tsize\toffset\tspace\t1st word\n");
904*7c478bd9Sstevel@tonic-gate for (j = 0; j < n && invbreak == 0; j++) {
905*7c478bd9Sstevel@tonic-gate ptr = invcntl->logblk + entryptr->offset;
906*7c478bd9Sstevel@tonic-gate (void) strncpy(temp, ptr, (int)entryptr->size);
907*7c478bd9Sstevel@tonic-gate temp[entryptr->size] = '\0';
908*7c478bd9Sstevel@tonic-gate ptr += (sizeof (long) *
909*7c478bd9Sstevel@tonic-gate (long)((entryptr->size +
910*7c478bd9Sstevel@tonic-gate (sizeof (long) - 1)) / sizeof (long)));
911*7c478bd9Sstevel@tonic-gate (void) printf("%2ld %-24s\t%5ld\t%3d\t%d\t%d\t%ld\n", j, temp,
912*7c478bd9Sstevel@tonic-gate entryptr->post, entryptr->size, entryptr->offset,
913*7c478bd9Sstevel@tonic-gate entryptr->space, *(long *)ptr);
914*7c478bd9Sstevel@tonic-gate entryptr++;
915*7c478bd9Sstevel@tonic-gate }
916*7c478bd9Sstevel@tonic-gate }
917*7c478bd9Sstevel@tonic-gate #endif
918*7c478bd9Sstevel@tonic-gate
919*7c478bd9Sstevel@tonic-gate static int
boolready(void)920*7c478bd9Sstevel@tonic-gate boolready(void)
921*7c478bd9Sstevel@tonic-gate {
922*7c478bd9Sstevel@tonic-gate numitems = 0;
923*7c478bd9Sstevel@tonic-gate if (item1 != NULL)
924*7c478bd9Sstevel@tonic-gate free(item1);
925*7c478bd9Sstevel@tonic-gate setsize1 = SETINC;
926*7c478bd9Sstevel@tonic-gate if ((item1 = (POSTING *)malloc(SETINC * sizeof (POSTING))) == NULL) {
927*7c478bd9Sstevel@tonic-gate invcannotalloc(SETINC);
928*7c478bd9Sstevel@tonic-gate return (-1);
929*7c478bd9Sstevel@tonic-gate }
930*7c478bd9Sstevel@tonic-gate if (item2 != NULL)
931*7c478bd9Sstevel@tonic-gate free(item2);
932*7c478bd9Sstevel@tonic-gate setsize2 = SETINC;
933*7c478bd9Sstevel@tonic-gate if ((item2 = (POSTING *)malloc(SETINC * sizeof (POSTING))) == NULL) {
934*7c478bd9Sstevel@tonic-gate invcannotalloc(SETINC);
935*7c478bd9Sstevel@tonic-gate return (-1);
936*7c478bd9Sstevel@tonic-gate }
937*7c478bd9Sstevel@tonic-gate item = item1;
938*7c478bd9Sstevel@tonic-gate enditem = item;
939*7c478bd9Sstevel@tonic-gate return (0);
940*7c478bd9Sstevel@tonic-gate }
941*7c478bd9Sstevel@tonic-gate
942*7c478bd9Sstevel@tonic-gate void
boolclear(void)943*7c478bd9Sstevel@tonic-gate boolclear(void)
944*7c478bd9Sstevel@tonic-gate {
945*7c478bd9Sstevel@tonic-gate numitems = 0;
946*7c478bd9Sstevel@tonic-gate item = item1;
947*7c478bd9Sstevel@tonic-gate enditem = item;
948*7c478bd9Sstevel@tonic-gate }
949*7c478bd9Sstevel@tonic-gate
950*7c478bd9Sstevel@tonic-gate POSTING *
boolfile(INVCONTROL * invcntl,long * num,int bool)951*7c478bd9Sstevel@tonic-gate boolfile(INVCONTROL *invcntl, long *num, int bool)
952*7c478bd9Sstevel@tonic-gate {
953*7c478bd9Sstevel@tonic-gate ENTRY *entryptr;
954*7c478bd9Sstevel@tonic-gate FILE *file;
955*7c478bd9Sstevel@tonic-gate char *ptr;
956*7c478bd9Sstevel@tonic-gate unsigned long *ptr2;
957*7c478bd9Sstevel@tonic-gate POSTING *newitem;
958*7c478bd9Sstevel@tonic-gate POSTING posting;
959*7c478bd9Sstevel@tonic-gate unsigned u;
960*7c478bd9Sstevel@tonic-gate POSTING *newsetp, *set1p;
961*7c478bd9Sstevel@tonic-gate long newsetc, set1c, set2c;
962*7c478bd9Sstevel@tonic-gate
963*7c478bd9Sstevel@tonic-gate entryptr = (ENTRY *) (invcntl->logblk + 12) + invcntl->keypnt;
964*7c478bd9Sstevel@tonic-gate ptr = invcntl->logblk + entryptr->offset;
965*7c478bd9Sstevel@tonic-gate ptr2 = ((unsigned long *)ptr) +
966*7c478bd9Sstevel@tonic-gate (entryptr->size + (sizeof (long) - 1)) / sizeof (long);
967*7c478bd9Sstevel@tonic-gate *num = entryptr->post;
968*7c478bd9Sstevel@tonic-gate switch (bool) {
969*7c478bd9Sstevel@tonic-gate case OR:
970*7c478bd9Sstevel@tonic-gate case NOT:
971*7c478bd9Sstevel@tonic-gate if (*num == 0) {
972*7c478bd9Sstevel@tonic-gate *num = numitems;
973*7c478bd9Sstevel@tonic-gate return (item);
974*7c478bd9Sstevel@tonic-gate }
975*7c478bd9Sstevel@tonic-gate }
976*7c478bd9Sstevel@tonic-gate /* make room for the new set */
977*7c478bd9Sstevel@tonic-gate u = 0;
978*7c478bd9Sstevel@tonic-gate switch (bool) {
979*7c478bd9Sstevel@tonic-gate case AND:
980*7c478bd9Sstevel@tonic-gate case NOT:
981*7c478bd9Sstevel@tonic-gate newsetp = set1p = item;
982*7c478bd9Sstevel@tonic-gate break;
983*7c478bd9Sstevel@tonic-gate
984*7c478bd9Sstevel@tonic-gate case OR:
985*7c478bd9Sstevel@tonic-gate u = enditem - item;
986*7c478bd9Sstevel@tonic-gate /* FALLTHROUGH */
987*7c478bd9Sstevel@tonic-gate case REVERSENOT:
988*7c478bd9Sstevel@tonic-gate u += *num;
989*7c478bd9Sstevel@tonic-gate if (item == item2) {
990*7c478bd9Sstevel@tonic-gate if (u > setsize1) {
991*7c478bd9Sstevel@tonic-gate u += SETINC;
992*7c478bd9Sstevel@tonic-gate if ((item1 = (POSTING *) realloc(item1,
993*7c478bd9Sstevel@tonic-gate u * sizeof (POSTING))) == NULL) {
994*7c478bd9Sstevel@tonic-gate goto cannotalloc;
995*7c478bd9Sstevel@tonic-gate }
996*7c478bd9Sstevel@tonic-gate setsize1 = u;
997*7c478bd9Sstevel@tonic-gate }
998*7c478bd9Sstevel@tonic-gate newitem = item1;
999*7c478bd9Sstevel@tonic-gate } else {
1000*7c478bd9Sstevel@tonic-gate if (u > setsize2) {
1001*7c478bd9Sstevel@tonic-gate u += SETINC;
1002*7c478bd9Sstevel@tonic-gate if ((item2 = (POSTING *)realloc(item2,
1003*7c478bd9Sstevel@tonic-gate u * sizeof (POSTING))) == NULL) {
1004*7c478bd9Sstevel@tonic-gate cannotalloc:
1005*7c478bd9Sstevel@tonic-gate invcannotalloc(u * sizeof (POSTING));
1006*7c478bd9Sstevel@tonic-gate (void) boolready();
1007*7c478bd9Sstevel@tonic-gate *num = -1;
1008*7c478bd9Sstevel@tonic-gate return (NULL);
1009*7c478bd9Sstevel@tonic-gate }
1010*7c478bd9Sstevel@tonic-gate setsize2 = u;
1011*7c478bd9Sstevel@tonic-gate }
1012*7c478bd9Sstevel@tonic-gate newitem = item2;
1013*7c478bd9Sstevel@tonic-gate }
1014*7c478bd9Sstevel@tonic-gate set1p = item;
1015*7c478bd9Sstevel@tonic-gate newsetp = newitem;
1016*7c478bd9Sstevel@tonic-gate }
1017*7c478bd9Sstevel@tonic-gate file = invcntl->postfile;
1018*7c478bd9Sstevel@tonic-gate (void) fseek(file, (long)*ptr2, SEEK_SET);
1019*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting);
1020*7c478bd9Sstevel@tonic-gate newsetc = 0;
1021*7c478bd9Sstevel@tonic-gate switch (bool) {
1022*7c478bd9Sstevel@tonic-gate case OR:
1023*7c478bd9Sstevel@tonic-gate /* while something in both sets */
1024*7c478bd9Sstevel@tonic-gate set1p = item;
1025*7c478bd9Sstevel@tonic-gate newsetp = newitem;
1026*7c478bd9Sstevel@tonic-gate for (set1c = 0, set2c = 0;
1027*7c478bd9Sstevel@tonic-gate set1c < numitems && set2c < *num; newsetc++) {
1028*7c478bd9Sstevel@tonic-gate if (set1p->lineoffset < posting.lineoffset) {
1029*7c478bd9Sstevel@tonic-gate *newsetp++ = *set1p++;
1030*7c478bd9Sstevel@tonic-gate set1c++;
1031*7c478bd9Sstevel@tonic-gate } else if (set1p->lineoffset > posting.lineoffset) {
1032*7c478bd9Sstevel@tonic-gate *newsetp++ = posting;
1033*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting);
1034*7c478bd9Sstevel@tonic-gate set2c++;
1035*7c478bd9Sstevel@tonic-gate } else if (set1p->type < posting.type) {
1036*7c478bd9Sstevel@tonic-gate *newsetp++ = *set1p++;
1037*7c478bd9Sstevel@tonic-gate set1c++;
1038*7c478bd9Sstevel@tonic-gate } else if (set1p->type > posting.type) {
1039*7c478bd9Sstevel@tonic-gate *newsetp++ = posting;
1040*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting);
1041*7c478bd9Sstevel@tonic-gate set2c++;
1042*7c478bd9Sstevel@tonic-gate } else { /* identical postings */
1043*7c478bd9Sstevel@tonic-gate *newsetp++ = *set1p++;
1044*7c478bd9Sstevel@tonic-gate set1c++;
1045*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting);
1046*7c478bd9Sstevel@tonic-gate set2c++;
1047*7c478bd9Sstevel@tonic-gate }
1048*7c478bd9Sstevel@tonic-gate }
1049*7c478bd9Sstevel@tonic-gate /* find out what ran out and move the rest in */
1050*7c478bd9Sstevel@tonic-gate if (set1c < numitems) {
1051*7c478bd9Sstevel@tonic-gate newsetc += numitems - set1c;
1052*7c478bd9Sstevel@tonic-gate while (set1c++ < numitems) {
1053*7c478bd9Sstevel@tonic-gate *newsetp++ = *set1p++;
1054*7c478bd9Sstevel@tonic-gate }
1055*7c478bd9Sstevel@tonic-gate } else {
1056*7c478bd9Sstevel@tonic-gate while (set2c++ < *num) {
1057*7c478bd9Sstevel@tonic-gate *newsetp++ = posting;
1058*7c478bd9Sstevel@tonic-gate newsetc++;
1059*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting);
1060*7c478bd9Sstevel@tonic-gate }
1061*7c478bd9Sstevel@tonic-gate }
1062*7c478bd9Sstevel@tonic-gate item = newitem;
1063*7c478bd9Sstevel@tonic-gate break; /* end of OR */
1064*7c478bd9Sstevel@tonic-gate #if 0
1065*7c478bd9Sstevel@tonic-gate case AND:
1066*7c478bd9Sstevel@tonic-gate set1c = 0;
1067*7c478bd9Sstevel@tonic-gate set2c = 0;
1068*7c478bd9Sstevel@tonic-gate while (set1c < numitems && set2c < *num) {
1069*7c478bd9Sstevel@tonic-gate if (set1p->lineoffset < posting.lineoffset) {
1070*7c478bd9Sstevel@tonic-gate set1p++;
1071*7c478bd9Sstevel@tonic-gate set1c++;
1072*7c478bd9Sstevel@tonic-gate } else if (set1p->lineoffset > posting.lineoffset) {
1073*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting);
1074*7c478bd9Sstevel@tonic-gate set2c++;
1075*7c478bd9Sstevel@tonic-gate } else if (set1p->type < posting.type) {
1076*7c478bd9Sstevel@tonic-gate *set1p++;
1077*7c478bd9Sstevel@tonic-gate set1c++;
1078*7c478bd9Sstevel@tonic-gate } else if (set1p->type > posting.type) {
1079*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting);
1080*7c478bd9Sstevel@tonic-gate set2c++;
1081*7c478bd9Sstevel@tonic-gate } else { /* identical postings */
1082*7c478bd9Sstevel@tonic-gate *newsetp++ = *set1p++;
1083*7c478bd9Sstevel@tonic-gate newsetc++;
1084*7c478bd9Sstevel@tonic-gate set1c++;
1085*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting);
1086*7c478bd9Sstevel@tonic-gate set2c++;
1087*7c478bd9Sstevel@tonic-gate }
1088*7c478bd9Sstevel@tonic-gate }
1089*7c478bd9Sstevel@tonic-gate break; /* end of AND */
1090*7c478bd9Sstevel@tonic-gate
1091*7c478bd9Sstevel@tonic-gate case NOT:
1092*7c478bd9Sstevel@tonic-gate set1c = 0;
1093*7c478bd9Sstevel@tonic-gate set2c = 0;
1094*7c478bd9Sstevel@tonic-gate while (set1c < numitems && set2c < *num) {
1095*7c478bd9Sstevel@tonic-gate if (set1p->lineoffset < posting.lineoffset) {
1096*7c478bd9Sstevel@tonic-gate *newsetp++ = *set1p++;
1097*7c478bd9Sstevel@tonic-gate newsetc++;
1098*7c478bd9Sstevel@tonic-gate set1c++;
1099*7c478bd9Sstevel@tonic-gate } else if (set1p->lineoffset > posting.lineoffset) {
1100*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting);
1101*7c478bd9Sstevel@tonic-gate set2c++;
1102*7c478bd9Sstevel@tonic-gate } else if (set1p->type < posting.type) {
1103*7c478bd9Sstevel@tonic-gate *newsetp++ = *set1p++;
1104*7c478bd9Sstevel@tonic-gate newsetc++;
1105*7c478bd9Sstevel@tonic-gate set1c++;
1106*7c478bd9Sstevel@tonic-gate } else if (set1p->type > posting.type) {
1107*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting);
1108*7c478bd9Sstevel@tonic-gate set2c++;
1109*7c478bd9Sstevel@tonic-gate } else { /* identical postings */
1110*7c478bd9Sstevel@tonic-gate set1c++;
1111*7c478bd9Sstevel@tonic-gate set1p++;
1112*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting);
1113*7c478bd9Sstevel@tonic-gate set2c++;
1114*7c478bd9Sstevel@tonic-gate }
1115*7c478bd9Sstevel@tonic-gate }
1116*7c478bd9Sstevel@tonic-gate newsetc += numitems - set1c;
1117*7c478bd9Sstevel@tonic-gate while (set1c++ < numitems) {
1118*7c478bd9Sstevel@tonic-gate *newsetp++ = *set1p++;
1119*7c478bd9Sstevel@tonic-gate }
1120*7c478bd9Sstevel@tonic-gate break; /* end of NOT */
1121*7c478bd9Sstevel@tonic-gate
1122*7c478bd9Sstevel@tonic-gate case REVERSENOT: /* core NOT incoming set */
1123*7c478bd9Sstevel@tonic-gate set1c = 0;
1124*7c478bd9Sstevel@tonic-gate set2c = 0;
1125*7c478bd9Sstevel@tonic-gate while (set1c < numitems && set2c < *num) {
1126*7c478bd9Sstevel@tonic-gate if (set1p->lineoffset < posting.lineoffset) {
1127*7c478bd9Sstevel@tonic-gate set1p++;
1128*7c478bd9Sstevel@tonic-gate set1c++;
1129*7c478bd9Sstevel@tonic-gate } else if (set1p->lineoffset > posting.lineoffset) {
1130*7c478bd9Sstevel@tonic-gate *newsetp++ = posting;
1131*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting);
1132*7c478bd9Sstevel@tonic-gate set2c++;
1133*7c478bd9Sstevel@tonic-gate } else if (set1p->type < posting.type) {
1134*7c478bd9Sstevel@tonic-gate set1p++;
1135*7c478bd9Sstevel@tonic-gate set1c++;
1136*7c478bd9Sstevel@tonic-gate } else if (set1p->type > posting.type) {
1137*7c478bd9Sstevel@tonic-gate *newsetp++ = posting;
1138*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting);
1139*7c478bd9Sstevel@tonic-gate set2c++;
1140*7c478bd9Sstevel@tonic-gate } else { /* identical postings */
1141*7c478bd9Sstevel@tonic-gate set1c++;
1142*7c478bd9Sstevel@tonic-gate set1p++;
1143*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting);
1144*7c478bd9Sstevel@tonic-gate set2c++;
1145*7c478bd9Sstevel@tonic-gate }
1146*7c478bd9Sstevel@tonic-gate }
1147*7c478bd9Sstevel@tonic-gate while (set2c++ < *num) {
1148*7c478bd9Sstevel@tonic-gate *newsetp++ = posting;
1149*7c478bd9Sstevel@tonic-gate newsetc++;
1150*7c478bd9Sstevel@tonic-gate read_next_posting(invcntl, &posting);
1151*7c478bd9Sstevel@tonic-gate }
1152*7c478bd9Sstevel@tonic-gate item = newitem;
1153*7c478bd9Sstevel@tonic-gate break; /* end of REVERSENOT */
1154*7c478bd9Sstevel@tonic-gate #endif
1155*7c478bd9Sstevel@tonic-gate }
1156*7c478bd9Sstevel@tonic-gate numitems = newsetc;
1157*7c478bd9Sstevel@tonic-gate *num = newsetc;
1158*7c478bd9Sstevel@tonic-gate enditem = (POSTING *)newsetp;
1159*7c478bd9Sstevel@tonic-gate return ((POSTING *)item);
1160*7c478bd9Sstevel@tonic-gate }
1161*7c478bd9Sstevel@tonic-gate
1162*7c478bd9Sstevel@tonic-gate #if 0
1163*7c478bd9Sstevel@tonic-gate POSTING *
1164*7c478bd9Sstevel@tonic-gate boolsave(int clear)
1165*7c478bd9Sstevel@tonic-gate {
1166*7c478bd9Sstevel@tonic-gate int i;
1167*7c478bd9Sstevel@tonic-gate POSTING *ptr;
1168*7c478bd9Sstevel@tonic-gate POSTING *oldstuff, *newstuff;
1169*7c478bd9Sstevel@tonic-gate
1170*7c478bd9Sstevel@tonic-gate if (numitems == 0) {
1171*7c478bd9Sstevel@tonic-gate if (clear)
1172*7c478bd9Sstevel@tonic-gate boolclear();
1173*7c478bd9Sstevel@tonic-gate return (NULL);
1174*7c478bd9Sstevel@tonic-gate }
1175*7c478bd9Sstevel@tonic-gate /*
1176*7c478bd9Sstevel@tonic-gate * if clear then give them what we have and use (void)
1177*7c478bd9Sstevel@tonic-gate * boolready to realloc
1178*7c478bd9Sstevel@tonic-gate */
1179*7c478bd9Sstevel@tonic-gate if (clear) {
1180*7c478bd9Sstevel@tonic-gate ptr = item;
1181*7c478bd9Sstevel@tonic-gate /* free up the space we didn't give them */
1182*7c478bd9Sstevel@tonic-gate if (item == item1)
1183*7c478bd9Sstevel@tonic-gate item1 = NULL;
1184*7c478bd9Sstevel@tonic-gate else
1185*7c478bd9Sstevel@tonic-gate item2 = NULL;
1186*7c478bd9Sstevel@tonic-gate (void) boolready();
1187*7c478bd9Sstevel@tonic-gate return (ptr);
1188*7c478bd9Sstevel@tonic-gate }
1189*7c478bd9Sstevel@tonic-gate i = (enditem - item) * sizeof (POSTING) + 100;
1190*7c478bd9Sstevel@tonic-gate if ((ptr = (POSTING *)malloc(i))r == NULL) {
1191*7c478bd9Sstevel@tonic-gate invcannotalloc(i);
1192*7c478bd9Sstevel@tonic-gate return (ptr);
1193*7c478bd9Sstevel@tonic-gate }
1194*7c478bd9Sstevel@tonic-gate /* move present set into place */
1195*7c478bd9Sstevel@tonic-gate oldstuff = item;
1196*7c478bd9Sstevel@tonic-gate newstuff = ptr;
1197*7c478bd9Sstevel@tonic-gate while (oldstuff < enditem)
1198*7c478bd9Sstevel@tonic-gate *newstuff++ = *oldstuff++;
1199*7c478bd9Sstevel@tonic-gate return (ptr);
1200*7c478bd9Sstevel@tonic-gate }
1201*7c478bd9Sstevel@tonic-gate #endif
1202*7c478bd9Sstevel@tonic-gate
1203*7c478bd9Sstevel@tonic-gate static void
invcannotalloc(size_t n)1204*7c478bd9Sstevel@tonic-gate invcannotalloc(size_t n)
1205*7c478bd9Sstevel@tonic-gate {
1206*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "%s: cannot allocate %u bytes\n", argv0, n);
1207*7c478bd9Sstevel@tonic-gate }
1208*7c478bd9Sstevel@tonic-gate
1209*7c478bd9Sstevel@tonic-gate static void
invcannotopen(char * file)1210*7c478bd9Sstevel@tonic-gate invcannotopen(char *file)
1211*7c478bd9Sstevel@tonic-gate {
1212*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "%s: cannot open file %s\n", argv0, file);
1213*7c478bd9Sstevel@tonic-gate }
1214*7c478bd9Sstevel@tonic-gate
1215*7c478bd9Sstevel@tonic-gate static void
invcannotwrite(char * file)1216*7c478bd9Sstevel@tonic-gate invcannotwrite(char *file)
1217*7c478bd9Sstevel@tonic-gate {
1218*7c478bd9Sstevel@tonic-gate (void) perror(argv0); /* must be first to preserve errno */
1219*7c478bd9Sstevel@tonic-gate (void) fprintf(stderr, "%s: write to file %s failed\n", argv0, file);
1220*7c478bd9Sstevel@tonic-gate }
1221