1 /*
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1995-2022 Wolfram Schneider <wosch@FreeBSD.org>
5 * Copyright (c) 1989, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * James A. Woods.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36
37 #ifndef _LOCATE_STATISTIC_
38 #define _LOCATE_STATISTIC_
39
40 void
statistic(FILE * fp,char * path_fcodes)41 statistic (FILE *fp, char *path_fcodes)
42 {
43 long lines, chars, size, size_nbg, big, zwerg, umlaut;
44 u_char *p, *s;
45 int c;
46 int count, longest_path;
47 int error = 0;
48 u_char bigram1[NBG], bigram2[NBG], path[LOCATE_PATH_MAX];
49
50 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
51 p[c] = check_bigram_char(getc(fp));
52 s[c] = check_bigram_char(getc(fp));
53 }
54
55 lines = chars = big = zwerg = umlaut = longest_path = 0;
56 size = NBG + NBG;
57
58 for (c = getc(fp), count = 0; c != EOF; size++) {
59 if (c == SWITCH) {
60 count += getwf(fp) - OFFSET;
61 size += sizeof(int);
62 zwerg++;
63 } else
64 count += c - OFFSET;
65
66 if (count < 0 || count >= LOCATE_PATH_MAX) {
67 /* stop on error and display the statstics anyway */
68 warnx("corrupted database: %s %d", path_fcodes, count);
69 error = 1;
70 break;
71 }
72
73 for (p = path + count; (c = getc(fp)) > SWITCH; size++)
74 if (c < PARITY) {
75 if (c == UMLAUT) {
76 c = getc(fp);
77 size++;
78 umlaut++;
79 }
80 p++;
81 } else {
82 /* bigram char */
83 big++;
84 p += 2;
85 }
86
87 p++;
88 lines++;
89 chars += (p - path);
90 if ((p - path) > longest_path)
91 longest_path = p - path;
92 }
93
94 /* size without bigram db */
95 size_nbg = size - (2 * NBG);
96
97 (void)printf("\nDatabase: %s\n", path_fcodes);
98 (void)printf("Compression: Front: %2.2f%%, ", chars > 0 ? (size_nbg + big) / (chars / (float)100) : 0);
99 (void)printf("Bigram: %2.2f%%, ", big > 0 ? (size_nbg - big) / (size_nbg / (float)100) : 0);
100 /* incl. bigram db overhead */
101 (void)printf("Total: %2.2f%%\n", chars > 0 ? size / (chars / (float)100) : 0);
102 (void)printf("Filenames: %ld, ", lines);
103 (void)printf("Characters: %ld, ", chars);
104 (void)printf("Database size: %ld\n", size);
105 (void)printf("Bigram characters: %ld, ", big);
106 (void)printf("Integers: %ld, ", zwerg);
107 (void)printf("8-Bit characters: %ld\n", umlaut);
108 printf("Longest path: %d\n", longest_path > 0 ? longest_path - 1 : 0);
109
110 /* non zero exit on corrupt database */
111 if (error)
112 exit(error);
113 }
114 #endif /* _LOCATE_STATISTIC_ */
115
116 extern char separator;
117
118 void
119 #ifdef FF_MMAP
120
121
122 #ifdef FF_ICASE
fastfind_mmap_icase(char * pathpart,caddr_t paddr,off_t len,char * database)123 fastfind_mmap_icase
124 #else
125 fastfind_mmap
126 #endif /* FF_ICASE */
127 (char *pathpart, caddr_t paddr, off_t len, char *database)
128
129
130 #else /* MMAP */
131
132
133 #ifdef FF_ICASE
134 fastfind_icase
135 #else
136 fastfind
137 #endif /* FF_ICASE */
138
139 (FILE *fp, char *pathpart, char *database)
140
141
142 #endif /* MMAP */
143
144 {
145 u_char *p, *s, *patend, *q, *foundchar;
146 int c, cc;
147 int count, found, globflag;
148 u_char *cutoff;
149 u_char bigram1[NBG], bigram2[NBG], path[LOCATE_PATH_MAX + 2];
150
151 #ifdef FF_ICASE
152 /* use a lookup table for case insensitive search */
153 u_char table[UCHAR_MAX + 1];
154
155 tolower_word(pathpart);
156 #endif /* FF_ICASE*/
157
158 /* init bigram table */
159 #ifdef FF_MMAP
160 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) {
161 p[c] = check_bigram_char(*paddr++);
162 s[c] = check_bigram_char(*paddr++);
163 }
164 #else
165 for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
166 p[c] = check_bigram_char(getc(fp));
167 s[c] = check_bigram_char(getc(fp));
168 }
169 #endif /* FF_MMAP */
170
171 /* find optimal (last) char for searching */
172 for (p = pathpart; *p != '\0'; p++)
173 if (strchr(LOCATE_REG, *p) != NULL)
174 break;
175
176 if (*p == '\0')
177 globflag = 0;
178 else
179 globflag = 1;
180
181 p = pathpart;
182 patend = patprep(p);
183 cc = *patend;
184
185 #ifdef FF_ICASE
186 /* set patend char to true */
187 for (c = 0; c < UCHAR_MAX + 1; c++)
188 table[c] = 0;
189
190 table[TOLOWER(*patend)] = 1;
191 table[toupper(*patend)] = 1;
192 #endif /* FF_ICASE */
193
194
195 /* main loop */
196 found = count = 0;
197 foundchar = 0;
198
199 #ifdef FF_MMAP
200 c = (u_char)*paddr++;
201 len--;
202
203 for (; len > 0; ) {
204 #else
205 c = getc(fp);
206 for (; c != EOF; ) {
207 #endif /* FF_MMAP */
208
209 /* go forward or backward */
210 if (c == SWITCH) { /* big step, an integer */
211 #ifdef FF_MMAP
212 if (len < sizeof(int))
213 errx(1, "corrupted database: %s", database);
214
215 count += getwm(paddr) - OFFSET;
216 len -= INTSIZE;
217 paddr += INTSIZE;
218 #else
219 count += getwf(fp) - OFFSET;
220 #endif /* FF_MMAP */
221 } else { /* slow step, =< 14 chars */
222 count += c - OFFSET;
223 }
224
225 if (count < 0 || count >= LOCATE_PATH_MAX)
226 errx(1, "corrupted database: %s %d", database, count);
227
228 /* overlay old path */
229 p = path + count;
230 foundchar = p - 1;
231
232 #ifdef FF_MMAP
233 for (; len > 0;) {
234 c = (u_char)*paddr++;
235 len--;
236 #else
237 for (;;) {
238 c = getc(fp);
239 #endif /* FF_MMAP */
240 /*
241 * == UMLAUT: 8 bit char followed
242 * <= SWITCH: offset
243 * >= PARITY: bigram
244 * rest: single ascii char
245 *
246 * offset < SWITCH < UMLAUT < ascii < PARITY < bigram
247 */
248 if (c < PARITY) {
249 if (c <= UMLAUT) {
250 if (c == UMLAUT) {
251 #ifdef FF_MMAP
252 c = (u_char)*paddr++;
253 len--;
254 #else
255 c = getc(fp);
256 #endif /* FF_MMAP */
257
258 } else
259 break; /* SWITCH */
260 }
261 #ifdef FF_ICASE
262 if (table[c])
263 #else
264 if (c == cc)
265 #endif /* FF_ICASE */
266 foundchar = p;
267 *p++ = c;
268 }
269 else {
270 /* bigrams are parity-marked */
271 TO7BIT(c);
272
273 #ifndef FF_ICASE
274 if (bigram1[c] == cc ||
275 bigram2[c] == cc)
276 #else
277
278 if (table[bigram1[c]] ||
279 table[bigram2[c]])
280 #endif /* FF_ICASE */
281 foundchar = p + 1;
282
283 *p++ = bigram1[c];
284 *p++ = bigram2[c];
285 }
286
287 if (p - path >= LOCATE_PATH_MAX)
288 errx(1, "corrupted database: %s %td", database, p - path);
289
290 }
291
292 if (found) { /* previous line matched */
293 cutoff = path;
294 *p-- = '\0';
295 foundchar = p;
296 } else if (foundchar >= path + count) { /* a char matched */
297 *p-- = '\0';
298 cutoff = path + count;
299 } else /* nothing to do */
300 continue;
301
302 found = 0;
303 for (s = foundchar; s >= cutoff; s--) {
304 if (*s == cc
305 #ifdef FF_ICASE
306 || TOLOWER(*s) == cc
307 #endif /* FF_ICASE */
308 ) { /* fast first char check */
309 for (p = patend - 1, q = s - 1; *p != '\0';
310 p--, q--)
311 if (*q != *p
312 #ifdef FF_ICASE
313 && TOLOWER(*q) != *p
314 #endif /* FF_ICASE */
315 )
316 break;
317 if (*p == '\0') { /* fast match success */
318 found = 1;
319 if (!globflag ||
320 #ifndef FF_ICASE
321 !fnmatch(pathpart, path, 0))
322 #else
323 !fnmatch(pathpart, path,
324 FNM_CASEFOLD))
325 #endif /* !FF_ICASE */
326 {
327 if (f_silent)
328 counter++;
329 else if (f_limit) {
330 counter++;
331 if (f_limit >= counter)
332 (void)printf("%s%c",path,separator);
333 else
334 errx(0, "[show only %ld lines]", counter - 1);
335 } else
336 (void)printf("%s%c",path,separator);
337 }
338 break;
339 }
340 }
341 }
342 }
343 }
344