xref: /freebsd/usr.bin/locate/locate/fastfind.c (revision b3e7694832e81d7a904a10f525f8797b753bf0d3)
1dbaf4288SWolfram Schneider /*
2389844c0SBaptiste Daroussin  * SPDX-License-Identifier: BSD-3-Clause
3df57947fSPedro F. Giffuni  *
4a97ce14aSWolfram Schneider  * Copyright (c) 1995-2022 Wolfram Schneider <wosch@FreeBSD.org>
5dbaf4288SWolfram Schneider  * Copyright (c) 1989, 1993
6dbaf4288SWolfram Schneider  *	The Regents of the University of California.  All rights reserved.
7dbaf4288SWolfram Schneider  *
8dbaf4288SWolfram Schneider  * This code is derived from software contributed to Berkeley by
9dbaf4288SWolfram Schneider  * James A. Woods.
10dbaf4288SWolfram Schneider  *
11dbaf4288SWolfram Schneider  * Redistribution and use in source and binary forms, with or without
12dbaf4288SWolfram Schneider  * modification, are permitted provided that the following conditions
13dbaf4288SWolfram Schneider  * are met:
14dbaf4288SWolfram Schneider  * 1. Redistributions of source code must retain the above copyright
15dbaf4288SWolfram Schneider  *    notice, this list of conditions and the following disclaimer.
16dbaf4288SWolfram Schneider  * 2. Redistributions in binary form must reproduce the above copyright
17dbaf4288SWolfram Schneider  *    notice, this list of conditions and the following disclaimer in the
18dbaf4288SWolfram Schneider  *    documentation and/or other materials provided with the distribution.
19389844c0SBaptiste Daroussin  * 3. Neither the name of the University nor the names of its contributors
20dbaf4288SWolfram Schneider  *    may be used to endorse or promote products derived from this software
21dbaf4288SWolfram Schneider  *    without specific prior written permission.
22dbaf4288SWolfram Schneider  *
23dbaf4288SWolfram Schneider  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24dbaf4288SWolfram Schneider  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25dbaf4288SWolfram Schneider  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26dbaf4288SWolfram Schneider  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27dbaf4288SWolfram Schneider  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28dbaf4288SWolfram Schneider  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29dbaf4288SWolfram Schneider  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30dbaf4288SWolfram Schneider  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31dbaf4288SWolfram Schneider  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32dbaf4288SWolfram Schneider  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33dbaf4288SWolfram Schneider  * SUCH DAMAGE.
34dbaf4288SWolfram Schneider  */
35dbaf4288SWolfram Schneider 
36dbaf4288SWolfram Schneider 
37dbaf4288SWolfram Schneider #ifndef _LOCATE_STATISTIC_
38dbaf4288SWolfram Schneider #define _LOCATE_STATISTIC_
39dbaf4288SWolfram Schneider 
40dbaf4288SWolfram Schneider void
statistic(FILE * fp,char * path_fcodes)41*c330a185SJohn Baldwin statistic (FILE *fp, char *path_fcodes)
42dbaf4288SWolfram Schneider {
438d35ca86SWolfram Schneider 	long lines, chars, size, size_nbg, big, zwerg, umlaut;
4469ae5b96SWolfram Schneider 	u_char *p, *s;
4569ae5b96SWolfram Schneider 	int c;
469146546eSWolfram Schneider 	int count, longest_path;
47ccf50c1dSWolfram Schneider 	int error = 0;
48834a8fa1SWolfram Schneider 	u_char bigram1[NBG], bigram2[NBG], path[LOCATE_PATH_MAX];
49dbaf4288SWolfram Schneider 
50dbaf4288SWolfram Schneider 	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
51dbaf4288SWolfram Schneider 		p[c] = check_bigram_char(getc(fp));
52dbaf4288SWolfram Schneider 		s[c] = check_bigram_char(getc(fp));
53dbaf4288SWolfram Schneider 	}
54dbaf4288SWolfram Schneider 
559146546eSWolfram Schneider 	lines = chars = big = zwerg = umlaut = longest_path = 0;
56dbaf4288SWolfram Schneider 	size = NBG + NBG;
57dbaf4288SWolfram Schneider 
58dbaf4288SWolfram Schneider 	for (c = getc(fp), count = 0; c != EOF; size++) {
59dbaf4288SWolfram Schneider 		if (c == SWITCH) {
60dbaf4288SWolfram Schneider 			count += getwf(fp) - OFFSET;
61dbaf4288SWolfram Schneider 			size += sizeof(int);
62139764e8SWolfram Schneider 			zwerg++;
63dbaf4288SWolfram Schneider 		} else
64dbaf4288SWolfram Schneider 			count += c - OFFSET;
65dbaf4288SWolfram Schneider 
66834a8fa1SWolfram Schneider 		if (count < 0 || count >= LOCATE_PATH_MAX) {
67b7a74bbcSWolfram Schneider 			/* stop on error and display the statstics anyway */
68834a8fa1SWolfram Schneider 			warnx("corrupted database: %s %d", path_fcodes, count);
69ccf50c1dSWolfram Schneider 			error = 1;
70b7a74bbcSWolfram Schneider 			break;
71b7a74bbcSWolfram Schneider 		}
72b7a74bbcSWolfram Schneider 
73dbaf4288SWolfram Schneider 		for (p = path + count; (c = getc(fp)) > SWITCH; size++)
74139764e8SWolfram Schneider 			if (c < PARITY) {
75139764e8SWolfram Schneider 				if (c == UMLAUT) {
76139764e8SWolfram Schneider 					c = getc(fp);
77139764e8SWolfram Schneider 					size++;
78139764e8SWolfram Schneider 					umlaut++;
79139764e8SWolfram Schneider 				}
80dbaf4288SWolfram Schneider 				p++;
81139764e8SWolfram Schneider 			} else {
82139764e8SWolfram Schneider 				/* bigram char */
83dbaf4288SWolfram Schneider 				big++;
84dbaf4288SWolfram Schneider 				p += 2;
85dbaf4288SWolfram Schneider 			}
86dbaf4288SWolfram Schneider 
87dbaf4288SWolfram Schneider 		p++;
88dbaf4288SWolfram Schneider 		lines++;
89dbaf4288SWolfram Schneider 		chars += (p - path);
909146546eSWolfram Schneider 		if ((p - path) > longest_path)
919146546eSWolfram Schneider 			longest_path = p - path;
92dbaf4288SWolfram Schneider 	}
93dbaf4288SWolfram Schneider 
948d35ca86SWolfram Schneider 	/* size without bigram db */
958d35ca86SWolfram Schneider 	size_nbg = size - (2 * NBG);
968d35ca86SWolfram Schneider 
97dbaf4288SWolfram Schneider 	(void)printf("\nDatabase: %s\n", path_fcodes);
988d35ca86SWolfram Schneider 	(void)printf("Compression: Front: %2.2f%%, ", chars > 0 ?  (size_nbg + big) / (chars / (float)100) : 0);
998d35ca86SWolfram Schneider 	(void)printf("Bigram: %2.2f%%, ", big > 0 ? (size_nbg - big) / (size_nbg / (float)100) : 0);
1008d35ca86SWolfram Schneider 	/* incl. bigram db overhead */
1018d35ca86SWolfram Schneider 	(void)printf("Total: %2.2f%%\n", chars > 0 ?  size / (chars / (float)100) : 0);
102cfa38564SWolfram Schneider 	(void)printf("Filenames: %ld, ", lines);
103cfa38564SWolfram Schneider 	(void)printf("Characters: %ld, ", chars);
104cfa38564SWolfram Schneider 	(void)printf("Database size: %ld\n", size);
105cfa38564SWolfram Schneider 	(void)printf("Bigram characters: %ld, ", big);
106cfa38564SWolfram Schneider 	(void)printf("Integers: %ld, ", zwerg);
107cfa38564SWolfram Schneider 	(void)printf("8-Bit characters: %ld\n", umlaut);
1088d35ca86SWolfram Schneider 	printf("Longest path: %d\n", longest_path > 0 ? longest_path - 1 : 0);
109dbaf4288SWolfram Schneider 
110ccf50c1dSWolfram Schneider 	/* non zero exit on corrupt database */
111ccf50c1dSWolfram Schneider 	if (error)
112ccf50c1dSWolfram Schneider 		exit(error);
113dbaf4288SWolfram Schneider }
114dbaf4288SWolfram Schneider #endif /* _LOCATE_STATISTIC_ */
115dbaf4288SWolfram Schneider 
1161c6859f8SDag-Erling Smørgrav extern	char	separator;
117dbaf4288SWolfram Schneider 
118dbaf4288SWolfram Schneider void
119dbaf4288SWolfram Schneider #ifdef FF_MMAP
120dbaf4288SWolfram Schneider 
121dbaf4288SWolfram Schneider 
122dbaf4288SWolfram Schneider #ifdef FF_ICASE
fastfind_mmap_icase(char * pathpart,caddr_t paddr,off_t len,char * database)123dbaf4288SWolfram Schneider fastfind_mmap_icase
124dbaf4288SWolfram Schneider #else
125dbaf4288SWolfram Schneider fastfind_mmap
126139764e8SWolfram Schneider #endif /* FF_ICASE */
127*c330a185SJohn Baldwin (char *pathpart, caddr_t paddr, off_t len, char *database)
128dbaf4288SWolfram Schneider 
129dbaf4288SWolfram Schneider 
130dbaf4288SWolfram Schneider #else /* MMAP */
131dbaf4288SWolfram Schneider 
132dbaf4288SWolfram Schneider 
133dbaf4288SWolfram Schneider #ifdef FF_ICASE
134dbaf4288SWolfram Schneider fastfind_icase
135139764e8SWolfram Schneider #else
136dbaf4288SWolfram Schneider fastfind
137dbaf4288SWolfram Schneider #endif /* FF_ICASE */
138dbaf4288SWolfram Schneider 
139*c330a185SJohn Baldwin (FILE *fp, char *pathpart, char *database)
140dbaf4288SWolfram Schneider 
141dbaf4288SWolfram Schneider 
142dbaf4288SWolfram Schneider #endif /* MMAP */
143dbaf4288SWolfram Schneider 
144dbaf4288SWolfram Schneider {
14569ae5b96SWolfram Schneider 	u_char *p, *s, *patend, *q, *foundchar;
14669ae5b96SWolfram Schneider 	int c, cc;
147dbaf4288SWolfram Schneider 	int count, found, globflag;
148dbaf4288SWolfram Schneider 	u_char *cutoff;
149834a8fa1SWolfram Schneider 	u_char bigram1[NBG], bigram2[NBG], path[LOCATE_PATH_MAX + 2];
150dbaf4288SWolfram Schneider 
151dbaf4288SWolfram Schneider #ifdef FF_ICASE
152dbaf4288SWolfram Schneider 	/* use a lookup table for case insensitive search */
153139764e8SWolfram Schneider 	u_char table[UCHAR_MAX + 1];
154dbaf4288SWolfram Schneider 
155dbaf4288SWolfram Schneider 	tolower_word(pathpart);
156139764e8SWolfram Schneider #endif /* FF_ICASE*/
157dbaf4288SWolfram Schneider 
158dbaf4288SWolfram Schneider 	/* init bigram table */
159dbaf4288SWolfram Schneider #ifdef FF_MMAP
160dbaf4288SWolfram Schneider 	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++, len-= 2) {
161dbaf4288SWolfram Schneider 		p[c] = check_bigram_char(*paddr++);
162dbaf4288SWolfram Schneider 		s[c] = check_bigram_char(*paddr++);
163dbaf4288SWolfram Schneider 	}
164dbaf4288SWolfram Schneider #else
165dbaf4288SWolfram Schneider 	for (c = 0, p = bigram1, s = bigram2; c < NBG; c++) {
166dbaf4288SWolfram Schneider 		p[c] = check_bigram_char(getc(fp));
167dbaf4288SWolfram Schneider 		s[c] = check_bigram_char(getc(fp));
168dbaf4288SWolfram Schneider 	}
169139764e8SWolfram Schneider #endif /* FF_MMAP */
170dbaf4288SWolfram Schneider 
171dbaf4288SWolfram Schneider 	/* find optimal (last) char for searching */
172b72acdbdSWolfram Schneider 	for (p = pathpart; *p != '\0'; p++)
173b3608ae1SEd Schouten 		if (strchr(LOCATE_REG, *p) != NULL)
174b72acdbdSWolfram Schneider 			break;
175b72acdbdSWolfram Schneider 
176b72acdbdSWolfram Schneider 	if (*p == '\0')
177b72acdbdSWolfram Schneider 		globflag = 0;
178b72acdbdSWolfram Schneider 	else
179b72acdbdSWolfram Schneider 		globflag = 1;
180b72acdbdSWolfram Schneider 
181dbaf4288SWolfram Schneider 	p = pathpart;
182dbaf4288SWolfram Schneider 	patend = patprep(p);
183dbaf4288SWolfram Schneider 	cc = *patend;
184dbaf4288SWolfram Schneider 
185dbaf4288SWolfram Schneider #ifdef FF_ICASE
186dbaf4288SWolfram Schneider 	/* set patend char to true */
187f8a459d3SWolfram Schneider         for (c = 0; c < UCHAR_MAX + 1; c++)
188f8a459d3SWolfram Schneider                 table[c] = 0;
189f8a459d3SWolfram Schneider 
190dbaf4288SWolfram Schneider 	table[TOLOWER(*patend)] = 1;
191dbaf4288SWolfram Schneider 	table[toupper(*patend)] = 1;
192139764e8SWolfram Schneider #endif /* FF_ICASE */
193dbaf4288SWolfram Schneider 
194dbaf4288SWolfram Schneider 
195dbaf4288SWolfram Schneider 	/* main loop */
196dbaf4288SWolfram Schneider 	found = count = 0;
197dbaf4288SWolfram Schneider 	foundchar = 0;
198dbaf4288SWolfram Schneider 
199dbaf4288SWolfram Schneider #ifdef FF_MMAP
20033ee87faSWolfram Schneider 	c = (u_char)*paddr++;
20133ee87faSWolfram Schneider 	len--;
20233ee87faSWolfram Schneider 
203139764e8SWolfram Schneider 	for (; len > 0; ) {
204dbaf4288SWolfram Schneider #else
205139764e8SWolfram Schneider 	c = getc(fp);
206139764e8SWolfram Schneider 	for (; c != EOF; ) {
207139764e8SWolfram Schneider #endif /* FF_MMAP */
208dbaf4288SWolfram Schneider 
209dbaf4288SWolfram Schneider 		/* go forward or backward */
210dbaf4288SWolfram Schneider 		if (c == SWITCH) { /* big step, an integer */
211dbaf4288SWolfram Schneider #ifdef FF_MMAP
21233ee87faSWolfram Schneider 			if (len < sizeof(int))
21333ee87faSWolfram Schneider 				errx(1, "corrupted database: %s", database);
21433ee87faSWolfram Schneider 
215dbaf4288SWolfram Schneider 			count += getwm(paddr) - OFFSET;
21633ee87faSWolfram Schneider 			len -= INTSIZE;
21733ee87faSWolfram Schneider 			paddr += INTSIZE;
218dbaf4288SWolfram Schneider #else
219dbaf4288SWolfram Schneider 			count +=  getwf(fp) - OFFSET;
220139764e8SWolfram Schneider #endif /* FF_MMAP */
221dbaf4288SWolfram Schneider 		} else {	   /* slow step, =< 14 chars */
222dbaf4288SWolfram Schneider 			count += c - OFFSET;
223dbaf4288SWolfram Schneider 		}
224dbaf4288SWolfram Schneider 
225834a8fa1SWolfram Schneider 		if (count < 0 || count >= LOCATE_PATH_MAX)
22633ee87faSWolfram Schneider 			errx(1, "corrupted database: %s %d", database, count);
22733ee87faSWolfram Schneider 
228dbaf4288SWolfram Schneider 		/* overlay old path */
229dbaf4288SWolfram Schneider 		p = path + count;
230dbaf4288SWolfram Schneider 		foundchar = p - 1;
231dbaf4288SWolfram Schneider 
232139764e8SWolfram Schneider #ifdef FF_MMAP
23347ecee5eSWolfram Schneider 		for (; len > 0;) {
234139764e8SWolfram Schneider 			c = (u_char)*paddr++;
235139764e8SWolfram Schneider 		        len--;
236139764e8SWolfram Schneider #else
23747ecee5eSWolfram Schneider 		for (;;) {
238139764e8SWolfram Schneider 			c = getc(fp);
239139764e8SWolfram Schneider #endif /* FF_MMAP */
240139764e8SWolfram Schneider 			/*
241139764e8SWolfram Schneider 			 * == UMLAUT: 8 bit char followed
242139764e8SWolfram Schneider 			 * <= SWITCH: offset
243139764e8SWolfram Schneider 			 * >= PARITY: bigram
244139764e8SWolfram Schneider 			 * rest:      single ascii char
245139764e8SWolfram Schneider 			 *
246139764e8SWolfram Schneider 			 * offset < SWITCH < UMLAUT < ascii < PARITY < bigram
247139764e8SWolfram Schneider 			 */
248dbaf4288SWolfram Schneider 			if (c < PARITY) {
249139764e8SWolfram Schneider 				if (c <= UMLAUT) {
250139764e8SWolfram Schneider 					if (c == UMLAUT) {
251139764e8SWolfram Schneider #ifdef FF_MMAP
252139764e8SWolfram Schneider 						c = (u_char)*paddr++;
253139764e8SWolfram Schneider 						len--;
254139764e8SWolfram Schneider #else
255139764e8SWolfram Schneider 						c = getc(fp);
256139764e8SWolfram Schneider #endif /* FF_MMAP */
257139764e8SWolfram Schneider 
258139764e8SWolfram Schneider 					} else
259139764e8SWolfram Schneider 						break; /* SWITCH */
260139764e8SWolfram Schneider 				}
261dbaf4288SWolfram Schneider #ifdef FF_ICASE
262dbaf4288SWolfram Schneider 				if (table[c])
263dbaf4288SWolfram Schneider #else
264dbaf4288SWolfram Schneider 				if (c == cc)
265139764e8SWolfram Schneider #endif /* FF_ICASE */
266dbaf4288SWolfram Schneider 					foundchar = p;
267dbaf4288SWolfram Schneider 				*p++ = c;
268dbaf4288SWolfram Schneider 			}
269dbaf4288SWolfram Schneider 			else {
270dbaf4288SWolfram Schneider 				/* bigrams are parity-marked */
271dbaf4288SWolfram Schneider 				TO7BIT(c);
272dbaf4288SWolfram Schneider 
273dbaf4288SWolfram Schneider #ifndef FF_ICASE
274dbaf4288SWolfram Schneider 				if (bigram1[c] == cc ||
275dbaf4288SWolfram Schneider 				    bigram2[c] == cc)
276dbaf4288SWolfram Schneider #else
277dbaf4288SWolfram Schneider 
278dbaf4288SWolfram Schneider 					if (table[bigram1[c]] ||
279dbaf4288SWolfram Schneider 					    table[bigram2[c]])
280139764e8SWolfram Schneider #endif /* FF_ICASE */
281dbaf4288SWolfram Schneider 						foundchar = p + 1;
282dbaf4288SWolfram Schneider 
283dbaf4288SWolfram Schneider 				*p++ = bigram1[c];
284dbaf4288SWolfram Schneider 				*p++ = bigram2[c];
285dbaf4288SWolfram Schneider 			}
28633ee87faSWolfram Schneider 
287834a8fa1SWolfram Schneider 			if (p - path >= LOCATE_PATH_MAX)
28839a30a80SWolfram Schneider 				errx(1, "corrupted database: %s %td", database, p - path);
28933ee87faSWolfram Schneider 
290139764e8SWolfram Schneider 		}
291dbaf4288SWolfram Schneider 
292dbaf4288SWolfram Schneider 		if (found) {                     /* previous line matched */
293dbaf4288SWolfram Schneider 			cutoff = path;
294dbaf4288SWolfram Schneider 			*p-- = '\0';
295dbaf4288SWolfram Schneider 			foundchar = p;
296dbaf4288SWolfram Schneider 		} else if (foundchar >= path + count) { /* a char matched */
297dbaf4288SWolfram Schneider 			*p-- = '\0';
298dbaf4288SWolfram Schneider 			cutoff = path + count;
299dbaf4288SWolfram Schneider 		} else                           /* nothing to do */
300dbaf4288SWolfram Schneider 			continue;
301dbaf4288SWolfram Schneider 
302dbaf4288SWolfram Schneider 		found = 0;
303dbaf4288SWolfram Schneider 		for (s = foundchar; s >= cutoff; s--) {
304dbaf4288SWolfram Schneider 			if (*s == cc
305dbaf4288SWolfram Schneider #ifdef FF_ICASE
306dbaf4288SWolfram Schneider 			    || TOLOWER(*s) == cc
307139764e8SWolfram Schneider #endif /* FF_ICASE */
308dbaf4288SWolfram Schneider 			    ) {	/* fast first char check */
309dbaf4288SWolfram Schneider 				for (p = patend - 1, q = s - 1; *p != '\0';
310dbaf4288SWolfram Schneider 				     p--, q--)
311dbaf4288SWolfram Schneider 					if (*q != *p
312dbaf4288SWolfram Schneider #ifdef FF_ICASE
313dbaf4288SWolfram Schneider 					    && TOLOWER(*q) != *p
314139764e8SWolfram Schneider #endif /* FF_ICASE */
315dbaf4288SWolfram Schneider 					    )
316dbaf4288SWolfram Schneider 						break;
317dbaf4288SWolfram Schneider 				if (*p == '\0') {   /* fast match success */
318dbaf4288SWolfram Schneider 					found = 1;
319efcc77baSWolfram Schneider 					if (!globflag ||
320efcc77baSWolfram Schneider #ifndef FF_ICASE
321efcc77baSWolfram Schneider 					    !fnmatch(pathpart, path, 0))
322efcc77baSWolfram Schneider #else
323efcc77baSWolfram Schneider 					    !fnmatch(pathpart, path,
324a7230c69SAndrey A. Chernov 						     FNM_CASEFOLD))
325efcc77baSWolfram Schneider #endif /* !FF_ICASE */
326efcc77baSWolfram Schneider 					{
327dbaf4288SWolfram Schneider 						if (f_silent)
328dbaf4288SWolfram Schneider 							counter++;
329dbaf4288SWolfram Schneider 						else if (f_limit) {
330dbaf4288SWolfram Schneider 							counter++;
331dbaf4288SWolfram Schneider 							if (f_limit >= counter)
3321c6859f8SDag-Erling Smørgrav 								(void)printf("%s%c",path,separator);
333bef3ca03SPhilippe Charnier 							else
3340c178a2aSWolfram Schneider 								errx(0, "[show only %ld lines]", counter - 1);
335dbaf4288SWolfram Schneider 						} else
3361c6859f8SDag-Erling Smørgrav 							(void)printf("%s%c",path,separator);
337dbaf4288SWolfram Schneider 					}
338dbaf4288SWolfram Schneider 					break;
339dbaf4288SWolfram Schneider 				}
340dbaf4288SWolfram Schneider 			}
341dbaf4288SWolfram Schneider 		}
342dbaf4288SWolfram Schneider 	}
343dbaf4288SWolfram Schneider }
344