1 /*
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1995-2022 Wolfram Schneider <wosch@FreeBSD.org>
5 * Copyright (c) 1989, 1993
6 * The Regents of the University of California. All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * James A. Woods.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 * may be used to endorse or promote products derived from this software
21 * without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36 /*
37 * Ref: Usenix ;login:, Vol 8, No 1, February/March, 1983, p. 8.
38 *
39 * Locate scans a file list for the full pathname of a file given only part
40 * of the name. The list has been processed with with "front-compression"
41 * and bigram coding. Front compression reduces space by a factor of 4-5,
42 * bigram coding by a further 20-25%.
43 *
44 * The codes are:
45 *
46 * 0-28 likeliest differential counts + offset to make nonnegative
47 * 30 switch code for out-of-range count to follow in next word
48 * 31 an 8 bit char followed
49 * 128-255 bigram codes (128 most common, as determined by 'updatedb')
50 * 32-127 single character (printable) ascii residue (ie, literal)
51 *
52 * A novel two-tiered string search technique is employed:
53 *
54 * First, a metacharacter-free subpattern and partial pathname is matched
55 * BACKWARDS to avoid full expansion of the pathname list. The time savings
56 * is 40-50% over forward matching, which cannot efficiently handle
57 * overlapped search patterns and compressed path residue.
58 *
59 * Then, the actual shell glob-style regular expression (if in this form) is
60 * matched against the candidate pathnames using the slower routines provided
61 * in the standard 'find'.
62 */
63
64 #include <sys/param.h>
65 #include <ctype.h>
66 #include <err.h>
67 #include <fnmatch.h>
68 #include <locale.h>
69 #include <stdio.h>
70 #include <stdlib.h>
71 #include <string.h>
72 #include <unistd.h>
73
74 #ifdef MMAP
75 # include <sys/types.h>
76 # include <sys/stat.h>
77 # include <sys/mman.h>
78 # include <fcntl.h>
79 #endif
80
81 #include "locate.h"
82 #include "pathnames.h"
83
84
85 int f_mmap; /* use mmap */
86 int f_icase; /* ignore case */
87 int f_stdin; /* read database from stdin */
88 int f_statistic; /* print statistic */
89 int f_silent; /* suppress output, show only count of matches */
90 long f_limit; /* limit number of output lines, 0 == infinite */
91 long counter; /* counter for matches [-c] */
92 char separator='\n'; /* line separator */
93
94 u_char myctype[UCHAR_MAX + 1];
95
96 void usage(void);
97 void statistic(FILE *, char *);
98 void fastfind(FILE *, char *, char *);
99 void fastfind_icase(FILE *, char *, char *);
100 void fastfind_mmap(char *, caddr_t, off_t, char *);
101 void fastfind_mmap_icase(char *, caddr_t, off_t, char *);
102 void search_mmap(char *, char **);
103 void search_fopen(char *, char **);
104 unsigned long cputime(void);
105
106 extern char **colon(char **, char*, char*);
107 extern int getwm(caddr_t);
108 extern int getwf(FILE *);
109 extern u_char *tolower_word(u_char *);
110 extern int check_bigram_char(int);
111 extern char *patprep(char *);
112 extern void rebuild_message(char *db);
113 extern int check_size(char *db);
114
115 int
main(int argc,char ** argv)116 main(int argc, char **argv)
117 {
118 int ch;
119 char **dbv = NULL;
120 char *path_fcodes; /* locate database */
121 #ifdef MMAP
122 f_mmap = 1; /* mmap is default */
123 #endif
124 (void) setlocale(LC_ALL, "");
125
126 while ((ch = getopt(argc, argv, "0Scd:il:ms")) != -1)
127 switch(ch) {
128 case '0': /* 'find -print0' style */
129 separator = '\0';
130 break;
131 case 'S': /* statistic lines */
132 f_statistic = 1;
133 break;
134 case 'l': /* limit number of output lines, 0 == infinite */
135 f_limit = atol(optarg);
136 if (f_limit < 0 )
137 errx(1, "invalid argument for -l: '%s'", optarg);
138 break;
139 case 'd': /* database */
140 dbv = colon(dbv, optarg, _PATH_FCODES);
141 break;
142 case 'i': /* ignore case */
143 f_icase = 1;
144 break;
145 case 'm': /* mmap */
146 #ifdef MMAP
147 f_mmap = 1;
148 #else
149 warnx("mmap(2) not implemented");
150 #endif
151 break;
152 case 's': /* stdio lib */
153 f_mmap = 0;
154 break;
155 case 'c': /* suppress output, show only count of matches */
156 f_silent = 1;
157 break;
158 default:
159 usage();
160 }
161 argv += optind;
162 argc -= optind;
163
164 /* to few arguments */
165 if (argc < 1 && !(f_statistic))
166 usage();
167
168 /* no (valid) database as argument */
169 if (dbv == NULL || *dbv == NULL) {
170 /* try to read database from environment */
171 if ((path_fcodes = getenv("LOCATE_PATH")) == NULL ||
172 *path_fcodes == '\0')
173 /* use default database */
174 dbv = colon(dbv, _PATH_FCODES, _PATH_FCODES);
175 else /* $LOCATE_PATH */
176 dbv = colon(dbv, path_fcodes, _PATH_FCODES);
177 }
178
179 if (f_icase && UCHAR_MAX < 4096) /* init tolower lookup table */
180 for (ch = 0; ch < UCHAR_MAX + 1; ch++)
181 myctype[ch] = tolower(ch);
182
183 /* foreach database ... */
184 while((path_fcodes = *dbv) != NULL) {
185 dbv++;
186
187 if (!strcmp(path_fcodes, "-"))
188 f_stdin = 1;
189 else
190 f_stdin = 0;
191
192 #ifndef MMAP
193 f_mmap = 0; /* be paranoid */
194 #endif
195 if (!f_mmap || f_stdin || f_statistic)
196 search_fopen(path_fcodes, argv);
197 else
198 search_mmap(path_fcodes, argv);
199 }
200
201 if (f_silent)
202 printf("%ld\n", counter);
203 exit(0);
204 }
205
206 /*
207 * Arguments:
208 * db database
209 * s search strings
210 */
211 void
search_fopen(char * db,char ** s)212 search_fopen(char *db, char **s)
213 {
214 FILE *fp;
215
216 /* can only read stdin once */
217 if (f_stdin) {
218 fp = stdin;
219 if (*(s+1) != NULL) {
220 warnx("read database from stdin, use only `%s' as pattern", *s);
221 *(s+1) = NULL;
222 }
223 }
224 else {
225 if (!check_size(db))
226 exit(1);
227
228 if ((fp = fopen(db, "r")) == NULL) {
229 warn("`%s'", db);
230 rebuild_message(db);
231 exit(1);
232 }
233 }
234
235 /* count only chars or lines */
236 if (f_statistic) {
237 statistic(fp, db);
238 (void)fclose(fp);
239 return;
240 }
241
242 /* foreach search string ... */
243 while(*s != NULL) {
244 if (!f_stdin &&
245 fseek(fp, (long)0, SEEK_SET) == -1)
246 err(1, "fseek to begin of ``%s''\n", db);
247
248 if (f_icase)
249 fastfind_icase(fp, *s, db);
250 else
251 fastfind(fp, *s, db);
252 s++;
253 }
254 (void)fclose(fp);
255 }
256
257 #ifdef MMAP
258 /*
259 * Arguments:
260 * db database
261 * s search strings
262 */
263 void
search_mmap(char * db,char ** s)264 search_mmap(char *db, char **s)
265 {
266 struct stat sb;
267 int fd;
268 caddr_t p;
269 off_t len;
270
271 if (!check_size(db))
272 exit(1);
273
274 if (stat(db, &sb) == -1)
275 err(1, "stat");
276
277 len = sb.st_size;
278
279 if ((fd = open(db, O_RDONLY)) == -1) {
280 warn("%s", db);
281 rebuild_message(db);
282 exit(1);
283 }
284
285 if ((p = mmap((caddr_t)0, (size_t)len,
286 PROT_READ, MAP_SHARED,
287 fd, (off_t)0)) == MAP_FAILED)
288 err(1, "mmap ``%s''", db);
289
290 /* foreach search string ... */
291 while (*s != NULL) {
292 if (f_icase)
293 fastfind_mmap_icase(*s, p, len, db);
294 else
295 fastfind_mmap(*s, p, len, db);
296 s++;
297 }
298
299 if (munmap(p, (size_t)len) == -1)
300 warn("munmap %s\n", db);
301
302 (void)close(fd);
303 }
304 #endif /* MMAP */
305
306 void
usage()307 usage ()
308 {
309 (void)fprintf(stderr,
310 "usage: locate [-0Scims] [-l limit] [-d database] pattern ...\n\n");
311 (void)fprintf(stderr,
312 "default database: `%s' or $LOCATE_PATH\n", _PATH_FCODES);
313 exit(1);
314 }
315
316
317 /* load fastfind functions */
318
319 /* statistic */
320 /* fastfind_mmap, fastfind_mmap_icase */
321 #ifdef MMAP
322 #undef FF_MMAP
323 #undef FF_ICASE
324
325 #define FF_MMAP
326 #include "fastfind.c"
327 #define FF_ICASE
328 #include "fastfind.c"
329 #endif /* MMAP */
330
331 /* fopen */
332 /* fastfind, fastfind_icase */
333 #undef FF_MMAP
334 #undef FF_ICASE
335 #include "fastfind.c"
336 #define FF_ICASE
337 #include "fastfind.c"
338