xref: /titanic_44/usr/src/lib/libast/common/misc/fastfind.c (revision 416369f6ab2ab327f0e71cbdb854f9e5bdc6f5ec)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *           Copyright (c) 1985-2007 AT&T Knowledge Ventures            *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                      by AT&T Knowledge Ventures                      *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 /*
24  * original code
25  *
26  *  	James A. Woods, Informatics General Corporation,
27  *	NASA Ames Research Center, 6/81.
28  *	Usenix ;login:, February/March, 1983, p. 8.
29  *
30  * discipline/method interface
31  *
32  *	Glenn Fowler
33  *	AT&T Research
34  *	modified from the original BSD source
35  *
36  * 'fastfind' scans a file list for the full pathname of a file
37  * given only a piece of the name.  The list is processed with
38  * with "front-compression" and bigram coding.  Front compression reduces
39  * space by a factor of 4-5, bigram coding by a further 20-25%.
40  *
41  * there are 4 methods:
42  *
43  *	FF_old	original with 7 bit bigram encoding (no magic)
44  *	FF_gnu	8 bit clean front compression (FF_gnu_magic)
45  *	FF_dir	FF_gnu with sfgetl/sfputl and trailing / on dirs (FF_dir_magic)
46  *	FF_typ	FF_dir with (mime) types (FF_typ_magic)
47  *
48  * the bigram encoding steals the eighth bit (that's why its FF_old)
49  * maybe one day we'll limit it to readonly:
50  *
51  * 0-2*FF_OFF	 likeliest differential counts + offset to make nonnegative
52  * FF_ESC	 4 byte big-endian out-of-range count+FF_OFF follows
53  * FF_MIN-FF_MAX ascii residue
54  * >=FF_MAX	 bigram codes
55  *
56  * a two-tiered string search technique is employed
57  *
58  * a metacharacter-free subpattern and partial pathname is matched
59  * backwards to avoid full expansion of the pathname list
60  *
61  * then the actual shell glob-style regular expression (if in this form)
62  * is matched against the candidate pathnames using the slower regexec()
63  *
64  * The original BSD code is covered by the BSD license:
65  *
66  * Copyright (c) 1985, 1993, 1999
67  *	The Regents of the University of California.  All rights reserved.
68  *
69  * Redistribution and use in source and binary forms, with or without
70  * modification, are permitted provided that the following conditions
71  * are met:
72  * 1. Redistributions of source code must retain the above copyright
73  *    notice, this list of conditions and the following disclaimer.
74  * 2. Redistributions in binary form must reproduce the above copyright
75  *    notice, this list of conditions and the following disclaimer in the
76  *    documentation and/or other materials provided with the distribution.
77  * 3. Neither the name of the University nor the names of its contributors
78  *    may be used to endorse or promote products derived from this software
79  *    without specific prior written permission.
80  *
81  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
82  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
83  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
84  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
85  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
86  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
87  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
88  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
89  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
90  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
91  * SUCH DAMAGE.
92  */
93 
94 static const char id[] = "\n@(#)$Id: fastfind (AT&T Research) 2002-10-02 $\0\n";
95 
96 static const char lib[] = "libast:fastfind";
97 
98 #include "findlib.h"
99 
100 #define FIND_MATCH	"*/(find|locate)/*"
101 
102 /*
103  * this db could be anywhere
104  * findcodes[] directories are checked for findnames[i]
105  */
106 
107 static char*		findcodes[] =
108 {
109 	0,
110 	0,
111 	FIND_CODES,
112 	"/usr/local/share/lib",
113 	"/usr/local/lib",
114 	"/usr/share/lib",
115 	"/usr/lib",
116 	"/var/spool",
117 	"/usr/local/var",
118 	"/var/lib",
119 	"/var/lib/slocate",
120 	"/var/db",
121 };
122 
123 static char*		findnames[] =
124 {
125 	"find/codes",
126 	"find/find.codes",
127 	"locate/locatedb",
128 	"locatedb",
129 	"locate.database",
130 	"slocate.db",
131 };
132 
133 /*
134  * convert t to lower case and drop leading x- and x- after /
135  * converted value copied to b of size n
136  */
137 
138 char*
139 typefix(char* buf, size_t n, register const char* t)
140 {
141 	register int	c;
142 	register char*	b = buf;
143 
144 	if ((*t == 'x' || *t == 'X') && *(t + 1) == '-')
145 		t += 2;
146 	while (c = *t++)
147 	{
148 		if (isupper(c))
149 			c = tolower(c);
150 		if ((*b++ = c) == '/' && (*t == 'x' || *t == 'X') && *(t + 1) == '-')
151 			t += 2;
152 	}
153 	*b = 0;
154 	return buf;
155 }
156 
157 /*
158  * return a fastfind stream handle for pattern
159  */
160 
161 Find_t*
162 findopen(const char* file, const char* pattern, const char* type, Finddisc_t* disc)
163 {
164 	register Find_t*	fp;
165 	register char*		p;
166 	register char*		s;
167 	register char*		b;
168 	register int		i;
169 	register int		j;
170 	char*			path;
171 	int			brace = 0;
172 	int			paren = 0;
173 	int			k;
174 	int			q;
175 	int			fd;
176 	int			uid;
177 	Vmalloc_t*		vm;
178 	Type_t*			tp;
179 	struct stat		st;
180 
181 
182 	if (!(vm = vmopen(Vmdcheap, Vmbest, 0)))
183 		goto nospace;
184 
185 	/*
186 	 * NOTE: searching for FIND_CODES would be much simpler if we
187 	 *       just stuck with our own, but we also support GNU
188 	 *	 locate codes and have to search for the one of a
189 	 *	 bazillion possible names for that file
190 	 */
191 
192 	if (!findcodes[1])
193 		findcodes[1] = getenv(FIND_CODES_ENV);
194 	if (disc->flags & FIND_GENERATE)
195 	{
196 		if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, sizeof(Encode_t) - sizeof(Code_t))))
197 			goto nospace;
198 		fp->vm = vm;
199 		fp->id = lib;
200 		fp->disc = disc;
201 		fp->generate = 1;
202 		if (file && (!*file || streq(file, "-")))
203 			file = 0;
204 		uid = geteuid();
205 		j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes);
206 
207 		/*
208 		 * look for the codes file, but since it may not exist yet,
209 		 * also look for the containing directory if i<2 or if
210 		 * it is sufficiently qualified (FIND_MATCH)
211 		 */
212 
213 		for (i = 0; i < j; i++)
214 			if (path = findcodes[i])
215 			{
216 				if (*path == '/')
217 				{
218 					if (!stat(path, &st))
219 					{
220 						if (S_ISDIR(st.st_mode))
221 						{
222 							for (k = 0; k < elementsof(findnames); k++)
223 							{
224 								sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s/%s", path, findnames[k]);
225 								if (!eaccess(fp->encode.file, R_OK|W_OK))
226 								{
227 									path = fp->encode.file;
228 									break;
229 								}
230 								if (strchr(findnames[k], '/') && (b = strrchr(fp->encode.file, '/')))
231 								{
232 									*b = 0;
233 									if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
234 									{
235 										*b = '/';
236 										path = fp->encode.file;
237 										break;
238 									}
239 								}
240 							}
241 							if (k < elementsof(findnames))
242 								break;
243 						}
244 						else if (st.st_uid == uid && (st.st_mode & S_IWUSR))
245 						{
246 							sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path);
247 							path = fp->encode.file;
248 							break;
249 						}
250 					}
251 					else if (i < 2 || strmatch(path, FIND_MATCH))
252 					{
253 						sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path);
254 						if (b = strrchr(fp->encode.file, '/'))
255 						{
256 							*b = 0;
257 							if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
258 							{
259 								*b = '/';
260 								path = fp->encode.file;
261 								break;
262 							}
263 						}
264 					}
265 				}
266 				else if (pathpath(fp->encode.file, path, "", PATH_REGULAR|PATH_READ|PATH_WRITE))
267 				{
268 					path = fp->encode.file;
269 					break;
270 				}
271 				else if (b = strrchr(path, '/'))
272 				{
273 					sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%-.*s", b - path, path);
274 					if (pathpath(fp->encode.temp, fp->encode.file, "", PATH_EXECUTE|PATH_READ|PATH_WRITE) &&
275 					    !stat(fp->encode.temp, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
276 					{
277 						sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s%s", fp->encode.temp, b);
278 						path = fp->encode.file;
279 						break;
280 					}
281 				}
282 			}
283 		if (i >= j)
284 		{
285 			if (fp->disc->errorf)
286 				(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]);
287 			goto drop;
288 		}
289 		if (fp->disc->flags & FIND_OLD)
290 		{
291 			/*
292 			 * FF_old generates temp data that is read
293 			 * in a second pass to generate the real codes
294 			 */
295 
296 			fp->method = FF_old;
297 			if (!(fp->fp = sftmp(32 * PATH_MAX)))
298 			{
299 				if (fp->disc->errorf)
300 					(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot create tmp file");
301 				goto drop;
302 			}
303 		}
304 		else
305 		{
306 			/*
307 			 * the rest generate into a temp file that
308 			 * is simply renamed on completion
309 			 */
310 
311 			if (s = strrchr(path, '/'))
312 			{
313 				*s = 0;
314 				p = path;
315 			}
316 			else
317 				p = ".";
318 			if (!pathtemp(fp->encode.temp, sizeof(fp->encode.temp), p, "ff", &fd))
319 			{
320 				if (fp->disc->errorf)
321 					(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot create tmp file in this directory", p ? p : ".");
322 				goto drop;
323 			}
324 			if (s)
325 				*s = '/';
326 			if (!(fp->fp = sfnew(NiL, NiL, (size_t)SF_UNBOUND, fd, SF_WRITE)))
327 			{
328 				if (fp->disc->errorf)
329 					(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot open tmp file", fp->encode.temp);
330 				close(fd);
331 				goto drop;
332 			}
333 			if (fp->disc->flags & FIND_TYPE)
334 			{
335 				fp->method = FF_typ;
336 				fp->encode.namedisc.key = offsetof(Type_t, name);
337 				fp->encode.namedisc.link = offsetof(Type_t, byname);
338 				fp->encode.indexdisc.key = offsetof(Type_t, index);
339 				fp->encode.indexdisc.size = sizeof(unsigned long);
340 				fp->encode.indexdisc.link = offsetof(Type_t, byindex);
341 				s = "system/dir";
342 				if (!(fp->encode.namedict = dtopen(&fp->encode.namedisc, Dttree)) || !(fp->encode.indexdict = dtopen(&fp->encode.indexdisc, Dttree)) || !(tp = newof(0, Type_t, 1, strlen(s) + 1)))
343 				{
344 					if (fp->encode.namedict)
345 						dtclose(fp->encode.namedict);
346 					if (fp->disc->errorf)
347 						(*fp->disc->errorf)(fp, fp->disc, 2, "cannot allocate type table");
348 					goto drop;
349 				}
350 
351 				/*
352 				 * type index 1 is always system/dir
353 				 */
354 
355 				tp->index = ++fp->types;
356 				strcpy(tp->name, s);
357 				dtinsert(fp->encode.namedict, tp);
358 				dtinsert(fp->encode.indexdict, tp);
359 			}
360 			else if (fp->disc->flags & FIND_GNU)
361 			{
362 				fp->method = FF_gnu;
363 				sfputc(fp->fp, 0);
364 				sfputr(fp->fp, FF_gnu_magic, 0);
365 			}
366 			else
367 			{
368 				fp->method = FF_dir;
369 				sfputc(fp->fp, 0);
370 				sfputr(fp->fp, FF_dir_magic, 0);
371 			}
372 		}
373 	}
374 	else
375 	{
376 		i = sizeof(Decode_t) + sizeof(Code_t);
377 		if (!pattern || !*pattern)
378 			pattern = "*";
379 		i += (j = 2 * (strlen(pattern) + 1));
380 		if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, i)))
381 		{
382 			vmclose(vm);
383 			return 0;
384 		}
385 		fp->vm = vm;
386 		fp->id = lib;
387 		fp->disc = disc;
388 		if (disc->flags & FIND_ICASE)
389 			fp->decode.ignorecase = 1;
390 		j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes);
391 		for (i = 0; i < j; i++)
392 			if (path = findcodes[i])
393 			{
394 				if (*path == '/')
395 				{
396 					if (!stat(path, &st))
397 					{
398 						if (S_ISDIR(st.st_mode))
399 						{
400 							for (k = 0; k < elementsof(findnames); k++)
401 							{
402 								sfsprintf(fp->decode.path, sizeof(fp->decode.path), "%s/%s", path, findnames[k]);
403 								if (fp->fp = sfopen(NiL, fp->decode.path, "r"))
404 								{
405 									path = fp->decode.path;
406 									break;
407 								}
408 							}
409 							if (fp->fp)
410 								break;
411 						}
412 						else if (fp->fp = sfopen(NiL, path, "r"))
413 							break;
414 					}
415 				}
416 				else if ((path = pathpath(fp->decode.path, path, "", PATH_REGULAR|PATH_READ)) && (fp->fp = sfopen(NiL, path, "r")))
417 					break;
418 			}
419 		if (!fp->fp)
420 		{
421 			if (fp->disc->errorf)
422 				(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]);
423 			goto drop;
424 		}
425 		if (fstat(sffileno(fp->fp), &st))
426 		{
427 			if (fp->disc->errorf)
428 				(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot stat codes", path);
429 			goto drop;
430 		}
431 		if (fp->secure = ((st.st_mode & (S_IRGRP|S_IROTH)) == S_IRGRP) && st.st_gid == getegid() && getegid() != getgid())
432 			setgid(getgid());
433 		fp->stamp = st.st_mtime;
434 		b = (s = fp->decode.temp) + 1;
435 		for (i = 0; i < elementsof(fp->decode.bigram1); i++)
436 		{
437 			if ((j = sfgetc(fp->fp)) == EOF)
438 				goto invalid;
439 			if (!(*s++ = fp->decode.bigram1[i] = j) && i)
440 			{
441 				i = -i;
442 				break;
443 			}
444 			if ((j = sfgetc(fp->fp)) == EOF)
445 				goto invalid;
446 			if (!(*s++ = fp->decode.bigram2[i] = j) && (i || fp->decode.bigram1[0] >= '0' && fp->decode.bigram1[0] <= '1'))
447 				break;
448 		}
449 		if (streq(b, FF_typ_magic))
450 		{
451 			if (type)
452 			{
453 				type = (const char*)typefix(fp->decode.bigram2, sizeof(fp->decode.bigram2), type);
454 				memset(fp->decode.bigram1, 0, sizeof(fp->decode.bigram1));
455 			}
456 			fp->method = FF_typ;
457 			for (j = 0, i = 1;; i++)
458 			{
459 				if (!(s = sfgetr(fp->fp, 0, 0)))
460 					goto invalid;
461 				if (!*s)
462 					break;
463 				if (type && strmatch(s, type))
464 				{
465 					FF_SET_TYPE(fp, i);
466 					j++;
467 				}
468 			}
469 			if (type && !j)
470 				goto drop;
471 			fp->types = j;
472 		}
473 		else if (streq(b, FF_dir_magic))
474 			fp->method = FF_dir;
475 		else if (streq(b, FF_gnu_magic))
476 			fp->method = FF_gnu;
477 		else if (!*b && *--b >= '0' && *b <= '1')
478 		{
479 			fp->method = FF_gnu;
480 			while (j = sfgetc(fp->fp))
481 			{
482 				if (j == EOF || fp->decode.count >= sizeof(fp->decode.path))
483 					goto invalid;
484 				fp->decode.path[fp->decode.count++] = j;
485 			}
486 		}
487 		else
488 		{
489 			fp->method = FF_old;
490 			if (i < 0)
491 			{
492 				if ((j = sfgetc(fp->fp)) == EOF)
493 					goto invalid;
494 				fp->decode.bigram2[i = -i] = j;
495 			}
496 			while (++i < elementsof(fp->decode.bigram1))
497 			{
498 				if ((j = sfgetc(fp->fp)) == EOF)
499 					goto invalid;
500 				fp->decode.bigram1[i] = j;
501 				if ((j = sfgetc(fp->fp)) == EOF)
502 					goto invalid;
503 				fp->decode.bigram2[i] = j;
504 			}
505 			if ((fp->decode.peek = sfgetc(fp->fp)) != FF_OFF)
506 				goto invalid;
507 		}
508 
509 		/*
510 		 * set up the physical dir table
511 		 */
512 
513 		if (disc->version >= 19980301L)
514 		{
515 			fp->verifyf = disc->verifyf;
516 			if (disc->dirs && *disc->dirs)
517 			{
518 				for (k = 0; disc->dirs[k]; k++);
519 				if (k == 1 && streq(disc->dirs[0], "/"))
520 					k = 0;
521 				if (k)
522 				{
523 					if (!(fp->dirs = vmnewof(fp->vm, 0, char*, 2 * k + 1, 0)))
524 						goto drop;
525 					if (!(fp->lens = vmnewof(fp->vm, 0, int, 2 * k, 0)))
526 						goto drop;
527 					p = 0;
528 					b = fp->decode.temp;
529 					j = fp->method == FF_old || fp->method == FF_gnu;
530 
531 					/*
532 					 * fill the dir list with logical and
533 					 * physical names since we don't know
534 					 * which way the db was encoded (it
535 					 * could be *both* ways)
536 					 */
537 
538 					for (i = q = 0; i < k; i++)
539 					{
540 						if (*(s = disc->dirs[i]) == '/')
541 							sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s", s);
542 						else if (!p && !(p = getcwd(fp->decode.path, sizeof(fp->decode.path))))
543 							goto nospace;
544 						else
545 							sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s/%s", p, s);
546 						s = pathcanon(b, 0);
547 						*s = '/';
548 						*(s + 1) = 0;
549 						if (!(fp->dirs[q] = vmstrdup(fp->vm, b)))
550 							goto nospace;
551 						if (j)
552 							(fp->dirs[q])[s - b] = 0;
553 						q++;
554 						*s = 0;
555 						s = pathcanon(b, PATH_PHYSICAL);
556 						*s = '/';
557 						*(s + 1) = 0;
558 						if (!strneq(b, fp->dirs[q - 1], s - b))
559 						{
560 							if (!(fp->dirs[q] = vmstrdup(fp->vm, b)))
561 								goto nospace;
562 							if (j)
563 								(fp->dirs[q])[s - b] = 0;
564 							q++;
565 						}
566 					}
567 					strsort(fp->dirs, q, strcasecmp);
568 					for (i = 0; i < q; i++)
569 						fp->lens[i] = strlen(fp->dirs[i]);
570 				}
571 			}
572 		}
573 		if (fp->verifyf || (disc->flags & FIND_VERIFY))
574 		{
575 			if (fp->method != FF_dir && fp->method != FF_typ)
576 			{
577 				if (fp->disc->errorf)
578 					(*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s code format does not support directory verification", path, fp->method == FF_gnu ? FF_gnu_magic : "OLD-BIGRAM");
579 				goto drop;
580 			}
581 			fp->verify = 1;
582 		}
583 
584 		/*
585 		 * extract last glob-free subpattern in name for fast pre-match
586 		 * prepend 0 for backwards match
587 		 */
588 
589 		if (p = s = (char*)pattern)
590 		{
591 			b = fp->decode.pattern;
592 			for (;;)
593 			{
594 				switch (*b++ = *p++)
595 				{
596 				case 0:
597 					break;
598 				case '\\':
599 					s = p;
600 					if (!*p++)
601 						break;
602 					continue;
603 				case '[':
604 					if (!brace)
605 					{
606 						brace++;
607 						if (*p == ']')
608 							p++;
609 					}
610 					continue;
611 				case ']':
612 					if (brace)
613 					{
614 						brace--;
615 						s = p;
616 					}
617 					continue;
618 				case '(':
619 					if (!brace)
620 						paren++;
621 					continue;
622 				case ')':
623 					if (!brace && paren > 0 && !--paren)
624 						s = p;
625 					continue;
626 				case '|':
627 				case '&':
628 					if (!brace && !paren)
629 					{
630 						s = "";
631 						break;
632 					}
633 					continue;
634 				case '*':
635 				case '?':
636 					s = p;
637 					continue;
638 				default:
639 					continue;
640 				}
641 				break;
642 			}
643 			if (s != pattern && !streq(pattern, "*"))
644 			{
645 				fp->decode.match = 1;
646 				if (i = regcomp(&fp->decode.re, pattern, REG_SHELL|REG_AUGMENTED|(fp->decode.ignorecase?REG_ICASE:0)))
647 				{
648 					if (disc->errorf)
649 					{
650 						regerror(i, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp));
651 						(*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", pattern, fp->decode.temp);
652 					}
653 					goto drop;
654 				}
655 			}
656 			if (*s)
657 			{
658 				*b++ = 0;
659 				while (i = *s++)
660 					*b++ = i;
661 				*b-- = 0;
662 				fp->decode.end = b;
663 				if (fp->decode.ignorecase)
664 					for (s = fp->decode.pattern; s <= b; s++)
665 						if (isupper(*s))
666 							*s = tolower(*s);
667 			}
668 		}
669 	}
670 	return fp;
671  nospace:
672 	if (disc->errorf)
673 		(*fp->disc->errorf)(fp, fp->disc, 2, "out of space");
674 	if (!vm)
675 		return 0;
676 	if (!fp)
677 	{
678 		vmclose(vm);
679 		return 0;
680 	}
681 	goto drop;
682  invalid:
683 	if (fp->disc->errorf)
684 		(*fp->disc->errorf)(fp, fp->disc, 2, "%s: invalid codes", path);
685  drop:
686 	if (!fp->generate && fp->decode.match)
687 		regfree(&fp->decode.re);
688 	if (fp->fp)
689 		sfclose(fp->fp);
690 	vmclose(fp->vm);
691 	return 0;
692 }
693 
694 /*
695  * return the next fastfind path
696  * 0 returned when list exhausted
697  */
698 
699 char*
700 findread(register Find_t* fp)
701 {
702 	register char*		p;
703 	register char*		q;
704 	register char*		s;
705 	register char*		b;
706 	register char*		e;
707 	register int		c;
708 	register int		n;
709 	register int		m;
710 	int			ignorecase;
711 	int			t;
712 	unsigned char		w[4];
713 	struct stat		st;
714 
715 	if (fp->generate)
716 		return 0;
717 	if (fp->decode.restore)
718 	{
719 		*fp->decode.restore = '/';
720 		fp->decode.restore = 0;
721 	}
722 	ignorecase = fp->decode.ignorecase ? STR_ICASE : 0;
723 	c = fp->decode.peek;
724  next:
725 	for (;;)
726 	{
727 		switch (fp->method)
728 		{
729 		case FF_dir:
730 			t = 0;
731 			n = sfgetl(fp->fp);
732 			goto grab;
733 		case FF_gnu:
734 			if ((c = sfgetc(fp->fp)) == EOF)
735 				return 0;
736 			if (c == 0x80)
737 			{
738 				if ((c = sfgetc(fp->fp)) == EOF)
739 					return 0;
740 				n = c << 8;
741 				if ((c = sfgetc(fp->fp)) == EOF)
742 					return 0;
743 				n |= c;
744 				if (n & 0x8000)
745 					n = (n - 0xffff) - 1;
746 			}
747 			else if ((n = c) & 0x80)
748 				n = (n - 0xff) - 1;
749 			t = 0;
750 			goto grab;
751 		case FF_typ:
752 			t = sfgetu(fp->fp);
753 			n = sfgetl(fp->fp);
754 		grab:
755 			p = fp->decode.path + (fp->decode.count += n);
756 			do
757 			{
758 				if ((c = sfgetc(fp->fp)) == EOF)
759 					return 0;
760 			} while (*p++ = c);
761 			p -= 2;
762 			break;
763 		case FF_old:
764 			if (c == EOF)
765 			{
766 				fp->decode.peek = c;
767 				return 0;
768 			}
769 			if (c == FF_ESC)
770 			{
771 				if (sfread(fp->fp, w, sizeof(w)) != sizeof(w))
772 					return 0;
773 				if (fp->decode.swap >= 0)
774 				{
775 					c = (int32_t)((w[0] << 24) | (w[1] << 16) | (w[2] << 8) | w[3]);
776 					if (!fp->decode.swap)
777 					{
778 						/*
779 						 * the old format uses machine
780 						 * byte order; this test uses
781 						 * the smallest magnitude of
782 						 * both byte orders on the
783 						 * first encoded path motion
784 						 * to determine the original
785 						 * byte order
786 						 */
787 
788 						m = c;
789 						if (m < 0)
790 							m = -m;
791 						n = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
792 						if (n < 0)
793 							n = -n;
794 						if (m < n)
795 							fp->decode.swap = 1;
796 						else
797 						{
798 							fp->decode.swap = -1;
799 							c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
800 						}
801 					}
802 				}
803 				else
804 					c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
805 			}
806 			fp->decode.count += c - FF_OFF;
807 			for (p = fp->decode.path + fp->decode.count; (c = sfgetc(fp->fp)) > FF_ESC;)
808 				if (c & (1<<(CHAR_BIT-1)))
809 				{
810 					*p++ = fp->decode.bigram1[c & ((1<<(CHAR_BIT-1))-1)];
811 					*p++ = fp->decode.bigram2[c & ((1<<(CHAR_BIT-1))-1)];
812 				}
813 				else
814 					*p++ = c;
815 			*p-- = 0;
816 			t = 0;
817 			break;
818 		}
819 		b = fp->decode.path;
820 		if (fp->decode.found)
821 			fp->decode.found = 0;
822 		else
823 			b += fp->decode.count;
824 		if (fp->dirs)
825 			for (;;)
826 			{
827 				if (!*fp->dirs)
828 					return 0;
829 
830 				/*
831 				 * use the ordering and lengths to prune
832 				 * comparison function calls
833 				 * (*fp->dirs)[*fp->lens]=='/' if its
834 				 * already been matched
835 				 */
836 
837 				if ((n = p - fp->decode.path + 1) > (m = *fp->lens))
838 				{
839 					if (!(*fp->dirs)[m])
840 						goto next;
841 					if (!strncasecmp(*fp->dirs, fp->decode.path, m))
842 						break;
843 				}
844 				else if (n == m)
845 				{
846 					if (!(*fp->dirs)[m])
847 					{
848 						if (!(n = strcasecmp(*fp->dirs, fp->decode.path)) && (ignorecase || !strcmp(*fp->dirs, fp->decode.path)))
849 						{
850 							if (m > 0)
851 							{
852 								(*fp->dirs)[m] = '/';
853 								if ((*fp->dirs)[m - 1] != '/')
854 									(*fp->dirs)[++(*fp->lens)] = '/';
855 							}
856 							break;
857 						}
858 						if (n >= 0)
859 							goto next;
860 					}
861 				}
862 				else if (!(*fp->dirs)[m])
863 					goto next;
864 				fp->dirs++;
865 				fp->lens++;
866 			}
867 		if (fp->verify && (*p == '/' || t == 1))
868 		{
869 			if ((n = p - fp->decode.path))
870 				*p = 0;
871 			else
872 				n = 1;
873 			if (fp->verifyf)
874 				n = (*fp->verifyf)(fp, fp->decode.path, n, fp->disc);
875 			else if (stat(fp->decode.path, &st))
876 				n = -1;
877 			else if ((unsigned long)st.st_mtime > fp->stamp)
878 				n = 1;
879 			else
880 				n = 0;
881 			*p = '/';
882 
883 			/*
884 			 * n<0	skip this subtree
885 			 * n==0 keep as is
886 			 * n>0	read this dir now
887 			 */
888 
889 			/* NOT IMPLEMENTED YET */
890 		}
891 		if (FF_OK_TYPE(fp, t))
892 		{
893 			if (fp->decode.end)
894 			{
895 				if (*(s = p) == '/')
896 					s--;
897 				if (*fp->decode.pattern == '/' && b > fp->decode.path)
898 					b--;
899 				for (; s >= b; s--)
900 					if (*s == *fp->decode.end || ignorecase && tolower(*s) == *fp->decode.end)
901 					{
902 						if (ignorecase)
903 							for (e = fp->decode.end - 1, q = s - 1; *e && (*q == *e || tolower(*q) == *e); e--, q--);
904 						else
905 							for (e = fp->decode.end - 1, q = s - 1; *e && *q == *e; e--, q--);
906 						if (!*e)
907 						{
908 							fp->decode.found = 1;
909 							if (!fp->decode.match || strgrpmatch(fp->decode.path, fp->decode.pattern, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT|ignorecase))
910 							{
911 								fp->decode.peek = c;
912 								if (*p == '/')
913 									*(fp->decode.restore = p) = 0;
914 								if (!fp->secure || !access(fp->decode.path, F_OK))
915 									return fp->decode.path;
916 							}
917 							break;
918 						}
919 					}
920 			}
921 			else if (!fp->decode.match || !(n = regexec(&fp->decode.re, fp->decode.path, 0, NiL, 0)))
922 			{
923 				fp->decode.peek = c;
924 				if (*p == '/' && p > fp->decode.path)
925 					*(fp->decode.restore = p) = 0;
926 				if (!fp->secure || !access(fp->decode.path, F_OK))
927 					return fp->decode.path;
928 			}
929 			else if (n != REG_NOMATCH)
930 			{
931 				if (fp->disc->errorf)
932 				{
933 					regerror(n, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp));
934 					(*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", fp->decode.pattern, fp->decode.temp);
935 				}
936 				return 0;
937 			}
938 		}
939 	}
940 }
941 
942 /*
943  * add path to the code table
944  * paths are assumed to be in sort order
945  */
946 
947 int
948 findwrite(register Find_t* fp, const char* path, size_t len, const char* type)
949 {
950 	register unsigned char*	s;
951 	register unsigned char*	e;
952 	register unsigned char*	p;
953 	register int		n;
954 	register int		d;
955 	register Type_t*	x;
956 	register unsigned long	u;
957 
958 	if (!fp->generate)
959 		return -1;
960 	if (type && fp->method == FF_dir)
961 	{
962 		len = sfsprintf(fp->encode.mark, sizeof(fp->encode.mark), "%-.*s/", len, path);
963 		path = fp->encode.mark;
964 	}
965 	s = (unsigned char*)path;
966 	if (len <= 0)
967 		len = strlen(path);
968 	if (len < sizeof(fp->encode.path))
969 		e = s + len++;
970 	else
971 	{
972 		len = sizeof(fp->encode.path) - 1;
973 		e = s + len;
974 	}
975 	p = (unsigned char*)fp->encode.path;
976 	while (s < e)
977 	{
978 		if (*s != *p++)
979 			break;
980 		s++;
981 	}
982 	n = s - (unsigned char*)path;
983 	switch (fp->method)
984 	{
985 	case FF_gnu:
986 		d = n - fp->encode.prefix;
987 		if (d >= -127 && d <= 127)
988 			sfputc(fp->fp, d & 0xff);
989 		else
990 		{
991 			sfputc(fp->fp, 0x80);
992 			sfputc(fp->fp, (d >> 8) & 0xff);
993 			sfputc(fp->fp, d & 0xff);
994 		}
995 		fp->encode.prefix = n;
996 		sfputr(fp->fp, (char*)s, 0);
997 		break;
998 	case FF_old:
999 		sfprintf(fp->fp, "%ld", n - fp->encode.prefix + FF_OFF);
1000 		fp->encode.prefix = n;
1001 		sfputc(fp->fp, ' ');
1002 		p = s;
1003 		while (s < e)
1004 		{
1005 			n = *s++;
1006 			if (s >= e)
1007 				break;
1008 			fp->encode.code[n][*s++]++;
1009 		}
1010 		while (p < e)
1011 		{
1012 			if ((n = *p++) < FF_MIN || n >= FF_MAX)
1013 				n = '?';
1014 			sfputc(fp->fp, n);
1015 		}
1016 		sfputc(fp->fp, 0);
1017 		break;
1018 	case FF_typ:
1019 		if (type)
1020 		{
1021 			type = (const char*)typefix((char*)fp->encode.bigram, sizeof(fp->encode.bigram), type);
1022 			if (x = (Type_t*)dtmatch(fp->encode.namedict, type))
1023 				u = x->index;
1024 			else if (!(x = newof(0, Type_t, 1, strlen(type) + 1)))
1025 				u = 0;
1026 			else
1027 			{
1028 				u = x->index = ++fp->types;
1029 				strcpy(x->name, type);
1030 				dtinsert(fp->encode.namedict, x);
1031 				dtinsert(fp->encode.indexdict, x);
1032 			}
1033 		}
1034 		else
1035 			u = 0;
1036 		sfputu(fp->fp, u);
1037 		/*FALLTHROUGH...*/
1038 	case FF_dir:
1039 		d = n - fp->encode.prefix;
1040 		sfputl(fp->fp, d);
1041 		fp->encode.prefix = n;
1042 		sfputr(fp->fp, (char*)s, 0);
1043 		break;
1044 	}
1045 	memcpy(fp->encode.path, path, len);
1046 	return 0;
1047 }
1048 
1049 /*
1050  * findsync() helper
1051  */
1052 
1053 static int
1054 finddone(register Find_t* fp)
1055 {
1056 	int	r;
1057 
1058 	if (sfsync(fp->fp))
1059 	{
1060 		if (fp->disc->errorf)
1061 			(*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfsync]", fp->encode.file);
1062 		return -1;
1063 	}
1064 	if (sferror(fp->fp))
1065 	{
1066 		if (fp->disc->errorf)
1067 			(*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sferror]", fp->encode.file);
1068 		return -1;
1069 	}
1070 	r = sfclose(fp->fp);
1071 	fp->fp = 0;
1072 	if (r)
1073 	{
1074 		if (fp->disc->errorf)
1075 			(*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfclose]", fp->encode.file);
1076 		return -1;
1077 	}
1078 	return 0;
1079 }
1080 
1081 /*
1082  * finish the code table
1083  */
1084 
1085 static int
1086 findsync(register Find_t* fp)
1087 {
1088 	register char*		s;
1089 	register int		n;
1090 	register int		m;
1091 	register int		d;
1092 	register Type_t*	x;
1093 	char*			t;
1094 	int			b;
1095 	long			z;
1096 	Sfio_t*			sp;
1097 
1098 	switch (fp->method)
1099 	{
1100 	case FF_dir:
1101 	case FF_gnu:
1102 		/*
1103 		 * replace the real file with the temp file
1104 		 */
1105 
1106 		if (finddone(fp))
1107 			goto bad;
1108 		remove(fp->encode.file);
1109 		if (rename(fp->encode.temp, fp->encode.file))
1110 		{
1111 			if (fp->disc->errorf)
1112 				(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot rename from tmp file %s", fp->encode.file, fp->encode.temp);
1113 			remove(fp->encode.temp);
1114 			return -1;
1115 		}
1116 		break;
1117 	case FF_old:
1118 		/*
1119 		 * determine the top FF_MAX bigrams
1120 		 */
1121 
1122 		for (n = 0; n < FF_MAX; n++)
1123 			for (m = 0; m < FF_MAX; m++)
1124 				fp->encode.hits[fp->encode.code[n][m]]++;
1125 		fp->encode.hits[0] = 0;
1126 		m = 1;
1127 		for (n = USHRT_MAX; n >= 0; n--)
1128 			if (d = fp->encode.hits[n])
1129 			{
1130 				fp->encode.hits[n] = m;
1131 				if ((m += d) > FF_MAX)
1132 					break;
1133 			}
1134 		while (--n >= 0)
1135 			fp->encode.hits[n] = 0;
1136 		for (n = FF_MAX - 1; n >= 0; n--)
1137 			for (m = FF_MAX - 1; m >= 0; m--)
1138 				if (fp->encode.hits[fp->encode.code[n][m]])
1139 				{
1140 					d = fp->encode.code[n][m];
1141 					b = fp->encode.hits[d] - 1;
1142 					fp->encode.code[n][m] = b + FF_MAX;
1143 					if (fp->encode.hits[d]++ >= FF_MAX)
1144 						fp->encode.hits[d] = 0;
1145 					fp->encode.bigram[b *= 2] = n;
1146 					fp->encode.bigram[b + 1] = m;
1147 				}
1148 				else
1149 					fp->encode.code[n][m] = 0;
1150 
1151 		/*
1152 		 * commit the real file
1153 		 */
1154 
1155 		if (sfseek(fp->fp, (Sfoff_t)0, SEEK_SET))
1156 		{
1157 			if (fp->disc->errorf)
1158 				(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot rewind tmp file");
1159 			return -1;
1160 		}
1161 		if (!(sp = sfopen(NiL, fp->encode.file, "w")))
1162 			goto badcreate;
1163 
1164 		/*
1165 		 * dump the bigrams
1166 		 */
1167 
1168 		sfwrite(sp, fp->encode.bigram, sizeof(fp->encode.bigram));
1169 
1170 		/*
1171 		 * encode the massaged paths
1172 		 */
1173 
1174 		while (s = sfgetr(fp->fp, 0, 0))
1175 		{
1176 			z = strtol(s, &t, 0);
1177 			s = t;
1178 			if (z < 0 || z > 2 * FF_OFF)
1179 			{
1180 				sfputc(sp, FF_ESC);
1181 				sfputc(sp, (z >> 24));
1182 				sfputc(sp, (z >> 16));
1183 				sfputc(sp, (z >> 8));
1184 				sfputc(sp, z);
1185 			}
1186 			else
1187 				sfputc(sp, z);
1188 			while (n = *s++)
1189 			{
1190 				if (!(m = *s++))
1191 				{
1192 					sfputc(sp, n);
1193 					break;
1194 				}
1195 				if (d = fp->encode.code[n][m])
1196 					sfputc(sp, d);
1197 				else
1198 				{
1199 					sfputc(sp, n);
1200 					sfputc(sp, m);
1201 				}
1202 			}
1203 		}
1204 		sfclose(fp->fp);
1205 		fp->fp = sp;
1206 		if (finddone(fp))
1207 			goto bad;
1208 		break;
1209 	case FF_typ:
1210 		if (finddone(fp))
1211 			goto bad;
1212 		if (!(fp->fp = sfopen(NiL, fp->encode.temp, "r")))
1213 		{
1214 			if (fp->disc->errorf)
1215 				(*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot read tmp file", fp->encode.temp);
1216 			remove(fp->encode.temp);
1217 			return -1;
1218 		}
1219 
1220 		/*
1221 		 * commit the output file
1222 		 */
1223 
1224 		if (!(sp = sfopen(NiL, fp->encode.file, "w")))
1225 			goto badcreate;
1226 
1227 		/*
1228 		 * write the header magic
1229 		 */
1230 
1231 		sfputc(sp, 0);
1232 		sfputr(sp, FF_typ_magic, 0);
1233 
1234 		/*
1235 		 * write the type table in index order starting with 1
1236 		 */
1237 
1238 		for (x = (Type_t*)dtfirst(fp->encode.indexdict); x; x = (Type_t*)dtnext(fp->encode.indexdict, x))
1239 			sfputr(sp, x->name, 0);
1240 		sfputc(sp, 0);
1241 
1242 		/*
1243 		 * append the front compressed strings
1244 		 */
1245 
1246 		if (sfmove(fp->fp, sp, SF_UNBOUND, -1) < 0 || !sfeof(fp->fp))
1247 		{
1248 			sfclose(sp);
1249 			if (fp->disc->errorf)
1250 				(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot append codes", fp->encode.file);
1251 			goto bad;
1252 		}
1253 		sfclose(fp->fp);
1254 		fp->fp = sp;
1255 		if (finddone(fp))
1256 			goto bad;
1257 		remove(fp->encode.temp);
1258 		break;
1259 	}
1260 	return 0;
1261  badcreate:
1262 	if (fp->disc->errorf)
1263 		(*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot write codes", fp->encode.file);
1264  bad:
1265 	if (fp->fp)
1266 	{
1267 		sfclose(fp->fp);
1268 		fp->fp = 0;
1269 	}
1270 	remove(fp->encode.temp);
1271 	return -1;
1272 }
1273 
1274 /*
1275  * close an open fastfind stream
1276  */
1277 
1278 int
1279 findclose(register Find_t* fp)
1280 {
1281 	int	n = 0;
1282 
1283 	if (!fp)
1284 		return -1;
1285 	if (fp->generate)
1286 	{
1287 		n = findsync(fp);
1288 		if (fp->encode.indexdict)
1289 			dtclose(fp->encode.indexdict);
1290 		if (fp->encode.namedict)
1291 			dtclose(fp->encode.namedict);
1292 	}
1293 	else
1294 	{
1295 		if (fp->decode.match)
1296 			regfree(&fp->decode.re);
1297 		n = 0;
1298 	}
1299 	if (fp->fp)
1300 		sfclose(fp->fp);
1301 	vmclose(fp->vm);
1302 	return n;
1303 }
1304