1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2010 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * Phong Vo <kpv@research.att.com> *
20 * *
21 ***********************************************************************/
22 #pragma prototyped
23 /*
24 * original code
25 *
26 * James A. Woods, Informatics General Corporation,
27 * NASA Ames Research Center, 6/81.
28 * Usenix ;login:, February/March, 1983, p. 8.
29 *
30 * discipline/method interface
31 *
32 * Glenn Fowler
33 * AT&T Research
34 * modified from the original BSD source
35 *
36 * 'fastfind' scans a file list for the full pathname of a file
37 * given only a piece of the name. The list is processed with
38 * with "front-compression" and bigram coding. Front compression reduces
39 * space by a factor of 4-5, bigram coding by a further 20-25%.
40 *
41 * there are 4 methods:
42 *
43 * FF_old original with 7 bit bigram encoding (no magic)
44 * FF_gnu 8 bit clean front compression (FF_gnu_magic)
45 * FF_dir FF_gnu with sfgetl/sfputl and trailing / on dirs (FF_dir_magic)
46 * FF_typ FF_dir with (mime) types (FF_typ_magic)
47 *
48 * the bigram encoding steals the eighth bit (that's why its FF_old)
49 * maybe one day we'll limit it to readonly:
50 *
51 * 0-2*FF_OFF likeliest differential counts + offset to make nonnegative
52 * FF_ESC 4 byte big-endian out-of-range count+FF_OFF follows
53 * FF_MIN-FF_MAX ascii residue
54 * >=FF_MAX bigram codes
55 *
56 * a two-tiered string search technique is employed
57 *
58 * a metacharacter-free subpattern and partial pathname is matched
59 * backwards to avoid full expansion of the pathname list
60 *
61 * then the actual shell glob-style regular expression (if in this form)
62 * is matched against the candidate pathnames using the slower regexec()
63 *
64 * The original BSD code is covered by the BSD license:
65 *
66 * Copyright (c) 1985, 1993, 1999
67 * The Regents of the University of California. All rights reserved.
68 *
69 * Redistribution and use in source and binary forms, with or without
70 * modification, are permitted provided that the following conditions
71 * are met:
72 * 1. Redistributions of source code must retain the above copyright
73 * notice, this list of conditions and the following disclaimer.
74 * 2. Redistributions in binary form must reproduce the above copyright
75 * notice, this list of conditions and the following disclaimer in the
76 * documentation and/or other materials provided with the distribution.
77 * 3. Neither the name of the University nor the names of its contributors
78 * may be used to endorse or promote products derived from this software
79 * without specific prior written permission.
80 *
81 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
82 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
83 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
84 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
85 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
86 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
87 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
88 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
89 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
90 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
91 * SUCH DAMAGE.
92 */
93
94 static const char id[] = "\n@(#)$Id: fastfind (AT&T Research) 2002-10-02 $\0\n";
95
96 static const char lib[] = "libast:fastfind";
97
98 #include "findlib.h"
99
100 #define FIND_MATCH "*/(find|locate)/*"
101
102 /*
103 * this db could be anywhere
104 * findcodes[] directories are checked for findnames[i]
105 */
106
107 static char* findcodes[] =
108 {
109 0,
110 0,
111 FIND_CODES,
112 "/usr/local/share/lib",
113 "/usr/local/lib",
114 "/usr/share/lib",
115 "/usr/lib",
116 "/var/spool",
117 "/usr/local/var",
118 "/var/lib",
119 "/var/lib/slocate",
120 "/var/db",
121 };
122
123 static char* findnames[] =
124 {
125 "find/codes",
126 "find/find.codes",
127 "locate/locatedb",
128 "locatedb",
129 "locate.database",
130 "slocate.db",
131 };
132
133 /*
134 * convert t to lower case and drop leading x- and x- after /
135 * converted value copied to b of size n
136 */
137
138 char*
typefix(char * buf,size_t n,register const char * t)139 typefix(char* buf, size_t n, register const char* t)
140 {
141 register int c;
142 register char* b = buf;
143
144 if ((*t == 'x' || *t == 'X') && *(t + 1) == '-')
145 t += 2;
146 while (c = *t++)
147 {
148 if (isupper(c))
149 c = tolower(c);
150 if ((*b++ = c) == '/' && (*t == 'x' || *t == 'X') && *(t + 1) == '-')
151 t += 2;
152 }
153 *b = 0;
154 return buf;
155 }
156
157 /*
158 * return a fastfind stream handle for pattern
159 */
160
161 Find_t*
findopen(const char * file,const char * pattern,const char * type,Finddisc_t * disc)162 findopen(const char* file, const char* pattern, const char* type, Finddisc_t* disc)
163 {
164 register Find_t* fp;
165 register char* p;
166 register char* s;
167 register char* b;
168 register int i;
169 register int j;
170 char* path;
171 int brace = 0;
172 int paren = 0;
173 int k;
174 int q;
175 int fd;
176 int uid;
177 Vmalloc_t* vm;
178 Type_t* tp;
179 struct stat st;
180
181
182 if (!(vm = vmopen(Vmdcheap, Vmbest, 0)))
183 goto nospace;
184
185 /*
186 * NOTE: searching for FIND_CODES would be much simpler if we
187 * just stuck with our own, but we also support GNU
188 * locate codes and have to search for the one of a
189 * bazillion possible names for that file
190 */
191
192 if (!findcodes[1])
193 findcodes[1] = getenv(FIND_CODES_ENV);
194 if (disc->flags & FIND_GENERATE)
195 {
196 if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, sizeof(Encode_t) - sizeof(Code_t))))
197 goto nospace;
198 fp->vm = vm;
199 fp->id = lib;
200 fp->disc = disc;
201 fp->generate = 1;
202 if (file && (!*file || streq(file, "-")))
203 file = 0;
204 uid = geteuid();
205 j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes);
206
207 /*
208 * look for the codes file, but since it may not exist yet,
209 * also look for the containing directory if i<2 or if
210 * it is sufficiently qualified (FIND_MATCH)
211 */
212
213 for (i = 0; i < j; i++)
214 if (path = findcodes[i])
215 {
216 if (*path == '/')
217 {
218 if (!stat(path, &st))
219 {
220 if (S_ISDIR(st.st_mode))
221 {
222 for (k = 0; k < elementsof(findnames); k++)
223 {
224 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s/%s", path, findnames[k]);
225 if (!eaccess(fp->encode.file, R_OK|W_OK))
226 {
227 path = fp->encode.file;
228 break;
229 }
230 if (strchr(findnames[k], '/') && (b = strrchr(fp->encode.file, '/')))
231 {
232 *b = 0;
233 if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
234 {
235 *b = '/';
236 path = fp->encode.file;
237 break;
238 }
239 }
240 }
241 if (k < elementsof(findnames))
242 break;
243 }
244 else if (st.st_uid == uid && (st.st_mode & S_IWUSR))
245 {
246 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path);
247 path = fp->encode.file;
248 break;
249 }
250 }
251 else if (i < 2 || strmatch(path, FIND_MATCH))
252 {
253 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path);
254 if (b = strrchr(fp->encode.file, '/'))
255 {
256 *b = 0;
257 if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
258 {
259 *b = '/';
260 path = fp->encode.file;
261 break;
262 }
263 }
264 }
265 }
266 else if (pathpath(fp->encode.file, path, "", PATH_REGULAR|PATH_READ|PATH_WRITE))
267 {
268 path = fp->encode.file;
269 break;
270 }
271 else if (b = strrchr(path, '/'))
272 {
273 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%-.*s", b - path, path);
274 if (pathpath(fp->encode.temp, fp->encode.file, "", PATH_EXECUTE|PATH_READ|PATH_WRITE) &&
275 !stat(fp->encode.temp, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
276 {
277 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s%s", fp->encode.temp, b);
278 path = fp->encode.file;
279 break;
280 }
281 }
282 }
283 if (i >= j)
284 {
285 if (fp->disc->errorf)
286 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]);
287 goto drop;
288 }
289 if (fp->disc->flags & FIND_OLD)
290 {
291 /*
292 * FF_old generates temp data that is read
293 * in a second pass to generate the real codes
294 */
295
296 fp->method = FF_old;
297 if (!(fp->fp = sftmp(32 * PATH_MAX)))
298 {
299 if (fp->disc->errorf)
300 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot create tmp file");
301 goto drop;
302 }
303 }
304 else
305 {
306 /*
307 * the rest generate into a temp file that
308 * is simply renamed on completion
309 */
310
311 if (s = strrchr(path, '/'))
312 {
313 *s = 0;
314 p = path;
315 }
316 else
317 p = ".";
318 if (!pathtemp(fp->encode.temp, sizeof(fp->encode.temp), p, "ff", &fd))
319 {
320 if (fp->disc->errorf)
321 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot create tmp file in this directory", p ? p : ".");
322 goto drop;
323 }
324 if (s)
325 *s = '/';
326 if (!(fp->fp = sfnew(NiL, NiL, (size_t)SF_UNBOUND, fd, SF_WRITE)))
327 {
328 if (fp->disc->errorf)
329 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot open tmp file", fp->encode.temp);
330 close(fd);
331 goto drop;
332 }
333 if (fp->disc->flags & FIND_TYPE)
334 {
335 fp->method = FF_typ;
336 fp->encode.namedisc.key = offsetof(Type_t, name);
337 fp->encode.namedisc.link = offsetof(Type_t, byname);
338 fp->encode.indexdisc.key = offsetof(Type_t, index);
339 fp->encode.indexdisc.size = sizeof(unsigned long);
340 fp->encode.indexdisc.link = offsetof(Type_t, byindex);
341 s = "system/dir";
342 if (!(fp->encode.namedict = dtopen(&fp->encode.namedisc, Dttree)) || !(fp->encode.indexdict = dtopen(&fp->encode.indexdisc, Dttree)) || !(tp = newof(0, Type_t, 1, strlen(s) + 1)))
343 {
344 if (fp->encode.namedict)
345 dtclose(fp->encode.namedict);
346 if (fp->disc->errorf)
347 (*fp->disc->errorf)(fp, fp->disc, 2, "cannot allocate type table");
348 goto drop;
349 }
350
351 /*
352 * type index 1 is always system/dir
353 */
354
355 tp->index = ++fp->types;
356 strcpy(tp->name, s);
357 dtinsert(fp->encode.namedict, tp);
358 dtinsert(fp->encode.indexdict, tp);
359 }
360 else if (fp->disc->flags & FIND_GNU)
361 {
362 fp->method = FF_gnu;
363 sfputc(fp->fp, 0);
364 sfputr(fp->fp, FF_gnu_magic, 0);
365 }
366 else
367 {
368 fp->method = FF_dir;
369 sfputc(fp->fp, 0);
370 sfputr(fp->fp, FF_dir_magic, 0);
371 }
372 }
373 }
374 else
375 {
376 i = sizeof(Decode_t) + sizeof(Code_t);
377 if (!pattern || !*pattern)
378 pattern = "*";
379 i += (j = 2 * (strlen(pattern) + 1));
380 if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, i)))
381 {
382 vmclose(vm);
383 return 0;
384 }
385 fp->vm = vm;
386 fp->id = lib;
387 fp->disc = disc;
388 if (disc->flags & FIND_ICASE)
389 fp->decode.ignorecase = 1;
390 j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes);
391 for (i = 0; i < j; i++)
392 if (path = findcodes[i])
393 {
394 if (*path == '/')
395 {
396 if (!stat(path, &st))
397 {
398 if (S_ISDIR(st.st_mode))
399 {
400 for (k = 0; k < elementsof(findnames); k++)
401 {
402 sfsprintf(fp->decode.path, sizeof(fp->decode.path), "%s/%s", path, findnames[k]);
403 if (fp->fp = sfopen(NiL, fp->decode.path, "r"))
404 {
405 path = fp->decode.path;
406 break;
407 }
408 }
409 if (fp->fp)
410 break;
411 }
412 else if (fp->fp = sfopen(NiL, path, "r"))
413 break;
414 }
415 }
416 else if ((path = pathpath(fp->decode.path, path, "", PATH_REGULAR|PATH_READ)) && (fp->fp = sfopen(NiL, path, "r")))
417 break;
418 }
419 if (!fp->fp)
420 {
421 if (fp->disc->errorf)
422 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]);
423 goto drop;
424 }
425 if (fstat(sffileno(fp->fp), &st))
426 {
427 if (fp->disc->errorf)
428 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot stat codes", path);
429 goto drop;
430 }
431 if (fp->secure = ((st.st_mode & (S_IRGRP|S_IROTH)) == S_IRGRP) && st.st_gid == getegid() && getegid() != getgid())
432 setgid(getgid());
433 fp->stamp = st.st_mtime;
434 b = (s = fp->decode.temp) + 1;
435 for (i = 0; i < elementsof(fp->decode.bigram1); i++)
436 {
437 if ((j = sfgetc(fp->fp)) == EOF)
438 goto invalid;
439 if (!(*s++ = fp->decode.bigram1[i] = j) && i)
440 {
441 i = -i;
442 break;
443 }
444 if ((j = sfgetc(fp->fp)) == EOF)
445 goto invalid;
446 if (!(*s++ = fp->decode.bigram2[i] = j) && (i || fp->decode.bigram1[0] >= '0' && fp->decode.bigram1[0] <= '1'))
447 break;
448 }
449 if (streq(b, FF_typ_magic))
450 {
451 if (type)
452 {
453 type = (const char*)typefix(fp->decode.bigram2, sizeof(fp->decode.bigram2), type);
454 memset(fp->decode.bigram1, 0, sizeof(fp->decode.bigram1));
455 }
456 fp->method = FF_typ;
457 for (j = 0, i = 1;; i++)
458 {
459 if (!(s = sfgetr(fp->fp, 0, 0)))
460 goto invalid;
461 if (!*s)
462 break;
463 if (type && strmatch(s, type))
464 {
465 FF_SET_TYPE(fp, i);
466 j++;
467 }
468 }
469 if (type && !j)
470 goto drop;
471 fp->types = j;
472 }
473 else if (streq(b, FF_dir_magic))
474 fp->method = FF_dir;
475 else if (streq(b, FF_gnu_magic))
476 fp->method = FF_gnu;
477 else if (!*b && *--b >= '0' && *b <= '1')
478 {
479 fp->method = FF_gnu;
480 while (j = sfgetc(fp->fp))
481 {
482 if (j == EOF || fp->decode.count >= sizeof(fp->decode.path))
483 goto invalid;
484 fp->decode.path[fp->decode.count++] = j;
485 }
486 }
487 else
488 {
489 fp->method = FF_old;
490 if (i < 0)
491 {
492 if ((j = sfgetc(fp->fp)) == EOF)
493 goto invalid;
494 fp->decode.bigram2[i = -i] = j;
495 }
496 while (++i < elementsof(fp->decode.bigram1))
497 {
498 if ((j = sfgetc(fp->fp)) == EOF)
499 goto invalid;
500 fp->decode.bigram1[i] = j;
501 if ((j = sfgetc(fp->fp)) == EOF)
502 goto invalid;
503 fp->decode.bigram2[i] = j;
504 }
505 if ((fp->decode.peek = sfgetc(fp->fp)) != FF_OFF)
506 goto invalid;
507 }
508
509 /*
510 * set up the physical dir table
511 */
512
513 if (disc->version >= 19980301L)
514 {
515 fp->verifyf = disc->verifyf;
516 if (disc->dirs && *disc->dirs)
517 {
518 for (k = 0; disc->dirs[k]; k++);
519 if (k == 1 && streq(disc->dirs[0], "/"))
520 k = 0;
521 if (k)
522 {
523 if (!(fp->dirs = vmnewof(fp->vm, 0, char*, 2 * k + 1, 0)))
524 goto drop;
525 if (!(fp->lens = vmnewof(fp->vm, 0, int, 2 * k, 0)))
526 goto drop;
527 p = 0;
528 b = fp->decode.temp;
529 j = fp->method == FF_old || fp->method == FF_gnu;
530
531 /*
532 * fill the dir list with logical and
533 * physical names since we don't know
534 * which way the db was encoded (it
535 * could be *both* ways)
536 */
537
538 for (i = q = 0; i < k; i++)
539 {
540 if (*(s = disc->dirs[i]) == '/')
541 sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s", s);
542 else if (!p && !(p = getcwd(fp->decode.path, sizeof(fp->decode.path))))
543 goto nospace;
544 else
545 sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s/%s", p, s);
546 s = pathcanon(b, 0);
547 *s = '/';
548 *(s + 1) = 0;
549 if (!(fp->dirs[q] = vmstrdup(fp->vm, b)))
550 goto nospace;
551 if (j)
552 (fp->dirs[q])[s - b] = 0;
553 q++;
554 *s = 0;
555 s = pathcanon(b, PATH_PHYSICAL);
556 *s = '/';
557 *(s + 1) = 0;
558 if (!strneq(b, fp->dirs[q - 1], s - b))
559 {
560 if (!(fp->dirs[q] = vmstrdup(fp->vm, b)))
561 goto nospace;
562 if (j)
563 (fp->dirs[q])[s - b] = 0;
564 q++;
565 }
566 }
567 strsort(fp->dirs, q, strcasecmp);
568 for (i = 0; i < q; i++)
569 fp->lens[i] = strlen(fp->dirs[i]);
570 }
571 }
572 }
573 if (fp->verifyf || (disc->flags & FIND_VERIFY))
574 {
575 if (fp->method != FF_dir && fp->method != FF_typ)
576 {
577 if (fp->disc->errorf)
578 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s code format does not support directory verification", path, fp->method == FF_gnu ? FF_gnu_magic : "OLD-BIGRAM");
579 goto drop;
580 }
581 fp->verify = 1;
582 }
583
584 /*
585 * extract last glob-free subpattern in name for fast pre-match
586 * prepend 0 for backwards match
587 */
588
589 if (p = s = (char*)pattern)
590 {
591 b = fp->decode.pattern;
592 for (;;)
593 {
594 switch (*b++ = *p++)
595 {
596 case 0:
597 break;
598 case '\\':
599 s = p;
600 if (!*p++)
601 break;
602 continue;
603 case '[':
604 if (!brace)
605 {
606 brace++;
607 if (*p == ']')
608 p++;
609 }
610 continue;
611 case ']':
612 if (brace)
613 {
614 brace--;
615 s = p;
616 }
617 continue;
618 case '(':
619 if (!brace)
620 paren++;
621 continue;
622 case ')':
623 if (!brace && paren > 0 && !--paren)
624 s = p;
625 continue;
626 case '|':
627 case '&':
628 if (!brace && !paren)
629 {
630 s = "";
631 break;
632 }
633 continue;
634 case '*':
635 case '?':
636 s = p;
637 continue;
638 default:
639 continue;
640 }
641 break;
642 }
643 if (s != pattern && !streq(pattern, "*"))
644 {
645 fp->decode.match = 1;
646 if (i = regcomp(&fp->decode.re, pattern, REG_SHELL|REG_AUGMENTED|(fp->decode.ignorecase?REG_ICASE:0)))
647 {
648 if (disc->errorf)
649 {
650 regerror(i, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp));
651 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", pattern, fp->decode.temp);
652 }
653 goto drop;
654 }
655 }
656 if (*s)
657 {
658 *b++ = 0;
659 while (i = *s++)
660 *b++ = i;
661 *b-- = 0;
662 fp->decode.end = b;
663 if (fp->decode.ignorecase)
664 for (s = fp->decode.pattern; s <= b; s++)
665 if (isupper(*s))
666 *s = tolower(*s);
667 }
668 }
669 }
670 return fp;
671 nospace:
672 if (disc->errorf)
673 (*fp->disc->errorf)(fp, fp->disc, 2, "out of space");
674 if (!vm)
675 return 0;
676 if (!fp)
677 {
678 vmclose(vm);
679 return 0;
680 }
681 goto drop;
682 invalid:
683 if (fp->disc->errorf)
684 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: invalid codes", path);
685 drop:
686 if (!fp->generate && fp->decode.match)
687 regfree(&fp->decode.re);
688 if (fp->fp)
689 sfclose(fp->fp);
690 vmclose(fp->vm);
691 return 0;
692 }
693
694 /*
695 * return the next fastfind path
696 * 0 returned when list exhausted
697 */
698
699 char*
findread(register Find_t * fp)700 findread(register Find_t* fp)
701 {
702 register char* p;
703 register char* q;
704 register char* s;
705 register char* b;
706 register char* e;
707 register int c;
708 register int n;
709 register int m;
710 int ignorecase;
711 int t;
712 unsigned char w[4];
713 struct stat st;
714
715 if (fp->generate)
716 return 0;
717 if (fp->decode.restore)
718 {
719 *fp->decode.restore = '/';
720 fp->decode.restore = 0;
721 }
722 ignorecase = fp->decode.ignorecase ? STR_ICASE : 0;
723 c = fp->decode.peek;
724 next:
725 for (;;)
726 {
727 switch (fp->method)
728 {
729 case FF_dir:
730 t = 0;
731 n = sfgetl(fp->fp);
732 goto grab;
733 case FF_gnu:
734 if ((c = sfgetc(fp->fp)) == EOF)
735 return 0;
736 if (c == 0x80)
737 {
738 if ((c = sfgetc(fp->fp)) == EOF)
739 return 0;
740 n = c << 8;
741 if ((c = sfgetc(fp->fp)) == EOF)
742 return 0;
743 n |= c;
744 if (n & 0x8000)
745 n = (n - 0xffff) - 1;
746 }
747 else if ((n = c) & 0x80)
748 n = (n - 0xff) - 1;
749 t = 0;
750 goto grab;
751 case FF_typ:
752 t = sfgetu(fp->fp);
753 n = sfgetl(fp->fp);
754 grab:
755 p = fp->decode.path + (fp->decode.count += n);
756 do
757 {
758 if ((c = sfgetc(fp->fp)) == EOF)
759 return 0;
760 } while (*p++ = c);
761 p -= 2;
762 break;
763 case FF_old:
764 if (c == EOF)
765 {
766 fp->decode.peek = c;
767 return 0;
768 }
769 if (c == FF_ESC)
770 {
771 if (sfread(fp->fp, w, sizeof(w)) != sizeof(w))
772 return 0;
773 if (fp->decode.swap >= 0)
774 {
775 c = (int32_t)((w[0] << 24) | (w[1] << 16) | (w[2] << 8) | w[3]);
776 if (!fp->decode.swap)
777 {
778 /*
779 * the old format uses machine
780 * byte order; this test uses
781 * the smallest magnitude of
782 * both byte orders on the
783 * first encoded path motion
784 * to determine the original
785 * byte order
786 */
787
788 m = c;
789 if (m < 0)
790 m = -m;
791 n = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
792 if (n < 0)
793 n = -n;
794 if (m < n)
795 fp->decode.swap = 1;
796 else
797 {
798 fp->decode.swap = -1;
799 c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
800 }
801 }
802 }
803 else
804 c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
805 }
806 fp->decode.count += c - FF_OFF;
807 for (p = fp->decode.path + fp->decode.count; (c = sfgetc(fp->fp)) > FF_ESC;)
808 if (c & (1<<(CHAR_BIT-1)))
809 {
810 *p++ = fp->decode.bigram1[c & ((1<<(CHAR_BIT-1))-1)];
811 *p++ = fp->decode.bigram2[c & ((1<<(CHAR_BIT-1))-1)];
812 }
813 else
814 *p++ = c;
815 *p-- = 0;
816 t = 0;
817 break;
818 }
819 b = fp->decode.path;
820 if (fp->decode.found)
821 fp->decode.found = 0;
822 else
823 b += fp->decode.count;
824 if (fp->dirs)
825 for (;;)
826 {
827 if (!*fp->dirs)
828 return 0;
829
830 /*
831 * use the ordering and lengths to prune
832 * comparison function calls
833 * (*fp->dirs)[*fp->lens]=='/' if its
834 * already been matched
835 */
836
837 if ((n = p - fp->decode.path + 1) > (m = *fp->lens))
838 {
839 if (!(*fp->dirs)[m])
840 goto next;
841 if (!strncasecmp(*fp->dirs, fp->decode.path, m))
842 break;
843 }
844 else if (n == m)
845 {
846 if (!(*fp->dirs)[m])
847 {
848 if (!(n = strcasecmp(*fp->dirs, fp->decode.path)) && (ignorecase || !strcmp(*fp->dirs, fp->decode.path)))
849 {
850 if (m > 0)
851 {
852 (*fp->dirs)[m] = '/';
853 if ((*fp->dirs)[m - 1] != '/')
854 (*fp->dirs)[++(*fp->lens)] = '/';
855 }
856 break;
857 }
858 if (n >= 0)
859 goto next;
860 }
861 }
862 else if (!(*fp->dirs)[m])
863 goto next;
864 fp->dirs++;
865 fp->lens++;
866 }
867 if (fp->verify && (*p == '/' || t == 1))
868 {
869 if ((n = p - fp->decode.path))
870 *p = 0;
871 else
872 n = 1;
873 if (fp->verifyf)
874 n = (*fp->verifyf)(fp, fp->decode.path, n, fp->disc);
875 else if (stat(fp->decode.path, &st))
876 n = -1;
877 else if ((unsigned long)st.st_mtime > fp->stamp)
878 n = 1;
879 else
880 n = 0;
881 *p = '/';
882
883 /*
884 * n<0 skip this subtree
885 * n==0 keep as is
886 * n>0 read this dir now
887 */
888
889 /* NOT IMPLEMENTED YET */
890 }
891 if (FF_OK_TYPE(fp, t))
892 {
893 if (fp->decode.end)
894 {
895 if (*(s = p) == '/')
896 s--;
897 if (*fp->decode.pattern == '/' && b > fp->decode.path)
898 b--;
899 for (; s >= b; s--)
900 if (*s == *fp->decode.end || ignorecase && tolower(*s) == *fp->decode.end)
901 {
902 if (ignorecase)
903 for (e = fp->decode.end - 1, q = s - 1; *e && (*q == *e || tolower(*q) == *e); e--, q--);
904 else
905 for (e = fp->decode.end - 1, q = s - 1; *e && *q == *e; e--, q--);
906 if (!*e)
907 {
908 fp->decode.found = 1;
909 if (!fp->decode.match || strgrpmatch(fp->decode.path, fp->decode.pattern, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT|ignorecase))
910 {
911 fp->decode.peek = c;
912 if (*p == '/')
913 *(fp->decode.restore = p) = 0;
914 if (!fp->secure || !access(fp->decode.path, F_OK))
915 return fp->decode.path;
916 }
917 break;
918 }
919 }
920 }
921 else if (!fp->decode.match || !(n = regexec(&fp->decode.re, fp->decode.path, 0, NiL, 0)))
922 {
923 fp->decode.peek = c;
924 if (*p == '/' && p > fp->decode.path)
925 *(fp->decode.restore = p) = 0;
926 if (!fp->secure || !access(fp->decode.path, F_OK))
927 return fp->decode.path;
928 }
929 else if (n != REG_NOMATCH)
930 {
931 if (fp->disc->errorf)
932 {
933 regerror(n, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp));
934 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", fp->decode.pattern, fp->decode.temp);
935 }
936 return 0;
937 }
938 }
939 }
940 }
941
942 /*
943 * add path to the code table
944 * paths are assumed to be in sort order
945 */
946
947 int
findwrite(register Find_t * fp,const char * path,size_t len,const char * type)948 findwrite(register Find_t* fp, const char* path, size_t len, const char* type)
949 {
950 register unsigned char* s;
951 register unsigned char* e;
952 register unsigned char* p;
953 register int n;
954 register int d;
955 register Type_t* x;
956 register unsigned long u;
957
958 if (!fp->generate)
959 return -1;
960 if (type && fp->method == FF_dir)
961 {
962 len = sfsprintf(fp->encode.mark, sizeof(fp->encode.mark), "%-.*s/", len, path);
963 path = fp->encode.mark;
964 }
965 s = (unsigned char*)path;
966 if (len <= 0)
967 len = strlen(path);
968 if (len < sizeof(fp->encode.path))
969 e = s + len++;
970 else
971 {
972 len = sizeof(fp->encode.path) - 1;
973 e = s + len;
974 }
975 p = (unsigned char*)fp->encode.path;
976 while (s < e)
977 {
978 if (*s != *p++)
979 break;
980 s++;
981 }
982 n = s - (unsigned char*)path;
983 switch (fp->method)
984 {
985 case FF_gnu:
986 d = n - fp->encode.prefix;
987 if (d >= -127 && d <= 127)
988 sfputc(fp->fp, d & 0xff);
989 else
990 {
991 sfputc(fp->fp, 0x80);
992 sfputc(fp->fp, (d >> 8) & 0xff);
993 sfputc(fp->fp, d & 0xff);
994 }
995 fp->encode.prefix = n;
996 sfputr(fp->fp, (char*)s, 0);
997 break;
998 case FF_old:
999 sfprintf(fp->fp, "%ld", n - fp->encode.prefix + FF_OFF);
1000 fp->encode.prefix = n;
1001 sfputc(fp->fp, ' ');
1002 p = s;
1003 while (s < e)
1004 {
1005 n = *s++;
1006 if (s >= e)
1007 break;
1008 fp->encode.code[n][*s++]++;
1009 }
1010 while (p < e)
1011 {
1012 if ((n = *p++) < FF_MIN || n >= FF_MAX)
1013 n = '?';
1014 sfputc(fp->fp, n);
1015 }
1016 sfputc(fp->fp, 0);
1017 break;
1018 case FF_typ:
1019 if (type)
1020 {
1021 type = (const char*)typefix((char*)fp->encode.bigram, sizeof(fp->encode.bigram), type);
1022 if (x = (Type_t*)dtmatch(fp->encode.namedict, type))
1023 u = x->index;
1024 else if (!(x = newof(0, Type_t, 1, strlen(type) + 1)))
1025 u = 0;
1026 else
1027 {
1028 u = x->index = ++fp->types;
1029 strcpy(x->name, type);
1030 dtinsert(fp->encode.namedict, x);
1031 dtinsert(fp->encode.indexdict, x);
1032 }
1033 }
1034 else
1035 u = 0;
1036 sfputu(fp->fp, u);
1037 /*FALLTHROUGH...*/
1038 case FF_dir:
1039 d = n - fp->encode.prefix;
1040 sfputl(fp->fp, d);
1041 fp->encode.prefix = n;
1042 sfputr(fp->fp, (char*)s, 0);
1043 break;
1044 }
1045 memcpy(fp->encode.path, path, len);
1046 return 0;
1047 }
1048
1049 /*
1050 * findsync() helper
1051 */
1052
1053 static int
finddone(register Find_t * fp)1054 finddone(register Find_t* fp)
1055 {
1056 int r;
1057
1058 if (sfsync(fp->fp))
1059 {
1060 if (fp->disc->errorf)
1061 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfsync]", fp->encode.file);
1062 return -1;
1063 }
1064 if (sferror(fp->fp))
1065 {
1066 if (fp->disc->errorf)
1067 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sferror]", fp->encode.file);
1068 return -1;
1069 }
1070 r = sfclose(fp->fp);
1071 fp->fp = 0;
1072 if (r)
1073 {
1074 if (fp->disc->errorf)
1075 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfclose]", fp->encode.file);
1076 return -1;
1077 }
1078 return 0;
1079 }
1080
1081 /*
1082 * finish the code table
1083 */
1084
1085 static int
findsync(register Find_t * fp)1086 findsync(register Find_t* fp)
1087 {
1088 register char* s;
1089 register int n;
1090 register int m;
1091 register int d;
1092 register Type_t* x;
1093 char* t;
1094 int b;
1095 long z;
1096 Sfio_t* sp;
1097
1098 switch (fp->method)
1099 {
1100 case FF_dir:
1101 case FF_gnu:
1102 /*
1103 * replace the real file with the temp file
1104 */
1105
1106 if (finddone(fp))
1107 goto bad;
1108 remove(fp->encode.file);
1109 if (rename(fp->encode.temp, fp->encode.file))
1110 {
1111 if (fp->disc->errorf)
1112 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot rename from tmp file %s", fp->encode.file, fp->encode.temp);
1113 remove(fp->encode.temp);
1114 return -1;
1115 }
1116 break;
1117 case FF_old:
1118 /*
1119 * determine the top FF_MAX bigrams
1120 */
1121
1122 for (n = 0; n < FF_MAX; n++)
1123 for (m = 0; m < FF_MAX; m++)
1124 fp->encode.hits[fp->encode.code[n][m]]++;
1125 fp->encode.hits[0] = 0;
1126 m = 1;
1127 for (n = USHRT_MAX; n >= 0; n--)
1128 if (d = fp->encode.hits[n])
1129 {
1130 fp->encode.hits[n] = m;
1131 if ((m += d) > FF_MAX)
1132 break;
1133 }
1134 while (--n >= 0)
1135 fp->encode.hits[n] = 0;
1136 for (n = FF_MAX - 1; n >= 0; n--)
1137 for (m = FF_MAX - 1; m >= 0; m--)
1138 if (fp->encode.hits[fp->encode.code[n][m]])
1139 {
1140 d = fp->encode.code[n][m];
1141 b = fp->encode.hits[d] - 1;
1142 fp->encode.code[n][m] = b + FF_MAX;
1143 if (fp->encode.hits[d]++ >= FF_MAX)
1144 fp->encode.hits[d] = 0;
1145 fp->encode.bigram[b *= 2] = n;
1146 fp->encode.bigram[b + 1] = m;
1147 }
1148 else
1149 fp->encode.code[n][m] = 0;
1150
1151 /*
1152 * commit the real file
1153 */
1154
1155 if (sfseek(fp->fp, (Sfoff_t)0, SEEK_SET))
1156 {
1157 if (fp->disc->errorf)
1158 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot rewind tmp file");
1159 return -1;
1160 }
1161 if (!(sp = sfopen(NiL, fp->encode.file, "w")))
1162 goto badcreate;
1163
1164 /*
1165 * dump the bigrams
1166 */
1167
1168 sfwrite(sp, fp->encode.bigram, sizeof(fp->encode.bigram));
1169
1170 /*
1171 * encode the massaged paths
1172 */
1173
1174 while (s = sfgetr(fp->fp, 0, 0))
1175 {
1176 z = strtol(s, &t, 0);
1177 s = t;
1178 if (z < 0 || z > 2 * FF_OFF)
1179 {
1180 sfputc(sp, FF_ESC);
1181 sfputc(sp, (z >> 24));
1182 sfputc(sp, (z >> 16));
1183 sfputc(sp, (z >> 8));
1184 sfputc(sp, z);
1185 }
1186 else
1187 sfputc(sp, z);
1188 while (n = *s++)
1189 {
1190 if (!(m = *s++))
1191 {
1192 sfputc(sp, n);
1193 break;
1194 }
1195 if (d = fp->encode.code[n][m])
1196 sfputc(sp, d);
1197 else
1198 {
1199 sfputc(sp, n);
1200 sfputc(sp, m);
1201 }
1202 }
1203 }
1204 sfclose(fp->fp);
1205 fp->fp = sp;
1206 if (finddone(fp))
1207 goto bad;
1208 break;
1209 case FF_typ:
1210 if (finddone(fp))
1211 goto bad;
1212 if (!(fp->fp = sfopen(NiL, fp->encode.temp, "r")))
1213 {
1214 if (fp->disc->errorf)
1215 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot read tmp file", fp->encode.temp);
1216 remove(fp->encode.temp);
1217 return -1;
1218 }
1219
1220 /*
1221 * commit the output file
1222 */
1223
1224 if (!(sp = sfopen(NiL, fp->encode.file, "w")))
1225 goto badcreate;
1226
1227 /*
1228 * write the header magic
1229 */
1230
1231 sfputc(sp, 0);
1232 sfputr(sp, FF_typ_magic, 0);
1233
1234 /*
1235 * write the type table in index order starting with 1
1236 */
1237
1238 for (x = (Type_t*)dtfirst(fp->encode.indexdict); x; x = (Type_t*)dtnext(fp->encode.indexdict, x))
1239 sfputr(sp, x->name, 0);
1240 sfputc(sp, 0);
1241
1242 /*
1243 * append the front compressed strings
1244 */
1245
1246 if (sfmove(fp->fp, sp, SF_UNBOUND, -1) < 0 || !sfeof(fp->fp))
1247 {
1248 sfclose(sp);
1249 if (fp->disc->errorf)
1250 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot append codes", fp->encode.file);
1251 goto bad;
1252 }
1253 sfclose(fp->fp);
1254 fp->fp = sp;
1255 if (finddone(fp))
1256 goto bad;
1257 remove(fp->encode.temp);
1258 break;
1259 }
1260 return 0;
1261 badcreate:
1262 if (fp->disc->errorf)
1263 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot write codes", fp->encode.file);
1264 bad:
1265 if (fp->fp)
1266 {
1267 sfclose(fp->fp);
1268 fp->fp = 0;
1269 }
1270 remove(fp->encode.temp);
1271 return -1;
1272 }
1273
1274 /*
1275 * close an open fastfind stream
1276 */
1277
1278 int
findclose(register Find_t * fp)1279 findclose(register Find_t* fp)
1280 {
1281 int n = 0;
1282
1283 if (!fp)
1284 return -1;
1285 if (fp->generate)
1286 {
1287 n = findsync(fp);
1288 if (fp->encode.indexdict)
1289 dtclose(fp->encode.indexdict);
1290 if (fp->encode.namedict)
1291 dtclose(fp->encode.namedict);
1292 }
1293 else
1294 {
1295 if (fp->decode.match)
1296 regfree(&fp->decode.re);
1297 n = 0;
1298 }
1299 if (fp->fp)
1300 sfclose(fp->fp);
1301 vmclose(fp->vm);
1302 return n;
1303 }
1304