1 #pragma prototyped
2 /*
3 * original code
4 *
5 * James A. Woods, Informatics General Corporation,
6 * NASA Ames Research Center, 6/81.
7 * Usenix ;login:, February/March, 1983, p. 8.
8 *
9 * discipline/method interface
10 *
11 * Glenn Fowler
12 * AT&T Research
13 * modified from the original BSD source
14 *
15 * 'fastfind' scans a file list for the full pathname of a file
16 * given only a piece of the name. The list is processed with
17 * with "front-compression" and bigram coding. Front compression reduces
18 * space by a factor of 4-5, bigram coding by a further 20-25%.
19 *
20 * there are 4 methods:
21 *
22 * FF_old original with 7 bit bigram encoding (no magic)
23 * FF_gnu 8 bit clean front compression (FF_gnu_magic)
24 * FF_dir FF_gnu with sfgetl/sfputl and trailing / on dirs (FF_dir_magic)
25 * FF_typ FF_dir with (mime) types (FF_typ_magic)
26 *
27 * the bigram encoding steals the eighth bit (that's why its FF_old)
28 * maybe one day we'll limit it to readonly:
29 *
30 * 0-2*FF_OFF likeliest differential counts + offset to make nonnegative
31 * FF_ESC 4 byte big-endian out-of-range count+FF_OFF follows
32 * FF_MIN-FF_MAX ascii residue
33 * >=FF_MAX bigram codes
34 *
35 * a two-tiered string search technique is employed
36 *
37 * a metacharacter-free subpattern and partial pathname is matched
38 * backwards to avoid full expansion of the pathname list
39 *
40 * then the actual shell glob-style regular expression (if in this form)
41 * is matched against the candidate pathnames using the slower regexec()
42 *
43 * The original BSD code is covered by the BSD license:
44 *
45 * Copyright (c) 1985, 1993, 1999
46 * The Regents of the University of California. All rights reserved.
47 *
48 * Redistribution and use in source and binary forms, with or without
49 * modification, are permitted provided that the following conditions
50 * are met:
51 * 1. Redistributions of source code must retain the above copyright
52 * notice, this list of conditions and the following disclaimer.
53 * 2. Redistributions in binary form must reproduce the above copyright
54 * notice, this list of conditions and the following disclaimer in the
55 * documentation and/or other materials provided with the distribution.
56 * 3. Neither the name of the University nor the names of its contributors
57 * may be used to endorse or promote products derived from this software
58 * without specific prior written permission.
59 *
60 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
61 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
62 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
63 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
64 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
65 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
66 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
68 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
69 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
70 * SUCH DAMAGE.
71 */
72
73 static const char id[] = "\n@(#)$Id: fastfind (AT&T Research) 2002-10-02 $\0\n";
74
75 static const char lib[] = "libast:fastfind";
76
77 #include "findlib.h"
78
79 #define FIND_MATCH "*/(find|locate)/*"
80
81 /*
82 * this db could be anywhere
83 * findcodes[] directories are checked for findnames[i]
84 */
85
86 static char* findcodes[] =
87 {
88 0,
89 0,
90 FIND_CODES,
91 "/usr/local/share/lib",
92 "/usr/local/lib",
93 "/usr/share/lib",
94 "/usr/lib",
95 "/var/spool",
96 "/usr/local/var",
97 "/var/lib",
98 "/var/lib/slocate",
99 "/var/db",
100 };
101
102 static char* findnames[] =
103 {
104 "find/codes",
105 "find/find.codes",
106 "locate/locatedb",
107 "locatedb",
108 "locate.database",
109 "slocate.db",
110 };
111
112 /*
113 * convert t to lower case and drop leading x- and x- after /
114 * converted value copied to b of size n
115 */
116
117 char*
typefix(char * buf,size_t n,register const char * t)118 typefix(char* buf, size_t n, register const char* t)
119 {
120 register int c;
121 register char* b = buf;
122
123 if ((*t == 'x' || *t == 'X') && *(t + 1) == '-')
124 t += 2;
125 while (c = *t++)
126 {
127 if (isupper(c))
128 c = tolower(c);
129 if ((*b++ = c) == '/' && (*t == 'x' || *t == 'X') && *(t + 1) == '-')
130 t += 2;
131 }
132 *b = 0;
133 return buf;
134 }
135
136 /*
137 * return a fastfind stream handle for pattern
138 */
139
140 Find_t*
findopen(const char * file,const char * pattern,const char * type,Finddisc_t * disc)141 findopen(const char* file, const char* pattern, const char* type, Finddisc_t* disc)
142 {
143 register Find_t* fp;
144 register char* p;
145 register char* s;
146 register char* b;
147 register int i;
148 register int j;
149 char* path;
150 int brace = 0;
151 int paren = 0;
152 int k;
153 int q;
154 int fd;
155 int uid;
156 Vmalloc_t* vm;
157 Type_t* tp;
158 struct stat st;
159
160
161 if (!(vm = vmopen(Vmdcheap, Vmbest, 0)))
162 goto nospace;
163
164 /*
165 * NOTE: searching for FIND_CODES would be much simpler if we
166 * just stuck with our own, but we also support GNU
167 * locate codes and have to search for the one of a
168 * bazillion possible names for that file
169 */
170
171 if (!findcodes[1])
172 findcodes[1] = getenv(FIND_CODES_ENV);
173 if (disc->flags & FIND_GENERATE)
174 {
175 if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, sizeof(Encode_t) - sizeof(Code_t))))
176 goto nospace;
177 fp->vm = vm;
178 fp->id = lib;
179 fp->disc = disc;
180 fp->generate = 1;
181 if (file && (!*file || streq(file, "-")))
182 file = 0;
183 uid = geteuid();
184 j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes);
185
186 /*
187 * look for the codes file, but since it may not exist yet,
188 * also look for the containing directory if i<2 or if
189 * it is sufficiently qualified (FIND_MATCH)
190 */
191
192 for (i = 0; i < j; i++)
193 if (path = findcodes[i])
194 {
195 if (*path == '/')
196 {
197 if (!stat(path, &st))
198 {
199 if (S_ISDIR(st.st_mode))
200 {
201 for (k = 0; k < elementsof(findnames); k++)
202 {
203 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s/%s", path, findnames[k]);
204 if (!eaccess(fp->encode.file, R_OK|W_OK))
205 {
206 path = fp->encode.file;
207 break;
208 }
209 if (strchr(findnames[k], '/') && (b = strrchr(fp->encode.file, '/')))
210 {
211 *b = 0;
212 if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
213 {
214 *b = '/';
215 path = fp->encode.file;
216 break;
217 }
218 }
219 }
220 if (k < elementsof(findnames))
221 break;
222 }
223 else if (st.st_uid == uid && (st.st_mode & S_IWUSR))
224 {
225 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path);
226 path = fp->encode.file;
227 break;
228 }
229 }
230 else if (i < 2 || strmatch(path, FIND_MATCH))
231 {
232 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path);
233 if (b = strrchr(fp->encode.file, '/'))
234 {
235 *b = 0;
236 if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
237 {
238 *b = '/';
239 path = fp->encode.file;
240 break;
241 }
242 }
243 }
244 }
245 else if (pathpath(path, "", PATH_REGULAR|PATH_READ|PATH_WRITE, fp->encode.file, sizeof(fp->encode.file)))
246 {
247 path = fp->encode.file;
248 break;
249 }
250 else if (b = strrchr(path, '/'))
251 {
252 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%-.*s", b - path, path);
253 if (pathpath(fp->encode.file, "", PATH_EXECUTE|PATH_READ|PATH_WRITE, fp->encode.temp, sizeof(fp->encode.temp)) &&
254 !stat(fp->encode.temp, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR))
255 {
256 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s%s", fp->encode.temp, b);
257 path = fp->encode.file;
258 break;
259 }
260 }
261 }
262 if (i >= j)
263 {
264 if (fp->disc->errorf)
265 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]);
266 goto drop;
267 }
268 if (fp->disc->flags & FIND_OLD)
269 {
270 /*
271 * FF_old generates temp data that is read
272 * in a second pass to generate the real codes
273 */
274
275 fp->method = FF_old;
276 if (!(fp->fp = sftmp(32 * PATH_MAX)))
277 {
278 if (fp->disc->errorf)
279 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot create tmp file");
280 goto drop;
281 }
282 }
283 else
284 {
285 /*
286 * the rest generate into a temp file that
287 * is simply renamed on completion
288 */
289
290 if (s = strrchr(path, '/'))
291 {
292 *s = 0;
293 p = path;
294 }
295 else
296 p = ".";
297 if (!pathtemp(fp->encode.temp, sizeof(fp->encode.temp), p, "ff", &fd))
298 {
299 if (fp->disc->errorf)
300 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot create tmp file in this directory", p ? p : ".");
301 goto drop;
302 }
303 if (s)
304 *s = '/';
305 if (!(fp->fp = sfnew(NiL, NiL, (size_t)SF_UNBOUND, fd, SF_WRITE)))
306 {
307 if (fp->disc->errorf)
308 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot open tmp file", fp->encode.temp);
309 close(fd);
310 goto drop;
311 }
312 if (fp->disc->flags & FIND_TYPE)
313 {
314 fp->method = FF_typ;
315 fp->encode.namedisc.key = offsetof(Type_t, name);
316 fp->encode.namedisc.link = offsetof(Type_t, byname);
317 fp->encode.indexdisc.key = offsetof(Type_t, index);
318 fp->encode.indexdisc.size = sizeof(unsigned long);
319 fp->encode.indexdisc.link = offsetof(Type_t, byindex);
320 s = "system/dir";
321 if (!(fp->encode.namedict = dtopen(&fp->encode.namedisc, Dtoset)) || !(fp->encode.indexdict = dtopen(&fp->encode.indexdisc, Dtoset)) || !(tp = newof(0, Type_t, 1, strlen(s) + 1)))
322 {
323 if (fp->encode.namedict)
324 dtclose(fp->encode.namedict);
325 if (fp->disc->errorf)
326 (*fp->disc->errorf)(fp, fp->disc, 2, "cannot allocate type table");
327 goto drop;
328 }
329
330 /*
331 * type index 1 is always system/dir
332 */
333
334 tp->index = ++fp->types;
335 strcpy(tp->name, s);
336 dtinsert(fp->encode.namedict, tp);
337 dtinsert(fp->encode.indexdict, tp);
338 }
339 else if (fp->disc->flags & FIND_GNU)
340 {
341 fp->method = FF_gnu;
342 sfputc(fp->fp, 0);
343 sfputr(fp->fp, FF_gnu_magic, 0);
344 }
345 else
346 {
347 fp->method = FF_dir;
348 sfputc(fp->fp, 0);
349 sfputr(fp->fp, FF_dir_magic, 0);
350 }
351 }
352 }
353 else
354 {
355 i = sizeof(Decode_t) + sizeof(Code_t);
356 if (!pattern || !*pattern)
357 pattern = "*";
358 i += (j = 2 * (strlen(pattern) + 1));
359 if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, i)))
360 {
361 vmclose(vm);
362 return 0;
363 }
364 fp->vm = vm;
365 fp->id = lib;
366 fp->disc = disc;
367 if (disc->flags & FIND_ICASE)
368 fp->decode.ignorecase = 1;
369 j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes);
370 for (i = 0; i < j; i++)
371 if (path = findcodes[i])
372 {
373 if (*path == '/')
374 {
375 if (!stat(path, &st))
376 {
377 if (S_ISDIR(st.st_mode))
378 {
379 for (k = 0; k < elementsof(findnames); k++)
380 {
381 sfsprintf(fp->decode.path, sizeof(fp->decode.path), "%s/%s", path, findnames[k]);
382 if (fp->fp = sfopen(NiL, fp->decode.path, "r"))
383 {
384 path = fp->decode.path;
385 break;
386 }
387 }
388 if (fp->fp)
389 break;
390 }
391 else if (fp->fp = sfopen(NiL, path, "r"))
392 break;
393 }
394 }
395 else if ((path = pathpath(path, "", PATH_REGULAR|PATH_READ, fp->decode.path, sizeof(fp->decode.path))) && (fp->fp = sfopen(NiL, path, "r")))
396 break;
397 }
398 if (!fp->fp)
399 {
400 if (fp->disc->errorf)
401 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]);
402 goto drop;
403 }
404 if (fstat(sffileno(fp->fp), &st))
405 {
406 if (fp->disc->errorf)
407 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot stat codes", path);
408 goto drop;
409 }
410 if (fp->secure = ((st.st_mode & (S_IRGRP|S_IROTH)) == S_IRGRP) && st.st_gid == getegid() && getegid() != getgid())
411 setgid(getgid());
412 fp->stamp = st.st_mtime;
413 b = (s = fp->decode.temp) + 1;
414 for (i = 0; i < elementsof(fp->decode.bigram1); i++)
415 {
416 if ((j = sfgetc(fp->fp)) == EOF)
417 goto invalid;
418 if (!(*s++ = fp->decode.bigram1[i] = j) && i)
419 {
420 i = -i;
421 break;
422 }
423 if ((j = sfgetc(fp->fp)) == EOF)
424 goto invalid;
425 if (!(*s++ = fp->decode.bigram2[i] = j) && (i || fp->decode.bigram1[0] >= '0' && fp->decode.bigram1[0] <= '1'))
426 break;
427 }
428 if (streq(b, FF_typ_magic))
429 {
430 if (type)
431 {
432 type = (const char*)typefix(fp->decode.bigram2, sizeof(fp->decode.bigram2), type);
433 memset(fp->decode.bigram1, 0, sizeof(fp->decode.bigram1));
434 }
435 fp->method = FF_typ;
436 for (j = 0, i = 1;; i++)
437 {
438 if (!(s = sfgetr(fp->fp, 0, 0)))
439 goto invalid;
440 if (!*s)
441 break;
442 if (type && strmatch(s, type))
443 {
444 FF_SET_TYPE(fp, i);
445 j++;
446 }
447 }
448 if (type && !j)
449 goto drop;
450 fp->types = j;
451 }
452 else if (streq(b, FF_dir_magic))
453 fp->method = FF_dir;
454 else if (streq(b, FF_gnu_magic))
455 fp->method = FF_gnu;
456 else if (!*b && *--b >= '0' && *b <= '1')
457 {
458 fp->method = FF_gnu;
459 while (j = sfgetc(fp->fp))
460 {
461 if (j == EOF || fp->decode.count >= sizeof(fp->decode.path))
462 goto invalid;
463 fp->decode.path[fp->decode.count++] = j;
464 }
465 }
466 else
467 {
468 fp->method = FF_old;
469 if (i < 0)
470 {
471 if ((j = sfgetc(fp->fp)) == EOF)
472 goto invalid;
473 fp->decode.bigram2[i = -i] = j;
474 }
475 while (++i < elementsof(fp->decode.bigram1))
476 {
477 if ((j = sfgetc(fp->fp)) == EOF)
478 goto invalid;
479 fp->decode.bigram1[i] = j;
480 if ((j = sfgetc(fp->fp)) == EOF)
481 goto invalid;
482 fp->decode.bigram2[i] = j;
483 }
484 if ((fp->decode.peek = sfgetc(fp->fp)) != FF_OFF)
485 goto invalid;
486 }
487
488 /*
489 * set up the physical dir table
490 */
491
492 if (disc->version >= 19980301L)
493 {
494 fp->verifyf = disc->verifyf;
495 if (disc->dirs && *disc->dirs)
496 {
497 for (k = 0; disc->dirs[k]; k++);
498 if (k == 1 && streq(disc->dirs[0], "/"))
499 k = 0;
500 if (k)
501 {
502 if (!(fp->dirs = vmnewof(fp->vm, 0, char*, 2 * k + 1, 0)))
503 goto drop;
504 if (!(fp->lens = vmnewof(fp->vm, 0, int, 2 * k, 0)))
505 goto drop;
506 p = 0;
507 b = fp->decode.temp;
508 j = fp->method == FF_old || fp->method == FF_gnu;
509
510 /*
511 * fill the dir list with logical and
512 * physical names since we don't know
513 * which way the db was encoded (it
514 * could be *both* ways)
515 */
516
517 for (i = q = 0; i < k; i++)
518 {
519 if (*(s = disc->dirs[i]) == '/')
520 sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s", s);
521 else if (!p && !(p = getcwd(fp->decode.path, sizeof(fp->decode.path))))
522 goto nospace;
523 else
524 sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s/%s", p, s);
525 s = pathcanon(b, sizeof(fp->decode.temp), 0);
526 *s = '/';
527 *(s + 1) = 0;
528 if (!(fp->dirs[q] = vmstrdup(fp->vm, b)))
529 goto nospace;
530 if (j)
531 (fp->dirs[q])[s - b] = 0;
532 q++;
533 *s = 0;
534 s = pathcanon(b, sizeof(fp->decode.temp), PATH_PHYSICAL);
535 *s = '/';
536 *(s + 1) = 0;
537 if (!strneq(b, fp->dirs[q - 1], s - b))
538 {
539 if (!(fp->dirs[q] = vmstrdup(fp->vm, b)))
540 goto nospace;
541 if (j)
542 (fp->dirs[q])[s - b] = 0;
543 q++;
544 }
545 }
546 strsort(fp->dirs, q, strcasecmp);
547 for (i = 0; i < q; i++)
548 fp->lens[i] = strlen(fp->dirs[i]);
549 }
550 }
551 }
552 if (fp->verifyf || (disc->flags & FIND_VERIFY))
553 {
554 if (fp->method != FF_dir && fp->method != FF_typ)
555 {
556 if (fp->disc->errorf)
557 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s code format does not support directory verification", path, fp->method == FF_gnu ? FF_gnu_magic : "OLD-BIGRAM");
558 goto drop;
559 }
560 fp->verify = 1;
561 }
562
563 /*
564 * extract last glob-free subpattern in name for fast pre-match
565 * prepend 0 for backwards match
566 */
567
568 if (p = s = (char*)pattern)
569 {
570 b = fp->decode.pattern;
571 for (;;)
572 {
573 switch (*b++ = *p++)
574 {
575 case 0:
576 break;
577 case '\\':
578 s = p;
579 if (!*p++)
580 break;
581 continue;
582 case '[':
583 if (!brace)
584 {
585 brace++;
586 if (*p == ']')
587 p++;
588 }
589 continue;
590 case ']':
591 if (brace)
592 {
593 brace--;
594 s = p;
595 }
596 continue;
597 case '(':
598 if (!brace)
599 paren++;
600 continue;
601 case ')':
602 if (!brace && paren > 0 && !--paren)
603 s = p;
604 continue;
605 case '|':
606 case '&':
607 if (!brace && !paren)
608 {
609 s = "";
610 break;
611 }
612 continue;
613 case '*':
614 case '?':
615 s = p;
616 continue;
617 default:
618 continue;
619 }
620 break;
621 }
622 if (s != pattern && !streq(pattern, "*"))
623 {
624 fp->decode.match = 1;
625 if (i = regcomp(&fp->decode.re, pattern, REG_SHELL|REG_AUGMENTED|(fp->decode.ignorecase?REG_ICASE:0)))
626 {
627 if (disc->errorf)
628 {
629 regerror(i, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp));
630 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", pattern, fp->decode.temp);
631 }
632 goto drop;
633 }
634 }
635 if (*s)
636 {
637 *b++ = 0;
638 while (i = *s++)
639 *b++ = i;
640 *b-- = 0;
641 fp->decode.end = b;
642 if (fp->decode.ignorecase)
643 for (s = fp->decode.pattern; s <= b; s++)
644 if (isupper(*s))
645 *s = tolower(*s);
646 }
647 }
648 }
649 return fp;
650 nospace:
651 if (disc->errorf)
652 (*fp->disc->errorf)(fp, fp->disc, 2, "out of space");
653 if (!vm)
654 return 0;
655 if (!fp)
656 {
657 vmclose(vm);
658 return 0;
659 }
660 goto drop;
661 invalid:
662 if (fp->disc->errorf)
663 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: invalid codes", path);
664 drop:
665 if (!fp->generate && fp->decode.match)
666 regfree(&fp->decode.re);
667 if (fp->fp)
668 sfclose(fp->fp);
669 vmclose(fp->vm);
670 return 0;
671 }
672
673 /*
674 * return the next fastfind path
675 * 0 returned when list exhausted
676 */
677
678 char*
findread(register Find_t * fp)679 findread(register Find_t* fp)
680 {
681 register char* p;
682 register char* q;
683 register char* s;
684 register char* b;
685 register char* e;
686 register int c;
687 register int n;
688 register int m;
689 int ignorecase;
690 int t;
691 unsigned char w[4];
692 struct stat st;
693
694 if (fp->generate)
695 return 0;
696 if (fp->decode.restore)
697 {
698 *fp->decode.restore = '/';
699 fp->decode.restore = 0;
700 }
701 ignorecase = fp->decode.ignorecase ? STR_ICASE : 0;
702 c = fp->decode.peek;
703 next:
704 for (;;)
705 {
706 switch (fp->method)
707 {
708 case FF_dir:
709 t = 0;
710 n = sfgetl(fp->fp);
711 goto grab;
712 case FF_gnu:
713 if ((c = sfgetc(fp->fp)) == EOF)
714 return 0;
715 if (c == 0x80)
716 {
717 if ((c = sfgetc(fp->fp)) == EOF)
718 return 0;
719 n = c << 8;
720 if ((c = sfgetc(fp->fp)) == EOF)
721 return 0;
722 n |= c;
723 if (n & 0x8000)
724 n = (n - 0xffff) - 1;
725 }
726 else if ((n = c) & 0x80)
727 n = (n - 0xff) - 1;
728 t = 0;
729 goto grab;
730 case FF_typ:
731 t = sfgetu(fp->fp);
732 n = sfgetl(fp->fp);
733 grab:
734 p = fp->decode.path + (fp->decode.count += n);
735 do
736 {
737 if ((c = sfgetc(fp->fp)) == EOF)
738 return 0;
739 } while (*p++ = c);
740 p -= 2;
741 break;
742 case FF_old:
743 if (c == EOF)
744 {
745 fp->decode.peek = c;
746 return 0;
747 }
748 if (c == FF_ESC)
749 {
750 if (sfread(fp->fp, w, sizeof(w)) != sizeof(w))
751 return 0;
752 if (fp->decode.swap >= 0)
753 {
754 c = (int32_t)((w[0] << 24) | (w[1] << 16) | (w[2] << 8) | w[3]);
755 if (!fp->decode.swap)
756 {
757 /*
758 * the old format uses machine
759 * byte order; this test uses
760 * the smallest magnitude of
761 * both byte orders on the
762 * first encoded path motion
763 * to determine the original
764 * byte order
765 */
766
767 m = c;
768 if (m < 0)
769 m = -m;
770 n = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
771 if (n < 0)
772 n = -n;
773 if (m < n)
774 fp->decode.swap = 1;
775 else
776 {
777 fp->decode.swap = -1;
778 c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
779 }
780 }
781 }
782 else
783 c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]);
784 }
785 fp->decode.count += c - FF_OFF;
786 for (p = fp->decode.path + fp->decode.count; (c = sfgetc(fp->fp)) > FF_ESC;)
787 if (c & (1<<(CHAR_BIT-1)))
788 {
789 *p++ = fp->decode.bigram1[c & ((1<<(CHAR_BIT-1))-1)];
790 *p++ = fp->decode.bigram2[c & ((1<<(CHAR_BIT-1))-1)];
791 }
792 else
793 *p++ = c;
794 *p-- = 0;
795 t = 0;
796 break;
797 }
798 b = fp->decode.path;
799 if (fp->decode.found)
800 fp->decode.found = 0;
801 else
802 b += fp->decode.count;
803 if (fp->dirs)
804 for (;;)
805 {
806 if (!*fp->dirs)
807 return 0;
808
809 /*
810 * use the ordering and lengths to prune
811 * comparison function calls
812 * (*fp->dirs)[*fp->lens]=='/' if its
813 * already been matched
814 */
815
816 if ((n = p - fp->decode.path + 1) > (m = *fp->lens))
817 {
818 if (!(*fp->dirs)[m])
819 goto next;
820 if (!strncasecmp(*fp->dirs, fp->decode.path, m))
821 break;
822 }
823 else if (n == m)
824 {
825 if (!(*fp->dirs)[m])
826 {
827 if (!(n = strcasecmp(*fp->dirs, fp->decode.path)) && (ignorecase || !strcmp(*fp->dirs, fp->decode.path)))
828 {
829 if (m > 0)
830 {
831 (*fp->dirs)[m] = '/';
832 if ((*fp->dirs)[m - 1] != '/')
833 (*fp->dirs)[++(*fp->lens)] = '/';
834 }
835 break;
836 }
837 if (n >= 0)
838 goto next;
839 }
840 }
841 else if (!(*fp->dirs)[m])
842 goto next;
843 fp->dirs++;
844 fp->lens++;
845 }
846 if (fp->verify && (*p == '/' || t == 1))
847 {
848 if ((n = p - fp->decode.path))
849 *p = 0;
850 else
851 n = 1;
852 if (fp->verifyf)
853 n = (*fp->verifyf)(fp, fp->decode.path, n, fp->disc);
854 else if (stat(fp->decode.path, &st))
855 n = -1;
856 else if ((unsigned long)st.st_mtime > fp->stamp)
857 n = 1;
858 else
859 n = 0;
860 *p = '/';
861
862 /*
863 * n<0 skip this subtree
864 * n==0 keep as is
865 * n>0 read this dir now
866 */
867
868 /* NOT IMPLEMENTED YET */
869 }
870 if (FF_OK_TYPE(fp, t))
871 {
872 if (fp->decode.end)
873 {
874 if (*(s = p) == '/')
875 s--;
876 if (*fp->decode.pattern == '/' && b > fp->decode.path)
877 b--;
878 for (; s >= b; s--)
879 if (*s == *fp->decode.end || ignorecase && tolower(*s) == *fp->decode.end)
880 {
881 if (ignorecase)
882 for (e = fp->decode.end - 1, q = s - 1; *e && (*q == *e || tolower(*q) == *e); e--, q--);
883 else
884 for (e = fp->decode.end - 1, q = s - 1; *e && *q == *e; e--, q--);
885 if (!*e)
886 {
887 fp->decode.found = 1;
888 if (!fp->decode.match || strgrpmatch(fp->decode.path, fp->decode.pattern, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT|ignorecase))
889 {
890 fp->decode.peek = c;
891 if (*p == '/')
892 *(fp->decode.restore = p) = 0;
893 if (!fp->secure || !access(fp->decode.path, F_OK))
894 return fp->decode.path;
895 }
896 break;
897 }
898 }
899 }
900 else if (!fp->decode.match || !(n = regexec(&fp->decode.re, fp->decode.path, 0, NiL, 0)))
901 {
902 fp->decode.peek = c;
903 if (*p == '/' && p > fp->decode.path)
904 *(fp->decode.restore = p) = 0;
905 if (!fp->secure || !access(fp->decode.path, F_OK))
906 return fp->decode.path;
907 }
908 else if (n != REG_NOMATCH)
909 {
910 if (fp->disc->errorf)
911 {
912 regerror(n, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp));
913 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", fp->decode.pattern, fp->decode.temp);
914 }
915 return 0;
916 }
917 }
918 }
919 }
920
921 /*
922 * add path to the code table
923 * paths are assumed to be in sort order
924 */
925
926 int
findwrite(register Find_t * fp,const char * path,size_t len,const char * type)927 findwrite(register Find_t* fp, const char* path, size_t len, const char* type)
928 {
929 register unsigned char* s;
930 register unsigned char* e;
931 register unsigned char* p;
932 register int n;
933 register int d;
934 register Type_t* x;
935 register unsigned long u;
936
937 if (!fp->generate)
938 return -1;
939 if (type && fp->method == FF_dir)
940 {
941 len = sfsprintf(fp->encode.mark, sizeof(fp->encode.mark), "%-.*s/", len, path);
942 path = fp->encode.mark;
943 }
944 s = (unsigned char*)path;
945 if (len <= 0)
946 len = strlen(path);
947 if (len < sizeof(fp->encode.path))
948 e = s + len++;
949 else
950 {
951 len = sizeof(fp->encode.path) - 1;
952 e = s + len;
953 }
954 p = (unsigned char*)fp->encode.path;
955 while (s < e)
956 {
957 if (*s != *p++)
958 break;
959 s++;
960 }
961 n = s - (unsigned char*)path;
962 switch (fp->method)
963 {
964 case FF_gnu:
965 d = n - fp->encode.prefix;
966 if (d >= -127 && d <= 127)
967 sfputc(fp->fp, d & 0xff);
968 else
969 {
970 sfputc(fp->fp, 0x80);
971 sfputc(fp->fp, (d >> 8) & 0xff);
972 sfputc(fp->fp, d & 0xff);
973 }
974 fp->encode.prefix = n;
975 sfputr(fp->fp, (char*)s, 0);
976 break;
977 case FF_old:
978 sfprintf(fp->fp, "%ld", n - fp->encode.prefix + FF_OFF);
979 fp->encode.prefix = n;
980 sfputc(fp->fp, ' ');
981 p = s;
982 while (s < e)
983 {
984 n = *s++;
985 if (s >= e)
986 break;
987 fp->encode.code[n][*s++]++;
988 }
989 while (p < e)
990 {
991 if ((n = *p++) < FF_MIN || n >= FF_MAX)
992 n = '?';
993 sfputc(fp->fp, n);
994 }
995 sfputc(fp->fp, 0);
996 break;
997 case FF_typ:
998 if (type)
999 {
1000 type = (const char*)typefix((char*)fp->encode.bigram, sizeof(fp->encode.bigram), type);
1001 if (x = (Type_t*)dtmatch(fp->encode.namedict, type))
1002 u = x->index;
1003 else if (!(x = newof(0, Type_t, 1, strlen(type) + 1)))
1004 u = 0;
1005 else
1006 {
1007 u = x->index = ++fp->types;
1008 strcpy(x->name, type);
1009 dtinsert(fp->encode.namedict, x);
1010 dtinsert(fp->encode.indexdict, x);
1011 }
1012 }
1013 else
1014 u = 0;
1015 sfputu(fp->fp, u);
1016 /*FALLTHROUGH...*/
1017 case FF_dir:
1018 d = n - fp->encode.prefix;
1019 sfputl(fp->fp, d);
1020 fp->encode.prefix = n;
1021 sfputr(fp->fp, (char*)s, 0);
1022 break;
1023 }
1024 memcpy(fp->encode.path, path, len);
1025 return 0;
1026 }
1027
1028 /*
1029 * findsync() helper
1030 */
1031
1032 static int
finddone(register Find_t * fp)1033 finddone(register Find_t* fp)
1034 {
1035 int r;
1036
1037 if (sfsync(fp->fp))
1038 {
1039 if (fp->disc->errorf)
1040 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfsync]", fp->encode.file);
1041 return -1;
1042 }
1043 if (sferror(fp->fp))
1044 {
1045 if (fp->disc->errorf)
1046 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sferror]", fp->encode.file);
1047 return -1;
1048 }
1049 r = sfclose(fp->fp);
1050 fp->fp = 0;
1051 if (r)
1052 {
1053 if (fp->disc->errorf)
1054 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfclose]", fp->encode.file);
1055 return -1;
1056 }
1057 return 0;
1058 }
1059
1060 /*
1061 * finish the code table
1062 */
1063
1064 static int
findsync(register Find_t * fp)1065 findsync(register Find_t* fp)
1066 {
1067 register char* s;
1068 register int n;
1069 register int m;
1070 register int d;
1071 register Type_t* x;
1072 char* t;
1073 int b;
1074 long z;
1075 Sfio_t* sp;
1076
1077 switch (fp->method)
1078 {
1079 case FF_dir:
1080 case FF_gnu:
1081 /*
1082 * replace the real file with the temp file
1083 */
1084
1085 if (finddone(fp))
1086 goto bad;
1087 remove(fp->encode.file);
1088 if (rename(fp->encode.temp, fp->encode.file))
1089 {
1090 if (fp->disc->errorf)
1091 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot rename from tmp file %s", fp->encode.file, fp->encode.temp);
1092 remove(fp->encode.temp);
1093 return -1;
1094 }
1095 break;
1096 case FF_old:
1097 /*
1098 * determine the top FF_MAX bigrams
1099 */
1100
1101 for (n = 0; n < FF_MAX; n++)
1102 for (m = 0; m < FF_MAX; m++)
1103 fp->encode.hits[fp->encode.code[n][m]]++;
1104 fp->encode.hits[0] = 0;
1105 m = 1;
1106 for (n = USHRT_MAX; n >= 0; n--)
1107 if (d = fp->encode.hits[n])
1108 {
1109 fp->encode.hits[n] = m;
1110 if ((m += d) > FF_MAX)
1111 break;
1112 }
1113 while (--n >= 0)
1114 fp->encode.hits[n] = 0;
1115 for (n = FF_MAX - 1; n >= 0; n--)
1116 for (m = FF_MAX - 1; m >= 0; m--)
1117 if (fp->encode.hits[fp->encode.code[n][m]])
1118 {
1119 d = fp->encode.code[n][m];
1120 b = fp->encode.hits[d] - 1;
1121 fp->encode.code[n][m] = b + FF_MAX;
1122 if (fp->encode.hits[d]++ >= FF_MAX)
1123 fp->encode.hits[d] = 0;
1124 fp->encode.bigram[b *= 2] = n;
1125 fp->encode.bigram[b + 1] = m;
1126 }
1127 else
1128 fp->encode.code[n][m] = 0;
1129
1130 /*
1131 * commit the real file
1132 */
1133
1134 if (sfseek(fp->fp, (Sfoff_t)0, SEEK_SET))
1135 {
1136 if (fp->disc->errorf)
1137 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot rewind tmp file");
1138 return -1;
1139 }
1140 if (!(sp = sfopen(NiL, fp->encode.file, "w")))
1141 goto badcreate;
1142
1143 /*
1144 * dump the bigrams
1145 */
1146
1147 sfwrite(sp, fp->encode.bigram, sizeof(fp->encode.bigram));
1148
1149 /*
1150 * encode the massaged paths
1151 */
1152
1153 while (s = sfgetr(fp->fp, 0, 0))
1154 {
1155 z = strtol(s, &t, 0);
1156 s = t;
1157 if (z < 0 || z > 2 * FF_OFF)
1158 {
1159 sfputc(sp, FF_ESC);
1160 sfputc(sp, (z >> 24));
1161 sfputc(sp, (z >> 16));
1162 sfputc(sp, (z >> 8));
1163 sfputc(sp, z);
1164 }
1165 else
1166 sfputc(sp, z);
1167 while (n = *s++)
1168 {
1169 if (!(m = *s++))
1170 {
1171 sfputc(sp, n);
1172 break;
1173 }
1174 if (d = fp->encode.code[n][m])
1175 sfputc(sp, d);
1176 else
1177 {
1178 sfputc(sp, n);
1179 sfputc(sp, m);
1180 }
1181 }
1182 }
1183 sfclose(fp->fp);
1184 fp->fp = sp;
1185 if (finddone(fp))
1186 goto bad;
1187 break;
1188 case FF_typ:
1189 if (finddone(fp))
1190 goto bad;
1191 if (!(fp->fp = sfopen(NiL, fp->encode.temp, "r")))
1192 {
1193 if (fp->disc->errorf)
1194 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot read tmp file", fp->encode.temp);
1195 remove(fp->encode.temp);
1196 return -1;
1197 }
1198
1199 /*
1200 * commit the output file
1201 */
1202
1203 if (!(sp = sfopen(NiL, fp->encode.file, "w")))
1204 goto badcreate;
1205
1206 /*
1207 * write the header magic
1208 */
1209
1210 sfputc(sp, 0);
1211 sfputr(sp, FF_typ_magic, 0);
1212
1213 /*
1214 * write the type table in index order starting with 1
1215 */
1216
1217 for (x = (Type_t*)dtfirst(fp->encode.indexdict); x; x = (Type_t*)dtnext(fp->encode.indexdict, x))
1218 sfputr(sp, x->name, 0);
1219 sfputc(sp, 0);
1220
1221 /*
1222 * append the front compressed strings
1223 */
1224
1225 if (sfmove(fp->fp, sp, SF_UNBOUND, -1) < 0 || !sfeof(fp->fp))
1226 {
1227 sfclose(sp);
1228 if (fp->disc->errorf)
1229 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot append codes", fp->encode.file);
1230 goto bad;
1231 }
1232 sfclose(fp->fp);
1233 fp->fp = sp;
1234 if (finddone(fp))
1235 goto bad;
1236 remove(fp->encode.temp);
1237 break;
1238 }
1239 return 0;
1240 badcreate:
1241 if (fp->disc->errorf)
1242 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot write codes", fp->encode.file);
1243 bad:
1244 if (fp->fp)
1245 {
1246 sfclose(fp->fp);
1247 fp->fp = 0;
1248 }
1249 remove(fp->encode.temp);
1250 return -1;
1251 }
1252
1253 /*
1254 * close an open fastfind stream
1255 */
1256
1257 int
findclose(register Find_t * fp)1258 findclose(register Find_t* fp)
1259 {
1260 int n = 0;
1261
1262 if (!fp)
1263 return -1;
1264 if (fp->generate)
1265 {
1266 n = findsync(fp);
1267 if (fp->encode.indexdict)
1268 dtclose(fp->encode.indexdict);
1269 if (fp->encode.namedict)
1270 dtclose(fp->encode.namedict);
1271 }
1272 else
1273 {
1274 if (fp->decode.match)
1275 regfree(&fp->decode.re);
1276 n = 0;
1277 }
1278 if (fp->fp)
1279 sfclose(fp->fp);
1280 vmclose(fp->vm);
1281 return n;
1282 }
1283