1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1985-2008 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * Phong Vo <kpv@research.att.com> * 20 * * 21 ***********************************************************************/ 22 #pragma prototyped 23 /* 24 * original code 25 * 26 * James A. Woods, Informatics General Corporation, 27 * NASA Ames Research Center, 6/81. 28 * Usenix ;login:, February/March, 1983, p. 8. 29 * 30 * discipline/method interface 31 * 32 * Glenn Fowler 33 * AT&T Research 34 * modified from the original BSD source 35 * 36 * 'fastfind' scans a file list for the full pathname of a file 37 * given only a piece of the name. The list is processed with 38 * with "front-compression" and bigram coding. Front compression reduces 39 * space by a factor of 4-5, bigram coding by a further 20-25%. 40 * 41 * there are 4 methods: 42 * 43 * FF_old original with 7 bit bigram encoding (no magic) 44 * FF_gnu 8 bit clean front compression (FF_gnu_magic) 45 * FF_dir FF_gnu with sfgetl/sfputl and trailing / on dirs (FF_dir_magic) 46 * FF_typ FF_dir with (mime) types (FF_typ_magic) 47 * 48 * the bigram encoding steals the eighth bit (that's why its FF_old) 49 * maybe one day we'll limit it to readonly: 50 * 51 * 0-2*FF_OFF likeliest differential counts + offset to make nonnegative 52 * FF_ESC 4 byte big-endian out-of-range count+FF_OFF follows 53 * FF_MIN-FF_MAX ascii residue 54 * >=FF_MAX bigram codes 55 * 56 * a two-tiered string search technique is employed 57 * 58 * a metacharacter-free subpattern and partial pathname is matched 59 * backwards to avoid full expansion of the pathname list 60 * 61 * then the actual shell glob-style regular expression (if in this form) 62 * is matched against the candidate pathnames using the slower regexec() 63 * 64 * The original BSD code is covered by the BSD license: 65 * 66 * Copyright (c) 1985, 1993, 1999 67 * The Regents of the University of California. All rights reserved. 68 * 69 * Redistribution and use in source and binary forms, with or without 70 * modification, are permitted provided that the following conditions 71 * are met: 72 * 1. Redistributions of source code must retain the above copyright 73 * notice, this list of conditions and the following disclaimer. 74 * 2. Redistributions in binary form must reproduce the above copyright 75 * notice, this list of conditions and the following disclaimer in the 76 * documentation and/or other materials provided with the distribution. 77 * 3. Neither the name of the University nor the names of its contributors 78 * may be used to endorse or promote products derived from this software 79 * without specific prior written permission. 80 * 81 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 82 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 83 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 84 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 85 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 86 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 87 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 88 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 89 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 90 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 91 * SUCH DAMAGE. 92 */ 93 94 static const char id[] = "\n@(#)$Id: fastfind (AT&T Research) 2002-10-02 $\0\n"; 95 96 static const char lib[] = "libast:fastfind"; 97 98 #include "findlib.h" 99 100 #define FIND_MATCH "*/(find|locate)/*" 101 102 /* 103 * this db could be anywhere 104 * findcodes[] directories are checked for findnames[i] 105 */ 106 107 static char* findcodes[] = 108 { 109 0, 110 0, 111 FIND_CODES, 112 "/usr/local/share/lib", 113 "/usr/local/lib", 114 "/usr/share/lib", 115 "/usr/lib", 116 "/var/spool", 117 "/usr/local/var", 118 "/var/lib", 119 "/var/lib/slocate", 120 "/var/db", 121 }; 122 123 static char* findnames[] = 124 { 125 "find/codes", 126 "find/find.codes", 127 "locate/locatedb", 128 "locatedb", 129 "locate.database", 130 "slocate.db", 131 }; 132 133 /* 134 * convert t to lower case and drop leading x- and x- after / 135 * converted value copied to b of size n 136 */ 137 138 char* 139 typefix(char* buf, size_t n, register const char* t) 140 { 141 register int c; 142 register char* b = buf; 143 144 if ((*t == 'x' || *t == 'X') && *(t + 1) == '-') 145 t += 2; 146 while (c = *t++) 147 { 148 if (isupper(c)) 149 c = tolower(c); 150 if ((*b++ = c) == '/' && (*t == 'x' || *t == 'X') && *(t + 1) == '-') 151 t += 2; 152 } 153 *b = 0; 154 return buf; 155 } 156 157 /* 158 * return a fastfind stream handle for pattern 159 */ 160 161 Find_t* 162 findopen(const char* file, const char* pattern, const char* type, Finddisc_t* disc) 163 { 164 register Find_t* fp; 165 register char* p; 166 register char* s; 167 register char* b; 168 register int i; 169 register int j; 170 char* path; 171 int brace = 0; 172 int paren = 0; 173 int k; 174 int q; 175 int fd; 176 int uid; 177 Vmalloc_t* vm; 178 Type_t* tp; 179 struct stat st; 180 181 182 if (!(vm = vmopen(Vmdcheap, Vmbest, 0))) 183 goto nospace; 184 185 /* 186 * NOTE: searching for FIND_CODES would be much simpler if we 187 * just stuck with our own, but we also support GNU 188 * locate codes and have to search for the one of a 189 * bazillion possible names for that file 190 */ 191 192 if (!findcodes[1]) 193 findcodes[1] = getenv(FIND_CODES_ENV); 194 if (disc->flags & FIND_GENERATE) 195 { 196 if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, sizeof(Encode_t) - sizeof(Code_t)))) 197 goto nospace; 198 fp->vm = vm; 199 fp->id = lib; 200 fp->disc = disc; 201 fp->generate = 1; 202 if (file && (!*file || streq(file, "-"))) 203 file = 0; 204 uid = geteuid(); 205 j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes); 206 207 /* 208 * look for the codes file, but since it may not exist yet, 209 * also look for the containing directory if i<2 or if 210 * it is sufficiently qualified (FIND_MATCH) 211 */ 212 213 for (i = 0; i < j; i++) 214 if (path = findcodes[i]) 215 { 216 if (*path == '/') 217 { 218 if (!stat(path, &st)) 219 { 220 if (S_ISDIR(st.st_mode)) 221 { 222 for (k = 0; k < elementsof(findnames); k++) 223 { 224 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s/%s", path, findnames[k]); 225 if (!eaccess(fp->encode.file, R_OK|W_OK)) 226 { 227 path = fp->encode.file; 228 break; 229 } 230 if (strchr(findnames[k], '/') && (b = strrchr(fp->encode.file, '/'))) 231 { 232 *b = 0; 233 if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR)) 234 { 235 *b = '/'; 236 path = fp->encode.file; 237 break; 238 } 239 } 240 } 241 if (k < elementsof(findnames)) 242 break; 243 } 244 else if (st.st_uid == uid && (st.st_mode & S_IWUSR)) 245 { 246 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path); 247 path = fp->encode.file; 248 break; 249 } 250 } 251 else if (i < 2 || strmatch(path, FIND_MATCH)) 252 { 253 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path); 254 if (b = strrchr(fp->encode.file, '/')) 255 { 256 *b = 0; 257 if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR)) 258 { 259 *b = '/'; 260 path = fp->encode.file; 261 break; 262 } 263 } 264 } 265 } 266 else if (pathpath(fp->encode.file, path, "", PATH_REGULAR|PATH_READ|PATH_WRITE)) 267 { 268 path = fp->encode.file; 269 break; 270 } 271 else if (b = strrchr(path, '/')) 272 { 273 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%-.*s", b - path, path); 274 if (pathpath(fp->encode.temp, fp->encode.file, "", PATH_EXECUTE|PATH_READ|PATH_WRITE) && 275 !stat(fp->encode.temp, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR)) 276 { 277 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s%s", fp->encode.temp, b); 278 path = fp->encode.file; 279 break; 280 } 281 } 282 } 283 if (i >= j) 284 { 285 if (fp->disc->errorf) 286 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]); 287 goto drop; 288 } 289 if (fp->disc->flags & FIND_OLD) 290 { 291 /* 292 * FF_old generates temp data that is read 293 * in a second pass to generate the real codes 294 */ 295 296 fp->method = FF_old; 297 if (!(fp->fp = sftmp(32 * PATH_MAX))) 298 { 299 if (fp->disc->errorf) 300 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot create tmp file"); 301 goto drop; 302 } 303 } 304 else 305 { 306 /* 307 * the rest generate into a temp file that 308 * is simply renamed on completion 309 */ 310 311 if (s = strrchr(path, '/')) 312 { 313 *s = 0; 314 p = path; 315 } 316 else 317 p = "."; 318 if (!pathtemp(fp->encode.temp, sizeof(fp->encode.temp), p, "ff", &fd)) 319 { 320 if (fp->disc->errorf) 321 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot create tmp file in this directory", p ? p : "."); 322 goto drop; 323 } 324 if (s) 325 *s = '/'; 326 if (!(fp->fp = sfnew(NiL, NiL, (size_t)SF_UNBOUND, fd, SF_WRITE))) 327 { 328 if (fp->disc->errorf) 329 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot open tmp file", fp->encode.temp); 330 close(fd); 331 goto drop; 332 } 333 if (fp->disc->flags & FIND_TYPE) 334 { 335 fp->method = FF_typ; 336 fp->encode.namedisc.key = offsetof(Type_t, name); 337 fp->encode.namedisc.link = offsetof(Type_t, byname); 338 fp->encode.indexdisc.key = offsetof(Type_t, index); 339 fp->encode.indexdisc.size = sizeof(unsigned long); 340 fp->encode.indexdisc.link = offsetof(Type_t, byindex); 341 s = "system/dir"; 342 if (!(fp->encode.namedict = dtopen(&fp->encode.namedisc, Dttree)) || !(fp->encode.indexdict = dtopen(&fp->encode.indexdisc, Dttree)) || !(tp = newof(0, Type_t, 1, strlen(s) + 1))) 343 { 344 if (fp->encode.namedict) 345 dtclose(fp->encode.namedict); 346 if (fp->disc->errorf) 347 (*fp->disc->errorf)(fp, fp->disc, 2, "cannot allocate type table"); 348 goto drop; 349 } 350 351 /* 352 * type index 1 is always system/dir 353 */ 354 355 tp->index = ++fp->types; 356 strcpy(tp->name, s); 357 dtinsert(fp->encode.namedict, tp); 358 dtinsert(fp->encode.indexdict, tp); 359 } 360 else if (fp->disc->flags & FIND_GNU) 361 { 362 fp->method = FF_gnu; 363 sfputc(fp->fp, 0); 364 sfputr(fp->fp, FF_gnu_magic, 0); 365 } 366 else 367 { 368 fp->method = FF_dir; 369 sfputc(fp->fp, 0); 370 sfputr(fp->fp, FF_dir_magic, 0); 371 } 372 } 373 } 374 else 375 { 376 i = sizeof(Decode_t) + sizeof(Code_t); 377 if (!pattern || !*pattern) 378 pattern = "*"; 379 i += (j = 2 * (strlen(pattern) + 1)); 380 if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, i))) 381 { 382 vmclose(vm); 383 return 0; 384 } 385 fp->vm = vm; 386 fp->id = lib; 387 fp->disc = disc; 388 if (disc->flags & FIND_ICASE) 389 fp->decode.ignorecase = 1; 390 j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes); 391 for (i = 0; i < j; i++) 392 if (path = findcodes[i]) 393 { 394 if (*path == '/') 395 { 396 if (!stat(path, &st)) 397 { 398 if (S_ISDIR(st.st_mode)) 399 { 400 for (k = 0; k < elementsof(findnames); k++) 401 { 402 sfsprintf(fp->decode.path, sizeof(fp->decode.path), "%s/%s", path, findnames[k]); 403 if (fp->fp = sfopen(NiL, fp->decode.path, "r")) 404 { 405 path = fp->decode.path; 406 break; 407 } 408 } 409 if (fp->fp) 410 break; 411 } 412 else if (fp->fp = sfopen(NiL, path, "r")) 413 break; 414 } 415 } 416 else if ((path = pathpath(fp->decode.path, path, "", PATH_REGULAR|PATH_READ)) && (fp->fp = sfopen(NiL, path, "r"))) 417 break; 418 } 419 if (!fp->fp) 420 { 421 if (fp->disc->errorf) 422 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]); 423 goto drop; 424 } 425 if (fstat(sffileno(fp->fp), &st)) 426 { 427 if (fp->disc->errorf) 428 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot stat codes", path); 429 goto drop; 430 } 431 if (fp->secure = ((st.st_mode & (S_IRGRP|S_IROTH)) == S_IRGRP) && st.st_gid == getegid() && getegid() != getgid()) 432 setgid(getgid()); 433 fp->stamp = st.st_mtime; 434 b = (s = fp->decode.temp) + 1; 435 for (i = 0; i < elementsof(fp->decode.bigram1); i++) 436 { 437 if ((j = sfgetc(fp->fp)) == EOF) 438 goto invalid; 439 if (!(*s++ = fp->decode.bigram1[i] = j) && i) 440 { 441 i = -i; 442 break; 443 } 444 if ((j = sfgetc(fp->fp)) == EOF) 445 goto invalid; 446 if (!(*s++ = fp->decode.bigram2[i] = j) && (i || fp->decode.bigram1[0] >= '0' && fp->decode.bigram1[0] <= '1')) 447 break; 448 } 449 if (streq(b, FF_typ_magic)) 450 { 451 if (type) 452 { 453 type = (const char*)typefix(fp->decode.bigram2, sizeof(fp->decode.bigram2), type); 454 memset(fp->decode.bigram1, 0, sizeof(fp->decode.bigram1)); 455 } 456 fp->method = FF_typ; 457 for (j = 0, i = 1;; i++) 458 { 459 if (!(s = sfgetr(fp->fp, 0, 0))) 460 goto invalid; 461 if (!*s) 462 break; 463 if (type && strmatch(s, type)) 464 { 465 FF_SET_TYPE(fp, i); 466 j++; 467 } 468 } 469 if (type && !j) 470 goto drop; 471 fp->types = j; 472 } 473 else if (streq(b, FF_dir_magic)) 474 fp->method = FF_dir; 475 else if (streq(b, FF_gnu_magic)) 476 fp->method = FF_gnu; 477 else if (!*b && *--b >= '0' && *b <= '1') 478 { 479 fp->method = FF_gnu; 480 while (j = sfgetc(fp->fp)) 481 { 482 if (j == EOF || fp->decode.count >= sizeof(fp->decode.path)) 483 goto invalid; 484 fp->decode.path[fp->decode.count++] = j; 485 } 486 } 487 else 488 { 489 fp->method = FF_old; 490 if (i < 0) 491 { 492 if ((j = sfgetc(fp->fp)) == EOF) 493 goto invalid; 494 fp->decode.bigram2[i = -i] = j; 495 } 496 while (++i < elementsof(fp->decode.bigram1)) 497 { 498 if ((j = sfgetc(fp->fp)) == EOF) 499 goto invalid; 500 fp->decode.bigram1[i] = j; 501 if ((j = sfgetc(fp->fp)) == EOF) 502 goto invalid; 503 fp->decode.bigram2[i] = j; 504 } 505 if ((fp->decode.peek = sfgetc(fp->fp)) != FF_OFF) 506 goto invalid; 507 } 508 509 /* 510 * set up the physical dir table 511 */ 512 513 if (disc->version >= 19980301L) 514 { 515 fp->verifyf = disc->verifyf; 516 if (disc->dirs && *disc->dirs) 517 { 518 for (k = 0; disc->dirs[k]; k++); 519 if (k == 1 && streq(disc->dirs[0], "/")) 520 k = 0; 521 if (k) 522 { 523 if (!(fp->dirs = vmnewof(fp->vm, 0, char*, 2 * k + 1, 0))) 524 goto drop; 525 if (!(fp->lens = vmnewof(fp->vm, 0, int, 2 * k, 0))) 526 goto drop; 527 p = 0; 528 b = fp->decode.temp; 529 j = fp->method == FF_old || fp->method == FF_gnu; 530 531 /* 532 * fill the dir list with logical and 533 * physical names since we don't know 534 * which way the db was encoded (it 535 * could be *both* ways) 536 */ 537 538 for (i = q = 0; i < k; i++) 539 { 540 if (*(s = disc->dirs[i]) == '/') 541 sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s", s); 542 else if (!p && !(p = getcwd(fp->decode.path, sizeof(fp->decode.path)))) 543 goto nospace; 544 else 545 sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s/%s", p, s); 546 s = pathcanon(b, 0); 547 *s = '/'; 548 *(s + 1) = 0; 549 if (!(fp->dirs[q] = vmstrdup(fp->vm, b))) 550 goto nospace; 551 if (j) 552 (fp->dirs[q])[s - b] = 0; 553 q++; 554 *s = 0; 555 s = pathcanon(b, PATH_PHYSICAL); 556 *s = '/'; 557 *(s + 1) = 0; 558 if (!strneq(b, fp->dirs[q - 1], s - b)) 559 { 560 if (!(fp->dirs[q] = vmstrdup(fp->vm, b))) 561 goto nospace; 562 if (j) 563 (fp->dirs[q])[s - b] = 0; 564 q++; 565 } 566 } 567 strsort(fp->dirs, q, strcasecmp); 568 for (i = 0; i < q; i++) 569 fp->lens[i] = strlen(fp->dirs[i]); 570 } 571 } 572 } 573 if (fp->verifyf || (disc->flags & FIND_VERIFY)) 574 { 575 if (fp->method != FF_dir && fp->method != FF_typ) 576 { 577 if (fp->disc->errorf) 578 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s code format does not support directory verification", path, fp->method == FF_gnu ? FF_gnu_magic : "OLD-BIGRAM"); 579 goto drop; 580 } 581 fp->verify = 1; 582 } 583 584 /* 585 * extract last glob-free subpattern in name for fast pre-match 586 * prepend 0 for backwards match 587 */ 588 589 if (p = s = (char*)pattern) 590 { 591 b = fp->decode.pattern; 592 for (;;) 593 { 594 switch (*b++ = *p++) 595 { 596 case 0: 597 break; 598 case '\\': 599 s = p; 600 if (!*p++) 601 break; 602 continue; 603 case '[': 604 if (!brace) 605 { 606 brace++; 607 if (*p == ']') 608 p++; 609 } 610 continue; 611 case ']': 612 if (brace) 613 { 614 brace--; 615 s = p; 616 } 617 continue; 618 case '(': 619 if (!brace) 620 paren++; 621 continue; 622 case ')': 623 if (!brace && paren > 0 && !--paren) 624 s = p; 625 continue; 626 case '|': 627 case '&': 628 if (!brace && !paren) 629 { 630 s = ""; 631 break; 632 } 633 continue; 634 case '*': 635 case '?': 636 s = p; 637 continue; 638 default: 639 continue; 640 } 641 break; 642 } 643 if (s != pattern && !streq(pattern, "*")) 644 { 645 fp->decode.match = 1; 646 if (i = regcomp(&fp->decode.re, pattern, REG_SHELL|REG_AUGMENTED|(fp->decode.ignorecase?REG_ICASE:0))) 647 { 648 if (disc->errorf) 649 { 650 regerror(i, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp)); 651 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", pattern, fp->decode.temp); 652 } 653 goto drop; 654 } 655 } 656 if (*s) 657 { 658 *b++ = 0; 659 while (i = *s++) 660 *b++ = i; 661 *b-- = 0; 662 fp->decode.end = b; 663 if (fp->decode.ignorecase) 664 for (s = fp->decode.pattern; s <= b; s++) 665 if (isupper(*s)) 666 *s = tolower(*s); 667 } 668 } 669 } 670 return fp; 671 nospace: 672 if (disc->errorf) 673 (*fp->disc->errorf)(fp, fp->disc, 2, "out of space"); 674 if (!vm) 675 return 0; 676 if (!fp) 677 { 678 vmclose(vm); 679 return 0; 680 } 681 goto drop; 682 invalid: 683 if (fp->disc->errorf) 684 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: invalid codes", path); 685 drop: 686 if (!fp->generate && fp->decode.match) 687 regfree(&fp->decode.re); 688 if (fp->fp) 689 sfclose(fp->fp); 690 vmclose(fp->vm); 691 return 0; 692 } 693 694 /* 695 * return the next fastfind path 696 * 0 returned when list exhausted 697 */ 698 699 char* 700 findread(register Find_t* fp) 701 { 702 register char* p; 703 register char* q; 704 register char* s; 705 register char* b; 706 register char* e; 707 register int c; 708 register int n; 709 register int m; 710 int ignorecase; 711 int t; 712 unsigned char w[4]; 713 struct stat st; 714 715 if (fp->generate) 716 return 0; 717 if (fp->decode.restore) 718 { 719 *fp->decode.restore = '/'; 720 fp->decode.restore = 0; 721 } 722 ignorecase = fp->decode.ignorecase ? STR_ICASE : 0; 723 c = fp->decode.peek; 724 next: 725 for (;;) 726 { 727 switch (fp->method) 728 { 729 case FF_dir: 730 t = 0; 731 n = sfgetl(fp->fp); 732 goto grab; 733 case FF_gnu: 734 if ((c = sfgetc(fp->fp)) == EOF) 735 return 0; 736 if (c == 0x80) 737 { 738 if ((c = sfgetc(fp->fp)) == EOF) 739 return 0; 740 n = c << 8; 741 if ((c = sfgetc(fp->fp)) == EOF) 742 return 0; 743 n |= c; 744 if (n & 0x8000) 745 n = (n - 0xffff) - 1; 746 } 747 else if ((n = c) & 0x80) 748 n = (n - 0xff) - 1; 749 t = 0; 750 goto grab; 751 case FF_typ: 752 t = sfgetu(fp->fp); 753 n = sfgetl(fp->fp); 754 grab: 755 p = fp->decode.path + (fp->decode.count += n); 756 do 757 { 758 if ((c = sfgetc(fp->fp)) == EOF) 759 return 0; 760 } while (*p++ = c); 761 p -= 2; 762 break; 763 case FF_old: 764 if (c == EOF) 765 { 766 fp->decode.peek = c; 767 return 0; 768 } 769 if (c == FF_ESC) 770 { 771 if (sfread(fp->fp, w, sizeof(w)) != sizeof(w)) 772 return 0; 773 if (fp->decode.swap >= 0) 774 { 775 c = (int32_t)((w[0] << 24) | (w[1] << 16) | (w[2] << 8) | w[3]); 776 if (!fp->decode.swap) 777 { 778 /* 779 * the old format uses machine 780 * byte order; this test uses 781 * the smallest magnitude of 782 * both byte orders on the 783 * first encoded path motion 784 * to determine the original 785 * byte order 786 */ 787 788 m = c; 789 if (m < 0) 790 m = -m; 791 n = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]); 792 if (n < 0) 793 n = -n; 794 if (m < n) 795 fp->decode.swap = 1; 796 else 797 { 798 fp->decode.swap = -1; 799 c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]); 800 } 801 } 802 } 803 else 804 c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]); 805 } 806 fp->decode.count += c - FF_OFF; 807 for (p = fp->decode.path + fp->decode.count; (c = sfgetc(fp->fp)) > FF_ESC;) 808 if (c & (1<<(CHAR_BIT-1))) 809 { 810 *p++ = fp->decode.bigram1[c & ((1<<(CHAR_BIT-1))-1)]; 811 *p++ = fp->decode.bigram2[c & ((1<<(CHAR_BIT-1))-1)]; 812 } 813 else 814 *p++ = c; 815 *p-- = 0; 816 t = 0; 817 break; 818 } 819 b = fp->decode.path; 820 if (fp->decode.found) 821 fp->decode.found = 0; 822 else 823 b += fp->decode.count; 824 if (fp->dirs) 825 for (;;) 826 { 827 if (!*fp->dirs) 828 return 0; 829 830 /* 831 * use the ordering and lengths to prune 832 * comparison function calls 833 * (*fp->dirs)[*fp->lens]=='/' if its 834 * already been matched 835 */ 836 837 if ((n = p - fp->decode.path + 1) > (m = *fp->lens)) 838 { 839 if (!(*fp->dirs)[m]) 840 goto next; 841 if (!strncasecmp(*fp->dirs, fp->decode.path, m)) 842 break; 843 } 844 else if (n == m) 845 { 846 if (!(*fp->dirs)[m]) 847 { 848 if (!(n = strcasecmp(*fp->dirs, fp->decode.path)) && (ignorecase || !strcmp(*fp->dirs, fp->decode.path))) 849 { 850 if (m > 0) 851 { 852 (*fp->dirs)[m] = '/'; 853 if ((*fp->dirs)[m - 1] != '/') 854 (*fp->dirs)[++(*fp->lens)] = '/'; 855 } 856 break; 857 } 858 if (n >= 0) 859 goto next; 860 } 861 } 862 else if (!(*fp->dirs)[m]) 863 goto next; 864 fp->dirs++; 865 fp->lens++; 866 } 867 if (fp->verify && (*p == '/' || t == 1)) 868 { 869 if ((n = p - fp->decode.path)) 870 *p = 0; 871 else 872 n = 1; 873 if (fp->verifyf) 874 n = (*fp->verifyf)(fp, fp->decode.path, n, fp->disc); 875 else if (stat(fp->decode.path, &st)) 876 n = -1; 877 else if ((unsigned long)st.st_mtime > fp->stamp) 878 n = 1; 879 else 880 n = 0; 881 *p = '/'; 882 883 /* 884 * n<0 skip this subtree 885 * n==0 keep as is 886 * n>0 read this dir now 887 */ 888 889 /* NOT IMPLEMENTED YET */ 890 } 891 if (FF_OK_TYPE(fp, t)) 892 { 893 if (fp->decode.end) 894 { 895 if (*(s = p) == '/') 896 s--; 897 if (*fp->decode.pattern == '/' && b > fp->decode.path) 898 b--; 899 for (; s >= b; s--) 900 if (*s == *fp->decode.end || ignorecase && tolower(*s) == *fp->decode.end) 901 { 902 if (ignorecase) 903 for (e = fp->decode.end - 1, q = s - 1; *e && (*q == *e || tolower(*q) == *e); e--, q--); 904 else 905 for (e = fp->decode.end - 1, q = s - 1; *e && *q == *e; e--, q--); 906 if (!*e) 907 { 908 fp->decode.found = 1; 909 if (!fp->decode.match || strgrpmatch(fp->decode.path, fp->decode.pattern, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT|ignorecase)) 910 { 911 fp->decode.peek = c; 912 if (*p == '/') 913 *(fp->decode.restore = p) = 0; 914 if (!fp->secure || !access(fp->decode.path, F_OK)) 915 return fp->decode.path; 916 } 917 break; 918 } 919 } 920 } 921 else if (!fp->decode.match || !(n = regexec(&fp->decode.re, fp->decode.path, 0, NiL, 0))) 922 { 923 fp->decode.peek = c; 924 if (*p == '/' && p > fp->decode.path) 925 *(fp->decode.restore = p) = 0; 926 if (!fp->secure || !access(fp->decode.path, F_OK)) 927 return fp->decode.path; 928 } 929 else if (n != REG_NOMATCH) 930 { 931 if (fp->disc->errorf) 932 { 933 regerror(n, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp)); 934 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", fp->decode.pattern, fp->decode.temp); 935 } 936 return 0; 937 } 938 } 939 } 940 } 941 942 /* 943 * add path to the code table 944 * paths are assumed to be in sort order 945 */ 946 947 int 948 findwrite(register Find_t* fp, const char* path, size_t len, const char* type) 949 { 950 register unsigned char* s; 951 register unsigned char* e; 952 register unsigned char* p; 953 register int n; 954 register int d; 955 register Type_t* x; 956 register unsigned long u; 957 958 if (!fp->generate) 959 return -1; 960 if (type && fp->method == FF_dir) 961 { 962 len = sfsprintf(fp->encode.mark, sizeof(fp->encode.mark), "%-.*s/", len, path); 963 path = fp->encode.mark; 964 } 965 s = (unsigned char*)path; 966 if (len <= 0) 967 len = strlen(path); 968 if (len < sizeof(fp->encode.path)) 969 e = s + len++; 970 else 971 { 972 len = sizeof(fp->encode.path) - 1; 973 e = s + len; 974 } 975 p = (unsigned char*)fp->encode.path; 976 while (s < e) 977 { 978 if (*s != *p++) 979 break; 980 s++; 981 } 982 n = s - (unsigned char*)path; 983 switch (fp->method) 984 { 985 case FF_gnu: 986 d = n - fp->encode.prefix; 987 if (d >= -127 && d <= 127) 988 sfputc(fp->fp, d & 0xff); 989 else 990 { 991 sfputc(fp->fp, 0x80); 992 sfputc(fp->fp, (d >> 8) & 0xff); 993 sfputc(fp->fp, d & 0xff); 994 } 995 fp->encode.prefix = n; 996 sfputr(fp->fp, (char*)s, 0); 997 break; 998 case FF_old: 999 sfprintf(fp->fp, "%ld", n - fp->encode.prefix + FF_OFF); 1000 fp->encode.prefix = n; 1001 sfputc(fp->fp, ' '); 1002 p = s; 1003 while (s < e) 1004 { 1005 n = *s++; 1006 if (s >= e) 1007 break; 1008 fp->encode.code[n][*s++]++; 1009 } 1010 while (p < e) 1011 { 1012 if ((n = *p++) < FF_MIN || n >= FF_MAX) 1013 n = '?'; 1014 sfputc(fp->fp, n); 1015 } 1016 sfputc(fp->fp, 0); 1017 break; 1018 case FF_typ: 1019 if (type) 1020 { 1021 type = (const char*)typefix((char*)fp->encode.bigram, sizeof(fp->encode.bigram), type); 1022 if (x = (Type_t*)dtmatch(fp->encode.namedict, type)) 1023 u = x->index; 1024 else if (!(x = newof(0, Type_t, 1, strlen(type) + 1))) 1025 u = 0; 1026 else 1027 { 1028 u = x->index = ++fp->types; 1029 strcpy(x->name, type); 1030 dtinsert(fp->encode.namedict, x); 1031 dtinsert(fp->encode.indexdict, x); 1032 } 1033 } 1034 else 1035 u = 0; 1036 sfputu(fp->fp, u); 1037 /*FALLTHROUGH...*/ 1038 case FF_dir: 1039 d = n - fp->encode.prefix; 1040 sfputl(fp->fp, d); 1041 fp->encode.prefix = n; 1042 sfputr(fp->fp, (char*)s, 0); 1043 break; 1044 } 1045 memcpy(fp->encode.path, path, len); 1046 return 0; 1047 } 1048 1049 /* 1050 * findsync() helper 1051 */ 1052 1053 static int 1054 finddone(register Find_t* fp) 1055 { 1056 int r; 1057 1058 if (sfsync(fp->fp)) 1059 { 1060 if (fp->disc->errorf) 1061 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfsync]", fp->encode.file); 1062 return -1; 1063 } 1064 if (sferror(fp->fp)) 1065 { 1066 if (fp->disc->errorf) 1067 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sferror]", fp->encode.file); 1068 return -1; 1069 } 1070 r = sfclose(fp->fp); 1071 fp->fp = 0; 1072 if (r) 1073 { 1074 if (fp->disc->errorf) 1075 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfclose]", fp->encode.file); 1076 return -1; 1077 } 1078 return 0; 1079 } 1080 1081 /* 1082 * finish the code table 1083 */ 1084 1085 static int 1086 findsync(register Find_t* fp) 1087 { 1088 register char* s; 1089 register int n; 1090 register int m; 1091 register int d; 1092 register Type_t* x; 1093 char* t; 1094 int b; 1095 long z; 1096 Sfio_t* sp; 1097 1098 switch (fp->method) 1099 { 1100 case FF_dir: 1101 case FF_gnu: 1102 /* 1103 * replace the real file with the temp file 1104 */ 1105 1106 if (finddone(fp)) 1107 goto bad; 1108 remove(fp->encode.file); 1109 if (rename(fp->encode.temp, fp->encode.file)) 1110 { 1111 if (fp->disc->errorf) 1112 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot rename from tmp file %s", fp->encode.file, fp->encode.temp); 1113 remove(fp->encode.temp); 1114 return -1; 1115 } 1116 break; 1117 case FF_old: 1118 /* 1119 * determine the top FF_MAX bigrams 1120 */ 1121 1122 for (n = 0; n < FF_MAX; n++) 1123 for (m = 0; m < FF_MAX; m++) 1124 fp->encode.hits[fp->encode.code[n][m]]++; 1125 fp->encode.hits[0] = 0; 1126 m = 1; 1127 for (n = USHRT_MAX; n >= 0; n--) 1128 if (d = fp->encode.hits[n]) 1129 { 1130 fp->encode.hits[n] = m; 1131 if ((m += d) > FF_MAX) 1132 break; 1133 } 1134 while (--n >= 0) 1135 fp->encode.hits[n] = 0; 1136 for (n = FF_MAX - 1; n >= 0; n--) 1137 for (m = FF_MAX - 1; m >= 0; m--) 1138 if (fp->encode.hits[fp->encode.code[n][m]]) 1139 { 1140 d = fp->encode.code[n][m]; 1141 b = fp->encode.hits[d] - 1; 1142 fp->encode.code[n][m] = b + FF_MAX; 1143 if (fp->encode.hits[d]++ >= FF_MAX) 1144 fp->encode.hits[d] = 0; 1145 fp->encode.bigram[b *= 2] = n; 1146 fp->encode.bigram[b + 1] = m; 1147 } 1148 else 1149 fp->encode.code[n][m] = 0; 1150 1151 /* 1152 * commit the real file 1153 */ 1154 1155 if (sfseek(fp->fp, (Sfoff_t)0, SEEK_SET)) 1156 { 1157 if (fp->disc->errorf) 1158 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot rewind tmp file"); 1159 return -1; 1160 } 1161 if (!(sp = sfopen(NiL, fp->encode.file, "w"))) 1162 goto badcreate; 1163 1164 /* 1165 * dump the bigrams 1166 */ 1167 1168 sfwrite(sp, fp->encode.bigram, sizeof(fp->encode.bigram)); 1169 1170 /* 1171 * encode the massaged paths 1172 */ 1173 1174 while (s = sfgetr(fp->fp, 0, 0)) 1175 { 1176 z = strtol(s, &t, 0); 1177 s = t; 1178 if (z < 0 || z > 2 * FF_OFF) 1179 { 1180 sfputc(sp, FF_ESC); 1181 sfputc(sp, (z >> 24)); 1182 sfputc(sp, (z >> 16)); 1183 sfputc(sp, (z >> 8)); 1184 sfputc(sp, z); 1185 } 1186 else 1187 sfputc(sp, z); 1188 while (n = *s++) 1189 { 1190 if (!(m = *s++)) 1191 { 1192 sfputc(sp, n); 1193 break; 1194 } 1195 if (d = fp->encode.code[n][m]) 1196 sfputc(sp, d); 1197 else 1198 { 1199 sfputc(sp, n); 1200 sfputc(sp, m); 1201 } 1202 } 1203 } 1204 sfclose(fp->fp); 1205 fp->fp = sp; 1206 if (finddone(fp)) 1207 goto bad; 1208 break; 1209 case FF_typ: 1210 if (finddone(fp)) 1211 goto bad; 1212 if (!(fp->fp = sfopen(NiL, fp->encode.temp, "r"))) 1213 { 1214 if (fp->disc->errorf) 1215 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot read tmp file", fp->encode.temp); 1216 remove(fp->encode.temp); 1217 return -1; 1218 } 1219 1220 /* 1221 * commit the output file 1222 */ 1223 1224 if (!(sp = sfopen(NiL, fp->encode.file, "w"))) 1225 goto badcreate; 1226 1227 /* 1228 * write the header magic 1229 */ 1230 1231 sfputc(sp, 0); 1232 sfputr(sp, FF_typ_magic, 0); 1233 1234 /* 1235 * write the type table in index order starting with 1 1236 */ 1237 1238 for (x = (Type_t*)dtfirst(fp->encode.indexdict); x; x = (Type_t*)dtnext(fp->encode.indexdict, x)) 1239 sfputr(sp, x->name, 0); 1240 sfputc(sp, 0); 1241 1242 /* 1243 * append the front compressed strings 1244 */ 1245 1246 if (sfmove(fp->fp, sp, SF_UNBOUND, -1) < 0 || !sfeof(fp->fp)) 1247 { 1248 sfclose(sp); 1249 if (fp->disc->errorf) 1250 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot append codes", fp->encode.file); 1251 goto bad; 1252 } 1253 sfclose(fp->fp); 1254 fp->fp = sp; 1255 if (finddone(fp)) 1256 goto bad; 1257 remove(fp->encode.temp); 1258 break; 1259 } 1260 return 0; 1261 badcreate: 1262 if (fp->disc->errorf) 1263 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot write codes", fp->encode.file); 1264 bad: 1265 if (fp->fp) 1266 { 1267 sfclose(fp->fp); 1268 fp->fp = 0; 1269 } 1270 remove(fp->encode.temp); 1271 return -1; 1272 } 1273 1274 /* 1275 * close an open fastfind stream 1276 */ 1277 1278 int 1279 findclose(register Find_t* fp) 1280 { 1281 int n = 0; 1282 1283 if (!fp) 1284 return -1; 1285 if (fp->generate) 1286 { 1287 n = findsync(fp); 1288 if (fp->encode.indexdict) 1289 dtclose(fp->encode.indexdict); 1290 if (fp->encode.namedict) 1291 dtclose(fp->encode.namedict); 1292 } 1293 else 1294 { 1295 if (fp->decode.match) 1296 regfree(&fp->decode.re); 1297 n = 0; 1298 } 1299 if (fp->fp) 1300 sfclose(fp->fp); 1301 vmclose(fp->vm); 1302 return n; 1303 } 1304