1 #pragma prototyped 2 /* 3 * original code 4 * 5 * James A. Woods, Informatics General Corporation, 6 * NASA Ames Research Center, 6/81. 7 * Usenix ;login:, February/March, 1983, p. 8. 8 * 9 * discipline/method interface 10 * 11 * Glenn Fowler 12 * AT&T Research 13 * modified from the original BSD source 14 * 15 * 'fastfind' scans a file list for the full pathname of a file 16 * given only a piece of the name. The list is processed with 17 * with "front-compression" and bigram coding. Front compression reduces 18 * space by a factor of 4-5, bigram coding by a further 20-25%. 19 * 20 * there are 4 methods: 21 * 22 * FF_old original with 7 bit bigram encoding (no magic) 23 * FF_gnu 8 bit clean front compression (FF_gnu_magic) 24 * FF_dir FF_gnu with sfgetl/sfputl and trailing / on dirs (FF_dir_magic) 25 * FF_typ FF_dir with (mime) types (FF_typ_magic) 26 * 27 * the bigram encoding steals the eighth bit (that's why its FF_old) 28 * maybe one day we'll limit it to readonly: 29 * 30 * 0-2*FF_OFF likeliest differential counts + offset to make nonnegative 31 * FF_ESC 4 byte big-endian out-of-range count+FF_OFF follows 32 * FF_MIN-FF_MAX ascii residue 33 * >=FF_MAX bigram codes 34 * 35 * a two-tiered string search technique is employed 36 * 37 * a metacharacter-free subpattern and partial pathname is matched 38 * backwards to avoid full expansion of the pathname list 39 * 40 * then the actual shell glob-style regular expression (if in this form) 41 * is matched against the candidate pathnames using the slower regexec() 42 * 43 * The original BSD code is covered by the BSD license: 44 * 45 * Copyright (c) 1985, 1993, 1999 46 * The Regents of the University of California. All rights reserved. 47 * 48 * Redistribution and use in source and binary forms, with or without 49 * modification, are permitted provided that the following conditions 50 * are met: 51 * 1. Redistributions of source code must retain the above copyright 52 * notice, this list of conditions and the following disclaimer. 53 * 2. Redistributions in binary form must reproduce the above copyright 54 * notice, this list of conditions and the following disclaimer in the 55 * documentation and/or other materials provided with the distribution. 56 * 3. Neither the name of the University nor the names of its contributors 57 * may be used to endorse or promote products derived from this software 58 * without specific prior written permission. 59 * 60 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 61 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 62 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 63 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 64 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 65 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 66 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 68 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 69 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 70 * SUCH DAMAGE. 71 */ 72 73 static const char id[] = "\n@(#)$Id: fastfind (AT&T Research) 2002-10-02 $\0\n"; 74 75 static const char lib[] = "libast:fastfind"; 76 77 #include "findlib.h" 78 79 #define FIND_MATCH "*/(find|locate)/*" 80 81 /* 82 * this db could be anywhere 83 * findcodes[] directories are checked for findnames[i] 84 */ 85 86 static char* findcodes[] = 87 { 88 0, 89 0, 90 FIND_CODES, 91 "/usr/local/share/lib", 92 "/usr/local/lib", 93 "/usr/share/lib", 94 "/usr/lib", 95 "/var/spool", 96 "/usr/local/var", 97 "/var/lib", 98 "/var/lib/slocate", 99 "/var/db", 100 }; 101 102 static char* findnames[] = 103 { 104 "find/codes", 105 "find/find.codes", 106 "locate/locatedb", 107 "locatedb", 108 "locate.database", 109 "slocate.db", 110 }; 111 112 /* 113 * convert t to lower case and drop leading x- and x- after / 114 * converted value copied to b of size n 115 */ 116 117 char* 118 typefix(char* buf, size_t n, register const char* t) 119 { 120 register int c; 121 register char* b = buf; 122 123 if ((*t == 'x' || *t == 'X') && *(t + 1) == '-') 124 t += 2; 125 while (c = *t++) 126 { 127 if (isupper(c)) 128 c = tolower(c); 129 if ((*b++ = c) == '/' && (*t == 'x' || *t == 'X') && *(t + 1) == '-') 130 t += 2; 131 } 132 *b = 0; 133 return buf; 134 } 135 136 /* 137 * return a fastfind stream handle for pattern 138 */ 139 140 Find_t* 141 findopen(const char* file, const char* pattern, const char* type, Finddisc_t* disc) 142 { 143 register Find_t* fp; 144 register char* p; 145 register char* s; 146 register char* b; 147 register int i; 148 register int j; 149 char* path; 150 int brace = 0; 151 int paren = 0; 152 int k; 153 int q; 154 int fd; 155 int uid; 156 Vmalloc_t* vm; 157 Type_t* tp; 158 struct stat st; 159 160 161 if (!(vm = vmopen(Vmdcheap, Vmbest, 0))) 162 goto nospace; 163 164 /* 165 * NOTE: searching for FIND_CODES would be much simpler if we 166 * just stuck with our own, but we also support GNU 167 * locate codes and have to search for the one of a 168 * bazillion possible names for that file 169 */ 170 171 if (!findcodes[1]) 172 findcodes[1] = getenv(FIND_CODES_ENV); 173 if (disc->flags & FIND_GENERATE) 174 { 175 if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, sizeof(Encode_t) - sizeof(Code_t)))) 176 goto nospace; 177 fp->vm = vm; 178 fp->id = lib; 179 fp->disc = disc; 180 fp->generate = 1; 181 if (file && (!*file || streq(file, "-"))) 182 file = 0; 183 uid = geteuid(); 184 j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes); 185 186 /* 187 * look for the codes file, but since it may not exist yet, 188 * also look for the containing directory if i<2 or if 189 * it is sufficiently qualified (FIND_MATCH) 190 */ 191 192 for (i = 0; i < j; i++) 193 if (path = findcodes[i]) 194 { 195 if (*path == '/') 196 { 197 if (!stat(path, &st)) 198 { 199 if (S_ISDIR(st.st_mode)) 200 { 201 for (k = 0; k < elementsof(findnames); k++) 202 { 203 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s/%s", path, findnames[k]); 204 if (!eaccess(fp->encode.file, R_OK|W_OK)) 205 { 206 path = fp->encode.file; 207 break; 208 } 209 if (strchr(findnames[k], '/') && (b = strrchr(fp->encode.file, '/'))) 210 { 211 *b = 0; 212 if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR)) 213 { 214 *b = '/'; 215 path = fp->encode.file; 216 break; 217 } 218 } 219 } 220 if (k < elementsof(findnames)) 221 break; 222 } 223 else if (st.st_uid == uid && (st.st_mode & S_IWUSR)) 224 { 225 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path); 226 path = fp->encode.file; 227 break; 228 } 229 } 230 else if (i < 2 || strmatch(path, FIND_MATCH)) 231 { 232 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s", path); 233 if (b = strrchr(fp->encode.file, '/')) 234 { 235 *b = 0; 236 if (!stat(fp->encode.file, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR)) 237 { 238 *b = '/'; 239 path = fp->encode.file; 240 break; 241 } 242 } 243 } 244 } 245 else if (pathpath(path, "", PATH_REGULAR|PATH_READ|PATH_WRITE, fp->encode.file, sizeof(fp->encode.file))) 246 { 247 path = fp->encode.file; 248 break; 249 } 250 else if (b = strrchr(path, '/')) 251 { 252 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%-.*s", b - path, path); 253 if (pathpath(fp->encode.file, "", PATH_EXECUTE|PATH_READ|PATH_WRITE, fp->encode.temp, sizeof(fp->encode.temp)) && 254 !stat(fp->encode.temp, &st) && st.st_uid == uid && (st.st_mode & S_IWUSR)) 255 { 256 sfsprintf(fp->encode.file, sizeof(fp->encode.file), "%s%s", fp->encode.temp, b); 257 path = fp->encode.file; 258 break; 259 } 260 } 261 } 262 if (i >= j) 263 { 264 if (fp->disc->errorf) 265 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]); 266 goto drop; 267 } 268 if (fp->disc->flags & FIND_OLD) 269 { 270 /* 271 * FF_old generates temp data that is read 272 * in a second pass to generate the real codes 273 */ 274 275 fp->method = FF_old; 276 if (!(fp->fp = sftmp(32 * PATH_MAX))) 277 { 278 if (fp->disc->errorf) 279 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot create tmp file"); 280 goto drop; 281 } 282 } 283 else 284 { 285 /* 286 * the rest generate into a temp file that 287 * is simply renamed on completion 288 */ 289 290 if (s = strrchr(path, '/')) 291 { 292 *s = 0; 293 p = path; 294 } 295 else 296 p = "."; 297 if (!pathtemp(fp->encode.temp, sizeof(fp->encode.temp), p, "ff", &fd)) 298 { 299 if (fp->disc->errorf) 300 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot create tmp file in this directory", p ? p : "."); 301 goto drop; 302 } 303 if (s) 304 *s = '/'; 305 if (!(fp->fp = sfnew(NiL, NiL, (size_t)SF_UNBOUND, fd, SF_WRITE))) 306 { 307 if (fp->disc->errorf) 308 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot open tmp file", fp->encode.temp); 309 close(fd); 310 goto drop; 311 } 312 if (fp->disc->flags & FIND_TYPE) 313 { 314 fp->method = FF_typ; 315 fp->encode.namedisc.key = offsetof(Type_t, name); 316 fp->encode.namedisc.link = offsetof(Type_t, byname); 317 fp->encode.indexdisc.key = offsetof(Type_t, index); 318 fp->encode.indexdisc.size = sizeof(unsigned long); 319 fp->encode.indexdisc.link = offsetof(Type_t, byindex); 320 s = "system/dir"; 321 if (!(fp->encode.namedict = dtopen(&fp->encode.namedisc, Dtoset)) || !(fp->encode.indexdict = dtopen(&fp->encode.indexdisc, Dtoset)) || !(tp = newof(0, Type_t, 1, strlen(s) + 1))) 322 { 323 if (fp->encode.namedict) 324 dtclose(fp->encode.namedict); 325 if (fp->disc->errorf) 326 (*fp->disc->errorf)(fp, fp->disc, 2, "cannot allocate type table"); 327 goto drop; 328 } 329 330 /* 331 * type index 1 is always system/dir 332 */ 333 334 tp->index = ++fp->types; 335 strcpy(tp->name, s); 336 dtinsert(fp->encode.namedict, tp); 337 dtinsert(fp->encode.indexdict, tp); 338 } 339 else if (fp->disc->flags & FIND_GNU) 340 { 341 fp->method = FF_gnu; 342 sfputc(fp->fp, 0); 343 sfputr(fp->fp, FF_gnu_magic, 0); 344 } 345 else 346 { 347 fp->method = FF_dir; 348 sfputc(fp->fp, 0); 349 sfputr(fp->fp, FF_dir_magic, 0); 350 } 351 } 352 } 353 else 354 { 355 i = sizeof(Decode_t) + sizeof(Code_t); 356 if (!pattern || !*pattern) 357 pattern = "*"; 358 i += (j = 2 * (strlen(pattern) + 1)); 359 if (!(fp = (Find_t*)vmnewof(vm, 0, Find_t, 1, i))) 360 { 361 vmclose(vm); 362 return 0; 363 } 364 fp->vm = vm; 365 fp->id = lib; 366 fp->disc = disc; 367 if (disc->flags & FIND_ICASE) 368 fp->decode.ignorecase = 1; 369 j = (findcodes[0] = (char*)file) && *file == '/' ? 1 : elementsof(findcodes); 370 for (i = 0; i < j; i++) 371 if (path = findcodes[i]) 372 { 373 if (*path == '/') 374 { 375 if (!stat(path, &st)) 376 { 377 if (S_ISDIR(st.st_mode)) 378 { 379 for (k = 0; k < elementsof(findnames); k++) 380 { 381 sfsprintf(fp->decode.path, sizeof(fp->decode.path), "%s/%s", path, findnames[k]); 382 if (fp->fp = sfopen(NiL, fp->decode.path, "r")) 383 { 384 path = fp->decode.path; 385 break; 386 } 387 } 388 if (fp->fp) 389 break; 390 } 391 else if (fp->fp = sfopen(NiL, path, "r")) 392 break; 393 } 394 } 395 else if ((path = pathpath(path, "", PATH_REGULAR|PATH_READ, fp->decode.path, sizeof(fp->decode.path))) && (fp->fp = sfopen(NiL, path, "r"))) 396 break; 397 } 398 if (!fp->fp) 399 { 400 if (fp->disc->errorf) 401 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot locate codes", file ? file : findcodes[2]); 402 goto drop; 403 } 404 if (fstat(sffileno(fp->fp), &st)) 405 { 406 if (fp->disc->errorf) 407 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot stat codes", path); 408 goto drop; 409 } 410 if (fp->secure = ((st.st_mode & (S_IRGRP|S_IROTH)) == S_IRGRP) && st.st_gid == getegid() && getegid() != getgid()) 411 setgid(getgid()); 412 fp->stamp = st.st_mtime; 413 b = (s = fp->decode.temp) + 1; 414 for (i = 0; i < elementsof(fp->decode.bigram1); i++) 415 { 416 if ((j = sfgetc(fp->fp)) == EOF) 417 goto invalid; 418 if (!(*s++ = fp->decode.bigram1[i] = j) && i) 419 { 420 i = -i; 421 break; 422 } 423 if ((j = sfgetc(fp->fp)) == EOF) 424 goto invalid; 425 if (!(*s++ = fp->decode.bigram2[i] = j) && (i || fp->decode.bigram1[0] >= '0' && fp->decode.bigram1[0] <= '1')) 426 break; 427 } 428 if (streq(b, FF_typ_magic)) 429 { 430 if (type) 431 { 432 type = (const char*)typefix(fp->decode.bigram2, sizeof(fp->decode.bigram2), type); 433 memset(fp->decode.bigram1, 0, sizeof(fp->decode.bigram1)); 434 } 435 fp->method = FF_typ; 436 for (j = 0, i = 1;; i++) 437 { 438 if (!(s = sfgetr(fp->fp, 0, 0))) 439 goto invalid; 440 if (!*s) 441 break; 442 if (type && strmatch(s, type)) 443 { 444 FF_SET_TYPE(fp, i); 445 j++; 446 } 447 } 448 if (type && !j) 449 goto drop; 450 fp->types = j; 451 } 452 else if (streq(b, FF_dir_magic)) 453 fp->method = FF_dir; 454 else if (streq(b, FF_gnu_magic)) 455 fp->method = FF_gnu; 456 else if (!*b && *--b >= '0' && *b <= '1') 457 { 458 fp->method = FF_gnu; 459 while (j = sfgetc(fp->fp)) 460 { 461 if (j == EOF || fp->decode.count >= sizeof(fp->decode.path)) 462 goto invalid; 463 fp->decode.path[fp->decode.count++] = j; 464 } 465 } 466 else 467 { 468 fp->method = FF_old; 469 if (i < 0) 470 { 471 if ((j = sfgetc(fp->fp)) == EOF) 472 goto invalid; 473 fp->decode.bigram2[i = -i] = j; 474 } 475 while (++i < elementsof(fp->decode.bigram1)) 476 { 477 if ((j = sfgetc(fp->fp)) == EOF) 478 goto invalid; 479 fp->decode.bigram1[i] = j; 480 if ((j = sfgetc(fp->fp)) == EOF) 481 goto invalid; 482 fp->decode.bigram2[i] = j; 483 } 484 if ((fp->decode.peek = sfgetc(fp->fp)) != FF_OFF) 485 goto invalid; 486 } 487 488 /* 489 * set up the physical dir table 490 */ 491 492 if (disc->version >= 19980301L) 493 { 494 fp->verifyf = disc->verifyf; 495 if (disc->dirs && *disc->dirs) 496 { 497 for (k = 0; disc->dirs[k]; k++); 498 if (k == 1 && streq(disc->dirs[0], "/")) 499 k = 0; 500 if (k) 501 { 502 if (!(fp->dirs = vmnewof(fp->vm, 0, char*, 2 * k + 1, 0))) 503 goto drop; 504 if (!(fp->lens = vmnewof(fp->vm, 0, int, 2 * k, 0))) 505 goto drop; 506 p = 0; 507 b = fp->decode.temp; 508 j = fp->method == FF_old || fp->method == FF_gnu; 509 510 /* 511 * fill the dir list with logical and 512 * physical names since we don't know 513 * which way the db was encoded (it 514 * could be *both* ways) 515 */ 516 517 for (i = q = 0; i < k; i++) 518 { 519 if (*(s = disc->dirs[i]) == '/') 520 sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s", s); 521 else if (!p && !(p = getcwd(fp->decode.path, sizeof(fp->decode.path)))) 522 goto nospace; 523 else 524 sfsprintf(b, sizeof(fp->decode.temp) - 1, "%s/%s", p, s); 525 s = pathcanon(b, sizeof(fp->decode.temp), 0); 526 *s = '/'; 527 *(s + 1) = 0; 528 if (!(fp->dirs[q] = vmstrdup(fp->vm, b))) 529 goto nospace; 530 if (j) 531 (fp->dirs[q])[s - b] = 0; 532 q++; 533 *s = 0; 534 s = pathcanon(b, sizeof(fp->decode.temp), PATH_PHYSICAL); 535 *s = '/'; 536 *(s + 1) = 0; 537 if (!strneq(b, fp->dirs[q - 1], s - b)) 538 { 539 if (!(fp->dirs[q] = vmstrdup(fp->vm, b))) 540 goto nospace; 541 if (j) 542 (fp->dirs[q])[s - b] = 0; 543 q++; 544 } 545 } 546 strsort(fp->dirs, q, strcasecmp); 547 for (i = 0; i < q; i++) 548 fp->lens[i] = strlen(fp->dirs[i]); 549 } 550 } 551 } 552 if (fp->verifyf || (disc->flags & FIND_VERIFY)) 553 { 554 if (fp->method != FF_dir && fp->method != FF_typ) 555 { 556 if (fp->disc->errorf) 557 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s code format does not support directory verification", path, fp->method == FF_gnu ? FF_gnu_magic : "OLD-BIGRAM"); 558 goto drop; 559 } 560 fp->verify = 1; 561 } 562 563 /* 564 * extract last glob-free subpattern in name for fast pre-match 565 * prepend 0 for backwards match 566 */ 567 568 if (p = s = (char*)pattern) 569 { 570 b = fp->decode.pattern; 571 for (;;) 572 { 573 switch (*b++ = *p++) 574 { 575 case 0: 576 break; 577 case '\\': 578 s = p; 579 if (!*p++) 580 break; 581 continue; 582 case '[': 583 if (!brace) 584 { 585 brace++; 586 if (*p == ']') 587 p++; 588 } 589 continue; 590 case ']': 591 if (brace) 592 { 593 brace--; 594 s = p; 595 } 596 continue; 597 case '(': 598 if (!brace) 599 paren++; 600 continue; 601 case ')': 602 if (!brace && paren > 0 && !--paren) 603 s = p; 604 continue; 605 case '|': 606 case '&': 607 if (!brace && !paren) 608 { 609 s = ""; 610 break; 611 } 612 continue; 613 case '*': 614 case '?': 615 s = p; 616 continue; 617 default: 618 continue; 619 } 620 break; 621 } 622 if (s != pattern && !streq(pattern, "*")) 623 { 624 fp->decode.match = 1; 625 if (i = regcomp(&fp->decode.re, pattern, REG_SHELL|REG_AUGMENTED|(fp->decode.ignorecase?REG_ICASE:0))) 626 { 627 if (disc->errorf) 628 { 629 regerror(i, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp)); 630 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", pattern, fp->decode.temp); 631 } 632 goto drop; 633 } 634 } 635 if (*s) 636 { 637 *b++ = 0; 638 while (i = *s++) 639 *b++ = i; 640 *b-- = 0; 641 fp->decode.end = b; 642 if (fp->decode.ignorecase) 643 for (s = fp->decode.pattern; s <= b; s++) 644 if (isupper(*s)) 645 *s = tolower(*s); 646 } 647 } 648 } 649 return fp; 650 nospace: 651 if (disc->errorf) 652 (*fp->disc->errorf)(fp, fp->disc, 2, "out of space"); 653 if (!vm) 654 return 0; 655 if (!fp) 656 { 657 vmclose(vm); 658 return 0; 659 } 660 goto drop; 661 invalid: 662 if (fp->disc->errorf) 663 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: invalid codes", path); 664 drop: 665 if (!fp->generate && fp->decode.match) 666 regfree(&fp->decode.re); 667 if (fp->fp) 668 sfclose(fp->fp); 669 vmclose(fp->vm); 670 return 0; 671 } 672 673 /* 674 * return the next fastfind path 675 * 0 returned when list exhausted 676 */ 677 678 char* 679 findread(register Find_t* fp) 680 { 681 register char* p; 682 register char* q; 683 register char* s; 684 register char* b; 685 register char* e; 686 register int c; 687 register int n; 688 register int m; 689 int ignorecase; 690 int t; 691 unsigned char w[4]; 692 struct stat st; 693 694 if (fp->generate) 695 return 0; 696 if (fp->decode.restore) 697 { 698 *fp->decode.restore = '/'; 699 fp->decode.restore = 0; 700 } 701 ignorecase = fp->decode.ignorecase ? STR_ICASE : 0; 702 c = fp->decode.peek; 703 next: 704 for (;;) 705 { 706 switch (fp->method) 707 { 708 case FF_dir: 709 t = 0; 710 n = sfgetl(fp->fp); 711 goto grab; 712 case FF_gnu: 713 if ((c = sfgetc(fp->fp)) == EOF) 714 return 0; 715 if (c == 0x80) 716 { 717 if ((c = sfgetc(fp->fp)) == EOF) 718 return 0; 719 n = c << 8; 720 if ((c = sfgetc(fp->fp)) == EOF) 721 return 0; 722 n |= c; 723 if (n & 0x8000) 724 n = (n - 0xffff) - 1; 725 } 726 else if ((n = c) & 0x80) 727 n = (n - 0xff) - 1; 728 t = 0; 729 goto grab; 730 case FF_typ: 731 t = sfgetu(fp->fp); 732 n = sfgetl(fp->fp); 733 grab: 734 p = fp->decode.path + (fp->decode.count += n); 735 do 736 { 737 if ((c = sfgetc(fp->fp)) == EOF) 738 return 0; 739 } while (*p++ = c); 740 p -= 2; 741 break; 742 case FF_old: 743 if (c == EOF) 744 { 745 fp->decode.peek = c; 746 return 0; 747 } 748 if (c == FF_ESC) 749 { 750 if (sfread(fp->fp, w, sizeof(w)) != sizeof(w)) 751 return 0; 752 if (fp->decode.swap >= 0) 753 { 754 c = (int32_t)((w[0] << 24) | (w[1] << 16) | (w[2] << 8) | w[3]); 755 if (!fp->decode.swap) 756 { 757 /* 758 * the old format uses machine 759 * byte order; this test uses 760 * the smallest magnitude of 761 * both byte orders on the 762 * first encoded path motion 763 * to determine the original 764 * byte order 765 */ 766 767 m = c; 768 if (m < 0) 769 m = -m; 770 n = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]); 771 if (n < 0) 772 n = -n; 773 if (m < n) 774 fp->decode.swap = 1; 775 else 776 { 777 fp->decode.swap = -1; 778 c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]); 779 } 780 } 781 } 782 else 783 c = (int32_t)((w[3] << 24) | (w[2] << 16) | (w[1] << 8) | w[0]); 784 } 785 fp->decode.count += c - FF_OFF; 786 for (p = fp->decode.path + fp->decode.count; (c = sfgetc(fp->fp)) > FF_ESC;) 787 if (c & (1<<(CHAR_BIT-1))) 788 { 789 *p++ = fp->decode.bigram1[c & ((1<<(CHAR_BIT-1))-1)]; 790 *p++ = fp->decode.bigram2[c & ((1<<(CHAR_BIT-1))-1)]; 791 } 792 else 793 *p++ = c; 794 *p-- = 0; 795 t = 0; 796 break; 797 } 798 b = fp->decode.path; 799 if (fp->decode.found) 800 fp->decode.found = 0; 801 else 802 b += fp->decode.count; 803 if (fp->dirs) 804 for (;;) 805 { 806 if (!*fp->dirs) 807 return 0; 808 809 /* 810 * use the ordering and lengths to prune 811 * comparison function calls 812 * (*fp->dirs)[*fp->lens]=='/' if its 813 * already been matched 814 */ 815 816 if ((n = p - fp->decode.path + 1) > (m = *fp->lens)) 817 { 818 if (!(*fp->dirs)[m]) 819 goto next; 820 if (!strncasecmp(*fp->dirs, fp->decode.path, m)) 821 break; 822 } 823 else if (n == m) 824 { 825 if (!(*fp->dirs)[m]) 826 { 827 if (!(n = strcasecmp(*fp->dirs, fp->decode.path)) && (ignorecase || !strcmp(*fp->dirs, fp->decode.path))) 828 { 829 if (m > 0) 830 { 831 (*fp->dirs)[m] = '/'; 832 if ((*fp->dirs)[m - 1] != '/') 833 (*fp->dirs)[++(*fp->lens)] = '/'; 834 } 835 break; 836 } 837 if (n >= 0) 838 goto next; 839 } 840 } 841 else if (!(*fp->dirs)[m]) 842 goto next; 843 fp->dirs++; 844 fp->lens++; 845 } 846 if (fp->verify && (*p == '/' || t == 1)) 847 { 848 if ((n = p - fp->decode.path)) 849 *p = 0; 850 else 851 n = 1; 852 if (fp->verifyf) 853 n = (*fp->verifyf)(fp, fp->decode.path, n, fp->disc); 854 else if (stat(fp->decode.path, &st)) 855 n = -1; 856 else if ((unsigned long)st.st_mtime > fp->stamp) 857 n = 1; 858 else 859 n = 0; 860 *p = '/'; 861 862 /* 863 * n<0 skip this subtree 864 * n==0 keep as is 865 * n>0 read this dir now 866 */ 867 868 /* NOT IMPLEMENTED YET */ 869 } 870 if (FF_OK_TYPE(fp, t)) 871 { 872 if (fp->decode.end) 873 { 874 if (*(s = p) == '/') 875 s--; 876 if (*fp->decode.pattern == '/' && b > fp->decode.path) 877 b--; 878 for (; s >= b; s--) 879 if (*s == *fp->decode.end || ignorecase && tolower(*s) == *fp->decode.end) 880 { 881 if (ignorecase) 882 for (e = fp->decode.end - 1, q = s - 1; *e && (*q == *e || tolower(*q) == *e); e--, q--); 883 else 884 for (e = fp->decode.end - 1, q = s - 1; *e && *q == *e; e--, q--); 885 if (!*e) 886 { 887 fp->decode.found = 1; 888 if (!fp->decode.match || strgrpmatch(fp->decode.path, fp->decode.pattern, NiL, 0, STR_MAXIMAL|STR_LEFT|STR_RIGHT|ignorecase)) 889 { 890 fp->decode.peek = c; 891 if (*p == '/') 892 *(fp->decode.restore = p) = 0; 893 if (!fp->secure || !access(fp->decode.path, F_OK)) 894 return fp->decode.path; 895 } 896 break; 897 } 898 } 899 } 900 else if (!fp->decode.match || !(n = regexec(&fp->decode.re, fp->decode.path, 0, NiL, 0))) 901 { 902 fp->decode.peek = c; 903 if (*p == '/' && p > fp->decode.path) 904 *(fp->decode.restore = p) = 0; 905 if (!fp->secure || !access(fp->decode.path, F_OK)) 906 return fp->decode.path; 907 } 908 else if (n != REG_NOMATCH) 909 { 910 if (fp->disc->errorf) 911 { 912 regerror(n, &fp->decode.re, fp->decode.temp, sizeof(fp->decode.temp)); 913 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: %s", fp->decode.pattern, fp->decode.temp); 914 } 915 return 0; 916 } 917 } 918 } 919 } 920 921 /* 922 * add path to the code table 923 * paths are assumed to be in sort order 924 */ 925 926 int 927 findwrite(register Find_t* fp, const char* path, size_t len, const char* type) 928 { 929 register unsigned char* s; 930 register unsigned char* e; 931 register unsigned char* p; 932 register int n; 933 register int d; 934 register Type_t* x; 935 register unsigned long u; 936 937 if (!fp->generate) 938 return -1; 939 if (type && fp->method == FF_dir) 940 { 941 len = sfsprintf(fp->encode.mark, sizeof(fp->encode.mark), "%-.*s/", len, path); 942 path = fp->encode.mark; 943 } 944 s = (unsigned char*)path; 945 if (len <= 0) 946 len = strlen(path); 947 if (len < sizeof(fp->encode.path)) 948 e = s + len++; 949 else 950 { 951 len = sizeof(fp->encode.path) - 1; 952 e = s + len; 953 } 954 p = (unsigned char*)fp->encode.path; 955 while (s < e) 956 { 957 if (*s != *p++) 958 break; 959 s++; 960 } 961 n = s - (unsigned char*)path; 962 switch (fp->method) 963 { 964 case FF_gnu: 965 d = n - fp->encode.prefix; 966 if (d >= -127 && d <= 127) 967 sfputc(fp->fp, d & 0xff); 968 else 969 { 970 sfputc(fp->fp, 0x80); 971 sfputc(fp->fp, (d >> 8) & 0xff); 972 sfputc(fp->fp, d & 0xff); 973 } 974 fp->encode.prefix = n; 975 sfputr(fp->fp, (char*)s, 0); 976 break; 977 case FF_old: 978 sfprintf(fp->fp, "%ld", n - fp->encode.prefix + FF_OFF); 979 fp->encode.prefix = n; 980 sfputc(fp->fp, ' '); 981 p = s; 982 while (s < e) 983 { 984 n = *s++; 985 if (s >= e) 986 break; 987 fp->encode.code[n][*s++]++; 988 } 989 while (p < e) 990 { 991 if ((n = *p++) < FF_MIN || n >= FF_MAX) 992 n = '?'; 993 sfputc(fp->fp, n); 994 } 995 sfputc(fp->fp, 0); 996 break; 997 case FF_typ: 998 if (type) 999 { 1000 type = (const char*)typefix((char*)fp->encode.bigram, sizeof(fp->encode.bigram), type); 1001 if (x = (Type_t*)dtmatch(fp->encode.namedict, type)) 1002 u = x->index; 1003 else if (!(x = newof(0, Type_t, 1, strlen(type) + 1))) 1004 u = 0; 1005 else 1006 { 1007 u = x->index = ++fp->types; 1008 strcpy(x->name, type); 1009 dtinsert(fp->encode.namedict, x); 1010 dtinsert(fp->encode.indexdict, x); 1011 } 1012 } 1013 else 1014 u = 0; 1015 sfputu(fp->fp, u); 1016 /*FALLTHROUGH...*/ 1017 case FF_dir: 1018 d = n - fp->encode.prefix; 1019 sfputl(fp->fp, d); 1020 fp->encode.prefix = n; 1021 sfputr(fp->fp, (char*)s, 0); 1022 break; 1023 } 1024 memcpy(fp->encode.path, path, len); 1025 return 0; 1026 } 1027 1028 /* 1029 * findsync() helper 1030 */ 1031 1032 static int 1033 finddone(register Find_t* fp) 1034 { 1035 int r; 1036 1037 if (sfsync(fp->fp)) 1038 { 1039 if (fp->disc->errorf) 1040 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfsync]", fp->encode.file); 1041 return -1; 1042 } 1043 if (sferror(fp->fp)) 1044 { 1045 if (fp->disc->errorf) 1046 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sferror]", fp->encode.file); 1047 return -1; 1048 } 1049 r = sfclose(fp->fp); 1050 fp->fp = 0; 1051 if (r) 1052 { 1053 if (fp->disc->errorf) 1054 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: write error [sfclose]", fp->encode.file); 1055 return -1; 1056 } 1057 return 0; 1058 } 1059 1060 /* 1061 * finish the code table 1062 */ 1063 1064 static int 1065 findsync(register Find_t* fp) 1066 { 1067 register char* s; 1068 register int n; 1069 register int m; 1070 register int d; 1071 register Type_t* x; 1072 char* t; 1073 int b; 1074 long z; 1075 Sfio_t* sp; 1076 1077 switch (fp->method) 1078 { 1079 case FF_dir: 1080 case FF_gnu: 1081 /* 1082 * replace the real file with the temp file 1083 */ 1084 1085 if (finddone(fp)) 1086 goto bad; 1087 remove(fp->encode.file); 1088 if (rename(fp->encode.temp, fp->encode.file)) 1089 { 1090 if (fp->disc->errorf) 1091 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot rename from tmp file %s", fp->encode.file, fp->encode.temp); 1092 remove(fp->encode.temp); 1093 return -1; 1094 } 1095 break; 1096 case FF_old: 1097 /* 1098 * determine the top FF_MAX bigrams 1099 */ 1100 1101 for (n = 0; n < FF_MAX; n++) 1102 for (m = 0; m < FF_MAX; m++) 1103 fp->encode.hits[fp->encode.code[n][m]]++; 1104 fp->encode.hits[0] = 0; 1105 m = 1; 1106 for (n = USHRT_MAX; n >= 0; n--) 1107 if (d = fp->encode.hits[n]) 1108 { 1109 fp->encode.hits[n] = m; 1110 if ((m += d) > FF_MAX) 1111 break; 1112 } 1113 while (--n >= 0) 1114 fp->encode.hits[n] = 0; 1115 for (n = FF_MAX - 1; n >= 0; n--) 1116 for (m = FF_MAX - 1; m >= 0; m--) 1117 if (fp->encode.hits[fp->encode.code[n][m]]) 1118 { 1119 d = fp->encode.code[n][m]; 1120 b = fp->encode.hits[d] - 1; 1121 fp->encode.code[n][m] = b + FF_MAX; 1122 if (fp->encode.hits[d]++ >= FF_MAX) 1123 fp->encode.hits[d] = 0; 1124 fp->encode.bigram[b *= 2] = n; 1125 fp->encode.bigram[b + 1] = m; 1126 } 1127 else 1128 fp->encode.code[n][m] = 0; 1129 1130 /* 1131 * commit the real file 1132 */ 1133 1134 if (sfseek(fp->fp, (Sfoff_t)0, SEEK_SET)) 1135 { 1136 if (fp->disc->errorf) 1137 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "cannot rewind tmp file"); 1138 return -1; 1139 } 1140 if (!(sp = sfopen(NiL, fp->encode.file, "w"))) 1141 goto badcreate; 1142 1143 /* 1144 * dump the bigrams 1145 */ 1146 1147 sfwrite(sp, fp->encode.bigram, sizeof(fp->encode.bigram)); 1148 1149 /* 1150 * encode the massaged paths 1151 */ 1152 1153 while (s = sfgetr(fp->fp, 0, 0)) 1154 { 1155 z = strtol(s, &t, 0); 1156 s = t; 1157 if (z < 0 || z > 2 * FF_OFF) 1158 { 1159 sfputc(sp, FF_ESC); 1160 sfputc(sp, (z >> 24)); 1161 sfputc(sp, (z >> 16)); 1162 sfputc(sp, (z >> 8)); 1163 sfputc(sp, z); 1164 } 1165 else 1166 sfputc(sp, z); 1167 while (n = *s++) 1168 { 1169 if (!(m = *s++)) 1170 { 1171 sfputc(sp, n); 1172 break; 1173 } 1174 if (d = fp->encode.code[n][m]) 1175 sfputc(sp, d); 1176 else 1177 { 1178 sfputc(sp, n); 1179 sfputc(sp, m); 1180 } 1181 } 1182 } 1183 sfclose(fp->fp); 1184 fp->fp = sp; 1185 if (finddone(fp)) 1186 goto bad; 1187 break; 1188 case FF_typ: 1189 if (finddone(fp)) 1190 goto bad; 1191 if (!(fp->fp = sfopen(NiL, fp->encode.temp, "r"))) 1192 { 1193 if (fp->disc->errorf) 1194 (*fp->disc->errorf)(fp, fp->disc, ERROR_SYSTEM|2, "%s: cannot read tmp file", fp->encode.temp); 1195 remove(fp->encode.temp); 1196 return -1; 1197 } 1198 1199 /* 1200 * commit the output file 1201 */ 1202 1203 if (!(sp = sfopen(NiL, fp->encode.file, "w"))) 1204 goto badcreate; 1205 1206 /* 1207 * write the header magic 1208 */ 1209 1210 sfputc(sp, 0); 1211 sfputr(sp, FF_typ_magic, 0); 1212 1213 /* 1214 * write the type table in index order starting with 1 1215 */ 1216 1217 for (x = (Type_t*)dtfirst(fp->encode.indexdict); x; x = (Type_t*)dtnext(fp->encode.indexdict, x)) 1218 sfputr(sp, x->name, 0); 1219 sfputc(sp, 0); 1220 1221 /* 1222 * append the front compressed strings 1223 */ 1224 1225 if (sfmove(fp->fp, sp, SF_UNBOUND, -1) < 0 || !sfeof(fp->fp)) 1226 { 1227 sfclose(sp); 1228 if (fp->disc->errorf) 1229 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot append codes", fp->encode.file); 1230 goto bad; 1231 } 1232 sfclose(fp->fp); 1233 fp->fp = sp; 1234 if (finddone(fp)) 1235 goto bad; 1236 remove(fp->encode.temp); 1237 break; 1238 } 1239 return 0; 1240 badcreate: 1241 if (fp->disc->errorf) 1242 (*fp->disc->errorf)(fp, fp->disc, 2, "%s: cannot write codes", fp->encode.file); 1243 bad: 1244 if (fp->fp) 1245 { 1246 sfclose(fp->fp); 1247 fp->fp = 0; 1248 } 1249 remove(fp->encode.temp); 1250 return -1; 1251 } 1252 1253 /* 1254 * close an open fastfind stream 1255 */ 1256 1257 int 1258 findclose(register Find_t* fp) 1259 { 1260 int n = 0; 1261 1262 if (!fp) 1263 return -1; 1264 if (fp->generate) 1265 { 1266 n = findsync(fp); 1267 if (fp->encode.indexdict) 1268 dtclose(fp->encode.indexdict); 1269 if (fp->encode.namedict) 1270 dtclose(fp->encode.namedict); 1271 } 1272 else 1273 { 1274 if (fp->decode.match) 1275 regfree(&fp->decode.re); 1276 n = 0; 1277 } 1278 if (fp->fp) 1279 sfclose(fp->fp); 1280 vmclose(fp->vm); 1281 return n; 1282 } 1283