1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 #if defined(LIBC_SCCS) && !defined(lint) 33 static char sccsid[] = "@(#)glob.c 5.12 (Berkeley) 6/24/91"; 34 #endif /* LIBC_SCCS and not lint */ 35 /* 36 * Glob: the interface is a superset of the one defined in POSIX 1003.2, 37 * draft 9. 38 * 39 * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 40 * 41 * Optional extra services, controlled by flags not defined by POSIX: 42 * 43 * GLOB_QUOTE: 44 * Escaping convention: \ inhibits any special meaning the following 45 * character might have (except \ at end of string is retained). 46 * GLOB_MAGCHAR: 47 * Set in gl_flags if pattern contained a globbing character. 48 * GLOB_ALTNOT: 49 * Use ^ instead of ! for "not". 50 * gl_matchc: 51 * Number of matches in the current invocation of glob. 52 */ 53 54 #ifdef notdef 55 #include <sys/types.h> 56 #include <sys/param.h> 57 #include <sys/stat.h> 58 #include <dirent.h> 59 #include <ctype.h> 60 typedef void * ptr_t; 61 #endif 62 #ifdef WINNT_NATIVE 63 #pragma warning(disable:4244) 64 #endif /* WINNT_NATIVE */ 65 66 #define Char __Char 67 #include "sh.h" 68 #undef Char 69 #undef QUOTE 70 #undef TILDE 71 #undef META 72 #undef CHAR 73 #undef ismeta 74 #undef Strchr 75 76 #include "glob.h" 77 78 #ifndef S_ISDIR 79 #define S_ISDIR(a) (((a) & S_IFMT) == S_IFDIR) 80 #endif 81 82 #if !defined(S_ISLNK) && defined(S_IFLNK) 83 #define S_ISLNK(a) (((a) & S_IFMT) == S_IFLNK) 84 #endif 85 86 #if !defined(S_ISLNK) && !defined(lstat) 87 #define lstat stat 88 #endif 89 90 typedef unsigned short Char; 91 92 static int glob1 __P((Char *, glob_t *, int)); 93 static int glob2 __P((Char *, Char *, Char *, glob_t *, int)); 94 static int glob3 __P((Char *, Char *, Char *, Char *, 95 glob_t *, int)); 96 static int globextend __P((Char *, glob_t *)); 97 static int match __P((Char *, Char *, Char *, int)); 98 #ifndef __clipper__ 99 static int compare __P((const ptr_t, const ptr_t)); 100 #endif 101 static DIR *Opendir __P((Char *)); 102 #ifdef S_IFLNK 103 static int Lstat __P((Char *, struct stat *)); 104 #endif 105 static int Stat __P((Char *, struct stat *sb)); 106 static Char *Strchr __P((Char *, int)); 107 #ifdef DEBUG 108 static void qprintf __P((Char *)); 109 #endif 110 111 #define DOLLAR '$' 112 #define DOT '.' 113 #define EOS '\0' 114 #define LBRACKET '[' 115 #define NOT '!' 116 #define ALTNOT '^' 117 #define QUESTION '?' 118 #define QUOTE '\\' 119 #define RANGE '-' 120 #define RBRACKET ']' 121 #define SEP '/' 122 #define STAR '*' 123 #define TILDE '~' 124 #define UNDERSCORE '_' 125 126 #define M_META 0x8000 127 #define M_PROTECT 0x4000 128 #define M_MASK 0xffff 129 #define M_ASCII 0x00ff 130 131 #define CHAR(c) ((c)&M_ASCII) 132 #define META(c) ((c)|M_META) 133 #define M_ALL META('*') 134 #define M_END META(']') 135 #define M_NOT META('!') 136 #define M_ALTNOT META('^') 137 #define M_ONE META('?') 138 #define M_RNG META('-') 139 #define M_SET META('[') 140 #define ismeta(c) (((c)&M_META) != 0) 141 142 #ifndef BUFSIZE 143 #define GLOBBUFLEN MAXPATHLEN 144 #else 145 #define GLOBBUFLEN BUFSIZE 146 #endif 147 148 int 149 globcharcoll(c1, c2) 150 int c1, c2; 151 { 152 #if defined(NLS) && defined(LC_COLLATE) && !defined(NOSTRCOLL) 153 char s1[2], s2[2]; 154 155 if (c1 == c2) 156 return (0); 157 /* 158 * From kevin lyda <kevin@suberic.net>: 159 * strcoll does not guarantee case sorting, so we pre-process now: 160 */ 161 if (islower(c1) && isupper(c2)) 162 return (1); 163 s1[0] = c1; 164 s2[0] = c2; 165 s1[1] = s2[1] = '\0'; 166 return strcoll(s1, s2); 167 #else 168 return (c1 - c2); 169 #endif 170 } 171 172 /* 173 * Need to dodge two kernel bugs: 174 * opendir("") != opendir(".") 175 * NAMEI_BUG: on plain files trailing slashes are ignored in some kernels. 176 * POSIX specifies that they should be ignored in directories. 177 */ 178 179 static DIR * 180 Opendir(str) 181 register Char *str; 182 { 183 char buf[GLOBBUFLEN]; 184 register char *dc = buf; 185 #if defined(hpux) || defined(__hpux) 186 struct stat st; 187 #endif 188 189 if (!*str) 190 return (opendir(".")); 191 while ((*dc++ = *str++) != '\0') 192 continue; 193 #if defined(hpux) || defined(__hpux) 194 /* 195 * Opendir on some device files hangs, so avoid it 196 */ 197 if (stat(buf, &st) == -1 || !S_ISDIR(st.st_mode)) 198 return NULL; 199 #endif 200 return (opendir(buf)); 201 } 202 203 #ifdef S_IFLNK 204 static int 205 Lstat(fn, sb) 206 register Char *fn; 207 struct stat *sb; 208 { 209 char buf[GLOBBUFLEN]; 210 register char *dc = buf; 211 212 while ((*dc++ = *fn++) != '\0') 213 continue; 214 # ifdef NAMEI_BUG 215 { 216 int st; 217 218 st = lstat(buf, sb); 219 if (*buf) 220 dc--; 221 return (*--dc == '/' && !S_ISDIR(sb->st_mode) ? -1 : st); 222 } 223 # else 224 return (lstat(buf, sb)); 225 # endif /* NAMEI_BUG */ 226 } 227 #else 228 #define Lstat Stat 229 #endif /* S_IFLNK */ 230 231 static int 232 Stat(fn, sb) 233 register Char *fn; 234 struct stat *sb; 235 { 236 char buf[GLOBBUFLEN]; 237 register char *dc = buf; 238 239 while ((*dc++ = *fn++) != '\0') 240 continue; 241 #ifdef NAMEI_BUG 242 { 243 int st; 244 245 st = stat(buf, sb); 246 if (*buf) 247 dc--; 248 return (*--dc == '/' && !S_ISDIR(sb->st_mode) ? -1 : st); 249 } 250 #else 251 return (stat(buf, sb)); 252 #endif /* NAMEI_BUG */ 253 } 254 255 static Char * 256 Strchr(str, ch) 257 Char *str; 258 int ch; 259 { 260 do 261 if (*str == ch) 262 return (str); 263 while (*str++); 264 return (NULL); 265 } 266 267 #ifdef DEBUG 268 static void 269 qprintf(s) 270 Char *s; 271 { 272 Char *p; 273 274 for (p = s; *p; p++) 275 printf("%c", *p & 0xff); 276 printf("\n"); 277 for (p = s; *p; p++) 278 printf("%c", *p & M_PROTECT ? '"' : ' '); 279 printf("\n"); 280 for (p = s; *p; p++) 281 printf("%c", *p & M_META ? '_' : ' '); 282 printf("\n"); 283 } 284 #endif /* DEBUG */ 285 286 static int 287 compare(p, q) 288 const ptr_t p, q; 289 { 290 #if defined(NLS) && !defined(NOSTRCOLL) 291 errno = 0; /* strcoll sets errno, another brain-damage */ 292 293 return (strcoll(*(char **) p, *(char **) q)); 294 #else 295 return (strcmp(*(char **) p, *(char **) q)); 296 #endif /* NLS && !NOSTRCOLL */ 297 } 298 299 /* 300 * The main glob() routine: compiles the pattern (optionally processing 301 * quotes), calls glob1() to do the real pattern matching, and finally 302 * sorts the list (unless unsorted operation is requested). Returns 0 303 * if things went well, nonzero if errors occurred. It is not an error 304 * to find no matches. 305 */ 306 int 307 glob(pattern, flags, errfunc, pglob) 308 const char *pattern; 309 int flags; 310 int (*errfunc) __P((const char *, int)); 311 glob_t *pglob; 312 { 313 int err, oldpathc; 314 Char *bufnext, *bufend, *compilebuf, m_not; 315 const unsigned char *compilepat, *patnext; 316 int c, not; 317 Char patbuf[GLOBBUFLEN + 1], *qpatnext; 318 int no_match; 319 320 patnext = (unsigned char *) pattern; 321 if (!(flags & GLOB_APPEND)) { 322 pglob->gl_pathc = 0; 323 pglob->gl_pathv = NULL; 324 if (!(flags & GLOB_DOOFFS)) 325 pglob->gl_offs = 0; 326 } 327 pglob->gl_flags = flags & ~GLOB_MAGCHAR; 328 pglob->gl_errfunc = errfunc; 329 oldpathc = pglob->gl_pathc; 330 pglob->gl_matchc = 0; 331 332 if (pglob->gl_flags & GLOB_ALTNOT) { 333 not = ALTNOT; 334 m_not = M_ALTNOT; 335 } 336 else { 337 not = NOT; 338 m_not = M_NOT; 339 } 340 341 bufnext = patbuf; 342 bufend = bufnext + GLOBBUFLEN; 343 compilebuf = bufnext; 344 compilepat = patnext; 345 346 no_match = *patnext == not; 347 if (no_match) 348 patnext++; 349 350 if (flags & GLOB_QUOTE) { 351 /* Protect the quoted characters */ 352 while (bufnext < bufend && (c = *patnext++) != EOS) 353 #ifdef DSPMBYTE 354 if (Ismbyte1(c) && *patnext != EOS) 355 { 356 *bufnext++ = (Char) c; 357 *bufnext++ = (Char) *patnext++; 358 } 359 else 360 #endif /* DSPMBYTE */ 361 if (c == QUOTE) { 362 if ((c = *patnext++) == EOS) { 363 c = QUOTE; 364 --patnext; 365 } 366 *bufnext++ = (Char) (c | M_PROTECT); 367 } 368 else 369 *bufnext++ = (Char) c; 370 } 371 else 372 while (bufnext < bufend && (c = *patnext++) != EOS) 373 *bufnext++ = (Char) c; 374 *bufnext = EOS; 375 376 bufnext = patbuf; 377 qpatnext = patbuf; 378 /* we don't need to check for buffer overflow any more */ 379 while ((c = *qpatnext++) != EOS) { 380 #ifdef DSPMBYTE 381 if (Ismbyte1(c) && *qpatnext != EOS) 382 { 383 *bufnext++ = CHAR(c); 384 *bufnext++ = CHAR(*qpatnext++); 385 } 386 else 387 #endif /* DSPMBYTE */ 388 switch (c) { 389 case LBRACKET: 390 c = *qpatnext; 391 if (c == not) 392 ++qpatnext; 393 if (*qpatnext == EOS || 394 Strchr(qpatnext + 1, RBRACKET) == NULL) { 395 *bufnext++ = LBRACKET; 396 if (c == not) 397 --qpatnext; 398 break; 399 } 400 pglob->gl_flags |= GLOB_MAGCHAR; 401 *bufnext++ = M_SET; 402 if (c == not) 403 *bufnext++ = m_not; 404 c = *qpatnext++; 405 do { 406 *bufnext++ = CHAR(c); 407 if (*qpatnext == RANGE && 408 (c = qpatnext[1]) != RBRACKET) { 409 *bufnext++ = M_RNG; 410 *bufnext++ = CHAR(c); 411 qpatnext += 2; 412 } 413 } while ((c = *qpatnext++) != RBRACKET); 414 *bufnext++ = M_END; 415 break; 416 case QUESTION: 417 pglob->gl_flags |= GLOB_MAGCHAR; 418 *bufnext++ = M_ONE; 419 break; 420 case STAR: 421 pglob->gl_flags |= GLOB_MAGCHAR; 422 /* collapse adjacent stars to one, to avoid 423 * exponential behavior 424 */ 425 if (bufnext == patbuf || bufnext[-1] != M_ALL) 426 *bufnext++ = M_ALL; 427 break; 428 default: 429 *bufnext++ = CHAR(c); 430 break; 431 } 432 } 433 *bufnext = EOS; 434 #ifdef DEBUG 435 qprintf(patbuf); 436 #endif 437 438 if ((err = glob1(patbuf, pglob, no_match)) != 0) 439 return (err); 440 441 /* 442 * If there was no match we are going to append the pattern 443 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified 444 * and the pattern did not contain any magic characters 445 * GLOB_NOMAGIC is there just for compatibility with csh. 446 */ 447 if (pglob->gl_pathc == oldpathc && 448 ((flags & GLOB_NOCHECK) || 449 ((flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR)))) { 450 if (!(flags & GLOB_QUOTE)) { 451 Char *dp = compilebuf; 452 const unsigned char *sp = compilepat; 453 454 while ((*dp++ = *sp++) != '\0') 455 continue; 456 } 457 else { 458 /* 459 * copy pattern, interpreting quotes; this is slightly different 460 * than the interpretation of quotes above -- which should prevail? 461 */ 462 while (*compilepat != EOS) { 463 if (*compilepat == QUOTE) { 464 if (*++compilepat == EOS) 465 --compilepat; 466 } 467 *compilebuf++ = (unsigned char) *compilepat++; 468 } 469 *compilebuf = EOS; 470 } 471 return (globextend(patbuf, pglob)); 472 } 473 else if (!(flags & GLOB_NOSORT) && (pglob->gl_pathc != oldpathc)) 474 qsort((char *) (pglob->gl_pathv + pglob->gl_offs + oldpathc), 475 pglob->gl_pathc - oldpathc, sizeof(char *), 476 (int (*) __P((const void *, const void *))) compare); 477 return (0); 478 } 479 480 static int 481 glob1(pattern, pglob, no_match) 482 Char *pattern; 483 glob_t *pglob; 484 int no_match; 485 { 486 Char pathbuf[GLOBBUFLEN + 1]; 487 488 /* 489 * a null pathname is invalid -- POSIX 1003.1 sect. 2.4. 490 */ 491 if (*pattern == EOS) 492 return (0); 493 return (glob2(pathbuf, pathbuf, pattern, pglob, no_match)); 494 } 495 496 /* 497 * functions glob2 and glob3 are mutually recursive; there is one level 498 * of recursion for each segment in the pattern that contains one or 499 * more meta characters. 500 */ 501 static int 502 glob2(pathbuf, pathend, pattern, pglob, no_match) 503 Char *pathbuf, *pathend, *pattern; 504 glob_t *pglob; 505 int no_match; 506 { 507 struct stat sbuf; 508 int anymeta; 509 Char *p, *q; 510 511 /* 512 * loop over pattern segments until end of pattern or until segment with 513 * meta character found. 514 */ 515 anymeta = 0; 516 for (;;) { 517 if (*pattern == EOS) { /* end of pattern? */ 518 *pathend = EOS; 519 520 if (Lstat(pathbuf, &sbuf)) 521 return (0); 522 523 if (((pglob->gl_flags & GLOB_MARK) && 524 pathend[-1] != SEP) && 525 (S_ISDIR(sbuf.st_mode) 526 #ifdef S_IFLNK 527 || (S_ISLNK(sbuf.st_mode) && 528 (Stat(pathbuf, &sbuf) == 0) && 529 S_ISDIR(sbuf.st_mode)) 530 #endif 531 )) { 532 *pathend++ = SEP; 533 *pathend = EOS; 534 } 535 ++pglob->gl_matchc; 536 return (globextend(pathbuf, pglob)); 537 } 538 539 /* find end of next segment, copy tentatively to pathend */ 540 q = pathend; 541 p = pattern; 542 while (*p != EOS && *p != SEP) { 543 if (ismeta(*p)) 544 anymeta = 1; 545 *q++ = *p++; 546 } 547 548 if (!anymeta) { /* no expansion, do next segment */ 549 pathend = q; 550 pattern = p; 551 while (*pattern == SEP) 552 *pathend++ = *pattern++; 553 } 554 else /* need expansion, recurse */ 555 return (glob3(pathbuf, pathend, pattern, p, pglob, no_match)); 556 } 557 /* NOTREACHED */ 558 } 559 560 561 static int 562 glob3(pathbuf, pathend, pattern, restpattern, pglob, no_match) 563 Char *pathbuf, *pathend, *pattern, *restpattern; 564 glob_t *pglob; 565 int no_match; 566 { 567 DIR *dirp; 568 struct dirent *dp; 569 int err; 570 Char m_not = (pglob->gl_flags & GLOB_ALTNOT) ? M_ALTNOT : M_NOT; 571 char cpathbuf[GLOBBUFLEN], *ptr;; 572 573 *pathend = EOS; 574 errno = 0; 575 576 if (!(dirp = Opendir(pathbuf))) { 577 /* todo: don't call for ENOENT or ENOTDIR? */ 578 for (ptr = cpathbuf; (*ptr++ = (char) *pathbuf++) != EOS;) 579 continue; 580 if ((pglob->gl_errfunc && (*pglob->gl_errfunc) (cpathbuf, errno)) || 581 (pglob->gl_flags & GLOB_ERR)) 582 return (GLOB_ABEND); 583 else 584 return (0); 585 } 586 587 err = 0; 588 589 /* search directory for matching names */ 590 while ((dp = readdir(dirp)) != NULL) { 591 register unsigned char *sc; 592 register Char *dc; 593 594 /* initial DOT must be matched literally */ 595 if (dp->d_name[0] == DOT && *pattern != DOT) 596 continue; 597 for (sc = (unsigned char *) dp->d_name, dc = pathend; 598 (*dc++ = *sc++) != '\0';) 599 continue; 600 if (match(pathend, pattern, restpattern, (int) m_not) == no_match) { 601 *pathend = EOS; 602 continue; 603 } 604 err = glob2(pathbuf, --dc, restpattern, pglob, no_match); 605 if (err) 606 break; 607 } 608 /* todo: check error from readdir? */ 609 (void) closedir(dirp); 610 return (err); 611 } 612 613 614 /* 615 * Extend the gl_pathv member of a glob_t structure to accomodate a new item, 616 * add the new item, and update gl_pathc. 617 * 618 * This assumes the BSD realloc, which only copies the block when its size 619 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 620 * behavior. 621 * 622 * Return 0 if new item added, error code if memory couldn't be allocated. 623 * 624 * Invariant of the glob_t structure: 625 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 626 * gl_pathv points to (gl_offs + gl_pathc + 1) items. 627 */ 628 static int 629 globextend(path, pglob) 630 Char *path; 631 glob_t *pglob; 632 { 633 register char **pathv; 634 register int i; 635 unsigned int newsize; 636 char *copy; 637 Char *p; 638 639 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 640 pathv = (char **) (pglob->gl_pathv ? 641 xrealloc((ptr_t) pglob->gl_pathv, (size_t) newsize) : 642 xmalloc((size_t) newsize)); 643 if (pathv == NULL) 644 return (GLOB_NOSPACE); 645 646 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 647 /* first time around -- clear initial gl_offs items */ 648 pathv += pglob->gl_offs; 649 for (i = pglob->gl_offs; --i >= 0;) 650 *--pathv = NULL; 651 } 652 pglob->gl_pathv = pathv; 653 654 for (p = path; *p++;) 655 continue; 656 if ((copy = (char *) xmalloc((size_t) (p - path))) != NULL) { 657 register char *dc = copy; 658 register Char *sc = path; 659 660 while ((*dc++ = *sc++) != '\0') 661 continue; 662 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 663 } 664 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 665 return ((copy == NULL) ? GLOB_NOSPACE : 0); 666 } 667 668 669 /* 670 * pattern matching function for filenames. Each occurrence of the * 671 * pattern causes a recursion level. 672 */ 673 static int 674 match(name, pat, patend, m_not) 675 register Char *name, *pat, *patend; 676 int m_not; 677 { 678 int ok, negate_range; 679 Char c, k; 680 681 while (pat < patend) { 682 c = *pat++; 683 switch (c & M_MASK) { 684 case M_ALL: 685 if (pat == patend) 686 return (1); 687 do 688 if (match(name, pat, patend, m_not)) 689 return (1); 690 while (*name++ != EOS); 691 return (0); 692 case M_ONE: 693 if (*name++ == EOS) 694 return (0); 695 break; 696 case M_SET: 697 ok = 0; 698 if ((k = *name++) == EOS) 699 return (0); 700 if ((negate_range = ((*pat & M_MASK) == m_not)) != 0) 701 ++pat; 702 while (((c = *pat++) & M_MASK) != M_END) { 703 if ((*pat & M_MASK) == M_RNG) { 704 if (globcharcoll(CHAR(c), CHAR(k)) <= 0 && 705 globcharcoll(CHAR(k), CHAR(pat[1])) <= 0) 706 ok = 1; 707 pat += 2; 708 } 709 else if (c == k) 710 ok = 1; 711 } 712 if (ok == negate_range) 713 return (0); 714 break; 715 default: 716 k = *name++; 717 if (samecase(k) != samecase(c)) 718 return (0); 719 break; 720 } 721 } 722 return (*name == EOS); 723 } 724 725 /* free allocated data belonging to a glob_t structure */ 726 void 727 globfree(pglob) 728 glob_t *pglob; 729 { 730 register int i; 731 register char **pp; 732 733 if (pglob->gl_pathv != NULL) { 734 pp = pglob->gl_pathv + pglob->gl_offs; 735 for (i = pglob->gl_pathc; i--; ++pp) 736 if (*pp) 737 xfree((ptr_t) *pp), *pp = NULL; 738 xfree((ptr_t) pglob->gl_pathv), pglob->gl_pathv = NULL; 739 } 740 } 741