1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 #if defined(LIBC_SCCS) && !defined(lint) 33 static char sccsid[] = "@(#)glob.c 5.12 (Berkeley) 6/24/91"; 34 #endif /* LIBC_SCCS and not lint */ 35 /* 36 * Glob: the interface is a superset of the one defined in POSIX 1003.2, 37 * draft 9. 38 * 39 * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 40 * 41 * Optional extra services, controlled by flags not defined by POSIX: 42 * 43 * GLOB_QUOTE: 44 * Escaping convention: \ inhibits any special meaning the following 45 * character might have (except \ at end of string is retained). 46 * GLOB_MAGCHAR: 47 * Set in gl_flags if pattern contained a globbing character. 48 * GLOB_ALTNOT: 49 * Use ^ instead of ! for "not". 50 * gl_matchc: 51 * Number of matches in the current invocation of glob. 52 */ 53 54 #ifdef WINNT_NATIVE 55 #pragma warning(disable:4244) 56 #endif /* WINNT_NATIVE */ 57 58 #define Char __Char 59 #include "sh.h" 60 #include "glob.h" 61 62 #undef Char 63 #undef QUOTE 64 #undef TILDE 65 #undef META 66 #undef ismeta 67 #undef Strchr 68 69 #ifndef S_ISDIR 70 #define S_ISDIR(a) (((a) & S_IFMT) == S_IFDIR) 71 #endif 72 73 #if !defined(S_ISLNK) && defined(S_IFLNK) 74 #define S_ISLNK(a) (((a) & S_IFMT) == S_IFLNK) 75 #endif 76 77 #if !defined(S_ISLNK) && !defined(lstat) 78 #define lstat stat 79 #endif 80 81 typedef unsigned short Char; 82 83 static int glob1 (Char *, glob_t *, int); 84 static int glob2 (struct strbuf *, const Char *, glob_t *, int); 85 static int glob3 (struct strbuf *, const Char *, const Char *, 86 glob_t *, int); 87 static void globextend (const char *, glob_t *); 88 static int match (const char *, const Char *, const Char *, 89 int); 90 static int compare (const void *, const void *); 91 static DIR *Opendir (const char *); 92 #ifdef S_IFLNK 93 static int Lstat (const char *, struct stat *); 94 #endif 95 static int Stat (const char *, struct stat *sb); 96 static Char *Strchr (Char *, int); 97 #ifdef DEBUG 98 static void qprintf (const Char *); 99 #endif 100 101 #define DOLLAR '$' 102 #define DOT '.' 103 #define EOS '\0' 104 #define LBRACKET '[' 105 #define NOT '!' 106 #define ALTNOT '^' 107 #define QUESTION '?' 108 #define QUOTE '\\' 109 #define RANGE '-' 110 #define RBRACKET ']' 111 #define SEP '/' 112 #define STAR '*' 113 #define TILDE '~' 114 #define UNDERSCORE '_' 115 116 #define M_META 0x8000 117 #define M_PROTECT 0x4000 118 #define M_MASK 0xffff 119 #define M_ASCII 0x00ff 120 121 #define LCHAR(c) ((c)&M_ASCII) 122 #define META(c) ((c)|M_META) 123 #define M_ALL META('*') 124 #define M_END META(']') 125 #define M_NOT META('!') 126 #define M_ALTNOT META('^') 127 #define M_ONE META('?') 128 #define M_RNG META('-') 129 #define M_SET META('[') 130 #define ismeta(c) (((c)&M_META) != 0) 131 132 int 133 globcharcoll(__Char c1, __Char c2, int cs) 134 { 135 #if defined(NLS) && defined(LC_COLLATE) && defined(HAVE_STRCOLL) 136 # if defined(WIDE_STRINGS) 137 wchar_t s1[2], s2[2]; 138 139 if (c1 == c2) 140 return (0); 141 if (cs) { 142 c1 = towlower(c1); 143 c2 = towlower(c2); 144 } else { 145 /* This should not be here, but I'll rather leave it in than engage in 146 a LC_COLLATE flamewar about a shell I don't use... */ 147 if (iswlower(c1) && iswupper(c2)) 148 return (1); 149 if (iswupper(c1) && iswlower(c2)) 150 return (-1); 151 } 152 s1[0] = c1; 153 s2[0] = c2; 154 s1[1] = s2[1] = '\0'; 155 return wcscoll(s1, s2); 156 # else /* not WIDE_STRINGS */ 157 char s1[2], s2[2]; 158 159 if (c1 == c2) 160 return (0); 161 /* 162 * From kevin lyda <kevin@suberic.net>: 163 * strcoll does not guarantee case sorting, so we pre-process now: 164 */ 165 if (cs) { 166 c1 = islower(c1) ? c1 : tolower(c1); 167 c2 = islower(c2) ? c2 : tolower(c2); 168 } else { 169 if (islower(c1) && isupper(c2)) 170 return (1); 171 if (isupper(c1) && islower(c2)) 172 return (-1); 173 } 174 s1[0] = c1; 175 s2[0] = c2; 176 s1[1] = s2[1] = '\0'; 177 return strcoll(s1, s2); 178 # endif 179 #else 180 return (c1 - c2); 181 #endif 182 } 183 184 /* 185 * Need to dodge two kernel bugs: 186 * opendir("") != opendir(".") 187 * NAMEI_BUG: on plain files trailing slashes are ignored in some kernels. 188 * POSIX specifies that they should be ignored in directories. 189 */ 190 191 static DIR * 192 Opendir(const char *str) 193 { 194 #if defined(hpux) || defined(__hpux) 195 struct stat st; 196 #endif 197 198 if (!*str) 199 return (opendir(".")); 200 #if defined(hpux) || defined(__hpux) 201 /* 202 * Opendir on some device files hangs, so avoid it 203 */ 204 if (stat(str, &st) == -1 || !S_ISDIR(st.st_mode)) 205 return NULL; 206 #endif 207 return opendir(str); 208 } 209 210 #ifdef S_IFLNK 211 static int 212 Lstat(const char *fn, struct stat *sb) 213 { 214 int st; 215 216 st = lstat(fn, sb); 217 # ifdef NAMEI_BUG 218 if (*fn != 0 && strend(fn)[-1] == '/' && !S_ISDIR(sb->st_mode)) 219 st = -1; 220 # endif /* NAMEI_BUG */ 221 return st; 222 } 223 #else 224 #define Lstat Stat 225 #endif /* S_IFLNK */ 226 227 static int 228 Stat(const char *fn, struct stat *sb) 229 { 230 int st; 231 232 st = stat(fn, sb); 233 #ifdef NAMEI_BUG 234 if (*fn != 0 && strend(fn)[-1] == '/' && !S_ISDIR(sb->st_mode)) 235 st = -1; 236 #endif /* NAMEI_BUG */ 237 return st; 238 } 239 240 static Char * 241 Strchr(Char *str, int ch) 242 { 243 do 244 if (*str == ch) 245 return (str); 246 while (*str++); 247 return (NULL); 248 } 249 250 #ifdef DEBUG 251 static void 252 qprintf(const Char *s) 253 { 254 const Char *p; 255 256 for (p = s; *p; p++) 257 printf("%c", *p & 0xff); 258 printf("\n"); 259 for (p = s; *p; p++) 260 printf("%c", *p & M_PROTECT ? '"' : ' '); 261 printf("\n"); 262 for (p = s; *p; p++) 263 printf("%c", *p & M_META ? '_' : ' '); 264 printf("\n"); 265 } 266 #endif /* DEBUG */ 267 268 static int 269 compare(const void *p, const void *q) 270 { 271 #if defined(NLS) && defined(HAVE_STRCOLL) 272 return (strcoll(*(char *const *) p, *(char *const *) q)); 273 #else 274 return (strcmp(*(char *const *) p, *(char *const *) q)); 275 #endif /* NLS && HAVE_STRCOLL */ 276 } 277 278 /* 279 * The main glob() routine: compiles the pattern (optionally processing 280 * quotes), calls glob1() to do the real pattern matching, and finally 281 * sorts the list (unless unsorted operation is requested). Returns 0 282 * if things went well, nonzero if errors occurred. It is not an error 283 * to find no matches. 284 */ 285 int 286 glob(const char *pattern, int flags, int (*errfunc) (const char *, int), 287 glob_t *pglob) 288 { 289 int err, oldpathc; 290 Char *bufnext, m_not; 291 const unsigned char *patnext; 292 int c, not; 293 Char *qpatnext, *patbuf; 294 int no_match; 295 296 patnext = (const unsigned char *) pattern; 297 if (!(flags & GLOB_APPEND)) { 298 pglob->gl_pathc = 0; 299 pglob->gl_pathv = NULL; 300 if (!(flags & GLOB_DOOFFS)) 301 pglob->gl_offs = 0; 302 } 303 pglob->gl_flags = flags & ~GLOB_MAGCHAR; 304 pglob->gl_errfunc = errfunc; 305 oldpathc = pglob->gl_pathc; 306 pglob->gl_matchc = 0; 307 308 if (pglob->gl_flags & GLOB_ALTNOT) { 309 not = ALTNOT; 310 m_not = M_ALTNOT; 311 } 312 else { 313 not = NOT; 314 m_not = M_NOT; 315 } 316 317 patbuf = xmalloc((strlen(pattern) + 1) * sizeof(*patbuf)); 318 bufnext = patbuf; 319 320 no_match = *patnext == not; 321 if (no_match) 322 patnext++; 323 324 if (flags & GLOB_QUOTE) { 325 /* Protect the quoted characters */ 326 while ((c = *patnext++) != EOS) { 327 #ifdef WIDE_STRINGS 328 int len; 329 330 len = mblen((const char *)(patnext - 1), MB_LEN_MAX); 331 if (len == -1) 332 mblen(NULL, 0); 333 if (len > 1) { 334 *bufnext++ = (Char) c; 335 while (--len != 0) 336 *bufnext++ = (Char) (*patnext++ | M_PROTECT); 337 } else 338 #endif /* WIDE_STRINGS */ 339 if (c == QUOTE) { 340 if ((c = *patnext++) == EOS) { 341 c = QUOTE; 342 --patnext; 343 } 344 *bufnext++ = (Char) (c | M_PROTECT); 345 } 346 else 347 *bufnext++ = (Char) c; 348 } 349 } 350 else 351 while ((c = *patnext++) != EOS) 352 *bufnext++ = (Char) c; 353 *bufnext = EOS; 354 355 bufnext = patbuf; 356 qpatnext = patbuf; 357 while ((c = *qpatnext++) != EOS) { 358 switch (c) { 359 case LBRACKET: 360 c = *qpatnext; 361 if (c == not) 362 ++qpatnext; 363 if (*qpatnext == EOS || 364 Strchr(qpatnext + 1, RBRACKET) == NULL) { 365 *bufnext++ = LBRACKET; 366 if (c == not) 367 --qpatnext; 368 break; 369 } 370 pglob->gl_flags |= GLOB_MAGCHAR; 371 *bufnext++ = M_SET; 372 if (c == not) 373 *bufnext++ = m_not; 374 c = *qpatnext++; 375 do { 376 *bufnext++ = LCHAR(c); 377 if (*qpatnext == RANGE && 378 (c = qpatnext[1]) != RBRACKET) { 379 *bufnext++ = M_RNG; 380 *bufnext++ = LCHAR(c); 381 qpatnext += 2; 382 } 383 } while ((c = *qpatnext++) != RBRACKET); 384 *bufnext++ = M_END; 385 break; 386 case QUESTION: 387 pglob->gl_flags |= GLOB_MAGCHAR; 388 *bufnext++ = M_ONE; 389 break; 390 case STAR: 391 pglob->gl_flags |= GLOB_MAGCHAR; 392 /* collapse adjacent stars to one, to avoid 393 * exponential behavior 394 */ 395 if (bufnext == patbuf || bufnext[-1] != M_ALL) 396 *bufnext++ = M_ALL; 397 break; 398 default: 399 *bufnext++ = LCHAR(c); 400 break; 401 } 402 } 403 *bufnext = EOS; 404 #ifdef DEBUG 405 qprintf(patbuf); 406 #endif 407 408 if ((err = glob1(patbuf, pglob, no_match)) != 0) { 409 xfree(patbuf); 410 return (err); 411 } 412 413 /* 414 * If there was no match we are going to append the pattern 415 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified 416 * and the pattern did not contain any magic characters 417 * GLOB_NOMAGIC is there just for compatibility with csh. 418 */ 419 if (pglob->gl_pathc == oldpathc && 420 ((flags & GLOB_NOCHECK) || 421 ((flags & GLOB_NOMAGIC) && !(pglob->gl_flags & GLOB_MAGCHAR)))) { 422 if (!(flags & GLOB_QUOTE)) 423 globextend(pattern, pglob); 424 else { 425 char *copy, *dest; 426 const char *src; 427 428 /* copy pattern, interpreting quotes */ 429 copy = xmalloc(strlen(pattern) + 1); 430 dest = copy; 431 src = pattern; 432 while (*src != EOS) { 433 if (*src == QUOTE) { 434 if (*++src == EOS) 435 --src; 436 } 437 *dest++ = *src++; 438 } 439 *dest = EOS; 440 globextend(copy, pglob); 441 xfree(copy); 442 } 443 xfree(patbuf); 444 return 0; 445 } 446 else if (!(flags & GLOB_NOSORT) && (pglob->gl_pathc != oldpathc)) 447 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, 448 pglob->gl_pathc - oldpathc, sizeof(char *), compare); 449 xfree(patbuf); 450 return (0); 451 } 452 453 static int 454 glob1(Char *pattern, glob_t *pglob, int no_match) 455 { 456 struct strbuf pathbuf = strbuf_INIT; 457 int err; 458 459 /* 460 * a null pathname is invalid -- POSIX 1003.1 sect. 2.4. 461 */ 462 if (*pattern == EOS) 463 return (0); 464 err = glob2(&pathbuf, pattern, pglob, no_match); 465 xfree(pathbuf.s); 466 return err; 467 } 468 469 /* 470 * functions glob2 and glob3 are mutually recursive; there is one level 471 * of recursion for each segment in the pattern that contains one or 472 * more meta characters. 473 */ 474 static int 475 glob2(struct strbuf *pathbuf, const Char *pattern, glob_t *pglob, int no_match) 476 { 477 struct stat sbuf; 478 int anymeta; 479 const Char *p; 480 size_t orig_len; 481 482 /* 483 * loop over pattern segments until end of pattern or until segment with 484 * meta character found. 485 */ 486 anymeta = 0; 487 for (;;) { 488 if (*pattern == EOS) { /* end of pattern? */ 489 strbuf_terminate(pathbuf); 490 491 if (Lstat(pathbuf->s, &sbuf)) 492 return (0); 493 494 if (((pglob->gl_flags & GLOB_MARK) && 495 pathbuf->s[pathbuf->len - 1] != SEP) && 496 (S_ISDIR(sbuf.st_mode) 497 #ifdef S_IFLNK 498 || (S_ISLNK(sbuf.st_mode) && 499 (Stat(pathbuf->s, &sbuf) == 0) && 500 S_ISDIR(sbuf.st_mode)) 501 #endif 502 )) { 503 strbuf_append1(pathbuf, SEP); 504 strbuf_terminate(pathbuf); 505 } 506 ++pglob->gl_matchc; 507 globextend(pathbuf->s, pglob); 508 return 0; 509 } 510 511 /* find end of next segment, tentatively copy to pathbuf */ 512 p = pattern; 513 orig_len = pathbuf->len; 514 while (*p != EOS && *p != SEP) { 515 if (ismeta(*p)) 516 anymeta = 1; 517 strbuf_append1(pathbuf, *p++); 518 } 519 520 if (!anymeta) { /* no expansion, do next segment */ 521 pattern = p; 522 while (*pattern == SEP) 523 strbuf_append1(pathbuf, *pattern++); 524 } 525 else { /* need expansion, recurse */ 526 pathbuf->len = orig_len; 527 return (glob3(pathbuf, pattern, p, pglob, no_match)); 528 } 529 } 530 /* NOTREACHED */ 531 } 532 533 534 static int 535 glob3(struct strbuf *pathbuf, const Char *pattern, const Char *restpattern, 536 glob_t *pglob, int no_match) 537 { 538 DIR *dirp; 539 struct dirent *dp; 540 int err; 541 Char m_not = (pglob->gl_flags & GLOB_ALTNOT) ? M_ALTNOT : M_NOT; 542 size_t orig_len; 543 544 strbuf_terminate(pathbuf); 545 errno = 0; 546 547 if (!(dirp = Opendir(pathbuf->s))) { 548 /* todo: don't call for ENOENT or ENOTDIR? */ 549 if ((pglob->gl_errfunc && (*pglob->gl_errfunc) (pathbuf->s, errno)) || 550 (pglob->gl_flags & GLOB_ERR)) 551 return (GLOB_ABEND); 552 else 553 return (0); 554 } 555 556 err = 0; 557 558 orig_len = pathbuf->len; 559 /* search directory for matching names */ 560 while ((dp = readdir(dirp)) != NULL) { 561 /* initial DOT must be matched literally */ 562 if (dp->d_name[0] == DOT && *pattern != DOT) 563 continue; 564 pathbuf->len = orig_len; 565 strbuf_append(pathbuf, dp->d_name); 566 strbuf_terminate(pathbuf); 567 if (match(pathbuf->s + orig_len, pattern, restpattern, (int) m_not) 568 == no_match) 569 continue; 570 err = glob2(pathbuf, restpattern, pglob, no_match); 571 if (err) 572 break; 573 } 574 /* todo: check error from readdir? */ 575 closedir(dirp); 576 return (err); 577 } 578 579 580 /* 581 * Extend the gl_pathv member of a glob_t structure to accomodate a new item, 582 * add the new item, and update gl_pathc. 583 * 584 * This assumes the BSD realloc, which only copies the block when its size 585 * crosses a power-of-two boundary; for v7 realloc, this would cause quadratic 586 * behavior. 587 * 588 * Return 0 if new item added, error code if memory couldn't be allocated. 589 * 590 * Invariant of the glob_t structure: 591 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 592 * gl_pathv points to (gl_offs + gl_pathc + 1) items. 593 */ 594 static void 595 globextend(const char *path, glob_t *pglob) 596 { 597 char **pathv; 598 int i; 599 size_t newsize; 600 601 newsize = sizeof(*pathv) * (2 + pglob->gl_pathc + pglob->gl_offs); 602 pathv = xrealloc(pglob->gl_pathv, newsize); 603 604 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 605 /* first time around -- clear initial gl_offs items */ 606 pathv += pglob->gl_offs; 607 for (i = pglob->gl_offs; --i >= 0;) 608 *--pathv = NULL; 609 } 610 pglob->gl_pathv = pathv; 611 612 pathv[pglob->gl_offs + pglob->gl_pathc++] = strsave(path); 613 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 614 } 615 616 static size_t 617 One_Char_mbtowc(__Char *pwc, const Char *s, size_t n) 618 { 619 #ifdef WIDE_STRINGS 620 char buf[MB_LEN_MAX], *p; 621 622 if (n > MB_LEN_MAX) 623 n = MB_LEN_MAX; 624 p = buf; 625 while (p < buf + n && (*p++ = LCHAR(*s++)) != 0) 626 ; 627 return one_mbtowc(pwc, buf, n); 628 #else 629 *pwc = *s & CHAR; 630 return 1; 631 #endif 632 } 633 634 /* 635 * pattern matching function for filenames. Each occurrence of the * 636 * pattern causes a recursion level. 637 */ 638 static int 639 match(const char *name, const Char *pat, const Char *patend, int m_not) 640 { 641 int ok, negate_range; 642 Char c; 643 644 while (pat < patend) { 645 size_t lwk; 646 __Char wc, wk; 647 648 c = *pat; /* Only for M_MASK bits */ 649 pat += One_Char_mbtowc(&wc, pat, MB_LEN_MAX); 650 lwk = one_mbtowc(&wk, name, MB_LEN_MAX); 651 switch (c & M_MASK) { 652 case M_ALL: 653 if (pat == patend) 654 return (1); 655 for (;;) { 656 if (match(name, pat, patend, m_not)) 657 return (1); 658 if (*name == EOS) 659 break; 660 name += lwk; 661 lwk = one_mbtowc(&wk, name, MB_LEN_MAX); 662 } 663 return (0); 664 case M_ONE: 665 if (*name == EOS) 666 return (0); 667 name += lwk; 668 break; 669 case M_SET: 670 ok = 0; 671 if (*name == EOS) 672 return (0); 673 name += lwk; 674 if ((negate_range = ((*pat & M_MASK) == m_not)) != 0) 675 ++pat; 676 while ((*pat & M_MASK) != M_END) { 677 pat += One_Char_mbtowc(&wc, pat, MB_LEN_MAX); 678 if ((*pat & M_MASK) == M_RNG) { 679 __Char wc2; 680 681 pat++; 682 pat += One_Char_mbtowc(&wc2, pat, MB_LEN_MAX); 683 if (globcharcoll(wc, wk, 0) <= 0 && 684 globcharcoll(wk, wc2, 0) <= 0) 685 ok = 1; 686 } else if (wc == wk) 687 ok = 1; 688 } 689 pat += One_Char_mbtowc(&wc, pat, MB_LEN_MAX); 690 if (ok == negate_range) 691 return (0); 692 break; 693 default: 694 name += lwk; 695 if (samecase(wk) != samecase(wc)) 696 return (0); 697 break; 698 } 699 } 700 return (*name == EOS); 701 } 702 703 /* free allocated data belonging to a glob_t structure */ 704 void 705 globfree(glob_t *pglob) 706 { 707 int i; 708 char **pp; 709 710 if (pglob->gl_pathv != NULL) { 711 pp = pglob->gl_pathv + pglob->gl_offs; 712 for (i = pglob->gl_pathc; i--; ++pp) 713 if (*pp) 714 xfree(*pp), *pp = NULL; 715 xfree(pglob->gl_pathv), pglob->gl_pathv = NULL; 716 } 717 } 718