1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Guido van Rossum. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 2013 Gary Mills 35 */ 36 37 /* 38 * glob(3) -- a superset of the one defined in POSIX 1003.2. 39 * 40 * The [!...] convention to negate a range is supported (SysV, Posix, ksh). 41 * 42 * Optional extra services, controlled by flags not defined by POSIX: 43 * 44 * GLOB_QUOTE: 45 * Escaping convention: \ inhibits any special meaning the following 46 * character might have (except \ at end of string is retained). 47 * GLOB_MAGCHAR: 48 * Set in gl_flags if pattern contained a globbing character. 49 * GLOB_NOMAGIC: 50 * Same as GLOB_NOCHECK, but it will only append pattern if it did 51 * not contain any magic characters. [Used in csh style globbing] 52 * GLOB_ALTDIRFUNC: 53 * Use alternately specified directory access functions. 54 * GLOB_TILDE: 55 * expand ~user/foo to the /home/dir/of/user/foo 56 * GLOB_BRACE: 57 * expand {1,2}{a,b} to 1a 1b 2a 2b 58 * gl_matchc: 59 * Number of matches in the current invocation of glob. 60 */ 61 62 #include "lint.h" 63 64 #include <sys/param.h> 65 #include <sys/stat.h> 66 67 #include <ctype.h> 68 #include <dirent.h> 69 #include <errno.h> 70 #include <glob.h> 71 #include <limits.h> 72 #include <pwd.h> 73 #include <stdint.h> 74 #include <stdio.h> 75 #include <stdlib.h> 76 #include <string.h> 77 #include <unistd.h> 78 #include <wchar.h> 79 #include <wctype.h> 80 81 /* 82 * This is the legacy glob_t prior to illumos enhancement 1097, 83 * used when old programs call the old libc glob functions. 84 * (New programs call the _glob_ext, _globfree_ext functions.) 85 * This struct should be considered "carved in stone". 86 */ 87 typedef struct old_glob { 88 size_t gl_pathc; /* Count of paths matched by pattern */ 89 char **gl_pathv; /* List of matched pathnames */ 90 size_t gl_offs; /* # of slots reserved in gl_pathv */ 91 /* following are internal to the implementation */ 92 char **gl_pathp; /* gl_pathv + gl_offs */ 93 int gl_pathn; /* # of elements allocated */ 94 } old_glob_t; 95 96 /* 97 * For old programs, the external names need to be the old names: 98 * glob() and globfree() . We've redefined those already to 99 * _glob_ext() and _globfree_ext() . Now redefine old_glob() 100 * and old_globfree() to glob() and globfree() . 101 */ 102 #ifdef __PRAGMA_REDEFINE_EXTNAME 103 #pragma redefine_extname old_glob glob 104 #pragma redefine_extname old_globfree globfree 105 #endif /* __PRAGMA_REDEFINE_EXTNAME */ 106 extern int old_glob(const char *, int, int (*)(const char *, int), 107 old_glob_t *); 108 extern void old_globfree(old_glob_t *); 109 110 /* 111 * The various extensions to glob(3C) allow for stat and dirent structures to 112 * show up whose size may change in a largefile environment. If libc defines 113 * _FILE_OFFSET_BITS to be 64 that is the key to indicate that we're building 114 * the LFS version of this file. As such, we rename the public functions here, 115 * _glob_ext() and _globfree_ext() to have a 64 suffix. When building the LFS 116 * version, we do not include the old versions. 117 */ 118 #if !defined(_LP64) && _FILE_OFFSET_BITS == 64 119 #define _glob_ext _glob_ext64 120 #define _globfree_ext _globfree_ext64 121 #endif /* !_LP64 && _FILE_OFFSET_BITS == 64 */ 122 123 #define DOLLAR '$' 124 #define DOT '.' 125 #define EOS '\0' 126 #define LBRACKET '[' 127 #define NOT '!' 128 #define QUESTION '?' 129 #define QUOTE '\\' 130 #define RANGE '-' 131 #define RBRACKET ']' 132 #define SEP '/' 133 #define STAR '*' 134 #define TILDE '~' 135 #define UNDERSCORE '_' 136 #define LBRACE '{' 137 #define RBRACE '}' 138 #define SLASH '/' 139 #define COMMA ',' 140 #define COLON ':' 141 142 #define M_QUOTE 0x800000 143 #define M_PROTECT 0x400000 144 145 typedef struct wcat { 146 wchar_t w_wc; 147 uint_t w_at; 148 } wcat_t; 149 150 #define M_ALL '*' /* Plus M_QUOTE */ 151 #define M_END ']' /* Plus M_QUOTE */ 152 #define M_NOT '!' /* Plus M_QUOTE */ 153 #define M_ONE '?' /* Plus M_QUOTE */ 154 #define M_RNG '-' /* Plus M_QUOTE */ 155 #define M_SET '[' /* Plus M_QUOTE */ 156 #define M_CLASS ':' /* Plus M_QUOTE */ 157 #define ismeta(c) (((c).w_at&M_QUOTE) != 0) 158 159 #define INITIAL 8 /* initial pathv allocation */ 160 161 #define GLOB_LIMIT_MALLOC 65536 162 #define GLOB_LIMIT_STAT 2048 163 #define GLOB_LIMIT_READDIR 16384 164 165 struct glob_lim { 166 size_t glim_malloc; 167 size_t glim_stat; 168 size_t glim_readdir; 169 }; 170 171 struct glob_path_stat { 172 char *gps_path; 173 struct stat *gps_stat; 174 }; 175 176 static int compare(const void *, const void *); 177 static int compare_gps(const void *, const void *); 178 static int g_Ctoc(const wcat_t *, char *, uint_t); 179 static int g_lstat(wcat_t *, struct stat *, glob_t *); 180 static DIR *g_opendir(wcat_t *, glob_t *); 181 static wcat_t *g_strchr(const wcat_t *, wchar_t); 182 static int g_stat(wcat_t *, struct stat *, glob_t *); 183 static int glob0(const wcat_t *, glob_t *, struct glob_lim *, 184 int (*)(const char *, int)); 185 static int glob1(wcat_t *, wcat_t *, glob_t *, struct glob_lim *, 186 int (*)(const char *, int)); 187 static int glob2(wcat_t *, wcat_t *, wcat_t *, wcat_t *, wcat_t *, 188 wcat_t *, glob_t *, struct glob_lim *, 189 int (*)(const char *, int)); 190 static int glob3(wcat_t *, wcat_t *, wcat_t *, wcat_t *, wcat_t *, 191 wcat_t *, wcat_t *, glob_t *, struct glob_lim *, 192 int (*)(const char *, int)); 193 static int globextend(const wcat_t *, glob_t *, struct glob_lim *, 194 struct stat *); 195 static 196 const wcat_t *globtilde(const wcat_t *, wcat_t *, size_t, glob_t *); 197 static int globexp1(const wcat_t *, glob_t *, struct glob_lim *, 198 int (*)(const char *, int)); 199 static int globexp2(const wcat_t *, const wcat_t *, glob_t *, 200 struct glob_lim *, int (*)(const char *, int)); 201 static int match(wcat_t *, wcat_t *, wcat_t *); 202 203 /* 204 * Extended glob() function, selected by #pragma redefine_extname 205 * in glob.h with the external name _glob_ext() . 206 */ 207 int 208 _glob_ext(const char *pattern, int flags, int (*errfunc)(const char *, int), 209 glob_t *pglob) 210 { 211 const char *patnext; 212 int n; 213 size_t patlen; 214 wchar_t c; 215 wcat_t *bufnext, *bufend, patbuf[PATH_MAX]; 216 struct glob_lim limit = { 0, 0, 0 }; 217 218 patnext = pattern; 219 if (!(flags & GLOB_APPEND)) { 220 pglob->gl_pathc = 0; 221 pglob->gl_pathn = 0; 222 pglob->gl_pathv = NULL; 223 if ((flags & GLOB_KEEPSTAT) != 0) 224 pglob->gl_statv = NULL; 225 if (!(flags & GLOB_DOOFFS)) 226 pglob->gl_offs = 0; 227 } 228 pglob->gl_flags = flags & ~GLOB_MAGCHAR; 229 pglob->gl_matchc = 0; 230 231 if ((patlen = strnlen(pattern, PATH_MAX)) == PATH_MAX) 232 return (GLOB_NOMATCH); 233 234 if (pglob->gl_offs >= INT_MAX || pglob->gl_pathc >= INT_MAX || 235 pglob->gl_pathc >= INT_MAX - pglob->gl_offs - 1) 236 return (GLOB_NOSPACE); 237 238 bufnext = patbuf; 239 bufend = bufnext + PATH_MAX - 1; 240 patlen += 1; 241 if (flags & GLOB_NOESCAPE) { 242 while (bufnext < bufend) { 243 if ((n = mbtowc(&c, patnext, patlen)) > 0) { 244 patnext += n; 245 patlen -= n; 246 bufnext->w_at = 0; 247 (bufnext++)->w_wc = c; 248 } else if (n == 0) { 249 break; 250 } else { 251 return (GLOB_NOMATCH); 252 } 253 } 254 } else { 255 /* Protect the quoted characters. */ 256 while (bufnext < bufend) { 257 if ((n = mbtowc(&c, patnext, patlen)) > 0) { 258 patnext += n; 259 patlen -= n; 260 if (c == QUOTE) { 261 n = mbtowc(&c, patnext, patlen); 262 if (n < 0) 263 return (GLOB_NOMATCH); 264 if (n > 0) { 265 patnext += n; 266 patlen -= n; 267 } 268 if (n == 0) 269 c = QUOTE; 270 bufnext->w_at = M_PROTECT; 271 (bufnext++)->w_wc = c; 272 } else { 273 bufnext->w_at = 0; 274 (bufnext++)->w_wc = c; 275 } 276 } else if (n == 0) { 277 break; 278 } else { 279 return (GLOB_NOMATCH); 280 } 281 } 282 } 283 bufnext->w_at = 0; 284 bufnext->w_wc = EOS; 285 286 if (flags & GLOB_BRACE) 287 return (globexp1(patbuf, pglob, &limit, errfunc)); 288 else 289 return (glob0(patbuf, pglob, &limit, errfunc)); 290 } 291 292 /* 293 * Expand recursively a glob {} pattern. When there is no more expansion 294 * invoke the standard globbing routine to glob the rest of the magic 295 * characters 296 */ 297 static int 298 globexp1(const wcat_t *pattern, glob_t *pglob, struct glob_lim *limitp, 299 int (*errfunc)(const char *, int)) 300 { 301 const wcat_t *ptr = pattern; 302 303 /* Protect a single {}, for find(1), like csh */ 304 if (pattern[0].w_wc == LBRACE && pattern[1].w_wc == RBRACE && 305 pattern[2].w_wc == EOS) 306 return (glob0(pattern, pglob, limitp, errfunc)); 307 308 if ((ptr = (const wcat_t *) g_strchr(ptr, LBRACE)) != NULL) 309 return (globexp2(ptr, pattern, pglob, limitp, errfunc)); 310 311 return (glob0(pattern, pglob, limitp, errfunc)); 312 } 313 314 315 /* 316 * Recursive brace globbing helper. Tries to expand a single brace. 317 * If it succeeds then it invokes globexp1 with the new pattern. 318 * If it fails then it tries to glob the rest of the pattern and returns. 319 */ 320 static int 321 globexp2(const wcat_t *ptr, const wcat_t *pattern, glob_t *pglob, 322 struct glob_lim *limitp, int (*errfunc)(const char *, int)) 323 { 324 int i, rv; 325 wcat_t *lm, *ls; 326 const wcat_t *pe, *pm, *pl; 327 wcat_t patbuf[PATH_MAX]; 328 329 /* copy part up to the brace */ 330 for (lm = patbuf, pm = pattern; pm != ptr; *lm++ = *pm++) 331 ; 332 lm->w_at = 0; 333 lm->w_wc = EOS; 334 ls = lm; 335 336 /* Find the balanced brace */ 337 for (i = 0, pe = ++ptr; pe->w_wc != EOS; pe++) 338 if (pe->w_wc == LBRACKET) { 339 /* Ignore everything between [] */ 340 for (pm = pe++; pe->w_wc != RBRACKET && 341 pe->w_wc != EOS; pe++) 342 ; 343 if (pe->w_wc == EOS) { 344 /* 345 * We could not find a matching RBRACKET. 346 * Ignore and just look for RBRACE 347 */ 348 pe = pm; 349 } 350 } else if (pe->w_wc == LBRACE) { 351 i++; 352 } else if (pe->w_wc == RBRACE) { 353 if (i == 0) 354 break; 355 i--; 356 } 357 358 /* Non matching braces; just glob the pattern */ 359 if (i != 0 || pe->w_wc == EOS) 360 return (glob0(patbuf, pglob, limitp, errfunc)); 361 362 for (i = 0, pl = pm = ptr; pm <= pe; pm++) { 363 switch (pm->w_wc) { 364 case LBRACKET: 365 /* Ignore everything between [] */ 366 for (pl = pm++; pm->w_wc != RBRACKET && pm->w_wc != EOS; 367 pm++) 368 ; 369 if (pm->w_wc == EOS) { 370 /* 371 * We could not find a matching RBRACKET. 372 * Ignore and just look for RBRACE 373 */ 374 pm = pl; 375 } 376 break; 377 378 case LBRACE: 379 i++; 380 break; 381 382 case RBRACE: 383 if (i) { 384 i--; 385 break; 386 } 387 /* FALLTHROUGH */ 388 case COMMA: 389 if (i && pm->w_wc == COMMA) 390 break; 391 else { 392 /* Append the current string */ 393 for (lm = ls; (pl < pm); *lm++ = *pl++) 394 ; 395 396 /* 397 * Append the rest of the pattern after the 398 * closing brace 399 */ 400 for (pl = pe + 1; 401 (*lm++ = *pl++).w_wc != EOS; /* */) 402 ; 403 404 /* Expand the current pattern */ 405 rv = globexp1(patbuf, pglob, limitp, errfunc); 406 if (rv && rv != GLOB_NOMATCH) 407 return (rv); 408 409 /* move after the comma, to the next string */ 410 pl = pm + 1; 411 } 412 break; 413 414 default: 415 break; 416 } 417 } 418 return (0); 419 } 420 421 422 423 /* 424 * expand tilde from the passwd file. 425 */ 426 static const wcat_t * 427 globtilde(const wcat_t *pattern, wcat_t *patbuf, size_t patbuf_len, 428 glob_t *pglob) 429 { 430 struct passwd *pwd; 431 char *h; 432 const wcat_t *p; 433 wcat_t *b, *eb, *q; 434 int n; 435 size_t lenh; 436 wchar_t c; 437 438 if (pattern->w_wc != TILDE || !(pglob->gl_flags & GLOB_TILDE)) 439 return (pattern); 440 441 /* Copy up to the end of the string or / */ 442 eb = &patbuf[patbuf_len - 1]; 443 for (p = pattern + 1, q = patbuf; 444 q < eb && p->w_wc != EOS && p->w_wc != SLASH; *q++ = *p++) 445 ; 446 447 q->w_at = 0; 448 q->w_wc = EOS; 449 450 /* What to do if patbuf is full? */ 451 452 if (patbuf[0].w_wc == EOS) { 453 /* 454 * handle a plain ~ or ~/ by expanding $HOME 455 * first and then trying the password file 456 */ 457 if (issetugid() != 0) 458 return (pattern); 459 if ((h = getenv("HOME")) == NULL) { 460 if ((pwd = getpwuid(getuid())) == NULL) 461 return (pattern); 462 else 463 h = pwd->pw_dir; 464 } 465 } else { 466 /* 467 * Expand a ~user 468 */ 469 if ((pwd = getpwnam((char *)patbuf)) == NULL) 470 return (pattern); 471 else 472 h = pwd->pw_dir; 473 } 474 475 /* Copy the home directory */ 476 lenh = strlen(h) + 1; 477 for (b = patbuf; b < eb && *h != EOS; b++) { 478 if ((n = mbtowc(&c, h, lenh)) > 0) { 479 h += n; 480 lenh -= n; 481 b->w_at = 0; 482 b->w_wc = c; 483 } else if (n < 0) { 484 return (pattern); 485 } else { 486 break; 487 } 488 } 489 490 /* Append the rest of the pattern */ 491 while (b < eb && (*b++ = *p++).w_wc != EOS) 492 ; 493 b->w_at = 0; 494 b->w_wc = EOS; 495 496 return (patbuf); 497 } 498 499 static int 500 g_charclass(const wcat_t **patternp, wcat_t **bufnextp) 501 { 502 const wcat_t *pattern = *patternp + 1; 503 wcat_t *bufnext = *bufnextp; 504 const wcat_t *colon; 505 char cbuf[MB_LEN_MAX + 32]; 506 wctype_t cc; 507 size_t len; 508 509 if ((colon = g_strchr(pattern, COLON)) == NULL || 510 colon[1].w_wc != RBRACKET) 511 return (1); /* not a character class */ 512 513 len = (size_t)(colon - pattern); 514 if (len + MB_LEN_MAX + 1 > sizeof (cbuf)) 515 return (-1); /* invalid character class */ 516 { 517 wchar_t w; 518 const wcat_t *s1 = pattern; 519 char *s2 = cbuf; 520 size_t n = len; 521 522 /* Copy the string. */ 523 while (n > 0) { 524 w = (s1++)->w_wc; 525 /* Character class names must be ASCII. */ 526 if (iswascii(w)) { 527 n--; 528 *s2++ = w; 529 } else { 530 return (-1); /* invalid character class */ 531 } 532 } 533 *s2 = EOS; 534 } 535 if ((cc = wctype(cbuf)) == 0) 536 return (-1); /* invalid character class */ 537 bufnext->w_at = M_QUOTE; 538 (bufnext++)->w_wc = M_CLASS; 539 bufnext->w_at = 0; 540 (bufnext++)->w_wc = cc; 541 *bufnextp = bufnext; 542 *patternp += len + 3; 543 544 return (0); 545 } 546 547 /* 548 * The main glob() routine: compiles the pattern (optionally processing 549 * quotes), calls glob1() to do the real pattern matching, and finally 550 * sorts the list (unless unsorted operation is requested). Returns 0 551 * if things went well, nonzero if errors occurred. It is not an error 552 * to find no matches. 553 */ 554 static int 555 glob0(const wcat_t *pattern, glob_t *pglob, struct glob_lim *limitp, 556 int (*errfunc)(const char *, int)) 557 { 558 const wcat_t *qpatnext; 559 int err, oldpathc; 560 wchar_t c; 561 int a; 562 wcat_t *bufnext, patbuf[PATH_MAX]; 563 564 qpatnext = globtilde(pattern, patbuf, PATH_MAX, pglob); 565 oldpathc = pglob->gl_pathc; 566 bufnext = patbuf; 567 568 /* 569 * We don't need to check for buffer overflow any more. 570 * The pattern has already been copied to an internal buffer. 571 */ 572 while ((a = qpatnext->w_at), (c = (qpatnext++)->w_wc) != EOS) { 573 switch (c) { 574 case LBRACKET: 575 if (a != 0) { 576 bufnext->w_at = a; 577 (bufnext++)->w_wc = c; 578 break; 579 } 580 a = qpatnext->w_at; 581 c = qpatnext->w_wc; 582 if (a == 0 && c == NOT) 583 ++qpatnext; 584 if (qpatnext->w_wc == EOS || 585 g_strchr(qpatnext+1, RBRACKET) == NULL) { 586 bufnext->w_at = 0; 587 (bufnext++)->w_wc = LBRACKET; 588 if (a == 0 && c == NOT) 589 --qpatnext; 590 break; 591 } 592 bufnext->w_at = M_QUOTE; 593 (bufnext++)->w_wc = M_SET; 594 if (a == 0 && c == NOT) { 595 bufnext->w_at = M_QUOTE; 596 (bufnext++)->w_wc = M_NOT; 597 } 598 a = qpatnext->w_at; 599 c = (qpatnext++)->w_wc; 600 do { 601 if (a == 0 && c == LBRACKET && 602 qpatnext->w_wc == COLON) { 603 do { 604 err = g_charclass(&qpatnext, 605 &bufnext); 606 if (err) 607 break; 608 a = qpatnext->w_at; 609 c = (qpatnext++)->w_wc; 610 } while (a == 0 && c == LBRACKET && 611 qpatnext->w_wc == COLON); 612 if (err == -1 && 613 !(pglob->gl_flags & GLOB_NOCHECK)) 614 return (GLOB_NOMATCH); 615 if (a == 0 && c == RBRACKET) 616 break; 617 } 618 bufnext->w_at = a; 619 (bufnext++)->w_wc = c; 620 if (qpatnext->w_at == 0 && 621 qpatnext->w_wc == RANGE) { 622 a = qpatnext[1].w_at; 623 c = qpatnext[1].w_wc; 624 if (qpatnext[1].w_at != 0 || 625 qpatnext[1].w_wc != RBRACKET) { 626 bufnext->w_at = M_QUOTE; 627 (bufnext++)->w_wc = M_RNG; 628 bufnext->w_at = a; 629 (bufnext++)->w_wc = c; 630 qpatnext += 2; 631 } 632 } 633 a = qpatnext->w_at; 634 c = (qpatnext++)->w_wc; 635 } while (a != 0 || c != RBRACKET); 636 pglob->gl_flags |= GLOB_MAGCHAR; 637 bufnext->w_at = M_QUOTE; 638 (bufnext++)->w_wc = M_END; 639 break; 640 case QUESTION: 641 if (a != 0) { 642 bufnext->w_at = a; 643 (bufnext++)->w_wc = c; 644 break; 645 } 646 pglob->gl_flags |= GLOB_MAGCHAR; 647 bufnext->w_at = M_QUOTE; 648 (bufnext++)->w_wc = M_ONE; 649 break; 650 case STAR: 651 if (a != 0) { 652 bufnext->w_at = a; 653 (bufnext++)->w_wc = c; 654 break; 655 } 656 pglob->gl_flags |= GLOB_MAGCHAR; 657 /* 658 * collapse adjacent stars to one, 659 * to avoid exponential behavior 660 */ 661 if (bufnext == patbuf || 662 bufnext[-1].w_at != M_QUOTE || 663 bufnext[-1].w_wc != M_ALL) { 664 bufnext->w_at = M_QUOTE; 665 (bufnext++)->w_wc = M_ALL; 666 } 667 break; 668 default: 669 bufnext->w_at = a; 670 (bufnext++)->w_wc = c; 671 break; 672 } 673 } 674 bufnext->w_at = 0; 675 bufnext->w_wc = EOS; 676 677 if ((err = glob1(patbuf, patbuf+PATH_MAX-1, pglob, limitp, errfunc)) 678 != 0) 679 return (err); 680 681 /* 682 * If there was no match we are going to append the pattern 683 * if GLOB_NOCHECK was specified or if GLOB_NOMAGIC was specified 684 * and the pattern did not contain any magic characters 685 * GLOB_NOMAGIC is there just for compatibility with csh. 686 */ 687 if (pglob->gl_pathc == oldpathc) { 688 if ((pglob->gl_flags & GLOB_NOCHECK) || 689 ((pglob->gl_flags & GLOB_NOMAGIC) && 690 !(pglob->gl_flags & GLOB_MAGCHAR))) 691 return (globextend(pattern, pglob, limitp, NULL)); 692 else 693 return (GLOB_NOMATCH); 694 } 695 if (!(pglob->gl_flags & GLOB_NOSORT)) { 696 if ((pglob->gl_flags & GLOB_KEEPSTAT)) { 697 /* Keep the paths and stat info synced during sort */ 698 struct glob_path_stat *path_stat; 699 int i; 700 int n = pglob->gl_pathc - oldpathc; 701 int o = pglob->gl_offs + oldpathc; 702 703 if ((path_stat = calloc(n, sizeof (*path_stat))) == 704 NULL) 705 return (GLOB_NOSPACE); 706 for (i = 0; i < n; i++) { 707 path_stat[i].gps_path = pglob->gl_pathv[o + i]; 708 path_stat[i].gps_stat = pglob->gl_statv[o + i]; 709 } 710 qsort(path_stat, n, sizeof (*path_stat), compare_gps); 711 for (i = 0; i < n; i++) { 712 pglob->gl_pathv[o + i] = path_stat[i].gps_path; 713 pglob->gl_statv[o + i] = path_stat[i].gps_stat; 714 } 715 free(path_stat); 716 } else { 717 qsort(pglob->gl_pathv + pglob->gl_offs + oldpathc, 718 pglob->gl_pathc - oldpathc, sizeof (char *), 719 compare); 720 } 721 } 722 return (0); 723 } 724 725 static int 726 compare(const void *p, const void *q) 727 { 728 return (strcmp(*(char **)p, *(char **)q)); 729 } 730 731 static int 732 compare_gps(const void *_p, const void *_q) 733 { 734 const struct glob_path_stat *p = (const struct glob_path_stat *)_p; 735 const struct glob_path_stat *q = (const struct glob_path_stat *)_q; 736 737 return (strcmp(p->gps_path, q->gps_path)); 738 } 739 740 static int 741 glob1(wcat_t *pattern, wcat_t *pattern_last, glob_t *pglob, 742 struct glob_lim *limitp, int (*errfunc)(const char *, int)) 743 { 744 wcat_t pathbuf[PATH_MAX]; 745 746 /* A null pathname is invalid -- POSIX 1003.1 sect. 2.4. */ 747 if (pattern->w_wc == EOS) 748 return (0); 749 return (glob2(pathbuf, pathbuf+PATH_MAX-1, 750 pathbuf, pathbuf+PATH_MAX-1, 751 pattern, pattern_last, pglob, limitp, errfunc)); 752 } 753 754 /* 755 * The functions glob2 and glob3 are mutually recursive; there is one level 756 * of recursion for each segment in the pattern that contains one or more 757 * meta characters. 758 */ 759 static int 760 glob2(wcat_t *pathbuf, wcat_t *pathbuf_last, wcat_t *pathend, 761 wcat_t *pathend_last, wcat_t *pattern, wcat_t *pattern_last, 762 glob_t *pglob, struct glob_lim *limitp, int (*errfunc)(const char *, int)) 763 { 764 struct stat sb; 765 wcat_t *p, *q; 766 int anymeta; 767 768 /* 769 * Loop over pattern segments until end of pattern or until 770 * segment with meta character found. 771 */ 772 for (anymeta = 0; ; ) { 773 if (pattern->w_wc == EOS) { /* End of pattern? */ 774 pathend->w_at = 0; 775 pathend->w_wc = EOS; 776 777 if ((pglob->gl_flags & GLOB_LIMIT) && 778 limitp->glim_stat++ >= GLOB_LIMIT_STAT) { 779 errno = 0; 780 pathend->w_at = 0; 781 (pathend++)->w_wc = SEP; 782 pathend->w_at = 0; 783 pathend->w_wc = EOS; 784 return (GLOB_NOSPACE); 785 } 786 if (g_lstat(pathbuf, &sb, pglob)) 787 return (0); 788 789 if (((pglob->gl_flags & GLOB_MARK) && 790 (pathend[-1].w_at != 0 || 791 pathend[-1].w_wc != SEP)) && 792 (S_ISDIR(sb.st_mode) || 793 (S_ISLNK(sb.st_mode) && 794 (g_stat(pathbuf, &sb, pglob) == 0) && 795 S_ISDIR(sb.st_mode)))) { 796 if (pathend+1 > pathend_last) 797 return (GLOB_NOSPACE); 798 pathend->w_at = 0; 799 (pathend++)->w_wc = SEP; 800 pathend->w_at = 0; 801 pathend->w_wc = EOS; 802 } 803 ++pglob->gl_matchc; 804 return (globextend(pathbuf, pglob, limitp, &sb)); 805 } 806 807 /* Find end of next segment, copy tentatively to pathend. */ 808 q = pathend; 809 p = pattern; 810 while (p->w_wc != EOS && p->w_wc != SEP) { 811 if (ismeta(*p)) 812 anymeta = 1; 813 if (q+1 > pathend_last) 814 return (GLOB_NOSPACE); 815 *q++ = *p++; 816 } 817 818 if (!anymeta) { /* No expansion, do next segment. */ 819 pathend = q; 820 pattern = p; 821 while (pattern->w_wc == SEP) { 822 if (pathend+1 > pathend_last) 823 return (GLOB_NOSPACE); 824 *pathend++ = *pattern++; 825 } 826 } else { 827 /* Need expansion, recurse. */ 828 return (glob3(pathbuf, pathbuf_last, pathend, 829 pathend_last, pattern, p, pattern_last, 830 pglob, limitp, errfunc)); 831 } 832 } 833 /* NOTREACHED */ 834 } 835 836 static int 837 glob3(wcat_t *pathbuf, wcat_t *pathbuf_last, wcat_t *pathend, 838 wcat_t *pathend_last, wcat_t *pattern, wcat_t *restpattern, 839 wcat_t *restpattern_last, glob_t *pglob, struct glob_lim *limitp, 840 int (*errfunc)(const char *, int)) 841 { 842 struct dirent *dp; 843 DIR *dirp; 844 int err; 845 char buf[PATH_MAX]; 846 847 /* 848 * The readdirfunc declaration can't be prototyped, because it is 849 * assigned, below, to two functions which are prototyped in glob.h 850 * and dirent.h as taking pointers to differently typed opaque 851 * structures. 852 */ 853 struct dirent *(*readdirfunc)(void *); 854 855 if (pathend > pathend_last) 856 return (GLOB_NOSPACE); 857 pathend->w_at = 0; 858 pathend->w_wc = EOS; 859 errno = 0; 860 861 if ((dirp = g_opendir(pathbuf, pglob)) == NULL) { 862 /* TODO: don't call for ENOENT or ENOTDIR? */ 863 if (errfunc) { 864 if (g_Ctoc(pathbuf, buf, sizeof (buf))) 865 return (GLOB_ABORTED); 866 if (errfunc(buf, errno) || 867 pglob->gl_flags & GLOB_ERR) 868 return (GLOB_ABORTED); 869 } 870 return (0); 871 } 872 873 err = 0; 874 875 /* Search directory for matching names. */ 876 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 877 readdirfunc = pglob->gl_readdir; 878 else 879 readdirfunc = (struct dirent *(*)(void *))readdir; 880 while ((dp = (*readdirfunc)(dirp))) { 881 char *sc; 882 wcat_t *dc; 883 int n; 884 int lensc; 885 wchar_t w; 886 887 if ((pglob->gl_flags & GLOB_LIMIT) && 888 limitp->glim_readdir++ >= GLOB_LIMIT_READDIR) { 889 errno = 0; 890 pathend->w_at = 0; 891 (pathend++)->w_wc = SEP; 892 pathend->w_at = 0; 893 pathend->w_wc = EOS; 894 err = GLOB_NOSPACE; 895 break; 896 } 897 898 /* Initial DOT must be matched literally. */ 899 if (dp->d_name[0] == DOT && pattern->w_wc != DOT) 900 continue; 901 dc = pathend; 902 sc = dp->d_name; 903 lensc = strlen(sc) + 1; 904 while (dc < pathend_last) { 905 if ((n = mbtowc(&w, sc, lensc)) <= 0) { 906 sc += 1; 907 lensc -= 1; 908 dc->w_at = 0; 909 dc->w_wc = EOS; 910 } else { 911 sc += n; 912 lensc -= n; 913 dc->w_at = 0; 914 dc->w_wc = w; 915 } 916 dc++; 917 if (n <= 0) 918 break; 919 } 920 if (dc >= pathend_last) { 921 dc->w_at = 0; 922 dc->w_wc = EOS; 923 err = GLOB_NOSPACE; 924 break; 925 } 926 if (n < 0) { 927 err = GLOB_NOMATCH; 928 break; 929 } 930 931 if (!match(pathend, pattern, restpattern)) { 932 pathend->w_at = 0; 933 pathend->w_wc = EOS; 934 continue; 935 } 936 err = glob2(pathbuf, pathbuf_last, --dc, pathend_last, 937 restpattern, restpattern_last, pglob, limitp, 938 errfunc); 939 if (err) 940 break; 941 } 942 943 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 944 (*pglob->gl_closedir)(dirp); 945 else 946 (void) closedir(dirp); 947 return (err); 948 } 949 950 951 /* 952 * Extend the gl_pathv member of a glob_t structure to accommodate a new item, 953 * add the new item, and update gl_pathc. Avoids excessive reallocation 954 * by doubling the number of elements each time. Uses gl_pathn to contain 955 * the number. 956 * 957 * Return 0 if new item added, error code if memory couldn't be allocated. 958 * 959 * Invariant of the glob_t structure: 960 * Either gl_pathc is zero and gl_pathv is NULL; or gl_pathc > 0 and 961 * gl_pathv points to (gl_offs + gl_pathc + 1) items. 962 */ 963 static int 964 globextend(const wcat_t *path, glob_t *pglob, struct glob_lim *limitp, 965 struct stat *sb) 966 { 967 char **pathv; 968 ssize_t i; 969 size_t allocn, newn, len; 970 char *copy = NULL; 971 const wcat_t *p; 972 struct stat **statv; 973 char junk[MB_LEN_MAX]; 974 int n; 975 976 allocn = pglob->gl_pathn; 977 newn = 2 + pglob->gl_pathc + pglob->gl_offs; 978 979 if (newn <= allocn) { 980 pathv = pglob->gl_pathv; 981 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0) 982 statv = pglob->gl_statv; 983 } else { 984 if (allocn == 0) 985 allocn = pglob->gl_offs + INITIAL; 986 allocn *= 2; 987 if (pglob->gl_offs >= INT_MAX || 988 pglob->gl_pathc >= INT_MAX || 989 allocn >= INT_MAX || 990 SIZE_MAX / sizeof (*pathv) <= allocn || 991 SIZE_MAX / sizeof (*statv) <= allocn) { 992 nospace: 993 for (i = pglob->gl_offs; i < (ssize_t)(newn - 2); 994 i++) { 995 if (pglob->gl_pathv && pglob->gl_pathv[i]) 996 free(pglob->gl_pathv[i]); 997 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 && 998 pglob->gl_statv && pglob->gl_statv[i]) 999 free(pglob->gl_statv[i]); 1000 } 1001 free(pglob->gl_pathv); 1002 pglob->gl_pathv = NULL; 1003 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0) { 1004 free(pglob->gl_statv); 1005 pglob->gl_statv = NULL; 1006 } 1007 return (GLOB_NOSPACE); 1008 } 1009 limitp->glim_malloc += allocn * sizeof (*pathv); 1010 pathv = reallocarray(pglob->gl_pathv, allocn, sizeof (*pathv)); 1011 if (pathv == NULL) 1012 goto nospace; 1013 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0) { 1014 limitp->glim_malloc += allocn * sizeof (*statv); 1015 statv = reallocarray(pglob->gl_statv, allocn, 1016 sizeof (*statv)); 1017 if (statv == NULL) 1018 goto nospace; 1019 } 1020 } 1021 pglob->gl_pathn = allocn; 1022 1023 if (pglob->gl_pathv == NULL && pglob->gl_offs > 0) { 1024 /* first time around -- clear initial gl_offs items */ 1025 pathv += pglob->gl_offs; 1026 for (i = pglob->gl_offs; --i >= 0; ) 1027 *--pathv = NULL; 1028 } 1029 pglob->gl_pathv = pathv; 1030 1031 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0) { 1032 if (pglob->gl_statv == NULL && pglob->gl_offs > 0) { 1033 /* first time around -- clear initial gl_offs items */ 1034 statv += pglob->gl_offs; 1035 for (i = pglob->gl_offs; --i >= 0; ) 1036 *--statv = NULL; 1037 } 1038 pglob->gl_statv = statv; 1039 if (sb == NULL) 1040 statv[pglob->gl_offs + pglob->gl_pathc] = NULL; 1041 else { 1042 limitp->glim_malloc += sizeof (**statv); 1043 if ((statv[pglob->gl_offs + pglob->gl_pathc] = 1044 malloc(sizeof (**statv))) == NULL) 1045 goto copy_error; 1046 (void) memcpy(statv[pglob->gl_offs + pglob->gl_pathc], 1047 sb, sizeof (*sb)); 1048 } 1049 statv[pglob->gl_offs + pglob->gl_pathc + 1] = NULL; 1050 } 1051 1052 len = MB_LEN_MAX; 1053 p = path; 1054 while ((n = wctomb(junk, p->w_wc)) > 0) { 1055 len += n; 1056 if ((p++)->w_wc == EOS) 1057 break; 1058 } 1059 if (n < 0) 1060 return (GLOB_NOMATCH); 1061 1062 limitp->glim_malloc += len; 1063 if ((copy = malloc(len)) != NULL) { 1064 if (g_Ctoc(path, copy, len)) { 1065 free(copy); 1066 return (GLOB_NOSPACE); 1067 } 1068 pathv[pglob->gl_offs + pglob->gl_pathc++] = copy; 1069 } 1070 pathv[pglob->gl_offs + pglob->gl_pathc] = NULL; 1071 1072 if ((pglob->gl_flags & GLOB_LIMIT) && 1073 limitp->glim_malloc >= GLOB_LIMIT_MALLOC) { 1074 errno = 0; 1075 return (GLOB_NOSPACE); 1076 } 1077 copy_error: 1078 return (copy == NULL ? GLOB_NOSPACE : 0); 1079 } 1080 1081 1082 /* 1083 * pattern matching function for filenames. Each occurrence of the * 1084 * pattern causes an iteration. 1085 * 1086 * Note, this function differs from the original as per the discussion 1087 * here: https://research.swtch.com/glob 1088 * 1089 * Basically we removed the recursion and made it use the algorithm 1090 * from Russ Cox to not go quadratic on cases like a file called 1091 * ("a" x 100) . "x" matched against a pattern like "a*a*a*a*a*a*a*y". 1092 */ 1093 static int 1094 match(wcat_t *name, wcat_t *pat, wcat_t *patend) 1095 { 1096 int ok, negate_range; 1097 wcat_t c, k; 1098 wcat_t *nextp = NULL; 1099 wcat_t *nextn = NULL; 1100 1101 loop: 1102 while (pat < patend) { 1103 c = *pat++; 1104 switch (c.w_wc) { 1105 case M_ALL: 1106 if (c.w_at != M_QUOTE) { 1107 k = *name++; 1108 if (k.w_at != c.w_at || k.w_wc != c.w_wc) 1109 return (0); 1110 break; 1111 } 1112 while (pat < patend && pat->w_at == M_QUOTE && 1113 pat->w_wc == M_ALL) 1114 pat++; /* eat consecutive '*' */ 1115 if (pat == patend) 1116 return (1); 1117 if (name->w_wc == EOS) 1118 return (0); 1119 nextn = name + 1; 1120 nextp = pat - 1; 1121 break; 1122 case M_ONE: 1123 if (c.w_at != M_QUOTE) { 1124 k = *name++; 1125 if (k.w_at != c.w_at || k.w_wc != c.w_wc) 1126 goto fail; 1127 break; 1128 } 1129 if ((name++)->w_wc == EOS) 1130 goto fail; 1131 break; 1132 case M_SET: 1133 if (c.w_at != M_QUOTE) { 1134 k = *name++; 1135 if (k.w_at != c.w_at || k.w_wc != c.w_wc) 1136 goto fail; 1137 break; 1138 } 1139 ok = 0; 1140 if ((k = *name++).w_wc == EOS) 1141 goto fail; 1142 if ((negate_range = (pat->w_at == M_QUOTE && 1143 pat->w_wc == M_NOT)) != 0) 1144 ++pat; 1145 while (((c = *pat++).w_at != M_QUOTE) || 1146 c.w_wc != M_END) { 1147 if (c.w_at == M_QUOTE && c.w_wc == M_CLASS) { 1148 wcat_t cc; 1149 1150 cc.w_at = pat->w_at; 1151 cc.w_wc = pat->w_wc; 1152 if (iswctype(k.w_wc, cc.w_wc)) 1153 ok = 1; 1154 ++pat; 1155 } 1156 if (pat->w_at == M_QUOTE && 1157 pat->w_wc == M_RNG) { 1158 if (c.w_wc <= k.w_wc && 1159 k.w_wc <= pat[1].w_wc) 1160 ok = 1; 1161 pat += 2; 1162 } else if (c.w_wc == k.w_wc) 1163 ok = 1; 1164 } 1165 if (ok == negate_range) 1166 goto fail; 1167 break; 1168 default: 1169 k = *name++; 1170 if (k.w_at != c.w_at || k.w_wc != c.w_wc) 1171 goto fail; 1172 break; 1173 } 1174 } 1175 if (name->w_wc == EOS) 1176 return (1); 1177 1178 fail: 1179 if (nextn) { 1180 pat = nextp; 1181 name = nextn; 1182 goto loop; 1183 } 1184 return (0); 1185 } 1186 1187 /* 1188 * Extended globfree() function, selected by #pragma redefine_extname 1189 * in glob.h with the external name _globfree_ext() . 1190 */ 1191 void 1192 _globfree_ext(glob_t *pglob) 1193 { 1194 int i; 1195 char **pp; 1196 1197 if (pglob->gl_pathv != NULL) { 1198 pp = pglob->gl_pathv + pglob->gl_offs; 1199 for (i = pglob->gl_pathc; i--; ++pp) 1200 free(*pp); 1201 free(pglob->gl_pathv); 1202 pglob->gl_pathv = NULL; 1203 } 1204 if ((pglob->gl_flags & GLOB_KEEPSTAT) != 0 && 1205 pglob->gl_statv != NULL) { 1206 for (i = 0; i < pglob->gl_pathc; i++) { 1207 free(pglob->gl_statv[i]); 1208 } 1209 free(pglob->gl_statv); 1210 pglob->gl_statv = NULL; 1211 } 1212 } 1213 1214 static DIR * 1215 g_opendir(wcat_t *str, glob_t *pglob) 1216 { 1217 char buf[PATH_MAX]; 1218 1219 if (str->w_wc == EOS) 1220 (void) strlcpy(buf, ".", sizeof (buf)); 1221 else { 1222 if (g_Ctoc(str, buf, sizeof (buf))) 1223 return (NULL); 1224 } 1225 1226 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 1227 return ((*pglob->gl_opendir)(buf)); 1228 1229 return (opendir(buf)); 1230 } 1231 1232 static int 1233 g_lstat(wcat_t *fn, struct stat *sb, glob_t *pglob) 1234 { 1235 char buf[PATH_MAX]; 1236 1237 if (g_Ctoc(fn, buf, sizeof (buf))) 1238 return (-1); 1239 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 1240 return ((*pglob->gl_lstat)(buf, sb)); 1241 return (lstat(buf, sb)); 1242 } 1243 1244 static int 1245 g_stat(wcat_t *fn, struct stat *sb, glob_t *pglob) 1246 { 1247 char buf[PATH_MAX]; 1248 1249 if (g_Ctoc(fn, buf, sizeof (buf))) 1250 return (-1); 1251 if (pglob->gl_flags & GLOB_ALTDIRFUNC) 1252 return ((*pglob->gl_stat)(buf, sb)); 1253 return (stat(buf, sb)); 1254 } 1255 1256 static wcat_t * 1257 g_strchr(const wcat_t *str, wchar_t ch) 1258 { 1259 do { 1260 if (str->w_at == 0 && str->w_wc == ch) 1261 return ((wcat_t *)str); 1262 } while ((str++)->w_wc != EOS); 1263 return (NULL); 1264 } 1265 1266 static int 1267 g_Ctoc(const wcat_t *str, char *buf, uint_t len) 1268 { 1269 int n; 1270 wchar_t w; 1271 1272 while (len >= MB_LEN_MAX) { 1273 w = (str++)->w_wc; 1274 if ((n = wctomb(buf, w)) > 0) { 1275 len -= n; 1276 buf += n; 1277 } 1278 if (n < 0) 1279 break; 1280 if (w == EOS) 1281 return (0); 1282 } 1283 return (1); 1284 } 1285 1286 #if defined(_LP64) || _FILE_OFFSET_BITS != 64 1287 1288 /* glob() function with legacy glob structure */ 1289 int 1290 old_glob(const char *pattern, int flags, int (*errfunc)(const char *, int), 1291 old_glob_t *pglob) 1292 { 1293 1294 glob_t gl; 1295 int rv; 1296 1297 flags &= GLOB_POSIX; 1298 1299 (void) memset(&gl, 0, sizeof (gl)); 1300 1301 /* 1302 * Copy all the members, old to new. There's 1303 * really no point in micro-optimizing the copying. 1304 * Other members are set to zero. 1305 */ 1306 gl.gl_pathc = pglob->gl_pathc; 1307 gl.gl_pathv = pglob->gl_pathv; 1308 gl.gl_offs = pglob->gl_offs; 1309 gl.gl_pathp = pglob->gl_pathp; 1310 gl.gl_pathn = pglob->gl_pathn; 1311 1312 rv = _glob_ext(pattern, flags, errfunc, &gl); 1313 1314 /* 1315 * Copy all the members, new to old. There's 1316 * really no point in micro-optimizing the copying. 1317 */ 1318 pglob->gl_pathc = gl.gl_pathc; 1319 pglob->gl_pathv = gl.gl_pathv; 1320 pglob->gl_offs = gl.gl_offs; 1321 pglob->gl_pathp = gl.gl_pathp; 1322 pglob->gl_pathn = gl.gl_pathn; 1323 1324 return (rv); 1325 } 1326 1327 /* globfree() function with legacy glob structure */ 1328 void 1329 old_globfree(old_glob_t *pglob) 1330 { 1331 glob_t gl; 1332 1333 (void) memset(&gl, 0, sizeof (gl)); 1334 1335 /* 1336 * Copy all the members, old to new. There's 1337 * really no point in micro-optimizing the copying. 1338 * Other members are set to zero. 1339 */ 1340 gl.gl_pathc = pglob->gl_pathc; 1341 gl.gl_pathv = pglob->gl_pathv; 1342 gl.gl_offs = pglob->gl_offs; 1343 gl.gl_pathp = pglob->gl_pathp; 1344 gl.gl_pathn = pglob->gl_pathn; 1345 1346 _globfree_ext(&gl); 1347 1348 /* 1349 * Copy all the members, new to old. There's 1350 * really no point in micro-optimizing the copying. 1351 */ 1352 pglob->gl_pathc = gl.gl_pathc; 1353 pglob->gl_pathv = gl.gl_pathv; 1354 pglob->gl_offs = gl.gl_offs; 1355 pglob->gl_pathp = gl.gl_pathp; 1356 pglob->gl_pathn = gl.gl_pathn; 1357 1358 } 1359 1360 #endif /* _LP64 || _FILE_OFFSET_BITS != 64 */ 1361