1 /* $Id: compat_fts.c,v 1.17 2020/06/15 01:37:14 schwarze Exp $ */ 2 /* $OpenBSD: fts.c,v 1.59 2019/06/28 13:32:41 deraadt Exp $ */ 3 4 /*- 5 * Copyright (c) 1990, 1993, 1994 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 #include "config.h" 33 34 #include <sys/stat.h> 35 #include <sys/types.h> 36 37 #include <dirent.h> 38 #include <errno.h> 39 #include <fcntl.h> 40 #include <limits.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <unistd.h> 44 #include "compat_fts.h" 45 46 #define MAXIMUM(a, b) (((a) > (b)) ? (a) : (b)) 47 48 static FTSENT *fts_alloc(FTS *, const char *, size_t); 49 static FTSENT *fts_build(FTS *); 50 static void fts_lfree(FTSENT *); 51 static void fts_load(FTS *, FTSENT *); 52 static size_t fts_maxarglen(char * const *); 53 static void fts_padjust(FTS *, FTSENT *); 54 static int fts_palloc(FTS *, size_t); 55 static FTSENT *fts_sort(FTS *, FTSENT *, int); 56 static unsigned short fts_stat(FTS *, FTSENT *); 57 58 typedef int (*qsort_compar_proto)(const void *, const void *); 59 60 #define ISDOT(a) (a[0] == '.' && (!a[1] || (a[1] == '.' && !a[2]))) 61 #ifndef O_CLOEXEC 62 #define O_CLOEXEC 0 63 #endif 64 65 #define CLR(opt) (sp->fts_options &= ~(opt)) 66 #define ISSET(opt) (sp->fts_options & (opt)) 67 #define SET(opt) (sp->fts_options |= (opt)) 68 69 FTS * 70 fts_open(char * const *argv, int options, 71 int (*compar)(const FTSENT **, const FTSENT **)) 72 { 73 FTS *sp; 74 FTSENT *p, *root; 75 int nitems; 76 FTSENT *parent, *prev; 77 78 /* Options check. */ 79 if (options & ~FTS_OPTIONMASK) { 80 errno = EINVAL; 81 return (NULL); 82 } 83 84 /* At least one path must be specified. */ 85 if (*argv == NULL) { 86 errno = EINVAL; 87 return (NULL); 88 } 89 90 /* Allocate/initialize the stream */ 91 if ((sp = calloc(1, sizeof(FTS))) == NULL) 92 return (NULL); 93 sp->fts_compar = compar; 94 sp->fts_options = options; 95 96 /* 97 * Start out with 1K of path space, and enough, in any case, 98 * to hold the user's paths. 99 */ 100 if (fts_palloc(sp, MAXIMUM(fts_maxarglen(argv), PATH_MAX))) 101 goto mem1; 102 103 /* Allocate/initialize root's parent. */ 104 if ((parent = fts_alloc(sp, "", 0)) == NULL) 105 goto mem2; 106 parent->fts_level = FTS_ROOTPARENTLEVEL; 107 108 /* Allocate/initialize root(s). */ 109 for (root = prev = NULL, nitems = 0; *argv; ++argv, ++nitems) { 110 if ((p = fts_alloc(sp, *argv, strlen(*argv))) == NULL) 111 goto mem3; 112 p->fts_level = FTS_ROOTLEVEL; 113 p->fts_parent = parent; 114 p->fts_accpath = p->fts_name; 115 p->fts_info = fts_stat(sp, p); 116 117 /* Command-line "." and ".." are real directories. */ 118 if (p->fts_info == FTS_DOT) 119 p->fts_info = FTS_D; 120 121 /* 122 * If comparison routine supplied, traverse in sorted 123 * order; otherwise traverse in the order specified. 124 */ 125 if (compar) { 126 p->fts_link = root; 127 root = p; 128 } else { 129 p->fts_link = NULL; 130 if (root == NULL) 131 root = p; 132 else 133 prev->fts_link = p; 134 prev = p; 135 } 136 } 137 if (compar && nitems > 1) 138 root = fts_sort(sp, root, nitems); 139 140 /* 141 * Allocate a dummy pointer and make fts_read think that we've just 142 * finished the node before the root(s); set p->fts_info to FTS_INIT 143 * so that everything about the "current" node is ignored. 144 */ 145 if ((sp->fts_cur = fts_alloc(sp, "", 0)) == NULL) 146 goto mem3; 147 sp->fts_cur->fts_link = root; 148 sp->fts_cur->fts_info = FTS_INIT; 149 150 if (nitems == 0) 151 free(parent); 152 153 return (sp); 154 155 mem3: fts_lfree(root); 156 free(parent); 157 mem2: free(sp->fts_path); 158 mem1: free(sp); 159 return (NULL); 160 } 161 162 static void 163 fts_load(FTS *sp, FTSENT *p) 164 { 165 size_t len; 166 char *cp; 167 168 /* 169 * Load the stream structure for the next traversal. Since we don't 170 * actually enter the directory until after the preorder visit, set 171 * the fts_accpath field specially so the chdir gets done to the right 172 * place and the user can access the first node. From fts_open it's 173 * known that the path will fit. 174 */ 175 len = p->fts_pathlen = p->fts_namelen; 176 memmove(sp->fts_path, p->fts_name, len + 1); 177 if ((cp = strrchr(p->fts_name, '/')) && (cp != p->fts_name || cp[1])) { 178 len = strlen(++cp); 179 memmove(p->fts_name, cp, len + 1); 180 p->fts_namelen = len; 181 } 182 p->fts_accpath = p->fts_path = sp->fts_path; 183 sp->fts_dev = p->fts_dev; 184 } 185 186 int 187 fts_close(FTS *sp) 188 { 189 FTSENT *freep, *p; 190 191 /* 192 * This still works if we haven't read anything -- the dummy structure 193 * points to the root list, so we step through to the end of the root 194 * list which has a valid parent pointer. 195 */ 196 if (sp->fts_cur) { 197 for (p = sp->fts_cur; p->fts_level >= FTS_ROOTLEVEL;) { 198 freep = p; 199 p = p->fts_link ? p->fts_link : p->fts_parent; 200 free(freep); 201 } 202 free(p); 203 } 204 205 /* Free up child linked list, sort array, path buffer, stream ptr.*/ 206 if (sp->fts_child) 207 fts_lfree(sp->fts_child); 208 free(sp->fts_array); 209 free(sp->fts_path); 210 free(sp); 211 212 return (0); 213 } 214 215 /* 216 * Special case of "/" at the end of the path so that slashes aren't 217 * appended which would cause paths to be written as "....//foo". 218 */ 219 #define NAPPEND(p) \ 220 (p->fts_path[p->fts_pathlen - 1] == '/' \ 221 ? p->fts_pathlen - 1 : p->fts_pathlen) 222 223 FTSENT * 224 fts_read(FTS *sp) 225 { 226 FTSENT *p, *tmp; 227 int instr; 228 char *t; 229 230 /* If finished or unrecoverable error, return NULL. */ 231 if (sp->fts_cur == NULL || ISSET(FTS_STOP)) 232 return (NULL); 233 234 /* Set current node pointer. */ 235 p = sp->fts_cur; 236 237 /* Save and zero out user instructions. */ 238 instr = p->fts_instr; 239 p->fts_instr = FTS_NOINSTR; 240 241 /* Directory in pre-order. */ 242 if (p->fts_info == FTS_D) { 243 /* If skipped or crossed mount point, do post-order visit. */ 244 if (instr == FTS_SKIP || 245 (ISSET(FTS_XDEV) && p->fts_dev != sp->fts_dev)) { 246 if (sp->fts_child) { 247 fts_lfree(sp->fts_child); 248 sp->fts_child = NULL; 249 } 250 p->fts_info = FTS_DP; 251 return (p); 252 } 253 254 /* 255 * If haven't read do so. If the read fails, fts_build sets 256 * FTS_STOP or the fts_info field of the node. 257 */ 258 if (sp->fts_child) { 259 /* nothing */ 260 } else if ((sp->fts_child = fts_build(sp)) == NULL) { 261 if (ISSET(FTS_STOP)) 262 return (NULL); 263 return (p); 264 } 265 p = sp->fts_child; 266 sp->fts_child = NULL; 267 goto name; 268 } 269 270 /* Move to the next node on this level. */ 271 next: tmp = p; 272 if ((p = p->fts_link)) { 273 free(tmp); 274 275 /* 276 * If reached the top, return to the original directory (or 277 * the root of the tree), and load the paths for the next root. 278 */ 279 if (p->fts_level == FTS_ROOTLEVEL) { 280 fts_load(sp, p); 281 return (sp->fts_cur = p); 282 } 283 284 /* 285 * User may have called fts_set on the node. If skipped, 286 * ignore. If followed, get a file descriptor so we can 287 * get back if necessary. 288 */ 289 if (p->fts_instr == FTS_SKIP) 290 goto next; 291 292 name: t = sp->fts_path + NAPPEND(p->fts_parent); 293 *t++ = '/'; 294 memmove(t, p->fts_name, p->fts_namelen + 1); 295 return (sp->fts_cur = p); 296 } 297 298 /* Move up to the parent node. */ 299 p = tmp->fts_parent; 300 free(tmp); 301 302 if (p->fts_level == FTS_ROOTPARENTLEVEL) { 303 /* 304 * Done; free everything up and set errno to 0 so the user 305 * can distinguish between error and EOF. 306 */ 307 free(p); 308 errno = 0; 309 return (sp->fts_cur = NULL); 310 } 311 312 /* NUL terminate the pathname. */ 313 sp->fts_path[p->fts_pathlen] = '\0'; 314 315 p->fts_info = p->fts_errno ? FTS_ERR : FTS_DP; 316 return (sp->fts_cur = p); 317 } 318 319 /* 320 * Fts_set takes the stream as an argument although it's not used in this 321 * implementation; it would be necessary if anyone wanted to add global 322 * semantics to fts using fts_set. An error return is allowed for similar 323 * reasons. 324 */ 325 int 326 fts_set(FTS *sp, FTSENT *p, int instr) 327 { 328 if (instr && instr != FTS_NOINSTR && instr != FTS_SKIP) { 329 errno = EINVAL; 330 return (1); 331 } 332 p->fts_instr = instr; 333 return (0); 334 } 335 336 /* 337 * This is the tricky part -- do not casually change *anything* in here. The 338 * idea is to build the linked list of entries that are used by fts_children 339 * and fts_read. There are lots of special cases. 340 * 341 * The real slowdown in walking the tree is the stat calls. If FTS_NOSTAT is 342 * set and it's a physical walk (so that symbolic links can't be directories), 343 * we can do things quickly. First, if it's a 4.4BSD file system, the type 344 * of the file is in the directory entry. Otherwise, we assume that the number 345 * of subdirectories in a node is equal to the number of links to the parent. 346 * The former skips all stat calls. The latter skips stat calls in any leaf 347 * directories and for any files after the subdirectories in the directory have 348 * been found, cutting the stat calls by about 2/3. 349 */ 350 static FTSENT * 351 fts_build(FTS *sp) 352 { 353 struct dirent *dp; 354 FTSENT *p, *head; 355 FTSENT *cur, *tail; 356 DIR *dirp; 357 void *oldaddr; 358 size_t dlen, len, maxlen; 359 int nitems, level, doadjust; 360 int saved_errno; 361 char *cp; 362 363 /* Set current node pointer. */ 364 cur = sp->fts_cur; 365 366 /* 367 * Open the directory for reading. If this fails, we're done. 368 * If being called from fts_read, set the fts_info field. 369 */ 370 if ((dirp = opendir(cur->fts_accpath)) == NULL) { 371 cur->fts_info = FTS_DNR; 372 cur->fts_errno = errno; 373 return (NULL); 374 } 375 376 /* 377 * Figure out the max file name length that can be stored in the 378 * current path -- the inner loop allocates more path as necessary. 379 * We really wouldn't have to do the maxlen calculations here, we 380 * could do them in fts_read before returning the path, but it's a 381 * lot easier here since the length is part of the dirent structure. 382 * 383 * If not changing directories set a pointer so that can just append 384 * each new name into the path. 385 */ 386 len = NAPPEND(cur); 387 cp = sp->fts_path + len; 388 *cp++ = '/'; 389 len++; 390 maxlen = sp->fts_pathlen - len; 391 392 /* 393 * fts_level is signed so we must prevent it from wrapping 394 * around to FTS_ROOTLEVEL and FTS_ROOTPARENTLEVEL. 395 */ 396 level = cur->fts_level; 397 if (level < FTS_MAXLEVEL) 398 level++; 399 400 /* Read the directory, attaching each entry to the `link' pointer. */ 401 doadjust = 0; 402 for (head = tail = NULL, nitems = 0; dirp && (dp = readdir(dirp));) { 403 if (ISDOT(dp->d_name)) 404 continue; 405 406 #if HAVE_DIRENT_NAMLEN 407 dlen = dp->d_namlen; 408 #else 409 dlen = strlen(dp->d_name); 410 #endif 411 412 if (!(p = fts_alloc(sp, dp->d_name, dlen))) 413 goto mem1; 414 if (dlen >= maxlen) { /* include space for NUL */ 415 oldaddr = sp->fts_path; 416 if (fts_palloc(sp, dlen + len + 1)) { 417 /* 418 * No more memory for path or structures. Save 419 * errno, free up the current structure and the 420 * structures already allocated. 421 */ 422 mem1: saved_errno = errno; 423 free(p); 424 fts_lfree(head); 425 (void)closedir(dirp); 426 cur->fts_info = FTS_ERR; 427 SET(FTS_STOP); 428 errno = saved_errno; 429 return (NULL); 430 } 431 /* Did realloc() change the pointer? */ 432 if (oldaddr != sp->fts_path) { 433 doadjust = 1; 434 cp = sp->fts_path + len; 435 } 436 maxlen = sp->fts_pathlen - len; 437 } 438 439 p->fts_level = level; 440 p->fts_parent = sp->fts_cur; 441 p->fts_pathlen = len + dlen; 442 if (p->fts_pathlen < len) { 443 /* 444 * If we wrap, free up the current structure and 445 * the structures already allocated, then error 446 * out with ENAMETOOLONG. 447 */ 448 free(p); 449 fts_lfree(head); 450 (void)closedir(dirp); 451 cur->fts_info = FTS_ERR; 452 SET(FTS_STOP); 453 errno = ENAMETOOLONG; 454 return (NULL); 455 } 456 457 /* Build a file name for fts_stat to stat. */ 458 p->fts_accpath = p->fts_path; 459 memmove(cp, p->fts_name, p->fts_namelen + 1); 460 /* Stat it. */ 461 p->fts_info = fts_stat(sp, p); 462 463 /* We walk in directory order so "ls -f" doesn't get upset. */ 464 p->fts_link = NULL; 465 if (head == NULL) 466 head = tail = p; 467 else { 468 tail->fts_link = p; 469 tail = p; 470 } 471 ++nitems; 472 } 473 if (dirp) 474 (void)closedir(dirp); 475 476 /* 477 * If realloc() changed the address of the path, adjust the 478 * addresses for the rest of the tree and the dir list. 479 */ 480 if (doadjust) 481 fts_padjust(sp, head); 482 483 /* 484 * If not changing directories, reset the path back to original 485 * state. 486 */ 487 if (len == sp->fts_pathlen || nitems == 0) 488 --cp; 489 *cp = '\0'; 490 491 /* If didn't find anything, return NULL. */ 492 if (!nitems) { 493 cur->fts_info = FTS_DP; 494 return (NULL); 495 } 496 497 /* Sort the entries. */ 498 if (sp->fts_compar && nitems > 1) 499 head = fts_sort(sp, head, nitems); 500 return (head); 501 } 502 503 static unsigned short 504 fts_stat(FTS *sp, FTSENT *p) 505 { 506 FTSENT *t; 507 dev_t dev; 508 ino_t ino; 509 struct stat *sbp; 510 511 /* If user needs stat info, stat buffer already allocated. */ 512 sbp = p->fts_statp; 513 514 if (lstat(p->fts_accpath, sbp)) { 515 p->fts_errno = errno; 516 memset(sbp, 0, sizeof(struct stat)); 517 return (FTS_NS); 518 } 519 520 if (S_ISDIR(sbp->st_mode)) { 521 /* 522 * Set the device/inode. Used to find cycles and check for 523 * crossing mount points. Also remember the link count, used 524 * in fts_build to limit the number of stat calls. It is 525 * understood that these fields are only referenced if fts_info 526 * is set to FTS_D. 527 */ 528 dev = p->fts_dev = sbp->st_dev; 529 ino = p->fts_ino = sbp->st_ino; 530 p->fts_nlink = sbp->st_nlink; 531 532 if (ISDOT(p->fts_name)) 533 return (FTS_DOT); 534 535 /* 536 * Cycle detection is done by brute force when the directory 537 * is first encountered. If the tree gets deep enough or the 538 * number of symbolic links to directories is high enough, 539 * something faster might be worthwhile. 540 */ 541 for (t = p->fts_parent; 542 t->fts_level >= FTS_ROOTLEVEL; t = t->fts_parent) 543 if (ino == t->fts_ino && dev == t->fts_dev) { 544 p->fts_cycle = t; 545 return (FTS_DC); 546 } 547 return (FTS_D); 548 } 549 if (S_ISLNK(sbp->st_mode)) 550 return (FTS_SL); 551 if (S_ISREG(sbp->st_mode)) 552 return (FTS_F); 553 return (FTS_DEFAULT); 554 } 555 556 static FTSENT * 557 fts_sort(FTS *sp, FTSENT *head, int nitems) 558 { 559 FTSENT **ap, *p; 560 561 /* 562 * Construct an array of pointers to the structures and call qsort(3). 563 * Reassemble the array in the order returned by qsort. If unable to 564 * sort for memory reasons, return the directory entries in their 565 * current order. Allocate enough space for the current needs plus 566 * 40 so don't realloc one entry at a time. 567 */ 568 if (nitems > sp->fts_nitems) { 569 struct _ftsent **a; 570 571 if ((a = reallocarray(sp->fts_array, 572 nitems + 40, sizeof(FTSENT *))) == NULL) { 573 free(sp->fts_array); 574 sp->fts_array = NULL; 575 sp->fts_nitems = 0; 576 return (head); 577 } 578 sp->fts_nitems = nitems + 40; 579 sp->fts_array = a; 580 } 581 for (ap = sp->fts_array, p = head; p; p = p->fts_link) 582 *ap++ = p; 583 qsort(sp->fts_array, nitems, sizeof(FTSENT *), 584 (qsort_compar_proto)sp->fts_compar); 585 for (head = *(ap = sp->fts_array); --nitems; ++ap) 586 ap[0]->fts_link = ap[1]; 587 ap[0]->fts_link = NULL; 588 return (head); 589 } 590 591 static FTSENT * 592 fts_alloc(FTS *sp, const char *name, size_t namelen) 593 { 594 FTSENT *p; 595 size_t len; 596 597 len = sizeof(FTSENT) + namelen; 598 if ((p = calloc(1, len)) == NULL) 599 return (NULL); 600 601 p->fts_path = sp->fts_path; 602 p->fts_namelen = namelen; 603 p->fts_instr = FTS_NOINSTR; 604 p->fts_statp = malloc(sizeof(struct stat)); 605 if (p->fts_statp == NULL) { 606 free(p); 607 return (NULL); 608 } 609 memcpy(p->fts_name, name, namelen); 610 611 return (p); 612 } 613 614 static void 615 fts_lfree(FTSENT *head) 616 { 617 FTSENT *p; 618 619 /* Free a linked list of structures. */ 620 while ((p = head)) { 621 head = head->fts_link; 622 free(p); 623 } 624 } 625 626 /* 627 * Allow essentially unlimited paths; find, rm, ls should all work on any tree. 628 * Most systems will allow creation of paths much longer than PATH_MAX, even 629 * though the kernel won't resolve them. Add the size (not just what's needed) 630 * plus 256 bytes so don't realloc the path 2 bytes at a time. 631 */ 632 static int 633 fts_palloc(FTS *sp, size_t more) 634 { 635 char *p; 636 637 /* 638 * Check for possible wraparound. 639 */ 640 more += 256; 641 if (sp->fts_pathlen + more < sp->fts_pathlen) { 642 free(sp->fts_path); 643 sp->fts_path = NULL; 644 errno = ENAMETOOLONG; 645 return (1); 646 } 647 p = recallocarray(sp->fts_path, sp->fts_pathlen, 648 sp->fts_pathlen + more, 1); 649 if (p == NULL) { 650 free(sp->fts_path); 651 sp->fts_path = NULL; 652 return (1); 653 } 654 sp->fts_pathlen += more; 655 sp->fts_path = p; 656 return (0); 657 } 658 659 /* 660 * When the path is realloc'd, have to fix all of the pointers in structures 661 * already returned. 662 */ 663 static void 664 fts_padjust(FTS *sp, FTSENT *head) 665 { 666 FTSENT *p; 667 char *addr = sp->fts_path; 668 669 #define ADJUST(p) { \ 670 if ((p)->fts_accpath != (p)->fts_name) { \ 671 (p)->fts_accpath = \ 672 (char *)addr + ((p)->fts_accpath - (p)->fts_path); \ 673 } \ 674 (p)->fts_path = addr; \ 675 } 676 /* Adjust the current set of children. */ 677 for (p = sp->fts_child; p; p = p->fts_link) 678 ADJUST(p); 679 680 /* Adjust the rest of the tree, including the current level. */ 681 for (p = head; p->fts_level >= FTS_ROOTLEVEL;) { 682 ADJUST(p); 683 p = p->fts_link ? p->fts_link : p->fts_parent; 684 } 685 } 686 687 static size_t 688 fts_maxarglen(char * const *argv) 689 { 690 size_t len, max; 691 692 for (max = 0; *argv; ++argv) 693 if ((len = strlen(*argv)) > max) 694 max = len; 695 return (max + 1); 696 } 697