1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Chris Newcomb. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 35 #include <sys/param.h> 36 #include <sys/queue.h> 37 #include <sys/stat.h> 38 #include <err.h> 39 #include <errno.h> 40 #include <fnmatch.h> 41 #include <fts.h> 42 #include <getopt.h> 43 #include <libutil.h> 44 #include <locale.h> 45 #include <stdint.h> 46 #include <stdio.h> 47 #include <stdlib.h> 48 #include <string.h> 49 #include <sysexits.h> 50 #include <unistd.h> 51 #include <libxo/xo.h> 52 53 #define SI_OPT (CHAR_MAX + 1) 54 55 #define UNITS_2 1 56 #define UNITS_SI 2 57 58 static SLIST_HEAD(ignhead, ignentry) ignores; 59 struct ignentry { 60 char *mask; 61 SLIST_ENTRY(ignentry) next; 62 }; 63 64 static int linkchk(FTSENT *); 65 static void usage(void); 66 static void prthumanval(const char *, int64_t); 67 static void ignoreadd(const char *); 68 static void ignoreclean(void); 69 static int ignorep(FTSENT *); 70 static void siginfo(int __unused); 71 72 static int nodumpflag = 0; 73 static int Aflag, hflag; 74 static long blocksize, cblocksize; 75 static volatile sig_atomic_t info; 76 77 static const struct option long_options[] = 78 { 79 { "si", no_argument, NULL, SI_OPT }, 80 { NULL, no_argument, NULL, 0 }, 81 }; 82 83 int 84 main(int argc, char *argv[]) 85 { 86 FTS *fts; 87 FTSENT *p; 88 off_t savednumber, curblocks; 89 off_t threshold, threshold_sign; 90 int ftsoptions; 91 int depth; 92 int Hflag, Lflag, aflag, sflag, dflag, cflag; 93 int lflag, ch, notused, rval; 94 char **save; 95 static char dot[] = "."; 96 97 setlocale(LC_ALL, ""); 98 99 Hflag = Lflag = aflag = sflag = dflag = cflag = lflag = Aflag = 0; 100 101 save = argv; 102 ftsoptions = FTS_PHYSICAL; 103 savednumber = 0; 104 threshold = 0; 105 threshold_sign = 1; 106 cblocksize = DEV_BSIZE; 107 blocksize = 0; 108 depth = INT_MAX; 109 SLIST_INIT(&ignores); 110 111 argc = xo_parse_args(argc, argv); 112 if (argc < 0) 113 exit(EX_USAGE); 114 115 while ((ch = getopt_long(argc, argv, "+AB:HI:LPasd:cghklmnrt:x", 116 long_options, NULL)) != -1) 117 switch (ch) { 118 case 'A': 119 Aflag = 1; 120 break; 121 case 'B': 122 errno = 0; 123 cblocksize = atoi(optarg); 124 if (errno == ERANGE || cblocksize <= 0) { 125 xo_warnx("invalid argument to option B: %s", 126 optarg); 127 usage(); 128 } 129 break; 130 case 'H': 131 Hflag = 1; 132 Lflag = 0; 133 break; 134 case 'I': 135 ignoreadd(optarg); 136 break; 137 case 'L': 138 Lflag = 1; 139 Hflag = 0; 140 break; 141 case 'P': 142 Hflag = Lflag = 0; 143 break; 144 case 'a': 145 aflag = 1; 146 break; 147 case 's': 148 sflag = 1; 149 break; 150 case 'd': 151 dflag = 1; 152 errno = 0; 153 depth = atoi(optarg); 154 if (errno == ERANGE || depth < 0) { 155 xo_warnx("invalid argument to option d: %s", 156 optarg); 157 usage(); 158 } 159 break; 160 case 'c': 161 cflag = 1; 162 break; 163 case 'g': 164 hflag = 0; 165 blocksize = 1073741824; 166 break; 167 case 'h': 168 hflag = UNITS_2; 169 break; 170 case 'k': 171 hflag = 0; 172 blocksize = 1024; 173 break; 174 case 'l': 175 lflag = 1; 176 break; 177 case 'm': 178 hflag = 0; 179 blocksize = 1048576; 180 break; 181 case 'n': 182 nodumpflag = 1; 183 break; 184 case 'r': /* Compatibility. */ 185 break; 186 case 't' : 187 if (expand_number(optarg, &threshold) != 0 || 188 threshold == 0) { 189 xo_warnx("invalid threshold: %s", optarg); 190 usage(); 191 } else if (threshold < 0) 192 threshold_sign = -1; 193 break; 194 case 'x': 195 ftsoptions |= FTS_XDEV; 196 break; 197 case SI_OPT: 198 hflag = UNITS_SI; 199 break; 200 case '?': 201 default: 202 usage(); 203 /* NOTREACHED */ 204 } 205 206 argc -= optind; 207 argv += optind; 208 209 /* 210 * XXX 211 * Because of the way that fts(3) works, logical walks will not count 212 * the blocks actually used by symbolic links. We rationalize this by 213 * noting that users computing logical sizes are likely to do logical 214 * copies, so not counting the links is correct. The real reason is 215 * that we'd have to re-implement the kernel's symbolic link traversing 216 * algorithm to get this right. If, for example, you have relative 217 * symbolic links referencing other relative symbolic links, it gets 218 * very nasty, very fast. The bottom line is that it's documented in 219 * the man page, so it's a feature. 220 */ 221 222 if (Hflag) 223 ftsoptions |= FTS_COMFOLLOW; 224 if (Lflag) { 225 ftsoptions &= ~FTS_PHYSICAL; 226 ftsoptions |= FTS_LOGICAL; 227 } 228 229 if (!Aflag && (cblocksize % DEV_BSIZE) != 0) 230 cblocksize = howmany(cblocksize, DEV_BSIZE) * DEV_BSIZE; 231 232 if (aflag + dflag + sflag > 1) 233 usage(); 234 if (sflag) 235 depth = 0; 236 237 if (!*argv) { 238 argv = save; 239 argv[0] = dot; 240 argv[1] = NULL; 241 } 242 243 if (blocksize == 0) 244 (void)getbsize(¬used, &blocksize); 245 246 if (!Aflag) { 247 cblocksize /= DEV_BSIZE; 248 blocksize /= DEV_BSIZE; 249 } 250 251 if (threshold != 0) 252 threshold = howmany(threshold / DEV_BSIZE * cblocksize, 253 blocksize); 254 255 rval = 0; 256 257 (void)signal(SIGINFO, siginfo); 258 259 if ((fts = fts_open(argv, ftsoptions, NULL)) == NULL) 260 err(1, "fts_open"); 261 262 xo_open_container("disk-usage-information"); 263 xo_open_list("paths"); 264 while (errno = 0, (p = fts_read(fts)) != NULL) { 265 switch (p->fts_info) { 266 case FTS_D: /* Ignore. */ 267 if (ignorep(p)) 268 fts_set(fts, p, FTS_SKIP); 269 break; 270 case FTS_DP: 271 if (ignorep(p)) 272 break; 273 274 curblocks = Aflag ? 275 howmany(p->fts_statp->st_size, cblocksize) : 276 howmany(p->fts_statp->st_blocks, cblocksize); 277 p->fts_parent->fts_bignum += p->fts_bignum += 278 curblocks; 279 280 if (p->fts_level <= depth && threshold <= 281 threshold_sign * howmany(p->fts_bignum * 282 cblocksize, blocksize)) { 283 xo_open_instance("paths"); 284 if (hflag > 0) { 285 prthumanval("{:blocks/%4s}", 286 p->fts_bignum); 287 xo_emit("\t{:path/%s}\n", p->fts_path); 288 } else { 289 xo_emit("{:blocks/%jd}\t{:path/%s}\n", 290 (intmax_t)howmany(p->fts_bignum * 291 cblocksize, blocksize), 292 p->fts_path); 293 } 294 xo_close_instance("paths"); 295 } 296 if (info) { 297 info = 0; 298 (void)printf("\t%s\n", p->fts_path); 299 } 300 break; 301 case FTS_DC: /* Ignore. */ 302 break; 303 case FTS_DNR: /* Warn, continue. */ 304 case FTS_ERR: 305 case FTS_NS: 306 xo_warnx("%s: %s", p->fts_path, strerror(p->fts_errno)); 307 rval = 1; 308 break; 309 default: 310 if (ignorep(p)) 311 break; 312 313 if (lflag == 0 && p->fts_statp->st_nlink > 1 && 314 linkchk(p)) 315 break; 316 317 curblocks = Aflag ? 318 howmany(p->fts_statp->st_size, cblocksize) : 319 howmany(p->fts_statp->st_blocks, cblocksize); 320 321 if (aflag || p->fts_level == 0) { 322 xo_open_instance("paths"); 323 if (hflag > 0) { 324 prthumanval("{:blocks/%4s}", curblocks); 325 xo_emit("\t{:path/%s}\n", p->fts_path); 326 } else { 327 xo_emit("{:blocks/%jd}\t{:path/%s}\n", 328 (intmax_t)howmany(curblocks * 329 cblocksize, blocksize), 330 p->fts_path); 331 } 332 xo_close_instance("paths"); 333 } 334 335 p->fts_parent->fts_bignum += curblocks; 336 } 337 savednumber = p->fts_parent->fts_bignum; 338 } 339 xo_close_list("paths"); 340 341 if (errno) 342 xo_err(1, "fts_read"); 343 344 if (cflag) { 345 if (hflag > 0) { 346 prthumanval("{:total-blocks/%4s}\ttotal\n", 347 savednumber); 348 } else { 349 xo_emit("{:total-blocks/%jd}\ttotal\n", 350 (intmax_t)howmany( 351 savednumber * cblocksize, blocksize)); 352 } 353 } 354 355 ignoreclean(); 356 xo_close_container("disk-usage-information"); 357 if (xo_finish() < 0) 358 xo_err(1, "stdout"); 359 exit(rval); 360 } 361 362 static int 363 linkchk(FTSENT *p) 364 { 365 struct links_entry { 366 struct links_entry *next; 367 struct links_entry *previous; 368 int links; 369 dev_t dev; 370 ino_t ino; 371 }; 372 static const size_t links_hash_initial_size = 8192; 373 static struct links_entry **buckets; 374 static struct links_entry *free_list; 375 static size_t number_buckets; 376 static unsigned long number_entries; 377 static char stop_allocating; 378 struct links_entry *le, **new_buckets; 379 struct stat *st; 380 size_t i, new_size; 381 int hash; 382 383 st = p->fts_statp; 384 385 /* If necessary, initialize the hash table. */ 386 if (buckets == NULL) { 387 number_buckets = links_hash_initial_size; 388 buckets = malloc(number_buckets * sizeof(buckets[0])); 389 if (buckets == NULL) 390 errx(1, "No memory for hardlink detection"); 391 for (i = 0; i < number_buckets; i++) 392 buckets[i] = NULL; 393 } 394 395 /* If the hash table is getting too full, enlarge it. */ 396 if (number_entries > number_buckets * 10 && !stop_allocating) { 397 new_size = number_buckets * 2; 398 new_buckets = calloc(new_size, sizeof(struct links_entry *)); 399 400 /* Try releasing the free list to see if that helps. */ 401 if (new_buckets == NULL && free_list != NULL) { 402 while (free_list != NULL) { 403 le = free_list; 404 free_list = le->next; 405 free(le); 406 } 407 new_buckets = calloc(new_size, sizeof(new_buckets[0])); 408 } 409 410 if (new_buckets == NULL) { 411 stop_allocating = 1; 412 xo_warnx("No more memory for tracking hard links"); 413 } else { 414 for (i = 0; i < number_buckets; i++) { 415 while (buckets[i] != NULL) { 416 /* Remove entry from old bucket. */ 417 le = buckets[i]; 418 buckets[i] = le->next; 419 420 /* Add entry to new bucket. */ 421 hash = (le->dev ^ le->ino) % new_size; 422 423 if (new_buckets[hash] != NULL) 424 new_buckets[hash]->previous = 425 le; 426 le->next = new_buckets[hash]; 427 le->previous = NULL; 428 new_buckets[hash] = le; 429 } 430 } 431 free(buckets); 432 buckets = new_buckets; 433 number_buckets = new_size; 434 } 435 } 436 437 /* Try to locate this entry in the hash table. */ 438 hash = ( st->st_dev ^ st->st_ino ) % number_buckets; 439 for (le = buckets[hash]; le != NULL; le = le->next) { 440 if (le->dev == st->st_dev && le->ino == st->st_ino) { 441 /* 442 * Save memory by releasing an entry when we've seen 443 * all of its links. 444 */ 445 if (--le->links <= 0) { 446 if (le->previous != NULL) 447 le->previous->next = le->next; 448 if (le->next != NULL) 449 le->next->previous = le->previous; 450 if (buckets[hash] == le) 451 buckets[hash] = le->next; 452 number_entries--; 453 /* Recycle this node through the free list */ 454 if (stop_allocating) { 455 free(le); 456 } else { 457 le->next = free_list; 458 free_list = le; 459 } 460 } 461 return (1); 462 } 463 } 464 465 if (stop_allocating) 466 return (0); 467 468 /* Add this entry to the links cache. */ 469 if (free_list != NULL) { 470 /* Pull a node from the free list if we can. */ 471 le = free_list; 472 free_list = le->next; 473 } else 474 /* Malloc one if we have to. */ 475 le = malloc(sizeof(struct links_entry)); 476 if (le == NULL) { 477 stop_allocating = 1; 478 xo_warnx("No more memory for tracking hard links"); 479 return (0); 480 } 481 le->dev = st->st_dev; 482 le->ino = st->st_ino; 483 le->links = st->st_nlink - 1; 484 number_entries++; 485 le->next = buckets[hash]; 486 le->previous = NULL; 487 if (buckets[hash] != NULL) 488 buckets[hash]->previous = le; 489 buckets[hash] = le; 490 return (0); 491 } 492 493 static void 494 prthumanval(const char *fmt, int64_t bytes) 495 { 496 char buf[5]; 497 int flags; 498 499 bytes *= cblocksize; 500 flags = HN_B | HN_NOSPACE | HN_DECIMAL; 501 if (!Aflag) 502 bytes *= DEV_BSIZE; 503 if (hflag == UNITS_SI) 504 flags |= HN_DIVISOR_1000; 505 506 humanize_number(buf, sizeof(buf), bytes, "", HN_AUTOSCALE, flags); 507 508 xo_emit(fmt, buf); 509 } 510 511 static void 512 usage(void) 513 { 514 xo_error( 515 "usage: du [-Aclnx] [-H | -L | -P] [-g | -h | -k | -m] " 516 "[-a | -s | -d depth] [-B blocksize] [-I mask] " 517 "[-t threshold] [file ...]\n"); 518 exit(EX_USAGE); 519 } 520 521 static void 522 ignoreadd(const char *mask) 523 { 524 struct ignentry *ign; 525 526 ign = calloc(1, sizeof(*ign)); 527 if (ign == NULL) 528 errx(1, "cannot allocate memory"); 529 ign->mask = strdup(mask); 530 if (ign->mask == NULL) 531 errx(1, "cannot allocate memory"); 532 SLIST_INSERT_HEAD(&ignores, ign, next); 533 } 534 535 static void 536 ignoreclean(void) 537 { 538 struct ignentry *ign; 539 540 while (!SLIST_EMPTY(&ignores)) { 541 ign = SLIST_FIRST(&ignores); 542 SLIST_REMOVE_HEAD(&ignores, next); 543 free(ign->mask); 544 free(ign); 545 } 546 } 547 548 static int 549 ignorep(FTSENT *ent) 550 { 551 struct ignentry *ign; 552 553 if (nodumpflag && (ent->fts_statp->st_flags & UF_NODUMP)) 554 return 1; 555 SLIST_FOREACH(ign, &ignores, next) 556 if (fnmatch(ign->mask, ent->fts_name, 0) != FNM_NOMATCH) 557 return 1; 558 return 0; 559 } 560 561 static void 562 siginfo(int sig __unused) 563 { 564 565 info = 1; 566 } 567