1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1989, 1993, 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Chris Newcomb.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 #include <sys/param.h>
36 #include <sys/queue.h>
37 #include <sys/stat.h>
38 #include <err.h>
39 #include <errno.h>
40 #include <fnmatch.h>
41 #include <fts.h>
42 #include <getopt.h>
43 #include <libutil.h>
44 #include <locale.h>
45 #include <stdint.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <stdbool.h>
49 #include <string.h>
50 #include <sysexits.h>
51 #include <unistd.h>
52 #include <libxo/xo.h>
53
54 #define SI_OPT (CHAR_MAX + 1)
55
56 #define UNITS_2 1
57 #define UNITS_SI 2
58
59 #define DU_XO_VERSION "1"
60
61 static SLIST_HEAD(ignhead, ignentry) ignores;
62 struct ignentry {
63 char *mask;
64 SLIST_ENTRY(ignentry) next;
65 };
66
67 static bool check_threshold(FTSENT *);
68 static void ignoreadd(const char *);
69 static void ignoreclean(void);
70 static int ignorep(FTSENT *);
71 static int linkchk(FTSENT *);
72 static void print_file_size(FTSENT *);
73 static void prthumanval(const char *, int64_t);
74 static void record_file_size(FTSENT *);
75 static void siginfo(int __unused);
76 static void usage(void);
77
78 static int nodumpflag = 0;
79 static int Aflag, hflag;
80 static long blocksize, cblocksize;
81 static volatile sig_atomic_t info;
82 static off_t threshold, threshold_sign;
83
84 static const struct option long_options[] = {
85 { "si", no_argument, NULL, SI_OPT },
86 { NULL, no_argument, NULL, 0 },
87 };
88
89 int
main(int argc,char * argv[])90 main(int argc, char *argv[])
91 {
92 FTS *fts;
93 FTSENT *p;
94 off_t savednumber;
95 int ftsoptions;
96 int depth;
97 int Hflag, Lflag, aflag, sflag, dflag, cflag;
98 int lflag, ch, notused, rval;
99 char **save;
100 static char dot[] = ".";
101
102 setlocale(LC_ALL, "");
103
104 Hflag = Lflag = aflag = sflag = dflag = cflag = lflag = Aflag = 0;
105
106 save = argv;
107 ftsoptions = FTS_PHYSICAL;
108 savednumber = 0;
109 threshold = 0;
110 threshold_sign = 1;
111 cblocksize = DEV_BSIZE;
112 blocksize = 0;
113 depth = INT_MAX;
114 SLIST_INIT(&ignores);
115
116 argc = xo_parse_args(argc, argv);
117 if (argc < 0)
118 exit(EX_USAGE);
119
120 while ((ch = getopt_long(argc, argv, "+AB:HI:LPasd:cghklmnrt:x",
121 long_options, NULL)) != -1)
122 switch (ch) {
123 case 'A':
124 Aflag = 1;
125 break;
126 case 'B':
127 errno = 0;
128 cblocksize = atoi(optarg);
129 if (errno == ERANGE || cblocksize <= 0) {
130 xo_warnx("invalid argument to option B: %s",
131 optarg);
132 usage();
133 }
134 break;
135 case 'H':
136 Hflag = 1;
137 Lflag = 0;
138 break;
139 case 'I':
140 ignoreadd(optarg);
141 break;
142 case 'L':
143 Lflag = 1;
144 Hflag = 0;
145 break;
146 case 'P':
147 Hflag = Lflag = 0;
148 break;
149 case 'a':
150 aflag = 1;
151 break;
152 case 's':
153 sflag = 1;
154 break;
155 case 'd':
156 dflag = 1;
157 errno = 0;
158 depth = atoi(optarg);
159 if (errno == ERANGE || depth < 0) {
160 xo_warnx("invalid argument to option d: %s",
161 optarg);
162 usage();
163 }
164 break;
165 case 'c':
166 cflag = 1;
167 break;
168 case 'g':
169 hflag = 0;
170 blocksize = 1073741824;
171 break;
172 case 'h':
173 hflag = UNITS_2;
174 break;
175 case 'k':
176 hflag = 0;
177 blocksize = 1024;
178 break;
179 case 'l':
180 lflag = 1;
181 break;
182 case 'm':
183 hflag = 0;
184 blocksize = 1048576;
185 break;
186 case 'n':
187 nodumpflag = 1;
188 break;
189 case 'r': /* Compatibility. */
190 break;
191 case 't':
192 if (expand_number(optarg, &threshold) != 0 ||
193 threshold == 0) {
194 xo_warnx("invalid threshold: %s", optarg);
195 usage();
196 } else if (threshold < 0)
197 threshold_sign = -1;
198 break;
199 case 'x':
200 ftsoptions |= FTS_XDEV;
201 break;
202 case SI_OPT:
203 hflag = UNITS_SI;
204 break;
205 case '?':
206 default:
207 usage();
208 /* NOTREACHED */
209 }
210
211 argc -= optind;
212 argv += optind;
213
214 /*
215 * XXX
216 * Because of the way that fts(3) works, logical walks will not count
217 * the blocks actually used by symbolic links. We rationalize this by
218 * noting that users computing logical sizes are likely to do logical
219 * copies, so not counting the links is correct. The real reason is
220 * that we'd have to re-implement the kernel's symbolic link traversing
221 * algorithm to get this right. If, for example, you have relative
222 * symbolic links referencing other relative symbolic links, it gets
223 * very nasty, very fast. The bottom line is that it's documented in
224 * the man page, so it's a feature.
225 */
226
227 if (Hflag)
228 ftsoptions |= FTS_COMFOLLOW;
229 if (Lflag) {
230 ftsoptions &= ~FTS_PHYSICAL;
231 ftsoptions |= FTS_LOGICAL;
232 }
233
234 if (!Aflag && (cblocksize % DEV_BSIZE) != 0)
235 cblocksize = howmany(cblocksize, DEV_BSIZE) * DEV_BSIZE;
236
237 if (aflag + dflag + sflag > 1)
238 usage();
239 if (sflag)
240 depth = 0;
241
242 if (!*argv) {
243 argv = save;
244 argv[0] = dot;
245 argv[1] = NULL;
246 }
247
248 if (blocksize == 0)
249 (void)getbsize(¬used, &blocksize);
250
251 if (!Aflag) {
252 cblocksize /= DEV_BSIZE;
253 blocksize /= DEV_BSIZE;
254 }
255
256 if (threshold != 0)
257 threshold = howmany(threshold / DEV_BSIZE * cblocksize,
258 blocksize);
259
260 rval = 0;
261
262 (void)signal(SIGINFO, siginfo);
263
264 if ((fts = fts_open(argv, ftsoptions, NULL)) == NULL)
265 err(1, "fts_open");
266
267
268 xo_set_version(DU_XO_VERSION);
269 xo_open_container("disk-usage-information");
270 xo_open_list("paths");
271 while (errno = 0, (p = fts_read(fts)) != NULL) {
272 switch (p->fts_info) {
273 case FTS_D: /* Ignore. */
274 if (ignorep(p))
275 fts_set(fts, p, FTS_SKIP);
276 break;
277 case FTS_DP: /* Directory files */
278 if (ignorep(p))
279 break;
280
281 record_file_size(p);
282
283 if (p->fts_level <= depth && check_threshold(p))
284 print_file_size(p);
285
286 if (info) {
287 info = 0;
288 (void)printf("\t%s\n", p->fts_path);
289 }
290 break;
291 case FTS_DC: /* Ignore. */
292 break;
293 case FTS_DNR: /* Warn, continue. */
294 case FTS_ERR:
295 case FTS_NS:
296 xo_warnx("%s: %s", p->fts_path, strerror(p->fts_errno));
297 rval = 1;
298 break;
299 default: /* All other files */
300 if (ignorep(p))
301 break;
302
303 if (lflag == 0 && p->fts_statp->st_nlink > 1 &&
304 linkchk(p))
305 break;
306
307 record_file_size(p);
308
309 if ((aflag || p->fts_level == 0) && check_threshold(p))
310 print_file_size(p);
311 }
312 savednumber = p->fts_parent->fts_number;
313 }
314 xo_close_list("paths");
315
316 if (errno)
317 xo_err(1, "fts_read");
318
319 if (cflag) {
320 if (hflag > 0) {
321 prthumanval("{:total-blocks/%4s}\ttotal\n",
322 savednumber);
323 } else {
324 xo_emit("{:total-blocks/%jd}\ttotal\n",
325 (intmax_t)howmany(
326 savednumber * cblocksize, blocksize));
327 }
328 }
329
330 ignoreclean();
331 xo_close_container("disk-usage-information");
332 if (xo_finish() < 0)
333 xo_err(1, "stdout");
334 exit(rval);
335 }
336
337 static int
linkchk(FTSENT * p)338 linkchk(FTSENT *p)
339 {
340 struct links_entry {
341 struct links_entry *next;
342 struct links_entry *previous;
343 int links;
344 dev_t dev;
345 ino_t ino;
346 };
347 static const size_t links_hash_initial_size = 8192;
348 static struct links_entry **buckets;
349 static struct links_entry *free_list;
350 static size_t number_buckets;
351 static unsigned long number_entries;
352 static char stop_allocating;
353 struct links_entry *le, **new_buckets;
354 struct stat *st;
355 size_t i, new_size;
356 int hash;
357
358 st = p->fts_statp;
359
360 /* If necessary, initialize the hash table. */
361 if (buckets == NULL) {
362 number_buckets = links_hash_initial_size;
363 buckets = malloc(number_buckets * sizeof(buckets[0]));
364 if (buckets == NULL)
365 errx(1, "No memory for hardlink detection");
366 for (i = 0; i < number_buckets; i++)
367 buckets[i] = NULL;
368 }
369
370 /* If the hash table is getting too full, enlarge it. */
371 if (number_entries > number_buckets * 10 && !stop_allocating) {
372 new_size = number_buckets * 2;
373 new_buckets = calloc(new_size, sizeof(struct links_entry *));
374
375 /* Try releasing the free list to see if that helps. */
376 if (new_buckets == NULL && free_list != NULL) {
377 while (free_list != NULL) {
378 le = free_list;
379 free_list = le->next;
380 free(le);
381 }
382 new_buckets = calloc(new_size, sizeof(new_buckets[0]));
383 }
384
385 if (new_buckets == NULL) {
386 stop_allocating = 1;
387 xo_warnx("No more memory for tracking hard links");
388 } else {
389 for (i = 0; i < number_buckets; i++) {
390 while (buckets[i] != NULL) {
391 /* Remove entry from old bucket. */
392 le = buckets[i];
393 buckets[i] = le->next;
394
395 /* Add entry to new bucket. */
396 hash = (le->dev ^ le->ino) % new_size;
397
398 if (new_buckets[hash] != NULL)
399 new_buckets[hash]->previous =
400 le;
401 le->next = new_buckets[hash];
402 le->previous = NULL;
403 new_buckets[hash] = le;
404 }
405 }
406 free(buckets);
407 buckets = new_buckets;
408 number_buckets = new_size;
409 }
410 }
411
412 /* Try to locate this entry in the hash table. */
413 hash = (st->st_dev ^ st->st_ino) % number_buckets;
414 for (le = buckets[hash]; le != NULL; le = le->next) {
415 if (le->dev == st->st_dev && le->ino == st->st_ino) {
416 /*
417 * Save memory by releasing an entry when we've seen
418 * all of its links.
419 */
420 if (--le->links <= 0) {
421 if (le->previous != NULL)
422 le->previous->next = le->next;
423 if (le->next != NULL)
424 le->next->previous = le->previous;
425 if (buckets[hash] == le)
426 buckets[hash] = le->next;
427 number_entries--;
428 /* Recycle this node through the free list */
429 if (stop_allocating) {
430 free(le);
431 } else {
432 le->next = free_list;
433 free_list = le;
434 }
435 }
436 return (1);
437 }
438 }
439
440 if (stop_allocating)
441 return (0);
442
443 /* Add this entry to the links cache. */
444 if (free_list != NULL) {
445 /* Pull a node from the free list if we can. */
446 le = free_list;
447 free_list = le->next;
448 } else
449 /* Malloc one if we have to. */
450 le = malloc(sizeof(struct links_entry));
451 if (le == NULL) {
452 stop_allocating = 1;
453 xo_warnx("No more memory for tracking hard links");
454 return (0);
455 }
456 le->dev = st->st_dev;
457 le->ino = st->st_ino;
458 le->links = st->st_nlink - 1;
459 number_entries++;
460 le->next = buckets[hash];
461 le->previous = NULL;
462 if (buckets[hash] != NULL)
463 buckets[hash]->previous = le;
464 buckets[hash] = le;
465 return (0);
466 }
467
468 static void
prthumanval(const char * fmt,int64_t bytes)469 prthumanval(const char *fmt, int64_t bytes)
470 {
471 char buf[5];
472 int flags;
473
474 bytes *= cblocksize;
475 flags = HN_B | HN_NOSPACE | HN_DECIMAL;
476 if (!Aflag)
477 bytes *= DEV_BSIZE;
478 if (hflag == UNITS_SI)
479 flags |= HN_DIVISOR_1000;
480
481 humanize_number(buf, sizeof(buf), bytes, "", HN_AUTOSCALE, flags);
482
483 xo_emit(fmt, buf);
484 }
485
486 static void
usage(void)487 usage(void)
488 {
489 xo_error("%s\n%s\n%s\n",
490 "usage: du [--libxo] [-Aclnx] [-H | -L | -P] [-g | -h | -k | -m]",
491 " [-a | -s | -d depth] [-B blocksize] [-I mask] [-t threshold]",
492 " [file ...]");
493 exit(EX_USAGE);
494 }
495
496 static void
ignoreadd(const char * mask)497 ignoreadd(const char *mask)
498 {
499 struct ignentry *ign;
500
501 ign = calloc(1, sizeof(*ign));
502 if (ign == NULL)
503 errx(1, "cannot allocate memory");
504 ign->mask = strdup(mask);
505 if (ign->mask == NULL)
506 errx(1, "cannot allocate memory");
507 SLIST_INSERT_HEAD(&ignores, ign, next);
508 }
509
510 static void
ignoreclean(void)511 ignoreclean(void)
512 {
513 struct ignentry *ign;
514
515 while (!SLIST_EMPTY(&ignores)) {
516 ign = SLIST_FIRST(&ignores);
517 SLIST_REMOVE_HEAD(&ignores, next);
518 free(ign->mask);
519 free(ign);
520 }
521 }
522
523 static int
ignorep(FTSENT * ent)524 ignorep(FTSENT *ent)
525 {
526 struct ignentry *ign;
527
528 if (nodumpflag && (ent->fts_statp->st_flags & UF_NODUMP))
529 return (1);
530 SLIST_FOREACH(ign, &ignores, next)
531 if (fnmatch(ign->mask, ent->fts_name, 0) != FNM_NOMATCH)
532 return (1);
533 return (0);
534 }
535
536 static void
siginfo(int sig __unused)537 siginfo(int sig __unused)
538 {
539 info = 1;
540 }
541
542 /*
543 * Record the total disk/block size of the file or directory. The fts_number
544 * variable provided in FTSENT is used for keeping track of the total size.
545 * See FTS(3).
546 */
547 static void
record_file_size(FTSENT * p)548 record_file_size(FTSENT *p)
549 {
550 p->fts_number += Aflag ?
551 howmany(p->fts_statp->st_size, cblocksize) :
552 howmany(p->fts_statp->st_blocks, cblocksize);
553
554 p->fts_parent->fts_number += p->fts_number;
555 }
556
557 static bool
check_threshold(FTSENT * p)558 check_threshold(FTSENT *p)
559 {
560 return (threshold <= threshold_sign *
561 howmany(p->fts_number * cblocksize, blocksize));
562 }
563
564 static void
print_file_size(FTSENT * p)565 print_file_size(FTSENT *p)
566 {
567 xo_open_instance("paths");
568 if (hflag > 0) {
569 prthumanval("{:blocks/%4s}", p->fts_number);
570 xo_emit("\t{:path/%s}\n", p->fts_path);
571 } else {
572 xo_emit("{:blocks/%jd}\t{:path/%s}\n",
573 (intmax_t)howmany(p->fts_number * cblocksize, blocksize),
574 p->fts_path);
575 }
576 xo_close_instance("paths");
577 }
578