xref: /freebsd/usr.bin/du/du.c (revision 7d0873ebb83b19ba1e8a89e679470d885efe12e3)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Chris Newcomb.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/param.h>
36 #include <sys/queue.h>
37 #include <sys/stat.h>
38 #include <err.h>
39 #include <errno.h>
40 #include <fnmatch.h>
41 #include <fts.h>
42 #include <getopt.h>
43 #include <libutil.h>
44 #include <locale.h>
45 #include <stdint.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <sysexits.h>
50 #include <unistd.h>
51 #include <libxo/xo.h>
52 
53 #define SI_OPT	(CHAR_MAX + 1)
54 
55 #define UNITS_2		1
56 #define UNITS_SI	2
57 
58 #define DU_XO_VERSION "1"
59 
60 static SLIST_HEAD(ignhead, ignentry) ignores;
61 struct ignentry {
62 	char			*mask;
63 	SLIST_ENTRY(ignentry)	next;
64 };
65 
66 static int	linkchk(FTSENT *);
67 static void	usage(void);
68 static void	prthumanval(const char *, int64_t);
69 static void	ignoreadd(const char *);
70 static void	ignoreclean(void);
71 static int	ignorep(FTSENT *);
72 static void	siginfo(int __unused);
73 
74 static int	nodumpflag = 0;
75 static int	Aflag, hflag;
76 static long	blocksize, cblocksize;
77 static volatile sig_atomic_t info;
78 
79 static const struct option long_options[] =
80 {
81 	{ "si", no_argument, NULL, SI_OPT },
82 	{ NULL, no_argument, NULL, 0 },
83 };
84 
85 int
86 main(int argc, char *argv[])
87 {
88 	FTS		*fts;
89 	FTSENT		*p;
90 	off_t		savednumber, curblocks;
91 	off_t		threshold, threshold_sign;
92 	int		ftsoptions;
93 	int		depth;
94 	int		Hflag, Lflag, aflag, sflag, dflag, cflag;
95 	int		lflag, ch, notused, rval;
96 	char 		**save;
97 	static char	dot[] = ".";
98 
99 	setlocale(LC_ALL, "");
100 
101 	Hflag = Lflag = aflag = sflag = dflag = cflag = lflag = Aflag = 0;
102 
103 	save = argv;
104 	ftsoptions = FTS_PHYSICAL;
105 	savednumber = 0;
106 	threshold = 0;
107 	threshold_sign = 1;
108 	cblocksize = DEV_BSIZE;
109 	blocksize = 0;
110 	depth = INT_MAX;
111 	SLIST_INIT(&ignores);
112 
113 	argc = xo_parse_args(argc, argv);
114 	if (argc < 0)
115 		exit(EX_USAGE);
116 
117 	while ((ch = getopt_long(argc, argv, "+AB:HI:LPasd:cghklmnrt:x",
118 	    long_options, NULL)) != -1)
119 		switch (ch) {
120 		case 'A':
121 			Aflag = 1;
122 			break;
123 		case 'B':
124 			errno = 0;
125 			cblocksize = atoi(optarg);
126 			if (errno == ERANGE || cblocksize <= 0) {
127 				xo_warnx("invalid argument to option B: %s",
128 				    optarg);
129 				usage();
130 			}
131 			break;
132 		case 'H':
133 			Hflag = 1;
134 			Lflag = 0;
135 			break;
136 		case 'I':
137 			ignoreadd(optarg);
138 			break;
139 		case 'L':
140 			Lflag = 1;
141 			Hflag = 0;
142 			break;
143 		case 'P':
144 			Hflag = Lflag = 0;
145 			break;
146 		case 'a':
147 			aflag = 1;
148 			break;
149 		case 's':
150 			sflag = 1;
151 			break;
152 		case 'd':
153 			dflag = 1;
154 			errno = 0;
155 			depth = atoi(optarg);
156 			if (errno == ERANGE || depth < 0) {
157 				xo_warnx("invalid argument to option d: %s",
158 				    optarg);
159 				usage();
160 			}
161 			break;
162 		case 'c':
163 			cflag = 1;
164 			break;
165 		case 'g':
166 			hflag = 0;
167 			blocksize = 1073741824;
168 			break;
169 		case 'h':
170 			hflag = UNITS_2;
171 			break;
172 		case 'k':
173 			hflag = 0;
174 			blocksize = 1024;
175 			break;
176 		case 'l':
177 			lflag = 1;
178 			break;
179 		case 'm':
180 			hflag = 0;
181 			blocksize = 1048576;
182 			break;
183 		case 'n':
184 			nodumpflag = 1;
185 			break;
186 		case 'r':		 /* Compatibility. */
187 			break;
188 		case 't' :
189 			if (expand_number(optarg, &threshold) != 0 ||
190 			    threshold == 0) {
191 				xo_warnx("invalid threshold: %s", optarg);
192 				usage();
193 			} else if (threshold < 0)
194 				threshold_sign = -1;
195 			break;
196 		case 'x':
197 			ftsoptions |= FTS_XDEV;
198 			break;
199 		case SI_OPT:
200 			hflag = UNITS_SI;
201 			break;
202 		case '?':
203 		default:
204 			usage();
205 			/* NOTREACHED */
206 		}
207 
208 	argc -= optind;
209 	argv += optind;
210 
211 	/*
212 	 * XXX
213 	 * Because of the way that fts(3) works, logical walks will not count
214 	 * the blocks actually used by symbolic links.  We rationalize this by
215 	 * noting that users computing logical sizes are likely to do logical
216 	 * copies, so not counting the links is correct.  The real reason is
217 	 * that we'd have to re-implement the kernel's symbolic link traversing
218 	 * algorithm to get this right.  If, for example, you have relative
219 	 * symbolic links referencing other relative symbolic links, it gets
220 	 * very nasty, very fast.  The bottom line is that it's documented in
221 	 * the man page, so it's a feature.
222 	 */
223 
224 	if (Hflag)
225 		ftsoptions |= FTS_COMFOLLOW;
226 	if (Lflag) {
227 		ftsoptions &= ~FTS_PHYSICAL;
228 		ftsoptions |= FTS_LOGICAL;
229 	}
230 
231 	if (!Aflag && (cblocksize % DEV_BSIZE) != 0)
232 		cblocksize = howmany(cblocksize, DEV_BSIZE) * DEV_BSIZE;
233 
234 	if (aflag + dflag + sflag > 1)
235 		usage();
236 	if (sflag)
237 		depth = 0;
238 
239 	if (!*argv) {
240 		argv = save;
241 		argv[0] = dot;
242 		argv[1] = NULL;
243 	}
244 
245 	if (blocksize == 0)
246 		(void)getbsize(&notused, &blocksize);
247 
248 	if (!Aflag) {
249 		cblocksize /= DEV_BSIZE;
250 		blocksize /= DEV_BSIZE;
251 	}
252 
253 	if (threshold != 0)
254 		threshold = howmany(threshold / DEV_BSIZE * cblocksize,
255 		    blocksize);
256 
257 	rval = 0;
258 
259 	(void)signal(SIGINFO, siginfo);
260 
261 	if ((fts = fts_open(argv, ftsoptions, NULL)) == NULL)
262 		err(1, "fts_open");
263 
264 
265 	xo_set_version(DU_XO_VERSION);
266 	xo_open_container("disk-usage-information");
267 	xo_open_list("paths");
268 	while (errno = 0, (p = fts_read(fts)) != NULL) {
269 		switch (p->fts_info) {
270 		case FTS_D:			/* Ignore. */
271 			if (ignorep(p))
272 				fts_set(fts, p, FTS_SKIP);
273 			break;
274 		case FTS_DP:
275 			if (ignorep(p))
276 				break;
277 
278 			curblocks = Aflag ?
279 			    howmany(p->fts_statp->st_size, cblocksize) :
280 			    howmany(p->fts_statp->st_blocks, cblocksize);
281 			p->fts_parent->fts_bignum += p->fts_bignum +=
282 			    curblocks;
283 
284 			if (p->fts_level <= depth && threshold <=
285 			    threshold_sign * howmany(p->fts_bignum *
286 			    cblocksize, blocksize)) {
287 				xo_open_instance("paths");
288 				if (hflag > 0) {
289 					prthumanval("{:blocks/%4s}",
290 					    p->fts_bignum);
291 					xo_emit("\t{:path/%s}\n", p->fts_path);
292 				} else {
293 					xo_emit("{:blocks/%jd}\t{:path/%s}\n",
294 					    (intmax_t)howmany(p->fts_bignum *
295 					    cblocksize, blocksize),
296 					    p->fts_path);
297 				}
298 				xo_close_instance("paths");
299 			}
300 			if (info) {
301 				info = 0;
302 				(void)printf("\t%s\n", p->fts_path);
303 			}
304 			break;
305 		case FTS_DC:			/* Ignore. */
306 			break;
307 		case FTS_DNR:			/* Warn, continue. */
308 		case FTS_ERR:
309 		case FTS_NS:
310 			xo_warnx("%s: %s", p->fts_path, strerror(p->fts_errno));
311 			rval = 1;
312 			break;
313 		default:
314 			if (ignorep(p))
315 				break;
316 
317 			if (lflag == 0 && p->fts_statp->st_nlink > 1 &&
318 			    linkchk(p))
319 				break;
320 
321 			curblocks = Aflag ?
322 			    howmany(p->fts_statp->st_size, cblocksize) :
323 			    howmany(p->fts_statp->st_blocks, cblocksize);
324 
325 			if (aflag || p->fts_level == 0) {
326 				xo_open_instance("paths");
327 				if (hflag > 0) {
328 					prthumanval("{:blocks/%4s}", curblocks);
329 					xo_emit("\t{:path/%s}\n", p->fts_path);
330 				} else {
331 					xo_emit("{:blocks/%jd}\t{:path/%s}\n",
332 					    (intmax_t)howmany(curblocks *
333 					    cblocksize, blocksize),
334 					    p->fts_path);
335 				}
336 				xo_close_instance("paths");
337 			}
338 
339 			p->fts_parent->fts_bignum += curblocks;
340 		}
341 		savednumber = p->fts_parent->fts_bignum;
342 	}
343 	xo_close_list("paths");
344 
345 	if (errno)
346 		xo_err(1, "fts_read");
347 
348 	if (cflag) {
349 		if (hflag > 0) {
350 			prthumanval("{:total-blocks/%4s}\ttotal\n",
351 			    savednumber);
352 		} else {
353 			xo_emit("{:total-blocks/%jd}\ttotal\n",
354 			    (intmax_t)howmany(
355 			    savednumber * cblocksize, blocksize));
356 		}
357 	}
358 
359 	ignoreclean();
360 	xo_close_container("disk-usage-information");
361 	if (xo_finish() < 0)
362 		xo_err(1, "stdout");
363 	exit(rval);
364 }
365 
366 static int
367 linkchk(FTSENT *p)
368 {
369 	struct links_entry {
370 		struct links_entry *next;
371 		struct links_entry *previous;
372 		int	 links;
373 		dev_t	 dev;
374 		ino_t	 ino;
375 	};
376 	static const size_t links_hash_initial_size = 8192;
377 	static struct links_entry **buckets;
378 	static struct links_entry *free_list;
379 	static size_t number_buckets;
380 	static unsigned long number_entries;
381 	static char stop_allocating;
382 	struct links_entry *le, **new_buckets;
383 	struct stat *st;
384 	size_t i, new_size;
385 	int hash;
386 
387 	st = p->fts_statp;
388 
389 	/* If necessary, initialize the hash table. */
390 	if (buckets == NULL) {
391 		number_buckets = links_hash_initial_size;
392 		buckets = malloc(number_buckets * sizeof(buckets[0]));
393 		if (buckets == NULL)
394 			errx(1, "No memory for hardlink detection");
395 		for (i = 0; i < number_buckets; i++)
396 			buckets[i] = NULL;
397 	}
398 
399 	/* If the hash table is getting too full, enlarge it. */
400 	if (number_entries > number_buckets * 10 && !stop_allocating) {
401 		new_size = number_buckets * 2;
402 		new_buckets = calloc(new_size, sizeof(struct links_entry *));
403 
404 		/* Try releasing the free list to see if that helps. */
405 		if (new_buckets == NULL && free_list != NULL) {
406 			while (free_list != NULL) {
407 				le = free_list;
408 				free_list = le->next;
409 				free(le);
410 			}
411 			new_buckets = calloc(new_size, sizeof(new_buckets[0]));
412 		}
413 
414 		if (new_buckets == NULL) {
415 			stop_allocating = 1;
416 			xo_warnx("No more memory for tracking hard links");
417 		} else {
418 			for (i = 0; i < number_buckets; i++) {
419 				while (buckets[i] != NULL) {
420 					/* Remove entry from old bucket. */
421 					le = buckets[i];
422 					buckets[i] = le->next;
423 
424 					/* Add entry to new bucket. */
425 					hash = (le->dev ^ le->ino) % new_size;
426 
427 					if (new_buckets[hash] != NULL)
428 						new_buckets[hash]->previous =
429 						    le;
430 					le->next = new_buckets[hash];
431 					le->previous = NULL;
432 					new_buckets[hash] = le;
433 				}
434 			}
435 			free(buckets);
436 			buckets = new_buckets;
437 			number_buckets = new_size;
438 		}
439 	}
440 
441 	/* Try to locate this entry in the hash table. */
442 	hash = ( st->st_dev ^ st->st_ino ) % number_buckets;
443 	for (le = buckets[hash]; le != NULL; le = le->next) {
444 		if (le->dev == st->st_dev && le->ino == st->st_ino) {
445 			/*
446 			 * Save memory by releasing an entry when we've seen
447 			 * all of its links.
448 			 */
449 			if (--le->links <= 0) {
450 				if (le->previous != NULL)
451 					le->previous->next = le->next;
452 				if (le->next != NULL)
453 					le->next->previous = le->previous;
454 				if (buckets[hash] == le)
455 					buckets[hash] = le->next;
456 				number_entries--;
457 				/* Recycle this node through the free list */
458 				if (stop_allocating) {
459 					free(le);
460 				} else {
461 					le->next = free_list;
462 					free_list = le;
463 				}
464 			}
465 			return (1);
466 		}
467 	}
468 
469 	if (stop_allocating)
470 		return (0);
471 
472 	/* Add this entry to the links cache. */
473 	if (free_list != NULL) {
474 		/* Pull a node from the free list if we can. */
475 		le = free_list;
476 		free_list = le->next;
477 	} else
478 		/* Malloc one if we have to. */
479 		le = malloc(sizeof(struct links_entry));
480 	if (le == NULL) {
481 		stop_allocating = 1;
482 		xo_warnx("No more memory for tracking hard links");
483 		return (0);
484 	}
485 	le->dev = st->st_dev;
486 	le->ino = st->st_ino;
487 	le->links = st->st_nlink - 1;
488 	number_entries++;
489 	le->next = buckets[hash];
490 	le->previous = NULL;
491 	if (buckets[hash] != NULL)
492 		buckets[hash]->previous = le;
493 	buckets[hash] = le;
494 	return (0);
495 }
496 
497 static void
498 prthumanval(const char *fmt, int64_t bytes)
499 {
500 	char buf[5];
501 	int flags;
502 
503 	bytes *= cblocksize;
504 	flags = HN_B | HN_NOSPACE | HN_DECIMAL;
505 	if (!Aflag)
506 		bytes *= DEV_BSIZE;
507 	if (hflag == UNITS_SI)
508 		flags |= HN_DIVISOR_1000;
509 
510 	humanize_number(buf, sizeof(buf), bytes, "", HN_AUTOSCALE, flags);
511 
512 	xo_emit(fmt, buf);
513 }
514 
515 static void
516 usage(void)
517 {
518 	xo_error(
519 		"usage: du [-Aclnx] [-H | -L | -P] [-g | -h | -k | -m] "
520 		"[-a | -s | -d depth] [-B blocksize] [-I mask] "
521 		"[-t threshold] [file ...]\n");
522 	exit(EX_USAGE);
523 }
524 
525 static void
526 ignoreadd(const char *mask)
527 {
528 	struct ignentry *ign;
529 
530 	ign = calloc(1, sizeof(*ign));
531 	if (ign == NULL)
532 		errx(1, "cannot allocate memory");
533 	ign->mask = strdup(mask);
534 	if (ign->mask == NULL)
535 		errx(1, "cannot allocate memory");
536 	SLIST_INSERT_HEAD(&ignores, ign, next);
537 }
538 
539 static void
540 ignoreclean(void)
541 {
542 	struct ignentry *ign;
543 
544 	while (!SLIST_EMPTY(&ignores)) {
545 		ign = SLIST_FIRST(&ignores);
546 		SLIST_REMOVE_HEAD(&ignores, next);
547 		free(ign->mask);
548 		free(ign);
549 	}
550 }
551 
552 static int
553 ignorep(FTSENT *ent)
554 {
555 	struct ignentry *ign;
556 
557 	if (nodumpflag && (ent->fts_statp->st_flags & UF_NODUMP))
558 		return 1;
559 	SLIST_FOREACH(ign, &ignores, next)
560 		if (fnmatch(ign->mask, ent->fts_name, 0) != FNM_NOMATCH)
561 			return 1;
562 	return 0;
563 }
564 
565 static void
566 siginfo(int sig __unused)
567 {
568 
569 	info = 1;
570 }
571