xref: /freebsd/usr.bin/du/du.c (revision 74a9aac40e4283503952489645476155a3593954)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Chris Newcomb.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/param.h>
36 #include <sys/queue.h>
37 #include <sys/stat.h>
38 #include <err.h>
39 #include <errno.h>
40 #include <fnmatch.h>
41 #include <fts.h>
42 #include <getopt.h>
43 #include <libutil.h>
44 #include <locale.h>
45 #include <stdint.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <stdbool.h>
49 #include <string.h>
50 #include <sysexits.h>
51 #include <unistd.h>
52 #include <libxo/xo.h>
53 
54 #define SI_OPT	(CHAR_MAX + 1)
55 
56 #define UNITS_2		1
57 #define UNITS_SI	2
58 
59 #define DU_XO_VERSION "1"
60 
61 static SLIST_HEAD(ignhead, ignentry) ignores;
62 struct ignentry {
63 	char			*mask;
64 	SLIST_ENTRY(ignentry)	next;
65 };
66 
67 static bool	check_threshold(FTSENT *);
68 static void	ignoreadd(const char *);
69 static void	ignoreclean(void);
70 static int	ignorep(FTSENT *);
71 static int	linkchk(FTSENT *);
72 static void	print_file_size(FTSENT *);
73 static void	prthumanval(const char *, int64_t);
74 static void	record_file_size(FTSENT *);
75 static void	siginfo(int __unused);
76 static void	usage(void);
77 
78 static int	nodumpflag = 0;
79 static int	Aflag, hflag;
80 static long	blocksize, cblocksize;
81 static volatile sig_atomic_t info;
82 static off_t	threshold, threshold_sign;
83 
84 static const struct option long_options[] = {
85 	{ "si", no_argument, NULL, SI_OPT },
86 	{ NULL, no_argument, NULL, 0 },
87 };
88 
89 int
main(int argc,char * argv[])90 main(int argc, char *argv[])
91 {
92 	FTS		*fts;
93 	FTSENT		*p;
94 	off_t		savednumber;
95 	int		ftsoptions;
96 	int		depth;
97 	int		Hflag, Lflag, aflag, sflag, dflag, cflag;
98 	int		lflag, ch, notused, rval;
99 	char 		**save;
100 	static char	dot[] = ".";
101 
102 	setlocale(LC_ALL, "");
103 
104 	Hflag = Lflag = aflag = sflag = dflag = cflag = lflag = Aflag = 0;
105 
106 	save = argv;
107 	ftsoptions = FTS_PHYSICAL;
108 	savednumber = 0;
109 	threshold = 0;
110 	threshold_sign = 1;
111 	cblocksize = DEV_BSIZE;
112 	blocksize = 0;
113 	depth = INT_MAX;
114 	SLIST_INIT(&ignores);
115 
116 	argc = xo_parse_args(argc, argv);
117 	if (argc < 0)
118 		exit(EX_USAGE);
119 
120 	while ((ch = getopt_long(argc, argv, "+AB:HI:LPasd:cghklmnrt:x",
121 	    long_options, NULL)) != -1)
122 		switch (ch) {
123 		case 'A':
124 			Aflag = 1;
125 			break;
126 		case 'B':
127 			errno = 0;
128 			cblocksize = atoi(optarg);
129 			if (errno == ERANGE || cblocksize <= 0) {
130 				xo_warnx("invalid argument to option B: %s",
131 				    optarg);
132 				usage();
133 			}
134 			break;
135 		case 'H':
136 			Hflag = 1;
137 			Lflag = 0;
138 			break;
139 		case 'I':
140 			ignoreadd(optarg);
141 			break;
142 		case 'L':
143 			Lflag = 1;
144 			Hflag = 0;
145 			break;
146 		case 'P':
147 			Hflag = Lflag = 0;
148 			break;
149 		case 'a':
150 			aflag = 1;
151 			break;
152 		case 's':
153 			sflag = 1;
154 			break;
155 		case 'd':
156 			dflag = 1;
157 			errno = 0;
158 			depth = atoi(optarg);
159 			if (errno == ERANGE || depth < 0) {
160 				xo_warnx("invalid argument to option d: %s",
161 				    optarg);
162 				usage();
163 			}
164 			break;
165 		case 'c':
166 			cflag = 1;
167 			break;
168 		case 'g':
169 			hflag = 0;
170 			blocksize = 1073741824;
171 			break;
172 		case 'h':
173 			hflag = UNITS_2;
174 			break;
175 		case 'k':
176 			hflag = 0;
177 			blocksize = 1024;
178 			break;
179 		case 'l':
180 			lflag = 1;
181 			break;
182 		case 'm':
183 			hflag = 0;
184 			blocksize = 1048576;
185 			break;
186 		case 'n':
187 			nodumpflag = 1;
188 			break;
189 		case 'r':		 /* Compatibility. */
190 			break;
191 		case 't':
192 			if (expand_number(optarg, &threshold) != 0 ||
193 			    threshold == 0) {
194 				xo_warnx("invalid threshold: %s", optarg);
195 				usage();
196 			} else if (threshold < 0)
197 				threshold_sign = -1;
198 			break;
199 		case 'x':
200 			ftsoptions |= FTS_XDEV;
201 			break;
202 		case SI_OPT:
203 			hflag = UNITS_SI;
204 			break;
205 		case '?':
206 		default:
207 			usage();
208 			/* NOTREACHED */
209 		}
210 
211 	argc -= optind;
212 	argv += optind;
213 
214 	/*
215 	 * XXX
216 	 * Because of the way that fts(3) works, logical walks will not count
217 	 * the blocks actually used by symbolic links.  We rationalize this by
218 	 * noting that users computing logical sizes are likely to do logical
219 	 * copies, so not counting the links is correct.  The real reason is
220 	 * that we'd have to re-implement the kernel's symbolic link traversing
221 	 * algorithm to get this right.  If, for example, you have relative
222 	 * symbolic links referencing other relative symbolic links, it gets
223 	 * very nasty, very fast.  The bottom line is that it's documented in
224 	 * the man page, so it's a feature.
225 	 */
226 
227 	if (Hflag)
228 		ftsoptions |= FTS_COMFOLLOW;
229 	if (Lflag) {
230 		ftsoptions &= ~FTS_PHYSICAL;
231 		ftsoptions |= FTS_LOGICAL;
232 	}
233 
234 	if (!Aflag && (cblocksize % DEV_BSIZE) != 0)
235 		cblocksize = howmany(cblocksize, DEV_BSIZE) * DEV_BSIZE;
236 
237 	if (aflag + dflag + sflag > 1)
238 		usage();
239 	if (sflag)
240 		depth = 0;
241 
242 	if (!*argv) {
243 		argv = save;
244 		argv[0] = dot;
245 		argv[1] = NULL;
246 	}
247 
248 	if (blocksize == 0)
249 		(void)getbsize(&notused, &blocksize);
250 
251 	if (!Aflag) {
252 		cblocksize /= DEV_BSIZE;
253 		blocksize /= DEV_BSIZE;
254 	}
255 
256 	if (threshold != 0)
257 		threshold = howmany(threshold / DEV_BSIZE * cblocksize,
258 		    blocksize);
259 
260 	rval = 0;
261 
262 	(void)signal(SIGINFO, siginfo);
263 
264 	if ((fts = fts_open(argv, ftsoptions, NULL)) == NULL)
265 		err(1, "fts_open");
266 
267 
268 	xo_set_version(DU_XO_VERSION);
269 	xo_open_container("disk-usage-information");
270 	xo_open_list("paths");
271 	while (errno = 0, (p = fts_read(fts)) != NULL) {
272 		switch (p->fts_info) {
273 		case FTS_D:			/* Ignore. */
274 			if (ignorep(p))
275 				fts_set(fts, p, FTS_SKIP);
276 			break;
277 		case FTS_DP:			/* Directory files */
278 			if (ignorep(p))
279 				break;
280 
281 			record_file_size(p);
282 
283 			if (p->fts_level <= depth && check_threshold(p))
284 				print_file_size(p);
285 
286 			if (info) {
287 				info = 0;
288 				(void)printf("\t%s\n", p->fts_path);
289 			}
290 			break;
291 		case FTS_DC:			/* Ignore. */
292 			break;
293 		case FTS_DNR:			/* Warn, continue. */
294 		case FTS_ERR:
295 		case FTS_NS:
296 			xo_warnx("%s: %s", p->fts_path, strerror(p->fts_errno));
297 			rval = 1;
298 			break;
299 		default:			/* All other files */
300 			if (ignorep(p))
301 				break;
302 
303 			if (lflag == 0 && p->fts_statp->st_nlink > 1 &&
304 			    linkchk(p))
305 				break;
306 
307 			record_file_size(p);
308 
309 			if ((aflag || p->fts_level == 0) && check_threshold(p))
310 				print_file_size(p);
311 		}
312 		savednumber = p->fts_parent->fts_number;
313 	}
314 	xo_close_list("paths");
315 
316 	if (errno)
317 		xo_err(1, "fts_read");
318 
319 	if (cflag) {
320 		if (hflag > 0) {
321 			prthumanval("{:total-blocks/%4s}\ttotal\n",
322 			    savednumber);
323 		} else {
324 			xo_emit("{:total-blocks/%jd}\ttotal\n",
325 			    (intmax_t)howmany(
326 			    savednumber * cblocksize, blocksize));
327 		}
328 	}
329 
330 	ignoreclean();
331 	xo_close_container("disk-usage-information");
332 	if (xo_finish() < 0)
333 		xo_err(1, "stdout");
334 	exit(rval);
335 }
336 
337 static int
linkchk(FTSENT * p)338 linkchk(FTSENT *p)
339 {
340 	struct links_entry {
341 		struct links_entry *next;
342 		struct links_entry *previous;
343 		int	 links;
344 		dev_t	 dev;
345 		ino_t	 ino;
346 	};
347 	static const size_t links_hash_initial_size = 8192;
348 	static struct links_entry **buckets;
349 	static struct links_entry *free_list;
350 	static size_t number_buckets;
351 	static unsigned long number_entries;
352 	static char stop_allocating;
353 	struct links_entry *le, **new_buckets;
354 	struct stat *st;
355 	size_t i, new_size;
356 	int hash;
357 
358 	st = p->fts_statp;
359 
360 	/* If necessary, initialize the hash table. */
361 	if (buckets == NULL) {
362 		number_buckets = links_hash_initial_size;
363 		buckets = malloc(number_buckets * sizeof(buckets[0]));
364 		if (buckets == NULL)
365 			errx(1, "No memory for hardlink detection");
366 		for (i = 0; i < number_buckets; i++)
367 			buckets[i] = NULL;
368 	}
369 
370 	/* If the hash table is getting too full, enlarge it. */
371 	if (number_entries > number_buckets * 10 && !stop_allocating) {
372 		new_size = number_buckets * 2;
373 		new_buckets = calloc(new_size, sizeof(struct links_entry *));
374 
375 		/* Try releasing the free list to see if that helps. */
376 		if (new_buckets == NULL && free_list != NULL) {
377 			while (free_list != NULL) {
378 				le = free_list;
379 				free_list = le->next;
380 				free(le);
381 			}
382 			new_buckets = calloc(new_size, sizeof(new_buckets[0]));
383 		}
384 
385 		if (new_buckets == NULL) {
386 			stop_allocating = 1;
387 			xo_warnx("No more memory for tracking hard links");
388 		} else {
389 			for (i = 0; i < number_buckets; i++) {
390 				while (buckets[i] != NULL) {
391 					/* Remove entry from old bucket. */
392 					le = buckets[i];
393 					buckets[i] = le->next;
394 
395 					/* Add entry to new bucket. */
396 					hash = (le->dev ^ le->ino) % new_size;
397 
398 					if (new_buckets[hash] != NULL)
399 						new_buckets[hash]->previous =
400 						    le;
401 					le->next = new_buckets[hash];
402 					le->previous = NULL;
403 					new_buckets[hash] = le;
404 				}
405 			}
406 			free(buckets);
407 			buckets = new_buckets;
408 			number_buckets = new_size;
409 		}
410 	}
411 
412 	/* Try to locate this entry in the hash table. */
413 	hash = (st->st_dev ^ st->st_ino) % number_buckets;
414 	for (le = buckets[hash]; le != NULL; le = le->next) {
415 		if (le->dev == st->st_dev && le->ino == st->st_ino) {
416 			/*
417 			 * Save memory by releasing an entry when we've seen
418 			 * all of its links.
419 			 */
420 			if (--le->links <= 0) {
421 				if (le->previous != NULL)
422 					le->previous->next = le->next;
423 				if (le->next != NULL)
424 					le->next->previous = le->previous;
425 				if (buckets[hash] == le)
426 					buckets[hash] = le->next;
427 				number_entries--;
428 				/* Recycle this node through the free list */
429 				if (stop_allocating) {
430 					free(le);
431 				} else {
432 					le->next = free_list;
433 					free_list = le;
434 				}
435 			}
436 			return (1);
437 		}
438 	}
439 
440 	if (stop_allocating)
441 		return (0);
442 
443 	/* Add this entry to the links cache. */
444 	if (free_list != NULL) {
445 		/* Pull a node from the free list if we can. */
446 		le = free_list;
447 		free_list = le->next;
448 	} else
449 		/* Malloc one if we have to. */
450 		le = malloc(sizeof(struct links_entry));
451 	if (le == NULL) {
452 		stop_allocating = 1;
453 		xo_warnx("No more memory for tracking hard links");
454 		return (0);
455 	}
456 	le->dev = st->st_dev;
457 	le->ino = st->st_ino;
458 	le->links = st->st_nlink - 1;
459 	number_entries++;
460 	le->next = buckets[hash];
461 	le->previous = NULL;
462 	if (buckets[hash] != NULL)
463 		buckets[hash]->previous = le;
464 	buckets[hash] = le;
465 	return (0);
466 }
467 
468 static void
prthumanval(const char * fmt,int64_t bytes)469 prthumanval(const char *fmt, int64_t bytes)
470 {
471 	char buf[5];
472 	int flags;
473 
474 	bytes *= cblocksize;
475 	flags = HN_B | HN_NOSPACE | HN_DECIMAL;
476 	if (!Aflag)
477 		bytes *= DEV_BSIZE;
478 	if (hflag == UNITS_SI)
479 		flags |= HN_DIVISOR_1000;
480 
481 	humanize_number(buf, sizeof(buf), bytes, "", HN_AUTOSCALE, flags);
482 
483 	xo_emit(fmt, buf);
484 }
485 
486 static void
usage(void)487 usage(void)
488 {
489 	xo_error("%s\n%s\n%s\n",
490 	    "usage: du [--libxo] [-Aclnx] [-H | -L | -P] [-g | -h | -k | -m]",
491 	    "          [-a | -s | -d depth] [-B blocksize] [-I mask] [-t threshold]",
492 	    "          [file ...]");
493 	exit(EX_USAGE);
494 }
495 
496 static void
ignoreadd(const char * mask)497 ignoreadd(const char *mask)
498 {
499 	struct ignentry *ign;
500 
501 	ign = calloc(1, sizeof(*ign));
502 	if (ign == NULL)
503 		errx(1, "cannot allocate memory");
504 	ign->mask = strdup(mask);
505 	if (ign->mask == NULL)
506 		errx(1, "cannot allocate memory");
507 	SLIST_INSERT_HEAD(&ignores, ign, next);
508 }
509 
510 static void
ignoreclean(void)511 ignoreclean(void)
512 {
513 	struct ignentry *ign;
514 
515 	while (!SLIST_EMPTY(&ignores)) {
516 		ign = SLIST_FIRST(&ignores);
517 		SLIST_REMOVE_HEAD(&ignores, next);
518 		free(ign->mask);
519 		free(ign);
520 	}
521 }
522 
523 static int
ignorep(FTSENT * ent)524 ignorep(FTSENT *ent)
525 {
526 	struct ignentry *ign;
527 
528 	if (nodumpflag && (ent->fts_statp->st_flags & UF_NODUMP))
529 		return (1);
530 	SLIST_FOREACH(ign, &ignores, next)
531 		if (fnmatch(ign->mask, ent->fts_name, 0) != FNM_NOMATCH)
532 			return (1);
533 	return (0);
534 }
535 
536 static void
siginfo(int sig __unused)537 siginfo(int sig __unused)
538 {
539 	info = 1;
540 }
541 
542 /*
543  * Record the total disk/block size of the file or directory. The fts_number
544  * variable provided in FTSENT is used for keeping track of the total size.
545  * See FTS(3).
546  */
547 static void
record_file_size(FTSENT * p)548 record_file_size(FTSENT *p)
549 {
550 	p->fts_number += Aflag ?
551 	    howmany(p->fts_statp->st_size, cblocksize) :
552 	    howmany(p->fts_statp->st_blocks, cblocksize);
553 
554 	p->fts_parent->fts_number += p->fts_number;
555 }
556 
557 static bool
check_threshold(FTSENT * p)558 check_threshold(FTSENT *p)
559 {
560 	return (threshold <= threshold_sign *
561 	    howmany(p->fts_number * cblocksize, blocksize));
562 }
563 
564 static void
print_file_size(FTSENT * p)565 print_file_size(FTSENT *p)
566 {
567 	xo_open_instance("paths");
568 	if (hflag > 0) {
569 		prthumanval("{:blocks/%4s}", p->fts_number);
570 		xo_emit("\t{:path/%s}\n", p->fts_path);
571 	} else {
572 		xo_emit("{:blocks/%jd}\t{:path/%s}\n",
573 		    (intmax_t)howmany(p->fts_number * cblocksize, blocksize),
574 		p->fts_path);
575 	}
576 	xo_close_instance("paths");
577 }
578