xref: /freebsd/usr.bin/du/du.c (revision cfec995c87f39e59c80554b85625b4aaa8ddf8db)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Chris Newcomb.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/param.h>
36 #include <sys/queue.h>
37 #include <sys/stat.h>
38 
39 #include <errno.h>
40 #include <fnmatch.h>
41 #include <fts.h>
42 #include <getopt.h>
43 #include <libutil.h>
44 #include <locale.h>
45 #include <stdint.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <stdbool.h>
49 #include <string.h>
50 #include <sysexits.h>
51 #include <unistd.h>
52 #include <libxo/xo.h>
53 
54 #define SI_OPT	(CHAR_MAX + 1)
55 
56 #define UNITS_2		1
57 #define UNITS_SI	2
58 
59 #define DU_XO_VERSION "1"
60 
61 static SLIST_HEAD(ignhead, ignentry) ignores;
62 struct ignentry {
63 	char			*mask;
64 	SLIST_ENTRY(ignentry)	next;
65 };
66 
67 static bool	check_threshold(FTSENT *);
68 static void	ignoreadd(const char *);
69 static void	ignoreclean(void);
70 static bool	ignorep(FTSENT *);
71 static bool	linkchk(FTSENT *);
72 static void	print_file_size(FTSENT *);
73 static void	prthumanval(const char *, int64_t);
74 static void	record_file_size(FTSENT *);
75 static void	siginfo(int __unused);
76 static void	usage(void);
77 
78 static int	nodumpflag = 0;
79 static int	Aflag, hflag;
80 static long	blocksize, cblocksize;
81 static volatile sig_atomic_t info;
82 static off_t	threshold, threshold_sign;
83 
84 static const struct option long_options[] = {
85 	{ "si", no_argument, NULL, SI_OPT },
86 	{ NULL, no_argument, NULL, 0 },
87 };
88 
89 int
90 main(int argc, char *argv[])
91 {
92 	FTS		*fts;
93 	FTSENT		*p;
94 	int64_t		num;
95 	off_t		savednumber;
96 	int		ftsoptions;
97 	int		depth;
98 	int		Hflag, Lflag, aflag, sflag, dflag, cflag;
99 	int		lflag, ch, notused, rval;
100 	char 		**save;
101 	static char	dot[] = ".";
102 
103 	setlocale(LC_ALL, "");
104 
105 	Hflag = Lflag = aflag = sflag = dflag = cflag = lflag = Aflag = 0;
106 
107 	save = argv;
108 	ftsoptions = FTS_PHYSICAL;
109 	savednumber = 0;
110 	threshold = 0;
111 	threshold_sign = 1;
112 	cblocksize = DEV_BSIZE;
113 	blocksize = 0;
114 	depth = INT_MAX;
115 	SLIST_INIT(&ignores);
116 
117 	argc = xo_parse_args(argc, argv);
118 	if (argc < 0)
119 		exit(EX_USAGE);
120 
121 	while ((ch = getopt_long(argc, argv, "+AB:HI:LPasd:cghklmnrt:x",
122 	    long_options, NULL)) != -1)
123 		switch (ch) {
124 		case 'A':
125 			Aflag = 1;
126 			break;
127 		case 'B':
128 			errno = 0;
129 			cblocksize = atoi(optarg);
130 			if (errno == ERANGE || cblocksize <= 0) {
131 				xo_warnx("invalid argument to option B: %s",
132 				    optarg);
133 				usage();
134 			}
135 			break;
136 		case 'H':
137 			Hflag = 1;
138 			Lflag = 0;
139 			break;
140 		case 'I':
141 			ignoreadd(optarg);
142 			break;
143 		case 'L':
144 			Lflag = 1;
145 			Hflag = 0;
146 			break;
147 		case 'P':
148 			Hflag = Lflag = 0;
149 			break;
150 		case 'a':
151 			aflag = 1;
152 			break;
153 		case 's':
154 			sflag = 1;
155 			break;
156 		case 'd':
157 			dflag = 1;
158 			errno = 0;
159 			depth = atoi(optarg);
160 			if (errno == ERANGE || depth < 0) {
161 				xo_warnx("invalid argument to option d: %s",
162 				    optarg);
163 				usage();
164 			}
165 			break;
166 		case 'c':
167 			cflag = 1;
168 			break;
169 		case 'g':
170 			hflag = 0;
171 			blocksize = 1073741824;
172 			break;
173 		case 'h':
174 			hflag = UNITS_2;
175 			break;
176 		case 'k':
177 			hflag = 0;
178 			blocksize = 1024;
179 			break;
180 		case 'l':
181 			lflag = 1;
182 			break;
183 		case 'm':
184 			hflag = 0;
185 			blocksize = 1048576;
186 			break;
187 		case 'n':
188 			nodumpflag = 1;
189 			break;
190 		case 'r':		 /* Compatibility. */
191 			break;
192 		case 't':
193 			if (expand_number(optarg, &num) != 0 || num == 0) {
194 				xo_warnx("invalid threshold: %s", optarg);
195 				usage();
196 			}
197 			threshold = num;
198 			if (threshold < 0)
199 				threshold_sign = -1;
200 			break;
201 		case 'x':
202 			ftsoptions |= FTS_XDEV;
203 			break;
204 		case SI_OPT:
205 			hflag = UNITS_SI;
206 			break;
207 		case '?':
208 		default:
209 			usage();
210 			/* NOTREACHED */
211 		}
212 
213 	argc -= optind;
214 	argv += optind;
215 
216 	/*
217 	 * XXX
218 	 * Because of the way that fts(3) works, logical walks will not count
219 	 * the blocks actually used by symbolic links.  We rationalize this by
220 	 * noting that users computing logical sizes are likely to do logical
221 	 * copies, so not counting the links is correct.  The real reason is
222 	 * that we'd have to re-implement the kernel's symbolic link traversing
223 	 * algorithm to get this right.  If, for example, you have relative
224 	 * symbolic links referencing other relative symbolic links, it gets
225 	 * very nasty, very fast.  The bottom line is that it's documented in
226 	 * the man page, so it's a feature.
227 	 */
228 
229 	if (Hflag)
230 		ftsoptions |= FTS_COMFOLLOW;
231 	if (Lflag) {
232 		ftsoptions &= ~FTS_PHYSICAL;
233 		ftsoptions |= FTS_LOGICAL;
234 	}
235 
236 	if (!Aflag && (cblocksize % DEV_BSIZE) != 0)
237 		cblocksize = howmany(cblocksize, DEV_BSIZE) * DEV_BSIZE;
238 
239 	if (aflag + dflag + sflag > 1)
240 		usage();
241 	if (sflag)
242 		depth = 0;
243 
244 	if (argc == 0) {
245 		argv = save;
246 		argv[0] = dot;
247 		argv[1] = NULL;
248 	}
249 
250 	if (blocksize == 0)
251 		(void)getbsize(&notused, &blocksize);
252 
253 	if (!Aflag) {
254 		cblocksize /= DEV_BSIZE;
255 		blocksize /= DEV_BSIZE;
256 	}
257 
258 	if (threshold != 0)
259 		threshold = howmany(threshold / DEV_BSIZE * cblocksize,
260 		    blocksize);
261 
262 	rval = 0;
263 
264 	(void)signal(SIGINFO, siginfo);
265 
266 	if ((fts = fts_open(argv, ftsoptions, NULL)) == NULL)
267 		xo_err(1, "fts_open");
268 
269 	xo_set_version(DU_XO_VERSION);
270 	xo_open_container("disk-usage-information");
271 	xo_open_list("paths");
272 	for (errno = 0; (p = fts_read(fts)) != NULL; errno = 0) {
273 		switch (p->fts_info) {
274 		case FTS_D:			/* Ignore. */
275 			if (ignorep(p))
276 				fts_set(fts, p, FTS_SKIP);
277 			break;
278 		case FTS_DP:			/* Directory files */
279 			if (ignorep(p))
280 				break;
281 
282 			record_file_size(p);
283 
284 			if (p->fts_level <= depth && check_threshold(p))
285 				print_file_size(p);
286 
287 			if (info) {
288 				info = 0;
289 				(void)printf("\t%s\n", p->fts_path);
290 			}
291 			break;
292 		case FTS_DC:			/* Ignore. */
293 			break;
294 		case FTS_DNR:			/* Warn, continue. */
295 		case FTS_ERR:
296 		case FTS_NS:
297 			xo_warnx("%s: %s", p->fts_path, strerror(p->fts_errno));
298 			rval = 1;
299 			break;
300 		default:			/* All other files */
301 			if (ignorep(p))
302 				break;
303 
304 			if (lflag == 0 && p->fts_statp->st_nlink > 1 &&
305 			    linkchk(p))
306 				break;
307 
308 			record_file_size(p);
309 
310 			if ((aflag || p->fts_level == 0) && check_threshold(p))
311 				print_file_size(p);
312 		}
313 		savednumber = p->fts_parent->fts_number;
314 	}
315 	xo_close_list("paths");
316 
317 	if (errno != 0)
318 		xo_err(1, "fts_read");
319 
320 	if (cflag) {
321 		if (hflag > 0) {
322 			prthumanval("{:total-blocks/%4s}\ttotal\n",
323 			    savednumber);
324 		} else {
325 			xo_emit("{:total-blocks/%jd}\ttotal\n",
326 			    (intmax_t)howmany(
327 			    savednumber * cblocksize, blocksize));
328 		}
329 	}
330 
331 	ignoreclean();
332 	xo_close_container("disk-usage-information");
333 	if (xo_finish() < 0)
334 		xo_err(1, "stdout");
335 	exit(rval);
336 }
337 
338 static bool
339 linkchk(FTSENT *p)
340 {
341 	struct links_entry {
342 		struct links_entry *next;
343 		struct links_entry *previous;
344 		int	 links;
345 		dev_t	 dev;
346 		ino_t	 ino;
347 	};
348 	static const size_t links_hash_initial_size = 8192;
349 	static struct links_entry **buckets;
350 	static struct links_entry *free_list;
351 	static size_t number_buckets;
352 	static unsigned long number_entries;
353 	static char stop_allocating;
354 	struct links_entry *le, **new_buckets;
355 	struct stat *st;
356 	size_t i, new_size;
357 	int hash;
358 
359 	st = p->fts_statp;
360 
361 	/* If necessary, initialize the hash table. */
362 	if (buckets == NULL) {
363 		number_buckets = links_hash_initial_size;
364 		buckets = malloc(number_buckets * sizeof(buckets[0]));
365 		if (buckets == NULL)
366 			xo_errx(1, "No memory for hardlink detection");
367 		for (i = 0; i < number_buckets; i++)
368 			buckets[i] = NULL;
369 	}
370 
371 	/* If the hash table is getting too full, enlarge it. */
372 	if (number_entries > number_buckets * 10 && !stop_allocating) {
373 		new_size = number_buckets * 2;
374 		new_buckets = calloc(new_size, sizeof(struct links_entry *));
375 
376 		/* Try releasing the free list to see if that helps. */
377 		if (new_buckets == NULL && free_list != NULL) {
378 			while (free_list != NULL) {
379 				le = free_list;
380 				free_list = le->next;
381 				free(le);
382 			}
383 			new_buckets = calloc(new_size, sizeof(new_buckets[0]));
384 		}
385 
386 		if (new_buckets == NULL) {
387 			stop_allocating = 1;
388 			xo_warnx("No more memory for tracking hard links");
389 		} else {
390 			for (i = 0; i < number_buckets; i++) {
391 				while (buckets[i] != NULL) {
392 					/* Remove entry from old bucket. */
393 					le = buckets[i];
394 					buckets[i] = le->next;
395 
396 					/* Add entry to new bucket. */
397 					hash = (le->dev ^ le->ino) % new_size;
398 
399 					if (new_buckets[hash] != NULL)
400 						new_buckets[hash]->previous =
401 						    le;
402 					le->next = new_buckets[hash];
403 					le->previous = NULL;
404 					new_buckets[hash] = le;
405 				}
406 			}
407 			free(buckets);
408 			buckets = new_buckets;
409 			number_buckets = new_size;
410 		}
411 	}
412 
413 	/* Try to locate this entry in the hash table. */
414 	hash = (st->st_dev ^ st->st_ino) % number_buckets;
415 	for (le = buckets[hash]; le != NULL; le = le->next) {
416 		if (le->dev == st->st_dev && le->ino == st->st_ino) {
417 			/*
418 			 * Save memory by releasing an entry when we've seen
419 			 * all of its links.
420 			 */
421 			if (--le->links <= 0) {
422 				if (le->previous != NULL)
423 					le->previous->next = le->next;
424 				if (le->next != NULL)
425 					le->next->previous = le->previous;
426 				if (buckets[hash] == le)
427 					buckets[hash] = le->next;
428 				number_entries--;
429 				/* Recycle this node through the free list */
430 				if (stop_allocating) {
431 					free(le);
432 				} else {
433 					le->next = free_list;
434 					free_list = le;
435 				}
436 			}
437 			return (true);
438 		}
439 	}
440 
441 	if (stop_allocating)
442 		return (false);
443 
444 	/* Add this entry to the links cache. */
445 	if (free_list != NULL) {
446 		/* Pull a node from the free list if we can. */
447 		le = free_list;
448 		free_list = le->next;
449 	} else
450 		/* Malloc one if we have to. */
451 		le = malloc(sizeof(struct links_entry));
452 	if (le == NULL) {
453 		stop_allocating = 1;
454 		xo_warnx("No more memory for tracking hard links");
455 		return (false);
456 	}
457 	le->dev = st->st_dev;
458 	le->ino = st->st_ino;
459 	le->links = st->st_nlink - 1;
460 	number_entries++;
461 	le->next = buckets[hash];
462 	le->previous = NULL;
463 	if (buckets[hash] != NULL)
464 		buckets[hash]->previous = le;
465 	buckets[hash] = le;
466 	return (false);
467 }
468 
469 static void
470 prthumanval(const char *fmt, int64_t bytes)
471 {
472 	char buf[5];
473 	int flags;
474 
475 	bytes *= cblocksize;
476 	flags = HN_B | HN_NOSPACE | HN_DECIMAL;
477 	if (!Aflag)
478 		bytes *= DEV_BSIZE;
479 	if (hflag == UNITS_SI)
480 		flags |= HN_DIVISOR_1000;
481 
482 	humanize_number(buf, sizeof(buf), bytes, "", HN_AUTOSCALE, flags);
483 
484 	xo_emit(fmt, buf);
485 }
486 
487 static void
488 usage(void)
489 {
490 	xo_error("%s\n%s\n%s\n",
491 	    "usage: du [--libxo] [-Aclnx] [-H | -L | -P] [-g | -h | -k | -m]",
492 	    "          [-a | -s | -d depth] [-B blocksize] [-I mask] [-t threshold]",
493 	    "          [file ...]");
494 	exit(EX_USAGE);
495 }
496 
497 static void
498 ignoreadd(const char *mask)
499 {
500 	struct ignentry *ign;
501 
502 	ign = calloc(1, sizeof(*ign));
503 	if (ign == NULL)
504 		xo_errx(1, "cannot allocate memory");
505 	ign->mask = strdup(mask);
506 	if (ign->mask == NULL)
507 		xo_errx(1, "cannot allocate memory");
508 	SLIST_INSERT_HEAD(&ignores, ign, next);
509 }
510 
511 static void
512 ignoreclean(void)
513 {
514 	struct ignentry *ign;
515 
516 	while (!SLIST_EMPTY(&ignores)) {
517 		ign = SLIST_FIRST(&ignores);
518 		SLIST_REMOVE_HEAD(&ignores, next);
519 		free(ign->mask);
520 		free(ign);
521 	}
522 }
523 
524 static bool
525 ignorep(FTSENT *ent)
526 {
527 	struct ignentry *ign;
528 
529 	if (nodumpflag && (ent->fts_statp->st_flags & UF_NODUMP))
530 		return (true);
531 	SLIST_FOREACH(ign, &ignores, next) {
532 		if (fnmatch(ign->mask, ent->fts_name, 0) != FNM_NOMATCH)
533 			return (true);
534 	}
535 	return (false);
536 }
537 
538 static void
539 siginfo(int sig __unused)
540 {
541 	info = 1;
542 }
543 
544 /*
545  * Record the total disk/block size of the file or directory. The fts_number
546  * variable provided in FTSENT is used for keeping track of the total size.
547  * See FTS(3).
548  */
549 static void
550 record_file_size(FTSENT *p)
551 {
552 	p->fts_number += Aflag ?
553 	    howmany(p->fts_statp->st_size, cblocksize) :
554 	    howmany(p->fts_statp->st_blocks, cblocksize);
555 
556 	p->fts_parent->fts_number += p->fts_number;
557 }
558 
559 static bool
560 check_threshold(FTSENT *p)
561 {
562 	return (threshold <= threshold_sign *
563 	    howmany(p->fts_number * cblocksize, blocksize));
564 }
565 
566 static void
567 print_file_size(FTSENT *p)
568 {
569 	xo_open_instance("paths");
570 	if (hflag > 0) {
571 		prthumanval("{:blocks/%4s}", p->fts_number);
572 		xo_emit("\t{:path/%s}\n", p->fts_path);
573 	} else {
574 		xo_emit("{:blocks/%jd}\t{:path/%s}\n",
575 		    (intmax_t)howmany(p->fts_number * cblocksize, blocksize),
576 		p->fts_path);
577 	}
578 	xo_close_instance("paths");
579 }
580