xref: /freebsd/usr.bin/du/du.c (revision 5b56413d04e608379c9a306373554a8e4d321bc0)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1989, 1993, 1994
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Chris Newcomb.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/param.h>
36 #include <sys/queue.h>
37 #include <sys/stat.h>
38 #include <err.h>
39 #include <errno.h>
40 #include <fnmatch.h>
41 #include <fts.h>
42 #include <getopt.h>
43 #include <libutil.h>
44 #include <locale.h>
45 #include <stdint.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <sysexits.h>
50 #include <unistd.h>
51 #include <libxo/xo.h>
52 
/*
 * Value returned by getopt_long() for the long-only "--si" option.  It is
 * placed just above CHAR_MAX so it cannot collide with any single-character
 * option letter.
 */
#define SI_OPT	(CHAR_MAX + 1)

/* Values stored in hflag to select human-readable output scaling. */
#define UNITS_2		1	/* -h */
#define UNITS_SI	2	/* --si: adds HN_DIVISOR_1000 in prthumanval() */
57 
/*
 * Singly-linked list of -I masks.  Entries are added by ignoreadd(),
 * consulted by ignorep() and released by ignoreclean().
 */
static SLIST_HEAD(ignhead, ignentry) ignores;
struct ignentry {
	char			*mask;	/* strdup()ed fnmatch(3) pattern */
	SLIST_ENTRY(ignentry)	next;	/* list linkage */
};
63 
/* Forward declarations of the file-local helpers defined after main(). */
static int	linkchk(FTSENT *);	/* hard-link bookkeeping */
static void	usage(void);		/* print synopsis and exit */
static void	prthumanval(const char *, int64_t); /* humanized size output */
static void	ignoreadd(const char *);	/* record an -I mask */
static void	ignoreclean(void);	/* free all -I masks */
static int	ignorep(FTSENT *);	/* should this entry be skipped? */
static void	siginfo(int __unused);	/* SIGINFO handler */
71 
/* Set by -n; makes ignorep() skip entries flagged UF_NODUMP. */
static int	nodumpflag = 0;
/*
 * Aflag: set by -A; sizes are then derived from st_size rather than
 * st_blocks.  hflag: 0, UNITS_2 (-h) or UNITS_SI (--si).
 */
static int	Aflag, hflag;
/*
 * blocksize: unit used when printing counts (-g/-k/-m or getbsize()).
 * cblocksize: unit used when counting (-B, DEV_BSIZE by default).
 */
static long	blocksize, cblocksize;
/* Raised by the SIGINFO handler, consumed and cleared in main()'s walk loop. */
static volatile sig_atomic_t info;
76 
77 static const struct option long_options[] =
78 {
79 	{ "si", no_argument, NULL, SI_OPT },
80 	{ NULL, no_argument, NULL, 0 },
81 };
82 
83 int
84 main(int argc, char *argv[])
85 {
86 	FTS		*fts;
87 	FTSENT		*p;
88 	off_t		savednumber, curblocks;
89 	off_t		threshold, threshold_sign;
90 	int		ftsoptions;
91 	int		depth;
92 	int		Hflag, Lflag, aflag, sflag, dflag, cflag;
93 	int		lflag, ch, notused, rval;
94 	char 		**save;
95 	static char	dot[] = ".";
96 
97 	setlocale(LC_ALL, "");
98 
99 	Hflag = Lflag = aflag = sflag = dflag = cflag = lflag = Aflag = 0;
100 
101 	save = argv;
102 	ftsoptions = FTS_PHYSICAL;
103 	savednumber = 0;
104 	threshold = 0;
105 	threshold_sign = 1;
106 	cblocksize = DEV_BSIZE;
107 	blocksize = 0;
108 	depth = INT_MAX;
109 	SLIST_INIT(&ignores);
110 
111 	argc = xo_parse_args(argc, argv);
112 	if (argc < 0)
113 		exit(EX_USAGE);
114 
115 	while ((ch = getopt_long(argc, argv, "+AB:HI:LPasd:cghklmnrt:x",
116 	    long_options, NULL)) != -1)
117 		switch (ch) {
118 		case 'A':
119 			Aflag = 1;
120 			break;
121 		case 'B':
122 			errno = 0;
123 			cblocksize = atoi(optarg);
124 			if (errno == ERANGE || cblocksize <= 0) {
125 				xo_warnx("invalid argument to option B: %s",
126 				    optarg);
127 				usage();
128 			}
129 			break;
130 		case 'H':
131 			Hflag = 1;
132 			Lflag = 0;
133 			break;
134 		case 'I':
135 			ignoreadd(optarg);
136 			break;
137 		case 'L':
138 			Lflag = 1;
139 			Hflag = 0;
140 			break;
141 		case 'P':
142 			Hflag = Lflag = 0;
143 			break;
144 		case 'a':
145 			aflag = 1;
146 			break;
147 		case 's':
148 			sflag = 1;
149 			break;
150 		case 'd':
151 			dflag = 1;
152 			errno = 0;
153 			depth = atoi(optarg);
154 			if (errno == ERANGE || depth < 0) {
155 				xo_warnx("invalid argument to option d: %s",
156 				    optarg);
157 				usage();
158 			}
159 			break;
160 		case 'c':
161 			cflag = 1;
162 			break;
163 		case 'g':
164 			hflag = 0;
165 			blocksize = 1073741824;
166 			break;
167 		case 'h':
168 			hflag = UNITS_2;
169 			break;
170 		case 'k':
171 			hflag = 0;
172 			blocksize = 1024;
173 			break;
174 		case 'l':
175 			lflag = 1;
176 			break;
177 		case 'm':
178 			hflag = 0;
179 			blocksize = 1048576;
180 			break;
181 		case 'n':
182 			nodumpflag = 1;
183 			break;
184 		case 'r':		 /* Compatibility. */
185 			break;
186 		case 't' :
187 			if (expand_number(optarg, &threshold) != 0 ||
188 			    threshold == 0) {
189 				xo_warnx("invalid threshold: %s", optarg);
190 				usage();
191 			} else if (threshold < 0)
192 				threshold_sign = -1;
193 			break;
194 		case 'x':
195 			ftsoptions |= FTS_XDEV;
196 			break;
197 		case SI_OPT:
198 			hflag = UNITS_SI;
199 			break;
200 		case '?':
201 		default:
202 			usage();
203 			/* NOTREACHED */
204 		}
205 
206 	argc -= optind;
207 	argv += optind;
208 
209 	/*
210 	 * XXX
211 	 * Because of the way that fts(3) works, logical walks will not count
212 	 * the blocks actually used by symbolic links.  We rationalize this by
213 	 * noting that users computing logical sizes are likely to do logical
214 	 * copies, so not counting the links is correct.  The real reason is
215 	 * that we'd have to re-implement the kernel's symbolic link traversing
216 	 * algorithm to get this right.  If, for example, you have relative
217 	 * symbolic links referencing other relative symbolic links, it gets
218 	 * very nasty, very fast.  The bottom line is that it's documented in
219 	 * the man page, so it's a feature.
220 	 */
221 
222 	if (Hflag)
223 		ftsoptions |= FTS_COMFOLLOW;
224 	if (Lflag) {
225 		ftsoptions &= ~FTS_PHYSICAL;
226 		ftsoptions |= FTS_LOGICAL;
227 	}
228 
229 	if (!Aflag && (cblocksize % DEV_BSIZE) != 0)
230 		cblocksize = howmany(cblocksize, DEV_BSIZE) * DEV_BSIZE;
231 
232 	if (aflag + dflag + sflag > 1)
233 		usage();
234 	if (sflag)
235 		depth = 0;
236 
237 	if (!*argv) {
238 		argv = save;
239 		argv[0] = dot;
240 		argv[1] = NULL;
241 	}
242 
243 	if (blocksize == 0)
244 		(void)getbsize(&notused, &blocksize);
245 
246 	if (!Aflag) {
247 		cblocksize /= DEV_BSIZE;
248 		blocksize /= DEV_BSIZE;
249 	}
250 
251 	if (threshold != 0)
252 		threshold = howmany(threshold / DEV_BSIZE * cblocksize,
253 		    blocksize);
254 
255 	rval = 0;
256 
257 	(void)signal(SIGINFO, siginfo);
258 
259 	if ((fts = fts_open(argv, ftsoptions, NULL)) == NULL)
260 		err(1, "fts_open");
261 
262 	xo_open_container("disk-usage-information");
263 	xo_open_list("paths");
264 	while (errno = 0, (p = fts_read(fts)) != NULL) {
265 		switch (p->fts_info) {
266 		case FTS_D:			/* Ignore. */
267 			if (ignorep(p))
268 				fts_set(fts, p, FTS_SKIP);
269 			break;
270 		case FTS_DP:
271 			if (ignorep(p))
272 				break;
273 
274 			curblocks = Aflag ?
275 			    howmany(p->fts_statp->st_size, cblocksize) :
276 			    howmany(p->fts_statp->st_blocks, cblocksize);
277 			p->fts_parent->fts_bignum += p->fts_bignum +=
278 			    curblocks;
279 
280 			if (p->fts_level <= depth && threshold <=
281 			    threshold_sign * howmany(p->fts_bignum *
282 			    cblocksize, blocksize)) {
283 				xo_open_instance("paths");
284 				if (hflag > 0) {
285 					prthumanval("{:blocks/%4s}",
286 					    p->fts_bignum);
287 					xo_emit("\t{:path/%s}\n", p->fts_path);
288 				} else {
289 					xo_emit("{:blocks/%jd}\t{:path/%s}\n",
290 					    (intmax_t)howmany(p->fts_bignum *
291 					    cblocksize, blocksize),
292 					    p->fts_path);
293 				}
294 				xo_close_instance("paths");
295 			}
296 			if (info) {
297 				info = 0;
298 				(void)printf("\t%s\n", p->fts_path);
299 			}
300 			break;
301 		case FTS_DC:			/* Ignore. */
302 			break;
303 		case FTS_DNR:			/* Warn, continue. */
304 		case FTS_ERR:
305 		case FTS_NS:
306 			xo_warnx("%s: %s", p->fts_path, strerror(p->fts_errno));
307 			rval = 1;
308 			break;
309 		default:
310 			if (ignorep(p))
311 				break;
312 
313 			if (lflag == 0 && p->fts_statp->st_nlink > 1 &&
314 			    linkchk(p))
315 				break;
316 
317 			curblocks = Aflag ?
318 			    howmany(p->fts_statp->st_size, cblocksize) :
319 			    howmany(p->fts_statp->st_blocks, cblocksize);
320 
321 			if (aflag || p->fts_level == 0) {
322 				xo_open_instance("paths");
323 				if (hflag > 0) {
324 					prthumanval("{:blocks/%4s}", curblocks);
325 					xo_emit("\t{:path/%s}\n", p->fts_path);
326 				} else {
327 					xo_emit("{:blocks/%jd}\t{:path/%s}\n",
328 					    (intmax_t)howmany(curblocks *
329 					    cblocksize, blocksize),
330 					    p->fts_path);
331 				}
332 				xo_close_instance("paths");
333 			}
334 
335 			p->fts_parent->fts_bignum += curblocks;
336 		}
337 		savednumber = p->fts_parent->fts_bignum;
338 	}
339 	xo_close_list("paths");
340 
341 	if (errno)
342 		xo_err(1, "fts_read");
343 
344 	if (cflag) {
345 		if (hflag > 0) {
346 			prthumanval("{:total-blocks/%4s}\ttotal\n",
347 			    savednumber);
348 		} else {
349 			xo_emit("{:total-blocks/%jd}\ttotal\n",
350 			    (intmax_t)howmany(
351 			    savednumber * cblocksize, blocksize));
352 		}
353 	}
354 
355 	ignoreclean();
356 	xo_close_container("disk-usage-information");
357 	if (xo_finish() < 0)
358 		xo_err(1, "stdout");
359 	exit(rval);
360 }
361 
/*
 * Hard-link bookkeeping for files whose link count is above one.
 *
 * Each (st_dev, st_ino) pair is remembered in a function-private chained
 * hash table.  Returns 1 when the file was already recorded (the caller
 * should not count it again) and 0 when it is new, or when tracking has
 * been given up because memory ran out.
 */
static int
linkchk(FTSENT *p)
{
	struct links_entry {
		struct links_entry *next;
		struct links_entry *previous;
		int	 links;
		dev_t	 dev;
		ino_t	 ino;
	};
	static const size_t links_hash_initial_size = 8192;
	static struct links_entry **buckets;
	static struct links_entry *free_list;
	static size_t number_buckets;
	static unsigned long number_entries;
	static char stop_allocating;
	struct links_entry *node, *head, **grown;
	struct stat *sb;
	size_t idx, grown_size;
	int slot;

	sb = p->fts_statp;

	/* First call: create the table with every chain empty. */
	if (buckets == NULL) {
		number_buckets = links_hash_initial_size;
		buckets = malloc(number_buckets * sizeof(buckets[0]));
		if (buckets == NULL)
			errx(1, "No memory for hardlink detection");
		for (idx = number_buckets; idx-- > 0; )
			buckets[idx] = NULL;
	}

	/* Double the table once chains average more than ten entries. */
	if (!stop_allocating && number_entries > number_buckets * 10) {
		grown_size = number_buckets * 2;
		grown = calloc(grown_size, sizeof(struct links_entry *));

		/* On failure, give back the recycled nodes and retry once. */
		if (grown == NULL && free_list != NULL) {
			do {
				node = free_list;
				free_list = node->next;
				free(node);
			} while (free_list != NULL);
			grown = calloc(grown_size, sizeof(grown[0]));
		}

		if (grown != NULL) {
			for (idx = 0; idx < number_buckets; idx++) {
				/* Pop from the old chain, push onto the new. */
				while ((node = buckets[idx]) != NULL) {
					buckets[idx] = node->next;

					slot = (node->dev ^ node->ino) %
					    grown_size;
					head = grown[slot];
					if (head != NULL)
						head->previous = node;
					node->next = head;
					node->previous = NULL;
					grown[slot] = node;
				}
			}
			free(buckets);
			buckets = grown;
			number_buckets = grown_size;
		} else {
			stop_allocating = 1;
			xo_warnx("No more memory for tracking hard links");
		}
	}

	/* Look the file up in its chain. */
	slot = (sb->st_dev ^ sb->st_ino) % number_buckets;
	for (node = buckets[slot]; node != NULL; node = node->next) {
		if (node->dev != sb->st_dev || node->ino != sb->st_ino)
			continue;

		/* Seen before; keep the entry while links remain unseen. */
		node->links--;
		if (node->links > 0)
			return (1);

		/* Every link has been visited: unlink the entry. */
		if (node->previous != NULL)
			node->previous->next = node->next;
		if (node->next != NULL)
			node->next->previous = node->previous;
		if (buckets[slot] == node)
			buckets[slot] = node->next;
		number_entries--;

		/* Keep the node for reuse unless memory already ran out. */
		if (stop_allocating) {
			free(node);
		} else {
			node->next = free_list;
			free_list = node;
		}
		return (1);
	}

	if (stop_allocating)
		return (0);

	/* Not seen yet: take a recycled node, or allocate a fresh one. */
	node = free_list;
	if (node != NULL)
		free_list = node->next;
	else
		node = malloc(sizeof(struct links_entry));
	if (node == NULL) {
		stop_allocating = 1;
		xo_warnx("No more memory for tracking hard links");
		return (0);
	}

	node->dev = sb->st_dev;
	node->ino = sb->st_ino;
	node->links = sb->st_nlink - 1;
	number_entries++;

	/* Insert at the head of the chain. */
	head = buckets[slot];
	node->next = head;
	node->previous = NULL;
	if (head != NULL)
		head->previous = node;
	buckets[slot] = node;
	return (0);
}
492 
493 static void
494 prthumanval(const char *fmt, int64_t bytes)
495 {
496 	char buf[5];
497 	int flags;
498 
499 	bytes *= cblocksize;
500 	flags = HN_B | HN_NOSPACE | HN_DECIMAL;
501 	if (!Aflag)
502 		bytes *= DEV_BSIZE;
503 	if (hflag == UNITS_SI)
504 		flags |= HN_DIVISOR_1000;
505 
506 	humanize_number(buf, sizeof(buf), bytes, "", HN_AUTOSCALE, flags);
507 
508 	xo_emit(fmt, buf);
509 }
510 
/*
 * Report the command synopsis through libxo and terminate with EX_USAGE.
 * Does not return.
 */
static void
usage(void)
{
	xo_error("usage: du [-Aclnx] [-H | -L | -P] "
	    "[-g | -h | -k | -m] [-a | -s | -d depth] "
	    "[-B blocksize] [-I mask] [-t threshold] "
	    "[file ...]\n");
	exit(EX_USAGE);
}
520 
521 static void
522 ignoreadd(const char *mask)
523 {
524 	struct ignentry *ign;
525 
526 	ign = calloc(1, sizeof(*ign));
527 	if (ign == NULL)
528 		errx(1, "cannot allocate memory");
529 	ign->mask = strdup(mask);
530 	if (ign->mask == NULL)
531 		errx(1, "cannot allocate memory");
532 	SLIST_INSERT_HEAD(&ignores, ign, next);
533 }
534 
535 static void
536 ignoreclean(void)
537 {
538 	struct ignentry *ign;
539 
540 	while (!SLIST_EMPTY(&ignores)) {
541 		ign = SLIST_FIRST(&ignores);
542 		SLIST_REMOVE_HEAD(&ignores, next);
543 		free(ign->mask);
544 		free(ign);
545 	}
546 }
547 
548 static int
549 ignorep(FTSENT *ent)
550 {
551 	struct ignentry *ign;
552 
553 	if (nodumpflag && (ent->fts_statp->st_flags & UF_NODUMP))
554 		return 1;
555 	SLIST_FOREACH(ign, &ignores, next)
556 		if (fnmatch(ign->mask, ent->fts_name, 0) != FNM_NOMATCH)
557 			return 1;
558 	return 0;
559 }
560 
561 static void
562 siginfo(int sig __unused)
563 {
564 
565 	info = 1;
566 }
567