xref: /illumos-gate/usr/src/cmd/du/du.c (revision e54334fcbcd9d9be8f3ad6fbf06b2af4890bfeca)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright 2017 OmniTI Computer Consulting, Inc.  All rights reserved.
25  * Copyright 2017 Jason King
26  * Copyright 2025 Edgecast Cloud LLC
27  * Copyright 2026 Oxide Computer Company
28  */
29 
30 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
31 /*	  All Rights Reserved	*/
32 
33 /*
34  * du -- summarize disk usage
35  */
36 
37 #include <sys/types.h>
38 #include <sys/wait.h>
39 #include <sys/stat.h>
40 #include <sys/avl.h>
41 #include <sys/sysmacros.h>
42 #include <assert.h>
43 #include <fcntl.h>
44 #include <dirent.h>
45 #include <limits.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <unistd.h>
50 #include <locale.h>
51 #include <libcmdutils.h>
52 
53 
/*
 * Option flags.  Each one is bumped in main() when the matching
 * command line option is seen.
 */
static int		aflg = 0;	/* -a: print non-directories too */
static int		bflg = 0;	/* -b: report bytes; also sets Aflg */
static int		rflg = 0;	/* -r: print error messages (XPG4: on) */
static int		sflg = 0;	/* -s: main() prints one total/operand */
static int		kflg = 0;	/* -k: 1K units; clears hflg, mflg */
static int		mflg = 0;	/* -m: 1M units; clears hflg, kflg */
static int		oflg = 0;	/* -o: dir total not added to parent */
static int		dflg = 0;	/* -d or -x: skip other st_dev values */
static int		hflg = 0;	/* -h: scaled output; clears kflg, mflg */
static int		Aflg = 0;	/* -A: count st_size, not st_blocks */
static int		Hflg = 0;	/* -H: follow operand symlinks only */
static int		Lflg = 0;	/* -L: follow all symlinks */
/*
 * Set by -H and cleared by -L.  While nonzero, descend() treats the entry
 * it is examining as a command line argument; descend() clears it after
 * the first entry has been examined.
 */
static int		cmdarg = 0;	/* Command line argument */
static char		*dot = ".";	/* operand used when none is given */
static int		level = 0;	/* Level of recursion */

/*
 * base holds the path of the entry currently being examined; it is what
 * gets printed next to a size and in error messages.  name is a scratch
 * copy of the operand that main() splits at the last '/' to find the
 * directory to chdir() into.  Both are heap buffers that are grown on
 * demand; base_len and name_len track their allocated sizes.
 */
static char		*base;
static char		*name;
static size_t		base_len = PATH_MAX + 1;    /* # of chars for base */
static size_t		name_len = PATH_MAX + 1;    /* # of chars for name */
74 
/*
 * Output formats. illumos uses a tab as separator, XPG4 a space.
 * FORMAT1 takes an already formatted size string (used for -h output),
 * FORMAT2 takes the size as an unsigned long long.  Both are followed by
 * the path.
 */
#ifdef XPG4
#define	FORMAT1	"%s %s\n"
#define	FORMAT2	"%llu %s\n"
#else
#define	FORMAT1	"%s\t%s\n"
#define	FORMAT2	"%llu\t%s\n"
#endif

/*
 * Shift counts (log2 of the unit size in bytes) for the output units:
 * 512-byte blocks, kilobytes and megabytes.  DEV_BSHIFT is also used to
 * turn a count of 512-byte blocks into bytes.
 */
#define	DEV_BSHIFT	9
#define	DEV_KSHIFT	10
#define	DEV_MSHIFT	20
92 
/* Forward declarations for the helpers defined after main(). */
static u_longlong_t	descend(char *curname, int curfd, int *retcode,
			    dev_t device);
static void		printsize(blkcnt_t blocks, char *path);
static void		exitdu(int exitcode);

/*
 * Tree handed to add_tnode() together with each entry's (st_dev, st_ino)
 * so that descend() can tell when an entry has already been counted.
 */
static avl_tree_t	*tree = NULL;
99 
100 int
101 main(int argc, char **argv)
102 {
103 	blkcnt_t	blocks = 0;
104 	int		c;
105 	extern int	optind;
106 	char		*np;
107 	pid_t		pid, wpid;
108 	int		status, retcode = 0;
109 	setbuf(stderr, NULL);
110 	(void) setlocale(LC_ALL, "");
111 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
112 #define	TEXT_DOMAIN	"SYS_TEST"	/* Use this only if it weren't */
113 #endif
114 	(void) textdomain(TEXT_DOMAIN);
115 
116 #ifdef XPG4
117 	rflg++;		/* "-r" is not an option but ON always */
118 #endif
119 
120 	while ((c = getopt(argc, argv, "aAbdhHkLmorsx")) != EOF)
121 		switch (c) {
122 
123 		case 'a':
124 			aflg++;
125 			continue;
126 
127 		case 'b':
128 			bflg++;
129 			/*
130 			 * -b implies -A since reporting in bytes
131 			 * requires the apparent file size rather than
132 			 * the allocated block count.
133 			 */
134 			Aflg++;
135 			continue;
136 
137 		case 'h':
138 			hflg++;
139 			kflg = 0;
140 			mflg = 0;
141 			continue;
142 
143 		case 'r':
144 			rflg++;
145 			continue;
146 
147 		case 's':
148 			sflg++;
149 			continue;
150 
151 		case 'k':
152 			kflg++;
153 			hflg = 0;
154 			mflg = 0;
155 			continue;
156 
157 		case 'm':
158 			mflg++;
159 			hflg = 0;
160 			kflg = 0;
161 			continue;
162 
163 		case 'o':
164 			oflg++;
165 			continue;
166 
167 		case 'd':
168 			dflg++;
169 			continue;
170 
171 		case 'x':
172 			dflg++;
173 			continue;
174 
175 		case 'A':
176 			Aflg++;
177 			continue;
178 
179 		case 'H':
180 			Hflg++;
181 			/* -H and -L are mutually exclusive */
182 			Lflg = 0;
183 			cmdarg++;
184 			continue;
185 
186 		case 'L':
187 			Lflg++;
188 			/* -H and -L are mutually exclusive */
189 			Hflg = 0;
190 			cmdarg = 0;
191 			continue;
192 		case '?':
193 			(void) fprintf(stderr, gettext(
194 			    "usage: du [-Adorx] [-a|-s] [-b|-h|-k|-m] [-H|-L] "
195 			    "[file...]\n"));
196 			exit(2);
197 		}
198 	if (optind == argc) {
199 		argv = &dot;
200 		argc = 1;
201 		optind = 0;
202 	}
203 
204 	/* "-o" and "-s" don't make any sense together. */
205 	if (oflg && sflg)
206 		oflg = 0;
207 
208 	if (bflg && (hflg || kflg || mflg)) {
209 		(void) fprintf(stderr, gettext("usage: -b cannot be used in "
210 		    "conjunction with -h, -k or -m\n"));
211 		exit(2);
212 	}
213 
214 	if ((base = (char *)calloc(base_len, sizeof (char))) == NULL) {
215 		perror("du");
216 		exit(1);
217 	}
218 	if ((name = (char *)calloc(name_len, sizeof (char))) == NULL) {
219 		perror("du");
220 		free(base);
221 		exit(1);
222 	}
223 	do {
224 		pid = (pid_t)-1;
225 		if (optind < argc - 1) {
226 			pid = fork();
227 			if (pid == (pid_t)-1) {
228 				perror(gettext("du: No more processes"));
229 				exitdu(1);
230 			}
231 			if (pid != 0) {
232 				while ((wpid = wait(&status)) != pid &&
233 				    wpid != (pid_t)-1)
234 					;
235 				if (pid != (pid_t)-1 && status != 0)
236 					retcode = 1;
237 			}
238 		}
239 		if (optind == argc - 1 || pid == 0) {
240 			while (base_len < (strlen(argv[optind]) + 1)) {
241 				base_len = base_len * 2;
242 				if ((base = (char *)realloc(base, base_len *
243 				    sizeof (char))) == NULL) {
244 					if (rflg) {
245 						(void) fprintf(stderr, gettext(
246 						    "du: can't process %s"),
247 						    argv[optind]);
248 						perror("");
249 					}
250 					exitdu(1);
251 				}
252 			}
253 			if (base_len > name_len) {
254 				name_len = base_len;
255 				if ((name = (char *)realloc(name, name_len *
256 				    sizeof (char))) == NULL) {
257 					if (rflg) {
258 						(void) fprintf(stderr, gettext(
259 						    "du: can't process %s"),
260 						    argv[optind]);
261 						perror("");
262 					}
263 					exitdu(1);
264 				}
265 			}
266 			(void) strcpy(base, argv[optind]);
267 			(void) strcpy(name, argv[optind]);
268 			np = strrchr(name, '/');
269 			if (np != NULL) {
270 				*np++ = '\0';
271 				if (chdir(*name ? name : "/") < 0) {
272 					if (rflg) {
273 						(void) fprintf(stderr, "du: ");
274 						perror(*name ? name : "/");
275 						exitdu(1);
276 					}
277 					exitdu(0);
278 				}
279 			} else
280 				np = base;
281 			blocks = descend(*np ? np : ".", 0, &retcode,
282 			    (dev_t)0);
283 			if (sflg)
284 				printsize(blocks, base);
285 			if (optind < argc - 1)
286 				exitdu(retcode);
287 		}
288 		optind++;
289 	} while (optind < argc);
290 	exitdu(retcode);
291 
292 	return (retcode);
293 }
294 
295 /*
296  * descend recursively, adding up the allocated blocks.
297  * If curname is NULL, curfd is used.
298  */
299 static u_longlong_t
300 descend(char *curname, int curfd, int *retcode, dev_t device)
301 {
302 	static DIR		*dirp = NULL;
303 	char			*ebase0, *ebase;
304 	struct stat		stb, stb1;
305 	int			i, j, ret, fd, tmpflg;
306 	int			follow_symlinks;
307 	blkcnt_t		blocks = 0;
308 	off_t			curoff = 0;
309 	ptrdiff_t		offset;
310 	ptrdiff_t		offset0;
311 	struct dirent		*dp;
312 	char			dirbuf[PATH_MAX + 1];
313 	u_longlong_t		retval;
314 
315 	ebase0 = ebase = strchr(base, 0);
316 	if (ebase > base && ebase[-1] == '/')
317 		ebase--;
318 	offset = ebase - base;
319 	offset0 = ebase0 - base;
320 
321 	if (curname)
322 		curfd = AT_FDCWD;
323 
324 	/*
325 	 * If neither a -L or a -H was specified, don't follow symlinks.
326 	 * If a -H was specified, don't follow symlinks if the file is
327 	 * not a command line argument.
328 	 */
329 	follow_symlinks = (Lflg || (Hflg && cmdarg));
330 	if (follow_symlinks) {
331 		i = fstatat(curfd, curname, &stb, 0);
332 		j = fstatat(curfd, curname, &stb1, AT_SYMLINK_NOFOLLOW);
333 
334 		/*
335 		 * Make sure any files encountered while traversing the
336 		 * hierarchy are not considered command line arguments.
337 		 */
338 		if (Hflg) {
339 			cmdarg = 0;
340 		}
341 	} else {
342 		i = fstatat(curfd, curname, &stb, AT_SYMLINK_NOFOLLOW);
343 		j = 0;
344 	}
345 
346 	if ((i < 0) || (j < 0)) {
347 		if (rflg) {
348 			(void) fprintf(stderr, "du: ");
349 			perror(base);
350 		}
351 
352 		/*
353 		 * POSIX states that non-zero status codes are only set
354 		 * when an error message is printed out on stderr
355 		 */
356 		*retcode = (rflg ? 1 : 0);
357 		*ebase0 = 0;
358 		return (0);
359 	}
360 	if (device) {
361 		if (dflg && stb.st_dev != device) {
362 			*ebase0 = 0;
363 			return (0);
364 		}
365 	}
366 	else
367 		device = stb.st_dev;
368 
369 	/*
370 	 * If following links (-L) we need to keep track of all inodes
371 	 * visited so they are only visited/reported once and cycles
372 	 * are avoided.  Otherwise, only keep track of files which are
373 	 * hard links so they only get reported once, and of directories
374 	 * so we don't report a directory and its hierarchy more than
375 	 * once in the special case in which it lies under the
376 	 * hierarchy of a directory which is a hard link.
377 	 * Note:  Files with multiple links should only be counted
378 	 * once.  Since each inode could possibly be referenced by a
379 	 * symbolic link, we need to keep track of all inodes when -L
380 	 * is specified.
381 	 */
382 	if (Lflg || ((stb.st_mode & S_IFMT) == S_IFDIR) ||
383 	    (stb.st_nlink > 1)) {
384 		int rc;
385 		if ((rc = add_tnode(&tree, stb.st_dev, stb.st_ino)) != 1) {
386 			if (rc == 0) {
387 				/*
388 				 * This hierarchy, or file with multiple
389 				 * links, has already been visited/reported.
390 				 */
391 				return (0);
392 			} else {
393 				/*
394 				 * An error occurred while trying to add the
395 				 * node to the tree.
396 				 */
397 				if (rflg) {
398 					perror("du");
399 				}
400 				exitdu(1);
401 			}
402 		}
403 	}
404 	blocks = Aflg ? stb.st_size : stb.st_blocks;
405 
406 	/*
407 	 * If there are extended attributes on the current file, add their
408 	 * block usage onto the block count.  Note: Since pathconf() always
409 	 * follows symlinks, only test for extended attributes using pathconf()
410 	 * if we are following symlinks or the current file is not a symlink.
411 	 */
412 	if (curname && (follow_symlinks ||
413 	    ((stb.st_mode & S_IFMT) != S_IFLNK)) &&
414 	    pathconf(curname, _PC_XATTR_EXISTS) == 1) {
415 		if ((fd = attropen(curname, ".", O_RDONLY)) < 0) {
416 			if (rflg)
417 				perror(gettext(
418 				    "du: can't access extended attributes"));
419 		}
420 		else
421 		{
422 			tmpflg = sflg;
423 			sflg = 1;
424 			blocks += descend(NULL, fd, retcode, device);
425 			sflg = tmpflg;
426 		}
427 	}
428 	if ((stb.st_mode & S_IFMT) != S_IFDIR) {
429 		/*
430 		 * Don't print twice: if sflg, file will get printed in main().
431 		 * Otherwise, level == 0 means this file is listed on the
432 		 * command line, so print here; aflg means print all files.
433 		 */
434 		if (sflg == 0 && (aflg || level == 0))
435 			printsize(blocks, base);
436 		return (blocks);
437 	}
438 	if (dirp != NULL)
439 		/*
440 		 * Close the parent directory descriptor, we will reopen
441 		 * the directory when we pop up from this level of the
442 		 * recursion.
443 		 */
444 		(void) closedir(dirp);
445 	if (curname == NULL)
446 		dirp = fdopendir(curfd);
447 	else
448 		dirp = opendir(curname);
449 	if (dirp == NULL) {
450 		if (rflg) {
451 			(void) fprintf(stderr, "du: ");
452 			perror(base);
453 		}
454 		*retcode = 1;
455 		*ebase0 = 0;
456 		return (0);
457 	}
458 	level++;
459 	if (curname == NULL || (Lflg && S_ISLNK(stb1.st_mode))) {
460 		if (getcwd(dirbuf, PATH_MAX) == NULL) {
461 			if (rflg) {
462 				(void) fprintf(stderr, "du: ");
463 				perror(base);
464 			}
465 			exitdu(1);
466 		}
467 	}
468 	if ((curname ? (chdir(curname) < 0) : (fchdir(curfd) < 0))) {
469 		if (rflg) {
470 			(void) fprintf(stderr, "du: ");
471 			perror(base);
472 		}
473 		*retcode = 1;
474 		*ebase0 = 0;
475 		(void) closedir(dirp);
476 		dirp = NULL;
477 		level--;
478 		return (0);
479 	}
480 	while ((dp = readdir(dirp)) != NULL) {
481 		if ((strcmp(dp->d_name, ".") == 0) ||
482 		    (strcmp(dp->d_name, "..") == 0))
483 			continue;
484 		/*
485 		 * we're about to append "/" + dp->d_name
486 		 * onto end of base; make sure there's enough
487 		 * space
488 		 */
489 		while ((offset + strlen(dp->d_name) + 2) > base_len) {
490 			base_len = base_len * 2;
491 			if ((base = (char *)realloc(base,
492 			    base_len * sizeof (char))) == NULL) {
493 				if (rflg) {
494 					perror("du");
495 				}
496 				exitdu(1);
497 			}
498 			ebase = base + offset;
499 			ebase0 = base + offset0;
500 		}
501 		/* LINTED - unbounded string specifier */
502 		(void) sprintf(ebase, "/%s", dp->d_name);
503 		curoff = telldir(dirp);
504 		retval = descend(ebase + 1, 0, retcode, device);
505 			/* base may have been moved via realloc in descend() */
506 		ebase = base + offset;
507 		ebase0 = base + offset0;
508 		*ebase = 0;
509 		blocks += retval;
510 		if (dirp == NULL) {
511 			if ((dirp = opendir(".")) == NULL) {
512 				if (rflg) {
513 					(void) fprintf(stderr,
514 					    gettext("du: Can't reopen in "));
515 					perror(base);
516 				}
517 				*retcode = 1;
518 				level--;
519 				return (0);
520 			}
521 			seekdir(dirp, curoff);
522 		}
523 	}
524 	(void) closedir(dirp);
525 	level--;
526 	dirp = NULL;
527 	if (sflg == 0)
528 		printsize(blocks, base);
529 	if (curname == NULL || (Lflg && S_ISLNK(stb1.st_mode)))
530 		ret = chdir(dirbuf);
531 	else
532 		ret = chdir("..");
533 	if (ret < 0) {
534 		if (rflg) {
535 			(void) sprintf(strchr(base, '\0'), "/..");
536 			(void) fprintf(stderr,
537 			    gettext("du: Can't change dir to '..' in "));
538 			perror(base);
539 		}
540 		exitdu(1);
541 	}
542 	*ebase0 = 0;
543 	if (oflg)
544 		return (0);
545 	else
546 		return (blocks);
547 }
548 
549 static u_longlong_t
550 mkb(blkcnt_t n, size_t shift)
551 {
552 	u_longlong_t v = (u_longlong_t)n;
553 
554 	/*
555 	 * If hflg was not used, we need to output number of blocks
556 	 * rounded up. Block sizes can be 1M, 1K or 512 bytes.
557 	 * First, convert blocks to 1 byte units and then round up.
558 	 * If Aflg was used, the value is already in bytes.
559 	 */
560 	if (!Aflg)
561 		v <<= DEV_BSHIFT;
562 
563 	return (P2ROUNDUP(v, 1 << shift) >> shift);
564 }
565 
566 static void
567 printsize(blkcnt_t blocks, char *path)
568 {
569 	if (hflg) {
570 		u_longlong_t bsize = Aflg ? 1 : (1 << DEV_BSHIFT);
571 
572 		char buf[NN_NUMBUF_SZ] = { 0 };
573 
574 		nicenum_scale(blocks, bsize, buf, sizeof (buf), 0);
575 		(void) printf(FORMAT1, buf, path);
576 		return;
577 	}
578 
579 	if (bflg) {
580 		assert(Aflg);
581 		(void) printf(FORMAT2, (u_longlong_t)blocks, path);
582 	} else if (kflg) {
583 		(void) printf(FORMAT2, mkb(blocks, DEV_KSHIFT), path);
584 	} else if (mflg) {
585 		(void) printf(FORMAT2, mkb(blocks, DEV_MSHIFT), path);
586 	} else {
587 		(void) printf(FORMAT2, mkb(blocks, DEV_BSHIFT), path);
588 	}
589 }
590 
591 static void
592 exitdu(int exitcode)
593 {
594 	free(base);
595 	free(name);
596 	exit(exitcode);
597 }
598