xref: /freebsd/usr.bin/xargs/xargs.c (revision 2e1417489338b971e5fd599ff48b5f65df9e8d3b)
1 /*-
2  * Copyright (c) 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * John B. Roll Jr.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 4. Neither the name of the University nor the names of its contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $xMach: xargs.c,v 1.6 2002/02/23 05:27:47 tim Exp $
33  */
34 
35 #if 0
36 #ifndef lint
37 static const char copyright[] =
38 "@(#) Copyright (c) 1990, 1993\n\
39 	The Regents of the University of California.  All rights reserved.\n";
40 #endif /* not lint */
41 
42 #ifndef lint
43 static char sccsid[] = "@(#)xargs.c	8.1 (Berkeley) 6/6/93";
44 #endif /* not lint */
45 #endif
46 #include <sys/cdefs.h>
47 __FBSDID("$FreeBSD$");
48 
#include <sys/param.h>
#include <sys/wait.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <langinfo.h>
#include <limits.h>
#include <locale.h>
#include <paths.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "pathnames.h"
65 
66 static void	parse_input(int, char *[]);
67 static void	prerun(int, char *[]);
68 static int	prompt(void);
69 static void	run(char **);
70 static void	usage(void);
71 void		strnsubst(char **, const char *, const char *, size_t);
72 static pid_t	xwait(int block, int *status);
73 static void	waitchildren(const char *, int);
74 static void	pids_init(void);
75 static int	pids_empty(void);
76 static int	pids_full(void);
77 static void	pids_add(pid_t pid);
78 static int	pids_remove(pid_t pid);
79 static int	findslot(pid_t pid);
80 static int	findfreeslot(void);
81 static void	clearslot(int slot);
82 
83 static char echo[] = _PATH_ECHO;
84 static char **av, **bxp, **ep, **endxp, **xp;
85 static char *argp, *bbp, *ebp, *inpline, *p, *replstr;
86 static const char *eofstr;
87 static int count, insingle, indouble, oflag, pflag, tflag, Rflag, rval, zflag;
88 static int cnt, Iflag, jfound, Lflag, Sflag, wasquoted, xflag;
89 static int curprocs, maxprocs;
90 static pid_t *childpids;
91 
92 static volatile int childerr;
93 
94 extern char **environ;
95 
96 int
97 main(int argc, char *argv[])
98 {
99 	long arg_max;
100 	int ch, Jflag, nargs, nflag, nline;
101 	size_t linelen;
102 	char *endptr;
103 
104 	inpline = replstr = NULL;
105 	ep = environ;
106 	eofstr = "";
107 	Jflag = nflag = 0;
108 
109 	(void)setlocale(LC_ALL, "");
110 
111 	/*
112 	 * POSIX.2 limits the exec line length to ARG_MAX - 2K.  Running that
113 	 * caused some E2BIG errors, so it was changed to ARG_MAX - 4K.  Given
114 	 * that the smallest argument is 2 bytes in length, this means that
115 	 * the number of arguments is limited to:
116 	 *
117 	 *	 (ARG_MAX - 4K - LENGTH(utility + arguments)) / 2.
118 	 *
119 	 * We arbitrarily limit the number of arguments to 5000.  This is
120 	 * allowed by POSIX.2 as long as the resulting minimum exec line is
121 	 * at least LINE_MAX.  Realloc'ing as necessary is possible, but
122 	 * probably not worthwhile.
123 	 */
124 	nargs = 5000;
125 	if ((arg_max = sysconf(_SC_ARG_MAX)) == -1)
126 		errx(1, "sysconf(_SC_ARG_MAX) failed");
127 	nline = arg_max - 4 * 1024;
128 	while (*ep != NULL) {
129 		/* 1 byte for each '\0' */
130 		nline -= strlen(*ep++) + 1 + sizeof(*ep);
131 	}
132 	maxprocs = 1;
133 	while ((ch = getopt(argc, argv, "0E:I:J:L:n:oP:pR:S:s:rtx")) != -1)
134 		switch (ch) {
135 		case 'E':
136 			eofstr = optarg;
137 			break;
138 		case 'I':
139 			Jflag = 0;
140 			Iflag = 1;
141 			Lflag = 1;
142 			replstr = optarg;
143 			break;
144 		case 'J':
145 			Iflag = 0;
146 			Jflag = 1;
147 			replstr = optarg;
148 			break;
149 		case 'L':
150 			Lflag = atoi(optarg);
151 			break;
152 		case 'n':
153 			nflag = 1;
154 			if ((nargs = atoi(optarg)) <= 0)
155 				errx(1, "illegal argument count");
156 			break;
157 		case 'o':
158 			oflag = 1;
159 			break;
160 		case 'P':
161 			if ((maxprocs = atoi(optarg)) <= 0)
162 				errx(1, "max. processes must be >0");
163 			break;
164 		case 'p':
165 			pflag = 1;
166 			break;
167 		case 'R':
168 			Rflag = strtol(optarg, &endptr, 10);
169 			if (*endptr != '\0')
170 				errx(1, "replacements must be a number");
171 			break;
172 		case 'r':
173 			/* GNU compatibility */
174 			break;
175 		case 'S':
176 			Sflag = strtoul(optarg, &endptr, 10);
177 			if (*endptr != '\0')
178 				errx(1, "replsize must be a number");
179 			break;
180 		case 's':
181 			nline = atoi(optarg);
182 			break;
183 		case 't':
184 			tflag = 1;
185 			break;
186 		case 'x':
187 			xflag = 1;
188 			break;
189 		case '0':
190 			zflag = 1;
191 			break;
192 		case '?':
193 		default:
194 			usage();
195 	}
196 	argc -= optind;
197 	argv += optind;
198 
199 	if (!Iflag && Rflag)
200 		usage();
201 	if (!Iflag && Sflag)
202 		usage();
203 	if (Iflag && !Rflag)
204 		Rflag = 5;
205 	if (Iflag && !Sflag)
206 		Sflag = 255;
207 	if (xflag && !nflag)
208 		usage();
209 	if (Iflag || Lflag)
210 		xflag = 1;
211 	if (replstr != NULL && *replstr == '\0')
212 		errx(1, "replstr may not be empty");
213 
214 	pids_init();
215 
216 	/*
217 	 * Allocate pointers for the utility name, the utility arguments,
218 	 * the maximum arguments to be read from stdin and the trailing
219 	 * NULL.
220 	 */
221 	linelen = 1 + argc + nargs + 1;
222 	if ((av = bxp = malloc(linelen * sizeof(char **))) == NULL)
223 		errx(1, "malloc failed");
224 
225 	/*
226 	 * Use the user's name for the utility as argv[0], just like the
227 	 * shell.  Echo is the default.  Set up pointers for the user's
228 	 * arguments.
229 	 */
230 	if (*argv == NULL)
231 		cnt = strlen(*bxp++ = echo);
232 	else {
233 		do {
234 			if (Jflag && strcmp(*argv, replstr) == 0) {
235 				char **avj;
236 				jfound = 1;
237 				argv++;
238 				for (avj = argv; *avj; avj++)
239 					cnt += strlen(*avj) + 1;
240 				break;
241 			}
242 			cnt += strlen(*bxp++ = *argv) + 1;
243 		} while (*++argv != NULL);
244 	}
245 
246 	/*
247 	 * Set up begin/end/traversing pointers into the array.  The -n
248 	 * count doesn't include the trailing NULL pointer, so the malloc
249 	 * added in an extra slot.
250 	 */
251 	endxp = (xp = bxp) + nargs;
252 
253 	/*
254 	 * Allocate buffer space for the arguments read from stdin and the
255 	 * trailing NULL.  Buffer space is defined as the default or specified
256 	 * space, minus the length of the utility name and arguments.  Set up
257 	 * begin/end/traversing pointers into the array.  The -s count does
258 	 * include the trailing NULL, so the malloc didn't add in an extra
259 	 * slot.
260 	 */
261 	nline -= cnt;
262 	if (nline <= 0)
263 		errx(1, "insufficient space for command");
264 
265 	if ((bbp = malloc((size_t)(nline + 1))) == NULL)
266 		errx(1, "malloc failed");
267 	ebp = (argp = p = bbp) + nline - 1;
268 	for (;;)
269 		parse_input(argc, argv);
270 }
271 
272 static void
273 parse_input(int argc, char *argv[])
274 {
275 	int ch, foundeof;
276 	char **avj;
277 
278 	foundeof = 0;
279 
280 	switch (ch = getchar()) {
281 	case EOF:
282 		/* No arguments since last exec. */
283 		if (p == bbp) {
284 			waitchildren(*argv, 1);
285 			exit(rval);
286 		}
287 		goto arg1;
288 	case ' ':
289 	case '\t':
290 		/* Quotes escape tabs and spaces. */
291 		if (insingle || indouble || zflag)
292 			goto addch;
293 		goto arg2;
294 	case '\0':
295 		if (zflag) {
296 			/*
297 			 * Increment 'count', so that nulls will be treated
298 			 * as end-of-line, as well as end-of-argument.  This
299 			 * is needed so -0 works properly with -I and -L.
300 			 */
301 			count++;
302 			goto arg2;
303 		}
304 		goto addch;
305 	case '\n':
306 		if (zflag)
307 			goto addch;
308 		count++;	    /* Indicate end-of-line (used by -L) */
309 
310 		/* Quotes do not escape newlines. */
311 arg1:		if (insingle || indouble)
312 			errx(1, "unterminated quote");
313 arg2:
314 		foundeof = *eofstr != '\0' &&
315 		    strncmp(argp, eofstr, p - argp) == 0;
316 
317 		/* Do not make empty args unless they are quoted */
318 		if ((argp != p || wasquoted) && !foundeof) {
319 			*p++ = '\0';
320 			*xp++ = argp;
321 			if (Iflag) {
322 				size_t curlen;
323 
324 				if (inpline == NULL)
325 					curlen = 0;
326 				else {
327 					/*
328 					 * If this string is not zero
329 					 * length, append a space for
330 					 * separation before the next
331 					 * argument.
332 					 */
333 					if ((curlen = strlen(inpline)))
334 						strcat(inpline, " ");
335 				}
336 				curlen++;
337 				/*
338 				 * Allocate enough to hold what we will
339 				 * be holding in a second, and to append
340 				 * a space next time through, if we have
341 				 * to.
342 				 */
343 				inpline = realloc(inpline, curlen + 2 +
344 				    strlen(argp));
345 				if (inpline == NULL)
346 					errx(1, "realloc failed");
347 				if (curlen == 1)
348 					strcpy(inpline, argp);
349 				else
350 					strcat(inpline, argp);
351 			}
352 		}
353 
354 		/*
355 		 * If max'd out on args or buffer, or reached EOF,
356 		 * run the command.  If xflag and max'd out on buffer
357 		 * but not on args, object.  Having reached the limit
358 		 * of input lines, as specified by -L is the same as
359 		 * maxing out on arguments.
360 		 */
361 		if (xp == endxp || p > ebp || ch == EOF ||
362 		    (Lflag <= count && xflag) || foundeof) {
363 			if (xflag && xp != endxp && p > ebp)
364 				errx(1, "insufficient space for arguments");
365 			if (jfound) {
366 				for (avj = argv; *avj; avj++)
367 					*xp++ = *avj;
368 			}
369 			prerun(argc, av);
370 			if (ch == EOF || foundeof) {
371 				waitchildren(*argv, 1);
372 				exit(rval);
373 			}
374 			p = bbp;
375 			xp = bxp;
376 			count = 0;
377 		}
378 		argp = p;
379 		wasquoted = 0;
380 		break;
381 	case '\'':
382 		if (indouble || zflag)
383 			goto addch;
384 		insingle = !insingle;
385 		wasquoted = 1;
386 		break;
387 	case '"':
388 		if (insingle || zflag)
389 			goto addch;
390 		indouble = !indouble;
391 		wasquoted = 1;
392 		break;
393 	case '\\':
394 		if (zflag)
395 			goto addch;
396 		/* Backslash escapes anything, is escaped by quotes. */
397 		if (!insingle && !indouble && (ch = getchar()) == EOF)
398 			errx(1, "backslash at EOF");
399 		/* FALLTHROUGH */
400 	default:
401 addch:		if (p < ebp) {
402 			*p++ = ch;
403 			break;
404 		}
405 
406 		/* If only one argument, not enough buffer space. */
407 		if (bxp == xp)
408 			errx(1, "insufficient space for argument");
409 		/* Didn't hit argument limit, so if xflag object. */
410 		if (xflag)
411 			errx(1, "insufficient space for arguments");
412 
413 		if (jfound) {
414 			for (avj = argv; *avj; avj++)
415 				*xp++ = *avj;
416 		}
417 		prerun(argc, av);
418 		xp = bxp;
419 		cnt = ebp - argp;
420 		memcpy(bbp, argp, (size_t)cnt);
421 		p = (argp = bbp) + cnt;
422 		*p++ = ch;
423 		break;
424 	}
425 }
426 
427 /*
428  * Do things necessary before run()'ing, such as -I substitution,
429  * and then call run().
430  */
431 static void
432 prerun(int argc, char *argv[])
433 {
434 	char **tmp, **tmp2, **avj;
435 	int repls;
436 
437 	repls = Rflag;
438 
439 	if (argc == 0 || repls == 0) {
440 		*xp = NULL;
441 		run(argv);
442 		return;
443 	}
444 
445 	avj = argv;
446 
447 	/*
448 	 * Allocate memory to hold the argument list, and
449 	 * a NULL at the tail.
450 	 */
451 	tmp = malloc((argc + 1) * sizeof(char**));
452 	if (tmp == NULL)
453 		errx(1, "malloc failed");
454 	tmp2 = tmp;
455 
456 	/*
457 	 * Save the first argument and iterate over it, we
458 	 * cannot do strnsubst() to it.
459 	 */
460 	if ((*tmp++ = strdup(*avj++)) == NULL)
461 		errx(1, "strdup failed");
462 
463 	/*
464 	 * For each argument to utility, if we have not used up
465 	 * the number of replacements we are allowed to do, and
466 	 * if the argument contains at least one occurrence of
467 	 * replstr, call strnsubst(), else just save the string.
468 	 * Iterations over elements of avj and tmp are done
469 	 * where appropriate.
470 	 */
471 	while (--argc) {
472 		*tmp = *avj++;
473 		if (repls && strstr(*tmp, replstr) != NULL) {
474 			strnsubst(tmp++, replstr, inpline, (size_t)Sflag);
475 			if (repls > 0)
476 				repls--;
477 		} else {
478 			if ((*tmp = strdup(*tmp)) == NULL)
479 				errx(1, "strdup failed");
480 			tmp++;
481 		}
482 	}
483 
484 	/*
485 	 * Run it.
486 	 */
487 	*tmp = NULL;
488 	run(tmp2);
489 
490 	/*
491 	 * Walk from the tail to the head, free along the way.
492 	 */
493 	for (; tmp2 != tmp; tmp--)
494 		free(*tmp);
495 	/*
496 	 * Now free the list itself.
497 	 */
498 	free(tmp2);
499 
500 	/*
501 	 * Free the input line buffer, if we have one.
502 	 */
503 	if (inpline != NULL) {
504 		free(inpline);
505 		inpline = NULL;
506 	}
507 }
508 
509 static void
510 run(char **argv)
511 {
512 	pid_t pid;
513 	int fd;
514 	char **avec;
515 
516 	/*
517 	 * If the user wants to be notified of each command before it is
518 	 * executed, notify them.  If they want the notification to be
519 	 * followed by a prompt, then prompt them.
520 	 */
521 	if (tflag || pflag) {
522 		(void)fprintf(stderr, "%s", *argv);
523 		for (avec = argv + 1; *avec != NULL; ++avec)
524 			(void)fprintf(stderr, " %s", *avec);
525 		/*
526 		 * If the user has asked to be prompted, do so.
527 		 */
528 		if (pflag)
529 			/*
530 			 * If they asked not to exec, return without execution
531 			 * but if they asked to, go to the execution.  If we
532 			 * could not open their tty, break the switch and drop
533 			 * back to -t behaviour.
534 			 */
535 			switch (prompt()) {
536 			case 0:
537 				return;
538 			case 1:
539 				goto exec;
540 			case 2:
541 				break;
542 			}
543 		(void)fprintf(stderr, "\n");
544 		(void)fflush(stderr);
545 	}
546 exec:
547 	childerr = 0;
548 	switch (pid = vfork()) {
549 	case -1:
550 		err(1, "vfork");
551 	case 0:
552 		if (oflag) {
553 			if ((fd = open(_PATH_TTY, O_RDONLY)) == -1)
554 				err(1, "can't open /dev/tty");
555 		} else {
556 			fd = open(_PATH_DEVNULL, O_RDONLY);
557 		}
558 		if (fd > STDIN_FILENO) {
559 			if (dup2(fd, STDIN_FILENO) != 0)
560 				err(1, "can't dup2 to stdin");
561 			close(fd);
562 		}
563 		execvp(argv[0], argv);
564 		childerr = errno;
565 		_exit(1);
566 	}
567 	pids_add(pid);
568 	waitchildren(*argv, 0);
569 }
570 
/*
 * Reap one tracked child and hand back its pid and exit status.
 *
 * Children that are not in the pid table are reaped and discarded.
 * With no tracked children at all, errno is set to ECHILD and -1 is
 * returned without calling waitpid().  When block is non-zero the call
 * waits until a child exits; otherwise it returns 0 as soon as waitpid()
 * reports that nothing has exited yet.
 */
static pid_t
xwait(int block, int *status)
{
	const int options = block ? 0 : WNOHANG;
	pid_t pid;

	if (pids_empty()) {
		errno = ECHILD;
		return (-1);
	}

	for (;;) {
		pid = waitpid(-1, status, options);
		/* Stop on error, on "nothing yet", or on one of ours. */
		if (pid <= 0 || pids_remove(pid))
			return (pid);
	}
}
594 
595 static void
596 waitchildren(const char *name, int waitall)
597 {
598 	pid_t pid;
599 	int status;
600 
601 	while ((pid = xwait(waitall || pids_full(), &status)) > 0) {
602 		/* If we couldn't invoke the utility, exit. */
603 		if (childerr != 0) {
604 			errno = childerr;
605 			err(errno == ENOENT ? 127 : 126, "%s", name);
606 		}
607 		/*
608 		 * If utility signaled or exited with a value of 255,
609 		 * exit 1-125.
610 		 */
611 		if (WIFSIGNALED(status) || WEXITSTATUS(status) == 255)
612 			exit(1);
613 		if (WEXITSTATUS(status))
614 			rval = 1;
615 	}
616 
617 	if (pid == -1 && errno != ECHILD)
618 		err(1, "waitpid");
619 }
620 
621 #define	NOPID	(0)
622 
623 static void
624 pids_init(void)
625 {
626 	int i;
627 
628 	if ((childpids = malloc(maxprocs * sizeof(*childpids))) == NULL)
629 		errx(1, "malloc failed");
630 
631 	for (i = 0; i < maxprocs; i++)
632 		clearslot(i);
633 }
634 
635 static int
636 pids_empty(void)
637 {
638 	return (curprocs == 0);
639 }
640 
641 static int
642 pids_full(void)
643 {
644 	return (curprocs >= maxprocs);
645 }
646 
647 static void
648 pids_add(pid_t pid)
649 {
650 	int slot;
651 
652 	slot = findfreeslot();
653 	childpids[slot] = pid;
654 	curprocs++;
655 }
656 
657 static int
658 pids_remove(pid_t pid)
659 {
660 	int slot;
661 
662 	if ((slot = findslot(pid)) < 0)
663 		return (0);
664 
665 	clearslot(slot);
666 	curprocs--;
667 	return (1);
668 }
669 
670 static int
671 findfreeslot(void)
672 {
673 	int slot;
674 
675 	if ((slot = findslot(NOPID)) < 0)
676 		errx(1, "internal error: no free pid slot");
677 
678 	return (slot);
679 }
680 
681 static int
682 findslot(pid_t pid)
683 {
684 	int slot;
685 
686 	for (slot = 0; slot < maxprocs; slot++)
687 		if (childpids[slot] == pid)
688 			return (slot);
689 
690 	return (-1);
691 }
692 
693 static void
694 clearslot(int slot)
695 {
696 	childpids[slot] = NOPID;
697 }
698 
/*
 * Prompt the user about running a command.
 *
 * Writes "?..." to stderr, reads one line from the controlling terminal
 * and matches it against the locale's affirmative-response expression.
 *
 * Returns 1 if the answer is affirmative, 0 if it is not (or if no answer
 * could be read or the expression could not be compiled), and 2 if the
 * terminal could not be opened.
 */
static int
prompt(void)
{
	regex_t cre;
	size_t rsize;
	int match;
	char *answer, *response;
	FILE *ttyfp;

	if ((ttyfp = fopen(_PATH_TTY, "r")) == NULL)
		return (2);	/* Indicate that the TTY failed to open. */
	(void)fprintf(stderr, "?...");
	(void)fflush(stderr);
	/* YESEXPR is an extended regular expression, so compile it as one. */
	if ((response = fgetln(ttyfp, &rsize)) == NULL ||
	    regcomp(&cre, nl_langinfo(YESEXPR), REG_EXTENDED) != 0) {
		(void)fclose(ttyfp);
		return (0);
	}

	/*
	 * The line returned by fgetln() is not NUL-terminated and, at end
	 * of file, need not end in a newline.  Work on a terminated copy
	 * and drop the newline only when one is actually present, so that
	 * the last character of an unterminated answer is preserved.
	 */
	if (rsize > 0 && response[rsize - 1] == '\n')
		rsize--;
	if ((answer = malloc(rsize + 1)) == NULL)
		errx(1, "malloc failed");
	memcpy(answer, response, rsize);
	answer[rsize] = '\0';

	match = regexec(&cre, answer, 0, NULL, 0);
	free(answer);
	(void)fclose(ttyfp);
	regfree(&cre);
	return (match == 0);
}
726 
/* Print the command synopsis on stderr and exit with status 1. */
static void
usage(void)
{
	static const char synopsis[] =
"usage: xargs [-0opt] [-E eofstr] [-I replstr [-R replacements] [-S replsize]]\n"
"             [-J replstr] [-L number] [-n number [-x]] [-P maxprocs]\n"
"             [-s size] [utility [argument ...]]\n";

	fputs(synopsis, stderr);
	exit(1);
}
735 }
736