xref: /freebsd/usr.bin/xargs/xargs.c (revision 5773cccf19ef7b97e56c1101aa481c43149224da)
1 /*-
2  * Copyright (c) 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * John B. Roll Jr.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. All advertising materials mentioning features or use of this software
17  *    must display the following acknowledgement:
18  *	This product includes software developed by the University of
19  *	California, Berkeley and its contributors.
20  * 4. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  *
36  * $xMach: xargs.c,v 1.6 2002/02/23 05:27:47 tim Exp $
37  */
38 
39 #ifndef lint
40 static const char copyright[] =
41 "@(#) Copyright (c) 1990, 1993\n\
42 	The Regents of the University of California.  All rights reserved.\n";
43 #endif /* not lint */
44 
45 #if 0
46 #ifndef lint
47 static char sccsid[] = "@(#)xargs.c	8.1 (Berkeley) 6/6/93";
48 #endif /* not lint */
49 #endif
50 
51 #include <sys/cdefs.h>
52 __FBSDID("$FreeBSD$");
53 
54 #include <sys/param.h>
55 #include <sys/wait.h>
56 
57 #include <err.h>
58 #include <errno.h>
59 #if (__FreeBSD_version >= 450002 && __FreeBSD_version < 500000) || \
60     __FreeBSD_version >= 500017
61 #include <langinfo.h>
62 #endif
63 #include <locale.h>
64 #include <paths.h>
65 #include <regex.h>
66 #include <stdio.h>
67 #include <stdlib.h>
68 #include <string.h>
69 #include <unistd.h>
70 
71 #include "pathnames.h"
72 
73 static void	parse_input(int, char *[]);
74 static void	prerun(int, char *[]);
75 static int	prompt(void);
76 static void	run(char **);
77 static void	usage(void);
78 void		strnsubst(char **, const char *, const char *, size_t);
79 
80 static char echo[] = _PATH_ECHO;
81 static char **av, **bxp, **ep, **exp, **xp;
82 static char *argp, *bbp, *ebp, *inpline, *p, *replstr;
83 static const char *eofstr;
84 static int count, insingle, indouble, pflag, tflag, Rflag, rval, zflag;
85 static int cnt, Iflag, jfound, Lflag, wasquoted, xflag;
86 
87 extern char **environ;
88 
89 int
90 main(int argc, char *argv[])
91 {
92 	long arg_max;
93 	int ch, Jflag, nargs, nflag, nline;
94 	size_t linelen;
95 
96 	inpline = replstr = NULL;
97 	ep = environ;
98 	eofstr = "";
99 	Jflag = nflag = 0;
100 
101 	(void)setlocale(LC_MESSAGES, "");
102 
103 	/*
104 	 * POSIX.2 limits the exec line length to ARG_MAX - 2K.  Running that
105 	 * caused some E2BIG errors, so it was changed to ARG_MAX - 4K.  Given
106 	 * that the smallest argument is 2 bytes in length, this means that
107 	 * the number of arguments is limited to:
108 	 *
109 	 *	 (ARG_MAX - 4K - LENGTH(utility + arguments)) / 2.
110 	 *
111 	 * We arbitrarily limit the number of arguments to 5000.  This is
112 	 * allowed by POSIX.2 as long as the resulting minimum exec line is
113 	 * at least LINE_MAX.  Realloc'ing as necessary is possible, but
114 	 * probably not worthwhile.
115 	 */
116 	nargs = 5000;
117 	if ((arg_max = sysconf(_SC_ARG_MAX)) == -1)
118 		errx(1, "sysconf(_SC_ARG_MAX) failed");
119 	nline = arg_max - 4 * 1024;
120 	while (*ep != NULL) {
121 		/* 1 byte for each '\0' */
122 		nline -= strlen(*ep++) + 1 + sizeof(*ep);
123 	}
124 	while ((ch = getopt(argc, argv, "0E:I:J:L:n:pR:s:tx")) != -1)
125 		switch(ch) {
126 		case 'E':
127 			eofstr = optarg;
128 			break;
129 		case 'I':
130 			Jflag = 0;
131 			Iflag = 1;
132 			Lflag = 1;
133 			replstr = optarg;
134 			break;
135 		case 'J':
136 			Iflag = 0;
137 			Jflag = 1;
138 			replstr = optarg;
139 			break;
140 		case 'L':
141 			Lflag = atoi(optarg);
142 			break;
143 		case 'n':
144 			nflag = 1;
145 			if ((nargs = atoi(optarg)) <= 0)
146 				errx(1, "illegal argument count");
147 			break;
148 		case 'p':
149 			pflag = 1;
150 			break;
151 		case 'R':
152 			if ((Rflag = atoi(optarg)) <= 0)
153 				errx(1, "illegal number of replacements");
154 			break;
155 		case 's':
156 			nline = atoi(optarg);
157 			break;
158 		case 't':
159 			tflag = 1;
160 			break;
161 		case 'x':
162 			xflag = 1;
163 			break;
164 		case '0':
165 			zflag = 1;
166 			break;
167 		case '?':
168 		default:
169 			usage();
170 	}
171 	argc -= optind;
172 	argv += optind;
173 
174 	if (!Iflag && Rflag)
175 		usage();
176 	if (Iflag && !Rflag)
177 		Rflag = 5;
178 	if (xflag && !nflag)
179 		usage();
180 	if (Iflag || Lflag)
181 		xflag = 1;
182 	if (replstr != NULL && *replstr == '\0')
183 		errx(1, "replstr may not be empty");
184 
185 	/*
186 	 * Allocate pointers for the utility name, the utility arguments,
187 	 * the maximum arguments to be read from stdin and the trailing
188 	 * NULL.
189 	 */
190 	linelen = 1 + argc + nargs + 1;
191 	if ((av = bxp = malloc(linelen * sizeof(char **))) == NULL)
192 		errx(1, "malloc failed");
193 
194 	/*
195 	 * Use the user's name for the utility as argv[0], just like the
196 	 * shell.  Echo is the default.  Set up pointers for the user's
197 	 * arguments.
198 	 */
199 	if (*argv == NULL)
200 		cnt = strlen(*bxp++ = echo);
201 	else {
202 		do {
203 			if (Jflag && strcmp(*argv, replstr) == 0) {
204 				char **avj;
205 				jfound = 1;
206 				argv++;
207 				for (avj = argv; *avj; avj++)
208 					cnt += strlen(*avj) + 1;
209 				break;
210 			}
211 			cnt += strlen(*bxp++ = *argv) + 1;
212 		} while (*++argv != NULL);
213 	}
214 
215 	/*
216 	 * Set up begin/end/traversing pointers into the array.  The -n
217 	 * count doesn't include the trailing NULL pointer, so the malloc
218 	 * added in an extra slot.
219 	 */
220 	exp = (xp = bxp) + nargs;
221 
222 	/*
223 	 * Allocate buffer space for the arguments read from stdin and the
224 	 * trailing NULL.  Buffer space is defined as the default or specified
225 	 * space, minus the length of the utility name and arguments.  Set up
226 	 * begin/end/traversing pointers into the array.  The -s count does
227 	 * include the trailing NULL, so the malloc didn't add in an extra
228 	 * slot.
229 	 */
230 	nline -= cnt;
231 	if (nline <= 0)
232 		errx(1, "insufficient space for command");
233 
234 	if ((bbp = malloc((size_t)(nline + 1))) == NULL)
235 		errx(1, "malloc failed");
236 	ebp = (argp = p = bbp) + nline - 1;
237 	for (;;)
238 		parse_input(argc, argv);
239 }
240 
241 static void
242 parse_input(int argc, char *argv[])
243 {
244 	int ch, foundeof;
245 	char **avj;
246 
247 	foundeof = 0;
248 
249 	switch(ch = getchar()) {
250 	case EOF:
251 		/* No arguments since last exec. */
252 		if (p == bbp)
253 			exit(rval);
254 		goto arg1;
255 	case ' ':
256 	case '\t':
257 		/* Quotes escape tabs and spaces. */
258 		if (insingle || indouble || zflag)
259 			goto addch;
260 		goto arg2;
261 	case '\0':
262 		if (zflag)
263 			goto arg2;
264 		goto addch;
265 	case '\n':
266 		count++;
267 		if (zflag)
268 			goto addch;
269 
270 		/* Quotes do not escape newlines. */
271 arg1:		if (insingle || indouble)
272 			errx(1, "unterminated quote");
273 arg2:
274 		foundeof = *eofstr != '\0' &&
275 		    strcmp(argp, eofstr) == 0;
276 
277 		/* Do not make empty args unless they are quoted */
278 		if ((argp != p || wasquoted) && !foundeof) {
279 			*p++ = '\0';
280 			*xp++ = argp;
281 			if (Iflag) {
282 				size_t curlen;
283 
284 				if (inpline == NULL)
285 					curlen = 0;
286 				else {
287 					/*
288 					 * If this string is not zero
289 					 * length, append a space for
290 					 * separation before the next
291 					 * argument.
292 					 */
293 					if ((curlen = strlen(inpline)))
294 						strcat(inpline, " ");
295 				}
296 				curlen++;
297 				/*
298 				 * Allocate enough to hold what we will
299 				 * be holding in a second, and to append
300 				 * a space next time through, if we have
301 				 * to.
302 				 */
303 				inpline = realloc(inpline, curlen + 2 +
304 				    strlen(argp));
305 				if (inpline == NULL)
306 					errx(1, "realloc failed");
307 				if (curlen == 1)
308 					strcpy(inpline, argp);
309 				else
310 					strcat(inpline, argp);
311 			}
312 		}
313 
314 		/*
315 		 * If max'd out on args or buffer, or reached EOF,
316 		 * run the command.  If xflag and max'd out on buffer
317 		 * but not on args, object.  Having reached the limit
318 		 * of input lines, as specified by -L is the same as
319 		 * maxing out on arguments.
320 		 */
321 		if (xp == exp || p > ebp || ch == EOF ||
322 		    (Lflag <= count && xflag) || foundeof) {
323 			if (xflag && xp != exp && p > ebp)
324 				errx(1, "insufficient space for arguments");
325 			if (jfound) {
326 				for (avj = argv; *avj; avj++)
327 					*xp++ = *avj;
328 			}
329 			prerun(argc, av);
330 			if (ch == EOF || foundeof)
331 				exit(rval);
332 			p = bbp;
333 			xp = bxp;
334 			count = 0;
335 		}
336 		argp = p;
337 		wasquoted = 0;
338 		break;
339 	case '\'':
340 		if (indouble || zflag)
341 			goto addch;
342 		insingle = !insingle;
343 		wasquoted = 1;
344 		break;
345 	case '"':
346 		if (insingle || zflag)
347 			goto addch;
348 		indouble = !indouble;
349 		wasquoted = 1;
350 		break;
351 	case '\\':
352 		if (zflag)
353 			goto addch;
354 		/* Backslash escapes anything, is escaped by quotes. */
355 		if (!insingle && !indouble && (ch = getchar()) == EOF)
356 			errx(1, "backslash at EOF");
357 		/* FALLTHROUGH */
358 	default:
359 addch:		if (p < ebp) {
360 			*p++ = ch;
361 			break;
362 		}
363 
364 		/* If only one argument, not enough buffer space. */
365 		if (bxp == xp)
366 			errx(1, "insufficient space for argument");
367 		/* Didn't hit argument limit, so if xflag object. */
368 		if (xflag)
369 			errx(1, "insufficient space for arguments");
370 
371 		if (jfound) {
372 			for (avj = argv; *avj; avj++)
373 				*xp++ = *avj;
374 		}
375 		prerun(argc, av);
376 		xp = bxp;
377 		cnt = ebp - argp;
378 		memcpy(bbp, argp, (size_t)cnt);
379 		p = (argp = bbp) + cnt;
380 		*p++ = ch;
381 		break;
382 	}
383 	return;
384 }
385 
386 /*
387  * Do things necessary before run()'ing, such as -I substitution,
388  * and then call run().
389  */
390 static void
391 prerun(int argc, char *argv[])
392 {
393 	char **tmp, **tmp2, **avj;
394 	int repls;
395 
396 	repls = Rflag;
397 
398 	if (argc == 0 || repls == 0) {
399 		*xp = NULL;
400 		run(argv);
401 		return;
402 	}
403 
404 	avj = argv;
405 
406 	/*
407 	 * Allocate memory to hold the argument list, and
408 	 * a NULL at the tail.
409 	 */
410 	tmp = malloc((argc + 1) * sizeof(char**));
411 	if (tmp == NULL)
412 		errx(1, "malloc failed");
413 	tmp2 = tmp;
414 
415 	/*
416 	 * Save the first argument and iterate over it, we
417 	 * cannot do strnsubst() to it.
418 	 */
419 	if ((*tmp++ = strdup(*avj++)) == NULL)
420 		errx(1, "strdup failed");
421 
422 	/*
423 	 * For each argument to utility, if we have not used up
424 	 * the number of replacements we are allowed to do, and
425 	 * if the argument contains at least one occurrence of
426 	 * replstr, call strnsubst(), else just save the string.
427 	 * Iterations over elements of avj and tmp are done
428 	 * where appropriate.
429 	 */
430 	while (--argc) {
431 		*tmp = *avj++;
432 		if (repls && strstr(*tmp, replstr) != NULL) {
433 			strnsubst(tmp++, replstr, inpline, (size_t)255);
434 			repls--;
435 		} else {
436 			if ((*tmp = strdup(*tmp)) == NULL)
437 				errx(1, "strdup failed");
438 			tmp++;
439 		}
440 	}
441 
442 	/*
443 	 * Run it.
444 	 */
445 	*tmp = NULL;
446 	run(tmp2);
447 
448 	/*
449 	 * Walk from the tail to the head, free along the way.
450 	 */
451 	for (; tmp2 != tmp; tmp--)
452 		free(*tmp);
453 	/*
454 	 * Now free the list itself.
455 	 */
456 	free(tmp2);
457 
458 	/*
459 	 * Free the input line buffer, if we have one.
460 	 */
461 	if (inpline != NULL) {
462 		free(inpline);
463 		inpline = NULL;
464 	}
465 }
466 
467 static void
468 run(char **argv)
469 {
470 	volatile int childerr;
471 	char **avec;
472 	pid_t pid;
473 	int status;
474 
475 	/*
476 	 * If the user wants to be notified of each command before it is
477 	 * executed, notify them.  If they want the notification to be
478 	 * followed by a prompt, then prompt them.
479 	 */
480 	if (tflag || pflag) {
481 		(void)fprintf(stderr, "%s", *argv);
482 		for (avec = argv + 1; *avec != NULL; ++avec)
483 			(void)fprintf(stderr, " %s", *avec);
484 		/*
485 		 * If the user has asked to be prompted, do so.
486 		 */
487 		if (pflag)
488 			/*
489 			 * If they asked not to exec, return without execution
490 			 * but if they asked to, go to the execution.  If we
491 			 * could not open their tty, break the switch and drop
492 			 * back to -t behaviour.
493 			 */
494 			switch (prompt()) {
495 			case 0:
496 				return;
497 			case 1:
498 				goto exec;
499 			case 2:
500 				break;
501 			}
502 		(void)fprintf(stderr, "\n");
503 		(void)fflush(stderr);
504 	}
505 exec:
506 	childerr = 0;
507 	switch(pid = vfork()) {
508 	case -1:
509 		err(1, "vfork");
510 	case 0:
511 		execvp(argv[0], argv);
512 		childerr = errno;
513 		_exit(1);
514 	}
515 	pid = waitpid(pid, &status, 0);
516 	if (pid == -1)
517 		err(1, "waitpid");
518 	/* If we couldn't invoke the utility, exit. */
519 	if (childerr != 0)
520 		err(childerr == ENOENT ? 127 : 126, "%s", *argv);
521 	/* If utility signaled or exited with a value of 255, exit 1-125. */
522 	if (WIFSIGNALED(status) || WEXITSTATUS(status) == 255)
523 		exit(1);
524 	if (WEXITSTATUS(status))
525 		rval = 1;
526 }
527 
/*
 * Prompt the user about running a command.
 *
 * Writes "?..." to stderr, reads one line from the controlling terminal
 * and matches it against the locale's affirmative-response expression
 * (or "^[yY]" where langinfo is not available).
 *
 * Returns 1 if the answer is affirmative, 0 if it is not (or if nothing
 * could be read or the expression could not be compiled), and 2 if the
 * terminal could not be opened.
 */
static int
prompt(void)
{
	regex_t cre;
	size_t rsize;
	int match;
	char *response, *answer;
	const char *yesexpr;
	FILE *ttyfp;

	if ((ttyfp = fopen(_PATH_TTY, "r")) == NULL)
		return (2);	/* Indicate that the TTY failed to open. */
	(void)fprintf(stderr, "?...");
	(void)fflush(stderr);
	if ((response = fgetln(ttyfp, &rsize)) == NULL) {
		(void)fclose(ttyfp);
		return (0);
	}

	/*
	 * fgetln() hands back rsize bytes that are not NUL-terminated and
	 * that belong to the stream, while regexec() needs a C string.
	 * Take a terminated private copy before closing the stream.
	 */
	if ((answer = malloc(rsize + 1)) == NULL)
		errx(1, "malloc failed");
	memcpy(answer, response, rsize);
	answer[rsize] = '\0';
	(void)fclose(ttyfp);

#if (__FreeBSD_version >= 450002 && __FreeBSD_version < 500000) || \
    __FreeBSD_version >= 500017
	yesexpr = nl_langinfo(YESEXPR);
#else
	yesexpr = "^[yY]";
#endif
	/* YESEXPR is specified as an extended regular expression. */
	if (regcomp(&cre, yesexpr, REG_EXTENDED) != 0) {
		free(answer);
		return (0);
	}
	match = regexec(&cre, answer, 0, NULL, 0);
	regfree(&cre);
	free(answer);
	return (match == 0);
}
561 
/*
 * Print the command synopsis on stderr and exit with status 1.
 */
static void
usage(void)
{
	/* -x is only accepted together with -n, hence the nested brackets. */
	(void)fprintf(stderr,
"usage: xargs [-0pt] [-E eofstr] [-I replstr [-R replacements]] [-J replstr]\n"
"             [-L number] [-n number [-x]] [-s size] [utility [argument ...]]\n");
	exit(1);
}
570