xref: /illumos-gate/usr/src/cmd/xargs/xargs.c (revision 437220cd296f6d8b6654d6d52508b40b1e2d1ac7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <stdio.h>
33 #include <sys/types.h>
34 #include <sys/wait.h>
35 #include <unistd.h>
36 #include <fcntl.h>
37 #include <string.h>
38 #include <stdarg.h>
39 #include <stdlib.h>
40 #include <limits.h>
41 #include <wchar.h>
42 #include <locale.h>
43 #include <langinfo.h>
44 #include <stropts.h>
45 #include <poll.h>
46 #include <errno.h>
47 #include <stdarg.h>
48 #include "getresponse.h"
49 
/* 'where' values for queue(): push at the front or append at the end	*/
#define	HEAD	0
#define	TAIL	1
#define	FALSE 0
#define	TRUE 1
/* insert() builds each substituted argument in a buffer of MAXSBUF+1	*/
#define	MAXSBUF 255
/* size of ins_buf, which holds all substituted arguments of one run	*/
#define	MAXIBUF 512
/* size of saveargv: max # of command arguments containing the replstr	*/
#define	MAXINSERTS 5
/* size of argbuf, default for BUFLIM, and read(2) chunk size in getchr() */
#define	BUFSIZE LINE_MAX
/* arglist holds MAXARGS entries plus the terminating null pointer	*/
#define	MAXARGS 255
#define	INSPAT_STR	"{}"	/* default replstr string for -[Ii]	*/
/* consulted by lcall() when fork(2) fails				*/
#define	FORK_RETRY	5

#define	QBUF_STARTLEN 255  /* start size of growable string buffer */
#define	QBUF_INC 100	   /* how much to grow a growable string by */
64 
/* "blank" character class handle; set in main(), tested in getarg()	*/
static wctype_t	blank;
/* argument vector handed to execvp() by way of lcall()			*/
static char	*arglist[MAXARGS+1];
/* storage for the argument strings that arglist points at		*/
static char	argbuf[BUFSIZE+1];
/* next free byte in argbuf						*/
static char	*next = argbuf;
/*
 * argument that pushed the previous command line over the size limit
 * (recorded by checklen()); main() adds it first to the next command line.
 */
static char	*lastarg = "";
/* next free slot in arglist						*/
static char	**ARGV = arglist;
/* logical end-of-file string (-e/-E); an empty string disables it	*/
static char	*LEOF = "_";
/* replstr looked for in the command arguments (-I/-i)			*/
static char	*INSPAT = INSPAT_STR;
/* holds the command arguments after replstr substitution (addibuf())	*/
static char	ins_buf[MAXIBUF];
/* next free byte in ins_buf						*/
static char	*p_ibuf;

/* one entry per command argument that contains the replstr		*/
static struct inserts {
	char	**p_ARGV;	/* where to put newarg ptr in arg list */
	char	*p_skel;	/* ptr to arg template */
} saveargv[MAXINSERTS];

/*
 * number of input bytes handed out by getchr(), less anything pushed
 * back with queue(..., HEAD); main() seeks stdin to it before returning.
 */
static off_t	file_offset = 0;
/* descriptor for /dev/tty when -p is in effect, otherwise -1		*/
static int	PROMPT = -1;
/* maximum command line size (-s)					*/
static int	BUFLIM = BUFSIZE;
/* -n value: arguments per command line, 0 if not limited		*/
static int	N_ARGS = 0;
/* arguments read so far for the current command line			*/
static int	N_args = 0;
/* input lines completed so far for the current command line		*/
static int	N_lines = 0;
/* -x was given								*/
static int	DASHX = FALSE;
/* cleared by getarg() at end of input or at the logical EOF string	*/
static int	MORE = TRUE;
/* lines per command line (-L/-l); TRUE (1) in insert mode		*/
static int	PER_LINE = FALSE;
/* an error has been detected; the current command must not be run	*/
static int	ERR = FALSE;
/*
 * cleared by ermsg(), by checklen() on overflow, and by getarg() when the
 * current command line is complete; main() sets it again before each run.
 */
static int	OK = TRUE;
/* when set, checklen() and getarg() treat an over-long list as an error */
static int	LEGAL = FALSE;
/* echo each command line to stderr before running it (-t, -p)		*/
static int	TRACE = FALSE;
/* insert mode (-I/-i) is in effect					*/
static int	INSERT = FALSE;
/* running size of the command line: sum of strlen()+1 over its args	*/
static int	linesize = 0;
/* bytes of ins_buf used so far						*/
static int	ibufsize = 0;
static int	exitstat = 0;	/* our exit status			*/
static int	mac;		/* modified argc, after parsing		*/
static char	**mav;		/* modified argv, after parsing		*/
static int	n_inserts;	/* # of insertions.			*/
/* 0 = not in quotes, 1 = inside '...', 2 = inside "..." (see getarg()) */
static int	inquote = 0;	/* processing a quoted string		*/
102 
/*
 * the pio structure describes one buffer of input that has been read (or
 * pushed back) but not yet consumed. it is used to save any pending input
 * before the user replies to a prompt, for the appropriate processing
 * later; getchr() also places every chunk it reads from stdin here, and
 * ungetwchr()/getarg() push data back onto the front of the list.
 *
 * 'start' is the address that getchr() passes to free() once the buffer
 * has been used up, so it must have come from malloc().
 */
typedef struct pio {
	struct pio *next;	/* next in stack			*/
	char *start;		/* starting addr of the buffer		*/
	char *cur;		/* ptr to current char in buf		*/
	size_t length;		/* number of bytes remaining		*/
} pio;

/* head of the list of pending input; NULL when nothing is buffered	*/
static pio *queued_data = NULL;
116 
/* our usage message:							*/
#define	USAGEMSG "Usage: xargs: [-t] [-p] [-e[eofstr]] [-E eofstr] "\
	"[-I replstr] [-i[replstr]] [-L #] [-l[#]] [-n # [-x]] [-s size] "\
	"[cmd [args ...]]\n"

/*
 * Forward declarations.  Each one is a full prototype so that the
 * compiler checks every call; an empty "()" only says "unspecified
 * arguments" and would let a bad call through unnoticed.
 */
static int	echoargs(void);
static int	getchr(void);
static wchar_t	getwchr(void);
static void	ungetwchr(wchar_t);
static int	lcall(char *sub, char **subargs);
static int	xindex(char *as1, char *as2);
static void	addibuf(struct inserts *p);
static void	ermsg(char *messages, ...);
static char	*addarg(char *arg);
static char	*checklen(char *arg);
static size_t   store_wchr(char **, size_t *, size_t, wchar_t);
static char	*getarg(void);
static char	*insert(char *pattern, char *subst);
static void	usage(void);
static void	parseargs(int ac, char **av);
static void	saveinput(void);
138 
139 int
140 main(int argc, char **argv)
141 {
142 	int	j;
143 	struct inserts *psave;
144 	int c;
145 	int	initsize;
146 	char	*cmdname, *initbuf, **initlist;
147 
148 
149 	/* initialization */
150 	blank = wctype("blank");
151 	n_inserts = 0;
152 	psave = saveargv;
153 	(void) setlocale(LC_ALL, "");
154 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D 		*/
155 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't 		*/
156 #endif
157 	(void) textdomain(TEXT_DOMAIN);
158 	if (init_yes() < 0) {
159 		ermsg(gettext(ERR_MSG_INIT_YES), strerror(errno));
160 		exit(1);
161 	}
162 
163 	parseargs(argc, argv);
164 
165 	/* handling all of xargs arguments:				*/
166 	while ((c = getopt(mac, mav, "tpe:E:I:i:L:l:n:s:x")) != EOF) {
167 		switch (c) {
168 		case 't':	/* -t: turn trace mode on		*/
169 			TRACE = TRUE;
170 			break;
171 
172 		case 'p':	/* -p: turn on prompt mode.		*/
173 			if ((PROMPT = open("/dev/tty", O_RDONLY)) == -1) {
174 				perror(gettext("can't read from tty for -p"));
175 			} else {
176 				TRACE = TRUE;
177 			}
178 			break;
179 
180 		case 'e':
181 			/*
182 			 * -e[eofstr]: set/disable end-of-file.
183 			 * N.B. that an argument *isn't* required here; but
184 			 * parseargs forced an argument if not was given.  The
185 			 * forced argument is the default...
186 			 */
187 			LEOF = optarg; /* can be empty */
188 			break;
189 
190 		case 'E':
191 			/*
192 			 * -E eofstr: change end-of-file string.
193 			 * eofstr *is* required here, but can be empty:
194 			 */
195 			LEOF = optarg;
196 			break;
197 
198 		case 'I':
199 			/* -I replstr: Insert mode. replstr *is* required. */
200 			INSERT = PER_LINE = LEGAL = TRUE;
201 			N_ARGS = 0;
202 			INSPAT = optarg;
203 			if (*optarg == '\0') {
204 				ermsg(gettext(
205 				    "Option requires an argument: -%c\n"), c);
206 			}
207 			break;
208 
209 		case 'i':
210 			/*
211 			 * -i [replstr]: insert mode, with *optional* replstr.
212 			 * N.B. that an argument *isn't* required here; if
213 			 * it's not given, then the string INSPAT_STR will
214 			 * be assumed.
215 			 *
216 			 * Since getopts(3C) doesn't handle the case of an
217 			 * optional variable argument at all, we have to
218 			 * parse this by hand:
219 			 */
220 
221 			INSERT = PER_LINE = LEGAL = TRUE;
222 			N_ARGS = 0;
223 			if ((optarg != NULL) && (*optarg != '\0')) {
224 				INSPAT = optarg;
225 			} else {
226 				/*
227 				 * here, there is no next argument. so
228 				 * we reset INSPAT to the INSPAT_STR.
229 				 * we *have* to do this, as -i/I may have
230 				 * been given previously, and XCU4 requires
231 				 * that only "the last one specified takes
232 				 * effect".
233 				 */
234 				INSPAT = INSPAT_STR;
235 			}
236 			break;
237 
238 		case 'L':
239 			/*
240 			 * -L number: # of times cmd is executed
241 			 * number *is* required here:
242 			 */
243 			PER_LINE = TRUE;
244 			N_ARGS = 0;
245 			INSERT = FALSE;
246 			if ((PER_LINE = atoi(optarg)) <= 0) {
247 				ermsg(gettext("#lines must be positive "
248 				    "int: %s\n"), optarg);
249 			}
250 			break;
251 
252 		case 'l':
253 			/*
254 			 * -l [number]: # of times cmd is executed
255 			 * N.B. that an argument *isn't* required here; if
256 			 * it's not given, then 1 is assumed.
257 			 *
258 			 * parseargs handles the optional arg processing.
259 			 */
260 
261 			PER_LINE = LEGAL = TRUE;  /* initialization	*/
262 			N_ARGS = 0;
263 			INSERT = FALSE;
264 
265 			if ((optarg != NULL) && (*optarg != '\0')) {
266 				if ((PER_LINE = atoi(optarg)) <= 0)
267 					PER_LINE = 1;
268 			}
269 			break;
270 
271 		case 'n':	/* -n number: # stdin args		*/
272 			/*
273 			 * -n number: # stdin args.
274 			 * number *is* required here:
275 			 */
276 			if ((N_ARGS = atoi(optarg)) <= 0) {
277 				ermsg(gettext("#args must be positive "
278 				    "int: %s\n"), optarg);
279 			} else {
280 				LEGAL = DASHX || N_ARGS == 1;
281 				INSERT = PER_LINE = FALSE;
282 			}
283 			break;
284 
285 		case 's':	/* -s size: set max size of each arg list */
286 			BUFLIM = atoi(optarg);
287 			if (BUFLIM > BUFSIZE || BUFLIM <= 0) {
288 				ermsg(gettext(
289 				    "0 < max-cmd-line-size <= %d: "
290 				    "%s\n"), BUFSIZE, optarg);
291 			}
292 			break;
293 
294 		case 'x':	/* -x: terminate if args > size limit	*/
295 			DASHX = LEGAL = TRUE;
296 			break;
297 
298 		default:
299 			/*
300 			 * bad argument. complain and get ready to die.
301 			 */
302 			ERR = TRUE;
303 			usage();
304 
305 			exit(2);
306 			break;
307 		}
308 	}
309 
310 	/*
311 	 * if anything called ermsg(), something screwed up, so
312 	 * we exit early.
313 	 */
314 	if (OK == FALSE) {
315 		ERR = TRUE;
316 		usage();
317 		exit(2);
318 	}
319 
320 	/*
321 	 * we're finished handling xargs's options, so now pick up
322 	 * the command name (if any), and it's options.
323 	 */
324 
325 
326 	mac -= optind;	/* dec arg count by what we've processed 	*/
327 	mav += optind;	/* inc to current mav				*/
328 
329 	if (mac <= 0) {	/* if there're no more args to process,	*/
330 		cmdname = "/usr/bin/echo";	/* our default command	*/
331 		*ARGV++ = addarg(cmdname);	/* use the default cmd.	*/
332 	} else {	/* otherwise keep parsing rest of the string.	*/
333 		/*
334 		 * note that we can't use getopts(3C), and *must* parse
335 		 * this by hand, as we don't know apriori what options the
336 		 * command will take.
337 		 */
338 		cmdname = *mav;	/* get the command name	*/
339 
340 
341 		/* pick up the remaining args from the command line:	*/
342 		while ((OK == TRUE) && (mac-- > 0)) {
343 			/*
344 			 * while we haven't crapped out, and there's
345 			 * work to do:
346 			 */
347 			if (INSERT && ! ERR) {
348 				if (xindex(*mav, INSPAT) != -1) {
349 					if (++n_inserts > MAXINSERTS) {
350 						ermsg(gettext("too many args "
351 						    "with %s\n"), INSPAT);
352 						ERR = TRUE;
353 					}
354 					psave->p_ARGV = ARGV;
355 					(psave++)->p_skel = *mav;
356 				}
357 			}
358 			*ARGV++ = addarg(*mav++);
359 		}
360 	}
361 
362 	/* pick up args from standard input */
363 
364 	initbuf = next;
365 	initlist = ARGV;
366 	initsize = linesize;
367 
368 	while (OK && MORE) {
369 		N_args = 0;
370 		N_lines = 0;
371 		next = initbuf;
372 		ARGV = initlist;
373 		linesize = initsize;
374 		if (*lastarg) {
375 			*ARGV++ = addarg(lastarg);
376 			lastarg = "";
377 		}
378 
379 		while (((ARGV - arglist) < MAXARGS) &&
380 		    ((*ARGV++ = getarg()) != NULL) && OK)
381 			;
382 
383 		/* insert arg if requested */
384 
385 		if (!ERR && INSERT) {
386 			if ((!MORE) && (N_lines == 0)) {
387 				exit(exitstat);
388 			}
389 					/* no more input lines */
390 			p_ibuf = ins_buf;
391 			ARGV--;
392 			j = ibufsize = 0;
393 			for (psave = saveargv; ++j <= n_inserts; ++psave) {
394 				addibuf(psave);
395 				if (ERR)
396 					break;
397 			}
398 		}
399 		*ARGV = 0;
400 
401 		if (n_inserts > 0) {
402 			int t_ninserts;
403 
404 			/*
405 			 * if we've done any insertions, re-calculate the
406 			 * linesize. bomb out if we've exceeded our length.
407 			 */
408 			t_ninserts = n_inserts;
409 			n_inserts = 0;	/* inserts have been done 	*/
410 			linesize = 0;	/* recalculate this		*/
411 
412 			/* for each current argument in the list:	*/
413 			for (ARGV = arglist; *ARGV != NULL; ARGV++) {
414 				/* recalculate everything.		*/
415 				if (checklen(*ARGV) != 0) {
416 					if (N_ARGS && (N_args >= N_ARGS)) {
417 						N_lines = N_args = 0;
418 						OK = FALSE;
419 						ERR = TRUE;
420 					}
421 				}
422 			}
423 			n_inserts = t_ninserts;
424 		}
425 
426 		/* exec command */
427 
428 		if (!ERR) {
429 			if (!MORE &&
430 			    (PER_LINE && N_lines == 0 || N_ARGS && N_args == 0))
431 				exit(exitstat);
432 			OK = TRUE;
433 			j = TRACE ? echoargs() : TRUE;
434 			if (j) {
435 				/*
436 				 * for xcu4, all invocations of cmdname must
437 				 * return 0, in order for us to return 0.
438 				 * so if we have a non-zero status here,
439 				 * quit immediately.
440 				 */
441 				if ((exitstat |= lcall(cmdname, arglist)) == 0)
442 					continue;
443 			}
444 		}
445 	}
446 
447 	(void) lseek(0, file_offset, SEEK_SET);
448 	if (OK) {
449 		return (exitstat);
450 	} else {
451 		/*
452 		 * if exitstat was set, to match XCU4 complience,
453 		 * return that value, otherwise, return 1.
454 		 */
455 		return (exitstat ? exitstat : 1);
456 	}
457 }
458 
459 static void
460 queue(char *buffer, int len, int where)
461 {
462 	pio *new, *element;
463 
464 	if ((new = malloc(sizeof (pio))) == NULL) {
465 		perror(gettext("xargs: Memory allocation failure"));
466 		exit(1);
467 	}
468 	new->cur = new->start = buffer;
469 	new->length = len;
470 
471 	if (where == TAIL) {
472 		new->next = NULL;
473 		if (queued_data == NULL) {
474 			queued_data = new;
475 		} else {
476 			element = queued_data;
477 			while (element->next != NULL) {
478 				element = element->next;
479 			}
480 			element->next = new;
481 		}
482 	} else {
483 		file_offset -= len;
484 		new->next = queued_data;
485 		queued_data = new;
486 	}
487 }
488 
489 static char *
490 checklen(char *arg)
491 {
492 	int	oklen;
493 
494 	oklen = TRUE;
495 	linesize += strlen(arg) + 1;
496 	if (linesize >= BUFLIM) {
497 		/*
498 		 * we skip this if there're inserts. we'll handle the
499 		 * argument counting after all the insertions have
500 		 * been done.
501 		 */
502 		if (n_inserts == 0) {
503 			lastarg = arg;
504 			oklen = OK = FALSE;
505 
506 			if (LEGAL) {
507 				ERR = TRUE;
508 				ermsg(gettext("arg list too long\n"));
509 			} else if (N_args > 1) {
510 				N_args = 1;
511 			} else {
512 				ermsg(gettext("a single arg was greater than "
513 				    "the max arglist size of %d characters\n"),
514 				    BUFLIM);
515 				ERR = TRUE;
516 			}
517 		}
518 	}
519 	return (oklen ? arg : 0);
520 }
521 
522 static char *
523 addarg(char *arg)
524 {
525 	if (checklen(arg) != 0) {
526 		(void) strcpy(next, arg);
527 		arg = next;
528 		next += strlen(arg) + 1;
529 		return (arg);
530 	}
531 	return ((char *)0);
532 }
533 
534 /*
535  * store_wchr() : append a wchar_t to a char buffer, resize buffer if required.
536  *
537  *     Given a pointer to the beginning of a string buffer, the length of the
538  *     buffer and an offset indicating the next place to write within that
539  *     buffer, the passed wchar_t will be appended to the buffer if there is
540  *     enough space. If there is not enough space, an attempt to reallocate the
541  *     buffer will be made and if successful the passed pointer and size will be
542  *     updated to describe the reallocated block. Returns the new value for
543  *     'offset' (it will be incremented by the number of bytes written).
544  */
545 static size_t
546 store_wchr(char **buffer, size_t *buflen, size_t offset, wchar_t c)
547 {
548 	int bytes;
549 
550 	/*
551 	 * Make sure that there is enough room in the buffer to store the
552 	 * maximum length of c.
553 	 */
554 	if ((offset + MB_CUR_MAX) > *buflen) {
555 		/*
556 		 * Not enough room so attempt to reallocate. Add 'MB_CUR_MAX' to
557 		 * buffer length to ensure that there is always enough room to
558 		 * store 'c' if realloc succeeds, no matter what QBUF_INC is
559 		 * defined as.
560 		 */
561 		*buflen += (QBUF_INC + MB_CUR_MAX);
562 		if ((*buffer = realloc(*buffer, *buflen)) == NULL) {
563 			perror(gettext("xargs: Memory allocation failure"));
564 			exit(1);
565 		}
566 	}
567 	/* store bytes from wchar into buffer */
568 	bytes = wctomb(*buffer + offset, c);
569 	if (bytes == -1) {
570 		/* char was invalid */
571 		bytes = 1;
572 		*(*buffer + offset) = (char)c;
573 	}
574 
575 	/* return new value for offset */
576 	return (offset + bytes);
577 }
578 
579 static char *
580 getarg()
581 {
582 	int	bytes;
583 	wchar_t	c;
584 	char	*arg;
585 	char	*retarg, *requeue_buf;
586 	size_t  requeue_offset = 0, requeue_len;
587 	char	mbc[MB_LEN_MAX];
588 
589 	while (iswspace(c = getwchr()) || c == '\n')
590 		;
591 
592 	if (c == '\0') {
593 		MORE = FALSE;
594 		return (0);
595 	}
596 
597 	/*
598 	 * While we are reading in an argument, it is possible that we will
599 	 * reach the maximum length of the overflow buffer and we'll have to
600 	 * requeue what we have read so far. To handle this we allocate an
601 	 * initial buffer here which will keep an unprocessed copy of the data
602 	 * that we read in (this buffer will grow as required).
603 	 */
604 	requeue_len = (size_t)QBUF_STARTLEN;
605 	if ((requeue_buf = (char *)malloc(requeue_len)) == NULL) {
606 		perror(gettext("xargs: Memory allocation failure"));
607 		exit(1);
608 	}
609 
610 	for (arg = next; ; c = getwchr()) {
611 		bytes = wctomb(mbc, c);
612 
613 		/*
614 		 * Store the char that we have read before processing it in case
615 		 * the current argument needs to be requeued.
616 		 */
617 		requeue_offset = store_wchr(&requeue_buf, &requeue_len,
618 		    requeue_offset, c);
619 
620 		/* Check for overflow the input buffer */
621 		if ((next + ((bytes == -1) ? 1 : bytes)) >= &argbuf[BUFLIM]) {
622 			/*
623 			 * It's only an error if there are no Args in buffer
624 			 * already.
625 			 */
626 			if ((N_ARGS || PER_LINE) && LEGAL) {
627 				ERR = TRUE;
628 				ermsg(gettext("Argument list too long\n"));
629 				free(requeue_buf);
630 				return (0);
631 			} else if (N_args == 0) {
632 				lastarg = "";
633 				ERR = TRUE;
634 				ermsg(gettext("A single arg was greater than "
635 				    "the max arglist size of %d characters\n"),
636 				    BUFSIZE);
637 				free(requeue_buf);
638 				return (0);
639 			}
640 			/*
641 			 * Otherwise we put back the current argument
642 			 * and use what we have collected so far...
643 			 */
644 			queue(requeue_buf, requeue_offset, HEAD);
645 			/* reset inquote because we have requeued the quotes */
646 			inquote = 0;
647 			return (NULL);
648 		}
649 
650 
651 		if (iswctype(c, blank) && inquote == 0) {
652 			if (INSERT) {
653 				if (bytes == -1) {
654 					*next++ = (char)c;
655 				} else {
656 					(void) wctomb(next, c);
657 					next += bytes;
658 				}
659 				continue;
660 			}
661 
662 			/* skip over trailing whitespace till next arg */
663 			while (iswctype((c = getwchr()), blank) &&
664 			    (c != '\n') && (c != '\0'))
665 				;
666 
667 			/*
668 			 * if there was space till end of line then the last
669 			 * character was really a newline...
670 			 */
671 			if (c == L'\n' || c == L'\0') {
672 				ungetwchr(L'\n');
673 			} else {
674 				/* later code needs to know this was a space */
675 				ungetwchr(c);
676 				c = L' ';
677 			}
678 			goto end_arg;
679 		}
680 		switch (c) {
681 		case L'\0':
682 		case L'\n':
683 			if (inquote) {
684 				*next++ = '\0';
685 				ermsg(gettext("Missing quote: %s\n"), arg);
686 				ERR = TRUE;
687 				free(requeue_buf);
688 				return (0);
689 			}
690 
691 			N_lines++;
692 end_arg:		*next++ = '\0';
693 			/* we finished without requeuing so free requeue_buf */
694 			free(requeue_buf);
695 			if ((strcmp(arg, LEOF) == 0 && *LEOF != '\0') ||
696 			    (c == '\0' && strlen(arg) == 0)) {
697 				MORE = FALSE;
698 				/* absorb the rest of the line */
699 				if ((c != '\n') && (c != '\0'))
700 					while (c = getwchr())
701 						if ((c == '\n') || (c == '\0'))
702 							break;
703 				return (0);
704 			} else {
705 				++N_args;
706 				if (retarg = checklen(arg)) {
707 					if ((PER_LINE &&
708 					    N_lines >= PER_LINE &&
709 					    (c == '\0' || c == '\n')) ||
710 					    (N_ARGS && N_args >= N_ARGS)) {
711 						N_lines = N_args = 0;
712 						lastarg = "";
713 						OK = FALSE;
714 					}
715 				}
716 				return (retarg);
717 			}
718 
719 		case '"':
720 			if (inquote == 1)	/* in single quoted string */
721 				goto is_default;
722 			if (inquote == 2)	/* terminating double quote */
723 				inquote = 0;
724 			else			/* starting quoted string */
725 				inquote = 2;
726 			break;
727 
728 		case '\'':
729 			if (inquote == 2)	/* in double quoted string */
730 				goto is_default;
731 			if (inquote == 1)	/* terminating single quote */
732 				inquote = 0;
733 			else			/* starting quoted string */
734 				inquote = 1;
735 			break;
736 
737 		case L'\\':
738 			c = getwchr();
739 			/* store quoted char for potential requeueing */
740 			requeue_offset = store_wchr(&requeue_buf, &requeue_len,
741 			    requeue_offset, c);
742 
743 		default:
744 is_default:		if (bytes == -1) {
745 				*next++ = (char)c;
746 			} else {
747 				(void) wctomb(next, c);
748 				next += bytes;
749 			}
750 			break;
751 		}
752 	}
753 }
754 
755 
756 /*
757  * ermsg():	print out an error message, and indicate failure globally.
758  *
759  *	Assumes that message has already been gettext()'d. It would be
760  *	nice if we could just do the gettext() here, but we can't, since
761  *	since xgettext(1M) wouldn't be able to pick up our error message.
762  */
763 /* PRINTFLIKE1 */
764 static void
765 ermsg(char *messages, ...)
766 {
767 	va_list	ap;
768 
769 	va_start(ap, messages);
770 
771 	(void) fprintf(stderr, "xargs: ");
772 	(void) vfprintf(stderr, messages, ap);
773 
774 	va_end(ap);
775 	OK = FALSE;
776 }
777 
778 static int
779 echoargs()
780 {
781 	char	**anarg;
782 	char	**tanarg;	/* tmp ptr			*/
783 	int		i;
784 	char		reply[LINE_MAX];
785 
786 	tanarg = anarg = arglist-1;
787 
788 	/*
789 	 * write out each argument, separated by a space. the tanarg
790 	 * nonsense is for xcu4 testsuite compliance - so that an
791 	 * extra space isn't echoed after the last argument.
792 	 */
793 	while (*++anarg) {		/* while there's an argument	*/
794 		++tanarg;		/* follow anarg			*/
795 		(void) write(2, *anarg, strlen(*anarg));
796 
797 		if (*++tanarg) {	/* if there's another argument:	*/
798 			(void) write(2, " ", 1); /* add a space		*/
799 			--tanarg;	/* reset back to anarg		*/
800 		}
801 	}
802 	if (PROMPT == -1) {
803 		(void) write(2, "\n", 1);
804 		return (TRUE);
805 	}
806 
807 	/*
808 	 * at this point, there may be unexpected input pending on stdin,
809 	 * if one has used the -n flag. this presents a problem, because
810 	 * if we simply do a read(), we'll get the extra input, instead
811 	 * of our desired y/n input. so, we see if there's any extra
812 	 * input, and if there is, then we will store it.
813 	 */
814 	saveinput();
815 
816 	(void) write(2, "?...", 4);	/* ask the user for input	*/
817 
818 	for (i = 0; i < LINE_MAX && read(PROMPT, &reply[i], 1) > 0; i++) {
819 		if (reply[i] == '\n') {
820 			if (i == 0)
821 				return (FALSE);
822 			break;
823 		}
824 	}
825 	reply[i] = 0;
826 
827 	/* flush remainder of line if necessary */
828 	if (i == LINE_MAX) {
829 		char	bitbucket;
830 
831 		while ((read(PROMPT, &bitbucket, 1) > 0) && (bitbucket != '\n'))
832 			;
833 	}
834 
835 	return (yes_check(reply));
836 }
837 
838 
839 static char *
840 insert(char *pattern, char *subst)
841 {
842 	static char	buffer[MAXSBUF+1];
843 	int		len, ipatlen;
844 	char	*pat;
845 	char	*bufend;
846 	char	*pbuf;
847 
848 	len = strlen(subst);
849 	ipatlen = strlen(INSPAT) - 1;
850 	pat = pattern - 1;
851 	pbuf = buffer;
852 	bufend = &buffer[MAXSBUF];
853 
854 	while (*++pat) {
855 		if (xindex(pat, INSPAT) == 0) {
856 			if (pbuf + len >= bufend) {
857 				break;
858 			} else {
859 				(void) strcpy(pbuf, subst);
860 				pat += ipatlen;
861 				pbuf += len;
862 			}
863 		} else {
864 			*pbuf++ = *pat;
865 			if (pbuf >= bufend)
866 				break;
867 		}
868 	}
869 
870 	if (!*pat) {
871 		*pbuf = '\0';
872 		return (buffer);
873 	} else {
874 		ermsg(gettext("Maximum argument size with insertion via %s's "
875 		    "exceeded\n"), INSPAT);
876 		ERR = TRUE;
877 		return (0);
878 	}
879 }
880 
881 
882 static void
883 addibuf(struct inserts	*p)
884 {
885 	char	*newarg, *skel, *sub;
886 	int		l;
887 
888 	skel = p->p_skel;
889 	sub = *ARGV;
890 	linesize -= strlen(skel) + 1;
891 	newarg = insert(skel, sub);
892 	if (ERR)
893 		return;
894 
895 	if (checklen(newarg)) {
896 		if ((ibufsize += (l = strlen(newarg) + 1)) > MAXIBUF) {
897 			ermsg(gettext("Insert buffer overflow\n"));
898 			ERR = TRUE;
899 		}
900 		(void) strcpy(p_ibuf, newarg);
901 		*(p->p_ARGV) = p_ibuf;
902 		p_ibuf += l;
903 	}
904 }
905 
906 
907 /*
908  * getchr():	get the next character.
909  * description:
910  *	we get the next character from pio.structure, if there's a character
911  *	to get. this may happen when we've had to flush stdin=/dev/tty,
912  *	but still wanted to preserve the characters for later processing.
913  *
914  *	otherwise we just get the character from stdin.
915  */
916 static int
917 getchr(void)
918 {
919 	char	c;
920 
921 	do {
922 		if (queued_data == NULL) {
923 			char	*buffer;
924 			int	len;
925 
926 			if ((buffer = malloc(BUFSIZE)) == NULL) {
927 				perror(gettext(
928 				    "xargs: Memory allocation failure"));
929 				exit(1);
930 			}
931 
932 			if ((len = read(0, buffer, BUFSIZE)) == 0)
933 				return (0);
934 			if (len == -1) {
935 				perror(gettext("xargs: Read failure"));
936 				exit(1);
937 			}
938 
939 			queue(buffer, len, TAIL);
940 		}
941 
942 		file_offset++;
943 		c = *queued_data->cur++;	 /* get the next character */
944 		if (--queued_data->length == 0) { /* at the end of buffer? */
945 			pio	*nxt = queued_data->next;
946 
947 			free(queued_data->start);
948 			free(queued_data);
949 			queued_data = nxt;
950 		}
951 	} while (c == '\0');
952 	return (c);
953 }
954 
955 
/*
 * getwchr():	get the next wide character.
 *
 *	Bytes are taken from getchr() one at a time until mbtowc() accepts
 *	them as a complete character, up to MB_CUR_MAX bytes.
 *
 *	Returns the character, or L'\0' at end of file.  If the bytes do
 *	not form a valid character the program exits with an EILSEQ
 *	diagnostic.
 */
static wchar_t
getwchr(void)
{
	int		i;
	wchar_t		wch;
	unsigned char	buffer[MB_LEN_MAX + 1];

	for (i = 0; i < (int)MB_CUR_MAX; ) {
		/* getchr() returns 0 only at end of file */
		if ((buffer[i++] = getchr()) == '\0') {
			/* We have reached  EOF */
			if (i == 1) {
				/* TRUE EOF has been reached */
				return (L'\0');
			}
			/*
			 * We have some characters in our buffer still so it
			 * must be an invalid character right before EOF.
			 */
			break;
		}

		/* If this succeeds then we are done */
		if (mbtowc(&wch, (char *)buffer, i) != -1)
			return (wch);
	}

	/*
	 * We have now encountered an illegal character sequence.
	 * There is nothing much we can do at this point but
	 * return an error.  If we attempt to recover we may in fact
	 * return garbage as arguments, from the customer's point
	 * of view.  After all what if they are feeding us a file
	 * generated in another locale?
	 */
	errno = EILSEQ;
	perror(gettext("xargs: Corrupt input file"));
	exit(1);
	/* NOTREACHED */
}
995 
996 
997 static void
998 ungetwchr(wchar_t wch)
999 {
1000 	char	*buffer;
1001 	int	bytes;
1002 
1003 	if ((buffer = malloc(MB_LEN_MAX)) == NULL) {
1004 		perror(gettext("xargs: Memory allocation failure"));
1005 		exit(1);
1006 	}
1007 	bytes = wctomb(buffer, wch);
1008 	queue(buffer, bytes, HEAD);
1009 }
1010 
1011 
1012 static int
1013 lcall(char *sub, char **subargs)
1014 {
1015 	int retcode, retry = 0;
1016 	pid_t iwait, child;
1017 
1018 	for (; ; ) {
1019 		switch (child = fork()) {
1020 		default:
1021 			while ((iwait = wait(&retcode)) != child &&
1022 			    iwait != (pid_t)-1)
1023 				;
1024 			if (iwait == (pid_t)-1) {
1025 				perror(gettext("xargs: Wait failure"));
1026 				exit(122);
1027 				/* NOTREACHED */
1028 			}
1029 			if (WIFSIGNALED(retcode)) {
1030 				ermsg(gettext("Child killed with signal %d\n"),
1031 				    WTERMSIG(retcode));
1032 				exit(125);
1033 				/* NOTREACHED */
1034 			}
1035 			if ((WEXITSTATUS(retcode) & 0377) == 0377) {
1036 				ermsg(gettext("Command could not continue "
1037 				    "processing data\n"));
1038 				exit(124);
1039 				/* NOTREACHED */
1040 			}
1041 			return (WEXITSTATUS(retcode));
1042 		case 0:
1043 			(void) execvp(sub, subargs);
1044 			perror(gettext("xargs: Could not exec command"));
1045 			if (errno == EACCES)
1046 				exit(126);
1047 			exit(127);
1048 			/* NOTREACHED */
1049 		case -1:
1050 			if (errno != EAGAIN && retry++ < FORK_RETRY) {
1051 				perror(gettext("xargs: Could not fork child"));
1052 				exit(123);
1053 			}
1054 			(void) sleep(1);
1055 		}
1056 	}
1057 }
1058 
1059 
/*
 * xindex():	locate the string `as2' inside the string `as1'.
 *
 *	Returns the offset of the first occurrence of `as2' in `as1', or
 *	-1 when there is none.  An empty `as2' is never found.
 */
static int
xindex(char *as1, char *as2)
{
	char	*hit;

	/* strstr() would report an empty needle at offset 0 */
	if (*as2 == '\0')
		return (-1);

	hit = strstr(as1, as2);
	if (hit == NULL)
		return (-1);

	return ((int)(hit - as1));
}
1089 
1090 
1091 static void
1092 usage()
1093 {
1094 	ermsg(gettext(USAGEMSG));
1095 	OK = FALSE;
1096 }
1097 
1098 
1099 
/*
 * parseargs():		modify the args
 *	since the -e, -i and -l flags all take optional subarguments,
 *	and getopt(3C) has no way to express an optional subargument,
 *	we rewrite our local argument count and strings so that the
 *	subargument (or a default, when none was given) always follows
 *	as a separate string. that makes these flags easy to handle
 *	via getopt(3C).
 *
 *	-e	-> -e ""
 *	-e3	-> -e "3"
 *	-Estr	-> -E "str"
 *	-i	-> -i "{}"
 *	-irep	-> -i "rep"
 *	-l	-> -l "1"
 *	-l10	-> -l "10"
 */
1117 static void
1118 parseargs(int ac, char **av)
1119 {
1120 	int i;			/* current argument			*/
1121 	int cflag;		/* 0 = not processing cmd arg		*/
1122 
1123 	if ((mav = malloc((ac * 2 + 1) * sizeof (char *))) == NULL) {
1124 		perror(gettext("xargs: Memory allocation failure"));
1125 		exit(1);
1126 	}
1127 
1128 	/* for each argument, see if we need to change things:		*/
1129 	for (i = mac = cflag = 0; (av[i] != NULL) && i < ac; i++, mac++) {
1130 		if ((mav[mac] = strdup(av[i])) == NULL) {
1131 			perror(gettext("xargs: Memory allocation failure"));
1132 			exit(1);
1133 		}
1134 
1135 		/* -- has been found or argument list is fully processes */
1136 		if (cflag)
1137 			continue;
1138 
1139 		/*
1140 		 * if we're doing special processing, and we've got a flag
1141 		 */
1142 		else if ((av[i][0] == '-') && (av[i][1] != NULL)) {
1143 			char	*def;
1144 
1145 			switch (av[i][1]) {
1146 			case	'e':
1147 				def = ""; /* -e with no arg turns off eof */
1148 				goto process_special;
1149 			case	'i':
1150 				def = INSPAT_STR;
1151 				goto process_special;
1152 			case	'l':
1153 				def = "1";
1154 process_special:
1155 				/*
1156 				 * if there's no sub-option, we *must* add
1157 				 * a default one. this is because xargs must
1158 				 * be able to distinguish between a valid
1159 				 * suboption, and a command name.
1160 				 */
1161 				if (av[i][2] == NULL) {
1162 					mav[++mac] = strdup(def);
1163 				} else {
1164 					/* clear out our version: */
1165 					mav[mac][2] = NULL;
1166 					mav[++mac] = strdup(&av[i][2]);
1167 				}
1168 				if (mav[mac] == NULL) {
1169 					perror(gettext("xargs: Memory"
1170 					    " allocation failure"));
1171 					exit(1);
1172 				}
1173 				break;
1174 
1175 			/* flags with required subarguments:		*/
1176 
1177 			/*
1178 			 * there are two separate cases here. either the
1179 			 * flag can have the normal XCU4 handling
1180 			 * (of the form: -X subargument); or it can have
1181 			 * the old solaris 2.[0-4] handling (of the
1182 			 * form: -Xsubargument). in order to maintain
1183 			 * backwards compatibility, we must support the
1184 			 * latter case. we handle the latter possibility
1185 			 * first so both the old solaris way of handling
1186 			 * and the new XCU4 way of handling things are allowed.
1187 			 */
1188 			case	'n':	/* FALLTHROUGH			*/
1189 			case	's':	/* FALLTHROUGH			*/
1190 			case	'E':	/* FALLTHROUGH			*/
1191 			case	'I':	/* FALLTHROUGH			*/
1192 			case	'L':
1193 				/*
1194 				 * if the second character isn't null, then
1195 				 * the user has specified the old syntax.
1196 				 * we move the subargument into our
1197 				 * mod'd argument list.
1198 				 */
1199 				if (av[i][2] != NULL) {
1200 					/* first clean things up:	*/
1201 					mav[mac][2] = NULL;
1202 
1203 					/* now add the separation:	*/
1204 					++mac;	/* inc to next mod'd arg */
1205 					if ((mav[mac] = strdup(&av[i][2])) ==
1206 					    NULL) {
1207 						perror(gettext("xargs: Memory"
1208 						    " allocation failure"));
1209 						exit(1);
1210 					}
1211 					break;
1212 				}
1213 				i++;
1214 				mac++;
1215 
1216 				if (av[i] == NULL) {
1217 					mav[mac] = NULL;
1218 					return;
1219 				}
1220 				if ((mav[mac] = strdup(av[i])) == NULL) {
1221 					perror(gettext("xargs: Memory"
1222 					    " allocation failure"));
1223 					exit(1);
1224 				}
1225 				break;
1226 
1227 			/* flags */
1228 			case 'p' :
1229 			case 't' :
1230 			case 'x' :
1231 				break;
1232 
1233 			case '-' :
1234 			default:
1235 				/*
1236 				 * here we've hit the cmd argument. so
1237 				 * we'll stop special processing, as the
1238 				 * cmd may have a "-i" etc., argument,
1239 				 * and we don't want to add a "" to it.
1240 				 */
1241 				cflag = 1;
1242 				break;
1243 			}
1244 		} else if (i > 0) {	/* if we're not the 1st arg	*/
1245 			/*
1246 			 * if it's not a flag, then it *must* be the cmd.
1247 			 * set cflag, so we don't mishandle the -[eil] flags.
1248 			 */
1249 			cflag = 1;
1250 		}
1251 	}
1252 
1253 	mav[mac] = NULL;
1254 }
1255 
1256 
/*
 * saveinput(): pick up any pending input, so it can be processed later.
 *
 * description:
 *	the purpose of this routine is to allow us to handle the user
 *	typing in a 'y' or 'n', when there are existing characters already
 *	in stdin. this happens when one gives the "-n" option along with
 *	"-p". the problem occurs when the user first types in more arguments
 *	than specified by the -n number. echoargs() wants to read stdin
 *	in order to get the user's response, but if there's already stuff
 *	there, echoargs() won't read the proper character.
 *
 *	the solution provided by this routine is to pick up all characters
 *	(if any), and store them for later processing: the PROMPT descriptor
 *	is peeked with I_PEEK, and whatever is pending is read and handed
 *	to queue() at the TAIL position.
 *
 *	exits with status 1 if memory cannot be allocated or if the
 *	I_PEEK ioctl or the read fails.
 */

void
saveinput(void)
{
	char *buffer;		/* ptr to the floating data buffer	*/
	struct strpeek speek;	/* to see what's on the queue		*/
	int len;		/* number of bytes actually read	*/

	/* if we're not in -p mode, skip				*/
	if (PROMPT == -1) {
		return;
	}

	if ((buffer = malloc((size_t)MAX_INPUT)) == NULL) {
		perror(gettext("xargs: Memory allocation failure"));
		exit(1);
	}

	/* now see if there's any activity pending:			*/
	speek.ctlbuf.maxlen = 0;	/* no control part is wanted	*/
	speek.ctlbuf.len = 0;
	speek.ctlbuf.buf = NULL;
	speek.flags = 0;
	speek.databuf.maxlen = MAX_INPUT;
	speek.databuf.len = 0;
	speek.databuf.buf = buffer;

	if (ioctl(PROMPT, I_PEEK, &speek) == -1) {
		perror(gettext("xargs: I_PEEK failure"));
		exit(1);
	}

	/*
	 * nothing is pending, so the buffer is never handed to queue();
	 * release it here instead of leaking it on every call.
	 */
	if (speek.databuf.len <= 0) {
		free(buffer);
		return;
	}

	/* consume exactly the bytes that the peek reported:		*/
	if ((len = read(PROMPT, buffer, speek.databuf.len)) == -1) {
		perror(gettext("xargs: read failure"));
		exit(1);
	}

	/*
	 * NOTE(review): the buffer is deliberately not freed after this
	 * call; queue() presumably keeps the pointer -- confirm against
	 * its definition.
	 */
	queue(buffer, len, TAIL);
}
1315