xref: /titanic_52/usr/src/cmd/xargs/xargs.c (revision b26a64ae582e72d0b4c710cd8eba9c4afd4a9fdd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <stdio.h>
33 #include <sys/types.h>
34 #include <sys/wait.h>
35 #include <unistd.h>
36 #include <fcntl.h>
37 #include <string.h>
38 #include <stdarg.h>
39 #include <stdlib.h>
40 #include <limits.h>
41 #include <wchar.h>
42 #include <locale.h>
43 #include <langinfo.h>
44 #include <stropts.h>
45 #include <poll.h>
46 #include <errno.h>
47 #include <stdarg.h>
48 #include "getresponse.h"
49 
/* where queue() should link a new buffer into the pending-input list */
#define	HEAD	0
#define	TAIL	1
#define	FALSE 0
#define	TRUE 1
#define	MAXSBUF 255	/* insert()'s static result buffer is MAXSBUF+1 bytes */
#define	MAXIBUF 512	/* size of ins_buf, which holds all expanded inserts */
#define	MAXINSERTS 5	/* number of entries in saveargv[] */
#define	BUFSIZE LINE_MAX /* size of argbuf and upper bound accepted for -s */
#define	MAXARGS 255	/* arglist[] has MAXARGS+1 slots */
#define	INSPAT_STR	"{}"	/* default replstr string for -[Ii]	*/
#define	FORK_RETRY	5	/* retry count referenced by lcall() */

#define	QBUF_STARTLEN 255  /* start size of growable string buffer */
#define	QBUF_INC 100	   /* how much to grow a growable string by */
64 
/*
 * File-scope state shared by the option parser, the argument reader and
 * the command launcher.
 */
static wctype_t	blank;			/* wctype("blank"), set in main() */
static char	*arglist[MAXARGS+1];	/* vector handed to lcall() */
static char	argbuf[BUFSIZE+1];	/* storage for the argument strings */
static char	*next = argbuf;		/* next free byte in argbuf */
static char	*lastarg = "";		/* arg checklen() rejected; re-added */
					/* by main() on the next round */
static char	**ARGV = arglist;	/* next slot to fill in arglist */
static char	*LEOF = "_";		/* logical EOF string (-e / -E) */
static char	*INSPAT = INSPAT_STR;	/* replstr for -I / -i */
static char	ins_buf[MAXIBUF];	/* expanded insert-mode arguments */
static char	*p_ibuf;		/* next free byte in ins_buf */

/* one entry per command argument that contains the replstr */
static struct inserts {
	char	**p_ARGV;	/* where to put newarg ptr in arg list */
	char	*p_skel;	/* ptr to arg template */
} saveargv[MAXINSERTS];

/*
 * file_offset is bumped for every byte getchr() hands out and reduced when
 * data is requeued at HEAD; main() seeks stdin to it before returning.
 */
static off_t	file_offset = 0;
static int	PROMPT = -1;		/* fd of /dev/tty for -p, else -1 */
static int	BUFLIM = BUFSIZE;	/* -s: max size of an arg list */
static int	N_ARGS = 0;		/* -n: args per invocation, 0 = off */
static int	N_args = 0;		/* args read for current invocation */
static int	N_lines = 0;		/* lines read for current invocation */
static int	DASHX = FALSE;		/* -x given */
static int	MORE = TRUE;		/* cleared by getarg() at end of input */
static int	PER_LINE = FALSE;	/* -L/-l line count, TRUE for -I/-i */
static int	ERR = FALSE;		/* an error has been detected */
static int	OK = TRUE;		/* cleared by ermsg() and on limits */
static int	LEGAL = FALSE;		/* overlong arg list is an error */
static int	TRACE = FALSE;		/* -t (also set by -p) */
static int	INSERT = FALSE;		/* insert mode (-I / -i) */
static int	linesize = 0;		/* running size checked by checklen() */
static int	ibufsize = 0;		/* bytes used so far in ins_buf */
static int	exitstat = 0;	/* our exit status			*/
static int	mac;		/* modified argc, after parsing		*/
static char	**mav;		/* modified argv, after parsing		*/
static int	n_inserts;	/* # of insertions.			*/
static int	inquote = 0;	/* processing a quoted string		*/
102 
/*
 * the pio structure is used to save any pending input before the
 * user replies to a prompt. the pending input is saved here,
 * for the appropriate processing later.
 *
 * The same list is also used by getchr() for data read from stdin and by
 * getarg()/ungetwchr() to push input back. getchr() frees both `start'
 * and the node itself once `length' reaches zero, so `start' must be a
 * malloc'd buffer.
 */
typedef struct pio {
	struct pio *next;	/* next in stack			*/
	char *start;		/* starting addr of the buffer		*/
	char *cur;		/* ptr to current char in buf		*/
	size_t length;		/* number of bytes remaining		*/
} pio;

/* head of the pending-input list; NULL when nothing is queued */
static pio *queued_data = NULL;
116 
/* our usage message:							*/
#define	USAGEMSG "Usage: xargs: [-t] [-p] [-e[eofstr]] [-E eofstr] "\
	"[-I replstr] [-i[replstr]] [-L #] [-l[#]] [-n # [-x]] [-s size] "\
	"[cmd [args ...]]\n"

/*
 * Forward declarations for the file-local helpers. Several of them use
 * the old-style empty parameter list, so the compiler does not check the
 * arguments at those call sites.
 */
static int	echoargs();
static int	getchr(void);
static wchar_t	getwchr(void);
static void	ungetwchr(wchar_t);
static int	lcall(char *sub, char **subargs);
static int	xindex(char *as1, char *as2);
static void	addibuf(struct inserts *p);
static void	ermsg(char *messages, ...);
static char	*addarg(char *arg);
static char	*checklen(char *arg);
static size_t   store_wchr(char **, size_t *, size_t, wchar_t);
static char	*getarg();
static char	*insert(char *pattern, char *subst);
static void	usage();
static void	parseargs();
static void	saveinput();
138 
139 int
140 main(int argc, char **argv)
141 {
142 	int	j;
143 	struct inserts *psave;
144 	int c;
145 	int	initsize;
146 	char	*cmdname, *initbuf, **initlist;
147 
148 
149 	/* initialization */
150 	blank = wctype("blank");
151 	n_inserts = 0;
152 	psave = saveargv;
153 	(void) setlocale(LC_ALL, "");
154 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D 		*/
155 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't 		*/
156 #endif
157 	(void) textdomain(TEXT_DOMAIN);
158 	if (init_yes() < 0) {
159 		ermsg(gettext(ERR_MSG_INIT_YES), strerror(errno));
160 		exit(1);
161 	}
162 
163 	parseargs(argc, argv);
164 
165 	/* handling all of xargs arguments:				*/
166 	while ((c = getopt(mac, mav, "tpe:E:I:i:L:l:n:s:x")) != EOF) {
167 		switch (c) {
168 		case 't':	/* -t: turn trace mode on		*/
169 			TRACE = TRUE;
170 			break;
171 
172 		case 'p':	/* -p: turn on prompt mode.		*/
173 			if ((PROMPT = open("/dev/tty", O_RDONLY)) == -1) {
174 				perror(gettext("can't read from tty for -p"));
175 			} else {
176 				TRACE = TRUE;
177 			}
178 			break;
179 
180 		case 'e':
181 			/*
182 			 * -e[eofstr]: set/disable end-of-file.
183 			 * N.B. that an argument *isn't* required here; but
184 			 * parseargs forced an argument if not was given.  The
185 			 * forced argument is the default...
186 			 */
187 			LEOF = optarg; /* can be empty */
188 			break;
189 
190 		case 'E':
191 			/*
192 			 * -E eofstr: change end-of-file string.
193 			 * eofstr *is* required here, but can be empty:
194 			 */
195 			LEOF = optarg;
196 			break;
197 
198 		case 'I':
199 			/* -I replstr: Insert mode. replstr *is* required. */
200 			INSERT = PER_LINE = LEGAL = TRUE;
201 			N_ARGS = 0;
202 			INSPAT = optarg;
203 			if (*optarg == '\0') {
204 				ermsg(gettext(
205 				    "Option requires an argument: -%c\n"), c);
206 			}
207 			break;
208 
209 		case 'i':
210 			/*
211 			 * -i [replstr]: insert mode, with *optional* replstr.
212 			 * N.B. that an argument *isn't* required here; if
213 			 * it's not given, then the string INSPAT_STR will
214 			 * be assumed.
215 			 *
216 			 * Since getopts(3C) doesn't handle the case of an
217 			 * optional variable argument at all, we have to
218 			 * parse this by hand:
219 			 */
220 
221 			INSERT = PER_LINE = LEGAL = TRUE;
222 			N_ARGS = 0;
223 			if ((optarg != NULL) && (*optarg != '\0')) {
224 				INSPAT = optarg;
225 			} else {
226 				/*
227 				 * here, there is no next argument. so
228 				 * we reset INSPAT to the INSPAT_STR.
229 				 * we *have* to do this, as -i/I may have
230 				 * been given previously, and XCU4 requires
231 				 * that only "the last one specified takes
232 				 * effect".
233 				 */
234 				INSPAT = INSPAT_STR;
235 			}
236 			break;
237 
238 		case 'L':
239 			/*
240 			 * -L number: # of times cmd is executed
241 			 * number *is* required here:
242 			 */
243 			PER_LINE = TRUE;
244 			N_ARGS = 0;
245 			INSERT = FALSE;
246 			if ((PER_LINE = atoi(optarg)) <= 0) {
247 				ermsg(gettext("#lines must be positive "
248 				    "int: %s\n"), optarg);
249 			}
250 			break;
251 
252 		case 'l':
253 			/*
254 			 * -l [number]: # of times cmd is executed
255 			 * N.B. that an argument *isn't* required here; if
256 			 * it's not given, then 1 is assumed.
257 			 *
258 			 * parseargs handles the optional arg processing.
259 			 */
260 
261 			PER_LINE = LEGAL = TRUE;  /* initialization	*/
262 			N_ARGS = 0;
263 			INSERT = FALSE;
264 
265 			if ((optarg != NULL) && (*optarg != '\0')) {
266 				if ((PER_LINE = atoi(optarg)) <= 0)
267 					PER_LINE = 1;
268 			}
269 			break;
270 
271 		case 'n':	/* -n number: # stdin args		*/
272 			/*
273 			 * -n number: # stdin args.
274 			 * number *is* required here:
275 			 */
276 			if ((N_ARGS = atoi(optarg)) <= 0) {
277 				ermsg(gettext("#args must be positive "
278 				    "int: %s\n"), optarg);
279 			} else {
280 				LEGAL = DASHX || N_ARGS == 1;
281 				INSERT = PER_LINE = FALSE;
282 			}
283 			break;
284 
285 		case 's':	/* -s size: set max size of each arg list */
286 			BUFLIM = atoi(optarg);
287 			if (BUFLIM > BUFSIZE || BUFLIM <= 0) {
288 				ermsg(gettext(
289 				    "0 < max-cmd-line-size <= %d: "
290 				    "%s\n"), BUFSIZE, optarg);
291 			}
292 			break;
293 
294 		case 'x':	/* -x: terminate if args > size limit	*/
295 			DASHX = LEGAL = TRUE;
296 			break;
297 
298 		default:
299 			/*
300 			 * bad argument. complain and get ready to die.
301 			 */
302 			ERR = TRUE;
303 			usage();
304 
305 			exit(2);
306 			break;
307 		}
308 	}
309 
310 	/*
311 	 * if anything called ermsg(), something screwed up, so
312 	 * we exit early.
313 	 */
314 	if (OK == FALSE) {
315 		ERR = TRUE;
316 		usage();
317 		exit(2);
318 	}
319 
320 	/*
321 	 * we're finished handling xargs's options, so now pick up
322 	 * the command name (if any), and it's options.
323 	 */
324 
325 
326 	mac -= optind;	/* dec arg count by what we've processed 	*/
327 	mav += optind;	/* inc to current mav				*/
328 
329 	if (mac <= 0) {	/* if there're no more args to process,	*/
330 		cmdname = "/usr/bin/echo";	/* our default command	*/
331 		*ARGV++ = addarg(cmdname);	/* use the default cmd.	*/
332 	} else {	/* otherwise keep parsing rest of the string.	*/
333 		/*
334 		 * note that we can't use getopts(3C), and *must* parse
335 		 * this by hand, as we don't know apriori what options the
336 		 * command will take.
337 		 */
338 		cmdname = *mav;	/* get the command name	*/
339 
340 
341 		/* pick up the remaining args from the command line:	*/
342 		while ((OK == TRUE) && (mac-- > 0)) {
343 			/*
344 			 * while we haven't crapped out, and there's
345 			 * work to do:
346 			 */
347 			if (INSERT && ! ERR) {
348 				if (xindex(*mav, INSPAT) != -1) {
349 					if (++n_inserts > MAXINSERTS) {
350 						ermsg(gettext("too many args "
351 						    "with %s\n"), INSPAT);
352 						ERR = TRUE;
353 					}
354 					psave->p_ARGV = ARGV;
355 					(psave++)->p_skel = *mav;
356 				}
357 			}
358 			*ARGV++ = addarg(*mav++);
359 		}
360 	}
361 
362 	/* pick up args from standard input */
363 
364 	initbuf = next;
365 	initlist = ARGV;
366 	initsize = linesize;
367 
368 	while (OK && MORE) {
369 		N_args = 0;
370 		N_lines = 0;
371 		next = initbuf;
372 		ARGV = initlist;
373 		linesize = initsize;
374 		if (*lastarg) {
375 			*ARGV++ = addarg(lastarg);
376 			lastarg = "";
377 		}
378 
379 		while (((ARGV - arglist) < MAXARGS) &&
380 		    ((*ARGV++ = getarg()) != NULL) && OK)
381 			;
382 
383 		/* insert arg if requested */
384 
385 		if (!ERR && INSERT) {
386 			if ((!MORE) && (N_lines == 0)) {
387 				exit(exitstat);
388 			}
389 					/* no more input lines */
390 			p_ibuf = ins_buf;
391 			ARGV--;
392 			j = ibufsize = 0;
393 			for (psave = saveargv; ++j <= n_inserts; ++psave) {
394 				addibuf(psave);
395 				if (ERR)
396 					break;
397 			}
398 		}
399 		*ARGV = 0;
400 
401 		if (n_inserts > 0) {
402 			int t_ninserts;
403 
404 			/*
405 			 * if we've done any insertions, re-calculate the
406 			 * linesize. bomb out if we've exceeded our length.
407 			 */
408 			t_ninserts = n_inserts;
409 			n_inserts = 0;	/* inserts have been done 	*/
410 			linesize = 0;	/* recalculate this		*/
411 
412 			/* for each current argument in the list:	*/
413 			for (ARGV = arglist; *ARGV != NULL; ARGV++) {
414 				/* recalculate everything.		*/
415 				if (checklen(*ARGV) != 0) {
416 					if (N_ARGS && (N_args >= N_ARGS)) {
417 						N_lines = N_args = 0;
418 						OK = FALSE;
419 						ERR = TRUE;
420 					}
421 				}
422 			}
423 			n_inserts = t_ninserts;
424 		}
425 
426 		/* exec command */
427 
428 		if (!ERR) {
429 			if (!MORE &&
430 			    (PER_LINE && N_lines == 0 || N_ARGS && N_args == 0))
431 				exit(exitstat);
432 			OK = TRUE;
433 			j = TRACE ? echoargs() : TRUE;
434 			if (j) {
435 				/*
436 				 * for xcu4, all invocations of cmdname must
437 				 * return 0, in order for us to return 0.
438 				 * so if we have a non-zero status here,
439 				 * quit immediately.
440 				 */
441 				if ((exitstat |= lcall(cmdname, arglist)) == 0)
442 					continue;
443 			}
444 		}
445 	}
446 
447 	(void) lseek(0, file_offset, SEEK_SET);
448 	if (OK) {
449 		return (exitstat);
450 	} else {
451 		/*
452 		 * if exitstat was set, to match XCU4 complience,
453 		 * return that value, otherwise, return 1.
454 		 */
455 		return (exitstat ? exitstat : 1);
456 	}
457 }
458 
459 static void
460 queue(char *buffer, int len, int where)
461 {
462 	pio *new, *element;
463 
464 	if ((new = malloc(sizeof (pio))) == NULL) {
465 		perror(gettext("xargs: Memory allocation failure"));
466 		exit(1);
467 	}
468 	new->cur = new->start = buffer;
469 	new->length = len;
470 
471 	if (where == TAIL) {
472 		new->next = NULL;
473 		if (queued_data == NULL) {
474 			queued_data = new;
475 		} else {
476 			element = queued_data;
477 			while (element->next != NULL) {
478 				element = element->next;
479 			}
480 			element->next = new;
481 		}
482 	} else {
483 		file_offset -= len;
484 		new->next = queued_data;
485 		queued_data = new;
486 	}
487 }
488 
489 static char *
490 checklen(char *arg)
491 {
492 	int	oklen;
493 
494 	oklen = TRUE;
495 	linesize += strlen(arg) + 1;
496 	if (linesize >= BUFLIM) {
497 		/*
498 		 * we skip this if there're inserts. we'll handle the
499 		 * argument counting after all the insertions have
500 		 * been done.
501 		 */
502 		if (n_inserts == 0) {
503 			lastarg = arg;
504 			oklen = OK = FALSE;
505 
506 			if (LEGAL) {
507 				ERR = TRUE;
508 				ermsg(gettext("arg list too long\n"));
509 			} else if (N_args > 1) {
510 				N_args = 1;
511 			} else {
512 				ermsg(gettext("a single arg was greater than "
513 				    "the max arglist size of %d characters\n"),
514 				    BUFLIM);
515 				ERR = TRUE;
516 			}
517 		}
518 	}
519 	return (oklen ? arg : 0);
520 }
521 
522 static char *
523 addarg(char *arg)
524 {
525 	if (checklen(arg) != 0) {
526 		(void) strcpy(next, arg);
527 		arg = next;
528 		next += strlen(arg) + 1;
529 		return (arg);
530 	}
531 	return ((char *)0);
532 }
533 
534 /*
535  * store_wchr() : append a wchar_t to a char buffer, resize buffer if required.
536  *
537  *     Given a pointer to the beginning of a string buffer, the length of the
538  *     buffer and an offset indicating the next place to write within that
539  *     buffer, the passed wchar_t will be appended to the buffer if there is
540  *     enough space. If there is not enough space, an attempt to reallocate the
541  *     buffer will be made and if successful the passed pointer and size will be
542  *     updated to describe the reallocated block. Returns the new value for
543  *     'offset' (it will be incremented by the number of bytes written).
544  */
545 static size_t
546 store_wchr(char **buffer, size_t *buflen, size_t offset, wchar_t c)
547 {
548 	int bytes;
549 
550 	/*
551 	 * Make sure that there is enough room in the buffer to store the
552 	 * maximum length of c.
553 	 */
554 	if ((offset + MB_CUR_MAX) > *buflen) {
555 		/*
556 		 * Not enough room so attempt to reallocate. Add 'MB_CUR_MAX' to
557 		 * buffer length to ensure that there is always enough room to
558 		 * store 'c' if realloc succeeds, no matter what QBUF_INC is
559 		 * defined as.
560 		 */
561 		*buflen += (QBUF_INC + MB_CUR_MAX);
562 		if ((*buffer = realloc(*buffer, *buflen)) == NULL) {
563 			perror(gettext("xargs: Memory allocation failure"));
564 			exit(1);
565 		}
566 	}
567 	/* store bytes from wchar into buffer */
568 	bytes = wctomb(*buffer + offset, c);
569 	if (bytes == -1) {
570 		/* char was invalid */
571 		bytes = 1;
572 		*(*buffer + offset) = (char)c;
573 	}
574 
575 	/* return new value for offset */
576 	return (offset + bytes);
577 }
578 
579 static char *
580 getarg()
581 {
582 	int	bytes;
583 	wchar_t	c;
584 	char	*arg;
585 	char	*retarg, *requeue_buf;
586 	size_t  requeue_offset = 0, requeue_len;
587 	char	mbc[MB_LEN_MAX];
588 
589 	while (iswspace(c = getwchr()) || c == '\n')
590 		;
591 
592 	if (c == '\0') {
593 		MORE = FALSE;
594 		return (0);
595 	}
596 
597 	/*
598 	 * While we are reading in an argument, it is possible that we will
599 	 * reach the maximum length of the overflow buffer and we'll have to
600 	 * requeue what we have read so far. To handle this we allocate an
601 	 * initial buffer here which will keep an unprocessed copy of the data
602 	 * that we read in (this buffer will grow as required).
603 	 */
604 	requeue_len = (size_t)QBUF_STARTLEN;
605 	if ((requeue_buf = (char *)malloc(requeue_len)) == NULL) {
606 		perror(gettext("xargs: Memory allocation failure"));
607 		exit(1);
608 	}
609 
610 	for (arg = next; ; c = getwchr()) {
611 		bytes = wctomb(mbc, c);
612 
613 		/*
614 		 * Store the char that we have read before processing it in case
615 		 * the current argument needs to be requeued.
616 		 */
617 		requeue_offset = store_wchr(&requeue_buf, &requeue_len,
618 		    requeue_offset, c);
619 
620 		/* Check for overflow the input buffer */
621 		if ((next + ((bytes == -1) ? 1 : bytes)) >= &argbuf[BUFLIM]) {
622 			/*
623 			 * It's only an error if there are no Args in buffer
624 			 * already.
625 			 */
626 			if ((N_ARGS || PER_LINE) && LEGAL) {
627 				ERR = TRUE;
628 				ermsg(gettext("Argument list too long\n"));
629 				free(requeue_buf);
630 				return (0);
631 			} else if (N_args == 0) {
632 				lastarg = "";
633 				ERR = TRUE;
634 				ermsg(gettext("A single arg was greater than "
635 				    "the max arglist size of %d characters\n"),
636 				    BUFSIZE);
637 				free(requeue_buf);
638 				return (0);
639 			}
640 			/*
641 			 * Otherwise we put back the current argument
642 			 * and use what we have collected so far...
643 			 */
644 			queue(requeue_buf, requeue_offset, HEAD);
645 			/* reset inquote because we have requeued the quotes */
646 			inquote = 0;
647 			return (NULL);
648 		}
649 
650 
651 		if (iswctype(c, blank) && inquote == 0) {
652 			if (INSERT) {
653 				if (bytes == -1) {
654 					*next++ = (char)c;
655 				} else {
656 					(void) wctomb(next, c);
657 					next += bytes;
658 				}
659 				continue;
660 			}
661 
662 			/* skip over trailing whitespace till next arg */
663 			while (iswctype((c = getwchr()), blank) &&
664 			    (c != '\n') && (c != '\0'))
665 				;
666 
667 			/*
668 			 * if there was space till end of line then the last
669 			 * character was really a newline...
670 			 */
671 			if (c == L'\n' || c == L'\0') {
672 				ungetwchr(L'\n');
673 			} else {
674 				/* later code needs to know this was a space */
675 				ungetwchr(c);
676 				c = L' ';
677 			}
678 			goto end_arg;
679 		}
680 		switch (c) {
681 		case L'\0':
682 		case L'\n':
683 			if (inquote) {
684 				*next++ = '\0';
685 				ermsg(gettext("Missing quote: %s\n"), arg);
686 				ERR = TRUE;
687 				free(requeue_buf);
688 				return (0);
689 			}
690 
691 			N_lines++;
692 end_arg:		*next++ = '\0';
693 			/* we finished without requeuing so free requeue_buf */
694 			free(requeue_buf);
695 			if ((strcmp(arg, LEOF) == 0 && *LEOF != '\0') ||
696 			    (c == '\0' && strlen(arg) == 0)) {
697 				MORE = FALSE;
698 				/* absorb the rest of the line */
699 				if ((c != '\n') && (c != '\0'))
700 					while (c = getwchr())
701 						if ((c == '\n') || (c == '\0'))
702 							break;
703 				if (strcmp(arg, LEOF) == 0 && *LEOF != '\0') {
704 					/*
705 					 * Encountered EOF string.
706 					 * Don't read any more lines.
707 					 */
708 					N_lines = 0;
709 				}
710 				return (0);
711 			} else {
712 				++N_args;
713 				if (retarg = checklen(arg)) {
714 					if ((PER_LINE &&
715 					    N_lines >= PER_LINE &&
716 					    (c == '\0' || c == '\n')) ||
717 					    (N_ARGS && N_args >= N_ARGS)) {
718 						N_lines = N_args = 0;
719 						lastarg = "";
720 						OK = FALSE;
721 					}
722 				}
723 				return (retarg);
724 			}
725 
726 		case '"':
727 			if (inquote == 1)	/* in single quoted string */
728 				goto is_default;
729 			if (inquote == 2)	/* terminating double quote */
730 				inquote = 0;
731 			else			/* starting quoted string */
732 				inquote = 2;
733 			break;
734 
735 		case '\'':
736 			if (inquote == 2)	/* in double quoted string */
737 				goto is_default;
738 			if (inquote == 1)	/* terminating single quote */
739 				inquote = 0;
740 			else			/* starting quoted string */
741 				inquote = 1;
742 			break;
743 
744 		case L'\\':
745 			/*
746 			 * Any unquoted character can be escaped by
747 			 * preceding it with a backslash.
748 			 */
749 			if (inquote == 0) {
750 				c = getwchr();
751 				/* store quoted char for potential requeueing */
752 				requeue_offset = store_wchr(&requeue_buf,
753 				    &requeue_len, requeue_offset, c);
754 			}
755 
756 		default:
757 is_default:		if (bytes == -1) {
758 				*next++ = (char)c;
759 			} else {
760 				(void) wctomb(next, c);
761 				next += bytes;
762 			}
763 			break;
764 		}
765 	}
766 }
767 
768 
769 /*
770  * ermsg():	print out an error message, and indicate failure globally.
771  *
772  *	Assumes that message has already been gettext()'d. It would be
773  *	nice if we could just do the gettext() here, but we can't, since
774  *	since xgettext(1M) wouldn't be able to pick up our error message.
775  */
776 /* PRINTFLIKE1 */
777 static void
778 ermsg(char *messages, ...)
779 {
780 	va_list	ap;
781 
782 	va_start(ap, messages);
783 
784 	(void) fprintf(stderr, "xargs: ");
785 	(void) vfprintf(stderr, messages, ap);
786 
787 	va_end(ap);
788 	OK = FALSE;
789 }
790 
791 static int
792 echoargs()
793 {
794 	char	**anarg;
795 	char	**tanarg;	/* tmp ptr			*/
796 	int		i;
797 	char		reply[LINE_MAX];
798 
799 	tanarg = anarg = arglist-1;
800 
801 	/*
802 	 * write out each argument, separated by a space. the tanarg
803 	 * nonsense is for xcu4 testsuite compliance - so that an
804 	 * extra space isn't echoed after the last argument.
805 	 */
806 	while (*++anarg) {		/* while there's an argument	*/
807 		++tanarg;		/* follow anarg			*/
808 		(void) write(2, *anarg, strlen(*anarg));
809 
810 		if (*++tanarg) {	/* if there's another argument:	*/
811 			(void) write(2, " ", 1); /* add a space		*/
812 			--tanarg;	/* reset back to anarg		*/
813 		}
814 	}
815 	if (PROMPT == -1) {
816 		(void) write(2, "\n", 1);
817 		return (TRUE);
818 	}
819 
820 	/*
821 	 * at this point, there may be unexpected input pending on stdin,
822 	 * if one has used the -n flag. this presents a problem, because
823 	 * if we simply do a read(), we'll get the extra input, instead
824 	 * of our desired y/n input. so, we see if there's any extra
825 	 * input, and if there is, then we will store it.
826 	 */
827 	saveinput();
828 
829 	(void) write(2, "?...", 4);	/* ask the user for input	*/
830 
831 	for (i = 0; i < LINE_MAX && read(PROMPT, &reply[i], 1) > 0; i++) {
832 		if (reply[i] == '\n') {
833 			if (i == 0)
834 				return (FALSE);
835 			break;
836 		}
837 	}
838 	reply[i] = 0;
839 
840 	/* flush remainder of line if necessary */
841 	if (i == LINE_MAX) {
842 		char	bitbucket;
843 
844 		while ((read(PROMPT, &bitbucket, 1) > 0) && (bitbucket != '\n'))
845 			;
846 	}
847 
848 	return (yes_check(reply));
849 }
850 
851 
852 static char *
853 insert(char *pattern, char *subst)
854 {
855 	static char	buffer[MAXSBUF+1];
856 	int		len, ipatlen;
857 	char	*pat;
858 	char	*bufend;
859 	char	*pbuf;
860 
861 	len = strlen(subst);
862 	ipatlen = strlen(INSPAT) - 1;
863 	pat = pattern - 1;
864 	pbuf = buffer;
865 	bufend = &buffer[MAXSBUF];
866 
867 	while (*++pat) {
868 		if (xindex(pat, INSPAT) == 0) {
869 			if (pbuf + len >= bufend) {
870 				break;
871 			} else {
872 				(void) strcpy(pbuf, subst);
873 				pat += ipatlen;
874 				pbuf += len;
875 			}
876 		} else {
877 			*pbuf++ = *pat;
878 			if (pbuf >= bufend)
879 				break;
880 		}
881 	}
882 
883 	if (!*pat) {
884 		*pbuf = '\0';
885 		return (buffer);
886 	} else {
887 		ermsg(gettext("Maximum argument size with insertion via %s's "
888 		    "exceeded\n"), INSPAT);
889 		ERR = TRUE;
890 		return (0);
891 	}
892 }
893 
894 
895 static void
896 addibuf(struct inserts	*p)
897 {
898 	char	*newarg, *skel, *sub;
899 	int		l;
900 
901 	skel = p->p_skel;
902 	sub = *ARGV;
903 	linesize -= strlen(skel) + 1;
904 	newarg = insert(skel, sub);
905 	if (ERR)
906 		return;
907 
908 	if (checklen(newarg)) {
909 		if ((ibufsize += (l = strlen(newarg) + 1)) > MAXIBUF) {
910 			ermsg(gettext("Insert buffer overflow\n"));
911 			ERR = TRUE;
912 		}
913 		(void) strcpy(p_ibuf, newarg);
914 		*(p->p_ARGV) = p_ibuf;
915 		p_ibuf += l;
916 	}
917 }
918 
919 
920 /*
921  * getchr():	get the next character.
922  * description:
923  *	we get the next character from pio.structure, if there's a character
924  *	to get. this may happen when we've had to flush stdin=/dev/tty,
925  *	but still wanted to preserve the characters for later processing.
926  *
927  *	otherwise we just get the character from stdin.
928  */
929 static int
930 getchr(void)
931 {
932 	char	c;
933 
934 	do {
935 		if (queued_data == NULL) {
936 			char	*buffer;
937 			int	len;
938 
939 			if ((buffer = malloc(BUFSIZE)) == NULL) {
940 				perror(gettext(
941 				    "xargs: Memory allocation failure"));
942 				exit(1);
943 			}
944 
945 			if ((len = read(0, buffer, BUFSIZE)) == 0)
946 				return (0);
947 			if (len == -1) {
948 				perror(gettext("xargs: Read failure"));
949 				exit(1);
950 			}
951 
952 			queue(buffer, len, TAIL);
953 		}
954 
955 		file_offset++;
956 		c = *queued_data->cur++;	 /* get the next character */
957 		if (--queued_data->length == 0) { /* at the end of buffer? */
958 			pio	*nxt = queued_data->next;
959 
960 			free(queued_data->start);
961 			free(queued_data);
962 			queued_data = nxt;
963 		}
964 	} while (c == '\0');
965 	return (c);
966 }
967 
968 
/*
 * getwchr():	get the next wide character from the input.
 *
 *	Bytes are taken from getchr() one at a time until mbtowc()
 *	accepts the sequence, up to MB_CUR_MAX bytes. Returns L'\0' at
 *	end of input. An invalid sequence is fatal: the program exits
 *	with status 1.
 */
static wchar_t
getwchr(void)
{
	int		i;
	wchar_t		wch;
	unsigned char	buffer[MB_LEN_MAX + 1];

	for (i = 0; i < (int)MB_CUR_MAX; ) {
		/* getchr() returns 0 only at end of input */
		if ((buffer[i++] = getchr()) == 0) {
			/* We have reached  EOF */
			if (i == 1) {
				/* TRUE EOF has been reached */
				return (L'\0');
			}
			/*
			 * We have some characters in our buffer still so it
			 * must be an invalid character right before EOF.
			 */
			break;
		}

		/* If this succeeds then we are done */
		if (mbtowc(&wch, (char *)buffer, i) != -1)
			return (wch);
	}

	/*
	 * We have now encountered an illegal character sequence.
	 * There is nothing much we can do at this point but
	 * return an error.  If we attempt to recover we may in fact
	 * return garbage as arguments, from the customer's point
	 * of view.  After all what if they are feeding us a file
	 * generated in another locale?
	 */
	errno = EILSEQ;
	perror(gettext("xargs: Corrupt input file"));
	exit(1);
	/* NOTREACHED */
}
1008 
1009 
1010 static void
1011 ungetwchr(wchar_t wch)
1012 {
1013 	char	*buffer;
1014 	int	bytes;
1015 
1016 	if ((buffer = malloc(MB_LEN_MAX)) == NULL) {
1017 		perror(gettext("xargs: Memory allocation failure"));
1018 		exit(1);
1019 	}
1020 	bytes = wctomb(buffer, wch);
1021 	queue(buffer, bytes, HEAD);
1022 }
1023 
1024 
1025 static int
1026 lcall(char *sub, char **subargs)
1027 {
1028 	int retcode, retry = 0;
1029 	pid_t iwait, child;
1030 
1031 	for (; ; ) {
1032 		switch (child = fork()) {
1033 		default:
1034 			while ((iwait = wait(&retcode)) != child &&
1035 			    iwait != (pid_t)-1)
1036 				;
1037 			if (iwait == (pid_t)-1) {
1038 				perror(gettext("xargs: Wait failure"));
1039 				exit(122);
1040 				/* NOTREACHED */
1041 			}
1042 			if (WIFSIGNALED(retcode)) {
1043 				ermsg(gettext("Child killed with signal %d\n"),
1044 				    WTERMSIG(retcode));
1045 				exit(125);
1046 				/* NOTREACHED */
1047 			}
1048 			if ((WEXITSTATUS(retcode) & 0377) == 0377) {
1049 				ermsg(gettext("Command could not continue "
1050 				    "processing data\n"));
1051 				exit(124);
1052 				/* NOTREACHED */
1053 			}
1054 			return (WEXITSTATUS(retcode));
1055 		case 0:
1056 			(void) execvp(sub, subargs);
1057 			perror(gettext("xargs: Could not exec command"));
1058 			if (errno == EACCES)
1059 				exit(126);
1060 			exit(127);
1061 			/* NOTREACHED */
1062 		case -1:
1063 			if (errno != EAGAIN && retry++ < FORK_RETRY) {
1064 				perror(gettext("xargs: Could not fork child"));
1065 				exit(123);
1066 			}
1067 			(void) sleep(1);
1068 		}
1069 	}
1070 }
1071 
1072 
/*
 * xindex():	locate the string `as2' inside the string `as1'.
 *
 *	Returns the offset of the first occurrence of `as2' in `as1',
 *	or -1 if there is none. An empty `as2' never matches.
 */
static int
xindex(char *as1, char *as2)
{
	char	*hit;

	/* strstr() would report a match at offset 0 for an empty needle */
	if (*as2 == '\0')
		return (-1);

	hit = strstr(as1, as2);
	if (hit == NULL)
		return (-1);
	return ((int)(hit - as1));
}
1102 
1103 
1104 static void
1105 usage()
1106 {
1107 	ermsg(gettext(USAGEMSG));
1108 	OK = FALSE;
1109 }
1110 
1111 
1112 
/*
 * parseargs():		modify the args
 *	since the -e, -i and -l flags all take optional subarguments,
 *	and getopts(3C) is clueless about this nonsense, we change
 *	our local argument count and strings to separate this out,
 *	and make it easier to handle via getopts(3C).
 *
 *	-e	-> "-e" ""
 *	-e3	-> "-e" "3"
 *	-Estr	-> "-E" "str"
 *	-i	-> "-i" "{}"
 *	-irep	-> "-i" "rep"
 *	-l	-> "-l" "1"
 *	-l10	-> "-l" "10"
 */
1130 static void
1131 parseargs(int ac, char **av)
1132 {
1133 	int i;			/* current argument			*/
1134 	int cflag;		/* 0 = not processing cmd arg		*/
1135 
1136 	if ((mav = malloc((ac * 2 + 1) * sizeof (char *))) == NULL) {
1137 		perror(gettext("xargs: Memory allocation failure"));
1138 		exit(1);
1139 	}
1140 
1141 	/* for each argument, see if we need to change things:		*/
1142 	for (i = mac = cflag = 0; (av[i] != NULL) && i < ac; i++, mac++) {
1143 		if ((mav[mac] = strdup(av[i])) == NULL) {
1144 			perror(gettext("xargs: Memory allocation failure"));
1145 			exit(1);
1146 		}
1147 
1148 		/* -- has been found or argument list is fully processes */
1149 		if (cflag)
1150 			continue;
1151 
1152 		/*
1153 		 * if we're doing special processing, and we've got a flag
1154 		 */
1155 		else if ((av[i][0] == '-') && (av[i][1] != NULL)) {
1156 			char	*def;
1157 
1158 			switch (av[i][1]) {
1159 			case	'e':
1160 				def = ""; /* -e with no arg turns off eof */
1161 				goto process_special;
1162 			case	'i':
1163 				def = INSPAT_STR;
1164 				goto process_special;
1165 			case	'l':
1166 				def = "1";
1167 process_special:
1168 				/*
1169 				 * if there's no sub-option, we *must* add
1170 				 * a default one. this is because xargs must
1171 				 * be able to distinguish between a valid
1172 				 * suboption, and a command name.
1173 				 */
1174 				if (av[i][2] == NULL) {
1175 					mav[++mac] = strdup(def);
1176 				} else {
1177 					/* clear out our version: */
1178 					mav[mac][2] = NULL;
1179 					mav[++mac] = strdup(&av[i][2]);
1180 				}
1181 				if (mav[mac] == NULL) {
1182 					perror(gettext("xargs: Memory"
1183 					    " allocation failure"));
1184 					exit(1);
1185 				}
1186 				break;
1187 
1188 			/* flags with required subarguments:		*/
1189 
1190 			/*
1191 			 * there are two separate cases here. either the
1192 			 * flag can have the normal XCU4 handling
1193 			 * (of the form: -X subargument); or it can have
1194 			 * the old solaris 2.[0-4] handling (of the
1195 			 * form: -Xsubargument). in order to maintain
1196 			 * backwards compatibility, we must support the
1197 			 * latter case. we handle the latter possibility
1198 			 * first so both the old solaris way of handling
1199 			 * and the new XCU4 way of handling things are allowed.
1200 			 */
1201 			case	'n':	/* FALLTHROUGH			*/
1202 			case	's':	/* FALLTHROUGH			*/
1203 			case	'E':	/* FALLTHROUGH			*/
1204 			case	'I':	/* FALLTHROUGH			*/
1205 			case	'L':
1206 				/*
1207 				 * if the second character isn't null, then
1208 				 * the user has specified the old syntax.
1209 				 * we move the subargument into our
1210 				 * mod'd argument list.
1211 				 */
1212 				if (av[i][2] != NULL) {
1213 					/* first clean things up:	*/
1214 					mav[mac][2] = NULL;
1215 
1216 					/* now add the separation:	*/
1217 					++mac;	/* inc to next mod'd arg */
1218 					if ((mav[mac] = strdup(&av[i][2])) ==
1219 					    NULL) {
1220 						perror(gettext("xargs: Memory"
1221 						    " allocation failure"));
1222 						exit(1);
1223 					}
1224 					break;
1225 				}
1226 				i++;
1227 				mac++;
1228 
1229 				if (av[i] == NULL) {
1230 					mav[mac] = NULL;
1231 					return;
1232 				}
1233 				if ((mav[mac] = strdup(av[i])) == NULL) {
1234 					perror(gettext("xargs: Memory"
1235 					    " allocation failure"));
1236 					exit(1);
1237 				}
1238 				break;
1239 
1240 			/* flags */
1241 			case 'p' :
1242 			case 't' :
1243 			case 'x' :
1244 				break;
1245 
1246 			case '-' :
1247 			default:
1248 				/*
1249 				 * here we've hit the cmd argument. so
1250 				 * we'll stop special processing, as the
1251 				 * cmd may have a "-i" etc., argument,
1252 				 * and we don't want to add a "" to it.
1253 				 */
1254 				cflag = 1;
1255 				break;
1256 			}
1257 		} else if (i > 0) {	/* if we're not the 1st arg	*/
1258 			/*
1259 			 * if it's not a flag, then it *must* be the cmd.
1260 			 * set cflag, so we don't mishandle the -[eil] flags.
1261 			 */
1262 			cflag = 1;
1263 		}
1264 	}
1265 
1266 	mav[mac] = NULL;
1267 }
1268 
1269 
/*
 * saveinput(): pick up any pending input, so it can be processed later.
 *
 * description:
 *	the purpose of this routine is to allow us to handle the user
 *	typing in a 'y' or 'n', when there are existing characters already
 *	in stdin. this happens when one gives the "-n" option along with
 *	"-p". the problem occurs when the user first types in more arguments
 *	than specified by the -n number. echoargs() wants to read stdin
 *	in order to get the user's response, but if there's already stuff
 *	there, echoargs() won't read the proper character.
 *
 *	the solution provided by this routine is to pick up all characters
 *	(if any), and store them for later processing.
 *
 *	does nothing when PROMPT is -1 (not in -p mode). a failed
 *	allocation, I_PEEK ioctl or read prints a message and exits with
 *	status 1.
 */

void
saveinput()
{
	char *buffer;		/* scratch buffer for the pending data	*/
	struct strpeek speek;	/* to see what's on the queue		*/

	/* if we're not in -p mode, skip				*/
	if (PROMPT == -1) {
		return;
	}

	if ((buffer = malloc((size_t)MAX_INPUT)) == NULL) {
		perror(gettext("xargs: Memory allocation failure"));
		exit(1);
	}

	/*
	 * now see if there's any activity pending. we ask for data only:
	 * the control part is given no buffer at all.
	 */
	speek.ctlbuf.maxlen = 0;
	speek.ctlbuf.len = 0;
	speek.ctlbuf.buf = NULL;
	speek.flags = 0;
	speek.databuf.maxlen = MAX_INPUT;
	speek.databuf.len = 0;
	speek.databuf.buf = buffer;

	if (ioctl(PROMPT, I_PEEK, &speek) == -1) {
		perror(gettext("xargs: I_PEEK failure"));
		exit(1);
	}

	if (speek.databuf.len > 0) {
		int	len;

		/* read exactly as many bytes as the peek reported	*/
		if ((len = read(PROMPT, buffer, speek.databuf.len)) == -1) {
			perror(gettext("xargs: read failure"));
			exit(1);
		}
		queue(buffer, len, TAIL);
		/*
		 * NOTE(review): buffer is deliberately not freed here.
		 * whether queue() copies the bytes or keeps the pointer
		 * is not visible from this routine -- confirm, and free
		 * the buffer here too if queue() makes its own copy.
		 */
	} else {
		/* nothing pending: the scratch buffer was never handed off */
		free(buffer);
	}
}
1328