xref: /illumos-gate/usr/src/cmd/xargs/xargs.c (revision 168c213023b7f347f11abfc72f448b0c621ab718)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <stdio.h>
33 #include <sys/types.h>
34 #include <sys/wait.h>
35 #include <unistd.h>
36 #include <fcntl.h>
37 #include <string.h>
38 #include <stdarg.h>
39 #include <libgen.h>
40 #include <stdlib.h>
41 #include <limits.h>
42 #include <wchar.h>
43 #include <locale.h>
44 #include <langinfo.h>
45 #include <stropts.h>
46 #include <poll.h>
47 #include <errno.h>
48 #include <stdarg.h>
49 
50 #define	HEAD	0
51 #define	TAIL	1
52 #define	FALSE 0
53 #define	TRUE 1
54 #define	MAXSBUF 255
55 #define	MAXIBUF 512
56 #define	MAXINSERTS 5
57 #define	BUFSIZE LINE_MAX
58 #define	MAXARGS 255
59 #define	INSPAT_STR	"{}"	/* default replstr string for -[Ii]	*/
60 #define	FORK_RETRY	5
61 
62 #define	QBUF_STARTLEN 255  /* start size of growable string buffer */
63 #define	QBUF_INC 100	   /* how much to grow a growable string by */
64 
65 static wctype_t	blank;
66 static char	*arglist[MAXARGS+1];
67 static char	argbuf[BUFSIZE+1];
68 static char	*next = argbuf;
69 static char	*lastarg = "";
70 static char	**ARGV = arglist;
71 static char	*LEOF = "_";
72 static char	*INSPAT = INSPAT_STR;
73 static char	ins_buf[MAXIBUF];
74 static char	*p_ibuf;
75 
76 static struct inserts {
77 	char	**p_ARGV;	/* where to put newarg ptr in arg list */
78 	char	*p_skel;	/* ptr to arg template */
79 } saveargv[MAXINSERTS];
80 
81 static off_t	file_offset = 0;
82 static int	PROMPT = -1;
83 static int	BUFLIM = BUFSIZE;
84 static int	N_ARGS = 0;
85 static int	N_args = 0;
86 static int	N_lines = 0;
87 static int	DASHX = FALSE;
88 static int	MORE = TRUE;
89 static int	PER_LINE = FALSE;
90 static int	ERR = FALSE;
91 static int	OK = TRUE;
92 static int	LEGAL = FALSE;
93 static int	TRACE = FALSE;
94 static int	INSERT = FALSE;
95 static int	linesize = 0;
96 static int	ibufsize = 0;
97 static char	*yesstr;	/* the string contains int'l for "yes"	*/
98 static int	exitstat = 0;	/* our exit status			*/
99 static int	mac;		/* modified argc, after parsing		*/
100 static char	**mav;		/* modified argv, after parsing		*/
101 static int	n_inserts;	/* # of insertions.			*/
102 static int	inquote = 0;	/* processing a quoted string		*/
103 
104 /*
105  * the pio structure is used to save any pending input before the
106  * user replies to a prompt. the pending input is saved here,
107  * for the appropriate processing later.
108  */
109 typedef struct pio {
110 	struct pio *next;	/* next in stack			*/
111 	char *start;		/* starting addr of the buffer		*/
112 	char *cur;		/* ptr to current char in buf		*/
113 	size_t length;		/* number of bytes remaining		*/
114 } pio;
115 
116 static pio *queued_data = NULL;
117 
118 /* our usage message:							*/
119 #define	USAGEMSG "Usage: xargs: [-t] [-p] [-e[eofstr]] [-E eofstr] "\
120 	"[-I replstr] [-i[replstr]] [-L #] [-l[#]] [-n # [-x]] [-s size] "\
121 	"[cmd [args ...]]\n"
122 
123 static int	echoargs();
124 static int	getchr(void);
125 static wchar_t	getwchr(void);
126 static void	ungetwchr(wchar_t);
127 static int	lcall(char *sub, char **subargs);
128 static int	xindex(char *as1, char *as2);
129 static void	addibuf(struct inserts *p);
130 static void	ermsg(char *messages, ...);
131 static char	*addarg(char *arg);
132 static char	*checklen(char *arg);
133 static size_t   store_wchr(char **, size_t *, size_t, wchar_t);
134 static char	*getarg();
135 static char	*insert(char *pattern, char *subst);
136 static void	usage();
137 static void	parseargs();
138 static void	saveinput();
139 
140 
141 int
142 main(int argc, char **argv)
143 {
144 	int	j;
145 	struct inserts *psave;
146 	int c;
147 	int	initsize;
148 	char	*cmdname, *initbuf, **initlist;
149 
150 
151 	/* initialization */
152 
153 	blank = wctype("blank");
154 	n_inserts = 0;
155 	psave = saveargv;
156 	(void) setlocale(LC_ALL, "");
157 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D 		*/
158 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't 		*/
159 #endif
160 	(void) textdomain(TEXT_DOMAIN);
161 
162 	/*
163 	 * now we get the appropriate "yes" string for our locale.
164 	 * since this may be a multibyte character, we store the
165 	 * string which is returned. later on, when we're looking for
166 	 * a "y" in response to our prompt, we'll use the first
167 	 * multibyte character of yesstr as a comparision.
168 	 */
169 	initbuf = nl_langinfo(YESSTR);	/* initbuf is a tmp placeholder here */
170 	if ((yesstr = malloc(strlen(initbuf) + 1)) == NULL) {
171 		perror(gettext("xargs: Memory allocation failure"));
172 		exit(1);
173 	}
174 	(void) strcpy(yesstr, initbuf);
175 
176 	parseargs(argc, argv);
177 
178 	/* handling all of xargs arguments:				*/
179 	while ((c = getopt(mac, mav, "tpe:E:I:i:L:l:n:s:x")) != EOF) {
180 		switch (c) {
181 		case 't':	/* -t: turn trace mode on		*/
182 			TRACE = TRUE;
183 			break;
184 
185 		case 'p':	/* -p: turn on prompt mode.		*/
186 			if ((PROMPT = open("/dev/tty", O_RDONLY)) == -1) {
187 				perror(gettext("can't read from tty for -p"));
188 			} else {
189 				TRACE = TRUE;
190 			}
191 			break;
192 
193 		case 'e':
194 			/*
195 			 * -e[eofstr]: set/disable end-of-file.
196 			 * N.B. that an argument *isn't* required here; but
197 			 * parseargs forced an argument if not was given.  The
198 			 * forced argument is the default...
199 			 */
200 			LEOF = optarg; /* can be empty */
201 			break;
202 
203 		case 'E':
204 			/*
205 			 * -E eofstr: change end-of-file string.
206 			 * eofstr *is* required here, but can be empty:
207 			 */
208 			LEOF = optarg;
209 			break;
210 
211 		case 'I':
212 			/* -I replstr: Insert mode. replstr *is* required. */
213 			INSERT = PER_LINE = LEGAL = TRUE;
214 			N_ARGS = 0;
215 			INSPAT = optarg;
216 			if (*optarg == '\0') {
217 				ermsg(gettext(
218 				    "Option requires an argument: -%c\n"), c);
219 			}
220 			break;
221 
222 		case 'i':
223 			/*
224 			 * -i [replstr]: insert mode, with *optional* replstr.
225 			 * N.B. that an argument *isn't* required here; if
226 			 * it's not given, then the string INSPAT_STR will
227 			 * be assumed.
228 			 *
229 			 * Since getopts(3C) doesn't handle the case of an
230 			 * optional variable argument at all, we have to
231 			 * parse this by hand:
232 			 */
233 
234 			INSERT = PER_LINE = LEGAL = TRUE;
235 			N_ARGS = 0;
236 			if ((optarg != NULL) && (*optarg != '\0')) {
237 				INSPAT = optarg;
238 			} else {
239 				/*
240 				 * here, there is no next argument. so
241 				 * we reset INSPAT to the INSPAT_STR.
242 				 * we *have* to do this, as -i/I may have
243 				 * been given previously, and XCU4 requires
244 				 * that only "the last one specified takes
245 				 * effect".
246 				 */
247 				INSPAT = INSPAT_STR;
248 			}
249 			break;
250 
251 		case 'L':
252 			/*
253 			 * -L number: # of times cmd is executed
254 			 * number *is* required here:
255 			 */
256 			PER_LINE = TRUE;
257 			N_ARGS = 0;
258 			INSERT = FALSE;
259 			if ((PER_LINE = atoi(optarg)) <= 0) {
260 				ermsg(gettext("#lines must be positive "
261 				    "int: %s\n"), optarg);
262 			}
263 			break;
264 
265 		case 'l':
266 			/*
267 			 * -l [number]: # of times cmd is executed
268 			 * N.B. that an argument *isn't* required here; if
269 			 * it's not given, then 1 is assumed.
270 			 *
271 			 * parseargs handles the optional arg processing.
272 			 */
273 
274 			PER_LINE = LEGAL = TRUE;  /* initialization	*/
275 			N_ARGS = 0;
276 			INSERT = FALSE;
277 
278 			if ((optarg != NULL) && (*optarg != '\0')) {
279 				if ((PER_LINE = atoi(optarg)) <= 0)
280 					PER_LINE = 1;
281 			}
282 			break;
283 
284 		case 'n':	/* -n number: # stdin args		*/
285 			/*
286 			 * -n number: # stdin args.
287 			 * number *is* required here:
288 			 */
289 			if ((N_ARGS = atoi(optarg)) <= 0) {
290 				ermsg(gettext("#args must be positive "
291 				    "int: %s\n"), optarg);
292 			} else {
293 				LEGAL = DASHX || N_ARGS == 1;
294 				INSERT = PER_LINE = FALSE;
295 			}
296 			break;
297 
298 		case 's':	/* -s size: set max size of each arg list */
299 			BUFLIM = atoi(optarg);
300 			if (BUFLIM > BUFSIZE || BUFLIM <= 0) {
301 				ermsg(gettext(
302 				    "0 < max-cmd-line-size <= %d: "
303 				    "%s\n"), BUFSIZE, optarg);
304 			}
305 			break;
306 
307 		case 'x':	/* -x: terminate if args > size limit	*/
308 			DASHX = LEGAL = TRUE;
309 			break;
310 
311 		default:
312 			/*
313 			 * bad argument. complain and get ready to die.
314 			 */
315 			ERR = TRUE;
316 			usage();
317 
318 			exit(2);
319 			break;
320 		}
321 	}
322 
323 	/*
324 	 * if anything called ermsg(), something screwed up, so
325 	 * we exit early.
326 	 */
327 	if (OK == FALSE) {
328 		ERR = TRUE;
329 		usage();
330 		exit(2);
331 	}
332 
333 	/*
334 	 * we're finished handling xargs's options, so now pick up
335 	 * the command name (if any), and it's options.
336 	 */
337 
338 
339 	mac -= optind;	/* dec arg count by what we've processed 	*/
340 	mav += optind;	/* inc to current mav				*/
341 
342 	if (mac <= 0) {	/* if there're no more args to process,	*/
343 		cmdname = "/usr/bin/echo";	/* our default command	*/
344 		*ARGV++ = addarg(cmdname);	/* use the default cmd.	*/
345 	} else {	/* otherwise keep parsing rest of the string.	*/
346 		/*
347 		 * note that we can't use getopts(3C), and *must* parse
348 		 * this by hand, as we don't know apriori what options the
349 		 * command will take.
350 		 */
351 		cmdname = *mav;	/* get the command name	*/
352 
353 
354 		/* pick up the remaining args from the command line:	*/
355 		while ((OK == TRUE) && (mac-- > 0)) {
356 			/*
357 			 * while we haven't crapped out, and there's
358 			 * work to do:
359 			 */
360 			if (INSERT && ! ERR) {
361 				if (xindex(*mav, INSPAT) != -1) {
362 					if (++n_inserts > MAXINSERTS) {
363 						ermsg(gettext("too many args "
364 						    "with %s\n"), INSPAT);
365 						ERR = TRUE;
366 					}
367 					psave->p_ARGV = ARGV;
368 					(psave++)->p_skel = *mav;
369 				}
370 			}
371 			*ARGV++ = addarg(*mav++);
372 		}
373 	}
374 
375 	/* pick up args from standard input */
376 
377 	initbuf = next;
378 	initlist = ARGV;
379 	initsize = linesize;
380 
381 	while (OK && MORE) {
382 		N_args = 0;
383 		N_lines = 0;
384 		next = initbuf;
385 		ARGV = initlist;
386 		linesize = initsize;
387 		if (*lastarg) {
388 			*ARGV++ = addarg(lastarg);
389 			lastarg = "";
390 		}
391 
392 		while (((ARGV - arglist) < MAXARGS) &&
393 		    ((*ARGV++ = getarg()) != NULL) && OK)
394 			;
395 
396 		/* insert arg if requested */
397 
398 		if (!ERR && INSERT) {
399 			if ((!MORE) && (N_lines == 0)) {
400 				exit(exitstat);
401 			}
402 					/* no more input lines */
403 			p_ibuf = ins_buf;
404 			ARGV--;
405 			j = ibufsize = 0;
406 			for (psave = saveargv; ++j <= n_inserts; ++psave) {
407 				addibuf(psave);
408 				if (ERR)
409 					break;
410 			}
411 		}
412 		*ARGV = 0;
413 
414 		if (n_inserts > 0) {
415 			int t_ninserts;
416 
417 			/*
418 			 * if we've done any insertions, re-calculate the
419 			 * linesize. bomb out if we've exceeded our length.
420 			 */
421 			t_ninserts = n_inserts;
422 			n_inserts = 0;	/* inserts have been done 	*/
423 			linesize = 0;	/* recalculate this		*/
424 
425 			/* for each current argument in the list:	*/
426 			for (ARGV = arglist; *ARGV != NULL; ARGV++) {
427 				/* recalculate everything.		*/
428 				if (checklen(*ARGV) != 0) {
429 					if (N_ARGS && (N_args >= N_ARGS)) {
430 						N_lines = N_args = 0;
431 						OK = FALSE;
432 						ERR = TRUE;
433 					}
434 				}
435 			}
436 			n_inserts = t_ninserts;
437 		}
438 
439 		/* exec command */
440 
441 		if (!ERR) {
442 			if (!MORE &&
443 			    (PER_LINE && N_lines == 0 || N_ARGS && N_args == 0))
444 				exit(exitstat);
445 			OK = TRUE;
446 			j = TRACE ? echoargs() : TRUE;
447 			if (j) {
448 				/*
449 				 * for xcu4, all invocations of cmdname must
450 				 * return 0, in order for us to return 0.
451 				 * so if we have a non-zero status here,
452 				 * quit immediately.
453 				 */
454 				if ((exitstat |= lcall(cmdname, arglist)) == 0)
455 					continue;
456 			}
457 		}
458 	}
459 
460 	(void) lseek(0, file_offset, SEEK_SET);
461 	if (OK) {
462 		return (exitstat);
463 	} else {
464 		/*
465 		 * if exitstat was set, to match XCU4 complience,
466 		 * return that value, otherwise, return 1.
467 		 */
468 		return (exitstat ? exitstat : 1);
469 	}
470 }
471 
472 static void
473 queue(char *buffer, int len, int where)
474 {
475 	pio *new, *element;
476 
477 	if ((new = malloc(sizeof (pio))) == NULL) {
478 		perror(gettext("xargs: Memory allocation failure"));
479 		exit(1);
480 	}
481 	new->cur = new->start = buffer;
482 	new->length = len;
483 
484 	if (where == TAIL) {
485 		new->next = NULL;
486 		if (queued_data == NULL) {
487 			queued_data = new;
488 		} else {
489 			element = queued_data;
490 			while (element->next != NULL) {
491 				element = element->next;
492 			}
493 			element->next = new;
494 		}
495 	} else {
496 		file_offset -= len;
497 		new->next = queued_data;
498 		queued_data = new;
499 	}
500 }
501 
502 static char *
503 checklen(char *arg)
504 {
505 	int	oklen;
506 
507 	oklen = TRUE;
508 	linesize += strlen(arg) + 1;
509 	if (linesize >= BUFLIM) {
510 		/*
511 		 * we skip this if there're inserts. we'll handle the
512 		 * argument counting after all the insertions have
513 		 * been done.
514 		 */
515 		if (n_inserts == 0) {
516 			lastarg = arg;
517 			oklen = OK = FALSE;
518 
519 			if (LEGAL) {
520 				ERR = TRUE;
521 				ermsg(gettext("arg list too long\n"));
522 			} else if (N_args > 1) {
523 				N_args = 1;
524 			} else {
525 				ermsg(gettext("a single arg was greater than "
526 				    "the max arglist size of %d characters\n"),
527 				    BUFLIM);
528 				ERR = TRUE;
529 			}
530 		}
531 	}
532 	return (oklen ? arg : 0);
533 }
534 
535 static char *
536 addarg(char *arg)
537 {
538 	if (checklen(arg) != 0) {
539 		(void) strcpy(next, arg);
540 		arg = next;
541 		next += strlen(arg) + 1;
542 		return (arg);
543 	}
544 	return ((char *)0);
545 }
546 
547 /*
548  * store_wchr() : append a wchar_t to a char buffer, resize buffer if required.
549  *
550  *     Given a pointer to the beginning of a string buffer, the length of the
551  *     buffer and an offset indicating the next place to write within that
552  *     buffer, the passed wchar_t will be appended to the buffer if there is
553  *     enough space. If there is not enough space, an attempt to reallocate the
554  *     buffer will be made and if successful the passed pointer and size will be
555  *     updated to describe the reallocated block. Returns the new value for
556  *     'offset' (it will be incremented by the number of bytes written).
557  */
558 static size_t
559 store_wchr(char **buffer, size_t *buflen, size_t offset, wchar_t c)
560 {
561 	int bytes;
562 
563 	/*
564 	 * Make sure that there is enough room in the buffer to store the
565 	 * maximum length of c.
566 	 */
567 	if ((offset + MB_CUR_MAX) > *buflen) {
568 		/*
569 		 * Not enough room so attempt to reallocate. Add 'MB_CUR_MAX' to
570 		 * buffer length to ensure that there is always enough room to
571 		 * store 'c' if realloc succeeds, no matter what QBUF_INC is
572 		 * defined as.
573 		 */
574 		*buflen += (QBUF_INC + MB_CUR_MAX);
575 		if ((*buffer = realloc(*buffer, *buflen)) == NULL) {
576 			perror(gettext("xargs: Memory allocation failure"));
577 			exit(1);
578 		}
579 	}
580 	/* store bytes from wchar into buffer */
581 	bytes = wctomb(*buffer + offset, c);
582 	if (bytes == -1) {
583 		/* char was invalid */
584 		bytes = 1;
585 		*(*buffer + offset) = (char)c;
586 	}
587 
588 	/* return new value for offset */
589 	return (offset + bytes);
590 }
591 
592 static char *
593 getarg()
594 {
595 	int	bytes;
596 	wchar_t	c;
597 	char	*arg;
598 	char	*retarg, *requeue_buf;
599 	size_t  requeue_offset = 0, requeue_len;
600 	char	mbc[MB_LEN_MAX];
601 
602 	while (iswspace(c = getwchr()) || c == '\n')
603 		;
604 
605 	if (c == '\0') {
606 		MORE = FALSE;
607 		return (0);
608 	}
609 
610 	/*
611 	 * While we are reading in an argument, it is possible that we will
612 	 * reach the maximum length of the overflow buffer and we'll have to
613 	 * requeue what we have read so far. To handle this we allocate an
614 	 * initial buffer here which will keep an unprocessed copy of the data
615 	 * that we read in (this buffer will grow as required).
616 	 */
617 	requeue_len = (size_t)QBUF_STARTLEN;
618 	if ((requeue_buf = (char *)malloc(requeue_len)) == NULL) {
619 		perror(gettext("xargs: Memory allocation failure"));
620 		exit(1);
621 	}
622 
623 	for (arg = next; ; c = getwchr()) {
624 		bytes = wctomb(mbc, c);
625 
626 		/*
627 		 * Store the char that we have read before processing it in case
628 		 * the current argument needs to be requeued.
629 		 */
630 		requeue_offset = store_wchr(&requeue_buf, &requeue_len,
631 		    requeue_offset, c);
632 
633 		/* Check for overflow the input buffer */
634 		if ((next + ((bytes == -1) ? 1 : bytes)) >= &argbuf[BUFLIM]) {
635 			/*
636 			 * It's only an error if there are no Args in buffer
637 			 * already.
638 			 */
639 			if ((N_ARGS || PER_LINE) && LEGAL) {
640 				ERR = TRUE;
641 				ermsg(gettext("Argument list too long\n"));
642 				free(requeue_buf);
643 				return (0);
644 			} else if (N_args == 0) {
645 				lastarg = "";
646 				ERR = TRUE;
647 				ermsg(gettext("A single arg was greater than "
648 				    "the max arglist size of %d characters\n"),
649 				    BUFSIZE);
650 				free(requeue_buf);
651 				return (0);
652 			}
653 			/*
654 			 * Otherwise we put back the current argument
655 			 * and use what we have collected so far...
656 			 */
657 			queue(requeue_buf, requeue_offset, HEAD);
658 			/* reset inquote because we have requeued the quotes */
659 			inquote = 0;
660 			return (NULL);
661 		}
662 
663 
664 		if (iswctype(c, blank) && inquote == 0) {
665 			if (INSERT) {
666 				if (bytes == -1) {
667 					*next++ = (char)c;
668 				} else {
669 					(void) wctomb(next, c);
670 					next += bytes;
671 				}
672 				continue;
673 			}
674 
675 			/* skip over trailing whitespace till next arg */
676 			while (iswctype((c = getwchr()), blank) &&
677 			    (c != '\n') && (c != '\0'))
678 				;
679 
680 			/*
681 			 * if there was space till end of line then the last
682 			 * character was really a newline...
683 			 */
684 			if (c == L'\n' || c == L'\0') {
685 				ungetwchr(L'\n');
686 			} else {
687 				/* later code needs to know this was a space */
688 				ungetwchr(c);
689 				c = L' ';
690 			}
691 			goto end_arg;
692 		}
693 		switch (c) {
694 		case L'\0':
695 		case L'\n':
696 			if (inquote) {
697 				*next++ = '\0';
698 				ermsg(gettext("Missing quote: %s\n"), arg);
699 				ERR = TRUE;
700 				free(requeue_buf);
701 				return (0);
702 			}
703 
704 			N_lines++;
705 end_arg:		*next++ = '\0';
706 			/* we finished without requeuing so free requeue_buf */
707 			free(requeue_buf);
708 			if ((strcmp(arg, LEOF) == 0 && *LEOF != '\0') ||
709 			    (c == '\0' && strlen(arg) == 0)) {
710 				MORE = FALSE;
711 				/* absorb the rest of the line */
712 				if ((c != '\n') && (c != '\0'))
713 					while (c = getwchr())
714 						if ((c == '\n') || (c == '\0'))
715 							break;
716 				return (0);
717 			} else {
718 				++N_args;
719 				if (retarg = checklen(arg)) {
720 					if ((PER_LINE &&
721 					    N_lines >= PER_LINE &&
722 					    (c == '\0' || c == '\n')) ||
723 					    (N_ARGS && N_args >= N_ARGS)) {
724 						N_lines = N_args = 0;
725 						lastarg = "";
726 						OK = FALSE;
727 					}
728 				}
729 				return (retarg);
730 			}
731 
732 		case '"':
733 			if (inquote == 1)	/* in single quoted string */
734 				goto is_default;
735 			if (inquote == 2)	/* terminating double quote */
736 				inquote = 0;
737 			else			/* starting quoted string */
738 				inquote = 2;
739 			break;
740 
741 		case '\'':
742 			if (inquote == 2)	/* in double quoted string */
743 				goto is_default;
744 			if (inquote == 1)	/* terminating single quote */
745 				inquote = 0;
746 			else			/* starting quoted string */
747 				inquote = 1;
748 			break;
749 
750 		case L'\\':
751 			c = getwchr();
752 			/* store quoted char for potential requeueing */
753 			requeue_offset = store_wchr(&requeue_buf, &requeue_len,
754 			    requeue_offset, c);
755 
756 		default:
757 is_default:		if (bytes == -1) {
758 				*next++ = (char)c;
759 			} else {
760 				(void) wctomb(next, c);
761 				next += bytes;
762 			}
763 			break;
764 		}
765 	}
766 }
767 
768 
769 /*
770  * ermsg():	print out an error message, and indicate failure globally.
771  *
772  *	Assumes that message has already been gettext()'d. It would be
773  *	nice if we could just do the gettext() here, but we can't, since
774  *	since xgettext(1M) wouldn't be able to pick up our error message.
775  */
776 /* PRINTFLIKE1 */
777 static void
778 ermsg(char *messages, ...)
779 {
780 	va_list	ap;
781 
782 	va_start(ap, messages);
783 
784 	(void) fprintf(stderr, "xargs: ");
785 	(void) vfprintf(stderr, messages, ap);
786 
787 	va_end(ap);
788 	OK = FALSE;
789 }
790 
791 
792 /*
793  * Function: int rpmatch(char *)
794  *
795  * Description:
796  *
797  *	Internationalized get yes / no answer.
798  *
799  * Inputs:
800  *	s	-> Pointer to answer to compare against.
801  *
802  * Returns:
803  *	TRUE	-> Answer was affirmative
804  *	FALSE	-> Answer was negative
805  */
806 
807 static int
808 rpmatch(char *s)
809 {
810 	static char	*default_yesexpr = "^[Yy].*";
811 	static char	*compiled_yesexpr = (char *)NULL;
812 
813 	/* Execute once to initialize */
814 	if (compiled_yesexpr == (char *)NULL) {
815 		char	*yesexpr;
816 
817 		/* get yes expression according to current locale */
818 		yesexpr = nl_langinfo(YESEXPR);
819 		/*
820 		 * If the was no expression or if there is a compile error
821 		 * use default yes expression.  Anchor
822 		 */
823 		if ((yesexpr == (char *)NULL) || (*yesexpr == (char)NULL) ||
824 		    ((compiled_yesexpr =
825 		    regcmp(yesexpr, 0)) == NULL))
826 			compiled_yesexpr = regcmp(default_yesexpr, 0);
827 	}
828 
829 	/* match yesexpr */
830 	if (regex(compiled_yesexpr, s) == NULL) {
831 		return (FALSE);
832 	}
833 	return (TRUE);
834 }
835 
836 static int
837 echoargs()
838 {
839 	char	**anarg;
840 	char	**tanarg;	/* tmp ptr			*/
841 	int		i;
842 	char		reply[LINE_MAX];
843 
844 	tanarg = anarg = arglist-1;
845 
846 	/*
847 	 * write out each argument, separated by a space. the tanarg
848 	 * nonsense is for xcu4 testsuite compliance - so that an
849 	 * extra space isn't echoed after the last argument.
850 	 */
851 	while (*++anarg) {		/* while there's an argument	*/
852 		++tanarg;		/* follow anarg			*/
853 		(void) write(2, *anarg, strlen(*anarg));
854 
855 		if (*++tanarg) {	/* if there's another argument:	*/
856 			(void) write(2, " ", 1); /* add a space		*/
857 			--tanarg;	/* reset back to anarg		*/
858 		}
859 	}
860 	if (PROMPT == -1) {
861 		(void) write(2, "\n", 1);
862 		return (TRUE);
863 	}
864 
865 	/*
866 	 * at this point, there may be unexpected input pending on stdin,
867 	 * if one has used the -n flag. this presents a problem, because
868 	 * if we simply do a read(), we'll get the extra input, instead
869 	 * of our desired y/n input. so, we see if there's any extra
870 	 * input, and if there is, then we will store it.
871 	 */
872 
873 	saveinput();
874 
875 	(void) write(2, "?...", 4);	/* ask the user for input	*/
876 
877 	for (i = 0; i < LINE_MAX && read(PROMPT, &reply[i], 1) > 0; i++) {
878 		if (reply[i] == '\n') {
879 			if (i == 0)
880 				return (FALSE);
881 			break;
882 		}
883 	}
884 	reply[i] = 0;
885 
886 	/* flush remainder of line if necessary */
887 	if (i == LINE_MAX) {
888 		char	bitbucket;
889 
890 		while ((read(PROMPT, &bitbucket, 1) > 0) && (bitbucket != '\n'))
891 			;
892 	}
893 
894 	/*
895 	 * now we have to figure out whether the user typed an
896 	 * internationalized version of 'y' for yes. note that in some
897 	 * countries, they've gotten used to typing an ASCII 'y'! so
898 	 * even if our int'l version fails, we will check for an ASCII
899 	 * 'y', in order to be backwards compatible.
900 	 */
901 	return (rpmatch(reply));
902 }
903 
904 
905 static char *
906 insert(char *pattern, char *subst)
907 {
908 	static char	buffer[MAXSBUF+1];
909 	int		len, ipatlen;
910 	char	*pat;
911 	char	*bufend;
912 	char	*pbuf;
913 
914 	len = strlen(subst);
915 	ipatlen = strlen(INSPAT) - 1;
916 	pat = pattern - 1;
917 	pbuf = buffer;
918 	bufend = &buffer[MAXSBUF];
919 
920 	while (*++pat) {
921 		if (xindex(pat, INSPAT) == 0) {
922 			if (pbuf + len >= bufend) {
923 				break;
924 			} else {
925 				(void) strcpy(pbuf, subst);
926 				pat += ipatlen;
927 				pbuf += len;
928 			}
929 		} else {
930 			*pbuf++ = *pat;
931 			if (pbuf >= bufend)
932 				break;
933 		}
934 	}
935 
936 	if (!*pat) {
937 		*pbuf = '\0';
938 		return (buffer);
939 	} else {
940 		ermsg(gettext("Maximum argument size with insertion via %s's "
941 		    "exceeded\n"), INSPAT);
942 		ERR = TRUE;
943 		return (0);
944 	}
945 }
946 
947 
948 static void
949 addibuf(struct inserts	*p)
950 {
951 	char	*newarg, *skel, *sub;
952 	int		l;
953 
954 	skel = p->p_skel;
955 	sub = *ARGV;
956 	linesize -= strlen(skel) + 1;
957 	newarg = insert(skel, sub);
958 	if (ERR)
959 		return;
960 
961 	if (checklen(newarg)) {
962 		if ((ibufsize += (l = strlen(newarg) + 1)) > MAXIBUF) {
963 			ermsg(gettext("Insert buffer overflow\n"));
964 			ERR = TRUE;
965 		}
966 		(void) strcpy(p_ibuf, newarg);
967 		*(p->p_ARGV) = p_ibuf;
968 		p_ibuf += l;
969 	}
970 }
971 
972 
973 /*
974  * getchr():	get the next character.
975  * description:
976  *	we get the next character from pio.structure, if there's a character
977  *	to get. this may happen when we've had to flush stdin=/dev/tty,
978  *	but still wanted to preserve the characters for later processing.
979  *
980  *	otherwise we just get the character from stdin.
981  */
982 static int
983 getchr(void)
984 {
985 	char	c;
986 
987 	do {
988 		if (queued_data == NULL) {
989 			char	*buffer;
990 			int	len;
991 
992 			if ((buffer = malloc(BUFSIZE)) == NULL) {
993 				perror(gettext(
994 				    "xargs: Memory allocation failure"));
995 				exit(1);
996 			}
997 
998 			if ((len = read(0, buffer, BUFSIZE)) == 0)
999 				return (0);
1000 			if (len == -1) {
1001 				perror(gettext("xargs: Read failure"));
1002 				exit(1);
1003 			}
1004 
1005 			queue(buffer, len, TAIL);
1006 		}
1007 
1008 		file_offset++;
1009 		c = *queued_data->cur++;	 /* get the next character */
1010 		if (--queued_data->length == 0) { /* at the end of buffer? */
1011 			pio	*nxt = queued_data->next;
1012 
1013 			free(queued_data->start);
1014 			free(queued_data);
1015 			queued_data = nxt;
1016 		}
1017 	} while (c == '\0');
1018 	return (c);
1019 }
1020 
1021 
/*
 * getwchr():	get the next (possibly multibyte) character.
 *
 *	Bytes are taken from getchr() one at a time until mbtowc()
 *	accepts them as a character, which is then returned. Returns 0
 *	at end of input. If MB_CUR_MAX bytes do not form a character, or
 *	the input ends in the middle of one, the input is reported as
 *	corrupt and the program exits with status 1.
 */
static wchar_t
getwchr(void)
{
	int		i;
	wchar_t		wch;
	unsigned char	buffer[MB_LEN_MAX + 1];

	for (i = 0; i < (int)MB_CUR_MAX; ) {
		if ((buffer[i++] = getchr()) == '\0') {
			/* We have reached  EOF */
			if (i == 1) {
				/* TRUE EOF has been reached */
				return (L'\0');
			}
			/*
			 * We have some characters in our buffer still so it
			 * must be an invalid character right before EOF.
			 */
			break;
		}

		/* If this succeeds then we are done */
		if (mbtowc(&wch, (char *)buffer, i) != -1)
			return (wch);
	}

	/*
	 * We have now encountered an illegal character sequence.
	 * There is nothing much we can do at this point but
	 * return an error.  If we attempt to recover we may in fact
	 * return garbage as arguments, from the customer's point
	 * of view.  After all what if they are feeding us a file
	 * generated in another locale?
	 */
	errno = EILSEQ;
	perror(gettext("xargs: Corrupt input file"));
	exit(1);
	/* NOTREACHED */
}
1061 
1062 
1063 static void
1064 ungetwchr(wchar_t wch)
1065 {
1066 	char	*buffer;
1067 	int	bytes;
1068 
1069 	if ((buffer = malloc(MB_LEN_MAX)) == NULL) {
1070 		perror(gettext("xargs: Memory allocation failure"));
1071 		exit(1);
1072 	}
1073 	bytes = wctomb(buffer, wch);
1074 	queue(buffer, bytes, HEAD);
1075 }
1076 
1077 
1078 static int
1079 lcall(char *sub, char **subargs)
1080 {
1081 	int retcode, retry = 0;
1082 	pid_t iwait, child;
1083 
1084 	for (; ; ) {
1085 		switch (child = fork()) {
1086 		default:
1087 			while ((iwait = wait(&retcode)) != child &&
1088 			    iwait != (pid_t)-1)
1089 				;
1090 			if (iwait == (pid_t)-1) {
1091 				perror(gettext("xargs: Wait failure"));
1092 				exit(122);
1093 				/* NOTREACHED */
1094 			}
1095 			if (WIFSIGNALED(retcode)) {
1096 				ermsg(gettext("Child killed with signal %d\n"),
1097 				    WTERMSIG(retcode));
1098 				exit(125);
1099 				/* NOTREACHED */
1100 			}
1101 			if ((WEXITSTATUS(retcode) & 0377) == 0377) {
1102 				ermsg(gettext("Command could not continue "
1103 				    "processing data\n"));
1104 				exit(124);
1105 				/* NOTREACHED */
1106 			}
1107 			return (WEXITSTATUS(retcode));
1108 		case 0:
1109 			(void) execvp(sub, subargs);
1110 			perror(gettext("xargs: Could not exec command"));
1111 			if (errno == EACCES)
1112 				exit(126);
1113 			exit(127);
1114 			/* NOTREACHED */
1115 		case -1:
1116 			if (errno != EAGAIN && retry++ < FORK_RETRY) {
1117 				perror(gettext("xargs: Could not fork child"));
1118 				exit(123);
1119 			}
1120 			(void) sleep(1);
1121 		}
1122 	}
1123 }
1124 
1125 
/*
 * xindex():	locate one string inside another.
 *
 *	Returns the offset of the first occurrence of `as2' in `as1',
 *	or -1 if `as2' does not occur in `as1'. An empty `as2' is never
 *	found.
 */
static int
xindex(char *as1, char *as2)
{
	char	*hit;

	/* strstr() would find an empty string at offset 0 */
	if (*as2 == '\0')
		return (-1);

	hit = strstr(as1, as2);
	if (hit == NULL)
		return (-1);

	return (hit - as1);
}
1155 
1156 
1157 static void
1158 usage()
1159 {
1160 	ermsg(gettext(USAGEMSG));
1161 	OK = FALSE;
1162 }
1163 
1164 
1165 
/*
 * parseargs():		modify the args
 *	since the -e, -i and -l flags all take optional subarguments,
 *	and getopts(3C) is clueless about this nonsense, we change
 *	our local argument count and strings to separate this out,
 *	and make it easier to handle via getopts(3C).
 *
 *	-e	-> "-e" ""
 *	-e3	-> "-e" "3"
 *	-Estr	-> "-E" "str"
 *	-i	-> "-i" "{}"
 *	-irep	-> "-i" "rep"
 *	-l	-> "-l" "1"
 *	-l10	-> "-l" "10"
 */
1183 static void
1184 parseargs(int ac, char **av)
1185 {
1186 	int i;			/* current argument			*/
1187 	int cflag;		/* 0 = not processing cmd arg		*/
1188 
1189 	if ((mav = malloc((ac * 2 + 1) * sizeof (char *))) == NULL) {
1190 		perror(gettext("xargs: Memory allocation failure"));
1191 		exit(1);
1192 	}
1193 
1194 	/* for each argument, see if we need to change things:		*/
1195 	for (i = mac = cflag = 0; (av[i] != NULL) && i < ac; i++, mac++) {
1196 		if ((mav[mac] = strdup(av[i])) == NULL) {
1197 			perror(gettext("xargs: Memory allocation failure"));
1198 			exit(1);
1199 		}
1200 
1201 		/* -- has been found or argument list is fully processes */
1202 		if (cflag)
1203 			continue;
1204 
1205 		/*
1206 		 * if we're doing special processing, and we've got a flag
1207 		 */
1208 		else if ((av[i][0] == '-') && (av[i][1] != NULL)) {
1209 			char	*def;
1210 
1211 			switch (av[i][1]) {
1212 			case	'e':
1213 				def = ""; /* -e with no arg turns off eof */
1214 				goto process_special;
1215 			case	'i':
1216 				def = INSPAT_STR;
1217 				goto process_special;
1218 			case	'l':
1219 				def = "1";
1220 process_special:
1221 				/*
1222 				 * if there's no sub-option, we *must* add
1223 				 * a default one. this is because xargs must
1224 				 * be able to distinguish between a valid
1225 				 * suboption, and a command name.
1226 				 */
1227 				if (av[i][2] == NULL) {
1228 					mav[++mac] = strdup(def);
1229 				} else {
1230 					/* clear out our version: */
1231 					mav[mac][2] = NULL;
1232 					mav[++mac] = strdup(&av[i][2]);
1233 				}
1234 				if (mav[mac] == NULL) {
1235 					perror(gettext("xargs: Memory"
1236 					    " allocation failure"));
1237 					exit(1);
1238 				}
1239 				break;
1240 
1241 			/* flags with required subarguments:		*/
1242 
1243 			/*
1244 			 * there are two separate cases here. either the
1245 			 * flag can have the normal XCU4 handling
1246 			 * (of the form: -X subargument); or it can have
1247 			 * the old solaris 2.[0-4] handling (of the
1248 			 * form: -Xsubargument). in order to maintain
1249 			 * backwards compatibility, we must support the
1250 			 * latter case. we handle the latter possibility
1251 			 * first so both the old solaris way of handling
1252 			 * and the new XCU4 way of handling things are allowed.
1253 			 */
1254 			case	'n':	/* FALLTHROUGH			*/
1255 			case	's':	/* FALLTHROUGH			*/
1256 			case	'E':	/* FALLTHROUGH			*/
1257 			case	'I':	/* FALLTHROUGH			*/
1258 			case	'L':
1259 				/*
1260 				 * if the second character isn't null, then
1261 				 * the user has specified the old syntax.
1262 				 * we move the subargument into our
1263 				 * mod'd argument list.
1264 				 */
1265 				if (av[i][2] != NULL) {
1266 					/* first clean things up:	*/
1267 					mav[mac][2] = NULL;
1268 
1269 					/* now add the separation:	*/
1270 					++mac;	/* inc to next mod'd arg */
1271 					if ((mav[mac] = strdup(&av[i][2])) ==
1272 					    NULL) {
1273 						perror(gettext("xargs: Memory"
1274 						    " allocation failure"));
1275 						exit(1);
1276 					}
1277 					break;
1278 				}
1279 				i++;
1280 				mac++;
1281 
1282 				if (av[i] == NULL) {
1283 					mav[mac] = NULL;
1284 					return;
1285 				}
1286 				if ((mav[mac] = strdup(av[i])) == NULL) {
1287 					perror(gettext("xargs: Memory"
1288 					    " allocation failure"));
1289 					exit(1);
1290 				}
1291 				break;
1292 
1293 			/* flags */
1294 			case 'p' :
1295 			case 't' :
1296 			case 'x' :
1297 				break;
1298 
1299 			case '-' :
1300 			default:
1301 				/*
1302 				 * here we've hit the cmd argument. so
1303 				 * we'll stop special processing, as the
1304 				 * cmd may have a "-i" etc., argument,
1305 				 * and we don't want to add a "" to it.
1306 				 */
1307 				cflag = 1;
1308 				break;
1309 			}
1310 		} else if (i > 0) {	/* if we're not the 1st arg	*/
1311 			/*
1312 			 * if it's not a flag, then it *must* be the cmd.
1313 			 * set cflag, so we don't mishandle the -[eil] flags.
1314 			 */
1315 			cflag = 1;
1316 		}
1317 	}
1318 
1319 	mav[mac] = NULL;
1320 }
1321 
1322 
/*
 * saveinput(): pick up any pending input, so it can be processed later.
 *
 * description:
 *	the purpose of this routine is to allow us to handle the user
 *	typing in a 'y' or 'n', when there are existing characters already
 *	in stdin. this happens when one gives the "-n" option along with
 *	"-p". the problem occurs when the user first types in more arguments
 *	than specified by the -n number. echoargs() wants to read stdin
 *	in order to get the user's response, but if there's already stuff
 *	there, echoargs() won't read the proper character.
 *
 *	the solution provided by this routine is to pick up all characters
 *	(if any), and store them for later processing: the PROMPT
 *	descriptor is peeked with I_PEEK, and any data found is read and
 *	handed to queue() with TAIL.
 *
 *	does nothing when PROMPT is -1. a failed allocation, I_PEEK or
 *	read is fatal (exit status 1).
 */

void
saveinput()
{
	char *buffer;		/* ptr to the floating data buffer	*/
	struct strpeek speek;	/* to see what's on the queue		*/
	struct strpeek *ps;
	int len;		/* number of bytes actually read	*/

	/* if we're not in -p mode, skip				*/
	if (PROMPT == -1) {
		return;
	}

	if ((buffer = malloc((size_t)MAX_INPUT)) == NULL) {
		perror(gettext("xargs: Memory allocation failure"));
		exit(1);
	}

	/* now see if there's any activity pending:			*/
	ps = &speek;
	ps->ctlbuf.maxlen = 0;
	ps->ctlbuf.len = 0;
	ps->ctlbuf.buf = NULL;
	ps->flags = 0;
	ps->databuf.maxlen = MAX_INPUT;
	ps->databuf.len = 0;
	ps->databuf.buf = (char *)buffer;

	if (ioctl(PROMPT, I_PEEK, ps) == -1) {
		perror(gettext("xargs: I_PEEK failure"));
		exit(1);
	}

	if (ps->databuf.len <= 0) {
		/*
		 * nothing is pending, so the scratch buffer was never
		 * handed to anyone: release it instead of leaking it.
		 */
		free(buffer);
		return;
	}

	/* consume exactly what the peek reported, and save it		*/
	if ((len = read(PROMPT, buffer, ps->databuf.len)) == -1) {
		perror(gettext("xargs: read failure"));
		exit(1);
	}
	queue(buffer, len, TAIL);
	/*
	 * NOTE(review): buffer is deliberately not freed on this path.
	 * whether queue() copies the data or keeps the pointer is not
	 * visible from here -- confirm before adding a free().
	 */
}
1381