xref: /titanic_50/usr/src/cmd/xargs/xargs.c (revision 8461248208fabd3a8230615f8615e5bf1b4dcdcb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #include <stdio.h>
34 #include <sys/types.h>
35 #include <sys/wait.h>
36 #include <unistd.h>
37 #include <fcntl.h>
38 #include <string.h>
39 #include <stdarg.h>
40 #include <libgen.h>
41 #include <stdlib.h>
42 #include <limits.h>
43 #include <wchar.h>
44 #include <locale.h>
45 #include <langinfo.h>
46 #include <stropts.h>
47 #include <poll.h>
48 #include <errno.h>
49 #include <stdarg.h>
50 
/*
 * Compile-time constants.  HEAD/TAIL are the `where' argument of queue();
 * the MAX* and BUFSIZE values size the static buffers declared below.
 */
51 #define	HEAD	0	/* queue(): insert in front of pending input */
52 #define	TAIL	1	/* queue(): append after pending input */
53 #define	FALSE 0
54 #define	TRUE 1
55 #define	MAXSBUF 255	/* size of insert()'s static result buffer */
56 #define	MAXIBUF 512	/* size of ins_buf, filled by addibuf() */
57 #define	MAXINSERTS 5	/* max. command args that may contain replstr */
58 #define	BUFSIZE LINE_MAX	/* size of argbuf; upper bound for -s */
59 #define	MAXARGS 255	/* slots in arglist (plus one for the NULL) */
60 #define	INSPAT_STR	"{}"	/* default replstr string for -[Ii]	*/
61 #define	FORK_RETRY	5	/* fork() retry count used by lcall() */
62 
63 #define	QBUF_STARTLEN 255  /* start size of growable string buffer */
64 #define	QBUF_INC 100	   /* how much to grow a growable string by */
65 
/*
 * File-scope state shared by main() and the helpers below.
 */
66 static wctype_t	blank;	/* "blank" class from wctype(), set in main() */
67 static char	*arglist[MAXARGS+1];	/* argv vector handed to lcall() */
68 static char	argbuf[BUFSIZE+1];	/* storage for the argument strings */
69 static char	*next = argbuf;	/* next free byte in argbuf */
70 static char	*lastarg = "";	/* arg refused by checklen(); main() re-adds it */
71 static char	**ARGV = arglist;	/* next free slot in arglist */
72 static char	*LEOF = "_";	/* logical end-of-file string (-e / -E) */
73 static char	*INSPAT = INSPAT_STR;	/* replstr in effect for -I / -i */
74 static char	ins_buf[MAXIBUF];	/* holds the args built by addibuf() */
75 static char	*p_ibuf;	/* next free byte in ins_buf */
76 
/* one entry per command-line argument that contains the replstr */
77 static struct inserts {
78 	char	**p_ARGV;	/* where to put newarg ptr in arg list */
79 	char	*p_skel;	/* ptr to arg template */
80 } saveargv[MAXINSERTS];
81 
/*
 * file_offset is bumped for every byte getchr() hands out and reduced when
 * bytes are pushed back at HEAD; main() uses it to reposition stdin on exit.
 */
82 static off_t	file_offset = 0;
83 static int	PROMPT = -1;	/* fd of /dev/tty for -p, -1 when not prompting */
84 static int	BUFLIM = BUFSIZE;	/* command line size limit (-s) */
85 static int	N_ARGS = 0;	/* -n value; 0 when not in effect */
86 static int	N_args = 0;	/* args read so far for the current command */
87 static int	N_lines = 0;	/* input lines read for the current command */
88 static int	DASHX = FALSE;	/* -x given */
89 static int	MORE = TRUE;	/* FALSE once getarg() saw EOF or LEOF */
90 static int	PER_LINE = FALSE;	/* -L / -l line count; TRUE for -I / -i */
91 static int	ERR = FALSE;	/* an error occurred; do not run the command */
92 static int	OK = TRUE;	/* cleared by ermsg() and when a batch is complete */
93 static int	LEGAL = FALSE;	/* over-long arg list is an error (see checklen()) */
94 static int	TRACE = FALSE;	/* echo each command line (-t, -p) */
95 static int	INSERT = FALSE;	/* insert mode (-I / -i) */
96 static int	linesize = 0;	/* running size of the command line, see checklen() */
97 static int	ibufsize = 0;	/* bytes of ins_buf used so far */
98 static char	*yesstr;	/* the string contains int'l for "yes"	*/
99 static int	exitstat = 0;	/* our exit status			*/
100 static int	mac;		/* modified argc, after parsing		*/
101 static char	**mav;		/* modified argv, after parsing		*/
102 static int	n_inserts;	/* # of insertions.			*/
/* getarg() uses 0 = not quoted, 1 = inside '...', 2 = inside "..." */
103 static int	inquote = 0;	/* processing a quoted string		*/
104 
105 /*
106  * the pio structure is used to save any pending input before the
107  * user replies to a prompt. the pending input is saved here,
108  * for the appropriate processing later.
109  */
/*
 * In the code visible in this file the same list also carries every buffer
 * getchr() reads from standard input (appended at TAIL) and the bytes pushed
 * back by ungetwchr() and getarg() (inserted at HEAD).  getchr() always
 * consumes from the first element and free()s both `start' and the element
 * once `length' reaches zero, so `start' has to be malloc()ed memory.
 */
110 typedef struct pio {
111 	struct pio *next;	/* next in stack			*/
112 	char *start;		/* starting addr of the buffer		*/
113 	char *cur;		/* ptr to current char in buf		*/
114 	size_t length;		/* number of bytes remaining		*/
115 } pio;
116 
117 static pio *queued_data = NULL;	/* first element of the pending input */
118 
/* our usage message:							*/
#define	USAGEMSG "Usage: xargs: [-t] [-p] [-e[eofstr]] [-E eofstr] "\
	"[-I replstr] [-i[replstr]] [-L #] [-l[#]] [-n # [-x]] [-s size] "\
	"[cmd [args ...]]\n"

/*
 * Forward declarations of the file-local helpers.
 *
 * These are real prototypes wherever the definition is visible in this
 * file, so the compiler can check every call.  In particular parseargs()
 * is defined and called with (int, char **); an empty parameter list here
 * would hide a mismatch (and means "no parameters" from C23 on).
 * saveinput() is defined further down; its declaration is left in the
 * original non-prototype form.
 */
static int	echoargs(void);
static int	getchr(void);
static wchar_t	getwchr(void);
static void	ungetwchr(wchar_t);
static int	lcall(char *sub, char **subargs);
static int	xindex(char *as1, char *as2);
static void	addibuf(struct inserts *p);
static void	ermsg(char *messages, ...);
static char	*addarg(char *arg);
static char	*checklen(char *arg);
static size_t   store_wchr(char **, size_t *, size_t, wchar_t);
static char	*getarg(void);
static char	*insert(char *pattern, char *subst);
static void	usage(void);
static void	parseargs(int ac, char **av);
static void	saveinput();
140 
141 
142 int
143 main(int argc, char **argv)
144 {
145 	int	j;
146 	struct inserts *psave;
147 	int c;
148 	int	initsize;
149 	char	*cmdname, *initbuf, **initlist;
150 
151 
152 	/* initialization */
153 
154 	blank = wctype("blank");
155 	n_inserts = 0;
156 	psave = saveargv;
157 	(void) setlocale(LC_ALL, "");
158 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D 		*/
159 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't 		*/
160 #endif
161 	(void) textdomain(TEXT_DOMAIN);
162 
163 	/*
164 	 * now we get the appropriate "yes" string for our locale.
165 	 * since this may be a multibyte character, we store the
166 	 * string which is returned. later on, when we're looking for
167 	 * a "y" in response to our prompt, we'll use the first
168 	 * multibyte character of yesstr as a comparision.
169 	 */
170 	initbuf = nl_langinfo(YESSTR);	/* initbuf is a tmp placeholder here */
171 	if ((yesstr = malloc(strlen(initbuf) + 1)) == NULL) {
172 		perror(gettext("xargs: Memory allocation failure"));
173 		exit(1);
174 	}
175 	(void) strcpy(yesstr, initbuf);
176 
177 	parseargs(argc, argv);
178 
179 	/* handling all of xargs arguments:				*/
180 	while ((c = getopt(mac, mav, "tpe:E:I:i:L:l:n:s:x")) != EOF) {
181 		switch (c) {
182 		case 't':	/* -t: turn trace mode on		*/
183 			TRACE = TRUE;
184 			break;
185 
186 		case 'p':	/* -p: turn on prompt mode.		*/
187 			if ((PROMPT = open("/dev/tty", O_RDONLY)) == -1) {
188 				perror(gettext("can't read from tty for -p"));
189 			} else {
190 				TRACE = TRUE;
191 			}
192 			break;
193 
194 		case 'e':
195 			/*
196 			 * -e[eofstr]: set/disable end-of-file.
197 			 * N.B. that an argument *isn't* required here; but
198 			 * parseargs forced an argument if not was given.  The
199 			 * forced argument is the default...
200 			 */
201 			LEOF = optarg; /* can be empty */
202 			break;
203 
204 		case 'E':
205 			/*
206 			 * -E eofstr: change end-of-file string.
207 			 * eofstr *is* required here:
208 			 */
209 			LEOF = optarg;
210 #ifdef XPG6
211 			if (LEOF == NULL) {
212 #else
213 			if ((LEOF == NULL) || (*LEOF == NULL)) {
214 #endif
215 				ermsg(gettext("Must specify subargment to -E "
216 					" flag\n"));
217 			}
218 			break;
219 
220 		case 'I':
221 			/* -I replstr: Insert mode. replstr *is* required. */
222 			INSERT = PER_LINE = LEGAL = TRUE;
223 			N_ARGS = 0;
224 			if (*optarg) {
225 				INSPAT = optarg;
226 			} else {
227 				ermsg(gettext("Must specify subargment "
228 				    "for -I\n"));
229 			}
230 			break;
231 
232 		case 'i':
233 			/*
234 			 * -i [replstr]: insert mode, with *optional* replstr.
235 			 * N.B. that an argument *isn't* required here; if
236 			 * it's not given, then the string INSPAT_STR will
237 			 * be assumed.
238 			 *
239 			 * Since getopts(3C) doesn't handle the case of an
240 			 * optional variable argument at all, we have to
241 			 * parse this by hand:
242 			 */
243 
244 			INSERT = PER_LINE = LEGAL = TRUE;
245 			N_ARGS = 0;
246 			if (optarg[0] != NULL) {
247 				INSPAT = optarg;
248 			} else {
249 				/*
250 				 * here, there is no next argument. so
251 				 * we reset INSPAT to the INSPAT_STR.
252 				 * we *have* to do this, as -i/I may have
253 				 * been given previously, and XCU4 requires
254 				 * that only "the last one specified takes
255 				 * effect".
256 				 */
257 				INSPAT = INSPAT_STR;
258 			}
259 			break;
260 
261 		case 'L':
262 			/*
263 			 * -L number: # of times cmd is executed
264 			 * number *is* required here:
265 			 */
266 			PER_LINE = TRUE;
267 			N_ARGS = 0;
268 			INSERT = FALSE;
269 			if (optarg && (PER_LINE = atoi(optarg)) <= 0) {
270 				ermsg(gettext("#lines must be positive "
271 				    "int: %s\n"), optarg);
272 			}
273 			break;
274 
275 		case 'l':
276 			/*
277 			 * -l [number]: # of times cmd is executed
278 			 * N.B. that an argument *isn't* required here; if
279 			 * it's not given, then 1 is assumed.
280 			 *
281 			 * parseargs handles the optional arg processing.
282 			 */
283 
284 			PER_LINE = LEGAL = TRUE;  /* initialization	*/
285 			N_ARGS = 0;
286 			INSERT = FALSE;
287 
288 			if (optarg[0] != NULL) {
289 				if ((PER_LINE = atoi(optarg)) <= 0)
290 					PER_LINE = 1;
291 			}
292 			break;
293 
294 		case 'n':	/* -n number: # stdin args		*/
295 			/*
296 			 * -n number: # stdin args.
297 			 * number *is* required here:
298 			 */
299 			if ((N_ARGS = atoi(optarg)) <= 0) {
300 				ermsg(gettext("#args must be positive "
301 				    "int: %s\n"), optarg);
302 			} else {
303 				LEGAL = DASHX || N_ARGS == 1;
304 				INSERT = PER_LINE = FALSE;
305 			}
306 			break;
307 
308 		case 's':	/* -s size: set max size of each arg list */
309 			BUFLIM = atoi(optarg);
310 			if (BUFLIM > BUFSIZE || BUFLIM <= 0) {
311 				ermsg(gettext("0 < max-cmd-line-size <= %d: "
312 				    "%s\n"), BUFSIZE, optarg);
313 			}
314 			break;
315 
316 		case 'x':	/* -x: terminate if args > size limit	*/
317 			DASHX = LEGAL = TRUE;
318 			break;
319 
320 		default:
321 			/*
322 			 * bad argument. complain and get ready to die.
323 			 */
324 			ERR = TRUE;
325 			usage();
326 
327 			exit(2);
328 			break;
329 		}
330 	}
331 
332 	/*
333 	 * if anything called ermsg(), something screwed up, so
334 	 * we exit early.
335 	 */
336 	if (OK == FALSE) {
337 		ERR = TRUE;
338 		usage();
339 		exit(2);
340 	}
341 
342 	/*
343 	 * we're finished handling xargs's options, so now pick up
344 	 * the command name (if any), and it's options.
345 	 */
346 
347 
348 	mac -= optind;	/* dec arg count by what we've processed 	*/
349 	mav += optind;	/* inc to current mav				*/
350 
351 	if (mac <= 0) {	/* if there're no more args to process,	*/
352 		cmdname = "/usr/bin/echo";	/* our default command	*/
353 		*ARGV++ = addarg(cmdname);	/* use the default cmd.	*/
354 	} else {	/* otherwise keep parsing rest of the string.	*/
355 		/*
356 		 * note that we can't use getopts(3C), and *must* parse
357 		 * this by hand, as we don't know apriori what options the
358 		 * command will take.
359 		 */
360 		cmdname = *mav;	/* get the command name	*/
361 
362 
363 		/* pick up the remaining args from the command line:	*/
364 		while ((OK == TRUE) && (mac-- > 0)) {
365 			/*
366 			 * while we haven't crapped out, and there's
367 			 * work to do:
368 			 */
369 			if (INSERT && ! ERR) {
370 				if (xindex(*mav, INSPAT) != -1) {
371 					if (++n_inserts > MAXINSERTS) {
372 						ermsg(gettext("too many args "
373 						    "with %s\n"), INSPAT);
374 						ERR = TRUE;
375 					}
376 					psave->p_ARGV = ARGV;
377 					(psave++)->p_skel = *mav;
378 				}
379 			}
380 			*ARGV++ = addarg(*mav++);
381 		}
382 	}
383 
384 	/* pick up args from standard input */
385 
386 	initbuf = next;
387 	initlist = ARGV;
388 	initsize = linesize;
389 
390 	while (OK && MORE) {
391 		N_args = 0;
392 		N_lines = 0;
393 		next = initbuf;
394 		ARGV = initlist;
395 		linesize = initsize;
396 		if (*lastarg) {
397 			*ARGV++ = addarg(lastarg);
398 			lastarg = "";
399 		}
400 
401 		while (((ARGV - arglist) < MAXARGS) &&
402 		    ((*ARGV++ = getarg()) != NULL) && OK)
403 			;
404 
405 		/* insert arg if requested */
406 
407 		if (!ERR && INSERT) {
408 			if ((!MORE) && (N_lines == 0)) {
409 				exit(exitstat);
410 			}
411 					/* no more input lines */
412 			p_ibuf = ins_buf;
413 			ARGV--;
414 			j = ibufsize = 0;
415 			for (psave = saveargv; ++j <= n_inserts; ++psave) {
416 				addibuf(psave);
417 				if (ERR)
418 					break;
419 			}
420 		}
421 		*ARGV = 0;
422 
423 		if (n_inserts > 0) {
424 			int t_ninserts;
425 
426 			/*
427 			 * if we've done any insertions, re-calculate the
428 			 * linesize. bomb out if we've exceeded our length.
429 			 */
430 			t_ninserts = n_inserts;
431 			n_inserts = 0;	/* inserts have been done 	*/
432 			linesize = 0;	/* recalculate this		*/
433 
434 			/* for each current argument in the list:	*/
435 			for (ARGV = arglist; *ARGV != NULL; ARGV++) {
436 				/* recalculate everything.		*/
437 				if (checklen(*ARGV) != 0) {
438 					if (N_ARGS && (N_args >= N_ARGS)) {
439 						N_lines = N_args = 0;
440 						OK = FALSE;
441 						ERR = TRUE;
442 					}
443 				}
444 			}
445 			n_inserts = t_ninserts;
446 		}
447 
448 		/* exec command */
449 
450 		if (!ERR) {
451 			if (!MORE &&
452 			    (PER_LINE && N_lines == 0 || N_ARGS && N_args == 0))
453 				exit(exitstat);
454 			OK = TRUE;
455 			j = TRACE ? echoargs() : TRUE;
456 			if (j) {
457 				/*
458 				 * for xcu4, all invocations of cmdname must
459 				 * return 0, in order for us to return 0.
460 				 * so if we have a non-zero status here,
461 				 * quit immediately.
462 				 */
463 				if ((exitstat |= lcall(cmdname, arglist)) == 0)
464 					continue;
465 			}
466 		}
467 	}
468 
469 	(void) lseek(0, file_offset, SEEK_SET);
470 	if (OK) {
471 		return (exitstat);
472 	} else {
473 		/*
474 		 * if exitstat was set, to match XCU4 complience,
475 		 * return that value, otherwise, return 1.
476 		 */
477 		return (exitstat ? exitstat : 1);
478 	}
479 }
480 
481 static void
482 queue(char *buffer, int len, int where)
483 {
484 	pio *new, *element;
485 
486 	if ((new = malloc(sizeof (pio))) == NULL) {
487 		perror(gettext("xargs: Memory allocation failure"));
488 		exit(1);
489 	}
490 	new->cur = new->start = buffer;
491 	new->length = len;
492 
493 	if (where == TAIL) {
494 		new->next = NULL;
495 		if (queued_data == NULL) {
496 			queued_data = new;
497 		} else {
498 			element = queued_data;
499 			while (element->next != NULL) {
500 				element = element->next;
501 			}
502 			element->next = new;
503 		}
504 	} else {
505 		file_offset -= len;
506 		new->next = queued_data;
507 		queued_data = new;
508 	}
509 }
510 
511 static char *
512 checklen(char *arg)
513 {
514 	int	oklen;
515 
516 	oklen = TRUE;
517 	linesize += strlen(arg) + 1;
518 	if (linesize >= BUFLIM) {
519 		/*
520 		 * we skip this if there're inserts. we'll handle the
521 		 * argument counting after all the insertions have
522 		 * been done.
523 		 */
524 		if (n_inserts == 0) {
525 			lastarg = arg;
526 			oklen = OK = FALSE;
527 
528 			if (LEGAL) {
529 				ERR = TRUE;
530 				ermsg(gettext("arg list too long\n"));
531 			} else if (N_args > 1) {
532 				N_args = 1;
533 			} else {
534 				ermsg(gettext("a single arg was greater than "
535 				    "the max arglist size of %d characters\n"),
536 				    BUFLIM);
537 				ERR = TRUE;
538 			}
539 		}
540 	}
541 	return (oklen ? arg : 0);
542 }
543 
544 static char *
545 addarg(char *arg)
546 {
547 	if (checklen(arg) != 0) {
548 		(void) strcpy(next, arg);
549 		arg = next;
550 		next += strlen(arg) + 1;
551 		return (arg);
552 	}
553 	return ((char *)0);
554 }
555 
556 /*
557  * store_wchr() : append a wchar_t to a char buffer, resize buffer if required.
558  *
559  *     Given a pointer to the beginning of a string buffer, the length of the
560  *     buffer and an offset indicating the next place to write within that
561  *     buffer, the passed wchar_t will be appended to the buffer if there is
562  *     enough space. If there is not enough space, an attempt to reallocate the
563  *     buffer will be made and if successful the passed pointer and size will be
564  *     updated to describe the reallocated block. Returns the new value for
565  *     'offset' (it will be incremented by the number of bytes written).
566  */
567 static size_t
568 store_wchr(char **buffer, size_t *buflen, size_t offset, wchar_t c)
569 {
570 	int bytes;
571 
572 	/*
573 	 * Make sure that there is enough room in the buffer to store the
574 	 * maximum length of c.
575 	 */
576 	if ((offset + MB_CUR_MAX) > *buflen) {
577 		/*
578 		 * Not enough room so attempt to reallocate. Add 'MB_CUR_MAX' to
579 		 * buffer length to ensure that there is always enough room to
580 		 * store 'c' if realloc succeeds, no matter what QBUF_INC is
581 		 * defined as.
582 		 */
583 		*buflen += (QBUF_INC + MB_CUR_MAX);
584 		if ((*buffer = realloc(*buffer, *buflen)) == NULL) {
585 			perror(gettext("xargs: Memory allocation failure"));
586 			exit(1);
587 		}
588 	}
589 	/* store bytes from wchar into buffer */
590 	bytes = wctomb(*buffer + offset, c);
591 	if (bytes == -1) {
592 		/* char was invalid */
593 		bytes = 1;
594 		*(*buffer + offset) = (char)c;
595 	}
596 
597 	/* return new value for offset */
598 	return (offset + bytes);
599 }
600 
601 static char *
602 getarg()
603 {
604 	int	bytes;
605 	wchar_t	c;
606 	char	*arg;
607 	char	*retarg, *requeue_buf;
608 	size_t  requeue_offset = 0, requeue_len;
609 	char	mbc[MB_LEN_MAX];
610 
611 	while (iswspace(c = getwchr()) || c == '\n')
612 		;
613 
614 	if (c == '\0') {
615 		MORE = FALSE;
616 		return (0);
617 	}
618 
619 	/*
620 	 * While we are reading in an argument, it is possible that we will
621 	 * reach the maximum length of the overflow buffer and we'll have to
622 	 * requeue what we have read so far. To handle this we allocate an
623 	 * initial buffer here which will keep an unprocessed copy of the data
624 	 * that we read in (this buffer will grow as required).
625 	 */
626 	requeue_len = (size_t)QBUF_STARTLEN;
627 	if ((requeue_buf = (char *)malloc(requeue_len)) == NULL) {
628 		perror(gettext("xargs: Memory allocation failure"));
629 		exit(1);
630 	}
631 
632 	for (arg = next; ; c = getwchr()) {
633 		bytes = wctomb(mbc, c);
634 
635 		/*
636 		 * Store the char that we have read before processing it in case
637 		 * the current argument needs to be requeued.
638 		 */
639 		requeue_offset = store_wchr(&requeue_buf, &requeue_len,
640 		    requeue_offset, c);
641 
642 		/* Check for overflow the input buffer */
643 		if ((next + ((bytes == -1) ? 1 : bytes)) >= &argbuf[BUFLIM]) {
644 			/*
645 			 * It's only an error if there are no Args in buffer
646 			 * already.
647 			 */
648 			if ((N_ARGS || PER_LINE) && LEGAL) {
649 				ERR = TRUE;
650 				ermsg(gettext("Argument list too long\n"));
651 				free(requeue_buf);
652 				return (0);
653 			} else if (N_args == 0) {
654 				lastarg = "";
655 				ERR = TRUE;
656 				ermsg(gettext("A single arg was greater than "
657 				    "the max arglist size of %d characters\n"),
658 				    BUFSIZE);
659 				free(requeue_buf);
660 				return (0);
661 			}
662 			/*
663 			 * Otherwise we put back the current argument
664 			 * and use what we have collected so far...
665 			 */
666 			queue(requeue_buf, requeue_offset, HEAD);
667 			/* reset inquote because we have requeued the quotes */
668 			inquote = 0;
669 			return (NULL);
670 		}
671 
672 
673 		if (iswctype(c, blank) && inquote == 0) {
674 			if (INSERT) {
675 				if (bytes == -1) {
676 					*next++ = (char)c;
677 				} else {
678 					(void) wctomb(next, c);
679 					next += bytes;
680 				}
681 				continue;
682 			}
683 
684 			/* skip over trailing whitespace till next arg */
685 			while (iswctype((c = getwchr()), blank) &&
686 			    (c != '\n') && (c != '\0'))
687 				;
688 
689 			/*
690 			 * if there was space till end of line then the last
691 			 * character was really a newline...
692 			 */
693 			if (c == L'\n' || c == L'\0') {
694 				ungetwchr(L'\n');
695 			} else {
696 				/* later code needs to know this was a space */
697 				ungetwchr(c);
698 				c = L' ';
699 			}
700 			goto end_arg;
701 		}
702 		switch (c) {
703 		case L'\0':
704 		case L'\n':
705 			if (inquote) {
706 				*next++ = '\0';
707 				ermsg(gettext("Missing quote: %s\n"), arg);
708 				ERR = TRUE;
709 				free(requeue_buf);
710 				return (0);
711 			}
712 
713 			N_lines++;
714 end_arg:		*next++ = '\0';
715 			/* we finished without requeuing so free requeue_buf */
716 			free(requeue_buf);
717 			if (strcmp(arg, LEOF) == 0 || (c == '\0' &&
718 			    strlen(arg) == 0)) {
719 				MORE = FALSE;
720 				/* absorb the rest of the line */
721 				if ((c != '\n') && (c != '\0'))
722 					while (c = getwchr())
723 						if ((c == '\n') || (c == '\0'))
724 							break;
725 				return (0);
726 			} else {
727 				++N_args;
728 				if (retarg = checklen(arg)) {
729 					if ((PER_LINE &&
730 					    N_lines >= PER_LINE &&
731 					    (c == '\0' || c == '\n')) ||
732 					    (N_ARGS && N_args >= N_ARGS)) {
733 						N_lines = N_args = 0;
734 						lastarg = "";
735 						OK = FALSE;
736 					}
737 				}
738 				return (retarg);
739 			}
740 
741 		case '"':
742 			if (inquote == 1)	/* in single quoted string */
743 				goto is_default;
744 			if (inquote == 2)	/* terminating double quote */
745 				inquote = 0;
746 			else			/* starting quoted string */
747 				inquote = 2;
748 			break;
749 
750 		case '\'':
751 			if (inquote == 2)	/* in double quoted string */
752 				goto is_default;
753 			if (inquote == 1)	/* terminating single quote */
754 				inquote = 0;
755 			else			/* starting quoted string */
756 				inquote = 1;
757 			break;
758 
759 		case L'\\':
760 			c = getwchr();
761 			/* store quoted char for potential requeueing */
762 			requeue_offset = store_wchr(&requeue_buf, &requeue_len,
763 			    requeue_offset, c);
764 
765 		default:
766 is_default:		if (bytes == -1) {
767 				*next++ = (char)c;
768 			} else {
769 				(void) wctomb(next, c);
770 				next += bytes;
771 			}
772 			break;
773 		}
774 	}
775 }
776 
777 
778 /*
779  * ermsg():	print out an error message, and indicate failure globally.
780  *
781  *	Assumes that message has already been gettext()'d. It would be
782  *	nice if we could just do the gettext() here, but we can't, since
783  *	since xgettext(1M) wouldn't be able to pick up our error message.
784  */
785 /* PRINTFLIKE1 */
786 static void
787 ermsg(char *messages, ...)
788 {
789 	va_list	ap;
790 
791 	va_start(ap, messages);
792 
793 	(void) fprintf(stderr, "xargs: ");
794 	(void) vfprintf(stderr, messages, ap);
795 
796 	va_end(ap);
797 	OK = FALSE;
798 }
799 
800 
801 /*
802  * Function: int rpmatch(char *)
803  *
804  * Description:
805  *
806  *	Internationalized get yes / no answer.
807  *
808  * Inputs:
809  *	s	-> Pointer to answer to compare against.
810  *
811  * Returns:
812  *	TRUE	-> Answer was affirmative
813  *	FALSE	-> Answer was negative
814  */
815 
816 static int
817 rpmatch(char *s)
818 {
819 	static char	*default_yesexpr = "^[Yy].*";
820 	static char	*compiled_yesexpr = (char *)NULL;
821 
822 	/* Execute once to initialize */
823 	if (compiled_yesexpr == (char *)NULL) {
824 		char	*yesexpr;
825 
826 		/* get yes expression according to current locale */
827 		yesexpr = nl_langinfo(YESEXPR);
828 		/*
829 		 * If the was no expression or if there is a compile error
830 		 * use default yes expression.  Anchor
831 		 */
832 		if ((yesexpr == (char *)NULL) || (*yesexpr == (char)NULL) ||
833 		    ((compiled_yesexpr =
834 		    regcmp(yesexpr, 0)) == NULL))
835 			compiled_yesexpr = regcmp(default_yesexpr, 0);
836 	}
837 
838 	/* match yesexpr */
839 	if (regex(compiled_yesexpr, s) == NULL) {
840 		return (FALSE);
841 	}
842 	return (TRUE);
843 }
844 
845 static int
846 echoargs()
847 {
848 	char	**anarg;
849 	char	**tanarg;	/* tmp ptr			*/
850 	int		i;
851 	char		reply[LINE_MAX];
852 
853 	tanarg = anarg = arglist-1;
854 
855 	/*
856 	 * write out each argument, separated by a space. the tanarg
857 	 * nonsense is for xcu4 testsuite compliance - so that an
858 	 * extra space isn't echoed after the last argument.
859 	 */
860 	while (*++anarg) {		/* while there's an argument	*/
861 		++tanarg;		/* follow anarg			*/
862 		(void) write(2, *anarg, strlen(*anarg));
863 
864 		if (*++tanarg) {	/* if there's another argument:	*/
865 			(void) write(2, " ", 1); /* add a space		*/
866 			--tanarg;	/* reset back to anarg		*/
867 		}
868 	}
869 	if (PROMPT == -1) {
870 		(void) write(2, "\n", 1);
871 		return (TRUE);
872 	}
873 
874 	/*
875 	 * at this point, there may be unexpected input pending on stdin,
876 	 * if one has used the -n flag. this presents a problem, because
877 	 * if we simply do a read(), we'll get the extra input, instead
878 	 * of our desired y/n input. so, we see if there's any extra
879 	 * input, and if there is, then we will store it.
880 	 */
881 
882 	saveinput();
883 
884 	(void) write(2, "?...", 4);	/* ask the user for input	*/
885 
886 	for (i = 0; i < LINE_MAX && read(PROMPT, &reply[i], 1) > 0; i++) {
887 		if (reply[i] == '\n') {
888 			if (i == 0)
889 				return (FALSE);
890 			break;
891 		}
892 	}
893 	reply[i] = 0;
894 
895 	/* flush remainder of line if necessary */
896 	if (i == LINE_MAX) {
897 		char	bitbucket;
898 
899 		while ((read(PROMPT, &bitbucket, 1) > 0) && (bitbucket != '\n'))
900 			;
901 	}
902 
903 	/*
904 	 * now we have to figure out whether the user typed an
905 	 * internationalized version of 'y' for yes. note that in some
906 	 * countries, they've gotten used to typing an ASCII 'y'! so
907 	 * even if our int'l version fails, we will check for an ASCII
908 	 * 'y', in order to be backwards compatible.
909 	 */
910 	return (rpmatch(reply));
911 }
912 
913 
914 static char *
915 insert(char *pattern, char *subst)
916 {
917 	static char	buffer[MAXSBUF+1];
918 	int		len, ipatlen;
919 	char	*pat;
920 	char	*bufend;
921 	char	*pbuf;
922 
923 	len = strlen(subst);
924 	ipatlen = strlen(INSPAT) - 1;
925 	pat = pattern - 1;
926 	pbuf = buffer;
927 	bufend = &buffer[MAXSBUF];
928 
929 	while (*++pat) {
930 		if (xindex(pat, INSPAT) == 0) {
931 			if (pbuf + len >= bufend) {
932 				break;
933 			} else {
934 				(void) strcpy(pbuf, subst);
935 				pat += ipatlen;
936 				pbuf += len;
937 			}
938 		} else {
939 			*pbuf++ = *pat;
940 			if (pbuf >= bufend)
941 				break;
942 		}
943 	}
944 
945 	if (!*pat) {
946 		*pbuf = '\0';
947 		return (buffer);
948 	} else {
949 		ermsg(gettext("Maximum argument size with insertion via %s's "
950 		    "exceeded\n"), INSPAT);
951 		ERR = TRUE;
952 		return (0);
953 	}
954 }
955 
956 
957 static void
958 addibuf(struct inserts	*p)
959 {
960 	char	*newarg, *skel, *sub;
961 	int		l;
962 
963 	skel = p->p_skel;
964 	sub = *ARGV;
965 	linesize -= strlen(skel) + 1;
966 	newarg = insert(skel, sub);
967 	if (ERR)
968 	    return;
969 
970 	if (checklen(newarg)) {
971 		if ((ibufsize += (l = strlen(newarg) + 1)) > MAXIBUF) {
972 			ermsg(gettext("Insert buffer overflow\n"));
973 			ERR = TRUE;
974 		}
975 		(void) strcpy(p_ibuf, newarg);
976 		*(p->p_ARGV) = p_ibuf;
977 		p_ibuf += l;
978 	}
979 }
980 
981 
982 /*
983  * getchr():	get the next character.
984  * description:
985  *	we get the next character from pio.structure, if there's a character
986  *	to get. this may happen when we've had to flush stdin=/dev/tty,
987  *	but still wanted to preserve the characters for later processing.
988  *
989  *	otherwise we just get the character from stdin.
990  */
991 static int
992 getchr(void)
993 {
994 	char	c;
995 
996 	do {
997 		if (queued_data == NULL) {
998 			char	*buffer;
999 			int	len;
1000 
1001 			if ((buffer = malloc(BUFSIZE)) == NULL) {
1002 				perror(gettext(
1003 				    "xargs: Memory allocation failure"));
1004 				exit(1);
1005 			}
1006 
1007 			if ((len = read(0, buffer, BUFSIZE)) == 0)
1008 				return (0);
1009 			if (len == -1) {
1010 				perror(gettext("xargs: Read failure"));
1011 				exit(1);
1012 			}
1013 
1014 			queue(buffer, len, TAIL);
1015 		}
1016 
1017 		file_offset++;
1018 		c = *queued_data->cur++;	 /* get the next character */
1019 		if (--queued_data->length == 0) { /* at the end of buffer? */
1020 			pio	*nxt = queued_data->next;
1021 
1022 			free(queued_data->start);
1023 			free(queued_data);
1024 			queued_data = nxt;
1025 		}
1026 	} while (c == '\0');
1027 	return (c);
1028 }
1029 
1030 
/*
 * getwchr(): read the next (possibly multibyte) character from the input.
 *
 * Bytes are fetched one at a time with getchr() until mbtowc() accepts
 * the sequence collected so far, up to MB_CUR_MAX bytes.  Returns the
 * wide character, or L'\0' at end of input.  An incomplete or invalid
 * sequence is fatal: a message is printed and the process exits with
 * status 1.
 */
static wchar_t
getwchr(void)
{
	int		i;
	wchar_t		wch;
	unsigned char	buffer[MB_LEN_MAX + 1];

	for (i = 0; i < (int)MB_CUR_MAX; ) {
		/* getchr() only returns 0 at end of file */
		if ((buffer[i++] = getchr()) == '\0') {
			/* We have reached  EOF */
			if (i == 1) {
				/* TRUE EOF has been reached */
				return (L'\0');
			}
			/*
			 * We have some characters in our buffer still so it
			 * must be an invalid character right before EOF.
			 */
			break;
		}

		/* If this succeeds then we are done */
		if (mbtowc(&wch, (char *)buffer, i) != -1)
			return (wch);
	}

	/*
	 * We have now encountered an illegal character sequence.
	 * There is nothing much we can do at this point but
	 * return an error.  If we attempt to recover we may in fact
	 * return garbage as arguments, from the customer's point
	 * of view.  After all what if they are feeding us a file
	 * generated in another locale?
	 */
	errno = EILSEQ;
	perror(gettext("xargs: Corrupt input file"));
	exit(1);
	/* NOTREACHED */
}
1070 
1071 
1072 static void
1073 ungetwchr(wchar_t wch)
1074 {
1075 	char	*buffer;
1076 	int	bytes;
1077 
1078 	if ((buffer = malloc(MB_LEN_MAX)) == NULL) {
1079 		perror(gettext("xargs: Memory allocation failure"));
1080 		exit(1);
1081 	}
1082 	bytes = wctomb(buffer, wch);
1083 	queue(buffer, bytes, HEAD);
1084 }
1085 
1086 
1087 static int
1088 lcall(char *sub, char **subargs)
1089 {
1090 	int retcode, retry = 0;
1091 	pid_t iwait, child;
1092 
1093 	for (; ; ) {
1094 		switch (child = fork()) {
1095 		default:
1096 			while ((iwait = wait(&retcode)) != child &&
1097 			    iwait != (pid_t)-1)
1098 				;
1099 			if (iwait == (pid_t)-1) {
1100 				perror(gettext("xargs: Wait failure"));
1101 				exit(122);
1102 				/* NOTREACHED */
1103 			}
1104 			if (WIFSIGNALED(retcode)) {
1105 				ermsg(gettext("Child killed with signal %d\n"),
1106 				    WTERMSIG(retcode));
1107 				exit(125);
1108 				/* NOTREACHED */
1109 			}
1110 			if ((WEXITSTATUS(retcode) & 0377) == 0377) {
1111 				ermsg(gettext("Command could not continue "
1112 				    "processing data\n"));
1113 				exit(124);
1114 				/* NOTREACHED */
1115 			}
1116 			return (WEXITSTATUS(retcode));
1117 		case 0:
1118 			(void) execvp(sub, subargs);
1119 			perror(gettext("xargs: Could not exec command"));
1120 			if (errno == EACCES)
1121 				exit(126);
1122 			exit(127);
1123 			/* NOTREACHED */
1124 		case -1:
1125 			if (errno != EAGAIN && retry++ < FORK_RETRY) {
1126 				perror(gettext("xargs: Could not fork child"));
1127 				exit(123);
1128 			}
1129 			(void) sleep(1);
1130 		}
1131 	}
1132 }
1133 
1134 
/*
 * xindex(): locate the string `as2' inside the string `as1'.
 *
 * Returns the offset of the first occurrence of `as2' in `as1', or -1
 * when there is none.  An empty `as2' is never found (-1), which is
 * where this differs from a bare strstr().
 */
static int
xindex(char *as1, char *as2)
{
	char	*hit;

	/* strstr() would report an empty needle at offset 0 */
	if (*as2 == '\0')
		return (-1);

	hit = strstr(as1, as2);
	if (hit == NULL)
		return (-1);
	return ((int)(hit - as1));
}
1164 
1165 
1166 static void
1167 usage()
1168 {
1169 	ermsg(gettext(USAGEMSG));
1170 	OK = FALSE;
1171 }
1172 
1173 
1174 
1175 /*
1176  * parseargs():		modify the args
1177  *	since the -e, -i and -l flags all take optional subarguments,
1178  *	and getopt(3C) is clueless about this nonsense, we change
1179  *	our local argument count and strings to separate this out,
1180  *	and make it easier to handle via getopt(3C).
1181  *
1182  *	-e	-> -e ""
1183  *	-e3	-> -e "3"
1184  *	-Estr	-> -E "str"
1185  *	-i	-> -i "{}"
1186  *	-irep	-> -i "rep"
1187  *	-l	-> -l "1"
1188  *	-l10	-> -l "10"
1191  */
1192 static void
1193 parseargs(int ac, char **av)
1194 {
1195 	int i;			/* current argument			*/
1196 	int cflag;		/* 0 = not processing cmd arg		*/
1197 
1198 	if ((mav = malloc((ac * 2 + 1) * sizeof (char *))) == NULL) {
1199 		perror(gettext("xargs: Memory allocation failure"));
1200 		exit(1);
1201 	}
1202 
1203 	/* for each argument, see if we need to change things:		*/
1204 	for (i = mac = cflag = 0; (av[i] != NULL) && i < ac; i++, mac++) {
1205 		if ((mav[mac] = strdup(av[i])) == NULL) {
1206 			perror(gettext("xargs: Memory allocation failure"));
1207 			exit(1);
1208 		}
1209 
1210 		/* -- has been found or argument list is fully processes */
1211 		if (cflag)
1212 			continue;
1213 
1214 		/*
1215 		 * if we're doing special processing, and we've got a flag
1216 		 */
1217 		else if ((av[i][0] == '-') && (av[i][1] != NULL)) {
1218 			char	*def;
1219 
1220 			switch (av[i][1]) {
1221 			case	'e':
1222 				def = ""; /* -e with no arg turns off eof */
1223 				goto process_special;
1224 			case	'i':
1225 				def = INSPAT_STR;
1226 				goto process_special;
1227 			case	'l':
1228 				def = "1";
1229 process_special:
1230 				/*
1231 				 * if there's no sub-option, we *must* add
1232 				 * a default one. this is because xargs must
1233 				 * be able to distinguish between a valid
1234 				 * suboption, and a command name.
1235 				 */
1236 				if (av[i][2] == NULL) {
1237 					mav[++mac] = strdup(def);
1238 				} else {
1239 					/* clear out our version: */
1240 					mav[mac][2] = NULL;
1241 					mav[++mac] = strdup(&av[i][2]);
1242 				}
1243 				if (mav[mac] == NULL) {
1244 					perror(gettext("xargs: Memory"
1245 					    " allocation failure"));
1246 					exit(1);
1247 				}
1248 				break;
1249 
1250 			/* flags with required subarguments:		*/
1251 
1252 			/*
1253 			 * there are two separate cases here. either the
1254 			 * flag can have the normal XCU4 handling
1255 			 * (of the form: -X subargument); or it can have
1256 			 * the old solaris 2.[0-4] handling (of the
1257 			 * form: -Xsubargument). in order to maintain
1258 			 * backwards compatibility, we must support the
1259 			 * latter case. we handle the latter possibility
1260 			 * first so both the old solaris way of handling
1261 			 * and the new XCU4 way of handling things are allowed.
1262 			 */
1263 			case	'n':	/* FALLTHROUGH			*/
1264 			case	's':	/* FALLTHROUGH			*/
1265 			case	'E':	/* FALLTHROUGH			*/
1266 			case	'I':	/* FALLTHROUGH			*/
1267 			case	'L':
1268 				/*
1269 				 * if the second character isn't null, then
1270 				 * the user has specified the old syntax.
1271 				 * we move the subargument into our
1272 				 * mod'd argument list.
1273 				 */
1274 				if (av[i][2] != NULL) {
1275 					/* first clean things up:	*/
1276 					mav[mac][2] = NULL;
1277 
1278 					/* now add the separation:	*/
1279 					++mac;	/* inc to next mod'd arg */
1280 					if ((mav[mac] = strdup(&av[i][2])) ==
1281 					    NULL) {
1282 						perror(gettext("xargs: Memory"
1283 						    " allocation failure"));
1284 						exit(1);
1285 					}
1286 					break;
1287 				}
1288 				i++;
1289 				mac++;
1290 #ifdef XPG6
1291 				if (av[i] != NULL) {
1292 					if ((mav[mac] = strdup(av[i]))
1293 						== NULL) {
1294 						perror(gettext("xargs: Memory"
1295 						    " allocation failure"));
1296 						exit(1);
1297 					}
1298 				}
1299 #else
1300 				if (av[i] == NULL) {
1301 					if ((mav[mac++] = strdup("")) == NULL) {
1302 						perror(gettext("xargs: Memory "
1303 						    " allocation failure"));
1304 						exit(1);
1305 					}
1306 					mav[mac] = NULL;
1307 					return;
1308 				}
1309 				if ((mav[mac] = strdup(av[i])) == NULL) {
1310 					perror(gettext("xargs: Memory"
1311 						" allocation failure"));
1312 					exit(1);
1313 				}
1314 
1315 #endif
1316 				break;
1317 
1318 			/* flags */
1319 			case 'p' :
1320 			case 't' :
1321 			case 'x' :
1322 				break;
1323 
1324 			case '-' :
1325 			default:
1326 				/*
1327 				 * here we've hit the cmd argument. so
1328 				 * we'll stop special processing, as the
1329 				 * cmd may have a "-i" etc., argument,
1330 				 * and we don't want to add a "" to it.
1331 				 */
1332 				cflag = 1;
1333 				break;
1334 			}
1335 		} else if (i > 0) {	/* if we're not the 1st arg	*/
1336 			/*
1337 			 * if it's not a flag, then it *must* be the cmd.
1338 			 * set cflag, so we don't mishandle the -[eil] flags.
1339 			 */
1340 			cflag = 1;
1341 		}
1342 	}
1343 
1344 	mav[mac] = NULL;
1345 }
1346 
1347 
/*
 * saveinput(): pick up any pending input, so it can be processed later.
 *
 * description:
 *	the purpose of this routine is to allow us to handle the user
 *	typing in a 'y' or 'n', when there are existing characters already
 *	in stdin. this happens when one gives the "-n" option along with
 *	"-p". the problem occurs when the user first types in more arguments
 *	than specified by the -n number. echoargs() wants to read stdin
 *	in order to get the user's response, but if there's already stuff
 *	there, echoargs() won't read the proper character.
 *
 *	the solution provided by this routine is to pick up all characters
 *	(if any), and store them for later processing.
 *
 *	nothing is done when PROMPT is -1. otherwise the descriptor PROMPT
 *	is peeked with the I_PEEK ioctl; if data is pending it is read and
 *	handed to queue() at the TAIL position. allocation, ioctl and read
 *	failures are reported with perror(3C) and exit with status 1.
 */

void
saveinput(void)
{
	char *buffer;		/* ptr to the floating data buffer	*/
	struct strpeek speek;	/* to see what's on the queue		*/
	int len;		/* number of bytes actually read	*/

	/* if we're not in -p mode, skip				*/
	if (PROMPT == -1)
		return;

	if ((buffer = malloc((size_t)MAX_INPUT)) == NULL) {
		perror(gettext("xargs: Memory allocation failure"));
		exit(1);
	}

	/* now see if there's any activity pending:			*/
	speek.ctlbuf.maxlen = 0;
	speek.ctlbuf.len = 0;
	speek.ctlbuf.buf = NULL;
	speek.flags = 0;
	speek.databuf.maxlen = MAX_INPUT;
	speek.databuf.len = 0;
	speek.databuf.buf = buffer;

	if (ioctl(PROMPT, I_PEEK, &speek) == -1) {
		perror(gettext("xargs: I_PEEK failure"));
		exit(1);
	}

	/* nothing pending: the buffer was never handed on, so release it */
	if (speek.databuf.len <= 0) {
		free(buffer);
		return;
	}

	if ((len = read(PROMPT, buffer, speek.databuf.len)) == -1) {
		perror(gettext("xargs: read failure"));
		exit(1);
	}

	/*
	 * NOTE(review): the buffer is not freed here on the assumption
	 * that queue() keeps the pointer -- confirm against queue().
	 */
	queue(buffer, len, TAIL);
}
1406