xref: /titanic_41/usr/src/cmd/xargs/xargs.c (revision 2b24ab6b3865caeede9eeb9db6b83e1d89dcd1ea)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <stdio.h>
33 #include <sys/types.h>
34 #include <sys/wait.h>
35 #include <unistd.h>
36 #include <fcntl.h>
37 #include <string.h>
38 #include <stdarg.h>
39 #include <stdlib.h>
40 #include <limits.h>
41 #include <wchar.h>
42 #include <locale.h>
43 #include <langinfo.h>
44 #include <stropts.h>
45 #include <poll.h>
46 #include <errno.h>
47 #include <stdarg.h>
48 #include "getresponse.h"
49 
/* insertion points accepted by queue():				*/
#define	HEAD	0	/* push in front of the pending input		*/
#define	TAIL	1	/* append after the pending input		*/
#define	FALSE 0
#define	TRUE 1
#define	MAXSBUF 255	/* size of insert()'s result buffer (plus NUL)	*/
#define	MAXIBUF 512	/* size of ins_buf, shared by all insertions	*/
#define	MAXINSERTS 5	/* number of entries in saveargv[]		*/
#define	BUFSIZE LINE_MAX /* size of argbuf and default value of BUFLIM	*/
#define	MAXARGS 255	/* arglist[] holds MAXARGS entries plus a NULL	*/
#define	INSPAT_STR	"{}"	/* default replstr string for -[Ii]	*/
#define	FORK_RETRY	5	/* retry count used by lcall()		*/

#define	QBUF_STARTLEN 255  /* start size of growable string buffer */
#define	QBUF_INC 100	   /* how much to grow a growable string by */
64 
static wctype_t	blank;		/* "blank" char class, looked up in main() */
static char	*arglist[MAXARGS+1];	/* argv handed to lcall()	*/
static char	argbuf[BUFSIZE+1];	/* storage for argument strings	*/
static char	*next = argbuf;		/* next free byte in argbuf	*/
static char	*lastarg = "";	/* arg that overflowed; re-added by main() */
static char	**ARGV = arglist;	/* next free slot in arglist	*/
static char	*LEOF = "_";	/* logical EOF string (-e/-E); "" disables */
static char	*INSPAT = INSPAT_STR;	/* replstr for -I/-i		*/
static char	ins_buf[MAXIBUF];	/* storage for inserted args	*/
static char	*p_ibuf;		/* next free byte in ins_buf	*/

/*
 * one entry per command-line argument that contains INSPAT; filled in
 * by main() and consumed by addibuf().
 */
static struct inserts {
	char	**p_ARGV;	/* where to put newarg ptr in arg list */
	char	*p_skel;	/* ptr to arg template */
} saveargv[MAXINSERTS];

/*
 * count of stdin bytes consumed so far: bumped by getchr(), reduced by
 * queue() when data is pushed back, and handed to lseek() by main().
 */
static off_t	file_offset = 0;
static int	PROMPT = -1;	/* fd of /dev/tty for -p, else -1	*/
static int	BUFLIM = BUFSIZE;	/* command line size limit (-s)	*/
static int	N_ARGS = 0;	/* max args per invocation (-n), 0 = none */
static int	N_args = 0;	/* args read for the current invocation	*/
static int	N_lines = 0;	/* lines read for the current invocation */
static int	DASHX = FALSE;	/* -x given				*/
static int	MORE = TRUE;	/* cleared once input is exhausted	*/
static int	PER_LINE = FALSE; /* FALSE, or lines per invocation	*/
static int	ERR = FALSE;	/* an error occurred; don't run command	*/
static int	OK = TRUE;	/* cleared to end the current read loop	*/
static int	LEGAL = FALSE;	/* overflowing the limit is an error	*/
static int	TRACE = FALSE;	/* echo each command to stderr (-t/-p)	*/
static int	INSERT = FALSE;	/* insert mode (-I/-i)			*/
static int	linesize = 0;	/* bytes used by the args checked so far */
static int	ibufsize = 0;	/* bytes of ins_buf used so far		*/
static int	exitstat = 0;	/* our exit status			*/
static int	mac;		/* modified argc, after parsing		*/
static char	**mav;		/* modified argv, after parsing		*/
static int	n_inserts;	/* # of insertions.			*/
static int	inquote = 0;	/* processing a quoted string		*/
static int	save_index = 0;	/* set by main() when arglist fills up	*/
103 
/*
 * the pio structure is used to save any pending input before the
 * user replies to a prompt. the pending input is saved here,
 * for the appropriate processing later.
 *
 * entries are created by queue() and consumed, one byte at a time,
 * by getchr(), which frees both the buffer and the entry once the
 * entry's length drops to zero.
 */
typedef struct pio {
	struct pio *next;	/* next in stack			*/
	char *start;		/* starting addr of the buffer		*/
	char *cur;		/* ptr to current char in buf		*/
	size_t length;		/* number of bytes remaining		*/
} pio;

/* head of the list of pending input; NULL when nothing is queued	*/
static pio *queued_data = NULL;
117 
/* our usage message (printed through ermsg() by usage()):		*/
#define	USAGEMSG "Usage: xargs: [-t] [-p] [-e[eofstr]] [-E eofstr] "\
	"[-I replstr] [-i[replstr]] [-L #] [-l[#]] [-n # [-x]] [-s size] "\
	"[cmd [args ...]]\n"

/* forward declarations of the file-local helpers:			*/
static int	echoargs();
static int	getchr(void);
static wchar_t	getwchr(void);
static void	ungetwchr(wchar_t);
static int	lcall(char *sub, char **subargs);
static int	xindex(char *as1, char *as2);
static void	addibuf(struct inserts *p);
static void	ermsg(char *messages, ...);
static char	*addarg(char *arg);
static char	*checklen(char *arg);
static size_t   store_wchr(char **, size_t *, size_t, wchar_t);
static char	*getarg();
static char	*insert(char *pattern, char *subst);
static void	usage();
static void	parseargs();
static void	saveinput();
139 
140 int
141 main(int argc, char **argv)
142 {
143 	int	j;
144 	struct inserts *psave;
145 	int c;
146 	int	initsize;
147 	char	*cmdname, *initbuf, **initlist;
148 
149 
150 	/* initialization */
151 	blank = wctype("blank");
152 	n_inserts = 0;
153 	psave = saveargv;
154 	(void) setlocale(LC_ALL, "");
155 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D 		*/
156 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't 		*/
157 #endif
158 	(void) textdomain(TEXT_DOMAIN);
159 	if (init_yes() < 0) {
160 		ermsg(gettext(ERR_MSG_INIT_YES), strerror(errno));
161 		exit(1);
162 	}
163 
164 	parseargs(argc, argv);
165 
166 	/* handling all of xargs arguments:				*/
167 	while ((c = getopt(mac, mav, "tpe:E:I:i:L:l:n:s:x")) != EOF) {
168 		switch (c) {
169 		case 't':	/* -t: turn trace mode on		*/
170 			TRACE = TRUE;
171 			break;
172 
173 		case 'p':	/* -p: turn on prompt mode.		*/
174 			if ((PROMPT = open("/dev/tty", O_RDONLY)) == -1) {
175 				perror(gettext("can't read from tty for -p"));
176 			} else {
177 				TRACE = TRUE;
178 			}
179 			break;
180 
181 		case 'e':
182 			/*
183 			 * -e[eofstr]: set/disable end-of-file.
184 			 * N.B. that an argument *isn't* required here; but
185 			 * parseargs forced an argument if not was given.  The
186 			 * forced argument is the default...
187 			 */
188 			LEOF = optarg; /* can be empty */
189 			break;
190 
191 		case 'E':
192 			/*
193 			 * -E eofstr: change end-of-file string.
194 			 * eofstr *is* required here, but can be empty:
195 			 */
196 			LEOF = optarg;
197 			break;
198 
199 		case 'I':
200 			/* -I replstr: Insert mode. replstr *is* required. */
201 			INSERT = PER_LINE = LEGAL = TRUE;
202 			N_ARGS = 0;
203 			INSPAT = optarg;
204 			if (*optarg == '\0') {
205 				ermsg(gettext(
206 				    "Option requires an argument: -%c\n"), c);
207 			}
208 			break;
209 
210 		case 'i':
211 			/*
212 			 * -i [replstr]: insert mode, with *optional* replstr.
213 			 * N.B. that an argument *isn't* required here; if
214 			 * it's not given, then the string INSPAT_STR will
215 			 * be assumed.
216 			 *
217 			 * Since getopts(3C) doesn't handle the case of an
218 			 * optional variable argument at all, we have to
219 			 * parse this by hand:
220 			 */
221 
222 			INSERT = PER_LINE = LEGAL = TRUE;
223 			N_ARGS = 0;
224 			if ((optarg != NULL) && (*optarg != '\0')) {
225 				INSPAT = optarg;
226 			} else {
227 				/*
228 				 * here, there is no next argument. so
229 				 * we reset INSPAT to the INSPAT_STR.
230 				 * we *have* to do this, as -i/I may have
231 				 * been given previously, and XCU4 requires
232 				 * that only "the last one specified takes
233 				 * effect".
234 				 */
235 				INSPAT = INSPAT_STR;
236 			}
237 			break;
238 
239 		case 'L':
240 			/*
241 			 * -L number: # of times cmd is executed
242 			 * number *is* required here:
243 			 */
244 			PER_LINE = TRUE;
245 			N_ARGS = 0;
246 			INSERT = FALSE;
247 			if ((PER_LINE = atoi(optarg)) <= 0) {
248 				ermsg(gettext("#lines must be positive "
249 				    "int: %s\n"), optarg);
250 			}
251 			break;
252 
253 		case 'l':
254 			/*
255 			 * -l [number]: # of times cmd is executed
256 			 * N.B. that an argument *isn't* required here; if
257 			 * it's not given, then 1 is assumed.
258 			 *
259 			 * parseargs handles the optional arg processing.
260 			 */
261 
262 			PER_LINE = LEGAL = TRUE;  /* initialization	*/
263 			N_ARGS = 0;
264 			INSERT = FALSE;
265 
266 			if ((optarg != NULL) && (*optarg != '\0')) {
267 				if ((PER_LINE = atoi(optarg)) <= 0)
268 					PER_LINE = 1;
269 			}
270 			break;
271 
272 		case 'n':	/* -n number: # stdin args		*/
273 			/*
274 			 * -n number: # stdin args.
275 			 * number *is* required here:
276 			 */
277 			if ((N_ARGS = atoi(optarg)) <= 0) {
278 				ermsg(gettext("#args must be positive "
279 				    "int: %s\n"), optarg);
280 			} else {
281 				LEGAL = DASHX || N_ARGS == 1;
282 				INSERT = PER_LINE = FALSE;
283 			}
284 			break;
285 
286 		case 's':	/* -s size: set max size of each arg list */
287 			BUFLIM = atoi(optarg);
288 			if (BUFLIM > BUFSIZE || BUFLIM <= 0) {
289 				ermsg(gettext(
290 				    "0 < max-cmd-line-size <= %d: "
291 				    "%s\n"), BUFSIZE, optarg);
292 			}
293 			break;
294 
295 		case 'x':	/* -x: terminate if args > size limit	*/
296 			DASHX = LEGAL = TRUE;
297 			break;
298 
299 		default:
300 			/*
301 			 * bad argument. complain and get ready to die.
302 			 */
303 			ERR = TRUE;
304 			usage();
305 
306 			exit(2);
307 			break;
308 		}
309 	}
310 
311 	/*
312 	 * if anything called ermsg(), something screwed up, so
313 	 * we exit early.
314 	 */
315 	if (OK == FALSE) {
316 		ERR = TRUE;
317 		usage();
318 		exit(2);
319 	}
320 
321 	/*
322 	 * we're finished handling xargs's options, so now pick up
323 	 * the command name (if any), and it's options.
324 	 */
325 
326 
327 	mac -= optind;	/* dec arg count by what we've processed 	*/
328 	mav += optind;	/* inc to current mav				*/
329 
330 	if (mac <= 0) {	/* if there're no more args to process,	*/
331 		cmdname = "/usr/bin/echo";	/* our default command	*/
332 		*ARGV++ = addarg(cmdname);	/* use the default cmd.	*/
333 	} else {	/* otherwise keep parsing rest of the string.	*/
334 		/*
335 		 * note that we can't use getopts(3C), and *must* parse
336 		 * this by hand, as we don't know apriori what options the
337 		 * command will take.
338 		 */
339 		cmdname = *mav;	/* get the command name	*/
340 
341 
342 		/* pick up the remaining args from the command line:	*/
343 		while ((OK == TRUE) && (mac-- > 0)) {
344 			/*
345 			 * while we haven't crapped out, and there's
346 			 * work to do:
347 			 */
348 			if (INSERT && ! ERR) {
349 				if (xindex(*mav, INSPAT) != -1) {
350 					if (++n_inserts > MAXINSERTS) {
351 						ermsg(gettext("too many args "
352 						    "with %s\n"), INSPAT);
353 						ERR = TRUE;
354 					}
355 					psave->p_ARGV = ARGV;
356 					(psave++)->p_skel = *mav;
357 				}
358 			}
359 			*ARGV++ = addarg(*mav++);
360 		}
361 	}
362 
363 	/* pick up args from standard input */
364 
365 	initbuf = next;
366 	initlist = ARGV;
367 	initsize = linesize;
368 
369 	while (OK && MORE) {
370 		N_args = 0;
371 		N_lines = 0;
372 		next = initbuf;
373 		ARGV = initlist;
374 		linesize = initsize;
375 		if (*lastarg) {
376 			*ARGV++ = addarg(lastarg);
377 			lastarg = "";
378 		}
379 
380 		while (((*ARGV++ = getarg()) != NULL) && OK) {
381 			if ((ARGV - arglist) == MAXARGS) {
382 				save_index = ARGV - arglist;
383 				break;
384 			}
385 		}
386 		if ((save_index == MAXARGS) && !MORE && (N_args == 0)) {
387 			/* there were no more args after filling arglist */
388 			exit(exitstat);
389 		}
390 
391 		/* insert arg if requested */
392 
393 		if (!ERR && INSERT) {
394 			if ((!MORE) && (N_lines == 0)) {
395 				exit(exitstat);
396 			}
397 					/* no more input lines */
398 			p_ibuf = ins_buf;
399 			ARGV--;
400 			j = ibufsize = 0;
401 			for (psave = saveargv; ++j <= n_inserts; ++psave) {
402 				addibuf(psave);
403 				if (ERR)
404 					break;
405 			}
406 		}
407 		*ARGV = 0;
408 
409 		if (n_inserts > 0) {
410 			int t_ninserts;
411 
412 			/*
413 			 * if we've done any insertions, re-calculate the
414 			 * linesize. bomb out if we've exceeded our length.
415 			 */
416 			t_ninserts = n_inserts;
417 			n_inserts = 0;	/* inserts have been done 	*/
418 			linesize = 0;	/* recalculate this		*/
419 
420 			/* for each current argument in the list:	*/
421 			for (ARGV = arglist; *ARGV != NULL; ARGV++) {
422 				/* recalculate everything.		*/
423 				if (checklen(*ARGV) != 0) {
424 					if (N_ARGS && (N_args >= N_ARGS)) {
425 						N_lines = N_args = 0;
426 						OK = FALSE;
427 						ERR = TRUE;
428 					}
429 				}
430 			}
431 			n_inserts = t_ninserts;
432 		}
433 
434 		/* exec command */
435 
436 		if (!ERR) {
437 			if (!MORE &&
438 			    (PER_LINE && N_lines == 0 || N_ARGS && N_args == 0))
439 				exit(exitstat);
440 			OK = TRUE;
441 			j = TRACE ? echoargs() : TRUE;
442 			if (j) {
443 				/*
444 				 * for xcu4, all invocations of cmdname must
445 				 * return 0, in order for us to return 0.
446 				 * so if we have a non-zero status here,
447 				 * quit immediately.
448 				 */
449 				if ((exitstat |= lcall(cmdname, arglist)) == 0)
450 					continue;
451 			}
452 		}
453 	}
454 
455 	(void) lseek(0, file_offset, SEEK_SET);
456 	if (OK) {
457 		return (exitstat);
458 	} else {
459 		/*
460 		 * if exitstat was set, to match XCU4 complience,
461 		 * return that value, otherwise, return 1.
462 		 */
463 		return (exitstat ? exitstat : 1);
464 	}
465 }
466 
467 static void
468 queue(char *buffer, int len, int where)
469 {
470 	pio *new, *element;
471 
472 	if ((new = malloc(sizeof (pio))) == NULL) {
473 		perror(gettext("xargs: Memory allocation failure"));
474 		exit(1);
475 	}
476 	new->cur = new->start = buffer;
477 	new->length = len;
478 
479 	if (where == TAIL) {
480 		new->next = NULL;
481 		if (queued_data == NULL) {
482 			queued_data = new;
483 		} else {
484 			element = queued_data;
485 			while (element->next != NULL) {
486 				element = element->next;
487 			}
488 			element->next = new;
489 		}
490 	} else {
491 		file_offset -= len;
492 		new->next = queued_data;
493 		queued_data = new;
494 	}
495 }
496 
497 static char *
498 checklen(char *arg)
499 {
500 	int	oklen;
501 
502 	oklen = TRUE;
503 	linesize += strlen(arg) + 1;
504 	if (linesize >= BUFLIM) {
505 		/*
506 		 * we skip this if there're inserts. we'll handle the
507 		 * argument counting after all the insertions have
508 		 * been done.
509 		 */
510 		if (n_inserts == 0) {
511 			lastarg = arg;
512 			oklen = OK = FALSE;
513 
514 			if (LEGAL) {
515 				ERR = TRUE;
516 				ermsg(gettext("arg list too long\n"));
517 			} else if (N_args > 1) {
518 				N_args = 1;
519 			} else {
520 				ermsg(gettext("a single arg was greater than "
521 				    "the max arglist size of %d characters\n"),
522 				    BUFLIM);
523 				ERR = TRUE;
524 			}
525 		}
526 	}
527 	return (oklen ? arg : 0);
528 }
529 
530 static char *
531 addarg(char *arg)
532 {
533 	if (checklen(arg) != 0) {
534 		(void) strcpy(next, arg);
535 		arg = next;
536 		next += strlen(arg) + 1;
537 		return (arg);
538 	}
539 	return ((char *)0);
540 }
541 
542 /*
543  * store_wchr() : append a wchar_t to a char buffer, resize buffer if required.
544  *
545  *     Given a pointer to the beginning of a string buffer, the length of the
546  *     buffer and an offset indicating the next place to write within that
547  *     buffer, the passed wchar_t will be appended to the buffer if there is
548  *     enough space. If there is not enough space, an attempt to reallocate the
549  *     buffer will be made and if successful the passed pointer and size will be
550  *     updated to describe the reallocated block. Returns the new value for
551  *     'offset' (it will be incremented by the number of bytes written).
552  */
553 static size_t
554 store_wchr(char **buffer, size_t *buflen, size_t offset, wchar_t c)
555 {
556 	int bytes;
557 
558 	/*
559 	 * Make sure that there is enough room in the buffer to store the
560 	 * maximum length of c.
561 	 */
562 	if ((offset + MB_CUR_MAX) > *buflen) {
563 		/*
564 		 * Not enough room so attempt to reallocate. Add 'MB_CUR_MAX' to
565 		 * buffer length to ensure that there is always enough room to
566 		 * store 'c' if realloc succeeds, no matter what QBUF_INC is
567 		 * defined as.
568 		 */
569 		*buflen += (QBUF_INC + MB_CUR_MAX);
570 		if ((*buffer = realloc(*buffer, *buflen)) == NULL) {
571 			perror(gettext("xargs: Memory allocation failure"));
572 			exit(1);
573 		}
574 	}
575 	/* store bytes from wchar into buffer */
576 	bytes = wctomb(*buffer + offset, c);
577 	if (bytes == -1) {
578 		/* char was invalid */
579 		bytes = 1;
580 		*(*buffer + offset) = (char)c;
581 	}
582 
583 	/* return new value for offset */
584 	return (offset + bytes);
585 }
586 
587 static char *
588 getarg()
589 {
590 	int	bytes;
591 	wchar_t	c;
592 	char	*arg;
593 	char	*retarg, *requeue_buf;
594 	size_t  requeue_offset = 0, requeue_len;
595 	char	mbc[MB_LEN_MAX];
596 
597 	while (iswspace(c = getwchr()) || c == '\n')
598 		;
599 
600 	if (c == '\0') {
601 		MORE = FALSE;
602 		return (0);
603 	}
604 
605 	/*
606 	 * While we are reading in an argument, it is possible that we will
607 	 * reach the maximum length of the overflow buffer and we'll have to
608 	 * requeue what we have read so far. To handle this we allocate an
609 	 * initial buffer here which will keep an unprocessed copy of the data
610 	 * that we read in (this buffer will grow as required).
611 	 */
612 	requeue_len = (size_t)QBUF_STARTLEN;
613 	if ((requeue_buf = (char *)malloc(requeue_len)) == NULL) {
614 		perror(gettext("xargs: Memory allocation failure"));
615 		exit(1);
616 	}
617 
618 	for (arg = next; ; c = getwchr()) {
619 		bytes = wctomb(mbc, c);
620 
621 		/*
622 		 * Store the char that we have read before processing it in case
623 		 * the current argument needs to be requeued.
624 		 */
625 		requeue_offset = store_wchr(&requeue_buf, &requeue_len,
626 		    requeue_offset, c);
627 
628 		/* Check for overflow the input buffer */
629 		if ((next + ((bytes == -1) ? 1 : bytes)) >= &argbuf[BUFLIM]) {
630 			/*
631 			 * It's only an error if there are no Args in buffer
632 			 * already.
633 			 */
634 			if ((N_ARGS || PER_LINE) && LEGAL) {
635 				ERR = TRUE;
636 				ermsg(gettext("Argument list too long\n"));
637 				free(requeue_buf);
638 				return (0);
639 			} else if (N_args == 0) {
640 				lastarg = "";
641 				ERR = TRUE;
642 				ermsg(gettext("A single arg was greater than "
643 				    "the max arglist size of %d characters\n"),
644 				    BUFSIZE);
645 				free(requeue_buf);
646 				return (0);
647 			}
648 			/*
649 			 * Otherwise we put back the current argument
650 			 * and use what we have collected so far...
651 			 */
652 			queue(requeue_buf, requeue_offset, HEAD);
653 			/* reset inquote because we have requeued the quotes */
654 			inquote = 0;
655 			return (NULL);
656 		}
657 
658 
659 		if (iswctype(c, blank) && inquote == 0) {
660 			if (INSERT) {
661 				if (bytes == -1) {
662 					*next++ = (char)c;
663 				} else {
664 					(void) wctomb(next, c);
665 					next += bytes;
666 				}
667 				continue;
668 			}
669 
670 			/* skip over trailing whitespace till next arg */
671 			while (iswctype((c = getwchr()), blank) &&
672 			    (c != '\n') && (c != '\0'))
673 				;
674 
675 			/*
676 			 * if there was space till end of line then the last
677 			 * character was really a newline...
678 			 */
679 			if (c == L'\n' || c == L'\0') {
680 				ungetwchr(L'\n');
681 			} else {
682 				/* later code needs to know this was a space */
683 				ungetwchr(c);
684 				c = L' ';
685 			}
686 			goto end_arg;
687 		}
688 		switch (c) {
689 		case L'\0':
690 		case L'\n':
691 			if (inquote) {
692 				*next++ = '\0';
693 				ermsg(gettext("Missing quote: %s\n"), arg);
694 				ERR = TRUE;
695 				free(requeue_buf);
696 				return (0);
697 			}
698 
699 			N_lines++;
700 end_arg:		*next++ = '\0';
701 			/* we finished without requeuing so free requeue_buf */
702 			free(requeue_buf);
703 			if ((strcmp(arg, LEOF) == 0 && *LEOF != '\0') ||
704 			    (c == '\0' && strlen(arg) == 0)) {
705 				MORE = FALSE;
706 				/* absorb the rest of the line */
707 				if ((c != '\n') && (c != '\0'))
708 					while (c = getwchr())
709 						if ((c == '\n') || (c == '\0'))
710 							break;
711 				if (strcmp(arg, LEOF) == 0 && *LEOF != '\0') {
712 					/*
713 					 * Encountered EOF string.
714 					 * Don't read any more lines.
715 					 */
716 					N_lines = 0;
717 				}
718 				return (0);
719 			} else {
720 				++N_args;
721 				if (retarg = checklen(arg)) {
722 					if ((PER_LINE &&
723 					    N_lines >= PER_LINE &&
724 					    (c == '\0' || c == '\n')) ||
725 					    (N_ARGS && N_args >= N_ARGS)) {
726 						N_lines = N_args = 0;
727 						lastarg = "";
728 						OK = FALSE;
729 					}
730 				}
731 				return (retarg);
732 			}
733 
734 		case '"':
735 			if (inquote == 1)	/* in single quoted string */
736 				goto is_default;
737 			if (inquote == 2)	/* terminating double quote */
738 				inquote = 0;
739 			else			/* starting quoted string */
740 				inquote = 2;
741 			break;
742 
743 		case '\'':
744 			if (inquote == 2)	/* in double quoted string */
745 				goto is_default;
746 			if (inquote == 1)	/* terminating single quote */
747 				inquote = 0;
748 			else			/* starting quoted string */
749 				inquote = 1;
750 			break;
751 
752 		case L'\\':
753 			/*
754 			 * Any unquoted character can be escaped by
755 			 * preceding it with a backslash.
756 			 */
757 			if (inquote == 0) {
758 				c = getwchr();
759 				/* store quoted char for potential requeueing */
760 				requeue_offset = store_wchr(&requeue_buf,
761 				    &requeue_len, requeue_offset, c);
762 			}
763 
764 		default:
765 is_default:		if (bytes == -1) {
766 				*next++ = (char)c;
767 			} else {
768 				(void) wctomb(next, c);
769 				next += bytes;
770 			}
771 			break;
772 		}
773 	}
774 }
775 
776 
777 /*
778  * ermsg():	print out an error message, and indicate failure globally.
779  *
780  *	Assumes that message has already been gettext()'d. It would be
781  *	nice if we could just do the gettext() here, but we can't, since
782  *	since xgettext(1M) wouldn't be able to pick up our error message.
783  */
784 /* PRINTFLIKE1 */
785 static void
786 ermsg(char *messages, ...)
787 {
788 	va_list	ap;
789 
790 	va_start(ap, messages);
791 
792 	(void) fprintf(stderr, "xargs: ");
793 	(void) vfprintf(stderr, messages, ap);
794 
795 	va_end(ap);
796 	OK = FALSE;
797 }
798 
799 static int
800 echoargs()
801 {
802 	char	**anarg;
803 	char	**tanarg;	/* tmp ptr			*/
804 	int		i;
805 	char		reply[LINE_MAX];
806 
807 	tanarg = anarg = arglist-1;
808 
809 	/*
810 	 * write out each argument, separated by a space. the tanarg
811 	 * nonsense is for xcu4 testsuite compliance - so that an
812 	 * extra space isn't echoed after the last argument.
813 	 */
814 	while (*++anarg) {		/* while there's an argument	*/
815 		++tanarg;		/* follow anarg			*/
816 		(void) write(2, *anarg, strlen(*anarg));
817 
818 		if (*++tanarg) {	/* if there's another argument:	*/
819 			(void) write(2, " ", 1); /* add a space		*/
820 			--tanarg;	/* reset back to anarg		*/
821 		}
822 	}
823 	if (PROMPT == -1) {
824 		(void) write(2, "\n", 1);
825 		return (TRUE);
826 	}
827 
828 	/*
829 	 * at this point, there may be unexpected input pending on stdin,
830 	 * if one has used the -n flag. this presents a problem, because
831 	 * if we simply do a read(), we'll get the extra input, instead
832 	 * of our desired y/n input. so, we see if there's any extra
833 	 * input, and if there is, then we will store it.
834 	 */
835 	saveinput();
836 
837 	(void) write(2, "?...", 4);	/* ask the user for input	*/
838 
839 	for (i = 0; i < LINE_MAX && read(PROMPT, &reply[i], 1) > 0; i++) {
840 		if (reply[i] == '\n') {
841 			if (i == 0)
842 				return (FALSE);
843 			break;
844 		}
845 	}
846 	reply[i] = 0;
847 
848 	/* flush remainder of line if necessary */
849 	if (i == LINE_MAX) {
850 		char	bitbucket;
851 
852 		while ((read(PROMPT, &bitbucket, 1) > 0) && (bitbucket != '\n'))
853 			;
854 	}
855 
856 	return (yes_check(reply));
857 }
858 
859 
860 static char *
861 insert(char *pattern, char *subst)
862 {
863 	static char	buffer[MAXSBUF+1];
864 	int		len, ipatlen;
865 	char	*pat;
866 	char	*bufend;
867 	char	*pbuf;
868 
869 	len = strlen(subst);
870 	ipatlen = strlen(INSPAT) - 1;
871 	pat = pattern - 1;
872 	pbuf = buffer;
873 	bufend = &buffer[MAXSBUF];
874 
875 	while (*++pat) {
876 		if (xindex(pat, INSPAT) == 0) {
877 			if (pbuf + len >= bufend) {
878 				break;
879 			} else {
880 				(void) strcpy(pbuf, subst);
881 				pat += ipatlen;
882 				pbuf += len;
883 			}
884 		} else {
885 			*pbuf++ = *pat;
886 			if (pbuf >= bufend)
887 				break;
888 		}
889 	}
890 
891 	if (!*pat) {
892 		*pbuf = '\0';
893 		return (buffer);
894 	} else {
895 		ermsg(gettext("Maximum argument size with insertion via %s's "
896 		    "exceeded\n"), INSPAT);
897 		ERR = TRUE;
898 		return (0);
899 	}
900 }
901 
902 
903 static void
904 addibuf(struct inserts	*p)
905 {
906 	char	*newarg, *skel, *sub;
907 	int		l;
908 
909 	skel = p->p_skel;
910 	sub = *ARGV;
911 	linesize -= strlen(skel) + 1;
912 	newarg = insert(skel, sub);
913 	if (ERR)
914 		return;
915 
916 	if (checklen(newarg)) {
917 		if ((ibufsize += (l = strlen(newarg) + 1)) > MAXIBUF) {
918 			ermsg(gettext("Insert buffer overflow\n"));
919 			ERR = TRUE;
920 		}
921 		(void) strcpy(p_ibuf, newarg);
922 		*(p->p_ARGV) = p_ibuf;
923 		p_ibuf += l;
924 	}
925 }
926 
927 
928 /*
929  * getchr():	get the next character.
930  * description:
931  *	we get the next character from pio.structure, if there's a character
932  *	to get. this may happen when we've had to flush stdin=/dev/tty,
933  *	but still wanted to preserve the characters for later processing.
934  *
935  *	otherwise we just get the character from stdin.
936  */
937 static int
938 getchr(void)
939 {
940 	char	c;
941 
942 	do {
943 		if (queued_data == NULL) {
944 			char	*buffer;
945 			int	len;
946 
947 			if ((buffer = malloc(BUFSIZE)) == NULL) {
948 				perror(gettext(
949 				    "xargs: Memory allocation failure"));
950 				exit(1);
951 			}
952 
953 			if ((len = read(0, buffer, BUFSIZE)) == 0)
954 				return (0);
955 			if (len == -1) {
956 				perror(gettext("xargs: Read failure"));
957 				exit(1);
958 			}
959 
960 			queue(buffer, len, TAIL);
961 		}
962 
963 		file_offset++;
964 		c = *queued_data->cur++;	 /* get the next character */
965 		if (--queued_data->length == 0) { /* at the end of buffer? */
966 			pio	*nxt = queued_data->next;
967 
968 			free(queued_data->start);
969 			free(queued_data);
970 			queued_data = nxt;
971 		}
972 	} while (c == '\0');
973 	return (c);
974 }
975 
976 
/*
 * getwchr():	get the next wide character.
 *
 *	Collects bytes from getchr() until they form a complete
 *	character in the current locale, and returns that character.
 *	Returns L'\0' at end of input.  If MB_CUR_MAX bytes don't form a
 *	character, or the input ends in the middle of one, the input is
 *	considered corrupt and xargs exits with status 1.
 */
static wchar_t
getwchr(void)
{
	int		i;
	wchar_t		wch;
	unsigned char	buffer[MB_LEN_MAX + 1];

	for (i = 0; i < (int)MB_CUR_MAX; ) {
		/* getchr() returns 0 only at end of input */
		if ((buffer[i++] = getchr()) == '\0') {
			/* We have reached  EOF */
			if (i == 1) {
				/* TRUE EOF has been reached */
				return (L'\0');
			}
			/*
			 * We have some characters in our buffer still so it
			 * must be an invalid character right before EOF.
			 */
			break;
		}

		/* If this succeeds then we are done */
		if (mbtowc(&wch, (char *)buffer, i) != -1)
			return (wch);
	}

	/*
	 * We have now encountered an illegal character sequence.
	 * There is nothing much we can do at this point but
	 * return an error.  If we attempt to recover we may in fact
	 * return garbage as arguments, from the customer's point
	 * of view.  After all what if they are feeding us a file
	 * generated in another locale?
	 */
	errno = EILSEQ;
	perror(gettext("xargs: Corrupt input file"));
	exit(1);
	/* NOTREACHED */
}
1016 
1017 
1018 static void
1019 ungetwchr(wchar_t wch)
1020 {
1021 	char	*buffer;
1022 	int	bytes;
1023 
1024 	if ((buffer = malloc(MB_LEN_MAX)) == NULL) {
1025 		perror(gettext("xargs: Memory allocation failure"));
1026 		exit(1);
1027 	}
1028 	bytes = wctomb(buffer, wch);
1029 	queue(buffer, bytes, HEAD);
1030 }
1031 
1032 
1033 static int
1034 lcall(char *sub, char **subargs)
1035 {
1036 	int retcode, retry = 0;
1037 	pid_t iwait, child;
1038 
1039 	for (; ; ) {
1040 		switch (child = fork()) {
1041 		default:
1042 			while ((iwait = wait(&retcode)) != child &&
1043 			    iwait != (pid_t)-1)
1044 				;
1045 			if (iwait == (pid_t)-1) {
1046 				perror(gettext("xargs: Wait failure"));
1047 				exit(122);
1048 				/* NOTREACHED */
1049 			}
1050 			if (WIFSIGNALED(retcode)) {
1051 				ermsg(gettext("Child killed with signal %d\n"),
1052 				    WTERMSIG(retcode));
1053 				exit(125);
1054 				/* NOTREACHED */
1055 			}
1056 			if ((WEXITSTATUS(retcode) & 0377) == 0377) {
1057 				ermsg(gettext("Command could not continue "
1058 				    "processing data\n"));
1059 				exit(124);
1060 				/* NOTREACHED */
1061 			}
1062 			return (WEXITSTATUS(retcode));
1063 		case 0:
1064 			(void) execvp(sub, subargs);
1065 			perror(gettext("xargs: Could not exec command"));
1066 			if (errno == EACCES)
1067 				exit(126);
1068 			exit(127);
1069 			/* NOTREACHED */
1070 		case -1:
1071 			if (errno != EAGAIN && retry++ < FORK_RETRY) {
1072 				perror(gettext("xargs: Could not fork child"));
1073 				exit(123);
1074 			}
1075 			(void) sleep(1);
1076 		}
1077 	}
1078 }
1079 
1080 
/*
 * xindex():	locate one string within another.
 *
 *	as1	the string to search
 *	as2	the string to look for
 *
 *	Returns the offset of the first occurrence of `as2' in `as1',
 *	or -1 if there is none.  An empty `as2' is never found.
 */
static int
xindex(char *as1, char *as2)
{
	char	*hit;

	/* strstr() would find an empty string at offset 0; we don't */
	if (*as2 == '\0')
		return (-1);

	hit = strstr(as1, as2);
	if (hit == NULL)
		return (-1);

	return ((int)(hit - as1));
}
1110 
1111 
1112 static void
1113 usage()
1114 {
1115 	ermsg(gettext(USAGEMSG));
1116 	OK = FALSE;
1117 }
1118 
1119 
1120 
/*
 * parseargs():		modify the args
 *	since the -e, -i and -l flags all take optional subarguments,
 *	and getopt(3C) cannot express optional subarguments, we rewrite
 *	our local argument count and strings so that each such flag is
 *	always followed by a separate subargument, which makes the list
 *	easy to handle via getopt(3C). flags written in the old
 *	"-Xsubargument" form are split the same way:
 *
 *	-e	-> "-e" ""
 *	-e3	-> "-e" "3"
 *	-Estr	-> "-E" "str"
 *	-i	-> "-i" "{}"
 *	-irep	-> "-i" "rep"
 *	-l	-> "-l" "1"
 *	-l10	-> "-l" "10"
 */
1138 static void
1139 parseargs(int ac, char **av)
1140 {
1141 	int i;			/* current argument			*/
1142 	int cflag;		/* 0 = not processing cmd arg		*/
1143 
1144 	if ((mav = malloc((ac * 2 + 1) * sizeof (char *))) == NULL) {
1145 		perror(gettext("xargs: Memory allocation failure"));
1146 		exit(1);
1147 	}
1148 
1149 	/* for each argument, see if we need to change things:		*/
1150 	for (i = mac = cflag = 0; (av[i] != NULL) && i < ac; i++, mac++) {
1151 		if ((mav[mac] = strdup(av[i])) == NULL) {
1152 			perror(gettext("xargs: Memory allocation failure"));
1153 			exit(1);
1154 		}
1155 
1156 		/* -- has been found or argument list is fully processes */
1157 		if (cflag)
1158 			continue;
1159 
1160 		/*
1161 		 * if we're doing special processing, and we've got a flag
1162 		 */
1163 		else if ((av[i][0] == '-') && (av[i][1] != NULL)) {
1164 			char	*def;
1165 
1166 			switch (av[i][1]) {
1167 			case	'e':
1168 				def = ""; /* -e with no arg turns off eof */
1169 				goto process_special;
1170 			case	'i':
1171 				def = INSPAT_STR;
1172 				goto process_special;
1173 			case	'l':
1174 				def = "1";
1175 process_special:
1176 				/*
1177 				 * if there's no sub-option, we *must* add
1178 				 * a default one. this is because xargs must
1179 				 * be able to distinguish between a valid
1180 				 * suboption, and a command name.
1181 				 */
1182 				if (av[i][2] == NULL) {
1183 					mav[++mac] = strdup(def);
1184 				} else {
1185 					/* clear out our version: */
1186 					mav[mac][2] = NULL;
1187 					mav[++mac] = strdup(&av[i][2]);
1188 				}
1189 				if (mav[mac] == NULL) {
1190 					perror(gettext("xargs: Memory"
1191 					    " allocation failure"));
1192 					exit(1);
1193 				}
1194 				break;
1195 
1196 			/* flags with required subarguments:		*/
1197 
1198 			/*
1199 			 * there are two separate cases here. either the
1200 			 * flag can have the normal XCU4 handling
1201 			 * (of the form: -X subargument); or it can have
1202 			 * the old solaris 2.[0-4] handling (of the
1203 			 * form: -Xsubargument). in order to maintain
1204 			 * backwards compatibility, we must support the
1205 			 * latter case. we handle the latter possibility
1206 			 * first so both the old solaris way of handling
1207 			 * and the new XCU4 way of handling things are allowed.
1208 			 */
1209 			case	'n':	/* FALLTHROUGH			*/
1210 			case	's':	/* FALLTHROUGH			*/
1211 			case	'E':	/* FALLTHROUGH			*/
1212 			case	'I':	/* FALLTHROUGH			*/
1213 			case	'L':
1214 				/*
1215 				 * if the second character isn't null, then
1216 				 * the user has specified the old syntax.
1217 				 * we move the subargument into our
1218 				 * mod'd argument list.
1219 				 */
1220 				if (av[i][2] != NULL) {
1221 					/* first clean things up:	*/
1222 					mav[mac][2] = NULL;
1223 
1224 					/* now add the separation:	*/
1225 					++mac;	/* inc to next mod'd arg */
1226 					if ((mav[mac] = strdup(&av[i][2])) ==
1227 					    NULL) {
1228 						perror(gettext("xargs: Memory"
1229 						    " allocation failure"));
1230 						exit(1);
1231 					}
1232 					break;
1233 				}
1234 				i++;
1235 				mac++;
1236 
1237 				if (av[i] == NULL) {
1238 					mav[mac] = NULL;
1239 					return;
1240 				}
1241 				if ((mav[mac] = strdup(av[i])) == NULL) {
1242 					perror(gettext("xargs: Memory"
1243 					    " allocation failure"));
1244 					exit(1);
1245 				}
1246 				break;
1247 
1248 			/* flags */
1249 			case 'p' :
1250 			case 't' :
1251 			case 'x' :
1252 				break;
1253 
1254 			case '-' :
1255 			default:
1256 				/*
1257 				 * here we've hit the cmd argument. so
1258 				 * we'll stop special processing, as the
1259 				 * cmd may have a "-i" etc., argument,
1260 				 * and we don't want to add a "" to it.
1261 				 */
1262 				cflag = 1;
1263 				break;
1264 			}
1265 		} else if (i > 0) {	/* if we're not the 1st arg	*/
1266 			/*
1267 			 * if it's not a flag, then it *must* be the cmd.
1268 			 * set cflag, so we don't mishandle the -[eil] flags.
1269 			 */
1270 			cflag = 1;
1271 		}
1272 	}
1273 
1274 	mav[mac] = NULL;
1275 }
1276 
1277 
1278 /*
1279  * saveinput(): pick up any pending input, so it can be processed later.
1280  *
1281  * description:
1282  *	the purpose of this routine is to allow us to handle the user
1283  *	typing in a 'y' or 'n', when there's existing characters already
1284  *	in stdin. this happens when one gives the "-n" option along with
1285  *	"-p". the problem occurs when the user first types in more arguments
1286  *	than specified by the -n number. echoargs() wants to read stdin
1287  *	in order to get the user's response, but if there's already stuff
1288  *	there, echoargs() won't read the proper character.
1289  *
1290  *	the solution provided by this routine is to pick up all characters
1291  *	(if any), and store them for later processing.
1292  */
1293 
1294 void
1295 saveinput()
1296 {
1297 	char *buffer;		/* ptr to the floating data buffer	*/
1298 	struct strpeek speek;	/* to see what's on the queue		*/
1299 	struct strpeek *ps;
1300 
1301 	/* if we're not in -p mode, skip				*/
1302 	if (PROMPT == -1) {
1303 		return;
1304 	}
1305 
1306 
1307 	/* now see if there's any activity pending:			*/
1308 	ps = &speek;
1309 	ps->ctlbuf.maxlen = 0;
1310 	ps->ctlbuf.len = 0;
1311 	ps->ctlbuf.buf = NULL;
1312 	ps->flags = 0;
1313 	ps->databuf.maxlen = MAX_INPUT;
1314 	ps->databuf.len = 0;
1315 	if ((buffer = malloc((size_t)MAX_INPUT)) == NULL) {
1316 		perror(gettext("xargs: Memory allocation failure"));
1317 		exit(1);
1318 	}
1319 	ps->databuf.buf = (char *)buffer;
1320 
1321 	if (ioctl(PROMPT, I_PEEK, ps) == -1) {
1322 		perror(gettext("xargs: I_PEEK failure"));
1323 		exit(1);
1324 	}
1325 
1326 	if (ps->databuf.len > 0) {
1327 		int	len;
1328 
1329 		if ((len = read(PROMPT, buffer, ps->databuf.len)) == -1) {
1330 			perror(gettext("xargs: read failure"));
1331 			exit(1);
1332 		}
1333 		queue(buffer, len, TAIL);
1334 	}
1335 }
1336