xref: /illumos-gate/usr/src/cmd/sh/word.c (revision 6353250f8fb7d9f5b595f795d9f446e438685e2b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2000 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved  	*/
30 
31 /*
32  * UNIX shell
33  */
34 
35 #include	"defs.h"
36 #include	"sym.h"
37 #include	<errno.h>
38 #include	<fcntl.h>
39 
40 static int	readb(struct fileblk *, int, int);
41 
42 /* ========	character handling for command lines	======== */
43 
44 int
45 word(void)
46 {
47 	unsigned int	c, d, cc;
48 	struct argnod	*arg = (struct argnod *)locstak();
49 	unsigned char	*argp = arg->argval;
50 	unsigned char	*oldargp;
51 	int		alpha = 1;
52 	unsigned char *pc;
53 
54 	wdnum = 0;
55 	wdset = 0;
56 
57 	while (1)
58 	{
59 		while (c = nextwc(), space(c))		/* skipc() */
60 			;
61 
62 		if (c == COMCHAR)
63 		{
64 			while ((c = readwc()) != NL && c != EOF);
65 			peekc = c;
66 		}
67 		else
68 		{
69 			break;	/* out of comment - white space loop */
70 		}
71 	}
72 	if (!eofmeta(c))
73 	{
74 		do
75 		{
76 			if (c == LITERAL)
77 			{
78 				oldargp = argp;
79 				while ((c = readwc()) && c != LITERAL){
80 					/*
81 					 * quote each character within
82 					 * single quotes
83 					 */
84 					pc = readw(c);
85 					if (argp >= brkend)
86 						growstak(argp);
87 					*argp++='\\';
88 				/* Pick up rest of multibyte character */
89 					if (c == NL)
90 						chkpr();
91 					while (c = *pc++) {
92 						if (argp >= brkend)
93 							growstak(argp);
94 						*argp++ = (unsigned char)c;
95 					}
96 				}
97 				if (argp == oldargp) { /* null argument - '' */
98 				/*
99 				 * Word will be represented by quoted null
100 				 * in macro.c if necessary
101 				 */
102 					if (argp >= brkend)
103 						growstak(argp);
104 					*argp++ = '"';
105 					if (argp >= brkend)
106 						growstak(argp);
107 					*argp++ = '"';
108 				}
109 			}
110 			else
111 			{
112 				if (c == 0) {
113 					if (argp >= brkend)
114 						growstak(argp);
115 					*argp++ = 0;
116 				} else {
117 					pc = readw(c);
118 					while (*pc) {
119 						if (argp >= brkend)
120 							growstak(argp);
121 						*argp++ = *pc++;
122 					}
123 				}
124 				if (c == '\\') {
125 					if ((cc = readwc()) == 0) {
126 						if (argp >= brkend)
127 							growstak(argp);
128 						*argp++ = 0;
129 					} else {
130 						pc = readw(cc);
131 						while (*pc) {
132 							if (argp >= brkend)
133 								growstak(argp);
134 							*argp++ = *pc++;
135 						}
136 					}
137 				}
138 				if (c == '=')
139 					wdset |= alpha;
140 				if (!alphanum(c))
141 					alpha = 0;
142 				if (qotchar(c))
143 				{
144 					d = c;
145 					for (;;)
146 					{
147 						if ((c = nextwc()) == 0) {
148 							if (argp >= brkend)
149 								growstak(argp);
150 							*argp++ = 0;
151 						} else {
152 							pc = readw(c);
153 							while (*pc) {
154 								if (argp >= brkend)
155 									growstak(argp);
156 								*argp++ = *pc++;
157 							}
158 						}
159 						if (c == 0 || c == d)
160 							break;
161 						if (c == NL)
162 							chkpr();
163 						/*
164 						 * don't interpret quoted
165 						 * characters
166 						 */
167 						if (c == '\\') {
168 							if ((cc = readwc()) == 0) {
169 								if (argp >= brkend)
170 									growstak(argp);
171 								*argp++ = 0;
172 							} else {
173 								pc = readw(cc);
174 								while (*pc) {
175 									if (argp >= brkend)
176 										growstak(argp);
177 									*argp++ = *pc++;
178 								}
179 							}
180 						}
181 					}
182 				}
183 			}
184 		} while ((c = nextwc(), !eofmeta(c)));
185 		argp = endstak(argp);
186 		if (!letter(arg->argval[0]))
187 			wdset = 0;
188 
189 		peekn = c | MARK;
190 		if (arg->argval[1] == 0 &&
191 		    (d = arg->argval[0], digit(d)) &&
192 		    (c == '>' || c == '<'))
193 		{
194 			word();
195 			wdnum = d - '0';
196 		}else{ /* check for reserved words */
197 			if (reserv == FALSE ||
198 			    (wdval = syslook(arg->argval,
199 					reserved, no_reserved)) == 0) {
200 				wdval = 0;
201 			}
202 			/* set arg for reserved words too */
203 			wdarg = arg;
204 		}
205 	}else if (dipchar(c)){
206 		if ((d = nextwc()) == c)
207 		{
208 			wdval = c | SYMREP;
209 			if (c == '<')
210 			{
211 				if ((d = nextwc()) == '-')
212 					wdnum |= IOSTRIP;
213 				else
214 					peekn = d | MARK;
215 			}
216 		}
217 		else
218 		{
219 			peekn = d | MARK;
220 			wdval = c;
221 		}
222 	}
223 	else
224 	{
225 		if ((wdval = c) == EOF)
226 			wdval = EOFSYM;
227 		if (iopend && eolchar(c))
228 		{
229 			struct ionod *tmp_iopend;
230 			tmp_iopend = iopend;
231 			iopend = 0;
232 			copy(tmp_iopend);
233 		}
234 	}
235 	reserv = FALSE;
236 	return (wdval);
237 }
238 
/*
 * Consume characters from nextwc() for as long as they satisfy space()
 * and return the first one that does not.
 */
unsigned int skipwc()
{
	unsigned int wc;

	do {
		wc = nextwc();
	} while (space(wc));

	return (wc);
}
247 
248 unsigned int nextwc()
249 {
250 	unsigned int	c, d;
251 
252 retry:
253 	if ((d = readwc()) == ESCAPE) {
254 		if ((c = readwc()) == NL) {
255 			chkpr();
256 			goto retry;
257 		}
258 		peekc = c | MARK;
259 	}
260 	return (d);
261 }
262 
263 unsigned char *readw(d)
264 wchar_t	d;
265 {
266 	static unsigned char c[MULTI_BYTE_MAX + 1];
267 	int length;
268 	wchar_t l;
269 	if (isascii(d)) {
270 		c[0] = d;
271 		c[1] = '\0';
272 		return (c);
273 	}
274 
275 	length = wctomb((char *)c, d);
276 	if (length <= 0) {
277 		c[0] = (unsigned char)d;
278 		length = 1;
279 	}
280 	c[length] = '\0';
281 	return (c);
282 }
283 
284 unsigned int
285 readwc()
286 {
287 	wchar_t	c;
288 	int	len;
289 	struct fileblk	*f;
290 	int	mbmax = MB_CUR_MAX;
291 	int	i, mlen;
292 
293 	if (peekn) {
294 		c = peekn & 0x7fffffff;
295 		peekn = 0;
296 		return (c);
297 	}
298 	if (peekc) {
299 		c = peekc & 0x7fffffff;
300 		peekc = 0;
301 		return (c);
302 	}
303 	f = standin;
304 
305 retry:
306 	if (f->fend > f->fnxt) {
307 		/*
308 		 * something in buffer
309 		 */
310 		if (*f->fnxt == 0) {
311 			f->fnxt++;
312 			f->nxtoff++;
313 			if (f->feval == 0)
314 				goto retry;	/* = c = readc(); */
315 			if (estabf(*f->feval++))
316 				c = EOF;
317 			else
318 				c = SPACE;
319 			if (flags & readpr && standin->fstak == 0)
320 				prc(c);
321 			if (c == NL)
322 				f->flin++;
323 			return (c);
324 		}
325 
326 		if (isascii(c = (unsigned char)*f->fnxt)) {
327 			f->fnxt++;
328 			f->nxtoff++;
329 			if (flags & readpr && standin->fstak == 0)
330 				prc(c);
331 			if (c == NL)
332 				f->flin++;
333 			return (c);
334 		}
335 
336 		for (i = 1; i <= mbmax; i++) {
337 			int	rest;
338 			if ((rest = f->fend - f->fnxt) < i) {
339 				/*
340 				 * not enough bytes available
341 				 * f->fsiz could be BUFFERSIZE or 1
342 				 * since mbmax is enough smaller than BUFFERSIZE,
343 				 * this loop won't overrun the f->fbuf buffer.
344 				 */
345 				len = readb(f,
346 					(f->fsiz == 1) ? 1 : (f->fsiz - rest),
347 					rest);
348 				if (len == 0)
349 					break;
350 			}
351 			mlen = mbtowc(&c, (char *)f->fnxt, i);
352 			if (mlen > 0)
353 				break;
354 		}
355 
356 		if (i > mbmax) {
357 			/*
358 			 * enough bytes available but cannot be converted to
359 			 * a valid wchar.
360 			 */
361 			c = (unsigned char)*f->fnxt;
362 			mlen = 1;
363 		}
364 
365 		f->fnxt += mlen;
366 		f->nxtoff += mlen;
367 		if (flags & readpr && standin->fstak == 0)
368 			prwc(c);
369 		if (c == NL)
370 			f->flin++;
371 		return (c);
372 	}
373 
374 	if (f->feof || f->fdes < 0){
375 		c = EOF;
376 		f->feof++;
377 		return (c);
378 	}
379 
380 	if (readb(f, f->fsiz, 0) <= 0){
381 		if (f->fdes != input || !isatty(input)) {
382 			close(f->fdes);
383 			f->fdes = -1;
384 		}
385 		f->feof++;
386 		c = EOF;
387 		return (c);
388 	}
389 	goto retry;
390 }
391 
392 static int
393 readb(struct fileblk *f, int toread, int rest)
394 {
395 	int	len;
396 	int	fflags;
397 
398 	if (rest) {
399 		/*
400 		 * copies the remaining 'rest' bytes from f->fnxt
401 		 * to f->fbuf
402 		 */
403 		(void) memcpy(f->fbuf, f->fnxt, rest);
404 		f->fnxt = f->fbuf;
405 		f->fend = f->fnxt + rest;
406 		f->nxtoff = 0;
407 		f->endoff = rest;
408 		if (f->fbuf[rest - 1] == '\n') {
409 			/*
410 			 * if '\n' found, it should be
411 			 * a bondary of multibyte char.
412 			 */
413 			return (rest);
414 		}
415 	}
416 
417 retry:
418 	do {
419 		if (trapnote & SIGSET) {
420 			newline();
421 			sigchk();
422 		} else if ((trapnote & TRAPSET) && (rwait > 0)) {
423 			newline();
424 			chktrap();
425 			clearup();
426 		}
427 	} while ((len = read(f->fdes, f->fbuf + rest, toread)) < 0 && trapnote);
428 	/*
429 	 * if child sets O_NDELAY or O_NONBLOCK on stdin
430 	 * and exited then turn the modes off and retry
431 	 */
432 	if (len == 0) {
433 		if (((flags & intflg) ||
434 		    ((flags & oneflg) == 0 && isatty(input) &&
435 		    (flags & stdflg))) &&
436 		    ((fflags = fcntl(f->fdes, F_GETFL, 0)) & O_NDELAY)) {
437 			fflags &= ~O_NDELAY;
438 			fcntl(f->fdes, F_SETFL, fflags);
439 			goto retry;
440 		}
441 	} else if (len < 0) {
442 		if (errno == EAGAIN) {
443 			fflags = fcntl(f->fdes, F_GETFL, 0);
444 			fflags &= ~O_NONBLOCK;
445 			fcntl(f->fdes, F_SETFL, fflags);
446 			goto retry;
447 		}
448 		len = 0;
449 	}
450 	f->fnxt = f->fbuf;
451 	f->fend = f->fnxt + (len + rest);
452 	f->nxtoff = 0;
453 	f->endoff = len + rest;
454 	return (len + rest);
455 }
456