xref: /illumos-gate/usr/src/cmd/sh/word.c (revision e71ca95ca6de23d33b54cb55cefdef30bc7c969b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright 2000 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved  	*/
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 /*
33  * UNIX shell
34  */
35 
#include	"defs.h"
#include	"sym.h"
#include	<errno.h>
#include	<fcntl.h>
#include	<string.h>
40 
41 static int	readb(struct fileblk *, int, int);
42 
43 /* ========	character handling for command lines	======== */
44 
45 int
46 word(void)
47 {
48 	unsigned int	c, d, cc;
49 	struct argnod	*arg = (struct argnod *)locstak();
50 	unsigned char	*argp = arg->argval;
51 	unsigned char	*oldargp;
52 	int		alpha = 1;
53 	unsigned char *pc;
54 
55 	wdnum = 0;
56 	wdset = 0;
57 
58 	while (1)
59 	{
60 		while (c = nextwc(), space(c))		/* skipc() */
61 			;
62 
63 		if (c == COMCHAR)
64 		{
65 			while ((c = readwc()) != NL && c != EOF);
66 			peekc = c;
67 		}
68 		else
69 		{
70 			break;	/* out of comment - white space loop */
71 		}
72 	}
73 	if (!eofmeta(c))
74 	{
75 		do
76 		{
77 			if (c == LITERAL)
78 			{
79 				oldargp = argp;
80 				while ((c = readwc()) && c != LITERAL){
81 					/*
82 					 * quote each character within
83 					 * single quotes
84 					 */
85 					pc = readw(c);
86 					if (argp >= brkend)
87 						growstak(argp);
88 					*argp++='\\';
89 				/* Pick up rest of multibyte character */
90 					if (c == NL)
91 						chkpr();
92 					while (c = *pc++) {
93 						if (argp >= brkend)
94 							growstak(argp);
95 						*argp++ = (unsigned char)c;
96 					}
97 				}
98 				if (argp == oldargp) { /* null argument - '' */
99 				/*
100 				 * Word will be represented by quoted null
101 				 * in macro.c if necessary
102 				 */
103 					if (argp >= brkend)
104 						growstak(argp);
105 					*argp++ = '"';
106 					if (argp >= brkend)
107 						growstak(argp);
108 					*argp++ = '"';
109 				}
110 			}
111 			else
112 			{
113 				if (c == 0) {
114 					if (argp >= brkend)
115 						growstak(argp);
116 					*argp++ = 0;
117 				} else {
118 					pc = readw(c);
119 					while (*pc) {
120 						if (argp >= brkend)
121 							growstak(argp);
122 						*argp++ = *pc++;
123 					}
124 				}
125 				if (c == '\\') {
126 					if ((cc = readwc()) == 0) {
127 						if (argp >= brkend)
128 							growstak(argp);
129 						*argp++ = 0;
130 					} else {
131 						pc = readw(cc);
132 						while (*pc) {
133 							if (argp >= brkend)
134 								growstak(argp);
135 							*argp++ = *pc++;
136 						}
137 					}
138 				}
139 				if (c == '=')
140 					wdset |= alpha;
141 				if (!alphanum(c))
142 					alpha = 0;
143 				if (qotchar(c))
144 				{
145 					d = c;
146 					for (;;)
147 					{
148 						if ((c = nextwc()) == 0) {
149 							if (argp >= brkend)
150 								growstak(argp);
151 							*argp++ = 0;
152 						} else {
153 							pc = readw(c);
154 							while (*pc) {
155 								if (argp >= brkend)
156 									growstak(argp);
157 								*argp++ = *pc++;
158 							}
159 						}
160 						if (c == 0 || c == d)
161 							break;
162 						if (c == NL)
163 							chkpr();
164 						/*
165 						 * don't interpret quoted
166 						 * characters
167 						 */
168 						if (c == '\\') {
169 							if ((cc = readwc()) == 0) {
170 								if (argp >= brkend)
171 									growstak(argp);
172 								*argp++ = 0;
173 							} else {
174 								pc = readw(cc);
175 								while (*pc) {
176 									if (argp >= brkend)
177 										growstak(argp);
178 									*argp++ = *pc++;
179 								}
180 							}
181 						}
182 					}
183 				}
184 			}
185 		} while ((c = nextwc(), !eofmeta(c)));
186 		argp = endstak(argp);
187 		if (!letter(arg->argval[0]))
188 			wdset = 0;
189 
190 		peekn = c | MARK;
191 		if (arg->argval[1] == 0 &&
192 		    (d = arg->argval[0], digit(d)) &&
193 		    (c == '>' || c == '<'))
194 		{
195 			word();
196 			wdnum = d - '0';
197 		}else{ /* check for reserved words */
198 			if (reserv == FALSE ||
199 			    (wdval = syslook(arg->argval,
200 					reserved, no_reserved)) == 0) {
201 				wdval = 0;
202 			}
203 			/* set arg for reserved words too */
204 			wdarg = arg;
205 		}
206 	}else if (dipchar(c)){
207 		if ((d = nextwc()) == c)
208 		{
209 			wdval = c | SYMREP;
210 			if (c == '<')
211 			{
212 				if ((d = nextwc()) == '-')
213 					wdnum |= IOSTRIP;
214 				else
215 					peekn = d | MARK;
216 			}
217 		}
218 		else
219 		{
220 			peekn = d | MARK;
221 			wdval = c;
222 		}
223 	}
224 	else
225 	{
226 		if ((wdval = c) == EOF)
227 			wdval = EOFSYM;
228 		if (iopend && eolchar(c))
229 		{
230 			struct ionod *tmp_iopend;
231 			tmp_iopend = iopend;
232 			iopend = 0;
233 			copy(tmp_iopend);
234 		}
235 	}
236 	reserv = FALSE;
237 	return (wdval);
238 }
239 
/*
 * Read characters with nextwc() until one is found for which space() is
 * false, and return that character.
 */
unsigned int
skipwc()
{
	unsigned int	wc;

	do {
		wc = nextwc();
	} while (space(wc));

	return (wc);
}
248 
249 unsigned int nextwc()
250 {
251 	unsigned int	c, d;
252 
253 retry:
254 	if ((d = readwc()) == ESCAPE) {
255 		if ((c = readwc()) == NL) {
256 			chkpr();
257 			goto retry;
258 		}
259 		peekc = c | MARK;
260 	}
261 	return (d);
262 }
263 
264 unsigned char *readw(d)
265 wchar_t	d;
266 {
267 	static unsigned char c[MULTI_BYTE_MAX + 1];
268 	int length;
269 	wchar_t l;
270 	if (isascii(d)) {
271 		c[0] = d;
272 		c[1] = '\0';
273 		return (c);
274 	}
275 
276 	length = wctomb((char *)c, d);
277 	if (length <= 0) {
278 		c[0] = (unsigned char)d;
279 		length = 1;
280 	}
281 	c[length] = '\0';
282 	return (c);
283 }
284 
285 unsigned int
286 readwc()
287 {
288 	wchar_t	c;
289 	int	len;
290 	struct fileblk	*f;
291 	int	mbmax = MB_CUR_MAX;
292 	int	i, mlen;
293 
294 	if (peekn) {
295 		c = peekn & 0x7fffffff;
296 		peekn = 0;
297 		return (c);
298 	}
299 	if (peekc) {
300 		c = peekc & 0x7fffffff;
301 		peekc = 0;
302 		return (c);
303 	}
304 	f = standin;
305 
306 retry:
307 	if (f->fend > f->fnxt) {
308 		/*
309 		 * something in buffer
310 		 */
311 		if (*f->fnxt == 0) {
312 			f->fnxt++;
313 			f->nxtoff++;
314 			if (f->feval == 0)
315 				goto retry;	/* = c = readc(); */
316 			if (estabf(*f->feval++))
317 				c = EOF;
318 			else
319 				c = SPACE;
320 			if (flags & readpr && standin->fstak == 0)
321 				prc(c);
322 			if (c == NL)
323 				f->flin++;
324 			return (c);
325 		}
326 
327 		if (isascii(c = (unsigned char)*f->fnxt)) {
328 			f->fnxt++;
329 			f->nxtoff++;
330 			if (flags & readpr && standin->fstak == 0)
331 				prc(c);
332 			if (c == NL)
333 				f->flin++;
334 			return (c);
335 		}
336 
337 		for (i = 1; i <= mbmax; i++) {
338 			int	rest;
339 			if ((rest = f->fend - f->fnxt) < i) {
340 				/*
341 				 * not enough bytes available
342 				 * f->fsiz could be BUFFERSIZE or 1
343 				 * since mbmax is enough smaller than BUFFERSIZE,
344 				 * this loop won't overrun the f->fbuf buffer.
345 				 */
346 				len = readb(f,
347 					(f->fsiz == 1) ? 1 : (f->fsiz - rest),
348 					rest);
349 				if (len == 0)
350 					break;
351 			}
352 			mlen = mbtowc(&c, (char *)f->fnxt, i);
353 			if (mlen > 0)
354 				break;
355 		}
356 
357 		if (i > mbmax) {
358 			/*
359 			 * enough bytes available but cannot be converted to
360 			 * a valid wchar.
361 			 */
362 			c = (unsigned char)*f->fnxt;
363 			mlen = 1;
364 		}
365 
366 		f->fnxt += mlen;
367 		f->nxtoff += mlen;
368 		if (flags & readpr && standin->fstak == 0)
369 			prwc(c);
370 		if (c == NL)
371 			f->flin++;
372 		return (c);
373 	}
374 
375 	if (f->feof || f->fdes < 0){
376 		c = EOF;
377 		f->feof++;
378 		return (c);
379 	}
380 
381 	if (readb(f, f->fsiz, 0) <= 0){
382 		if (f->fdes != input || !isatty(input)) {
383 			close(f->fdes);
384 			f->fdes = -1;
385 		}
386 		f->feof++;
387 		c = EOF;
388 		return (c);
389 	}
390 	goto retry;
391 }
392 
393 static int
394 readb(struct fileblk *f, int toread, int rest)
395 {
396 	int	len;
397 	int	fflags;
398 
399 	if (rest) {
400 		/*
401 		 * copies the remaining 'rest' bytes from f->fnxt
402 		 * to f->fbuf
403 		 */
404 		(void) memcpy(f->fbuf, f->fnxt, rest);
405 		f->fnxt = f->fbuf;
406 		f->fend = f->fnxt + rest;
407 		f->nxtoff = 0;
408 		f->endoff = rest;
409 		if (f->fbuf[rest - 1] == '\n') {
410 			/*
411 			 * if '\n' found, it should be
412 			 * a bondary of multibyte char.
413 			 */
414 			return (rest);
415 		}
416 	}
417 
418 retry:
419 	do {
420 		if (trapnote & SIGSET) {
421 			newline();
422 			sigchk();
423 		} else if ((trapnote & TRAPSET) && (rwait > 0)) {
424 			newline();
425 			chktrap();
426 			clearup();
427 		}
428 	} while ((len = read(f->fdes, f->fbuf + rest, toread)) < 0 && trapnote);
429 	/*
430 	 * if child sets O_NDELAY or O_NONBLOCK on stdin
431 	 * and exited then turn the modes off and retry
432 	 */
433 	if (len == 0) {
434 		if (((flags & intflg) ||
435 		    ((flags & oneflg) == 0 && isatty(input) &&
436 		    (flags & stdflg))) &&
437 		    ((fflags = fcntl(f->fdes, F_GETFL, 0)) & O_NDELAY)) {
438 			fflags &= ~O_NDELAY;
439 			fcntl(f->fdes, F_SETFL, fflags);
440 			goto retry;
441 		}
442 	} else if (len < 0) {
443 		if (errno == EAGAIN) {
444 			fflags = fcntl(f->fdes, F_GETFL, 0);
445 			fflags &= ~O_NONBLOCK;
446 			fcntl(f->fdes, F_SETFL, fflags);
447 			goto retry;
448 		}
449 		len = 0;
450 	}
451 	f->fnxt = f->fbuf;
452 	f->fend = f->fnxt + (len + rest);
453 	f->nxtoff = 0;
454 	f->endoff = len + rest;
455 	return (len + rest);
456 }
457