xref: /titanic_51/usr/src/cmd/msgfmt/gnu_lex.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2001, 2002 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*7c478bd9Sstevel@tonic-gate 
29*7c478bd9Sstevel@tonic-gate #include "gnu_msgfmt.h"
30*7c478bd9Sstevel@tonic-gate #include "gnu_lex.h"
31*7c478bd9Sstevel@tonic-gate #include "y.tab.h"
32*7c478bd9Sstevel@tonic-gate 
/*
 * Line number currently being read from the PO file.  It starts at 1,
 * is advanced by po_getc() for every newline consumed and is stepped
 * back by po_uninput() when a newline is returned to the stream.
 */
int	cur_line = 1;

/*
 * One-character pushback storage shared by po_ungetc() and po_getc().
 * backlen is the number of valid bytes in backbuf; 0 means that
 * nothing is currently pushed back.
 */
static char	backbuf[MB_LEN_MAX];
static int	backlen = 0;
37*7c478bd9Sstevel@tonic-gate 
38*7c478bd9Sstevel@tonic-gate /*
39*7c478bd9Sstevel@tonic-gate  * get_mb() returns one multibyte character.
40*7c478bd9Sstevel@tonic-gate  *
41*7c478bd9Sstevel@tonic-gate  * This function uses the iconv() function to find out one
42*7c478bd9Sstevel@tonic-gate  * multibyte character from a sequence of bytes in the file stream.
43*7c478bd9Sstevel@tonic-gate  * The conversion from the codeset specified in the PO file to UTF-8
44*7c478bd9Sstevel@tonic-gate  * is performed.  The funcition reads another byte and calls iconv(),
45*7c478bd9Sstevel@tonic-gate  * until iconv() successfully returns as a valid UTF-8 character has
46*7c478bd9Sstevel@tonic-gate  * been converted or returns EILSEQ.  If iconv() successfully returned,
47*7c478bd9Sstevel@tonic-gate  * the function returns the read bytes as one character.  Otherwise,
48*7c478bd9Sstevel@tonic-gate  * returns error.  The string converted to UTF-8 in outbuf won't be
49*7c478bd9Sstevel@tonic-gate  * used at all.
50*7c478bd9Sstevel@tonic-gate  */
51*7c478bd9Sstevel@tonic-gate static size_t
52*7c478bd9Sstevel@tonic-gate get_mb(unsigned char *tmpbuf, unsigned char fc)
53*7c478bd9Sstevel@tonic-gate {
54*7c478bd9Sstevel@tonic-gate 	int	c;
55*7c478bd9Sstevel@tonic-gate 	char	outbuf[8];			/* max size of a UTF-8 char */
56*7c478bd9Sstevel@tonic-gate 	const char	*inptr;
57*7c478bd9Sstevel@tonic-gate 	char	*outptr;
58*7c478bd9Sstevel@tonic-gate 	size_t	insize = 0, inlen, outlen, ret;
59*7c478bd9Sstevel@tonic-gate 
60*7c478bd9Sstevel@tonic-gate 	tmpbuf[insize++] = fc;		/* size of tmpbuf is MB_LEN_MAX+1 */
61*7c478bd9Sstevel@tonic-gate 
62*7c478bd9Sstevel@tonic-gate 	if (cd == (iconv_t)-1) {
63*7c478bd9Sstevel@tonic-gate 		/* no conversion */
64*7c478bd9Sstevel@tonic-gate 		tmpbuf[insize] = '\0';
65*7c478bd9Sstevel@tonic-gate 		return (insize);
66*7c478bd9Sstevel@tonic-gate 	}
67*7c478bd9Sstevel@tonic-gate 
68*7c478bd9Sstevel@tonic-gate 	for (; ; ) {
69*7c478bd9Sstevel@tonic-gate 		inptr = (const char *)tmpbuf;
70*7c478bd9Sstevel@tonic-gate 		outptr = &outbuf[0];
71*7c478bd9Sstevel@tonic-gate 		inlen = insize;
72*7c478bd9Sstevel@tonic-gate 		outlen = sizeof (outbuf);
73*7c478bd9Sstevel@tonic-gate 
74*7c478bd9Sstevel@tonic-gate 		errno = 0;
75*7c478bd9Sstevel@tonic-gate 		ret = iconv(cd, &inptr, &inlen, &outptr, &outlen);
76*7c478bd9Sstevel@tonic-gate 		if (ret == (size_t)-1) {
77*7c478bd9Sstevel@tonic-gate 			/* iconv failed */
78*7c478bd9Sstevel@tonic-gate 			switch (errno) {
79*7c478bd9Sstevel@tonic-gate 			case EILSEQ:
80*7c478bd9Sstevel@tonic-gate 				/* invalid character found */
81*7c478bd9Sstevel@tonic-gate 				error(gettext(ERR_INVALID_CHAR),
82*7c478bd9Sstevel@tonic-gate 					cur_line, cur_po);
83*7c478bd9Sstevel@tonic-gate 				/* NOTREACHED */
84*7c478bd9Sstevel@tonic-gate 			case EINVAL:
85*7c478bd9Sstevel@tonic-gate 				/* not enough input */
86*7c478bd9Sstevel@tonic-gate 				if (insize == MB_LEN_MAX) {
87*7c478bd9Sstevel@tonic-gate 					/* invalid character found */
88*7c478bd9Sstevel@tonic-gate 					error(gettext(ERR_INVALID_CHAR),
89*7c478bd9Sstevel@tonic-gate 						cur_line, cur_po);
90*7c478bd9Sstevel@tonic-gate 					/* NOTREACHED */
91*7c478bd9Sstevel@tonic-gate 				}
92*7c478bd9Sstevel@tonic-gate 				c = getc(fp);
93*7c478bd9Sstevel@tonic-gate 				if (c == EOF) {
94*7c478bd9Sstevel@tonic-gate 					error(gettext(ERR_UNEXP_EOF),
95*7c478bd9Sstevel@tonic-gate 						cur_line, cur_po);
96*7c478bd9Sstevel@tonic-gate 					/* NOTREACHED */
97*7c478bd9Sstevel@tonic-gate 				}
98*7c478bd9Sstevel@tonic-gate 				tmpbuf[insize++] = (unsigned char)c;
99*7c478bd9Sstevel@tonic-gate 
100*7c478bd9Sstevel@tonic-gate 				/* initialize the conversion */
101*7c478bd9Sstevel@tonic-gate 				outptr = &outbuf[0];
102*7c478bd9Sstevel@tonic-gate 				outlen = sizeof (outbuf);
103*7c478bd9Sstevel@tonic-gate 				(void) iconv(cd, NULL, NULL, &outptr, &outlen);
104*7c478bd9Sstevel@tonic-gate 
105*7c478bd9Sstevel@tonic-gate 				continue;
106*7c478bd9Sstevel@tonic-gate 				/* NOTREACHED */
107*7c478bd9Sstevel@tonic-gate 			default:
108*7c478bd9Sstevel@tonic-gate 				/* should never happen */
109*7c478bd9Sstevel@tonic-gate 				error(ERR_INTERNAL,
110*7c478bd9Sstevel@tonic-gate 					cur_line, cur_po);
111*7c478bd9Sstevel@tonic-gate 				/* NOTREACHED */
112*7c478bd9Sstevel@tonic-gate 			}
113*7c478bd9Sstevel@tonic-gate 			/* NOTREACHED */
114*7c478bd9Sstevel@tonic-gate 		}
115*7c478bd9Sstevel@tonic-gate 		tmpbuf[insize] = '\0';
116*7c478bd9Sstevel@tonic-gate 		return (insize);
117*7c478bd9Sstevel@tonic-gate 		/* NOTRECHED */
118*7c478bd9Sstevel@tonic-gate 	}
119*7c478bd9Sstevel@tonic-gate }
120*7c478bd9Sstevel@tonic-gate 
121*7c478bd9Sstevel@tonic-gate static void
122*7c478bd9Sstevel@tonic-gate po_uninput(int c)
123*7c478bd9Sstevel@tonic-gate {
124*7c478bd9Sstevel@tonic-gate 	(void) ungetc(c, fp);
125*7c478bd9Sstevel@tonic-gate 	if (c == '\n')
126*7c478bd9Sstevel@tonic-gate 		cur_line--;
127*7c478bd9Sstevel@tonic-gate }
128*7c478bd9Sstevel@tonic-gate 
129*7c478bd9Sstevel@tonic-gate static void
130*7c478bd9Sstevel@tonic-gate po_ungetc(struct ch *pch)
131*7c478bd9Sstevel@tonic-gate {
132*7c478bd9Sstevel@tonic-gate 	if (backlen) {
133*7c478bd9Sstevel@tonic-gate 		error(gettext(ERR_INTERNAL), cur_line, cur_po);
134*7c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
135*7c478bd9Sstevel@tonic-gate 	}
136*7c478bd9Sstevel@tonic-gate 	if (!pch->eof) {
137*7c478bd9Sstevel@tonic-gate 		backlen = pch->len;
138*7c478bd9Sstevel@tonic-gate 		(void) memcpy(backbuf, pch->buf, backlen);
139*7c478bd9Sstevel@tonic-gate 	}
140*7c478bd9Sstevel@tonic-gate }
141*7c478bd9Sstevel@tonic-gate 
142*7c478bd9Sstevel@tonic-gate static struct ch *
143*7c478bd9Sstevel@tonic-gate po_getc(void)
144*7c478bd9Sstevel@tonic-gate {
145*7c478bd9Sstevel@tonic-gate 	static struct ch	och;
146*7c478bd9Sstevel@tonic-gate 	int	c;
147*7c478bd9Sstevel@tonic-gate 
148*7c478bd9Sstevel@tonic-gate 	if (backlen) {
149*7c478bd9Sstevel@tonic-gate 		och.len = backlen;
150*7c478bd9Sstevel@tonic-gate 		(void) memcpy(och.buf, backbuf, backlen);
151*7c478bd9Sstevel@tonic-gate 		backlen = 0;
152*7c478bd9Sstevel@tonic-gate 		return (&och);
153*7c478bd9Sstevel@tonic-gate 	}
154*7c478bd9Sstevel@tonic-gate 
155*7c478bd9Sstevel@tonic-gate 	for (; ; ) {
156*7c478bd9Sstevel@tonic-gate 		c = getc(fp);
157*7c478bd9Sstevel@tonic-gate 		if (c == EOF) {
158*7c478bd9Sstevel@tonic-gate 			if (ferror(fp)) {
159*7c478bd9Sstevel@tonic-gate 				/* error happend */
160*7c478bd9Sstevel@tonic-gate 				error(gettext(ERR_READ_FAILED), cur_po);
161*7c478bd9Sstevel@tonic-gate 				/* NOTREACHED */
162*7c478bd9Sstevel@tonic-gate 			}
163*7c478bd9Sstevel@tonic-gate 			och.len = 0;
164*7c478bd9Sstevel@tonic-gate 			och.eof = 1;
165*7c478bd9Sstevel@tonic-gate 			return (&och);
166*7c478bd9Sstevel@tonic-gate 		}
167*7c478bd9Sstevel@tonic-gate 		if (c == '\\') {
168*7c478bd9Sstevel@tonic-gate 			c = getc(fp);
169*7c478bd9Sstevel@tonic-gate 			if (c == '\n') {
170*7c478bd9Sstevel@tonic-gate 				/* this newline should be escaped */
171*7c478bd9Sstevel@tonic-gate 				cur_line++;
172*7c478bd9Sstevel@tonic-gate 				continue;
173*7c478bd9Sstevel@tonic-gate 			} else {
174*7c478bd9Sstevel@tonic-gate 				po_uninput(c);
175*7c478bd9Sstevel@tonic-gate 				och.len = 1;
176*7c478bd9Sstevel@tonic-gate 				och.eof = 0;
177*7c478bd9Sstevel@tonic-gate 				och.buf[0] = '\\';
178*7c478bd9Sstevel@tonic-gate 				return (&och);
179*7c478bd9Sstevel@tonic-gate 			}
180*7c478bd9Sstevel@tonic-gate 			/* NOTREACHED */
181*7c478bd9Sstevel@tonic-gate 		}
182*7c478bd9Sstevel@tonic-gate 		if (c == '\n') {
183*7c478bd9Sstevel@tonic-gate 			cur_line++;
184*7c478bd9Sstevel@tonic-gate 			och.len = 1;
185*7c478bd9Sstevel@tonic-gate 			och.eof = 0;
186*7c478bd9Sstevel@tonic-gate 			och.buf[0] = '\n';
187*7c478bd9Sstevel@tonic-gate 			return (&och);
188*7c478bd9Sstevel@tonic-gate 		}
189*7c478bd9Sstevel@tonic-gate 		if (isascii((unsigned char)c)) {
190*7c478bd9Sstevel@tonic-gate 			/* single byte ascii */
191*7c478bd9Sstevel@tonic-gate 			och.len = 1;
192*7c478bd9Sstevel@tonic-gate 			och.eof = 0;
193*7c478bd9Sstevel@tonic-gate 			och.buf[0] = (unsigned char)c;
194*7c478bd9Sstevel@tonic-gate 			return (&och);
195*7c478bd9Sstevel@tonic-gate 		}
196*7c478bd9Sstevel@tonic-gate 
197*7c478bd9Sstevel@tonic-gate 		och.len = get_mb(&och.buf[0], (unsigned char)c);
198*7c478bd9Sstevel@tonic-gate 		och.eof = 0;
199*7c478bd9Sstevel@tonic-gate 		return (&och);
200*7c478bd9Sstevel@tonic-gate 	}
201*7c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
202*7c478bd9Sstevel@tonic-gate }
203*7c478bd9Sstevel@tonic-gate 
/*
 * extend_buf() enlarges the buffer *buf by add bytes through
 * Xrealloc() and records the new capacity in *size.
 */
static void
extend_buf(char **buf, size_t *size, size_t add)
{
	size_t	grown = *size + add;

	*buf = (char *)Xrealloc(*buf, grown);
	*size = grown;
}
213*7c478bd9Sstevel@tonic-gate 
214*7c478bd9Sstevel@tonic-gate static struct ch	*
215*7c478bd9Sstevel@tonic-gate expand_es(void)
216*7c478bd9Sstevel@tonic-gate {
217*7c478bd9Sstevel@tonic-gate 	int	c, n, loop;
218*7c478bd9Sstevel@tonic-gate 	static struct ch	och;
219*7c478bd9Sstevel@tonic-gate 	struct ch	*pch;
220*7c478bd9Sstevel@tonic-gate 
221*7c478bd9Sstevel@tonic-gate 	pch = po_getc();
222*7c478bd9Sstevel@tonic-gate 	if (pch->eof) {
223*7c478bd9Sstevel@tonic-gate 		error(gettext(ERR_UNEXP_EOF),
224*7c478bd9Sstevel@tonic-gate 			cur_line, cur_po);
225*7c478bd9Sstevel@tonic-gate 		/* NOTREACHED */
226*7c478bd9Sstevel@tonic-gate 	}
227*7c478bd9Sstevel@tonic-gate 	if (pch->len > 1) {
228*7c478bd9Sstevel@tonic-gate 		/* not a valid escape sequence */
229*7c478bd9Sstevel@tonic-gate 		return (pch);
230*7c478bd9Sstevel@tonic-gate 	}
231*7c478bd9Sstevel@tonic-gate 
232*7c478bd9Sstevel@tonic-gate 	och.len = 1;
233*7c478bd9Sstevel@tonic-gate 	och.eof = 0;
234*7c478bd9Sstevel@tonic-gate 	switch (pch->buf[0]) {
235*7c478bd9Sstevel@tonic-gate 	case '"':
236*7c478bd9Sstevel@tonic-gate 	case '\\':
237*7c478bd9Sstevel@tonic-gate 		och.buf[0] = pch->buf[0];
238*7c478bd9Sstevel@tonic-gate 		break;
239*7c478bd9Sstevel@tonic-gate 	case 'b':
240*7c478bd9Sstevel@tonic-gate 		och.buf[0] = '\b';
241*7c478bd9Sstevel@tonic-gate 		break;
242*7c478bd9Sstevel@tonic-gate 	case 'f':
243*7c478bd9Sstevel@tonic-gate 		och.buf[0] = '\f';
244*7c478bd9Sstevel@tonic-gate 		break;
245*7c478bd9Sstevel@tonic-gate 	case 'n':
246*7c478bd9Sstevel@tonic-gate 		och.buf[0] = '\n';
247*7c478bd9Sstevel@tonic-gate 		break;
248*7c478bd9Sstevel@tonic-gate 	case 'r':
249*7c478bd9Sstevel@tonic-gate 		och.buf[0] = '\r';
250*7c478bd9Sstevel@tonic-gate 		break;
251*7c478bd9Sstevel@tonic-gate 	case 't':
252*7c478bd9Sstevel@tonic-gate 		och.buf[0] = '\t';
253*7c478bd9Sstevel@tonic-gate 		break;
254*7c478bd9Sstevel@tonic-gate 	case 'v':
255*7c478bd9Sstevel@tonic-gate 		och.buf[0] = '\v';
256*7c478bd9Sstevel@tonic-gate 		break;
257*7c478bd9Sstevel@tonic-gate 	case 'a':
258*7c478bd9Sstevel@tonic-gate 		och.buf[0] = '\a';
259*7c478bd9Sstevel@tonic-gate 		break;
260*7c478bd9Sstevel@tonic-gate 	case '0':
261*7c478bd9Sstevel@tonic-gate 	case '1':
262*7c478bd9Sstevel@tonic-gate 	case '2':
263*7c478bd9Sstevel@tonic-gate 	case '3':
264*7c478bd9Sstevel@tonic-gate 	case '4':
265*7c478bd9Sstevel@tonic-gate 	case '5':
266*7c478bd9Sstevel@tonic-gate 	case '6':
267*7c478bd9Sstevel@tonic-gate 	case '7':
268*7c478bd9Sstevel@tonic-gate 		/* octal */
269*7c478bd9Sstevel@tonic-gate 		c = pch->buf[0];
270*7c478bd9Sstevel@tonic-gate 		for (n = 0, loop = 0; ; ) {
271*7c478bd9Sstevel@tonic-gate 			n = n * 8 + c - '0';
272*7c478bd9Sstevel@tonic-gate 			loop++;
273*7c478bd9Sstevel@tonic-gate 			if (loop >= 3)
274*7c478bd9Sstevel@tonic-gate 				break;
275*7c478bd9Sstevel@tonic-gate 			pch = po_getc();
276*7c478bd9Sstevel@tonic-gate 			if (pch->eof) {
277*7c478bd9Sstevel@tonic-gate 				error(gettext(ERR_UNEXP_EOF),
278*7c478bd9Sstevel@tonic-gate 					cur_line, cur_po);
279*7c478bd9Sstevel@tonic-gate 				/* NOTREACHED */
280*7c478bd9Sstevel@tonic-gate 			}
281*7c478bd9Sstevel@tonic-gate 			if ((pch->len > 1) || (pch->buf[0] < '0') ||
282*7c478bd9Sstevel@tonic-gate 				(pch->buf[0] > '7'))
283*7c478bd9Sstevel@tonic-gate 				break;
284*7c478bd9Sstevel@tonic-gate 			c = pch->buf[0];
285*7c478bd9Sstevel@tonic-gate 		}
286*7c478bd9Sstevel@tonic-gate 		po_ungetc(pch);
287*7c478bd9Sstevel@tonic-gate 		och.buf[0] = (unsigned char)n;
288*7c478bd9Sstevel@tonic-gate 		break;
289*7c478bd9Sstevel@tonic-gate 	case 'x':
290*7c478bd9Sstevel@tonic-gate 		/* hex */
291*7c478bd9Sstevel@tonic-gate 		pch = po_getc();
292*7c478bd9Sstevel@tonic-gate 		if (pch->eof) {
293*7c478bd9Sstevel@tonic-gate 			error(gettext(ERR_UNEXP_EOF),
294*7c478bd9Sstevel@tonic-gate 				cur_line, cur_po);
295*7c478bd9Sstevel@tonic-gate 			/* NOTREACHED */
296*7c478bd9Sstevel@tonic-gate 		}
297*7c478bd9Sstevel@tonic-gate 		if (pch->len > 1) {
298*7c478bd9Sstevel@tonic-gate 			po_ungetc(pch);
299*7c478bd9Sstevel@tonic-gate 			och.buf[0] = 'x';
300*7c478bd9Sstevel@tonic-gate 			break;
301*7c478bd9Sstevel@tonic-gate 		}
302*7c478bd9Sstevel@tonic-gate 		c = pch->buf[0];
303*7c478bd9Sstevel@tonic-gate 		if (!isxdigit((unsigned char)c)) {
304*7c478bd9Sstevel@tonic-gate 			po_ungetc(pch);
305*7c478bd9Sstevel@tonic-gate 			och.buf[0] = 'x';
306*7c478bd9Sstevel@tonic-gate 			break;
307*7c478bd9Sstevel@tonic-gate 		}
308*7c478bd9Sstevel@tonic-gate 		if (isdigit((unsigned char)c)) {
309*7c478bd9Sstevel@tonic-gate 			n = c - '0';
310*7c478bd9Sstevel@tonic-gate 		} else if (isupper((unsigned char)c)) {
311*7c478bd9Sstevel@tonic-gate 			n = c - 'A' + 10;
312*7c478bd9Sstevel@tonic-gate 		} else {
313*7c478bd9Sstevel@tonic-gate 			n = c - 'a' + 10;
314*7c478bd9Sstevel@tonic-gate 		}
315*7c478bd9Sstevel@tonic-gate 
316*7c478bd9Sstevel@tonic-gate 		pch = po_getc();
317*7c478bd9Sstevel@tonic-gate 		if (pch->eof) {
318*7c478bd9Sstevel@tonic-gate 			error(gettext(ERR_UNEXP_EOF),
319*7c478bd9Sstevel@tonic-gate 				cur_line, cur_po);
320*7c478bd9Sstevel@tonic-gate 			/* NOTREACHED */
321*7c478bd9Sstevel@tonic-gate 		}
322*7c478bd9Sstevel@tonic-gate 		if (pch->len > 1) {
323*7c478bd9Sstevel@tonic-gate 			po_ungetc(pch);
324*7c478bd9Sstevel@tonic-gate 			och.buf[0] = (unsigned char)n;
325*7c478bd9Sstevel@tonic-gate 			break;
326*7c478bd9Sstevel@tonic-gate 		}
327*7c478bd9Sstevel@tonic-gate 		c = pch->buf[0];
328*7c478bd9Sstevel@tonic-gate 		if (!isxdigit((unsigned char)c)) {
329*7c478bd9Sstevel@tonic-gate 			po_ungetc(pch);
330*7c478bd9Sstevel@tonic-gate 			och.buf[0] = (unsigned char)n;
331*7c478bd9Sstevel@tonic-gate 			break;
332*7c478bd9Sstevel@tonic-gate 		}
333*7c478bd9Sstevel@tonic-gate 		n *= 16;
334*7c478bd9Sstevel@tonic-gate 		if (isdigit((unsigned char)c)) {
335*7c478bd9Sstevel@tonic-gate 			n += c - '0';
336*7c478bd9Sstevel@tonic-gate 		} else if (isupper((unsigned char)c)) {
337*7c478bd9Sstevel@tonic-gate 			n += c - 'A' + 10;
338*7c478bd9Sstevel@tonic-gate 		} else {
339*7c478bd9Sstevel@tonic-gate 			n += c - 'a' + 10;
340*7c478bd9Sstevel@tonic-gate 		}
341*7c478bd9Sstevel@tonic-gate 		och.buf[0] = (unsigned char)n;
342*7c478bd9Sstevel@tonic-gate 		break;
343*7c478bd9Sstevel@tonic-gate 
344*7c478bd9Sstevel@tonic-gate 	default:
345*7c478bd9Sstevel@tonic-gate 		och.buf[0] = pch->buf[0];
346*7c478bd9Sstevel@tonic-gate 		break;
347*7c478bd9Sstevel@tonic-gate 	}
348*7c478bd9Sstevel@tonic-gate 	return (&och);
349*7c478bd9Sstevel@tonic-gate }
350*7c478bd9Sstevel@tonic-gate 
351*7c478bd9Sstevel@tonic-gate int
352*7c478bd9Sstevel@tonic-gate yylex(void)
353*7c478bd9Sstevel@tonic-gate {
354*7c478bd9Sstevel@tonic-gate 	unsigned int	uc;
355*7c478bd9Sstevel@tonic-gate 	struct ch	*pch;
356*7c478bd9Sstevel@tonic-gate 	char	*buf;
357*7c478bd9Sstevel@tonic-gate 	size_t	buf_size, buf_pos;
358*7c478bd9Sstevel@tonic-gate 
359*7c478bd9Sstevel@tonic-gate 	for (; ; ) {
360*7c478bd9Sstevel@tonic-gate 		pch = po_getc();
361*7c478bd9Sstevel@tonic-gate 
362*7c478bd9Sstevel@tonic-gate 		if (pch->eof) {
363*7c478bd9Sstevel@tonic-gate 			/* EOF */
364*7c478bd9Sstevel@tonic-gate 			return (0);
365*7c478bd9Sstevel@tonic-gate 		}
366*7c478bd9Sstevel@tonic-gate 
367*7c478bd9Sstevel@tonic-gate 		if (pch->len > 1) {
368*7c478bd9Sstevel@tonic-gate 			/* multi byte */
369*7c478bd9Sstevel@tonic-gate 			yylval.c.len = pch->len;
370*7c478bd9Sstevel@tonic-gate 			(void) memcpy(yylval.c.buf, pch->buf, pch->len);
371*7c478bd9Sstevel@tonic-gate 			return (CHR);
372*7c478bd9Sstevel@tonic-gate 		}
373*7c478bd9Sstevel@tonic-gate 		/* single byte */
374*7c478bd9Sstevel@tonic-gate 		switch (pch->buf[0]) {
375*7c478bd9Sstevel@tonic-gate 		case ' ':
376*7c478bd9Sstevel@tonic-gate 		case '\t':
377*7c478bd9Sstevel@tonic-gate 		case '\n':
378*7c478bd9Sstevel@tonic-gate 			break;
379*7c478bd9Sstevel@tonic-gate 
380*7c478bd9Sstevel@tonic-gate 		case '#':
381*7c478bd9Sstevel@tonic-gate 			/* comment start */
382*7c478bd9Sstevel@tonic-gate 			buf_size = CBUFSIZE;
383*7c478bd9Sstevel@tonic-gate 			buf = (char *)Xmalloc(buf_size);
384*7c478bd9Sstevel@tonic-gate 			buf_pos = 0;
385*7c478bd9Sstevel@tonic-gate 			pch = po_getc();
386*7c478bd9Sstevel@tonic-gate 			while (!pch->eof &&
387*7c478bd9Sstevel@tonic-gate 				((pch->len != 1) || (pch->buf[0] != '\n'))) {
388*7c478bd9Sstevel@tonic-gate 				if (buf_pos + pch->len + 1 > buf_size)
389*7c478bd9Sstevel@tonic-gate 					extend_buf(&buf, &buf_size, CBUFSIZE);
390*7c478bd9Sstevel@tonic-gate 				(void) memcpy(buf + buf_pos,
391*7c478bd9Sstevel@tonic-gate 					pch->buf, pch->len);
392*7c478bd9Sstevel@tonic-gate 				buf_pos += pch->len;
393*7c478bd9Sstevel@tonic-gate 				pch = po_getc();
394*7c478bd9Sstevel@tonic-gate 			}
395*7c478bd9Sstevel@tonic-gate 			buf[buf_pos] = '\0';
396*7c478bd9Sstevel@tonic-gate 			yylval.str = buf;
397*7c478bd9Sstevel@tonic-gate 			return (COMMENT);
398*7c478bd9Sstevel@tonic-gate 			/* NOTREACHED */
399*7c478bd9Sstevel@tonic-gate 
400*7c478bd9Sstevel@tonic-gate 		case '[':
401*7c478bd9Sstevel@tonic-gate 		case ']':
402*7c478bd9Sstevel@tonic-gate 			return (pch->buf[0]);
403*7c478bd9Sstevel@tonic-gate 			/* NOTREACHED */
404*7c478bd9Sstevel@tonic-gate 
405*7c478bd9Sstevel@tonic-gate 		case '"':
406*7c478bd9Sstevel@tonic-gate 			buf_size = MBUFSIZE;
407*7c478bd9Sstevel@tonic-gate 			buf = (char *)Xmalloc(buf_size);
408*7c478bd9Sstevel@tonic-gate 			buf_pos = 0;
409*7c478bd9Sstevel@tonic-gate 			for (; ; ) {
410*7c478bd9Sstevel@tonic-gate 				pch = po_getc();
411*7c478bd9Sstevel@tonic-gate 
412*7c478bd9Sstevel@tonic-gate 				if (pch->eof) {
413*7c478bd9Sstevel@tonic-gate 					/* EOF */
414*7c478bd9Sstevel@tonic-gate 					error(gettext(ERR_UNEXP_EOF),
415*7c478bd9Sstevel@tonic-gate 						cur_line, cur_po);
416*7c478bd9Sstevel@tonic-gate 					/* NOTREACHED */
417*7c478bd9Sstevel@tonic-gate 				}
418*7c478bd9Sstevel@tonic-gate 
419*7c478bd9Sstevel@tonic-gate 				if (pch->len == 1) {
420*7c478bd9Sstevel@tonic-gate 					uc = pch->buf[0];
421*7c478bd9Sstevel@tonic-gate 
422*7c478bd9Sstevel@tonic-gate 					if (uc == '\n') {
423*7c478bd9Sstevel@tonic-gate 						error(gettext(ERR_UNEXP_EOL),
424*7c478bd9Sstevel@tonic-gate 							cur_line, cur_po);
425*7c478bd9Sstevel@tonic-gate 						/* NOTREACHED */
426*7c478bd9Sstevel@tonic-gate 					}
427*7c478bd9Sstevel@tonic-gate 					if (uc == '"')
428*7c478bd9Sstevel@tonic-gate 						break;
429*7c478bd9Sstevel@tonic-gate 					if (uc == '\\')
430*7c478bd9Sstevel@tonic-gate 						pch = expand_es();
431*7c478bd9Sstevel@tonic-gate 				}
432*7c478bd9Sstevel@tonic-gate 				if (buf_pos + pch->len + 1 > buf_size)
433*7c478bd9Sstevel@tonic-gate 					extend_buf(&buf, &buf_size,
434*7c478bd9Sstevel@tonic-gate 						MBUFSIZE);
435*7c478bd9Sstevel@tonic-gate 				(void) memcpy(buf + buf_pos,
436*7c478bd9Sstevel@tonic-gate 					pch->buf, pch->len);
437*7c478bd9Sstevel@tonic-gate 				buf_pos += pch->len;
438*7c478bd9Sstevel@tonic-gate 			}
439*7c478bd9Sstevel@tonic-gate 
440*7c478bd9Sstevel@tonic-gate 			buf[buf_pos] = '\0';
441*7c478bd9Sstevel@tonic-gate 			yylval.str = buf;
442*7c478bd9Sstevel@tonic-gate 			return (STR);
443*7c478bd9Sstevel@tonic-gate 			/* NOTREACHED */
444*7c478bd9Sstevel@tonic-gate 
445*7c478bd9Sstevel@tonic-gate 		default:
446*7c478bd9Sstevel@tonic-gate 			uc = pch->buf[0];
447*7c478bd9Sstevel@tonic-gate 
448*7c478bd9Sstevel@tonic-gate 			if (isalpha(uc) || (uc == '_')) {
449*7c478bd9Sstevel@tonic-gate 				buf_size = KBUFSIZE;
450*7c478bd9Sstevel@tonic-gate 				buf = (char *)Xmalloc(buf_size);
451*7c478bd9Sstevel@tonic-gate 				buf_pos = 0;
452*7c478bd9Sstevel@tonic-gate 				buf[buf_pos++] = (char)uc;
453*7c478bd9Sstevel@tonic-gate 				pch = po_getc();
454*7c478bd9Sstevel@tonic-gate 				while (!pch->eof &&
455*7c478bd9Sstevel@tonic-gate 					(pch->len == 1) &&
456*7c478bd9Sstevel@tonic-gate 					(isalpha(uc = pch->buf[0]) ||
457*7c478bd9Sstevel@tonic-gate 					isdigit(uc) || (uc == '_'))) {
458*7c478bd9Sstevel@tonic-gate 					if (buf_pos + 1 + 1 > buf_size)
459*7c478bd9Sstevel@tonic-gate 						extend_buf(&buf, &buf_size,
460*7c478bd9Sstevel@tonic-gate 							KBUFSIZE);
461*7c478bd9Sstevel@tonic-gate 					buf[buf_pos++] = (char)uc;
462*7c478bd9Sstevel@tonic-gate 					pch = po_getc();
463*7c478bd9Sstevel@tonic-gate 				}
464*7c478bd9Sstevel@tonic-gate 				/* push back the last char */
465*7c478bd9Sstevel@tonic-gate 				po_ungetc(pch);
466*7c478bd9Sstevel@tonic-gate 				buf[buf_pos] = '\0';
467*7c478bd9Sstevel@tonic-gate 				yylval.str = buf;
468*7c478bd9Sstevel@tonic-gate 				if (buf_pos > MAX_KW_LEN) {
469*7c478bd9Sstevel@tonic-gate 					/* kbuf is longer than any keywords */
470*7c478bd9Sstevel@tonic-gate 					return (SYMBOL);
471*7c478bd9Sstevel@tonic-gate 				}
472*7c478bd9Sstevel@tonic-gate 				yylval.num = cur_line;
473*7c478bd9Sstevel@tonic-gate 				if (strcmp(buf, KW_DOMAIN) == 0) {
474*7c478bd9Sstevel@tonic-gate 					free(buf);
475*7c478bd9Sstevel@tonic-gate 					return (DOMAIN);
476*7c478bd9Sstevel@tonic-gate 				} else if (strcmp(buf, KW_MSGID) == 0) {
477*7c478bd9Sstevel@tonic-gate 					free(buf);
478*7c478bd9Sstevel@tonic-gate 					return (MSGID);
479*7c478bd9Sstevel@tonic-gate 				} else if (strcmp(buf, KW_MSGID_PLURAL) == 0) {
480*7c478bd9Sstevel@tonic-gate 					free(buf);
481*7c478bd9Sstevel@tonic-gate 					return (MSGID_PLURAL);
482*7c478bd9Sstevel@tonic-gate 				} else if (strcmp(buf, KW_MSGSTR) == 0) {
483*7c478bd9Sstevel@tonic-gate 					free(buf);
484*7c478bd9Sstevel@tonic-gate 					return (MSGSTR);
485*7c478bd9Sstevel@tonic-gate 				} else {
486*7c478bd9Sstevel@tonic-gate 					free(buf);
487*7c478bd9Sstevel@tonic-gate 					return (SYMBOL);
488*7c478bd9Sstevel@tonic-gate 				}
489*7c478bd9Sstevel@tonic-gate 				/* NOTREACHED */
490*7c478bd9Sstevel@tonic-gate 			}
491*7c478bd9Sstevel@tonic-gate 			if (isdigit(uc)) {
492*7c478bd9Sstevel@tonic-gate 				buf_size = NBUFSIZE;
493*7c478bd9Sstevel@tonic-gate 				buf = (char *)Xmalloc(buf_size);
494*7c478bd9Sstevel@tonic-gate 				buf_pos = 0;
495*7c478bd9Sstevel@tonic-gate 				buf[buf_pos++] = (char)uc;
496*7c478bd9Sstevel@tonic-gate 				pch = po_getc();
497*7c478bd9Sstevel@tonic-gate 				while (!pch->eof &&
498*7c478bd9Sstevel@tonic-gate 					(pch->len == 1) &&
499*7c478bd9Sstevel@tonic-gate 					isdigit(uc = pch->buf[0])) {
500*7c478bd9Sstevel@tonic-gate 					if (buf_pos + 1 + 1 > buf_size)
501*7c478bd9Sstevel@tonic-gate 						extend_buf(&buf, &buf_size,
502*7c478bd9Sstevel@tonic-gate 							NBUFSIZE);
503*7c478bd9Sstevel@tonic-gate 					buf[buf_pos++] = (char)uc;
504*7c478bd9Sstevel@tonic-gate 					pch = po_getc();
505*7c478bd9Sstevel@tonic-gate 				}
506*7c478bd9Sstevel@tonic-gate 				/* push back the last char */
507*7c478bd9Sstevel@tonic-gate 				po_ungetc(pch);
508*7c478bd9Sstevel@tonic-gate 				buf[buf_pos] = '\0';
509*7c478bd9Sstevel@tonic-gate 				yylval.num = atoi(buf);
510*7c478bd9Sstevel@tonic-gate 				free(buf);
511*7c478bd9Sstevel@tonic-gate 				return (NUM);
512*7c478bd9Sstevel@tonic-gate 			}
513*7c478bd9Sstevel@tonic-gate 			/* just a char */
514*7c478bd9Sstevel@tonic-gate 			yylval.c.len = 1;
515*7c478bd9Sstevel@tonic-gate 			yylval.c.buf[0] = uc;
516*7c478bd9Sstevel@tonic-gate 			return (CHR);
517*7c478bd9Sstevel@tonic-gate 			/* NOTREACHED */
518*7c478bd9Sstevel@tonic-gate 		}
519*7c478bd9Sstevel@tonic-gate 	}
520*7c478bd9Sstevel@tonic-gate }
521