1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2001, 2002 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 #include "gnu_msgfmt.h"
30 #include "gnu_lex.h"
31 #include "y.tab.h"
32
/*
 * Line number of the PO file position currently being read.  Starts at 1,
 * is incremented whenever po_getc() consumes a newline and decremented
 * when po_uninput() pushes a newline back.
 */
int	cur_line = 1;

/*
 * One-character push-back store shared by po_ungetc() and po_getc().
 * backbuf holds the bytes of the pushed-back character and backlen is
 * the number of valid bytes in it; backlen == 0 means nothing is pending.
 */
static char	backbuf[MB_LEN_MAX];
static int	backlen = 0;
37
38 /*
39 * get_mb() returns one multibyte character.
40 *
41 * This function uses the iconv() function to find out one
42 * multibyte character from a sequence of bytes in the file stream.
43 * The conversion from the codeset specified in the PO file to UTF-8
44 * is performed. The funcition reads another byte and calls iconv(),
45 * until iconv() successfully returns as a valid UTF-8 character has
46 * been converted or returns EILSEQ. If iconv() successfully returned,
47 * the function returns the read bytes as one character. Otherwise,
48 * returns error. The string converted to UTF-8 in outbuf won't be
49 * used at all.
50 */
51 static size_t
get_mb(unsigned char * tmpbuf,unsigned char fc)52 get_mb(unsigned char *tmpbuf, unsigned char fc)
53 {
54 int c;
55 char outbuf[8]; /* max size of a UTF-8 char */
56 const char *inptr;
57 char *outptr;
58 size_t insize = 0, inlen, outlen, ret;
59
60 tmpbuf[insize++] = fc; /* size of tmpbuf is MB_LEN_MAX+1 */
61
62 if (cd == (iconv_t)-1) {
63 /* no conversion */
64 tmpbuf[insize] = '\0';
65 return (insize);
66 }
67
68 for (; ; ) {
69 inptr = (const char *)tmpbuf;
70 outptr = &outbuf[0];
71 inlen = insize;
72 outlen = sizeof (outbuf);
73
74 errno = 0;
75 ret = iconv(cd, &inptr, &inlen, &outptr, &outlen);
76 if (ret == (size_t)-1) {
77 /* iconv failed */
78 switch (errno) {
79 case EILSEQ:
80 /* invalid character found */
81 error(gettext(ERR_INVALID_CHAR),
82 cur_line, cur_po);
83 /* NOTREACHED */
84 case EINVAL:
85 /* not enough input */
86 if (insize == MB_LEN_MAX) {
87 /* invalid character found */
88 error(gettext(ERR_INVALID_CHAR),
89 cur_line, cur_po);
90 /* NOTREACHED */
91 }
92 c = getc(fp);
93 if (c == EOF) {
94 error(gettext(ERR_UNEXP_EOF),
95 cur_line, cur_po);
96 /* NOTREACHED */
97 }
98 tmpbuf[insize++] = (unsigned char)c;
99
100 /* initialize the conversion */
101 outptr = &outbuf[0];
102 outlen = sizeof (outbuf);
103 (void) iconv(cd, NULL, NULL, &outptr, &outlen);
104
105 continue;
106 /* NOTREACHED */
107 default:
108 /* should never happen */
109 error(ERR_INTERNAL,
110 cur_line, cur_po);
111 /* NOTREACHED */
112 }
113 /* NOTREACHED */
114 }
115 tmpbuf[insize] = '\0';
116 return (insize);
117 /* NOTRECHED */
118 }
119 }
120
121 static void
po_uninput(int c)122 po_uninput(int c)
123 {
124 (void) ungetc(c, fp);
125 if (c == '\n')
126 cur_line--;
127 }
128
129 static void
po_ungetc(struct ch * pch)130 po_ungetc(struct ch *pch)
131 {
132 if (backlen) {
133 error(gettext(ERR_INTERNAL), cur_line, cur_po);
134 /* NOTREACHED */
135 }
136 if (!pch->eof) {
137 backlen = pch->len;
138 (void) memcpy(backbuf, pch->buf, backlen);
139 }
140 }
141
142 static struct ch *
po_getc(void)143 po_getc(void)
144 {
145 static struct ch och;
146 int c;
147
148 if (backlen) {
149 och.len = backlen;
150 (void) memcpy(och.buf, backbuf, backlen);
151 backlen = 0;
152 return (&och);
153 }
154
155 for (; ; ) {
156 c = getc(fp);
157 if (c == EOF) {
158 if (ferror(fp)) {
159 /* error happend */
160 error(gettext(ERR_READ_FAILED), cur_po);
161 /* NOTREACHED */
162 }
163 och.len = 0;
164 och.eof = 1;
165 return (&och);
166 }
167 if (c == '\\') {
168 c = getc(fp);
169 if (c == '\n') {
170 /* this newline should be escaped */
171 cur_line++;
172 continue;
173 } else {
174 po_uninput(c);
175 och.len = 1;
176 och.eof = 0;
177 och.buf[0] = '\\';
178 return (&och);
179 }
180 /* NOTREACHED */
181 }
182 if (c == '\n') {
183 cur_line++;
184 och.len = 1;
185 och.eof = 0;
186 och.buf[0] = '\n';
187 return (&och);
188 }
189 if (isascii((unsigned char)c)) {
190 /* single byte ascii */
191 och.len = 1;
192 och.eof = 0;
193 och.buf[0] = (unsigned char)c;
194 return (&och);
195 }
196
197 och.len = get_mb(&och.buf[0], (unsigned char)c);
198 och.eof = 0;
199 return (&och);
200 }
201 /* NOTREACHED */
202 }
203
/*
 * extend_buf() enlarges the buffer *buf by add bytes.
 *
 * *size is the current allocation size; on return it holds the new size
 * and *buf points to the block returned by Xrealloc().
 */
static void
extend_buf(char **buf, size_t *size, size_t add)
{
	*size += add;
	*buf = (char *)Xrealloc(*buf, *size);
}
213
214 static struct ch *
expand_es(void)215 expand_es(void)
216 {
217 int c, n, loop;
218 static struct ch och;
219 struct ch *pch;
220
221 pch = po_getc();
222 if (pch->eof) {
223 error(gettext(ERR_UNEXP_EOF),
224 cur_line, cur_po);
225 /* NOTREACHED */
226 }
227 if (pch->len > 1) {
228 /* not a valid escape sequence */
229 return (pch);
230 }
231
232 och.len = 1;
233 och.eof = 0;
234 switch (pch->buf[0]) {
235 case '"':
236 case '\\':
237 och.buf[0] = pch->buf[0];
238 break;
239 case 'b':
240 och.buf[0] = '\b';
241 break;
242 case 'f':
243 och.buf[0] = '\f';
244 break;
245 case 'n':
246 och.buf[0] = '\n';
247 break;
248 case 'r':
249 och.buf[0] = '\r';
250 break;
251 case 't':
252 och.buf[0] = '\t';
253 break;
254 case 'v':
255 och.buf[0] = '\v';
256 break;
257 case 'a':
258 och.buf[0] = '\a';
259 break;
260 case '0':
261 case '1':
262 case '2':
263 case '3':
264 case '4':
265 case '5':
266 case '6':
267 case '7':
268 /* octal */
269 c = pch->buf[0];
270 for (n = 0, loop = 0; ; ) {
271 n = n * 8 + c - '0';
272 loop++;
273 if (loop >= 3)
274 break;
275 pch = po_getc();
276 if (pch->eof) {
277 error(gettext(ERR_UNEXP_EOF),
278 cur_line, cur_po);
279 /* NOTREACHED */
280 }
281 if ((pch->len > 1) || (pch->buf[0] < '0') ||
282 (pch->buf[0] > '7'))
283 break;
284 c = pch->buf[0];
285 }
286 po_ungetc(pch);
287 och.buf[0] = (unsigned char)n;
288 break;
289 case 'x':
290 /* hex */
291 pch = po_getc();
292 if (pch->eof) {
293 error(gettext(ERR_UNEXP_EOF),
294 cur_line, cur_po);
295 /* NOTREACHED */
296 }
297 if (pch->len > 1) {
298 po_ungetc(pch);
299 och.buf[0] = 'x';
300 break;
301 }
302 c = pch->buf[0];
303 if (!isxdigit((unsigned char)c)) {
304 po_ungetc(pch);
305 och.buf[0] = 'x';
306 break;
307 }
308 if (isdigit((unsigned char)c)) {
309 n = c - '0';
310 } else if (isupper((unsigned char)c)) {
311 n = c - 'A' + 10;
312 } else {
313 n = c - 'a' + 10;
314 }
315
316 pch = po_getc();
317 if (pch->eof) {
318 error(gettext(ERR_UNEXP_EOF),
319 cur_line, cur_po);
320 /* NOTREACHED */
321 }
322 if (pch->len > 1) {
323 po_ungetc(pch);
324 och.buf[0] = (unsigned char)n;
325 break;
326 }
327 c = pch->buf[0];
328 if (!isxdigit((unsigned char)c)) {
329 po_ungetc(pch);
330 och.buf[0] = (unsigned char)n;
331 break;
332 }
333 n *= 16;
334 if (isdigit((unsigned char)c)) {
335 n += c - '0';
336 } else if (isupper((unsigned char)c)) {
337 n += c - 'A' + 10;
338 } else {
339 n += c - 'a' + 10;
340 }
341 och.buf[0] = (unsigned char)n;
342 break;
343
344 default:
345 och.buf[0] = pch->buf[0];
346 break;
347 }
348 return (&och);
349 }
350
351 int
yylex(void)352 yylex(void)
353 {
354 unsigned int uc;
355 struct ch *pch;
356 char *buf;
357 size_t buf_size, buf_pos;
358
359 for (; ; ) {
360 pch = po_getc();
361
362 if (pch->eof) {
363 /* EOF */
364 return (0);
365 }
366
367 if (pch->len > 1) {
368 /* multi byte */
369 yylval.c.len = pch->len;
370 (void) memcpy(yylval.c.buf, pch->buf, pch->len);
371 return (CHR);
372 }
373 /* single byte */
374 switch (pch->buf[0]) {
375 case ' ':
376 case '\t':
377 case '\n':
378 break;
379
380 case '#':
381 /* comment start */
382 buf_size = CBUFSIZE;
383 buf = (char *)Xmalloc(buf_size);
384 buf_pos = 0;
385 pch = po_getc();
386 while (!pch->eof &&
387 ((pch->len != 1) || (pch->buf[0] != '\n'))) {
388 if (buf_pos + pch->len + 1 > buf_size)
389 extend_buf(&buf, &buf_size, CBUFSIZE);
390 (void) memcpy(buf + buf_pos,
391 pch->buf, pch->len);
392 buf_pos += pch->len;
393 pch = po_getc();
394 }
395 buf[buf_pos] = '\0';
396 yylval.str = buf;
397 return (COMMENT);
398 /* NOTREACHED */
399
400 case '[':
401 case ']':
402 return (pch->buf[0]);
403 /* NOTREACHED */
404
405 case '"':
406 buf_size = MBUFSIZE;
407 buf = (char *)Xmalloc(buf_size);
408 buf_pos = 0;
409 for (; ; ) {
410 pch = po_getc();
411
412 if (pch->eof) {
413 /* EOF */
414 error(gettext(ERR_UNEXP_EOF),
415 cur_line, cur_po);
416 /* NOTREACHED */
417 }
418
419 if (pch->len == 1) {
420 uc = pch->buf[0];
421
422 if (uc == '\n') {
423 error(gettext(ERR_UNEXP_EOL),
424 cur_line, cur_po);
425 /* NOTREACHED */
426 }
427 if (uc == '"')
428 break;
429 if (uc == '\\')
430 pch = expand_es();
431 }
432 if (buf_pos + pch->len + 1 > buf_size)
433 extend_buf(&buf, &buf_size,
434 MBUFSIZE);
435 (void) memcpy(buf + buf_pos,
436 pch->buf, pch->len);
437 buf_pos += pch->len;
438 }
439
440 buf[buf_pos] = '\0';
441 yylval.str = buf;
442 return (STR);
443 /* NOTREACHED */
444
445 default:
446 uc = pch->buf[0];
447
448 if (isalpha(uc) || (uc == '_')) {
449 buf_size = KBUFSIZE;
450 buf = (char *)Xmalloc(buf_size);
451 buf_pos = 0;
452 buf[buf_pos++] = (char)uc;
453 pch = po_getc();
454 while (!pch->eof &&
455 (pch->len == 1) &&
456 (isalpha(uc = pch->buf[0]) ||
457 isdigit(uc) || (uc == '_'))) {
458 if (buf_pos + 1 + 1 > buf_size)
459 extend_buf(&buf, &buf_size,
460 KBUFSIZE);
461 buf[buf_pos++] = (char)uc;
462 pch = po_getc();
463 }
464 /* push back the last char */
465 po_ungetc(pch);
466 buf[buf_pos] = '\0';
467 yylval.str = buf;
468 if (buf_pos > MAX_KW_LEN) {
469 /* kbuf is longer than any keywords */
470 return (SYMBOL);
471 }
472 yylval.num = cur_line;
473 if (strcmp(buf, KW_DOMAIN) == 0) {
474 free(buf);
475 return (DOMAIN);
476 } else if (strcmp(buf, KW_MSGID) == 0) {
477 free(buf);
478 return (MSGID);
479 } else if (strcmp(buf, KW_MSGID_PLURAL) == 0) {
480 free(buf);
481 return (MSGID_PLURAL);
482 } else if (strcmp(buf, KW_MSGSTR) == 0) {
483 free(buf);
484 return (MSGSTR);
485 } else {
486 free(buf);
487 return (SYMBOL);
488 }
489 /* NOTREACHED */
490 }
491 if (isdigit(uc)) {
492 buf_size = NBUFSIZE;
493 buf = (char *)Xmalloc(buf_size);
494 buf_pos = 0;
495 buf[buf_pos++] = (char)uc;
496 pch = po_getc();
497 while (!pch->eof &&
498 (pch->len == 1) &&
499 isdigit(uc = pch->buf[0])) {
500 if (buf_pos + 1 + 1 > buf_size)
501 extend_buf(&buf, &buf_size,
502 NBUFSIZE);
503 buf[buf_pos++] = (char)uc;
504 pch = po_getc();
505 }
506 /* push back the last char */
507 po_ungetc(pch);
508 buf[buf_pos] = '\0';
509 yylval.num = atoi(buf);
510 free(buf);
511 return (NUM);
512 }
513 /* just a char */
514 yylval.c.len = 1;
515 yylval.c.buf[0] = uc;
516 return (CHR);
517 /* NOTREACHED */
518 }
519 }
520 }
521