1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2001, 2002 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include "gnu_msgfmt.h"
28 #include "gnu_lex.h"
29 #include "y.tab.h"
30
/*
 * Current input line number, counted from 1.  po_getc() advances it
 * each time it reads a newline.
 */
int	cur_line = 1;

/*
 * Single-character pushback store shared by po_ungetc() and po_getc().
 * backlen is the number of valid bytes held in backbuf; zero means
 * that nothing is pushed back.
 */
static char	backbuf[MB_LEN_MAX];
static int	backlen;
35
36 /*
37 * get_mb() returns one multibyte character.
38 *
39 * This function uses the iconv() function to find out one
40 * multibyte character from a sequence of bytes in the file stream.
41 * The conversion from the codeset specified in the PO file to UTF-8
42 * is performed. The funcition reads another byte and calls iconv(),
43 * until iconv() successfully returns as a valid UTF-8 character has
44 * been converted or returns EILSEQ. If iconv() successfully returned,
45 * the function returns the read bytes as one character. Otherwise,
46 * returns error. The string converted to UTF-8 in outbuf won't be
47 * used at all.
48 */
49 static size_t
get_mb(unsigned char * tmpbuf,unsigned char fc)50 get_mb(unsigned char *tmpbuf, unsigned char fc)
51 {
52 int c;
53 char outbuf[8]; /* max size of a UTF-8 char */
54 const char *inptr;
55 char *outptr;
56 size_t insize = 0, inlen, outlen, ret;
57
58 tmpbuf[insize++] = fc; /* size of tmpbuf is MB_LEN_MAX+1 */
59
60 if (cd == (iconv_t)-1) {
61 /* no conversion */
62 tmpbuf[insize] = '\0';
63 return (insize);
64 }
65
66 for (; ; ) {
67 inptr = (const char *)tmpbuf;
68 outptr = &outbuf[0];
69 inlen = insize;
70 outlen = sizeof (outbuf);
71
72 errno = 0;
73 ret = iconv(cd, &inptr, &inlen, &outptr, &outlen);
74 if (ret == (size_t)-1) {
75 /* iconv failed */
76 switch (errno) {
77 case EILSEQ:
78 /* invalid character found */
79 error(gettext(ERR_INVALID_CHAR),
80 cur_line, cur_po);
81 /* NOTREACHED */
82 case EINVAL:
83 /* not enough input */
84 if (insize == MB_LEN_MAX) {
85 /* invalid character found */
86 error(gettext(ERR_INVALID_CHAR),
87 cur_line, cur_po);
88 /* NOTREACHED */
89 }
90 c = getc(fp);
91 if (c == EOF) {
92 error(gettext(ERR_UNEXP_EOF),
93 cur_line, cur_po);
94 /* NOTREACHED */
95 }
96 tmpbuf[insize++] = (unsigned char)c;
97
98 /* initialize the conversion */
99 outptr = &outbuf[0];
100 outlen = sizeof (outbuf);
101 (void) iconv(cd, NULL, NULL, &outptr, &outlen);
102
103 continue;
104 /* NOTREACHED */
105 default:
106 /* should never happen */
107 error(ERR_INTERNAL,
108 cur_line, cur_po);
109 /* NOTREACHED */
110 }
111 /* NOTREACHED */
112 }
113 tmpbuf[insize] = '\0';
114 return (insize);
115 /* NOTRECHED */
116 }
117 }
118
119 static void
po_uninput(int c)120 po_uninput(int c)
121 {
122 (void) ungetc(c, fp);
123 if (c == '\n')
124 cur_line--;
125 }
126
127 static void
po_ungetc(struct ch * pch)128 po_ungetc(struct ch *pch)
129 {
130 if (backlen) {
131 error(gettext(ERR_INTERNAL), cur_line, cur_po);
132 /* NOTREACHED */
133 }
134 if (!pch->eof) {
135 backlen = pch->len;
136 (void) memcpy(backbuf, pch->buf, backlen);
137 }
138 }
139
140 static struct ch *
po_getc(void)141 po_getc(void)
142 {
143 static struct ch och;
144 int c;
145
146 if (backlen) {
147 och.len = backlen;
148 (void) memcpy(och.buf, backbuf, backlen);
149 backlen = 0;
150 return (&och);
151 }
152
153 for (; ; ) {
154 c = getc(fp);
155 if (c == EOF) {
156 if (ferror(fp)) {
157 /* error happend */
158 error(gettext(ERR_READ_FAILED), cur_po);
159 /* NOTREACHED */
160 }
161 och.len = 0;
162 och.eof = 1;
163 return (&och);
164 }
165 if (c == '\\') {
166 c = getc(fp);
167 if (c == '\n') {
168 /* this newline should be escaped */
169 cur_line++;
170 continue;
171 } else {
172 po_uninput(c);
173 och.len = 1;
174 och.eof = 0;
175 och.buf[0] = '\\';
176 return (&och);
177 }
178 /* NOTREACHED */
179 }
180 if (c == '\n') {
181 cur_line++;
182 och.len = 1;
183 och.eof = 0;
184 och.buf[0] = '\n';
185 return (&och);
186 }
187 if (isascii((unsigned char)c)) {
188 /* single byte ascii */
189 och.len = 1;
190 och.eof = 0;
191 och.buf[0] = (unsigned char)c;
192 return (&och);
193 }
194
195 och.len = get_mb(&och.buf[0], (unsigned char)c);
196 och.eof = 0;
197 return (&och);
198 }
199 /* NOTREACHED */
200 }
201
/*
 * extend_buf() grows the buffer *buf by add bytes.  *size is updated
 * to the new size and *buf to the pointer returned by Xrealloc().
 */
static void
extend_buf(char **buf, size_t *size, size_t add)
{
	size_t	new_size = *size + add;

	*size = new_size;
	*buf = (char *)Xrealloc(*buf, new_size);
}
211
212 static struct ch *
expand_es(void)213 expand_es(void)
214 {
215 int c, n, loop;
216 static struct ch och;
217 struct ch *pch;
218
219 pch = po_getc();
220 if (pch->eof) {
221 error(gettext(ERR_UNEXP_EOF),
222 cur_line, cur_po);
223 /* NOTREACHED */
224 }
225 if (pch->len > 1) {
226 /* not a valid escape sequence */
227 return (pch);
228 }
229
230 och.len = 1;
231 och.eof = 0;
232 switch (pch->buf[0]) {
233 case '"':
234 case '\\':
235 och.buf[0] = pch->buf[0];
236 break;
237 case 'b':
238 och.buf[0] = '\b';
239 break;
240 case 'f':
241 och.buf[0] = '\f';
242 break;
243 case 'n':
244 och.buf[0] = '\n';
245 break;
246 case 'r':
247 och.buf[0] = '\r';
248 break;
249 case 't':
250 och.buf[0] = '\t';
251 break;
252 case 'v':
253 och.buf[0] = '\v';
254 break;
255 case 'a':
256 och.buf[0] = '\a';
257 break;
258 case '0':
259 case '1':
260 case '2':
261 case '3':
262 case '4':
263 case '5':
264 case '6':
265 case '7':
266 /* octal */
267 c = pch->buf[0];
268 for (n = 0, loop = 0; ; ) {
269 n = n * 8 + c - '0';
270 loop++;
271 if (loop >= 3)
272 break;
273 pch = po_getc();
274 if (pch->eof) {
275 error(gettext(ERR_UNEXP_EOF),
276 cur_line, cur_po);
277 /* NOTREACHED */
278 }
279 if ((pch->len > 1) || (pch->buf[0] < '0') ||
280 (pch->buf[0] > '7'))
281 break;
282 c = pch->buf[0];
283 }
284 po_ungetc(pch);
285 och.buf[0] = (unsigned char)n;
286 break;
287 case 'x':
288 /* hex */
289 pch = po_getc();
290 if (pch->eof) {
291 error(gettext(ERR_UNEXP_EOF),
292 cur_line, cur_po);
293 /* NOTREACHED */
294 }
295 if (pch->len > 1) {
296 po_ungetc(pch);
297 och.buf[0] = 'x';
298 break;
299 }
300 c = pch->buf[0];
301 if (!isxdigit((unsigned char)c)) {
302 po_ungetc(pch);
303 och.buf[0] = 'x';
304 break;
305 }
306 if (isdigit((unsigned char)c)) {
307 n = c - '0';
308 } else if (isupper((unsigned char)c)) {
309 n = c - 'A' + 10;
310 } else {
311 n = c - 'a' + 10;
312 }
313
314 pch = po_getc();
315 if (pch->eof) {
316 error(gettext(ERR_UNEXP_EOF),
317 cur_line, cur_po);
318 /* NOTREACHED */
319 }
320 if (pch->len > 1) {
321 po_ungetc(pch);
322 och.buf[0] = (unsigned char)n;
323 break;
324 }
325 c = pch->buf[0];
326 if (!isxdigit((unsigned char)c)) {
327 po_ungetc(pch);
328 och.buf[0] = (unsigned char)n;
329 break;
330 }
331 n *= 16;
332 if (isdigit((unsigned char)c)) {
333 n += c - '0';
334 } else if (isupper((unsigned char)c)) {
335 n += c - 'A' + 10;
336 } else {
337 n += c - 'a' + 10;
338 }
339 och.buf[0] = (unsigned char)n;
340 break;
341
342 default:
343 och.buf[0] = pch->buf[0];
344 break;
345 }
346 return (&och);
347 }
348
349 int
yylex(void)350 yylex(void)
351 {
352 unsigned int uc;
353 struct ch *pch;
354 char *buf;
355 size_t buf_size, buf_pos;
356
357 for (; ; ) {
358 pch = po_getc();
359
360 if (pch->eof) {
361 /* EOF */
362 return (0);
363 }
364
365 if (pch->len > 1) {
366 /* multi byte */
367 yylval.c.len = pch->len;
368 (void) memcpy(yylval.c.buf, pch->buf, pch->len);
369 return (CHR);
370 }
371 /* single byte */
372 switch (pch->buf[0]) {
373 case ' ':
374 case '\t':
375 case '\n':
376 break;
377
378 case '#':
379 /* comment start */
380 buf_size = CBUFSIZE;
381 buf = (char *)Xmalloc(buf_size);
382 buf_pos = 0;
383 pch = po_getc();
384 while (!pch->eof &&
385 ((pch->len != 1) || (pch->buf[0] != '\n'))) {
386 if (buf_pos + pch->len + 1 > buf_size)
387 extend_buf(&buf, &buf_size, CBUFSIZE);
388 (void) memcpy(buf + buf_pos,
389 pch->buf, pch->len);
390 buf_pos += pch->len;
391 pch = po_getc();
392 }
393 buf[buf_pos] = '\0';
394 yylval.str = buf;
395 return (COMMENT);
396 /* NOTREACHED */
397
398 case '[':
399 case ']':
400 return (pch->buf[0]);
401 /* NOTREACHED */
402
403 case '"':
404 buf_size = MBUFSIZE;
405 buf = (char *)Xmalloc(buf_size);
406 buf_pos = 0;
407 for (; ; ) {
408 pch = po_getc();
409
410 if (pch->eof) {
411 /* EOF */
412 error(gettext(ERR_UNEXP_EOF),
413 cur_line, cur_po);
414 /* NOTREACHED */
415 }
416
417 if (pch->len == 1) {
418 uc = pch->buf[0];
419
420 if (uc == '\n') {
421 error(gettext(ERR_UNEXP_EOL),
422 cur_line, cur_po);
423 /* NOTREACHED */
424 }
425 if (uc == '"')
426 break;
427 if (uc == '\\')
428 pch = expand_es();
429 }
430 if (buf_pos + pch->len + 1 > buf_size)
431 extend_buf(&buf, &buf_size,
432 MBUFSIZE);
433 (void) memcpy(buf + buf_pos,
434 pch->buf, pch->len);
435 buf_pos += pch->len;
436 }
437
438 buf[buf_pos] = '\0';
439 yylval.str = buf;
440 return (STR);
441 /* NOTREACHED */
442
443 default:
444 uc = pch->buf[0];
445
446 if (isalpha(uc) || (uc == '_')) {
447 buf_size = KBUFSIZE;
448 buf = (char *)Xmalloc(buf_size);
449 buf_pos = 0;
450 buf[buf_pos++] = (char)uc;
451 pch = po_getc();
452 while (!pch->eof &&
453 (pch->len == 1) &&
454 (isalpha(uc = pch->buf[0]) ||
455 isdigit(uc) || (uc == '_'))) {
456 if (buf_pos + 1 + 1 > buf_size)
457 extend_buf(&buf, &buf_size,
458 KBUFSIZE);
459 buf[buf_pos++] = (char)uc;
460 pch = po_getc();
461 }
462 /* push back the last char */
463 po_ungetc(pch);
464 buf[buf_pos] = '\0';
465 yylval.str = buf;
466 if (buf_pos > MAX_KW_LEN) {
467 /* kbuf is longer than any keywords */
468 return (SYMBOL);
469 }
470 yylval.num = cur_line;
471 if (strcmp(buf, KW_DOMAIN) == 0) {
472 free(buf);
473 return (DOMAIN);
474 } else if (strcmp(buf, KW_MSGID) == 0) {
475 free(buf);
476 return (MSGID);
477 } else if (strcmp(buf, KW_MSGID_PLURAL) == 0) {
478 free(buf);
479 return (MSGID_PLURAL);
480 } else if (strcmp(buf, KW_MSGSTR) == 0) {
481 free(buf);
482 return (MSGSTR);
483 } else {
484 free(buf);
485 return (SYMBOL);
486 }
487 /* NOTREACHED */
488 }
489 if (isdigit(uc)) {
490 buf_size = NBUFSIZE;
491 buf = (char *)Xmalloc(buf_size);
492 buf_pos = 0;
493 buf[buf_pos++] = (char)uc;
494 pch = po_getc();
495 while (!pch->eof &&
496 (pch->len == 1) &&
497 isdigit(uc = pch->buf[0])) {
498 if (buf_pos + 1 + 1 > buf_size)
499 extend_buf(&buf, &buf_size,
500 NBUFSIZE);
501 buf[buf_pos++] = (char)uc;
502 pch = po_getc();
503 }
504 /* push back the last char */
505 po_ungetc(pch);
506 buf[buf_pos] = '\0';
507 yylval.num = atoi(buf);
508 free(buf);
509 return (NUM);
510 }
511 /* just a char */
512 yylval.c.len = 1;
513 yylval.c.buf[0] = uc;
514 return (CHR);
515 /* NOTREACHED */
516 }
517 }
518 }
519