1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2001, 2002 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include "gnu_msgfmt.h" 30 #include "gnu_lex.h" 31 #include "y.tab.h" 32 33 int cur_line = 1; 34 35 static char backbuf[MB_LEN_MAX]; 36 static int backlen = 0; 37 38 /* 39 * get_mb() returns one multibyte character. 40 * 41 * This function uses the iconv() function to find out one 42 * multibyte character from a sequence of bytes in the file stream. 43 * The conversion from the codeset specified in the PO file to UTF-8 44 * is performed. The funcition reads another byte and calls iconv(), 45 * until iconv() successfully returns as a valid UTF-8 character has 46 * been converted or returns EILSEQ. If iconv() successfully returned, 47 * the function returns the read bytes as one character. Otherwise, 48 * returns error. The string converted to UTF-8 in outbuf won't be 49 * used at all. 50 */ 51 static size_t 52 get_mb(unsigned char *tmpbuf, unsigned char fc) 53 { 54 int c; 55 char outbuf[8]; /* max size of a UTF-8 char */ 56 const char *inptr; 57 char *outptr; 58 size_t insize = 0, inlen, outlen, ret; 59 60 tmpbuf[insize++] = fc; /* size of tmpbuf is MB_LEN_MAX+1 */ 61 62 if (cd == (iconv_t)-1) { 63 /* no conversion */ 64 tmpbuf[insize] = '\0'; 65 return (insize); 66 } 67 68 for (; ; ) { 69 inptr = (const char *)tmpbuf; 70 outptr = &outbuf[0]; 71 inlen = insize; 72 outlen = sizeof (outbuf); 73 74 errno = 0; 75 ret = iconv(cd, &inptr, &inlen, &outptr, &outlen); 76 if (ret == (size_t)-1) { 77 /* iconv failed */ 78 switch (errno) { 79 case EILSEQ: 80 /* invalid character found */ 81 error(gettext(ERR_INVALID_CHAR), 82 cur_line, cur_po); 83 /* NOTREACHED */ 84 case EINVAL: 85 /* not enough input */ 86 if (insize == MB_LEN_MAX) { 87 /* invalid character found */ 88 error(gettext(ERR_INVALID_CHAR), 89 cur_line, cur_po); 90 /* NOTREACHED */ 91 } 92 c = getc(fp); 93 if (c == EOF) { 94 error(gettext(ERR_UNEXP_EOF), 95 cur_line, cur_po); 96 /* NOTREACHED */ 97 } 98 tmpbuf[insize++] = (unsigned char)c; 99 100 /* initialize the conversion */ 101 outptr = &outbuf[0]; 102 outlen = sizeof (outbuf); 103 (void) iconv(cd, NULL, NULL, &outptr, &outlen); 104 105 continue; 106 /* NOTREACHED */ 107 default: 108 /* should never happen */ 109 error(ERR_INTERNAL, 110 cur_line, cur_po); 111 /* NOTREACHED */ 112 } 113 /* NOTREACHED */ 114 } 115 tmpbuf[insize] = '\0'; 116 return (insize); 117 /* NOTRECHED */ 118 } 119 } 120 121 static void 122 po_uninput(int c) 123 { 124 (void) ungetc(c, fp); 125 if (c == '\n') 126 cur_line--; 127 } 128 129 static void 130 po_ungetc(struct ch *pch) 131 { 132 if (backlen) { 133 error(gettext(ERR_INTERNAL), cur_line, cur_po); 134 /* NOTREACHED */ 135 } 136 if (!pch->eof) { 137 backlen = pch->len; 138 (void) memcpy(backbuf, pch->buf, backlen); 139 } 140 } 141 142 static struct ch * 143 po_getc(void) 144 { 145 static struct ch och; 146 int c; 147 148 if (backlen) { 149 och.len = backlen; 150 (void) memcpy(och.buf, backbuf, backlen); 151 backlen = 0; 152 return (&och); 153 } 154 155 for (; ; ) { 156 c = getc(fp); 157 if (c == EOF) { 158 if (ferror(fp)) { 159 /* error happend */ 160 error(gettext(ERR_READ_FAILED), cur_po); 161 /* NOTREACHED */ 162 } 163 och.len = 0; 164 och.eof = 1; 165 return (&och); 166 } 167 if (c == '\\') { 168 c = getc(fp); 169 if (c == '\n') { 170 /* this newline should be escaped */ 171 cur_line++; 172 continue; 173 } else { 174 po_uninput(c); 175 och.len = 1; 176 och.eof = 0; 177 och.buf[0] = '\\'; 178 return (&och); 179 } 180 /* NOTREACHED */ 181 } 182 if (c == '\n') { 183 cur_line++; 184 och.len = 1; 185 och.eof = 0; 186 och.buf[0] = '\n'; 187 return (&och); 188 } 189 if (isascii((unsigned char)c)) { 190 /* single byte ascii */ 191 och.len = 1; 192 och.eof = 0; 193 och.buf[0] = (unsigned char)c; 194 return (&och); 195 } 196 197 och.len = get_mb(&och.buf[0], (unsigned char)c); 198 och.eof = 0; 199 return (&och); 200 } 201 /* NOTREACHED */ 202 } 203 204 static void 205 extend_buf(char **buf, size_t *size, size_t add) 206 { 207 char *tmp; 208 209 *size += add; 210 tmp = (char *)Xrealloc(*buf, *size); 211 *buf = tmp; 212 } 213 214 static struct ch * 215 expand_es(void) 216 { 217 int c, n, loop; 218 static struct ch och; 219 struct ch *pch; 220 221 pch = po_getc(); 222 if (pch->eof) { 223 error(gettext(ERR_UNEXP_EOF), 224 cur_line, cur_po); 225 /* NOTREACHED */ 226 } 227 if (pch->len > 1) { 228 /* not a valid escape sequence */ 229 return (pch); 230 } 231 232 och.len = 1; 233 och.eof = 0; 234 switch (pch->buf[0]) { 235 case '"': 236 case '\\': 237 och.buf[0] = pch->buf[0]; 238 break; 239 case 'b': 240 och.buf[0] = '\b'; 241 break; 242 case 'f': 243 och.buf[0] = '\f'; 244 break; 245 case 'n': 246 och.buf[0] = '\n'; 247 break; 248 case 'r': 249 och.buf[0] = '\r'; 250 break; 251 case 't': 252 och.buf[0] = '\t'; 253 break; 254 case 'v': 255 och.buf[0] = '\v'; 256 break; 257 case 'a': 258 och.buf[0] = '\a'; 259 break; 260 case '0': 261 case '1': 262 case '2': 263 case '3': 264 case '4': 265 case '5': 266 case '6': 267 case '7': 268 /* octal */ 269 c = pch->buf[0]; 270 for (n = 0, loop = 0; ; ) { 271 n = n * 8 + c - '0'; 272 loop++; 273 if (loop >= 3) 274 break; 275 pch = po_getc(); 276 if (pch->eof) { 277 error(gettext(ERR_UNEXP_EOF), 278 cur_line, cur_po); 279 /* NOTREACHED */ 280 } 281 if ((pch->len > 1) || (pch->buf[0] < '0') || 282 (pch->buf[0] > '7')) 283 break; 284 c = pch->buf[0]; 285 } 286 po_ungetc(pch); 287 och.buf[0] = (unsigned char)n; 288 break; 289 case 'x': 290 /* hex */ 291 pch = po_getc(); 292 if (pch->eof) { 293 error(gettext(ERR_UNEXP_EOF), 294 cur_line, cur_po); 295 /* NOTREACHED */ 296 } 297 if (pch->len > 1) { 298 po_ungetc(pch); 299 och.buf[0] = 'x'; 300 break; 301 } 302 c = pch->buf[0]; 303 if (!isxdigit((unsigned char)c)) { 304 po_ungetc(pch); 305 och.buf[0] = 'x'; 306 break; 307 } 308 if (isdigit((unsigned char)c)) { 309 n = c - '0'; 310 } else if (isupper((unsigned char)c)) { 311 n = c - 'A' + 10; 312 } else { 313 n = c - 'a' + 10; 314 } 315 316 pch = po_getc(); 317 if (pch->eof) { 318 error(gettext(ERR_UNEXP_EOF), 319 cur_line, cur_po); 320 /* NOTREACHED */ 321 } 322 if (pch->len > 1) { 323 po_ungetc(pch); 324 och.buf[0] = (unsigned char)n; 325 break; 326 } 327 c = pch->buf[0]; 328 if (!isxdigit((unsigned char)c)) { 329 po_ungetc(pch); 330 och.buf[0] = (unsigned char)n; 331 break; 332 } 333 n *= 16; 334 if (isdigit((unsigned char)c)) { 335 n += c - '0'; 336 } else if (isupper((unsigned char)c)) { 337 n += c - 'A' + 10; 338 } else { 339 n += c - 'a' + 10; 340 } 341 och.buf[0] = (unsigned char)n; 342 break; 343 344 default: 345 och.buf[0] = pch->buf[0]; 346 break; 347 } 348 return (&och); 349 } 350 351 int 352 yylex(void) 353 { 354 unsigned int uc; 355 struct ch *pch; 356 char *buf; 357 size_t buf_size, buf_pos; 358 359 for (; ; ) { 360 pch = po_getc(); 361 362 if (pch->eof) { 363 /* EOF */ 364 return (0); 365 } 366 367 if (pch->len > 1) { 368 /* multi byte */ 369 yylval.c.len = pch->len; 370 (void) memcpy(yylval.c.buf, pch->buf, pch->len); 371 return (CHR); 372 } 373 /* single byte */ 374 switch (pch->buf[0]) { 375 case ' ': 376 case '\t': 377 case '\n': 378 break; 379 380 case '#': 381 /* comment start */ 382 buf_size = CBUFSIZE; 383 buf = (char *)Xmalloc(buf_size); 384 buf_pos = 0; 385 pch = po_getc(); 386 while (!pch->eof && 387 ((pch->len != 1) || (pch->buf[0] != '\n'))) { 388 if (buf_pos + pch->len + 1 > buf_size) 389 extend_buf(&buf, &buf_size, CBUFSIZE); 390 (void) memcpy(buf + buf_pos, 391 pch->buf, pch->len); 392 buf_pos += pch->len; 393 pch = po_getc(); 394 } 395 buf[buf_pos] = '\0'; 396 yylval.str = buf; 397 return (COMMENT); 398 /* NOTREACHED */ 399 400 case '[': 401 case ']': 402 return (pch->buf[0]); 403 /* NOTREACHED */ 404 405 case '"': 406 buf_size = MBUFSIZE; 407 buf = (char *)Xmalloc(buf_size); 408 buf_pos = 0; 409 for (; ; ) { 410 pch = po_getc(); 411 412 if (pch->eof) { 413 /* EOF */ 414 error(gettext(ERR_UNEXP_EOF), 415 cur_line, cur_po); 416 /* NOTREACHED */ 417 } 418 419 if (pch->len == 1) { 420 uc = pch->buf[0]; 421 422 if (uc == '\n') { 423 error(gettext(ERR_UNEXP_EOL), 424 cur_line, cur_po); 425 /* NOTREACHED */ 426 } 427 if (uc == '"') 428 break; 429 if (uc == '\\') 430 pch = expand_es(); 431 } 432 if (buf_pos + pch->len + 1 > buf_size) 433 extend_buf(&buf, &buf_size, 434 MBUFSIZE); 435 (void) memcpy(buf + buf_pos, 436 pch->buf, pch->len); 437 buf_pos += pch->len; 438 } 439 440 buf[buf_pos] = '\0'; 441 yylval.str = buf; 442 return (STR); 443 /* NOTREACHED */ 444 445 default: 446 uc = pch->buf[0]; 447 448 if (isalpha(uc) || (uc == '_')) { 449 buf_size = KBUFSIZE; 450 buf = (char *)Xmalloc(buf_size); 451 buf_pos = 0; 452 buf[buf_pos++] = (char)uc; 453 pch = po_getc(); 454 while (!pch->eof && 455 (pch->len == 1) && 456 (isalpha(uc = pch->buf[0]) || 457 isdigit(uc) || (uc == '_'))) { 458 if (buf_pos + 1 + 1 > buf_size) 459 extend_buf(&buf, &buf_size, 460 KBUFSIZE); 461 buf[buf_pos++] = (char)uc; 462 pch = po_getc(); 463 } 464 /* push back the last char */ 465 po_ungetc(pch); 466 buf[buf_pos] = '\0'; 467 yylval.str = buf; 468 if (buf_pos > MAX_KW_LEN) { 469 /* kbuf is longer than any keywords */ 470 return (SYMBOL); 471 } 472 yylval.num = cur_line; 473 if (strcmp(buf, KW_DOMAIN) == 0) { 474 free(buf); 475 return (DOMAIN); 476 } else if (strcmp(buf, KW_MSGID) == 0) { 477 free(buf); 478 return (MSGID); 479 } else if (strcmp(buf, KW_MSGID_PLURAL) == 0) { 480 free(buf); 481 return (MSGID_PLURAL); 482 } else if (strcmp(buf, KW_MSGSTR) == 0) { 483 free(buf); 484 return (MSGSTR); 485 } else { 486 free(buf); 487 return (SYMBOL); 488 } 489 /* NOTREACHED */ 490 } 491 if (isdigit(uc)) { 492 buf_size = NBUFSIZE; 493 buf = (char *)Xmalloc(buf_size); 494 buf_pos = 0; 495 buf[buf_pos++] = (char)uc; 496 pch = po_getc(); 497 while (!pch->eof && 498 (pch->len == 1) && 499 isdigit(uc = pch->buf[0])) { 500 if (buf_pos + 1 + 1 > buf_size) 501 extend_buf(&buf, &buf_size, 502 NBUFSIZE); 503 buf[buf_pos++] = (char)uc; 504 pch = po_getc(); 505 } 506 /* push back the last char */ 507 po_ungetc(pch); 508 buf[buf_pos] = '\0'; 509 yylval.num = atoi(buf); 510 free(buf); 511 return (NUM); 512 } 513 /* just a char */ 514 yylval.c.len = 1; 515 yylval.c.buf[0] = uc; 516 return (CHR); 517 /* NOTREACHED */ 518 } 519 } 520 } 521