1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2001, 2002 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include "gnu_msgfmt.h" 28 #include "gnu_lex.h" 29 #include "y.tab.h" 30 31 int cur_line = 1; 32 33 static char backbuf[MB_LEN_MAX]; 34 static int backlen = 0; 35 36 /* 37 * get_mb() returns one multibyte character. 38 * 39 * This function uses the iconv() function to find out one 40 * multibyte character from a sequence of bytes in the file stream. 41 * The conversion from the codeset specified in the PO file to UTF-8 42 * is performed. The funcition reads another byte and calls iconv(), 43 * until iconv() successfully returns as a valid UTF-8 character has 44 * been converted or returns EILSEQ. If iconv() successfully returned, 45 * the function returns the read bytes as one character. Otherwise, 46 * returns error. The string converted to UTF-8 in outbuf won't be 47 * used at all. 48 */ 49 static size_t 50 get_mb(unsigned char *tmpbuf, unsigned char fc) 51 { 52 int c; 53 char outbuf[8]; /* max size of a UTF-8 char */ 54 const char *inptr; 55 char *outptr; 56 size_t insize = 0, inlen, outlen, ret; 57 58 tmpbuf[insize++] = fc; /* size of tmpbuf is MB_LEN_MAX+1 */ 59 60 if (cd == (iconv_t)-1) { 61 /* no conversion */ 62 tmpbuf[insize] = '\0'; 63 return (insize); 64 } 65 66 for (; ; ) { 67 inptr = (const char *)tmpbuf; 68 outptr = &outbuf[0]; 69 inlen = insize; 70 outlen = sizeof (outbuf); 71 72 errno = 0; 73 ret = iconv(cd, &inptr, &inlen, &outptr, &outlen); 74 if (ret == (size_t)-1) { 75 /* iconv failed */ 76 switch (errno) { 77 case EILSEQ: 78 /* invalid character found */ 79 error(gettext(ERR_INVALID_CHAR), 80 cur_line, cur_po); 81 /* NOTREACHED */ 82 case EINVAL: 83 /* not enough input */ 84 if (insize == MB_LEN_MAX) { 85 /* invalid character found */ 86 error(gettext(ERR_INVALID_CHAR), 87 cur_line, cur_po); 88 /* NOTREACHED */ 89 } 90 c = getc(fp); 91 if (c == EOF) { 92 error(gettext(ERR_UNEXP_EOF), 93 cur_line, cur_po); 94 /* NOTREACHED */ 95 } 96 tmpbuf[insize++] = (unsigned char)c; 97 98 /* initialize the conversion */ 99 outptr = &outbuf[0]; 100 outlen = sizeof (outbuf); 101 (void) iconv(cd, NULL, NULL, &outptr, &outlen); 102 103 continue; 104 /* NOTREACHED */ 105 default: 106 /* should never happen */ 107 error(ERR_INTERNAL, 108 cur_line, cur_po); 109 /* NOTREACHED */ 110 } 111 /* NOTREACHED */ 112 } 113 tmpbuf[insize] = '\0'; 114 return (insize); 115 /* NOTRECHED */ 116 } 117 } 118 119 static void 120 po_uninput(int c) 121 { 122 (void) ungetc(c, fp); 123 if (c == '\n') 124 cur_line--; 125 } 126 127 static void 128 po_ungetc(struct ch *pch) 129 { 130 if (backlen) { 131 error(gettext(ERR_INTERNAL), cur_line, cur_po); 132 /* NOTREACHED */ 133 } 134 if (!pch->eof) { 135 backlen = pch->len; 136 (void) memcpy(backbuf, pch->buf, backlen); 137 } 138 } 139 140 static struct ch * 141 po_getc(void) 142 { 143 static struct ch och; 144 int c; 145 146 if (backlen) { 147 och.len = backlen; 148 (void) memcpy(och.buf, backbuf, backlen); 149 backlen = 0; 150 return (&och); 151 } 152 153 for (; ; ) { 154 c = getc(fp); 155 if (c == EOF) { 156 if (ferror(fp)) { 157 /* error happend */ 158 error(gettext(ERR_READ_FAILED), cur_po); 159 /* NOTREACHED */ 160 } 161 och.len = 0; 162 och.eof = 1; 163 return (&och); 164 } 165 if (c == '\\') { 166 c = getc(fp); 167 if (c == '\n') { 168 /* this newline should be escaped */ 169 cur_line++; 170 continue; 171 } else { 172 po_uninput(c); 173 och.len = 1; 174 och.eof = 0; 175 och.buf[0] = '\\'; 176 return (&och); 177 } 178 /* NOTREACHED */ 179 } 180 if (c == '\n') { 181 cur_line++; 182 och.len = 1; 183 och.eof = 0; 184 och.buf[0] = '\n'; 185 return (&och); 186 } 187 if (isascii((unsigned char)c)) { 188 /* single byte ascii */ 189 och.len = 1; 190 och.eof = 0; 191 och.buf[0] = (unsigned char)c; 192 return (&och); 193 } 194 195 och.len = get_mb(&och.buf[0], (unsigned char)c); 196 och.eof = 0; 197 return (&och); 198 } 199 /* NOTREACHED */ 200 } 201 202 static void 203 extend_buf(char **buf, size_t *size, size_t add) 204 { 205 char *tmp; 206 207 *size += add; 208 tmp = (char *)Xrealloc(*buf, *size); 209 *buf = tmp; 210 } 211 212 static struct ch * 213 expand_es(void) 214 { 215 int c, n, loop; 216 static struct ch och; 217 struct ch *pch; 218 219 pch = po_getc(); 220 if (pch->eof) { 221 error(gettext(ERR_UNEXP_EOF), 222 cur_line, cur_po); 223 /* NOTREACHED */ 224 } 225 if (pch->len > 1) { 226 /* not a valid escape sequence */ 227 return (pch); 228 } 229 230 och.len = 1; 231 och.eof = 0; 232 switch (pch->buf[0]) { 233 case '"': 234 case '\\': 235 och.buf[0] = pch->buf[0]; 236 break; 237 case 'b': 238 och.buf[0] = '\b'; 239 break; 240 case 'f': 241 och.buf[0] = '\f'; 242 break; 243 case 'n': 244 och.buf[0] = '\n'; 245 break; 246 case 'r': 247 och.buf[0] = '\r'; 248 break; 249 case 't': 250 och.buf[0] = '\t'; 251 break; 252 case 'v': 253 och.buf[0] = '\v'; 254 break; 255 case 'a': 256 och.buf[0] = '\a'; 257 break; 258 case '0': 259 case '1': 260 case '2': 261 case '3': 262 case '4': 263 case '5': 264 case '6': 265 case '7': 266 /* octal */ 267 c = pch->buf[0]; 268 for (n = 0, loop = 0; ; ) { 269 n = n * 8 + c - '0'; 270 loop++; 271 if (loop >= 3) 272 break; 273 pch = po_getc(); 274 if (pch->eof) { 275 error(gettext(ERR_UNEXP_EOF), 276 cur_line, cur_po); 277 /* NOTREACHED */ 278 } 279 if ((pch->len > 1) || (pch->buf[0] < '0') || 280 (pch->buf[0] > '7')) 281 break; 282 c = pch->buf[0]; 283 } 284 po_ungetc(pch); 285 och.buf[0] = (unsigned char)n; 286 break; 287 case 'x': 288 /* hex */ 289 pch = po_getc(); 290 if (pch->eof) { 291 error(gettext(ERR_UNEXP_EOF), 292 cur_line, cur_po); 293 /* NOTREACHED */ 294 } 295 if (pch->len > 1) { 296 po_ungetc(pch); 297 och.buf[0] = 'x'; 298 break; 299 } 300 c = pch->buf[0]; 301 if (!isxdigit((unsigned char)c)) { 302 po_ungetc(pch); 303 och.buf[0] = 'x'; 304 break; 305 } 306 if (isdigit((unsigned char)c)) { 307 n = c - '0'; 308 } else if (isupper((unsigned char)c)) { 309 n = c - 'A' + 10; 310 } else { 311 n = c - 'a' + 10; 312 } 313 314 pch = po_getc(); 315 if (pch->eof) { 316 error(gettext(ERR_UNEXP_EOF), 317 cur_line, cur_po); 318 /* NOTREACHED */ 319 } 320 if (pch->len > 1) { 321 po_ungetc(pch); 322 och.buf[0] = (unsigned char)n; 323 break; 324 } 325 c = pch->buf[0]; 326 if (!isxdigit((unsigned char)c)) { 327 po_ungetc(pch); 328 och.buf[0] = (unsigned char)n; 329 break; 330 } 331 n *= 16; 332 if (isdigit((unsigned char)c)) { 333 n += c - '0'; 334 } else if (isupper((unsigned char)c)) { 335 n += c - 'A' + 10; 336 } else { 337 n += c - 'a' + 10; 338 } 339 och.buf[0] = (unsigned char)n; 340 break; 341 342 default: 343 och.buf[0] = pch->buf[0]; 344 break; 345 } 346 return (&och); 347 } 348 349 int 350 yylex(void) 351 { 352 unsigned int uc; 353 struct ch *pch; 354 char *buf; 355 size_t buf_size, buf_pos; 356 357 for (; ; ) { 358 pch = po_getc(); 359 360 if (pch->eof) { 361 /* EOF */ 362 return (0); 363 } 364 365 if (pch->len > 1) { 366 /* multi byte */ 367 yylval.c.len = pch->len; 368 (void) memcpy(yylval.c.buf, pch->buf, pch->len); 369 return (CHR); 370 } 371 /* single byte */ 372 switch (pch->buf[0]) { 373 case ' ': 374 case '\t': 375 case '\n': 376 break; 377 378 case '#': 379 /* comment start */ 380 buf_size = CBUFSIZE; 381 buf = (char *)Xmalloc(buf_size); 382 buf_pos = 0; 383 pch = po_getc(); 384 while (!pch->eof && 385 ((pch->len != 1) || (pch->buf[0] != '\n'))) { 386 if (buf_pos + pch->len + 1 > buf_size) 387 extend_buf(&buf, &buf_size, CBUFSIZE); 388 (void) memcpy(buf + buf_pos, 389 pch->buf, pch->len); 390 buf_pos += pch->len; 391 pch = po_getc(); 392 } 393 buf[buf_pos] = '\0'; 394 yylval.str = buf; 395 return (COMMENT); 396 /* NOTREACHED */ 397 398 case '[': 399 case ']': 400 return (pch->buf[0]); 401 /* NOTREACHED */ 402 403 case '"': 404 buf_size = MBUFSIZE; 405 buf = (char *)Xmalloc(buf_size); 406 buf_pos = 0; 407 for (; ; ) { 408 pch = po_getc(); 409 410 if (pch->eof) { 411 /* EOF */ 412 error(gettext(ERR_UNEXP_EOF), 413 cur_line, cur_po); 414 /* NOTREACHED */ 415 } 416 417 if (pch->len == 1) { 418 uc = pch->buf[0]; 419 420 if (uc == '\n') { 421 error(gettext(ERR_UNEXP_EOL), 422 cur_line, cur_po); 423 /* NOTREACHED */ 424 } 425 if (uc == '"') 426 break; 427 if (uc == '\\') 428 pch = expand_es(); 429 } 430 if (buf_pos + pch->len + 1 > buf_size) 431 extend_buf(&buf, &buf_size, 432 MBUFSIZE); 433 (void) memcpy(buf + buf_pos, 434 pch->buf, pch->len); 435 buf_pos += pch->len; 436 } 437 438 buf[buf_pos] = '\0'; 439 yylval.str = buf; 440 return (STR); 441 /* NOTREACHED */ 442 443 default: 444 uc = pch->buf[0]; 445 446 if (isalpha(uc) || (uc == '_')) { 447 buf_size = KBUFSIZE; 448 buf = (char *)Xmalloc(buf_size); 449 buf_pos = 0; 450 buf[buf_pos++] = (char)uc; 451 pch = po_getc(); 452 while (!pch->eof && 453 (pch->len == 1) && 454 (isalpha(uc = pch->buf[0]) || 455 isdigit(uc) || (uc == '_'))) { 456 if (buf_pos + 1 + 1 > buf_size) 457 extend_buf(&buf, &buf_size, 458 KBUFSIZE); 459 buf[buf_pos++] = (char)uc; 460 pch = po_getc(); 461 } 462 /* push back the last char */ 463 po_ungetc(pch); 464 buf[buf_pos] = '\0'; 465 yylval.str = buf; 466 if (buf_pos > MAX_KW_LEN) { 467 /* kbuf is longer than any keywords */ 468 return (SYMBOL); 469 } 470 yylval.num = cur_line; 471 if (strcmp(buf, KW_DOMAIN) == 0) { 472 free(buf); 473 return (DOMAIN); 474 } else if (strcmp(buf, KW_MSGID) == 0) { 475 free(buf); 476 return (MSGID); 477 } else if (strcmp(buf, KW_MSGID_PLURAL) == 0) { 478 free(buf); 479 return (MSGID_PLURAL); 480 } else if (strcmp(buf, KW_MSGSTR) == 0) { 481 free(buf); 482 return (MSGSTR); 483 } else { 484 free(buf); 485 return (SYMBOL); 486 } 487 /* NOTREACHED */ 488 } 489 if (isdigit(uc)) { 490 buf_size = NBUFSIZE; 491 buf = (char *)Xmalloc(buf_size); 492 buf_pos = 0; 493 buf[buf_pos++] = (char)uc; 494 pch = po_getc(); 495 while (!pch->eof && 496 (pch->len == 1) && 497 isdigit(uc = pch->buf[0])) { 498 if (buf_pos + 1 + 1 > buf_size) 499 extend_buf(&buf, &buf_size, 500 NBUFSIZE); 501 buf[buf_pos++] = (char)uc; 502 pch = po_getc(); 503 } 504 /* push back the last char */ 505 po_ungetc(pch); 506 buf[buf_pos] = '\0'; 507 yylval.num = atoi(buf); 508 free(buf); 509 return (NUM); 510 } 511 /* just a char */ 512 yylval.c.len = 1; 513 yylval.c.buf[0] = uc; 514 return (CHR); 515 /* NOTREACHED */ 516 } 517 } 518 } 519