1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include "lint.h" 28 #include "mtlib.h" 29 #include <ctype.h> 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <sys/types.h> 34 #include <sys/mman.h> 35 #include <sys/param.h> 36 #include <sys/stat.h> 37 #include <thread.h> 38 #include <synch.h> 39 #include <unistd.h> 40 #include <limits.h> 41 #include <errno.h> 42 #include <inttypes.h> 43 #include "libc.h" 44 #include "msgfmt.h" 45 #include "nlspath_checks.h" 46 #include "gettext.h" 47 48 /* The following symbols are just for GNU binary compatibility */ 49 int _nl_msg_cat_cntr; 50 int *_nl_domain_bindings; 51 52 static const char *nullstr = ""; 53 54 #define CHARSET_MOD "charset=" 55 #define CHARSET_LEN (sizeof (CHARSET_MOD) - 1) 56 #define NPLURALS_MOD "nplurals=" 57 #define NPLURALS_LEN (sizeof (NPLURALS_MOD) - 1) 58 #define PLURAL_MOD "plural=" 59 #define PLURAL_LEN (sizeof (PLURAL_MOD) - 1) 60 61 static uint32_t get_hash_index(uint32_t *, uint32_t, uint32_t); 62 63 /* 64 * free_conv_msgstr 65 * 66 * release the memory allocated for storing code-converted messages 67 * 68 * f 69 * 0: do not free gmnp->conv_msgstr 70 * 1: free gmnp->conv_msgstr 71 */ 72 static void 73 free_conv_msgstr(Msg_g_node *gmnp, int f) 74 { 75 uint32_t i, num_of_conv; 76 77 #ifdef GETTEXT_DEBUG 78 gprintf(0, "*************** free_conv_msgstr(0x%p, %d)\n", 79 (void *)gmnp, f); 80 printgnumsg(gmnp, 1); 81 #endif 82 83 num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str; 84 for (i = 0; i < num_of_conv; i++) { 85 if (gmnp->conv_msgstr[i]) { 86 free(gmnp->conv_msgstr[i]); 87 } 88 gmnp->conv_msgstr[i] = NULL; 89 } 90 if (f) { 91 free(gmnp->conv_msgstr); 92 gmnp->conv_msgstr = NULL; 93 } 94 } 95 96 /* 97 * dfltmsgstr 98 * 99 * choose an appropriate message by evaluating the plural expression, 100 * and return it. 101 */ 102 static char * 103 dfltmsgstr(Msg_g_node *gmnp, const char *msgstr, uint32_t msgstr_len, 104 struct msg_pack *mp) 105 { 106 unsigned int pindex; 107 size_t len; 108 const char *p; 109 110 #ifdef GETTEXT_DEBUG 111 gprintf(0, "*************** dfltmsgstr(0x%p, \"%s\", %u, 0x%p)\n", 112 (void *)gmnp, 113 msgstr ? msgstr : "(null)", msgstr_len, (void *)mp); 114 printgnumsg(gmnp, 1); 115 printmp(mp, 1); 116 #endif 117 118 if (mp->plural) { 119 if (gmnp->plural) { 120 pindex = plural_eval(gmnp->plural, mp->n); 121 } else { 122 /* 123 * This mo does not have plural information. 124 * Using the English form. 125 */ 126 if (mp->n == 1) 127 pindex = 0; 128 else 129 pindex = 1; 130 } 131 #ifdef GETTEXT_DEBUG 132 gprintf(0, "plural_eval returned: %u\n", pindex); 133 #endif 134 if (pindex >= gmnp->nplurals) { 135 /* should never happen */ 136 pindex = 0; 137 } 138 p = msgstr; 139 for (; pindex != 0; pindex--) { 140 len = msgstr_len - (p - msgstr); 141 p = memchr(p, '\0', len); 142 if (p == NULL) { 143 /* 144 * null byte not found 145 * this should never happen 146 */ 147 char *result; 148 DFLTMSG(result, mp->msgid1, mp->msgid2, 149 mp->n, mp->plural); 150 return (result); 151 } 152 p++; /* skip */ 153 } 154 return ((char *)p); 155 } 156 157 return ((char *)msgstr); 158 } 159 160 /* 161 * parse_header 162 * 163 * parse the header entry of the GNU MO file and 164 * extract the src encoding and the plural information of the MO file 165 */ 166 static int 167 parse_header(const char *header, Msg_g_node *gmnp) 168 { 169 char *charset = NULL; 170 char *charset_str; 171 size_t len; 172 char *nplurals_str, *plural_str; 173 plural_expr_t plural; 174 char *p, *q; 175 unsigned int nplurals; 176 int ret; 177 178 #ifdef GETTEXT_DEBUG 179 gprintf(0, "*************** parse_header(\"%s\", 0x%p)\n", 180 header ? header : "(null)", (void *)gmnp); 181 printgnumsg(gmnp, 1); 182 #endif 183 184 if (header == NULL) { 185 gmnp->src_encoding = (char *)nullstr; 186 gmnp->nplurals = 2; 187 gmnp->plural = NULL; 188 #ifdef GETTEXT_DEBUG 189 gprintf(0, "*************** exiting parse_header\n"); 190 gprintf(0, "no header\n"); 191 #endif 192 193 return (0); 194 } 195 196 charset_str = strstr(header, CHARSET_MOD); 197 if (charset_str == NULL) { 198 gmnp->src_encoding = (char *)nullstr; 199 } else { 200 p = charset_str + CHARSET_LEN; 201 q = p; 202 while ((*q != ' ') && (*q != '\t') && 203 (*q != '\n')) { 204 q++; 205 } 206 len = q - p; 207 if (len > 0) { 208 charset = malloc(len + 1); 209 if (charset == NULL) { 210 gmnp->src_encoding = (char *)nullstr; 211 gmnp->nplurals = 2; 212 gmnp->plural = NULL; 213 return (-1); 214 } 215 (void) memcpy(charset, p, len); 216 charset[len] = '\0'; 217 gmnp->src_encoding = charset; 218 } else { 219 gmnp->src_encoding = (char *)nullstr; 220 } 221 } 222 223 nplurals_str = strstr(header, NPLURALS_MOD); 224 plural_str = strstr(header, PLURAL_MOD); 225 if (nplurals_str == NULL || plural_str == NULL) { 226 /* no valid plural specification */ 227 gmnp->nplurals = 2; 228 gmnp->plural = NULL; 229 #ifdef GETTEXT_DEBUG 230 gprintf(0, "*************** exiting parse_header\n"); 231 gprintf(0, "no plural entry\n"); 232 #endif 233 return (0); 234 } else { 235 p = nplurals_str + NPLURALS_LEN; 236 while (*p && isspace((unsigned char)*p)) { 237 p++; 238 } 239 nplurals = (unsigned int)strtol(p, &q, 10); 240 if (p != q) { 241 gmnp->nplurals = nplurals; 242 } else { 243 gmnp->nplurals = 2; 244 } 245 246 p = plural_str + PLURAL_LEN; 247 #ifdef GETTEXT_DEBUG 248 gprintf(0, "plural_str: \"%s\"\n", p); 249 #endif 250 251 ret = plural_expr(&plural, (const char *)p); 252 if (ret == 0) { 253 /* parse succeeded */ 254 gmnp->plural = plural; 255 #ifdef GETTEXT_DEBUG 256 gprintf(0, "*************** exiting parse_header\n"); 257 gprintf(0, "charset: \"%s\"\n", 258 charset ? charset : "(null)"); 259 printexpr(plural, 1); 260 #endif 261 return (0); 262 } else if (ret == 1) { 263 /* parse error */ 264 gmnp->nplurals = 2; 265 gmnp->plural = NULL; 266 return (0); 267 } else { 268 /* fatal error */ 269 if (charset) 270 free(charset); 271 gmnp->src_encoding = (char *)nullstr; 272 gmnp->nplurals = 2; 273 gmnp->plural = NULL; 274 return (-1); 275 } 276 } 277 /* NOTREACHED */ 278 } 279 280 /* 281 * handle_lang 282 * 283 * take care of the LANGUAGE specification 284 */ 285 char * 286 handle_lang(struct msg_pack *mp) 287 { 288 const char *p, *op, *q; 289 size_t locale_len; 290 char *result; 291 char locale[MAXPATHLEN]; 292 293 294 #ifdef GETTEXT_DEBUG 295 gprintf(0, "*************** handle_lang(0x%p)\n", (void *)mp); 296 printmp(mp, 1); 297 #endif 298 299 p = mp->language; 300 301 while (*p) { 302 op = p; 303 q = strchr(p, ':'); 304 if (q == NULL) { 305 locale_len = strlen(p); 306 p += locale_len; 307 } else { 308 locale_len = q - p; 309 p += locale_len + 1; 310 } 311 if (locale_len >= MAXPATHLEN || locale_len == 0) { 312 /* illegal locale name */ 313 continue; 314 } 315 (void) memcpy(locale, op, locale_len); 316 locale[locale_len] = '\0'; 317 mp->locale = locale; 318 319 #ifdef GETTEXT_DEBUG 320 *mp->msgfile = '\0'; 321 #endif 322 if (mk_msgfile(mp) == NULL) { 323 /* illegal locale name */ 324 continue; 325 } 326 327 result = handle_mo(mp); 328 if (mp->status & ST_GNU_MSG_FOUND) 329 return (result); 330 331 if (mp->status & ST_SUN_MO_FOUND) 332 break; 333 } 334 335 /* 336 * no valid locale found, Sun MO found, or 337 * GNU MO found but no valid msg found there. 338 */ 339 340 if (mp->status & ST_GNU_MO_FOUND) { 341 /* 342 * GNU MO found but no valid msg found there. 343 * returning DFLTMSG. 344 */ 345 DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural); 346 return (result); 347 } 348 return (NULL); 349 } 350 351 /* 352 * gnu_msgsearch 353 * 354 * Searchs the translation message for the specified msgid1. 355 * Hash algorithm used in this function is Open Addressing 356 * with Double Hashing: 357 * H(k, i) = (H1(k) + i * H2(k)) mod M 358 * H1(k) = hashvalue % M 359 * H2(k) = 1 + (hashvalue % (M - 2)) 360 * 361 * Ref: The Art of Computer Programming Volume 3 362 * Sorting and Searching, second edition 363 * Donald E Knuth 364 */ 365 static char * 366 gnu_msgsearch(Msg_g_node *gmnp, const char *msgid1, 367 uint32_t *msgstrlen, uint32_t *midx) 368 { 369 struct gnu_msg_info *header = gmnp->msg_file_info; 370 struct gnu_msg_ent *msgid_tbl, *msgstr_tbl; 371 uint32_t num_of_str, idx, mlen, msglen; 372 uint32_t hash_size, hash_val, hash_id, hash_inc, hash_idx; 373 uint32_t *hash_table; 374 char *base; 375 char *msg; 376 377 #ifdef GETTEXT_DEBUG 378 gprintf(0, "*************** gnu_msgsearch(0x%p, \"%s\", " 379 "0x%p, 0x%p)\n", 380 (void *)gmnp, msgid1, msgstrlen, midx); 381 printgnumsg(gmnp, 1); 382 #endif 383 384 base = (char *)header; 385 386 msgid_tbl = gmnp->msg_tbl[MSGID]; 387 msgstr_tbl = gmnp->msg_tbl[MSGSTR]; 388 hash_table = gmnp->hash_table; 389 hash_size = gmnp->hash_size; 390 num_of_str = gmnp->num_of_str; 391 392 if (!(gmnp->flag & ST_REV1) && 393 (hash_table == NULL || (hash_size <= 2))) { 394 /* 395 * Revision 0 and 396 * No hash table exists or 397 * hash size is enough small. 398 */ 399 uint32_t top, bottom; 400 char *msg_id_str; 401 int val; 402 403 top = 0; 404 bottom = num_of_str; 405 while (top < bottom) { 406 idx = (top + bottom) / 2; 407 msg_id_str = base + 408 SWAP(gmnp, msgid_tbl[idx].offset); 409 410 val = strcmp(msg_id_str, msgid1); 411 if (val < 0) { 412 top = idx + 1; 413 } else if (val > 0) { 414 bottom = idx; 415 } else { 416 *msgstrlen = (unsigned int) 417 SWAP(gmnp, msgstr_tbl[idx].len) + 1; 418 *midx = idx; 419 return (base + 420 SWAP(gmnp, msgstr_tbl[idx].offset)); 421 } 422 } 423 /* not found */ 424 return ((char *)msgid1); 425 } 426 427 /* use hash table */ 428 hash_id = get_hashid(msgid1, &msglen); 429 hash_idx = hash_id % hash_size; 430 hash_inc = 1 + (hash_id % (hash_size - 2)); 431 432 for (;;) { 433 hash_val = HASH_TBL(gmnp, hash_table[hash_idx]); 434 435 if (hash_val == 0) { 436 /* not found */ 437 return ((char *)msgid1); 438 } 439 if (hash_val <= num_of_str) { 440 /* static message */ 441 idx = hash_val - 1; 442 mlen = SWAP(gmnp, msgid_tbl[idx].len); 443 msg = base + SWAP(gmnp, msgid_tbl[idx].offset); 444 } else { 445 if (!(gmnp->flag & ST_REV1)) { 446 /* rev 0 does not have dynamic message */ 447 return ((char *)msgid1); 448 } 449 /* dynamic message */ 450 idx = hash_val - num_of_str - 1; 451 mlen = gmnp->d_msg[MSGID][idx].len; 452 msg = gmnp->mchunk + gmnp->d_msg[MSGID][idx].offset; 453 } 454 if (msglen <= mlen && strcmp(msgid1, msg) == 0) { 455 /* found */ 456 break; 457 } 458 hash_idx = (hash_idx + hash_inc) % hash_size; 459 } 460 461 /* msgstrlen should include a null termination */ 462 if (hash_val <= num_of_str) { 463 *msgstrlen = SWAP(gmnp, msgstr_tbl[idx].len) + 1; 464 msg = base + SWAP(gmnp, msgstr_tbl[idx].offset); 465 *midx = idx; 466 } else { 467 *msgstrlen = gmnp->d_msg[MSGSTR][idx].len + 1; 468 msg = gmnp->mchunk + gmnp->d_msg[MSGSTR][idx].offset; 469 *midx = idx + num_of_str; 470 } 471 472 return (msg); 473 } 474 475 /* 476 * do_conv 477 * 478 * Converts the specified string from the src encoding 479 * to the dst encoding by calling iconv() 480 */ 481 static uint32_t * 482 do_conv(iconv_t fd, const char *src, uint32_t srclen) 483 { 484 uint32_t tolen; 485 uint32_t *ptr, *optr; 486 size_t oleft, ileft, bufsize, memincr; 487 char *to, *tptr; 488 489 #ifdef GETTEXT_DEBUG 490 gprintf(0, "*************** do_conv(" 491 "0x%p, \"%s\", %d)\n", 492 (void *)fd, src ? src : "(null)", srclen); 493 #endif 494 495 memincr = srclen * 2; 496 bufsize = memincr; 497 ileft = srclen; 498 oleft = bufsize; 499 ptr = malloc(bufsize + sizeof (uint32_t)); 500 if (ptr == NULL) { 501 return (NULL); 502 } 503 to = (char *)(ptr + 1); 504 505 for (;;) { 506 tptr = to; 507 errno = 0; 508 #ifdef GETTEXT_DEBUG 509 gprintf(0, "******* calling iconv()\n"); 510 #endif 511 if (iconv(fd, &src, &ileft, &tptr, &oleft) == (size_t)-1) { 512 if (errno == E2BIG) { 513 #ifdef GETTEXT_DEBUG 514 gprintf(0, "******* iconv detected E2BIG\n"); 515 gprintf(0, "old bufsize: %u\n", bufsize); 516 #endif 517 518 optr = realloc(ptr, 519 bufsize + memincr + sizeof (uint32_t)); 520 if (optr == NULL) { 521 free(ptr); 522 return (NULL); 523 } 524 ptr = optr; 525 to = (char *)(optr + 1); 526 to += bufsize - oleft; 527 oleft += memincr; 528 bufsize += memincr; 529 #ifdef GETTEXT_DEBUG 530 gprintf(0, "new bufsize: %u\n", bufsize); 531 #endif 532 continue; 533 } else { 534 tolen = (uint32_t)(bufsize - oleft); 535 break; 536 } 537 } 538 tolen = (uint32_t)(bufsize - oleft); 539 break; 540 } 541 542 if (tolen < bufsize) { 543 /* shrink the buffer */ 544 optr = realloc(ptr, tolen + sizeof (uint32_t)); 545 if (optr == NULL) { 546 free(ptr); 547 return (NULL); 548 } 549 ptr = optr; 550 } 551 *ptr = tolen; 552 553 #ifdef GETTEXT_DEBUG 554 gprintf(0, "******* exiting do_conv()\n"); 555 gprintf(0, "tolen: %u\n", *ptr); 556 gprintf(0, "return: 0x%p\n", ptr); 557 #endif 558 return (ptr); 559 } 560 561 /* 562 * conv_msg 563 */ 564 static char * 565 conv_msg(Msg_g_node *gmnp, char *msgstr, uint32_t msgstr_len, uint32_t midx, 566 struct msg_pack *mp) 567 { 568 uint32_t *conv_dst; 569 size_t num_of_conv, conv_msgstr_len; 570 char *conv_msgstr, *result; 571 572 if (gmnp->conv_msgstr == NULL) { 573 num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str; 574 gmnp->conv_msgstr = 575 calloc((size_t)num_of_conv, sizeof (uint32_t *)); 576 if (gmnp->conv_msgstr == NULL) { 577 /* malloc failed */ 578 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp); 579 return (result); 580 } 581 } 582 583 conv_dst = do_conv(gmnp->fd, (const char *)msgstr, msgstr_len); 584 585 if (conv_dst == NULL) { 586 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp); 587 return (result); 588 } 589 conv_msgstr_len = *conv_dst; 590 gmnp->conv_msgstr[midx] = conv_dst; 591 conv_msgstr = (char *)(conv_dst + 1); 592 result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp); 593 return (result); 594 } 595 596 /* 597 * gnu_key_2_text 598 * 599 * Extracts msgstr from the GNU MO file 600 */ 601 char * 602 gnu_key_2_text(Msg_g_node *gmnp, const char *codeset, 603 struct msg_pack *mp) 604 { 605 uint32_t msgstr_len, midx; 606 iconv_t fd; 607 char *result, *msgstr; 608 int ret, conversion, new_encoding; 609 610 #ifdef GETTEXT_DEBUG 611 gprintf(0, "*************** gnu_key_2_text(" 612 "0x%p, \"%s\", 0x%p)\n", 613 (void *)gmnp, codeset ? codeset : "(null)", (void *)mp); 614 printgnumsg(gmnp, 1); 615 printmp(mp, 1); 616 #endif 617 618 /* first checks if header entry has been processed */ 619 if (!(gmnp->flag & ST_CHK)) { 620 char *msg_header; 621 622 msg_header = gnu_msgsearch(gmnp, "", &msgstr_len, &midx); 623 ret = parse_header((const char *)msg_header, gmnp); 624 if (ret == -1) { 625 /* fatal error */ 626 DFLTMSG(result, mp->msgid1, mp->msgid2, 627 mp->n, mp->plural); 628 return (result); 629 } 630 gmnp->flag |= ST_CHK; 631 } 632 msgstr = gnu_msgsearch(gmnp, mp->msgid1, &msgstr_len, &midx); 633 if (msgstr == mp->msgid1) { 634 /* not found */ 635 DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural); 636 return (result); 637 } 638 639 #ifdef GETTEXT_DEBUG 640 printgnumsg(gmnp, 1); 641 #endif 642 if (gmnp->dst_encoding == NULL) { 643 /* 644 * destination encoding has not been set. 645 */ 646 char *dupcodeset = strdup(codeset); 647 if (dupcodeset == NULL) { 648 /* strdup failed */ 649 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp); 650 return (result); 651 } 652 gmnp->dst_encoding = dupcodeset; 653 654 if (strcmp(gmnp->dst_encoding, gmnp->src_encoding) == 0) { 655 /* 656 * target encoding and src encoding 657 * are the same. 658 * No conversion required. 659 */ 660 conversion = 0; 661 } else { 662 /* 663 * target encoding is different from 664 * src encoding. 665 * New conversion required. 666 */ 667 /* sanity check */ 668 if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) { 669 (void) iconv_close(gmnp->fd); 670 gmnp->fd = (iconv_t)-1; 671 } 672 if (gmnp->conv_msgstr) 673 free_conv_msgstr(gmnp, 0); 674 conversion = 1; 675 new_encoding = 1; 676 } 677 } else { 678 /* 679 * dst encoding has been already set. 680 */ 681 if (strcmp(gmnp->dst_encoding, codeset) == 0) { 682 /* 683 * dst encoding and target encoding are the same. 684 */ 685 if (strcmp(gmnp->dst_encoding, gmnp->src_encoding) 686 == 0) { 687 /* 688 * dst encoding and src encoding are the same. 689 * No conversion required. 690 */ 691 conversion = 0; 692 } else { 693 /* 694 * dst encoding is different from src encoding. 695 * current conversion is valid. 696 */ 697 conversion = 1; 698 new_encoding = 0; 699 /* checks if iconv_open has succeeded before */ 700 if (gmnp->fd == (iconv_t)-1) { 701 /* 702 * iconv_open should have failed before 703 * Assume this conversion is invalid 704 */ 705 conversion = 0; 706 } else { 707 if (gmnp->conv_msgstr == NULL) { 708 /* 709 * memory allocation for 710 * conv_msgstr should 711 * have failed before. 712 */ 713 new_encoding = 1; 714 if (gmnp->fd) 715 (void) iconv_close( 716 gmnp->fd); 717 gmnp->fd = (iconv_t)-1; 718 } 719 } 720 } 721 } else { 722 /* 723 * dst encoding is different from target encoding. 724 * It has changed since before. 725 */ 726 char *dupcodeset = strdup(codeset); 727 if (dupcodeset == NULL) { 728 result = dfltmsgstr(gmnp, msgstr, 729 msgstr_len, mp); 730 return (result); 731 } 732 free(gmnp->dst_encoding); 733 gmnp->dst_encoding = dupcodeset; 734 if (strcmp(gmnp->dst_encoding, gmnp->src_encoding) 735 == 0) { 736 /* 737 * dst encoding and src encoding are the same. 738 * now, no conversion required. 739 */ 740 conversion = 0; 741 if (gmnp->conv_msgstr) 742 free_conv_msgstr(gmnp, 1); 743 } else { 744 /* 745 * dst encoding is different from src encoding. 746 * new conversion required. 747 */ 748 conversion = 1; 749 new_encoding = 1; 750 if (gmnp->conv_msgstr) 751 free_conv_msgstr(gmnp, 0); 752 } 753 754 if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) { 755 (void) iconv_close(gmnp->fd); 756 } 757 if (gmnp->fd != (iconv_t)-1) { 758 gmnp->fd = (iconv_t)-1; 759 } 760 } 761 } 762 763 if (conversion == 0) { 764 /* no conversion */ 765 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp); 766 return (result); 767 } 768 /* conversion required */ 769 770 if (new_encoding == 0) { 771 /* dst codeset hasn't been changed since before */ 772 uint32_t *cmsg; 773 uint32_t conv_msgstr_len; 774 char *conv_msgstr; 775 776 if (gmnp->conv_msgstr[midx] == NULL) { 777 /* this msgstr hasn't been converted yet */ 778 result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp); 779 return (result); 780 } 781 /* this msgstr is in the conversion cache */ 782 cmsg = (uint32_t *)(uintptr_t)gmnp->conv_msgstr[midx]; 783 conv_msgstr_len = *cmsg; 784 conv_msgstr = (char *)(cmsg + 1); 785 result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp); 786 return (result); 787 } 788 /* new conversion */ 789 #ifdef GETTEXT_DEBUG 790 gprintf(0, "******* calling iconv_open()\n"); 791 gprintf(0, " dst: \"%s\", src: \"%s\"\n", 792 gmnp->dst_encoding, gmnp->src_encoding); 793 #endif 794 fd = iconv_open(gmnp->dst_encoding, gmnp->src_encoding); 795 gmnp->fd = fd; 796 if (fd == (iconv_t)-1) { 797 /* 798 * iconv_open() failed. 799 * no conversion 800 */ 801 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp); 802 return (result); 803 } 804 result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp); 805 return (result); 806 } 807 808 809 #define PRI_STR(x, n) PRI##x##n 810 #define PRI_LEN(x, n) (char)(sizeof (PRI_STR(x, n)) - 1) 811 #define PRIS(P, x) {\ 812 /* x/N/ */ P(x, 8), P(x, 16), P(x, 32), P(x, 64), \ 813 /* xLEAST/N/ */ P(x, LEAST8), P(x, LEAST16), P(x, LEAST32), P(x, LEAST64), \ 814 /* xFAST/N/ */ P(x, FAST8), P(x, FAST16), P(x, FAST32), P(x, FAST64), \ 815 /* xMAX,PTR */ P(x, MAX), P(x, PTR) \ 816 } 817 818 #define PRI_BIAS_LEAST 4 819 #define PRI_BIAS_FAST 8 820 #define PRI_BIAS_MAX 12 821 #define PRI_BIAS_PTR 13 822 823 static const char *pri_d[] = PRIS(PRI_STR, d); 824 static const char *pri_i[] = PRIS(PRI_STR, i); 825 static const char *pri_o[] = PRIS(PRI_STR, o); 826 static const char *pri_u[] = PRIS(PRI_STR, u); 827 static const char *pri_x[] = PRIS(PRI_STR, x); 828 static const char *pri_X[] = PRIS(PRI_STR, X); 829 830 static const char pri_d_len[] = PRIS(PRI_LEN, d); 831 static const char pri_i_len[] = PRIS(PRI_LEN, i); 832 static const char pri_o_len[] = PRIS(PRI_LEN, o); 833 static const char pri_u_len[] = PRIS(PRI_LEN, u); 834 static const char pri_x_len[] = PRIS(PRI_LEN, x); 835 static const char pri_X_len[] = PRIS(PRI_LEN, X); 836 837 static struct { 838 const char type; 839 const char **str_table; 840 const char *len_table; 841 } pri_table[] = { 842 {'d', pri_d, pri_d_len}, {'i', pri_i, pri_i_len}, 843 {'o', pri_o, pri_o_len}, {'u', pri_u, pri_u_len}, 844 {'x', pri_x, pri_x_len}, {'X', pri_X, pri_X_len}, 845 }; 846 847 static struct { 848 const char *name; 849 const char nlen; 850 const char want_digits; 851 const char bias; 852 } special_table[] = { 853 {"LEAST", 5, 1, PRI_BIAS_LEAST}, 854 {"FAST", 4, 1, PRI_BIAS_FAST}, 855 {"MAX", 3, 0, PRI_BIAS_MAX}, 856 {"PTR", 3, 0, PRI_BIAS_PTR}, 857 }; 858 859 /* 860 * conv_macro() returns the conversion specifier corresponding 861 * to the macro name specified in 'name'. 'len' contains the 862 * length of the macro name including the null termination. 863 * '*elen' will be set to the length of the returning conversion 864 * specifier without the null termination. 865 */ 866 static const char * 867 conv_macro(const char *str, uint32_t len, uint32_t *lenp) 868 { 869 const char **tbl; 870 const char *ltbl; 871 char *next; 872 int n, i, num, bias, idx, want_digits; 873 874 if (len == 2) { 875 if (*str == 'I') { 876 /* Solaris does not support %I */ 877 *lenp = 0; 878 return (""); 879 } 880 return (NULL); 881 } 882 883 if (len <= 4 || strncmp(str, "PRI", 3) != 0) 884 return (NULL); 885 886 str += 3; 887 888 n = sizeof (pri_table) / sizeof (pri_table[0]); 889 for (i = 0; i < n; i++) { 890 if (pri_table[i].type == *str) 891 break; 892 } 893 if (i == n) 894 return (NULL); 895 tbl = pri_table[i].str_table; 896 ltbl = pri_table[i].len_table; 897 898 str++; 899 idx = want_digits = 0; 900 901 if (isdigit((unsigned char)*str)) { 902 /* PRIx/N/ */ 903 bias = 0; 904 want_digits = 1; 905 } else { 906 n = sizeof (special_table) / sizeof (special_table[0]); 907 for (i = 0; i < n; i++) { 908 if (strncmp(special_table[i].name, 909 str, special_table[i].nlen) == 0) { 910 break; 911 } 912 } 913 if (i == n) 914 return (NULL); 915 bias = special_table[i].bias; 916 want_digits = special_table[i].want_digits; 917 str += special_table[i].nlen; 918 } 919 920 if (want_digits) { 921 if (!isdigit((unsigned char)*str)) 922 return (NULL); 923 num = strtol(str, &next, 10); 924 /* see if it is 8/16/32/64 */ 925 for (n = 8, idx = 0; idx < 4; idx++, n *= 2) { 926 if (n == num) 927 break; 928 } 929 if (idx == 4) 930 return (NULL); 931 str = next; 932 } 933 if (*str != '\0') { 934 /* unknow format */ 935 return (NULL); 936 } 937 938 *lenp = (uint32_t)ltbl[bias + idx]; 939 return (tbl[bias + idx]); 940 } 941 942 static gnu_d_macro_t * 943 expand_macros(Msg_g_node *p) 944 { 945 char *base = (char *)p->msg_file_info; 946 struct gnu_msg_rev1_info *rev1_header = p->rev1_header; 947 struct gnu_msg_ent *d_macro_tbl; 948 gnu_d_macro_t *d_macro; 949 uint32_t num_of_d_macro, e_maclen, maclen, i; 950 const char *e_macname; 951 char *macname; 952 953 /* number of the dynamic macros */ 954 num_of_d_macro = SWAP(p, rev1_header->num_of_dynamic_macro); 955 956 d_macro = malloc((size_t)num_of_d_macro * sizeof (gnu_d_macro_t)); 957 if (d_macro == NULL) 958 return (NULL); 959 960 /* pointer to the dynamic strings table */ 961 d_macro_tbl = (struct gnu_msg_ent *)(uintptr_t) 962 (base + SWAP(p, rev1_header->off_dynamic_macro)); 963 964 for (i = 0; i < num_of_d_macro; i++) { 965 macname = base + SWAP(p, d_macro_tbl[i].offset); 966 maclen = SWAP(p, d_macro_tbl[i].len); 967 968 /* 969 * sanity check 970 * maclen includes a null termination. 971 */ 972 if (maclen != strlen(macname) + 1) { 973 free(d_macro); 974 return (NULL); 975 } 976 e_macname = conv_macro(macname, maclen, &e_maclen); 977 if (e_macname == NULL) { 978 free(d_macro); 979 return (NULL); 980 } 981 d_macro[i].len = e_maclen; 982 d_macro[i].ptr = e_macname; 983 } 984 985 return (d_macro); 986 } 987 988 static char * 989 expand_dynamic_message(Msg_g_node *p, struct gnu_msg_ent **e_msgs) 990 { 991 992 char *base = (char *)p->msg_file_info; 993 struct gnu_msg_rev1_info *rev1_header = p->rev1_header; 994 struct gnu_dynamic_tbl *d_info; 995 struct gnu_dynamic_ent *entry; 996 gnu_d_macro_t *d_macro; 997 uint32_t num_of_d_str, mlen, dlen, didx, i, j; 998 uint32_t off_d_tbl; 999 uint32_t *d_msg_off_tbl; 1000 size_t mchunk_size, used, need; 1001 char *mchunk, *msg; 1002 1003 #define MEM_INCR (1024) 1004 1005 d_macro = expand_macros(p); 1006 if (d_macro == NULL) 1007 return (NULL); 1008 1009 /* number of dynamic messages */ 1010 num_of_d_str = p->num_of_d_str; 1011 1012 mchunk = NULL; 1013 mchunk_size = 0; /* size of the allocated memory in mchunk */ 1014 used = 0; /* size of the used memory in mchunk */ 1015 for (i = MSGID; i <= MSGSTR; i++) { 1016 /* pointer to the offset table of dynamic msgids/msgstrs */ 1017 off_d_tbl = SWAP(p, 1018 i == MSGID ? rev1_header->off_dynamic_msgid_tbl : 1019 rev1_header->off_dynamic_msgstr_tbl); 1020 /* pointer to the dynamic msgids/msgstrs */ 1021 d_msg_off_tbl = (uint32_t *)(uintptr_t)(base + off_d_tbl); 1022 for (j = 0; j < num_of_d_str; j++) { 1023 e_msgs[i][j].offset = used; 1024 d_info = (struct gnu_dynamic_tbl *)(uintptr_t) 1025 (base + SWAP(p, d_msg_off_tbl[j])); 1026 entry = d_info->entry; 1027 msg = base + SWAP(p, d_info->offset); 1028 1029 for (;;) { 1030 mlen = SWAP(p, entry->len); 1031 didx = SWAP(p, entry->idx); 1032 dlen = (didx == NOMORE_DYNAMIC_MACRO) ? 0 : 1033 d_macro[didx].len; 1034 need = used + mlen + dlen; 1035 if (need >= mchunk_size) { 1036 char *t; 1037 size_t n = mchunk_size; 1038 do { 1039 n += MEM_INCR; 1040 } while (n <= need); 1041 t = realloc(mchunk, n); 1042 if (t == NULL) { 1043 free(d_macro); 1044 free(mchunk); 1045 return (NULL); 1046 } 1047 mchunk = t; 1048 mchunk_size = n; 1049 } 1050 (void) memcpy(mchunk + used, msg, (size_t)mlen); 1051 msg += mlen; 1052 used += mlen; 1053 1054 if (didx == NOMORE_DYNAMIC_MACRO) { 1055 /* 1056 * Last segment of a static 1057 * msg string contains a null 1058 * termination, so an explicit 1059 * null termination is not required 1060 * here. 1061 */ 1062 break; 1063 } 1064 (void) memcpy(mchunk + used, 1065 d_macro[didx].ptr, (size_t)dlen); 1066 used += dlen; 1067 entry++; /* to next entry */ 1068 } 1069 /* 1070 * e_msgs[][].len does not include a null termination 1071 */ 1072 e_msgs[i][j].len = used - e_msgs[i][j].offset - 1; 1073 } 1074 } 1075 1076 free(d_macro); 1077 1078 /* shrink mchunk to 'used' */ 1079 { 1080 char *t; 1081 t = realloc(mchunk, used); 1082 if (t == NULL) { 1083 free(mchunk); 1084 return (NULL); 1085 } 1086 mchunk = t; 1087 } 1088 1089 return (mchunk); 1090 } 1091 1092 static int 1093 build_rev1_info(Msg_g_node *p) 1094 { 1095 uint32_t *d_hash; 1096 uint32_t num_of_d_str, num_of_str; 1097 uint32_t idx, hash_value, hash_size; 1098 size_t hash_mem_size; 1099 size_t d_msgid_size, d_msgstr_size; 1100 char *chunk, *mchunk; 1101 int i; 1102 1103 #ifdef GETTEXT_DEBUG 1104 gprintf(0, "******* entering build_rev1_info(0x%p)\n", p); 1105 printgnumsg(p, 1); 1106 #endif 1107 1108 if (p->hash_table == NULL) { 1109 /* Revision 1 always requires the hash table */ 1110 return (-1); 1111 } 1112 1113 num_of_str = p->num_of_str; 1114 hash_size = p->hash_size; 1115 num_of_d_str = p->num_of_d_str; 1116 1117 hash_mem_size = hash_size * sizeof (uint32_t); 1118 ROUND(hash_mem_size, sizeof (struct gnu_msg_ent)); 1119 1120 d_msgid_size = num_of_d_str * sizeof (struct gnu_msg_ent); 1121 d_msgstr_size = num_of_d_str * sizeof (struct gnu_msg_ent); 1122 1123 chunk = malloc(hash_mem_size + d_msgid_size + d_msgstr_size); 1124 if (chunk == NULL) { 1125 return (-1); 1126 } 1127 1128 d_hash = (uint32_t *)(uintptr_t)chunk; 1129 p->d_msg[MSGID] = (struct gnu_msg_ent *)(uintptr_t) 1130 (chunk + hash_mem_size); 1131 p->d_msg[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t) 1132 (chunk + hash_mem_size + d_msgid_size); 1133 1134 if ((mchunk = expand_dynamic_message(p, p->d_msg)) == NULL) { 1135 free(chunk); 1136 return (-1); 1137 } 1138 1139 /* copy the original hash table into the dynamic hash table */ 1140 for (i = 0; i < hash_size; i++) { 1141 d_hash[i] = SWAP(p, p->hash_table[i]); 1142 } 1143 1144 /* fill in the dynamic hash table with dynamic messages */ 1145 for (i = 0; i < num_of_d_str; i++) { 1146 hash_value = get_hashid(mchunk + p->d_msg[MSGID][i].offset, 1147 NULL); 1148 idx = get_hash_index(d_hash, hash_value, hash_size); 1149 d_hash[idx] = num_of_str + i + 1; 1150 } 1151 1152 p->mchunk = mchunk; 1153 p->hash_table = d_hash; 1154 1155 #ifdef GETTEXT_DEBUG 1156 print_rev1_info(p); 1157 gprintf(0, "******* exiting build_rev1_info()\n"); 1158 printgnumsg(p, 1); 1159 #endif 1160 1161 return (0); 1162 } 1163 1164 /* 1165 * gnu_setmsg 1166 * 1167 * INPUT 1168 * mnp - message node 1169 * addr - address to the mmapped file 1170 * size - size of the file 1171 * 1172 * RETURN 1173 * 0 - either T_GNU_MO or T_ILL_MO has been set 1174 * -1 - failed 1175 */ 1176 int 1177 gnu_setmsg(Msg_node *mnp, char *addr, size_t size) 1178 { 1179 struct gnu_msg_info *gnu_header; 1180 Msg_g_node *p; 1181 1182 #ifdef GETTEXT_DEBUG 1183 gprintf(0, "******** entering gnu_setmsg(0x%p, 0x%p, %lu)\n", 1184 (void *)mnp, addr, size); 1185 printmnp(mnp, 1); 1186 #endif 1187 1188 /* checks the GNU MAGIC number */ 1189 if (size < sizeof (struct gnu_msg_info)) { 1190 /* invalid mo file */ 1191 mnp->type = T_ILL_MO; 1192 #ifdef GETTEXT_DEBUG 1193 gprintf(0, "********* exiting gnu_setmsg\n"); 1194 printmnp(mnp, 1); 1195 #endif 1196 return (0); 1197 } 1198 1199 gnu_header = (struct gnu_msg_info *)(uintptr_t)addr; 1200 1201 p = calloc(1, sizeof (Msg_g_node)); 1202 if (p == NULL) { 1203 return (-1); 1204 } 1205 p->msg_file_info = gnu_header; 1206 1207 if (gnu_header->magic == GNU_MAGIC) { 1208 switch (gnu_header->revision) { 1209 case GNU_REVISION_0_1: 1210 case GNU_REVISION_1_1: 1211 p->flag |= ST_REV1; 1212 break; 1213 } 1214 } else if (gnu_header->magic == GNU_MAGIC_SWAPPED) { 1215 p->flag |= ST_SWP; 1216 switch (gnu_header->revision) { 1217 case GNU_REVISION_0_1_SWAPPED: 1218 case GNU_REVISION_1_1_SWAPPED: 1219 p->flag |= ST_REV1; 1220 break; 1221 } 1222 } else { 1223 /* invalid mo file */ 1224 free(p); 1225 mnp->type = T_ILL_MO; 1226 #ifdef GETTEXT_DEBUG 1227 gprintf(0, "********* exiting gnu_setmsg\n"); 1228 printmnp(mnp, 1); 1229 #endif 1230 return (0); 1231 } 1232 1233 p->fsize = size; 1234 p->num_of_str = SWAP(p, gnu_header->num_of_str); 1235 p->hash_size = SWAP(p, gnu_header->sz_hashtbl); 1236 p->hash_table = p->hash_size <= 2 ? NULL : 1237 (uint32_t *)(uintptr_t) 1238 (addr + SWAP(p, gnu_header->off_hashtbl)); 1239 1240 p->msg_tbl[MSGID] = (struct gnu_msg_ent *)(uintptr_t) 1241 (addr + SWAP(p, gnu_header->off_msgid_tbl)); 1242 p->msg_tbl[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t) 1243 (addr + SWAP(p, gnu_header->off_msgstr_tbl)); 1244 1245 if (p->flag & ST_REV1) { 1246 /* Revision 1 */ 1247 struct gnu_msg_rev1_info *rev1_header; 1248 1249 rev1_header = (struct gnu_msg_rev1_info *) 1250 (uintptr_t)(addr + sizeof (struct gnu_msg_info)); 1251 p->rev1_header = rev1_header; 1252 p->num_of_d_str = SWAP(p, rev1_header->num_of_dynamic_str); 1253 if (build_rev1_info(p) == -1) { 1254 free(p); 1255 #ifdef GETTEXT_DEBUG 1256 gprintf(0, "******** exiting gnu_setmsg: " 1257 "build_rev1_info() failed\n"); 1258 #endif 1259 return (-1); 1260 } 1261 } 1262 1263 mnp->msg.gnumsg = p; 1264 mnp->type = T_GNU_MO; 1265 1266 #ifdef GETTEXT_DEBUG 1267 gprintf(0, "********* exiting gnu_setmsg\n"); 1268 printmnp(mnp, 1); 1269 #endif 1270 return (0); 1271 } 1272 1273 /* 1274 * get_hash_index 1275 * 1276 * Returns the index to an empty slot in the hash table 1277 * for the specified hash_value. 1278 */ 1279 static uint32_t 1280 get_hash_index(uint32_t *hash_tbl, uint32_t hash_value, uint32_t hash_size) 1281 { 1282 uint32_t idx, inc; 1283 1284 idx = hash_value % hash_size; 1285 inc = 1 + (hash_value % (hash_size - 2)); 1286 1287 for (;;) { 1288 if (hash_tbl[idx] == 0) { 1289 /* found an empty slot */ 1290 return (idx); 1291 } 1292 idx = (idx + inc) % hash_size; 1293 } 1294 /* NOTREACHED */ 1295 } 1296