1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include "lint.h" 28 #include "mtlib.h" 29 #include <ctype.h> 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <string.h> 33 #include <sys/types.h> 34 #include <sys/mman.h> 35 #include <sys/param.h> 36 #include <sys/stat.h> 37 #include <thread.h> 38 #include <synch.h> 39 #include <unistd.h> 40 #include <limits.h> 41 #include <errno.h> 42 #include <inttypes.h> 43 #include "libc.h" 44 #include "msgfmt.h" 45 #include "nlspath_checks.h" 46 #include "gettext.h" 47 48 #ifdef DEBUG 49 #include <assert.h> 50 #endif 51 52 /* The following symbols are just for GNU binary compatibility */ 53 int _nl_msg_cat_cntr; 54 int *_nl_domain_bindings; 55 56 static const char *nullstr = ""; 57 58 #define CHARSET_MOD "charset=" 59 #define CHARSET_LEN (sizeof (CHARSET_MOD) - 1) 60 #define NPLURALS_MOD "nplurals=" 61 #define NPLURALS_LEN (sizeof (NPLURALS_MOD) - 1) 62 #define PLURAL_MOD "plural=" 63 #define PLURAL_LEN (sizeof (PLURAL_MOD) - 1) 64 65 static uint32_t get_hash_index(uint32_t *, uint32_t, uint32_t); 66 67 /* 68 * 
free_conv_msgstr 69 * 70 * release the memory allocated for storing code-converted messages 71 * 72 * f 73 * 0: do not free gmnp->conv_msgstr 74 * 1: free gmnp->conv_msgstr 75 */ 76 static void 77 free_conv_msgstr(Msg_g_node *gmnp, int f) 78 { 79 uint32_t i, num_of_conv; 80 81 #ifdef GETTEXT_DEBUG 82 gprintf(0, "*************** free_conv_msgstr(0x%p, %d)\n", 83 (void *)gmnp, f); 84 printgnumsg(gmnp, 1); 85 #endif 86 87 num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str; 88 for (i = 0; i < num_of_conv; i++) { 89 if (gmnp->conv_msgstr[i]) { 90 free(gmnp->conv_msgstr[i]); 91 } 92 gmnp->conv_msgstr[i] = NULL; 93 } 94 if (f) { 95 free(gmnp->conv_msgstr); 96 gmnp->conv_msgstr = NULL; 97 } 98 } 99 100 /* 101 * dfltmsgstr 102 * 103 * choose an appropriate message by evaluating the plural expression, 104 * and return it. 105 */ 106 static char * 107 dfltmsgstr(Msg_g_node *gmnp, const char *msgstr, uint32_t msgstr_len, 108 struct msg_pack *mp) 109 { 110 unsigned int pindex; 111 size_t len; 112 const char *p; 113 114 #ifdef GETTEXT_DEBUG 115 gprintf(0, "*************** dfltmsgstr(0x%p, \"%s\", %u, 0x%p)\n", 116 (void *)gmnp, 117 msgstr ? msgstr : "(null)", msgstr_len, (void *)mp); 118 printgnumsg(gmnp, 1); 119 printmp(mp, 1); 120 #endif 121 122 if (mp->plural) { 123 if (gmnp->plural) { 124 pindex = plural_eval(gmnp->plural, mp->n); 125 } else { 126 /* 127 * This mo does not have plural information. 128 * Using the English form. 
129 */ 130 if (mp->n == 1) 131 pindex = 0; 132 else 133 pindex = 1; 134 } 135 #ifdef GETTEXT_DEBUG 136 gprintf(0, "plural_eval returned: %u\n", pindex); 137 #endif 138 if (pindex >= gmnp->nplurals) { 139 /* should never happen */ 140 pindex = 0; 141 } 142 p = msgstr; 143 for (; pindex != 0; pindex--) { 144 len = msgstr_len - (p - msgstr); 145 p = memchr(p, '\0', len); 146 if (p == NULL) { 147 /* 148 * null byte not found 149 * this should never happen 150 */ 151 char *result; 152 DFLTMSG(result, mp->msgid1, mp->msgid2, 153 mp->n, mp->plural); 154 return (result); 155 } 156 p++; /* skip */ 157 } 158 return ((char *)p); 159 } 160 161 return ((char *)msgstr); 162 } 163 164 /* 165 * parse_header 166 * 167 * parse the header entry of the GNU MO file and 168 * extract the src encoding and the plural information of the MO file 169 */ 170 static int 171 parse_header(const char *header, Msg_g_node *gmnp) 172 { 173 char *charset = NULL; 174 char *charset_str; 175 size_t len; 176 char *nplurals_str, *plural_str; 177 plural_expr_t plural; 178 char *p, *q; 179 unsigned int nplurals; 180 int ret; 181 182 #ifdef GETTEXT_DEBUG 183 gprintf(0, "*************** parse_header(\"%s\", 0x%p)\n", 184 header ? 
header : "(null)", (void *)gmnp); 185 printgnumsg(gmnp, 1); 186 #endif 187 188 if (header == NULL) { 189 gmnp->src_encoding = (char *)nullstr; 190 gmnp->nplurals = 2; 191 gmnp->plural = NULL; 192 #ifdef GETTEXT_DEBUG 193 gprintf(0, "*************** exiting parse_header\n"); 194 gprintf(0, "no header\n"); 195 #endif 196 197 return (0); 198 } 199 200 charset_str = strstr(header, CHARSET_MOD); 201 if (charset_str == NULL) { 202 gmnp->src_encoding = (char *)nullstr; 203 } else { 204 p = charset_str + CHARSET_LEN; 205 q = p; 206 while ((*q != ' ') && (*q != '\t') && 207 (*q != '\n')) { 208 q++; 209 } 210 len = q - p; 211 if (len > 0) { 212 charset = malloc(len + 1); 213 if (charset == NULL) { 214 gmnp->src_encoding = (char *)nullstr; 215 gmnp->nplurals = 2; 216 gmnp->plural = NULL; 217 return (-1); 218 } 219 (void) memcpy(charset, p, len); 220 charset[len] = '\0'; 221 gmnp->src_encoding = charset; 222 } else { 223 gmnp->src_encoding = (char *)nullstr; 224 } 225 } 226 227 nplurals_str = strstr(header, NPLURALS_MOD); 228 plural_str = strstr(header, PLURAL_MOD); 229 if (nplurals_str == NULL || plural_str == NULL) { 230 /* no valid plural specification */ 231 gmnp->nplurals = 2; 232 gmnp->plural = NULL; 233 #ifdef GETTEXT_DEBUG 234 gprintf(0, "*************** exiting parse_header\n"); 235 gprintf(0, "no plural entry\n"); 236 #endif 237 return (0); 238 } else { 239 p = nplurals_str + NPLURALS_LEN; 240 while (*p && isspace((unsigned char)*p)) { 241 p++; 242 } 243 nplurals = (unsigned int)strtol(p, &q, 10); 244 if (p != q) { 245 gmnp->nplurals = nplurals; 246 } else { 247 gmnp->nplurals = 2; 248 } 249 250 p = plural_str + PLURAL_LEN; 251 #ifdef GETTEXT_DEBUG 252 gprintf(0, "plural_str: \"%s\"\n", p); 253 #endif 254 255 ret = plural_expr(&plural, (const char *)p); 256 if (ret == 0) { 257 /* parse succeeded */ 258 gmnp->plural = plural; 259 #ifdef GETTEXT_DEBUG 260 gprintf(0, "*************** exiting parse_header\n"); 261 gprintf(0, "charset: \"%s\"\n", 262 charset ? 
charset : "(null)"); 263 printexpr(plural, 1); 264 #endif 265 return (0); 266 } else if (ret == 1) { 267 /* parse error */ 268 gmnp->nplurals = 2; 269 gmnp->plural = NULL; 270 return (0); 271 } else { 272 /* fatal error */ 273 if (charset) 274 free(charset); 275 gmnp->src_encoding = (char *)nullstr; 276 gmnp->nplurals = 2; 277 gmnp->plural = NULL; 278 return (-1); 279 } 280 } 281 /* NOTREACHED */ 282 } 283 284 /* 285 * handle_lang 286 * 287 * take care of the LANGUAGE specification 288 */ 289 char * 290 handle_lang(struct msg_pack *mp) 291 { 292 const char *p, *op, *q; 293 size_t locale_len; 294 char *result; 295 char locale[MAXPATHLEN]; 296 297 298 #ifdef GETTEXT_DEBUG 299 gprintf(0, "*************** handle_lang(0x%p)\n", (void *)mp); 300 printmp(mp, 1); 301 #endif 302 303 p = mp->language; 304 305 while (*p) { 306 op = p; 307 q = strchr(p, ':'); 308 if (q == NULL) { 309 locale_len = strlen(p); 310 p += locale_len; 311 } else { 312 locale_len = q - p; 313 p += locale_len + 1; 314 } 315 if (locale_len >= MAXPATHLEN || locale_len == 0) { 316 /* illegal locale name */ 317 continue; 318 } 319 (void) memcpy(locale, op, locale_len); 320 locale[locale_len] = '\0'; 321 mp->locale = locale; 322 323 #ifdef GETTEXT_DEBUG 324 *mp->msgfile = '\0'; 325 #endif 326 if (mk_msgfile(mp) == NULL) { 327 /* illegal locale name */ 328 continue; 329 } 330 331 result = handle_mo(mp); 332 if (mp->status & ST_GNU_MSG_FOUND) 333 return (result); 334 335 if (mp->status & ST_SUN_MO_FOUND) 336 break; 337 } 338 339 /* 340 * no valid locale found, Sun MO found, or 341 * GNU MO found but no valid msg found there. 342 */ 343 344 if (mp->status & ST_GNU_MO_FOUND) { 345 /* 346 * GNU MO found but no valid msg found there. 347 * returning DFLTMSG. 348 */ 349 DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural); 350 return (result); 351 } 352 return (NULL); 353 } 354 355 /* 356 * gnu_msgsearch 357 * 358 * Searchs the translation message for the specified msgid1. 
359 * Hash algorithm used in this function is Open Addressing 360 * with Double Hashing: 361 * H(k, i) = (H1(k) + i * H2(k)) mod M 362 * H1(k) = hashvalue % M 363 * H2(k) = 1 + (hashvalue % (M - 2)) 364 * 365 * Ref: The Art of Computer Programming Volume 3 366 * Sorting and Searching, second edition 367 * Donald E Knuth 368 */ 369 static char * 370 gnu_msgsearch(Msg_g_node *gmnp, const char *msgid1, 371 uint32_t *msgstrlen, uint32_t *midx) 372 { 373 struct gnu_msg_info *header = gmnp->msg_file_info; 374 struct gnu_msg_ent *msgid_tbl, *msgstr_tbl; 375 uint32_t num_of_str, idx, mlen, msglen; 376 uint32_t hash_size, hash_val, hash_id, hash_inc, hash_idx; 377 uint32_t *hash_table; 378 char *base; 379 char *msg; 380 381 #ifdef GETTEXT_DEBUG 382 gprintf(0, "*************** gnu_msgsearch(0x%p, \"%s\", " 383 "0x%p, 0x%p)\n", 384 (void *)gmnp, msgid1, msgstrlen, midx); 385 printgnumsg(gmnp, 1); 386 #endif 387 388 base = (char *)header; 389 390 msgid_tbl = gmnp->msg_tbl[MSGID]; 391 msgstr_tbl = gmnp->msg_tbl[MSGSTR]; 392 hash_table = gmnp->hash_table; 393 hash_size = gmnp->hash_size; 394 num_of_str = gmnp->num_of_str; 395 396 if (!(gmnp->flag & ST_REV1) && 397 (hash_table == NULL || (hash_size <= 2))) { 398 /* 399 * Revision 0 and 400 * No hash table exists or 401 * hash size is enough small. 
402 */ 403 uint32_t top, bottom; 404 char *msg_id_str; 405 int val; 406 407 top = 0; 408 bottom = num_of_str; 409 while (top < bottom) { 410 idx = (top + bottom) / 2; 411 msg_id_str = base + 412 SWAP(gmnp, msgid_tbl[idx].offset); 413 414 val = strcmp(msg_id_str, msgid1); 415 if (val < 0) { 416 top = idx + 1; 417 } else if (val > 0) { 418 bottom = idx; 419 } else { 420 *msgstrlen = (unsigned int) 421 SWAP(gmnp, msgstr_tbl[idx].len) + 1; 422 *midx = idx; 423 return (base + 424 SWAP(gmnp, msgstr_tbl[idx].offset)); 425 } 426 } 427 /* not found */ 428 return ((char *)msgid1); 429 } 430 431 /* use hash table */ 432 hash_id = get_hashid(msgid1, &msglen); 433 hash_idx = hash_id % hash_size; 434 hash_inc = 1 + (hash_id % (hash_size - 2)); 435 436 for (;;) { 437 hash_val = HASH_TBL(gmnp, hash_table[hash_idx]); 438 439 if (hash_val == 0) { 440 /* not found */ 441 return ((char *)msgid1); 442 } 443 if (hash_val <= num_of_str) { 444 /* static message */ 445 idx = hash_val - 1; 446 mlen = SWAP(gmnp, msgid_tbl[idx].len); 447 msg = base + SWAP(gmnp, msgid_tbl[idx].offset); 448 } else { 449 if (!(gmnp->flag & ST_REV1)) { 450 /* rev 0 does not have dynamic message */ 451 return ((char *)msgid1); 452 } 453 /* dynamic message */ 454 idx = hash_val - num_of_str - 1; 455 mlen = gmnp->d_msg[MSGID][idx].len; 456 msg = gmnp->mchunk + gmnp->d_msg[MSGID][idx].offset; 457 } 458 if (msglen <= mlen && strcmp(msgid1, msg) == 0) { 459 /* found */ 460 break; 461 } 462 hash_idx = (hash_idx + hash_inc) % hash_size; 463 } 464 465 /* msgstrlen should include a null termination */ 466 if (hash_val <= num_of_str) { 467 *msgstrlen = SWAP(gmnp, msgstr_tbl[idx].len) + 1; 468 msg = base + SWAP(gmnp, msgstr_tbl[idx].offset); 469 *midx = idx; 470 } else { 471 *msgstrlen = gmnp->d_msg[MSGSTR][idx].len + 1; 472 msg = gmnp->mchunk + gmnp->d_msg[MSGSTR][idx].offset; 473 *midx = idx + num_of_str; 474 } 475 476 return (msg); 477 } 478 479 /* 480 * do_conv 481 * 482 * Converts the specified string from the src 
encoding 483 * to the dst encoding by calling iconv() 484 */ 485 static uint32_t * 486 do_conv(iconv_t fd, const char *src, uint32_t srclen) 487 { 488 uint32_t tolen; 489 uint32_t *ptr, *optr; 490 size_t oleft, ileft, bufsize, memincr; 491 char *to, *tptr; 492 493 #ifdef GETTEXT_DEBUG 494 gprintf(0, "*************** do_conv(" 495 "0x%p, \"%s\", %d)\n", 496 (void *)fd, src ? src : "(null)", srclen); 497 #endif 498 499 memincr = srclen * 2; 500 bufsize = memincr; 501 ileft = srclen; 502 oleft = bufsize; 503 ptr = malloc(bufsize + sizeof (uint32_t)); 504 if (ptr == NULL) { 505 return (NULL); 506 } 507 to = (char *)(ptr + 1); 508 509 for (;;) { 510 tptr = to; 511 errno = 0; 512 #ifdef GETTEXT_DEBUG 513 gprintf(0, "******* calling iconv()\n"); 514 #endif 515 if (iconv(fd, &src, &ileft, &tptr, &oleft) == (size_t)-1) { 516 if (errno == E2BIG) { 517 #ifdef GETTEXT_DEBUG 518 gprintf(0, "******* iconv detected E2BIG\n"); 519 gprintf(0, "old bufsize: %u\n", bufsize); 520 #endif 521 522 optr = realloc(ptr, 523 bufsize + memincr + sizeof (uint32_t)); 524 if (optr == NULL) { 525 free(ptr); 526 return (NULL); 527 } 528 ptr = optr; 529 to = (char *)(optr + 1); 530 to += bufsize - oleft; 531 oleft += memincr; 532 bufsize += memincr; 533 #ifdef GETTEXT_DEBUG 534 gprintf(0, "new bufsize: %u\n", bufsize); 535 #endif 536 continue; 537 } else { 538 tolen = (uint32_t)(bufsize - oleft); 539 break; 540 } 541 } 542 tolen = (uint32_t)(bufsize - oleft); 543 break; 544 } 545 546 if (tolen < bufsize) { 547 /* shrink the buffer */ 548 optr = realloc(ptr, tolen + sizeof (uint32_t)); 549 if (optr == NULL) { 550 free(ptr); 551 return (NULL); 552 } 553 ptr = optr; 554 } 555 *ptr = tolen; 556 557 #ifdef GETTEXT_DEBUG 558 gprintf(0, "******* exiting do_conv()\n"); 559 gprintf(0, "tolen: %u\n", *ptr); 560 gprintf(0, "return: 0x%p\n", ptr); 561 #endif 562 return (ptr); 563 } 564 565 /* 566 * conv_msg 567 */ 568 static char * 569 conv_msg(Msg_g_node *gmnp, char *msgstr, uint32_t msgstr_len, uint32_t 
midx, 570 struct msg_pack *mp) 571 { 572 uint32_t *conv_dst; 573 size_t num_of_conv, conv_msgstr_len; 574 char *conv_msgstr, *result; 575 576 if (gmnp->conv_msgstr == NULL) { 577 num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str; 578 gmnp->conv_msgstr = 579 calloc((size_t)num_of_conv, sizeof (uint32_t *)); 580 if (gmnp->conv_msgstr == NULL) { 581 /* malloc failed */ 582 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp); 583 return (result); 584 } 585 } 586 587 conv_dst = do_conv(gmnp->fd, (const char *)msgstr, msgstr_len); 588 589 if (conv_dst == NULL) { 590 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp); 591 return (result); 592 } 593 conv_msgstr_len = *conv_dst; 594 gmnp->conv_msgstr[midx] = conv_dst; 595 conv_msgstr = (char *)(conv_dst + 1); 596 result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp); 597 return (result); 598 } 599 600 /* 601 * gnu_key_2_text 602 * 603 * Extracts msgstr from the GNU MO file 604 */ 605 char * 606 gnu_key_2_text(Msg_g_node *gmnp, const char *codeset, 607 struct msg_pack *mp) 608 { 609 uint32_t msgstr_len, midx; 610 iconv_t fd; 611 char *result, *msgstr; 612 int ret, conversion, new_encoding; 613 614 #ifdef GETTEXT_DEBUG 615 gprintf(0, "*************** gnu_key_2_text(" 616 "0x%p, \"%s\", 0x%p)\n", 617 (void *)gmnp, codeset ? 
codeset : "(null)", (void *)mp); 618 printgnumsg(gmnp, 1); 619 printmp(mp, 1); 620 #endif 621 622 /* first checks if header entry has been processed */ 623 if (!(gmnp->flag & ST_CHK)) { 624 char *msg_header; 625 626 msg_header = gnu_msgsearch(gmnp, "", &msgstr_len, &midx); 627 ret = parse_header((const char *)msg_header, gmnp); 628 if (ret == -1) { 629 /* fatal error */ 630 DFLTMSG(result, mp->msgid1, mp->msgid2, 631 mp->n, mp->plural); 632 return (result); 633 } 634 gmnp->flag |= ST_CHK; 635 } 636 msgstr = gnu_msgsearch(gmnp, mp->msgid1, &msgstr_len, &midx); 637 if (msgstr == mp->msgid1) { 638 /* not found */ 639 DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural); 640 return (result); 641 } 642 643 #ifdef GETTEXT_DEBUG 644 printgnumsg(gmnp, 1); 645 #endif 646 if (gmnp->dst_encoding == NULL) { 647 /* 648 * destination encoding has not been set. 649 */ 650 char *dupcodeset = strdup(codeset); 651 if (dupcodeset == NULL) { 652 /* strdup failed */ 653 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp); 654 return (result); 655 } 656 gmnp->dst_encoding = dupcodeset; 657 658 if (strcmp(gmnp->dst_encoding, gmnp->src_encoding) == 0) { 659 /* 660 * target encoding and src encoding 661 * are the same. 662 * No conversion required. 663 */ 664 conversion = 0; 665 } else { 666 /* 667 * target encoding is different from 668 * src encoding. 669 * New conversion required. 670 */ 671 /* sanity check */ 672 if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) { 673 (void) iconv_close(gmnp->fd); 674 gmnp->fd = (iconv_t)-1; 675 } 676 if (gmnp->conv_msgstr) 677 free_conv_msgstr(gmnp, 0); 678 conversion = 1; 679 new_encoding = 1; 680 } 681 } else { 682 /* 683 * dst encoding has been already set. 684 */ 685 if (strcmp(gmnp->dst_encoding, codeset) == 0) { 686 /* 687 * dst encoding and target encoding are the same. 688 */ 689 if (strcmp(gmnp->dst_encoding, gmnp->src_encoding) 690 == 0) { 691 /* 692 * dst encoding and src encoding are the same. 693 * No conversion required. 
694 */ 695 conversion = 0; 696 } else { 697 /* 698 * dst encoding is different from src encoding. 699 * current conversion is valid. 700 */ 701 conversion = 1; 702 new_encoding = 0; 703 /* checks if iconv_open has succeeded before */ 704 if (gmnp->fd == (iconv_t)-1) { 705 /* 706 * iconv_open should have failed before 707 * Assume this conversion is invalid 708 */ 709 conversion = 0; 710 } else { 711 if (gmnp->conv_msgstr == NULL) { 712 /* 713 * memory allocation for 714 * conv_msgstr should 715 * have failed before. 716 */ 717 new_encoding = 1; 718 if (gmnp->fd) 719 (void) iconv_close( 720 gmnp->fd); 721 gmnp->fd = (iconv_t)-1; 722 } 723 } 724 } 725 } else { 726 /* 727 * dst encoding is different from target encoding. 728 * It has changed since before. 729 */ 730 char *dupcodeset = strdup(codeset); 731 if (dupcodeset == NULL) { 732 result = dfltmsgstr(gmnp, msgstr, 733 msgstr_len, mp); 734 return (result); 735 } 736 free(gmnp->dst_encoding); 737 gmnp->dst_encoding = dupcodeset; 738 if (strcmp(gmnp->dst_encoding, gmnp->src_encoding) 739 == 0) { 740 /* 741 * dst encoding and src encoding are the same. 742 * now, no conversion required. 743 */ 744 conversion = 0; 745 if (gmnp->conv_msgstr) 746 free_conv_msgstr(gmnp, 1); 747 } else { 748 /* 749 * dst encoding is different from src encoding. 750 * new conversion required. 
751 */ 752 conversion = 1; 753 new_encoding = 1; 754 if (gmnp->conv_msgstr) 755 free_conv_msgstr(gmnp, 0); 756 } 757 758 if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) { 759 (void) iconv_close(gmnp->fd); 760 } 761 if (gmnp->fd != (iconv_t)-1) { 762 gmnp->fd = (iconv_t)-1; 763 } 764 } 765 } 766 767 if (conversion == 0) { 768 /* no conversion */ 769 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp); 770 return (result); 771 } 772 /* conversion required */ 773 774 if (new_encoding == 0) { 775 /* dst codeset hasn't been changed since before */ 776 uint32_t *cmsg; 777 uint32_t conv_msgstr_len; 778 char *conv_msgstr; 779 780 if (gmnp->conv_msgstr[midx] == NULL) { 781 /* this msgstr hasn't been converted yet */ 782 result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp); 783 return (result); 784 } 785 /* this msgstr is in the conversion cache */ 786 cmsg = (uint32_t *)(uintptr_t)gmnp->conv_msgstr[midx]; 787 conv_msgstr_len = *cmsg; 788 conv_msgstr = (char *)(cmsg + 1); 789 result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp); 790 return (result); 791 } 792 /* new conversion */ 793 #ifdef GETTEXT_DEBUG 794 gprintf(0, "******* calling iconv_open()\n"); 795 gprintf(0, " dst: \"%s\", src: \"%s\"\n", 796 gmnp->dst_encoding, gmnp->src_encoding); 797 #endif 798 fd = iconv_open(gmnp->dst_encoding, gmnp->src_encoding); 799 gmnp->fd = fd; 800 if (fd == (iconv_t)-1) { 801 /* 802 * iconv_open() failed. 
803 * no conversion 804 */ 805 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp); 806 return (result); 807 } 808 result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp); 809 return (result); 810 } 811 812 813 #define PRI_STR(x, n) PRI##x##n 814 #define PRI_LEN(x, n) (char)(sizeof (PRI_STR(x, n)) - 1) 815 #define PRIS(P, x) {\ 816 /* x/N/ */ P(x, 8), P(x, 16), P(x, 32), P(x, 64), \ 817 /* xLEAST/N/ */ P(x, LEAST8), P(x, LEAST16), P(x, LEAST32), P(x, LEAST64), \ 818 /* xFAST/N/ */ P(x, FAST8), P(x, FAST16), P(x, FAST32), P(x, FAST64), \ 819 /* xMAX,PTR */ P(x, MAX), P(x, PTR) \ 820 } 821 822 #define PRI_BIAS_LEAST 4 823 #define PRI_BIAS_FAST 8 824 #define PRI_BIAS_MAX 12 825 #define PRI_BIAS_PTR 13 826 827 static const char *pri_d[] = PRIS(PRI_STR, d); 828 static const char *pri_i[] = PRIS(PRI_STR, i); 829 static const char *pri_o[] = PRIS(PRI_STR, o); 830 static const char *pri_u[] = PRIS(PRI_STR, u); 831 static const char *pri_x[] = PRIS(PRI_STR, x); 832 static const char *pri_X[] = PRIS(PRI_STR, X); 833 834 static const char pri_d_len[] = PRIS(PRI_LEN, d); 835 static const char pri_i_len[] = PRIS(PRI_LEN, i); 836 static const char pri_o_len[] = PRIS(PRI_LEN, o); 837 static const char pri_u_len[] = PRIS(PRI_LEN, u); 838 static const char pri_x_len[] = PRIS(PRI_LEN, x); 839 static const char pri_X_len[] = PRIS(PRI_LEN, X); 840 841 static struct { 842 const char type; 843 const char **str_table; 844 const char *len_table; 845 } pri_table[] = { 846 {'d', pri_d, pri_d_len}, {'i', pri_i, pri_i_len}, 847 {'o', pri_o, pri_o_len}, {'u', pri_u, pri_u_len}, 848 {'x', pri_x, pri_x_len}, {'X', pri_X, pri_X_len}, 849 }; 850 851 static struct { 852 const char *name; 853 const char nlen; 854 const char want_digits; 855 const char bias; 856 } special_table[] = { 857 {"LEAST", 5, 1, PRI_BIAS_LEAST}, 858 {"FAST", 4, 1, PRI_BIAS_FAST}, 859 {"MAX", 3, 0, PRI_BIAS_MAX}, 860 {"PTR", 3, 0, PRI_BIAS_PTR}, 861 }; 862 863 /* 864 * conv_macro() returns the conversion specifier corresponding 
865 * to the macro name specified in 'name'. 'len' contains the 866 * length of the macro name including the null termination. 867 * '*elen' will be set to the length of the returning conversion 868 * specifier without the null termination. 869 */ 870 static const char * 871 conv_macro(const char *str, uint32_t len, uint32_t *lenp) 872 { 873 const char **tbl; 874 const char *ltbl; 875 char *next; 876 int n, i, num, bias, idx, want_digits; 877 878 if (len == 2) { 879 if (*str == 'I') { 880 /* Solaris does not support %I */ 881 *lenp = 0; 882 return (""); 883 } 884 return (NULL); 885 } 886 887 if (len <= 4 || strncmp(str, "PRI", 3) != 0) 888 return (NULL); 889 890 str += 3; 891 892 n = sizeof (pri_table) / sizeof (pri_table[0]); 893 for (i = 0; i < n; i++) { 894 if (pri_table[i].type == *str) 895 break; 896 } 897 if (i == n) 898 return (NULL); 899 tbl = pri_table[i].str_table; 900 ltbl = pri_table[i].len_table; 901 902 str++; 903 idx = want_digits = 0; 904 905 if (isdigit((unsigned char)*str)) { 906 /* PRIx/N/ */ 907 bias = 0; 908 want_digits = 1; 909 } else { 910 n = sizeof (special_table) / sizeof (special_table[0]); 911 for (i = 0; i < n; i++) { 912 if (strncmp(special_table[i].name, 913 str, special_table[i].nlen) == 0) { 914 break; 915 } 916 } 917 if (i == n) 918 return (NULL); 919 bias = special_table[i].bias; 920 want_digits = special_table[i].want_digits; 921 str += special_table[i].nlen; 922 } 923 924 if (want_digits) { 925 if (!isdigit((unsigned char)*str)) 926 return (NULL); 927 num = strtol(str, &next, 10); 928 /* see if it is 8/16/32/64 */ 929 for (n = 8, idx = 0; idx < 4; idx++, n *= 2) { 930 if (n == num) 931 break; 932 } 933 if (idx == 4) 934 return (NULL); 935 str = next; 936 } 937 if (*str != '\0') { 938 /* unknow format */ 939 return (NULL); 940 } 941 942 *lenp = (uint32_t)ltbl[bias + idx]; 943 return (tbl[bias + idx]); 944 } 945 946 static gnu_d_macro_t * 947 expand_macros(Msg_g_node *p) 948 { 949 char *base = (char *)p->msg_file_info; 950 struct 
gnu_msg_rev1_info *rev1_header = p->rev1_header; 951 struct gnu_msg_ent *d_macro_tbl; 952 gnu_d_macro_t *d_macro; 953 uint32_t num_of_d_macro, e_maclen, maclen, i; 954 const char *e_macname; 955 char *macname; 956 957 /* number of the dynamic macros */ 958 num_of_d_macro = SWAP(p, rev1_header->num_of_dynamic_macro); 959 960 d_macro = malloc((size_t)num_of_d_macro * sizeof (gnu_d_macro_t)); 961 if (d_macro == NULL) 962 return (NULL); 963 964 /* pointer to the dynamic strings table */ 965 d_macro_tbl = (struct gnu_msg_ent *)(uintptr_t) 966 (base + SWAP(p, rev1_header->off_dynamic_macro)); 967 968 for (i = 0; i < num_of_d_macro; i++) { 969 macname = base + SWAP(p, d_macro_tbl[i].offset); 970 maclen = SWAP(p, d_macro_tbl[i].len); 971 972 /* 973 * sanity check 974 * maclen includes a null termination. 975 */ 976 if (maclen != strlen(macname) + 1) { 977 free(d_macro); 978 return (NULL); 979 } 980 e_macname = conv_macro(macname, maclen, &e_maclen); 981 if (e_macname == NULL) { 982 free(d_macro); 983 return (NULL); 984 } 985 d_macro[i].len = e_maclen; 986 d_macro[i].ptr = e_macname; 987 } 988 989 return (d_macro); 990 } 991 992 static char * 993 expand_dynamic_message(Msg_g_node *p, struct gnu_msg_ent **e_msgs) 994 { 995 996 char *base = (char *)p->msg_file_info; 997 struct gnu_msg_rev1_info *rev1_header = p->rev1_header; 998 struct gnu_dynamic_tbl *d_info; 999 struct gnu_dynamic_ent *entry; 1000 gnu_d_macro_t *d_macro; 1001 uint32_t num_of_d_str, mlen, dlen, didx, i, j; 1002 uint32_t off_d_tbl; 1003 uint32_t *d_msg_off_tbl; 1004 size_t mchunk_size, used, need; 1005 char *mchunk, *msg; 1006 1007 #define MEM_INCR (1024) 1008 1009 d_macro = expand_macros(p); 1010 if (d_macro == NULL) 1011 return (NULL); 1012 1013 /* number of dynamic messages */ 1014 num_of_d_str = p->num_of_d_str; 1015 1016 mchunk = NULL; 1017 mchunk_size = 0; /* size of the allocated memory in mchunk */ 1018 used = 0; /* size of the used memory in mchunk */ 1019 for (i = MSGID; i <= MSGSTR; i++) { 1020 /* 
pointer to the offset table of dynamic msgids/msgstrs */ 1021 off_d_tbl = SWAP(p, 1022 i == MSGID ? rev1_header->off_dynamic_msgid_tbl : 1023 rev1_header->off_dynamic_msgstr_tbl); 1024 /* pointer to the dynamic msgids/msgstrs */ 1025 d_msg_off_tbl = (uint32_t *)(uintptr_t)(base + off_d_tbl); 1026 for (j = 0; j < num_of_d_str; j++) { 1027 e_msgs[i][j].offset = used; 1028 d_info = (struct gnu_dynamic_tbl *)(uintptr_t) 1029 (base + SWAP(p, d_msg_off_tbl[j])); 1030 entry = d_info->entry; 1031 msg = base + SWAP(p, d_info->offset); 1032 1033 for (;;) { 1034 mlen = SWAP(p, entry->len); 1035 didx = SWAP(p, entry->idx); 1036 dlen = (didx == NOMORE_DYNAMIC_MACRO) ? 0 : 1037 d_macro[didx].len; 1038 need = used + mlen + dlen; 1039 if (need >= mchunk_size) { 1040 char *t; 1041 size_t n = mchunk_size; 1042 do { 1043 n += MEM_INCR; 1044 } while (n <= need); 1045 t = realloc(mchunk, n); 1046 if (t == NULL) { 1047 free(d_macro); 1048 free(mchunk); 1049 return (NULL); 1050 } 1051 mchunk = t; 1052 mchunk_size = n; 1053 } 1054 (void) memcpy(mchunk + used, msg, (size_t)mlen); 1055 msg += mlen; 1056 used += mlen; 1057 1058 if (didx == NOMORE_DYNAMIC_MACRO) { 1059 /* 1060 * Last segment of a static 1061 * msg string contains a null 1062 * termination, so an explicit 1063 * null termination is not required 1064 * here. 
1065 */ 1066 break; 1067 } 1068 (void) memcpy(mchunk + used, 1069 d_macro[didx].ptr, (size_t)dlen); 1070 used += dlen; 1071 entry++; /* to next entry */ 1072 } 1073 /* 1074 * e_msgs[][].len does not include a null termination 1075 */ 1076 e_msgs[i][j].len = used - e_msgs[i][j].offset - 1; 1077 } 1078 } 1079 1080 free(d_macro); 1081 1082 /* shrink mchunk to 'used' */ 1083 { 1084 char *t; 1085 t = realloc(mchunk, used); 1086 if (t == NULL) { 1087 free(mchunk); 1088 return (NULL); 1089 } 1090 mchunk = t; 1091 } 1092 1093 return (mchunk); 1094 } 1095 1096 static int 1097 build_rev1_info(Msg_g_node *p) 1098 { 1099 uint32_t *d_hash; 1100 uint32_t num_of_d_str, num_of_str; 1101 uint32_t idx, hash_value, hash_size; 1102 size_t hash_mem_size; 1103 size_t d_msgid_size, d_msgstr_size; 1104 char *chunk, *mchunk; 1105 int i; 1106 1107 #ifdef GETTEXT_DEBUG 1108 gprintf(0, "******* entering build_rev1_info(0x%p)\n", p); 1109 printgnumsg(p, 1); 1110 #endif 1111 1112 if (p->hash_table == NULL) { 1113 /* Revision 1 always requires the hash table */ 1114 return (-1); 1115 } 1116 1117 num_of_str = p->num_of_str; 1118 hash_size = p->hash_size; 1119 num_of_d_str = p->num_of_d_str; 1120 1121 hash_mem_size = hash_size * sizeof (uint32_t); 1122 ROUND(hash_mem_size, sizeof (struct gnu_msg_ent)); 1123 1124 d_msgid_size = num_of_d_str * sizeof (struct gnu_msg_ent); 1125 d_msgstr_size = num_of_d_str * sizeof (struct gnu_msg_ent); 1126 1127 chunk = malloc(hash_mem_size + d_msgid_size + d_msgstr_size); 1128 if (chunk == NULL) { 1129 return (-1); 1130 } 1131 1132 d_hash = (uint32_t *)(uintptr_t)chunk; 1133 p->d_msg[MSGID] = (struct gnu_msg_ent *)(uintptr_t) 1134 (chunk + hash_mem_size); 1135 p->d_msg[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t) 1136 (chunk + hash_mem_size + d_msgid_size); 1137 1138 if ((mchunk = expand_dynamic_message(p, p->d_msg)) == NULL) { 1139 free(chunk); 1140 return (-1); 1141 } 1142 1143 /* copy the original hash table into the dynamic hash table */ 1144 for (i = 0; i < 
hash_size; i++) { 1145 d_hash[i] = SWAP(p, p->hash_table[i]); 1146 } 1147 1148 /* fill in the dynamic hash table with dynamic messages */ 1149 for (i = 0; i < num_of_d_str; i++) { 1150 hash_value = get_hashid(mchunk + p->d_msg[MSGID][i].offset, 1151 NULL); 1152 idx = get_hash_index(d_hash, hash_value, hash_size); 1153 d_hash[idx] = num_of_str + i + 1; 1154 } 1155 1156 p->mchunk = mchunk; 1157 p->hash_table = d_hash; 1158 1159 #ifdef GETTEXT_DEBUG 1160 print_rev1_info(p); 1161 gprintf(0, "******* exiting build_rev1_info()\n"); 1162 printgnumsg(p, 1); 1163 #endif 1164 1165 return (0); 1166 } 1167 1168 /* 1169 * gnu_setmsg 1170 * 1171 * INPUT 1172 * mnp - message node 1173 * addr - address to the mmapped file 1174 * size - size of the file 1175 * 1176 * RETURN 1177 * 0 - either T_GNU_MO or T_ILL_MO has been set 1178 * -1 - failed 1179 */ 1180 int 1181 gnu_setmsg(Msg_node *mnp, char *addr, size_t size) 1182 { 1183 struct gnu_msg_info *gnu_header; 1184 Msg_g_node *p; 1185 1186 #ifdef GETTEXT_DEBUG 1187 gprintf(0, "******** entering gnu_setmsg(0x%p, 0x%p, %lu)\n", 1188 (void *)mnp, addr, size); 1189 printmnp(mnp, 1); 1190 #endif 1191 1192 /* checks the GNU MAGIC number */ 1193 if (size < sizeof (struct gnu_msg_info)) { 1194 /* invalid mo file */ 1195 mnp->type = T_ILL_MO; 1196 #ifdef GETTEXT_DEBUG 1197 gprintf(0, "********* exiting gnu_setmsg\n"); 1198 printmnp(mnp, 1); 1199 #endif 1200 return (0); 1201 } 1202 1203 gnu_header = (struct gnu_msg_info *)(uintptr_t)addr; 1204 1205 p = calloc(1, sizeof (Msg_g_node)); 1206 if (p == NULL) { 1207 return (-1); 1208 } 1209 p->msg_file_info = gnu_header; 1210 1211 if (gnu_header->magic == GNU_MAGIC) { 1212 switch (gnu_header->revision) { 1213 case GNU_REVISION_0_1: 1214 case GNU_REVISION_1_1: 1215 p->flag |= ST_REV1; 1216 break; 1217 } 1218 } else if (gnu_header->magic == GNU_MAGIC_SWAPPED) { 1219 p->flag |= ST_SWP; 1220 switch (gnu_header->revision) { 1221 case GNU_REVISION_0_1_SWAPPED: 1222 case GNU_REVISION_1_1_SWAPPED: 1223 
p->flag |= ST_REV1; 1224 break; 1225 } 1226 } else { 1227 /* invalid mo file */ 1228 free(p); 1229 mnp->type = T_ILL_MO; 1230 #ifdef GETTEXT_DEBUG 1231 gprintf(0, "********* exiting gnu_setmsg\n"); 1232 printmnp(mnp, 1); 1233 #endif 1234 return (0); 1235 } 1236 1237 p->fsize = size; 1238 p->num_of_str = SWAP(p, gnu_header->num_of_str); 1239 p->hash_size = SWAP(p, gnu_header->sz_hashtbl); 1240 p->hash_table = p->hash_size <= 2 ? NULL : 1241 (uint32_t *)(uintptr_t) 1242 (addr + SWAP(p, gnu_header->off_hashtbl)); 1243 1244 p->msg_tbl[MSGID] = (struct gnu_msg_ent *)(uintptr_t) 1245 (addr + SWAP(p, gnu_header->off_msgid_tbl)); 1246 p->msg_tbl[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t) 1247 (addr + SWAP(p, gnu_header->off_msgstr_tbl)); 1248 1249 if (p->flag & ST_REV1) { 1250 /* Revision 1 */ 1251 struct gnu_msg_rev1_info *rev1_header; 1252 1253 rev1_header = (struct gnu_msg_rev1_info *) 1254 (uintptr_t)(addr + sizeof (struct gnu_msg_info)); 1255 p->rev1_header = rev1_header; 1256 p->num_of_d_str = SWAP(p, rev1_header->num_of_dynamic_str); 1257 if (build_rev1_info(p) == -1) { 1258 free(p); 1259 #ifdef GETTEXT_DEBUG 1260 gprintf(0, "******** exiting gnu_setmsg: " 1261 "build_rev1_info() failed\n"); 1262 #endif 1263 return (-1); 1264 } 1265 } 1266 1267 mnp->msg.gnumsg = p; 1268 mnp->type = T_GNU_MO; 1269 1270 #ifdef GETTEXT_DEBUG 1271 gprintf(0, "********* exiting gnu_setmsg\n"); 1272 printmnp(mnp, 1); 1273 #endif 1274 return (0); 1275 } 1276 1277 /* 1278 * get_hash_index 1279 * 1280 * Returns the index to an empty slot in the hash table 1281 * for the specified hash_value. 
/*
 * The slots are probed by open addressing with double hashing:
 * the first slot is hash_value % hash_size and every further probe
 * advances by 1 + (hash_value % (hash_size - 2)), wrapping around at
 * hash_size.  A slot holding 0 is an empty slot.
 *
 * hash_size has to be greater than 2, and the probe sequence has to
 * contain an empty slot; this function does not return otherwise.
 */
static uint32_t
get_hash_index(uint32_t *hash_tbl, uint32_t hash_value, uint32_t hash_size)
{
	const uint32_t	step = 1 + (hash_value % (hash_size - 2));
	uint32_t	slot = hash_value % hash_size;

	/* walk the probe sequence until an unused slot shows up */
	while (hash_tbl[slot] != 0)
		slot = (slot + step) % hash_size;

	return (slot);
}