1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include "lint.h" 30 #include "mtlib.h" 31 #include <ctype.h> 32 #include <stdio.h> 33 #include <stdlib.h> 34 #include <string.h> 35 #include <sys/types.h> 36 #include <sys/mman.h> 37 #include <sys/param.h> 38 #include <sys/stat.h> 39 #include <thread.h> 40 #include <synch.h> 41 #include <unistd.h> 42 #include <limits.h> 43 #include <errno.h> 44 #include <inttypes.h> 45 #include "libc.h" 46 #include "msgfmt.h" 47 #include "nlspath_checks.h" 48 #include "gettext.h" 49 50 #ifdef DEBUG 51 #include <assert.h> 52 #endif 53 54 /* The following symbols are just for GNU binary compatibility */ 55 int _nl_msg_cat_cntr; 56 int *_nl_domain_bindings; 57 58 static const char *nullstr = ""; 59 60 #define CHARSET_MOD "charset=" 61 #define CHARSET_LEN (sizeof (CHARSET_MOD) - 1) 62 #define NPLURALS_MOD "nplurals=" 63 #define NPLURALS_LEN (sizeof (NPLURALS_MOD) - 1) 64 #define PLURAL_MOD "plural=" 65 #define PLURAL_LEN (sizeof (PLURAL_MOD) - 1) 66 67 static uint32_t get_hash_index(uint32_t *, uint32_t, uint32_t); 68 69 /* 70 * free_conv_msgstr 71 * 72 * release the memory allocated for storing code-converted messages 73 * 74 * f 75 * 0: do not free gmnp->conv_msgstr 76 * 1: free gmnp->conv_msgstr 77 */ 78 static void 79 free_conv_msgstr(Msg_g_node *gmnp, int f) 80 { 81 uint32_t i, num_of_conv; 82 83 #ifdef GETTEXT_DEBUG 84 gprintf(0, "*************** free_conv_msgstr(0x%p, %d)\n", 85 (void *)gmnp, f); 86 printgnumsg(gmnp, 1); 87 #endif 88 89 num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str; 90 for (i = 0; i < num_of_conv; i++) { 91 if (gmnp->conv_msgstr[i]) { 92 free(gmnp->conv_msgstr[i]); 93 } 94 gmnp->conv_msgstr[i] = NULL; 95 } 96 if (f) { 97 free(gmnp->conv_msgstr); 98 gmnp->conv_msgstr = NULL; 99 } 100 } 101 102 /* 103 * dfltmsgstr 104 * 105 * choose an appropriate message by evaluating the plural expression, 106 * and return it. 107 */ 108 static char * 109 dfltmsgstr(Msg_g_node *gmnp, const char *msgstr, uint32_t msgstr_len, 110 struct msg_pack *mp) 111 { 112 unsigned int pindex; 113 size_t len; 114 const char *p; 115 116 #ifdef GETTEXT_DEBUG 117 gprintf(0, "*************** dfltmsgstr(0x%p, \"%s\", %u, 0x%p)\n", 118 (void *)gmnp, 119 msgstr ? msgstr : "(null)", msgstr_len, (void *)mp); 120 printgnumsg(gmnp, 1); 121 printmp(mp, 1); 122 #endif 123 124 if (mp->plural) { 125 if (gmnp->plural) { 126 pindex = plural_eval(gmnp->plural, mp->n); 127 } else { 128 /* 129 * This mo does not have plural information. 130 * Using the English form. 131 */ 132 if (mp->n == 1) 133 pindex = 0; 134 else 135 pindex = 1; 136 } 137 #ifdef GETTEXT_DEBUG 138 gprintf(0, "plural_eval returned: %u\n", pindex); 139 #endif 140 if (pindex >= gmnp->nplurals) { 141 /* should never happen */ 142 pindex = 0; 143 } 144 p = msgstr; 145 for (; pindex != 0; pindex--) { 146 len = msgstr_len - (p - msgstr); 147 p = memchr(p, '\0', len); 148 if (p == NULL) { 149 /* 150 * null byte not found 151 * this should never happen 152 */ 153 char *result; 154 DFLTMSG(result, mp->msgid1, mp->msgid2, 155 mp->n, mp->plural); 156 return (result); 157 } 158 p++; /* skip */ 159 } 160 return ((char *)p); 161 } 162 163 return ((char *)msgstr); 164 } 165 166 /* 167 * parse_header 168 * 169 * parse the header entry of the GNU MO file and 170 * extract the src encoding and the plural information of the MO file 171 */ 172 static int 173 parse_header(const char *header, Msg_g_node *gmnp) 174 { 175 char *charset = NULL; 176 char *charset_str; 177 size_t len; 178 char *nplurals_str, *plural_str; 179 plural_expr_t plural; 180 char *p, *q; 181 unsigned int nplurals; 182 int ret; 183 184 #ifdef GETTEXT_DEBUG 185 gprintf(0, "*************** parse_header(\"%s\", 0x%p)\n", 186 header ? header : "(null)", (void *)gmnp); 187 printgnumsg(gmnp, 1); 188 #endif 189 190 if (header == NULL) { 191 gmnp->src_encoding = (char *)nullstr; 192 gmnp->nplurals = 2; 193 gmnp->plural = NULL; 194 #ifdef GETTEXT_DEBUG 195 gprintf(0, "*************** exiting parse_header\n"); 196 gprintf(0, "no header\n"); 197 #endif 198 199 return (0); 200 } 201 202 charset_str = strstr(header, CHARSET_MOD); 203 if (charset_str == NULL) { 204 gmnp->src_encoding = (char *)nullstr; 205 } else { 206 p = charset_str + CHARSET_LEN; 207 q = p; 208 while ((*q != ' ') && (*q != '\t') && 209 (*q != '\n')) { 210 q++; 211 } 212 len = q - p; 213 if (len > 0) { 214 charset = malloc(len + 1); 215 if (charset == NULL) { 216 gmnp->src_encoding = (char *)nullstr; 217 gmnp->nplurals = 2; 218 gmnp->plural = NULL; 219 return (-1); 220 } 221 (void) memcpy(charset, p, len); 222 charset[len] = '\0'; 223 gmnp->src_encoding = charset; 224 } else { 225 gmnp->src_encoding = (char *)nullstr; 226 } 227 } 228 229 nplurals_str = strstr(header, NPLURALS_MOD); 230 plural_str = strstr(header, PLURAL_MOD); 231 if (nplurals_str == NULL || plural_str == NULL) { 232 /* no valid plural specification */ 233 gmnp->nplurals = 2; 234 gmnp->plural = NULL; 235 #ifdef GETTEXT_DEBUG 236 gprintf(0, "*************** exiting parse_header\n"); 237 gprintf(0, "no plural entry\n"); 238 #endif 239 return (0); 240 } else { 241 p = nplurals_str + NPLURALS_LEN; 242 while (*p && isspace((unsigned char)*p)) { 243 p++; 244 } 245 nplurals = (unsigned int)strtol(p, &q, 10); 246 if (p != q) { 247 gmnp->nplurals = nplurals; 248 } else { 249 gmnp->nplurals = 2; 250 } 251 252 p = plural_str + PLURAL_LEN; 253 #ifdef GETTEXT_DEBUG 254 gprintf(0, "plural_str: \"%s\"\n", p); 255 #endif 256 257 ret = plural_expr(&plural, (const char *)p); 258 if (ret == 0) { 259 /* parse succeeded */ 260 gmnp->plural = plural; 261 #ifdef GETTEXT_DEBUG 262 gprintf(0, "*************** exiting parse_header\n"); 263 gprintf(0, "charset: \"%s\"\n", 264 charset ? charset : "(null)"); 265 printexpr(plural, 1); 266 #endif 267 return (0); 268 } else if (ret == 1) { 269 /* parse error */ 270 gmnp->nplurals = 2; 271 gmnp->plural = NULL; 272 return (0); 273 } else { 274 /* fatal error */ 275 if (charset) 276 free(charset); 277 gmnp->src_encoding = (char *)nullstr; 278 gmnp->nplurals = 2; 279 gmnp->plural = NULL; 280 return (-1); 281 } 282 } 283 /* NOTREACHED */ 284 } 285 286 /* 287 * handle_lang 288 * 289 * take care of the LANGUAGE specification 290 */ 291 char * 292 handle_lang(struct msg_pack *mp) 293 { 294 const char *p, *op, *q; 295 size_t locale_len; 296 char *result; 297 char locale[MAXPATHLEN]; 298 299 300 #ifdef GETTEXT_DEBUG 301 gprintf(0, "*************** handle_lang(0x%p)\n", (void *)mp); 302 printmp(mp, 1); 303 #endif 304 305 p = mp->language; 306 307 while (*p) { 308 op = p; 309 q = strchr(p, ':'); 310 if (q == NULL) { 311 locale_len = strlen(p); 312 p += locale_len; 313 } else { 314 locale_len = q - p; 315 p += locale_len + 1; 316 } 317 if (locale_len >= MAXPATHLEN || locale_len == 0) { 318 /* illegal locale name */ 319 continue; 320 } 321 (void) memcpy(locale, op, locale_len); 322 locale[locale_len] = '\0'; 323 mp->locale = locale; 324 325 #ifdef GETTEXT_DEBUG 326 *mp->msgfile = '\0'; 327 #endif 328 if (mk_msgfile(mp) == NULL) { 329 /* illegal locale name */ 330 continue; 331 } 332 333 result = handle_mo(mp); 334 if (mp->status & ST_GNU_MSG_FOUND) 335 return (result); 336 337 if (mp->status & ST_SUN_MO_FOUND) 338 break; 339 } 340 341 /* 342 * no valid locale found, Sun MO found, or 343 * GNU MO found but no valid msg found there. 344 */ 345 346 if (mp->status & ST_GNU_MO_FOUND) { 347 /* 348 * GNU MO found but no valid msg found there. 349 * returning DFLTMSG. 350 */ 351 DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural); 352 return (result); 353 } 354 return (NULL); 355 } 356 357 /* 358 * gnu_msgsearch 359 * 360 * Searchs the translation message for the specified msgid1. 361 * Hash algorithm used in this function is Open Addressing 362 * with Double Hashing: 363 * H(k, i) = (H1(k) + i * H2(k)) mod M 364 * H1(k) = hashvalue % M 365 * H2(k) = 1 + (hashvalue % (M - 2)) 366 * 367 * Ref: The Art of Computer Programming Volume 3 368 * Sorting and Searching, second edition 369 * Donald E Knuth 370 */ 371 static char * 372 gnu_msgsearch(Msg_g_node *gmnp, const char *msgid1, 373 uint32_t *msgstrlen, uint32_t *midx) 374 { 375 struct gnu_msg_info *header = gmnp->msg_file_info; 376 struct gnu_msg_ent *msgid_tbl, *msgstr_tbl; 377 uint32_t num_of_str, idx, mlen, msglen; 378 uint32_t hash_size, hash_val, hash_id, hash_inc, hash_idx; 379 uint32_t *hash_table; 380 char *base; 381 char *msg; 382 383 #ifdef GETTEXT_DEBUG 384 gprintf(0, "*************** gnu_msgsearch(0x%p, \"%s\", " 385 "0x%p, 0x%p)\n", 386 (void *)gmnp, msgid1, msgstrlen, midx); 387 printgnumsg(gmnp, 1); 388 #endif 389 390 base = (char *)header; 391 392 msgid_tbl = gmnp->msg_tbl[MSGID]; 393 msgstr_tbl = gmnp->msg_tbl[MSGSTR]; 394 hash_table = gmnp->hash_table; 395 hash_size = gmnp->hash_size; 396 num_of_str = gmnp->num_of_str; 397 398 if (!(gmnp->flag & ST_REV1) && 399 (hash_table == NULL || (hash_size <= 2))) { 400 /* 401 * Revision 0 and 402 * No hash table exists or 403 * hash size is enough small. 404 */ 405 uint32_t top, bottom; 406 char *msg_id_str; 407 int val; 408 409 top = 0; 410 bottom = num_of_str; 411 while (top < bottom) { 412 idx = (top + bottom) / 2; 413 msg_id_str = base + 414 SWAP(gmnp, msgid_tbl[idx].offset); 415 416 val = strcmp(msg_id_str, msgid1); 417 if (val < 0) { 418 top = idx + 1; 419 } else if (val > 0) { 420 bottom = idx; 421 } else { 422 *msgstrlen = (unsigned int) 423 SWAP(gmnp, msgstr_tbl[idx].len) + 1; 424 *midx = idx; 425 return (base + 426 SWAP(gmnp, msgstr_tbl[idx].offset)); 427 } 428 } 429 /* not found */ 430 return ((char *)msgid1); 431 } 432 433 /* use hash table */ 434 hash_id = get_hashid(msgid1, &msglen); 435 hash_idx = hash_id % hash_size; 436 hash_inc = 1 + (hash_id % (hash_size - 2)); 437 438 for (;;) { 439 hash_val = HASH_TBL(gmnp, hash_table[hash_idx]); 440 441 if (hash_val == 0) { 442 /* not found */ 443 return ((char *)msgid1); 444 } 445 if (hash_val <= num_of_str) { 446 /* static message */ 447 idx = hash_val - 1; 448 mlen = SWAP(gmnp, msgid_tbl[idx].len); 449 msg = base + SWAP(gmnp, msgid_tbl[idx].offset); 450 } else { 451 if (!(gmnp->flag & ST_REV1)) { 452 /* rev 0 does not have dynamic message */ 453 return ((char *)msgid1); 454 } 455 /* dynamic message */ 456 idx = hash_val - num_of_str - 1; 457 mlen = gmnp->d_msg[MSGID][idx].len; 458 msg = gmnp->mchunk + gmnp->d_msg[MSGID][idx].offset; 459 } 460 if (msglen <= mlen && strcmp(msgid1, msg) == 0) { 461 /* found */ 462 break; 463 } 464 hash_idx = (hash_idx + hash_inc) % hash_size; 465 } 466 467 /* msgstrlen should include a null termination */ 468 if (hash_val <= num_of_str) { 469 *msgstrlen = SWAP(gmnp, msgstr_tbl[idx].len) + 1; 470 msg = base + SWAP(gmnp, msgstr_tbl[idx].offset); 471 *midx = idx; 472 } else { 473 *msgstrlen = gmnp->d_msg[MSGSTR][idx].len + 1; 474 msg = gmnp->mchunk + gmnp->d_msg[MSGSTR][idx].offset; 475 *midx = idx + num_of_str; 476 } 477 478 return (msg); 479 } 480 481 /* 482 * do_conv 483 * 484 * Converts the specified string from the src encoding 485 * to the dst encoding by calling iconv() 486 */ 487 static uint32_t * 488 do_conv(iconv_t fd, const char *src, uint32_t srclen) 489 { 490 uint32_t tolen; 491 uint32_t *ptr, *optr; 492 size_t oleft, ileft, bufsize, memincr; 493 char *to, *tptr; 494 495 #ifdef GETTEXT_DEBUG 496 gprintf(0, "*************** do_conv(" 497 "0x%p, \"%s\", %d)\n", 498 (void *)fd, src ? src : "(null)", srclen); 499 #endif 500 501 memincr = srclen * 2; 502 bufsize = memincr; 503 ileft = srclen; 504 oleft = bufsize; 505 ptr = malloc(bufsize + sizeof (uint32_t)); 506 if (ptr == NULL) { 507 return (NULL); 508 } 509 to = (char *)(ptr + 1); 510 511 for (;;) { 512 tptr = to; 513 errno = 0; 514 #ifdef GETTEXT_DEBUG 515 gprintf(0, "******* calling iconv()\n"); 516 #endif 517 if (iconv(fd, &src, &ileft, &tptr, &oleft) == (size_t)-1) { 518 if (errno == E2BIG) { 519 #ifdef GETTEXT_DEBUG 520 gprintf(0, "******* iconv detected E2BIG\n"); 521 gprintf(0, "old bufsize: %u\n", bufsize); 522 #endif 523 524 optr = realloc(ptr, 525 bufsize + memincr + sizeof (uint32_t)); 526 if (optr == NULL) { 527 free(ptr); 528 return (NULL); 529 } 530 ptr = optr; 531 to = (char *)(optr + 1); 532 to += bufsize - oleft; 533 oleft += memincr; 534 bufsize += memincr; 535 #ifdef GETTEXT_DEBUG 536 gprintf(0, "new bufsize: %u\n", bufsize); 537 #endif 538 continue; 539 } else { 540 tolen = (uint32_t)(bufsize - oleft); 541 break; 542 } 543 } 544 tolen = (uint32_t)(bufsize - oleft); 545 break; 546 } 547 548 if (tolen < bufsize) { 549 /* shrink the buffer */ 550 optr = realloc(ptr, tolen + sizeof (uint32_t)); 551 if (optr == NULL) { 552 free(ptr); 553 return (NULL); 554 } 555 ptr = optr; 556 } 557 *ptr = tolen; 558 559 #ifdef GETTEXT_DEBUG 560 gprintf(0, "******* exiting do_conv()\n"); 561 gprintf(0, "tolen: %u\n", *ptr); 562 gprintf(0, "return: 0x%p\n", ptr); 563 #endif 564 return (ptr); 565 } 566 567 /* 568 * conv_msg 569 */ 570 static char * 571 conv_msg(Msg_g_node *gmnp, char *msgstr, uint32_t msgstr_len, uint32_t midx, 572 struct msg_pack *mp) 573 { 574 uint32_t *conv_dst; 575 size_t num_of_conv, conv_msgstr_len; 576 char *conv_msgstr, *result; 577 578 if (gmnp->conv_msgstr == NULL) { 579 num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str; 580 gmnp->conv_msgstr = 581 calloc((size_t)num_of_conv, sizeof (uint32_t *)); 582 if (gmnp->conv_msgstr == NULL) { 583 /* malloc failed */ 584 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp); 585 return (result); 586 } 587 } 588 589 conv_dst = do_conv(gmnp->fd, (const char *)msgstr, msgstr_len); 590 591 if (conv_dst == NULL) { 592 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp); 593 return (result); 594 } 595 conv_msgstr_len = *conv_dst; 596 gmnp->conv_msgstr[midx] = conv_dst; 597 conv_msgstr = (char *)(conv_dst + 1); 598 result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp); 599 return (result); 600 } 601 602 /* 603 * gnu_key_2_text 604 * 605 * Extracts msgstr from the GNU MO file 606 */ 607 char * 608 gnu_key_2_text(Msg_g_node *gmnp, const char *codeset, 609 struct msg_pack *mp) 610 { 611 uint32_t msgstr_len, midx; 612 iconv_t fd; 613 char *result, *msgstr; 614 int ret, conversion, new_encoding; 615 616 #ifdef GETTEXT_DEBUG 617 gprintf(0, "*************** gnu_key_2_text(" 618 "0x%p, \"%s\", 0x%p)\n", 619 (void *)gmnp, codeset ? codeset : "(null)", (void *)mp); 620 printgnumsg(gmnp, 1); 621 printmp(mp, 1); 622 #endif 623 624 /* first checks if header entry has been processed */ 625 if (!(gmnp->flag & ST_CHK)) { 626 char *msg_header; 627 628 msg_header = gnu_msgsearch(gmnp, "", &msgstr_len, &midx); 629 ret = parse_header((const char *)msg_header, gmnp); 630 if (ret == -1) { 631 /* fatal error */ 632 DFLTMSG(result, mp->msgid1, mp->msgid2, 633 mp->n, mp->plural); 634 return (result); 635 } 636 gmnp->flag |= ST_CHK; 637 } 638 msgstr = gnu_msgsearch(gmnp, mp->msgid1, &msgstr_len, &midx); 639 if (msgstr == mp->msgid1) { 640 /* not found */ 641 DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural); 642 return (result); 643 } 644 645 #ifdef GETTEXT_DEBUG 646 printgnumsg(gmnp, 1); 647 #endif 648 if (gmnp->dst_encoding == NULL) { 649 /* 650 * destination encoding has not been set. 651 */ 652 char *dupcodeset = strdup(codeset); 653 if (dupcodeset == NULL) { 654 /* strdup failed */ 655 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp); 656 return (result); 657 } 658 gmnp->dst_encoding = dupcodeset; 659 660 if (strcmp(gmnp->dst_encoding, gmnp->src_encoding) == 0) { 661 /* 662 * target encoding and src encoding 663 * are the same. 664 * No conversion required. 665 */ 666 conversion = 0; 667 } else { 668 /* 669 * target encoding is different from 670 * src encoding. 671 * New conversion required. 672 */ 673 /* sanity check */ 674 if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) { 675 (void) iconv_close(gmnp->fd); 676 gmnp->fd = (iconv_t)-1; 677 } 678 if (gmnp->conv_msgstr) 679 free_conv_msgstr(gmnp, 0); 680 conversion = 1; 681 new_encoding = 1; 682 } 683 } else { 684 /* 685 * dst encoding has been already set. 686 */ 687 if (strcmp(gmnp->dst_encoding, codeset) == 0) { 688 /* 689 * dst encoding and target encoding are the same. 690 */ 691 if (strcmp(gmnp->dst_encoding, gmnp->src_encoding) 692 == 0) { 693 /* 694 * dst encoding and src encoding are the same. 695 * No conversion required. 696 */ 697 conversion = 0; 698 } else { 699 /* 700 * dst encoding is different from src encoding. 701 * current conversion is valid. 702 */ 703 conversion = 1; 704 new_encoding = 0; 705 /* checks if iconv_open has succeeded before */ 706 if (gmnp->fd == (iconv_t)-1) { 707 /* 708 * iconv_open should have failed before 709 * Assume this conversion is invalid 710 */ 711 conversion = 0; 712 } else { 713 if (gmnp->conv_msgstr == NULL) { 714 /* 715 * memory allocation for 716 * conv_msgstr should 717 * have failed before. 718 */ 719 new_encoding = 1; 720 if (gmnp->fd) 721 (void) iconv_close( 722 gmnp->fd); 723 gmnp->fd = (iconv_t)-1; 724 } 725 } 726 } 727 } else { 728 /* 729 * dst encoding is different from target encoding. 730 * It has changed since before. 731 */ 732 char *dupcodeset = strdup(codeset); 733 if (dupcodeset == NULL) { 734 result = dfltmsgstr(gmnp, msgstr, 735 msgstr_len, mp); 736 return (result); 737 } 738 free(gmnp->dst_encoding); 739 gmnp->dst_encoding = dupcodeset; 740 if (strcmp(gmnp->dst_encoding, gmnp->src_encoding) 741 == 0) { 742 /* 743 * dst encoding and src encoding are the same. 744 * now, no conversion required. 745 */ 746 conversion = 0; 747 if (gmnp->conv_msgstr) 748 free_conv_msgstr(gmnp, 1); 749 } else { 750 /* 751 * dst encoding is different from src encoding. 752 * new conversion required. 753 */ 754 conversion = 1; 755 new_encoding = 1; 756 if (gmnp->conv_msgstr) 757 free_conv_msgstr(gmnp, 0); 758 } 759 760 if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) { 761 (void) iconv_close(gmnp->fd); 762 } 763 if (gmnp->fd != (iconv_t)-1) { 764 gmnp->fd = (iconv_t)-1; 765 } 766 } 767 } 768 769 if (conversion == 0) { 770 /* no conversion */ 771 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp); 772 return (result); 773 } 774 /* conversion required */ 775 776 if (new_encoding == 0) { 777 /* dst codeset hasn't been changed since before */ 778 uint32_t *cmsg; 779 uint32_t conv_msgstr_len; 780 char *conv_msgstr; 781 782 if (gmnp->conv_msgstr[midx] == NULL) { 783 /* this msgstr hasn't been converted yet */ 784 result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp); 785 return (result); 786 } 787 /* this msgstr is in the conversion cache */ 788 cmsg = (uint32_t *)(uintptr_t)gmnp->conv_msgstr[midx]; 789 conv_msgstr_len = *cmsg; 790 conv_msgstr = (char *)(cmsg + 1); 791 result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp); 792 return (result); 793 } 794 /* new conversion */ 795 #ifdef GETTEXT_DEBUG 796 gprintf(0, "******* calling iconv_open()\n"); 797 gprintf(0, " dst: \"%s\", src: \"%s\"\n", 798 gmnp->dst_encoding, gmnp->src_encoding); 799 #endif 800 fd = iconv_open(gmnp->dst_encoding, gmnp->src_encoding); 801 gmnp->fd = fd; 802 if (fd == (iconv_t)-1) { 803 /* 804 * iconv_open() failed. 805 * no conversion 806 */ 807 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp); 808 return (result); 809 } 810 result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp); 811 return (result); 812 } 813 814 815 #define PRI_STR(x, n) PRI##x##n 816 #define PRI_LEN(x, n) (char)(sizeof (PRI_STR(x, n)) - 1) 817 #define PRIS(P, x) {\ 818 /* x/N/ */ P(x, 8), P(x, 16), P(x, 32), P(x, 64), \ 819 /* xLEAST/N/ */ P(x, LEAST8), P(x, LEAST16), P(x, LEAST32), P(x, LEAST64), \ 820 /* xFAST/N/ */ P(x, FAST8), P(x, FAST16), P(x, FAST32), P(x, FAST64), \ 821 /* xMAX,PTR */ P(x, MAX), P(x, PTR) \ 822 } 823 824 #define PRI_BIAS_LEAST 4 825 #define PRI_BIAS_FAST 8 826 #define PRI_BIAS_MAX 12 827 #define PRI_BIAS_PTR 13 828 829 static const char *pri_d[] = PRIS(PRI_STR, d); 830 static const char *pri_i[] = PRIS(PRI_STR, i); 831 static const char *pri_o[] = PRIS(PRI_STR, o); 832 static const char *pri_u[] = PRIS(PRI_STR, u); 833 static const char *pri_x[] = PRIS(PRI_STR, x); 834 static const char *pri_X[] = PRIS(PRI_STR, X); 835 836 static const char pri_d_len[] = PRIS(PRI_LEN, d); 837 static const char pri_i_len[] = PRIS(PRI_LEN, i); 838 static const char pri_o_len[] = PRIS(PRI_LEN, o); 839 static const char pri_u_len[] = PRIS(PRI_LEN, u); 840 static const char pri_x_len[] = PRIS(PRI_LEN, x); 841 static const char pri_X_len[] = PRIS(PRI_LEN, X); 842 843 static struct { 844 const char type; 845 const char **str_table; 846 const char *len_table; 847 } pri_table[] = { 848 {'d', pri_d, pri_d_len}, {'i', pri_i, pri_i_len}, 849 {'o', pri_o, pri_o_len}, {'u', pri_u, pri_u_len}, 850 {'x', pri_x, pri_x_len}, {'X', pri_X, pri_X_len}, 851 }; 852 853 static struct { 854 const char *name; 855 const char nlen; 856 const char want_digits; 857 const char bias; 858 } special_table[] = { 859 {"LEAST", 5, 1, PRI_BIAS_LEAST}, 860 {"FAST", 4, 1, PRI_BIAS_FAST}, 861 {"MAX", 3, 0, PRI_BIAS_MAX}, 862 {"PTR", 3, 0, PRI_BIAS_PTR}, 863 }; 864 865 /* 866 * conv_macro() returns the conversion specifier corresponding 867 * to the macro name specified in 'name'. 'len' contains the 868 * length of the macro name including the null termination. 869 * '*elen' will be set to the length of the returning conversion 870 * specifier without the null termination. 871 */ 872 static const char * 873 conv_macro(const char *str, uint32_t len, uint32_t *lenp) 874 { 875 const char **tbl; 876 const char *ltbl; 877 char *next; 878 int n, i, num, bias, idx, want_digits; 879 880 if (len == 2) { 881 if (*str == 'I') { 882 /* Solaris does not support %I */ 883 *lenp = 0; 884 return (""); 885 } 886 return (NULL); 887 } 888 889 if (len <= 4 || strncmp(str, "PRI", 3) != 0) 890 return (NULL); 891 892 str += 3; 893 894 n = sizeof (pri_table) / sizeof (pri_table[0]); 895 for (i = 0; i < n; i++) { 896 if (pri_table[i].type == *str) 897 break; 898 } 899 if (i == n) 900 return (NULL); 901 tbl = pri_table[i].str_table; 902 ltbl = pri_table[i].len_table; 903 904 str++; 905 idx = want_digits = 0; 906 907 if (isdigit((unsigned char)*str)) { 908 /* PRIx/N/ */ 909 bias = 0; 910 want_digits = 1; 911 } else { 912 n = sizeof (special_table) / sizeof (special_table[0]); 913 for (i = 0; i < n; i++) { 914 if (strncmp(special_table[i].name, 915 str, special_table[i].nlen) == 0) { 916 break; 917 } 918 } 919 if (i == n) 920 return (NULL); 921 bias = special_table[i].bias; 922 want_digits = special_table[i].want_digits; 923 str += special_table[i].nlen; 924 } 925 926 if (want_digits) { 927 if (!isdigit((unsigned char)*str)) 928 return (NULL); 929 num = strtol(str, &next, 10); 930 /* see if it is 8/16/32/64 */ 931 for (n = 8, idx = 0; idx < 4; idx++, n *= 2) { 932 if (n == num) 933 break; 934 } 935 if (idx == 4) 936 return (NULL); 937 str = next; 938 } 939 if (*str != '\0') { 940 /* unknow format */ 941 return (NULL); 942 } 943 944 *lenp = (uint32_t)ltbl[bias + idx]; 945 return (tbl[bias + idx]); 946 } 947 948 static gnu_d_macro_t * 949 expand_macros(Msg_g_node *p) 950 { 951 char *base = (char *)p->msg_file_info; 952 struct gnu_msg_rev1_info *rev1_header = p->rev1_header; 953 struct gnu_msg_ent *d_macro_tbl; 954 gnu_d_macro_t *d_macro; 955 uint32_t num_of_d_macro, e_maclen, maclen, i; 956 const char *e_macname; 957 char *macname; 958 959 /* number of the dynamic macros */ 960 num_of_d_macro = SWAP(p, rev1_header->num_of_dynamic_macro); 961 962 d_macro = malloc((size_t)num_of_d_macro * sizeof (gnu_d_macro_t)); 963 if (d_macro == NULL) 964 return (NULL); 965 966 /* pointer to the dynamic strings table */ 967 d_macro_tbl = (struct gnu_msg_ent *)(uintptr_t) 968 (base + SWAP(p, rev1_header->off_dynamic_macro)); 969 970 for (i = 0; i < num_of_d_macro; i++) { 971 macname = base + SWAP(p, d_macro_tbl[i].offset); 972 maclen = SWAP(p, d_macro_tbl[i].len); 973 974 /* 975 * sanity check 976 * maclen includes a null termination. 977 */ 978 if (maclen != strlen(macname) + 1) { 979 free(d_macro); 980 return (NULL); 981 } 982 e_macname = conv_macro(macname, maclen, &e_maclen); 983 if (e_macname == NULL) { 984 free(d_macro); 985 return (NULL); 986 } 987 d_macro[i].len = e_maclen; 988 d_macro[i].ptr = e_macname; 989 } 990 991 return (d_macro); 992 } 993 994 static char * 995 expand_dynamic_message(Msg_g_node *p, struct gnu_msg_ent **e_msgs) 996 { 997 998 char *base = (char *)p->msg_file_info; 999 struct gnu_msg_rev1_info *rev1_header = p->rev1_header; 1000 struct gnu_dynamic_tbl *d_info; 1001 struct gnu_dynamic_ent *entry; 1002 gnu_d_macro_t *d_macro; 1003 uint32_t num_of_d_str, mlen, dlen, didx, i, j; 1004 uint32_t off_d_tbl; 1005 uint32_t *d_msg_off_tbl; 1006 size_t mchunk_size, used, need; 1007 char *mchunk, *msg; 1008 1009 #define MEM_INCR (1024) 1010 1011 d_macro = expand_macros(p); 1012 if (d_macro == NULL) 1013 return (NULL); 1014 1015 /* number of dynamic messages */ 1016 num_of_d_str = p->num_of_d_str; 1017 1018 mchunk = NULL; 1019 mchunk_size = 0; /* size of the allocated memory in mchunk */ 1020 used = 0; /* size of the used memory in mchunk */ 1021 for (i = MSGID; i <= MSGSTR; i++) { 1022 /* pointer to the offset table of dynamic msgids/msgstrs */ 1023 off_d_tbl = SWAP(p, 1024 i == MSGID ? rev1_header->off_dynamic_msgid_tbl : 1025 rev1_header->off_dynamic_msgstr_tbl); 1026 /* pointer to the dynamic msgids/msgstrs */ 1027 d_msg_off_tbl = (uint32_t *)(uintptr_t)(base + off_d_tbl); 1028 for (j = 0; j < num_of_d_str; j++) { 1029 e_msgs[i][j].offset = used; 1030 d_info = (struct gnu_dynamic_tbl *)(uintptr_t) 1031 (base + SWAP(p, d_msg_off_tbl[j])); 1032 entry = d_info->entry; 1033 msg = base + SWAP(p, d_info->offset); 1034 1035 for (;;) { 1036 mlen = SWAP(p, entry->len); 1037 didx = SWAP(p, entry->idx); 1038 dlen = (didx == NOMORE_DYNAMIC_MACRO) ? 0 : 1039 d_macro[didx].len; 1040 need = used + mlen + dlen; 1041 if (need >= mchunk_size) { 1042 char *t; 1043 size_t n = mchunk_size; 1044 do { 1045 n += MEM_INCR; 1046 } while (n <= need); 1047 t = realloc(mchunk, n); 1048 if (t == NULL) { 1049 free(d_macro); 1050 free(mchunk); 1051 return (NULL); 1052 } 1053 mchunk = t; 1054 mchunk_size = n; 1055 } 1056 (void) memcpy(mchunk + used, msg, (size_t)mlen); 1057 msg += mlen; 1058 used += mlen; 1059 1060 if (didx == NOMORE_DYNAMIC_MACRO) { 1061 /* 1062 * Last segment of a static 1063 * msg string contains a null 1064 * termination, so an explicit 1065 * null termination is not required 1066 * here. 1067 */ 1068 break; 1069 } 1070 (void) memcpy(mchunk + used, 1071 d_macro[didx].ptr, (size_t)dlen); 1072 used += dlen; 1073 entry++; /* to next entry */ 1074 } 1075 /* 1076 * e_msgs[][].len does not include a null termination 1077 */ 1078 e_msgs[i][j].len = used - e_msgs[i][j].offset - 1; 1079 } 1080 } 1081 1082 free(d_macro); 1083 1084 /* shrink mchunk to 'used' */ 1085 { 1086 char *t; 1087 t = realloc(mchunk, used); 1088 if (t == NULL) { 1089 free(mchunk); 1090 return (NULL); 1091 } 1092 mchunk = t; 1093 } 1094 1095 return (mchunk); 1096 } 1097 1098 static int 1099 build_rev1_info(Msg_g_node *p) 1100 { 1101 uint32_t *d_hash; 1102 uint32_t num_of_d_str, num_of_str; 1103 uint32_t idx, hash_value, hash_size; 1104 size_t hash_mem_size; 1105 size_t d_msgid_size, d_msgstr_size; 1106 char *chunk, *mchunk; 1107 int i; 1108 1109 #ifdef GETTEXT_DEBUG 1110 gprintf(0, "******* entering build_rev1_info(0x%p)\n", p); 1111 printgnumsg(p, 1); 1112 #endif 1113 1114 if (p->hash_table == NULL) { 1115 /* Revision 1 always requires the hash table */ 1116 return (-1); 1117 } 1118 1119 num_of_str = p->num_of_str; 1120 hash_size = p->hash_size; 1121 num_of_d_str = p->num_of_d_str; 1122 1123 hash_mem_size = hash_size * sizeof (uint32_t); 1124 ROUND(hash_mem_size, sizeof (struct gnu_msg_ent)); 1125 1126 d_msgid_size = num_of_d_str * sizeof (struct gnu_msg_ent); 1127 d_msgstr_size = num_of_d_str * sizeof (struct gnu_msg_ent); 1128 1129 chunk = malloc(hash_mem_size + d_msgid_size + d_msgstr_size); 1130 if (chunk == NULL) { 1131 return (-1); 1132 } 1133 1134 d_hash = (uint32_t *)(uintptr_t)chunk; 1135 p->d_msg[MSGID] = (struct gnu_msg_ent *)(uintptr_t) 1136 (chunk + hash_mem_size); 1137 p->d_msg[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t) 1138 (chunk + hash_mem_size + d_msgid_size); 1139 1140 if ((mchunk = expand_dynamic_message(p, p->d_msg)) == NULL) { 1141 free(chunk); 1142 return (-1); 1143 } 1144 1145 /* copy the original hash table into the dynamic hash table */ 1146 for (i = 0; i < hash_size; i++) { 1147 d_hash[i] = SWAP(p, p->hash_table[i]); 1148 } 1149 1150 /* fill in the dynamic hash table with dynamic messages */ 1151 for (i = 0; i < num_of_d_str; i++) { 1152 hash_value = get_hashid(mchunk + p->d_msg[MSGID][i].offset, 1153 NULL); 1154 idx = get_hash_index(d_hash, hash_value, hash_size); 1155 d_hash[idx] = num_of_str + i + 1; 1156 } 1157 1158 p->mchunk = mchunk; 1159 p->hash_table = d_hash; 1160 1161 #ifdef GETTEXT_DEBUG 1162 print_rev1_info(p); 1163 gprintf(0, "******* exiting build_rev1_info()\n"); 1164 printgnumsg(p, 1); 1165 #endif 1166 1167 return (0); 1168 } 1169 1170 /* 1171 * gnu_setmsg 1172 * 1173 * INPUT 1174 * mnp - message node 1175 * addr - address to the mmapped file 1176 * size - size of the file 1177 * 1178 * RETURN 1179 * 0 - either T_GNU_MO or T_ILL_MO has been set 1180 * -1 - failed 1181 */ 1182 int 1183 gnu_setmsg(Msg_node *mnp, char *addr, size_t size) 1184 { 1185 struct gnu_msg_info *gnu_header; 1186 Msg_g_node *p; 1187 1188 #ifdef GETTEXT_DEBUG 1189 gprintf(0, "******** entering gnu_setmsg(0x%p, 0x%p, %lu)\n", 1190 (void *)mnp, addr, size); 1191 printmnp(mnp, 1); 1192 #endif 1193 1194 /* checks the GNU MAGIC number */ 1195 if (size < sizeof (struct gnu_msg_info)) { 1196 /* invalid mo file */ 1197 mnp->type = T_ILL_MO; 1198 #ifdef GETTEXT_DEBUG 1199 gprintf(0, "********* exiting gnu_setmsg\n"); 1200 printmnp(mnp, 1); 1201 #endif 1202 return (0); 1203 } 1204 1205 gnu_header = (struct gnu_msg_info *)(uintptr_t)addr; 1206 1207 p = calloc(1, sizeof (Msg_g_node)); 1208 if (p == NULL) { 1209 return (-1); 1210 } 1211 p->msg_file_info = gnu_header; 1212 1213 if (gnu_header->magic == GNU_MAGIC) { 1214 switch (gnu_header->revision) { 1215 case GNU_REVISION_0_1: 1216 case GNU_REVISION_1_1: 1217 p->flag |= ST_REV1; 1218 break; 1219 } 1220 } else if (gnu_header->magic == GNU_MAGIC_SWAPPED) { 1221 p->flag |= ST_SWP; 1222 switch (gnu_header->revision) { 1223 case GNU_REVISION_0_1_SWAPPED: 1224 case GNU_REVISION_1_1_SWAPPED: 1225 p->flag |= ST_REV1; 1226 break; 1227 } 1228 } else { 1229 /* invalid mo file */ 1230 free(p); 1231 mnp->type = T_ILL_MO; 1232 #ifdef GETTEXT_DEBUG 1233 gprintf(0, "********* exiting gnu_setmsg\n"); 1234 printmnp(mnp, 1); 1235 #endif 1236 return (0); 1237 } 1238 1239 p->fsize = size; 1240 p->num_of_str = SWAP(p, gnu_header->num_of_str); 1241 p->hash_size = SWAP(p, gnu_header->sz_hashtbl); 1242 p->hash_table = p->hash_size <= 2 ? NULL : 1243 (uint32_t *)(uintptr_t) 1244 (addr + SWAP(p, gnu_header->off_hashtbl)); 1245 1246 p->msg_tbl[MSGID] = (struct gnu_msg_ent *)(uintptr_t) 1247 (addr + SWAP(p, gnu_header->off_msgid_tbl)); 1248 p->msg_tbl[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t) 1249 (addr + SWAP(p, gnu_header->off_msgstr_tbl)); 1250 1251 if (p->flag & ST_REV1) { 1252 /* Revision 1 */ 1253 struct gnu_msg_rev1_info *rev1_header; 1254 1255 rev1_header = (struct gnu_msg_rev1_info *) 1256 (uintptr_t)(addr + sizeof (struct gnu_msg_info)); 1257 p->rev1_header = rev1_header; 1258 p->num_of_d_str = SWAP(p, rev1_header->num_of_dynamic_str); 1259 if (build_rev1_info(p) == -1) { 1260 free(p); 1261 #ifdef GETTEXT_DEBUG 1262 gprintf(0, "******** exiting gnu_setmsg: " 1263 "build_rev1_info() failed\n"); 1264 #endif 1265 return (-1); 1266 } 1267 } 1268 1269 mnp->msg.gnumsg = p; 1270 mnp->type = T_GNU_MO; 1271 1272 #ifdef GETTEXT_DEBUG 1273 gprintf(0, "********* exiting gnu_setmsg\n"); 1274 printmnp(mnp, 1); 1275 #endif 1276 return (0); 1277 } 1278 1279 /* 1280 * get_hash_index 1281 * 1282 * Returns the index to an empty slot in the hash table 1283 * for the specified hash_value. 1284 */ 1285 static uint32_t 1286 get_hash_index(uint32_t *hash_tbl, uint32_t hash_value, uint32_t hash_size) 1287 { 1288 uint32_t idx, inc; 1289 1290 idx = hash_value % hash_size; 1291 inc = 1 + (hash_value % (hash_size - 2)); 1292 1293 for (;;) { 1294 if (hash_tbl[idx] == 0) { 1295 /* found an empty slot */ 1296 return (idx); 1297 } 1298 idx = (idx + inc) % hash_size; 1299 } 1300 /* NOTREACHED */ 1301 } 1302