1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 #include "lint.h"
30 #include "mtlib.h"
31 #include <ctype.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <sys/types.h>
36 #include <sys/mman.h>
37 #include <sys/param.h>
38 #include <sys/stat.h>
39 #include <thread.h>
40 #include <synch.h>
41 #include <unistd.h>
42 #include <limits.h>
43 #include <errno.h>
44 #include <inttypes.h>
45 #include "libc.h"
46 #include "msgfmt.h"
47 #include "nlspath_checks.h"
48 #include "gettext.h"
49
50 #ifdef DEBUG
51 #include <assert.h>
52 #endif
53
/*
 * These two symbols exist only for binary compatibility with
 * programs linked against GNU gettext.
 */
int	_nl_msg_cat_cntr;
int	*_nl_domain_bindings;

/* Shared empty string, used when no source encoding is available. */
static const char	*nullstr = "";

/*
 * Keywords searched for in the MO header entry.  Each *_LEN is the
 * keyword length without the terminating null byte.
 */
#define	CHARSET_MOD	"charset="
#define	CHARSET_LEN	(sizeof (CHARSET_MOD) - 1)
#define	NPLURALS_MOD	"nplurals="
#define	NPLURALS_LEN	(sizeof (NPLURALS_MOD) - 1)
#define	PLURAL_MOD	"plural="
#define	PLURAL_LEN	(sizeof (PLURAL_MOD) - 1)
66
67 static uint32_t get_hash_index(uint32_t *, uint32_t, uint32_t);
68
69 /*
70 * free_conv_msgstr
71 *
72 * release the memory allocated for storing code-converted messages
73 *
74 * f
75 * 0: do not free gmnp->conv_msgstr
76 * 1: free gmnp->conv_msgstr
77 */
78 static void
free_conv_msgstr(Msg_g_node * gmnp,int f)79 free_conv_msgstr(Msg_g_node *gmnp, int f)
80 {
81 uint32_t i, num_of_conv;
82
83 #ifdef GETTEXT_DEBUG
84 gprintf(0, "*************** free_conv_msgstr(0x%p, %d)\n",
85 (void *)gmnp, f);
86 printgnumsg(gmnp, 1);
87 #endif
88
89 num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str;
90 for (i = 0; i < num_of_conv; i++) {
91 if (gmnp->conv_msgstr[i]) {
92 free(gmnp->conv_msgstr[i]);
93 }
94 gmnp->conv_msgstr[i] = NULL;
95 }
96 if (f) {
97 free(gmnp->conv_msgstr);
98 gmnp->conv_msgstr = NULL;
99 }
100 }
101
102 /*
103 * dfltmsgstr
104 *
105 * choose an appropriate message by evaluating the plural expression,
106 * and return it.
107 */
108 static char *
dfltmsgstr(Msg_g_node * gmnp,const char * msgstr,uint32_t msgstr_len,struct msg_pack * mp)109 dfltmsgstr(Msg_g_node *gmnp, const char *msgstr, uint32_t msgstr_len,
110 struct msg_pack *mp)
111 {
112 unsigned int pindex;
113 size_t len;
114 const char *p;
115
116 #ifdef GETTEXT_DEBUG
117 gprintf(0, "*************** dfltmsgstr(0x%p, \"%s\", %u, 0x%p)\n",
118 (void *)gmnp,
119 msgstr ? msgstr : "(null)", msgstr_len, (void *)mp);
120 printgnumsg(gmnp, 1);
121 printmp(mp, 1);
122 #endif
123
124 if (mp->plural) {
125 if (gmnp->plural) {
126 pindex = plural_eval(gmnp->plural, mp->n);
127 } else {
128 /*
129 * This mo does not have plural information.
130 * Using the English form.
131 */
132 if (mp->n == 1)
133 pindex = 0;
134 else
135 pindex = 1;
136 }
137 #ifdef GETTEXT_DEBUG
138 gprintf(0, "plural_eval returned: %u\n", pindex);
139 #endif
140 if (pindex >= gmnp->nplurals) {
141 /* should never happen */
142 pindex = 0;
143 }
144 p = msgstr;
145 for (; pindex != 0; pindex--) {
146 len = msgstr_len - (p - msgstr);
147 p = memchr(p, '\0', len);
148 if (p == NULL) {
149 /*
150 * null byte not found
151 * this should never happen
152 */
153 char *result;
154 DFLTMSG(result, mp->msgid1, mp->msgid2,
155 mp->n, mp->plural);
156 return (result);
157 }
158 p++; /* skip */
159 }
160 return ((char *)p);
161 }
162
163 return ((char *)msgstr);
164 }
165
166 /*
167 * parse_header
168 *
169 * parse the header entry of the GNU MO file and
170 * extract the src encoding and the plural information of the MO file
171 */
172 static int
parse_header(const char * header,Msg_g_node * gmnp)173 parse_header(const char *header, Msg_g_node *gmnp)
174 {
175 char *charset = NULL;
176 char *charset_str;
177 size_t len;
178 char *nplurals_str, *plural_str;
179 plural_expr_t plural;
180 char *p, *q;
181 unsigned int nplurals;
182 int ret;
183
184 #ifdef GETTEXT_DEBUG
185 gprintf(0, "*************** parse_header(\"%s\", 0x%p)\n",
186 header ? header : "(null)", (void *)gmnp);
187 printgnumsg(gmnp, 1);
188 #endif
189
190 if (header == NULL) {
191 gmnp->src_encoding = (char *)nullstr;
192 gmnp->nplurals = 2;
193 gmnp->plural = NULL;
194 #ifdef GETTEXT_DEBUG
195 gprintf(0, "*************** exiting parse_header\n");
196 gprintf(0, "no header\n");
197 #endif
198
199 return (0);
200 }
201
202 charset_str = strstr(header, CHARSET_MOD);
203 if (charset_str == NULL) {
204 gmnp->src_encoding = (char *)nullstr;
205 } else {
206 p = charset_str + CHARSET_LEN;
207 q = p;
208 while ((*q != ' ') && (*q != '\t') &&
209 (*q != '\n')) {
210 q++;
211 }
212 len = q - p;
213 if (len > 0) {
214 charset = malloc(len + 1);
215 if (charset == NULL) {
216 gmnp->src_encoding = (char *)nullstr;
217 gmnp->nplurals = 2;
218 gmnp->plural = NULL;
219 return (-1);
220 }
221 (void) memcpy(charset, p, len);
222 charset[len] = '\0';
223 gmnp->src_encoding = charset;
224 } else {
225 gmnp->src_encoding = (char *)nullstr;
226 }
227 }
228
229 nplurals_str = strstr(header, NPLURALS_MOD);
230 plural_str = strstr(header, PLURAL_MOD);
231 if (nplurals_str == NULL || plural_str == NULL) {
232 /* no valid plural specification */
233 gmnp->nplurals = 2;
234 gmnp->plural = NULL;
235 #ifdef GETTEXT_DEBUG
236 gprintf(0, "*************** exiting parse_header\n");
237 gprintf(0, "no plural entry\n");
238 #endif
239 return (0);
240 } else {
241 p = nplurals_str + NPLURALS_LEN;
242 while (*p && isspace((unsigned char)*p)) {
243 p++;
244 }
245 nplurals = (unsigned int)strtol(p, &q, 10);
246 if (p != q) {
247 gmnp->nplurals = nplurals;
248 } else {
249 gmnp->nplurals = 2;
250 }
251
252 p = plural_str + PLURAL_LEN;
253 #ifdef GETTEXT_DEBUG
254 gprintf(0, "plural_str: \"%s\"\n", p);
255 #endif
256
257 ret = plural_expr(&plural, (const char *)p);
258 if (ret == 0) {
259 /* parse succeeded */
260 gmnp->plural = plural;
261 #ifdef GETTEXT_DEBUG
262 gprintf(0, "*************** exiting parse_header\n");
263 gprintf(0, "charset: \"%s\"\n",
264 charset ? charset : "(null)");
265 printexpr(plural, 1);
266 #endif
267 return (0);
268 } else if (ret == 1) {
269 /* parse error */
270 gmnp->nplurals = 2;
271 gmnp->plural = NULL;
272 return (0);
273 } else {
274 /* fatal error */
275 if (charset)
276 free(charset);
277 gmnp->src_encoding = (char *)nullstr;
278 gmnp->nplurals = 2;
279 gmnp->plural = NULL;
280 return (-1);
281 }
282 }
283 /* NOTREACHED */
284 }
285
286 /*
287 * handle_lang
288 *
289 * take care of the LANGUAGE specification
290 */
291 char *
handle_lang(struct msg_pack * mp)292 handle_lang(struct msg_pack *mp)
293 {
294 const char *p, *op, *q;
295 size_t locale_len;
296 char *result;
297 char locale[MAXPATHLEN];
298
299
300 #ifdef GETTEXT_DEBUG
301 gprintf(0, "*************** handle_lang(0x%p)\n", (void *)mp);
302 printmp(mp, 1);
303 #endif
304
305 p = mp->language;
306
307 while (*p) {
308 op = p;
309 q = strchr(p, ':');
310 if (q == NULL) {
311 locale_len = strlen(p);
312 p += locale_len;
313 } else {
314 locale_len = q - p;
315 p += locale_len + 1;
316 }
317 if (locale_len >= MAXPATHLEN || locale_len == 0) {
318 /* illegal locale name */
319 continue;
320 }
321 (void) memcpy(locale, op, locale_len);
322 locale[locale_len] = '\0';
323 mp->locale = locale;
324
325 #ifdef GETTEXT_DEBUG
326 *mp->msgfile = '\0';
327 #endif
328 if (mk_msgfile(mp) == NULL) {
329 /* illegal locale name */
330 continue;
331 }
332
333 result = handle_mo(mp);
334 if (mp->status & ST_GNU_MSG_FOUND)
335 return (result);
336
337 if (mp->status & ST_SUN_MO_FOUND)
338 break;
339 }
340
341 /*
342 * no valid locale found, Sun MO found, or
343 * GNU MO found but no valid msg found there.
344 */
345
346 if (mp->status & ST_GNU_MO_FOUND) {
347 /*
348 * GNU MO found but no valid msg found there.
349 * returning DFLTMSG.
350 */
351 DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural);
352 return (result);
353 }
354 return (NULL);
355 }
356
357 /*
358 * gnu_msgsearch
359 *
360 * Searchs the translation message for the specified msgid1.
361 * Hash algorithm used in this function is Open Addressing
362 * with Double Hashing:
363 * H(k, i) = (H1(k) + i * H2(k)) mod M
364 * H1(k) = hashvalue % M
365 * H2(k) = 1 + (hashvalue % (M - 2))
366 *
367 * Ref: The Art of Computer Programming Volume 3
368 * Sorting and Searching, second edition
369 * Donald E Knuth
370 */
371 static char *
gnu_msgsearch(Msg_g_node * gmnp,const char * msgid1,uint32_t * msgstrlen,uint32_t * midx)372 gnu_msgsearch(Msg_g_node *gmnp, const char *msgid1,
373 uint32_t *msgstrlen, uint32_t *midx)
374 {
375 struct gnu_msg_info *header = gmnp->msg_file_info;
376 struct gnu_msg_ent *msgid_tbl, *msgstr_tbl;
377 uint32_t num_of_str, idx, mlen, msglen;
378 uint32_t hash_size, hash_val, hash_id, hash_inc, hash_idx;
379 uint32_t *hash_table;
380 char *base;
381 char *msg;
382
383 #ifdef GETTEXT_DEBUG
384 gprintf(0, "*************** gnu_msgsearch(0x%p, \"%s\", "
385 "0x%p, 0x%p)\n",
386 (void *)gmnp, msgid1, msgstrlen, midx);
387 printgnumsg(gmnp, 1);
388 #endif
389
390 base = (char *)header;
391
392 msgid_tbl = gmnp->msg_tbl[MSGID];
393 msgstr_tbl = gmnp->msg_tbl[MSGSTR];
394 hash_table = gmnp->hash_table;
395 hash_size = gmnp->hash_size;
396 num_of_str = gmnp->num_of_str;
397
398 if (!(gmnp->flag & ST_REV1) &&
399 (hash_table == NULL || (hash_size <= 2))) {
400 /*
401 * Revision 0 and
402 * No hash table exists or
403 * hash size is enough small.
404 */
405 uint32_t top, bottom;
406 char *msg_id_str;
407 int val;
408
409 top = 0;
410 bottom = num_of_str;
411 while (top < bottom) {
412 idx = (top + bottom) / 2;
413 msg_id_str = base +
414 SWAP(gmnp, msgid_tbl[idx].offset);
415
416 val = strcmp(msg_id_str, msgid1);
417 if (val < 0) {
418 top = idx + 1;
419 } else if (val > 0) {
420 bottom = idx;
421 } else {
422 *msgstrlen = (unsigned int)
423 SWAP(gmnp, msgstr_tbl[idx].len) + 1;
424 *midx = idx;
425 return (base +
426 SWAP(gmnp, msgstr_tbl[idx].offset));
427 }
428 }
429 /* not found */
430 return ((char *)msgid1);
431 }
432
433 /* use hash table */
434 hash_id = get_hashid(msgid1, &msglen);
435 hash_idx = hash_id % hash_size;
436 hash_inc = 1 + (hash_id % (hash_size - 2));
437
438 for (;;) {
439 hash_val = HASH_TBL(gmnp, hash_table[hash_idx]);
440
441 if (hash_val == 0) {
442 /* not found */
443 return ((char *)msgid1);
444 }
445 if (hash_val <= num_of_str) {
446 /* static message */
447 idx = hash_val - 1;
448 mlen = SWAP(gmnp, msgid_tbl[idx].len);
449 msg = base + SWAP(gmnp, msgid_tbl[idx].offset);
450 } else {
451 if (!(gmnp->flag & ST_REV1)) {
452 /* rev 0 does not have dynamic message */
453 return ((char *)msgid1);
454 }
455 /* dynamic message */
456 idx = hash_val - num_of_str - 1;
457 mlen = gmnp->d_msg[MSGID][idx].len;
458 msg = gmnp->mchunk + gmnp->d_msg[MSGID][idx].offset;
459 }
460 if (msglen <= mlen && strcmp(msgid1, msg) == 0) {
461 /* found */
462 break;
463 }
464 hash_idx = (hash_idx + hash_inc) % hash_size;
465 }
466
467 /* msgstrlen should include a null termination */
468 if (hash_val <= num_of_str) {
469 *msgstrlen = SWAP(gmnp, msgstr_tbl[idx].len) + 1;
470 msg = base + SWAP(gmnp, msgstr_tbl[idx].offset);
471 *midx = idx;
472 } else {
473 *msgstrlen = gmnp->d_msg[MSGSTR][idx].len + 1;
474 msg = gmnp->mchunk + gmnp->d_msg[MSGSTR][idx].offset;
475 *midx = idx + num_of_str;
476 }
477
478 return (msg);
479 }
480
/*
 * do_conv
 *
 * Converts the srclen bytes at src with the conversion descriptor
 * fd by calling iconv().
 *
 * Returns a malloc'ed buffer laid out as
 *	uint32_t  length of the converted data in bytes
 *	char[]    converted data
 * or NULL if memory could not be allocated.  The caller owns the
 * buffer.
 *
 * The converted data always ends with a null byte, and that byte is
 * counted in the length.  If iconv() stops early for a reason other
 * than E2BIG, the part converted so far is returned, terminated
 * here, so the caller never gets an unterminated string.
 */
static uint32_t *
do_conv(iconv_t fd, const char *src, uint32_t srclen)
{
	uint32_t	tolen;
	uint32_t	*ptr, *optr;
	size_t		oleft, ileft, bufsize, memincr, newsize;
	char		*to, *tptr;
	int		need_nul;

#ifdef GETTEXT_DEBUG
	gprintf(0, "*************** do_conv("
	    "0x%p, \"%s\", %d)\n",
	    (void *)fd, src ? src : "(null)", srclen);
#endif

	/* widen before multiplying so that the product cannot wrap */
	memincr = (size_t)srclen * 2;
	bufsize = memincr;
	if (memincr == 0) {
		/* keep the E2BIG path below able to make progress */
		memincr = 2;
	}
	ileft = srclen;
	oleft = bufsize;
	ptr = malloc(bufsize + sizeof (uint32_t));
	if (ptr == NULL) {
		return (NULL);
	}
	to = (char *)(ptr + 1);

	for (;;) {
		tptr = to;
		errno = 0;
#ifdef GETTEXT_DEBUG
		gprintf(0, "******* calling iconv()\n");
#endif
		/*
		 * The (void *) cast lets this call compile against both
		 * the "const char **" and the "char **" prototypes of
		 * iconv().
		 */
		if (iconv(fd, (void *)&src, &ileft, &tptr, &oleft) !=
		    (size_t)-1 || errno != E2BIG) {
			/* done, or failed in a way more room won't fix */
			break;
		}

#ifdef GETTEXT_DEBUG
		gprintf(0, "******* iconv detected E2BIG\n");
		gprintf(0, "old bufsize: %u\n", bufsize);
#endif

		optr = realloc(ptr, bufsize + memincr + sizeof (uint32_t));
		if (optr == NULL) {
			free(ptr);
			return (NULL);
		}
		ptr = optr;
		/* resume right after what has been written so far */
		to = (char *)(optr + 1);
		to += bufsize - oleft;
		oleft += memincr;
		bufsize += memincr;
#ifdef GETTEXT_DEBUG
		gprintf(0, "new bufsize: %u\n", bufsize);
#endif
	}

	tolen = (uint32_t)(bufsize - oleft);
	to = (char *)(ptr + 1);

	/* a complete conversion carries the source's terminator over */
	need_nul = (tolen == 0 || to[tolen - 1] != '\0');
	newsize = (size_t)tolen + (need_nul ? 1 : 0);

	if (newsize != bufsize) {
		/* fit the buffer to the data (normally this shrinks it) */
		optr = realloc(ptr, newsize + sizeof (uint32_t));
		if (optr != NULL) {
			ptr = optr;
		} else if (newsize > bufsize) {
			/* no room for the terminator */
			free(ptr);
			return (NULL);
		}
		/* else: shrinking failed, the larger buffer is still fine */
	}
	if (need_nul) {
		((char *)(ptr + 1))[tolen] = '\0';
		tolen++;
	}
	*ptr = tolen;

#ifdef GETTEXT_DEBUG
	gprintf(0, "******* exiting do_conv()\n");
	gprintf(0, "tolen: %u\n", *ptr);
	gprintf(0, "return: 0x%p\n", ptr);
#endif
	return (ptr);
}
566
567 /*
568 * conv_msg
569 */
570 static char *
conv_msg(Msg_g_node * gmnp,char * msgstr,uint32_t msgstr_len,uint32_t midx,struct msg_pack * mp)571 conv_msg(Msg_g_node *gmnp, char *msgstr, uint32_t msgstr_len, uint32_t midx,
572 struct msg_pack *mp)
573 {
574 uint32_t *conv_dst;
575 size_t num_of_conv, conv_msgstr_len;
576 char *conv_msgstr, *result;
577
578 if (gmnp->conv_msgstr == NULL) {
579 num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str;
580 gmnp->conv_msgstr =
581 calloc((size_t)num_of_conv, sizeof (uint32_t *));
582 if (gmnp->conv_msgstr == NULL) {
583 /* malloc failed */
584 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
585 return (result);
586 }
587 }
588
589 conv_dst = do_conv(gmnp->fd, (const char *)msgstr, msgstr_len);
590
591 if (conv_dst == NULL) {
592 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
593 return (result);
594 }
595 conv_msgstr_len = *conv_dst;
596 gmnp->conv_msgstr[midx] = conv_dst;
597 conv_msgstr = (char *)(conv_dst + 1);
598 result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp);
599 return (result);
600 }
601
602 /*
603 * gnu_key_2_text
604 *
605 * Extracts msgstr from the GNU MO file
606 */
607 char *
gnu_key_2_text(Msg_g_node * gmnp,const char * codeset,struct msg_pack * mp)608 gnu_key_2_text(Msg_g_node *gmnp, const char *codeset,
609 struct msg_pack *mp)
610 {
611 uint32_t msgstr_len, midx;
612 iconv_t fd;
613 char *result, *msgstr;
614 int ret, conversion, new_encoding;
615
616 #ifdef GETTEXT_DEBUG
617 gprintf(0, "*************** gnu_key_2_text("
618 "0x%p, \"%s\", 0x%p)\n",
619 (void *)gmnp, codeset ? codeset : "(null)", (void *)mp);
620 printgnumsg(gmnp, 1);
621 printmp(mp, 1);
622 #endif
623
624 /* first checks if header entry has been processed */
625 if (!(gmnp->flag & ST_CHK)) {
626 char *msg_header;
627
628 msg_header = gnu_msgsearch(gmnp, "", &msgstr_len, &midx);
629 ret = parse_header((const char *)msg_header, gmnp);
630 if (ret == -1) {
631 /* fatal error */
632 DFLTMSG(result, mp->msgid1, mp->msgid2,
633 mp->n, mp->plural);
634 return (result);
635 }
636 gmnp->flag |= ST_CHK;
637 }
638 msgstr = gnu_msgsearch(gmnp, mp->msgid1, &msgstr_len, &midx);
639 if (msgstr == mp->msgid1) {
640 /* not found */
641 DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural);
642 return (result);
643 }
644
645 #ifdef GETTEXT_DEBUG
646 printgnumsg(gmnp, 1);
647 #endif
648 if (gmnp->dst_encoding == NULL) {
649 /*
650 * destination encoding has not been set.
651 */
652 char *dupcodeset = strdup(codeset);
653 if (dupcodeset == NULL) {
654 /* strdup failed */
655 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
656 return (result);
657 }
658 gmnp->dst_encoding = dupcodeset;
659
660 if (strcmp(gmnp->dst_encoding, gmnp->src_encoding) == 0) {
661 /*
662 * target encoding and src encoding
663 * are the same.
664 * No conversion required.
665 */
666 conversion = 0;
667 } else {
668 /*
669 * target encoding is different from
670 * src encoding.
671 * New conversion required.
672 */
673 /* sanity check */
674 if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) {
675 (void) iconv_close(gmnp->fd);
676 gmnp->fd = (iconv_t)-1;
677 }
678 if (gmnp->conv_msgstr)
679 free_conv_msgstr(gmnp, 0);
680 conversion = 1;
681 new_encoding = 1;
682 }
683 } else {
684 /*
685 * dst encoding has been already set.
686 */
687 if (strcmp(gmnp->dst_encoding, codeset) == 0) {
688 /*
689 * dst encoding and target encoding are the same.
690 */
691 if (strcmp(gmnp->dst_encoding, gmnp->src_encoding)
692 == 0) {
693 /*
694 * dst encoding and src encoding are the same.
695 * No conversion required.
696 */
697 conversion = 0;
698 } else {
699 /*
700 * dst encoding is different from src encoding.
701 * current conversion is valid.
702 */
703 conversion = 1;
704 new_encoding = 0;
705 /* checks if iconv_open has succeeded before */
706 if (gmnp->fd == (iconv_t)-1) {
707 /*
708 * iconv_open should have failed before
709 * Assume this conversion is invalid
710 */
711 conversion = 0;
712 } else {
713 if (gmnp->conv_msgstr == NULL) {
714 /*
715 * memory allocation for
716 * conv_msgstr should
717 * have failed before.
718 */
719 new_encoding = 1;
720 if (gmnp->fd)
721 (void) iconv_close(
722 gmnp->fd);
723 gmnp->fd = (iconv_t)-1;
724 }
725 }
726 }
727 } else {
728 /*
729 * dst encoding is different from target encoding.
730 * It has changed since before.
731 */
732 char *dupcodeset = strdup(codeset);
733 if (dupcodeset == NULL) {
734 result = dfltmsgstr(gmnp, msgstr,
735 msgstr_len, mp);
736 return (result);
737 }
738 free(gmnp->dst_encoding);
739 gmnp->dst_encoding = dupcodeset;
740 if (strcmp(gmnp->dst_encoding, gmnp->src_encoding)
741 == 0) {
742 /*
743 * dst encoding and src encoding are the same.
744 * now, no conversion required.
745 */
746 conversion = 0;
747 if (gmnp->conv_msgstr)
748 free_conv_msgstr(gmnp, 1);
749 } else {
750 /*
751 * dst encoding is different from src encoding.
752 * new conversion required.
753 */
754 conversion = 1;
755 new_encoding = 1;
756 if (gmnp->conv_msgstr)
757 free_conv_msgstr(gmnp, 0);
758 }
759
760 if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) {
761 (void) iconv_close(gmnp->fd);
762 }
763 if (gmnp->fd != (iconv_t)-1) {
764 gmnp->fd = (iconv_t)-1;
765 }
766 }
767 }
768
769 if (conversion == 0) {
770 /* no conversion */
771 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
772 return (result);
773 }
774 /* conversion required */
775
776 if (new_encoding == 0) {
777 /* dst codeset hasn't been changed since before */
778 uint32_t *cmsg;
779 uint32_t conv_msgstr_len;
780 char *conv_msgstr;
781
782 if (gmnp->conv_msgstr[midx] == NULL) {
783 /* this msgstr hasn't been converted yet */
784 result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp);
785 return (result);
786 }
787 /* this msgstr is in the conversion cache */
788 cmsg = (uint32_t *)(uintptr_t)gmnp->conv_msgstr[midx];
789 conv_msgstr_len = *cmsg;
790 conv_msgstr = (char *)(cmsg + 1);
791 result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp);
792 return (result);
793 }
794 /* new conversion */
795 #ifdef GETTEXT_DEBUG
796 gprintf(0, "******* calling iconv_open()\n");
797 gprintf(0, " dst: \"%s\", src: \"%s\"\n",
798 gmnp->dst_encoding, gmnp->src_encoding);
799 #endif
800 fd = iconv_open(gmnp->dst_encoding, gmnp->src_encoding);
801 gmnp->fd = fd;
802 if (fd == (iconv_t)-1) {
803 /*
804 * iconv_open() failed.
805 * no conversion
806 */
807 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
808 return (result);
809 }
810 result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp);
811 return (result);
812 }
813
814
/*
 * Tables of the <inttypes.h> PRI* conversion specifiers.
 *
 * PRIS(P, x) builds one 14-entry initializer for conversion
 * character x, in this order:
 *	[0..3]   PRIx8, PRIx16, PRIx32, PRIx64
 *	[4..7]   PRIxLEAST8 .. PRIxLEAST64
 *	[8..11]  PRIxFAST8 .. PRIxFAST64
 *	[12]     PRIxMAX
 *	[13]     PRIxPTR
 * With P == PRI_STR the entries are the specifier strings, with
 * P == PRI_LEN they are the string lengths (no terminating null).
 */
#define	PRI_STR(x, n)	PRI##x##n
#define	PRI_LEN(x, n)	(char)(sizeof (PRI_STR(x, n)) - 1)
#define	PRIS(P, x)	{\
/* x/N/ */	P(x, 8), P(x, 16), P(x, 32), P(x, 64), \
/* xLEAST/N/ */	P(x, LEAST8), P(x, LEAST16), P(x, LEAST32), P(x, LEAST64), \
/* xFAST/N/ */	P(x, FAST8), P(x, FAST16), P(x, FAST32), P(x, FAST64), \
/* xMAX,PTR */	P(x, MAX), P(x, PTR) \
}

/* index of the first entry of each group in a PRIS() table */
#define	PRI_BIAS_LEAST	4
#define	PRI_BIAS_FAST	8
#define	PRI_BIAS_MAX	12
#define	PRI_BIAS_PTR	13

/* number of supported widths, and their spellings in table order */
#define	PRI_NWIDTHS	4
static const char *const pri_width[PRI_NWIDTHS] = { "8", "16", "32", "64" };

static const char *pri_d[] = PRIS(PRI_STR, d);
static const char *pri_i[] = PRIS(PRI_STR, i);
static const char *pri_o[] = PRIS(PRI_STR, o);
static const char *pri_u[] = PRIS(PRI_STR, u);
static const char *pri_x[] = PRIS(PRI_STR, x);
static const char *pri_X[] = PRIS(PRI_STR, X);

static const char pri_d_len[] = PRIS(PRI_LEN, d);
static const char pri_i_len[] = PRIS(PRI_LEN, i);
static const char pri_o_len[] = PRIS(PRI_LEN, o);
static const char pri_u_len[] = PRIS(PRI_LEN, u);
static const char pri_x_len[] = PRIS(PRI_LEN, x);
static const char pri_X_len[] = PRIS(PRI_LEN, X);

/* conversion character -> its specifier and length tables */
static const struct {
	const char	type;
	const char	**str_table;
	const char	*len_table;
} pri_table[] = {
	{'d', pri_d, pri_d_len}, {'i', pri_i, pri_i_len},
	{'o', pri_o, pri_o_len}, {'u', pri_u, pri_u_len},
	{'x', pri_x, pri_x_len}, {'X', pri_X, pri_X_len},
};

/* the name groups that may follow the conversion character */
static const struct {
	const char	*name;
	const char	nlen;		/* strlen(name) */
	const char	want_digits;	/* a width must follow the name */
	const char	bias;		/* first table index of the group */
} special_table[] = {
	{"LEAST",	5, 1, PRI_BIAS_LEAST},
	{"FAST",	4, 1, PRI_BIAS_FAST},
	{"MAX",		3, 0, PRI_BIAS_MAX},
	{"PTR",		3, 0, PRI_BIAS_PTR},
};

/*
 * conv_macro() returns the conversion specifier corresponding
 * to the macro name specified in 'str'.  'len' contains the
 * length of the macro name including the null termination.
 * '*lenp' will be set to the length of the returning conversion
 * specifier without the null termination.
 *
 * Recognized names are "I" (mapped to an empty specifier) and
 * PRI{d,i,o,u,x,X} followed by one of N, LEASTN, FASTN, MAX or
 * PTR, where N is exactly "8", "16", "32" or "64".
 *
 * Returns NULL, leaving '*lenp' untouched, for any other name.
 * The width is matched as text, so spellings such as "PRId08" or
 * a number that merely wraps around to a valid width are rejected.
 */
static const char *
conv_macro(const char *str, uint32_t len, uint32_t *lenp)
{
	const char	**tbl;
	const char	*ltbl;
	int		n, i, bias, idx, want_digits;

	if (len == 2) {
		if (*str == 'I') {
			/* Solaris does not support %I */
			*lenp = 0;
			return ("");
		}
		return (NULL);
	}

	if (len <= 4 || strncmp(str, "PRI", 3) != 0)
		return (NULL);

	str += 3;

	/* conversion character */
	n = sizeof (pri_table) / sizeof (pri_table[0]);
	for (i = 0; i < n; i++) {
		if (pri_table[i].type == *str)
			break;
	}
	if (i == n)
		return (NULL);
	tbl = pri_table[i].str_table;
	ltbl = pri_table[i].len_table;

	str++;
	idx = 0;

	if (isdigit((unsigned char)*str)) {
		/* PRIx/N/ */
		bias = 0;
		want_digits = 1;
	} else {
		n = sizeof (special_table) / sizeof (special_table[0]);
		for (i = 0; i < n; i++) {
			if (strncmp(special_table[i].name,
			    str, special_table[i].nlen) == 0) {
				break;
			}
		}
		if (i == n)
			return (NULL);
		bias = special_table[i].bias;
		want_digits = special_table[i].want_digits;
		str += special_table[i].nlen;
	}

	if (want_digits) {
		/* the rest of the name must be exactly 8/16/32/64 */
		for (idx = 0; idx < PRI_NWIDTHS; idx++) {
			if (strcmp(str, pri_width[idx]) == 0)
				break;
		}
		if (idx == PRI_NWIDTHS)
			return (NULL);
	} else if (*str != '\0') {
		/* unknown format */
		return (NULL);
	}

	*lenp = (uint32_t)ltbl[bias + idx];
	return (tbl[bias + idx]);
}
947
948 static gnu_d_macro_t *
expand_macros(Msg_g_node * p)949 expand_macros(Msg_g_node *p)
950 {
951 char *base = (char *)p->msg_file_info;
952 struct gnu_msg_rev1_info *rev1_header = p->rev1_header;
953 struct gnu_msg_ent *d_macro_tbl;
954 gnu_d_macro_t *d_macro;
955 uint32_t num_of_d_macro, e_maclen, maclen, i;
956 const char *e_macname;
957 char *macname;
958
959 /* number of the dynamic macros */
960 num_of_d_macro = SWAP(p, rev1_header->num_of_dynamic_macro);
961
962 d_macro = malloc((size_t)num_of_d_macro * sizeof (gnu_d_macro_t));
963 if (d_macro == NULL)
964 return (NULL);
965
966 /* pointer to the dynamic strings table */
967 d_macro_tbl = (struct gnu_msg_ent *)(uintptr_t)
968 (base + SWAP(p, rev1_header->off_dynamic_macro));
969
970 for (i = 0; i < num_of_d_macro; i++) {
971 macname = base + SWAP(p, d_macro_tbl[i].offset);
972 maclen = SWAP(p, d_macro_tbl[i].len);
973
974 /*
975 * sanity check
976 * maclen includes a null termination.
977 */
978 if (maclen != strlen(macname) + 1) {
979 free(d_macro);
980 return (NULL);
981 }
982 e_macname = conv_macro(macname, maclen, &e_maclen);
983 if (e_macname == NULL) {
984 free(d_macro);
985 return (NULL);
986 }
987 d_macro[i].len = e_maclen;
988 d_macro[i].ptr = e_macname;
989 }
990
991 return (d_macro);
992 }
993
994 static char *
expand_dynamic_message(Msg_g_node * p,struct gnu_msg_ent ** e_msgs)995 expand_dynamic_message(Msg_g_node *p, struct gnu_msg_ent **e_msgs)
996 {
997
998 char *base = (char *)p->msg_file_info;
999 struct gnu_msg_rev1_info *rev1_header = p->rev1_header;
1000 struct gnu_dynamic_tbl *d_info;
1001 struct gnu_dynamic_ent *entry;
1002 gnu_d_macro_t *d_macro;
1003 uint32_t num_of_d_str, mlen, dlen, didx, i, j;
1004 uint32_t off_d_tbl;
1005 uint32_t *d_msg_off_tbl;
1006 size_t mchunk_size, used, need;
1007 char *mchunk, *msg;
1008
1009 #define MEM_INCR (1024)
1010
1011 d_macro = expand_macros(p);
1012 if (d_macro == NULL)
1013 return (NULL);
1014
1015 /* number of dynamic messages */
1016 num_of_d_str = p->num_of_d_str;
1017
1018 mchunk = NULL;
1019 mchunk_size = 0; /* size of the allocated memory in mchunk */
1020 used = 0; /* size of the used memory in mchunk */
1021 for (i = MSGID; i <= MSGSTR; i++) {
1022 /* pointer to the offset table of dynamic msgids/msgstrs */
1023 off_d_tbl = SWAP(p,
1024 i == MSGID ? rev1_header->off_dynamic_msgid_tbl :
1025 rev1_header->off_dynamic_msgstr_tbl);
1026 /* pointer to the dynamic msgids/msgstrs */
1027 d_msg_off_tbl = (uint32_t *)(uintptr_t)(base + off_d_tbl);
1028 for (j = 0; j < num_of_d_str; j++) {
1029 e_msgs[i][j].offset = used;
1030 d_info = (struct gnu_dynamic_tbl *)(uintptr_t)
1031 (base + SWAP(p, d_msg_off_tbl[j]));
1032 entry = d_info->entry;
1033 msg = base + SWAP(p, d_info->offset);
1034
1035 for (;;) {
1036 mlen = SWAP(p, entry->len);
1037 didx = SWAP(p, entry->idx);
1038 dlen = (didx == NOMORE_DYNAMIC_MACRO) ? 0 :
1039 d_macro[didx].len;
1040 need = used + mlen + dlen;
1041 if (need >= mchunk_size) {
1042 char *t;
1043 size_t n = mchunk_size;
1044 do {
1045 n += MEM_INCR;
1046 } while (n <= need);
1047 t = realloc(mchunk, n);
1048 if (t == NULL) {
1049 free(d_macro);
1050 free(mchunk);
1051 return (NULL);
1052 }
1053 mchunk = t;
1054 mchunk_size = n;
1055 }
1056 (void) memcpy(mchunk + used, msg, (size_t)mlen);
1057 msg += mlen;
1058 used += mlen;
1059
1060 if (didx == NOMORE_DYNAMIC_MACRO) {
1061 /*
1062 * Last segment of a static
1063 * msg string contains a null
1064 * termination, so an explicit
1065 * null termination is not required
1066 * here.
1067 */
1068 break;
1069 }
1070 (void) memcpy(mchunk + used,
1071 d_macro[didx].ptr, (size_t)dlen);
1072 used += dlen;
1073 entry++; /* to next entry */
1074 }
1075 /*
1076 * e_msgs[][].len does not include a null termination
1077 */
1078 e_msgs[i][j].len = used - e_msgs[i][j].offset - 1;
1079 }
1080 }
1081
1082 free(d_macro);
1083
1084 /* shrink mchunk to 'used' */
1085 {
1086 char *t;
1087 t = realloc(mchunk, used);
1088 if (t == NULL) {
1089 free(mchunk);
1090 return (NULL);
1091 }
1092 mchunk = t;
1093 }
1094
1095 return (mchunk);
1096 }
1097
1098 static int
build_rev1_info(Msg_g_node * p)1099 build_rev1_info(Msg_g_node *p)
1100 {
1101 uint32_t *d_hash;
1102 uint32_t num_of_d_str, num_of_str;
1103 uint32_t idx, hash_value, hash_size;
1104 size_t hash_mem_size;
1105 size_t d_msgid_size, d_msgstr_size;
1106 char *chunk, *mchunk;
1107 int i;
1108
1109 #ifdef GETTEXT_DEBUG
1110 gprintf(0, "******* entering build_rev1_info(0x%p)\n", p);
1111 printgnumsg(p, 1);
1112 #endif
1113
1114 if (p->hash_table == NULL) {
1115 /* Revision 1 always requires the hash table */
1116 return (-1);
1117 }
1118
1119 num_of_str = p->num_of_str;
1120 hash_size = p->hash_size;
1121 num_of_d_str = p->num_of_d_str;
1122
1123 hash_mem_size = hash_size * sizeof (uint32_t);
1124 ROUND(hash_mem_size, sizeof (struct gnu_msg_ent));
1125
1126 d_msgid_size = num_of_d_str * sizeof (struct gnu_msg_ent);
1127 d_msgstr_size = num_of_d_str * sizeof (struct gnu_msg_ent);
1128
1129 chunk = malloc(hash_mem_size + d_msgid_size + d_msgstr_size);
1130 if (chunk == NULL) {
1131 return (-1);
1132 }
1133
1134 d_hash = (uint32_t *)(uintptr_t)chunk;
1135 p->d_msg[MSGID] = (struct gnu_msg_ent *)(uintptr_t)
1136 (chunk + hash_mem_size);
1137 p->d_msg[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t)
1138 (chunk + hash_mem_size + d_msgid_size);
1139
1140 if ((mchunk = expand_dynamic_message(p, p->d_msg)) == NULL) {
1141 free(chunk);
1142 return (-1);
1143 }
1144
1145 /* copy the original hash table into the dynamic hash table */
1146 for (i = 0; i < hash_size; i++) {
1147 d_hash[i] = SWAP(p, p->hash_table[i]);
1148 }
1149
1150 /* fill in the dynamic hash table with dynamic messages */
1151 for (i = 0; i < num_of_d_str; i++) {
1152 hash_value = get_hashid(mchunk + p->d_msg[MSGID][i].offset,
1153 NULL);
1154 idx = get_hash_index(d_hash, hash_value, hash_size);
1155 d_hash[idx] = num_of_str + i + 1;
1156 }
1157
1158 p->mchunk = mchunk;
1159 p->hash_table = d_hash;
1160
1161 #ifdef GETTEXT_DEBUG
1162 print_rev1_info(p);
1163 gprintf(0, "******* exiting build_rev1_info()\n");
1164 printgnumsg(p, 1);
1165 #endif
1166
1167 return (0);
1168 }
1169
1170 /*
1171 * gnu_setmsg
1172 *
1173 * INPUT
1174 * mnp - message node
1175 * addr - address to the mmapped file
1176 * size - size of the file
1177 *
1178 * RETURN
1179 * 0 - either T_GNU_MO or T_ILL_MO has been set
1180 * -1 - failed
1181 */
1182 int
gnu_setmsg(Msg_node * mnp,char * addr,size_t size)1183 gnu_setmsg(Msg_node *mnp, char *addr, size_t size)
1184 {
1185 struct gnu_msg_info *gnu_header;
1186 Msg_g_node *p;
1187
1188 #ifdef GETTEXT_DEBUG
1189 gprintf(0, "******** entering gnu_setmsg(0x%p, 0x%p, %lu)\n",
1190 (void *)mnp, addr, size);
1191 printmnp(mnp, 1);
1192 #endif
1193
1194 /* checks the GNU MAGIC number */
1195 if (size < sizeof (struct gnu_msg_info)) {
1196 /* invalid mo file */
1197 mnp->type = T_ILL_MO;
1198 #ifdef GETTEXT_DEBUG
1199 gprintf(0, "********* exiting gnu_setmsg\n");
1200 printmnp(mnp, 1);
1201 #endif
1202 return (0);
1203 }
1204
1205 gnu_header = (struct gnu_msg_info *)(uintptr_t)addr;
1206
1207 p = calloc(1, sizeof (Msg_g_node));
1208 if (p == NULL) {
1209 return (-1);
1210 }
1211 p->msg_file_info = gnu_header;
1212
1213 if (gnu_header->magic == GNU_MAGIC) {
1214 switch (gnu_header->revision) {
1215 case GNU_REVISION_0_1:
1216 case GNU_REVISION_1_1:
1217 p->flag |= ST_REV1;
1218 break;
1219 }
1220 } else if (gnu_header->magic == GNU_MAGIC_SWAPPED) {
1221 p->flag |= ST_SWP;
1222 switch (gnu_header->revision) {
1223 case GNU_REVISION_0_1_SWAPPED:
1224 case GNU_REVISION_1_1_SWAPPED:
1225 p->flag |= ST_REV1;
1226 break;
1227 }
1228 } else {
1229 /* invalid mo file */
1230 free(p);
1231 mnp->type = T_ILL_MO;
1232 #ifdef GETTEXT_DEBUG
1233 gprintf(0, "********* exiting gnu_setmsg\n");
1234 printmnp(mnp, 1);
1235 #endif
1236 return (0);
1237 }
1238
1239 p->fsize = size;
1240 p->num_of_str = SWAP(p, gnu_header->num_of_str);
1241 p->hash_size = SWAP(p, gnu_header->sz_hashtbl);
1242 p->hash_table = p->hash_size <= 2 ? NULL :
1243 (uint32_t *)(uintptr_t)
1244 (addr + SWAP(p, gnu_header->off_hashtbl));
1245
1246 p->msg_tbl[MSGID] = (struct gnu_msg_ent *)(uintptr_t)
1247 (addr + SWAP(p, gnu_header->off_msgid_tbl));
1248 p->msg_tbl[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t)
1249 (addr + SWAP(p, gnu_header->off_msgstr_tbl));
1250
1251 if (p->flag & ST_REV1) {
1252 /* Revision 1 */
1253 struct gnu_msg_rev1_info *rev1_header;
1254
1255 rev1_header = (struct gnu_msg_rev1_info *)
1256 (uintptr_t)(addr + sizeof (struct gnu_msg_info));
1257 p->rev1_header = rev1_header;
1258 p->num_of_d_str = SWAP(p, rev1_header->num_of_dynamic_str);
1259 if (build_rev1_info(p) == -1) {
1260 free(p);
1261 #ifdef GETTEXT_DEBUG
1262 gprintf(0, "******** exiting gnu_setmsg: "
1263 "build_rev1_info() failed\n");
1264 #endif
1265 return (-1);
1266 }
1267 }
1268
1269 mnp->msg.gnumsg = p;
1270 mnp->type = T_GNU_MO;
1271
1272 #ifdef GETTEXT_DEBUG
1273 gprintf(0, "********* exiting gnu_setmsg\n");
1274 printmnp(mnp, 1);
1275 #endif
1276 return (0);
1277 }
1278
/*
 * get_hash_index
 *
 * Finds the slot in which a new entry with the specified hash_value
 * is to be stored: starting from hash_value % hash_size, slots are
 * probed in steps of 1 + (hash_value % (hash_size - 2)) until one
 * holding 0 (empty) is met.  Returns the index of that slot.
 *
 * hash_size must be greater than 2, and the probe sequence must be
 * able to reach an empty slot; otherwise this function does not
 * return.
 */
static uint32_t
get_hash_index(uint32_t *hash_tbl, uint32_t hash_value, uint32_t hash_size)
{
	uint32_t	slot = hash_value % hash_size;
	uint32_t	step = 1 + (hash_value % (hash_size - 2));

	while (hash_tbl[slot] != 0) {
		/* occupied: move on to the next slot of the sequence */
		slot = (slot + step) % hash_size;
	}

	/* found an empty slot */
	return (slot);
}
1302