1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include "lint.h"
28 #include "mtlib.h"
29 #include <ctype.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <sys/types.h>
34 #include <sys/mman.h>
35 #include <sys/param.h>
36 #include <sys/stat.h>
37 #include <thread.h>
38 #include <synch.h>
39 #include <unistd.h>
40 #include <limits.h>
41 #include <errno.h>
42 #include <inttypes.h>
43 #include "libc.h"
44 #include "msgfmt.h"
45 #include "nlspath_checks.h"
46 #include "gettext.h"
47
/*
 * These two symbols exist only for binary compatibility with GNU;
 * nothing in this file reads or writes them.
 */
int	_nl_msg_cat_cntr;
int	*_nl_domain_bindings;

/* Shared empty string; stands in for an encoding name that is unknown. */
static const char	*nullstr = "";

/*
 * Keywords searched for in the header entry of a GNU MO file.
 * Each *_LEN is the keyword length without the terminating null.
 */
#define	CHARSET_MOD	"charset="
#define	CHARSET_LEN	(sizeof (CHARSET_MOD) - 1)

#define	NPLURALS_MOD	"nplurals="
#define	NPLURALS_LEN	(sizeof (NPLURALS_MOD) - 1)

#define	PLURAL_MOD	"plural="
#define	PLURAL_LEN	(sizeof (PLURAL_MOD) - 1)

/* Defined at the end of the file; needed earlier by build_rev1_info(). */
static uint32_t get_hash_index(uint32_t *, uint32_t, uint32_t);
62
63 /*
64 * free_conv_msgstr
65 *
66 * release the memory allocated for storing code-converted messages
67 *
68 * f
69 * 0: do not free gmnp->conv_msgstr
70 * 1: free gmnp->conv_msgstr
71 */
72 static void
free_conv_msgstr(Msg_g_node * gmnp,int f)73 free_conv_msgstr(Msg_g_node *gmnp, int f)
74 {
75 uint32_t i, num_of_conv;
76
77 #ifdef GETTEXT_DEBUG
78 gprintf(0, "*************** free_conv_msgstr(0x%p, %d)\n",
79 (void *)gmnp, f);
80 printgnumsg(gmnp, 1);
81 #endif
82
83 num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str;
84 for (i = 0; i < num_of_conv; i++) {
85 if (gmnp->conv_msgstr[i]) {
86 free(gmnp->conv_msgstr[i]);
87 }
88 gmnp->conv_msgstr[i] = NULL;
89 }
90 if (f) {
91 free(gmnp->conv_msgstr);
92 gmnp->conv_msgstr = NULL;
93 }
94 }
95
96 /*
97 * dfltmsgstr
98 *
99 * choose an appropriate message by evaluating the plural expression,
100 * and return it.
101 */
102 static char *
dfltmsgstr(Msg_g_node * gmnp,const char * msgstr,uint32_t msgstr_len,struct msg_pack * mp)103 dfltmsgstr(Msg_g_node *gmnp, const char *msgstr, uint32_t msgstr_len,
104 struct msg_pack *mp)
105 {
106 unsigned int pindex;
107 size_t len;
108 const char *p;
109
110 #ifdef GETTEXT_DEBUG
111 gprintf(0, "*************** dfltmsgstr(0x%p, \"%s\", %u, 0x%p)\n",
112 (void *)gmnp,
113 msgstr ? msgstr : "(null)", msgstr_len, (void *)mp);
114 printgnumsg(gmnp, 1);
115 printmp(mp, 1);
116 #endif
117
118 if (mp->plural) {
119 if (gmnp->plural) {
120 pindex = plural_eval(gmnp->plural, mp->n);
121 } else {
122 /*
123 * This mo does not have plural information.
124 * Using the English form.
125 */
126 if (mp->n == 1)
127 pindex = 0;
128 else
129 pindex = 1;
130 }
131 #ifdef GETTEXT_DEBUG
132 gprintf(0, "plural_eval returned: %u\n", pindex);
133 #endif
134 if (pindex >= gmnp->nplurals) {
135 /* should never happen */
136 pindex = 0;
137 }
138 p = msgstr;
139 for (; pindex != 0; pindex--) {
140 len = msgstr_len - (p - msgstr);
141 p = memchr(p, '\0', len);
142 if (p == NULL) {
143 /*
144 * null byte not found
145 * this should never happen
146 */
147 char *result;
148 DFLTMSG(result, mp->msgid1, mp->msgid2,
149 mp->n, mp->plural);
150 return (result);
151 }
152 p++; /* skip */
153 }
154 return ((char *)p);
155 }
156
157 return ((char *)msgstr);
158 }
159
160 /*
161 * parse_header
162 *
163 * parse the header entry of the GNU MO file and
164 * extract the src encoding and the plural information of the MO file
165 */
166 static int
parse_header(const char * header,Msg_g_node * gmnp)167 parse_header(const char *header, Msg_g_node *gmnp)
168 {
169 char *charset = NULL;
170 char *charset_str;
171 size_t len;
172 char *nplurals_str, *plural_str;
173 plural_expr_t plural;
174 char *p, *q;
175 unsigned int nplurals;
176 int ret;
177
178 #ifdef GETTEXT_DEBUG
179 gprintf(0, "*************** parse_header(\"%s\", 0x%p)\n",
180 header ? header : "(null)", (void *)gmnp);
181 printgnumsg(gmnp, 1);
182 #endif
183
184 if (header == NULL) {
185 gmnp->src_encoding = (char *)nullstr;
186 gmnp->nplurals = 2;
187 gmnp->plural = NULL;
188 #ifdef GETTEXT_DEBUG
189 gprintf(0, "*************** exiting parse_header\n");
190 gprintf(0, "no header\n");
191 #endif
192
193 return (0);
194 }
195
196 charset_str = strstr(header, CHARSET_MOD);
197 if (charset_str == NULL) {
198 gmnp->src_encoding = (char *)nullstr;
199 } else {
200 p = charset_str + CHARSET_LEN;
201 q = p;
202 while ((*q != ' ') && (*q != '\t') &&
203 (*q != '\n')) {
204 q++;
205 }
206 len = q - p;
207 if (len > 0) {
208 charset = malloc(len + 1);
209 if (charset == NULL) {
210 gmnp->src_encoding = (char *)nullstr;
211 gmnp->nplurals = 2;
212 gmnp->plural = NULL;
213 return (-1);
214 }
215 (void) memcpy(charset, p, len);
216 charset[len] = '\0';
217 gmnp->src_encoding = charset;
218 } else {
219 gmnp->src_encoding = (char *)nullstr;
220 }
221 }
222
223 nplurals_str = strstr(header, NPLURALS_MOD);
224 plural_str = strstr(header, PLURAL_MOD);
225 if (nplurals_str == NULL || plural_str == NULL) {
226 /* no valid plural specification */
227 gmnp->nplurals = 2;
228 gmnp->plural = NULL;
229 #ifdef GETTEXT_DEBUG
230 gprintf(0, "*************** exiting parse_header\n");
231 gprintf(0, "no plural entry\n");
232 #endif
233 return (0);
234 } else {
235 p = nplurals_str + NPLURALS_LEN;
236 while (*p && isspace((unsigned char)*p)) {
237 p++;
238 }
239 nplurals = (unsigned int)strtol(p, &q, 10);
240 if (p != q) {
241 gmnp->nplurals = nplurals;
242 } else {
243 gmnp->nplurals = 2;
244 }
245
246 p = plural_str + PLURAL_LEN;
247 #ifdef GETTEXT_DEBUG
248 gprintf(0, "plural_str: \"%s\"\n", p);
249 #endif
250
251 ret = plural_expr(&plural, (const char *)p);
252 if (ret == 0) {
253 /* parse succeeded */
254 gmnp->plural = plural;
255 #ifdef GETTEXT_DEBUG
256 gprintf(0, "*************** exiting parse_header\n");
257 gprintf(0, "charset: \"%s\"\n",
258 charset ? charset : "(null)");
259 printexpr(plural, 1);
260 #endif
261 return (0);
262 } else if (ret == 1) {
263 /* parse error */
264 gmnp->nplurals = 2;
265 gmnp->plural = NULL;
266 return (0);
267 } else {
268 /* fatal error */
269 if (charset)
270 free(charset);
271 gmnp->src_encoding = (char *)nullstr;
272 gmnp->nplurals = 2;
273 gmnp->plural = NULL;
274 return (-1);
275 }
276 }
277 /* NOTREACHED */
278 }
279
280 /*
281 * handle_lang
282 *
283 * take care of the LANGUAGE specification
284 */
285 char *
handle_lang(struct msg_pack * mp)286 handle_lang(struct msg_pack *mp)
287 {
288 const char *p, *op, *q;
289 size_t locale_len;
290 char *result;
291 char locale[MAXPATHLEN];
292
293
294 #ifdef GETTEXT_DEBUG
295 gprintf(0, "*************** handle_lang(0x%p)\n", (void *)mp);
296 printmp(mp, 1);
297 #endif
298
299 p = mp->language;
300
301 while (*p) {
302 op = p;
303 q = strchr(p, ':');
304 if (q == NULL) {
305 locale_len = strlen(p);
306 p += locale_len;
307 } else {
308 locale_len = q - p;
309 p += locale_len + 1;
310 }
311 if (locale_len >= MAXPATHLEN || locale_len == 0) {
312 /* illegal locale name */
313 continue;
314 }
315 (void) memcpy(locale, op, locale_len);
316 locale[locale_len] = '\0';
317 mp->locale = locale;
318
319 #ifdef GETTEXT_DEBUG
320 *mp->msgfile = '\0';
321 #endif
322 if (mk_msgfile(mp) == NULL) {
323 /* illegal locale name */
324 continue;
325 }
326
327 result = handle_mo(mp);
328 if (mp->status & ST_GNU_MSG_FOUND)
329 return (result);
330
331 if (mp->status & ST_SUN_MO_FOUND)
332 break;
333 }
334
335 /*
336 * no valid locale found, Sun MO found, or
337 * GNU MO found but no valid msg found there.
338 */
339
340 if (mp->status & ST_GNU_MO_FOUND) {
341 /*
342 * GNU MO found but no valid msg found there.
343 * returning DFLTMSG.
344 */
345 DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural);
346 return (result);
347 }
348 return (NULL);
349 }
350
351 /*
352 * gnu_msgsearch
353 *
354 * Searchs the translation message for the specified msgid1.
355 * Hash algorithm used in this function is Open Addressing
356 * with Double Hashing:
357 * H(k, i) = (H1(k) + i * H2(k)) mod M
358 * H1(k) = hashvalue % M
359 * H2(k) = 1 + (hashvalue % (M - 2))
360 *
361 * Ref: The Art of Computer Programming Volume 3
362 * Sorting and Searching, second edition
363 * Donald E Knuth
364 */
365 static char *
gnu_msgsearch(Msg_g_node * gmnp,const char * msgid1,uint32_t * msgstrlen,uint32_t * midx)366 gnu_msgsearch(Msg_g_node *gmnp, const char *msgid1,
367 uint32_t *msgstrlen, uint32_t *midx)
368 {
369 struct gnu_msg_info *header = gmnp->msg_file_info;
370 struct gnu_msg_ent *msgid_tbl, *msgstr_tbl;
371 uint32_t num_of_str, idx, mlen, msglen;
372 uint32_t hash_size, hash_val, hash_id, hash_inc, hash_idx;
373 uint32_t *hash_table;
374 char *base;
375 char *msg;
376
377 #ifdef GETTEXT_DEBUG
378 gprintf(0, "*************** gnu_msgsearch(0x%p, \"%s\", "
379 "0x%p, 0x%p)\n",
380 (void *)gmnp, msgid1, msgstrlen, midx);
381 printgnumsg(gmnp, 1);
382 #endif
383
384 base = (char *)header;
385
386 msgid_tbl = gmnp->msg_tbl[MSGID];
387 msgstr_tbl = gmnp->msg_tbl[MSGSTR];
388 hash_table = gmnp->hash_table;
389 hash_size = gmnp->hash_size;
390 num_of_str = gmnp->num_of_str;
391
392 if (!(gmnp->flag & ST_REV1) &&
393 (hash_table == NULL || (hash_size <= 2))) {
394 /*
395 * Revision 0 and
396 * No hash table exists or
397 * hash size is enough small.
398 */
399 uint32_t top, bottom;
400 char *msg_id_str;
401 int val;
402
403 top = 0;
404 bottom = num_of_str;
405 while (top < bottom) {
406 idx = (top + bottom) / 2;
407 msg_id_str = base +
408 SWAP(gmnp, msgid_tbl[idx].offset);
409
410 val = strcmp(msg_id_str, msgid1);
411 if (val < 0) {
412 top = idx + 1;
413 } else if (val > 0) {
414 bottom = idx;
415 } else {
416 *msgstrlen = (unsigned int)
417 SWAP(gmnp, msgstr_tbl[idx].len) + 1;
418 *midx = idx;
419 return (base +
420 SWAP(gmnp, msgstr_tbl[idx].offset));
421 }
422 }
423 /* not found */
424 return ((char *)msgid1);
425 }
426
427 /* use hash table */
428 hash_id = get_hashid(msgid1, &msglen);
429 hash_idx = hash_id % hash_size;
430 hash_inc = 1 + (hash_id % (hash_size - 2));
431
432 for (;;) {
433 hash_val = HASH_TBL(gmnp, hash_table[hash_idx]);
434
435 if (hash_val == 0) {
436 /* not found */
437 return ((char *)msgid1);
438 }
439 if (hash_val <= num_of_str) {
440 /* static message */
441 idx = hash_val - 1;
442 mlen = SWAP(gmnp, msgid_tbl[idx].len);
443 msg = base + SWAP(gmnp, msgid_tbl[idx].offset);
444 } else {
445 if (!(gmnp->flag & ST_REV1)) {
446 /* rev 0 does not have dynamic message */
447 return ((char *)msgid1);
448 }
449 /* dynamic message */
450 idx = hash_val - num_of_str - 1;
451 mlen = gmnp->d_msg[MSGID][idx].len;
452 msg = gmnp->mchunk + gmnp->d_msg[MSGID][idx].offset;
453 }
454 if (msglen <= mlen && strcmp(msgid1, msg) == 0) {
455 /* found */
456 break;
457 }
458 hash_idx = (hash_idx + hash_inc) % hash_size;
459 }
460
461 /* msgstrlen should include a null termination */
462 if (hash_val <= num_of_str) {
463 *msgstrlen = SWAP(gmnp, msgstr_tbl[idx].len) + 1;
464 msg = base + SWAP(gmnp, msgstr_tbl[idx].offset);
465 *midx = idx;
466 } else {
467 *msgstrlen = gmnp->d_msg[MSGSTR][idx].len + 1;
468 msg = gmnp->mchunk + gmnp->d_msg[MSGSTR][idx].offset;
469 *midx = idx + num_of_str;
470 }
471
472 return (msg);
473 }
474
/*
 * do_conv
 *
 * Converts the 'srclen' bytes at 'src' from the src encoding
 * to the dst encoding by calling iconv() with the descriptor 'fd'.
 *
 * RETURN
 *	A malloc'ed buffer whose first uint32_t holds the length in
 *	bytes of the converted text and whose remaining bytes hold the
 *	converted text itself.  The caller owns the buffer.
 *
 *	NULL if memory could not be allocated or if iconv() failed
 *	with anything other than E2BIG.  A partially converted text
 *	is never returned: it would lack the null termination carried
 *	by the last input byte, and callers use the result as a
 *	C string.
 *
 * The output buffer starts at twice the input size and grows by the
 * same amount each time iconv() reports E2BIG.
 */
static uint32_t *
do_conv(iconv_t fd, const char *src, uint32_t srclen)
{
	uint32_t	tolen;
	uint32_t	*ptr, *optr;
	size_t	oleft, ileft, bufsize, memincr;
	char	*to, *tptr;

#ifdef GETTEXT_DEBUG
	gprintf(0, "*************** do_conv("
	    "0x%p, \"%s\", %d)\n",
	    (void *)fd, src ? src : "(null)", srclen);
#endif

	memincr = (size_t)srclen * 2;
	bufsize = memincr;
	ileft = srclen;
	oleft = bufsize;
	ptr = malloc(bufsize + sizeof (uint32_t));
	if (ptr == NULL) {
		return (NULL);
	}
	to = (char *)(ptr + 1);

	for (;;) {
		tptr = to;
		errno = 0;
#ifdef GETTEXT_DEBUG
		gprintf(0, "******* calling iconv()\n");
#endif
		/*
		 * The second parameter of iconv() is declared as
		 * 'const char **' on some systems and 'char **' on
		 * others; going through 'void *' satisfies both.
		 */
		if (iconv(fd, (void *)&src, &ileft, &tptr, &oleft) !=
		    (size_t)-1) {
			/* whole input converted */
			break;
		}
		if (errno != E2BIG) {
			/* input cannot be converted */
			free(ptr);
			return (NULL);
		}
#ifdef GETTEXT_DEBUG
		gprintf(0, "******* iconv detected E2BIG\n");
		gprintf(0, "old bufsize: %u\n", bufsize);
#endif

		optr = realloc(ptr, bufsize + memincr + sizeof (uint32_t));
		if (optr == NULL) {
			free(ptr);
			return (NULL);
		}
		ptr = optr;
		/* resume right after what has been produced so far */
		to = (char *)(optr + 1);
		to += bufsize - oleft;
		oleft += memincr;
		bufsize += memincr;
#ifdef GETTEXT_DEBUG
		gprintf(0, "new bufsize: %u\n", bufsize);
#endif
	}
	tolen = (uint32_t)(bufsize - oleft);

	if (tolen < bufsize) {
		/*
		 * Shrink the buffer.  Should that fail, the larger
		 * buffer is still valid, so keep using it.
		 */
		optr = realloc(ptr, tolen + sizeof (uint32_t));
		if (optr != NULL)
			ptr = optr;
	}
	*ptr = tolen;

#ifdef GETTEXT_DEBUG
	gprintf(0, "******* exiting do_conv()\n");
	gprintf(0, "tolen: %u\n", *ptr);
	gprintf(0, "return: 0x%p\n", ptr);
#endif
	return (ptr);
}
560
561 /*
562 * conv_msg
563 */
564 static char *
conv_msg(Msg_g_node * gmnp,char * msgstr,uint32_t msgstr_len,uint32_t midx,struct msg_pack * mp)565 conv_msg(Msg_g_node *gmnp, char *msgstr, uint32_t msgstr_len, uint32_t midx,
566 struct msg_pack *mp)
567 {
568 uint32_t *conv_dst;
569 size_t num_of_conv, conv_msgstr_len;
570 char *conv_msgstr, *result;
571
572 if (gmnp->conv_msgstr == NULL) {
573 num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str;
574 gmnp->conv_msgstr =
575 calloc((size_t)num_of_conv, sizeof (uint32_t *));
576 if (gmnp->conv_msgstr == NULL) {
577 /* malloc failed */
578 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
579 return (result);
580 }
581 }
582
583 conv_dst = do_conv(gmnp->fd, (const char *)msgstr, msgstr_len);
584
585 if (conv_dst == NULL) {
586 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
587 return (result);
588 }
589 conv_msgstr_len = *conv_dst;
590 gmnp->conv_msgstr[midx] = conv_dst;
591 conv_msgstr = (char *)(conv_dst + 1);
592 result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp);
593 return (result);
594 }
595
596 /*
597 * gnu_key_2_text
598 *
599 * Extracts msgstr from the GNU MO file
600 */
601 char *
gnu_key_2_text(Msg_g_node * gmnp,const char * codeset,struct msg_pack * mp)602 gnu_key_2_text(Msg_g_node *gmnp, const char *codeset,
603 struct msg_pack *mp)
604 {
605 uint32_t msgstr_len, midx;
606 iconv_t fd;
607 char *result, *msgstr;
608 int ret, conversion, new_encoding;
609
610 #ifdef GETTEXT_DEBUG
611 gprintf(0, "*************** gnu_key_2_text("
612 "0x%p, \"%s\", 0x%p)\n",
613 (void *)gmnp, codeset ? codeset : "(null)", (void *)mp);
614 printgnumsg(gmnp, 1);
615 printmp(mp, 1);
616 #endif
617
618 /* first checks if header entry has been processed */
619 if (!(gmnp->flag & ST_CHK)) {
620 char *msg_header;
621
622 msg_header = gnu_msgsearch(gmnp, "", &msgstr_len, &midx);
623 ret = parse_header((const char *)msg_header, gmnp);
624 if (ret == -1) {
625 /* fatal error */
626 DFLTMSG(result, mp->msgid1, mp->msgid2,
627 mp->n, mp->plural);
628 return (result);
629 }
630 gmnp->flag |= ST_CHK;
631 }
632 msgstr = gnu_msgsearch(gmnp, mp->msgid1, &msgstr_len, &midx);
633 if (msgstr == mp->msgid1) {
634 /* not found */
635 DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural);
636 return (result);
637 }
638
639 #ifdef GETTEXT_DEBUG
640 printgnumsg(gmnp, 1);
641 #endif
642 if (gmnp->dst_encoding == NULL) {
643 /*
644 * destination encoding has not been set.
645 */
646 char *dupcodeset = strdup(codeset);
647 if (dupcodeset == NULL) {
648 /* strdup failed */
649 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
650 return (result);
651 }
652 gmnp->dst_encoding = dupcodeset;
653
654 if (strcmp(gmnp->dst_encoding, gmnp->src_encoding) == 0) {
655 /*
656 * target encoding and src encoding
657 * are the same.
658 * No conversion required.
659 */
660 conversion = 0;
661 } else {
662 /*
663 * target encoding is different from
664 * src encoding.
665 * New conversion required.
666 */
667 /* sanity check */
668 if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) {
669 (void) iconv_close(gmnp->fd);
670 gmnp->fd = (iconv_t)-1;
671 }
672 if (gmnp->conv_msgstr)
673 free_conv_msgstr(gmnp, 0);
674 conversion = 1;
675 new_encoding = 1;
676 }
677 } else {
678 /*
679 * dst encoding has been already set.
680 */
681 if (strcmp(gmnp->dst_encoding, codeset) == 0) {
682 /*
683 * dst encoding and target encoding are the same.
684 */
685 if (strcmp(gmnp->dst_encoding, gmnp->src_encoding)
686 == 0) {
687 /*
688 * dst encoding and src encoding are the same.
689 * No conversion required.
690 */
691 conversion = 0;
692 } else {
693 /*
694 * dst encoding is different from src encoding.
695 * current conversion is valid.
696 */
697 conversion = 1;
698 new_encoding = 0;
699 /* checks if iconv_open has succeeded before */
700 if (gmnp->fd == (iconv_t)-1) {
701 /*
702 * iconv_open should have failed before
703 * Assume this conversion is invalid
704 */
705 conversion = 0;
706 } else {
707 if (gmnp->conv_msgstr == NULL) {
708 /*
709 * memory allocation for
710 * conv_msgstr should
711 * have failed before.
712 */
713 new_encoding = 1;
714 if (gmnp->fd)
715 (void) iconv_close(
716 gmnp->fd);
717 gmnp->fd = (iconv_t)-1;
718 }
719 }
720 }
721 } else {
722 /*
723 * dst encoding is different from target encoding.
724 * It has changed since before.
725 */
726 char *dupcodeset = strdup(codeset);
727 if (dupcodeset == NULL) {
728 result = dfltmsgstr(gmnp, msgstr,
729 msgstr_len, mp);
730 return (result);
731 }
732 free(gmnp->dst_encoding);
733 gmnp->dst_encoding = dupcodeset;
734 if (strcmp(gmnp->dst_encoding, gmnp->src_encoding)
735 == 0) {
736 /*
737 * dst encoding and src encoding are the same.
738 * now, no conversion required.
739 */
740 conversion = 0;
741 if (gmnp->conv_msgstr)
742 free_conv_msgstr(gmnp, 1);
743 } else {
744 /*
745 * dst encoding is different from src encoding.
746 * new conversion required.
747 */
748 conversion = 1;
749 new_encoding = 1;
750 if (gmnp->conv_msgstr)
751 free_conv_msgstr(gmnp, 0);
752 }
753
754 if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) {
755 (void) iconv_close(gmnp->fd);
756 }
757 if (gmnp->fd != (iconv_t)-1) {
758 gmnp->fd = (iconv_t)-1;
759 }
760 }
761 }
762
763 if (conversion == 0) {
764 /* no conversion */
765 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
766 return (result);
767 }
768 /* conversion required */
769
770 if (new_encoding == 0) {
771 /* dst codeset hasn't been changed since before */
772 uint32_t *cmsg;
773 uint32_t conv_msgstr_len;
774 char *conv_msgstr;
775
776 if (gmnp->conv_msgstr[midx] == NULL) {
777 /* this msgstr hasn't been converted yet */
778 result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp);
779 return (result);
780 }
781 /* this msgstr is in the conversion cache */
782 cmsg = (uint32_t *)(uintptr_t)gmnp->conv_msgstr[midx];
783 conv_msgstr_len = *cmsg;
784 conv_msgstr = (char *)(cmsg + 1);
785 result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp);
786 return (result);
787 }
788 /* new conversion */
789 #ifdef GETTEXT_DEBUG
790 gprintf(0, "******* calling iconv_open()\n");
791 gprintf(0, " dst: \"%s\", src: \"%s\"\n",
792 gmnp->dst_encoding, gmnp->src_encoding);
793 #endif
794 fd = iconv_open(gmnp->dst_encoding, gmnp->src_encoding);
795 gmnp->fd = fd;
796 if (fd == (iconv_t)-1) {
797 /*
798 * iconv_open() failed.
799 * no conversion
800 */
801 result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
802 return (result);
803 }
804 result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp);
805 return (result);
806 }
807
808
/*
 * Tables mapping the names of the <inttypes.h> PRI* macros to the
 * conversion specifiers they stand for on this system.
 *
 * PRI_STR(x, n) is the string the macro PRI<x><n> expands to and
 * PRI_LEN(x, n) is the length of that string.  PRIS() lays out one
 * table for the conversion character 'x' in the following order:
 *	index  0.. 3	PRIx8, PRIx16, PRIx32, PRIx64
 *	index  4.. 7	PRIxLEAST8 .. PRIxLEAST64
 *	index  8..11	PRIxFAST8 .. PRIxFAST64
 *	index 12	PRIxMAX
 *	index 13	PRIxPTR
 * The PRI_BIAS_* values are the first index of each group.
 */
#define	PRI_STR(x, n)	PRI##x##n
#define	PRI_LEN(x, n)	(char)(sizeof (PRI_STR(x, n)) - 1)
#define	PRIS(P, x)	{\
/* x/N/ */	P(x, 8), P(x, 16), P(x, 32), P(x, 64), \
/* xLEAST/N/ */	P(x, LEAST8), P(x, LEAST16), P(x, LEAST32), P(x, LEAST64), \
/* xFAST/N/ */	P(x, FAST8), P(x, FAST16), P(x, FAST32), P(x, FAST64), \
/* xMAX,PTR */	P(x, MAX), P(x, PTR) \
}

#define	PRI_BIAS_LEAST	4
#define	PRI_BIAS_FAST	8
#define	PRI_BIAS_MAX	12
#define	PRI_BIAS_PTR	13

static const char	*pri_d[] = PRIS(PRI_STR, d);
static const char	*pri_i[] = PRIS(PRI_STR, i);
static const char	*pri_o[] = PRIS(PRI_STR, o);
static const char	*pri_u[] = PRIS(PRI_STR, u);
static const char	*pri_x[] = PRIS(PRI_STR, x);
static const char	*pri_X[] = PRIS(PRI_STR, X);

static const char	pri_d_len[] = PRIS(PRI_LEN, d);
static const char	pri_i_len[] = PRIS(PRI_LEN, i);
static const char	pri_o_len[] = PRIS(PRI_LEN, o);
static const char	pri_u_len[] = PRIS(PRI_LEN, u);
static const char	pri_x_len[] = PRIS(PRI_LEN, x);
static const char	pri_X_len[] = PRIS(PRI_LEN, X);

/* conversion character -> its string table and its length table */
static const struct {
	const char	type;
	const char	**str_table;
	const char	*len_table;
} pri_table[] = {
	{'d', pri_d, pri_d_len}, {'i', pri_i, pri_i_len},
	{'o', pri_o, pri_o_len}, {'u', pri_u, pri_u_len},
	{'x', pri_x, pri_x_len}, {'X', pri_X, pri_X_len},
};

/* the non-numeric groups that may follow the conversion character */
static const struct {
	const char	*name;		/* group keyword */
	const char	nlen;		/* length of the keyword */
	const char	want_digits;	/* 1 if a width must follow */
	const char	bias;		/* first table index of the group */
} special_table[] = {
	{"LEAST", 5, 1, PRI_BIAS_LEAST},
	{"FAST", 4, 1, PRI_BIAS_FAST},
	{"MAX", 3, 0, PRI_BIAS_MAX},
	{"PTR", 3, 0, PRI_BIAS_PTR},
};

/*
 * conv_macro() returns the conversion specifier corresponding
 * to the macro name specified in 'str'.  'len' contains the
 * length of the macro name including the null termination.
 * '*lenp' will be set to the length of the returning conversion
 * specifier without the null termination.
 *
 * Recognized names are "I" (mapped to the empty string) and
 * PRI<c><width>, PRI<c>LEAST<width>, PRI<c>FAST<width>, PRI<c>MAX,
 * PRI<c>PTR where <c> is one of d, i, o, u, x, X and <width> is one
 * of 8, 16, 32, 64.
 *
 * Returns NULL, leaving '*lenp' untouched, for any other name.
 */
static const char *
conv_macro(const char *str, uint32_t len, uint32_t *lenp)
{
	const char	**tbl;
	const char	*ltbl;
	char	*next;
	long	num;
	int	n, i, bias, idx, want_digits;

	if (len == 2) {
		if (*str == 'I') {
			/* Solaris does not support %I */
			*lenp = 0;
			return ("");
		}
		return (NULL);
	}

	if (len <= 4 || strncmp(str, "PRI", 3) != 0)
		return (NULL);

	str += 3;

	/* the conversion character selects the tables */
	n = sizeof (pri_table) / sizeof (pri_table[0]);
	for (i = 0; i < n; i++) {
		if (pri_table[i].type == *str)
			break;
	}
	if (i == n)
		return (NULL);
	tbl = pri_table[i].str_table;
	ltbl = pri_table[i].len_table;

	str++;
	idx = want_digits = 0;

	if (isdigit((unsigned char)*str)) {
		/* PRIx/N/ */
		bias = 0;
		want_digits = 1;
	} else {
		n = sizeof (special_table) / sizeof (special_table[0]);
		for (i = 0; i < n; i++) {
			if (strncmp(special_table[i].name,
			    str, special_table[i].nlen) == 0) {
				break;
			}
		}
		if (i == n)
			return (NULL);
		bias = special_table[i].bias;
		want_digits = special_table[i].want_digits;
		str += special_table[i].nlen;
	}

	if (want_digits) {
		if (!isdigit((unsigned char)*str))
			return (NULL);
		/*
		 * Keep the full 'long' result: narrowing it to int
		 * could turn an out-of-range width into 8/16/32/64.
		 */
		num = strtol(str, &next, 10);
		/* see if it is 8/16/32/64 */
		for (n = 8, idx = 0; idx < 4; idx++, n *= 2) {
			if (num == (long)n)
				break;
		}
		if (idx == 4)
			return (NULL);
		str = next;
	}
	if (*str != '\0') {
		/* unknown format */
		return (NULL);
	}

	*lenp = (uint32_t)ltbl[bias + idx];
	return (tbl[bias + idx]);
}
941
942 static gnu_d_macro_t *
expand_macros(Msg_g_node * p)943 expand_macros(Msg_g_node *p)
944 {
945 char *base = (char *)p->msg_file_info;
946 struct gnu_msg_rev1_info *rev1_header = p->rev1_header;
947 struct gnu_msg_ent *d_macro_tbl;
948 gnu_d_macro_t *d_macro;
949 uint32_t num_of_d_macro, e_maclen, maclen, i;
950 const char *e_macname;
951 char *macname;
952
953 /* number of the dynamic macros */
954 num_of_d_macro = SWAP(p, rev1_header->num_of_dynamic_macro);
955
956 d_macro = malloc((size_t)num_of_d_macro * sizeof (gnu_d_macro_t));
957 if (d_macro == NULL)
958 return (NULL);
959
960 /* pointer to the dynamic strings table */
961 d_macro_tbl = (struct gnu_msg_ent *)(uintptr_t)
962 (base + SWAP(p, rev1_header->off_dynamic_macro));
963
964 for (i = 0; i < num_of_d_macro; i++) {
965 macname = base + SWAP(p, d_macro_tbl[i].offset);
966 maclen = SWAP(p, d_macro_tbl[i].len);
967
968 /*
969 * sanity check
970 * maclen includes a null termination.
971 */
972 if (maclen != strlen(macname) + 1) {
973 free(d_macro);
974 return (NULL);
975 }
976 e_macname = conv_macro(macname, maclen, &e_maclen);
977 if (e_macname == NULL) {
978 free(d_macro);
979 return (NULL);
980 }
981 d_macro[i].len = e_maclen;
982 d_macro[i].ptr = e_macname;
983 }
984
985 return (d_macro);
986 }
987
988 static char *
expand_dynamic_message(Msg_g_node * p,struct gnu_msg_ent ** e_msgs)989 expand_dynamic_message(Msg_g_node *p, struct gnu_msg_ent **e_msgs)
990 {
991
992 char *base = (char *)p->msg_file_info;
993 struct gnu_msg_rev1_info *rev1_header = p->rev1_header;
994 struct gnu_dynamic_tbl *d_info;
995 struct gnu_dynamic_ent *entry;
996 gnu_d_macro_t *d_macro;
997 uint32_t num_of_d_str, mlen, dlen, didx, i, j;
998 uint32_t off_d_tbl;
999 uint32_t *d_msg_off_tbl;
1000 size_t mchunk_size, used, need;
1001 char *mchunk, *msg;
1002
1003 #define MEM_INCR (1024)
1004
1005 d_macro = expand_macros(p);
1006 if (d_macro == NULL)
1007 return (NULL);
1008
1009 /* number of dynamic messages */
1010 num_of_d_str = p->num_of_d_str;
1011
1012 mchunk = NULL;
1013 mchunk_size = 0; /* size of the allocated memory in mchunk */
1014 used = 0; /* size of the used memory in mchunk */
1015 for (i = MSGID; i <= MSGSTR; i++) {
1016 /* pointer to the offset table of dynamic msgids/msgstrs */
1017 off_d_tbl = SWAP(p,
1018 i == MSGID ? rev1_header->off_dynamic_msgid_tbl :
1019 rev1_header->off_dynamic_msgstr_tbl);
1020 /* pointer to the dynamic msgids/msgstrs */
1021 d_msg_off_tbl = (uint32_t *)(uintptr_t)(base + off_d_tbl);
1022 for (j = 0; j < num_of_d_str; j++) {
1023 e_msgs[i][j].offset = used;
1024 d_info = (struct gnu_dynamic_tbl *)(uintptr_t)
1025 (base + SWAP(p, d_msg_off_tbl[j]));
1026 entry = d_info->entry;
1027 msg = base + SWAP(p, d_info->offset);
1028
1029 for (;;) {
1030 mlen = SWAP(p, entry->len);
1031 didx = SWAP(p, entry->idx);
1032 dlen = (didx == NOMORE_DYNAMIC_MACRO) ? 0 :
1033 d_macro[didx].len;
1034 need = used + mlen + dlen;
1035 if (need >= mchunk_size) {
1036 char *t;
1037 size_t n = mchunk_size;
1038 do {
1039 n += MEM_INCR;
1040 } while (n <= need);
1041 t = realloc(mchunk, n);
1042 if (t == NULL) {
1043 free(d_macro);
1044 free(mchunk);
1045 return (NULL);
1046 }
1047 mchunk = t;
1048 mchunk_size = n;
1049 }
1050 (void) memcpy(mchunk + used, msg, (size_t)mlen);
1051 msg += mlen;
1052 used += mlen;
1053
1054 if (didx == NOMORE_DYNAMIC_MACRO) {
1055 /*
1056 * Last segment of a static
1057 * msg string contains a null
1058 * termination, so an explicit
1059 * null termination is not required
1060 * here.
1061 */
1062 break;
1063 }
1064 (void) memcpy(mchunk + used,
1065 d_macro[didx].ptr, (size_t)dlen);
1066 used += dlen;
1067 entry++; /* to next entry */
1068 }
1069 /*
1070 * e_msgs[][].len does not include a null termination
1071 */
1072 e_msgs[i][j].len = used - e_msgs[i][j].offset - 1;
1073 }
1074 }
1075
1076 free(d_macro);
1077
1078 /* shrink mchunk to 'used' */
1079 {
1080 char *t;
1081 t = realloc(mchunk, used);
1082 if (t == NULL) {
1083 free(mchunk);
1084 return (NULL);
1085 }
1086 mchunk = t;
1087 }
1088
1089 return (mchunk);
1090 }
1091
1092 static int
build_rev1_info(Msg_g_node * p)1093 build_rev1_info(Msg_g_node *p)
1094 {
1095 uint32_t *d_hash;
1096 uint32_t num_of_d_str, num_of_str;
1097 uint32_t idx, hash_value, hash_size;
1098 size_t hash_mem_size;
1099 size_t d_msgid_size, d_msgstr_size;
1100 char *chunk, *mchunk;
1101 int i;
1102
1103 #ifdef GETTEXT_DEBUG
1104 gprintf(0, "******* entering build_rev1_info(0x%p)\n", p);
1105 printgnumsg(p, 1);
1106 #endif
1107
1108 if (p->hash_table == NULL) {
1109 /* Revision 1 always requires the hash table */
1110 return (-1);
1111 }
1112
1113 num_of_str = p->num_of_str;
1114 hash_size = p->hash_size;
1115 num_of_d_str = p->num_of_d_str;
1116
1117 hash_mem_size = hash_size * sizeof (uint32_t);
1118 ROUND(hash_mem_size, sizeof (struct gnu_msg_ent));
1119
1120 d_msgid_size = num_of_d_str * sizeof (struct gnu_msg_ent);
1121 d_msgstr_size = num_of_d_str * sizeof (struct gnu_msg_ent);
1122
1123 chunk = malloc(hash_mem_size + d_msgid_size + d_msgstr_size);
1124 if (chunk == NULL) {
1125 return (-1);
1126 }
1127
1128 d_hash = (uint32_t *)(uintptr_t)chunk;
1129 p->d_msg[MSGID] = (struct gnu_msg_ent *)(uintptr_t)
1130 (chunk + hash_mem_size);
1131 p->d_msg[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t)
1132 (chunk + hash_mem_size + d_msgid_size);
1133
1134 if ((mchunk = expand_dynamic_message(p, p->d_msg)) == NULL) {
1135 free(chunk);
1136 return (-1);
1137 }
1138
1139 /* copy the original hash table into the dynamic hash table */
1140 for (i = 0; i < hash_size; i++) {
1141 d_hash[i] = SWAP(p, p->hash_table[i]);
1142 }
1143
1144 /* fill in the dynamic hash table with dynamic messages */
1145 for (i = 0; i < num_of_d_str; i++) {
1146 hash_value = get_hashid(mchunk + p->d_msg[MSGID][i].offset,
1147 NULL);
1148 idx = get_hash_index(d_hash, hash_value, hash_size);
1149 d_hash[idx] = num_of_str + i + 1;
1150 }
1151
1152 p->mchunk = mchunk;
1153 p->hash_table = d_hash;
1154
1155 #ifdef GETTEXT_DEBUG
1156 print_rev1_info(p);
1157 gprintf(0, "******* exiting build_rev1_info()\n");
1158 printgnumsg(p, 1);
1159 #endif
1160
1161 return (0);
1162 }
1163
1164 /*
1165 * gnu_setmsg
1166 *
1167 * INPUT
1168 * mnp - message node
1169 * addr - address to the mmapped file
1170 * size - size of the file
1171 *
1172 * RETURN
1173 * 0 - either T_GNU_MO or T_ILL_MO has been set
1174 * -1 - failed
1175 */
1176 int
gnu_setmsg(Msg_node * mnp,char * addr,size_t size)1177 gnu_setmsg(Msg_node *mnp, char *addr, size_t size)
1178 {
1179 struct gnu_msg_info *gnu_header;
1180 Msg_g_node *p;
1181
1182 #ifdef GETTEXT_DEBUG
1183 gprintf(0, "******** entering gnu_setmsg(0x%p, 0x%p, %lu)\n",
1184 (void *)mnp, addr, size);
1185 printmnp(mnp, 1);
1186 #endif
1187
1188 /* checks the GNU MAGIC number */
1189 if (size < sizeof (struct gnu_msg_info)) {
1190 /* invalid mo file */
1191 mnp->type = T_ILL_MO;
1192 #ifdef GETTEXT_DEBUG
1193 gprintf(0, "********* exiting gnu_setmsg\n");
1194 printmnp(mnp, 1);
1195 #endif
1196 return (0);
1197 }
1198
1199 gnu_header = (struct gnu_msg_info *)(uintptr_t)addr;
1200
1201 p = calloc(1, sizeof (Msg_g_node));
1202 if (p == NULL) {
1203 return (-1);
1204 }
1205 p->msg_file_info = gnu_header;
1206
1207 if (gnu_header->magic == GNU_MAGIC) {
1208 switch (gnu_header->revision) {
1209 case GNU_REVISION_0_1:
1210 case GNU_REVISION_1_1:
1211 p->flag |= ST_REV1;
1212 break;
1213 }
1214 } else if (gnu_header->magic == GNU_MAGIC_SWAPPED) {
1215 p->flag |= ST_SWP;
1216 switch (gnu_header->revision) {
1217 case GNU_REVISION_0_1_SWAPPED:
1218 case GNU_REVISION_1_1_SWAPPED:
1219 p->flag |= ST_REV1;
1220 break;
1221 }
1222 } else {
1223 /* invalid mo file */
1224 free(p);
1225 mnp->type = T_ILL_MO;
1226 #ifdef GETTEXT_DEBUG
1227 gprintf(0, "********* exiting gnu_setmsg\n");
1228 printmnp(mnp, 1);
1229 #endif
1230 return (0);
1231 }
1232
1233 p->fsize = size;
1234 p->num_of_str = SWAP(p, gnu_header->num_of_str);
1235 p->hash_size = SWAP(p, gnu_header->sz_hashtbl);
1236 p->hash_table = p->hash_size <= 2 ? NULL :
1237 (uint32_t *)(uintptr_t)
1238 (addr + SWAP(p, gnu_header->off_hashtbl));
1239
1240 p->msg_tbl[MSGID] = (struct gnu_msg_ent *)(uintptr_t)
1241 (addr + SWAP(p, gnu_header->off_msgid_tbl));
1242 p->msg_tbl[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t)
1243 (addr + SWAP(p, gnu_header->off_msgstr_tbl));
1244
1245 if (p->flag & ST_REV1) {
1246 /* Revision 1 */
1247 struct gnu_msg_rev1_info *rev1_header;
1248
1249 rev1_header = (struct gnu_msg_rev1_info *)
1250 (uintptr_t)(addr + sizeof (struct gnu_msg_info));
1251 p->rev1_header = rev1_header;
1252 p->num_of_d_str = SWAP(p, rev1_header->num_of_dynamic_str);
1253 if (build_rev1_info(p) == -1) {
1254 free(p);
1255 #ifdef GETTEXT_DEBUG
1256 gprintf(0, "******** exiting gnu_setmsg: "
1257 "build_rev1_info() failed\n");
1258 #endif
1259 return (-1);
1260 }
1261 }
1262
1263 mnp->msg.gnumsg = p;
1264 mnp->type = T_GNU_MO;
1265
1266 #ifdef GETTEXT_DEBUG
1267 gprintf(0, "********* exiting gnu_setmsg\n");
1268 printmnp(mnp, 1);
1269 #endif
1270 return (0);
1271 }
1272
/*
 * get_hash_index
 *
 * Returns the index of the first empty slot (value 0) of 'hash_tbl'
 * met along the probe sequence of 'hash_value':
 *	start at  hash_value % hash_size
 *	step by   1 + (hash_value % (hash_size - 2))
 *
 * 'hash_size' must be greater than 2 and the probe sequence must
 * reach an empty slot; otherwise the loop does not terminate.
 */
static uint32_t
get_hash_index(uint32_t *hash_tbl, uint32_t hash_value, uint32_t hash_size)
{
	uint32_t	slot = hash_value % hash_size;
	uint32_t	step = 1 + (hash_value % (hash_size - 2));

	/* walk the probe sequence until a free slot shows up */
	while (hash_tbl[slot] != 0)
		slot = (slot + step) % hash_size;

	return (slot);
}
1296