xref: /illumos-gate/usr/src/lib/libc/port/i18n/gettext_gnu.c (revision 7f3d7c9289dee6488b3cd2848a68c0b8580d750c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include "lint.h"
28 #include "mtlib.h"
29 #include <ctype.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <sys/types.h>
34 #include <sys/mman.h>
35 #include <sys/param.h>
36 #include <sys/stat.h>
37 #include <thread.h>
38 #include <synch.h>
39 #include <unistd.h>
40 #include <limits.h>
41 #include <errno.h>
42 #include <inttypes.h>
43 #include "libc.h"
44 #include "msgfmt.h"
45 #include "nlspath_checks.h"
46 #include "gettext.h"
47 
/* The following symbols are just for GNU binary compatibility */
int	_nl_msg_cat_cntr;
int	*_nl_domain_bindings;

/*
 * Shared empty string.  parse_header() stores it in src_encoding
 * whenever no charset could be extracted from the header entry.
 */
static const char	*nullstr = "";

/*
 * Keywords that parse_header() searches for in the MO header entry,
 * together with their lengths (terminating null byte not counted).
 */
#define	CHARSET_MOD	"charset="
#define	CHARSET_LEN	(sizeof (CHARSET_MOD) - 1)
#define	NPLURALS_MOD	"nplurals="
#define	NPLURALS_LEN	(sizeof (NPLURALS_MOD) - 1)
#define	PLURAL_MOD	"plural="
#define	PLURAL_LEN	(sizeof (PLURAL_MOD) - 1)

/* forward declaration; build_rev1_info() calls it before its definition */
static uint32_t	get_hash_index(uint32_t *, uint32_t, uint32_t);
62 
63 /*
64  * free_conv_msgstr
65  *
66  * release the memory allocated for storing code-converted messages
67  *
68  * f
69  *	0:	do not free gmnp->conv_msgstr
70  *	1:	free gmnp->conv_msgstr
71  */
72 static void
73 free_conv_msgstr(Msg_g_node *gmnp, int f)
74 {
75 	uint32_t	i, num_of_conv;
76 
77 #ifdef GETTEXT_DEBUG
78 	gprintf(0, "*************** free_conv_msgstr(0x%p, %d)\n",
79 	    (void *)gmnp, f);
80 	printgnumsg(gmnp, 1);
81 #endif
82 
83 	num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str;
84 	for (i = 0; i < num_of_conv; i++) {
85 		if (gmnp->conv_msgstr[i]) {
86 			free(gmnp->conv_msgstr[i]);
87 		}
88 		gmnp->conv_msgstr[i] = NULL;
89 	}
90 	if (f) {
91 		free(gmnp->conv_msgstr);
92 		gmnp->conv_msgstr = NULL;
93 	}
94 }
95 
96 /*
97  * dfltmsgstr
98  *
99  * choose an appropriate message by evaluating the plural expression,
100  * and return it.
101  */
102 static char *
103 dfltmsgstr(Msg_g_node *gmnp, const char *msgstr, uint32_t msgstr_len,
104     struct msg_pack *mp)
105 {
106 	unsigned int	pindex;
107 	size_t	len;
108 	const char	*p;
109 
110 #ifdef GETTEXT_DEBUG
111 	gprintf(0, "*************** dfltmsgstr(0x%p, \"%s\", %u, 0x%p)\n",
112 	    (void *)gmnp,
113 	    msgstr ? msgstr : "(null)", msgstr_len, (void *)mp);
114 	printgnumsg(gmnp, 1);
115 	printmp(mp, 1);
116 #endif
117 
118 	if (mp->plural) {
119 		if (gmnp->plural) {
120 			pindex = plural_eval(gmnp->plural, mp->n);
121 		} else {
122 			/*
123 			 * This mo does not have plural information.
124 			 * Using the English form.
125 			 */
126 			if (mp->n == 1)
127 				pindex = 0;
128 			else
129 				pindex = 1;
130 		}
131 #ifdef GETTEXT_DEBUG
132 		gprintf(0, "plural_eval returned: %u\n", pindex);
133 #endif
134 		if (pindex >= gmnp->nplurals) {
135 			/* should never happen */
136 			pindex = 0;
137 		}
138 		p = msgstr;
139 		for (; pindex != 0; pindex--) {
140 			len = msgstr_len - (p - msgstr);
141 			p = memchr(p, '\0', len);
142 			if (p == NULL) {
143 				/*
144 				 * null byte not found
145 				 * this should never happen
146 				 */
147 				char	*result;
148 				DFLTMSG(result, mp->msgid1, mp->msgid2,
149 				    mp->n, mp->plural);
150 				return (result);
151 			}
152 			p++;		/* skip */
153 		}
154 		return ((char *)p);
155 	}
156 
157 	return ((char *)msgstr);
158 }
159 
160 /*
161  * parse_header
162  *
163  * parse the header entry of the GNU MO file and
164  * extract the src encoding and the plural information of the MO file
165  */
166 static int
167 parse_header(const char *header, Msg_g_node *gmnp)
168 {
169 	char	*charset = NULL;
170 	char	*charset_str;
171 	size_t	len;
172 	char	*nplurals_str, *plural_str;
173 	plural_expr_t	plural;
174 	char	*p, *q;
175 	unsigned int	nplurals;
176 	int	ret;
177 
178 #ifdef GETTEXT_DEBUG
179 	gprintf(0, "*************** parse_header(\"%s\", 0x%p)\n",
180 	    header ? header : "(null)", (void *)gmnp);
181 	printgnumsg(gmnp, 1);
182 #endif
183 
184 	if (header == NULL) {
185 		gmnp->src_encoding = (char *)nullstr;
186 		gmnp->nplurals = 2;
187 		gmnp->plural = NULL;
188 #ifdef GETTEXT_DEBUG
189 		gprintf(0, "*************** exiting parse_header\n");
190 		gprintf(0, "no header\n");
191 #endif
192 
193 		return (0);
194 	}
195 
196 	charset_str = strstr(header, CHARSET_MOD);
197 	if (charset_str == NULL) {
198 		gmnp->src_encoding = (char *)nullstr;
199 	} else {
200 		p = charset_str + CHARSET_LEN;
201 		q = p;
202 		while ((*q != ' ') && (*q != '\t') &&
203 		    (*q != '\n')) {
204 			q++;
205 		}
206 		len = q - p;
207 		if (len > 0) {
208 			charset = malloc(len + 1);
209 			if (charset == NULL) {
210 				gmnp->src_encoding = (char *)nullstr;
211 				gmnp->nplurals = 2;
212 				gmnp->plural = NULL;
213 				return (-1);
214 			}
215 			(void) memcpy(charset, p, len);
216 			charset[len] = '\0';
217 			gmnp->src_encoding = charset;
218 		} else {
219 			gmnp->src_encoding = (char *)nullstr;
220 		}
221 	}
222 
223 	nplurals_str = strstr(header, NPLURALS_MOD);
224 	plural_str = strstr(header, PLURAL_MOD);
225 	if (nplurals_str == NULL || plural_str == NULL) {
226 		/* no valid plural specification */
227 		gmnp->nplurals = 2;
228 		gmnp->plural = NULL;
229 #ifdef GETTEXT_DEBUG
230 		gprintf(0, "*************** exiting parse_header\n");
231 		gprintf(0, "no plural entry\n");
232 #endif
233 		return (0);
234 	} else {
235 		p = nplurals_str + NPLURALS_LEN;
236 		while (*p && isspace((unsigned char)*p)) {
237 			p++;
238 		}
239 		nplurals = (unsigned int)strtol(p, &q, 10);
240 		if (p != q) {
241 			gmnp->nplurals = nplurals;
242 		} else {
243 			gmnp->nplurals = 2;
244 		}
245 
246 		p = plural_str + PLURAL_LEN;
247 #ifdef GETTEXT_DEBUG
248 		gprintf(0, "plural_str: \"%s\"\n", p);
249 #endif
250 
251 		ret = plural_expr(&plural, (const char *)p);
252 		if (ret == 0) {
253 			/* parse succeeded */
254 			gmnp->plural = plural;
255 #ifdef GETTEXT_DEBUG
256 		gprintf(0, "*************** exiting parse_header\n");
257 		gprintf(0, "charset: \"%s\"\n",
258 		    charset ? charset : "(null)");
259 		printexpr(plural, 1);
260 #endif
261 			return (0);
262 		} else if (ret == 1) {
263 			/* parse error */
264 			gmnp->nplurals = 2;
265 			gmnp->plural = NULL;
266 			return (0);
267 		} else {
268 			/* fatal error */
269 			if (charset)
270 				free(charset);
271 			gmnp->src_encoding = (char *)nullstr;
272 			gmnp->nplurals = 2;
273 			gmnp->plural = NULL;
274 			return (-1);
275 		}
276 	}
277 	/* NOTREACHED */
278 }
279 
280 /*
281  * handle_lang
282  *
283  * take care of the LANGUAGE specification
284  */
285 char *
286 handle_lang(struct msg_pack *mp)
287 {
288 	const char	*p, *op, *q;
289 	size_t	locale_len;
290 	char	*result;
291 	char	locale[MAXPATHLEN];
292 
293 
294 #ifdef GETTEXT_DEBUG
295 	gprintf(0, "*************** handle_lang(0x%p)\n", (void *)mp);
296 	printmp(mp, 1);
297 #endif
298 
299 	p = mp->language;
300 
301 	while (*p) {
302 		op = p;
303 		q = strchr(p, ':');
304 		if (q == NULL) {
305 			locale_len = strlen(p);
306 			p += locale_len;
307 		} else {
308 			locale_len = q - p;
309 			p += locale_len + 1;
310 		}
311 		if (locale_len >= MAXPATHLEN || locale_len == 0) {
312 			/* illegal locale name */
313 			continue;
314 		}
315 		(void) memcpy(locale, op, locale_len);
316 		locale[locale_len] = '\0';
317 		mp->locale = locale;
318 
319 #ifdef GETTEXT_DEBUG
320 		*mp->msgfile = '\0';
321 #endif
322 		if (mk_msgfile(mp) == NULL) {
323 			/* illegal locale name */
324 			continue;
325 		}
326 
327 		result = handle_mo(mp);
328 		if (mp->status & ST_GNU_MSG_FOUND)
329 			return (result);
330 
331 		if (mp->status & ST_SUN_MO_FOUND)
332 			break;
333 	}
334 
335 	/*
336 	 * no valid locale found, Sun MO found, or
337 	 * GNU MO found but no valid msg found there.
338 	 */
339 
340 	if (mp->status & ST_GNU_MO_FOUND) {
341 		/*
342 		 * GNU MO found but no valid msg found there.
343 		 * returning DFLTMSG.
344 		 */
345 		DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural);
346 		return (result);
347 	}
348 	return (NULL);
349 }
350 
351 /*
352  * gnu_msgsearch
353  *
354  * Searchs the translation message for the specified msgid1.
355  * Hash algorithm used in this function is Open Addressing
356  * with Double Hashing:
357  * H(k, i) = (H1(k) + i * H2(k)) mod M
358  * H1(k) = hashvalue % M
359  * H2(k) = 1 + (hashvalue % (M - 2))
360  *
361  * Ref: The Art of Computer Programming Volume 3
362  * Sorting and Searching, second edition
363  * Donald E Knuth
364  */
365 static char *
366 gnu_msgsearch(Msg_g_node *gmnp, const char *msgid1,
367     uint32_t *msgstrlen, uint32_t *midx)
368 {
369 	struct gnu_msg_info	*header = gmnp->msg_file_info;
370 	struct gnu_msg_ent	*msgid_tbl, *msgstr_tbl;
371 	uint32_t	num_of_str, idx, mlen, msglen;
372 	uint32_t	hash_size, hash_val, hash_id, hash_inc, hash_idx;
373 	uint32_t	*hash_table;
374 	char	*base;
375 	char	*msg;
376 
377 #ifdef GETTEXT_DEBUG
378 	gprintf(0, "*************** gnu_msgsearch(0x%p, \"%s\", "
379 	    "0x%p, 0x%p)\n",
380 	    (void *)gmnp, msgid1, msgstrlen, midx);
381 	printgnumsg(gmnp, 1);
382 #endif
383 
384 	base = (char *)header;
385 
386 	msgid_tbl = gmnp->msg_tbl[MSGID];
387 	msgstr_tbl = gmnp->msg_tbl[MSGSTR];
388 	hash_table = gmnp->hash_table;
389 	hash_size = gmnp->hash_size;
390 	num_of_str = gmnp->num_of_str;
391 
392 	if (!(gmnp->flag & ST_REV1) &&
393 	    (hash_table == NULL || (hash_size <= 2))) {
394 		/*
395 		 * Revision 0 and
396 		 * No hash table exists or
397 		 * hash size is enough small.
398 		 */
399 		uint32_t	top, bottom;
400 		char	*msg_id_str;
401 		int	val;
402 
403 		top = 0;
404 		bottom = num_of_str;
405 		while (top < bottom) {
406 			idx = (top + bottom) / 2;
407 			msg_id_str = base +
408 			    SWAP(gmnp, msgid_tbl[idx].offset);
409 
410 			val = strcmp(msg_id_str, msgid1);
411 			if (val < 0) {
412 				top = idx + 1;
413 			} else if (val > 0) {
414 				bottom = idx;
415 			} else {
416 				*msgstrlen = (unsigned int)
417 				    SWAP(gmnp, msgstr_tbl[idx].len) + 1;
418 				*midx = idx;
419 				return (base +
420 				    SWAP(gmnp, msgstr_tbl[idx].offset));
421 			}
422 		}
423 		/* not found */
424 		return ((char *)msgid1);
425 	}
426 
427 	/* use hash table */
428 	hash_id = get_hashid(msgid1, &msglen);
429 	hash_idx = hash_id % hash_size;
430 	hash_inc = 1 + (hash_id % (hash_size - 2));
431 
432 	for (;;) {
433 		hash_val = HASH_TBL(gmnp, hash_table[hash_idx]);
434 
435 		if (hash_val == 0) {
436 			/* not found */
437 			return ((char *)msgid1);
438 		}
439 		if (hash_val <= num_of_str) {
440 			/* static message */
441 			idx = hash_val - 1;
442 			mlen = SWAP(gmnp, msgid_tbl[idx].len);
443 			msg = base + SWAP(gmnp, msgid_tbl[idx].offset);
444 		} else {
445 			if (!(gmnp->flag & ST_REV1)) {
446 				/* rev 0 does not have dynamic message */
447 				return ((char *)msgid1);
448 			}
449 			/* dynamic message */
450 			idx = hash_val - num_of_str - 1;
451 			mlen = gmnp->d_msg[MSGID][idx].len;
452 			msg = gmnp->mchunk + gmnp->d_msg[MSGID][idx].offset;
453 		}
454 		if (msglen <= mlen && strcmp(msgid1, msg) == 0) {
455 			/* found */
456 			break;
457 		}
458 		hash_idx = (hash_idx + hash_inc) % hash_size;
459 	}
460 
461 	/* msgstrlen should include a null termination */
462 	if (hash_val <= num_of_str) {
463 		*msgstrlen = SWAP(gmnp, msgstr_tbl[idx].len) + 1;
464 		msg = base + SWAP(gmnp, msgstr_tbl[idx].offset);
465 		*midx = idx;
466 	} else {
467 		*msgstrlen = gmnp->d_msg[MSGSTR][idx].len + 1;
468 		msg = gmnp->mchunk + gmnp->d_msg[MSGSTR][idx].offset;
469 		*midx = idx + num_of_str;
470 	}
471 
472 	return (msg);
473 }
474 
/*
 * do_conv
 *
 * Converts 'srclen' bytes at 'src' with the iconv descriptor 'fd'.
 *
 * Returns a malloc'ed buffer laid out as
 *	uint32_t	number of converted bytes (N)
 *	char[N]		converted bytes
 *	char		'\0'
 * or NULL if memory could not be allocated.  The caller owns the
 * buffer and releases it with free().
 *
 * The output buffer starts at twice the input size and grows by the
 * same amount whenever iconv() reports E2BIG.  Any other iconv()
 * failure ends the conversion and whatever was converted so far is
 * returned.  The extra null byte after the converted data is not
 * counted in N; it guarantees that the result is a terminated string
 * even when the conversion stopped before reaching the terminator of
 * the input.
 */
static uint32_t *
do_conv(iconv_t fd, const char *src, uint32_t srclen)
{
	uint32_t	tolen;
	uint32_t	*ptr, *optr;
	size_t	oleft, ileft, bufsize, memincr;
	char	*to, *tptr;

#ifdef GETTEXT_DEBUG
	gprintf(0, "*************** do_conv("
	    "0x%p, \"%s\", %d)\n",
	    (void *)fd, src ? src : "(null)", srclen);
#endif

	memincr = (size_t)srclen * 2;
	bufsize = memincr;
	ileft = srclen;
	oleft = bufsize;
	/* length word + conversion space + guard null byte */
	ptr = malloc(sizeof (uint32_t) + bufsize + 1);
	if (ptr == NULL) {
		return (NULL);
	}
	to = (char *)(ptr + 1);

	for (;;) {
		tptr = to;
		errno = 0;
#ifdef GETTEXT_DEBUG
		gprintf(0, "******* calling iconv()\n");
#endif
		/*
		 * &src goes through void * so that the call is valid
		 * whether iconv() declares its input buffer argument as
		 * 'const char **' or as 'char **'.
		 */
		if (iconv(fd, (void *)&src, &ileft, &tptr, &oleft) !=
		    (size_t)-1 || errno != E2BIG) {
			/* finished, or failed for a reason we can't fix */
			break;
		}

#ifdef GETTEXT_DEBUG
		gprintf(0, "******* iconv detected E2BIG\n");
		gprintf(0, "old bufsize: %u\n", bufsize);
#endif

		optr = realloc(ptr,
		    sizeof (uint32_t) + bufsize + memincr + 1);
		if (optr == NULL) {
			free(ptr);
			return (NULL);
		}
		ptr = optr;
		/* resume right after the bytes already produced */
		to = (char *)(ptr + 1) + (bufsize - oleft);
		oleft += memincr;
		bufsize += memincr;
#ifdef GETTEXT_DEBUG
		gprintf(0, "new bufsize: %u\n", bufsize);
#endif
	}

	tolen = (uint32_t)(bufsize - oleft);
	((char *)(ptr + 1))[tolen] = '\0';

	if (tolen < bufsize) {
		/*
		 * shrink the buffer; if that fails the larger buffer
		 * is still perfectly valid, so keep it.
		 */
		optr = realloc(ptr, sizeof (uint32_t) + tolen + 1);
		if (optr != NULL)
			ptr = optr;
	}
	*ptr = tolen;

#ifdef GETTEXT_DEBUG
	gprintf(0, "******* exiting do_conv()\n");
	gprintf(0, "tolen: %u\n", *ptr);
	gprintf(0, "return: 0x%p\n", ptr);
#endif
	return (ptr);
}
560 
561 /*
562  * conv_msg
563  */
564 static char *
565 conv_msg(Msg_g_node *gmnp, char *msgstr, uint32_t msgstr_len, uint32_t midx,
566     struct msg_pack *mp)
567 {
568 	uint32_t	*conv_dst;
569 	size_t	num_of_conv, conv_msgstr_len;
570 	char	*conv_msgstr, *result;
571 
572 	if (gmnp->conv_msgstr == NULL) {
573 		num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str;
574 		gmnp->conv_msgstr =
575 		    calloc((size_t)num_of_conv, sizeof (uint32_t *));
576 		if (gmnp->conv_msgstr == NULL) {
577 			/* malloc failed */
578 			result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
579 			return (result);
580 		}
581 	}
582 
583 	conv_dst = do_conv(gmnp->fd, (const char *)msgstr, msgstr_len);
584 
585 	if (conv_dst == NULL) {
586 		result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
587 		return (result);
588 	}
589 	conv_msgstr_len = *conv_dst;
590 	gmnp->conv_msgstr[midx] = conv_dst;
591 	conv_msgstr = (char *)(conv_dst + 1);
592 	result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp);
593 	return (result);
594 }
595 
/*
 * gnu_key_2_text
 *
 * Extracts msgstr from the GNU MO file
 *
 * gmnp		node describing the GNU MO file
 * codeset	encoding the caller wants the message in
 * mp		request (msgid1, msgid2, n, plural)
 *
 * Steps:
 * 1. On first use of the node (ST_CHK not set) the header entry, i.e.
 *    the translation of "", is looked up and parsed.
 * 2. mp->msgid1 is looked up.  If it is not found, the DFLTMSG() value
 *    is returned.
 * 3. 'codeset' is compared with the cached gmnp->dst_encoding and with
 *    gmnp->src_encoding to decide whether the message needs conversion
 *    ('conversion') and whether a new iconv descriptor has to be opened
 *    ('new_encoding').  'new_encoding' is only assigned, and only read,
 *    when 'conversion' is 1.
 * 4. The message is returned unconverted, from the conversion cache,
 *    or freshly converted via conv_msg().
 *
 * Whenever something fails after the message has been found (strdup(),
 * iconv_open()), the unconverted message is returned.
 */
char *
gnu_key_2_text(Msg_g_node *gmnp, const char *codeset,
    struct msg_pack *mp)
{
	uint32_t	msgstr_len, midx;
	iconv_t	fd;
	char	*result, *msgstr;
	int	ret, conversion, new_encoding;

#ifdef GETTEXT_DEBUG
	gprintf(0, "*************** gnu_key_2_text("
	    "0x%p, \"%s\", 0x%p)\n",
	    (void *)gmnp, codeset ? codeset : "(null)", (void *)mp);
	printgnumsg(gmnp, 1);
	printmp(mp, 1);
#endif

	/* first checks if header entry has been processed */
	if (!(gmnp->flag & ST_CHK)) {
		char	*msg_header;

		/*
		 * The header entry is the translation of the empty
		 * msgid.  If there is none, gnu_msgsearch() returns the
		 * "" passed in, which parse_header() treats as a header
		 * without any keyword.
		 */
		msg_header = gnu_msgsearch(gmnp, "", &msgstr_len, &midx);
		ret = parse_header((const char *)msg_header, gmnp);
		if (ret == -1) {
			/* fatal error */
			DFLTMSG(result, mp->msgid1, mp->msgid2,
			    mp->n, mp->plural);
			return (result);
		}
		gmnp->flag |= ST_CHK;
	}
	msgstr = gnu_msgsearch(gmnp, mp->msgid1, &msgstr_len, &midx);
	/* gnu_msgsearch() hands back its msgid1 argument on a miss */
	if (msgstr == mp->msgid1) {
		/* not found */
		DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural);
		return (result);
	}

#ifdef GETTEXT_DEBUG
	printgnumsg(gmnp, 1);
#endif
	if (gmnp->dst_encoding == NULL) {
		/*
		 * destination encoding has not been set.
		 */
		char	*dupcodeset = strdup(codeset);
		if (dupcodeset == NULL) {
			/* strdup failed */
			result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
			return (result);
		}
		gmnp->dst_encoding = dupcodeset;

		if (strcmp(gmnp->dst_encoding, gmnp->src_encoding) == 0) {
			/*
			 * target encoding and src encoding
			 * are the same.
			 * No conversion required.
			 */
			conversion = 0;
		} else {
			/*
			 * target encoding is different from
			 * src encoding.
			 * New conversion required.
			 */
			/* sanity check */
			if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) {
				(void) iconv_close(gmnp->fd);
				gmnp->fd = (iconv_t)-1;
			}
			/* keep the cache array, drop its contents */
			if (gmnp->conv_msgstr)
				free_conv_msgstr(gmnp, 0);
			conversion = 1;
			new_encoding = 1;
		}
	} else {
		/*
		 * dst encoding has been already set.
		 */
		if (strcmp(gmnp->dst_encoding, codeset) == 0) {
			/*
			 * dst encoding and target encoding are the same.
			 */
			if (strcmp(gmnp->dst_encoding, gmnp->src_encoding)
			    == 0) {
				/*
				 * dst encoding and src encoding are the same.
				 * No conversion required.
				 */
				conversion = 0;
			} else {
				/*
				 * dst encoding is different from src encoding.
				 * current conversion is valid.
				 */
				conversion = 1;
				new_encoding = 0;
				/* checks if iconv_open has succeeded before */
				if (gmnp->fd == (iconv_t)-1) {
					/*
					 * iconv_open should have failed before
					 * Assume this conversion is invalid
					 */
					conversion = 0;
				} else {
					if (gmnp->conv_msgstr == NULL) {
						/*
						 * memory allocation for
						 * conv_msgstr should
						 * have failed before.
						 * Close the descriptor and
						 * start over as for a new
						 * encoding.
						 */
						new_encoding = 1;
						if (gmnp->fd)
							(void) iconv_close(
							    gmnp->fd);
						gmnp->fd = (iconv_t)-1;
					}
				}
			}
		} else {
			/*
			 * dst encoding is different from target encoding.
			 * It has changed since before.
			 */
			char	*dupcodeset = strdup(codeset);
			if (dupcodeset == NULL) {
				result = dfltmsgstr(gmnp, msgstr,
				    msgstr_len, mp);
				return (result);
			}
			free(gmnp->dst_encoding);
			gmnp->dst_encoding = dupcodeset;
			if (strcmp(gmnp->dst_encoding, gmnp->src_encoding)
			    == 0) {
				/*
				 * dst encoding and src encoding are the same.
				 * now, no conversion required.
				 */
				conversion = 0;
				/* the cache is no longer needed at all */
				if (gmnp->conv_msgstr)
					free_conv_msgstr(gmnp, 1);
			} else {
				/*
				 * dst encoding is different from src encoding.
				 * new conversion required.
				 */
				conversion = 1;
				new_encoding = 1;
				/* cached text is in the old encoding */
				if (gmnp->conv_msgstr)
					free_conv_msgstr(gmnp, 0);
			}

			/* the old descriptor converts to the old encoding */
			if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) {
				(void) iconv_close(gmnp->fd);
			}
			if (gmnp->fd != (iconv_t)-1) {
				gmnp->fd = (iconv_t)-1;
			}
		}
	}

	if (conversion == 0) {
		/* no conversion */
		result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
		return (result);
	}
	/* conversion required */

	if (new_encoding == 0) {
		/* dst codeset hasn't been changed since before */
		uint32_t	*cmsg;
		uint32_t	conv_msgstr_len;
		char	*conv_msgstr;

		if (gmnp->conv_msgstr[midx] == NULL) {
			/* this msgstr hasn't been converted yet */
			result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp);
			return (result);
		}
		/* this msgstr is in the conversion cache */
		/* cache entry: a uint32_t length followed by the text */
		cmsg = (uint32_t *)(uintptr_t)gmnp->conv_msgstr[midx];
		conv_msgstr_len = *cmsg;
		conv_msgstr = (char *)(cmsg + 1);
		result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp);
		return (result);
	}
	/* new conversion */
#ifdef GETTEXT_DEBUG
	gprintf(0, "******* calling iconv_open()\n");
	gprintf(0, "      dst: \"%s\", src: \"%s\"\n",
	    gmnp->dst_encoding, gmnp->src_encoding);
#endif
	fd = iconv_open(gmnp->dst_encoding, gmnp->src_encoding);
	/* a failed open is remembered as (iconv_t)-1 in the node */
	gmnp->fd = fd;
	if (fd == (iconv_t)-1) {
		/*
		 * iconv_open() failed.
		 * no conversion
		 */
		result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
		return (result);
	}
	result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp);
	return (result);
}
807 
808 
/*
 * Tables mapping the <inttypes.h> PRI* macro names to the conversion
 * specifiers they expand to on this system.
 *
 * PRI_STR(x, n)	the macro PRI<x><n> itself (a string literal)
 * PRI_LEN(x, n)	its length, terminating null byte not counted
 * PRIS(P, x)		initializer listing P(x, ...) for all 14 variants
 *
 * The order of the entries in PRIS() is fixed: conv_macro() indexes
 * the tables with PRI_BIAS_* plus 0..3 for the widths 8/16/32/64.
 */
#define	PRI_STR(x, n)	PRI##x##n
#define	PRI_LEN(x, n)	(char)(sizeof (PRI_STR(x, n)) - 1)
#define	PRIS(P, x)	{\
/* x/N/ */	P(x, 8), P(x, 16), P(x, 32), P(x, 64), \
/* xLEAST/N/ */	P(x, LEAST8), P(x, LEAST16), P(x, LEAST32), P(x, LEAST64), \
/* xFAST/N/ */	P(x, FAST8), P(x, FAST16), P(x, FAST32), P(x, FAST64), \
/* xMAX,PTR */	P(x, MAX), P(x, PTR) \
}

/* index of the first entry of each group within a PRIS() table */
#define	PRI_BIAS_LEAST	4
#define	PRI_BIAS_FAST	8
#define	PRI_BIAS_MAX	12
#define	PRI_BIAS_PTR	13

static const char	*pri_d[] = PRIS(PRI_STR, d);
static const char	*pri_i[] = PRIS(PRI_STR, i);
static const char	*pri_o[] = PRIS(PRI_STR, o);
static const char	*pri_u[] = PRIS(PRI_STR, u);
static const char	*pri_x[] = PRIS(PRI_STR, x);
static const char	*pri_X[] = PRIS(PRI_STR, X);

static const char	pri_d_len[] = PRIS(PRI_LEN, d);
static const char	pri_i_len[] = PRIS(PRI_LEN, i);
static const char	pri_o_len[] = PRIS(PRI_LEN, o);
static const char	pri_u_len[] = PRIS(PRI_LEN, u);
static const char	pri_x_len[] = PRIS(PRI_LEN, x);
static const char	pri_X_len[] = PRIS(PRI_LEN, X);

/* conversion character -> its specifier table and length table */
static struct {
	const char	type;
	const char	**str_table;
	const char	*len_table;
} pri_table[] = {
	{'d', pri_d, pri_d_len}, {'i', pri_i, pri_i_len},
	{'o', pri_o, pri_o_len}, {'u', pri_u, pri_u_len},
	{'x', pri_x, pri_x_len}, {'X', pri_X, pri_X_len},
};

/*
 * Name parts that may follow the conversion character:
 * the part itself, its length, whether a width (8/16/32/64) must
 * follow it, and the index of its group in a PRIS() table.
 */
static struct {
	const char	*name;
	const char	nlen;
	const char	want_digits;
	const char	bias;
} special_table[] = {
	{"LEAST",	5, 1, PRI_BIAS_LEAST},
	{"FAST",	4, 1, PRI_BIAS_FAST},
	{"MAX",		3, 0, PRI_BIAS_MAX},
	{"PTR",		3, 0, PRI_BIAS_PTR},
};

/*
 * conv_macro() returns the conversion specifier corresponding
 * to the macro name specified in 'str'.  'len' contains the
 * length of the macro name including the null termination.
 * '*lenp' will be set to the length of the returned conversion
 * specifier without the null termination.
 *
 * Recognized names are
 *	"I"				(mapped to the empty string)
 *	PRI<c><N>, PRI<c>LEAST<N>, PRI<c>FAST<N>, PRI<c>MAX, PRI<c>PTR
 * with <c> one of d, i, o, u, x, X and <N> one of 8, 16, 32, 64.
 * NULL is returned for any other name; '*lenp' is then left alone.
 */
static const char *
conv_macro(const char *str, uint32_t len, uint32_t *lenp)
{
	const char	**tbl;
	const char	*ltbl;
	char	*next;
	long	num;
	int	n, i, bias, idx, want_digits;

	if (len == 2) {
		if (*str == 'I') {
			/* Solaris does not support %I */
			*lenp = 0;
			return ("");
		}
		return (NULL);
	}

	/* shortest candidate is "PRI" + conversion char + null byte */
	if (len <= 4 || strncmp(str, "PRI", 3) != 0)
		return (NULL);

	str += 3;

	n = sizeof (pri_table) / sizeof (pri_table[0]);
	for (i = 0; i < n; i++) {
		if (pri_table[i].type == *str)
			break;
	}
	if (i == n)
		return (NULL);
	tbl = pri_table[i].str_table;
	ltbl = pri_table[i].len_table;

	str++;
	idx = want_digits = 0;

	if (isdigit((unsigned char)*str)) {
		/* PRIx/N/ */
		bias = 0;
		want_digits = 1;
	} else {
		n = sizeof (special_table) / sizeof (special_table[0]);
		for (i = 0; i < n; i++) {
			if (strncmp(special_table[i].name,
			    str, special_table[i].nlen) == 0) {
				break;
			}
		}
		if (i == n)
			return (NULL);
		bias = special_table[i].bias;
		want_digits = special_table[i].want_digits;
		str += special_table[i].nlen;
	}

	if (want_digits) {
		if (!isdigit((unsigned char)*str))
			return (NULL);
		/*
		 * Keep the full strtol() result.  Narrowing it to int
		 * would let an out-of-range width such as 4294967304
		 * wrap around to 8 and be accepted.
		 */
		num = strtol(str, &next, 10);
		/* see if it is 8/16/32/64 */
		for (n = 8, idx = 0; idx < 4; idx++, n *= 2) {
			if (num == (long)n)
				break;
		}
		if (idx == 4)
			return (NULL);
		str = next;
	}
	if (*str != '\0') {
		/* unknown format */
		return (NULL);
	}

	*lenp = (uint32_t)ltbl[bias + idx];
	return (tbl[bias + idx]);
}
941 
942 static gnu_d_macro_t *
943 expand_macros(Msg_g_node *p)
944 {
945 	char	*base = (char *)p->msg_file_info;
946 	struct gnu_msg_rev1_info	*rev1_header = p->rev1_header;
947 	struct gnu_msg_ent	*d_macro_tbl;
948 	gnu_d_macro_t	*d_macro;
949 	uint32_t	num_of_d_macro, e_maclen, maclen, i;
950 	const char	*e_macname;
951 	char	*macname;
952 
953 	/* number of the dynamic macros */
954 	num_of_d_macro = SWAP(p, rev1_header->num_of_dynamic_macro);
955 
956 	d_macro = malloc((size_t)num_of_d_macro * sizeof (gnu_d_macro_t));
957 	if (d_macro == NULL)
958 		return (NULL);
959 
960 	/* pointer to the dynamic strings table */
961 	d_macro_tbl = (struct gnu_msg_ent *)(uintptr_t)
962 	    (base + SWAP(p, rev1_header->off_dynamic_macro));
963 
964 	for (i = 0; i < num_of_d_macro; i++) {
965 		macname = base + SWAP(p, d_macro_tbl[i].offset);
966 		maclen = SWAP(p, d_macro_tbl[i].len);
967 
968 		/*
969 		 * sanity check
970 		 * maclen includes a null termination.
971 		 */
972 		if (maclen != strlen(macname) + 1) {
973 			free(d_macro);
974 			return (NULL);
975 		}
976 		e_macname = conv_macro(macname, maclen, &e_maclen);
977 		if (e_macname == NULL) {
978 			free(d_macro);
979 			return (NULL);
980 		}
981 		d_macro[i].len = e_maclen;
982 		d_macro[i].ptr = e_macname;
983 	}
984 
985 	return (d_macro);
986 }
987 
988 static char *
989 expand_dynamic_message(Msg_g_node *p, struct gnu_msg_ent **e_msgs)
990 {
991 
992 	char	*base = (char *)p->msg_file_info;
993 	struct gnu_msg_rev1_info	*rev1_header = p->rev1_header;
994 	struct gnu_dynamic_tbl	*d_info;
995 	struct gnu_dynamic_ent	*entry;
996 	gnu_d_macro_t	*d_macro;
997 	uint32_t	num_of_d_str, mlen, dlen, didx, i, j;
998 	uint32_t	off_d_tbl;
999 	uint32_t	*d_msg_off_tbl;
1000 	size_t	mchunk_size, used, need;
1001 	char	*mchunk, *msg;
1002 
1003 #define	MEM_INCR	(1024)
1004 
1005 	d_macro = expand_macros(p);
1006 	if (d_macro == NULL)
1007 		return (NULL);
1008 
1009 	/* number of dynamic messages */
1010 	num_of_d_str = p->num_of_d_str;
1011 
1012 	mchunk = NULL;
1013 	mchunk_size = 0;	/* size of the allocated memory in mchunk */
1014 	used = 0;		/* size of the used memory in mchunk */
1015 	for (i = MSGID; i <= MSGSTR; i++) {
1016 		/* pointer to the offset table of dynamic msgids/msgstrs */
1017 		off_d_tbl = SWAP(p,
1018 		    i == MSGID ? rev1_header->off_dynamic_msgid_tbl :
1019 		    rev1_header->off_dynamic_msgstr_tbl);
1020 		/* pointer to the dynamic msgids/msgstrs */
1021 		d_msg_off_tbl = (uint32_t *)(uintptr_t)(base + off_d_tbl);
1022 		for (j = 0; j < num_of_d_str; j++) {
1023 			e_msgs[i][j].offset = used;
1024 			d_info = (struct gnu_dynamic_tbl *)(uintptr_t)
1025 			    (base + SWAP(p, d_msg_off_tbl[j]));
1026 			entry = d_info->entry;
1027 			msg = base + SWAP(p, d_info->offset);
1028 
1029 			for (;;) {
1030 				mlen = SWAP(p, entry->len);
1031 				didx = SWAP(p, entry->idx);
1032 				dlen = (didx == NOMORE_DYNAMIC_MACRO) ? 0 :
1033 				    d_macro[didx].len;
1034 				need = used + mlen + dlen;
1035 				if (need >= mchunk_size) {
1036 					char	*t;
1037 					size_t	n = mchunk_size;
1038 					do {
1039 						n += MEM_INCR;
1040 					} while (n <= need);
1041 					t = realloc(mchunk, n);
1042 					if (t == NULL) {
1043 						free(d_macro);
1044 						free(mchunk);
1045 						return (NULL);
1046 					}
1047 					mchunk = t;
1048 					mchunk_size = n;
1049 				}
1050 				(void) memcpy(mchunk + used, msg, (size_t)mlen);
1051 				msg += mlen;
1052 				used += mlen;
1053 
1054 				if (didx == NOMORE_DYNAMIC_MACRO) {
1055 					/*
1056 					 * Last segment of a static
1057 					 * msg string contains a null
1058 					 * termination, so an explicit
1059 					 * null termination is not required
1060 					 * here.
1061 					 */
1062 					break;
1063 				}
1064 				(void) memcpy(mchunk + used,
1065 				    d_macro[didx].ptr, (size_t)dlen);
1066 				used += dlen;
1067 				entry++; /* to next entry */
1068 			}
1069 			/*
1070 			 * e_msgs[][].len does not include a null termination
1071 			 */
1072 			e_msgs[i][j].len = used - e_msgs[i][j].offset - 1;
1073 		}
1074 	}
1075 
1076 	free(d_macro);
1077 
1078 	/* shrink mchunk to 'used' */
1079 	{
1080 		char	*t;
1081 		t = realloc(mchunk, used);
1082 		if (t == NULL) {
1083 			free(mchunk);
1084 			return (NULL);
1085 		}
1086 		mchunk = t;
1087 	}
1088 
1089 	return (mchunk);
1090 }
1091 
1092 static int
1093 build_rev1_info(Msg_g_node *p)
1094 {
1095 	uint32_t	*d_hash;
1096 	uint32_t	num_of_d_str, num_of_str;
1097 	uint32_t	idx, hash_value, hash_size;
1098 	size_t	hash_mem_size;
1099 	size_t	d_msgid_size, d_msgstr_size;
1100 	char	*chunk, *mchunk;
1101 	int	i;
1102 
1103 #ifdef GETTEXT_DEBUG
1104 	gprintf(0, "******* entering build_rev1_info(0x%p)\n", p);
1105 	printgnumsg(p, 1);
1106 #endif
1107 
1108 	if (p->hash_table == NULL) {
1109 		/* Revision 1 always requires the hash table */
1110 		return (-1);
1111 	}
1112 
1113 	num_of_str = p->num_of_str;
1114 	hash_size = p->hash_size;
1115 	num_of_d_str = p->num_of_d_str;
1116 
1117 	hash_mem_size = hash_size * sizeof (uint32_t);
1118 	ROUND(hash_mem_size, sizeof (struct gnu_msg_ent));
1119 
1120 	d_msgid_size = num_of_d_str * sizeof (struct gnu_msg_ent);
1121 	d_msgstr_size = num_of_d_str * sizeof (struct gnu_msg_ent);
1122 
1123 	chunk = malloc(hash_mem_size + d_msgid_size + d_msgstr_size);
1124 	if (chunk == NULL) {
1125 		return (-1);
1126 	}
1127 
1128 	d_hash = (uint32_t *)(uintptr_t)chunk;
1129 	p->d_msg[MSGID] = (struct gnu_msg_ent *)(uintptr_t)
1130 	    (chunk + hash_mem_size);
1131 	p->d_msg[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t)
1132 	    (chunk + hash_mem_size + d_msgid_size);
1133 
1134 	if ((mchunk = expand_dynamic_message(p, p->d_msg)) == NULL) {
1135 		free(chunk);
1136 		return (-1);
1137 	}
1138 
1139 	/* copy the original hash table into the dynamic hash table */
1140 	for (i = 0; i < hash_size; i++) {
1141 		d_hash[i] = SWAP(p, p->hash_table[i]);
1142 	}
1143 
1144 	/* fill in the dynamic hash table with dynamic messages */
1145 	for (i = 0; i < num_of_d_str; i++) {
1146 		hash_value = get_hashid(mchunk + p->d_msg[MSGID][i].offset,
1147 		    NULL);
1148 		idx = get_hash_index(d_hash, hash_value, hash_size);
1149 		d_hash[idx] = num_of_str + i + 1;
1150 	}
1151 
1152 	p->mchunk = mchunk;
1153 	p->hash_table = d_hash;
1154 
1155 #ifdef	GETTEXT_DEBUG
1156 	print_rev1_info(p);
1157 	gprintf(0, "******* exiting build_rev1_info()\n");
1158 	printgnumsg(p, 1);
1159 #endif
1160 
1161 	return (0);
1162 }
1163 
1164 /*
1165  * gnu_setmsg
1166  *
1167  * INPUT
1168  *   mnp  - message node
1169  *   addr - address to the mmapped file
1170  *   size - size of the file
1171  *
1172  * RETURN
1173  *   0   - either T_GNU_MO or T_ILL_MO has been set
1174  *  -1   - failed
1175  */
1176 int
1177 gnu_setmsg(Msg_node *mnp, char *addr, size_t size)
1178 {
1179 	struct gnu_msg_info	*gnu_header;
1180 	Msg_g_node	*p;
1181 
1182 #ifdef GETTEXT_DEBUG
1183 	gprintf(0, "******** entering gnu_setmsg(0x%p, 0x%p, %lu)\n",
1184 	    (void *)mnp, addr, size);
1185 	printmnp(mnp, 1);
1186 #endif
1187 
1188 	/* checks the GNU MAGIC number */
1189 	if (size < sizeof (struct gnu_msg_info)) {
1190 		/* invalid mo file */
1191 		mnp->type = T_ILL_MO;
1192 #ifdef	GETTEXT_DEBUG
1193 		gprintf(0, "********* exiting gnu_setmsg\n");
1194 		printmnp(mnp, 1);
1195 #endif
1196 		return (0);
1197 	}
1198 
1199 	gnu_header = (struct gnu_msg_info *)(uintptr_t)addr;
1200 
1201 	p = calloc(1, sizeof (Msg_g_node));
1202 	if (p == NULL) {
1203 		return (-1);
1204 	}
1205 	p->msg_file_info = gnu_header;
1206 
1207 	if (gnu_header->magic == GNU_MAGIC) {
1208 		switch (gnu_header->revision) {
1209 		case GNU_REVISION_0_1:
1210 		case GNU_REVISION_1_1:
1211 			p->flag |= ST_REV1;
1212 			break;
1213 		}
1214 	} else if (gnu_header->magic == GNU_MAGIC_SWAPPED) {
1215 		p->flag |= ST_SWP;
1216 		switch (gnu_header->revision) {
1217 		case GNU_REVISION_0_1_SWAPPED:
1218 		case GNU_REVISION_1_1_SWAPPED:
1219 			p->flag |= ST_REV1;
1220 			break;
1221 		}
1222 	} else {
1223 		/* invalid mo file */
1224 		free(p);
1225 		mnp->type = T_ILL_MO;
1226 #ifdef	GETTEXT_DEBUG
1227 		gprintf(0, "********* exiting gnu_setmsg\n");
1228 		printmnp(mnp, 1);
1229 #endif
1230 		return (0);
1231 	}
1232 
1233 	p->fsize = size;
1234 	p->num_of_str = SWAP(p, gnu_header->num_of_str);
1235 	p->hash_size = SWAP(p, gnu_header->sz_hashtbl);
1236 	p->hash_table = p->hash_size <= 2 ? NULL :
1237 	    (uint32_t *)(uintptr_t)
1238 	    (addr + SWAP(p, gnu_header->off_hashtbl));
1239 
1240 	p->msg_tbl[MSGID] = (struct gnu_msg_ent *)(uintptr_t)
1241 	    (addr + SWAP(p, gnu_header->off_msgid_tbl));
1242 	p->msg_tbl[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t)
1243 	    (addr + SWAP(p, gnu_header->off_msgstr_tbl));
1244 
1245 	if (p->flag & ST_REV1) {
1246 		/* Revision 1 */
1247 		struct gnu_msg_rev1_info	*rev1_header;
1248 
1249 		rev1_header = (struct gnu_msg_rev1_info *)
1250 		    (uintptr_t)(addr + sizeof (struct gnu_msg_info));
1251 		p->rev1_header = rev1_header;
1252 		p->num_of_d_str = SWAP(p, rev1_header->num_of_dynamic_str);
1253 		if (build_rev1_info(p) == -1) {
1254 			free(p);
1255 #ifdef GETTEXT_DEBUG
1256 			gprintf(0, "******** exiting gnu_setmsg: "
1257 			    "build_rev1_info() failed\n");
1258 #endif
1259 			return (-1);
1260 		}
1261 	}
1262 
1263 	mnp->msg.gnumsg = p;
1264 	mnp->type = T_GNU_MO;
1265 
1266 #ifdef GETTEXT_DEBUG
1267 	gprintf(0, "********* exiting gnu_setmsg\n");
1268 	printmnp(mnp, 1);
1269 #endif
1270 	return (0);
1271 }
1272 
/*
 * get_hash_index
 *
 * Finds a free slot for 'hash_value' in the open-addressing table
 * 'hash_tbl' of 'hash_size' entries and returns its index.  A slot is
 * free when it holds 0.
 *
 * Probing starts at hash_value % hash_size and advances by
 * 1 + (hash_value % (hash_size - 2)), wrapping around at hash_size.
 * The probe loop ends only when it reaches a free slot.
 */
static uint32_t
get_hash_index(uint32_t *hash_tbl, uint32_t hash_value, uint32_t hash_size)
{
	uint32_t	slot = hash_value % hash_size;
	uint32_t	step = 1 + (hash_value % (hash_size - 2));

	while (hash_tbl[slot] != 0) {
		/* occupied, try the next slot of the probe sequence */
		slot = (slot + step) % hash_size;
	}
	return (slot);
}
1296