xref: /illumos-gate/usr/src/lib/libc/port/i18n/gettext_gnu.c (revision 07a48826732249fcd3aa8dd53c8389595e9f1fbc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include "lint.h"
30 #include "mtlib.h"
31 #include <ctype.h>
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <sys/types.h>
36 #include <sys/mman.h>
37 #include <sys/param.h>
38 #include <sys/stat.h>
39 #include <thread.h>
40 #include <synch.h>
41 #include <unistd.h>
42 #include <limits.h>
43 #include <errno.h>
44 #include <inttypes.h>
45 #include "libc.h"
46 #include "msgfmt.h"
47 #include "nlspath_checks.h"
48 #include "gettext.h"
49 
50 #ifdef DEBUG
51 #include <assert.h>
52 #endif
53 
/*
 * GNU gettext exports these two symbols; they are defined here solely so
 * that binaries built against GNU libintl keep linking.
 */
int	_nl_msg_cat_cntr;
int	*_nl_domain_bindings;

/* shared empty string, used as the "no source encoding" marker */
static const char	*nullstr = "";

/*
 * Keys looked up in the MO header entry, each paired with the length
 * of the key text (terminating null excluded).
 */
#define	CHARSET_MOD	"charset="
#define	NPLURALS_MOD	"nplurals="
#define	PLURAL_MOD	"plural="

#define	CHARSET_LEN	(sizeof (CHARSET_MOD) - 1)
#define	NPLURALS_LEN	(sizeof (NPLURALS_MOD) - 1)
#define	PLURAL_LEN	(sizeof (PLURAL_MOD) - 1)

/* forward declaration; the definition is at the end of this file */
static uint32_t	get_hash_index(uint32_t *hash_tbl, uint32_t hash_value,
    uint32_t hash_size);
68 
69 /*
70  * free_conv_msgstr
71  *
72  * release the memory allocated for storing code-converted messages
73  *
74  * f
75  *	0:	do not free gmnp->conv_msgstr
76  *	1:	free gmnp->conv_msgstr
77  */
78 static void
79 free_conv_msgstr(Msg_g_node *gmnp, int f)
80 {
81 	uint32_t	i, num_of_conv;
82 
83 #ifdef GETTEXT_DEBUG
84 	gprintf(0, "*************** free_conv_msgstr(0x%p, %d)\n",
85 	    (void *)gmnp, f);
86 	printgnumsg(gmnp, 1);
87 #endif
88 
89 	num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str;
90 	for (i = 0; i < num_of_conv; i++) {
91 		if (gmnp->conv_msgstr[i]) {
92 			free(gmnp->conv_msgstr[i]);
93 		}
94 		gmnp->conv_msgstr[i] = NULL;
95 	}
96 	if (f) {
97 		free(gmnp->conv_msgstr);
98 		gmnp->conv_msgstr = NULL;
99 	}
100 }
101 
102 /*
103  * dfltmsgstr
104  *
105  * choose an appropriate message by evaluating the plural expression,
106  * and return it.
107  */
108 static char *
109 dfltmsgstr(Msg_g_node *gmnp, const char *msgstr, uint32_t msgstr_len,
110     struct msg_pack *mp)
111 {
112 	unsigned int	pindex;
113 	size_t	len;
114 	const char	*p;
115 
116 #ifdef GETTEXT_DEBUG
117 	gprintf(0, "*************** dfltmsgstr(0x%p, \"%s\", %u, 0x%p)\n",
118 	    (void *)gmnp,
119 	    msgstr ? msgstr : "(null)", msgstr_len, (void *)mp);
120 	printgnumsg(gmnp, 1);
121 	printmp(mp, 1);
122 #endif
123 
124 	if (mp->plural) {
125 		if (gmnp->plural) {
126 			pindex = plural_eval(gmnp->plural, mp->n);
127 		} else {
128 			/*
129 			 * This mo does not have plural information.
130 			 * Using the English form.
131 			 */
132 			if (mp->n == 1)
133 				pindex = 0;
134 			else
135 				pindex = 1;
136 		}
137 #ifdef GETTEXT_DEBUG
138 		gprintf(0, "plural_eval returned: %u\n", pindex);
139 #endif
140 		if (pindex >= gmnp->nplurals) {
141 			/* should never happen */
142 			pindex = 0;
143 		}
144 		p = msgstr;
145 		for (; pindex != 0; pindex--) {
146 			len = msgstr_len - (p - msgstr);
147 			p = memchr(p, '\0', len);
148 			if (p == NULL) {
149 				/*
150 				 * null byte not found
151 				 * this should never happen
152 				 */
153 				char	*result;
154 				DFLTMSG(result, mp->msgid1, mp->msgid2,
155 				    mp->n, mp->plural);
156 				return (result);
157 			}
158 			p++;		/* skip */
159 		}
160 		return ((char *)p);
161 	}
162 
163 	return ((char *)msgstr);
164 }
165 
166 /*
167  * parse_header
168  *
169  * parse the header entry of the GNU MO file and
170  * extract the src encoding and the plural information of the MO file
171  */
172 static int
173 parse_header(const char *header, Msg_g_node *gmnp)
174 {
175 	char	*charset = NULL;
176 	char	*charset_str;
177 	size_t	len;
178 	char	*nplurals_str, *plural_str;
179 	plural_expr_t	plural;
180 	char	*p, *q;
181 	unsigned int	nplurals;
182 	int	ret;
183 
184 #ifdef GETTEXT_DEBUG
185 	gprintf(0, "*************** parse_header(\"%s\", 0x%p)\n",
186 	    header ? header : "(null)", (void *)gmnp);
187 	printgnumsg(gmnp, 1);
188 #endif
189 
190 	if (header == NULL) {
191 		gmnp->src_encoding = (char *)nullstr;
192 		gmnp->nplurals = 2;
193 		gmnp->plural = NULL;
194 #ifdef GETTEXT_DEBUG
195 		gprintf(0, "*************** exiting parse_header\n");
196 		gprintf(0, "no header\n");
197 #endif
198 
199 		return (0);
200 	}
201 
202 	charset_str = strstr(header, CHARSET_MOD);
203 	if (charset_str == NULL) {
204 		gmnp->src_encoding = (char *)nullstr;
205 	} else {
206 		p = charset_str + CHARSET_LEN;
207 		q = p;
208 		while ((*q != ' ') && (*q != '\t') &&
209 		    (*q != '\n')) {
210 			q++;
211 		}
212 		len = q - p;
213 		if (len > 0) {
214 			charset = malloc(len + 1);
215 			if (charset == NULL) {
216 				gmnp->src_encoding = (char *)nullstr;
217 				gmnp->nplurals = 2;
218 				gmnp->plural = NULL;
219 				return (-1);
220 			}
221 			(void) memcpy(charset, p, len);
222 			charset[len] = '\0';
223 			gmnp->src_encoding = charset;
224 		} else {
225 			gmnp->src_encoding = (char *)nullstr;
226 		}
227 	}
228 
229 	nplurals_str = strstr(header, NPLURALS_MOD);
230 	plural_str = strstr(header, PLURAL_MOD);
231 	if (nplurals_str == NULL || plural_str == NULL) {
232 		/* no valid plural specification */
233 		gmnp->nplurals = 2;
234 		gmnp->plural = NULL;
235 #ifdef GETTEXT_DEBUG
236 		gprintf(0, "*************** exiting parse_header\n");
237 		gprintf(0, "no plural entry\n");
238 #endif
239 		return (0);
240 	} else {
241 		p = nplurals_str + NPLURALS_LEN;
242 		while (*p && isspace((unsigned char)*p)) {
243 			p++;
244 		}
245 		nplurals = (unsigned int)strtol(p, &q, 10);
246 		if (p != q) {
247 			gmnp->nplurals = nplurals;
248 		} else {
249 			gmnp->nplurals = 2;
250 		}
251 
252 		p = plural_str + PLURAL_LEN;
253 #ifdef GETTEXT_DEBUG
254 		gprintf(0, "plural_str: \"%s\"\n", p);
255 #endif
256 
257 		ret = plural_expr(&plural, (const char *)p);
258 		if (ret == 0) {
259 			/* parse succeeded */
260 			gmnp->plural = plural;
261 #ifdef GETTEXT_DEBUG
262 		gprintf(0, "*************** exiting parse_header\n");
263 		gprintf(0, "charset: \"%s\"\n",
264 		    charset ? charset : "(null)");
265 		printexpr(plural, 1);
266 #endif
267 			return (0);
268 		} else if (ret == 1) {
269 			/* parse error */
270 			gmnp->nplurals = 2;
271 			gmnp->plural = NULL;
272 			return (0);
273 		} else {
274 			/* fatal error */
275 			if (charset)
276 				free(charset);
277 			gmnp->src_encoding = (char *)nullstr;
278 			gmnp->nplurals = 2;
279 			gmnp->plural = NULL;
280 			return (-1);
281 		}
282 	}
283 	/* NOTREACHED */
284 }
285 
286 /*
287  * handle_lang
288  *
289  * take care of the LANGUAGE specification
290  */
291 char *
292 handle_lang(struct msg_pack *mp)
293 {
294 	const char	*p, *op, *q;
295 	size_t	locale_len;
296 	char	*result;
297 	char	locale[MAXPATHLEN];
298 
299 
300 #ifdef GETTEXT_DEBUG
301 	gprintf(0, "*************** handle_lang(0x%p)\n", (void *)mp);
302 	printmp(mp, 1);
303 #endif
304 
305 	p = mp->language;
306 
307 	while (*p) {
308 		op = p;
309 		q = strchr(p, ':');
310 		if (q == NULL) {
311 			locale_len = strlen(p);
312 			p += locale_len;
313 		} else {
314 			locale_len = q - p;
315 			p += locale_len + 1;
316 		}
317 		if (locale_len >= MAXPATHLEN || locale_len == 0) {
318 			/* illegal locale name */
319 			continue;
320 		}
321 		(void) memcpy(locale, op, locale_len);
322 		locale[locale_len] = '\0';
323 		mp->locale = locale;
324 
325 #ifdef GETTEXT_DEBUG
326 		*mp->msgfile = '\0';
327 #endif
328 		if (mk_msgfile(mp) == NULL) {
329 			/* illegal locale name */
330 			continue;
331 		}
332 
333 		result = handle_mo(mp);
334 		if (mp->status & ST_GNU_MSG_FOUND)
335 			return (result);
336 
337 		if (mp->status & ST_SUN_MO_FOUND)
338 			break;
339 	}
340 
341 	/*
342 	 * no valid locale found, Sun MO found, or
343 	 * GNU MO found but no valid msg found there.
344 	 */
345 
346 	if (mp->status & ST_GNU_MO_FOUND) {
347 		/*
348 		 * GNU MO found but no valid msg found there.
349 		 * returning DFLTMSG.
350 		 */
351 		DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural);
352 		return (result);
353 	}
354 	return (NULL);
355 }
356 
357 /*
358  * gnu_msgsearch
359  *
360  * Searchs the translation message for the specified msgid1.
361  * Hash algorithm used in this function is Open Addressing
362  * with Double Hashing:
363  * H(k, i) = (H1(k) + i * H2(k)) mod M
364  * H1(k) = hashvalue % M
365  * H2(k) = 1 + (hashvalue % (M - 2))
366  *
367  * Ref: The Art of Computer Programming Volume 3
368  * Sorting and Searching, second edition
369  * Donald E Knuth
370  */
371 static char *
372 gnu_msgsearch(Msg_g_node *gmnp, const char *msgid1,
373     uint32_t *msgstrlen, uint32_t *midx)
374 {
375 	struct gnu_msg_info	*header = gmnp->msg_file_info;
376 	struct gnu_msg_ent	*msgid_tbl, *msgstr_tbl;
377 	uint32_t	num_of_str, idx, mlen, msglen;
378 	uint32_t	hash_size, hash_val, hash_id, hash_inc, hash_idx;
379 	uint32_t	*hash_table;
380 	char	*base;
381 	char	*msg;
382 
383 #ifdef GETTEXT_DEBUG
384 	gprintf(0, "*************** gnu_msgsearch(0x%p, \"%s\", "
385 	    "0x%p, 0x%p)\n",
386 	    (void *)gmnp, msgid1, msgstrlen, midx);
387 	printgnumsg(gmnp, 1);
388 #endif
389 
390 	base = (char *)header;
391 
392 	msgid_tbl = gmnp->msg_tbl[MSGID];
393 	msgstr_tbl = gmnp->msg_tbl[MSGSTR];
394 	hash_table = gmnp->hash_table;
395 	hash_size = gmnp->hash_size;
396 	num_of_str = gmnp->num_of_str;
397 
398 	if (!(gmnp->flag & ST_REV1) &&
399 	    (hash_table == NULL || (hash_size <= 2))) {
400 		/*
401 		 * Revision 0 and
402 		 * No hash table exists or
403 		 * hash size is enough small.
404 		 */
405 		uint32_t	top, bottom;
406 		char	*msg_id_str;
407 		int	val;
408 
409 		top = 0;
410 		bottom = num_of_str;
411 		while (top < bottom) {
412 			idx = (top + bottom) / 2;
413 			msg_id_str = base +
414 			    SWAP(gmnp, msgid_tbl[idx].offset);
415 
416 			val = strcmp(msg_id_str, msgid1);
417 			if (val < 0) {
418 				top = idx + 1;
419 			} else if (val > 0) {
420 				bottom = idx;
421 			} else {
422 				*msgstrlen = (unsigned int)
423 				    SWAP(gmnp, msgstr_tbl[idx].len) + 1;
424 				*midx = idx;
425 				return (base +
426 				    SWAP(gmnp, msgstr_tbl[idx].offset));
427 			}
428 		}
429 		/* not found */
430 		return ((char *)msgid1);
431 	}
432 
433 	/* use hash table */
434 	hash_id = get_hashid(msgid1, &msglen);
435 	hash_idx = hash_id % hash_size;
436 	hash_inc = 1 + (hash_id % (hash_size - 2));
437 
438 	for (;;) {
439 		hash_val = HASH_TBL(gmnp, hash_table[hash_idx]);
440 
441 		if (hash_val == 0) {
442 			/* not found */
443 			return ((char *)msgid1);
444 		}
445 		if (hash_val <= num_of_str) {
446 			/* static message */
447 			idx = hash_val - 1;
448 			mlen = SWAP(gmnp, msgid_tbl[idx].len);
449 			msg = base + SWAP(gmnp, msgid_tbl[idx].offset);
450 		} else {
451 			if (!(gmnp->flag & ST_REV1)) {
452 				/* rev 0 does not have dynamic message */
453 				return ((char *)msgid1);
454 			}
455 			/* dynamic message */
456 			idx = hash_val - num_of_str - 1;
457 			mlen = gmnp->d_msg[MSGID][idx].len;
458 			msg = gmnp->mchunk + gmnp->d_msg[MSGID][idx].offset;
459 		}
460 		if (msglen <= mlen && strcmp(msgid1, msg) == 0) {
461 			/* found */
462 			break;
463 		}
464 		hash_idx = (hash_idx + hash_inc) % hash_size;
465 	}
466 
467 	/* msgstrlen should include a null termination */
468 	if (hash_val <= num_of_str) {
469 		*msgstrlen = SWAP(gmnp, msgstr_tbl[idx].len) + 1;
470 		msg = base + SWAP(gmnp, msgstr_tbl[idx].offset);
471 		*midx = idx;
472 	} else {
473 		*msgstrlen = gmnp->d_msg[MSGSTR][idx].len + 1;
474 		msg = gmnp->mchunk + gmnp->d_msg[MSGSTR][idx].offset;
475 		*midx = idx + num_of_str;
476 	}
477 
478 	return (msg);
479 }
480 
/*
 * do_conv
 *
 * Converts srclen bytes at src with the conversion descriptor fd by
 * calling iconv().
 *
 * Returns a malloc'ed buffer laid out as one uint32_t holding the
 * number of converted bytes, immediately followed by the converted
 * bytes themselves.  The caller owns the buffer and releases it with
 * free().
 *
 * Returns NULL if memory cannot be allocated or if iconv() fails for
 * any reason other than a full output buffer.  A failed conversion
 * leaves a truncated result that is not null-terminated, so it is not
 * handed out; the caller is expected to fall back to the unconverted
 * message.
 */
static uint32_t *
do_conv(iconv_t fd, const char *src, uint32_t srclen)
{
	uint32_t	tolen;
	uint32_t	*ptr, *optr;
	size_t	oleft, ileft, bufsize, memincr;
	char	*to, *tptr;

#ifdef GETTEXT_DEBUG
	gprintf(0, "*************** do_conv("
	    "0x%p, \"%s\", %d)\n",
	    (void *)fd, src ? src : "(null)", srclen);
#endif

	/* widen before multiplying so that the product cannot wrap */
	memincr = (size_t)srclen * 2;
	if (memincr == 0) {
		/* growing by zero bytes would never satisfy E2BIG */
		memincr = 1;
	}
	bufsize = memincr;
	ileft = srclen;
	oleft = bufsize;
	ptr = malloc(bufsize + sizeof (uint32_t));
	if (ptr == NULL) {
		return (NULL);
	}
	to = (char *)(ptr + 1);

	for (;;) {
		tptr = to;
		errno = 0;
#ifdef GETTEXT_DEBUG
		gprintf(0, "******* calling iconv()\n");
#endif
		/*
		 * The input argument is passed through (void *) because
		 * iconv() prototypes differ between platforms in the
		 * constness of that parameter.
		 */
		if (iconv(fd, (void *)&src, &ileft, &tptr, &oleft) !=
		    (size_t)-1) {
			break;
		}
		if (errno != E2BIG) {
			/*
			 * Invalid or incomplete input.  Bring the
			 * descriptor back to its initial state, since
			 * the caller keeps using it for other messages.
			 */
			(void) iconv(fd, NULL, NULL, NULL, NULL);
			free(ptr);
			return (NULL);
		}

#ifdef GETTEXT_DEBUG
		gprintf(0, "******* iconv detected E2BIG\n");
		gprintf(0, "old bufsize: %u\n", bufsize);
#endif
		optr = realloc(ptr, bufsize + memincr + sizeof (uint32_t));
		if (optr == NULL) {
			free(ptr);
			return (NULL);
		}
		ptr = optr;
		/* resume right after the bytes already produced */
		to = (char *)(optr + 1);
		to += bufsize - oleft;
		oleft += memincr;
		bufsize += memincr;
#ifdef GETTEXT_DEBUG
		gprintf(0, "new bufsize: %u\n", bufsize);
#endif
	}
	tolen = (uint32_t)(bufsize - oleft);

	if (tolen < bufsize) {
		/*
		 * Shrink the buffer.  Should realloc() fail, the larger
		 * buffer is still valid and is simply kept.
		 */
		optr = realloc(ptr, tolen + sizeof (uint32_t));
		if (optr != NULL) {
			ptr = optr;
		}
	}
	*ptr = tolen;

#ifdef GETTEXT_DEBUG
	gprintf(0, "******* exiting do_conv()\n");
	gprintf(0, "tolen: %u\n", *ptr);
	gprintf(0, "return: 0x%p\n", ptr);
#endif
	return (ptr);
}
566 
567 /*
568  * conv_msg
569  */
570 static char *
571 conv_msg(Msg_g_node *gmnp, char *msgstr, uint32_t msgstr_len, uint32_t midx,
572     struct msg_pack *mp)
573 {
574 	uint32_t	*conv_dst;
575 	size_t	num_of_conv, conv_msgstr_len;
576 	char	*conv_msgstr, *result;
577 
578 	if (gmnp->conv_msgstr == NULL) {
579 		num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str;
580 		gmnp->conv_msgstr =
581 		    calloc((size_t)num_of_conv, sizeof (uint32_t *));
582 		if (gmnp->conv_msgstr == NULL) {
583 			/* malloc failed */
584 			result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
585 			return (result);
586 		}
587 	}
588 
589 	conv_dst = do_conv(gmnp->fd, (const char *)msgstr, msgstr_len);
590 
591 	if (conv_dst == NULL) {
592 		result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
593 		return (result);
594 	}
595 	conv_msgstr_len = *conv_dst;
596 	gmnp->conv_msgstr[midx] = conv_dst;
597 	conv_msgstr = (char *)(conv_dst + 1);
598 	result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp);
599 	return (result);
600 }
601 
602 /*
603  * gnu_key_2_text
604  *
605  * Extracts msgstr from the GNU MO file
606  */
607 char *
608 gnu_key_2_text(Msg_g_node *gmnp, const char *codeset,
609     struct msg_pack *mp)
610 {
611 	uint32_t	msgstr_len, midx;
612 	iconv_t	fd;
613 	char	*result, *msgstr;
614 	int	ret, conversion, new_encoding;
615 
616 #ifdef GETTEXT_DEBUG
617 	gprintf(0, "*************** gnu_key_2_text("
618 	    "0x%p, \"%s\", 0x%p)\n",
619 	    (void *)gmnp, codeset ? codeset : "(null)", (void *)mp);
620 	printgnumsg(gmnp, 1);
621 	printmp(mp, 1);
622 #endif
623 
624 	/* first checks if header entry has been processed */
625 	if (!(gmnp->flag & ST_CHK)) {
626 		char	*msg_header;
627 
628 		msg_header = gnu_msgsearch(gmnp, "", &msgstr_len, &midx);
629 		ret = parse_header((const char *)msg_header, gmnp);
630 		if (ret == -1) {
631 			/* fatal error */
632 			DFLTMSG(result, mp->msgid1, mp->msgid2,
633 			    mp->n, mp->plural);
634 			return (result);
635 		}
636 		gmnp->flag |= ST_CHK;
637 	}
638 	msgstr = gnu_msgsearch(gmnp, mp->msgid1, &msgstr_len, &midx);
639 	if (msgstr == mp->msgid1) {
640 		/* not found */
641 		DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural);
642 		return (result);
643 	}
644 
645 #ifdef GETTEXT_DEBUG
646 	printgnumsg(gmnp, 1);
647 #endif
648 	if (gmnp->dst_encoding == NULL) {
649 		/*
650 		 * destination encoding has not been set.
651 		 */
652 		char	*dupcodeset = strdup(codeset);
653 		if (dupcodeset == NULL) {
654 			/* strdup failed */
655 			result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
656 			return (result);
657 		}
658 		gmnp->dst_encoding = dupcodeset;
659 
660 		if (strcmp(gmnp->dst_encoding, gmnp->src_encoding) == 0) {
661 			/*
662 			 * target encoding and src encoding
663 			 * are the same.
664 			 * No conversion required.
665 			 */
666 			conversion = 0;
667 		} else {
668 			/*
669 			 * target encoding is different from
670 			 * src encoding.
671 			 * New conversion required.
672 			 */
673 			/* sanity check */
674 			if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) {
675 				(void) iconv_close(gmnp->fd);
676 				gmnp->fd = (iconv_t)-1;
677 			}
678 			if (gmnp->conv_msgstr)
679 				free_conv_msgstr(gmnp, 0);
680 			conversion = 1;
681 			new_encoding = 1;
682 		}
683 	} else {
684 		/*
685 		 * dst encoding has been already set.
686 		 */
687 		if (strcmp(gmnp->dst_encoding, codeset) == 0) {
688 			/*
689 			 * dst encoding and target encoding are the same.
690 			 */
691 			if (strcmp(gmnp->dst_encoding, gmnp->src_encoding)
692 			    == 0) {
693 				/*
694 				 * dst encoding and src encoding are the same.
695 				 * No conversion required.
696 				 */
697 				conversion = 0;
698 			} else {
699 				/*
700 				 * dst encoding is different from src encoding.
701 				 * current conversion is valid.
702 				 */
703 				conversion = 1;
704 				new_encoding = 0;
705 				/* checks if iconv_open has succeeded before */
706 				if (gmnp->fd == (iconv_t)-1) {
707 					/*
708 					 * iconv_open should have failed before
709 					 * Assume this conversion is invalid
710 					 */
711 					conversion = 0;
712 				} else {
713 					if (gmnp->conv_msgstr == NULL) {
714 						/*
715 						 * memory allocation for
716 						 * conv_msgstr should
717 						 * have failed before.
718 						 */
719 						new_encoding = 1;
720 						if (gmnp->fd)
721 							(void) iconv_close(
722 							    gmnp->fd);
723 						gmnp->fd = (iconv_t)-1;
724 					}
725 				}
726 			}
727 		} else {
728 			/*
729 			 * dst encoding is different from target encoding.
730 			 * It has changed since before.
731 			 */
732 			char	*dupcodeset = strdup(codeset);
733 			if (dupcodeset == NULL) {
734 				result = dfltmsgstr(gmnp, msgstr,
735 				    msgstr_len, mp);
736 				return (result);
737 			}
738 			free(gmnp->dst_encoding);
739 			gmnp->dst_encoding = dupcodeset;
740 			if (strcmp(gmnp->dst_encoding, gmnp->src_encoding)
741 			    == 0) {
742 				/*
743 				 * dst encoding and src encoding are the same.
744 				 * now, no conversion required.
745 				 */
746 				conversion = 0;
747 				if (gmnp->conv_msgstr)
748 					free_conv_msgstr(gmnp, 1);
749 			} else {
750 				/*
751 				 * dst encoding is different from src encoding.
752 				 * new conversion required.
753 				 */
754 				conversion = 1;
755 				new_encoding = 1;
756 				if (gmnp->conv_msgstr)
757 					free_conv_msgstr(gmnp, 0);
758 			}
759 
760 			if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) {
761 				(void) iconv_close(gmnp->fd);
762 			}
763 			if (gmnp->fd != (iconv_t)-1) {
764 				gmnp->fd = (iconv_t)-1;
765 			}
766 		}
767 	}
768 
769 	if (conversion == 0) {
770 		/* no conversion */
771 		result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
772 		return (result);
773 	}
774 	/* conversion required */
775 
776 	if (new_encoding == 0) {
777 		/* dst codeset hasn't been changed since before */
778 		uint32_t	*cmsg;
779 		uint32_t	conv_msgstr_len;
780 		char	*conv_msgstr;
781 
782 		if (gmnp->conv_msgstr[midx] == NULL) {
783 			/* this msgstr hasn't been converted yet */
784 			result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp);
785 			return (result);
786 		}
787 		/* this msgstr is in the conversion cache */
788 		cmsg = (uint32_t *)(uintptr_t)gmnp->conv_msgstr[midx];
789 		conv_msgstr_len = *cmsg;
790 		conv_msgstr = (char *)(cmsg + 1);
791 		result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp);
792 		return (result);
793 	}
794 	/* new conversion */
795 #ifdef GETTEXT_DEBUG
796 	gprintf(0, "******* calling iconv_open()\n");
797 	gprintf(0, "      dst: \"%s\", src: \"%s\"\n",
798 	    gmnp->dst_encoding, gmnp->src_encoding);
799 #endif
800 	fd = iconv_open(gmnp->dst_encoding, gmnp->src_encoding);
801 	gmnp->fd = fd;
802 	if (fd == (iconv_t)-1) {
803 		/*
804 		 * iconv_open() failed.
805 		 * no conversion
806 		 */
807 		result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
808 		return (result);
809 	}
810 	result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp);
811 	return (result);
812 }
813 
814 
/*
 * Tables of the <inttypes.h> PRI* conversion specifiers.
 *
 * PRI_STR(x, n) names the macro PRI<x><n>; PRI_LEN(x, n) is the length
 * of its expansion without the null termination.  PRIS() lays out one
 * table per conversion character in this fixed order:
 *	index 0-3	PRIx8 .. PRIx64
 *	index 4-7	PRIxLEAST8 .. PRIxLEAST64
 *	index 8-11	PRIxFAST8 .. PRIxFAST64
 *	index 12	PRIxMAX
 *	index 13	PRIxPTR
 * The PRI_BIAS_* constants are the first index of each group.
 */
#define	PRI_STR(x, n)	PRI##x##n
#define	PRI_LEN(x, n)	(char)(sizeof (PRI_STR(x, n)) - 1)
#define	PRIS(P, x)	{\
/* x/N/ */	P(x, 8), P(x, 16), P(x, 32), P(x, 64), \
/* xLEAST/N/ */	P(x, LEAST8), P(x, LEAST16), P(x, LEAST32), P(x, LEAST64), \
/* xFAST/N/ */	P(x, FAST8), P(x, FAST16), P(x, FAST32), P(x, FAST64), \
/* xMAX,PTR */	P(x, MAX), P(x, PTR) \
}

#define	PRI_BIAS_LEAST	4
#define	PRI_BIAS_FAST	8
#define	PRI_BIAS_MAX	12
#define	PRI_BIAS_PTR	13

static const char	*pri_d[] = PRIS(PRI_STR, d);
static const char	*pri_i[] = PRIS(PRI_STR, i);
static const char	*pri_o[] = PRIS(PRI_STR, o);
static const char	*pri_u[] = PRIS(PRI_STR, u);
static const char	*pri_x[] = PRIS(PRI_STR, x);
static const char	*pri_X[] = PRIS(PRI_STR, X);

static const char	pri_d_len[] = PRIS(PRI_LEN, d);
static const char	pri_i_len[] = PRIS(PRI_LEN, i);
static const char	pri_o_len[] = PRIS(PRI_LEN, o);
static const char	pri_u_len[] = PRIS(PRI_LEN, u);
static const char	pri_x_len[] = PRIS(PRI_LEN, x);
static const char	pri_X_len[] = PRIS(PRI_LEN, X);

/* conversion character -> its specifier table and length table */
static struct {
	const char	type;
	const char	**str_table;
	const char	*len_table;
} pri_table[] = {
	{'d', pri_d, pri_d_len}, {'i', pri_i, pri_i_len},
	{'o', pri_o, pri_o_len}, {'u', pri_u, pri_u_len},
	{'x', pri_x, pri_x_len}, {'X', pri_X, pri_X_len},
};

/* name groups that may follow the conversion character */
static struct {
	const char	*name;
	const char	nlen;		/* length of name */
	const char	want_digits;	/* 1 if a width must follow */
	const char	bias;		/* first table index of the group */
} special_table[] = {
	{"LEAST",	5, 1, PRI_BIAS_LEAST},
	{"FAST",	4, 1, PRI_BIAS_FAST},
	{"MAX",		3, 0, PRI_BIAS_MAX},
	{"PTR",		3, 0, PRI_BIAS_PTR},
};

/*
 * conv_macro() returns the conversion specifier corresponding
 * to the macro name specified in 'str'.  'len' contains the
 * length of the macro name including the null termination.
 * '*lenp' will be set to the length of the returned conversion
 * specifier without the null termination.
 *
 * Recognized names are "I" (mapped to the empty string) and
 * PRI<c><width>, PRI<c>LEAST<width>, PRI<c>FAST<width>, PRI<c>MAX and
 * PRI<c>PTR, where <c> is one of d, i, o, u, x, X and <width> is
 * exactly "8", "16", "32" or "64".  NULL is returned for any other
 * name, and '*lenp' is left untouched in that case.
 */
static const char *
conv_macro(const char *str, uint32_t len, uint32_t *lenp)
{
	static const char	*const widths[] = { "8", "16", "32", "64" };
	const char	**tbl;
	const char	*ltbl;
	int	n, i, bias, idx, want_digits;

	if (len == 2) {
		if (*str == 'I') {
			/* Solaris does not support %I */
			*lenp = 0;
			return ("");
		}
		return (NULL);
	}

	if (len <= 4 || strncmp(str, "PRI", 3) != 0)
		return (NULL);

	str += 3;

	/* the conversion character selects the pair of tables */
	n = sizeof (pri_table) / sizeof (pri_table[0]);
	for (i = 0; i < n; i++) {
		if (pri_table[i].type == *str)
			break;
	}
	if (i == n)
		return (NULL);
	tbl = pri_table[i].str_table;
	ltbl = pri_table[i].len_table;

	str++;
	idx = 0;

	if (isdigit((unsigned char)*str)) {
		/* PRIx/N/ */
		bias = 0;
		want_digits = 1;
	} else {
		n = sizeof (special_table) / sizeof (special_table[0]);
		for (i = 0; i < n; i++) {
			if (strncmp(special_table[i].name,
			    str, special_table[i].nlen) == 0) {
				break;
			}
		}
		if (i == n)
			return (NULL);
		bias = special_table[i].bias;
		want_digits = special_table[i].want_digits;
		str += special_table[i].nlen;
	}

	if (want_digits) {
		/*
		 * The rest of the name must be exactly one of the
		 * supported widths.  Comparing the text, rather than a
		 * parsed number, also rejects leading zeros and values
		 * that only look like a width after integer truncation.
		 */
		n = sizeof (widths) / sizeof (widths[0]);
		for (idx = 0; idx < n; idx++) {
			if (strcmp(str, widths[idx]) == 0)
				break;
		}
		if (idx == n)
			return (NULL);
	} else if (*str != '\0') {
		/* unknown format */
		return (NULL);
	}

	*lenp = (uint32_t)ltbl[bias + idx];
	return (tbl[bias + idx]);
}
947 
948 static gnu_d_macro_t *
949 expand_macros(Msg_g_node *p)
950 {
951 	char	*base = (char *)p->msg_file_info;
952 	struct gnu_msg_rev1_info	*rev1_header = p->rev1_header;
953 	struct gnu_msg_ent	*d_macro_tbl;
954 	gnu_d_macro_t	*d_macro;
955 	uint32_t	num_of_d_macro, e_maclen, maclen, i;
956 	const char	*e_macname;
957 	char	*macname;
958 
959 	/* number of the dynamic macros */
960 	num_of_d_macro = SWAP(p, rev1_header->num_of_dynamic_macro);
961 
962 	d_macro = malloc((size_t)num_of_d_macro * sizeof (gnu_d_macro_t));
963 	if (d_macro == NULL)
964 		return (NULL);
965 
966 	/* pointer to the dynamic strings table */
967 	d_macro_tbl = (struct gnu_msg_ent *)(uintptr_t)
968 	    (base + SWAP(p, rev1_header->off_dynamic_macro));
969 
970 	for (i = 0; i < num_of_d_macro; i++) {
971 		macname = base + SWAP(p, d_macro_tbl[i].offset);
972 		maclen = SWAP(p, d_macro_tbl[i].len);
973 
974 		/*
975 		 * sanity check
976 		 * maclen includes a null termination.
977 		 */
978 		if (maclen != strlen(macname) + 1) {
979 			free(d_macro);
980 			return (NULL);
981 		}
982 		e_macname = conv_macro(macname, maclen, &e_maclen);
983 		if (e_macname == NULL) {
984 			free(d_macro);
985 			return (NULL);
986 		}
987 		d_macro[i].len = e_maclen;
988 		d_macro[i].ptr = e_macname;
989 	}
990 
991 	return (d_macro);
992 }
993 
994 static char *
995 expand_dynamic_message(Msg_g_node *p, struct gnu_msg_ent **e_msgs)
996 {
997 
998 	char	*base = (char *)p->msg_file_info;
999 	struct gnu_msg_rev1_info	*rev1_header = p->rev1_header;
1000 	struct gnu_dynamic_tbl	*d_info;
1001 	struct gnu_dynamic_ent	*entry;
1002 	gnu_d_macro_t	*d_macro;
1003 	uint32_t	num_of_d_str, mlen, dlen, didx, i, j;
1004 	uint32_t	off_d_tbl;
1005 	uint32_t	*d_msg_off_tbl;
1006 	size_t	mchunk_size, used, need;
1007 	char	*mchunk, *msg;
1008 
1009 #define	MEM_INCR	(1024)
1010 
1011 	d_macro = expand_macros(p);
1012 	if (d_macro == NULL)
1013 		return (NULL);
1014 
1015 	/* number of dynamic messages */
1016 	num_of_d_str = p->num_of_d_str;
1017 
1018 	mchunk = NULL;
1019 	mchunk_size = 0;	/* size of the allocated memory in mchunk */
1020 	used = 0;		/* size of the used memory in mchunk */
1021 	for (i = MSGID; i <= MSGSTR; i++) {
1022 		/* pointer to the offset table of dynamic msgids/msgstrs */
1023 		off_d_tbl = SWAP(p,
1024 		    i == MSGID ? rev1_header->off_dynamic_msgid_tbl :
1025 		    rev1_header->off_dynamic_msgstr_tbl);
1026 		/* pointer to the dynamic msgids/msgstrs */
1027 		d_msg_off_tbl = (uint32_t *)(uintptr_t)(base + off_d_tbl);
1028 		for (j = 0; j < num_of_d_str; j++) {
1029 			e_msgs[i][j].offset = used;
1030 			d_info = (struct gnu_dynamic_tbl *)(uintptr_t)
1031 			    (base + SWAP(p, d_msg_off_tbl[j]));
1032 			entry = d_info->entry;
1033 			msg = base + SWAP(p, d_info->offset);
1034 
1035 			for (;;) {
1036 				mlen = SWAP(p, entry->len);
1037 				didx = SWAP(p, entry->idx);
1038 				dlen = (didx == NOMORE_DYNAMIC_MACRO) ? 0 :
1039 				    d_macro[didx].len;
1040 				need = used + mlen + dlen;
1041 				if (need >= mchunk_size) {
1042 					char	*t;
1043 					size_t	n = mchunk_size;
1044 					do {
1045 						n += MEM_INCR;
1046 					} while (n <= need);
1047 					t = realloc(mchunk, n);
1048 					if (t == NULL) {
1049 						free(d_macro);
1050 						free(mchunk);
1051 						return (NULL);
1052 					}
1053 					mchunk = t;
1054 					mchunk_size = n;
1055 				}
1056 				(void) memcpy(mchunk + used, msg, (size_t)mlen);
1057 				msg += mlen;
1058 				used += mlen;
1059 
1060 				if (didx == NOMORE_DYNAMIC_MACRO) {
1061 					/*
1062 					 * Last segment of a static
1063 					 * msg string contains a null
1064 					 * termination, so an explicit
1065 					 * null termination is not required
1066 					 * here.
1067 					 */
1068 					break;
1069 				}
1070 				(void) memcpy(mchunk + used,
1071 				    d_macro[didx].ptr, (size_t)dlen);
1072 				used += dlen;
1073 				entry++; /* to next entry */
1074 			}
1075 			/*
1076 			 * e_msgs[][].len does not include a null termination
1077 			 */
1078 			e_msgs[i][j].len = used - e_msgs[i][j].offset - 1;
1079 		}
1080 	}
1081 
1082 	free(d_macro);
1083 
1084 	/* shrink mchunk to 'used' */
1085 	{
1086 		char	*t;
1087 		t = realloc(mchunk, used);
1088 		if (t == NULL) {
1089 			free(mchunk);
1090 			return (NULL);
1091 		}
1092 		mchunk = t;
1093 	}
1094 
1095 	return (mchunk);
1096 }
1097 
1098 static int
1099 build_rev1_info(Msg_g_node *p)
1100 {
1101 	uint32_t	*d_hash;
1102 	uint32_t	num_of_d_str, num_of_str;
1103 	uint32_t	idx, hash_value, hash_size;
1104 	size_t	hash_mem_size;
1105 	size_t	d_msgid_size, d_msgstr_size;
1106 	char	*chunk, *mchunk;
1107 	int	i;
1108 
1109 #ifdef GETTEXT_DEBUG
1110 	gprintf(0, "******* entering build_rev1_info(0x%p)\n", p);
1111 	printgnumsg(p, 1);
1112 #endif
1113 
1114 	if (p->hash_table == NULL) {
1115 		/* Revision 1 always requires the hash table */
1116 		return (-1);
1117 	}
1118 
1119 	num_of_str = p->num_of_str;
1120 	hash_size = p->hash_size;
1121 	num_of_d_str = p->num_of_d_str;
1122 
1123 	hash_mem_size = hash_size * sizeof (uint32_t);
1124 	ROUND(hash_mem_size, sizeof (struct gnu_msg_ent));
1125 
1126 	d_msgid_size = num_of_d_str * sizeof (struct gnu_msg_ent);
1127 	d_msgstr_size = num_of_d_str * sizeof (struct gnu_msg_ent);
1128 
1129 	chunk = malloc(hash_mem_size + d_msgid_size + d_msgstr_size);
1130 	if (chunk == NULL) {
1131 		return (-1);
1132 	}
1133 
1134 	d_hash = (uint32_t *)(uintptr_t)chunk;
1135 	p->d_msg[MSGID] = (struct gnu_msg_ent *)(uintptr_t)
1136 	    (chunk + hash_mem_size);
1137 	p->d_msg[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t)
1138 	    (chunk + hash_mem_size + d_msgid_size);
1139 
1140 	if ((mchunk = expand_dynamic_message(p, p->d_msg)) == NULL) {
1141 		free(chunk);
1142 		return (-1);
1143 	}
1144 
1145 	/* copy the original hash table into the dynamic hash table */
1146 	for (i = 0; i < hash_size; i++) {
1147 		d_hash[i] = SWAP(p, p->hash_table[i]);
1148 	}
1149 
1150 	/* fill in the dynamic hash table with dynamic messages */
1151 	for (i = 0; i < num_of_d_str; i++) {
1152 		hash_value = get_hashid(mchunk + p->d_msg[MSGID][i].offset,
1153 		    NULL);
1154 		idx = get_hash_index(d_hash, hash_value, hash_size);
1155 		d_hash[idx] = num_of_str + i + 1;
1156 	}
1157 
1158 	p->mchunk = mchunk;
1159 	p->hash_table = d_hash;
1160 
1161 #ifdef	GETTEXT_DEBUG
1162 	print_rev1_info(p);
1163 	gprintf(0, "******* exiting build_rev1_info()\n");
1164 	printgnumsg(p, 1);
1165 #endif
1166 
1167 	return (0);
1168 }
1169 
1170 /*
1171  * gnu_setmsg
1172  *
1173  * INPUT
1174  *   mnp  - message node
1175  *   addr - address to the mmapped file
1176  *   size - size of the file
1177  *
1178  * RETURN
1179  *   0   - either T_GNU_MO or T_ILL_MO has been set
1180  *  -1   - failed
1181  */
1182 int
1183 gnu_setmsg(Msg_node *mnp, char *addr, size_t size)
1184 {
1185 	struct gnu_msg_info	*gnu_header;
1186 	Msg_g_node	*p;
1187 
1188 #ifdef GETTEXT_DEBUG
1189 	gprintf(0, "******** entering gnu_setmsg(0x%p, 0x%p, %lu)\n",
1190 	    (void *)mnp, addr, size);
1191 	printmnp(mnp, 1);
1192 #endif
1193 
1194 	/* checks the GNU MAGIC number */
1195 	if (size < sizeof (struct gnu_msg_info)) {
1196 		/* invalid mo file */
1197 		mnp->type = T_ILL_MO;
1198 #ifdef	GETTEXT_DEBUG
1199 		gprintf(0, "********* exiting gnu_setmsg\n");
1200 		printmnp(mnp, 1);
1201 #endif
1202 		return (0);
1203 	}
1204 
1205 	gnu_header = (struct gnu_msg_info *)(uintptr_t)addr;
1206 
1207 	p = calloc(1, sizeof (Msg_g_node));
1208 	if (p == NULL) {
1209 		return (-1);
1210 	}
1211 	p->msg_file_info = gnu_header;
1212 
1213 	if (gnu_header->magic == GNU_MAGIC) {
1214 		switch (gnu_header->revision) {
1215 		case GNU_REVISION_0_1:
1216 		case GNU_REVISION_1_1:
1217 			p->flag |= ST_REV1;
1218 			break;
1219 		}
1220 	} else if (gnu_header->magic == GNU_MAGIC_SWAPPED) {
1221 		p->flag |= ST_SWP;
1222 		switch (gnu_header->revision) {
1223 		case GNU_REVISION_0_1_SWAPPED:
1224 		case GNU_REVISION_1_1_SWAPPED:
1225 			p->flag |= ST_REV1;
1226 			break;
1227 		}
1228 	} else {
1229 		/* invalid mo file */
1230 		free(p);
1231 		mnp->type = T_ILL_MO;
1232 #ifdef	GETTEXT_DEBUG
1233 		gprintf(0, "********* exiting gnu_setmsg\n");
1234 		printmnp(mnp, 1);
1235 #endif
1236 		return (0);
1237 	}
1238 
1239 	p->fsize = size;
1240 	p->num_of_str = SWAP(p, gnu_header->num_of_str);
1241 	p->hash_size = SWAP(p, gnu_header->sz_hashtbl);
1242 	p->hash_table = p->hash_size <= 2 ? NULL :
1243 	    (uint32_t *)(uintptr_t)
1244 	    (addr + SWAP(p, gnu_header->off_hashtbl));
1245 
1246 	p->msg_tbl[MSGID] = (struct gnu_msg_ent *)(uintptr_t)
1247 	    (addr + SWAP(p, gnu_header->off_msgid_tbl));
1248 	p->msg_tbl[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t)
1249 	    (addr + SWAP(p, gnu_header->off_msgstr_tbl));
1250 
1251 	if (p->flag & ST_REV1) {
1252 		/* Revision 1 */
1253 		struct gnu_msg_rev1_info	*rev1_header;
1254 
1255 		rev1_header = (struct gnu_msg_rev1_info *)
1256 		    (uintptr_t)(addr + sizeof (struct gnu_msg_info));
1257 		p->rev1_header = rev1_header;
1258 		p->num_of_d_str = SWAP(p, rev1_header->num_of_dynamic_str);
1259 		if (build_rev1_info(p) == -1) {
1260 			free(p);
1261 #ifdef GETTEXT_DEBUG
1262 			gprintf(0, "******** exiting gnu_setmsg: "
1263 			    "build_rev1_info() failed\n");
1264 #endif
1265 			return (-1);
1266 		}
1267 	}
1268 
1269 	mnp->msg.gnumsg = p;
1270 	mnp->type = T_GNU_MO;
1271 
1272 #ifdef GETTEXT_DEBUG
1273 	gprintf(0, "********* exiting gnu_setmsg\n");
1274 	printmnp(mnp, 1);
1275 #endif
1276 	return (0);
1277 }
1278 
/*
 * get_hash_index
 *
 * Finds the slot where a new entry with the given hash_value goes.
 * Probing starts at hash_value % hash_size and advances by
 * 1 + (hash_value % (hash_size - 2)), wrapping around at hash_size,
 * until a slot holding 0 is reached.  The index of that empty slot is
 * returned.
 *
 * The search only ends at an empty slot, so one must be reachable.
 */
static uint32_t
get_hash_index(uint32_t *hash_tbl, uint32_t hash_value, uint32_t hash_size)
{
	uint32_t	slot = hash_value % hash_size;
	uint32_t	step = 1 + (hash_value % (hash_size - 2));

	while (hash_tbl[slot] != 0) {
		/* occupied: try the next probe position */
		slot = (slot + step) % hash_size;
	}

	return (slot);
}
1302