xref: /illumos-gate/usr/src/lib/libc/port/i18n/gettext_gnu.c (revision 9c72db81a69bf1ea2a220c7cfc40eb0ef089be8c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include "lint.h"
28 #include "mtlib.h"
29 #include <ctype.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <sys/types.h>
34 #include <sys/mman.h>
35 #include <sys/param.h>
36 #include <sys/stat.h>
37 #include <thread.h>
38 #include <synch.h>
39 #include <unistd.h>
40 #include <limits.h>
41 #include <errno.h>
42 #include <inttypes.h>
43 #include "libc.h"
44 #include "msgfmt.h"
45 #include "nlspath_checks.h"
46 #include "gettext.h"
47 
48 #ifdef DEBUG
49 #include <assert.h>
50 #endif
51 
/* The following symbols are just for GNU binary compatibility */
int	_nl_msg_cat_cntr;
int	*_nl_domain_bindings;

/*
 * Shared empty string.  parse_header() stores it in src_encoding
 * whenever the header entry does not yield a charset name.
 */
static const char	*nullstr = "";

/*
 * Keywords that parse_header() searches for in the header entry of a
 * GNU MO file, and their lengths without the null termination.
 */
#define	CHARSET_MOD	"charset="
#define	CHARSET_LEN	(sizeof (CHARSET_MOD) - 1)
#define	NPLURALS_MOD	"nplurals="
#define	NPLURALS_LEN	(sizeof (NPLURALS_MOD) - 1)
#define	PLURAL_MOD	"plural="
#define	PLURAL_LEN	(sizeof (PLURAL_MOD) - 1)

/* forward declaration; used by build_rev1_info() before its definition */
static uint32_t	get_hash_index(uint32_t *, uint32_t, uint32_t);
66 
67 /*
68  * free_conv_msgstr
69  *
70  * release the memory allocated for storing code-converted messages
71  *
72  * f
73  *	0:	do not free gmnp->conv_msgstr
74  *	1:	free gmnp->conv_msgstr
75  */
76 static void
77 free_conv_msgstr(Msg_g_node *gmnp, int f)
78 {
79 	uint32_t	i, num_of_conv;
80 
81 #ifdef GETTEXT_DEBUG
82 	gprintf(0, "*************** free_conv_msgstr(0x%p, %d)\n",
83 	    (void *)gmnp, f);
84 	printgnumsg(gmnp, 1);
85 #endif
86 
87 	num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str;
88 	for (i = 0; i < num_of_conv; i++) {
89 		if (gmnp->conv_msgstr[i]) {
90 			free(gmnp->conv_msgstr[i]);
91 		}
92 		gmnp->conv_msgstr[i] = NULL;
93 	}
94 	if (f) {
95 		free(gmnp->conv_msgstr);
96 		gmnp->conv_msgstr = NULL;
97 	}
98 }
99 
100 /*
101  * dfltmsgstr
102  *
103  * choose an appropriate message by evaluating the plural expression,
104  * and return it.
105  */
106 static char *
107 dfltmsgstr(Msg_g_node *gmnp, const char *msgstr, uint32_t msgstr_len,
108     struct msg_pack *mp)
109 {
110 	unsigned int	pindex;
111 	size_t	len;
112 	const char	*p;
113 
114 #ifdef GETTEXT_DEBUG
115 	gprintf(0, "*************** dfltmsgstr(0x%p, \"%s\", %u, 0x%p)\n",
116 	    (void *)gmnp,
117 	    msgstr ? msgstr : "(null)", msgstr_len, (void *)mp);
118 	printgnumsg(gmnp, 1);
119 	printmp(mp, 1);
120 #endif
121 
122 	if (mp->plural) {
123 		if (gmnp->plural) {
124 			pindex = plural_eval(gmnp->plural, mp->n);
125 		} else {
126 			/*
127 			 * This mo does not have plural information.
128 			 * Using the English form.
129 			 */
130 			if (mp->n == 1)
131 				pindex = 0;
132 			else
133 				pindex = 1;
134 		}
135 #ifdef GETTEXT_DEBUG
136 		gprintf(0, "plural_eval returned: %u\n", pindex);
137 #endif
138 		if (pindex >= gmnp->nplurals) {
139 			/* should never happen */
140 			pindex = 0;
141 		}
142 		p = msgstr;
143 		for (; pindex != 0; pindex--) {
144 			len = msgstr_len - (p - msgstr);
145 			p = memchr(p, '\0', len);
146 			if (p == NULL) {
147 				/*
148 				 * null byte not found
149 				 * this should never happen
150 				 */
151 				char	*result;
152 				DFLTMSG(result, mp->msgid1, mp->msgid2,
153 				    mp->n, mp->plural);
154 				return (result);
155 			}
156 			p++;		/* skip */
157 		}
158 		return ((char *)p);
159 	}
160 
161 	return ((char *)msgstr);
162 }
163 
164 /*
165  * parse_header
166  *
167  * parse the header entry of the GNU MO file and
168  * extract the src encoding and the plural information of the MO file
169  */
170 static int
171 parse_header(const char *header, Msg_g_node *gmnp)
172 {
173 	char	*charset = NULL;
174 	char	*charset_str;
175 	size_t	len;
176 	char	*nplurals_str, *plural_str;
177 	plural_expr_t	plural;
178 	char	*p, *q;
179 	unsigned int	nplurals;
180 	int	ret;
181 
182 #ifdef GETTEXT_DEBUG
183 	gprintf(0, "*************** parse_header(\"%s\", 0x%p)\n",
184 	    header ? header : "(null)", (void *)gmnp);
185 	printgnumsg(gmnp, 1);
186 #endif
187 
188 	if (header == NULL) {
189 		gmnp->src_encoding = (char *)nullstr;
190 		gmnp->nplurals = 2;
191 		gmnp->plural = NULL;
192 #ifdef GETTEXT_DEBUG
193 		gprintf(0, "*************** exiting parse_header\n");
194 		gprintf(0, "no header\n");
195 #endif
196 
197 		return (0);
198 	}
199 
200 	charset_str = strstr(header, CHARSET_MOD);
201 	if (charset_str == NULL) {
202 		gmnp->src_encoding = (char *)nullstr;
203 	} else {
204 		p = charset_str + CHARSET_LEN;
205 		q = p;
206 		while ((*q != ' ') && (*q != '\t') &&
207 		    (*q != '\n')) {
208 			q++;
209 		}
210 		len = q - p;
211 		if (len > 0) {
212 			charset = malloc(len + 1);
213 			if (charset == NULL) {
214 				gmnp->src_encoding = (char *)nullstr;
215 				gmnp->nplurals = 2;
216 				gmnp->plural = NULL;
217 				return (-1);
218 			}
219 			(void) memcpy(charset, p, len);
220 			charset[len] = '\0';
221 			gmnp->src_encoding = charset;
222 		} else {
223 			gmnp->src_encoding = (char *)nullstr;
224 		}
225 	}
226 
227 	nplurals_str = strstr(header, NPLURALS_MOD);
228 	plural_str = strstr(header, PLURAL_MOD);
229 	if (nplurals_str == NULL || plural_str == NULL) {
230 		/* no valid plural specification */
231 		gmnp->nplurals = 2;
232 		gmnp->plural = NULL;
233 #ifdef GETTEXT_DEBUG
234 		gprintf(0, "*************** exiting parse_header\n");
235 		gprintf(0, "no plural entry\n");
236 #endif
237 		return (0);
238 	} else {
239 		p = nplurals_str + NPLURALS_LEN;
240 		while (*p && isspace((unsigned char)*p)) {
241 			p++;
242 		}
243 		nplurals = (unsigned int)strtol(p, &q, 10);
244 		if (p != q) {
245 			gmnp->nplurals = nplurals;
246 		} else {
247 			gmnp->nplurals = 2;
248 		}
249 
250 		p = plural_str + PLURAL_LEN;
251 #ifdef GETTEXT_DEBUG
252 		gprintf(0, "plural_str: \"%s\"\n", p);
253 #endif
254 
255 		ret = plural_expr(&plural, (const char *)p);
256 		if (ret == 0) {
257 			/* parse succeeded */
258 			gmnp->plural = plural;
259 #ifdef GETTEXT_DEBUG
260 		gprintf(0, "*************** exiting parse_header\n");
261 		gprintf(0, "charset: \"%s\"\n",
262 		    charset ? charset : "(null)");
263 		printexpr(plural, 1);
264 #endif
265 			return (0);
266 		} else if (ret == 1) {
267 			/* parse error */
268 			gmnp->nplurals = 2;
269 			gmnp->plural = NULL;
270 			return (0);
271 		} else {
272 			/* fatal error */
273 			if (charset)
274 				free(charset);
275 			gmnp->src_encoding = (char *)nullstr;
276 			gmnp->nplurals = 2;
277 			gmnp->plural = NULL;
278 			return (-1);
279 		}
280 	}
281 	/* NOTREACHED */
282 }
283 
284 /*
285  * handle_lang
286  *
287  * take care of the LANGUAGE specification
288  */
289 char *
290 handle_lang(struct msg_pack *mp)
291 {
292 	const char	*p, *op, *q;
293 	size_t	locale_len;
294 	char	*result;
295 	char	locale[MAXPATHLEN];
296 
297 
298 #ifdef GETTEXT_DEBUG
299 	gprintf(0, "*************** handle_lang(0x%p)\n", (void *)mp);
300 	printmp(mp, 1);
301 #endif
302 
303 	p = mp->language;
304 
305 	while (*p) {
306 		op = p;
307 		q = strchr(p, ':');
308 		if (q == NULL) {
309 			locale_len = strlen(p);
310 			p += locale_len;
311 		} else {
312 			locale_len = q - p;
313 			p += locale_len + 1;
314 		}
315 		if (locale_len >= MAXPATHLEN || locale_len == 0) {
316 			/* illegal locale name */
317 			continue;
318 		}
319 		(void) memcpy(locale, op, locale_len);
320 		locale[locale_len] = '\0';
321 		mp->locale = locale;
322 
323 #ifdef GETTEXT_DEBUG
324 		*mp->msgfile = '\0';
325 #endif
326 		if (mk_msgfile(mp) == NULL) {
327 			/* illegal locale name */
328 			continue;
329 		}
330 
331 		result = handle_mo(mp);
332 		if (mp->status & ST_GNU_MSG_FOUND)
333 			return (result);
334 
335 		if (mp->status & ST_SUN_MO_FOUND)
336 			break;
337 	}
338 
339 	/*
340 	 * no valid locale found, Sun MO found, or
341 	 * GNU MO found but no valid msg found there.
342 	 */
343 
344 	if (mp->status & ST_GNU_MO_FOUND) {
345 		/*
346 		 * GNU MO found but no valid msg found there.
347 		 * returning DFLTMSG.
348 		 */
349 		DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural);
350 		return (result);
351 	}
352 	return (NULL);
353 }
354 
355 /*
356  * gnu_msgsearch
357  *
358  * Searchs the translation message for the specified msgid1.
359  * Hash algorithm used in this function is Open Addressing
360  * with Double Hashing:
361  * H(k, i) = (H1(k) + i * H2(k)) mod M
362  * H1(k) = hashvalue % M
363  * H2(k) = 1 + (hashvalue % (M - 2))
364  *
365  * Ref: The Art of Computer Programming Volume 3
366  * Sorting and Searching, second edition
367  * Donald E Knuth
368  */
369 static char *
370 gnu_msgsearch(Msg_g_node *gmnp, const char *msgid1,
371     uint32_t *msgstrlen, uint32_t *midx)
372 {
373 	struct gnu_msg_info	*header = gmnp->msg_file_info;
374 	struct gnu_msg_ent	*msgid_tbl, *msgstr_tbl;
375 	uint32_t	num_of_str, idx, mlen, msglen;
376 	uint32_t	hash_size, hash_val, hash_id, hash_inc, hash_idx;
377 	uint32_t	*hash_table;
378 	char	*base;
379 	char	*msg;
380 
381 #ifdef GETTEXT_DEBUG
382 	gprintf(0, "*************** gnu_msgsearch(0x%p, \"%s\", "
383 	    "0x%p, 0x%p)\n",
384 	    (void *)gmnp, msgid1, msgstrlen, midx);
385 	printgnumsg(gmnp, 1);
386 #endif
387 
388 	base = (char *)header;
389 
390 	msgid_tbl = gmnp->msg_tbl[MSGID];
391 	msgstr_tbl = gmnp->msg_tbl[MSGSTR];
392 	hash_table = gmnp->hash_table;
393 	hash_size = gmnp->hash_size;
394 	num_of_str = gmnp->num_of_str;
395 
396 	if (!(gmnp->flag & ST_REV1) &&
397 	    (hash_table == NULL || (hash_size <= 2))) {
398 		/*
399 		 * Revision 0 and
400 		 * No hash table exists or
401 		 * hash size is enough small.
402 		 */
403 		uint32_t	top, bottom;
404 		char	*msg_id_str;
405 		int	val;
406 
407 		top = 0;
408 		bottom = num_of_str;
409 		while (top < bottom) {
410 			idx = (top + bottom) / 2;
411 			msg_id_str = base +
412 			    SWAP(gmnp, msgid_tbl[idx].offset);
413 
414 			val = strcmp(msg_id_str, msgid1);
415 			if (val < 0) {
416 				top = idx + 1;
417 			} else if (val > 0) {
418 				bottom = idx;
419 			} else {
420 				*msgstrlen = (unsigned int)
421 				    SWAP(gmnp, msgstr_tbl[idx].len) + 1;
422 				*midx = idx;
423 				return (base +
424 				    SWAP(gmnp, msgstr_tbl[idx].offset));
425 			}
426 		}
427 		/* not found */
428 		return ((char *)msgid1);
429 	}
430 
431 	/* use hash table */
432 	hash_id = get_hashid(msgid1, &msglen);
433 	hash_idx = hash_id % hash_size;
434 	hash_inc = 1 + (hash_id % (hash_size - 2));
435 
436 	for (;;) {
437 		hash_val = HASH_TBL(gmnp, hash_table[hash_idx]);
438 
439 		if (hash_val == 0) {
440 			/* not found */
441 			return ((char *)msgid1);
442 		}
443 		if (hash_val <= num_of_str) {
444 			/* static message */
445 			idx = hash_val - 1;
446 			mlen = SWAP(gmnp, msgid_tbl[idx].len);
447 			msg = base + SWAP(gmnp, msgid_tbl[idx].offset);
448 		} else {
449 			if (!(gmnp->flag & ST_REV1)) {
450 				/* rev 0 does not have dynamic message */
451 				return ((char *)msgid1);
452 			}
453 			/* dynamic message */
454 			idx = hash_val - num_of_str - 1;
455 			mlen = gmnp->d_msg[MSGID][idx].len;
456 			msg = gmnp->mchunk + gmnp->d_msg[MSGID][idx].offset;
457 		}
458 		if (msglen <= mlen && strcmp(msgid1, msg) == 0) {
459 			/* found */
460 			break;
461 		}
462 		hash_idx = (hash_idx + hash_inc) % hash_size;
463 	}
464 
465 	/* msgstrlen should include a null termination */
466 	if (hash_val <= num_of_str) {
467 		*msgstrlen = SWAP(gmnp, msgstr_tbl[idx].len) + 1;
468 		msg = base + SWAP(gmnp, msgstr_tbl[idx].offset);
469 		*midx = idx;
470 	} else {
471 		*msgstrlen = gmnp->d_msg[MSGSTR][idx].len + 1;
472 		msg = gmnp->mchunk + gmnp->d_msg[MSGSTR][idx].offset;
473 		*midx = idx + num_of_str;
474 	}
475 
476 	return (msg);
477 }
478 
/*
 * do_conv
 *
 * Converts the specified string from the src encoding
 * to the dst encoding by calling iconv()
 *
 * fd		conversion descriptor obtained from iconv_open()
 * src		bytes to convert
 * srclen	number of bytes in src (callers pass the length
 *		including the null termination)
 *
 * Returns a malloc'ed buffer that the caller owns, or NULL if memory
 * could not be allocated.  The first uint32_t of the buffer holds the
 * number of converted bytes; the converted bytes follow it.  If iconv()
 * stops with an error other than E2BIG, whatever was converted up to
 * that point is returned.  One extra null byte, not counted in the
 * length, always follows the converted bytes so that the result can be
 * used as a C string even after such a partial conversion.
 */
static uint32_t *
do_conv(iconv_t fd, const char *src, uint32_t srclen)
{
	uint32_t	tolen;
	uint32_t	*ptr, *optr;
	size_t	oleft, ileft, bufsize, memincr;
	char	*to, *tptr;

#ifdef GETTEXT_DEBUG
	gprintf(0, "*************** do_conv("
	    "0x%p, \"%s\", %d)\n",
	    (void *)fd, src ? src : "(null)", srclen);
#endif

	/* widen first so that the doubling cannot wrap in 32 bits */
	memincr = (size_t)srclen * 2;
	if (memincr == 0) {
		/* the buffer has to be able to grow on E2BIG */
		memincr = 1;
	}
	bufsize = memincr;
	ileft = srclen;
	oleft = bufsize;
	/* length word + output area + guard byte for the null */
	ptr = malloc(sizeof (uint32_t) + bufsize + 1);
	if (ptr == NULL) {
		return (NULL);
	}
	to = (char *)(ptr + 1);
	tptr = to;

	for (;;) {
		errno = 0;
#ifdef GETTEXT_DEBUG
		gprintf(0, "******* calling iconv()\n");
#endif
		/*
		 * The second parameter of iconv() is declared as
		 * const char ** on some systems and char ** on others;
		 * going through void * satisfies both.
		 */
		if (iconv(fd, (void *)&src, &ileft, &tptr, &oleft) !=
		    (size_t)-1) {
			break;
		}
		if (errno != E2BIG) {
			/* conversion error; keep what has been converted */
			break;
		}
#ifdef GETTEXT_DEBUG
		gprintf(0, "******* iconv detected E2BIG\n");
		gprintf(0, "old bufsize: %u\n", (unsigned int)bufsize);
#endif

		optr = realloc(ptr,
		    sizeof (uint32_t) + bufsize + memincr + 1);
		if (optr == NULL) {
			free(ptr);
			return (NULL);
		}
		ptr = optr;
		/* resume right after the bytes produced so far */
		to = (char *)(optr + 1);
		tptr = to + (bufsize - oleft);
		oleft += memincr;
		bufsize += memincr;
#ifdef GETTEXT_DEBUG
		gprintf(0, "new bufsize: %u\n", (unsigned int)bufsize);
#endif
	}

	tolen = (uint32_t)(bufsize - oleft);
	to[tolen] = '\0';

	if (tolen < bufsize) {
		/*
		 * shrink the buffer; if that fails the larger buffer
		 * is still valid, so just keep it.
		 */
		optr = realloc(ptr, sizeof (uint32_t) + tolen + 1);
		if (optr != NULL) {
			ptr = optr;
		}
	}
	*ptr = tolen;

#ifdef GETTEXT_DEBUG
	gprintf(0, "******* exiting do_conv()\n");
	gprintf(0, "tolen: %u\n", *ptr);
	gprintf(0, "return: 0x%p\n", ptr);
#endif
	return (ptr);
}
564 
565 /*
566  * conv_msg
567  */
568 static char *
569 conv_msg(Msg_g_node *gmnp, char *msgstr, uint32_t msgstr_len, uint32_t midx,
570     struct msg_pack *mp)
571 {
572 	uint32_t	*conv_dst;
573 	size_t	num_of_conv, conv_msgstr_len;
574 	char	*conv_msgstr, *result;
575 
576 	if (gmnp->conv_msgstr == NULL) {
577 		num_of_conv = gmnp->num_of_str + gmnp->num_of_d_str;
578 		gmnp->conv_msgstr =
579 		    calloc((size_t)num_of_conv, sizeof (uint32_t *));
580 		if (gmnp->conv_msgstr == NULL) {
581 			/* malloc failed */
582 			result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
583 			return (result);
584 		}
585 	}
586 
587 	conv_dst = do_conv(gmnp->fd, (const char *)msgstr, msgstr_len);
588 
589 	if (conv_dst == NULL) {
590 		result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
591 		return (result);
592 	}
593 	conv_msgstr_len = *conv_dst;
594 	gmnp->conv_msgstr[midx] = conv_dst;
595 	conv_msgstr = (char *)(conv_dst + 1);
596 	result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp);
597 	return (result);
598 }
599 
600 /*
601  * gnu_key_2_text
602  *
603  * Extracts msgstr from the GNU MO file
604  */
605 char *
606 gnu_key_2_text(Msg_g_node *gmnp, const char *codeset,
607     struct msg_pack *mp)
608 {
609 	uint32_t	msgstr_len, midx;
610 	iconv_t	fd;
611 	char	*result, *msgstr;
612 	int	ret, conversion, new_encoding;
613 
614 #ifdef GETTEXT_DEBUG
615 	gprintf(0, "*************** gnu_key_2_text("
616 	    "0x%p, \"%s\", 0x%p)\n",
617 	    (void *)gmnp, codeset ? codeset : "(null)", (void *)mp);
618 	printgnumsg(gmnp, 1);
619 	printmp(mp, 1);
620 #endif
621 
622 	/* first checks if header entry has been processed */
623 	if (!(gmnp->flag & ST_CHK)) {
624 		char	*msg_header;
625 
626 		msg_header = gnu_msgsearch(gmnp, "", &msgstr_len, &midx);
627 		ret = parse_header((const char *)msg_header, gmnp);
628 		if (ret == -1) {
629 			/* fatal error */
630 			DFLTMSG(result, mp->msgid1, mp->msgid2,
631 			    mp->n, mp->plural);
632 			return (result);
633 		}
634 		gmnp->flag |= ST_CHK;
635 	}
636 	msgstr = gnu_msgsearch(gmnp, mp->msgid1, &msgstr_len, &midx);
637 	if (msgstr == mp->msgid1) {
638 		/* not found */
639 		DFLTMSG(result, mp->msgid1, mp->msgid2, mp->n, mp->plural);
640 		return (result);
641 	}
642 
643 #ifdef GETTEXT_DEBUG
644 	printgnumsg(gmnp, 1);
645 #endif
646 	if (gmnp->dst_encoding == NULL) {
647 		/*
648 		 * destination encoding has not been set.
649 		 */
650 		char	*dupcodeset = strdup(codeset);
651 		if (dupcodeset == NULL) {
652 			/* strdup failed */
653 			result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
654 			return (result);
655 		}
656 		gmnp->dst_encoding = dupcodeset;
657 
658 		if (strcmp(gmnp->dst_encoding, gmnp->src_encoding) == 0) {
659 			/*
660 			 * target encoding and src encoding
661 			 * are the same.
662 			 * No conversion required.
663 			 */
664 			conversion = 0;
665 		} else {
666 			/*
667 			 * target encoding is different from
668 			 * src encoding.
669 			 * New conversion required.
670 			 */
671 			/* sanity check */
672 			if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) {
673 				(void) iconv_close(gmnp->fd);
674 				gmnp->fd = (iconv_t)-1;
675 			}
676 			if (gmnp->conv_msgstr)
677 				free_conv_msgstr(gmnp, 0);
678 			conversion = 1;
679 			new_encoding = 1;
680 		}
681 	} else {
682 		/*
683 		 * dst encoding has been already set.
684 		 */
685 		if (strcmp(gmnp->dst_encoding, codeset) == 0) {
686 			/*
687 			 * dst encoding and target encoding are the same.
688 			 */
689 			if (strcmp(gmnp->dst_encoding, gmnp->src_encoding)
690 			    == 0) {
691 				/*
692 				 * dst encoding and src encoding are the same.
693 				 * No conversion required.
694 				 */
695 				conversion = 0;
696 			} else {
697 				/*
698 				 * dst encoding is different from src encoding.
699 				 * current conversion is valid.
700 				 */
701 				conversion = 1;
702 				new_encoding = 0;
703 				/* checks if iconv_open has succeeded before */
704 				if (gmnp->fd == (iconv_t)-1) {
705 					/*
706 					 * iconv_open should have failed before
707 					 * Assume this conversion is invalid
708 					 */
709 					conversion = 0;
710 				} else {
711 					if (gmnp->conv_msgstr == NULL) {
712 						/*
713 						 * memory allocation for
714 						 * conv_msgstr should
715 						 * have failed before.
716 						 */
717 						new_encoding = 1;
718 						if (gmnp->fd)
719 							(void) iconv_close(
720 							    gmnp->fd);
721 						gmnp->fd = (iconv_t)-1;
722 					}
723 				}
724 			}
725 		} else {
726 			/*
727 			 * dst encoding is different from target encoding.
728 			 * It has changed since before.
729 			 */
730 			char	*dupcodeset = strdup(codeset);
731 			if (dupcodeset == NULL) {
732 				result = dfltmsgstr(gmnp, msgstr,
733 				    msgstr_len, mp);
734 				return (result);
735 			}
736 			free(gmnp->dst_encoding);
737 			gmnp->dst_encoding = dupcodeset;
738 			if (strcmp(gmnp->dst_encoding, gmnp->src_encoding)
739 			    == 0) {
740 				/*
741 				 * dst encoding and src encoding are the same.
742 				 * now, no conversion required.
743 				 */
744 				conversion = 0;
745 				if (gmnp->conv_msgstr)
746 					free_conv_msgstr(gmnp, 1);
747 			} else {
748 				/*
749 				 * dst encoding is different from src encoding.
750 				 * new conversion required.
751 				 */
752 				conversion = 1;
753 				new_encoding = 1;
754 				if (gmnp->conv_msgstr)
755 					free_conv_msgstr(gmnp, 0);
756 			}
757 
758 			if (gmnp->fd && (gmnp->fd != (iconv_t)-1)) {
759 				(void) iconv_close(gmnp->fd);
760 			}
761 			if (gmnp->fd != (iconv_t)-1) {
762 				gmnp->fd = (iconv_t)-1;
763 			}
764 		}
765 	}
766 
767 	if (conversion == 0) {
768 		/* no conversion */
769 		result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
770 		return (result);
771 	}
772 	/* conversion required */
773 
774 	if (new_encoding == 0) {
775 		/* dst codeset hasn't been changed since before */
776 		uint32_t	*cmsg;
777 		uint32_t	conv_msgstr_len;
778 		char	*conv_msgstr;
779 
780 		if (gmnp->conv_msgstr[midx] == NULL) {
781 			/* this msgstr hasn't been converted yet */
782 			result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp);
783 			return (result);
784 		}
785 		/* this msgstr is in the conversion cache */
786 		cmsg = (uint32_t *)(uintptr_t)gmnp->conv_msgstr[midx];
787 		conv_msgstr_len = *cmsg;
788 		conv_msgstr = (char *)(cmsg + 1);
789 		result = dfltmsgstr(gmnp, conv_msgstr, conv_msgstr_len, mp);
790 		return (result);
791 	}
792 	/* new conversion */
793 #ifdef GETTEXT_DEBUG
794 	gprintf(0, "******* calling iconv_open()\n");
795 	gprintf(0, "      dst: \"%s\", src: \"%s\"\n",
796 	    gmnp->dst_encoding, gmnp->src_encoding);
797 #endif
798 	fd = iconv_open(gmnp->dst_encoding, gmnp->src_encoding);
799 	gmnp->fd = fd;
800 	if (fd == (iconv_t)-1) {
801 		/*
802 		 * iconv_open() failed.
803 		 * no conversion
804 		 */
805 		result = dfltmsgstr(gmnp, msgstr, msgstr_len, mp);
806 		return (result);
807 	}
808 	result = conv_msg(gmnp, msgstr, msgstr_len, midx, mp);
809 	return (result);
810 }
811 
812 
/*
 * Tables of the <inttypes.h> PRI* conversion specifiers of this
 * platform, used by conv_macro().
 *
 * PRI_STR(x, n) is the macro PRIxn itself, i.e. the specifier string.
 * PRI_LEN(x, n) is the length of that string without the null.
 * PRIS(P, x) builds an initializer of 14 entries for conversion x:
 * index 0-3 is PRIx8/16/32/64, then the LEAST, FAST, MAX and PTR
 * variants at the offsets given by the PRI_BIAS_* values below.
 */
#define	PRI_STR(x, n)	PRI##x##n
#define	PRI_LEN(x, n)	(char)(sizeof (PRI_STR(x, n)) - 1)
#define	PRIS(P, x)	{\
/* x/N/ */	P(x, 8), P(x, 16), P(x, 32), P(x, 64), \
/* xLEAST/N/ */	P(x, LEAST8), P(x, LEAST16), P(x, LEAST32), P(x, LEAST64), \
/* xFAST/N/ */	P(x, FAST8), P(x, FAST16), P(x, FAST32), P(x, FAST64), \
/* xMAX,PTR */	P(x, MAX), P(x, PTR) \
}

#define	PRI_BIAS_LEAST	4
#define	PRI_BIAS_FAST	8
#define	PRI_BIAS_MAX	12
#define	PRI_BIAS_PTR	13

static const char	*pri_d[] = PRIS(PRI_STR, d);
static const char	*pri_i[] = PRIS(PRI_STR, i);
static const char	*pri_o[] = PRIS(PRI_STR, o);
static const char	*pri_u[] = PRIS(PRI_STR, u);
static const char	*pri_x[] = PRIS(PRI_STR, x);
static const char	*pri_X[] = PRIS(PRI_STR, X);

static const char	pri_d_len[] = PRIS(PRI_LEN, d);
static const char	pri_i_len[] = PRIS(PRI_LEN, i);
static const char	pri_o_len[] = PRIS(PRI_LEN, o);
static const char	pri_u_len[] = PRIS(PRI_LEN, u);
static const char	pri_x_len[] = PRIS(PRI_LEN, x);
static const char	pri_X_len[] = PRIS(PRI_LEN, X);

/* maps the conversion letter that follows "PRI" to its two tables */
static const struct {
	const char	type;
	const char	**str_table;
	const char	*len_table;
} pri_table[] = {
	{'d', pri_d, pri_d_len}, {'i', pri_i, pri_i_len},
	{'o', pri_o, pri_o_len}, {'u', pri_u, pri_u_len},
	{'x', pri_x, pri_x_len}, {'X', pri_X, pri_X_len},
};

/*
 * The non-numeric suffixes that may follow the conversion letter:
 * length of the name, whether a width (8/16/32/64) has to follow it,
 * and the offset of its first entry in the PRIS tables.
 */
static const struct {
	const char	*name;
	const char	nlen;
	const char	want_digits;
	const char	bias;
} special_table[] = {
	{"LEAST",	5, 1, PRI_BIAS_LEAST},
	{"FAST",	4, 1, PRI_BIAS_FAST},
	{"MAX",		3, 0, PRI_BIAS_MAX},
	{"PTR",		3, 0, PRI_BIAS_PTR},
};

/*
 * conv_macro() returns the conversion specifier corresponding
 * to the macro name specified in 'str'.  'len' contains the
 * length of the macro name including the null termination.
 * '*lenp' will be set to the length of the returning conversion
 * specifier without the null termination.
 *
 * Recognized names are "I", which yields an empty specifier, and
 * PRI{d,i,o,u,x,X} followed by 8/16/32/64, LEAST8..64, FAST8..64,
 * MAX or PTR.  NULL is returned for any other name, in which case
 * '*lenp' is left untouched.
 */
static const char *
conv_macro(const char *str, uint32_t len, uint32_t *lenp)
{
	const char	**tbl;
	const char	*ltbl;
	char	*next;
	long	num;
	int	n, i, bias, idx, want_digits;

	if (len == 2) {
		if (*str == 'I') {
			/* Solaris does not support %I */
			*lenp = 0;
			return ("");
		}
		return (NULL);
	}

	if (len <= 4 || strncmp(str, "PRI", 3) != 0)
		return (NULL);

	str += 3;

	/* conversion letter */
	n = sizeof (pri_table) / sizeof (pri_table[0]);
	for (i = 0; i < n; i++) {
		if (pri_table[i].type == *str)
			break;
	}
	if (i == n)
		return (NULL);
	tbl = pri_table[i].str_table;
	ltbl = pri_table[i].len_table;

	str++;
	idx = want_digits = 0;

	if (isdigit((unsigned char)*str)) {
		/* PRIx/N/ */
		bias = 0;
		want_digits = 1;
	} else {
		n = sizeof (special_table) / sizeof (special_table[0]);
		for (i = 0; i < n; i++) {
			if (strncmp(special_table[i].name,
			    str, special_table[i].nlen) == 0) {
				break;
			}
		}
		if (i == n)
			return (NULL);
		bias = special_table[i].bias;
		want_digits = special_table[i].want_digits;
		str += special_table[i].nlen;
	}

	if (want_digits) {
		if (!isdigit((unsigned char)*str))
			return (NULL);
		/*
		 * Keep the full range of strtol(); narrowing the value
		 * to an int could turn an out-of-range width into one
		 * of the valid ones.
		 */
		num = strtol(str, &next, 10);
		/* see if it is 8/16/32/64 */
		for (n = 8, idx = 0; idx < 4; idx++, n *= 2) {
			if (num == n)
				break;
		}
		if (idx == 4)
			return (NULL);
		str = next;
	}
	if (*str != '\0') {
		/* unknown format */
		return (NULL);
	}

	*lenp = (uint32_t)ltbl[bias + idx];
	return (tbl[bias + idx]);
}
945 
946 static gnu_d_macro_t *
947 expand_macros(Msg_g_node *p)
948 {
949 	char	*base = (char *)p->msg_file_info;
950 	struct gnu_msg_rev1_info	*rev1_header = p->rev1_header;
951 	struct gnu_msg_ent	*d_macro_tbl;
952 	gnu_d_macro_t	*d_macro;
953 	uint32_t	num_of_d_macro, e_maclen, maclen, i;
954 	const char	*e_macname;
955 	char	*macname;
956 
957 	/* number of the dynamic macros */
958 	num_of_d_macro = SWAP(p, rev1_header->num_of_dynamic_macro);
959 
960 	d_macro = malloc((size_t)num_of_d_macro * sizeof (gnu_d_macro_t));
961 	if (d_macro == NULL)
962 		return (NULL);
963 
964 	/* pointer to the dynamic strings table */
965 	d_macro_tbl = (struct gnu_msg_ent *)(uintptr_t)
966 	    (base + SWAP(p, rev1_header->off_dynamic_macro));
967 
968 	for (i = 0; i < num_of_d_macro; i++) {
969 		macname = base + SWAP(p, d_macro_tbl[i].offset);
970 		maclen = SWAP(p, d_macro_tbl[i].len);
971 
972 		/*
973 		 * sanity check
974 		 * maclen includes a null termination.
975 		 */
976 		if (maclen != strlen(macname) + 1) {
977 			free(d_macro);
978 			return (NULL);
979 		}
980 		e_macname = conv_macro(macname, maclen, &e_maclen);
981 		if (e_macname == NULL) {
982 			free(d_macro);
983 			return (NULL);
984 		}
985 		d_macro[i].len = e_maclen;
986 		d_macro[i].ptr = e_macname;
987 	}
988 
989 	return (d_macro);
990 }
991 
992 static char *
993 expand_dynamic_message(Msg_g_node *p, struct gnu_msg_ent **e_msgs)
994 {
995 
996 	char	*base = (char *)p->msg_file_info;
997 	struct gnu_msg_rev1_info	*rev1_header = p->rev1_header;
998 	struct gnu_dynamic_tbl	*d_info;
999 	struct gnu_dynamic_ent	*entry;
1000 	gnu_d_macro_t	*d_macro;
1001 	uint32_t	num_of_d_str, mlen, dlen, didx, i, j;
1002 	uint32_t	off_d_tbl;
1003 	uint32_t	*d_msg_off_tbl;
1004 	size_t	mchunk_size, used, need;
1005 	char	*mchunk, *msg;
1006 
1007 #define	MEM_INCR	(1024)
1008 
1009 	d_macro = expand_macros(p);
1010 	if (d_macro == NULL)
1011 		return (NULL);
1012 
1013 	/* number of dynamic messages */
1014 	num_of_d_str = p->num_of_d_str;
1015 
1016 	mchunk = NULL;
1017 	mchunk_size = 0;	/* size of the allocated memory in mchunk */
1018 	used = 0;		/* size of the used memory in mchunk */
1019 	for (i = MSGID; i <= MSGSTR; i++) {
1020 		/* pointer to the offset table of dynamic msgids/msgstrs */
1021 		off_d_tbl = SWAP(p,
1022 		    i == MSGID ? rev1_header->off_dynamic_msgid_tbl :
1023 		    rev1_header->off_dynamic_msgstr_tbl);
1024 		/* pointer to the dynamic msgids/msgstrs */
1025 		d_msg_off_tbl = (uint32_t *)(uintptr_t)(base + off_d_tbl);
1026 		for (j = 0; j < num_of_d_str; j++) {
1027 			e_msgs[i][j].offset = used;
1028 			d_info = (struct gnu_dynamic_tbl *)(uintptr_t)
1029 			    (base + SWAP(p, d_msg_off_tbl[j]));
1030 			entry = d_info->entry;
1031 			msg = base + SWAP(p, d_info->offset);
1032 
1033 			for (;;) {
1034 				mlen = SWAP(p, entry->len);
1035 				didx = SWAP(p, entry->idx);
1036 				dlen = (didx == NOMORE_DYNAMIC_MACRO) ? 0 :
1037 				    d_macro[didx].len;
1038 				need = used + mlen + dlen;
1039 				if (need >= mchunk_size) {
1040 					char	*t;
1041 					size_t	n = mchunk_size;
1042 					do {
1043 						n += MEM_INCR;
1044 					} while (n <= need);
1045 					t = realloc(mchunk, n);
1046 					if (t == NULL) {
1047 						free(d_macro);
1048 						free(mchunk);
1049 						return (NULL);
1050 					}
1051 					mchunk = t;
1052 					mchunk_size = n;
1053 				}
1054 				(void) memcpy(mchunk + used, msg, (size_t)mlen);
1055 				msg += mlen;
1056 				used += mlen;
1057 
1058 				if (didx == NOMORE_DYNAMIC_MACRO) {
1059 					/*
1060 					 * Last segment of a static
1061 					 * msg string contains a null
1062 					 * termination, so an explicit
1063 					 * null termination is not required
1064 					 * here.
1065 					 */
1066 					break;
1067 				}
1068 				(void) memcpy(mchunk + used,
1069 				    d_macro[didx].ptr, (size_t)dlen);
1070 				used += dlen;
1071 				entry++; /* to next entry */
1072 			}
1073 			/*
1074 			 * e_msgs[][].len does not include a null termination
1075 			 */
1076 			e_msgs[i][j].len = used - e_msgs[i][j].offset - 1;
1077 		}
1078 	}
1079 
1080 	free(d_macro);
1081 
1082 	/* shrink mchunk to 'used' */
1083 	{
1084 		char	*t;
1085 		t = realloc(mchunk, used);
1086 		if (t == NULL) {
1087 			free(mchunk);
1088 			return (NULL);
1089 		}
1090 		mchunk = t;
1091 	}
1092 
1093 	return (mchunk);
1094 }
1095 
1096 static int
1097 build_rev1_info(Msg_g_node *p)
1098 {
1099 	uint32_t	*d_hash;
1100 	uint32_t	num_of_d_str, num_of_str;
1101 	uint32_t	idx, hash_value, hash_size;
1102 	size_t	hash_mem_size;
1103 	size_t	d_msgid_size, d_msgstr_size;
1104 	char	*chunk, *mchunk;
1105 	int	i;
1106 
1107 #ifdef GETTEXT_DEBUG
1108 	gprintf(0, "******* entering build_rev1_info(0x%p)\n", p);
1109 	printgnumsg(p, 1);
1110 #endif
1111 
1112 	if (p->hash_table == NULL) {
1113 		/* Revision 1 always requires the hash table */
1114 		return (-1);
1115 	}
1116 
1117 	num_of_str = p->num_of_str;
1118 	hash_size = p->hash_size;
1119 	num_of_d_str = p->num_of_d_str;
1120 
1121 	hash_mem_size = hash_size * sizeof (uint32_t);
1122 	ROUND(hash_mem_size, sizeof (struct gnu_msg_ent));
1123 
1124 	d_msgid_size = num_of_d_str * sizeof (struct gnu_msg_ent);
1125 	d_msgstr_size = num_of_d_str * sizeof (struct gnu_msg_ent);
1126 
1127 	chunk = malloc(hash_mem_size + d_msgid_size + d_msgstr_size);
1128 	if (chunk == NULL) {
1129 		return (-1);
1130 	}
1131 
1132 	d_hash = (uint32_t *)(uintptr_t)chunk;
1133 	p->d_msg[MSGID] = (struct gnu_msg_ent *)(uintptr_t)
1134 	    (chunk + hash_mem_size);
1135 	p->d_msg[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t)
1136 	    (chunk + hash_mem_size + d_msgid_size);
1137 
1138 	if ((mchunk = expand_dynamic_message(p, p->d_msg)) == NULL) {
1139 		free(chunk);
1140 		return (-1);
1141 	}
1142 
1143 	/* copy the original hash table into the dynamic hash table */
1144 	for (i = 0; i < hash_size; i++) {
1145 		d_hash[i] = SWAP(p, p->hash_table[i]);
1146 	}
1147 
1148 	/* fill in the dynamic hash table with dynamic messages */
1149 	for (i = 0; i < num_of_d_str; i++) {
1150 		hash_value = get_hashid(mchunk + p->d_msg[MSGID][i].offset,
1151 		    NULL);
1152 		idx = get_hash_index(d_hash, hash_value, hash_size);
1153 		d_hash[idx] = num_of_str + i + 1;
1154 	}
1155 
1156 	p->mchunk = mchunk;
1157 	p->hash_table = d_hash;
1158 
1159 #ifdef	GETTEXT_DEBUG
1160 	print_rev1_info(p);
1161 	gprintf(0, "******* exiting build_rev1_info()\n");
1162 	printgnumsg(p, 1);
1163 #endif
1164 
1165 	return (0);
1166 }
1167 
1168 /*
1169  * gnu_setmsg
1170  *
1171  * INPUT
1172  *   mnp  - message node
1173  *   addr - address to the mmapped file
1174  *   size - size of the file
1175  *
1176  * RETURN
1177  *   0   - either T_GNU_MO or T_ILL_MO has been set
1178  *  -1   - failed
1179  */
1180 int
1181 gnu_setmsg(Msg_node *mnp, char *addr, size_t size)
1182 {
1183 	struct gnu_msg_info	*gnu_header;
1184 	Msg_g_node	*p;
1185 
1186 #ifdef GETTEXT_DEBUG
1187 	gprintf(0, "******** entering gnu_setmsg(0x%p, 0x%p, %lu)\n",
1188 	    (void *)mnp, addr, size);
1189 	printmnp(mnp, 1);
1190 #endif
1191 
1192 	/* checks the GNU MAGIC number */
1193 	if (size < sizeof (struct gnu_msg_info)) {
1194 		/* invalid mo file */
1195 		mnp->type = T_ILL_MO;
1196 #ifdef	GETTEXT_DEBUG
1197 		gprintf(0, "********* exiting gnu_setmsg\n");
1198 		printmnp(mnp, 1);
1199 #endif
1200 		return (0);
1201 	}
1202 
1203 	gnu_header = (struct gnu_msg_info *)(uintptr_t)addr;
1204 
1205 	p = calloc(1, sizeof (Msg_g_node));
1206 	if (p == NULL) {
1207 		return (-1);
1208 	}
1209 	p->msg_file_info = gnu_header;
1210 
1211 	if (gnu_header->magic == GNU_MAGIC) {
1212 		switch (gnu_header->revision) {
1213 		case GNU_REVISION_0_1:
1214 		case GNU_REVISION_1_1:
1215 			p->flag |= ST_REV1;
1216 			break;
1217 		}
1218 	} else if (gnu_header->magic == GNU_MAGIC_SWAPPED) {
1219 		p->flag |= ST_SWP;
1220 		switch (gnu_header->revision) {
1221 		case GNU_REVISION_0_1_SWAPPED:
1222 		case GNU_REVISION_1_1_SWAPPED:
1223 			p->flag |= ST_REV1;
1224 			break;
1225 		}
1226 	} else {
1227 		/* invalid mo file */
1228 		free(p);
1229 		mnp->type = T_ILL_MO;
1230 #ifdef	GETTEXT_DEBUG
1231 		gprintf(0, "********* exiting gnu_setmsg\n");
1232 		printmnp(mnp, 1);
1233 #endif
1234 		return (0);
1235 	}
1236 
1237 	p->fsize = size;
1238 	p->num_of_str = SWAP(p, gnu_header->num_of_str);
1239 	p->hash_size = SWAP(p, gnu_header->sz_hashtbl);
1240 	p->hash_table = p->hash_size <= 2 ? NULL :
1241 	    (uint32_t *)(uintptr_t)
1242 	    (addr + SWAP(p, gnu_header->off_hashtbl));
1243 
1244 	p->msg_tbl[MSGID] = (struct gnu_msg_ent *)(uintptr_t)
1245 	    (addr + SWAP(p, gnu_header->off_msgid_tbl));
1246 	p->msg_tbl[MSGSTR] = (struct gnu_msg_ent *)(uintptr_t)
1247 	    (addr + SWAP(p, gnu_header->off_msgstr_tbl));
1248 
1249 	if (p->flag & ST_REV1) {
1250 		/* Revision 1 */
1251 		struct gnu_msg_rev1_info	*rev1_header;
1252 
1253 		rev1_header = (struct gnu_msg_rev1_info *)
1254 		    (uintptr_t)(addr + sizeof (struct gnu_msg_info));
1255 		p->rev1_header = rev1_header;
1256 		p->num_of_d_str = SWAP(p, rev1_header->num_of_dynamic_str);
1257 		if (build_rev1_info(p) == -1) {
1258 			free(p);
1259 #ifdef GETTEXT_DEBUG
1260 			gprintf(0, "******** exiting gnu_setmsg: "
1261 			    "build_rev1_info() failed\n");
1262 #endif
1263 			return (-1);
1264 		}
1265 	}
1266 
1267 	mnp->msg.gnumsg = p;
1268 	mnp->type = T_GNU_MO;
1269 
1270 #ifdef GETTEXT_DEBUG
1271 	gprintf(0, "********* exiting gnu_setmsg\n");
1272 	printmnp(mnp, 1);
1273 #endif
1274 	return (0);
1275 }
1276 
/*
 * get_hash_index
 *
 * Returns the index of the first empty (zero) slot of hash_tbl on the
 * probe sequence of hash_value.  The sequence starts at
 * hash_value % hash_size and advances by
 * 1 + (hash_value % (hash_size - 2)), wrapping around at hash_size.
 *
 * hash_size has to be greater than 2, and the sequence has to reach
 * an empty slot; otherwise this function does not return.
 */
static uint32_t
get_hash_index(uint32_t *hash_tbl, uint32_t hash_value, uint32_t hash_size)
{
	uint32_t	slot = hash_value % hash_size;
	const uint32_t	step = 1 + (hash_value % (hash_size - 2));

	/* walk the probe sequence until a free slot shows up */
	while (hash_tbl[slot] != 0)
		slot = (slot + step) % hash_size;

	return (slot);
}
1300