xref: /titanic_50/usr/src/cmd/ssh/libssh/common/g11n.c (revision b9aa66a73c9016cf5c71fe80efe90ce9f2ca5c73)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
512fcfd5aSjp161948  * Common Development and Distribution License (the "License").
612fcfd5aSjp161948  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  *
216f786aceSNobutomo Nakano  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
227c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
237c478bd9Sstevel@tonic-gate  */
247c478bd9Sstevel@tonic-gate 
257c478bd9Sstevel@tonic-gate #include <errno.h>
267c478bd9Sstevel@tonic-gate #include <locale.h>
277c478bd9Sstevel@tonic-gate #include <langinfo.h>
287c478bd9Sstevel@tonic-gate #include <iconv.h>
297c478bd9Sstevel@tonic-gate #include <ctype.h>
306f786aceSNobutomo Nakano #include <wctype.h>
317c478bd9Sstevel@tonic-gate #include <strings.h>
327c478bd9Sstevel@tonic-gate #include <string.h>
337c478bd9Sstevel@tonic-gate #include <stdio.h>
347c478bd9Sstevel@tonic-gate #include <stdlib.h>
357c478bd9Sstevel@tonic-gate #include "includes.h"
367c478bd9Sstevel@tonic-gate #include "xmalloc.h"
377c478bd9Sstevel@tonic-gate #include "xlist.h"
386f786aceSNobutomo Nakano #include "compat.h"
396f786aceSNobutomo Nakano #include "log.h"
407c478bd9Sstevel@tonic-gate 
417c478bd9Sstevel@tonic-gate #ifdef MIN
427c478bd9Sstevel@tonic-gate #undef MIN
437c478bd9Sstevel@tonic-gate #endif /* MIN */
447c478bd9Sstevel@tonic-gate 
457c478bd9Sstevel@tonic-gate #define	MIN(x, y)	((x) < (y) ? (x) : (y))
467c478bd9Sstevel@tonic-gate 
477c478bd9Sstevel@tonic-gate #define	LOCALE_PATH	"/usr/bin/locale"
487c478bd9Sstevel@tonic-gate 
/* two-char language code, '-' and two-char country code */
5032f863ecSjp161948 #define	LANGTAG_MAX	5
517c478bd9Sstevel@tonic-gate 
527c478bd9Sstevel@tonic-gate static int locale_cmp(const void *d1, const void *d2);
537c478bd9Sstevel@tonic-gate static char *g11n_locale2langtag(char *locale);
547c478bd9Sstevel@tonic-gate 
556f786aceSNobutomo Nakano static char *do_iconv(iconv_t cd, const char *s, uint_t *lenp, char **err_str);
566f786aceSNobutomo Nakano 
/*
 * native_codeset records the codeset of the default system locale.
 * It is used to convert the contents of files (e.g. /etc/issue) that are
 * expected to be in the codeset of the default system locale.
 */
626f786aceSNobutomo Nakano static char *native_codeset;
637c478bd9Sstevel@tonic-gate 
649a8058b5Sjp161948 /*
659a8058b5Sjp161948  * Convert locale string name into a language tag. The caller is responsible for
669a8058b5Sjp161948  * freeing the memory allocated for the result.
679a8058b5Sjp161948  */
6832f863ecSjp161948 static char *
g11n_locale2langtag(char * locale)697c478bd9Sstevel@tonic-gate g11n_locale2langtag(char *locale)
707c478bd9Sstevel@tonic-gate {
717c478bd9Sstevel@tonic-gate 	char *langtag;
727c478bd9Sstevel@tonic-gate 
737c478bd9Sstevel@tonic-gate 	/* base cases */
7432f863ecSjp161948 	if (!locale || !*locale)
7532f863ecSjp161948 		return (NULL);
767c478bd9Sstevel@tonic-gate 
7732f863ecSjp161948 	if (strcmp(locale, "POSIX") == 0 || strcmp(locale, "C") == 0)
789a8058b5Sjp161948 		return (xstrdup("i-default"));
797c478bd9Sstevel@tonic-gate 
8032f863ecSjp161948 	/* punt for language codes which are not exactly 2 letters */
817c478bd9Sstevel@tonic-gate 	if (strlen(locale) < 2 ||
827c478bd9Sstevel@tonic-gate 	    !isalpha(locale[0]) ||
837c478bd9Sstevel@tonic-gate 	    !isalpha(locale[1]) ||
847c478bd9Sstevel@tonic-gate 	    (locale[2] != '\0' &&
857c478bd9Sstevel@tonic-gate 	    locale[2] != '_' &&
867c478bd9Sstevel@tonic-gate 	    locale[2] != '.' &&
877c478bd9Sstevel@tonic-gate 	    locale[2] != '@'))
8832f863ecSjp161948 		return (NULL);
897c478bd9Sstevel@tonic-gate 
907c478bd9Sstevel@tonic-gate 
9132f863ecSjp161948 	/* we have a primary language sub-tag */
927c478bd9Sstevel@tonic-gate 	langtag = (char *)xmalloc(LANGTAG_MAX + 1);
937c478bd9Sstevel@tonic-gate 
947c478bd9Sstevel@tonic-gate 	strncpy(langtag, locale, 2);
957c478bd9Sstevel@tonic-gate 	langtag[2] = '\0';
967c478bd9Sstevel@tonic-gate 
9732f863ecSjp161948 	/* do we have country sub-tag? For example: cs_CZ */
987c478bd9Sstevel@tonic-gate 	if (locale[2] == '_') {
997c478bd9Sstevel@tonic-gate 		if (strlen(locale) < 5 ||
1007c478bd9Sstevel@tonic-gate 		    !isalpha(locale[3]) ||
1017c478bd9Sstevel@tonic-gate 		    !isalpha(locale[4]) ||
10232f863ecSjp161948 		    (locale[5] != '\0' && (locale[5] != '.' &&
10332f863ecSjp161948 		    locale[5] != '@'))) {
10432f863ecSjp161948 			return (langtag);
1057c478bd9Sstevel@tonic-gate 		}
1067c478bd9Sstevel@tonic-gate 
10732f863ecSjp161948 		/* example: create cs-CZ from cs_CZ */
10832f863ecSjp161948 		if (snprintf(langtag, 6, "%.*s-%.*s", 2, locale, 2,
10932f863ecSjp161948 		    locale + 3) == 5)
11032f863ecSjp161948 			return (langtag);
1117c478bd9Sstevel@tonic-gate 	}
1127c478bd9Sstevel@tonic-gate 
11332f863ecSjp161948 	/* in all other cases we just use the primary language sub-tag */
11432f863ecSjp161948 	return (langtag);
1157c478bd9Sstevel@tonic-gate }
1167c478bd9Sstevel@tonic-gate 
11732f863ecSjp161948 uint_t
g11n_langtag_is_default(char * langtag)1187c478bd9Sstevel@tonic-gate g11n_langtag_is_default(char *langtag)
1197c478bd9Sstevel@tonic-gate {
1207c478bd9Sstevel@tonic-gate 	return (strcmp(langtag, "i-default") == 0);
1217c478bd9Sstevel@tonic-gate }
1227c478bd9Sstevel@tonic-gate 
1237c478bd9Sstevel@tonic-gate /*
1247c478bd9Sstevel@tonic-gate  * This lang tag / locale matching function works only for two-character
1257c478bd9Sstevel@tonic-gate  * language primary sub-tags and two-character country sub-tags.
1267c478bd9Sstevel@tonic-gate  */
12732f863ecSjp161948 uint_t
g11n_langtag_matches_locale(char * langtag,char * locale)1287c478bd9Sstevel@tonic-gate g11n_langtag_matches_locale(char *langtag, char *locale)
1297c478bd9Sstevel@tonic-gate {
13032f863ecSjp161948 	/* match "i-default" to the process' current locale if possible */
1317c478bd9Sstevel@tonic-gate 	if (g11n_langtag_is_default(langtag)) {
1327c478bd9Sstevel@tonic-gate 		if (strcasecmp(locale, "POSIX") == 0 ||
1337c478bd9Sstevel@tonic-gate 		    strcasecmp(locale, "C") == 0)
13432f863ecSjp161948 			return (1);
1357c478bd9Sstevel@tonic-gate 		else
13632f863ecSjp161948 			return (0);
1377c478bd9Sstevel@tonic-gate 	}
1387c478bd9Sstevel@tonic-gate 
13932f863ecSjp161948 	/*
14032f863ecSjp161948 	 * locale must be at least 2 chars long and the lang part must be
14132f863ecSjp161948 	 * exactly two characters
14232f863ecSjp161948 	 */
1437c478bd9Sstevel@tonic-gate 	if (strlen(locale) < 2 ||
1447c478bd9Sstevel@tonic-gate 	    (!isalpha(locale[0]) || !isalpha(locale[1]) ||
14532f863ecSjp161948 	    (locale[2] != '\0' && locale[2] != '_' &&
14632f863ecSjp161948 	    locale[2] != '.' && locale[2] != '@')))
14732f863ecSjp161948 		return (0);
1487c478bd9Sstevel@tonic-gate 
1497c478bd9Sstevel@tonic-gate 	/* same thing with the langtag */
1507c478bd9Sstevel@tonic-gate 	if (strlen(langtag) < 2 ||
1517c478bd9Sstevel@tonic-gate 	    (!isalpha(langtag[0]) || !isalpha(langtag[1]) ||
1527c478bd9Sstevel@tonic-gate 	    (langtag[2] != '\0' && langtag[2] != '-')))
15332f863ecSjp161948 		return (0);
1547c478bd9Sstevel@tonic-gate 
1557c478bd9Sstevel@tonic-gate 	/* primary language sub-tag and the locale's language part must match */
1567c478bd9Sstevel@tonic-gate 	if (strncasecmp(langtag, locale, 2) != 0)
15732f863ecSjp161948 		return (0);
1587c478bd9Sstevel@tonic-gate 
15932f863ecSjp161948 	/*
16032f863ecSjp161948 	 * primary language sub-tag and the locale's language match, now
16132f863ecSjp161948 	 * fuzzy check country part
16232f863ecSjp161948 	 */
1637c478bd9Sstevel@tonic-gate 
1647c478bd9Sstevel@tonic-gate 	/* neither langtag nor locale have more than one component */
1657c478bd9Sstevel@tonic-gate 	if (langtag[2] == '\0' &&
1667c478bd9Sstevel@tonic-gate 	    (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@'))
16732f863ecSjp161948 		return (2);
1687c478bd9Sstevel@tonic-gate 
1697c478bd9Sstevel@tonic-gate 	/* langtag has only one sub-tag... */
1707c478bd9Sstevel@tonic-gate 	if (langtag[2] == '\0')
17132f863ecSjp161948 		return (1);
1727c478bd9Sstevel@tonic-gate 
1737c478bd9Sstevel@tonic-gate 	/* locale has no country code... */
1747c478bd9Sstevel@tonic-gate 	if (locale[2] == '\0' || locale[2] == '.' || locale[2] == '@')
17532f863ecSjp161948 		return (1);
1767c478bd9Sstevel@tonic-gate 
1777c478bd9Sstevel@tonic-gate 	/* langtag has more than one subtag and the locale has a country code */
1787c478bd9Sstevel@tonic-gate 
1797c478bd9Sstevel@tonic-gate 	/* ignore second subtag if not two chars */
1807c478bd9Sstevel@tonic-gate 	if (strlen(langtag) < 5)
18132f863ecSjp161948 		return (1);
1827c478bd9Sstevel@tonic-gate 
1837c478bd9Sstevel@tonic-gate 	if (!isalpha(langtag[3]) || !isalpha(langtag[4]) ||
1847c478bd9Sstevel@tonic-gate 	    (langtag[5] != '\0' && langtag[5] != '-'))
18532f863ecSjp161948 		return (1);
1867c478bd9Sstevel@tonic-gate 
1877c478bd9Sstevel@tonic-gate 	/* ignore rest of locale if there is no two-character country part */
1887c478bd9Sstevel@tonic-gate 	if (strlen(locale) < 5)
18932f863ecSjp161948 		return (1);
1907c478bd9Sstevel@tonic-gate 
1917c478bd9Sstevel@tonic-gate 	if (locale[2] != '_' || !isalpha(locale[3]) || !isalpha(locale[4]) ||
1927c478bd9Sstevel@tonic-gate 	    (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@'))
19332f863ecSjp161948 		return (1);
1947c478bd9Sstevel@tonic-gate 
1957c478bd9Sstevel@tonic-gate 	/* if the country part matches, return 2 */
1967c478bd9Sstevel@tonic-gate 	if (strncasecmp(&langtag[3], &locale[3], 2) == 0)
19732f863ecSjp161948 		return (2);
1987c478bd9Sstevel@tonic-gate 
19932f863ecSjp161948 	return (1);
2007c478bd9Sstevel@tonic-gate }
2017c478bd9Sstevel@tonic-gate 
2027c478bd9Sstevel@tonic-gate char *
g11n_getlocale()2037c478bd9Sstevel@tonic-gate g11n_getlocale()
2047c478bd9Sstevel@tonic-gate {
20532f863ecSjp161948 	/* we have one text domain - always set it */
2067c478bd9Sstevel@tonic-gate 	(void) textdomain(TEXT_DOMAIN);
2077c478bd9Sstevel@tonic-gate 
20832f863ecSjp161948 	/* if the locale is not set, set it from the env vars */
20912fcfd5aSjp161948 	if (!setlocale(LC_MESSAGES, NULL))
21012fcfd5aSjp161948 		(void) setlocale(LC_MESSAGES, "");
2117c478bd9Sstevel@tonic-gate 
21232f863ecSjp161948 	return (setlocale(LC_MESSAGES, NULL));
2137c478bd9Sstevel@tonic-gate }
2147c478bd9Sstevel@tonic-gate 
2157c478bd9Sstevel@tonic-gate void
g11n_setlocale(int category,const char * locale)2167c478bd9Sstevel@tonic-gate g11n_setlocale(int category, const char *locale)
2177c478bd9Sstevel@tonic-gate {
2187c478bd9Sstevel@tonic-gate 	char *curr;
2197c478bd9Sstevel@tonic-gate 
2206f786aceSNobutomo Nakano 	if (native_codeset == NULL) {
2216f786aceSNobutomo Nakano 		/* set default locale, and record current codeset */
2226f786aceSNobutomo Nakano 		(void) setlocale(LC_ALL, "");
2236f786aceSNobutomo Nakano 		curr = nl_langinfo(CODESET);
2246f786aceSNobutomo Nakano 		native_codeset = xstrdup(curr);
2256f786aceSNobutomo Nakano 	}
2266f786aceSNobutomo Nakano 
22732f863ecSjp161948 	/* we have one text domain - always set it */
2287c478bd9Sstevel@tonic-gate 	(void) textdomain(TEXT_DOMAIN);
2297c478bd9Sstevel@tonic-gate 
2307c478bd9Sstevel@tonic-gate 	if (!locale)
2317c478bd9Sstevel@tonic-gate 		return;
2327c478bd9Sstevel@tonic-gate 
2337c478bd9Sstevel@tonic-gate 	if (*locale && ((curr = setlocale(category, NULL))) &&
2347c478bd9Sstevel@tonic-gate 	    strcmp(curr, locale) == 0)
2357c478bd9Sstevel@tonic-gate 		return;
2367c478bd9Sstevel@tonic-gate 
23732f863ecSjp161948 	/* if <category> is bogus, setlocale() will do nothing */
23812fcfd5aSjp161948 	(void) setlocale(category, locale);
2397c478bd9Sstevel@tonic-gate }
2407c478bd9Sstevel@tonic-gate 
2417c478bd9Sstevel@tonic-gate char **
g11n_getlocales()2427c478bd9Sstevel@tonic-gate g11n_getlocales()
2437c478bd9Sstevel@tonic-gate {
2447c478bd9Sstevel@tonic-gate 	FILE *locale_out;
24532f863ecSjp161948 	uint_t n_elems, list_size, long_line = 0;
2467c478bd9Sstevel@tonic-gate 	char **list;
2477c478bd9Sstevel@tonic-gate 	char locale[64];	/* 64 bytes is plenty for locale names */
2487c478bd9Sstevel@tonic-gate 
24932f863ecSjp161948 	if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL)
25032f863ecSjp161948 		return (NULL);
2517c478bd9Sstevel@tonic-gate 
2527c478bd9Sstevel@tonic-gate 	/*
25332f863ecSjp161948 	 * start with enough room for 65 locales - that's a lot fewer than
2547c478bd9Sstevel@tonic-gate 	 * all the locales available for installation, but a lot more than
2557c478bd9Sstevel@tonic-gate 	 * what most users will need and install
2567c478bd9Sstevel@tonic-gate 	 */
2577c478bd9Sstevel@tonic-gate 	n_elems = 0;
2587c478bd9Sstevel@tonic-gate 	list_size = 192;
2597c478bd9Sstevel@tonic-gate 	list = (char **) xmalloc(sizeof (char *) * (list_size + 1));
2607c478bd9Sstevel@tonic-gate 	memset(list, 0, sizeof (char *) * (list_size + 1));
2617c478bd9Sstevel@tonic-gate 
2627c478bd9Sstevel@tonic-gate 	while (fgets(locale, sizeof (locale), locale_out)) {
2637c478bd9Sstevel@tonic-gate 		/* skip long locale names (if any) */
2647c478bd9Sstevel@tonic-gate 		if (!strchr(locale, '\n')) {
2657c478bd9Sstevel@tonic-gate 			long_line = 1;
2667c478bd9Sstevel@tonic-gate 			continue;
26732f863ecSjp161948 		} else if (long_line) {
2687c478bd9Sstevel@tonic-gate 			long_line = 0;
2697c478bd9Sstevel@tonic-gate 			continue;
2707c478bd9Sstevel@tonic-gate 		}
27132f863ecSjp161948 
2727c478bd9Sstevel@tonic-gate 		if (strncmp(locale, "iso_8859", 8) == 0)
27332f863ecSjp161948 			/* ignore locale names like "iso_8859-1" */
27432f863ecSjp161948 			continue;
2757c478bd9Sstevel@tonic-gate 
2767c478bd9Sstevel@tonic-gate 		if (n_elems == list_size) {
2777c478bd9Sstevel@tonic-gate 			list_size *= 2;
27832f863ecSjp161948 			list = (char **)xrealloc((void *) list,
27932f863ecSjp161948 			    (list_size + 1) * sizeof (char *));
28032f863ecSjp161948 			memset(&list[n_elems + 1], 0,
28132f863ecSjp161948 			    sizeof (char *) * (list_size - n_elems + 1));
2827c478bd9Sstevel@tonic-gate 		}
2837c478bd9Sstevel@tonic-gate 
2847c478bd9Sstevel@tonic-gate 		*(strchr(locale, '\n')) = '\0';	/* remove the trailing \n */
2857c478bd9Sstevel@tonic-gate 		list[n_elems++] = xstrdup(locale);
2867c478bd9Sstevel@tonic-gate 	}
28732f863ecSjp161948 
288ee5b3c37Sjp161948 	(void) pclose(locale_out);
289ee5b3c37Sjp161948 
2909a8058b5Sjp161948 	if (n_elems == 0) {
2919a8058b5Sjp161948 		xfree(list);
292a6e0e77dSjp161948 		return (NULL);
2939a8058b5Sjp161948 	}
294a6e0e77dSjp161948 
2957c478bd9Sstevel@tonic-gate 	list[n_elems] = NULL;
2967c478bd9Sstevel@tonic-gate 
2977c478bd9Sstevel@tonic-gate 	qsort(list, n_elems - 1, sizeof (char *), locale_cmp);
29832f863ecSjp161948 	return (list);
2997c478bd9Sstevel@tonic-gate }
3007c478bd9Sstevel@tonic-gate 
/*
 * Return an allocated language tag list for this process.
 *
 * The value of the SSH_LANGS environment variable wins if it is set.
 * Otherwise the current LC_MESSAGES locale is converted to a language tag;
 * when no locale is available the result is "i-default". The result may be
 * NULL if the locale cannot be converted.
 */
char *
g11n_getlangs()
{
	char *env_langs;
	char *locale;

	env_langs = getenv("SSH_LANGS");
	if (env_langs != NULL)
		return (xstrdup(env_langs));

	locale = g11n_getlocale();
	if (locale == NULL || *locale == '\0')
		return (xstrdup("i-default"));

	return (g11n_locale2langtag(locale));
}
3167c478bd9Sstevel@tonic-gate 
/*
 * Convert a NULL-terminated array of locale names into a comma separated
 * list of unique language tags, in order of first appearance. Locales that
 * cannot be converted to a language tag are skipped. The joined string is
 * produced by xjoin().
 */
char *
g11n_locales2langs(char **locale_set)
{
	char **cur, **tags, **slot;
	char *tag, *joined;
	int n_locales, is_dup;

	n_locales = 0;
	for (cur = locale_set; cur && *cur; cur++)
		n_locales++;

	/* one slot per locale plus the terminating NULL */
	tags = (char **)xmalloc((n_locales + 1) * sizeof (char *));
	memset(tags, 0, (n_locales + 1) * sizeof (char *));

	for (cur = locale_set;
	    cur && *cur && ((cur - locale_set) <= n_locales); cur++) {
		tag = g11n_locale2langtag(*cur);
		if (tag == NULL)
			continue;

		/*
		 * Walk all stored tags. There is deliberately no early exit
		 * on a duplicate: slot must end up on the first free entry
		 * so the terminator below is written in the right place.
		 */
		is_dup = 0;
		for (slot = tags;
		    (slot - tags) < n_locales && *slot != NULL; slot++) {
			if (strcmp(*slot, tag) == 0)
				is_dup = 1;
		}

		if (is_dup)
			xfree(tag);
		else
			*(slot++) = tag;
		*slot = NULL;
	}

	joined = xjoin(tags, ',');
	g11n_freelist(tags);

	return (joined);
}
3527c478bd9Sstevel@tonic-gate 
/*
 * qsort() comparison callback for arrays of C strings: each argument points
 * at a string pointer, and the strings are ordered with strcmp().
 */
static int
sortcmp(const void *d1, const void *d2)
{
	const char *const *lhs = d1;
	const char *const *rhs = d2;

	return (strcmp(*lhs, *rhs));
}
3617c478bd9Sstevel@tonic-gate 
/*
 * Compare two language tags sub-tag by sub-tag (case sensitive).
 *
 * Returns:
 *	0 - the primary language sub-tags differ
 *	1 - the primary sub-tags are equal, but only one tag has a second
 *	    sub-tag or the second sub-tags differ
 *	2 - the primary sub-tags are equal and the second sub-tags are
 *	    either both absent or equal
 *
 * Sub-tags after the second one are ignored.
 */
int
g11n_langtag_match(char *langtag1, char *langtag2)
{
	size_t sub1, sub2;

	/* length of the primary sub-tag: everything up to '-' or the end */
	sub1 = strcspn(langtag1, "-");
	sub2 = strcspn(langtag2, "-");

	/* no match */
	if (sub1 != sub2 || strncmp(langtag1, langtag2, sub1) != 0)
		return (0);

	/* the byte following a sub-tag is either '\0' or '-' */
	if (langtag1[sub1] == '\0' && langtag2[sub2] == '\0') {
		/* no country sub-tags - exact match */
		return (2);
	}

	if (langtag1[sub1] == '\0' || langtag2[sub2] == '\0') {
		/* one langtag has a country sub-tag, the other doesn't */
		return (1);
	}

	/* both have a second sub-tag - step over the '-' and compare them */
	langtag1 += sub1 + 1;
	langtag2 += sub2 + 1;

	sub1 = strcspn(langtag1, "-");
	sub2 = strcspn(langtag2, "-");

	if (sub1 == sub2 && strncmp(langtag1, langtag2, sub1) == 0) {
		/* country tags matched - exact match */
		return (2);
	}

	return (1);
}
4117c478bd9Sstevel@tonic-gate 
4127c478bd9Sstevel@tonic-gate char *
g11n_langtag_set_intersect(char * set1,char * set2)4137c478bd9Sstevel@tonic-gate g11n_langtag_set_intersect(char *set1, char *set2)
4147c478bd9Sstevel@tonic-gate {
4157c478bd9Sstevel@tonic-gate 	char **list1, **list2, **list3, **p, **q, **r;
4167c478bd9Sstevel@tonic-gate 	char *set3, *lang_subtag;
41732f863ecSjp161948 	uint_t n1, n2, n3;
41832f863ecSjp161948 	uint_t do_append;
4197c478bd9Sstevel@tonic-gate 
4207c478bd9Sstevel@tonic-gate 	list1 = xsplit(set1, ',');
4217c478bd9Sstevel@tonic-gate 	list2 = xsplit(set2, ',');
42232f863ecSjp161948 
42332f863ecSjp161948 	for (n1 = 0, p = list1; p && *p; p++, n1++)
42432f863ecSjp161948 		;
42532f863ecSjp161948 	for (n2 = 0, p = list2; p && *p; p++, n2++)
42632f863ecSjp161948 		;
4277c478bd9Sstevel@tonic-gate 
4287c478bd9Sstevel@tonic-gate 	list3 = (char **) xmalloc(sizeof (char *) * (n1 + n2 + 1));
4297c478bd9Sstevel@tonic-gate 	*list3 = NULL;
4307c478bd9Sstevel@tonic-gate 
43132f863ecSjp161948 	/*
43232f863ecSjp161948 	 * we must not sort the user langtags - sorting or not the server's
4337c478bd9Sstevel@tonic-gate 	 * should not affect the outcome
4347c478bd9Sstevel@tonic-gate 	 */
4357c478bd9Sstevel@tonic-gate 	qsort(list2, n2, sizeof (char *), sortcmp);
4367c478bd9Sstevel@tonic-gate 
4377c478bd9Sstevel@tonic-gate 	for (n3 = 0, p = list1; p && *p; p++) {
4387c478bd9Sstevel@tonic-gate 		do_append = 0;
4397c478bd9Sstevel@tonic-gate 		for (q = list2; q && *q; q++) {
4407c478bd9Sstevel@tonic-gate 			if (g11n_langtag_match(*p, *q) != 2) continue;
4417c478bd9Sstevel@tonic-gate 			/* append element */
4427c478bd9Sstevel@tonic-gate 			for (r = list3; (r - list3) <= (n1 + n2); r++) {
4437c478bd9Sstevel@tonic-gate 				do_append = 1;
44432f863ecSjp161948 				if (!*r)
44532f863ecSjp161948 					break;
4467c478bd9Sstevel@tonic-gate 				if (strcmp(*p, *r) == 0) {
4477c478bd9Sstevel@tonic-gate 					do_append = 0;
4487c478bd9Sstevel@tonic-gate 					break;
4497c478bd9Sstevel@tonic-gate 				}
4507c478bd9Sstevel@tonic-gate 			}
4517c478bd9Sstevel@tonic-gate 			if (do_append && n3 <= (n1 + n2)) {
4527c478bd9Sstevel@tonic-gate 				list3[n3++] = xstrdup(*p);
4537c478bd9Sstevel@tonic-gate 				list3[n3] = NULL;
4547c478bd9Sstevel@tonic-gate 			}
4557c478bd9Sstevel@tonic-gate 		}
4567c478bd9Sstevel@tonic-gate 	}
4577c478bd9Sstevel@tonic-gate 
4587c478bd9Sstevel@tonic-gate 	for (p = list1; p && *p; p++) {
4597c478bd9Sstevel@tonic-gate 		do_append = 0;
4607c478bd9Sstevel@tonic-gate 		for (q = list2; q && *q; q++) {
46132f863ecSjp161948 			if (g11n_langtag_match(*p, *q) != 1)
46232f863ecSjp161948 				continue;
46332f863ecSjp161948 
4647c478bd9Sstevel@tonic-gate 			/* append element */
4657c478bd9Sstevel@tonic-gate 			lang_subtag = xstrdup(*p);
4667c478bd9Sstevel@tonic-gate 			if (strchr(lang_subtag, '-'))
4677c478bd9Sstevel@tonic-gate 				*(strchr(lang_subtag, '-')) = '\0';
4687c478bd9Sstevel@tonic-gate 			for (r = list3; (r - list3) <= (n1 + n2); r++) {
4697c478bd9Sstevel@tonic-gate 				do_append = 1;
47032f863ecSjp161948 				if (!*r)
47132f863ecSjp161948 					break;
4727c478bd9Sstevel@tonic-gate 				if (strcmp(lang_subtag, *r) == 0) {
4737c478bd9Sstevel@tonic-gate 					do_append = 0;
4747c478bd9Sstevel@tonic-gate 					break;
4757c478bd9Sstevel@tonic-gate 				}
4767c478bd9Sstevel@tonic-gate 			}
4777c478bd9Sstevel@tonic-gate 			if (do_append && n3 <= (n1 + n2)) {
4787c478bd9Sstevel@tonic-gate 				list3[n3++] = lang_subtag;
4797c478bd9Sstevel@tonic-gate 				list3[n3] = NULL;
48032f863ecSjp161948 			} else
4817c478bd9Sstevel@tonic-gate 				xfree(lang_subtag);
4827c478bd9Sstevel@tonic-gate 		}
4837c478bd9Sstevel@tonic-gate 	}
4847c478bd9Sstevel@tonic-gate 
4857c478bd9Sstevel@tonic-gate 	set3 = xjoin(list3, ',');
4867c478bd9Sstevel@tonic-gate 	xfree_split_list(list1);
4877c478bd9Sstevel@tonic-gate 	xfree_split_list(list2);
4887c478bd9Sstevel@tonic-gate 	xfree_split_list(list3);
4897c478bd9Sstevel@tonic-gate 
49032f863ecSjp161948 	return (set3);
4917c478bd9Sstevel@tonic-gate }
4927c478bd9Sstevel@tonic-gate 
/*
 * Pick the language tag a client should use: the first element of the
 * intersection of the client's and the server's language tag sets.
 *
 * Returns an allocated copy of that tag, or NULL if the intersection is
 * empty.
 */
char *
g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags)
{
	char *list, *result;
	char **xlist;

	/* g11n_langtag_set_intersect uses xmalloc - should not return NULL */
	list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags);

	if (list == NULL)
		return (NULL);

	xlist = xsplit(list, ',');

	xfree(list);

	if (xlist == NULL)
		return (NULL);

	/* an empty split list must still be released, or it leaks */
	result = (*xlist != NULL) ? xstrdup(*xlist) : NULL;
	xfree_split_list(xlist);

	return (result);
}
5177c478bd9Sstevel@tonic-gate 
/*
 * Return non-zero if the locale name has the codeset "UTF-8", i.e. the first
 * '.' is followed by exactly "UTF-8" and then the end of the name or '@'.
 */
static int
locale_is_utf8(const char *name)
{
	const char *dot = strchr(name, '.');

	if (dot == NULL || strncmp(dot + 1, "UTF-8", 5) != 0)
		return (0);

	/* the strncmp() match guarantees dot[6] is within the string */
	return (dot[6] == '\0' || dot[6] == '@');
}

/* Return non-zero for the default locale names "C", "POSIX" and "common". */
static int
locale_is_default(const char *name)
{
	return (strcmp(name, "C") == 0 || strcmp(name, "POSIX") == 0 ||
	    strcmp(name, "common") == 0);
}

/*
 * Compare locales for qsort(): UTF-8 locales sort before all others, the
 * default locales ("C", "POSIX", "common") sort after every other locale in
 * the same group, and everything else is ordered with a straight strcmp().
 */
static int
locale_cmp(const void *d1, const void *d2)
{
	const char *s1 = *(char **)d1;
	const char *s2 = *(char **)d2;
	int utf8_1 = locale_is_utf8(s1);
	int utf8_2 = locale_is_utf8(s2);
	int dflt_1, dflt_2;

	/* prefer UTF-8 locales */
	if (utf8_1 != utf8_2)
		return (utf8_1 ? -1 : 1);

	/* prefer any locale over the default locales */
	dflt_1 = locale_is_default(s1);
	dflt_2 = locale_is_default(s2);
	if (dflt_1 != dflt_2)
		return (dflt_1 ? 1 : -1);

	return (strcmp(s1, s2));
}
5707c478bd9Sstevel@tonic-gate 
5717c478bd9Sstevel@tonic-gate 
5727c478bd9Sstevel@tonic-gate char **
g11n_langtag_set_locale_set_intersect(char * langtag_set,char ** locale_set)57332f863ecSjp161948 g11n_langtag_set_locale_set_intersect(char *langtag_set, char **locale_set)
5747c478bd9Sstevel@tonic-gate {
5757c478bd9Sstevel@tonic-gate 	char **langtag_list, **result, **p, **q, **r;
5767c478bd9Sstevel@tonic-gate 	char *s;
57732f863ecSjp161948 	uint_t do_append, n_langtags, n_locales, n_results, max_results;
5787c478bd9Sstevel@tonic-gate 
579*b9aa66a7SJan Pechanec 	if (locale_set == NULL)
580*b9aa66a7SJan Pechanec 		return (NULL);
581*b9aa66a7SJan Pechanec 
58232f863ecSjp161948 	/* count lang tags and locales */
58332f863ecSjp161948 	for (n_locales = 0, p = locale_set; p && *p; p++)
58432f863ecSjp161948 		n_locales++;
58532f863ecSjp161948 
5867c478bd9Sstevel@tonic-gate 	n_langtags = ((s = langtag_set) != NULL && *s && *s != ',') ? 1 : 0;
58732f863ecSjp161948 	/* count the number of langtags */
58832f863ecSjp161948 	for (; s = strchr(s, ','); s++, n_langtags++)
58932f863ecSjp161948 		;
5907c478bd9Sstevel@tonic-gate 
5917c478bd9Sstevel@tonic-gate 	qsort(locale_set, n_locales, sizeof (char *), locale_cmp);
5927c478bd9Sstevel@tonic-gate 
5937c478bd9Sstevel@tonic-gate 	langtag_list = xsplit(langtag_set, ',');
59432f863ecSjp161948 	for (n_langtags = 0, p = langtag_list; p && *p; p++, n_langtags++)
59532f863ecSjp161948 		;
5967c478bd9Sstevel@tonic-gate 
5977c478bd9Sstevel@tonic-gate 	max_results = MIN(n_locales, n_langtags) * 2;
5987c478bd9Sstevel@tonic-gate 	result = (char **) xmalloc(sizeof (char *) * (max_results + 1));
5997c478bd9Sstevel@tonic-gate 	*result = NULL;
6007c478bd9Sstevel@tonic-gate 	n_results = 0;
6017c478bd9Sstevel@tonic-gate 
60232f863ecSjp161948 	/* more specific matches first */
6037c478bd9Sstevel@tonic-gate 	for (p = langtag_list; p && *p; p++) {
6047c478bd9Sstevel@tonic-gate 		do_append = 0;
6057c478bd9Sstevel@tonic-gate 		for (q = locale_set; q && *q; q++) {
6067c478bd9Sstevel@tonic-gate 			if (g11n_langtag_matches_locale(*p, *q) == 2) {
6077c478bd9Sstevel@tonic-gate 				do_append = 1;
60832f863ecSjp161948 				for (r = result; (r - result) <=
60932f863ecSjp161948 				    MIN(n_locales, n_langtags); r++) {
61032f863ecSjp161948 					if (!*r)
61132f863ecSjp161948 						break;
6127c478bd9Sstevel@tonic-gate 					if (strcmp(*q, *r) == 0) {
6137c478bd9Sstevel@tonic-gate 						do_append = 0;
6147c478bd9Sstevel@tonic-gate 						break;
6157c478bd9Sstevel@tonic-gate 					}
6167c478bd9Sstevel@tonic-gate 				}
6177c478bd9Sstevel@tonic-gate 				if (do_append && n_results < max_results) {
6187c478bd9Sstevel@tonic-gate 					result[n_results++] = xstrdup(*q);
6197c478bd9Sstevel@tonic-gate 					result[n_results] = NULL;
6207c478bd9Sstevel@tonic-gate 				}
6217c478bd9Sstevel@tonic-gate 				break;
6227c478bd9Sstevel@tonic-gate 			}
6237c478bd9Sstevel@tonic-gate 		}
6247c478bd9Sstevel@tonic-gate 	}
6257c478bd9Sstevel@tonic-gate 
6267c478bd9Sstevel@tonic-gate 	for (p = langtag_list; p && *p; p++) {
6277c478bd9Sstevel@tonic-gate 		do_append = 0;
6287c478bd9Sstevel@tonic-gate 		for (q = locale_set; q && *q; q++) {
6297c478bd9Sstevel@tonic-gate 			if (g11n_langtag_matches_locale(*p, *q) == 1) {
6307c478bd9Sstevel@tonic-gate 				do_append = 1;
63132f863ecSjp161948 				for (r = result; (r - result) <=
63232f863ecSjp161948 				    MIN(n_locales, n_langtags); r++) {
63332f863ecSjp161948 					if (!*r)
63432f863ecSjp161948 						break;
6357c478bd9Sstevel@tonic-gate 					if (strcmp(*q, *r) == 0) {
6367c478bd9Sstevel@tonic-gate 						do_append = 0;
6377c478bd9Sstevel@tonic-gate 						break;
6387c478bd9Sstevel@tonic-gate 					}
6397c478bd9Sstevel@tonic-gate 				}
6407c478bd9Sstevel@tonic-gate 				if (do_append && n_results < max_results) {
6417c478bd9Sstevel@tonic-gate 					result[n_results++] = xstrdup(*q);
6427c478bd9Sstevel@tonic-gate 					result[n_results] = NULL;
6437c478bd9Sstevel@tonic-gate 				}
6447c478bd9Sstevel@tonic-gate 				break;
6457c478bd9Sstevel@tonic-gate 			}
6467c478bd9Sstevel@tonic-gate 		}
6477c478bd9Sstevel@tonic-gate 	}
64832f863ecSjp161948 
6497c478bd9Sstevel@tonic-gate 	xfree_split_list(langtag_list);
6507c478bd9Sstevel@tonic-gate 
65132f863ecSjp161948 	return (result);
6527c478bd9Sstevel@tonic-gate }
6537c478bd9Sstevel@tonic-gate 
/*
 * Pick the server locale that best matches the client's language tags.
 *
 * clnt_langtags is the comma-separated langtag list offered by the client.
 * srvr_locales is a NULL-terminated list of candidate locales; when it is
 * NULL the candidate list is obtained from g11n_getlocales() and released
 * here before returning.
 *
 * Returns an xstrdup()ed copy of the first entry of the intersection
 * computed by g11n_langtag_set_locale_set_intersect(), or NULL when there
 * is no intersection, no match, or no client langtags at all. The caller
 * owns the returned string.
 */
char *
g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales)
{
	char **candidates, **matches, *best = NULL;

	/*
	 * Nothing was offered by the client, so there is nothing to
	 * negotiate. The intersect routine scans the langtag string with
	 * strchr() and must not be handed a NULL pointer.
	 */
	if (clnt_langtags == NULL)
		return (NULL);

	candidates = (srvr_locales != NULL) ? srvr_locales : g11n_getlocales();

	matches = g11n_langtag_set_locale_set_intersect(clnt_langtags,
	    candidates);
	if (matches != NULL) {
		/* the intersection is ordered, the first entry wins */
		if (*matches != NULL)
			best = xstrdup(*matches);
		xfree_split_list(matches);
	}

	/* only free the list if we fetched it ourselves */
	if (candidates != NULL && candidates != srvr_locales)
		g11n_freelist(candidates);

	return (best);
}
6787c478bd9Sstevel@tonic-gate 
/*
 * Functions for converting to UTF-8 from the local codeset and
 * converting from UTF-8 to the local codeset.
 *
 * The error_str parameter is a pointer to a char pointer variable in
 * which to store a string suitable for use with error() or fatal() or
 * friends. It is also used as an error indicator when NULL is returned.
 *
 * If conversion isn't necessary, *error_str is set to NULL, and
 * NULL is returned.
 * If a conversion error occurred, *error_str points to an error message,
 * and NULL is returned.
 */
6926f786aceSNobutomo Nakano char *
g11n_convert_from_utf8(const char * str,uint_t * lenp,char ** error_str)6936f786aceSNobutomo Nakano g11n_convert_from_utf8(const char *str, uint_t *lenp, char **error_str)
6947c478bd9Sstevel@tonic-gate {
6956f786aceSNobutomo Nakano 	static char *last_codeset;
6966f786aceSNobutomo Nakano 	static iconv_t cd = (iconv_t)-1;
6976f786aceSNobutomo Nakano 	char	*codeset;
6987c478bd9Sstevel@tonic-gate 
6996f786aceSNobutomo Nakano 	*error_str = NULL;
7007c478bd9Sstevel@tonic-gate 
7016f786aceSNobutomo Nakano 	codeset = nl_langinfo(CODESET);
70232f863ecSjp161948 
7036f786aceSNobutomo Nakano 	if (strcmp(codeset, "UTF-8") == 0)
70432f863ecSjp161948 		return (NULL);
7057c478bd9Sstevel@tonic-gate 
7066f786aceSNobutomo Nakano 	if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) {
7076f786aceSNobutomo Nakano 		if (last_codeset != NULL) {
7086f786aceSNobutomo Nakano 			xfree(last_codeset);
7096f786aceSNobutomo Nakano 			last_codeset = NULL;
71032f863ecSjp161948 		}
7116f786aceSNobutomo Nakano 		if (cd != (iconv_t)-1)
7126f786aceSNobutomo Nakano 			(void) iconv_close(cd);
71332f863ecSjp161948 
7146f786aceSNobutomo Nakano 		if ((cd = iconv_open(codeset, "UTF-8")) == (iconv_t)-1) {
7156f786aceSNobutomo Nakano 			*error_str = gettext("Cannot convert UTF-8 "
7166f786aceSNobutomo Nakano 			    "strings to the local codeset");
71732f863ecSjp161948 			return (NULL);
7187c478bd9Sstevel@tonic-gate 		}
7196f786aceSNobutomo Nakano 		last_codeset = xstrdup(codeset);
7206f786aceSNobutomo Nakano 	}
7216f786aceSNobutomo Nakano 	return (do_iconv(cd, str, lenp, error_str));
7227c478bd9Sstevel@tonic-gate }
7237c478bd9Sstevel@tonic-gate 
7247c478bd9Sstevel@tonic-gate char *
g11n_convert_to_utf8(const char * str,uint_t * lenp,int native,char ** error_str)7256f786aceSNobutomo Nakano g11n_convert_to_utf8(const char *str, uint_t *lenp,
7266f786aceSNobutomo Nakano     int native, char **error_str)
7277c478bd9Sstevel@tonic-gate {
7286f786aceSNobutomo Nakano 	static char *last_codeset;
7296f786aceSNobutomo Nakano 	static iconv_t cd = (iconv_t)-1;
7306f786aceSNobutomo Nakano 	char	*codeset;
7317c478bd9Sstevel@tonic-gate 
7326f786aceSNobutomo Nakano 	*error_str = NULL;
7337c478bd9Sstevel@tonic-gate 
7346f786aceSNobutomo Nakano 	if (native)
7356f786aceSNobutomo Nakano 		codeset = native_codeset;
7366f786aceSNobutomo Nakano 	else
7376f786aceSNobutomo Nakano 		codeset = nl_langinfo(CODESET);
73832f863ecSjp161948 
7396f786aceSNobutomo Nakano 	if (strcmp(codeset, "UTF-8") == 0)
7406f786aceSNobutomo Nakano 		return (NULL);
7416f786aceSNobutomo Nakano 
7426f786aceSNobutomo Nakano 	if (last_codeset == NULL || strcmp(codeset, last_codeset) != 0) {
7436f786aceSNobutomo Nakano 		if (last_codeset != NULL) {
7446f786aceSNobutomo Nakano 			xfree(last_codeset);
7456f786aceSNobutomo Nakano 			last_codeset = NULL;
7467c478bd9Sstevel@tonic-gate 		}
7476f786aceSNobutomo Nakano 		if (cd != (iconv_t)-1)
7486f786aceSNobutomo Nakano 			(void) iconv_close(cd);
7497c478bd9Sstevel@tonic-gate 
7506f786aceSNobutomo Nakano 		if ((cd = iconv_open("UTF-8", codeset)) == (iconv_t)-1) {
7516f786aceSNobutomo Nakano 			*error_str = gettext("Cannot convert the "
7526f786aceSNobutomo Nakano 			    "local codeset strings to UTF-8");
7536f786aceSNobutomo Nakano 			return (NULL);
7547c478bd9Sstevel@tonic-gate 		}
7556f786aceSNobutomo Nakano 		last_codeset = xstrdup(codeset);
7567c478bd9Sstevel@tonic-gate 	}
7576f786aceSNobutomo Nakano 	return (do_iconv(cd, str, lenp, error_str));
7587c478bd9Sstevel@tonic-gate }
7597c478bd9Sstevel@tonic-gate 
7607c478bd9Sstevel@tonic-gate /*
7617c478bd9Sstevel@tonic-gate  * Wrapper around iconv()
7627c478bd9Sstevel@tonic-gate  *
7636f786aceSNobutomo Nakano  * The caller is responsible for freeing the result. NULL is returned when
7647c478bd9Sstevel@tonic-gate  * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF).
7656f786aceSNobutomo Nakano  * The caller must ensure that the input string isn't NULL pointer.
7667c478bd9Sstevel@tonic-gate  */
7676f786aceSNobutomo Nakano static char *
do_iconv(iconv_t cd,const char * str,uint_t * lenp,char ** err_str)7686f786aceSNobutomo Nakano do_iconv(iconv_t cd, const char *str, uint_t *lenp, char **err_str)
7697c478bd9Sstevel@tonic-gate {
7706f786aceSNobutomo Nakano 	int	ilen, olen;
7716f786aceSNobutomo Nakano 	size_t	ileft, oleft;
7726f786aceSNobutomo Nakano 	char	*ostr, *optr;
7736f786aceSNobutomo Nakano 	const char *istr;
7747c478bd9Sstevel@tonic-gate 
7756f786aceSNobutomo Nakano 	ilen = *lenp;
7766f786aceSNobutomo Nakano 	olen = ilen + 1;
7776f786aceSNobutomo Nakano 
7786f786aceSNobutomo Nakano 	ostr = NULL;
7796f786aceSNobutomo Nakano 	for (;;) {
7806f786aceSNobutomo Nakano 		olen *= 2;
7816f786aceSNobutomo Nakano 		oleft = olen;
7826f786aceSNobutomo Nakano 		ostr = optr = xrealloc(ostr, olen);
7836f786aceSNobutomo Nakano 		istr = (const char *)str;
7846f786aceSNobutomo Nakano 		if ((ileft = ilen) == 0)
7856f786aceSNobutomo Nakano 			break;
7866f786aceSNobutomo Nakano 
7876f786aceSNobutomo Nakano 		if (iconv(cd, &istr, &ileft, &optr, &oleft) != (size_t)-1) {
7886f786aceSNobutomo Nakano 			/* success: generate reset sequence */
7896f786aceSNobutomo Nakano 			if (iconv(cd, NULL, NULL,
7906f786aceSNobutomo Nakano 			    &optr, &oleft) == (size_t)-1 && errno == E2BIG) {
7916f786aceSNobutomo Nakano 				continue;
7926f786aceSNobutomo Nakano 			}
7936f786aceSNobutomo Nakano 			break;
7946f786aceSNobutomo Nakano 		}
7956f786aceSNobutomo Nakano 		/* failed */
7966f786aceSNobutomo Nakano 		if (errno != E2BIG) {
7976f786aceSNobutomo Nakano 			oleft = olen;
7986f786aceSNobutomo Nakano 			(void) iconv(cd, NULL, NULL, &ostr, &oleft);
7996f786aceSNobutomo Nakano 			xfree(ostr);
8006f786aceSNobutomo Nakano 			*err_str = gettext("Codeset conversion failed");
80132f863ecSjp161948 			return (NULL);
8026f786aceSNobutomo Nakano 		}
8036f786aceSNobutomo Nakano 	}
8046f786aceSNobutomo Nakano 	olen = optr - ostr;
8056f786aceSNobutomo Nakano 	optr = xmalloc(olen + 1);
8066f786aceSNobutomo Nakano 	(void) memcpy(optr, ostr, olen);
8076f786aceSNobutomo Nakano 	xfree(ostr);
80832f863ecSjp161948 
8096f786aceSNobutomo Nakano 	optr[olen] = '\0';
8106f786aceSNobutomo Nakano 	*lenp = olen;
81132f863ecSjp161948 
8126f786aceSNobutomo Nakano 	return (optr);
8137c478bd9Sstevel@tonic-gate }
8147c478bd9Sstevel@tonic-gate 
/*
 * Output filter: returns s with every character that is neither
 * printable (per iswprint()) nor one of '\n', '\r', '\t' replaced by the
 * octal escapes ("\ooo") of its bytes.
 *
 * The buffer passed in is consumed: it is resized with xrealloc() and the
 * resized buffer, holding the filtered text, is what gets returned.
 */
char *
g11n_filter_string(char *s)
{
	int	max_bytes = MB_CUR_MAX;
	int	nbytes, total;
	const char *in;
	wchar_t	wch;
	char	*tmp, *out;

	/* worst case every input byte expands to the 4 bytes of "\ooo" */
	tmp = out = xmalloc(strlen(s) * 4 + 1);

	for (in = s; *in != '\0'; ) {
		nbytes = mbtowc(&wch, in, max_bytes);
		if (nbytes <= 0) {
			/* undecodable: treat the single byte as the char */
			nbytes = 1;
			wch = (unsigned char)*in;
		}

		if (iswprint(wch) ||
		    wch == L'\n' || wch == L'\r' || wch == L'\t') {
			/* safe to show: copy the character's bytes as is */
			(void) memcpy(out, in, nbytes);
			out += nbytes;
			in += nbytes;
			continue;
		}

		/* unsafe: emit one octal escape per byte of the character */
		for (; nbytes != 0; nbytes--)
			out += sprintf(out, "\\%03o", (unsigned char)*in++);
	}
	*out = '\0';

	/* hand the result back in the (resized) buffer we were given */
	total = out - tmp + 1;
	out = xrealloc(s, total);
	(void) memcpy(out, tmp, total);
	xfree(tmp);

	return (out);
}
8626f786aceSNobutomo Nakano 
/*
 * Once a langtag has been negotiated, the server needs to map it to a
 * system locale. That is done based on the locales supported on the
 * server side. We know (for the locales supported on Solaris) what each
 * langtag is mapped to. However, from the client's point of view, there
 * is no way to know exactly which locale (encoding) will be used.
 *
 * With the bug fix for SSH_BUG_STRING_ENCODING, it is guaranteed that
 * UTF-8 characters always come over the wire, so this is no longer a
 * problem as long as both sides have the bug fix. However, if the server
 * side doesn't have the fix, the client can't safely perform the code
 * conversion since the incoming character encoding is unknown.
 *
 * To alleviate this situation, we take an empirical approach to deriving
 * the encoding from the langtag.
 *
 * If the langtag has a subtag, we can directly map the langtag to a UTF-8
 * locale (e.g. en-US can be mapped to en_US.UTF-8) with a few exceptions.
 * Certain xx_YY locales don't support UTF-8 encoding (probably due to lack
 * of L10N support ..). Those are:
 *
 * 	no_NO, no_NY, sr_SP, sr_YU
 *
 * They all use ISO8859-X encoding.
 *
 * As for the "xx" langtags, some of them can be mapped to "xx.UTF-8",
 * but others cannot, so we need to use "xx" as the locale name.
 * Those locales are:
 *
 * ar, ca, cs, da, et, fi, he, hu, ja, lt, lv, nl, no, pt, sh, th, tr
 *
 * Their encodings vary. They could be ISO8859-X or EUC or something else.
 * So we don't perform code conversion for these langtags.
 */
static const char *non_utf8_langtag[] = {
	/* langtags with a subtag whose locale has no UTF-8 variant */
	"no-NO",
	"no-NY",
	"sr-SP",
	"sr-YU",
	/* bare language langtags whose locale encoding is unknown */
	"ar", "ca", "cs", "da", "et", "fi",
	"he", "hu", "ja", "lt", "lv", "nl",
	"no", "pt", "sh", "th", "tr",
	/* end of list */
	NULL
};
9016f786aceSNobutomo Nakano 
9026f786aceSNobutomo Nakano void
g11n_test_langtag(const char * lang,int server)9036f786aceSNobutomo Nakano g11n_test_langtag(const char *lang, int server)
9046f786aceSNobutomo Nakano {
9056f786aceSNobutomo Nakano 	const char	**lp;
9066f786aceSNobutomo Nakano 
9076f786aceSNobutomo Nakano 	if (datafellows & SSH_BUG_LOCALES_NOT_LANGTAGS) {
9086f786aceSNobutomo Nakano 		/*
9096f786aceSNobutomo Nakano 		 * We negotiated with real locale name (not lang tag).
9106f786aceSNobutomo Nakano 		 * We shouldn't expect UTF-8, thus shouldn't do code
9116f786aceSNobutomo Nakano 		 * conversion.
9126f786aceSNobutomo Nakano 		 */
9136f786aceSNobutomo Nakano 		datafellows |= SSH_BUG_STRING_ENCODING;
9146f786aceSNobutomo Nakano 		return;
9156f786aceSNobutomo Nakano 	}
9166f786aceSNobutomo Nakano 
9176f786aceSNobutomo Nakano 	if (datafellows & SSH_BUG_STRING_ENCODING) {
9186f786aceSNobutomo Nakano 		if (server) {
9196f786aceSNobutomo Nakano 			/*
9206f786aceSNobutomo Nakano 			 * Whatever bug exists in the client side, server
9216f786aceSNobutomo Nakano 			 * side has nothing to do, since server has no way
9226f786aceSNobutomo Nakano 			 * to know what actual encoding is used on the client
9236f786aceSNobutomo Nakano 			 * side. For example, even if we negotiated with
9246f786aceSNobutomo Nakano 			 * en_US, client locale could be en_US.ISO8859-X or
9256f786aceSNobutomo Nakano 			 * en_US.UTF-8.
9266f786aceSNobutomo Nakano 			 */
9276f786aceSNobutomo Nakano 			return;
9286f786aceSNobutomo Nakano 		}
9296f786aceSNobutomo Nakano 		/*
9306f786aceSNobutomo Nakano 		 * We are on the client side. We'll check with known
9316f786aceSNobutomo Nakano 		 * locales to see if non-UTF8 characters could come in.
9326f786aceSNobutomo Nakano 		 */
9336f786aceSNobutomo Nakano 		for (lp = non_utf8_langtag; *lp != NULL; lp++) {
9346f786aceSNobutomo Nakano 			if (strcmp(lang, *lp) == 0)
9356f786aceSNobutomo Nakano 				break;
9366f786aceSNobutomo Nakano 		}
9376f786aceSNobutomo Nakano 		if (*lp == NULL) {
9386f786aceSNobutomo Nakano 			debug2("Server is expected to use UTF-8 locale");
9396f786aceSNobutomo Nakano 			datafellows &= ~SSH_BUG_STRING_ENCODING;
94032f863ecSjp161948 		} else {
9417c478bd9Sstevel@tonic-gate 			/*
9426f786aceSNobutomo Nakano 			 * Server is expected to use non-UTF8 encoding.
9437c478bd9Sstevel@tonic-gate 			 */
9446f786aceSNobutomo Nakano 			debug2("Enforcing no code conversion: %s", lang);
9457c478bd9Sstevel@tonic-gate 		}
9467c478bd9Sstevel@tonic-gate 	}
9477c478bd9Sstevel@tonic-gate }
9489a8058b5Sjp161948 
/*
 * Free all strings in the list and then free the list itself. The list
 * must end with a NULL pointer. Passing a NULL list is a no-op.
 */
void
g11n_freelist(char **list)
{
	char **entry;

	/* tolerate a missing list so callers need not guard every call */
	if (list == NULL)
		return;

	for (entry = list; *entry != NULL; entry++)
		xfree(*entry);

	xfree(list);
}
965