libssh/common/g11n.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 *
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <errno.h>
#include <locale.h>
#include <langinfo.h>
#include <iconv.h>
#include <ctype.h>
#include <strings.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include "includes.h"
#include "xmalloc.h"
#include "xlist.h"

#ifdef MIN
#undef MIN
#endif /* MIN */

#define MIN(x, y)		    ((x) < (y) ? (x) : (y))

#define LOCALE_PATH "/usr/bin/locale"

#define LANGTAG_MAX 5 /* two-char country code, '-' and two-char region code */

/* iconv() wrapper used by the g11n_convert_*() functions; defined below. */
static u_char * do_iconv(iconv_t cd, u_int *mul_ptr,
		       const void *buf, u_int len,
		       u_int *outlen, int *err,
		       u_char **err_str);

/* qsort() comparator for locale names; defined below. */
static int locale_cmp(const void *d1, const void *d2);
/* Maps a locale name to a language tag; defined below. */
static char *g11n_locale2langtag(char *locale);

/* Validators used by the g11n_convert_from_*() functions; defined below. */
u_int
g11n_validate_ascii(const char *str, u_int len, u_char **error_str);

u_int
g11n_validate_utf8(const u_char *str, u_int len, u_char **error_str);

/*
 * Convert a locale name of the form ll[_CC][.codeset][@modifier] into a
 * language tag of the form "ll" or "ll-CC".
 *
 * Returns NULL when locale is NULL, empty, or does not start with exactly
 * two letters followed by end-of-string, '_', '.' or '@'.  The "C" and
 * "POSIX" locales map to "i-default".
 *
 * Every non-NULL result is allocated with xmalloc()/xstrdup(), so callers
 * can treat all results uniformly (previously "i-default" was returned as
 * a string literal while every other tag was heap memory).
 */
static
char *
g11n_locale2langtag(char *locale)
{
    char *langtag;
    size_t len;

    /* base cases */
    if (locale == NULL || *locale == '\0')
        return NULL;

    if (strcmp(locale, "POSIX") == 0 || strcmp(locale, "C") == 0)
        return xstrdup("i-default");

    /*
     * Punt for language codes which are not exactly 2 letters.  The
     * <ctype.h> classifiers are only defined for values representable as
     * unsigned char, hence the casts.
     */
    len = strlen(locale);
    if (len < 2 ||
        !isalpha((unsigned char) locale[0]) ||
        !isalpha((unsigned char) locale[1]) ||
        (locale[2] != '\0' && locale[2] != '_' &&
        locale[2] != '.' && locale[2] != '@'))
        return NULL;

    /* We have a primary language sub-tag */
    langtag = (char *) xmalloc(LANGTAG_MAX + 1);
    langtag[0] = locale[0];
    langtag[1] = locale[1];
    langtag[2] = '\0';

    /* Append a well-formed two-letter country sub-tag, if there is one */
    if (locale[2] == '_' && len >= 5 &&
        isalpha((unsigned char) locale[3]) &&
        isalpha((unsigned char) locale[4]) &&
        (locale[5] == '\0' || locale[5] == '.' || locale[5] == '@')) {
        langtag[2] = '-';
        langtag[3] = locale[3];
        langtag[4] = locale[4];
        langtag[5] = '\0';
    }

    /* In all other cases only the primary language sub-tag is used */
    return langtag;
}

/*
 * Report whether langtag is the special "i-default" language tag.
 * The comparison is case-sensitive.  Returns 1 on a match, 0 otherwise.
 */
u_int
g11n_langtag_is_default(char *langtag)
{
    static const char default_tag[] = "i-default";

    if (strcmp(langtag, default_tag) != 0)
        return 0;

    return 1;
}

/*
 * Returns non-zero when s[0] and s[1] are both alphabetic.  The casts keep
 * the <ctype.h> calls defined for bytes with the high bit set.
 */
static int
g11n_two_alpha(const char *s)
{
    return isalpha((unsigned char) s[0]) && isalpha((unsigned char) s[1]);
}

/*
 * Match a language tag against a locale name.
 *
 * This lang tag / locale matching function works only for two-character
 * language primary sub-tags and two-character country sub-tags.
 *
 * Returns:
 *   0 - no match (also when either argument is NULL or malformed)
 *   1 - the languages match, but the country parts differ, are missing on
 *       one side, or cannot be compared; also "i-default" vs. "C"/"POSIX"
 *   2 - language and country both match, or neither side has a country
 */
u_int
g11n_langtag_matches_locale(char *langtag, char *locale)
{
    int locale_has_country;

    if (langtag == NULL || locale == NULL)
        return 0;

    /* Match "i-default" only to the "C" / "POSIX" locales */
    if (g11n_langtag_is_default(langtag))
        return (strcasecmp(locale, "POSIX") == 0 ||
            strcasecmp(locale, "C") == 0) ? 1 : 0;

    /* the locale's language part must be exactly two letters */
    if (strlen(locale) < 2 || !g11n_two_alpha(locale) ||
        (locale[2] != '\0' && locale[2] != '_' &&
        locale[2] != '.' && locale[2] != '@'))
        return 0;

    /* same thing with the langtag's primary sub-tag */
    if (strlen(langtag) < 2 || !g11n_two_alpha(langtag) ||
        (langtag[2] != '\0' && langtag[2] != '-'))
        return 0;

    /* primary language sub-tag and the locale's language part must match */
    if (strncasecmp(langtag, locale, 2) != 0)
        return 0;

    /* The languages match; now fuzzy-check the country part. */
    locale_has_country = (locale[2] == '_');

    /* langtag has only one sub-tag: exact only if the locale has no country */
    if (langtag[2] == '\0')
        return locale_has_country ? 1 : 2;

    /* langtag has more sub-tags but the locale has no country code */
    if (!locale_has_country)
        return 1;

    /* ignore the langtag's second sub-tag if it is not two letters */
    if (strlen(langtag) < 5 || !g11n_two_alpha(&langtag[3]) ||
        (langtag[5] != '\0' && langtag[5] != '-'))
        return 1;

    /* ignore the rest of the locale if there is no two-letter country */
    if (strlen(locale) < 5 || !g11n_two_alpha(&locale[3]) ||
        (locale[5] != '\0' && locale[5] != '.' && locale[5] != '@'))
        return 1;

    /* if the country part matches too, this is an exact match */
    return (strncasecmp(&langtag[3], &locale[3], 2) == 0) ? 2 : 1;
}

/*
 * Return the process' current LC_MESSAGES locale name as reported by
 * setlocale().  The text domain is (re)selected on every call.
 */
char *
g11n_getlocale()
{
    char *current;

    /* There is a single text domain, so always select it */
    (void) textdomain(TEXT_DOMAIN);

    /* No locale reported yet: initialise it from the environment */
    current = setlocale(LC_MESSAGES, NULL);
    if (current == NULL)
        (void) setlocale(LC_MESSAGES, "");

    return setlocale(LC_MESSAGES, NULL);
}

/*
 * Switch <category> to <locale>.
 *
 * Nothing is changed when locale is NULL, or when locale is a non-empty
 * name equal to the one setlocale() currently reports for the category.
 * An empty name is always handed to setlocale().  The text domain is
 * (re)selected on every call.
 */
void
g11n_setlocale(int category, const char *locale)
{
    const char *active;

    /* There is a single text domain, so always select it */
    (void) textdomain(TEXT_DOMAIN);

    if (locale == NULL)
        return;

    if (locale[0] != '\0') {
        active = setlocale(category, NULL);
        if (active != NULL && strcmp(active, locale) == 0)
            return;		/* already in effect */
    }

    /* A bogus <category> simply makes setlocale() do nothing. */
    (void) setlocale(category, locale);
}

/*
 * Build the list of installed locales by reading the output of
 * "locale -a".
 *
 * Returns a NULL-terminated array of xstrdup()ed locale names sorted with
 * locale_cmp(), or NULL if the locale command cannot be started.  Names
 * that do not fit into the line buffer and names beginning with
 * "iso_8859" are skipped.
 */
char **
g11n_getlocales()
{
    FILE *locale_out;
    u_int n_elems = 0, list_size = 192, long_line = 0;
    char **list;
    char *nl;
    char locale[64];	/* 64 bytes is plenty for locale names */

    if ((locale_out = popen(LOCALE_PATH " -a", "r")) == NULL)
        return NULL;

    /*
     * Start with room for 192 locales - that's a lot fewer than all the
     * locales available for installation, but a lot more than what most
     * users will need and install.  One extra slot holds the terminator.
     */
    list = (char **) xmalloc(sizeof(char *) * (list_size + 1));
    memset(list, 0, sizeof(char *) * (list_size + 1));

    while (fgets(locale, sizeof(locale), locale_out)) {
        nl = strchr(locale, '\n');

        /* skip long locale names (if any), including their tail chunk */
        if (nl == NULL) {
            long_line = 1;
            continue;
        }
        if (long_line) {
            long_line = 0;
            continue;
        }

        if (strncmp(locale, "iso_8859", 8) == 0)
            continue;	/* ignore locale names like "iso_8859-1" */

        if (n_elems == list_size) {
            list_size *= 2;
            list = (char **) xrealloc((void *) list,
                (list_size + 1) * sizeof(char *));
            /* clear exactly the new slots [n_elems .. list_size] */
            memset(&list[n_elems], 0,
                sizeof(char *) * (list_size + 1 - n_elems));
        }

        *nl = '\0';		/* remove the trailing \n */
        list[n_elems++] = xstrdup(locale);
    }
    list[n_elems] = NULL;
    (void) pclose(locale_out);

    /* sort every entry; the terminator is not part of the count */
    qsort(list, n_elems, sizeof(char *), locale_cmp);
    return list;
}

/*
 * Return the language tag list this process should advertise.
 *
 * The SSH_LANGS environment variable wins when it is set.  Otherwise the
 * current locale (see g11n_getlocale()) is converted to a language tag;
 * a missing or empty locale name yields "i-default".
 */
char *
g11n_getlangs()
{
    const char *from_env = getenv("SSH_LANGS");
    char *current;

    if (from_env != NULL)
        return xstrdup(from_env);

    current = g11n_getlocale();
    if (current == NULL || current[0] == '\0')
        return xstrdup("i-default");

    return g11n_locale2langtag(current);
}

/*
 * Convert a NULL-terminated array of locale names into a comma-separated
 * list of distinct language tags, in order of first appearance.  Locales
 * that g11n_locale2langtag() cannot convert are ignored.
 *
 * Returns whatever xjoin() produces for the collected tags.
 */
char *
g11n_locales2langs(char **locale_set)
{
    char **langtags, **p, **q;
    char *langtag, *joined;
    u_int n_locales = 0, n_langtags = 0;
    int seen;

    for (p = locale_set; p && *p; p++)
        n_locales++;

    /* zero-filled, so the list is NULL-terminated at every step */
    langtags = (char **) xmalloc((n_locales + 1) * sizeof(char *));
    memset(langtags, 0, (n_locales + 1) * sizeof(char *));

    for (p = locale_set; p && *p; p++) {
        if ((langtag = g11n_locale2langtag(*p)) == NULL)
            continue;

        seen = 0;
        for (q = langtags; *q != NULL; q++) {
            if (strcmp(*q, langtag) == 0) {
                seen = 1;
                break;
            }
        }
        if (!seen)
            langtags[n_langtags++] = langtag;
    }

    joined = xjoin(langtags, ',');

    /*
     * Release the pointer array, which used to be leaked.
     * NOTE(review): the tag strings themselves are deliberately not freed
     * here - confirm that every g11n_locale2langtag() result is heap
     * memory before adding that.
     */
    xfree(langtags);

    return joined;
}

/*
 * qsort() comparator for arrays of C strings: orders the elements by
 * plain strcmp().
 */
static
int
sortcmp(const void *d1, const void *d2)
{
    const char *const *lhs = d1;
    const char *const *rhs = d2;

    return strcmp(*lhs, *rhs);
}

/*
 * Compare two language tags sub-tag by sub-tag (case-sensitively).
 *
 * Returns:
 *   0 - the primary sub-tags differ
 *   1 - the primary sub-tags match, but only one tag has a second sub-tag
 *       or the second sub-tags differ
 *   2 - the primary sub-tags match and either neither tag has a second
 *       sub-tag or the second sub-tags match as well
 *
 * Sub-tags after the second one are not looked at.
 */
int
g11n_langtag_match(char *langtag1, char *langtag2)
{
    /* length of the primary sub-tag: up to the first '-' or the end */
    size_t span1 = strcspn(langtag1, "-");
    size_t span2 = strcspn(langtag2, "-");
    char next1, next2;

    if (span1 != span2 || strncmp(langtag1, langtag2, span1) != 0)
        return 0;		/* no match */

    /* each of these is either '-' or the terminating NUL */
    next1 = langtag1[span1];
    next2 = langtag2[span2];

    if (next1 == '\0' || next2 == '\0') {
        /* exact when both tags end here, partial when only one does */
        return (next1 == next2) ? 2 : 1;
    }

    /* both tags continue: compare the second (country) sub-tags */
    langtag1 += span1 + 1;
    langtag2 += span2 + 1;
    span1 = strcspn(langtag1, "-");
    span2 = strcspn(langtag2, "-");

    if (span1 == span2 && strncmp(langtag1, langtag2, span1) == 0)
        return 2;		/* country tags matched - exact match */

    return 1;
}

/* Returns non-zero when the NULL-terminated list already contains tag. */
static int
g11n_langtag_list_has(char **list, const char *tag)
{
    for (; list && *list; list++) {
        if (strcmp(*list, tag) == 0)
            return 1;
    }
    return 0;
}

/*
 * Intersect two comma-separated language tag sets.
 *
 * The result lists, in set1 order, first every tag of set1 that matches a
 * tag of set2 exactly (g11n_langtag_match() == 2), then the primary
 * sub-tag of every set1 tag that matches a set2 tag only partially
 * (g11n_langtag_match() == 1).  Duplicates are dropped.
 *
 * Returns the xjoin()ed result; the intermediate lists are released.
 */
char *
g11n_langtag_set_intersect(char *set1, char *set2)
{
    char **list1, **list2, **list3, **p, **q;
    char *set3, *lang_subtag, *dash;
    u_int n1, n2, n3, capacity;

    list1 = xsplit(set1, ',');
    list2 = xsplit(set2, ',');
    for (n1 = 0, p = list1; p && *p; p++)
        n1++;
    for (n2 = 0, p = list2; p && *p; p++)
        n2++;

    /* room for 'capacity' entries plus the NULL terminator */
    capacity = n1 + n2;
    list3 = (char **) xmalloc(sizeof(char *) * (capacity + 1));
    *list3 = NULL;
    n3 = 0;

    /*
     * The first set (the user's) must keep its order; sorting the second
     * one does not affect the outcome.
     */
    if (n2 > 1)
        qsort(list2, n2, sizeof(char *), sortcmp);

    /* pass 1: exact matches */
    for (p = list1; p && *p; p++) {
        for (q = list2; q && *q; q++) {
            if (g11n_langtag_match(*p, *q) != 2)
                continue;
            /*
             * n3 must stay below capacity so that the terminator written
             * below still lands inside the allocation.
             */
            if (n3 < capacity && !g11n_langtag_list_has(list3, *p)) {
                list3[n3++] = xstrdup(*p);
                list3[n3] = NULL;
            }
            break;	/* further matches of *p would be duplicates */
        }
    }

    /* pass 2: partial matches contribute only the primary sub-tag */
    for (p = list1; p && *p; p++) {
        for (q = list2; q && *q; q++) {
            if (g11n_langtag_match(*p, *q) != 1)
                continue;
            lang_subtag = xstrdup(*p);
            if ((dash = strchr(lang_subtag, '-')) != NULL)
                *dash = '\0';
            if (n3 < capacity &&
                !g11n_langtag_list_has(list3, lang_subtag)) {
                list3[n3++] = lang_subtag;
                list3[n3] = NULL;
            } else {
                xfree(lang_subtag);
            }
            break;	/* further matches of *p would be duplicates */
        }
    }

    set3 = xjoin(list3, ',');
    xfree_split_list(list1);
    xfree_split_list(list2);
    xfree_split_list(list3);

    return set3;
}

/*
 * Client-side language negotiation: intersect the client's and the
 * server's language tag sets and pick the first tag of the result.
 *
 * Returns an xstrdup()ed copy of that tag, or NULL when the sets have
 * nothing in common.
 */
char *
g11n_clnt_langtag_negotiate(char *clnt_langtags, char *srvr_langtags)
{
    char *list, *result = NULL;
    char **xlist;

    list = g11n_langtag_set_intersect(clnt_langtags, srvr_langtags);
    if (list == NULL)
        return NULL;

    xlist = xsplit(list, ',');
    xfree(list);

    if (xlist == NULL)
        return NULL;

    if (*xlist != NULL)
        result = xstrdup(*xlist);

    /* release the split list on every path, including the empty one */
    xfree_split_list(xlist);

    return result;
}

/*
 * Returns non-zero when the locale name carries a ".UTF-8" codeset, i.e.
 * the text after the first '.' is exactly "UTF-8", optionally followed by
 * an '@' modifier.
 */
static int
locale_cmp_is_utf8(const char *name)
{
    const char *dot = strchr(name, '.');

    if (dot == NULL || strncmp(dot + 1, "UTF-8", 5) != 0)
        return 0;

    return dot[6] == '\0' || dot[6] == '@';
}

/* Returns non-zero for the default locales "C", "POSIX" and "common". */
static int
locale_cmp_is_default(const char *name)
{
    return strcmp(name, "C") == 0 ||
        strcmp(name, "POSIX") == 0 ||
        strcmp(name, "common") == 0;
}

/*
 * qsort() comparator for locale names.  UTF-8 locales sort before all
 * others, the default locales ("C", "POSIX", "common") sort after all
 * others, and ties are broken with a straight strcmp().
 */
static
int
locale_cmp(const void *d1, const void *d2)
{
    const char *s1 = *(char *const *) d1;
    const char *s2 = *(char *const *) d2;
    int utf8_1 = locale_cmp_is_utf8(s1);
    int utf8_2 = locale_cmp_is_utf8(s2);
    int dflt_1, dflt_2;

    /* prefer UTF-8 locales */
    if (utf8_1 != utf8_2)
        return utf8_1 ? -1 : 1;

    /* prefer any locale over the default locales */
    dflt_1 = locale_cmp_is_default(s1);
    dflt_2 = locale_cmp_is_default(s2);
    if (dflt_1 != dflt_2)
        return dflt_1 ? 1 : -1;

    return strcmp(s1, s2);
}


/* Returns non-zero when the NULL-terminated list already holds locale. */
static int
g11n_locale_list_has(char **list, const char *locale)
{
    for (; list && *list; list++) {
        if (strcmp(*list, locale) == 0)
            return 1;
    }
    return 0;
}

/*
 * Pick the locales of locale_set that match the comma-separated language
 * tags in langtag_set.
 *
 * locale_set is sorted in place with locale_cmp().  For each language
 * tag, in order, the first locale that g11n_langtag_matches_locale()
 * rates as an exact match (2) is taken; a second pass does the same for
 * partial matches (1).  A locale is never listed twice.
 *
 * Returns a NULL-terminated array of xstrdup()ed locale names; it is
 * empty when nothing matches.
 */
char **
g11n_langtag_set_locale_set_intersect(char *langtag_set,
				      char **locale_set)
{
    char **langtag_list, **result, **p, **q;
    u_int n_langtags, n_locales, n_results, max_results, wanted;

    /* Count and sort the locales */
    for (n_locales = 0, p = locale_set; p && *p; p++)
        n_locales++;
    if (n_locales > 1)
        qsort(locale_set, n_locales, sizeof(char *), locale_cmp);

    /* Split and count the lang tags; a NULL set is treated as empty */
    langtag_list = (langtag_set != NULL) ? xsplit(langtag_set, ',') : NULL;
    for (n_langtags = 0, p = langtag_list; p && *p; p++)
        n_langtags++;

    /* each lang tag adds at most one locale per pass */
    max_results = MIN(n_locales, n_langtags) * 2;
    result = (char **) xmalloc(sizeof(char *) * (max_results + 1));
    *result = NULL;
    n_results = 0;

    /* More specific matches (2) first, then the partial ones (1) */
    for (wanted = 2; wanted >= 1; wanted--) {
        for (p = langtag_list; p && *p; p++) {
            for (q = locale_set; q && *q; q++) {
                if (g11n_langtag_matches_locale(*p, *q) != wanted)
                    continue;
                /* check the whole result list for duplicates */
                if (n_results < max_results &&
                    !g11n_locale_list_has(result, *q)) {
                    result[n_results++] = xstrdup(*q);
                    result[n_results] = NULL;
                }
                break;	/* only the first matching locale counts */
            }
        }
    }

    if (langtag_list != NULL)
        xfree_split_list(langtag_list);

    return result;
}

/*
 * Server-side locale negotiation: pick the best server locale for the
 * client's language tags.
 *
 * srvr_locales may be NULL, in which case the installed locales are
 * looked up with g11n_getlocales().  Note that the locale list that is
 * used gets sorted in place by the intersection routine.
 *
 * Returns an xstrdup()ed locale name, or NULL when nothing matches.
 */
char *
g11n_srvr_locale_negotiate(char *clnt_langtags, char **srvr_locales)
{
    char **locales, **own_locales = NULL, **results;
    char *result = NULL;

    if (srvr_locales != NULL)
        locales = srvr_locales;
    else
        locales = own_locales = g11n_getlocales();

    results = g11n_langtag_set_locale_set_intersect(clnt_langtags, locales);
    if (results != NULL) {
        if (*results != NULL)
            result = xstrdup(*results);
        xfree_split_list(results);
    }

    /* the list fetched here belongs to this function; release it */
    if (own_locales != NULL)
        xfree_split_list(own_locales);

    return result;
}


/*
 * Functions for validating ASCII and UTF-8 strings
 *
 * The error_str parameter is an optional pointer to a char variable
 * where to store a string suitable for use with error() or fatal() or
 * friends.
 *
 * The return value is 0 if success, EILSEQ or EINVAL.
 *
 */

/*
 * Check that a string consists of 7-bit ASCII characters only.
 *
 * str       - the string to check
 * len       - number of bytes to check; 0 means "up to the terminating NUL"
 * error_str - optional; set to a static message when the check fails
 *
 * Returns 0 if the string is valid, EILSEQ otherwise.  With an explicit
 * len, a NUL byte inside the first len bytes is rejected as well, and
 * nothing beyond len bytes is read.  A NULL str is only accepted when
 * len is 0.
 */
u_int
g11n_validate_ascii(const char *str, u_int len, u_char **error_str)
{
    const u_char *p = (const u_char *) str;
    u_int i, n;

    if (str == NULL) {
        if (len == 0)
            return 0;
        goto invalid;
    }

    n = (len != 0) ? len : (u_int) strlen(str);

    for (i = 0; i < n; i++) {
        /* 8-bit bytes are not ASCII; a NUL means the string is too short */
        if (p[i] == '\0' || (p[i] & 0x80))
            goto invalid;
    }
    return 0;

invalid:
    if (error_str)
        *error_str = (u_char *) "Invalid ASCII string";
    return EILSEQ;
}

/*
 * Check that a string is well-formed UTF-8.
 *
 * str       - the string to check
 * len       - number of bytes to check; 0 means "up to the terminating NUL"
 * error_str - optional; set to a static message when the check fails
 *
 * Returns 0 if the string is valid (or str is NULL), EILSEQ otherwise.
 * Checking stops at the first NUL byte even when len reaches further.
 *
 * Rejected: stray continuation bytes, the lead bytes 0xfe/0xff,
 * sequences cut short by the end of the string, bad continuation bytes,
 * overlong encodings, UTF-16 surrogates and U+FFFE / U+FFFF.
 * NOTE(review): 5- and 6-byte sequences are still accepted, as before.
 */
u_int
g11n_validate_utf8(const u_char *str, u_int len, u_char **error_str)
{
    /* smallest code point that needs a sequence of the given length */
    static const u_int min_codepoint[7] = {
        0, 0, 0x80, 0x800, 0x10000, 0x200000, 0x4000000
    };
    const u_char *p, *end;
    u_int c, l, i;

    if (str == NULL)
        return 0;

    if (len == 0)
        len = (u_int) strlen((const char *) str);
    end = str + len;

    p = str;
    while (p < end && *p != '\0') {
        /* 7-bit chars are fine */
        if (!(*p & 0x80)) {
            p++;
            continue;
        }

        /* 8-bit chars begin a sequence: get its length from the lead byte */
        if (*p < 0xc0)
            goto invalid;	/* continuation byte without a lead byte */
        else if (*p < 0xe0)
            l = 2;
        else if (*p < 0xf0)
            l = 3;
        else if (*p < 0xf8)
            l = 4;
        else if (*p < 0xfc)
            l = 5;
        else if (*p < 0xfe)
            l = 6;
        else
            goto invalid;

        /* the whole sequence must fit; ending exactly at 'end' is fine */
        if ((size_t) (end - p) < l)
            goto invalid;

        /* build the code point; the lead byte carries 7 - l payload bits */
        c = *p & (0x7f >> l);
        for (i = 1; i < l; i++) {
            if ((p[i] & 0xc0) != 0x80)
                goto invalid;
            c = (c << 6) | (p[i] & 0x3f);
        }

        /* overlong detection */
        if (c < min_codepoint[l])
            goto invalid;

        /* UTF-16 surrogates and other illegal code points */
        if ((c >= 0xd800 && c <= 0xdfff) || c == 0xfffe || c == 0xffff)
            goto invalid;

        p += l;
    }
    return 0;

invalid:
    if (error_str)
        *error_str = (u_char *) "Invalid UTF-8 sequence";
    return EILSEQ;
}

/*
 * Functions for converting to ASCII or UTF-8 from the local codeset
 * Functions for converting from ASCII or UTF-8 to the local codeset
 *
 * The error_str parameter is an optional pointer to a char variable
 * where to store a string suitable for use with error() or fatal() or
 * friends.
 *
 * The err parameter is an optional pointer to an integer where 0
 * (success) or EILSEQ or EINVAL will be stored (failure).
 *
 * These functions return NULL if the conversion fails.
 *
 */

/*
 * Convert an ASCII string to the local codeset.
 *
 * str       - NUL-terminated ASCII string
 * err_ptr   - optional; receives an errno value on failure
 * error_str - optional; receives a static message on failure
 *
 * Returns a newly allocated string, or NULL on failure.  When the local
 * codeset is ASCII the string is validated and copied; otherwise it is
 * run through do_iconv(), which also returns NULL for an empty string.
 *
 * The conversion descriptor is opened on the first successful call and
 * kept for the life of the process.
 */
u_char *
g11n_convert_from_ascii(const char *str, int *err_ptr, u_char **error_str)
{
    static u_int initialized = 0;
    static u_int do_convert = 0;
    static iconv_t cd;	/* static: must survive between calls */
    const char *codeset;
    int err;

    if (!initialized) {
        codeset = nl_langinfo(CODESET);
        /*
         * iconv_open() fails if the to/from codesets are the
         * same, and there are aliases of codesets to boot...
         */
        if (strcmp("646", codeset) == 0 ||
            strcmp("ASCII", codeset) == 0 ||
            strcmp("US-ASCII", codeset) == 0) {
            do_convert = 0;
        } else {
            cd = iconv_open(codeset, "646");
            if (cd == (iconv_t) -1) {
                /* report the failure; the next call will try again */
                if (err_ptr)
                    *err_ptr = errno;
                if (error_str)
                    *error_str = (u_char *)
                        "Cannot convert ASCII strings to the local codeset";
                return NULL;
            }
            do_convert = 1;
        }
        initialized = 1;
    }

    if (!do_convert) {
        if ((err = g11n_validate_ascii(str, 0, error_str))) {
            if (err_ptr)
                *err_ptr = err;
            return NULL;
        }
        return (u_char *) xstrdup(str);
    }

    return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str);
}

/*
 * Convert a UTF-8 string to the local codeset.
 *
 * str       - NUL-terminated UTF-8 string
 * err_ptr   - optional; receives an errno value on failure
 * error_str - optional; receives a static message on failure
 *
 * Returns a newly allocated string, or NULL on failure.  When the local
 * codeset is UTF-8 the string is validated and copied; otherwise it is
 * run through do_iconv(), which also returns NULL for an empty string.
 *
 * The conversion descriptor is opened on the first successful call and
 * kept for the life of the process.
 */
u_char *
g11n_convert_from_utf8(const u_char *str, int *err_ptr, u_char **error_str)
{
    static u_int initialized = 0;
    static u_int do_convert = 0;
    static iconv_t cd;	/* static: must survive between calls */
    const char *codeset;
    int err;

    if (!initialized) {
        codeset = nl_langinfo(CODESET);
        /*
         * iconv_open() fails if the to/from codesets are the
         * same, and there are aliases of codesets to boot...
         */
        if (strcmp("UTF-8", codeset) == 0 ||
            strcmp("UTF8", codeset) == 0) {
            do_convert = 0;
        } else {
            cd = iconv_open(codeset, "UTF-8");
            if (cd == (iconv_t) -1) {
                /* report the failure; the next call will try again */
                if (err_ptr)
                    *err_ptr = errno;
                if (error_str)
                    *error_str = (u_char *)
                        "Cannot convert UTF-8 strings to the local codeset";
                return NULL;
            }
            do_convert = 1;
        }
        initialized = 1;
    }

    if (!do_convert) {
        if ((err = g11n_validate_utf8(str, 0, error_str))) {
            if (err_ptr)
                *err_ptr = err;
            return NULL;
        }
        return (u_char *) xstrdup((char *) str);
    }

    return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str);
}

/*
 * Convert a string from the local codeset to ASCII.
 *
 * str       - NUL-terminated string in the local codeset
 * err_ptr   - optional; receives an errno value on failure
 * error_str - optional; receives a static message on failure
 *
 * Returns a newly allocated string, or NULL on failure.  When the local
 * codeset is ASCII the string is copied as is (without validation);
 * otherwise it is run through do_iconv(), which also returns NULL for an
 * empty string.
 *
 * The conversion descriptor is opened on the first successful call and
 * kept for the life of the process.
 */
char *
g11n_convert_to_ascii(const u_char *str, int *err_ptr, u_char **error_str)
{
    static u_int initialized = 0;
    static u_int do_convert = 0;
    static iconv_t cd;	/* static: must survive between calls */
    const char *codeset;

    if (!initialized) {
        codeset = nl_langinfo(CODESET);
        /*
         * iconv_open() fails if the to/from codesets are the
         * same, and there are aliases of codesets to boot...
         */
        if (strcmp("646", codeset) == 0 ||
            strcmp("ASCII", codeset) == 0 ||
            strcmp("US-ASCII", codeset) == 0) {
            do_convert = 0;
        } else {
            cd = iconv_open("646", codeset);
            if (cd == (iconv_t) -1) {
                /* report the failure; the next call will try again */
                if (err_ptr)
                    *err_ptr = errno;
                if (error_str)
                    *error_str = (u_char *)
                        "Cannot convert strings from the local codeset to ASCII";
                return NULL;
            }
            do_convert = 1;
        }
        initialized = 1;
    }

    if (!do_convert)
        return xstrdup((char *) str);

    return (char *) do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str);
}

/*
 * Convert a string from the local codeset to UTF-8.
 *
 * str       - NUL-terminated string in the local codeset
 * err_ptr   - optional; receives an errno value on failure
 * error_str - optional; receives a static message on failure
 *
 * Returns a newly allocated string, or NULL on failure.  When the local
 * codeset is UTF-8 the string is copied as is (without validation);
 * otherwise it is run through do_iconv(), which also returns NULL for an
 * empty string.
 *
 * The conversion descriptor is opened on the first successful call and
 * kept for the life of the process.
 */
u_char *
g11n_convert_to_utf8(const u_char *str, int *err_ptr, u_char **error_str)
{
    static u_int initialized = 0;
    static u_int do_convert = 0;
    static iconv_t cd;	/* static: must survive between calls */
    const char *codeset;

    if (!initialized) {
        codeset = nl_langinfo(CODESET);
        /*
         * iconv_open() fails if the to/from codesets are the
         * same, and there are aliases of codesets to boot...
         */
        if (strcmp("UTF-8", codeset) == 0 ||
            strcmp("UTF8", codeset) == 0) {
            do_convert = 0;
        } else {
            cd = iconv_open("UTF-8", codeset);
            if (cd == (iconv_t) -1) {
                /* report the failure; the next call will try again */
                if (err_ptr)
                    *err_ptr = errno;
                if (error_str)
                    *error_str = (u_char *)
                        "Cannot convert strings from the local codeset to UTF-8";
                return NULL;
            }
            do_convert = 1;
        }
        initialized = 1;
    }

    if (!do_convert)
        return (u_char *) xstrdup((char *) str);

    return do_iconv(cd, NULL, str, 0, NULL, err_ptr, error_str);
}


/*
 * Wrapper around iconv()
 *
 * cd      - conversion descriptor from iconv_open()
 * mul_ptr - optional in/out: log2 of the output/input size ratio to start
 *           with; updated to the ratio that was finally needed (at most 3,
 *           i.e. the output may grow to 8 times the input size)
 * buf     - input bytes
 * len     - input length; 0 means buf is a NUL-terminated string
 * outlen  - optional; receives the number of bytes produced
 * err     - optional; receives an errno value on failure
 * err_str - optional; receives a static message when the output would
 *           need more than 8 times the input size
 *
 * Returns a newly allocated, NUL-terminated buffer, or NULL on failure or
 * when buf is NULL or starts with a NUL byte.
 *
 * The caller is responsible for freeing the result and for handling
 * (errno && errno != E2BIG) (i.e., EILSEQ, EINVAL, EBADF).
 */

static
u_char *
do_iconv(iconv_t cd, u_int *mul_ptr,
	 const void *buf, u_int len,
	 u_int *outlen, int *err,
	 u_char **err_str)
{
    size_t inbytesleft, outbytesleft, converted_size, used;
    char *outbuf;
    u_char *converted;
    const char *inbuf;
    u_int mul = 0;
    int saved_errno;

    if (!buf || !(*(const char *) buf))
        return NULL;

    /* a descriptor from a failed iconv_open() must not reach iconv() */
    if (cd == (iconv_t) -1) {
        if (err)
            *err = EBADF;
        return NULL;
    }

    if (len == 0)
        len = strlen(buf);

    /* reset the conversion descriptor to its initial shift state */
    (void) iconv(cd, NULL, NULL, NULL, NULL);

    if (mul_ptr)
        mul = *mul_ptr;
    if (mul > 3)
        mul = 3;		/* never start beyond the 8x limit */

    inbuf = (const char *) buf;
    inbytesleft = len;
    converted_size = ((size_t) len << mul);
    converted = (u_char *) xmalloc(converted_size + 1); /* for null */
    outbuf = (char *) converted;
    outbytesleft = converted_size;

    while (inbytesleft > 0) {
        if (iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft) !=
            (size_t) -1)
            continue;

        if (errno != E2BIG) {
            /*
             * Let the caller deal with iconv() errors, probably by
             * calling fatal(); keep errno safe across xfree().
             */
            saved_errno = errno;
            xfree(converted);
            if (err)
                *err = saved_errno;
            return NULL;
        }

        /* Out of output space: give up beyond 8 times the input size. */
        if (mul > 2) {
            if (err_str)
                *err_str = (u_char *) "Conversion to UTF-8 failed due to "
                    "preposterous space requirements";
            if (err)
                *err = EILSEQ;
            xfree(converted);
            return NULL;
        }

        /*
         * Double the output buffer, then re-point outbuf just past the
         * bytes already produced and recompute the space that is left.
         */
        used = converted_size - outbytesleft;
        converted_size = ((size_t) len << ++mul);
        converted = xrealloc(converted, converted_size + 1);
        outbuf = (char *) converted + used;
        outbytesleft = converted_size - used;
    }

    *outbuf = '\0'; /* ensure null-termination */
    if (outlen)
        *outlen = converted_size - outbytesleft;
    if (mul_ptr)
        *mul_ptr = mul;
    return converted;
}