xref: /freebsd/lib/libc/locale/euc.c (revision fa02ee78c88a0293964700aae6b758c0b4959a11)
/*-
 * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Paul Borman at Krystal Technologies.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
3758f0484fSRodney W. Grimes 
3858f0484fSRodney W. Grimes #if defined(LIBC_SCCS) && !defined(lint)
3958f0484fSRodney W. Grimes static char sccsid[] = "@(#)euc.c	8.1 (Berkeley) 6/4/93";
4058f0484fSRodney W. Grimes #endif /* LIBC_SCCS and not lint */
41ca2dae42STim J. Robbins #include <sys/param.h>
42333fc21eSDavid E. O'Brien __FBSDID("$FreeBSD$");
4358f0484fSRodney W. Grimes 
4458f0484fSRodney W. Grimes #include <errno.h>
45ca2dae42STim J. Robbins #include <limits.h>
4602f4f60aSTim J. Robbins #include <runetype.h>
4758f0484fSRodney W. Grimes #include <stdlib.h>
48ca2dae42STim J. Robbins #include <string.h>
4902f4f60aSTim J. Robbins #include <wchar.h>
5058f0484fSRodney W. Grimes 
5102f4f60aSTim J. Robbins extern size_t (*__mbrtowc)(wchar_t * __restrict, const char * __restrict,
5202f4f60aSTim J. Robbins     size_t, mbstate_t * __restrict);
53ca2dae42STim J. Robbins extern int (*__mbsinit)(const mbstate_t *);
5402f4f60aSTim J. Robbins extern size_t (*__wcrtomb)(char * __restrict, wchar_t, mbstate_t * __restrict);
5502f4f60aSTim J. Robbins 
5602f4f60aSTim J. Robbins int	_EUC_init(_RuneLocale *);
5702f4f60aSTim J. Robbins size_t	_EUC_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t,
5802f4f60aSTim J. Robbins 	    mbstate_t * __restrict);
59ca2dae42STim J. Robbins int	_EUC_mbsinit(const mbstate_t *);
6002f4f60aSTim J. Robbins size_t	_EUC_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict);
6158f0484fSRodney W. Grimes 
/*
 * Per-locale EUC parameters, filled in by _EUC_init() from the locale's
 * variable string.  Index 0-3 is the codeset number returned by _euc_set().
 */
typedef struct {
	int	count[4];	/* bytes in a multibyte character of each codeset */
	wchar_t	bits[4];	/* tag bits OR'ed into a wide character of each codeset */
	wchar_t	mask;		/* wide-character bits that hold the codeset tag */
} _EucInfo;
6758f0484fSRodney W. Grimes 
68ca2dae42STim J. Robbins typedef struct {
69ca2dae42STim J. Robbins 	int	count;
70ca2dae42STim J. Robbins 	u_char	bytes[MB_LEN_MAX];
71ca2dae42STim J. Robbins } _EucState;
72ca2dae42STim J. Robbins 
7358f0484fSRodney W. Grimes int
7402f4f60aSTim J. Robbins _EUC_init(_RuneLocale *rl)
7558f0484fSRodney W. Grimes {
7658f0484fSRodney W. Grimes 	_EucInfo *ei;
7776692b80SAndrey A. Chernov 	int x, new__mb_cur_max;
7858f0484fSRodney W. Grimes 	char *v, *e;
7958f0484fSRodney W. Grimes 
8076692b80SAndrey A. Chernov 	if (rl->variable == NULL)
8158f0484fSRodney W. Grimes 		return (EFTYPE);
8276692b80SAndrey A. Chernov 
8358f0484fSRodney W. Grimes 	v = (char *)rl->variable;
8458f0484fSRodney W. Grimes 
8558f0484fSRodney W. Grimes 	while (*v == ' ' || *v == '\t')
8658f0484fSRodney W. Grimes 		++v;
8758f0484fSRodney W. Grimes 
8876692b80SAndrey A. Chernov 	if ((ei = malloc(sizeof(_EucInfo))) == NULL)
89ec5ca2ebSAndrey A. Chernov 		return (errno == 0 ? ENOMEM : errno);
9076692b80SAndrey A. Chernov 
9176692b80SAndrey A. Chernov 	new__mb_cur_max = 0;
9258f0484fSRodney W. Grimes 	for (x = 0; x < 4; ++x) {
9358f0484fSRodney W. Grimes 		ei->count[x] = (int)strtol(v, &e, 0);
9458f0484fSRodney W. Grimes 		if (v == e || !(v = e)) {
9558f0484fSRodney W. Grimes 			free(ei);
9658f0484fSRodney W. Grimes 			return (EFTYPE);
9758f0484fSRodney W. Grimes 		}
9876692b80SAndrey A. Chernov 		if (new__mb_cur_max < ei->count[x])
9976692b80SAndrey A. Chernov 			new__mb_cur_max = ei->count[x];
10058f0484fSRodney W. Grimes 		while (*v == ' ' || *v == '\t')
10158f0484fSRodney W. Grimes 			++v;
10258f0484fSRodney W. Grimes 		ei->bits[x] = (int)strtol(v, &e, 0);
10358f0484fSRodney W. Grimes 		if (v == e || !(v = e)) {
10458f0484fSRodney W. Grimes 			free(ei);
10558f0484fSRodney W. Grimes 			return (EFTYPE);
10658f0484fSRodney W. Grimes 		}
10758f0484fSRodney W. Grimes 		while (*v == ' ' || *v == '\t')
10858f0484fSRodney W. Grimes 			++v;
10958f0484fSRodney W. Grimes 	}
11058f0484fSRodney W. Grimes 	ei->mask = (int)strtol(v, &e, 0);
11158f0484fSRodney W. Grimes 	if (v == e || !(v = e)) {
11258f0484fSRodney W. Grimes 		free(ei);
11358f0484fSRodney W. Grimes 		return (EFTYPE);
11458f0484fSRodney W. Grimes 	}
11545206d5cSAndrey A. Chernov 	rl->variable = ei;
11658f0484fSRodney W. Grimes 	rl->variable_len = sizeof(_EucInfo);
11758f0484fSRodney W. Grimes 	_CurrentRuneLocale = rl;
11876692b80SAndrey A. Chernov 	__mb_cur_max = new__mb_cur_max;
11902f4f60aSTim J. Robbins 	__mbrtowc = _EUC_mbrtowc;
12002f4f60aSTim J. Robbins 	__wcrtomb = _EUC_wcrtomb;
121ca2dae42STim J. Robbins 	__mbsinit = _EUC_mbsinit;
12258f0484fSRodney W. Grimes 	return (0);
12358f0484fSRodney W. Grimes }
12458f0484fSRodney W. Grimes 
125ca2dae42STim J. Robbins int
126ca2dae42STim J. Robbins _EUC_mbsinit(const mbstate_t *ps)
127ca2dae42STim J. Robbins {
128ca2dae42STim J. Robbins 
129fa02ee78STim J. Robbins 	return (ps == NULL || ((const _EucState *)ps)->count == 0);
130ca2dae42STim J. Robbins }
131ca2dae42STim J. Robbins 
/* The current rune locale's `variable' field, viewed as an _EucInfo. */
#define	CEI	((_EucInfo *)(_CurrentRuneLocale->variable))

/* Lead bytes that introduce a codeset 2 / codeset 3 character. */
#define	_SS2	0x008e
#define	_SS3	0x008f

/* OR'ed into codeset 2/3 payloads so every emitted byte has its high bit set. */
#define	GR_BITS	0x80808080 /* XXX: to be fixed */
138a243e676SJeroen Ruigrok van der Werven 
13902f4f60aSTim J. Robbins static __inline int
14002f4f60aSTim J. Robbins _euc_set(u_int c)
14158f0484fSRodney W. Grimes {
14258f0484fSRodney W. Grimes 	c &= 0xff;
14358f0484fSRodney W. Grimes 	return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0);
14458f0484fSRodney W. Grimes }
14558f0484fSRodney W. Grimes 
14602f4f60aSTim J. Robbins size_t
14702f4f60aSTim J. Robbins _EUC_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
148ca2dae42STim J. Robbins     mbstate_t * __restrict ps)
14902f4f60aSTim J. Robbins {
150ca2dae42STim J. Robbins 	_EucState *es;
151ca2dae42STim J. Robbins 	int len, ocount, remain, set;
15202f4f60aSTim J. Robbins 	wchar_t wc;
153ca2dae42STim J. Robbins 	size_t ncopy;
15402f4f60aSTim J. Robbins 
155ca2dae42STim J. Robbins 	es = (_EucState *)ps;
156ca2dae42STim J. Robbins 
157ca2dae42STim J. Robbins 	if (s == NULL) {
158ca2dae42STim J. Robbins 		s = "";
159ca2dae42STim J. Robbins 		n = 1;
160ca2dae42STim J. Robbins 		pwc = NULL;
161ca2dae42STim J. Robbins 	}
162ca2dae42STim J. Robbins 
163ca2dae42STim J. Robbins 	ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(es->bytes) - es->count);
164ca2dae42STim J. Robbins 	memcpy(es->bytes + es->count, s, ncopy);
165ca2dae42STim J. Robbins 	ocount = es->count;
166ca2dae42STim J. Robbins 	es->count += ncopy;
167ca2dae42STim J. Robbins 	s = (char *)es->bytes;
168ca2dae42STim J. Robbins 	n = es->count;
169ca2dae42STim J. Robbins 
17002f4f60aSTim J. Robbins 	if (n == 0 || (size_t)(len = CEI->count[set = _euc_set(*s)]) > n)
17102f4f60aSTim J. Robbins 		/* Incomplete multibyte sequence */
17202f4f60aSTim J. Robbins 		return ((size_t)-2);
17302f4f60aSTim J. Robbins 	wc = 0;
17402f4f60aSTim J. Robbins 	remain = len;
17558f0484fSRodney W. Grimes 	switch (set) {
17658f0484fSRodney W. Grimes 	case 3:
17758f0484fSRodney W. Grimes 	case 2:
17802f4f60aSTim J. Robbins 		--remain;
17902f4f60aSTim J. Robbins 		++s;
18058f0484fSRodney W. Grimes 		/* FALLTHROUGH */
18158f0484fSRodney W. Grimes 	case 1:
18258f0484fSRodney W. Grimes 	case 0:
18302f4f60aSTim J. Robbins 		while (remain-- > 0)
18402f4f60aSTim J. Robbins 			wc = (wc << 8) | (unsigned char)*s++;
18558f0484fSRodney W. Grimes 		break;
18658f0484fSRodney W. Grimes 	}
18702f4f60aSTim J. Robbins 	wc = (wc & ~CEI->mask) | CEI->bits[set];
18802f4f60aSTim J. Robbins 	if (pwc != NULL)
18902f4f60aSTim J. Robbins 		*pwc = wc;
190ca2dae42STim J. Robbins 	es->count = 0;
191ca2dae42STim J. Robbins 	return (wc == L'\0' ? 0 : len - ocount);
19258f0484fSRodney W. Grimes }
19358f0484fSRodney W. Grimes 
19402f4f60aSTim J. Robbins size_t
19502f4f60aSTim J. Robbins _EUC_wcrtomb(char * __restrict s, wchar_t wc,
19602f4f60aSTim J. Robbins     mbstate_t * __restrict ps __unused)
19758f0484fSRodney W. Grimes {
19802f4f60aSTim J. Robbins 	wchar_t m, nm;
19958f0484fSRodney W. Grimes 	int i, len;
20058f0484fSRodney W. Grimes 
20102f4f60aSTim J. Robbins 	if (s == NULL)
20202f4f60aSTim J. Robbins 		/* Reset to initial shift state (no-op) */
20302f4f60aSTim J. Robbins 		return (1);
20402f4f60aSTim J. Robbins 
20502f4f60aSTim J. Robbins 	m = wc & CEI->mask;
20602f4f60aSTim J. Robbins 	nm = wc & ~m;
20702f4f60aSTim J. Robbins 
20858f0484fSRodney W. Grimes 	if (m == CEI->bits[1]) {
20958f0484fSRodney W. Grimes CodeSet1:
21058f0484fSRodney W. Grimes 		/* Codeset 1: The first byte must have 0x80 in it. */
21158f0484fSRodney W. Grimes 		i = len = CEI->count[1];
21258f0484fSRodney W. Grimes 		while (i-- > 0)
21302f4f60aSTim J. Robbins 			*s++ = (nm >> (i << 3)) | 0x80;
21458f0484fSRodney W. Grimes 	} else {
21502f4f60aSTim J. Robbins 		if (m == CEI->bits[0])
21658f0484fSRodney W. Grimes 			i = len = CEI->count[0];
21702f4f60aSTim J. Robbins 		else if (m == CEI->bits[2]) {
21858f0484fSRodney W. Grimes 			i = len = CEI->count[2];
21902f4f60aSTim J. Robbins 			*s++ = _SS2;
22058f0484fSRodney W. Grimes 			--i;
221a243e676SJeroen Ruigrok van der Werven 			/* SS2 designates G2 into GR */
222a243e676SJeroen Ruigrok van der Werven 			nm |= GR_BITS;
22302f4f60aSTim J. Robbins 		} else if (m == CEI->bits[3]) {
22458f0484fSRodney W. Grimes 			i = len = CEI->count[3];
22502f4f60aSTim J. Robbins 			*s++ = _SS3;
22658f0484fSRodney W. Grimes 			--i;
227a243e676SJeroen Ruigrok van der Werven 			/* SS3 designates G3 into GR */
228a243e676SJeroen Ruigrok van der Werven 			nm |= GR_BITS;
22958f0484fSRodney W. Grimes 		} else
23058f0484fSRodney W. Grimes 			goto CodeSet1;	/* Bletch */
23158f0484fSRodney W. Grimes 		while (i-- > 0)
23202f4f60aSTim J. Robbins 			*s++ = (nm >> (i << 3)) & 0xff;
23358f0484fSRodney W. Grimes 	}
23458f0484fSRodney W. Grimes 	return (len);
23558f0484fSRodney W. Grimes }
236