xref: /freebsd/lib/libc/locale/euc.c (revision 2051a8f2d57177b199cf0cff25a18aaec0142086)
158f0484fSRodney W. Grimes /*-
2ca2dae42STim J. Robbins  * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
358f0484fSRodney W. Grimes  * Copyright (c) 1993
458f0484fSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
558f0484fSRodney W. Grimes  *
658f0484fSRodney W. Grimes  * This code is derived from software contributed to Berkeley by
758f0484fSRodney W. Grimes  * Paul Borman at Krystal Technologies.
858f0484fSRodney W. Grimes  *
958f0484fSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
1058f0484fSRodney W. Grimes  * modification, are permitted provided that the following conditions
1158f0484fSRodney W. Grimes  * are met:
1258f0484fSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
1358f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
1458f0484fSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
1558f0484fSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
1658f0484fSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
1758f0484fSRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
1858f0484fSRodney W. Grimes  *    must display the following acknowledgement:
1958f0484fSRodney W. Grimes  *	This product includes software developed by the University of
2058f0484fSRodney W. Grimes  *	California, Berkeley and its contributors.
2158f0484fSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
2258f0484fSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
2358f0484fSRodney W. Grimes  *    without specific prior written permission.
2458f0484fSRodney W. Grimes  *
2558f0484fSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2658f0484fSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2758f0484fSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2858f0484fSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2958f0484fSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3058f0484fSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3158f0484fSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3258f0484fSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3358f0484fSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3458f0484fSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3558f0484fSRodney W. Grimes  * SUCH DAMAGE.
3658f0484fSRodney W. Grimes  */
3758f0484fSRodney W. Grimes 
3858f0484fSRodney W. Grimes #if defined(LIBC_SCCS) && !defined(lint)
3958f0484fSRodney W. Grimes static char sccsid[] = "@(#)euc.c	8.1 (Berkeley) 6/4/93";
4058f0484fSRodney W. Grimes #endif /* LIBC_SCCS and not lint */
41ca2dae42STim J. Robbins #include <sys/param.h>
42333fc21eSDavid E. O'Brien __FBSDID("$FreeBSD$");
4358f0484fSRodney W. Grimes 
4458f0484fSRodney W. Grimes #include <errno.h>
45ca2dae42STim J. Robbins #include <limits.h>
4602f4f60aSTim J. Robbins #include <runetype.h>
4758f0484fSRodney W. Grimes #include <stdlib.h>
48ca2dae42STim J. Robbins #include <string.h>
4902f4f60aSTim J. Robbins #include <wchar.h>
502051a8f2STim J. Robbins #include "mblocal.h"
5102f4f60aSTim J. Robbins 
5202f4f60aSTim J. Robbins int	_EUC_init(_RuneLocale *);
5302f4f60aSTim J. Robbins size_t	_EUC_mbrtowc(wchar_t * __restrict, const char * __restrict, size_t,
5402f4f60aSTim J. Robbins 	    mbstate_t * __restrict);
55ca2dae42STim J. Robbins int	_EUC_mbsinit(const mbstate_t *);
5602f4f60aSTim J. Robbins size_t	_EUC_wcrtomb(char * __restrict, wchar_t, mbstate_t * __restrict);
5758f0484fSRodney W. Grimes 
/*
 * Per-locale EUC parameters, parsed by _EUC_init() from the locale's
 * variable string.  Index 0..3 is the codeset number returned by _euc_set().
 */
typedef struct {
	int	count[4];	/* bytes per character, one entry per codeset */
	wchar_t	bits[4];	/* tag bits OR'ed into wide chars of a codeset */
	wchar_t	mask;		/* bits of a wide char that hold the tag */
} _EucInfo;
6358f0484fSRodney W. Grimes 
/*
 * Conversion state stored inside the caller's mbstate_t: the bytes of a
 * multibyte character seen so far but not yet converted.
 */
typedef struct {
	int	count;			/* number of valid entries in bytes[] */
	u_char	bytes[MB_LEN_MAX];	/* buffered partial character */
} _EucState;
68ca2dae42STim J. Robbins 
6958f0484fSRodney W. Grimes int
7002f4f60aSTim J. Robbins _EUC_init(_RuneLocale *rl)
7158f0484fSRodney W. Grimes {
7258f0484fSRodney W. Grimes 	_EucInfo *ei;
7376692b80SAndrey A. Chernov 	int x, new__mb_cur_max;
7458f0484fSRodney W. Grimes 	char *v, *e;
7558f0484fSRodney W. Grimes 
7676692b80SAndrey A. Chernov 	if (rl->variable == NULL)
7758f0484fSRodney W. Grimes 		return (EFTYPE);
7876692b80SAndrey A. Chernov 
7958f0484fSRodney W. Grimes 	v = (char *)rl->variable;
8058f0484fSRodney W. Grimes 
8158f0484fSRodney W. Grimes 	while (*v == ' ' || *v == '\t')
8258f0484fSRodney W. Grimes 		++v;
8358f0484fSRodney W. Grimes 
8476692b80SAndrey A. Chernov 	if ((ei = malloc(sizeof(_EucInfo))) == NULL)
85ec5ca2ebSAndrey A. Chernov 		return (errno == 0 ? ENOMEM : errno);
8676692b80SAndrey A. Chernov 
8776692b80SAndrey A. Chernov 	new__mb_cur_max = 0;
8858f0484fSRodney W. Grimes 	for (x = 0; x < 4; ++x) {
8958f0484fSRodney W. Grimes 		ei->count[x] = (int)strtol(v, &e, 0);
9058f0484fSRodney W. Grimes 		if (v == e || !(v = e)) {
9158f0484fSRodney W. Grimes 			free(ei);
9258f0484fSRodney W. Grimes 			return (EFTYPE);
9358f0484fSRodney W. Grimes 		}
9476692b80SAndrey A. Chernov 		if (new__mb_cur_max < ei->count[x])
9576692b80SAndrey A. Chernov 			new__mb_cur_max = ei->count[x];
9658f0484fSRodney W. Grimes 		while (*v == ' ' || *v == '\t')
9758f0484fSRodney W. Grimes 			++v;
9858f0484fSRodney W. Grimes 		ei->bits[x] = (int)strtol(v, &e, 0);
9958f0484fSRodney W. Grimes 		if (v == e || !(v = e)) {
10058f0484fSRodney W. Grimes 			free(ei);
10158f0484fSRodney W. Grimes 			return (EFTYPE);
10258f0484fSRodney W. Grimes 		}
10358f0484fSRodney W. Grimes 		while (*v == ' ' || *v == '\t')
10458f0484fSRodney W. Grimes 			++v;
10558f0484fSRodney W. Grimes 	}
10658f0484fSRodney W. Grimes 	ei->mask = (int)strtol(v, &e, 0);
10758f0484fSRodney W. Grimes 	if (v == e || !(v = e)) {
10858f0484fSRodney W. Grimes 		free(ei);
10958f0484fSRodney W. Grimes 		return (EFTYPE);
11058f0484fSRodney W. Grimes 	}
11145206d5cSAndrey A. Chernov 	rl->variable = ei;
11258f0484fSRodney W. Grimes 	rl->variable_len = sizeof(_EucInfo);
11358f0484fSRodney W. Grimes 	_CurrentRuneLocale = rl;
11476692b80SAndrey A. Chernov 	__mb_cur_max = new__mb_cur_max;
11502f4f60aSTim J. Robbins 	__mbrtowc = _EUC_mbrtowc;
11602f4f60aSTim J. Robbins 	__wcrtomb = _EUC_wcrtomb;
117ca2dae42STim J. Robbins 	__mbsinit = _EUC_mbsinit;
11858f0484fSRodney W. Grimes 	return (0);
11958f0484fSRodney W. Grimes }
12058f0484fSRodney W. Grimes 
121ca2dae42STim J. Robbins int
122ca2dae42STim J. Robbins _EUC_mbsinit(const mbstate_t *ps)
123ca2dae42STim J. Robbins {
124ca2dae42STim J. Robbins 
125fa02ee78STim J. Robbins 	return (ps == NULL || ((const _EucState *)ps)->count == 0);
126ca2dae42STim J. Robbins }
127ca2dae42STim J. Robbins 
/* EUC parameters of the current rune locale, as installed by _EUC_init(). */
#define	CEI	((_EucInfo *)(_CurrentRuneLocale->variable))

/* Lead bytes that select codeset 2 and codeset 3 respectively. */
#define	_SS2	0x008e
#define	_SS3	0x008f

/* High bit of each of four bytes; OR'ed into codeset 2/3 output bytes. */
#define	GR_BITS	0x80808080 /* XXX: to be fixed */
134a243e676SJeroen Ruigrok van der Werven 
13502f4f60aSTim J. Robbins static __inline int
13602f4f60aSTim J. Robbins _euc_set(u_int c)
13758f0484fSRodney W. Grimes {
13858f0484fSRodney W. Grimes 	c &= 0xff;
13958f0484fSRodney W. Grimes 	return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0);
14058f0484fSRodney W. Grimes }
14158f0484fSRodney W. Grimes 
14202f4f60aSTim J. Robbins size_t
14302f4f60aSTim J. Robbins _EUC_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
144ca2dae42STim J. Robbins     mbstate_t * __restrict ps)
14502f4f60aSTim J. Robbins {
146ca2dae42STim J. Robbins 	_EucState *es;
147ca2dae42STim J. Robbins 	int len, ocount, remain, set;
14802f4f60aSTim J. Robbins 	wchar_t wc;
149ca2dae42STim J. Robbins 	size_t ncopy;
15002f4f60aSTim J. Robbins 
151ca2dae42STim J. Robbins 	es = (_EucState *)ps;
152ca2dae42STim J. Robbins 
153fc813796STim J. Robbins 	if (es->count < 0 || es->count > sizeof(es->bytes)) {
154fc813796STim J. Robbins 		errno = EINVAL;
155fc813796STim J. Robbins 		return ((size_t)-1);
156fc813796STim J. Robbins 	}
157fc813796STim J. Robbins 
158ca2dae42STim J. Robbins 	if (s == NULL) {
159ca2dae42STim J. Robbins 		s = "";
160ca2dae42STim J. Robbins 		n = 1;
161ca2dae42STim J. Robbins 		pwc = NULL;
162ca2dae42STim J. Robbins 	}
163ca2dae42STim J. Robbins 
164ca2dae42STim J. Robbins 	ncopy = MIN(MIN(n, MB_CUR_MAX), sizeof(es->bytes) - es->count);
165ca2dae42STim J. Robbins 	memcpy(es->bytes + es->count, s, ncopy);
166ca2dae42STim J. Robbins 	ocount = es->count;
167ca2dae42STim J. Robbins 	es->count += ncopy;
168ca2dae42STim J. Robbins 	s = (char *)es->bytes;
169ca2dae42STim J. Robbins 	n = es->count;
170ca2dae42STim J. Robbins 
17102f4f60aSTim J. Robbins 	if (n == 0 || (size_t)(len = CEI->count[set = _euc_set(*s)]) > n)
17202f4f60aSTim J. Robbins 		/* Incomplete multibyte sequence */
17302f4f60aSTim J. Robbins 		return ((size_t)-2);
17402f4f60aSTim J. Robbins 	wc = 0;
17502f4f60aSTim J. Robbins 	remain = len;
17658f0484fSRodney W. Grimes 	switch (set) {
17758f0484fSRodney W. Grimes 	case 3:
17858f0484fSRodney W. Grimes 	case 2:
17902f4f60aSTim J. Robbins 		--remain;
18002f4f60aSTim J. Robbins 		++s;
18158f0484fSRodney W. Grimes 		/* FALLTHROUGH */
18258f0484fSRodney W. Grimes 	case 1:
18358f0484fSRodney W. Grimes 	case 0:
18488af941aSTim J. Robbins 		wc = (unsigned char)*s++;
18588af941aSTim J. Robbins 		while (--remain > 0) {
18688af941aSTim J. Robbins 			if (*s == '\0') {
18788af941aSTim J. Robbins 				errno = EILSEQ;
18888af941aSTim J. Robbins 				return ((size_t)-1);
18988af941aSTim J. Robbins 			}
19002f4f60aSTim J. Robbins 			wc = (wc << 8) | (unsigned char)*s++;
19188af941aSTim J. Robbins 		}
19258f0484fSRodney W. Grimes 		break;
19358f0484fSRodney W. Grimes 	}
19402f4f60aSTim J. Robbins 	wc = (wc & ~CEI->mask) | CEI->bits[set];
19502f4f60aSTim J. Robbins 	if (pwc != NULL)
19602f4f60aSTim J. Robbins 		*pwc = wc;
197ca2dae42STim J. Robbins 	es->count = 0;
198ca2dae42STim J. Robbins 	return (wc == L'\0' ? 0 : len - ocount);
19958f0484fSRodney W. Grimes }
20058f0484fSRodney W. Grimes 
20102f4f60aSTim J. Robbins size_t
202fc813796STim J. Robbins _EUC_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
20358f0484fSRodney W. Grimes {
204fc813796STim J. Robbins 	_EucState *es;
20502f4f60aSTim J. Robbins 	wchar_t m, nm;
20658f0484fSRodney W. Grimes 	int i, len;
20758f0484fSRodney W. Grimes 
208fc813796STim J. Robbins 	es = (_EucState *)ps;
209fc813796STim J. Robbins 
210fc813796STim J. Robbins 	if (es->count != 0) {
211fc813796STim J. Robbins 		errno = EINVAL;
212fc813796STim J. Robbins 		return ((size_t)-1);
213fc813796STim J. Robbins 	}
214fc813796STim J. Robbins 
21502f4f60aSTim J. Robbins 	if (s == NULL)
21602f4f60aSTim J. Robbins 		/* Reset to initial shift state (no-op) */
21702f4f60aSTim J. Robbins 		return (1);
21802f4f60aSTim J. Robbins 
21902f4f60aSTim J. Robbins 	m = wc & CEI->mask;
22002f4f60aSTim J. Robbins 	nm = wc & ~m;
22102f4f60aSTim J. Robbins 
22258f0484fSRodney W. Grimes 	if (m == CEI->bits[1]) {
22358f0484fSRodney W. Grimes CodeSet1:
22458f0484fSRodney W. Grimes 		/* Codeset 1: The first byte must have 0x80 in it. */
22558f0484fSRodney W. Grimes 		i = len = CEI->count[1];
22658f0484fSRodney W. Grimes 		while (i-- > 0)
22702f4f60aSTim J. Robbins 			*s++ = (nm >> (i << 3)) | 0x80;
22858f0484fSRodney W. Grimes 	} else {
22902f4f60aSTim J. Robbins 		if (m == CEI->bits[0])
23058f0484fSRodney W. Grimes 			i = len = CEI->count[0];
23102f4f60aSTim J. Robbins 		else if (m == CEI->bits[2]) {
23258f0484fSRodney W. Grimes 			i = len = CEI->count[2];
23302f4f60aSTim J. Robbins 			*s++ = _SS2;
23458f0484fSRodney W. Grimes 			--i;
235a243e676SJeroen Ruigrok van der Werven 			/* SS2 designates G2 into GR */
236a243e676SJeroen Ruigrok van der Werven 			nm |= GR_BITS;
23702f4f60aSTim J. Robbins 		} else if (m == CEI->bits[3]) {
23858f0484fSRodney W. Grimes 			i = len = CEI->count[3];
23902f4f60aSTim J. Robbins 			*s++ = _SS3;
24058f0484fSRodney W. Grimes 			--i;
241a243e676SJeroen Ruigrok van der Werven 			/* SS3 designates G3 into GR */
242a243e676SJeroen Ruigrok van der Werven 			nm |= GR_BITS;
24358f0484fSRodney W. Grimes 		} else
24458f0484fSRodney W. Grimes 			goto CodeSet1;	/* Bletch */
24558f0484fSRodney W. Grimes 		while (i-- > 0)
24602f4f60aSTim J. Robbins 			*s++ = (nm >> (i << 3)) & 0xff;
24758f0484fSRodney W. Grimes 	}
24858f0484fSRodney W. Grimes 	return (len);
24958f0484fSRodney W. Grimes }
250