xref: /freebsd/lib/libc/locale/euc.c (revision 35a04710d7286aa9538917fd7f8e417dbee95b82)
1 /*-
2  * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
3  * Copyright (c) 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Paul Borman at Krystal Technologies.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *	This product includes software developed by the University of
20  *	California, Berkeley and its contributors.
21  * 4. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  */
37 
38 #if defined(LIBC_SCCS) && !defined(lint)
39 static char sccsid[] = "@(#)euc.c	8.1 (Berkeley) 6/4/93";
40 #endif /* LIBC_SCCS and not lint */
41 #include <sys/param.h>
42 __FBSDID("$FreeBSD$");
43 
44 #include <errno.h>
45 #include <limits.h>
46 #include <runetype.h>
47 #include <stdlib.h>
48 #include <string.h>
49 #include <wchar.h>
50 #include "mblocal.h"
51 
52 extern int __mb_sb_limit;
53 
/*
 * Forward declarations of the EUC conversion routines that _EUC_init()
 * installs as the active multibyte hooks.
 */
static size_t	_EUC_mbrtowc(wchar_t * __restrict pwc,
		    const char * __restrict s, size_t n,
		    mbstate_t * __restrict ps);
static int	_EUC_mbsinit(const mbstate_t *ps);
static size_t	_EUC_wcrtomb(char * __restrict s, wchar_t wc,
		    mbstate_t * __restrict ps);
59 
/*
 * Parameters of one EUC encoding, parsed from the locale's variable
 * string by _EUC_init().  Index 0-3 selects the codeset.
 */
typedef struct {
	/*
	 * Encoded length in bytes of a character in each codeset.  For
	 * codesets 2 and 3 the length includes the SS2/SS3 prefix byte.
	 */
	int	count[4];
	/* Tag bits OR-ed into a wide character to mark its codeset. */
	wchar_t	bits[4];
	/* Bits of a wide character that hold the codeset tag. */
	wchar_t	mask;
} _EucInfo;
65 
/*
 * Conversion state kept inside the caller's mbstate_t between calls to
 * _EUC_mbrtowc().  The state is "initial" when want is zero.
 */
typedef struct {
	/* Bytes of the pending character accumulated so far. */
	wchar_t	ch;
	/* Codeset (0-3) of the pending character. */
	int	set;
	/* Number of bytes still needed to complete the character. */
	int	want;
} _EucState;
71 
72 int
73 _EUC_init(_RuneLocale *rl)
74 {
75 	_EucInfo *ei;
76 	int x, new__mb_cur_max;
77 	char *v, *e;
78 
79 	if (rl->__variable == NULL)
80 		return (EFTYPE);
81 
82 	v = (char *)rl->__variable;
83 
84 	while (*v == ' ' || *v == '\t')
85 		++v;
86 
87 	if ((ei = malloc(sizeof(_EucInfo))) == NULL)
88 		return (errno == 0 ? ENOMEM : errno);
89 
90 	new__mb_cur_max = 0;
91 	for (x = 0; x < 4; ++x) {
92 		ei->count[x] = (int)strtol(v, &e, 0);
93 		if (v == e || !(v = e)) {
94 			free(ei);
95 			return (EFTYPE);
96 		}
97 		if (new__mb_cur_max < ei->count[x])
98 			new__mb_cur_max = ei->count[x];
99 		while (*v == ' ' || *v == '\t')
100 			++v;
101 		ei->bits[x] = (int)strtol(v, &e, 0);
102 		if (v == e || !(v = e)) {
103 			free(ei);
104 			return (EFTYPE);
105 		}
106 		while (*v == ' ' || *v == '\t')
107 			++v;
108 	}
109 	ei->mask = (int)strtol(v, &e, 0);
110 	if (v == e || !(v = e)) {
111 		free(ei);
112 		return (EFTYPE);
113 	}
114 	rl->__variable = ei;
115 	rl->__variable_len = sizeof(_EucInfo);
116 	_CurrentRuneLocale = rl;
117 	__mb_cur_max = new__mb_cur_max;
118 	__mbrtowc = _EUC_mbrtowc;
119 	__wcrtomb = _EUC_wcrtomb;
120 	__mbsinit = _EUC_mbsinit;
121 	__mb_sb_limit = 256;
122 	return (0);
123 }
124 
125 static int
126 _EUC_mbsinit(const mbstate_t *ps)
127 {
128 
129 	return (ps == NULL || ((const _EucState *)ps)->want == 0);
130 }
131 
/* EUC parameters attached to the current rune locale. */
#define	CEI	((_EucInfo *)(_CurrentRuneLocale->__variable))

/* Single-shift bytes that prefix characters of codesets 2 and 3. */
#define	_SS2	0x008e
#define	_SS3	0x008f

/*
 * High bit of each of four bytes; OR-ed into the payload of codeset 2
 * and 3 characters by _EUC_wcrtomb().
 */
#define	GR_BITS	0x80808080 /* XXX: to be fixed */

/*
 * Classify the lead byte of a character.  Only the low eight bits of c
 * are examined.
 *
 * Returns 0 if the high bit is clear, 2 for SS2, 3 for SS3 and 1 for any
 * other byte with the high bit set.
 */
static __inline int
_euc_set(u_int c)
{
	u_int byte = c & 0xff;

	if ((byte & 0x80) == 0)
		return (0);
	if (byte == _SS3)
		return (3);
	if (byte == _SS2)
		return (2);
	return (1);
}
146 
147 static size_t
148 _EUC_mbrtowc(wchar_t * __restrict pwc, const char * __restrict s, size_t n,
149     mbstate_t * __restrict ps)
150 {
151 	_EucState *es;
152 	int i, set, want;
153 	wchar_t wc;
154 	const char *os;
155 
156 	es = (_EucState *)ps;
157 
158 	if (es->want < 0 || es->want > MB_CUR_MAX || es->set < 0 ||
159 	    es->set > 3) {
160 		errno = EINVAL;
161 		return ((size_t)-1);
162 	}
163 
164 	if (s == NULL) {
165 		s = "";
166 		n = 1;
167 		pwc = NULL;
168 	}
169 
170 	if (n == 0)
171 		/* Incomplete multibyte sequence */
172 		return ((size_t)-2);
173 
174 	os = s;
175 
176 	if (es->want == 0) {
177 		want = CEI->count[set = _euc_set(*s)];
178 		if (set == 2 || set == 3) {
179 			--want;
180 			if (--n == 0) {
181 				/* Incomplete multibyte sequence */
182 				es->set = set;
183 				es->want = want;
184 				es->ch = 0;
185 				return ((size_t)-2);
186 			}
187 			++s;
188 			if (*s == '\0') {
189 				errno = EILSEQ;
190 				return ((size_t)-1);
191 			}
192 		}
193 		wc = (unsigned char)*s++;
194 	} else {
195 		set = es->set;
196 		want = es->want;
197 		wc = es->ch;
198 	}
199 	for (i = (es->want == 0) ? 1 : 0; i < MIN(want, n); i++) {
200 		if (*s == '\0') {
201 			errno = EILSEQ;
202 			return ((size_t)-1);
203 		}
204 		wc = (wc << 8) | (unsigned char)*s++;
205 	}
206 	if (i < want) {
207 		/* Incomplete multibyte sequence */
208 		es->set = set;
209 		es->want = want - i;
210 		es->ch = wc;
211 		return ((size_t)-2);
212 	}
213 	wc = (wc & ~CEI->mask) | CEI->bits[set];
214 	if (pwc != NULL)
215 		*pwc = wc;
216 	es->want = 0;
217 	return (wc == L'\0' ? 0 : s - os);
218 }
219 
220 static size_t
221 _EUC_wcrtomb(char * __restrict s, wchar_t wc, mbstate_t * __restrict ps)
222 {
223 	_EucState *es;
224 	wchar_t m, nm;
225 	int i, len;
226 
227 	es = (_EucState *)ps;
228 
229 	if (es->want != 0) {
230 		errno = EINVAL;
231 		return ((size_t)-1);
232 	}
233 
234 	if (s == NULL)
235 		/* Reset to initial shift state (no-op) */
236 		return (1);
237 
238 	m = wc & CEI->mask;
239 	nm = wc & ~m;
240 
241 	if (m == CEI->bits[1]) {
242 CodeSet1:
243 		/* Codeset 1: The first byte must have 0x80 in it. */
244 		i = len = CEI->count[1];
245 		while (i-- > 0)
246 			*s++ = (nm >> (i << 3)) | 0x80;
247 	} else {
248 		if (m == CEI->bits[0])
249 			i = len = CEI->count[0];
250 		else if (m == CEI->bits[2]) {
251 			i = len = CEI->count[2];
252 			*s++ = _SS2;
253 			--i;
254 			/* SS2 designates G2 into GR */
255 			nm |= GR_BITS;
256 		} else if (m == CEI->bits[3]) {
257 			i = len = CEI->count[3];
258 			*s++ = _SS3;
259 			--i;
260 			/* SS3 designates G3 into GR */
261 			nm |= GR_BITS;
262 		} else
263 			goto CodeSet1;	/* Bletch */
264 		while (i-- > 0)
265 			*s++ = (nm >> (i << 3)) & 0xff;
266 	}
267 	return (len);
268 }
269