xref: /illumos-gate/usr/src/lib/libc/port/locale/euc.c (revision 83eaeac78ef2f69de16c2fecd3077c0ee9269743)
1 /*
2  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
3  * Copyright (c) 2002-2004 Tim J. Robbins. All rights reserved.
4  * Copyright (c) 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * This code is derived from software contributed to Berkeley by
8  * Paul Borman at Krystal Technologies.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 4. Neither the name of the University nor the names of its contributors
19  *    may be used to endorse or promote products derived from this software
20  *    without specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include "lint.h"
36 #include <errno.h>
37 #include <limits.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 #include <sys/types.h>
42 #include "runetype.h"
43 #include "mblocal.h"
44 
/*
 * Smaller of two values.  This is a function-like macro, so each argument
 * may be evaluated twice; pass only expressions without side effects.
 * In this file it is applied to an int and a size_t, so the comparison is
 * carried out after the usual arithmetic conversions.
 */
45 #define	MIN(a, b)	((a) < (b) ? (a) : (b))
46 
/*
 * Forward declarations of the file-local EUC conversion routines.
 * _EUC_init() stores their addresses in the __mbrtowc, __mbsinit and
 * __wcrtomb hooks.
 */
47 static size_t	_EUC_mbrtowc(wchar_t *_RESTRICT_KYWD,
48 		    const char *_RESTRICT_KYWD,
49 		    size_t, mbstate_t *_RESTRICT_KYWD);
50 static int	_EUC_mbsinit(const mbstate_t *);
51 static size_t	_EUC_wcrtomb(char *_RESTRICT_KYWD, wchar_t,
52 		    mbstate_t *_RESTRICT_KYWD);
53 
/*
 * Per-locale EUC parameters.  _EUC_init() parses them from the locale's
 * variable string and the conversion routines read them through CEI.
 * Index 0..3 selects the codeset.
 */
54 typedef struct {
55 	int	count[4];	/* bytes per character; for codesets 2 and 3 */
				/* this includes the leading SS2/SS3 byte */
56 	wchar_t	bits[4];	/* tag OR'd into a wide char of that codeset */
57 	wchar_t	mask;		/* wide char bits that hold the codeset tag */
58 } _EucInfo;
59 
/*
 * Conversion state kept inside the caller's mbstate_t (the routines below
 * cast the mbstate_t pointer to this type).  want == 0 is the initial
 * state; otherwise a multibyte character is partially read.
 */
60 typedef struct {
61 	wchar_t	ch;	/* bytes accumulated so far, most significant first */
62 	int	set;	/* codeset (0..3) of the character in progress */
63 	int	want;	/* bytes still needed to complete the character */
64 } _EucState;
65 
66 int
67 _EUC_init(_RuneLocale *rl)
68 {
69 	_EucInfo *ei;
70 	int x, new__mb_cur_max;
71 	char *v, *e;
72 
73 	if (rl->__variable == NULL)
74 		return (EINVAL);
75 
76 	v = (char *)rl->__variable;
77 
78 	while (*v == ' ' || *v == '\t')
79 		++v;
80 
81 	if ((ei = malloc(sizeof (_EucInfo))) == NULL)
82 		return (errno == 0 ? ENOMEM : errno);
83 
84 	new__mb_cur_max = 0;
85 	for (x = 0; x < 4; ++x) {
86 		ei->count[x] = (int)strtol(v, &e, 0);
87 		if (v == e || !(v = e)) {
88 			free(ei);
89 			return (EINVAL);
90 		}
91 		if (new__mb_cur_max < ei->count[x])
92 			new__mb_cur_max = ei->count[x];
93 		while (*v == ' ' || *v == '\t')
94 			++v;
95 		ei->bits[x] = (int)strtol(v, &e, 0);
96 		if (v == e || !(v = e)) {
97 			free(ei);
98 			return (EINVAL);
99 		}
100 		while (*v == ' ' || *v == '\t')
101 			++v;
102 	}
103 	ei->mask = (int)strtol(v, &e, 0);
104 	if (v == e || !(v = e)) {
105 		free(ei);
106 		return (EINVAL);
107 	}
108 	rl->__variable = ei;
109 	rl->__variable_len = sizeof (_EucInfo);
110 	_CurrentRuneLocale = rl;
111 	__ctype[520] = new__mb_cur_max;
112 	__mbrtowc = _EUC_mbrtowc;
113 	__wcrtomb = _EUC_wcrtomb;
114 	__mbsinit = _EUC_mbsinit;
115 	charset_is_ascii = 0;
116 	return (0);
117 }
118 
119 static int
120 _EUC_mbsinit(const mbstate_t *ps)
121 {
122 
123 	return (ps == NULL || ((const _EucState *)ps)->want == 0);
124 }
125 
/* EUC parameters of the current rune locale, as stored by _EUC_init(). */
126 #define	CEI	((_EucInfo *)(_CurrentRuneLocale->__variable))
127 
/* Lead bytes that introduce a character of codeset 2 and 3 respectively. */
128 #define	_SS2	0x008e
129 #define	_SS3	0x008f
130 
/*
 * OR'd into the payload of codeset 2 and 3 characters before the bytes
 * after the single-shift are written, so each of them has bit 7 set.
 */
131 #define	GR_BITS	0x80808080 /* XXX: to be fixed */
132 
133 static int
134 _euc_set(uint_t c)
135 {
136 
137 	c &= 0xff;
138 	return ((c & 0x80) ? c == _SS3 ? 3 : c == _SS2 ? 2 : 1 : 0);
139 }
140 
141 static size_t
142 _EUC_mbrtowc(wchar_t *_RESTRICT_KYWD pwc, const char *_RESTRICT_KYWD s,
143     size_t n, mbstate_t *_RESTRICT_KYWD ps)
144 {
145 	_EucState *es;
146 	int i, set, want;
147 	wchar_t wc;
148 	const char *os;
149 
150 	es = (_EucState *)ps;
151 
152 	if (es->want < 0 || es->want > MB_CUR_MAX || es->set < 0 ||
153 	    es->set > 3) {
154 		errno = EINVAL;
155 		return ((size_t)-1);
156 	}
157 
158 	if (s == NULL) {
159 		s = "";
160 		n = 1;
161 		pwc = NULL;
162 	}
163 
164 	if (n == 0)
165 		/* Incomplete multibyte sequence */
166 		return ((size_t)-2);
167 
168 	os = s;
169 
170 	if (es->want == 0) {
171 		want = CEI->count[set = _euc_set(*s)];
172 		if (set == 2 || set == 3) {
173 			--want;
174 			if (--n == 0) {
175 				/* Incomplete multibyte sequence */
176 				es->set = set;
177 				es->want = want;
178 				es->ch = 0;
179 				return ((size_t)-2);
180 			}
181 			++s;
182 			if (*s == '\0') {
183 				errno = EILSEQ;
184 				return ((size_t)-1);
185 			}
186 		}
187 		wc = (unsigned char)*s++;
188 	} else {
189 		set = es->set;
190 		want = es->want;
191 		wc = es->ch;
192 	}
193 	for (i = (es->want == 0) ? 1 : 0; i < MIN(want, n); i++) {
194 		if (*s == '\0') {
195 			errno = EILSEQ;
196 			return ((size_t)-1);
197 		}
198 		wc = (wc << 8) | (unsigned char)*s++;
199 	}
200 	if (i < want) {
201 		/* Incomplete multibyte sequence */
202 		es->set = set;
203 		es->want = want - i;
204 		es->ch = wc;
205 		return ((size_t)-2);
206 	}
207 	wc = (wc & ~CEI->mask) | CEI->bits[set];
208 	if (pwc != NULL)
209 		*pwc = wc;
210 	es->want = 0;
211 	return (wc == L'\0' ? 0 : s - os);
212 }
213 
214 static size_t
215 _EUC_wcrtomb(char *_RESTRICT_KYWD s, wchar_t wc, mbstate_t *_RESTRICT_KYWD ps)
216 {
217 	_EucState *es;
218 	wchar_t m, nm;
219 	int i, len;
220 
221 	es = (_EucState *)ps;
222 
223 	if (es->want != 0) {
224 		errno = EINVAL;
225 		return ((size_t)-1);
226 	}
227 
228 	if (s == NULL)
229 		/* Reset to initial shift state (no-op) */
230 		return (1);
231 
232 	m = wc & CEI->mask;
233 	nm = wc & ~m;
234 
235 	if (m == CEI->bits[1]) {
236 CodeSet1:
237 		/* Codeset 1: The first byte must have 0x80 in it. */
238 		i = len = CEI->count[1];
239 		while (i-- > 0) {
240 			*(unsigned char *)s = (nm >> (i << 3)) | 0x80;
241 			s++;
242 		}
243 	} else {
244 		if (m == CEI->bits[0])
245 			i = len = CEI->count[0];
246 		else if (m == CEI->bits[2]) {
247 			i = len = CEI->count[2];
248 			*(unsigned char *)s = _SS2;
249 			s++;
250 			--i;
251 			/* SS2 designates G2 into GR */
252 			nm |= GR_BITS;
253 		} else if (m == CEI->bits[3]) {
254 			i = len = CEI->count[3];
255 			*(unsigned char *)s = _SS3;
256 			s++;
257 			--i;
258 			/* SS3 designates G3 into GR */
259 			nm |= GR_BITS;
260 		} else
261 			goto CodeSet1;	/* Bletch */
262 		while (i-- > 0)
263 			*s++ = (nm >> (i << 3)) & 0xff;
264 	}
265 	return (len);
266 }
267