xref: /freebsd/lib/libiconv_modules/DECHanyu/citrus_dechanyu.c (revision f8ea072a542112d5e0e74a2d6ecf75d967c3054c)
1 /* $FreeBSD$ */
2 /* $NetBSD: citrus_dechanyu.c,v 1.3 2008/06/14 16:01:07 tnozaki Exp $ */
3 
4 /*-
5  * Copyright (c)2007 Citrus Project,
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 #include <sys/cdefs.h>
30 #include <sys/types.h>
31 
32 #include <assert.h>
33 #include <errno.h>
34 #include <limits.h>
35 #include <stddef.h>
36 #include <stdint.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <wchar.h>
41 
42 #include "citrus_namespace.h"
43 #include "citrus_types.h"
44 #include "citrus_bcs.h"
45 #include "citrus_module.h"
46 #include "citrus_stdenc.h"
47 #include "citrus_dechanyu.h"
48 
49 /* ----------------------------------------------------------------------
 * private definitions used by the templates
51  */
52 
/*
 * Conversion state: the bytes of a multibyte character that have been
 * collected so far but do not yet form a complete character.
 */
typedef struct {
	size_t	 chlen;		/* number of bytes currently held in ch[] */
	char	 ch[4];		/* pending bytes, filled from index 0 upward */
} _DECHanyuState;
57 
/*
 * Per-encoding information.  No code in this file reads or writes the
 * member; presumably it exists only so that the struct is not empty.
 */
typedef struct {
	int	 dummy;
} _DECHanyuEncodingInfo;
61 
/* Address of the `ei' member of the object `_cei_' points to. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
/* The member `states.s_<_func_>' of the object `_cei_' points to. */
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.__CONCAT(s_,_func_)

/* Builds the identifier _citrus_DECHanyu_<m>. */
#define _FUNCNAME(m)			__CONCAT(_citrus_DECHanyu_,m)
#define _ENCODING_INFO			_DECHanyuEncodingInfo
#define _ENCODING_STATE			_DECHanyuState
/* Longest sequence handled here: two prefix bytes, a lead and a trail byte. */
#define _ENCODING_MB_CUR_MAX(_ei_)		4
/* Also the value mbrtowc_priv reports in *nresult for a NULL input string. */
#define _ENCODING_IS_STATE_DEPENDENT		0
/*
 * NOTE(review): not referenced in this file; presumably consumed by the
 * template header included at the end -- confirm.
 */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
71 
/*
 * Put a conversion state into its initial condition by marking it as
 * holding no pending bytes.  The contents of psenc->ch are left as they
 * are; ei is not used.
 */
static __inline void
/*ARGSUSED*/
_citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei __unused,
    _DECHanyuState * __restrict psenc)
{

	psenc->chlen = 0;
}
80 
81 static __inline void
82 /*ARGSUSED*/
83 _citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei __unused,
84     void * __restrict pspriv, const _DECHanyuState * __restrict psenc)
85 {
86 
87 	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
88 }
89 
90 static __inline void
91 /*ARGSUSED*/
92 _citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei __unused,
93     _DECHanyuState * __restrict psenc,
94     const void * __restrict pspriv)
95 {
96 
97 	memcpy((void *)psenc, pspriv, sizeof(*psenc));
98 }
99 
/*
 * Module teardown hook.  This encoding keeps no per-module resources, so
 * there is nothing to release and the body is intentionally empty.
 */
static void
/*ARGSUSED*/
_citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei __unused)
{

	/* ei may be null */
}
107 
/*
 * Module setup hook.  None of the arguments (ei, var, lenvar) is examined;
 * the function always reports success by returning 0.
 */
static int
/*ARGSUSED*/
_citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei __unused,
    const void * __restrict var __unused, size_t lenvar __unused)
{

	/* ei may be null */
	return (0);
}
117 
/*
 * True when c is below 0x80, i.e. a byte value that mbrtowc_priv accepts
 * as a complete one-byte character.
 */
static __inline bool
is_singlebyte(int c)
{

	return (c < 0x80);
}
124 
/*
 * True when c lies in 0xA1..0xFE, the range accepted for the first byte
 * of a two-byte character.
 */
static __inline bool
is_leadbyte(int c)
{

	if (c < 0xA1)
		return (false);
	return (c < 0xFF);
}
131 
/*
 * True when c, with bit 0x80 cleared, lies in 0x21..0x7E.  Both
 * 0x21..0x7E and 0xA1..0xFE are therefore accepted as the second byte of
 * a two-byte character.
 */
static __inline bool
is_trailbyte(int c)
{
	const int low = c & ~0x80;

	return (0x21 <= low && low <= 0x7E);
}
139 
/*
 * True when c is 0xC2, the first byte of the two-byte prefix that
 * introduces a four-byte character.
 */
static __inline bool
is_hanyu1(int c)
{
	const int prefix_first = 0xC2;

	return (c == prefix_first);
}
146 
/*
 * True when c is 0xCB, the second byte of the two-byte prefix that
 * introduces a four-byte character.
 */
static __inline bool
is_hanyu2(int c)
{
	const int prefix_second = 0xCB;

	return (c == prefix_second);
}
153 
/*
 * The two prefix bytes 0xC2 0xCB placed in the upper 16 bits of a wide
 * character.  It is OR'ed into the wide character when the prefix was
 * seen on input, and tested for when a value above 0xFFFF is encoded.
 */
#define HANYUBIT	0xC2CB0000
155 
/*
 * True when c lies in 0x21..0x7E, the range accepted for each half of a
 * two-byte character set index.
 */
static __inline bool
is_94charset(int c)
{

	if (c < 0x21)
		return (false);
	return (c < 0x7F);
}
162 
163 static int
164 /*ARGSUSED*/
165 _citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei,
166     wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
167     _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
168 {
169 	const char *s0;
170 	wchar_t wc;
171 	int ch;
172 
173 	if (*s == NULL) {
174 		_citrus_DECHanyu_init_state(ei, psenc);
175 		*nresult = _ENCODING_IS_STATE_DEPENDENT;
176 		return (0);
177 	}
178 	s0 = *s;
179 
180 	wc = (wchar_t)0;
181 	switch (psenc->chlen) {
182 	case 0:
183 		if (n-- < 1)
184 			goto restart;
185 		ch = *s0++ & 0xFF;
186 		if (is_singlebyte(ch)) {
187 			if (pwc != NULL)
188 				*pwc = (wchar_t)ch;
189 			*nresult = (size_t)((ch == 0) ? 0 : 1);
190 			*s = s0;
191 			return (0);
192 		}
193 		if (!is_leadbyte(ch))
194 			goto ilseq;
195 		psenc->ch[psenc->chlen++] = ch;
196 		break;
197 	case 1:
198 		ch = psenc->ch[0] & 0xFF;
199 		if (!is_leadbyte(ch))
200 			return (EINVAL);
201 		break;
202 	case 2: case 3:
203 		ch = psenc->ch[0] & 0xFF;
204 		if (is_hanyu1(ch)) {
205 			ch = psenc->ch[1] & 0xFF;
206 			if (is_hanyu2(ch)) {
207 				wc |= (wchar_t)HANYUBIT;
208 				break;
209 			}
210 		}
211 	/*FALLTHROUGH*/
212 	default:
213 		return (EINVAL);
214 	}
215 
216 	switch (psenc->chlen) {
217 	case 1:
218 		if (is_hanyu1(ch)) {
219 			if (n-- < 1)
220 				goto restart;
221 			ch = *s0++ & 0xFF;
222 			if (!is_hanyu2(ch))
223 				goto ilseq;
224 			psenc->ch[psenc->chlen++] = ch;
225 			wc |= (wchar_t)HANYUBIT;
226 			if (n-- < 1)
227 				goto restart;
228 			ch = *s0++ & 0xFF;
229 			if (!is_leadbyte(ch))
230 				goto ilseq;
231 			psenc->ch[psenc->chlen++] = ch;
232 		}
233 		break;
234 	case 2:
235 		if (n-- < 1)
236 			goto restart;
237 		ch = *s0++ & 0xFF;
238 		if (!is_leadbyte(ch))
239 			goto ilseq;
240 		psenc->ch[psenc->chlen++] = ch;
241 		break;
242 	case 3:
243 		ch = psenc->ch[2] & 0xFF;
244 		if (!is_leadbyte(ch))
245 			return (EINVAL);
246 	}
247 	if (n-- < 1)
248 		goto restart;
249 	wc |= (wchar_t)(ch << 8);
250 	ch = *s0++ & 0xFF;
251 	if (!is_trailbyte(ch))
252 		goto ilseq;
253 	wc |= (wchar_t)ch;
254 	if (pwc != NULL)
255 		*pwc = wc;
256 	*nresult = (size_t)(s0 - *s);
257 	*s = s0;
258 	psenc->chlen = 0;
259 
260 	return (0);
261 
262 restart:
263 	*nresult = (size_t)-2;
264 	*s = s0;
265 	return (0);
266 
267 ilseq:
268 	*nresult = (size_t)-1;
269 	return (EILSEQ);
270 }
271 
272 static int
273 /*ARGSUSED*/
274 _citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei __unused,
275     char * __restrict s, size_t n, wchar_t wc,
276     _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
277 {
278 	int ch;
279 
280 	if (psenc->chlen != 0)
281 		return (EINVAL);
282 
283 	/* XXX: assume wchar_t as int */
284 	if ((uint32_t)wc <= 0x7F) {
285 		ch = wc & 0xFF;
286 	} else {
287 		if ((uint32_t)wc > 0xFFFF) {
288 			if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT)
289 				goto ilseq;
290 			psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF;
291 			psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF;
292 			wc &= 0xFFFF;
293 		}
294 		ch = (wc >> 8) & 0xFF;
295 		if (!is_leadbyte(ch))
296 			goto ilseq;
297 		psenc->ch[psenc->chlen++] = ch;
298 		ch = wc & 0xFF;
299 		if (!is_trailbyte(ch))
300 			goto ilseq;
301 	}
302 	psenc->ch[psenc->chlen++] = ch;
303 	if (n < psenc->chlen) {
304 		*nresult = (size_t)-1;
305 		return (E2BIG);
306 	}
307 	memcpy(s, psenc->ch, psenc->chlen);
308 	*nresult = psenc->chlen;
309 	psenc->chlen = 0;
310 
311 	return (0);
312 
313 ilseq:
314 	*nresult = (size_t)-1;
315 	return (EILSEQ);
316 }
317 
318 static __inline int
319 /*ARGSUSED*/
320 _citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei __unused,
321     _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
322 {
323 	wchar_t mask;
324 	int plane;
325 
326 	plane = 0;
327 	mask = 0x7F;
328 	/* XXX: assume wchar_t as int */
329 	if ((uint32_t)wc > 0x7F) {
330 		if ((uint32_t)wc > 0xFFFF) {
331 			if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT)
332 				return (EILSEQ);
333 			plane += 2;
334 		}
335 		if (!is_leadbyte((wc >> 8) & 0xFF) ||
336 		    !is_trailbyte(wc & 0xFF))
337 			return (EILSEQ);
338 		plane += (wc & 0x80) ? 1 : 2;
339 		mask |= 0x7F00;
340 	}
341 	*csid = plane;
342 	*idx = (_index_t)(wc & mask);
343 
344 	return (0);
345 }
346 
347 static __inline int
348 /*ARGSUSED*/
349 _citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei __unused,
350     wchar_t * __restrict wc, _csid_t csid, _index_t idx)
351 {
352 
353 	if (csid == 0) {
354 		if (idx > 0x7F)
355 			return (EILSEQ);
356 	} else if (csid <= 4) {
357 		if (!is_94charset(idx >> 8))
358 			return (EILSEQ);
359 		if (!is_94charset(idx & 0xFF))
360 			return (EILSEQ);
361 		if (csid % 2)
362 			idx |= 0x80;
363 		idx |= 0x8000;
364 		if (csid > 2)
365 			idx |= HANYUBIT;
366 	} else
367 		return (EILSEQ);
368 	*wc = (wchar_t)idx;
369 	return (0);
370 }
371 
372 static __inline int
373 /*ARGSUSED*/
374 _citrus_DECHanyu_stdenc_get_state_desc_generic(
375     _DECHanyuEncodingInfo * __restrict ei __unused,
376     _DECHanyuState * __restrict psenc, int * __restrict rstate)
377 {
378 
379 	*rstate = (psenc->chlen == 0)
380 	    ? _STDENC_SDGEN_INITIAL
381 	    : _STDENC_SDGEN_INCOMPLETE_CHAR;
382 	return (0);
383 }
384 
385 /* ----------------------------------------------------------------------
386  * public interface for stdenc
387  */
388 
/*
 * NOTE(review): both macros are defined outside this file; presumably
 * they declare the stdenc entry points for this encoding and define its
 * operations table -- confirm against their definitions.
 */
_CITRUS_STDENC_DECLS(DECHanyu);
_CITRUS_STDENC_DEF_OPS(DECHanyu);
391 
392 #include "citrus_stdenc_template.h"
393