xref: /freebsd/lib/libiconv_modules/GBK2K/citrus_gbk2k.c (revision 0a10f22a30d61a6f32777a236a82d461129538cc)
1 /* $FreeBSD$ */
2 /* $NetBSD: citrus_gbk2k.c,v 1.7 2008/06/14 16:01:07 tnozaki Exp $ */
3 
4 /*-
5  * Copyright (c)2003 Citrus Project,
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 #include <sys/types.h>
32 
33 #include <assert.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <stdbool.h>
37 #include <stddef.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <wchar.h>
42 
43 #include "citrus_namespace.h"
44 #include "citrus_types.h"
45 #include "citrus_bcs.h"
46 #include "citrus_module.h"
47 #include "citrus_stdenc.h"
48 #include "citrus_gbk2k.h"
49 
50 
/* ----------------------------------------------------------------------
 * private stuff used by templates
 */
54 
/*
 * Conversion state: the bytes of a multibyte character that has been
 * only partially read so far.
 */
typedef struct _GBK2KState {
	int	 chlen;		/* number of bytes currently stored in ch[] */
	char	 ch[4];		/* buffered bytes; a character is at most 4 */
} _GBK2KState;
59 
/*
 * Per-encoding settings.  mb_cur_max is 4 by default and 2 when the
 * module is initialised with the "2byte" option; the four-byte forms
 * are only accepted by the converters when it is 4.
 */
typedef struct {
	int	 mb_cur_max;	/* longest multibyte sequence allowed (2 or 4) */
} _GBK2KEncodingInfo;
63 
/*
 * Glue macros.  NOTE(review): these are presumably consumed by
 * citrus_stdenc_template.h, which is included at the end of this file;
 * the template itself is not visible here -- confirm before changing.
 */

/* Accessors for the members of a "cei" container object. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/* Maps a generic operation name m to this module's _citrus_GBK2K_m. */
#define _FUNCNAME(m)			_citrus_GBK2K_##m
/* Concrete types used for the encoding info and the conversion state. */
#define _ENCODING_INFO			_GBK2KEncodingInfo
#define _ENCODING_STATE			_GBK2KState
/* Longest multibyte sequence for a given encoding info. */
#define _ENCODING_MB_CUR_MAX(_ei_)	(_ei_)->mb_cur_max
/* Both flags are constant 0 for this encoding. */
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
73 
74 static __inline void
75 /*ARGSUSED*/
76 _citrus_GBK2K_init_state(_GBK2KEncodingInfo * __restrict ei __unused,
77     _GBK2KState * __restrict s)
78 {
79 
80 	memset(s, 0, sizeof(*s));
81 }
82 
83 static __inline void
84 /*ARGSUSED*/
85 _citrus_GBK2K_pack_state(_GBK2KEncodingInfo * __restrict ei __unused,
86     void * __restrict pspriv, const _GBK2KState * __restrict s)
87 {
88 
89 	memcpy(pspriv, (const void *)s, sizeof(*s));
90 }
91 
92 static __inline void
93 /*ARGSUSED*/
94 _citrus_GBK2K_unpack_state(_GBK2KEncodingInfo * __restrict ei __unused,
95     _GBK2KState * __restrict s, const void * __restrict pspriv)
96 {
97 
98 	memcpy((void *)s, pspriv, sizeof(*s));
99 }
100 
/*
 * True when the low 8 bits of c form a single-byte character
 * (0x00-0x7f), i.e. bit 7 is clear.  Higher bits of c are ignored.
 */
static __inline bool
_mb_singlebyte(int c)
{

	return ((c & 0x80) == 0);
}
107 
/*
 * True when the low 8 bits of c can start a multibyte character,
 * i.e. lie in 0x81-0xfe.  Higher bits of c are ignored.
 */
static __inline bool
_mb_leadbyte(int c)
{
	const int byte = c & 0xff;

	if (byte < 0x81)
		return (false);
	return (byte <= 0xfe);
}
115 
/*
 * True when the low 8 bits of c can be the second byte of a two-byte
 * character: 0x40-0x7e or 0x80-0xfe.  Higher bits of c are ignored.
 */
static __inline bool
_mb_trailbyte(int c)
{
	const int byte = c & 0xff;

	/* Everything from 0x40 up is accepted except 0x7f and 0xff. */
	if (byte < 0x40 || byte == 0x7f || byte == 0xff)
		return (false);
	return (true);
}
123 
/*
 * True when the low 8 bits of c lie in 0x30-0x39, the range used for
 * the 2nd and 4th bytes of a four-byte character.  Higher bits of c
 * are ignored.
 */
static __inline bool
_mb_surrogate(int c)
{
	const int byte = c & 0xff;

	return (!(byte < 0x30 || byte > 0x39));
}
131 
/*
 * Number of bytes needed to encode wide character v, judged only by
 * its magnitude as an unsigned 32-bit value: 1 if it fits in 8 bits,
 * 2 if it fits in 16 bits, otherwise 4.  No validity check is made.
 */
static __inline int
_mb_count(wchar_t v)
{
	const uint32_t code = (uint32_t)v; /* XXX */

	if (code <= 0xffU)
		return (1);
	return ((code <= 0xffffU) ? 2 : 4);
}
144 
/*
 * Helpers for the byte buffer in the conversion state.  Both expect a
 * variable named `psenc' (pointer to the state) to be in scope.
 *
 * _PSENC		the byte stored most recently; only meaningful
 *			while psenc->chlen > 0.
 * _PUSH_PSENC(c)	append c to the buffer, incrementing chlen; the
 *			expression evaluates to the byte just stored.
 */
#define	_PSENC		(psenc->ch[psenc->chlen - 1])
#define	_PUSH_PSENC(c)	(psenc->ch[psenc->chlen++] = (c))
147 
148 static int
149 _citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * __restrict ei,
150     wchar_t * __restrict pwc, const char ** __restrict s, size_t n,
151     _GBK2KState * __restrict psenc, size_t * __restrict nresult)
152 {
153 	const char *s0, *s1;
154 	wchar_t wc;
155 	int chlenbak, len;
156 
157 	s0 = *s;
158 
159 	if (s0 == NULL) {
160 		/* _citrus_GBK2K_init_state(ei, psenc); */
161 		psenc->chlen = 0;
162 		*nresult = 0;
163 		return (0);
164 	}
165 
166 	chlenbak = psenc->chlen;
167 
168 	switch (psenc->chlen) {
169 	case 3:
170 		if (!_mb_leadbyte (_PSENC))
171 			goto invalid;
172 	/* FALLTHROUGH */
173 	case 2:
174 		if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC))
175 			goto invalid;
176 	/* FALLTHROUGH */
177 	case 1:
178 		if (!_mb_leadbyte (_PSENC))
179 			goto invalid;
180 	/* FALLTHOROUGH */
181 	case 0:
182 		break;
183 	default:
184 		goto invalid;
185 	}
186 
187 	for (;;) {
188 		if (n-- < 1)
189 			goto restart;
190 
191 		_PUSH_PSENC(*s0++);
192 
193 		switch (psenc->chlen) {
194 		case 1:
195 			if (_mb_singlebyte(_PSENC))
196 				goto convert;
197 			if (_mb_leadbyte  (_PSENC))
198 				continue;
199 			goto ilseq;
200 		case 2:
201 			if (_mb_trailbyte (_PSENC))
202 				goto convert;
203 			if (ei->mb_cur_max == 4 &&
204 			    _mb_surrogate (_PSENC))
205 				continue;
206 			goto ilseq;
207 		case 3:
208 			if (_mb_leadbyte  (_PSENC))
209 				continue;
210 			goto ilseq;
211 		case 4:
212 			if (_mb_surrogate (_PSENC))
213 				goto convert;
214 			goto ilseq;
215 		}
216 	}
217 
218 convert:
219 	len = psenc->chlen;
220 	s1  = &psenc->ch[0];
221 	wc  = 0;
222 	while (len-- > 0)
223 		wc = (wc << 8) | (*s1++ & 0xff);
224 
225 	if (pwc != NULL)
226 		*pwc = wc;
227 	*s = s0;
228 	*nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak;
229 	/* _citrus_GBK2K_init_state(ei, psenc); */
230 	psenc->chlen = 0;
231 
232 	return (0);
233 
234 restart:
235 	*s = s0;
236 	*nresult = (size_t)-2;
237 
238 	return (0);
239 
240 invalid:
241 	return (EINVAL);
242 
243 ilseq:
244 	*nresult = (size_t)-1;
245 	return (EILSEQ);
246 }
247 
248 static int
249 _citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * __restrict ei,
250     char * __restrict s, size_t n, wchar_t wc, _GBK2KState * __restrict psenc,
251     size_t * __restrict nresult)
252 {
253 	size_t len;
254 	int ret;
255 
256 	if (psenc->chlen != 0) {
257 		ret = EINVAL;
258 		goto err;
259 	}
260 
261 	len = _mb_count(wc);
262 	if (n < len) {
263 		ret = E2BIG;
264 		goto err;
265 	}
266 
267 	switch (len) {
268 	case 1:
269 		if (!_mb_singlebyte(_PUSH_PSENC(wc     ))) {
270 			ret = EILSEQ;
271 			goto err;
272 		}
273 		break;
274 	case 2:
275 		if (!_mb_leadbyte  (_PUSH_PSENC(wc >> 8)) ||
276 		    !_mb_trailbyte (_PUSH_PSENC(wc))) {
277 			ret = EILSEQ;
278 			goto err;
279 		}
280 		break;
281 	case 4:
282 		if (ei->mb_cur_max != 4 ||
283 		    !_mb_leadbyte  (_PUSH_PSENC(wc >> 24)) ||
284 		    !_mb_surrogate (_PUSH_PSENC(wc >> 16)) ||
285 		    !_mb_leadbyte  (_PUSH_PSENC(wc >>  8)) ||
286 		    !_mb_surrogate (_PUSH_PSENC(wc))) {
287 			ret = EILSEQ;
288 			goto err;
289 		}
290 		break;
291 	}
292 
293 	memcpy(s, psenc->ch, psenc->chlen);
294 	*nresult = psenc->chlen;
295 	/* _citrus_GBK2K_init_state(ei, psenc); */
296 	psenc->chlen = 0;
297 
298 	return (0);
299 
300 err:
301 	*nresult = (size_t)-1;
302 	return (ret);
303 }
304 
305 static __inline int
306 /*ARGSUSED*/
307 _citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * __restrict ei __unused,
308     _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
309 {
310 	uint8_t ch, cl;
311 
312 	if ((uint32_t)wc < 0x80) {
313 		/* ISO646 */
314 		*csid = 0;
315 		*idx = (_index_t)wc;
316 	} else if ((uint32_t)wc >= 0x10000) {
317 		/* GBKUCS : XXX */
318 		*csid = 3;
319 		*idx = (_index_t)wc;
320 	} else {
321 		ch = (uint8_t)(wc >> 8);
322 		cl = (uint8_t)wc;
323 		if (ch >= 0xA1 && cl >= 0xA1) {
324 			/* EUC G1 */
325 			*csid = 1;
326 			*idx = (_index_t)wc & 0x7F7FU;
327 		} else {
328 			/* extended area (0x8140-) */
329 			*csid = 2;
330 			*idx = (_index_t)wc;
331 		}
332 	}
333 
334 	return (0);
335 }
336 
337 static __inline int
338 /*ARGSUSED*/
339 _citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * __restrict ei,
340     wchar_t * __restrict wc, _csid_t csid, _index_t idx)
341 {
342 
343 	switch (csid) {
344 	case 0:
345 		/* ISO646 */
346 		*wc = (wchar_t)idx;
347 		break;
348 	case 1:
349 		/* EUC G1 */
350 		*wc = (wchar_t)idx | 0x8080U;
351 		break;
352 	case 2:
353 		/* extended area */
354 		*wc = (wchar_t)idx;
355 		break;
356 	case 3:
357 		/* GBKUCS : XXX */
358 		if (ei->mb_cur_max != 4)
359 			return (EINVAL);
360 		*wc = (wchar_t)idx;
361 		break;
362 	default:
363 		return (EILSEQ);
364 	}
365 
366 	return (0);
367 }
368 
369 static __inline int
370 /*ARGSUSED*/
371 _citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * __restrict ei __unused,
372     _GBK2KState * __restrict psenc, int * __restrict rstate)
373 {
374 
375 	*rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL :
376 	    _STDENC_SDGEN_INCOMPLETE_CHAR;
377 	return (0);
378 }
379 
/*
 * Initialise the encoding info from the option string var, which is
 * lenvar bytes long.  The info is zeroed and mb_cur_max defaults to 4;
 * the "2byte" option lowers it to 2.  Bytes that do not start a known
 * option are skipped.  Always returns 0.
 */
static int
/*ARGSUSED*/
_citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * __restrict ei,
    const void * __restrict var, size_t lenvar)
{
	const char *p;

	p = var;
	memset((void *)ei, 0, sizeof(*ei));
	/* Default: four-byte sequences are enabled. */
	ei->mb_cur_max = 4;
	while (lenvar > 0) {
		switch (_bcs_tolower(*p)) {
		case '2':
			/*
			 * NOTE(review): MATCH is not defined in the visible
			 * part of this file.  It presumably compares the
			 * text at p with the given option and, on a match,
			 * runs the action; it may also use or modify the
			 * locals `p' and `lenvar' by name -- confirm against
			 * its definition before renaming or reordering
			 * anything here.
			 */
			MATCH("2byte", ei->mb_cur_max = 2);
			break;
		}
		/* Step over the current byte. */
		p++;
		lenvar--;
	}

	return (0);
}
402 
/*
 * Counterpart of encoding_module_init.  Intentionally empty: the
 * encoding info holds a single int, so there is nothing to release.
 */
static void
/*ARGSUSED*/
_citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei __unused)
{

}
409 
410 /* ----------------------------------------------------------------------
411  * public interface for stdenc
412  */
413 
/*
 * NOTE(review): these macros come from the citrus headers and are not
 * visible here; presumably they declare the stdenc entry points for
 * GBK2K and define its operations table -- confirm in citrus_stdenc.h.
 */
_CITRUS_STDENC_DECLS(GBK2K);
_CITRUS_STDENC_DEF_OPS(GBK2K);

/*
 * Must come after the private definitions above, which the section
 * comment at their head says are used by the templates.
 */
#include "citrus_stdenc_template.h"
418