xref: /freebsd/lib/libiconv_modules/ZW/citrus_zw.c (revision 9a14aa017b21c292740c00ee098195cd46642730)
1 /* $FreeBSD$ */
2 /* $NetBSD: citrus_zw.c,v 1.4 2008/06/14 16:01:08 tnozaki Exp $ */
3 
4 /*-
5  * Copyright (c)2004, 2006 Citrus Project,
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  */
30 
31 #include <sys/cdefs.h>
32 #include <sys/types.h>
33 
34 #include <assert.h>
35 #include <errno.h>
36 #include <limits.h>
37 #include <stddef.h>
38 #include <stdio.h>
39 #include <stdint.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <wchar.h>
43 
44 #include "citrus_namespace.h"
45 #include "citrus_types.h"
46 #include "citrus_module.h"
47 #include "citrus_stdenc.h"
48 #include "citrus_zw.h"
49 
/* ----------------------------------------------------------------------
 * private stuff used by templates
 */
53 
/*
 * Per-encoding information for ZW.  Nothing in this file reads or writes
 * the single member; it only serves as a placeholder.
 */
typedef struct {
	int	 dummy;
} _ZWEncodingInfo;
57 
/*
 * Shift state of a conversion, as used by the functions in this file:
 *   NONE      - initial state / start of a line, no charset chosen yet.
 *   AMBIGIOUS - the decoder has read a 'z' in the NONE state and is
 *               waiting for the next byte to see whether it is 'W'.
 *   ASCII     - bytes are passed through one at a time until a '\n' or
 *               '\0' is seen.
 *   GB2312    - inside a "zW" line; bytes are interpreted in pairs.
 */
typedef enum {
	NONE, AMBIGIOUS, ASCII, GB2312
} _ZWCharset;
61 
/*
 * Conversion state.
 *   charset - current shift state (see _ZWCharset).
 *   chlen   - number of bytes currently held in ch[].
 *   ch      - scratch buffer for bytes of a partially processed sequence.
 */
typedef struct {
	_ZWCharset	 charset;
	int		 chlen;
	char		 ch[4];
} _ZWState;
67 
/*
 * Configuration macros.
 * NOTE(review): these are presumably consumed by citrus_stdenc_template.h,
 * which is included at the end of this file -- confirm against the template.
 */

/* Accessors for the encoding info and per-function state of a _cei_ object. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/* Name prefix and concrete types used for this encoding. */
#define _FUNCNAME(m)			_citrus_ZW_##m
#define _ENCODING_INFO			_ZWEncodingInfo
#define _ENCODING_STATE			_ZWState
/* Maximum bytes per character is reported as MB_LEN_MAX regardless of _ei_. */
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
/* ZW is a stateful encoding. */
#define _ENCODING_IS_STATE_DEPENDENT		1
/* True when the state has left the initial (NONE) charset. */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	((_ps_)->charset != NONE)
77 
78 static __inline void
79 /*ARGSUSED*/
80 _citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei __unused,
81     _ZWState * __restrict psenc)
82 {
83 
84 	psenc->chlen = 0;
85 	psenc->charset = NONE;
86 }
87 
88 static __inline void
89 /*ARGSUSED*/
90 _citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei __unused,
91     void *__restrict pspriv, const _ZWState * __restrict psenc)
92 {
93 
94 	memcpy(pspriv, (const void *)psenc, sizeof(*psenc));
95 }
96 
97 static __inline void
98 /*ARGSUSED*/
99 _citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei __unused,
100     _ZWState * __restrict psenc, const void * __restrict pspriv)
101 {
102 
103 	memcpy((void *)psenc, pspriv, sizeof(*psenc));
104 }
105 
106 static int
107 _citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei,
108     wchar_t * __restrict pwc, char **__restrict s, size_t n,
109     _ZWState * __restrict psenc, size_t * __restrict nresult)
110 {
111 	char *s0;
112 	wchar_t  wc;
113 	int ch, len;
114 
115 	if (*s == NULL) {
116 		_citrus_ZW_init_state(ei, psenc);
117 		*nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT;
118 		return (0);
119 	}
120 	s0 = *s;
121 	len = 0;
122 
123 #define	STORE				\
124 do {					\
125 	if (n-- < 1) {			\
126 		*nresult = (size_t)-2;	\
127 		*s = s0;		\
128 		return (0);		\
129 	}				\
130 	ch = (unsigned char)*s0++;	\
131 	if (len++ > MB_LEN_MAX || ch > 0x7F)\
132 		goto ilseq;		\
133 	psenc->ch[psenc->chlen++] = ch;	\
134 } while (/*CONSTCOND*/0)
135 
136 loop:
137 	switch (psenc->charset) {
138 	case ASCII:
139 		switch (psenc->chlen) {
140 		case 0:
141 			STORE;
142 			switch (psenc->ch[0]) {
143 			case '\0': case '\n':
144 				psenc->charset = NONE;
145 			}
146 		/*FALLTHROUGH*/
147 		case 1:
148 			break;
149 		default:
150 			return (EINVAL);
151 		}
152 		ch = (unsigned char)psenc->ch[0];
153 		if (ch > 0x7F)
154 			goto ilseq;
155 		wc = (wchar_t)ch;
156 		psenc->chlen = 0;
157 		break;
158 	case NONE:
159 		if (psenc->chlen != 0)
160 			return (EINVAL);
161 		STORE;
162 		ch = (unsigned char)psenc->ch[0];
163 		if (ch != 'z') {
164 			if (ch != '\n' && ch != '\0')
165 				psenc->charset = ASCII;
166 			wc = (wchar_t)ch;
167 			psenc->chlen = 0;
168 			break;
169 		}
170 		psenc->charset = AMBIGIOUS;
171 		psenc->chlen = 0;
172 	/* FALLTHROUGH */
173 	case AMBIGIOUS:
174 		if (psenc->chlen != 0)
175 			return (EINVAL);
176 		STORE;
177 		if (psenc->ch[0] != 'W') {
178 			psenc->charset = ASCII;
179 			wc = L'z';
180 			break;
181 		}
182 		psenc->charset = GB2312;
183 		psenc->chlen = 0;
184 	/* FALLTHROUGH */
185 	case GB2312:
186 		switch (psenc->chlen) {
187 		case 0:
188 			STORE;
189 			ch = (unsigned char)psenc->ch[0];
190 			if (ch == '\0') {
191 				psenc->charset = NONE;
192 				wc = (wchar_t)ch;
193 				psenc->chlen = 0;
194 				break;
195 			} else if (ch == '\n') {
196 				psenc->charset = NONE;
197 				psenc->chlen = 0;
198 				goto loop;
199 			}
200 		/*FALLTHROUGH*/
201 		case 1:
202 			STORE;
203 			if (psenc->ch[0] == ' ') {
204 				ch = (unsigned char)psenc->ch[1];
205 				wc = (wchar_t)ch;
206 				psenc->chlen = 0;
207 				break;
208 			} else if (psenc->ch[0] == '#') {
209 				ch = (unsigned char)psenc->ch[1];
210 				if (ch == '\n') {
211 					psenc->charset = NONE;
212 					wc = (wchar_t)ch;
213 					psenc->chlen = 0;
214 					break;
215 				} else if (ch == ' ') {
216 					wc = (wchar_t)ch;
217 					psenc->chlen = 0;
218 					break;
219 				}
220 			}
221 			ch = (unsigned char)psenc->ch[0];
222 			if (ch < 0x21 || ch > 0x7E)
223 				goto ilseq;
224 			wc = (wchar_t)(ch << 8);
225 			ch = (unsigned char)psenc->ch[1];
226 			if (ch < 0x21 || ch > 0x7E) {
227 ilseq:
228 				*nresult = (size_t)-1;
229 				return (EILSEQ);
230 			}
231 			wc |= (wchar_t)ch;
232 			psenc->chlen = 0;
233 			break;
234 		default:
235 			return (EINVAL);
236 		}
237 		break;
238 	default:
239 		return (EINVAL);
240 	}
241 	if (pwc != NULL)
242 		*pwc = wc;
243 
244 	*nresult = (size_t)(wc == 0 ? 0 : len);
245 	*s = s0;
246 
247 	return (0);
248 }
249 
250 static int
251 /*ARGSUSED*/
252 _citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei __unused,
253     char *__restrict s, size_t n, wchar_t wc,
254     _ZWState * __restrict psenc, size_t * __restrict nresult)
255 {
256 	int ch;
257 
258 	if (psenc->chlen != 0)
259 		return (EINVAL);
260 	if ((uint32_t)wc <= 0x7F) {
261 		ch = (unsigned char)wc;
262 		switch (psenc->charset) {
263 		case NONE:
264 			if (ch == '\0' || ch == '\n')
265 				psenc->ch[psenc->chlen++] = ch;
266 			else {
267 				if (n < 4)
268 					return (E2BIG);
269 				n -= 4;
270 				psenc->ch[psenc->chlen++] = 'z';
271 				psenc->ch[psenc->chlen++] = 'W';
272 				psenc->ch[psenc->chlen++] = ' ';
273 				psenc->ch[psenc->chlen++] = ch;
274 				psenc->charset = GB2312;
275 			}
276 			break;
277 		case GB2312:
278 			if (n < 2)
279 				return (E2BIG);
280 			n -= 2;
281 			if (ch == '\0') {
282 				psenc->ch[psenc->chlen++] = '\n';
283 				psenc->ch[psenc->chlen++] = '\0';
284 				psenc->charset = NONE;
285 			} else if (ch == '\n') {
286 				psenc->ch[psenc->chlen++] = '#';
287 				psenc->ch[psenc->chlen++] = '\n';
288 				psenc->charset = NONE;
289 			} else {
290 				psenc->ch[psenc->chlen++] = ' ';
291 				psenc->ch[psenc->chlen++] = ch;
292 			}
293 			break;
294 		default:
295 			return (EINVAL);
296 		}
297 	} else if ((uint32_t)wc <= 0x7E7E) {
298 		switch (psenc->charset) {
299 		case NONE:
300 			if (n < 2)
301 				return (E2BIG);
302 			n -= 2;
303 			psenc->ch[psenc->chlen++] = 'z';
304 			psenc->ch[psenc->chlen++] = 'W';
305 			psenc->charset = GB2312;
306 		/* FALLTHROUGH*/
307 		case GB2312:
308 			if (n < 2)
309 				return (E2BIG);
310 			n -= 2;
311 			ch = (wc >> 8) & 0xFF;
312 			if (ch < 0x21 || ch > 0x7E)
313 				goto ilseq;
314 			psenc->ch[psenc->chlen++] = ch;
315 			ch = wc & 0xFF;
316 			if (ch < 0x21 || ch > 0x7E)
317 				goto ilseq;
318 			psenc->ch[psenc->chlen++] = ch;
319 			break;
320 		default:
321 			return (EINVAL);
322 		}
323 	} else {
324 ilseq:
325 		*nresult = (size_t)-1;
326 		return (EILSEQ);
327 	}
328 	memcpy(s, psenc->ch, psenc->chlen);
329 	*nresult = psenc->chlen;
330 	psenc->chlen = 0;
331 
332 	return (0);
333 }
334 
335 static int
336 /*ARGSUSED*/
337 _citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei __unused,
338     char * __restrict s, size_t n, _ZWState * __restrict psenc,
339     size_t * __restrict nresult)
340 {
341 
342 	if (psenc->chlen != 0)
343 		return (EINVAL);
344 	switch (psenc->charset) {
345 	case GB2312:
346 		if (n-- < 1)
347 			return (E2BIG);
348 		psenc->ch[psenc->chlen++] = '\n';
349 		psenc->charset = NONE;
350 	/*FALLTHROUGH*/
351 	case NONE:
352 		*nresult = psenc->chlen;
353 		if (psenc->chlen > 0) {
354 			memcpy(s, psenc->ch, psenc->chlen);
355 			psenc->chlen = 0;
356 		}
357 		break;
358 	default:
359 		return (EINVAL);
360 	}
361 
362 	return (0);
363 }
364 
365 static __inline int
366 /*ARGSUSED*/
367 _citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei __unused,
368     _ZWState * __restrict psenc, int * __restrict rstate)
369 {
370 
371 	switch (psenc->charset) {
372 	case NONE:
373 		if (psenc->chlen != 0)
374 			return (EINVAL);
375 		*rstate = _STDENC_SDGEN_INITIAL;
376 		break;
377 	case AMBIGIOUS:
378 		if (psenc->chlen != 0)
379 			return (EINVAL);
380 		*rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT;
381 		break;
382 	case ASCII:
383 	case GB2312:
384 		switch (psenc->chlen) {
385 		case 0:
386 			*rstate = _STDENC_SDGEN_STABLE;
387 			break;
388 		case 1:
389 			*rstate = (psenc->ch[0] == '#') ?
390 			    _STDENC_SDGEN_INCOMPLETE_SHIFT :
391 			    _STDENC_SDGEN_INCOMPLETE_CHAR;
392 			break;
393 		default:
394 			return (EINVAL);
395 		}
396 		break;
397 	default:
398 		return (EINVAL);
399 	}
400 	return (0);
401 }
402 
403 static __inline int
404 /*ARGSUSED*/
405 _citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei __unused,
406     _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
407 {
408 
409 	*csid = (_csid_t)(wc <= (wchar_t)0x7FU) ? 0 : 1;
410 	*idx = (_index_t)wc;
411 
412 	return (0);
413 }
414 
415 static __inline int
416 /*ARGSUSED*/
417 _citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei __unused,
418     wchar_t * __restrict wc, _csid_t csid, _index_t idx)
419 {
420 
421 	switch (csid) {
422 	case 0: case 1:
423 		break;
424 	default:
425 		return (EINVAL);
426 	}
427 	*wc = (wchar_t)idx;
428 
429 	return (0);
430 }
431 
/*
 * Module teardown hook.  Intentionally empty: this file's
 * _citrus_ZW_encoding_module_init() acquires nothing that would need
 * to be released.
 */
static void
/*ARGSUSED*/
_citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei __unused)
{

}
438 
/*
 * Module setup hook.  All arguments are ignored, nothing is stored in
 * ei, and the function always returns 0.
 */
static int
/*ARGSUSED*/
_citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei __unused,
    const void *__restrict var __unused, size_t lenvar __unused)
{

	return (0);
}
447 
448 /* ----------------------------------------------------------------------
449  * public interface for stdenc
450  */
451 
/*
 * NOTE(review): _CITRUS_STDENC_DECLS/_CITRUS_STDENC_DEF_OPS are defined
 * outside this file; presumably they declare and define the stdenc
 * operations for the "ZW" module -- confirm in citrus_stdenc.h.
 */
_CITRUS_STDENC_DECLS(ZW);
_CITRUS_STDENC_DEF_OPS(ZW);

/*
 * Included last, after the _citrus_ZW_* functions and _ENCODING_* macros
 * above are defined.
 * NOTE(review): presumably the template builds the public entry points
 * from them -- confirm.
 */
#include "citrus_stdenc_template.h"
456