xref: /freebsd/lib/libiconv_modules/ZW/citrus_zw.c (revision d3d381b2b194b4d24853e92eecef55f262688d1a)
1 /* $FreeBSD$ */
2 /* $NetBSD: citrus_zw.c,v 1.4 2008/06/14 16:01:08 tnozaki Exp $ */
3 
4 /*-
5  * SPDX-License-Identifier: BSD-2-Clause
6  *
7  * Copyright (c)2004, 2006 Citrus Project,
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  */
32 
33 #include <sys/cdefs.h>
34 #include <sys/types.h>
35 
36 #include <assert.h>
37 #include <errno.h>
38 #include <limits.h>
39 #include <stddef.h>
40 #include <stdio.h>
41 #include <stdint.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <wchar.h>
45 
46 #include "citrus_namespace.h"
47 #include "citrus_types.h"
48 #include "citrus_module.h"
49 #include "citrus_stdenc.h"
50 #include "citrus_zw.h"
51 
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
55 
/*
 * Per-encoding information for ZW.  Nothing in this file reads or writes
 * the member; it only gives the structure a non-zero size.
 */
typedef struct {
	int	 dummy;
} _ZWEncodingInfo;
59 
/*
 * Shift state of the converter:
 *   NONE      - initial state; the kind of the current line is not yet known.
 *   AMBIGIOUS - a 'z' has been read in the NONE state and the next byte
 *               decides between ASCII and GB2312 (spelling kept as is: the
 *               identifier is referenced by the functions below).
 *   ASCII     - the rest of the current line is plain ASCII.
 *   GB2312    - the current line started with "zW" and carries two-byte
 *               characters.
 */
typedef enum {
	NONE, AMBIGIOUS, ASCII, GB2312
} _ZWCharset;
63 
/*
 * Conversion state carried between calls.
 */
typedef struct {
	_ZWCharset	 charset;	/* current shift state */
	int		 chlen;		/* number of valid bytes held in ch[] */
	char		 ch[4];		/* bytes of the sequence being assembled */
} _ZWState;
69 
/*
 * Accessors for a container object: the address of its `ei' member and the
 * `states.s_<func>' slot for a given function name.
 * NOTE(review): the container type is not declared in this file; presumably
 * it comes from citrus_stdenc_template.h -- confirm.
 */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/* Symbol prefix and type names for this encoding. */
#define _FUNCNAME(m)			_citrus_ZW_##m
#define _ENCODING_INFO			_ZWEncodingInfo
#define _ENCODING_STATE			_ZWState
/* The argument is ignored; the value is always MB_LEN_MAX. */
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
/* ZW is a stateful encoding. */
#define _ENCODING_IS_STATE_DEPENDENT		1
/* True whenever the state is not in the initial (NONE) shift state. */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	((_ps_)->charset != NONE)
79 
80 static __inline void
81 /*ARGSUSED*/
82 _citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei __unused,
83     _ZWState * __restrict psenc)
84 {
85 
86 	psenc->chlen = 0;
87 	psenc->charset = NONE;
88 }
89 
#if 0
/*
 * Currently compiled out.  Copy the whole conversion state into the opaque
 * buffer `pspriv'.
 */
static __inline void
/*ARGSUSED*/
_citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei __unused,
    void *__restrict pspriv, const _ZWState * __restrict psenc)
{
	const void *src = psenc;

	memcpy(pspriv, src, sizeof(*psenc));
}

/*
 * Currently compiled out.  Restore a conversion state from the opaque
 * buffer `pspriv'.
 */
static __inline void
/*ARGSUSED*/
_citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei __unused,
    _ZWState * __restrict psenc, const void * __restrict pspriv)
{
	void *dst = psenc;

	memcpy(dst, pspriv, sizeof(*psenc));
}
#endif
109 
110 static int
111 _citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei,
112     wchar_t * __restrict pwc, char **__restrict s, size_t n,
113     _ZWState * __restrict psenc, size_t * __restrict nresult)
114 {
115 	char *s0;
116 	wchar_t  wc;
117 	int ch, len;
118 
119 	if (*s == NULL) {
120 		_citrus_ZW_init_state(ei, psenc);
121 		*nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT;
122 		return (0);
123 	}
124 	s0 = *s;
125 	len = 0;
126 
127 #define	STORE				\
128 do {					\
129 	if (n-- < 1) {			\
130 		*nresult = (size_t)-2;	\
131 		*s = s0;		\
132 		return (0);		\
133 	}				\
134 	ch = (unsigned char)*s0++;	\
135 	if (len++ > MB_LEN_MAX || ch > 0x7F)\
136 		goto ilseq;		\
137 	psenc->ch[psenc->chlen++] = ch;	\
138 } while (/*CONSTCOND*/0)
139 
140 loop:
141 	switch (psenc->charset) {
142 	case ASCII:
143 		switch (psenc->chlen) {
144 		case 0:
145 			STORE;
146 			switch (psenc->ch[0]) {
147 			case '\0': case '\n':
148 				psenc->charset = NONE;
149 			}
150 		/*FALLTHROUGH*/
151 		case 1:
152 			break;
153 		default:
154 			return (EINVAL);
155 		}
156 		ch = (unsigned char)psenc->ch[0];
157 		if (ch > 0x7F)
158 			goto ilseq;
159 		wc = (wchar_t)ch;
160 		psenc->chlen = 0;
161 		break;
162 	case NONE:
163 		if (psenc->chlen != 0)
164 			return (EINVAL);
165 		STORE;
166 		ch = (unsigned char)psenc->ch[0];
167 		if (ch != 'z') {
168 			if (ch != '\n' && ch != '\0')
169 				psenc->charset = ASCII;
170 			wc = (wchar_t)ch;
171 			psenc->chlen = 0;
172 			break;
173 		}
174 		psenc->charset = AMBIGIOUS;
175 		psenc->chlen = 0;
176 	/* FALLTHROUGH */
177 	case AMBIGIOUS:
178 		if (psenc->chlen != 0)
179 			return (EINVAL);
180 		STORE;
181 		if (psenc->ch[0] != 'W') {
182 			psenc->charset = ASCII;
183 			wc = L'z';
184 			break;
185 		}
186 		psenc->charset = GB2312;
187 		psenc->chlen = 0;
188 	/* FALLTHROUGH */
189 	case GB2312:
190 		switch (psenc->chlen) {
191 		case 0:
192 			STORE;
193 			ch = (unsigned char)psenc->ch[0];
194 			if (ch == '\0') {
195 				psenc->charset = NONE;
196 				wc = (wchar_t)ch;
197 				psenc->chlen = 0;
198 				break;
199 			} else if (ch == '\n') {
200 				psenc->charset = NONE;
201 				psenc->chlen = 0;
202 				goto loop;
203 			}
204 		/*FALLTHROUGH*/
205 		case 1:
206 			STORE;
207 			if (psenc->ch[0] == ' ') {
208 				ch = (unsigned char)psenc->ch[1];
209 				wc = (wchar_t)ch;
210 				psenc->chlen = 0;
211 				break;
212 			} else if (psenc->ch[0] == '#') {
213 				ch = (unsigned char)psenc->ch[1];
214 				if (ch == '\n') {
215 					psenc->charset = NONE;
216 					wc = (wchar_t)ch;
217 					psenc->chlen = 0;
218 					break;
219 				} else if (ch == ' ') {
220 					wc = (wchar_t)ch;
221 					psenc->chlen = 0;
222 					break;
223 				}
224 			}
225 			ch = (unsigned char)psenc->ch[0];
226 			if (ch < 0x21 || ch > 0x7E)
227 				goto ilseq;
228 			wc = (wchar_t)(ch << 8);
229 			ch = (unsigned char)psenc->ch[1];
230 			if (ch < 0x21 || ch > 0x7E) {
231 ilseq:
232 				*nresult = (size_t)-1;
233 				return (EILSEQ);
234 			}
235 			wc |= (wchar_t)ch;
236 			psenc->chlen = 0;
237 			break;
238 		default:
239 			return (EINVAL);
240 		}
241 		break;
242 	default:
243 		return (EINVAL);
244 	}
245 	if (pwc != NULL)
246 		*pwc = wc;
247 
248 	*nresult = (size_t)(wc == 0 ? 0 : len);
249 	*s = s0;
250 
251 	return (0);
252 }
253 
254 static int
255 /*ARGSUSED*/
256 _citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei __unused,
257     char *__restrict s, size_t n, wchar_t wc,
258     _ZWState * __restrict psenc, size_t * __restrict nresult)
259 {
260 	int ch;
261 
262 	if (psenc->chlen != 0)
263 		return (EINVAL);
264 	if ((uint32_t)wc <= 0x7F) {
265 		ch = (unsigned char)wc;
266 		switch (psenc->charset) {
267 		case NONE:
268 			if (ch == '\0' || ch == '\n')
269 				psenc->ch[psenc->chlen++] = ch;
270 			else {
271 				if (n < 4)
272 					return (E2BIG);
273 				n -= 4;
274 				psenc->ch[psenc->chlen++] = 'z';
275 				psenc->ch[psenc->chlen++] = 'W';
276 				psenc->ch[psenc->chlen++] = ' ';
277 				psenc->ch[psenc->chlen++] = ch;
278 				psenc->charset = GB2312;
279 			}
280 			break;
281 		case GB2312:
282 			if (n < 2)
283 				return (E2BIG);
284 			n -= 2;
285 			if (ch == '\0') {
286 				psenc->ch[psenc->chlen++] = '\n';
287 				psenc->ch[psenc->chlen++] = '\0';
288 				psenc->charset = NONE;
289 			} else if (ch == '\n') {
290 				psenc->ch[psenc->chlen++] = '#';
291 				psenc->ch[psenc->chlen++] = '\n';
292 				psenc->charset = NONE;
293 			} else {
294 				psenc->ch[psenc->chlen++] = ' ';
295 				psenc->ch[psenc->chlen++] = ch;
296 			}
297 			break;
298 		default:
299 			return (EINVAL);
300 		}
301 	} else if ((uint32_t)wc <= 0x7E7E) {
302 		switch (psenc->charset) {
303 		case NONE:
304 			if (n < 2)
305 				return (E2BIG);
306 			n -= 2;
307 			psenc->ch[psenc->chlen++] = 'z';
308 			psenc->ch[psenc->chlen++] = 'W';
309 			psenc->charset = GB2312;
310 		/* FALLTHROUGH*/
311 		case GB2312:
312 			if (n < 2)
313 				return (E2BIG);
314 			n -= 2;
315 			ch = (wc >> 8) & 0xFF;
316 			if (ch < 0x21 || ch > 0x7E)
317 				goto ilseq;
318 			psenc->ch[psenc->chlen++] = ch;
319 			ch = wc & 0xFF;
320 			if (ch < 0x21 || ch > 0x7E)
321 				goto ilseq;
322 			psenc->ch[psenc->chlen++] = ch;
323 			break;
324 		default:
325 			return (EINVAL);
326 		}
327 	} else {
328 ilseq:
329 		*nresult = (size_t)-1;
330 		return (EILSEQ);
331 	}
332 	memcpy(s, psenc->ch, psenc->chlen);
333 	*nresult = psenc->chlen;
334 	psenc->chlen = 0;
335 
336 	return (0);
337 }
338 
339 static int
340 /*ARGSUSED*/
341 _citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei __unused,
342     char * __restrict s, size_t n, _ZWState * __restrict psenc,
343     size_t * __restrict nresult)
344 {
345 
346 	if (psenc->chlen != 0)
347 		return (EINVAL);
348 	switch (psenc->charset) {
349 	case GB2312:
350 		if (n-- < 1)
351 			return (E2BIG);
352 		psenc->ch[psenc->chlen++] = '\n';
353 		psenc->charset = NONE;
354 	/*FALLTHROUGH*/
355 	case NONE:
356 		*nresult = psenc->chlen;
357 		if (psenc->chlen > 0) {
358 			memcpy(s, psenc->ch, psenc->chlen);
359 			psenc->chlen = 0;
360 		}
361 		break;
362 	default:
363 		return (EINVAL);
364 	}
365 
366 	return (0);
367 }
368 
369 static __inline int
370 /*ARGSUSED*/
371 _citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei __unused,
372     _ZWState * __restrict psenc, int * __restrict rstate)
373 {
374 
375 	switch (psenc->charset) {
376 	case NONE:
377 		if (psenc->chlen != 0)
378 			return (EINVAL);
379 		*rstate = _STDENC_SDGEN_INITIAL;
380 		break;
381 	case AMBIGIOUS:
382 		if (psenc->chlen != 0)
383 			return (EINVAL);
384 		*rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT;
385 		break;
386 	case ASCII:
387 	case GB2312:
388 		switch (psenc->chlen) {
389 		case 0:
390 			*rstate = _STDENC_SDGEN_STABLE;
391 			break;
392 		case 1:
393 			*rstate = (psenc->ch[0] == '#') ?
394 			    _STDENC_SDGEN_INCOMPLETE_SHIFT :
395 			    _STDENC_SDGEN_INCOMPLETE_CHAR;
396 			break;
397 		default:
398 			return (EINVAL);
399 		}
400 		break;
401 	default:
402 		return (EINVAL);
403 	}
404 	return (0);
405 }
406 
407 static __inline int
408 /*ARGSUSED*/
409 _citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei __unused,
410     _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
411 {
412 
413 	*csid = (_csid_t)(wc <= (wchar_t)0x7FU) ? 0 : 1;
414 	*idx = (_index_t)wc;
415 
416 	return (0);
417 }
418 
419 static __inline int
420 /*ARGSUSED*/
421 _citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei __unused,
422     wchar_t * __restrict wc, _csid_t csid, _index_t idx)
423 {
424 
425 	switch (csid) {
426 	case 0: case 1:
427 		break;
428 	default:
429 		return (EINVAL);
430 	}
431 	*wc = (wchar_t)idx;
432 
433 	return (0);
434 }
435 
436 static void
437 /*ARGSUSED*/
438 _citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei __unused)
439 {
440 
441 }
442 
443 static int
444 /*ARGSUSED*/
445 _citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei __unused,
446     const void *__restrict var __unused, size_t lenvar __unused)
447 {
448 
449 	return (0);
450 }
451 
452 /* ----------------------------------------------------------------------
453  * public interface for stdenc
454  */
455 
/*
 * NOTE(review): these macros are defined outside this file; presumably they
 * declare and define the stdenc operations for the ZW module -- confirm
 * against citrus_stdenc.h.
 */
_CITRUS_STDENC_DECLS(ZW);
_CITRUS_STDENC_DEF_OPS(ZW);

/*
 * Included last, after the _FUNCNAME/_ENCODING_* macros and the
 * _citrus_ZW_* functions above have been defined.
 */
#include "citrus_stdenc_template.h"

460