xref: /freebsd/lib/libiconv_modules/ZW/citrus_zw.c (revision 63f537551380d2dab29fa402ad1269feae17e594)
1 /* $NetBSD: citrus_zw.c,v 1.4 2008/06/14 16:01:08 tnozaki Exp $ */
2 
3 /*-
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c)2004, 2006 Citrus Project,
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  */
31 
32 #include <sys/cdefs.h>
33 #include <sys/types.h>
34 
35 #include <assert.h>
36 #include <errno.h>
37 #include <limits.h>
38 #include <stddef.h>
39 #include <stdio.h>
40 #include <stdint.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <wchar.h>
44 
45 #include "citrus_namespace.h"
46 #include "citrus_types.h"
47 #include "citrus_module.h"
48 #include "citrus_stdenc.h"
49 #include "citrus_zw.h"
50 
/* ----------------------------------------------------------------------
 * private stuff used by the templates
 */
54 
/*
 * Per-encoding information for ZW.  Nothing in this file reads or writes
 * the member; it only keeps the structure non-empty.
 */
typedef struct {
	int dummy;	/* placeholder, unused in this file */
} _ZWEncodingInfo;
58 
/*
 * Shift state of a ZW stream.  The enumerator spelling "AMBIGIOUS" is
 * kept as-is because the rest of the file refers to it by that name.
 */
typedef enum {
	NONE,		/* initial state; also re-entered after '\n' or '\0' */
	AMBIGIOUS,	/* a leading 'z' was read, waiting to see if 'W' follows */
	ASCII,		/* bytes are passed through one at a time */
	GB2312		/* after "zW": two-byte characters and escapes */
} _ZWCharset;
62 
63 typedef struct {
64 	_ZWCharset	 charset;
65 	int		 chlen;
66 	char		 ch[4];
67 } _ZWState;
68 
/* Accessors into the enclosing "cei" object (declared outside this file). */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/* Names and types for the ZW encoding, as the section comment says: used by templates. */
#define _FUNCNAME(m)			_citrus_ZW_##m
#define _ENCODING_INFO			_ZWEncodingInfo
#define _ENCODING_STATE			_ZWState

/* The argument is ignored; the limit is always MB_LEN_MAX. */
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX

/* ZW carries a shift state between characters. */
#define _ENCODING_IS_STATE_DEPENDENT		1

/* True when the state's charset is anything other than NONE. */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	((_ps_)->charset != NONE)
78 
79 static __inline void
80 /*ARGSUSED*/
81 _citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei __unused,
82     _ZWState * __restrict psenc)
83 {
84 
85 	psenc->chlen = 0;
86 	psenc->charset = NONE;
87 }
88 
#if 0
/*
 * Compiled out.  These helpers copy a whole _ZWState to and from an
 * opaque private buffer.
 */

/* Copy *psenc into the buffer at pspriv. */
static __inline void
/*ARGSUSED*/
_citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei __unused,
    void *__restrict pspriv, const _ZWState * __restrict psenc)
{

	memcpy(pspriv, psenc, sizeof(*psenc));
}

/* Copy the buffer at pspriv back into *psenc. */
static __inline void
/*ARGSUSED*/
_citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei __unused,
    _ZWState * __restrict psenc, const void * __restrict pspriv)
{

	memcpy(psenc, pspriv, sizeof(*psenc));
}
#endif
108 
109 static int
110 _citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei,
111     wchar_t * __restrict pwc, char **__restrict s, size_t n,
112     _ZWState * __restrict psenc, size_t * __restrict nresult)
113 {
114 	char *s0;
115 	wchar_t  wc;
116 	int ch, len;
117 
118 	if (*s == NULL) {
119 		_citrus_ZW_init_state(ei, psenc);
120 		*nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT;
121 		return (0);
122 	}
123 	s0 = *s;
124 	len = 0;
125 
126 #define	STORE				\
127 do {					\
128 	if (n-- < 1) {			\
129 		*nresult = (size_t)-2;	\
130 		*s = s0;		\
131 		return (0);		\
132 	}				\
133 	ch = (unsigned char)*s0++;	\
134 	if (len++ > MB_LEN_MAX || ch > 0x7F)\
135 		goto ilseq;		\
136 	psenc->ch[psenc->chlen++] = ch;	\
137 } while (/*CONSTCOND*/0)
138 
139 loop:
140 	switch (psenc->charset) {
141 	case ASCII:
142 		switch (psenc->chlen) {
143 		case 0:
144 			STORE;
145 			switch (psenc->ch[0]) {
146 			case '\0': case '\n':
147 				psenc->charset = NONE;
148 			}
149 		/*FALLTHROUGH*/
150 		case 1:
151 			break;
152 		default:
153 			return (EINVAL);
154 		}
155 		ch = (unsigned char)psenc->ch[0];
156 		if (ch > 0x7F)
157 			goto ilseq;
158 		wc = (wchar_t)ch;
159 		psenc->chlen = 0;
160 		break;
161 	case NONE:
162 		if (psenc->chlen != 0)
163 			return (EINVAL);
164 		STORE;
165 		ch = (unsigned char)psenc->ch[0];
166 		if (ch != 'z') {
167 			if (ch != '\n' && ch != '\0')
168 				psenc->charset = ASCII;
169 			wc = (wchar_t)ch;
170 			psenc->chlen = 0;
171 			break;
172 		}
173 		psenc->charset = AMBIGIOUS;
174 		psenc->chlen = 0;
175 	/* FALLTHROUGH */
176 	case AMBIGIOUS:
177 		if (psenc->chlen != 0)
178 			return (EINVAL);
179 		STORE;
180 		if (psenc->ch[0] != 'W') {
181 			psenc->charset = ASCII;
182 			wc = L'z';
183 			break;
184 		}
185 		psenc->charset = GB2312;
186 		psenc->chlen = 0;
187 	/* FALLTHROUGH */
188 	case GB2312:
189 		switch (psenc->chlen) {
190 		case 0:
191 			STORE;
192 			ch = (unsigned char)psenc->ch[0];
193 			if (ch == '\0') {
194 				psenc->charset = NONE;
195 				wc = (wchar_t)ch;
196 				psenc->chlen = 0;
197 				break;
198 			} else if (ch == '\n') {
199 				psenc->charset = NONE;
200 				psenc->chlen = 0;
201 				goto loop;
202 			}
203 		/*FALLTHROUGH*/
204 		case 1:
205 			STORE;
206 			if (psenc->ch[0] == ' ') {
207 				ch = (unsigned char)psenc->ch[1];
208 				wc = (wchar_t)ch;
209 				psenc->chlen = 0;
210 				break;
211 			} else if (psenc->ch[0] == '#') {
212 				ch = (unsigned char)psenc->ch[1];
213 				if (ch == '\n') {
214 					psenc->charset = NONE;
215 					wc = (wchar_t)ch;
216 					psenc->chlen = 0;
217 					break;
218 				} else if (ch == ' ') {
219 					wc = (wchar_t)ch;
220 					psenc->chlen = 0;
221 					break;
222 				}
223 			}
224 			ch = (unsigned char)psenc->ch[0];
225 			if (ch < 0x21 || ch > 0x7E)
226 				goto ilseq;
227 			wc = (wchar_t)(ch << 8);
228 			ch = (unsigned char)psenc->ch[1];
229 			if (ch < 0x21 || ch > 0x7E) {
230 ilseq:
231 				*nresult = (size_t)-1;
232 				return (EILSEQ);
233 			}
234 			wc |= (wchar_t)ch;
235 			psenc->chlen = 0;
236 			break;
237 		default:
238 			return (EINVAL);
239 		}
240 		break;
241 	default:
242 		return (EINVAL);
243 	}
244 	if (pwc != NULL)
245 		*pwc = wc;
246 
247 	*nresult = (size_t)(wc == 0 ? 0 : len);
248 	*s = s0;
249 
250 	return (0);
251 }
252 
253 static int
254 /*ARGSUSED*/
255 _citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei __unused,
256     char *__restrict s, size_t n, wchar_t wc,
257     _ZWState * __restrict psenc, size_t * __restrict nresult)
258 {
259 	int ch;
260 
261 	if (psenc->chlen != 0)
262 		return (EINVAL);
263 	if ((uint32_t)wc <= 0x7F) {
264 		ch = (unsigned char)wc;
265 		switch (psenc->charset) {
266 		case NONE:
267 			if (ch == '\0' || ch == '\n')
268 				psenc->ch[psenc->chlen++] = ch;
269 			else {
270 				if (n < 4)
271 					return (E2BIG);
272 				n -= 4;
273 				psenc->ch[psenc->chlen++] = 'z';
274 				psenc->ch[psenc->chlen++] = 'W';
275 				psenc->ch[psenc->chlen++] = ' ';
276 				psenc->ch[psenc->chlen++] = ch;
277 				psenc->charset = GB2312;
278 			}
279 			break;
280 		case GB2312:
281 			if (n < 2)
282 				return (E2BIG);
283 			n -= 2;
284 			if (ch == '\0') {
285 				psenc->ch[psenc->chlen++] = '\n';
286 				psenc->ch[psenc->chlen++] = '\0';
287 				psenc->charset = NONE;
288 			} else if (ch == '\n') {
289 				psenc->ch[psenc->chlen++] = '#';
290 				psenc->ch[psenc->chlen++] = '\n';
291 				psenc->charset = NONE;
292 			} else {
293 				psenc->ch[psenc->chlen++] = ' ';
294 				psenc->ch[psenc->chlen++] = ch;
295 			}
296 			break;
297 		default:
298 			return (EINVAL);
299 		}
300 	} else if ((uint32_t)wc <= 0x7E7E) {
301 		switch (psenc->charset) {
302 		case NONE:
303 			if (n < 2)
304 				return (E2BIG);
305 			n -= 2;
306 			psenc->ch[psenc->chlen++] = 'z';
307 			psenc->ch[psenc->chlen++] = 'W';
308 			psenc->charset = GB2312;
309 		/* FALLTHROUGH*/
310 		case GB2312:
311 			if (n < 2)
312 				return (E2BIG);
313 			n -= 2;
314 			ch = (wc >> 8) & 0xFF;
315 			if (ch < 0x21 || ch > 0x7E)
316 				goto ilseq;
317 			psenc->ch[psenc->chlen++] = ch;
318 			ch = wc & 0xFF;
319 			if (ch < 0x21 || ch > 0x7E)
320 				goto ilseq;
321 			psenc->ch[psenc->chlen++] = ch;
322 			break;
323 		default:
324 			return (EINVAL);
325 		}
326 	} else {
327 ilseq:
328 		*nresult = (size_t)-1;
329 		return (EILSEQ);
330 	}
331 	memcpy(s, psenc->ch, psenc->chlen);
332 	*nresult = psenc->chlen;
333 	psenc->chlen = 0;
334 
335 	return (0);
336 }
337 
338 static int
339 /*ARGSUSED*/
340 _citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei __unused,
341     char * __restrict s, size_t n, _ZWState * __restrict psenc,
342     size_t * __restrict nresult)
343 {
344 
345 	if (psenc->chlen != 0)
346 		return (EINVAL);
347 	switch (psenc->charset) {
348 	case GB2312:
349 		if (n-- < 1)
350 			return (E2BIG);
351 		psenc->ch[psenc->chlen++] = '\n';
352 		psenc->charset = NONE;
353 	/*FALLTHROUGH*/
354 	case NONE:
355 		*nresult = psenc->chlen;
356 		if (psenc->chlen > 0) {
357 			memcpy(s, psenc->ch, psenc->chlen);
358 			psenc->chlen = 0;
359 		}
360 		break;
361 	default:
362 		return (EINVAL);
363 	}
364 
365 	return (0);
366 }
367 
368 static __inline int
369 /*ARGSUSED*/
370 _citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei __unused,
371     _ZWState * __restrict psenc, int * __restrict rstate)
372 {
373 
374 	switch (psenc->charset) {
375 	case NONE:
376 		if (psenc->chlen != 0)
377 			return (EINVAL);
378 		*rstate = _STDENC_SDGEN_INITIAL;
379 		break;
380 	case AMBIGIOUS:
381 		if (psenc->chlen != 0)
382 			return (EINVAL);
383 		*rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT;
384 		break;
385 	case ASCII:
386 	case GB2312:
387 		switch (psenc->chlen) {
388 		case 0:
389 			*rstate = _STDENC_SDGEN_STABLE;
390 			break;
391 		case 1:
392 			*rstate = (psenc->ch[0] == '#') ?
393 			    _STDENC_SDGEN_INCOMPLETE_SHIFT :
394 			    _STDENC_SDGEN_INCOMPLETE_CHAR;
395 			break;
396 		default:
397 			return (EINVAL);
398 		}
399 		break;
400 	default:
401 		return (EINVAL);
402 	}
403 	return (0);
404 }
405 
406 static __inline int
407 /*ARGSUSED*/
408 _citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei __unused,
409     _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
410 {
411 
412 	*csid = (_csid_t)(wc <= (wchar_t)0x7FU) ? 0 : 1;
413 	*idx = (_index_t)wc;
414 
415 	return (0);
416 }
417 
418 static __inline int
419 /*ARGSUSED*/
420 _citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei __unused,
421     wchar_t * __restrict wc, _csid_t csid, _index_t idx)
422 {
423 
424 	switch (csid) {
425 	case 0: case 1:
426 		break;
427 	default:
428 		return (EINVAL);
429 	}
430 	*wc = (wchar_t)idx;
431 
432 	return (0);
433 }
434 
435 static void
436 /*ARGSUSED*/
437 _citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei __unused)
438 {
439 
440 }
441 
442 static int
443 /*ARGSUSED*/
444 _citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei __unused,
445     const void *__restrict var __unused, size_t lenvar __unused)
446 {
447 
448 	return (0);
449 }
450 
451 /* ----------------------------------------------------------------------
452  * public interface for stdenc
453  */
454 
455 _CITRUS_STDENC_DECLS(ZW);
456 _CITRUS_STDENC_DEF_OPS(ZW);
457 
458 #include "citrus_stdenc_template.h"
459