1 /* $NetBSD: citrus_dechanyu.c,v 1.4 2011/11/19 18:20:13 tnozaki Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c)2007 Citrus Project,
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 #include <sys/types.h>
32
33 #include <assert.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <stddef.h>
37 #include <stdint.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <wchar.h>
42
43 #include "citrus_namespace.h"
44 #include "citrus_types.h"
45 #include "citrus_bcs.h"
46 #include "citrus_module.h"
47 #include "citrus_stdenc.h"
48 #include "citrus_dechanyu.h"
49
/* ----------------------------------------------------------------------
 * private definitions used by the stdenc template
 */
53
/*
 * Conversion state: the bytes of a multibyte character that have been
 * accepted so far but do not yet form a complete character.
 */
typedef struct {
	size_t	chlen;		/* number of valid bytes stored in ch[] */
	char	ch[4];		/* pending bytes, in input order */
} _DECHanyuState;
58
/*
 * Per-encoding information.  Nothing in this file reads or writes the
 * single member; it only keeps the structure from being empty.
 */
typedef struct {
	int	dummy;
} _DECHanyuEncodingInfo;
62
/*
 * Hooks for the shared stdenc template that is included at the end of
 * this file.
 */

/* Accessors for a containing object's encoding info and per-function state. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.__CONCAT(s_,_func_)

/* Name prefix and concrete types of this encoding. */
#define _FUNCNAME(m)			__CONCAT(_citrus_DECHanyu_,m)
#define _ENCODING_INFO			_DECHanyuEncodingInfo
#define _ENCODING_STATE			_DECHanyuState

/* A character occupies at most four bytes; the encoding has no shift state. */
#define _ENCODING_MB_CUR_MAX(_ei_)	4
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
72
73 static __inline void
74 /*ARGSUSED*/
_citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei __unused,_DECHanyuState * __restrict psenc)75 _citrus_DECHanyu_init_state(_DECHanyuEncodingInfo * __restrict ei __unused,
76 _DECHanyuState * __restrict psenc)
77 {
78
79 psenc->chlen = 0;
80 }
81
#if 0
/*
 * Disabled helpers (compiled out by the surrounding #if 0) that copy the
 * whole state object to and from an opaque caller-supplied buffer.
 */

/* Store *psenc into the buffer at pspriv. */
static __inline void
/*ARGSUSED*/
_citrus_DECHanyu_pack_state(_DECHanyuEncodingInfo * __restrict ei __unused,
    void * __restrict pspriv, const _DECHanyuState * __restrict psenc)
{
	const void *src = (const void *)psenc;

	memcpy(pspriv, src, sizeof(*psenc));
}

/* Load *psenc back from the buffer at pspriv. */
static __inline void
/*ARGSUSED*/
_citrus_DECHanyu_unpack_state(_DECHanyuEncodingInfo * __restrict ei __unused,
    _DECHanyuState * __restrict psenc,
    const void * __restrict pspriv)
{
	void *dst = (void *)psenc;

	memcpy(dst, pspriv, sizeof(*psenc));
}
#endif
102
103 static void
104 /*ARGSUSED*/
_citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo * ei __unused)105 _citrus_DECHanyu_encoding_module_uninit(_DECHanyuEncodingInfo *ei __unused)
106 {
107
108 /* ei may be null */
109 }
110
111 static int
112 /*ARGSUSED*/
_citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei __unused,const void * __restrict var __unused,size_t lenvar __unused)113 _citrus_DECHanyu_encoding_module_init(_DECHanyuEncodingInfo * __restrict ei __unused,
114 const void * __restrict var __unused, size_t lenvar __unused)
115 {
116
117 /* ei may be null */
118 return (0);
119 }
120
/*
 * Return true when c is a byte value that stands for itself as a
 * one-byte character, i.e. lies in 0x00..0x7F.
 *
 * The lower bound is checked explicitly so that a negative argument is
 * rejected, in line with the other range predicates in this file.
 * Callers in this file always pass a value masked with 0xFF.
 */
static __inline bool
is_singlebyte(int c)
{

	return (c >= 0x00 && c <= 0x7F);
}
127
/*
 * Return true when c can be the first byte of a two-byte character,
 * i.e. lies in 0xA1..0xFE inclusive.
 */
static __inline bool
is_leadbyte(int c)
{

	if (c < 0xA1 || c > 0xFE)
		return (false);
	return (true);
}
134
/*
 * Return true when c can be the second byte of a two-byte character.
 * Bit 7 is ignored, so both 0x21..0x7E and 0xA1..0xFE are accepted.
 */
static __inline bool
is_trailbyte(int c)
{
	/* Clear bit 7 only; any higher bits still disqualify the value. */
	const int low = c & ~0x80;

	return (!(low < 0x21 || low > 0x7E));
}
142
/*
 * Return true when c is 0xC2, the first byte of the two-byte prefix that
 * introduces a four-byte character.
 */
static __inline bool
is_hanyu1(int c)
{
	const int prefix1 = 0xC2;

	return (c == prefix1);
}
149
/*
 * Return true when c is 0xCB, the second byte of the two-byte prefix
 * that introduces a four-byte character.
 */
static __inline bool
is_hanyu2(int c)
{
	const int prefix2 = 0xCB;

	return (c == prefix2);
}
156
/*
 * Marker stored in the upper 16 bits of the wide character of a
 * four-byte sequence: the prefix bytes 0xC2 and 0xCB.
 */
#define HANYUBIT	0xC2CB0000
158
/*
 * Return true when c lies in 0x21..0x7E inclusive, the range accepted
 * for each byte of a two-byte character set index.
 */
static __inline bool
is_94charset(int c)
{

	if (c < 0x21)
		return (false);
	return (c <= 0x7E);
}
165
166 static int
167 /*ARGSUSED*/
_citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei,wchar_t * __restrict pwc,char ** __restrict s,size_t n,_DECHanyuState * __restrict psenc,size_t * __restrict nresult)168 _citrus_DECHanyu_mbrtowc_priv(_DECHanyuEncodingInfo * __restrict ei,
169 wchar_t * __restrict pwc, char ** __restrict s, size_t n,
170 _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
171 {
172 char *s0;
173 wchar_t wc;
174 int ch;
175
176 if (*s == NULL) {
177 _citrus_DECHanyu_init_state(ei, psenc);
178 *nresult = _ENCODING_IS_STATE_DEPENDENT;
179 return (0);
180 }
181 s0 = *s;
182
183 wc = (wchar_t)0;
184 switch (psenc->chlen) {
185 case 0:
186 if (n-- < 1)
187 goto restart;
188 ch = *s0++ & 0xFF;
189 if (is_singlebyte(ch)) {
190 if (pwc != NULL)
191 *pwc = (wchar_t)ch;
192 *nresult = (size_t)((ch == 0) ? 0 : 1);
193 *s = s0;
194 return (0);
195 }
196 if (!is_leadbyte(ch))
197 goto ilseq;
198 psenc->ch[psenc->chlen++] = ch;
199 break;
200 case 1:
201 ch = psenc->ch[0] & 0xFF;
202 if (!is_leadbyte(ch))
203 return (EINVAL);
204 break;
205 case 2: case 3:
206 ch = psenc->ch[0] & 0xFF;
207 if (is_hanyu1(ch)) {
208 ch = psenc->ch[1] & 0xFF;
209 if (is_hanyu2(ch)) {
210 wc |= (wchar_t)HANYUBIT;
211 break;
212 }
213 }
214 /*FALLTHROUGH*/
215 default:
216 return (EINVAL);
217 }
218
219 switch (psenc->chlen) {
220 case 1:
221 if (is_hanyu1(ch)) {
222 if (n-- < 1)
223 goto restart;
224 ch = *s0++ & 0xFF;
225 if (!is_hanyu2(ch))
226 goto ilseq;
227 psenc->ch[psenc->chlen++] = ch;
228 wc |= (wchar_t)HANYUBIT;
229 if (n-- < 1)
230 goto restart;
231 ch = *s0++ & 0xFF;
232 if (!is_leadbyte(ch))
233 goto ilseq;
234 psenc->ch[psenc->chlen++] = ch;
235 }
236 break;
237 case 2:
238 if (n-- < 1)
239 goto restart;
240 ch = *s0++ & 0xFF;
241 if (!is_leadbyte(ch))
242 goto ilseq;
243 psenc->ch[psenc->chlen++] = ch;
244 break;
245 case 3:
246 ch = psenc->ch[2] & 0xFF;
247 if (!is_leadbyte(ch))
248 return (EINVAL);
249 }
250 if (n-- < 1)
251 goto restart;
252 wc |= (wchar_t)(ch << 8);
253 ch = *s0++ & 0xFF;
254 if (!is_trailbyte(ch))
255 goto ilseq;
256 wc |= (wchar_t)ch;
257 if (pwc != NULL)
258 *pwc = wc;
259 *nresult = (size_t)(s0 - *s);
260 *s = s0;
261 psenc->chlen = 0;
262
263 return (0);
264
265 restart:
266 *nresult = (size_t)-2;
267 *s = s0;
268 return (0);
269
270 ilseq:
271 *nresult = (size_t)-1;
272 return (EILSEQ);
273 }
274
275 static int
276 /*ARGSUSED*/
_citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei __unused,char * __restrict s,size_t n,wchar_t wc,_DECHanyuState * __restrict psenc,size_t * __restrict nresult)277 _citrus_DECHanyu_wcrtomb_priv(_DECHanyuEncodingInfo * __restrict ei __unused,
278 char * __restrict s, size_t n, wchar_t wc,
279 _DECHanyuState * __restrict psenc, size_t * __restrict nresult)
280 {
281 int ch;
282
283 if (psenc->chlen != 0)
284 return (EINVAL);
285
286 /* XXX: assume wchar_t as int */
287 if ((uint32_t)wc <= 0x7F) {
288 ch = wc & 0xFF;
289 } else {
290 if ((uint32_t)wc > 0xFFFF) {
291 if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT)
292 goto ilseq;
293 psenc->ch[psenc->chlen++] = (wc >> 24) & 0xFF;
294 psenc->ch[psenc->chlen++] = (wc >> 16) & 0xFF;
295 wc &= 0xFFFF;
296 }
297 ch = (wc >> 8) & 0xFF;
298 if (!is_leadbyte(ch))
299 goto ilseq;
300 psenc->ch[psenc->chlen++] = ch;
301 ch = wc & 0xFF;
302 if (!is_trailbyte(ch))
303 goto ilseq;
304 }
305 psenc->ch[psenc->chlen++] = ch;
306 if (n < psenc->chlen) {
307 *nresult = (size_t)-1;
308 return (E2BIG);
309 }
310 memcpy(s, psenc->ch, psenc->chlen);
311 *nresult = psenc->chlen;
312 psenc->chlen = 0;
313
314 return (0);
315
316 ilseq:
317 *nresult = (size_t)-1;
318 return (EILSEQ);
319 }
320
321 static __inline int
322 /*ARGSUSED*/
_citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei __unused,_csid_t * __restrict csid,_index_t * __restrict idx,wchar_t wc)323 _citrus_DECHanyu_stdenc_wctocs(_DECHanyuEncodingInfo * __restrict ei __unused,
324 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
325 {
326 wchar_t mask;
327 int plane;
328
329 plane = 0;
330 mask = 0x7F;
331 /* XXX: assume wchar_t as int */
332 if ((uint32_t)wc > 0x7F) {
333 if ((uint32_t)wc > 0xFFFF) {
334 if ((wc & ~0xFFFF) != (wchar_t)HANYUBIT)
335 return (EILSEQ);
336 plane += 2;
337 }
338 if (!is_leadbyte((wc >> 8) & 0xFF) ||
339 !is_trailbyte(wc & 0xFF))
340 return (EILSEQ);
341 plane += (wc & 0x80) ? 1 : 2;
342 mask |= 0x7F00;
343 }
344 *csid = plane;
345 *idx = (_index_t)(wc & mask);
346
347 return (0);
348 }
349
350 static __inline int
351 /*ARGSUSED*/
_citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei __unused,wchar_t * __restrict wc,_csid_t csid,_index_t idx)352 _citrus_DECHanyu_stdenc_cstowc(_DECHanyuEncodingInfo * __restrict ei __unused,
353 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
354 {
355
356 if (csid == 0) {
357 if (idx > 0x7F)
358 return (EILSEQ);
359 } else if (csid <= 4) {
360 if (!is_94charset(idx >> 8))
361 return (EILSEQ);
362 if (!is_94charset(idx & 0xFF))
363 return (EILSEQ);
364 if (csid % 2)
365 idx |= 0x80;
366 idx |= 0x8000;
367 if (csid > 2)
368 idx |= HANYUBIT;
369 } else
370 return (EILSEQ);
371 *wc = (wchar_t)idx;
372 return (0);
373 }
374
375 static __inline int
376 /*ARGSUSED*/
_citrus_DECHanyu_stdenc_get_state_desc_generic(_DECHanyuEncodingInfo * __restrict ei __unused,_DECHanyuState * __restrict psenc,int * __restrict rstate)377 _citrus_DECHanyu_stdenc_get_state_desc_generic(
378 _DECHanyuEncodingInfo * __restrict ei __unused,
379 _DECHanyuState * __restrict psenc, int * __restrict rstate)
380 {
381
382 *rstate = (psenc->chlen == 0)
383 ? _STDENC_SDGEN_INITIAL
384 : _STDENC_SDGEN_INCOMPLETE_CHAR;
385 return (0);
386 }
387
388 /* ----------------------------------------------------------------------
389 * public interface for stdenc
390 */
391
392 _CITRUS_STDENC_DECLS(DECHanyu);
393 _CITRUS_STDENC_DEF_OPS(DECHanyu);
394
395 #include "citrus_stdenc_template.h"
396