1 /* $NetBSD: citrus_mskanji.c,v 1.13 2008/06/14 16:01:08 tnozaki Exp $ */
2
3 /*-
4 * SPDX-License-Identifier: BSD-2-Clause
5 *
6 * Copyright (c)2002 Citrus Project,
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31 /*
32 * ja_JP.SJIS locale table for BSD4.4/rune
33 * version 1.0
34 * (C) Sin'ichiro MIYATANI / Phase One, Inc
35 * May 12, 1995
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by Phase One, Inc.
48 * 4. The name of Phase One, Inc. may be used to endorse or promote products
49 * derived from this software without specific prior written permission.
50 *
51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61 * SUCH DAMAGE.
62 */
63
64 #include <sys/types.h>
65
66 #include <assert.h>
67 #include <errno.h>
68 #include <limits.h>
69 #include <stdbool.h>
70 #include <stddef.h>
71 #include <stdio.h>
72 #include <stdlib.h>
73 #include <string.h>
74 #include <wchar.h>
75
76 #include "citrus_namespace.h"
77 #include "citrus_types.h"
78 #include "citrus_bcs.h"
79 #include "citrus_module.h"
80 #include "citrus_stdenc.h"
81 #include "citrus_mskanji.h"
82
83
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
87
/*
 * Conversion state kept between calls: the bytes of a character that
 * has been read only partially so far.
 */
struct _MSKanjiState {
	int	 chlen;		/* number of bytes currently buffered in ch[] */
	char	 ch[2];		/* buffered bytes; ch[0] is the lead byte */
};
typedef struct _MSKanjiState _MSKanjiState;
92
/* Flag bit stored in _MSKanjiEncodingInfo.mode. */
#define MODE_JIS2004	1

/* Per-encoding settings; zeroed and filled in by the module init routine. */
typedef struct {
	int	 mode;		/* bitwise OR of MODE_* flags */
} _MSKanjiEncodingInfo;
97
/*
 * Parameters for the generic stdenc code (presumably consumed by
 * citrus_stdenc_template.h, which is included at the end of this file).
 */

/* Name mangling: _FUNCNAME(x) expands to _citrus_MSKanji_x. */
#define _FUNCNAME(m)			_citrus_MSKanji_##m

/* Concrete types standing behind the generic names. */
#define _ENCODING_INFO			_MSKanjiEncodingInfo
#define _ENCODING_STATE			_MSKanjiState

/* A character is one or two bytes long; there is no shift state. */
#define _ENCODING_MB_CUR_MAX(_ei_)	2
#define _ENCODING_IS_STATE_DEPENDENT	0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0

/* Accessors into a container object whose layout is declared elsewhere. */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_
107
108
/*
 * Tell whether c is the first byte of a two-byte character, i.e. lies in
 * 0x81..0x9f or 0xe0..0xfc.  c is expected to be an unsigned byte value.
 */
static bool
_mskanji1(int c)
{

	if (c < 0x81 || c > 0xfc)
		return (false);
	/* Within 0x81..0xfc only the 0xa0..0xdf gap is excluded. */
	return (c <= 0x9f || c >= 0xe0);
}
115
/*
 * Tell whether c is acceptable as the second byte of a two-byte
 * character, i.e. lies in 0x40..0x7e or 0x80..0xfc.  c is expected to be
 * an unsigned byte value.
 */
static bool
_mskanji2(int c)
{

	/* One contiguous span with a single hole at 0x7f. */
	return (c >= 0x40 && c <= 0xfc && c != 0x7f);
}
122
123 static __inline void
124 /*ARGSUSED*/
_citrus_MSKanji_init_state(_MSKanjiEncodingInfo * __restrict ei __unused,_MSKanjiState * __restrict s)125 _citrus_MSKanji_init_state(_MSKanjiEncodingInfo * __restrict ei __unused,
126 _MSKanjiState * __restrict s)
127 {
128
129 s->chlen = 0;
130 }
131
#if 0
/*
 * Compiled out.  These two helpers copy a conversion state to and from
 * an opaque buffer of at least sizeof(_MSKanjiState) bytes.
 */

/* Serialize *s into the caller-supplied buffer pspriv. */
static __inline void
/*ARGSUSED*/
_citrus_MSKanji_pack_state(_MSKanjiEncodingInfo * __restrict ei __unused,
    void * __restrict pspriv, const _MSKanjiState * __restrict s)
{

	memcpy(pspriv, s, sizeof(*s));
}

/* Restore *s from a buffer previously filled by pack_state. */
static __inline void
/*ARGSUSED*/
_citrus_MSKanji_unpack_state(_MSKanjiEncodingInfo * __restrict ei __unused,
    _MSKanjiState * __restrict s, const void * __restrict pspriv)
{

	memcpy(s, pspriv, sizeof(*s));
}
#endif
151
152 static int
153 /*ARGSUSED*/
_citrus_MSKanji_mbrtowc_priv(_MSKanjiEncodingInfo * __restrict ei,wchar_t * __restrict pwc,char ** __restrict s,size_t n,_MSKanjiState * __restrict psenc,size_t * __restrict nresult)154 _citrus_MSKanji_mbrtowc_priv(_MSKanjiEncodingInfo * __restrict ei,
155 wchar_t * __restrict pwc, char ** __restrict s, size_t n,
156 _MSKanjiState * __restrict psenc, size_t * __restrict nresult)
157 {
158 char *s0;
159 wchar_t wchar;
160 int chlenbak, len;
161
162 s0 = *s;
163
164 if (s0 == NULL) {
165 _citrus_MSKanji_init_state(ei, psenc);
166 *nresult = 0; /* state independent */
167 return (0);
168 }
169
170 chlenbak = psenc->chlen;
171
172 /* make sure we have the first byte in the buffer */
173 switch (psenc->chlen) {
174 case 0:
175 if (n < 1)
176 goto restart;
177 psenc->ch[0] = *s0++;
178 psenc->chlen = 1;
179 n--;
180 break;
181 case 1:
182 break;
183 default:
184 /* illegal state */
185 goto encoding_error;
186 }
187
188 len = _mskanji1(psenc->ch[0] & 0xff) ? 2 : 1;
189 while (psenc->chlen < len) {
190 if (n < 1)
191 goto restart;
192 psenc->ch[psenc->chlen] = *s0++;
193 psenc->chlen++;
194 n--;
195 }
196
197 *s = s0;
198
199 switch (len) {
200 case 1:
201 wchar = psenc->ch[0] & 0xff;
202 break;
203 case 2:
204 if (!_mskanji2(psenc->ch[1] & 0xff))
205 goto encoding_error;
206 wchar = ((psenc->ch[0] & 0xff) << 8) | (psenc->ch[1] & 0xff);
207 break;
208 default:
209 /* illegal state */
210 goto encoding_error;
211 }
212
213 psenc->chlen = 0;
214
215 if (pwc)
216 *pwc = wchar;
217 *nresult = wchar ? len - chlenbak : 0;
218 return (0);
219
220 encoding_error:
221 psenc->chlen = 0;
222 *nresult = (size_t)-1;
223 return (EILSEQ);
224
225 restart:
226 *nresult = (size_t)-2;
227 *s = s0;
228 return (0);
229 }
230
231
232 static int
_citrus_MSKanji_wcrtomb_priv(_MSKanjiEncodingInfo * __restrict ei __unused,char * __restrict s,size_t n,wchar_t wc,_MSKanjiState * __restrict psenc __unused,size_t * __restrict nresult)233 _citrus_MSKanji_wcrtomb_priv(_MSKanjiEncodingInfo * __restrict ei __unused,
234 char * __restrict s, size_t n, wchar_t wc,
235 _MSKanjiState * __restrict psenc __unused, size_t * __restrict nresult)
236 {
237 int ret;
238
239 /* check invalid sequence */
240 if (wc & ~0xffff) {
241 ret = EILSEQ;
242 goto err;
243 }
244
245 if (wc & 0xff00) {
246 if (n < 2) {
247 ret = E2BIG;
248 goto err;
249 }
250
251 s[0] = (wc >> 8) & 0xff;
252 s[1] = wc & 0xff;
253 if (!_mskanji1(s[0] & 0xff) || !_mskanji2(s[1] & 0xff)) {
254 ret = EILSEQ;
255 goto err;
256 }
257
258 *nresult = 2;
259 return (0);
260 } else {
261 if (n < 1) {
262 ret = E2BIG;
263 goto err;
264 }
265
266 s[0] = wc & 0xff;
267 if (_mskanji1(s[0] & 0xff)) {
268 ret = EILSEQ;
269 goto err;
270 }
271
272 *nresult = 1;
273 return (0);
274 }
275
276 err:
277 *nresult = (size_t)-1;
278 return (ret);
279 }
280
281
282 static __inline int
283 /*ARGSUSED*/
_citrus_MSKanji_stdenc_wctocs(_MSKanjiEncodingInfo * __restrict ei,_csid_t * __restrict csid,_index_t * __restrict idx,wchar_t wc)284 _citrus_MSKanji_stdenc_wctocs(_MSKanjiEncodingInfo * __restrict ei,
285 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc)
286 {
287 _index_t col, row;
288 int offset;
289
290 if ((_wc_t)wc < 0x80) {
291 /* ISO-646 */
292 *csid = 0;
293 *idx = (_index_t)wc;
294 } else if ((_wc_t)wc < 0x100) {
295 /* KANA */
296 *csid = 1;
297 *idx = (_index_t)wc & 0x7F;
298 } else {
299 /* Kanji (containing Gaiji zone) */
300 /*
301 * 94^2 zone (contains a part of Gaiji (0xED40 - 0xEEFC)):
302 * 0x8140 - 0x817E -> 0x2121 - 0x215F
303 * 0x8180 - 0x819E -> 0x2160 - 0x217E
304 * 0x819F - 0x81FC -> 0x2221 - 0x227E
305 *
306 * 0x8240 - 0x827E -> 0x2321 - 0x235F
307 * ...
308 * 0x9F9F - 0x9FFc -> 0x5E21 - 0x5E7E
309 *
310 * 0xE040 - 0xE07E -> 0x5F21 - 0x5F5F
311 * ...
312 * 0xEF9F - 0xEFFC -> 0x7E21 - 0x7E7E
313 *
314 * extended Gaiji zone:
315 * 0xF040 - 0xFCFC
316 *
317 * JIS X0213-plane2:
318 * 0xF040 - 0xF09E -> 0x2121 - 0x217E
319 * 0xF140 - 0xF19E -> 0x2321 - 0x237E
320 * ...
321 * 0xF240 - 0xF29E -> 0x2521 - 0x257E
322 *
323 * 0xF09F - 0xF0FC -> 0x2821 - 0x287E
324 * 0xF29F - 0xF2FC -> 0x2C21 - 0x2C7E
325 * ...
326 * 0xF44F - 0xF49E -> 0x2F21 - 0x2F7E
327 *
328 * 0xF49F - 0xF4FC -> 0x6E21 - 0x6E7E
329 * ...
330 * 0xFC9F - 0xFCFC -> 0x7E21 - 0x7E7E
331 */
332 row = ((_wc_t)wc >> 8) & 0xFF;
333 col = (_wc_t)wc & 0xFF;
334 if (!_mskanji1(row) || !_mskanji2(col))
335 return (EILSEQ);
336 if ((ei->mode & MODE_JIS2004) == 0 || row < 0xF0) {
337 *csid = 2;
338 offset = 0x81;
339 } else {
340 *csid = 3;
341 if ((_wc_t)wc <= 0xF49E) {
342 offset = (_wc_t)wc >= 0xF29F ||
343 ((_wc_t)wc >= 0xF09F &&
344 (_wc_t)wc <= 0xF0FC) ? 0xED : 0xF0;
345 } else
346 offset = 0xCE;
347 }
348 row -= offset;
349 if (row >= 0x5F)
350 row -= 0x40;
351 row = row * 2 + 0x21;
352 col -= 0x1F;
353 if (col >= 0x61)
354 col -= 1;
355 if (col > 0x7E) {
356 row += 1;
357 col -= 0x5E;
358 }
359 *idx = ((_index_t)row << 8) | col;
360 }
361
362 return (0);
363 }
364
365 static __inline int
366 /*ARGSUSED*/
_citrus_MSKanji_stdenc_cstowc(_MSKanjiEncodingInfo * __restrict ei,wchar_t * __restrict wc,_csid_t csid,_index_t idx)367 _citrus_MSKanji_stdenc_cstowc(_MSKanjiEncodingInfo * __restrict ei,
368 wchar_t * __restrict wc, _csid_t csid, _index_t idx)
369 {
370 uint32_t col, row;
371 int offset;
372
373 switch (csid) {
374 case 0:
375 /* ISO-646 */
376 if (idx >= 0x80)
377 return (EILSEQ);
378 *wc = (wchar_t)idx;
379 break;
380 case 1:
381 /* kana */
382 if (idx >= 0x80)
383 return (EILSEQ);
384 *wc = (wchar_t)idx + 0x80;
385 break;
386 case 3:
387 if ((ei->mode & MODE_JIS2004) == 0)
388 return (EILSEQ);
389 /*FALLTHROUGH*/
390 case 2:
391 /* kanji */
392 row = (idx >> 8);
393 if (row < 0x21)
394 return (EILSEQ);
395 if (csid == 3) {
396 if (row <= 0x2F)
397 offset = (row == 0x22 || row >= 0x26) ?
398 0xED : 0xF0;
399 else if (row >= 0x4D && row <= 0x7E)
400 offset = 0xCE;
401 else
402 return (EILSEQ);
403 } else {
404 if (row > 0x97)
405 return (EILSEQ);
406 offset = (row < 0x5F) ? 0x81 : 0xC1;
407 }
408 col = idx & 0xFF;
409 if (col < 0x21 || col > 0x7E)
410 return (EILSEQ);
411 row -= 0x21; col -= 0x21;
412 if ((row & 1) == 0) {
413 col += 0x40;
414 if (col >= 0x7F)
415 col += 1;
416 } else
417 col += 0x9F;
418 row = row / 2 + offset;
419 *wc = ((wchar_t)row << 8) | col;
420 break;
421 default:
422 return (EILSEQ);
423 }
424
425 return (0);
426 }
427
428 static __inline int
429 /*ARGSUSED*/
_citrus_MSKanji_stdenc_get_state_desc_generic(_MSKanjiEncodingInfo * __restrict ei __unused,_MSKanjiState * __restrict psenc,int * __restrict rstate)430 _citrus_MSKanji_stdenc_get_state_desc_generic(_MSKanjiEncodingInfo * __restrict ei __unused,
431 _MSKanjiState * __restrict psenc, int * __restrict rstate)
432 {
433
434 *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL :
435 _STDENC_SDGEN_INCOMPLETE_CHAR;
436 return (0);
437 }
438
439 static int
440 /*ARGSUSED*/
_citrus_MSKanji_encoding_module_init(_MSKanjiEncodingInfo * __restrict ei,const void * __restrict var,size_t lenvar)441 _citrus_MSKanji_encoding_module_init(_MSKanjiEncodingInfo * __restrict ei,
442 const void * __restrict var, size_t lenvar)
443 {
444 const char *p;
445
446 p = var;
447 memset((void *)ei, 0, sizeof(*ei));
448 while (lenvar > 0) {
449 switch (_bcs_toupper(*p)) {
450 case 'J':
451 MATCH(JIS2004, ei->mode |= MODE_JIS2004);
452 break;
453 }
454 ++p;
455 --lenvar;
456 }
457
458 return (0);
459 }
460
461 static void
_citrus_MSKanji_encoding_module_uninit(_MSKanjiEncodingInfo * ei __unused)462 _citrus_MSKanji_encoding_module_uninit(_MSKanjiEncodingInfo *ei __unused)
463 {
464
465 }
466
/* ----------------------------------------------------------------------
 * public interface for stdenc
 */
470
/*
 * NOTE(review): both macros are defined outside this file (presumably
 * in citrus_stdenc.h); from their names they declare and define the
 * stdenc operations for the MSKanji module -- confirm there.
 */
_CITRUS_STDENC_DECLS(MSKanji);
_CITRUS_STDENC_DEF_OPS(MSKanji);

/*
 * Must stay after every definition above: the template presumably
 * expands in terms of _FUNCNAME, _ENCODING_INFO, _ENCODING_STATE and
 * the static functions of this file.
 */
#include "citrus_stdenc_template.h"
475