1 /* $FreeBSD$ */ 2 /* $NetBSD: citrus_mskanji.c,v 1.13 2008/06/14 16:01:08 tnozaki Exp $ */ 3 4 /*- 5 * SPDX-License-Identifier: BSD-2-Clause 6 * 7 * Copyright (c)2002 Citrus Project, 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32 /* 33 * ja_JP.SJIS locale table for BSD4.4/rune 34 * version 1.0 35 * (C) Sin'ichiro MIYATANI / Phase One, Inc 36 * May 12, 1995 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by Phase One, Inc. 49 * 4. The name of Phase One, Inc. may be used to endorse or promote products 50 * derived from this software without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 */ 64 65 66 #include <sys/cdefs.h> 67 #include <sys/types.h> 68 69 #include <assert.h> 70 #include <errno.h> 71 #include <limits.h> 72 #include <stdbool.h> 73 #include <stddef.h> 74 #include <stdio.h> 75 #include <stdlib.h> 76 #include <string.h> 77 #include <wchar.h> 78 79 #include "citrus_namespace.h" 80 #include "citrus_types.h" 81 #include "citrus_bcs.h" 82 #include "citrus_module.h" 83 #include "citrus_stdenc.h" 84 #include "citrus_mskanji.h" 85 86 87 /* ---------------------------------------------------------------------- 88 * private stuffs used by templates 89 */ 90 91 typedef struct _MSKanjiState { 92 int chlen; 93 char ch[2]; 94 } _MSKanjiState; 95 96 typedef struct { 97 int mode; 98 #define MODE_JIS2004 1 99 } _MSKanjiEncodingInfo; 100 101 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 102 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 103 104 #define _FUNCNAME(m) _citrus_MSKanji_##m 105 #define _ENCODING_INFO _MSKanjiEncodingInfo 106 #define _ENCODING_STATE _MSKanjiState 107 #define _ENCODING_MB_CUR_MAX(_ei_) 2 108 #define _ENCODING_IS_STATE_DEPENDENT 0 109 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 110 111 112 static bool 113 _mskanji1(int c) 114 { 115 116 return ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)); 117 } 118 119 static bool 120 _mskanji2(int c) 121 { 122 123 return ((c >= 0x40 && c <= 0x7e) || (c >= 0x80 && c <= 0xfc)); 124 } 125 126 static __inline void 127 /*ARGSUSED*/ 128 _citrus_MSKanji_init_state(_MSKanjiEncodingInfo * __restrict ei __unused, 129 _MSKanjiState * __restrict s) 130 { 131 132 s->chlen = 0; 133 } 134 135 #if 0 136 static __inline void 137 /*ARGSUSED*/ 138 _citrus_MSKanji_pack_state(_MSKanjiEncodingInfo * __restrict ei __unused, 139 void * __restrict pspriv, const _MSKanjiState * __restrict s) 140 { 141 142 memcpy(pspriv, (const void *)s, sizeof(*s)); 143 } 144 145 static __inline void 146 /*ARGSUSED*/ 147 _citrus_MSKanji_unpack_state(_MSKanjiEncodingInfo * __restrict ei __unused, 148 _MSKanjiState * __restrict s, const void * __restrict pspriv) 149 { 150 151 memcpy((void *)s, pspriv, sizeof(*s)); 152 } 153 #endif 154 155 static int 156 /*ARGSUSED*/ 157 _citrus_MSKanji_mbrtowc_priv(_MSKanjiEncodingInfo * __restrict ei, 158 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 159 _MSKanjiState * __restrict psenc, size_t * __restrict nresult) 160 { 161 char *s0; 162 wchar_t wchar; 163 int chlenbak, len; 164 165 s0 = *s; 166 167 if (s0 == NULL) { 168 _citrus_MSKanji_init_state(ei, psenc); 169 *nresult = 0; /* state independent */ 170 return (0); 171 } 172 173 chlenbak = psenc->chlen; 174 175 /* make sure we have the first byte in the buffer */ 176 switch (psenc->chlen) { 177 case 0: 178 if (n < 1) 179 goto restart; 180 psenc->ch[0] = *s0++; 181 psenc->chlen = 1; 182 n--; 183 break; 184 case 1: 185 break; 186 default: 187 /* illegal state */ 188 goto encoding_error; 189 } 190 191 len = _mskanji1(psenc->ch[0] & 0xff) ? 2 : 1; 192 while (psenc->chlen < len) { 193 if (n < 1) 194 goto restart; 195 psenc->ch[psenc->chlen] = *s0++; 196 psenc->chlen++; 197 n--; 198 } 199 200 *s = s0; 201 202 switch (len) { 203 case 1: 204 wchar = psenc->ch[0] & 0xff; 205 break; 206 case 2: 207 if (!_mskanji2(psenc->ch[1] & 0xff)) 208 goto encoding_error; 209 wchar = ((psenc->ch[0] & 0xff) << 8) | (psenc->ch[1] & 0xff); 210 break; 211 default: 212 /* illegal state */ 213 goto encoding_error; 214 } 215 216 psenc->chlen = 0; 217 218 if (pwc) 219 *pwc = wchar; 220 *nresult = wchar ? len - chlenbak : 0; 221 return (0); 222 223 encoding_error: 224 psenc->chlen = 0; 225 *nresult = (size_t)-1; 226 return (EILSEQ); 227 228 restart: 229 *nresult = (size_t)-2; 230 *s = s0; 231 return (0); 232 } 233 234 235 static int 236 _citrus_MSKanji_wcrtomb_priv(_MSKanjiEncodingInfo * __restrict ei __unused, 237 char * __restrict s, size_t n, wchar_t wc, 238 _MSKanjiState * __restrict psenc __unused, size_t * __restrict nresult) 239 { 240 int ret; 241 242 /* check invalid sequence */ 243 if (wc & ~0xffff) { 244 ret = EILSEQ; 245 goto err; 246 } 247 248 if (wc & 0xff00) { 249 if (n < 2) { 250 ret = E2BIG; 251 goto err; 252 } 253 254 s[0] = (wc >> 8) & 0xff; 255 s[1] = wc & 0xff; 256 if (!_mskanji1(s[0] & 0xff) || !_mskanji2(s[1] & 0xff)) { 257 ret = EILSEQ; 258 goto err; 259 } 260 261 *nresult = 2; 262 return (0); 263 } else { 264 if (n < 1) { 265 ret = E2BIG; 266 goto err; 267 } 268 269 s[0] = wc & 0xff; 270 if (_mskanji1(s[0] & 0xff)) { 271 ret = EILSEQ; 272 goto err; 273 } 274 275 *nresult = 1; 276 return (0); 277 } 278 279 err: 280 *nresult = (size_t)-1; 281 return (ret); 282 } 283 284 285 static __inline int 286 /*ARGSUSED*/ 287 _citrus_MSKanji_stdenc_wctocs(_MSKanjiEncodingInfo * __restrict ei, 288 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 289 { 290 _index_t col, row; 291 int offset; 292 293 if ((_wc_t)wc < 0x80) { 294 /* ISO-646 */ 295 *csid = 0; 296 *idx = (_index_t)wc; 297 } else if ((_wc_t)wc < 0x100) { 298 /* KANA */ 299 *csid = 1; 300 *idx = (_index_t)wc & 0x7F; 301 } else { 302 /* Kanji (containing Gaiji zone) */ 303 /* 304 * 94^2 zone (contains a part of Gaiji (0xED40 - 0xEEFC)): 305 * 0x8140 - 0x817E -> 0x2121 - 0x215F 306 * 0x8180 - 0x819E -> 0x2160 - 0x217E 307 * 0x819F - 0x81FC -> 0x2221 - 0x227E 308 * 309 * 0x8240 - 0x827E -> 0x2321 - 0x235F 310 * ... 311 * 0x9F9F - 0x9FFc -> 0x5E21 - 0x5E7E 312 * 313 * 0xE040 - 0xE07E -> 0x5F21 - 0x5F5F 314 * ... 315 * 0xEF9F - 0xEFFC -> 0x7E21 - 0x7E7E 316 * 317 * extended Gaiji zone: 318 * 0xF040 - 0xFCFC 319 * 320 * JIS X0213-plane2: 321 * 0xF040 - 0xF09E -> 0x2121 - 0x217E 322 * 0xF140 - 0xF19E -> 0x2321 - 0x237E 323 * ... 324 * 0xF240 - 0xF29E -> 0x2521 - 0x257E 325 * 326 * 0xF09F - 0xF0FC -> 0x2821 - 0x287E 327 * 0xF29F - 0xF2FC -> 0x2C21 - 0x2C7E 328 * ... 329 * 0xF44F - 0xF49E -> 0x2F21 - 0x2F7E 330 * 331 * 0xF49F - 0xF4FC -> 0x6E21 - 0x6E7E 332 * ... 333 * 0xFC9F - 0xFCFC -> 0x7E21 - 0x7E7E 334 */ 335 row = ((_wc_t)wc >> 8) & 0xFF; 336 col = (_wc_t)wc & 0xFF; 337 if (!_mskanji1(row) || !_mskanji2(col)) 338 return (EILSEQ); 339 if ((ei->mode & MODE_JIS2004) == 0 || row < 0xF0) { 340 *csid = 2; 341 offset = 0x81; 342 } else { 343 *csid = 3; 344 if ((_wc_t)wc <= 0xF49E) { 345 offset = (_wc_t)wc >= 0xF29F || 346 ((_wc_t)wc >= 0xF09F && 347 (_wc_t)wc <= 0xF0FC) ? 0xED : 0xF0; 348 } else 349 offset = 0xCE; 350 } 351 row -= offset; 352 if (row >= 0x5F) 353 row -= 0x40; 354 row = row * 2 + 0x21; 355 col -= 0x1F; 356 if (col >= 0x61) 357 col -= 1; 358 if (col > 0x7E) { 359 row += 1; 360 col -= 0x5E; 361 } 362 *idx = ((_index_t)row << 8) | col; 363 } 364 365 return (0); 366 } 367 368 static __inline int 369 /*ARGSUSED*/ 370 _citrus_MSKanji_stdenc_cstowc(_MSKanjiEncodingInfo * __restrict ei, 371 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 372 { 373 uint32_t col, row; 374 int offset; 375 376 switch (csid) { 377 case 0: 378 /* ISO-646 */ 379 if (idx >= 0x80) 380 return (EILSEQ); 381 *wc = (wchar_t)idx; 382 break; 383 case 1: 384 /* kana */ 385 if (idx >= 0x80) 386 return (EILSEQ); 387 *wc = (wchar_t)idx + 0x80; 388 break; 389 case 3: 390 if ((ei->mode & MODE_JIS2004) == 0) 391 return (EILSEQ); 392 /*FALLTHROUGH*/ 393 case 2: 394 /* kanji */ 395 row = (idx >> 8); 396 if (row < 0x21) 397 return (EILSEQ); 398 if (csid == 3) { 399 if (row <= 0x2F) 400 offset = (row == 0x22 || row >= 0x26) ? 401 0xED : 0xF0; 402 else if (row >= 0x4D && row <= 0x7E) 403 offset = 0xCE; 404 else 405 return (EILSEQ); 406 } else { 407 if (row > 0x97) 408 return (EILSEQ); 409 offset = (row < 0x5F) ? 0x81 : 0xC1; 410 } 411 col = idx & 0xFF; 412 if (col < 0x21 || col > 0x7E) 413 return (EILSEQ); 414 row -= 0x21; col -= 0x21; 415 if ((row & 1) == 0) { 416 col += 0x40; 417 if (col >= 0x7F) 418 col += 1; 419 } else 420 col += 0x9F; 421 row = row / 2 + offset; 422 *wc = ((wchar_t)row << 8) | col; 423 break; 424 default: 425 return (EILSEQ); 426 } 427 428 return (0); 429 } 430 431 static __inline int 432 /*ARGSUSED*/ 433 _citrus_MSKanji_stdenc_get_state_desc_generic(_MSKanjiEncodingInfo * __restrict ei __unused, 434 _MSKanjiState * __restrict psenc, int * __restrict rstate) 435 { 436 437 *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 438 _STDENC_SDGEN_INCOMPLETE_CHAR; 439 return (0); 440 } 441 442 static int 443 /*ARGSUSED*/ 444 _citrus_MSKanji_encoding_module_init(_MSKanjiEncodingInfo * __restrict ei, 445 const void * __restrict var, size_t lenvar) 446 { 447 const char *p; 448 449 p = var; 450 memset((void *)ei, 0, sizeof(*ei)); 451 while (lenvar > 0) { 452 switch (_bcs_toupper(*p)) { 453 case 'J': 454 MATCH(JIS2004, ei->mode |= MODE_JIS2004); 455 break; 456 } 457 ++p; 458 --lenvar; 459 } 460 461 return (0); 462 } 463 464 static void 465 _citrus_MSKanji_encoding_module_uninit(_MSKanjiEncodingInfo *ei __unused) 466 { 467 468 } 469 470 /* ---------------------------------------------------------------------- 471 * public interface for stdenc 472 */ 473 474 _CITRUS_STDENC_DECLS(MSKanji); 475 _CITRUS_STDENC_DEF_OPS(MSKanji); 476 477 #include "citrus_stdenc_template.h" 478