1 /* $FreeBSD$ */ 2 /* $NetBSD: citrus_mskanji.c,v 1.13 2008/06/14 16:01:08 tnozaki Exp $ */ 3 4 /*- 5 * Copyright (c)2002 Citrus Project, 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 /* 31 * ja_JP.SJIS locale table for BSD4.4/rune 32 * version 1.0 33 * (C) Sin'ichiro MIYATANI / Phase One, Inc 34 * May 12, 1995 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 3. All advertising materials mentioning features or use of this software 45 * must display the following acknowledgement: 46 * This product includes software developed by Phase One, Inc. 47 * 4. The name of Phase One, Inc. may be used to endorse or promote products 48 * derived from this software without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 */ 62 63 64 #include <sys/cdefs.h> 65 #include <sys/types.h> 66 67 #include <assert.h> 68 #include <errno.h> 69 #include <limits.h> 70 #include <stdbool.h> 71 #include <stddef.h> 72 #include <stdio.h> 73 #include <stdlib.h> 74 #include <string.h> 75 #include <wchar.h> 76 77 #include "citrus_namespace.h" 78 #include "citrus_types.h" 79 #include "citrus_bcs.h" 80 #include "citrus_module.h" 81 #include "citrus_stdenc.h" 82 #include "citrus_mskanji.h" 83 84 85 /* ---------------------------------------------------------------------- 86 * private stuffs used by templates 87 */ 88 89 typedef struct _MSKanjiState { 90 int chlen; 91 char ch[2]; 92 } _MSKanjiState; 93 94 typedef struct { 95 int mode; 96 #define MODE_JIS2004 1 97 } _MSKanjiEncodingInfo; 98 99 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 100 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 101 102 #define _FUNCNAME(m) _citrus_MSKanji_##m 103 #define _ENCODING_INFO _MSKanjiEncodingInfo 104 #define _ENCODING_STATE _MSKanjiState 105 #define _ENCODING_MB_CUR_MAX(_ei_) 2 106 #define _ENCODING_IS_STATE_DEPENDENT 0 107 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0 108 109 110 static bool 111 _mskanji1(int c) 112 { 113 114 return ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)); 115 } 116 117 static bool 118 _mskanji2(int c) 119 { 120 121 return ((c >= 0x40 && c <= 0x7e) || (c >= 0x80 && c <= 0xfc)); 122 } 123 124 static __inline void 125 /*ARGSUSED*/ 126 _citrus_MSKanji_init_state(_MSKanjiEncodingInfo * __restrict ei __unused, 127 _MSKanjiState * __restrict s) 128 { 129 130 s->chlen = 0; 131 } 132 133 #if 0 134 static __inline void 135 /*ARGSUSED*/ 136 _citrus_MSKanji_pack_state(_MSKanjiEncodingInfo * __restrict ei __unused, 137 void * __restrict pspriv, const _MSKanjiState * __restrict s) 138 { 139 140 memcpy(pspriv, (const void *)s, sizeof(*s)); 141 } 142 143 static __inline void 144 /*ARGSUSED*/ 145 _citrus_MSKanji_unpack_state(_MSKanjiEncodingInfo * __restrict ei __unused, 146 _MSKanjiState * __restrict s, const void * __restrict pspriv) 147 { 148 149 memcpy((void *)s, pspriv, sizeof(*s)); 150 } 151 #endif 152 153 static int 154 /*ARGSUSED*/ 155 _citrus_MSKanji_mbrtowc_priv(_MSKanjiEncodingInfo * __restrict ei, 156 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 157 _MSKanjiState * __restrict psenc, size_t * __restrict nresult) 158 { 159 char *s0; 160 wchar_t wchar; 161 int chlenbak, len; 162 163 s0 = *s; 164 165 if (s0 == NULL) { 166 _citrus_MSKanji_init_state(ei, psenc); 167 *nresult = 0; /* state independent */ 168 return (0); 169 } 170 171 chlenbak = psenc->chlen; 172 173 /* make sure we have the first byte in the buffer */ 174 switch (psenc->chlen) { 175 case 0: 176 if (n < 1) 177 goto restart; 178 psenc->ch[0] = *s0++; 179 psenc->chlen = 1; 180 n--; 181 break; 182 case 1: 183 break; 184 default: 185 /* illegal state */ 186 goto encoding_error; 187 } 188 189 len = _mskanji1(psenc->ch[0] & 0xff) ? 2 : 1; 190 while (psenc->chlen < len) { 191 if (n < 1) 192 goto restart; 193 psenc->ch[psenc->chlen] = *s0++; 194 psenc->chlen++; 195 n--; 196 } 197 198 *s = s0; 199 200 switch (len) { 201 case 1: 202 wchar = psenc->ch[0] & 0xff; 203 break; 204 case 2: 205 if (!_mskanji2(psenc->ch[1] & 0xff)) 206 goto encoding_error; 207 wchar = ((psenc->ch[0] & 0xff) << 8) | (psenc->ch[1] & 0xff); 208 break; 209 default: 210 /* illegal state */ 211 goto encoding_error; 212 } 213 214 psenc->chlen = 0; 215 216 if (pwc) 217 *pwc = wchar; 218 *nresult = wchar ? len - chlenbak : 0; 219 return (0); 220 221 encoding_error: 222 psenc->chlen = 0; 223 *nresult = (size_t)-1; 224 return (EILSEQ); 225 226 restart: 227 *nresult = (size_t)-2; 228 *s = s0; 229 return (0); 230 } 231 232 233 static int 234 _citrus_MSKanji_wcrtomb_priv(_MSKanjiEncodingInfo * __restrict ei __unused, 235 char * __restrict s, size_t n, wchar_t wc, 236 _MSKanjiState * __restrict psenc __unused, size_t * __restrict nresult) 237 { 238 int ret; 239 240 /* check invalid sequence */ 241 if (wc & ~0xffff) { 242 ret = EILSEQ; 243 goto err; 244 } 245 246 if (wc & 0xff00) { 247 if (n < 2) { 248 ret = E2BIG; 249 goto err; 250 } 251 252 s[0] = (wc >> 8) & 0xff; 253 s[1] = wc & 0xff; 254 if (!_mskanji1(s[0] & 0xff) || !_mskanji2(s[1] & 0xff)) { 255 ret = EILSEQ; 256 goto err; 257 } 258 259 *nresult = 2; 260 return (0); 261 } else { 262 if (n < 1) { 263 ret = E2BIG; 264 goto err; 265 } 266 267 s[0] = wc & 0xff; 268 if (_mskanji1(s[0] & 0xff)) { 269 ret = EILSEQ; 270 goto err; 271 } 272 273 *nresult = 1; 274 return (0); 275 } 276 277 err: 278 *nresult = (size_t)-1; 279 return (ret); 280 } 281 282 283 static __inline int 284 /*ARGSUSED*/ 285 _citrus_MSKanji_stdenc_wctocs(_MSKanjiEncodingInfo * __restrict ei, 286 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 287 { 288 _index_t col, row; 289 int offset; 290 291 if ((_wc_t)wc < 0x80) { 292 /* ISO-646 */ 293 *csid = 0; 294 *idx = (_index_t)wc; 295 } else if ((_wc_t)wc < 0x100) { 296 /* KANA */ 297 *csid = 1; 298 *idx = (_index_t)wc & 0x7F; 299 } else { 300 /* Kanji (containing Gaiji zone) */ 301 /* 302 * 94^2 zone (contains a part of Gaiji (0xED40 - 0xEEFC)): 303 * 0x8140 - 0x817E -> 0x2121 - 0x215F 304 * 0x8180 - 0x819E -> 0x2160 - 0x217E 305 * 0x819F - 0x81FC -> 0x2221 - 0x227E 306 * 307 * 0x8240 - 0x827E -> 0x2321 - 0x235F 308 * ... 309 * 0x9F9F - 0x9FFc -> 0x5E21 - 0x5E7E 310 * 311 * 0xE040 - 0xE07E -> 0x5F21 - 0x5F5F 312 * ... 313 * 0xEF9F - 0xEFFC -> 0x7E21 - 0x7E7E 314 * 315 * extended Gaiji zone: 316 * 0xF040 - 0xFCFC 317 * 318 * JIS X0213-plane2: 319 * 0xF040 - 0xF09E -> 0x2121 - 0x217E 320 * 0xF140 - 0xF19E -> 0x2321 - 0x237E 321 * ... 322 * 0xF240 - 0xF29E -> 0x2521 - 0x257E 323 * 324 * 0xF09F - 0xF0FC -> 0x2821 - 0x287E 325 * 0xF29F - 0xF2FC -> 0x2C21 - 0x2C7E 326 * ... 327 * 0xF44F - 0xF49E -> 0x2F21 - 0x2F7E 328 * 329 * 0xF49F - 0xF4FC -> 0x6E21 - 0x6E7E 330 * ... 331 * 0xFC9F - 0xFCFC -> 0x7E21 - 0x7E7E 332 */ 333 row = ((_wc_t)wc >> 8) & 0xFF; 334 col = (_wc_t)wc & 0xFF; 335 if (!_mskanji1(row) || !_mskanji2(col)) 336 return (EILSEQ); 337 if ((ei->mode & MODE_JIS2004) == 0 || row < 0xF0) { 338 *csid = 2; 339 offset = 0x81; 340 } else { 341 *csid = 3; 342 if ((_wc_t)wc <= 0xF49E) { 343 offset = (_wc_t)wc >= 0xF29F || 344 ((_wc_t)wc >= 0xF09F && 345 (_wc_t)wc <= 0xF0FC) ? 0xED : 0xF0; 346 } else 347 offset = 0xCE; 348 } 349 row -= offset; 350 if (row >= 0x5F) 351 row -= 0x40; 352 row = row * 2 + 0x21; 353 col -= 0x1F; 354 if (col >= 0x61) 355 col -= 1; 356 if (col > 0x7E) { 357 row += 1; 358 col -= 0x5E; 359 } 360 *idx = ((_index_t)row << 8) | col; 361 } 362 363 return (0); 364 } 365 366 static __inline int 367 /*ARGSUSED*/ 368 _citrus_MSKanji_stdenc_cstowc(_MSKanjiEncodingInfo * __restrict ei, 369 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 370 { 371 uint32_t col, row; 372 int offset; 373 374 switch (csid) { 375 case 0: 376 /* ISO-646 */ 377 if (idx >= 0x80) 378 return (EILSEQ); 379 *wc = (wchar_t)idx; 380 break; 381 case 1: 382 /* kana */ 383 if (idx >= 0x80) 384 return (EILSEQ); 385 *wc = (wchar_t)idx + 0x80; 386 break; 387 case 3: 388 if ((ei->mode & MODE_JIS2004) == 0) 389 return (EILSEQ); 390 /*FALLTHROUGH*/ 391 case 2: 392 /* kanji */ 393 row = (idx >> 8); 394 if (row < 0x21) 395 return (EILSEQ); 396 if (csid == 3) { 397 if (row <= 0x2F) 398 offset = (row == 0x22 || row >= 0x26) ? 399 0xED : 0xF0; 400 else if (row >= 0x4D && row <= 0x7E) 401 offset = 0xCE; 402 else 403 return (EILSEQ); 404 } else { 405 if (row > 0x97) 406 return (EILSEQ); 407 offset = (row < 0x5F) ? 0x81 : 0xC1; 408 } 409 col = idx & 0xFF; 410 if (col < 0x21 || col > 0x7E) 411 return (EILSEQ); 412 row -= 0x21; col -= 0x21; 413 if ((row & 1) == 0) { 414 col += 0x40; 415 if (col >= 0x7F) 416 col += 1; 417 } else 418 col += 0x9F; 419 row = row / 2 + offset; 420 *wc = ((wchar_t)row << 8) | col; 421 break; 422 default: 423 return (EILSEQ); 424 } 425 426 return (0); 427 } 428 429 static __inline int 430 /*ARGSUSED*/ 431 _citrus_MSKanji_stdenc_get_state_desc_generic(_MSKanjiEncodingInfo * __restrict ei __unused, 432 _MSKanjiState * __restrict psenc, int * __restrict rstate) 433 { 434 435 *rstate = (psenc->chlen == 0) ? _STDENC_SDGEN_INITIAL : 436 _STDENC_SDGEN_INCOMPLETE_CHAR; 437 return (0); 438 } 439 440 static int 441 /*ARGSUSED*/ 442 _citrus_MSKanji_encoding_module_init(_MSKanjiEncodingInfo * __restrict ei, 443 const void * __restrict var, size_t lenvar) 444 { 445 const char *p; 446 447 p = var; 448 memset((void *)ei, 0, sizeof(*ei)); 449 while (lenvar > 0) { 450 switch (_bcs_toupper(*p)) { 451 case 'J': 452 MATCH(JIS2004, ei->mode |= MODE_JIS2004); 453 break; 454 } 455 ++p; 456 --lenvar; 457 } 458 459 return (0); 460 } 461 462 static void 463 _citrus_MSKanji_encoding_module_uninit(_MSKanjiEncodingInfo *ei __unused) 464 { 465 466 } 467 468 /* ---------------------------------------------------------------------- 469 * public interface for stdenc 470 */ 471 472 _CITRUS_STDENC_DECLS(MSKanji); 473 _CITRUS_STDENC_DEF_OPS(MSKanji); 474 475 #include "citrus_stdenc_template.h" 476