1 /* $FreeBSD$ */ 2 /* $NetBSD: citrus_hz.c,v 1.2 2008/06/14 16:01:07 tnozaki Exp $ */ 3 4 /*- 5 * Copyright (c)2004, 2006 Citrus Project, 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30 31 #include <sys/cdefs.h> 32 #include <sys/queue.h> 33 #include <sys/types.h> 34 35 #include <assert.h> 36 #include <errno.h> 37 #include <limits.h> 38 #include <stddef.h> 39 #include <stdint.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <wchar.h> 43 44 #include "citrus_namespace.h" 45 #include "citrus_types.h" 46 #include "citrus_bcs.h" 47 #include "citrus_module.h" 48 #include "citrus_stdenc.h" 49 50 #include "citrus_hz.h" 51 #include "citrus_prop.h" 52 53 /* 54 * wchar_t mapping: 55 * 56 * CTRL/ASCII 00000000 00000000 00000000 gxxxxxxx 57 * GB2312 00000000 00000000 0xxxxxxx gxxxxxxx 58 * 94/96*n (~M) 0mmmmmmm 0xxxxxxx 0xxxxxxx gxxxxxxx 59 */ 60 61 #define ESCAPE_CHAR '~' 62 63 typedef enum { 64 CTRL = 0, ASCII = 1, GB2312 = 2, CS94 = 3, CS96 = 4 65 } charset_t; 66 67 typedef struct { 68 int start; 69 int end; 70 int width; 71 } range_t; 72 73 static const range_t ranges[] = { 74 #define RANGE(start, end) { start, end, (end - start) + 1 } 75 /* CTRL */ RANGE(0x00, 0x1F), 76 /* ASCII */ RANGE(0x20, 0x7F), 77 /* GB2312 */ RANGE(0x21, 0x7E), 78 /* CS94 */ RANGE(0x21, 0x7E), 79 /* CS96 */ RANGE(0x20, 0x7F), 80 #undef RANGE 81 }; 82 83 typedef struct escape_t escape_t; 84 typedef struct { 85 charset_t charset; 86 escape_t *escape; 87 ssize_t length; 88 #define ROWCOL_MAX 3 89 } graphic_t; 90 91 typedef TAILQ_HEAD(escape_list, escape_t) escape_list; 92 struct escape_t { 93 TAILQ_ENTRY(escape_t) entry; 94 escape_list *set; 95 graphic_t *left; 96 graphic_t *right; 97 int ch; 98 }; 99 100 #define GL(escape) ((escape)->left) 101 #define GR(escape) ((escape)->right) 102 #define SET(escape) ((escape)->set) 103 #define ESC(escape) ((escape)->ch) 104 #define INIT(escape) (TAILQ_FIRST(SET(escape))) 105 106 static __inline escape_t * 107 find_escape(escape_list *set, int ch) 108 { 109 escape_t *escape; 110 111 TAILQ_FOREACH(escape, set, entry) { 112 if (ESC(escape) == ch) 113 break; 114 } 115 116 return (escape); 117 } 118 119 typedef struct { 120 escape_list e0; 121 escape_list e1; 122 graphic_t *ascii; 123 graphic_t *gb2312; 124 } _HZEncodingInfo; 125 126 #define E0SET(ei) (&(ei)->e0) 127 #define E1SET(ei) (&(ei)->e1) 128 #define INIT0(ei) (TAILQ_FIRST(E0SET(ei))) 129 #define INIT1(ei) (TAILQ_FIRST(E1SET(ei))) 130 131 typedef struct { 132 escape_t *inuse; 133 int chlen; 134 char ch[ROWCOL_MAX]; 135 } _HZState; 136 137 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 138 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 139 140 #define _FUNCNAME(m) _citrus_HZ_##m 141 #define _ENCODING_INFO _HZEncodingInfo 142 #define _ENCODING_STATE _HZState 143 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 144 #define _ENCODING_IS_STATE_DEPENDENT 1 145 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->inuse == NULL) 146 147 static __inline void 148 _citrus_HZ_init_state(_HZEncodingInfo * __restrict ei, 149 _HZState * __restrict psenc) 150 { 151 152 psenc->chlen = 0; 153 psenc->inuse = INIT0(ei); 154 } 155 156 #if 0 157 static __inline void 158 /*ARGSUSED*/ 159 _citrus_HZ_pack_state(_HZEncodingInfo * __restrict ei __unused, 160 void *__restrict pspriv, const _HZState * __restrict psenc) 161 { 162 163 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 164 } 165 166 static __inline void 167 /*ARGSUSED*/ 168 _citrus_HZ_unpack_state(_HZEncodingInfo * __restrict ei __unused, 169 _HZState * __restrict psenc, const void * __restrict pspriv) 170 { 171 172 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 173 } 174 #endif 175 176 static int 177 _citrus_HZ_mbrtowc_priv(_HZEncodingInfo * __restrict ei, 178 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 179 _HZState * __restrict psenc, size_t * __restrict nresult) 180 { 181 escape_t *candidate, *init; 182 graphic_t *graphic; 183 const range_t *range; 184 char *s0; 185 wchar_t wc; 186 int bit, ch, head, len, tail; 187 188 if (*s == NULL) { 189 _citrus_HZ_init_state(ei, psenc); 190 *nresult = 1; 191 return (0); 192 } 193 s0 = *s; 194 if (psenc->chlen < 0 || psenc->inuse == NULL) 195 return (EINVAL); 196 197 wc = (wchar_t)0; 198 bit = head = tail = 0; 199 graphic = NULL; 200 for (len = 0; len <= MB_LEN_MAX;) { 201 if (psenc->chlen == tail) { 202 if (n-- < 1) { 203 *s = s0; 204 *nresult = (size_t)-2; 205 return (0); 206 } 207 psenc->ch[psenc->chlen++] = *s0++; 208 ++len; 209 } 210 ch = (unsigned char)psenc->ch[tail++]; 211 if (tail == 1) { 212 if ((ch & ~0x80) <= 0x1F) { 213 if (psenc->inuse != INIT0(ei)) 214 break; 215 wc = (wchar_t)ch; 216 goto done; 217 } 218 if (ch & 0x80) { 219 graphic = GR(psenc->inuse); 220 bit = 0x80; 221 ch &= ~0x80; 222 } else { 223 graphic = GL(psenc->inuse); 224 if (ch == ESCAPE_CHAR) 225 continue; 226 bit = 0x0; 227 } 228 if (graphic == NULL) 229 break; 230 } else if (tail == 2 && psenc->ch[0] == ESCAPE_CHAR) { 231 if (tail < psenc->chlen) 232 return (EINVAL); 233 if (ch == ESCAPE_CHAR) { 234 ++head; 235 } else if (ch == '\n') { 236 if (psenc->inuse != INIT0(ei)) 237 break; 238 tail = psenc->chlen = 0; 239 continue; 240 } else { 241 candidate = NULL; 242 init = INIT0(ei); 243 if (psenc->inuse == init) { 244 init = INIT1(ei); 245 } else if (INIT(psenc->inuse) == init) { 246 if (ESC(init) != ch) 247 break; 248 candidate = init; 249 } 250 if (candidate == NULL) { 251 candidate = find_escape( 252 SET(psenc->inuse), ch); 253 if (candidate == NULL) { 254 if (init == NULL || 255 ESC(init) != ch) 256 break; 257 candidate = init; 258 } 259 } 260 psenc->inuse = candidate; 261 tail = psenc->chlen = 0; 262 continue; 263 } 264 } else if (ch & 0x80) { 265 if (graphic != GR(psenc->inuse)) 266 break; 267 ch &= ~0x80; 268 } else { 269 if (graphic != GL(psenc->inuse)) 270 break; 271 } 272 range = &ranges[(size_t)graphic->charset]; 273 if (range->start > ch || range->end < ch) 274 break; 275 wc <<= 8; 276 wc |= ch; 277 if (graphic->length == (tail - head)) { 278 if (graphic->charset > GB2312) 279 bit |= ESC(psenc->inuse) << 24; 280 wc |= bit; 281 goto done; 282 } 283 } 284 *nresult = (size_t)-1; 285 return (EILSEQ); 286 done: 287 if (tail < psenc->chlen) 288 return (EINVAL); 289 *s = s0; 290 if (pwc != NULL) 291 *pwc = wc; 292 psenc->chlen = 0; 293 *nresult = (wc == 0) ? 0 : len; 294 295 return (0); 296 } 297 298 static int 299 _citrus_HZ_wcrtomb_priv(_HZEncodingInfo * __restrict ei, 300 char * __restrict s, size_t n, wchar_t wc, 301 _HZState * __restrict psenc, size_t * __restrict nresult) 302 { 303 escape_t *candidate, *init; 304 graphic_t *graphic; 305 const range_t *range; 306 size_t len; 307 int bit, ch; 308 309 if (psenc->chlen != 0 || psenc->inuse == NULL) 310 return (EINVAL); 311 if (wc & 0x80) { 312 bit = 0x80; 313 wc &= ~0x80; 314 } else { 315 bit = 0x0; 316 } 317 if ((uint32_t)wc <= 0x1F) { 318 candidate = INIT0(ei); 319 graphic = (bit == 0) ? candidate->left : candidate->right; 320 if (graphic == NULL) 321 goto ilseq; 322 range = &ranges[(size_t)CTRL]; 323 len = 1; 324 } else if ((uint32_t)wc <= 0x7F) { 325 graphic = ei->ascii; 326 if (graphic == NULL) 327 goto ilseq; 328 candidate = graphic->escape; 329 range = &ranges[(size_t)graphic->charset]; 330 len = graphic->length; 331 } else if ((uint32_t)wc <= 0x7F7F) { 332 graphic = ei->gb2312; 333 if (graphic == NULL) 334 goto ilseq; 335 candidate = graphic->escape; 336 range = &ranges[(size_t)graphic->charset]; 337 len = graphic->length; 338 } else { 339 ch = (wc >> 24) & 0xFF; 340 candidate = find_escape(E0SET(ei), ch); 341 if (candidate == NULL) { 342 candidate = find_escape(E1SET(ei), ch); 343 if (candidate == NULL) 344 goto ilseq; 345 } 346 wc &= ~0xFF000000; 347 graphic = (bit == 0) ? candidate->left : candidate->right; 348 if (graphic == NULL) 349 goto ilseq; 350 range = &ranges[(size_t)graphic->charset]; 351 len = graphic->length; 352 } 353 if (psenc->inuse != candidate) { 354 init = INIT0(ei); 355 if (SET(psenc->inuse) == SET(candidate)) { 356 if (INIT(psenc->inuse) != init || 357 psenc->inuse == init || candidate == init) 358 init = NULL; 359 } else if (candidate == (init = INIT(candidate))) { 360 init = NULL; 361 } 362 if (init != NULL) { 363 if (n < 2) 364 return (E2BIG); 365 n -= 2; 366 psenc->ch[psenc->chlen++] = ESCAPE_CHAR; 367 psenc->ch[psenc->chlen++] = ESC(init); 368 } 369 if (n < 2) 370 return (E2BIG); 371 n -= 2; 372 psenc->ch[psenc->chlen++] = ESCAPE_CHAR; 373 psenc->ch[psenc->chlen++] = ESC(candidate); 374 psenc->inuse = candidate; 375 } 376 if (n < len) 377 return (E2BIG); 378 while (len-- > 0) { 379 ch = (wc >> (len * 8)) & 0xFF; 380 if (range->start > ch || range->end < ch) 381 goto ilseq; 382 psenc->ch[psenc->chlen++] = ch | bit; 383 } 384 memcpy(s, psenc->ch, psenc->chlen); 385 *nresult = psenc->chlen; 386 psenc->chlen = 0; 387 388 return (0); 389 390 ilseq: 391 *nresult = (size_t)-1; 392 return (EILSEQ); 393 } 394 395 static __inline int 396 _citrus_HZ_put_state_reset(_HZEncodingInfo * __restrict ei, 397 char * __restrict s, size_t n, _HZState * __restrict psenc, 398 size_t * __restrict nresult) 399 { 400 escape_t *candidate; 401 402 if (psenc->chlen != 0 || psenc->inuse == NULL) 403 return (EINVAL); 404 candidate = INIT0(ei); 405 if (psenc->inuse != candidate) { 406 if (n < 2) 407 return (E2BIG); 408 n -= 2; 409 psenc->ch[psenc->chlen++] = ESCAPE_CHAR; 410 psenc->ch[psenc->chlen++] = ESC(candidate); 411 } 412 if (n < 1) 413 return (E2BIG); 414 if (psenc->chlen > 0) 415 memcpy(s, psenc->ch, psenc->chlen); 416 *nresult = psenc->chlen; 417 _citrus_HZ_init_state(ei, psenc); 418 419 return (0); 420 } 421 422 static __inline int 423 _citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo * __restrict ei, 424 _HZState * __restrict psenc, int * __restrict rstate) 425 { 426 427 if (psenc->chlen < 0 || psenc->inuse == NULL) 428 return (EINVAL); 429 *rstate = (psenc->chlen == 0) 430 ? ((psenc->inuse == INIT0(ei)) 431 ? _STDENC_SDGEN_INITIAL 432 : _STDENC_SDGEN_STABLE) 433 : ((psenc->ch[0] == ESCAPE_CHAR) 434 ? _STDENC_SDGEN_INCOMPLETE_SHIFT 435 : _STDENC_SDGEN_INCOMPLETE_CHAR); 436 437 return (0); 438 } 439 440 static __inline int 441 /*ARGSUSED*/ 442 _citrus_HZ_stdenc_wctocs(_HZEncodingInfo * __restrict ei __unused, 443 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 444 { 445 int bit; 446 447 if (wc & 0x80) { 448 bit = 0x80; 449 wc &= ~0x80; 450 } else 451 bit = 0x0; 452 if ((uint32_t)wc <= 0x7F) { 453 *csid = (_csid_t)bit; 454 *idx = (_index_t)wc; 455 } else if ((uint32_t)wc <= 0x7F7F) { 456 *csid = (_csid_t)(bit | 0x8000); 457 *idx = (_index_t)wc; 458 } else { 459 *csid = (_index_t)(wc & ~0x00FFFF7F); 460 *idx = (_csid_t)(wc & 0x00FFFF7F); 461 } 462 463 return (0); 464 } 465 466 static __inline int 467 /*ARGSUSED*/ 468 _citrus_HZ_stdenc_cstowc(_HZEncodingInfo * __restrict ei __unused, 469 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 470 { 471 472 *wc = (wchar_t)idx; 473 switch (csid) { 474 case 0x80: 475 case 0x8080: 476 *wc |= (wchar_t)0x80; 477 /*FALLTHROUGH*/ 478 case 0x0: 479 case 0x8000: 480 break; 481 default: 482 *wc |= (wchar_t)csid; 483 } 484 485 return (0); 486 } 487 488 static void 489 _citrus_HZ_encoding_module_uninit(_HZEncodingInfo *ei) 490 { 491 escape_t *escape; 492 493 while ((escape = TAILQ_FIRST(E0SET(ei))) != NULL) { 494 TAILQ_REMOVE(E0SET(ei), escape, entry); 495 free(GL(escape)); 496 free(GR(escape)); 497 free(escape); 498 } 499 while ((escape = TAILQ_FIRST(E1SET(ei))) != NULL) { 500 TAILQ_REMOVE(E1SET(ei), escape, entry); 501 free(GL(escape)); 502 free(GR(escape)); 503 free(escape); 504 } 505 } 506 507 static int 508 _citrus_HZ_parse_char(void *context, const char *name __unused, const char *s) 509 { 510 escape_t *escape; 511 void **p; 512 513 p = (void **)context; 514 escape = (escape_t *)p[0]; 515 if (escape->ch != '\0') 516 return (EINVAL); 517 escape->ch = *s++; 518 if (escape->ch == ESCAPE_CHAR || *s != '\0') 519 return (EINVAL); 520 521 return (0); 522 } 523 524 static int 525 _citrus_HZ_parse_graphic(void *context, const char *name, const char *s) 526 { 527 _HZEncodingInfo *ei; 528 escape_t *escape; 529 graphic_t *graphic; 530 void **p; 531 532 p = (void **)context; 533 escape = (escape_t *)p[0]; 534 ei = (_HZEncodingInfo *)p[1]; 535 graphic = calloc(1, sizeof(*graphic)); 536 if (graphic == NULL) 537 return (ENOMEM); 538 if (strcmp("GL", name) == 0) { 539 if (GL(escape) != NULL) 540 goto release; 541 GL(escape) = graphic; 542 } else if (strcmp("GR", name) == 0) { 543 if (GR(escape) != NULL) 544 goto release; 545 GR(escape) = graphic; 546 } else { 547 release: 548 free(graphic); 549 return (EINVAL); 550 } 551 graphic->escape = escape; 552 if (_bcs_strncasecmp("ASCII", s, 5) == 0) { 553 if (s[5] != '\0') 554 return (EINVAL); 555 graphic->charset = ASCII; 556 graphic->length = 1; 557 ei->ascii = graphic; 558 return (0); 559 } else if (_bcs_strncasecmp("GB2312", s, 6) == 0) { 560 if (s[6] != '\0') 561 return (EINVAL); 562 graphic->charset = GB2312; 563 graphic->length = 2; 564 ei->gb2312 = graphic; 565 return (0); 566 } else if (strncmp("94*", s, 3) == 0) 567 graphic->charset = CS94; 568 else if (strncmp("96*", s, 3) == 0) 569 graphic->charset = CS96; 570 else 571 return (EINVAL); 572 s += 3; 573 switch(*s) { 574 case '1': case '2': case '3': 575 graphic->length = (size_t)(*s - '0'); 576 if (*++s == '\0') 577 break; 578 /*FALLTHROUGH*/ 579 default: 580 return (EINVAL); 581 } 582 return (0); 583 } 584 585 static const _citrus_prop_hint_t escape_hints[] = { 586 _CITRUS_PROP_HINT_STR("CH", &_citrus_HZ_parse_char), 587 _CITRUS_PROP_HINT_STR("GL", &_citrus_HZ_parse_graphic), 588 _CITRUS_PROP_HINT_STR("GR", &_citrus_HZ_parse_graphic), 589 _CITRUS_PROP_HINT_END 590 }; 591 592 static int 593 _citrus_HZ_parse_escape(void *context, const char *name, const char *s) 594 { 595 _HZEncodingInfo *ei; 596 escape_t *escape; 597 void *p[2]; 598 599 ei = (_HZEncodingInfo *)context; 600 escape = calloc(1, sizeof(*escape)); 601 if (escape == NULL) 602 return (EINVAL); 603 if (strcmp("0", name) == 0) { 604 escape->set = E0SET(ei); 605 TAILQ_INSERT_TAIL(E0SET(ei), escape, entry); 606 } else if (strcmp("1", name) == 0) { 607 escape->set = E1SET(ei); 608 TAILQ_INSERT_TAIL(E1SET(ei), escape, entry); 609 } else { 610 free(escape); 611 return (EINVAL); 612 } 613 p[0] = (void *)escape; 614 p[1] = (void *)ei; 615 return (_citrus_prop_parse_variable( 616 escape_hints, (void *)&p[0], s, strlen(s))); 617 } 618 619 static const _citrus_prop_hint_t root_hints[] = { 620 _CITRUS_PROP_HINT_STR("0", &_citrus_HZ_parse_escape), 621 _CITRUS_PROP_HINT_STR("1", &_citrus_HZ_parse_escape), 622 _CITRUS_PROP_HINT_END 623 }; 624 625 static int 626 _citrus_HZ_encoding_module_init(_HZEncodingInfo * __restrict ei, 627 const void * __restrict var, size_t lenvar) 628 { 629 int errnum; 630 631 memset(ei, 0, sizeof(*ei)); 632 TAILQ_INIT(E0SET(ei)); 633 TAILQ_INIT(E1SET(ei)); 634 errnum = _citrus_prop_parse_variable( 635 root_hints, (void *)ei, var, lenvar); 636 if (errnum != 0) 637 _citrus_HZ_encoding_module_uninit(ei); 638 return (errnum); 639 } 640 641 /* ---------------------------------------------------------------------- 642 * public interface for stdenc 643 */ 644 645 _CITRUS_STDENC_DECLS(HZ); 646 _CITRUS_STDENC_DEF_OPS(HZ); 647 648 #include "citrus_stdenc_template.h" 649