1 /* $FreeBSD$ */ 2 /* $NetBSD: citrus_hz.c,v 1.2 2008/06/14 16:01:07 tnozaki Exp $ */ 3 4 /*- 5 * Copyright (c)2004, 2006 Citrus Project, 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 */ 30 31 #include <sys/cdefs.h> 32 #include <sys/queue.h> 33 #include <sys/types.h> 34 35 #include <assert.h> 36 #include <errno.h> 37 #include <limits.h> 38 #include <stddef.h> 39 #include <stdint.h> 40 #include <stdlib.h> 41 #include <string.h> 42 #include <wchar.h> 43 44 #include "citrus_namespace.h" 45 #include "citrus_types.h" 46 #include "citrus_bcs.h" 47 #include "citrus_module.h" 48 #include "citrus_stdenc.h" 49 50 #include "citrus_hz.h" 51 #include "citrus_prop.h" 52 53 /* 54 * wchar_t mapping: 55 * 56 * CTRL/ASCII 00000000 00000000 00000000 gxxxxxxx 57 * GB2312 00000000 00000000 0xxxxxxx gxxxxxxx 58 * 94/96*n (~M) 0mmmmmmm 0xxxxxxx 0xxxxxxx gxxxxxxx 59 */ 60 61 #define ESCAPE_CHAR '~' 62 63 typedef enum { 64 CTRL = 0, ASCII = 1, GB2312 = 2, CS94 = 3, CS96 = 4 65 } charset_t; 66 67 typedef struct { 68 int end; 69 int start; 70 int width; 71 } range_t; 72 73 static const range_t ranges[] = { 74 #define RANGE(start, end) { start, end, (end - start) + 1 } 75 /* CTRL */ RANGE(0x00, 0x1F), 76 /* ASCII */ RANGE(0x20, 0x7F), 77 /* GB2312 */ RANGE(0x21, 0x7E), 78 /* CS94 */ RANGE(0x21, 0x7E), 79 /* CS96 */ RANGE(0x20, 0x7F), 80 #undef RANGE 81 }; 82 83 typedef struct escape_t escape_t; 84 typedef struct { 85 charset_t charset; 86 escape_t *escape; 87 ssize_t length; 88 #define ROWCOL_MAX 3 89 } graphic_t; 90 91 typedef TAILQ_HEAD(escape_list, escape_t) escape_list; 92 struct escape_t { 93 TAILQ_ENTRY(escape_t) entry; 94 escape_list *set; 95 graphic_t *left; 96 graphic_t *right; 97 int ch; 98 }; 99 100 #define GL(escape) ((escape)->left) 101 #define GR(escape) ((escape)->right) 102 #define SET(escape) ((escape)->set) 103 #define ESC(escape) ((escape)->ch) 104 #define INIT(escape) (TAILQ_FIRST(SET(escape))) 105 106 static __inline escape_t * 107 find_escape(escape_list *set, int ch) 108 { 109 escape_t *escape; 110 111 TAILQ_FOREACH(escape, set, entry) { 112 if (ESC(escape) == ch) 113 break; 114 } 115 116 return (escape); 117 } 118 119 typedef struct { 120 escape_list e0; 121 escape_list e1; 122 graphic_t *ascii; 123 graphic_t *gb2312; 124 } _HZEncodingInfo; 125 126 #define E0SET(ei) (&(ei)->e0) 127 #define E1SET(ei) (&(ei)->e1) 128 #define INIT0(ei) (TAILQ_FIRST(E0SET(ei))) 129 #define INIT1(ei) (TAILQ_FIRST(E1SET(ei))) 130 131 typedef struct { 132 escape_t *inuse; 133 int chlen; 134 char ch[ROWCOL_MAX]; 135 } _HZState; 136 137 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 138 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 139 140 #define _FUNCNAME(m) _citrus_HZ_##m 141 #define _ENCODING_INFO _HZEncodingInfo 142 #define _ENCODING_STATE _HZState 143 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 144 #define _ENCODING_IS_STATE_DEPENDENT 1 145 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->inuse == NULL) 146 147 static __inline void 148 _citrus_HZ_init_state(_HZEncodingInfo * __restrict ei, 149 _HZState * __restrict psenc) 150 { 151 152 psenc->chlen = 0; 153 psenc->inuse = INIT0(ei); 154 } 155 156 static __inline void 157 /*ARGSUSED*/ 158 _citrus_HZ_pack_state(_HZEncodingInfo * __restrict ei __unused, 159 void *__restrict pspriv, const _HZState * __restrict psenc) 160 { 161 162 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 163 } 164 165 static __inline void 166 /*ARGSUSED*/ 167 _citrus_HZ_unpack_state(_HZEncodingInfo * __restrict ei __unused, 168 _HZState * __restrict psenc, const void * __restrict pspriv) 169 { 170 171 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 172 } 173 174 static int 175 _citrus_HZ_mbrtowc_priv(_HZEncodingInfo * __restrict ei, 176 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 177 _HZState * __restrict psenc, size_t * __restrict nresult) 178 { 179 escape_t *candidate, *init; 180 graphic_t *graphic; 181 const range_t *range; 182 char *s0; 183 wchar_t wc; 184 int bit, ch, head, len, tail; 185 186 if (*s == NULL) { 187 _citrus_HZ_init_state(ei, psenc); 188 *nresult = 1; 189 return (0); 190 } 191 s0 = *s; 192 if (psenc->chlen < 0 || psenc->inuse == NULL) 193 return (EINVAL); 194 195 wc = (wchar_t)0; 196 bit = head = tail = 0; 197 graphic = NULL; 198 for (len = 0; len <= MB_LEN_MAX;) { 199 if (psenc->chlen == tail) { 200 if (n-- < 1) { 201 *s = s0; 202 *nresult = (size_t)-2; 203 return (0); 204 } 205 psenc->ch[psenc->chlen++] = *s0++; 206 ++len; 207 } 208 ch = (unsigned char)psenc->ch[tail++]; 209 if (tail == 1) { 210 if ((ch & ~0x80) <= 0x1F) { 211 if (psenc->inuse != INIT0(ei)) 212 break; 213 wc = (wchar_t)ch; 214 goto done; 215 } 216 if (ch & 0x80) { 217 graphic = GR(psenc->inuse); 218 bit = 0x80; 219 ch &= ~0x80; 220 } else { 221 graphic = GL(psenc->inuse); 222 if (ch == ESCAPE_CHAR) 223 continue; 224 bit = 0x0; 225 } 226 if (graphic == NULL) 227 break; 228 } else if (tail == 2 && psenc->ch[0] == ESCAPE_CHAR) { 229 if (tail < psenc->chlen) 230 return (EINVAL); 231 if (ch == ESCAPE_CHAR) { 232 ++head; 233 } else if (ch == '\n') { 234 if (psenc->inuse != INIT0(ei)) 235 break; 236 tail = psenc->chlen = 0; 237 continue; 238 } else { 239 candidate = NULL; 240 init = INIT0(ei); 241 if (psenc->inuse == init) { 242 init = INIT1(ei); 243 } else if (INIT(psenc->inuse) == init) { 244 if (ESC(init) != ch) 245 break; 246 candidate = init; 247 } 248 if (candidate == NULL) { 249 candidate = find_escape( 250 SET(psenc->inuse), ch); 251 if (candidate == NULL) { 252 if (init == NULL || 253 ESC(init) != ch) 254 break; 255 candidate = init; 256 } 257 } 258 psenc->inuse = candidate; 259 tail = psenc->chlen = 0; 260 continue; 261 } 262 } else if (ch & 0x80) { 263 if (graphic != GR(psenc->inuse)) 264 break; 265 ch &= ~0x80; 266 } else { 267 if (graphic != GL(psenc->inuse)) 268 break; 269 } 270 range = &ranges[(size_t)graphic->charset]; 271 if (range->start > ch || range->end < ch) 272 break; 273 wc <<= 8; 274 wc |= ch; 275 if (graphic->length == (tail - head)) { 276 if (graphic->charset > GB2312) 277 bit |= ESC(psenc->inuse) << 24; 278 wc |= bit; 279 goto done; 280 } 281 } 282 *nresult = (size_t)-1; 283 return (EILSEQ); 284 done: 285 if (tail < psenc->chlen) 286 return (EINVAL); 287 *s = s0; 288 if (pwc != NULL) 289 *pwc = wc; 290 psenc->chlen = 0; 291 *nresult = (wc == 0) ? 0 : len; 292 293 return (0); 294 } 295 296 static int 297 _citrus_HZ_wcrtomb_priv(_HZEncodingInfo * __restrict ei, 298 char * __restrict s, size_t n, wchar_t wc, 299 _HZState * __restrict psenc, size_t * __restrict nresult) 300 { 301 escape_t *candidate, *init; 302 graphic_t *graphic; 303 const range_t *range; 304 size_t len; 305 int bit, ch; 306 307 if (psenc->chlen != 0 || psenc->inuse == NULL) 308 return (EINVAL); 309 if (wc & 0x80) { 310 bit = 0x80; 311 wc &= ~0x80; 312 } else { 313 bit = 0x0; 314 } 315 if ((uint32_t)wc <= 0x1F) { 316 candidate = INIT0(ei); 317 graphic = (bit == 0) ? candidate->left : candidate->right; 318 if (graphic == NULL) 319 goto ilseq; 320 range = &ranges[(size_t)CTRL]; 321 len = 1; 322 } else if ((uint32_t)wc <= 0x7F) { 323 graphic = ei->ascii; 324 if (graphic == NULL) 325 goto ilseq; 326 candidate = graphic->escape; 327 range = &ranges[(size_t)graphic->charset]; 328 len = graphic->length; 329 } else if ((uint32_t)wc <= 0x7F7F) { 330 graphic = ei->gb2312; 331 if (graphic == NULL) 332 goto ilseq; 333 candidate = graphic->escape; 334 range = &ranges[(size_t)graphic->charset]; 335 len = graphic->length; 336 } else { 337 ch = (wc >> 24) & 0xFF; 338 candidate = find_escape(E0SET(ei), ch); 339 if (candidate == NULL) { 340 candidate = find_escape(E1SET(ei), ch); 341 if (candidate == NULL) 342 goto ilseq; 343 } 344 wc &= ~0xFF000000; 345 graphic = (bit == 0) ? candidate->left : candidate->right; 346 if (graphic == NULL) 347 goto ilseq; 348 range = &ranges[(size_t)graphic->charset]; 349 len = graphic->length; 350 } 351 if (psenc->inuse != candidate) { 352 init = INIT0(ei); 353 if (SET(psenc->inuse) == SET(candidate)) { 354 if (INIT(psenc->inuse) != init || 355 psenc->inuse == init || candidate == init) 356 init = NULL; 357 } else if (candidate == (init = INIT(candidate))) { 358 init = NULL; 359 } 360 if (init != NULL) { 361 if (n < 2) 362 return (E2BIG); 363 n -= 2; 364 psenc->ch[psenc->chlen++] = ESCAPE_CHAR; 365 psenc->ch[psenc->chlen++] = ESC(init); 366 } 367 if (n < 2) 368 return (E2BIG); 369 n -= 2; 370 psenc->ch[psenc->chlen++] = ESCAPE_CHAR; 371 psenc->ch[psenc->chlen++] = ESC(candidate); 372 psenc->inuse = candidate; 373 } 374 if (n < len) 375 return (E2BIG); 376 while (len-- > 0) { 377 ch = (wc >> (len * 8)) & 0xFF; 378 if (range->start > ch || range->end < ch) 379 goto ilseq; 380 psenc->ch[psenc->chlen++] = ch | bit; 381 } 382 memcpy(s, psenc->ch, psenc->chlen); 383 *nresult = psenc->chlen; 384 psenc->chlen = 0; 385 386 return (0); 387 388 ilseq: 389 *nresult = (size_t)-1; 390 return (EILSEQ); 391 } 392 393 static __inline int 394 _citrus_HZ_put_state_reset(_HZEncodingInfo * __restrict ei, 395 char * __restrict s, size_t n, _HZState * __restrict psenc, 396 size_t * __restrict nresult) 397 { 398 escape_t *candidate; 399 400 if (psenc->chlen != 0 || psenc->inuse == NULL) 401 return (EINVAL); 402 candidate = INIT0(ei); 403 if (psenc->inuse != candidate) { 404 if (n < 2) 405 return (E2BIG); 406 n -= 2; 407 psenc->ch[psenc->chlen++] = ESCAPE_CHAR; 408 psenc->ch[psenc->chlen++] = ESC(candidate); 409 } 410 if (n < 1) 411 return (E2BIG); 412 if (psenc->chlen > 0) 413 memcpy(s, psenc->ch, psenc->chlen); 414 *nresult = psenc->chlen; 415 _citrus_HZ_init_state(ei, psenc); 416 417 return (0); 418 } 419 420 static __inline int 421 _citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo * __restrict ei, 422 _HZState * __restrict psenc, int * __restrict rstate) 423 { 424 425 if (psenc->chlen < 0 || psenc->inuse == NULL) 426 return (EINVAL); 427 *rstate = (psenc->chlen == 0) 428 ? ((psenc->inuse == INIT0(ei)) 429 ? _STDENC_SDGEN_INITIAL 430 : _STDENC_SDGEN_STABLE) 431 : ((psenc->ch[0] == ESCAPE_CHAR) 432 ? _STDENC_SDGEN_INCOMPLETE_SHIFT 433 : _STDENC_SDGEN_INCOMPLETE_CHAR); 434 435 return (0); 436 } 437 438 static __inline int 439 /*ARGSUSED*/ 440 _citrus_HZ_stdenc_wctocs(_HZEncodingInfo * __restrict ei __unused, 441 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 442 { 443 int bit; 444 445 if (wc & 0x80) { 446 bit = 0x80; 447 wc &= ~0x80; 448 } else 449 bit = 0x0; 450 if ((uint32_t)wc <= 0x7F) { 451 *csid = (_csid_t)bit; 452 *idx = (_index_t)wc; 453 } else if ((uint32_t)wc <= 0x7F7F) { 454 *csid = (_csid_t)(bit | 0x8000); 455 *idx = (_index_t)wc; 456 } else { 457 *csid = (_index_t)(wc & ~0x00FFFF7F); 458 *idx = (_csid_t)(wc & 0x00FFFF7F); 459 } 460 461 return (0); 462 } 463 464 static __inline int 465 /*ARGSUSED*/ 466 _citrus_HZ_stdenc_cstowc(_HZEncodingInfo * __restrict ei __unused, 467 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 468 { 469 470 *wc = (wchar_t)idx; 471 switch (csid) { 472 case 0x80: 473 case 0x8080: 474 *wc |= (wchar_t)0x80; 475 /*FALLTHROUGH*/ 476 case 0x0: 477 case 0x8000: 478 break; 479 default: 480 *wc |= (wchar_t)csid; 481 } 482 483 return (0); 484 } 485 486 static void 487 _citrus_HZ_encoding_module_uninit(_HZEncodingInfo *ei) 488 { 489 escape_t *escape; 490 491 while ((escape = TAILQ_FIRST(E0SET(ei))) != NULL) { 492 TAILQ_REMOVE(E0SET(ei), escape, entry); 493 free(GL(escape)); 494 free(GR(escape)); 495 free(escape); 496 } 497 while ((escape = TAILQ_FIRST(E1SET(ei))) != NULL) { 498 TAILQ_REMOVE(E1SET(ei), escape, entry); 499 free(GL(escape)); 500 free(GR(escape)); 501 free(escape); 502 } 503 } 504 505 static int 506 _citrus_HZ_parse_char(void **context, const char *name __unused, const char *s) 507 { 508 escape_t *escape; 509 void **p; 510 511 p = (void **)*context; 512 escape = (escape_t *)p[0]; 513 if (escape->ch != '\0') 514 return (EINVAL); 515 escape->ch = *s++; 516 if (escape->ch == ESCAPE_CHAR || *s != '\0') 517 return (EINVAL); 518 519 return (0); 520 } 521 522 static int 523 _citrus_HZ_parse_graphic(void **context, const char *name, const char *s) 524 { 525 _HZEncodingInfo *ei; 526 escape_t *escape; 527 graphic_t *graphic; 528 void **p; 529 530 p = (void **)*context; 531 escape = (escape_t *)p[0]; 532 ei = (_HZEncodingInfo *)p[1]; 533 graphic = malloc(sizeof(*graphic)); 534 if (graphic == NULL) 535 return (ENOMEM); 536 memset(graphic, 0, sizeof(*graphic)); 537 if (strcmp("GL", name) == 0) { 538 if (GL(escape) != NULL) 539 goto release; 540 GL(escape) = graphic; 541 } else if (strcmp("GR", name) == 0) { 542 if (GR(escape) != NULL) 543 goto release; 544 GR(escape) = graphic; 545 } else { 546 release: 547 free(graphic); 548 return (EINVAL); 549 } 550 graphic->escape = escape; 551 if (_bcs_strncasecmp("ASCII", s, 5) == 0) { 552 if (s[5] != '\0') 553 return (EINVAL); 554 graphic->charset = ASCII; 555 graphic->length = 1; 556 ei->ascii = graphic; 557 return (0); 558 } else if (_bcs_strncasecmp("GB2312", s, 6) == 0) { 559 if (s[6] != '\0') 560 return (EINVAL); 561 graphic->charset = GB2312; 562 graphic->length = 2; 563 ei->gb2312 = graphic; 564 return (0); 565 } else if (strncmp("94*", s, 3) == 0) 566 graphic->charset = CS94; 567 else if (strncmp("96*", s, 3) == 0) 568 graphic->charset = CS96; 569 else 570 return (EINVAL); 571 s += 3; 572 switch(*s) { 573 case '1': case '2': case '3': 574 graphic->length = (size_t)(*s - '0'); 575 if (*++s == '\0') 576 break; 577 /*FALLTHROUGH*/ 578 default: 579 return (EINVAL); 580 } 581 return (0); 582 } 583 584 static const _citrus_prop_hint_t escape_hints[] = { 585 _CITRUS_PROP_HINT_STR("CH", &_citrus_HZ_parse_char), 586 _CITRUS_PROP_HINT_STR("GL", &_citrus_HZ_parse_graphic), 587 _CITRUS_PROP_HINT_STR("GR", &_citrus_HZ_parse_graphic), 588 _CITRUS_PROP_HINT_END 589 }; 590 591 static int 592 _citrus_HZ_parse_escape(void **context, const char *name, const char *s) 593 { 594 _HZEncodingInfo *ei; 595 escape_t *escape; 596 void *p[2]; 597 598 ei = (_HZEncodingInfo *)*context; 599 escape = malloc(sizeof(*escape)); 600 if (escape == NULL) 601 return (EINVAL); 602 memset(escape, 0, sizeof(*escape)); 603 if (strcmp("0", name) == 0) { 604 escape->set = E0SET(ei); 605 TAILQ_INSERT_TAIL(E0SET(ei), escape, entry); 606 } else if (strcmp("1", name) == 0) { 607 escape->set = E1SET(ei); 608 TAILQ_INSERT_TAIL(E1SET(ei), escape, entry); 609 } else { 610 free(escape); 611 return (EINVAL); 612 } 613 p[0] = (void *)escape; 614 p[1] = (void *)ei; 615 return (_citrus_prop_parse_variable( 616 escape_hints, (void *)&p[0], s, strlen(s))); 617 } 618 619 static const _citrus_prop_hint_t root_hints[] = { 620 _CITRUS_PROP_HINT_STR("0", &_citrus_HZ_parse_escape), 621 _CITRUS_PROP_HINT_STR("1", &_citrus_HZ_parse_escape), 622 _CITRUS_PROP_HINT_END 623 }; 624 625 static int 626 _citrus_HZ_encoding_module_init(_HZEncodingInfo * __restrict ei, 627 const void * __restrict var, size_t lenvar) 628 { 629 int errnum; 630 631 memset(ei, 0, sizeof(*ei)); 632 TAILQ_INIT(E0SET(ei)); 633 TAILQ_INIT(E1SET(ei)); 634 errnum = _citrus_prop_parse_variable( 635 root_hints, (void *)ei, var, lenvar); 636 if (errnum != 0) 637 _citrus_HZ_encoding_module_uninit(ei); 638 return (errnum); 639 } 640 641 /* ---------------------------------------------------------------------- 642 * public interface for stdenc 643 */ 644 645 _CITRUS_STDENC_DECLS(HZ); 646 _CITRUS_STDENC_DEF_OPS(HZ); 647 648 #include "citrus_stdenc_template.h" 649