1 /* $NetBSD: citrus_hz.c,v 1.2 2008/06/14 16:01:07 tnozaki Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c)2004, 2006 Citrus Project, 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 */ 31 32 #include <sys/cdefs.h> 33 #include <sys/queue.h> 34 #include <sys/types.h> 35 36 #include <assert.h> 37 #include <errno.h> 38 #include <limits.h> 39 #include <stddef.h> 40 #include <stdint.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <wchar.h> 44 45 #include "citrus_namespace.h" 46 #include "citrus_types.h" 47 #include "citrus_bcs.h" 48 #include "citrus_module.h" 49 #include "citrus_stdenc.h" 50 51 #include "citrus_hz.h" 52 #include "citrus_prop.h" 53 54 /* 55 * wchar_t mapping: 56 * 57 * CTRL/ASCII 00000000 00000000 00000000 gxxxxxxx 58 * GB2312 00000000 00000000 0xxxxxxx gxxxxxxx 59 * 94/96*n (~M) 0mmmmmmm 0xxxxxxx 0xxxxxxx gxxxxxxx 60 */ 61 62 #define ESCAPE_CHAR '~' 63 64 typedef enum { 65 CTRL = 0, ASCII = 1, GB2312 = 2, CS94 = 3, CS96 = 4 66 } charset_t; 67 68 typedef struct { 69 int start; 70 int end; 71 int width; 72 } range_t; 73 74 static const range_t ranges[] = { 75 #define RANGE(start, end) { start, end, (end - start) + 1 } 76 /* CTRL */ RANGE(0x00, 0x1F), 77 /* ASCII */ RANGE(0x20, 0x7F), 78 /* GB2312 */ RANGE(0x21, 0x7E), 79 /* CS94 */ RANGE(0x21, 0x7E), 80 /* CS96 */ RANGE(0x20, 0x7F), 81 #undef RANGE 82 }; 83 84 typedef struct escape_t escape_t; 85 typedef struct { 86 charset_t charset; 87 escape_t *escape; 88 ssize_t length; 89 #define ROWCOL_MAX 3 90 } graphic_t; 91 92 typedef TAILQ_HEAD(escape_list, escape_t) escape_list; 93 struct escape_t { 94 TAILQ_ENTRY(escape_t) entry; 95 escape_list *set; 96 graphic_t *left; 97 graphic_t *right; 98 int ch; 99 }; 100 101 #define GL(escape) ((escape)->left) 102 #define GR(escape) ((escape)->right) 103 #define SET(escape) ((escape)->set) 104 #define ESC(escape) ((escape)->ch) 105 #define INIT(escape) (TAILQ_FIRST(SET(escape))) 106 107 static __inline escape_t * 108 find_escape(escape_list *set, int ch) 109 { 110 escape_t *escape; 111 112 TAILQ_FOREACH(escape, set, entry) { 113 if (ESC(escape) == ch) 114 break; 115 } 116 117 return (escape); 118 } 119 120 typedef struct { 121 escape_list e0; 122 escape_list e1; 123 graphic_t *ascii; 124 graphic_t *gb2312; 125 } _HZEncodingInfo; 126 127 #define E0SET(ei) (&(ei)->e0) 128 #define E1SET(ei) (&(ei)->e1) 129 #define INIT0(ei) (TAILQ_FIRST(E0SET(ei))) 130 #define INIT1(ei) (TAILQ_FIRST(E1SET(ei))) 131 132 typedef struct { 133 escape_t *inuse; 134 int chlen; 135 char ch[ROWCOL_MAX]; 136 } _HZState; 137 138 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 139 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 140 141 #define _FUNCNAME(m) _citrus_HZ_##m 142 #define _ENCODING_INFO _HZEncodingInfo 143 #define _ENCODING_STATE _HZState 144 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 145 #define _ENCODING_IS_STATE_DEPENDENT 1 146 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->inuse == NULL) 147 148 static __inline void 149 _citrus_HZ_init_state(_HZEncodingInfo * __restrict ei, 150 _HZState * __restrict psenc) 151 { 152 153 psenc->chlen = 0; 154 psenc->inuse = INIT0(ei); 155 } 156 157 #if 0 158 static __inline void 159 /*ARGSUSED*/ 160 _citrus_HZ_pack_state(_HZEncodingInfo * __restrict ei __unused, 161 void *__restrict pspriv, const _HZState * __restrict psenc) 162 { 163 164 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 165 } 166 167 static __inline void 168 /*ARGSUSED*/ 169 _citrus_HZ_unpack_state(_HZEncodingInfo * __restrict ei __unused, 170 _HZState * __restrict psenc, const void * __restrict pspriv) 171 { 172 173 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 174 } 175 #endif 176 177 static int 178 _citrus_HZ_mbrtowc_priv(_HZEncodingInfo * __restrict ei, 179 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 180 _HZState * __restrict psenc, size_t * __restrict nresult) 181 { 182 escape_t *candidate, *init; 183 graphic_t *graphic; 184 const range_t *range; 185 char *s0; 186 wchar_t wc; 187 int bit, ch, head, len, tail; 188 189 if (*s == NULL) { 190 _citrus_HZ_init_state(ei, psenc); 191 *nresult = 1; 192 return (0); 193 } 194 s0 = *s; 195 if (psenc->chlen < 0 || psenc->inuse == NULL) 196 return (EINVAL); 197 198 wc = (wchar_t)0; 199 bit = head = tail = 0; 200 graphic = NULL; 201 for (len = 0; len <= MB_LEN_MAX;) { 202 if (psenc->chlen == tail) { 203 if (n-- < 1) { 204 *s = s0; 205 *nresult = (size_t)-2; 206 return (0); 207 } 208 psenc->ch[psenc->chlen++] = *s0++; 209 ++len; 210 } 211 ch = (unsigned char)psenc->ch[tail++]; 212 if (tail == 1) { 213 if ((ch & ~0x80) <= 0x1F) { 214 if (psenc->inuse != INIT0(ei)) 215 break; 216 wc = (wchar_t)ch; 217 goto done; 218 } 219 if (ch & 0x80) { 220 graphic = GR(psenc->inuse); 221 bit = 0x80; 222 ch &= ~0x80; 223 } else { 224 graphic = GL(psenc->inuse); 225 if (ch == ESCAPE_CHAR) 226 continue; 227 bit = 0x0; 228 } 229 if (graphic == NULL) 230 break; 231 } else if (tail == 2 && psenc->ch[0] == ESCAPE_CHAR) { 232 if (tail < psenc->chlen) 233 return (EINVAL); 234 if (ch == ESCAPE_CHAR) { 235 ++head; 236 } else if (ch == '\n') { 237 if (psenc->inuse != INIT0(ei)) 238 break; 239 tail = psenc->chlen = 0; 240 continue; 241 } else { 242 candidate = NULL; 243 init = INIT0(ei); 244 if (psenc->inuse == init) { 245 init = INIT1(ei); 246 } else if (INIT(psenc->inuse) == init) { 247 if (ESC(init) != ch) 248 break; 249 candidate = init; 250 } 251 if (candidate == NULL) { 252 candidate = find_escape( 253 SET(psenc->inuse), ch); 254 if (candidate == NULL) { 255 if (init == NULL || 256 ESC(init) != ch) 257 break; 258 candidate = init; 259 } 260 } 261 psenc->inuse = candidate; 262 tail = psenc->chlen = 0; 263 continue; 264 } 265 } else if (ch & 0x80) { 266 if (graphic != GR(psenc->inuse)) 267 break; 268 ch &= ~0x80; 269 } else { 270 if (graphic != GL(psenc->inuse)) 271 break; 272 } 273 range = &ranges[(size_t)graphic->charset]; 274 if (range->start > ch || range->end < ch) 275 break; 276 wc <<= 8; 277 wc |= ch; 278 if (graphic->length == (tail - head)) { 279 if (graphic->charset > GB2312) 280 bit |= ESC(psenc->inuse) << 24; 281 wc |= bit; 282 goto done; 283 } 284 } 285 *nresult = (size_t)-1; 286 return (EILSEQ); 287 done: 288 if (tail < psenc->chlen) 289 return (EINVAL); 290 *s = s0; 291 if (pwc != NULL) 292 *pwc = wc; 293 psenc->chlen = 0; 294 *nresult = (wc == 0) ? 0 : len; 295 296 return (0); 297 } 298 299 static int 300 _citrus_HZ_wcrtomb_priv(_HZEncodingInfo * __restrict ei, 301 char * __restrict s, size_t n, wchar_t wc, 302 _HZState * __restrict psenc, size_t * __restrict nresult) 303 { 304 escape_t *candidate, *init; 305 graphic_t *graphic; 306 const range_t *range; 307 size_t len; 308 int bit, ch; 309 310 if (psenc->chlen != 0 || psenc->inuse == NULL) 311 return (EINVAL); 312 if (wc & 0x80) { 313 bit = 0x80; 314 wc &= ~0x80; 315 } else { 316 bit = 0x0; 317 } 318 if ((uint32_t)wc <= 0x1F) { 319 candidate = INIT0(ei); 320 graphic = (bit == 0) ? candidate->left : candidate->right; 321 if (graphic == NULL) 322 goto ilseq; 323 range = &ranges[(size_t)CTRL]; 324 len = 1; 325 } else if ((uint32_t)wc <= 0x7F) { 326 graphic = ei->ascii; 327 if (graphic == NULL) 328 goto ilseq; 329 candidate = graphic->escape; 330 range = &ranges[(size_t)graphic->charset]; 331 len = graphic->length; 332 } else if ((uint32_t)wc <= 0x7F7F) { 333 graphic = ei->gb2312; 334 if (graphic == NULL) 335 goto ilseq; 336 candidate = graphic->escape; 337 range = &ranges[(size_t)graphic->charset]; 338 len = graphic->length; 339 } else { 340 ch = (wc >> 24) & 0xFF; 341 candidate = find_escape(E0SET(ei), ch); 342 if (candidate == NULL) { 343 candidate = find_escape(E1SET(ei), ch); 344 if (candidate == NULL) 345 goto ilseq; 346 } 347 wc &= ~0xFF000000; 348 graphic = (bit == 0) ? candidate->left : candidate->right; 349 if (graphic == NULL) 350 goto ilseq; 351 range = &ranges[(size_t)graphic->charset]; 352 len = graphic->length; 353 } 354 if (psenc->inuse != candidate) { 355 init = INIT0(ei); 356 if (SET(psenc->inuse) == SET(candidate)) { 357 if (INIT(psenc->inuse) != init || 358 psenc->inuse == init || candidate == init) 359 init = NULL; 360 } else if (candidate == (init = INIT(candidate))) { 361 init = NULL; 362 } 363 if (init != NULL) { 364 if (n < 2) 365 return (E2BIG); 366 n -= 2; 367 psenc->ch[psenc->chlen++] = ESCAPE_CHAR; 368 psenc->ch[psenc->chlen++] = ESC(init); 369 } 370 if (n < 2) 371 return (E2BIG); 372 n -= 2; 373 psenc->ch[psenc->chlen++] = ESCAPE_CHAR; 374 psenc->ch[psenc->chlen++] = ESC(candidate); 375 psenc->inuse = candidate; 376 } 377 if (n < len) 378 return (E2BIG); 379 while (len-- > 0) { 380 ch = (wc >> (len * 8)) & 0xFF; 381 if (range->start > ch || range->end < ch) 382 goto ilseq; 383 psenc->ch[psenc->chlen++] = ch | bit; 384 } 385 memcpy(s, psenc->ch, psenc->chlen); 386 *nresult = psenc->chlen; 387 psenc->chlen = 0; 388 389 return (0); 390 391 ilseq: 392 *nresult = (size_t)-1; 393 return (EILSEQ); 394 } 395 396 static __inline int 397 _citrus_HZ_put_state_reset(_HZEncodingInfo * __restrict ei, 398 char * __restrict s, size_t n, _HZState * __restrict psenc, 399 size_t * __restrict nresult) 400 { 401 escape_t *candidate; 402 403 if (psenc->chlen != 0 || psenc->inuse == NULL) 404 return (EINVAL); 405 candidate = INIT0(ei); 406 if (psenc->inuse != candidate) { 407 if (n < 2) 408 return (E2BIG); 409 n -= 2; 410 psenc->ch[psenc->chlen++] = ESCAPE_CHAR; 411 psenc->ch[psenc->chlen++] = ESC(candidate); 412 } 413 if (n < 1) 414 return (E2BIG); 415 if (psenc->chlen > 0) 416 memcpy(s, psenc->ch, psenc->chlen); 417 *nresult = psenc->chlen; 418 _citrus_HZ_init_state(ei, psenc); 419 420 return (0); 421 } 422 423 static __inline int 424 _citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo * __restrict ei, 425 _HZState * __restrict psenc, int * __restrict rstate) 426 { 427 428 if (psenc->chlen < 0 || psenc->inuse == NULL) 429 return (EINVAL); 430 *rstate = (psenc->chlen == 0) 431 ? ((psenc->inuse == INIT0(ei)) 432 ? _STDENC_SDGEN_INITIAL 433 : _STDENC_SDGEN_STABLE) 434 : ((psenc->ch[0] == ESCAPE_CHAR) 435 ? _STDENC_SDGEN_INCOMPLETE_SHIFT 436 : _STDENC_SDGEN_INCOMPLETE_CHAR); 437 438 return (0); 439 } 440 441 static __inline int 442 /*ARGSUSED*/ 443 _citrus_HZ_stdenc_wctocs(_HZEncodingInfo * __restrict ei __unused, 444 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 445 { 446 int bit; 447 448 if (wc & 0x80) { 449 bit = 0x80; 450 wc &= ~0x80; 451 } else 452 bit = 0x0; 453 if ((uint32_t)wc <= 0x7F) { 454 *csid = (_csid_t)bit; 455 *idx = (_index_t)wc; 456 } else if ((uint32_t)wc <= 0x7F7F) { 457 *csid = (_csid_t)(bit | 0x8000); 458 *idx = (_index_t)wc; 459 } else { 460 *csid = (_index_t)(wc & ~0x00FFFF7F); 461 *idx = (_csid_t)(wc & 0x00FFFF7F); 462 } 463 464 return (0); 465 } 466 467 static __inline int 468 /*ARGSUSED*/ 469 _citrus_HZ_stdenc_cstowc(_HZEncodingInfo * __restrict ei __unused, 470 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 471 { 472 473 *wc = (wchar_t)idx; 474 switch (csid) { 475 case 0x80: 476 case 0x8080: 477 *wc |= (wchar_t)0x80; 478 /*FALLTHROUGH*/ 479 case 0x0: 480 case 0x8000: 481 break; 482 default: 483 *wc |= (wchar_t)csid; 484 } 485 486 return (0); 487 } 488 489 static void 490 _citrus_HZ_encoding_module_uninit(_HZEncodingInfo *ei) 491 { 492 escape_t *escape; 493 494 while ((escape = TAILQ_FIRST(E0SET(ei))) != NULL) { 495 TAILQ_REMOVE(E0SET(ei), escape, entry); 496 free(GL(escape)); 497 free(GR(escape)); 498 free(escape); 499 } 500 while ((escape = TAILQ_FIRST(E1SET(ei))) != NULL) { 501 TAILQ_REMOVE(E1SET(ei), escape, entry); 502 free(GL(escape)); 503 free(GR(escape)); 504 free(escape); 505 } 506 } 507 508 static int 509 _citrus_HZ_parse_char(void *context, const char *name __unused, const char *s) 510 { 511 escape_t *escape; 512 void **p; 513 514 p = (void **)context; 515 escape = (escape_t *)p[0]; 516 if (escape->ch != '\0') 517 return (EINVAL); 518 escape->ch = *s++; 519 if (escape->ch == ESCAPE_CHAR || *s != '\0') 520 return (EINVAL); 521 522 return (0); 523 } 524 525 static int 526 _citrus_HZ_parse_graphic(void *context, const char *name, const char *s) 527 { 528 _HZEncodingInfo *ei; 529 escape_t *escape; 530 graphic_t *graphic; 531 void **p; 532 533 p = (void **)context; 534 escape = (escape_t *)p[0]; 535 ei = (_HZEncodingInfo *)p[1]; 536 graphic = calloc(1, sizeof(*graphic)); 537 if (graphic == NULL) 538 return (ENOMEM); 539 if (strcmp("GL", name) == 0) { 540 if (GL(escape) != NULL) 541 goto release; 542 GL(escape) = graphic; 543 } else if (strcmp("GR", name) == 0) { 544 if (GR(escape) != NULL) 545 goto release; 546 GR(escape) = graphic; 547 } else { 548 release: 549 free(graphic); 550 return (EINVAL); 551 } 552 graphic->escape = escape; 553 if (_bcs_strncasecmp("ASCII", s, 5) == 0) { 554 if (s[5] != '\0') 555 return (EINVAL); 556 graphic->charset = ASCII; 557 graphic->length = 1; 558 ei->ascii = graphic; 559 return (0); 560 } else if (_bcs_strncasecmp("GB2312", s, 6) == 0) { 561 if (s[6] != '\0') 562 return (EINVAL); 563 graphic->charset = GB2312; 564 graphic->length = 2; 565 ei->gb2312 = graphic; 566 return (0); 567 } else if (strncmp("94*", s, 3) == 0) 568 graphic->charset = CS94; 569 else if (strncmp("96*", s, 3) == 0) 570 graphic->charset = CS96; 571 else 572 return (EINVAL); 573 s += 3; 574 switch(*s) { 575 case '1': case '2': case '3': 576 graphic->length = (size_t)(*s - '0'); 577 if (*++s == '\0') 578 break; 579 /*FALLTHROUGH*/ 580 default: 581 return (EINVAL); 582 } 583 return (0); 584 } 585 586 static const _citrus_prop_hint_t escape_hints[] = { 587 _CITRUS_PROP_HINT_STR("CH", &_citrus_HZ_parse_char), 588 _CITRUS_PROP_HINT_STR("GL", &_citrus_HZ_parse_graphic), 589 _CITRUS_PROP_HINT_STR("GR", &_citrus_HZ_parse_graphic), 590 _CITRUS_PROP_HINT_END 591 }; 592 593 static int 594 _citrus_HZ_parse_escape(void *context, const char *name, const char *s) 595 { 596 _HZEncodingInfo *ei; 597 escape_t *escape; 598 void *p[2]; 599 600 ei = (_HZEncodingInfo *)context; 601 escape = calloc(1, sizeof(*escape)); 602 if (escape == NULL) 603 return (EINVAL); 604 if (strcmp("0", name) == 0) { 605 escape->set = E0SET(ei); 606 TAILQ_INSERT_TAIL(E0SET(ei), escape, entry); 607 } else if (strcmp("1", name) == 0) { 608 escape->set = E1SET(ei); 609 TAILQ_INSERT_TAIL(E1SET(ei), escape, entry); 610 } else { 611 free(escape); 612 return (EINVAL); 613 } 614 p[0] = (void *)escape; 615 p[1] = (void *)ei; 616 return (_citrus_prop_parse_variable( 617 escape_hints, (void *)&p[0], s, strlen(s))); 618 } 619 620 static const _citrus_prop_hint_t root_hints[] = { 621 _CITRUS_PROP_HINT_STR("0", &_citrus_HZ_parse_escape), 622 _CITRUS_PROP_HINT_STR("1", &_citrus_HZ_parse_escape), 623 _CITRUS_PROP_HINT_END 624 }; 625 626 static int 627 _citrus_HZ_encoding_module_init(_HZEncodingInfo * __restrict ei, 628 const void * __restrict var, size_t lenvar) 629 { 630 int errnum; 631 632 memset(ei, 0, sizeof(*ei)); 633 TAILQ_INIT(E0SET(ei)); 634 TAILQ_INIT(E1SET(ei)); 635 errnum = _citrus_prop_parse_variable( 636 root_hints, (void *)ei, var, lenvar); 637 if (errnum != 0) 638 _citrus_HZ_encoding_module_uninit(ei); 639 return (errnum); 640 } 641 642 /* ---------------------------------------------------------------------- 643 * public interface for stdenc 644 */ 645 646 _CITRUS_STDENC_DECLS(HZ); 647 _CITRUS_STDENC_DEF_OPS(HZ); 648 649 #include "citrus_stdenc_template.h" 650