1 /* $FreeBSD$ */ 2 /* $NetBSD: citrus_hz.c,v 1.2 2008/06/14 16:01:07 tnozaki Exp $ */ 3 4 /*- 5 * SPDX-License-Identifier: BSD-2-Clause 6 * 7 * Copyright (c)2004, 2006 Citrus Project, 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 */ 32 33 #include <sys/cdefs.h> 34 #include <sys/queue.h> 35 #include <sys/types.h> 36 37 #include <assert.h> 38 #include <errno.h> 39 #include <limits.h> 40 #include <stddef.h> 41 #include <stdint.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <wchar.h> 45 46 #include "citrus_namespace.h" 47 #include "citrus_types.h" 48 #include "citrus_bcs.h" 49 #include "citrus_module.h" 50 #include "citrus_stdenc.h" 51 52 #include "citrus_hz.h" 53 #include "citrus_prop.h" 54 55 /* 56 * wchar_t mapping: 57 * 58 * CTRL/ASCII 00000000 00000000 00000000 gxxxxxxx 59 * GB2312 00000000 00000000 0xxxxxxx gxxxxxxx 60 * 94/96*n (~M) 0mmmmmmm 0xxxxxxx 0xxxxxxx gxxxxxxx 61 */ 62 63 #define ESCAPE_CHAR '~' 64 65 typedef enum { 66 CTRL = 0, ASCII = 1, GB2312 = 2, CS94 = 3, CS96 = 4 67 } charset_t; 68 69 typedef struct { 70 int start; 71 int end; 72 int width; 73 } range_t; 74 75 static const range_t ranges[] = { 76 #define RANGE(start, end) { start, end, (end - start) + 1 } 77 /* CTRL */ RANGE(0x00, 0x1F), 78 /* ASCII */ RANGE(0x20, 0x7F), 79 /* GB2312 */ RANGE(0x21, 0x7E), 80 /* CS94 */ RANGE(0x21, 0x7E), 81 /* CS96 */ RANGE(0x20, 0x7F), 82 #undef RANGE 83 }; 84 85 typedef struct escape_t escape_t; 86 typedef struct { 87 charset_t charset; 88 escape_t *escape; 89 ssize_t length; 90 #define ROWCOL_MAX 3 91 } graphic_t; 92 93 typedef TAILQ_HEAD(escape_list, escape_t) escape_list; 94 struct escape_t { 95 TAILQ_ENTRY(escape_t) entry; 96 escape_list *set; 97 graphic_t *left; 98 graphic_t *right; 99 int ch; 100 }; 101 102 #define GL(escape) ((escape)->left) 103 #define GR(escape) ((escape)->right) 104 #define SET(escape) ((escape)->set) 105 #define ESC(escape) ((escape)->ch) 106 #define INIT(escape) (TAILQ_FIRST(SET(escape))) 107 108 static __inline escape_t * 109 find_escape(escape_list *set, int ch) 110 { 111 escape_t *escape; 112 113 TAILQ_FOREACH(escape, set, entry) { 114 if (ESC(escape) == ch) 115 break; 116 } 117 118 return (escape); 119 } 120 121 typedef struct { 122 escape_list e0; 123 escape_list e1; 124 graphic_t *ascii; 125 graphic_t *gb2312; 126 } _HZEncodingInfo; 127 128 #define E0SET(ei) (&(ei)->e0) 129 #define E1SET(ei) (&(ei)->e1) 130 #define INIT0(ei) (TAILQ_FIRST(E0SET(ei))) 131 #define INIT1(ei) (TAILQ_FIRST(E1SET(ei))) 132 133 typedef struct { 134 escape_t *inuse; 135 int chlen; 136 char ch[ROWCOL_MAX]; 137 } _HZState; 138 139 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 140 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 141 142 #define _FUNCNAME(m) _citrus_HZ_##m 143 #define _ENCODING_INFO _HZEncodingInfo 144 #define _ENCODING_STATE _HZState 145 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 146 #define _ENCODING_IS_STATE_DEPENDENT 1 147 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->inuse == NULL) 148 149 static __inline void 150 _citrus_HZ_init_state(_HZEncodingInfo * __restrict ei, 151 _HZState * __restrict psenc) 152 { 153 154 psenc->chlen = 0; 155 psenc->inuse = INIT0(ei); 156 } 157 158 #if 0 159 static __inline void 160 /*ARGSUSED*/ 161 _citrus_HZ_pack_state(_HZEncodingInfo * __restrict ei __unused, 162 void *__restrict pspriv, const _HZState * __restrict psenc) 163 { 164 165 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 166 } 167 168 static __inline void 169 /*ARGSUSED*/ 170 _citrus_HZ_unpack_state(_HZEncodingInfo * __restrict ei __unused, 171 _HZState * __restrict psenc, const void * __restrict pspriv) 172 { 173 174 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 175 } 176 #endif 177 178 static int 179 _citrus_HZ_mbrtowc_priv(_HZEncodingInfo * __restrict ei, 180 wchar_t * __restrict pwc, char ** __restrict s, size_t n, 181 _HZState * __restrict psenc, size_t * __restrict nresult) 182 { 183 escape_t *candidate, *init; 184 graphic_t *graphic; 185 const range_t *range; 186 char *s0; 187 wchar_t wc; 188 int bit, ch, head, len, tail; 189 190 if (*s == NULL) { 191 _citrus_HZ_init_state(ei, psenc); 192 *nresult = 1; 193 return (0); 194 } 195 s0 = *s; 196 if (psenc->chlen < 0 || psenc->inuse == NULL) 197 return (EINVAL); 198 199 wc = (wchar_t)0; 200 bit = head = tail = 0; 201 graphic = NULL; 202 for (len = 0; len <= MB_LEN_MAX;) { 203 if (psenc->chlen == tail) { 204 if (n-- < 1) { 205 *s = s0; 206 *nresult = (size_t)-2; 207 return (0); 208 } 209 psenc->ch[psenc->chlen++] = *s0++; 210 ++len; 211 } 212 ch = (unsigned char)psenc->ch[tail++]; 213 if (tail == 1) { 214 if ((ch & ~0x80) <= 0x1F) { 215 if (psenc->inuse != INIT0(ei)) 216 break; 217 wc = (wchar_t)ch; 218 goto done; 219 } 220 if (ch & 0x80) { 221 graphic = GR(psenc->inuse); 222 bit = 0x80; 223 ch &= ~0x80; 224 } else { 225 graphic = GL(psenc->inuse); 226 if (ch == ESCAPE_CHAR) 227 continue; 228 bit = 0x0; 229 } 230 if (graphic == NULL) 231 break; 232 } else if (tail == 2 && psenc->ch[0] == ESCAPE_CHAR) { 233 if (tail < psenc->chlen) 234 return (EINVAL); 235 if (ch == ESCAPE_CHAR) { 236 ++head; 237 } else if (ch == '\n') { 238 if (psenc->inuse != INIT0(ei)) 239 break; 240 tail = psenc->chlen = 0; 241 continue; 242 } else { 243 candidate = NULL; 244 init = INIT0(ei); 245 if (psenc->inuse == init) { 246 init = INIT1(ei); 247 } else if (INIT(psenc->inuse) == init) { 248 if (ESC(init) != ch) 249 break; 250 candidate = init; 251 } 252 if (candidate == NULL) { 253 candidate = find_escape( 254 SET(psenc->inuse), ch); 255 if (candidate == NULL) { 256 if (init == NULL || 257 ESC(init) != ch) 258 break; 259 candidate = init; 260 } 261 } 262 psenc->inuse = candidate; 263 tail = psenc->chlen = 0; 264 continue; 265 } 266 } else if (ch & 0x80) { 267 if (graphic != GR(psenc->inuse)) 268 break; 269 ch &= ~0x80; 270 } else { 271 if (graphic != GL(psenc->inuse)) 272 break; 273 } 274 range = &ranges[(size_t)graphic->charset]; 275 if (range->start > ch || range->end < ch) 276 break; 277 wc <<= 8; 278 wc |= ch; 279 if (graphic->length == (tail - head)) { 280 if (graphic->charset > GB2312) 281 bit |= ESC(psenc->inuse) << 24; 282 wc |= bit; 283 goto done; 284 } 285 } 286 *nresult = (size_t)-1; 287 return (EILSEQ); 288 done: 289 if (tail < psenc->chlen) 290 return (EINVAL); 291 *s = s0; 292 if (pwc != NULL) 293 *pwc = wc; 294 psenc->chlen = 0; 295 *nresult = (wc == 0) ? 0 : len; 296 297 return (0); 298 } 299 300 static int 301 _citrus_HZ_wcrtomb_priv(_HZEncodingInfo * __restrict ei, 302 char * __restrict s, size_t n, wchar_t wc, 303 _HZState * __restrict psenc, size_t * __restrict nresult) 304 { 305 escape_t *candidate, *init; 306 graphic_t *graphic; 307 const range_t *range; 308 size_t len; 309 int bit, ch; 310 311 if (psenc->chlen != 0 || psenc->inuse == NULL) 312 return (EINVAL); 313 if (wc & 0x80) { 314 bit = 0x80; 315 wc &= ~0x80; 316 } else { 317 bit = 0x0; 318 } 319 if ((uint32_t)wc <= 0x1F) { 320 candidate = INIT0(ei); 321 graphic = (bit == 0) ? candidate->left : candidate->right; 322 if (graphic == NULL) 323 goto ilseq; 324 range = &ranges[(size_t)CTRL]; 325 len = 1; 326 } else if ((uint32_t)wc <= 0x7F) { 327 graphic = ei->ascii; 328 if (graphic == NULL) 329 goto ilseq; 330 candidate = graphic->escape; 331 range = &ranges[(size_t)graphic->charset]; 332 len = graphic->length; 333 } else if ((uint32_t)wc <= 0x7F7F) { 334 graphic = ei->gb2312; 335 if (graphic == NULL) 336 goto ilseq; 337 candidate = graphic->escape; 338 range = &ranges[(size_t)graphic->charset]; 339 len = graphic->length; 340 } else { 341 ch = (wc >> 24) & 0xFF; 342 candidate = find_escape(E0SET(ei), ch); 343 if (candidate == NULL) { 344 candidate = find_escape(E1SET(ei), ch); 345 if (candidate == NULL) 346 goto ilseq; 347 } 348 wc &= ~0xFF000000; 349 graphic = (bit == 0) ? candidate->left : candidate->right; 350 if (graphic == NULL) 351 goto ilseq; 352 range = &ranges[(size_t)graphic->charset]; 353 len = graphic->length; 354 } 355 if (psenc->inuse != candidate) { 356 init = INIT0(ei); 357 if (SET(psenc->inuse) == SET(candidate)) { 358 if (INIT(psenc->inuse) != init || 359 psenc->inuse == init || candidate == init) 360 init = NULL; 361 } else if (candidate == (init = INIT(candidate))) { 362 init = NULL; 363 } 364 if (init != NULL) { 365 if (n < 2) 366 return (E2BIG); 367 n -= 2; 368 psenc->ch[psenc->chlen++] = ESCAPE_CHAR; 369 psenc->ch[psenc->chlen++] = ESC(init); 370 } 371 if (n < 2) 372 return (E2BIG); 373 n -= 2; 374 psenc->ch[psenc->chlen++] = ESCAPE_CHAR; 375 psenc->ch[psenc->chlen++] = ESC(candidate); 376 psenc->inuse = candidate; 377 } 378 if (n < len) 379 return (E2BIG); 380 while (len-- > 0) { 381 ch = (wc >> (len * 8)) & 0xFF; 382 if (range->start > ch || range->end < ch) 383 goto ilseq; 384 psenc->ch[psenc->chlen++] = ch | bit; 385 } 386 memcpy(s, psenc->ch, psenc->chlen); 387 *nresult = psenc->chlen; 388 psenc->chlen = 0; 389 390 return (0); 391 392 ilseq: 393 *nresult = (size_t)-1; 394 return (EILSEQ); 395 } 396 397 static __inline int 398 _citrus_HZ_put_state_reset(_HZEncodingInfo * __restrict ei, 399 char * __restrict s, size_t n, _HZState * __restrict psenc, 400 size_t * __restrict nresult) 401 { 402 escape_t *candidate; 403 404 if (psenc->chlen != 0 || psenc->inuse == NULL) 405 return (EINVAL); 406 candidate = INIT0(ei); 407 if (psenc->inuse != candidate) { 408 if (n < 2) 409 return (E2BIG); 410 n -= 2; 411 psenc->ch[psenc->chlen++] = ESCAPE_CHAR; 412 psenc->ch[psenc->chlen++] = ESC(candidate); 413 } 414 if (n < 1) 415 return (E2BIG); 416 if (psenc->chlen > 0) 417 memcpy(s, psenc->ch, psenc->chlen); 418 *nresult = psenc->chlen; 419 _citrus_HZ_init_state(ei, psenc); 420 421 return (0); 422 } 423 424 static __inline int 425 _citrus_HZ_stdenc_get_state_desc_generic(_HZEncodingInfo * __restrict ei, 426 _HZState * __restrict psenc, int * __restrict rstate) 427 { 428 429 if (psenc->chlen < 0 || psenc->inuse == NULL) 430 return (EINVAL); 431 *rstate = (psenc->chlen == 0) 432 ? ((psenc->inuse == INIT0(ei)) 433 ? _STDENC_SDGEN_INITIAL 434 : _STDENC_SDGEN_STABLE) 435 : ((psenc->ch[0] == ESCAPE_CHAR) 436 ? _STDENC_SDGEN_INCOMPLETE_SHIFT 437 : _STDENC_SDGEN_INCOMPLETE_CHAR); 438 439 return (0); 440 } 441 442 static __inline int 443 /*ARGSUSED*/ 444 _citrus_HZ_stdenc_wctocs(_HZEncodingInfo * __restrict ei __unused, 445 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 446 { 447 int bit; 448 449 if (wc & 0x80) { 450 bit = 0x80; 451 wc &= ~0x80; 452 } else 453 bit = 0x0; 454 if ((uint32_t)wc <= 0x7F) { 455 *csid = (_csid_t)bit; 456 *idx = (_index_t)wc; 457 } else if ((uint32_t)wc <= 0x7F7F) { 458 *csid = (_csid_t)(bit | 0x8000); 459 *idx = (_index_t)wc; 460 } else { 461 *csid = (_index_t)(wc & ~0x00FFFF7F); 462 *idx = (_csid_t)(wc & 0x00FFFF7F); 463 } 464 465 return (0); 466 } 467 468 static __inline int 469 /*ARGSUSED*/ 470 _citrus_HZ_stdenc_cstowc(_HZEncodingInfo * __restrict ei __unused, 471 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 472 { 473 474 *wc = (wchar_t)idx; 475 switch (csid) { 476 case 0x80: 477 case 0x8080: 478 *wc |= (wchar_t)0x80; 479 /*FALLTHROUGH*/ 480 case 0x0: 481 case 0x8000: 482 break; 483 default: 484 *wc |= (wchar_t)csid; 485 } 486 487 return (0); 488 } 489 490 static void 491 _citrus_HZ_encoding_module_uninit(_HZEncodingInfo *ei) 492 { 493 escape_t *escape; 494 495 while ((escape = TAILQ_FIRST(E0SET(ei))) != NULL) { 496 TAILQ_REMOVE(E0SET(ei), escape, entry); 497 free(GL(escape)); 498 free(GR(escape)); 499 free(escape); 500 } 501 while ((escape = TAILQ_FIRST(E1SET(ei))) != NULL) { 502 TAILQ_REMOVE(E1SET(ei), escape, entry); 503 free(GL(escape)); 504 free(GR(escape)); 505 free(escape); 506 } 507 } 508 509 static int 510 _citrus_HZ_parse_char(void *context, const char *name __unused, const char *s) 511 { 512 escape_t *escape; 513 void **p; 514 515 p = (void **)context; 516 escape = (escape_t *)p[0]; 517 if (escape->ch != '\0') 518 return (EINVAL); 519 escape->ch = *s++; 520 if (escape->ch == ESCAPE_CHAR || *s != '\0') 521 return (EINVAL); 522 523 return (0); 524 } 525 526 static int 527 _citrus_HZ_parse_graphic(void *context, const char *name, const char *s) 528 { 529 _HZEncodingInfo *ei; 530 escape_t *escape; 531 graphic_t *graphic; 532 void **p; 533 534 p = (void **)context; 535 escape = (escape_t *)p[0]; 536 ei = (_HZEncodingInfo *)p[1]; 537 graphic = calloc(1, sizeof(*graphic)); 538 if (graphic == NULL) 539 return (ENOMEM); 540 if (strcmp("GL", name) == 0) { 541 if (GL(escape) != NULL) 542 goto release; 543 GL(escape) = graphic; 544 } else if (strcmp("GR", name) == 0) { 545 if (GR(escape) != NULL) 546 goto release; 547 GR(escape) = graphic; 548 } else { 549 release: 550 free(graphic); 551 return (EINVAL); 552 } 553 graphic->escape = escape; 554 if (_bcs_strncasecmp("ASCII", s, 5) == 0) { 555 if (s[5] != '\0') 556 return (EINVAL); 557 graphic->charset = ASCII; 558 graphic->length = 1; 559 ei->ascii = graphic; 560 return (0); 561 } else if (_bcs_strncasecmp("GB2312", s, 6) == 0) { 562 if (s[6] != '\0') 563 return (EINVAL); 564 graphic->charset = GB2312; 565 graphic->length = 2; 566 ei->gb2312 = graphic; 567 return (0); 568 } else if (strncmp("94*", s, 3) == 0) 569 graphic->charset = CS94; 570 else if (strncmp("96*", s, 3) == 0) 571 graphic->charset = CS96; 572 else 573 return (EINVAL); 574 s += 3; 575 switch(*s) { 576 case '1': case '2': case '3': 577 graphic->length = (size_t)(*s - '0'); 578 if (*++s == '\0') 579 break; 580 /*FALLTHROUGH*/ 581 default: 582 return (EINVAL); 583 } 584 return (0); 585 } 586 587 static const _citrus_prop_hint_t escape_hints[] = { 588 _CITRUS_PROP_HINT_STR("CH", &_citrus_HZ_parse_char), 589 _CITRUS_PROP_HINT_STR("GL", &_citrus_HZ_parse_graphic), 590 _CITRUS_PROP_HINT_STR("GR", &_citrus_HZ_parse_graphic), 591 _CITRUS_PROP_HINT_END 592 }; 593 594 static int 595 _citrus_HZ_parse_escape(void *context, const char *name, const char *s) 596 { 597 _HZEncodingInfo *ei; 598 escape_t *escape; 599 void *p[2]; 600 601 ei = (_HZEncodingInfo *)context; 602 escape = calloc(1, sizeof(*escape)); 603 if (escape == NULL) 604 return (EINVAL); 605 if (strcmp("0", name) == 0) { 606 escape->set = E0SET(ei); 607 TAILQ_INSERT_TAIL(E0SET(ei), escape, entry); 608 } else if (strcmp("1", name) == 0) { 609 escape->set = E1SET(ei); 610 TAILQ_INSERT_TAIL(E1SET(ei), escape, entry); 611 } else { 612 free(escape); 613 return (EINVAL); 614 } 615 p[0] = (void *)escape; 616 p[1] = (void *)ei; 617 return (_citrus_prop_parse_variable( 618 escape_hints, (void *)&p[0], s, strlen(s))); 619 } 620 621 static const _citrus_prop_hint_t root_hints[] = { 622 _CITRUS_PROP_HINT_STR("0", &_citrus_HZ_parse_escape), 623 _CITRUS_PROP_HINT_STR("1", &_citrus_HZ_parse_escape), 624 _CITRUS_PROP_HINT_END 625 }; 626 627 static int 628 _citrus_HZ_encoding_module_init(_HZEncodingInfo * __restrict ei, 629 const void * __restrict var, size_t lenvar) 630 { 631 int errnum; 632 633 memset(ei, 0, sizeof(*ei)); 634 TAILQ_INIT(E0SET(ei)); 635 TAILQ_INIT(E1SET(ei)); 636 errnum = _citrus_prop_parse_variable( 637 root_hints, (void *)ei, var, lenvar); 638 if (errnum != 0) 639 _citrus_HZ_encoding_module_uninit(ei); 640 return (errnum); 641 } 642 643 /* ---------------------------------------------------------------------- 644 * public interface for stdenc 645 */ 646 647 _CITRUS_STDENC_DECLS(HZ); 648 _CITRUS_STDENC_DEF_OPS(HZ); 649 650 #include "citrus_stdenc_template.h" 651