1 /* $NetBSD: citrus_iso2022.c,v 1.20 2010/12/07 22:01:45 joerg Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause 5 * 6 * Copyright (c)1999, 2002 Citrus Project, 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 * $Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $ 31 */ 32 33 #include <sys/types.h> 34 35 #include <assert.h> 36 #include <errno.h> 37 #include <limits.h> 38 #include <stdbool.h> 39 #include <stddef.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <wchar.h> 44 45 #include "citrus_namespace.h" 46 #include "citrus_types.h" 47 #include "citrus_module.h" 48 #include "citrus_stdenc.h" 49 #include "citrus_iso2022.h" 50 51 52 /* ---------------------------------------------------------------------- 53 * private stuffs used by templates 54 */ 55 56 57 /* 58 * wchar_t mappings: 59 * ASCII (ESC ( B) 00000000 00000000 00000000 0xxxxxxx 60 * iso-8859-1 (ESC , A) 00000000 00000000 00000000 1xxxxxxx 61 * 94 charset (ESC ( F) 0fffffff 00000000 00000000 0xxxxxxx 62 * 94 charset (ESC ( M F) 0fffffff 1mmmmmmm 00000000 0xxxxxxx 63 * 96 charset (ESC , F) 0fffffff 00000000 00000000 1xxxxxxx 64 * 96 charset (ESC , M F) 0fffffff 1mmmmmmm 00000000 1xxxxxxx 65 * 94x94 charset (ESC $ ( F) 0fffffff 00000000 0xxxxxxx 0xxxxxxx 66 * 96x96 charset (ESC $ , F) 0fffffff 00000000 0xxxxxxx 1xxxxxxx 67 * 94x94 charset (ESC & V ESC $ ( F) 68 * 0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx 69 * 94x94x94 charset (ESC $ ( F) 0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx 70 * 96x96x96 charset (ESC $ , F) 0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx 71 * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit) 72 * 1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx 73 */ 74 75 #define CS94 (0U) 76 #define CS96 (1U) 77 #define CS94MULTI (2U) 78 #define CS96MULTI (3U) 79 80 typedef struct { 81 unsigned char type; 82 unsigned char final; 83 unsigned char interm; 84 unsigned char vers; 85 } _ISO2022Charset; 86 87 static const _ISO2022Charset ascii = { CS94, 'B', '\0', '\0' }; 88 static const _ISO2022Charset iso88591 = { CS96, 'A', '\0', '\0' }; 89 90 typedef struct { 91 _ISO2022Charset g[4]; 92 /* need 3 bits to hold -1, 0, ..., 3 */ 93 int gl:3, 94 gr:3, 95 singlegl:3, 96 singlegr:3; 97 char ch[7]; /* longest escape sequence (ESC & V ESC $ ( F) */ 98 size_t chlen; 99 int flags; 100 #define _ISO2022STATE_FLAG_INITIALIZED 1 101 } _ISO2022State; 102 103 typedef struct { 104 _ISO2022Charset *recommend[4]; 105 size_t recommendsize[4]; 106 _ISO2022Charset initg[4]; 107 int maxcharset; 108 int flags; 109 #define F_8BIT 0x0001 110 #define F_NOOLD 0x0002 111 #define F_SI 0x0010 /*0F*/ 112 #define F_SO 0x0020 /*0E*/ 113 #define F_LS0 0x0010 /*0F*/ 114 #define F_LS1 0x0020 /*0E*/ 115 #define F_LS2 0x0040 /*ESC n*/ 116 #define F_LS3 0x0080 /*ESC o*/ 117 #define F_LS1R 0x0100 /*ESC ~*/ 118 #define F_LS2R 0x0200 /*ESC }*/ 119 #define F_LS3R 0x0400 /*ESC |*/ 120 #define F_SS2 0x0800 /*ESC N*/ 121 #define F_SS3 0x1000 /*ESC O*/ 122 #define F_SS2R 0x2000 /*8E*/ 123 #define F_SS3R 0x4000 /*8F*/ 124 } _ISO2022EncodingInfo; 125 126 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 127 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 128 129 #define _FUNCNAME(m) _citrus_ISO2022_##m 130 #define _ENCODING_INFO _ISO2022EncodingInfo 131 #define _ENCODING_STATE _ISO2022State 132 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 133 #define _ENCODING_IS_STATE_DEPENDENT 1 134 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) \ 135 (!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED)) 136 137 138 #define _ISO2022INVALID (wchar_t)-1 139 140 static __inline bool isc0(__uint8_t x) 141 { 142 143 return ((x & 0x1f) == x); 144 } 145 146 static __inline bool isc1(__uint8_t x) 147 { 148 149 return (0x80 <= x && x <= 0x9f); 150 } 151 152 static __inline bool iscntl(__uint8_t x) 153 { 154 155 return (isc0(x) || isc1(x) || x == 0x7f); 156 } 157 158 static __inline bool is94(__uint8_t x) 159 { 160 161 return (0x21 <= x && x <= 0x7e); 162 } 163 164 static __inline bool is96(__uint8_t x) 165 { 166 167 return (0x20 <= x && x <= 0x7f); 168 } 169 170 static __inline bool isecma(__uint8_t x) 171 { 172 173 return (0x30 <= x && x <= 0x7f); 174 } 175 176 static __inline bool isinterm(__uint8_t x) 177 { 178 179 return (0x20 <= x && x <= 0x2f); 180 } 181 182 static __inline bool isthree(__uint8_t x) 183 { 184 185 return (0x60 <= x && x <= 0x6f); 186 } 187 188 static __inline int 189 getcs(const char * __restrict p, _ISO2022Charset * __restrict cs) 190 { 191 192 if (!strncmp(p, "94$", 3) && p[3] && !p[4]) { 193 cs->final = (unsigned char)(p[3] & 0xff); 194 cs->interm = '\0'; 195 cs->vers = '\0'; 196 cs->type = CS94MULTI; 197 } else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) { 198 cs->final = (unsigned char)(p[3] & 0xff); 199 cs->interm = '\0'; 200 cs->vers = '\0'; 201 cs->type = CS96MULTI; 202 } else if (!strncmp(p, "94", 2) && p[2] && !p[3]) { 203 cs->final = (unsigned char)(p[2] & 0xff); 204 cs->interm = '\0'; 205 cs->vers = '\0'; 206 cs->type = CS94; 207 } else if (!strncmp(p, "96", 2) && p[2] && !p[3]) { 208 cs->final = (unsigned char )(p[2] & 0xff); 209 cs->interm = '\0'; 210 cs->vers = '\0'; 211 cs->type = CS96; 212 } else 213 return (1); 214 215 return (0); 216 } 217 218 219 #define _NOTMATCH 0 220 #define _MATCH 1 221 #define _PARSEFAIL 2 222 223 static __inline int 224 get_recommend(_ISO2022EncodingInfo * __restrict ei, 225 const char * __restrict token) 226 { 227 _ISO2022Charset cs, *p; 228 int i; 229 230 if (!strchr("0123", token[0]) || token[1] != '=') 231 return (_NOTMATCH); 232 233 if (getcs(&token[2], &cs) == 0) 234 ; 235 else if (!strcmp(&token[2], "94")) { 236 cs.final = (unsigned char)(token[4]); 237 cs.interm = '\0'; 238 cs.vers = '\0'; 239 cs.type = CS94; 240 } else if (!strcmp(&token[2], "96")) { 241 cs.final = (unsigned char)(token[4]); 242 cs.interm = '\0'; 243 cs.vers = '\0'; 244 cs.type = CS96; 245 } else if (!strcmp(&token[2], "94$")) { 246 cs.final = (unsigned char)(token[5]); 247 cs.interm = '\0'; 248 cs.vers = '\0'; 249 cs.type = CS94MULTI; 250 } else if (!strcmp(&token[2], "96$")) { 251 cs.final = (unsigned char)(token[5]); 252 cs.interm = '\0'; 253 cs.vers = '\0'; 254 cs.type = CS96MULTI; 255 } else 256 return (_PARSEFAIL); 257 258 i = token[0] - '0'; 259 if (!ei->recommend[i]) 260 ei->recommend[i] = malloc(sizeof(_ISO2022Charset)); 261 else { 262 p = reallocarray(ei->recommend[i], ei->recommendsize[i] + 1, 263 sizeof(_ISO2022Charset)); 264 if (!p) 265 return (_PARSEFAIL); 266 ei->recommend[i] = p; 267 } 268 if (!ei->recommend[i]) 269 return (_PARSEFAIL); 270 ei->recommendsize[i]++; 271 272 (ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final; 273 (ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm; 274 (ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers; 275 (ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type; 276 277 return (_MATCH); 278 } 279 280 static __inline int 281 get_initg(_ISO2022EncodingInfo * __restrict ei, 282 const char * __restrict token) 283 { 284 _ISO2022Charset cs; 285 286 if (strncmp("INIT", &token[0], 4) || 287 !strchr("0123", token[4]) || 288 token[5] != '=') 289 return (_NOTMATCH); 290 291 if (getcs(&token[6], &cs) != 0) 292 return (_PARSEFAIL); 293 294 ei->initg[token[4] - '0'].type = cs.type; 295 ei->initg[token[4] - '0'].final = cs.final; 296 ei->initg[token[4] - '0'].interm = cs.interm; 297 ei->initg[token[4] - '0'].vers = cs.vers; 298 299 return (_MATCH); 300 } 301 302 static __inline int 303 get_max(_ISO2022EncodingInfo * __restrict ei, 304 const char * __restrict token) 305 { 306 if (!strcmp(token, "MAX1")) 307 ei->maxcharset = 1; 308 else if (!strcmp(token, "MAX2")) 309 ei->maxcharset = 2; 310 else if (!strcmp(token, "MAX3")) 311 ei->maxcharset = 3; 312 else 313 return (_NOTMATCH); 314 315 return (_MATCH); 316 } 317 318 319 static __inline int 320 get_flags(_ISO2022EncodingInfo * __restrict ei, 321 const char * __restrict token) 322 { 323 static struct { 324 const char *tag; 325 int flag; 326 } const tags[] = { 327 { "DUMMY", 0 }, 328 { "8BIT", F_8BIT }, 329 { "NOOLD", F_NOOLD }, 330 { "SI", F_SI }, 331 { "SO", F_SO }, 332 { "LS0", F_LS0 }, 333 { "LS1", F_LS1 }, 334 { "LS2", F_LS2 }, 335 { "LS3", F_LS3 }, 336 { "LS1R", F_LS1R }, 337 { "LS2R", F_LS2R }, 338 { "LS3R", F_LS3R }, 339 { "SS2", F_SS2 }, 340 { "SS3", F_SS3 }, 341 { "SS2R", F_SS2R }, 342 { "SS3R", F_SS3R }, 343 { NULL, 0 } 344 }; 345 int i; 346 347 for (i = 0; tags[i].tag; i++) 348 if (!strcmp(token, tags[i].tag)) { 349 ei->flags |= tags[i].flag; 350 return (_MATCH); 351 } 352 353 return (_NOTMATCH); 354 } 355 356 357 static __inline int 358 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei, 359 const void * __restrict var, size_t lenvar __unused) 360 { 361 char const *e, *v; 362 char buf[20]; 363 size_t len; 364 int i, ret; 365 366 /* 367 * parse VARIABLE section. 368 */ 369 370 if (!var) 371 return (EFTYPE); 372 373 v = (const char *) var; 374 375 /* initialize structure */ 376 ei->maxcharset = 0; 377 for (i = 0; i < 4; i++) { 378 ei->recommend[i] = NULL; 379 ei->recommendsize[i] = 0; 380 } 381 ei->flags = 0; 382 383 while (*v) { 384 while (*v == ' ' || *v == '\t') 385 ++v; 386 387 /* find the token */ 388 e = v; 389 while (*e && *e != ' ' && *e != '\t') 390 ++e; 391 392 len = e - v; 393 if (len == 0) 394 break; 395 if (len >= sizeof(buf)) 396 goto parsefail; 397 snprintf(buf, sizeof(buf), "%.*s", (int)len, v); 398 399 if ((ret = get_recommend(ei, buf)) != _NOTMATCH) 400 ; 401 else if ((ret = get_initg(ei, buf)) != _NOTMATCH) 402 ; 403 else if ((ret = get_max(ei, buf)) != _NOTMATCH) 404 ; 405 else if ((ret = get_flags(ei, buf)) != _NOTMATCH) 406 ; 407 else 408 ret = _PARSEFAIL; 409 if (ret == _PARSEFAIL) 410 goto parsefail; 411 v = e; 412 413 } 414 415 return (0); 416 417 parsefail: 418 free(ei->recommend[0]); 419 free(ei->recommend[1]); 420 free(ei->recommend[2]); 421 free(ei->recommend[3]); 422 423 return (EFTYPE); 424 } 425 426 static __inline void 427 /*ARGSUSED*/ 428 _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei, 429 _ISO2022State * __restrict s) 430 { 431 int i; 432 433 memset(s, 0, sizeof(*s)); 434 s->gl = 0; 435 s->gr = (ei->flags & F_8BIT) ? 1 : -1; 436 437 for (i = 0; i < 4; i++) 438 if (ei->initg[i].final) { 439 s->g[i].type = ei->initg[i].type; 440 s->g[i].final = ei->initg[i].final; 441 s->g[i].interm = ei->initg[i].interm; 442 } 443 s->singlegl = s->singlegr = -1; 444 s->flags |= _ISO2022STATE_FLAG_INITIALIZED; 445 } 446 447 #if 0 448 static __inline void 449 /*ARGSUSED*/ 450 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei __unused, 451 void * __restrict pspriv, const _ISO2022State * __restrict s) 452 { 453 454 memcpy(pspriv, (const void *)s, sizeof(*s)); 455 } 456 457 static __inline void 458 /*ARGSUSED*/ 459 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei __unused, 460 _ISO2022State * __restrict s, const void * __restrict pspriv) 461 { 462 463 memcpy((void *)s, pspriv, sizeof(*s)); 464 } 465 #endif 466 467 static int 468 /*ARGSUSED*/ 469 _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei, 470 const void * __restrict var, size_t lenvar) 471 { 472 473 return (_citrus_ISO2022_parse_variable(ei, var, lenvar)); 474 } 475 476 static void 477 /*ARGSUSED*/ 478 _citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei __unused) 479 { 480 481 } 482 483 #define ESC '\033' 484 #define ECMA -1 485 #define INTERM -2 486 #define OECMA -3 487 static const struct seqtable { 488 int type; 489 int csoff; 490 int finaloff; 491 int intermoff; 492 int versoff; 493 int len; 494 int chars[10]; 495 } seqtable[] = { 496 /* G0 94MULTI special */ 497 { CS94MULTI, -1, 2, -1, -1, 3, { ESC, '$', OECMA }, }, 498 /* G0 94MULTI special with version identification */ 499 { CS94MULTI, -1, 5, -1, 2, 6, { ESC, '&', ECMA, ESC, '$', OECMA }, }, 500 /* G? 94 */ 501 { CS94, 1, 2, -1, -1, 3, { ESC, CS94, ECMA, }, }, 502 /* G? 94 with 2nd intermediate char */ 503 { CS94, 1, 3, 2, -1, 4, { ESC, CS94, INTERM, ECMA, }, }, 504 /* G? 96 */ 505 { CS96, 1, 2, -1, -1, 3, { ESC, CS96, ECMA, }, }, 506 /* G? 96 with 2nd intermediate char */ 507 { CS96, 1, 3, 2, -1, 4, { ESC, CS96, INTERM, ECMA, }, }, 508 /* G? 94MULTI */ 509 { CS94MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS94, ECMA, }, }, 510 /* G? 96MULTI */ 511 { CS96MULTI, 2, 3, -1, -1, 4, { ESC, '$', CS96, ECMA, }, }, 512 /* G? 94MULTI with version specification */ 513 { CS94MULTI, 5, 6, -1, 2, 7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, }, 514 /* LS2/3 */ 515 { -1, -1, -1, -1, -1, 2, { ESC, 'n', }, }, 516 { -1, -1, -1, -1, -1, 2, { ESC, 'o', }, }, 517 /* LS1/2/3R */ 518 { -1, -1, -1, -1, -1, 2, { ESC, '~', }, }, 519 { -1, -1, -1, -1, -1, 2, { ESC, /*{*/ '}', }, }, 520 { -1, -1, -1, -1, -1, 2, { ESC, '|', }, }, 521 /* SS2/3 */ 522 { -1, -1, -1, -1, -1, 2, { ESC, 'N', }, }, 523 { -1, -1, -1, -1, -1, 2, { ESC, 'O', }, }, 524 /* end of records */ 525 // { 0, } 526 { 0, 0, 0, 0, 0, 0, { ESC, 0, }, } 527 }; 528 529 static int 530 seqmatch(const char * __restrict s, size_t n, 531 const struct seqtable * __restrict sp) 532 { 533 const int *p; 534 535 p = sp->chars; 536 while ((size_t)(p - sp->chars) < n && p - sp->chars < sp->len) { 537 switch (*p) { 538 case ECMA: 539 if (!isecma(*s)) 540 goto terminate; 541 break; 542 case OECMA: 543 if (*s && strchr("@AB", *s)) 544 break; 545 else 546 goto terminate; 547 case INTERM: 548 if (!isinterm(*s)) 549 goto terminate; 550 break; 551 case CS94: 552 if (*s && strchr("()*+", *s)) 553 break; 554 else 555 goto terminate; 556 case CS96: 557 if (*s && strchr(",-./", *s)) 558 break; 559 else 560 goto terminate; 561 default: 562 if (*s != *p) 563 goto terminate; 564 break; 565 } 566 567 p++; 568 s++; 569 } 570 571 terminate: 572 return (p - sp->chars); 573 } 574 575 static wchar_t 576 _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei __unused, 577 char * __restrict string, size_t n, char ** __restrict result, 578 _ISO2022State * __restrict psenc) 579 { 580 const struct seqtable *sp; 581 wchar_t wchar = 0; 582 int i, cur, nmatch; 583 584 while (1) { 585 /* SI/SO */ 586 if (1 <= n && string[0] == '\017') { 587 psenc->gl = 0; 588 string++; 589 n--; 590 continue; 591 } 592 if (1 <= n && string[0] == '\016') { 593 psenc->gl = 1; 594 string++; 595 n--; 596 continue; 597 } 598 599 /* SS2/3R */ 600 if (1 <= n && string[0] && strchr("\217\216", string[0])) { 601 psenc->singlegl = psenc->singlegr = 602 (string[0] - '\216') + 2; 603 string++; 604 n--; 605 continue; 606 } 607 608 /* eat the letter if this is not ESC */ 609 if (1 <= n && string[0] != '\033') 610 break; 611 612 /* look for a perfect match from escape sequences */ 613 for (sp = &seqtable[0]; sp->len; sp++) { 614 nmatch = seqmatch(string, n, sp); 615 if (sp->len == nmatch && n >= (size_t)(sp->len)) 616 break; 617 } 618 619 if (!sp->len) 620 goto notseq; 621 622 if (sp->type != -1) { 623 if (sp->csoff == -1) 624 i = 0; 625 else { 626 switch (sp->type) { 627 case CS94: 628 case CS94MULTI: 629 i = string[sp->csoff] - '('; 630 break; 631 case CS96: 632 case CS96MULTI: 633 i = string[sp->csoff] - ','; 634 break; 635 default: 636 return (_ISO2022INVALID); 637 } 638 } 639 psenc->g[i].type = sp->type; 640 psenc->g[i].final = '\0'; 641 psenc->g[i].interm = '\0'; 642 psenc->g[i].vers = '\0'; 643 /* sp->finaloff must not be -1 */ 644 if (sp->finaloff != -1) 645 psenc->g[i].final = string[sp->finaloff]; 646 if (sp->intermoff != -1) 647 psenc->g[i].interm = string[sp->intermoff]; 648 if (sp->versoff != -1) 649 psenc->g[i].vers = string[sp->versoff]; 650 651 string += sp->len; 652 n -= sp->len; 653 continue; 654 } 655 656 /* LS2/3 */ 657 if (2 <= n && string[0] == '\033' && 658 string[1] && strchr("no", string[1])) { 659 psenc->gl = string[1] - 'n' + 2; 660 string += 2; 661 n -= 2; 662 continue; 663 } 664 665 /* LS1/2/3R */ 666 /* XXX: { for vi showmatch */ 667 if (2 <= n && string[0] == '\033' && 668 string[1] && strchr("~}|", string[1])) { 669 psenc->gr = 3 - (string[1] - '|'); 670 string += 2; 671 n -= 2; 672 continue; 673 } 674 675 /* SS2/3 */ 676 if (2 <= n && string[0] == '\033' && string[1] && 677 strchr("NO", string[1])) { 678 psenc->singlegl = (string[1] - 'N') + 2; 679 string += 2; 680 n -= 2; 681 continue; 682 } 683 684 notseq: 685 /* 686 * if we've got an unknown escape sequence, eat the ESC at the 687 * head. otherwise, wait till full escape sequence comes. 688 */ 689 for (sp = &seqtable[0]; sp->len; sp++) { 690 nmatch = seqmatch(string, n, sp); 691 if (!nmatch) 692 continue; 693 694 /* 695 * if we are in the middle of escape sequence, 696 * we still need to wait for more characters to come 697 */ 698 if (n < (size_t)(sp->len)) { 699 if ((size_t)(nmatch) == n) { 700 if (result) 701 *result = string; 702 return (_ISO2022INVALID); 703 } 704 } else { 705 if (nmatch == sp->len) { 706 /* this case should not happen */ 707 goto eat; 708 } 709 } 710 } 711 712 break; 713 } 714 715 eat: 716 /* no letter to eat */ 717 if (n < 1) { 718 if (result) 719 *result = string; 720 return (_ISO2022INVALID); 721 } 722 723 /* normal chars. always eat C0/C1 as is. */ 724 if (iscntl(*string & 0xff)) 725 cur = -1; 726 else if (*string & 0x80) 727 cur = (psenc->singlegr == -1) ? psenc->gr : psenc->singlegr; 728 else 729 cur = (psenc->singlegl == -1) ? psenc->gl : psenc->singlegl; 730 731 if (cur == -1) { 732 asis: 733 wchar = *string++ & 0xff; 734 if (result) 735 *result = string; 736 /* reset single shift state */ 737 psenc->singlegr = psenc->singlegl = -1; 738 return (wchar); 739 } 740 741 /* length error check */ 742 switch (psenc->g[cur].type) { 743 case CS94MULTI: 744 case CS96MULTI: 745 if (!isthree(psenc->g[cur].final)) { 746 if (2 <= n && 747 (string[0] & 0x80) == (string[1] & 0x80)) 748 break; 749 } else { 750 if (3 <= n && 751 (string[0] & 0x80) == (string[1] & 0x80) && 752 (string[0] & 0x80) == (string[2] & 0x80)) 753 break; 754 } 755 756 /* we still need to wait for more characters to come */ 757 if (result) 758 *result = string; 759 return (_ISO2022INVALID); 760 761 case CS94: 762 case CS96: 763 if (1 <= n) 764 break; 765 766 /* we still need to wait for more characters to come */ 767 if (result) 768 *result = string; 769 return (_ISO2022INVALID); 770 } 771 772 /* range check */ 773 switch (psenc->g[cur].type) { 774 case CS94: 775 if (!(is94(string[0] & 0x7f))) 776 goto asis; 777 break; 778 case CS96: 779 if (!(is96(string[0] & 0x7f))) 780 goto asis; 781 break; 782 case CS94MULTI: 783 if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f))) 784 goto asis; 785 break; 786 case CS96MULTI: 787 if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f))) 788 goto asis; 789 break; 790 } 791 792 /* extract the character. */ 793 switch (psenc->g[cur].type) { 794 case CS94: 795 /* special case for ASCII. */ 796 if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) { 797 wchar = *string++; 798 wchar &= 0x7f; 799 break; 800 } 801 wchar = psenc->g[cur].final; 802 wchar = (wchar << 8); 803 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 804 wchar = (wchar << 8); 805 wchar = (wchar << 8) | (*string++ & 0x7f); 806 break; 807 case CS96: 808 /* special case for ISO-8859-1. */ 809 if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) { 810 wchar = *string++; 811 wchar &= 0x7f; 812 wchar |= 0x80; 813 break; 814 } 815 wchar = psenc->g[cur].final; 816 wchar = (wchar << 8); 817 wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0); 818 wchar = (wchar << 8); 819 wchar = (wchar << 8) | (*string++ & 0x7f); 820 wchar |= 0x80; 821 break; 822 case CS94MULTI: 823 case CS96MULTI: 824 wchar = psenc->g[cur].final; 825 wchar = (wchar << 8); 826 if (isthree(psenc->g[cur].final)) 827 wchar |= (*string++ & 0x7f); 828 wchar = (wchar << 8) | (*string++ & 0x7f); 829 wchar = (wchar << 8) | (*string++ & 0x7f); 830 if (psenc->g[cur].type == CS96MULTI) 831 wchar |= 0x80; 832 break; 833 } 834 835 if (result) 836 *result = string; 837 /* reset single shift state */ 838 psenc->singlegr = psenc->singlegl = -1; 839 return (wchar); 840 } 841 842 843 844 static int 845 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei, 846 wchar_t * __restrict pwc, char ** __restrict s, 847 size_t n, _ISO2022State * __restrict psenc, size_t * __restrict nresult) 848 { 849 char *p, *result, *s0; 850 wchar_t wchar; 851 int c, chlenbak; 852 853 if (*s == NULL) { 854 _citrus_ISO2022_init_state(ei, psenc); 855 *nresult = _ENCODING_IS_STATE_DEPENDENT; 856 return (0); 857 } 858 s0 = *s; 859 c = 0; 860 chlenbak = psenc->chlen; 861 862 /* 863 * if we have something in buffer, use that. 864 * otherwise, skip here 865 */ 866 if (psenc->chlen > sizeof(psenc->ch)) { 867 /* illgeal state */ 868 _citrus_ISO2022_init_state(ei, psenc); 869 goto encoding_error; 870 } 871 if (psenc->chlen == 0) 872 goto emptybuf; 873 874 /* buffer is not empty */ 875 p = psenc->ch; 876 while (psenc->chlen < sizeof(psenc->ch)) { 877 if (n > 0) { 878 psenc->ch[psenc->chlen++] = *s0++; 879 n--; 880 } 881 882 wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch), 883 &result, psenc); 884 c += result - p; 885 if (wchar != _ISO2022INVALID) { 886 if (psenc->chlen > (size_t)c) 887 memmove(psenc->ch, result, psenc->chlen - c); 888 if (psenc->chlen < (size_t)c) 889 psenc->chlen = 0; 890 else 891 psenc->chlen -= c; 892 goto output; 893 } 894 895 if (n == 0) { 896 if ((size_t)(result - p) == psenc->chlen) 897 /* complete shift sequence. */ 898 psenc->chlen = 0; 899 goto restart; 900 } 901 902 p = result; 903 } 904 905 /* escape sequence too long? */ 906 goto encoding_error; 907 908 emptybuf: 909 wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc); 910 if (wchar != _ISO2022INVALID) { 911 c += result - s0; 912 psenc->chlen = 0; 913 s0 = result; 914 goto output; 915 } 916 if (result > s0) { 917 c += (result - s0); 918 n -= (result - s0); 919 s0 = result; 920 if (n > 0) 921 goto emptybuf; 922 /* complete shift sequence. */ 923 goto restart; 924 } 925 n += c; 926 if (n < sizeof(psenc->ch)) { 927 memcpy(psenc->ch, s0 - c, n); 928 psenc->chlen = n; 929 s0 = result; 930 goto restart; 931 } 932 933 /* escape sequence too long? */ 934 935 encoding_error: 936 psenc->chlen = 0; 937 *nresult = (size_t)-1; 938 return (EILSEQ); 939 940 output: 941 *s = s0; 942 if (pwc) 943 *pwc = wchar; 944 *nresult = wchar ? c - chlenbak : 0; 945 return (0); 946 947 restart: 948 *s = s0; 949 *nresult = (size_t)-2; 950 951 return (0); 952 } 953 954 static int 955 recommendation(_ISO2022EncodingInfo * __restrict ei, 956 _ISO2022Charset * __restrict cs) 957 { 958 _ISO2022Charset *recommend; 959 size_t j; 960 int i; 961 962 /* first, try a exact match. */ 963 for (i = 0; i < 4; i++) { 964 recommend = ei->recommend[i]; 965 for (j = 0; j < ei->recommendsize[i]; j++) { 966 if (cs->type != recommend[j].type) 967 continue; 968 if (cs->final != recommend[j].final) 969 continue; 970 if (cs->interm != recommend[j].interm) 971 continue; 972 973 return (i); 974 } 975 } 976 977 /* then, try a wildcard match over final char. */ 978 for (i = 0; i < 4; i++) { 979 recommend = ei->recommend[i]; 980 for (j = 0; j < ei->recommendsize[i]; j++) { 981 if (cs->type != recommend[j].type) 982 continue; 983 if (cs->final && (cs->final != recommend[j].final)) 984 continue; 985 if (cs->interm && (cs->interm != recommend[j].interm)) 986 continue; 987 988 return (i); 989 } 990 } 991 992 /* there's no recommendation. make a guess. */ 993 if (ei->maxcharset == 0) { 994 return (0); 995 } else { 996 switch (cs->type) { 997 case CS94: 998 case CS94MULTI: 999 return (0); 1000 case CS96: 1001 case CS96MULTI: 1002 return (1); 1003 } 1004 } 1005 return (0); 1006 } 1007 1008 static int 1009 _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc, 1010 char * __restrict string, size_t n, char ** __restrict result, 1011 _ISO2022State * __restrict psenc, size_t * __restrict nresult) 1012 { 1013 _ISO2022Charset cs; 1014 char *p; 1015 char tmp[MB_LEN_MAX]; 1016 size_t len; 1017 int bit8, i = 0, target; 1018 unsigned char mask; 1019 1020 if (isc0(wc & 0xff)) { 1021 /* go back to INIT0 or ASCII on control chars */ 1022 cs = ei->initg[0].final ? ei->initg[0] : ascii; 1023 } else if (isc1(wc & 0xff)) { 1024 /* go back to INIT1 or ISO-8859-1 on control chars */ 1025 cs = ei->initg[1].final ? ei->initg[1] : iso88591; 1026 } else if (!(wc & ~0xff)) { 1027 if (wc & 0x80) { 1028 /* special treatment for ISO-8859-1 */ 1029 cs = iso88591; 1030 } else { 1031 /* special treatment for ASCII */ 1032 cs = ascii; 1033 } 1034 } else { 1035 cs.final = (wc >> 24) & 0x7f; 1036 if ((wc >> 16) & 0x80) 1037 cs.interm = (wc >> 16) & 0x7f; 1038 else 1039 cs.interm = '\0'; 1040 if (wc & 0x80) 1041 cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96; 1042 else 1043 cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94; 1044 } 1045 target = recommendation(ei, &cs); 1046 p = tmp; 1047 bit8 = ei->flags & F_8BIT; 1048 1049 /* designate the charset onto the target plane(G0/1/2/3). */ 1050 if (psenc->g[target].type == cs.type && 1051 psenc->g[target].final == cs.final && 1052 psenc->g[target].interm == cs.interm) 1053 goto planeok; 1054 1055 *p++ = '\033'; 1056 if (cs.type == CS94MULTI || cs.type == CS96MULTI) 1057 *p++ = '$'; 1058 if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final) && 1059 !cs.interm && !(ei->flags & F_NOOLD)) 1060 ; 1061 else if (cs.type == CS94 || cs.type == CS94MULTI) 1062 *p++ = "()*+"[target]; 1063 else 1064 *p++ = ",-./"[target]; 1065 if (cs.interm) 1066 *p++ = cs.interm; 1067 *p++ = cs.final; 1068 1069 psenc->g[target].type = cs.type; 1070 psenc->g[target].final = cs.final; 1071 psenc->g[target].interm = cs.interm; 1072 1073 planeok: 1074 /* invoke the plane onto GL or GR. */ 1075 if (psenc->gl == target) 1076 goto sideok; 1077 if (bit8 && psenc->gr == target) 1078 goto sideok; 1079 1080 if (target == 0 && (ei->flags & F_LS0)) { 1081 *p++ = '\017'; 1082 psenc->gl = 0; 1083 } else if (target == 1 && (ei->flags & F_LS1)) { 1084 *p++ = '\016'; 1085 psenc->gl = 1; 1086 } else if (target == 2 && (ei->flags & F_LS2)) { 1087 *p++ = '\033'; 1088 *p++ = 'n'; 1089 psenc->gl = 2; 1090 } else if (target == 3 && (ei->flags & F_LS3)) { 1091 *p++ = '\033'; 1092 *p++ = 'o'; 1093 psenc->gl = 3; 1094 } else if (bit8 && target == 1 && (ei->flags & F_LS1R)) { 1095 *p++ = '\033'; 1096 *p++ = '~'; 1097 psenc->gr = 1; 1098 } else if (bit8 && target == 2 && (ei->flags & F_LS2R)) { 1099 *p++ = '\033'; 1100 /*{*/ 1101 *p++ = '}'; 1102 psenc->gr = 2; 1103 } else if (bit8 && target == 3 && (ei->flags & F_LS3R)) { 1104 *p++ = '\033'; 1105 *p++ = '|'; 1106 psenc->gr = 3; 1107 } else if (target == 2 && (ei->flags & F_SS2)) { 1108 *p++ = '\033'; 1109 *p++ = 'N'; 1110 psenc->singlegl = 2; 1111 } else if (target == 3 && (ei->flags & F_SS3)) { 1112 *p++ = '\033'; 1113 *p++ = 'O'; 1114 psenc->singlegl = 3; 1115 } else if (bit8 && target == 2 && (ei->flags & F_SS2R)) { 1116 *p++ = '\216'; 1117 *p++ = 'N'; 1118 psenc->singlegl = psenc->singlegr = 2; 1119 } else if (bit8 && target == 3 && (ei->flags & F_SS3R)) { 1120 *p++ = '\217'; 1121 *p++ = 'O'; 1122 psenc->singlegl = psenc->singlegr = 3; 1123 } else 1124 goto ilseq; 1125 1126 sideok: 1127 if (psenc->singlegl == target) 1128 mask = 0x00; 1129 else if (psenc->singlegr == target) 1130 mask = 0x80; 1131 else if (psenc->gl == target) 1132 mask = 0x00; 1133 else if ((ei->flags & F_8BIT) && psenc->gr == target) 1134 mask = 0x80; 1135 else 1136 goto ilseq; 1137 1138 switch (cs.type) { 1139 case CS94: 1140 case CS96: 1141 i = 1; 1142 break; 1143 case CS94MULTI: 1144 case CS96MULTI: 1145 i = !iscntl(wc & 0xff) ? 1146 (isthree(cs.final) ? 3 : 2) : 1; 1147 break; 1148 } 1149 while (i-- > 0) 1150 *p++ = ((wc >> (i << 3)) & 0x7f) | mask; 1151 1152 /* reset single shift state */ 1153 psenc->singlegl = psenc->singlegr = -1; 1154 1155 len = (size_t)(p - tmp); 1156 if (n < len) { 1157 if (result) 1158 *result = (char *)0; 1159 *nresult = (size_t)-1; 1160 return (E2BIG); 1161 } 1162 if (result) 1163 *result = string + len; 1164 memcpy(string, tmp, len); 1165 *nresult = len; 1166 1167 return (0); 1168 1169 ilseq: 1170 *nresult = (size_t)-1; 1171 return (EILSEQ); 1172 } 1173 1174 static int 1175 _citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo * __restrict ei, 1176 char * __restrict s, size_t n, _ISO2022State * __restrict psenc, 1177 size_t * __restrict nresult) 1178 { 1179 char *result; 1180 char buf[MB_LEN_MAX]; 1181 size_t len; 1182 int ret; 1183 1184 /* XXX state will be modified after this operation... */ 1185 ret = _ISO2022_sputwchar(ei, L'\0', buf, sizeof(buf), &result, psenc, 1186 &len); 1187 if (ret) { 1188 *nresult = len; 1189 return (ret); 1190 } 1191 1192 if (sizeof(buf) < len || n < len-1) { 1193 /* XXX should recover state? */ 1194 *nresult = (size_t)-1; 1195 return (E2BIG); 1196 } 1197 1198 memcpy(s, buf, len - 1); 1199 *nresult = len - 1; 1200 return (0); 1201 } 1202 1203 static int 1204 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei, 1205 char * __restrict s, size_t n, wchar_t wc, 1206 _ISO2022State * __restrict psenc, size_t * __restrict nresult) 1207 { 1208 char *result; 1209 char buf[MB_LEN_MAX]; 1210 size_t len; 1211 int ret; 1212 1213 /* XXX state will be modified after this operation... */ 1214 ret = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc, 1215 &len); 1216 if (ret) { 1217 *nresult = len; 1218 return (ret); 1219 } 1220 1221 if (sizeof(buf) < len || n < len) { 1222 /* XXX should recover state? */ 1223 *nresult = (size_t)-1; 1224 return (E2BIG); 1225 } 1226 1227 memcpy(s, buf, len); 1228 *nresult = len; 1229 return (0); 1230 } 1231 1232 static __inline int 1233 /*ARGSUSED*/ 1234 _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei __unused, 1235 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 1236 { 1237 wchar_t m, nm; 1238 1239 m = wc & 0x7FFF8080; 1240 nm = wc & 0x007F7F7F; 1241 if (m & 0x00800000) 1242 nm &= 0x00007F7F; 1243 else 1244 m &= 0x7F008080; 1245 if (nm & 0x007F0000) { 1246 /* ^3 mark */ 1247 m |= 0x007F0000; 1248 } else if (nm & 0x00007F00) { 1249 /* ^2 mark */ 1250 m |= 0x00007F00; 1251 } 1252 *csid = (_csid_t)m; 1253 *idx = (_index_t)nm; 1254 1255 return (0); 1256 } 1257 1258 static __inline int 1259 /*ARGSUSED*/ 1260 _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei __unused, 1261 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 1262 { 1263 1264 *wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx; 1265 1266 return (0); 1267 } 1268 1269 static __inline int 1270 /*ARGSUSED*/ 1271 _citrus_ISO2022_stdenc_get_state_desc_generic(_ISO2022EncodingInfo * __restrict ei __unused, 1272 _ISO2022State * __restrict psenc, int * __restrict rstate) 1273 { 1274 1275 if (psenc->chlen == 0) { 1276 /* XXX: it should distinguish initial and stable. */ 1277 *rstate = _STDENC_SDGEN_STABLE; 1278 } else 1279 *rstate = (psenc->ch[0] == '\033') ? 1280 _STDENC_SDGEN_INCOMPLETE_SHIFT : 1281 _STDENC_SDGEN_INCOMPLETE_CHAR; 1282 return (0); 1283 } 1284 1285 /* ---------------------------------------------------------------------- 1286 * public interface for stdenc 1287 */ 1288 1289 _CITRUS_STDENC_DECLS(ISO2022); 1290 _CITRUS_STDENC_DEF_OPS(ISO2022); 1291 1292 #include "citrus_stdenc_template.h" 1293