/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1995, by Sun Microsystems, Inc.
 * All rights reserved.
 */

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <libintl.h>


#define	MSB	0x80	/* most significant bit */
#define	MBYTE	0x8e	/* multi-byte (4 byte character) */
#define	PMASK	0xa0	/* plane number mask */
#define	ONEBYTE	0xff	/* right most byte */
#define	MSB_OFF	0x7f	/* mask off MSB */

#define	SI	0x0f	/* shift in */
#define	SO	0x0e	/* shift out */
#define	ESC	0x1b	/* escape */

/*
 * Two alphabets of designator "final" characters, indexed by plane number,
 * are referenced by this file:
 *
 *	"0GH23456789:;<=>?"	plane n (n >= 3) is the character '0' + n - 1
 *	"0GHIJKLMNOPQRSTUV"	plane n (n >= 1) is the character 'F' + n
 *
 * get_plane_no_by_iso() decodes the second form; ygetplaneno() decodes both.
 */

#define	NON_ID_CHAR	'_'	/* non-identified character */

/* Conversion state of the table-free state-machine converter (_icv_*). */
typedef struct _icv_state {
	char	keepc[4];	/* maximum # byte of CNS11643 code */
	short	cstate;		/* state machine id */
	int	plane_no;	/* plane number for Chinese character */
	int	_errno;		/* internal errno */
} _iconv_st;

enum _CSTATE { C0, C1, C2, C3, C4, C5, C6, C7 };


static int get_plane_no_by_iso(const char);
static int iso_to_cns(int, char[], char *, size_t);

#define	LSG2	0x4e	/* ESC N: switch to the plane designated to G2 */
#define	LSG3	0x4f	/* ESC O: switch to the plane designated to G3 */


/*
 * Input/output cursor shared by the _cv_* converter objects.  mybuf is a
 * small LIFO of pushed-back input bytes that survives between calls.
 */
typedef struct IOBuf {
	char	*myin;		/* next unread input byte */
	char	*myout;		/* next free output byte */
	size_t	insize;		/* input bytes remaining at myin */
	size_t	outsize;	/* output bytes remaining at myout */

	char	mybuf[8];	/* pushed-back input bytes (stack) */
	int	bufc;		/* number of bytes held in mybuf */
} IOBuf;

/* Character converter: remembers the currently selected plane. */
typedef struct Conversion {
	int	myplane;	/* 0 = pass through, 1 = 2-byte, else 4-byte */
} Conversion;

/* Escape sequence / shift interpreter. */
typedef struct GxCntl {

	int	gxplane[4];	/* plane designated to G0..G3 */
	char	gxc;		/* last intermediate byte: ( ) * + */

	int	mygx;		/* G-set index selected by gxc */
	int	inHLE1xConv;	/* set after "ESC $ ) 0" (HLE 1.x stream) */
	int	inHLE1xSO;	/* HLE 1.x only: SO already seen */
	Conversion *convobj;	/* converter whose plane is switched */

} GxCntl;


/* Aggregate of the three cooperating objects above. */
typedef struct TWNiconv {
	GxCntl		*cntl;
	Conversion	*conv;
	IOBuf		*iobuf;

} TWNiconv;

struct _cv_state {
	TWNiconv	*iconvobj;
};

extern TWNiconv	*aTWNiconv(void);
extern void	adeTWNiconv(TWNiconv *);
extern size_t	aisotoeuc(TWNiconv *, char **, size_t *, char **, size_t *);
extern void	areset(TWNiconv *);

extern Conversion *zConversion(void);
extern void	zdeConversion(Conversion *);
extern void	zsetplane(Conversion *, int);
extern int	zconversion(Conversion *, IOBuf *);

extern GxCntl	*yGxCntl(Conversion *);
extern void	ydeGxCntl(GxCntl *);
extern int	ygetplaneno(GxCntl *, char c);
extern int	yescSeq(GxCntl *, IOBuf *);

extern IOBuf	*xIOBuf(void);
extern void	xdeIOBuf(IOBuf *);
extern int	xgetc(IOBuf *);
extern void	xbackup(IOBuf *, int);
extern int	xputc(IOBuf *, int);
extern int	xoutsize(IOBuf *);


/*
 * Open; called from iconv_open()
 *
 * Returns a freshly allocated state in C0, or (void *)-1 with errno set
 * to ENOMEM when the allocation fails.
 */
void *
_icv_open(void)
{
	_iconv_st *st = malloc(sizeof (*st));

	if (st == NULL) {
		errno = ENOMEM;
		return ((void *)-1);
	}

	st->keepc[0] = st->keepc[1] = st->keepc[2] = st->keepc[3] = '\0';
	st->cstate = C0;
	st->plane_no = 0;
	st->_errno = 0;

#ifdef DEBUG
	fprintf(stderr, "========== iconv(): ISO2022-7 --> CNS 11643 ==========\n");
#endif
	return ((void *)st);
}


/*
 * Close; called from iconv_close()
 *
 * A NULL handle is reported through errno = EBADF.
 */
void
_icv_close(_iconv_st *st)
{
	if (st == NULL) {
		errno = EBADF;
		return;
	}
	free(st);
}


/*
 * Actual conversion; called from iconv()
 */
/*
 * =========================================================================
 *
 *	State Machine for interpreting ISO 2022-7 code
 *
 * =========================================================================
 *
 *                                               plane 2 - 16
 *                                           +---------->-------+
 *                        plane              ^                  |
 *     ESC      $       )  number     SO     | plane 1          v
 * +-> C0 ----> C1 ---> C2 ---> C3 ------> C4 --> C5 -------> C6     C7
 * |    | ascii  | ascii | ascii |   ascii  |  SI  |           |      |
 * +----------------------------+    <-----+------+   +------<---+------+
 *                                   ^                                |
 *                                   |            ascii               v
 *                                   +---------<-------------<---------+
 *
 * =========================================================================
 *
 * st		state from _icv_open(); NULL fails with EBADF.
 * inbuf	NULL or *inbuf == NULL requests a reset to the initial state.
 *
 * Consumes input and produces output while both buffers have room, and
 * advances all four caller cursors.  Returns the number of input bytes
 * left unconverted, or (size_t)-1 with errno set (E2BIG when the output
 * buffer cannot take the next sequence).  errno is cleared on entry.
 */
size_t
_icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
    char **outbuf, size_t *outbytesleft)
{
	int	n;

	if (st == NULL) {
		errno = EBADF;
		return ((size_t)-1);
	}

	if (inbuf == NULL || *inbuf == NULL) {	/* Reset request. */
		st->cstate = C0;
		st->_errno = 0;
		return ((size_t)0);
	}

#ifdef DEBUG
	fprintf(stderr, "=== (Re-entry) iconv(): ISO 2022-7 --> CNS 11643 ===\n");
#endif
	st->_errno = 0;		/* reset internal errno */
	errno = 0;		/* reset external errno */

	/* a state machine for interpreting ISO 2022-7 code */
	while (*inbytesleft > 0 && *outbytesleft > 0) {
		switch (st->cstate) {
		case C0:		/* assuming ASCII in the beginning */
			if (**inbuf == ESC) {
				st->cstate = C1;
			} else {	/* real ASCII */
				**outbuf = **inbuf;
				(*outbuf)++;
				(*outbytesleft)--;
			}
			break;
		case C1:		/* got ESC, expecting $ */
			if (**inbuf == '$') {
				st->cstate = C2;
			} else {
				/* not a designation: emit the held ESC */
				**outbuf = ESC;
				(*outbuf)++;
				(*outbytesleft)--;
				st->cstate = C0;
				st->_errno = 0;
				continue;	/* don't advance inbuf */
			}
			break;
		case C2:		/* got $, expecting ) */
			if ((**inbuf == ')') || (**inbuf == '*')) {
				st->cstate = C3;
			} else {
				if (*outbytesleft < 2) {
					st->_errno = errno = E2BIG;
					return ((size_t)-1);
				}
				**outbuf = ESC;
				*(*outbuf + 1) = '$';
				(*outbuf) += 2;
				(*outbytesleft) -= 2;
				st->cstate = C0;
				st->_errno = 0;
				continue;	/* don't advance inbuf */
			}
			break;
		case C3:		/* got ) expecting G,H,I,...,V */
			st->plane_no = get_plane_no_by_iso(**inbuf);
			if (st->plane_no > 0) {	/* plane #1 - #16 */
				st->cstate = C4;
			} else {
				if (*outbytesleft < 3) {
					st->_errno = errno = E2BIG;
					return ((size_t)-1);
				}
				**outbuf = ESC;
				*(*outbuf + 1) = '$';
				*(*outbuf + 2) = ')';
				(*outbuf) += 3;
				(*outbytesleft) -= 3;
				st->cstate = C0;
				st->_errno = 0;
				continue;	/* don't advance inbuf */
			}
			break;
		case C4:		/* SI (Shift In) */
			if (**inbuf == ESC) {
				st->cstate = C1;
				break;
			}
			if (**inbuf == SO) {
#ifdef DEBUG
				fprintf(stderr, "<-------------- SO -------------->\n");
#endif
				st->cstate = C5;
			} else {	/* ASCII */
				**outbuf = **inbuf;
				(*outbuf)++;
				(*outbytesleft)--;
				st->cstate = C0;
				st->_errno = 0;
			}
			break;
		case C5:		/* SO (Shift Out) */
			if (**inbuf == SI) {
#ifdef DEBUG
				fprintf(stderr, ">-------------- SI --------------<\n");
#endif
				st->cstate = C4;
			} else {	/* 1st Chinese character */
				if (st->plane_no == 1) {
					st->keepc[0] = (char)(**inbuf | MSB);
					st->cstate = C6;
				} else {	/* 4-byte code: plane #2 - #16 */
					st->keepc[0] = (char)MBYTE;
					st->keepc[1] = (char)(PMASK +
					    st->plane_no);
					st->keepc[2] = (char)(**inbuf | MSB);
					st->cstate = C7;
				}
			}
			break;
		case C6:		/* plane #1: 2nd Chinese character */
			st->keepc[1] = (char)(**inbuf | MSB);
			/* keepc holds chars: terminate with '\0', not NULL */
			st->keepc[2] = st->keepc[3] = '\0';
			n = iso_to_cns(1, st->keepc, *outbuf, *outbytesleft);
			if (n > 0) {
				(*outbuf) += n;
				(*outbytesleft) -= n;
			} else {
				/* state stays C6 so this byte is retried */
				st->_errno = errno;
				return ((size_t)-1);
			}
			st->cstate = C5;
			break;
		case C7:		/* 4th Chinese character */
			st->keepc[3] = (char)(**inbuf | MSB);
			n = iso_to_cns(st->plane_no, st->keepc, *outbuf,
			    *outbytesleft);
			if (n > 0) {
				(*outbuf) += n;
				(*outbytesleft) -= n;
			} else {
				/* state stays C7 so this byte is retried */
				st->_errno = errno;
				return ((size_t)-1);
			}
			st->cstate = C5;
			break;
		default:		/* should never come here */
			st->_errno = errno = EILSEQ;
			st->cstate = C0;	/* reset state */
			break;
		}

		(*inbuf)++;
		(*inbytesleft)--;

		if (st->_errno) {
#ifdef DEBUG
			/*
			 * Report the byte just consumed; *inbuf may already
			 * point one past the end of the caller's buffer.
			 */
			fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\tinbuf=%x\n",
			    st->_errno, st->cstate,
			    (unsigned char)(*inbuf)[-1]);
#endif
			break;
		}
		if (errno)
			return ((size_t)-1);
	}

	if (*inbytesleft > 0 && *outbytesleft == 0) {
		errno = E2BIG;
		return ((size_t)-1);
	}
	return (*inbytesleft);
}


/*
 * Get plane number by ISO plane char; i.e. 'G' returns 1, 'H' returns 2,
 * ... 'V' returns 16.  '0' returns 0.
 * Returns -1 on error conditions
 */
static int
get_plane_no_by_iso(const char inbuf)
{
	unsigned char	uc = (unsigned char)inbuf;

	if (uc == '0')		/* plane #0 */
		return (0);

	/* 'G' (0x8EA1) through 'V' (0x8EB0) name planes 1 through 16 */
	if (uc >= 'G' && uc <= 'V')
		return (uc - 'F');

	return (-1);
}


/*
 * ISO 2022-7 code --> CNS 11643-1992 (Chinese EUC)
 *
 * Copies the assembled code in keepc to buf: 2 bytes for plane 1,
 * 4 bytes for planes 2 through 16.
 *
 * Return: > 0 - converted with enough space in output buffer
 *         = 0 - no space in outbuf (errno is set to E2BIG)
 */
static int
iso_to_cns(int plane_no, char keepc[], char *buf, size_t buflen)
{
	size_t	ret_size;		/* return buffer size */

#ifdef DEBUG
	/* keepc is not NUL-terminated when all four bytes are in use */
	fprintf(stderr, "%.4s %d ", keepc, plane_no);
#endif
	ret_size = (plane_no == 1) ? 2 : 4;

	if (buflen < ret_size) {
		errno = E2BIG;
		return (0);
	}

	if (plane_no == 1) {
		buf[0] = keepc[0];
		buf[1] = keepc[1];
	} else if (plane_no >= 2 && plane_no <= 16) {
		buf[0] = keepc[0];
		buf[1] = keepc[1];
		buf[2] = keepc[2];
		buf[3] = keepc[3];
	}

#ifdef DEBUG
	fprintf(stderr, "\t#%d ->%.4s<-\n", plane_no, keepc);
#endif

	return ((int)ret_size);
}

/*
 * Open the object-based converter.
 * Returns the new state, or (void *)-1 when any allocation fails.
 */
void *
_cv_open(void)
{
	struct _cv_state *st = malloc(sizeof (*st));

	if (st == NULL)
		return ((void *)-1);

	if ((st->iconvobj = aTWNiconv()) == NULL) {
		free(st);
		return ((void *)-1);
	}

	return ((void *)st);
}

/*
 * Close the object-based converter.  A NULL handle is reported through
 * errno = EBADF, matching _icv_close().
 */
void
_cv_close(struct _cv_state *st)
{
	if (st == NULL) {
		errno = EBADF;
		return;
	}
	adeTWNiconv(st->iconvobj);
	free(st);
}


/*
 * Convert with the object-based converter.
 *
 * A NULL cvinbuf or *cvinbuf resets the converter and returns 0;
 * otherwise the result of aisotoeuc() is returned.  A NULL st fails
 * with EBADF, matching _icv_iconv().
 */
size_t
_cv_enconv(struct _cv_state *st, char **cvinbuf, size_t *cvinbytesleft,
    char **cvoutbuf, size_t *cvoutbytesleft)
{
	if (st == NULL) {
		errno = EBADF;
		return ((size_t)-1);
	}

	if (cvinbuf == NULL || *cvinbuf == NULL) {	/* Reset request. */
		/*
		 * Note that no shift sequence is needed for
		 * the target encoding.
		 */
		areset(st->iconvobj);
		return (0);
	}

	return (aisotoeuc(st->iconvobj, cvinbuf, cvinbytesleft,
	    cvoutbuf, cvoutbytesleft));
}

/*
 * Allocate a TWNiconv together with its three sub-objects.
 * Returns NULL, with nothing left allocated, if any allocation fails.
 */
TWNiconv *
aTWNiconv(void)
{
	TWNiconv *ret = malloc(sizeof (*ret));

	if (ret == NULL)
		return (NULL);
	if ((ret->conv = zConversion()) == NULL) {
		free(ret);
		return (NULL);
	}
	if ((ret->cntl = yGxCntl(ret->conv)) == NULL) {
		zdeConversion(ret->conv);
		free(ret);
		return (NULL);
	}
	if ((ret->iobuf = xIOBuf()) == NULL) {
		ydeGxCntl(ret->cntl);
		zdeConversion(ret->conv);
		free(ret);
		return (NULL);
	}
	return (ret);
}

/*
 * Run the conversion loop over the caller's buffers.
 *
 * Each iteration first lets yescSeq() consume an escape/shift sequence,
 * then lets zconversion() convert one character.  The loop ends when
 * either reports -1 (input exhausted or no room in the output).  Bytes of
 * an unfinished sequence stay in the IOBuf push-back stack for the next
 * call.  The caller's cursors are updated and the remaining input count
 * is returned; errno is not set here.
 */
size_t
aisotoeuc(TWNiconv *this, char **inbuf, size_t *inbufsize,
    char **outbuf, size_t *outbufsize)
{
	IOBuf	*io = this->iobuf;

	io->myin = *inbuf;
	io->myout = *outbuf;
	io->insize = *inbufsize;
	io->outsize = *outbufsize;

	for (;;) {
		int ret = yescSeq(this->cntl, io);

		if (ret == -1)
			break;
		if (ret != 0)
			continue;	/* a control sequence was consumed */

		if (zconversion(this->conv, io) == -1)
			break;
	}

	*inbuf = io->myin;
	*outbuf = io->myout;
	*inbufsize = io->insize;
	*outbufsize = io->outsize;

	return (*inbufsize);
}

/* Destroy a TWNiconv and its sub-objects.  NULL is ignored. */
void
adeTWNiconv(TWNiconv *this)
{
	if (this == NULL)
		return;
	zdeConversion(this->conv);
	ydeGxCntl(this->cntl);
	xdeIOBuf(this->iobuf);
	free(this);
}

/*
 * Return the converter to its initial state.
 *
 * The sub-objects are reset in place rather than freed and reallocated,
 * so a reset cannot fail and cannot leave NULL members behind.
 */
void
areset(TWNiconv *this)
{
	int	i;

	if (this == NULL)
		return;

	this->conv->myplane = 0;

	this->cntl->convobj = this->conv;
	for (i = 0; i < 4; i++)
		this->cntl->gxplane[i] = 0;
	this->cntl->inHLE1xConv = 0;
	this->cntl->inHLE1xSO = 0;

	this->iobuf->bufc = 0;		/* drop pushed-back bytes */
}

/* Allocate a Conversion set to plane 0.  Returns NULL on failure. */
Conversion *
zConversion(void)
{
	Conversion *ret = malloc(sizeof (*ret));

	if (ret == NULL)
		return (NULL);
	ret->myplane = 0;
	return (ret);
}

/* Destroy a Conversion. */
void
zdeConversion(Conversion *this)
{
	free(this);
}

/* Select the plane used by subsequent zconversion() calls. */
void
zsetplane(Conversion *this, int i)
{
	this->myplane = i;
}

/*
 * Convert one character in the current plane.
 *
 *	plane 0		one byte, copied unchanged
 *	plane 1		two bytes, each with MSB set
 *	otherwise	two bytes, emitted as MBYTE, PMASK + plane, and the
 *			two bytes with MSB set
 *
 * Returns 0 on success, -1 when the output has no room for the whole
 * character or the input runs out.  A first byte read without its second
 * is pushed back.
 */
int
zconversion(Conversion *this, IOBuf *ioobj)
{
	int	c1, c2, c;

	switch (this->myplane) {

	case 0:
		if (xoutsize(ioobj) < 1)
			return (-1);

		if ((c = xgetc(ioobj)) == -1)
			return (-1);
		xputc(ioobj, c);
		return (0);
	case 1:
		if (xoutsize(ioobj) < 2)
			return (-1);

		if ((c1 = xgetc(ioobj)) == -1)
			return (-1);
		if ((c2 = xgetc(ioobj)) == -1) {
			xbackup(ioobj, c1);
			return (-1);
		}
		xputc(ioobj, c1 | MSB);
		xputc(ioobj, c2 | MSB);
		return (0);
	default:	/* plane 2 to 16 */
		if (xoutsize(ioobj) < 4)
			return (-1);

		if ((c1 = xgetc(ioobj)) == -1)
			return (-1);
		if ((c2 = xgetc(ioobj)) == -1) {
			xbackup(ioobj, c1);
			return (-1);
		}
		xputc(ioobj, MBYTE);
		xputc(ioobj, PMASK + this->myplane);
		xputc(ioobj, c1 | MSB);
		xputc(ioobj, c2 | MSB);
		return (0);
	}
}

/*
 * Allocate a GxCntl bound to the given Conversion, with every G-set
 * designated to plane 0.  Returns NULL on failure.
 */
GxCntl *
yGxCntl(Conversion *obj)
{
	GxCntl *ret = malloc(sizeof (*ret));

	if (ret == NULL)
		return (NULL);

	ret->convobj = obj;
	ret->gxplane[0] = ret->gxplane[1] = ret->gxplane[2] =
	    ret->gxplane[3] = 0;
	ret->gxc = '\0';
	ret->mygx = 0;
	ret->inHLE1xConv = 0;
	ret->inHLE1xSO = 0;
	return (ret);
}

/* Destroy a GxCntl (the bound Conversion is not freed). */
void
ydeGxCntl(GxCntl *this)
{
	free(this);
}

/*
 * Try to consume one control sequence from the input.
 *
 * Recognized: SI, SO, ESC N (LSG2), ESC O (LSG3), ESC ( F, and
 * ESC $ {( ) * +} F.
 *
 * Returns
 *	 1	a sequence was consumed; call again
 *	 0	the next byte is data (everything read was pushed back)
 *	-1	input ran out; bytes of an unfinished sequence are pushed
 *		back so that the next call can resume
 */
int
yescSeq(GxCntl *this, IOBuf *obj)
{
	int	c = xgetc(obj);

	if (c == -1)
		return (-1);

	switch (c) {
	case ESC:
		break;
	case SI:
		zsetplane(this->convobj, this->gxplane[0]);
		if (this->inHLE1xConv == 1)
			this->inHLE1xSO = 0;
		return (1);
	case SO:
		if (this->inHLE1xConv == 1) {
			/* HLE 1.x: a second SO while shifted out is data */
			if (this->inHLE1xSO != 0) {
				xbackup(obj, SO);
				return (0);
			} else
				this->inHLE1xSO = 1;

		}
		zsetplane(this->convobj, this->gxplane[1]);
		return (1);
	default:
		xbackup(obj, c);
		return (0);
	}

	if ((c = xgetc(obj)) == -1) {
		/*
		 * Input ended right after ESC.  This must report "out of
		 * input": returning 1 would make the caller retry at once
		 * and re-read the same pushed-back ESC forever.
		 */
		xbackup(obj, ESC);
		return (-1);
	}

	switch (c) {

	case LSG2:
		zsetplane(this->convobj, this->gxplane[2]);
		return (1);
	case LSG3:
		zsetplane(this->convobj, this->gxplane[3]);
		return (1);
	case '$':
		break;
	case '(':
		if (xgetc(obj) != -1) {
			/*
			 * ESC ( F is complete here.  Return instead of
			 * continuing into the ESC $ parsing below, which
			 * would take the next data byte as a designator.
			 */
			this->gxplane[0] = 0;
			return (1);
		}
		/* FALLTHROUGH */
	default:
		xbackup(obj, c);
		xbackup(obj, ESC);
		return (0);
	}

	/*
	 * Read into an int before storing in the char member, so that the
	 * -1 end marker is not confused with a data byte.
	 */
	if ((c = xgetc(obj)) == -1) {
		xbackup(obj, '$');
		xbackup(obj, ESC);
		return (-1);
	}
	this->gxc = (char)c;

	switch (c) {

	case '(':
		this->mygx = 0;
		break;
	case ')':
		this->mygx = 1;
		break;
	case '*':
		this->mygx = 2;
		break;
	case '+':
		this->mygx = 3;
		break;
	default:
		xbackup(obj, c);
		xbackup(obj, '$');
		xbackup(obj, ESC);
		return (0);
	}

	if ((c = xgetc(obj)) == -1) {
		xbackup(obj, this->gxc);
		xbackup(obj, '$');
		xbackup(obj, ESC);
		return (-1);
	}

	if (c == '0' && this->mygx == 1) {	/* HLE 1.x */
		this->inHLE1xConv = 1;
		this->inHLE1xSO = 0;
		this->gxplane[1] = 1;
	} else {
		this->inHLE1xConv = 0;
		this->gxplane[this->mygx] = ygetplaneno(this, (char)c);
	}
	return (1);
}

/*
 * Map a designator final character to a plane number.
 *
 * 'G'..'V' name planes 1..16, as in get_plane_no_by_iso().  Any other
 * character is decoded as '0' + plane - 1, so '2' is plane 3 and '?' is
 * plane 16.  The first argument is unused.
 */
int
ygetplaneno(GxCntl *dummy, char c)
{
	(void) dummy;

	if (c >= 'G' && c <= 'V')
		return (c - 'F');
	return (c - '0' + 1);
}

/*
 * Allocate an IOBuf with an empty push-back stack and no buffers
 * attached.  Returns NULL on failure.
 */
IOBuf *
xIOBuf(void)
{
	IOBuf *ret = malloc(sizeof (*ret));

	if (ret == NULL)
		return (NULL);
	ret->myin = NULL;
	ret->myout = NULL;
	ret->insize = 0;
	ret->outsize = 0;
	ret->bufc = 0;
	return (ret);
}

/* Destroy an IOBuf (the attached buffers belong to the caller). */
void
xdeIOBuf(IOBuf *this)
{
	free(this);
}

/*
 * Fetch the next input byte, taking pushed-back bytes first.
 *
 * Returns the byte as a value in 0..255, or -1 when no input is left.
 * The byte goes through unsigned char so that 0xFF cannot be mistaken
 * for the -1 end marker.
 */
int
xgetc(IOBuf *this)
{
	if (this->bufc > 0)
		return ((unsigned char)this->mybuf[--this->bufc]);

	if (this->insize == 0)
		return (-1);

	this->insize--;
	return ((unsigned char)*this->myin++);
}

/*
 * Append one byte to the output.
 * Returns 0, or -1 when the output buffer is full.
 */
int
xputc(IOBuf *this, int c)
{
	if (this->outsize == 0)
		return (-1);
	*(this->myout)++ = (char)c;
	this->outsize--;
	return (0);
}

/*
 * Push a byte back so that the next xgetc() returns it (last in, first
 * out).  A push onto a full stack is dropped rather than written past
 * the end of mybuf.
 */
void
xbackup(IOBuf *this, int c)
{
	if (this->bufc < (int)sizeof (this->mybuf))
		this->mybuf[this->bufc++] = (char)c;
}

/* Number of bytes still free in the output buffer. */
int
xoutsize(IOBuf *this)
{
	return ((int)this->outsize);
}