1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright(c) 1998 Sun Microsystems, Inc. 23 * All rights reserved. 24 */ 25 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <strings.h> 29 #include <errno.h> 30 #ifdef DEBUG 31 #include <sys/fcntl.h> 32 #include <sys/stat.h> 33 #endif 34 #include <cns11643_big5.h> /* CNS 11643 to Big-5 mapping table */ 35 #include <big5_gb2312.h> /* Big-5 to GB mapping table */ 36 37 #define MSB 0x80 /* most significant bit */ 38 #define MBYTE 0x8e /* multi-byte (4 byte character) */ 39 #define PMASK 0xa0 /* plane number mask */ 40 #define ONEBYTE 0xff /* right most byte */ 41 #define MSB_OFF 0x7f /* mask off MBS */ 42 43 #define SI 0x0f /* shift in */ 44 #define SO 0x0e /* shift out */ 45 #define ESC 0x1b /* escape */ 46 #define SS2 0x4e /* SS2 shift out */ 47 #define SS3 0x4f /* SS3 shift out */ 48 #define NON_ID_CHAR_BYTE1 0xA1 /* non-identified character */ 49 #define NON_ID_CHAR_BYTE2 0xF5 /* non-identified character */ 50 51 typedef struct _icv_state { 52 char _buf[10]; 53 size_t _bufcont; 54 char _keepc[4]; /* maximum # byte of CNS11643 code */ 55 short _gstate; /* state machine id */ 56 short _istate; /* state for shift in/out 
*/ 57 int _plane; /* plane number for Chinese character */ 58 int _last_plane; /* last charactor's plane # */ 59 int _errno; /* internal errno */ 60 } _iconv_st; 61 62 int binsearch_big5_gb(unsigned int big5code); 63 64 enum _GSTATE { G0, G1, G2, G3, G4, G5, G6, G7, G8, G9, \ 65 G10,G11,G12,G13,G14,G15,G16,G17,G18,G19, \ 66 G20,G21,G22,G23,G24,G25,G26,G27,G28,G29 }; 67 68 enum _ISTATE { IN, OUT }; 69 70 71 int iso_gb_to_gb(_iconv_st * st, char* buf, size_t buflen); 72 int iso_to_big5_to_gb(_iconv_st * st, char* buf, size_t buflen); 73 int binsearch(unsigned long x, table_t v[], int n); 74 int flush_buf(_iconv_st * st, char ** outbuf, size_t * outbytesleft); 75 76 int flush_buf(_iconv_st * st, char ** outbuf, size_t * outbytesleft) { 77 if (!st->_bufcont) 78 return 0; 79 if (st->_bufcont > *outbytesleft) { 80 st->_errno = E2BIG; 81 return -1; 82 } 83 if (st->_istate != IN) { 84 st->_errno = EILSEQ; 85 return -1; 86 } 87 strncpy(st->_buf, *outbuf, st->_bufcont); 88 (*outbuf)+=(st->_bufcont); 89 (*outbytesleft)-=(st->_bufcont); 90 st->_bufcont = 0; 91 return st->_bufcont; 92 } 93 94 /* 95 * Open; called from iconv_open() 96 */ 97 void * 98 _icv_open() 99 { 100 _iconv_st *st; 101 102 if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) { 103 errno = ENOMEM; 104 return ((void *) -1); 105 } 106 107 st->_gstate = G0; 108 st->_istate = IN; 109 st->_last_plane = st->_plane = -1; 110 st->_errno = 0; 111 st->_bufcont = 0; 112 113 return ((void *) st); 114 } 115 116 /* 117 * Close; called from iconv_close() 118 */ 119 void 120 _icv_close(_iconv_st *st) 121 { 122 if (st == NULL) 123 errno = EBADF; 124 else 125 free(st); 126 } 127 128 /* 129 * Actual conversion; called from iconv() 130 */ 131 /*========================================================================= 132 * 133 * State Machine for interpreting ISO 2022-7 code 134 * 135 *========================================================================= 136 * 137 * plane 2 - 16 138 * +---------->-------+ 139 * plane ^ 
| 140 * ESC $ ) number SO | plane 1 v 141 * +-> G0 ----> G1 ---> G2 ---> G3 ------> G4 --> G5 -------> G6 G7 142 * | | ascii | ascii | ascii | ascii | SI | | | | 143 * +----------------------------+ <-----+------+ +------<---+------+ 144 * ^ | 145 * | ascii v 146 * +---------<-------------<---------+ 147 * 148 *=========================================================================*/ 149 size_t _icv_iconv(_iconv_st *st, \ 150 char **inbuf, size_t *inbytesleft, \ 151 char **outbuf, size_t *outbytesleft) { 152 int n; 153 char c; 154 155 if (st == NULL) { 156 errno = EBADF; 157 return ((size_t) -1); 158 } 159 160 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */ 161 st->_gstate = G0; 162 st->_istate = IN; 163 st->_errno = 0; 164 st->_plane = st->_last_plane = -1; 165 return ((size_t) 0); 166 } 167 168 errno = st->_errno = 0; /* reset internal and external errno */ 169 170 /* a state machine for interpreting ISO 2022-7 code */ 171 while (*inbytesleft > 0 && *outbytesleft > 0) { 172 switch (st->_gstate) { 173 case G0: /* assuming ASCII in the beginning */ 174 if (**inbuf == ESC) { 175 st->_gstate = G1; 176 st->_buf[st->_bufcont++] = ESC; 177 } else { /* real ASCII */ 178 **outbuf = **inbuf; 179 (*outbuf)++; 180 (*outbytesleft)--; 181 } 182 break; 183 case G1: /* got ESC, expecting $ */ 184 if (**inbuf == '$') { 185 st->_gstate = G2; 186 st->_buf[st->_bufcont++] = '$'; 187 } else if (flush_buf(st, outbuf, outbytesleft) == -1) { 188 errno = st->_errno; 189 return (size_t)-1; 190 } else { 191 st->_gstate = G0; 192 st->_errno = 0; 193 st->_istate = IN; 194 continue; /* don't advance inbuf */ 195 } 196 break; 197 case G2: /* got $, expecting ) * or + */ 198 if (**inbuf == ')') { 199 st->_gstate = G3; 200 } else if (**inbuf == '*') { 201 st->_gstate = G12; 202 st->_plane = 2; 203 } else if (**inbuf == '+') { 204 st->_gstate = G19; 205 } else if (flush_buf(st, outbuf, outbytesleft) == -1) { 206 errno = st->_errno; 207 return (size_t)-1; 208 } else { 209 st->_gstate 
= G0; 210 st->_errno = 0; 211 st->_istate = IN; 212 continue; /* don't advance inbuf */ 213 } 214 st->_buf[st->_bufcont++] = **inbuf; 215 break; 216 case G3: /* got ) expecting A,G,H */ 217 /* H is for the bug of and zh_TW.BIG5 */ 218 if (**inbuf == 'A') { 219 st->_plane = 0; 220 st->_gstate = G4; 221 } else if (**inbuf == 'G') { 222 st->_plane = 1; 223 st->_gstate = G8; 224 } else if (**inbuf == 'H') { 225 st->_plane = 2; 226 st->_gstate = G8; 227 } else if (flush_buf(st, outbuf, outbytesleft) == -1) { 228 errno = st->_errno; 229 return (size_t)-1; 230 } else { 231 st->_gstate = G0; 232 st->_errno = 0; 233 st->_istate = IN; 234 continue; 235 } 236 st->_buf[st->_bufcont++] = **inbuf; 237 break; 238 case G4: /* ESC $ ) A got, and SO is expected */ 239 if (**inbuf == SO) { 240 st->_gstate = G5; 241 st->_istate = OUT; 242 st->_bufcont = 0; 243 st->_last_plane = st->_plane; 244 } else if (flush_buf(st, outbuf, outbytesleft) == -1) { 245 errno = st->_errno; 246 return (size_t)-1; 247 } else { 248 st->_gstate = G0; 249 st->_errno = 0; 250 st->_istate = IN; 251 st->_plane = st->_last_plane; 252 continue; 253 } 254 break; 255 case G5: /* SO (Shift Out) */ 256 if (**inbuf == SI) { 257 st->_istate = IN; 258 st->_gstate = G7; 259 st->_last_plane = st->_plane; 260 } else if (**inbuf == ESC) { 261 st->_bufcont = 0; 262 st->_gstate = G0; 263 continue; 264 } else { /* Chinese Charactors */ 265 st->_keepc[0] = **inbuf; 266 st->_gstate = G6; 267 } 268 break; 269 case G6: /* GB2312: 2nd Chinese character */ 270 st->_keepc[1] = **inbuf; 271 n = iso_gb_to_gb(st, *outbuf, *outbytesleft); 272 if (n > 0) { 273 (*outbuf) += n; 274 (*outbytesleft) -= n; 275 } else { 276 errno = st->_errno; 277 return (size_t)-1; 278 } 279 st->_gstate = G5; 280 break; 281 case G7: /* Shift in */ 282 if (**inbuf == SO) { 283 st->_gstate = G5; 284 st->_istate = OUT; 285 st->_last_plane = st->_plane; 286 st->_bufcont = 0; 287 } else if (**inbuf == ESC) { 288 st->_gstate = G0; 289 continue; 290 } else { 291 
**outbuf = **inbuf; 292 (*outbuf)++; 293 (*outbytesleft) --; 294 } 295 break; 296 case G8: /* BIG5: Chinese character */ 297 if (**inbuf == SO) { 298 st->_istate = OUT; 299 st->_gstate = G9; 300 st->_bufcont = 0; 301 st->_last_plane = st->_plane; 302 } else if (flush_buf(st, outbuf, outbytesleft) == -1) { 303 errno = st->_errno; 304 return (size_t)-1; 305 } else { 306 st->_gstate = G0; 307 st->_errno = 0; 308 st->_plane = st->_last_plane; 309 st->_istate = IN; 310 continue; 311 } 312 break; 313 case G9: 314 if (**inbuf == SI) { 315 st->_istate = IN; 316 st->_gstate = G11; 317 st->_last_plane = st->_plane; 318 } else if (**inbuf == ESC) { 319 if (flush_buf(st, outbuf, outbytesleft) == -1) { 320 errno = st->_errno; 321 return (size_t)-1; 322 } 323 st->_gstate = G0; 324 continue; 325 } else { /* Chinese Charactor */ 326 st->_keepc[0] = **inbuf; 327 st->_gstate = G10; 328 } 329 break; 330 case G10: 331 st->_keepc[1] = **inbuf; 332 n = iso_to_big5_to_gb(st, *outbuf, *outbytesleft); 333 if (n > 0) { 334 (*outbuf) += n; 335 (*outbytesleft) -= n; 336 } else { 337 errno = st->_errno; 338 return (size_t)-1; 339 } 340 st->_gstate = G9; 341 break; 342 case G11: 343 st->_bufcont = 0; 344 if (**inbuf == SO) { 345 st->_istate = OUT; 346 st->_gstate = G9; 347 } else if (**inbuf == ESC) { 348 st->_gstate = G0; 349 continue; 350 } else { 351 **outbuf = **inbuf; 352 (*outbuf)++; 353 (*outbytesleft)--; 354 } 355 break; 356 case G12: 357 if (**inbuf == 'H') { 358 st->_buf[st->_bufcont++] = 'H'; 359 st->_gstate = G13; 360 } else if (flush_buf(st, outbuf, outbytesleft) == -1) { 361 errno = st->_errno; 362 return (size_t)-1; 363 } else { 364 st->_istate = IN; 365 st->_plane = st->_last_plane; 366 st->_gstate = G0; 367 continue; 368 } 369 break; 370 case G13: 371 if (**inbuf == ESC) { 372 st->_buf[st->_bufcont++] = **inbuf; 373 st->_gstate = G14; 374 } else if (flush_buf(st, outbuf, outbytesleft) == -1) { 375 errno = st->_errno; 376 return (size_t)-1; 377 } else { 378 st->_gstate = G0; 379 
st->_istate = IN; 380 st->_plane = st->_last_plane; 381 continue; 382 } 383 break; 384 case G14: 385 if (**inbuf == SS2) { 386 st->_istate = OUT; 387 st->_gstate = G15; 388 st->_bufcont = 0; 389 st->_last_plane = st->_plane = 2; 390 } else if (**inbuf == '$') { 391 st->_bufcont --; 392 if (flush_buf(st, outbuf, outbytesleft) == -1) { 393 errno = st->_errno; 394 return (size_t)-1; 395 } else { 396 st->_gstate = G1; 397 st->_plane = st->_last_plane; 398 st->_istate = IN; 399 continue; 400 } 401 } else if (flush_buf(st, outbuf, outbytesleft) == -1) { 402 errno = st->_errno; 403 return (size_t)-1; 404 } else { 405 st->_gstate = G0; 406 st->_istate = IN; 407 st->_plane = st->_last_plane; 408 continue; 409 } 410 break; 411 case G15: 412 if (**inbuf == SI) { 413 st->_gstate = G16; 414 st->_istate = IN; 415 st->_last_plane = st->_plane; 416 } else if (**inbuf == ESC) { 417 st->_bufcont = 0; 418 st->_gstate = G0; 419 continue; 420 } else { 421 st->_keepc[0] = **inbuf; 422 st->_gstate = G18; 423 } 424 break; 425 case G16: 426 if (**inbuf == ESC) { 427 st->_gstate = G17; 428 st->_buf[st->_bufcont++] = ESC; 429 } else { 430 **outbuf = **inbuf; 431 (*outbuf) ++; 432 (*outbytesleft) --; 433 st->_bufcont = 0; 434 } 435 break; 436 case G17: 437 if (**inbuf == '$') { 438 st->_gstate = G1; 439 st->_buf[st->_bufcont++] = '$'; 440 continue; 441 } else if (**inbuf == SS2) { 442 st->_bufcont = 0; 443 st->_gstate = G15; 444 st->_istate = OUT; 445 } else if (flush_buf(st, outbuf, outbytesleft) == -1) { 446 errno = st->_errno; 447 return (size_t)-1; 448 } else { 449 st->_gstate = G16; 450 st->_istate = IN; 451 } 452 break; 453 case G18: 454 st->_keepc[1] = **inbuf; 455 st->_gstate = G0; 456 if ((n = iso_to_big5_to_gb(st, \ 457 *outbuf, \ 458 *outbytesleft)) > 0) { 459 (*outbuf)+=n; 460 (*outbytesleft)-=n; 461 } else { 462 errno = st->_errno; 463 return (size_t)-1; 464 } 465 break; 466 case G19: /* Plane #: 3 - 16 */ 467 c = **inbuf; 468 if (c == 'I' || \ 469 c == 'J' || \ 470 c == 'K' || \ 
471 c == 'L' || \ 472 c == 'M' || \ 473 c == 'N' || \ 474 c == 'O' || \ 475 c == 'P' || \ 476 c == 'Q' || \ 477 c == 'R' || \ 478 c == 'S' || \ 479 c == 'T' || \ 480 c == 'U' || \ 481 c == 'V') { 482 st->_plane = c - 'I' + 3; 483 st->_gstate = G20; 484 } else if (flush_buf(st, outbuf, outbytesleft) == -1) { 485 errno = st->_errno; 486 return (size_t)-1; 487 } else { 488 st->_gstate = G0; 489 st->_errno = 0; 490 st->_istate = IN; 491 st->_plane = st->_last_plane; 492 continue; 493 } 494 st->_buf[st->_bufcont++] = c; 495 break; 496 case G20: 497 if (**inbuf == ESC) { 498 st->_buf[st->_bufcont++] = **inbuf; 499 st->_gstate = G21; 500 } else if (flush_buf(st, outbuf, outbytesleft) == -1) { 501 errno = st->_errno; 502 return (size_t)-1; 503 } else { 504 st->_gstate = G0; 505 st->_istate = IN; 506 st->_last_plane = st->_plane; 507 continue; 508 } 509 break; 510 case G21: 511 if (**inbuf == SS3) { 512 st->_istate = OUT; 513 st->_gstate = G22; 514 st->_bufcont = 0; 515 } else if (**inbuf == '$') { 516 st->_bufcont --; 517 if (flush_buf(st, outbuf, outbytesleft) == -1) { 518 errno = st->_errno; 519 return (size_t)-1; 520 } else { 521 st->_istate = IN; 522 st->_last_plane = st->_plane; 523 st->_gstate = G1; 524 continue; 525 } 526 } else if (flush_buf(st, outbuf, outbytesleft) == -1) { 527 errno = st->_errno; 528 return (size_t)-1; 529 } else { 530 st->_gstate = G0; 531 st->_istate = IN; 532 st->_last_plane = st->_plane; 533 continue; 534 } 535 break; 536 case G22: 537 if (**inbuf == SI) { 538 st->_istate = IN; 539 st->_gstate = G24; 540 st->_last_plane = st->_plane; 541 } else { 542 st->_keepc[0] = (char)MBYTE; 543 st->_keepc[1] = (char)(PMASK + st->_plane); 544 st->_keepc[2] = **inbuf; 545 st->_gstate = G23; 546 } 547 break; 548 case G23: 549 st->_keepc[3] = **inbuf; 550 if ((n = iso_to_big5_to_gb(st, \ 551 *outbuf, \ 552 *outbytesleft)) > 0) { 553 (*outbuf)+=n; 554 (*outbytesleft-=n); 555 } else { 556 st->_errno = errno; 557 return (size_t)-1; 558 } 559 st->_gstate = G22; 
560 break; 561 case G24: 562 if (**inbuf == ESC) { 563 st->_gstate = G25; 564 st->_buf[st->_bufcont++] = ESC; 565 } else { 566 **outbuf = **inbuf; 567 (*outbuf)++; 568 (*outbytesleft)--; 569 st->_bufcont = 0; 570 } 571 break; 572 case G25: 573 if (**inbuf == '$') { 574 st->_gstate = G1; 575 continue; 576 } else if (**inbuf == SS3) { 577 st->_gstate = G22; 578 st->_bufcont = 0; 579 st->_istate = OUT; 580 } else if (flush_buf(st, outbuf, outbytesleft) == -1) { 581 errno = st->_errno; 582 return (size_t)-1; 583 } else { 584 st->_gstate = G24; 585 st->_istate = IN; 586 } 587 break; 588 default: /* should never come here */ 589 st->_errno = errno = EILSEQ; 590 st->_gstate = G0; /* reset state */ 591 break; 592 } /* end of switch */ 593 594 (*inbuf)++; 595 (*inbytesleft)--; 596 597 if (st->_errno) { 598 break; 599 } 600 if (errno) 601 return(-1); 602 } 603 604 if (*inbytesleft > 0 && *outbytesleft == 0) { 605 errno = E2BIG; 606 return((size_t)(-1)); 607 } 608 return ((size_t)(*inbytesleft)); 609 } 610 611 int iso_gb_to_gb(_iconv_st * st, char* buf, size_t buflen) { 612 if ( buflen < 2 ) { 613 st->_errno = E2BIG; 614 return -1; 615 } 616 *buf = st->_keepc[0] | MSB; 617 *(buf+1) = st->_keepc[1] | MSB; 618 return 2; 619 } 620 621 /* 622 * ISO 2022-7 code --> Big-5 code 623 * Return: > 0 - converted with enough space in output buffer 624 * = 0 - no space in outbuf 625 */ 626 int iso_to_big5_to_gb(_iconv_st * st, char* buf, size_t buflen) { 627 char cns_str[3], c1, c2; 628 unsigned long cns_val; /* MSB mask off CNS 11643 value */ 629 int unidx; /* binary search index */ 630 unsigned long big5_val, val; /* Big-5 code */ 631 int idx; 632 633 if (st->_plane == 1) { 634 cns_str[0] = st->_keepc[0] & MSB_OFF; 635 cns_str[1] = st->_keepc[1] & MSB_OFF; 636 } else { 637 cns_str[0] = st->_keepc[0] & MSB_OFF; 638 cns_str[1] = st->_keepc[1] & MSB_OFF; 639 } 640 cns_val = (cns_str[0] << 8) + cns_str[1]; 641 642 if (buflen < 2) { 643 errno = E2BIG; 644 return(0); 645 } 646 647 switch 
(st->_plane) { 648 case 1: 649 unidx = binsearch(cns_val, cns_big5_tab1, MAX_CNS1_NUM); 650 if (unidx >= 0) 651 big5_val = cns_big5_tab1[unidx].value; 652 break; 653 case 2: 654 unidx = binsearch(cns_val, cns_big5_tab2, MAX_CNS2_NUM); 655 if (unidx >= 0) 656 big5_val = cns_big5_tab2[unidx].value; 657 break; 658 default: 659 unidx = -1; /* no mapping from CNS to Big-5 out of plane 1&2 */ 660 break; 661 } 662 663 664 if (unidx < 0) { /* no match from CNS to Big-5 */ 665 *buf = NON_ID_CHAR_BYTE1; 666 *(buf+1) = NON_ID_CHAR_BYTE2; 667 } else { 668 val = big5_val & 0xffff; 669 *buf = c1 = (char) ((val & 0xff00) >> 8); 670 *(buf+1) = c2 = (char) (val & 0xff); 671 } 672 673 674 if (unidx < 0) { 675 return 2; 676 } else { 677 idx = binsearch_big5_gb((((*buf) & ONEBYTE) << 8) | ((*(buf+1)) & ONEBYTE)); 678 if (idx < 0) { 679 *buf = NON_ID_CHAR_BYTE1; 680 *(buf+1) = NON_ID_CHAR_BYTE2; 681 } else { 682 *buf = (big5_gb_tab[idx].value >> 8) & ONEBYTE; 683 *(buf+1) = big5_gb_tab[idx].value & ONEBYTE; 684 } 685 } 686 687 return(2); 688 } 689 690 /* binsearch: find x in v[0] <= v[1] <= ... 
<= v[n-1] */ 691 int binsearch(unsigned long x, table_t v[], int n) 692 { 693 int low, high, mid; 694 695 low = 0; 696 high = n - 1; 697 while (low <= high) { 698 mid = (low + high) / 2; 699 if (x < v[mid].key) 700 high = mid - 1; 701 else if (x > v[mid].key) 702 low = mid + 1; 703 else /* found match */ 704 return mid; 705 } 706 return (-1); /* no match */ 707 } 708 709 int binsearch_big5_gb(unsigned int big5code) 710 { 711 int low, high, mid; 712 713 low = 0; 714 high = BIG5MAX - 1; 715 while (low <= high) { 716 mid = (low + high) / 2; 717 if (big5code < big5_gb_tab[mid].key) 718 high = mid - 1; 719 else if (big5code > big5_gb_tab[mid].key) 720 low = mid + 1; 721 else /* found match */ 722 return mid; 723 } 724 return (-1); /* no match */ 725 } 726 727 int 728 iso_to_gb(char in_byte1, char in_byte2, char *buf, int buflen) 729 { 730 if ( buflen < 2 ) 731 return 0; 732 *buf = in_byte1 | MSB; 733 *(buf+1) = in_byte2 | MSB; 734 return 2; 735 } 736 737 738 /* 739 * ================================================================== 740 * enconv functions 741 * ================================================================== 742 */ 743 744 typedef struct _enconv_st { 745 char _lastc; 746 short _gstate; 747 } _enconv_st; 748 749 /* 750 * Open; called from enconv_open() 751 */ 752 void * 753 _cv_open() 754 { 755 _enconv_st *st; 756 757 if ((st = (_enconv_st *)malloc(sizeof(_enconv_st))) == NULL) { 758 return ((void *) -1); 759 } 760 761 st->_gstate = G0; 762 return ((void *)st); 763 } 764 765 766 /* 767 * Close; called from enconv_close() 768 */ 769 void 770 _cv_close(_enconv_st *st) 771 { 772 if (st != NULL) 773 free(st); 774 } 775 776 777 /* 778 * Actual conversion; called from enconv() 779 */ 780 /*======================================================================= 781 * 782 * ESC $ ) A SO 1st C 783 * +-> G0 -----> G1 ----> G2 ----> G3 ----> G4 -----> G5 ---------> G6 784 * | | ascii | ascii | ascii | |ascii| SI | | 2nd C | 785 * +-------------------------------+ 
+-<---+--------+ +-<---------+
 *=======================================================================*/
/*
 * _cv_enconv: pass ASCII through and, after "ESC $ ) A", convert the
 * two-byte characters found between SO and SI with iso_to_gb().
 *
 * Returns (size_t)-1 for a NULL state, 0 for a reset request (inbuf NULL
 * or pointing at NULL), otherwise the number of input bytes left unread.
 * Conversion stops early, without consuming the current byte, when the
 * output buffer cannot hold what has to be written.
 */
size_t
_cv_enconv(_enconv_st *st, char **inbuf, size_t*inbytesleft,
	char **outbuf, size_t*outbytesleft)
{
	int written;

	if (st == NULL)
		return -1;

	if (inbuf == NULL || *inbuf == NULL) {	/* Reset request */
		st->_gstate = G0;
		return 0;
	}

	while (*inbytesleft > 0 && *outbytesleft > 0) {
		char ch = **inbuf;

		switch (st->_gstate) {
		case G0:
			if (ch != ESC) {		/* ASCII */
				*(*outbuf)++ = ch;
				--*outbytesleft;
			} else {
				st->_gstate = G1;
			}
			break;

		case G1:
			if (ch != '$') {
				/* not a designation: emit the held ESC */
				*(*outbuf)++ = ESC;
				--*outbytesleft;
				st->_gstate = G0;
				continue;	/* re-examine ch in G0 */
			}
			st->_gstate = G2;
			break;

		case G2:
			if (ch != ')') {
				if (*outbytesleft < 2)
					return (*inbytesleft);
				(*outbuf)[0] = ESC;
				(*outbuf)[1] = '$';
				*outbuf += 2;
				*outbytesleft -= 2;
				st->_gstate = G0;
				continue;	/* re-examine ch in G0 */
			}
			st->_gstate = G3;
			break;

		case G3:
			if (ch != 'A') {
				if (*outbytesleft < 3)
					return (*inbytesleft);
				(*outbuf)[0] = ESC;
				(*outbuf)[1] = '$';
				(*outbuf)[2] = ')';
				*outbuf += 3;
				*outbytesleft -= 3;
				st->_gstate = G0;
				continue;	/* re-examine ch in G0 */
			}
			st->_gstate = G4;
			break;

		case G4:
			if (ch != SO) {
				*(*outbuf)++ = ch;
				--*outbytesleft;
			} else {
				st->_gstate = G5;
			}
			break;

		case G5:
			if (ch != SI) {
				st->_lastc = ch;	/* 1st byte of a pair */
				st->_gstate = G6;
			} else {
				st->_gstate = G4;
			}
			break;

		case G6:
			written = iso_to_gb(st->_lastc, ch, *outbuf,
			    *outbytesleft);
			if (written <= 0)
				return (*inbytesleft);
			*outbuf += written;
			*outbytesleft -= written;
			st->_gstate = G5;
			break;
		}

		++*inbuf;
		--*inbytesleft;
	}

	return (*inbytesleft);
}
#ifdef DEBUG
#include <unistd.h>	/* read(), write(), close() */

/*
 * Debug driver: reads the file named by argv[1], dumps its bytes in hex to
 * stderr, converts it with _icv_iconv() and writes the result to stdout.
 * The output buffer is allocated with the same size as the input file.
 * Exits non-zero on any failure.
 */
int
main(int argc, char ** argv) {
	char *inbuf, *outbuf, *in_tmp, *out_tmp;
	size_t inbytesleft, outbytesleft;
	size_t i;
	int fd;
	struct stat s;
	_iconv_st * st;

	if (argc < 2) {
		fprintf(stderr, "Usage: %s input\n", argv[0]);
		exit(-1);
	}
	if ((fd = open(argv[1], O_RDONLY)) == -1) {
		perror("open");
		exit(-2);
	}
	if (fstat(fd, &s) == -1) {
		perror("stat");
		exit(-3);
	}
	if (s.st_size == 0) {
		/* nothing to convert; also avoids malloc(0) returning NULL */
		close(fd);
		return (0);
	}
	inbytesleft = outbytesleft = s.st_size;
	in_tmp = inbuf = (char *)malloc(inbytesleft);
	out_tmp = outbuf = (char *)malloc(outbytesleft);
	if (!inbuf || !outbuf) {
		perror("malloc");
		exit(-1);
	}
	if (read(fd, inbuf, inbytesleft) != (ssize_t)inbytesleft) {
		perror("read");
		exit(-4);
	}
	/* go through unsigned char so bytes >= 0x80 are not sign-extended */
	for (i = 0; i < inbytesleft; i++)
		fprintf(stderr, "%x\t", (unsigned int)(unsigned char)inbuf[i]);
	fprintf(stderr, "\n");
	st = (_iconv_st *)_icv_open();
	if (st == (_iconv_st *) -1) {
		perror("_icv_open");
		exit(-1);
	}
	if (_icv_iconv(st,
	    &inbuf, &inbytesleft,
	    &outbuf, &outbytesleft) == (size_t)-1) {
		perror("icv_iconv");
		fprintf(stderr, "\ninbytesleft = %lu\n",
		    (unsigned long)inbytesleft);
		exit(-2);
	}
	if (write(1, out_tmp, s.st_size - outbytesleft) == -1) {
		perror("write");
		exit(-1);
	}
	free(in_tmp);
	free(out_tmp);
	close(fd);
	_icv_close(st);
	return (0);
}
#endif