/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1995, by Sun Microsystems, Inc.
 * All rights reserved.
 */

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <libintl.h>

#define	MSB	0x80	/* most significant bit */
#define	MBYTE	0x8e	/* multi-byte (4 byte character) */
#define	PMASK	0xa0	/* plane number mask */
#define	ONEBYTE	0xff	/* right most byte */
#define	MSB_OFF	0x7f	/* mask off MSB */

#define	SI	0x0f	/* shift in */
#define	SO	0x0e	/* shift out */
#define	ESC	0x1b	/* escape */

/* static const char plane_char[] = "0GH23456789:;<=>?"; */
/* Final byte of the "ESC $ ) F" designation, indexed by plane number 0..16. */
static const char plane_char[] = "0GHIJKLMNOPQRSTUV";

#define	MAX_PLANE	16	/* highest valid index into plane_char[] */
#define	GET_PLANEC(i)	(plane_char[i])

#define	NON_ID_CHAR	'_'	/* non-identified character */

/*
 * Per-descriptor state of the _icv_* converter.
 *
 * plane_no and pre_plane_no are kept here (rather than in locals or a
 * function-level static) so that a character split across two calls, or a
 * conversion resumed after E2BIG, still knows its plane, and so that
 * independent descriptors do not disturb each other.
 */
typedef struct _icv_state {
	char	keepc[4];	/* maximum # byte of CNS11643 code */
	short	cstate;		/* state machine id (CNS) */
	short	istate;		/* state machine id (ISO) */
	int	_errno;		/* internal errno */
	int	plane_no;	/* plane of the character held in keepc */
	int	pre_plane_no;	/* plane of the last designation emitted */
} _iconv_st;

enum _CSTATE	{ C0, C1, C2, C3, C4 };
enum _ISTATE	{ IN, OUT };


static int get_plane_no_by_char(const char);
static int cns_to_iso(int, char[], char *, size_t);

static int get_plane_no_by_str(const char *);

/* Per-descriptor state of the _cv_* converter. */
struct _cv_state {
	int	plane_no;	/* plane currently selected in the output */
	int	get_a_mbchar;	/* non-zero: next step is to read a character */
	int	more_bytes;	/* bytes still missing from the character */
	int	first_byte;	/* non-zero: next input byte starts a character */
	int	plane_changed;	/* non-zero: keepc's plane differs from output's */
	char	planec;		/* designation final byte for keepc's plane */
	char	*p;		/* where the next byte goes inside keepc */
	char	keepc[4];	/* bytes of the character being assembled */
};

/*
 * Open; called from iconv_open()
 *
 * Returns a freshly allocated state in its initial condition, or
 * (void *)-1 with errno set to ENOMEM when allocation fails.
 */
void *
_icv_open(void)
{
	_iconv_st *st;

	if ((st = malloc(sizeof (*st))) == NULL) {
		errno = ENOMEM;
		return ((void *) -1);
	}

	st->keepc[0] = st->keepc[1] = st->keepc[2] = st->keepc[3] = '\0';
	st->cstate = C0;
	st->istate = IN;
	st->_errno = 0;
	st->plane_no = -1;
	st->pre_plane_no = -1;

#ifdef DEBUG
	fprintf(stderr, "==========     iconv(): CNS11643 --> ISO 2022-7     ==========\n");
#endif

	return ((void *) st);
}


/*
 * Close; called from iconv_close()
 *
 * Frees the state; a NULL state sets errno to EBADF.
 */
void
_icv_close(_iconv_st *st)
{
	if (!st)
		errno = EBADF;
	else
		free(st);
}


/*
 * Actual conversion; called from iconv()
 */
/*=======================================================
 *
 *   State Machine for interpreting CNS 11643 code
 *
 *=======================================================
 *
 *                          (ESC,SO)   plane 2 - 16
 *                1st C         2nd C       3rd C
 *    +------> C0 -----> C1 -----------> C2 -----> C3
 *    |  ascii |  plane 1 |                   4th C |
 *    ^        |  2nd C   v                         v
 *    |        |         C4 <------<--------<-------+
 *    |        v          | (SI)
 *    +----<---+-----<----v
 *
 *=======================================================*/
/*
 * st            state returned by _icv_open(); NULL yields EBADF.
 * inbuf         NULL (or pointing at NULL) requests a reset: the state goes
 *               back to its initial condition and, when the output is
 *               currently shifted out and an output buffer is supplied, an
 *               SI byte is written first (E2BIG if it does not fit).
 * inbytesleft,
 * outbuf,
 * outbytesleft  must be valid for a conversion call; all four cursor
 *               arguments are advanced as bytes are consumed and produced.
 *
 * Returns 0 once all input has been consumed, or (size_t)-1 with errno set:
 *   E2BIG   output buffer too small; nothing of the pending character has
 *           been written and the call can be repeated with more room;
 *   EILSEQ  0x8E followed by an unsupported plane byte;
 *   EINVAL  a trailing byte without the MSB set.
 * On EILSEQ/EINVAL the input cursor has been advanced past the offending
 * byte.  Input that ends in the middle of a character is kept in the state
 * and completed by the next call.
 */
size_t
_icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
    char **outbuf, size_t *outbytesleft)
{
	int	n;

	if (st == NULL) {
		errno = EBADF;
		return ((size_t) -1);
	}

	if (inbuf == NULL || *inbuf == NULL) {	/* Reset request. */
		/*
		 * Only a shifted-out stream needs an SI to get back to the
		 * initial shift state, and only if the caller gave us
		 * somewhere to put it.
		 */
		if (st->istate == OUT && outbuf != NULL && *outbuf != NULL) {
			if (outbytesleft == NULL || *outbytesleft < 1) {
				errno = E2BIG;
				return ((size_t) -1);
			}
			**outbuf = SI;
			(*outbuf)++;
			(*outbytesleft)--;
		}
		st->cstate = C0;
		st->istate = IN;
		st->_errno = 0;
		st->plane_no = -1;
		st->pre_plane_no = -1;
		return ((size_t) 0);
	}

#ifdef DEBUG
	fprintf(stderr, "=== (Re-entry)     iconv(): CNS11643 --> ISO 2022-7     ===\n");
	fprintf(stderr, "st->cstate=%d\tst->istate=%d\tst->_errno=%d\tplane_no=%d\n",
	    st->cstate, st->istate, st->_errno, st->plane_no);
#endif
	st->_errno = 0;		/* reset internal errno */
	errno = 0;		/* reset external errno */

	/* a state machine for interpreting CNS 11643 code */
	while (*inbytesleft > 0 && *outbytesleft > 0) {
		switch (st->cstate) {
		case C0:		/* assuming ASCII in the beginning */
			if (**inbuf & MSB) {
				st->keepc[0] = (**inbuf);
				st->cstate = C1;
			} else {	/* real ASCII */
				if (st->istate == OUT) {
					st->cstate = C0;
					st->istate = IN;
					**outbuf = SI;
					(*outbuf)++;
					(*outbytesleft)--;
					if (*outbytesleft <= 0) {
						/*
						 * istate already records the
						 * SI, so a retry just writes
						 * the ASCII byte.
						 */
						errno = E2BIG;
						return ((size_t)-1);
					}
				}
				**outbuf = **inbuf;
				(*outbuf)++;
				(*outbytesleft)--;
			}
			break;
		case C1:		/* Chinese characters: 2nd byte */
			if ((st->keepc[0] & ONEBYTE) == MBYTE) { /* 4-byte (0x8e) */
				st->plane_no = get_plane_no_by_char(**inbuf);
				if (st->plane_no == -1) {	/* illegal plane */
					/*
					 * istate is left alone: no SI has been
					 * written, so the output is still in
					 * whatever shift state it was.
					 */
					st->cstate = C0;
					st->_errno = errno = EILSEQ;
				} else {	/* 4-byte Chinese character */
					st->keepc[1] = (**inbuf);
					st->cstate = C2;
				}
			} else {	/* 2-byte Chinese character - plane #1 */
				if (**inbuf & MSB) {	/* plane #1 */
					st->cstate = C4;
					st->keepc[1] = (**inbuf);
					st->keepc[2] = st->keepc[3] = '\0';
					st->plane_no = 1;
					continue;	/* should not advance *inbuf */
				} else {	/* input char doesn't belong
						 * to the input code set
						 */
					st->cstate = C0;
					st->_errno = errno = EINVAL;
				}
			}
			break;
		case C2:	/* plane #2 - #16 (4 bytes): get 3rd byte */
			if (**inbuf & MSB) {	/* 3rd byte */
				st->keepc[2] = (**inbuf);
				st->cstate = C3;
			} else {
				st->_errno = errno = EINVAL;
				st->cstate = C0;
			}
			break;
		case C3:	/* plane #2 - #16 (4 bytes): get 4th byte */
			if (**inbuf & MSB) {	/* 4th byte */
				st->cstate = C4;
				st->keepc[3] = (**inbuf);
				continue;	/* should not advance *inbuf */
			} else {
				st->_errno = errno = EINVAL;
				st->cstate = C0;
			}
			break;
		case C4: {	/* Convert code from CNS 11643 to ISO 2022-7 */
			/* a designation is needed after SI or on plane change */
			int	designate = (st->istate == IN ||
			    st->pre_plane_no != st->plane_no);
			size_t	need = 2;	/* the two character bytes */

			if (designate)	/* [SI] ESC $ ) F SO */
				need += (st->istate == OUT) ? 6 : 5;

			/*
			 * Check for the whole sequence up front so that an
			 * E2BIG return never leaves half an escape sequence
			 * behind; cstate stays C4 and the call is resumable.
			 */
			if (*outbytesleft < need) {
				st->_errno = errno = E2BIG;
				return ((size_t)-1);
			}

			if (designate) {
				/* change plane # in Chinese mode */
				if (st->istate == OUT) {
					**outbuf = SI;
					(*outbuf)++;
					(*outbytesleft)--;
#ifdef DEBUG
					fprintf(stderr, "(plane #=%d\tpre_plane #=%d)\t",
					    st->plane_no, st->pre_plane_no);
#endif
				}
				st->pre_plane_no = st->plane_no;
				st->istate = OUT;	/* shift out */
				**outbuf = ESC;
				*(*outbuf+1) = '$';
				*(*outbuf+2) = ')';
				*(*outbuf+3) = GET_PLANEC(st->plane_no);
#ifdef DEBUG
				fprintf(stderr, "ESC $ ) %c\n", *(*outbuf+3));
#endif
				*(*outbuf+4) = SO;
				(*outbuf) += 5;
				(*outbytesleft) -= 5;
			}
			n = cns_to_iso(st->plane_no, st->keepc, *outbuf,
			    *outbytesleft);
			if (n > 0) {
				(*outbuf) += n;
				(*outbytesleft) -= n;
			} else {
				st->_errno = errno;
				return ((size_t)-1);
			}
			st->cstate = C0;
			break;
		}
		default:	/* should never come here */
			st->_errno = errno = EILSEQ;
			st->cstate = C0;	/* reset state */
			break;
		}

		(*inbuf)++;
		(*inbytesleft)--;

		if (st->_errno) {
#ifdef DEBUG
			fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\n",
			    st->_errno, st->cstate);
#endif
			break;
		}
	}

	/* A conversion error must be reported even on the last input byte. */
	if (st->_errno) {
		errno = st->_errno;
		return ((size_t)-1);
	}

	if (*inbytesleft > 0 && *outbytesleft == 0) {
		errno = E2BIG;
		return ((size_t)-1);
	}
	return (*inbytesleft);
}


/*
 * Get plane number by char; i.e. 0xa2 returns 2, 0xae returns 14, etc.
 * Only the planes listed below are accepted.
 * Returns -1 on error conditions
 */
static int
get_plane_no_by_char(const char inbuf)
{
	int ret;
	unsigned char uc = (unsigned char) inbuf;

	ret = uc - PMASK;
	switch (ret) {
	case 1:		/* 0x8EA1 */
	case 2:		/* 0x8EA2 */
	case 3:		/* 0x8EA3 */
	case 4:		/* 0x8EA4 */
	case 5:		/* 0x8EA5 */
	case 6:		/* 0x8EA6 */
	case 7:		/* 0x8EA7 */
	case 12:	/* 0x8EAC */
	case 14:	/* 0x8EAE */
	case 15:	/* 0x8EAF */
	case 16:	/* 0x8EB0 */
		return (ret);
	default:
		return (-1);
	}
}


/*
 * CNS 11643 code --> ISO 2022-7
 *
 * Writes the two 7-bit bytes of the character held in keepc to buf.
 * The bytes are taken from keepc[2..3] when keepc[0] is the 0x8E lead
 * byte (4-byte form, including plane 1 written as 0x8EA1xxxx) and from
 * keepc[0..1] otherwise (2-byte form).
 *
 * Return: > 0 - converted with enough space in output buffer
 *         = 0 - no space in outbuf (errno set to E2BIG)
 */
static int
cns_to_iso(int plane_no, char keepc[], char *buf, size_t buflen)
{
	const char	*cp;

#ifdef DEBUG
	fprintf(stderr, "%02x%02x%02x%02x %d ",
	    (unsigned char)keepc[0], (unsigned char)keepc[1],
	    (unsigned char)keepc[2], (unsigned char)keepc[3], plane_no);
#else
	(void) plane_no;
#endif
	if (buflen < 2) {
		errno = E2BIG;
		return (0);
	}

	cp = ((unsigned char)keepc[0] == MBYTE) ? keepc + 2 : keepc;

	*buf = cp[0] & MSB_OFF;
	*(buf+1) = cp[1] & MSB_OFF;

#ifdef DEBUG
	fprintf(stderr, "->%x %x<-\t->%c %c<-\n", *buf, *(buf+1), *buf, *(buf+1));
#endif
	return (2);
}

/*
 * Allocate the state for _cv_enconv() in its initial condition.
 * Returns (void *)-1 when allocation fails.
 */
void *
_cv_open(void)
{
	struct _cv_state *st;

	if ((st = malloc(sizeof (*st))) == NULL)
		return ((void *)-1);

	st->plane_no = 0;
	st->get_a_mbchar = 1;
	st->more_bytes = 0;
	st->first_byte = 1;
	st->plane_changed = 0;
	st->planec = GET_PLANEC(0);
	st->p = st->keepc;
	st->keepc[0] = st->keepc[1] = st->keepc[2] = st->keepc[3] = '\0';

	return (st);
}

/* Release a state obtained from _cv_open(). */
void
_cv_close(struct _cv_state *st)
{
	free(st);
}


/*
 * Convert CNS 11643 (EUC) input to ISO 2022-7 output.
 *
 * A NULL cvinbuf (or one pointing at NULL) is a reset request: an SI is
 * written if a non-zero plane is selected and there is room for it, and
 * the state returns to its initial condition.
 *
 * Otherwise characters are converted until input or output runs out.  A
 * character whose bytes are split across calls is kept in the state and
 * finished by the next call; a character that does not fit in the output
 * is likewise kept and written by the next call.  A 0x8E lead byte
 * followed by a byte that does not name plane 1..16 is replaced by
 * NON_ID_CHAR.
 *
 * The four cursor arguments are updated on return.  Returns the number of
 * input bytes left unconsumed.
 */
size_t
_cv_enconv(struct _cv_state *st, char **cvinbuf, size_t *cvinbytesleft,
    char **cvoutbuf, size_t *cvoutbytesleft)
{
	char		*inbuf;
	char		*outbuf;
	size_t		insize;
	size_t		outsize;

	unsigned char	uc;
	int		i;

	if (cvinbuf == NULL || *cvinbuf == NULL) {	/* Reset request. */
		if (cvoutbuf && *cvoutbuf != NULL &&
		    cvoutbytesleft != NULL && *cvoutbytesleft > 0 &&
		    st->plane_no != 0) {
			**cvoutbuf = SI;
			(*cvoutbytesleft)--;
			(*cvoutbuf)++;
		}
		st->plane_no = 0;
		st->get_a_mbchar = 1;
		st->more_bytes = 0;
		st->first_byte = 1;

		return (0);
	}

	inbuf = *cvinbuf;
	outbuf = *cvoutbuf;
	insize = *cvinbytesleft;
	outsize = *cvoutbytesleft;

	while (insize > 0 && outsize > 0) {

		if (st->get_a_mbchar) {
			/*
			 * Short cut: copy a run of ASCII straight through.
			 * Skipped while a character is partially assembled
			 * (first_byte == 0), whose remaining bytes must go
			 * to keepc instead.
			 */
			if (st->plane_no == 0 && st->first_byte) {
				while (insize > 0 && outsize > 0 &&
				    ((unsigned char)*inbuf & MSB) == 0) {
					*outbuf++ = *inbuf++;
					outsize--;
					insize--;
				}
				if (insize == 0 || outsize == 0)
					break;
			}

			if (st->first_byte) {
				st->first_byte = 0;
				st->keepc[0] = uc = *inbuf++;
				insize--;
				if (uc & MSB) {
					if (uc == MBYTE)
						st->more_bytes = 3;
					else
						st->more_bytes = 1;
					st->p = st->keepc + 1;
				} else
					st->more_bytes = 0;
			}
			while (st->more_bytes > 0 && insize > 0) {
				*st->p++ = *inbuf++;
				st->more_bytes--;
				insize--;
			}
			if (st->more_bytes > 0)
				break;	/* input ended mid-character */
			st->get_a_mbchar = 0;

			/* up to this point, st->keepc contains a complete mb char */

			i = get_plane_no_by_str(st->keepc);
			if (i < 0) {
				/* unknown plane: emit a placeholder as ASCII */
				st->keepc[0] = NON_ID_CHAR;
				i = 0;
			}
			st->plane_changed = (st->plane_no != i);
			if (st->plane_changed) {	/* generate SI */
				st->planec = GET_PLANEC(i);
				if (st->plane_no != 0) {
					*outbuf++ = SI;
					outsize--;
					st->plane_no = i;
					if (outsize == 0)
						break;
				} else
					st->plane_no = i;
			}
		}

		/*
		 * up to this point, st->keepc contains a complete mb char and
		 * we know the plane_no
		 */

		if (st->plane_no == 0) {
			*outbuf++ = st->keepc[0];
			outsize--;
		} else {
			/* 4-byte form keeps the code bytes in keepc[2..3] */
			const char *cp =
			    ((unsigned char)st->keepc[0] == MBYTE) ?
			    st->keepc + 2 : st->keepc;

			if (st->plane_changed) {
				if (outsize < 7)
					break;
				*outbuf++ = ESC;
				*outbuf++ = '$';
				*outbuf++ = ')';
				*outbuf++ = st->planec;
				*outbuf++ = SO;
				*outbuf++ = cp[0] & MSB_OFF;
				*outbuf++ = cp[1] & MSB_OFF;
				outsize -= 7;
			} else {	/* don't need the escape sequence */
				if (outsize < 2)
					break;
				*outbuf++ = cp[0] & MSB_OFF;
				*outbuf++ = cp[1] & MSB_OFF;
				outsize -= 2;
			}
		}
		/*
		 * up to this point, a complete multibyte character has been
		 * converted and written to outbuf, so need to grab the next
		 * mb char from inbuf
		 */
		st->get_a_mbchar = 1;
		st->first_byte = 1;
	}

	*cvinbytesleft = insize;
	*cvoutbytesleft = outsize;
	*cvinbuf = inbuf;
	*cvoutbuf = outbuf;

	return (insize);
}

/*
 * Plane number of the character starting at inbuf:
 *   0   ASCII (MSB clear)
 *   1   2-byte form (lead byte other than 0x8E)
 *   n   4-byte form, second byte 0xA0 + n with 1 <= n <= MAX_PLANE
 *  -1   4-byte form whose second byte names no such plane
 */
static int
get_plane_no_by_str(const char *inbuf)
{
	unsigned char	uc = (unsigned char) *inbuf;
	int		plane;

	if ((uc & MSB) == 0)
		return (0);
	if (uc != MBYTE)
		return (1);

	plane = (unsigned char) inbuf[1] - PMASK;
	if (plane < 1 || plane > MAX_PLANE)
		return (-1);
	return (plane);
}