1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 1994-2003 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <errno.h> 29 #include <euc.h> 30 #include "japanese.h" 31 32 /* 33 * struct _cv_state; to keep status 34 */ 35 struct _icv_state { 36 int _st_cset; 37 int _st_cset_sav; 38 }; 39 40 static unsigned short lookuptbl(unsigned short); 41 42 void * 43 _icv_open() 44 { 45 struct _icv_state *st; 46 47 if ((st = (struct _icv_state *)malloc(sizeof (struct _icv_state))) 48 == NULL) 49 return ((void *)ERR_RETURN); 50 51 st->_st_cset = st->_st_cset_sav = CS_0; 52 53 return (st); 54 } 55 56 void 57 _icv_close(struct _icv_state *st) 58 { 59 free(st); 60 } 61 62 size_t 63 _icv_iconv(struct _icv_state *st, char **inbuf, size_t *inbytesleft, 64 char **outbuf, size_t *outbytesleft) 65 { 66 int cset; 67 int stat = ST_INIT; 68 unsigned char *op, ic; 69 char *ip; 70 size_t ileft, oleft; 71 size_t retval; 72 #ifdef RFC1468_MODE 73 unsigned short zenkaku; 74 #endif 75 76 /* 77 * If inbuf or *inbuf is NULL, reset conversion descriptor 78 * and put escape sequence if needed. 79 */ 80 if ((inbuf == NULL) || (*inbuf == NULL)) { 81 st->_st_cset_sav = st->_st_cset = CS_0; 82 return ((size_t)0); 83 } 84 85 cset = st->_st_cset; 86 87 ip = *inbuf; 88 op = (unsigned char *)*outbuf; 89 ileft = *inbytesleft; 90 oleft = *outbytesleft; 91 92 /* 93 * Main loop; basically 1 loop per 1 input byte 94 */ 95 96 while ((int)ileft > 0) { 97 GET(ic); 98 if (stat == ST_INIT) { 99 goto text; 100 } 101 /* 102 * Half way of Kanji or ESC sequence 103 */ 104 if (stat == ST_ESC) { 105 if (ic == MBTOG0_1) { 106 if ((int)ileft > 0) { 107 stat = ST_MBTOG0_1; 108 continue; 109 } else { 110 UNGET(); 111 UNGET(); 112 errno = EINVAL; 113 retval = (size_t)ERR_RETURN; 114 goto ret; 115 } 116 } else if (ic == SBTOG0_1) { 117 if ((int)ileft > 0) { 118 stat = ST_SBTOG0; 119 continue; 120 } else { 121 UNGET(); 122 UNGET(); 123 errno = EINVAL; 124 retval = (size_t)ERR_RETURN; 125 goto ret; 126 } 127 } else if (ic == X208REV_1) { 128 if ((int)ileft > 0) { 129 stat = ST_208REV_1; 130 continue; 131 } else { 132 UNGET(); 133 UNGET(); 134 errno = EINVAL; 135 retval = (size_t)ERR_RETURN; 136 goto ret; 137 } 138 } else { 139 UNGET(); 140 UNGET(); 141 errno = EILSEQ; 142 retval = (size_t)ERR_RETURN; 143 goto ret; 144 } 145 } else if (stat == ST_MBTOG0_1) { 146 if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) { 147 stat = ST_INIT; 148 st->_st_cset_sav = cset = CS_1; 149 continue; 150 } else if (ic == MBTOG0_2) { 151 if ((int)ileft > 0) { 152 stat = ST_MBTOG0_2; 153 continue; 154 } else { 155 UNGET(); 156 UNGET(); 157 UNGET(); 158 errno = EINVAL; 159 retval = (size_t)ERR_RETURN; 160 goto ret; 161 } 162 } else if (ic == F_X0212_90) { 163 stat = ST_INIT; 164 st->_st_cset_sav = cset = CS_3; 165 continue; 166 } else { 167 UNGET(); 168 UNGET(); 169 UNGET(); 170 errno = EILSEQ; 171 retval = (size_t)ERR_RETURN; 172 goto ret; 173 } 174 } else if (stat == ST_MBTOG0_2) { 175 if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) { 176 stat = ST_INIT; 177 st->_st_cset_sav = cset = CS_1; 178 continue; 179 } else if (ic == F_X0212_90) { 180 stat = ST_INIT; 181 st->_st_cset_sav = cset = CS_3; 182 continue; 183 } else { 184 UNGET(); 185 UNGET(); 186 UNGET(); 187 UNGET(); 188 errno = EILSEQ; 189 retval = (size_t)ERR_RETURN; 190 goto ret; 191 } 192 } else if (stat == ST_SBTOG0) { 193 if ((ic == F_ASCII) || 194 (ic == F_X0201_RM) || 195 (ic == F_ISO646)) { 196 stat = ST_INIT; 197 st->_st_cset_sav = cset = CS_0; 198 continue; 199 } if (ic == F_X0201_KN) { 200 st->_st_cset_sav = cset = CS_2; 201 stat = ST_INIT; 202 continue; 203 } else { 204 UNGET(); 205 UNGET(); 206 UNGET(); 207 errno = EILSEQ; 208 retval = (size_t)ERR_RETURN; 209 goto ret; 210 } 211 } else if (stat == ST_208REV_1) { 212 if (ic == X208REV_2) { 213 if ((int)ileft > 0) { 214 stat = ST_208REV_2; 215 continue; 216 } else { 217 UNGET(); 218 UNGET(); 219 UNGET(); 220 errno = EINVAL; 221 retval = (size_t)ERR_RETURN; 222 goto ret; 223 } 224 } else { 225 UNGET(); 226 UNGET(); 227 UNGET(); 228 errno = EILSEQ; 229 retval = (size_t)ERR_RETURN; 230 goto ret; 231 } 232 } else if (stat == ST_208REV_2) { 233 if (ic == ESC) { 234 if ((int)ileft > 0) { 235 stat = ST_REV_AFT_ESC; 236 continue; 237 } else { 238 UNGET(); 239 UNGET(); 240 UNGET(); 241 UNGET(); 242 errno = EINVAL; 243 retval = (size_t)ERR_RETURN; 244 goto ret; 245 } 246 } else { 247 UNGET(); 248 UNGET(); 249 UNGET(); 250 UNGET(); 251 errno = EILSEQ; 252 retval = (size_t)ERR_RETURN; 253 goto ret; 254 } 255 } else if (stat == ST_REV_AFT_ESC) { 256 if (ic == MBTOG0_1) { 257 if ((int)ileft > 0) { 258 stat = ST_REV_AFT_MBTOG0_1; 259 continue; 260 } else { 261 UNGET(); 262 UNGET(); 263 UNGET(); 264 UNGET(); 265 UNGET(); 266 errno = EINVAL; 267 retval = (size_t)ERR_RETURN; 268 goto ret; 269 } 270 } else { 271 UNGET(); 272 UNGET(); 273 UNGET(); 274 UNGET(); 275 UNGET(); 276 errno = EILSEQ; 277 retval = (size_t)ERR_RETURN; 278 goto ret; 279 } 280 } else if (stat == ST_REV_AFT_MBTOG0_1) { 281 if (ic == F_X0208_83_90) { 282 stat = ST_INIT; 283 st->_st_cset_sav = cset = CS_1; 284 continue; 285 } else if (ic == MBTOG0_2) { 286 if ((int)ileft > 0) { 287 stat = ST_REV_AFT_MBTOG0_2; 288 continue; 289 } else { 290 UNGET(); 291 UNGET(); 292 UNGET(); 293 UNGET(); 294 UNGET(); 295 UNGET(); 296 errno = EINVAL; 297 retval = (size_t)ERR_RETURN; 298 goto ret; 299 } 300 } else { 301 UNGET(); 302 UNGET(); 303 UNGET(); 304 UNGET(); 305 UNGET(); 306 UNGET(); 307 errno = EILSEQ; 308 retval = (size_t)ERR_RETURN; 309 goto ret; 310 } 311 } else if (stat == ST_REV_AFT_MBTOG0_2) { 312 if (ic == F_X0208_83_90) { 313 stat = ST_INIT; 314 st->_st_cset_sav = cset = CS_1; 315 continue; 316 } else { 317 UNGET(); 318 UNGET(); 319 UNGET(); 320 UNGET(); 321 UNGET(); 322 UNGET(); 323 UNGET(); 324 errno = EILSEQ; 325 retval = (size_t)ERR_RETURN; 326 goto ret; 327 } 328 } 329 text: 330 /* 331 * Break through chars or ESC sequence 332 */ 333 if (ic == ESC) { 334 if ((int)ileft > 0) { 335 stat = ST_ESC; 336 continue; 337 } else { 338 UNGET(); 339 errno = EINVAL; 340 retval = (size_t)ERR_RETURN; 341 goto ret; 342 } 343 /* 344 * XXX- Because V3 mailtool uses SI/SO to switch 345 * G0 and G1 sets while it puts "iso2022-7" 346 * as its "X-Sun-Charset" tag. Though it 347 * breaks ISO-2022-JP definition based on 348 * UI-OSF, dtmail have handle them correctly. 349 * Therefore, we have to following a few codes, UGH. 350 */ 351 } else if (ic == SO) { 352 cset = CS_2; 353 stat = ST_INIT; 354 continue; 355 } else if (ic == SI) { 356 cset = st->_st_cset_sav; 357 stat = ST_INIT; 358 continue; 359 } 360 if (!(ic & CMSB)) { 361 if (cset == CS_0) { /* ASCII or JIS roman */ 362 CHECK2BIG(SJISW0, 1); 363 PUT(ic); 364 continue; 365 } else if (cset == CS_1) { /* CS_1 Kanji starts */ 366 if ((int)ileft > 0) { 367 int even_ku; 368 CHECK2BIG(SJISW1, 1); 369 if ((ic < 0x21) || (ic == 0x7f)) { 370 UNGET(); 371 errno = EILSEQ; 372 retval = (size_t)ERR_RETURN; 373 goto ret; 374 } 375 if ((*ip < 0x21) || (*ip == 0x7f)) { 376 UNGET(); 377 errno = EILSEQ; 378 retval = (size_t)ERR_RETURN; 379 goto ret; 380 } 381 #ifdef RFC1468_MODE /* Convert VDC and UDC to GETA */ 382 if ((ic == 0x2d) || (0x75 <= ic)) { 383 PUT(PGETA >> 8); 384 GET(ic); /* Get dummy */ 385 PUT(PGETA & 0xff); 386 continue; 387 } 388 #endif /* RFC1468_MODE */ 389 PUT(jis208tosj1[ic]); 390 if ((ic % 2) == 0) 391 even_ku = TRUE; 392 else 393 even_ku = FALSE; 394 GET(ic); 395 if (even_ku) 396 ic += 0x80; 397 PUT(jistosj2[ic]); 398 continue; 399 } else { /* input fragment of Kanji */ 400 UNGET(); 401 errno = EINVAL; 402 retval = (size_t)ERR_RETURN; 403 goto ret; 404 } 405 } else if (cset == CS_2) { /* Hankaku Katakana */ 406 if (!ISSJKANA((ic | CMSB))) { 407 UNGET(); 408 errno = EILSEQ; 409 retval = (size_t)ERR_RETURN; 410 goto ret; 411 } 412 #ifdef RFC1468_MODE /* Convert JIS X 0201 kana to PCK zenkaku Kana */ 413 CHECK2BIG(SJISW1, 1); 414 zenkaku = halfkana2zenkakus[(ic - 0x21)]; 415 ic = (unsigned char)((zenkaku >> 8) & 0xff); 416 PUT(ic); 417 ic = (unsigned char)(zenkaku & 0xff); 418 PUT(ic); 419 #else /* ISO-2022-JP.UIOSF */ 420 CHECK2BIG(SJISW2, 1); 421 PUT(ic | CMSB); 422 #endif /* RFC1468_MODE */ 423 continue; 424 } else if (cset == CS_3) { /* CS_3 Kanji starts */ 425 unsigned short dest; 426 if ((int)ileft > 0) { 427 CHECK2BIG(SJISW1, 1); 428 if ((ic < 0x21) || (ic == 0x7f)) { 429 UNGET(); 430 errno = EILSEQ; 431 retval = (size_t)ERR_RETURN; 432 goto ret; 433 } 434 if ((*ip < 0x21) || (*ip == 0x7f)) { 435 UNGET(); 436 errno = EILSEQ; 437 retval = (size_t)ERR_RETURN; 438 goto ret; 439 } 440 441 #ifdef RFC1468_MODE /* Convert JIS X 0212 to GETA */ 442 PUT(PGETA >> 8); 443 GET(ic); /* Get dummy */ 444 PUT(PGETA & 0xff); 445 #else /* ISO-2022-JP.UIOSF */ 446 if (ic < 0x75) { /* check IBM area */ 447 dest = (ic << 8); 448 GET(ic); 449 dest += ic; 450 dest = lookuptbl(dest); 451 if (dest == 0xffff) { 452 /* 453 * Illegal code points 454 * in G3 plane. 455 */ 456 UNGET(); 457 UNGET(); 458 errno = EILSEQ; 459 retval = 460 (size_t)ERR_RETURN; 461 goto ret; 462 } else { 463 PUT((dest >> 8) & 0xff); 464 PUT(dest & 0xff); 465 } 466 } else { 467 int even_ku; 468 469 if ((ic % 2) == 0) 470 even_ku = TRUE; 471 else 472 even_ku = FALSE; 473 PUT(jis212tosj1[ic]); 474 GET(ic); 475 if (even_ku) 476 ic += 0x80; 477 PUT(jistosj2[ic]); 478 } 479 #endif /* RFC1468_MODE */ 480 continue; 481 } else { /* input fragment of Kanji */ 482 UNGET(); 483 errno = EINVAL; 484 retval = (size_t)ERR_RETURN; 485 goto ret; 486 } 487 } 488 } else { 489 UNGET(); 490 errno = EILSEQ; 491 retval = (size_t)ERR_RETURN; 492 goto ret; 493 } 494 } 495 retval = ileft; 496 ret: 497 *inbuf = ip; 498 *inbytesleft = ileft; 499 *outbuf = (char *)op; 500 *outbytesleft = oleft; 501 st->_st_cset = cset; 502 503 return (retval); 504 } 505 506 /* 507 * lookuptbl() 508 * Return the index number if its index-ed number 509 * is the same as dest value. 510 */ 511 static unsigned short 512 lookuptbl(unsigned short dest) 513 { 514 unsigned short tmp; 515 int i; 516 int sz = (sizeof (sjtoibmext) / sizeof (sjtoibmext[0])); 517 518 for (i = 0; i < sz; i++) { 519 tmp = (sjtoibmext[i] & 0x7f7f); 520 if (tmp == dest) 521 return ((i + 0xfa40 + ((i / 0xc0) * 0x40))); 522 } 523 return (PGETA); 524 } 525