1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 1997-2003 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <errno.h> 30 #include <euc.h> 31 #include "japanese.h" 32 #include "jfp_iconv_unicode.h" 33 34 /* Note: JFP_J2U_ICONV_RFC1468 macro pass through hankaku katakata. */ 35 #ifdef RFC1468_MODE 36 #define JFP_J2U_ICONV_RFC1468 37 #else 38 #define JFP_J2U_ICONV 39 #endif 40 #include "jfp_jis_to_ucs2.h" 41 42 /* 43 * struct _cv_state; to keep status 44 */ 45 struct _icv_state { 46 int _st_cset; 47 int _st_cset_sav; 48 }; 49 50 void * 51 _icv_open() 52 { 53 struct _icv_state *st; 54 55 if ((st = (struct _icv_state *)malloc(sizeof (struct _icv_state))) 56 == NULL) 57 return ((void *)ERR_RETURN); 58 59 st->_st_cset_sav = st->_st_cset = CS_0; 60 61 return (st); 62 } 63 64 void 65 _icv_close(struct _icv_state *st) 66 { 67 free(st); 68 } 69 70 size_t 71 _icv_iconv(struct _icv_state *st, char **inbuf, size_t *inbytesleft, 72 char **outbuf, size_t *outbytesleft) 73 { 74 int cset, stat, ret_val; 75 char *ip, ic; 76 size_t ileft; 77 size_t retval; 78 char *op; 79 size_t oleft; 80 unsigned int index = 0; 81 82 /* 83 * If inbuf and/or *inbuf are NULL, reset conversion descriptor 84 * and put escape sequence if needed. 85 */ 86 if ((inbuf == NULL) || (*inbuf == NULL)) { 87 st->_st_cset_sav = st->_st_cset = CS_0; 88 return ((size_t)0); 89 } 90 91 cset = st->_st_cset; 92 stat = ST_INIT; 93 94 ip = *inbuf; 95 op = *outbuf; 96 ileft = *inbytesleft; 97 oleft = *outbytesleft; 98 99 /* 100 * Main loop; 1 loop per 1 input byte 101 */ 102 103 while ((int)ileft > 0) { 104 GET(ic); 105 if (stat == ST_ESC) { 106 if (ic == MBTOG0_1) { 107 if ((int)ileft > 0) { 108 stat = ST_MBTOG0_1; 109 continue; 110 } else { 111 UNGET(); 112 UNGET(); 113 errno = EINVAL; 114 retval = (size_t)ERR_RETURN; 115 goto ret; 116 } 117 } else if (ic == SBTOG0_1) { 118 if ((int)ileft > 0) { 119 stat = ST_SBTOG0; 120 continue; 121 } else { 122 UNGET(); 123 UNGET(); 124 errno = EINVAL; 125 retval = (size_t)ERR_RETURN; 126 goto ret; 127 } 128 } else if (ic == X208REV_1) { 129 if ((int)ileft > 0) { 130 stat = ST_208REV_1; 131 continue; 132 } else { 133 UNGET(); 134 UNGET(); 135 errno = EINVAL; 136 retval = (size_t)ERR_RETURN; 137 goto ret; 138 } 139 } else { 140 UNGET(); 141 UNGET(); 142 errno = EILSEQ; 143 retval = (size_t)ERR_RETURN; 144 goto ret; 145 } 146 } else if (stat == ST_MBTOG0_1) { 147 if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) { 148 stat = ST_INIT; 149 st->_st_cset_sav = cset = CS_1; 150 continue; 151 } else if (ic == MBTOG0_2) { 152 if ((int)ileft > 0) { 153 stat = ST_MBTOG0_2; 154 continue; 155 } else { 156 UNGET(); 157 UNGET(); 158 UNGET(); 159 errno = EINVAL; 160 retval = (size_t)ERR_RETURN; 161 goto ret; 162 } 163 } else if (ic == F_X0212_90) { 164 stat = ST_INIT; 165 st->_st_cset_sav = cset = CS_3; 166 continue; 167 } else { 168 UNGET(); 169 UNGET(); 170 UNGET(); 171 errno = EILSEQ; 172 retval = (size_t)ERR_RETURN; 173 goto ret; 174 } 175 } else if (stat == ST_MBTOG0_2) { 176 if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) { 177 stat = ST_INIT; 178 st->_st_cset_sav = cset = CS_1; 179 continue; 180 } else if (ic == F_X0212_90) { 181 stat = ST_INIT; 182 st->_st_cset_sav = cset = CS_3; 183 continue; 184 } else { 185 UNGET(); 186 UNGET(); 187 UNGET(); 188 UNGET(); 189 errno = EILSEQ; 190 retval = (size_t)ERR_RETURN; 191 goto ret; 192 } 193 } else if (stat == ST_SBTOG0) { 194 if ((ic == F_ASCII) || 195 (ic == F_X0201_RM) || 196 (ic == F_ISO646)) { 197 stat = ST_INIT; 198 st->_st_cset_sav = cset = CS_0; 199 continue; 200 } else if (ic == F_X0201_KN) { 201 stat = ST_INIT; 202 st->_st_cset_sav = cset = CS_2; 203 continue; 204 } else { 205 UNGET(); 206 UNGET(); 207 UNGET(); 208 errno = EILSEQ; 209 retval = (size_t)ERR_RETURN; 210 goto ret; 211 } 212 } else if (stat == ST_208REV_1) { 213 if (ic == X208REV_2) { 214 if ((int)ileft > 0) { 215 stat = ST_208REV_2; 216 continue; 217 } else { 218 UNGET(); 219 UNGET(); 220 UNGET(); 221 errno = EINVAL; 222 retval = (size_t)ERR_RETURN; 223 goto ret; 224 } 225 } else { 226 UNGET(); 227 UNGET(); 228 UNGET(); 229 errno = EILSEQ; 230 retval = (size_t)ERR_RETURN; 231 goto ret; 232 } 233 } else if (stat == ST_208REV_2) { 234 if (ic == ESC) { 235 if ((int)ileft > 0) { 236 stat = ST_REV_AFT_ESC; 237 continue; 238 } else { 239 UNGET(); 240 UNGET(); 241 UNGET(); 242 UNGET(); 243 errno = EINVAL; 244 retval = (size_t)ERR_RETURN; 245 goto ret; 246 } 247 } else { 248 UNGET(); 249 UNGET(); 250 UNGET(); 251 UNGET(); 252 errno = EILSEQ; 253 retval = (size_t)ERR_RETURN; 254 goto ret; 255 } 256 } else if (stat == ST_REV_AFT_ESC) { 257 if (ic == MBTOG0_1) { 258 if ((int)ileft > 0) { 259 stat = ST_REV_AFT_MBTOG0_1; 260 continue; 261 } else { 262 UNGET(); 263 UNGET(); 264 UNGET(); 265 UNGET(); 266 UNGET(); 267 errno = EINVAL; 268 retval = (size_t)ERR_RETURN; 269 goto ret; 270 } 271 } else { 272 UNGET(); 273 UNGET(); 274 UNGET(); 275 UNGET(); 276 UNGET(); 277 errno = EILSEQ; 278 retval = (size_t)ERR_RETURN; 279 goto ret; 280 } 281 } else if (stat == ST_REV_AFT_MBTOG0_1) { 282 if (ic == F_X0208_83_90) { 283 stat = ST_INIT; 284 st->_st_cset_sav = cset = CS_1; 285 continue; 286 } else if (ic == MBTOG0_2) { 287 if ((int)ileft > 0) { 288 stat = ST_REV_AFT_MBTOG0_2; 289 continue; 290 } else { 291 UNGET(); 292 UNGET(); 293 UNGET(); 294 UNGET(); 295 UNGET(); 296 UNGET(); 297 errno = EINVAL; 298 retval = (size_t)ERR_RETURN; 299 goto ret; 300 } 301 } else { 302 UNGET(); 303 UNGET(); 304 UNGET(); 305 UNGET(); 306 UNGET(); 307 UNGET(); 308 errno = EILSEQ; 309 retval = (size_t)ERR_RETURN; 310 goto ret; 311 } 312 } else if (stat == ST_REV_AFT_MBTOG0_2) { 313 if (ic == F_X0208_83_90) { 314 stat = ST_INIT; 315 st->_st_cset_sav = cset = CS_1; 316 continue; 317 } else { 318 UNGET(); 319 UNGET(); 320 UNGET(); 321 UNGET(); 322 UNGET(); 323 UNGET(); 324 UNGET(); 325 errno = EILSEQ; 326 retval = (size_t)ERR_RETURN; 327 goto ret; 328 } 329 } 330 /* 331 * Break through chars or ESC sequence 332 * if (stat == ST_INIT) 333 */ 334 if (ic == ESC) { 335 if ((int)ileft > 0) { 336 stat = ST_ESC; 337 continue; 338 } else { 339 UNGET(); 340 errno = EINVAL; 341 retval = (size_t)ERR_RETURN; 342 goto ret; 343 } 344 /* 345 * XXX- Because V3 mailtool uses SI/SO to switch 346 * G0 and G1 sets while it puts "iso2022-7" 347 * as its "X-Sun-Charset" tag. Though it 348 * breaks ISO-2022-JP definition based on 349 * UI-OSF, dtmail have handle them correctly. 350 * Therefore, we have to following a few codes, UGH. 351 */ 352 } else if (ic == SO) { 353 cset = CS_2; 354 stat = ST_INIT; 355 continue; 356 } else if (ic == SI) { 357 cset = st->_st_cset_sav; 358 stat = ST_INIT; 359 continue; 360 } else if (!(ic & CMSB)) { 361 if ((cset == CS_0) || (cset == CS_2)){ 362 if (cset == CS_0) { 363 index = (int)_jfp_tbl_jisx0201roman_to_ucs2[(int)ic]; 364 } else if (cset == CS_2) { 365 index = 366 (int)_jfp_tbl_jisx0201kana_to_ucs2[(ic - 0x21)]; 367 } 368 if ((ret_val = write_unicode( 369 (unsigned int)index, &op, &oleft, 370 B_FALSE, "writing CS_0/2")) 371 < 0) { 372 /* errno is set in write_unicode */ 373 UNGET(); 374 retval = (size_t)ERR_RETURN; 375 goto ret; 376 } 377 stat = ST_INIT; 378 continue; 379 } else if ((cset == CS_1) || (cset == CS_3)) { 380 if ((int)ileft > 0) { 381 if ((ic < 0x21) || (ic == 0x7f)) { 382 UNGET(); 383 errno = EILSEQ; 384 retval = (size_t)ERR_RETURN; 385 goto ret; 386 } else if ((*ip < 0x21) || (*ip == 387 0x7f)) { 388 UNGET(); 389 errno = EILSEQ; 390 retval = (size_t)ERR_RETURN; 391 goto ret; 392 } 393 index = ((ic - 0x21) * 94) 394 + (*ip - 0x21); 395 if (cset == CS_1) { 396 #ifdef RFC1468_MODE /* Convert VDC and UDC to GETA(DEFC_U in jis%UTF-8.h) */ 397 if ((ic == 0x2d) || 398 (0x75 <= ic)) 399 index = 0x3013; 400 else 401 index = (int) 402 _jfp_tbl_jisx0208_to_ucs2[index]; 403 #else /* ISO-2022-JP.UIOSF */ 404 index = (int) 405 _jfp_tbl_jisx0208_to_ucs2[index]; 406 #endif /* RFC1468_MODE */ 407 } else if (cset == CS_3) { 408 #ifdef RFC1468_MODE /* Convert JIS X 0212 to GETA(DEFC_U in jis%UTF-8.h) */ 409 index = 0x3013; 410 #else /* ISO-2022-JP.UIOSF */ 411 index = 412 (int)_jfp_tbl_jisx0212_to_ucs2[index]; 413 #endif /* RFC1468_MODE */ 414 } 415 if ((ret_val = write_unicode( 416 (unsigned int)index, 417 &op, &oleft, 418 B_FALSE, "writing CS_1/3")) 419 < 0) { 420 /* errno is set 421 in write_unicode */ 422 UNGET(); 423 retval = 424 (size_t)ERR_RETURN; 425 goto ret; 426 } 427 /* dummy GET for 2nd byte */ 428 GET(ic); 429 stat = ST_INIT; 430 continue; 431 } else { 432 UNGET(); 433 errno = EINVAL; 434 retval = (size_t)ERR_RETURN; 435 goto ret; 436 } 437 } 438 } else { 439 UNGET(); 440 errno = EILSEQ; 441 retval = (size_t)ERR_RETURN; 442 goto ret; 443 } 444 } 445 retval = ileft; 446 ret: 447 *inbuf = ip; 448 *inbytesleft = ileft; 449 *outbuf = (char *)op; 450 *outbytesleft = oleft; 451 st->_st_cset = cset; 452 453 return (retval); 454 } 455