1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 1994-2003 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <errno.h> 30 #include <euc.h> 31 #include "japanese.h" 32 33 34 /* 35 * struct _cv_state; to keep status 36 */ 37 struct _icv_state { 38 int _st_cset; 39 int _st_cset_sav; 40 }; 41 42 void * 43 _icv_open() 44 { 45 struct _icv_state *st; 46 47 if ((st = (struct _icv_state *)malloc(sizeof (struct _icv_state))) 48 == NULL) 49 return ((void *)ERR_RETURN); 50 51 st->_st_cset = st->_st_cset_sav = CS_0; 52 53 return (st); 54 } 55 56 void 57 _icv_close(struct _icv_state *st) 58 { 59 free(st); 60 } 61 62 size_t 63 _icv_iconv(struct _icv_state *st, char **inbuf, size_t *inbytesleft, 64 char **outbuf, size_t *outbytesleft) 65 { 66 int cset; 67 int stat = ST_INIT; 68 unsigned char *op; 69 char *ip, ic; 70 size_t ileft, oleft; 71 size_t retval; 72 #ifdef RFC1468_MODE 73 unsigned short zenkaku; 74 #endif 75 76 /* 77 * If inbuf and/or *inbuf are NULL, reset conversion descriptor 78 * and put escape sequence if needed. 79 */ 80 if ((inbuf == NULL) || (*inbuf == NULL)) { 81 st->_st_cset_sav = st->_st_cset = CS_0; 82 return ((size_t)0); 83 } 84 85 cset = st->_st_cset; 86 87 ip = *inbuf; 88 op = (unsigned char *)*outbuf; 89 ileft = *inbytesleft; 90 oleft = *outbytesleft; 91 92 /* 93 * Main loop; basically 1 loop per 1 input byte 94 */ 95 96 while ((int)ileft > 0) { 97 GET(ic); 98 if (stat == ST_INIT) { 99 goto text; 100 } 101 102 if (stat == ST_ESC) { 103 if (ic == MBTOG0_1) { 104 if ((int)ileft > 0) { 105 stat = ST_MBTOG0_1; 106 continue; 107 } else { 108 UNGET(); 109 UNGET(); 110 errno = EINVAL; 111 retval = (size_t)ERR_RETURN; 112 goto ret; 113 } 114 } else if (ic == SBTOG0_1) { 115 if ((int)ileft > 0) { 116 stat = ST_SBTOG0; 117 continue; 118 } else { 119 UNGET(); 120 UNGET(); 121 errno = EINVAL; 122 retval = (size_t)ERR_RETURN; 123 goto ret; 124 } 125 } else if (ic == X208REV_1) { 126 if ((int)ileft > 0) { 127 stat = ST_208REV_1; 128 continue; 129 } else { 130 UNGET(); 131 UNGET(); 132 errno = EINVAL; 133 retval = (size_t)ERR_RETURN; 134 goto ret; 135 } 136 } else { 137 UNGET(); 138 UNGET(); 139 errno = EILSEQ; 140 retval = (size_t)ERR_RETURN; 141 goto ret; 142 } 143 } else if (stat == ST_MBTOG0_1) { 144 if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) { 145 stat = ST_INIT; 146 st->_st_cset_sav = cset = CS_1; 147 continue; 148 } else if (ic == MBTOG0_2) { 149 if ((int)ileft > 0) { 150 stat = ST_MBTOG0_2; 151 continue; 152 } else { 153 UNGET(); 154 UNGET(); 155 UNGET(); 156 errno = EINVAL; 157 retval = (size_t)ERR_RETURN; 158 goto ret; 159 } 160 } else if (ic == F_X0212_90) { 161 stat = ST_INIT; 162 st->_st_cset_sav = cset = CS_3; 163 continue; 164 } else { 165 UNGET(); 166 UNGET(); 167 UNGET(); 168 errno = EILSEQ; 169 retval = (size_t)ERR_RETURN; 170 goto ret; 171 } 172 } else if (stat == ST_MBTOG0_2) { 173 if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) { 174 stat = ST_INIT; 175 st->_st_cset_sav = cset = CS_1; 176 continue; 177 } else if (ic == F_X0212_90) { 178 stat = ST_INIT; 179 st->_st_cset_sav = cset = CS_3; 180 continue; 181 } else { 182 UNGET(); 183 UNGET(); 184 UNGET(); 185 UNGET(); 186 errno = EILSEQ; 187 retval = (size_t)ERR_RETURN; 188 goto ret; 189 } 190 } else if (stat == ST_SBTOG0) { 191 if ((ic == F_ASCII) || 192 (ic == F_X0201_RM) || 193 (ic == F_ISO646)) { 194 stat = ST_INIT; 195 st->_st_cset_sav = cset = CS_0; 196 continue; 197 } else if (ic == F_X0201_KN) { 198 stat = ST_INIT; 199 st->_st_cset_sav = cset = CS_2; 200 continue; 201 } else { 202 UNGET(); 203 UNGET(); 204 UNGET(); 205 errno = EILSEQ; 206 retval = (size_t)ERR_RETURN; 207 goto ret; 208 } 209 } else if (stat == ST_208REV_1) { 210 if (ic == X208REV_2) { 211 if ((int)ileft > 0) { 212 stat = ST_208REV_2; 213 continue; 214 } else { 215 UNGET(); 216 UNGET(); 217 UNGET(); 218 errno = EINVAL; 219 retval = (size_t)ERR_RETURN; 220 goto ret; 221 } 222 } else { 223 UNGET(); 224 UNGET(); 225 UNGET(); 226 errno = EILSEQ; 227 retval = (size_t)ERR_RETURN; 228 goto ret; 229 } 230 } else if (stat == ST_208REV_2) { 231 if (ic == ESC) { 232 if ((int)ileft > 0) { 233 stat = ST_REV_AFT_ESC; 234 continue; 235 } else { 236 UNGET(); 237 UNGET(); 238 UNGET(); 239 UNGET(); 240 errno = EINVAL; 241 retval = (size_t)ERR_RETURN; 242 goto ret; 243 } 244 } else { 245 UNGET(); 246 UNGET(); 247 UNGET(); 248 UNGET(); 249 errno = EILSEQ; 250 retval = (size_t)ERR_RETURN; 251 goto ret; 252 } 253 } else if (stat == ST_REV_AFT_ESC) { 254 if (ic == MBTOG0_1) { 255 if ((int)ileft > 0) { 256 stat = ST_REV_AFT_MBTOG0_1; 257 continue; 258 } else { 259 UNGET(); 260 UNGET(); 261 UNGET(); 262 UNGET(); 263 UNGET(); 264 errno = EINVAL; 265 retval = (size_t)ERR_RETURN; 266 goto ret; 267 } 268 } else { 269 UNGET(); 270 UNGET(); 271 UNGET(); 272 UNGET(); 273 UNGET(); 274 errno = EILSEQ; 275 retval = (size_t)ERR_RETURN; 276 goto ret; 277 } 278 } else if (stat == ST_REV_AFT_MBTOG0_1) { 279 if (ic == F_X0208_83_90) { 280 stat = ST_INIT; 281 st->_st_cset_sav = cset = CS_1; 282 continue; 283 } else if (ic == MBTOG0_2) { 284 if ((int)ileft > 0) { 285 stat = ST_REV_AFT_MBTOG0_2; 286 continue; 287 } else { 288 UNGET(); 289 UNGET(); 290 UNGET(); 291 UNGET(); 292 UNGET(); 293 UNGET(); 294 errno = EINVAL; 295 retval = (size_t)ERR_RETURN; 296 goto ret; 297 } 298 } else { 299 UNGET(); 300 UNGET(); 301 UNGET(); 302 UNGET(); 303 UNGET(); 304 UNGET(); 305 errno = EILSEQ; 306 retval = (size_t)ERR_RETURN; 307 goto ret; 308 } 309 } else if (stat == ST_REV_AFT_MBTOG0_2) { 310 if (ic == F_X0208_83_90) { 311 stat = ST_INIT; 312 st->_st_cset_sav = cset = CS_1; 313 continue; 314 } else { 315 UNGET(); 316 UNGET(); 317 UNGET(); 318 UNGET(); 319 UNGET(); 320 UNGET(); 321 UNGET(); 322 errno = EILSEQ; 323 retval = (size_t)ERR_RETURN; 324 goto ret; 325 } 326 } 327 text: 328 /* 329 * Break through chars or ESC sequence 330 */ 331 if (ic == ESC) { 332 if ((int)ileft > 0) { 333 stat = ST_ESC; 334 continue; 335 } else { 336 UNGET(); 337 errno = EINVAL; 338 retval = (size_t)ERR_RETURN; 339 goto ret; 340 } 341 /* 342 * XXX- Because V3 mailtool uses SI/SO to switch 343 * G0 and G1 sets while it puts "iso2022-7" 344 * as its "X-Sun-Charset" tag. Though it 345 * breaks ISO-2022-JP definition based on 346 * UI-OSF, dtmail have handle them correctly. 347 * Therefore, we have to following a few codes, UGH. 348 */ 349 } else if (ic == SO) { 350 cset = CS_2; 351 stat = ST_INIT; 352 continue; 353 } else if (ic == SI) { 354 cset = st->_st_cset_sav; 355 stat = ST_INIT; 356 continue; 357 } else if (!(ic & CMSB)) { 358 if (cset == CS_0) { 359 CHECK2BIG(EUCW0, 1); 360 PUT(ic); 361 continue; 362 } else if (cset == CS_1) { 363 if ((int)ileft > 0) { 364 CHECK2BIG(EUCW1, 1); 365 if ((ic < 0x21) || (ic == 0x7f)) { 366 UNGET(); 367 errno = EILSEQ; 368 retval = (size_t)ERR_RETURN; 369 goto ret; 370 } else if ((*ip < 0x21) || (*ip == 371 0x7f)) { 372 UNGET(); 373 errno = EILSEQ; 374 retval = (size_t)ERR_RETURN; 375 goto ret; 376 } 377 #ifdef RFC1468_MODE /* Convert VDC and UDC to GETA */ 378 if ((ic == 0x2d) || (0x75 <= ic )){ 379 PUT((EGETA >> 8) & 0xff); 380 GET(ic); /* Get dummy */ 381 PUT(EGETA & 0xff); 382 continue; 383 } 384 #endif /* RFC1468_MODE */ 385 PUT(ic | CMSB); 386 GET(ic); 387 PUT(ic | CMSB); 388 stat = ST_INIT; 389 continue; 390 } else { 391 UNGET(); 392 errno = EINVAL; 393 retval = (size_t)ERR_RETURN; 394 goto ret; 395 } 396 } else if (cset == CS_2) { 397 if (!ISSJKANA((ic | CMSB))) { 398 UNGET(); 399 errno = EILSEQ; 400 retval = (size_t)ERR_RETURN; 401 goto ret; 402 } 403 #ifdef RFC1468_MODE /* Convert JIS X 0201 Kana to JIS X 0208 Kana */ 404 CHECK2BIG(EUCW1, 1); 405 zenkaku = halfkana2zenkakue[(ic - 0x21)]; 406 ic = (unsigned char)((zenkaku >> 8) & 0xFF); 407 PUT(ic); 408 ic = (unsigned char)(zenkaku & 0xFF); 409 PUT(ic); 410 #else /* ISO-2022-JP.UIOSF */ 411 CHECK2BIG(EUCW2 + SEQ_SS, 1); 412 PUT(SS2); 413 PUT(ic | CMSB); 414 #endif /* RFC1468_MODE */ 415 continue; 416 } else if (cset == CS_3) { 417 if ((int)ileft > 0) { 418 if ((ic < 0x21) || (ic == 0x7f)) { 419 UNGET(); 420 errno = EILSEQ; 421 retval = (size_t)ERR_RETURN; 422 goto ret; 423 } else if ((*ip < 0x21) || (*ip == 424 0x7f)) { 425 UNGET(); 426 errno = EILSEQ; 427 retval = (size_t)ERR_RETURN; 428 goto ret; 429 } 430 #ifdef RFC1468_MODE /* Convert JIS X 0212 to GETA */ 431 CHECK2BIG(EUCW1, 1); 432 PUT((EGETA >> 8) | CMSB); 433 GET(ic); /* Get dummy */ 434 PUT((EGETA & CMASK) | CMSB); 435 #else /* ISO-2022-JP.UIOSF */ 436 CHECK2BIG(EUCW3 + SEQ_SS, 1); 437 PUT(SS3); 438 PUT(ic | CMSB); 439 GET(ic); 440 PUT(ic | CMSB); 441 #endif /* RFC1468_MODE */ 442 stat = ST_INIT; 443 continue; 444 } else { 445 UNGET(); 446 errno = EINVAL; 447 retval = (size_t)ERR_RETURN; 448 goto ret; 449 } 450 } 451 } else { 452 UNGET(); 453 errno = EILSEQ; 454 retval = (size_t)ERR_RETURN; 455 goto ret; 456 } 457 } 458 retval = ileft; 459 ret: 460 *inbuf = ip; 461 *inbytesleft = ileft; 462 *outbuf = (char *)op; 463 *outbytesleft = oleft; 464 st->_st_cset = cset; 465 466 return (retval); 467 } 468