1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 1994-2003 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <errno.h> 29 #include <euc.h> 30 #include "japanese.h" 31 32 /* 33 * struct _icv_state; to keep stat 34 */ 35 struct _icv_state { 36 int _st_cset; 37 }; 38 39 void * 40 _icv_open() 41 { 42 struct _icv_state *st; 43 44 if ((st = (struct _icv_state *)malloc(sizeof (struct _icv_state))) 45 == NULL) 46 return ((void *)ERR_RETURN); 47 48 st->_st_cset = CS_0; 49 return (st); 50 } 51 52 void 53 _icv_close(struct _icv_state *st) 54 { 55 free(st); 56 } 57 58 size_t 59 _icv_iconv(struct _icv_state *st, char **inbuf, size_t *inbytesleft, 60 char **outbuf, size_t *outbytesleft) 61 { 62 int cset; 63 unsigned char *ip, ic; 64 char *op; 65 size_t ileft, oleft; 66 size_t retval; 67 #ifdef RFC1468_MODE 68 unsigned short zenkaku; 69 #endif 70 71 /* 72 * If inbuf and/or *inbuf are NULL, reset conversion descriptor 73 * and put escape sequence if needed. 74 */ 75 if ((inbuf == NULL) || (*inbuf == NULL)) { 76 if (st->_st_cset != CS_0) { 77 if ((outbuf != NULL) && (*outbuf != NULL) 78 && (outbytesleft != NULL)) { 79 op = *outbuf; 80 oleft = *outbytesleft; 81 if (oleft < SEQ_SBTOG0) { 82 errno = E2BIG; 83 return ((size_t)-1); 84 } 85 PUT(ESC); 86 PUT(SBTOG0_1); 87 PUT(F_X0201_RM); 88 *outbuf = op; 89 *outbytesleft = oleft; 90 } 91 st->_st_cset = CS_0; 92 } 93 return ((size_t)0); 94 } 95 96 cset = st->_st_cset; 97 98 ip = (unsigned char *)*inbuf; 99 op = *outbuf; 100 ileft = *inbytesleft; 101 oleft = *outbytesleft; 102 103 /* 104 * Main loop; basically 1 loop per 1 input byte 105 */ 106 107 while ((int)ileft > 0) { 108 GET(ic); 109 if (ISASC((int)ic)) { /* ASCII */ 110 if (cset != CS_0) { 111 CHECK2BIG(SEQ_SBTOG0,1); 112 PUT(ESC); /* to JIS X 0201 Roman */ 113 PUT(SBTOG0_1); 114 PUT(F_X0201_RM); 115 } 116 cset = CS_0; 117 CHECK2BIG(JISW0,1); 118 PUT(ic); 119 continue; 120 } else if (ISSJKANA(ic)) { /* Kana starts */ 121 #ifdef RFC1468_MODE /* Substitute JIS X 0208 for JIS X 0201 katakana */ 122 if (cset != CS_1) { 123 CHECK2BIG(SEQ_MBTOG0_O,1); 124 cset = CS_1; 125 PUT(ESC); 126 PUT(MBTOG0_1); 127 PUT(F_X0208_83_90); 128 } 129 CHECK2BIG(JISW1,1); 130 zenkaku = halfkana2zenkakuj[ic - 0xA1]; 131 ic = (unsigned char)((zenkaku >> 8) & CMASK); 132 PUT(ic); 133 ic = (unsigned char)(zenkaku & CMASK); 134 PUT(ic); 135 #else /* ISO-2022-JP.UIOSF */ 136 if (cset != CS_2) { 137 CHECK2BIG(SEQ_SBTOG0,1); 138 cset = CS_2; 139 PUT(ESC); 140 PUT(SBTOG0_1); 141 PUT(F_X0201_KN); 142 } 143 CHECK2BIG(JISW2,1); 144 PUT(ic & CMASK); 145 #endif /* RFC1468_MODE */ 146 continue; 147 } else if (ISSJKANJI1(ic)) { /* CS_1 Kanji starts */ 148 if ((int)ileft > 0) { 149 if (ISSJKANJI2(*ip)) { 150 if (cset != CS_1) { 151 CHECK2BIG(SEQ_MBTOG0_O,1); 152 cset = CS_1; 153 PUT(ESC); 154 PUT(MBTOG0_1); 155 PUT(F_X0208_83_90); 156 } 157 CHECK2BIG(JISW1,1); 158 #ifdef RFC1468_MODE /* Convert VDC and UDC to GETA */ 159 if ((ic == 0x87) || (0xed <= ic )){ 160 PUT((JGETA >> 8) & CMASK); 161 GET(ic); /* Get dummy */ 162 PUT(JGETA & CMASK); 163 continue; 164 } 165 #endif /* RFC1468_MODE */ 166 ic = sjtojis1[(ic - 0x80)]; 167 if (*ip >= 0x9f) { 168 ic++; 169 } 170 PUT(ic); 171 GET(ic); 172 ic = sjtojis2[ic]; 173 PUT(ic); 174 continue; 175 } else { /* 2nd byte is illegal */ 176 UNGET(); 177 errno = EILSEQ; 178 retval = (size_t)ERR_RETURN; 179 goto ret; 180 } 181 } else { /* input fragment of Kanji */ 182 UNGET(); 183 errno = EINVAL; 184 retval = (size_t)ERR_RETURN; 185 goto ret; 186 } 187 } else if (ISSJSUPKANJI1(ic)) { /* CS_3 Kanji starts */ 188 if ((int)ileft > 0) { 189 if (ISSJKANJI2(*ip)) { 190 #ifdef RFC1468_MODE /* Substitute JIS X 0208 "Geta" for JIS X 0212 */ 191 if (cset != CS_1) { 192 CHECK2BIG(SEQ_MBTOG0_O,1); 193 cset = CS_1; 194 PUT(ESC); 195 PUT(MBTOG0_1); 196 PUT(F_X0208_83_90); 197 } 198 CHECK2BIG(JISW1,1); 199 /* Put GETA (0x222e) */ 200 ic = (unsigned char)((JGETA >> 8) & 201 CMASK); 202 PUT(ic); 203 ic = (unsigned char)(JGETA & CMASK); 204 PUT(ic); 205 GET(ic); /* dummy GET */ 206 #else /* ISO-2022-JP.UIOSF */ 207 if (cset != CS_3) { 208 CHECK2BIG(SEQ_MBTOG0,1); 209 cset = CS_3; 210 PUT(ESC); 211 PUT(MBTOG0_1); 212 PUT(MBTOG0_2); 213 PUT(F_X0212_90); 214 } 215 CHECK2BIG(JISW3,1); 216 ic = sjtojis1[(ic - 0x80)]; 217 if (*ip >= 0x9f) { 218 ic++; 219 } 220 PUT(ic); 221 GET(ic); 222 ic = sjtojis2[ic]; 223 PUT(ic); 224 #endif /* RFC1468_MODE */ 225 continue; 226 } else { /* 2nd byte is illegal */ 227 UNGET(); 228 errno = EILSEQ; 229 retval = (size_t)ERR_RETURN; 230 goto ret; 231 } 232 } else { /* input fragment of Kanji */ 233 UNGET(); 234 errno = EINVAL; 235 retval = (size_t)ERR_RETURN; 236 goto ret; 237 } 238 } else if (ISSJIBM(ic) || /* Extended IBM char. area */ 239 ISSJNECIBM(ic)) { /* NEC/IBM char. area */ 240 /* 241 * We need a special treatment for each codes. 242 * By adding some offset number for them, we 243 * can process them as the same way of that of 244 * extended IBM chars. 245 */ 246 if ((int)ileft > 0) { 247 if (ISSJKANJI2(*ip)) { 248 unsigned short dest; 249 dest = (ic << 8); 250 GET(ic); 251 dest += ic; 252 if ((0xed40 <= dest) && 253 (dest <= 0xeffc)) { 254 REMAP_NEC(dest); 255 if (dest == 0xffff) { 256 goto ill_ibm; 257 } 258 } 259 /* 260 * XXX: 0xfa54 and 0xfa5b must be mapped 261 * to JIS0208 area. Therefore we 262 * have to do special treatment. 263 */ 264 if ((cset != CS_1) && 265 ((dest == 0xfa54) || 266 (dest == 0xfa5b))) { 267 CHECK2BIG(SEQ_MBTOG0_O,2); 268 cset = CS_1; 269 PUT(ESC); 270 PUT(MBTOG0_1); 271 PUT(F_X0208_83_90); 272 CHECK2BIG(JISW1,2); 273 if (dest == 0xfa54) { 274 PUT(0x22); 275 PUT(0x4c); 276 } else { 277 PUT(0x22); 278 PUT(0x68); 279 } 280 continue; 281 } 282 if (cset != CS_3) { 283 CHECK2BIG(SEQ_MBTOG0,2); 284 cset = CS_3; 285 PUT(ESC); 286 PUT(MBTOG0_1); 287 PUT(MBTOG0_2); 288 PUT(F_X0212_90); 289 } 290 CHECK2BIG(JISW3,2); 291 dest = dest - 0xfa40 - 292 (((dest>>8) - 0xfa) * 0x40); 293 dest = sjtoibmext[dest]; 294 if (dest == 0xffff) { 295 /* 296 * Illegal code points 297 * in IBM-EXT area. 298 */ 299 ill_ibm: 300 UNGET(); 301 UNGET(); 302 errno = EILSEQ; 303 retval = (size_t)ERR_RETURN; 304 goto ret; 305 } 306 PUT(((dest>>8) & 0x7f)); 307 PUT(dest & 0x7f); 308 continue; 309 } else { /* 2nd byte is illegal */ 310 UNGET(); 311 errno = EILSEQ; 312 retval = (size_t)ERR_RETURN; 313 goto ret; 314 } 315 } else { /* input fragment of Kanji */ 316 UNGET(); 317 errno = EINVAL; 318 retval = (size_t)ERR_RETURN; 319 goto ret; 320 } 321 } else if ((0xeb <= ic) && (ic <= 0xec)) { 322 /* 323 * Based on the draft convention of OSF-JVC CDEWG, 324 * characters in this area will be mapped to 325 * "CHIKAN-MOJI." (convertible character) 326 * So far, we'll use (0x222e) for it. 327 */ 328 if ((int)ileft > 0) { 329 if (ISSJKANJI2(*ip)) { 330 if (cset != CS_1) { 331 CHECK2BIG(SEQ_MBTOG0_O,1); 332 cset = CS_1; 333 PUT(ESC); 334 PUT(MBTOG0_1); 335 PUT(F_X0208_83_90); 336 } 337 CHECK2BIG(JISW1,1); 338 GET(ic); /* Dummy */ 339 PUT((JGETA>>8) & CMASK); 340 PUT(JGETA & CMASK); 341 continue; 342 } else { /* 2nd byte is illegal */ 343 UNGET(); 344 errno = EILSEQ; 345 retval = (size_t)ERR_RETURN; 346 goto ret; 347 } 348 } else { /* input fragment of Kanji */ 349 UNGET(); 350 errno = EINVAL; 351 retval = (size_t)ERR_RETURN; 352 goto ret; 353 } 354 } else { /* 1st byte is illegal */ 355 UNGET(); 356 errno = EILSEQ; 357 retval = (size_t)ERR_RETURN; 358 goto ret; 359 } 360 } 361 retval = ileft; 362 ret: 363 *inbuf = (char *)ip; 364 *inbytesleft = ileft; 365 *outbuf = op; 366 *outbytesleft = oleft; 367 st->_st_cset = cset; 368 369 return (retval); 370 } 371