1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 1994-2003 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <errno.h> 29 #include <euc.h> 30 #include "japanese.h" 31 32 /* 33 * struct _icv_state; to keep stat 34 */ 35 struct _icv_state { 36 int _st_cset; 37 }; 38 39 void * 40 _icv_open() 41 { 42 struct _icv_state *st; 43 44 if ((st = (struct _icv_state *)malloc(sizeof (struct _icv_state))) 45 == NULL) 46 return ((void *)ERR_RETURN); 47 48 st->_st_cset = CS_0; 49 return (st); 50 } 51 52 void 53 _icv_close(struct _icv_state *st) 54 { 55 free(st); 56 } 57 58 59 60 /* 61 * Actual conversion; called from iconv() 62 */ 63 size_t 64 _icv_iconv(struct _icv_state *st, char **inbuf, size_t *inbytesleft, 65 char **outbuf, size_t *outbytesleft) 66 { 67 int cset; 68 int stat = ST_INIT; 69 unsigned char *ip, ic; 70 char *op; 71 size_t ileft, oleft; 72 size_t retval; 73 74 /* 75 * If inbuf and/or *inbuf are NULL, reset conversion descriptor 76 * and put escape sequence if needed. 77 */ 78 if ((inbuf == NULL) || (*inbuf == NULL)) { 79 if ((st->_st_cset == CS_1) || (st->_st_cset == CS_3)) { 80 if ((outbuf != NULL) && (*outbuf != NULL) 81 && (outbytesleft != NULL)) { 82 op = *outbuf; 83 oleft = *outbytesleft; 84 if (oleft < SEQ_SBTOG0) { 85 errno = E2BIG; 86 return ((size_t)-1); 87 } 88 PUT(ESC); 89 PUT(SBTOG0_1); 90 PUT(F_X0201_RM); 91 *outbuf = op; 92 *outbytesleft = oleft; 93 } 94 st->_st_cset = CS_0; 95 } else if (st->_st_cset == CS_2) { 96 if ((outbuf != NULL) && (*outbuf != NULL) 97 && (outbytesleft != NULL)) { 98 op = *outbuf; 99 oleft = *outbytesleft; 100 if (oleft < SEQ_SOSI) { 101 errno = E2BIG; 102 return ((size_t)-1); 103 } 104 PUT(SI); 105 *outbuf = op; 106 *outbytesleft = oleft; 107 } 108 st->_st_cset = CS_0; 109 } 110 return ((size_t)0); 111 } 112 113 cset = st->_st_cset; 114 115 ip = (unsigned char *)*inbuf; 116 op = *outbuf; 117 ileft = *inbytesleft; 118 oleft = *outbytesleft; 119 120 /* 121 * Main loop; basically 1 loop per 1 input byte 122 */ 123 124 while ((int)ileft > 0) { 125 GET(ic); 126 if ((stat == ST_INCS1) || (stat == ST_INCS3)) { 127 ic = sjtojis2[ic]; 128 PUT(ic); 129 stat = ST_INIT; 130 continue; 131 } else if (ISASC((int)ic)) { /* ASCII */ 132 if ((cset == CS_1) || (cset == CS_3)) { 133 CHECK2BIG(SEQ_SBTOG0,1); 134 PUT(ESC); /* to JIS X 0201 Roman */ 135 PUT(SBTOG0_1); 136 PUT(F_X0201_RM); 137 } else if (cset == CS_2) { 138 CHECK2BIG(SEQ_SOSI,1); 139 PUT(SI); /* Shift In */ 140 } 141 cset = CS_0; 142 CHECK2BIG(JISW0,1); 143 PUT(ic); 144 continue; 145 } else if (ISSJKANA(ic)) { /* Kana starts */ 146 if ((cset == CS_1) || (cset == CS_3)) { 147 CHECK2BIG(SEQ_SBTOG0,1); 148 cset = CS_0; 149 PUT(ESC); 150 PUT(SBTOG0_1); 151 PUT(F_X0201_RM); 152 } 153 if (cset != CS_2) { 154 CHECK2BIG(SEQ_SOSI,1); 155 cset = CS_2; 156 PUT(SO); 157 } 158 CHECK2BIG(JISW2,1); 159 stat = ST_INIT; 160 PUT(ic & CMASK); 161 continue; 162 } else if (ISSJKANJI1(ic)) { /* CS_1 Kanji starts */ 163 if ((int)ileft > 0) { 164 if (ISSJKANJI2(*ip)) { 165 if (cset == CS_2) { 166 cset = CS_0; 167 PUT(SI); 168 } 169 if (cset != CS_1) { 170 CHECK2BIG(SEQ_MBTOG0_O,1); 171 cset = CS_1; 172 PUT(ESC); 173 PUT(MBTOG0_1); 174 PUT(F_X0208_83_90); 175 } 176 CHECK2BIG(JISW1,1); 177 stat = ST_INCS1; 178 ic = sjtojis1[(ic - 0x80)]; 179 if (*ip >= 0x9f) { 180 ic++; 181 } 182 PUT(ic); 183 continue; 184 } else { /* 2nd byte is illegal */ 185 UNGET(); 186 errno = EILSEQ; 187 retval = (size_t)ERR_RETURN; 188 goto ret; 189 } 190 } else { /* input fragment of Kanji */ 191 UNGET(); 192 errno = EINVAL; 193 retval = (size_t)ERR_RETURN; 194 goto ret; 195 } 196 } else if (ISSJSUPKANJI1(ic)) { /* CS_3 Kanji starts */ 197 if ((int)ileft > 0) { 198 if (ISSJKANJI2(*ip)) { 199 if (cset == CS_2) { 200 cset = CS_0; 201 PUT(SI); 202 } 203 if (cset != CS_3) { 204 CHECK2BIG(SEQ_MBTOG0,1); 205 cset = CS_3; 206 PUT(ESC); 207 PUT(MBTOG0_1); 208 PUT(MBTOG0_2); 209 PUT(F_X0212_90); 210 } 211 CHECK2BIG(JISW3,1); 212 stat = ST_INCS3; 213 ic = sjtojis1[(ic - 0x80)]; 214 if (*ip >= 0x9f) { 215 ic++; 216 } 217 PUT(ic); 218 continue; 219 } else { /* 2nd byte is illegal */ 220 UNGET(); 221 errno = EILSEQ; 222 retval = (size_t)ERR_RETURN; 223 goto ret; 224 } 225 } else { /* input fragment of Kanji */ 226 UNGET(); 227 errno = EINVAL; 228 retval = (size_t)ERR_RETURN; 229 goto ret; 230 } 231 } else if (ISSJIBM(ic) || /* Extended IBM char. area */ 232 ISSJNECIBM(ic)) { /* NEC/IBM char. area */ 233 /* 234 * We need a special treatment for each codes. 235 * By adding some offset number for them, we 236 * can process them as the same way of that of 237 * extended IBM chars. 238 */ 239 if ((int)ileft > 0) { 240 if (ISSJKANJI2(*ip)) { 241 unsigned short dest; 242 dest = (ic << 8); 243 GET(ic); 244 dest += ic; 245 if (cset == CS_2) { 246 cset = CS_0; 247 PUT(SI); 248 } 249 if ((0xed40 <= dest) && 250 (dest <= 0xeffc)) { 251 REMAP_NEC(dest); 252 if (dest == 0xffff) { 253 goto ill_ibm; 254 } 255 } 256 /* 257 * XXX: 0xfa54 and 0xfa5b must be mapped 258 * to JIS0208 area. Therefore we 259 * have to do special treatment. 260 */ 261 if ((cset != CS_1) && 262 ((dest == 0xfa54) || 263 (dest == 0xfa5b))) { 264 CHECK2BIG(SEQ_MBTOG0_O,2); 265 cset = CS_1; 266 PUT(ESC); 267 PUT(MBTOG0_1); 268 PUT(F_X0208_83_90); 269 CHECK2BIG(JISW1,2); 270 if (dest == 0xfa54) { 271 PUT(0x22); 272 PUT(0x4c); 273 } else { 274 PUT(0x22); 275 PUT(0x68); 276 } 277 continue; 278 } 279 if (cset != CS_3) { 280 CHECK2BIG(SEQ_MBTOG0,2); 281 cset = CS_3; 282 PUT(ESC); 283 PUT(MBTOG0_1); 284 PUT(MBTOG0_2); 285 PUT(F_X0212_90); 286 } 287 CHECK2BIG(JISW3,2); 288 dest = dest - 0xfa40 - 289 (((dest>>8) - 0xfa) * 0x40); 290 dest = sjtoibmext[dest]; 291 if (dest == 0xffff) { 292 /* 293 * Illegal code points 294 * in IBM-EXT area. 295 */ 296 ill_ibm: 297 UNGET(); 298 UNGET(); 299 errno = EILSEQ; 300 retval = (size_t)ERR_RETURN; 301 goto ret; 302 } 303 PUT(((dest>>8) & 0x7f)); 304 PUT(dest & 0x7f); 305 continue; 306 } else { /* 2nd byte is illegal */ 307 UNGET(); 308 errno = EILSEQ; 309 retval = (size_t)ERR_RETURN; 310 goto ret; 311 } 312 } else { /* input fragment of Kanji */ 313 UNGET(); 314 errno = EINVAL; 315 retval = (size_t)ERR_RETURN; 316 goto ret; 317 } 318 } else if ((0xeb <= ic) && (ic <= 0xec)) { 319 /* 320 * Based on the draft convention of OSF-JVC CDEWG, 321 * characters in this area will be mapped to 322 * "CHIKAN-MOJI." (convertible character) 323 * So far, we'll use (0x222e) for it. 324 */ 325 if ((int)ileft > 0) { 326 if (ISSJKANJI2(*ip)) { 327 if (cset == CS_2) { 328 cset = CS_0; 329 PUT(SI); 330 } 331 if (cset != CS_1) { 332 CHECK2BIG(SEQ_MBTOG0_O,1); 333 cset = CS_1; 334 PUT(ESC); 335 PUT(MBTOG0_1); 336 PUT(F_X0208_83_90); 337 } 338 CHECK2BIG(JISW1,1); 339 GET(ic); /* Dummy */ 340 PUT((JGETA>>8) & CMASK); 341 PUT(JGETA & CMASK); 342 continue; 343 } else { /* 2nd byte is illegal */ 344 UNGET(); 345 errno = EILSEQ; 346 retval = (size_t)ERR_RETURN; 347 goto ret; 348 } 349 } else { /* input fragment of Kanji */ 350 UNGET(); 351 errno = EINVAL; 352 retval = (size_t)ERR_RETURN; 353 goto ret; 354 } 355 } else { /* 1st byte is illegal */ 356 UNGET(); 357 errno = EILSEQ; 358 retval = (size_t)ERR_RETURN; 359 goto ret; 360 } 361 } 362 retval = ileft; 363 ret: 364 *inbuf = (char *)ip; 365 *inbytesleft = ileft; 366 *outbuf = op; 367 *outbytesleft = oleft; 368 st->_st_cset = cset; 369 370 return (retval); 371 } 372