1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * COPYRIGHT AND PERMISSION NOTICE 23 * 24 * Copyright (c) 1991-2005 Unicode, Inc. All rights reserved. Distributed 25 * under the Terms of Use in http://www.unicode.org/copyright.html. 26 * 27 * This file has been modified by Sun Microsystems, Inc. 28 */ 29 /* 30 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 31 * Use is subject to license terms. 32 */ 33 34 35 #include <stdlib.h> 36 #include <errno.h> 37 #include <euc.h> 38 #include "japanese.h" 39 #include "jfp_iconv_unicode.h" 40 41 #ifdef JAVA_CONV_COMPAT 42 #define JFP_J2U_ICONV_JAVA 43 #elif JFP_ICONV_MS932 44 #define JFP_J2U_ICONV_MS932 45 #else 46 #define JFP_J2U_ICONV 47 #endif 48 #include "jfp_jis_to_ucs2.h" 49 50 void * 51 _icv_open(void) 52 { 53 return (_icv_open_unicode((size_t)0)); 54 } 55 56 void 57 _icv_close(void *cd) 58 { 59 _icv_close_unicode(cd); 60 return; 61 } 62 63 size_t 64 _icv_iconv(void *cd, char **inbuf, size_t *inbytesleft, 65 char **outbuf, size_t *outbytesleft) 66 { 67 unsigned int uni; /* UTF-32 */ 68 unsigned int index; /* index for table lookup */ 69 unsigned char ic1, ic2; /* 1st and 2nd bytes of a char */ 70 size_t rv = (size_t)0; /* return value of this function */ 71 72 unsigned char *ip; 73 size_t ileft; 74 char *op; 75 size_t oleft; 76 77 /* 78 * If inbuf and/or *inbuf are NULL, reset conversion descriptor 79 * and put escape sequence if needed. 80 */ 81 if ((inbuf == NULL) || (*inbuf == NULL)) { 82 _icv_reset_unicode(cd); 83 return ((size_t)0); 84 } 85 86 ip = (unsigned char *)*inbuf; 87 ileft = *inbytesleft; 88 op = *outbuf; 89 oleft = *outbytesleft; 90 91 while (ileft != 0) { 92 NGET(ic1, "never fail here"); /* get 1st byte */ 93 94 if (ISASC((int)ic1)) { /* ASCII; 1 byte */ 95 uni = _jfp_tbl_jisx0201roman_to_ucs2[ic1]; 96 PUTU(uni, "ASCII"); 97 } else if (ISSJKANA(ic1)) { /* JIS X 0201 Kana; 1 byte */ 98 uni = _jfp_tbl_jisx0201kana_to_ucs2[(ic1 - 0xa1)]; 99 PUTU(uni, "KANA"); 100 } else if (ISSJKANJI1(ic1)) { /* JIS X 0208 or UDC; 2 bytes */ 101 NGET(ic2, "CS1-2 not available"); 102 if (ISSJKANJI2(ic2)) { 103 ic1 = sjtojis1[(ic1 - 0x80)]; 104 if (ic2 >= 0x9f) { 105 ic1++; 106 } 107 index = ((ic1 - 0x21) * 94) 108 + (sjtojis2[ic2] - 0x21); 109 uni = _jfp_tbl_jisx0208_to_ucs2[index]; 110 PUTU(uni, "KANJI"); 111 } else { /* 2nd byte check failed */ 112 RETERROR(EILSEQ, "EILSEQ at CS1-2") 113 /* NOTREACHED */ 114 } 115 } else if (ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */ 116 NGET(ic2, "SUP-2 not available"); 117 if (ISSJKANJI2(ic2)) { 118 ic1 = sjtojis1[(ic1 - 0x80)]; 119 if (ic2 >= 0x9f) { 120 ic1++; 121 } 122 index = ((ic1 - 0x21) * 94) 123 + (sjtojis2[ic2] - 0x21); 124 uni = _jfp_tbl_jisx0212_to_ucs2[index]; 125 PUTU(uni, "SUPKANJI"); 126 } else { /* 2nd byte check failed */ 127 RETERROR(EILSEQ, "EILSEQ at CS1-2") 128 } 129 } else if (ISSJIBM(ic1) || /* Extended IBM char. area */ 130 ISSJNECIBM(ic1)) { /* NEC/IBM char. area */ 131 /* 132 * We need a special treatment for each codes. 133 * By adding some offset number for them, we 134 * can process them as the same way of that of 135 * extended IBM chars. 136 */ 137 NGET(ic2, "IBM-2 not available"); 138 if (ISSJKANJI2(ic2)) { 139 unsigned short dest, upper, lower; 140 dest = (ic1 << 8) + ic2; 141 if ((0xed40 <= dest) && (dest <= 0xeffc)) { 142 REMAP_NEC(dest); 143 if (dest == 0xffff) { 144 RETERROR(EILSEQ, "invalid NEC") 145 } 146 } 147 /* 148 * XXX: 0xfa54 and 0xfa5b must be mapped 149 * to JIS0208 area. Therefore we 150 * have to do special treatment. 151 */ 152 if ((dest == 0xfa54) || (dest == 0xfa5b)) { 153 if (dest == 0xfa54) { 154 /* map to JIS X 0208 row 2 cell 44 "NOT SIGN" */ 155 index = (2 - 1) * 94 + (44 - 1); 156 } else { 157 /* map to JIS X 0208 row 2 cell 72 "BECAUSE" */ 158 index = (2 - 1) * 94 + (72 - 1); 159 } 160 uni = _jfp_tbl_jisx0208_to_ucs2[index]; 161 PUTU(uni, "IBM"); 162 } else { 163 dest = dest - 0xfa40 - 164 (((dest>>8) - 0xfa) * 0x40); 165 dest = sjtoibmext[dest]; 166 if (dest == 0xffff) { 167 RETERROR(EILSEQ, "invalid IBM") 168 } 169 upper = ((dest >> 8) & 0x7f) - 0x21; 170 lower = (dest & 0x7f) - 0x21; 171 index = (unsigned int)(upper * 94 + 172 lower); 173 uni = _jfp_tbl_jisx0212_to_ucs2[index]; 174 PUTU(uni, "IBM"); 175 } 176 } else { /* 2nd byte check failed */ 177 RETERROR(EILSEQ, "EILSEQ at IBM-2") 178 } 179 } else if ((0xeb <= ic1) && (ic1 <= 0xec)) { 180 /* 181 * Based on the draft convention of OSF-JVC CDEWG, 182 * characters in this area will be mapped to 183 * "CHIKAN-MOJI." (convertible character) 184 * We use U+FFFD in this case. 185 */ 186 NGET(ic2, "GAP-2 not available"); 187 if (ISSJKANJI2(ic2)) { 188 uni = 0xfffd; 189 PUTU(uni, "GAP"); 190 } else { /* 2nd byte check failed */ 191 RETERROR(EILSEQ, "EILSEQ at GAP-2") 192 } 193 } else { /* 1st byte check failed */ 194 RETERROR(EILSEQ, "EILSEQ at 1st") 195 } 196 197 /* 198 * One character successfully converted so update 199 * values outside of this function's stack. 200 */ 201 *inbuf = (char *)ip; 202 *inbytesleft = ileft; 203 *outbuf = op; 204 *outbytesleft = oleft; 205 } 206 207 ret: 208 DEBUGPRINTERROR 209 210 /* 211 * Return value for successful return is not defined by XPG 212 * so return same as *inbytesleft as existing codes do. 213 */ 214 return ((rv == (size_t)-1) ? rv : *inbytesleft); 215 } 216