1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * COPYRIGHT AND PERMISSION NOTICE
23 *
24 * Copyright (c) 1991-2005 Unicode, Inc. All rights reserved. Distributed
25 * under the Terms of Use in http://www.unicode.org/copyright.html.
26 *
27 * This file has been modified by Sun Microsystems, Inc.
28 */
29 /*
30 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
31 * Use is subject to license terms.
32 */
33
34
35 #include <stdlib.h>
36 #include <errno.h>
37 #include <euc.h>
38 #include "japanese.h"
39 #include "jfp_iconv_unicode.h"
40
41 #ifdef JAVA_CONV_COMPAT
42 #define JFP_J2U_ICONV_JAVA
43 #elif JFP_ICONV_MS932
44 #define JFP_J2U_ICONV_MS932
45 #else
46 #define JFP_J2U_ICONV
47 #endif
48 #include "jfp_jis_to_ucs2.h"
49
/*
 * Open entry point of this conversion module.
 *
 * Delegates to _icv_open_unicode() with an argument of zero and hands
 * its result back to the caller unchanged.  The meaning of the argument
 * is defined by _icv_open_unicode(), which lives outside this file.
 */
void *
_icv_open(void)
{
	const size_t	open_arg = (size_t)0;

	return (_icv_open_unicode(open_arg));
}
55
/*
 * Close entry point of this conversion module.
 *
 * Passes the conversion descriptor `cd' straight to _icv_close_unicode();
 * nothing else is done here.
 */
void
_icv_close(void *cd)
{
	_icv_close_unicode(cd);
}
62
63 size_t
_icv_iconv(void * cd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)64 _icv_iconv(void *cd, char **inbuf, size_t *inbytesleft,
65 char **outbuf, size_t *outbytesleft)
66 {
67 unsigned int uni; /* UTF-32 */
68 unsigned int index; /* index for table lookup */
69 unsigned char ic1, ic2; /* 1st and 2nd bytes of a char */
70 size_t rv = (size_t)0; /* return value of this function */
71
72 unsigned char *ip;
73 size_t ileft;
74 char *op;
75 size_t oleft;
76
77 /*
78 * If inbuf and/or *inbuf are NULL, reset conversion descriptor
79 * and put escape sequence if needed.
80 */
81 if ((inbuf == NULL) || (*inbuf == NULL)) {
82 _icv_reset_unicode(cd);
83 return ((size_t)0);
84 }
85
86 ip = (unsigned char *)*inbuf;
87 ileft = *inbytesleft;
88 op = *outbuf;
89 oleft = *outbytesleft;
90
91 while (ileft != 0) {
92 NGET(ic1, "never fail here"); /* get 1st byte */
93
94 if (ISASC((int)ic1)) { /* ASCII; 1 byte */
95 uni = _jfp_tbl_jisx0201roman_to_ucs2[ic1];
96 PUTU(uni, "ASCII");
97 } else if (ISSJKANA(ic1)) { /* JIS X 0201 Kana; 1 byte */
98 uni = _jfp_tbl_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
99 PUTU(uni, "KANA");
100 } else if (ISSJKANJI1(ic1)) { /* JIS X 0208 or UDC; 2 bytes */
101 NGET(ic2, "CS1-2 not available");
102 if (ISSJKANJI2(ic2)) {
103 ic1 = sjtojis1[(ic1 - 0x80)];
104 if (ic2 >= 0x9f) {
105 ic1++;
106 }
107 index = ((ic1 - 0x21) * 94)
108 + (sjtojis2[ic2] - 0x21);
109 uni = _jfp_tbl_jisx0208_to_ucs2[index];
110 PUTU(uni, "KANJI");
111 } else { /* 2nd byte check failed */
112 RETERROR(EILSEQ, "EILSEQ at CS1-2")
113 /* NOTREACHED */
114 }
115 } else if (ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
116 NGET(ic2, "SUP-2 not available");
117 if (ISSJKANJI2(ic2)) {
118 ic1 = sjtojis1[(ic1 - 0x80)];
119 if (ic2 >= 0x9f) {
120 ic1++;
121 }
122 index = ((ic1 - 0x21) * 94)
123 + (sjtojis2[ic2] - 0x21);
124 uni = _jfp_tbl_jisx0212_to_ucs2[index];
125 PUTU(uni, "SUPKANJI");
126 } else { /* 2nd byte check failed */
127 RETERROR(EILSEQ, "EILSEQ at CS1-2")
128 }
129 } else if (ISSJIBM(ic1) || /* Extended IBM char. area */
130 ISSJNECIBM(ic1)) { /* NEC/IBM char. area */
131 /*
132 * We need a special treatment for each codes.
133 * By adding some offset number for them, we
134 * can process them as the same way of that of
135 * extended IBM chars.
136 */
137 NGET(ic2, "IBM-2 not available");
138 if (ISSJKANJI2(ic2)) {
139 unsigned short dest, upper, lower;
140 dest = (ic1 << 8) + ic2;
141 if ((0xed40 <= dest) && (dest <= 0xeffc)) {
142 REMAP_NEC(dest);
143 if (dest == 0xffff) {
144 RETERROR(EILSEQ, "invalid NEC")
145 }
146 }
147 /*
148 * XXX: 0xfa54 and 0xfa5b must be mapped
149 * to JIS0208 area. Therefore we
150 * have to do special treatment.
151 */
152 if ((dest == 0xfa54) || (dest == 0xfa5b)) {
153 if (dest == 0xfa54) {
154 /* map to JIS X 0208 row 2 cell 44 "NOT SIGN" */
155 index = (2 - 1) * 94 + (44 - 1);
156 } else {
157 /* map to JIS X 0208 row 2 cell 72 "BECAUSE" */
158 index = (2 - 1) * 94 + (72 - 1);
159 }
160 uni = _jfp_tbl_jisx0208_to_ucs2[index];
161 PUTU(uni, "IBM");
162 } else {
163 dest = dest - 0xfa40 -
164 (((dest>>8) - 0xfa) * 0x40);
165 dest = sjtoibmext[dest];
166 if (dest == 0xffff) {
167 RETERROR(EILSEQ, "invalid IBM")
168 }
169 upper = ((dest >> 8) & 0x7f) - 0x21;
170 lower = (dest & 0x7f) - 0x21;
171 index = (unsigned int)(upper * 94 +
172 lower);
173 uni = _jfp_tbl_jisx0212_to_ucs2[index];
174 PUTU(uni, "IBM");
175 }
176 } else { /* 2nd byte check failed */
177 RETERROR(EILSEQ, "EILSEQ at IBM-2")
178 }
179 } else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
180 /*
181 * Based on the draft convention of OSF-JVC CDEWG,
182 * characters in this area will be mapped to
183 * "CHIKAN-MOJI." (convertible character)
184 * We use U+FFFD in this case.
185 */
186 NGET(ic2, "GAP-2 not available");
187 if (ISSJKANJI2(ic2)) {
188 uni = 0xfffd;
189 PUTU(uni, "GAP");
190 } else { /* 2nd byte check failed */
191 RETERROR(EILSEQ, "EILSEQ at GAP-2")
192 }
193 } else { /* 1st byte check failed */
194 RETERROR(EILSEQ, "EILSEQ at 1st")
195 }
196
197 /*
198 * One character successfully converted so update
199 * values outside of this function's stack.
200 */
201 *inbuf = (char *)ip;
202 *inbytesleft = ileft;
203 *outbuf = op;
204 *outbytesleft = oleft;
205 }
206
207 ret:
208 DEBUGPRINTERROR
209
210 /*
211 * Return value for successful return is not defined by XPG
212 * so return same as *inbytesleft as existing codes do.
213 */
214 return ((rv == (size_t)-1) ? rv : *inbytesleft);
215 }
216