1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 1994-2003 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <errno.h>
29 #include <euc.h>
30 #include "japanese.h"
31
/*
 * struct _icv_state: per-descriptor conversion state.
 *
 * One instance is allocated by _icv_open(), updated by _icv_iconv()
 * and released by _icv_close().
 */
struct _icv_state {
	int	_st_cset;	/* character set currently designated (CS_*) */
};
38
39 void *
_icv_open()40 _icv_open()
41 {
42 struct _icv_state *st;
43
44 if ((st = (struct _icv_state *)malloc(sizeof (struct _icv_state)))
45 == NULL)
46 return ((void *)ERR_RETURN);
47
48 st->_st_cset = CS_0;
49 return (st);
50 }
51
/*
 * _icv_close: release a conversion descriptor.
 *
 * st is handed straight to free(); nothing else is torn down here.
 */
void
_icv_close(struct _icv_state *st)
{
	free((void *)st);
}
57
58 size_t
_icv_iconv(struct _icv_state * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)59 _icv_iconv(struct _icv_state *st, char **inbuf, size_t *inbytesleft,
60 char **outbuf, size_t *outbytesleft)
61 {
62 int cset;
63 unsigned char *ip, ic;
64 char *op;
65 size_t ileft, oleft;
66 size_t retval;
67 #ifdef RFC1468_MODE
68 unsigned short zenkaku;
69 #endif
70
71 /*
72 * If inbuf and/or *inbuf are NULL, reset conversion descriptor
73 * and put escape sequence if needed.
74 */
75 if ((inbuf == NULL) || (*inbuf == NULL)) {
76 if (st->_st_cset != CS_0) {
77 if ((outbuf != NULL) && (*outbuf != NULL)
78 && (outbytesleft != NULL)) {
79 op = *outbuf;
80 oleft = *outbytesleft;
81 if (oleft < SEQ_SBTOG0) {
82 errno = E2BIG;
83 return ((size_t)-1);
84 }
85 PUT(ESC);
86 PUT(SBTOG0_1);
87 PUT(F_X0201_RM);
88 *outbuf = op;
89 *outbytesleft = oleft;
90 }
91 st->_st_cset = CS_0;
92 }
93 return ((size_t)0);
94 }
95
96 cset = st->_st_cset;
97
98 ip = (unsigned char *)*inbuf;
99 op = *outbuf;
100 ileft = *inbytesleft;
101 oleft = *outbytesleft;
102
103 /*
104 * Main loop; basically 1 loop per 1 input byte
105 */
106
107 while ((int)ileft > 0) {
108 GET(ic);
109 if (ISASC((int)ic)) { /* ASCII */
110 if (cset != CS_0) {
111 CHECK2BIG(SEQ_SBTOG0,1);
112 PUT(ESC); /* to JIS X 0201 Roman */
113 PUT(SBTOG0_1);
114 PUT(F_X0201_RM);
115 }
116 cset = CS_0;
117 CHECK2BIG(JISW0,1);
118 PUT(ic);
119 continue;
120 } else if (ISSJKANA(ic)) { /* Kana starts */
121 #ifdef RFC1468_MODE /* Substitute JIS X 0208 for JIS X 0201 katakana */
122 if (cset != CS_1) {
123 CHECK2BIG(SEQ_MBTOG0_O,1);
124 cset = CS_1;
125 PUT(ESC);
126 PUT(MBTOG0_1);
127 PUT(F_X0208_83_90);
128 }
129 CHECK2BIG(JISW1,1);
130 zenkaku = halfkana2zenkakuj[ic - 0xA1];
131 ic = (unsigned char)((zenkaku >> 8) & CMASK);
132 PUT(ic);
133 ic = (unsigned char)(zenkaku & CMASK);
134 PUT(ic);
135 #else /* ISO-2022-JP.UIOSF */
136 if (cset != CS_2) {
137 CHECK2BIG(SEQ_SBTOG0,1);
138 cset = CS_2;
139 PUT(ESC);
140 PUT(SBTOG0_1);
141 PUT(F_X0201_KN);
142 }
143 CHECK2BIG(JISW2,1);
144 PUT(ic & CMASK);
145 #endif /* RFC1468_MODE */
146 continue;
147 } else if (ISSJKANJI1(ic)) { /* CS_1 Kanji starts */
148 if ((int)ileft > 0) {
149 if (ISSJKANJI2(*ip)) {
150 if (cset != CS_1) {
151 CHECK2BIG(SEQ_MBTOG0_O,1);
152 cset = CS_1;
153 PUT(ESC);
154 PUT(MBTOG0_1);
155 PUT(F_X0208_83_90);
156 }
157 CHECK2BIG(JISW1,1);
158 #ifdef RFC1468_MODE /* Convert VDC and UDC to GETA */
159 if ((ic == 0x87) || (0xed <= ic )){
160 PUT((JGETA >> 8) & CMASK);
161 GET(ic); /* Get dummy */
162 PUT(JGETA & CMASK);
163 continue;
164 }
165 #endif /* RFC1468_MODE */
166 ic = sjtojis1[(ic - 0x80)];
167 if (*ip >= 0x9f) {
168 ic++;
169 }
170 PUT(ic);
171 GET(ic);
172 ic = sjtojis2[ic];
173 PUT(ic);
174 continue;
175 } else { /* 2nd byte is illegal */
176 UNGET();
177 errno = EILSEQ;
178 retval = (size_t)ERR_RETURN;
179 goto ret;
180 }
181 } else { /* input fragment of Kanji */
182 UNGET();
183 errno = EINVAL;
184 retval = (size_t)ERR_RETURN;
185 goto ret;
186 }
187 } else if (ISSJSUPKANJI1(ic)) { /* CS_3 Kanji starts */
188 if ((int)ileft > 0) {
189 if (ISSJKANJI2(*ip)) {
190 #ifdef RFC1468_MODE /* Substitute JIS X 0208 "Geta" for JIS X 0212 */
191 if (cset != CS_1) {
192 CHECK2BIG(SEQ_MBTOG0_O,1);
193 cset = CS_1;
194 PUT(ESC);
195 PUT(MBTOG0_1);
196 PUT(F_X0208_83_90);
197 }
198 CHECK2BIG(JISW1,1);
199 /* Put GETA (0x222e) */
200 ic = (unsigned char)((JGETA >> 8) &
201 CMASK);
202 PUT(ic);
203 ic = (unsigned char)(JGETA & CMASK);
204 PUT(ic);
205 GET(ic); /* dummy GET */
206 #else /* ISO-2022-JP.UIOSF */
207 if (cset != CS_3) {
208 CHECK2BIG(SEQ_MBTOG0,1);
209 cset = CS_3;
210 PUT(ESC);
211 PUT(MBTOG0_1);
212 PUT(MBTOG0_2);
213 PUT(F_X0212_90);
214 }
215 CHECK2BIG(JISW3,1);
216 ic = sjtojis1[(ic - 0x80)];
217 if (*ip >= 0x9f) {
218 ic++;
219 }
220 PUT(ic);
221 GET(ic);
222 ic = sjtojis2[ic];
223 PUT(ic);
224 #endif /* RFC1468_MODE */
225 continue;
226 } else { /* 2nd byte is illegal */
227 UNGET();
228 errno = EILSEQ;
229 retval = (size_t)ERR_RETURN;
230 goto ret;
231 }
232 } else { /* input fragment of Kanji */
233 UNGET();
234 errno = EINVAL;
235 retval = (size_t)ERR_RETURN;
236 goto ret;
237 }
238 } else if (ISSJIBM(ic) || /* Extended IBM char. area */
239 ISSJNECIBM(ic)) { /* NEC/IBM char. area */
240 /*
241 * We need a special treatment for each codes.
242 * By adding some offset number for them, we
243 * can process them as the same way of that of
244 * extended IBM chars.
245 */
246 if ((int)ileft > 0) {
247 if (ISSJKANJI2(*ip)) {
248 unsigned short dest;
249 dest = (ic << 8);
250 GET(ic);
251 dest += ic;
252 if ((0xed40 <= dest) &&
253 (dest <= 0xeffc)) {
254 REMAP_NEC(dest);
255 if (dest == 0xffff) {
256 goto ill_ibm;
257 }
258 }
259 /*
260 * XXX: 0xfa54 and 0xfa5b must be mapped
261 * to JIS0208 area. Therefore we
262 * have to do special treatment.
263 */
264 if ((cset != CS_1) &&
265 ((dest == 0xfa54) ||
266 (dest == 0xfa5b))) {
267 CHECK2BIG(SEQ_MBTOG0_O,2);
268 cset = CS_1;
269 PUT(ESC);
270 PUT(MBTOG0_1);
271 PUT(F_X0208_83_90);
272 CHECK2BIG(JISW1,2);
273 if (dest == 0xfa54) {
274 PUT(0x22);
275 PUT(0x4c);
276 } else {
277 PUT(0x22);
278 PUT(0x68);
279 }
280 continue;
281 }
282 if (cset != CS_3) {
283 CHECK2BIG(SEQ_MBTOG0,2);
284 cset = CS_3;
285 PUT(ESC);
286 PUT(MBTOG0_1);
287 PUT(MBTOG0_2);
288 PUT(F_X0212_90);
289 }
290 CHECK2BIG(JISW3,2);
291 dest = dest - 0xfa40 -
292 (((dest>>8) - 0xfa) * 0x40);
293 dest = sjtoibmext[dest];
294 if (dest == 0xffff) {
295 /*
296 * Illegal code points
297 * in IBM-EXT area.
298 */
299 ill_ibm:
300 UNGET();
301 UNGET();
302 errno = EILSEQ;
303 retval = (size_t)ERR_RETURN;
304 goto ret;
305 }
306 PUT(((dest>>8) & 0x7f));
307 PUT(dest & 0x7f);
308 continue;
309 } else { /* 2nd byte is illegal */
310 UNGET();
311 errno = EILSEQ;
312 retval = (size_t)ERR_RETURN;
313 goto ret;
314 }
315 } else { /* input fragment of Kanji */
316 UNGET();
317 errno = EINVAL;
318 retval = (size_t)ERR_RETURN;
319 goto ret;
320 }
321 } else if ((0xeb <= ic) && (ic <= 0xec)) {
322 /*
323 * Based on the draft convention of OSF-JVC CDEWG,
324 * characters in this area will be mapped to
325 * "CHIKAN-MOJI." (convertible character)
326 * So far, we'll use (0x222e) for it.
327 */
328 if ((int)ileft > 0) {
329 if (ISSJKANJI2(*ip)) {
330 if (cset != CS_1) {
331 CHECK2BIG(SEQ_MBTOG0_O,1);
332 cset = CS_1;
333 PUT(ESC);
334 PUT(MBTOG0_1);
335 PUT(F_X0208_83_90);
336 }
337 CHECK2BIG(JISW1,1);
338 GET(ic); /* Dummy */
339 PUT((JGETA>>8) & CMASK);
340 PUT(JGETA & CMASK);
341 continue;
342 } else { /* 2nd byte is illegal */
343 UNGET();
344 errno = EILSEQ;
345 retval = (size_t)ERR_RETURN;
346 goto ret;
347 }
348 } else { /* input fragment of Kanji */
349 UNGET();
350 errno = EINVAL;
351 retval = (size_t)ERR_RETURN;
352 goto ret;
353 }
354 } else { /* 1st byte is illegal */
355 UNGET();
356 errno = EILSEQ;
357 retval = (size_t)ERR_RETURN;
358 goto ret;
359 }
360 }
361 retval = ileft;
362 ret:
363 *inbuf = (char *)ip;
364 *inbytesleft = ileft;
365 *outbuf = op;
366 *outbytesleft = oleft;
367 st->_st_cset = cset;
368
369 return (retval);
370 }
371