1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 1994-2003 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <errno.h>
29 #include <euc.h>
30 #include "japanese.h"
31
/*
 * struct _icv_state; conversion state kept in the descriptor between
 * calls.  It is allocated by _icv_open(), read and written back by
 * _icv_iconv(), and released by _icv_close().
 */
struct _icv_state {
	/*
	 * Character set currently selected on the output side.  Holds one
	 * of the CS_0 .. CS_3 constants; _icv_iconv() uses it to decide
	 * whether an escape sequence or SO/SI must be emitted before the
	 * next character.
	 */
	int _st_cset;
};
38
39 void *
_icv_open()40 _icv_open()
41 {
42 struct _icv_state *st;
43
44 if ((st = (struct _icv_state *)malloc(sizeof (struct _icv_state)))
45 == NULL)
46 return ((void *)ERR_RETURN);
47
48 st->_st_cset = CS_0;
49 return (st);
50 }
51
/*
 * Dispose of a conversion descriptor by handing the storage that st
 * points to back to free().
 */
void
_icv_close(struct _icv_state *st)
{
	free((void *)st);
}
57
58
59
60 /*
61 * Actual conversion; called from iconv()
62 */
63 size_t
_icv_iconv(struct _icv_state * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)64 _icv_iconv(struct _icv_state *st, char **inbuf, size_t *inbytesleft,
65 char **outbuf, size_t *outbytesleft)
66 {
67 int cset;
68 int stat = ST_INIT;
69 unsigned char *ip, ic;
70 char *op;
71 size_t ileft, oleft;
72 size_t retval;
73
74 /*
75 * If inbuf and/or *inbuf are NULL, reset conversion descriptor
76 * and put escape sequence if needed.
77 */
78 if ((inbuf == NULL) || (*inbuf == NULL)) {
79 if ((st->_st_cset == CS_1) || (st->_st_cset == CS_3)) {
80 if ((outbuf != NULL) && (*outbuf != NULL)
81 && (outbytesleft != NULL)) {
82 op = *outbuf;
83 oleft = *outbytesleft;
84 if (oleft < SEQ_SBTOG0) {
85 errno = E2BIG;
86 return ((size_t)-1);
87 }
88 PUT(ESC);
89 PUT(SBTOG0_1);
90 PUT(F_X0201_RM);
91 *outbuf = op;
92 *outbytesleft = oleft;
93 }
94 st->_st_cset = CS_0;
95 } else if (st->_st_cset == CS_2) {
96 if ((outbuf != NULL) && (*outbuf != NULL)
97 && (outbytesleft != NULL)) {
98 op = *outbuf;
99 oleft = *outbytesleft;
100 if (oleft < SEQ_SOSI) {
101 errno = E2BIG;
102 return ((size_t)-1);
103 }
104 PUT(SI);
105 *outbuf = op;
106 *outbytesleft = oleft;
107 }
108 st->_st_cset = CS_0;
109 }
110 return ((size_t)0);
111 }
112
113 cset = st->_st_cset;
114
115 ip = (unsigned char *)*inbuf;
116 op = *outbuf;
117 ileft = *inbytesleft;
118 oleft = *outbytesleft;
119
120 /*
121 * Main loop; basically 1 loop per 1 input byte
122 */
123
124 while ((int)ileft > 0) {
125 GET(ic);
126 if ((stat == ST_INCS1) || (stat == ST_INCS3)) {
127 ic = sjtojis2[ic];
128 PUT(ic);
129 stat = ST_INIT;
130 continue;
131 } else if (ISASC((int)ic)) { /* ASCII */
132 if ((cset == CS_1) || (cset == CS_3)) {
133 CHECK2BIG(SEQ_SBTOG0,1);
134 PUT(ESC); /* to JIS X 0201 Roman */
135 PUT(SBTOG0_1);
136 PUT(F_X0201_RM);
137 } else if (cset == CS_2) {
138 CHECK2BIG(SEQ_SOSI,1);
139 PUT(SI); /* Shift In */
140 }
141 cset = CS_0;
142 CHECK2BIG(JISW0,1);
143 PUT(ic);
144 continue;
145 } else if (ISSJKANA(ic)) { /* Kana starts */
146 if ((cset == CS_1) || (cset == CS_3)) {
147 CHECK2BIG(SEQ_SBTOG0,1);
148 cset = CS_0;
149 PUT(ESC);
150 PUT(SBTOG0_1);
151 PUT(F_X0201_RM);
152 }
153 if (cset != CS_2) {
154 CHECK2BIG(SEQ_SOSI,1);
155 cset = CS_2;
156 PUT(SO);
157 }
158 CHECK2BIG(JISW2,1);
159 stat = ST_INIT;
160 PUT(ic & CMASK);
161 continue;
162 } else if (ISSJKANJI1(ic)) { /* CS_1 Kanji starts */
163 if ((int)ileft > 0) {
164 if (ISSJKANJI2(*ip)) {
165 if (cset == CS_2) {
166 cset = CS_0;
167 PUT(SI);
168 }
169 if (cset != CS_1) {
170 CHECK2BIG(SEQ_MBTOG0_O,1);
171 cset = CS_1;
172 PUT(ESC);
173 PUT(MBTOG0_1);
174 PUT(F_X0208_83_90);
175 }
176 CHECK2BIG(JISW1,1);
177 stat = ST_INCS1;
178 ic = sjtojis1[(ic - 0x80)];
179 if (*ip >= 0x9f) {
180 ic++;
181 }
182 PUT(ic);
183 continue;
184 } else { /* 2nd byte is illegal */
185 UNGET();
186 errno = EILSEQ;
187 retval = (size_t)ERR_RETURN;
188 goto ret;
189 }
190 } else { /* input fragment of Kanji */
191 UNGET();
192 errno = EINVAL;
193 retval = (size_t)ERR_RETURN;
194 goto ret;
195 }
196 } else if (ISSJSUPKANJI1(ic)) { /* CS_3 Kanji starts */
197 if ((int)ileft > 0) {
198 if (ISSJKANJI2(*ip)) {
199 if (cset == CS_2) {
200 cset = CS_0;
201 PUT(SI);
202 }
203 if (cset != CS_3) {
204 CHECK2BIG(SEQ_MBTOG0,1);
205 cset = CS_3;
206 PUT(ESC);
207 PUT(MBTOG0_1);
208 PUT(MBTOG0_2);
209 PUT(F_X0212_90);
210 }
211 CHECK2BIG(JISW3,1);
212 stat = ST_INCS3;
213 ic = sjtojis1[(ic - 0x80)];
214 if (*ip >= 0x9f) {
215 ic++;
216 }
217 PUT(ic);
218 continue;
219 } else { /* 2nd byte is illegal */
220 UNGET();
221 errno = EILSEQ;
222 retval = (size_t)ERR_RETURN;
223 goto ret;
224 }
225 } else { /* input fragment of Kanji */
226 UNGET();
227 errno = EINVAL;
228 retval = (size_t)ERR_RETURN;
229 goto ret;
230 }
231 } else if (ISSJIBM(ic) || /* Extended IBM char. area */
232 ISSJNECIBM(ic)) { /* NEC/IBM char. area */
233 /*
234 * We need a special treatment for each codes.
235 * By adding some offset number for them, we
236 * can process them as the same way of that of
237 * extended IBM chars.
238 */
239 if ((int)ileft > 0) {
240 if (ISSJKANJI2(*ip)) {
241 unsigned short dest;
242 dest = (ic << 8);
243 GET(ic);
244 dest += ic;
245 if (cset == CS_2) {
246 cset = CS_0;
247 PUT(SI);
248 }
249 if ((0xed40 <= dest) &&
250 (dest <= 0xeffc)) {
251 REMAP_NEC(dest);
252 if (dest == 0xffff) {
253 goto ill_ibm;
254 }
255 }
256 /*
257 * XXX: 0xfa54 and 0xfa5b must be mapped
258 * to JIS0208 area. Therefore we
259 * have to do special treatment.
260 */
261 if ((cset != CS_1) &&
262 ((dest == 0xfa54) ||
263 (dest == 0xfa5b))) {
264 CHECK2BIG(SEQ_MBTOG0_O,2);
265 cset = CS_1;
266 PUT(ESC);
267 PUT(MBTOG0_1);
268 PUT(F_X0208_83_90);
269 CHECK2BIG(JISW1,2);
270 if (dest == 0xfa54) {
271 PUT(0x22);
272 PUT(0x4c);
273 } else {
274 PUT(0x22);
275 PUT(0x68);
276 }
277 continue;
278 }
279 if (cset != CS_3) {
280 CHECK2BIG(SEQ_MBTOG0,2);
281 cset = CS_3;
282 PUT(ESC);
283 PUT(MBTOG0_1);
284 PUT(MBTOG0_2);
285 PUT(F_X0212_90);
286 }
287 CHECK2BIG(JISW3,2);
288 dest = dest - 0xfa40 -
289 (((dest>>8) - 0xfa) * 0x40);
290 dest = sjtoibmext[dest];
291 if (dest == 0xffff) {
292 /*
293 * Illegal code points
294 * in IBM-EXT area.
295 */
296 ill_ibm:
297 UNGET();
298 UNGET();
299 errno = EILSEQ;
300 retval = (size_t)ERR_RETURN;
301 goto ret;
302 }
303 PUT(((dest>>8) & 0x7f));
304 PUT(dest & 0x7f);
305 continue;
306 } else { /* 2nd byte is illegal */
307 UNGET();
308 errno = EILSEQ;
309 retval = (size_t)ERR_RETURN;
310 goto ret;
311 }
312 } else { /* input fragment of Kanji */
313 UNGET();
314 errno = EINVAL;
315 retval = (size_t)ERR_RETURN;
316 goto ret;
317 }
318 } else if ((0xeb <= ic) && (ic <= 0xec)) {
319 /*
320 * Based on the draft convention of OSF-JVC CDEWG,
321 * characters in this area will be mapped to
322 * "CHIKAN-MOJI." (convertible character)
323 * So far, we'll use (0x222e) for it.
324 */
325 if ((int)ileft > 0) {
326 if (ISSJKANJI2(*ip)) {
327 if (cset == CS_2) {
328 cset = CS_0;
329 PUT(SI);
330 }
331 if (cset != CS_1) {
332 CHECK2BIG(SEQ_MBTOG0_O,1);
333 cset = CS_1;
334 PUT(ESC);
335 PUT(MBTOG0_1);
336 PUT(F_X0208_83_90);
337 }
338 CHECK2BIG(JISW1,1);
339 GET(ic); /* Dummy */
340 PUT((JGETA>>8) & CMASK);
341 PUT(JGETA & CMASK);
342 continue;
343 } else { /* 2nd byte is illegal */
344 UNGET();
345 errno = EILSEQ;
346 retval = (size_t)ERR_RETURN;
347 goto ret;
348 }
349 } else { /* input fragment of Kanji */
350 UNGET();
351 errno = EINVAL;
352 retval = (size_t)ERR_RETURN;
353 goto ret;
354 }
355 } else { /* 1st byte is illegal */
356 UNGET();
357 errno = EILSEQ;
358 retval = (size_t)ERR_RETURN;
359 goto ret;
360 }
361 }
362 retval = ileft;
363 ret:
364 *inbuf = (char *)ip;
365 *inbytesleft = ileft;
366 *outbuf = op;
367 *outbytesleft = oleft;
368 st->_st_cset = cset;
369
370 return (retval);
371 }
372