1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 1997-2003 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <errno.h>
30 #include <euc.h>
31 #include "japanese.h"
32 #include "jfp_iconv_unicode.h"
33
34 /* Note: JFP_J2U_ICONV_RFC1468 macro pass through hankaku katakata. */
35 #ifdef RFC1468_MODE
36 #define JFP_J2U_ICONV_RFC1468
37 #else
38 #define JFP_J2U_ICONV
39 #endif
40 #include "jfp_jis_to_ucs2.h"
41
/*
 * struct _icv_state: conversion state kept between _icv_iconv() calls
 */
struct _icv_state {
	/* character set in effect when the previous call returned */
	int	_st_cset;
	/*
	 * character set last designated by an escape sequence;
	 * SI switches back to it after an SO
	 */
	int	_st_cset_sav;
};
49
50 void *
_icv_open()51 _icv_open()
52 {
53 struct _icv_state *st;
54
55 if ((st = (struct _icv_state *)malloc(sizeof (struct _icv_state)))
56 == NULL)
57 return ((void *)ERR_RETURN);
58
59 st->_st_cset_sav = st->_st_cset = CS_0;
60
61 return (st);
62 }
63
/*
 * Release the conversion state; st is the block that _icv_open()
 * obtained from malloc().
 */
void
_icv_close(struct _icv_state *st)
{
	free(st);
}
69
70 size_t
_icv_iconv(struct _icv_state * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)71 _icv_iconv(struct _icv_state *st, char **inbuf, size_t *inbytesleft,
72 char **outbuf, size_t *outbytesleft)
73 {
74 int cset, stat, ret_val;
75 char *ip, ic;
76 size_t ileft;
77 size_t retval;
78 char *op;
79 size_t oleft;
80 unsigned int index = 0;
81
82 /*
83 * If inbuf and/or *inbuf are NULL, reset conversion descriptor
84 * and put escape sequence if needed.
85 */
86 if ((inbuf == NULL) || (*inbuf == NULL)) {
87 st->_st_cset_sav = st->_st_cset = CS_0;
88 return ((size_t)0);
89 }
90
91 cset = st->_st_cset;
92 stat = ST_INIT;
93
94 ip = *inbuf;
95 op = *outbuf;
96 ileft = *inbytesleft;
97 oleft = *outbytesleft;
98
99 /*
100 * Main loop; 1 loop per 1 input byte
101 */
102
103 while ((int)ileft > 0) {
104 GET(ic);
105 if (stat == ST_ESC) {
106 if (ic == MBTOG0_1) {
107 if ((int)ileft > 0) {
108 stat = ST_MBTOG0_1;
109 continue;
110 } else {
111 UNGET();
112 UNGET();
113 errno = EINVAL;
114 retval = (size_t)ERR_RETURN;
115 goto ret;
116 }
117 } else if (ic == SBTOG0_1) {
118 if ((int)ileft > 0) {
119 stat = ST_SBTOG0;
120 continue;
121 } else {
122 UNGET();
123 UNGET();
124 errno = EINVAL;
125 retval = (size_t)ERR_RETURN;
126 goto ret;
127 }
128 } else if (ic == X208REV_1) {
129 if ((int)ileft > 0) {
130 stat = ST_208REV_1;
131 continue;
132 } else {
133 UNGET();
134 UNGET();
135 errno = EINVAL;
136 retval = (size_t)ERR_RETURN;
137 goto ret;
138 }
139 } else {
140 UNGET();
141 UNGET();
142 errno = EILSEQ;
143 retval = (size_t)ERR_RETURN;
144 goto ret;
145 }
146 } else if (stat == ST_MBTOG0_1) {
147 if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) {
148 stat = ST_INIT;
149 st->_st_cset_sav = cset = CS_1;
150 continue;
151 } else if (ic == MBTOG0_2) {
152 if ((int)ileft > 0) {
153 stat = ST_MBTOG0_2;
154 continue;
155 } else {
156 UNGET();
157 UNGET();
158 UNGET();
159 errno = EINVAL;
160 retval = (size_t)ERR_RETURN;
161 goto ret;
162 }
163 } else if (ic == F_X0212_90) {
164 stat = ST_INIT;
165 st->_st_cset_sav = cset = CS_3;
166 continue;
167 } else {
168 UNGET();
169 UNGET();
170 UNGET();
171 errno = EILSEQ;
172 retval = (size_t)ERR_RETURN;
173 goto ret;
174 }
175 } else if (stat == ST_MBTOG0_2) {
176 if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) {
177 stat = ST_INIT;
178 st->_st_cset_sav = cset = CS_1;
179 continue;
180 } else if (ic == F_X0212_90) {
181 stat = ST_INIT;
182 st->_st_cset_sav = cset = CS_3;
183 continue;
184 } else {
185 UNGET();
186 UNGET();
187 UNGET();
188 UNGET();
189 errno = EILSEQ;
190 retval = (size_t)ERR_RETURN;
191 goto ret;
192 }
193 } else if (stat == ST_SBTOG0) {
194 if ((ic == F_ASCII) ||
195 (ic == F_X0201_RM) ||
196 (ic == F_ISO646)) {
197 stat = ST_INIT;
198 st->_st_cset_sav = cset = CS_0;
199 continue;
200 } else if (ic == F_X0201_KN) {
201 stat = ST_INIT;
202 st->_st_cset_sav = cset = CS_2;
203 continue;
204 } else {
205 UNGET();
206 UNGET();
207 UNGET();
208 errno = EILSEQ;
209 retval = (size_t)ERR_RETURN;
210 goto ret;
211 }
212 } else if (stat == ST_208REV_1) {
213 if (ic == X208REV_2) {
214 if ((int)ileft > 0) {
215 stat = ST_208REV_2;
216 continue;
217 } else {
218 UNGET();
219 UNGET();
220 UNGET();
221 errno = EINVAL;
222 retval = (size_t)ERR_RETURN;
223 goto ret;
224 }
225 } else {
226 UNGET();
227 UNGET();
228 UNGET();
229 errno = EILSEQ;
230 retval = (size_t)ERR_RETURN;
231 goto ret;
232 }
233 } else if (stat == ST_208REV_2) {
234 if (ic == ESC) {
235 if ((int)ileft > 0) {
236 stat = ST_REV_AFT_ESC;
237 continue;
238 } else {
239 UNGET();
240 UNGET();
241 UNGET();
242 UNGET();
243 errno = EINVAL;
244 retval = (size_t)ERR_RETURN;
245 goto ret;
246 }
247 } else {
248 UNGET();
249 UNGET();
250 UNGET();
251 UNGET();
252 errno = EILSEQ;
253 retval = (size_t)ERR_RETURN;
254 goto ret;
255 }
256 } else if (stat == ST_REV_AFT_ESC) {
257 if (ic == MBTOG0_1) {
258 if ((int)ileft > 0) {
259 stat = ST_REV_AFT_MBTOG0_1;
260 continue;
261 } else {
262 UNGET();
263 UNGET();
264 UNGET();
265 UNGET();
266 UNGET();
267 errno = EINVAL;
268 retval = (size_t)ERR_RETURN;
269 goto ret;
270 }
271 } else {
272 UNGET();
273 UNGET();
274 UNGET();
275 UNGET();
276 UNGET();
277 errno = EILSEQ;
278 retval = (size_t)ERR_RETURN;
279 goto ret;
280 }
281 } else if (stat == ST_REV_AFT_MBTOG0_1) {
282 if (ic == F_X0208_83_90) {
283 stat = ST_INIT;
284 st->_st_cset_sav = cset = CS_1;
285 continue;
286 } else if (ic == MBTOG0_2) {
287 if ((int)ileft > 0) {
288 stat = ST_REV_AFT_MBTOG0_2;
289 continue;
290 } else {
291 UNGET();
292 UNGET();
293 UNGET();
294 UNGET();
295 UNGET();
296 UNGET();
297 errno = EINVAL;
298 retval = (size_t)ERR_RETURN;
299 goto ret;
300 }
301 } else {
302 UNGET();
303 UNGET();
304 UNGET();
305 UNGET();
306 UNGET();
307 UNGET();
308 errno = EILSEQ;
309 retval = (size_t)ERR_RETURN;
310 goto ret;
311 }
312 } else if (stat == ST_REV_AFT_MBTOG0_2) {
313 if (ic == F_X0208_83_90) {
314 stat = ST_INIT;
315 st->_st_cset_sav = cset = CS_1;
316 continue;
317 } else {
318 UNGET();
319 UNGET();
320 UNGET();
321 UNGET();
322 UNGET();
323 UNGET();
324 UNGET();
325 errno = EILSEQ;
326 retval = (size_t)ERR_RETURN;
327 goto ret;
328 }
329 }
330 /*
331 * Break through chars or ESC sequence
332 * if (stat == ST_INIT)
333 */
334 if (ic == ESC) {
335 if ((int)ileft > 0) {
336 stat = ST_ESC;
337 continue;
338 } else {
339 UNGET();
340 errno = EINVAL;
341 retval = (size_t)ERR_RETURN;
342 goto ret;
343 }
344 /*
345 * XXX- Because V3 mailtool uses SI/SO to switch
346 * G0 and G1 sets while it puts "iso2022-7"
347 * as its "X-Sun-Charset" tag. Though it
348 * breaks ISO-2022-JP definition based on
349 * UI-OSF, dtmail have handle them correctly.
350 * Therefore, we have to following a few codes, UGH.
351 */
352 } else if (ic == SO) {
353 cset = CS_2;
354 stat = ST_INIT;
355 continue;
356 } else if (ic == SI) {
357 cset = st->_st_cset_sav;
358 stat = ST_INIT;
359 continue;
360 } else if (!(ic & CMSB)) {
361 if ((cset == CS_0) || (cset == CS_2)){
362 if (cset == CS_0) {
363 index = (int)_jfp_tbl_jisx0201roman_to_ucs2[(int)ic];
364 } else if (cset == CS_2) {
365 index =
366 (int)_jfp_tbl_jisx0201kana_to_ucs2[(ic - 0x21)];
367 }
368 if ((ret_val = write_unicode(
369 (unsigned int)index, &op, &oleft,
370 B_FALSE, "writing CS_0/2"))
371 < 0) {
372 /* errno is set in write_unicode */
373 UNGET();
374 retval = (size_t)ERR_RETURN;
375 goto ret;
376 }
377 stat = ST_INIT;
378 continue;
379 } else if ((cset == CS_1) || (cset == CS_3)) {
380 if ((int)ileft > 0) {
381 if ((ic < 0x21) || (ic == 0x7f)) {
382 UNGET();
383 errno = EILSEQ;
384 retval = (size_t)ERR_RETURN;
385 goto ret;
386 } else if ((*ip < 0x21) || (*ip ==
387 0x7f)) {
388 UNGET();
389 errno = EILSEQ;
390 retval = (size_t)ERR_RETURN;
391 goto ret;
392 }
393 index = ((ic - 0x21) * 94)
394 + (*ip - 0x21);
395 if (cset == CS_1) {
396 #ifdef RFC1468_MODE /* Convert VDC and UDC to GETA(DEFC_U in jis%UTF-8.h) */
397 if ((ic == 0x2d) ||
398 (0x75 <= ic))
399 index = 0x3013;
400 else
401 index = (int)
402 _jfp_tbl_jisx0208_to_ucs2[index];
403 #else /* ISO-2022-JP.UIOSF */
404 index = (int)
405 _jfp_tbl_jisx0208_to_ucs2[index];
406 #endif /* RFC1468_MODE */
407 } else if (cset == CS_3) {
408 #ifdef RFC1468_MODE /* Convert JIS X 0212 to GETA(DEFC_U in jis%UTF-8.h) */
409 index = 0x3013;
410 #else /* ISO-2022-JP.UIOSF */
411 index =
412 (int)_jfp_tbl_jisx0212_to_ucs2[index];
413 #endif /* RFC1468_MODE */
414 }
415 if ((ret_val = write_unicode(
416 (unsigned int)index,
417 &op, &oleft,
418 B_FALSE, "writing CS_1/3"))
419 < 0) {
420 /* errno is set
421 in write_unicode */
422 UNGET();
423 retval =
424 (size_t)ERR_RETURN;
425 goto ret;
426 }
427 /* dummy GET for 2nd byte */
428 GET(ic);
429 stat = ST_INIT;
430 continue;
431 } else {
432 UNGET();
433 errno = EINVAL;
434 retval = (size_t)ERR_RETURN;
435 goto ret;
436 }
437 }
438 } else {
439 UNGET();
440 errno = EILSEQ;
441 retval = (size_t)ERR_RETURN;
442 goto ret;
443 }
444 }
445 retval = ileft;
446 ret:
447 *inbuf = ip;
448 *inbytesleft = ileft;
449 *outbuf = (char *)op;
450 *outbytesleft = oleft;
451 st->_st_cset = cset;
452
453 return (retval);
454 }
455