xref: /titanic_52/usr/src/lib/iconv_modules/ja/common/ISO-2022-JP_TO_eucJP.c (revision 91e1e26ac6a73ce959289cf7d3d96c4baedbe0b8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 1994-2003 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <errno.h>
30 #include <euc.h>
31 #include "japanese.h"
32 
33 
/*
 * struct _icv_state; keeps the conversion status between calls
 */
struct _icv_state {
	/*
	 * Character set currently in effect.  _icv_iconv() loads it on
	 * entry and stores it back on every return path that goes
	 * through its common exit.
	 */
	int	_st_cset;
	/*
	 * Character set selected by the most recent escape sequence.
	 * _icv_iconv() switches back to it when it sees SI after an
	 * SO-initiated shift.
	 */
	int	_st_cset_sav;
};
41 
42 void *
43 _icv_open()
44 {
45 	struct _icv_state *st;
46 
47 	if ((st = (struct _icv_state *)malloc(sizeof (struct _icv_state)))
48 									== NULL)
49 		return ((void *)ERR_RETURN);
50 
51 	st->_st_cset = st->_st_cset_sav = CS_0;
52 
53 	return (st);
54 }
55 
/*
 * Release a conversion descriptor obtained from _icv_open().
 * The descriptor is handed straight to free(), so a NULL pointer
 * is harmless.
 */
void
_icv_close(struct _icv_state *st)
{
	free(st);
}
61 
62 size_t
63 _icv_iconv(struct _icv_state *st, char **inbuf, size_t *inbytesleft,
64 				char **outbuf, size_t *outbytesleft)
65 {
66 	int		cset;
67 	int		stat = ST_INIT;
68 	unsigned char	*op;
69 	char		*ip, ic;
70 	size_t 		ileft, oleft;
71 	size_t 		retval;
72 #ifdef  RFC1468_MODE
73         unsigned short  zenkaku;
74 #endif
75 
76 	/*
77 	 * If inbuf and/or *inbuf are NULL, reset conversion descriptor
78 	 * and put escape sequence if needed.
79 	 */
80 	if ((inbuf == NULL) || (*inbuf == NULL)) {
81 		st->_st_cset_sav = st->_st_cset = CS_0;
82 		return ((size_t)0);
83 	}
84 
85 	cset = st->_st_cset;
86 
87 	ip = *inbuf;
88 	op = (unsigned char *)*outbuf;
89 	ileft = *inbytesleft;
90 	oleft = *outbytesleft;
91 
92 	/*
93 	 * Main loop; basically 1 loop per 1 input byte
94 	 */
95 
96 	while ((int)ileft > 0) {
97 		GET(ic);
98 		if (stat == ST_INIT) {
99 			goto text;
100 		}
101 
102 		if (stat == ST_ESC) {
103 			if (ic == MBTOG0_1) {
104 				if ((int)ileft > 0) {
105 					stat = ST_MBTOG0_1;
106 					continue;
107 				} else {
108 					UNGET();
109 					UNGET();
110 					errno = EINVAL;
111 					retval = (size_t)ERR_RETURN;
112 					goto ret;
113 				}
114 			} else if (ic == SBTOG0_1) {
115 				if ((int)ileft > 0) {
116 					stat = ST_SBTOG0;
117 					continue;
118 				} else {
119 					UNGET();
120 					UNGET();
121 					errno = EINVAL;
122 					retval = (size_t)ERR_RETURN;
123 					goto ret;
124 				}
125 			} else if (ic == X208REV_1) {
126 				if ((int)ileft > 0) {
127 					stat = ST_208REV_1;
128 					continue;
129 				} else {
130 					UNGET();
131 					UNGET();
132 					errno = EINVAL;
133 					retval = (size_t)ERR_RETURN;
134 					goto ret;
135 				}
136 			} else {
137 				UNGET();
138 				UNGET();
139 				errno = EILSEQ;
140 				retval = (size_t)ERR_RETURN;
141 				goto ret;
142 			}
143 		} else if (stat == ST_MBTOG0_1) {
144 			if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) {
145 				stat = ST_INIT;
146 				st->_st_cset_sav = cset = CS_1;
147 				continue;
148 			} else if (ic == MBTOG0_2) {
149 				if ((int)ileft > 0) {
150 					stat = ST_MBTOG0_2;
151 					continue;
152 				} else {
153 					UNGET();
154 					UNGET();
155 					UNGET();
156 					errno = EINVAL;
157 					retval = (size_t)ERR_RETURN;
158 					goto ret;
159 				}
160 			} else if (ic == F_X0212_90) {
161 				stat = ST_INIT;
162 				st->_st_cset_sav = cset = CS_3;
163 				continue;
164 			} else {
165 				UNGET();
166 				UNGET();
167 				UNGET();
168 				errno = EILSEQ;
169 				retval = (size_t)ERR_RETURN;
170 				goto ret;
171 			}
172 		} else if (stat == ST_MBTOG0_2) {
173 			if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) {
174 				stat = ST_INIT;
175 				st->_st_cset_sav = cset = CS_1;
176 				continue;
177 			} else if (ic == F_X0212_90) {
178 				stat = ST_INIT;
179 				st->_st_cset_sav = cset = CS_3;
180 				continue;
181 			} else {
182 				UNGET();
183 				UNGET();
184 				UNGET();
185 				UNGET();
186 				errno = EILSEQ;
187 				retval = (size_t)ERR_RETURN;
188 				goto ret;
189 			}
190 		} else if (stat == ST_SBTOG0) {
191 			if ((ic == F_ASCII) ||
192 				(ic == F_X0201_RM) ||
193 				(ic == F_ISO646)) {
194 				stat = ST_INIT;
195 				st->_st_cset_sav = cset = CS_0;
196 				continue;
197 			} else if (ic == F_X0201_KN) {
198 				stat = ST_INIT;
199 				st->_st_cset_sav = cset = CS_2;
200 				continue;
201 			} else {
202 				UNGET();
203 				UNGET();
204 				UNGET();
205 				errno = EILSEQ;
206 				retval = (size_t)ERR_RETURN;
207 				goto ret;
208 			}
209 		} else if (stat == ST_208REV_1) {
210 			if (ic == X208REV_2) {
211 				if ((int)ileft > 0) {
212 					stat = ST_208REV_2;
213 					continue;
214 				} else {
215 					UNGET();
216 					UNGET();
217 					UNGET();
218 					errno = EINVAL;
219 					retval = (size_t)ERR_RETURN;
220 					goto ret;
221 				}
222 			} else {
223 				UNGET();
224 				UNGET();
225 				UNGET();
226 				errno = EILSEQ;
227 				retval = (size_t)ERR_RETURN;
228 				goto ret;
229 			}
230 		} else if (stat == ST_208REV_2) {
231 			if (ic == ESC) {
232 				if ((int)ileft > 0) {
233 					stat = ST_REV_AFT_ESC;
234 					continue;
235 				} else {
236 					UNGET();
237 					UNGET();
238 					UNGET();
239 					UNGET();
240 					errno = EINVAL;
241 					retval = (size_t)ERR_RETURN;
242 					goto ret;
243 				}
244 			} else {
245 				UNGET();
246 				UNGET();
247 				UNGET();
248 				UNGET();
249 				errno = EILSEQ;
250 				retval = (size_t)ERR_RETURN;
251 				goto ret;
252 			}
253 		} else if (stat == ST_REV_AFT_ESC) {
254 			if (ic == MBTOG0_1) {
255 				if ((int)ileft > 0) {
256 					stat = ST_REV_AFT_MBTOG0_1;
257 					continue;
258 				} else {
259 					UNGET();
260 					UNGET();
261 					UNGET();
262 					UNGET();
263 					UNGET();
264 					errno = EINVAL;
265 					retval = (size_t)ERR_RETURN;
266 					goto ret;
267 				}
268 			} else {
269 				UNGET();
270 				UNGET();
271 				UNGET();
272 				UNGET();
273 				UNGET();
274 				errno = EILSEQ;
275 				retval = (size_t)ERR_RETURN;
276 				goto ret;
277 			}
278 		} else if (stat == ST_REV_AFT_MBTOG0_1) {
279 			if (ic == F_X0208_83_90) {
280 				stat = ST_INIT;
281 				st->_st_cset_sav = cset = CS_1;
282 				continue;
283 			} else if (ic == MBTOG0_2) {
284 				if ((int)ileft > 0) {
285 					stat = ST_REV_AFT_MBTOG0_2;
286 					continue;
287 				} else {
288 					UNGET();
289 					UNGET();
290 					UNGET();
291 					UNGET();
292 					UNGET();
293 					UNGET();
294 					errno = EINVAL;
295 					retval = (size_t)ERR_RETURN;
296 					goto ret;
297 				}
298 			} else {
299 				UNGET();
300 				UNGET();
301 				UNGET();
302 				UNGET();
303 				UNGET();
304 				UNGET();
305 				errno = EILSEQ;
306 				retval = (size_t)ERR_RETURN;
307 				goto ret;
308 			}
309 		} else if (stat == ST_REV_AFT_MBTOG0_2) {
310 			if (ic == F_X0208_83_90) {
311 				stat = ST_INIT;
312 				st->_st_cset_sav = cset = CS_1;
313 				continue;
314 			} else {
315 				UNGET();
316 				UNGET();
317 				UNGET();
318 				UNGET();
319 				UNGET();
320 				UNGET();
321 				UNGET();
322 				errno = EILSEQ;
323 				retval = (size_t)ERR_RETURN;
324 				goto ret;
325 			}
326 		}
327 text:
328 		/*
329 		 * Break through chars or ESC sequence
330 		 */
331 		if (ic == ESC) {
332 			if ((int)ileft > 0) {
333 				stat = ST_ESC;
334 				continue;
335 			} else {
336 				UNGET();
337 				errno = EINVAL;
338 				retval = (size_t)ERR_RETURN;
339 				goto ret;
340 			}
341 		/*
342 		 * XXX- Because V3 mailtool uses SI/SO to switch
343 		 *	G0 and G1 sets while it puts "iso2022-7"
344 		 *	as its "X-Sun-Charset" tag. Though it
345 		 *	breaks ISO-2022-JP definition based on
346 		 *	UI-OSF, dtmail have handle them correctly.
347 		 *	Therefore, we have to following a few codes, UGH.
348 		 */
349 		} else if (ic == SO) {
350 			cset = CS_2;
351 			stat = ST_INIT;
352 			continue;
353 		} else if (ic == SI) {
354 			cset = st->_st_cset_sav;
355 			stat = ST_INIT;
356 			continue;
357 		} else if (!(ic & CMSB)) {
358 			if (cset == CS_0) {
359 				CHECK2BIG(EUCW0, 1);
360 				PUT(ic);
361 				continue;
362 			} else if (cset == CS_1) {
363 				if ((int)ileft > 0) {
364 					CHECK2BIG(EUCW1, 1);
365 					if ((ic < 0x21) || (ic == 0x7f)) {
366 						UNGET();
367 						errno = EILSEQ;
368 						retval = (size_t)ERR_RETURN;
369 						goto ret;
370 					} else if ((*ip < 0x21) || (*ip ==
371 					0x7f)) {
372 						UNGET();
373 						errno = EILSEQ;
374 						retval = (size_t)ERR_RETURN;
375 						goto ret;
376 					}
377 #ifdef  RFC1468_MODE /* Convert VDC and UDC to GETA */
378 					if ((ic == 0x2d) || (0x75 <= ic )){
379 						PUT((EGETA >> 8) & 0xff);
380 						GET(ic); /* Get dummy */
381 						PUT(EGETA & 0xff);
382 						continue;
383 					}
384 #endif  /* RFC1468_MODE */
385 					PUT(ic | CMSB);
386 					GET(ic);
387 					PUT(ic | CMSB);
388 					stat = ST_INIT;
389 					continue;
390 				} else {
391 					UNGET();
392 					errno = EINVAL;
393 					retval = (size_t)ERR_RETURN;
394 					goto ret;
395 				}
396 			} else if (cset == CS_2) {
397 				if (!ISSJKANA((ic | CMSB))) {
398 					UNGET();
399 					errno = EILSEQ;
400 					retval = (size_t)ERR_RETURN;
401 					goto ret;
402 				}
403 #ifdef  RFC1468_MODE /* Convert JIS X 0201 Kana to JIS X 0208 Kana */
404 				CHECK2BIG(EUCW1, 1);
405 				zenkaku = halfkana2zenkakue[(ic - 0x21)];
406 	                        ic = (unsigned char)((zenkaku >> 8) & 0xFF);
407 	                        PUT(ic);
408 	                        ic = (unsigned char)(zenkaku & 0xFF);
409 	                        PUT(ic);
410 #else   /* ISO-2022-JP.UIOSF */
411 				CHECK2BIG(EUCW2 + SEQ_SS, 1);
412 				PUT(SS2);
413 				PUT(ic | CMSB);
414 #endif  /* RFC1468_MODE */
415 				continue;
416 			} else if (cset == CS_3) {
417 				if ((int)ileft > 0) {
418 					if ((ic < 0x21) || (ic == 0x7f)) {
419 						UNGET();
420 						errno = EILSEQ;
421 						retval = (size_t)ERR_RETURN;
422 						goto ret;
423 					} else if ((*ip < 0x21) || (*ip ==
424 					0x7f)) {
425 						UNGET();
426 						errno = EILSEQ;
427 						retval = (size_t)ERR_RETURN;
428 						goto ret;
429 					}
430 #ifdef  RFC1468_MODE /* Convert JIS X 0212 to GETA */
431 					CHECK2BIG(EUCW1, 1);
432 					PUT((EGETA >> 8) | CMSB);
433 					GET(ic); /* Get dummy */
434 					PUT((EGETA & CMASK) | CMSB);
435 #else   /* ISO-2022-JP.UIOSF */
436 					CHECK2BIG(EUCW3 + SEQ_SS, 1);
437 					PUT(SS3);
438 					PUT(ic | CMSB);
439 					GET(ic);
440 					PUT(ic | CMSB);
441 #endif  /* RFC1468_MODE */
442 					stat = ST_INIT;
443 					continue;
444 				} else {
445 					UNGET();
446 					errno = EINVAL;
447 					retval = (size_t)ERR_RETURN;
448 					goto ret;
449 				}
450 			}
451 		} else {
452 			UNGET();
453 			errno = EILSEQ;
454 			retval = (size_t)ERR_RETURN;
455 			goto ret;
456 		}
457 	}
458 	retval = ileft;
459 ret:
460 	*inbuf = ip;
461 	*inbytesleft = ileft;
462 	*outbuf = (char *)op;
463 	*outbytesleft = oleft;
464 	st->_st_cset = cset;
465 
466 	return (retval);
467 }
468