xref: /titanic_51/usr/src/lib/iconv_modules/ja/common/ISO-2022-JP_TO_PCK.c (revision 91e1e26ac6a73ce959289cf7d3d96c4baedbe0b8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 1994-2003 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <errno.h>
29 #include <euc.h>
30 #include "japanese.h"
31 
32 /*
33  * struct _icv_state; conversion status kept between _icv_iconv() calls
34  */
35 struct _icv_state {
36 	int	_st_cset;	/* character set in effect when the last call returned */
37 	int	_st_cset_sav;	/* set chosen by the last escape sequence; restored on SI */
38 };
39 
40 static unsigned short lookuptbl(unsigned short);
41 
42 void *
43 _icv_open()
44 {
45 	struct _icv_state *st;
46 
47 	if ((st = (struct _icv_state *)malloc(sizeof (struct _icv_state)))
48 									== NULL)
49 		return ((void *)ERR_RETURN);
50 
51 	st->_st_cset = st->_st_cset_sav = CS_0;
52 
53 	return (st);
54 }
55 
/*
 * _icv_close()
 *
 * Release a conversion state.  The state holds no other resources,
 * so handing it back to free() is all that is needed.
 */
void
_icv_close(struct _icv_state *st)
{
	free((void *)st);
}
61 
62 size_t
63 _icv_iconv(struct _icv_state *st, char **inbuf, size_t *inbytesleft,
64 				char **outbuf, size_t *outbytesleft)
65 {
66 	int		cset;
67 	int		stat = ST_INIT;
68 	unsigned char	*op, ic;
69 	char		*ip;
70 	size_t		ileft, oleft;
71 	size_t		retval;
72 #ifdef  RFC1468_MODE
73         unsigned short  zenkaku;
74 #endif
75 
76 	/*
77 	 * If inbuf or *inbuf is NULL, reset conversion descriptor
78 	 * and put escape sequence if needed.
79 	 */
80 	if ((inbuf == NULL) || (*inbuf == NULL)) {
81 		st->_st_cset_sav = st->_st_cset = CS_0;
82 		return ((size_t)0);
83 	}
84 
85 	cset = st->_st_cset;
86 
87 	ip = *inbuf;
88 	op = (unsigned char *)*outbuf;
89 	ileft = *inbytesleft;
90 	oleft = *outbytesleft;
91 
92 	/*
93 	 * Main loop; basically 1 loop per 1 input byte
94 	 */
95 
96 	while ((int)ileft > 0) {
97 		GET(ic);
98 		if (stat == ST_INIT) {
99 			goto text;
100 		}
101 		/*
102 		 *  Half way of Kanji or ESC sequence
103 		 */
104 		if (stat == ST_ESC) {
105 			if (ic == MBTOG0_1) {
106 				if ((int)ileft > 0) {
107 					stat = ST_MBTOG0_1;
108 					continue;
109 				} else {
110 					UNGET();
111 					UNGET();
112 					errno = EINVAL;
113 					retval = (size_t)ERR_RETURN;
114 					goto ret;
115 				}
116 			} else if (ic == SBTOG0_1) {
117 				if ((int)ileft > 0) {
118 					stat = ST_SBTOG0;
119 					continue;
120 				} else {
121 					UNGET();
122 					UNGET();
123 					errno = EINVAL;
124 					retval = (size_t)ERR_RETURN;
125 					goto ret;
126 				}
127 			} else if (ic == X208REV_1) {
128 				if ((int)ileft > 0) {
129 					stat = ST_208REV_1;
130 					continue;
131 				} else {
132 					UNGET();
133 					UNGET();
134 					errno = EINVAL;
135 					retval = (size_t)ERR_RETURN;
136 					goto ret;
137 				}
138 			} else {
139 				UNGET();
140 				UNGET();
141 				errno = EILSEQ;
142 				retval = (size_t)ERR_RETURN;
143 				goto ret;
144 			}
145 		} else if (stat == ST_MBTOG0_1) {
146 			if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) {
147 				stat = ST_INIT;
148 				st->_st_cset_sav = cset = CS_1;
149 				continue;
150 			} else if (ic == MBTOG0_2) {
151 				if ((int)ileft > 0) {
152 					stat = ST_MBTOG0_2;
153 					continue;
154 				} else {
155 					UNGET();
156 					UNGET();
157 					UNGET();
158 					errno = EINVAL;
159 					retval = (size_t)ERR_RETURN;
160 					goto ret;
161 				}
162 			} else if (ic == F_X0212_90) {
163 				stat = ST_INIT;
164 				st->_st_cset_sav = cset = CS_3;
165 				continue;
166 			} else {
167 				UNGET();
168 				UNGET();
169 				UNGET();
170 				errno = EILSEQ;
171 				retval = (size_t)ERR_RETURN;
172 				goto ret;
173 			}
174 		} else if (stat == ST_MBTOG0_2) {
175 			if ((ic == F_X0208_83_90) || (ic == F_X0208_78)) {
176 				stat = ST_INIT;
177 				st->_st_cset_sav = cset = CS_1;
178 				continue;
179 			} else if (ic == F_X0212_90) {
180 				stat = ST_INIT;
181 				st->_st_cset_sav = cset = CS_3;
182 				continue;
183 			} else {
184 				UNGET();
185 				UNGET();
186 				UNGET();
187 				UNGET();
188 				errno = EILSEQ;
189 				retval = (size_t)ERR_RETURN;
190 				goto ret;
191 			}
192 		} else if (stat == ST_SBTOG0) {
193 			if ((ic == F_ASCII) ||
194 				(ic == F_X0201_RM) ||
195 				(ic == F_ISO646)) {
196 				stat = ST_INIT;
197 				st->_st_cset_sav = cset = CS_0;
198 				continue;
199 			} if (ic == F_X0201_KN) {
200 				st->_st_cset_sav = cset = CS_2;
201 				stat = ST_INIT;
202 				continue;
203 			} else {
204 				UNGET();
205 				UNGET();
206 				UNGET();
207 				errno = EILSEQ;
208 				retval = (size_t)ERR_RETURN;
209 				goto ret;
210 			}
211 		} else if (stat == ST_208REV_1) {
212 			if (ic == X208REV_2) {
213 				if ((int)ileft > 0) {
214 					stat = ST_208REV_2;
215 					continue;
216 				} else {
217 					UNGET();
218 					UNGET();
219 					UNGET();
220 					errno = EINVAL;
221 					retval = (size_t)ERR_RETURN;
222 					goto ret;
223 				}
224 			} else {
225 				UNGET();
226 				UNGET();
227 				UNGET();
228 				errno = EILSEQ;
229 				retval = (size_t)ERR_RETURN;
230 				goto ret;
231 			}
232 		} else if (stat == ST_208REV_2) {
233 			if (ic == ESC) {
234 				if ((int)ileft > 0) {
235 					stat = ST_REV_AFT_ESC;
236 					continue;
237 				} else {
238 					UNGET();
239 					UNGET();
240 					UNGET();
241 					UNGET();
242 					errno = EINVAL;
243 					retval = (size_t)ERR_RETURN;
244 					goto ret;
245 				}
246 			} else {
247 				UNGET();
248 				UNGET();
249 				UNGET();
250 				UNGET();
251 				errno = EILSEQ;
252 				retval = (size_t)ERR_RETURN;
253 				goto ret;
254 			}
255 		} else if (stat == ST_REV_AFT_ESC) {
256 			if (ic == MBTOG0_1) {
257 				if ((int)ileft > 0) {
258 					stat = ST_REV_AFT_MBTOG0_1;
259 					continue;
260 				} else {
261 					UNGET();
262 					UNGET();
263 					UNGET();
264 					UNGET();
265 					UNGET();
266 					errno = EINVAL;
267 					retval = (size_t)ERR_RETURN;
268 					goto ret;
269 				}
270 			} else {
271 				UNGET();
272 				UNGET();
273 				UNGET();
274 				UNGET();
275 				UNGET();
276 				errno = EILSEQ;
277 				retval = (size_t)ERR_RETURN;
278 				goto ret;
279 			}
280 		} else if (stat == ST_REV_AFT_MBTOG0_1) {
281 			if (ic == F_X0208_83_90) {
282 				stat = ST_INIT;
283 				st->_st_cset_sav = cset = CS_1;
284 				continue;
285 			} else if (ic == MBTOG0_2) {
286 				if ((int)ileft > 0) {
287 					stat = ST_REV_AFT_MBTOG0_2;
288 					continue;
289 				} else {
290 					UNGET();
291 					UNGET();
292 					UNGET();
293 					UNGET();
294 					UNGET();
295 					UNGET();
296 					errno = EINVAL;
297 					retval = (size_t)ERR_RETURN;
298 					goto ret;
299 				}
300 			} else {
301 				UNGET();
302 				UNGET();
303 				UNGET();
304 				UNGET();
305 				UNGET();
306 				UNGET();
307 				errno = EILSEQ;
308 				retval = (size_t)ERR_RETURN;
309 				goto ret;
310 			}
311 		} else if (stat == ST_REV_AFT_MBTOG0_2) {
312 			if (ic == F_X0208_83_90) {
313 				stat = ST_INIT;
314 				st->_st_cset_sav = cset = CS_1;
315 				continue;
316 			} else {
317 				UNGET();
318 				UNGET();
319 				UNGET();
320 				UNGET();
321 				UNGET();
322 				UNGET();
323 				UNGET();
324 				errno = EILSEQ;
325 				retval = (size_t)ERR_RETURN;
326 				goto ret;
327 			}
328 		}
329 text:
330 		/*
331 		 * Break through chars or ESC sequence
332 		 */
333 		if (ic == ESC) {
334 			if ((int)ileft > 0) {
335 				stat = ST_ESC;
336 				continue;
337 			} else {
338 				UNGET();
339 				errno = EINVAL;
340 				retval = (size_t)ERR_RETURN;
341 				goto ret;
342 			}
343 		/*
344 		 * XXX- Because V3 mailtool uses SI/SO to switch
345 		 *	G0 and G1 sets while it puts "iso2022-7"
346 		 *	as its "X-Sun-Charset" tag. Though it
347 		 *	breaks ISO-2022-JP definition based on
348 		 *	UI-OSF, dtmail have handle them correctly.
349 		 *	Therefore, we have to following a few codes, UGH.
350 		 */
351 		} else if (ic == SO) {
352 			cset = CS_2;
353 			stat = ST_INIT;
354 			continue;
355 		} else if (ic == SI) {
356 			cset = st->_st_cset_sav;
357 			stat = ST_INIT;
358 			continue;
359 		}
360 		if (!(ic & CMSB)) {
361 			if (cset == CS_0) {	/* ASCII or JIS roman */
362 				CHECK2BIG(SJISW0, 1);
363 				PUT(ic);
364 				continue;
365 			} else if (cset == CS_1) { /* CS_1 Kanji starts */
366 				if ((int)ileft > 0) {
367 					int even_ku;
368 					CHECK2BIG(SJISW1, 1);
369 					if ((ic < 0x21) || (ic == 0x7f)) {
370 						UNGET();
371 						errno = EILSEQ;
372 						retval = (size_t)ERR_RETURN;
373 						goto ret;
374 					}
375 					if ((*ip < 0x21) || (*ip == 0x7f)) {
376 						UNGET();
377 						errno = EILSEQ;
378 						retval = (size_t)ERR_RETURN;
379 						goto ret;
380 					}
381 #ifdef  RFC1468_MODE /* Convert VDC and UDC to GETA */
382 					if ((ic == 0x2d) || (0x75 <= ic)) {
383 						PUT(PGETA >> 8);
384 						GET(ic); /* Get dummy */
385 						PUT(PGETA & 0xff);
386 						continue;
387 					}
388 #endif  /* RFC1468_MODE */
389 					PUT(jis208tosj1[ic]);
390 					if ((ic % 2) == 0)
391 						even_ku = TRUE;
392 					else
393 						even_ku = FALSE;
394 					GET(ic);
395 					if (even_ku)
396 						ic += 0x80;
397 					PUT(jistosj2[ic]);
398 					continue;
399 				} else {	/* input fragment of Kanji */
400 					UNGET();
401 					errno = EINVAL;
402 					retval = (size_t)ERR_RETURN;
403 					goto ret;
404 				}
405 			} else if (cset == CS_2) { /* Hankaku Katakana */
406 				if (!ISSJKANA((ic | CMSB))) {
407 					UNGET();
408 					errno = EILSEQ;
409 					retval = (size_t)ERR_RETURN;
410 					goto ret;
411 				}
412 #ifdef  RFC1468_MODE /* Convert JIS X 0201 kana to PCK zenkaku Kana */
413 				CHECK2BIG(SJISW1, 1);
414 				zenkaku = halfkana2zenkakus[(ic - 0x21)];
415 	                        ic = (unsigned char)((zenkaku >> 8) & 0xff);
416 	                        PUT(ic);
417 	                        ic = (unsigned char)(zenkaku & 0xff);
418 	                        PUT(ic);
419 #else   /* ISO-2022-JP.UIOSF */
420 				CHECK2BIG(SJISW2, 1);
421 				PUT(ic | CMSB);
422 #endif  /* RFC1468_MODE */
423 				continue;
424 			} else if (cset == CS_3) { /* CS_3 Kanji starts */
425 				unsigned short dest;
426 				if ((int)ileft > 0) {
427 					CHECK2BIG(SJISW1, 1);
428 					if ((ic < 0x21) || (ic == 0x7f)) {
429 						UNGET();
430 						errno = EILSEQ;
431 						retval = (size_t)ERR_RETURN;
432 						goto ret;
433 					}
434 					if ((*ip < 0x21) || (*ip == 0x7f)) {
435 						UNGET();
436 						errno = EILSEQ;
437 						retval = (size_t)ERR_RETURN;
438 						goto ret;
439 					}
440 
441 #ifdef  RFC1468_MODE /* Convert JIS X 0212 to GETA */
442 					PUT(PGETA >> 8);
443 					GET(ic); /* Get dummy */
444 					PUT(PGETA & 0xff);
445 #else   /* ISO-2022-JP.UIOSF */
446 					if (ic < 0x75) { /* check IBM area */
447 						dest = (ic << 8);
448 						GET(ic);
449 						dest += ic;
450 						dest = lookuptbl(dest);
451 						if (dest == 0xffff) {
452 							/*
453 							 * Illegal code points
454 							 * in G3 plane.
455 							 */
456 							UNGET();
457 							UNGET();
458 							errno = EILSEQ;
459 							retval =
460 							(size_t)ERR_RETURN;
461 							goto ret;
462 						} else {
463 							PUT((dest >> 8) & 0xff);
464 							PUT(dest & 0xff);
465 						}
466 					} else {
467 						int even_ku;
468 
469 						if ((ic % 2) == 0)
470 							even_ku = TRUE;
471 						else
472 							even_ku = FALSE;
473 						PUT(jis212tosj1[ic]);
474 						GET(ic);
475 						if (even_ku)
476 							ic += 0x80;
477 						PUT(jistosj2[ic]);
478 					}
479 #endif  /* RFC1468_MODE */
480 					continue;
481 				} else {	/* input fragment of Kanji */
482 					UNGET();
483 					errno = EINVAL;
484 					retval = (size_t)ERR_RETURN;
485 					goto ret;
486 				}
487 			}
488 		} else {
489 			UNGET();
490 			errno = EILSEQ;
491 			retval = (size_t)ERR_RETURN;
492 			goto ret;
493 		}
494 	}
495 	retval = ileft;
496 ret:
497 	*inbuf = ip;
498 	*inbytesleft = ileft;
499 	*outbuf = (char *)op;
500 	*outbytesleft = oleft;
501 	st->_st_cset = cset;
502 
503 	return (retval);
504 }
505 
506 /*
507  * lookuptbl()
508  * Return the index number if its index-ed number
509  * is the same as dest value.
510  */
511 static unsigned short
512 lookuptbl(unsigned short dest)
513 {
514 	unsigned short tmp;
515 	int i;
516 	int sz = (sizeof (sjtoibmext) / sizeof (sjtoibmext[0]));
517 
518 	for (i = 0; i < sz; i++) {
519 		tmp = (sjtoibmext[i] & 0x7f7f);
520 		if (tmp == dest)
521 			return ((i + 0xfa40 + ((i / 0xc0) * 0x40)));
522 	}
523 	return (PGETA);
524 }
525