xref: /titanic_50/usr/src/lib/iconv_modules/ja/common/PCK_TO_ISO-2022-JP.c (revision 880d797826457b77414b37d531cc3e1aa166ecbe)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 1994-2003 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <errno.h>
29 #include <euc.h>
30 #include "japanese.h"
31 
/*
 * struct _icv_state; per-descriptor conversion state.
 *
 * The state is allocated by _icv_open(), released by _icv_close(), and
 * read at entry / written back at exit by _icv_iconv() so that the
 * character set in effect survives across calls.
 */
struct _icv_state {
	int	_st_cset;	/* character set in effect (CS_0 .. CS_3); CS_0 after open/reset */
};
38 
39 void *
_icv_open()40 _icv_open()
41 {
42 	struct _icv_state *st;
43 
44 	if ((st = (struct _icv_state *)malloc(sizeof (struct _icv_state)))
45 									== NULL)
46 		return ((void *)ERR_RETURN);
47 
48 	st->_st_cset = CS_0;
49 	return (st);
50 }
51 
/*
 * _icv_close: release the conversion state that _icv_open() allocated.
 */
void
_icv_close(struct _icv_state *st)
{
	free((void *)st);
}
57 
58 size_t
_icv_iconv(struct _icv_state * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)59 _icv_iconv(struct _icv_state *st, char **inbuf, size_t *inbytesleft,
60 				char **outbuf, size_t *outbytesleft)
61 {
62 	int		cset;
63 	unsigned char	*ip, ic;
64 	char			*op;
65 	size_t			ileft, oleft;
66 	size_t			retval;
67 #ifdef	RFC1468_MODE
68         unsigned short  zenkaku;
69 #endif
70 
71 	/*
72 	 * If inbuf and/or *inbuf are NULL, reset conversion descriptor
73 	 * and put escape sequence if needed.
74 	 */
75 	if ((inbuf == NULL) || (*inbuf == NULL)) {
76 		if (st->_st_cset != CS_0) {
77 			if ((outbuf != NULL) && (*outbuf != NULL)
78 					&& (outbytesleft != NULL)) {
79 				op = *outbuf;
80 				oleft = *outbytesleft;
81 				if (oleft < SEQ_SBTOG0) {
82 					errno = E2BIG;
83 					return ((size_t)-1);
84 				}
85 				PUT(ESC);
86 				PUT(SBTOG0_1);
87 				PUT(F_X0201_RM);
88 				*outbuf = op;
89 				*outbytesleft = oleft;
90 			}
91 			st->_st_cset = CS_0;
92 		}
93 		return ((size_t)0);
94 	}
95 
96 	cset = st->_st_cset;
97 
98 	ip = (unsigned char *)*inbuf;
99 	op = *outbuf;
100 	ileft = *inbytesleft;
101 	oleft = *outbytesleft;
102 
103 	/*
104 	 * Main loop; basically 1 loop per 1 input byte
105 	 */
106 
107 	while ((int)ileft > 0) {
108 		GET(ic);
109 		if (ISASC((int)ic)) {		/* ASCII */
110 			if (cset != CS_0) {
111 				CHECK2BIG(SEQ_SBTOG0,1);
112 				PUT(ESC);	/* to JIS X 0201 Roman */
113 				PUT(SBTOG0_1);
114 				PUT(F_X0201_RM);
115 			}
116 			cset = CS_0;
117 			CHECK2BIG(JISW0,1);
118 			PUT(ic);
119 			continue;
120 		} else if (ISSJKANA(ic)) {		/* Kana starts */
121 #ifdef  RFC1468_MODE	/* Substitute JIS X 0208 for JIS X 0201 katakana */
122 			if (cset != CS_1) {
123 				CHECK2BIG(SEQ_MBTOG0_O,1);
124 				cset = CS_1;
125 				PUT(ESC);
126 				PUT(MBTOG0_1);
127 				PUT(F_X0208_83_90);
128 			}
129 			CHECK2BIG(JISW1,1);
130 			zenkaku = halfkana2zenkakuj[ic - 0xA1];
131 			ic = (unsigned char)((zenkaku >> 8) & CMASK);
132 			PUT(ic);
133 			ic = (unsigned char)(zenkaku & CMASK);
134 			PUT(ic);
135 #else   /* ISO-2022-JP.UIOSF */
136 			if (cset != CS_2) {
137 				CHECK2BIG(SEQ_SBTOG0,1);
138 				cset = CS_2;
139 				PUT(ESC);
140 				PUT(SBTOG0_1);
141 				PUT(F_X0201_KN);
142 			}
143 			CHECK2BIG(JISW2,1);
144 			PUT(ic & CMASK);
145 #endif  /* RFC1468_MODE */
146 			continue;
147 		} else if (ISSJKANJI1(ic)) {	/* CS_1 Kanji starts */
148 			if ((int)ileft > 0) {
149 				if (ISSJKANJI2(*ip)) {
150 					if (cset != CS_1) {
151 						CHECK2BIG(SEQ_MBTOG0_O,1);
152 						cset = CS_1;
153 						PUT(ESC);
154 						PUT(MBTOG0_1);
155 						PUT(F_X0208_83_90);
156 					}
157 					CHECK2BIG(JISW1,1);
158 #ifdef  RFC1468_MODE /* Convert VDC and UDC to GETA */
159 					if ((ic == 0x87) || (0xed <= ic )){
160 						PUT((JGETA >> 8) & CMASK);
161 						GET(ic); /* Get dummy */
162 						PUT(JGETA & CMASK);
163 						continue;
164 					}
165 #endif  /* RFC1468_MODE */
166 					ic = sjtojis1[(ic - 0x80)];
167 					if (*ip >= 0x9f) {
168 						ic++;
169 					}
170 					PUT(ic);
171 					GET(ic);
172 					ic = sjtojis2[ic];
173 					PUT(ic);
174 					continue;
175 				} else {	/* 2nd byte is illegal */
176 					UNGET();
177 					errno = EILSEQ;
178 					retval = (size_t)ERR_RETURN;
179 					goto ret;
180 				}
181 			} else {		/* input fragment of Kanji */
182 				UNGET();
183 				errno = EINVAL;
184 				retval = (size_t)ERR_RETURN;
185 				goto ret;
186 			}
187 		} else if (ISSJSUPKANJI1(ic)) {	/* CS_3 Kanji starts */
188 			if ((int)ileft > 0) {
189 				if (ISSJKANJI2(*ip)) {
190 #ifdef  RFC1468_MODE	/* Substitute JIS X 0208 "Geta" for JIS X 0212 */
191 					if (cset != CS_1) {
192 						CHECK2BIG(SEQ_MBTOG0_O,1);
193 						cset = CS_1;
194 						PUT(ESC);
195 						PUT(MBTOG0_1);
196 						PUT(F_X0208_83_90);
197 					}
198 					CHECK2BIG(JISW1,1);
199 					/* Put GETA (0x222e) */
200 					ic = (unsigned char)((JGETA >> 8) &
201 					CMASK);
202 					PUT(ic);
203 					ic = (unsigned char)(JGETA & CMASK);
204 					PUT(ic);
205 					GET(ic); /* dummy GET */
206 #else   /* ISO-2022-JP.UIOSF */
207 					if (cset != CS_3) {
208 						CHECK2BIG(SEQ_MBTOG0,1);
209 						cset = CS_3;
210 						PUT(ESC);
211 						PUT(MBTOG0_1);
212 						PUT(MBTOG0_2);
213 						PUT(F_X0212_90);
214 					}
215 					CHECK2BIG(JISW3,1);
216 					ic = sjtojis1[(ic - 0x80)];
217 					if (*ip >= 0x9f) {
218 						ic++;
219 					}
220 					PUT(ic);
221 					GET(ic);
222 					ic = sjtojis2[ic];
223 					PUT(ic);
224 #endif  /* RFC1468_MODE */
225 					continue;
226 				} else {	/* 2nd byte is illegal */
227 					UNGET();
228 					errno = EILSEQ;
229 					retval = (size_t)ERR_RETURN;
230 					goto ret;
231 				}
232 			} else {		/* input fragment of Kanji */
233 				UNGET();
234 				errno = EINVAL;
235 				retval = (size_t)ERR_RETURN;
236 				goto ret;
237 			}
238 		} else if (ISSJIBM(ic) || /* Extended IBM char. area */
239 			ISSJNECIBM(ic)) { /* NEC/IBM char. area */
240 			/*
241 			 * We need a special treatment for each codes.
242 			 * By adding some offset number for them, we
243 			 * can process them as the same way of that of
244 			 * extended IBM chars.
245 			 */
246 			if ((int)ileft > 0) {
247 				if (ISSJKANJI2(*ip)) {
248 					unsigned short dest;
249 					dest = (ic << 8);
250 					GET(ic);
251 					dest += ic;
252 					if ((0xed40 <= dest) &&
253 						(dest <= 0xeffc)) {
254 						REMAP_NEC(dest);
255 						if (dest == 0xffff) {
256 							goto ill_ibm;
257 						}
258 					}
259 					/*
260 					 * XXX: 0xfa54 and 0xfa5b must be mapped
261 					 *	to JIS0208 area. Therefore we
262 					 *	have to do special treatment.
263 					 */
264 					if ((cset != CS_1) &&
265 						((dest == 0xfa54) ||
266 						(dest == 0xfa5b))) {
267 						CHECK2BIG(SEQ_MBTOG0_O,2);
268 						cset = CS_1;
269 						PUT(ESC);
270 						PUT(MBTOG0_1);
271 						PUT(F_X0208_83_90);
272 						CHECK2BIG(JISW1,2);
273 						if (dest == 0xfa54) {
274 							PUT(0x22);
275 							PUT(0x4c);
276 						} else {
277 							PUT(0x22);
278 							PUT(0x68);
279 						}
280 						continue;
281 					}
282 					if (cset != CS_3) {
283 						CHECK2BIG(SEQ_MBTOG0,2);
284 						cset = CS_3;
285 						PUT(ESC);
286 						PUT(MBTOG0_1);
287 						PUT(MBTOG0_2);
288 						PUT(F_X0212_90);
289 					}
290 					CHECK2BIG(JISW3,2);
291 					dest = dest - 0xfa40 -
292 						(((dest>>8) - 0xfa) * 0x40);
293 					dest = sjtoibmext[dest];
294 					if (dest == 0xffff) {
295 						/*
296 						 * Illegal code points
297 						 * in IBM-EXT area.
298 						 */
299 ill_ibm:
300 						UNGET();
301 						UNGET();
302 						errno = EILSEQ;
303 						retval = (size_t)ERR_RETURN;
304 						goto ret;
305 					}
306 					PUT(((dest>>8) & 0x7f));
307 					PUT(dest & 0x7f);
308 					continue;
309 				} else {	/* 2nd byte is illegal */
310 					UNGET();
311 					errno = EILSEQ;
312 					retval = (size_t)ERR_RETURN;
313 					goto ret;
314 				}
315 			} else {		/* input fragment of Kanji */
316 				UNGET();
317 				errno = EINVAL;
318 				retval = (size_t)ERR_RETURN;
319 				goto ret;
320 			}
321 		} else if ((0xeb <= ic) && (ic <= 0xec)) {
322 		/*
323 		 * Based on the draft convention of OSF-JVC CDEWG,
324 		 * characters in this area will be mapped to
325 		 * "CHIKAN-MOJI." (convertible character)
326 		 * So far, we'll use (0x222e) for it.
327 		 */
328 			if ((int)ileft > 0) {
329 				if (ISSJKANJI2(*ip)) {
330 					if (cset != CS_1) {
331 						CHECK2BIG(SEQ_MBTOG0_O,1);
332 						cset = CS_1;
333 						PUT(ESC);
334 						PUT(MBTOG0_1);
335 						PUT(F_X0208_83_90);
336 					}
337 					CHECK2BIG(JISW1,1);
338 					GET(ic); /* Dummy */
339 					PUT((JGETA>>8) & CMASK);
340 					PUT(JGETA & CMASK);
341 					continue;
342 				} else {	/* 2nd byte is illegal */
343 					UNGET();
344 					errno = EILSEQ;
345 					retval = (size_t)ERR_RETURN;
346 					goto ret;
347 				}
348 			} else {		/* input fragment of Kanji */
349 				UNGET();
350 				errno = EINVAL;
351 				retval = (size_t)ERR_RETURN;
352 				goto ret;
353 			}
354 		} else {			/* 1st byte is illegal */
355 			UNGET();
356 			errno = EILSEQ;
357 			retval = (size_t)ERR_RETURN;
358 			goto ret;
359 		}
360 	}
361 	retval = ileft;
362 ret:
363 	*inbuf = (char *)ip;
364 	*inbytesleft = ileft;
365 	*outbuf = op;
366 	*outbytesleft = oleft;
367 	st->_st_cset = cset;
368 
369 	return (retval);
370 }
371