xref: /illumos-gate/usr/src/lib/iconv_modules/zh/common/zh_TW-euc%zh_TW-iso2022-CN-EXT.c (revision 16d8656330ae5622ec32e5007f62145ebafdc50f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1995, by Sun Microsystems, Inc.
24  * All rights reserved.
25  */
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <errno.h>
30 
/*
 * Bit masks and marker bytes of the CNS 11643 (EUC) input.
 */
#define	MSB		0x80	/* high bit: set on every byte of a Chinese char */
#define	MSB_OFF		0x7f	/* mask that strips the high bit */
#define	ONEBYTE		0xff	/* keeps the low 8 bits of a (signed) char */
#define	MBYTE		0x8e	/* lead byte introducing the 4-byte form */
#define	PMASK		0xa0	/* base subtracted from the plane byte */

/*
 * Control characters of the ISO 2022 output.
 */
#define	SO		0x0e	/* shift out */
#define	SI		0x0f	/* shift in */
#define	ESC		0x1b	/* escape */

/*
 * Final byte of the designation escape sequence, indexed by plane number.
 * Index 0 holds '0', the value SS3char carries while nothing is designated.
 */
static const char plane_char[] = "0GHIJKLMNOPQRSTUV";

#define	GET_PLANEC(i)	(plane_char[(i)])

#define	NON_ID_CHAR	'_'	/* non-identified character */
47 
/*
 * Per-descriptor conversion state, allocated by _icv_open() and kept
 * across _icv_iconv() calls.
 */
typedef struct _icv_state {
	char	keepc[4];	/* bytes collected for the current character */
	short	cstate;		/* input (CNS) state machine id: C0 .. C4 */
	short	istate;		/* output (ISO) shift state: IN or OUT */
	short	plane_no;	/* plane of the current character, -1 if none */
	short	SOset;		/* True once ESC $ ) G has been written */
	short	SS2set;		/* True once ESC $ * H has been written */
	char	SS3char;	/* final byte of the last ESC $ + x, '0' if none */
	int	_errno;		/* internal errno */
} _iconv_st;
58 
/* Input state machine: how many bytes of the current character were seen. */
enum _CSTATE {
	C0 = 0,		/* between characters */
	C1 = 1,		/* 1st byte kept, expecting the 2nd */
	C2 = 2,		/* 4-byte form: expecting the 3rd byte */
	C3 = 3,		/* 4-byte form: expecting the 4th byte */
	C4 = 4		/* character complete, ready to be emitted */
};

/* Output shift state: IN after SI (or at start), OUT after SO. */
enum _ISTATE {
	IN = 0,
	OUT = 1
};

enum _truefalse {
	False = 0,
	True = 1
};


static int get_plane_no_by_char(const char inbuf);
65 
66 /*
67  * Open; called from iconv_open()
68  */
69 void *
_icv_open()70 _icv_open()
71 {
72 	_iconv_st *st;
73 
74 	if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
75 		errno = ENOMEM;
76 		return ((void *) -1);
77 	}
78 
79 	st->cstate = C0;
80 	st->istate = IN;
81 	st->_errno = 0;
82 	st->plane_no = -1;
83 	st->SOset = False;
84 	st->SS2set = False;
85 	st->SS3char = '0';
86 
87 #ifdef DEBUG
88     fprintf(stderr, "==========     iconv(): CNS11643 --> ISO 2022-CN     ==========\n");
89 #endif
90 
91 	return ((void *) st);
92 }
93 
94 
95 /*
96  * Close; called from iconv_close()
97  */
98 void
_icv_close(_iconv_st * st)99 _icv_close(_iconv_st *st)
100 {
101 	if (!st)
102 		errno = EBADF;
103 	else
104 		free(st);
105 }
106 
107 
108 /*
109  * Actual conversion; called from iconv()
110  */
111 /*=======================================================
112  *
113  *   State Machine for interpreting CNS 11643 code
114  *
115  *=======================================================
116  *
117  *               (ESC,SO)   plane 2 - 16
118  *                1st C         2nd C       3rd C
119  *    +------> C0 -----> C1 -----------> C2 -----> C3
120  *    |  ascii |  plane 1 |                   4th C |
121  *    ^        |  2nd C   v                         v
122  *    |        |         C4 <------<--------<-------+
123  *    |        v          | (SI)
124  *    +----<---+-----<----v
125  *
126  *=======================================================*/
127 #define LEFT_CHECK(i)		if (*outbytesleft < i) {\
128 				    st->_errno = errno = E2BIG;\
129 				    return((size_t)-1);\
130 				} else\
131 				    (*outbytesleft) -= i
132 #define BUF_INPUT(c1, c2, c3, c4)\
133 				*(*outbuf)++ = c1;\
134 				*(*outbuf)++ = c2;\
135 				*(*outbuf)++ = c3;\
136 				*(*outbuf)++ = c4
137 size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)138 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
139 				char **outbuf, size_t *outbytesleft)
140 {
141 	if (st == NULL) {
142 	    errno = EBADF;
143 	    return ((size_t) -1);
144 	}
145 
146 	if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
147 	    if (st->istate == OUT) {
148 		if (outbytesleft && *outbytesleft >= 1 && outbuf && *outbuf) {
149 		    **outbuf = SI;
150 		    (*outbuf)++;
151 		    (*outbytesleft)--;
152 		} else {
153 		    errno = E2BIG;
154 		    return((size_t) -1);
155 		}
156 	    }
157 	    st->cstate = C0;
158 	    st->istate = IN;
159 	    st->_errno = 0;
160 	    st->plane_no = -1;
161 	    st->SOset = False;
162 	    st->SS2set = False;
163 	    st->SS3char = '0';
164 	    return ((size_t) 0);
165 	}
166 
167 #ifdef DEBUG
168     fprintf(stderr, "=== (Re-entry)     iconv(): CNS11643 --> ISO 2022-CN     ===\n");
169     fprintf(stderr, "st->cstate=%d\tst->istate=%d\tst->_errno=%d\tplane_no=%d\n",
170 	st->cstate, st->istate, st->_errno, st->plane_no);
171 #endif
172 	st->_errno = 0;         /* reset internal errno */
173 	errno = 0;		/* reset external errno */
174 
175 	/* a state machine for interpreting CNS 11643 code */
176 	while (*inbytesleft > 0 && *outbytesleft > 0) {
177 	    switch (st->cstate) {
178 	    case C0:		/* assuming ASCII in the beginning */
179 		if (**inbuf & MSB) {
180 		    st->keepc[0] = (**inbuf);
181 		    st->cstate = C1;
182 		} else {	/* real ASCII */
183 		    if (st->istate == OUT) {
184 			st->istate = IN;
185 			*(*outbuf)++ = SI;
186 			(*outbytesleft)--;
187 			if (*outbytesleft <= 0) {
188 			    errno = E2BIG;
189 			    return ((size_t) -1);
190 			}
191 		    }
192 		    *(*outbuf)++ = **inbuf;
193 		    (*outbytesleft)--;
194 		    if (**inbuf == '\n') {
195 			st->SOset = False;
196 			st->SS2set = False;
197 			st->SS3char = '0';
198 		    }
199 		}
200 		break;
201 	    case C1:		/* Chinese characters: 2nd byte */
202 		if ((st->keepc[0] & ONEBYTE) == MBYTE) { /* 4-byte (0x8e) */
203 		    st->plane_no = get_plane_no_by_char(**inbuf);
204 		    if (st->plane_no == -1) {	/* illegal plane */
205 			st->cstate = C0;
206 			st->istate = IN;
207 			st->_errno = errno = EILSEQ;
208 		    } else {	/* 4-byte Chinese character */
209 			st->cstate = C2;
210 			st->keepc[1] = (**inbuf);
211 		    }
212 		} else {	/* 2-byte Chinese character - plane #1 */
213 		    if (**inbuf & MSB) {	/* plane #1 */
214 			st->cstate = C4;
215 			st->keepc[1] = (**inbuf);
216 			st->plane_no = 1;
217 			continue;       /* should not advance *inbuf */
218 		    } else {	/* input char doesn't belong
219 				     * to the input code set */
220 			st->cstate = C0;
221 			st->istate = IN;
222 			st->_errno = errno = EINVAL;
223 		    }
224 		}
225 		break;
226 	    case C2:	/* plane #2 - #16 (4 bytes): get 3nd byte */
227 		if (**inbuf & MSB) {	/* 3rd byte */
228 		    st->keepc[2] = (**inbuf);
229 		    st->cstate = C3;
230 		} else {
231 		    st->_errno = errno = EINVAL;
232 		    st->cstate = C0;
233 		}
234 		break;
235 	    case C3:	/* plane #2 - #16 (4 bytes): get 4th byte */
236 		if (**inbuf & MSB) {	/* 4th byte */
237 		    st->cstate = C4;
238 		    st->keepc[3] = (**inbuf);
239 		    continue;       /* should not advance *inbuf */
240 		} else {
241 		    st->_errno = errno = EINVAL;
242 		    st->cstate = C0;
243 		}
244 		break;
245 	    case C4:	/* Convert code from CNS 11643 to ISO 2022-CN */
246 		if (st->plane_no == 1) {
247 		    if (st->istate == IN) {
248 			if (st->SOset == False) {
249 			    LEFT_CHECK(4);
250 			    BUF_INPUT(ESC, '$', ')', 'G');
251 			    st->SOset = True;
252 			}
253 			LEFT_CHECK(1);
254 			*(*outbuf)++ = SO;
255 			st->istate = OUT;
256 		    }
257 		    LEFT_CHECK(2);
258 		    *(*outbuf)++ = st->keepc[0] & MSB_OFF;
259 		    *(*outbuf)++ = st->keepc[1] & MSB_OFF;
260 
261 		} else if (st->plane_no == 2) {
262 		    if (st->SS2set == False) {
263 		        LEFT_CHECK(4);
264 			BUF_INPUT(ESC, '$', '*', 'H');
265 			st->SS2set = True;
266 		    }
267 		    LEFT_CHECK(4);
268 		    BUF_INPUT(ESC, 0x4E, st->keepc[2] & MSB_OFF, st->keepc[3] & MSB_OFF);
269 		} else {
270 		    if (st->SS3char != GET_PLANEC(st->plane_no)) {
271 			LEFT_CHECK(4);
272 			st->SS3char = GET_PLANEC(st->plane_no);
273 			BUF_INPUT(ESC, '$', '+', st->SS3char);
274 		    }
275 		    LEFT_CHECK(4);
276 		    BUF_INPUT(ESC, 0x4F, st->keepc[2] & MSB_OFF, st->keepc[3] & MSB_OFF);
277 		}
278 		st->cstate = C0;
279 		break;
280 	    default:			/* should never come here */
281 		st->_errno = errno = EILSEQ;
282 		st->cstate = C0;	/* reset state */
283 		break;
284 	    }
285 
286 	    (*inbuf)++;
287 	    (*inbytesleft)--;
288 
289 	    if (st->_errno) {
290 #ifdef DEBUG
291     fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\n",
292 		st->_errno, st->cstate);
293 #endif
294 		break;
295 	    }
296 	    if (errno) {
297 		return((size_t)-1);
298 	    }
299 
300 	}
301 
302 	if (*inbytesleft > 0 && *outbytesleft == 0) {
303 	    errno = E2BIG;
304 	    return ((size_t)-1);
305 	}
306 
307 	return (*inbytesleft);
308 }
309 
310 
311 /*
312  * Get plane number by char; i.e. 0xa2 returns 2, 0xae returns 14, etc.
313  * Returns -1 on error conditions
314  */
get_plane_no_by_char(const char inbuf)315 static int get_plane_no_by_char(const char inbuf)
316 {
317 	int ret;
318 	unsigned char uc = (unsigned char) inbuf;
319 
320 	ret = uc - PMASK;
321 	switch (ret) {
322 	case 1:		/* 0x8EA1 */
323 	case 2:		/* 0x8EA2 */
324 	case 3:		/* 0x8EA3 */
325 	case 4:		/* 0x8EA4 */
326 	case 5:		/* 0x8EA5 */
327 	case 6:		/* 0x8EA6 */
328 	case 7:		/* 0x8EA7 */
329 	case 12:	/* 0x8EAC */
330 	case 14:	/* 0x8EAE */
331 	case 15:	/* 0x8EAF */
332 	case 16:	/* 0x8EB0 */
333 		return (ret);
334 	default:
335 		return (-1);
336 	}
337 }
338