1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1995, by Sun Microsystems, Inc.
24 * All rights reserved.
25 */
26
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <errno.h>
30
#define MSB 0x80 /* most significant bit; set on every byte of a CNS 11643 character */
#define MBYTE 0x8e /* lead byte that introduces a 4-byte character */
#define PMASK 0xa0 /* plane byte base: the plane number is (plane byte - PMASK) */
#define ONEBYTE 0xff /* keeps the low 8 bits of a (possibly sign-extended) char */
#define MSB_OFF 0x7f /* mask that clears the MSB of an output byte */

#define SI 0x0f /* shift in */
#define SO 0x0e /* shift out */
#define ESC 0x1b /* escape */

/*
 * Final byte of the designation escape sequence, indexed by plane number:
 * index 1 ('G') and index 2 ('H') match the literals used for the
 * ESC $ ) G and ESC $ * H designations, and indexes 3 and up supply the
 * final byte of ESC $ + <c>.  Index 0 ('0') doubles as the "nothing
 * designated" value stored in SS3char.
 *
 * An alternative table, "0GH23456789:;<=>?", was left commented out here.
 * NOTE(review): its origin is not documented in this file.
 */
static const char plane_char[] = "0GHIJKLMNOPQRSTUV";

/* Designation final byte for plane i; i must be a valid index of plane_char[]. */
#define GET_PLANEC(i) (plane_char[i])

#define NON_ID_CHAR '_' /* non-identified character; not referenced by the code shown in this file */
47
/*
 * Conversion descriptor state.  Allocated and initialised by _icv_open(),
 * updated by every _icv_iconv() call and released by _icv_close().
 */
typedef struct _icv_state {
	char keepc[4]; /* bytes of the CNS 11643 character being collected (at most 4) */
	short cstate; /* input parser state, an enum _CSTATE value */
	short istate; /* output shift state, an enum _ISTATE value */
	short plane_no; /* plane of the current character; -1 when not yet known */
	short SOset; /* True once ESC $ ) G has been emitted; cleared after '\n' */
	short SS2set; /* True once ESC $ * H has been emitted; cleared after '\n' */
	char SS3char; /* final byte of the last ESC $ + designation; '0' means none */
	int _errno; /* error recorded by the current _icv_iconv() call, 0 if none */
} _iconv_st;
58
/*
 * Input parser states:
 *   C0 - expecting the first byte of a character (or an ASCII byte)
 *   C1 - first byte stored, expecting the second byte
 *   C2 - 4-byte character, expecting the third byte
 *   C3 - 4-byte character, expecting the fourth byte
 *   C4 - all bytes stored, the character is ready to be emitted
 */
enum _CSTATE { C0, C1, C2, C3, C4 };
/* Output shift state: IN after SI (ASCII), OUT after SO. */
enum _ISTATE { IN, OUT };
/* Values stored in the SOset / SS2set flags. */
enum _truefalse { False, True };
62
63
/* Maps the plane byte that follows 0x8e to a plane number; -1 if unsupported. */
static int get_plane_no_by_char(const char inbuf);
65
66 /*
67 * Open; called from iconv_open()
68 */
69 void *
_icv_open()70 _icv_open()
71 {
72 _iconv_st *st;
73
74 if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
75 errno = ENOMEM;
76 return ((void *) -1);
77 }
78
79 st->cstate = C0;
80 st->istate = IN;
81 st->_errno = 0;
82 st->plane_no = -1;
83 st->SOset = False;
84 st->SS2set = False;
85 st->SS3char = '0';
86
87 #ifdef DEBUG
88 fprintf(stderr, "========== iconv(): CNS11643 --> ISO 2022-CN ==========\n");
89 #endif
90
91 return ((void *) st);
92 }
93
94
95 /*
96 * Close; called from iconv_close()
97 */
98 void
_icv_close(_iconv_st * st)99 _icv_close(_iconv_st *st)
100 {
101 if (!st)
102 errno = EBADF;
103 else
104 free(st);
105 }
106
107
108 /*
109 * Actual conversion; called from iconv()
110 */
111 /*=======================================================
112 *
113 * State Machine for interpreting CNS 11643 code
114 *
115 *=======================================================
116 *
117 * (ESC,SO) plane 2 - 16
118 * 1st C 2nd C 3rd C
119 * +------> C0 -----> C1 -----------> C2 -----> C3
120 * | ascii | plane 1 | 4th C |
121 * ^ | 2nd C v v
122 * | | C4 <------<--------<-------+
123 * | v | (SI)
124 * +----<---+-----<----v
125 *
126 *=======================================================*/
127 #define LEFT_CHECK(i) if (*outbytesleft < i) {\
128 st->_errno = errno = E2BIG;\
129 return((size_t)-1);\
130 } else\
131 (*outbytesleft) -= i
132 #define BUF_INPUT(c1, c2, c3, c4)\
133 *(*outbuf)++ = c1;\
134 *(*outbuf)++ = c2;\
135 *(*outbuf)++ = c3;\
136 *(*outbuf)++ = c4
137 size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)138 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
139 char **outbuf, size_t *outbytesleft)
140 {
141 if (st == NULL) {
142 errno = EBADF;
143 return ((size_t) -1);
144 }
145
146 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
147 if (st->istate == OUT) {
148 if (outbytesleft && *outbytesleft >= 1 && outbuf && *outbuf) {
149 **outbuf = SI;
150 (*outbuf)++;
151 (*outbytesleft)--;
152 } else {
153 errno = E2BIG;
154 return((size_t) -1);
155 }
156 }
157 st->cstate = C0;
158 st->istate = IN;
159 st->_errno = 0;
160 st->plane_no = -1;
161 st->SOset = False;
162 st->SS2set = False;
163 st->SS3char = '0';
164 return ((size_t) 0);
165 }
166
167 #ifdef DEBUG
168 fprintf(stderr, "=== (Re-entry) iconv(): CNS11643 --> ISO 2022-CN ===\n");
169 fprintf(stderr, "st->cstate=%d\tst->istate=%d\tst->_errno=%d\tplane_no=%d\n",
170 st->cstate, st->istate, st->_errno, st->plane_no);
171 #endif
172 st->_errno = 0; /* reset internal errno */
173 errno = 0; /* reset external errno */
174
175 /* a state machine for interpreting CNS 11643 code */
176 while (*inbytesleft > 0 && *outbytesleft > 0) {
177 switch (st->cstate) {
178 case C0: /* assuming ASCII in the beginning */
179 if (**inbuf & MSB) {
180 st->keepc[0] = (**inbuf);
181 st->cstate = C1;
182 } else { /* real ASCII */
183 if (st->istate == OUT) {
184 st->istate = IN;
185 *(*outbuf)++ = SI;
186 (*outbytesleft)--;
187 if (*outbytesleft <= 0) {
188 errno = E2BIG;
189 return ((size_t) -1);
190 }
191 }
192 *(*outbuf)++ = **inbuf;
193 (*outbytesleft)--;
194 if (**inbuf == '\n') {
195 st->SOset = False;
196 st->SS2set = False;
197 st->SS3char = '0';
198 }
199 }
200 break;
201 case C1: /* Chinese characters: 2nd byte */
202 if ((st->keepc[0] & ONEBYTE) == MBYTE) { /* 4-byte (0x8e) */
203 st->plane_no = get_plane_no_by_char(**inbuf);
204 if (st->plane_no == -1) { /* illegal plane */
205 st->cstate = C0;
206 st->istate = IN;
207 st->_errno = errno = EILSEQ;
208 } else { /* 4-byte Chinese character */
209 st->cstate = C2;
210 st->keepc[1] = (**inbuf);
211 }
212 } else { /* 2-byte Chinese character - plane #1 */
213 if (**inbuf & MSB) { /* plane #1 */
214 st->cstate = C4;
215 st->keepc[1] = (**inbuf);
216 st->plane_no = 1;
217 continue; /* should not advance *inbuf */
218 } else { /* input char doesn't belong
219 * to the input code set */
220 st->cstate = C0;
221 st->istate = IN;
222 st->_errno = errno = EINVAL;
223 }
224 }
225 break;
226 case C2: /* plane #2 - #16 (4 bytes): get 3nd byte */
227 if (**inbuf & MSB) { /* 3rd byte */
228 st->keepc[2] = (**inbuf);
229 st->cstate = C3;
230 } else {
231 st->_errno = errno = EINVAL;
232 st->cstate = C0;
233 }
234 break;
235 case C3: /* plane #2 - #16 (4 bytes): get 4th byte */
236 if (**inbuf & MSB) { /* 4th byte */
237 st->cstate = C4;
238 st->keepc[3] = (**inbuf);
239 continue; /* should not advance *inbuf */
240 } else {
241 st->_errno = errno = EINVAL;
242 st->cstate = C0;
243 }
244 break;
245 case C4: /* Convert code from CNS 11643 to ISO 2022-CN */
246 if (st->plane_no == 1) {
247 if (st->istate == IN) {
248 if (st->SOset == False) {
249 LEFT_CHECK(4);
250 BUF_INPUT(ESC, '$', ')', 'G');
251 st->SOset = True;
252 }
253 LEFT_CHECK(1);
254 *(*outbuf)++ = SO;
255 st->istate = OUT;
256 }
257 LEFT_CHECK(2);
258 *(*outbuf)++ = st->keepc[0] & MSB_OFF;
259 *(*outbuf)++ = st->keepc[1] & MSB_OFF;
260
261 } else if (st->plane_no == 2) {
262 if (st->SS2set == False) {
263 LEFT_CHECK(4);
264 BUF_INPUT(ESC, '$', '*', 'H');
265 st->SS2set = True;
266 }
267 LEFT_CHECK(4);
268 BUF_INPUT(ESC, 0x4E, st->keepc[2] & MSB_OFF, st->keepc[3] & MSB_OFF);
269 } else {
270 if (st->SS3char != GET_PLANEC(st->plane_no)) {
271 LEFT_CHECK(4);
272 st->SS3char = GET_PLANEC(st->plane_no);
273 BUF_INPUT(ESC, '$', '+', st->SS3char);
274 }
275 LEFT_CHECK(4);
276 BUF_INPUT(ESC, 0x4F, st->keepc[2] & MSB_OFF, st->keepc[3] & MSB_OFF);
277 }
278 st->cstate = C0;
279 break;
280 default: /* should never come here */
281 st->_errno = errno = EILSEQ;
282 st->cstate = C0; /* reset state */
283 break;
284 }
285
286 (*inbuf)++;
287 (*inbytesleft)--;
288
289 if (st->_errno) {
290 #ifdef DEBUG
291 fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\n",
292 st->_errno, st->cstate);
293 #endif
294 break;
295 }
296 if (errno) {
297 return((size_t)-1);
298 }
299
300 }
301
302 if (*inbytesleft > 0 && *outbytesleft == 0) {
303 errno = E2BIG;
304 return ((size_t)-1);
305 }
306
307 return (*inbytesleft);
308 }
309
310
311 /*
312 * Get plane number by char; i.e. 0xa2 returns 2, 0xae returns 14, etc.
313 * Returns -1 on error conditions
314 */
get_plane_no_by_char(const char inbuf)315 static int get_plane_no_by_char(const char inbuf)
316 {
317 int ret;
318 unsigned char uc = (unsigned char) inbuf;
319
320 ret = uc - PMASK;
321 switch (ret) {
322 case 1: /* 0x8EA1 */
323 case 2: /* 0x8EA2 */
324 case 3: /* 0x8EA3 */
325 case 4: /* 0x8EA4 */
326 case 5: /* 0x8EA5 */
327 case 6: /* 0x8EA6 */
328 case 7: /* 0x8EA7 */
329 case 12: /* 0x8EAC */
330 case 14: /* 0x8EAE */
331 case 15: /* 0x8EAF */
332 case 16: /* 0x8EB0 */
333 return (ret);
334 default:
335 return (-1);
336 }
337 }
338