1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1994 by Sun Microsystems, Inc.
23 */
24
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <errno.h>
28 #include <gb2312_unicode.h>
29
/* High-order bit of a byte; clear on 7-bit ASCII input. */
#define	MSB			0x80

/*
 * The three bytes EF BF BD are the UTF-8 form of U+FFFD (REPLACEMENT
 * CHARACTER); they are written when a byte pair has no usable mapping.
 */
#define	UTF8_NON_ID_CHAR1	0xEF
#define	UTF8_NON_ID_CHAR2	0xBF
#define	UTF8_NON_ID_CHAR3	0xBD
35
36
/*
 * Decoder states stored in _iconv_st._gstate.
 */
enum _GSTATE {
	G0,	/* ASCII mode */
	G1,	/* ASCII mode, a '~' has just been read */
	G2,	/* two-byte mode, waiting for the first byte of a pair */
	G3,	/* two-byte mode, first byte saved, waiting for the second */
	G4,	/* two-byte mode, a '~' has just been read */
	G5	/* not entered by any transition visible in this file */
};
38
39
/*
 * Per-descriptor conversion state; allocated by _icv_open() and
 * released by _icv_close().
 */
typedef struct _icv_state {
	char	_lastc;		/* first byte of a pair, held until the second arrives */
	short	_gstate;	/* current decoder state (an enum _GSTATE value) */
} _iconv_st;
44
/*
 * Convert one two-byte character to UTF-8 in buf (at most buflen bytes).
 * Returns the number of bytes stored (2 or 3), or 0 when buf is too small.
 */
int hz2utf8(char in_byte1, char in_byte2, char *buf, int buflen);
47
48 /*
49 * Open; called from iconv_open()
50 */
51 void *
_icv_open()52 _icv_open()
53 {
54 _iconv_st *st;
55
56 if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
57 errno = ENOMEM;
58 return ((void *) -1);
59 }
60
61 st->_gstate = G0;
62 return ((void *)st);
63 }
64
65
66 /*
67 * Close; called from iconv_close()
68 */
69 void
_icv_close(_iconv_st * st)70 _icv_close(_iconv_st *st)
71 {
72 if (st == NULL)
73 errno = EBADF;
74 else
75 free(st);
76 }
77
78
79 /*
80 * Actual conversion; called from iconv()
81 */
82 /*=======================================================================
83 *
84 * ~ { Chinese
85 * +-> G0 -----> G1 ----> G2 ----> G3
86 * | | ascii | ascii |~} |
87 * +----------------------+--------+
88 *=======================================================================*/
89 size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)90 _icv_iconv(_iconv_st *st, char **inbuf, size_t*inbytesleft,
91 char **outbuf, size_t*outbytesleft)
92 {
93 int n;
94
95 if (st == NULL) {
96 errno = EBADF;
97 return -1;
98 }
99 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
100 st->_gstate = G0;
101 return 0;
102 }
103
104 errno = 0;
105
106 while (*inbytesleft > 0 && *outbytesleft > 0) {
107 switch (st->_gstate) {
108 case G0:
109 if ( **inbuf == '~' ) {
110 st->_gstate = G1;
111 } else if (((**inbuf) & MSB) == 0) { /* ASCII */
112 **outbuf = **inbuf;
113 (*outbuf)++, (*outbytesleft)--;
114 }
115 break;
116 case G1:
117 if ( **inbuf == '{' ) {
118 st->_gstate = G2;
119 } else if (**inbuf == '\n') {
120 st->_gstate = G0;
121 } else if (**inbuf == '~') {
122 **outbuf = '~';
123 (*outbuf)++, (*outbytesleft)--;
124 st->_gstate = G0;
125 } else {
126 errno = EINVAL;
127 }
128 break;
129 case G2:
130 if ( **inbuf == '~' ) {
131 st->_gstate = G4;
132 } else {
133 st->_lastc = **inbuf;
134 st->_gstate = G3;
135 }
136 break;
137 case G3:
138 n = hz2utf8(st->_lastc, **inbuf, *outbuf, *outbytesleft);
139 if (n > 0) {
140 (*outbuf) += n, (*outbytesleft) -= n;
141 } else {
142 errno = E2BIG;
143 return (size_t)-1;
144 }
145 st->_gstate = G2;
146 break;
147 case G4:
148 if ( **inbuf == '}' ) {
149 st->_gstate = G0;
150 } else if (**inbuf == '\n') {
151 st->_gstate = G2;
152 continue;
153 } else {
154 errno = EINVAL;
155 }
156
157 break;
158 }
159
160 (*inbuf)++, (*inbytesleft)--;
161 if (errno)
162 {
163 return (size_t)-1;
164 }
165 }
166
167 if (*inbytesleft > 0 && *outbytesleft == 0) {
168 errno = E2BIG;
169 return (size_t)-1;
170 }
171 return ((size_t)(*inbytesleft));
172 }
173
174
175 int
hz2utf8(in_byte1,in_byte2,buf,buflen)176 hz2utf8(in_byte1, in_byte2, buf, buflen)
177 char in_byte1, in_byte2;
178 char *buf;
179 int buflen;
180 {
181
182 int idx;
183 int unicode;
184
185 if ( buflen < 2 )
186 return 0;
187 in_byte1 |= MSB;
188 in_byte2 |= MSB;
189
190
191 idx = (((in_byte1 & 0xff) - 0xa1) * 94) + (in_byte2 & 0xff) - 0xa1;
192 if (idx >= 0) {
193 unicode = Unicode[idx];
194 if (unicode >= 0x0080 && unicode <= 0x07ff) {
195 if ( buflen < 2 )
196 return 0;
197 *buf = ((unicode >> 6) & 0x1f) | 0xc0;
198 *(buf+1) = (unicode & 0x3f) | MSB;
199 return 2;
200 }
201 if (unicode >= 0x0800 && unicode <= 0xffff) {
202 if ( buflen < 3 )
203 return 0;
204 *buf = ((unicode >> 12) & 0x0f) | 0xe0;
205 *(buf+1) = ((unicode >> 6) & 0x3f) | MSB;
206 *(buf+2) = (unicode & 0x3f) | MSB;
207 return 3;
208 }
209 }
210 if ( buflen < 3 )
211 return 0;
212
213 *buf = UTF8_NON_ID_CHAR1;
214 *(buf+1) = UTF8_NON_ID_CHAR2;
215 *(buf+2) = UTF8_NON_ID_CHAR3;
216 return 3;
217 }
218