1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright(c) 2001, Sun Microsystems, Inc.
23 * All rights reserved.
24 */
25
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <gb18030_big5hk.h>
30
/* Substitute written for input characters that have no entry in the tables. */
#define NON_ID_CHAR '_' /* non-identified character */
/* High bit of a byte; tested on the first byte to tell multi-byte from ASCII. */
#define MSB 0x80
/* Mask that keeps the low 8 bits after a (possibly signed) char is promoted. */
#define ONEBYTE 0xff

/*
 * Byte-range predicates for the 2nd, 3rd and 4th byte of a four-byte
 * sequence.  The argument is evaluated twice, so pass an expression without
 * side effects that has already been converted to unsigned char.
 */
#define gbk4_2nd_byte(v) ( (v) >= 0x30 && (v) <= 0x39 )
#define gbk4_3rd_byte(v) ( (v) >= 0x81 && (v) <= 0xfe )
#define gbk4_4th_byte(v) gbk4_2nd_byte(v)
38
/* Forward declarations of the helpers defined after _icv_iconv(). */
int binsearch(unsigned long x, table_t table[], int n);
int gbk_2nd_byte(char inbuf);
int gbk_to_hkscs(char keepc[], char *buf, size_t buflen);
int gbk4_to_hkscs(char keepc[], char *buf, size_t buflen);
43
/*
 * Conversion state: allocated by _icv_open(), updated by _icv_iconv() and
 * released by _icv_close().
 */
typedef struct _icv_state {
	char keepc[4]; /* bytes of the multi-byte character being assembled (at most 4) */
	short cstate; /* current decoder state: C0, C1, C2 or C3 */
	int _errno; /* internal errno */
} _iconv_st;
49
/*
 * Decoder states: C0 = between characters; C1, C2, C3 = one, two or three
 * bytes of a multi-byte character have been stored in keepc.
 */
enum _CSTATE { C0, C1, C2, C3 };
51
/*=======================================================
 *
 *   State Machine for interpreting GBK code
 *
 *=======================================================
 *
 *                                   3rd C
 *                               C2--------> C3
 *                               ^            |
 *                         2nd C |      4th C |
 *                  1st C        |            |
 *    +--------> C0 ----------> C1            |
 *    |    ascii |        2nd C |             |
 *    ^          v              v             V
 *    +----<-----+-----<--------+-----<-------+
 *
 *=======================================================*/
69 /*
70 * Open; called from iconv_open()
71 */
_icv_open()72 void * _icv_open() {
73 _iconv_st * st;
74
75 if ((st = (_iconv_st *) malloc(sizeof(_iconv_st))) == NULL) {
76 errno = ENOMEM;
77 return ((void *) -1);
78 }
79
80 st->cstate = C0;
81 st->_errno = 0;
82
83 return ((void *) st);
84 }
85
86 /*
87 * Close; called from iconv_close()
88 */
_icv_close(_iconv_st * st)89 void _icv_close(_iconv_st * st) {
90 if (!st)
91 errno = EBADF;
92 else
93 free(st);
94 }
95
96 /*
97 * Actual conversion; called from iconv()
98 */
99
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)100 size_t _icv_iconv(_iconv_st * st, char **inbuf, size_t *inbytesleft,
101 char ** outbuf, size_t *outbytesleft) {
102 int n;
103 if (st == NULL) {
104 errno = EBADF;
105 return ((size_t) -1);
106 }
107
108 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
109 st->cstate = C0;
110 st->_errno = 0;
111 return ((size_t) 0);
112 }
113
114 errno = st->_errno = 0;
115
116 while (*inbytesleft > 0 && *outbytesleft > 0) {
117 switch (st->cstate) {
118 case C0:
119 if (**inbuf & MSB) { /* gb charactor */
120 st->keepc[0] = (**inbuf);
121 st->cstate = C1;
122 } else { /* ASCII */
123 **outbuf = **inbuf;
124 (*outbuf)++;
125 (*outbytesleft)--;
126 }
127 break;
128 case C1: /* GBK charactor 2nd byte */
129 if (gbk_2nd_byte(**inbuf) == 0) {
130 st->keepc[1] = (**inbuf);
131 n = gbk_to_hkscs(st->keepc, *outbuf, *outbytesleft);
132 if (n > 0) {
133 (*outbuf) += n;
134 (*outbytesleft) -= n;
135
136 st->cstate = C0;
137 } else {
138 st->_errno = errno = E2BIG;
139 }
140 } else if ( gbk4_2nd_byte((unsigned char)**inbuf) ) {
141 st->keepc[1] = (**inbuf);
142 st->cstate = C2;
143 } else { /* illegal input, don't reset state */
144 st->_errno = errno = EILSEQ;
145 }
146 break;
147 case C2:
148 if ( gbk4_3rd_byte((unsigned char)**inbuf) ) {
149 st->keepc[2] = (**inbuf);
150 st->cstate = C3;
151 } else {
152 /* illegal input, don't reset state */
153 st->_errno = errno = EILSEQ;
154 }
155 break;
156 case C3:
157 if ( gbk4_4th_byte((unsigned char)**inbuf) ) {
158 st->keepc[3] = (**inbuf);
159
160 n = gbk4_to_hkscs(st->keepc, *outbuf, *outbytesleft);
161 if (n > 0) {
162 (*outbuf) += n;
163 (*outbytesleft) -= n;
164
165 st->cstate = C0;
166 } else {
167 st->_errno = errno = E2BIG;
168 }
169
170 } else {
171 /* illegal input, don't reset state */
172 st->_errno = errno = EILSEQ;
173 }
174 break;
175 default: /* un-reachable */
176 st->_errno = errno = EILSEQ;
177 st->cstate = C0;
178 break;
179 }
180
181 if (st->_errno) break;
182
183 (*inbuf)++;
184 (*inbytesleft)--;
185 }
186
187 if (errno) return ((size_t) -1);
188
189 if ( *inbytesleft == 0 && st->cstate != C0 ) {
190 errno = EINVAL;
191 return ((size_t) -1);
192 }
193
194 if ( *inbytesleft > 0 && *outbytesleft == 0 ) {
195 errno = E2BIG;
196 return ((size_t) -1);
197 }
198
199 return (size_t)(*inbytesleft);
200 }
201
/*
 * Test whether inbuf is a valid second byte of a two-byte GBK character,
 * i.e. lies in 0x40-0x7e or 0x80-0xfe.
 *
 * Return: 0 --- valid GBK 2nd byte
 *         1 --- invalid GBK 2nd byte
 */
int
gbk_2nd_byte(char inbuf)
{
	/*
	 * Compare the unsigned byte value; with a signed char, bytes from
	 * 0x80 upwards would otherwise be negative.
	 */
	unsigned int buf = (unsigned char)inbuf;

	if (buf >= 0x40 && buf <= 0x7e)
		return (0);
	if (buf >= 0x80 && buf <= 0xfe)
		return (0);
	return (1);
}
220
221 /*
222 * gbk_to_hkscs: Convert gbk charactor to hkscs.
223 * Return: >0 --- converted with enough space in output buffer
224 * =0 --- no space in outbuf
225 */
226
gbk_to_hkscs(char keepc[],char * buf,size_t buflen)227 int gbk_to_hkscs(char keepc[], char *buf, size_t buflen) {
228
229 unsigned long gbk_val; /* GBK value */
230 int index;
231 unsigned long hkscs_val; /* hkscs value */
232
233 if (buflen < 2) {
234 errno = E2BIG;
235 return 0;
236 }
237
238 gbk_val = ((keepc[0] & ONEBYTE) << 8) + (keepc[1] & ONEBYTE);
239 index = binsearch(gbk_val, gbk_hkscs_tab, GBKMAX);
240 if (index >= 0) {
241 hkscs_val = gbk_hkscs_tab[index].value;
242 *buf = (hkscs_val >> 8) & ONEBYTE;
243 *(buf + 1) = hkscs_val & ONEBYTE;
244 } else
245 *buf = *(buf + 1) = (char)NON_ID_CHAR;
246 return 2;
247 }
248
gbk4_to_hkscs(char keepc[],char * buf,size_t buflen)249 int gbk4_to_hkscs(char keepc[], char *buf, size_t buflen) {
250
251 unsigned long gbk_val; /* GBK value */
252 int index;
253 unsigned long hkscs_val; /* hkscs value */
254
255 if (buflen < 2) {
256 errno = E2BIG;
257 return 0;
258 }
259
260 gbk_val = ((keepc[0] & ONEBYTE) << 24) + ((keepc[1] & ONEBYTE) << 16) +
261 ((keepc[2] & ONEBYTE) << 8 ) + (keepc[3] & ONEBYTE);
262 index = binsearch(gbk_val, gbk4_hkscs_tab, GBK4MAX);
263 if (index >= 0) {
264 hkscs_val = gbk4_hkscs_tab[index].value;
265 *buf = (hkscs_val >> 8) & ONEBYTE;
266 *(buf + 1) = hkscs_val & ONEBYTE;
267 } else
268 *buf = *(buf + 1) = (char)NON_ID_CHAR;
269
270 return 2;
271 }
272
273 /*
274 * binsearch()
275 */
binsearch(unsigned long x,table_t table[],int n)276 int binsearch(unsigned long x, table_t table[], int n) {
277 int low, high, mid;
278
279 low = 0;
280 high = n - 1;
281 while (low <= high) {
282 mid = (low + high) >> 1;
283 if (x < table[mid].key)
284 high = mid - 1;
285 else if (x > table[mid].key)
286 low = mid + 1;
287 else
288 return mid;
289 }
290 return -1;
291 }
292
293 #ifdef DEBUG
main(int argc,char * argv[])294 main(int argc, char * argv[]) {
295 _iconv_st * ist;
296 char * inbuf = "�������е�ÿһ�������һ���Ѱ�װ��ע����������ʾ�� ��Ʒϵ�� ��";
297 char * outbuf;
298 char * ib, * oub;
299 int inbyteleft;
300 int outbyteleft;
301
302 ist = (_iconv_st *) _icv_open();
303 inbyteleft = outbyteleft = 2 * strlen(inbuf);
304 outbuf = (char *)malloc(outbyteleft);
305 ib = inbuf;
306 oub = outbuf;
307 _icv_iconv(ist, &inbuf, &inbyteleft, &outbuf, &outbyteleft);
308 printf("IN -- %s\n", ib);
309 printf("OUT -- %s\n", oub);
310 }
311 #endif
312