1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright(c) 1997, Sun Microsystems, Inc.
23 * All rights reserved.
24 */
25
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <gb18030_big5.h>
30
/* Substitute written to the output for a character with no Big5 mapping. */
#define NON_ID_CHAR	'_'
/* Top bit of a byte; a byte with it set starts a multi-byte character. */
#define MSB		0x80
/* Mask that keeps the low eight bits of a promoted char. */
#define ONEBYTE		0xff

/*
 * Range tests for the trailing bytes of a four-byte sequence.
 * Each macro evaluates its argument twice, so pass an expression
 * without side effects.
 */
#define gbk4_2nd_byte(v)	(0x30 <= (v) && (v) <= 0x39)
#define gbk4_3rd_byte(v)	(0x81 <= (v) && (v) <= 0xfe)
#define gbk4_4th_byte(v)	(0x30 <= (v) && (v) <= 0x39)
38
/*
 * Conversion state carried between calls; allocated by _icv_open()
 * and released by _icv_close().
 */
typedef struct _icv_state {
	char	keepc[2];	/* bytes of the two-byte GBK character being assembled */
	short	cstate;		/* current state of the input state machine */
	int	_errno;		/* internal errno */
} _iconv_st;
44
/*
 * States of the input state machine; the value is the number of bytes
 * of the current multi-byte character already consumed.
 */
enum _CSTATE {
	C0 = 0,		/* at a character boundary */
	C1 = 1,		/* lead byte seen */
	C2 = 2,		/* second byte of a four-byte sequence seen */
	C3 = 3		/* third byte of a four-byte sequence seen */
};
46
47 int binsearch(unsigned long x, table_t table[], int n);
48 int gbk_2nd_byte(char inbuf);
49 int gbk_to_big5(char keepc[], char *buf, size_t buflen);
50
/*=======================================================
 *
 *   State Machine for interpreting GBK code
 *
 *=======================================================
 *
 *                                  3rd C
 *                              C2--------> C3
 *                              ^            |
 *                        2nd C |      4th C |
 *                     1st C    |            |
 *    +--------> C0 ----------> C1           |
 *    |    ascii |        2nd C |            |
 *    ^          v              v            V
 *    +----<-----+-----<--------+-----<------+
 *
 *=======================================================*/
68 /*
69 * Open; called from iconv_open()
70 */
_icv_open()71 void * _icv_open() {
72 _iconv_st * st;
73
74 if ((st = (_iconv_st *) malloc(sizeof(_iconv_st))) == NULL) {
75 errno = ENOMEM;
76 return ((void *) -1);
77 }
78
79 st->cstate = C0;
80 st->_errno = 0;
81
82 return ((void *) st);
83 }
84
85 /*
86 * Close; called from iconv_close()
87 */
_icv_close(_iconv_st * st)88 void _icv_close(_iconv_st * st) {
89 if (!st)
90 errno = EBADF;
91 else
92 free(st);
93 }
94
95 /*
96 * Actual conversion; called from iconv()
97 */
98
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)99 size_t _icv_iconv(_iconv_st * st, char **inbuf, size_t *inbytesleft,
100 char ** outbuf, size_t *outbytesleft) {
101 int n;
102 if (st == NULL) {
103 errno = EBADF;
104 return ((size_t) -1);
105 }
106
107 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
108 st->cstate = C0;
109 st->_errno = 0;
110 return ((size_t) 0);
111 }
112
113 errno = st->_errno = 0;
114
115 while (*inbytesleft > 0 && *outbytesleft > 0) {
116 switch (st->cstate) {
117 case C0:
118 if (**inbuf & MSB) { /* gb2312 charactor */
119 st->keepc[0] = (**inbuf);
120 st->cstate = C1;
121 } else { /* ASCII */
122 **outbuf = **inbuf;
123 (*outbuf)++;
124 (*outbytesleft)--;
125 }
126 break;
127 case C1: /* GBK charactor 2nd byte */
128 if (gbk_2nd_byte(**inbuf) == 0) {
129 st->keepc[1] = (**inbuf);
130 n = gbk_to_big5(st->keepc, *outbuf, *outbytesleft);
131 if (n > 0) {
132 (*outbuf) += n;
133 (*outbytesleft) -= n;
134
135 st->cstate = C0;
136 } else {
137 st->_errno = errno = E2BIG;
138 }
139 } else if ( gbk4_2nd_byte((unsigned char)**inbuf) ) {
140 st->cstate = C2;
141 } else { /* illegal input */
142 st->_errno = errno = EILSEQ;
143 }
144 break;
145 case C2:
146 if ( gbk4_3rd_byte((unsigned char)**inbuf) ) {
147 st->cstate = C3;
148 } else {
149 st->_errno = errno = EILSEQ;
150 }
151 break;
152 case C3:
153 if ( gbk4_4th_byte((unsigned char)**inbuf) ) {
154 /*
155 * replace the four byte character with "__" in outbuf
156 * due to that there hasn't corresponding code in BIG5
157 */
158 if ( *outbytesleft < 2 ) {
159 st->_errno = errno = E2BIG;
160 } else {
161 **outbuf = *((*outbuf) + 1) = (char)NON_ID_CHAR;
162 *outbuf += 2;
163 *outbytesleft -= 2;
164
165 st->cstate = C0;
166 }
167 } else {
168 st->_errno = errno = EILSEQ;
169 }
170 break;
171 default: /* un-reachable */
172 st->_errno = errno = EILSEQ;
173 st->cstate = C0;
174 break;
175 }
176
177 if ( st->_errno ) break;
178
179 (*inbuf)++;
180 (*inbytesleft)--;
181 }
182
183 if ( errno ) return ((size_t) -1);
184
185 if (*inbytesleft == 0 && st->cstate != C0) {
186 errno = EINVAL;
187 return ((size_t) -1);
188 }
189
190 if (*inbytesleft > 0 && *outbytesleft == 0) {
191 errno = E2BIG;
192 return (size_t)-1;
193 }
194
195 return (size_t)(*inbytesleft);
196 }
197
/*
 * gbk_2nd_byte: test whether inbuf is a valid second byte of a
 * two-byte GBK character, i.e. lies in 0x40-0x7e or 0x80-0xfe.
 * Return: 0 --- valid GBK 2nd byte
 *         1 --- invalid GBK 2nd byte
 */
int gbk_2nd_byte(char inbuf) {
	/* Compare as an unsigned byte so a signed char >= 0x80 is not negative. */
	unsigned int byte = (unsigned char) inbuf;

	if (byte >= 0x40 && byte <= 0xfe && byte != 0x7f) {
		return 0;
	}
	return 1;
}
216
217 /*
218 * gbk_to_big5: Convert gbk charactor to Big5.
219 * Return: >0 --- converted with enough space in output buffer
220 * =0 --- no space in outbuf
221 */
222
gbk_to_big5(char keepc[],char * buf,size_t buflen)223 int gbk_to_big5(char keepc[], char *buf, size_t buflen) {
224
225 unsigned long gbk_val; /* GBK value */
226 int index;
227 unsigned long big5_val; /* BIG5 value */
228
229 if (buflen < 2) {
230 errno = E2BIG;
231 return 0;
232 }
233
234 gbk_val = ((keepc[0] & ONEBYTE) << 8) + (keepc[1] & ONEBYTE);
235 index = binsearch(gbk_val, gbk_big5_tab, BIG5MAX);
236 if (index >= 0) {
237 big5_val = gbk_big5_tab[index].value;
238 *buf = (big5_val >> 8) & ONEBYTE;
239 *(buf + 1) = big5_val & ONEBYTE;
240 } else
241 *buf = *(buf + 1) = (char)NON_ID_CHAR;
242 return 2;
243 }
244
245 /*
246 * binsearch()
247 */
binsearch(unsigned long x,table_t table[],int n)248 int binsearch(unsigned long x, table_t table[], int n) {
249 int low, high, mid;
250
251 low = 0;
252 high = n - 1;
253 while (low <= high) {
254 mid = (low + high) >> 1;
255 if (x < table[mid].key)
256 high = mid - 1;
257 else if (x > table[mid].key)
258 low = mid + 1;
259 else
260 return mid;
261 }
262 return -1;
263 }
264
#ifdef DEBUG
/*
 * Debug driver, compiled only when DEBUG is defined: converts one
 * hard-coded sample string and prints the input and the result.
 * Return: 0 normally, 1 if the state or the output buffer cannot be
 *         allocated.
 */
int main(int argc, char * argv[]) {
	_iconv_st * ist;
	char * inbuf = "�������е�ÿһ�������һ���Ѱ�װ��ע����������ʾ�� ��Ʒϵ�� ��";
	char * outbuf;
	char * ib, * oub;
	size_t inbyteleft;	/* size_t: _icv_iconv() takes size_t pointers */
	size_t outbyteleft;

	(void) argc;
	(void) argv;

	ist = (_iconv_st *) _icv_open();
	if (ist == (_iconv_st *) -1) {
		perror("_icv_open");
		return (1);
	}

	/* Convert exactly the bytes of the sample, not twice its length. */
	inbyteleft = strlen(inbuf);
	outbyteleft = 2 * inbyteleft;
	outbuf = (char *) malloc(outbyteleft + 1);	/* +1 for the terminator */
	if (outbuf == NULL) {
		perror("malloc");
		_icv_close(ist);
		return (1);
	}

	ib = inbuf;
	oub = outbuf;
	if (_icv_iconv(ist, &inbuf, &inbyteleft, &outbuf, &outbyteleft) ==
	    (size_t) -1)
		perror("_icv_iconv");

	/* _icv_iconv() does not terminate its output; do it before printing. */
	*outbuf = '\0';
	printf("IN -- %s\n", ib);
	printf("OUT -- %s\n", oub);

	free(oub);
	_icv_close(ist);
	return (0);
}
#endif
284