1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright(c) 1997, Sun Microsystems, Inc.
23 * All rights reserved.
24 */
25
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <gb18030_big5p.h>
30
/* Byte written (twice) in place of a character that cannot be mapped. */
#define	NON_ID_CHAR	'_'
/* High bit of a byte; set on the first byte of every non-ASCII character. */
#define	MSB		0x80
/* Mask that keeps only the low eight bits of a value. */
#define	ONEBYTE		0xff

/*
 * Range checks for the 2nd, 3rd and 4th byte of a four-byte character.
 * The argument is evaluated more than once, so it must be free of side
 * effects.
 */
#define	gbk4_2nd_byte(v)	(0x30 <= (v) && (v) <= 0x39)
#define	gbk4_3rd_byte(v)	(0x81 <= (v) && (v) <= 0xfe)
#define	gbk4_4th_byte(v)	gbk4_2nd_byte(v)
38
39 int binsearch(unsigned long x, table_t table[], int n);
40 int gbk_2nd_byte(char inbuf);
41 int gbk_to_big5p(char keepc[], char *buf, size_t buflen);
42
/*
 * Position of the converter inside the character being read:
 *	C0 --- between characters, next byte starts a new one
 *	C1 --- first byte seen, next byte is the 2nd byte
 *	C2 --- next byte is the 3rd byte of a four-byte character
 *	C3 --- next byte is the 4th byte of a four-byte character
 */
enum _CSTATE { C0, C1, C2, C3 };

/* Per-descriptor conversion state, allocated by _icv_open(). */
typedef struct _icv_state {
	char	keepc[2];	/* bytes of the two-byte character in progress */
	short	cstate;		/* one of enum _CSTATE */
	int	_errno;		/* internal errno */
} _iconv_st;
50
51
52 /*
53 * Open; called from iconv_open()
54 */
_icv_open()55 void * _icv_open() {
56 _iconv_st * st;
57
58 if ((st = (_iconv_st *) malloc(sizeof(_iconv_st))) == NULL) {
59 errno = ENOMEM;
60 return ((void *) -1);
61 }
62
63 st->cstate = C0;
64 st->_errno = 0;
65
66 return ((void *) st);
67 }
68
69 /*
70 * Close; called from iconv_close()
71 */
_icv_close(_iconv_st * st)72 void _icv_close(_iconv_st * st) {
73 if (!st)
74 errno = EBADF;
75 else
76 free(st);
77 }
78
79 /*
80 * Actual conversion; called from iconv()
81 */
82
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)83 size_t _icv_iconv(_iconv_st * st, char **inbuf, size_t *inbytesleft,
84 char ** outbuf, size_t *outbytesleft) {
85 int n;
86 if (st == NULL) {
87 errno = EBADF;
88 return ((size_t) -1);
89 }
90
91 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
92 st->cstate = C0;
93 st->_errno = 0;
94 return ((size_t) 0);
95 }
96
97 errno = st->_errno = 0;
98
99 while (*inbytesleft > 0 && *outbytesleft > 0) {
100 switch (st->cstate) {
101 case C0:
102 if (**inbuf & MSB) { /* gb2312 charactor */
103 st->keepc[0] = (**inbuf);
104 st->cstate = C1;
105 } else { /* ASCII */
106 **outbuf = **inbuf;
107 (*outbuf)++;
108 (*outbytesleft)--;
109 }
110 break;
111 case C1: /* GBK charactor 2nd byte */
112 if (gbk_2nd_byte(**inbuf) == 0) {
113 st->keepc[1] = (**inbuf);
114 n = gbk_to_big5p(st->keepc, *outbuf, *outbytesleft);
115 if (n > 0) {
116 (*outbuf) += n;
117 (*outbytesleft) -= n;
118
119 st->cstate = C0;
120 } else {
121 st->_errno = errno = E2BIG;
122 }
123 } else if ( gbk4_2nd_byte((unsigned char)**inbuf) ) {
124 st->cstate = C2;
125 } else { /* illegal input */
126 st->_errno = errno = EILSEQ;
127 }
128 break;
129 case C2:
130 if ( gbk4_3rd_byte((unsigned char)**inbuf) )
131 st->cstate = C3;
132 else
133 st->_errno = errno = EILSEQ;
134 break;
135 case C3:
136 if ( gbk4_4th_byte((unsigned char)**inbuf)) {
137
138 /* replace the four-bytes character with __ in outbuf
139 * since there wouldn't have corresponding code in BIG5P
140 */
141 if ( *outbytesleft < 2 ) st->_errno = errno = E2BIG;
142 else {
143 **outbuf = *((*outbuf)+1) = (char)NON_ID_CHAR;
144 *outbuf += 2;
145 *outbytesleft -= 2;
146
147 st->cstate = C0;
148 }
149 }
150 else
151 st->_errno = errno = EILSEQ;
152 break;
153 default: /* un-reachable */
154 st->_errno = errno = EILSEQ;
155 st->cstate = C0;
156 break;
157 }
158
159 if (st->_errno) break;
160
161 (*inbuf)++;
162 (*inbytesleft)--;
163 }
164
165 if (errno) return ((size_t) -1);
166
167 if (*inbytesleft == 0 && st->cstate != C0) {
168 errno = EINVAL;
169 return ((size_t) -1);
170 }
171
172 if (*inbytesleft > 0 && *outbytesleft == 0) {
173 errno = E2BIG;
174 return (size_t)-1;
175 }
176
177 return (size_t)(*inbytesleft);
178 }
179
/*
 * Test whether inbuf is a valid 2nd byte of a two-byte GBK character,
 * i.e. lies in 0x40-0x7e or 0x80-0xfe.
 * Return: 0 --- valid GBK 2nd byte
 *	   1 --- invalid GBK 2nd byte
 *
 * Defined with a prototype so that it agrees with the declaration
 * "int gbk_2nd_byte(char inbuf)"; an old-style definition would promote
 * the parameter to int.
 */
int
gbk_2nd_byte(char inbuf)
{
	/* char may be signed: compare on the unsigned byte value */
	unsigned int buf = (unsigned char)inbuf;

	if ((buf >= 0x40) && (buf <= 0x7e))
		return (0);
	if ((buf >= 0x80) && (buf <= 0xfe))
		return (0);
	return (1);
}
198
199 /*
200 * gbk_to_big5p: Convert gbk charactor to Big5p.
201 * Return: >0 --- converted with enough space in output buffer
202 * =0 --- no space in outbuf
203 */
204
gbk_to_big5p(char keepc[],char * buf,size_t buflen)205 int gbk_to_big5p(char keepc[], char *buf, size_t buflen) {
206
207 unsigned long gbk_val; /* GBK value */
208 int index;
209 unsigned long big5_val; /* BIG5 value */
210
211 if (buflen < 2) {
212 errno = E2BIG;
213 return 0;
214 }
215
216 gbk_val = ((keepc[0] & ONEBYTE) << 8) + (keepc[1] & ONEBYTE);
217 index = binsearch(gbk_val, gbk_big5p_tab, BIG5MAX);
218 if (index >= 0) {
219 big5_val = gbk_big5p_tab[index].value;
220 *buf = (big5_val >> 8) & ONEBYTE;
221 *(buf + 1) = big5_val & ONEBYTE;
222 } else
223 *buf = *(buf + 1) = (char)NON_ID_CHAR;
224 return 2;
225 }
226
227 /*
228 * binsearch()
229 */
binsearch(unsigned long x,table_t table[],int n)230 int binsearch(unsigned long x, table_t table[], int n) {
231 int low, high, mid;
232
233 low = 0;
234 high = n - 1;
235 while (low <= high) {
236 mid = (low + high) >> 1;
237 if (x < table[mid].key)
238 high = mid - 1;
239 else if (x > table[mid].key)
240 low = mid + 1;
241 else
242 return mid;
243 }
244 return -1;
245 }
246
#ifdef DEBUG
/*
 * Stand-alone smoke test, built only when DEBUG is defined.  Runs a fixed
 * sample string through the converter and prints the input and the result.
 * Return: 0 when the conversion succeeded, 1 otherwise.
 */
int
main(int argc, char *argv[])
{
	_iconv_st *ist;
	char *inbuf = "�������е�ÿһ�������һ���Ѱ�װ��ע����������ʾ�� ��Ʒϵ�� ��";
	char *outbuf;
	char *ib, *oub;
	size_t inbyteleft;	/* size_t: _icv_iconv() takes size_t pointers */
	size_t outbyteleft;
	size_t ret;

	(void) argc;
	(void) argv;

	ist = (_iconv_st *)_icv_open();
	if (ist == (_iconv_st *)-1) {
		perror("_icv_open");
		return (1);
	}

	/* Offer exactly the bytes of the string as input, no more. */
	inbyteleft = strlen(inbuf);
	outbyteleft = 2 * inbyteleft;

	/* One extra byte for the terminator that printf("%s") needs. */
	outbuf = (char *)malloc(outbyteleft + 1);
	if (outbuf == NULL) {
		perror("malloc");
		_icv_close(ist);
		return (1);
	}

	/* _icv_iconv() advances inbuf and outbuf; remember the starts. */
	ib = inbuf;
	oub = outbuf;

	ret = _icv_iconv(ist, &inbuf, &inbyteleft, &outbuf, &outbyteleft);
	if (ret == (size_t)-1)
		perror("_icv_iconv");

	/* outbuf now points just past the last converted byte. */
	*outbuf = '\0';

	printf("IN -- %s\n", ib);
	printf("OUT -- %s\n", oub);

	free(oub);
	_icv_close(ist);
	return (ret == (size_t)-1 ? 1 : 0);
}
#endif
266