xref: /titanic_53/usr/src/lib/iconv_modules/zh/common/zh_CN.gbk%zh_HK.hkscs.c (revision 91e1e26ac6a73ce959289cf7d3d96c4baedbe0b8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  *	Copyright(c) 2001, Sun Microsystems, Inc.
23  *	All rights reserved.
24  */
25 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <gb18030_big5hk.h>
30 
/* Substitute byte written (twice) when a character has no HKSCS mapping. */
#define NON_ID_CHAR	'_'
/* High bit of a byte; a set bit marks the first byte of a multi-byte character. */
#define MSB		0x80
/* Mask keeping the low eight bits of a (possibly sign-extended) char. */
#define ONEBYTE		0xff

/*
 * Range predicates for bytes 2..4 of a four-byte sequence.
 * The argument is evaluated twice; callers pass the byte cast to
 * unsigned char so that values above 0x7f compare as expected.
 */
#define gbk4_2nd_byte(v)	((v) >= 0x30 && (v) <= 0x39)
#define gbk4_3rd_byte(v)	((v) >= 0x81 && (v) <= 0xfe)
#define gbk4_4th_byte(v)	((v) >= 0x30 && (v) <= 0x39)
38 
39 int binsearch(unsigned long x, table_t table[], int n);
40 int gbk_2nd_byte(char inbuf);
41 int gbk_to_hkscs(char keepc[], char *buf, size_t buflen);
42 int gbk4_to_hkscs(char keepc[], char *buf, size_t buflen);
43 
/*
 * Conversion state carried between calls for one open descriptor.
 */
struct _icv_state {
	char keepc[4];	/* bytes of the character being assembled (up to four) */
	short cstate;	/* current state-machine state, one of C0..C3 */
	int _errno;	/* internal errno */
};
typedef struct _icv_state _iconv_st;
49 
/* Input states: in state Cn, n bytes of the current character are held in keepc. */
enum _CSTATE {
	C0 = 0,
	C1 = 1,
	C2 = 2,
	C3 = 3
};
51 
52 /*=======================================================
53  *
54  *   State Machine for interpreting GBK code
55  *
56  *=======================================================
57  *
58  *                                  3rd C
59  *                              C2--------> C3
60  *                              ^            |
61  *                        2nd C |      4th C |
62  *                     1st C    |            |
63  *    +--------> C0 ----------> C1           |
64  *    |    ascii |        2nd C |            |
65  *    ^          v              v            V
66  *    +----<-----+-----<--------+-----<------+
67  *
68  *=======================================================*/
69 /*
70  *	Open; called from iconv_open()
71  */
_icv_open()72 void * _icv_open() {
73 	_iconv_st * st;
74 
75 	if ((st = (_iconv_st *) malloc(sizeof(_iconv_st))) == NULL) {
76 		errno = ENOMEM;
77 		return ((void *) -1);
78 	}
79 
80 	st->cstate = C0;
81 	st->_errno = 0;
82 
83 	return ((void *) st);
84 }
85 
86 /*
87  *	Close; called from iconv_close()
88  */
_icv_close(_iconv_st * st)89 void _icv_close(_iconv_st * st) {
90 	if (!st)
91 		errno = EBADF;
92 	else
93 		free(st);
94 }
95 
96 /*
97  *	Actual conversion; called from iconv()
98  */
99 
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)100 size_t _icv_iconv(_iconv_st * st, char **inbuf, size_t *inbytesleft,
101 					char ** outbuf, size_t *outbytesleft) {
102 	int n;
103 	if (st == NULL) {
104 		errno = EBADF;
105 		return ((size_t) -1);
106 	}
107 
108 	if (inbuf == NULL || *inbuf == NULL) {	/* Reset request. */
109 		st->cstate = C0;
110 		st->_errno = 0;
111 		return ((size_t) 0);
112 	}
113 
114 	errno = st->_errno = 0;
115 
116 	while (*inbytesleft > 0 && *outbytesleft > 0) {
117 		switch (st->cstate) {
118 			case C0:
119 				if (**inbuf & MSB) {	/* gb charactor */
120 					st->keepc[0] = (**inbuf);
121 					st->cstate = C1;
122 				} else {	/* ASCII */
123 					**outbuf = **inbuf;
124 					(*outbuf)++;
125 					(*outbytesleft)--;
126 				}
127 				break;
128 			case C1:	/* GBK charactor 2nd byte */
129 				if (gbk_2nd_byte(**inbuf) == 0) {
130 					st->keepc[1] = (**inbuf);
131 					n = gbk_to_hkscs(st->keepc, *outbuf, *outbytesleft);
132 					if (n > 0) {
133 						(*outbuf) += n;
134 						(*outbytesleft) -= n;
135 
136 						st->cstate = C0;
137 					} else {
138 						st->_errno = errno = E2BIG;
139 					}
140 				} else if ( gbk4_2nd_byte((unsigned char)**inbuf) ) {
141 					st->keepc[1] = (**inbuf);
142 					st->cstate = C2;
143 				} else {	/* illegal input, don't reset state */
144 					st->_errno = errno = EILSEQ;
145 				}
146 				break;
147 			case C2:
148 				if ( gbk4_3rd_byte((unsigned char)**inbuf) ) {
149 					st->keepc[2] = (**inbuf);
150 					st->cstate = C3;
151 				} else {
152 					/* illegal input, don't reset state */
153 					st->_errno = errno = EILSEQ;
154 				}
155 				break;
156 			case C3:
157 				if ( gbk4_4th_byte((unsigned char)**inbuf) ) {
158 					st->keepc[3] = (**inbuf);
159 
160                                         n = gbk4_to_hkscs(st->keepc, *outbuf, *outbytesleft);
161                                         if (n > 0) {
162                                                 (*outbuf) += n;
163                                                 (*outbytesleft) -= n;
164 
165 						st->cstate = C0;
166                                         } else {
167                                                 st->_errno = errno = E2BIG;
168                                         }
169 
170 				} else {
171 					/* illegal input, don't reset state */
172 					st->_errno = errno = EILSEQ;
173 				}
174 				break;
175 			default:	/* un-reachable */
176 				st->_errno = errno = EILSEQ;
177 				st->cstate = C0;
178 				break;
179 		}
180 
181 		if (st->_errno) break;
182 
183 		(*inbuf)++;
184 		(*inbytesleft)--;
185 	}
186 
187 	if (errno) return ((size_t) -1);
188 
189 	if ( *inbytesleft == 0 && st->cstate != C0 ) {
190 		errno = EINVAL;
191 		return ((size_t) -1);
192 	}
193 
194 	if ( *inbytesleft > 0 && *outbytesleft == 0 ) {
195 		errno = E2BIG;
196 		return ((size_t) -1);
197 	}
198 
199 	return (size_t)(*inbytesleft);
200 }
201 
/*
 *	Test whether inbuf is a valid second byte of a two-byte
 *	GBK character.  Accepted ranges are 0x40-0x7e and 0x80-0xfe.
 *	Return:	0 --- valid GBK 2nd byte
 *			1 --- invalid GBK 2nd byte
 */
int gbk_2nd_byte(char inbuf)
{
	/* Plain char may be signed; compare the value as an unsigned byte. */
	unsigned int byte = (unsigned char) inbuf;

	if ((byte >= 0x40) && (byte <= 0x7e))
		return 0;
	if ((byte >= 0x80) && (byte <= 0xfe))
		return 0;
	return 1;
}
220 
221 /*
222  *	gbk_to_hkscs: Convert gbk charactor to hkscs.
223  *	Return:	>0 --- converted with enough space in output buffer
224  *			=0 --- no space in outbuf
225  */
226 
gbk_to_hkscs(char keepc[],char * buf,size_t buflen)227 int gbk_to_hkscs(char keepc[], char *buf, size_t buflen) {
228 
229 	unsigned long gbk_val;	/* GBK value */
230 	int index;
231 	unsigned long hkscs_val;	/* hkscs value */
232 
233 	if (buflen < 2) {
234 		errno = E2BIG;
235 		return 0;
236 	}
237 
238 	gbk_val = ((keepc[0] & ONEBYTE) << 8) + (keepc[1] & ONEBYTE);
239 	index = binsearch(gbk_val, gbk_hkscs_tab, GBKMAX);
240 	if (index >= 0) {
241 		hkscs_val = gbk_hkscs_tab[index].value;
242 		*buf = (hkscs_val >> 8) & ONEBYTE;
243 		*(buf + 1) = hkscs_val & ONEBYTE;
244 	} else
245 		*buf = *(buf + 1) = (char)NON_ID_CHAR;
246 	return 2;
247 }
248 
gbk4_to_hkscs(char keepc[],char * buf,size_t buflen)249 int gbk4_to_hkscs(char keepc[], char *buf, size_t buflen) {
250 
251 	unsigned long gbk_val;	/* GBK value */
252 	int index;
253 	unsigned long hkscs_val;	/* hkscs value */
254 
255 	if (buflen < 2) {
256 		errno = E2BIG;
257 		return 0;
258 	}
259 
260 	gbk_val = ((keepc[0] & ONEBYTE) << 24) + ((keepc[1] & ONEBYTE) << 16) +
261 		  ((keepc[2] & ONEBYTE) << 8 ) + (keepc[3] & ONEBYTE);
262 	index = binsearch(gbk_val, gbk4_hkscs_tab, GBK4MAX);
263 	if (index >= 0) {
264 		hkscs_val = gbk4_hkscs_tab[index].value;
265 		*buf = (hkscs_val >> 8) & ONEBYTE;
266 		*(buf + 1) = hkscs_val & ONEBYTE;
267 	} else
268 		*buf = *(buf + 1) = (char)NON_ID_CHAR;
269 
270 	return 2;
271 }
272 
273 /*
274  *	binsearch()
275  */
binsearch(unsigned long x,table_t table[],int n)276 int binsearch(unsigned long x, table_t table[], int n) {
277 	int low, high, mid;
278 
279 	low = 0;
280 	high = n - 1;
281 	while (low <= high) {
282 		mid = (low + high) >> 1;
283 		if (x < table[mid].key)
284 			high = mid - 1;
285 		else if (x > table[mid].key)
286 			low = mid + 1;
287 		else
288 			return mid;
289 	}
290 	return -1;
291 }
292 
293 #ifdef DEBUG
main(int argc,char * argv[])294 main(int argc, char * argv[]) {
295 	_iconv_st * ist;
296 	char * inbuf = "�������е�ÿһ�������һ���Ѱ�װ��ע����������ʾ�� ��Ʒϵ�� ��";
297 	char * outbuf;
298 	char * ib, * oub;
299 	int inbyteleft;
300 	int outbyteleft;
301 
302 	ist = (_iconv_st *) _icv_open();
303 	inbyteleft = outbyteleft = 2 * strlen(inbuf);
304 	outbuf = (char *)malloc(outbyteleft);
305 	ib = inbuf;
306 	oub = outbuf;
307 	_icv_iconv(ist, &inbuf, &inbyteleft, &outbuf, &outbyteleft);
308 	printf("IN -- %s\n", ib);
309 	printf("OUT -- %s\n", oub);
310 }
311 #endif
312