xref: /titanic_53/usr/src/lib/iconv_modules/zh/common/zh_CN.gbk%zh_TW-big5.c (revision 91e1e26ac6a73ce959289cf7d3d96c4baedbe0b8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  *	Copyright(c) 1997, Sun Microsystems, Inc.
23  *	All rights reserved.
24  */
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <errno.h>
29 #include <gb18030_big5.h>
30 
/* Substitute written to the output for input that has no Big5 mapping. */
#define NON_ID_CHAR	'_'
/* High bit of a byte; set on the first byte of every multi-byte character. */
#define MSB		0x80
/* Mask that keeps the low eight bits of a value. */
#define ONEBYTE		0xff

/*
 * Range tests for the 2nd, 3rd and 4th byte of a four-byte sequence.
 * The argument is evaluated twice, so it must be free of side effects
 * and should already be an unsigned byte value.
 */
#define gbk4_2nd_byte(v)	((v) >= 0x30 && (v) <= 0x39)
#define gbk4_3rd_byte(v)	((v) >= 0x81 && (v) <= 0xfe)
#define gbk4_4th_byte(v)	gbk4_2nd_byte(v)
38 
/*
 * Conversion state carried between calls; one instance is allocated by
 * _icv_open() for each conversion descriptor.
 */
typedef struct _icv_state {
	char	keepc[2];	/* first and second byte of the pending GBK character */
	short	cstate;		/* current state-machine state (enum _CSTATE value) */
	int	_errno;		/* internal errno */
} _iconv_st;
44 
/* State-machine states: how many bytes of the current character were read. */
enum _CSTATE {
	C0,	/* between characters */
	C1,	/* first byte read */
	C2,	/* four-byte form: second byte read */
	C3	/* four-byte form: third byte read */
};
46 
47 int binsearch(unsigned long x, table_t table[], int n);
48 int gbk_2nd_byte(char inbuf);
49 int gbk_to_big5(char keepc[], char *buf, size_t buflen);
50 
51 /*=======================================================
52  *
53  *   State Machine for interpreting GBK code
54  *
55  *=======================================================
56  *
57  *                                  3rd C
58  *                              C2--------> C3
59  *                              ^            |
60  *                        2nd C |      4th C |
61  *                     1st C    |            |
62  *    +--------> C0 ----------> C1           |
63  *    |    ascii |        2nd C |            |
64  *    ^          v              v            V
65  *    +----<-----+-----<--------+-----<------+
66  *
67  *=======================================================*/
68 /*
69  *	Open; called from iconv_open()
70  */
_icv_open()71 void * _icv_open() {
72 	_iconv_st * st;
73 
74 	if ((st = (_iconv_st *) malloc(sizeof(_iconv_st))) == NULL) {
75 		errno = ENOMEM;
76 		return ((void *) -1);
77 	}
78 
79 	st->cstate = C0;
80 	st->_errno = 0;
81 
82 	return ((void *) st);
83 }
84 
85 /*
86  *	Close; called from iconv_close()
87  */
_icv_close(_iconv_st * st)88 void _icv_close(_iconv_st * st) {
89 	if (!st)
90 		errno = EBADF;
91 	else
92 		free(st);
93 }
94 
95 /*
96  *	Actual conversion; called from iconv()
97  */
98 
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)99 size_t _icv_iconv(_iconv_st * st, char **inbuf, size_t *inbytesleft,
100 					char ** outbuf, size_t *outbytesleft) {
101 	int n;
102 	if (st == NULL) {
103 		errno = EBADF;
104 		return ((size_t) -1);
105 	}
106 
107 	if (inbuf == NULL || *inbuf == NULL) {	/* Reset request. */
108 		st->cstate = C0;
109 		st->_errno = 0;
110 		return ((size_t) 0);
111 	}
112 
113 	errno = st->_errno = 0;
114 
115 	while (*inbytesleft > 0 && *outbytesleft > 0) {
116 		switch (st->cstate) {
117 			case C0:
118 				if (**inbuf & MSB) {	/* gb2312 charactor */
119 					st->keepc[0] = (**inbuf);
120 					st->cstate = C1;
121 				} else {	/* ASCII */
122 					**outbuf = **inbuf;
123 					(*outbuf)++;
124 					(*outbytesleft)--;
125 				}
126 				break;
127 			case C1:	/* GBK charactor 2nd byte */
128 				if (gbk_2nd_byte(**inbuf) == 0) {
129 					st->keepc[1] = (**inbuf);
130 					n = gbk_to_big5(st->keepc, *outbuf, *outbytesleft);
131 					if (n > 0) {
132 						(*outbuf) += n;
133 						(*outbytesleft) -= n;
134 
135 						st->cstate = C0;
136 					} else {
137 						st->_errno = errno = E2BIG;
138 					}
139 				} else if ( gbk4_2nd_byte((unsigned char)**inbuf) ) {
140 					st->cstate = C2;
141 				} else {	/* illegal input */
142 					st->_errno = errno = EILSEQ;
143 				}
144 				break;
145 			case C2:
146 				if ( gbk4_3rd_byte((unsigned char)**inbuf) ) {
147 					st->cstate = C3;
148 				} else {
149 					st->_errno = errno = EILSEQ;
150 				}
151 				break;
152 			case C3:
153 				if ( gbk4_4th_byte((unsigned char)**inbuf) ) {
154 					/*
155 					 *  replace the four byte character with "__" in outbuf
156 					 *  due to that there hasn't corresponding code in BIG5
157 					 */
158 					if ( *outbytesleft < 2 ) {
159 						st->_errno = errno = E2BIG;
160 					} else {
161 						**outbuf = *((*outbuf) + 1) = (char)NON_ID_CHAR;
162 						*outbuf += 2;
163 						*outbytesleft -= 2;
164 
165 						st->cstate = C0;
166 					}
167 				} else {
168 					st->_errno = errno = EILSEQ;
169 				}
170 				break;
171 			default:	/* un-reachable */
172 				st->_errno = errno = EILSEQ;
173 				st->cstate = C0;
174 				break;
175 		}
176 
177 		if ( st->_errno ) break;
178 
179 		(*inbuf)++;
180 		(*inbytesleft)--;
181 	}
182 
183 	if ( errno ) return ((size_t) -1);
184 
185 	if (*inbytesleft == 0 && st->cstate != C0) {
186 		errno = EINVAL;
187 		return ((size_t) -1);
188 	}
189 
190 	if (*inbytesleft > 0 && *outbytesleft == 0) {
191 		errno = E2BIG;
192 		return (size_t)-1;
193 	}
194 
195 	return (size_t)(*inbytesleft);
196 }
197 
/*
 *	gbk_2nd_byte: test whether inbuf is a valid second byte of a
 *	two-byte GBK character, i.e. lies in 0x40-0x7e or 0x80-0xfe.
 *	Return:	0 --- valid GBK 2nd byte
 *		1 --- invalid GBK 2nd byte
 */
int
gbk_2nd_byte(char inbuf)
{
	/*
	 * Compare the unsigned byte value so that bytes >= 0x80 are not
	 * seen as negative where plain char is signed.
	 */
	unsigned int byte = (unsigned char)inbuf;

	if (byte >= 0x40 && byte <= 0x7e)
		return (0);
	if (byte >= 0x80 && byte <= 0xfe)
		return (0);
	return (1);
}
216 
217 /*
218  *	gbk_to_big5: Convert gbk charactor to Big5.
219  *	Return:	>0 --- converted with enough space in output buffer
220  *			=0 --- no space in outbuf
221  */
222 
gbk_to_big5(char keepc[],char * buf,size_t buflen)223 int gbk_to_big5(char keepc[], char *buf, size_t buflen) {
224 
225 	unsigned long gbk_val;	/* GBK value */
226 	int index;
227 	unsigned long big5_val;	/* BIG5 value */
228 
229 	if (buflen < 2) {
230 		errno = E2BIG;
231 		return 0;
232 	}
233 
234 	gbk_val = ((keepc[0] & ONEBYTE) << 8) + (keepc[1] & ONEBYTE);
235 	index = binsearch(gbk_val, gbk_big5_tab, BIG5MAX);
236 	if (index >= 0) {
237 		big5_val = gbk_big5_tab[index].value;
238 		*buf = (big5_val >> 8) & ONEBYTE;
239 		*(buf + 1) = big5_val & ONEBYTE;
240 	} else
241 		*buf = *(buf + 1) = (char)NON_ID_CHAR;
242 	return 2;
243 }
244 
245 /*
246  *	binsearch()
247  */
binsearch(unsigned long x,table_t table[],int n)248 int binsearch(unsigned long x, table_t table[], int n) {
249 	int low, high, mid;
250 
251 	low = 0;
252 	high = n - 1;
253 	while (low <= high) {
254 		mid = (low + high) >> 1;
255 		if (x < table[mid].key)
256 			high = mid - 1;
257 		else if (x > table[mid].key)
258 			low = mid + 1;
259 		else
260 			return mid;
261 	}
262 	return -1;
263 }
264 
#ifdef DEBUG
/*
 *	Debug driver: convert one built-in sample string and print the
 *	input and the converted output.
 *	Return:	0 normally, 1 when the state or the output buffer cannot
 *		be allocated.
 */
int
main(int argc, char *argv[])
{
	/* An array rather than a pointer, so the length comes from sizeof. */
	static char sample[] = "�������е�ÿһ�������һ���Ѱ�װ��ע����������ʾ�� ��Ʒϵ�� ��";
	_iconv_st *ist;
	char *in = sample;
	char *out;
	char *outstart;
	/* _icv_iconv() takes size_t pointers; int counters would be wrong. */
	size_t inleft = sizeof (sample) - 1;
	size_t outleft = 2 * inleft;

	(void) argc;
	(void) argv;

	ist = (_iconv_st *)_icv_open();
	if (ist == (_iconv_st *)-1) {
		perror("_icv_open");
		return (1);
	}

	/* One extra byte so the output can be NUL-terminated for printf. */
	outstart = out = malloc(outleft + 1);
	if (outstart == NULL) {
		perror("malloc");
		_icv_close(ist);
		return (1);
	}

	if (_icv_iconv(ist, &in, &inleft, &out, &outleft) == (size_t)-1)
		perror("_icv_iconv");
	*out = '\0';

	printf("IN -- %s\n", sample);
	printf("OUT -- %s\n", outstart);

	free(outstart);
	_icv_close(ist);
	return (0);
}
#endif
284