xref: /titanic_51/usr/src/lib/iconv_modules/zh/common/zh_CN.gbk%zh_TW-big5p.c (revision 91e1e26ac6a73ce959289cf7d3d96c4baedbe0b8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  *	Copyright(c) 1997, Sun Microsystems, Inc.
23  *	All rights reserved.
24  */
25 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <gb18030_big5p.h>
30 
31 #define NON_ID_CHAR '_'	/* substitute written to the output for characters that have no Big5+ mapping */
32 #define MSB 0x80	/* high bit; set on the first byte of a multi-byte character, clear for ASCII */
33 #define ONEBYTE 0xff	/* mask that keeps only the low 8 bits of a value */
34 
35 #define gbk4_2nd_byte(v)  ((v) >= 0x30 && (v) <= 0x39)	/* true if v can be byte 2 of a four-byte sequence */
36 #define gbk4_3rd_byte(v)  ((v) >= 0x81 && (v) <= 0xfe)	/* true if v can be byte 3 of a four-byte sequence */
37 #define gbk4_4th_byte(v)  gbk4_2nd_byte(v)	/* byte 4 is accepted over the same range as byte 2 */
38 
39 int binsearch(unsigned long x, table_t table[], int n);	/* index of key x in table[0..n-1], or -1 if absent */
40 int gbk_2nd_byte(char inbuf);	/* 0 if inbuf is a valid second byte of a two-byte character, 1 otherwise */
41 int gbk_to_big5p(char keepc[], char *buf, size_t buflen);	/* writes 2 bytes to buf and returns 2, or returns 0 if buflen < 2 */
42 
43 typedef struct _icv_state {
44 	char keepc[2];	/* first and second byte of the two-byte character being assembled */
45 	short cstate;	/* current parser state; holds one of the enum _CSTATE values */
46 	int _errno;		/* internal errno: error raised during the current _icv_iconv() call, 0 if none */
47 } _iconv_st;
48 
49 enum _CSTATE { C0, C1, C2, C3 };	/* C0: at a character boundary; C1, C2, C3: waiting for byte 2, 3, 4 of the current character */
50 
51 
52 /*
53  *	Open; called from iconv_open()
54  */
55 void * _icv_open() {
56 	_iconv_st * st;
57 
58 	if ((st = (_iconv_st *) malloc(sizeof(_iconv_st))) == NULL) {
59 		errno = ENOMEM;
60 		return ((void *) -1);
61 	}
62 
63 	st->cstate = C0;
64 	st->_errno = 0;
65 
66 	return ((void *) st);
67 }
68 
69 /*
70  *	Close; called from iconv_close()
71  */
72 void _icv_close(_iconv_st * st) {
73 	if (!st)
74 		errno = EBADF;
75 	else
76 		free(st);
77 }
78 
79 /*
80  *	Actual conversion; called from iconv()
81  */
82 
83 size_t _icv_iconv(_iconv_st * st, char **inbuf, size_t *inbytesleft,
84 					char ** outbuf, size_t *outbytesleft) {
85 	int n;
86 	if (st == NULL) {
87 		errno = EBADF;
88 		return ((size_t) -1);
89 	}
90 
91 	if (inbuf == NULL || *inbuf == NULL) {	/* Reset request. */
92 		st->cstate = C0;
93 		st->_errno = 0;
94 		return ((size_t) 0);
95 	}
96 
97 	errno = st->_errno = 0;
98 
99 	while (*inbytesleft > 0 && *outbytesleft > 0) {
100 		switch (st->cstate) {
101 			case C0:
102 				if (**inbuf & MSB) {	/* gb2312 charactor */
103 					st->keepc[0] = (**inbuf);
104 					st->cstate = C1;
105 				} else {	/* ASCII */
106 					**outbuf = **inbuf;
107 					(*outbuf)++;
108 					(*outbytesleft)--;
109 				}
110 				break;
111 			case C1:	/* GBK charactor 2nd byte */
112 				if (gbk_2nd_byte(**inbuf) == 0) {
113 					st->keepc[1] = (**inbuf);
114 					n = gbk_to_big5p(st->keepc, *outbuf, *outbytesleft);
115 					if (n > 0) {
116 						(*outbuf) += n;
117 						(*outbytesleft) -= n;
118 
119 						st->cstate = C0;
120 					} else {
121 						st->_errno = errno = E2BIG;
122 					}
123 				} else if ( gbk4_2nd_byte((unsigned char)**inbuf) ) {
124 				       st->cstate = C2;
125 				} else {	/* illegal input */
126 					st->_errno = errno = EILSEQ;
127 				}
128 				break;
129 		        case C2:
130 		                if ( gbk4_3rd_byte((unsigned char)**inbuf) )
131 		                     st->cstate = C3;
132 		                else
133 		                     st->_errno = errno = EILSEQ;
134 		                break;
135 		        case C3:
136 		                if ( gbk4_4th_byte((unsigned char)**inbuf)) {
137 
138 				     /* replace the four-bytes character with __ in outbuf
139 				      * since there wouldn't have corresponding code in BIG5P
140 				      */
141 				     if ( *outbytesleft < 2 ) st->_errno = errno = E2BIG;
142 				     else {
143 				          **outbuf = *((*outbuf)+1) = (char)NON_ID_CHAR;
144 					  *outbuf += 2;
145 					  *outbytesleft -= 2;
146 
147 		                          st->cstate = C0;
148 				     }
149 				}
150 		                else
151 		                     st->_errno = errno = EILSEQ;
152 		                break;
153 			default:	/* un-reachable */
154 				st->_errno = errno = EILSEQ;
155 				st->cstate = C0;
156 				break;
157 		}
158 
159 		if (st->_errno) break;
160 
161 		(*inbuf)++;
162 		(*inbytesleft)--;
163 	}
164 
165 	if (errno) return ((size_t) -1);
166 
167 	if (*inbytesleft == 0 && st->cstate != C0) {
168 		errno = EINVAL;
169 		return ((size_t) -1);
170 	}
171 
172 	if (*inbytesleft > 0 && *outbytesleft == 0) {
173 		errno = E2BIG;
174 		return (size_t)-1;
175 	}
176 
177 	return (size_t)(*inbytesleft);
178 }
179 
/*
 *	gbk_2nd_byte: test whether inbuf is a valid second byte of a
 *	two-byte GBK character.
 *
 *	inbuf	candidate byte; it is reduced to its unsigned 8-bit value
 *		first, so the result is the same whether plain char is
 *		signed or unsigned
 *
 *	Return:	0 --- valid GBK 2nd byte (0x40-0x7e or 0x80-0xfe)
 *			1 --- invalid GBK 2nd byte
 *
 *	Defined in prototype form so that the definition agrees with the
 *	forward declaration int gbk_2nd_byte(char); an old-style definition
 *	would promote the char parameter to int.
 */
int gbk_2nd_byte(char inbuf)
{
	unsigned int buf = (unsigned char) inbuf;

	if ((buf >= 0x40) && (buf <= 0x7e))
		return 0;
	if ((buf >= 0x80) && (buf <= 0xfe))
		return 0;
	return 1;
}
198 
199 /*
200  *	gbk_to_big5p: Convert gbk charactor to Big5p.
201  *	Return:	>0 --- converted with enough space in output buffer
202  *			=0 --- no space in outbuf
203  */
204 
205 int gbk_to_big5p(char keepc[], char *buf, size_t buflen) {
206 
207 	unsigned long gbk_val;	/* GBK value */
208 	int index;
209 	unsigned long big5_val;	/* BIG5 value */
210 
211 	if (buflen < 2) {
212 		errno = E2BIG;
213 		return 0;
214 	}
215 
216 	gbk_val = ((keepc[0] & ONEBYTE) << 8) + (keepc[1] & ONEBYTE);
217 	index = binsearch(gbk_val, gbk_big5p_tab, BIG5MAX);
218 	if (index >= 0) {
219 		big5_val = gbk_big5p_tab[index].value;
220 		*buf = (big5_val >> 8) & ONEBYTE;
221 		*(buf + 1) = big5_val & ONEBYTE;
222 	} else
223 		*buf = *(buf + 1) = (char)NON_ID_CHAR;
224 	return 2;
225 }
226 
227 /*
228  *	binsearch()
229  */
230 int binsearch(unsigned long x, table_t table[], int n) {
231 	int low, high, mid;
232 
233 	low = 0;
234 	high = n - 1;
235 	while (low <= high) {
236 		mid = (low + high) >> 1;
237 		if (x < table[mid].key)
238 			high = mid - 1;
239 		else if (x > table[mid].key)
240 			low = mid + 1;
241 		else
242 			return mid;
243 	}
244 	return -1;
245 }
246 
#ifdef DEBUG
/*
 *	Stand-alone smoke test, compiled only when DEBUG is defined.
 *	Converts one fixed sample string and prints the input and the result.
 *
 *	NOTE(review): the sample literal looks like GBK text that was damaged
 *	by a character-set conversion of this source file; confirm the
 *	intended bytes against a pristine copy.
 *
 *	Return:	0 on completion, 1 if the state or the output buffer
 *		could not be allocated
 */
int main(int argc, char * argv[]) {
	_iconv_st * ist;
	char * inbuf = "�������е�ÿһ�������һ���Ѱ�װ��ע����������ʾ�� ��Ʒϵ�� ��";
	char * outbuf;
	char * ib, * oub;
	size_t inbyteleft;	/* size_t to match the _icv_iconv() parameters */
	size_t outbyteleft;

	(void) argc;
	(void) argv;

	ist = (_iconv_st *) _icv_open();
	if (ist == (_iconv_st *) -1) {
		printf("_icv_open failed\n");
		return 1;
	}

	/* Only the real input length may be consumed; the output gets slack. */
	inbyteleft = strlen(inbuf);
	outbyteleft = 2 * strlen(inbuf);

	/* One extra byte so the result can be NUL-terminated for printf. */
	outbuf = (char *)malloc(outbyteleft + 1);
	if (outbuf == NULL) {
		printf("malloc failed\n");
		_icv_close(ist);
		return 1;
	}

	ib = inbuf;
	oub = outbuf;
	_icv_iconv(ist, &inbuf, &inbyteleft, &outbuf, &outbyteleft);
	*outbuf = '\0';		/* outbuf now points just past the converted bytes */

	printf("IN -- %s\n", ib);
	printf("OUT -- %s\n", oub);

	free(oub);
	_icv_close(ist);
	return 0;
}
#endif
266