xref: /titanic_52/usr/src/lib/iconv_modules/zh/common/zh_TW-big5%zh_TW-euc.c (revision 91e1e26ac6a73ce959289cf7d3d96c4baedbe0b8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1995, by Sun Microsystems, Inc.
24  * All rights reserved.
25  */
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <errno.h>
30 #include "big5_cns11643.h"	/* Big-5 to CNS 11643 mapping table */
31 
/*
 * Byte-level constants used while decoding Big-5 input and building
 * CNS 11643 (EUC) output.
 */
#define	MSB	0x80	/* most significant bit: tested on input bytes, set on CNS output bytes */
#define	MBYTE	0x8e	/* first byte of a 4-byte output character */
#define	PMASK	0xa0	/* added to the plane number to form the 2nd byte of a 4-byte character */
#define ONEBYTE	0xff	/* mask selecting the low 8 bits of a value */

#define NON_ID_CHAR '_'	/* written twice in place of a character that has no CNS mapping */

/* Per-descriptor conversion state; allocated by _icv_open(), freed by _icv_close(). */
typedef struct _icv_state {
	char	keepc[2];	/* 1st and 2nd byte of the Big-5 character being assembled */
	short	cstate;		/* state machine id (one of enum _CSTATE) */
	int	_errno;		/* internal errno: error raised by the current call, 0 if none */
} _iconv_st;

/* C0: expecting ASCII or a Big-5 1st byte; C1: expecting the Big-5 2nd byte */
enum _CSTATE	{ C0, C1 };

/* File-local helpers used by _icv_iconv() */
static int big5_2nd_byte(char);
static int get_plane_no_by_big5(const char, const char, int*, unsigned long*);
static int big5_to_cns(int, int, unsigned long, char*, size_t);
static int binsearch(unsigned long, table_t[], int);
51 
52 /*
53  * Open; called from iconv_open()
54  */
55 void *
56 _icv_open()
57 {
58 	_iconv_st *st;
59 
60 	if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
61 		errno = ENOMEM;
62 		return ((void *) -1);
63 	}
64 
65 	st->cstate = C0;
66 	st->_errno = 0;
67 
68 #ifdef DEBUG
69     fprintf(stderr, "==========     iconv(): Big-5 --> CNS 11643     ==========\n");
70 #endif
71 	return ((void *) st);
72 }
73 
74 
75 /*
76  * Close; called from iconv_close()
77  */
78 void
79 _icv_close(_iconv_st *st)
80 {
81 	if (!st)
82 		errno = EBADF;
83 	else
84 		free(st);
85 }
86 
87 
88 /*
89  * Actual conversion; called from iconv()
90  */
91 /*=======================================================
92  *
93  *   State Machine for interpreting Big-5 code
94  *
95  *=======================================================
96  *
97  *                     1st C
98  *    +--------> C0 ----------> C1
99  *    |    ascii |        2nd C |
100  *    ^          v              v
101  *    +----<-----+-----<--------+
102  *
103  *=======================================================*/
104 /*
105  * Big-5 encoding range:
106  *	High byte: 0xA1 - 0xFE			(   94 encoding space)
107  *	Low byte:  0x40 - 0x7E, 0xA1 - 0xFE	(  157 encoding space)
108  *	Plane #1:  0xA140 - 0xC8FE		( 6280 encoding space)
109  *	Plane #2:  0xC940 - 0xFEFE		( 8478 encoding space)
110  *	Total:	   94 * 157 = 14,758		(14758 encoding space)
111  */
112 size_t
113 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
114 				char **outbuf, size_t *outbytesleft)
115 {
116 	int		plane_no, n, unidx;
117 	unsigned long	cnscode;
118 
119 #ifdef DEBUG
120     fprintf(stderr, "=== (Re-entry)   iconv(): Big-5 --> CNS 11643   ===\n");
121 #endif
122 	if (st == NULL) {
123 		errno = EBADF;
124 		return ((size_t) -1);
125 	}
126 
127 	if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
128 		st->cstate = C0;
129 		st->_errno = 0;
130 		return ((size_t) 0);
131 	}
132 
133 	st->_errno = 0;         /* reset internal errno */
134 	errno = 0;		/* reset external errno */
135 
136 	/* a state machine for interpreting Big-5 code */
137 	while (*inbytesleft > 0 && *outbytesleft > 0) {
138 		switch (st->cstate) {
139 		case C0:		/* assuming ASCII in the beginning */
140 			if (**inbuf & MSB) {
141 				st->keepc[0] = (**inbuf);
142 				st->cstate = C1;
143 			} else {	/* real ASCII */
144 				**outbuf = **inbuf;
145 				(*outbuf)++;
146 				(*outbytesleft)--;
147 			}
148 			break;
149 		case C1:		/* Chinese characters: 2nd byte */
150 			if (big5_2nd_byte(**inbuf) == 0) {
151 				st->keepc[1] = (**inbuf);
152 				plane_no = get_plane_no_by_big5(st->keepc[0],
153 					st->keepc[1], &unidx, &cnscode);
154 /* comment these lines, it is legal BIG5 character, but no corresponding CNS character
155 				if (plane_no < 0) {
156 					st->_errno = errno = EILSEQ;
157 					break;
158 				}
159 */
160 
161 				n = big5_to_cns(plane_no, unidx, cnscode,
162 					*outbuf, *outbytesleft);
163 				if (n > 0) {
164 					(*outbuf) += n;
165 					(*outbytesleft) -= n;
166 
167 					st->cstate = C0;
168 				} else {	/* don't reset state */
169 					st->_errno = errno = E2BIG;
170 				}
171 			} else {	/* input char doesn't belong
172 					 * to the input code set
173 					 */
174 				st->_errno = errno = EILSEQ;
175 			}
176 			break;
177 		default:			/* should never come here */
178 			st->_errno = errno = EILSEQ;
179 			st->cstate = C0;	/* reset state */
180 			break;
181 		}
182 
183 		if (st->_errno) {
184 #ifdef DEBUG
185     fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\n",
186 		st->_errno, st->cstate);
187 #endif
188 			break;
189 		}
190 
191 		(*inbuf)++;
192 		(*inbytesleft)--;
193 	}
194 
195         if (errno) return ((size_t) -1);
196 
197         if (*inbytesleft == 0 && st->cstate != C0) {
198                 errno = EINVAL;
199                 return ((size_t) -1);
200         }
201 
202 	if (*inbytesleft > 0 && *outbytesleft == 0) {
203 		errno = E2BIG;
204 		return((size_t)-1);
205 	}
206 	return (*inbytesleft);
207 }
208 
209 
210 /*
211  * Test whether inbuf is a valid character for 2nd byte Big-5 code
212  * Return: = 0 - valid Big-5 2nd byte
213  *         = 1 - invalid Big-5 2nd byte
214  */
215 static int big5_2nd_byte(char inbuf)
216 {
217 	unsigned int	buf = (unsigned int) (inbuf & ONEBYTE);
218 
219 	if ((buf >= 0x40) && (buf <= 0x7E))
220 		return (0);
221 	if ((buf >= 0xA1) && (buf <= 0xFE))
222 		return (0);
223 	return(1);
224 }
225 
226 
227 /*
228  * Get plane number by Big-5 code; i.e. plane #1 returns 1, #2 returns 2, etc.
229  * Returns -1 on error conditions
230  *
231  * Since binary search of the Big-5 to CNS table is necessary, might as well
232  * return index and CNS code matching to the unicode.
233  */
234 static int get_plane_no_by_big5(const char c1, const char c2,
235 			int *unidx, unsigned long *cnscode)
236 {
237 	int 		ret;
238 	unsigned long	big5code;
239 
240 	big5code = (unsigned long) ((c1 & ONEBYTE) << 8) + (c2 & ONEBYTE);
241 	*unidx = binsearch(big5code, big5_cns_tab, MAX_BIG5_NUM);
242 	if ((*unidx) >= 0)
243 		*cnscode = big5_cns_tab[*unidx].value;
244 	else
245 		return(0);	/* match from Big-5 to CNS not found */
246 #ifdef DEBUG
247     fprintf(stderr, "Big-5=%04x, idx=%5d, CNS=%x ", big5code, *unidx, *cnscode);
248 #endif
249 
250 	ret = (int) (*cnscode >> 16);
251 	switch (ret) {
252 	case 0x21:	/* 0x8EA1 - G */
253 	case 0x22:	/* 0x8EA2 - H */
254 	case 0x23:	/* 0x8EA3 - I */
255 	case 0x24:	/* 0x8EA4 - J */
256 	case 0x25:	/* 0x8EA5 - K */
257 	case 0x26:	/* 0x8EA6 - L */
258 	case 0x27:	/* 0x8EA7 - M */
259 	case 0x28:	/* 0x8EA8 - N */
260 	case 0x29:	/* 0x8EA9 - O */
261 	case 0x2a:	/* 0x8EAA - P */
262 	case 0x2b:	/* 0x8EAB - Q */
263 	case 0x2c:	/* 0x8EAC - R */
264 	case 0x2d:	/* 0x8EAD - S */
265 	case 0x2f:	/* 0x8EAF - U */
266 	case 0x30:	/* 0x8EB0 - V */
267 		return (ret - 0x20);	/* so that we can use GET_PLANEC() */
268 	case 0x2e:	/* 0x8EAE - T */
269 		return (3);		/* CNS 11643-1992 */
270 	default:
271 		return (-1);
272 	}
273 }
274 
275 
276 /*
277  * Big-5 code --> CNS 11643 (Chinese EUC)
278  * Return: > 0 - converted with enough space in output buffer
279  *         = 0 - no space in outbuf
280  */
281 static int big5_to_cns(int plane_no, int unidx, unsigned long cnscode,
282 						char *buf, size_t buflen)
283 {
284 	unsigned long	val;		/* CNS 11643 value */
285 	unsigned char c1 = '\0', c2 = '\0', cns_str[5];
286 	int		ret_size;	/* return buffer size */
287 
288 	if (unidx < 0) {	/* no match from UTF8 to CNS 11643 */
289 		if ( buflen < 2 ) goto err;
290 		*buf = *(buf+1) = NON_ID_CHAR;
291 		ret_size = 2;
292 	} else {
293 		val = cnscode & 0xffff;
294 		c1 = ((val & 0xff00) >> 8) | MSB;
295 		c2 = (val & 0xff) | MSB;
296 	}
297 
298 	switch (plane_no) {
299 	case 1:
300 		if ( buflen < 2 ) goto err;
301 		*buf = cns_str[0] = c1;
302 		*(buf+1) = cns_str[1] = c2;
303 		cns_str[2] = cns_str[3] = cns_str[4] = NULL;
304 		ret_size = 2;
305 		break;
306 	case 2:
307 	case 3:
308 	case 4:
309 	case 5:
310 	case 6:
311 	case 7:
312 	case 8:
313 	case 9:
314 	case 10:
315 	case 11:
316 	case 12:
317 	case 13:
318 	case 14:
319 	case 15:
320 	case 16:
321 		if ( buflen < 4 ) goto err;
322 		*(unsigned char*) buf = cns_str[0] = MBYTE;
323 		*(unsigned char*)(buf+1) = cns_str[1] = PMASK + plane_no;
324 		*(unsigned char*) (buf+2) = cns_str[2] = c1;
325 		*(unsigned char*) (buf+3) = cns_str[3] = c2;
326 		cns_str[4] = NULL;
327 		ret_size = 4;
328 		break;
329 	}
330 
331 #ifdef DEBUG
332     fprintf(stderr, "\t#%d ->%s<-\n", plane_no, cns_str);
333 #endif
334 
335 	return(ret_size);
336 
337 err:
338 	errno = E2BIG;
339 	return(0);
340 }
341 
342 
343 /* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
344 static int binsearch(unsigned long x, table_t v[], int n)
345 {
346 	int low, high, mid;
347 
348 	low = 0;
349 	high = n - 1;
350 	while (low <= high) {
351 		mid = (low + high) / 2;
352 		if (x < v[mid].key)
353 			high = mid - 1;
354 		else if (x > v[mid].key)
355 			low = mid + 1;
356 		else	/* found match */
357 			return mid;
358 	}
359 	return (-1);	/* no match */
360 }
361