xref: /titanic_51/usr/src/lib/iconv_modules/zh/common/zh_TW-iso2022-7%zh_TW-big5.c (revision 91e1e26ac6a73ce959289cf7d3d96c4baedbe0b8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1995, by Sun Microsystems, Inc.
24  * All rights reserved.
25  */
26 
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <errno.h>
30 #include "cns11643_big5.h"   /* CNS 11643 to Big-5 mapping table */
31 
32 #define	MSB	0x80	/* most significant bit; OR-ed into every 7-bit input byte that is kept */
33 #define	MBYTE	0x8e	/* lead byte stored in keepc[0] for 4-byte codes (planes 2 - 16) */
34 #define	PMASK	0xa0	/* base added to the plane number to form keepc[1] (not used as a bit mask) */
35 #define ONEBYTE	0xff	/* right most byte */
36 #define MSB_OFF	0x7f	/* mask that clears the MSB */
37 
38 #define SI	0x0f	/* shift in: leaves double-byte mode (state C5 -> C4) */
39 #define SO	0x0e	/* shift out: enters double-byte mode (state C4 -> C5) */
40 #define ESC	0x1b	/* escape: first byte of the ESC $ ) <plane> designation */
41 
42 /*
43  * static const char plane_char[] = "0GH23456789:;<=>?";
44  * static const char plane_char[] = "0GHIJKLMNOPQRSTUV";
45  * #define	GET_PLANEC(i)	(plane_char[i])
46  */
47 
48 #define NON_ID_CHAR '_'	/* written twice in place of a character that has no Big-5 mapping */
49 
50 typedef struct _icv_state {
51 	char	keepc[4];	/* bytes of the CNS 11643 code being assembled (at most 4) */
52 	short	cstate;		/* current state machine id (an enum _CSTATE value) */
53 	int	plane_no;	/* plane number taken from the last designation sequence */
54 	int	_errno;		/* internal errno: last error recorded by _icv_iconv() */
55 } _iconv_st;
56 
57 enum _CSTATE	{ C0, C1, C2, C3, C4, C5, C6, C7 };	/* parser states; see the diagram above _icv_iconv() */
58 
59 
60 static int get_plane_no_by_iso(const char);	/* designation character -> plane number */
61 static int iso_to_big5(int, char[], char*, size_t);	/* kept CNS 11643 bytes -> 2 output bytes */
62 static int binsearch(unsigned long, table_t[], int);	/* key lookup in a sorted mapping table */
63 
64 
65 /*
66  * Open; called from iconv_open()
67  */
68 void *
69 _icv_open()
70 {
71 	_iconv_st *st;
72 
73 	if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
74 		errno = ENOMEM;
75 		return ((void *) -1);
76 	}
77 
78 	st->cstate = C0;
79 	st->plane_no = 0;
80 	st->_errno = 0;
81 
82 #ifdef DEBUG
83     fprintf(stderr, "==========    iconv(): ISO2022-7 --> Big-5    ==========\n");
84 #endif
85 	return ((void *) st);
86 }
87 
88 
89 /*
90  * Close; called from iconv_close()
91  */
92 void
93 _icv_close(_iconv_st *st)
94 {
95 	if (!st)
96 		errno = EBADF;
97 	else
98 		free(st);
99 }
100 
101 
102 /*
103  * Actual conversion; called from iconv()
104  */
105 /*=========================================================================
106  *
107  *             State Machine for interpreting ISO 2022-7 code
108  *
109  *=========================================================================
110  *
111  *                                                        plane 2 - 16
112  *                                                    +---------->-------+
113  *                                    plane           ^                  |
114  *            ESC      $       )      number     SO   | plane 1          v
115  *    +-> C0 ----> C1 ---> C2 ---> C3 ------> C4 --> C5 -------> C6     C7
116  *    |   | ascii  | ascii | ascii |    ascii |   SI | |          |      |
117  *    +----------------------------+    <-----+------+ +------<---+------+
118  *    ^                                 |
119  *    |              ascii              v
120  *    +---------<-------------<---------+
121  *
122  *=========================================================================*/
123 size_t
124 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
125 				char **outbuf, size_t *outbytesleft)
126 {
127 	int		n;
128 
129 	if (st == NULL) {
130 		errno = EBADF;
131 		return ((size_t) -1);
132 	}
133 
134 	if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
135 		st->cstate = C0;
136 		st->_errno = 0;
137 		return ((size_t) 0);
138 	}
139 
140 #ifdef DEBUG
141     fprintf(stderr, "=== (Re-entry)   iconv(): ISO 2022-7 --> Big-5   ===\n");
142 #endif
143 	st->_errno = 0;         /* reset internal errno */
144 	errno = 0;		/* reset external errno */
145 
146 	/* a state machine for interpreting ISO 2022-7 code */
147 	while (*inbytesleft > 0 && *outbytesleft > 0) {
148 		switch (st->cstate) {
149 		case C0:		/* assuming ASCII in the beginning */
150 			if (**inbuf == ESC) {
151 				st->cstate = C1;
152 			} else {	/* real ASCII */
153 				**outbuf = **inbuf;
154 				(*outbuf)++;
155 				(*outbytesleft)--;
156 			}
157 			break;
158 		case C1:		/* got ESC, expecting $ */
159 			if (**inbuf == '$') {
160 				st->cstate = C2;
161 			} else {
162 				**outbuf = ESC;
163 				(*outbuf)++;
164 				(*outbytesleft)--;
165 				st->cstate = C0;
166 				st->_errno = 0;
167 				continue;	/* don't advance inbuf */
168 			}
169 			break;
170 		case C2:		/* got $, expecting ) */
171 			if (**inbuf == ')') {
172 				st->cstate = C3;
173 			} else {
174 				if (*outbytesleft < 2) {
175 					st->_errno = errno = E2BIG;
176 					return((size_t)-1);
177 				}
178 				**outbuf = ESC;
179 				*(*outbuf+1) = '$';
180 				(*outbuf) += 2;
181 				(*outbytesleft) -= 2;
182 				st->cstate = C0;
183 				st->_errno = 0;
184 				continue;	/* don't advance inbuf */
185 			}
186 			break;
187 		case C3:		/* got ) expecting G,H,I,...,V */
188 			st->plane_no = get_plane_no_by_iso(**inbuf);
189 			if (st->plane_no > 0 ) {	/* plane #1 - #16 */
190 				st->cstate = C4;
191 			} else {
192 				if (*outbytesleft < 3) {
193 					st->_errno = errno = E2BIG;
194 					return((size_t)-1);
195 				}
196 				**outbuf = ESC;
197 				*(*outbuf+1) = '$';
198 				*(*outbuf+2) = ')';
199 				(*outbuf) += 3;
200 				(*outbytesleft) -= 3;
201 				st->cstate = C0;
202 				st->_errno = 0;
203 				continue;	/* don't advance inbuf */
204 			}
205 			break;
206 		case C4:		/* SI (Shift In) */
207 			if (**inbuf == ESC) {
208 				st->cstate = C1;
209 				break;
210 			}
211 			if (**inbuf == SO) {
212 #ifdef DEBUG
213     fprintf(stderr, "<--------------  SO  -------------->\n");
214 #endif
215 				st->cstate = C5;
216 			} else {	/* ASCII */
217 				**outbuf = **inbuf;
218 				(*outbuf)++;
219 				(*outbytesleft)--;
220 				st->cstate = C0;
221 				st->_errno = 0;
222 			}
223 			break;
224 		case C5:		/* SO (Shift Out) */
225 			if (**inbuf == SI) {
226 #ifdef DEBUG
227     fprintf(stderr, ">--------------  SI  --------------<\n");
228 #endif
229 				st->cstate = C4;
230 			} else {	/* 1st Chinese character */
231 				if (st->plane_no == 1) {
232 					st->keepc[0] = (char) (**inbuf | MSB);
233 					st->cstate = C6;
234 				} else {	/* 4-bypte code: plane #2 - #16 */
235 					st->keepc[0] = (char) MBYTE;
236 					st->keepc[1] = (char) (PMASK +
237 								st->plane_no);
238 					st->keepc[2] = (char) (**inbuf | MSB);
239 					st->cstate = C7;
240 				}
241 			}
242 			break;
243 		case C6:		/* plane #1: 2nd Chinese character */
244 			st->keepc[1] = (char) (**inbuf | MSB);
245 			st->keepc[2] = st->keepc[3] = NULL;
246 			n = iso_to_big5(1, st->keepc, *outbuf, *outbytesleft);
247 			if (n > 0) {
248 				(*outbuf) += n;
249 				(*outbytesleft) -= n;
250 			} else {
251 				st->_errno = errno;
252 				return((size_t)-1);
253 			}
254 			st->cstate = C5;
255 			break;
256 		case C7:		/* 4th Chinese character */
257 			st->keepc[3] = (char) (**inbuf | MSB);
258 			n = iso_to_big5(st->plane_no, st->keepc, *outbuf,
259 					*outbytesleft);
260 			if (n > 0) {
261 				(*outbuf) += n;
262 				(*outbytesleft) -= n;
263 			} else {
264 				st->_errno = errno;
265 				return((size_t)-1);
266 			}
267 			st->cstate = C5;
268 			break;
269 		default:			/* should never come here */
270 			st->_errno = errno = EILSEQ;
271 			st->cstate = C0;	/* reset state */
272 			break;
273 		}
274 
275 		(*inbuf)++;
276 		(*inbytesleft)--;
277 
278 		if (st->_errno) {
279 #ifdef DEBUG
280     fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\tinbuf=%x\n",
281 		st->_errno, st->cstate, **inbuf);
282 #endif
283 			break;
284 		}
285 		if (errno)
286 			return((size_t)-1);
287 	}
288 
289 	if (*inbytesleft > 0 && *outbytesleft == 0) {
290 		errno = E2BIG;
291 		return((size_t)-1);
292 	}
293 	return (*inbytesleft);
294 }
295 
296 
/*
 * Map the final character of a designation sequence to a plane number.
 *
 * '0' denotes plane #0 and yields 0.  The sixteen characters 'G' .. 'V'
 * yield 1 .. 16 ('G' is plane 1 / 0x8EA1, 'V' is plane 16 / 0x8EB0).
 * Every other character yields -1.
 */
static int
get_plane_no_by_iso(const char inbuf)
{
	const unsigned char	designator = (unsigned char) inbuf;
	int			plane;

	if (designator == '0')	/* plane #0 */
		return (0);

	/* 'G' is one past 'F', so the distance from 'F' is the plane number */
	plane = designator - 'F';
	if (plane >= 1 && plane <= 16)
		return (plane);

	return (-1);
}
332 
333 
334 /*
335  * ISO 2022-7 code --> Big-5 code
336  * Return: > 0 - converted with enough space in output buffer
337  *         = 0 - no space in outbuf
338  */
339 static int iso_to_big5(int plane_no, char keepc[], char *buf, size_t buflen)
340 {
341 	char		cns_str[3];
342 	unsigned long	cns_val;	/* MSB mask off CNS 11643 value */
343 	int		unidx;		/* binary search index */
344 	unsigned long	big5_val, val;	/* Big-5 code */
345 
346 #ifdef DEBUG
347     fprintf(stderr, "%s %d ", keepc, plane_no);
348 #endif
349 	if (plane_no == 1) {
350 		cns_str[0] = keepc[0] & MSB_OFF;
351 		cns_str[1] = keepc[1] & MSB_OFF;
352 	} else {
353 		cns_str[0] = keepc[2] & MSB_OFF;
354 		cns_str[1] = keepc[3] & MSB_OFF;
355 	}
356 	cns_val = (cns_str[0] << 8) + cns_str[1];
357 #ifdef DEBUG
358     fprintf(stderr, "%x\t", cns_val);
359 #endif
360 
361         if (buflen < 2) {
362                 errno = E2BIG;
363                 return(0);
364         }
365 
366 	switch (plane_no) {
367 	case 1:
368 		unidx = binsearch(cns_val, cns_big5_tab1, MAX_CNS1_NUM);
369 		if (unidx >= 0)
370 			big5_val = cns_big5_tab1[unidx].value;
371 		break;
372 	case 2:
373 		unidx = binsearch(cns_val, cns_big5_tab2, MAX_CNS2_NUM);
374 		if (unidx >= 0)
375 			big5_val = cns_big5_tab2[unidx].value;
376 		break;
377 	default:
378 		unidx = -1;	/* no mapping from CNS to Big-5 out of plane 1&2 */
379 		break;
380 	}
381 
382 #ifdef DEBUG
383     fprintf(stderr, "unidx = %d, big5code = %x\t", unidx, big5_val);
384 #endif
385 
386 	if (unidx < 0) {	/* no match from CNS to Big-5 */
387 		*buf = *(buf+1) = NON_ID_CHAR;
388 	} else {
389 		val = big5_val & 0xffff;
390 		*buf = (char) ((val & 0xff00) >> 8);
391 		*(buf+1) = (char) (val & 0xff);
392 	}
393 
394 #ifdef DEBUG
395     fprintf(stderr, "\t->%x %x<-\n", *buf, *(buf+1));
396 #endif
397 
398 	return(2);
399 }
400 
401 
402 /* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
403 static int binsearch(unsigned long x, table_t v[], int n)
404 {
405 	int low, high, mid;
406 
407 	low = 0;
408 	high = n - 1;
409 	while (low <= high) {
410 		mid = (low + high) / 2;
411 		if (x < v[mid].key)
412 			high = mid - 1;
413 		else if (x > v[mid].key)
414 			low = mid + 1;
415 		else	/* found match */
416 			return mid;
417 	}
418 	return (-1);	/* no match */
419 }
420