xref: /illumos-gate/usr/src/lib/iconv_modules/zh/common/zh_CN.iso2022-CN%zh_CN.euc.c (revision 16d8656330ae5622ec32e5007f62145ebafdc50f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright(c) 1998 Sun Microsystems, Inc.
23  * All rights reserved.
24  */
25 
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#ifdef DEBUG
#include <sys/fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#endif
#include <cns11643_big5.h>	/* CNS 11643 to Big-5 mapping table */
#include <big5_gb2312.h>	/* Big-5 to GB mapping table */
36 
#define MSB 	0x80	/* most significant bit; OR-ed in to form EUC bytes */
#define MBYTE	0x8e	/* lead byte stored for a 4-byte (plane 3-16) character */
#define PMASK	0xa0	/* added to the plane number to form the plane byte */
#define ONEBYTE 0xff	/* mask keeping the low-order byte */
#define MSB_OFF 0x7f	/* mask clearing the MSB */

#define SI	0x0f		/* shift in */
#define SO	0x0e		/* shift out */
#define ESC 0x1b		/* escape */
#define SS2	0x4e		/* byte after ESC selecting one plane-2 character */
#define SS3 0x4f		/* byte after ESC selecting plane 3-16 characters */
#define NON_ID_CHAR_BYTE1	0xA1	/* 1st output byte for an unmappable character */
#define NON_ID_CHAR_BYTE2	0xF5	/* 2nd output byte for an unmappable character */
50 
/*
 * Conversion state kept between calls to _icv_iconv().
 */
typedef struct _icv_state {
	char	_buf[10];	/* bytes of the escape sequence being parsed */
	size_t	_bufcont;	/* number of valid bytes in _buf */
	char	_keepc[4];	/* maximum # byte of CNS11643 code */
	short	_gstate;		/* state machine id (enum _GSTATE) */
	short	_istate;		/* state for shift in/out (enum _ISTATE) */
	int		_plane;		/* plane number for Chinese character */
	int		_last_plane;	/* last character's plane # */
	int 	_errno;		/* internal errno */
} _iconv_st;
61 
/* Look up a Big-5 code in big5_gb_tab; returns its index or -1. */
int binsearch_big5_gb(unsigned int big5code);

/* States of the escape-sequence parser used by the converters below. */
enum _GSTATE    { G0, G1, G2, G3, G4, G5, G6, G7, G8, G9, \
				  G10,G11,G12,G13,G14,G15,G16,G17,G18,G19, \
				  G20,G21,G22,G23,G24,G25,G26,G27,G28,G29 };

/* Shift state: IN after SI (or initially), OUT after SO/SS2/SS3. */
enum _ISTATE	{ IN, OUT };


/* Converters for the character bytes saved in st->_keepc. */
int iso_gb_to_gb(_iconv_st * st, char* buf, size_t buflen);
int iso_to_big5_to_gb(_iconv_st * st, char* buf, size_t buflen);
/* Binary search of a key-sorted table; returns an index or -1. */
int binsearch(unsigned long x, table_t v[], int n);
/* Emits the buffered escape-sequence bytes to the output. */
int flush_buf(_iconv_st * st, char ** outbuf, size_t * outbytesleft);
75 
/*
 * Write the escape-sequence bytes collected in st->_buf to the output
 * buffer and empty st->_buf.
 *
 * st           - conversion state; _buf/_bufcont hold the pending bytes
 * outbuf       - in/out: output position, advanced past the bytes written
 * outbytesleft - in/out: room left in the output, reduced accordingly
 *
 * Returns 0 on success (including when nothing was pending).  Returns -1
 * and sets st->_errno to E2BIG when the output is too small, or to EILSEQ
 * when the converter is not in the shift-in state.  On failure nothing is
 * written and the pending bytes are kept.
 */
int flush_buf(_iconv_st * st, char ** outbuf, size_t * outbytesleft) {
	size_t pending = st->_bufcont;

	if (pending == 0)
		return 0;
	if (pending > *outbytesleft) {
		st->_errno = E2BIG;
		return -1;
	}
	if (st->_istate != IN) {
		st->_errno = EILSEQ;
		return -1;
	}
	/*
	 * The pending bytes are raw data, not a C string, and they travel
	 * from the state buffer to the caller's output buffer.
	 */
	memcpy(*outbuf, st->_buf, pending);
	(*outbuf) += pending;
	(*outbytesleft) -= pending;
	st->_bufcont = 0;
	return 0;
}
93 
94 /*
95  * Open; called from iconv_open()
96  */
97 void *
_icv_open()98 _icv_open()
99 {
100 	_iconv_st *st;
101 
102 	if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
103 		errno = ENOMEM;
104 		return ((void *) -1);
105 	}
106 
107 	st->_gstate = G0;
108 	st->_istate = IN;
109 	st->_last_plane = st->_plane = -1;
110 	st->_errno = 0;
111 	st->_bufcont = 0;
112 
113 	return ((void *) st);
114 }
115 
116 /*
117  * Close; called from iconv_close()
118  */
119 void
_icv_close(_iconv_st * st)120 _icv_close(_iconv_st *st)
121 {
122 	if (st == NULL)
123 		errno = EBADF;
124 	else
125 		free(st);
126 }
127 
128 /*
129  * Actual conversion; called from iconv()
130  */
131 /*=========================================================================
132  *
133  *             State Machine for interpreting ISO 2022-7 code
134  *
135  *=========================================================================
136  *
137  *                                                        plane 2 - 16
138  *                                                    +---------->-------+
139  *                                    plane           ^                  |
140  *            ESC      $       )      number     SO   | plane 1          v
141  *    +-> G0 ----> G1 ---> G2 ---> G3 ------> G4 --> G5 -------> G6     G7
142  *    |   | ascii  | ascii | ascii |    ascii |   SI | |          |      |
143  *    +----------------------------+    <-----+------+ +------<---+------+
144  *    ^                                 |
145  *    |              ascii              v
146  *    +---------<-------------<---------+
147  *
148  *=========================================================================*/
/*
 * Common exit for a failed character conversion.  The converters report
 * failure either through st->_errno or through errno; make the two agree
 * (defaulting to E2BIG, the only failure they produce) before returning.
 */
static size_t
icv_conv_failed(_iconv_st *st)
{
	if (st->_errno == 0)
		st->_errno = (errno != 0) ? errno : E2BIG;
	errno = st->_errno;
	return ((size_t)-1);
}

/*
 * Convert ISO 2022-CN input to zh_CN.euc output.
 *
 * st           - descriptor returned by _icv_open()
 * inbuf        - in/out: input position; NULL (or pointing to NULL)
 *                requests a reset of the conversion state
 * inbytesleft  - in/out: number of input bytes remaining
 * outbuf       - in/out: output position
 * outbytesleft - in/out: room remaining in the output buffer
 *
 * Returns 0 for a reset request, otherwise the number of input bytes
 * left unconverted (0 when all input was consumed).  Returns (size_t)-1
 * with errno set to EBADF (NULL descriptor), E2BIG (output buffer too
 * small) or EILSEQ (invalid input or state).  On an error return the
 * input position is left at the byte being processed, so the call can
 * be repeated once the cause has been removed.
 */
size_t _icv_iconv(_iconv_st *st, \
					char **inbuf, size_t *inbytesleft, \
					char **outbuf, size_t *outbytesleft) {
	int		n;
	char	c;

	if (st == NULL) {
		errno = EBADF;
		return ((size_t) -1);
	}

	if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
		st->_gstate = G0;
		st->_istate = IN;
		st->_errno = 0;
		st->_plane = st->_last_plane = -1;
		st->_bufcont = 0;	/* drop any half-parsed escape sequence */
		return ((size_t) 0);
	}

	errno = st->_errno = 0;	/* reset internal and external errno */

	/* a state machine for interpreting ISO 2022-7 code */
	while (*inbytesleft > 0 && *outbytesleft > 0) {
		switch (st->_gstate) {
			case G0:		/* assuming ASCII in the beginning */
				if (**inbuf == ESC) {
					st->_gstate = G1;
					st->_buf[st->_bufcont++] = ESC;
				} else {	/* real ASCII */
					**outbuf = **inbuf;
					(*outbuf)++;
					(*outbytesleft)--;
				}
				break;
			case G1:		/* got ESC, expecting $ */
				if (**inbuf == '$') {
					st->_gstate = G2;
					st->_buf[st->_bufcont++] = '$';
				} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
					errno = st->_errno;
					return (size_t)-1;
				} else {
					st->_gstate = G0;
					st->_errno = 0;
					st->_istate = IN;
					continue;	/* don't advance inbuf */
				}
				break;
			case G2:		/* got $, expecting ) * or + */
				if (**inbuf == ')') {
					st->_gstate = G3;
				} else if (**inbuf == '*') {
					st->_gstate = G12;
					st->_plane = 2;
				} else if (**inbuf == '+') {
					st->_gstate = G19;
				} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
					errno = st->_errno;
					return (size_t)-1;
				} else {
					st->_gstate = G0;
					st->_errno = 0;
					st->_istate = IN;
					continue;	/* don't advance inbuf */
				}
				st->_buf[st->_bufcont++] = **inbuf;
				break;
			case G3:	/* got ) expecting A,G,H */
						/* H is for the bug of and zh_TW.BIG5 */
				if (**inbuf == 'A') {
					st->_plane = 0;
					st->_gstate = G4;
				} else if (**inbuf == 'G') {
					st->_plane = 1;
					st->_gstate = G8;
				} else if (**inbuf == 'H') {
					st->_plane = 2;
					st->_gstate = G8;
				} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
					errno = st->_errno;
					return (size_t)-1;
				} else {
					st->_gstate = G0;
					st->_errno = 0;
					st->_istate = IN;
					continue;
				}
				st->_buf[st->_bufcont++] = **inbuf;
				break;
			case G4:	/* ESC $ ) A got, and SO is expected */
				if (**inbuf == SO) {
					st->_gstate = G5;
					st->_istate = OUT;
					st->_bufcont = 0;
					st->_last_plane = st->_plane;
				} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
					errno = st->_errno;
					return (size_t)-1;
				} else {
					st->_gstate = G0;
					st->_errno = 0;
					st->_istate = IN;
					st->_plane = st->_last_plane;
					continue;
				}
				break;
			case G5:	/* SO (Shift Out) */
				if (**inbuf == SI) {
					st->_istate = IN;
					st->_gstate = G7;
					st->_last_plane = st->_plane;
				} else if (**inbuf == ESC) {
					st->_bufcont = 0;
					st->_gstate = G0;
					continue;
				} else {	/* Chinese Characters */
					st->_keepc[0] = **inbuf;
					st->_gstate = G6;
				}
				break;
			case G6:	/* GB2312: 2nd byte of a Chinese character */
				st->_keepc[1] = **inbuf;
				n = iso_gb_to_gb(st, *outbuf, *outbytesleft);
				if (n > 0) {
					(*outbuf) += n;
					(*outbytesleft) -= n;
				} else {
					return (icv_conv_failed(st));
				}
				st->_gstate = G5;
				break;
			case G7:	/* Shift in */
				if (**inbuf == SO) {
					st->_gstate = G5;
					st->_istate = OUT;
					st->_last_plane = st->_plane;
					st->_bufcont = 0;
				} else if (**inbuf == ESC) {
					st->_gstate = G0;
					continue;
				} else {
					**outbuf = **inbuf;
					(*outbuf)++;
					(*outbytesleft)--;
				}
				break;
			case G8:	/* CNS plane designated, SO is expected */
				if (**inbuf == SO) {
					st->_istate = OUT;
					st->_gstate = G9;
					st->_bufcont = 0;
					st->_last_plane = st->_plane;
				} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
					errno = st->_errno;
					return (size_t)-1;
				} else {
					st->_gstate = G0;
					st->_errno = 0;
					st->_plane = st->_last_plane;
					st->_istate = IN;
					continue;
				}
				break;
			case G9:	/* shifted out: 1st byte of a CNS character */
				if (**inbuf == SI) {
					st->_istate = IN;
					st->_gstate = G11;
					st->_last_plane = st->_plane;
				} else if (**inbuf == ESC) {
					if (flush_buf(st, outbuf, outbytesleft) == -1) {
						errno = st->_errno;
						return (size_t)-1;
					}
					st->_gstate = G0;
					continue;
				} else {	/* Chinese Character */
					st->_keepc[0] = **inbuf;
					st->_gstate = G10;
				}
				break;
			case G10:	/* 2nd byte of a CNS character */
				st->_keepc[1] = **inbuf;
				n = iso_to_big5_to_gb(st, *outbuf, *outbytesleft);
				if (n > 0) {
					(*outbuf) += n;
					(*outbytesleft) -= n;
				} else {
					return (icv_conv_failed(st));
				}
				st->_gstate = G9;
				break;
			case G11:	/* shifted in after CNS text */
				st->_bufcont = 0;
				if (**inbuf == SO) {
					st->_istate = OUT;
					st->_gstate = G9;
				} else if (**inbuf == ESC) {
					st->_gstate = G0;
					continue;
				} else {
					**outbuf = **inbuf;
					(*outbuf)++;
					(*outbytesleft)--;
				}
				break;
			case G12:	/* got ESC $ *, expecting H */
				if (**inbuf == 'H') {
					st->_buf[st->_bufcont++] = 'H';
					st->_gstate = G13;
				} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
					errno = st->_errno;
					return (size_t)-1;
				} else {
					st->_istate = IN;
					st->_plane = st->_last_plane;
					st->_gstate = G0;
					continue;
				}
				break;
			case G13:	/* got ESC $ * H, expecting ESC */
				if (**inbuf == ESC) {
					st->_buf[st->_bufcont++] = **inbuf;
					st->_gstate = G14;
				} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
					errno = st->_errno;
					return (size_t)-1;
				} else {
					st->_gstate = G0;
					st->_istate = IN;
					st->_plane = st->_last_plane;
					continue;
				}
				break;
			case G14:	/* got ESC $ * H ESC, expecting SS2 */
				if (**inbuf == SS2) {
					st->_istate = OUT;
					st->_gstate = G15;
					st->_bufcont = 0;
					st->_last_plane = st->_plane = 2;
				} else if (**inbuf == '$') {
					/*
					 * The ESC just stored starts a new
					 * sequence: flush what precedes it.
					 */
					st->_bufcont--;
					if (flush_buf(st, outbuf, outbytesleft) == -1) {
						/* undo, so a retry sees the same state */
						st->_bufcont++;
						errno = st->_errno;
						return (size_t)-1;
					}
					/* keep the ESC of the new sequence */
					st->_buf[st->_bufcont++] = ESC;
					st->_gstate = G1;
					st->_plane = st->_last_plane;
					st->_istate = IN;
					continue;
				} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
					errno = st->_errno;
					return (size_t)-1;
				} else {
					st->_gstate = G0;
					st->_istate = IN;
					st->_plane = st->_last_plane;
					continue;
				}
				break;
			case G15:	/* after SS2: 1st byte of a plane-2 character */
				if (**inbuf == SI) {
					st->_gstate = G16;
					st->_istate = IN;
					st->_last_plane = st->_plane;
				} else if (**inbuf == ESC) {
					st->_bufcont = 0;
					st->_gstate = G0;
					continue;
				} else {
					st->_keepc[0] = **inbuf;
					st->_gstate = G18;
				}
				break;
			case G16:	/* shifted in after plane-2 text */
				if (**inbuf == ESC) {
					st->_gstate = G17;
					st->_buf[st->_bufcont++] = ESC;
				} else {
					**outbuf = **inbuf;
					(*outbuf)++;
					(*outbytesleft)--;
					st->_bufcont = 0;
				}
				break;
			case G17:	/* got ESC in G16 */
				if (**inbuf == '$') {
					/* G1 stores the '$' itself when it re-reads it */
					st->_gstate = G1;
					continue;
				} else if (**inbuf == SS2) {
					st->_bufcont = 0;
					st->_gstate = G15;
					st->_istate = OUT;
				} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
					errno = st->_errno;
					return (size_t)-1;
				} else {
					st->_gstate = G16;
					st->_istate = IN;
					continue;	/* let G16 handle this byte */
				}
				break;
			case G18:	/* 2nd byte of the plane-2 character */
				st->_keepc[1] = **inbuf;
				if ((n = iso_to_big5_to_gb(st, \
											*outbuf, \
											*outbytesleft)) > 0) {
					(*outbuf)+=n;
					(*outbytesleft)-=n;
				} else {
					/* stay in G18 so a retry converts this byte */
					return (icv_conv_failed(st));
				}
				st->_gstate = G0;
				break;
			case G19:	/* Plane #: 3 - 16 */
				c = **inbuf;
				if (c >= 'I' && c <= 'V') {
					st->_plane = c - 'I' + 3;
					st->_gstate = G20;
				} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
					errno = st->_errno;
					return (size_t)-1;
				} else {
					st->_gstate = G0;
					st->_errno = 0;
					st->_istate = IN;
					st->_plane = st->_last_plane;
					continue;
				}
				st->_buf[st->_bufcont++] = c;
				break;
			case G20:	/* plane 3-16 designated, expecting ESC */
				if (**inbuf == ESC) {
					st->_buf[st->_bufcont++] = **inbuf;
					st->_gstate = G21;
				} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
					errno = st->_errno;
					return (size_t)-1;
				} else {
					st->_gstate = G0;
					st->_istate = IN;
					st->_last_plane = st->_plane;
					continue;
				}
				break;
			case G21:	/* got the ESC, expecting SS3 */
				if (**inbuf == SS3) {
					st->_istate = OUT;
					st->_gstate = G22;
					st->_bufcont = 0;
				} else if (**inbuf == '$') {
					/*
					 * The ESC just stored starts a new
					 * sequence: flush what precedes it.
					 */
					st->_bufcont--;
					if (flush_buf(st, outbuf, outbytesleft) == -1) {
						/* undo, so a retry sees the same state */
						st->_bufcont++;
						errno = st->_errno;
						return (size_t)-1;
					}
					/* keep the ESC of the new sequence */
					st->_buf[st->_bufcont++] = ESC;
					st->_istate = IN;
					st->_last_plane = st->_plane;
					st->_gstate = G1;
					continue;
				} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
					errno = st->_errno;
					return (size_t)-1;
				} else {
					st->_gstate = G0;
					st->_istate = IN;
					st->_last_plane = st->_plane;
					continue;
				}
				break;
			case G22:	/* after SS3: 1st byte of a plane 3-16 character */
				if (**inbuf == SI) {
					st->_istate = IN;
					st->_gstate = G24;
					st->_last_plane = st->_plane;
				} else {
					st->_keepc[0] = (char)MBYTE;
					st->_keepc[1] = (char)(PMASK + st->_plane);
					st->_keepc[2] = **inbuf;
					st->_gstate = G23;
				}
				break;
			case G23:	/* 2nd byte of a plane 3-16 character */
				st->_keepc[3] = **inbuf;
				if ((n = iso_to_big5_to_gb(st, \
											*outbuf, \
											*outbytesleft)) > 0) {
					(*outbuf)+=n;
					(*outbytesleft)-=n;
				} else {
					return (icv_conv_failed(st));
				}
				st->_gstate = G22;
				break;
			case G24:	/* shifted in after plane 3-16 text */
				if (**inbuf == ESC) {
					st->_gstate = G25;
					st->_buf[st->_bufcont++] = ESC;
				} else {
					**outbuf = **inbuf;
					(*outbuf)++;
					(*outbytesleft)--;
					st->_bufcont = 0;
				}
				break;
			case G25:	/* got ESC in G24 */
				if (**inbuf == '$') {
					st->_gstate = G1;
					continue;
				} else if (**inbuf == SS3) {
					st->_gstate = G22;
					st->_bufcont = 0;
					st->_istate = OUT;
				} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
					errno = st->_errno;
					return (size_t)-1;
				} else {
					st->_gstate = G24;
					st->_istate = IN;
					continue;	/* let G24 handle this byte */
				}
				break;
			default:			/* should never come here */
				st->_errno = errno = EILSEQ;
				st->_gstate = G0;	/* reset state */
				return ((size_t)-1);
		}	/* end of switch */

		(*inbuf)++;
		(*inbytesleft)--;
	}

	if (*inbytesleft > 0 && *outbytesleft == 0) {
		errno = E2BIG;
		return((size_t)(-1));
	}
	return ((size_t)(*inbytesleft));
}
610 
/*
 * Emit the two-byte character saved in st->_keepc with the most
 * significant bit of each byte set.
 *
 * Returns 2 (bytes written to buf), or -1 with st->_errno set to E2BIG
 * when buf has room for fewer than two bytes.
 */
int iso_gb_to_gb(_iconv_st * st, char* buf, size_t buflen) {
	if (buflen >= 2) {
		buf[0] = st->_keepc[0] | MSB;
		buf[1] = st->_keepc[1] | MSB;
		return 2;
	}

	st->_errno = E2BIG;
	return -1;
}
620 
621 /*
622  * ISO 2022-7 code --> Big-5 code
623  * Return: > 0 - converted with enough space in output buffer
624  *         = 0 - no space in outbuf
625  */
iso_to_big5_to_gb(_iconv_st * st,char * buf,size_t buflen)626 int iso_to_big5_to_gb(_iconv_st * st, char* buf, size_t buflen) {
627 	char		cns_str[3], c1, c2;
628 	unsigned long	cns_val;	/* MSB mask off CNS 11643 value */
629 	int		unidx;		/* binary search index */
630 	unsigned long	big5_val, val;	/* Big-5 code */
631 	int idx;
632 
633 	if (st->_plane == 1) {
634 		cns_str[0] = st->_keepc[0] & MSB_OFF;
635 		cns_str[1] = st->_keepc[1] & MSB_OFF;
636 	} else {
637 		cns_str[0] = st->_keepc[0] & MSB_OFF;
638 		cns_str[1] = st->_keepc[1] & MSB_OFF;
639 	}
640 	cns_val = (cns_str[0] << 8) + cns_str[1];
641 
642 	if (buflen < 2) {
643 		errno = E2BIG;
644 		return(0);
645 	}
646 
647 	switch (st->_plane) {
648 		case 1:
649 			unidx = binsearch(cns_val, cns_big5_tab1, MAX_CNS1_NUM);
650 			if (unidx >= 0)
651 				big5_val = cns_big5_tab1[unidx].value;
652 			break;
653 		case 2:
654 			unidx = binsearch(cns_val, cns_big5_tab2, MAX_CNS2_NUM);
655 			if (unidx >= 0)
656 				big5_val = cns_big5_tab2[unidx].value;
657 			break;
658 		default:
659 			unidx = -1;	/* no mapping from CNS to Big-5 out of plane 1&2 */
660 			break;
661 	}
662 
663 
664 	if (unidx < 0) {	/* no match from CNS to Big-5 */
665 		*buf     = NON_ID_CHAR_BYTE1;
666 		*(buf+1) = NON_ID_CHAR_BYTE2;
667 	} else {
668 		val = big5_val & 0xffff;
669 		*buf = c1 = (char) ((val & 0xff00) >> 8);
670 		*(buf+1) = c2 = (char) (val & 0xff);
671 	}
672 
673 
674 	if (unidx < 0) {
675 		return 2;
676 	} else {
677 		idx = binsearch_big5_gb((((*buf) & ONEBYTE) << 8) | ((*(buf+1)) & ONEBYTE));
678 		if (idx < 0) {
679 			*buf     = NON_ID_CHAR_BYTE1;
680 			*(buf+1) = NON_ID_CHAR_BYTE2;
681 		} else {
682 			*buf     = (big5_gb_tab[idx].value >> 8) & ONEBYTE;
683 			*(buf+1) = big5_gb_tab[idx].value & ONEBYTE;
684 		}
685 	}
686 
687 	return(2);
688 }
689 
690 /* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
binsearch(unsigned long x,table_t v[],int n)691 int binsearch(unsigned long x, table_t v[], int n)
692 {
693 	int low, high, mid;
694 
695 	low = 0;
696 	high = n - 1;
697 	while (low <= high) {
698 		mid = (low + high) / 2;
699 		if (x < v[mid].key)
700 			high = mid - 1;
701 		else if (x > v[mid].key)
702 			low = mid + 1;
703 		else	/* found match */
704 			return mid;
705 	}
706 	return (-1);	/* no match */
707 }
708 
binsearch_big5_gb(unsigned int big5code)709 int binsearch_big5_gb(unsigned int big5code)
710 {
711 	int low, high, mid;
712 
713 	low = 0;
714 	high = BIG5MAX - 1;
715 	while (low <= high) {
716 		mid = (low + high) / 2;
717 		if (big5code < big5_gb_tab[mid].key)
718 			high = mid - 1;
719 		else if (big5code > big5_gb_tab[mid].key)
720 			low = mid + 1;
721 		else	/* found match */
722 			return mid;
723 	}
724 	return (-1);	/* no match */
725 }
726 
727 int
iso_to_gb(char in_byte1,char in_byte2,char * buf,int buflen)728 iso_to_gb(char in_byte1, char in_byte2, char *buf, int buflen)
729 {
730 	if ( buflen < 2 )
731 	    return 0;
732 	*buf = in_byte1 | MSB;
733 	*(buf+1) = in_byte2 | MSB;
734 	return 2;
735 }
736 
737 
738 /*
739  * ==================================================================
740  * enconv functions
741  * ==================================================================
742  */
743 
/* Conversion state kept between calls to _cv_enconv(). */
typedef struct _enconv_st {
	char	_lastc;		/* first byte of a two-byte character, saved in G5 */
	short	_gstate;	/* parser state (enum _GSTATE, G0 - G6) */
} _enconv_st;
748 
749 /*
750  * Open; called from enconv_open()
751  */
752 void *
_cv_open()753 _cv_open()
754 {
755 	_enconv_st *st;
756 
757 	if ((st = (_enconv_st *)malloc(sizeof(_enconv_st))) == NULL) {
758 		return ((void *) -1);
759 	}
760 
761 	st->_gstate = G0;
762 	return ((void *)st);
763 }
764 
765 
766 /*
767  * Close; called from enconv_close()
768  */
769 void
_cv_close(_enconv_st * st)770 _cv_close(_enconv_st *st)
771 {
772 	if (st != NULL)
773 		free(st);
774 }
775 
776 
777 /*
778  * Actual conversion; called from enconv()
779  */
780 /*=======================================================================
781  *
782  *         ESC        $       )        A        SO        1st C
783  * +-> G0 -----> G1 ----> G2 ----> G3 ----> G4 -----> G5 ---------> G6
784  * |   | ascii   | ascii  | ascii  |   |ascii|     SI | |     2nd C |
785  * +-------------------------------+   +-<---+--------+ +-<---------+
786  *=======================================================================*/
787 size_t
_cv_enconv(_enconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)788 _cv_enconv(_enconv_st *st, char **inbuf, size_t*inbytesleft,
789 			char **outbuf, size_t*outbytesleft)
790 {
791 	int	n;
792 
793 	if (st == NULL) {
794 		return -1;
795 	}
796 	if (inbuf == NULL || *inbuf == NULL) { /* Reset request */
797 		st->_gstate = G0;
798 		return 0;
799 	}
800 
801 	while (*inbytesleft > 0 && *outbytesleft > 0) {
802 	    switch (st->_gstate) {
803 	    case G0:
804 		if ( **inbuf == ESC ) {
805 		    st->_gstate = G1;
806 		} else {		/* ASCII */
807 		    **outbuf = **inbuf;
808 		    (*outbuf)++, (*outbytesleft)--;
809 		}
810 		break;
811 	    case G1:
812 		if ( **inbuf == '$' ) {
813 		    st->_gstate = G2;
814 		} else {
815 		    **outbuf = ESC;
816 		    (*outbuf)++, (*outbytesleft)--;
817 		    st->_gstate = G0;
818 		    continue;
819 		}
820 		break;
821 	    case G2:
822 		if ( **inbuf == ')' ) {
823 		    st->_gstate = G3;
824 		} else {
825 		    if (*outbytesleft < 2) {
826 			return (*inbytesleft);
827 		    }
828 		    **outbuf = ESC;
829 		    *(*outbuf+1) = '$';
830 		    (*outbuf) += 2, (*outbytesleft) -= 2;
831 		    st->_gstate = G0;
832 		    continue;
833 		}
834 		break;
835 	    case G3:
836 		if ( **inbuf == 'A' ) {
837 		    st->_gstate = G4;
838 		} else {
839 		    if (*outbytesleft < 3) {
840 			return (*inbytesleft);
841 		    }
842 		    **outbuf = ESC;
843 		    *(*outbuf+1) = '$';
844 		    *(*outbuf+2) = ')';
845 		    (*outbuf) += 3, (*outbytesleft) -= 3;
846 		    st->_gstate = G0;
847 		    continue;
848 		}
849 		break;
850 	    case G4:
851 		if ( **inbuf == SO ) {
852 		    st->_gstate = G5;
853 		} else {
854 		    **outbuf = **inbuf;
855 		    (*outbuf)++, (*outbytesleft)--;
856 		}
857 		break;
858 	    case G5:
859 		if ( **inbuf == SI ) {
860 		    st->_gstate = G4;
861 		} else {
862 		    st->_lastc = **inbuf;
863 		    st->_gstate = G6;
864 		}
865 		break;
866 	    case G6:
867 		n = iso_to_gb(st->_lastc, **inbuf, *outbuf, *outbytesleft);
868 		if (n > 0) {
869 		    (*outbuf) += n, (*outbytesleft) -= n;
870 		} else {
871 		    return (*inbytesleft);
872 		}
873 		st->_gstate = G5;
874 		break;
875 	    }
876 
877 	    (*inbuf)++, (*inbytesleft)--;
878 	}
879 
880 	return (*inbytesleft);
881 }
882 
883 #ifdef DEBUG
main(int argc,char ** argv)884 main(int argc, char ** argv) {
885 	char *inbuf, *outbuf, *in_tmp, *out_tmp;
886 	size_t inbytesleft, outbytesleft;
887 	int fd;
888 	int i;
889 	struct stat s;
890 	_iconv_st * st;
891 	if (argc < 2) {
892 		fprintf(stderr, "Usage: %s input\n", argv[0]);
893 		exit(-1);
894 	}
895 	if ((fd = open(argv[1], O_RDONLY)) == -1) {
896 		perror("open");
897 		exit(-2);
898 	}
899 	if (fstat(fd, &s) == -1) {
900 		perror("stat");
901 		exit(-3);
902 	}
903 	inbytesleft = outbytesleft = s.st_size;
904 	in_tmp = inbuf = (char *)malloc(inbytesleft);
905 	out_tmp = outbuf = (char *)malloc(outbytesleft);
906 	if (!inbuf || !outbuf) {
907 		perror("malloc");
908 		exit(-1);
909 	}
910 	if (read(fd, inbuf, inbytesleft) != inbytesleft) {
911 		perror("read");
912 		exit(-4);
913 	}
914 	for (i = 0; i < inbytesleft; i++)
915 		fprintf(stderr, "%x\t", *(inbuf+i));
916 	fprintf(stderr, "\n");
917 	st = (_iconv_st *)_icv_open();
918 	if (st == (_iconv_st *) -1) {
919 		perror("_icv_open");
920 		exit(-1);
921 	}
922 	if (_icv_iconv(st, \
923 				&inbuf, &inbytesleft, \
924 				&outbuf, &outbytesleft) == -1) {
925 		perror("icv_iconv");
926 		fprintf(stderr, "\ninbytesleft = %d\n", inbytesleft);
927 		exit(-2);
928 	}
929 	if (write(1, out_tmp, s.st_size - outbytesleft) == -1) {
930 		perror("write");
931 		exit(-1);
932 	}
933 	free(in_tmp);
934 	free(out_tmp);
935 	close(fd);
936 	_icv_close(st);
937 }
938 #endif
939