xref: /illumos-gate/usr/src/lib/iconv_modules/zh/common/zh_CN.iso2022-CN%zh_CN.gbk.c (revision 16d8656330ae5622ec32e5007f62145ebafdc50f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1995, by Sun Microsystems, Inc.
23  * All rights reserved.
24  */
25 
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <errno.h>
#ifdef DEBUG
#include <unistd.h>
#include <sys/fcntl.h>
#include <sys/stat.h>
#endif
#include <cns11643_big5.h>	/* CNS 11643 to Big-5 mapping table */
#include <big5_gb18030.h>	/* Big-5 to GBK mapping table */
36 
#define MSB 	0x80	/* most significant bit */
#define MBYTE	0x8e	/* multi-byte (4 byte character) */
#define PMASK	0xa0	/* plane number mask */
#define ONEBYTE 0xff	/* right most byte */
#define MSB_OFF 0x7f	/* mask off MSB */

#define SI	0x0f		/* shift in */
#define SO	0x0e		/* shift out */
#define ESC 0x1b		/* escape */
#define SS2	0x4e		/* byte expected after ESC for the SS2 shift */
#define SS3 0x4f		/* byte expected after ESC for the SS3 shift */
#define NON_ID_CHAR_BYTE1	0xA1	/* 1st output byte for a character with no mapping */
#define NON_ID_CHAR_BYTE2	0xF5	/* 2nd output byte for a character with no mapping */
50 
/*
 * Conversion descriptor: allocated by _icv_open(), updated by _icv_iconv()
 * and released by _icv_close().
 */
typedef struct _icv_state {
	char	_buf[10];	/* escape-sequence bytes held until flushed or discarded */
	size_t	_bufcont;	/* number of bytes currently held in _buf */
	char	_keepc[4];	/* maximum # byte of CNS11643 code */
	short	_gstate;		/* state machine id */
	short	_istate;		/* state for shift in/out */
	int		_plane;		/* plane number for Chinese character */
	int		_last_plane;	/* last character's plane # */
	int 	_errno;		/* internal errno */
} _iconv_st;
61 
/* State ids of the parser in _icv_iconv(); stored in _iconv_st._gstate. */
enum _GSTATE    { G0, G1, G2, G3, G4, G5, G6, G7, G8, G9, \
				  G10,G11,G12,G13,G14,G15,G16,G17,G18,G19, \
				  G20,G21,G22,G23,G24,G25,G26,G27,G28,G29 };

/* Shift state stored in _iconv_st._istate. */
enum _ISTATE	{ IN, OUT };


/* Helpers defined further down in this file. */
int iso_gb_to_gbk(_iconv_st * st, char* buf, size_t buflen);
int iso_to_big5_to_gbk(_iconv_st * st, char* buf, size_t buflen);
int binsearch(unsigned long x, table_t v[], int n);
int binsearch_big5_gbk(unsigned int big5code);
int flush_buf(_iconv_st * st, char ** outbuf, size_t * outbytesleft);
74 
/*
 * Write the escape-sequence bytes held in st->_buf to the output buffer.
 *
 * Returns 0 on success (also when nothing is held).  Returns -1 and sets
 * st->_errno to E2BIG when the output buffer cannot take the held bytes,
 * or to EILSEQ when the converter is not in the shifted-in state.  On
 * failure nothing is written and the held bytes are kept.
 */
int flush_buf(_iconv_st * st, char ** outbuf, size_t * outbytesleft) {
	size_t	pending = st->_bufcont;

	if (pending == 0)
		return 0;
	if (pending > *outbytesleft) {
		st->_errno = E2BIG;
		return -1;
	}
	if (st->_istate != IN) {
		st->_errno = EILSEQ;
		return -1;
	}
	/*
	 * The caller's output buffer is the destination and the held bytes
	 * are the source.  memcpy is used because the held bytes are raw
	 * data, not a NUL-terminated string.
	 */
	memcpy(*outbuf, st->_buf, pending);
	(*outbuf) += pending;
	(*outbytesleft) -= pending;
	st->_bufcont = 0;
	return 0;
}
92 
93 /*
94  * Open; called from iconv_open()
95  */
96 void *
_icv_open()97 _icv_open()
98 {
99 	_iconv_st *st;
100 
101 	if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
102 		errno = ENOMEM;
103 		return ((void *) -1);
104 	}
105 
106 	st->_gstate = G0;
107 	st->_istate = IN;
108 	st->_last_plane = st->_plane = -1;
109 	st->_errno = 0;
110 	st->_bufcont = 0;
111 
112 	return ((void *) st);
113 }
114 
115 /*
116  * Close; called from iconv_close()
117  */
118 void
_icv_close(_iconv_st * st)119 _icv_close(_iconv_st *st)
120 {
121 	if (st == NULL)
122 		errno = EBADF;
123 	else
124 		free(st);
125 }
126 
127 /*
128  * Actual conversion; called from iconv()
129  */
130 /*=========================================================================
131  *
132  *             State Machine for interpreting ISO 2022-7 code
133  *
134  *=========================================================================
135  *
136  *                                                        plane 2 - 16
137  *                                                    +---------->-------+
138  *                                    plane           ^                  |
139  *            ESC      $       )      number     SO   | plane 1          v
140  *    +-> G0 ----> G1 ---> G2 ---> G3 ------> G4 --> G5 -------> G6     G7
141  *    |   | ascii  | ascii | ascii |    ascii |   SI | |          |      |
142  *    +----------------------------+    <-----+------+ +------<---+------+
143  *    ^                                 |
144  *    |              ascii              v
145  *    +---------<-------------<---------+
146  *
147  *=========================================================================*/
/*
 * Convert ISO 2022-7 input to GBK output, one input byte per loop pass.
 *
 * st            - descriptor from _icv_open(); NULL fails with EBADF.
 * inbuf         - NULL (or pointing at NULL) requests a reset of the
 *                 descriptor to its initial state; 0 is returned.
 * inbytesleft,
 * outbuf,
 * outbytesleft  - advanced / decremented as bytes are consumed and written.
 *
 * State groups handled below:
 *   G0-G3    plain ASCII and the start of "ESC $ ) / * / +"
 *   G4-G7    ESC $ ) A      two-byte characters via iso_gb_to_gbk()
 *   G8-G11   ESC $ ) G / H  two-byte characters via iso_to_big5_to_gbk()
 *   G12-G18  ESC $ * H      plane 2, selected with ESC SS2
 *   G19-G25  ESC $ + I..V   planes 3-16, selected with ESC SS3
 *
 * Returns (size_t)-1 with errno set on failure (E2BIG when output space
 * runs out, EILSEQ/E2BIG from flush_buf(), EILSEQ for an unknown state);
 * otherwise returns the value of *inbytesleft after the loop.
 */
size_t _icv_iconv(_iconv_st *st,
					char **inbuf, size_t *inbytesleft,
					char **outbuf, size_t *outbytesleft) {
	int		n;
	char	c;

	if (st == NULL) {
		errno = EBADF;
		return ((size_t) -1);
	}

	if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
		st->_gstate = G0;
		st->_istate = IN;
		st->_errno = 0;
		st->_plane = st->_last_plane = -1;
		st->_bufcont = 0;	/* drop any half-read escape sequence */
		return ((size_t) 0);
	}

	errno = st->_errno = 0;	/* reset internal and external errno */

	/* a state machine for interpreting ISO 2022-7 code */
	while (*inbytesleft > 0 && *outbytesleft > 0) {
		switch (st->_gstate) {
		case G0:		/* assuming ASCII in the beginning */
			if (**inbuf == ESC) {
				st->_gstate = G1;
				st->_buf[st->_bufcont++] = ESC;
			} else {	/* real ASCII */
				**outbuf = **inbuf;
				(*outbuf)++;
				(*outbytesleft)--;
			}
			break;
		case G1:		/* got ESC, expecting $ */
			if (**inbuf == '$') {
				st->_gstate = G2;
				st->_buf[st->_bufcont++] = '$';
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				errno = st->_errno;
				return (size_t)-1;
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_istate = IN;
				continue;	/* don't advance inbuf */
			}
			break;
		case G2:		/* got $, expecting ) * or + */
			if (**inbuf == ')') {
				st->_gstate = G3;
			} else if (**inbuf == '*') {
				st->_gstate = G12;
				st->_plane = 2;
			} else if (**inbuf == '+') {
				st->_gstate = G19;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				errno = st->_errno;
				return (size_t)-1;
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_istate = IN;
				continue;	/* don't advance inbuf */
			}
			st->_buf[st->_bufcont++] = **inbuf;
			break;
		case G3:	/* got ) expecting A,G,H */
					/* H is for the bug of and zh_TW.BIG5 */
			if (**inbuf == 'A') {
				st->_plane = 0;
				st->_gstate = G4;
			} else if (**inbuf == 'G') {
				st->_plane = 1;
				st->_gstate = G8;
			} else if (**inbuf == 'H') {
				st->_plane = 2;
				st->_gstate = G8;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				errno = st->_errno;
				return (size_t)-1;
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_istate = IN;
				continue;
			}
			st->_buf[st->_bufcont++] = **inbuf;
			break;
		case G4:	/* ESC $ ) A got, and SO is expected */
			if (**inbuf == SO) {
				st->_gstate = G5;
				st->_istate = OUT;
				st->_bufcont = 0;
				st->_last_plane = st->_plane;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				errno = st->_errno;
				return (size_t)-1;
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_istate = IN;
				st->_plane = st->_last_plane;
				continue;
			}
			break;
		case G5:	/* SO (Shift Out) */
			if (**inbuf == SI) {
				st->_istate = IN;
				st->_gstate = G7;
				st->_last_plane = st->_plane;
			} else if (**inbuf == ESC) {
				/* let G0 start a new escape sequence on this byte */
				st->_bufcont = 0;
				st->_gstate = G0;
				continue;
			} else {	/* Chinese Characters */
				st->_keepc[0] = **inbuf;
				st->_gstate = G6;
			}
			break;
		case G6:	/* GB2312: 2nd Chinese character */
			st->_keepc[1] = **inbuf;
			n = iso_gb_to_gbk(st, *outbuf, *outbytesleft);
			if (n > 0) {
				(*outbuf) += n;
				(*outbytesleft) -= n;
			} else {
				errno = st->_errno;
				return (size_t)-1;
			}
			st->_gstate = G5;
			break;
		case G7:	/* Shift in */
			if (**inbuf == SO) {
				st->_gstate = G5;
				st->_istate = OUT;
				st->_last_plane = st->_plane;
				st->_bufcont = 0;
			} else if (**inbuf == ESC) {
				st->_gstate = G0;
				continue;
			} else {
				**outbuf = **inbuf;
				(*outbuf)++;
				(*outbytesleft)--;
			}
			break;
		case G8:	/* BIG5: Chinese character */
			if (**inbuf == SO) {
				st->_istate = OUT;
				st->_gstate = G9;
				st->_bufcont = 0;
				st->_last_plane = st->_plane;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				errno = st->_errno;
				return (size_t)-1;
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_plane = st->_last_plane;
				st->_istate = IN;
				continue;
			}
			break;
		case G9:
			if (**inbuf == SI) {
				st->_istate = IN;
				st->_gstate = G11;
				st->_last_plane = st->_plane;
			} else if (**inbuf == ESC) {
				if (flush_buf(st, outbuf, outbytesleft) == -1) {
					errno = st->_errno;
					return (size_t)-1;
				}
				st->_gstate = G0;
				continue;
			} else {	/* Chinese Character */
				st->_keepc[0] = **inbuf;
				st->_gstate = G10;
			}
			break;
		case G10:
			st->_keepc[1] = **inbuf;
			n = iso_to_big5_to_gbk(st, *outbuf, *outbytesleft);
			if (n > 0) {
				(*outbuf) += n;
				(*outbytesleft) -= n;
			} else {
				/*
				 * The converter only fails for lack of output
				 * space; report that in both error variables so
				 * the caller never sees -1 with errno == 0.
				 */
				st->_errno = errno = E2BIG;
				return (size_t)-1;
			}
			st->_gstate = G9;
			break;
		case G11:
			st->_bufcont = 0;
			if (**inbuf == SO) {
				st->_istate = OUT;
				st->_gstate = G9;
			} else if (**inbuf == ESC) {
				st->_gstate = G0;
				continue;
			} else {
				**outbuf = **inbuf;
				(*outbuf)++;
				(*outbytesleft)--;
			}
			break;
		case G12:	/* got ESC $ *, expecting H */
			if (**inbuf == 'H') {
				st->_buf[st->_bufcont++] = 'H';
				st->_gstate = G13;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				errno = st->_errno;
				return (size_t)-1;
			} else {
				st->_istate = IN;
				st->_plane = st->_last_plane;
				st->_gstate = G0;
				continue;
			}
			break;
		case G13:	/* got ESC $ * H, expecting ESC */
			if (**inbuf == ESC) {
				st->_buf[st->_bufcont++] = **inbuf;
				st->_gstate = G14;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				errno = st->_errno;
				return (size_t)-1;
			} else {
				st->_gstate = G0;
				st->_istate = IN;
				st->_plane = st->_last_plane;
				continue;
			}
			break;
		case G14:	/* got ESC after the designation, expecting SS2 */
			if (**inbuf == SS2) {
				st->_istate = OUT;
				st->_gstate = G15;
				st->_bufcont = 0;
				st->_last_plane = st->_plane = 2;
			} else if (**inbuf == '$') {
				/* the ESC just held starts another designation */
				st->_bufcont --;
				if (flush_buf(st, outbuf, outbytesleft) == -1) {
					errno = st->_errno;
					return (size_t)-1;
				} else {
					st->_gstate = G1;
					st->_plane = st->_last_plane;
					st->_istate = IN;
					continue;
				}
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				errno = st->_errno;
				return (size_t)-1;
			} else {
				st->_gstate = G0;
				st->_istate = IN;
				st->_plane = st->_last_plane;
				continue;
			}
			break;
		case G15:	/* plane 2: 1st byte, SI or ESC */
			if (**inbuf == SI) {
				st->_gstate = G16;
				st->_istate = IN;
				st->_last_plane = st->_plane;
			} else if (**inbuf == ESC) {
				st->_bufcont = 0;
				st->_gstate = G0;
				continue;
			} else {
				st->_keepc[0] = **inbuf;
				st->_gstate = G18;
			}
			break;
		case G16:	/* shifted in after plane 2 text */
			if (**inbuf == ESC) {
				st->_gstate = G17;
				st->_buf[st->_bufcont++] = ESC;
			} else {
				**outbuf = **inbuf;
				(*outbuf) ++;
				(*outbytesleft) --;
				st->_bufcont = 0;
			}
			break;
		case G17:
			if (**inbuf == '$') {
				/*
				 * Hand the '$' to G1 without consuming it; G1
				 * is the one that adds it to _buf, so it must
				 * not be buffered here as well.
				 */
				st->_gstate = G1;
				continue;
			} else if (**inbuf == SS2) {
				st->_bufcont = 0;
				st->_gstate = G15;
				st->_istate = OUT;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				errno = st->_errno;
				return (size_t)-1;
			} else {
				st->_gstate = G16;
				st->_istate = IN;
			}
			break;
		case G18:	/* plane 2: 2nd byte */
			st->_keepc[1] = **inbuf;
			n = iso_to_big5_to_gbk(st, *outbuf, *outbytesleft);
			if (n > 0) {
				(*outbuf) += n;
				(*outbytesleft) -= n;
			} else {
				/* stay in G18 so a retry re-reads this byte */
				st->_errno = errno = E2BIG;
				return (size_t)-1;
			}
			st->_gstate = G15;
			break;
		case G19:	/* Plane #: 3 - 16 */
			c = **inbuf;
			if (c >= 'I' && c <= 'V') {
				st->_plane = c - 'I' + 3;
				st->_gstate = G20;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				errno = st->_errno;
				return (size_t)-1;
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_istate = IN;
				st->_plane = st->_last_plane;
				continue;
			}
			st->_buf[st->_bufcont++] = c;
			break;
		case G20:	/* got ESC $ + <plane>, expecting ESC */
			if (**inbuf == ESC) {
				st->_buf[st->_bufcont++] = **inbuf;
				st->_gstate = G21;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				errno = st->_errno;
				return (size_t)-1;
			} else {
				st->_gstate = G0;
				st->_istate = IN;
				st->_last_plane = st->_plane;
				continue;
			}
			break;
		case G21:	/* got ESC after the designation, expecting SS3 */
			if (**inbuf == SS3) {
				st->_istate = OUT;
				st->_gstate = G22;
				st->_bufcont = 0;
			} else if (**inbuf == '$') {
				/* the ESC just held starts another designation */
				st->_bufcont --;
				if (flush_buf(st, outbuf, outbytesleft) == -1) {
					errno = st->_errno;
					return (size_t)-1;
				} else {
					st->_istate = IN;
					st->_last_plane = st->_plane;
					st->_gstate = G1;
					continue;
				}
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				errno = st->_errno;
				return (size_t)-1;
			} else {
				st->_gstate = G0;
				st->_istate = IN;
				st->_last_plane = st->_plane;
				continue;
			}
			break;
		case G22:	/* planes 3-16: 1st byte or SI */
			if (**inbuf == SI) {
				st->_istate = IN;
				st->_gstate = G24;
				st->_last_plane = st->_plane;
			} else {
				st->_keepc[0] = (char)MBYTE;
				st->_keepc[1] = (char)(PMASK + st->_plane);
				st->_keepc[2] = **inbuf;
				st->_gstate = G23;
			}
			break;
		case G23:	/* planes 3-16: 2nd byte */
			st->_keepc[3] = **inbuf;
			n = iso_to_big5_to_gbk(st, *outbuf, *outbytesleft);
			if (n > 0) {
				(*outbuf) += n;
				(*outbytesleft) -= n;
			} else {
				st->_errno = errno = E2BIG;
				return (size_t)-1;
			}
			st->_gstate = G22;
			break;
		case G24:	/* shifted in after plane 3-16 text */
			if (**inbuf == ESC) {
				st->_gstate = G25;
				st->_buf[st->_bufcont++] = ESC;
			} else {
				**outbuf = **inbuf;
				(*outbuf)++;
				(*outbytesleft)--;
				st->_bufcont = 0;
			}
			break;
		case G25:
			if (**inbuf == '$') {
				st->_gstate = G1;
				continue;
			} else if (**inbuf == SS3) {
				st->_gstate = G22;
				st->_bufcont = 0;
				st->_istate = OUT;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				errno = st->_errno;
				return (size_t)-1;
			} else {
				st->_gstate = G24;
				st->_istate = IN;
			}
			break;
		default:			/* should never come here */
			st->_errno = errno = EILSEQ;
			st->_gstate = G0;	/* reset state */
			break;
		}	/* end of switch */

		(*inbuf)++;
		(*inbytesleft)--;

		if (st->_errno) {
			break;
		}
		if (errno)
			return((size_t)(-1));
	}

	if (*inbytesleft > 0 && *outbytesleft == 0) {
		errno = E2BIG;
		return((size_t)(-1));
	}
	return (size_t)(*inbytesleft);
}
628 
/*
 * Write the two bytes kept in st->_keepc[0] and st->_keepc[1] to buf with
 * the most significant bit of each set.
 *
 * Return: 2  - two bytes written
 *         -1 - fewer than two bytes of room; st->_errno is set to E2BIG
 */
int iso_gb_to_gbk(_iconv_st * st, char* buf, size_t buflen) {
	if (buflen >= 2) {
		buf[0] = st->_keepc[0] | MSB;
		buf[1] = st->_keepc[1] | MSB;
		return 2;
	}
	st->_errno = E2BIG;
	return -1;
}
638 
639 /*
640  * ISO 2022-7 code --> Big-5 code
641  * Return: > 0 - converted with enough space in output buffer
642  *         = 0 - no space in outbuf
643  */
iso_to_big5_to_gbk(_iconv_st * st,char * buf,size_t buflen)644 int iso_to_big5_to_gbk(_iconv_st * st, char* buf, size_t buflen) {
645 	char		cns_str[3], c1, c2;
646 	unsigned long	cns_val;	/* MSB mask off CNS 11643 value */
647 	int		unidx;		/* binary search index */
648 	unsigned long	big5_val, val;	/* Big-5 code */
649 	int idx;
650 
651 	if (st->_plane == 1) {
652 		cns_str[0] = st->_keepc[0] & MSB_OFF;
653 		cns_str[1] = st->_keepc[1] & MSB_OFF;
654 	} else {
655 		cns_str[0] = st->_keepc[0] & MSB_OFF;
656 		cns_str[1] = st->_keepc[1] & MSB_OFF;
657 	}
658 	cns_val = (cns_str[0] << 8) + cns_str[1];
659 
660 	if (buflen < 2) {
661 		errno = E2BIG;
662 		return(0);
663 	}
664 
665 	switch (st->_plane) {
666 		case 1:
667 			unidx = binsearch(cns_val, cns_big5_tab1, MAX_CNS1_NUM);
668 			if (unidx >= 0)
669 				big5_val = cns_big5_tab1[unidx].value;
670 			break;
671 		case 2:
672 			unidx = binsearch(cns_val, cns_big5_tab2, MAX_CNS2_NUM);
673 			if (unidx >= 0)
674 				big5_val = cns_big5_tab2[unidx].value;
675 			break;
676 		default:
677 			unidx = -1;	/* no mapping from CNS to Big-5 out of plane 1&2 */
678 			break;
679 	}
680 
681 
682 	if (unidx < 0) {	/* no match from CNS to Big-5 */
683 		*buf     = NON_ID_CHAR_BYTE1;
684 		*(buf+1) = NON_ID_CHAR_BYTE2;
685 	} else {
686 		val = big5_val & 0xffff;
687 		*buf = c1 = (char) ((val & 0xff00) >> 8);
688 		*(buf+1) = c2 = (char) (val & 0xff);
689 	}
690 
691 
692 	if (unidx < 0) {
693 		return 2;
694 	} else {
695 		idx = binsearch_big5_gbk((((*buf) & ONEBYTE) << 8) | ((*(buf+1)) & ONEBYTE));
696 		if (idx < 0) {
697 			*buf     = NON_ID_CHAR_BYTE1;
698 			*(buf+1) = NON_ID_CHAR_BYTE2;
699 		} else {
700 			*buf     = (big5_gbk_tab[idx].value >> 8) & ONEBYTE;
701 			*(buf+1) = big5_gbk_tab[idx].value & ONEBYTE;
702 		}
703 	}
704 
705 	return(2);
706 }
707 
708 /* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
binsearch(unsigned long x,table_t v[],int n)709 int binsearch(unsigned long x, table_t v[], int n)
710 {
711 	int low, high, mid;
712 
713 	low = 0;
714 	high = n - 1;
715 	while (low <= high) {
716 		mid = (low + high) / 2;
717 		if (x < v[mid].key)
718 			high = mid - 1;
719 		else if (x > v[mid].key)
720 			low = mid + 1;
721 		else	/* found match */
722 			return mid;
723 	}
724 	return (-1);	/* no match */
725 }
726 
binsearch_big5_gbk(unsigned int big5code)727 int binsearch_big5_gbk(unsigned int big5code)
728 {
729 	int low, high, mid;
730 
731 	low = 0;
732 	high = BIG5MAX - 1;
733 	while (low <= high) {
734 		mid = (low + high) / 2;
735 		if (big5code < big5_gbk_tab[mid].key)
736 			high = mid - 1;
737 		else if (big5code > big5_gbk_tab[mid].key)
738 			low = mid + 1;
739 		else	/* found match */
740 			return mid;
741 	}
742 	return (-1);	/* no match */
743 }
744 
745 #ifdef DEBUG
main(int argc,char ** argv)746 main(int argc, char ** argv) {
747 	char *inbuf, *outbuf, *in_tmp, *out_tmp;
748 	size_t inbytesleft, outbytesleft;
749 	int fd;
750 	int i;
751 	struct stat s;
752 	_iconv_st * st;
753 	if (argc < 2) {
754 		fprintf(stderr, "Usage: %s input\n", argv[0]);
755 		exit(-1);
756 	}
757 	if ((fd = open(argv[1], O_RDONLY)) == -1) {
758 		perror("open");
759 		exit(-2);
760 	}
761 	if (fstat(fd, &s) == -1) {
762 		perror("stat");
763 		exit(-3);
764 	}
765 	inbytesleft = outbytesleft = s.st_size;
766 	in_tmp = inbuf = (char *)malloc(inbytesleft);
767 	out_tmp = outbuf = (char *)malloc(outbytesleft);
768 	if (!inbuf || !outbuf) {
769 		perror("malloc");
770 		exit(-1);
771 	}
772 	if (read(fd, inbuf, inbytesleft) != inbytesleft) {
773 		perror("read");
774 		exit(-4);
775 	}
776 	for (i = 0; i < inbytesleft; i++)
777 		fprintf(stderr, "%x\t", *(inbuf+i));
778 	fprintf(stderr, "\n");
779 	st = (_iconv_st *)_icv_open();
780 	if (st == (_iconv_st *) -1) {
781 		perror("_icv_open");
782 		exit(-1);
783 	}
784 	if (_icv_iconv(st, \
785 				&inbuf, &inbytesleft, \
786 				&outbuf, &outbytesleft) == -1) {
787 		perror("icv_iconv");
788 		fprintf(stderr, "\ninbytesleft = %d\n", inbytesleft);
789 		exit(-2);
790 	}
791 	if (write(1, out_tmp, s.st_size - outbytesleft) == -1) {
792 		perror("write");
793 		exit(-1);
794 	}
795 	free(in_tmp);
796 	free(out_tmp);
797 	close(fd);
798 	_icv_close(st);
799 }
800 #endif
801