1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1995, by Sun Microsystems, Inc.
23 * All rights reserved.
24 */
25
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <errno.h>
#ifdef DEBUG
#include <sys/fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#endif
#include <gb2312_unicode.h>
#include <cns11643_unicode_CN.h>	/* CNS 11643 to Unicode mapping table */
36
/*
 * UTF-8 bytes written for a character that has no usable Unicode mapping
 * (EF BF BD is the UTF-8 encoding of U+FFFD).
 */
#define	UTF8_NON_ID_CHAR1	0xEF
#define	UTF8_NON_ID_CHAR2	0xBF
#define	UTF8_NON_ID_CHAR3	0xBD

#define	MSB	0x80	/* most significant bit */
#define	MBYTE	0x8e	/* multi-byte (4 byte character) */
#define	PMASK	0xa0	/* plane number mask */
#define	ONEBYTE	0xff	/* right most byte */
#define	MSB_OFF	0x7f	/* mask off MSB */

#define	SI	0x0f	/* shift in */
#define	SO	0x0e	/* shift out */
#define	ESC	0x1b	/* escape */
#define	SS2	0x4e	/* SS2 shift out (byte that follows ESC) */
#define	SS3	0x4f	/* SS3 shift out (byte that follows ESC) */
#define	NON_ID_CHAR_BYTE1	0xA1	/* non-identified character */
#define	NON_ID_CHAR_BYTE2	0xF5	/* non-identified character */
54
/*
 * Conversion state carried between calls to _icv_iconv().  One instance is
 * allocated by _icv_open() and released by _icv_close().
 */
typedef struct _icv_state {
	char	_buf[10];	/* escape-sequence bytes collected so far */
	size_t	_bufcont;	/* number of bytes currently held in _buf */
	char	_keepc[4];	/* maximum # byte of CNS11643 code */
	short	_gstate;	/* state machine id */
	short	_istate;	/* state for shift in/out */
	int	_plane;		/* plane number for Chinese character */
	int	_last_plane;	/* last character's plane # */
	int	_errno;		/* internal errno */
} _iconv_st;
65
/*
 * Values for _iconv_st._gstate: the states of the ISO 2022-7 parser in
 * _icv_iconv().  G0 is the initial state.
 */
enum _GSTATE { G0, G1, G2, G3, G4, G5, G6, G7, G8, G9, \
	G10,G11,G12,G13,G14,G15,G16,G17,G18,G19, \
	G20,G21,G22,G23,G24,G25,G26,G27,G28,G29 };

/*
 * Values for _iconv_st._istate: IN is the initial state and is restored on
 * SI; OUT is set once SO, SS2 or SS3 has selected a Chinese character set.
 */
enum _ISTATE { IN, OUT };
71
72
73 int iso_gb_to_utf(_iconv_st * st, char* buf, size_t buflen);
74 int iso_cns_to_utf(_iconv_st * st, char* buf, size_t buflen);
75 int iso_cns_to_utf(_iconv_st * st, char* buf, size_t buflen);
76 int binsearch(unsigned long x, table_t v[], int n);
77 int flush_buf(_iconv_st * st, char ** outbuf, size_t * outbytesleft);
78
/*
 * Write the escape-sequence bytes held in st->_buf to the output buffer
 * unchanged and empty st->_buf.
 *
 * st		conversion state owning the pending bytes
 * outbuf	in/out: current write position in the output buffer
 * outbytesleft	in/out: space remaining in the output buffer
 *
 * Returns 0 on success (including when nothing is pending).  Returns -1
 * with st->_errno set to E2BIG when the pending bytes do not fit, or to
 * EILSEQ when the converter is shifted out (st->_istate != IN); in both
 * failure cases the pending bytes and the output buffer are left untouched.
 */
int flush_buf(_iconv_st * st, char ** outbuf, size_t * outbytesleft) {
	size_t	pending = st->_bufcont;

	if (pending == 0)
		return 0;
	if (pending > *outbytesleft) {
		st->_errno = E2BIG;
		return -1;
	}
	if (st->_istate != IN) {
		st->_errno = EILSEQ;
		return -1;
	}

	/*
	 * Copy FROM the pending buffer TO the output.  The previous code had
	 * the arguments the other way round and used strncpy(), which also
	 * treats the data as a NUL-terminated string.
	 */
	memcpy(*outbuf, st->_buf, pending);
	*outbuf += pending;
	*outbytesleft -= pending;
	st->_bufcont = 0;
	return 0;
}
96
97 /*
98 * Open; called from iconv_open()
99 */
100 void *
_icv_open()101 _icv_open()
102 {
103 _iconv_st *st;
104
105 if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
106 errno = ENOMEM;
107 return ((void *) -1);
108 }
109
110 st->_gstate = G0;
111 st->_istate = IN;
112 st->_last_plane = st->_plane = -1;
113 st->_errno = 0;
114 st->_bufcont = 0;
115
116 return ((void *) st);
117 }
118
119 /*
120 * Close; called from iconv_close()
121 */
122 void
_icv_close(_iconv_st * st)123 _icv_close(_iconv_st *st)
124 {
125 if (st == NULL)
126 errno = EBADF;
127 else
128 free(st);
129 }
130
131 /*
132 * Actual conversion; called from iconv()
133 */
134 /*=========================================================================
135 *
136 * State Machine for interpreting ISO 2022-7 code
137 *
138 *=========================================================================
139 *
140 * plane 2 - 16
141 * +---------->-------+
142 * plane ^ |
143 * ESC $ ) number SO | plane 1 v
144 * +-> G0 ----> G1 ---> G2 ---> G3 ------> G4 --> G5 -------> G6 G7
145 * | | ascii | ascii | ascii | ascii | SI | | | |
146 * +----------------------------+ <-----+------+ +------<---+------+
147 * ^ |
148 * | ascii v
149 * +---------<-------------<---------+
150 *
151 *=========================================================================*/
/*
 * Convert ISO 2022-7 input to UTF-8.
 *
 * st		conversion descriptor from _icv_open()
 * inbuf	in/out: read position; NULL (or pointing at NULL) requests a
 *		reset of the descriptor to its initial state
 * inbytesleft	in/out: bytes of input remaining
 * outbuf	in/out: write position
 * outbytesleft	in/out: bytes of output space remaining
 *
 * Returns 0 on success and (size_t)-1 on failure with errno (and
 * st->_errno) set: EBADF for a NULL descriptor, E2BIG when the output
 * buffer is exhausted, EILSEQ for input that cannot be converted.  On a
 * failure inside a state the offending input byte is not consumed and the
 * parser state is unchanged, so the call can be repeated.
 *
 * Every failure leaves through the "fail" label so that an error code set
 * by a helper in either errno or st->_errno is always reported to the
 * caller.
 */
size_t _icv_iconv(_iconv_st *st,
	char **inbuf, size_t *inbytesleft,
	char **outbuf, size_t *outbytesleft) {
	int	n;
	char	c;

	if (st == NULL) {
		errno = EBADF;
		return ((size_t) -1);
	}

	if (inbuf == NULL || *inbuf == NULL) {	/* Reset request. */
		st->_gstate = G0;
		st->_istate = IN;
		st->_errno = 0;
		st->_plane = st->_last_plane = -1;
		st->_bufcont = 0;	/* forget a half-read escape sequence */
		return ((size_t) 0);
	}

	errno = st->_errno = 0;	/* reset internal and external errno */

	/* a state machine for interpreting ISO 2022-7 code */
	while (*inbytesleft > 0 && *outbytesleft > 0) {
		switch (st->_gstate) {
		case G0:	/* assuming ASCII in the beginning */
			if (**inbuf == ESC) {
				st->_gstate = G1;
				st->_buf[st->_bufcont++] = ESC;
			} else {	/* real ASCII */
				**outbuf = **inbuf;
				(*outbuf)++;
				(*outbytesleft)--;
			}
			break;
		case G1:	/* got ESC, expecting $ */
			if (**inbuf == '$') {
				st->_gstate = G2;
				st->_buf[st->_bufcont++] = '$';
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				goto fail;
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_istate = IN;
				continue;	/* don't advance inbuf */
			}
			break;
		case G2:	/* got $, expecting ) * or + */
			if (**inbuf == ')') {
				st->_gstate = G3;
			} else if (**inbuf == '*') {
				st->_gstate = G12;
				st->_plane = 2;
			} else if (**inbuf == '+') {
				st->_gstate = G19;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				goto fail;
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_istate = IN;
				continue;	/* don't advance inbuf */
			}
			st->_buf[st->_bufcont++] = **inbuf;
			break;
		case G3:	/* got ) expecting A,G,H */
			/* H is accepted to work around a zh_TW.BIG5 bug */
			if (**inbuf == 'A') {
				st->_plane = 0;
				st->_gstate = G4;
			} else if (**inbuf == 'G') {
				st->_plane = 1;
				st->_gstate = G8;
			} else if (**inbuf == 'H') {
				st->_plane = 2;
				st->_gstate = G8;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				goto fail;
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_istate = IN;
				continue;
			}
			st->_buf[st->_bufcont++] = **inbuf;
			break;
		case G4:	/* ESC $ ) A got, and SO is expected */
			if (**inbuf == SO) {
				st->_gstate = G5;
				st->_istate = OUT;
				st->_bufcont = 0;
				st->_last_plane = st->_plane;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				goto fail;
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_istate = IN;
				st->_plane = st->_last_plane;
				continue;
			}
			break;
		case G5:	/* SO (Shift Out): GB2312 text */
			if (**inbuf == SI) {
				st->_istate = IN;
				st->_gstate = G7;
				st->_last_plane = st->_plane;
			} else if (**inbuf == ESC) {
				/* let G0 start parsing the new sequence */
				st->_bufcont = 0;
				st->_gstate = G0;
				continue;
			} else {	/* first byte of a Chinese character */
				st->_keepc[0] = **inbuf;
				st->_gstate = G6;
			}
			break;
		case G6:	/* GB2312: 2nd byte of a Chinese character */
			st->_keepc[1] = **inbuf;
			n = iso_gb_to_utf(st, *outbuf, *outbytesleft);
			if (n <= 0)
				goto fail;
			(*outbuf) += n;
			(*outbytesleft) -= n;
			st->_gstate = G5;
			break;
		case G7:	/* Shift in after GB2312 text */
			if (**inbuf == SO) {
				st->_gstate = G5;
				st->_istate = OUT;
				st->_last_plane = st->_plane;
				st->_bufcont = 0;
			} else if (**inbuf == ESC) {
				st->_gstate = G0;
				continue;
			} else {
				**outbuf = **inbuf;
				(*outbuf)++;
				(*outbytesleft)--;
			}
			break;
		case G8:	/* CNS designated, SO is expected */
			if (**inbuf == SO) {
				st->_istate = OUT;
				st->_gstate = G9;
				st->_bufcont = 0;
				st->_last_plane = st->_plane;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				goto fail;
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_plane = st->_last_plane;
				st->_istate = IN;
				continue;
			}
			break;
		case G9:	/* SO: CNS text */
			if (**inbuf == SI) {
				st->_istate = IN;
				st->_gstate = G11;
				st->_last_plane = st->_plane;
			} else if (**inbuf == ESC) {
				if (flush_buf(st, outbuf, outbytesleft) == -1)
					goto fail;
				st->_gstate = G0;
				continue;
			} else {	/* first byte of a Chinese character */
				st->_keepc[0] = **inbuf;
				st->_gstate = G10;
			}
			break;
		case G10:	/* CNS: 2nd byte of a Chinese character */
			st->_keepc[1] = **inbuf;
			n = iso_cns_to_utf(st, *outbuf, *outbytesleft);
			if (n <= 0)
				goto fail;
			(*outbuf) += n;
			(*outbytesleft) -= n;
			st->_gstate = G9;
			break;
		case G11:	/* Shift in after CNS text */
			st->_bufcont = 0;
			if (**inbuf == SO) {
				st->_istate = OUT;
				st->_gstate = G9;
			} else if (**inbuf == ESC) {
				st->_gstate = G0;
				continue;
			} else {
				**outbuf = **inbuf;
				(*outbuf)++;
				(*outbytesleft)--;
			}
			break;
		case G12:	/* got ESC $ *, expecting H */
			if (**inbuf == 'H') {
				st->_buf[st->_bufcont++] = 'H';
				st->_gstate = G13;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				goto fail;
			} else {
				st->_istate = IN;
				st->_plane = st->_last_plane;
				st->_gstate = G0;
				continue;
			}
			break;
		case G13:	/* got ESC $ * H, expecting ESC */
			if (**inbuf == ESC) {
				st->_buf[st->_bufcont++] = **inbuf;
				st->_gstate = G14;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				goto fail;
			} else {
				st->_gstate = G0;
				st->_istate = IN;
				st->_plane = st->_last_plane;
				continue;
			}
			break;
		case G14:	/* got ESC $ * H ESC, expecting SS2 */
			if (**inbuf == SS2) {
				st->_istate = OUT;
				st->_gstate = G15;
				st->_bufcont = 0;
				st->_last_plane = st->_plane = 2;
			} else if (**inbuf == '$') {
				/*
				 * The trailing ESC starts a new sequence:
				 * flush everything before it, then hand the
				 * ESC to G1 again.  The count is restored if
				 * the flush fails so that a retry sees the
				 * same buffer.
				 */
				st->_bufcont--;
				if (flush_buf(st, outbuf, outbytesleft) == -1) {
					st->_bufcont++;
					goto fail;
				}
				st->_buf[st->_bufcont++] = ESC;
				st->_gstate = G1;
				st->_plane = st->_last_plane;
				st->_istate = IN;
				continue;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				goto fail;
			} else {
				st->_gstate = G0;
				st->_istate = IN;
				st->_plane = st->_last_plane;
				continue;
			}
			break;
		case G15:	/* SS2: CNS plane 2 text */
			if (**inbuf == SI) {
				st->_gstate = G16;
				st->_istate = IN;
				st->_last_plane = st->_plane;
			} else if (**inbuf == ESC) {
				st->_bufcont = 0;
				st->_gstate = G0;
				continue;
			} else {
				st->_keepc[0] = **inbuf;
				st->_gstate = G18;
			}
			break;
		case G16:	/* Shift in after plane 2 text */
			if (**inbuf == ESC) {
				st->_gstate = G17;
				st->_buf[st->_bufcont++] = ESC;
			} else {
				**outbuf = **inbuf;
				(*outbuf)++;
				(*outbytesleft)--;
				st->_bufcont = 0;
			}
			break;
		case G17:	/* got ESC in G16 */
			if (**inbuf == '$') {
				/*
				 * G1 re-reads this byte and stores the '$'
				 * itself; storing it here as well would put
				 * it in the buffer twice.
				 */
				st->_gstate = G1;
				continue;
			} else if (**inbuf == SS2) {
				st->_bufcont = 0;
				st->_gstate = G15;
				st->_istate = OUT;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				goto fail;
			} else {
				st->_gstate = G16;
				st->_istate = IN;
			}
			break;
		case G18:	/* plane 2: 2nd byte of a Chinese character */
			st->_keepc[1] = **inbuf;
			n = iso_cns_to_utf(st, *outbuf, *outbytesleft);
			if (n <= 0)
				goto fail;
			(*outbuf) += n;
			(*outbytesleft) -= n;
			/*
			 * Leave G18 only after the character has been
			 * written, so a retry after a failure re-reads this
			 * byte as the second byte.
			 */
			st->_gstate = G15;
			break;
		case G19:	/* Plane #: 3 - 16 */
			c = **inbuf;
			if (c >= 'I' && c <= 'V') {
				st->_plane = c - 'I' + 3;
				st->_gstate = G20;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				goto fail;
			} else {
				st->_gstate = G0;
				st->_errno = 0;
				st->_istate = IN;
				st->_plane = st->_last_plane;
				continue;
			}
			st->_buf[st->_bufcont++] = c;
			break;
		case G20:	/* got ESC $ + <plane>, expecting ESC */
			if (**inbuf == ESC) {
				st->_buf[st->_bufcont++] = **inbuf;
				st->_gstate = G21;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				goto fail;
			} else {
				st->_gstate = G0;
				st->_istate = IN;
				st->_last_plane = st->_plane;
				continue;
			}
			break;
		case G21:	/* got ESC $ + <plane> ESC, expecting SS3 */
			if (**inbuf == SS3) {
				st->_istate = OUT;
				st->_gstate = G22;
				st->_bufcont = 0;
			} else if (**inbuf == '$') {
				/* same restart handling as in G14 */
				st->_bufcont--;
				if (flush_buf(st, outbuf, outbytesleft) == -1) {
					st->_bufcont++;
					goto fail;
				}
				st->_buf[st->_bufcont++] = ESC;
				st->_istate = IN;
				st->_last_plane = st->_plane;
				st->_gstate = G1;
				continue;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				goto fail;
			} else {
				st->_gstate = G0;
				st->_istate = IN;
				st->_last_plane = st->_plane;
				continue;
			}
			break;
		case G22:	/* SS3: CNS plane 3 - 16 text */
			if (**inbuf == SI) {
				st->_istate = IN;
				st->_gstate = G24;
				st->_last_plane = st->_plane;
			} else {
				/* build the 4 byte form: 0x8e, plane, b1, b2 */
				st->_keepc[0] = (char)MBYTE;
				st->_keepc[1] = (char)(PMASK + st->_plane);
				st->_keepc[2] = **inbuf;
				st->_gstate = G23;
			}
			break;
		case G23:	/* plane 3 - 16: 2nd byte of a character */
			st->_keepc[3] = **inbuf;
			n = iso_cns_to_utf(st, *outbuf, *outbytesleft);
			if (n <= 0)
				goto fail;
			(*outbuf) += n;
			(*outbytesleft) -= n;
			st->_gstate = G22;
			break;
		case G24:	/* Shift in after plane 3 - 16 text */
			if (**inbuf == ESC) {
				st->_gstate = G25;
				st->_buf[st->_bufcont++] = ESC;
			} else {
				**outbuf = **inbuf;
				(*outbuf)++;
				(*outbytesleft)--;
				st->_bufcont = 0;
			}
			break;
		case G25:	/* got ESC in G24 */
			if (**inbuf == '$') {
				st->_gstate = G1;
				continue;
			} else if (**inbuf == SS3) {
				st->_gstate = G22;
				st->_bufcont = 0;
				st->_istate = OUT;
			} else if (flush_buf(st, outbuf, outbytesleft) == -1) {
				goto fail;
			} else {
				st->_gstate = G24;
				st->_istate = IN;
			}
			break;
		default:	/* should never come here */
			st->_errno = errno = EILSEQ;
			st->_gstate = G0;	/* reset state */
			break;
		}	/* end of switch */

		(*inbuf)++;
		(*inbytesleft)--;

		if (st->_errno) {
			break;
		}
		if (errno) {
			return ((size_t)(-1));
		}
	}

	if (st->_errno != 0) {
		/* report the error instead of a positive byte count */
		errno = st->_errno;
		return ((size_t)(-1));
	}
	if (*inbytesleft > 0 && *outbytesleft == 0) {
		errno = E2BIG;
		return ((size_t)(-1));
	}
	/* the whole input was consumed */
	return ((size_t)0);

fail:
	/*
	 * The helpers report their error in st->_errno, in errno, or in
	 * both.  Prefer st->_errno, fall back to errno, and treat "nothing
	 * set" as a full output buffer (iso_cns_to_utf() documents a zero
	 * return as "no space in outbuf").
	 */
	if (st->_errno == 0)
		st->_errno = (errno != 0) ? errno : E2BIG;
	errno = st->_errno;
	return ((size_t)(-1));
}
634
/*
 * Convert the GB2312 character held in st->_keepc[0..1] to UTF-8.
 *
 * st		conversion state; supplies the two input bytes
 * buf		where the UTF-8 bytes are stored
 * buflen	space available at buf
 *
 * Returns the number of bytes stored (2 or 3).  A value that maps outside
 * 0x0080 - 0xffff is written as the 3 byte replacement sequence.
 * On failure nothing is stored, the error code is set in BOTH st->_errno
 * and errno (the caller copies st->_errno into errno, so setting only
 * errno would lose it), and the return value is
 *	-1 with E2BIG	buflen is below 2
 *	-1 with EILSEQ	the bytes give an index outside the mapping table
 *	 0 with E2BIG	3 bytes are needed but buflen is below 3
 */
int iso_gb_to_utf(_iconv_st * st, char* buf, size_t buflen)
{
	int	idx;
	int	unicode;

	if (buflen < 2) {
		st->_errno = errno = E2BIG;
		return -1;
	}

	/* row-major index into the 94 x 94 code table, origin 0x21 */
	idx = (((st->_keepc[0] & 0xff) - 0x21) * 94) +
	    (st->_keepc[1] & 0xff) - 0x21;
	if (idx < 0 || idx > GBMAX - 1) {
		st->_errno = errno = EILSEQ;
		return -1;
	}

	unicode = Unicode[idx];
	if (unicode >= 0x0080 && unicode <= 0x07ff) {
		/* buflen >= 2 was established above */
		*buf = (char)(((unicode >> 6) & 0x1f) | 0xc0);
		*(buf+1) = (char)((unicode & 0x3f) | MSB);
		return 2;
	}

	/* everything below stores 3 bytes */
	if (buflen < 3) {
		st->_errno = errno = E2BIG;
		return 0;
	}

	if (unicode >= 0x0800 && unicode <= 0xffff) {
		*buf = (char)(((unicode >> 12) & 0x0f) | 0xe0);
		*(buf+1) = (char)(((unicode >> 6) & 0x3f) | MSB);
		*(buf+2) = (char)((unicode & 0x3f) | MSB);
		return 3;
	}

	*buf = (char)UTF8_NON_ID_CHAR1;
	*(buf+1) = (char)UTF8_NON_ID_CHAR2;
	*(buf+2) = (char)UTF8_NON_ID_CHAR3;
	return 3;
}
684
685 /*
686 * Return: > 0 - converted with enough space in output buffer
687 * = 0 - no space in outbuf
688 */
iso_cns_to_utf(_iconv_st * st,char * buf,size_t buflen)689 int iso_cns_to_utf(_iconv_st * st, char* buf, size_t buflen) {
690 char cns_str[3];
691 unsigned long cns_val; /* MSB mask off CNS 11643 value */
692 int unidx; /* binary search index */
693 unsigned long utf_val; /* unicode code */
694
695 if (st->_plane == 1) {
696 cns_str[0] = st->_keepc[0] & MSB_OFF;
697 cns_str[1] = st->_keepc[1] & MSB_OFF;
698 } else {
699 cns_str[0] = st->_keepc[0] & MSB_OFF;
700 cns_str[1] = st->_keepc[1] & MSB_OFF;
701 }
702 cns_val = (cns_str[0] << 8) + cns_str[1];
703 if (buflen < 2) {
704 errno = E2BIG;
705 return(0);
706 }
707
708 switch (st->_plane) {
709 case 1:
710 unidx = binsearch(cns_val, cns1_utf_tab, MAX_CNS1_NUM);
711 if (unidx >= 0)
712 utf_val = cns1_utf_tab[unidx].value;
713 break;
714 case 2:
715 unidx = binsearch(cns_val, cns2_utf_tab, MAX_CNS2_NUM);
716 if (unidx >= 0)
717 utf_val = cns2_utf_tab[unidx].value;
718 break;
719 case 3:
720 unidx = binsearch(cns_val, cns3_utf_tab, MAX_CNS3_NUM);
721 if (unidx >= 0)
722 utf_val = cns3_utf_tab[unidx].value;
723 break;
724 default:
725 unidx = -1; /* no mapping from CNS to Unicode out of plane 1,2&3 */
726 break;
727 }
728
729
730 if (unidx < 0) { /* no match from CNS to Unicode */
731 *buf = UTF8_NON_ID_CHAR1;
732 *(buf+1) = UTF8_NON_ID_CHAR2;
733 *(buf+2) = UTF8_NON_ID_CHAR3;
734 return 3;
735 } else {
736 if (utf_val >= 0x0080 && utf_val <= 0x07ff) {
737 if ( buflen < 2 ) {
738 errno = E2BIG;
739 return 0;
740 }
741 *buf = ((utf_val >> 6) & 0x1f) | 0xc0;
742 *(buf+1) = (utf_val & 0x3f) | MSB;
743 return 2;
744 }
745 if (utf_val >= 0x0800 && utf_val <= 0xffff) {
746 if ( buflen < 3 ) {
747 errno = E2BIG;
748 return 0;
749 }
750 *buf = ((utf_val >> 12) & 0x0f) | 0xe0;
751 *(buf+1) = ((utf_val >> 6) & 0x3f) | MSB;
752 *(buf+2) = (utf_val & 0x3f) | MSB;
753 return 3;
754 }
755 if ( buflen < 3 ) {
756 errno = E2BIG;
757 return 0;
758 }
759
760 *buf = UTF8_NON_ID_CHAR1;
761 *(buf+1) = UTF8_NON_ID_CHAR2;
762 *(buf+2) = UTF8_NON_ID_CHAR3;
763 return 3;
764 }
765
766 }
767
768 /* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
binsearch(unsigned long x,table_t v[],int n)769 int binsearch(unsigned long x, table_t v[], int n)
770 {
771 int low, high, mid;
772
773 low = 0;
774 high = n - 1;
775 while (low <= high) {
776 mid = (low + high) / 2;
777 if (x < v[mid].key)
778 high = mid - 1;
779 else if (x > v[mid].key)
780 low = mid + 1;
781 else /* found match */
782 return mid;
783 }
784 return (-1); /* no match */
785 }
786
787
788 #ifdef DEBUG
main(int argc,char ** argv)789 main(int argc, char ** argv) {
790 char *inbuf, *outbuf, *in_tmp, *out_tmp;
791 size_t inbytesleft, outbytesleft;
792 int fd;
793 int i;
794 struct stat s;
795 _iconv_st * st;
796 if (argc < 2) {
797 fprintf(stderr, "Usage: %s input\n", argv[0]);
798 exit(-1);
799 }
800 if ((fd = open(argv[1], O_RDONLY)) == -1) {
801 perror("open");
802 exit(-2);
803 }
804 if (fstat(fd, &s) == -1) {
805 perror("stat");
806 exit(-3);
807 }
808 inbytesleft = outbytesleft = s.st_size;
809 in_tmp = inbuf = (char *)malloc(inbytesleft);
810 out_tmp = outbuf = (char *)malloc(outbytesleft);
811 if (!inbuf || !outbuf) {
812 perror("malloc");
813 exit(-1);
814 }
815 if (read(fd, inbuf, inbytesleft) != inbytesleft) {
816 perror("read");
817 exit(-4);
818 }
819 for (i = 0; i < inbytesleft; i++)
820 fprintf(stderr, "%x\t", *(inbuf+i));
821 fprintf(stderr, "\n");
822 st = (_iconv_st *)_icv_open();
823 if (st == (_iconv_st *) -1) {
824 perror("_icv_open");
825 exit(-1);
826 }
827 if (_icv_iconv(st, \
828 &inbuf, &inbytesleft, \
829 &outbuf, &outbytesleft) == -1) {
830 perror("icv_iconv");
831 fprintf(stderr, "\ninbytesleft = %d\n", inbytesleft);
832 exit(-2);
833 }
834 if (write(1, out_tmp, s.st_size - outbytesleft) == -1) {
835 perror("write");
836 exit(-1);
837 }
838 free(in_tmp);
839 free(out_tmp);
840 close(fd);
841 _icv_close(st);
842 }
843 #endif
844