116d86563SAlexander Pyhalov /*
216d86563SAlexander Pyhalov * CDDL HEADER START
316d86563SAlexander Pyhalov *
416d86563SAlexander Pyhalov * The contents of this file are subject to the terms of the
516d86563SAlexander Pyhalov * Common Development and Distribution License (the "License").
616d86563SAlexander Pyhalov * You may not use this file except in compliance with the License.
716d86563SAlexander Pyhalov *
816d86563SAlexander Pyhalov * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
916d86563SAlexander Pyhalov * or http://www.opensolaris.org/os/licensing.
1016d86563SAlexander Pyhalov * See the License for the specific language governing permissions
1116d86563SAlexander Pyhalov * and limitations under the License.
1216d86563SAlexander Pyhalov *
1316d86563SAlexander Pyhalov * When distributing Covered Code, include this CDDL HEADER in each
1416d86563SAlexander Pyhalov * file and include the License file at src/OPENSOLARIS.LICENSE.
1516d86563SAlexander Pyhalov * If applicable, add the following below this CDDL HEADER, with the
1616d86563SAlexander Pyhalov * fields enclosed by brackets "[]" replaced with your own identifying
1716d86563SAlexander Pyhalov * information: Portions Copyright [yyyy] [name of copyright owner]
1816d86563SAlexander Pyhalov *
1916d86563SAlexander Pyhalov * CDDL HEADER END
2016d86563SAlexander Pyhalov */
2116d86563SAlexander Pyhalov
2216d86563SAlexander Pyhalov /*
2316d86563SAlexander Pyhalov * Copyright (c) 1995, by Sun Microsystems, Inc.
2416d86563SAlexander Pyhalov * All rights reserved.
2516d86563SAlexander Pyhalov */
2616d86563SAlexander Pyhalov
2716d86563SAlexander Pyhalov #include <stdio.h>
2816d86563SAlexander Pyhalov #include <stdlib.h>
2916d86563SAlexander Pyhalov #include <errno.h>
3016d86563SAlexander Pyhalov #include "big5_cns11643.h" /* Big-5 to CNS 11643 mapping table */
3116d86563SAlexander Pyhalov
/* Byte-level constants used while building CNS 11643 (EUC) output. */
#define	MSB	0x80	/* high bit: set on every non-ASCII byte */
#define	MBYTE	0x8e	/* lead byte of a 4-byte (plane 2..16) character */
#define	PMASK	0xa0	/* base of the plane byte: PMASK + plane number */
#define	ONEBYTE	0xff	/* mask that keeps only the low 8 bits */

#define	NON_ID_CHAR	'_'	/* emitted for characters with no mapping */
3816d86563SAlexander Pyhalov
/* Conversion state kept per descriptor; allocated by _icv_open(). */
typedef struct _icv_state {
	char	keepc[2];	/* bytes of the Big-5 character being read */
	short	cstate;		/* current state of the decoder (C0 or C1) */
	int	_errno;		/* error recorded by the current conversion */
} _iconv_st;
4416d86563SAlexander Pyhalov
/* Decoder states: C0 expects a first byte, C1 expects a Big-5 second byte. */
enum _CSTATE {
	C0,
	C1
};
4616d86563SAlexander Pyhalov
4716d86563SAlexander Pyhalov static int big5_2nd_byte(char);
4816d86563SAlexander Pyhalov static int get_plane_no_by_big5(const char, const char, int*, unsigned long*);
4916d86563SAlexander Pyhalov static int big5_to_cns(int, int, unsigned long, char*, size_t);
5016d86563SAlexander Pyhalov static int binsearch(unsigned long, table_t[], int);
5116d86563SAlexander Pyhalov
5216d86563SAlexander Pyhalov /*
5316d86563SAlexander Pyhalov * Open; called from iconv_open()
5416d86563SAlexander Pyhalov */
5516d86563SAlexander Pyhalov void *
_icv_open()5616d86563SAlexander Pyhalov _icv_open()
5716d86563SAlexander Pyhalov {
5816d86563SAlexander Pyhalov _iconv_st *st;
5916d86563SAlexander Pyhalov
6016d86563SAlexander Pyhalov if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
6116d86563SAlexander Pyhalov errno = ENOMEM;
6216d86563SAlexander Pyhalov return ((void *) -1);
6316d86563SAlexander Pyhalov }
6416d86563SAlexander Pyhalov
6516d86563SAlexander Pyhalov st->cstate = C0;
6616d86563SAlexander Pyhalov st->_errno = 0;
6716d86563SAlexander Pyhalov
6816d86563SAlexander Pyhalov #ifdef DEBUG
6916d86563SAlexander Pyhalov fprintf(stderr, "========== iconv(): Big-5 --> CNS 11643 ==========\n");
7016d86563SAlexander Pyhalov #endif
7116d86563SAlexander Pyhalov return ((void *) st);
7216d86563SAlexander Pyhalov }
7316d86563SAlexander Pyhalov
7416d86563SAlexander Pyhalov
7516d86563SAlexander Pyhalov /*
7616d86563SAlexander Pyhalov * Close; called from iconv_close()
7716d86563SAlexander Pyhalov */
7816d86563SAlexander Pyhalov void
_icv_close(_iconv_st * st)7916d86563SAlexander Pyhalov _icv_close(_iconv_st *st)
8016d86563SAlexander Pyhalov {
8116d86563SAlexander Pyhalov if (!st)
8216d86563SAlexander Pyhalov errno = EBADF;
8316d86563SAlexander Pyhalov else
8416d86563SAlexander Pyhalov free(st);
8516d86563SAlexander Pyhalov }
8616d86563SAlexander Pyhalov
8716d86563SAlexander Pyhalov
8816d86563SAlexander Pyhalov /*
8916d86563SAlexander Pyhalov * Actual conversion; called from iconv()
9016d86563SAlexander Pyhalov */
9116d86563SAlexander Pyhalov /*=======================================================
9216d86563SAlexander Pyhalov *
9316d86563SAlexander Pyhalov * State Machine for interpreting Big-5 code
9416d86563SAlexander Pyhalov *
9516d86563SAlexander Pyhalov *=======================================================
9616d86563SAlexander Pyhalov *
9716d86563SAlexander Pyhalov * 1st C
9816d86563SAlexander Pyhalov * +--------> C0 ----------> C1
9916d86563SAlexander Pyhalov * | ascii | 2nd C |
10016d86563SAlexander Pyhalov * ^ v v
10116d86563SAlexander Pyhalov * +----<-----+-----<--------+
10216d86563SAlexander Pyhalov *
10316d86563SAlexander Pyhalov *=======================================================*/
10416d86563SAlexander Pyhalov /*
10516d86563SAlexander Pyhalov * Big-5 encoding range:
10616d86563SAlexander Pyhalov * High byte: 0xA1 - 0xFE ( 94 encoding space)
10716d86563SAlexander Pyhalov * Low byte: 0x40 - 0x7E, 0xA1 - 0xFE ( 157 encoding space)
10816d86563SAlexander Pyhalov * Plane #1: 0xA140 - 0xC8FE ( 6280 encoding space)
10916d86563SAlexander Pyhalov * Plane #2: 0xC940 - 0xFEFE ( 8478 encoding space)
11016d86563SAlexander Pyhalov * Total: 94 * 157 = 14,758 (14758 encoding space)
11116d86563SAlexander Pyhalov */
11216d86563SAlexander Pyhalov size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)11316d86563SAlexander Pyhalov _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
11416d86563SAlexander Pyhalov char **outbuf, size_t *outbytesleft)
11516d86563SAlexander Pyhalov {
11616d86563SAlexander Pyhalov int plane_no, n, unidx;
11716d86563SAlexander Pyhalov unsigned long cnscode;
11816d86563SAlexander Pyhalov
11916d86563SAlexander Pyhalov #ifdef DEBUG
12016d86563SAlexander Pyhalov fprintf(stderr, "=== (Re-entry) iconv(): Big-5 --> CNS 11643 ===\n");
12116d86563SAlexander Pyhalov #endif
12216d86563SAlexander Pyhalov if (st == NULL) {
12316d86563SAlexander Pyhalov errno = EBADF;
12416d86563SAlexander Pyhalov return ((size_t) -1);
12516d86563SAlexander Pyhalov }
12616d86563SAlexander Pyhalov
12716d86563SAlexander Pyhalov if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
12816d86563SAlexander Pyhalov st->cstate = C0;
12916d86563SAlexander Pyhalov st->_errno = 0;
13016d86563SAlexander Pyhalov return ((size_t) 0);
13116d86563SAlexander Pyhalov }
13216d86563SAlexander Pyhalov
13316d86563SAlexander Pyhalov st->_errno = 0; /* reset internal errno */
13416d86563SAlexander Pyhalov errno = 0; /* reset external errno */
13516d86563SAlexander Pyhalov
13616d86563SAlexander Pyhalov /* a state machine for interpreting Big-5 code */
13716d86563SAlexander Pyhalov while (*inbytesleft > 0 && *outbytesleft > 0) {
13816d86563SAlexander Pyhalov switch (st->cstate) {
13916d86563SAlexander Pyhalov case C0: /* assuming ASCII in the beginning */
14016d86563SAlexander Pyhalov if (**inbuf & MSB) {
14116d86563SAlexander Pyhalov st->keepc[0] = (**inbuf);
14216d86563SAlexander Pyhalov st->cstate = C1;
14316d86563SAlexander Pyhalov } else { /* real ASCII */
14416d86563SAlexander Pyhalov **outbuf = **inbuf;
14516d86563SAlexander Pyhalov (*outbuf)++;
14616d86563SAlexander Pyhalov (*outbytesleft)--;
14716d86563SAlexander Pyhalov }
14816d86563SAlexander Pyhalov break;
14916d86563SAlexander Pyhalov case C1: /* Chinese characters: 2nd byte */
15016d86563SAlexander Pyhalov if (big5_2nd_byte(**inbuf) == 0) {
15116d86563SAlexander Pyhalov st->keepc[1] = (**inbuf);
15216d86563SAlexander Pyhalov plane_no = get_plane_no_by_big5(st->keepc[0],
15316d86563SAlexander Pyhalov st->keepc[1], &unidx, &cnscode);
15416d86563SAlexander Pyhalov /* comment these lines, it is legal BIG5 character, but no corresponding CNS character
15516d86563SAlexander Pyhalov if (plane_no < 0) {
15616d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
15716d86563SAlexander Pyhalov break;
15816d86563SAlexander Pyhalov }
15916d86563SAlexander Pyhalov */
16016d86563SAlexander Pyhalov
16116d86563SAlexander Pyhalov n = big5_to_cns(plane_no, unidx, cnscode,
16216d86563SAlexander Pyhalov *outbuf, *outbytesleft);
16316d86563SAlexander Pyhalov if (n > 0) {
16416d86563SAlexander Pyhalov (*outbuf) += n;
16516d86563SAlexander Pyhalov (*outbytesleft) -= n;
16616d86563SAlexander Pyhalov
16716d86563SAlexander Pyhalov st->cstate = C0;
16816d86563SAlexander Pyhalov } else { /* don't reset state */
16916d86563SAlexander Pyhalov st->_errno = errno = E2BIG;
17016d86563SAlexander Pyhalov }
17116d86563SAlexander Pyhalov } else { /* input char doesn't belong
17216d86563SAlexander Pyhalov * to the input code set
17316d86563SAlexander Pyhalov */
17416d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
17516d86563SAlexander Pyhalov }
17616d86563SAlexander Pyhalov break;
17716d86563SAlexander Pyhalov default: /* should never come here */
17816d86563SAlexander Pyhalov st->_errno = errno = EILSEQ;
17916d86563SAlexander Pyhalov st->cstate = C0; /* reset state */
18016d86563SAlexander Pyhalov break;
18116d86563SAlexander Pyhalov }
18216d86563SAlexander Pyhalov
18316d86563SAlexander Pyhalov if (st->_errno) {
18416d86563SAlexander Pyhalov #ifdef DEBUG
18516d86563SAlexander Pyhalov fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\n",
18616d86563SAlexander Pyhalov st->_errno, st->cstate);
18716d86563SAlexander Pyhalov #endif
18816d86563SAlexander Pyhalov break;
18916d86563SAlexander Pyhalov }
19016d86563SAlexander Pyhalov
19116d86563SAlexander Pyhalov (*inbuf)++;
19216d86563SAlexander Pyhalov (*inbytesleft)--;
19316d86563SAlexander Pyhalov }
19416d86563SAlexander Pyhalov
19516d86563SAlexander Pyhalov if (errno) return ((size_t) -1);
19616d86563SAlexander Pyhalov
19716d86563SAlexander Pyhalov if (*inbytesleft == 0 && st->cstate != C0) {
19816d86563SAlexander Pyhalov errno = EINVAL;
19916d86563SAlexander Pyhalov return ((size_t) -1);
20016d86563SAlexander Pyhalov }
20116d86563SAlexander Pyhalov
20216d86563SAlexander Pyhalov if (*inbytesleft > 0 && *outbytesleft == 0) {
20316d86563SAlexander Pyhalov errno = E2BIG;
20416d86563SAlexander Pyhalov return((size_t)-1);
20516d86563SAlexander Pyhalov }
20616d86563SAlexander Pyhalov return (*inbytesleft);
20716d86563SAlexander Pyhalov }
20816d86563SAlexander Pyhalov
20916d86563SAlexander Pyhalov
/*
 * Check whether a byte may appear as the second byte of a Big-5 character,
 * i.e. lies in 0x40 - 0x7E or 0xA1 - 0xFE.
 *
 * Return: 0 - valid Big-5 2nd byte
 *         1 - invalid Big-5 2nd byte
 */
static int big5_2nd_byte(char inbuf)
{
	/* look at the byte as an unsigned value regardless of char signedness */
	unsigned int byte = (unsigned char) inbuf;
	int in_low_range = (byte >= 0x40 && byte <= 0x7E);
	int in_high_range = (byte >= 0xA1 && byte <= 0xFE);

	return ((in_low_range || in_high_range) ? 0 : 1);
}
22516d86563SAlexander Pyhalov
22616d86563SAlexander Pyhalov
22716d86563SAlexander Pyhalov /*
22816d86563SAlexander Pyhalov * Get plane number by Big-5 code; i.e. plane #1 returns 1, #2 returns 2, etc.
22916d86563SAlexander Pyhalov * Returns -1 on error conditions
23016d86563SAlexander Pyhalov *
23116d86563SAlexander Pyhalov * Since binary search of the Big-5 to CNS table is necessary, might as well
23216d86563SAlexander Pyhalov * return index and CNS code matching to the unicode.
23316d86563SAlexander Pyhalov */
get_plane_no_by_big5(const char c1,const char c2,int * unidx,unsigned long * cnscode)23416d86563SAlexander Pyhalov static int get_plane_no_by_big5(const char c1, const char c2,
23516d86563SAlexander Pyhalov int *unidx, unsigned long *cnscode)
23616d86563SAlexander Pyhalov {
23716d86563SAlexander Pyhalov int ret;
23816d86563SAlexander Pyhalov unsigned long big5code;
23916d86563SAlexander Pyhalov
24016d86563SAlexander Pyhalov big5code = (unsigned long) ((c1 & ONEBYTE) << 8) + (c2 & ONEBYTE);
24116d86563SAlexander Pyhalov *unidx = binsearch(big5code, big5_cns_tab, MAX_BIG5_NUM);
24216d86563SAlexander Pyhalov if ((*unidx) >= 0)
24316d86563SAlexander Pyhalov *cnscode = big5_cns_tab[*unidx].value;
24416d86563SAlexander Pyhalov else
24516d86563SAlexander Pyhalov return(0); /* match from Big-5 to CNS not found */
24616d86563SAlexander Pyhalov #ifdef DEBUG
24716d86563SAlexander Pyhalov fprintf(stderr, "Big-5=%04x, idx=%5d, CNS=%x ", big5code, *unidx, *cnscode);
24816d86563SAlexander Pyhalov #endif
24916d86563SAlexander Pyhalov
25016d86563SAlexander Pyhalov ret = (int) (*cnscode >> 16);
25116d86563SAlexander Pyhalov switch (ret) {
25216d86563SAlexander Pyhalov case 0x21: /* 0x8EA1 - G */
25316d86563SAlexander Pyhalov case 0x22: /* 0x8EA2 - H */
25416d86563SAlexander Pyhalov case 0x23: /* 0x8EA3 - I */
25516d86563SAlexander Pyhalov case 0x24: /* 0x8EA4 - J */
25616d86563SAlexander Pyhalov case 0x25: /* 0x8EA5 - K */
25716d86563SAlexander Pyhalov case 0x26: /* 0x8EA6 - L */
25816d86563SAlexander Pyhalov case 0x27: /* 0x8EA7 - M */
25916d86563SAlexander Pyhalov case 0x28: /* 0x8EA8 - N */
26016d86563SAlexander Pyhalov case 0x29: /* 0x8EA9 - O */
26116d86563SAlexander Pyhalov case 0x2a: /* 0x8EAA - P */
26216d86563SAlexander Pyhalov case 0x2b: /* 0x8EAB - Q */
26316d86563SAlexander Pyhalov case 0x2c: /* 0x8EAC - R */
26416d86563SAlexander Pyhalov case 0x2d: /* 0x8EAD - S */
26516d86563SAlexander Pyhalov case 0x2f: /* 0x8EAF - U */
26616d86563SAlexander Pyhalov case 0x30: /* 0x8EB0 - V */
26716d86563SAlexander Pyhalov return (ret - 0x20); /* so that we can use GET_PLANEC() */
26816d86563SAlexander Pyhalov case 0x2e: /* 0x8EAE - T */
26916d86563SAlexander Pyhalov return (3); /* CNS 11643-1992 */
27016d86563SAlexander Pyhalov default:
27116d86563SAlexander Pyhalov return (-1);
27216d86563SAlexander Pyhalov }
27316d86563SAlexander Pyhalov }
27416d86563SAlexander Pyhalov
27516d86563SAlexander Pyhalov
27616d86563SAlexander Pyhalov /*
27716d86563SAlexander Pyhalov * Big-5 code --> CNS 11643 (Chinese EUC)
27816d86563SAlexander Pyhalov * Return: > 0 - converted with enough space in output buffer
27916d86563SAlexander Pyhalov * = 0 - no space in outbuf
28016d86563SAlexander Pyhalov */
big5_to_cns(int plane_no,int unidx,unsigned long cnscode,char * buf,size_t buflen)28116d86563SAlexander Pyhalov static int big5_to_cns(int plane_no, int unidx, unsigned long cnscode,
28216d86563SAlexander Pyhalov char *buf, size_t buflen)
28316d86563SAlexander Pyhalov {
28416d86563SAlexander Pyhalov unsigned long val; /* CNS 11643 value */
28516d86563SAlexander Pyhalov unsigned char c1 = '\0', c2 = '\0', cns_str[5];
28616d86563SAlexander Pyhalov int ret_size; /* return buffer size */
28716d86563SAlexander Pyhalov
28816d86563SAlexander Pyhalov if (unidx < 0) { /* no match from UTF8 to CNS 11643 */
28916d86563SAlexander Pyhalov if ( buflen < 2 ) goto err;
29016d86563SAlexander Pyhalov *buf = *(buf+1) = NON_ID_CHAR;
29116d86563SAlexander Pyhalov ret_size = 2;
29216d86563SAlexander Pyhalov } else {
29316d86563SAlexander Pyhalov val = cnscode & 0xffff;
29416d86563SAlexander Pyhalov c1 = ((val & 0xff00) >> 8) | MSB;
29516d86563SAlexander Pyhalov c2 = (val & 0xff) | MSB;
29616d86563SAlexander Pyhalov }
29716d86563SAlexander Pyhalov
29816d86563SAlexander Pyhalov switch (plane_no) {
29916d86563SAlexander Pyhalov case 1:
30016d86563SAlexander Pyhalov if ( buflen < 2 ) goto err;
30116d86563SAlexander Pyhalov *buf = cns_str[0] = c1;
30216d86563SAlexander Pyhalov *(buf+1) = cns_str[1] = c2;
303*f642269fSToomas Soome cns_str[2] = cns_str[3] = cns_str[4] = '\0';
30416d86563SAlexander Pyhalov ret_size = 2;
30516d86563SAlexander Pyhalov break;
30616d86563SAlexander Pyhalov case 2:
30716d86563SAlexander Pyhalov case 3:
30816d86563SAlexander Pyhalov case 4:
30916d86563SAlexander Pyhalov case 5:
31016d86563SAlexander Pyhalov case 6:
31116d86563SAlexander Pyhalov case 7:
31216d86563SAlexander Pyhalov case 8:
31316d86563SAlexander Pyhalov case 9:
31416d86563SAlexander Pyhalov case 10:
31516d86563SAlexander Pyhalov case 11:
31616d86563SAlexander Pyhalov case 12:
31716d86563SAlexander Pyhalov case 13:
31816d86563SAlexander Pyhalov case 14:
31916d86563SAlexander Pyhalov case 15:
32016d86563SAlexander Pyhalov case 16:
32116d86563SAlexander Pyhalov if ( buflen < 4 ) goto err;
32216d86563SAlexander Pyhalov *(unsigned char*) buf = cns_str[0] = MBYTE;
32316d86563SAlexander Pyhalov *(unsigned char*)(buf+1) = cns_str[1] = PMASK + plane_no;
32416d86563SAlexander Pyhalov *(unsigned char*) (buf+2) = cns_str[2] = c1;
32516d86563SAlexander Pyhalov *(unsigned char*) (buf+3) = cns_str[3] = c2;
326*f642269fSToomas Soome cns_str[4] = '\0';
32716d86563SAlexander Pyhalov ret_size = 4;
32816d86563SAlexander Pyhalov break;
32916d86563SAlexander Pyhalov }
33016d86563SAlexander Pyhalov
33116d86563SAlexander Pyhalov #ifdef DEBUG
33216d86563SAlexander Pyhalov fprintf(stderr, "\t#%d ->%s<-\n", plane_no, cns_str);
33316d86563SAlexander Pyhalov #endif
33416d86563SAlexander Pyhalov
33516d86563SAlexander Pyhalov return(ret_size);
33616d86563SAlexander Pyhalov
33716d86563SAlexander Pyhalov err:
33816d86563SAlexander Pyhalov errno = E2BIG;
33916d86563SAlexander Pyhalov return(0);
34016d86563SAlexander Pyhalov }
34116d86563SAlexander Pyhalov
34216d86563SAlexander Pyhalov
34316d86563SAlexander Pyhalov /* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
binsearch(unsigned long x,table_t v[],int n)34416d86563SAlexander Pyhalov static int binsearch(unsigned long x, table_t v[], int n)
34516d86563SAlexander Pyhalov {
34616d86563SAlexander Pyhalov int low, high, mid;
34716d86563SAlexander Pyhalov
34816d86563SAlexander Pyhalov low = 0;
34916d86563SAlexander Pyhalov high = n - 1;
35016d86563SAlexander Pyhalov while (low <= high) {
35116d86563SAlexander Pyhalov mid = (low + high) / 2;
35216d86563SAlexander Pyhalov if (x < v[mid].key)
35316d86563SAlexander Pyhalov high = mid - 1;
35416d86563SAlexander Pyhalov else if (x > v[mid].key)
35516d86563SAlexander Pyhalov low = mid + 1;
35616d86563SAlexander Pyhalov else /* found match */
35716d86563SAlexander Pyhalov return mid;
35816d86563SAlexander Pyhalov }
35916d86563SAlexander Pyhalov return (-1); /* no match */
36016d86563SAlexander Pyhalov }
361