1*880d7978SAlexander Pyhalov /*
2*880d7978SAlexander Pyhalov * CDDL HEADER START
3*880d7978SAlexander Pyhalov *
4*880d7978SAlexander Pyhalov * The contents of this file are subject to the terms of the
5*880d7978SAlexander Pyhalov * Common Development and Distribution License (the "License").
6*880d7978SAlexander Pyhalov * You may not use this file except in compliance with the License.
7*880d7978SAlexander Pyhalov *
8*880d7978SAlexander Pyhalov * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9*880d7978SAlexander Pyhalov * or http://www.opensolaris.org/os/licensing.
10*880d7978SAlexander Pyhalov * See the License for the specific language governing permissions
11*880d7978SAlexander Pyhalov * and limitations under the License.
12*880d7978SAlexander Pyhalov *
13*880d7978SAlexander Pyhalov * When distributing Covered Code, include this CDDL HEADER in each
14*880d7978SAlexander Pyhalov * file and include the License file at src/OPENSOLARIS.LICENSE.
15*880d7978SAlexander Pyhalov * If applicable, add the following below this CDDL HEADER, with the
16*880d7978SAlexander Pyhalov * fields enclosed by brackets "[]" replaced with your own identifying
17*880d7978SAlexander Pyhalov * information: Portions Copyright [yyyy] [name of copyright owner]
18*880d7978SAlexander Pyhalov *
19*880d7978SAlexander Pyhalov * CDDL HEADER END
20*880d7978SAlexander Pyhalov */
21*880d7978SAlexander Pyhalov /*
22*880d7978SAlexander Pyhalov * Copyright (c) 2008, by Sun Microsystems, Inc.
23*880d7978SAlexander Pyhalov * All rights reserved.
24*880d7978SAlexander Pyhalov */
25*880d7978SAlexander Pyhalov #include <stdio.h>
26*880d7978SAlexander Pyhalov #include <errno.h>
27*880d7978SAlexander Pyhalov #include <stdlib.h>
28*880d7978SAlexander Pyhalov #include <sys/types.h>
29*880d7978SAlexander Pyhalov #define __NEED_UNI_2_TCVN__
30*880d7978SAlexander Pyhalov #include <unicode_tcvn.h> /* Unicode to TCVN mapping table */
31*880d7978SAlexander Pyhalov #include "common_defs.h"
32*880d7978SAlexander Pyhalov #define NON_ID_CHAR '?' /* non-identified character */
33*880d7978SAlexander Pyhalov
/*
 * Per-descriptor conversion state. The only field is an internal copy of
 * the last error code recorded by the conversion routines in this file.
 */
struct _icv_state {
	int	_errno;		/* internal errno */
};
typedef struct _icv_state _iconv_st;
37*880d7978SAlexander Pyhalov
38*880d7978SAlexander Pyhalov
39*880d7978SAlexander Pyhalov /*
40*880d7978SAlexander Pyhalov * Open; called from iconv_open()
41*880d7978SAlexander Pyhalov */
42*880d7978SAlexander Pyhalov void *
_icv_open()43*880d7978SAlexander Pyhalov _icv_open()
44*880d7978SAlexander Pyhalov {
45*880d7978SAlexander Pyhalov _iconv_st *st;
46*880d7978SAlexander Pyhalov
47*880d7978SAlexander Pyhalov if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
48*880d7978SAlexander Pyhalov errno = ENOMEM;
49*880d7978SAlexander Pyhalov return ((void *) -1);
50*880d7978SAlexander Pyhalov }
51*880d7978SAlexander Pyhalov
52*880d7978SAlexander Pyhalov st->_errno = 0;
53*880d7978SAlexander Pyhalov return ((void *) st);
54*880d7978SAlexander Pyhalov }
55*880d7978SAlexander Pyhalov
56*880d7978SAlexander Pyhalov
57*880d7978SAlexander Pyhalov /*
58*880d7978SAlexander Pyhalov * Close; called from iconv_close()
59*880d7978SAlexander Pyhalov */
60*880d7978SAlexander Pyhalov void
_icv_close(_iconv_st * st)61*880d7978SAlexander Pyhalov _icv_close(_iconv_st *st)
62*880d7978SAlexander Pyhalov {
63*880d7978SAlexander Pyhalov if (!st)
64*880d7978SAlexander Pyhalov errno = EBADF;
65*880d7978SAlexander Pyhalov else
66*880d7978SAlexander Pyhalov free(st);
67*880d7978SAlexander Pyhalov }
68*880d7978SAlexander Pyhalov
69*880d7978SAlexander Pyhalov
70*880d7978SAlexander Pyhalov /*
71*880d7978SAlexander Pyhalov * Actual conversion; called from iconv()
72*880d7978SAlexander Pyhalov */
73*880d7978SAlexander Pyhalov size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)74*880d7978SAlexander Pyhalov _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
75*880d7978SAlexander Pyhalov char **outbuf, size_t *outbytesleft)
76*880d7978SAlexander Pyhalov {
77*880d7978SAlexander Pyhalov int utf8_len = 1;
78*880d7978SAlexander Pyhalov int no_id_char_num = 0;
79*880d7978SAlexander Pyhalov unsigned char *op = (unsigned char*)*inbuf;
80*880d7978SAlexander Pyhalov #ifdef DEBUG
81*880d7978SAlexander Pyhalov fprintf(stderr, "========== iconv(): UCS-2 --> TCVN5712 ==========\n");
82*880d7978SAlexander Pyhalov #endif
83*880d7978SAlexander Pyhalov if (st == NULL) {
84*880d7978SAlexander Pyhalov errno = EBADF;
85*880d7978SAlexander Pyhalov return ((size_t) -1);
86*880d7978SAlexander Pyhalov }
87*880d7978SAlexander Pyhalov
88*880d7978SAlexander Pyhalov if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
89*880d7978SAlexander Pyhalov st->_errno = 0;
90*880d7978SAlexander Pyhalov return ((size_t) 0);
91*880d7978SAlexander Pyhalov }
92*880d7978SAlexander Pyhalov
93*880d7978SAlexander Pyhalov st->_errno = 0; /* Rreset internal errno */
94*880d7978SAlexander Pyhalov errno = 0; /* Rreset external errno */
95*880d7978SAlexander Pyhalov
96*880d7978SAlexander Pyhalov /* Convert UTF-8 encoding to TCVN5712 */
97*880d7978SAlexander Pyhalov while (*inbytesleft > 0 && *outbytesleft > 0) {
98*880d7978SAlexander Pyhalov unsigned long uni = 0;
99*880d7978SAlexander Pyhalov unsigned char ch = 0;
100*880d7978SAlexander Pyhalov unsigned long temp1 = 0,
101*880d7978SAlexander Pyhalov temp2 = 0,
102*880d7978SAlexander Pyhalov temp3 = 0;
103*880d7978SAlexander Pyhalov
104*880d7978SAlexander Pyhalov if(0x00 == (*op & 0x80)) {
105*880d7978SAlexander Pyhalov /* 1 byte UTF-8 Charater.*/
106*880d7978SAlexander Pyhalov uni = (unsigned short)*op;
107*880d7978SAlexander Pyhalov utf8_len = 1;
108*880d7978SAlexander Pyhalov goto conving;
109*880d7978SAlexander Pyhalov }
110*880d7978SAlexander Pyhalov
111*880d7978SAlexander Pyhalov if (*inbytesleft < 2)
112*880d7978SAlexander Pyhalov goto errexit;
113*880d7978SAlexander Pyhalov if ( 0xc0 == (*op & 0xe0) &&
114*880d7978SAlexander Pyhalov 0x80 == (*(op + 1) & 0xc0) ) {
115*880d7978SAlexander Pyhalov /* 2 bytes UTF-8 Charater.*/
116*880d7978SAlexander Pyhalov temp1 = (unsigned short)(*op & 0x1f);
117*880d7978SAlexander Pyhalov temp1 <<= 6;
118*880d7978SAlexander Pyhalov temp1 |= (unsigned short)(*(op + 1) & 0x3f);
119*880d7978SAlexander Pyhalov uni = temp1;
120*880d7978SAlexander Pyhalov utf8_len = 2;
121*880d7978SAlexander Pyhalov goto conving;
122*880d7978SAlexander Pyhalov }
123*880d7978SAlexander Pyhalov
124*880d7978SAlexander Pyhalov if (*inbytesleft < 3)
125*880d7978SAlexander Pyhalov goto errexit;
126*880d7978SAlexander Pyhalov if ( 0xe0 == (*op & 0xf0) &&
127*880d7978SAlexander Pyhalov 0x80 == (*(op + 1) & 0xc0) &&
128*880d7978SAlexander Pyhalov 0x80 == (*(op + 2) & 0xc0) ) {
129*880d7978SAlexander Pyhalov /* 3bytes UTF-8 Charater.*/
130*880d7978SAlexander Pyhalov temp1 = (unsigned short)(*op &0x0f);
131*880d7978SAlexander Pyhalov temp1 <<= 12;
132*880d7978SAlexander Pyhalov temp2 = (unsigned short)(*(op+1) & 0x3F);
133*880d7978SAlexander Pyhalov temp2 <<= 6;
134*880d7978SAlexander Pyhalov temp1 = temp1 | temp2 | (unsigned short)(*(op+2) & 0x3F);
135*880d7978SAlexander Pyhalov uni = temp1;
136*880d7978SAlexander Pyhalov utf8_len = 3;
137*880d7978SAlexander Pyhalov goto conving;
138*880d7978SAlexander Pyhalov }
139*880d7978SAlexander Pyhalov
140*880d7978SAlexander Pyhalov if (*inbytesleft < 4)
141*880d7978SAlexander Pyhalov goto errexit;
142*880d7978SAlexander Pyhalov if ( 0xf0 == (*op & 0xf8) &&
143*880d7978SAlexander Pyhalov 0x80 == (*(op + 1) & 0xc0) &&
144*880d7978SAlexander Pyhalov 0x80 == (*(op + 2) & 0xc0) ) {
145*880d7978SAlexander Pyhalov /* 4bytes UTF-8 Charater.*/
146*880d7978SAlexander Pyhalov temp1 = *op &0x07;
147*880d7978SAlexander Pyhalov temp1 <<= 18;
148*880d7978SAlexander Pyhalov temp2 = (*(op+1) & 0x3F);
149*880d7978SAlexander Pyhalov temp2 <<= 12;
150*880d7978SAlexander Pyhalov temp3 = (*(op+1) & 0x3F);
151*880d7978SAlexander Pyhalov temp3 <<= 6;
152*880d7978SAlexander Pyhalov temp1 = temp1 | temp2 | temp3 |(unsigned long)(*(op+2) & 0x3F);
153*880d7978SAlexander Pyhalov uni = temp1;
154*880d7978SAlexander Pyhalov utf8_len = 4;
155*880d7978SAlexander Pyhalov goto conving;
156*880d7978SAlexander Pyhalov }
157*880d7978SAlexander Pyhalov
158*880d7978SAlexander Pyhalov /* unrecognize byte. */
159*880d7978SAlexander Pyhalov st->_errno = errno = EILSEQ;
160*880d7978SAlexander Pyhalov errno = EILSEQ;
161*880d7978SAlexander Pyhalov return ((size_t)-1);
162*880d7978SAlexander Pyhalov
163*880d7978SAlexander Pyhalov conving:
164*880d7978SAlexander Pyhalov if (uni_2_tcvn(uni, &ch) == 1) {
165*880d7978SAlexander Pyhalov **outbuf = ch;
166*880d7978SAlexander Pyhalov } else {
167*880d7978SAlexander Pyhalov **outbuf = NON_ID_CHAR;
168*880d7978SAlexander Pyhalov no_id_char_num += 1;
169*880d7978SAlexander Pyhalov }
170*880d7978SAlexander Pyhalov (*outbuf) += 1;
171*880d7978SAlexander Pyhalov (*outbytesleft) -= 1;
172*880d7978SAlexander Pyhalov op += utf8_len;
173*880d7978SAlexander Pyhalov (*inbytesleft) -= utf8_len;
174*880d7978SAlexander Pyhalov
175*880d7978SAlexander Pyhalov }
176*880d7978SAlexander Pyhalov
177*880d7978SAlexander Pyhalov return ((size_t)no_id_char_num);
178*880d7978SAlexander Pyhalov
179*880d7978SAlexander Pyhalov errexit:
180*880d7978SAlexander Pyhalov st->_errno = errno = EINVAL;
181*880d7978SAlexander Pyhalov errno = EINVAL;
182*880d7978SAlexander Pyhalov return ((size_t)-1);
183*880d7978SAlexander Pyhalov }
184