1*91e1e26aSAlexander Pyhalov /*
2*91e1e26aSAlexander Pyhalov * CDDL HEADER START
3*91e1e26aSAlexander Pyhalov *
4*91e1e26aSAlexander Pyhalov * The contents of this file are subject to the terms of the
5*91e1e26aSAlexander Pyhalov * Common Development and Distribution License (the "License").
6*91e1e26aSAlexander Pyhalov * You may not use this file except in compliance with the License.
7*91e1e26aSAlexander Pyhalov *
8*91e1e26aSAlexander Pyhalov * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9*91e1e26aSAlexander Pyhalov * or http://www.opensolaris.org/os/licensing.
10*91e1e26aSAlexander Pyhalov * See the License for the specific language governing permissions
11*91e1e26aSAlexander Pyhalov * and limitations under the License.
12*91e1e26aSAlexander Pyhalov *
13*91e1e26aSAlexander Pyhalov * When distributing Covered Code, include this CDDL HEADER in each
14*91e1e26aSAlexander Pyhalov * file and include the License file at src/OPENSOLARIS.LICENSE.
15*91e1e26aSAlexander Pyhalov * If applicable, add the following below this CDDL HEADER, with the
16*91e1e26aSAlexander Pyhalov * fields enclosed by brackets "[]" replaced with your own identifying
17*91e1e26aSAlexander Pyhalov * information: Portions Copyright [yyyy] [name of copyright owner]
18*91e1e26aSAlexander Pyhalov *
19*91e1e26aSAlexander Pyhalov * CDDL HEADER END
20*91e1e26aSAlexander Pyhalov */
21*91e1e26aSAlexander Pyhalov /*
22*91e1e26aSAlexander Pyhalov * Copyright (c) 2008, by Sun Microsystems, Inc.
23*91e1e26aSAlexander Pyhalov * All rights reserved.
24*91e1e26aSAlexander Pyhalov */
25*91e1e26aSAlexander Pyhalov #include <stdio.h>
26*91e1e26aSAlexander Pyhalov #include <errno.h>
27*91e1e26aSAlexander Pyhalov #include <stdlib.h>
28*91e1e26aSAlexander Pyhalov #include <sys/types.h>
29*91e1e26aSAlexander Pyhalov #define __NEED_UNI_2_TCVN__
30*91e1e26aSAlexander Pyhalov #include <unicode_tcvn.h> /* Unicode to TCVN mapping table */
31*91e1e26aSAlexander Pyhalov #include "common_defs.h"
32*91e1e26aSAlexander Pyhalov #define NON_ID_CHAR '?' /* non-identified character */
33*91e1e26aSAlexander Pyhalov
/*
 * Conversion state handed back by _icv_open() and passed to the other
 * entry points in this file.
 */
typedef struct _icv_state {
	int	_errno;		/* internal errno */
} _iconv_st;
37*91e1e26aSAlexander Pyhalov
38*91e1e26aSAlexander Pyhalov
39*91e1e26aSAlexander Pyhalov /*
40*91e1e26aSAlexander Pyhalov * Open; called from iconv_open()
41*91e1e26aSAlexander Pyhalov */
42*91e1e26aSAlexander Pyhalov void *
_icv_open()43*91e1e26aSAlexander Pyhalov _icv_open()
44*91e1e26aSAlexander Pyhalov {
45*91e1e26aSAlexander Pyhalov _iconv_st *st;
46*91e1e26aSAlexander Pyhalov
47*91e1e26aSAlexander Pyhalov if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
48*91e1e26aSAlexander Pyhalov errno = ENOMEM;
49*91e1e26aSAlexander Pyhalov return ((void *) -1);
50*91e1e26aSAlexander Pyhalov }
51*91e1e26aSAlexander Pyhalov
52*91e1e26aSAlexander Pyhalov st->_errno = 0;
53*91e1e26aSAlexander Pyhalov return ((void *) st);
54*91e1e26aSAlexander Pyhalov }
55*91e1e26aSAlexander Pyhalov
56*91e1e26aSAlexander Pyhalov
57*91e1e26aSAlexander Pyhalov /*
58*91e1e26aSAlexander Pyhalov * Close; called from iconv_close()
59*91e1e26aSAlexander Pyhalov */
60*91e1e26aSAlexander Pyhalov void
_icv_close(_iconv_st * st)61*91e1e26aSAlexander Pyhalov _icv_close(_iconv_st *st)
62*91e1e26aSAlexander Pyhalov {
63*91e1e26aSAlexander Pyhalov if (!st)
64*91e1e26aSAlexander Pyhalov errno = EBADF;
65*91e1e26aSAlexander Pyhalov else
66*91e1e26aSAlexander Pyhalov free(st);
67*91e1e26aSAlexander Pyhalov }
68*91e1e26aSAlexander Pyhalov
69*91e1e26aSAlexander Pyhalov
70*91e1e26aSAlexander Pyhalov /*
71*91e1e26aSAlexander Pyhalov * Actual conversion; called from iconv()
72*91e1e26aSAlexander Pyhalov */
73*91e1e26aSAlexander Pyhalov size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)74*91e1e26aSAlexander Pyhalov _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
75*91e1e26aSAlexander Pyhalov char **outbuf, size_t *outbytesleft)
76*91e1e26aSAlexander Pyhalov {
77*91e1e26aSAlexander Pyhalov int utf8_len = 1;
78*91e1e26aSAlexander Pyhalov int no_id_char_num = 0;
79*91e1e26aSAlexander Pyhalov unsigned char *op = (unsigned char*)*inbuf;
80*91e1e26aSAlexander Pyhalov #ifdef DEBUG
81*91e1e26aSAlexander Pyhalov fprintf(stderr, "========== iconv(): UCS-2 --> TCVN5712 ==========\n");
82*91e1e26aSAlexander Pyhalov #endif
83*91e1e26aSAlexander Pyhalov if (st == NULL) {
84*91e1e26aSAlexander Pyhalov errno = EBADF;
85*91e1e26aSAlexander Pyhalov return ((size_t) -1);
86*91e1e26aSAlexander Pyhalov }
87*91e1e26aSAlexander Pyhalov
88*91e1e26aSAlexander Pyhalov if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
89*91e1e26aSAlexander Pyhalov st->_errno = 0;
90*91e1e26aSAlexander Pyhalov return ((size_t) 0);
91*91e1e26aSAlexander Pyhalov }
92*91e1e26aSAlexander Pyhalov
93*91e1e26aSAlexander Pyhalov st->_errno = 0; /* Rreset internal errno */
94*91e1e26aSAlexander Pyhalov errno = 0; /* Rreset external errno */
95*91e1e26aSAlexander Pyhalov
96*91e1e26aSAlexander Pyhalov /* Convert UTF-8 encoding to TCVN5712 */
97*91e1e26aSAlexander Pyhalov while (*inbytesleft > 0 && *outbytesleft > 0) {
98*91e1e26aSAlexander Pyhalov unsigned long uni = 0;
99*91e1e26aSAlexander Pyhalov unsigned char ch = 0;
100*91e1e26aSAlexander Pyhalov unsigned long temp1 = 0,
101*91e1e26aSAlexander Pyhalov temp2 = 0,
102*91e1e26aSAlexander Pyhalov temp3 = 0;
103*91e1e26aSAlexander Pyhalov
104*91e1e26aSAlexander Pyhalov if(0x00 == (*op & 0x80)) {
105*91e1e26aSAlexander Pyhalov /* 1 byte UTF-8 Charater.*/
106*91e1e26aSAlexander Pyhalov uni = (unsigned short)*op;
107*91e1e26aSAlexander Pyhalov utf8_len = 1;
108*91e1e26aSAlexander Pyhalov goto conving;
109*91e1e26aSAlexander Pyhalov }
110*91e1e26aSAlexander Pyhalov
111*91e1e26aSAlexander Pyhalov if (*inbytesleft < 2)
112*91e1e26aSAlexander Pyhalov goto errexit;
113*91e1e26aSAlexander Pyhalov if ( 0xc0 == (*op & 0xe0) &&
114*91e1e26aSAlexander Pyhalov 0x80 == (*(op + 1) & 0xc0) ) {
115*91e1e26aSAlexander Pyhalov /* 2 bytes UTF-8 Charater.*/
116*91e1e26aSAlexander Pyhalov temp1 = (unsigned short)(*op & 0x1f);
117*91e1e26aSAlexander Pyhalov temp1 <<= 6;
118*91e1e26aSAlexander Pyhalov temp1 |= (unsigned short)(*(op + 1) & 0x3f);
119*91e1e26aSAlexander Pyhalov uni = temp1;
120*91e1e26aSAlexander Pyhalov utf8_len = 2;
121*91e1e26aSAlexander Pyhalov goto conving;
122*91e1e26aSAlexander Pyhalov }
123*91e1e26aSAlexander Pyhalov
124*91e1e26aSAlexander Pyhalov if (*inbytesleft < 3)
125*91e1e26aSAlexander Pyhalov goto errexit;
126*91e1e26aSAlexander Pyhalov if ( 0xe0 == (*op & 0xf0) &&
127*91e1e26aSAlexander Pyhalov 0x80 == (*(op + 1) & 0xc0) &&
128*91e1e26aSAlexander Pyhalov 0x80 == (*(op + 2) & 0xc0) ) {
129*91e1e26aSAlexander Pyhalov /* 3bytes UTF-8 Charater.*/
130*91e1e26aSAlexander Pyhalov temp1 = (unsigned short)(*op &0x0f);
131*91e1e26aSAlexander Pyhalov temp1 <<= 12;
132*91e1e26aSAlexander Pyhalov temp2 = (unsigned short)(*(op+1) & 0x3F);
133*91e1e26aSAlexander Pyhalov temp2 <<= 6;
134*91e1e26aSAlexander Pyhalov temp1 = temp1 | temp2 | (unsigned short)(*(op+2) & 0x3F);
135*91e1e26aSAlexander Pyhalov uni = temp1;
136*91e1e26aSAlexander Pyhalov utf8_len = 3;
137*91e1e26aSAlexander Pyhalov goto conving;
138*91e1e26aSAlexander Pyhalov }
139*91e1e26aSAlexander Pyhalov
140*91e1e26aSAlexander Pyhalov if (*inbytesleft < 4)
141*91e1e26aSAlexander Pyhalov goto errexit;
142*91e1e26aSAlexander Pyhalov if ( 0xf0 == (*op & 0xf8) &&
143*91e1e26aSAlexander Pyhalov 0x80 == (*(op + 1) & 0xc0) &&
144*91e1e26aSAlexander Pyhalov 0x80 == (*(op + 2) & 0xc0) ) {
145*91e1e26aSAlexander Pyhalov /* 4bytes UTF-8 Charater.*/
146*91e1e26aSAlexander Pyhalov temp1 = *op &0x07;
147*91e1e26aSAlexander Pyhalov temp1 <<= 18;
148*91e1e26aSAlexander Pyhalov temp2 = (*(op+1) & 0x3F);
149*91e1e26aSAlexander Pyhalov temp2 <<= 12;
150*91e1e26aSAlexander Pyhalov temp3 = (*(op+1) & 0x3F);
151*91e1e26aSAlexander Pyhalov temp3 <<= 6;
152*91e1e26aSAlexander Pyhalov temp1 = temp1 | temp2 | temp3 |(unsigned long)(*(op+2) & 0x3F);
153*91e1e26aSAlexander Pyhalov uni = temp1;
154*91e1e26aSAlexander Pyhalov utf8_len = 4;
155*91e1e26aSAlexander Pyhalov goto conving;
156*91e1e26aSAlexander Pyhalov }
157*91e1e26aSAlexander Pyhalov
158*91e1e26aSAlexander Pyhalov /* unrecognize byte. */
159*91e1e26aSAlexander Pyhalov st->_errno = errno = EILSEQ;
160*91e1e26aSAlexander Pyhalov errno = EILSEQ;
161*91e1e26aSAlexander Pyhalov return ((size_t)-1);
162*91e1e26aSAlexander Pyhalov
163*91e1e26aSAlexander Pyhalov conving:
164*91e1e26aSAlexander Pyhalov if (uni_2_tcvn(uni, &ch) == 1) {
165*91e1e26aSAlexander Pyhalov **outbuf = ch;
166*91e1e26aSAlexander Pyhalov } else {
167*91e1e26aSAlexander Pyhalov **outbuf = NON_ID_CHAR;
168*91e1e26aSAlexander Pyhalov no_id_char_num += 1;
169*91e1e26aSAlexander Pyhalov }
170*91e1e26aSAlexander Pyhalov (*outbuf) += 1;
171*91e1e26aSAlexander Pyhalov (*outbytesleft) -= 1;
172*91e1e26aSAlexander Pyhalov op += utf8_len;
173*91e1e26aSAlexander Pyhalov (*inbytesleft) -= utf8_len;
174*91e1e26aSAlexander Pyhalov
175*91e1e26aSAlexander Pyhalov }
176*91e1e26aSAlexander Pyhalov
177*91e1e26aSAlexander Pyhalov return ((size_t)no_id_char_num);
178*91e1e26aSAlexander Pyhalov
179*91e1e26aSAlexander Pyhalov errexit:
180*91e1e26aSAlexander Pyhalov st->_errno = errno = EINVAL;
181*91e1e26aSAlexander Pyhalov errno = EINVAL;
182*91e1e26aSAlexander Pyhalov return ((size_t)-1);
183*91e1e26aSAlexander Pyhalov }
184