/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2008, by Sun Microsystems, Inc.
 * All rights reserved.
 */
25*91e1e26aSAlexander Pyhalov
26*91e1e26aSAlexander Pyhalov #include <stdio.h>
27*91e1e26aSAlexander Pyhalov #include <errno.h>
28*91e1e26aSAlexander Pyhalov #include <stdlib.h>
29*91e1e26aSAlexander Pyhalov #include <sys/types.h>
30*91e1e26aSAlexander Pyhalov #define __NEED_TCVN_2_UNI__
31*91e1e26aSAlexander Pyhalov #include <unicode_tcvn.h> /* Unicode to tcvn mapping table */
32*91e1e26aSAlexander Pyhalov #include <vi_combine.h>
33*91e1e26aSAlexander Pyhalov #include "common_defs.h"
34*91e1e26aSAlexander Pyhalov
35*91e1e26aSAlexander Pyhalov
/*
 * Per-descriptor conversion state.  One instance is allocated by
 * _icv_open() and released by _icv_close().
 */
typedef struct _icv_state {
    /* internal errno; reset to 0 at the start of every _icv_iconv() call */
    int _errno;

    /*
     * Base character that _icv_iconv() has consumed from the input but
     * not yet written, because the next input byte may be a combining
     * mark that composes with it.  0 means nothing is pending.
     */
    unsigned short last;
} _iconv_st;
40*91e1e26aSAlexander Pyhalov
41*91e1e26aSAlexander Pyhalov
42*91e1e26aSAlexander Pyhalov static int binsearch(unsigned long x, Combine_map v[], int n);
43*91e1e26aSAlexander Pyhalov
44*91e1e26aSAlexander Pyhalov /*
45*91e1e26aSAlexander Pyhalov * Open; called from iconv_open()
46*91e1e26aSAlexander Pyhalov */
47*91e1e26aSAlexander Pyhalov void *
_icv_open()48*91e1e26aSAlexander Pyhalov _icv_open()
49*91e1e26aSAlexander Pyhalov {
50*91e1e26aSAlexander Pyhalov _iconv_st *st;
51*91e1e26aSAlexander Pyhalov
52*91e1e26aSAlexander Pyhalov if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
53*91e1e26aSAlexander Pyhalov errno = ENOMEM;
54*91e1e26aSAlexander Pyhalov return ((void *) -1);
55*91e1e26aSAlexander Pyhalov }
56*91e1e26aSAlexander Pyhalov
57*91e1e26aSAlexander Pyhalov st->_errno = 0;
58*91e1e26aSAlexander Pyhalov return ((void *) st);
59*91e1e26aSAlexander Pyhalov }
60*91e1e26aSAlexander Pyhalov
61*91e1e26aSAlexander Pyhalov
62*91e1e26aSAlexander Pyhalov /*
63*91e1e26aSAlexander Pyhalov * Close; called from iconv_close()
64*91e1e26aSAlexander Pyhalov */
65*91e1e26aSAlexander Pyhalov void
_icv_close(_iconv_st * st)66*91e1e26aSAlexander Pyhalov _icv_close(_iconv_st *st)
67*91e1e26aSAlexander Pyhalov {
68*91e1e26aSAlexander Pyhalov if (!st)
69*91e1e26aSAlexander Pyhalov errno = EBADF;
70*91e1e26aSAlexander Pyhalov else
71*91e1e26aSAlexander Pyhalov free(st);
72*91e1e26aSAlexander Pyhalov }
73*91e1e26aSAlexander Pyhalov
74*91e1e26aSAlexander Pyhalov
75*91e1e26aSAlexander Pyhalov /*
76*91e1e26aSAlexander Pyhalov * Actual conversion; called from iconv()
77*91e1e26aSAlexander Pyhalov */
78*91e1e26aSAlexander Pyhalov size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)79*91e1e26aSAlexander Pyhalov _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
80*91e1e26aSAlexander Pyhalov char **outbuf, size_t *outbytesleft)
81*91e1e26aSAlexander Pyhalov {
82*91e1e26aSAlexander Pyhalov int unidx = -1;
83*91e1e26aSAlexander Pyhalov #ifdef DEBUG
84*91e1e26aSAlexander Pyhalov fprintf(stderr, "========== iconv(): TCVN5712 -->UCS-2 ==========\n");
85*91e1e26aSAlexander Pyhalov #endif
86*91e1e26aSAlexander Pyhalov if (st == NULL) {
87*91e1e26aSAlexander Pyhalov errno = EBADF;
88*91e1e26aSAlexander Pyhalov return ((size_t) -1);
89*91e1e26aSAlexander Pyhalov }
90*91e1e26aSAlexander Pyhalov
91*91e1e26aSAlexander Pyhalov if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
92*91e1e26aSAlexander Pyhalov st->_errno = 0;
93*91e1e26aSAlexander Pyhalov return ((size_t) 0);
94*91e1e26aSAlexander Pyhalov }
95*91e1e26aSAlexander Pyhalov
96*91e1e26aSAlexander Pyhalov st->_errno = 0; /* Reset internal errno */
97*91e1e26aSAlexander Pyhalov errno = 0; /* Reset external errno */
98*91e1e26aSAlexander Pyhalov
99*91e1e26aSAlexander Pyhalov /* Convert tcvn encoding to UCS-2 */
100*91e1e26aSAlexander Pyhalov while (*inbytesleft > 0 && *outbytesleft > 0) {
101*91e1e26aSAlexander Pyhalov unsigned long uni = 0;
102*91e1e26aSAlexander Pyhalov
103*91e1e26aSAlexander Pyhalov tcvn_2_uni((unsigned char*)*inbuf, &uni);
104*91e1e26aSAlexander Pyhalov if (st->last != 0) {
105*91e1e26aSAlexander Pyhalov if (ISCOMB_UNI(uni)) {
106*91e1e26aSAlexander Pyhalov /*
107*91e1e26aSAlexander Pyhalov * Composed characters with combine character
108*91e1e26aSAlexander Pyhalov */
109*91e1e26aSAlexander Pyhalov unsigned int k = 0;
110*91e1e26aSAlexander Pyhalov switch (uni) {
111*91e1e26aSAlexander Pyhalov case 0x0300: k = 0; break;
112*91e1e26aSAlexander Pyhalov case 0x0301: k = 1; break;
113*91e1e26aSAlexander Pyhalov case 0x0303: k = 2; break;
114*91e1e26aSAlexander Pyhalov case 0x0309: k = 3; break;
115*91e1e26aSAlexander Pyhalov case 0x0323: k = 4; break;
116*91e1e26aSAlexander Pyhalov default:
117*91e1e26aSAlexander Pyhalov break;
118*91e1e26aSAlexander Pyhalov }
119*91e1e26aSAlexander Pyhalov unidx = binsearch(st->last, vi_comb_data, VOWEL_NUM);
120*91e1e26aSAlexander Pyhalov if (unidx >= 0) {
121*91e1e26aSAlexander Pyhalov uni = vi_comb_data[unidx].composed[k];
122*91e1e26aSAlexander Pyhalov } else {
123*91e1e26aSAlexander Pyhalov errno = EBADF;
124*91e1e26aSAlexander Pyhalov }
125*91e1e26aSAlexander Pyhalov st->last = 0;
126*91e1e26aSAlexander Pyhalov
127*91e1e26aSAlexander Pyhalov } else {
128*91e1e26aSAlexander Pyhalov if (st->last < 0x80) {
129*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)st->last;
130*91e1e26aSAlexander Pyhalov (*outbytesleft) -= 1;
131*91e1e26aSAlexander Pyhalov } else if (st->last >= 0x0080 && st->last <= 0x07ff) {
132*91e1e26aSAlexander Pyhalov if (*outbytesleft < 2) {
133*91e1e26aSAlexander Pyhalov errno = E2BIG;
134*91e1e26aSAlexander Pyhalov return((size_t)-1);
135*91e1e26aSAlexander Pyhalov }
136*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)((st->last >> 6) & 0x1f) | 0xc0;
137*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)(st->last & 0x3f) | 0x80;
138*91e1e26aSAlexander Pyhalov (*outbytesleft) -= 2;
139*91e1e26aSAlexander Pyhalov } else if (st->last >= 0x0800) {
140*91e1e26aSAlexander Pyhalov if (*outbytesleft < 3) {
141*91e1e26aSAlexander Pyhalov errno = E2BIG;
142*91e1e26aSAlexander Pyhalov return((size_t)-1);
143*91e1e26aSAlexander Pyhalov }
144*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)((st->last >> 12) & 0xf) | 0xe0;
145*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)((st->last >>6) & 0x3f) | 0x80;
146*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)(st->last & 0x3f) | 0x80;
147*91e1e26aSAlexander Pyhalov (*outbytesleft) -= 3;
148*91e1e26aSAlexander Pyhalov }
149*91e1e26aSAlexander Pyhalov }
150*91e1e26aSAlexander Pyhalov st->last = 0;
151*91e1e26aSAlexander Pyhalov } else {
152*91e1e26aSAlexander Pyhalov if (uni >= 0x0041 && uni <= 0x01b0
153*91e1e26aSAlexander Pyhalov && ((tcvn_comp_bases_mask[(uni-0x0040) >> 5] >> (uni & 0x1f)) & 1)) {
154*91e1e26aSAlexander Pyhalov /*
155*91e1e26aSAlexander Pyhalov * uni is vowel, it's a possible match with combine character.
156*91e1e26aSAlexander Pyhalov * Buffer it.
157*91e1e26aSAlexander Pyhalov * */
158*91e1e26aSAlexander Pyhalov st->last = uni;
159*91e1e26aSAlexander Pyhalov (*inbuf)++;
160*91e1e26aSAlexander Pyhalov (*inbytesleft)--;
161*91e1e26aSAlexander Pyhalov continue;
162*91e1e26aSAlexander Pyhalov }
163*91e1e26aSAlexander Pyhalov }
164*91e1e26aSAlexander Pyhalov
165*91e1e26aSAlexander Pyhalov if (uni < 0x80) {
166*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)uni;
167*91e1e26aSAlexander Pyhalov (*outbytesleft) -= 1;
168*91e1e26aSAlexander Pyhalov } else if (uni >= 0x0080 && uni <= 0x07ff) {
169*91e1e26aSAlexander Pyhalov if (*outbytesleft < 2) {
170*91e1e26aSAlexander Pyhalov errno = E2BIG;
171*91e1e26aSAlexander Pyhalov return((size_t)-1);
172*91e1e26aSAlexander Pyhalov }
173*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)((uni >> 6) & 0x1f) | 0xc0;
174*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)(uni & 0x3f) | 0x80;
175*91e1e26aSAlexander Pyhalov (*outbytesleft) -= 2;
176*91e1e26aSAlexander Pyhalov } else if (uni >= 0x0800 && uni <= 0xffff) {
177*91e1e26aSAlexander Pyhalov if (*outbytesleft < 3) {
178*91e1e26aSAlexander Pyhalov errno = E2BIG;
179*91e1e26aSAlexander Pyhalov return((size_t)-1);
180*91e1e26aSAlexander Pyhalov }
181*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)((uni >> 12) & 0xf) | 0xe0;
182*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)((uni >>6) & 0x3f) | 0x80;
183*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)(uni & 0x3f) | 0x80;
184*91e1e26aSAlexander Pyhalov (*outbytesleft) -= 3;
185*91e1e26aSAlexander Pyhalov }
186*91e1e26aSAlexander Pyhalov
187*91e1e26aSAlexander Pyhalov (*inbuf)++;
188*91e1e26aSAlexander Pyhalov (*inbytesleft)--;
189*91e1e26aSAlexander Pyhalov }
190*91e1e26aSAlexander Pyhalov
191*91e1e26aSAlexander Pyhalov if ( *inbytesleft > 0 && *outbytesleft <= 0 ) {
192*91e1e26aSAlexander Pyhalov errno = E2BIG;
193*91e1e26aSAlexander Pyhalov st->last = 0;
194*91e1e26aSAlexander Pyhalov return ((size_t)-1);
195*91e1e26aSAlexander Pyhalov }
196*91e1e26aSAlexander Pyhalov
197*91e1e26aSAlexander Pyhalov if (st->last !=0 ) {
198*91e1e26aSAlexander Pyhalov if (st->last < 0x80) {
199*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)st->last;
200*91e1e26aSAlexander Pyhalov (*outbytesleft) -= 1;
201*91e1e26aSAlexander Pyhalov } else if (st->last >= 0x0080 && st->last <= 0x07ff) {
202*91e1e26aSAlexander Pyhalov if (*outbytesleft < 2 ) {
203*91e1e26aSAlexander Pyhalov errno = E2BIG;
204*91e1e26aSAlexander Pyhalov return((size_t)-1);
205*91e1e26aSAlexander Pyhalov }
206*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)((st->last >> 6) & 0x1f) | 0xc0;
207*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)(st->last & 0x3f) | 0x80;
208*91e1e26aSAlexander Pyhalov (*outbytesleft) -= 2;
209*91e1e26aSAlexander Pyhalov } else if (st->last >= 0x0800) {
210*91e1e26aSAlexander Pyhalov if (*outbytesleft < 3) {
211*91e1e26aSAlexander Pyhalov errno = E2BIG;
212*91e1e26aSAlexander Pyhalov return((size_t)-1);
213*91e1e26aSAlexander Pyhalov }
214*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)((st->last >> 12) & 0xf) | 0xe0;
215*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)((st->last >>6) & 0x3f) | 0x80;
216*91e1e26aSAlexander Pyhalov *(*outbuf)++ = (char)(st->last & 0x3f) | 0x80;
217*91e1e26aSAlexander Pyhalov (*outbytesleft) -= 3;
218*91e1e26aSAlexander Pyhalov }
219*91e1e26aSAlexander Pyhalov st->last = 0;
220*91e1e26aSAlexander Pyhalov }
221*91e1e26aSAlexander Pyhalov
222*91e1e26aSAlexander Pyhalov return ((size_t)(*inbytesleft));
223*91e1e26aSAlexander Pyhalov
224*91e1e26aSAlexander Pyhalov }
225*91e1e26aSAlexander Pyhalov
226*91e1e26aSAlexander Pyhalov /* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
binsearch(unsigned long x,Combine_map v[],int n)227*91e1e26aSAlexander Pyhalov static int binsearch(unsigned long x, Combine_map v[], int n)
228*91e1e26aSAlexander Pyhalov {
229*91e1e26aSAlexander Pyhalov int low = 0;
230*91e1e26aSAlexander Pyhalov int mid = 0;
231*91e1e26aSAlexander Pyhalov int high = n - 1;
232*91e1e26aSAlexander Pyhalov
233*91e1e26aSAlexander Pyhalov low = 0;
234*91e1e26aSAlexander Pyhalov while (low <= high) {
235*91e1e26aSAlexander Pyhalov mid = (low + high) / 2;
236*91e1e26aSAlexander Pyhalov if (x < v[mid].base)
237*91e1e26aSAlexander Pyhalov high = mid - 1;
238*91e1e26aSAlexander Pyhalov else if (x > v[mid].base)
239*91e1e26aSAlexander Pyhalov low = mid + 1;
240*91e1e26aSAlexander Pyhalov else
241*91e1e26aSAlexander Pyhalov /* found match */
242*91e1e26aSAlexander Pyhalov return mid;
243*91e1e26aSAlexander Pyhalov }
244*91e1e26aSAlexander Pyhalov
245*91e1e26aSAlexander Pyhalov /* no match */
246*91e1e26aSAlexander Pyhalov return (-1);
247*91e1e26aSAlexander Pyhalov }
248