xref: /titanic_50/usr/src/lib/iconv_modules/vi/common/UTF-8%tcvn.c (revision 880d797826457b77414b37d531cc3e1aa166ecbe)
1*880d7978SAlexander Pyhalov /*
2*880d7978SAlexander Pyhalov  * CDDL HEADER START
3*880d7978SAlexander Pyhalov  *
4*880d7978SAlexander Pyhalov  * The contents of this file are subject to the terms of the
5*880d7978SAlexander Pyhalov  * Common Development and Distribution License (the "License").
6*880d7978SAlexander Pyhalov  * You may not use this file except in compliance with the License.
7*880d7978SAlexander Pyhalov  *
8*880d7978SAlexander Pyhalov  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9*880d7978SAlexander Pyhalov  * or http://www.opensolaris.org/os/licensing.
10*880d7978SAlexander Pyhalov  * See the License for the specific language governing permissions
11*880d7978SAlexander Pyhalov  * and limitations under the License.
12*880d7978SAlexander Pyhalov  *
13*880d7978SAlexander Pyhalov  * When distributing Covered Code, include this CDDL HEADER in each
14*880d7978SAlexander Pyhalov  * file and include the License file at src/OPENSOLARIS.LICENSE.
15*880d7978SAlexander Pyhalov  * If applicable, add the following below this CDDL HEADER, with the
16*880d7978SAlexander Pyhalov  * fields enclosed by brackets "[]" replaced with your own identifying
17*880d7978SAlexander Pyhalov  * information: Portions Copyright [yyyy] [name of copyright owner]
18*880d7978SAlexander Pyhalov  *
19*880d7978SAlexander Pyhalov  * CDDL HEADER END
20*880d7978SAlexander Pyhalov  */
21*880d7978SAlexander Pyhalov /*
22*880d7978SAlexander Pyhalov  * Copyright (c) 2008, by Sun Microsystems, Inc.
23*880d7978SAlexander Pyhalov  * All rights reserved.
24*880d7978SAlexander Pyhalov  */
25*880d7978SAlexander Pyhalov #include <stdio.h>
26*880d7978SAlexander Pyhalov #include <errno.h>
27*880d7978SAlexander Pyhalov #include <stdlib.h>
28*880d7978SAlexander Pyhalov #include <sys/types.h>
29*880d7978SAlexander Pyhalov #define __NEED_UNI_2_TCVN__
30*880d7978SAlexander Pyhalov #include <unicode_tcvn.h>	/* Unicode to TCVN  mapping table */
31*880d7978SAlexander Pyhalov #include "common_defs.h"
32*880d7978SAlexander Pyhalov #define NON_ID_CHAR '?'     /* non-identified character */
33*880d7978SAlexander Pyhalov 
/* Conversion state handed between the _icv_* entry points. */
struct _icv_state {
    int _errno;    /* internal copy of the last error code, 0 when none */
};

typedef struct _icv_state _iconv_st;
37*880d7978SAlexander Pyhalov 
38*880d7978SAlexander Pyhalov 
39*880d7978SAlexander Pyhalov /*
40*880d7978SAlexander Pyhalov  * Open; called from iconv_open()
41*880d7978SAlexander Pyhalov  */
42*880d7978SAlexander Pyhalov void *
_icv_open()43*880d7978SAlexander Pyhalov _icv_open()
44*880d7978SAlexander Pyhalov {
45*880d7978SAlexander Pyhalov     _iconv_st *st;
46*880d7978SAlexander Pyhalov 
47*880d7978SAlexander Pyhalov     if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
48*880d7978SAlexander Pyhalov         errno = ENOMEM;
49*880d7978SAlexander Pyhalov         return ((void *) -1);
50*880d7978SAlexander Pyhalov     }
51*880d7978SAlexander Pyhalov 
52*880d7978SAlexander Pyhalov     st->_errno = 0;
53*880d7978SAlexander Pyhalov     return ((void *) st);
54*880d7978SAlexander Pyhalov }
55*880d7978SAlexander Pyhalov 
56*880d7978SAlexander Pyhalov 
57*880d7978SAlexander Pyhalov /*
58*880d7978SAlexander Pyhalov  * Close; called from iconv_close()
59*880d7978SAlexander Pyhalov  */
60*880d7978SAlexander Pyhalov void
_icv_close(_iconv_st * st)61*880d7978SAlexander Pyhalov _icv_close(_iconv_st *st)
62*880d7978SAlexander Pyhalov {
63*880d7978SAlexander Pyhalov     if (!st)
64*880d7978SAlexander Pyhalov         errno = EBADF;
65*880d7978SAlexander Pyhalov     else
66*880d7978SAlexander Pyhalov         free(st);
67*880d7978SAlexander Pyhalov }
68*880d7978SAlexander Pyhalov 
69*880d7978SAlexander Pyhalov 
70*880d7978SAlexander Pyhalov /*
71*880d7978SAlexander Pyhalov  * Actual conversion; called from iconv()
72*880d7978SAlexander Pyhalov  */
73*880d7978SAlexander Pyhalov size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)74*880d7978SAlexander Pyhalov _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
75*880d7978SAlexander Pyhalov 				char **outbuf, size_t *outbytesleft)
76*880d7978SAlexander Pyhalov {
77*880d7978SAlexander Pyhalov     int             utf8_len = 1;
78*880d7978SAlexander Pyhalov     int             no_id_char_num = 0;
79*880d7978SAlexander Pyhalov     unsigned char   *op = (unsigned char*)*inbuf;
80*880d7978SAlexander Pyhalov #ifdef DEBUG
81*880d7978SAlexander Pyhalov     fprintf(stderr, "==========     iconv(): UCS-2 --> TCVN5712  ==========\n");
82*880d7978SAlexander Pyhalov #endif
83*880d7978SAlexander Pyhalov     if (st == NULL) {
84*880d7978SAlexander Pyhalov         errno = EBADF;
85*880d7978SAlexander Pyhalov         return ((size_t) -1);
86*880d7978SAlexander Pyhalov     }
87*880d7978SAlexander Pyhalov 
88*880d7978SAlexander Pyhalov     if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
89*880d7978SAlexander Pyhalov         st->_errno = 0;
90*880d7978SAlexander Pyhalov         return ((size_t) 0);
91*880d7978SAlexander Pyhalov     }
92*880d7978SAlexander Pyhalov 
93*880d7978SAlexander Pyhalov     st->_errno = 0; /* Rreset internal errno */
94*880d7978SAlexander Pyhalov     errno = 0;      /* Rreset external errno */
95*880d7978SAlexander Pyhalov 
96*880d7978SAlexander Pyhalov     /* Convert UTF-8 encoding to TCVN5712 */
97*880d7978SAlexander Pyhalov     while (*inbytesleft > 0 && *outbytesleft > 0) {
98*880d7978SAlexander Pyhalov         unsigned long uni = 0;
99*880d7978SAlexander Pyhalov         unsigned char ch = 0;
100*880d7978SAlexander Pyhalov         unsigned long temp1 = 0,
101*880d7978SAlexander Pyhalov                       temp2 = 0,
102*880d7978SAlexander Pyhalov                       temp3 = 0;
103*880d7978SAlexander Pyhalov 
104*880d7978SAlexander Pyhalov         if(0x00 == (*op & 0x80)) {
105*880d7978SAlexander Pyhalov             /* 1 byte UTF-8 Charater.*/
106*880d7978SAlexander Pyhalov              uni = (unsigned short)*op;
107*880d7978SAlexander Pyhalov              utf8_len = 1;
108*880d7978SAlexander Pyhalov              goto conving;
109*880d7978SAlexander Pyhalov         }
110*880d7978SAlexander Pyhalov 
111*880d7978SAlexander Pyhalov         if (*inbytesleft < 2)
112*880d7978SAlexander Pyhalov             goto errexit;
113*880d7978SAlexander Pyhalov         if ( 0xc0 == (*op & 0xe0) &&
114*880d7978SAlexander Pyhalov                 0x80 == (*(op + 1) & 0xc0) ) {
115*880d7978SAlexander Pyhalov             /* 2 bytes UTF-8 Charater.*/
116*880d7978SAlexander Pyhalov             temp1 = (unsigned short)(*op & 0x1f);
117*880d7978SAlexander Pyhalov             temp1 <<= 6;
118*880d7978SAlexander Pyhalov             temp1 |= (unsigned short)(*(op + 1) & 0x3f);
119*880d7978SAlexander Pyhalov             uni = temp1;
120*880d7978SAlexander Pyhalov             utf8_len = 2;
121*880d7978SAlexander Pyhalov             goto conving;
122*880d7978SAlexander Pyhalov         }
123*880d7978SAlexander Pyhalov 
124*880d7978SAlexander Pyhalov         if (*inbytesleft < 3)
125*880d7978SAlexander Pyhalov            goto errexit;
126*880d7978SAlexander Pyhalov         if ( 0xe0 == (*op & 0xf0) &&
127*880d7978SAlexander Pyhalov                 0x80 == (*(op + 1) & 0xc0) &&
128*880d7978SAlexander Pyhalov                 0x80 == (*(op + 2) & 0xc0) ) {
129*880d7978SAlexander Pyhalov             /* 3bytes UTF-8 Charater.*/
130*880d7978SAlexander Pyhalov             temp1 = (unsigned short)(*op &0x0f);
131*880d7978SAlexander Pyhalov             temp1 <<= 12;
132*880d7978SAlexander Pyhalov             temp2 = (unsigned short)(*(op+1) & 0x3F);
133*880d7978SAlexander Pyhalov             temp2 <<= 6;
134*880d7978SAlexander Pyhalov             temp1 = temp1 | temp2 | (unsigned short)(*(op+2) & 0x3F);
135*880d7978SAlexander Pyhalov             uni = temp1;
136*880d7978SAlexander Pyhalov             utf8_len = 3;
137*880d7978SAlexander Pyhalov             goto conving;
138*880d7978SAlexander Pyhalov         }
139*880d7978SAlexander Pyhalov 
140*880d7978SAlexander Pyhalov         if (*inbytesleft < 4)
141*880d7978SAlexander Pyhalov             goto errexit;
142*880d7978SAlexander Pyhalov         if ( 0xf0 == (*op & 0xf8) &&
143*880d7978SAlexander Pyhalov                 0x80 == (*(op + 1) & 0xc0) &&
144*880d7978SAlexander Pyhalov                 0x80 == (*(op + 2) & 0xc0) ) {
145*880d7978SAlexander Pyhalov             /* 4bytes UTF-8 Charater.*/
146*880d7978SAlexander Pyhalov             temp1 = *op &0x07;
147*880d7978SAlexander Pyhalov             temp1 <<= 18;
148*880d7978SAlexander Pyhalov             temp2 = (*(op+1) & 0x3F);
149*880d7978SAlexander Pyhalov             temp2 <<= 12;
150*880d7978SAlexander Pyhalov             temp3 = (*(op+1) & 0x3F);
151*880d7978SAlexander Pyhalov             temp3 <<= 6;
152*880d7978SAlexander Pyhalov             temp1 = temp1 | temp2 | temp3 |(unsigned long)(*(op+2) & 0x3F);
153*880d7978SAlexander Pyhalov             uni = temp1;
154*880d7978SAlexander Pyhalov             utf8_len = 4;
155*880d7978SAlexander Pyhalov             goto conving;
156*880d7978SAlexander Pyhalov         }
157*880d7978SAlexander Pyhalov 
158*880d7978SAlexander Pyhalov         /* unrecognize byte. */
159*880d7978SAlexander Pyhalov         st->_errno = errno = EILSEQ;
160*880d7978SAlexander Pyhalov         errno = EILSEQ;
161*880d7978SAlexander Pyhalov         return ((size_t)-1);
162*880d7978SAlexander Pyhalov 
163*880d7978SAlexander Pyhalov conving:
164*880d7978SAlexander Pyhalov         if (uni_2_tcvn(uni, &ch) == 1) {
165*880d7978SAlexander Pyhalov             **outbuf = ch;
166*880d7978SAlexander Pyhalov         } else {
167*880d7978SAlexander Pyhalov             **outbuf = NON_ID_CHAR;
168*880d7978SAlexander Pyhalov             no_id_char_num += 1;
169*880d7978SAlexander Pyhalov         }
170*880d7978SAlexander Pyhalov         (*outbuf) += 1;
171*880d7978SAlexander Pyhalov         (*outbytesleft) -= 1;
172*880d7978SAlexander Pyhalov         op += utf8_len;
173*880d7978SAlexander Pyhalov         (*inbytesleft) -= utf8_len;
174*880d7978SAlexander Pyhalov 
175*880d7978SAlexander Pyhalov     }
176*880d7978SAlexander Pyhalov 
177*880d7978SAlexander Pyhalov     return ((size_t)no_id_char_num);
178*880d7978SAlexander Pyhalov 
179*880d7978SAlexander Pyhalov errexit:
180*880d7978SAlexander Pyhalov     st->_errno = errno = EINVAL;
181*880d7978SAlexander Pyhalov     errno = EINVAL;
182*880d7978SAlexander Pyhalov     return ((size_t)-1);
183*880d7978SAlexander Pyhalov }
184