xref: /titanic_51/usr/src/lib/iconv_modules/vi/common/UTF-8%tcvn.c (revision 91e1e26ac6a73ce959289cf7d3d96c4baedbe0b8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008, by Sun Microsystems, Inc.
23  * All rights reserved.
24  */
25 #include <stdio.h>
26 #include <errno.h>
27 #include <stdlib.h>
28 #include <sys/types.h>
29 #define __NEED_UNI_2_TCVN__
30 #include <unicode_tcvn.h>	/* Unicode to TCVN  mapping table */
31 #include "common_defs.h"
32 #define NON_ID_CHAR '?'     /* non-identified character */
33 
/*
 * Per-conversion state: allocated by _icv_open(), released by _icv_close()
 * and updated by _icv_iconv().
 */
typedef struct _icv_state {
    int _errno; /* internal errno */
} _iconv_st;
37 
38 
39 /*
40  * Open; called from iconv_open()
41  */
42 void *
43 _icv_open()
44 {
45     _iconv_st *st;
46 
47     if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
48         errno = ENOMEM;
49         return ((void *) -1);
50     }
51 
52     st->_errno = 0;
53     return ((void *) st);
54 }
55 
56 
57 /*
58  * Close; called from iconv_close()
59  */
60 void
61 _icv_close(_iconv_st *st)
62 {
63     if (!st)
64         errno = EBADF;
65     else
66         free(st);
67 }
68 
69 
70 /*
71  * Actual conversion; called from iconv()
72  */
73 size_t
74 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
75 				char **outbuf, size_t *outbytesleft)
76 {
77     int             utf8_len = 1;
78     int             no_id_char_num = 0;
79     unsigned char   *op = (unsigned char*)*inbuf;
80 #ifdef DEBUG
81     fprintf(stderr, "==========     iconv(): UCS-2 --> TCVN5712  ==========\n");
82 #endif
83     if (st == NULL) {
84         errno = EBADF;
85         return ((size_t) -1);
86     }
87 
88     if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
89         st->_errno = 0;
90         return ((size_t) 0);
91     }
92 
93     st->_errno = 0; /* Rreset internal errno */
94     errno = 0;      /* Rreset external errno */
95 
96     /* Convert UTF-8 encoding to TCVN5712 */
97     while (*inbytesleft > 0 && *outbytesleft > 0) {
98         unsigned long uni = 0;
99         unsigned char ch = 0;
100         unsigned long temp1 = 0,
101                       temp2 = 0,
102                       temp3 = 0;
103 
104         if(0x00 == (*op & 0x80)) {
105             /* 1 byte UTF-8 Charater.*/
106              uni = (unsigned short)*op;
107              utf8_len = 1;
108              goto conving;
109         }
110 
111         if (*inbytesleft < 2)
112             goto errexit;
113         if ( 0xc0 == (*op & 0xe0) &&
114                 0x80 == (*(op + 1) & 0xc0) ) {
115             /* 2 bytes UTF-8 Charater.*/
116             temp1 = (unsigned short)(*op & 0x1f);
117             temp1 <<= 6;
118             temp1 |= (unsigned short)(*(op + 1) & 0x3f);
119             uni = temp1;
120             utf8_len = 2;
121             goto conving;
122         }
123 
124         if (*inbytesleft < 3)
125            goto errexit;
126         if ( 0xe0 == (*op & 0xf0) &&
127                 0x80 == (*(op + 1) & 0xc0) &&
128                 0x80 == (*(op + 2) & 0xc0) ) {
129             /* 3bytes UTF-8 Charater.*/
130             temp1 = (unsigned short)(*op &0x0f);
131             temp1 <<= 12;
132             temp2 = (unsigned short)(*(op+1) & 0x3F);
133             temp2 <<= 6;
134             temp1 = temp1 | temp2 | (unsigned short)(*(op+2) & 0x3F);
135             uni = temp1;
136             utf8_len = 3;
137             goto conving;
138         }
139 
140         if (*inbytesleft < 4)
141             goto errexit;
142         if ( 0xf0 == (*op & 0xf8) &&
143                 0x80 == (*(op + 1) & 0xc0) &&
144                 0x80 == (*(op + 2) & 0xc0) ) {
145             /* 4bytes UTF-8 Charater.*/
146             temp1 = *op &0x07;
147             temp1 <<= 18;
148             temp2 = (*(op+1) & 0x3F);
149             temp2 <<= 12;
150             temp3 = (*(op+1) & 0x3F);
151             temp3 <<= 6;
152             temp1 = temp1 | temp2 | temp3 |(unsigned long)(*(op+2) & 0x3F);
153             uni = temp1;
154             utf8_len = 4;
155             goto conving;
156         }
157 
158         /* unrecognize byte. */
159         st->_errno = errno = EILSEQ;
160         errno = EILSEQ;
161         return ((size_t)-1);
162 
163 conving:
164         if (uni_2_tcvn(uni, &ch) == 1) {
165             **outbuf = ch;
166         } else {
167             **outbuf = NON_ID_CHAR;
168             no_id_char_num += 1;
169         }
170         (*outbuf) += 1;
171         (*outbytesleft) -= 1;
172         op += utf8_len;
173         (*inbytesleft) -= utf8_len;
174 
175     }
176 
177     return ((size_t)no_id_char_num);
178 
179 errexit:
180     st->_errno = errno = EINVAL;
181     errno = EINVAL;
182     return ((size_t)-1);
183 }
184