1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2008, by Sun Microsystems, Inc. 23 * All rights reserved. 24 */ 25 #include <stdio.h> 26 #include <errno.h> 27 #include <stdlib.h> 28 #include <sys/types.h> 29 #define __NEED_UNI_2_TCVN__ 30 #include <unicode_tcvn.h> /* Unicode to TCVN mapping table */ 31 #include "common_defs.h" 32 #define NON_ID_CHAR '?' /* non-identified character */ 33 34 typedef struct _icv_state { 35 int _errno; /* internal errno */ 36 } _iconv_st; 37 38 39 /* 40 * Open; called from iconv_open() 41 */ 42 void * 43 _icv_open() 44 { 45 _iconv_st *st; 46 47 if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) { 48 errno = ENOMEM; 49 return ((void *) -1); 50 } 51 52 st->_errno = 0; 53 return ((void *) st); 54 } 55 56 57 /* 58 * Close; called from iconv_close() 59 */ 60 void 61 _icv_close(_iconv_st *st) 62 { 63 if (!st) 64 errno = EBADF; 65 else 66 free(st); 67 } 68 69 70 /* 71 * Actual conversion; called from iconv() 72 */ 73 size_t 74 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft, 75 char **outbuf, size_t *outbytesleft) 76 { 77 int utf8_len = 1; 78 int no_id_char_num = 0; 79 unsigned char *op = (unsigned char*)*inbuf; 80 #ifdef DEBUG 81 fprintf(stderr, "========== iconv(): UCS-2 --> TCVN5712 ==========\n"); 82 #endif 83 if (st == NULL) { 84 errno = EBADF; 85 return ((size_t) -1); 86 } 87 88 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */ 89 st->_errno = 0; 90 return ((size_t) 0); 91 } 92 93 st->_errno = 0; /* Rreset internal errno */ 94 errno = 0; /* Rreset external errno */ 95 96 /* Convert UTF-8 encoding to TCVN5712 */ 97 while (*inbytesleft > 0 && *outbytesleft > 0) { 98 unsigned long uni = 0; 99 unsigned char ch = 0; 100 unsigned long temp1 = 0, 101 temp2 = 0, 102 temp3 = 0; 103 104 if(0x00 == (*op & 0x80)) { 105 /* 1 byte UTF-8 Charater.*/ 106 uni = (unsigned short)*op; 107 utf8_len = 1; 108 goto conving; 109 } 110 111 if (*inbytesleft < 2) 112 goto errexit; 113 if ( 0xc0 == (*op & 0xe0) && 114 0x80 == (*(op + 1) & 0xc0) ) { 115 /* 2 bytes UTF-8 Charater.*/ 116 temp1 = (unsigned short)(*op & 0x1f); 117 temp1 <<= 6; 118 temp1 |= (unsigned short)(*(op + 1) & 0x3f); 119 uni = temp1; 120 utf8_len = 2; 121 goto conving; 122 } 123 124 if (*inbytesleft < 3) 125 goto errexit; 126 if ( 0xe0 == (*op & 0xf0) && 127 0x80 == (*(op + 1) & 0xc0) && 128 0x80 == (*(op + 2) & 0xc0) ) { 129 /* 3bytes UTF-8 Charater.*/ 130 temp1 = (unsigned short)(*op &0x0f); 131 temp1 <<= 12; 132 temp2 = (unsigned short)(*(op+1) & 0x3F); 133 temp2 <<= 6; 134 temp1 = temp1 | temp2 | (unsigned short)(*(op+2) & 0x3F); 135 uni = temp1; 136 utf8_len = 3; 137 goto conving; 138 } 139 140 if (*inbytesleft < 4) 141 goto errexit; 142 if ( 0xf0 == (*op & 0xf8) && 143 0x80 == (*(op + 1) & 0xc0) && 144 0x80 == (*(op + 2) & 0xc0) ) { 145 /* 4bytes UTF-8 Charater.*/ 146 temp1 = *op &0x07; 147 temp1 <<= 18; 148 temp2 = (*(op+1) & 0x3F); 149 temp2 <<= 12; 150 temp3 = (*(op+1) & 0x3F); 151 temp3 <<= 6; 152 temp1 = temp1 | temp2 | temp3 |(unsigned long)(*(op+2) & 0x3F); 153 uni = temp1; 154 utf8_len = 4; 155 goto conving; 156 } 157 158 /* unrecognize byte. */ 159 st->_errno = errno = EILSEQ; 160 errno = EILSEQ; 161 return ((size_t)-1); 162 163 conving: 164 if (uni_2_tcvn(uni, &ch) == 1) { 165 **outbuf = ch; 166 } else { 167 **outbuf = NON_ID_CHAR; 168 no_id_char_num += 1; 169 } 170 (*outbuf) += 1; 171 (*outbytesleft) -= 1; 172 op += utf8_len; 173 (*inbytesleft) -= utf8_len; 174 175 } 176 177 return ((size_t)no_id_char_num); 178 179 errexit: 180 st->_errno = errno = EINVAL; 181 errno = EINVAL; 182 return ((size_t)-1); 183 } 184