1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1996 by Sun Microsystems, Inc. 23 */ 24 25 26 #include <stdlib.h> 27 #include <errno.h> 28 #include <widec.h> 29 #include "common_def.h" 30 #include "common_han.h" 31 #include "utf_euc_api.h" 32 #include "common_defs.h" 33 34 #define ESC 0x1B 35 #define SO 0x0E 36 #define SI 0x0F 37 38 typedef struct __conv_desc { 39 enum { NOT_DEFINED_YET, KSC5601 } designator; 40 enum { ASCII, HANGUL } state; 41 } _conv_desc; 42 43 /**** _ I C V _ O P E N ****/ 44 45 void* _icv_open() 46 { 47 _conv_desc* cd = (_conv_desc*)malloc(sizeof(_conv_desc)); 48 49 if (cd == (_conv_desc*)NULL) 50 { 51 errno = ENOMEM; 52 return((void*)-1); 53 } 54 55 cd->designator = NOT_DEFINED_YET; 56 cd->state = ASCII; 57 58 return((void*)cd); 59 } /* end of int _icv_open(). */ 60 61 62 /**** _ I C V _ C L O S E ****/ 63 64 void _icv_close(_conv_desc* cd) 65 { 66 if (!cd) 67 errno = EBADF; 68 else 69 free((void*)cd); 70 } /* end of void _icv_close(_conv_desc*). */ 71 72 73 /**** _ I C V _ I C O N V ****/ 74 75 size_t _icv_iconv(_conv_desc* cd, char** inbuf, size_t* inbufleft, 76 char** outbuf, size_t* outbufleft) 77 { 78 size_t ret_val = 0; 79 unsigned char* ib; 80 unsigned char* ob; 81 unsigned char* ibtail; 82 unsigned char* obtail; 83 84 if (!cd) 85 { 86 errno = EBADF; 87 return((size_t)-1); 88 } 89 90 if (!inbuf || !(*inbuf)) 91 { 92 if (cd->state == HANGUL) 93 { 94 if (outbufleft && *outbufleft >= 1 && outbuf && *outbuf) 95 { 96 **outbuf = SI; 97 (*outbuf)++; 98 (*outbufleft)--; 99 } 100 else 101 { 102 errno = E2BIG; 103 return((size_t)-1); 104 } 105 } 106 107 cd->designator = NOT_DEFINED_YET; 108 cd->state = ASCII; 109 return((size_t)0); 110 } 111 112 ib = (unsigned char*)*inbuf; 113 ob = (unsigned char*)*outbuf; 114 ibtail = ib + *inbufleft; 115 obtail = ob + *outbufleft; 116 117 while (ib < ibtail) 118 { 119 if (!(*ib & 0x80)) /* 7 bits */ 120 { 121 if (ob >= obtail) 122 { 123 errno = E2BIG; 124 ret_val = (size_t)-1; 125 break; 126 } 127 128 /* Hangul string ends */ 129 if (cd->state == HANGUL) { 130 *ob++ = SI; 131 cd->state = ASCII; 132 } 133 *ob++ = *ib++; 134 } 135 else if ((*ib & 0xF0) == 0xE0) /* 16 bits */ 136 { 137 hcode_type utf8_code, euc_code; 138 139 if ((ibtail - ib) < 3) 140 { 141 errno = EINVAL; 142 ret_val = (size_t)-1; 143 break; 144 } 145 146 if (!is_valid_utf8_string(ib, 3)) 147 { 148 errno = EILSEQ; 149 ret_val = (size_t)-1; 150 break; 151 } 152 153 utf8_code.byte.byte1 = 0; 154 utf8_code.byte.byte2 = *ib; 155 utf8_code.byte.byte3 = *(ib + 1); 156 utf8_code.byte.byte4 = *(ib + 2); 157 158 euc_code = _utf8_to_wansung(utf8_code); 159 160 if (euc_code.code != 0) { 161 /* If find something -> EUC code */ 162 163 /* if the first time of Hangul -> put Mark */ 164 if (cd->designator == NOT_DEFINED_YET) { 165 *ob++ = ESC; 166 *ob++ = '$'; 167 *ob++ = ')'; 168 *ob++ = 'C'; 169 cd->designator = KSC5601; 170 } 171 172 /* if the first Hangul in a string */ 173 if (cd->state == ASCII) { 174 *ob++ = SO; 175 cd->state = HANGUL; 176 } 177 178 /* MSB clear for 7-bits encoding */ 179 euc_code.wansung.msb1 = 0; 180 euc_code.wansung.msb2 = 0; 181 182 *ob++ = euc_code.byte.byte3; 183 *ob++ = euc_code.byte.byte4; 184 } 185 else 186 { 187 /* Let's assume the code is non-identical. */ 188 if ((obtail - ob) < 2) 189 { 190 errno = E2BIG; 191 ret_val = (size_t)-1; 192 break; 193 } 194 195 /* non identical character == ASCII */ 196 if (cd->state == HANGUL) { 197 *ob++ = SI; 198 cd->state = ASCII; 199 } 200 *ob++ = NON_IDENTICAL; 201 *ob++ = NON_IDENTICAL; 202 ret_val += 2; 203 } 204 ib += 3; 205 } 206 else /* 11, 21, 26 & 31 bits codes won't be able to convert. */ 207 { 208 short int offset; 209 210 if ((*ib & 0xE0) == 0xC0) /* 11 */ 211 offset = 2; 212 else if ((*ib & 0xF0) == 0xE0) /* 16 */ 213 offset = 3; 214 else if ((*ib & 0xF8) == 0xF0) /* 21 */ 215 offset = 4; 216 else if ((*ib & 0xFC) == 0xF8) /* 26 */ 217 offset = 5; 218 else if ((*ib & 0xFE) == 0xFC) /* 31 */ 219 offset = 6; 220 else /* Illegal sequence. */ 221 offset = 1; 222 223 if ((ibtail - ib) < offset) 224 { 225 errno = EINVAL; 226 ret_val = (size_t)-1; 227 break; 228 } 229 230 if (!is_valid_utf8_string(ib, offset)) 231 { 232 errno = EILSEQ; 233 ret_val = (size_t)-1; 234 break; 235 } 236 237 ib += offset; 238 239 /* Let's assume the code is non-identical. */ 240 offset = (offset > 2) ? 2 : 1; 241 if ((obtail - ob) < offset) 242 { 243 errno = E2BIG; 244 ret_val = (size_t)-1; 245 break; 246 } 247 248 /* non identical character == ASCII */ 249 if (cd->state == HANGUL) { 250 *ob++ = SI; 251 cd->state = ASCII; 252 } 253 254 *ob++ = NON_IDENTICAL; 255 if (offset > 1) 256 *ob++ = NON_IDENTICAL; 257 ret_val += offset; 258 } 259 } 260 261 *inbuf = (char*)ib; 262 *inbufleft = ibtail - ib; 263 *outbuf = (char*)ob; 264 *outbufleft = obtail - ob; 265 266 return(ret_val); 267 } /* end of size_t _icv_iconv(int*, char**, size_t*, char**, size_t*).*/ 268