1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1994 by Sun Microsystems, Inc. 23 */ 24 25 26 #include <stdlib.h> 27 #include <errno.h> 28 #include "ktable.h" 29 #include "utf_nbyte.h" 30 31 32 /**** _ I C V _ O P E N ****/ 33 34 void* _icv_open() 35 { 36 _conv_desc* cd = (_conv_desc*)malloc(sizeof(_conv_desc)); 37 38 if (cd == (_conv_desc*)NULL) 39 { 40 errno = ENOMEM; 41 return((void*)-1); 42 } 43 44 RESET_CONV_DESC(); 45 cd->state = ASCII; 46 47 return((void*)cd); 48 } /* end of int _icv_open(). */ 49 50 51 /**** _ I C V _ C L O S E ****/ 52 53 void _icv_close(_conv_desc* cd) 54 { 55 if (!cd) 56 errno = EBADF; 57 else 58 free((void*)cd); 59 } /* end of void _icv_close(_conv_desc*). */ 60 61 62 /**** _ I C V _ I C O N V ****/ 63 64 size_t _icv_iconv(_conv_desc* cd, char** inbuf, size_t* inbufleft, 65 char** outbuf, size_t* outbufleft) 66 { 67 void echo_vowel(char*, int*), echo_consonant(char*, int*); 68 size_t ret_val = 0; 69 unsigned char* ib; 70 unsigned char* ob; 71 unsigned char* ibtail; 72 unsigned char* obtail; 73 74 if (!cd) 75 { 76 errno = EBADF; 77 return((size_t)-1); 78 } 79 80 if (!inbuf || !(*inbuf)) 81 { 82 RESET_CONV_DESC(); 83 cd->state = ASCII; 84 return((size_t)0); 85 } 86 87 ib = (unsigned char*)*inbuf; 88 ob = (unsigned char*)*outbuf; 89 ibtail = ib + *inbufleft; 90 obtail = ob + *outbufleft; 91 92 while (ib < ibtail) 93 { 94 if (!(*ib & 0x80)) /* 7 bits */ 95 { 96 PROCESS_PRIOR_CVC(); 97 98 if ((obtail - ob) < (cd->state == WANSUNG ? 2 : 1)) 99 { 100 errno = E2BIG; 101 ret_val = (size_t)-1; 102 break; 103 } 104 if (cd->state == WANSUNG) 105 { 106 *ob++ = SI; 107 cd->state = ASCII; 108 } 109 *ob++ = *ib++; 110 } 111 else if ((*ib & 0xF0) == 0xE0) /* 16 bits */ 112 { 113 unsigned long utf; 114 115 if ((ibtail - ib) < 3) 116 { 117 errno = EINVAL; 118 ret_val = (size_t)-1; 119 break; 120 } 121 122 utf = ((unsigned long)(*ib) << 16) | 123 ((unsigned long)(*(ib + 1)) << 8) | 124 (unsigned long)(*(ib + 2)); 125 if (utf == 0xE1859F || 126 (utf >= 0xE18480 && utf <= 0xE18492)) /* Ci */ 127 { 128 PROCESS_PRIOR_CVC(); 129 130 cd->ci = (utf == 0xE1859F) ? CVC_FILL 131 : utf - 0xE18480; 132 cd->prev_state = CI; 133 } 134 else if (utf == 0xE185A0 || 135 (utf >= 0xE185A1 && utf <= 0xE185B5)) /* V */ 136 { 137 if (cd->prev_state != E && cd->prev_state != CI) 138 PROCESS_PRIOR_CVC(); 139 140 cd->v = (utf == 0xE185A0) ? CVC_FILL 141 : utf - 0xE185A1; 142 cd->prev_state = V; 143 } 144 else if ((utf >= 0xE186A8 && utf <= 0xE186BF) || 145 (utf >= 0xE18780 && utf <= 0xE18782)) /* Cf */ 146 { 147 if (cd->prev_state != E && cd->prev_state != V) 148 PROCESS_PRIOR_CVC(); 149 150 cd->cf = utf - ((utf >= 0xE18780) ? 0xE18766 151 : 0xE186A6); 152 cd->prev_state = CF; 153 154 PROCESS_PRIOR_CVC(); 155 } 156 else 157 { 158 PROCESS_PRIOR_CVC(); 159 160 /* Let's assume the code is non-identical. */ 161 if ((obtail - ob) < 2) 162 { 163 errno = E2BIG; 164 ret_val = (size_t)-1; 165 break; 166 } 167 *ob++ = NON_IDENTICAL; 168 *ob++ = NON_IDENTICAL; 169 ret_val += 2; 170 } 171 ib += 3; 172 } 173 else /* 11, 21, 26 & 31 bits codes won't be able to convert. */ 174 { 175 short int offset; 176 177 PROCESS_PRIOR_CVC(); 178 179 if ((*ib & 0xE0) == 0xC0) /* 11 */ 180 offset = 2; 181 else if ((*ib & 0xF0) == 0xE0) /* 16 */ 182 offset = 3; 183 else if ((*ib & 0xF8) == 0xF0) /* 21 */ 184 offset = 4; 185 else if ((*ib & 0xFC) == 0xF8) /* 26 */ 186 offset = 5; 187 else if ((*ib & 0xFE) == 0xFC) /* 31 */ 188 offset = 6; 189 else /* Illegal sequence. */ 190 offset = 1; 191 192 if ((ibtail - ib) < offset) 193 { 194 errno = EINVAL; 195 ret_val = (size_t)-1; 196 break; 197 } 198 ib += offset; 199 200 /* Let's assume the code is non-identical. */ 201 offset = (offset > 2) ? 2 : 1; 202 if ((obtail - ob) < offset) 203 { 204 errno = E2BIG; 205 ret_val = (size_t)-1; 206 break; 207 } 208 *ob++ = NON_IDENTICAL; 209 if (offset > 1) 210 *ob++ = NON_IDENTICAL; 211 ret_val += offset; 212 } 213 } 214 215 *inbuf = (char*)ib; 216 *inbufleft = ibtail - ib; 217 *outbuf = (char*)ob; 218 *outbufleft = obtail - ob; 219 220 return(ret_val); 221 } /* end of size_t _icv_iconv(_conv_desc*, char**, size_t*, char**, size_t*).*/ 222 223 224 /**** E C H O _ V O W E L ****/ 225 226 void echo_vowel(char* c, int* i) 227 { 228 if (c[*i] == 'm') /* _|_|- */ 229 { 230 c[(*i)++] = 'l'; /* _|_ */ 231 c[(*i)++] = 'b'; /* |- */ 232 } 233 else if (c[*i] == 'n') /* _|_H */ 234 { 235 c[(*i)++] = 'l'; /* _|_ */ 236 c[(*i)++] = 'c'; /* H */ 237 } 238 else if (c[*i] == 'o') /* _|_| */ 239 { 240 c[(*i)++] = 'l'; /* _|_ */ 241 c[(*i)++] = '|'; /* | */ 242 } 243 else if (c[*i] == 't') /* T-| */ 244 { 245 c[(*i)++] = 's'; /* T */ 246 c[(*i)++] = 'f'; /* -| */ 247 } 248 else if (c[*i] == 'u') /* T-|| */ 249 { 250 c[(*i)++] = 's'; /* T */ 251 c[(*i)++] = 'g'; /* -|| */ 252 } 253 else if (c[*i] == 'v') /* T| */ 254 { 255 c[(*i)++] = 's'; /* T */ 256 c[(*i)++] = '|'; /* | */ 257 } 258 else if (c[*i] == '{') /* _| */ 259 { 260 c[(*i)++] = 'z'; /* __ */ 261 c[(*i)++] = '|'; /* | */ 262 } 263 else 264 (*i)++; 265 } /* end of void echo_vowel(char*, int*). */ 266 267 268 /**** E C H O _ C O N S O N A N T ****/ 269 270 void echo_consonant(char* c, int* i) 271 { 272 if (c[*i] == 'C') /* gs */ 273 { 274 c[(*i)++] = 'A'; /* g */ 275 c[(*i)++] = 'U'; /* s */ 276 } 277 else if (c[*i] == 'E') /* nj */ 278 { 279 c[(*i)++] = 'D'; /* n */ 280 c[(*i)++] = 'X'; /* j */ 281 } 282 else if (c[*i] == 'F') /* nh */ 283 { 284 c[(*i)++] = 'D'; /* n */ 285 c[(*i)++] = '^'; /* h */ 286 } 287 else if (c[*i] == 'J') /* rg */ 288 { 289 c[(*i)++] = 'I'; /* r */ 290 c[(*i)++] = 'A'; /* g */ 291 } 292 else if (c[*i] == 'K') /* rm */ 293 { 294 c[(*i)++] = 'I'; /* r */ 295 c[(*i)++] = 'Q'; /* m */ 296 } 297 else if (c[*i] == 'L') /* rb */ 298 { 299 c[(*i)++] = 'I'; /* r */ 300 c[(*i)++] = 'R'; /* b */ 301 } 302 else if (c[*i] == 'M') /* rs */ 303 { 304 c[(*i)++] = 'I'; /* r */ 305 c[(*i)++] = 'U'; /* s */ 306 } 307 else if (c[*i] == 'N') /* rt */ 308 { 309 c[(*i)++] = 'I'; /* r */ 310 c[(*i)++] = '\\'; /* t */ 311 } 312 else if (c[*i] == 'O') /* rp */ 313 { 314 c[(*i)++] = 'I'; /* r */ 315 c[(*i)++] = ']'; /* p */ 316 } 317 else if (c[*i] == 'P') /* rh */ 318 { 319 c[(*i)++] = 'I'; /* r */ 320 c[(*i)++] = '^'; /* h */ 321 } 322 else if (c[*i] == 'T') /* bs */ 323 { 324 c[(*i)++] = 'R'; /* b */ 325 c[(*i)++] = 'U'; /* s */ 326 } 327 else 328 (*i)++; 329 } /* end of void echo_consonant(char*, int*). */ 330