1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright(c) 2001 Sun Microsystems, Inc. 23 * All rights reserved. 24 */ 25 26 #include <stdio.h> 27 #include <errno.h> 28 #include <stdlib.h> 29 #include <strings.h> 30 #include "iscii.h" 31 32 #define MSB 0x80 /* most significant bit */ 33 #define ONEBYTE 0xff /* right most byte */ 34 35 #define REPLACE_CHAR1 0xEF /* invalid conversion character */ 36 #define REPLACE_CHAR2 0xBF 37 #define REPLACE_CHAR3 0xBD 38 39 #define UTF8_SET1B(b,v) \ 40 (b[0]=(v&0x7f)) 41 42 #define UTF8_SET2B(b,v) \ 43 (b[0]=(0xc0|((v>>6)&0x1f))); \ 44 (b[1]=(0x80|((v&0x3f)))) 45 46 #define UTF8_SET3B(b,v) \ 47 (b[0]=(0xe0|((v>>12)&0xf))); \ 48 (b[1]=(0x80|((v>>6)&0x3f))); \ 49 (b[2]=(0x80|((v&0x3f)))) 50 51 typedef struct _icv_state { 52 char keepc[3]; /* keepc[0] is attr, keepc[1] and keepc[2] are lookup-ed */ 53 short pState; /* Previous State */ 54 int _errno; 55 } _iconv_st; 56 57 enum _CSTATE { S_BASIC, S_ATR, S_EXT, S_NONE }; 58 59 #define have_nukta(isc_type) ( nukta_type[isc_type] != NULL ) 60 #define have_EXT(isc_type) ( EXT_type[isc_type] != NULL ) 61 #define FIRST_CHAR 0xA0 62 63 static int copy_to_outbuf(ucs_t uniid, char *buf, size_t buflen); 64 65 static ucs_t 66 get_nukta(uchar iscii, int type) 67 { 68 int indx = iscii - FIRST_CHAR; 69 int *iscii_nukta = nukta_type[type]; 70 71 return ((indx >= 0) ? iscii_nukta[indx] : 0 ); 72 } 73 74 static ucs_t 75 get_EXT(uchar iscii, int type) 76 { 77 int indx = iscii - FIRST_CHAR; 78 int *iscii_EXT = EXT_type[type]; 79 80 return ((indx >= 0) ? iscii_EXT[indx] : 0 ); 81 } 82 83 static ucs_t 84 traverse_table(Entry *entry, int num, uchar iscii) 85 { 86 int i=0; 87 ucs_t retucs=0; 88 89 for ( ; i < num; ++i ) { 90 Entry en = entry[i]; 91 92 if ( iscii < en.iscii ) break; 93 if ( iscii >= en.iscii && iscii < en.iscii + en.count ) { 94 retucs = en.ucs + ( iscii - en.iscii ); 95 break; 96 } 97 } 98 99 return retucs; 100 } 101 102 /* 103 * the copy_to_outbuf has to be called before the st->keepc needs to changed. 104 * if E2BIG error, keep st->keepc. Will flush it at the beginning of next 105 * _icv_iconv() invocation 106 */ 107 int 108 iscii_to_utf8(_iconv_st *st, char *buf, size_t buflen) 109 { 110 #define DEV_ATR 0x42 111 ucs_t uniid; 112 int nBytes=0; 113 ISCII isc_type = isc_TYPE[st->keepc[0] - DEV_ATR]; 114 Entries en = iscii_table[isc_type]; 115 /* unsigned int keepc0 = (unsigned int) (st->keepc[0] & ONEBYTE); */ 116 unsigned int keepc1 = (unsigned int) (st->keepc[1] & ONEBYTE); 117 unsigned int keepc2 = (unsigned int) (st->keepc[2] & ONEBYTE); 118 119 if (keepc1 == 0xFF) { /* FFFD */ 120 if ( buflen < 3 ) { 121 errno = E2BIG; 122 return 0; 123 } 124 125 *buf = (char)REPLACE_CHAR1; 126 *(buf+1) = (char)REPLACE_CHAR2; 127 *(buf+2) = (char)REPLACE_CHAR3; 128 return (3); 129 } 130 131 if (keepc2 == 0) { /* Flush Single Character */ 132 133 if (keepc1 & MSB) { /* ISCII - Non-Ascii Codepoints */ 134 uniid = traverse_table(en.entry, en.items, keepc1); 135 } else /* ASCII */ 136 uniid = keepc1; 137 138 if ( (nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big; 139 st->keepc[1] = 0; 140 141 } else { 142 /* keepc[1] and keepc[2] != 0 */ 143 if (keepc1 & MSB) { 144 145 switch (keepc1) 146 { 147 case ISC_ext: 148 149 if ( have_EXT(isc_type) && is_valid_ext_code(keepc2) ) 150 { /* EXT only supported in Devanagari script */ 151 152 uniid = get_EXT(keepc2, isc_type); 153 if ((nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big; 154 } 155 else 156 errno = EILSEQ; 157 158 st->keepc[1] = st->keepc[2] = 0; 159 break; 160 case ISC_halant: 161 /* test whether there has enough space to hold the converted bytes */ 162 if ((keepc2 == ISC_halant || keepc2 == ISC_nukta) && buflen < 6 ) 163 goto E2big; 164 165 uniid = traverse_table(en.entry, en.items, keepc1); 166 if ((nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big; 167 st->keepc[1] = st->keepc[2]; 168 169 if ( keepc2 == ISC_halant || keepc2 == ISC_nukta ) 170 { 171 int nbytes_2 = 0; 172 if (keepc2 == ISC_halant) uniid = UNI_ZWNJ; /* explicit Halant */ 173 if (keepc2 == ISC_nukta) uniid = UNI_ZWJ; /* soft Halant */ 174 175 if ((nbytes_2 = copy_to_outbuf(uniid, buf+nBytes, buflen)) == 0) goto E2big; 176 st->keepc[1] = st->keepc[2] = 0; 177 178 nBytes += nbytes_2; 179 } 180 181 break; 182 case ISC_danda: 183 if ( isc_type == DEV && keepc2 == ISC_danda ) 184 { /* only in Devanagari script, it works */ 185 uniid = UNI_DOUBLE_DANDA; 186 if ((nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big; 187 st->keepc[1] = st->keepc[2] = 0; 188 189 break; 190 } 191 192 /* fall into default case, convert the DANDA if it isn't DOUBLE_DANDA */ 193 /* FALLTHRU */ 194 default: 195 196 uniid = traverse_table(en.entry, en.items, keepc1); 197 198 if ( have_nukta(isc_type) && keepc2 == ISC_nukta) { 199 /* then try to test whether it is Nukta Cases */ 200 int ucs; 201 202 if (( ucs = get_nukta(keepc1, isc_type)) != 0 ) { 203 204 uniid = ucs; 205 206 if ( (nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big; 207 st->keepc[1] = st->keepc[2] = 0; 208 } else { 209 if ( (nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big; 210 st->keepc[1] = st->keepc[2]; 211 } 212 } else { 213 if ( (nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big; 214 st->keepc[1] = st->keepc[2]; 215 } 216 break; 217 } /* end of switch */ 218 } else { /* ASCII */ 219 uniid = keepc1; 220 if ( (nBytes = copy_to_outbuf(uniid, buf, buflen)) == 0) goto E2big; 221 st->keepc[1] = st->keepc[2]; 222 } 223 st->keepc[2] = 0; 224 } 225 226 E2big: 227 return nBytes; 228 } 229 230 static int 231 copy_to_outbuf(ucs_t uniid, char *buf, size_t buflen) 232 { 233 if (uniid > 0) { 234 if (uniid <= 0x7f) { 235 if (buflen < 1) { 236 errno = E2BIG; 237 return(0); 238 } 239 UTF8_SET1B(buf, uniid); 240 return (1); 241 } 242 243 if (uniid >= 0x80 && uniid <= 0x7ff) { 244 if (buflen < 2) { 245 errno = E2BIG; 246 return(0); 247 } 248 UTF8_SET2B(buf, uniid); 249 return (2); 250 } 251 252 if (uniid >= 0x800 && uniid <= 0xffff) { 253 if (buflen < 3) { 254 errno = E2BIG; 255 return(0); 256 } 257 UTF8_SET3B(buf, uniid); 258 return (3); 259 } 260 } else { /* Replacement Character */ 261 if ( buflen < 3 ) { 262 errno = E2BIG; 263 return 0; 264 } 265 266 *buf = (char)REPLACE_CHAR1; 267 *(buf+1) = (char)REPLACE_CHAR2; 268 *(buf+2) = (char)REPLACE_CHAR3; 269 return (3); 270 } 271 272 /* This code shouldn't be reached */ 273 return (0); 274 } 275 276 /* 277 * Open; called from iconv_open() 278 */ 279 void * 280 _icv_open() 281 { 282 _iconv_st *st; 283 284 if ((st = (_iconv_st*)malloc(sizeof(_iconv_st))) == NULL) { 285 errno = ENOMEM; 286 return ((void*)-1); 287 } 288 289 bzero(st, sizeof(_iconv_st)); 290 st->keepc[0] = DEV_ATR; 291 st->pState = S_BASIC; 292 293 return ((void*)st); 294 } 295 296 /* 297 * Close; called from iconv_close() 298 */ 299 void 300 _icv_close(_iconv_st *st) 301 { 302 if (!st) 303 errno = EBADF; 304 else 305 free(st); 306 } 307 308 /* 309 * Conversion routine; called from iconv() 310 */ 311 size_t 312 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft, 313 char **outbuf, size_t *outbytesleft) 314 { 315 int n; 316 short curState; 317 318 if (st == NULL) { 319 errno = EBADF; 320 return ((size_t) -1); 321 } 322 323 if (inbuf == NULL || *inbuf == NULL) { /* Reset request */ 324 st->keepc[0] = DEV_ATR; 325 st->pState = S_BASIC; 326 st->_errno = 0; 327 return ((size_t)0); 328 } 329 330 /* flush if possible */ 331 if ( st->_errno == E2BIG ) { 332 n = iscii_to_utf8(st, *outbuf, *outbytesleft); 333 (*outbuf) += n; 334 (*outbytesleft) -= n; 335 } 336 337 st->_errno = errno = 0; /* reset internal and external errno */ 338 339 /* a state machine for interpreting ISCII code */ 340 while (*inbytesleft > 0 && *outbytesleft > 0) { 341 unsigned int curChar = (unsigned int)(**inbuf & ONEBYTE); 342 unsigned int prevChar = (unsigned int)(st->keepc[1] & ONEBYTE); 343 344 if (curChar == ISC_ext) 345 curState = S_EXT; 346 else if (curChar == ISC_atr) 347 curState = S_ATR; 348 else 349 curState = S_BASIC; 350 351 switch (curState) { 352 case S_BASIC: 353 if (prevChar == 0) 354 st->keepc[1] = curChar; 355 else 356 st->keepc[2] = curChar; 357 358 if (st->pState == S_ATR) { 359 /* clear the keepc[1], which is part of attribute */ 360 st->keepc[1] = 0; 361 /* change the attribute for Indian Script Fonts */ 362 if ((curChar >= 0x42) && (curChar <= 0x4b) && curChar != 0x46) { 363 st->keepc[0] = curChar; 364 } 365 /* other attributes such as display attributes would be ignored */ 366 } else { /* Handle Cases and Flush */ 367 368 if ((curChar > 0 && curChar <= 0x7f) || prevChar != 0) { 369 n=iscii_to_utf8(st, *outbuf, *outbytesleft); 370 if (n > 0) { 371 (*outbuf) += n; 372 (*outbytesleft) -= n; 373 } else /* don't return immediately, need advance the *inbuf */ 374 st->_errno = errno; 375 } 376 } 377 break; 378 case S_ATR: 379 case S_EXT: /* Do nothing */ 380 if (st->pState == S_BASIC) { /* Flush */ 381 if ( st->keepc[1] == 0 ) 382 { 383 if (curState == S_EXT) st->keepc[1] = ISC_ext; 384 break; 385 } 386 n = iscii_to_utf8(st, *outbuf, *outbytesleft); 387 if (n > 0) { 388 (*outbuf) += n; 389 (*outbytesleft) -= n; 390 } else /* don't return immediately */ 391 st->_errno = errno; 392 393 if (curState == S_EXT) st->keepc[1] = ISC_ext; 394 } else { 395 errno = EILSEQ; 396 return (size_t)-1; 397 } 398 399 break; 400 default: /* should never come here */ 401 st->_errno = errno = EILSEQ; 402 st->pState = S_BASIC; /* reset state */ 403 break; 404 } 405 406 st->pState = curState; 407 408 (*inbuf)++; 409 (*inbytesleft)--; 410 411 if (errno) 412 return(size_t)-1; 413 } 414 415 if (*inbytesleft > 0 && *outbytesleft == 0) { 416 /* in this case, the st->_errno is zero */ 417 errno = E2BIG; 418 return(size_t)-1; 419 } 420 421 return (size_t)(*inbytesleft); 422 } 423