1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright(c) 2001 Sun Microsystems, Inc. 23 * All rights reserved. 24 */ 25 #include <stdio.h> 26 #include <ctype.h> 27 #include <errno.h> 28 #include <strings.h> 29 #include <stdlib.h> 30 #include "ea-iscii.h" 31 32 #define MSB 0x80 33 #define REPLACE_CHAR '?' 34 #define EA_START 0x40 35 36 #define get_vowel(a) EAISCII_vowel_type[(a) - EA_START] 37 #define get_nukta_value(a) EAISCII_nukta_type[(a) - EA_START] 38 #define is_first_vowel(a) ((a) == FIRST_VOWEL) 39 #define is_nukta(a) ((a) == NUKTA_VALUE) 40 41 typedef enum { SPACE, ASCII, POSSIBLE_ISCII, ISCII } CONTEXT; 42 typedef struct _icv_state { 43 uchar keepc; /* if is_vowel is true, store the char following the FIRST_VOWEL */ 44 CONTEXT context; 45 int is_vowel; 46 } _iconv_st; 47 48 static uchar 49 traverse_table(Entry *entry , int num, uchar ea_iscii) 50 { 51 int i=0; 52 uchar iscii=0; 53 54 for ( ; i < num; ++i) { 55 Entry en = entry[i]; 56 57 if ( ea_iscii < en.ea_iscii ) break; 58 if ( ea_iscii >= en.ea_iscii && ea_iscii < en.ea_iscii + en.count ) { 59 iscii = (ea_iscii - en.ea_iscii) + en.iscii; 60 break; 61 } 62 } 63 64 return iscii; 65 } 66 67 /* 68 * run in ISCII context. 69 * ea_iscii being 0: flush the keepc 70 * flag return 0: don't decide iscii yet, need to advance the next char in outbuf 71 */ 72 static uchar 73 get_iscii(_iconv_st *st, uchar ea_iscii, int *flag) 74 { 75 uchar iscii = 0; 76 77 if ( st->keepc == 0 ) { 78 if ( ea_iscii == 0 ) { *flag = 0; return 0; } 79 if ( ea_iscii < EA_START ) return 0; /* invalid iscii */ 80 81 if ( get_nukta_value(ea_iscii) || is_first_vowel(ea_iscii) ) { 82 /* do nothing except store ea_iscii into st->keepc */ 83 *flag = 0; 84 st->keepc = ea_iscii; 85 } else { 86 iscii = traverse_table( eaiscii_isc_tbl, 87 sizeof(eaiscii_isc_tbl)/sizeof(Entry), ea_iscii); 88 } 89 } else { 90 uchar vowel, nukta_value; 91 92 if ( st->is_vowel ) { 93 /* need decide whether it is 0xAE or 0xB2 case */ 94 if ( ea_iscii >= EA_START && is_nukta(ea_iscii) ) { 95 if ( st->keepc == 0x73 ) iscii = 0xAE; 96 if ( st->keepc == 0x76 ) iscii = 0xB2; 97 st->keepc = 0; 98 } else { 99 iscii = get_vowel(st->keepc); 100 st->keepc = ea_iscii; 101 } 102 st->is_vowel = 0; 103 goto end; 104 } 105 106 if ( is_first_vowel(st->keepc) ) { 107 if ( (ea_iscii >= EA_START) && (vowel = get_vowel(ea_iscii)) ) { 108 if ( ea_iscii == 0x73 || ea_iscii == 0x76 ) { 109 st->keepc = ea_iscii; 110 *flag = 0; 111 st->is_vowel = 1; 112 } else { 113 st->keepc = 0; 114 iscii = vowel; 115 } 116 } else { 117 iscii = traverse_table( eaiscii_isc_tbl, 118 sizeof(eaiscii_isc_tbl)/sizeof(Entry), st->keepc); 119 st->keepc = ea_iscii; 120 } 121 } else if ( (st->keepc >= EA_START) && (nukta_value = get_nukta_value(st->keepc))) { 122 if ( ea_iscii >= EA_START && is_nukta(ea_iscii) ) { 123 st->keepc = 0; 124 iscii = nukta_value; 125 } else { 126 iscii = traverse_table( eaiscii_isc_tbl, 127 sizeof(eaiscii_isc_tbl)/sizeof(Entry), st->keepc); 128 st->keepc = ea_iscii; 129 } 130 } else { 131 iscii = traverse_table( eaiscii_isc_tbl, 132 sizeof(eaiscii_isc_tbl)/sizeof(Entry), st->keepc); 133 st->keepc = ea_iscii; 134 } 135 } 136 137 end: 138 return iscii; 139 } 140 141 void * 142 _icv_open() 143 { 144 _iconv_st *st; 145 146 if ((st = (_iconv_st*)malloc(sizeof(_iconv_st))) == NULL) { 147 errno = ENOMEM; 148 return ((void*)-1); 149 } 150 151 bzero(st, sizeof(_iconv_st)); 152 153 return ((void*)st); 154 } 155 156 /* 157 * Close; called from iconv_close() 158 */ 159 void 160 _icv_close(_iconv_st *st) 161 { 162 if (!st) 163 errno = EBADF; 164 else 165 free(st); 166 } 167 168 size_t 169 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft, 170 char **outbuf, size_t *outbytesleft) 171 { 172 if (st == NULL) { 173 errno = EBADF; 174 return ((size_t) -1); 175 } 176 177 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */ 178 return ((size_t)0); 179 } 180 181 /* a state machine for interpreting ISCII code */ 182 while (*inbytesleft > 0 && *outbytesleft > 0) { 183 uchar c = (uchar)**inbuf; 184 185 if ( c & MSB ) { errno = EILSEQ; return (size_t)-1; } 186 187 switch (st->context) { 188 case SPACE: 189 if ( c == LEADING_BYTE ) st->context = POSSIBLE_ISCII; 190 else { 191 if ( !isspace(c) ) st->context = ASCII; 192 **outbuf = c; 193 (*outbuf)++; 194 (*outbytesleft)--; 195 } 196 break; 197 case ASCII: 198 if ( isspace(c) ) st->context = SPACE; 199 **outbuf = c; 200 (*outbuf)++; 201 (*outbytesleft)--; 202 break; 203 case POSSIBLE_ISCII: 204 /* it is impossible to represent with 'xx' one ASCII word that starts with 'x' */ 205 if ( !isspace(c) ) { st->context = ISCII; continue; } /* don't advance */ 206 207 **outbuf = LEADING_BYTE; /* the previous 'x' */ 208 (*outbuf)++; 209 (*outbytesleft)--; 210 st->context = ASCII; 211 212 if (*outbytesleft < 1) { 213 errno = E2BIG; 214 return (size_t)-1; 215 } 216 217 **outbuf = c; 218 (*outbuf)++; 219 (*outbytesleft)--; 220 st->context = SPACE; 221 222 break; 223 case ISCII: 224 if ( isspace(c) ) { 225 uchar iscii; 226 int flag = 1; 227 228 /* flush keepc */ 229 iscii = get_iscii(st, 0, &flag); 230 if (flag) { 231 if ( iscii ) **outbuf = iscii; 232 else **outbuf = REPLACE_CHAR; 233 234 (*outbuf)++; 235 (*outbytesleft)--; 236 } 237 238 if ( *outbytesleft < 1 ) { 239 errno = E2BIG; 240 return (size_t)-1; 241 } 242 243 **outbuf = c; 244 (*outbuf)++; 245 (*outbytesleft)--; 246 st->context = SPACE; 247 } else { 248 uchar iscii; 249 int flag = 1; 250 251 iscii = get_iscii(st, c, &flag); 252 if (flag) { 253 if ( iscii ) **outbuf = iscii; 254 else **outbuf = REPLACE_CHAR; 255 256 (*outbuf)++; 257 (*outbytesleft)--; 258 } 259 } 260 break; 261 } 262 263 (*inbuf)++; 264 (*inbytesleft)--; 265 } 266 267 if ( *inbytesleft > 0 && *outbytesleft == 0 ) { 268 errno = E2BIG; 269 return ((size_t)-1); 270 } 271 272 return ((size_t)(*inbytesleft)); 273 } 274