1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1995, by Sun Microsystems, Inc. 24 * All rights reserved. 25 */ 26 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <errno.h> 30 #include "cns11643_big5.h" /* CNS 11643 to Big-5 mapping table */ 31 32 #define MSB 0x80 /* most significant bit */ 33 #define MBYTE 0x8e /* multi-byte (4 byte character) */ 34 #define PMASK 0xa0 /* plane number mask */ 35 #define ONEBYTE 0xff /* right most byte */ 36 #define MSB_OFF 0x7f /* mask off MBS */ 37 38 #define NON_ID_CHAR '_' /* non-identified character */ 39 40 typedef struct _icv_state { 41 char keepc[4]; /* maximum # byte of CNS11643 code */ 42 short cstate; /* state machine id */ 43 int _errno; /* internal errno */ 44 } _iconv_st; 45 46 enum _CSTATE { C0, C1, C2, C3 }; 47 48 49 static int get_plane_no_by_char(const char); 50 static int cns_to_big5(int, char[], char*, size_t); 51 static int binsearch(unsigned long, table_t[], int); 52 53 54 /* 55 * Open; called from iconv_open() 56 */ 57 void * 58 _icv_open() 59 { 60 _iconv_st *st; 61 62 if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) { 63 errno = ENOMEM; 64 return ((void *) -1); 65 } 66 67 st->cstate = C0; 68 st->_errno = 0; 69 70 #ifdef DEBUG 71 fprintf(stderr, "========== iconv(): CNS11643 --> Big-5 ==========\n"); 72 #endif 73 74 return ((void *) st); 75 } 76 77 78 /* 79 * Close; called from iconv_close() 80 */ 81 void 82 _icv_close(_iconv_st *st) 83 { 84 if (!st) 85 errno = EBADF; 86 else 87 free(st); 88 } 89 90 91 /* 92 * Actual conversion; called from iconv() 93 */ 94 /*======================================================= 95 * 96 * State Machine for interpreting CNS 11643 code 97 * 98 *======================================================= 99 * 100 * plane 2 - 16 101 * 1st C 2nd C 3rd C 102 * +------> C0 -----> C1 -----------> C2 -----> C3 103 * | ascii | plane 1 | 4th C | 104 * ^ v 2nd C v v 105 * +----<---+-----<----+-------<---------<-------+ 106 * 107 *=======================================================*/ 108 size_t 109 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft, 110 char **outbuf, size_t *outbytesleft) 111 { 112 int plane_no = -1, n; 113 114 if (st == NULL) { 115 errno = EBADF; 116 return ((size_t) -1); 117 } 118 119 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */ 120 st->cstate = C0; 121 st->_errno = 0; 122 return ((size_t) 0); 123 } 124 125 #ifdef DEBUG 126 fprintf(stderr, "=== (Re-entry) iconv(): CNS 11643 --> Big-5 ===\n"); 127 #endif 128 st->_errno = 0; /* reset internal errno */ 129 errno = 0; /* reset external errno */ 130 131 /* a state machine for interpreting CNS 11643 code */ 132 while (*inbytesleft > 0 && *outbytesleft > 0) { 133 switch (st->cstate) { 134 case C0: /* assuming ASCII in the beginning */ 135 if (**inbuf & MSB) { 136 st->keepc[0] = (**inbuf); 137 st->cstate = C1; 138 } else { /* real ASCII */ 139 **outbuf = **inbuf; 140 (*outbuf)++; 141 (*outbytesleft)--; 142 } 143 break; 144 case C1: /* Chinese characters: 2nd byte */ 145 if ((st->keepc[0] & ONEBYTE) == MBYTE) { /* 4-byte (0x8e) */ 146 plane_no = get_plane_no_by_char(**inbuf); 147 if (plane_no == -1) { /* illegal plane */ 148 st->_errno = errno = EILSEQ; 149 } else { /* 4-byte Chinese character */ 150 st->keepc[1] = (**inbuf); 151 st->cstate = C2; 152 } 153 } else { /* 2-byte Chinese character - plane #1 */ 154 if (**inbuf & MSB) { /* plane #1 */ 155 st->keepc[1] = (**inbuf); 156 st->keepc[2] = st->keepc[3] = NULL; 157 n = cns_to_big5(1, st->keepc, *outbuf, 158 *outbytesleft); 159 if (n > 0) { 160 (*outbuf) += n; 161 (*outbytesleft) -= n; 162 163 st->cstate = C0; 164 } else { /* don't reset state */ 165 st->_errno = errno = E2BIG; 166 } 167 } else { /* input char doesn't belong 168 * to the input code set 169 */ 170 st->_errno = errno = EILSEQ; 171 } 172 } 173 break; 174 case C2: /* plane #2 - #16 (4 bytes): get 3nd byte */ 175 if (**inbuf & MSB) { /* 3rd byte */ 176 st->keepc[2] = (**inbuf); 177 st->cstate = C3; 178 } else { 179 st->_errno = errno = EILSEQ; 180 } 181 break; 182 case C3: /* plane #2 - #16 (4 bytes): get 4th byte */ 183 if (**inbuf & MSB) { /* 4th byte */ 184 st->keepc[3] = (**inbuf); 185 n = cns_to_big5(plane_no, st->keepc, *outbuf, 186 *outbytesleft ); 187 if (n > 0) { 188 (*outbuf) += n; 189 (*outbytesleft) -= n; 190 191 st->cstate = C0; /* reset state */ 192 } else { /* don't reset state */ 193 st->_errno = errno = E2BIG; 194 } 195 } else { 196 st->_errno = errno = EILSEQ; 197 } 198 break; 199 default: /* should never come here */ 200 st->_errno = errno = EILSEQ; 201 st->cstate = C0; /* reset state */ 202 break; 203 } 204 205 if (st->_errno) { 206 #ifdef DEBUG 207 fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\n", 208 st->_errno, st->cstate); 209 #endif 210 break; 211 } 212 213 (*inbuf)++; 214 (*inbytesleft)--; 215 } 216 217 if (errno) return ((size_t) -1); 218 219 if (*inbytesleft == 0 && st->cstate != C0) { 220 errno = EINVAL; 221 return ((size_t) -1); 222 } 223 224 if (*inbytesleft > 0 && *outbytesleft == 0) { 225 errno = E2BIG; 226 return((size_t)-1); 227 } 228 return (*inbytesleft); 229 } 230 231 232 /* 233 * Get plane number by char; i.e. 0xa2 returns 2, 0xae returns 14, etc. 234 * Returns -1 on error conditions 235 */ 236 static int get_plane_no_by_char(const char inbuf) 237 { 238 int ret; 239 unsigned char uc = (unsigned char) inbuf; 240 241 ret = uc - PMASK; 242 switch (ret) { 243 case 1: /* 0x8EA1 */ 244 case 2: /* 0x8EA2 */ 245 case 3: /* 0x8EA3 */ 246 case 4: /* 0x8EA4 */ 247 case 5: /* 0x8EA5 */ 248 case 6: /* 0x8EA6 */ 249 case 7: /* 0x8EA7 */ 250 case 12: /* 0x8EAC */ 251 case 14: /* 0x8EAE */ 252 case 15: /* 0x8EAF */ 253 case 16: /* 0x8EB0 */ 254 return (ret); 255 default: 256 return (-1); 257 } 258 } 259 260 261 /* 262 * CNS 11643 code --> Big-5 263 * Return: > 0 - converted with enough space in output buffer 264 * = 0 - no space in outbuf 265 */ 266 static int cns_to_big5(int plane_no, char keepc[], char *buf, size_t buflen) 267 { 268 char cns_str[3]; 269 unsigned long cns_val; /* MSB mask off CNS 11643 value */ 270 int unidx; /* binary search index */ 271 unsigned long big5_val, val; /* Big-5 code */ 272 273 #ifdef DEBUG 274 fprintf(stderr, "%s %d ", keepc, plane_no); 275 #endif 276 if (buflen < 2) { 277 errno = E2BIG; 278 return(0); 279 } 280 281 if (plane_no == 1) { 282 cns_str[0] = keepc[0] & MSB_OFF; 283 cns_str[1] = keepc[1] & MSB_OFF; 284 } else { 285 cns_str[0] = keepc[2] & MSB_OFF; 286 cns_str[1] = keepc[3] & MSB_OFF; 287 } 288 cns_val = (cns_str[0] << 8) + cns_str[1]; 289 #ifdef DEBUG 290 fprintf(stderr, "%x\t", cns_val); 291 #endif 292 293 switch (plane_no) { 294 case 1: 295 unidx = binsearch(cns_val, cns_big5_tab1, MAX_CNS1_NUM); 296 if (unidx >= 0) 297 big5_val = cns_big5_tab1[unidx].value; 298 break; 299 case 2: 300 unidx = binsearch(cns_val, cns_big5_tab2, MAX_CNS2_NUM); 301 if (unidx >= 0) 302 big5_val = cns_big5_tab2[unidx].value; 303 break; 304 case 3: 305 unidx = binsearch(cns_val, cns_big5_tab3, MAX_CNS3_NUM); 306 if (unidx >= 0) 307 big5_val = cns_big5_tab3[unidx].value; 308 break; 309 default: 310 unidx = -1; /* no mapping from CNS to Big-5 */ 311 break; 312 } 313 314 #ifdef DEBUG 315 fprintf(stderr, "unidx = %d, value = %x\t", unidx, big5_val); 316 #endif 317 318 if (unidx < 0) { /* no match from CNS to Big-5 */ 319 *buf = *(buf+1) = NON_ID_CHAR; 320 } else { 321 val = big5_val & 0xffff; 322 *buf = (char) ((val & 0xff00) >> 8); 323 *(buf+1) = (char) (val & 0xff); 324 } 325 326 #ifdef DEBUG 327 fprintf(stderr, "\t->%x %x<-\n", *buf, *(buf+1)); 328 #endif 329 330 return(2); 331 } 332 333 334 /* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */ 335 static int binsearch(unsigned long x, table_t v[], int n) 336 { 337 int low, high, mid; 338 339 low = 0; 340 high = n - 1; 341 while (low <= high) { 342 mid = (low + high) / 2; 343 if (x < v[mid].key) 344 high = mid - 1; 345 else if (x > v[mid].key) 346 low = mid + 1; 347 else /* found match */ 348 return mid; 349 } 350 return (-1); /* no match */ 351 } 352