1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1995, by Sun Microsystems, Inc. 24 * All rights reserved. 25 */ 26 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <errno.h> 30 #include "big5_cns11643.h" /* Big-5 to CNS 11643 mapping table */ 31 32 #define MSB 0x80 /* most significant bit */ 33 #define MBYTE 0x8e /* multi-byte (4 byte character) */ 34 #define PMASK 0xa0 /* plane number mask */ 35 #define ONEBYTE 0xff /* right most byte */ 36 #define MSB_OFF 0x7f /* mask off MSB */ 37 38 #define SI 0x0f /* shift in */ 39 #define SO 0x0e /* shift out */ 40 #define ESC 0x1b /* escape */ 41 42 /* static const char plane_char[] = "0GH23456789:;<=>?"; */ 43 static const char plane_char[] = "0GHIJKLMNOPQRSTUV"; 44 45 #define GET_PLANEC(i) (plane_char[i]) 46 47 #define NON_ID_CHAR '_' /* non-identified character */ 48 49 typedef struct _icv_state { 50 char keepc[2]; /* maximum # byte of Big-5 code */ 51 short cstate; /* state machine id (Big-5) */ 52 short istate; /* state machine id (ISO) */ 53 int _errno; /* internal errno */ 54 } _iconv_st; 55 56 enum _CSTATE { C0, C1 }; 57 enum _ISTATE { IN, OUT }; 58 59 60 static int big5_2nd_byte(char); 61 static int get_plane_no_by_big5(const char, const char, int*, unsigned long*); 62 static int big5_to_iso(int, int, unsigned long, char*, size_t); 63 static int binsearch(unsigned long, table_t[], int); 64 65 66 /* 67 * Open; called from iconv_open() 68 */ 69 void * 70 _icv_open() 71 { 72 _iconv_st *st; 73 74 if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) { 75 errno = ENOMEM; 76 return ((void *) -1); 77 } 78 79 st->cstate = C0; 80 st->istate = IN; 81 st->_errno = 0; 82 83 #ifdef DEBUG 84 fprintf(stderr, "========== iconv(): Big-5 --> ISO 2022-7 ==========\n"); 85 #endif 86 return ((void *) st); 87 } 88 89 90 /* 91 * Close; called from iconv_close() 92 */ 93 void 94 _icv_close(_iconv_st *st) 95 { 96 if (!st) 97 errno = EBADF; 98 else 99 free(st); 100 } 101 102 103 /* 104 * Actual conversion; called from iconv() 105 */ 106 /*======================================================= 107 * 108 * State Machine for interpreting Big-5 code 109 * 110 *======================================================= 111 * 112 * 1st C 113 * +--------> C0 ----------> C1 114 * | ascii | 2nd C | 115 * ^ v v 116 * +----<-----+-----<--------+ 117 * 118 *=======================================================*/ 119 /* 120 * Big-5 encoding range: 121 * High byte: 0xA1 - 0xFE ( 94 encoding space) 122 * Low byte: 0x40 - 0x7E, 0xA1 - 0xFE ( 157 encoding space) 123 * Plane #1: 0xA140 - 0xC8FE ( 6280 encoding space) 124 * Plane #2: 0xC940 - 0xFEFE ( 8478 encoding space) 125 * Total: 94 * 157 = 14,758 (14758 encoding space) 126 */ 127 size_t 128 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft, 129 char **outbuf, size_t *outbytesleft) 130 { 131 int plane_no, n, unidx; 132 unsigned long cnscode; 133 /* pre_plane_no: need to be static when re-entry occurs on errno set */ 134 static int pre_plane_no = -1; /* previous plane number */ 135 136 if (st == NULL) { 137 errno = EBADF; 138 return ((size_t) -1); 139 } 140 141 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */ 142 st->cstate = C0; 143 st->istate = IN; 144 st->_errno = 0; 145 return ((size_t) 0); 146 } 147 148 #ifdef DEBUG 149 fprintf(stderr, "=== (Re-entry) iconv(): Big-5 --> ISO 2022-7 ===\n"); 150 fprintf(stderr, "st->cstate=%d\tst->istate=%d\tst->_errno=%d\tplane_no=%d\n", 151 st->cstate, st->istate, st->_errno, plane_no); 152 #endif 153 st->_errno = 0; /* reset internal errno */ 154 errno = 0; /* reset external errno */ 155 156 /* a state machine for interpreting Big-5 code */ 157 while (*inbytesleft > 0 && *outbytesleft > 0) { 158 switch (st->cstate) { 159 case C0: /* assuming ASCII in the beginning */ 160 if (**inbuf & MSB) { 161 st->keepc[0] = (**inbuf); 162 st->cstate = C1; 163 } else { /* real ASCII */ 164 if (st->istate == OUT) { 165 st->cstate = C0; 166 st->istate = IN; 167 **outbuf = SI; 168 (*outbuf)++; 169 (*outbytesleft)--; 170 if (*outbytesleft <= 0) { 171 errno = E2BIG; 172 return((size_t)-1); 173 } 174 } 175 **outbuf = **inbuf; 176 (*outbuf)++; 177 (*outbytesleft)--; 178 } 179 break; 180 case C1: /* Chinese characters: 2nd byte */ 181 if (big5_2nd_byte(**inbuf) != 0) { /* illegal Big-5 */ 182 st->cstate = C0; 183 st->istate = IN; 184 st->_errno = errno = EILSEQ; 185 break; 186 } 187 st->keepc[1] = (**inbuf); 188 plane_no = get_plane_no_by_big5(st->keepc[0], 189 st->keepc[1], &unidx, &cnscode); 190 if (plane_no < 0) { /* legal Big-5; illegal CNS */ 191 st->cstate = C0; 192 st->istate = IN; 193 st->_errno = errno = EILSEQ; 194 break; 195 } 196 197 if ((st->istate == IN) || (pre_plane_no != plane_no)) { 198 /* change plane # in Chinese mode */ 199 if (st->istate == OUT) { 200 **outbuf = SI; 201 (*outbuf)++; 202 (*outbytesleft)--; 203 #ifdef DEBUG 204 fprintf(stderr, "(plane #=%d\tpre_plane #=%d)\t", plane_no, pre_plane_no); 205 #endif 206 } 207 if (*outbytesleft < 4) { 208 st->_errno = errno = E2BIG; 209 return((size_t)-1); 210 } 211 pre_plane_no = plane_no; 212 st->istate = OUT; /* shift out */ 213 **outbuf = ESC; 214 *(*outbuf+1) = '$'; 215 *(*outbuf+2) = ')'; 216 *(*outbuf+3) = GET_PLANEC(plane_no); 217 #ifdef DEBUG 218 fprintf(stderr, "ESC $ ) %c ", *(*outbuf+3)); 219 #endif 220 (*outbuf) += 4; 221 (*outbytesleft) -= 4; 222 if (*outbytesleft <= 0) { 223 st->_errno = errno = E2BIG; 224 return((size_t)-1); 225 } 226 st->istate = OUT; 227 **outbuf = SO; 228 (*outbuf)++; 229 (*outbytesleft)--; 230 } 231 n = big5_to_iso(plane_no, unidx, cnscode, 232 *outbuf, *outbytesleft); 233 if (n > 0) { 234 (*outbuf) += n; 235 (*outbytesleft) -= n; 236 } else { 237 st->_errno = errno; 238 return((size_t)-1); 239 } 240 st->cstate = C0; 241 break; 242 default: /* should never come here */ 243 st->_errno = errno = EILSEQ; 244 st->cstate = C0; /* reset state */ 245 break; 246 } 247 248 (*inbuf)++; 249 (*inbytesleft)--; 250 251 if (st->_errno) { 252 #ifdef DEBUG 253 fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\n", 254 st->_errno, st->cstate); 255 #endif 256 break; 257 } 258 if (errno) 259 return((size_t)-1); 260 } 261 262 if (*inbytesleft > 0 && *outbytesleft == 0) { 263 errno = E2BIG; 264 return((size_t)-1); 265 } 266 return (*inbytesleft); 267 } 268 269 270 /* 271 * Test whether inbuf is a valid character for 2nd byte Big-5 code 272 * Return: = 0 - valid Big-5 2nd byte 273 * = 1 - invalid Big-5 2nd byte 274 */ 275 static int big5_2nd_byte(char inbuf) 276 { 277 unsigned int buf = (unsigned int) (inbuf & ONEBYTE); 278 279 if ((buf >= 0x40) && (buf <= 0x7E)) 280 return (0); 281 if ((buf >= 0xA1) && (buf <= 0xFE)) 282 return (0); 283 return(1); 284 } 285 286 287 /* 288 * Get plane number by Big-5 code; i.e. plane #1 returns 1, #2 returns 2, etc. 289 * Returns -1 on error conditions 290 * 291 * Since binary search of the Big-5 to CNS table is necessary, might as well 292 * return index and CNS code matching to the unicode. 293 */ 294 static int get_plane_no_by_big5(const char c1, const char c2, 295 int *unidx, unsigned long *cnscode) 296 { 297 int ret; 298 unsigned long big5code; 299 300 big5code = (unsigned long) ((c1 & ONEBYTE) << 8) + (c2 & ONEBYTE); 301 *unidx = binsearch(big5code, big5_cns_tab, MAX_BIG5_NUM); 302 if ((*unidx) >= 0) 303 *cnscode = big5_cns_tab[*unidx].value; 304 else 305 return(0); /* match from Big-5 to CNS not found */ 306 #ifdef DEBUG 307 fprintf(stderr, "Big-5=%04x, idx=%5d, CNS=%06x ", big5code, *unidx, *cnscode); 308 #endif 309 310 ret = (int) (*cnscode >> 16); 311 switch (ret) { 312 case 0x21: /* 0x8EA1 - G */ 313 case 0x22: /* 0x8EA2 - H */ 314 case 0x23: /* 0x8EA3 - I */ 315 case 0x24: /* 0x8EA4 - J */ 316 case 0x25: /* 0x8EA5 - K */ 317 case 0x26: /* 0x8EA6 - L */ 318 case 0x27: /* 0x8EA7 - M */ 319 case 0x28: /* 0x8EA8 - N */ 320 case 0x29: /* 0x8EA9 - O */ 321 case 0x2a: /* 0x8EAA - P */ 322 case 0x2b: /* 0x8EAB - Q */ 323 case 0x2c: /* 0x8EAC - R */ 324 case 0x2d: /* 0x8EAD - S */ 325 case 0x2f: /* 0x8EAF - U */ 326 case 0x30: /* 0x8EB0 - V */ 327 return (ret - 0x20); /* so that we can use GET_PLANEC() */ 328 case 0x2e: /* 0x8EAE - T */ 329 return (3); /* CNS 11643-1992 */ 330 default: 331 return (-1); 332 } 333 } 334 335 336 /* 337 * Big-5 code --> ISO 2022-7 338 * Return: > 0 - converted with enough space in output buffer 339 * = 0 - no space in outbuf 340 */ 341 static int big5_to_iso(int plane_no, int unidx, unsigned long cnscode, 342 char *buf, size_t buflen) 343 { 344 unsigned long val; /* CNS 11643 value */ 345 #ifdef DEBUG 346 char cns_str[5]; 347 #endif 348 349 if (buflen < 2) { 350 errno = E2BIG; 351 return(0); 352 } 353 354 if (unidx < 0) { /* no match from UTF8 to CNS 11643 */ 355 *buf = *(buf+1) = NON_ID_CHAR; 356 } else { 357 val = cnscode & 0xffff; 358 *buf = (val & 0xff00) >> 8; 359 *(buf+1) = val & 0xff; 360 } 361 362 #ifdef DEBUG 363 fprintf(stderr, "->%02x %02x<-\t->%c %c<-\t", *buf, *(buf+1), *buf, *(buf+1)); 364 #endif 365 366 #ifdef DEBUG 367 switch (plane_no) { 368 case 1: 369 cns_str[0] = *buf | MSB; 370 cns_str[1] = *(buf+1) | MSB; 371 cns_str[2] = cns_str[3] = cns_str[4] = NULL; 372 break; 373 case 2: 374 case 3: 375 case 4: 376 case 5: 377 case 6: 378 case 7: 379 case 8: 380 case 9: 381 case 10: 382 case 11: 383 case 12: 384 case 13: 385 case 14: 386 case 15: 387 case 16: 388 cns_str[0] = MBYTE; 389 cns_str[1] = (char) PMASK + plane_no; 390 cns_str[2] = (char) *buf | MSB; 391 cns_str[3] = (char) *(buf+1) | MSB; 392 cns_str[4] = NULL; 393 break; 394 } 395 396 fprintf(stderr, "#%d ->%s<-\n", plane_no, cns_str); 397 #endif 398 399 return(2); 400 } 401 402 403 /* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */ 404 static int binsearch(unsigned long x, table_t v[], int n) 405 { 406 int low, high, mid; 407 408 low = 0; 409 high = n - 1; 410 while (low <= high) { 411 mid = (low + high) / 2; 412 if (x < v[mid].key) 413 high = mid - 1; 414 else if (x > v[mid].key) 415 low = mid + 1; 416 else /* found match */ 417 return mid; 418 } 419 return (-1); /* no match */ 420 } 421