1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 1995, by Sun Microsystems, Inc. 24 * All rights reserved. 25 */ 26 27 #include <stdio.h> 28 #include <stdlib.h> 29 #include <errno.h> 30 #include "cns11643_big5.h" /* CNS 11643 to Big-5 mapping table */ 31 32 #define MSB 0x80 /* most significant bit */ 33 #define MBYTE 0x8e /* multi-byte (4 byte character) */ 34 #define PMASK 0xa0 /* plane number mask */ 35 #define ONEBYTE 0xff /* right most byte */ 36 #define MSB_OFF 0x7f /* mask off MBS */ 37 38 #define SI 0x0f /* shift in */ 39 #define SO 0x0e /* shift out */ 40 #define ESC 0x1b /* escape */ 41 42 /* 43 * static const char plane_char[] = "0GH23456789:;<=>?"; 44 * static const char plane_char[] = "0GHIJKLMNOPQRSTUV"; 45 * #define GET_PLANEC(i) (plane_char[i]) 46 */ 47 48 #define NON_ID_CHAR '_' /* non-identified character */ 49 50 typedef struct _icv_state { 51 char keepc[4]; /* maximum # byte of CNS11643 code */ 52 short cstate; /* state machine id */ 53 int plane_no; /* plane number for Chinese character */ 54 int _errno; /* internal errno */ 55 } _iconv_st; 56 57 enum _CSTATE { C0, C1, C2, C3, C4, C5, C6, C7 }; 58 59 60 static int get_plane_no_by_iso(const char); 61 static int iso_to_big5(int, char[], char*, size_t); 62 static int binsearch(unsigned long, table_t[], int); 63 64 65 /* 66 * Open; called from iconv_open() 67 */ 68 void * 69 _icv_open() 70 { 71 _iconv_st *st; 72 73 if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) { 74 errno = ENOMEM; 75 return ((void *) -1); 76 } 77 78 st->cstate = C0; 79 st->plane_no = 0; 80 st->_errno = 0; 81 82 #ifdef DEBUG 83 fprintf(stderr, "========== iconv(): ISO2022-7 --> Big-5 ==========\n"); 84 #endif 85 return ((void *) st); 86 } 87 88 89 /* 90 * Close; called from iconv_close() 91 */ 92 void 93 _icv_close(_iconv_st *st) 94 { 95 if (!st) 96 errno = EBADF; 97 else 98 free(st); 99 } 100 101 102 /* 103 * Actual conversion; called from iconv() 104 */ 105 /*========================================================================= 106 * 107 * State Machine for interpreting ISO 2022-7 code 108 * 109 *========================================================================= 110 * 111 * plane 2 - 16 112 * +---------->-------+ 113 * plane ^ | 114 * ESC $ ) number SO | plane 1 v 115 * +-> C0 ----> C1 ---> C2 ---> C3 ------> C4 --> C5 -------> C6 C7 116 * | | ascii | ascii | ascii | ascii | SI | | | | 117 * +----------------------------+ <-----+------+ +------<---+------+ 118 * ^ | 119 * | ascii v 120 * +---------<-------------<---------+ 121 * 122 *=========================================================================*/ 123 size_t 124 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft, 125 char **outbuf, size_t *outbytesleft) 126 { 127 int n; 128 129 if (st == NULL) { 130 errno = EBADF; 131 return ((size_t) -1); 132 } 133 134 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */ 135 st->cstate = C0; 136 st->_errno = 0; 137 return ((size_t) 0); 138 } 139 140 #ifdef DEBUG 141 fprintf(stderr, "=== (Re-entry) iconv(): ISO 2022-7 --> Big-5 ===\n"); 142 #endif 143 st->_errno = 0; /* reset internal errno */ 144 errno = 0; /* reset external errno */ 145 146 /* a state machine for interpreting ISO 2022-7 code */ 147 while (*inbytesleft > 0 && *outbytesleft > 0) { 148 switch (st->cstate) { 149 case C0: /* assuming ASCII in the beginning */ 150 if (**inbuf == ESC) { 151 st->cstate = C1; 152 } else { /* real ASCII */ 153 **outbuf = **inbuf; 154 (*outbuf)++; 155 (*outbytesleft)--; 156 } 157 break; 158 case C1: /* got ESC, expecting $ */ 159 if (**inbuf == '$') { 160 st->cstate = C2; 161 } else { 162 **outbuf = ESC; 163 (*outbuf)++; 164 (*outbytesleft)--; 165 st->cstate = C0; 166 st->_errno = 0; 167 continue; /* don't advance inbuf */ 168 } 169 break; 170 case C2: /* got $, expecting ) */ 171 if (**inbuf == ')') { 172 st->cstate = C3; 173 } else { 174 if (*outbytesleft < 2) { 175 st->_errno = errno = E2BIG; 176 return((size_t)-1); 177 } 178 **outbuf = ESC; 179 *(*outbuf+1) = '$'; 180 (*outbuf) += 2; 181 (*outbytesleft) -= 2; 182 st->cstate = C0; 183 st->_errno = 0; 184 continue; /* don't advance inbuf */ 185 } 186 break; 187 case C3: /* got ) expecting G,H,I,...,V */ 188 st->plane_no = get_plane_no_by_iso(**inbuf); 189 if (st->plane_no > 0 ) { /* plane #1 - #16 */ 190 st->cstate = C4; 191 } else { 192 if (*outbytesleft < 3) { 193 st->_errno = errno = E2BIG; 194 return((size_t)-1); 195 } 196 **outbuf = ESC; 197 *(*outbuf+1) = '$'; 198 *(*outbuf+2) = ')'; 199 (*outbuf) += 3; 200 (*outbytesleft) -= 3; 201 st->cstate = C0; 202 st->_errno = 0; 203 continue; /* don't advance inbuf */ 204 } 205 break; 206 case C4: /* SI (Shift In) */ 207 if (**inbuf == ESC) { 208 st->cstate = C1; 209 break; 210 } 211 if (**inbuf == SO) { 212 #ifdef DEBUG 213 fprintf(stderr, "<-------------- SO -------------->\n"); 214 #endif 215 st->cstate = C5; 216 } else { /* ASCII */ 217 **outbuf = **inbuf; 218 (*outbuf)++; 219 (*outbytesleft)--; 220 st->cstate = C0; 221 st->_errno = 0; 222 } 223 break; 224 case C5: /* SO (Shift Out) */ 225 if (**inbuf == SI) { 226 #ifdef DEBUG 227 fprintf(stderr, ">-------------- SI --------------<\n"); 228 #endif 229 st->cstate = C4; 230 } else { /* 1st Chinese character */ 231 if (st->plane_no == 1) { 232 st->keepc[0] = (char) (**inbuf | MSB); 233 st->cstate = C6; 234 } else { /* 4-bypte code: plane #2 - #16 */ 235 st->keepc[0] = (char) MBYTE; 236 st->keepc[1] = (char) (PMASK + 237 st->plane_no); 238 st->keepc[2] = (char) (**inbuf | MSB); 239 st->cstate = C7; 240 } 241 } 242 break; 243 case C6: /* plane #1: 2nd Chinese character */ 244 st->keepc[1] = (char) (**inbuf | MSB); 245 st->keepc[2] = st->keepc[3] = NULL; 246 n = iso_to_big5(1, st->keepc, *outbuf, *outbytesleft); 247 if (n > 0) { 248 (*outbuf) += n; 249 (*outbytesleft) -= n; 250 } else { 251 st->_errno = errno; 252 return((size_t)-1); 253 } 254 st->cstate = C5; 255 break; 256 case C7: /* 4th Chinese character */ 257 st->keepc[3] = (char) (**inbuf | MSB); 258 n = iso_to_big5(st->plane_no, st->keepc, *outbuf, 259 *outbytesleft); 260 if (n > 0) { 261 (*outbuf) += n; 262 (*outbytesleft) -= n; 263 } else { 264 st->_errno = errno; 265 return((size_t)-1); 266 } 267 st->cstate = C5; 268 break; 269 default: /* should never come here */ 270 st->_errno = errno = EILSEQ; 271 st->cstate = C0; /* reset state */ 272 break; 273 } 274 275 (*inbuf)++; 276 (*inbytesleft)--; 277 278 if (st->_errno) { 279 #ifdef DEBUG 280 fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\tinbuf=%x\n", 281 st->_errno, st->cstate, **inbuf); 282 #endif 283 break; 284 } 285 if (errno) 286 return((size_t)-1); 287 } 288 289 if (*inbytesleft > 0 && *outbytesleft == 0) { 290 errno = E2BIG; 291 return((size_t)-1); 292 } 293 return (*inbytesleft); 294 } 295 296 297 /* 298 * Get plane number by ISO plane char; i.e. 'G' returns 1, 'H' returns 2, etc. 299 * Returns -1 on error conditions 300 */ 301 static int get_plane_no_by_iso(const char inbuf) 302 { 303 int ret; 304 unsigned char uc = (unsigned char) inbuf; 305 306 if (uc == '0') /* plane #0 */ 307 return(0); 308 309 ret = uc - 'F'; 310 switch (ret) { 311 case 1: /* 0x8EA1 - G */ 312 case 2: /* 0x8EA2 - H */ 313 case 3: /* 0x8EA3 - I */ 314 case 4: /* 0x8EA4 - J */ 315 case 5: /* 0x8EA5 - K */ 316 case 6: /* 0x8EA6 - L */ 317 case 7: /* 0x8EA7 - M */ 318 case 8: /* 0x8EA8 - N */ 319 case 9: /* 0x8EA9 - O */ 320 case 10: /* 0x8EAA - P */ 321 case 11: /* 0x8EAB - Q */ 322 case 12: /* 0x8EAC - R */ 323 case 13: /* 0x8EAD - S */ 324 case 14: /* 0x8EAE - T */ 325 case 15: /* 0x8EAF - U */ 326 case 16: /* 0x8EB0 - V */ 327 return (ret); 328 default: 329 return (-1); 330 } 331 } 332 333 334 /* 335 * ISO 2022-7 code --> Big-5 code 336 * Return: > 0 - converted with enough space in output buffer 337 * = 0 - no space in outbuf 338 */ 339 static int iso_to_big5(int plane_no, char keepc[], char *buf, size_t buflen) 340 { 341 char cns_str[3]; 342 unsigned long cns_val; /* MSB mask off CNS 11643 value */ 343 int unidx; /* binary search index */ 344 unsigned long big5_val, val; /* Big-5 code */ 345 346 #ifdef DEBUG 347 fprintf(stderr, "%s %d ", keepc, plane_no); 348 #endif 349 if (plane_no == 1) { 350 cns_str[0] = keepc[0] & MSB_OFF; 351 cns_str[1] = keepc[1] & MSB_OFF; 352 } else { 353 cns_str[0] = keepc[2] & MSB_OFF; 354 cns_str[1] = keepc[3] & MSB_OFF; 355 } 356 cns_val = (cns_str[0] << 8) + cns_str[1]; 357 #ifdef DEBUG 358 fprintf(stderr, "%x\t", cns_val); 359 #endif 360 361 if (buflen < 2) { 362 errno = E2BIG; 363 return(0); 364 } 365 366 switch (plane_no) { 367 case 1: 368 unidx = binsearch(cns_val, cns_big5_tab1, MAX_CNS1_NUM); 369 if (unidx >= 0) 370 big5_val = cns_big5_tab1[unidx].value; 371 break; 372 case 2: 373 unidx = binsearch(cns_val, cns_big5_tab2, MAX_CNS2_NUM); 374 if (unidx >= 0) 375 big5_val = cns_big5_tab2[unidx].value; 376 break; 377 default: 378 unidx = -1; /* no mapping from CNS to Big-5 out of plane 1&2 */ 379 break; 380 } 381 382 #ifdef DEBUG 383 fprintf(stderr, "unidx = %d, big5code = %x\t", unidx, big5_val); 384 #endif 385 386 if (unidx < 0) { /* no match from CNS to Big-5 */ 387 *buf = *(buf+1) = NON_ID_CHAR; 388 } else { 389 val = big5_val & 0xffff; 390 *buf = (char) ((val & 0xff00) >> 8); 391 *(buf+1) = (char) (val & 0xff); 392 } 393 394 #ifdef DEBUG 395 fprintf(stderr, "\t->%x %x<-\n", *buf, *(buf+1)); 396 #endif 397 398 return(2); 399 } 400 401 402 /* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */ 403 static int binsearch(unsigned long x, table_t v[], int n) 404 { 405 int low, high, mid; 406 407 low = 0; 408 high = n - 1; 409 while (low <= high) { 410 mid = (low + high) / 2; 411 if (x < v[mid].key) 412 high = mid - 1; 413 else if (x > v[mid].key) 414 low = mid + 1; 415 else /* found match */ 416 return mid; 417 } 418 return (-1); /* no match */ 419 } 420