1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 #include <stdlib.h> 23 * Copyright (c) 1997, by Sun Microsystems, Inc. 24 * All rights reserved. 25 */ 26 27 28 /* 29 Converts From: ISO2022-CN-EXT encoding. 30 Converts To: Taiwanese EUC encoding ( CNS11643 ) and big5 encoding 31 32 */ 33 34 #include "iso2022-cn.h" 35 36 /* Forward reference the functions constrained to the scope of this file */ 37 static int process_esc_seq(char, _iconv_st *); 38 static int ascii_to_euc(char, _iconv_st *, unsigned char **, size_t *); 39 static int iscns( _iconv_st * ); 40 41 42 extern int errno; 43 44 /* 45 * _icv_open: Called from iconv_open(). Allocates and initializes _iconv_st 46 * structure. Returns pointer to the structure as (void *). 47 */ 48 49 50 void * 51 _icv_open() 52 { 53 _iconv_st *st; 54 55 /* Allocate */ 56 if (( st = (_iconv_st *) malloc( sizeof( _iconv_st ))) == NULL ){ 57 errno = ENOMEM; 58 return ((void *) -1); 59 } 60 61 /* Initialize */ 62 st->Sfunc = SI; 63 st->SSfunc = NONE; 64 st->ESCstate = OFF; 65 st->firstbyte = True; 66 st->numsav = 0; 67 st->SOcharset = NULL; /* no default charset */ 68 st->SS2charset = NULL; /* no default charset */ 69 st->SS3charset = NULL; /* no default charset */ 70 st->nonidcount = 0; 71 st->_errno = 0; 72 73 /* Return struct */ 74 return ((void *) st); 75 } 76 77 78 79 /* 80 * _icv_close: Called from iconv_close(). Frees the _iconv_st structure as 81 * pointed by the argument. 82 */ 83 84 void 85 _icv_close(_iconv_st *st) 86 { 87 if (st == NULL ) 88 errno = EBADF; 89 else 90 free(st); 91 } 92 93 94 /* 95 * _icv_iconv: Called from iconv(). Does the convertion from ISO2022-CN-EXT 96 * to CNS11643 97 */ 98 /*======================================================= 99 * 100 * State machine for interpreting ISO2022-CN-EXT code 101 * 102 *======================================================= 103 * 104 * 105 *=======================================================*/ 106 107 size_t 108 iso2022_icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft, 109 unsigned char **outbuf, size_t *outbytesleft, int (*convert)() ) 110 { 111 112 int ret, n; 113 114 if (st == NULL) { 115 errno = EBADF; 116 return ((size_t) -1); 117 } 118 119 if ( inbuf == NULL || *inbuf == NULL || inbytesleft == NULL || 120 *inbytesleft <= 0 ) { /* Reset request */ 121 st->Sfunc = SI; 122 st->SSfunc = NONE; 123 st->ESCstate = OFF; 124 st->firstbyte = True; 125 st->numsav = 0; 126 st->SOcharset = NULL; 127 st->SS2charset = NULL; 128 st->SS3charset = NULL; 129 st->nonidcount = 0; 130 st->_errno = 0; 131 return ((size_t) 0); 132 } 133 134 st->_errno = 0; 135 errno = 0; 136 137 /* Before we use *inbytesleft or *outbytesleft we should confirm that 138 inbytesleft and outbytesleft are non-NULL. I am considering inbytesleft 139 or *inbytesleft having 0 or negative value as a reset request. I am 140 considering outbytesleft having 0 value as no space in output buffer. 141 Also, here itself I am verifying that outbuf and *outbuf should be non-NULL 142 pointers so I do not have to worry about them being NULL below in the 143 conversion sub-routines. I also confirm here that *outbytesleft should be 144 greater than 0 before we can continue further */ 145 146 if ( outbytesleft == NULL || *outbytesleft <= 0 || 147 outbuf == NULL || *outbuf == NULL ) { 148 errno = E2BIG; 149 return((size_t)-1); 150 } 151 152 /* A state machine to interpret ISO, driven by the shift functions SI, SO */ 153 154 do { 155 if (st->firstbyte == False) { /* Is SO, SS2, SS3 second byte */ 156 st->keepc[1] = **inbuf; 157 n = (*convert)( st, outbuf, outbytesleft, iscns(st) ); 158 if ( n < 0 ) 159 return((size_t)-1); /* Insufficient space in output buffer */ 160 else if ( n > 0 ){ /* No CNS for this Chinese code */ 161 n = ascii_to_euc(NON_ID_CHAR, st, outbuf, outbytesleft); 162 if ( n < 0 ) 163 return((size_t)-1); 164 st->nonidcount += 1; 165 } else 166 st->nonidcount -= 1; /* The first byte identified as 167 valid Chinese byte and is 168 processed */ 169 st->firstbyte = True; 170 st->SSfunc = NONE; /* If we just processed SS bytes, 171 this will reset SSfunc to NONE. If 172 we just processed SO bytes, this was 173 already NONE */ 174 } else if ( st->SSfunc != NONE ) { /* We are currently expecting 175 SS2 or SS3 Chinese bytes */ 176 st->keepc[0] = **inbuf; 177 st->nonidcount += 1; 178 st->firstbyte = False; 179 } else if ( **inbuf == ESC && st->ESCstate == OFF ) { 180 st->nonidcount += 1; /* For the ESC character */ 181 st->ESCstate = E0; 182 } else if ( st->ESCstate != OFF ) { /* Continue processing the 183 escape sequence */ 184 ret = process_esc_seq( **inbuf, st ); 185 if ( ret == DONE ) { /* ESC seq interpreted correctly. 186 Switch off the escape machine */ 187 st->ESCstate = OFF; 188 } else if ( ret == INVALID ){ 189 if (st->Sfunc == SI){ /* An invalid ESC sequence 190 encountered. Process 191 the text saved in 192 st->savbuf as ASCII. Switch 193 off the escape machine */ 194 n = ascii_to_euc( **inbuf, st, outbuf, outbytesleft ); 195 if ( n < 0 ) /* Insufficient space in output buffer */ 196 return((size_t)-1); 197 st->nonidcount -= st->numsav; /* Since invalid Esc 198 sequence is outputted 199 as ASCII */ 200 } else if (st->Sfunc == SO) { /* An invalid ESC sequence 201 encountered. Don't know 202 what to do. So flag 203 error illegal seq. It is 204 wise not to continue 205 processing input. Switch 206 off the escape machine */ 207 st->_errno = errno = EILSEQ; 208 st->nonidcount += 1; /* For this character */ 209 } 210 st->numsav = 0; /* Discard the saved characters of 211 invalid sequence */ 212 st->ESCstate = OFF; 213 } /* more char. needed for escape sequence */ 214 } else if (st->Sfunc == SI) { 215 /* Switch state to SO only if SOdesignation is set. */ 216 if ( **inbuf == SO && st->SOcharset != NULL ){ 217 st->Sfunc = SO; 218 } else { /* Is ASCII */ 219 n = ascii_to_euc(**inbuf, st, outbuf, outbytesleft ); 220 if ( n < 0 ) /* Insufficient space in output buffer */ 221 return((size_t)-1); 222 } 223 } else if (st->Sfunc == SO) { 224 if ( **inbuf == SI ){ /* Switch state to SO */ 225 st->Sfunc = SI; 226 } 227 else { 228 st->keepc[0] = **inbuf; 229 st->nonidcount += 1; 230 st->firstbyte = False; 231 } 232 } 233 else 234 fprintf(stderr, 235 "_icv_iconv():ISO-CN-EXT->CNS:Should never have come here\n"); 236 237 (*inbuf)++; 238 (*inbytesleft)--; 239 240 if ( st->_errno) 241 break; /* Break out of while loop */ 242 243 if (errno) /* We set st->_errno before we set errno. If errno is set 244 somewhere else we handle that here */ 245 return((size_t)-1); 246 247 } while (*inbytesleft > 0 && *outbytesleft > 0); 248 249 250 /* We now have to handle the case where we have successfully processed the 251 previous input character which exhausted the output buffer. This is handled 252 by the while loop. However, since there are more input characters that 253 haven't been processed yet, we need to set the errno appropriately and 254 return -1. */ 255 if ( *inbytesleft > 0 && *outbytesleft == 0) { 256 errno = E2BIG; 257 return((size_t)-1); 258 } 259 return (*inbytesleft + st->nonidcount); 260 } 261 262 263 static int 264 process_esc_seq( char c, _iconv_st *st ) 265 { 266 267 switch(st->ESCstate){ 268 case E0: 269 switch (c){ 270 case SS2LOW: 271 if ( st->SS2charset == NULL ){ 272 /* We do not expect SS2 shift function before 273 SS2 designation is set */ 274 st->savbuf[0] = ESC; 275 st->numsav = 1; 276 return(INVALID); 277 } 278 st->SSfunc = SS2; 279 /* Since valid ESC sequence remove the ESC from the 280 nonidcount */ 281 st->nonidcount -= 1; 282 return(DONE); 283 case SS3LOW: 284 if ( st->SS3charset == NULL ){ 285 /* We do not expect SS3 shift function before 286 SS3 designation is set */ 287 st->savbuf[0] = ESC; 288 st->numsav = 1; 289 return(INVALID); 290 } 291 st->SSfunc = SS3; 292 /* Since valid ESC sequence remove the ESC from the 293 nonidcount */ 294 st->nonidcount -= 1; 295 return(DONE); 296 case '$': 297 st->nonidcount += 1; /* ESC sequence not complete yet */ 298 st->ESCstate = E1; 299 return(NEEDMORE); 300 default: 301 st->savbuf[0] = ESC; 302 st->numsav = 1; 303 return(INVALID); 304 } /* end switch */ 305 306 307 case E1: 308 switch (c){ 309 case ')': 310 st->nonidcount += 1; /* ESC sequence not complete yet */ 311 st->ESCstate = E2; 312 return(NEEDMORE); 313 case '*': 314 st->nonidcount += 1; /* ESC sequence not complete yet */ 315 st->ESCstate = E3; 316 return(NEEDMORE); 317 case '+': 318 st->nonidcount += 1; /* ESC sequence not complete yet */ 319 st->ESCstate = E4; 320 return(NEEDMORE); 321 default: 322 st->savbuf[0] = ESC; 323 st->savbuf[1] = '$'; 324 st->numsav = 2; 325 return(INVALID); 326 } 327 328 case E2: 329 st->SOcharset = c; 330 /* Since valid ESC sequence remove decriment nonidcount 331 appropriately for all earlier characters in escape sequence */ 332 st->nonidcount -= 3; 333 return(DONE); 334 335 case E3: 336 st->SS2charset = c; 337 /* Since valid ESC sequence remove decriment nonidcount 338 appropriately for all earlier characters in escape sequence */ 339 st->nonidcount -= 3; 340 return(DONE); 341 342 case E4: 343 st->SS3charset = c; 344 /* Since valid ESC sequence remove decriment nonidcount 345 appropriately for all earlier characters in escape sequence */ 346 st->nonidcount -= 3; 347 return(DONE); 348 349 default: 350 fprintf(stderr, 351 "process_esc_seq():ISO-CN-EXT->CNS:Should never have come here\n"); 352 st->_errno = errno = EILSEQ; 353 return(DONE); 354 355 } /* end switch */ 356 } 357 358 359 static int 360 ascii_to_euc( char c, _iconv_st *st, unsigned char **outbuf, size_t *outbytesleft ) 361 { 362 363 int i; 364 365 if ( *outbytesleft < (1 + st->numsav) ) { 366 st->_errno = errno = E2BIG; 367 return (-1); 368 } 369 370 for ( i=0; i < st->numsav; i++ ) { 371 *(*outbuf)++ = (unsigned char) st->savbuf[i]; 372 (*outbytesleft)--; 373 } 374 375 *(*outbuf)++ = (unsigned char) c; 376 (*outbytesleft)--; 377 378 return(0); 379 } 380 381 382 static int 383 iscns( _iconv_st *st ) 384 { 385 int plane_no = -1; 386 387 if ( st->SSfunc == NULL && st->SOcharset == 'G' ) 388 plane_no = 1; 389 else if ( st->SSfunc == SS2 && st->SS2charset == 'H' ) 390 plane_no = 2; 391 else if ( st->SSfunc == SS3 ) 392 switch ( st->SS3charset ){ 393 case 'I': plane_no = 3; break; 394 case 'J': plane_no = 4; break; 395 case 'K': plane_no = 5; break; 396 case 'L': plane_no = 6; break; 397 case 'M': plane_no = 7; break; 398 } 399 return (plane_no); 400 } 401