1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1994 by Sun Microsystems, Inc. 23 */ 24 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <errno.h> 28 #include <gb2312_unicode.h> 29 30 #define MSB 0x80 31 32 #define UTF8_NON_ID_CHAR1 0xEF 33 #define UTF8_NON_ID_CHAR2 0xBF 34 #define UTF8_NON_ID_CHAR3 0xBD 35 36 37 enum _GSTATE { G0, G1, G2, G3, G4, G5}; 38 39 40 typedef struct _icv_state { 41 char _lastc; 42 short _gstate; 43 } _iconv_st; 44 45 int 46 hz2utf8(char in_byte1, char in_byte2, char *buf, int buflen); 47 48 /* 49 * Open; called from iconv_open() 50 */ 51 void * 52 _icv_open() 53 { 54 _iconv_st *st; 55 56 if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) { 57 errno = ENOMEM; 58 return ((void *) -1); 59 } 60 61 st->_gstate = G0; 62 return ((void *)st); 63 } 64 65 66 /* 67 * Close; called from iconv_close() 68 */ 69 void 70 _icv_close(_iconv_st *st) 71 { 72 if (st == NULL) 73 errno = EBADF; 74 else 75 free(st); 76 } 77 78 79 /* 80 * Actual conversion; called from iconv() 81 */ 82 /*======================================================================= 83 * 84 * ~ { Chinese 85 * +-> G0 -----> G1 ----> G2 ----> G3 86 * | | ascii | ascii |~} | 87 * +----------------------+--------+ 88 *=======================================================================*/ 89 size_t 90 _icv_iconv(_iconv_st *st, char **inbuf, size_t*inbytesleft, 91 char **outbuf, size_t*outbytesleft) 92 { 93 int n; 94 95 if (st == NULL) { 96 errno = EBADF; 97 return -1; 98 } 99 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */ 100 st->_gstate = G0; 101 return 0; 102 } 103 104 errno = 0; 105 106 while (*inbytesleft > 0 && *outbytesleft > 0) { 107 switch (st->_gstate) { 108 case G0: 109 if ( **inbuf == '~' ) { 110 st->_gstate = G1; 111 } else if (((**inbuf) & MSB) == 0) { /* ASCII */ 112 **outbuf = **inbuf; 113 (*outbuf)++, (*outbytesleft)--; 114 } 115 break; 116 case G1: 117 if ( **inbuf == '{' ) { 118 st->_gstate = G2; 119 } else if (**inbuf == '\n') { 120 st->_gstate = G0; 121 } else if (**inbuf == '~') { 122 **outbuf = '~'; 123 (*outbuf)++, (*outbytesleft)--; 124 st->_gstate = G0; 125 } else { 126 errno = EINVAL; 127 } 128 break; 129 case G2: 130 if ( **inbuf == '~' ) { 131 st->_gstate = G4; 132 } else { 133 st->_lastc = **inbuf; 134 st->_gstate = G3; 135 } 136 break; 137 case G3: 138 n = hz2utf8(st->_lastc, **inbuf, *outbuf, *outbytesleft); 139 if (n > 0) { 140 (*outbuf) += n, (*outbytesleft) -= n; 141 } else { 142 errno = E2BIG; 143 return (size_t)-1; 144 } 145 st->_gstate = G2; 146 break; 147 case G4: 148 if ( **inbuf == '}' ) { 149 st->_gstate = G0; 150 } else if (**inbuf == '\n') { 151 st->_gstate = G2; 152 continue; 153 } else { 154 errno = EINVAL; 155 } 156 157 break; 158 } 159 160 (*inbuf)++, (*inbytesleft)--; 161 if (errno) 162 { 163 return (size_t)-1; 164 } 165 } 166 167 if (*inbytesleft > 0 && *outbytesleft == 0) { 168 errno = E2BIG; 169 return (size_t)-1; 170 } 171 return ((size_t)(*inbytesleft)); 172 } 173 174 175 int 176 hz2utf8(in_byte1, in_byte2, buf, buflen) 177 char in_byte1, in_byte2; 178 char *buf; 179 int buflen; 180 { 181 182 int idx; 183 int unicode; 184 185 if ( buflen < 2 ) 186 return 0; 187 in_byte1 |= MSB; 188 in_byte2 |= MSB; 189 190 191 idx = (((in_byte1 & 0xff) - 0xa1) * 94) + (in_byte2 & 0xff) - 0xa1; 192 if (idx >= 0) { 193 unicode = Unicode[idx]; 194 if (unicode >= 0x0080 && unicode <= 0x07ff) { 195 if ( buflen < 2 ) 196 return 0; 197 *buf = ((unicode >> 6) & 0x1f) | 0xc0; 198 *(buf+1) = (unicode & 0x3f) | MSB; 199 return 2; 200 } 201 if (unicode >= 0x0800 && unicode <= 0xffff) { 202 if ( buflen < 3 ) 203 return 0; 204 *buf = ((unicode >> 12) & 0x0f) | 0xe0; 205 *(buf+1) = ((unicode >> 6) & 0x3f) | MSB; 206 *(buf+2) = (unicode & 0x3f) | MSB; 207 return 3; 208 } 209 } 210 if ( buflen < 3 ) 211 return 0; 212 213 *buf = UTF8_NON_ID_CHAR1; 214 *(buf+1) = UTF8_NON_ID_CHAR2; 215 *(buf+2) = UTF8_NON_ID_CHAR3; 216 return 3; 217 } 218