1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright(c) 1997, Sun Microsystems, Inc. 23 * All rights reserved. 24 */ 25 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <errno.h> 29 #include <gb18030_big5p.h> 30 31 #define NON_ID_CHAR '_' /* non-identified character */ 32 #define MSB 0x80 33 #define ONEBYTE 0xff 34 35 #define gbk4_2nd_byte(v) ((v) >= 0x30 && (v) <= 0x39) 36 #define gbk4_3rd_byte(v) ((v) >= 0x81 && (v) <= 0xfe) 37 #define gbk4_4th_byte(v) gbk4_2nd_byte(v) 38 39 int binsearch(unsigned long x, table_t table[], int n); 40 int gbk_2nd_byte(char inbuf); 41 int gbk_to_big5p(char keepc[], char *buf, size_t buflen); 42 43 typedef struct _icv_state { 44 char keepc[2]; /* maximum # byte of GB chararor */ 45 short cstate; 46 int _errno; /* internal errno */ 47 } _iconv_st; 48 49 enum _CSTATE { C0, C1, C2, C3 }; 50 51 52 /* 53 * Open; called from iconv_open() 54 */ 55 void * _icv_open() { 56 _iconv_st * st; 57 58 if ((st = (_iconv_st *) malloc(sizeof(_iconv_st))) == NULL) { 59 errno = ENOMEM; 60 return ((void *) -1); 61 } 62 63 st->cstate = C0; 64 st->_errno = 0; 65 66 return ((void *) st); 67 } 68 69 /* 70 * Close; called from iconv_close() 71 */ 72 void _icv_close(_iconv_st * st) { 73 if (!st) 74 errno = EBADF; 75 else 76 free(st); 77 } 78 79 /* 80 * Actual conversion; called from iconv() 81 */ 82 83 size_t _icv_iconv(_iconv_st * st, char **inbuf, size_t *inbytesleft, 84 char ** outbuf, size_t *outbytesleft) { 85 int n; 86 if (st == NULL) { 87 errno = EBADF; 88 return ((size_t) -1); 89 } 90 91 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */ 92 st->cstate = C0; 93 st->_errno = 0; 94 return ((size_t) 0); 95 } 96 97 errno = st->_errno = 0; 98 99 while (*inbytesleft > 0 && *outbytesleft > 0) { 100 switch (st->cstate) { 101 case C0: 102 if (**inbuf & MSB) { /* gb2312 charactor */ 103 st->keepc[0] = (**inbuf); 104 st->cstate = C1; 105 } else { /* ASCII */ 106 **outbuf = **inbuf; 107 (*outbuf)++; 108 (*outbytesleft)--; 109 } 110 break; 111 case C1: /* GBK charactor 2nd byte */ 112 if (gbk_2nd_byte(**inbuf) == 0) { 113 st->keepc[1] = (**inbuf); 114 n = gbk_to_big5p(st->keepc, *outbuf, *outbytesleft); 115 if (n > 0) { 116 (*outbuf) += n; 117 (*outbytesleft) -= n; 118 119 st->cstate = C0; 120 } else { 121 st->_errno = errno = E2BIG; 122 } 123 } else if ( gbk4_2nd_byte((unsigned char)**inbuf) ) { 124 st->cstate = C2; 125 } else { /* illegal input */ 126 st->_errno = errno = EILSEQ; 127 } 128 break; 129 case C2: 130 if ( gbk4_3rd_byte((unsigned char)**inbuf) ) 131 st->cstate = C3; 132 else 133 st->_errno = errno = EILSEQ; 134 break; 135 case C3: 136 if ( gbk4_4th_byte((unsigned char)**inbuf)) { 137 138 /* replace the four-bytes character with __ in outbuf 139 * since there wouldn't have corresponding code in BIG5P 140 */ 141 if ( *outbytesleft < 2 ) st->_errno = errno = E2BIG; 142 else { 143 **outbuf = *((*outbuf)+1) = (char)NON_ID_CHAR; 144 *outbuf += 2; 145 *outbytesleft -= 2; 146 147 st->cstate = C0; 148 } 149 } 150 else 151 st->_errno = errno = EILSEQ; 152 break; 153 default: /* un-reachable */ 154 st->_errno = errno = EILSEQ; 155 st->cstate = C0; 156 break; 157 } 158 159 if (st->_errno) break; 160 161 (*inbuf)++; 162 (*inbytesleft)--; 163 } 164 165 if (errno) return ((size_t) -1); 166 167 if (*inbytesleft == 0 && st->cstate != C0) { 168 errno = EINVAL; 169 return ((size_t) -1); 170 } 171 172 if (*inbytesleft > 0 && *outbytesleft == 0) { 173 errno = E2BIG; 174 return (size_t)-1; 175 } 176 177 return (size_t)(*inbytesleft); 178 } 179 180 /* 181 * Test whether inbuf is a valid character for 182 * 2nd byte of GB2312 charactor: 183 * Return: 0 --- valid GBK 2nd byte 184 * 1 --- invalid GBK 2nd byte 185 */ 186 int gbk_2nd_byte(inbuf) 187 char inbuf; 188 { 189 190 unsigned int buf = (unsigned int) (inbuf & ONEBYTE); 191 192 if ((buf >= 0x40) && (buf <= 0x7e)) 193 return 0; 194 if ((buf >= 0x80) && (buf <= 0xfe)) 195 return 0; 196 return 1; 197 } 198 199 /* 200 * gbk_to_big5p: Convert gbk charactor to Big5p. 201 * Return: >0 --- converted with enough space in output buffer 202 * =0 --- no space in outbuf 203 */ 204 205 int gbk_to_big5p(char keepc[], char *buf, size_t buflen) { 206 207 unsigned long gbk_val; /* GBK value */ 208 int index; 209 unsigned long big5_val; /* BIG5 value */ 210 211 if (buflen < 2) { 212 errno = E2BIG; 213 return 0; 214 } 215 216 gbk_val = ((keepc[0] & ONEBYTE) << 8) + (keepc[1] & ONEBYTE); 217 index = binsearch(gbk_val, gbk_big5p_tab, BIG5MAX); 218 if (index >= 0) { 219 big5_val = gbk_big5p_tab[index].value; 220 *buf = (big5_val >> 8) & ONEBYTE; 221 *(buf + 1) = big5_val & ONEBYTE; 222 } else 223 *buf = *(buf + 1) = (char)NON_ID_CHAR; 224 return 2; 225 } 226 227 /* 228 * binsearch() 229 */ 230 int binsearch(unsigned long x, table_t table[], int n) { 231 int low, high, mid; 232 233 low = 0; 234 high = n - 1; 235 while (low <= high) { 236 mid = (low + high) >> 1; 237 if (x < table[mid].key) 238 high = mid - 1; 239 else if (x > table[mid].key) 240 low = mid + 1; 241 else 242 return mid; 243 } 244 return -1; 245 } 246 247 #ifdef DEBUG 248 main(int argc, char * argv[]) { 249 _iconv_st * ist; 250 char * inbuf = "�������е�ÿһ�������һ���Ѱ�װ��ע����������ʾ�� ��Ʒϵ�� ��"; 251 char * outbuf; 252 char * ib, * oub; 253 int inbyteleft; 254 int outbyteleft; 255 256 ist = (_iconv_st *) _icv_open(); 257 inbyteleft = outbyteleft = 2 * strlen(inbuf); 258 outbuf = (char *)malloc(outbyteleft); 259 ib = inbuf; 260 oub = outbuf; 261 _icv_iconv(ist, &inbuf, &inbyteleft, &outbuf, &outbyteleft); 262 printf("IN -- %s\n", ib); 263 printf("OUT -- %s\n", oub); 264 } 265 #endif 266