1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1995, by Sun Microsystems, Inc.
24 * All rights reserved.
25 */
26
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <errno.h>
30 #include "big5_cns11643.h" /* Big-5 to CNS 11643 mapping table */
31
/* Byte-level masks and marker values. */
#define	MSB		0x80	/* most significant bit of a byte */
#define	MBYTE		0x8E	/* multi-byte (4 byte character) */
#define	PMASK		0xA0	/* plane number mask */
#define	ONEBYTE		0xFF	/* right most byte */
#define	MSB_OFF		0x7F	/* mask off MSB */

/* Control characters written to the output stream. */
#define	SI		0x0F	/* shift in */
#define	SO		0x0E	/* shift out */
#define	ESC		0x1B	/* escape */

/*
 * Final character of the "ESC $ )" sequence, indexed by plane number.
 * An alternative table is kept here for reference only:
 *	"0GH23456789:;<=>?"
 */
static const char plane_char[] = "0GHIJKLMNOPQRSTUV";

/* Map a plane number (an index into plane_char) to its final character. */
#define	GET_PLANEC(i)	(plane_char[(i)])

#define	NON_ID_CHAR	'_'	/* non-identified character */
48
49 typedef struct _icv_state {
50 char keepc[2]; /* maximum # byte of Big-5 code */
51 short cstate; /* state machine id (Big-5) */
52 short istate; /* state machine id (ISO) */
53 int _errno; /* internal errno */
54 } _iconv_st;
55
56 enum _CSTATE { C0, C1 };
57 enum _ISTATE { IN, OUT };
58
59
60 static int big5_2nd_byte(char);
61 static int get_plane_no_by_big5(const char, const char, int*, unsigned long*);
62 static int big5_to_iso(int, int, unsigned long, char*, size_t);
63 static int binsearch(unsigned long, table_t[], int);
64
65
66 /*
67 * Open; called from iconv_open()
68 */
69 void *
_icv_open()70 _icv_open()
71 {
72 _iconv_st *st;
73
74 if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
75 errno = ENOMEM;
76 return ((void *) -1);
77 }
78
79 st->cstate = C0;
80 st->istate = IN;
81 st->_errno = 0;
82
83 #ifdef DEBUG
84 fprintf(stderr, "========== iconv(): Big-5 --> ISO 2022-7 ==========\n");
85 #endif
86 return ((void *) st);
87 }
88
89
90 /*
91 * Close; called from iconv_close()
92 */
93 void
_icv_close(_iconv_st * st)94 _icv_close(_iconv_st *st)
95 {
96 if (!st)
97 errno = EBADF;
98 else
99 free(st);
100 }
101
102
103 /*
104 * Actual conversion; called from iconv()
105 */
106 /*=======================================================
107 *
108 * State Machine for interpreting Big-5 code
109 *
110 *=======================================================
111 *
112 * 1st C
113 * +--------> C0 ----------> C1
114 * | ascii | 2nd C |
115 * ^ v v
116 * +----<-----+-----<--------+
117 *
118 *=======================================================*/
119 /*
120 * Big-5 encoding range:
121 * High byte: 0xA1 - 0xFE ( 94 encoding space)
122 * Low byte: 0x40 - 0x7E, 0xA1 - 0xFE ( 157 encoding space)
123 * Plane #1: 0xA140 - 0xC8FE ( 6280 encoding space)
124 * Plane #2: 0xC940 - 0xFEFE ( 8478 encoding space)
125 * Total: 94 * 157 = 14,758 (14758 encoding space)
126 */
127 size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)128 _icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
129 char **outbuf, size_t *outbytesleft)
130 {
131 int plane_no, n, unidx;
132 unsigned long cnscode;
133 /* pre_plane_no: need to be static when re-entry occurs on errno set */
134 static int pre_plane_no = -1; /* previous plane number */
135
136 if (st == NULL) {
137 errno = EBADF;
138 return ((size_t) -1);
139 }
140
141 if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
142 st->cstate = C0;
143 st->istate = IN;
144 st->_errno = 0;
145 return ((size_t) 0);
146 }
147
148 #ifdef DEBUG
149 fprintf(stderr, "=== (Re-entry) iconv(): Big-5 --> ISO 2022-7 ===\n");
150 fprintf(stderr, "st->cstate=%d\tst->istate=%d\tst->_errno=%d\tplane_no=%d\n",
151 st->cstate, st->istate, st->_errno, plane_no);
152 #endif
153 st->_errno = 0; /* reset internal errno */
154 errno = 0; /* reset external errno */
155
156 /* a state machine for interpreting Big-5 code */
157 while (*inbytesleft > 0 && *outbytesleft > 0) {
158 switch (st->cstate) {
159 case C0: /* assuming ASCII in the beginning */
160 if (**inbuf & MSB) {
161 st->keepc[0] = (**inbuf);
162 st->cstate = C1;
163 } else { /* real ASCII */
164 if (st->istate == OUT) {
165 st->cstate = C0;
166 st->istate = IN;
167 **outbuf = SI;
168 (*outbuf)++;
169 (*outbytesleft)--;
170 if (*outbytesleft <= 0) {
171 errno = E2BIG;
172 return((size_t)-1);
173 }
174 }
175 **outbuf = **inbuf;
176 (*outbuf)++;
177 (*outbytesleft)--;
178 }
179 break;
180 case C1: /* Chinese characters: 2nd byte */
181 if (big5_2nd_byte(**inbuf) != 0) { /* illegal Big-5 */
182 st->cstate = C0;
183 st->istate = IN;
184 st->_errno = errno = EILSEQ;
185 break;
186 }
187 st->keepc[1] = (**inbuf);
188 plane_no = get_plane_no_by_big5(st->keepc[0],
189 st->keepc[1], &unidx, &cnscode);
190 if (plane_no < 0) { /* legal Big-5; illegal CNS */
191 st->cstate = C0;
192 st->istate = IN;
193 st->_errno = errno = EILSEQ;
194 break;
195 }
196
197 if ((st->istate == IN) || (pre_plane_no != plane_no)) {
198 /* change plane # in Chinese mode */
199 if (st->istate == OUT) {
200 **outbuf = SI;
201 (*outbuf)++;
202 (*outbytesleft)--;
203 #ifdef DEBUG
204 fprintf(stderr, "(plane #=%d\tpre_plane #=%d)\t", plane_no, pre_plane_no);
205 #endif
206 }
207 if (*outbytesleft < 4) {
208 st->_errno = errno = E2BIG;
209 return((size_t)-1);
210 }
211 pre_plane_no = plane_no;
212 st->istate = OUT; /* shift out */
213 **outbuf = ESC;
214 *(*outbuf+1) = '$';
215 *(*outbuf+2) = ')';
216 *(*outbuf+3) = GET_PLANEC(plane_no);
217 #ifdef DEBUG
218 fprintf(stderr, "ESC $ ) %c ", *(*outbuf+3));
219 #endif
220 (*outbuf) += 4;
221 (*outbytesleft) -= 4;
222 if (*outbytesleft <= 0) {
223 st->_errno = errno = E2BIG;
224 return((size_t)-1);
225 }
226 st->istate = OUT;
227 **outbuf = SO;
228 (*outbuf)++;
229 (*outbytesleft)--;
230 }
231 n = big5_to_iso(plane_no, unidx, cnscode,
232 *outbuf, *outbytesleft);
233 if (n > 0) {
234 (*outbuf) += n;
235 (*outbytesleft) -= n;
236 } else {
237 st->_errno = errno;
238 return((size_t)-1);
239 }
240 st->cstate = C0;
241 break;
242 default: /* should never come here */
243 st->_errno = errno = EILSEQ;
244 st->cstate = C0; /* reset state */
245 break;
246 }
247
248 (*inbuf)++;
249 (*inbytesleft)--;
250
251 if (st->_errno) {
252 #ifdef DEBUG
253 fprintf(stderr, "!!!!!\tst->_errno = %d\tst->cstate = %d\n",
254 st->_errno, st->cstate);
255 #endif
256 break;
257 }
258 if (errno)
259 return((size_t)-1);
260 }
261
262 if (*inbytesleft > 0 && *outbytesleft == 0) {
263 errno = E2BIG;
264 return((size_t)-1);
265 }
266 return (*inbytesleft);
267 }
268
269
/*
 * Check whether a byte may appear as the 2nd byte of a Big-5 character,
 * i.e. lies in 0x40 - 0x7E or 0xA1 - 0xFE.
 *
 * Return: 0 - valid Big-5 2nd byte
 *	   1 - invalid Big-5 2nd byte
 */
static int big5_2nd_byte(char inbuf)
{
	/* compare as an unsigned value so that bytes >= 0x80 are not negative */
	const unsigned char	b = (unsigned char)inbuf;
	const int		in_low_range = (b >= 0x40 && b <= 0x7E);
	const int		in_high_range = (b >= 0xA1 && b <= 0xFE);

	return ((in_low_range || in_high_range) ? 0 : 1);
}
285
286
287 /*
288 * Get plane number by Big-5 code; i.e. plane #1 returns 1, #2 returns 2, etc.
289 * Returns -1 on error conditions
290 *
291 * Since binary search of the Big-5 to CNS table is necessary, might as well
292 * return index and CNS code matching to the unicode.
293 */
get_plane_no_by_big5(const char c1,const char c2,int * unidx,unsigned long * cnscode)294 static int get_plane_no_by_big5(const char c1, const char c2,
295 int *unidx, unsigned long *cnscode)
296 {
297 int ret;
298 unsigned long big5code;
299
300 big5code = (unsigned long) ((c1 & ONEBYTE) << 8) + (c2 & ONEBYTE);
301 *unidx = binsearch(big5code, big5_cns_tab, MAX_BIG5_NUM);
302 if ((*unidx) >= 0)
303 *cnscode = big5_cns_tab[*unidx].value;
304 else
305 return(0); /* match from Big-5 to CNS not found */
306 #ifdef DEBUG
307 fprintf(stderr, "Big-5=%04x, idx=%5d, CNS=%06x ", big5code, *unidx, *cnscode);
308 #endif
309
310 ret = (int) (*cnscode >> 16);
311 switch (ret) {
312 case 0x21: /* 0x8EA1 - G */
313 case 0x22: /* 0x8EA2 - H */
314 case 0x23: /* 0x8EA3 - I */
315 case 0x24: /* 0x8EA4 - J */
316 case 0x25: /* 0x8EA5 - K */
317 case 0x26: /* 0x8EA6 - L */
318 case 0x27: /* 0x8EA7 - M */
319 case 0x28: /* 0x8EA8 - N */
320 case 0x29: /* 0x8EA9 - O */
321 case 0x2a: /* 0x8EAA - P */
322 case 0x2b: /* 0x8EAB - Q */
323 case 0x2c: /* 0x8EAC - R */
324 case 0x2d: /* 0x8EAD - S */
325 case 0x2f: /* 0x8EAF - U */
326 case 0x30: /* 0x8EB0 - V */
327 return (ret - 0x20); /* so that we can use GET_PLANEC() */
328 case 0x2e: /* 0x8EAE - T */
329 return (3); /* CNS 11643-1992 */
330 default:
331 return (-1);
332 }
333 }
334
335
336 /*
337 * Big-5 code --> ISO 2022-7
338 * Return: > 0 - converted with enough space in output buffer
339 * = 0 - no space in outbuf
340 */
big5_to_iso(int plane_no,int unidx,unsigned long cnscode,char * buf,size_t buflen)341 static int big5_to_iso(int plane_no, int unidx, unsigned long cnscode,
342 char *buf, size_t buflen)
343 {
344 unsigned long val; /* CNS 11643 value */
345 #ifdef DEBUG
346 char cns_str[5];
347 #endif
348
349 if (buflen < 2) {
350 errno = E2BIG;
351 return(0);
352 }
353
354 if (unidx < 0) { /* no match from UTF8 to CNS 11643 */
355 *buf = *(buf+1) = NON_ID_CHAR;
356 } else {
357 val = cnscode & 0xffff;
358 *buf = (val & 0xff00) >> 8;
359 *(buf+1) = val & 0xff;
360 }
361
362 #ifdef DEBUG
363 fprintf(stderr, "->%02x %02x<-\t->%c %c<-\t", *buf, *(buf+1), *buf, *(buf+1));
364 #endif
365
366 #ifdef DEBUG
367 switch (plane_no) {
368 case 1:
369 cns_str[0] = *buf | MSB;
370 cns_str[1] = *(buf+1) | MSB;
371 cns_str[2] = cns_str[3] = cns_str[4] = NULL;
372 break;
373 case 2:
374 case 3:
375 case 4:
376 case 5:
377 case 6:
378 case 7:
379 case 8:
380 case 9:
381 case 10:
382 case 11:
383 case 12:
384 case 13:
385 case 14:
386 case 15:
387 case 16:
388 cns_str[0] = MBYTE;
389 cns_str[1] = (char) PMASK + plane_no;
390 cns_str[2] = (char) *buf | MSB;
391 cns_str[3] = (char) *(buf+1) | MSB;
392 cns_str[4] = NULL;
393 break;
394 }
395
396 fprintf(stderr, "#%d ->%s<-\n", plane_no, cns_str);
397 #endif
398
399 return(2);
400 }
401
402
403 /* binsearch: find x in v[0] <= v[1] <= ... <= v[n-1] */
binsearch(unsigned long x,table_t v[],int n)404 static int binsearch(unsigned long x, table_t v[], int n)
405 {
406 int low, high, mid;
407
408 low = 0;
409 high = n - 1;
410 while (low <= high) {
411 mid = (low + high) / 2;
412 if (x < v[mid].key)
413 high = mid - 1;
414 else if (x > v[mid].key)
415 low = mid + 1;
416 else /* found match */
417 return mid;
418 }
419 return (-1); /* no match */
420 }
421