1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
/*
 * Copyright (c) 1997, by Sun Microsystems, Inc.
 * All rights reserved.
 */
26
27
28 /*
29 Converts From: ISO2022-CN-EXT encoding.
30 Converts To: Taiwanese EUC encoding ( CNS11643 ) and big5 encoding
31
32 */
33
#include <errno.h>
#include <stdlib.h>

#include "iso2022-cn.h"
35
36 /* Forward reference the functions constrained to the scope of this file */
37 static int process_esc_seq(char, _iconv_st *);
38 static int ascii_to_euc(char, _iconv_st *, unsigned char **, size_t *);
39 static int iscns( _iconv_st * );
40
41
42 extern int errno;
43
44 /*
45 * _icv_open: Called from iconv_open(). Allocates and initializes _iconv_st
46 * structure. Returns pointer to the structure as (void *).
47 */
48
49
50 void *
_icv_open()51 _icv_open()
52 {
53 _iconv_st *st;
54
55 /* Allocate */
56 if (( st = (_iconv_st *) malloc( sizeof( _iconv_st ))) == NULL ){
57 errno = ENOMEM;
58 return ((void *) -1);
59 }
60
61 /* Initialize */
62 st->Sfunc = SI;
63 st->SSfunc = NONE;
64 st->ESCstate = OFF;
65 st->firstbyte = True;
66 st->numsav = 0;
67 st->SOcharset = 0; /* no default charset */
68 st->SS2charset = 0; /* no default charset */
69 st->SS3charset = 0; /* no default charset */
70 st->nonidcount = 0;
71 st->_errno = 0;
72
73 /* Return struct */
74 return ((void *) st);
75 }
76
77
78
79 /*
80 * _icv_close: Called from iconv_close(). Frees the _iconv_st structure as
81 * pointed by the argument.
82 */
83
84 void
_icv_close(_iconv_st * st)85 _icv_close(_iconv_st *st)
86 {
87 if (st == NULL )
88 errno = EBADF;
89 else
90 free(st);
91 }
92
93
94 /*
95 * _icv_iconv: Called from iconv(). Does the convertion from ISO2022-CN-EXT
96 * to CNS11643
97 */
98 /*=======================================================
99 *
100 * State machine for interpreting ISO2022-CN-EXT code
101 *
102 *=======================================================
103 *
104 *
105 *=======================================================*/
106
107 size_t
iso2022_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,unsigned char ** outbuf,size_t * outbytesleft,int (* convert)())108 iso2022_icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
109 unsigned char **outbuf, size_t *outbytesleft, int (*convert)() )
110 {
111
112 int ret, n;
113
114 if (st == NULL) {
115 errno = EBADF;
116 return ((size_t) -1);
117 }
118
119 if ( inbuf == NULL || *inbuf == NULL || inbytesleft == NULL ||
120 *inbytesleft <= 0 ) { /* Reset request */
121 st->Sfunc = SI;
122 st->SSfunc = NONE;
123 st->ESCstate = OFF;
124 st->firstbyte = True;
125 st->numsav = 0;
126 st->SOcharset = 0;
127 st->SS2charset = 0;
128 st->SS3charset = 0;
129 st->nonidcount = 0;
130 st->_errno = 0;
131 return ((size_t) 0);
132 }
133
134 st->_errno = 0;
135 errno = 0;
136
137 /* Before we use *inbytesleft or *outbytesleft we should confirm that
138 inbytesleft and outbytesleft are non-NULL. I am considering inbytesleft
139 or *inbytesleft having 0 or negative value as a reset request. I am
140 considering outbytesleft having 0 value as no space in output buffer.
141 Also, here itself I am verifying that outbuf and *outbuf should be non-NULL
142 pointers so I do not have to worry about them being NULL below in the
143 conversion sub-routines. I also confirm here that *outbytesleft should be
144 greater than 0 before we can continue further */
145
146 if ( outbytesleft == NULL || *outbytesleft <= 0 ||
147 outbuf == NULL || *outbuf == NULL ) {
148 errno = E2BIG;
149 return((size_t)-1);
150 }
151
152 /* A state machine to interpret ISO, driven by the shift functions SI, SO */
153
154 do {
155 if (st->firstbyte == False) { /* Is SO, SS2, SS3 second byte */
156 st->keepc[1] = **inbuf;
157 n = (*convert)( st, outbuf, outbytesleft, iscns(st) );
158 if ( n < 0 )
159 return((size_t)-1); /* Insufficient space in output buffer */
160 else if ( n > 0 ){ /* No CNS for this Chinese code */
161 n = ascii_to_euc(NON_ID_CHAR, st, outbuf, outbytesleft);
162 if ( n < 0 )
163 return((size_t)-1);
164 st->nonidcount += 1;
165 } else
166 st->nonidcount -= 1; /* The first byte identified as
167 valid Chinese byte and is
168 processed */
169 st->firstbyte = True;
170 st->SSfunc = NONE; /* If we just processed SS bytes,
171 this will reset SSfunc to NONE. If
172 we just processed SO bytes, this was
173 already NONE */
174 } else if ( st->SSfunc != NONE ) { /* We are currently expecting
175 SS2 or SS3 Chinese bytes */
176 st->keepc[0] = **inbuf;
177 st->nonidcount += 1;
178 st->firstbyte = False;
179 } else if ( **inbuf == ESC && st->ESCstate == OFF ) {
180 st->nonidcount += 1; /* For the ESC character */
181 st->ESCstate = E0;
182 } else if ( st->ESCstate != OFF ) { /* Continue processing the
183 escape sequence */
184 ret = process_esc_seq( **inbuf, st );
185 if ( ret == DONE ) { /* ESC seq interpreted correctly.
186 Switch off the escape machine */
187 st->ESCstate = OFF;
188 } else if ( ret == INVALID ){
189 if (st->Sfunc == SI){ /* An invalid ESC sequence
190 encountered. Process
191 the text saved in
192 st->savbuf as ASCII. Switch
193 off the escape machine */
194 n = ascii_to_euc( **inbuf, st, outbuf, outbytesleft );
195 if ( n < 0 ) /* Insufficient space in output buffer */
196 return((size_t)-1);
197 st->nonidcount -= st->numsav; /* Since invalid Esc
198 sequence is outputted
199 as ASCII */
200 } else if (st->Sfunc == SO) { /* An invalid ESC sequence
201 encountered. Don't know
202 what to do. So flag
203 error illegal seq. It is
204 wise not to continue
205 processing input. Switch
206 off the escape machine */
207 st->_errno = errno = EILSEQ;
208 st->nonidcount += 1; /* For this character */
209 }
210 st->numsav = 0; /* Discard the saved characters of
211 invalid sequence */
212 st->ESCstate = OFF;
213 } /* more char. needed for escape sequence */
214 } else if (st->Sfunc == SI) {
215 /* Switch state to SO only if SOdesignation is set. */
216 if ( **inbuf == SO && st->SOcharset != 0 ){
217 st->Sfunc = SO;
218 } else { /* Is ASCII */
219 n = ascii_to_euc(**inbuf, st, outbuf, outbytesleft );
220 if ( n < 0 ) /* Insufficient space in output buffer */
221 return((size_t)-1);
222 }
223 } else if (st->Sfunc == SO) {
224 if ( **inbuf == SI ){ /* Switch state to SO */
225 st->Sfunc = SI;
226 }
227 else {
228 st->keepc[0] = **inbuf;
229 st->nonidcount += 1;
230 st->firstbyte = False;
231 }
232 }
233 else
234 fprintf(stderr,
235 "_icv_iconv():ISO-CN-EXT->CNS:Should never have come here\n");
236
237 (*inbuf)++;
238 (*inbytesleft)--;
239
240 if ( st->_errno)
241 break; /* Break out of while loop */
242
243 if (errno) /* We set st->_errno before we set errno. If errno is set
244 somewhere else we handle that here */
245 return((size_t)-1);
246
247 } while (*inbytesleft > 0 && *outbytesleft > 0);
248
249
250 /* We now have to handle the case where we have successfully processed the
251 previous input character which exhausted the output buffer. This is handled
252 by the while loop. However, since there are more input characters that
253 haven't been processed yet, we need to set the errno appropriately and
254 return -1. */
255 if ( *inbytesleft > 0 && *outbytesleft == 0) {
256 errno = E2BIG;
257 return((size_t)-1);
258 }
259 return (*inbytesleft + st->nonidcount);
260 }
261
262
263 static int
process_esc_seq(char c,_iconv_st * st)264 process_esc_seq( char c, _iconv_st *st )
265 {
266
267 switch(st->ESCstate){
268 case E0:
269 switch (c){
270 case SS2LOW:
271 if ( st->SS2charset == 0 ){
272 /* We do not expect SS2 shift function before
273 SS2 designation is set */
274 st->savbuf[0] = ESC;
275 st->numsav = 1;
276 return(INVALID);
277 }
278 st->SSfunc = SS2;
279 /* Since valid ESC sequence remove the ESC from the
280 nonidcount */
281 st->nonidcount -= 1;
282 return(DONE);
283 case SS3LOW:
284 if ( st->SS3charset == 0 ){
285 /* We do not expect SS3 shift function before
286 SS3 designation is set */
287 st->savbuf[0] = ESC;
288 st->numsav = 1;
289 return(INVALID);
290 }
291 st->SSfunc = SS3;
292 /* Since valid ESC sequence remove the ESC from the
293 nonidcount */
294 st->nonidcount -= 1;
295 return(DONE);
296 case '$':
297 st->nonidcount += 1; /* ESC sequence not complete yet */
298 st->ESCstate = E1;
299 return(NEEDMORE);
300 default:
301 st->savbuf[0] = ESC;
302 st->numsav = 1;
303 return(INVALID);
304 } /* end switch */
305
306
307 case E1:
308 switch (c){
309 case ')':
310 st->nonidcount += 1; /* ESC sequence not complete yet */
311 st->ESCstate = E2;
312 return(NEEDMORE);
313 case '*':
314 st->nonidcount += 1; /* ESC sequence not complete yet */
315 st->ESCstate = E3;
316 return(NEEDMORE);
317 case '+':
318 st->nonidcount += 1; /* ESC sequence not complete yet */
319 st->ESCstate = E4;
320 return(NEEDMORE);
321 default:
322 st->savbuf[0] = ESC;
323 st->savbuf[1] = '$';
324 st->numsav = 2;
325 return(INVALID);
326 }
327
328 case E2:
329 st->SOcharset = c;
330 /* Since valid ESC sequence remove decriment nonidcount
331 appropriately for all earlier characters in escape sequence */
332 st->nonidcount -= 3;
333 return(DONE);
334
335 case E3:
336 st->SS2charset = c;
337 /* Since valid ESC sequence remove decriment nonidcount
338 appropriately for all earlier characters in escape sequence */
339 st->nonidcount -= 3;
340 return(DONE);
341
342 case E4:
343 st->SS3charset = c;
344 /* Since valid ESC sequence remove decriment nonidcount
345 appropriately for all earlier characters in escape sequence */
346 st->nonidcount -= 3;
347 return(DONE);
348
349 default:
350 fprintf(stderr,
351 "process_esc_seq():ISO-CN-EXT->CNS:Should never have come here\n");
352 st->_errno = errno = EILSEQ;
353 return(DONE);
354
355 } /* end switch */
356 }
357
358
359 static int
ascii_to_euc(char c,_iconv_st * st,unsigned char ** outbuf,size_t * outbytesleft)360 ascii_to_euc( char c, _iconv_st *st, unsigned char **outbuf, size_t *outbytesleft )
361 {
362
363 int i;
364
365 if ( *outbytesleft < (1 + st->numsav) ) {
366 st->_errno = errno = E2BIG;
367 return (-1);
368 }
369
370 for ( i=0; i < st->numsav; i++ ) {
371 *(*outbuf)++ = (unsigned char) st->savbuf[i];
372 (*outbytesleft)--;
373 }
374
375 *(*outbuf)++ = (unsigned char) c;
376 (*outbytesleft)--;
377
378 return(0);
379 }
380
381
382 static int
iscns(_iconv_st * st)383 iscns( _iconv_st *st )
384 {
385 int plane_no = -1;
386
387 if ( st->SSfunc == NONE && st->SOcharset == 'G' )
388 plane_no = 1;
389 else if ( st->SSfunc == SS2 && st->SS2charset == 'H' )
390 plane_no = 2;
391 else if ( st->SSfunc == SS3 )
392 switch ( st->SS3charset ){
393 case 'I': plane_no = 3; break;
394 case 'J': plane_no = 4; break;
395 case 'K': plane_no = 5; break;
396 case 'L': plane_no = 6; break;
397 case 'M': plane_no = 7; break;
398 }
399 return (plane_no);
400 }
401