xref: /illumos-gate/usr/src/lib/iconv_modules/zh/common/zh_TW-iso2022-CN-EXT.c (revision 142d813a06c6f9a6142e2c276b62129a17a31a65)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
/*
 * Copyright (c) 1997, by Sun Microsystems, Inc.
 * All rights reserved.
 */
26 
27 
28 /*
29    Converts From:	ISO2022-CN-EXT encoding.
30    Converts To:		Taiwanese EUC encoding ( CNS11643 ) and big5 encoding
31 
32  */
33 
#include <errno.h>
#include <stdlib.h>

#include "iso2022-cn.h"
35 
36 /* Forward reference the functions constrained to the scope of this file */
37 static int process_esc_seq(char, _iconv_st *);
38 static int ascii_to_euc(char, _iconv_st *, unsigned char **, size_t *);
39 static int iscns( _iconv_st * );
40 
41 
42 extern int errno;
43 
44 /*
45  * _icv_open: Called from iconv_open(). Allocates and initializes _iconv_st
46  *            structure. Returns pointer to the structure as (void *).
47  */
48 
49 
50 void *
51 _icv_open()
52 {
53 	_iconv_st  *st;
54 
55 	/* Allocate */
56 	if (( st = (_iconv_st *) malloc( sizeof( _iconv_st ))) == NULL ){
57 	    errno = ENOMEM;
58 	    return ((void *) -1);
59 	}
60 
61 	/* Initialize */
62 	st->Sfunc = SI;
63 	st->SSfunc = NONE;
64 	st->ESCstate = OFF;
65 	st->firstbyte = True;
66 	st->numsav = 0;
67 	st->SOcharset = 0;		/* no default charset */
68 	st->SS2charset = 0;		/* no default charset */
69 	st->SS3charset = 0;		/* no default charset */
70 	st->nonidcount = 0;
71 	st->_errno = 0;
72 
73 	/* Return struct */
74 	return ((void *) st);
75 }
76 
77 
78 
79 /*
80  * _icv_close: Called from iconv_close(). Frees the _iconv_st structure as
81  *	       pointed by the argument.
82  */
83 
84 void
85 _icv_close(_iconv_st *st)
86 {
87 	if (st == NULL )
88 	    errno = EBADF;
89 	else
90 	    free(st);
91 }
92 
93 
94 /*
95  * _icv_iconv: Called from iconv(). Does the convertion from ISO2022-CN-EXT
96  *			   to CNS11643
97  */
98 /*=======================================================
99  *
100  *   State machine for interpreting ISO2022-CN-EXT code
101  *
102  *=======================================================
103  *
104  *
105  *=======================================================*/
106 
107 size_t
108 iso2022_icv_iconv(_iconv_st *st, char **inbuf, size_t *inbytesleft,
109 			unsigned char **outbuf, size_t *outbytesleft, int (*convert)() )
110 {
111 
112 	int ret, n;
113 
114 	if (st == NULL) {
115 	    errno = EBADF;
116 	    return ((size_t) -1);
117 	}
118 
119 	if ( inbuf == NULL || *inbuf == NULL || inbytesleft == NULL ||
120 			*inbytesleft <= 0 ) { /* Reset request */
121 	    st->Sfunc = SI;
122 	    st->SSfunc = NONE;
123 	    st->ESCstate = OFF;
124 	    st->firstbyte = True;
125 	    st->numsav = 0;
126 	    st->SOcharset = 0;
127 	    st->SS2charset = 0;
128 	    st->SS3charset = 0;
129 	    st->nonidcount = 0;
130 	    st->_errno = 0;
131 	    return ((size_t) 0);
132 	}
133 
134 	st->_errno = 0;
135 	errno = 0;
136 
137 	/* Before we use *inbytesleft or *outbytesleft we should confirm that
138 	inbytesleft and outbytesleft are non-NULL. I am considering inbytesleft
139 	or *inbytesleft having 0 or negative value as a reset request. I am
140 	considering outbytesleft having 0 value as no space in output buffer.
141 	Also, here itself I am verifying that outbuf and *outbuf should be non-NULL
142 	pointers so I do not have to worry about them being NULL below in the
143 	conversion sub-routines. I also confirm here that *outbytesleft should be
144 	greater than 0 before we can continue further */
145 
146 	if ( outbytesleft == NULL || *outbytesleft <= 0 ||
147 			outbuf == NULL || *outbuf == NULL ) {
148 	    errno = E2BIG;
149 	    return((size_t)-1);
150 	}
151 
152 	/* A state machine to interpret ISO, driven by the shift functions SI, SO */
153 
154 	do {
155 	    if (st->firstbyte == False) { /* Is SO, SS2, SS3 second byte */
156 		st->keepc[1] = **inbuf;
157 		n = (*convert)( st, outbuf, outbytesleft, iscns(st) );
158 		if ( n < 0 )
159 		    return((size_t)-1); /* Insufficient space in output buffer */
160 		else if ( n > 0 ){ /* No CNS for this Chinese code */
161 		    n = ascii_to_euc(NON_ID_CHAR, st, outbuf, outbytesleft);
162 		    if ( n < 0 )
163 			return((size_t)-1);
164 		    st->nonidcount += 1;
165 		} else
166 		    st->nonidcount -= 1; /* The first byte identified as
167 						valid Chinese byte and is
168 						processed */
169 		st->firstbyte = True;
170 		st->SSfunc = NONE;	/* If we just processed SS bytes,
171 					   this will reset SSfunc to NONE. If
172 					   we just processed SO bytes, this was
173 					   already NONE */
174 	    } else if ( st->SSfunc != NONE ) { /* We are currently expecting
175 						 SS2 or SS3 Chinese bytes */
176 		    st->keepc[0] = **inbuf;
177 		    st->nonidcount += 1;
178 		    st->firstbyte = False;
179 	    } else if ( **inbuf == ESC && st->ESCstate == OFF ) {
180 		    st->nonidcount += 1; /* For the ESC character */
181 		    st->ESCstate = E0;
182 	    } else if ( st->ESCstate != OFF ) { /* Continue processing the
183 						  escape sequence */
184 		ret = process_esc_seq( **inbuf, st );
185 		if ( ret == DONE ) {	/* ESC seq interpreted correctly.
186 					     Switch off the escape machine */
187 		    st->ESCstate = OFF;
188 		} else if ( ret == INVALID ){
189 		    if (st->Sfunc == SI){	/* An invalid ESC sequence
190 						 encountered.  Process
191 						 the text saved in
192 						 st->savbuf as ASCII. Switch
193 						 off the escape machine */
194 			n = ascii_to_euc( **inbuf, st, outbuf, outbytesleft );
195 			if ( n < 0 ) /* Insufficient space in output buffer */
196 				return((size_t)-1);
197 			st->nonidcount -= st->numsav; /* Since invalid Esc
198 						       sequence is outputted
199 						       as ASCII */
200 		    } else if (st->Sfunc == SO) { /* An invalid ESC sequence
201 						     encountered. Don't know
202 						     what to do. So flag
203 						     error illegal seq. It is
204 						     wise not to continue
205 						     processing input. Switch
206 						     off the escape machine */
207 			st->_errno = errno = EILSEQ;
208 			st->nonidcount += 1; /* For this character */
209 		    }
210 		    st->numsav = 0;	 /* Discard the saved characters of
211 					    invalid sequence */
212 		    st->ESCstate = OFF;
213 		} /* more char. needed for escape sequence */
214 	    } else if (st->Sfunc  == SI) {
215 		/* Switch state to SO only if SOdesignation is set. */
216 		if ( **inbuf == SO && st->SOcharset != 0 ){
217 		    st->Sfunc = SO;
218 		} else { /* Is ASCII */
219 		    n = ascii_to_euc(**inbuf, st, outbuf, outbytesleft );
220 		    if ( n < 0 ) /* Insufficient space in output buffer */
221 			return((size_t)-1);
222 		}
223 	    } else if (st->Sfunc  == SO) {
224 		if ( **inbuf == SI ){ /* Switch state to SO */
225 		    st->Sfunc = SI;
226 		}
227 		else {
228 		    st->keepc[0] = **inbuf;
229 		    st->nonidcount += 1;
230 		    st->firstbyte = False;
231 		}
232 	    }
233 	    else
234 		fprintf(stderr,
235 		    "_icv_iconv():ISO-CN-EXT->CNS:Should never have come here\n");
236 
237 	    (*inbuf)++;
238 	    (*inbytesleft)--;
239 
240 	    if ( st->_errno)
241 		break; /* Break out of while loop */
242 
243 	    if (errno) /* We set st->_errno before we set errno. If errno is set
244 				      somewhere else we handle that here */
245 		return((size_t)-1);
246 
247 	} while (*inbytesleft > 0 && *outbytesleft > 0);
248 
249 
250 /* We now have to handle the case where we have successfully processed the
251    previous input character which exhausted the output buffer. This is handled
252    by the while loop. However, since there are more input characters that
253    haven't been processed yet, we need to set the errno appropriately and
254    return -1. */
255 	if ( *inbytesleft > 0 && *outbytesleft == 0) {
256 	    errno = E2BIG;
257 	    return((size_t)-1);
258 	}
259 	return (*inbytesleft + st->nonidcount);
260 }
261 
262 
263 static int
264 process_esc_seq( char c, _iconv_st *st )
265 {
266 
267 	switch(st->ESCstate){
268 	case E0:
269 	    switch (c){
270 	    case SS2LOW:
271 		if ( st->SS2charset == 0 ){
272 		    /* We do not expect SS2 shift function before
273 		       SS2 designation is set */
274 		    st->savbuf[0] = ESC;
275 		    st->numsav = 1;
276 		    return(INVALID);
277 		}
278 		st->SSfunc = SS2;
279 		/* Since valid ESC sequence remove the ESC from the
280 		   nonidcount */
281 		st->nonidcount -= 1;
282 		return(DONE);
283 	    case SS3LOW:
284 		if ( st->SS3charset == 0 ){
285 		    /* We do not expect SS3 shift function before
286 		       SS3 designation is set */
287 		    st->savbuf[0] = ESC;
288 		    st->numsav = 1;
289 		    return(INVALID);
290 		}
291 		st->SSfunc = SS3;
292 		/* Since valid ESC sequence remove the ESC from the
293 		   nonidcount */
294 		st->nonidcount -= 1;
295 		return(DONE);
296 	    case '$':
297 		st->nonidcount += 1; /* ESC sequence not complete yet */
298 		st->ESCstate = E1;
299 		return(NEEDMORE);
300 	    default:
301 		st->savbuf[0] = ESC;
302 		st->numsav = 1;
303 		return(INVALID);
304 	    } /* end switch */
305 
306 
307 	case E1:
308 	    switch (c){
309 	    case ')':
310 		st->nonidcount += 1; /* ESC sequence not complete yet */
311 		st->ESCstate = E2;
312 		return(NEEDMORE);
313 	    case '*':
314 		st->nonidcount += 1; /* ESC sequence not complete yet */
315 		st->ESCstate = E3;
316 		return(NEEDMORE);
317 	    case '+':
318 		st->nonidcount += 1; /* ESC sequence not complete yet */
319 		st->ESCstate = E4;
320 		return(NEEDMORE);
321 	    default:
322 		st->savbuf[0] = ESC;
323 		st->savbuf[1] = '$';
324 		st->numsav = 2;
325 		return(INVALID);
326 	    }
327 
328 	case E2:
329 	    st->SOcharset = c;
330 	    /* Since valid ESC sequence remove decriment nonidcount
331 	       appropriately for all earlier characters in escape sequence */
332 	    st->nonidcount -= 3;
333 	    return(DONE);
334 
335 	case E3:
336 	    st->SS2charset = c;
337 	    /* Since valid ESC sequence remove decriment nonidcount
338 	       appropriately for all earlier characters in escape sequence */
339 	    st->nonidcount -= 3;
340 	    return(DONE);
341 
342 	case E4:
343 	    st->SS3charset = c;
344 	    /* Since valid ESC sequence remove decriment nonidcount
345 	       appropriately for all earlier characters in escape sequence */
346 	    st->nonidcount -= 3;
347 	    return(DONE);
348 
349 	default:
350 	    fprintf(stderr,
351 		    "process_esc_seq():ISO-CN-EXT->CNS:Should never have come here\n");
352 	    st->_errno = errno = EILSEQ;
353 	    return(DONE);
354 
355 	} /* end switch */
356 }
357 
358 
359 static int
360 ascii_to_euc( char c, _iconv_st *st, unsigned char **outbuf, size_t *outbytesleft )
361 {
362 
363 	int i;
364 
365 	if ( *outbytesleft < (1 + st->numsav) ) {
366 	    st->_errno = errno = E2BIG;
367 	    return (-1);
368 	}
369 
370 	for ( i=0; i < st->numsav; i++ ) {
371 	    *(*outbuf)++ = (unsigned char) st->savbuf[i];
372 	    (*outbytesleft)--;
373 	}
374 
375 	*(*outbuf)++ = (unsigned char) c;
376 	(*outbytesleft)--;
377 
378 	return(0);
379 }
380 
381 
382 static int
383 iscns( _iconv_st *st )
384 {
385 	int plane_no = -1;
386 
387 	if ( st->SSfunc == NONE && st->SOcharset == 'G' )
388 	    plane_no = 1;
389 	else if ( st->SSfunc == SS2 && st->SS2charset == 'H' )
390 	    plane_no = 2;
391 	else if ( st->SSfunc == SS3 )
392 	    switch ( st->SS3charset ){
393 	    case 'I': plane_no = 3; break;
394 	    case 'J': plane_no = 4; break;
395 	    case 'K': plane_no = 5; break;
396 	    case 'L': plane_no = 6; break;
397 	    case 'M': plane_no = 7; break;
398 	    }
399 	return (plane_no);
400 }
401