xref: /titanic_50/usr/src/lib/iconv_modules/zh/common/zh_CN.gbk%HZ-GB-2312.c (revision 880d797826457b77414b37d531cc3e1aa166ecbe)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1994 by Sun Microsystems, Inc.
23  */
24 
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <errno.h>
28 
/* High bit of a byte; tested to tell GBK multibyte bytes from ASCII. */
#define	MSB	0x80

/*
 * Two-byte substitute written (inside the "~{" ... "~}" section) for input
 * that the converter recognises as a character but does not translate:
 * a two-byte sequence whose second byte is 0x40-0x7e, and every four-byte
 * sequence.
 */
#define NON_ID_CHAR_BYTE1 0x21  /* non-identified character */
#define NON_ID_CHAR_BYTE2 0x75  /* non-identified character */

/*
 * Byte-range predicates.  Each one evaluates its argument more than once,
 * so pass an expression without side effects, already converted to
 * unsigned char.
 */
#define gbk_2nd_byte(v)   ( (v) >= 0x40 && (v) <= 0xfe && (v) != 0x7f )
#define gbk4_2nd_byte(v)  ( (v) >= 0x30 && (v) <= 0x39 )
#define gbk4_3rd_byte(v)   ( (v) >= 0x81 && (v) <= 0xfe )
#define gbk4_4th_byte(v)  gbk4_2nd_byte(v)

/*
 * Converter states (see _icv_iconv()):
 *   G0  output is in ASCII mode
 *   G1  "~{" has been written and a lead byte is held in _lastc;
 *       waiting for the second byte
 *   G2  output is in GB mode, between characters
 *   G3  second byte (0x30-0x39) of a four-byte sequence seen;
 *       waiting for the third byte
 *   G4  third byte of a four-byte sequence seen; waiting for the fourth
 */
enum	_GSTATE { G0, G1, G2, G3, G4};


/* Per-descriptor conversion state, allocated by _icv_open(). */
typedef struct _icv_state {
	char	_lastc;		/* most recently saved byte of a pending sequence */
	short	_gstate;	/* current state, one of enum _GSTATE */
} _iconv_st;
46 
47 /*
48  * Open; called from iconv_open()
49  */
50 void *
_icv_open()51 _icv_open()
52 {
53 	_iconv_st *st;
54 
55 	if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
56 		errno = ENOMEM;
57 		return ((void *) -1);
58 	}
59 
60 	st->_gstate = G0;
61 	return ((void *)st);
62 }
63 
64 
65 /*
66  * Close; called from iconv_close()
67  */
68 void
_icv_close(_iconv_st * st)69 _icv_close(_iconv_st *st)
70 {
71 	if (st == NULL)
72 		errno = EBADF;
73 	else
74 		free(st);
75 }
76 
77 
78 /*
79  * Actual conversion; called from iconv()
80  */
81 /*=======================================================================
82  * 				30-39
83  *			 |--------------------------|
84  *      +----------------|--------------------+     |
85  *      V   MSB          |  MSB       ascii  |      | 81-fe
86  *  +-> G0 ------------> G1 ------> G2 -------+    G3------G4
87  *  | ascii  (~{)    ^   MSB        | |(~})   	     30-39 |
88  *  +----+               +----------+  ---------------------
89  *=======================================================================*/
90 size_t
_icv_iconv(_iconv_st * st,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)91 _icv_iconv(_iconv_st *st, char **inbuf, size_t*inbytesleft,
92 			char **outbuf, size_t*outbytesleft)
93 {
94 	if (st == NULL) {
95 		errno = EBADF;
96 		return -1;
97 	}
98 	if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
99 		st->_gstate = G0;
100 		return 0;
101 	}
102 
103 	errno = 0;
104 
105 	while (*inbytesleft > 0 && *outbytesleft > 0) {
106 	    switch (st->_gstate) {
107 	    case G0:
108 		if ( **inbuf & MSB ) {
109 		   if(*outbytesleft >=2) {
110 		    **outbuf = '~';
111                     *(*outbuf+1) = '{';
112 		    (*outbuf) += 2, (*outbytesleft) -= 2;
113 		    st->_lastc = **inbuf;
114 		    st->_gstate = G1;
115 		    } else {
116 			errno = E2BIG;
117 			return (size_t)-1;
118                     }
119 
120 		} else {
121 		    **outbuf = **inbuf;
122 		    (*outbuf)++, (*outbytesleft)--;
123 		    if (**inbuf == '~') {
124 		    **outbuf = '~';
125 		    (*outbuf)++, (*outbytesleft)--;
126 		    }
127 		}
128 		break;
129 	    case G1:
130 		if ( gbk4_2nd_byte((unsigned char) **inbuf )) {
131 			st->_lastc = **inbuf;
132 			st->_gstate = G3;
133 		} else if ( **inbuf  & MSB ) {
134 		   if(*outbytesleft >=2) {
135 			**outbuf = st->_lastc - 0x80;
136 			*(*outbuf+1) = **inbuf - 0x80;
137 			(*outbuf) += 2, (*outbytesleft) -= 2;
138 			st->_gstate = G2;
139 		    } else {
140                         errno = E2BIG;
141                         return (size_t)-1;
142                     }
143                 } else if ( gbk_2nd_byte((unsigned char ) **inbuf )) {
144                         if ( *outbytesleft >= 2 ) {
145                                 **outbuf = NON_ID_CHAR_BYTE1;
146                                 *(*outbuf +1) = NON_ID_CHAR_BYTE2;
147                                 (*outbuf) += 2, (*outbytesleft) -= 2;
148                                 st->_gstate = G2;
149                         } else {
150                                 errno = E2BIG;
151                                 return (size_t)-1;
152                         }
153 		} else {
154 		    errno = EILSEQ;
155 		    return (size_t)-1;
156 		}
157 		break;
158 	    case G2:
159 		if ( **inbuf & MSB ) {
160 		    st->_lastc = **inbuf;
161 		    st->_gstate = G1;
162 		} else {
163 		   if(*outbytesleft >=3) {
164 		    **outbuf = '~';
165                     *(*outbuf+1) = '}';
166                     *(*outbuf+2) = **inbuf;
167 		    (*outbuf) += 3, (*outbytesleft) -= 3;
168 		    st->_gstate = G0;
169 		    }else {
170                         errno = E2BIG;
171                         return (size_t)-1;
172                     }
173 
174 		}
175 		break;
176 	    case G3:
177 		if ( gbk4_3rd_byte( (unsigned char)**inbuf )) {
178 		    st->_lastc = **inbuf;
179 		    st->_gstate = G4;
180 		} else {
181 			errno = EILSEQ;
182 			return (size_t)-1;
183 		}
184 		break;
185 	    case G4:
186 		if ( gbk4_4th_byte( (unsigned char) **inbuf )) {
187 			if ( *outbytesleft >= 2 ) {
188 				**outbuf = NON_ID_CHAR_BYTE1;
189 				*(*outbuf +1) = NON_ID_CHAR_BYTE2;
190 				(*outbuf) += 2, (*outbytesleft) -= 2;
191 				st->_gstate = G2;
192 			} else {
193 				errno = E2BIG;
194 				return (size_t)-1;
195 			}
196 		} else {
197 			errno = EILSEQ;
198 			return (size_t)-1;
199 		}
200 		break;
201 	    default:
202 		errno = EILSEQ;
203 		return (size_t)-1;
204 	    }
205 
206 	    (*inbuf)++, (*inbytesleft)--;
207 	    if (errno)
208 		return -1;
209 	}
210 
211 	if (st->_gstate != G0 && st->_gstate != G2 && *inbytesleft == 0) {
212 		errno = EINVAL;
213 		return (size_t)-1;
214 	}
215 
216 	if (*inbytesleft > 0 && *outbytesleft == 0) {
217 		errno = E2BIG;
218 		return -1;
219 	}
220 	return (*inbytesleft);
221 }
222