xref: /titanic_52/usr/src/lib/iconv_modules/zh/common/HZ-GB-2312%UTF-8.c (revision 91e1e26ac6a73ce959289cf7d3d96c4baedbe0b8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1994 by Sun Microsystems, Inc.
23  */
24 
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <errno.h>
28 #include <gb2312_unicode.h>
29 
/*
 * High-order bit of a byte.  HZ carries GB2312 characters as 7-bit bytes;
 * this bit is ORed back in before the code table is indexed, and it is
 * tested in ASCII mode to tell 7-bit bytes from 8-bit ones.
 */
#define	MSB			0x80

/*
 * The three bytes EF BF BD (UTF-8 for U+FFFD), written to the output when
 * an input character has no usable entry in the code table.
 */
#define	UTF8_NON_ID_CHAR1	0xEF
#define	UTF8_NON_ID_CHAR2	0xBF
#define	UTF8_NON_ID_CHAR3	0xBD
35 
36 
/*
 * Decoder states used by _icv_iconv().
 */
enum _GSTATE {
	G0,	/* ASCII mode */
	G1,	/* '~' seen while in ASCII mode */
	G2,	/* GB mode, waiting for the first byte of a character */
	G3,	/* GB mode, first byte saved, waiting for the second byte */
	G4,	/* '~' seen while in GB mode */
	G5	/* not referenced by the converter in this file */
};
38 
39 
/*
 * Conversion state carried between calls to _icv_iconv().
 */
typedef struct _icv_state {
	char	_lastc;		/* first byte of a two-byte character, */
				/* saved until the second byte arrives */
	short	_gstate;	/* current decoder state (G0 ... G5) */
} _iconv_st;

/*
 * Converts one two-byte HZ character to UTF-8 in buf (at most buflen
 * bytes).  Returns the number of bytes stored, or 0 when buflen is too
 * small.  Defined at the end of this file.
 */
int hz2utf8(char in_byte1, char in_byte2, char *buf, int buflen);
47 
48 /*
49  * Open; called from iconv_open()
50  */
51 void *
52 _icv_open()
53 {
54 	_iconv_st *st;
55 
56 	if ((st = (_iconv_st *)malloc(sizeof(_iconv_st))) == NULL) {
57 		errno = ENOMEM;
58 		return ((void *) -1);
59 	}
60 
61 	st->_gstate = G0;
62 	return ((void *)st);
63 }
64 
65 
66 /*
67  * Close; called from iconv_close()
68  */
69 void
70 _icv_close(_iconv_st *st)
71 {
72 	if (st == NULL)
73 		errno = EBADF;
74 	else
75 		free(st);
76 }
77 
78 
79 /*
80  * Actual conversion; called from iconv()
81  */
82 /*=======================================================================
83  *
84  *         ~          {     Chinese
85  * +-> G0 -----> G1 ----> G2 ----> G3
86  * |   | ascii   | ascii  |~}      |
87  * +----------------------+--------+
88  *=======================================================================*/
89 size_t
90 _icv_iconv(_iconv_st *st, char **inbuf, size_t*inbytesleft,
91 			char **outbuf, size_t*outbytesleft)
92 {
93 	int	n;
94 
95 	if (st == NULL) {
96 		errno = EBADF;
97 		return -1;
98 	}
99 	if (inbuf == NULL || *inbuf == NULL) { /* Reset request. */
100 		st->_gstate = G0;
101 		return 0;
102 	}
103 
104 	errno = 0;
105 
106 	while (*inbytesleft > 0 && *outbytesleft > 0) {
107 	    switch (st->_gstate) {
108 	    case G0:
109 		if ( **inbuf == '~' ) {
110 		    st->_gstate = G1;
111 		} else if (((**inbuf) & MSB) == 0) {	/* ASCII */
112 		    **outbuf = **inbuf;
113 		    (*outbuf)++, (*outbytesleft)--;
114 		}
115 		break;
116 	    case G1:
117 		if ( **inbuf == '{' ) {
118 		    st->_gstate = G2;
119 		} else if (**inbuf == '\n') {
120 		    st->_gstate = G0;
121 		} else if (**inbuf == '~') {
122 		    **outbuf = '~';
123 		    (*outbuf)++, (*outbytesleft)--;
124 		    st->_gstate = G0;
125 		} else {
126 		    errno = EINVAL;
127 		}
128 		break;
129 	    case G2:
130 		if ( **inbuf == '~' ) {
131 		    st->_gstate = G4;
132 		} else {
133 		    st->_lastc = **inbuf;
134 		    st->_gstate = G3;
135 		}
136 		break;
137 	    case G3:
138 		n = hz2utf8(st->_lastc, **inbuf, *outbuf, *outbytesleft);
139 		if (n > 0) {
140 		    (*outbuf) += n, (*outbytesleft) -= n;
141 		} else {
142 		    errno = E2BIG;
143 		    return (size_t)-1;
144 		}
145 		st->_gstate = G2;
146 		break;
147 	    case G4:
148 		if ( **inbuf == '}' ) {
149 		    st->_gstate = G0;
150 		} else if (**inbuf == '\n') {
151 			st->_gstate = G2;
152 			continue;
153 		} else {
154 		    errno = EINVAL;
155 		}
156 
157 		break;
158 	    }
159 
160 	    (*inbuf)++, (*inbytesleft)--;
161 	    if (errno)
162 		{
163 		return (size_t)-1;
164 		}
165 	}
166 
167 	if (*inbytesleft > 0 && *outbytesleft == 0) {
168 		errno = E2BIG;
169 		return (size_t)-1;
170 	}
171 	return ((size_t)(*inbytesleft));
172 }
173 
174 
175 int
176 hz2utf8(in_byte1, in_byte2, buf, buflen)
177 char	in_byte1, in_byte2;
178 char	*buf;
179 int	buflen;
180 {
181 
182         int     idx;
183         int     unicode;
184 
185 	if ( buflen < 2 )
186 	    return 0;
187 	in_byte1 |=  MSB;
188 	in_byte2 |=  MSB;
189 
190 
191         idx = (((in_byte1 & 0xff) - 0xa1) * 94)  + (in_byte2 & 0xff) - 0xa1;
192         if (idx >= 0) {
193                 unicode = Unicode[idx];
194                 if (unicode >= 0x0080 && unicode <= 0x07ff) {
195                     if ( buflen < 2 )
196                         return 0;
197                     *buf = ((unicode >> 6) & 0x1f) | 0xc0;
198                     *(buf+1) = (unicode & 0x3f) | MSB;
199                     return 2;
200                 }
201                 if (unicode >= 0x0800 && unicode <= 0xffff) {
202                     if ( buflen < 3 )
203                         return 0;
204                     *buf = ((unicode >> 12) & 0x0f) | 0xe0;
205                     *(buf+1) = ((unicode >> 6) & 0x3f) | MSB;
206                     *(buf+2) = (unicode & 0x3f) | MSB;
207                     return 3;
208                 }
209         }
210         if ( buflen < 3 )
211             return 0;
212 
213         *buf     = UTF8_NON_ID_CHAR1;
214         *(buf+1) = UTF8_NON_ID_CHAR2;
215         *(buf+2) = UTF8_NON_ID_CHAR3;
216         return 3;
217 }
218