xref: /illumos-gate/usr/src/lib/iconv_modules/ko/common/utf_to_iso_main.c (revision 16d8656330ae5622ec32e5007f62145ebafdc50f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1996 by Sun Microsystems, Inc.
23  */
24 
25 
26 #include <stdlib.h>
27 #include <errno.h>
28 #include <widec.h>
29 #include "common_def.h"
30 #include "common_han.h"
31 #include "utf_euc_api.h"
32 #include "common_defs.h"
33 
/*
 * Control characters written to the 7-bit output stream.
 */
#define ESC		0x1B	/* starts the "ESC $ ) C" designator sequence */
#define SO		0x0E	/* shift out: emitted when entering HANGUL state */
#define SI		0x0F	/* shift in: emitted when returning to ASCII state */
37 
/*
 * Conversion descriptor: the shift state carried from one _icv_iconv()
 * call to the next.
 */
typedef struct __conv_desc {
	/* KSC5601 once the "ESC $ ) C" designator has been written */
	enum {
		NOT_DEFINED_YET,
		KSC5601
	} designator;
	/* HANGUL between an emitted SO and the matching SI, ASCII otherwise */
	enum {
		ASCII,
		HANGUL
	} state;
} _conv_desc;
42 
43 /****  _ I C V _ O P E N  ****/
44 
_icv_open()45 void* _icv_open()
46 {
47 	_conv_desc* cd = (_conv_desc*)malloc(sizeof(_conv_desc));
48 
49 	if (cd == (_conv_desc*)NULL)
50 	{
51 		errno = ENOMEM;
52 		return((void*)-1);
53 	}
54 
55 	cd->designator = NOT_DEFINED_YET;
56 	cd->state = ASCII;
57 
58 	return((void*)cd);
59 }  /* end of int _icv_open(). */
60 
61 
62 /****  _ I C V _ C L O S E  ****/
63 
/*
 * Release a conversion descriptor.  A NULL descriptor is reported by
 * setting errno to EBADF; nothing is freed in that case.
 */
void _icv_close(_conv_desc* cd)
{
	if (cd == (_conv_desc*)NULL)
	{
		errno = EBADF;
		return;
	}

	free((void*)cd);
}  /* end of void _icv_close(_conv_desc*). */
71 
72 
73 /****  _ I C V _ I C O N V  ****/
74 
/*
 * Convert UTF-8 input into a 7-bit stream in which KS C 5601 characters are
 * announced once by "ESC $ ) C" and bracketed by SO ... SI.
 *
 *   cd          conversion descriptor; carries the designator/shift state
 *               across calls
 *   inbuf       address of the read pointer; when inbuf or *inbuf is NULL
 *               the call only resets the descriptor, writing a closing SI
 *               first if the stream is currently in HANGUL state
 *   inbufleft   number of unread input bytes
 *   outbuf      address of the write pointer
 *   outbufleft  number of free output bytes
 *
 * Returns the number of NON_IDENTICAL bytes written in place of characters
 * that could not be converted, or (size_t)-1 with errno set to:
 *   EBADF   cd is NULL
 *   E2BIG   the output buffer cannot hold the next character together with
 *           the escape/shift bytes that must precede it
 *   EINVAL  the input ends in the middle of a multi-byte sequence
 *   EILSEQ  is_valid_utf8_string() rejected the sequence
 *
 * During conversion every input character is handled all-or-nothing: the
 * room needed for its complete output (including ESC $ ) C, SO or SI) is
 * verified before anything is written, before the descriptor state changes
 * and before the input pointer moves.  On return, *inbuf, *inbufleft,
 * *outbuf and *outbufleft describe exactly what was consumed and produced,
 * so a caller may retry after E2BIG with a larger buffer.
 */
size_t _icv_iconv(_conv_desc* cd, char** inbuf, size_t* inbufleft,
			char** outbuf, size_t* outbufleft)
{
	size_t		ret_val = 0;
	int		need;		/* output bytes the current character needs */
	unsigned char*	ib;
	unsigned char*	ob;
	unsigned char*	ibtail;
	unsigned char*	obtail;

	if (!cd)
	{
		errno = EBADF;
		return((size_t)-1);
	}

	/* Reset request: close an open Hangul run and forget the designator. */
	if (!inbuf || !(*inbuf))
	{
		if (cd->state == HANGUL)
		{
			if (outbufleft && *outbufleft >= 1 && outbuf && *outbuf)
			{
				**outbuf = SI;
				(*outbuf)++;
				(*outbufleft)--;
			}
			else
			{
				errno = E2BIG;
				return((size_t)-1);
			}
		}

		cd->designator = NOT_DEFINED_YET;
		cd->state = ASCII;
		return((size_t)0);
	}

	ib = (unsigned char*)*inbuf;
	ob = (unsigned char*)*outbuf;
	ibtail = ib + *inbufleft;
	obtail = ob + *outbufleft;

	while (ib < ibtail)
	{
		if (!(*ib & 0x80))		/* 7 bits */
		{
			/* One byte, plus SI when a Hangul run has to be closed. */
			need = (cd->state == HANGUL) ? 2 : 1;
			if ((obtail - ob) < need)
			{
				errno = E2BIG;
				ret_val = (size_t)-1;
				break;
			}

			/* Hangul string ends */
			if (cd->state == HANGUL) {
				*ob++ = SI;
				cd->state = ASCII;
			}
			*ob++ = *ib++;
		}
		else if ((*ib & 0xF0) == 0xE0)	/* 16 bits */
		{
			hcode_type utf8_code, euc_code;

			if ((ibtail - ib) < 3)
			{
				errno = EINVAL;
				ret_val = (size_t)-1;
				break;
			}

			if (!is_valid_utf8_string(ib, 3))
			{
				errno = EILSEQ;
				ret_val = (size_t)-1;
				break;
			}

			utf8_code.byte.byte1 = 0;
			utf8_code.byte.byte2 = *ib;
			utf8_code.byte.byte3 = *(ib + 1);
			utf8_code.byte.byte4 = *(ib + 2);

			euc_code = _utf8_to_wansung(utf8_code);

			if (euc_code.code != 0) {
				/* If find something -> EUC code */

				/*
				 * Two code bytes, preceded by the four-byte
				 * designator the first time and by SO when
				 * a new Hangul run starts.
				 */
				need = 2;
				if (cd->designator == NOT_DEFINED_YET)
					need += 4;
				if (cd->state == ASCII)
					need += 1;
				if ((obtail - ob) < need)
				{
					errno = E2BIG;
					ret_val = (size_t)-1;
					break;
				}

				/* if the first time of Hangul -> put Mark */
				if (cd->designator == NOT_DEFINED_YET) {
					*ob++ = ESC;
					*ob++ = '$';
					*ob++ = ')';
					*ob++ = 'C';
					cd->designator = KSC5601;
				}

				/* if the first Hangul in a string */
				if (cd->state == ASCII) {
					*ob++ = SO;
					cd->state = HANGUL;
				}

				/* MSB clear for 7-bits encoding */
				euc_code.wansung.msb1 = 0;
				euc_code.wansung.msb2 = 0;

				*ob++ = euc_code.byte.byte3;
				*ob++ = euc_code.byte.byte4;
			}
			else
			{
				/*
				 * No mapping: two NON_IDENTICAL bytes, plus
				 * SI when a Hangul run has to be closed.
				 */
				need = (cd->state == HANGUL) ? 3 : 2;
				if ((obtail - ob) < need)
				{
					errno = E2BIG;
					ret_val = (size_t)-1;
					break;
				}

				/* non identical character == ASCII */
				if (cd->state == HANGUL) {
					*ob++ = SI;
					cd->state = ASCII;
				}
				*ob++ = NON_IDENTICAL;
				*ob++ = NON_IDENTICAL;
				ret_val += 2;
			}
			ib += 3;
		}
		else  /* 11, 21, 26 & 31 bits codes won't be able to convert. */
		{
			int	offset;		/* length of the input sequence */
			int	repl;		/* NON_IDENTICAL bytes to emit */

			/* 3-byte (16 bits) leads were handled by the branch above. */
			if ((*ib & 0xE0) == 0xC0)  /* 11 */
				offset = 2;
			else if ((*ib & 0xF8) == 0xF0)  /* 21 */
				offset = 4;
			else if ((*ib & 0xFC) == 0xF8)  /* 26 */
				offset = 5;
			else if ((*ib & 0xFE) == 0xFC)  /* 31 */
				offset = 6;
			else  /* Illegal sequence. */
				offset = 1;

			if ((ibtail - ib) < offset)
			{
				errno = EINVAL;
				ret_val = (size_t)-1;
				break;
			}

			if (!is_valid_utf8_string(ib, offset))
			{
				errno = EILSEQ;
				ret_val = (size_t)-1;
				break;
			}

			/*
			 * Let's assume the code is non-identical.  Room is
			 * checked before the input pointer advances so that
			 * an E2BIG failure does not silently drop the
			 * character.
			 */
			repl = (offset > 2) ? 2 : 1;
			need = (cd->state == HANGUL) ? repl + 1 : repl;
			if ((obtail - ob) < need)
			{
				errno = E2BIG;
				ret_val = (size_t)-1;
				break;
			}

			ib += offset;

			/* non identical character == ASCII */
			if (cd->state == HANGUL) {
				*ob++ = SI;
				cd->state = ASCII;
			}

			*ob++ = NON_IDENTICAL;
			if (repl > 1)
				*ob++ = NON_IDENTICAL;
			ret_val += repl;
		}
	}

	*inbuf = (char*)ib;
	*inbufleft = ibtail - ib;
	*outbuf = (char*)ob;
	*outbufleft = obtail - ob;

	return(ret_val);
}  /* end of size_t _icv_iconv(_conv_desc*, char**, size_t*, char**, size_t*).*/
268