xref: /titanic_51/usr/src/lib/iconv_modules/ko/common/utf_to_uhang_main.c (revision 91e1e26ac6a73ce959289cf7d3d96c4baedbe0b8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1999 by Sun Microsystems, Inc.
23  */
24 
25 #include <errno.h>
26 #include <widec.h>
27 #include "common_def.h"
28 #include "common_han.h"
29 #include "uhang_utf_api.h"
30 #include "common_defs.h"
31 
32 /****  _ I C V _ O P E N  ****/
33 
34 void* _icv_open()
35 {
36         return((void*)MAGIC_NUMBER);
37 }  /* end of int _icv_open(). */
38 
39 
40 /****  _ I C V _ C L O S E  ****/
41 
42 void _icv_close(int* cd)
43 {
44         if (!cd || cd != (int*)MAGIC_NUMBER)
45                 errno = EBADF;
46 }  /* end of void _icv_close(int*). */
47 
48 
49 /****  _ I C V _ I C O N V  ****/
50 
51 size_t _icv_iconv(int* cd, char** inbuf, size_t* inbufleft,
52 			char** outbuf, size_t* outbufleft)
53 {
54 	size_t		ret_val = 0;
55 	unsigned char*	ib;
56 	unsigned char*	ob;
57 	unsigned char*	ibtail;
58 	unsigned char*	obtail;
59 
60 	if (!cd || cd != (int*)MAGIC_NUMBER)
61 	{
62 		errno = EBADF;
63 		return((size_t)-1);
64 	}
65 
66 	if (!inbuf || !(*inbuf))
67 		return((size_t)0);
68 
69 	ib = (unsigned char*)*inbuf;
70 	ob = (unsigned char*)*outbuf;
71 	ibtail = ib + *inbufleft;
72 	obtail = ob + *outbufleft;
73 	while (ib < ibtail)
74 	{
75 		if (!(*ib & 0x80))		/* 7 bits */
76 		{
77 			if (ob >= obtail)
78 			{
79 				errno = E2BIG;
80 				ret_val = (size_t)-1;
81 				break;
82 			}
83 			*ob++ = *ib++;
84 		 }
85 		else if((*ib & 0xE0) == 0xC0){		/* 2-byte UTF-8 */
86 			hcode_type utf8_code, euc_code;
87 			if((ibtail - ib) < 2)
88 			{
89 				errno = EINVAL;
90 				ret_val = (size_t)-1;
91 				break;
92 			}
93 
94 			if (!is_valid_utf8_string(ib, 2))
95 		        {
96 				errno = EILSEQ;
97 				ret_val = (size_t)-1;
98 				break;
99 		        }
100 
101 			utf8_code.byte.byte1 = 0;
102 			utf8_code.byte.byte2 = 0;
103 			utf8_code.byte.byte3 = *ib;
104 			utf8_code.byte.byte4 = *(ib + 1);
105 
106 			euc_code = _utf8_to_unified_hangul(utf8_code);
107 
108 			if(euc_code.code != 0){
109 				/* If find something -> EUC code */
110 				*ob++ = euc_code.byte.byte3;
111 				*ob++ = euc_code.byte.byte4;
112 			}
113 			else
114 			{
115 				/* Let's assume the code is non-identical. */
116 				if ((obtail - ob) < 2)
117 				{
118 					errno = E2BIG;
119 					ret_val = (size_t)-1;
120 					break;
121 				}
122 				*ob++ = NON_IDENTICAL;
123 				*ob++ = NON_IDENTICAL;
124 				ret_val += 2;
125 			}
126 			ib += 2;
127 		}
128 		else if ((*ib & 0xF0) == 0xE0)		/* 3-byte UTF-8 */
129 		{
130 			hcode_type utf8_code, euc_code;
131 
132 			if ((ibtail - ib) < 3)
133 			{
134 				errno = EINVAL;
135 				ret_val = (size_t)-1;
136 				break;
137 			}
138 
139 			if (!is_valid_utf8_string(ib, 3))
140 		        {
141 				errno = EILSEQ;
142 				ret_val = (size_t)-1;
143 				break;
144 		        }
145 
146 			utf8_code.byte.byte1 = 0;
147 			utf8_code.byte.byte2 = *ib;
148 			utf8_code.byte.byte3 = *(ib + 1);
149 			utf8_code.byte.byte4 = *(ib + 2);
150 
151 			euc_code = _utf8_to_unified_hangul(utf8_code);
152 
153 			if (euc_code.code != 0) {
154 				/* If find something -> EUC code */
155 				*ob++ = euc_code.byte.byte3;
156 				*ob++ = euc_code.byte.byte4;
157 			}
158 			else
159 			{
160 				/* Let's assume the code is non-identical. */
161 				if ((obtail - ob) < 2)
162 				{
163 					errno = E2BIG;
164 					ret_val = (size_t)-1;
165 					break;
166 				}
167 				*ob++ = NON_IDENTICAL;
168 				*ob++ = NON_IDENTICAL;
169 				ret_val += 2;
170 			}
171 			ib += 3;
172 		}
173 		else  /* 11, 21, 26 & 31 bits codes won't be able to convert. */
174 		{
175 			short int offset;
176 
177 			if ((*ib & 0xE0) == 0xC0)  /* 11 */
178 				offset = 2;
179 			else if ((*ib & 0xF0) == 0xE0)  /* 16 */
180 				offset = 3;
181 			else if ((*ib & 0xF8) == 0xF0)  /* 21 */
182 				offset = 4;
183 			else if ((*ib & 0xFC) == 0xF8)  /* 26 */
184 				offset = 5;
185 			else if ((*ib & 0xFE) == 0xFC)  /* 31 */
186 				offset = 6;
187 			else  /* Illegal sequence. */
188 				offset = 1;
189 
190 			if ((ibtail - ib) < offset)
191 			{
192 				errno = EINVAL;
193 				ret_val = (size_t)-1;
194 				break;
195 			}
196 
197 			if (!is_valid_utf8_string(ib, offset))
198 		        {
199 				errno = EILSEQ;
200 				ret_val = (size_t)-1;
201 				break;
202 		        }
203 
204 			ib += offset;
205 
206 			/* Let's assume the code is non-identical. */
207 			offset = (offset > 2) ? 2 : 1;
208 			if ((obtail - ob) < offset)
209 			{
210 				errno = E2BIG;
211 				ret_val = (size_t)-1;
212 				break;
213 			}
214 			*ob++ = NON_IDENTICAL;
215 			if (offset > 1)
216 				*ob++ = NON_IDENTICAL;
217 			ret_val += offset;
218 		}
219 	}
220 
221 	*inbuf = (char*)ib;
222 	*inbufleft = ibtail - ib;
223 	*outbuf = (char*)ob;
224 	*outbufleft = obtail - ob;
225 
226 	return(ret_val);
227 }  /* end of size_t _icv_iconv(int*, char**, size_t*, char**, size_t*).*/
228