xref: /titanic_51/usr/src/lib/iconv_modules/ko/common/UTF2_to_nbyte.c (revision 91e1e26ac6a73ce959289cf7d3d96c4baedbe0b8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1994 by Sun Microsystems, Inc.
23  */
24 
25 
26 #include <stdlib.h>
27 #include <errno.h>
28 #include "ktable.h"
29 #include "utf_nbyte.h"
30 
31 
32 /****  _ I C V _ O P E N  ****/
33 
34 void* _icv_open()
35 {
36 	_conv_desc* cd = (_conv_desc*)malloc(sizeof(_conv_desc));
37 
38 	if (cd == (_conv_desc*)NULL)
39 	{
40 		errno = ENOMEM;
41 		return((void*)-1);
42 	}
43 
44 	RESET_CONV_DESC();
45 	cd->state = ASCII;
46 
47 	return((void*)cd);
48 }  /* end of int _icv_open(). */
49 
50 
51 /****  _ I C V _ C L O S E  ****/
52 
53 void _icv_close(_conv_desc* cd)
54 {
55 	if (!cd)
56 		errno = EBADF;
57 	else
58 		free((void*)cd);
59 }  /* end of void _icv_close(_conv_desc*). */
60 
61 
62 /****  _ I C V _ I C O N V  ****/
63 
64 size_t _icv_iconv(_conv_desc* cd, char** inbuf, size_t* inbufleft,
65 			char** outbuf, size_t* outbufleft)
66 {
67 	void		echo_vowel(char*, int*), echo_consonant(char*, int*);
68 	size_t		ret_val = 0;
69 	unsigned char*	ib;
70 	unsigned char*	ob;
71 	unsigned char*	ibtail;
72 	unsigned char*	obtail;
73 
74 	if (!cd)
75 	{
76 		errno = EBADF;
77 		return((size_t)-1);
78 	}
79 
80 	if (!inbuf || !(*inbuf))
81 	{
82 		RESET_CONV_DESC();
83 		cd->state = ASCII;
84 		return((size_t)0);
85 	}
86 
87 	ib = (unsigned char*)*inbuf;
88 	ob = (unsigned char*)*outbuf;
89 	ibtail = ib + *inbufleft;
90 	obtail = ob + *outbufleft;
91 
92 	while (ib < ibtail)
93 	{
94 		if (!(*ib & 0x80))		/* 7 bits */
95 		{
96 			PROCESS_PRIOR_CVC();
97 
98 			if ((obtail - ob) < (cd->state == WANSUNG ? 2 : 1))
99 			{
100 				errno = E2BIG;
101 				ret_val = (size_t)-1;
102 				break;
103 			}
104 			if (cd->state == WANSUNG)
105 			{
106 				*ob++ = SI;
107 				cd->state = ASCII;
108 			}
109 			*ob++ = *ib++;
110 		}
111 		else if ((*ib & 0xF0) == 0xE0)	/* 16 bits */
112 		{
113 			unsigned long	utf;
114 
115 			if ((ibtail - ib) < 3)
116 			{
117 				errno = EINVAL;
118 				ret_val = (size_t)-1;
119 				break;
120 			}
121 
122 			utf = ((unsigned long)(*ib) << 16) |
123 			      ((unsigned long)(*(ib + 1)) << 8) |
124 			      (unsigned long)(*(ib + 2));
125 			if (utf == 0xE1859F ||
126 			    (utf >= 0xE18480 && utf <= 0xE18492))  /* Ci */
127 			{
128 				PROCESS_PRIOR_CVC();
129 
130 				cd->ci = (utf == 0xE1859F) ? CVC_FILL
131 							   : utf - 0xE18480;
132 				cd->prev_state = CI;
133 			}
134 			else if (utf == 0xE185A0 ||
135 				 (utf >= 0xE185A1 && utf <= 0xE185B5))  /* V */
136 			{
137 				if (cd->prev_state != E && cd->prev_state != CI)
138 					PROCESS_PRIOR_CVC();
139 
140 				cd->v = (utf == 0xE185A0) ? CVC_FILL
141 							  : utf - 0xE185A1;
142 				cd->prev_state = V;
143 			}
144 			else if ((utf >= 0xE186A8 && utf <= 0xE186BF) ||
145 				 (utf >= 0xE18780 && utf <= 0xE18782))  /* Cf */
146 			{
147 				if (cd->prev_state != E && cd->prev_state != V)
148 					PROCESS_PRIOR_CVC();
149 
150 				cd->cf = utf - ((utf >= 0xE18780) ? 0xE18766
151 								 : 0xE186A6);
152 				cd->prev_state = CF;
153 
154 				PROCESS_PRIOR_CVC();
155 			}
156 			else
157 			{
158 				PROCESS_PRIOR_CVC();
159 
160 				/* Let's assume the code is non-identical. */
161 				if ((obtail - ob) < 2)
162 				{
163 					errno = E2BIG;
164 					ret_val = (size_t)-1;
165 					break;
166 				}
167 				*ob++ = NON_IDENTICAL;
168 				*ob++ = NON_IDENTICAL;
169 				ret_val += 2;
170 			}
171 			ib += 3;
172 		}
173 		else  /* 11, 21, 26 & 31 bits codes won't be able to convert. */
174 		{
175 			short int offset;
176 
177 			PROCESS_PRIOR_CVC();
178 
179 			if ((*ib & 0xE0) == 0xC0)  /* 11 */
180 				offset = 2;
181 			else if ((*ib & 0xF0) == 0xE0)  /* 16 */
182 				offset = 3;
183 			else if ((*ib & 0xF8) == 0xF0)  /* 21 */
184 				offset = 4;
185 			else if ((*ib & 0xFC) == 0xF8)  /* 26 */
186 				offset = 5;
187 			else if ((*ib & 0xFE) == 0xFC)  /* 31 */
188 				offset = 6;
189 			else  /* Illegal sequence. */
190 				offset = 1;
191 
192 			if ((ibtail - ib) < offset)
193 			{
194 				errno = EINVAL;
195 				ret_val = (size_t)-1;
196 				break;
197 			}
198 			ib += offset;
199 
200 			/* Let's assume the code is non-identical. */
201 			offset = (offset > 2) ? 2 : 1;
202 			if ((obtail - ob) < offset)
203 			{
204 				errno = E2BIG;
205 				ret_val = (size_t)-1;
206 				break;
207 			}
208 			*ob++ = NON_IDENTICAL;
209 			if (offset > 1)
210 				*ob++ = NON_IDENTICAL;
211 			ret_val += offset;
212 		}
213 	}
214 
215 	*inbuf = (char*)ib;
216 	*inbufleft = ibtail - ib;
217 	*outbuf = (char*)ob;
218 	*outbufleft = obtail - ob;
219 
220 	return(ret_val);
221 }  /* end of size_t _icv_iconv(_conv_desc*, char**, size_t*, char**, size_t*).*/
222 
223 
224 /****  E C H O _ V O W E L  ****/
225 
/*
 * Split the vowel code at c[*i] into its two component codes.
 *
 * When c[*i] is one of the seven codes listed below, it is overwritten
 * with the first component, the second component is stored in the next
 * slot, and *i advances by two.  Any other code is left untouched and
 * *i advances by one.  The caller must provide room for the extra byte.
 */
void echo_vowel(char* c,  int* i)
{
	char	head;
	char	tail;

	switch (c[*i])
	{
	case 'm':			/* _|_|- */
		head = 'l';		/* _|_ */
		tail = 'b';		/* |- */
		break;
	case 'n':			/* _|_H */
		head = 'l';		/* _|_ */
		tail = 'c';		/* H */
		break;
	case 'o':			/* _|_| */
		head = 'l';		/* _|_ */
		tail = '|';		/* | */
		break;
	case 't':			/* T-| */
		head = 's';		/* T */
		tail = 'f';		/* -| */
		break;
	case 'u':			/* T-|| */
		head = 's';		/* T */
		tail = 'g';		/* -|| */
		break;
	case 'v':			/* T| */
		head = 's';		/* T */
		tail = '|';		/* | */
		break;
	case '{':			/* _| */
		head = 'z';		/* __ */
		tail = '|';		/* | */
		break;
	default:			/* Nothing to split. */
		*i += 1;
		return;
	}

	c[*i] = head;
	c[*i + 1] = tail;
	*i += 2;
}  /* end of void echo_vowel(char*, int*). */
266 
267 
268 /****  E C H O _ C O N S O N A N T  ****/
269 
/*
 * Split the consonant-cluster code at c[*i] into its two member codes.
 *
 * When c[*i] is one of the eleven cluster codes in the table below, it
 * is overwritten with the first member, the second member is stored in
 * the next slot, and *i advances by two.  Any other code is left
 * untouched and *i advances by one.  The caller must provide room for
 * the extra byte.
 */
void echo_consonant(char* c,  int* i)
{
	static const struct
	{
		char	cluster;
		char	lead;
		char	trail;
	} split[] =
	{
		{ 'C', 'A', 'U'  },	/* gs -> g, s */
		{ 'E', 'D', 'X'  },	/* nj -> n, j */
		{ 'F', 'D', '^'  },	/* nh -> n, h */
		{ 'J', 'I', 'A'  },	/* rg -> r, g */
		{ 'K', 'I', 'Q'  },	/* rm -> r, m */
		{ 'L', 'I', 'R'  },	/* rb -> r, b */
		{ 'M', 'I', 'U'  },	/* rs -> r, s */
		{ 'N', 'I', '\\' },	/* rt -> r, t */
		{ 'O', 'I', ']'  },	/* rp -> r, p */
		{ 'P', 'I', '^'  },	/* rh -> r, h */
		{ 'T', 'R', 'U'  }	/* bs -> b, s */
	};
	const int	entries = (int)(sizeof(split) / sizeof(split[0]));
	int		k;

	for (k = 0; k < entries; k++)
	{
		if (c[*i] != split[k].cluster)
			continue;

		c[(*i)++] = split[k].lead;
		c[(*i)++] = split[k].trail;
		return;
	}

	/* Not a cluster: step over it unchanged. */
	(*i)++;
}  /* end of void echo_consonant(char*, int*). */
330