xref: /freebsd/sys/libkern/iconv_xlat16.c (revision 484820d4422cd808b39ef1f3c1f0ea2b7ac9e469)
1c4f02a89SMax Khon /*-
26ac937c8SXin LI  * Copyright (c) 2003, 2005 Ryuichiro Imura
3c4f02a89SMax Khon  * All rights reserved.
4c4f02a89SMax Khon  *
5c4f02a89SMax Khon  * Redistribution and use in source and binary forms, with or without
6c4f02a89SMax Khon  * modification, are permitted provided that the following conditions
7c4f02a89SMax Khon  * are met:
8c4f02a89SMax Khon  * 1. Redistributions of source code must retain the above copyright
9c4f02a89SMax Khon  *    notice, this list of conditions and the following disclaimer.
10c4f02a89SMax Khon  * 2. Redistributions in binary form must reproduce the above copyright
11c4f02a89SMax Khon  *    notice, this list of conditions and the following disclaimer in the
12c4f02a89SMax Khon  *    documentation and/or other materials provided with the distribution.
13c4f02a89SMax Khon  *
14c4f02a89SMax Khon  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15c4f02a89SMax Khon  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16c4f02a89SMax Khon  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17c4f02a89SMax Khon  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18c4f02a89SMax Khon  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19c4f02a89SMax Khon  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20c4f02a89SMax Khon  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21c4f02a89SMax Khon  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22c4f02a89SMax Khon  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23c4f02a89SMax Khon  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24c4f02a89SMax Khon  * SUCH DAMAGE.
25c4f02a89SMax Khon  */
26c4f02a89SMax Khon 
27c4f02a89SMax Khon #include <sys/cdefs.h>
28c4f02a89SMax Khon __FBSDID("$FreeBSD$");
29c4f02a89SMax Khon 
30c4f02a89SMax Khon #include <sys/param.h>
31c4f02a89SMax Khon #include <sys/kernel.h>
32c4f02a89SMax Khon #include <sys/systm.h>
33c4f02a89SMax Khon #include <sys/malloc.h>
34c4f02a89SMax Khon #include <sys/iconv.h>
35c4f02a89SMax Khon 
36c4f02a89SMax Khon #include "iconv_converter_if.h"
37c4f02a89SMax Khon 
38c4f02a89SMax Khon /*
39c4f02a89SMax Khon  * "XLAT16" converter
40c4f02a89SMax Khon  */
41c4f02a89SMax Khon 
/*
 * Declare the dependency on libiconv only in builds where the
 * MODULE_DEPEND macro is available.
 */
#if defined(MODULE_DEPEND)
MODULE_DEPEND(iconv_xlat16, libiconv, 2, 2, 2);
#endif
45c4f02a89SMax Khon 
/*
 * Split a 16-bit character code into the two indices used with d_table.
 *
 * C2I1: first-level slot - the low byte of the code, with 0x100 added when
 *       bit 15 of the code is set.
 * C2I2: second-level index - the high byte of the code, masked to 7 bits
 *       when bit 15 is set and to 8 bits otherwise.
 *
 * Both macros evaluate their argument more than once.
 */
#define C2I1(c)	(((c) & 0xff) | (((c) & 0x8000) ? 0x100 : 0))
#define C2I2(c)	(((c) >> 8) & (((c) & 0x8000) ? 0x7f : 0xff))
486ac937c8SXin LI 
49c4f02a89SMax Khon /*
50c4f02a89SMax Khon  * XLAT16 converter instance
51c4f02a89SMax Khon  */
52c4f02a89SMax Khon struct iconv_xlat16 {
53c4f02a89SMax Khon 	KOBJ_FIELDS;
54c4f02a89SMax Khon 	uint32_t *		d_table[0x200];
556ac937c8SXin LI 	void *			f_ctp;
566ac937c8SXin LI 	void *			t_ctp;
57c4f02a89SMax Khon 	struct iconv_cspair *	d_csp;
58c4f02a89SMax Khon };
59c4f02a89SMax Khon 
60c4f02a89SMax Khon static int
61c4f02a89SMax Khon iconv_xlat16_open(struct iconv_converter_class *dcp,
62c4f02a89SMax Khon 	struct iconv_cspair *csp, struct iconv_cspair *cspf, void **dpp)
63c4f02a89SMax Khon {
64c4f02a89SMax Khon 	struct iconv_xlat16 *dp;
65bf8ba9abSR. Imura 	uint32_t *headp, **idxp;
66c4f02a89SMax Khon 	int i;
67c4f02a89SMax Khon 
68c4f02a89SMax Khon 	dp = (struct iconv_xlat16 *)kobj_create((struct kobj_class*)dcp, M_ICONV, M_WAITOK);
69bf8ba9abSR. Imura 	headp = (uint32_t *)((caddr_t)csp->cp_data + sizeof(dp->d_table));
70bf8ba9abSR. Imura 	idxp = (uint32_t **)csp->cp_data;
71c4f02a89SMax Khon 	for (i = 0 ; i < 0x200 ; i++) {
72c4f02a89SMax Khon 		if (*idxp) {
73bf8ba9abSR. Imura 			dp->d_table[i] = headp;
74bf8ba9abSR. Imura 			headp += 0x80;
75c4f02a89SMax Khon 		} else {
76c4f02a89SMax Khon 			dp->d_table[i] = NULL;
77c4f02a89SMax Khon 		}
78c4f02a89SMax Khon 		idxp++;
79c4f02a89SMax Khon 	}
806ac937c8SXin LI 
816ac937c8SXin LI 	if (strcmp(csp->cp_to, KICONV_WCTYPE_NAME) != 0) {
826ac937c8SXin LI 		if (iconv_open(KICONV_WCTYPE_NAME, csp->cp_from, &dp->f_ctp) != 0)
836ac937c8SXin LI 			dp->f_ctp = NULL;
846ac937c8SXin LI 		if (iconv_open(KICONV_WCTYPE_NAME, csp->cp_to, &dp->t_ctp) != 0)
856ac937c8SXin LI 			dp->t_ctp = NULL;
866ac937c8SXin LI 	} else {
876ac937c8SXin LI 		dp->f_ctp = dp->t_ctp = dp;
886ac937c8SXin LI 	}
896ac937c8SXin LI 
90c4f02a89SMax Khon 	dp->d_csp = csp;
91c4f02a89SMax Khon 	csp->cp_refcount++;
92c4f02a89SMax Khon 	*dpp = (void*)dp;
93c4f02a89SMax Khon 	return (0);
94c4f02a89SMax Khon }
95c4f02a89SMax Khon 
96c4f02a89SMax Khon static int
97c4f02a89SMax Khon iconv_xlat16_close(void *data)
98c4f02a89SMax Khon {
99c4f02a89SMax Khon 	struct iconv_xlat16 *dp = data;
100c4f02a89SMax Khon 
1016ac937c8SXin LI 	if (dp->f_ctp && dp->f_ctp != data)
1026ac937c8SXin LI 		iconv_close(dp->f_ctp);
1036ac937c8SXin LI 	if (dp->t_ctp && dp->t_ctp != data)
1046ac937c8SXin LI 		iconv_close(dp->t_ctp);
105c4f02a89SMax Khon 	dp->d_csp->cp_refcount--;
106c4f02a89SMax Khon 	kobj_delete((struct kobj*)data, M_ICONV);
107c4f02a89SMax Khon 	return (0);
108c4f02a89SMax Khon }
109c4f02a89SMax Khon 
110c4f02a89SMax Khon static int
111c4f02a89SMax Khon iconv_xlat16_conv(void *d2p, const char **inbuf,
112c4f02a89SMax Khon 	size_t *inbytesleft, char **outbuf, size_t *outbytesleft,
113c4f02a89SMax Khon 	int convchar, int casetype)
114c4f02a89SMax Khon {
115c4f02a89SMax Khon 	struct iconv_xlat16 *dp = (struct iconv_xlat16*)d2p;
116c4f02a89SMax Khon 	const char *src;
117c4f02a89SMax Khon 	char *dst;
1180f4e4130SMax Khon 	int nullin, ret = 0;
119c4f02a89SMax Khon 	size_t in, on, ir, or, inlen;
120c4f02a89SMax Khon 	uint32_t code;
121c4f02a89SMax Khon 	u_char u, l;
1226ac937c8SXin LI 	uint16_t c1, c2, ctmp;
123c4f02a89SMax Khon 
124c4f02a89SMax Khon 	if (inbuf == NULL || *inbuf == NULL || outbuf == NULL || *outbuf == NULL)
125c4f02a89SMax Khon 		return (0);
126c4f02a89SMax Khon 	ir = in = *inbytesleft;
127c4f02a89SMax Khon 	or = on = *outbytesleft;
128c4f02a89SMax Khon 	src = *inbuf;
129c4f02a89SMax Khon 	dst = *outbuf;
130c4f02a89SMax Khon 
131c4f02a89SMax Khon 	while(ir > 0 && or > 0) {
132c4f02a89SMax Khon 
133c4f02a89SMax Khon 		inlen = 0;
1346ac937c8SXin LI 		code = 0;
135c4f02a89SMax Khon 
136c4f02a89SMax Khon 		c1 = ir > 1 ? *(src+1) & 0xff : 0;
137c4f02a89SMax Khon 		c2 = *src & 0xff;
1386ac937c8SXin LI 		ctmp = 0;
139c4f02a89SMax Khon 
140c4f02a89SMax Khon 		c1 = c2 & 0x80 ? c1 | 0x100 : c1;
141c4f02a89SMax Khon 		c2 = c2 & 0x80 ? c2 & 0x7f : c2;
142c4f02a89SMax Khon 
1436ac937c8SXin LI 		if (ir > 1 && dp->d_table[c1] && dp->d_table[c1][c2]) {
144c4f02a89SMax Khon 			/*
145c4f02a89SMax Khon 			 * inbuf char is a double byte char
146c4f02a89SMax Khon 			 */
147c4f02a89SMax Khon 			inlen = 2;
1486ac937c8SXin LI 
1496ac937c8SXin LI 			/* toupper,tolower */
1506ac937c8SXin LI 			if (casetype == KICONV_FROM_LOWER && dp->f_ctp)
1516ac937c8SXin LI 				ctmp = towlower(((u_char)*src << 8) | (u_char)*(src + 1),
1526ac937c8SXin LI 				    dp->f_ctp);
1536ac937c8SXin LI 			else if (casetype == KICONV_FROM_UPPER && dp->f_ctp)
1546ac937c8SXin LI 				ctmp = towupper(((u_char)*src << 8) | (u_char)*(src + 1),
1556ac937c8SXin LI 				    dp->f_ctp);
1566ac937c8SXin LI 			if (ctmp) {
1576ac937c8SXin LI 				c1 = C2I1(ctmp);
1586ac937c8SXin LI 				c2 = C2I2(ctmp);
1596ac937c8SXin LI 			}
160c4f02a89SMax Khon 		}
161c4f02a89SMax Khon 
162c4f02a89SMax Khon 		if (inlen == 0) {
163c4f02a89SMax Khon 			c1 &= 0xff00;
164c4f02a89SMax Khon 			if (!dp->d_table[c1]) {
165c4f02a89SMax Khon 				ret = -1;
166c4f02a89SMax Khon 				break;
167c4f02a89SMax Khon 			}
168c4f02a89SMax Khon 			/*
169c4f02a89SMax Khon 			 * inbuf char is a single byte char
170c4f02a89SMax Khon 			 */
171c4f02a89SMax Khon 			inlen = 1;
1726ac937c8SXin LI 
1736ac937c8SXin LI 			if (casetype & (KICONV_FROM_LOWER|KICONV_FROM_UPPER))
1746ac937c8SXin LI 				code = dp->d_table[c1][c2];
1756ac937c8SXin LI 
1766ac937c8SXin LI 			if (casetype == KICONV_FROM_LOWER) {
1776ac937c8SXin LI 				if (dp->f_ctp)
1786ac937c8SXin LI 					ctmp = towlower((u_char)*src, dp->f_ctp);
1796ac937c8SXin LI 				else if (code & XLAT16_HAS_FROM_LOWER_CASE)
1806ac937c8SXin LI 					ctmp = (u_char)(code >> 16);
1816ac937c8SXin LI 			} else if (casetype == KICONV_FROM_UPPER) {
1826ac937c8SXin LI 				if (dp->f_ctp)
1836ac937c8SXin LI 					ctmp = towupper((u_char)*src, dp->f_ctp);
1846ac937c8SXin LI 				else if (code & XLAT16_HAS_FROM_UPPER_CASE)
1856ac937c8SXin LI 					ctmp = (u_char)(code >> 16);
1866ac937c8SXin LI 			}
1876ac937c8SXin LI 			if (ctmp) {
1886ac937c8SXin LI 				c1 = C2I1(ctmp << 8);
1896ac937c8SXin LI 				c2 = C2I2(ctmp << 8);
1906ac937c8SXin LI 			}
1916ac937c8SXin LI 		}
1926ac937c8SXin LI 
193c4f02a89SMax Khon 		code = dp->d_table[c1][c2];
194c4f02a89SMax Khon 		if (!code) {
195c4f02a89SMax Khon 			ret = -1;
196c4f02a89SMax Khon 			break;
197c4f02a89SMax Khon 		}
198c4f02a89SMax Khon 
1990f4e4130SMax Khon 		nullin = (code & XLAT16_ACCEPT_NULL_IN) ? 1 : 0;
2000f4e4130SMax Khon 		if (inlen == 1 && nullin) {
201c4f02a89SMax Khon 			/*
202c4f02a89SMax Khon 			 * XLAT16_ACCEPT_NULL_IN requires inbuf has 2byte
203c4f02a89SMax Khon 			 */
204c4f02a89SMax Khon 			ret = -1;
205c4f02a89SMax Khon 			break;
206c4f02a89SMax Khon 		}
207c4f02a89SMax Khon 
208c4f02a89SMax Khon 		/*
209c4f02a89SMax Khon 		 * now start translation
210c4f02a89SMax Khon 		 */
211c4f02a89SMax Khon 		u = (u_char)(code >> 8);
212c4f02a89SMax Khon 		l = (u_char)code;
213c4f02a89SMax Khon 
214c4f02a89SMax Khon #ifdef XLAT16_ACCEPT_3BYTE_CHR
215c4f02a89SMax Khon 		if (code & XLAT16_IS_3BYTE_CHR) {
216c4f02a89SMax Khon 			if (or < 3) {
217c4f02a89SMax Khon 				ret = -1;
218c4f02a89SMax Khon 				break;
219c4f02a89SMax Khon 			}
220c4f02a89SMax Khon 			*dst++ = u;
221c4f02a89SMax Khon 			*dst++ = l;
222c4f02a89SMax Khon 			*dst++ = (u_char)(code >> 16);
223c4f02a89SMax Khon 			or -= 3;
224c4f02a89SMax Khon 		} else
225c4f02a89SMax Khon #endif
226c4f02a89SMax Khon 		if (u || code & XLAT16_ACCEPT_NULL_OUT) {
227c4f02a89SMax Khon 			if (or < 2) {
228c4f02a89SMax Khon 				ret = -1;
229c4f02a89SMax Khon 				break;
230c4f02a89SMax Khon 			}
2316ac937c8SXin LI 
2326ac937c8SXin LI 			/* toupper,tolower */
2336ac937c8SXin LI 			if (casetype == KICONV_LOWER && dp->t_ctp) {
2346ac937c8SXin LI 				code = towlower((uint16_t)code, dp->t_ctp);
2356ac937c8SXin LI 				u = (u_char)(code >> 8);
2366ac937c8SXin LI 				l = (u_char)code;
2376ac937c8SXin LI 			}
2386ac937c8SXin LI 			if (casetype == KICONV_UPPER && dp->t_ctp) {
2396ac937c8SXin LI 				code = towupper((uint16_t)code, dp->t_ctp);
2406ac937c8SXin LI 				u = (u_char)(code >> 8);
2416ac937c8SXin LI 				l = (u_char)code;
2426ac937c8SXin LI 			}
2436ac937c8SXin LI 
244c4f02a89SMax Khon 			*dst++ = u;
245c4f02a89SMax Khon 			*dst++ = l;
246c4f02a89SMax Khon 			or -= 2;
247c4f02a89SMax Khon 		} else {
2486ac937c8SXin LI 			/* toupper,tolower */
2496ac937c8SXin LI 			if (casetype == KICONV_LOWER) {
2506ac937c8SXin LI 				if (dp->t_ctp)
2516ac937c8SXin LI 					l = (u_char)towlower(l, dp->t_ctp);
2526ac937c8SXin LI 				else if (code & XLAT16_HAS_LOWER_CASE)
2536ac937c8SXin LI 					l = (u_char)(code >> 16);
2546ac937c8SXin LI 			}
2556ac937c8SXin LI 			if (casetype == KICONV_UPPER) {
2566ac937c8SXin LI 				if (dp->t_ctp)
2576ac937c8SXin LI 					l = (u_char)towupper(l, dp->t_ctp);
2586ac937c8SXin LI 				else if (code & XLAT16_HAS_UPPER_CASE)
2596ac937c8SXin LI 					l = (u_char)(code >> 16);
2606ac937c8SXin LI 			}
2616ac937c8SXin LI 
262c4f02a89SMax Khon 			*dst++ = l;
263c4f02a89SMax Khon 			or--;
264c4f02a89SMax Khon 		}
265c4f02a89SMax Khon 
266c4f02a89SMax Khon 		if (inlen == 2) {
267c4f02a89SMax Khon 			/*
268c4f02a89SMax Khon 			 * there is a case that inbuf char is a single
269c4f02a89SMax Khon 			 * byte char while inlen == 2
270c4f02a89SMax Khon 			 */
2710f4e4130SMax Khon 			if ((u_char)*(src+1) == 0 && !nullin ) {
272c4f02a89SMax Khon 				src++;
273c4f02a89SMax Khon 				ir--;
274c4f02a89SMax Khon 			} else {
275c4f02a89SMax Khon 				src += 2;
276c4f02a89SMax Khon 				ir -= 2;
277c4f02a89SMax Khon 			}
278c4f02a89SMax Khon 		} else {
279c4f02a89SMax Khon 			src++;
280c4f02a89SMax Khon 			ir--;
281c4f02a89SMax Khon 		}
282c4f02a89SMax Khon 
283c4f02a89SMax Khon 		if (convchar == 1)
284c4f02a89SMax Khon 			break;
285c4f02a89SMax Khon 	}
286c4f02a89SMax Khon 
287c4f02a89SMax Khon 	*inbuf += in - ir;
288c4f02a89SMax Khon 	*outbuf += on - or;
289c4f02a89SMax Khon 	*inbytesleft -= in - ir;
290c4f02a89SMax Khon 	*outbytesleft -= on - or;
291c4f02a89SMax Khon 	return (ret);
292c4f02a89SMax Khon }
293c4f02a89SMax Khon 
/*
 * Report the name of this converter class.  The class argument is not
 * consulted; the returned string has static storage.
 */
static const char *
iconv_xlat16_name(struct iconv_converter_class *dcp)
{
	static const char converter_name[] = "xlat16";

	return (converter_name);
}
299c4f02a89SMax Khon 
3006ac937c8SXin LI static int
301*484820d4SConrad Meyer iconv_xlat16_tolower(void *d2p, int c)
3026ac937c8SXin LI {
3036ac937c8SXin LI         struct iconv_xlat16 *dp = (struct iconv_xlat16*)d2p;
304*484820d4SConrad Meyer 	int c1, c2, out;
3056ac937c8SXin LI 
3066ac937c8SXin LI 	if (c < 0x100) {
3076ac937c8SXin LI 		c1 = C2I1(c << 8);
3086ac937c8SXin LI 		c2 = C2I2(c << 8);
3096ac937c8SXin LI 	} else if (c < 0x10000) {
3106ac937c8SXin LI                 c1 = C2I1(c);
3116ac937c8SXin LI                 c2 = C2I2(c);
3126ac937c8SXin LI 	} else
3136ac937c8SXin LI 		return (c);
3146ac937c8SXin LI 
3156ac937c8SXin LI 	if (dp->d_table[c1] && dp->d_table[c1][c2] & XLAT16_HAS_LOWER_CASE) {
3166ac937c8SXin LI 		/*return (int)(dp->d_table[c1][c2] & 0xffff);*/
3176ac937c8SXin LI 		out = dp->d_table[c1][c2] & 0xffff;
3186ac937c8SXin LI 		if ((out & 0xff) == 0)
3196ac937c8SXin LI 			out = (out >> 8) & 0xff;
3206ac937c8SXin LI 		return (out);
3216ac937c8SXin LI 	} else
3226ac937c8SXin LI 		return (c);
3236ac937c8SXin LI }
3246ac937c8SXin LI 
3256ac937c8SXin LI static int
326*484820d4SConrad Meyer iconv_xlat16_toupper(void *d2p, int c)
3276ac937c8SXin LI {
3286ac937c8SXin LI         struct iconv_xlat16 *dp = (struct iconv_xlat16*)d2p;
329*484820d4SConrad Meyer 	int c1, c2, out;
3306ac937c8SXin LI 
3316ac937c8SXin LI 	if (c < 0x100) {
3326ac937c8SXin LI 		c1 = C2I1(c << 8);
3336ac937c8SXin LI 		c2 = C2I2(c << 8);
3346ac937c8SXin LI 	} else if (c < 0x10000) {
3356ac937c8SXin LI                 c1 = C2I1(c);
3366ac937c8SXin LI                 c2 = C2I2(c);
3376ac937c8SXin LI 	} else
3386ac937c8SXin LI 		return (c);
3396ac937c8SXin LI 
3406ac937c8SXin LI 	if (dp->d_table[c1] && dp->d_table[c1][c2] & XLAT16_HAS_UPPER_CASE) {
3416ac937c8SXin LI 		out = dp->d_table[c1][c2] & 0xffff;
3426ac937c8SXin LI 		if ((out & 0xff) == 0)
3436ac937c8SXin LI 			out = (out >> 8) & 0xff;
3446ac937c8SXin LI 		return (out);
3456ac937c8SXin LI 	} else
3466ac937c8SXin LI 		return (c);
3476ac937c8SXin LI }
3486ac937c8SXin LI 
349c4f02a89SMax Khon static kobj_method_t iconv_xlat16_methods[] = {
350c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_open,	iconv_xlat16_open),
351c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_close,	iconv_xlat16_close),
352c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_conv,	iconv_xlat16_conv),
353c4f02a89SMax Khon #if 0
354c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_init,	iconv_xlat16_init),
355c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_done,	iconv_xlat16_done),
356c4f02a89SMax Khon #endif
357c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_name,	iconv_xlat16_name),
3586ac937c8SXin LI 	KOBJMETHOD(iconv_converter_tolower,	iconv_xlat16_tolower),
3596ac937c8SXin LI 	KOBJMETHOD(iconv_converter_toupper,	iconv_xlat16_toupper),
360c4f02a89SMax Khon 	{0, 0}
361c4f02a89SMax Khon };
362c4f02a89SMax Khon 
363c4f02a89SMax Khon KICONV_CONVERTER(xlat16, sizeof(struct iconv_xlat16));
364