xref: /freebsd/sys/libkern/iconv_xlat16.c (revision fdafd315ad0d0f28a11b9fb4476a9ab059c62b92)
1c4f02a89SMax Khon /*-
2*4d846d26SWarner Losh  * SPDX-License-Identifier: BSD-2-Clause
38a36da99SPedro F. Giffuni  *
46ac937c8SXin LI  * Copyright (c) 2003, 2005 Ryuichiro Imura
5c4f02a89SMax Khon  * All rights reserved.
6c4f02a89SMax Khon  *
7c4f02a89SMax Khon  * Redistribution and use in source and binary forms, with or without
8c4f02a89SMax Khon  * modification, are permitted provided that the following conditions
9c4f02a89SMax Khon  * are met:
10c4f02a89SMax Khon  * 1. Redistributions of source code must retain the above copyright
11c4f02a89SMax Khon  *    notice, this list of conditions and the following disclaimer.
12c4f02a89SMax Khon  * 2. Redistributions in binary form must reproduce the above copyright
13c4f02a89SMax Khon  *    notice, this list of conditions and the following disclaimer in the
14c4f02a89SMax Khon  *    documentation and/or other materials provided with the distribution.
15c4f02a89SMax Khon  *
16c4f02a89SMax Khon  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17c4f02a89SMax Khon  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18c4f02a89SMax Khon  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19c4f02a89SMax Khon  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20c4f02a89SMax Khon  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21c4f02a89SMax Khon  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22c4f02a89SMax Khon  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23c4f02a89SMax Khon  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24c4f02a89SMax Khon  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25c4f02a89SMax Khon  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26c4f02a89SMax Khon  * SUCH DAMAGE.
27c4f02a89SMax Khon  */
28c4f02a89SMax Khon 
29c4f02a89SMax Khon #include <sys/param.h>
30c4f02a89SMax Khon #include <sys/kernel.h>
31c4f02a89SMax Khon #include <sys/systm.h>
32c4f02a89SMax Khon #include <sys/malloc.h>
33c4f02a89SMax Khon #include <sys/iconv.h>
34c4f02a89SMax Khon 
35c4f02a89SMax Khon #include "iconv_converter_if.h"
36c4f02a89SMax Khon 
37c4f02a89SMax Khon /*
38c4f02a89SMax Khon  * "XLAT16" converter
39c4f02a89SMax Khon  */
40c4f02a89SMax Khon 
41c4f02a89SMax Khon #ifdef MODULE_DEPEND
42c4f02a89SMax Khon MODULE_DEPEND(iconv_xlat16, libiconv, 2, 2, 2);
43c4f02a89SMax Khon #endif
44c4f02a89SMax Khon 
/*
 * Split a 16-bit character code into the two d_table indices.
 *
 * C2I1 yields the row index: the low byte of the code, with 0x100 added
 * when bit 15 of the code is set.  C2I2 yields the column index: the high
 * byte of the code with bit 15 masked off, so the result never exceeds 0x7f
 * for a 16-bit input.  Both macros evaluate their argument more than once.
 */
#define C2I1(c)	(((c) & 0xff) | (((c) & 0x8000) != 0 ? 0x100 : 0))
#define C2I2(c)	(((c) >> 8) & (((c) & 0x8000) != 0 ? 0x7f : 0xff))
476ac937c8SXin LI 
48c4f02a89SMax Khon /*
49c4f02a89SMax Khon  * XLAT16 converter instance
50c4f02a89SMax Khon  */
51c4f02a89SMax Khon struct iconv_xlat16 {
52c4f02a89SMax Khon 	KOBJ_FIELDS;
53c4f02a89SMax Khon 	uint32_t *		d_table[0x200];
546ac937c8SXin LI 	void *			f_ctp;
556ac937c8SXin LI 	void *			t_ctp;
56c4f02a89SMax Khon 	struct iconv_cspair *	d_csp;
57c4f02a89SMax Khon };
58c4f02a89SMax Khon 
59c4f02a89SMax Khon static int
iconv_xlat16_open(struct iconv_converter_class * dcp,struct iconv_cspair * csp,struct iconv_cspair * cspf,void ** dpp)60c4f02a89SMax Khon iconv_xlat16_open(struct iconv_converter_class *dcp,
61c4f02a89SMax Khon 	struct iconv_cspair *csp, struct iconv_cspair *cspf, void **dpp)
62c4f02a89SMax Khon {
63c4f02a89SMax Khon 	struct iconv_xlat16 *dp;
64bf8ba9abSR. Imura 	uint32_t *headp, **idxp;
65c4f02a89SMax Khon 	int i;
66c4f02a89SMax Khon 
67c4f02a89SMax Khon 	dp = (struct iconv_xlat16 *)kobj_create((struct kobj_class*)dcp, M_ICONV, M_WAITOK);
68bf8ba9abSR. Imura 	headp = (uint32_t *)((caddr_t)csp->cp_data + sizeof(dp->d_table));
69bf8ba9abSR. Imura 	idxp = (uint32_t **)csp->cp_data;
70c4f02a89SMax Khon 	for (i = 0 ; i < 0x200 ; i++) {
71c4f02a89SMax Khon 		if (*idxp) {
72bf8ba9abSR. Imura 			dp->d_table[i] = headp;
73bf8ba9abSR. Imura 			headp += 0x80;
74c4f02a89SMax Khon 		} else {
75c4f02a89SMax Khon 			dp->d_table[i] = NULL;
76c4f02a89SMax Khon 		}
77c4f02a89SMax Khon 		idxp++;
78c4f02a89SMax Khon 	}
796ac937c8SXin LI 
806ac937c8SXin LI 	if (strcmp(csp->cp_to, KICONV_WCTYPE_NAME) != 0) {
816ac937c8SXin LI 		if (iconv_open(KICONV_WCTYPE_NAME, csp->cp_from, &dp->f_ctp) != 0)
826ac937c8SXin LI 			dp->f_ctp = NULL;
836ac937c8SXin LI 		if (iconv_open(KICONV_WCTYPE_NAME, csp->cp_to, &dp->t_ctp) != 0)
846ac937c8SXin LI 			dp->t_ctp = NULL;
856ac937c8SXin LI 	} else {
866ac937c8SXin LI 		dp->f_ctp = dp->t_ctp = dp;
876ac937c8SXin LI 	}
886ac937c8SXin LI 
89c4f02a89SMax Khon 	dp->d_csp = csp;
90c4f02a89SMax Khon 	csp->cp_refcount++;
91c4f02a89SMax Khon 	*dpp = (void*)dp;
92c4f02a89SMax Khon 	return (0);
93c4f02a89SMax Khon }
94c4f02a89SMax Khon 
95c4f02a89SMax Khon static int
iconv_xlat16_close(void * data)96c4f02a89SMax Khon iconv_xlat16_close(void *data)
97c4f02a89SMax Khon {
98c4f02a89SMax Khon 	struct iconv_xlat16 *dp = data;
99c4f02a89SMax Khon 
1006ac937c8SXin LI 	if (dp->f_ctp && dp->f_ctp != data)
1016ac937c8SXin LI 		iconv_close(dp->f_ctp);
1026ac937c8SXin LI 	if (dp->t_ctp && dp->t_ctp != data)
1036ac937c8SXin LI 		iconv_close(dp->t_ctp);
104c4f02a89SMax Khon 	dp->d_csp->cp_refcount--;
105c4f02a89SMax Khon 	kobj_delete((struct kobj*)data, M_ICONV);
106c4f02a89SMax Khon 	return (0);
107c4f02a89SMax Khon }
108c4f02a89SMax Khon 
109c4f02a89SMax Khon static int
iconv_xlat16_conv(void * d2p,const char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int convchar,int casetype)110c4f02a89SMax Khon iconv_xlat16_conv(void *d2p, const char **inbuf,
111c4f02a89SMax Khon 	size_t *inbytesleft, char **outbuf, size_t *outbytesleft,
112c4f02a89SMax Khon 	int convchar, int casetype)
113c4f02a89SMax Khon {
114c4f02a89SMax Khon 	struct iconv_xlat16 *dp = (struct iconv_xlat16*)d2p;
115c4f02a89SMax Khon 	const char *src;
116c4f02a89SMax Khon 	char *dst;
1170f4e4130SMax Khon 	int nullin, ret = 0;
118c4f02a89SMax Khon 	size_t in, on, ir, or, inlen;
119c4f02a89SMax Khon 	uint32_t code;
120c4f02a89SMax Khon 	u_char u, l;
1216ac937c8SXin LI 	uint16_t c1, c2, ctmp;
122c4f02a89SMax Khon 
123c4f02a89SMax Khon 	if (inbuf == NULL || *inbuf == NULL || outbuf == NULL || *outbuf == NULL)
124c4f02a89SMax Khon 		return (0);
125c4f02a89SMax Khon 	ir = in = *inbytesleft;
126c4f02a89SMax Khon 	or = on = *outbytesleft;
127c4f02a89SMax Khon 	src = *inbuf;
128c4f02a89SMax Khon 	dst = *outbuf;
129c4f02a89SMax Khon 
130c4f02a89SMax Khon 	while(ir > 0 && or > 0) {
131c4f02a89SMax Khon 		inlen = 0;
1326ac937c8SXin LI 		code = 0;
133c4f02a89SMax Khon 
134c4f02a89SMax Khon 		c1 = ir > 1 ? *(src+1) & 0xff : 0;
135c4f02a89SMax Khon 		c2 = *src & 0xff;
1366ac937c8SXin LI 		ctmp = 0;
137c4f02a89SMax Khon 
138c4f02a89SMax Khon 		c1 = c2 & 0x80 ? c1 | 0x100 : c1;
139c4f02a89SMax Khon 		c2 = c2 & 0x80 ? c2 & 0x7f : c2;
140c4f02a89SMax Khon 
1416ac937c8SXin LI 		if (ir > 1 && dp->d_table[c1] && dp->d_table[c1][c2]) {
142c4f02a89SMax Khon 			/*
143c4f02a89SMax Khon 			 * inbuf char is a double byte char
144c4f02a89SMax Khon 			 */
145c4f02a89SMax Khon 			inlen = 2;
1466ac937c8SXin LI 
1476ac937c8SXin LI 			/* toupper,tolower */
1486ac937c8SXin LI 			if (casetype == KICONV_FROM_LOWER && dp->f_ctp)
1496ac937c8SXin LI 				ctmp = towlower(((u_char)*src << 8) | (u_char)*(src + 1),
1506ac937c8SXin LI 				    dp->f_ctp);
1516ac937c8SXin LI 			else if (casetype == KICONV_FROM_UPPER && dp->f_ctp)
1526ac937c8SXin LI 				ctmp = towupper(((u_char)*src << 8) | (u_char)*(src + 1),
1536ac937c8SXin LI 				    dp->f_ctp);
1546ac937c8SXin LI 			if (ctmp) {
1556ac937c8SXin LI 				c1 = C2I1(ctmp);
1566ac937c8SXin LI 				c2 = C2I2(ctmp);
1576ac937c8SXin LI 			}
158c4f02a89SMax Khon 		}
159c4f02a89SMax Khon 
160c4f02a89SMax Khon 		if (inlen == 0) {
161c4f02a89SMax Khon 			c1 &= 0xff00;
162c4f02a89SMax Khon 			if (!dp->d_table[c1]) {
163c4f02a89SMax Khon 				ret = -1;
164c4f02a89SMax Khon 				break;
165c4f02a89SMax Khon 			}
166c4f02a89SMax Khon 			/*
167c4f02a89SMax Khon 			 * inbuf char is a single byte char
168c4f02a89SMax Khon 			 */
169c4f02a89SMax Khon 			inlen = 1;
1706ac937c8SXin LI 
1716ac937c8SXin LI 			if (casetype & (KICONV_FROM_LOWER|KICONV_FROM_UPPER))
1726ac937c8SXin LI 				code = dp->d_table[c1][c2];
1736ac937c8SXin LI 
1746ac937c8SXin LI 			if (casetype == KICONV_FROM_LOWER) {
1756ac937c8SXin LI 				if (dp->f_ctp)
1766ac937c8SXin LI 					ctmp = towlower((u_char)*src, dp->f_ctp);
1776ac937c8SXin LI 				else if (code & XLAT16_HAS_FROM_LOWER_CASE)
1786ac937c8SXin LI 					ctmp = (u_char)(code >> 16);
1796ac937c8SXin LI 			} else if (casetype == KICONV_FROM_UPPER) {
1806ac937c8SXin LI 				if (dp->f_ctp)
1816ac937c8SXin LI 					ctmp = towupper((u_char)*src, dp->f_ctp);
1826ac937c8SXin LI 				else if (code & XLAT16_HAS_FROM_UPPER_CASE)
1836ac937c8SXin LI 					ctmp = (u_char)(code >> 16);
1846ac937c8SXin LI 			}
1856ac937c8SXin LI 			if (ctmp) {
1866ac937c8SXin LI 				c1 = C2I1(ctmp << 8);
1876ac937c8SXin LI 				c2 = C2I2(ctmp << 8);
1886ac937c8SXin LI 			}
1896ac937c8SXin LI 		}
1906ac937c8SXin LI 
191c4f02a89SMax Khon 		code = dp->d_table[c1][c2];
192c4f02a89SMax Khon 		if (!code) {
193c4f02a89SMax Khon 			ret = -1;
194c4f02a89SMax Khon 			break;
195c4f02a89SMax Khon 		}
196c4f02a89SMax Khon 
1970f4e4130SMax Khon 		nullin = (code & XLAT16_ACCEPT_NULL_IN) ? 1 : 0;
1980f4e4130SMax Khon 		if (inlen == 1 && nullin) {
199c4f02a89SMax Khon 			/*
200c4f02a89SMax Khon 			 * XLAT16_ACCEPT_NULL_IN requires inbuf has 2byte
201c4f02a89SMax Khon 			 */
202c4f02a89SMax Khon 			ret = -1;
203c4f02a89SMax Khon 			break;
204c4f02a89SMax Khon 		}
205c4f02a89SMax Khon 
206c4f02a89SMax Khon 		/*
207c4f02a89SMax Khon 		 * now start translation
208c4f02a89SMax Khon 		 */
209c4f02a89SMax Khon 		u = (u_char)(code >> 8);
210c4f02a89SMax Khon 		l = (u_char)code;
211c4f02a89SMax Khon 
212c4f02a89SMax Khon #ifdef XLAT16_ACCEPT_3BYTE_CHR
213c4f02a89SMax Khon 		if (code & XLAT16_IS_3BYTE_CHR) {
214c4f02a89SMax Khon 			if (or < 3) {
215c4f02a89SMax Khon 				ret = -1;
216c4f02a89SMax Khon 				break;
217c4f02a89SMax Khon 			}
218c4f02a89SMax Khon 			*dst++ = u;
219c4f02a89SMax Khon 			*dst++ = l;
220c4f02a89SMax Khon 			*dst++ = (u_char)(code >> 16);
221c4f02a89SMax Khon 			or -= 3;
222c4f02a89SMax Khon 		} else
223c4f02a89SMax Khon #endif
224c4f02a89SMax Khon 		if (u || code & XLAT16_ACCEPT_NULL_OUT) {
225c4f02a89SMax Khon 			if (or < 2) {
226c4f02a89SMax Khon 				ret = -1;
227c4f02a89SMax Khon 				break;
228c4f02a89SMax Khon 			}
2296ac937c8SXin LI 
2306ac937c8SXin LI 			/* toupper,tolower */
2316ac937c8SXin LI 			if (casetype == KICONV_LOWER && dp->t_ctp) {
2326ac937c8SXin LI 				code = towlower((uint16_t)code, dp->t_ctp);
2336ac937c8SXin LI 				u = (u_char)(code >> 8);
2346ac937c8SXin LI 				l = (u_char)code;
2356ac937c8SXin LI 			}
2366ac937c8SXin LI 			if (casetype == KICONV_UPPER && dp->t_ctp) {
2376ac937c8SXin LI 				code = towupper((uint16_t)code, dp->t_ctp);
2386ac937c8SXin LI 				u = (u_char)(code >> 8);
2396ac937c8SXin LI 				l = (u_char)code;
2406ac937c8SXin LI 			}
2416ac937c8SXin LI 
242c4f02a89SMax Khon 			*dst++ = u;
243c4f02a89SMax Khon 			*dst++ = l;
244c4f02a89SMax Khon 			or -= 2;
245c4f02a89SMax Khon 		} else {
2466ac937c8SXin LI 			/* toupper,tolower */
2476ac937c8SXin LI 			if (casetype == KICONV_LOWER) {
2486ac937c8SXin LI 				if (dp->t_ctp)
2496ac937c8SXin LI 					l = (u_char)towlower(l, dp->t_ctp);
2506ac937c8SXin LI 				else if (code & XLAT16_HAS_LOWER_CASE)
2516ac937c8SXin LI 					l = (u_char)(code >> 16);
2526ac937c8SXin LI 			}
2536ac937c8SXin LI 			if (casetype == KICONV_UPPER) {
2546ac937c8SXin LI 				if (dp->t_ctp)
2556ac937c8SXin LI 					l = (u_char)towupper(l, dp->t_ctp);
2566ac937c8SXin LI 				else if (code & XLAT16_HAS_UPPER_CASE)
2576ac937c8SXin LI 					l = (u_char)(code >> 16);
2586ac937c8SXin LI 			}
2596ac937c8SXin LI 
260c4f02a89SMax Khon 			*dst++ = l;
261c4f02a89SMax Khon 			or--;
262c4f02a89SMax Khon 		}
263c4f02a89SMax Khon 
264c4f02a89SMax Khon 		if (inlen == 2) {
265c4f02a89SMax Khon 			/*
266c4f02a89SMax Khon 			 * there is a case that inbuf char is a single
267c4f02a89SMax Khon 			 * byte char while inlen == 2
268c4f02a89SMax Khon 			 */
269e099b90bSPedro F. Giffuni 			if ((u_char)*(src+1) == '\0' && !nullin ) {
270c4f02a89SMax Khon 				src++;
271c4f02a89SMax Khon 				ir--;
272c4f02a89SMax Khon 			} else {
273c4f02a89SMax Khon 				src += 2;
274c4f02a89SMax Khon 				ir -= 2;
275c4f02a89SMax Khon 			}
276c4f02a89SMax Khon 		} else {
277c4f02a89SMax Khon 			src++;
278c4f02a89SMax Khon 			ir--;
279c4f02a89SMax Khon 		}
280c4f02a89SMax Khon 
281c4f02a89SMax Khon 		if (convchar == 1)
282c4f02a89SMax Khon 			break;
283c4f02a89SMax Khon 	}
284c4f02a89SMax Khon 
285c4f02a89SMax Khon 	*inbuf += in - ir;
286c4f02a89SMax Khon 	*outbuf += on - or;
287c4f02a89SMax Khon 	*inbytesleft -= in - ir;
288c4f02a89SMax Khon 	*outbytesleft -= on - or;
289c4f02a89SMax Khon 	return (ret);
290c4f02a89SMax Khon }
291c4f02a89SMax Khon 
/*
 * Report the name of this converter class.  dcp is not consulted.
 */
static const char *
iconv_xlat16_name(struct iconv_converter_class *dcp)
{
	static const char converter_name[] = "xlat16";

	return (converter_name);
}
297c4f02a89SMax Khon 
2986ac937c8SXin LI static int
iconv_xlat16_tolower(void * d2p,int c)299484820d4SConrad Meyer iconv_xlat16_tolower(void *d2p, int c)
3006ac937c8SXin LI {
3016ac937c8SXin LI         struct iconv_xlat16 *dp = (struct iconv_xlat16*)d2p;
302484820d4SConrad Meyer 	int c1, c2, out;
3036ac937c8SXin LI 
3046ac937c8SXin LI 	if (c < 0x100) {
3056ac937c8SXin LI 		c1 = C2I1(c << 8);
3066ac937c8SXin LI 		c2 = C2I2(c << 8);
3076ac937c8SXin LI 	} else if (c < 0x10000) {
3086ac937c8SXin LI                 c1 = C2I1(c);
3096ac937c8SXin LI                 c2 = C2I2(c);
3106ac937c8SXin LI 	} else
3116ac937c8SXin LI 		return (c);
3126ac937c8SXin LI 
3136ac937c8SXin LI 	if (dp->d_table[c1] && dp->d_table[c1][c2] & XLAT16_HAS_LOWER_CASE) {
3146ac937c8SXin LI 		/*return (int)(dp->d_table[c1][c2] & 0xffff);*/
3156ac937c8SXin LI 		out = dp->d_table[c1][c2] & 0xffff;
3166ac937c8SXin LI 		if ((out & 0xff) == 0)
3176ac937c8SXin LI 			out = (out >> 8) & 0xff;
3186ac937c8SXin LI 		return (out);
3196ac937c8SXin LI 	} else
3206ac937c8SXin LI 		return (c);
3216ac937c8SXin LI }
3226ac937c8SXin LI 
3236ac937c8SXin LI static int
iconv_xlat16_toupper(void * d2p,int c)324484820d4SConrad Meyer iconv_xlat16_toupper(void *d2p, int c)
3256ac937c8SXin LI {
3266ac937c8SXin LI         struct iconv_xlat16 *dp = (struct iconv_xlat16*)d2p;
327484820d4SConrad Meyer 	int c1, c2, out;
3286ac937c8SXin LI 
3296ac937c8SXin LI 	if (c < 0x100) {
3306ac937c8SXin LI 		c1 = C2I1(c << 8);
3316ac937c8SXin LI 		c2 = C2I2(c << 8);
3326ac937c8SXin LI 	} else if (c < 0x10000) {
3336ac937c8SXin LI                 c1 = C2I1(c);
3346ac937c8SXin LI                 c2 = C2I2(c);
3356ac937c8SXin LI 	} else
3366ac937c8SXin LI 		return (c);
3376ac937c8SXin LI 
3386ac937c8SXin LI 	if (dp->d_table[c1] && dp->d_table[c1][c2] & XLAT16_HAS_UPPER_CASE) {
3396ac937c8SXin LI 		out = dp->d_table[c1][c2] & 0xffff;
3406ac937c8SXin LI 		if ((out & 0xff) == 0)
3416ac937c8SXin LI 			out = (out >> 8) & 0xff;
3426ac937c8SXin LI 		return (out);
3436ac937c8SXin LI 	} else
3446ac937c8SXin LI 		return (c);
3456ac937c8SXin LI }
3466ac937c8SXin LI 
347c4f02a89SMax Khon static kobj_method_t iconv_xlat16_methods[] = {
348c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_open,	iconv_xlat16_open),
349c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_close,	iconv_xlat16_close),
350c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_conv,	iconv_xlat16_conv),
351c4f02a89SMax Khon #if 0
352c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_init,	iconv_xlat16_init),
353c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_done,	iconv_xlat16_done),
354c4f02a89SMax Khon #endif
355c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_name,	iconv_xlat16_name),
3566ac937c8SXin LI 	KOBJMETHOD(iconv_converter_tolower,	iconv_xlat16_tolower),
3576ac937c8SXin LI 	KOBJMETHOD(iconv_converter_toupper,	iconv_xlat16_toupper),
358c4f02a89SMax Khon 	{0, 0}
359c4f02a89SMax Khon };
360c4f02a89SMax Khon 
361c4f02a89SMax Khon KICONV_CONVERTER(xlat16, sizeof(struct iconv_xlat16));
362