xref: /freebsd/sys/libkern/iconv_xlat16.c (revision 8a36da99deb0e19363ec04e4d3facd869c1028f5)
1c4f02a89SMax Khon /*-
2*8a36da99SPedro F. Giffuni  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3*8a36da99SPedro F. Giffuni  *
46ac937c8SXin LI  * Copyright (c) 2003, 2005 Ryuichiro Imura
5c4f02a89SMax Khon  * All rights reserved.
6c4f02a89SMax Khon  *
7c4f02a89SMax Khon  * Redistribution and use in source and binary forms, with or without
8c4f02a89SMax Khon  * modification, are permitted provided that the following conditions
9c4f02a89SMax Khon  * are met:
10c4f02a89SMax Khon  * 1. Redistributions of source code must retain the above copyright
11c4f02a89SMax Khon  *    notice, this list of conditions and the following disclaimer.
12c4f02a89SMax Khon  * 2. Redistributions in binary form must reproduce the above copyright
13c4f02a89SMax Khon  *    notice, this list of conditions and the following disclaimer in the
14c4f02a89SMax Khon  *    documentation and/or other materials provided with the distribution.
15c4f02a89SMax Khon  *
16c4f02a89SMax Khon  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17c4f02a89SMax Khon  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18c4f02a89SMax Khon  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19c4f02a89SMax Khon  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20c4f02a89SMax Khon  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21c4f02a89SMax Khon  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22c4f02a89SMax Khon  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23c4f02a89SMax Khon  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24c4f02a89SMax Khon  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25c4f02a89SMax Khon  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26c4f02a89SMax Khon  * SUCH DAMAGE.
27c4f02a89SMax Khon  */
28c4f02a89SMax Khon 
29c4f02a89SMax Khon #include <sys/cdefs.h>
30c4f02a89SMax Khon __FBSDID("$FreeBSD$");
31c4f02a89SMax Khon 
32c4f02a89SMax Khon #include <sys/param.h>
33c4f02a89SMax Khon #include <sys/kernel.h>
34c4f02a89SMax Khon #include <sys/systm.h>
35c4f02a89SMax Khon #include <sys/malloc.h>
36c4f02a89SMax Khon #include <sys/iconv.h>
37c4f02a89SMax Khon 
38c4f02a89SMax Khon #include "iconv_converter_if.h"
39c4f02a89SMax Khon 
40c4f02a89SMax Khon /*
41c4f02a89SMax Khon  * "XLAT16" converter
42c4f02a89SMax Khon  */
43c4f02a89SMax Khon 
/*
 * When the module framework is available, record that this converter
 * depends on libiconv (all three version arguments are 2).
 */
#if defined(MODULE_DEPEND)
MODULE_DEPEND(iconv_xlat16, libiconv, 2, 2, 2);
#endif
47c4f02a89SMax Khon 
/*
 * Split a 16-bit character code into the two d_table indices.
 *
 * C2I1: first index  - the low byte, with 0x100 added when bit 15 is set.
 * C2I2: second index - the high byte with bit 15 stripped.
 *
 * Both macros evaluate their argument more than once.
 */
#define C2I1(c)	((((c) & 0x8000) ? 0x100 : 0) | ((c) & 0xff))
#define C2I2(c)	(((c) >> 8) & (((c) & 0x8000) ? 0x7f : 0xff))
506ac937c8SXin LI 
51c4f02a89SMax Khon /*
52c4f02a89SMax Khon  * XLAT16 converter instance
53c4f02a89SMax Khon  */
54c4f02a89SMax Khon struct iconv_xlat16 {
55c4f02a89SMax Khon 	KOBJ_FIELDS;
56c4f02a89SMax Khon 	uint32_t *		d_table[0x200];
576ac937c8SXin LI 	void *			f_ctp;
586ac937c8SXin LI 	void *			t_ctp;
59c4f02a89SMax Khon 	struct iconv_cspair *	d_csp;
60c4f02a89SMax Khon };
61c4f02a89SMax Khon 
62c4f02a89SMax Khon static int
63c4f02a89SMax Khon iconv_xlat16_open(struct iconv_converter_class *dcp,
64c4f02a89SMax Khon 	struct iconv_cspair *csp, struct iconv_cspair *cspf, void **dpp)
65c4f02a89SMax Khon {
66c4f02a89SMax Khon 	struct iconv_xlat16 *dp;
67bf8ba9abSR. Imura 	uint32_t *headp, **idxp;
68c4f02a89SMax Khon 	int i;
69c4f02a89SMax Khon 
70c4f02a89SMax Khon 	dp = (struct iconv_xlat16 *)kobj_create((struct kobj_class*)dcp, M_ICONV, M_WAITOK);
71bf8ba9abSR. Imura 	headp = (uint32_t *)((caddr_t)csp->cp_data + sizeof(dp->d_table));
72bf8ba9abSR. Imura 	idxp = (uint32_t **)csp->cp_data;
73c4f02a89SMax Khon 	for (i = 0 ; i < 0x200 ; i++) {
74c4f02a89SMax Khon 		if (*idxp) {
75bf8ba9abSR. Imura 			dp->d_table[i] = headp;
76bf8ba9abSR. Imura 			headp += 0x80;
77c4f02a89SMax Khon 		} else {
78c4f02a89SMax Khon 			dp->d_table[i] = NULL;
79c4f02a89SMax Khon 		}
80c4f02a89SMax Khon 		idxp++;
81c4f02a89SMax Khon 	}
826ac937c8SXin LI 
836ac937c8SXin LI 	if (strcmp(csp->cp_to, KICONV_WCTYPE_NAME) != 0) {
846ac937c8SXin LI 		if (iconv_open(KICONV_WCTYPE_NAME, csp->cp_from, &dp->f_ctp) != 0)
856ac937c8SXin LI 			dp->f_ctp = NULL;
866ac937c8SXin LI 		if (iconv_open(KICONV_WCTYPE_NAME, csp->cp_to, &dp->t_ctp) != 0)
876ac937c8SXin LI 			dp->t_ctp = NULL;
886ac937c8SXin LI 	} else {
896ac937c8SXin LI 		dp->f_ctp = dp->t_ctp = dp;
906ac937c8SXin LI 	}
916ac937c8SXin LI 
92c4f02a89SMax Khon 	dp->d_csp = csp;
93c4f02a89SMax Khon 	csp->cp_refcount++;
94c4f02a89SMax Khon 	*dpp = (void*)dp;
95c4f02a89SMax Khon 	return (0);
96c4f02a89SMax Khon }
97c4f02a89SMax Khon 
98c4f02a89SMax Khon static int
99c4f02a89SMax Khon iconv_xlat16_close(void *data)
100c4f02a89SMax Khon {
101c4f02a89SMax Khon 	struct iconv_xlat16 *dp = data;
102c4f02a89SMax Khon 
1036ac937c8SXin LI 	if (dp->f_ctp && dp->f_ctp != data)
1046ac937c8SXin LI 		iconv_close(dp->f_ctp);
1056ac937c8SXin LI 	if (dp->t_ctp && dp->t_ctp != data)
1066ac937c8SXin LI 		iconv_close(dp->t_ctp);
107c4f02a89SMax Khon 	dp->d_csp->cp_refcount--;
108c4f02a89SMax Khon 	kobj_delete((struct kobj*)data, M_ICONV);
109c4f02a89SMax Khon 	return (0);
110c4f02a89SMax Khon }
111c4f02a89SMax Khon 
112c4f02a89SMax Khon static int
113c4f02a89SMax Khon iconv_xlat16_conv(void *d2p, const char **inbuf,
114c4f02a89SMax Khon 	size_t *inbytesleft, char **outbuf, size_t *outbytesleft,
115c4f02a89SMax Khon 	int convchar, int casetype)
116c4f02a89SMax Khon {
117c4f02a89SMax Khon 	struct iconv_xlat16 *dp = (struct iconv_xlat16*)d2p;
118c4f02a89SMax Khon 	const char *src;
119c4f02a89SMax Khon 	char *dst;
1200f4e4130SMax Khon 	int nullin, ret = 0;
121c4f02a89SMax Khon 	size_t in, on, ir, or, inlen;
122c4f02a89SMax Khon 	uint32_t code;
123c4f02a89SMax Khon 	u_char u, l;
1246ac937c8SXin LI 	uint16_t c1, c2, ctmp;
125c4f02a89SMax Khon 
126c4f02a89SMax Khon 	if (inbuf == NULL || *inbuf == NULL || outbuf == NULL || *outbuf == NULL)
127c4f02a89SMax Khon 		return (0);
128c4f02a89SMax Khon 	ir = in = *inbytesleft;
129c4f02a89SMax Khon 	or = on = *outbytesleft;
130c4f02a89SMax Khon 	src = *inbuf;
131c4f02a89SMax Khon 	dst = *outbuf;
132c4f02a89SMax Khon 
133c4f02a89SMax Khon 	while(ir > 0 && or > 0) {
134c4f02a89SMax Khon 
135c4f02a89SMax Khon 		inlen = 0;
1366ac937c8SXin LI 		code = 0;
137c4f02a89SMax Khon 
138c4f02a89SMax Khon 		c1 = ir > 1 ? *(src+1) & 0xff : 0;
139c4f02a89SMax Khon 		c2 = *src & 0xff;
1406ac937c8SXin LI 		ctmp = 0;
141c4f02a89SMax Khon 
142c4f02a89SMax Khon 		c1 = c2 & 0x80 ? c1 | 0x100 : c1;
143c4f02a89SMax Khon 		c2 = c2 & 0x80 ? c2 & 0x7f : c2;
144c4f02a89SMax Khon 
1456ac937c8SXin LI 		if (ir > 1 && dp->d_table[c1] && dp->d_table[c1][c2]) {
146c4f02a89SMax Khon 			/*
147c4f02a89SMax Khon 			 * inbuf char is a double byte char
148c4f02a89SMax Khon 			 */
149c4f02a89SMax Khon 			inlen = 2;
1506ac937c8SXin LI 
1516ac937c8SXin LI 			/* toupper,tolower */
1526ac937c8SXin LI 			if (casetype == KICONV_FROM_LOWER && dp->f_ctp)
1536ac937c8SXin LI 				ctmp = towlower(((u_char)*src << 8) | (u_char)*(src + 1),
1546ac937c8SXin LI 				    dp->f_ctp);
1556ac937c8SXin LI 			else if (casetype == KICONV_FROM_UPPER && dp->f_ctp)
1566ac937c8SXin LI 				ctmp = towupper(((u_char)*src << 8) | (u_char)*(src + 1),
1576ac937c8SXin LI 				    dp->f_ctp);
1586ac937c8SXin LI 			if (ctmp) {
1596ac937c8SXin LI 				c1 = C2I1(ctmp);
1606ac937c8SXin LI 				c2 = C2I2(ctmp);
1616ac937c8SXin LI 			}
162c4f02a89SMax Khon 		}
163c4f02a89SMax Khon 
164c4f02a89SMax Khon 		if (inlen == 0) {
165c4f02a89SMax Khon 			c1 &= 0xff00;
166c4f02a89SMax Khon 			if (!dp->d_table[c1]) {
167c4f02a89SMax Khon 				ret = -1;
168c4f02a89SMax Khon 				break;
169c4f02a89SMax Khon 			}
170c4f02a89SMax Khon 			/*
171c4f02a89SMax Khon 			 * inbuf char is a single byte char
172c4f02a89SMax Khon 			 */
173c4f02a89SMax Khon 			inlen = 1;
1746ac937c8SXin LI 
1756ac937c8SXin LI 			if (casetype & (KICONV_FROM_LOWER|KICONV_FROM_UPPER))
1766ac937c8SXin LI 				code = dp->d_table[c1][c2];
1776ac937c8SXin LI 
1786ac937c8SXin LI 			if (casetype == KICONV_FROM_LOWER) {
1796ac937c8SXin LI 				if (dp->f_ctp)
1806ac937c8SXin LI 					ctmp = towlower((u_char)*src, dp->f_ctp);
1816ac937c8SXin LI 				else if (code & XLAT16_HAS_FROM_LOWER_CASE)
1826ac937c8SXin LI 					ctmp = (u_char)(code >> 16);
1836ac937c8SXin LI 			} else if (casetype == KICONV_FROM_UPPER) {
1846ac937c8SXin LI 				if (dp->f_ctp)
1856ac937c8SXin LI 					ctmp = towupper((u_char)*src, dp->f_ctp);
1866ac937c8SXin LI 				else if (code & XLAT16_HAS_FROM_UPPER_CASE)
1876ac937c8SXin LI 					ctmp = (u_char)(code >> 16);
1886ac937c8SXin LI 			}
1896ac937c8SXin LI 			if (ctmp) {
1906ac937c8SXin LI 				c1 = C2I1(ctmp << 8);
1916ac937c8SXin LI 				c2 = C2I2(ctmp << 8);
1926ac937c8SXin LI 			}
1936ac937c8SXin LI 		}
1946ac937c8SXin LI 
195c4f02a89SMax Khon 		code = dp->d_table[c1][c2];
196c4f02a89SMax Khon 		if (!code) {
197c4f02a89SMax Khon 			ret = -1;
198c4f02a89SMax Khon 			break;
199c4f02a89SMax Khon 		}
200c4f02a89SMax Khon 
2010f4e4130SMax Khon 		nullin = (code & XLAT16_ACCEPT_NULL_IN) ? 1 : 0;
2020f4e4130SMax Khon 		if (inlen == 1 && nullin) {
203c4f02a89SMax Khon 			/*
204c4f02a89SMax Khon 			 * XLAT16_ACCEPT_NULL_IN requires inbuf has 2byte
205c4f02a89SMax Khon 			 */
206c4f02a89SMax Khon 			ret = -1;
207c4f02a89SMax Khon 			break;
208c4f02a89SMax Khon 		}
209c4f02a89SMax Khon 
210c4f02a89SMax Khon 		/*
211c4f02a89SMax Khon 		 * now start translation
212c4f02a89SMax Khon 		 */
213c4f02a89SMax Khon 		u = (u_char)(code >> 8);
214c4f02a89SMax Khon 		l = (u_char)code;
215c4f02a89SMax Khon 
216c4f02a89SMax Khon #ifdef XLAT16_ACCEPT_3BYTE_CHR
217c4f02a89SMax Khon 		if (code & XLAT16_IS_3BYTE_CHR) {
218c4f02a89SMax Khon 			if (or < 3) {
219c4f02a89SMax Khon 				ret = -1;
220c4f02a89SMax Khon 				break;
221c4f02a89SMax Khon 			}
222c4f02a89SMax Khon 			*dst++ = u;
223c4f02a89SMax Khon 			*dst++ = l;
224c4f02a89SMax Khon 			*dst++ = (u_char)(code >> 16);
225c4f02a89SMax Khon 			or -= 3;
226c4f02a89SMax Khon 		} else
227c4f02a89SMax Khon #endif
228c4f02a89SMax Khon 		if (u || code & XLAT16_ACCEPT_NULL_OUT) {
229c4f02a89SMax Khon 			if (or < 2) {
230c4f02a89SMax Khon 				ret = -1;
231c4f02a89SMax Khon 				break;
232c4f02a89SMax Khon 			}
2336ac937c8SXin LI 
2346ac937c8SXin LI 			/* toupper,tolower */
2356ac937c8SXin LI 			if (casetype == KICONV_LOWER && dp->t_ctp) {
2366ac937c8SXin LI 				code = towlower((uint16_t)code, dp->t_ctp);
2376ac937c8SXin LI 				u = (u_char)(code >> 8);
2386ac937c8SXin LI 				l = (u_char)code;
2396ac937c8SXin LI 			}
2406ac937c8SXin LI 			if (casetype == KICONV_UPPER && dp->t_ctp) {
2416ac937c8SXin LI 				code = towupper((uint16_t)code, dp->t_ctp);
2426ac937c8SXin LI 				u = (u_char)(code >> 8);
2436ac937c8SXin LI 				l = (u_char)code;
2446ac937c8SXin LI 			}
2456ac937c8SXin LI 
246c4f02a89SMax Khon 			*dst++ = u;
247c4f02a89SMax Khon 			*dst++ = l;
248c4f02a89SMax Khon 			or -= 2;
249c4f02a89SMax Khon 		} else {
2506ac937c8SXin LI 			/* toupper,tolower */
2516ac937c8SXin LI 			if (casetype == KICONV_LOWER) {
2526ac937c8SXin LI 				if (dp->t_ctp)
2536ac937c8SXin LI 					l = (u_char)towlower(l, dp->t_ctp);
2546ac937c8SXin LI 				else if (code & XLAT16_HAS_LOWER_CASE)
2556ac937c8SXin LI 					l = (u_char)(code >> 16);
2566ac937c8SXin LI 			}
2576ac937c8SXin LI 			if (casetype == KICONV_UPPER) {
2586ac937c8SXin LI 				if (dp->t_ctp)
2596ac937c8SXin LI 					l = (u_char)towupper(l, dp->t_ctp);
2606ac937c8SXin LI 				else if (code & XLAT16_HAS_UPPER_CASE)
2616ac937c8SXin LI 					l = (u_char)(code >> 16);
2626ac937c8SXin LI 			}
2636ac937c8SXin LI 
264c4f02a89SMax Khon 			*dst++ = l;
265c4f02a89SMax Khon 			or--;
266c4f02a89SMax Khon 		}
267c4f02a89SMax Khon 
268c4f02a89SMax Khon 		if (inlen == 2) {
269c4f02a89SMax Khon 			/*
270c4f02a89SMax Khon 			 * there is a case that inbuf char is a single
271c4f02a89SMax Khon 			 * byte char while inlen == 2
272c4f02a89SMax Khon 			 */
273e099b90bSPedro F. Giffuni 			if ((u_char)*(src+1) == '\0' && !nullin ) {
274c4f02a89SMax Khon 				src++;
275c4f02a89SMax Khon 				ir--;
276c4f02a89SMax Khon 			} else {
277c4f02a89SMax Khon 				src += 2;
278c4f02a89SMax Khon 				ir -= 2;
279c4f02a89SMax Khon 			}
280c4f02a89SMax Khon 		} else {
281c4f02a89SMax Khon 			src++;
282c4f02a89SMax Khon 			ir--;
283c4f02a89SMax Khon 		}
284c4f02a89SMax Khon 
285c4f02a89SMax Khon 		if (convchar == 1)
286c4f02a89SMax Khon 			break;
287c4f02a89SMax Khon 	}
288c4f02a89SMax Khon 
289c4f02a89SMax Khon 	*inbuf += in - ir;
290c4f02a89SMax Khon 	*outbuf += on - or;
291c4f02a89SMax Khon 	*inbytesleft -= in - ir;
292c4f02a89SMax Khon 	*outbytesleft -= on - or;
293c4f02a89SMax Khon 	return (ret);
294c4f02a89SMax Khon }
295c4f02a89SMax Khon 
/*
 * Report the converter's name.  dcp is not consulted; the result is
 * always the constant string "xlat16".
 */
static const char *
iconv_xlat16_name(struct iconv_converter_class *dcp)
{
	static const char converter_name[] = "xlat16";

	return (converter_name);
}
301c4f02a89SMax Khon 
3026ac937c8SXin LI static int
303484820d4SConrad Meyer iconv_xlat16_tolower(void *d2p, int c)
3046ac937c8SXin LI {
3056ac937c8SXin LI         struct iconv_xlat16 *dp = (struct iconv_xlat16*)d2p;
306484820d4SConrad Meyer 	int c1, c2, out;
3076ac937c8SXin LI 
3086ac937c8SXin LI 	if (c < 0x100) {
3096ac937c8SXin LI 		c1 = C2I1(c << 8);
3106ac937c8SXin LI 		c2 = C2I2(c << 8);
3116ac937c8SXin LI 	} else if (c < 0x10000) {
3126ac937c8SXin LI                 c1 = C2I1(c);
3136ac937c8SXin LI                 c2 = C2I2(c);
3146ac937c8SXin LI 	} else
3156ac937c8SXin LI 		return (c);
3166ac937c8SXin LI 
3176ac937c8SXin LI 	if (dp->d_table[c1] && dp->d_table[c1][c2] & XLAT16_HAS_LOWER_CASE) {
3186ac937c8SXin LI 		/*return (int)(dp->d_table[c1][c2] & 0xffff);*/
3196ac937c8SXin LI 		out = dp->d_table[c1][c2] & 0xffff;
3206ac937c8SXin LI 		if ((out & 0xff) == 0)
3216ac937c8SXin LI 			out = (out >> 8) & 0xff;
3226ac937c8SXin LI 		return (out);
3236ac937c8SXin LI 	} else
3246ac937c8SXin LI 		return (c);
3256ac937c8SXin LI }
3266ac937c8SXin LI 
3276ac937c8SXin LI static int
328484820d4SConrad Meyer iconv_xlat16_toupper(void *d2p, int c)
3296ac937c8SXin LI {
3306ac937c8SXin LI         struct iconv_xlat16 *dp = (struct iconv_xlat16*)d2p;
331484820d4SConrad Meyer 	int c1, c2, out;
3326ac937c8SXin LI 
3336ac937c8SXin LI 	if (c < 0x100) {
3346ac937c8SXin LI 		c1 = C2I1(c << 8);
3356ac937c8SXin LI 		c2 = C2I2(c << 8);
3366ac937c8SXin LI 	} else if (c < 0x10000) {
3376ac937c8SXin LI                 c1 = C2I1(c);
3386ac937c8SXin LI                 c2 = C2I2(c);
3396ac937c8SXin LI 	} else
3406ac937c8SXin LI 		return (c);
3416ac937c8SXin LI 
3426ac937c8SXin LI 	if (dp->d_table[c1] && dp->d_table[c1][c2] & XLAT16_HAS_UPPER_CASE) {
3436ac937c8SXin LI 		out = dp->d_table[c1][c2] & 0xffff;
3446ac937c8SXin LI 		if ((out & 0xff) == 0)
3456ac937c8SXin LI 			out = (out >> 8) & 0xff;
3466ac937c8SXin LI 		return (out);
3476ac937c8SXin LI 	} else
3486ac937c8SXin LI 		return (c);
3496ac937c8SXin LI }
3506ac937c8SXin LI 
351c4f02a89SMax Khon static kobj_method_t iconv_xlat16_methods[] = {
352c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_open,	iconv_xlat16_open),
353c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_close,	iconv_xlat16_close),
354c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_conv,	iconv_xlat16_conv),
355c4f02a89SMax Khon #if 0
356c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_init,	iconv_xlat16_init),
357c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_done,	iconv_xlat16_done),
358c4f02a89SMax Khon #endif
359c4f02a89SMax Khon 	KOBJMETHOD(iconv_converter_name,	iconv_xlat16_name),
3606ac937c8SXin LI 	KOBJMETHOD(iconv_converter_tolower,	iconv_xlat16_tolower),
3616ac937c8SXin LI 	KOBJMETHOD(iconv_converter_toupper,	iconv_xlat16_toupper),
362c4f02a89SMax Khon 	{0, 0}
363c4f02a89SMax Khon };
364c4f02a89SMax Khon 
365c4f02a89SMax Khon KICONV_CONVERTER(xlat16, sizeof(struct iconv_xlat16));
366