xref: /titanic_51/usr/src/uts/common/os/kiconv.c (revision d14d7d31f2a70b1e0a9c933607d6627f5718980e)
1*d14d7d31Sis /*
2*d14d7d31Sis  * CDDL HEADER START
3*d14d7d31Sis  *
4*d14d7d31Sis  * The contents of this file are subject to the terms of the
5*d14d7d31Sis  * Common Development and Distribution License (the "License").
6*d14d7d31Sis  * You may not use this file except in compliance with the License.
7*d14d7d31Sis  *
8*d14d7d31Sis  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*d14d7d31Sis  * or http://www.opensolaris.org/os/licensing.
10*d14d7d31Sis  * See the License for the specific language governing permissions
11*d14d7d31Sis  * and limitations under the License.
12*d14d7d31Sis  *
13*d14d7d31Sis  * When distributing Covered Code, include this CDDL HEADER in each
14*d14d7d31Sis  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*d14d7d31Sis  * If applicable, add the following below this CDDL HEADER, with the
16*d14d7d31Sis  * fields enclosed by brackets "[]" replaced with your own identifying
17*d14d7d31Sis  * information: Portions Copyright [yyyy] [name of copyright owner]
18*d14d7d31Sis  *
19*d14d7d31Sis  * CDDL HEADER END
20*d14d7d31Sis  */
21*d14d7d31Sis /*
22*d14d7d31Sis  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23*d14d7d31Sis  * Use is subject to license terms.
24*d14d7d31Sis  */
25*d14d7d31Sis 
26*d14d7d31Sis #pragma ident	"%Z%%M%	%I%	%E% SMI"
27*d14d7d31Sis 
28*d14d7d31Sis /*
29*d14d7d31Sis  * Kernel iconv code conversion functions (PSARC/2007/173).
30*d14d7d31Sis  *
31*d14d7d31Sis  * Man pages: kiconv_open(9F), kiconv(9F), kiconv_close(9F), and kiconvstr(9F).
32*d14d7d31Sis  * Interface stability: Committed.
33*d14d7d31Sis  */
34*d14d7d31Sis 
35*d14d7d31Sis #include <sys/types.h>
36*d14d7d31Sis #include <sys/param.h>
37*d14d7d31Sis #include <sys/sysmacros.h>
38*d14d7d31Sis #include <sys/systm.h>
39*d14d7d31Sis #include <sys/debug.h>
40*d14d7d31Sis #include <sys/kmem.h>
41*d14d7d31Sis #include <sys/sunddi.h>
42*d14d7d31Sis #include <sys/ksynch.h>
43*d14d7d31Sis #include <sys/modctl.h>
44*d14d7d31Sis #include <sys/byteorder.h>
45*d14d7d31Sis #include <sys/errno.h>
46*d14d7d31Sis #include <sys/kiconv.h>
47*d14d7d31Sis #include <sys/kiconv_latin1.h>
48*d14d7d31Sis 
49*d14d7d31Sis 
50*d14d7d31Sis /*
51*d14d7d31Sis  * The following macros indicate ids to the correct code conversion mapping
52*d14d7d31Sis  * data tables to use. The actual tables are coming from <sys/kiconv_latin1.h>.
53*d14d7d31Sis  */
/*
 * Each id is used as the first index into to_sb_tbl[] and to_u8_tbl[]. For
 * the single byte to UTF-8 direction the id also doubles as the conversion
 * descriptor itself.
 */
#define	KICONV_TBLID_1252		(0x00)
#define	KICONV_TBLID_8859_1		(0x01)
#define	KICONV_TBLID_8859_15		(0x02)
#define	KICONV_TBLID_850		(0x03)

/* Largest valid table id; anything above it is rejected with EBADF. */
#define	KICONV_MAX_MAPPING_TBLID	(0x03)
60*d14d7d31Sis 
61*d14d7d31Sis /*
62*d14d7d31Sis  * The following tables are coming from u8_textprep.c. We use them to
63*d14d7d31Sis  * check on validity of UTF-8 characters and their bytes.
64*d14d7d31Sis  */
65*d14d7d31Sis extern const int8_t u8_number_of_bytes[];
66*d14d7d31Sis extern const uint8_t u8_valid_min_2nd_byte[];
67*d14d7d31Sis extern const uint8_t u8_valid_max_2nd_byte[];
68*d14d7d31Sis 
69*d14d7d31Sis 
70*d14d7d31Sis /*
71*d14d7d31Sis  * The following four functions, open_to_1252(), open_to_88591(),
72*d14d7d31Sis  * open_to_885915(), and open_to_850(), are kiconv_open functions from
73*d14d7d31Sis  * UTF-8 to corresponding single byte codesets.
74*d14d7d31Sis  */
75*d14d7d31Sis static void *
76*d14d7d31Sis open_to_1252()
77*d14d7d31Sis {
78*d14d7d31Sis 	kiconv_state_t s;
79*d14d7d31Sis 
80*d14d7d31Sis 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
81*d14d7d31Sis 	s->id = KICONV_TBLID_1252;
82*d14d7d31Sis 	s->bom_processed = 0;
83*d14d7d31Sis 
84*d14d7d31Sis 	return ((void *)s);
85*d14d7d31Sis }
86*d14d7d31Sis 
87*d14d7d31Sis static void *
88*d14d7d31Sis open_to_88591()
89*d14d7d31Sis {
90*d14d7d31Sis 	kiconv_state_t s;
91*d14d7d31Sis 
92*d14d7d31Sis 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
93*d14d7d31Sis 	s->id = KICONV_TBLID_8859_1;
94*d14d7d31Sis 	s->bom_processed = 0;
95*d14d7d31Sis 
96*d14d7d31Sis 	return ((void *)s);
97*d14d7d31Sis }
98*d14d7d31Sis 
99*d14d7d31Sis static void *
100*d14d7d31Sis open_to_885915()
101*d14d7d31Sis {
102*d14d7d31Sis 	kiconv_state_t s;
103*d14d7d31Sis 
104*d14d7d31Sis 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
105*d14d7d31Sis 	s->id = KICONV_TBLID_8859_15;
106*d14d7d31Sis 	s->bom_processed = 0;
107*d14d7d31Sis 
108*d14d7d31Sis 	return ((void *)s);
109*d14d7d31Sis }
110*d14d7d31Sis 
111*d14d7d31Sis static void *
112*d14d7d31Sis open_to_850()
113*d14d7d31Sis {
114*d14d7d31Sis 	kiconv_state_t s;
115*d14d7d31Sis 
116*d14d7d31Sis 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
117*d14d7d31Sis 	s->id = KICONV_TBLID_850;
118*d14d7d31Sis 	s->bom_processed = 0;
119*d14d7d31Sis 
120*d14d7d31Sis 	return ((void *)s);
121*d14d7d31Sis }
122*d14d7d31Sis 
123*d14d7d31Sis /*
124*d14d7d31Sis  * The following four functions, open_fr_1252(), open_fr_88591(),
125*d14d7d31Sis  * open_fr_885915(), and open_fr_850(), are kiconv_open functions from
126*d14d7d31Sis  * corresponding single byte codesets to UTF-8.
127*d14d7d31Sis  */
128*d14d7d31Sis static void *
129*d14d7d31Sis open_fr_1252()
130*d14d7d31Sis {
131*d14d7d31Sis 	return ((void *)KICONV_TBLID_1252);
132*d14d7d31Sis }
133*d14d7d31Sis 
134*d14d7d31Sis static void *
135*d14d7d31Sis open_fr_88591()
136*d14d7d31Sis {
137*d14d7d31Sis 	return ((void *)KICONV_TBLID_8859_1);
138*d14d7d31Sis }
139*d14d7d31Sis 
140*d14d7d31Sis static void *
141*d14d7d31Sis open_fr_885915()
142*d14d7d31Sis {
143*d14d7d31Sis 	return ((void *)KICONV_TBLID_8859_15);
144*d14d7d31Sis }
145*d14d7d31Sis 
146*d14d7d31Sis static void *
147*d14d7d31Sis open_fr_850()
148*d14d7d31Sis {
149*d14d7d31Sis 	return ((void *)KICONV_TBLID_850);
150*d14d7d31Sis }
151*d14d7d31Sis 
152*d14d7d31Sis /*
153*d14d7d31Sis  * The following close_to_sb() function is kiconv_close function for
154*d14d7d31Sis  * the conversions from UTF-8 to single byte codesets. The close_fr_sb()
155*d14d7d31Sis  * is kiconv_close function for the conversions from single byte codesets to
156*d14d7d31Sis  * UTF-8.
157*d14d7d31Sis  */
158*d14d7d31Sis static int
159*d14d7d31Sis close_to_sb(void *s)
160*d14d7d31Sis {
161*d14d7d31Sis 	if (! s || s == (void *)-1)
162*d14d7d31Sis 		return (EBADF);
163*d14d7d31Sis 
164*d14d7d31Sis 	kmem_free(s, sizeof (kiconv_state_data_t));
165*d14d7d31Sis 
166*d14d7d31Sis 	return (0);
167*d14d7d31Sis }
168*d14d7d31Sis 
169*d14d7d31Sis static int
170*d14d7d31Sis close_fr_sb(void *s)
171*d14d7d31Sis {
172*d14d7d31Sis 	if ((ulong_t)s > KICONV_MAX_MAPPING_TBLID)
173*d14d7d31Sis 		return (EBADF);
174*d14d7d31Sis 
175*d14d7d31Sis 	return (0);
176*d14d7d31Sis }
177*d14d7d31Sis 
178*d14d7d31Sis /*
179*d14d7d31Sis  * The following is the common kiconv function for conversions from UTF-8
180*d14d7d31Sis  * to single byte codesets.
181*d14d7d31Sis  */
182*d14d7d31Sis static size_t
183*d14d7d31Sis kiconv_to_sb(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
184*d14d7d31Sis 	size_t *outbytesleft, int *errno)
185*d14d7d31Sis {
186*d14d7d31Sis 	size_t id;
187*d14d7d31Sis 	size_t ret_val;
188*d14d7d31Sis 	uchar_t *ib;
189*d14d7d31Sis 	uchar_t *oldib;
190*d14d7d31Sis 	uchar_t *ob;
191*d14d7d31Sis 	uchar_t *ibtail;
192*d14d7d31Sis 	uchar_t *obtail;
193*d14d7d31Sis 	uint32_t u8;
194*d14d7d31Sis 	size_t i;
195*d14d7d31Sis 	size_t l;
196*d14d7d31Sis 	size_t h;
197*d14d7d31Sis 	size_t init_h;
198*d14d7d31Sis 	int8_t sz;
199*d14d7d31Sis 	boolean_t second;
200*d14d7d31Sis 
201*d14d7d31Sis 	/* Check on the kiconv code conversion descriptor. */
202*d14d7d31Sis 	if (! kcd || kcd == (void *)-1) {
203*d14d7d31Sis 		*errno = EBADF;
204*d14d7d31Sis 		return ((size_t)-1);
205*d14d7d31Sis 	}
206*d14d7d31Sis 
207*d14d7d31Sis 	/*
208*d14d7d31Sis 	 * Get the table id we are going to use for the code conversion
209*d14d7d31Sis 	 * and let's double check on it.
210*d14d7d31Sis 	 */
211*d14d7d31Sis 	id = ((kiconv_state_t)kcd)->id;
212*d14d7d31Sis 	if (id > KICONV_MAX_MAPPING_TBLID) {
213*d14d7d31Sis 		*errno = EBADF;
214*d14d7d31Sis 		return ((size_t)-1);
215*d14d7d31Sis 	}
216*d14d7d31Sis 
217*d14d7d31Sis 	/* If this is a state reset request, process and return. */
218*d14d7d31Sis 	if (! inbuf || ! (*inbuf)) {
219*d14d7d31Sis 		((kiconv_state_t)kcd)->bom_processed = 0;
220*d14d7d31Sis 		return ((size_t)0);
221*d14d7d31Sis 	}
222*d14d7d31Sis 
223*d14d7d31Sis 	ret_val = 0;
224*d14d7d31Sis 	ib = (uchar_t *)*inbuf;
225*d14d7d31Sis 	ob = (uchar_t *)*outbuf;
226*d14d7d31Sis 	ibtail = ib + *inbytesleft;
227*d14d7d31Sis 	obtail = ob + *outbytesleft;
228*d14d7d31Sis 
229*d14d7d31Sis 	/*
230*d14d7d31Sis 	 * The inital high value for the binary search we will be using
231*d14d7d31Sis 	 * shortly is a literal constant as of today but to be future proof,
232*d14d7d31Sis 	 * let's calculate it like the following at here.
233*d14d7d31Sis 	 */
234*d14d7d31Sis 	init_h = sizeof (to_sb_tbl[id]) / sizeof (kiconv_to_sb_tbl_comp_t) - 1;
235*d14d7d31Sis 
236*d14d7d31Sis 	/*
237*d14d7d31Sis 	 * If we haven't checked on the UTF-8 signature BOM character in
238*d14d7d31Sis 	 * the beginning of the conversion data stream, we check it and if
239*d14d7d31Sis 	 * find one, we skip it since we have no use for it.
240*d14d7d31Sis 	 */
241*d14d7d31Sis 	if (((kiconv_state_t)kcd)->bom_processed == 0 && (ibtail - ib) >= 3 &&
242*d14d7d31Sis 	    *ib == 0xef && *(ib + 1) == 0xbb && *(ib + 2) == 0xbf)
243*d14d7d31Sis 			ib += 3;
244*d14d7d31Sis 	((kiconv_state_t)kcd)->bom_processed = 1;
245*d14d7d31Sis 
246*d14d7d31Sis 	while (ib < ibtail) {
247*d14d7d31Sis 		sz = u8_number_of_bytes[*ib];
248*d14d7d31Sis 		if (sz <= 0) {
249*d14d7d31Sis 			*errno = EILSEQ;
250*d14d7d31Sis 			ret_val = (size_t)-1;
251*d14d7d31Sis 			break;
252*d14d7d31Sis 		}
253*d14d7d31Sis 
254*d14d7d31Sis 		/*
255*d14d7d31Sis 		 * If there is no room to write at the output buffer,
256*d14d7d31Sis 		 * issue E2BIG error.
257*d14d7d31Sis 		 */
258*d14d7d31Sis 		if (ob >= obtail) {
259*d14d7d31Sis 			*errno = E2BIG;
260*d14d7d31Sis 			ret_val = (size_t)-1;
261*d14d7d31Sis 			break;
262*d14d7d31Sis 		}
263*d14d7d31Sis 
264*d14d7d31Sis 		/*
265*d14d7d31Sis 		 * If it is a 7-bit ASCII character, we don't need to
266*d14d7d31Sis 		 * process further and we just copy the character over.
267*d14d7d31Sis 		 *
268*d14d7d31Sis 		 * If not, we collect the character bytes up to four bytes,
269*d14d7d31Sis 		 * validate the bytes, and binary search for the corresponding
270*d14d7d31Sis 		 * single byte codeset character byte. If we find it from
271*d14d7d31Sis 		 * the mapping table, we put that into the output buffer;
272*d14d7d31Sis 		 * otherwise, we put a replacement character instead as
273*d14d7d31Sis 		 * a non-identical conversion.
274*d14d7d31Sis 		 */
275*d14d7d31Sis 		if (sz == 1) {
276*d14d7d31Sis 			*ob++ = *ib++;
277*d14d7d31Sis 			continue;
278*d14d7d31Sis 		}
279*d14d7d31Sis 
280*d14d7d31Sis 		/*
281*d14d7d31Sis 		 * Issue EINVAL error if input buffer has an incomplete
282*d14d7d31Sis 		 * character at the end of the buffer.
283*d14d7d31Sis 		 */
284*d14d7d31Sis 		if ((ibtail - ib) < sz) {
285*d14d7d31Sis 			*errno = EINVAL;
286*d14d7d31Sis 			ret_val = (size_t)-1;
287*d14d7d31Sis 			break;
288*d14d7d31Sis 		}
289*d14d7d31Sis 
290*d14d7d31Sis 		/*
291*d14d7d31Sis 		 * We collect UTF-8 character bytes and also check if
292*d14d7d31Sis 		 * this is a valid UTF-8 character without any bogus bytes
293*d14d7d31Sis 		 * based on the latest UTF-8 binary representation.
294*d14d7d31Sis 		 */
295*d14d7d31Sis 		oldib = ib;
296*d14d7d31Sis 		u8 = *ib++;
297*d14d7d31Sis 		second = B_TRUE;
298*d14d7d31Sis 		for (i = 1; i < sz; i++) {
299*d14d7d31Sis 			if (second) {
300*d14d7d31Sis 				if (*ib < u8_valid_min_2nd_byte[u8] ||
301*d14d7d31Sis 				    *ib > u8_valid_max_2nd_byte[u8]) {
302*d14d7d31Sis 					*errno = EILSEQ;
303*d14d7d31Sis 					ret_val = (size_t)-1;
304*d14d7d31Sis 					ib = oldib;
305*d14d7d31Sis 					goto TO_SB_ILLEGAL_CHAR_ERR;
306*d14d7d31Sis 				}
307*d14d7d31Sis 				second = B_FALSE;
308*d14d7d31Sis 			} else if (*ib < 0x80 || *ib > 0xbf) {
309*d14d7d31Sis 				*errno = EILSEQ;
310*d14d7d31Sis 				ret_val = (size_t)-1;
311*d14d7d31Sis 				ib = oldib;
312*d14d7d31Sis 				goto TO_SB_ILLEGAL_CHAR_ERR;
313*d14d7d31Sis 			}
314*d14d7d31Sis 			u8 = (u8 << 8) | ((uint32_t)*ib);
315*d14d7d31Sis 			ib++;
316*d14d7d31Sis 		}
317*d14d7d31Sis 
318*d14d7d31Sis 		i = l = 0;
319*d14d7d31Sis 		h = init_h;
320*d14d7d31Sis 		while (l <= h) {
321*d14d7d31Sis 			i = (l + h) / 2;
322*d14d7d31Sis 			if (to_sb_tbl[id][i].u8 == u8)
323*d14d7d31Sis 				break;
324*d14d7d31Sis 			else if (to_sb_tbl[id][i].u8 < u8)
325*d14d7d31Sis 				l = i + 1;
326*d14d7d31Sis 			else
327*d14d7d31Sis 				h = i - 1;
328*d14d7d31Sis 		}
329*d14d7d31Sis 
330*d14d7d31Sis 		if (to_sb_tbl[id][i].u8 == u8) {
331*d14d7d31Sis 			*ob++ = to_sb_tbl[id][i].sb;
332*d14d7d31Sis 		} else {
333*d14d7d31Sis 			/*
334*d14d7d31Sis 			 * If we don't find a character in the target
335*d14d7d31Sis 			 * codeset, we insert an ASCII replacement character
336*d14d7d31Sis 			 * at the output buffer and indicate such
337*d14d7d31Sis 			 * "non-identical" conversion by increasing the
338*d14d7d31Sis 			 * return value which is the non-identical conversion
339*d14d7d31Sis 			 * counter if bigger than 0.
340*d14d7d31Sis 			 */
341*d14d7d31Sis 			*ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
342*d14d7d31Sis 			ret_val++;
343*d14d7d31Sis 		}
344*d14d7d31Sis 	}
345*d14d7d31Sis 
346*d14d7d31Sis TO_SB_ILLEGAL_CHAR_ERR:
347*d14d7d31Sis 	*inbuf = (char *)ib;
348*d14d7d31Sis 	*inbytesleft = ibtail - ib;
349*d14d7d31Sis 	*outbuf = (char *)ob;
350*d14d7d31Sis 	*outbytesleft = obtail - ob;
351*d14d7d31Sis 
352*d14d7d31Sis 	return (ret_val);
353*d14d7d31Sis }
354*d14d7d31Sis 
355*d14d7d31Sis /*
356*d14d7d31Sis  * The following is the common kiconv function from single byte codesets to
357*d14d7d31Sis  * UTF-8.
358*d14d7d31Sis  */
359*d14d7d31Sis static size_t
360*d14d7d31Sis kiconv_fr_sb(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
361*d14d7d31Sis 	size_t *outbytesleft, int *errno)
362*d14d7d31Sis {
363*d14d7d31Sis 	size_t ret_val;
364*d14d7d31Sis 	uchar_t *ib;
365*d14d7d31Sis 	uchar_t *ob;
366*d14d7d31Sis 	uchar_t *ibtail;
367*d14d7d31Sis 	uchar_t *obtail;
368*d14d7d31Sis 	size_t i;
369*d14d7d31Sis 	size_t k;
370*d14d7d31Sis 	int8_t sz;
371*d14d7d31Sis 
372*d14d7d31Sis 	/* Check on the kiconv code conversion descriptor validity. */
373*d14d7d31Sis 	if ((ulong_t)kcd > KICONV_MAX_MAPPING_TBLID) {
374*d14d7d31Sis 		*errno = EBADF;
375*d14d7d31Sis 		return ((size_t)-1);
376*d14d7d31Sis 	}
377*d14d7d31Sis 
378*d14d7d31Sis 	/*
379*d14d7d31Sis 	 * If this is a state reset request, there is nothing to do and so
380*d14d7d31Sis 	 * we just return.
381*d14d7d31Sis 	 */
382*d14d7d31Sis 	if (! inbuf || ! (*inbuf))
383*d14d7d31Sis 		return ((size_t)0);
384*d14d7d31Sis 
385*d14d7d31Sis 	ret_val = 0;
386*d14d7d31Sis 	ib = (uchar_t *)*inbuf;
387*d14d7d31Sis 	ob = (uchar_t *)*outbuf;
388*d14d7d31Sis 	ibtail = ib + *inbytesleft;
389*d14d7d31Sis 	obtail = ob + *outbytesleft;
390*d14d7d31Sis 
391*d14d7d31Sis 	while (ib < ibtail) {
392*d14d7d31Sis 		/*
393*d14d7d31Sis 		 * If this is a 7-bit ASCII character, we just copy over and
394*d14d7d31Sis 		 * that's all we need to do for this character.
395*d14d7d31Sis 		 */
396*d14d7d31Sis 		if (*ib < 0x80) {
397*d14d7d31Sis 			if (ob >= obtail) {
398*d14d7d31Sis 				*errno = E2BIG;
399*d14d7d31Sis 				ret_val = (size_t)-1;
400*d14d7d31Sis 				break;
401*d14d7d31Sis 			}
402*d14d7d31Sis 
403*d14d7d31Sis 			*ob++ = *ib++;
404*d14d7d31Sis 			continue;
405*d14d7d31Sis 		}
406*d14d7d31Sis 
407*d14d7d31Sis 		/*
408*d14d7d31Sis 		 * Otherwise, we get the corresponding UTF-8 character bytes
409*d14d7d31Sis 		 * from the mapping table and copy them over.
410*d14d7d31Sis 		 *
411*d14d7d31Sis 		 * We don't need to worry about if the UTF-8 character bytes
412*d14d7d31Sis 		 * at the mapping tables are valid or not since they are good.
413*d14d7d31Sis 		 */
414*d14d7d31Sis 		k = *ib - 0x80;
415*d14d7d31Sis 		sz = u8_number_of_bytes[to_u8_tbl[(ulong_t)kcd][k].u8[0]];
416*d14d7d31Sis 
417*d14d7d31Sis 		/*
418*d14d7d31Sis 		 * If sz <= 0, that means we don't have any assigned character
419*d14d7d31Sis 		 * at the code point, k + 0x80, of the single byte codeset
420*d14d7d31Sis 		 * which is the fromcode. In other words, the input buffer
421*d14d7d31Sis 		 * has an illegal character.
422*d14d7d31Sis 		 */
423*d14d7d31Sis 		if (sz <= 0) {
424*d14d7d31Sis 			*errno = EILSEQ;
425*d14d7d31Sis 			ret_val = (size_t)-1;
426*d14d7d31Sis 			break;
427*d14d7d31Sis 		}
428*d14d7d31Sis 
429*d14d7d31Sis 		if ((obtail - ob) < sz) {
430*d14d7d31Sis 			*errno = E2BIG;
431*d14d7d31Sis 			ret_val = (size_t)-1;
432*d14d7d31Sis 			break;
433*d14d7d31Sis 		}
434*d14d7d31Sis 
435*d14d7d31Sis 		for (i = 0; i < sz; i++)
436*d14d7d31Sis 			*ob++ = to_u8_tbl[(ulong_t)kcd][k].u8[i];
437*d14d7d31Sis 
438*d14d7d31Sis 		ib++;
439*d14d7d31Sis 	}
440*d14d7d31Sis 
441*d14d7d31Sis 	*inbuf = (char *)ib;
442*d14d7d31Sis 	*inbytesleft = ibtail - ib;
443*d14d7d31Sis 	*outbuf = (char *)ob;
444*d14d7d31Sis 	*outbytesleft = obtail - ob;
445*d14d7d31Sis 
446*d14d7d31Sis 	return (ret_val);
447*d14d7d31Sis }
448*d14d7d31Sis 
449*d14d7d31Sis /*
450*d14d7d31Sis  * The following is the common kiconvstr function from UTF-8 to single byte
451*d14d7d31Sis  * codesets.
452*d14d7d31Sis  */
453*d14d7d31Sis static size_t
454*d14d7d31Sis kiconvstr_to_sb(size_t id, uchar_t *ib, size_t *inlen, uchar_t *ob,
455*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
456*d14d7d31Sis {
457*d14d7d31Sis 	size_t ret_val;
458*d14d7d31Sis 	uchar_t *oldib;
459*d14d7d31Sis 	uchar_t *ibtail;
460*d14d7d31Sis 	uchar_t *obtail;
461*d14d7d31Sis 	uint32_t u8;
462*d14d7d31Sis 	size_t i;
463*d14d7d31Sis 	size_t l;
464*d14d7d31Sis 	size_t h;
465*d14d7d31Sis 	size_t init_h;
466*d14d7d31Sis 	int8_t sz;
467*d14d7d31Sis 	boolean_t second;
468*d14d7d31Sis 	boolean_t do_not_ignore_null;
469*d14d7d31Sis 
470*d14d7d31Sis 	/* Let's make sure that the table id is within the valid boundary. */
471*d14d7d31Sis 	if (id > KICONV_MAX_MAPPING_TBLID) {
472*d14d7d31Sis 		*errno = EBADF;
473*d14d7d31Sis 		return ((size_t)-1);
474*d14d7d31Sis 	}
475*d14d7d31Sis 
476*d14d7d31Sis 	ret_val = 0;
477*d14d7d31Sis 	ibtail = ib + *inlen;
478*d14d7d31Sis 	obtail = ob + *outlen;
479*d14d7d31Sis 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
480*d14d7d31Sis 	init_h = sizeof (to_sb_tbl[id]) / sizeof (kiconv_to_sb_tbl_comp_t) - 1;
481*d14d7d31Sis 
482*d14d7d31Sis 	/* Skip any UTF-8 signature BOM character in the beginning. */
483*d14d7d31Sis 	if ((ibtail - ib) >= 3 && *ib == 0xef && *(ib + 1) == 0xbb &&
484*d14d7d31Sis 	    *(ib + 2) == 0xbf)
485*d14d7d31Sis 			ib += 3;
486*d14d7d31Sis 
487*d14d7d31Sis 	/*
488*d14d7d31Sis 	 * Basically this is pretty much the same as kiconv_to_sb() except
489*d14d7d31Sis 	 * that we are now accepting two flag values and doing the processing
490*d14d7d31Sis 	 * accordingly.
491*d14d7d31Sis 	 */
492*d14d7d31Sis 	while (ib < ibtail) {
493*d14d7d31Sis 		sz = u8_number_of_bytes[*ib];
494*d14d7d31Sis 		if (sz <= 0) {
495*d14d7d31Sis 			if (flag & KICONV_REPLACE_INVALID) {
496*d14d7d31Sis 				if (ob >= obtail) {
497*d14d7d31Sis 					*errno = E2BIG;
498*d14d7d31Sis 					ret_val = (size_t)-1;
499*d14d7d31Sis 					break;
500*d14d7d31Sis 				}
501*d14d7d31Sis 
502*d14d7d31Sis 				ib++;
503*d14d7d31Sis 				goto STR_TO_SB_REPLACE_INVALID;
504*d14d7d31Sis 			}
505*d14d7d31Sis 
506*d14d7d31Sis 			*errno = EILSEQ;
507*d14d7d31Sis 			ret_val = (size_t)-1;
508*d14d7d31Sis 			break;
509*d14d7d31Sis 		}
510*d14d7d31Sis 
511*d14d7d31Sis 		if (*ib == '\0' && do_not_ignore_null)
512*d14d7d31Sis 			break;
513*d14d7d31Sis 
514*d14d7d31Sis 		if (ob >= obtail) {
515*d14d7d31Sis 			*errno = E2BIG;
516*d14d7d31Sis 			ret_val = (size_t)-1;
517*d14d7d31Sis 			break;
518*d14d7d31Sis 		}
519*d14d7d31Sis 
520*d14d7d31Sis 		if (sz == 1) {
521*d14d7d31Sis 			*ob++ = *ib++;
522*d14d7d31Sis 			continue;
523*d14d7d31Sis 		}
524*d14d7d31Sis 
525*d14d7d31Sis 		if ((ibtail - ib) < sz) {
526*d14d7d31Sis 			if (flag & KICONV_REPLACE_INVALID) {
527*d14d7d31Sis 				ib = ibtail;
528*d14d7d31Sis 				goto STR_TO_SB_REPLACE_INVALID;
529*d14d7d31Sis 			}
530*d14d7d31Sis 
531*d14d7d31Sis 			*errno = EINVAL;
532*d14d7d31Sis 			ret_val = (size_t)-1;
533*d14d7d31Sis 			break;
534*d14d7d31Sis 		}
535*d14d7d31Sis 
536*d14d7d31Sis 		oldib = ib;
537*d14d7d31Sis 		u8 = *ib++;
538*d14d7d31Sis 		second = B_TRUE;
539*d14d7d31Sis 		for (i = 1; i < sz; i++) {
540*d14d7d31Sis 			if (second) {
541*d14d7d31Sis 				if (*ib < u8_valid_min_2nd_byte[u8] ||
542*d14d7d31Sis 				    *ib > u8_valid_max_2nd_byte[u8]) {
543*d14d7d31Sis 					if (flag & KICONV_REPLACE_INVALID) {
544*d14d7d31Sis 						ib = oldib + sz;
545*d14d7d31Sis 						goto STR_TO_SB_REPLACE_INVALID;
546*d14d7d31Sis 					}
547*d14d7d31Sis 
548*d14d7d31Sis 					*errno = EILSEQ;
549*d14d7d31Sis 					ret_val = (size_t)-1;
550*d14d7d31Sis 					ib = oldib;
551*d14d7d31Sis 					goto STR_TO_SB_ILLEGAL_CHAR_ERR;
552*d14d7d31Sis 				}
553*d14d7d31Sis 				second = B_FALSE;
554*d14d7d31Sis 			} else if (*ib < 0x80 || *ib > 0xbf) {
555*d14d7d31Sis 				if (flag & KICONV_REPLACE_INVALID) {
556*d14d7d31Sis 					ib = oldib + sz;
557*d14d7d31Sis 					goto STR_TO_SB_REPLACE_INVALID;
558*d14d7d31Sis 				}
559*d14d7d31Sis 
560*d14d7d31Sis 				*errno = EILSEQ;
561*d14d7d31Sis 				ret_val = (size_t)-1;
562*d14d7d31Sis 				ib = oldib;
563*d14d7d31Sis 				goto STR_TO_SB_ILLEGAL_CHAR_ERR;
564*d14d7d31Sis 			}
565*d14d7d31Sis 			u8 = (u8 << 8) | ((uint32_t)*ib);
566*d14d7d31Sis 			ib++;
567*d14d7d31Sis 		}
568*d14d7d31Sis 
569*d14d7d31Sis 		i = l = 0;
570*d14d7d31Sis 		h = init_h;
571*d14d7d31Sis 		while (l <= h) {
572*d14d7d31Sis 			i = (l + h) / 2;
573*d14d7d31Sis 			if (to_sb_tbl[id][i].u8 == u8)
574*d14d7d31Sis 				break;
575*d14d7d31Sis 			else if (to_sb_tbl[id][i].u8 < u8)
576*d14d7d31Sis 				l = i + 1;
577*d14d7d31Sis 			else
578*d14d7d31Sis 				h = i - 1;
579*d14d7d31Sis 		}
580*d14d7d31Sis 
581*d14d7d31Sis 		if (to_sb_tbl[id][i].u8 == u8) {
582*d14d7d31Sis 			*ob++ = to_sb_tbl[id][i].sb;
583*d14d7d31Sis 		} else {
584*d14d7d31Sis STR_TO_SB_REPLACE_INVALID:
585*d14d7d31Sis 			*ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
586*d14d7d31Sis 			ret_val++;
587*d14d7d31Sis 		}
588*d14d7d31Sis 	}
589*d14d7d31Sis 
590*d14d7d31Sis STR_TO_SB_ILLEGAL_CHAR_ERR:
591*d14d7d31Sis 	*inlen = ibtail - ib;
592*d14d7d31Sis 	*outlen = obtail - ob;
593*d14d7d31Sis 
594*d14d7d31Sis 	return (ret_val);
595*d14d7d31Sis }
596*d14d7d31Sis 
/*
 * The following four functions are the kiconvstr entry points for the
 * conversions from UTF-8 that are recorded in the conv_list[] defined below.
 */
601*d14d7d31Sis static size_t
602*d14d7d31Sis kiconvstr_to_1252(char *inarray, size_t *inlen, char *outarray,
603*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
604*d14d7d31Sis {
605*d14d7d31Sis 	return (kiconvstr_to_sb(KICONV_TBLID_1252, (uchar_t *)inarray,
606*d14d7d31Sis 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
607*d14d7d31Sis }
608*d14d7d31Sis 
609*d14d7d31Sis static size_t
610*d14d7d31Sis kiconvstr_to_1(char *inarray, size_t *inlen, char *outarray,
611*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
612*d14d7d31Sis {
613*d14d7d31Sis 	return (kiconvstr_to_sb(KICONV_TBLID_8859_1, (uchar_t *)inarray,
614*d14d7d31Sis 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
615*d14d7d31Sis }
616*d14d7d31Sis 
617*d14d7d31Sis static size_t
618*d14d7d31Sis kiconvstr_to_15(char *inarray, size_t *inlen, char *outarray,
619*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
620*d14d7d31Sis {
621*d14d7d31Sis 	return (kiconvstr_to_sb(KICONV_TBLID_8859_15, (uchar_t *)inarray,
622*d14d7d31Sis 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
623*d14d7d31Sis }
624*d14d7d31Sis 
625*d14d7d31Sis static size_t
626*d14d7d31Sis kiconvstr_to_850(char *inarray, size_t *inlen, char *outarray,
627*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
628*d14d7d31Sis {
629*d14d7d31Sis 	return (kiconvstr_to_sb(KICONV_TBLID_850, (uchar_t *)inarray,
630*d14d7d31Sis 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
631*d14d7d31Sis }
632*d14d7d31Sis 
633*d14d7d31Sis /*
634*d14d7d31Sis  * The following is the common kiconvstr function for conversions from
635*d14d7d31Sis  * single byte codesets to UTF-8.
636*d14d7d31Sis  */
637*d14d7d31Sis static size_t
638*d14d7d31Sis kiconvstr_fr_sb(size_t id, uchar_t *ib, size_t *inlen, uchar_t *ob,
639*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
640*d14d7d31Sis {
641*d14d7d31Sis 	size_t ret_val;
642*d14d7d31Sis 	uchar_t *ibtail;
643*d14d7d31Sis 	uchar_t *obtail;
644*d14d7d31Sis 	size_t i;
645*d14d7d31Sis 	size_t k;
646*d14d7d31Sis 	int8_t sz;
647*d14d7d31Sis 	boolean_t do_not_ignore_null;
648*d14d7d31Sis 
649*d14d7d31Sis 	ret_val = 0;
650*d14d7d31Sis 	ibtail = ib + *inlen;
651*d14d7d31Sis 	obtail = ob + *outlen;
652*d14d7d31Sis 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
653*d14d7d31Sis 
654*d14d7d31Sis 	while (ib < ibtail) {
655*d14d7d31Sis 		if (*ib == '\0' && do_not_ignore_null)
656*d14d7d31Sis 			break;
657*d14d7d31Sis 
658*d14d7d31Sis 		if (*ib < 0x80) {
659*d14d7d31Sis 			if (ob >= obtail) {
660*d14d7d31Sis 				*errno = E2BIG;
661*d14d7d31Sis 				ret_val = (size_t)-1;
662*d14d7d31Sis 				break;
663*d14d7d31Sis 			}
664*d14d7d31Sis 			*ob++ = *ib++;
665*d14d7d31Sis 			continue;
666*d14d7d31Sis 		}
667*d14d7d31Sis 
668*d14d7d31Sis 		k = *ib - 0x80;
669*d14d7d31Sis 		sz = u8_number_of_bytes[to_u8_tbl[id][k].u8[0]];
670*d14d7d31Sis 
671*d14d7d31Sis 		if (sz <= 0) {
672*d14d7d31Sis 			if (flag & KICONV_REPLACE_INVALID) {
673*d14d7d31Sis 				if ((obtail - ob) < 3) {
674*d14d7d31Sis 					*errno = E2BIG;
675*d14d7d31Sis 					ret_val = (size_t)-1;
676*d14d7d31Sis 					break;
677*d14d7d31Sis 				}
678*d14d7d31Sis 
679*d14d7d31Sis 				/* Save KICONV_UTF8_REPLACEMENT_CHAR. */
680*d14d7d31Sis 				*ob++ = 0xef;
681*d14d7d31Sis 				*ob++ = 0xbf;
682*d14d7d31Sis 				*ob++ = 0xbd;
683*d14d7d31Sis 				ret_val++;
684*d14d7d31Sis 				ib++;
685*d14d7d31Sis 
686*d14d7d31Sis 				continue;
687*d14d7d31Sis 			}
688*d14d7d31Sis 
689*d14d7d31Sis 			*errno = EILSEQ;
690*d14d7d31Sis 			ret_val = (size_t)-1;
691*d14d7d31Sis 			break;
692*d14d7d31Sis 		}
693*d14d7d31Sis 
694*d14d7d31Sis 		if ((obtail - ob) < sz) {
695*d14d7d31Sis 			*errno = E2BIG;
696*d14d7d31Sis 			ret_val = (size_t)-1;
697*d14d7d31Sis 			break;
698*d14d7d31Sis 		}
699*d14d7d31Sis 
700*d14d7d31Sis 		for (i = 0; i < sz; i++)
701*d14d7d31Sis 			*ob++ = to_u8_tbl[id][k].u8[i];
702*d14d7d31Sis 
703*d14d7d31Sis 		ib++;
704*d14d7d31Sis 	}
705*d14d7d31Sis 
706*d14d7d31Sis 	*inlen = ibtail - ib;
707*d14d7d31Sis 	*outlen = obtail - ob;
708*d14d7d31Sis 
709*d14d7d31Sis 	return (ret_val);
710*d14d7d31Sis }
711*d14d7d31Sis 
/*
 * The following four functions are the kiconvstr entry points for the
 * conversions to UTF-8 that are also recorded in the conv_list[] below.
 */
716*d14d7d31Sis static size_t
717*d14d7d31Sis kiconvstr_fr_1252(char *inarray, size_t *inlen, char *outarray,
718*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
719*d14d7d31Sis {
720*d14d7d31Sis 	return (kiconvstr_fr_sb(KICONV_TBLID_1252, (uchar_t *)inarray,
721*d14d7d31Sis 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
722*d14d7d31Sis }
723*d14d7d31Sis 
724*d14d7d31Sis static size_t
725*d14d7d31Sis kiconvstr_fr_1(char *inarray, size_t *inlen, char *outarray,
726*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
727*d14d7d31Sis {
728*d14d7d31Sis 	return (kiconvstr_fr_sb(KICONV_TBLID_8859_1, (uchar_t *)inarray,
729*d14d7d31Sis 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
730*d14d7d31Sis }
731*d14d7d31Sis 
732*d14d7d31Sis static size_t
733*d14d7d31Sis kiconvstr_fr_15(char *inarray, size_t *inlen, char *outarray,
734*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
735*d14d7d31Sis {
736*d14d7d31Sis 	return (kiconvstr_fr_sb(KICONV_TBLID_8859_15, (uchar_t *)inarray,
737*d14d7d31Sis 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
738*d14d7d31Sis }
739*d14d7d31Sis 
740*d14d7d31Sis static size_t
741*d14d7d31Sis kiconvstr_fr_850(char *inarray, size_t *inlen, char *outarray,
742*d14d7d31Sis 	size_t *outlen, int flag, int *errno)
743*d14d7d31Sis {
744*d14d7d31Sis 	return (kiconvstr_fr_sb(KICONV_TBLID_850, (uchar_t *)inarray,
745*d14d7d31Sis 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
746*d14d7d31Sis }
747*d14d7d31Sis 
/*
 * The following static vector contains the normalized code names
 * and their corresponding code ids. The entries are ordered somewhat
 * arbitrarily, based on the marketing data available. A code id can appear
 * more than once since aliases share the code id of the name they alias.
 *
 * The vector was generated by using a small utility program called
 * codeidlistgen.c that you can find in PSARC/2007/173/materials/util/.
 *
 * The code ids must be portable, i.e., if needed, you can always generate
 * the code_list[] again with different code ids. If you do so, you will also
 * need to update the conv_list[] below to match.
 */
760*d14d7d31Sis #define	KICONV_MAX_CODEID_ENTRY		68
761*d14d7d31Sis #define	KICONV_MAX_CODEID		42
762*d14d7d31Sis 
/*
 * Each entry pairs a normalized code name with its code id; entries that
 * share a code id are aliases of each other. The code ids are what the
 * conv_list[] entries use to identify the two codesets of a conversion.
 */
static kiconv_code_list_t code_list[KICONV_MAX_CODEID_ENTRY] = {
	/* Code id 0 is UTF-8. */
	{ "utf8", 0 },
	/* Code ids 1 to 4 are paired with UTF-8 by the embedded conversions. */
	{ "cp1252", 1 },
	{ "1252", 1 },
	{ "iso88591", 2 },
	{ "iso885915", 3 },
	{ "cp850", 4 },
	{ "850", 4 },
	/* The remaining code ids. */
	{ "eucjp", 5 },
	{ "eucjpms", 6 },
	{ "cp932", 7 },
	{ "932", 7 },
	{ "shiftjis", 8 },
	{ "pck", 8 },
	{ "sjis", 8 },
	{ "gb18030", 9 },
	{ "gbk", 10 },
	{ "cp936", 10 },
	{ "936", 10 },
	{ "euccn", 11 },
	{ "euckr", 12 },
	{ "unifiedhangul", 13 },
	{ "cp949", 13 },
	{ "949", 13 },
	{ "big5", 14 },
	{ "cp950", 14 },
	{ "950", 14 },
	{ "big5hkscs", 15 },
	{ "euctw", 16 },
	{ "cp950hkscs", 17 },
	{ "cp1250", 18 },
	{ "1250", 18 },
	{ "iso88592", 19 },
	{ "cp852", 20 },
	{ "852", 20 },
	{ "cp1251", 21 },
	{ "1251", 21 },
	{ "iso88595", 22 },
	{ "koi8r", 23 },
	{ "cp866", 24 },
	{ "866", 24 },
	{ "cp1253", 25 },
	{ "1253", 25 },
	{ "iso88597", 26 },
	{ "cp737", 27 },
	{ "737", 27 },
	{ "cp1254", 28 },
	{ "1254", 28 },
	{ "iso88599", 29 },
	{ "cp857", 30 },
	{ "857", 30 },
	{ "cp1256", 31 },
	{ "1256", 31 },
	{ "iso88596", 32 },
	{ "cp720", 33 },
	{ "720", 33 },
	{ "cp1255", 34 },
	{ "1255", 34 },
	{ "iso88598", 35 },
	{ "cp862", 36 },
	{ "862", 36 },
	{ "cp1257", 37 },
	{ "1257", 37 },
	{ "iso885913", 38 },
	{ "iso885910", 39 },
	{ "iso885911", 40 },
	{ "tis620", 40 },
	{ "iso88593", 41 },
	/* The last entry carries KICONV_MAX_CODEID. */
	{ "iso88594", 42 },
};
833*d14d7d31Sis 
/*
 * The supported code conversions are grouped per module; each module is
 * loaded as needed.
 */
838*d14d7d31Sis #define	KICONV_MAX_CONVERSIONS		84
839*d14d7d31Sis 
840*d14d7d31Sis static kiconv_conv_list_t conv_list[KICONV_MAX_CONVERSIONS] = {
841*d14d7d31Sis 	/* Embedded code conversions: */
842*d14d7d31Sis 	{
843*d14d7d31Sis 		1, 0, KICONV_EMBEDDED,
844*d14d7d31Sis 		open_to_1252, kiconv_to_sb, close_to_sb, kiconvstr_to_1252
845*d14d7d31Sis 	},
846*d14d7d31Sis 	{
847*d14d7d31Sis 		0, 1, KICONV_EMBEDDED,
848*d14d7d31Sis 		open_fr_1252, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_1252
849*d14d7d31Sis 	},
850*d14d7d31Sis 	{
851*d14d7d31Sis 		2, 0, KICONV_EMBEDDED,
852*d14d7d31Sis 		open_to_88591, kiconv_to_sb, close_to_sb, kiconvstr_to_1
853*d14d7d31Sis 	},
854*d14d7d31Sis 	{
855*d14d7d31Sis 		0, 2, KICONV_EMBEDDED,
856*d14d7d31Sis 		open_fr_88591, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_1
857*d14d7d31Sis 	},
858*d14d7d31Sis 	{
859*d14d7d31Sis 		3, 0, KICONV_EMBEDDED,
860*d14d7d31Sis 		open_to_885915, kiconv_to_sb, close_to_sb, kiconvstr_to_15
861*d14d7d31Sis 	},
862*d14d7d31Sis 	{
863*d14d7d31Sis 		0, 3, KICONV_EMBEDDED,
864*d14d7d31Sis 		open_fr_885915, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_15
865*d14d7d31Sis 	},
866*d14d7d31Sis 	{
867*d14d7d31Sis 		4, 0, KICONV_EMBEDDED,
868*d14d7d31Sis 		open_to_850, kiconv_to_sb, close_to_sb, kiconvstr_to_850
869*d14d7d31Sis 	},
870*d14d7d31Sis 	{
871*d14d7d31Sis 		0, 4, KICONV_EMBEDDED,
872*d14d7d31Sis 		open_fr_850, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_850
873*d14d7d31Sis 	},
874*d14d7d31Sis 
875*d14d7d31Sis 	/* kiconv_ja module conversions: */
876*d14d7d31Sis 	{ 0, 5, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
877*d14d7d31Sis 	{ 5, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
878*d14d7d31Sis 	{ 0, 6, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
879*d14d7d31Sis 	{ 6, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
880*d14d7d31Sis 	{ 0, 7, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
881*d14d7d31Sis 	{ 7, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
882*d14d7d31Sis 	{ 0, 8, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
883*d14d7d31Sis 	{ 8, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
884*d14d7d31Sis 
885*d14d7d31Sis 	/* kiconv_sc module conversions: */
886*d14d7d31Sis 	{ 0, 9, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
887*d14d7d31Sis 	{ 9, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
888*d14d7d31Sis 	{ 0, 10, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
889*d14d7d31Sis 	{ 10, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
890*d14d7d31Sis 	{ 0, 11, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
891*d14d7d31Sis 	{ 11, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
892*d14d7d31Sis 
893*d14d7d31Sis 	/* kiconv_ko module conversions: */
894*d14d7d31Sis 	{ 0, 12, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
895*d14d7d31Sis 	{ 12, 0, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
896*d14d7d31Sis 	{ 0, 13, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
897*d14d7d31Sis 	{ 13, 0, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
898*d14d7d31Sis 
899*d14d7d31Sis 	/* kiconv_tc module conversions: */
900*d14d7d31Sis 	{ 0, 14, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
901*d14d7d31Sis 	{ 14, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
902*d14d7d31Sis 	{ 0, 15, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
903*d14d7d31Sis 	{ 15, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
904*d14d7d31Sis 	{ 0, 16, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
905*d14d7d31Sis 	{ 16, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
906*d14d7d31Sis 	{ 0, 17, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
907*d14d7d31Sis 	{ 17, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
908*d14d7d31Sis 
909*d14d7d31Sis 	/* kiconv_emea module conversions: */
910*d14d7d31Sis 	{ 0, 18, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
911*d14d7d31Sis 	{ 18, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
912*d14d7d31Sis 	{ 0, 19, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
913*d14d7d31Sis 	{ 19, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
914*d14d7d31Sis 	{ 0, 20, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
915*d14d7d31Sis 	{ 20, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
916*d14d7d31Sis 	{ 0, 21, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
917*d14d7d31Sis 	{ 21, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
918*d14d7d31Sis 	{ 0, 22, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
919*d14d7d31Sis 	{ 22, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
920*d14d7d31Sis 	{ 0, 23, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
921*d14d7d31Sis 	{ 23, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
922*d14d7d31Sis 	{ 0, 24, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
923*d14d7d31Sis 	{ 24, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
924*d14d7d31Sis 	{ 0, 25, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
925*d14d7d31Sis 	{ 25, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
926*d14d7d31Sis 	{ 0, 26, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
927*d14d7d31Sis 	{ 26, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
928*d14d7d31Sis 	{ 0, 27, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
929*d14d7d31Sis 	{ 27, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
930*d14d7d31Sis 	{ 0, 28, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
931*d14d7d31Sis 	{ 28, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
932*d14d7d31Sis 	{ 0, 29, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
933*d14d7d31Sis 	{ 29, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
934*d14d7d31Sis 	{ 0, 30, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
935*d14d7d31Sis 	{ 30, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
936*d14d7d31Sis 	{ 0, 31, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
937*d14d7d31Sis 	{ 31, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
938*d14d7d31Sis 	{ 0, 32, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
939*d14d7d31Sis 	{ 32, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
940*d14d7d31Sis 	{ 0, 33, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
941*d14d7d31Sis 	{ 33, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
942*d14d7d31Sis 	{ 0, 34, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
943*d14d7d31Sis 	{ 34, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
944*d14d7d31Sis 	{ 0, 35, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
945*d14d7d31Sis 	{ 35, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
946*d14d7d31Sis 	{ 0, 36, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
947*d14d7d31Sis 	{ 36, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
948*d14d7d31Sis 	{ 0, 37, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
949*d14d7d31Sis 	{ 37, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
950*d14d7d31Sis 	{ 0, 38, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
951*d14d7d31Sis 	{ 38, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
952*d14d7d31Sis 	{ 0, 39, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
953*d14d7d31Sis 	{ 39, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
954*d14d7d31Sis 	{ 0, 40, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
955*d14d7d31Sis 	{ 40, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
956*d14d7d31Sis 	{ 0, 41, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
957*d14d7d31Sis 	{ 41, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
958*d14d7d31Sis 	{ 0, 42, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
959*d14d7d31Sis 	{ 42, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
960*d14d7d31Sis };
961*d14d7d31Sis 
962*d14d7d31Sis /* The list of implemeted and supported modules. */
963*d14d7d31Sis static kiconv_mod_list_t module_list[KICONV_MAX_MODULE_ID + 1] = {
964*d14d7d31Sis 	"kiconv_embedded", 0,
965*d14d7d31Sis 	"kiconv_ja", 0,
966*d14d7d31Sis 	"kiconv_sc", 0,
967*d14d7d31Sis 	"kiconv_ko", 0,
968*d14d7d31Sis 	"kiconv_tc", 0,
969*d14d7d31Sis 	"kiconv_emea", 0,
970*d14d7d31Sis };
971*d14d7d31Sis 
972*d14d7d31Sis /*
973*d14d7d31Sis  * We use conv_list_lock to restrict data access of both conv_list[] and
974*d14d7d31Sis  * module_list[] as they are tightly coupled critical sections that need to be
975*d14d7d31Sis  * dealt together as a unit.
976*d14d7d31Sis  */
977*d14d7d31Sis static kmutex_t conv_list_lock;
978*d14d7d31Sis 
979*d14d7d31Sis void
980*d14d7d31Sis kiconv_init()
981*d14d7d31Sis {
982*d14d7d31Sis 	mutex_init(&conv_list_lock, NULL, MUTEX_DEFAULT, NULL);
983*d14d7d31Sis }
984*d14d7d31Sis 
985*d14d7d31Sis /*
986*d14d7d31Sis  * The following is used to check on whether a kiconv module is being
987*d14d7d31Sis  * used or not at the _fini() of the module.
988*d14d7d31Sis  */
989*d14d7d31Sis size_t
990*d14d7d31Sis kiconv_module_ref_count(size_t mid)
991*d14d7d31Sis {
992*d14d7d31Sis 	int count;
993*d14d7d31Sis 
994*d14d7d31Sis 	if (mid <= 0 || mid > KICONV_MAX_MODULE_ID)
995*d14d7d31Sis 		return (0);
996*d14d7d31Sis 
997*d14d7d31Sis 	mutex_enter(&conv_list_lock);
998*d14d7d31Sis 
999*d14d7d31Sis 	count = module_list[mid].refcount;
1000*d14d7d31Sis 
1001*d14d7d31Sis 	mutex_exit(&conv_list_lock);
1002*d14d7d31Sis 
1003*d14d7d31Sis 	return (count);
1004*d14d7d31Sis }
1005*d14d7d31Sis 
1006*d14d7d31Sis /*
1007*d14d7d31Sis  * This function "normalizes" a given code name, n, by not including skippable
1008*d14d7d31Sis  * characters and folding uppercase letters to corresponding lowercase letters.
1009*d14d7d31Sis  * We only fold 7-bit ASCII uppercase characters since the names should be in
1010*d14d7d31Sis  * Portable Character Set of 7-bit ASCII.
1011*d14d7d31Sis  *
1012*d14d7d31Sis  * By doing this, we will be able to maximize the code name matches.
1013*d14d7d31Sis  */
1014*d14d7d31Sis static size_t
1015*d14d7d31Sis normalize_codename(const char *n)
1016*d14d7d31Sis {
1017*d14d7d31Sis 	char s[KICONV_MAX_CODENAME_LEN + 1];
1018*d14d7d31Sis 	size_t i;
1019*d14d7d31Sis 
1020*d14d7d31Sis 	if (n == NULL)
1021*d14d7d31Sis 		return ((size_t)-1);
1022*d14d7d31Sis 
1023*d14d7d31Sis 	for (i = 0; *n; n++) {
1024*d14d7d31Sis 		if (KICONV_SKIPPABLE_CHAR(*n))
1025*d14d7d31Sis 			continue;
1026*d14d7d31Sis 
1027*d14d7d31Sis 		/* If unreasonably lengthy, we don't support such names. */
1028*d14d7d31Sis 		if (i >= KICONV_MAX_CODENAME_LEN)
1029*d14d7d31Sis 			return ((size_t)-1);
1030*d14d7d31Sis 
1031*d14d7d31Sis 		s[i++] = (*n >= 'A' && *n <= 'Z') ? *n - 'A' + 'a' : *n;
1032*d14d7d31Sis 	}
1033*d14d7d31Sis 	s[i] = '\0';
1034*d14d7d31Sis 
1035*d14d7d31Sis 	/* With the normalized name, find the corresponding codeset id. */
1036*d14d7d31Sis 	for (i = 0; i < KICONV_MAX_CODEID_ENTRY; i++)
1037*d14d7d31Sis 		if (strcmp(s, code_list[i].name) == 0)
1038*d14d7d31Sis 			return (code_list[i].id);
1039*d14d7d31Sis 
1040*d14d7d31Sis 	/*
1041*d14d7d31Sis 	 * In future time, we will also have a few more lines of code at below
1042*d14d7d31Sis 	 * that will deal with other user-created modules' fromcodes and
1043*d14d7d31Sis 	 * tocodes including aliases in a different vector. For now, we don't
1044*d14d7d31Sis 	 * support that but only the known names to this project at this time.
1045*d14d7d31Sis 	 */
1046*d14d7d31Sis 
1047*d14d7d31Sis 	return ((size_t)-1);
1048*d14d7d31Sis }
1049*d14d7d31Sis 
1050*d14d7d31Sis /*
1051*d14d7d31Sis  * This function called from mod_install() registers supplied code
1052*d14d7d31Sis  * conversions. At this point, it does not honor aliases and hence does not
1053*d14d7d31Sis  * use nowait data field from the kiconv module info data structure.
1054*d14d7d31Sis  */
1055*d14d7d31Sis int
1056*d14d7d31Sis kiconv_register_module(kiconv_module_info_t *info)
1057*d14d7d31Sis {
1058*d14d7d31Sis 	size_t mid;
1059*d14d7d31Sis 	size_t fid;
1060*d14d7d31Sis 	size_t tid;
1061*d14d7d31Sis 	size_t i;
1062*d14d7d31Sis 	size_t j;
1063*d14d7d31Sis 	kiconv_ops_t *op;
1064*d14d7d31Sis 
1065*d14d7d31Sis 	/* Validate the given kiconv module info. */
1066*d14d7d31Sis 	if (info == NULL || info->module_name == NULL ||
1067*d14d7d31Sis 	    info->kiconv_num_convs == 0 || info->kiconv_ops_tbl == NULL)
1068*d14d7d31Sis 		return (EINVAL);
1069*d14d7d31Sis 
1070*d14d7d31Sis 	/*
1071*d14d7d31Sis 	 * Check if this is one of the known modules. At this point,
1072*d14d7d31Sis 	 * we do not allow user-defined kiconv modules and that'd be for
1073*d14d7d31Sis 	 * a future project.
1074*d14d7d31Sis 	 */
1075*d14d7d31Sis 	for (mid = 1; mid <= KICONV_MAX_MODULE_ID; mid++)
1076*d14d7d31Sis 		if (strcmp(module_list[mid].name, info->module_name) == 0)
1077*d14d7d31Sis 			break;
1078*d14d7d31Sis 	if (mid > KICONV_MAX_MODULE_ID)
1079*d14d7d31Sis 		return (EINVAL);
1080*d14d7d31Sis 
1081*d14d7d31Sis 	/* Let's register the conversions supplied. */
1082*d14d7d31Sis 	mutex_enter(&conv_list_lock);
1083*d14d7d31Sis 
1084*d14d7d31Sis 	/*
1085*d14d7d31Sis 	 * This is very unlikely situation but by any chance we don't want to
1086*d14d7d31Sis 	 * register a module that is already in.
1087*d14d7d31Sis 	 */
1088*d14d7d31Sis 	if (module_list[mid].refcount > 0) {
1089*d14d7d31Sis 		mutex_exit(&conv_list_lock);
1090*d14d7d31Sis 		return (EAGAIN);
1091*d14d7d31Sis 	}
1092*d14d7d31Sis 
1093*d14d7d31Sis 	for (i = 0; i < info->kiconv_num_convs; i++) {
1094*d14d7d31Sis 		op = &(info->kiconv_ops_tbl[i]);
1095*d14d7d31Sis 
1096*d14d7d31Sis 		fid = normalize_codename(op->fromcode);
1097*d14d7d31Sis 		tid = normalize_codename(op->tocode);
1098*d14d7d31Sis 
1099*d14d7d31Sis 		/*
1100*d14d7d31Sis 		 * If we find anything wrong in this particular conversion,
1101*d14d7d31Sis 		 * we skip this one and continue to the next one. This include
1102*d14d7d31Sis 		 * a case where there is a conversion already being assigned
1103*d14d7d31Sis 		 * into the conv_list[] somehow, i.e., new one never kicks out
1104*d14d7d31Sis 		 * old one.
1105*d14d7d31Sis 		 */
1106*d14d7d31Sis 		if (op->kiconv_open == NULL || op->kiconv == NULL ||
1107*d14d7d31Sis 		    op->kiconv_close == NULL || op->kiconvstr == NULL)
1108*d14d7d31Sis 			continue;
1109*d14d7d31Sis 
1110*d14d7d31Sis 		for (j = 0; j < KICONV_MAX_CONVERSIONS; j++) {
1111*d14d7d31Sis 			if (conv_list[j].mid == mid &&
1112*d14d7d31Sis 			    conv_list[j].fid == fid &&
1113*d14d7d31Sis 			    conv_list[j].tid == tid) {
1114*d14d7d31Sis 				if (conv_list[j].open == NULL) {
1115*d14d7d31Sis 					conv_list[j].open = op->kiconv_open;
1116*d14d7d31Sis 					conv_list[j].kiconv = op->kiconv;
1117*d14d7d31Sis 					conv_list[j].close = op->kiconv_close;
1118*d14d7d31Sis 					conv_list[j].kiconvstr = op->kiconvstr;
1119*d14d7d31Sis 				}
1120*d14d7d31Sis 				break;
1121*d14d7d31Sis 			}
1122*d14d7d31Sis 		}
1123*d14d7d31Sis 	}
1124*d14d7d31Sis 
1125*d14d7d31Sis 	mutex_exit(&conv_list_lock);
1126*d14d7d31Sis 
1127*d14d7d31Sis 	return (0);
1128*d14d7d31Sis }
1129*d14d7d31Sis 
1130*d14d7d31Sis /*
1131*d14d7d31Sis  * The following function called during mod_remove() will try to unregister,
1132*d14d7d31Sis  * i.e., clear up conversion function pointers, from the conv_list[] if it
1133*d14d7d31Sis  * can. If there is any code conversions being used, then, the function will
1134*d14d7d31Sis  * just return EBUSY indicating that the module cannot be unloaded.
1135*d14d7d31Sis  */
1136*d14d7d31Sis int
1137*d14d7d31Sis kiconv_unregister_module(kiconv_module_info_t *info)
1138*d14d7d31Sis {
1139*d14d7d31Sis 	size_t mid;
1140*d14d7d31Sis 	size_t i;
1141*d14d7d31Sis 
1142*d14d7d31Sis 	if (info == NULL || info->module_name == NULL ||
1143*d14d7d31Sis 	    info->kiconv_num_convs == 0 || info->kiconv_ops_tbl == NULL)
1144*d14d7d31Sis 		return (EINVAL);
1145*d14d7d31Sis 
1146*d14d7d31Sis 	for (mid = 1; mid <= KICONV_MAX_MODULE_ID; mid++)
1147*d14d7d31Sis 		if (strcmp(module_list[mid].name, info->module_name) == 0)
1148*d14d7d31Sis 			break;
1149*d14d7d31Sis 	if (mid > KICONV_MAX_MODULE_ID)
1150*d14d7d31Sis 		return (EINVAL);
1151*d14d7d31Sis 
1152*d14d7d31Sis 	mutex_enter(&conv_list_lock);
1153*d14d7d31Sis 
1154*d14d7d31Sis 	/*
1155*d14d7d31Sis 	 * If any of the conversions are used, then, this module canont be
1156*d14d7d31Sis 	 * unloaded.
1157*d14d7d31Sis 	 */
1158*d14d7d31Sis 	if (module_list[mid].refcount > 0) {
1159*d14d7d31Sis 		mutex_exit(&conv_list_lock);
1160*d14d7d31Sis 		return (EBUSY);
1161*d14d7d31Sis 	}
1162*d14d7d31Sis 
1163*d14d7d31Sis 	/*
1164*d14d7d31Sis 	 * Otherwise, we unregister all conversions from this module
1165*d14d7d31Sis 	 * and be ready for the unloading. At this point, we only care about
1166*d14d7d31Sis 	 * the conversions we know about with the module.
1167*d14d7d31Sis 	 */
1168*d14d7d31Sis 	for (i = 0; i < KICONV_MAX_CONVERSIONS; i++) {
1169*d14d7d31Sis 		if (conv_list[i].mid == mid) {
1170*d14d7d31Sis 			conv_list[i].open = NULL;
1171*d14d7d31Sis 			conv_list[i].kiconv = NULL;
1172*d14d7d31Sis 			conv_list[i].close = NULL;
1173*d14d7d31Sis 			conv_list[i].kiconvstr = NULL;
1174*d14d7d31Sis 		}
1175*d14d7d31Sis 	}
1176*d14d7d31Sis 
1177*d14d7d31Sis 	mutex_exit(&conv_list_lock);
1178*d14d7d31Sis 
1179*d14d7d31Sis 	return (0);
1180*d14d7d31Sis }
1181*d14d7d31Sis 
1182*d14d7d31Sis /*
1183*d14d7d31Sis  * The following function check if asked code conversion is available
1184*d14d7d31Sis  * and if necessary, load the corresponding kiconv module that contains
1185*d14d7d31Sis  * the conversion (and others).
1186*d14d7d31Sis  */
1187*d14d7d31Sis static kiconv_t
1188*d14d7d31Sis check_and_load_conversions(const char *tocode, const char *fromcode)
1189*d14d7d31Sis {
1190*d14d7d31Sis 	kiconv_t kcd;
1191*d14d7d31Sis 	size_t tid;
1192*d14d7d31Sis 	size_t fid;
1193*d14d7d31Sis 	size_t mid;
1194*d14d7d31Sis 	size_t i;
1195*d14d7d31Sis 
1196*d14d7d31Sis 	/* Normalize the given names and find the corresponding code ids. */
1197*d14d7d31Sis 	tid = normalize_codename(tocode);
1198*d14d7d31Sis 	if (tid == (size_t)-1)
1199*d14d7d31Sis 		return ((kiconv_t)-1);
1200*d14d7d31Sis 
1201*d14d7d31Sis 	fid = normalize_codename(fromcode);
1202*d14d7d31Sis 	if (fid == (size_t)-1)
1203*d14d7d31Sis 		return ((kiconv_t)-1);
1204*d14d7d31Sis 
1205*d14d7d31Sis 	/*
1206*d14d7d31Sis 	 * Search the conversion.
1207*d14d7d31Sis 	 *
1208*d14d7d31Sis 	 * If the conversion isn't supported, just return -1.
1209*d14d7d31Sis 	 * If the conversion is supported but there is no corresponding
1210*d14d7d31Sis 	 * module loaded, try to load it and if successful, return
1211*d14d7d31Sis 	 * a kiconv conversion descriptor memory block.
1212*d14d7d31Sis 	 *
1213*d14d7d31Sis 	 * We maintain a reference counter of uint_t for each module.
1214*d14d7d31Sis 	 */
1215*d14d7d31Sis 	mutex_enter(&conv_list_lock);
1216*d14d7d31Sis 
1217*d14d7d31Sis 	for (i = 0; i < KICONV_MAX_CONVERSIONS; i++)
1218*d14d7d31Sis 		if (conv_list[i].tid == tid && conv_list[i].fid == fid)
1219*d14d7d31Sis 			break;
1220*d14d7d31Sis 	if (i >= KICONV_MAX_CONVERSIONS) {
1221*d14d7d31Sis 		mutex_exit(&conv_list_lock);
1222*d14d7d31Sis 		return ((kiconv_t)-1);
1223*d14d7d31Sis 	}
1224*d14d7d31Sis 
1225*d14d7d31Sis 	mid = conv_list[i].mid;
1226*d14d7d31Sis 
1227*d14d7d31Sis 	if (conv_list[i].open == NULL) {
1228*d14d7d31Sis 		mutex_exit(&conv_list_lock);
1229*d14d7d31Sis 
1230*d14d7d31Sis 		if (modload("kiconv", module_list[mid].name) < 0)
1231*d14d7d31Sis 			return ((kiconv_t)-1);
1232*d14d7d31Sis 
1233*d14d7d31Sis 		/*
1234*d14d7d31Sis 		 * Let's double check if something happened right after
1235*d14d7d31Sis 		 * the modload and/or if the module really has the conversion.
1236*d14d7d31Sis 		 */
1237*d14d7d31Sis 		mutex_enter(&conv_list_lock);
1238*d14d7d31Sis 
1239*d14d7d31Sis 		if (conv_list[i].open == NULL) {
1240*d14d7d31Sis 			mutex_exit(&conv_list_lock);
1241*d14d7d31Sis 			return ((kiconv_t)-1);
1242*d14d7d31Sis 		}
1243*d14d7d31Sis 	}
1244*d14d7d31Sis 
1245*d14d7d31Sis 	/*
1246*d14d7d31Sis 	 * If we got the conversion, we will use the conversion function
1247*d14d7d31Sis 	 * in the module and so let's increase the module's refcounter
1248*d14d7d31Sis 	 * so that the module won't be kicked out. (To be more exact and
1249*d14d7d31Sis 	 * specific, the "refcount" is thus the reference counter of
1250*d14d7d31Sis 	 * the module functions being used.)
1251*d14d7d31Sis 	 */
1252*d14d7d31Sis 	if (module_list[mid].refcount < UINT_MAX)
1253*d14d7d31Sis 		module_list[mid].refcount++;
1254*d14d7d31Sis 
1255*d14d7d31Sis 	mutex_exit(&conv_list_lock);
1256*d14d7d31Sis 
1257*d14d7d31Sis 	kcd = (kiconv_t)kmem_alloc(sizeof (kiconv_data_t), KM_SLEEP);
1258*d14d7d31Sis 	kcd->handle = (void *)-1;
1259*d14d7d31Sis 	kcd->id = i;
1260*d14d7d31Sis 
1261*d14d7d31Sis 	return (kcd);
1262*d14d7d31Sis }
1263*d14d7d31Sis 
1264*d14d7d31Sis /*
1265*d14d7d31Sis  * The following are the four "Committed" interfaces.
1266*d14d7d31Sis  */
1267*d14d7d31Sis kiconv_t
1268*d14d7d31Sis kiconv_open(const char *tocode, const char *fromcode)
1269*d14d7d31Sis {
1270*d14d7d31Sis 	kiconv_t kcd;
1271*d14d7d31Sis 	size_t mid;
1272*d14d7d31Sis 
1273*d14d7d31Sis 	kcd = check_and_load_conversions(tocode, fromcode);
1274*d14d7d31Sis 	if (kcd == (kiconv_t)-1)
1275*d14d7d31Sis 		return ((kiconv_t)-1);
1276*d14d7d31Sis 
1277*d14d7d31Sis 	kcd->handle = (conv_list[kcd->id].open)();
1278*d14d7d31Sis 	if (kcd->handle == (void *)-1) {
1279*d14d7d31Sis 		/*
1280*d14d7d31Sis 		 * If the conversion couldn't be opened for some reason,
1281*d14d7d31Sis 		 * then, we unallocate the kcd and, more importantly, before
1282*d14d7d31Sis 		 * that, we also decrease the module reference counter.
1283*d14d7d31Sis 		 */
1284*d14d7d31Sis 		mid = conv_list[kcd->id].mid;
1285*d14d7d31Sis 
1286*d14d7d31Sis 		mutex_enter(&conv_list_lock);
1287*d14d7d31Sis 
1288*d14d7d31Sis 		if (module_list[mid].refcount > 0)
1289*d14d7d31Sis 			module_list[mid].refcount--;
1290*d14d7d31Sis 
1291*d14d7d31Sis 		mutex_exit(&conv_list_lock);
1292*d14d7d31Sis 
1293*d14d7d31Sis 		kmem_free((void *)kcd, sizeof (kiconv_data_t));
1294*d14d7d31Sis 
1295*d14d7d31Sis 		return ((kiconv_t)-1);
1296*d14d7d31Sis 	}
1297*d14d7d31Sis 
1298*d14d7d31Sis 	return (kcd);
1299*d14d7d31Sis }
1300*d14d7d31Sis 
1301*d14d7d31Sis size_t
1302*d14d7d31Sis kiconv(kiconv_t kcd, char **inbuf, size_t *inbytesleft,
1303*d14d7d31Sis 	char **outbuf, size_t *outbytesleft, int *errno)
1304*d14d7d31Sis {
1305*d14d7d31Sis 	/* Do some minimum checking on the kiconv conversion descriptor. */
1306*d14d7d31Sis 	if (! kcd || kcd == (kiconv_t)-1 || conv_list[kcd->id].kiconv == NULL) {
1307*d14d7d31Sis 		*errno = EBADF;
1308*d14d7d31Sis 		return ((size_t)-1);
1309*d14d7d31Sis 	}
1310*d14d7d31Sis 
1311*d14d7d31Sis 	return ((conv_list[kcd->id].kiconv)(kcd->handle, inbuf, inbytesleft,
1312*d14d7d31Sis 	    outbuf, outbytesleft, errno));
1313*d14d7d31Sis }
1314*d14d7d31Sis 
1315*d14d7d31Sis int
1316*d14d7d31Sis kiconv_close(kiconv_t kcd)
1317*d14d7d31Sis {
1318*d14d7d31Sis 	int ret;
1319*d14d7d31Sis 	size_t mid;
1320*d14d7d31Sis 
1321*d14d7d31Sis 	if (! kcd || kcd == (kiconv_t)-1 || conv_list[kcd->id].close == NULL)
1322*d14d7d31Sis 		return (EBADF);
1323*d14d7d31Sis 
1324*d14d7d31Sis 	mid = conv_list[kcd->id].mid;
1325*d14d7d31Sis 
1326*d14d7d31Sis 	ret = (conv_list[kcd->id].close)(kcd->handle);
1327*d14d7d31Sis 
1328*d14d7d31Sis 	kmem_free((void *)kcd, sizeof (kiconv_data_t));
1329*d14d7d31Sis 
1330*d14d7d31Sis 	mutex_enter(&conv_list_lock);
1331*d14d7d31Sis 
1332*d14d7d31Sis 	/*
1333*d14d7d31Sis 	 * While we maintain reference conter for each module, once loaded,
1334*d14d7d31Sis 	 * we don't modunload from kiconv functions even if the counter
1335*d14d7d31Sis 	 * reaches back to zero.
1336*d14d7d31Sis 	 */
1337*d14d7d31Sis 	if (module_list[mid].refcount > 0)
1338*d14d7d31Sis 		module_list[mid].refcount--;
1339*d14d7d31Sis 
1340*d14d7d31Sis 	mutex_exit(&conv_list_lock);
1341*d14d7d31Sis 
1342*d14d7d31Sis 	return (ret);
1343*d14d7d31Sis }
1344*d14d7d31Sis 
1345*d14d7d31Sis size_t
1346*d14d7d31Sis kiconvstr(const char *tocode, const char *fromcode, char *inarray,
1347*d14d7d31Sis 	size_t *inlen, char *outarray, size_t *outlen, int flag, int *errno)
1348*d14d7d31Sis {
1349*d14d7d31Sis 	kiconv_t kcd;
1350*d14d7d31Sis 	size_t ret;
1351*d14d7d31Sis 	size_t mid;
1352*d14d7d31Sis 
1353*d14d7d31Sis 	kcd = check_and_load_conversions(tocode, fromcode);
1354*d14d7d31Sis 	if (kcd == (kiconv_t)-1 || conv_list[kcd->id].kiconvstr == NULL) {
1355*d14d7d31Sis 		*errno = EBADF;
1356*d14d7d31Sis 		return ((size_t)-1);
1357*d14d7d31Sis 	}
1358*d14d7d31Sis 
1359*d14d7d31Sis 	mid = conv_list[kcd->id].mid;
1360*d14d7d31Sis 
1361*d14d7d31Sis 	ret = (conv_list[kcd->id].kiconvstr)(inarray, inlen, outarray, outlen,
1362*d14d7d31Sis 	    flag, errno);
1363*d14d7d31Sis 
1364*d14d7d31Sis 	kmem_free((void *)kcd, sizeof (kiconv_data_t));
1365*d14d7d31Sis 
1366*d14d7d31Sis 	mutex_enter(&conv_list_lock);
1367*d14d7d31Sis 
1368*d14d7d31Sis 	if (module_list[mid].refcount > 0)
1369*d14d7d31Sis 		module_list[mid].refcount--;
1370*d14d7d31Sis 
1371*d14d7d31Sis 	mutex_exit(&conv_list_lock);
1372*d14d7d31Sis 
1373*d14d7d31Sis 	return (ret);
1374*d14d7d31Sis }
1375