xref: /illumos-gate/usr/src/uts/common/os/kiconv.c (revision 2d6eb4a5e0a47d30189497241345dc5466bb68ab)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Kernel iconv code conversion functions (PSARC/2007/173).
28  *
29  * Man pages: kiconv_open(9F), kiconv(9F), kiconv_close(9F), and kiconvstr(9F).
30  * Interface stability: Committed.
31  */
32 
33 #include <sys/types.h>
34 #include <sys/param.h>
35 #include <sys/sysmacros.h>
36 #include <sys/systm.h>
37 #include <sys/debug.h>
38 #include <sys/kmem.h>
39 #include <sys/sunddi.h>
40 #include <sys/ksynch.h>
41 #include <sys/modctl.h>
42 #include <sys/byteorder.h>
43 #include <sys/errno.h>
44 #include <sys/kiconv.h>
45 #include <sys/kiconv_latin1.h>
46 
47 
48 /*
49  * The following macros indicate ids to the correct code conversion mapping
50  * data tables to use. The actual tables are coming from <sys/kiconv_latin1.h>.
51  */
52 #define	KICONV_TBLID_1252		(0x00)
53 #define	KICONV_TBLID_8859_1		(0x01)
54 #define	KICONV_TBLID_8859_15		(0x02)
55 #define	KICONV_TBLID_850		(0x03)
56 
57 #define	KICONV_MAX_MAPPING_TBLID	(0x03)
58 
59 /*
60  * The following tables are coming from u8_textprep.c. We use them to
61  * check on validity of UTF-8 characters and their bytes.
62  */
63 extern const int8_t u8_number_of_bytes[];
64 extern const uint8_t u8_valid_min_2nd_byte[];
65 extern const uint8_t u8_valid_max_2nd_byte[];
66 
67 
68 /*
69  * The following four functions, open_to_1252(), open_to_88591(),
70  * open_to_885915(), and open_to_850(), are kiconv_open functions from
71  * UTF-8 to corresponding single byte codesets.
72  */
73 static void *
open_to_1252()74 open_to_1252()
75 {
76 	kiconv_state_t s;
77 
78 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
79 	s->id = KICONV_TBLID_1252;
80 	s->bom_processed = 0;
81 
82 	return ((void *)s);
83 }
84 
85 static void *
open_to_88591()86 open_to_88591()
87 {
88 	kiconv_state_t s;
89 
90 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
91 	s->id = KICONV_TBLID_8859_1;
92 	s->bom_processed = 0;
93 
94 	return ((void *)s);
95 }
96 
97 static void *
open_to_885915()98 open_to_885915()
99 {
100 	kiconv_state_t s;
101 
102 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
103 	s->id = KICONV_TBLID_8859_15;
104 	s->bom_processed = 0;
105 
106 	return ((void *)s);
107 }
108 
109 static void *
open_to_850()110 open_to_850()
111 {
112 	kiconv_state_t s;
113 
114 	s = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t), KM_SLEEP);
115 	s->id = KICONV_TBLID_850;
116 	s->bom_processed = 0;
117 
118 	return ((void *)s);
119 }
120 
121 /*
122  * The following four functions, open_fr_1252(), open_fr_88591(),
123  * open_fr_885915(), and open_fr_850(), are kiconv_open functions from
124  * corresponding single byte codesets to UTF-8.
125  */
126 static void *
open_fr_1252()127 open_fr_1252()
128 {
129 	return ((void *)KICONV_TBLID_1252);
130 }
131 
132 static void *
open_fr_88591()133 open_fr_88591()
134 {
135 	return ((void *)KICONV_TBLID_8859_1);
136 }
137 
138 static void *
open_fr_885915()139 open_fr_885915()
140 {
141 	return ((void *)KICONV_TBLID_8859_15);
142 }
143 
144 static void *
open_fr_850()145 open_fr_850()
146 {
147 	return ((void *)KICONV_TBLID_850);
148 }
149 
150 /*
151  * The following close_to_sb() function is kiconv_close function for
152  * the conversions from UTF-8 to single byte codesets. The close_fr_sb()
153  * is kiconv_close function for the conversions from single byte codesets to
154  * UTF-8.
155  */
156 static int
close_to_sb(void * s)157 close_to_sb(void *s)
158 {
159 	if (! s || s == (void *)-1)
160 		return (EBADF);
161 
162 	kmem_free(s, sizeof (kiconv_state_data_t));
163 
164 	return (0);
165 }
166 
167 static int
close_fr_sb(void * s)168 close_fr_sb(void *s)
169 {
170 	if ((ulong_t)s > KICONV_MAX_MAPPING_TBLID)
171 		return (EBADF);
172 
173 	return (0);
174 }
175 
176 /*
177  * The following is the common kiconv function for conversions from UTF-8
178  * to single byte codesets.
179  */
180 static size_t
kiconv_to_sb(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)181 kiconv_to_sb(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
182 	size_t *outbytesleft, int *errno)
183 {
184 	size_t id;
185 	size_t ret_val;
186 	uchar_t *ib;
187 	uchar_t *oldib;
188 	uchar_t *ob;
189 	uchar_t *ibtail;
190 	uchar_t *obtail;
191 	uint32_t u8;
192 	size_t i;
193 	size_t l;
194 	size_t h;
195 	size_t init_h;
196 	int8_t sz;
197 	boolean_t second;
198 
199 	/* Check on the kiconv code conversion descriptor. */
200 	if (! kcd || kcd == (void *)-1) {
201 		*errno = EBADF;
202 		return ((size_t)-1);
203 	}
204 
205 	/*
206 	 * Get the table id we are going to use for the code conversion
207 	 * and let's double check on it.
208 	 */
209 	id = ((kiconv_state_t)kcd)->id;
210 	if (id > KICONV_MAX_MAPPING_TBLID) {
211 		*errno = EBADF;
212 		return ((size_t)-1);
213 	}
214 
215 	/* If this is a state reset request, process and return. */
216 	if (! inbuf || ! (*inbuf)) {
217 		((kiconv_state_t)kcd)->bom_processed = 0;
218 		return ((size_t)0);
219 	}
220 
221 	ret_val = 0;
222 	ib = (uchar_t *)*inbuf;
223 	ob = (uchar_t *)*outbuf;
224 	ibtail = ib + *inbytesleft;
225 	obtail = ob + *outbytesleft;
226 
227 	/*
228 	 * The inital high value for the binary search we will be using
229 	 * shortly is a literal constant as of today but to be future proof,
230 	 * let's calculate it like the following at here.
231 	 */
232 	init_h = sizeof (to_sb_tbl[id]) / sizeof (kiconv_to_sb_tbl_comp_t) - 1;
233 
234 	/*
235 	 * If we haven't checked on the UTF-8 signature BOM character in
236 	 * the beginning of the conversion data stream, we check it and if
237 	 * find one, we skip it since we have no use for it.
238 	 */
239 	if (((kiconv_state_t)kcd)->bom_processed == 0 && (ibtail - ib) >= 3 &&
240 	    *ib == 0xef && *(ib + 1) == 0xbb && *(ib + 2) == 0xbf)
241 			ib += 3;
242 	((kiconv_state_t)kcd)->bom_processed = 1;
243 
244 	while (ib < ibtail) {
245 		sz = u8_number_of_bytes[*ib];
246 		if (sz <= 0) {
247 			*errno = EILSEQ;
248 			ret_val = (size_t)-1;
249 			break;
250 		}
251 
252 		/*
253 		 * If there is no room to write at the output buffer,
254 		 * issue E2BIG error.
255 		 */
256 		if (ob >= obtail) {
257 			*errno = E2BIG;
258 			ret_val = (size_t)-1;
259 			break;
260 		}
261 
262 		/*
263 		 * If it is a 7-bit ASCII character, we don't need to
264 		 * process further and we just copy the character over.
265 		 *
266 		 * If not, we collect the character bytes up to four bytes,
267 		 * validate the bytes, and binary search for the corresponding
268 		 * single byte codeset character byte. If we find it from
269 		 * the mapping table, we put that into the output buffer;
270 		 * otherwise, we put a replacement character instead as
271 		 * a non-identical conversion.
272 		 */
273 		if (sz == 1) {
274 			*ob++ = *ib++;
275 			continue;
276 		}
277 
278 		/*
279 		 * Issue EINVAL error if input buffer has an incomplete
280 		 * character at the end of the buffer.
281 		 */
282 		if ((ibtail - ib) < sz) {
283 			*errno = EINVAL;
284 			ret_val = (size_t)-1;
285 			break;
286 		}
287 
288 		/*
289 		 * We collect UTF-8 character bytes and also check if
290 		 * this is a valid UTF-8 character without any bogus bytes
291 		 * based on the latest UTF-8 binary representation.
292 		 */
293 		oldib = ib;
294 		u8 = *ib++;
295 		second = B_TRUE;
296 		for (i = 1; i < sz; i++) {
297 			if (second) {
298 				if (*ib < u8_valid_min_2nd_byte[u8] ||
299 				    *ib > u8_valid_max_2nd_byte[u8]) {
300 					*errno = EILSEQ;
301 					ret_val = (size_t)-1;
302 					ib = oldib;
303 					goto TO_SB_ILLEGAL_CHAR_ERR;
304 				}
305 				second = B_FALSE;
306 			} else if (*ib < 0x80 || *ib > 0xbf) {
307 				*errno = EILSEQ;
308 				ret_val = (size_t)-1;
309 				ib = oldib;
310 				goto TO_SB_ILLEGAL_CHAR_ERR;
311 			}
312 			u8 = (u8 << 8) | ((uint32_t)*ib);
313 			ib++;
314 		}
315 
316 		i = l = 0;
317 		h = init_h;
318 		while (l <= h) {
319 			i = (l + h) / 2;
320 			if (to_sb_tbl[id][i].u8 == u8)
321 				break;
322 			else if (to_sb_tbl[id][i].u8 < u8)
323 				l = i + 1;
324 			else
325 				h = i - 1;
326 		}
327 
328 		if (to_sb_tbl[id][i].u8 == u8) {
329 			*ob++ = to_sb_tbl[id][i].sb;
330 		} else {
331 			/*
332 			 * If we don't find a character in the target
333 			 * codeset, we insert an ASCII replacement character
334 			 * at the output buffer and indicate such
335 			 * "non-identical" conversion by increasing the
336 			 * return value which is the non-identical conversion
337 			 * counter if bigger than 0.
338 			 */
339 			*ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
340 			ret_val++;
341 		}
342 	}
343 
344 TO_SB_ILLEGAL_CHAR_ERR:
345 	*inbuf = (char *)ib;
346 	*inbytesleft = ibtail - ib;
347 	*outbuf = (char *)ob;
348 	*outbytesleft = obtail - ob;
349 
350 	return (ret_val);
351 }
352 
353 /*
354  * The following is the common kiconv function from single byte codesets to
355  * UTF-8.
356  */
357 static size_t
kiconv_fr_sb(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)358 kiconv_fr_sb(void *kcd, char **inbuf, size_t *inbytesleft, char **outbuf,
359 	size_t *outbytesleft, int *errno)
360 {
361 	size_t ret_val;
362 	uchar_t *ib;
363 	uchar_t *ob;
364 	uchar_t *ibtail;
365 	uchar_t *obtail;
366 	size_t i;
367 	size_t k;
368 	int8_t sz;
369 
370 	/* Check on the kiconv code conversion descriptor validity. */
371 	if ((ulong_t)kcd > KICONV_MAX_MAPPING_TBLID) {
372 		*errno = EBADF;
373 		return ((size_t)-1);
374 	}
375 
376 	/*
377 	 * If this is a state reset request, there is nothing to do and so
378 	 * we just return.
379 	 */
380 	if (! inbuf || ! (*inbuf))
381 		return ((size_t)0);
382 
383 	ret_val = 0;
384 	ib = (uchar_t *)*inbuf;
385 	ob = (uchar_t *)*outbuf;
386 	ibtail = ib + *inbytesleft;
387 	obtail = ob + *outbytesleft;
388 
389 	while (ib < ibtail) {
390 		/*
391 		 * If this is a 7-bit ASCII character, we just copy over and
392 		 * that's all we need to do for this character.
393 		 */
394 		if (*ib < 0x80) {
395 			if (ob >= obtail) {
396 				*errno = E2BIG;
397 				ret_val = (size_t)-1;
398 				break;
399 			}
400 
401 			*ob++ = *ib++;
402 			continue;
403 		}
404 
405 		/*
406 		 * Otherwise, we get the corresponding UTF-8 character bytes
407 		 * from the mapping table and copy them over.
408 		 *
409 		 * We don't need to worry about if the UTF-8 character bytes
410 		 * at the mapping tables are valid or not since they are good.
411 		 */
412 		k = *ib - 0x80;
413 		sz = u8_number_of_bytes[to_u8_tbl[(ulong_t)kcd][k].u8[0]];
414 
415 		/*
416 		 * If sz <= 0, that means we don't have any assigned character
417 		 * at the code point, k + 0x80, of the single byte codeset
418 		 * which is the fromcode. In other words, the input buffer
419 		 * has an illegal character.
420 		 */
421 		if (sz <= 0) {
422 			*errno = EILSEQ;
423 			ret_val = (size_t)-1;
424 			break;
425 		}
426 
427 		if ((obtail - ob) < sz) {
428 			*errno = E2BIG;
429 			ret_val = (size_t)-1;
430 			break;
431 		}
432 
433 		for (i = 0; i < sz; i++)
434 			*ob++ = to_u8_tbl[(ulong_t)kcd][k].u8[i];
435 
436 		ib++;
437 	}
438 
439 	*inbuf = (char *)ib;
440 	*inbytesleft = ibtail - ib;
441 	*outbuf = (char *)ob;
442 	*outbytesleft = obtail - ob;
443 
444 	return (ret_val);
445 }
446 
447 /*
448  * The following is the common kiconvstr function from UTF-8 to single byte
449  * codesets.
450  */
451 static size_t
kiconvstr_to_sb(size_t id,uchar_t * ib,size_t * inlen,uchar_t * ob,size_t * outlen,int flag,int * errno)452 kiconvstr_to_sb(size_t id, uchar_t *ib, size_t *inlen, uchar_t *ob,
453 	size_t *outlen, int flag, int *errno)
454 {
455 	size_t ret_val;
456 	uchar_t *oldib;
457 	uchar_t *ibtail;
458 	uchar_t *obtail;
459 	uint32_t u8;
460 	size_t i;
461 	size_t l;
462 	size_t h;
463 	size_t init_h;
464 	int8_t sz;
465 	boolean_t second;
466 	boolean_t do_not_ignore_null;
467 
468 	/* Let's make sure that the table id is within the valid boundary. */
469 	if (id > KICONV_MAX_MAPPING_TBLID) {
470 		*errno = EBADF;
471 		return ((size_t)-1);
472 	}
473 
474 	ret_val = 0;
475 	ibtail = ib + *inlen;
476 	obtail = ob + *outlen;
477 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
478 	init_h = sizeof (to_sb_tbl[id]) / sizeof (kiconv_to_sb_tbl_comp_t) - 1;
479 
480 	/* Skip any UTF-8 signature BOM character in the beginning. */
481 	if ((ibtail - ib) >= 3 && *ib == 0xef && *(ib + 1) == 0xbb &&
482 	    *(ib + 2) == 0xbf)
483 			ib += 3;
484 
485 	/*
486 	 * Basically this is pretty much the same as kiconv_to_sb() except
487 	 * that we are now accepting two flag values and doing the processing
488 	 * accordingly.
489 	 */
490 	while (ib < ibtail) {
491 		sz = u8_number_of_bytes[*ib];
492 		if (sz <= 0) {
493 			if (flag & KICONV_REPLACE_INVALID) {
494 				if (ob >= obtail) {
495 					*errno = E2BIG;
496 					ret_val = (size_t)-1;
497 					break;
498 				}
499 
500 				ib++;
501 				goto STR_TO_SB_REPLACE_INVALID;
502 			}
503 
504 			*errno = EILSEQ;
505 			ret_val = (size_t)-1;
506 			break;
507 		}
508 
509 		if (*ib == '\0' && do_not_ignore_null)
510 			break;
511 
512 		if (ob >= obtail) {
513 			*errno = E2BIG;
514 			ret_val = (size_t)-1;
515 			break;
516 		}
517 
518 		if (sz == 1) {
519 			*ob++ = *ib++;
520 			continue;
521 		}
522 
523 		if ((ibtail - ib) < sz) {
524 			if (flag & KICONV_REPLACE_INVALID) {
525 				ib = ibtail;
526 				goto STR_TO_SB_REPLACE_INVALID;
527 			}
528 
529 			*errno = EINVAL;
530 			ret_val = (size_t)-1;
531 			break;
532 		}
533 
534 		oldib = ib;
535 		u8 = *ib++;
536 		second = B_TRUE;
537 		for (i = 1; i < sz; i++) {
538 			if (second) {
539 				if (*ib < u8_valid_min_2nd_byte[u8] ||
540 				    *ib > u8_valid_max_2nd_byte[u8]) {
541 					if (flag & KICONV_REPLACE_INVALID) {
542 						ib = oldib + sz;
543 						goto STR_TO_SB_REPLACE_INVALID;
544 					}
545 
546 					*errno = EILSEQ;
547 					ret_val = (size_t)-1;
548 					ib = oldib;
549 					goto STR_TO_SB_ILLEGAL_CHAR_ERR;
550 				}
551 				second = B_FALSE;
552 			} else if (*ib < 0x80 || *ib > 0xbf) {
553 				if (flag & KICONV_REPLACE_INVALID) {
554 					ib = oldib + sz;
555 					goto STR_TO_SB_REPLACE_INVALID;
556 				}
557 
558 				*errno = EILSEQ;
559 				ret_val = (size_t)-1;
560 				ib = oldib;
561 				goto STR_TO_SB_ILLEGAL_CHAR_ERR;
562 			}
563 			u8 = (u8 << 8) | ((uint32_t)*ib);
564 			ib++;
565 		}
566 
567 		i = l = 0;
568 		h = init_h;
569 		while (l <= h) {
570 			i = (l + h) / 2;
571 			if (to_sb_tbl[id][i].u8 == u8)
572 				break;
573 			else if (to_sb_tbl[id][i].u8 < u8)
574 				l = i + 1;
575 			else
576 				h = i - 1;
577 		}
578 
579 		if (to_sb_tbl[id][i].u8 == u8) {
580 			*ob++ = to_sb_tbl[id][i].sb;
581 		} else {
582 STR_TO_SB_REPLACE_INVALID:
583 			*ob++ = KICONV_ASCII_REPLACEMENT_CHAR;
584 			ret_val++;
585 		}
586 	}
587 
588 STR_TO_SB_ILLEGAL_CHAR_ERR:
589 	*inlen = ibtail - ib;
590 	*outlen = obtail - ob;
591 
592 	return (ret_val);
593 }
594 
/*
 * The following four functions are the kiconvstr entry points, one per
 * target single byte codeset, that are recorded in the conv_list[] defined
 * below.
 */
599 static size_t
kiconvstr_to_1252(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)600 kiconvstr_to_1252(char *inarray, size_t *inlen, char *outarray,
601 	size_t *outlen, int flag, int *errno)
602 {
603 	return (kiconvstr_to_sb(KICONV_TBLID_1252, (uchar_t *)inarray,
604 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
605 }
606 
607 static size_t
kiconvstr_to_1(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)608 kiconvstr_to_1(char *inarray, size_t *inlen, char *outarray,
609 	size_t *outlen, int flag, int *errno)
610 {
611 	return (kiconvstr_to_sb(KICONV_TBLID_8859_1, (uchar_t *)inarray,
612 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
613 }
614 
615 static size_t
kiconvstr_to_15(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)616 kiconvstr_to_15(char *inarray, size_t *inlen, char *outarray,
617 	size_t *outlen, int flag, int *errno)
618 {
619 	return (kiconvstr_to_sb(KICONV_TBLID_8859_15, (uchar_t *)inarray,
620 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
621 }
622 
623 static size_t
kiconvstr_to_850(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)624 kiconvstr_to_850(char *inarray, size_t *inlen, char *outarray,
625 	size_t *outlen, int flag, int *errno)
626 {
627 	return (kiconvstr_to_sb(KICONV_TBLID_850, (uchar_t *)inarray,
628 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
629 }
630 
631 /*
632  * The following is the common kiconvstr function for conversions from
633  * single byte codesets to UTF-8.
634  */
635 static size_t
kiconvstr_fr_sb(size_t id,uchar_t * ib,size_t * inlen,uchar_t * ob,size_t * outlen,int flag,int * errno)636 kiconvstr_fr_sb(size_t id, uchar_t *ib, size_t *inlen, uchar_t *ob,
637 	size_t *outlen, int flag, int *errno)
638 {
639 	size_t ret_val;
640 	uchar_t *ibtail;
641 	uchar_t *obtail;
642 	size_t i;
643 	size_t k;
644 	int8_t sz;
645 	boolean_t do_not_ignore_null;
646 
647 	ret_val = 0;
648 	ibtail = ib + *inlen;
649 	obtail = ob + *outlen;
650 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
651 
652 	while (ib < ibtail) {
653 		if (*ib == '\0' && do_not_ignore_null)
654 			break;
655 
656 		if (*ib < 0x80) {
657 			if (ob >= obtail) {
658 				*errno = E2BIG;
659 				ret_val = (size_t)-1;
660 				break;
661 			}
662 			*ob++ = *ib++;
663 			continue;
664 		}
665 
666 		k = *ib - 0x80;
667 		sz = u8_number_of_bytes[to_u8_tbl[id][k].u8[0]];
668 
669 		if (sz <= 0) {
670 			if (flag & KICONV_REPLACE_INVALID) {
671 				if ((obtail - ob) < 3) {
672 					*errno = E2BIG;
673 					ret_val = (size_t)-1;
674 					break;
675 				}
676 
677 				/* Save KICONV_UTF8_REPLACEMENT_CHAR. */
678 				*ob++ = 0xef;
679 				*ob++ = 0xbf;
680 				*ob++ = 0xbd;
681 				ret_val++;
682 				ib++;
683 
684 				continue;
685 			}
686 
687 			*errno = EILSEQ;
688 			ret_val = (size_t)-1;
689 			break;
690 		}
691 
692 		if ((obtail - ob) < sz) {
693 			*errno = E2BIG;
694 			ret_val = (size_t)-1;
695 			break;
696 		}
697 
698 		for (i = 0; i < sz; i++)
699 			*ob++ = to_u8_tbl[id][k].u8[i];
700 
701 		ib++;
702 	}
703 
704 	*inlen = ibtail - ib;
705 	*outlen = obtail - ob;
706 
707 	return (ret_val);
708 }
709 
/*
 * The following four functions are also kiconvstr entry points, one per
 * source single byte codeset, that are recorded in the conv_list[] below.
 */
714 static size_t
kiconvstr_fr_1252(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)715 kiconvstr_fr_1252(char *inarray, size_t *inlen, char *outarray,
716 	size_t *outlen, int flag, int *errno)
717 {
718 	return (kiconvstr_fr_sb(KICONV_TBLID_1252, (uchar_t *)inarray,
719 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
720 }
721 
722 static size_t
kiconvstr_fr_1(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)723 kiconvstr_fr_1(char *inarray, size_t *inlen, char *outarray,
724 	size_t *outlen, int flag, int *errno)
725 {
726 	return (kiconvstr_fr_sb(KICONV_TBLID_8859_1, (uchar_t *)inarray,
727 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
728 }
729 
730 static size_t
kiconvstr_fr_15(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)731 kiconvstr_fr_15(char *inarray, size_t *inlen, char *outarray,
732 	size_t *outlen, int flag, int *errno)
733 {
734 	return (kiconvstr_fr_sb(KICONV_TBLID_8859_15, (uchar_t *)inarray,
735 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
736 }
737 
738 static size_t
kiconvstr_fr_850(char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)739 kiconvstr_fr_850(char *inarray, size_t *inlen, char *outarray,
740 	size_t *outlen, int flag, int *errno)
741 {
742 	return (kiconvstr_fr_sb(KICONV_TBLID_850, (uchar_t *)inarray,
743 	    inlen, (uchar_t *)outarray, outlen, flag, errno));
744 }
745 
746 /*
747  * The following static vector contains the normalized code names
748  * and their corresponding code ids. They are somewhat arbitrarily ordered
749  * based on marketing data available. A code id could repeat for aliases.
750  *
751  * The vector was generated by using a small utility program called
752  * codeidlistgen.c that you can find from PSARC/2007/173/materials/util/.
753  *
754  * The code ids must be portable, i.e., if needed, you can always generate
755  * the code_list[] again with different code ids. You'll also need to
756  * update the conv_list[] at below.
757  */
758 #define	KICONV_MAX_CODEID_ENTRY		68
759 #define	KICONV_MAX_CODEID		42
760 
761 static kiconv_code_list_t code_list[KICONV_MAX_CODEID_ENTRY] = {
762 	{ "utf8", 0 },
763 	{ "cp1252", 1 },
764 	{ "1252", 1 },
765 	{ "iso88591", 2 },
766 	{ "iso885915", 3 },
767 	{ "cp850", 4 },
768 	{ "850", 4 },
769 	{ "eucjp", 5 },
770 	{ "eucjpms", 6 },
771 	{ "cp932", 7 },
772 	{ "932", 7 },
773 	{ "shiftjis", 8 },
774 	{ "pck", 8 },
775 	{ "sjis", 8 },
776 	{ "gb18030", 9 },
777 	{ "gbk", 10 },
778 	{ "cp936", 10 },
779 	{ "936", 10 },
780 	{ "euccn", 11 },
781 	{ "euckr", 12 },
782 	{ "unifiedhangul", 13 },
783 	{ "cp949", 13 },
784 	{ "949", 13 },
785 	{ "big5", 14 },
786 	{ "cp950", 14 },
787 	{ "950", 14 },
788 	{ "big5hkscs", 15 },
789 	{ "euctw", 16 },
790 	{ "cp950hkscs", 17 },
791 	{ "cp1250", 18 },
792 	{ "1250", 18 },
793 	{ "iso88592", 19 },
794 	{ "cp852", 20 },
795 	{ "852", 20 },
796 	{ "cp1251", 21 },
797 	{ "1251", 21 },
798 	{ "iso88595", 22 },
799 	{ "koi8r", 23 },
800 	{ "cp866", 24 },
801 	{ "866", 24 },
802 	{ "cp1253", 25 },
803 	{ "1253", 25 },
804 	{ "iso88597", 26 },
805 	{ "cp737", 27 },
806 	{ "737", 27 },
807 	{ "cp1254", 28 },
808 	{ "1254", 28 },
809 	{ "iso88599", 29 },
810 	{ "cp857", 30 },
811 	{ "857", 30 },
812 	{ "cp1256", 31 },
813 	{ "1256", 31 },
814 	{ "iso88596", 32 },
815 	{ "cp720", 33 },
816 	{ "720", 33 },
817 	{ "cp1255", 34 },
818 	{ "1255", 34 },
819 	{ "iso88598", 35 },
820 	{ "cp862", 36 },
821 	{ "862", 36 },
822 	{ "cp1257", 37 },
823 	{ "1257", 37 },
824 	{ "iso885913", 38 },
825 	{ "iso885910", 39 },
826 	{ "iso885911", 40 },
827 	{ "tis620", 40 },
828 	{ "iso88593", 41 },
829 	{ "iso88594", 42 },
830 };
831 
832 /*
833  * The list of code conversions supported are grouped together per
834  * module which will be loaded as needed.
835  */
836 #define	KICONV_MAX_CONVERSIONS		84
837 
838 static kiconv_conv_list_t conv_list[KICONV_MAX_CONVERSIONS] = {
839 	/* Embedded code conversions: */
840 	{
841 		1, 0, KICONV_EMBEDDED,
842 		open_to_1252, kiconv_to_sb, close_to_sb, kiconvstr_to_1252
843 	},
844 	{
845 		0, 1, KICONV_EMBEDDED,
846 		open_fr_1252, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_1252
847 	},
848 	{
849 		2, 0, KICONV_EMBEDDED,
850 		open_to_88591, kiconv_to_sb, close_to_sb, kiconvstr_to_1
851 	},
852 	{
853 		0, 2, KICONV_EMBEDDED,
854 		open_fr_88591, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_1
855 	},
856 	{
857 		3, 0, KICONV_EMBEDDED,
858 		open_to_885915, kiconv_to_sb, close_to_sb, kiconvstr_to_15
859 	},
860 	{
861 		0, 3, KICONV_EMBEDDED,
862 		open_fr_885915, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_15
863 	},
864 	{
865 		4, 0, KICONV_EMBEDDED,
866 		open_to_850, kiconv_to_sb, close_to_sb, kiconvstr_to_850
867 	},
868 	{
869 		0, 4, KICONV_EMBEDDED,
870 		open_fr_850, kiconv_fr_sb, close_fr_sb, kiconvstr_fr_850
871 	},
872 
873 	/* kiconv_ja module conversions: */
874 	{ 0, 5, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
875 	{ 5, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
876 	{ 0, 6, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
877 	{ 6, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
878 	{ 0, 7, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
879 	{ 7, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
880 	{ 0, 8, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
881 	{ 8, 0, KICONV_MODULE_ID_JA, NULL, NULL, NULL, NULL },
882 
883 	/* kiconv_sc module conversions: */
884 	{ 0, 9, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
885 	{ 9, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
886 	{ 0, 10, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
887 	{ 10, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
888 	{ 0, 11, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
889 	{ 11, 0, KICONV_MODULE_ID_SC, NULL, NULL, NULL, NULL },
890 
891 	/* kiconv_ko module conversions: */
892 	{ 0, 12, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
893 	{ 12, 0, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
894 	{ 0, 13, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
895 	{ 13, 0, KICONV_MODULE_ID_KO, NULL, NULL, NULL, NULL },
896 
897 	/* kiconv_tc module conversions: */
898 	{ 0, 14, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
899 	{ 14, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
900 	{ 0, 15, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
901 	{ 15, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
902 	{ 0, 16, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
903 	{ 16, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
904 	{ 0, 17, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
905 	{ 17, 0, KICONV_MODULE_ID_TC, NULL, NULL, NULL, NULL },
906 
907 	/* kiconv_emea module conversions: */
908 	{ 0, 18, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
909 	{ 18, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
910 	{ 0, 19, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
911 	{ 19, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
912 	{ 0, 20, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
913 	{ 20, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
914 	{ 0, 21, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
915 	{ 21, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
916 	{ 0, 22, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
917 	{ 22, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
918 	{ 0, 23, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
919 	{ 23, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
920 	{ 0, 24, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
921 	{ 24, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
922 	{ 0, 25, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
923 	{ 25, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
924 	{ 0, 26, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
925 	{ 26, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
926 	{ 0, 27, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
927 	{ 27, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
928 	{ 0, 28, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
929 	{ 28, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
930 	{ 0, 29, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
931 	{ 29, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
932 	{ 0, 30, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
933 	{ 30, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
934 	{ 0, 31, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
935 	{ 31, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
936 	{ 0, 32, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
937 	{ 32, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
938 	{ 0, 33, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
939 	{ 33, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
940 	{ 0, 34, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
941 	{ 34, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
942 	{ 0, 35, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
943 	{ 35, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
944 	{ 0, 36, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
945 	{ 36, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
946 	{ 0, 37, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
947 	{ 37, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
948 	{ 0, 38, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
949 	{ 38, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
950 	{ 0, 39, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
951 	{ 39, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
952 	{ 0, 40, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
953 	{ 40, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
954 	{ 0, 41, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
955 	{ 41, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
956 	{ 0, 42, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
957 	{ 42, 0, KICONV_MODULE_ID_EMEA, NULL, NULL, NULL, NULL },
958 };
959 
960 /* The list of implemeted and supported modules. */
961 static kiconv_mod_list_t module_list[KICONV_MAX_MODULE_ID + 1] = {
962 	"kiconv_embedded", 0,
963 	"kiconv_ja", 0,
964 	"kiconv_sc", 0,
965 	"kiconv_ko", 0,
966 	"kiconv_tc", 0,
967 	"kiconv_emea", 0,
968 };
969 
970 /*
971  * We use conv_list_lock to restrict data access of both conv_list[] and
972  * module_list[] as they are tightly coupled critical sections that need to be
973  * dealt together as a unit.
974  */
975 static kmutex_t conv_list_lock;
976 
977 void
kiconv_init()978 kiconv_init()
979 {
980 	mutex_init(&conv_list_lock, NULL, MUTEX_DEFAULT, NULL);
981 }
982 
983 /*
984  * The following is used to check on whether a kiconv module is being
985  * used or not at the _fini() of the module.
986  */
987 size_t
kiconv_module_ref_count(size_t mid)988 kiconv_module_ref_count(size_t mid)
989 {
990 	int count;
991 
992 	if (mid <= 0 || mid > KICONV_MAX_MODULE_ID)
993 		return (0);
994 
995 	mutex_enter(&conv_list_lock);
996 
997 	count = module_list[mid].refcount;
998 
999 	mutex_exit(&conv_list_lock);
1000 
1001 	return (count);
1002 }
1003 
1004 /*
1005  * This function "normalizes" a given code name, n, by not including skippable
1006  * characters and folding uppercase letters to corresponding lowercase letters.
1007  * We only fold 7-bit ASCII uppercase characters since the names should be in
1008  * Portable Character Set of 7-bit ASCII.
1009  *
1010  * By doing this, we will be able to maximize the code name matches.
1011  */
1012 static size_t
normalize_codename(const char * n)1013 normalize_codename(const char *n)
1014 {
1015 	char s[KICONV_MAX_CODENAME_LEN + 1];
1016 	size_t i;
1017 
1018 	if (n == NULL)
1019 		return ((size_t)-1);
1020 
1021 	for (i = 0; *n; n++) {
1022 		if (KICONV_SKIPPABLE_CHAR(*n))
1023 			continue;
1024 
1025 		/* If unreasonably lengthy, we don't support such names. */
1026 		if (i >= KICONV_MAX_CODENAME_LEN)
1027 			return ((size_t)-1);
1028 
1029 		s[i++] = (*n >= 'A' && *n <= 'Z') ? *n - 'A' + 'a' : *n;
1030 	}
1031 	s[i] = '\0';
1032 
1033 	/* With the normalized name, find the corresponding codeset id. */
1034 	for (i = 0; i < KICONV_MAX_CODEID_ENTRY; i++)
1035 		if (strcmp(s, code_list[i].name) == 0)
1036 			return (code_list[i].id);
1037 
1038 	/*
1039 	 * In future time, we will also have a few more lines of code at below
1040 	 * that will deal with other user-created modules' fromcodes and
1041 	 * tocodes including aliases in a different vector. For now, we don't
1042 	 * support that but only the known names to this project at this time.
1043 	 */
1044 
1045 	return ((size_t)-1);
1046 }
1047 
1048 /*
1049  * This function called from mod_install() registers supplied code
1050  * conversions. At this point, it does not honor aliases and hence does not
1051  * use nowait data field from the kiconv module info data structure.
1052  */
1053 int
kiconv_register_module(kiconv_module_info_t * info)1054 kiconv_register_module(kiconv_module_info_t *info)
1055 {
1056 	size_t mid;
1057 	size_t fid;
1058 	size_t tid;
1059 	size_t i;
1060 	size_t j;
1061 	kiconv_ops_t *op;
1062 
1063 	/* Validate the given kiconv module info. */
1064 	if (info == NULL || info->module_name == NULL ||
1065 	    info->kiconv_num_convs == 0 || info->kiconv_ops_tbl == NULL)
1066 		return (EINVAL);
1067 
1068 	/*
1069 	 * Check if this is one of the known modules. At this point,
1070 	 * we do not allow user-defined kiconv modules and that'd be for
1071 	 * a future project.
1072 	 */
1073 	for (mid = 1; mid <= KICONV_MAX_MODULE_ID; mid++)
1074 		if (strcmp(module_list[mid].name, info->module_name) == 0)
1075 			break;
1076 	if (mid > KICONV_MAX_MODULE_ID)
1077 		return (EINVAL);
1078 
1079 	/* Let's register the conversions supplied. */
1080 	mutex_enter(&conv_list_lock);
1081 
1082 	/*
1083 	 * This is very unlikely situation but by any chance we don't want to
1084 	 * register a module that is already in.
1085 	 */
1086 	if (module_list[mid].refcount > 0) {
1087 		mutex_exit(&conv_list_lock);
1088 		return (EAGAIN);
1089 	}
1090 
1091 	for (i = 0; i < info->kiconv_num_convs; i++) {
1092 		op = &(info->kiconv_ops_tbl[i]);
1093 
1094 		fid = normalize_codename(op->fromcode);
1095 		tid = normalize_codename(op->tocode);
1096 
1097 		/*
1098 		 * If we find anything wrong in this particular conversion,
1099 		 * we skip this one and continue to the next one. This include
1100 		 * a case where there is a conversion already being assigned
1101 		 * into the conv_list[] somehow, i.e., new one never kicks out
1102 		 * old one.
1103 		 */
1104 		if (op->kiconv_open == NULL || op->kiconv == NULL ||
1105 		    op->kiconv_close == NULL || op->kiconvstr == NULL)
1106 			continue;
1107 
1108 		for (j = 0; j < KICONV_MAX_CONVERSIONS; j++) {
1109 			if (conv_list[j].mid == mid &&
1110 			    conv_list[j].fid == fid &&
1111 			    conv_list[j].tid == tid) {
1112 				if (conv_list[j].open == NULL) {
1113 					conv_list[j].open = op->kiconv_open;
1114 					conv_list[j].kiconv = op->kiconv;
1115 					conv_list[j].close = op->kiconv_close;
1116 					conv_list[j].kiconvstr = op->kiconvstr;
1117 				}
1118 				break;
1119 			}
1120 		}
1121 	}
1122 
1123 	mutex_exit(&conv_list_lock);
1124 
1125 	return (0);
1126 }
1127 
1128 /*
1129  * The following function called during mod_remove() will try to unregister,
1130  * i.e., clear up conversion function pointers, from the conv_list[] if it
1131  * can. If there is any code conversions being used, then, the function will
1132  * just return EBUSY indicating that the module cannot be unloaded.
1133  */
1134 int
kiconv_unregister_module(kiconv_module_info_t * info)1135 kiconv_unregister_module(kiconv_module_info_t *info)
1136 {
1137 	size_t mid;
1138 	size_t i;
1139 
1140 	if (info == NULL || info->module_name == NULL ||
1141 	    info->kiconv_num_convs == 0 || info->kiconv_ops_tbl == NULL)
1142 		return (EINVAL);
1143 
1144 	for (mid = 1; mid <= KICONV_MAX_MODULE_ID; mid++)
1145 		if (strcmp(module_list[mid].name, info->module_name) == 0)
1146 			break;
1147 	if (mid > KICONV_MAX_MODULE_ID)
1148 		return (EINVAL);
1149 
1150 	mutex_enter(&conv_list_lock);
1151 
1152 	/*
1153 	 * If any of the conversions are used, then, this module canont be
1154 	 * unloaded.
1155 	 */
1156 	if (module_list[mid].refcount > 0) {
1157 		mutex_exit(&conv_list_lock);
1158 		return (EBUSY);
1159 	}
1160 
1161 	/*
1162 	 * Otherwise, we unregister all conversions from this module
1163 	 * and be ready for the unloading. At this point, we only care about
1164 	 * the conversions we know about with the module.
1165 	 */
1166 	for (i = 0; i < KICONV_MAX_CONVERSIONS; i++) {
1167 		if (conv_list[i].mid == mid) {
1168 			conv_list[i].open = NULL;
1169 			conv_list[i].kiconv = NULL;
1170 			conv_list[i].close = NULL;
1171 			conv_list[i].kiconvstr = NULL;
1172 		}
1173 	}
1174 
1175 	mutex_exit(&conv_list_lock);
1176 
1177 	return (0);
1178 }
1179 
1180 /*
1181  * The following function check if asked code conversion is available
1182  * and if necessary, load the corresponding kiconv module that contains
1183  * the conversion (and others).
1184  */
1185 static kiconv_t
check_and_load_conversions(const char * tocode,const char * fromcode)1186 check_and_load_conversions(const char *tocode, const char *fromcode)
1187 {
1188 	kiconv_t kcd;
1189 	size_t tid;
1190 	size_t fid;
1191 	size_t mid;
1192 	size_t i;
1193 
1194 	/* Normalize the given names and find the corresponding code ids. */
1195 	tid = normalize_codename(tocode);
1196 	if (tid == (size_t)-1)
1197 		return ((kiconv_t)-1);
1198 
1199 	fid = normalize_codename(fromcode);
1200 	if (fid == (size_t)-1)
1201 		return ((kiconv_t)-1);
1202 
1203 	/*
1204 	 * Search the conversion.
1205 	 *
1206 	 * If the conversion isn't supported, just return -1.
1207 	 * If the conversion is supported but there is no corresponding
1208 	 * module loaded, try to load it and if successful, return
1209 	 * a kiconv conversion descriptor memory block.
1210 	 *
1211 	 * We maintain a reference counter of uint_t for each module.
1212 	 */
1213 	mutex_enter(&conv_list_lock);
1214 
1215 	for (i = 0; i < KICONV_MAX_CONVERSIONS; i++)
1216 		if (conv_list[i].tid == tid && conv_list[i].fid == fid)
1217 			break;
1218 	if (i >= KICONV_MAX_CONVERSIONS) {
1219 		mutex_exit(&conv_list_lock);
1220 		return ((kiconv_t)-1);
1221 	}
1222 
1223 	mid = conv_list[i].mid;
1224 
1225 	if (conv_list[i].open == NULL) {
1226 		mutex_exit(&conv_list_lock);
1227 
1228 		if (modload("kiconv", module_list[mid].name) < 0)
1229 			return ((kiconv_t)-1);
1230 
1231 		/*
1232 		 * Let's double check if something happened right after
1233 		 * the modload and/or if the module really has the conversion.
1234 		 */
1235 		mutex_enter(&conv_list_lock);
1236 
1237 		if (conv_list[i].open == NULL) {
1238 			mutex_exit(&conv_list_lock);
1239 			return ((kiconv_t)-1);
1240 		}
1241 	}
1242 
1243 	/*
1244 	 * If we got the conversion, we will use the conversion function
1245 	 * in the module and so let's increase the module's refcounter
1246 	 * so that the module won't be kicked out. (To be more exact and
1247 	 * specific, the "refcount" is thus the reference counter of
1248 	 * the module functions being used.)
1249 	 */
1250 	if (module_list[mid].refcount < UINT_MAX)
1251 		module_list[mid].refcount++;
1252 
1253 	mutex_exit(&conv_list_lock);
1254 
1255 	kcd = (kiconv_t)kmem_alloc(sizeof (kiconv_data_t), KM_SLEEP);
1256 	kcd->handle = (void *)-1;
1257 	kcd->id = i;
1258 
1259 	return (kcd);
1260 }
1261 
1262 /*
1263  * The following are the four "Committed" interfaces.
1264  */
1265 kiconv_t
kiconv_open(const char * tocode,const char * fromcode)1266 kiconv_open(const char *tocode, const char *fromcode)
1267 {
1268 	kiconv_t kcd;
1269 	size_t mid;
1270 
1271 	kcd = check_and_load_conversions(tocode, fromcode);
1272 	if (kcd == (kiconv_t)-1)
1273 		return ((kiconv_t)-1);
1274 
1275 	kcd->handle = (conv_list[kcd->id].open)();
1276 	if (kcd->handle == (void *)-1) {
1277 		/*
1278 		 * If the conversion couldn't be opened for some reason,
1279 		 * then, we unallocate the kcd and, more importantly, before
1280 		 * that, we also decrease the module reference counter.
1281 		 */
1282 		mid = conv_list[kcd->id].mid;
1283 
1284 		mutex_enter(&conv_list_lock);
1285 
1286 		if (module_list[mid].refcount > 0)
1287 			module_list[mid].refcount--;
1288 
1289 		mutex_exit(&conv_list_lock);
1290 
1291 		kmem_free((void *)kcd, sizeof (kiconv_data_t));
1292 
1293 		return ((kiconv_t)-1);
1294 	}
1295 
1296 	return (kcd);
1297 }
1298 
1299 size_t
kiconv(kiconv_t kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)1300 kiconv(kiconv_t kcd, char **inbuf, size_t *inbytesleft,
1301 	char **outbuf, size_t *outbytesleft, int *errno)
1302 {
1303 	/* Do some minimum checking on the kiconv conversion descriptor. */
1304 	if (! kcd || kcd == (kiconv_t)-1 || conv_list[kcd->id].kiconv == NULL) {
1305 		*errno = EBADF;
1306 		return ((size_t)-1);
1307 	}
1308 
1309 	return ((conv_list[kcd->id].kiconv)(kcd->handle, inbuf, inbytesleft,
1310 	    outbuf, outbytesleft, errno));
1311 }
1312 
1313 int
kiconv_close(kiconv_t kcd)1314 kiconv_close(kiconv_t kcd)
1315 {
1316 	int ret;
1317 	size_t mid;
1318 
1319 	if (! kcd || kcd == (kiconv_t)-1 || conv_list[kcd->id].close == NULL)
1320 		return (EBADF);
1321 
1322 	mid = conv_list[kcd->id].mid;
1323 
1324 	ret = (conv_list[kcd->id].close)(kcd->handle);
1325 
1326 	kmem_free((void *)kcd, sizeof (kiconv_data_t));
1327 
1328 	mutex_enter(&conv_list_lock);
1329 
1330 	/*
1331 	 * While we maintain reference conter for each module, once loaded,
1332 	 * we don't modunload from kiconv functions even if the counter
1333 	 * reaches back to zero.
1334 	 */
1335 	if (module_list[mid].refcount > 0)
1336 		module_list[mid].refcount--;
1337 
1338 	mutex_exit(&conv_list_lock);
1339 
1340 	return (ret);
1341 }
1342 
1343 size_t
kiconvstr(const char * tocode,const char * fromcode,char * inarray,size_t * inlen,char * outarray,size_t * outlen,int flag,int * errno)1344 kiconvstr(const char *tocode, const char *fromcode, char *inarray,
1345 	size_t *inlen, char *outarray, size_t *outlen, int flag, int *errno)
1346 {
1347 	kiconv_t kcd;
1348 	size_t ret;
1349 	size_t mid;
1350 
1351 	kcd = check_and_load_conversions(tocode, fromcode);
1352 	if (kcd == (kiconv_t)-1 || conv_list[kcd->id].kiconvstr == NULL) {
1353 		*errno = EBADF;
1354 		return ((size_t)-1);
1355 	}
1356 
1357 	mid = conv_list[kcd->id].mid;
1358 
1359 	ret = (conv_list[kcd->id].kiconvstr)(inarray, inlen, outarray, outlen,
1360 	    flag, errno);
1361 
1362 	kmem_free((void *)kcd, sizeof (kiconv_data_t));
1363 
1364 	mutex_enter(&conv_list_lock);
1365 
1366 	if (module_list[mid].refcount > 0)
1367 		module_list[mid].refcount--;
1368 
1369 	mutex_exit(&conv_list_lock);
1370 
1371 	return (ret);
1372 }
1373