xref: /illumos-gate/usr/src/uts/common/kiconv/kiconv_ja/kiconv_ja.c (revision 354507029a42e4bcb1ea64fc4685f2bfd4792db8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/sysmacros.h>
32 #include <sys/systm.h>
33 #include <sys/debug.h>
34 #include <sys/kmem.h>
35 #include <sys/sunddi.h>
36 #include <sys/byteorder.h>
37 #include <sys/errno.h>
38 #include <sys/euc.h>
39 #include <sys/modctl.h>
40 #include <sys/kiconv.h>
41 
42 #include <sys/kiconv_ja.h>
43 #include <sys/kiconv_ja_jis_to_unicode.h>
44 #include <sys/kiconv_ja_unicode_to_jis.h>
45 
/*
 * The following vector gives the total number of bytes in a UTF-8
 * character, or one of the U8_ILLEGAL_CHAR / U8_OUT_OF_RANGE_CHAR
 * markers. The index is the first byte of the character. This is
 * defined in u8_textprep.c.
 */
51 extern const int8_t u8_number_of_bytes[];
52 
/*
 * The following is a vector of bit-masks used to extract the significant
 * bits of the first byte of a UTF-8 character. The index is the number
 * of bytes that follow the first byte. This is defined in uconv.c.
 */
58 extern const uchar_t u8_masks_tbl[];
59 
60 /*
61  * The following two vectors are to provide valid minimum and
62  * maximum values for the 2'nd byte of a multibyte UTF-8 character for
63  * better illegal sequence checking. The index value must be the value of
64  * the first byte of the UTF-8 character. These are defined in u8_textprep.c.
65  */
66 extern const uint8_t u8_valid_min_2nd_byte[];
67 extern const uint8_t u8_valid_max_2nd_byte[];
68 
69 static kiconv_ja_euc16_t
70 kiconv_ja_ucs2_to_euc16(kiconv_ja_ucs2_t ucs2)
71 {
72 	const kiconv_ja_euc16_t	*p;
73 
74 	if ((p = kiconv_ja_ucs2_to_euc16_index[ucs2 >> 8]) != NULL)
75 		return (p[ucs2 & 0xff]);
76 
77 	return (KICONV_JA_NODEST);
78 }
79 
80 static size_t
81 utf8_ucs(uint_t *p, uchar_t **pip, size_t *pileft, int *errno)
82 {
83 	uint_t	l;		/* to be copied to *p on successful return */
84 	uchar_t	ic;		/* current byte */
85 	uchar_t	ic1;		/* 1st byte */
86 	uchar_t	*ip = *pip;	/* next byte to read */
87 	size_t	ileft = *pileft; /* number of bytes available */
88 	size_t	rv = 0; 	/* return value of this function */
89 	int	remaining_bytes;
90 	int	u8_size;
91 
92 	KICONV_JA_NGET(ic1);	/* read 1st byte */
93 
94 	if (ic1 < 0x80) {
95 		/* successfully converted */
96 		*p = (uint_t)ic1;
97 		goto ret;
98 	}
99 
100 	u8_size = u8_number_of_bytes[ic1];
101 	if (u8_size == U8_ILLEGAL_CHAR) {
102 		KICONV_JA_RETERROR(EILSEQ)
103 	} else if (u8_size == U8_OUT_OF_RANGE_CHAR) {
104 		KICONV_JA_RETERROR(ERANGE)
105 	}
106 
107 	remaining_bytes = u8_size - 1;
108 	if (remaining_bytes != 0) {
109 		l = ic1 & u8_masks_tbl[remaining_bytes];
110 
111 		for (; remaining_bytes > 0; remaining_bytes--) {
112 			KICONV_JA_NGET(ic);
113 			if (ic1 != 0U) {
114 				if ((ic < u8_valid_min_2nd_byte[ic1]) ||
115 				    (ic > u8_valid_max_2nd_byte[ic1])) {
116 					KICONV_JA_RETERROR(EILSEQ)
117 				}
118 				ic1 = 0U; /* 2nd byte check done */
119 			} else {
120 				if ((ic < 0x80) || (ic > 0xbf)) {
121 					KICONV_JA_RETERROR(EILSEQ)
122 				}
123 			}
124 			l = (l << 6) | (ic & 0x3f);
125 		}
126 
127 		/* successfully converted */
128 		*p = l;
129 	} else {
130 		KICONV_JA_RETERROR(EILSEQ)
131 	}
132 
133 ret:
134 	if (rv == 0) {
135 		/*
136 		 * Update rv, *pip, and *pileft on successfule return.
137 		 */
138 		rv = *pileft - ileft;
139 		*pip = ip;
140 		*pileft = ileft;
141 	}
142 
143 	return (rv);
144 }
145 
146 static size_t
147 utf8_ucs_replace(uint_t *p, uchar_t **pip, size_t *pileft, size_t *repnum)
148 {
149 	uint_t	l;		/* to be copied to *p on successful return */
150 	uchar_t	ic;		/* current byte */
151 	uchar_t	ic1;		/* 1st byte */
152 	uchar_t	*ip = *pip;	/* next byte to read */
153 	size_t	ileft = *pileft; /* number of bytes available */
154 	size_t	rv = 0; 	/* return value of this function */
155 	int	remaining_bytes;
156 	int	u8_size;
157 
158 	KICONV_JA_NGET_REP_TO_MB(ic1); 	/* read 1st byte */
159 
160 	if (ic1 < 0x80) {
161 		/* successfully converted */
162 		l = (uint_t)ic1;
163 		goto ret;
164 	}
165 
166 	u8_size = u8_number_of_bytes[ic1];
167 	if (u8_size == U8_ILLEGAL_CHAR || u8_size == U8_OUT_OF_RANGE_CHAR) {
168 		l = KICONV_JA_DEF_SINGLE;
169 		(*repnum)++;
170 		goto ret;
171 	}
172 
173 	remaining_bytes = u8_size - 1;
174 
175 	if (remaining_bytes != 0) {
176 		l = ic1 & u8_masks_tbl[remaining_bytes];
177 
178 		for (; remaining_bytes > 0; remaining_bytes--) {
179 			KICONV_JA_NGET_REP_TO_MB(ic);
180 			if (ic1 != 0U) {
181 				if ((ic < u8_valid_min_2nd_byte[ic1]) ||
182 				    (ic > u8_valid_max_2nd_byte[ic1])) {
183 					l = KICONV_JA_DEF_SINGLE;
184 					(*repnum)++;
185 					ileft -= (remaining_bytes - 1);
186 					ip += (remaining_bytes - 1);
187 					break;
188 				}
189 				ic1 = 0U; /* 2nd byte check done */
190 			} else {
191 				if ((ic < 0x80) || (ic > 0xbf)) {
192 					l = KICONV_JA_DEF_SINGLE;
193 					(*repnum)++;
194 					ileft -= (remaining_bytes - 1);
195 					ip += (remaining_bytes - 1);
196 					break;
197 				}
198 			}
199 			l = (l << 6) | (ic & 0x3f);
200 		}
201 	} else {
202 		l = KICONV_JA_DEF_SINGLE;
203 		(*repnum)++;
204 	}
205 
206 ret:
207 	/* successfully converted */
208 	*p = l;
209 	rv = *pileft - ileft;
210 
211 	*pip = ip;
212 	*pileft = ileft;
213 
214 	return (rv);
215 }
216 
217 static size_t				/* return #bytes read, or -1 */
218 read_unicode(
219 	uint_t	*p,		/* point variable to store UTF-32 */
220 	uchar_t	**pip,		/* point pointer to input buf */
221 	size_t	*pileft,	/* point #bytes left in input buf */
222 	int	*errno,		/* point variable to errno */
223 	int	flag,		/* kiconvstr flag */
224 	size_t	*rv)		/* point return valuse */
225 {
226 	if (flag & KICONV_REPLACE_INVALID)
227 		return (utf8_ucs_replace(p, pip, pileft, rv));
228 	else
229 		return (utf8_ucs(p, pip, pileft, errno));
230 }
231 
232 static size_t
233 write_unicode(
234 	uint_t	u32,		/* UTF-32 to write */
235 	char	**pop,		/* point pointer to output buf */
236 	size_t	*poleft,	/* point #bytes left in output buf */
237 	int	*errno)		/* point variable to errno */
238 {
239 	char	*op = *pop;
240 	size_t	oleft = *poleft;
241 	size_t	rv = 0;			/* return value */
242 
243 	if (u32 <= 0x7f) {
244 		KICONV_JA_NPUT((uchar_t)(u32));
245 		rv = 1;
246 	} else if (u32 <= 0x7ff) {
247 		KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x1f) | 0xc0));
248 		KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
249 		rv = 2;
250 	} else if ((u32 >= 0xd800) && (u32 <= 0xdfff)) {
251 		KICONV_JA_RETERROR(EILSEQ)
252 	} else if (u32 <= 0xffff) {
253 		KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x0f) | 0xe0));
254 		KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
255 		KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
256 		rv = 3;
257 	} else if (u32 <= 0x10ffff) {
258 		KICONV_JA_NPUT((uchar_t)((((u32)>>18) & 0x07) | 0xf0));
259 		KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x3f) | 0x80));
260 		KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
261 		KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
262 		rv = 4;
263 	} else {
264 		KICONV_JA_RETERROR(EILSEQ)
265 	}
266 
267 ret:
268 	if (rv != (size_t)-1) {
269 		/* update *pop and *poleft only on successful return */
270 		*pop = op;
271 		*poleft = oleft;
272 	}
273 
274 	return (rv);
275 }
276 
277 static void *
278 _kiconv_ja_open_unicode(uint8_t id)
279 {
280 	kiconv_state_t	kcd;
281 
282 	kcd = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t),
283 	    KM_SLEEP);
284 	kcd->id = id;
285 	kcd->bom_processed = 0;
286 	return ((void *)kcd);
287 }
288 
289 static void *
290 open_eucjp(void)
291 {
292 	return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP));
293 }
294 
295 static void *
296 open_eucjpms(void)
297 {
298 	return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP_MS));
299 }
300 
301 static void *
302 open_sjis(void)
303 {
304 	return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_SJIS));
305 }
306 
307 static void *
308 open_cp932(void)
309 {
310 	return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_CP932));
311 }
312 
313 int
314 close_ja(void *kcd)
315 {
316 	if (! kcd || kcd == (void *)-1)
317 		return (EBADF);
318 
319 	kmem_free(kcd, sizeof (kiconv_state_data_t));
320 
321 	return (0);
322 }
323 
/*
 * Conversion loop for the "from EUC-JP" direction of the stateful
 * interface.
 *
 * One character at a time is read from *inbuf, classified by its first
 * byte (ASCII, CS1 two-byte, SS2 + one byte, SS3 + two bytes, or C1
 * control), mapped to a Unicode value through the conversion tables and
 * passed to KICONV_JA_PUTU for output.
 *
 * kcd is dereferenced unconditionally to fetch the table id, so it must
 * be valid here; kiconv_fr_eucjp() checks it before calling.
 *
 * *inbuf, *inbytesleft, *outbuf and *outbytesleft are updated after each
 * completely converted character, so on an error they describe the state
 * after the last good character.
 *
 * Returns the number of characters whose mapped value was
 * KICONV_JA_REPLACE, or 0 right away when inbuf or *inbuf is NULL.
 * Errors are raised by the KICONV_JA_* macros, which leave through the
 * ret label (presumably with rv set to (size_t)-1 and *errno filled in -
 * see kiconv_ja.h).
 */
static size_t
_do_kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
		char **outbuf, size_t *outbytesleft, int *errno)
{
	uint_t		u32;		/* UTF-32 */
	uint_t		index;		/* index for table lookup */
	uchar_t		ic1, ic2, ic3;	/* 1st, 2nd, and 3rd bytes of a char */
	size_t		rv = 0;		/* return value of this function */

	/* Working copies of the cursors; the macros operate on these. */
	uchar_t	*ip;
	size_t		ileft;
	char		*op;
	size_t		oleft;
	size_t		id = ((kiconv_state_t)kcd)->id;

	if ((inbuf == NULL) || (*inbuf == NULL)) {
		return (0);
	}

	ip = (uchar_t *)*inbuf;
	ileft = *inbytesleft;
	op = *outbuf;
	oleft = *outbytesleft;

	while (ileft != 0) {
		KICONV_JA_NGET(ic1); 		/* get 1st byte */

		if (KICONV_JA_ISASC(ic1)) { 	/* ASCII; 1 byte */
			u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
			KICONV_JA_PUTU(u32);
		} else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
				ic1 &= KICONV_JA_CMASK;
				ic2 &= KICONV_JA_CMASK;
				/*
				 * Try the id-specific mapping first and fall
				 * back to the JIS X 0208 table when it yields
				 * KICONV_JA_NODEST.
				 */
				KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
				if (u32 == KICONV_JA_NODEST) {
					index = (ic1 - 0x21) * 94 + ic2 - 0x21;
					u32 = kiconv_ja_jisx0208_to_ucs2[index];
				}
				if (u32 == KICONV_JA_REPLACE)
					rv++;
				KICONV_JA_PUTU(u32);
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else if (ic1 == SS2) { /* JIS X 0201 Kana; 2 bytes */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
				index = (ic2 - 0xa1);
				u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
				KICONV_JA_PUTU(u32);
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
				KICONV_JA_NGET(ic3);
				if (KICONV_JA_ISCS3(ic3)) {
					/* 3rd byte check passed */
					ic2 &= KICONV_JA_CMASK;
					ic3 &= KICONV_JA_CMASK;
					/*
					 * Same two-step lookup as for CS1,
					 * against the JIS X 0212 table.
					 */
					KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
					    ic2, ic3);
					if (u32 == KICONV_JA_NODEST) {
						index = ((ic2 - 0x21) * 94 +
						    (ic3 - 0x21));
						u32 = kiconv_ja_jisx0212_to_ucs2
						    [index];
					}
					if (u32 == KICONV_JA_REPLACE)
						rv++;
					KICONV_JA_PUTU(u32);
				} else { /* 3rd byte check failed */
					KICONV_JA_RETERROR(EILSEQ)
				}
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
			/* C1 control; 1 byte, passed through unchanged */
			u32 = ic1;
			KICONV_JA_PUTU(u32);
		} else { /* 1st byte check failed */
			KICONV_JA_RETERROR(EILSEQ)
		}

		/*
		 * One character successfully converted so update
		 * values outside of this function's stack.
		 */
		*inbuf = (char *)ip;
		*inbytesleft = ileft;
		*outbuf = op;
		*outbytesleft = oleft;
	}

ret:
	return (rv);
}
425 
/*
 * Conversion loop for the "to EUC-JP" direction of the stateful
 * interface.
 *
 * After KICONV_JA_CHECK_UTF8_BOM has looked at the start of the input,
 * one Unicode value at a time is fetched with KICONV_JA_GETU, mapped to
 * a 16-bit EUC code and written out byte by byte with KICONV_JA_NPUT.
 * Values above 0xffff and values with no mapping are written as
 * KICONV_JA_DEF_SINGLE and counted in the return value.
 *
 * kcd is dereferenced unconditionally to fetch the table id, so it must
 * be valid here; kiconv_to_eucjp() checks it before calling.
 *
 * *inbuf, *inbytesleft, *outbuf and *outbytesleft are updated after each
 * completely converted character.
 *
 * Returns the number of substituted characters, or 0 right away when
 * inbuf or *inbuf is NULL.  Errors are raised by the KICONV_JA_* macros,
 * which leave through the ret label (presumably with rv set to
 * (size_t)-1 and *errno filled in - see kiconv_ja.h).
 */
static size_t
_do_kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
		char **outbuf, size_t *outbytesleft, int *errno)
{
	uchar_t		ic;		/* output byte being built */
	size_t		rv = 0;		/* return value of this function */
	uint_t		ucs4;		/* value fetched by KICONV_JA_GETU */
	ushort_t	euc16;		/* 16-bit EUC code for ucs4 */

	/* Working copies of the cursors; the macros operate on these. */
	uchar_t	*ip;
	size_t		ileft;
	char		*op;
	size_t		oleft;
	size_t		read_len;	/* not used directly; presumably for KICONV_JA_GETU */

	size_t		id = ((kiconv_state_t)kcd)->id;

	if ((inbuf == NULL) || (*inbuf == NULL)) {
		return (0);
	}

	ip = (uchar_t *)*inbuf;
	ileft = *inbytesleft;
	op = *outbuf;
	oleft = *outbytesleft;

	KICONV_JA_CHECK_UTF8_BOM(ip, ileft);

	while (ileft != 0) {
		KICONV_JA_GETU(&ucs4, 0);

		if (ucs4 > 0xffff) {
			/* non-BMP */
			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
			rv++;
			goto next;
		}

		/*
		 * Try the id-specific mapping first and fall back to the
		 * generic UCS-2 table when it yields KICONV_JA_NODEST.
		 */
		KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
		if (euc16 == KICONV_JA_NODEST) {
			euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
		}
		if (euc16 == KICONV_JA_NODEST) {
			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
			rv++;
			goto next;
		}

		/* The top bit of each byte of euc16 selects the code set. */
		switch (euc16 & 0x8080) {
		case 0x0000:	/* CS0 */
			ic = (uchar_t)euc16;
			KICONV_JA_NPUT(ic);
			break;
		case 0x8080:	/* CS1 */
			ic = (uchar_t)((euc16 >> 8) & 0xff);
			KICONV_JA_NPUT(ic);
			ic = (uchar_t)(euc16 & 0xff);
			KICONV_JA_NPUT(ic);
			break;
		case 0x0080:	/* CS2 */
			KICONV_JA_NPUT(SS2);
			ic = (uchar_t)euc16;
			KICONV_JA_NPUT(ic);
			break;
		case 0x8000:	/* CS3 */
			KICONV_JA_NPUT(SS3);
			ic = (uchar_t)((euc16 >> 8) & 0xff);
			KICONV_JA_NPUT(ic);
			/* the low byte is emitted with KICONV_JA_CMSB set */
			ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
			KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
			break;
		}
next:
		/*
		 * One character successfully converted so update
		 * values outside of this function's stack.
		 */
		*inbuf = (char *)ip;
		*inbytesleft = ileft;
		*outbuf = op;
		*outbytesleft = oleft;
	}

ret:
	return (rv);
}
512 
513 static size_t
514 _do_kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
515 	size_t *outbytesleft, int flag, int *errno, uint8_t id)
516 {
517 	uint_t		u32;		/* UTF-32 */
518 	uint_t		index;		/* index for table lookup */
519 	uchar_t		ic1, ic2, ic3;	/* 1st, 2nd, and 3rd bytes of a char */
520 	size_t		rv = 0;		/* return value of this function */
521 
522 	uchar_t	*ip;
523 	size_t		ileft;
524 	char		*op;
525 	size_t		oleft;
526 
527 	boolean_t do_not_ignore_null;
528 
529 	if ((inbuf == NULL) || (*inbuf == NULL)) {
530 		return (0);
531 	}
532 
533 	ip = (uchar_t *)inbuf;
534 	ileft = *inbytesleft;
535 	op = outbuf;
536 	oleft = *outbytesleft;
537 
538 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
539 
540 	while (ileft != 0) {
541 		KICONV_JA_NGET(ic1); 		/* get 1st byte */
542 
543 		if (KICONV_JA_ISASC(ic1)) { 	/* ASCII; 1 byte */
544 			if (ic1 == '\0' && do_not_ignore_null) {
545 				return (0);
546 			}
547 			u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
548 			KICONV_JA_PUTU(u32);
549 		} else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
550 			if (flag & KICONV_REPLACE_INVALID) {
551 				KICONV_JA_NGET_REP_FR_MB(ic2);
552 			} else {
553 				KICONV_JA_NGET(ic2);
554 			}
555 			if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
556 				ic1 &= KICONV_JA_CMASK;
557 				ic2 &= KICONV_JA_CMASK;
558 				KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
559 				if (u32 == KICONV_JA_NODEST) {
560 					index = (ic1 - 0x21) * 94 + ic2 - 0x21;
561 					u32 = kiconv_ja_jisx0208_to_ucs2[index];
562 				}
563 				if (u32 == KICONV_JA_REPLACE)
564 					rv++;
565 				KICONV_JA_PUTU(u32);
566 			} else { /* 2nd byte check failed */
567 				if (flag & KICONV_REPLACE_INVALID) {
568 					KICONV_JA_PUTU(KICONV_JA_REPLACE);
569 					rv++;
570 				} else {
571 					KICONV_JA_RETERROR(EILSEQ)
572 				}
573 			}
574 		} else if (ic1 == SS2) { /* JIS X 0201 Kana; 2bytes */
575 			if (flag & KICONV_REPLACE_INVALID) {
576 				KICONV_JA_NGET_REP_FR_MB(ic2);
577 			} else {
578 				KICONV_JA_NGET(ic2);
579 			}
580 			if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
581 				index = (ic2 - 0xa1);
582 				u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
583 				KICONV_JA_PUTU(u32);
584 			} else { /* 2nd byte check failed */
585 				if (flag & KICONV_REPLACE_INVALID) {
586 					KICONV_JA_PUTU(KICONV_JA_REPLACE);
587 					rv++;
588 				} else {
589 					KICONV_JA_RETERROR(EILSEQ)
590 				}
591 			}
592 		} else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
593 			if (flag & KICONV_REPLACE_INVALID) {
594 				KICONV_JA_NGET_REP_FR_MB(ic2);
595 			} else {
596 				KICONV_JA_NGET(ic2);
597 			}
598 			if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
599 				if (flag & KICONV_REPLACE_INVALID) {
600 					KICONV_JA_NGET_REP_FR_MB(ic3);
601 				} else {
602 					KICONV_JA_NGET(ic3);
603 				}
604 				if (KICONV_JA_ISCS3(ic3)) {
605 					/* 3rd byte check passed */
606 					ic2 &= KICONV_JA_CMASK;
607 					ic3 &= KICONV_JA_CMASK;
608 					KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
609 					    ic2, ic3);
610 					if (u32 == KICONV_JA_NODEST) {
611 						index = ((ic2 - 0x21) * 94 +
612 						    (ic3 - 0x21));
613 						u32 = kiconv_ja_jisx0212_to_ucs2
614 						    [index];
615 					}
616 					if (u32 == KICONV_JA_REPLACE)
617 						rv++;
618 					KICONV_JA_PUTU(u32);
619 				} else { /* 3rd byte check failed */
620 					if (flag & KICONV_REPLACE_INVALID) {
621 						KICONV_JA_PUTU(
622 						    KICONV_JA_REPLACE);
623 						rv++;
624 					} else {
625 						KICONV_JA_RETERROR(EILSEQ)
626 					}
627 				}
628 			} else { /* 2nd byte check failed */
629 				if (flag & KICONV_REPLACE_INVALID) {
630 					KICONV_JA_PUTU(KICONV_JA_REPLACE);
631 					rv++;
632 				} else {
633 					KICONV_JA_RETERROR(EILSEQ)
634 				}
635 			}
636 		} else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
637 			/* C1 control; 1 byte */
638 			u32 = ic1;
639 			KICONV_JA_PUTU(u32);
640 		} else { /* 1st byte check failed */
641 			if (flag & KICONV_REPLACE_INVALID) {
642 				KICONV_JA_PUTU(KICONV_JA_REPLACE);
643 				rv++;
644 			} else {
645 				KICONV_JA_RETERROR(EILSEQ)
646 			}
647 		}
648 
649 next:
650 		/*
651 		 * One character successfully converted so update
652 		 * values outside of this function's stack.
653 		 */
654 		*inbytesleft = ileft;
655 		*outbytesleft = oleft;
656 	}
657 
658 ret:
659 	return (rv);
660 }
661 
662 static size_t
663 _do_kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
664 	size_t *outbytesleft, int flag, int *errno, uint8_t id)
665 {
666 	uchar_t		ic;
667 	size_t		rv = 0;
668 	uint_t		ucs4;
669 	ushort_t	euc16;
670 
671 	uchar_t	*ip;
672 	size_t		ileft;
673 	char		*op;
674 	size_t		oleft;
675 	size_t		read_len;
676 
677 	boolean_t do_not_ignore_null;
678 
679 	if ((inbuf == NULL) || (*inbuf == NULL)) {
680 		return (0);
681 	}
682 
683 	ip = (uchar_t *)inbuf;
684 	ileft = *inbytesleft;
685 	op = outbuf;
686 	oleft = *outbytesleft;
687 
688 	KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);
689 
690 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
691 
692 	while (ileft != 0) {
693 		KICONV_JA_GETU(&ucs4, flag);
694 
695 		if (ucs4 == 0x0 && do_not_ignore_null) {
696 			return (0);
697 		}
698 
699 		if (ucs4 > 0xffff) {
700 			/* non-BMP */
701 			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
702 			rv++;
703 			goto next;
704 		}
705 
706 		KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
707 		if (euc16 == KICONV_JA_NODEST) {
708 			euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
709 		}
710 		if (euc16 == KICONV_JA_NODEST) {
711 			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
712 			rv++;
713 			goto next;
714 		}
715 
716 		switch (euc16 & 0x8080) {
717 		case 0x0000:	/* CS0 */
718 			ic = (uchar_t)euc16;
719 			KICONV_JA_NPUT(ic);
720 			break;
721 		case 0x8080:	/* CS1 */
722 			ic = (uchar_t)((euc16 >> 8) & 0xff);
723 			KICONV_JA_NPUT(ic);
724 			ic = (uchar_t)(euc16 & 0xff);
725 			KICONV_JA_NPUT(ic);
726 			break;
727 		case 0x0080:	/* CS2 */
728 			KICONV_JA_NPUT(SS2);
729 			ic = (uchar_t)euc16;
730 			KICONV_JA_NPUT(ic);
731 			break;
732 		case 0x8000:	/* CS3 */
733 			KICONV_JA_NPUT(SS3);
734 			ic = (uchar_t)((euc16 >> 8) & 0xff);
735 			KICONV_JA_NPUT(ic);
736 			ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
737 			KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
738 			break;
739 		}
740 next:
741 		/*
742 		 * One character successfully converted so update
743 		 * values outside of this function's stack.
744 		 */
745 		*inbytesleft = ileft;
746 		*outbytesleft = oleft;
747 	}
748 
749 ret:
750 	return (rv);
751 }
752 
753 static size_t
754 kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
755 		char **outbuf, size_t *outbytesleft, int *errno)
756 {
757 	if (! kcd || kcd == (void *)-1) {
758 		*errno = EBADF;
759 		return ((size_t)-1);
760 	}
761 
762 	return (_do_kiconv_fr_eucjp(kcd, inbuf, inbytesleft,
763 	    outbuf, outbytesleft, errno));
764 }
765 
766 static size_t
767 kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
768 		char **outbuf, size_t *outbytesleft, int *errno)
769 {
770 	if (! kcd || kcd == (void *)-1) {
771 		*errno = EBADF;
772 		return ((size_t)-1);
773 	}
774 
775 	return (_do_kiconv_to_eucjp(kcd, inbuf, inbytesleft,
776 	    outbuf, outbytesleft, errno));
777 }
778 
779 static size_t
780 kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
781 	size_t *outbytesleft, int flag, int *errno)
782 {
783 	return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
784 	    outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
785 }
786 
787 static size_t
788 kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
789 	size_t *outbytesleft, int flag, int *errno)
790 {
791 	return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
792 	    outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
793 }
794 
795 static size_t
796 kiconvstr_fr_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
797 	size_t *outbytesleft, int flag, int *errno)
798 {
799 	return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
800 	    outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
801 }
802 
803 static size_t
804 kiconvstr_to_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
805 	size_t *outbytesleft, int flag, int *errno)
806 {
807 	return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
808 	    outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
809 }
810 
/*
 * Conversion loop for the "from Shift_JIS" direction of the stateful
 * interface.
 *
 * One character at a time is read from *inbuf and classified by its
 * first byte: ASCII, single-byte kana, two-byte kanji, two-byte
 * supplementary kanji, the IBM / NEC-IBM extension areas, or a lead byte
 * of 0xeb..0xec (always mapped to U+FFFD).  Two-byte codes are first
 * turned into JIS row/cell values and then mapped to Unicode through the
 * JIS X 0208 or JIS X 0212 tables.  Output goes through KICONV_JA_PUTU.
 *
 * kcd is dereferenced unconditionally to fetch the table id, so it must
 * be valid here.
 *
 * *inbuf, *inbytesleft, *outbuf and *outbytesleft are updated after each
 * completely converted character.
 *
 * Returns the number of characters whose mapped value was
 * KICONV_JA_REPLACE, or 0 right away when inbuf or *inbuf is NULL.
 * Errors are raised by the KICONV_JA_* macros, which leave through the
 * ret label (presumably with rv set to (size_t)-1 and *errno filled in -
 * see kiconv_ja.h).
 */
static size_t
_do_kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
		char **outbuf, size_t *outbytesleft, int *errno)
{
	uint_t	uni;			/* UTF-32 */
	uint_t	index;			/* index for table lookup */
	uchar_t	ic1, ic2;		/* 1st and 2nd bytes of a char */
	size_t	rv = 0;			/* return value of this function */

	/* Working copies of the cursors; the macros operate on these. */
	uchar_t	*ip;
	size_t		ileft;
	char		*op;
	size_t		oleft;
	size_t		id = ((kiconv_state_t)kcd)->id;

	if ((inbuf == NULL) || (*inbuf == NULL)) {
		return (0);
	}

	ip = (uchar_t *)*inbuf;
	ileft = *inbytesleft;
	op = *outbuf;
	oleft = *outbytesleft;

	while (ileft != 0) {
		KICONV_JA_NGET(ic1); 			/* get 1st byte */

		if (KICONV_JA_ISASC((int)ic1)) {	/* ASCII; 1 byte */
			uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
			KICONV_JA_PUTU(uni);
		} else if (KICONV_JA_ISSJKANA(ic1)) { /* 0201 Kana; 1byte */
			uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
			KICONV_JA_PUTU(uni);
		} else if (KICONV_JA_ISSJKANJI1(ic1)) { /* 0208/UDC; 2bytes */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISSJKANJI2(ic2)) {
				/*
				 * Turn the byte pair into JIS row (ic1) and
				 * cell (ic2); a 2nd byte of 0x9f or above
				 * selects the following row.
				 */
				ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
				if (ic2 >= 0x9f) {
					ic1++;
				}
				ic2 = kiconv_ja_sjtojis2[ic2];
				KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
				if (uni == KICONV_JA_NODEST) {
					index = ((ic1 - 0x21) * 94)
					    + (ic2 - 0x21);
					uni = kiconv_ja_jisx0208_to_ucs2[index];
				}
				if (uni == KICONV_JA_REPLACE)
					rv++;
				KICONV_JA_PUTU(uni);
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
				/* NOTREACHED */
			}
		} else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISSJKANJI2(ic2)) {
				/*
				 * Same row/cell derivation as above, but the
				 * result indexes the JIS X 0212 table and no
				 * id-specific mapping is tried.
				 */
				ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
				if (ic2 >= 0x9f) {
					ic1++;
				}
				index = ((ic1 - 0x21) * 94)
				    + (kiconv_ja_sjtojis2[ic2] - 0x21);
				uni = kiconv_ja_jisx0212_to_ucs2[index];
				if (uni == KICONV_JA_REPLACE)
					rv++;
				KICONV_JA_PUTU(uni);
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
		    KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
			/*
			 * We need a special treatment for each codes.
			 * By adding some offset number for them, we
			 * can process them as the same way of that of
			 * extended IBM chars.
			 */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISSJKANJI2(ic2)) {
				ushort_t dest, upper, lower;
				dest = (ic1 << 8) + ic2;
				/*
				 * Codes in 0xed40..0xeffc are remapped by
				 * KICONV_JA_REMAP_NEC; 0xffff afterwards
				 * means there is no equivalent.
				 */
				if ((0xed40 <= dest) && (dest <= 0xeffc)) {
					KICONV_JA_REMAP_NEC(dest);
					if (dest == 0xffff) {
						KICONV_JA_RETERROR(EILSEQ)
					}
				}
				/*
				 * XXX: 0xfa54 and 0xfa5b must be mapped
				 *	to JIS0208 area. Therefore we
				 *	have to do special treatment.
				 */
				if ((dest == 0xfa54) || (dest == 0xfa5b)) {
					if (dest == 0xfa54) {
						upper = 0x22;
						lower = 0x4c;
					} else {
						upper = 0x22;
						lower = 0x68;
					}
					KICONV_JA_CNV_JISMS_TO_U2(id, uni,
					    upper, lower);
					if (uni == KICONV_JA_NODEST) {
						index = (uint_t)((upper - 0x21)
						    * 94 + (lower - 0x21));
						uni = kiconv_ja_jisx0208_to_ucs2
						    [index];
					}
					if (uni == KICONV_JA_REPLACE)
						rv++;
					KICONV_JA_PUTU(uni);
				} else {
					/*
					 * Reduce the code to an index into
					 * kiconv_ja_sjtoibmext: offset from
					 * 0xfa40, minus 0x40 for every lead
					 * byte above 0xfa.
					 */
					dest = dest - 0xfa40 -
					    (((dest>>8) - 0xfa) * 0x40);
					dest = kiconv_ja_sjtoibmext[dest];
					if (dest == 0xffff) {
						KICONV_JA_RETERROR(EILSEQ)
					}
					upper = (dest >> 8) & KICONV_JA_CMASK;
					lower = dest & KICONV_JA_CMASK;
					KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
					    upper, lower);
					if (uni == KICONV_JA_NODEST) {
						index = (uint_t)((upper - 0x21)
						    * 94 + (lower - 0x21));
						uni = kiconv_ja_jisx0212_to_ucs2
						    [index];
					}
					if (uni == KICONV_JA_REPLACE)
						rv++;
					KICONV_JA_PUTU(uni);
				}
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
		/*
		 * Based on the draft convention of OSF-JVC CDEWG,
		 * characters in this area will be mapped to
		 * "CHIKAN-MOJI." (convertible character)
		 * We use U+FFFD in this case.
		 */
			KICONV_JA_NGET(ic2);
			if (KICONV_JA_ISSJKANJI2(ic2)) {
				/* not counted in rv on this path */
				uni = 0xfffd;
				KICONV_JA_PUTU(uni);
			} else { /* 2nd byte check failed */
				KICONV_JA_RETERROR(EILSEQ)
			}
		} else { /* 1st byte check failed */
			KICONV_JA_RETERROR(EILSEQ)
		}

		/*
		 * One character successfully converted so update
		 * values outside of this function's stack.
		 */
		*inbuf = (char *)ip;
		*inbytesleft = ileft;
		*outbuf = op;
		*outbytesleft = oleft;
	}

ret:
	return (rv);
}
978 
979 /*
980  * _kiconv_ja_lookuptbl()
981  * Return the index number if its index-ed number
982  * is the same as dest value.
983  */
984 static ushort_t
985 _kiconv_ja_lookuptbl(ushort_t dest)
986 {
987 	ushort_t tmp;
988 	int i;
989 	int sz = (sizeof (kiconv_ja_sjtoibmext) /
990 	    sizeof (kiconv_ja_sjtoibmext[0]));
991 
992 	for (i = 0; i < sz; i++) {
993 		tmp = (kiconv_ja_sjtoibmext[i] & 0x7f7f);
994 		if (tmp == dest)
995 			return ((i + 0xfa40 + ((i / 0xc0) * 0x40)));
996 	}
997 	return (0x3f);
998 }
999 
/*
 * Conversion loop for the "to Shift_JIS" direction of the stateful
 * interface.
 *
 * After KICONV_JA_CHECK_UTF8_BOM has looked at the start of the input,
 * one Unicode value at a time is fetched with KICONV_JA_GETU and mapped
 * to a 16-bit EUC code, which is then re-encoded as Shift_JIS bytes
 * according to its code set and written with KICONV_JA_NPUT.  Values
 * above 0xffff, values with no mapping and CS0 values that satisfy
 * KICONV_JA_ISC1CTRL are written as KICONV_JA_DEF_SINGLE and counted in
 * the return value.
 *
 * kcd is dereferenced unconditionally to fetch the table id, so it must
 * be valid here.
 *
 * *inbuf, *inbytesleft, *outbuf and *outbytesleft are updated after each
 * completely converted character.
 *
 * Returns the number of counted substitutions, or 0 right away when
 * inbuf or *inbuf is NULL.  Errors are raised by the KICONV_JA_* macros,
 * which leave through the ret label (presumably with rv set to
 * (size_t)-1 and *errno filled in - see kiconv_ja.h).
 */
static size_t
_do_kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
		char **outbuf, size_t *outbytesleft, int *errno)
{
	uchar_t	ic;			/* row value / output byte scratch */
	size_t		rv = 0;		/* return value of this function */
	uint_t		ucs4;		/* value fetched by KICONV_JA_GETU */
	ushort_t	euc16;		/* 16-bit EUC code for ucs4 */
	ushort_t	dest;		/* result of the IBM extension lookup */

	/* Working copies of the cursors; the macros operate on these. */
	uchar_t	*ip;
	size_t		ileft;
	char		*op;
	size_t		oleft;
	size_t		read_len;	/* not used directly; presumably for KICONV_JA_GETU */

	size_t		id = ((kiconv_state_t)kcd)->id;

	if ((inbuf == NULL) || (*inbuf == NULL)) {
		return (0);
	}

	ip = (uchar_t *)*inbuf;
	ileft = *inbytesleft;
	op = *outbuf;
	oleft = *outbytesleft;

	KICONV_JA_CHECK_UTF8_BOM(ip, ileft);

	while (ileft != 0) {
		KICONV_JA_GETU(&ucs4, 0);

		if (ucs4 > 0xffff) {
			/* non-BMP */
			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
			rv++;
			goto next;
		}

		/*
		 * Try the id-specific mapping first and fall back to the
		 * generic UCS-2 table when it yields KICONV_JA_NODEST.
		 */
		KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
		if (euc16 == KICONV_JA_NODEST) {
			euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
		}
		if (euc16 == KICONV_JA_NODEST) {
			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
			rv++;
			goto next;
		}

		/* The top bit of each byte of euc16 selects the code set. */
		switch (euc16 & 0x8080) {
		case 0x0000:	/* CS0 */
			if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
				KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
				rv++;
			} else {
				ic = (uchar_t)euc16;
				KICONV_JA_NPUT(ic);
			}
			break;
		case 0x8080:	/* CS1 */
			/* ic holds the row value; it picks the 1st byte */
			ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
			KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
			/*
			 * for even number row (Ku), add 0x80 to
			 * look latter half of kiconv_ja_jistosj2[] array
			 */
			ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
			    + (((ic % 2) == 0) ? 0x80 : 0x00));
			KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
			break;
		case 0x0080:	/* CS2 */
			/* written as a single byte, without a shift byte */
			ic = (uchar_t)euc16;
			KICONV_JA_NPUT(ic);
			break;
		case 0x8000:	/* CS3 */
			ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
			if (euc16 == 0xa271) {
				/* NUMERO SIGN */
				KICONV_JA_NPUT(0x87);
				KICONV_JA_NPUT(0x82);
			} else if (ic < 0x75) { /* check if IBM VDC */
				dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
				/*
				 * NOTE(review): _kiconv_ja_lookuptbl() in
				 * this file returns 0x3f, not 0xffff, when
				 * nothing matches, so this test looks
				 * unreachable; a miss appears to be handled
				 * by the single-byte path below and is not
				 * counted in rv - confirm.
				 */
				if (dest == 0xffff) {
					KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
				} else {
					/* avoid putting NUL ('\0') */
					if (dest > 0xff) {
						KICONV_JA_NPUT(
						    (dest >> 8) & 0xff);
						KICONV_JA_NPUT(dest & 0xff);
					} else {
						KICONV_JA_NPUT(dest & 0xff);
					}
				}
			} else {
				KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
				/*
				 * for even number row (Ku), add 0x80 to
				 * look latter half of kiconv_ja_jistosj2[]
				 */
				ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
				    + (((ic % 2) == 0) ? 0x80 : 0x00));
				KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
			}
			break;
		}

next:
		/*
		 * One character successfully converted so update
		 * values outside of this function's stack.
		 */
		*inbuf = (char *)ip;
		*inbytesleft = ileft;
		*outbuf = op;
		*outbytesleft = oleft;
	}

ret:
	return (rv);
}
1121 
1122 static size_t
1123 _do_kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1124 	size_t *outbytesleft, int flag, int *errno, uint8_t id)
1125 {
1126 	uint_t		uni;		/* UTF-32 */
1127 	uint_t		index;		/* index for table lookup */
1128 	uchar_t		ic1, ic2;	/* 1st and 2nd bytes of a char */
1129 	size_t		rv = 0;		/* return value of this function */
1130 
1131 	uchar_t	*ip;
1132 	size_t		ileft;
1133 	char		*op;
1134 	size_t		oleft;
1135 
1136 	boolean_t do_not_ignore_null;
1137 
1138 	if ((inbuf == NULL) || (*inbuf == NULL)) {
1139 		return (0);
1140 	}
1141 
1142 	ip = (uchar_t *)inbuf;
1143 	ileft = *inbytesleft;
1144 	op = outbuf;
1145 	oleft = *outbytesleft;
1146 
1147 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1148 
1149 	while (ileft != 0) {
1150 		KICONV_JA_NGET(ic1); 			/* get 1st byte */
1151 
1152 		if (KICONV_JA_ISASC((int)ic1)) {	/* ASCII; 1 byte */
1153 			if (ic1 == '\0' && do_not_ignore_null) {
1154 				return (0);
1155 			}
1156 			uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
1157 			KICONV_JA_PUTU(uni);
1158 		} else if (KICONV_JA_ISSJKANA(ic1)) {
1159 			/* JIS X 0201 Kana; 1 byte */
1160 			uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
1161 			KICONV_JA_PUTU(uni);
1162 		} else if (KICONV_JA_ISSJKANJI1(ic1)) {
1163 			/* JIS X 0208 or UDC; 2 bytes */
1164 			if (flag & KICONV_REPLACE_INVALID) {
1165 				KICONV_JA_NGET_REP_FR_MB(ic2);
1166 			} else {
1167 				KICONV_JA_NGET(ic2);
1168 			}
1169 			if (KICONV_JA_ISSJKANJI2(ic2)) {
1170 				ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1171 				if (ic2 >= 0x9f) {
1172 					ic1++;
1173 				}
1174 				ic2 = kiconv_ja_sjtojis2[ic2];
1175 				KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
1176 				if (uni == KICONV_JA_NODEST) {
1177 					index = ((ic1 - 0x21) * 94)
1178 					    + (ic2 - 0x21);
1179 					uni = kiconv_ja_jisx0208_to_ucs2[index];
1180 				}
1181 				if (uni == KICONV_JA_REPLACE)
1182 					rv++;
1183 				KICONV_JA_PUTU(uni);
1184 			} else { /* 2nd byte check failed */
1185 				if (flag & KICONV_REPLACE_INVALID) {
1186 					KICONV_JA_PUTU(KICONV_JA_REPLACE);
1187 					rv++;
1188 				} else {
1189 					KICONV_JA_RETERROR(EILSEQ)
1190 				}
1191 				/* NOTREACHED */
1192 			}
1193 		} else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
1194 			if (flag & KICONV_REPLACE_INVALID) {
1195 				KICONV_JA_NGET_REP_FR_MB(ic2);
1196 			} else {
1197 				KICONV_JA_NGET(ic2);
1198 			}
1199 			if (KICONV_JA_ISSJKANJI2(ic2)) {
1200 				ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1201 				if (ic2 >= 0x9f) {
1202 					ic1++;
1203 				}
1204 				index = ((ic1 - 0x21) * 94)
1205 				    + (kiconv_ja_sjtojis2[ic2] - 0x21);
1206 				uni = kiconv_ja_jisx0212_to_ucs2[index];
1207 				if (uni == KICONV_JA_REPLACE)
1208 					rv++;
1209 				KICONV_JA_PUTU(uni);
1210 			} else { /* 2nd byte check failed */
1211 				if (flag & KICONV_REPLACE_INVALID) {
1212 					KICONV_JA_PUTU(KICONV_JA_REPLACE);
1213 					rv++;
1214 				} else {
1215 					KICONV_JA_RETERROR(EILSEQ)
1216 				}
1217 			}
1218 		} else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
1219 		    KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
1220 			/*
1221 			 * We need a special treatment for each codes.
1222 			 * By adding some offset number for them, we
1223 			 * can process them as the same way of that of
1224 			 * extended IBM chars.
1225 			 */
1226 			if (flag & KICONV_REPLACE_INVALID) {
1227 				KICONV_JA_NGET_REP_FR_MB(ic2);
1228 			} else {
1229 				KICONV_JA_NGET(ic2);
1230 			}
1231 			if (KICONV_JA_ISSJKANJI2(ic2)) {
1232 				ushort_t dest, upper, lower;
1233 				dest = (ic1 << 8) + ic2;
1234 				if ((0xed40 <= dest) && (dest <= 0xeffc)) {
1235 					KICONV_JA_REMAP_NEC(dest);
1236 					if (dest == 0xffff) {
1237 						if (flag &
1238 						    KICONV_REPLACE_INVALID) {
1239 							KICONV_JA_PUTU(
1240 							    KICONV_JA_REPLACE);
1241 							rv++;
1242 						} else {
1243 							KICONV_JA_RETERROR(
1244 							    EILSEQ)
1245 						}
1246 					}
1247 				}
1248 				/*
1249 				 * XXX: 0xfa54 and 0xfa5b must be mapped
1250 				 *	to JIS0208 area. Therefore we
1251 				 *	have to do special treatment.
1252 				 */
1253 				if ((dest == 0xfa54) || (dest == 0xfa5b)) {
1254 					if (dest == 0xfa54) {
1255 						upper = 0x22;
1256 						lower = 0x4c;
1257 					} else {
1258 						upper = 0x22;
1259 						lower = 0x68;
1260 					}
1261 					KICONV_JA_CNV_JISMS_TO_U2(id, uni,
1262 					    upper, lower);
1263 					if (uni == KICONV_JA_NODEST) {
1264 						index = (uint_t)((upper - 0x21)
1265 						    * 94 + (lower - 0x21));
1266 						uni = kiconv_ja_jisx0208_to_ucs2
1267 						    [index];
1268 					}
1269 					if (uni == KICONV_JA_REPLACE)
1270 						rv++;
1271 					KICONV_JA_PUTU(uni);
1272 				} else {
1273 					dest = dest - 0xfa40 -
1274 					    (((dest>>8) - 0xfa) * 0x40);
1275 					dest = kiconv_ja_sjtoibmext[dest];
1276 					if (dest == 0xffff) {
1277 						if (flag &
1278 						    KICONV_REPLACE_INVALID) {
1279 							KICONV_JA_PUTU(
1280 							    KICONV_JA_REPLACE);
1281 							rv++;
1282 						} else {
1283 							KICONV_JA_RETERROR(
1284 							    EILSEQ)
1285 						}
1286 					}
1287 					upper = (dest >> 8) & KICONV_JA_CMASK;
1288 					lower = dest & KICONV_JA_CMASK;
1289 					KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
1290 					    upper, lower);
1291 					if (uni == KICONV_JA_NODEST) {
1292 						index = (uint_t)((upper - 0x21)
1293 						    * 94 + (lower - 0x21));
1294 						uni = kiconv_ja_jisx0212_to_ucs2
1295 						    [index];
1296 					}
1297 					if (uni == KICONV_JA_REPLACE)
1298 						rv++;
1299 					KICONV_JA_PUTU(uni);
1300 				}
1301 			} else { /* 2nd byte check failed */
1302 				if (flag & KICONV_REPLACE_INVALID) {
1303 					KICONV_JA_PUTU(KICONV_JA_REPLACE);
1304 					rv++;
1305 				} else {
1306 					KICONV_JA_RETERROR(EILSEQ)
1307 				}
1308 			}
1309 		} else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
1310 		/*
1311 		 * Based on the draft convention of OSF-JVC CDEWG,
1312 		 * characters in this area will be mapped to
1313 		 * "CHIKAN-MOJI." (convertible character)
1314 		 * We use U+FFFD in this case.
1315 		 */
1316 			if (flag & KICONV_REPLACE_INVALID) {
1317 				KICONV_JA_NGET_REP_FR_MB(ic2);
1318 			} else {
1319 				KICONV_JA_NGET(ic2);
1320 			}
1321 			if (KICONV_JA_ISSJKANJI2(ic2)) {
1322 				uni = 0xfffd;
1323 				KICONV_JA_PUTU(uni);
1324 			} else { /* 2nd byte check failed */
1325 				if (flag & KICONV_REPLACE_INVALID) {
1326 					KICONV_JA_PUTU(KICONV_JA_REPLACE);
1327 					rv++;
1328 				} else {
1329 					KICONV_JA_RETERROR(EILSEQ)
1330 				}
1331 			}
1332 		} else { /* 1st byte check failed */
1333 			if (flag & KICONV_REPLACE_INVALID) {
1334 				KICONV_JA_PUTU(KICONV_JA_REPLACE);
1335 				rv++;
1336 			} else {
1337 				KICONV_JA_RETERROR(EILSEQ)
1338 			}
1339 		}
1340 
1341 next:
1342 		/*
1343 		 * One character successfully converted so update
1344 		 * values outside of this function's stack.
1345 		 */
1346 		*inbytesleft = ileft;
1347 		*outbytesleft = oleft;
1348 	}
1349 
1350 ret:
1351 	return (rv);
1352 }
1353 
1354 static size_t
1355 _do_kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1356 	size_t *outbytesleft, int flag, int *errno, uint8_t id)
1357 {
1358 	uchar_t		ic;
1359 	size_t		rv = 0;
1360 	uint_t		ucs4;
1361 	ushort_t	euc16;
1362 	ushort_t	dest;
1363 
1364 	uchar_t	*ip;
1365 	size_t		ileft;
1366 	char		*op;
1367 	size_t		oleft;
1368 	size_t		read_len;
1369 
1370 	boolean_t do_not_ignore_null;
1371 
1372 	if ((inbuf == NULL) || (*inbuf == NULL)) {
1373 		return (0);
1374 	}
1375 
1376 	ip = (uchar_t *)inbuf;
1377 	ileft = *inbytesleft;
1378 	op = outbuf;
1379 	oleft = *outbytesleft;
1380 
1381 	KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);
1382 
1383 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1384 
1385 	while (ileft != 0) {
1386 		KICONV_JA_GETU(&ucs4, flag);
1387 
1388 		if (ucs4 == 0x0 && do_not_ignore_null) {
1389 			return (0);
1390 		}
1391 
1392 		if (ucs4 > 0xffff) {
1393 			/* non-BMP */
1394 			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1395 			rv++;
1396 			goto next;
1397 		}
1398 
1399 		KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
1400 		if (euc16 == KICONV_JA_NODEST) {
1401 			euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
1402 		}
1403 		if (euc16 == KICONV_JA_NODEST) {
1404 			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1405 			rv++;
1406 			goto next;
1407 		}
1408 
1409 		switch (euc16 & 0x8080) {
1410 		case 0x0000:	/* CS0 */
1411 			if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
1412 				KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1413 				rv++;
1414 			} else {
1415 				ic = (uchar_t)euc16;
1416 				KICONV_JA_NPUT(ic);
1417 			}
1418 			break;
1419 		case 0x8080:	/* CS1 */
1420 			ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1421 			KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
1422 			/*
1423 			 * for even number row (Ku), add 0x80 to
1424 			 * look latter half of kiconv_ja_jistosj2[] array
1425 			 */
1426 			ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
1427 			    + (((ic % 2) == 0) ? 0x80 : 0x00));
1428 			KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1429 			break;
1430 		case 0x0080:	/* CS2 */
1431 			ic = (uchar_t)euc16;
1432 			KICONV_JA_NPUT(ic);
1433 			break;
1434 		case 0x8000:	/* CS3 */
1435 			ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1436 			if (euc16 == 0xa271) {
1437 				/* NUMERO SIGN */
1438 				KICONV_JA_NPUT(0x87);
1439 				KICONV_JA_NPUT(0x82);
1440 			} else if (ic < 0x75) { /* check if IBM VDC */
1441 				dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
1442 				if (dest == 0xffff) {
1443 					KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1444 				} else {
1445 					/* avoid putting NUL ('\0') */
1446 					if (dest > 0xff) {
1447 						KICONV_JA_NPUT(
1448 						    (dest >> 8) & 0xff);
1449 						KICONV_JA_NPUT(dest & 0xff);
1450 					} else {
1451 						KICONV_JA_NPUT(dest & 0xff);
1452 					}
1453 				}
1454 			} else {
1455 				KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
1456 				/*
1457 				 * for even number row (Ku), add 0x80 to
1458 				 * look latter half of kiconv_ja_jistosj2[]
1459 				 */
1460 				ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
1461 				    + (((ic % 2) == 0) ? 0x80 : 0x00));
1462 				KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1463 			}
1464 			break;
1465 		}
1466 
1467 next:
1468 		/*
1469 		 * One character successfully converted so update
1470 		 * values outside of this function's stack.
1471 		 */
1472 		*inbytesleft = ileft;
1473 		*outbytesleft = oleft;
1474 	}
1475 
1476 ret:
1477 	return (rv);
1478 }
1479 
1480 static size_t
1481 kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1482 		char **outbuf, size_t *outbytesleft, int *errno)
1483 {
1484 	if (! kcd || kcd == (void *)-1) {
1485 		*errno = EBADF;
1486 		return ((size_t)-1);
1487 	}
1488 
1489 	return (_do_kiconv_fr_sjis(kcd, inbuf, inbytesleft,
1490 	    outbuf, outbytesleft, errno));
1491 }
1492 
1493 static size_t
1494 kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1495 		char **outbuf, size_t *outbytesleft, int *errno)
1496 {
1497 	if (! kcd || kcd == (void *)-1) {
1498 		*errno = EBADF;
1499 		return ((size_t)-1);
1500 	}
1501 
1502 	return (_do_kiconv_to_sjis(kcd, inbuf, inbytesleft,
1503 	    outbuf, outbytesleft, errno));
1504 }
1505 
1506 static size_t
1507 kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1508 	size_t *outbytesleft, int flag, int *errno)
1509 {
1510 	return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1511 	    outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1512 }
1513 
1514 static size_t
1515 kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1516 	size_t *outbytesleft, int flag, int *errno)
1517 {
1518 	return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1519 	    outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1520 }
1521 
1522 static size_t
1523 kiconvstr_fr_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1524 	size_t *outbytesleft, int flag, int *errno)
1525 {
1526 	return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1527 	    outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1528 }
1529 
1530 static size_t
1531 kiconvstr_to_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1532 	size_t *outbytesleft, int flag, int *errno)
1533 {
1534 	return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1535 	    outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1536 }
1537 
/*
 * Conversions offered by this module; referenced from kiconv_ja_info.
 *
 * Every entry lists, in order: two code names, the open routine, the
 * kiconv routine, the close routine and the kiconvstr routine.
 * NOTE(review): judging by the routines paired with each entry, the first
 * name is the target code and the second the source code — confirm
 * against the kiconv_ops_t definition.
 *
 * The "eucjp"/"eucjpms" pairs share their kiconv routine and differ only
 * in the open and kiconvstr routines; the same holds for "sjis"/"cp932".
 */
static kiconv_ops_t kiconv_ja_ops_tbl[] = {
	{
		"eucjp", "utf-8", open_eucjp,
		kiconv_to_eucjp, close_ja, kiconvstr_to_eucjp
	},
	{
		"utf-8", "eucjp", open_eucjp,
		kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjp
	},
	{
		"eucjpms", "utf-8", open_eucjpms,
		kiconv_to_eucjp, close_ja, kiconvstr_to_eucjpms
	},
	{
		"utf-8", "eucjpms", open_eucjpms,
		kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjpms
	},
	{
		"sjis", "utf-8", open_sjis,
		kiconv_to_sjis, close_ja, kiconvstr_to_sjis
	},
	{
		"utf-8", "sjis", open_sjis,
		kiconv_fr_sjis, close_ja, kiconvstr_fr_sjis
	},
	{
		"cp932", "utf-8", open_cp932,
		kiconv_to_sjis, close_ja, kiconvstr_to_cp932
	},
	{
		"utf-8", "cp932", open_cp932,
		kiconv_fr_sjis, close_ja, kiconvstr_fr_cp932
	}
};
1572 
/*
 * Alias names and the code names they stand for.  The two arrays are
 * parallel: kiconv_ja_aliases[i] is an alias of kiconv_ja_canonicals[i],
 * so they must always have the same number of elements.
 */
static char *kiconv_ja_aliases[] = {"932", "shiftjis", "pck"};
static char *kiconv_ja_canonicals[] = {"cp932", "sjis", "sjis"};
1575 
/*
 * Element counts of kiconv_ja_ops_tbl[] and kiconv_ja_aliases[], derived
 * from the array sizes so they follow any change to the tables.
 */
#define	KICONV_JA_MAX_JA_OPS \
	(sizeof (kiconv_ja_ops_tbl) / sizeof (kiconv_ops_t))
#define	KICONV_JA_MAX_JA_ALIAS \
	(sizeof (kiconv_ja_aliases) / sizeof (char *))
1580 
/*
 * Description of this module; referenced from modlkiconv_ja below.
 */
static kiconv_module_info_t kiconv_ja_info = {
	"kiconv_ja",		/* module name */
	KICONV_JA_MAX_JA_OPS,	/* number of conversion in kiconv_ja */
	kiconv_ja_ops_tbl,	/* kiconv_ja ops table */
	KICONV_JA_MAX_JA_ALIAS,	/* number of alias in kiconv_ja */
	kiconv_ja_aliases,	/* kiconv_ja aliases */
	kiconv_ja_canonicals,	/* kiconv_ja canonicals */
	/*
	 * NOTE(review): the meaning of this last member is not visible
	 * here — confirm against kiconv_module_info_t.
	 */
	0
};
1590 
/*
 * Linkage element for this module: ties mod_kiconvops and a description
 * string to kiconv_ja_info.  Referenced from modlinkage below.
 */
static struct modlkiconv modlkiconv_ja = {
	&mod_kiconvops,
	"kiconv module for Japanese",
	&kiconv_ja_info
};
1596 
/*
 * Module linkage passed to mod_install(), mod_info() and mod_remove() by
 * _init(), _info() and _fini(); its list holds only modlkiconv_ja and is
 * terminated by NULL.
 */
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modlkiconv_ja,
	NULL
};
1602 
1603 int
1604 _init(void)
1605 {
1606 	int err;
1607 
1608 	err = mod_install(&modlinkage);
1609 	if (err)
1610 		cmn_err(CE_WARN, "kiconv_ja: failed to load kernel module");
1611 
1612 	return (err);
1613 }
1614 
1615 int
1616 _info(struct modinfo *modinfop)
1617 {
1618 	return (mod_info(&modlinkage, modinfop));
1619 }
1620 
1621 int
1622 _fini(void)
1623 {
1624 	int err;
1625 
1626 	/*
1627 	 * If this module is being used, then, we cannot remove the module.
1628 	 * The following checking will catch pretty much all usual cases.
1629 	 *
1630 	 * Any remaining will be catached by the kiconv_unregister_module()
1631 	 * during mod_remove() at below.
1632 	 */
1633 	if (kiconv_module_ref_count(KICONV_MODULE_ID_JA))
1634 		return (EBUSY);
1635 
1636 	err = mod_remove(&modlinkage);
1637 	if (err)
1638 		cmn_err(CE_WARN, "kiconv_ja: failed to remove kernel module");
1639 
1640 	return (err);
1641 }
1642