xref: /illumos-gate/usr/src/uts/common/kiconv/kiconv_ja/kiconv_ja.c (revision 2667222682da9a362b1246a1e39cdaf1f38cb56e)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/sysmacros.h>
30 #include <sys/systm.h>
31 #include <sys/debug.h>
32 #include <sys/kmem.h>
33 #include <sys/sunddi.h>
34 #include <sys/byteorder.h>
35 #include <sys/errno.h>
36 #include <sys/euc.h>
37 #include <sys/modctl.h>
38 #include <sys/kiconv.h>
39 
40 #include <sys/kiconv_ja.h>
41 #include <sys/kiconv_ja_jis_to_unicode.h>
42 #include <sys/kiconv_ja_unicode_to_jis.h>
43 
44 /*
45  * The following vector gives the total length in bytes of a UTF-8
46  * character, indexed by the first byte of the character. Entries are also
47  * compared against U8_ILLEGAL_CHAR and U8_OUT_OF_RANGE_CHAR by the readers
 * in this file. This is defined in u8_textprep.c.
48  */
49 extern const int8_t u8_number_of_bytes[];
50 
51 /*
52  * The following is a vector of bit-masks to get the used bits in
53  * the first byte of a UTF-8 character. The index is the number of bytes
54  * that follow the first byte. This is defined in uconv.c.
55  */
56 extern const uchar_t u8_masks_tbl[];
57 
58 /*
59  * The following two vectors provide the valid minimum and
60  * maximum values for the 2nd byte of a multibyte UTF-8 character for
61  * better illegal sequence checking. The index value must be the value of
62  * the first byte of the UTF-8 character. These are defined in u8_textprep.c.
63  */
64 extern const uint8_t u8_valid_min_2nd_byte[];
65 extern const uint8_t u8_valid_max_2nd_byte[];
66 
67 static kiconv_ja_euc16_t
kiconv_ja_ucs2_to_euc16(kiconv_ja_ucs2_t ucs2)68 kiconv_ja_ucs2_to_euc16(kiconv_ja_ucs2_t ucs2)
69 {
70 	const kiconv_ja_euc16_t	*p;
71 
72 	if ((p = kiconv_ja_ucs2_to_euc16_index[ucs2 >> 8]) != NULL)
73 		return (p[ucs2 & 0xff]);
74 
75 	return (KICONV_JA_NODEST);
76 }
77 
/*
 * utf8_ucs()
 * Decode one UTF-8 character starting at *pip into a single code point.
 *
 *   p      - receives the decoded value on success
 *   pip    - in/out read position; written back only on success
 *   pileft - in/out count of available bytes; written back only on success
 *   errno  - error slot used by the KICONV_JA_NGET/KICONV_JA_RETERROR macros
 *
 * On success the number of bytes consumed (*pileft - ileft) is returned.
 * Failures are raised through KICONV_JA_RETERROR, which is defined outside
 * this file. NOTE(review): it is assumed to store the code in *errno, set
 * rv to a non-zero failure value and jump to "ret" - confirm in kiconv_ja.h.
 */
78 static size_t
utf8_ucs(uint_t * p,uchar_t ** pip,size_t * pileft,int * errno)79 utf8_ucs(uint_t *p, uchar_t **pip, size_t *pileft, int *errno)
80 {
81 	uint_t	l;		/* to be copied to *p on successful return */
82 	uchar_t	ic;		/* current byte */
83 	uchar_t	ic1;		/* 1st byte */
84 	uchar_t	*ip = *pip;	/* next byte to read */
85 	size_t	ileft = *pileft; /* number of bytes available */
86 	size_t	rv = 0;		/* return value of this function */
87 	int	remaining_bytes;
88 	int	u8_size;
89 
90 	KICONV_JA_NGET(ic1);	/* read 1st byte */
91 
92 	if (ic1 < 0x80) {
93 		/* successfully converted */
94 		*p = (uint_t)ic1;
95 		goto ret;
96 	}
97 
	/* classify the lead byte: a length, or one of the two error markers */
98 	u8_size = u8_number_of_bytes[ic1];
99 	if (u8_size == U8_ILLEGAL_CHAR) {
100 		KICONV_JA_RETERROR(EILSEQ)
101 	} else if (u8_size == U8_OUT_OF_RANGE_CHAR) {
102 		KICONV_JA_RETERROR(ERANGE)
103 	}
104 
	/* number of bytes that must still follow the lead byte */
105 	remaining_bytes = u8_size - 1;
106 	if (remaining_bytes != 0) {
107 		l = ic1 & u8_masks_tbl[remaining_bytes];
108 
109 		for (; remaining_bytes > 0; remaining_bytes--) {
110 			KICONV_JA_NGET(ic);
			/*
			 * ic1 doubles as a "2nd byte not yet checked" flag:
			 * the 2nd byte is range-checked against the per-lead
			 * tables, later bytes against plain 0x80..0xbf.
			 */
111 			if (ic1 != 0U) {
112 				if ((ic < u8_valid_min_2nd_byte[ic1]) ||
113 				    (ic > u8_valid_max_2nd_byte[ic1])) {
114 					KICONV_JA_RETERROR(EILSEQ)
115 				}
116 				ic1 = 0U; /* 2nd byte check done */
117 			} else {
118 				if ((ic < 0x80) || (ic > 0xbf)) {
119 					KICONV_JA_RETERROR(EILSEQ)
120 				}
121 			}
122 			l = (l << 6) | (ic & 0x3f);
123 		}
124 
125 		/* successfully converted */
126 		*p = l;
127 	} else {
		/* a table length of one for a lead byte >= 0x80 is rejected */
128 		KICONV_JA_RETERROR(EILSEQ)
129 	}
130 
131 ret:
132 	if (rv == 0) {
133 		/*
134 		 * Update rv, *pip, and *pileft on successful return.
135 		 */
136 		rv = *pileft - ileft;
137 		*pip = ip;
138 		*pileft = ileft;
139 	}
140 
141 	return (rv);
142 }
143 
144 static size_t
utf8_ucs_replace(uint_t * p,uchar_t ** pip,size_t * pileft,size_t * repnum)145 utf8_ucs_replace(uint_t *p, uchar_t **pip, size_t *pileft, size_t *repnum)
146 {
147 	uint_t	l;		/* to be copied to *p on successful return */
148 	uchar_t	ic;		/* current byte */
149 	uchar_t	ic1;		/* 1st byte */
150 	uchar_t	*ip = *pip;	/* next byte to read */
151 	size_t	ileft = *pileft; /* number of bytes available */
152 	size_t	rv = 0;		/* return value of this function */
153 	int	remaining_bytes;
154 	int	u8_size;
155 
156 	KICONV_JA_NGET_REP_TO_MB(ic1);	/* read 1st byte */
157 
158 	if (ic1 < 0x80) {
159 		/* successfully converted */
160 		l = (uint_t)ic1;
161 		goto ret;
162 	}
163 
164 	u8_size = u8_number_of_bytes[ic1];
165 	if (u8_size == U8_ILLEGAL_CHAR || u8_size == U8_OUT_OF_RANGE_CHAR) {
166 		l = KICONV_JA_DEF_SINGLE;
167 		(*repnum)++;
168 		goto ret;
169 	}
170 
171 	remaining_bytes = u8_size - 1;
172 
173 	if (remaining_bytes != 0) {
174 		l = ic1 & u8_masks_tbl[remaining_bytes];
175 
176 		for (; remaining_bytes > 0; remaining_bytes--) {
177 			KICONV_JA_NGET_REP_TO_MB(ic);
178 			if (ic1 != 0U) {
179 				if ((ic < u8_valid_min_2nd_byte[ic1]) ||
180 				    (ic > u8_valid_max_2nd_byte[ic1])) {
181 					l = KICONV_JA_DEF_SINGLE;
182 					(*repnum)++;
183 					ileft -= (remaining_bytes - 1);
184 					ip += (remaining_bytes - 1);
185 					break;
186 				}
187 				ic1 = 0U; /* 2nd byte check done */
188 			} else {
189 				if ((ic < 0x80) || (ic > 0xbf)) {
190 					l = KICONV_JA_DEF_SINGLE;
191 					(*repnum)++;
192 					ileft -= (remaining_bytes - 1);
193 					ip += (remaining_bytes - 1);
194 					break;
195 				}
196 			}
197 			l = (l << 6) | (ic & 0x3f);
198 		}
199 	} else {
200 		l = KICONV_JA_DEF_SINGLE;
201 		(*repnum)++;
202 	}
203 
204 ret:
205 	/* successfully converted */
206 	*p = l;
207 	rv = *pileft - ileft;
208 
209 	*pip = ip;
210 	*pileft = ileft;
211 
212 	return (rv);
213 }
214 
215 static size_t				/* return #bytes read, or -1 */
read_unicode(uint_t * p,uchar_t ** pip,size_t * pileft,int * errno,int flag,size_t * rv)216 read_unicode(
217 	uint_t	*p,		/* point variable to store UTF-32 */
218 	uchar_t	**pip,		/* point pointer to input buf */
219 	size_t	*pileft,	/* point #bytes left in input buf */
220 	int	*errno,		/* point variable to errno */
221 	int	flag,		/* kiconvstr flag */
222 	size_t	*rv)		/* point return valuse */
223 {
224 	if (flag & KICONV_REPLACE_INVALID)
225 		return (utf8_ucs_replace(p, pip, pileft, rv));
226 	else
227 		return (utf8_ucs(p, pip, pileft, errno));
228 }
229 
/*
 * write_unicode()
 * Encode one code point as UTF-8 into the output buffer.
 *
 * The encoded length (1 to 4) is returned. Values in 0xd800..0xdfff and
 * values above 0x10ffff are rejected with EILSEQ through
 * KICONV_JA_RETERROR. *pop and *poleft are written back only when rv is
 * not (size_t)-1.
 *
 * NOTE(review): KICONV_JA_NPUT and KICONV_JA_RETERROR are defined outside
 * this file; they are assumed to use op/oleft/errno/rv and to jump to "ret"
 * on failure - confirm in kiconv_ja.h.
 */
230 static size_t
write_unicode(uint_t u32,char ** pop,size_t * poleft,int * errno)231 write_unicode(
232 	uint_t	u32,		/* UTF-32 to write */
233 	char	**pop,		/* point pointer to output buf */
234 	size_t	*poleft,	/* point #bytes left in output buf */
235 	int	*errno)		/* point variable to errno */
236 {
237 	char	*op = *pop;
238 	size_t	oleft = *poleft;
239 	size_t	rv = 0;			/* return value */
240 
241 	if (u32 <= 0x7f) {
		/* 1 byte: 0xxxxxxx */
242 		KICONV_JA_NPUT((uchar_t)(u32));
243 		rv = 1;
244 	} else if (u32 <= 0x7ff) {
		/* 2 bytes: 110xxxxx 10xxxxxx */
245 		KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x1f) | 0xc0));
246 		KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
247 		rv = 2;
248 	} else if ((u32 >= 0xd800) && (u32 <= 0xdfff)) {
		/* surrogate range is never written */
249 		KICONV_JA_RETERROR(EILSEQ)
250 	} else if (u32 <= 0xffff) {
		/* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
251 		KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x0f) | 0xe0));
252 		KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
253 		KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
254 		rv = 3;
255 	} else if (u32 <= 0x10ffff) {
		/* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
256 		KICONV_JA_NPUT((uchar_t)((((u32)>>18) & 0x07) | 0xf0));
257 		KICONV_JA_NPUT((uchar_t)((((u32)>>12) & 0x3f) | 0x80));
258 		KICONV_JA_NPUT((uchar_t)((((u32)>>6) & 0x3f) | 0x80));
259 		KICONV_JA_NPUT((uchar_t)(((u32) & 0x3f) | 0x80));
260 		rv = 4;
261 	} else {
262 		KICONV_JA_RETERROR(EILSEQ)
263 	}
264 
265 ret:
266 	if (rv != (size_t)-1) {
267 		/* update *pop and *poleft only on successful return */
268 		*pop = op;
269 		*poleft = oleft;
270 	}
271 
272 	return (rv);
273 }
274 
275 static void *
_kiconv_ja_open_unicode(uint8_t id)276 _kiconv_ja_open_unicode(uint8_t id)
277 {
278 	kiconv_state_t	kcd;
279 
280 	kcd = (kiconv_state_t)kmem_alloc(sizeof (kiconv_state_data_t),
281 	    KM_SLEEP);
282 	kcd->id = id;
283 	kcd->bom_processed = 0;
284 	return ((void *)kcd);
285 }
286 
287 static void *
open_eucjp(void)288 open_eucjp(void)
289 {
290 	return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP));
291 }
292 
293 static void *
open_eucjpms(void)294 open_eucjpms(void)
295 {
296 	return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_EUCJP_MS));
297 }
298 
299 static void *
open_sjis(void)300 open_sjis(void)
301 {
302 	return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_SJIS));
303 }
304 
305 static void *
open_cp932(void)306 open_cp932(void)
307 {
308 	return (_kiconv_ja_open_unicode(KICONV_JA_TBLID_CP932));
309 }
310 
311 int
close_ja(void * kcd)312 close_ja(void *kcd)
313 {
314 	if (! kcd || kcd == (void *)-1)
315 		return (EBADF);
316 
317 	kmem_free(kcd, sizeof (kiconv_state_data_t));
318 
319 	return (0);
320 }
321 
/*
 * _do_kiconv_fr_eucjp()
 * Convert EUC-JP style input to Unicode, one character per loop iteration.
 * Each converted code point is emitted through KICONV_JA_PUTU.
 *
 * kcd supplies the table id handed to the KICONV_JA_CNV_* macros; it is
 * dereferenced before any check, so callers must pass a valid state.
 * The caller's *inbuf, *inbytesleft, *outbuf and *outbytesleft are updated
 * after every character that converts completely, so when the loop is left
 * through "ret" they describe the last fully converted character.
 *
 * Returns 0 when inbuf or *inbuf is NULL. Otherwise returns rv, which is
 * incremented for every character whose mapping is KICONV_JA_REPLACE.
 *
 * NOTE(review): KICONV_JA_NGET, KICONV_JA_PUTU and KICONV_JA_RETERROR are
 * defined outside this file; they are assumed to set *errno and rv and to
 * jump to "ret" on failure - confirm in kiconv_ja.h.
 */
322 static size_t
_do_kiconv_fr_eucjp(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)323 _do_kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
324     char **outbuf, size_t *outbytesleft, int *errno)
325 {
326 	uint_t		u32;		/* UTF-32 */
327 	uint_t		index;		/* index for table lookup */
328 	uchar_t		ic1, ic2, ic3;	/* 1st, 2nd, and 3rd bytes of a char */
329 	size_t		rv = 0;		/* return value of this function */
330 
331 	uchar_t	*ip;
332 	size_t		ileft;
333 	char		*op;
334 	size_t		oleft;
335 	size_t		id = ((kiconv_state_t)kcd)->id;
336 
337 	if ((inbuf == NULL) || (*inbuf == NULL)) {
338 		return (0);
339 	}
340 
341 	ip = (uchar_t *)*inbuf;
342 	ileft = *inbytesleft;
343 	op = *outbuf;
344 	oleft = *outbytesleft;
345 
346 	while (ileft != 0) {
347 		KICONV_JA_NGET(ic1);		/* get 1st byte */
348 
349 		if (KICONV_JA_ISASC(ic1)) {	/* ASCII; 1 byte */
350 			u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
351 			KICONV_JA_PUTU(u32);
352 		} else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
353 			KICONV_JA_NGET(ic2);
354 			if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
355 				ic1 &= KICONV_JA_CMASK;
356 				ic2 &= KICONV_JA_CMASK;
				/* table-id specific mapping first, generic table second */
357 				KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
358 				if (u32 == KICONV_JA_NODEST) {
359 					index = (ic1 - 0x21) * 94 + ic2 - 0x21;
360 					u32 = kiconv_ja_jisx0208_to_ucs2[index];
361 				}
362 				if (u32 == KICONV_JA_REPLACE)
363 					rv++;
364 				KICONV_JA_PUTU(u32);
365 			} else { /* 2nd byte check failed */
366 				KICONV_JA_RETERROR(EILSEQ)
367 			}
368 		} else if (ic1 == SS2) { /* JIS X 0201 Kana; 2 bytes */
369 			KICONV_JA_NGET(ic2);
370 			if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
371 				index = (ic2 - 0xa1);
372 				u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
373 				KICONV_JA_PUTU(u32);
374 			} else { /* 2nd byte check failed */
375 				KICONV_JA_RETERROR(EILSEQ)
376 			}
377 		} else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
378 			KICONV_JA_NGET(ic2);
379 			if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
380 				KICONV_JA_NGET(ic3);
381 				if (KICONV_JA_ISCS3(ic3)) {
382 					/* 3rd byte check passed */
383 					ic2 &= KICONV_JA_CMASK;
384 					ic3 &= KICONV_JA_CMASK;
385 					KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
386 					    ic2, ic3);
387 					if (u32 == KICONV_JA_NODEST) {
388 						index = ((ic2 - 0x21) * 94 +
389 						    (ic3 - 0x21));
390 						u32 = kiconv_ja_jisx0212_to_ucs2
391 						    [index];
392 					}
393 					if (u32 == KICONV_JA_REPLACE)
394 						rv++;
395 					KICONV_JA_PUTU(u32);
396 				} else { /* 3rd byte check failed */
397 					KICONV_JA_RETERROR(EILSEQ)
398 				}
399 			} else { /* 2nd byte check failed */
400 				KICONV_JA_RETERROR(EILSEQ)
401 			}
402 		} else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
403 			/* C1 control; 1 byte */
404 			u32 = ic1;
405 			KICONV_JA_PUTU(u32);
406 		} else { /* 1st byte check failed */
407 			KICONV_JA_RETERROR(EILSEQ)
408 		}
409 
410 		/*
411 		 * One character successfully converted so update
412 		 * values outside of this function's stack.
413 		 */
414 		*inbuf = (char *)ip;
415 		*inbytesleft = ileft;
416 		*outbuf = op;
417 		*outbytesleft = oleft;
418 	}
419 
420 ret:
421 	return (rv);
422 }
423 
/*
 * _do_kiconv_to_eucjp()
 * Convert Unicode input (read through KICONV_JA_GETU) to EUC-JP style
 * output, one character per loop iteration.
 *
 * Each code point is mapped to a 16-bit EUC value, first through
 * KICONV_JA_CNV_U2_TO_EUCJPMS (which receives the table id from kcd) and
 * then through kiconv_ja_ucs2_to_euc16(). The two 0x80 bits of that value
 * select the code set and therefore the byte sequence written. Code points
 * above 0xffff and code points without a mapping are written as
 * KICONV_JA_DEF_SINGLE and counted in rv.
 *
 * The caller's *inbuf, *inbytesleft, *outbuf and *outbytesleft are updated
 * after every character. Returns 0 when inbuf or *inbuf is NULL, otherwise
 * rv. kcd is dereferenced before any check.
 *
 * NOTE(review): read_len is not referenced directly here; it is presumably
 * used inside KICONV_JA_GETU - confirm in kiconv_ja.h. The error behaviour
 * of KICONV_JA_GETU/KICONV_JA_NPUT (assumed: set *errno and rv, jump to
 * "ret") is likewise defined outside this file.
 */
424 static size_t
_do_kiconv_to_eucjp(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)425 _do_kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
426     char **outbuf, size_t *outbytesleft, int *errno)
427 {
428 	uchar_t		ic;
429 	size_t		rv = 0;
430 	uint_t		ucs4;
431 	ushort_t	euc16;
432 
433 	uchar_t	*ip;
434 	size_t		ileft;
435 	char		*op;
436 	size_t		oleft;
437 	size_t		read_len;
438 
439 	size_t		id = ((kiconv_state_t)kcd)->id;
440 
441 	if ((inbuf == NULL) || (*inbuf == NULL)) {
442 		return (0);
443 	}
444 
445 	ip = (uchar_t *)*inbuf;
446 	ileft = *inbytesleft;
447 	op = *outbuf;
448 	oleft = *outbytesleft;
449 
450 	KICONV_JA_CHECK_UTF8_BOM(ip, ileft);
451 
452 	while (ileft != 0) {
453 		KICONV_JA_GETU(&ucs4, 0);
454 
455 		if (ucs4 > 0xffff) {
456 			/* non-BMP */
457 			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
458 			rv++;
459 			goto next;
460 		}
461 
462 		KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
463 		if (euc16 == KICONV_JA_NODEST) {
464 			euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
465 		}
466 		if (euc16 == KICONV_JA_NODEST) {
467 			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
468 			rv++;
469 			goto next;
470 		}
471 
		/* the high bit of each byte of euc16 identifies the code set */
472 		switch (euc16 & 0x8080) {
473 		case 0x0000:	/* CS0 */
474 			ic = (uchar_t)euc16;
475 			KICONV_JA_NPUT(ic);
476 			break;
477 		case 0x8080:	/* CS1 */
478 			ic = (uchar_t)((euc16 >> 8) & 0xff);
479 			KICONV_JA_NPUT(ic);
480 			ic = (uchar_t)(euc16 & 0xff);
481 			KICONV_JA_NPUT(ic);
482 			break;
483 		case 0x0080:	/* CS2 */
484 			KICONV_JA_NPUT(SS2);
485 			ic = (uchar_t)euc16;
486 			KICONV_JA_NPUT(ic);
487 			break;
488 		case 0x8000:	/* CS3 */
489 			KICONV_JA_NPUT(SS3);
490 			ic = (uchar_t)((euc16 >> 8) & 0xff);
491 			KICONV_JA_NPUT(ic);
492 			ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
493 			KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
494 			break;
495 		}
496 next:
497 		/*
498 		 * One character successfully converted so update
499 		 * values outside of this function's stack.
500 		 */
501 		*inbuf = (char *)ip;
502 		*inbytesleft = ileft;
503 		*outbuf = op;
504 		*outbytesleft = oleft;
505 	}
506 
507 ret:
508 	return (rv);
509 }
510 
/*
 * _do_kiconvstr_fr_eucjp()
 * String flavour of the EUC-JP to Unicode conversion. id selects the table
 * passed to the KICONV_JA_CNV_* macros; flag carries the kiconvstr options.
 *
 *   KICONV_IGNORE_NULL     - when clear, a NUL input byte ends the
 *                            conversion with a return value of 0
 *   KICONV_REPLACE_INVALID - when set, invalid bytes are replaced by
 *                            KICONV_JA_REPLACE and counted in rv instead of
 *                            failing with EILSEQ
 *
 * inbuf and outbuf are passed by value, so only *inbytesleft and
 * *outbytesleft are reported back; they are updated after every character.
 * Returns 0 when inbuf is NULL or its first byte is NUL, otherwise rv.
 *
 * NOTE(review): the "return (0)" taken on a NUL byte drops any replacement
 * count already accumulated in rv - confirm whether that is intended.
 * NOTE(review): the "next" label is not referenced directly in this
 * function; it is presumably the target of KICONV_JA_NGET_REP_FR_MB, which
 * is defined outside this file - confirm in kiconv_ja.h.
 */
511 static size_t
_do_kiconvstr_fr_eucjp(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno,uint8_t id)512 _do_kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
513     size_t *outbytesleft, int flag, int *errno, uint8_t id)
514 {
515 	uint_t		u32;		/* UTF-32 */
516 	uint_t		index;		/* index for table lookup */
517 	uchar_t		ic1, ic2, ic3;	/* 1st, 2nd, and 3rd bytes of a char */
518 	size_t		rv = 0;		/* return value of this function */
519 
520 	uchar_t	*ip;
521 	size_t		ileft;
522 	char		*op;
523 	size_t		oleft;
524 
525 	boolean_t do_not_ignore_null;
526 
527 	if ((inbuf == NULL) || (*inbuf == '\0')) {
528 		return (0);
529 	}
530 
531 	ip = (uchar_t *)inbuf;
532 	ileft = *inbytesleft;
533 	op = outbuf;
534 	oleft = *outbytesleft;
535 
536 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
537 
538 	while (ileft != 0) {
539 		KICONV_JA_NGET(ic1);		/* get 1st byte */
540 
541 		if (KICONV_JA_ISASC(ic1)) {	/* ASCII; 1 byte */
			/* NUL terminates the string unless the caller opted out */
542 			if (ic1 == '\0' && do_not_ignore_null) {
543 				return (0);
544 			}
545 			u32 = kiconv_ja_jisx0201roman_to_ucs2[ic1];
546 			KICONV_JA_PUTU(u32);
547 		} else if (KICONV_JA_ISCS1(ic1)) { /* 0208 or UDC; 2 bytes */
548 			if (flag & KICONV_REPLACE_INVALID) {
549 				KICONV_JA_NGET_REP_FR_MB(ic2);
550 			} else {
551 				KICONV_JA_NGET(ic2);
552 			}
553 			if (KICONV_JA_ISCS1(ic2)) { /* 2nd byte check passed */
554 				ic1 &= KICONV_JA_CMASK;
555 				ic2 &= KICONV_JA_CMASK;
556 				KICONV_JA_CNV_JISMS_TO_U2(id, u32, ic1, ic2);
557 				if (u32 == KICONV_JA_NODEST) {
558 					index = (ic1 - 0x21) * 94 + ic2 - 0x21;
559 					u32 = kiconv_ja_jisx0208_to_ucs2[index];
560 				}
561 				if (u32 == KICONV_JA_REPLACE)
562 					rv++;
563 				KICONV_JA_PUTU(u32);
564 			} else { /* 2nd byte check failed */
565 				if (flag & KICONV_REPLACE_INVALID) {
566 					KICONV_JA_PUTU(KICONV_JA_REPLACE);
567 					rv++;
568 				} else {
569 					KICONV_JA_RETERROR(EILSEQ)
570 				}
571 			}
572 		} else if (ic1 == SS2) { /* JIS X 0201 Kana; 2bytes */
573 			if (flag & KICONV_REPLACE_INVALID) {
574 				KICONV_JA_NGET_REP_FR_MB(ic2);
575 			} else {
576 				KICONV_JA_NGET(ic2);
577 			}
578 			if (KICONV_JA_ISCS2(ic2)) { /* 2nd byte check passed */
579 				index = (ic2 - 0xa1);
580 				u32 = kiconv_ja_jisx0201kana_to_ucs2[index];
581 				KICONV_JA_PUTU(u32);
582 			} else { /* 2nd byte check failed */
583 				if (flag & KICONV_REPLACE_INVALID) {
584 					KICONV_JA_PUTU(KICONV_JA_REPLACE);
585 					rv++;
586 				} else {
587 					KICONV_JA_RETERROR(EILSEQ)
588 				}
589 			}
590 		} else if (ic1 == SS3) { /* JIS X 0212 or UDC; 3 bytes */
591 			if (flag & KICONV_REPLACE_INVALID) {
592 				KICONV_JA_NGET_REP_FR_MB(ic2);
593 			} else {
594 				KICONV_JA_NGET(ic2);
595 			}
596 			if (KICONV_JA_ISCS3(ic2)) { /* 2nd byte check passed */
597 				if (flag & KICONV_REPLACE_INVALID) {
598 					KICONV_JA_NGET_REP_FR_MB(ic3);
599 				} else {
600 					KICONV_JA_NGET(ic3);
601 				}
602 				if (KICONV_JA_ISCS3(ic3)) {
603 					/* 3rd byte check passed */
604 					ic2 &= KICONV_JA_CMASK;
605 					ic3 &= KICONV_JA_CMASK;
606 					KICONV_JA_CNV_JIS0212MS_TO_U2(id, u32,
607 					    ic2, ic3);
608 					if (u32 == KICONV_JA_NODEST) {
609 						index = ((ic2 - 0x21) * 94 +
610 						    (ic3 - 0x21));
611 						u32 = kiconv_ja_jisx0212_to_ucs2
612 						    [index];
613 					}
614 					if (u32 == KICONV_JA_REPLACE)
615 						rv++;
616 					KICONV_JA_PUTU(u32);
617 				} else { /* 3rd byte check failed */
618 					if (flag & KICONV_REPLACE_INVALID) {
619 						KICONV_JA_PUTU(
620 						    KICONV_JA_REPLACE);
621 						rv++;
622 					} else {
623 						KICONV_JA_RETERROR(EILSEQ)
624 					}
625 				}
626 			} else { /* 2nd byte check failed */
627 				if (flag & KICONV_REPLACE_INVALID) {
628 					KICONV_JA_PUTU(KICONV_JA_REPLACE);
629 					rv++;
630 				} else {
631 					KICONV_JA_RETERROR(EILSEQ)
632 				}
633 			}
634 		} else if (KICONV_JA_ISC1CTRLEUC(ic1)) {
635 			/* C1 control; 1 byte */
636 			u32 = ic1;
637 			KICONV_JA_PUTU(u32);
638 		} else { /* 1st byte check failed */
639 			if (flag & KICONV_REPLACE_INVALID) {
640 				KICONV_JA_PUTU(KICONV_JA_REPLACE);
641 				rv++;
642 			} else {
643 				KICONV_JA_RETERROR(EILSEQ)
644 			}
645 		}
646 
647 next:
648 		/*
649 		 * One character successfully converted so update
650 		 * values outside of this function's stack.
651 		 */
652 		*inbytesleft = ileft;
653 		*outbytesleft = oleft;
654 	}
655 
656 ret:
657 	return (rv);
658 }
659 
/*
 * _do_kiconvstr_to_eucjp()
 * String flavour of the Unicode to EUC-JP conversion. id selects the table
 * passed to KICONV_JA_CNV_U2_TO_EUCJPMS; flag carries the kiconvstr options
 * and is forwarded to KICONV_JA_GETU.
 *
 * Unless KICONV_IGNORE_NULL is set in flag, a decoded value of 0 ends the
 * conversion with a return value of 0. Code points above 0xffff and code
 * points without a mapping are written as KICONV_JA_DEF_SINGLE and counted
 * in rv.
 *
 * inbuf and outbuf are passed by value, so only *inbytesleft and
 * *outbytesleft are reported back; they are updated after every character.
 * Returns 0 when inbuf is NULL or its first byte is NUL, otherwise rv.
 *
 * NOTE(review): the "return (0)" taken on a decoded 0 drops any count
 * already accumulated in rv - confirm whether that is intended.
 * NOTE(review): read_len is not referenced directly here; it is presumably
 * used inside KICONV_JA_GETU, which is defined outside this file.
 */
660 static size_t
_do_kiconvstr_to_eucjp(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno,uint8_t id)661 _do_kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
662     size_t *outbytesleft, int flag, int *errno, uint8_t id)
663 {
664 	uchar_t		ic;
665 	size_t		rv = 0;
666 	uint_t		ucs4;
667 	ushort_t	euc16;
668 
669 	uchar_t	*ip;
670 	size_t		ileft;
671 	char		*op;
672 	size_t		oleft;
673 	size_t		read_len;
674 
675 	boolean_t do_not_ignore_null;
676 
677 	if ((inbuf == NULL) || (*inbuf == '\0')) {
678 		return (0);
679 	}
680 
681 	ip = (uchar_t *)inbuf;
682 	ileft = *inbytesleft;
683 	op = outbuf;
684 	oleft = *outbytesleft;
685 
686 	KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);
687 
688 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
689 
690 	while (ileft != 0) {
691 		KICONV_JA_GETU(&ucs4, flag);
692 
		/* NUL terminates the string unless the caller opted out */
693 		if (ucs4 == 0x0 && do_not_ignore_null) {
694 			return (0);
695 		}
696 
697 		if (ucs4 > 0xffff) {
698 			/* non-BMP */
699 			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
700 			rv++;
701 			goto next;
702 		}
703 
704 		KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
705 		if (euc16 == KICONV_JA_NODEST) {
706 			euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
707 		}
708 		if (euc16 == KICONV_JA_NODEST) {
709 			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
710 			rv++;
711 			goto next;
712 		}
713 
		/* the high bit of each byte of euc16 identifies the code set */
714 		switch (euc16 & 0x8080) {
715 		case 0x0000:	/* CS0 */
716 			ic = (uchar_t)euc16;
717 			KICONV_JA_NPUT(ic);
718 			break;
719 		case 0x8080:	/* CS1 */
720 			ic = (uchar_t)((euc16 >> 8) & 0xff);
721 			KICONV_JA_NPUT(ic);
722 			ic = (uchar_t)(euc16 & 0xff);
723 			KICONV_JA_NPUT(ic);
724 			break;
725 		case 0x0080:	/* CS2 */
726 			KICONV_JA_NPUT(SS2);
727 			ic = (uchar_t)euc16;
728 			KICONV_JA_NPUT(ic);
729 			break;
730 		case 0x8000:	/* CS3 */
731 			KICONV_JA_NPUT(SS3);
732 			ic = (uchar_t)((euc16 >> 8) & 0xff);
733 			KICONV_JA_NPUT(ic);
734 			ic = (uchar_t)(euc16 & KICONV_JA_CMASK);
735 			KICONV_JA_NPUT(ic | KICONV_JA_CMSB);
736 			break;
737 		}
738 next:
739 		/*
740 		 * One character successfully converted so update
741 		 * values outside of this function's stack.
742 		 */
743 		*inbytesleft = ileft;
744 		*outbytesleft = oleft;
745 	}
746 
747 ret:
748 	return (rv);
749 }
750 
751 static size_t
kiconv_fr_eucjp(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)752 kiconv_fr_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
753     char **outbuf, size_t *outbytesleft, int *errno)
754 {
755 	if (! kcd || kcd == (void *)-1) {
756 		*errno = EBADF;
757 		return ((size_t)-1);
758 	}
759 
760 	return (_do_kiconv_fr_eucjp(kcd, inbuf, inbytesleft,
761 	    outbuf, outbytesleft, errno));
762 }
763 
764 static size_t
kiconv_to_eucjp(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)765 kiconv_to_eucjp(void *kcd, char **inbuf, size_t *inbytesleft,
766     char **outbuf, size_t *outbytesleft, int *errno)
767 {
768 	if (! kcd || kcd == (void *)-1) {
769 		*errno = EBADF;
770 		return ((size_t)-1);
771 	}
772 
773 	return (_do_kiconv_to_eucjp(kcd, inbuf, inbytesleft,
774 	    outbuf, outbytesleft, errno));
775 }
776 
777 static size_t
kiconvstr_fr_eucjp(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)778 kiconvstr_fr_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
779     size_t *outbytesleft, int flag, int *errno)
780 {
781 	return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
782 	    outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
783 }
784 
785 static size_t
kiconvstr_to_eucjp(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)786 kiconvstr_to_eucjp(char *inbuf, size_t *inbytesleft, char *outbuf,
787     size_t *outbytesleft, int flag, int *errno)
788 {
789 	return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
790 	    outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP));
791 }
792 
793 static size_t
kiconvstr_fr_eucjpms(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)794 kiconvstr_fr_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
795     size_t *outbytesleft, int flag, int *errno)
796 {
797 	return (_do_kiconvstr_fr_eucjp(inbuf, inbytesleft, outbuf,
798 	    outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
799 }
800 
801 static size_t
kiconvstr_to_eucjpms(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)802 kiconvstr_to_eucjpms(char *inbuf, size_t *inbytesleft, char *outbuf,
803     size_t *outbytesleft, int flag, int *errno)
804 {
805 	return (_do_kiconvstr_to_eucjp(inbuf, inbytesleft, outbuf,
806 	    outbytesleft, flag, errno, KICONV_JA_TBLID_EUCJP_MS));
807 }
808 
/*
 * _do_kiconv_fr_sjis()
 * Convert Shift_JIS style input to Unicode, one character per loop
 * iteration. Each converted code point is emitted through KICONV_JA_PUTU.
 *
 * The first byte selects the handling: ASCII, single-byte kana, two-byte
 * kanji (converted to a JIS row/cell pair through kiconv_ja_sjtojis1[] and
 * kiconv_ja_sjtojis2[]), the supplementary kanji area, the IBM / NEC-IBM
 * extension areas, or the 0xeb..0xec area that always yields U+FFFD.
 *
 * kcd supplies the table id handed to the KICONV_JA_CNV_* macros; it is
 * dereferenced before any check. The caller's *inbuf, *inbytesleft, *outbuf
 * and *outbytesleft are updated after every character that converts
 * completely. Returns 0 when inbuf or *inbuf is NULL, otherwise rv, which
 * is incremented for every character whose mapping is KICONV_JA_REPLACE.
 *
 * NOTE(review): KICONV_JA_NGET, KICONV_JA_PUTU and KICONV_JA_RETERROR are
 * defined outside this file; they are assumed to set *errno and rv and to
 * jump to "ret" on failure - confirm in kiconv_ja.h.
 */
809 static size_t
_do_kiconv_fr_sjis(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)810 _do_kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
811     char **outbuf, size_t *outbytesleft, int *errno)
812 {
813 	uint_t	uni;			/* UTF-32 */
814 	uint_t	index;			/* index for table lookup */
815 	uchar_t	ic1, ic2;		/* 1st and 2nd bytes of a char */
816 	size_t	rv = 0;			/* return value of this function */
817 
818 	uchar_t	*ip;
819 	size_t		ileft;
820 	char		*op;
821 	size_t		oleft;
822 	size_t		id = ((kiconv_state_t)kcd)->id;
823 
824 	if ((inbuf == NULL) || (*inbuf == NULL)) {
825 		return (0);
826 	}
827 
828 	ip = (uchar_t *)*inbuf;
829 	ileft = *inbytesleft;
830 	op = *outbuf;
831 	oleft = *outbytesleft;
832 
833 	while (ileft != 0) {
834 		KICONV_JA_NGET(ic1);			/* get 1st byte */
835 
836 		if (KICONV_JA_ISASC((int)ic1)) {	/* ASCII; 1 byte */
837 			uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
838 			KICONV_JA_PUTU(uni);
839 		} else if (KICONV_JA_ISSJKANA(ic1)) { /* 0201 Kana; 1byte */
840 			uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
841 			KICONV_JA_PUTU(uni);
842 		} else if (KICONV_JA_ISSJKANJI1(ic1)) { /* 0208/UDC; 2bytes */
843 			KICONV_JA_NGET(ic2);
844 			if (KICONV_JA_ISSJKANJI2(ic2)) {
				/* 2nd bytes from 0x9f up belong to the next JIS row */
845 				ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
846 				if (ic2 >= 0x9f) {
847 					ic1++;
848 				}
849 				ic2 = kiconv_ja_sjtojis2[ic2];
850 				KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
851 				if (uni == KICONV_JA_NODEST) {
852 					index = ((ic1 - 0x21) * 94)
853 					    + (ic2 - 0x21);
854 					uni = kiconv_ja_jisx0208_to_ucs2[index];
855 				}
856 				if (uni == KICONV_JA_REPLACE)
857 					rv++;
858 				KICONV_JA_PUTU(uni);
859 			} else { /* 2nd byte check failed */
860 				KICONV_JA_RETERROR(EILSEQ)
861 				/* NOTREACHED */
862 			}
863 		} else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
864 			KICONV_JA_NGET(ic2);
865 			if (KICONV_JA_ISSJKANJI2(ic2)) {
866 				ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
867 				if (ic2 >= 0x9f) {
868 					ic1++;
869 				}
870 				index = ((ic1 - 0x21) * 94)
871 				    + (kiconv_ja_sjtojis2[ic2] - 0x21);
872 				uni = kiconv_ja_jisx0212_to_ucs2[index];
873 				if (uni == KICONV_JA_REPLACE)
874 					rv++;
875 				KICONV_JA_PUTU(uni);
876 			} else { /* 2nd byte check failed */
877 				KICONV_JA_RETERROR(EILSEQ)
878 			}
879 		} else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
880 		    KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
881 			/*
882 			 * We need a special treatment for each codes.
883 			 * By adding some offset number for them, we
884 			 * can process them as the same way of that of
885 			 * extended IBM chars.
886 			 */
887 			KICONV_JA_NGET(ic2);
888 			if (KICONV_JA_ISSJKANJI2(ic2)) {
889 				ushort_t dest, upper, lower;
890 				dest = (ic1 << 8) + ic2;
891 				if ((0xed40 <= dest) && (dest <= 0xeffc)) {
892 					KICONV_JA_REMAP_NEC(dest);
893 					if (dest == 0xffff) {
894 						KICONV_JA_RETERROR(EILSEQ)
895 					}
896 				}
897 				/*
898 				 * XXX: 0xfa54 and 0xfa5b must be mapped
899 				 *	to JIS0208 area. Therefore we
900 				 *	have to do special treatment.
901 				 */
902 				if ((dest == 0xfa54) || (dest == 0xfa5b)) {
903 					if (dest == 0xfa54) {
904 						upper = 0x22;
905 						lower = 0x4c;
906 					} else {
907 						upper = 0x22;
908 						lower = 0x68;
909 					}
910 					KICONV_JA_CNV_JISMS_TO_U2(id, uni,
911 					    upper, lower);
912 					if (uni == KICONV_JA_NODEST) {
913 						index = (uint_t)((upper - 0x21)
914 						    * 94 + (lower - 0x21));
915 						uni = kiconv_ja_jisx0208_to_ucs2
916 						    [index];
917 					}
918 					if (uni == KICONV_JA_REPLACE)
919 						rv++;
920 					KICONV_JA_PUTU(uni);
921 				} else {
					/*
					 * Turn the code into an index into
					 * kiconv_ja_sjtoibmext[]: offset from
					 * 0xfa40, less 0x40 for every lead
					 * byte above 0xfa.
					 */
922 					dest = dest - 0xfa40 -
923 					    (((dest>>8) - 0xfa) * 0x40);
924 					dest = kiconv_ja_sjtoibmext[dest];
925 					if (dest == 0xffff) {
926 						KICONV_JA_RETERROR(EILSEQ)
927 					}
928 					upper = (dest >> 8) & KICONV_JA_CMASK;
929 					lower = dest & KICONV_JA_CMASK;
930 					KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
931 					    upper, lower);
932 					if (uni == KICONV_JA_NODEST) {
933 						index = (uint_t)((upper - 0x21)
934 						    * 94 + (lower - 0x21));
935 						uni = kiconv_ja_jisx0212_to_ucs2
936 						    [index];
937 					}
938 					if (uni == KICONV_JA_REPLACE)
939 						rv++;
940 					KICONV_JA_PUTU(uni);
941 				}
942 			} else { /* 2nd byte check failed */
943 				KICONV_JA_RETERROR(EILSEQ)
944 			}
945 		} else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
946 		/*
947 		 * Based on the draft convention of OSF-JVC CDEWG,
948 		 * characters in this area will be mapped to
949 		 * "CHIKAN-MOJI." (convertible character)
950 		 * We use U+FFFD in this case.
951 		 */
952 			KICONV_JA_NGET(ic2);
953 			if (KICONV_JA_ISSJKANJI2(ic2)) {
954 				uni = 0xfffd;
955 				KICONV_JA_PUTU(uni);
956 			} else { /* 2nd byte check failed */
957 				KICONV_JA_RETERROR(EILSEQ)
958 			}
959 		} else { /* 1st byte check failed */
960 			KICONV_JA_RETERROR(EILSEQ)
961 		}
962 
963 		/*
964 		 * One character successfully converted so update
965 		 * values outside of this function's stack.
966 		 */
967 		*inbuf = (char *)ip;
968 		*inbytesleft = ileft;
969 		*outbuf = op;
970 		*outbytesleft = oleft;
971 	}
972 
973 ret:
974 	return (rv);
975 }
976 
977 /*
978  * _kiconv_ja_lookuptbl()
979  * Return the index number if its index-ed number
980  * is the same as dest value.
981  */
982 static ushort_t
_kiconv_ja_lookuptbl(ushort_t dest)983 _kiconv_ja_lookuptbl(ushort_t dest)
984 {
985 	ushort_t tmp;
986 	int i;
987 	int sz = (sizeof (kiconv_ja_sjtoibmext) /
988 	    sizeof (kiconv_ja_sjtoibmext[0]));
989 
990 	for (i = 0; i < sz; i++) {
991 		tmp = (kiconv_ja_sjtoibmext[i] & 0x7f7f);
992 		if (tmp == dest)
993 			return ((i + 0xfa40 + ((i / 0xc0) * 0x40)));
994 	}
995 	return (0x3f);
996 }
997 
/*
 * _do_kiconv_to_sjis()
 * Convert Unicode input (read through KICONV_JA_GETU) to Shift_JIS style
 * output, one character per loop iteration.
 *
 * Each code point is first mapped to a 16-bit EUC value, through
 * KICONV_JA_CNV_U2_TO_EUCJPMS (which receives the table id from kcd) and
 * then through kiconv_ja_ucs2_to_euc16(). The two 0x80 bits of that value
 * select the code set, and the bytes written are derived from it with the
 * kiconv_ja_jis208tosj1[], kiconv_ja_jis212tosj1[] and kiconv_ja_jistosj2[]
 * tables or with _kiconv_ja_lookuptbl(). Code points above 0xffff, code
 * points without a mapping and CS0 values matching KICONV_JA_ISC1CTRL are
 * written as KICONV_JA_DEF_SINGLE and counted in rv.
 *
 * The caller's *inbuf, *inbytesleft, *outbuf and *outbytesleft are updated
 * after every character. Returns 0 when inbuf or *inbuf is NULL, otherwise
 * rv. kcd is dereferenced before any check.
 *
 * NOTE(review): read_len is not referenced directly here; it is presumably
 * used inside KICONV_JA_GETU, which is defined outside this file.
 */
998 static size_t
_do_kiconv_to_sjis(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)999 _do_kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1000     char **outbuf, size_t *outbytesleft, int *errno)
1001 {
1002 	uchar_t	ic;
1003 	size_t		rv = 0;
1004 	uint_t		ucs4;
1005 	ushort_t	euc16;
1006 	ushort_t	dest;
1007 
1008 	uchar_t	*ip;
1009 	size_t		ileft;
1010 	char		*op;
1011 	size_t		oleft;
1012 	size_t		read_len;
1013 
1014 	size_t		id = ((kiconv_state_t)kcd)->id;
1015 
1016 	if ((inbuf == NULL) || (*inbuf == NULL)) {
1017 		return (0);
1018 	}
1019 
1020 	ip = (uchar_t *)*inbuf;
1021 	ileft = *inbytesleft;
1022 	op = *outbuf;
1023 	oleft = *outbytesleft;
1024 
1025 	KICONV_JA_CHECK_UTF8_BOM(ip, ileft);
1026 
1027 	while (ileft != 0) {
1028 		KICONV_JA_GETU(&ucs4, 0);
1029 
1030 		if (ucs4 > 0xffff) {
1031 			/* non-BMP */
1032 			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1033 			rv++;
1034 			goto next;
1035 		}
1036 
1037 		KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
1038 		if (euc16 == KICONV_JA_NODEST) {
1039 			euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
1040 		}
1041 		if (euc16 == KICONV_JA_NODEST) {
1042 			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1043 			rv++;
1044 			goto next;
1045 		}
1046 
		/* the high bit of each byte of euc16 identifies the code set */
1047 		switch (euc16 & 0x8080) {
1048 		case 0x0000:	/* CS0 */
1049 			if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
1050 				KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1051 				rv++;
1052 			} else {
1053 				ic = (uchar_t)euc16;
1054 				KICONV_JA_NPUT(ic);
1055 			}
1056 			break;
1057 		case 0x8080:	/* CS1 */
1058 			ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1059 			KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
1060 			/*
1061 			 * for even number row (Ku), add 0x80 to
1062 			 * look latter half of kiconv_ja_jistosj2[] array
1063 			 */
1064 			ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
1065 			    + (((ic % 2) == 0) ? 0x80 : 0x00));
1066 			KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1067 			break;
1068 		case 0x0080:	/* CS2 */
1069 			ic = (uchar_t)euc16;
1070 			KICONV_JA_NPUT(ic);
1071 			break;
1072 		case 0x8000:	/* CS3 */
1073 			ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1074 			if (euc16 == 0xa271) {
1075 				/* NUMERO SIGN */
1076 				KICONV_JA_NPUT(0x87);
1077 				KICONV_JA_NPUT(0x82);
1078 			} else if (ic < 0x75) { /* check if IBM VDC */
				/*
				 * NOTE(review): _kiconv_ja_lookuptbl() in
				 * this file returns 0x3f, not 0xffff, when
				 * nothing matches, so the 0xffff test below
				 * cannot be true on a miss; a miss is written
				 * as the single byte 0x3f and rv is not
				 * incremented on that path - confirm intent.
				 */
1079 				dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
1080 				if (dest == 0xffff) {
1081 					KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1082 				} else {
1083 					/* avoid putting NUL ('\0') */
1084 					if (dest > 0xff) {
1085 						KICONV_JA_NPUT(
1086 						    (dest >> 8) & 0xff);
1087 						KICONV_JA_NPUT(dest & 0xff);
1088 					} else {
1089 						KICONV_JA_NPUT(dest & 0xff);
1090 					}
1091 				}
1092 			} else {
1093 				KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
1094 				/*
1095 				 * for even number row (Ku), add 0x80 to
1096 				 * look latter half of kiconv_ja_jistosj2[]
1097 				 */
1098 				ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
1099 				    + (((ic % 2) == 0) ? 0x80 : 0x00));
1100 				KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1101 			}
1102 			break;
1103 		}
1104 
1105 next:
1106 		/*
1107 		 * One character successfully converted so update
1108 		 * values outside of this function's stack.
1109 		 */
1110 		*inbuf = (char *)ip;
1111 		*inbytesleft = ileft;
1112 		*outbuf = op;
1113 		*outbytesleft = oleft;
1114 	}
1115 
1116 ret:
1117 	return (rv);
1118 }
1119 
1120 static size_t
_do_kiconvstr_fr_sjis(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno,uint8_t id)1121 _do_kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1122     size_t *outbytesleft, int flag, int *errno, uint8_t id)
1123 {
1124 	uint_t		uni;		/* UTF-32 */
1125 	uint_t		index;		/* index for table lookup */
1126 	uchar_t		ic1, ic2;	/* 1st and 2nd bytes of a char */
1127 	size_t		rv = 0;		/* return value of this function */
1128 
1129 	uchar_t	*ip;
1130 	size_t		ileft;
1131 	char		*op;
1132 	size_t		oleft;
1133 
1134 	boolean_t do_not_ignore_null;
1135 
1136 	if ((inbuf == NULL) || (*inbuf == '\0')) {
1137 		return (0);
1138 	}
1139 
1140 	ip = (uchar_t *)inbuf;
1141 	ileft = *inbytesleft;
1142 	op = outbuf;
1143 	oleft = *outbytesleft;
1144 
1145 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1146 
1147 	while (ileft != 0) {
1148 		KICONV_JA_NGET(ic1);			/* get 1st byte */
1149 
1150 		if (KICONV_JA_ISASC((int)ic1)) {	/* ASCII; 1 byte */
1151 			if (ic1 == '\0' && do_not_ignore_null) {
1152 				return (0);
1153 			}
1154 			uni = kiconv_ja_jisx0201roman_to_ucs2[ic1];
1155 			KICONV_JA_PUTU(uni);
1156 		} else if (KICONV_JA_ISSJKANA(ic1)) {
1157 			/* JIS X 0201 Kana; 1 byte */
1158 			uni = kiconv_ja_jisx0201kana_to_ucs2[(ic1 - 0xa1)];
1159 			KICONV_JA_PUTU(uni);
1160 		} else if (KICONV_JA_ISSJKANJI1(ic1)) {
1161 			/* JIS X 0208 or UDC; 2 bytes */
1162 			if (flag & KICONV_REPLACE_INVALID) {
1163 				KICONV_JA_NGET_REP_FR_MB(ic2);
1164 			} else {
1165 				KICONV_JA_NGET(ic2);
1166 			}
1167 			if (KICONV_JA_ISSJKANJI2(ic2)) {
1168 				ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1169 				if (ic2 >= 0x9f) {
1170 					ic1++;
1171 				}
1172 				ic2 = kiconv_ja_sjtojis2[ic2];
1173 				KICONV_JA_CNV_JISMS_TO_U2(id, uni, ic1, ic2);
1174 				if (uni == KICONV_JA_NODEST) {
1175 					index = ((ic1 - 0x21) * 94)
1176 					    + (ic2 - 0x21);
1177 					uni = kiconv_ja_jisx0208_to_ucs2[index];
1178 				}
1179 				if (uni == KICONV_JA_REPLACE)
1180 					rv++;
1181 				KICONV_JA_PUTU(uni);
1182 			} else { /* 2nd byte check failed */
1183 				if (flag & KICONV_REPLACE_INVALID) {
1184 					KICONV_JA_PUTU(KICONV_JA_REPLACE);
1185 					rv++;
1186 				} else {
1187 					KICONV_JA_RETERROR(EILSEQ)
1188 				}
1189 				/* NOTREACHED */
1190 			}
1191 		} else if (KICONV_JA_ISSJSUPKANJI1(ic1)) { /* VDC, 2 bytes */
1192 			if (flag & KICONV_REPLACE_INVALID) {
1193 				KICONV_JA_NGET_REP_FR_MB(ic2);
1194 			} else {
1195 				KICONV_JA_NGET(ic2);
1196 			}
1197 			if (KICONV_JA_ISSJKANJI2(ic2)) {
1198 				ic1 = kiconv_ja_sjtojis1[(ic1 - 0x80)];
1199 				if (ic2 >= 0x9f) {
1200 					ic1++;
1201 				}
1202 				index = ((ic1 - 0x21) * 94)
1203 				    + (kiconv_ja_sjtojis2[ic2] - 0x21);
1204 				uni = kiconv_ja_jisx0212_to_ucs2[index];
1205 				if (uni == KICONV_JA_REPLACE)
1206 					rv++;
1207 				KICONV_JA_PUTU(uni);
1208 			} else { /* 2nd byte check failed */
1209 				if (flag & KICONV_REPLACE_INVALID) {
1210 					KICONV_JA_PUTU(KICONV_JA_REPLACE);
1211 					rv++;
1212 				} else {
1213 					KICONV_JA_RETERROR(EILSEQ)
1214 				}
1215 			}
1216 		} else if (KICONV_JA_ISSJIBM(ic1) || /* Extended IBM area */
1217 		    KICONV_JA_ISSJNECIBM(ic1)) { /* NEC/IBM area */
1218 			/*
1219 			 * We need a special treatment for each codes.
1220 			 * By adding some offset number for them, we
1221 			 * can process them as the same way of that of
1222 			 * extended IBM chars.
1223 			 */
1224 			if (flag & KICONV_REPLACE_INVALID) {
1225 				KICONV_JA_NGET_REP_FR_MB(ic2);
1226 			} else {
1227 				KICONV_JA_NGET(ic2);
1228 			}
1229 			if (KICONV_JA_ISSJKANJI2(ic2)) {
1230 				ushort_t dest, upper, lower;
1231 				dest = (ic1 << 8) + ic2;
1232 				if ((0xed40 <= dest) && (dest <= 0xeffc)) {
1233 					KICONV_JA_REMAP_NEC(dest);
1234 					if (dest == 0xffff) {
1235 						if (flag &
1236 						    KICONV_REPLACE_INVALID) {
1237 							KICONV_JA_PUTU(
1238 							    KICONV_JA_REPLACE);
1239 							rv++;
1240 						} else {
1241 							KICONV_JA_RETERROR(
1242 							    EILSEQ)
1243 						}
1244 					}
1245 				}
1246 				/*
1247 				 * XXX: 0xfa54 and 0xfa5b must be mapped
1248 				 *	to JIS0208 area. Therefore we
1249 				 *	have to do special treatment.
1250 				 */
1251 				if ((dest == 0xfa54) || (dest == 0xfa5b)) {
1252 					if (dest == 0xfa54) {
1253 						upper = 0x22;
1254 						lower = 0x4c;
1255 					} else {
1256 						upper = 0x22;
1257 						lower = 0x68;
1258 					}
1259 					KICONV_JA_CNV_JISMS_TO_U2(id, uni,
1260 					    upper, lower);
1261 					if (uni == KICONV_JA_NODEST) {
1262 						index = (uint_t)((upper - 0x21)
1263 						    * 94 + (lower - 0x21));
1264 						uni = kiconv_ja_jisx0208_to_ucs2
1265 						    [index];
1266 					}
1267 					if (uni == KICONV_JA_REPLACE)
1268 						rv++;
1269 					KICONV_JA_PUTU(uni);
1270 				} else {
1271 					dest = dest - 0xfa40 -
1272 					    (((dest>>8) - 0xfa) * 0x40);
1273 					dest = kiconv_ja_sjtoibmext[dest];
1274 					if (dest == 0xffff) {
1275 						if (flag &
1276 						    KICONV_REPLACE_INVALID) {
1277 							KICONV_JA_PUTU(
1278 							    KICONV_JA_REPLACE);
1279 							rv++;
1280 						} else {
1281 							KICONV_JA_RETERROR(
1282 							    EILSEQ)
1283 						}
1284 					}
1285 					upper = (dest >> 8) & KICONV_JA_CMASK;
1286 					lower = dest & KICONV_JA_CMASK;
1287 					KICONV_JA_CNV_JIS0212MS_TO_U2(id, uni,
1288 					    upper, lower);
1289 					if (uni == KICONV_JA_NODEST) {
1290 						index = (uint_t)((upper - 0x21)
1291 						    * 94 + (lower - 0x21));
1292 						uni = kiconv_ja_jisx0212_to_ucs2
1293 						    [index];
1294 					}
1295 					if (uni == KICONV_JA_REPLACE)
1296 						rv++;
1297 					KICONV_JA_PUTU(uni);
1298 				}
1299 			} else { /* 2nd byte check failed */
1300 				if (flag & KICONV_REPLACE_INVALID) {
1301 					KICONV_JA_PUTU(KICONV_JA_REPLACE);
1302 					rv++;
1303 				} else {
1304 					KICONV_JA_RETERROR(EILSEQ)
1305 				}
1306 			}
1307 		} else if ((0xeb <= ic1) && (ic1 <= 0xec)) {
1308 		/*
1309 		 * Based on the draft convention of OSF-JVC CDEWG,
1310 		 * characters in this area will be mapped to
1311 		 * "CHIKAN-MOJI." (convertible character)
1312 		 * We use U+FFFD in this case.
1313 		 */
1314 			if (flag & KICONV_REPLACE_INVALID) {
1315 				KICONV_JA_NGET_REP_FR_MB(ic2);
1316 			} else {
1317 				KICONV_JA_NGET(ic2);
1318 			}
1319 			if (KICONV_JA_ISSJKANJI2(ic2)) {
1320 				uni = 0xfffd;
1321 				KICONV_JA_PUTU(uni);
1322 			} else { /* 2nd byte check failed */
1323 				if (flag & KICONV_REPLACE_INVALID) {
1324 					KICONV_JA_PUTU(KICONV_JA_REPLACE);
1325 					rv++;
1326 				} else {
1327 					KICONV_JA_RETERROR(EILSEQ)
1328 				}
1329 			}
1330 		} else { /* 1st byte check failed */
1331 			if (flag & KICONV_REPLACE_INVALID) {
1332 				KICONV_JA_PUTU(KICONV_JA_REPLACE);
1333 				rv++;
1334 			} else {
1335 				KICONV_JA_RETERROR(EILSEQ)
1336 			}
1337 		}
1338 
1339 next:
1340 		/*
1341 		 * One character successfully converted so update
1342 		 * values outside of this function's stack.
1343 		 */
1344 		*inbytesleft = ileft;
1345 		*outbytesleft = oleft;
1346 	}
1347 
1348 ret:
1349 	return (rv);
1350 }
1351 
1352 static size_t
_do_kiconvstr_to_sjis(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno,uint8_t id)1353 _do_kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1354     size_t *outbytesleft, int flag, int *errno, uint8_t id)
1355 {
1356 	uchar_t		ic;
1357 	size_t		rv = 0;
1358 	uint_t		ucs4;
1359 	ushort_t	euc16;
1360 	ushort_t	dest;
1361 
1362 	uchar_t	*ip;
1363 	size_t		ileft;
1364 	char		*op;
1365 	size_t		oleft;
1366 	size_t		read_len;
1367 
1368 	boolean_t do_not_ignore_null;
1369 
1370 	if ((inbuf == NULL) || (*inbuf == '\0')) {
1371 		return (0);
1372 	}
1373 
1374 	ip = (uchar_t *)inbuf;
1375 	ileft = *inbytesleft;
1376 	op = outbuf;
1377 	oleft = *outbytesleft;
1378 
1379 	KICONV_JA_CHECK_UTF8_BOM_WITHOUT_STATE(ip, ileft);
1380 
1381 	do_not_ignore_null = ((flag & KICONV_IGNORE_NULL) == 0);
1382 
1383 	while (ileft != 0) {
1384 		KICONV_JA_GETU(&ucs4, flag);
1385 
1386 		if (ucs4 == 0x0 && do_not_ignore_null) {
1387 			return (0);
1388 		}
1389 
1390 		if (ucs4 > 0xffff) {
1391 			/* non-BMP */
1392 			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1393 			rv++;
1394 			goto next;
1395 		}
1396 
1397 		KICONV_JA_CNV_U2_TO_EUCJPMS(id, euc16, ucs4);
1398 		if (euc16 == KICONV_JA_NODEST) {
1399 			euc16 = kiconv_ja_ucs2_to_euc16((ushort_t)ucs4);
1400 		}
1401 		if (euc16 == KICONV_JA_NODEST) {
1402 			KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1403 			rv++;
1404 			goto next;
1405 		}
1406 
1407 		switch (euc16 & 0x8080) {
1408 		case 0x0000:	/* CS0 */
1409 			if (KICONV_JA_ISC1CTRL((uchar_t)euc16)) {
1410 				KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1411 				rv++;
1412 			} else {
1413 				ic = (uchar_t)euc16;
1414 				KICONV_JA_NPUT(ic);
1415 			}
1416 			break;
1417 		case 0x8080:	/* CS1 */
1418 			ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1419 			KICONV_JA_NPUT(kiconv_ja_jis208tosj1[ic]);
1420 			/*
1421 			 * for even number row (Ku), add 0x80 to
1422 			 * look latter half of kiconv_ja_jistosj2[] array
1423 			 */
1424 			ic = (uchar_t)((euc16 & KICONV_JA_CMASK)
1425 			    + (((ic % 2) == 0) ? 0x80 : 0x00));
1426 			KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1427 			break;
1428 		case 0x0080:	/* CS2 */
1429 			ic = (uchar_t)euc16;
1430 			KICONV_JA_NPUT(ic);
1431 			break;
1432 		case 0x8000:	/* CS3 */
1433 			ic = (ushort_t)((euc16 >> 8) & KICONV_JA_CMASK);
1434 			if (euc16 == 0xa271) {
1435 				/* NUMERO SIGN */
1436 				KICONV_JA_NPUT(0x87);
1437 				KICONV_JA_NPUT(0x82);
1438 			} else if (ic < 0x75) { /* check if IBM VDC */
1439 				dest = _kiconv_ja_lookuptbl(euc16 & 0x7f7f);
1440 				if (dest == 0xffff) {
1441 					KICONV_JA_NPUT(KICONV_JA_DEF_SINGLE);
1442 				} else {
1443 					/* avoid putting NUL ('\0') */
1444 					if (dest > 0xff) {
1445 						KICONV_JA_NPUT(
1446 						    (dest >> 8) & 0xff);
1447 						KICONV_JA_NPUT(dest & 0xff);
1448 					} else {
1449 						KICONV_JA_NPUT(dest & 0xff);
1450 					}
1451 				}
1452 			} else {
1453 				KICONV_JA_NPUT(kiconv_ja_jis212tosj1[ic]);
1454 				/*
1455 				 * for even number row (Ku), add 0x80 to
1456 				 * look latter half of kiconv_ja_jistosj2[]
1457 				 */
1458 				ic = (ushort_t)((euc16 & KICONV_JA_CMASK)
1459 				    + (((ic % 2) == 0) ? 0x80 : 0x00));
1460 				KICONV_JA_NPUT(kiconv_ja_jistosj2[ic]);
1461 			}
1462 			break;
1463 		}
1464 
1465 next:
1466 		/*
1467 		 * One character successfully converted so update
1468 		 * values outside of this function's stack.
1469 		 */
1470 		*inbytesleft = ileft;
1471 		*outbytesleft = oleft;
1472 	}
1473 
1474 ret:
1475 	return (rv);
1476 }
1477 
1478 static size_t
kiconv_fr_sjis(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)1479 kiconv_fr_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1480     char **outbuf, size_t *outbytesleft, int *errno)
1481 {
1482 	if (! kcd || kcd == (void *)-1) {
1483 		*errno = EBADF;
1484 		return ((size_t)-1);
1485 	}
1486 
1487 	return (_do_kiconv_fr_sjis(kcd, inbuf, inbytesleft,
1488 	    outbuf, outbytesleft, errno));
1489 }
1490 
1491 static size_t
kiconv_to_sjis(void * kcd,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft,int * errno)1492 kiconv_to_sjis(void *kcd, char **inbuf, size_t *inbytesleft,
1493     char **outbuf, size_t *outbytesleft, int *errno)
1494 {
1495 	if (! kcd || kcd == (void *)-1) {
1496 		*errno = EBADF;
1497 		return ((size_t)-1);
1498 	}
1499 
1500 	return (_do_kiconv_to_sjis(kcd, inbuf, inbytesleft,
1501 	    outbuf, outbytesleft, errno));
1502 }
1503 
1504 static size_t
kiconvstr_fr_sjis(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)1505 kiconvstr_fr_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1506     size_t *outbytesleft, int flag, int *errno)
1507 {
1508 	return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1509 	    outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1510 }
1511 
1512 static size_t
kiconvstr_to_sjis(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)1513 kiconvstr_to_sjis(char *inbuf, size_t *inbytesleft, char *outbuf,
1514     size_t *outbytesleft, int flag, int *errno)
1515 {
1516 	return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1517 	    outbytesleft, flag, errno, KICONV_JA_TBLID_SJIS));
1518 }
1519 
1520 static size_t
kiconvstr_fr_cp932(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)1521 kiconvstr_fr_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1522     size_t *outbytesleft, int flag, int *errno)
1523 {
1524 	return (_do_kiconvstr_fr_sjis(inbuf, inbytesleft, outbuf,
1525 	    outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1526 }
1527 
1528 static size_t
kiconvstr_to_cp932(char * inbuf,size_t * inbytesleft,char * outbuf,size_t * outbytesleft,int flag,int * errno)1529 kiconvstr_to_cp932(char *inbuf, size_t *inbytesleft, char *outbuf,
1530     size_t *outbytesleft, int flag, int *errno)
1531 {
1532 	return (_do_kiconvstr_to_sjis(inbuf, inbytesleft, outbuf,
1533 	    outbytesleft, flag, errno, KICONV_JA_TBLID_CP932));
1534 }
1535 
1536 static kiconv_ops_t kiconv_ja_ops_tbl[] = {
1537 	{
1538 		"eucjp", "utf-8", open_eucjp,
1539 		kiconv_to_eucjp, close_ja, kiconvstr_to_eucjp
1540 	},
1541 	{
1542 		"utf-8", "eucjp", open_eucjp,
1543 		kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjp
1544 	},
1545 	{
1546 		"eucjpms", "utf-8", open_eucjpms,
1547 		kiconv_to_eucjp, close_ja, kiconvstr_to_eucjpms
1548 	},
1549 	{
1550 		"utf-8", "eucjpms", open_eucjpms,
1551 		kiconv_fr_eucjp, close_ja, kiconvstr_fr_eucjpms
1552 	},
1553 	{
1554 		"sjis", "utf-8", open_sjis,
1555 		kiconv_to_sjis, close_ja, kiconvstr_to_sjis
1556 	},
1557 	{
1558 		"utf-8", "sjis", open_sjis,
1559 		kiconv_fr_sjis, close_ja, kiconvstr_fr_sjis
1560 	},
1561 	{
1562 		"cp932", "utf-8", open_cp932,
1563 		kiconv_to_sjis, close_ja, kiconvstr_to_cp932
1564 	},
1565 	{
1566 		"utf-8", "cp932", open_cp932,
1567 		kiconv_fr_sjis, close_ja, kiconvstr_fr_cp932
1568 	}
1569 };
1570 
/*
 * Alternative code set names and, at the same index, the name each one
 * stands for (presumably the two arrays are read in parallel; both are
 * registered with the single alias count defined below).
 */
static char *kiconv_ja_aliases[] = {
	"932",
	"shiftjis",
	"pck"
};
static char *kiconv_ja_canonicals[] = {
	"cp932",
	"sjis",
	"sjis"
};

/* Number of entries in kiconv_ja_ops_tbl[] and kiconv_ja_aliases[]. */
#define	KICONV_JA_MAX_JA_OPS \
	(sizeof (kiconv_ja_ops_tbl) / sizeof (kiconv_ja_ops_tbl[0]))
#define	KICONV_JA_MAX_JA_ALIAS \
	(sizeof (kiconv_ja_aliases) / sizeof (kiconv_ja_aliases[0]))
1578 
1579 static kiconv_module_info_t kiconv_ja_info = {
1580 	"kiconv_ja",		/* module name */
1581 	KICONV_JA_MAX_JA_OPS,	/* number of conversion in kiconv_ja */
1582 	kiconv_ja_ops_tbl,	/* kiconv_ja ops table */
1583 	KICONV_JA_MAX_JA_ALIAS,	/* number of alias in kiconv_ja */
1584 	kiconv_ja_aliases,	/* kiconv_ja aliases */
1585 	kiconv_ja_canonicals,	/* kiconv_ja canonicals */
1586 	0
1587 };
1588 
1589 static struct modlkiconv modlkiconv_ja = {
1590 	&mod_kiconvops,
1591 	"kiconv module for Japanese",
1592 	&kiconv_ja_info
1593 };
1594 
1595 static struct modlinkage modlinkage = {
1596 	MODREV_1,
1597 	(void *)&modlkiconv_ja,
1598 	NULL
1599 };
1600 
1601 int
_init(void)1602 _init(void)
1603 {
1604 	int err;
1605 
1606 	err = mod_install(&modlinkage);
1607 	if (err)
1608 		cmn_err(CE_WARN, "kiconv_ja: failed to load kernel module");
1609 
1610 	return (err);
1611 }
1612 
1613 int
_info(struct modinfo * modinfop)1614 _info(struct modinfo *modinfop)
1615 {
1616 	return (mod_info(&modlinkage, modinfop));
1617 }
1618 
1619 int
_fini(void)1620 _fini(void)
1621 {
1622 	int err;
1623 
1624 	/*
1625 	 * If this module is being used, then, we cannot remove the module.
1626 	 * The following checking will catch pretty much all usual cases.
1627 	 *
1628 	 * Any remaining will be catached by the kiconv_unregister_module()
1629 	 * during mod_remove() at below.
1630 	 */
1631 	if (kiconv_module_ref_count(KICONV_MODULE_ID_JA))
1632 		return (EBUSY);
1633 
1634 	err = mod_remove(&modlinkage);
1635 	if (err)
1636 		cmn_err(CE_WARN, "kiconv_ja: failed to remove kernel module");
1637 
1638 	return (err);
1639 }
1640