xref: /illumos-gate/usr/src/lib/iconv_modules/ja/common/jfp_iconv_unicode.h (revision a026698cee452cd5e158d158601d992ae9de1e82)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * COPYRIGHT AND PERMISSION NOTICE
 *
 * Copyright (c) 1991-2005 Unicode, Inc. All rights reserved. Distributed
 * under the Terms of Use in http://www.unicode.org/copyright.html.
 *
 * This file has been modified by Sun Microsystems, Inc.
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
3316d86563SAlexander Pyhalov 
3416d86563SAlexander Pyhalov 
#include	<sys/types.h>
3616d86563SAlexander Pyhalov 
/*
 * Fold the endian-specific selector macros into endian-neutral ones so
 * that the rest of this header only has to test a single macro per
 * encoding family.  The *BE / *LE selectors themselves are not defined
 * in this header; they are expected to be supplied before it is
 * included.
 */

/* Source (FROMCODE) encodings */
#if	defined(JFP_ICONV_FROMCODE_UTF32BE)||defined(JFP_ICONV_FROMCODE_UTF32LE)
#define	JFP_ICONV_FROMCODE_UTF32
#endif

#if	defined(JFP_ICONV_FROMCODE_UTF16BE)||defined(JFP_ICONV_FROMCODE_UTF16LE)
#define	JFP_ICONV_FROMCODE_UTF16
#endif

#if	defined(JFP_ICONV_FROMCODE_UCS2BE)||defined(JFP_ICONV_FROMCODE_UCS2LE)
#define	JFP_ICONV_FROMCODE_UCS2
#endif

/* Destination (TOCODE) encodings */
#if	defined(JFP_ICONV_TOCODE_UTF32BE)||defined(JFP_ICONV_TOCODE_UTF32LE)
#define	JFP_ICONV_TOCODE_UTF32
#endif

#if	defined(JFP_ICONV_TOCODE_UTF16BE)||defined(JFP_ICONV_TOCODE_UTF16LE)
#define	JFP_ICONV_TOCODE_UTF16
#endif

#if	defined(JFP_ICONV_TOCODE_UCS2BE)||defined(JFP_ICONV_TOCODE_UCS2LE)
#define	JFP_ICONV_TOCODE_UCS2
#endif
6016d86563SAlexander Pyhalov 
6116d86563SAlexander Pyhalov 
/*
 * Special code points and surrogate range tests.
 *
 * BSBOM16 and BSBOM32 are the values obtained when a byte order mark is
 * assembled with the wrong byte order from a 16-bit or 32-bit unit.
 */
#define	BOM	0xfeff		/* U+FEFF byte order mark */
#define	BSBOM16	0xfffe		/* byte-swapped BOM, 16-bit unit */
#define	BSBOM32	0xfffe0000	/* byte-swapped BOM, 32-bit unit */
#define	REPLACE	0xfffd		/* U+FFFD replacement character */
/* NOTE: both range tests evaluate their argument twice. */
#define	IFHISUR(x)	((0xd800 <= (x)) && ((x) <= 0xdbff))
#define	IFLOSUR(x)	((0xdc00 <= (x)) && ((x) <= 0xdfff))
6816d86563SAlexander Pyhalov 
6916d86563SAlexander Pyhalov typedef struct {
7016d86563SAlexander Pyhalov 	boolean_t         bom_written;
7116d86563SAlexander Pyhalov 	boolean_t         little_endian;
7216d86563SAlexander Pyhalov } ucs_state_t;
7316d86563SAlexander Pyhalov 
7416d86563SAlexander Pyhalov 
7516d86563SAlexander Pyhalov #if	defined(JFP_ICONV_FROMCODE_UTF32)
7616d86563SAlexander Pyhalov 
7716d86563SAlexander Pyhalov static size_t				/* return #bytes read, or -1 */
read_unicode(unsigned int * p,unsigned char ** pip,size_t * pileft,ucs_state_t * state)7816d86563SAlexander Pyhalov read_unicode(
7916d86563SAlexander Pyhalov 	unsigned int	*p,		/* point variable to store UTF-32 */
8016d86563SAlexander Pyhalov 	unsigned char	**pip,		/* point pointer to input buf */
8116d86563SAlexander Pyhalov 	size_t		*pileft,	/* point #bytes left in input buf */
8216d86563SAlexander Pyhalov 	ucs_state_t	*state)		/* BOM state and endian */
8316d86563SAlexander Pyhalov {
8416d86563SAlexander Pyhalov 	unsigned char	*ip = *pip;
8516d86563SAlexander Pyhalov 	size_t		ileft = *pileft;
8616d86563SAlexander Pyhalov 	size_t		rv = (size_t)0; /* return value */
8716d86563SAlexander Pyhalov 	unsigned char	ic1, ic2, ic3, ic4;	/* bytes read */
8816d86563SAlexander Pyhalov 	unsigned int	u32;		/* resulted UTF-32 */
8916d86563SAlexander Pyhalov 
9016d86563SAlexander Pyhalov 	NGET(ic1, "UTF32-1");
9116d86563SAlexander Pyhalov 	NGET(ic2, "UTF32-2");
9216d86563SAlexander Pyhalov 	NGET(ic3, "UTF32-3");
9316d86563SAlexander Pyhalov 	NGET(ic4, "UTF32-4");
9416d86563SAlexander Pyhalov 
9516d86563SAlexander Pyhalov 	if (state->bom_written == B_FALSE) {
9616d86563SAlexander Pyhalov 		u32 = 0U;
9716d86563SAlexander Pyhalov 		u32 |= (unsigned int)ic1 << 24;
9816d86563SAlexander Pyhalov 		u32 |= (unsigned int)ic2 << 16;
9916d86563SAlexander Pyhalov 		u32 |= (unsigned int)ic3 << 8;
10016d86563SAlexander Pyhalov 		u32 |= (unsigned int)ic4 << 0;
10116d86563SAlexander Pyhalov 		if (u32 == BOM) {
10216d86563SAlexander Pyhalov 			state->bom_written = B_TRUE;
10316d86563SAlexander Pyhalov 			state->little_endian = B_FALSE;
10416d86563SAlexander Pyhalov 			*p = BOM;
10516d86563SAlexander Pyhalov 			rv = (size_t)0;
10616d86563SAlexander Pyhalov 			goto ret;
10716d86563SAlexander Pyhalov 		} else if (u32 == BSBOM32) {
10816d86563SAlexander Pyhalov 			state->bom_written = B_TRUE;
10916d86563SAlexander Pyhalov 			state->little_endian = B_TRUE;
11016d86563SAlexander Pyhalov 			*p = BOM;
11116d86563SAlexander Pyhalov 			rv = (size_t)0;
11216d86563SAlexander Pyhalov 			goto ret;
11316d86563SAlexander Pyhalov 		} else {
11416d86563SAlexander Pyhalov 			state->bom_written = B_TRUE;
11516d86563SAlexander Pyhalov 		}
11616d86563SAlexander Pyhalov 	}
11716d86563SAlexander Pyhalov 
11816d86563SAlexander Pyhalov 	if (state->little_endian == B_TRUE) {
11916d86563SAlexander Pyhalov 		u32 = 0U;
12016d86563SAlexander Pyhalov 		u32 |= (unsigned int)ic1 << 0;
12116d86563SAlexander Pyhalov 		u32 |= (unsigned int)ic2 << 8;
12216d86563SAlexander Pyhalov 		u32 |= (unsigned int)ic3 << 16;
12316d86563SAlexander Pyhalov 		u32 |= (unsigned int)ic4 << 24;
12416d86563SAlexander Pyhalov 	} else {
12516d86563SAlexander Pyhalov 		u32 = 0U;
12616d86563SAlexander Pyhalov 		u32 |= (unsigned int)ic1 << 24;
12716d86563SAlexander Pyhalov 		u32 |= (unsigned int)ic2 << 16;
12816d86563SAlexander Pyhalov 		u32 |= (unsigned int)ic3 << 8;
12916d86563SAlexander Pyhalov 		u32 |= (unsigned int)ic4 << 0;
13016d86563SAlexander Pyhalov 	}
13116d86563SAlexander Pyhalov 
13216d86563SAlexander Pyhalov 	if (u32 == BSBOM32) {
13316d86563SAlexander Pyhalov 		RETERROR(EILSEQ, "byte-swapped BOM detected")
13416d86563SAlexander Pyhalov 	}
13516d86563SAlexander Pyhalov 
13616d86563SAlexander Pyhalov 	if ((u32 == 0xfffe) || (u32 == 0xffff) || (u32 > 0x10ffff)
13716d86563SAlexander Pyhalov 			|| IFHISUR(u32) || IFLOSUR(u32)) {
13816d86563SAlexander Pyhalov 		RETERROR(EILSEQ, "illegal in UTF-32")
13916d86563SAlexander Pyhalov 	}
14016d86563SAlexander Pyhalov 
14116d86563SAlexander Pyhalov 	*p = u32;
14216d86563SAlexander Pyhalov 	rv = *pileft - ileft;
14316d86563SAlexander Pyhalov 
14416d86563SAlexander Pyhalov ret:
14516d86563SAlexander Pyhalov 	if (rv != (size_t)-1) {
14616d86563SAlexander Pyhalov 		/* update *pip and *pileft only on successful return */
14716d86563SAlexander Pyhalov 		*pip = ip;
14816d86563SAlexander Pyhalov 		*pileft = ileft;
14916d86563SAlexander Pyhalov 	}
15016d86563SAlexander Pyhalov 
15116d86563SAlexander Pyhalov 	return (rv);
15216d86563SAlexander Pyhalov }
15316d86563SAlexander Pyhalov 
15416d86563SAlexander Pyhalov #elif	defined(JFP_ICONV_FROMCODE_UTF16) || defined(JFP_ICONV_FROMCODE_UCS2)
15516d86563SAlexander Pyhalov 
15616d86563SAlexander Pyhalov static size_t				/* return #bytes read, or -1 */
read_unicode(unsigned int * p,unsigned char ** pip,size_t * pileft,ucs_state_t * state)15716d86563SAlexander Pyhalov read_unicode(
15816d86563SAlexander Pyhalov 	unsigned int	*p,		/* point variable to store UTF-32 */
15916d86563SAlexander Pyhalov 	unsigned char	**pip,		/* point pointer to input buf */
16016d86563SAlexander Pyhalov 	size_t		*pileft,	/* point #bytes left in input buf */
16116d86563SAlexander Pyhalov 	ucs_state_t	*state)		/* BOM state and endian */
16216d86563SAlexander Pyhalov {
16316d86563SAlexander Pyhalov 	unsigned char	*ip = *pip;
16416d86563SAlexander Pyhalov 	size_t		ileft = *pileft;
16516d86563SAlexander Pyhalov 	size_t		rv = (size_t)0; /* return value */
16616d86563SAlexander Pyhalov 	unsigned char	ic1, ic2;	/* bytes read */
16716d86563SAlexander Pyhalov 	unsigned int	u32;		/* resulted UTF-32 */
16816d86563SAlexander Pyhalov #ifndef	JFP_ICONV_FROMCODE_UCS2
16916d86563SAlexander Pyhalov 	unsigned int	losur;		/* low surrogate */
17016d86563SAlexander Pyhalov #endif
17116d86563SAlexander Pyhalov 
17216d86563SAlexander Pyhalov 	NGET(ic1, "UTF16-1");	/* read 1st byte */
17316d86563SAlexander Pyhalov 	NGET(ic2, "UTF16-2");	/* read 2nd byte */
17416d86563SAlexander Pyhalov 
17516d86563SAlexander Pyhalov 	if (state->bom_written == B_FALSE) {
17616d86563SAlexander Pyhalov 		u32 = 0U;
17716d86563SAlexander Pyhalov 		u32 |= (unsigned int)ic1 << 8;
17816d86563SAlexander Pyhalov 		u32 |= (unsigned int)ic2 << 0;
17916d86563SAlexander Pyhalov 		if (u32 == BOM) {
18016d86563SAlexander Pyhalov 			state->bom_written = B_TRUE;
18116d86563SAlexander Pyhalov 			state->little_endian = B_FALSE;
18216d86563SAlexander Pyhalov 			*p = BOM;
18316d86563SAlexander Pyhalov 			rv = (size_t)0;
18416d86563SAlexander Pyhalov 			goto ret;
18516d86563SAlexander Pyhalov 		} else if (u32 == BSBOM16) {
18616d86563SAlexander Pyhalov 			state->bom_written = B_TRUE;
18716d86563SAlexander Pyhalov 			state->little_endian = B_TRUE;
18816d86563SAlexander Pyhalov 			*p = BOM;
18916d86563SAlexander Pyhalov 			rv = (size_t)0;
19016d86563SAlexander Pyhalov 			goto ret;
19116d86563SAlexander Pyhalov 		} else {
19216d86563SAlexander Pyhalov 			state->bom_written = B_TRUE;
19316d86563SAlexander Pyhalov 		}
19416d86563SAlexander Pyhalov 	}
19516d86563SAlexander Pyhalov 
19616d86563SAlexander Pyhalov 	if (state->little_endian == B_TRUE) {
19716d86563SAlexander Pyhalov 		u32 = (((unsigned int)ic2) << 8) | ic1;
19816d86563SAlexander Pyhalov 	} else {
19916d86563SAlexander Pyhalov 		u32 = (((unsigned int)ic1) << 8) | ic2;
20016d86563SAlexander Pyhalov 	}
20116d86563SAlexander Pyhalov 
20216d86563SAlexander Pyhalov 	if (u32 == BSBOM16) {
20316d86563SAlexander Pyhalov 		RETERROR(EILSEQ, "byte-swapped BOM detected")
20416d86563SAlexander Pyhalov 	}
20516d86563SAlexander Pyhalov 
20616d86563SAlexander Pyhalov 	if ((u32 == 0xfffe) || (u32 == 0xffff) || (u32 > 0x10ffff)
20716d86563SAlexander Pyhalov 			|| (IFLOSUR(u32))) {
20816d86563SAlexander Pyhalov 		RETERROR(EILSEQ, "illegal in UTF16")
20916d86563SAlexander Pyhalov 	}
21016d86563SAlexander Pyhalov 
21116d86563SAlexander Pyhalov 	if (IFHISUR(u32)) {
21216d86563SAlexander Pyhalov #if	defined(JFP_ICONV_FROMCODE_UCS2)
21316d86563SAlexander Pyhalov 		RETERROR(EILSEQ, "surrogate is illegal in UCS2")
21416d86563SAlexander Pyhalov #else	/* !defined(JFP_ICONV_FROMCODE_UCS2) */
21516d86563SAlexander Pyhalov 		NGET(ic1, "LOSUR-1");
21616d86563SAlexander Pyhalov 		NGET(ic2, "LOSUR-2");
21716d86563SAlexander Pyhalov 
21816d86563SAlexander Pyhalov 		if (state->little_endian == B_TRUE) {
21916d86563SAlexander Pyhalov 			losur = (((unsigned int)ic2) << 8) | ic1;
22016d86563SAlexander Pyhalov 		} else {
22116d86563SAlexander Pyhalov 			losur = (((unsigned int)ic1) << 8) | ic2;
22216d86563SAlexander Pyhalov 		}
22316d86563SAlexander Pyhalov 
22416d86563SAlexander Pyhalov 		if (IFLOSUR(losur)) {
22516d86563SAlexander Pyhalov 			u32 = ((u32 - 0xd800) * 0x400)
22616d86563SAlexander Pyhalov 				+ (losur - 0xdc00) + 0x10000;
22716d86563SAlexander Pyhalov 		} else {
22816d86563SAlexander Pyhalov 			RETERROR(EILSEQ, "low-surrogate expected")
22916d86563SAlexander Pyhalov 		}
23016d86563SAlexander Pyhalov #endif	/* defined(JFP_ICONV_FROMCODE_UCS2) */
23116d86563SAlexander Pyhalov 	}
23216d86563SAlexander Pyhalov 
23316d86563SAlexander Pyhalov 	*p = u32;
23416d86563SAlexander Pyhalov 	rv = *pileft - ileft;
23516d86563SAlexander Pyhalov 
23616d86563SAlexander Pyhalov ret:
23716d86563SAlexander Pyhalov 	if (rv != (size_t)-1) {
23816d86563SAlexander Pyhalov 		/* update *pip and *pileft only on successful return */
23916d86563SAlexander Pyhalov 		*pip = ip;
24016d86563SAlexander Pyhalov 		*pileft = ileft;
24116d86563SAlexander Pyhalov 	}
24216d86563SAlexander Pyhalov 
24316d86563SAlexander Pyhalov 	return (rv);
24416d86563SAlexander Pyhalov }
24516d86563SAlexander Pyhalov 
24616d86563SAlexander Pyhalov #else	/* JFP_ICONV_FROMCODE_UTF8 (default) */
24716d86563SAlexander Pyhalov 
/*
 * The following vector gives the number of bytes that follow the first
 * byte of a UTF-8 character.  The index is the first byte.  An entry of
 * 0 for an index of 0x80 or above marks a byte that cannot start a
 * character (0x80-0xC1 and 0xF5-0xFF).
 */
static const char remaining_bytes_tbl[0x100] = {
   /*  00 - BF  */
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,

   /*  C0  C1  C2  C3  C4  C5  C6  C7  C8  C9  CA  CB  CC  CD  CE  CF  */
	0,  0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,

   /*  D0  D1  D2  D3  D4  D5  D6  D7  D8  D9  DA  DB  DC  DD  DE  DF  */
	1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,

   /*  E0  E1  E2  E3  E4  E5  E6  E7  E8  E9  EA  EB  EC  ED  EE  EF  */
	2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,

   /*  F0  F1  F2  F3  F4  F5  F6  F7  F8  F9  FA  FB  FC  FD  FE  FF  */
	3,  3,  3,  3,  3,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
};
27816d86563SAlexander Pyhalov 
27916d86563SAlexander Pyhalov 
/*
 * The following is a vector of bit-masks that select the payload bits
 * in the first byte of a UTF-8 character.  The index is the number of
 * bytes that follow the first byte, i.e. the value looked up in the
 * remaining-bytes table for that first byte.
 */
static const char masks_tbl[6] = { 0x00, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
28616d86563SAlexander Pyhalov 
28716d86563SAlexander Pyhalov 
/*
 * The following vector provides the valid minimum value for the 2nd
 * byte of a multibyte UTF-8 character, for better illegal sequence
 * checking.  The index value must be the value of the first byte of the
 * UTF-8 character.  Entries for bytes that do not start a multibyte
 * character are 0.
 */
static const unsigned char valid_min_2nd_byte[0x100] = {
     /*  00 - BF  */
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
     /*  C0    C1    C2    C3    C4    C5    C6    C7  */
	0,    0,    0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
     /*  C8    C9    CA    CB    CC    CD    CE    CF  */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
     /*  D0    D1    D2    D3    D4    D5    D6    D7  */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
     /*  D8    D9    DA    DB    DC    DD    DE    DF  */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
     /*  E0    E1    E2    E3    E4    E5    E6    E7  */
	0xa0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
     /*  E8    E9    EA    EB    EC    ED    EE    EF  */
	0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
     /*  F0    F1    F2    F3    F4    F5    F6    F7  */
	0x90, 0x80, 0x80, 0x80, 0x80, 0,    0,    0,
     /*  F8    F9    FA    FB    FC    FD    FE    FF  */
	0,    0,    0,    0,    0,    0,    0,    0,
};
33516d86563SAlexander Pyhalov 
/*
 * Valid maximum value for the 2nd byte of a multibyte UTF-8 character.
 * The index value must be the value of the first byte of the UTF-8
 * character.  Entries for bytes that do not start a multibyte character
 * are 0.
 */
static const unsigned char valid_max_2nd_byte[0x100] = {
     /*  00 - BF  */
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
	0,    0,    0,    0,    0,    0,    0,    0,
     /*  C0    C1    C2    C3    C4    C5    C6    C7  */
	0,    0,    0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
     /*  C8    C9    CA    CB    CC    CD    CE    CF  */
	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
     /*  D0    D1    D2    D3    D4    D5    D6    D7  */
	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
     /*  D8    D9    DA    DB    DC    DD    DE    DF  */
	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
     /*  E0    E1    E2    E3    E4    E5    E6    E7  */
	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0xbf,
     /*  E8    E9    EA    EB    EC    ED    EE    EF  */
	0xbf, 0xbf, 0xbf, 0xbf, 0xbf, 0x9f, 0xbf, 0xbf,
     /*  F0    F1    F2    F3    F4    F5    F6    F7  */
	0xbf, 0xbf, 0xbf, 0xbf, 0x8f, 0,    0,    0,
     /*  F8    F9    FA    FB    FC    FD    FE    FF  */
	0,    0,    0,    0,    0,    0,    0,    0,
};
37716d86563SAlexander Pyhalov 
37816d86563SAlexander Pyhalov static size_t
utf8_ucs(unsigned int * p,unsigned char ** pip,size_t * pileft)37916d86563SAlexander Pyhalov utf8_ucs(unsigned int *p, unsigned char **pip, size_t *pileft)
38016d86563SAlexander Pyhalov {
38116d86563SAlexander Pyhalov 	unsigned int	l;	/* to be copied to *p on successful return */
38216d86563SAlexander Pyhalov 	unsigned char	ic;	/* current byte */
38316d86563SAlexander Pyhalov 	unsigned char	ic1;	/* 1st byte */
38416d86563SAlexander Pyhalov 	unsigned char	*ip = *pip;	/* next byte to read */
38516d86563SAlexander Pyhalov 	size_t		ileft = *pileft; /* number of bytes available */
38616d86563SAlexander Pyhalov 	size_t		rv = (size_t)0; /* return value of this function */
38716d86563SAlexander Pyhalov 	int		remaining_bytes;
38816d86563SAlexander Pyhalov 
38916d86563SAlexander Pyhalov 	NGET(ic, "no bytes available");	/* read 1st byte */
39016d86563SAlexander Pyhalov 	ic1 = ic;
39116d86563SAlexander Pyhalov 	l = ic1; /* get bits from 1st byte to UCS value */
39216d86563SAlexander Pyhalov 
39316d86563SAlexander Pyhalov 	if (ic1 < 0x80) {
39416d86563SAlexander Pyhalov 		/* successfully converted */
39516d86563SAlexander Pyhalov 		*p = l;
39616d86563SAlexander Pyhalov 		rv = *pileft - ileft;
39716d86563SAlexander Pyhalov 		goto ret;
39816d86563SAlexander Pyhalov 	}
39916d86563SAlexander Pyhalov 
40016d86563SAlexander Pyhalov 	remaining_bytes = remaining_bytes_tbl[ic1];
40116d86563SAlexander Pyhalov 
40216d86563SAlexander Pyhalov 	if (remaining_bytes != 0) {
40316d86563SAlexander Pyhalov 		l &= masks_tbl[remaining_bytes];
40416d86563SAlexander Pyhalov 
40516d86563SAlexander Pyhalov 		for (; remaining_bytes > 0; remaining_bytes--) {
40616d86563SAlexander Pyhalov 			if (ic1 != 0U) {
40716d86563SAlexander Pyhalov 				NGET(ic, "2nd byte of UTF-8");
40816d86563SAlexander Pyhalov 				if ((ic < valid_min_2nd_byte[ic1]) ||
40916d86563SAlexander Pyhalov 					(ic > valid_max_2nd_byte[ic1])) {
41016d86563SAlexander Pyhalov 					RETERROR(EILSEQ, "2nd byte is invalid")
41116d86563SAlexander Pyhalov 				}
41216d86563SAlexander Pyhalov 				ic1 = 0U; /* 2nd byte check done */
41316d86563SAlexander Pyhalov 			} else {
41416d86563SAlexander Pyhalov 				NGET(ic, "3rd or later byte of UTF-8");
41516d86563SAlexander Pyhalov 				if ((ic < 0x80) || (ic > 0xbf)) {
41616d86563SAlexander Pyhalov 				RETERROR(EILSEQ, "3rd or later byte is invalid")
41716d86563SAlexander Pyhalov 				}
41816d86563SAlexander Pyhalov 			}
41916d86563SAlexander Pyhalov 			l = (l << 6) | (ic & 0x3f);
42016d86563SAlexander Pyhalov 		}
42116d86563SAlexander Pyhalov 
42216d86563SAlexander Pyhalov 		/* successfully converted */
42316d86563SAlexander Pyhalov 		*p = l;
42416d86563SAlexander Pyhalov 		rv = *pileft - ileft;
42516d86563SAlexander Pyhalov 		goto ret;
42616d86563SAlexander Pyhalov 	} else {
42716d86563SAlexander Pyhalov 		RETERROR(EILSEQ, "1st byte is invalid")
42816d86563SAlexander Pyhalov 	}
42916d86563SAlexander Pyhalov 
43016d86563SAlexander Pyhalov ret:
43116d86563SAlexander Pyhalov 	if (rv != (size_t)-1) {
43216d86563SAlexander Pyhalov 		/*
43316d86563SAlexander Pyhalov 		 * update *pip and *pileft on successful return
43416d86563SAlexander Pyhalov 		 */
43516d86563SAlexander Pyhalov 		*pip = ip;
43616d86563SAlexander Pyhalov 		*pileft = ileft;
43716d86563SAlexander Pyhalov 	}
43816d86563SAlexander Pyhalov 
43916d86563SAlexander Pyhalov 	return (rv);
44016d86563SAlexander Pyhalov }
44116d86563SAlexander Pyhalov 
44216d86563SAlexander Pyhalov /* for UTF-8 */
44316d86563SAlexander Pyhalov static size_t				/* return #bytes read, or -1 */
read_unicode(unsigned int * p,unsigned char ** pip,size_t * pileft,ucs_state_t * state)44416d86563SAlexander Pyhalov read_unicode(
44516d86563SAlexander Pyhalov 	unsigned int	*p,		/* point variable to store UTF-32 */
44616d86563SAlexander Pyhalov 	unsigned char	**pip,		/* point pointer to input buf */
44716d86563SAlexander Pyhalov 	size_t		*pileft,	/* point #bytes left in input buf */
44816d86563SAlexander Pyhalov 	ucs_state_t	*state)		/* BOM state and endian - unused */
44916d86563SAlexander Pyhalov {
45016d86563SAlexander Pyhalov 	return (utf8_ucs(p, pip, pileft));
45116d86563SAlexander Pyhalov }
45216d86563SAlexander Pyhalov 
45316d86563SAlexander Pyhalov #endif
45416d86563SAlexander Pyhalov 
45516d86563SAlexander Pyhalov #if	defined(JFP_ICONV_TOCODE_UTF32)
45616d86563SAlexander Pyhalov 
45716d86563SAlexander Pyhalov static size_t
write_unicode(unsigned int u32,char ** pop,size_t * poleft,ucs_state_t * state,const char * msg)45816d86563SAlexander Pyhalov write_unicode(
45916d86563SAlexander Pyhalov 	unsigned int	u32,		/* UTF-32 to write */
46016d86563SAlexander Pyhalov 	char		**pop,		/* point pointer to output buf */
46116d86563SAlexander Pyhalov 	size_t		*poleft,	/* point #bytes left in output buf */
46216d86563SAlexander Pyhalov 	ucs_state_t	*state,		/* BOM state and endian */
46316d86563SAlexander Pyhalov 	const char	*msg)		/* debug message */
46416d86563SAlexander Pyhalov {
46516d86563SAlexander Pyhalov 	char		*op = *pop;
46616d86563SAlexander Pyhalov 	size_t		oleft = *poleft;
46716d86563SAlexander Pyhalov 	size_t		rv = (size_t)0;		/* return value */
46816d86563SAlexander Pyhalov 	unsigned char	ic1, ic2, ic3, ic4;	/* bytes to be written */
46916d86563SAlexander Pyhalov 
47016d86563SAlexander Pyhalov 	if (state->bom_written == B_FALSE) {
47116d86563SAlexander Pyhalov 		if (state->little_endian == B_TRUE) {
47216d86563SAlexander Pyhalov 			ic1 = (unsigned char)((BOM >> 0) & 0xff);
47316d86563SAlexander Pyhalov 			ic2 = (unsigned char)((BOM >> 8) & 0xff);
47416d86563SAlexander Pyhalov 			ic3 = (unsigned char)((BOM >> 16) & 0xff);
47516d86563SAlexander Pyhalov 			ic4 = (unsigned char)((BOM >> 24) & 0xff);
47616d86563SAlexander Pyhalov 		} else {
47716d86563SAlexander Pyhalov 			ic1 = (unsigned char)((BOM >> 24) & 0xff);
47816d86563SAlexander Pyhalov 			ic2 = (unsigned char)((BOM >> 16) & 0xff);
47916d86563SAlexander Pyhalov 			ic3 = (unsigned char)((BOM >> 8) & 0xff);
48016d86563SAlexander Pyhalov 			ic4 = (unsigned char)((BOM >> 0) & 0xff);
48116d86563SAlexander Pyhalov 		}
48216d86563SAlexander Pyhalov 		rv += 4;
48316d86563SAlexander Pyhalov 		NPUT(ic1, "BOM32-1")
48416d86563SAlexander Pyhalov 		NPUT(ic2, "BOM32-2")
48516d86563SAlexander Pyhalov 		NPUT(ic3, "BOM32-3")
48616d86563SAlexander Pyhalov 		NPUT(ic4, "BOM32-4")
48716d86563SAlexander Pyhalov 	}
48816d86563SAlexander Pyhalov 
48916d86563SAlexander Pyhalov 	if (state->little_endian == B_TRUE) {
49016d86563SAlexander Pyhalov 		ic1 = (unsigned char)((u32 >> 0) & 0xff);
49116d86563SAlexander Pyhalov 		ic2 = (unsigned char)((u32 >> 8) & 0xff);
49216d86563SAlexander Pyhalov 		ic3 = (unsigned char)((u32 >> 16) & 0xff);
49316d86563SAlexander Pyhalov 		ic4 = (unsigned char)((u32 >> 24) & 0xff);
49416d86563SAlexander Pyhalov 		rv += 4;
49516d86563SAlexander Pyhalov 	} else {
49616d86563SAlexander Pyhalov 		ic1 = (unsigned char)((u32 >> 24) & 0xff);
49716d86563SAlexander Pyhalov 		ic2 = (unsigned char)((u32 >> 16) & 0xff);
49816d86563SAlexander Pyhalov 		ic3 = (unsigned char)((u32 >> 8) & 0xff);
49916d86563SAlexander Pyhalov 		ic4 = (unsigned char)((u32 >> 0) & 0xff);
50016d86563SAlexander Pyhalov 		rv += 4;
50116d86563SAlexander Pyhalov 	}
50216d86563SAlexander Pyhalov 
50316d86563SAlexander Pyhalov 	NPUT(ic1, "UTF32-1")
50416d86563SAlexander Pyhalov 	NPUT(ic2, "UTF32-2")
50516d86563SAlexander Pyhalov 	NPUT(ic3, "UTF32-3")
50616d86563SAlexander Pyhalov 	NPUT(ic4, "UTF32-4")
50716d86563SAlexander Pyhalov 
50816d86563SAlexander Pyhalov ret:
50916d86563SAlexander Pyhalov 	if (rv != (size_t)-1) {
51016d86563SAlexander Pyhalov 		/* update *pop and *poleft only on successful return */
51116d86563SAlexander Pyhalov 		*pop = op;
51216d86563SAlexander Pyhalov 		*poleft = oleft;
51316d86563SAlexander Pyhalov 		if (state->bom_written == B_FALSE)
51416d86563SAlexander Pyhalov 			state->bom_written = B_TRUE;
51516d86563SAlexander Pyhalov 	}
51616d86563SAlexander Pyhalov 
51716d86563SAlexander Pyhalov 	return (rv);
51816d86563SAlexander Pyhalov }
51916d86563SAlexander Pyhalov 
52016d86563SAlexander Pyhalov #elif	defined(JFP_ICONV_TOCODE_UTF16) || defined(JFP_ICONV_TOCODE_UCS2)
52116d86563SAlexander Pyhalov 
52216d86563SAlexander Pyhalov static size_t
write_unicode(unsigned int u32,char ** pop,size_t * poleft,ucs_state_t * state,const char * msg)52316d86563SAlexander Pyhalov write_unicode(
52416d86563SAlexander Pyhalov 	unsigned int	u32,		/* UTF-32 to write */
52516d86563SAlexander Pyhalov 	char		**pop,		/* point pointer to output buf */
52616d86563SAlexander Pyhalov 	size_t		*poleft,	/* point #bytes left in output buf */
52716d86563SAlexander Pyhalov 	ucs_state_t	*state,		/* BOM state and endian */
52816d86563SAlexander Pyhalov 	const char	*msg)		/* debug message */
52916d86563SAlexander Pyhalov {
53016d86563SAlexander Pyhalov 	char		*op = *pop;
53116d86563SAlexander Pyhalov 	size_t		oleft = *poleft;
53216d86563SAlexander Pyhalov 	size_t		rv = (size_t)0;	/* return value */
53316d86563SAlexander Pyhalov 	unsigned char	ic1, ic2;	/* bytes to be written */
53416d86563SAlexander Pyhalov 	unsigned int	losur = 0U;		/* Hi/Lo surrogates */
53516d86563SAlexander Pyhalov 
53616d86563SAlexander Pyhalov 	if (state->bom_written == B_FALSE) {
53716d86563SAlexander Pyhalov 		if (state->little_endian == B_TRUE) {
53816d86563SAlexander Pyhalov 			ic1 = (unsigned char)((BOM >> 0) & 0xff);
53916d86563SAlexander Pyhalov 			ic2 = (unsigned char)((BOM >> 8) & 0xff);
54016d86563SAlexander Pyhalov 		} else {
54116d86563SAlexander Pyhalov 			ic1 = (unsigned char)((BOM >> 8) & 0xff);
54216d86563SAlexander Pyhalov 			ic2 = (unsigned char)((BOM >> 0) & 0xff);
54316d86563SAlexander Pyhalov 		}
54416d86563SAlexander Pyhalov 		rv += 2;
54516d86563SAlexander Pyhalov 		NPUT(ic1, "BOM16-1")
54616d86563SAlexander Pyhalov 		NPUT(ic2, "BOM16-2")
54716d86563SAlexander Pyhalov 	}
54816d86563SAlexander Pyhalov 
54916d86563SAlexander Pyhalov 	if (u32 > 0xffff) {
55016d86563SAlexander Pyhalov #if	defined(JFP_ICONV_TOCODE_UCS2)
55116d86563SAlexander Pyhalov 		u32 = REPLACE;
55216d86563SAlexander Pyhalov #else	/* !defined(JFP_ICONV_TOCODE_UCS2) */
55316d86563SAlexander Pyhalov 		losur = ((u32 - 0x10000) % 0x400) + 0xdc00;
55416d86563SAlexander Pyhalov 		u32 = ((u32 - 0x10000) / 0x400) + 0xd800;
55516d86563SAlexander Pyhalov #endif	/* defined(JFP_ICONV_TOCODE_UCS2) */
55616d86563SAlexander Pyhalov 	}
55716d86563SAlexander Pyhalov 
55816d86563SAlexander Pyhalov 	if (state->little_endian == B_TRUE) {
55916d86563SAlexander Pyhalov 		ic1 = (unsigned char)(u32 & 0xff);
56016d86563SAlexander Pyhalov 		ic2 = (unsigned char)((u32 >> 8) & 0xff);
56116d86563SAlexander Pyhalov 		rv += 2;
56216d86563SAlexander Pyhalov 	} else {
56316d86563SAlexander Pyhalov 		ic1 = (unsigned char)((u32 >> 8) & 0xff);
56416d86563SAlexander Pyhalov 		ic2 = (unsigned char)(u32 & 0xff);
56516d86563SAlexander Pyhalov 		rv += 2;
56616d86563SAlexander Pyhalov 	}
56716d86563SAlexander Pyhalov 
56816d86563SAlexander Pyhalov 	NPUT(ic1, "UTF16-1")
56916d86563SAlexander Pyhalov 	NPUT(ic2, "UTF16-2")
57016d86563SAlexander Pyhalov 
57116d86563SAlexander Pyhalov 	if (losur != 0U) {
57216d86563SAlexander Pyhalov 		if (state->little_endian == B_TRUE) {
57316d86563SAlexander Pyhalov 			ic1 = (unsigned char)(losur & 0xff);
57416d86563SAlexander Pyhalov 			ic2 = (unsigned char)((losur >> 8) & 0xff);
57516d86563SAlexander Pyhalov 			rv += 2;
57616d86563SAlexander Pyhalov 		} else {
57716d86563SAlexander Pyhalov 			ic1 = (unsigned char)((losur >> 8) & 0xff);
57816d86563SAlexander Pyhalov 			ic2 = (unsigned char)(losur & 0xff);
57916d86563SAlexander Pyhalov 			rv += 2;
58016d86563SAlexander Pyhalov 		}
58116d86563SAlexander Pyhalov 
58216d86563SAlexander Pyhalov 		NPUT(ic1, "LOSUR-1")
58316d86563SAlexander Pyhalov 		NPUT(ic2, "LOSUR-2")
58416d86563SAlexander Pyhalov 	}
58516d86563SAlexander Pyhalov 
58616d86563SAlexander Pyhalov 
58716d86563SAlexander Pyhalov ret:
58816d86563SAlexander Pyhalov 	if (rv != (size_t)-1) {
58916d86563SAlexander Pyhalov 		/* update *pop and *poleft only on successful return */
59016d86563SAlexander Pyhalov 		*pop = op;
59116d86563SAlexander Pyhalov 		*poleft = oleft;
59216d86563SAlexander Pyhalov 		if (state->bom_written == B_FALSE)
59316d86563SAlexander Pyhalov 			state->bom_written = B_TRUE;
59416d86563SAlexander Pyhalov 	}
59516d86563SAlexander Pyhalov 
59616d86563SAlexander Pyhalov 	return (rv);
59716d86563SAlexander Pyhalov }
59816d86563SAlexander Pyhalov 
59916d86563SAlexander Pyhalov #else	/* JFP_ICONV_TOCODE_UTF8 (default) */
60016d86563SAlexander Pyhalov 
60116d86563SAlexander Pyhalov static size_t
write_unicode(unsigned int u32,char ** pop,size_t * poleft,ucs_state_t * state,const char * msg)60216d86563SAlexander Pyhalov write_unicode(
60316d86563SAlexander Pyhalov 	unsigned int	u32,		/* UTF-32 to write */
60416d86563SAlexander Pyhalov 	char		**pop,		/* point pointer to output buf */
60516d86563SAlexander Pyhalov 	size_t		*poleft,	/* point #bytes left in output buf */
60616d86563SAlexander Pyhalov 	ucs_state_t	*state,		/* BOM state and endian - unused */
60716d86563SAlexander Pyhalov 	const char	*msg)		/* debug message */
60816d86563SAlexander Pyhalov {
60916d86563SAlexander Pyhalov 	char	*op = *pop;
61016d86563SAlexander Pyhalov 	size_t	oleft = *poleft;
61116d86563SAlexander Pyhalov 	size_t	rv = 0;			/* return value */
61216d86563SAlexander Pyhalov 
61316d86563SAlexander Pyhalov 	if (u32 <= 0x7f) {
61416d86563SAlexander Pyhalov 		NPUT((unsigned char)(u32), msg);
61516d86563SAlexander Pyhalov 		rv = 1;
61616d86563SAlexander Pyhalov 	} else if (u32 <= 0x7ff) {
61716d86563SAlexander Pyhalov 		NPUT((unsigned char)((((u32)>>6) & 0x1f) | 0xc0), msg);
61816d86563SAlexander Pyhalov 		NPUT((unsigned char)((((u32)>>0) & 0x3f) | 0x80), msg);
61916d86563SAlexander Pyhalov 		rv = 2;
62016d86563SAlexander Pyhalov 	} else if ((u32 >= 0xd800) && (u32 <= 0xdfff)) {
62116d86563SAlexander Pyhalov 		RETERROR(EILSEQ, "surrogate in UTF-8")
62216d86563SAlexander Pyhalov 	} else if (u32 <= 0xffff) {
62316d86563SAlexander Pyhalov 		NPUT((unsigned char)((((u32)>>12) & 0x0f) | 0xe0), msg);
62416d86563SAlexander Pyhalov 		NPUT((unsigned char)((((u32)>>6) & 0x3f) | 0x80), msg);
62516d86563SAlexander Pyhalov 		NPUT((unsigned char)((((u32)>>0) & 0x3f) | 0x80), msg);
62616d86563SAlexander Pyhalov 		rv = 3;
62716d86563SAlexander Pyhalov 	} else if (u32 <= 0x10ffff) {
62816d86563SAlexander Pyhalov 		NPUT((unsigned char)((((u32)>>18) & 0x07) | 0xf0), msg);
62916d86563SAlexander Pyhalov 		NPUT((unsigned char)((((u32)>>12) & 0x3f) | 0x80), msg);
63016d86563SAlexander Pyhalov 		NPUT((unsigned char)((((u32)>>6) & 0x3f) | 0x80), msg);
63116d86563SAlexander Pyhalov 		NPUT((unsigned char)((((u32)>>0) & 0x3f) | 0x80), msg);
63216d86563SAlexander Pyhalov 		rv = 4;
63316d86563SAlexander Pyhalov 	} else {
63416d86563SAlexander Pyhalov 		RETERROR(EILSEQ, "beyond range of UTF-8")
63516d86563SAlexander Pyhalov 	}
63616d86563SAlexander Pyhalov 
63716d86563SAlexander Pyhalov ret:
63816d86563SAlexander Pyhalov 	if (rv != (size_t)-1) {
63916d86563SAlexander Pyhalov 		/* update *pop and *poleft only on successful return */
64016d86563SAlexander Pyhalov 		*pop = op;
64116d86563SAlexander Pyhalov 		*poleft = oleft;
64216d86563SAlexander Pyhalov 	}
64316d86563SAlexander Pyhalov 
64416d86563SAlexander Pyhalov 	return (rv);
64516d86563SAlexander Pyhalov }
64616d86563SAlexander Pyhalov 
64716d86563SAlexander Pyhalov #endif
64816d86563SAlexander Pyhalov 
/*
 * GETU(pu32): fetch the next character through read_unicode() into *pu32.
 *
 * Expands to a single switch statement and relies on the expansion site
 * providing ip, ileft, cd and rv as well as the labels "ret" and "next".
 * A result of 0 jumps to "next", a result of (size_t)-1 jumps to "ret",
 * and any other result falls through to the code following the macro.
 */
#define	GETU(pu32) \
	switch (read_unicode(pu32, &ip, &ileft, (ucs_state_t *)cd)) { \
	case (size_t)0: \
		/* read_unicode() already handled the character read; */ \
		/* the caller has nothing further to evaluate */ \
		rv = (size_t)0; \
		goto next; \
	case (size_t)-1: \
		/* read_unicode() has set errno */ \
		rv = (size_t)-1; \
		goto ret; \
	default: \
		break; \
	}
66316d86563SAlexander Pyhalov 
66416d86563SAlexander Pyhalov 
/*
 * PUTU(u32, msg): emit one code point through write_unicode().
 *
 * Relies on the expansion site providing op, oleft, cd and rv as well as
 * the label "ret".  When write_unicode() reports (size_t)-1 the macro
 * stores that value in rv and jumps to "ret"; otherwise execution simply
 * continues after the macro.
 */
#define	PUTU(u32, msg)	\
	if ((size_t)-1 == \
	    write_unicode(u32, &op, &oleft, (ucs_state_t *)cd, msg)) { \
		rv = (size_t)-1; \
		goto ret; \
	}
67116d86563SAlexander Pyhalov 
67216d86563SAlexander Pyhalov #include	<stdlib.h>
67316d86563SAlexander Pyhalov 
67416d86563SAlexander Pyhalov static void
_icv_reset_unicode(void * cd)67516d86563SAlexander Pyhalov _icv_reset_unicode(void *cd)
67616d86563SAlexander Pyhalov {
67716d86563SAlexander Pyhalov 	ucs_state_t	*state = (ucs_state_t *)cd;
67816d86563SAlexander Pyhalov 
67916d86563SAlexander Pyhalov #if	defined(JFP_ICONV_FROMCODE_UTF32BE) || \
68016d86563SAlexander Pyhalov 	defined(JFP_ICONV_TOCODE_UTF32BE) || \
68116d86563SAlexander Pyhalov 	defined(JFP_ICONV_FROMCODE_UTF16BE) || \
68216d86563SAlexander Pyhalov 	defined(JFP_ICONV_TOCODE_UTF16BE) || \
68316d86563SAlexander Pyhalov 	defined(JFP_ICONV_FROMCODE_UCS2BE) || \
68416d86563SAlexander Pyhalov 	defined(JFP_ICONV_TOCODE_UCS2BE)
68516d86563SAlexander Pyhalov 	state->little_endian = B_FALSE;
68616d86563SAlexander Pyhalov 	state->bom_written = B_TRUE;
68716d86563SAlexander Pyhalov #elif	defined(JFP_ICONV_FROMCODE_UTF32LE) || \
68816d86563SAlexander Pyhalov 	defined(JFP_ICONV_TOCODE_UTF32LE) || \
68916d86563SAlexander Pyhalov 	defined(JFP_ICONV_FROMCODE_UTF16LE) || \
69016d86563SAlexander Pyhalov 	defined(JFP_ICONV_TOCODE_UTF16LE) || \
69116d86563SAlexander Pyhalov 	defined(JFP_ICONV_FROMCODE_UCS2LE) || \
69216d86563SAlexander Pyhalov 	defined(JFP_ICONV_TOCODE_UCS2LE)
69316d86563SAlexander Pyhalov 	state->little_endian = B_TRUE;
69416d86563SAlexander Pyhalov 	state->bom_written = B_TRUE;
69516d86563SAlexander Pyhalov #elif	defined(_LITTLE_ENDIAN)
69616d86563SAlexander Pyhalov 	state->little_endian = B_TRUE;
69716d86563SAlexander Pyhalov 	state->bom_written = B_FALSE;
698*a026698cSPeter Tribble #elif	defined(_BIG_ENDIAN)
699*a026698cSPeter Tribble 	state->little_endian = B_FALSE;
700*a026698cSPeter Tribble 	state->bom_written = B_FALSE;
70116d86563SAlexander Pyhalov #endif
70216d86563SAlexander Pyhalov 
70316d86563SAlexander Pyhalov 	return;
70416d86563SAlexander Pyhalov }
70516d86563SAlexander Pyhalov 
70616d86563SAlexander Pyhalov static void *
_icv_open_unicode(size_t extsize)70716d86563SAlexander Pyhalov _icv_open_unicode(size_t extsize)
70816d86563SAlexander Pyhalov {
70916d86563SAlexander Pyhalov 	ucs_state_t	*cd;
71016d86563SAlexander Pyhalov 
71116d86563SAlexander Pyhalov 	if ((cd = (ucs_state_t *)calloc(1,
71216d86563SAlexander Pyhalov 			sizeof (ucs_state_t) + extsize)) == NULL) {
71316d86563SAlexander Pyhalov 		errno = ENOMEM;
71416d86563SAlexander Pyhalov 		return ((void *)-1);
71516d86563SAlexander Pyhalov 	}
71616d86563SAlexander Pyhalov 
71716d86563SAlexander Pyhalov 	_icv_reset_unicode((void *)cd);
71816d86563SAlexander Pyhalov 
71916d86563SAlexander Pyhalov 	return ((void *)cd);
72016d86563SAlexander Pyhalov }
72116d86563SAlexander Pyhalov 
/*
 * Release a conversion descriptor.
 *
 * A NULL descriptor is reported by setting errno to EBADF; any other
 * value is handed to free().
 */
static void
_icv_close_unicode(void *cd)
{
	if (cd != NULL) {
		free(cd);
		return;
	}

	errno = EBADF;
}
73216d86563SAlexander Pyhalov 
73316d86563SAlexander Pyhalov static void *
_icv_get_ext(void * cd)73416d86563SAlexander Pyhalov _icv_get_ext(void *cd)
73516d86563SAlexander Pyhalov {
73616d86563SAlexander Pyhalov 	return ((void *)((unsigned char *)cd + sizeof (ucs_state_t)));
73716d86563SAlexander Pyhalov }
738