xref: /illumos-gate/usr/src/lib/iconv_modules/utf-8/common/utf32_to_ucs4.c (revision 16d8656330ae5622ec32e5007f62145ebafdc50f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * This particular file is to cover conversions from UTF-32, UTF-32BE, and
26  * UTF-32LE to UCS-4, UCS-4BE, and UCS-4LE.
27  */
28 
29 
30 #include <stdlib.h>
31 #include <errno.h>
32 #include <sys/types.h>
33 #include <sys/isa_defs.h>
34 
35 /* We include the ucs_to_ucs4.h at the moment. */
36 #include "ucs_to_ucs4.h"
37 
38 
39 void *
_icv_open()40 _icv_open()
41 {
42 	ucs_ucs_state_t *cd;
43 
44 	cd = (ucs_ucs_state_t *)calloc(1, sizeof(ucs_ucs_state_t));
45 	if (cd == (ucs_ucs_state_t *)NULL) {
46 		errno = ENOMEM;
47 		return((void *)-1);
48 	}
49 
50 #if defined(UTF_32BE)
51 	cd->input.little_endian = false;
52 	cd->input.bom_written = true;
53 #elif defined(UTF_32LE)
54 	cd->input.little_endian = true;
55 	cd->input.bom_written = true;
56 #elif defined(_LITTLE_ENDIAN)
57 	cd->input.little_endian = true;
58 #endif
59 
60 #if defined(UCS_4BE)
61 	cd->output.little_endian = false;
62 	cd->output.bom_written = true;
63 #elif defined(UCS_4LE)
64 	cd->output.little_endian = true;
65 	cd->output.bom_written = true;
66 #elif defined(_LITTLE_ENDIAN)
67 	cd->output.little_endian = true;
68 #endif
69 
70 	return((void *)cd);
71 }
72 
73 
74 void
_icv_close(ucs_ucs_state_t * cd)75 _icv_close(ucs_ucs_state_t *cd)
76 {
77 	if (! cd)
78 		errno = EBADF;
79 	else
80 		free((void *)cd);
81 }
82 
83 
84 size_t
_icv_iconv(ucs_ucs_state_t * cd,char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft)85 _icv_iconv(ucs_ucs_state_t *cd, char **inbuf, size_t *inbufleft, char **outbuf,
86                 size_t *outbufleft)
87 {
88 	size_t ret_val = 0;
89 	uchar_t *ib;
90 	uchar_t *ob;
91 	uchar_t *ibtail;
92 	uchar_t *obtail;
93 	uint_t u4;
94 	signed char obsz;
95 	int i;
96 
97 
98 	if (! cd) {
99 		errno = EBADF;
100 		return((size_t)-1);
101 	}
102 
103 	if (!inbuf || !(*inbuf)) {
104 #if defined(UTF_32)
105 		cd->input.bom_written = false;
106 #endif
107 #if defined(UCS_4)
108 		cd->output.bom_written = false;
109 #endif
110 		return((size_t)0);
111 	}
112 
113 	ib = (uchar_t *)*inbuf;
114 	ob = (uchar_t *)*outbuf;
115 	ibtail = ib + *inbufleft;
116 	obtail = ob + *outbufleft;
117 
118 #if defined(UTF_32)
119 	if (! cd->input.bom_written) {
120 		if ((ibtail - ib) < ICV_FETCH_UCS4_SIZE) {
121 			errno = EINVAL;
122 			ret_val = (size_t)-1;
123 			goto need_more_input_err;
124 		}
125 
126 		for (u4 = 0, i = 0; i < ICV_FETCH_UCS4_SIZE; i++)
127 			u4 = (u4 << 8) | ((uint_t)(*(ib + i)));
128 
129 		if (u4 == ICV_BOM_IN_BIG_ENDIAN) {
130 			ib += ICV_FETCH_UCS4_SIZE;
131 			cd->input.little_endian = false;
132 		} else if (u4 == ICV_BOM_IN_LITTLE_ENDIAN_UCS4) {
133 			ib += ICV_FETCH_UCS4_SIZE;
134 			cd->input.little_endian = true;
135 		}
136 	}
137 	cd->input.bom_written = true;
138 #endif
139 
140 
141 	while (ib < ibtail) {
142 		if ((ibtail - ib) < ICV_FETCH_UCS4_SIZE) {
143 			errno = EINVAL;
144 			ret_val = (size_t)-1;
145 			break;
146 		}
147 
148 		u4 = 0;
149 		if (cd->input.little_endian) {
150 			for (i = ICV_FETCH_UCS4_SIZE - 1; i >= 0; i--)
151 				u4 = (u4 << 8) | ((uint_t)(*(ib + i)));
152 		} else {
153 			for (i = 0; i < ICV_FETCH_UCS4_SIZE; i++)
154 				u4 = (u4 << 8) | ((uint_t)(*(ib + i)));
155 		}
156 
157 		if (u4 == 0x00fffe || u4 == 0x00ffff || u4 > 0x10ffff ||
158 		    (u4 >= 0x00d800 && u4 <= 0x00dfff)) {
159 			errno = EILSEQ;
160 			ret_val = (size_t)-1;
161 			goto illegal_char_err;
162 		}
163 
164 		obsz = (cd->output.bom_written) ? 2 : 4;
165 		if ((obtail - ob) < obsz) {
166 			errno = E2BIG;
167 			ret_val = (size_t)-1;
168 			break;
169 		}
170 
171 		if (cd->output.little_endian) {
172 			if (! cd->output.bom_written) {
173 				*ob++ = (uchar_t)0xff;
174 				*ob++ = (uchar_t)0xfe;
175 				*(ushort_t *)ob = (ushort_t)0;
176 				ob += 2;
177 				cd->output.bom_written = true;
178 			}
179 			*ob++ = (uchar_t)(u4 & 0xff);
180 			*ob++ = (uchar_t)((u4 >> 8) & 0xff);
181 			*ob++ = (uchar_t)((u4 >> 16) & 0xff);
182 			*ob++ = (uchar_t)((u4 >> 24) & 0xff);
183 		} else {
184 			if (! cd->output.bom_written) {
185 				*(ushort_t *)ob = (ushort_t)0;
186 				ob += 2;
187 				*ob++ = (uchar_t)0xfe;
188 				*ob++ = (uchar_t)0xff;
189 				cd->output.bom_written = true;
190 			}
191 			*ob++ = (uchar_t)((u4 >> 24) & 0xff);
192 			*ob++ = (uchar_t)((u4 >> 16) & 0xff);
193 			*ob++ = (uchar_t)((u4 >> 8) & 0xff);
194 			*ob++ = (uchar_t)(u4 & 0xff);
195 		}
196 		ib += ICV_FETCH_UCS4_SIZE;
197 	}
198 
199 #if defined(UTF_32)
200 need_more_input_err:
201 #endif
202 illegal_char_err:
203 	*inbuf = (char *)ib;
204 	*inbufleft = ibtail - ib;
205 	*outbuf = (char *)ob;
206 	*outbufleft = obtail - ob;
207 
208 	return(ret_val);
209 }
210