xref: /titanic_50/usr/src/lib/iconv_modules/utf-8/common/ucs4_to_utf32.c (revision 880d797826457b77414b37d531cc3e1aa166ecbe)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * This particular file is to cover conversions from UCS-4, UCS-4BE, and
26  * UCS-4LE to UTF-32, UTF-32BE, and UTF-32LE.
27  */
28 
29 
30 #include <stdlib.h>
31 #include <errno.h>
32 #include <sys/types.h>
33 #include <sys/isa_defs.h>
34 
35 /* We include the ucs4_to_ucs.h at the moment. */
36 #include "ucs4_to_ucs.h"
37 
38 
39 void *
_icv_open()40 _icv_open()
41 {
42 	ucs_ucs_state_t *cd;
43 
44 	cd = (ucs_ucs_state_t *)calloc(1, sizeof(ucs_ucs_state_t));
45 	if (cd == (ucs_ucs_state_t *)NULL) {
46 		errno = ENOMEM;
47 		return((void *)-1);
48 	}
49 
50 #if defined(UCS_4BE)
51 	cd->input.little_endian = false;
52 	cd->input.bom_written = true;
53 #elif defined(UCS_4LE)
54 	cd->input.little_endian = true;
55 	cd->input.bom_written = true;
56 #elif defined(_LITTLE_ENDIAN)
57 	cd->input.little_endian = true;
58 #endif
59 
60 #if defined(UTF_32BE)
61 	cd->output.little_endian = false;
62 	cd->output.bom_written = true;
63 #elif defined(UTF_32LE)
64 	cd->output.little_endian = true;
65 	cd->output.bom_written = true;
66 #elif defined(_LITTLE_ENDIAN)
67 	cd->output.little_endian = true;
68 #endif
69 
70 	return((void *)cd);
71 }
72 
73 
74 void
_icv_close(ucs_ucs_state_t * cd)75 _icv_close(ucs_ucs_state_t *cd)
76 {
77 	if (! cd)
78 		errno = EBADF;
79 	else
80 		free((void *)cd);
81 }
82 
83 
84 size_t
_icv_iconv(ucs_ucs_state_t * cd,char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft)85 _icv_iconv(ucs_ucs_state_t *cd, char **inbuf, size_t *inbufleft, char **outbuf,
86                 size_t *outbufleft)
87 {
88 	size_t ret_val = 0;
89 	uchar_t *ib;
90 	uchar_t *ob;
91 	uchar_t *ibtail;
92 	uchar_t *obtail;
93 	uint_t u4;
94 	signed char obsz;
95 	int i;
96 
97 
98 	if (! cd) {
99 		errno = EBADF;
100 		return((size_t)-1);
101 	}
102 
103 	if (!inbuf || !(*inbuf)) {
104 #if defined(UCS_4)
105 		cd->input.bom_written = false;
106 #endif
107 #if defined(UTF_32)
108 		cd->output.bom_written = false;
109 #endif
110 		return((size_t)0);
111 	}
112 
113 	ib = (uchar_t *)*inbuf;
114 	ob = (uchar_t *)*outbuf;
115 	ibtail = ib + *inbufleft;
116 	obtail = ob + *outbufleft;
117 
118 #if defined(UCS_4)
119 	if (! cd->input.bom_written) {
120 		if ((ibtail - ib) < ICV_FETCH_UCS4_SIZE) {
121 			errno = EINVAL;
122 			ret_val = (size_t)-1;
123 			goto need_more_input_err;
124 		}
125 
126 		for (u4 = 0, i = 0; i < ICV_FETCH_UCS4_SIZE; i++)
127 			u4 = (u4 << 8) | ((uint_t)(*(ib + i)));
128 
129 		if (u4 == ICV_BOM_IN_BIG_ENDIAN) {
130 			ib += ICV_FETCH_UCS4_SIZE;
131 			cd->input.little_endian = false;
132 		} else if (u4 == ICV_BOM_IN_LITTLE_ENDIAN_UCS4) {
133 			ib += ICV_FETCH_UCS4_SIZE;
134 			cd->input.little_endian = true;
135 		}
136 	}
137 	cd->input.bom_written = true;
138 #endif
139 
140 
141 	while (ib < ibtail) {
142 		if ((ibtail - ib) < ICV_FETCH_UCS4_SIZE) {
143 			errno = EINVAL;
144 			ret_val = (size_t)-1;
145 			break;
146 		}
147 
148 		u4 = 0;
149 		if (cd->input.little_endian) {
150 			for (i = ICV_FETCH_UCS4_SIZE - 1; i >= 0; i--)
151 				u4 = (u4 << 8) | ((uint_t)(*(ib + i)));
152 		} else {
153 			for (i = 0; i < ICV_FETCH_UCS4_SIZE; i++)
154 				u4 = (u4 << 8) | ((uint_t)(*(ib + i)));
155 		}
156 
157 		if (u4 == 0x00fffe || u4 == 0x00ffff || u4 > 0x7fffffff ||
158 		    (u4 >= 0x00d800 && u4 <= 0x00dfff)) {
159 			errno = EILSEQ;
160 			ret_val = (size_t)-1;
161 			goto illegal_char_err;
162 		}
163 
164 		if (u4 > 0x10ffff) {
165 			u4 = ICV_CHAR_UCS2_REPLACEMENT;
166 			ret_val++;
167 		}
168 
169 		obsz = (cd->output.bom_written) ? 2 : 4;
170 		if ((obtail - ob) < obsz) {
171 			errno = E2BIG;
172 			ret_val = (size_t)-1;
173 			break;
174 		}
175 
176 		if (cd->output.little_endian) {
177 			if (! cd->output.bom_written) {
178 				*ob++ = (uchar_t)0xff;
179 				*ob++ = (uchar_t)0xfe;
180 				*(ushort_t *)ob = (ushort_t)0;
181 				ob += 2;
182 				cd->output.bom_written = true;
183 			}
184 			*ob++ = (uchar_t)(u4 & 0xff);
185 			*ob++ = (uchar_t)((u4 >> 8) & 0xff);
186 			*ob++ = (uchar_t)((u4 >> 16) & 0xff);
187 			*ob++ = (uchar_t)((u4 >> 24) & 0xff);
188 		} else {
189 			if (! cd->output.bom_written) {
190 				*(ushort_t *)ob = (ushort_t)0;
191 				ob += 2;
192 				*ob++ = (uchar_t)0xfe;
193 				*ob++ = (uchar_t)0xff;
194 				cd->output.bom_written = true;
195 			}
196 			*ob++ = (uchar_t)((u4 >> 24) & 0xff);
197 			*ob++ = (uchar_t)((u4 >> 16) & 0xff);
198 			*ob++ = (uchar_t)((u4 >> 8) & 0xff);
199 			*ob++ = (uchar_t)(u4 & 0xff);
200 		}
201 		ib += ICV_FETCH_UCS4_SIZE;
202 	}
203 
204 #if defined(UCS_4)
205 need_more_input_err:
206 #endif
207 illegal_char_err:
208 	*inbuf = (char *)ib;
209 	*inbufleft = ibtail - ib;
210 	*outbuf = (char *)ob;
211 	*outbufleft = obtail - ob;
212 
213 	return(ret_val);
214 }
215