1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
 * This file covers conversions from various UCS formats, namely UCS-2,
 * UCS-2BE, UCS-2LE, UTF-16, UTF-16BE, and UTF-16LE, to other UCS formats,
 * namely UCS-4, UCS-4BE, UCS-4LE, UTF-32, UTF-32BE, and UTF-32LE.
29 */
30
31
32 #include <stdlib.h>
33 #include <errno.h>
34 #include <sys/types.h>
35 #include <sys/isa_defs.h>
36 #include "ucs_to_ucs4.h"
37
38
39 void *
_icv_open()40 _icv_open()
41 {
42 ucs_ucs_state_t *cd;
43
44 cd = (ucs_ucs_state_t *)calloc(1, sizeof(ucs_ucs_state_t));
45 if (cd == (ucs_ucs_state_t *)NULL) {
46 errno = ENOMEM;
47 return((void *)-1);
48 }
49
50 #if defined(UTF_16BE) || defined(UCS_2BE)
51 cd->input.little_endian = false;
52 cd->input.bom_written = true;
53 #elif defined(UTF_16LE) || defined(UCS_2LE)
54 cd->input.little_endian = true;
55 cd->input.bom_written = true;
56 #elif defined(_LITTLE_ENDIAN)
57 cd->input.little_endian = true;
58 #endif
59
60 #if defined(UCS_4BE) || defined(UTF_32BE)
61 cd->output.little_endian = false;
62 cd->output.bom_written = true;
63 #elif defined(UCS_4LE) || defined(UTF_32LE)
64 cd->output.little_endian = true;
65 cd->output.bom_written = true;
66 #elif defined(_LITTLE_ENDIAN)
67 cd->output.little_endian = true;
68 #endif
69
70 return((void *)cd);
71 }
72
73
74 void
_icv_close(ucs_ucs_state_t * cd)75 _icv_close(ucs_ucs_state_t *cd)
76 {
77 if (! cd)
78 errno = EBADF;
79 else
80 free((void *)cd);
81 }
82
83
84 size_t
_icv_iconv(ucs_ucs_state_t * cd,char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft)85 _icv_iconv(ucs_ucs_state_t *cd, char **inbuf, size_t *inbufleft, char **outbuf,
86 size_t *outbufleft)
87 {
88 size_t ret_val = 0;
89 uchar_t *ib;
90 uchar_t *ob;
91 uchar_t *ibtail;
92 uchar_t *obtail;
93 uint_t u4;
94 uint_t u4_2;
95 register int i;
96
97 if (! cd) {
98 errno = EBADF;
99 return((size_t)-1);
100 }
101
102 if (!inbuf || !(*inbuf)) {
103 #if defined(UCS_2) || defined(UTF_16)
104 cd->input.bom_written = false;
105 #endif
106 #if defined(UCS_4) || defined(UTF_32)
107 cd->output.bom_written = false;
108 #endif
109 return((size_t)0);
110 }
111
112 ib = (uchar_t *)*inbuf;
113 ob = (uchar_t *)*outbuf;
114 ibtail = ib + *inbufleft;
115 obtail = ob + *outbufleft;
116
117 #if defined(UCS_2) || defined(UTF_16)
118 if (! cd->input.bom_written) {
119 if ((ibtail - ib) < ICV_FETCH_UCS_SIZE) {
120 errno = EINVAL;
121 ret_val = (size_t)-1;
122 goto need_more_input_err;
123 }
124
125 for (u4 = 0, i = 0; i < ICV_FETCH_UCS_SIZE; i++)
126 u4 = (u4 << 8) | ((uint_t)(*(ib + i)));
127
128 if (u4 == ICV_BOM_IN_BIG_ENDIAN) {
129 ib += ICV_FETCH_UCS_SIZE;
130 cd->input.little_endian = false;
131 } else if (u4 == ICV_BOM_IN_LITTLE_ENDIAN) {
132 ib += ICV_FETCH_UCS_SIZE;
133 cd->input.little_endian = true;
134 }
135 }
136 cd->input.bom_written = true;
137 #endif
138
139 while (ib < ibtail) {
140 if ((ibtail - ib) < ICV_FETCH_UCS_SIZE) {
141 errno = EINVAL;
142 ret_val = (size_t)-1;
143 break;
144 }
145
146 u4 = u4_2 = 0;
147 if (cd->input.little_endian) {
148 for (i = ICV_FETCH_UCS_SIZE - 1; i >= 0; i--)
149 u4 = (u4 << 8) | ((uint_t)(*(ib + i)));
150 } else {
151 for (i = 0; i < ICV_FETCH_UCS_SIZE; i++)
152 u4 = (u4 << 8) | ((uint_t)(*(ib + i)));
153 }
154
155 #if defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE)
156 if ((u4 >= 0x00dc00 && u4 <= 0x00dfff) || u4 >= 0x00fffe) {
157 errno = EILSEQ;
158 ret_val = (size_t)-1;
159 break;
160 }
161
162 if (u4 >= 0x00d800 && u4 <= 0x00dbff) {
163 if ((ibtail - ib) < ICV_FETCH_UCS_SIZE_TWO) {
164 errno = EINVAL;
165 ret_val = (size_t)-1;
166 break;
167 }
168
169 if (cd->input.little_endian) {
170 for (i = ICV_FETCH_UCS_SIZE_TWO - 1;
171 i >= ICV_FETCH_UCS_SIZE;
172 i--)
173 u4_2 = (u4_2<<8)|((uint_t)(*(ib + i)));
174 } else {
175 for (i = ICV_FETCH_UCS_SIZE;
176 i < ICV_FETCH_UCS_SIZE_TWO;
177 i++)
178 u4_2 = (u4_2<<8)|((uint_t)(*(ib + i)));
179 }
180
181 if (u4_2 < 0x00dc00 || u4_2 > 0x00dfff) {
182 errno = EILSEQ;
183 ret_val = (size_t)-1;
184 break;
185 }
186
187 u4 = ((((u4 - 0x00d800) * 0x400) +
188 (u4_2 - 0x00dc00)) & 0x0fffff) + 0x010000;
189 }
190 #elif defined(UCS_2) || defined(UCS_2BE) || defined(UCS_2LE)
191 if (u4 >= 0x00fffe || (u4 >= 0x00d800 && u4 <= 0x00dfff)) {
192 errno = EILSEQ;
193 ret_val = (size_t)-1;
194 break;
195 }
196 #elif defined(UCS_4) || defined(UCS_4BE) || defined(UCS_4LE) || \
197 defined(UTF_32) || defined(UTF_32BE) || defined(UTF_32LE)
198 /*
199 * We do nothing here since these if expressions are
200 * only for input characters particularly of
201 * UCS-2, UCS-2BE, UCS-2LE, UTF-16, UTF-16BE, and
202 * UTF-16LE.
203 */
204 #else
205 #error "Fatal: one of the UCS macros need to be defined."
206 #endif
207
208 if ((obtail - ob) < ((cd->output.bom_written) ? 4 : 8)) {
209 errno = E2BIG;
210 ret_val = (size_t)-1;
211 break;
212 }
213
214 if (cd->output.little_endian) {
215 if (! cd->output.bom_written) {
216 *ob++ = (uchar_t)0xff;
217 *ob++ = (uchar_t)0xfe;
218 *(ushort_t *)ob = (ushort_t)0;
219 ob += 2;
220 cd->output.bom_written = true;
221 }
222 *ob++ = (uchar_t)(u4 & 0xff);
223 *ob++ = (uchar_t)((u4 >> 8) & 0xff);
224 *ob++ = (uchar_t)((u4 >> 16) & 0xff);
225 *ob++ = (uchar_t)((u4 >> 24) & 0xff);
226 } else {
227 if (! cd->output.bom_written) {
228 *(ushort_t *)ob = (ushort_t)0;
229 ob += 2;
230 *ob++ = (uchar_t)0xfe;
231 *ob++ = (uchar_t)0xff;
232 cd->output.bom_written = true;
233 }
234 *ob++ = (uchar_t)((u4 >> 24) & 0xff);
235 *ob++ = (uchar_t)((u4 >> 16) & 0xff);
236 *ob++ = (uchar_t)((u4 >> 8) & 0xff);
237 *ob++ = (uchar_t)(u4 & 0xff);
238 }
239 ib += ((u4_2) ? ICV_FETCH_UCS_SIZE_TWO : ICV_FETCH_UCS_SIZE);
240 }
241
242 #if defined(UCS_2) || defined(UTF_16)
243 need_more_input_err:
244 #endif
245 *inbuf = (char *)ib;
246 *inbufleft = ibtail - ib;
247 *outbuf = (char *)ob;
248 *outbufleft = obtail - ob;
249
250 return(ret_val);
251 }
252