1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
25 * This particular file is to cover conversions from UCS-4, UCS-4BE, UCS-4LE,
26 * UTF-32, UTF-32BE, and UTF-32LE to various other UCS formats, especially,
27 * UCS-2, UCS-2BE, UCS-2LE, UTF-16, UTF-16BE, and UTF-16LE.
28 */
29
30
31 #include <stdlib.h>
32 #include <errno.h>
33 #include <sys/types.h>
34 #include <sys/isa_defs.h>
35 #include "ucs4_to_ucs.h"
36
37
38 void *
_icv_open()39 _icv_open()
40 {
41 ucs_ucs_state_t *cd;
42
43 cd = (ucs_ucs_state_t *)calloc(1, sizeof(ucs_ucs_state_t));
44 if (cd == (ucs_ucs_state_t *)NULL) {
45 errno = ENOMEM;
46 return((void *)-1);
47 }
48
49 #if defined(UCS_4BE) || defined(UTF_32BE)
50 cd->input.little_endian = false;
51 cd->input.bom_written = true;
52 #elif defined(UCS_4LE) || defined(UTF_32LE)
53 cd->input.little_endian = true;
54 cd->input.bom_written = true;
55 #elif defined(_LITTLE_ENDIAN)
56 cd->input.little_endian = true;
57 #endif
58
59 #if defined(UTF_16BE) || defined(UCS_2BE)
60 cd->output.little_endian = false;
61 cd->output.bom_written = true;
62 #elif defined(UTF_16LE) || defined(UCS_2LE)
63 cd->output.little_endian = true;
64 cd->output.bom_written = true;
65 #elif defined(_LITTLE_ENDIAN)
66 cd->output.little_endian = true;
67 #endif
68
69 return((void *)cd);
70 }
71
72
73 void
_icv_close(ucs_ucs_state_t * cd)74 _icv_close(ucs_ucs_state_t *cd)
75 {
76 if (! cd)
77 errno = EBADF;
78 else
79 free((void *)cd);
80 }
81
82
83 size_t
_icv_iconv(ucs_ucs_state_t * cd,char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft)84 _icv_iconv(ucs_ucs_state_t *cd, char **inbuf, size_t *inbufleft, char **outbuf,
85 size_t *outbufleft)
86 {
87 size_t ret_val = 0;
88 uchar_t *ib;
89 uchar_t *ob;
90 uchar_t *ibtail;
91 uchar_t *obtail;
92 uint_t u4;
93 uint_t u4_2;
94 signed char obsz;
95 int i;
96
97
98 if (! cd) {
99 errno = EBADF;
100 return((size_t)-1);
101 }
102
103 if (!inbuf || !(*inbuf)) {
104 #if defined(UCS_4) || defined(UTF_32)
105 cd->input.bom_written = false;
106 #endif
107 #if defined(UCS_2) || defined(UTF_16)
108 cd->output.bom_written = false;
109 #endif
110 return((size_t)0);
111 }
112
113 ib = (uchar_t *)*inbuf;
114 ob = (uchar_t *)*outbuf;
115 ibtail = ib + *inbufleft;
116 obtail = ob + *outbufleft;
117
118 #if defined(UCS_4) || defined(UTF_32)
119 if (! cd->input.bom_written) {
120 if ((ibtail - ib) < ICV_FETCH_UCS4_SIZE) {
121 errno = EINVAL;
122 ret_val = (size_t)-1;
123 goto need_more_input_err;
124 }
125
126 for (u4 = 0, i = 0; i < ICV_FETCH_UCS4_SIZE; i++)
127 u4 = (u4 << 8) | ((uint_t)(*(ib + i)));
128
129 if (u4 == ICV_BOM_IN_BIG_ENDIAN) {
130 ib += ICV_FETCH_UCS4_SIZE;
131 cd->input.little_endian = false;
132 } else if (u4 == ICV_BOM_IN_LITTLE_ENDIAN_UCS4) {
133 ib += ICV_FETCH_UCS4_SIZE;
134 cd->input.little_endian = true;
135 }
136 }
137 cd->input.bom_written = true;
138 #endif
139
140
141 while (ib < ibtail) {
142 if ((ibtail - ib) < ICV_FETCH_UCS4_SIZE) {
143 errno = EINVAL;
144 ret_val = (size_t)-1;
145 break;
146 }
147
148 u4 = u4_2 = 0;
149 if (cd->input.little_endian) {
150 for (i = ICV_FETCH_UCS4_SIZE - 1; i >= 0; i--)
151 u4 = (u4 << 8) | ((uint_t)(*(ib + i)));
152 } else {
153 for (i = 0; i < ICV_FETCH_UCS4_SIZE; i++)
154 u4 = (u4 << 8) | ((uint_t)(*(ib + i)));
155 }
156
157 if (u4 == 0x00fffe || u4 == 0x00ffff ||
158 #if defined(UTF_32) || defined(UTF_32BE) || defined(UTF_32LE)
159 u4 > 0x10ffff ||
160 #elif defined(UCS_4) || defined(UCS_4BE) || defined(UCS_4LE)
161 u4 > 0x7fffffff ||
162 #endif
163 (u4 >= 0x00d800 && u4 <= 0x00dfff)) {
164 errno = EILSEQ;
165 ret_val = (size_t)-1;
166 goto illegal_char_err;
167 }
168
169 obsz = (cd->output.bom_written) ? 2 : 4;
170 #if defined(UCS_2) || defined(UCS_2BE) || defined(UCS_2LE)
171 if (u4 > 0x00ffff) {
172 u4 = ICV_CHAR_UCS2_REPLACEMENT;
173 ret_val++;
174 }
175 #elif defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE)
176 if (u4 > 0x10ffff) {
177 u4 = ICV_CHAR_UCS2_REPLACEMENT;
178 ret_val++;
179 } else if (u4 > 0x00ffff) {
180 u4_2 = ((u4 - 0x010000) % 0x400) + 0x00dc00;
181 u4 = ((u4 - 0x010000) / 0x400) + 0x00d800;
182 obsz += 2;
183 }
184 #elif defined(UCS_4) || defined(UCS_4BE) || defined(UCS_4LE) || \
185 defined(UTF_32) || defined(UTF_32BE) || defined(UTF_32LE)
186 /*
187 * We do nothing here since these if expressions
188 * are only for preparing for output buffer;
189 * macros such as UCS_4/UCS_4BE/UCS_4LE and
190 * UTF_32/UTF_32BE/UTF_32LE are only for input.
191 */
192 #else
193 #error "Fatal: one of the UCS macros need to be defined."
194 #endif
195 if ((obtail - ob) < obsz) {
196 errno = E2BIG;
197 ret_val = (size_t)-1;
198 break;
199 }
200
201 if (cd->output.little_endian) {
202 if (! cd->output.bom_written) {
203 *ob++ = (uchar_t)0xff;
204 *ob++ = (uchar_t)0xfe;
205 cd->output.bom_written = true;
206 }
207 *ob++ = (uchar_t)(u4 & 0xff);
208 *ob++ = (uchar_t)((u4 >> 8) & 0xff);
209 #if defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE)
210 if (u4_2) {
211 *ob++ = (uchar_t)(u4_2 & 0xff);
212 *ob++ = (uchar_t)((u4_2 >> 8) & 0xff);
213 }
214 #endif
215 } else {
216 if (! cd->output.bom_written) {
217 *ob++ = (uchar_t)0xfe;
218 *ob++ = (uchar_t)0xff;
219 cd->output.bom_written = true;
220 }
221 *ob++ = (uchar_t)((u4 >> 8) & 0xff);
222 *ob++ = (uchar_t)(u4 & 0xff);
223 #if defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE)
224 if (u4_2) {
225 *ob++ = (uchar_t)((u4_2 >> 8) & 0xff);
226 *ob++ = (uchar_t)(u4_2 & 0xff);
227 }
228 #endif
229 }
230 ib += ICV_FETCH_UCS4_SIZE;
231 }
232
233 #if defined(UCS_4) || defined(UTF_32)
234 need_more_input_err:
235 #endif
236 illegal_char_err:
237 *inbuf = (char *)ib;
238 *inbufleft = ibtail - ib;
239 *outbuf = (char *)ob;
240 *outbufleft = obtail - ob;
241
242 return(ret_val);
243 }
244