1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
25 * This particular file is to cover conversions from UTF-32, UTF-32BE, and
26 * UTF-32LE to UCS-4, UCS-4BE, and UCS-4LE.
27 */
28
29
30 #include <stdlib.h>
31 #include <errno.h>
32 #include <sys/types.h>
33 #include <sys/isa_defs.h>
34
35 /* We include the ucs_to_ucs4.h at the moment. */
36 #include "ucs_to_ucs4.h"
37
38
39 void *
_icv_open()40 _icv_open()
41 {
42 ucs_ucs_state_t *cd;
43
44 cd = (ucs_ucs_state_t *)calloc(1, sizeof(ucs_ucs_state_t));
45 if (cd == (ucs_ucs_state_t *)NULL) {
46 errno = ENOMEM;
47 return((void *)-1);
48 }
49
50 #if defined(UTF_32BE)
51 cd->input.little_endian = false;
52 cd->input.bom_written = true;
53 #elif defined(UTF_32LE)
54 cd->input.little_endian = true;
55 cd->input.bom_written = true;
56 #elif defined(_LITTLE_ENDIAN)
57 cd->input.little_endian = true;
58 #endif
59
60 #if defined(UCS_4BE)
61 cd->output.little_endian = false;
62 cd->output.bom_written = true;
63 #elif defined(UCS_4LE)
64 cd->output.little_endian = true;
65 cd->output.bom_written = true;
66 #elif defined(_LITTLE_ENDIAN)
67 cd->output.little_endian = true;
68 #endif
69
70 return((void *)cd);
71 }
72
73
74 void
_icv_close(ucs_ucs_state_t * cd)75 _icv_close(ucs_ucs_state_t *cd)
76 {
77 if (! cd)
78 errno = EBADF;
79 else
80 free((void *)cd);
81 }
82
83
84 size_t
_icv_iconv(ucs_ucs_state_t * cd,char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft)85 _icv_iconv(ucs_ucs_state_t *cd, char **inbuf, size_t *inbufleft, char **outbuf,
86 size_t *outbufleft)
87 {
88 size_t ret_val = 0;
89 uchar_t *ib;
90 uchar_t *ob;
91 uchar_t *ibtail;
92 uchar_t *obtail;
93 uint_t u4;
94 signed char obsz;
95 int i;
96
97
98 if (! cd) {
99 errno = EBADF;
100 return((size_t)-1);
101 }
102
103 if (!inbuf || !(*inbuf)) {
104 #if defined(UTF_32)
105 cd->input.bom_written = false;
106 #endif
107 #if defined(UCS_4)
108 cd->output.bom_written = false;
109 #endif
110 return((size_t)0);
111 }
112
113 ib = (uchar_t *)*inbuf;
114 ob = (uchar_t *)*outbuf;
115 ibtail = ib + *inbufleft;
116 obtail = ob + *outbufleft;
117
118 #if defined(UTF_32)
119 if (! cd->input.bom_written) {
120 if ((ibtail - ib) < ICV_FETCH_UCS4_SIZE) {
121 errno = EINVAL;
122 ret_val = (size_t)-1;
123 goto need_more_input_err;
124 }
125
126 for (u4 = 0, i = 0; i < ICV_FETCH_UCS4_SIZE; i++)
127 u4 = (u4 << 8) | ((uint_t)(*(ib + i)));
128
129 if (u4 == ICV_BOM_IN_BIG_ENDIAN) {
130 ib += ICV_FETCH_UCS4_SIZE;
131 cd->input.little_endian = false;
132 } else if (u4 == ICV_BOM_IN_LITTLE_ENDIAN_UCS4) {
133 ib += ICV_FETCH_UCS4_SIZE;
134 cd->input.little_endian = true;
135 }
136 }
137 cd->input.bom_written = true;
138 #endif
139
140
141 while (ib < ibtail) {
142 if ((ibtail - ib) < ICV_FETCH_UCS4_SIZE) {
143 errno = EINVAL;
144 ret_val = (size_t)-1;
145 break;
146 }
147
148 u4 = 0;
149 if (cd->input.little_endian) {
150 for (i = ICV_FETCH_UCS4_SIZE - 1; i >= 0; i--)
151 u4 = (u4 << 8) | ((uint_t)(*(ib + i)));
152 } else {
153 for (i = 0; i < ICV_FETCH_UCS4_SIZE; i++)
154 u4 = (u4 << 8) | ((uint_t)(*(ib + i)));
155 }
156
157 if (u4 == 0x00fffe || u4 == 0x00ffff || u4 > 0x10ffff ||
158 (u4 >= 0x00d800 && u4 <= 0x00dfff)) {
159 errno = EILSEQ;
160 ret_val = (size_t)-1;
161 goto illegal_char_err;
162 }
163
164 obsz = (cd->output.bom_written) ? 2 : 4;
165 if ((obtail - ob) < obsz) {
166 errno = E2BIG;
167 ret_val = (size_t)-1;
168 break;
169 }
170
171 if (cd->output.little_endian) {
172 if (! cd->output.bom_written) {
173 *ob++ = (uchar_t)0xff;
174 *ob++ = (uchar_t)0xfe;
175 *(ushort_t *)ob = (ushort_t)0;
176 ob += 2;
177 cd->output.bom_written = true;
178 }
179 *ob++ = (uchar_t)(u4 & 0xff);
180 *ob++ = (uchar_t)((u4 >> 8) & 0xff);
181 *ob++ = (uchar_t)((u4 >> 16) & 0xff);
182 *ob++ = (uchar_t)((u4 >> 24) & 0xff);
183 } else {
184 if (! cd->output.bom_written) {
185 *(ushort_t *)ob = (ushort_t)0;
186 ob += 2;
187 *ob++ = (uchar_t)0xfe;
188 *ob++ = (uchar_t)0xff;
189 cd->output.bom_written = true;
190 }
191 *ob++ = (uchar_t)((u4 >> 24) & 0xff);
192 *ob++ = (uchar_t)((u4 >> 16) & 0xff);
193 *ob++ = (uchar_t)((u4 >> 8) & 0xff);
194 *ob++ = (uchar_t)(u4 & 0xff);
195 }
196 ib += ICV_FETCH_UCS4_SIZE;
197 }
198
199 #if defined(UTF_32)
200 need_more_input_err:
201 #endif
202 illegal_char_err:
203 *inbuf = (char *)ib;
204 *inbufleft = ibtail - ib;
205 *outbuf = (char *)ob;
206 *outbufleft = obtail - ob;
207
208 return(ret_val);
209 }
210