1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
 * In this program, we assume that each table entry provided will contain
 * a valid UCS character, an illegal character, or a replacement character.
 * In other words, it is the table provider's responsibility to provide
 * an appropriate mapping for each single byte character in the table, since
 * the program in this file will not do any special checking on the table
 * component values.
31 *
32 * This particular file is to cover conversions from various single byte
33 * codesets to UCS-2, UCS-2BE, UCS-2LE, UCS-4, UCS-4BE, UCS-4LE, UTF-16,
34 * UTF-16BE, UTF-16LE, UTF-32, UTF-32BE, and UTF-32LE.
35 */
36
37
38 #include <stdlib.h>
39 #include <errno.h>
40 #include <sys/types.h>
41 #include <sys/isa_defs.h>
42 #include "sb_to_ucs.h"
43
44
45 void *
_icv_open()46 _icv_open()
47 {
48 ucs_state_t *cd = (ucs_state_t *)calloc(1, sizeof(ucs_state_t));
49
50 if (cd == (ucs_state_t *)NULL) {
51 errno = ENOMEM;
52 return((void *)-1);
53 }
54
55 #if defined(UTF_16BE) || defined(UCS_2BE) || defined(UCS_4BE) || \
56 defined(UTF_32BE)
57 cd->little_endian = false;
58 cd->bom_written = true;
59 #elif defined(UTF_16LE) || defined(UCS_2LE) || defined(UCS_4LE) || \
60 defined(UTF_32LE)
61 cd->little_endian = true;
62 cd->bom_written = true;
63 #elif defined(_LITTLE_ENDIAN)
64 cd->little_endian = true;
65 #endif
66
67 return((void *)cd);
68 }
69
70
71 void
_icv_close(ucs_state_t * cd)72 _icv_close(ucs_state_t *cd)
73 {
74 if (! cd)
75 errno = EBADF;
76 else
77 free((void *)cd);
78 }
79
80
81 size_t
_icv_iconv(ucs_state_t * cd,char ** inbuf,size_t * inbufleft,char ** outbuf,size_t * outbufleft)82 _icv_iconv(ucs_state_t *cd, char **inbuf, size_t *inbufleft, char **outbuf,
83 size_t *outbufleft)
84 {
85 size_t ret_val = 0;
86 unsigned char *ib;
87 unsigned char *ob;
88 unsigned char *ibtail;
89 unsigned char *obtail;
90 unsigned int u4;
91 #if defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE)
92 unsigned int u4_2;
93 #endif
94 signed char obsz;
95
96
97 if (! cd) {
98 errno = EBADF;
99 return((size_t)-1);
100 }
101
102 if (!inbuf || !(*inbuf)) {
103 #if defined(UCS_2) || defined(UCS_4) || defined(UTF_16) || defined(UTF_32)
104 cd->bom_written = false;
105 #endif
106 return((size_t)0);
107 }
108
109 ib = (unsigned char *)*inbuf;
110 ob = (unsigned char *)*outbuf;
111 ibtail = ib + *inbufleft;
112 obtail = ob + *outbufleft;
113
114 while (ib < ibtail) {
115 u4 = sb_u4_tbl[*ib].u8;
116 #if defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE)
117 u4_2 = 0;
118 #endif
119
120 if (sb_u4_tbl[*ib].size == ICV_TYPE_ILLEGAL_CHAR) {
121 errno = EILSEQ;
122 ret_val = (size_t)-1;
123 break;
124 }
125
126 obsz = (cd->bom_written) ? ICV_FETCH_UCS_SIZE :
127 ICV_FETCH_UCS_SIZE_TWO;
128 #if defined(UCS_2) || defined(UCS_2BE) || defined(UCS_2LE)
129 if (u4 > 0x00ffff) {
130 u4 = ICV_CHAR_UCS2_REPLACEMENT;
131 ret_val++;
132 }
133 #elif defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE)
134 if (u4 > 0x00ffff && u4 < 0x110000) {
135 u4_2 = ((u4 - 0x010000) % 0x400) + 0x00dc00;
136 u4 = ((u4 - 0x010000) / 0x400) + 0x00d800;
137 obsz += 2;
138 } else if (u4 > 0x10ffff) {
139 u4 = ICV_CHAR_UCS2_REPLACEMENT;
140 ret_val++;
141 }
142 #elif defined(UTF_32) || defined(UTF_32BE) || defined(UTF_32LE)
143 if (u4 > 0x10ffff) {
144 u4 = ICV_CHAR_UCS2_REPLACEMENT;
145 ret_val++;
146 }
147 #elif defined(UCS_4) || defined(UCS_4BE) || defined(UCS_4LE)
148 /* do nothing */
149 #else
150 #error "Fatal: one of the UCS macros need to be defined."
151 #endif
152
153 /*
154 * The target values in the conversion tables are in UCS-4
155 * without BOM and so the max target value possible would be
156 * U+7FFFFFFF.
157 */
158 if (u4 == 0x00fffe || u4 == 0x00ffff || u4 > 0x7fffffff ||
159 (u4 >= 0x00d800 && u4 <= 0x00dfff)) {
160 /*
161 * if conversion table is right, this should not
162 * happen.
163 */
164 errno = EILSEQ;
165 ret_val = (size_t)-1;
166 break;
167 }
168
169 if ((obtail - ob) < obsz) {
170 errno = E2BIG;
171 ret_val = (size_t)-1;
172 break;
173 }
174
175 if (cd->little_endian) {
176 if (! cd->bom_written) {
177 *ob++ = (uchar_t)0xff;
178 *ob++ = (uchar_t)0xfe;
179 #if defined(UCS_4) || defined(UCS_4BE) || defined(UCS_4LE) || \
180 defined(UTF_32) || defined(UTF_32BE) || defined(UTF_32LE)
181 *(ushort_t *)ob = (ushort_t)0;
182 ob += 2;
183 #endif
184 cd->bom_written = true;
185 }
186 *ob++ = (uchar_t)(u4 & 0xff);
187 *ob++ = (uchar_t)((u4 >> 8) & 0xff);
188 #if defined(UCS_4) || defined(UCS_4BE) || defined(UCS_4LE) || \
189 defined(UTF_32) || defined(UTF_32BE) || defined(UTF_32LE)
190 *ob++ = (uchar_t)((u4 >> 16) & 0xff);
191 *ob++ = (uchar_t)((u4 >> 24) & 0xff);
192 #elif defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE)
193 if (u4_2) {
194 *ob++ = (uchar_t)(u4_2 & 0xff);
195 *ob++ = (uchar_t)((u4_2 >> 8) & 0xff);
196 }
197 #endif
198 } else {
199 if (! cd->bom_written) {
200 #if defined(UCS_4) || defined(UCS_4BE) || defined(UCS_4LE) || \
201 defined(UTF_32) || defined(UTF_32BE) || defined(UTF_32LE)
202 *(ushort_t *)ob = (ushort_t)0;
203 ob += 2;
204 #endif
205 *ob++ = (uchar_t)0xfe;
206 *ob++ = (uchar_t)0xff;
207 cd->bom_written = true;
208 }
209 #if defined(UCS_4) || defined(UCS_4BE) || defined(UCS_4LE) || \
210 defined(UTF_32) || defined(UTF_32BE) || defined(UTF_32LE)
211 *ob++ = (uchar_t)((u4 >> 24) & 0xff);
212 *ob++ = (uchar_t)((u4 >> 16) & 0xff);
213 #endif
214 *ob++ = (uchar_t)((u4 >> 8) & 0xff);
215 *ob++ = (uchar_t)(u4 & 0xff);
216 #if defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE)
217 if (u4_2) {
218 *ob++ = (uchar_t)((u4_2 >> 8) & 0xff);
219 *ob++ = (uchar_t)(u4_2 & 0xff);
220 }
221 #endif
222 }
223 ib++;
224 }
225
226 *inbuf = (char *)ib;
227 *inbufleft = ibtail - ib;
228 *outbuf = (char *)ob;
229 *outbufleft = obtail - ob;
230
231 return(ret_val);
232 }
233