1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * 25 * This particular file is to cover conversions from UCS-4, UCS-4BE, UCS-4LE, 26 * UTF-32, UTF-32BE, and UTF-32LE to various other UCS formats, especially, 27 * UCS-2, UCS-2BE, UCS-2LE, UTF-16, UTF-16BE, and UTF-16LE. 28 */ 29 30 31 #include <stdlib.h> 32 #include <errno.h> 33 #include <sys/types.h> 34 #include <sys/isa_defs.h> 35 #include "ucs4_to_ucs.h" 36 37 38 void * 39 _icv_open() 40 { 41 ucs_ucs_state_t *cd; 42 43 cd = (ucs_ucs_state_t *)calloc(1, sizeof(ucs_ucs_state_t)); 44 if (cd == (ucs_ucs_state_t *)NULL) { 45 errno = ENOMEM; 46 return((void *)-1); 47 } 48 49 #if defined(UCS_4BE) || defined(UTF_32BE) 50 cd->input.little_endian = false; 51 cd->input.bom_written = true; 52 #elif defined(UCS_4LE) || defined(UTF_32LE) 53 cd->input.little_endian = true; 54 cd->input.bom_written = true; 55 #elif defined(_LITTLE_ENDIAN) 56 cd->input.little_endian = true; 57 #endif 58 59 #if defined(UTF_16BE) || defined(UCS_2BE) 60 cd->output.little_endian = false; 61 cd->output.bom_written = true; 62 #elif defined(UTF_16LE) || defined(UCS_2LE) 63 cd->output.little_endian = true; 64 cd->output.bom_written = true; 65 #elif defined(_LITTLE_ENDIAN) 66 cd->output.little_endian = true; 67 #endif 68 69 return((void *)cd); 70 } 71 72 73 void 74 _icv_close(ucs_ucs_state_t *cd) 75 { 76 if (! cd) 77 errno = EBADF; 78 else 79 free((void *)cd); 80 } 81 82 83 size_t 84 _icv_iconv(ucs_ucs_state_t *cd, char **inbuf, size_t *inbufleft, char **outbuf, 85 size_t *outbufleft) 86 { 87 size_t ret_val = 0; 88 uchar_t *ib; 89 uchar_t *ob; 90 uchar_t *ibtail; 91 uchar_t *obtail; 92 uint_t u4; 93 uint_t u4_2; 94 signed char obsz; 95 int i; 96 97 98 if (! cd) { 99 errno = EBADF; 100 return((size_t)-1); 101 } 102 103 if (!inbuf || !(*inbuf)) { 104 #if defined(UCS_4) || defined(UTF_32) 105 cd->input.bom_written = false; 106 #endif 107 #if defined(UCS_2) || defined(UTF_16) 108 cd->output.bom_written = false; 109 #endif 110 return((size_t)0); 111 } 112 113 ib = (uchar_t *)*inbuf; 114 ob = (uchar_t *)*outbuf; 115 ibtail = ib + *inbufleft; 116 obtail = ob + *outbufleft; 117 118 #if defined(UCS_4) || defined(UTF_32) 119 if (! cd->input.bom_written) { 120 if ((ibtail - ib) < ICV_FETCH_UCS4_SIZE) { 121 errno = EINVAL; 122 ret_val = (size_t)-1; 123 goto need_more_input_err; 124 } 125 126 for (u4 = 0, i = 0; i < ICV_FETCH_UCS4_SIZE; i++) 127 u4 = (u4 << 8) | ((uint_t)(*(ib + i))); 128 129 if (u4 == ICV_BOM_IN_BIG_ENDIAN) { 130 ib += ICV_FETCH_UCS4_SIZE; 131 cd->input.little_endian = false; 132 } else if (u4 == ICV_BOM_IN_LITTLE_ENDIAN_UCS4) { 133 ib += ICV_FETCH_UCS4_SIZE; 134 cd->input.little_endian = true; 135 } 136 } 137 cd->input.bom_written = true; 138 #endif 139 140 141 while (ib < ibtail) { 142 if ((ibtail - ib) < ICV_FETCH_UCS4_SIZE) { 143 errno = EINVAL; 144 ret_val = (size_t)-1; 145 break; 146 } 147 148 u4 = u4_2 = 0; 149 if (cd->input.little_endian) { 150 for (i = ICV_FETCH_UCS4_SIZE - 1; i >= 0; i--) 151 u4 = (u4 << 8) | ((uint_t)(*(ib + i))); 152 } else { 153 for (i = 0; i < ICV_FETCH_UCS4_SIZE; i++) 154 u4 = (u4 << 8) | ((uint_t)(*(ib + i))); 155 } 156 157 if (u4 == 0x00fffe || u4 == 0x00ffff || 158 #if defined(UTF_32) || defined(UTF_32BE) || defined(UTF_32LE) 159 u4 > 0x10ffff || 160 #elif defined(UCS_4) || defined(UCS_4BE) || defined(UCS_4LE) 161 u4 > 0x7fffffff || 162 #endif 163 (u4 >= 0x00d800 && u4 <= 0x00dfff)) { 164 errno = EILSEQ; 165 ret_val = (size_t)-1; 166 goto illegal_char_err; 167 } 168 169 obsz = (cd->output.bom_written) ? 2 : 4; 170 #if defined(UCS_2) || defined(UCS_2BE) || defined(UCS_2LE) 171 if (u4 > 0x00ffff) { 172 u4 = ICV_CHAR_UCS2_REPLACEMENT; 173 ret_val++; 174 } 175 #elif defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE) 176 if (u4 > 0x10ffff) { 177 u4 = ICV_CHAR_UCS2_REPLACEMENT; 178 ret_val++; 179 } else if (u4 > 0x00ffff) { 180 u4_2 = ((u4 - 0x010000) % 0x400) + 0x00dc00; 181 u4 = ((u4 - 0x010000) / 0x400) + 0x00d800; 182 obsz += 2; 183 } 184 #elif defined(UCS_4) || defined(UCS_4BE) || defined(UCS_4LE) || \ 185 defined(UTF_32) || defined(UTF_32BE) || defined(UTF_32LE) 186 /* 187 * We do nothing here since these if expressions 188 * are only for preparing for output buffer; 189 * macros such as UCS_4/UCS_4BE/UCS_4LE and 190 * UTF_32/UTF_32BE/UTF_32LE are only for input. 191 */ 192 #else 193 #error "Fatal: one of the UCS macros need to be defined." 194 #endif 195 if ((obtail - ob) < obsz) { 196 errno = E2BIG; 197 ret_val = (size_t)-1; 198 break; 199 } 200 201 if (cd->output.little_endian) { 202 if (! cd->output.bom_written) { 203 *ob++ = (uchar_t)0xff; 204 *ob++ = (uchar_t)0xfe; 205 cd->output.bom_written = true; 206 } 207 *ob++ = (uchar_t)(u4 & 0xff); 208 *ob++ = (uchar_t)((u4 >> 8) & 0xff); 209 #if defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE) 210 if (u4_2) { 211 *ob++ = (uchar_t)(u4_2 & 0xff); 212 *ob++ = (uchar_t)((u4_2 >> 8) & 0xff); 213 } 214 #endif 215 } else { 216 if (! cd->output.bom_written) { 217 *ob++ = (uchar_t)0xfe; 218 *ob++ = (uchar_t)0xff; 219 cd->output.bom_written = true; 220 } 221 *ob++ = (uchar_t)((u4 >> 8) & 0xff); 222 *ob++ = (uchar_t)(u4 & 0xff); 223 #if defined(UTF_16) || defined(UTF_16BE) || defined(UTF_16LE) 224 if (u4_2) { 225 *ob++ = (uchar_t)((u4_2 >> 8) & 0xff); 226 *ob++ = (uchar_t)(u4_2 & 0xff); 227 } 228 #endif 229 } 230 ib += ICV_FETCH_UCS4_SIZE; 231 } 232 233 #if defined(UCS_4) || defined(UTF_32) 234 need_more_input_err: 235 #endif 236 illegal_char_err: 237 *inbuf = (char *)ib; 238 *inbufleft = ibtail - ib; 239 *outbuf = (char *)ob; 240 *outbufleft = obtail - ob; 241 242 return(ret_val); 243 } 244