1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * 25 * This particular file is to cover conversions from UCS-4, UCS-4BE, and 26 * UCS-4LE to UTF-32, UTF-32BE, and UTF-32LE. 27 */ 28 29 30 #include <stdlib.h> 31 #include <errno.h> 32 #include <sys/types.h> 33 #include <sys/isa_defs.h> 34 35 /* We include the ucs4_to_ucs.h at the moment. */ 36 #include "ucs4_to_ucs.h" 37 38 39 void * 40 _icv_open() 41 { 42 ucs_ucs_state_t *cd; 43 44 cd = (ucs_ucs_state_t *)calloc(1, sizeof(ucs_ucs_state_t)); 45 if (cd == (ucs_ucs_state_t *)NULL) { 46 errno = ENOMEM; 47 return((void *)-1); 48 } 49 50 #if defined(UCS_4BE) 51 cd->input.little_endian = false; 52 cd->input.bom_written = true; 53 #elif defined(UCS_4LE) 54 cd->input.little_endian = true; 55 cd->input.bom_written = true; 56 #elif defined(_LITTLE_ENDIAN) 57 cd->input.little_endian = true; 58 #endif 59 60 #if defined(UTF_32BE) 61 cd->output.little_endian = false; 62 cd->output.bom_written = true; 63 #elif defined(UTF_32LE) 64 cd->output.little_endian = true; 65 cd->output.bom_written = true; 66 #elif defined(_LITTLE_ENDIAN) 67 cd->output.little_endian = true; 68 #endif 69 70 return((void *)cd); 71 } 72 73 74 void 75 _icv_close(ucs_ucs_state_t *cd) 76 { 77 if (! cd) 78 errno = EBADF; 79 else 80 free((void *)cd); 81 } 82 83 84 size_t 85 _icv_iconv(ucs_ucs_state_t *cd, char **inbuf, size_t *inbufleft, char **outbuf, 86 size_t *outbufleft) 87 { 88 size_t ret_val = 0; 89 uchar_t *ib; 90 uchar_t *ob; 91 uchar_t *ibtail; 92 uchar_t *obtail; 93 uint_t u4; 94 signed char obsz; 95 int i; 96 97 98 if (! cd) { 99 errno = EBADF; 100 return((size_t)-1); 101 } 102 103 if (!inbuf || !(*inbuf)) { 104 #if defined(UCS_4) 105 cd->input.bom_written = false; 106 #endif 107 #if defined(UTF_32) 108 cd->output.bom_written = false; 109 #endif 110 return((size_t)0); 111 } 112 113 ib = (uchar_t *)*inbuf; 114 ob = (uchar_t *)*outbuf; 115 ibtail = ib + *inbufleft; 116 obtail = ob + *outbufleft; 117 118 #if defined(UCS_4) 119 if (! cd->input.bom_written) { 120 if ((ibtail - ib) < ICV_FETCH_UCS4_SIZE) { 121 errno = EINVAL; 122 ret_val = (size_t)-1; 123 goto need_more_input_err; 124 } 125 126 for (u4 = 0, i = 0; i < ICV_FETCH_UCS4_SIZE; i++) 127 u4 = (u4 << 8) | ((uint_t)(*(ib + i))); 128 129 if (u4 == ICV_BOM_IN_BIG_ENDIAN) { 130 ib += ICV_FETCH_UCS4_SIZE; 131 cd->input.little_endian = false; 132 } else if (u4 == ICV_BOM_IN_LITTLE_ENDIAN_UCS4) { 133 ib += ICV_FETCH_UCS4_SIZE; 134 cd->input.little_endian = true; 135 } 136 } 137 cd->input.bom_written = true; 138 #endif 139 140 141 while (ib < ibtail) { 142 if ((ibtail - ib) < ICV_FETCH_UCS4_SIZE) { 143 errno = EINVAL; 144 ret_val = (size_t)-1; 145 break; 146 } 147 148 u4 = 0; 149 if (cd->input.little_endian) { 150 for (i = ICV_FETCH_UCS4_SIZE - 1; i >= 0; i--) 151 u4 = (u4 << 8) | ((uint_t)(*(ib + i))); 152 } else { 153 for (i = 0; i < ICV_FETCH_UCS4_SIZE; i++) 154 u4 = (u4 << 8) | ((uint_t)(*(ib + i))); 155 } 156 157 if (u4 == 0x00fffe || u4 == 0x00ffff || u4 > 0x7fffffff || 158 (u4 >= 0x00d800 && u4 <= 0x00dfff)) { 159 errno = EILSEQ; 160 ret_val = (size_t)-1; 161 goto illegal_char_err; 162 } 163 164 if (u4 > 0x10ffff) { 165 u4 = ICV_CHAR_UCS2_REPLACEMENT; 166 ret_val++; 167 } 168 169 obsz = (cd->output.bom_written) ? 2 : 4; 170 if ((obtail - ob) < obsz) { 171 errno = E2BIG; 172 ret_val = (size_t)-1; 173 break; 174 } 175 176 if (cd->output.little_endian) { 177 if (! cd->output.bom_written) { 178 *ob++ = (uchar_t)0xff; 179 *ob++ = (uchar_t)0xfe; 180 *(ushort_t *)ob = (ushort_t)0; 181 ob += 2; 182 cd->output.bom_written = true; 183 } 184 *ob++ = (uchar_t)(u4 & 0xff); 185 *ob++ = (uchar_t)((u4 >> 8) & 0xff); 186 *ob++ = (uchar_t)((u4 >> 16) & 0xff); 187 *ob++ = (uchar_t)((u4 >> 24) & 0xff); 188 } else { 189 if (! cd->output.bom_written) { 190 *(ushort_t *)ob = (ushort_t)0; 191 ob += 2; 192 *ob++ = (uchar_t)0xfe; 193 *ob++ = (uchar_t)0xff; 194 cd->output.bom_written = true; 195 } 196 *ob++ = (uchar_t)((u4 >> 24) & 0xff); 197 *ob++ = (uchar_t)((u4 >> 16) & 0xff); 198 *ob++ = (uchar_t)((u4 >> 8) & 0xff); 199 *ob++ = (uchar_t)(u4 & 0xff); 200 } 201 ib += ICV_FETCH_UCS4_SIZE; 202 } 203 204 #if defined(UCS_4) 205 need_more_input_err: 206 #endif 207 illegal_char_err: 208 *inbuf = (char *)ib; 209 *inbufleft = ibtail - ib; 210 *outbuf = (char *)ob; 211 *outbufleft = obtail - ob; 212 213 return(ret_val); 214 } 215