1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * 25 * This particular file is to cover conversions from UTF-8 to various single 26 * byte codesets. 27 */ 28 29 30 #include <stdlib.h> 31 #include <errno.h> 32 #include <sys/types.h> 33 #include "utf8_to_sb.h" 34 35 36 37 void * 38 _icv_open() 39 { 40 ucs_state_t *cd = (ucs_state_t *)calloc(1, sizeof(ucs_state_t)); 41 if (cd == (ucs_state_t *)NULL) { 42 errno = ENOMEM; 43 return((void *)-1); 44 } 45 46 return((void *)cd); 47 } 48 49 50 void 51 _icv_close(ucs_state_t *cd) 52 { 53 if (! cd) 54 errno = EBADF; 55 else 56 free((void *)cd); 57 } 58 59 60 size_t 61 _icv_iconv(ucs_state_t *cd, char **inbuf, size_t *inbufleft, char **outbuf, 62 size_t *outbufleft) 63 { 64 size_t ret_val = 0; 65 unsigned char *ib; 66 unsigned char *ob; 67 unsigned char *ibtail; 68 unsigned char *obtail; 69 register int i, l, h; 70 signed char sz; 71 unsigned long u8; 72 73 if (! cd) { 74 errno = EBADF; 75 return((size_t)-1); 76 } 77 78 if (!inbuf || !(*inbuf)) { 79 cd->bom_written = false; 80 return((size_t)0); 81 } 82 83 ib = (unsigned char *)*inbuf; 84 ob = (unsigned char *)*outbuf; 85 ibtail = ib + *inbufleft; 86 obtail = ob + *outbufleft; 87 88 /* We skip any first signiture of UTF-8 */ 89 if (!cd->bom_written && 90 ((ibtail - ib) >= ICV_FETCH_UTF8_BOM_SIZE)) { 91 for (u8 = 0, i = 0; i < ICV_FETCH_UTF8_BOM_SIZE; i++) 92 u8 = (u8 << 8) | ((uint_t)(*(ib + i))); 93 if (u8 == ICV_BOM_IN_BIG_ENDIAN) 94 ib += ICV_FETCH_UTF8_BOM_SIZE; 95 } 96 cd->bom_written = true; 97 98 while (ib < ibtail) { 99 sz = number_of_bytes_in_utf8_char[*ib]; 100 if (sz == ICV_TYPE_ILLEGAL_CHAR) { 101 errno = EILSEQ; 102 ret_val = (size_t)-1; 103 break; 104 } 105 106 if (ob >= obtail) { 107 errno = E2BIG; 108 ret_val = (size_t)-1; 109 break; 110 } 111 112 if (sz == 1) { 113 *ob++ = *ib++; 114 } else { 115 if ((ibtail - ib) < sz) { 116 errno = EINVAL; 117 ret_val = (size_t)-1; 118 break; 119 } 120 121 u8 = 0; 122 for (i = 0; i < sz; i++) { 123 if (((unsigned int)*ib) < 0x80) { 124 errno = EILSEQ; 125 ret_val = (size_t)-1; 126 goto illegal_char_err; 127 } 128 u8 = (u8 << 8) | ((unsigned int)*ib); 129 ib++; 130 } 131 if ((u8 & ICV_UTF8_REPRESENTATION_ffff_mask) == 132 ICV_UTF8_REPRESENTATION_fffe || 133 (u8 & ICV_UTF8_REPRESENTATION_ffff_mask) == 134 ICV_UTF8_REPRESENTATION_ffff || 135 u8 > ICV_UTF8_REPRESENTATION_10fffd || 136 (u8 >= ICV_UTF8_REPRESENTATION_d800 && 137 u8 <= ICV_UTF8_REPRESENTATION_dfff) || 138 (u8 >= ICV_UTF8_REPRESENTATION_fdd0 && 139 u8 <= ICV_UTF8_REPRESENTATION_fdef)) { 140 ib -= sz; 141 errno = EILSEQ; 142 ret_val = (size_t)-1; 143 goto illegal_char_err; 144 } 145 146 i = l = 0; 147 h = (sizeof(u8_sb_tbl) / 148 sizeof(to_sb_table_component_t)) - 1; 149 while (l <= h) { 150 i = (l + h) / 2; 151 if (u8_sb_tbl[i].u8 == u8) 152 break; 153 else if (u8_sb_tbl[i].u8 < u8) 154 l = i + 1; 155 else 156 h = i - 1; 157 } 158 159 /* 160 * We just assume that either we found it or it is 161 * a non-identical character that we need to 162 * provide a replacement character. 163 */ 164 if (u8_sb_tbl[i].u8 == u8) { 165 *ob++ = u8_sb_tbl[i].sb; 166 } else { 167 *ob++ = ICV_CHAR_ASCII_REPLACEMENT; 168 ret_val++; 169 } 170 } 171 } 172 173 illegal_char_err: 174 *inbuf = (char *)ib; 175 *inbufleft = ibtail - ib; 176 *outbuf = (char *)ob; 177 *outbufleft = obtail - ob; 178 179 return(ret_val); 180 } 181