1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1998-1999 by Sun Microsystems, Inc. 23 * All rights reserved. 24 * 25 * This program covers UTF-7 to UTF-8, UCS-2, and, UCS-4 code conversions. 26 * UTF-7 described in RFC 2152. 27 * We don't support any other UCS formats to and from UTF-7 unless there is 28 * a significant requirement. 29 */ 30 31 32 #include <stdio.h> 33 #include <stdlib.h> 34 #include <errno.h> 35 #include <sys/types.h> 36 #include <sys/isa_defs.h> 37 #include "utf7_to_ucs.h" 38 39 40 void * 41 _icv_open() 42 { 43 utf7_state_t *cd = (utf7_state_t *)calloc(1, sizeof(utf7_state_t)); 44 45 if (cd == (utf7_state_t *)NULL) { 46 errno = ENOMEM; 47 return((void *)-1); 48 } 49 50 #if defined(_LITTLE_ENDIAN) 51 cd->little_endian = true; 52 #endif 53 54 return((void *)cd); 55 } 56 57 58 void 59 _icv_close(utf7_state_t *cd) 60 { 61 if (! cd) 62 errno = EBADF; 63 else 64 free((void *)cd); 65 } 66 67 68 size_t 69 _icv_iconv(utf7_state_t *cd, char **inbuf, size_t *inbufleft, char **outbuf, 70 size_t *outbufleft) 71 { 72 size_t ret_val = 0; 73 uchar_t *ib; 74 uchar_t *ob; 75 uchar_t *ibtail; 76 uchar_t *obtail; 77 78 if (! cd) { 79 errno = EBADF; 80 return((size_t)-1); 81 } 82 83 if (!inbuf || !(*inbuf)) { 84 /* We just ignore any remnant bits we so far accumulated. */ 85 cd->in_the_middle_of_utf7_sequence = false; 86 cd->remnant = 0; 87 cd->remnant_count = 0; 88 cd->prevch = (uchar_t)'\0'; 89 90 return((size_t)0); 91 } 92 93 ib = (uchar_t *)*inbuf; 94 ob = (uchar_t *)*outbuf; 95 ibtail = ib + *inbufleft; 96 obtail = ob + *outbufleft; 97 98 while (ib < ibtail) { 99 uint_t temp_remnant; 100 uint_t u4; 101 #if defined(UCS_2) || defined(UCS_4) 102 signed char obsz; 103 #endif 104 105 u4 = ICV_U7_UCS4_OUTOFUTF16; 106 if (cd->in_the_middle_of_utf7_sequence) { 107 if (rmb64[*ib] >= 0) { 108 temp_remnant = (cd->remnant << 6) | rmb64[*ib]; 109 110 switch (cd->remnant_count) { 111 case ICV_U7_ACTION_HARVEST1: 112 u4 = (temp_remnant >> 2) & 0xffff; 113 break; 114 case ICV_U7_ACTION_HARVEST2: 115 u4 = (temp_remnant >> 4) & 0xffff; 116 break; 117 case ICV_U7_ACTION_HARVEST3: 118 u4 = temp_remnant & 0xffff; 119 break; 120 } 121 122 if (u4 != ICV_U7_UCS4_OUTOFUTF16) { 123 if (u4 == 0x00fffe || u4 == 0x00ffff || 124 (u4 >= 0x00d800 && 125 u4 <= 0x00dfff)) { 126 errno = EILSEQ; 127 ret_val = (size_t)-1; 128 break; 129 } 130 #if defined(UCS_2) 131 CHECK_OUTBUF_SZ_AND_WRITE_U2; 132 #elif defined(UCS_4) 133 CHECK_OUTBUF_SZ_AND_WRITE_U4; 134 #elif defined(UTF_8) 135 CHECK_OUTBUF_SZ_AND_WRITE_U8_OR_EILSEQ; 136 #else 137 #error "Fatal: One of UCS_2, UCS_4, or, UTF_8 is needed." 138 #endif 139 } 140 141 /* It's now safe to have the bits. */ 142 cd->remnant = temp_remnant; 143 if (cd->remnant_count == ICV_U7_ACTION_HARVEST3) 144 cd->remnant_count = ICV_U7_ACTION_START; 145 else 146 cd->remnant_count++; 147 } else { 148 if (*ib == (uint_t)'-') { 149 if (cd->prevch == '+') 150 u4 = (uint_t)'+'; 151 } else 152 u4 = (uint_t)(*ib); 153 154 switch (cd->remnant_count) { 155 case ICV_U7_ACTION_START: 156 /* (ICV_U7_ACTION_HARVEST3+1): */ 157 /* These are normal cases. */ 158 break; 159 case (ICV_U7_ACTION_HARVEST1+1): 160 if (cd->remnant & 0x03) { 161 errno = EILSEQ; 162 ret_val = (size_t)-1; 163 goto illegal_char_err; 164 } 165 break; 166 case (ICV_U7_ACTION_HARVEST2+1): 167 if (cd->remnant & 0x0f) { 168 errno = EILSEQ; 169 ret_val = (size_t)-1; 170 goto illegal_char_err; 171 } 172 break; 173 default: 174 errno = EILSEQ; 175 ret_val = (size_t)-1; 176 goto illegal_char_err; 177 break; 178 } 179 180 if (u4 != ICV_U7_UCS4_OUTOFUTF16) { 181 #if defined(UCS_2) 182 CHECK_OUTBUF_SZ_AND_WRITE_U2; 183 #elif defined(UCS_4) 184 CHECK_OUTBUF_SZ_AND_WRITE_U4; 185 #elif defined(UTF_8) 186 if (ob >= obtail) { 187 errno = E2BIG; 188 ret_val = (size_t)-1; 189 break; 190 } 191 *ob++ = (uchar_t)(u4 & 0x7f); 192 #else 193 #error "Fatal: One of UCS_2, UCS_4, or, UTF_8 is needed." 194 #endif 195 } 196 197 cd->in_the_middle_of_utf7_sequence = false; 198 cd->remnant = 0; 199 cd->remnant_count = 0; 200 } 201 } else { 202 if (*ib == '+') { 203 cd->in_the_middle_of_utf7_sequence = true; 204 cd->remnant = 0; 205 cd->remnant_count = 0; 206 } else { 207 #if defined(UCS_2) 208 u4 = (uint_t)*ib; 209 CHECK_OUTBUF_SZ_AND_WRITE_U2; 210 #elif defined(UCS_4) 211 u4 = (uint_t)*ib; 212 CHECK_OUTBUF_SZ_AND_WRITE_U4; 213 #elif defined(UTF_8) 214 if (ob >= obtail) { 215 errno = E2BIG; 216 ret_val = (size_t)-1; 217 break; 218 } 219 *ob++ = *ib; 220 #else 221 #error "Fatal: One of UCS_2, UCS_4, or, UTF_8 is needed." 222 #endif 223 } 224 } 225 cd->prevch = *ib++; 226 } 227 228 illegal_char_err: 229 *inbuf = (char *)ib; 230 *inbufleft = ibtail - ib; 231 *outbuf = (char *)ob; 232 *outbufleft = obtail - ob; 233 234 235 return(ret_val); 236 } 237