1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * 25 * This is for UTF-8 to UTF-8 code conversion; it simply passes through 26 * all things with UTF-8 byte sequence checking to screen out any illegal 27 * and thus potentially harmful bytes. 28 */ 29 30 31 #include <stdlib.h> 32 #include <errno.h> 33 #include <sys/types.h> 34 #include <sys/isa_defs.h> 35 #include "common_defs.h" 36 37 38 void * 39 _icv_open() 40 { 41 return((void *)MAGIC_NUMBER); 42 } 43 44 45 void 46 _icv_close(int *cd) 47 { 48 if (! cd || cd != (int *)MAGIC_NUMBER) 49 errno = EBADF; 50 } 51 52 53 size_t 54 _icv_iconv(int *cd, char **inbuf, size_t *inbufleft, char **outbuf, 55 size_t *outbufleft) 56 { 57 size_t ret_val = 0; 58 uchar_t *ib; 59 uchar_t *ob; 60 uchar_t *ibtail; 61 uchar_t *obtail; 62 uchar_t *ib_copy; 63 uint_t u4; 64 uint_t first_byte; 65 signed char sz; 66 signed char obsz; 67 68 if (! cd || cd != (int *)MAGIC_NUMBER) { 69 errno = EBADF; 70 return((size_t)-1); 71 } 72 73 if (!inbuf || !(*inbuf)) 74 return((size_t)0); 75 76 ib = (uchar_t *)*inbuf; 77 ob = (uchar_t *)*outbuf; 78 ibtail = ib + *inbufleft; 79 obtail = ob + *outbufleft; 80 81 while (ib < ibtail) { 82 sz = number_of_bytes_in_utf8_char[*ib]; 83 if (sz == ICV_TYPE_ILLEGAL_CHAR) { 84 errno = EILSEQ; 85 ret_val = (size_t)-1; 86 break; 87 } 88 obsz = sz; 89 90 if ((ibtail - ib) < sz) { 91 errno = EINVAL; 92 ret_val = (size_t)-1; 93 break; 94 } 95 96 ib_copy = ib; 97 first_byte = *ib_copy++; 98 u4 = first_byte & (uint_t)masks_tbl[sz]; 99 for (; sz > 1; sz--) { 100 if (first_byte) { 101 if (((uchar_t)*ib_copy) < 102 valid_min_2nd_byte[first_byte] || 103 ((uchar_t)*ib_copy) > 104 valid_max_2nd_byte[first_byte]) { 105 errno = EILSEQ; 106 ret_val = (size_t)-1; 107 goto ILLEGAL_CHAR_ERR; 108 } 109 first_byte = 0; 110 } else if (((uint_t)*ib_copy) < 0x80 || 111 ((uint_t)*ib_copy) > 0xbf) { 112 errno = EILSEQ; 113 ret_val = (size_t)-1; 114 goto ILLEGAL_CHAR_ERR; 115 } 116 u4 = (u4 << ICV_UTF8_BIT_SHIFT) | 117 (((uint_t)*ib_copy) & ICV_UTF8_BIT_MASK); 118 ib_copy++; 119 } 120 121 /* 122 * Check some more illegal characters and noncharacters from 123 * the input buffer. Surrogate pairs (U+D800 - U+DFFF) are 124 * checked at the above for loop. 125 */ 126 if ((u4 & 0xffff) == 0x00fffe || (u4 & 0xffff) == 0x00ffff || 127 (u4 >= 0x00fdd0 && u4 <= 0x00fdef) || u4 > 0x10fffd) { 128 errno = EILSEQ; 129 ret_val = (size_t)-1; 130 goto ILLEGAL_CHAR_ERR; 131 } 132 133 if ((obtail - ob) < obsz) { 134 errno = E2BIG; 135 ret_val = (size_t)-1; 136 break; 137 } 138 139 for (; obsz >= 1; obsz--) 140 *ob++ = *ib++; 141 } 142 143 ILLEGAL_CHAR_ERR: 144 *inbuf = (char *)ib; 145 *inbufleft = ibtail - ib; 146 *outbuf = (char *)ob; 147 *outbufleft = obtail - ob; 148 149 return(ret_val); 150 } 151