1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1998-1999 by Sun Microsystems, Inc. 23 * All rights reserved. 24 */ 25 26 #ifndef UTF7_TO_UCS_H 27 #define UTF7_TO_UCS_H 28 29 30 #include "common_defs.h" 31 32 33 /* Modified Base64 alphabet to Value mapping table -- see RFC 2045. */ 34 static const signed char rmb64[0x100] = { 35 /*00*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 36 /*10*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 37 /*20*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 38 /*30*/ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, 39 /*40*/ -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 40 /*50*/ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, 41 /*60*/ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 42 /*70*/ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, 43 /*80*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 44 /*90*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 45 /*a0*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 46 /*b0*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 47 /*c0*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 48 /*d0*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 49 /*e0*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 50 /*f0*/ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 51 }; 52 53 /* 54 * Any UCS-2 character sequences will yield: 55 * 56 * +-16 bits (UCS-2)-+ +-16 bits (UCS-2)-+ +-16 bits (UCS-2)-+ 57 * | | | | | | 58 * xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx 59 * | || | | || | | || | | || | 60 * +--0--++--1--+ +---2--++--3--+ +--4--++---5--+ +--6--++--7--+ MBase64 chars 61 * ^ ^ 62 * initially, | | 63 * four remnant bits, | 64 * two remnant bits, 65 * 66 * and, then no remnant bit for three sequential UCS-2 characters, 67 * respectively, and repeat these three UCS-2 character sequences. For the 68 * first UCS-2 character in this sequence, there will be two MBase64 69 * characters, and for the second and the third UCS-2 characters, there will be 70 * three MBase64 characters. 71 * 72 * Following action numbers, 0, 2, 5, and, 7, are assigned to each of 73 * corresponding MBase64 characters that can either yield a UCS-2 character or 74 * indicate a character that is the starting/initial one. 75 */ 76 #define ICV_U7_ACTION_START 0 77 #define ICV_U7_ACTION_HARVEST1 2 78 #define ICV_U7_ACTION_HARVEST2 5 79 #define ICV_U7_ACTION_HARVEST3 7 80 81 #define ICV_U7_UCS4_OUTOFUTF16 0xfffefeff 82 83 #define OUTBUF_SIZE_CHECK(sz) \ 84 if ((obtail - ob) < (sz)) { \ 85 errno = E2BIG; \ 86 ret_val = (size_t)-1; \ 87 break; \ 88 } 89 90 /* 91 * For better performance and readability, we perfer to write macros like 92 * below instead of putting them in functions and then calling them. 93 */ 94 #define CHECK_OUTBUF_SZ_AND_WRITE_U2 \ 95 obsz = (cd->bom_written) ? ICV_FETCH_UCS_SIZE : ICV_FETCH_UCS_SIZE_TWO;\ 96 if ((obtail - ob) < obsz) { \ 97 errno = E2BIG; \ 98 ret_val = (size_t)-1; \ 99 break; \ 100 } \ 101 if (cd->little_endian) { \ 102 if (! cd->bom_written) { \ 103 *ob++ = (uchar_t)0xff; \ 104 *ob++ = (uchar_t)0xfe; \ 105 cd->bom_written = true; \ 106 } \ 107 *ob++ = (uchar_t)(u4 & 0xff); \ 108 *ob++ = (uchar_t)((u4 >> 8) & 0xff); \ 109 } else { \ 110 if (! cd->bom_written) { \ 111 *ob++ = (uchar_t)0xfe; \ 112 *ob++ = (uchar_t)0xff; \ 113 cd->bom_written = true; \ 114 } \ 115 *ob++ = (uchar_t)((u4 >> 8) & 0xff); \ 116 *ob++ = (uchar_t)(u4 & 0xff); \ 117 } 118 119 #define CHECK_OUTBUF_SZ_AND_WRITE_U4 \ 120 obsz = (cd->bom_written) ? ICV_FETCH_UCS_SIZE : ICV_FETCH_UCS_SIZE_TWO;\ 121 if ((obtail - ob) < obsz) { \ 122 errno = E2BIG; \ 123 ret_val = (size_t)-1; \ 124 break; \ 125 } \ 126 if (cd->little_endian) { \ 127 if (! cd->bom_written) { \ 128 *ob++ = (uchar_t)0xff; \ 129 *ob++ = (uchar_t)0xfe; \ 130 *(ushort_t *)ob = (ushort_t)0; \ 131 ob += 2; \ 132 cd->bom_written = true; \ 133 } \ 134 *ob++ = (uchar_t)(u4 & 0xff); \ 135 *ob++ = (uchar_t)((u4 >> 8) & 0xff); \ 136 *ob++ = (uchar_t)((u4 >> 16) & 0xff); \ 137 *ob++ = (uchar_t)((u4 >> 24) & 0xff); \ 138 } else { \ 139 if (! cd->bom_written) { \ 140 *(ushort_t *)ob = (ushort_t)0; \ 141 ob += 2; \ 142 *ob++ = (uchar_t)0xfe; \ 143 *ob++ = (uchar_t)0xff; \ 144 cd->bom_written = true; \ 145 } \ 146 *ob++ = (uchar_t)((u4 >> 24) & 0xff); \ 147 *ob++ = (uchar_t)((u4 >> 16) & 0xff); \ 148 *ob++ = (uchar_t)((u4 >> 8) & 0xff); \ 149 *ob++ = (uchar_t)(u4 & 0xff); \ 150 } 151 152 /* 153 * UTF-7's code range is basically that of UTF-16, i.e., 154 * U+0000 0000 ~ U+0010 FFFF, it cannot go beyond the U+0010 FFFF. 155 */ 156 #define CHECK_OUTBUF_SZ_AND_WRITE_U8_OR_EILSEQ \ 157 if (u4 <= 0x7f) { \ 158 OUTBUF_SIZE_CHECK(1); \ 159 *ob++ = (uchar_t)u4; \ 160 } else if (u4 <= 0x7ff) { \ 161 OUTBUF_SIZE_CHECK(2); \ 162 *ob++ = (uchar_t)(0xc0 | ((u4 & 0x07c0) >> 6)); \ 163 *ob++ = (uchar_t)(0x80 | (u4 & 0x003f)); \ 164 } else if (u4 <= 0x00ffff) { \ 165 OUTBUF_SIZE_CHECK(3); \ 166 *ob++ = (uchar_t)(0xe0 | ((u4 & 0x0f000) >> 12)); \ 167 *ob++ = (uchar_t)(0x80 | ((u4 & 0x00fc0) >> 6)); \ 168 *ob++ = (uchar_t)(0x80 | (u4 & 0x0003f)); \ 169 } else if (u4 <= 0x10ffff) { \ 170 OUTBUF_SIZE_CHECK(4); \ 171 *ob++ = (uchar_t)(0xf0 | ((u4 & 0x01c0000) >> 18)); \ 172 *ob++ = (uchar_t)(0x80 | ((u4 & 0x003f000) >> 12)); \ 173 *ob++ = (uchar_t)(0x80 | ((u4 & 0x0000fc0) >> 6)); \ 174 *ob++ = (uchar_t)(0x80 | (u4 & 0x000003f)); \ 175 } else { \ 176 errno = EILSEQ; \ 177 ret_val = (size_t)-1; \ 178 break; \ 179 } 180 181 182 #endif /* UTF7_TO_UCS_H */ 183