1 /* 2 * Copyright 2013 Garrett D'Amore <garrett@damore.org> 3 * Copyright 2017 Nexenta Systems, Inc. 4 * Copyright 2019 Joyent, Inc. 5 * Copyright (c) 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Paul Borman at Krystal Technologies. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 4. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36 #include "lint.h" 37 #include <ctype.h> 38 #include <wchar.h> 39 #include "runetype.h" 40 #include "mblocal.h" 41 #include "_ctype.h" 42 43 #define _DEFRUNETYPE \ 44 /* 00 */ \ 45 _CTYPE_C, \ 46 _CTYPE_C, \ 47 _CTYPE_C, \ 48 _CTYPE_C, \ 49 _CTYPE_C, \ 50 _CTYPE_C, \ 51 _CTYPE_C, \ 52 _CTYPE_C, \ 53 /* 08 */ \ 54 _CTYPE_C, \ 55 _CTYPE_C|_CTYPE_S|_CTYPE_B, \ 56 _CTYPE_C|_CTYPE_S, \ 57 _CTYPE_C|_CTYPE_S, \ 58 _CTYPE_C|_CTYPE_S, \ 59 _CTYPE_C|_CTYPE_S, \ 60 _CTYPE_C, \ 61 _CTYPE_C, \ 62 /* 10 */ \ 63 _CTYPE_C, \ 64 _CTYPE_C, \ 65 _CTYPE_C, \ 66 _CTYPE_C, \ 67 _CTYPE_C, \ 68 _CTYPE_C, \ 69 _CTYPE_C, \ 70 _CTYPE_C, \ 71 /* 18 */ \ 72 _CTYPE_C, \ 73 _CTYPE_C, \ 74 _CTYPE_C, \ 75 _CTYPE_C, \ 76 _CTYPE_C, \ 77 _CTYPE_C, \ 78 _CTYPE_C, \ 79 _CTYPE_C, \ 80 /* 20 */ \ 81 _CTYPE_S|_CTYPE_B|_CTYPE_R, \ 82 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 83 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 84 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 85 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 86 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 87 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 88 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 89 /* 28 */ \ 90 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 91 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 92 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 93 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 94 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 95 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 96 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 97 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 98 /* 30 */ \ 99 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \ 100 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \ 101 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \ 102 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \ 103 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \ 104 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \ 105 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \ 106 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \ 107 /* 38 */ \ 108 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \ 109 _CTYPE_D|_CTYPE_R|_CTYPE_G|_CTYPE_X, \ 110 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 111 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 112 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 113 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 114 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 115 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 116 /* 40 */ \ 117 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 118 _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 119 _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 120 _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 121 _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 122 _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 123 _CTYPE_U|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 124 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 125 /* 48 */ \ 126 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 127 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 128 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 129 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 130 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 131 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 132 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 133 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 134 /* 50 */ \ 135 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 136 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 137 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 138 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 139 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 140 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 141 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 142 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 143 /* 58 */ \ 144 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 145 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 146 _CTYPE_U|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 147 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 148 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 149 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 150 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 151 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 152 /* 60 */ \ 153 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 154 _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 155 _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 156 _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 157 _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 158 _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 159 _CTYPE_L|_CTYPE_X|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 160 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 161 /* 68 */ \ 162 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 163 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 164 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 165 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 166 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 167 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 168 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 169 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 170 /* 70 */ \ 171 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 172 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 173 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 174 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 175 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 176 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 177 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 178 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 179 /* 78 */ \ 180 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 181 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 182 _CTYPE_L|_CTYPE_R|_CTYPE_G|_CTYPE_A, \ 183 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 184 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 185 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 186 _CTYPE_P|_CTYPE_R|_CTYPE_G, \ 187 _CTYPE_C 188 189 #define _DEFMAPLOWER \ 190 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ 191 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, \ 192 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, \ 193 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, \ 194 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, \ 195 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, \ 196 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, \ 197 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, \ 198 0x40, 'a', 'b', 'c', 'd', 'e', 'f', 'g', \ 199 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', \ 200 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', \ 201 'x', 'y', 'z', 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, \ 202 0x60, 'a', 'b', 'c', 'd', 'e', 'f', 'g', \ 203 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', \ 204 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', \ 205 'x', 'y', 'z', 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, \ 206 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, \ 207 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, \ 208 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, \ 209 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, \ 210 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, \ 211 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, \ 212 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, \ 213 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, \ 214 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, \ 215 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, \ 216 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, \ 217 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, \ 218 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, \ 219 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, \ 220 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, \ 221 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff 222 223 #define _DEFMAPUPPER \ 224 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, \ 225 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, \ 226 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, \ 227 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, \ 228 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, \ 229 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, \ 230 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, \ 231 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, \ 232 0x40, 'A', 'B', 'C', 'D', 'E', 'F', 'G', \ 233 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', \ 234 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', \ 235 'X', 'Y', 'Z', 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, \ 236 0x60, 'A', 'B', 'C', 'D', 'E', 'F', 'G', \ 237 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', \ 238 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', \ 239 'X', 'Y', 'Z', 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, \ 240 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, \ 241 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, \ 242 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, \ 243 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, \ 244 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, \ 245 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, \ 246 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, \ 247 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, \ 248 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, \ 249 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, \ 250 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, \ 251 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, \ 252 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, \ 253 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, \ 254 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, \ 255 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, 256 257 _RuneLocale _DefaultRuneLocale = { 258 _RUNE_MAGIC_1, 259 "NONE", 260 { _DEFRUNETYPE }, 261 { _DEFMAPLOWER }, 262 { _DEFMAPUPPER }, 263 }; 264 265 /* 266 * __ctype_mask, __trans_lower, and __trans_upper come from former _ctype.c and 267 * have to stay pointers for binary compatibility, so we provide separate 268 * storage for them, initialized to "C" locale contents by default. Note that 269 * legacy code may dereference __ctype_mask[-1] when checking against EOF, 270 * relying on that value to be 0. To allow this, ___ctype_mask is expanded by 271 * one value and prepended with a leading 0, with __ctype_mask being set to 272 * point to ___ctype_mask[1]. (__trans_lower and __trans_upper do not suffer 273 * from this as EOF access was prevented in legacy code by a check against 274 * isascii(), which always returned 0 for EOF.) 275 */ 276 static unsigned int ___ctype_mask[_CACHED_RUNES + 1] = { 0, _DEFRUNETYPE }; 277 unsigned int *__ctype_mask = &___ctype_mask[1]; 278 279 static int ___trans_lower[_CACHED_RUNES] = { _DEFMAPLOWER }; 280 int *__trans_lower = ___trans_lower; 281 282 static int ___trans_upper[_CACHED_RUNES] = { _DEFMAPUPPER }; 283 int *__trans_upper = ___trans_upper; 284