1 /*- 2 * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 3 * Copyright 2015 John Marino <draco@marino.st> 4 * 5 * This source code is derived from the illumos localedef command, and 6 * provided under BSD-style license terms by Nexenta Systems, Inc. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 22 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 /* 32 * CHARMAP file handling for localedef. 33 */ 34 #include <sys/cdefs.h> 35 #include <sys/types.h> 36 #include <sys/tree.h> 37 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <limits.h> 42 #include <stddef.h> 43 #include <unistd.h> 44 #include "localedef.h" 45 #include "parser.h" 46 47 48 typedef struct charmap { 49 const char *name; 50 wchar_t wc; 51 RB_ENTRY(charmap) rb_sym; 52 RB_ENTRY(charmap) rb_wc; 53 } charmap_t; 54 55 static int cmap_compare_sym(const void *n1, const void *n2); 56 static int cmap_compare_wc(const void *n1, const void *n2); 57 58 static RB_HEAD(cmap_sym, charmap) cmap_sym; 59 static RB_HEAD(cmap_wc, charmap) cmap_wc; 60 61 RB_GENERATE_STATIC(cmap_sym, charmap, rb_sym, cmap_compare_sym); 62 RB_GENERATE_STATIC(cmap_wc, charmap, rb_wc, cmap_compare_wc); 63 64 /* 65 * Array of POSIX specific portable characters. 66 */ 67 68 static const struct { 69 const char *name; 70 int ch; 71 } portable_chars[] = { 72 { "NUL", '\0' }, 73 { "SOH", '\x01' }, 74 { "STX", '\x02' }, 75 { "ETX", '\x03' }, 76 { "EOT", '\x04' }, 77 { "ENQ", '\x05' }, 78 { "ACK", '\x06' }, 79 { "BEL", '\a' }, 80 { "alert", '\a' }, 81 { "BS", '\b' }, 82 { "backspace", '\b' }, 83 { "HT", '\t' }, 84 { "tab", '\t' }, 85 { "LF", '\n' }, 86 { "newline", '\n' }, 87 { "VT", '\v' }, 88 { "vertical-tab", '\v' }, 89 { "FF", '\f' }, 90 { "form-feed", '\f' }, 91 { "CR", '\r' }, 92 { "carriage-return", '\r' }, 93 { "SO", '\x0e' }, 94 { "SI", '\x0f' }, 95 { "DLE", '\x10' }, 96 { "DC1", '\x11' }, 97 { "DC2", '\x12' }, 98 { "DC3", '\x13' }, 99 { "DC4", '\x14' }, 100 { "NAK", '\x15' }, 101 { "SYN", '\x16' }, 102 { "ETB", '\x17' }, 103 { "CAN", '\x18' }, 104 { "EM", '\x19' }, 105 { "SUB", '\x1a' }, 106 { "ESC", '\x1b' }, 107 { "FS", '\x1c' }, 108 { "IS4", '\x1c' }, 109 { "GS", '\x1d' }, 110 { "IS3", '\x1d' }, 111 { "RS", '\x1e' }, 112 { "IS2", '\x1e' }, 113 { "US", '\x1f' }, 114 { "IS1", '\x1f' }, 115 { "DEL", '\x7f' }, 116 { "space", ' ' }, 117 { "exclamation-mark", '!' }, 118 { "quotation-mark", '"' }, 119 { "number-sign", '#' }, 120 { "dollar-sign", '$' }, 121 { "percent-sign", '%' }, 122 { "ampersand", '&' }, 123 { "apostrophe", '\'' }, 124 { "left-parenthesis", '(' }, 125 { "right-parenthesis", ')' }, 126 { "asterisk", '*' }, 127 { "plus-sign", '+' }, 128 { "comma", ','}, 129 { "hyphen-minus", '-' }, 130 { "hyphen", '-' }, 131 { "full-stop", '.' }, 132 { "period", '.' }, 133 { "slash", '/' }, 134 { "solidus", '/' }, 135 { "zero", '0' }, 136 { "one", '1' }, 137 { "two", '2' }, 138 { "three", '3' }, 139 { "four", '4' }, 140 { "five", '5' }, 141 { "six", '6' }, 142 { "seven", '7' }, 143 { "eight", '8' }, 144 { "nine", '9' }, 145 { "colon", ':' }, 146 { "semicolon", ';' }, 147 { "less-than-sign", '<' }, 148 { "equals-sign", '=' }, 149 { "greater-than-sign", '>' }, 150 { "question-mark", '?' }, 151 { "commercial-at", '@' }, 152 { "left-square-bracket", '[' }, 153 { "backslash", '\\' }, 154 { "reverse-solidus", '\\' }, 155 { "right-square-bracket", ']' }, 156 { "circumflex", '^' }, 157 { "circumflex-accent", '^' }, 158 { "low-line", '_' }, 159 { "underscore", '_' }, 160 { "grave-accent", '`' }, 161 { "left-brace", '{' }, 162 { "left-curly-bracket", '{' }, 163 { "vertical-line", '|' }, 164 { "right-brace", '}' }, 165 { "right-curly-bracket", '}' }, 166 { "tilde", '~' }, 167 { "A", 'A' }, 168 { "B", 'B' }, 169 { "C", 'C' }, 170 { "D", 'D' }, 171 { "E", 'E' }, 172 { "F", 'F' }, 173 { "G", 'G' }, 174 { "H", 'H' }, 175 { "I", 'I' }, 176 { "J", 'J' }, 177 { "K", 'K' }, 178 { "L", 'L' }, 179 { "M", 'M' }, 180 { "N", 'N' }, 181 { "O", 'O' }, 182 { "P", 'P' }, 183 { "Q", 'Q' }, 184 { "R", 'R' }, 185 { "S", 'S' }, 186 { "T", 'T' }, 187 { "U", 'U' }, 188 { "V", 'V' }, 189 { "W", 'W' }, 190 { "X", 'X' }, 191 { "Y", 'Y' }, 192 { "Z", 'Z' }, 193 { "a", 'a' }, 194 { "b", 'b' }, 195 { "c", 'c' }, 196 { "d", 'd' }, 197 { "e", 'e' }, 198 { "f", 'f' }, 199 { "g", 'g' }, 200 { "h", 'h' }, 201 { "i", 'i' }, 202 { "j", 'j' }, 203 { "k", 'k' }, 204 { "l", 'l' }, 205 { "m", 'm' }, 206 { "n", 'n' }, 207 { "o", 'o' }, 208 { "p", 'p' }, 209 { "q", 'q' }, 210 { "r", 'r' }, 211 { "s", 's' }, 212 { "t", 't' }, 213 { "u", 'u' }, 214 { "v", 'v' }, 215 { "w", 'w' }, 216 { "x", 'x' }, 217 { "y", 'y' }, 218 { "z", 'z' }, 219 { NULL, 0 } 220 }; 221 222 static int 223 cmap_compare_sym(const void *n1, const void *n2) 224 { 225 const charmap_t *c1 = n1; 226 const charmap_t *c2 = n2; 227 int rv; 228 229 rv = strcmp(c1->name, c2->name); 230 return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); 231 } 232 233 static int 234 cmap_compare_wc(const void *n1, const void *n2) 235 { 236 const charmap_t *c1 = n1; 237 const charmap_t *c2 = n2; 238 239 return ((c1->wc < c2->wc) ? -1 : (c1->wc > c2->wc) ? 1 : 0); 240 } 241 242 void 243 init_charmap(void) 244 { 245 RB_INIT(&cmap_sym); 246 247 RB_INIT(&cmap_wc); 248 } 249 250 static void 251 add_charmap_impl(const char *sym, wchar_t wc, int nodups) 252 { 253 charmap_t srch; 254 charmap_t *n = NULL; 255 256 srch.wc = wc; 257 srch.name = sym; 258 259 /* 260 * also possibly insert the wide mapping, although note that there 261 * can only be one of these per wide character code. 262 */ 263 if ((wc != (wchar_t)-1) && ((RB_FIND(cmap_wc, &cmap_wc, &srch)) == NULL)) { 264 if ((n = calloc(1, sizeof (*n))) == NULL) { 265 errf("out of memory"); 266 return; 267 } 268 n->wc = wc; 269 RB_INSERT(cmap_wc, &cmap_wc, n); 270 } 271 272 if (sym) { 273 if (RB_FIND(cmap_sym, &cmap_sym, &srch) != NULL) { 274 if (nodups) { 275 errf("duplicate character definition"); 276 } 277 return; 278 } 279 if ((n == NULL) && ((n = calloc(1, sizeof (*n))) == NULL)) { 280 errf("out of memory"); 281 return; 282 } 283 n->wc = wc; 284 n->name = sym; 285 286 RB_INSERT(cmap_sym, &cmap_sym, n); 287 } 288 } 289 290 void 291 add_charmap(const char *sym, int c) 292 { 293 add_charmap_impl(sym, c, 1); 294 } 295 296 void 297 add_charmap_undefined(char *sym) 298 { 299 charmap_t srch; 300 charmap_t *cm = NULL; 301 302 srch.name = sym; 303 cm = RB_FIND(cmap_sym, &cmap_sym, &srch); 304 305 if ((undefok == 0) && ((cm == NULL) || (cm->wc == (wchar_t)-1))) { 306 warn("undefined symbol <%s>", sym); 307 add_charmap_impl(sym, -1, 0); 308 } else { 309 free(sym); 310 } 311 } 312 313 void 314 add_charmap_range(char *s, char *e, int wc) 315 { 316 int ls, le; 317 int si; 318 int sn, en; 319 int i; 320 321 static const char *digits = "0123456789"; 322 323 ls = strlen(s); 324 le = strlen(e); 325 326 if (((si = strcspn(s, digits)) == 0) || (si == ls) || 327 (strncmp(s, e, si) != 0) || 328 ((int)strspn(s + si, digits) != (ls - si)) || 329 ((int)strspn(e + si, digits) != (le - si)) || 330 ((sn = atoi(s + si)) > ((en = atoi(e + si))))) { 331 errf("malformed charmap range"); 332 return; 333 } 334 335 s[si] = 0; 336 337 for (i = sn; i <= en; i++) { 338 char *nn; 339 (void) asprintf(&nn, "%s%0*u", s, ls - si, i); 340 if (nn == NULL) { 341 errf("out of memory"); 342 return; 343 } 344 345 add_charmap_impl(nn, wc, 1); 346 wc++; 347 } 348 free(s); 349 free(e); 350 } 351 352 void 353 add_charmap_char(const char *name, int val) 354 { 355 add_charmap_impl(name, val, 0); 356 } 357 358 /* 359 * POSIX insists that certain entries be present, even when not in the 360 * original charmap file. 361 */ 362 void 363 add_charmap_posix(void) 364 { 365 int i; 366 367 for (i = 0; portable_chars[i].name; i++) { 368 add_charmap_char(portable_chars[i].name, portable_chars[i].ch); 369 } 370 } 371 372 int 373 lookup_charmap(const char *sym, wchar_t *wc) 374 { 375 charmap_t srch; 376 charmap_t *n; 377 378 srch.name = sym; 379 n = RB_FIND(cmap_sym, &cmap_sym, &srch); 380 if (n && n->wc != (wchar_t)-1) { 381 if (wc) 382 *wc = n->wc; 383 return (0); 384 } 385 return (-1); 386 } 387 388 int 389 check_charmap(wchar_t wc) 390 { 391 charmap_t srch; 392 393 srch.wc = wc; 394 return (RB_FIND(cmap_wc, &cmap_wc, &srch) ? 0 : -1); 395 } 396