1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 14 */ 15 16 /* 17 * CHARMAP file handling for localedef. 18 */ 19 20 #include <stdio.h> 21 #include <stdlib.h> 22 #include <string.h> 23 #include <limits.h> 24 #include <unistd.h> 25 #include <alloca.h> 26 #include <sys/avl.h> 27 #include <stddef.h> 28 #include <unistd.h> 29 #include "localedef.h" 30 #include "parser.tab.h" 31 32 static avl_tree_t cmap_sym; 33 static avl_tree_t cmap_wc; 34 35 typedef struct charmap { 36 const char *name; 37 wchar_t wc; 38 avl_node_t avl_sym; 39 avl_node_t avl_wc; 40 } charmap_t; 41 42 /* 43 * Array of POSIX specific portable characters. 44 */ 45 static const struct { 46 const char *name; 47 int ch; 48 } portable_chars[] = { 49 { "NUL", '\0' }, 50 { "alert", '\a' }, 51 { "backspace", '\b' }, 52 { "tab", '\t' }, 53 { "carriage-return", '\r' }, 54 { "newline", '\n' }, 55 { "vertical-tab", '\v' }, 56 { "form-feed", '\f' }, 57 { "space", ' ' }, 58 { "exclamation-mark", '!' }, 59 { "quotation-mark", '"' }, 60 { "number-sign", '#' }, 61 { "dollar-sign", '$' }, 62 { "percent-sign", '%' }, 63 { "ampersand", '&' }, 64 { "apostrophe", '\'' }, 65 { "left-parenthesis", '(' }, 66 { "right-parenthesis", '(' }, 67 { "asterisk", '*' }, 68 { "plus-sign", '+' }, 69 { "comma", ','}, 70 { "hyphen-minus", '-' }, 71 { "hyphen", '-' }, 72 { "full-stop", '.' }, 73 { "period", '.' }, 74 { "slash", '/' }, 75 { "solidus", '/' }, 76 { "zero", '0' }, 77 { "one", '1' }, 78 { "two", '2' }, 79 { "three", '3' }, 80 { "four", '4' }, 81 { "five", '5' }, 82 { "six", '6' }, 83 { "seven", '7' }, 84 { "eight", '8' }, 85 { "nine", '9' }, 86 { "colon", ':' }, 87 { "semicolon", ';' }, 88 { "less-than-sign", '<' }, 89 { "equals-sign", '=' }, 90 { "greater-than-sign", '>' }, 91 { "question-mark", '?' }, 92 { "commercial-at", '@' }, 93 { "left-square-bracket", '[' }, 94 { "backslash", '\\' }, 95 { "reverse-solidus", '\\' }, 96 { "right-square-bracket", ']' }, 97 { "circumflex", '^' }, 98 { "circumflex-accent", '^' }, 99 { "low-line", '_' }, 100 { "underscore", '_' }, 101 { "grave-accent", '`' }, 102 { "left-brace", '{' }, 103 { "left-curly-bracket", '{' }, 104 { "vertical-line", '|' }, 105 { "right-brace", '}' }, 106 { "right-curly-bracket", '}' }, 107 { "tilde", '~' }, 108 { "A", 'A' }, 109 { "B", 'B' }, 110 { "C", 'C' }, 111 { "D", 'D' }, 112 { "E", 'E' }, 113 { "F", 'F' }, 114 { "G", 'G' }, 115 { "H", 'H' }, 116 { "I", 'I' }, 117 { "J", 'J' }, 118 { "K", 'K' }, 119 { "L", 'L' }, 120 { "M", 'M' }, 121 { "N", 'N' }, 122 { "O", 'O' }, 123 { "P", 'P' }, 124 { "Q", 'Q' }, 125 { "R", 'R' }, 126 { "S", 'S' }, 127 { "T", 'T' }, 128 { "U", 'U' }, 129 { "V", 'V' }, 130 { "W", 'W' }, 131 { "X", 'X' }, 132 { "Y", 'Y' }, 133 { "Z", 'Z' }, 134 { "a", 'a' }, 135 { "b", 'b' }, 136 { "c", 'c' }, 137 { "d", 'd' }, 138 { "e", 'e' }, 139 { "f", 'f' }, 140 { "g", 'g' }, 141 { "h", 'h' }, 142 { "i", 'i' }, 143 { "j", 'j' }, 144 { "k", 'k' }, 145 { "l", 'l' }, 146 { "m", 'm' }, 147 { "n", 'n' }, 148 { "o", 'o' }, 149 { "p", 'p' }, 150 { "q", 'q' }, 151 { "r", 'r' }, 152 { "s", 's' }, 153 { "t", 't' }, 154 { "u", 'u' }, 155 { "v", 'v' }, 156 { "w", 'w' }, 157 { "x", 'x' }, 158 { "y", 'y' }, 159 { "z", 'z' }, 160 { NULL, 0 } 161 }; 162 163 static int 164 cmap_compare_sym(const void *n1, const void *n2) 165 { 166 const charmap_t *c1 = n1; 167 const charmap_t *c2 = n2; 168 int rv; 169 170 rv = strcmp(c1->name, c2->name); 171 return ((rv < 0) ? -1 : (rv > 0) ? 1 : 0); 172 } 173 174 static int 175 cmap_compare_wc(const void *n1, const void *n2) 176 { 177 const charmap_t *c1 = n1; 178 const charmap_t *c2 = n2; 179 180 return ((c1->wc < c2->wc) ? -1 : (c1->wc > c2->wc) ? 1 : 0); 181 } 182 183 void 184 init_charmap(void) 185 { 186 avl_create(&cmap_sym, cmap_compare_sym, sizeof (charmap_t), 187 offsetof(charmap_t, avl_sym)); 188 189 avl_create(&cmap_wc, cmap_compare_wc, sizeof (charmap_t), 190 offsetof(charmap_t, avl_wc)); 191 } 192 193 static void 194 add_charmap_impl(const char *sym, wchar_t wc, int nodups) 195 { 196 charmap_t srch; 197 charmap_t *n = NULL; 198 avl_index_t where; 199 200 srch.wc = wc; 201 srch.name = sym; 202 203 /* 204 * also possibly insert the wide mapping, although note that there 205 * can only be one of these per wide character code. 206 */ 207 if ((wc != (wchar_t)-1) && 208 ((avl_find(&cmap_wc, &srch, &where)) == NULL)) { 209 if ((n = calloc(1, sizeof (*n))) == NULL) { 210 errf(_("out of memory")); 211 return; 212 } 213 n->wc = wc; 214 avl_insert(&cmap_wc, n, where); 215 } 216 217 if (sym) { 218 if (avl_find(&cmap_sym, &srch, &where) != NULL) { 219 if (nodups) { 220 errf(_("duplicate character definition")); 221 } 222 return; 223 } 224 if ((n == NULL) && ((n = calloc(1, sizeof (*n))) == NULL)) { 225 errf(_("out of memory")); 226 return; 227 } 228 n->wc = wc; 229 n->name = sym; 230 231 avl_insert(&cmap_sym, n, where); 232 } 233 } 234 235 void 236 add_charmap(const char *sym, int c) 237 { 238 add_charmap_impl(sym, c, 1); 239 } 240 241 void 242 add_charmap_undefined(char *sym) 243 { 244 charmap_t srch; 245 charmap_t *cm = NULL; 246 247 srch.name = sym; 248 cm = avl_find(&cmap_sym, &srch, NULL); 249 250 if ((undefok == 0) && ((cm == NULL) || (cm->wc == (wchar_t)-1))) { 251 warn(_("undefined symbol <%s>"), sym); 252 add_charmap_impl(sym, -1, 0); 253 } else { 254 free(sym); 255 } 256 } 257 258 void 259 add_charmap_range(char *s, char *e, int wc) 260 { 261 int ls, le; 262 int si; 263 int sn, en; 264 int i; 265 266 static const char *digits = "0123456789"; 267 268 ls = strlen(s); 269 le = strlen(e); 270 271 if (((si = strcspn(s, digits)) == 0) || (si == ls) || 272 (strncmp(s, e, si) != 0) || 273 (strspn(s + si, digits) != (ls - si)) || 274 (strspn(e + si, digits) != (le - si)) || 275 ((sn = atoi(s + si)) > ((en = atoi(e + si))))) { 276 errf(_("malformed charmap range")); 277 return; 278 } 279 280 s[si] = 0; 281 282 for (i = sn; i <= en; i++) { 283 char *nn; 284 (void) asprintf(&nn, "%s%0*u", s, ls - si, i); 285 if (nn == NULL) { 286 errf(_("out of memory")); 287 return; 288 } 289 290 add_charmap_impl(nn, wc, 1); 291 wc++; 292 } 293 free(s); 294 free(e); 295 } 296 297 void 298 add_charmap_char(const char *name, int val) 299 { 300 add_charmap_impl(name, val, 0); 301 } 302 303 /* 304 * POSIX insists that certain entries be present, even when not in the 305 * original charmap file. 306 */ 307 void 308 add_charmap_posix(void) 309 { 310 int i; 311 312 for (i = 0; portable_chars[i].name; i++) { 313 add_charmap_char(portable_chars[i].name, portable_chars[i].ch); 314 } 315 } 316 317 int 318 lookup_charmap(const char *sym, wchar_t *wc) 319 { 320 charmap_t srch; 321 charmap_t *n; 322 323 srch.name = sym; 324 n = avl_find(&cmap_sym, &srch, NULL); 325 if (n && n->wc != (wchar_t)-1) { 326 if (wc) 327 *wc = n->wc; 328 return (0); 329 } 330 return (-1); 331 } 332 333 int 334 check_charmap(wchar_t wc) 335 { 336 charmap_t srch; 337 338 srch.wc = wc; 339 return (avl_find(&cmap_wc, &srch, NULL) ? 0 : -1); 340 } 341