1*e12a957fSPedro F. Giffuni /*- 2057ca2d4SBaptiste Daroussin * Copyright 2010 Nexenta Systems, Inc. All rights reserved. 3057ca2d4SBaptiste Daroussin * Copyright 2015 John Marino <draco@marino.st> 4057ca2d4SBaptiste Daroussin * 5057ca2d4SBaptiste Daroussin * This source code is derived from the illumos localedef command, and 6057ca2d4SBaptiste Daroussin * provided under BSD-style license terms by Nexenta Systems, Inc. 7057ca2d4SBaptiste Daroussin * 8057ca2d4SBaptiste Daroussin * Redistribution and use in source and binary forms, with or without 9057ca2d4SBaptiste Daroussin * modification, are permitted provided that the following conditions 10057ca2d4SBaptiste Daroussin * are met: 11057ca2d4SBaptiste Daroussin * 12057ca2d4SBaptiste Daroussin * 1. Redistributions of source code must retain the above copyright 13057ca2d4SBaptiste Daroussin * notice, this list of conditions and the following disclaimer. 14057ca2d4SBaptiste Daroussin * 2. Redistributions in binary form must reproduce the above copyright 15057ca2d4SBaptiste Daroussin * notice, this list of conditions and the following disclaimer in the 16057ca2d4SBaptiste Daroussin * documentation and/or other materials provided with the distribution. 17057ca2d4SBaptiste Daroussin * 18057ca2d4SBaptiste Daroussin * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19057ca2d4SBaptiste Daroussin * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20057ca2d4SBaptiste Daroussin * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21057ca2d4SBaptiste Daroussin * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 22057ca2d4SBaptiste Daroussin * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23057ca2d4SBaptiste Daroussin * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24057ca2d4SBaptiste Daroussin * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25057ca2d4SBaptiste Daroussin * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26057ca2d4SBaptiste Daroussin * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27057ca2d4SBaptiste Daroussin * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28057ca2d4SBaptiste Daroussin * POSSIBILITY OF SUCH DAMAGE. 29057ca2d4SBaptiste Daroussin */ 30057ca2d4SBaptiste Daroussin 31057ca2d4SBaptiste Daroussin /* 32057ca2d4SBaptiste Daroussin * This file contains the "scanner", which tokenizes the input files 33057ca2d4SBaptiste Daroussin * for localedef for processing by the higher level grammar processor. 34057ca2d4SBaptiste Daroussin */ 35057ca2d4SBaptiste Daroussin #include <sys/cdefs.h> 36057ca2d4SBaptiste Daroussin __FBSDID("$FreeBSD$"); 37057ca2d4SBaptiste Daroussin 38057ca2d4SBaptiste Daroussin #include <stdio.h> 39057ca2d4SBaptiste Daroussin #include <stdlib.h> 40057ca2d4SBaptiste Daroussin #include <ctype.h> 41057ca2d4SBaptiste Daroussin #include <limits.h> 42057ca2d4SBaptiste Daroussin #include <string.h> 43057ca2d4SBaptiste Daroussin #include <wchar.h> 44057ca2d4SBaptiste Daroussin #include <sys/types.h> 45057ca2d4SBaptiste Daroussin #include <assert.h> 46057ca2d4SBaptiste Daroussin #include "localedef.h" 47057ca2d4SBaptiste Daroussin #include "parser.h" 48057ca2d4SBaptiste Daroussin 49057ca2d4SBaptiste Daroussin int com_char = '#'; 50057ca2d4SBaptiste Daroussin int esc_char = '\\'; 51057ca2d4SBaptiste Daroussin int mb_cur_min = 1; 52057ca2d4SBaptiste Daroussin int mb_cur_max = 1; 53057ca2d4SBaptiste Daroussin int lineno = 1; 54057ca2d4SBaptiste Daroussin int warnings = 0; 55057ca2d4SBaptiste Daroussin int is_stdin = 1; 56057ca2d4SBaptiste Daroussin FILE *input; 57057ca2d4SBaptiste Daroussin static int nextline; 58057ca2d4SBaptiste Daroussin //static FILE *input = stdin; 59057ca2d4SBaptiste Daroussin static const char *filename = "<stdin>"; 60057ca2d4SBaptiste Daroussin static int instring = 0; 61057ca2d4SBaptiste Daroussin static int escaped = 0; 62057ca2d4SBaptiste Daroussin 63057ca2d4SBaptiste Daroussin /* 64057ca2d4SBaptiste Daroussin * Token space ... grows on demand. 65057ca2d4SBaptiste Daroussin */ 66057ca2d4SBaptiste Daroussin static char *token = NULL; 67057ca2d4SBaptiste Daroussin static int tokidx; 68057ca2d4SBaptiste Daroussin static int toksz = 0; 69057ca2d4SBaptiste Daroussin static int hadtok = 0; 70057ca2d4SBaptiste Daroussin 71057ca2d4SBaptiste Daroussin /* 72057ca2d4SBaptiste Daroussin * Wide string space ... grows on demand. 73057ca2d4SBaptiste Daroussin */ 74057ca2d4SBaptiste Daroussin static wchar_t *widestr = NULL; 75057ca2d4SBaptiste Daroussin static int wideidx = 0; 76057ca2d4SBaptiste Daroussin static int widesz = 0; 77057ca2d4SBaptiste Daroussin 78057ca2d4SBaptiste Daroussin /* 79057ca2d4SBaptiste Daroussin * The last keyword seen. This is useful to trigger the special lexer rules 80057ca2d4SBaptiste Daroussin * for "copy" and also collating symbols and elements. 81057ca2d4SBaptiste Daroussin */ 82057ca2d4SBaptiste Daroussin int last_kw = 0; 83057ca2d4SBaptiste Daroussin static int category = T_END; 84057ca2d4SBaptiste Daroussin 85057ca2d4SBaptiste Daroussin static struct token { 86057ca2d4SBaptiste Daroussin int id; 87057ca2d4SBaptiste Daroussin const char *name; 88057ca2d4SBaptiste Daroussin } keywords[] = { 89057ca2d4SBaptiste Daroussin { T_COM_CHAR, "comment_char" }, 90057ca2d4SBaptiste Daroussin { T_ESC_CHAR, "escape_char" }, 91057ca2d4SBaptiste Daroussin { T_END, "END" }, 92057ca2d4SBaptiste Daroussin { T_COPY, "copy" }, 93057ca2d4SBaptiste Daroussin { T_MESSAGES, "LC_MESSAGES" }, 94057ca2d4SBaptiste Daroussin { T_YESSTR, "yesstr" }, 95057ca2d4SBaptiste Daroussin { T_YESEXPR, "yesexpr" }, 96057ca2d4SBaptiste Daroussin { T_NOSTR, "nostr" }, 97057ca2d4SBaptiste Daroussin { T_NOEXPR, "noexpr" }, 98057ca2d4SBaptiste Daroussin { T_MONETARY, "LC_MONETARY" }, 99057ca2d4SBaptiste Daroussin { T_INT_CURR_SYMBOL, "int_curr_symbol" }, 100057ca2d4SBaptiste Daroussin { T_CURRENCY_SYMBOL, "currency_symbol" }, 101057ca2d4SBaptiste Daroussin { T_MON_DECIMAL_POINT, "mon_decimal_point" }, 102057ca2d4SBaptiste Daroussin { T_MON_THOUSANDS_SEP, "mon_thousands_sep" }, 103057ca2d4SBaptiste Daroussin { T_POSITIVE_SIGN, "positive_sign" }, 104057ca2d4SBaptiste Daroussin { T_NEGATIVE_SIGN, "negative_sign" }, 105057ca2d4SBaptiste Daroussin { T_MON_GROUPING, "mon_grouping" }, 106057ca2d4SBaptiste Daroussin { T_INT_FRAC_DIGITS, "int_frac_digits" }, 107057ca2d4SBaptiste Daroussin { T_FRAC_DIGITS, "frac_digits" }, 108057ca2d4SBaptiste Daroussin { T_P_CS_PRECEDES, "p_cs_precedes" }, 109057ca2d4SBaptiste Daroussin { T_P_SEP_BY_SPACE, "p_sep_by_space" }, 110057ca2d4SBaptiste Daroussin { T_N_CS_PRECEDES, "n_cs_precedes" }, 111057ca2d4SBaptiste Daroussin { T_N_SEP_BY_SPACE, "n_sep_by_space" }, 112057ca2d4SBaptiste Daroussin { T_P_SIGN_POSN, "p_sign_posn" }, 113057ca2d4SBaptiste Daroussin { T_N_SIGN_POSN, "n_sign_posn" }, 114057ca2d4SBaptiste Daroussin { T_INT_P_CS_PRECEDES, "int_p_cs_precedes" }, 115057ca2d4SBaptiste Daroussin { T_INT_N_CS_PRECEDES, "int_n_cs_precedes" }, 116057ca2d4SBaptiste Daroussin { T_INT_P_SEP_BY_SPACE, "int_p_sep_by_space" }, 117057ca2d4SBaptiste Daroussin { T_INT_N_SEP_BY_SPACE, "int_n_sep_by_space" }, 118057ca2d4SBaptiste Daroussin { T_INT_P_SIGN_POSN, "int_p_sign_posn" }, 119057ca2d4SBaptiste Daroussin { T_INT_N_SIGN_POSN, "int_n_sign_posn" }, 120057ca2d4SBaptiste Daroussin { T_COLLATE, "LC_COLLATE" }, 121057ca2d4SBaptiste Daroussin { T_COLLATING_SYMBOL, "collating-symbol" }, 122057ca2d4SBaptiste Daroussin { T_COLLATING_ELEMENT, "collating-element" }, 123057ca2d4SBaptiste Daroussin { T_FROM, "from" }, 124057ca2d4SBaptiste Daroussin { T_ORDER_START, "order_start" }, 125057ca2d4SBaptiste Daroussin { T_ORDER_END, "order_end" }, 126057ca2d4SBaptiste Daroussin { T_FORWARD, "forward" }, 127057ca2d4SBaptiste Daroussin { T_BACKWARD, "backward" }, 128057ca2d4SBaptiste Daroussin { T_POSITION, "position" }, 129057ca2d4SBaptiste Daroussin { T_IGNORE, "IGNORE" }, 130057ca2d4SBaptiste Daroussin { T_UNDEFINED, "UNDEFINED" }, 131057ca2d4SBaptiste Daroussin { T_NUMERIC, "LC_NUMERIC" }, 132057ca2d4SBaptiste Daroussin { T_DECIMAL_POINT, "decimal_point" }, 133057ca2d4SBaptiste Daroussin { T_THOUSANDS_SEP, "thousands_sep" }, 134057ca2d4SBaptiste Daroussin { T_GROUPING, "grouping" }, 135057ca2d4SBaptiste Daroussin { T_TIME, "LC_TIME" }, 136057ca2d4SBaptiste Daroussin { T_ABDAY, "abday" }, 137057ca2d4SBaptiste Daroussin { T_DAY, "day" }, 138057ca2d4SBaptiste Daroussin { T_ABMON, "abmon" }, 139057ca2d4SBaptiste Daroussin { T_MON, "mon" }, 140057ca2d4SBaptiste Daroussin { T_D_T_FMT, "d_t_fmt" }, 141057ca2d4SBaptiste Daroussin { T_D_FMT, "d_fmt" }, 142057ca2d4SBaptiste Daroussin { T_T_FMT, "t_fmt" }, 143057ca2d4SBaptiste Daroussin { T_AM_PM, "am_pm" }, 144057ca2d4SBaptiste Daroussin { T_T_FMT_AMPM, "t_fmt_ampm" }, 145057ca2d4SBaptiste Daroussin { T_ERA, "era" }, 146057ca2d4SBaptiste Daroussin { T_ERA_D_FMT, "era_d_fmt" }, 147057ca2d4SBaptiste Daroussin { T_ERA_T_FMT, "era_t_fmt" }, 148057ca2d4SBaptiste Daroussin { T_ERA_D_T_FMT, "era_d_t_fmt" }, 149057ca2d4SBaptiste Daroussin { T_ALT_DIGITS, "alt_digits" }, 150057ca2d4SBaptiste Daroussin { T_CTYPE, "LC_CTYPE" }, 151057ca2d4SBaptiste Daroussin { T_ISUPPER, "upper" }, 152057ca2d4SBaptiste Daroussin { T_ISLOWER, "lower" }, 153057ca2d4SBaptiste Daroussin { T_ISALPHA, "alpha" }, 154057ca2d4SBaptiste Daroussin { T_ISDIGIT, "digit" }, 155057ca2d4SBaptiste Daroussin { T_ISPUNCT, "punct" }, 156057ca2d4SBaptiste Daroussin { T_ISXDIGIT, "xdigit" }, 157057ca2d4SBaptiste Daroussin { T_ISSPACE, "space" }, 158057ca2d4SBaptiste Daroussin { T_ISPRINT, "print" }, 159057ca2d4SBaptiste Daroussin { T_ISGRAPH, "graph" }, 160057ca2d4SBaptiste Daroussin { T_ISBLANK, "blank" }, 161057ca2d4SBaptiste Daroussin { T_ISCNTRL, "cntrl" }, 162057ca2d4SBaptiste Daroussin /* 163057ca2d4SBaptiste Daroussin * These entries are local additions, and not specified by 164057ca2d4SBaptiste Daroussin * TOG. Note that they are not guaranteed to be accurate for 165057ca2d4SBaptiste Daroussin * all locales, and so applications should not depend on them. 166057ca2d4SBaptiste Daroussin */ 167057ca2d4SBaptiste Daroussin { T_ISSPECIAL, "special" }, 168057ca2d4SBaptiste Daroussin { T_ISENGLISH, "english" }, 169057ca2d4SBaptiste Daroussin { T_ISPHONOGRAM, "phonogram" }, 170057ca2d4SBaptiste Daroussin { T_ISIDEOGRAM, "ideogram" }, 171057ca2d4SBaptiste Daroussin { T_ISNUMBER, "number" }, 172057ca2d4SBaptiste Daroussin /* 173057ca2d4SBaptiste Daroussin * We have to support this in the grammar, but it would be a 174057ca2d4SBaptiste Daroussin * syntax error to define a character as one of these without 175057ca2d4SBaptiste Daroussin * also defining it as an alpha or digit. We ignore it in our 176057ca2d4SBaptiste Daroussin * parsing. 177057ca2d4SBaptiste Daroussin */ 178057ca2d4SBaptiste Daroussin { T_ISALNUM, "alnum" }, 179057ca2d4SBaptiste Daroussin { T_TOUPPER, "toupper" }, 180057ca2d4SBaptiste Daroussin { T_TOLOWER, "tolower" }, 181057ca2d4SBaptiste Daroussin 182057ca2d4SBaptiste Daroussin /* 183057ca2d4SBaptiste Daroussin * These are keywords used in the charmap file. Note that 184fcc7baa1SPedro F. Giffuni * Solaris originally used angle brackets to wrap some of them, 185057ca2d4SBaptiste Daroussin * but we removed that to simplify our parser. The first of these 186057ca2d4SBaptiste Daroussin * items are "global items." 187057ca2d4SBaptiste Daroussin */ 188057ca2d4SBaptiste Daroussin { T_CHARMAP, "CHARMAP" }, 189057ca2d4SBaptiste Daroussin { T_WIDTH, "WIDTH" }, 190057ca2d4SBaptiste Daroussin 191057ca2d4SBaptiste Daroussin { -1, NULL }, 192057ca2d4SBaptiste Daroussin }; 193057ca2d4SBaptiste Daroussin 194057ca2d4SBaptiste Daroussin /* 195057ca2d4SBaptiste Daroussin * These special words are only used in a charmap file, enclosed in <>. 196057ca2d4SBaptiste Daroussin */ 197057ca2d4SBaptiste Daroussin static struct token symwords[] = { 198057ca2d4SBaptiste Daroussin { T_COM_CHAR, "comment_char" }, 199057ca2d4SBaptiste Daroussin { T_ESC_CHAR, "escape_char" }, 200057ca2d4SBaptiste Daroussin { T_CODE_SET, "code_set_name" }, 201057ca2d4SBaptiste Daroussin { T_MB_CUR_MAX, "mb_cur_max" }, 202057ca2d4SBaptiste Daroussin { T_MB_CUR_MIN, "mb_cur_min" }, 203057ca2d4SBaptiste Daroussin { -1, NULL }, 204057ca2d4SBaptiste Daroussin }; 205057ca2d4SBaptiste Daroussin 206057ca2d4SBaptiste Daroussin static int categories[] = { 207057ca2d4SBaptiste Daroussin T_CHARMAP, 208057ca2d4SBaptiste Daroussin T_CTYPE, 209057ca2d4SBaptiste Daroussin T_COLLATE, 210057ca2d4SBaptiste Daroussin T_MESSAGES, 211057ca2d4SBaptiste Daroussin T_MONETARY, 212057ca2d4SBaptiste Daroussin T_NUMERIC, 213057ca2d4SBaptiste Daroussin T_TIME, 214057ca2d4SBaptiste Daroussin T_WIDTH, 215057ca2d4SBaptiste Daroussin 0 216057ca2d4SBaptiste Daroussin }; 217057ca2d4SBaptiste Daroussin 218057ca2d4SBaptiste Daroussin void 219057ca2d4SBaptiste Daroussin reset_scanner(const char *fname) 220057ca2d4SBaptiste Daroussin { 221057ca2d4SBaptiste Daroussin if (fname == NULL) { 222057ca2d4SBaptiste Daroussin filename = "<stdin>"; 223057ca2d4SBaptiste Daroussin is_stdin = 1; 224057ca2d4SBaptiste Daroussin } else { 225057ca2d4SBaptiste Daroussin if (!is_stdin) 226057ca2d4SBaptiste Daroussin (void) fclose(input); 227057ca2d4SBaptiste Daroussin if ((input = fopen(fname, "r")) == NULL) { 228057ca2d4SBaptiste Daroussin perror("fopen"); 229057ca2d4SBaptiste Daroussin exit(4); 230057ca2d4SBaptiste Daroussin } else { 231057ca2d4SBaptiste Daroussin is_stdin = 0; 232057ca2d4SBaptiste Daroussin } 233057ca2d4SBaptiste Daroussin filename = fname; 234057ca2d4SBaptiste Daroussin } 235057ca2d4SBaptiste Daroussin com_char = '#'; 236057ca2d4SBaptiste Daroussin esc_char = '\\'; 237057ca2d4SBaptiste Daroussin instring = 0; 238057ca2d4SBaptiste Daroussin escaped = 0; 239057ca2d4SBaptiste Daroussin lineno = 1; 240057ca2d4SBaptiste Daroussin nextline = 1; 241057ca2d4SBaptiste Daroussin tokidx = 0; 242057ca2d4SBaptiste Daroussin wideidx = 0; 243057ca2d4SBaptiste Daroussin } 244057ca2d4SBaptiste Daroussin 245057ca2d4SBaptiste Daroussin #define hex(x) \ 246057ca2d4SBaptiste Daroussin (isdigit(x) ? (x - '0') : ((islower(x) ? (x - 'a') : (x - 'A')) + 10)) 247057ca2d4SBaptiste Daroussin #define isodigit(x) ((x >= '0') && (x <= '7')) 248057ca2d4SBaptiste Daroussin 249057ca2d4SBaptiste Daroussin static int 250057ca2d4SBaptiste Daroussin scanc(void) 251057ca2d4SBaptiste Daroussin { 252057ca2d4SBaptiste Daroussin int c; 253057ca2d4SBaptiste Daroussin 254057ca2d4SBaptiste Daroussin if (is_stdin) 255057ca2d4SBaptiste Daroussin c = getc(stdin); 256057ca2d4SBaptiste Daroussin else 257057ca2d4SBaptiste Daroussin c = getc(input); 258057ca2d4SBaptiste Daroussin lineno = nextline; 259057ca2d4SBaptiste Daroussin if (c == '\n') { 260057ca2d4SBaptiste Daroussin nextline++; 261057ca2d4SBaptiste Daroussin } 262057ca2d4SBaptiste Daroussin return (c); 263057ca2d4SBaptiste Daroussin } 264057ca2d4SBaptiste Daroussin 265057ca2d4SBaptiste Daroussin static void 266057ca2d4SBaptiste Daroussin unscanc(int c) 267057ca2d4SBaptiste Daroussin { 268057ca2d4SBaptiste Daroussin if (c == '\n') { 269057ca2d4SBaptiste Daroussin nextline--; 270057ca2d4SBaptiste Daroussin } 271057ca2d4SBaptiste Daroussin if (ungetc(c, is_stdin ? stdin : input) < 0) { 272057ca2d4SBaptiste Daroussin yyerror("ungetc failed"); 273057ca2d4SBaptiste Daroussin } 274057ca2d4SBaptiste Daroussin } 275057ca2d4SBaptiste Daroussin 276057ca2d4SBaptiste Daroussin static int 277057ca2d4SBaptiste Daroussin scan_hex_byte(void) 278057ca2d4SBaptiste Daroussin { 279057ca2d4SBaptiste Daroussin int c1, c2; 280057ca2d4SBaptiste Daroussin int v; 281057ca2d4SBaptiste Daroussin 282057ca2d4SBaptiste Daroussin c1 = scanc(); 283057ca2d4SBaptiste Daroussin if (!isxdigit(c1)) { 284057ca2d4SBaptiste Daroussin yyerror("malformed hex digit"); 285057ca2d4SBaptiste Daroussin return (0); 286057ca2d4SBaptiste Daroussin } 287057ca2d4SBaptiste Daroussin c2 = scanc(); 288057ca2d4SBaptiste Daroussin if (!isxdigit(c2)) { 289057ca2d4SBaptiste Daroussin yyerror("malformed hex digit"); 290057ca2d4SBaptiste Daroussin return (0); 291057ca2d4SBaptiste Daroussin } 292057ca2d4SBaptiste Daroussin v = ((hex(c1) << 4) | hex(c2)); 293057ca2d4SBaptiste Daroussin return (v); 294057ca2d4SBaptiste Daroussin } 295057ca2d4SBaptiste Daroussin 296057ca2d4SBaptiste Daroussin static int 297057ca2d4SBaptiste Daroussin scan_dec_byte(void) 298057ca2d4SBaptiste Daroussin { 299057ca2d4SBaptiste Daroussin int c1, c2, c3; 300057ca2d4SBaptiste Daroussin int b; 301057ca2d4SBaptiste Daroussin 302057ca2d4SBaptiste Daroussin c1 = scanc(); 303057ca2d4SBaptiste Daroussin if (!isdigit(c1)) { 304057ca2d4SBaptiste Daroussin yyerror("malformed decimal digit"); 305057ca2d4SBaptiste Daroussin return (0); 306057ca2d4SBaptiste Daroussin } 307057ca2d4SBaptiste Daroussin b = c1 - '0'; 308057ca2d4SBaptiste Daroussin c2 = scanc(); 309057ca2d4SBaptiste Daroussin if (!isdigit(c2)) { 310057ca2d4SBaptiste Daroussin yyerror("malformed decimal digit"); 311057ca2d4SBaptiste Daroussin return (0); 312057ca2d4SBaptiste Daroussin } 313057ca2d4SBaptiste Daroussin b *= 10; 314057ca2d4SBaptiste Daroussin b += (c2 - '0'); 315057ca2d4SBaptiste Daroussin c3 = scanc(); 316057ca2d4SBaptiste Daroussin if (!isdigit(c3)) { 317057ca2d4SBaptiste Daroussin unscanc(c3); 318057ca2d4SBaptiste Daroussin } else { 319057ca2d4SBaptiste Daroussin b *= 10; 320057ca2d4SBaptiste Daroussin b += (c3 - '0'); 321057ca2d4SBaptiste Daroussin } 322057ca2d4SBaptiste Daroussin return (b); 323057ca2d4SBaptiste Daroussin } 324057ca2d4SBaptiste Daroussin 325057ca2d4SBaptiste Daroussin static int 326057ca2d4SBaptiste Daroussin scan_oct_byte(void) 327057ca2d4SBaptiste Daroussin { 328057ca2d4SBaptiste Daroussin int c1, c2, c3; 329057ca2d4SBaptiste Daroussin int b; 330057ca2d4SBaptiste Daroussin 331057ca2d4SBaptiste Daroussin b = 0; 332057ca2d4SBaptiste Daroussin 333057ca2d4SBaptiste Daroussin c1 = scanc(); 334057ca2d4SBaptiste Daroussin if (!isodigit(c1)) { 335057ca2d4SBaptiste Daroussin yyerror("malformed octal digit"); 336057ca2d4SBaptiste Daroussin return (0); 337057ca2d4SBaptiste Daroussin } 338057ca2d4SBaptiste Daroussin b = c1 - '0'; 339057ca2d4SBaptiste Daroussin c2 = scanc(); 340057ca2d4SBaptiste Daroussin if (!isodigit(c2)) { 341057ca2d4SBaptiste Daroussin yyerror("malformed octal digit"); 342057ca2d4SBaptiste Daroussin return (0); 343057ca2d4SBaptiste Daroussin } 344057ca2d4SBaptiste Daroussin b *= 8; 345057ca2d4SBaptiste Daroussin b += (c2 - '0'); 346057ca2d4SBaptiste Daroussin c3 = scanc(); 347057ca2d4SBaptiste Daroussin if (!isodigit(c3)) { 348057ca2d4SBaptiste Daroussin unscanc(c3); 349057ca2d4SBaptiste Daroussin } else { 350057ca2d4SBaptiste Daroussin b *= 8; 351057ca2d4SBaptiste Daroussin b += (c3 - '0'); 352057ca2d4SBaptiste Daroussin } 353057ca2d4SBaptiste Daroussin return (b); 354057ca2d4SBaptiste Daroussin } 355057ca2d4SBaptiste Daroussin 356057ca2d4SBaptiste Daroussin void 357057ca2d4SBaptiste Daroussin add_tok(int c) 358057ca2d4SBaptiste Daroussin { 359057ca2d4SBaptiste Daroussin if ((tokidx + 1) >= toksz) { 360057ca2d4SBaptiste Daroussin toksz += 64; 361057ca2d4SBaptiste Daroussin if ((token = realloc(token, toksz)) == NULL) { 362057ca2d4SBaptiste Daroussin yyerror("out of memory"); 363057ca2d4SBaptiste Daroussin tokidx = 0; 364057ca2d4SBaptiste Daroussin toksz = 0; 365057ca2d4SBaptiste Daroussin return; 366057ca2d4SBaptiste Daroussin } 367057ca2d4SBaptiste Daroussin } 368057ca2d4SBaptiste Daroussin 369057ca2d4SBaptiste Daroussin token[tokidx++] = (char)c; 370057ca2d4SBaptiste Daroussin token[tokidx] = 0; 371057ca2d4SBaptiste Daroussin } 372057ca2d4SBaptiste Daroussin void 373057ca2d4SBaptiste Daroussin add_wcs(wchar_t c) 374057ca2d4SBaptiste Daroussin { 375057ca2d4SBaptiste Daroussin if ((wideidx + 1) >= widesz) { 376057ca2d4SBaptiste Daroussin widesz += 64; 377057ca2d4SBaptiste Daroussin widestr = realloc(widestr, (widesz * sizeof (wchar_t))); 378057ca2d4SBaptiste Daroussin if (widestr == NULL) { 379057ca2d4SBaptiste Daroussin yyerror("out of memory"); 380057ca2d4SBaptiste Daroussin wideidx = 0; 381057ca2d4SBaptiste Daroussin widesz = 0; 382057ca2d4SBaptiste Daroussin return; 383057ca2d4SBaptiste Daroussin } 384057ca2d4SBaptiste Daroussin } 385057ca2d4SBaptiste Daroussin 386057ca2d4SBaptiste Daroussin widestr[wideidx++] = c; 387057ca2d4SBaptiste Daroussin widestr[wideidx] = 0; 388057ca2d4SBaptiste Daroussin } 389057ca2d4SBaptiste Daroussin 390057ca2d4SBaptiste Daroussin wchar_t * 391057ca2d4SBaptiste Daroussin get_wcs(void) 392057ca2d4SBaptiste Daroussin { 393057ca2d4SBaptiste Daroussin wchar_t *ws = widestr; 394057ca2d4SBaptiste Daroussin wideidx = 0; 395057ca2d4SBaptiste Daroussin widestr = NULL; 396057ca2d4SBaptiste Daroussin widesz = 0; 397057ca2d4SBaptiste Daroussin if (ws == NULL) { 398057ca2d4SBaptiste Daroussin if ((ws = wcsdup(L"")) == NULL) { 399057ca2d4SBaptiste Daroussin yyerror("out of memory"); 400057ca2d4SBaptiste Daroussin } 401057ca2d4SBaptiste Daroussin } 402057ca2d4SBaptiste Daroussin return (ws); 403057ca2d4SBaptiste Daroussin } 404057ca2d4SBaptiste Daroussin 405057ca2d4SBaptiste Daroussin static int 406057ca2d4SBaptiste Daroussin get_byte(void) 407057ca2d4SBaptiste Daroussin { 408057ca2d4SBaptiste Daroussin int c; 409057ca2d4SBaptiste Daroussin 410057ca2d4SBaptiste Daroussin if ((c = scanc()) != esc_char) { 411057ca2d4SBaptiste Daroussin unscanc(c); 412057ca2d4SBaptiste Daroussin return (EOF); 413057ca2d4SBaptiste Daroussin } 414057ca2d4SBaptiste Daroussin c = scanc(); 415057ca2d4SBaptiste Daroussin 416057ca2d4SBaptiste Daroussin switch (c) { 417057ca2d4SBaptiste Daroussin case 'd': 418057ca2d4SBaptiste Daroussin case 'D': 419057ca2d4SBaptiste Daroussin return (scan_dec_byte()); 420057ca2d4SBaptiste Daroussin case 'x': 421057ca2d4SBaptiste Daroussin case 'X': 422057ca2d4SBaptiste Daroussin return (scan_hex_byte()); 423057ca2d4SBaptiste Daroussin case '0': 424057ca2d4SBaptiste Daroussin case '1': 425057ca2d4SBaptiste Daroussin case '2': 426057ca2d4SBaptiste Daroussin case '3': 427057ca2d4SBaptiste Daroussin case '4': 428057ca2d4SBaptiste Daroussin case '5': 429057ca2d4SBaptiste Daroussin case '6': 430057ca2d4SBaptiste Daroussin case '7': 431057ca2d4SBaptiste Daroussin /* put the character back so we can get it */ 432057ca2d4SBaptiste Daroussin unscanc(c); 433057ca2d4SBaptiste Daroussin return (scan_oct_byte()); 434057ca2d4SBaptiste Daroussin default: 435057ca2d4SBaptiste Daroussin unscanc(c); 436057ca2d4SBaptiste Daroussin unscanc(esc_char); 437057ca2d4SBaptiste Daroussin return (EOF); 438057ca2d4SBaptiste Daroussin } 439057ca2d4SBaptiste Daroussin } 440057ca2d4SBaptiste Daroussin 441057ca2d4SBaptiste Daroussin int 442057ca2d4SBaptiste Daroussin get_escaped(int c) 443057ca2d4SBaptiste Daroussin { 444057ca2d4SBaptiste Daroussin switch (c) { 445057ca2d4SBaptiste Daroussin case 'n': 446057ca2d4SBaptiste Daroussin return ('\n'); 447057ca2d4SBaptiste Daroussin case 'r': 448057ca2d4SBaptiste Daroussin return ('\r'); 449057ca2d4SBaptiste Daroussin case 't': 450057ca2d4SBaptiste Daroussin return ('\t'); 451057ca2d4SBaptiste Daroussin case 'f': 452057ca2d4SBaptiste Daroussin return ('\f'); 453057ca2d4SBaptiste Daroussin case 'v': 454057ca2d4SBaptiste Daroussin return ('\v'); 455057ca2d4SBaptiste Daroussin case 'b': 456057ca2d4SBaptiste Daroussin return ('\b'); 457057ca2d4SBaptiste Daroussin case 'a': 458057ca2d4SBaptiste Daroussin return ('\a'); 459057ca2d4SBaptiste Daroussin default: 460057ca2d4SBaptiste Daroussin return (c); 461057ca2d4SBaptiste Daroussin } 462057ca2d4SBaptiste Daroussin } 463057ca2d4SBaptiste Daroussin 464057ca2d4SBaptiste Daroussin int 465057ca2d4SBaptiste Daroussin get_wide(void) 466057ca2d4SBaptiste Daroussin { 467057ca2d4SBaptiste Daroussin static char mbs[MB_LEN_MAX + 1] = ""; 468057ca2d4SBaptiste Daroussin static int mbi = 0; 469057ca2d4SBaptiste Daroussin int c; 470057ca2d4SBaptiste Daroussin wchar_t wc; 471057ca2d4SBaptiste Daroussin 472057ca2d4SBaptiste Daroussin if (mb_cur_max >= (int)sizeof (mbs)) { 473057ca2d4SBaptiste Daroussin yyerror("max multibyte character size too big"); 474057ca2d4SBaptiste Daroussin mbi = 0; 475057ca2d4SBaptiste Daroussin return (T_NULL); 476057ca2d4SBaptiste Daroussin } 477057ca2d4SBaptiste Daroussin for (;;) { 478057ca2d4SBaptiste Daroussin if ((mbi == mb_cur_max) || ((c = get_byte()) == EOF)) { 479057ca2d4SBaptiste Daroussin /* 480057ca2d4SBaptiste Daroussin * end of the byte sequence reached, but no 481057ca2d4SBaptiste Daroussin * valid wide decoding. fatal error. 482057ca2d4SBaptiste Daroussin */ 483057ca2d4SBaptiste Daroussin mbi = 0; 484057ca2d4SBaptiste Daroussin yyerror("not a valid character encoding"); 485057ca2d4SBaptiste Daroussin return (T_NULL); 486057ca2d4SBaptiste Daroussin } 487057ca2d4SBaptiste Daroussin mbs[mbi++] = c; 488057ca2d4SBaptiste Daroussin mbs[mbi] = 0; 489057ca2d4SBaptiste Daroussin 490057ca2d4SBaptiste Daroussin /* does it decode? */ 491057ca2d4SBaptiste Daroussin if (to_wide(&wc, mbs) >= 0) { 492057ca2d4SBaptiste Daroussin break; 493057ca2d4SBaptiste Daroussin } 494057ca2d4SBaptiste Daroussin } 495057ca2d4SBaptiste Daroussin 496057ca2d4SBaptiste Daroussin mbi = 0; 497057ca2d4SBaptiste Daroussin if ((category != T_CHARMAP) && (category != T_WIDTH)) { 498057ca2d4SBaptiste Daroussin if (check_charmap(wc) < 0) { 499057ca2d4SBaptiste Daroussin yyerror("no symbolic name for character"); 500057ca2d4SBaptiste Daroussin return (T_NULL); 501057ca2d4SBaptiste Daroussin } 502057ca2d4SBaptiste Daroussin } 503057ca2d4SBaptiste Daroussin 504057ca2d4SBaptiste Daroussin yylval.wc = wc; 505057ca2d4SBaptiste Daroussin return (T_CHAR); 506057ca2d4SBaptiste Daroussin } 507057ca2d4SBaptiste Daroussin 508057ca2d4SBaptiste Daroussin int 509057ca2d4SBaptiste Daroussin get_symbol(void) 510057ca2d4SBaptiste Daroussin { 511057ca2d4SBaptiste Daroussin int c; 512057ca2d4SBaptiste Daroussin 513057ca2d4SBaptiste Daroussin while ((c = scanc()) != EOF) { 514057ca2d4SBaptiste Daroussin if (escaped) { 515057ca2d4SBaptiste Daroussin escaped = 0; 516057ca2d4SBaptiste Daroussin if (c == '\n') 517057ca2d4SBaptiste Daroussin continue; 518057ca2d4SBaptiste Daroussin add_tok(get_escaped(c)); 519057ca2d4SBaptiste Daroussin continue; 520057ca2d4SBaptiste Daroussin } 521057ca2d4SBaptiste Daroussin if (c == esc_char) { 522057ca2d4SBaptiste Daroussin escaped = 1; 523057ca2d4SBaptiste Daroussin continue; 524057ca2d4SBaptiste Daroussin } 525057ca2d4SBaptiste Daroussin if (c == '\n') { /* well that's strange! */ 526057ca2d4SBaptiste Daroussin yyerror("unterminated symbolic name"); 527057ca2d4SBaptiste Daroussin continue; 528057ca2d4SBaptiste Daroussin } 529057ca2d4SBaptiste Daroussin if (c == '>') { /* end of symbol */ 530057ca2d4SBaptiste Daroussin 531057ca2d4SBaptiste Daroussin /* 532057ca2d4SBaptiste Daroussin * This restarts the token from the beginning 533057ca2d4SBaptiste Daroussin * the next time we scan a character. (This 534057ca2d4SBaptiste Daroussin * token is complete.) 535057ca2d4SBaptiste Daroussin */ 536057ca2d4SBaptiste Daroussin 537057ca2d4SBaptiste Daroussin if (token == NULL) { 538057ca2d4SBaptiste Daroussin yyerror("missing symbolic name"); 539057ca2d4SBaptiste Daroussin return (T_NULL); 540057ca2d4SBaptiste Daroussin } 541057ca2d4SBaptiste Daroussin tokidx = 0; 542057ca2d4SBaptiste Daroussin 543057ca2d4SBaptiste Daroussin /* 544057ca2d4SBaptiste Daroussin * A few symbols are handled as keywords outside 545057ca2d4SBaptiste Daroussin * of the normal categories. 546057ca2d4SBaptiste Daroussin */ 547057ca2d4SBaptiste Daroussin if (category == T_END) { 548057ca2d4SBaptiste Daroussin int i; 549057ca2d4SBaptiste Daroussin for (i = 0; symwords[i].name != 0; i++) { 550057ca2d4SBaptiste Daroussin if (strcmp(token, symwords[i].name) == 551057ca2d4SBaptiste Daroussin 0) { 552057ca2d4SBaptiste Daroussin last_kw = symwords[i].id; 553057ca2d4SBaptiste Daroussin return (last_kw); 554057ca2d4SBaptiste Daroussin } 555057ca2d4SBaptiste Daroussin } 556057ca2d4SBaptiste Daroussin } 557057ca2d4SBaptiste Daroussin /* 558057ca2d4SBaptiste Daroussin * Contextual rule: Only literal characters are 559057ca2d4SBaptiste Daroussin * permitted in CHARMAP. Anywhere else the symbolic 560057ca2d4SBaptiste Daroussin * forms are fine. 561057ca2d4SBaptiste Daroussin */ 562057ca2d4SBaptiste Daroussin if ((category != T_CHARMAP) && 563057ca2d4SBaptiste Daroussin (lookup_charmap(token, &yylval.wc)) != -1) { 564057ca2d4SBaptiste Daroussin return (T_CHAR); 565057ca2d4SBaptiste Daroussin } 566057ca2d4SBaptiste Daroussin if ((yylval.collsym = lookup_collsym(token)) != NULL) { 567057ca2d4SBaptiste Daroussin return (T_COLLSYM); 568057ca2d4SBaptiste Daroussin } 569057ca2d4SBaptiste Daroussin if ((yylval.collelem = lookup_collelem(token)) != 570057ca2d4SBaptiste Daroussin NULL) { 571057ca2d4SBaptiste Daroussin return (T_COLLELEM); 572057ca2d4SBaptiste Daroussin } 573057ca2d4SBaptiste Daroussin /* its an undefined symbol */ 574057ca2d4SBaptiste Daroussin yylval.token = strdup(token); 575057ca2d4SBaptiste Daroussin token = NULL; 576057ca2d4SBaptiste Daroussin toksz = 0; 577057ca2d4SBaptiste Daroussin tokidx = 0; 578057ca2d4SBaptiste Daroussin return (T_SYMBOL); 579057ca2d4SBaptiste Daroussin } 580057ca2d4SBaptiste Daroussin add_tok(c); 581057ca2d4SBaptiste Daroussin } 582057ca2d4SBaptiste Daroussin 583057ca2d4SBaptiste Daroussin yyerror("unterminated symbolic name"); 584057ca2d4SBaptiste Daroussin return (EOF); 585057ca2d4SBaptiste Daroussin } 586057ca2d4SBaptiste Daroussin 587057ca2d4SBaptiste Daroussin int 588057ca2d4SBaptiste Daroussin get_category(void) 589057ca2d4SBaptiste Daroussin { 590057ca2d4SBaptiste Daroussin return (category); 591057ca2d4SBaptiste Daroussin } 592057ca2d4SBaptiste Daroussin 593057ca2d4SBaptiste Daroussin static int 594057ca2d4SBaptiste Daroussin consume_token(void) 595057ca2d4SBaptiste Daroussin { 596057ca2d4SBaptiste Daroussin int len = tokidx; 597057ca2d4SBaptiste Daroussin int i; 598057ca2d4SBaptiste Daroussin 599057ca2d4SBaptiste Daroussin tokidx = 0; 600057ca2d4SBaptiste Daroussin if (token == NULL) 601057ca2d4SBaptiste Daroussin return (T_NULL); 602057ca2d4SBaptiste Daroussin 603057ca2d4SBaptiste Daroussin /* 604057ca2d4SBaptiste Daroussin * this one is special, because we don't want it to alter the 605057ca2d4SBaptiste Daroussin * last_kw field. 606057ca2d4SBaptiste Daroussin */ 607057ca2d4SBaptiste Daroussin if (strcmp(token, "...") == 0) { 608057ca2d4SBaptiste Daroussin return (T_ELLIPSIS); 609057ca2d4SBaptiste Daroussin } 610057ca2d4SBaptiste Daroussin 611057ca2d4SBaptiste Daroussin /* search for reserved words first */ 612057ca2d4SBaptiste Daroussin for (i = 0; keywords[i].name; i++) { 613057ca2d4SBaptiste Daroussin int j; 614057ca2d4SBaptiste Daroussin if (strcmp(keywords[i].name, token) != 0) { 615057ca2d4SBaptiste Daroussin continue; 616057ca2d4SBaptiste Daroussin } 617057ca2d4SBaptiste Daroussin 618057ca2d4SBaptiste Daroussin last_kw = keywords[i].id; 619057ca2d4SBaptiste Daroussin 620057ca2d4SBaptiste Daroussin /* clear the top level category if we're done with it */ 621057ca2d4SBaptiste Daroussin if (last_kw == T_END) { 622057ca2d4SBaptiste Daroussin category = T_END; 623057ca2d4SBaptiste Daroussin } 624057ca2d4SBaptiste Daroussin 625057ca2d4SBaptiste Daroussin /* set the top level category if we're changing */ 626057ca2d4SBaptiste Daroussin for (j = 0; categories[j]; j++) { 627057ca2d4SBaptiste Daroussin if (categories[j] != last_kw) 628057ca2d4SBaptiste Daroussin continue; 629057ca2d4SBaptiste Daroussin category = last_kw; 630057ca2d4SBaptiste Daroussin } 631057ca2d4SBaptiste Daroussin 632057ca2d4SBaptiste Daroussin return (keywords[i].id); 633057ca2d4SBaptiste Daroussin } 634057ca2d4SBaptiste Daroussin 635057ca2d4SBaptiste Daroussin /* maybe its a numeric constant? */ 636057ca2d4SBaptiste Daroussin if (isdigit(*token) || (*token == '-' && isdigit(token[1]))) { 637057ca2d4SBaptiste Daroussin char *eptr; 638057ca2d4SBaptiste Daroussin yylval.num = strtol(token, &eptr, 10); 639057ca2d4SBaptiste Daroussin if (*eptr != 0) 640057ca2d4SBaptiste Daroussin yyerror("malformed number"); 641057ca2d4SBaptiste Daroussin return (T_NUMBER); 642057ca2d4SBaptiste Daroussin } 643057ca2d4SBaptiste Daroussin 644057ca2d4SBaptiste Daroussin /* 645057ca2d4SBaptiste Daroussin * A single lone character is treated as a character literal. 646057ca2d4SBaptiste Daroussin * To avoid duplication of effort, we stick in the charmap. 647057ca2d4SBaptiste Daroussin */ 648057ca2d4SBaptiste Daroussin if (len == 1) { 649057ca2d4SBaptiste Daroussin yylval.wc = token[0]; 650057ca2d4SBaptiste Daroussin return (T_CHAR); 651057ca2d4SBaptiste Daroussin } 652057ca2d4SBaptiste Daroussin 653057ca2d4SBaptiste Daroussin /* anything else is treated as a symbolic name */ 654057ca2d4SBaptiste Daroussin yylval.token = strdup(token); 655057ca2d4SBaptiste Daroussin token = NULL; 656057ca2d4SBaptiste Daroussin toksz = 0; 657057ca2d4SBaptiste Daroussin tokidx = 0; 658057ca2d4SBaptiste Daroussin return (T_NAME); 659057ca2d4SBaptiste Daroussin } 660057ca2d4SBaptiste Daroussin 661057ca2d4SBaptiste Daroussin void 662057ca2d4SBaptiste Daroussin scan_to_eol(void) 663057ca2d4SBaptiste Daroussin { 664057ca2d4SBaptiste Daroussin int c; 665057ca2d4SBaptiste Daroussin while ((c = scanc()) != '\n') { 666057ca2d4SBaptiste Daroussin if (c == EOF) { 667057ca2d4SBaptiste Daroussin /* end of file without newline! */ 668057ca2d4SBaptiste Daroussin errf("missing newline"); 669057ca2d4SBaptiste Daroussin return; 670057ca2d4SBaptiste Daroussin } 671057ca2d4SBaptiste Daroussin } 672057ca2d4SBaptiste Daroussin assert(c == '\n'); 673057ca2d4SBaptiste Daroussin } 674057ca2d4SBaptiste Daroussin 675057ca2d4SBaptiste Daroussin int 676057ca2d4SBaptiste Daroussin yylex(void) 677057ca2d4SBaptiste Daroussin { 678057ca2d4SBaptiste Daroussin int c; 679057ca2d4SBaptiste Daroussin 680057ca2d4SBaptiste Daroussin while ((c = scanc()) != EOF) { 681057ca2d4SBaptiste Daroussin 682057ca2d4SBaptiste Daroussin /* special handling for quoted string */ 683057ca2d4SBaptiste Daroussin if (instring) { 684057ca2d4SBaptiste Daroussin if (escaped) { 685057ca2d4SBaptiste Daroussin escaped = 0; 686057ca2d4SBaptiste Daroussin 687057ca2d4SBaptiste Daroussin /* if newline, just eat and forget it */ 688057ca2d4SBaptiste Daroussin if (c == '\n') 689057ca2d4SBaptiste Daroussin continue; 690057ca2d4SBaptiste Daroussin 691057ca2d4SBaptiste Daroussin if (strchr("xXd01234567", c)) { 692057ca2d4SBaptiste Daroussin unscanc(c); 693057ca2d4SBaptiste Daroussin unscanc(esc_char); 694057ca2d4SBaptiste Daroussin return (get_wide()); 695057ca2d4SBaptiste Daroussin } 696057ca2d4SBaptiste Daroussin yylval.wc = get_escaped(c); 697057ca2d4SBaptiste Daroussin return (T_CHAR); 698057ca2d4SBaptiste Daroussin } 699057ca2d4SBaptiste Daroussin if (c == esc_char) { 700057ca2d4SBaptiste Daroussin escaped = 1; 701057ca2d4SBaptiste Daroussin continue; 702057ca2d4SBaptiste Daroussin } 703057ca2d4SBaptiste Daroussin switch (c) { 704057ca2d4SBaptiste Daroussin case '<': 705057ca2d4SBaptiste Daroussin return (get_symbol()); 706057ca2d4SBaptiste Daroussin case '>': 707057ca2d4SBaptiste Daroussin /* oops! should generate syntax error */ 708057ca2d4SBaptiste Daroussin return (T_GT); 709057ca2d4SBaptiste Daroussin case '"': 710057ca2d4SBaptiste Daroussin instring = 0; 711057ca2d4SBaptiste Daroussin return (T_QUOTE); 712057ca2d4SBaptiste Daroussin default: 713057ca2d4SBaptiste Daroussin yylval.wc = c; 714057ca2d4SBaptiste Daroussin return (T_CHAR); 715057ca2d4SBaptiste Daroussin } 716057ca2d4SBaptiste Daroussin } 717057ca2d4SBaptiste Daroussin 718057ca2d4SBaptiste Daroussin /* escaped characters first */ 719057ca2d4SBaptiste Daroussin if (escaped) { 720057ca2d4SBaptiste Daroussin escaped = 0; 721057ca2d4SBaptiste Daroussin if (c == '\n') { 722057ca2d4SBaptiste Daroussin /* eat the newline */ 723057ca2d4SBaptiste Daroussin continue; 724057ca2d4SBaptiste Daroussin } 725057ca2d4SBaptiste Daroussin hadtok = 1; 726057ca2d4SBaptiste Daroussin if (tokidx) { 727057ca2d4SBaptiste Daroussin /* an escape mid-token is nonsense */ 728057ca2d4SBaptiste Daroussin return (T_NULL); 729057ca2d4SBaptiste Daroussin } 730057ca2d4SBaptiste Daroussin 731057ca2d4SBaptiste Daroussin /* numeric escapes are treated as wide characters */ 732057ca2d4SBaptiste Daroussin if (strchr("xXd01234567", c)) { 733057ca2d4SBaptiste Daroussin unscanc(c); 734057ca2d4SBaptiste Daroussin unscanc(esc_char); 735057ca2d4SBaptiste Daroussin return (get_wide()); 736057ca2d4SBaptiste Daroussin } 737057ca2d4SBaptiste Daroussin 738057ca2d4SBaptiste Daroussin add_tok(get_escaped(c)); 739057ca2d4SBaptiste Daroussin continue; 740057ca2d4SBaptiste Daroussin } 741057ca2d4SBaptiste Daroussin 742057ca2d4SBaptiste Daroussin /* if it is the escape charter itself note it */ 743057ca2d4SBaptiste Daroussin if (c == esc_char) { 744057ca2d4SBaptiste Daroussin escaped = 1; 745057ca2d4SBaptiste Daroussin continue; 746057ca2d4SBaptiste Daroussin } 747057ca2d4SBaptiste Daroussin 748057ca2d4SBaptiste Daroussin /* remove from the comment char to end of line */ 749057ca2d4SBaptiste Daroussin if (c == com_char) { 750057ca2d4SBaptiste Daroussin while (c != '\n') { 751057ca2d4SBaptiste Daroussin if ((c = scanc()) == EOF) { 752057ca2d4SBaptiste Daroussin /* end of file without newline! */ 753057ca2d4SBaptiste Daroussin return (EOF); 754057ca2d4SBaptiste Daroussin } 755057ca2d4SBaptiste Daroussin } 756057ca2d4SBaptiste Daroussin assert(c == '\n'); 757057ca2d4SBaptiste Daroussin if (!hadtok) { 758057ca2d4SBaptiste Daroussin /* 759057ca2d4SBaptiste Daroussin * If there were no tokens on this line, 760057ca2d4SBaptiste Daroussin * then just pretend it didn't exist at all. 761057ca2d4SBaptiste Daroussin */ 762057ca2d4SBaptiste Daroussin continue; 763057ca2d4SBaptiste Daroussin } 764057ca2d4SBaptiste Daroussin hadtok = 0; 765057ca2d4SBaptiste Daroussin return (T_NL); 766057ca2d4SBaptiste Daroussin } 767057ca2d4SBaptiste Daroussin 768057ca2d4SBaptiste Daroussin if (strchr(" \t\n;()<>,\"", c) && (tokidx != 0)) { 769057ca2d4SBaptiste Daroussin /* 770057ca2d4SBaptiste Daroussin * These are all token delimiters. If there 771057ca2d4SBaptiste Daroussin * is a token already in progress, we need to 772057ca2d4SBaptiste Daroussin * process it. 773057ca2d4SBaptiste Daroussin */ 774057ca2d4SBaptiste Daroussin unscanc(c); 775057ca2d4SBaptiste Daroussin return (consume_token()); 776057ca2d4SBaptiste Daroussin } 777057ca2d4SBaptiste Daroussin 778057ca2d4SBaptiste Daroussin switch (c) { 779057ca2d4SBaptiste Daroussin case '\n': 780057ca2d4SBaptiste Daroussin if (!hadtok) { 781057ca2d4SBaptiste Daroussin /* 782057ca2d4SBaptiste Daroussin * If the line was completely devoid of tokens, 783057ca2d4SBaptiste Daroussin * then just ignore it. 784057ca2d4SBaptiste Daroussin */ 785057ca2d4SBaptiste Daroussin continue; 786057ca2d4SBaptiste Daroussin } 787057ca2d4SBaptiste Daroussin /* we're starting a new line, reset the token state */ 788057ca2d4SBaptiste Daroussin hadtok = 0; 789057ca2d4SBaptiste Daroussin return (T_NL); 790057ca2d4SBaptiste Daroussin case ',': 791057ca2d4SBaptiste Daroussin hadtok = 1; 792057ca2d4SBaptiste Daroussin return (T_COMMA); 793057ca2d4SBaptiste Daroussin case ';': 794057ca2d4SBaptiste Daroussin hadtok = 1; 795057ca2d4SBaptiste Daroussin return (T_SEMI); 796057ca2d4SBaptiste Daroussin case '(': 797057ca2d4SBaptiste Daroussin hadtok = 1; 798057ca2d4SBaptiste Daroussin return (T_LPAREN); 799057ca2d4SBaptiste Daroussin case ')': 800057ca2d4SBaptiste Daroussin hadtok = 1; 801057ca2d4SBaptiste Daroussin return (T_RPAREN); 802057ca2d4SBaptiste Daroussin case '>': 803057ca2d4SBaptiste Daroussin hadtok = 1; 804057ca2d4SBaptiste Daroussin return (T_GT); 805057ca2d4SBaptiste Daroussin case '<': 806057ca2d4SBaptiste Daroussin /* symbol start! */ 807057ca2d4SBaptiste Daroussin hadtok = 1; 808057ca2d4SBaptiste Daroussin return (get_symbol()); 809057ca2d4SBaptiste Daroussin case ' ': 810057ca2d4SBaptiste Daroussin case '\t': 811057ca2d4SBaptiste Daroussin /* whitespace, just ignore it */ 812057ca2d4SBaptiste Daroussin continue; 813057ca2d4SBaptiste Daroussin case '"': 814057ca2d4SBaptiste Daroussin hadtok = 1; 815057ca2d4SBaptiste Daroussin instring = 1; 816057ca2d4SBaptiste Daroussin return (T_QUOTE); 817057ca2d4SBaptiste Daroussin default: 818057ca2d4SBaptiste Daroussin hadtok = 1; 819057ca2d4SBaptiste Daroussin add_tok(c); 820057ca2d4SBaptiste Daroussin continue; 821057ca2d4SBaptiste Daroussin } 822057ca2d4SBaptiste Daroussin } 823057ca2d4SBaptiste Daroussin return (EOF); 824057ca2d4SBaptiste Daroussin } 825057ca2d4SBaptiste Daroussin 826057ca2d4SBaptiste Daroussin void 827057ca2d4SBaptiste Daroussin yyerror(const char *msg) 828057ca2d4SBaptiste Daroussin { 829057ca2d4SBaptiste Daroussin (void) fprintf(stderr, "%s: %d: error: %s\n", 830057ca2d4SBaptiste Daroussin filename, lineno, msg); 831057ca2d4SBaptiste Daroussin exit(4); 832057ca2d4SBaptiste Daroussin } 833057ca2d4SBaptiste Daroussin 834057ca2d4SBaptiste Daroussin void 835057ca2d4SBaptiste Daroussin errf(const char *fmt, ...) 836057ca2d4SBaptiste Daroussin { 837057ca2d4SBaptiste Daroussin char *msg; 838057ca2d4SBaptiste Daroussin 839057ca2d4SBaptiste Daroussin va_list va; 840057ca2d4SBaptiste Daroussin va_start(va, fmt); 841057ca2d4SBaptiste Daroussin (void) vasprintf(&msg, fmt, va); 842057ca2d4SBaptiste Daroussin va_end(va); 843057ca2d4SBaptiste Daroussin 844057ca2d4SBaptiste Daroussin (void) fprintf(stderr, "%s: %d: error: %s\n", 845057ca2d4SBaptiste Daroussin filename, lineno, msg); 846057ca2d4SBaptiste Daroussin free(msg); 847057ca2d4SBaptiste Daroussin exit(4); 848057ca2d4SBaptiste Daroussin } 849057ca2d4SBaptiste Daroussin 850057ca2d4SBaptiste Daroussin void 851057ca2d4SBaptiste Daroussin warn(const char *fmt, ...) 852057ca2d4SBaptiste Daroussin { 853057ca2d4SBaptiste Daroussin char *msg; 854057ca2d4SBaptiste Daroussin 855057ca2d4SBaptiste Daroussin va_list va; 856057ca2d4SBaptiste Daroussin va_start(va, fmt); 857057ca2d4SBaptiste Daroussin (void) vasprintf(&msg, fmt, va); 858057ca2d4SBaptiste Daroussin va_end(va); 859057ca2d4SBaptiste Daroussin 860057ca2d4SBaptiste Daroussin (void) fprintf(stderr, "%s: %d: warning: %s\n", 861057ca2d4SBaptiste Daroussin filename, lineno, msg); 862057ca2d4SBaptiste Daroussin free(msg); 863057ca2d4SBaptiste Daroussin warnings++; 864057ca2d4SBaptiste Daroussin if (!warnok) 865057ca2d4SBaptiste Daroussin exit(4); 866057ca2d4SBaptiste Daroussin } 867