1*e12a957fSPedro F. Giffuni /*-
2057ca2d4SBaptiste Daroussin * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
3057ca2d4SBaptiste Daroussin * Copyright 2015 John Marino <draco@marino.st>
4057ca2d4SBaptiste Daroussin *
5057ca2d4SBaptiste Daroussin * This source code is derived from the illumos localedef command, and
6057ca2d4SBaptiste Daroussin * provided under BSD-style license terms by Nexenta Systems, Inc.
7057ca2d4SBaptiste Daroussin *
8057ca2d4SBaptiste Daroussin * Redistribution and use in source and binary forms, with or without
9057ca2d4SBaptiste Daroussin * modification, are permitted provided that the following conditions
10057ca2d4SBaptiste Daroussin * are met:
11057ca2d4SBaptiste Daroussin *
12057ca2d4SBaptiste Daroussin * 1. Redistributions of source code must retain the above copyright
13057ca2d4SBaptiste Daroussin * notice, this list of conditions and the following disclaimer.
14057ca2d4SBaptiste Daroussin * 2. Redistributions in binary form must reproduce the above copyright
15057ca2d4SBaptiste Daroussin * notice, this list of conditions and the following disclaimer in the
16057ca2d4SBaptiste Daroussin * documentation and/or other materials provided with the distribution.
17057ca2d4SBaptiste Daroussin *
18057ca2d4SBaptiste Daroussin * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19057ca2d4SBaptiste Daroussin * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20057ca2d4SBaptiste Daroussin * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21057ca2d4SBaptiste Daroussin * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22057ca2d4SBaptiste Daroussin * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23057ca2d4SBaptiste Daroussin * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24057ca2d4SBaptiste Daroussin * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25057ca2d4SBaptiste Daroussin * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26057ca2d4SBaptiste Daroussin * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27057ca2d4SBaptiste Daroussin * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28057ca2d4SBaptiste Daroussin * POSSIBILITY OF SUCH DAMAGE.
29057ca2d4SBaptiste Daroussin */
30057ca2d4SBaptiste Daroussin
31057ca2d4SBaptiste Daroussin /*
32057ca2d4SBaptiste Daroussin * This file contains the "scanner", which tokenizes the input files
33057ca2d4SBaptiste Daroussin * for localedef for processing by the higher level grammar processor.
34057ca2d4SBaptiste Daroussin */
35057ca2d4SBaptiste Daroussin #include <sys/cdefs.h>
36057ca2d4SBaptiste Daroussin #include <stdio.h>
37057ca2d4SBaptiste Daroussin #include <stdlib.h>
38057ca2d4SBaptiste Daroussin #include <ctype.h>
39057ca2d4SBaptiste Daroussin #include <limits.h>
40057ca2d4SBaptiste Daroussin #include <string.h>
41057ca2d4SBaptiste Daroussin #include <wchar.h>
42057ca2d4SBaptiste Daroussin #include <sys/types.h>
43057ca2d4SBaptiste Daroussin #include <assert.h>
44057ca2d4SBaptiste Daroussin #include "localedef.h"
45057ca2d4SBaptiste Daroussin #include "parser.h"
46057ca2d4SBaptiste Daroussin
/*
 * Scanner state.  The non-static objects are visible to other translation
 * units; reset_scanner() restores most of them to their defaults.
 */
int com_char = '#';	/* reset to '#' by reset_scanner(); presumably the comment introducer */
int esc_char = '\\';	/* escape character tested by get_byte() and get_symbol() */
int mb_cur_min = 1;	/* NOTE(review): not referenced in this part of the file */
int mb_cur_max = 1;	/* upper bound on bytes get_wide() gathers for one character */
int lineno = 1;		/* line of the most recently scanned character (see scanc()) */
int warnings = 0;	/* NOTE(review): presumably a warning counter; not used here */
int is_stdin = 1;	/* non-zero: read from stdin; zero: read from `input' */
FILE *input;		/* stream opened by reset_scanner(); only used when !is_stdin */
static int nextline;	/* line number the next scanned character belongs to */
/* `input' carries no initializer; scanc()/unscanc() select stdin via is_stdin. */
static const char *filename = "<stdin>";	/* name of the current input source */
static int instring = 0;	/* cleared by reset_scanner(); presumably "inside a string" */
static int escaped = 0;		/* set when the previous character was esc_char */

/*
 * Token space ... grows on demand.
 */
static char *token = NULL;	/* NUL-terminated token buffer filled by add_tok() */
static int tokidx;		/* number of characters currently in `token' */
static int toksz = 0;		/* allocated size of `token' in bytes */
static int hadtok = 0;		/* NOTE(review): not referenced in this part of the file */

/*
 * Wide string space ... grows on demand.
 */
static wchar_t *widestr = NULL;	/* wide string being built by add_wcs() */
static int wideidx = 0;		/* number of wide characters currently stored */
static int widesz = 0;		/* allocated size of `widestr' in wchar_t units */

/*
 * The last keyword seen.  This is useful to trigger the special lexer rules
 * for "copy" and also collating symbols and elements.
 */
int last_kw = 0;
/* Current top-level category; T_END when no category is open. */
static int category = T_END;
82057ca2d4SBaptiste Daroussin
/*
 * Reserved-word table: maps a keyword's spelling to its parser token id.
 * consume_token() searches it linearly with strcmp(); the entry with a NULL
 * name terminates the table.
 */
static struct token {
	int id;			/* parser token value */
	const char *name;	/* exact (case-sensitive) spelling */
} keywords[] = {
	{ T_COM_CHAR,		"comment_char" },
	{ T_ESC_CHAR,		"escape_char" },
	{ T_END,		"END" },
	{ T_COPY,		"copy" },
	{ T_MESSAGES,		"LC_MESSAGES" },
	{ T_YESSTR,		"yesstr" },
	{ T_YESEXPR,		"yesexpr" },
	{ T_NOSTR,		"nostr" },
	{ T_NOEXPR,		"noexpr" },
	{ T_MONETARY,		"LC_MONETARY" },
	{ T_INT_CURR_SYMBOL,	"int_curr_symbol" },
	{ T_CURRENCY_SYMBOL,	"currency_symbol" },
	{ T_MON_DECIMAL_POINT,	"mon_decimal_point" },
	{ T_MON_THOUSANDS_SEP,	"mon_thousands_sep" },
	{ T_POSITIVE_SIGN,	"positive_sign" },
	{ T_NEGATIVE_SIGN,	"negative_sign" },
	{ T_MON_GROUPING,	"mon_grouping" },
	{ T_INT_FRAC_DIGITS,	"int_frac_digits" },
	{ T_FRAC_DIGITS,	"frac_digits" },
	{ T_P_CS_PRECEDES,	"p_cs_precedes" },
	{ T_P_SEP_BY_SPACE,	"p_sep_by_space" },
	{ T_N_CS_PRECEDES,	"n_cs_precedes" },
	{ T_N_SEP_BY_SPACE,	"n_sep_by_space" },
	{ T_P_SIGN_POSN,	"p_sign_posn" },
	{ T_N_SIGN_POSN,	"n_sign_posn" },
	{ T_INT_P_CS_PRECEDES,	"int_p_cs_precedes" },
	{ T_INT_N_CS_PRECEDES,	"int_n_cs_precedes" },
	{ T_INT_P_SEP_BY_SPACE,	"int_p_sep_by_space" },
	{ T_INT_N_SEP_BY_SPACE,	"int_n_sep_by_space" },
	{ T_INT_P_SIGN_POSN,	"int_p_sign_posn" },
	{ T_INT_N_SIGN_POSN,	"int_n_sign_posn" },
	{ T_COLLATE,		"LC_COLLATE" },
	{ T_COLLATING_SYMBOL,	"collating-symbol" },
	{ T_COLLATING_ELEMENT,	"collating-element" },
	{ T_FROM,		"from" },
	{ T_ORDER_START,	"order_start" },
	{ T_ORDER_END,		"order_end" },
	{ T_FORWARD,		"forward" },
	{ T_BACKWARD,		"backward" },
	{ T_POSITION,		"position" },
	{ T_IGNORE,		"IGNORE" },
	{ T_UNDEFINED,		"UNDEFINED" },
	{ T_NUMERIC,		"LC_NUMERIC" },
	{ T_DECIMAL_POINT,	"decimal_point" },
	{ T_THOUSANDS_SEP,	"thousands_sep" },
	{ T_GROUPING,		"grouping" },
	{ T_TIME,		"LC_TIME" },
	{ T_ABDAY,		"abday" },
	{ T_DAY,		"day" },
	{ T_ABMON,		"abmon" },
	{ T_MON,		"mon" },
	{ T_D_T_FMT,		"d_t_fmt" },
	{ T_D_FMT,		"d_fmt" },
	{ T_T_FMT,		"t_fmt" },
	{ T_AM_PM,		"am_pm" },
	{ T_T_FMT_AMPM,		"t_fmt_ampm" },
	{ T_ERA,		"era" },
	{ T_ERA_D_FMT,		"era_d_fmt" },
	{ T_ERA_T_FMT,		"era_t_fmt" },
	{ T_ERA_D_T_FMT,	"era_d_t_fmt" },
	{ T_ALT_DIGITS,		"alt_digits" },
	{ T_CTYPE,		"LC_CTYPE" },
	{ T_ISUPPER,		"upper" },
	{ T_ISLOWER,		"lower" },
	{ T_ISALPHA,		"alpha" },
	{ T_ISDIGIT,		"digit" },
	{ T_ISPUNCT,		"punct" },
	{ T_ISXDIGIT,		"xdigit" },
	{ T_ISSPACE,		"space" },
	{ T_ISPRINT,		"print" },
	{ T_ISGRAPH,		"graph" },
	{ T_ISBLANK,		"blank" },
	{ T_ISCNTRL,		"cntrl" },
	/*
	 * These entries are local additions, and not specified by
	 * TOG.  Note that they are not guaranteed to be accurate for
	 * all locales, and so applications should not depend on them.
	 */
	{ T_ISSPECIAL,		"special" },
	{ T_ISENGLISH,		"english" },
	{ T_ISPHONOGRAM,	"phonogram" },
	{ T_ISIDEOGRAM,		"ideogram" },
	{ T_ISNUMBER,		"number" },
	/*
	 * We have to support this in the grammar, but it would be a
	 * syntax error to define a character as one of these without
	 * also defining it as an alpha or digit.  We ignore it in our
	 * parsing.
	 */
	{ T_ISALNUM,		"alnum" },
	{ T_TOUPPER,		"toupper" },
	{ T_TOLOWER,		"tolower" },

	/*
	 * These are keywords used in the charmap file.  Note that
	 * Solaris originally used angle brackets to wrap some of them,
	 * but we removed that to simplify our parser.  The first of these
	 * items are "global items."
	 */
	{ T_CHARMAP,		"CHARMAP" },
	{ T_WIDTH,		"WIDTH" },

	{ -1, NULL },	/* sentinel: NULL name ends the table */
};
191057ca2d4SBaptiste Daroussin
/*
 * These special words are only used in a charmap file, enclosed in <>.
 * get_symbol() consults this table only while no category is open
 * (category == T_END); the entry with a NULL name terminates it.
 */
static struct token symwords[] = {
	{ T_COM_CHAR,		"comment_char" },
	{ T_ESC_CHAR,		"escape_char" },
	{ T_CODE_SET,		"code_set_name" },
	{ T_MB_CUR_MAX,		"mb_cur_max" },
	{ T_MB_CUR_MIN,		"mb_cur_min" },
	{ -1, NULL },	/* sentinel */
};
203057ca2d4SBaptiste Daroussin
/*
 * Keyword ids that open a top-level category.  When consume_token()
 * recognises one of these it records it as the current category.  The
 * list is terminated by 0.
 */
static int categories[] = {
	T_CHARMAP,
	T_CTYPE,
	T_COLLATE,
	T_MESSAGES,
	T_MONETARY,
	T_NUMERIC,
	T_TIME,
	T_WIDTH,
	0	/* terminator */
};
215057ca2d4SBaptiste Daroussin
216057ca2d4SBaptiste Daroussin void
reset_scanner(const char * fname)217057ca2d4SBaptiste Daroussin reset_scanner(const char *fname)
218057ca2d4SBaptiste Daroussin {
219057ca2d4SBaptiste Daroussin if (fname == NULL) {
220057ca2d4SBaptiste Daroussin filename = "<stdin>";
221057ca2d4SBaptiste Daroussin is_stdin = 1;
222057ca2d4SBaptiste Daroussin } else {
223057ca2d4SBaptiste Daroussin if (!is_stdin)
224057ca2d4SBaptiste Daroussin (void) fclose(input);
225057ca2d4SBaptiste Daroussin if ((input = fopen(fname, "r")) == NULL) {
226057ca2d4SBaptiste Daroussin perror("fopen");
227057ca2d4SBaptiste Daroussin exit(4);
228057ca2d4SBaptiste Daroussin } else {
229057ca2d4SBaptiste Daroussin is_stdin = 0;
230057ca2d4SBaptiste Daroussin }
231057ca2d4SBaptiste Daroussin filename = fname;
232057ca2d4SBaptiste Daroussin }
233057ca2d4SBaptiste Daroussin com_char = '#';
234057ca2d4SBaptiste Daroussin esc_char = '\\';
235057ca2d4SBaptiste Daroussin instring = 0;
236057ca2d4SBaptiste Daroussin escaped = 0;
237057ca2d4SBaptiste Daroussin lineno = 1;
238057ca2d4SBaptiste Daroussin nextline = 1;
239057ca2d4SBaptiste Daroussin tokidx = 0;
240057ca2d4SBaptiste Daroussin wideidx = 0;
241057ca2d4SBaptiste Daroussin }
242057ca2d4SBaptiste Daroussin
/*
 * hex(x): numeric value (0-15) of the hexadecimal digit character x.  The
 * caller must already have verified x with isxdigit().
 * isodigit(x): non-zero when x is an octal digit character ('0'..'7').
 *
 * Every use of the argument is parenthesized so that an argument containing
 * a low-precedence operator expands correctly.  Both macros still evaluate
 * their argument more than once, so do not pass expressions with side
 * effects.
 */
#define	hex(x)	\
	(isdigit(x) ? ((x) - '0') : ((islower(x) ? ((x) - 'a') : ((x) - 'A')) + 10))
#define	isodigit(x)	(((x) >= '0') && ((x) <= '7'))
246057ca2d4SBaptiste Daroussin
247057ca2d4SBaptiste Daroussin static int
scanc(void)248057ca2d4SBaptiste Daroussin scanc(void)
249057ca2d4SBaptiste Daroussin {
250057ca2d4SBaptiste Daroussin int c;
251057ca2d4SBaptiste Daroussin
252057ca2d4SBaptiste Daroussin if (is_stdin)
253057ca2d4SBaptiste Daroussin c = getc(stdin);
254057ca2d4SBaptiste Daroussin else
255057ca2d4SBaptiste Daroussin c = getc(input);
256057ca2d4SBaptiste Daroussin lineno = nextline;
257057ca2d4SBaptiste Daroussin if (c == '\n') {
258057ca2d4SBaptiste Daroussin nextline++;
259057ca2d4SBaptiste Daroussin }
260057ca2d4SBaptiste Daroussin return (c);
261057ca2d4SBaptiste Daroussin }
262057ca2d4SBaptiste Daroussin
263057ca2d4SBaptiste Daroussin static void
unscanc(int c)264057ca2d4SBaptiste Daroussin unscanc(int c)
265057ca2d4SBaptiste Daroussin {
266057ca2d4SBaptiste Daroussin if (c == '\n') {
267057ca2d4SBaptiste Daroussin nextline--;
268057ca2d4SBaptiste Daroussin }
269057ca2d4SBaptiste Daroussin if (ungetc(c, is_stdin ? stdin : input) < 0) {
270057ca2d4SBaptiste Daroussin yyerror("ungetc failed");
271057ca2d4SBaptiste Daroussin }
272057ca2d4SBaptiste Daroussin }
273057ca2d4SBaptiste Daroussin
/*
 * Read exactly two hexadecimal digits and return the value they encode,
 * first digit in the high nibble.  If either character is not a hex digit,
 * "malformed hex digit" is reported via yyerror() and 0 is returned; the
 * offending character is not pushed back.
 */
static int
scan_hex_byte(void)
{
	int value;
	int ch;
	int n;

	value = 0;
	for (n = 0; n < 2; n++) {
		ch = scanc();
		if (!isxdigit(ch)) {
			yyerror("malformed hex digit");
			return (0);
		}
		value = (value << 4) | hex(ch);
	}
	return (value);
}
293057ca2d4SBaptiste Daroussin
/*
 * Read a decimal byte value: two mandatory decimal digits optionally
 * followed by a third.  A non-digit in the third position is pushed back.
 *
 * Returns the decoded value.  A missing first or second digit is reported
 * as "malformed decimal digit".  A value that does not fit in one byte
 * (greater than UCHAR_MAX) is now rejected as well instead of being
 * returned and truncated later.  All error paths report via yyerror() and
 * return 0.
 */
static int
scan_dec_byte(void)
{
	int ndigits;
	int b;
	int c;

	b = 0;
	for (ndigits = 0; ndigits < 3; ndigits++) {
		c = scanc();
		if (!isdigit(c)) {
			if (ndigits < 2) {
				yyerror("malformed decimal digit");
				return (0);
			}
			/* the third digit is optional */
			unscanc(c);
			break;
		}
		b = (b * 10) + (c - '0');
	}

	if (b > UCHAR_MAX) {
		yyerror("decimal byte value out of range");
		return (0);
	}
	return (b);
}
322057ca2d4SBaptiste Daroussin
/*
 * Read an octal byte value: two mandatory octal digits optionally followed
 * by a third.  A non-octal character in the third position is pushed back.
 *
 * Returns the decoded value.  A missing first or second digit is reported
 * as "malformed octal digit".  A value that does not fit in one byte
 * (greater than UCHAR_MAX, e.g. 0777) is now rejected as well instead of
 * being returned and truncated later.  All error paths report via
 * yyerror() and return 0.
 */
static int
scan_oct_byte(void)
{
	int ndigits;
	int b;
	int c;

	b = 0;
	for (ndigits = 0; ndigits < 3; ndigits++) {
		c = scanc();
		if (!isodigit(c)) {
			if (ndigits < 2) {
				yyerror("malformed octal digit");
				return (0);
			}
			/* the third digit is optional */
			unscanc(c);
			break;
		}
		b = (b * 8) + (c - '0');
	}

	if (b > UCHAR_MAX) {
		yyerror("octal byte value out of range");
		return (0);
	}
	return (b);
}
353057ca2d4SBaptiste Daroussin
354057ca2d4SBaptiste Daroussin void
add_tok(int c)355057ca2d4SBaptiste Daroussin add_tok(int c)
356057ca2d4SBaptiste Daroussin {
357057ca2d4SBaptiste Daroussin if ((tokidx + 1) >= toksz) {
358057ca2d4SBaptiste Daroussin toksz += 64;
359057ca2d4SBaptiste Daroussin if ((token = realloc(token, toksz)) == NULL) {
360057ca2d4SBaptiste Daroussin yyerror("out of memory");
361057ca2d4SBaptiste Daroussin tokidx = 0;
362057ca2d4SBaptiste Daroussin toksz = 0;
363057ca2d4SBaptiste Daroussin return;
364057ca2d4SBaptiste Daroussin }
365057ca2d4SBaptiste Daroussin }
366057ca2d4SBaptiste Daroussin
367057ca2d4SBaptiste Daroussin token[tokidx++] = (char)c;
368057ca2d4SBaptiste Daroussin token[tokidx] = 0;
369057ca2d4SBaptiste Daroussin }
370057ca2d4SBaptiste Daroussin void
add_wcs(wchar_t c)371057ca2d4SBaptiste Daroussin add_wcs(wchar_t c)
372057ca2d4SBaptiste Daroussin {
373057ca2d4SBaptiste Daroussin if ((wideidx + 1) >= widesz) {
374057ca2d4SBaptiste Daroussin widesz += 64;
375057ca2d4SBaptiste Daroussin widestr = realloc(widestr, (widesz * sizeof (wchar_t)));
376057ca2d4SBaptiste Daroussin if (widestr == NULL) {
377057ca2d4SBaptiste Daroussin yyerror("out of memory");
378057ca2d4SBaptiste Daroussin wideidx = 0;
379057ca2d4SBaptiste Daroussin widesz = 0;
380057ca2d4SBaptiste Daroussin return;
381057ca2d4SBaptiste Daroussin }
382057ca2d4SBaptiste Daroussin }
383057ca2d4SBaptiste Daroussin
384057ca2d4SBaptiste Daroussin widestr[wideidx++] = c;
385057ca2d4SBaptiste Daroussin widestr[wideidx] = 0;
386057ca2d4SBaptiste Daroussin }
387057ca2d4SBaptiste Daroussin
388057ca2d4SBaptiste Daroussin wchar_t *
get_wcs(void)389057ca2d4SBaptiste Daroussin get_wcs(void)
390057ca2d4SBaptiste Daroussin {
391057ca2d4SBaptiste Daroussin wchar_t *ws = widestr;
392057ca2d4SBaptiste Daroussin wideidx = 0;
393057ca2d4SBaptiste Daroussin widestr = NULL;
394057ca2d4SBaptiste Daroussin widesz = 0;
395057ca2d4SBaptiste Daroussin if (ws == NULL) {
396057ca2d4SBaptiste Daroussin if ((ws = wcsdup(L"")) == NULL) {
397057ca2d4SBaptiste Daroussin yyerror("out of memory");
398057ca2d4SBaptiste Daroussin }
399057ca2d4SBaptiste Daroussin }
400057ca2d4SBaptiste Daroussin return (ws);
401057ca2d4SBaptiste Daroussin }
402057ca2d4SBaptiste Daroussin
403057ca2d4SBaptiste Daroussin static int
get_byte(void)404057ca2d4SBaptiste Daroussin get_byte(void)
405057ca2d4SBaptiste Daroussin {
406057ca2d4SBaptiste Daroussin int c;
407057ca2d4SBaptiste Daroussin
408057ca2d4SBaptiste Daroussin if ((c = scanc()) != esc_char) {
409057ca2d4SBaptiste Daroussin unscanc(c);
410057ca2d4SBaptiste Daroussin return (EOF);
411057ca2d4SBaptiste Daroussin }
412057ca2d4SBaptiste Daroussin c = scanc();
413057ca2d4SBaptiste Daroussin
414057ca2d4SBaptiste Daroussin switch (c) {
415057ca2d4SBaptiste Daroussin case 'd':
416057ca2d4SBaptiste Daroussin case 'D':
417057ca2d4SBaptiste Daroussin return (scan_dec_byte());
418057ca2d4SBaptiste Daroussin case 'x':
419057ca2d4SBaptiste Daroussin case 'X':
420057ca2d4SBaptiste Daroussin return (scan_hex_byte());
421057ca2d4SBaptiste Daroussin case '0':
422057ca2d4SBaptiste Daroussin case '1':
423057ca2d4SBaptiste Daroussin case '2':
424057ca2d4SBaptiste Daroussin case '3':
425057ca2d4SBaptiste Daroussin case '4':
426057ca2d4SBaptiste Daroussin case '5':
427057ca2d4SBaptiste Daroussin case '6':
428057ca2d4SBaptiste Daroussin case '7':
429057ca2d4SBaptiste Daroussin /* put the character back so we can get it */
430057ca2d4SBaptiste Daroussin unscanc(c);
431057ca2d4SBaptiste Daroussin return (scan_oct_byte());
432057ca2d4SBaptiste Daroussin default:
433057ca2d4SBaptiste Daroussin unscanc(c);
434057ca2d4SBaptiste Daroussin unscanc(esc_char);
435057ca2d4SBaptiste Daroussin return (EOF);
436057ca2d4SBaptiste Daroussin }
437057ca2d4SBaptiste Daroussin }
438057ca2d4SBaptiste Daroussin
/*
 * Translate the character that follows an escape character.  The letters
 * n, r, t, f, v, b and a map to the corresponding control characters; any
 * other character is returned unchanged.
 */
int
get_escaped(int c)
{
	static const struct {
		char letter;
		char value;
	} escapes[] = {
		{ 'n', '\n' },
		{ 'r', '\r' },
		{ 't', '\t' },
		{ 'f', '\f' },
		{ 'v', '\v' },
		{ 'b', '\b' },
		{ 'a', '\a' },
	};
	unsigned int i;

	for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); i++) {
		if (c == escapes[i].letter)
			return (escapes[i].value);
	}
	return (c);
}
461057ca2d4SBaptiste Daroussin
462057ca2d4SBaptiste Daroussin int
get_wide(void)463057ca2d4SBaptiste Daroussin get_wide(void)
464057ca2d4SBaptiste Daroussin {
465057ca2d4SBaptiste Daroussin static char mbs[MB_LEN_MAX + 1] = "";
466057ca2d4SBaptiste Daroussin static int mbi = 0;
467057ca2d4SBaptiste Daroussin int c;
468057ca2d4SBaptiste Daroussin wchar_t wc;
469057ca2d4SBaptiste Daroussin
470057ca2d4SBaptiste Daroussin if (mb_cur_max >= (int)sizeof (mbs)) {
471057ca2d4SBaptiste Daroussin yyerror("max multibyte character size too big");
472057ca2d4SBaptiste Daroussin mbi = 0;
473057ca2d4SBaptiste Daroussin return (T_NULL);
474057ca2d4SBaptiste Daroussin }
475057ca2d4SBaptiste Daroussin for (;;) {
476057ca2d4SBaptiste Daroussin if ((mbi == mb_cur_max) || ((c = get_byte()) == EOF)) {
477057ca2d4SBaptiste Daroussin /*
478057ca2d4SBaptiste Daroussin * end of the byte sequence reached, but no
479057ca2d4SBaptiste Daroussin * valid wide decoding. fatal error.
480057ca2d4SBaptiste Daroussin */
481057ca2d4SBaptiste Daroussin mbi = 0;
482057ca2d4SBaptiste Daroussin yyerror("not a valid character encoding");
483057ca2d4SBaptiste Daroussin return (T_NULL);
484057ca2d4SBaptiste Daroussin }
485057ca2d4SBaptiste Daroussin mbs[mbi++] = c;
486057ca2d4SBaptiste Daroussin mbs[mbi] = 0;
487057ca2d4SBaptiste Daroussin
488057ca2d4SBaptiste Daroussin /* does it decode? */
489057ca2d4SBaptiste Daroussin if (to_wide(&wc, mbs) >= 0) {
490057ca2d4SBaptiste Daroussin break;
491057ca2d4SBaptiste Daroussin }
492057ca2d4SBaptiste Daroussin }
493057ca2d4SBaptiste Daroussin
494057ca2d4SBaptiste Daroussin mbi = 0;
495057ca2d4SBaptiste Daroussin if ((category != T_CHARMAP) && (category != T_WIDTH)) {
496057ca2d4SBaptiste Daroussin if (check_charmap(wc) < 0) {
497057ca2d4SBaptiste Daroussin yyerror("no symbolic name for character");
498057ca2d4SBaptiste Daroussin return (T_NULL);
499057ca2d4SBaptiste Daroussin }
500057ca2d4SBaptiste Daroussin }
501057ca2d4SBaptiste Daroussin
502057ca2d4SBaptiste Daroussin yylval.wc = wc;
503057ca2d4SBaptiste Daroussin return (T_CHAR);
504057ca2d4SBaptiste Daroussin }
505057ca2d4SBaptiste Daroussin
506057ca2d4SBaptiste Daroussin int
get_symbol(void)507057ca2d4SBaptiste Daroussin get_symbol(void)
508057ca2d4SBaptiste Daroussin {
509057ca2d4SBaptiste Daroussin int c;
510057ca2d4SBaptiste Daroussin
511057ca2d4SBaptiste Daroussin while ((c = scanc()) != EOF) {
512057ca2d4SBaptiste Daroussin if (escaped) {
513057ca2d4SBaptiste Daroussin escaped = 0;
514057ca2d4SBaptiste Daroussin if (c == '\n')
515057ca2d4SBaptiste Daroussin continue;
516057ca2d4SBaptiste Daroussin add_tok(get_escaped(c));
517057ca2d4SBaptiste Daroussin continue;
518057ca2d4SBaptiste Daroussin }
519057ca2d4SBaptiste Daroussin if (c == esc_char) {
520057ca2d4SBaptiste Daroussin escaped = 1;
521057ca2d4SBaptiste Daroussin continue;
522057ca2d4SBaptiste Daroussin }
523057ca2d4SBaptiste Daroussin if (c == '\n') { /* well that's strange! */
524057ca2d4SBaptiste Daroussin yyerror("unterminated symbolic name");
525057ca2d4SBaptiste Daroussin continue;
526057ca2d4SBaptiste Daroussin }
527057ca2d4SBaptiste Daroussin if (c == '>') { /* end of symbol */
528057ca2d4SBaptiste Daroussin
529057ca2d4SBaptiste Daroussin /*
530057ca2d4SBaptiste Daroussin * This restarts the token from the beginning
531057ca2d4SBaptiste Daroussin * the next time we scan a character. (This
532057ca2d4SBaptiste Daroussin * token is complete.)
533057ca2d4SBaptiste Daroussin */
534057ca2d4SBaptiste Daroussin
535057ca2d4SBaptiste Daroussin if (token == NULL) {
536057ca2d4SBaptiste Daroussin yyerror("missing symbolic name");
537057ca2d4SBaptiste Daroussin return (T_NULL);
538057ca2d4SBaptiste Daroussin }
539057ca2d4SBaptiste Daroussin tokidx = 0;
540057ca2d4SBaptiste Daroussin
541057ca2d4SBaptiste Daroussin /*
542057ca2d4SBaptiste Daroussin * A few symbols are handled as keywords outside
543057ca2d4SBaptiste Daroussin * of the normal categories.
544057ca2d4SBaptiste Daroussin */
545057ca2d4SBaptiste Daroussin if (category == T_END) {
546057ca2d4SBaptiste Daroussin int i;
547057ca2d4SBaptiste Daroussin for (i = 0; symwords[i].name != 0; i++) {
548057ca2d4SBaptiste Daroussin if (strcmp(token, symwords[i].name) ==
549057ca2d4SBaptiste Daroussin 0) {
550057ca2d4SBaptiste Daroussin last_kw = symwords[i].id;
551057ca2d4SBaptiste Daroussin return (last_kw);
552057ca2d4SBaptiste Daroussin }
553057ca2d4SBaptiste Daroussin }
554057ca2d4SBaptiste Daroussin }
555057ca2d4SBaptiste Daroussin /*
556057ca2d4SBaptiste Daroussin * Contextual rule: Only literal characters are
557057ca2d4SBaptiste Daroussin * permitted in CHARMAP. Anywhere else the symbolic
558057ca2d4SBaptiste Daroussin * forms are fine.
559057ca2d4SBaptiste Daroussin */
560057ca2d4SBaptiste Daroussin if ((category != T_CHARMAP) &&
561057ca2d4SBaptiste Daroussin (lookup_charmap(token, &yylval.wc)) != -1) {
562057ca2d4SBaptiste Daroussin return (T_CHAR);
563057ca2d4SBaptiste Daroussin }
564057ca2d4SBaptiste Daroussin if ((yylval.collsym = lookup_collsym(token)) != NULL) {
565057ca2d4SBaptiste Daroussin return (T_COLLSYM);
566057ca2d4SBaptiste Daroussin }
567057ca2d4SBaptiste Daroussin if ((yylval.collelem = lookup_collelem(token)) !=
568057ca2d4SBaptiste Daroussin NULL) {
569057ca2d4SBaptiste Daroussin return (T_COLLELEM);
570057ca2d4SBaptiste Daroussin }
571057ca2d4SBaptiste Daroussin /* its an undefined symbol */
572057ca2d4SBaptiste Daroussin yylval.token = strdup(token);
573057ca2d4SBaptiste Daroussin token = NULL;
574057ca2d4SBaptiste Daroussin toksz = 0;
575057ca2d4SBaptiste Daroussin tokidx = 0;
576057ca2d4SBaptiste Daroussin return (T_SYMBOL);
577057ca2d4SBaptiste Daroussin }
578057ca2d4SBaptiste Daroussin add_tok(c);
579057ca2d4SBaptiste Daroussin }
580057ca2d4SBaptiste Daroussin
581057ca2d4SBaptiste Daroussin yyerror("unterminated symbolic name");
582057ca2d4SBaptiste Daroussin return (EOF);
583057ca2d4SBaptiste Daroussin }
584057ca2d4SBaptiste Daroussin
585057ca2d4SBaptiste Daroussin int
get_category(void)586057ca2d4SBaptiste Daroussin get_category(void)
587057ca2d4SBaptiste Daroussin {
588057ca2d4SBaptiste Daroussin return (category);
589057ca2d4SBaptiste Daroussin }
590057ca2d4SBaptiste Daroussin
591057ca2d4SBaptiste Daroussin static int
consume_token(void)592057ca2d4SBaptiste Daroussin consume_token(void)
593057ca2d4SBaptiste Daroussin {
594057ca2d4SBaptiste Daroussin int len = tokidx;
595057ca2d4SBaptiste Daroussin int i;
596057ca2d4SBaptiste Daroussin
597057ca2d4SBaptiste Daroussin tokidx = 0;
598057ca2d4SBaptiste Daroussin if (token == NULL)
599057ca2d4SBaptiste Daroussin return (T_NULL);
600057ca2d4SBaptiste Daroussin
601057ca2d4SBaptiste Daroussin /*
602057ca2d4SBaptiste Daroussin * this one is special, because we don't want it to alter the
603057ca2d4SBaptiste Daroussin * last_kw field.
604057ca2d4SBaptiste Daroussin */
605057ca2d4SBaptiste Daroussin if (strcmp(token, "...") == 0) {
606057ca2d4SBaptiste Daroussin return (T_ELLIPSIS);
607057ca2d4SBaptiste Daroussin }
608057ca2d4SBaptiste Daroussin
609057ca2d4SBaptiste Daroussin /* search for reserved words first */
610057ca2d4SBaptiste Daroussin for (i = 0; keywords[i].name; i++) {
611057ca2d4SBaptiste Daroussin int j;
612057ca2d4SBaptiste Daroussin if (strcmp(keywords[i].name, token) != 0) {
613057ca2d4SBaptiste Daroussin continue;
614057ca2d4SBaptiste Daroussin }
615057ca2d4SBaptiste Daroussin
616057ca2d4SBaptiste Daroussin last_kw = keywords[i].id;
617057ca2d4SBaptiste Daroussin
618057ca2d4SBaptiste Daroussin /* clear the top level category if we're done with it */
619057ca2d4SBaptiste Daroussin if (last_kw == T_END) {
620057ca2d4SBaptiste Daroussin category = T_END;
621057ca2d4SBaptiste Daroussin }
622057ca2d4SBaptiste Daroussin
623057ca2d4SBaptiste Daroussin /* set the top level category if we're changing */
624057ca2d4SBaptiste Daroussin for (j = 0; categories[j]; j++) {
625057ca2d4SBaptiste Daroussin if (categories[j] != last_kw)
626057ca2d4SBaptiste Daroussin continue;
627057ca2d4SBaptiste Daroussin category = last_kw;
628057ca2d4SBaptiste Daroussin }
629057ca2d4SBaptiste Daroussin
630057ca2d4SBaptiste Daroussin return (keywords[i].id);
631057ca2d4SBaptiste Daroussin }
632057ca2d4SBaptiste Daroussin
633057ca2d4SBaptiste Daroussin /* maybe its a numeric constant? */
634057ca2d4SBaptiste Daroussin if (isdigit(*token) || (*token == '-' && isdigit(token[1]))) {
635057ca2d4SBaptiste Daroussin char *eptr;
636057ca2d4SBaptiste Daroussin yylval.num = strtol(token, &eptr, 10);
637057ca2d4SBaptiste Daroussin if (*eptr != 0)
638057ca2d4SBaptiste Daroussin yyerror("malformed number");
639057ca2d4SBaptiste Daroussin return (T_NUMBER);
640057ca2d4SBaptiste Daroussin }
641057ca2d4SBaptiste Daroussin
642057ca2d4SBaptiste Daroussin /*
643057ca2d4SBaptiste Daroussin * A single lone character is treated as a character literal.
644057ca2d4SBaptiste Daroussin * To avoid duplication of effort, we stick in the charmap.
645057ca2d4SBaptiste Daroussin */
646057ca2d4SBaptiste Daroussin if (len == 1) {
647057ca2d4SBaptiste Daroussin yylval.wc = token[0];
648057ca2d4SBaptiste Daroussin return (T_CHAR);
649057ca2d4SBaptiste Daroussin }
650057ca2d4SBaptiste Daroussin
651057ca2d4SBaptiste Daroussin /* anything else is treated as a symbolic name */
652057ca2d4SBaptiste Daroussin yylval.token = strdup(token);
653057ca2d4SBaptiste Daroussin token = NULL;
654057ca2d4SBaptiste Daroussin toksz = 0;
655057ca2d4SBaptiste Daroussin tokidx = 0;
656057ca2d4SBaptiste Daroussin return (T_NAME);
657057ca2d4SBaptiste Daroussin }
658057ca2d4SBaptiste Daroussin
/*
 * Discard input up to and including the next newline.
 *
 * Characters are pulled from scanc() one at a time.  Hitting EOF before
 * a newline is reported through errf("missing newline").
 */
void
scan_to_eol(void)
{
	int ch;

	for (;;) {
		ch = scanc();
		if (ch == '\n')
			break;
		if (ch == EOF) {
			/* input ended in the middle of a line */
			errf("missing newline");
			return;
		}
	}
	assert(ch == '\n');
}
672057ca2d4SBaptiste Daroussin
673057ca2d4SBaptiste Daroussin int
yylex(void)674057ca2d4SBaptiste Daroussin yylex(void)
675057ca2d4SBaptiste Daroussin {
676057ca2d4SBaptiste Daroussin int c;
677057ca2d4SBaptiste Daroussin
678057ca2d4SBaptiste Daroussin while ((c = scanc()) != EOF) {
679057ca2d4SBaptiste Daroussin
680057ca2d4SBaptiste Daroussin /* special handling for quoted string */
681057ca2d4SBaptiste Daroussin if (instring) {
682057ca2d4SBaptiste Daroussin if (escaped) {
683057ca2d4SBaptiste Daroussin escaped = 0;
684057ca2d4SBaptiste Daroussin
685057ca2d4SBaptiste Daroussin /* if newline, just eat and forget it */
686057ca2d4SBaptiste Daroussin if (c == '\n')
687057ca2d4SBaptiste Daroussin continue;
688057ca2d4SBaptiste Daroussin
689057ca2d4SBaptiste Daroussin if (strchr("xXd01234567", c)) {
690057ca2d4SBaptiste Daroussin unscanc(c);
691057ca2d4SBaptiste Daroussin unscanc(esc_char);
692057ca2d4SBaptiste Daroussin return (get_wide());
693057ca2d4SBaptiste Daroussin }
694057ca2d4SBaptiste Daroussin yylval.wc = get_escaped(c);
695057ca2d4SBaptiste Daroussin return (T_CHAR);
696057ca2d4SBaptiste Daroussin }
697057ca2d4SBaptiste Daroussin if (c == esc_char) {
698057ca2d4SBaptiste Daroussin escaped = 1;
699057ca2d4SBaptiste Daroussin continue;
700057ca2d4SBaptiste Daroussin }
701057ca2d4SBaptiste Daroussin switch (c) {
702057ca2d4SBaptiste Daroussin case '<':
703057ca2d4SBaptiste Daroussin return (get_symbol());
704057ca2d4SBaptiste Daroussin case '>':
705057ca2d4SBaptiste Daroussin /* oops! should generate syntax error */
706057ca2d4SBaptiste Daroussin return (T_GT);
707057ca2d4SBaptiste Daroussin case '"':
708057ca2d4SBaptiste Daroussin instring = 0;
709057ca2d4SBaptiste Daroussin return (T_QUOTE);
710057ca2d4SBaptiste Daroussin default:
711057ca2d4SBaptiste Daroussin yylval.wc = c;
712057ca2d4SBaptiste Daroussin return (T_CHAR);
713057ca2d4SBaptiste Daroussin }
714057ca2d4SBaptiste Daroussin }
715057ca2d4SBaptiste Daroussin
716057ca2d4SBaptiste Daroussin /* escaped characters first */
717057ca2d4SBaptiste Daroussin if (escaped) {
718057ca2d4SBaptiste Daroussin escaped = 0;
719057ca2d4SBaptiste Daroussin if (c == '\n') {
720057ca2d4SBaptiste Daroussin /* eat the newline */
721057ca2d4SBaptiste Daroussin continue;
722057ca2d4SBaptiste Daroussin }
723057ca2d4SBaptiste Daroussin hadtok = 1;
724057ca2d4SBaptiste Daroussin if (tokidx) {
725057ca2d4SBaptiste Daroussin /* an escape mid-token is nonsense */
726057ca2d4SBaptiste Daroussin return (T_NULL);
727057ca2d4SBaptiste Daroussin }
728057ca2d4SBaptiste Daroussin
729057ca2d4SBaptiste Daroussin /* numeric escapes are treated as wide characters */
730057ca2d4SBaptiste Daroussin if (strchr("xXd01234567", c)) {
731057ca2d4SBaptiste Daroussin unscanc(c);
732057ca2d4SBaptiste Daroussin unscanc(esc_char);
733057ca2d4SBaptiste Daroussin return (get_wide());
734057ca2d4SBaptiste Daroussin }
735057ca2d4SBaptiste Daroussin
736057ca2d4SBaptiste Daroussin add_tok(get_escaped(c));
737057ca2d4SBaptiste Daroussin continue;
738057ca2d4SBaptiste Daroussin }
739057ca2d4SBaptiste Daroussin
740057ca2d4SBaptiste Daroussin /* if it is the escape charter itself note it */
741057ca2d4SBaptiste Daroussin if (c == esc_char) {
742057ca2d4SBaptiste Daroussin escaped = 1;
743057ca2d4SBaptiste Daroussin continue;
744057ca2d4SBaptiste Daroussin }
745057ca2d4SBaptiste Daroussin
746057ca2d4SBaptiste Daroussin /* remove from the comment char to end of line */
747057ca2d4SBaptiste Daroussin if (c == com_char) {
748057ca2d4SBaptiste Daroussin while (c != '\n') {
749057ca2d4SBaptiste Daroussin if ((c = scanc()) == EOF) {
750057ca2d4SBaptiste Daroussin /* end of file without newline! */
751057ca2d4SBaptiste Daroussin return (EOF);
752057ca2d4SBaptiste Daroussin }
753057ca2d4SBaptiste Daroussin }
754057ca2d4SBaptiste Daroussin assert(c == '\n');
755057ca2d4SBaptiste Daroussin if (!hadtok) {
756057ca2d4SBaptiste Daroussin /*
757057ca2d4SBaptiste Daroussin * If there were no tokens on this line,
758057ca2d4SBaptiste Daroussin * then just pretend it didn't exist at all.
759057ca2d4SBaptiste Daroussin */
760057ca2d4SBaptiste Daroussin continue;
761057ca2d4SBaptiste Daroussin }
762057ca2d4SBaptiste Daroussin hadtok = 0;
763057ca2d4SBaptiste Daroussin return (T_NL);
764057ca2d4SBaptiste Daroussin }
765057ca2d4SBaptiste Daroussin
766057ca2d4SBaptiste Daroussin if (strchr(" \t\n;()<>,\"", c) && (tokidx != 0)) {
767057ca2d4SBaptiste Daroussin /*
768057ca2d4SBaptiste Daroussin * These are all token delimiters. If there
769057ca2d4SBaptiste Daroussin * is a token already in progress, we need to
770057ca2d4SBaptiste Daroussin * process it.
771057ca2d4SBaptiste Daroussin */
772057ca2d4SBaptiste Daroussin unscanc(c);
773057ca2d4SBaptiste Daroussin return (consume_token());
774057ca2d4SBaptiste Daroussin }
775057ca2d4SBaptiste Daroussin
776057ca2d4SBaptiste Daroussin switch (c) {
777057ca2d4SBaptiste Daroussin case '\n':
778057ca2d4SBaptiste Daroussin if (!hadtok) {
779057ca2d4SBaptiste Daroussin /*
780057ca2d4SBaptiste Daroussin * If the line was completely devoid of tokens,
781057ca2d4SBaptiste Daroussin * then just ignore it.
782057ca2d4SBaptiste Daroussin */
783057ca2d4SBaptiste Daroussin continue;
784057ca2d4SBaptiste Daroussin }
785057ca2d4SBaptiste Daroussin /* we're starting a new line, reset the token state */
786057ca2d4SBaptiste Daroussin hadtok = 0;
787057ca2d4SBaptiste Daroussin return (T_NL);
788057ca2d4SBaptiste Daroussin case ',':
789057ca2d4SBaptiste Daroussin hadtok = 1;
790057ca2d4SBaptiste Daroussin return (T_COMMA);
791057ca2d4SBaptiste Daroussin case ';':
792057ca2d4SBaptiste Daroussin hadtok = 1;
793057ca2d4SBaptiste Daroussin return (T_SEMI);
794057ca2d4SBaptiste Daroussin case '(':
795057ca2d4SBaptiste Daroussin hadtok = 1;
796057ca2d4SBaptiste Daroussin return (T_LPAREN);
797057ca2d4SBaptiste Daroussin case ')':
798057ca2d4SBaptiste Daroussin hadtok = 1;
799057ca2d4SBaptiste Daroussin return (T_RPAREN);
800057ca2d4SBaptiste Daroussin case '>':
801057ca2d4SBaptiste Daroussin hadtok = 1;
802057ca2d4SBaptiste Daroussin return (T_GT);
803057ca2d4SBaptiste Daroussin case '<':
804057ca2d4SBaptiste Daroussin /* symbol start! */
805057ca2d4SBaptiste Daroussin hadtok = 1;
806057ca2d4SBaptiste Daroussin return (get_symbol());
807057ca2d4SBaptiste Daroussin case ' ':
808057ca2d4SBaptiste Daroussin case '\t':
809057ca2d4SBaptiste Daroussin /* whitespace, just ignore it */
810057ca2d4SBaptiste Daroussin continue;
811057ca2d4SBaptiste Daroussin case '"':
812057ca2d4SBaptiste Daroussin hadtok = 1;
813057ca2d4SBaptiste Daroussin instring = 1;
814057ca2d4SBaptiste Daroussin return (T_QUOTE);
815057ca2d4SBaptiste Daroussin default:
816057ca2d4SBaptiste Daroussin hadtok = 1;
817057ca2d4SBaptiste Daroussin add_tok(c);
818057ca2d4SBaptiste Daroussin continue;
819057ca2d4SBaptiste Daroussin }
820057ca2d4SBaptiste Daroussin }
821057ca2d4SBaptiste Daroussin return (EOF);
822057ca2d4SBaptiste Daroussin }
823057ca2d4SBaptiste Daroussin
824057ca2d4SBaptiste Daroussin void
yyerror(const char * msg)825057ca2d4SBaptiste Daroussin yyerror(const char *msg)
826057ca2d4SBaptiste Daroussin {
827057ca2d4SBaptiste Daroussin (void) fprintf(stderr, "%s: %d: error: %s\n",
828057ca2d4SBaptiste Daroussin filename, lineno, msg);
829057ca2d4SBaptiste Daroussin exit(4);
830057ca2d4SBaptiste Daroussin }
831057ca2d4SBaptiste Daroussin
832057ca2d4SBaptiste Daroussin void
errf(const char * fmt,...)833057ca2d4SBaptiste Daroussin errf(const char *fmt, ...)
834057ca2d4SBaptiste Daroussin {
835057ca2d4SBaptiste Daroussin char *msg;
836057ca2d4SBaptiste Daroussin
837057ca2d4SBaptiste Daroussin va_list va;
838057ca2d4SBaptiste Daroussin va_start(va, fmt);
839057ca2d4SBaptiste Daroussin (void) vasprintf(&msg, fmt, va);
840057ca2d4SBaptiste Daroussin va_end(va);
841057ca2d4SBaptiste Daroussin
842057ca2d4SBaptiste Daroussin (void) fprintf(stderr, "%s: %d: error: %s\n",
843057ca2d4SBaptiste Daroussin filename, lineno, msg);
844057ca2d4SBaptiste Daroussin free(msg);
845057ca2d4SBaptiste Daroussin exit(4);
846057ca2d4SBaptiste Daroussin }
847057ca2d4SBaptiste Daroussin
848057ca2d4SBaptiste Daroussin void
warn(const char * fmt,...)849057ca2d4SBaptiste Daroussin warn(const char *fmt, ...)
850057ca2d4SBaptiste Daroussin {
851057ca2d4SBaptiste Daroussin char *msg;
852057ca2d4SBaptiste Daroussin
853057ca2d4SBaptiste Daroussin va_list va;
854057ca2d4SBaptiste Daroussin va_start(va, fmt);
855057ca2d4SBaptiste Daroussin (void) vasprintf(&msg, fmt, va);
856057ca2d4SBaptiste Daroussin va_end(va);
857057ca2d4SBaptiste Daroussin
858057ca2d4SBaptiste Daroussin (void) fprintf(stderr, "%s: %d: warning: %s\n",
859057ca2d4SBaptiste Daroussin filename, lineno, msg);
860057ca2d4SBaptiste Daroussin free(msg);
861057ca2d4SBaptiste Daroussin warnings++;
862057ca2d4SBaptiste Daroussin if (!warnok)
863057ca2d4SBaptiste Daroussin exit(4);
864057ca2d4SBaptiste Daroussin }
865