xref: /freebsd/usr.bin/localedef/scanner.c (revision 8aac90f18aef7c9eea906c3ff9a001ca7b94f375)
1 /*-
2  * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
3  * Copyright 2015 John Marino <draco@marino.st>
4  *
5  * This source code is derived from the illumos localedef command, and
6  * provided under BSD-style license terms by Nexenta Systems, Inc.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 
31 /*
32  * This file contains the "scanner", which tokenizes the input files
33  * for localedef for processing by the higher level grammar processor.
34  */
35 #include <sys/cdefs.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <ctype.h>
39 #include <limits.h>
40 #include <string.h>
41 #include <wchar.h>
42 #include <sys/types.h>
43 #include <assert.h>
44 #include "localedef.h"
45 #include "parser.h"
46 
/* Comment-introducer character; reset_scanner() restores it to '#'. */
int			com_char = '#';
/* Escape character; reset_scanner() restores it to '\\'. */
int			esc_char = '\\';
/* NOTE(review): mb_cur_min is not read anywhere in this file. */
int			mb_cur_min = 1;
/* Upper bound on the number of bytes get_wide() gathers per character. */
int			mb_cur_max = 1;
/* Line number of the most recently scanned character (see scanc()). */
int			lineno = 1;
/* Number of warnings reported through warn(). */
int			warnings = 0;
/* Nonzero while reading from stdin; `input' is not consulted in that case. */
int			is_stdin = 1;
/* Stream opened by reset_scanner() when it is given a file name. */
FILE			*input;
/* Line number the next character read will belong to. */
static int		nextline;
/* Name of the current input, used as the prefix of diagnostics. */
static const char	*filename = "<stdin>";
/* Nonzero while yylex() is between an opening and closing double quote. */
static int		instring = 0;
/* Nonzero when the previous character was the escape character. */
static int		escaped = 0;
60 
/*
 * Token space ... grows on demand.
 *
 * add_tok() appends bytes here and keeps the buffer NUL terminated.
 */
static char *token = NULL;	/* buffer holding the token being built */
static int tokidx;		/* number of bytes currently in the token */
static int toksz = 0;		/* allocated size; grown 64 bytes at a time */
static int hadtok = 0;		/* set by yylex() once the line has a token */
68 
/*
 * Wide string space ... grows on demand.
 *
 * add_wcs() appends here; get_wcs() hands the buffer to the caller and
 * resets all three variables.
 */
static wchar_t *widestr = NULL;	/* buffer holding the wide string */
static int wideidx = 0;		/* number of wide characters stored */
static int widesz = 0;		/* allocated size in wide characters */
75 
/*
 * The last keyword seen.  This is useful to trigger the special lexer rules
 * for "copy" and also collating symbols and elements.
 *
 * `category' is the top-level section currently being scanned; it is
 * T_END while outside any section (initially, and after an END keyword).
 */
int	last_kw = 0;
static int	category = T_END;
82 
/*
 * Reserved words and the token id each one maps to.  consume_token()
 * searches this table linearly with an exact, case-sensitive strcmp().
 * The table ends with a { -1, NULL } sentinel.
 */
static struct token {
	int id;
	const char *name;
} keywords[] = {
	{ T_COM_CHAR,		"comment_char" },
	{ T_ESC_CHAR,		"escape_char" },
	{ T_END,		"END" },
	{ T_COPY,		"copy" },
	{ T_MESSAGES,		"LC_MESSAGES" },
	{ T_YESSTR,		"yesstr" },
	{ T_YESEXPR,		"yesexpr" },
	{ T_NOSTR,		"nostr" },
	{ T_NOEXPR,		"noexpr" },
	{ T_MONETARY,		"LC_MONETARY" },
	{ T_INT_CURR_SYMBOL,	"int_curr_symbol" },
	{ T_CURRENCY_SYMBOL,	"currency_symbol" },
	{ T_MON_DECIMAL_POINT,	"mon_decimal_point" },
	{ T_MON_THOUSANDS_SEP,	"mon_thousands_sep" },
	{ T_POSITIVE_SIGN,	"positive_sign" },
	{ T_NEGATIVE_SIGN,	"negative_sign" },
	{ T_MON_GROUPING,	"mon_grouping" },
	{ T_INT_FRAC_DIGITS,	"int_frac_digits" },
	{ T_FRAC_DIGITS,	"frac_digits" },
	{ T_P_CS_PRECEDES,	"p_cs_precedes" },
	{ T_P_SEP_BY_SPACE,	"p_sep_by_space" },
	{ T_N_CS_PRECEDES,	"n_cs_precedes" },
	{ T_N_SEP_BY_SPACE,	"n_sep_by_space" },
	{ T_P_SIGN_POSN,	"p_sign_posn" },
	{ T_N_SIGN_POSN,	"n_sign_posn" },
	{ T_INT_P_CS_PRECEDES,	"int_p_cs_precedes" },
	{ T_INT_N_CS_PRECEDES,	"int_n_cs_precedes" },
	{ T_INT_P_SEP_BY_SPACE,	"int_p_sep_by_space" },
	{ T_INT_N_SEP_BY_SPACE,	"int_n_sep_by_space" },
	{ T_INT_P_SIGN_POSN,	"int_p_sign_posn" },
	{ T_INT_N_SIGN_POSN,	"int_n_sign_posn" },
	{ T_COLLATE,		"LC_COLLATE" },
	{ T_COLLATING_SYMBOL,	"collating-symbol" },
	{ T_COLLATING_ELEMENT,	"collating-element" },
	{ T_FROM,		"from" },
	{ T_ORDER_START,	"order_start" },
	{ T_ORDER_END,		"order_end" },
	{ T_FORWARD,		"forward" },
	{ T_BACKWARD,		"backward" },
	{ T_POSITION,		"position" },
	{ T_IGNORE,		"IGNORE" },
	{ T_UNDEFINED,		"UNDEFINED" },
	{ T_NUMERIC,		"LC_NUMERIC" },
	{ T_DECIMAL_POINT,	"decimal_point" },
	{ T_THOUSANDS_SEP,	"thousands_sep" },
	{ T_GROUPING,		"grouping" },
	{ T_TIME,		"LC_TIME" },
	{ T_ABDAY,		"abday" },
	{ T_DAY,		"day" },
	{ T_ABMON,		"abmon" },
	{ T_MON,		"mon" },
	{ T_D_T_FMT,		"d_t_fmt" },
	{ T_D_FMT,		"d_fmt" },
	{ T_T_FMT,		"t_fmt" },
	{ T_AM_PM,		"am_pm" },
	{ T_T_FMT_AMPM,		"t_fmt_ampm" },
	{ T_ERA,		"era" },
	{ T_ERA_D_FMT,		"era_d_fmt" },
	{ T_ERA_T_FMT,		"era_t_fmt" },
	{ T_ERA_D_T_FMT,	"era_d_t_fmt" },
	{ T_ALT_DIGITS,		"alt_digits" },
	{ T_CTYPE,		"LC_CTYPE" },
	{ T_ISUPPER,		"upper" },
	{ T_ISLOWER,		"lower" },
	{ T_ISALPHA,		"alpha" },
	{ T_ISDIGIT,		"digit" },
	{ T_ISPUNCT,		"punct" },
	{ T_ISXDIGIT,		"xdigit" },
	{ T_ISSPACE,		"space" },
	{ T_ISPRINT,		"print" },
	{ T_ISGRAPH,		"graph" },
	{ T_ISBLANK,		"blank" },
	{ T_ISCNTRL,		"cntrl" },
	/*
	 * These entries are local additions, and not specified by
	 * TOG.  Note that they are not guaranteed to be accurate for
	 * all locales, and so applications should not depend on them.
	 */
	{ T_ISSPECIAL,		"special" },
	{ T_ISENGLISH,		"english" },
	{ T_ISPHONOGRAM,	"phonogram" },
	{ T_ISIDEOGRAM,		"ideogram" },
	{ T_ISNUMBER,		"number" },
	/*
	 * We have to support this in the grammar, but it would be a
	 * syntax error to define a character as one of these without
	 * also defining it as an alpha or digit.  We ignore it in our
	 * parsing.
	 */
	{ T_ISALNUM,		"alnum" },
	{ T_TOUPPER,		"toupper" },
	{ T_TOLOWER,		"tolower" },

	/*
	 * These are keywords used in the charmap file.  Note that
	 * Solaris originally used angle brackets to wrap some of them,
	 * but we removed that to simplify our parser.  The first of these
	 * items are "global items."
	 */
	{ T_CHARMAP,		"CHARMAP" },
	{ T_WIDTH,		"WIDTH" },

	/* end-of-table sentinel; lookups stop at the NULL name */
	{ -1, NULL },
};
191 
/*
 * These special words are only used in a charmap file, enclosed in <>.
 *
 * get_symbol() consults this table only while no category is open
 * (category == T_END).  The table ends with a { -1, NULL } sentinel.
 */
static struct token symwords[] = {
	{ T_COM_CHAR,		"comment_char" },
	{ T_ESC_CHAR,		"escape_char" },
	{ T_CODE_SET,		"code_set_name" },
	{ T_MB_CUR_MAX,		"mb_cur_max" },
	{ T_MB_CUR_MIN,		"mb_cur_min" },
	{ -1, NULL },
};
203 
/*
 * Keyword ids that open a top-level category.  When consume_token()
 * recognizes one of these it records it in `category'.  The list is
 * terminated by 0.
 */
static int categories[] = {
	T_CHARMAP,
	T_CTYPE,
	T_COLLATE,
	T_MESSAGES,
	T_MONETARY,
	T_NUMERIC,
	T_TIME,
	T_WIDTH,
	0
};
215 
216 void
217 reset_scanner(const char *fname)
218 {
219 	if (fname == NULL) {
220 		filename = "<stdin>";
221 		is_stdin = 1;
222 	} else {
223 		if (!is_stdin)
224 			(void) fclose(input);
225 		if ((input = fopen(fname, "r")) == NULL) {
226 			perror("fopen");
227 			exit(4);
228 		} else {
229 			is_stdin = 0;
230 		}
231 		filename = fname;
232 	}
233 	com_char = '#';
234 	esc_char = '\\';
235 	instring = 0;
236 	escaped = 0;
237 	lineno = 1;
238 	nextline = 1;
239 	tokidx = 0;
240 	wideidx = 0;
241 }
242 
/*
 * hex(x): numeric value (0-15) of the hexadecimal digit character x.
 * The caller must already have checked x with isxdigit().
 * isodigit(x): nonzero when x is one of the octal digits '0'..'7'.
 *
 * Both macros evaluate x more than once, so x must have no side effects.
 * Every use of x is parenthesized so that expression arguments bind as
 * the caller expects.
 */
#define	hex(x)	\
	(isdigit(x) ? ((x) - '0') : \
	    ((islower(x) ? ((x) - 'a') : ((x) - 'A')) + 10))
#define	isodigit(x)	(((x) >= '0') && ((x) <= '7'))
246 
247 static int
248 scanc(void)
249 {
250 	int	c;
251 
252 	if (is_stdin)
253 		c = getc(stdin);
254 	else
255 		c = getc(input);
256 	lineno = nextline;
257 	if (c == '\n') {
258 		nextline++;
259 	}
260 	return (c);
261 }
262 
263 static void
264 unscanc(int c)
265 {
266 	if (c == '\n') {
267 		nextline--;
268 	}
269 	if (ungetc(c, is_stdin ? stdin : input) < 0) {
270 		yyerror("ungetc failed");
271 	}
272 }
273 
/*
 * Read exactly two hexadecimal digits and return the byte they encode.
 * Anything else is reported through yyerror(); 0 is returned after that.
 */
static int
scan_hex_byte(void)
{
	int	hi, lo;

	hi = scanc();
	if (isxdigit(hi)) {
		lo = scanc();
		if (isxdigit(lo)) {
			/* high nibble first, then low nibble */
			return ((hex(hi) << 4) | hex(lo));
		}
	}

	yyerror("malformed hex digit");
	return (0);
}
293 
/*
 * Read a decimal byte constant of two or three digits and return its value.
 *
 * A third character that is not a digit is pushed back for the caller.
 * Fewer than two digits, or a value that does not fit in one byte
 * (greater than UCHAR_MAX), is reported through yyerror(); 0 is returned
 * after that.  Without the range check an input such as \d999 would be
 * truncated when the result is stored into a char.
 */
static int
scan_dec_byte(void)
{
	int	c;
	int	b;

	c = scanc();
	if (!isdigit(c)) {
		yyerror("malformed decimal digit");
		return (0);
	}
	b = c - '0';

	c = scanc();
	if (!isdigit(c)) {
		yyerror("malformed decimal digit");
		return (0);
	}
	b = (b * 10) + (c - '0');

	/* the third digit is optional */
	c = scanc();
	if (isdigit(c)) {
		b = (b * 10) + (c - '0');
	} else {
		unscanc(c);
	}

	if (b > UCHAR_MAX) {
		yyerror("decimal value out of range");
		return (0);
	}
	return (b);
}
322 
/*
 * Read an octal byte constant of two or three digits and return its value.
 *
 * A third character that is not an octal digit is pushed back for the
 * caller.  Fewer than two digits, or a value that does not fit in one byte
 * (greater than UCHAR_MAX, e.g. 777), is reported through yyerror(); 0 is
 * returned after that.
 */
static int
scan_oct_byte(void)
{
	int	c;
	int	b;

	c = scanc();
	if (!isodigit(c)) {
		yyerror("malformed octal digit");
		return (0);
	}
	b = c - '0';

	c = scanc();
	if (!isodigit(c)) {
		yyerror("malformed octal digit");
		return (0);
	}
	b = (b * 8) + (c - '0');

	/* the third digit is optional */
	c = scanc();
	if (isodigit(c)) {
		b = (b * 8) + (c - '0');
	} else {
		unscanc(c);
	}

	if (b > UCHAR_MAX) {
		yyerror("octal value out of range");
		return (0);
	}
	return (b);
}
353 
354 void
355 add_tok(int c)
356 {
357 	if ((tokidx + 1) >= toksz) {
358 		toksz += 64;
359 		if ((token = realloc(token, toksz)) == NULL) {
360 			yyerror("out of memory");
361 			tokidx = 0;
362 			toksz = 0;
363 			return;
364 		}
365 	}
366 
367 	token[tokidx++] = (char)c;
368 	token[tokidx] = 0;
369 }
370 void
371 add_wcs(wchar_t c)
372 {
373 	if ((wideidx + 1) >= widesz) {
374 		widesz += 64;
375 		widestr = realloc(widestr, (widesz * sizeof (wchar_t)));
376 		if (widestr == NULL) {
377 			yyerror("out of memory");
378 			wideidx = 0;
379 			widesz = 0;
380 			return;
381 		}
382 	}
383 
384 	widestr[wideidx++] = c;
385 	widestr[wideidx] = 0;
386 }
387 
388 wchar_t *
389 get_wcs(void)
390 {
391 	wchar_t *ws = widestr;
392 	wideidx = 0;
393 	widestr = NULL;
394 	widesz = 0;
395 	if (ws == NULL) {
396 		if ((ws = wcsdup(L"")) == NULL) {
397 			yyerror("out of memory");
398 		}
399 	}
400 	return (ws);
401 }
402 
403 static int
404 get_byte(void)
405 {
406 	int	c;
407 
408 	if ((c = scanc()) != esc_char) {
409 		unscanc(c);
410 		return (EOF);
411 	}
412 	c = scanc();
413 
414 	switch (c) {
415 	case 'd':
416 	case 'D':
417 		return (scan_dec_byte());
418 	case 'x':
419 	case 'X':
420 		return (scan_hex_byte());
421 	case '0':
422 	case '1':
423 	case '2':
424 	case '3':
425 	case '4':
426 	case '5':
427 	case '6':
428 	case '7':
429 		/* put the character back so we can get it */
430 		unscanc(c);
431 		return (scan_oct_byte());
432 	default:
433 		unscanc(c);
434 		unscanc(esc_char);
435 		return (EOF);
436 	}
437 }
438 
/*
 * Translate the letter following an escape character into the control
 * character it names (n, r, t, f, v, b, a).  Any other value is returned
 * unchanged.
 */
int
get_escaped(int c)
{
	static const struct {
		int	letter;
		int	value;
	} escapes[] = {
		{ 'n', '\n' },
		{ 'r', '\r' },
		{ 't', '\t' },
		{ 'f', '\f' },
		{ 'v', '\v' },
		{ 'b', '\b' },
		{ 'a', '\a' },
	};
	size_t	i;

	for (i = 0; i < sizeof (escapes) / sizeof (escapes[0]); i++) {
		if (escapes[i].letter == c)
			return (escapes[i].value);
	}
	return (c);
}
461 
462 int
463 get_wide(void)
464 {
465 	static char mbs[MB_LEN_MAX + 1] = "";
466 	static int mbi = 0;
467 	int c;
468 	wchar_t	wc;
469 
470 	if (mb_cur_max >= (int)sizeof (mbs)) {
471 		yyerror("max multibyte character size too big");
472 		mbi = 0;
473 		return (T_NULL);
474 	}
475 	for (;;) {
476 		if ((mbi == mb_cur_max) || ((c = get_byte()) == EOF)) {
477 			/*
478 			 * end of the byte sequence reached, but no
479 			 * valid wide decoding.  fatal error.
480 			 */
481 			mbi = 0;
482 			yyerror("not a valid character encoding");
483 			return (T_NULL);
484 		}
485 		mbs[mbi++] = c;
486 		mbs[mbi] = 0;
487 
488 		/* does it decode? */
489 		if (to_wide(&wc, mbs) >= 0) {
490 			break;
491 		}
492 	}
493 
494 	mbi = 0;
495 	if ((category != T_CHARMAP) && (category != T_WIDTH)) {
496 		if (check_charmap(wc) < 0) {
497 			yyerror("no symbolic name for character");
498 			return (T_NULL);
499 		}
500 	}
501 
502 	yylval.wc = wc;
503 	return (T_CHAR);
504 }
505 
506 int
507 get_symbol(void)
508 {
509 	int	c;
510 
511 	while ((c = scanc()) != EOF) {
512 		if (escaped) {
513 			escaped = 0;
514 			if (c == '\n')
515 				continue;
516 			add_tok(get_escaped(c));
517 			continue;
518 		}
519 		if (c == esc_char) {
520 			escaped = 1;
521 			continue;
522 		}
523 		if (c == '\n') {	/* well that's strange! */
524 			yyerror("unterminated symbolic name");
525 			continue;
526 		}
527 		if (c == '>') {		/* end of symbol */
528 
529 			/*
530 			 * This restarts the token from the beginning
531 			 * the next time we scan a character.  (This
532 			 * token is complete.)
533 			 */
534 
535 			if (token == NULL) {
536 				yyerror("missing symbolic name");
537 				return (T_NULL);
538 			}
539 			tokidx = 0;
540 
541 			/*
542 			 * A few symbols are handled as keywords outside
543 			 * of the normal categories.
544 			 */
545 			if (category == T_END) {
546 				int i;
547 				for (i = 0; symwords[i].name != 0; i++) {
548 					if (strcmp(token, symwords[i].name) ==
549 					    0) {
550 						last_kw = symwords[i].id;
551 						return (last_kw);
552 					}
553 				}
554 			}
555 			/*
556 			 * Contextual rule: Only literal characters are
557 			 * permitted in CHARMAP.  Anywhere else the symbolic
558 			 * forms are fine.
559 			 */
560 			if ((category != T_CHARMAP) &&
561 			    (lookup_charmap(token, &yylval.wc)) != -1) {
562 				return (T_CHAR);
563 			}
564 			if ((yylval.collsym = lookup_collsym(token)) != NULL) {
565 				return (T_COLLSYM);
566 			}
567 			if ((yylval.collelem = lookup_collelem(token)) !=
568 			    NULL) {
569 				return (T_COLLELEM);
570 			}
571 			/* its an undefined symbol */
572 			yylval.token = strdup(token);
573 			token = NULL;
574 			toksz = 0;
575 			tokidx = 0;
576 			return (T_SYMBOL);
577 		}
578 		add_tok(c);
579 	}
580 
581 	yyerror("unterminated symbolic name");
582 	return (EOF);
583 }
584 
585 int
586 get_category(void)
587 {
588 	return (category);
589 }
590 
591 static int
592 consume_token(void)
593 {
594 	int	len = tokidx;
595 	int	i;
596 
597 	tokidx = 0;
598 	if (token == NULL)
599 		return (T_NULL);
600 
601 	/*
602 	 * this one is special, because we don't want it to alter the
603 	 * last_kw field.
604 	 */
605 	if (strcmp(token, "...") == 0) {
606 		return (T_ELLIPSIS);
607 	}
608 
609 	/* search for reserved words first */
610 	for (i = 0; keywords[i].name; i++) {
611 		int j;
612 		if (strcmp(keywords[i].name, token) != 0) {
613 			continue;
614 		}
615 
616 		last_kw = keywords[i].id;
617 
618 		/* clear the top level category if we're done with it */
619 		if (last_kw == T_END) {
620 			category = T_END;
621 		}
622 
623 		/* set the top level category if we're changing */
624 		for (j = 0; categories[j]; j++) {
625 			if (categories[j] != last_kw)
626 				continue;
627 			category = last_kw;
628 		}
629 
630 		return (keywords[i].id);
631 	}
632 
633 	/* maybe its a numeric constant? */
634 	if (isdigit(*token) || (*token == '-' && isdigit(token[1]))) {
635 		char *eptr;
636 		yylval.num = strtol(token, &eptr, 10);
637 		if (*eptr != 0)
638 			yyerror("malformed number");
639 		return (T_NUMBER);
640 	}
641 
642 	/*
643 	 * A single lone character is treated as a character literal.
644 	 * To avoid duplication of effort, we stick in the charmap.
645 	 */
646 	if (len == 1) {
647 		yylval.wc = token[0];
648 		return (T_CHAR);
649 	}
650 
651 	/* anything else is treated as a symbolic name */
652 	yylval.token = strdup(token);
653 	token = NULL;
654 	toksz = 0;
655 	tokidx = 0;
656 	return (T_NAME);
657 }
658 
/*
 * Discard input up to and including the next newline.  Reaching end of
 * input first is reported through errf().
 */
void
scan_to_eol(void)
{
	int	c;

	for (;;) {
		c = scanc();
		if (c == '\n')
			break;
		if (c == EOF) {
			/* end of file without newline! */
			errf("missing newline");
			return;
		}
	}
	assert(c == '\n');
}
672 
/*
 * The lexer entry point: return the id of the next token, or EOF when
 * the input is exhausted.  Token values are delivered through yylval.
 *
 * The scanner runs in two modes.  Inside a quoted string (instring set)
 * every character is returned on its own as T_CHAR, apart from escapes,
 * symbolic names in <>, and the closing quote.  Outside a string,
 * characters are accumulated with add_tok() until a delimiter is seen, at
 * which point the delimiter is pushed back and consume_token() classifies
 * the accumulated text.
 *
 * T_NL is returned only for lines on which some token was seen (hadtok);
 * blank and comment-only lines are skipped silently.
 */
int
yylex(void)
{
	int		c;

	while ((c = scanc()) != EOF) {

		/* special handling for quoted string */
		if (instring) {
			if (escaped) {
				escaped = 0;

				/* if newline, just eat and forget it */
				if (c == '\n')
					continue;

				/*
				 * Numeric escape: put both characters back
				 * so get_wide() can read the whole constant.
				 */
				if (strchr("xXd01234567", c)) {
					unscanc(c);
					unscanc(esc_char);
					return (get_wide());
				}
				yylval.wc = get_escaped(c);
				return (T_CHAR);
			}
			if (c == esc_char) {
				escaped = 1;
				continue;
			}
			switch (c) {
			case '<':
				return (get_symbol());
			case '>':
				/* oops! should generate syntax error  */
				return (T_GT);
			case '"':
				instring = 0;
				return (T_QUOTE);
			default:
				yylval.wc = c;
				return (T_CHAR);
			}
		}

		/* escaped characters first */
		if (escaped) {
			escaped = 0;
			if (c == '\n') {
				/* eat the newline */
				continue;
			}
			hadtok = 1;
			if (tokidx) {
				/* an escape mid-token is nonsense */
				return (T_NULL);
			}

			/* numeric escapes are treated as wide characters */
			if (strchr("xXd01234567", c)) {
				unscanc(c);
				unscanc(esc_char);
				return (get_wide());
			}

			add_tok(get_escaped(c));
			continue;
		}

		/* if it is the escape character itself, note it */
		if (c == esc_char) {
			escaped = 1;
			continue;
		}

		/* remove from the comment char to end of line */
		if (c == com_char) {
			while (c != '\n') {
				if ((c = scanc()) == EOF) {
					/* end of file without newline! */
					return (EOF);
				}
			}
			assert(c == '\n');
			if (!hadtok) {
				/*
				 * If there were no tokens on this line,
				 * then just pretend it didn't exist at all.
				 */
				continue;
			}
			hadtok = 0;
			return (T_NL);
		}

		if (strchr(" \t\n;()<>,\"", c) && (tokidx != 0)) {
			/*
			 * These are all token delimiters.  If there
			 * is a token already in progress, we need to
			 * process it.  The delimiter is pushed back so
			 * the next call handles it on its own.
			 */
			unscanc(c);
			return (consume_token());
		}

		switch (c) {
		case '\n':
			if (!hadtok) {
				/*
				 * If the line was completely devoid of tokens,
				 * then just ignore it.
				 */
				continue;
			}
			/* we're starting a new line, reset the token state */
			hadtok = 0;
			return (T_NL);
		case ',':
			hadtok = 1;
			return (T_COMMA);
		case ';':
			hadtok = 1;
			return (T_SEMI);
		case '(':
			hadtok = 1;
			return (T_LPAREN);
		case ')':
			hadtok = 1;
			return (T_RPAREN);
		case '>':
			hadtok = 1;
			return (T_GT);
		case '<':
			/* symbol start! */
			hadtok = 1;
			return (get_symbol());
		case ' ':
		case '\t':
			/* whitespace, just ignore it */
			continue;
		case '"':
			hadtok = 1;
			instring = 1;
			return (T_QUOTE);
		default:
			/* ordinary character: it becomes part of a token */
			hadtok = 1;
			add_tok(c);
			continue;
		}
	}
	return (EOF);
}
823 
824 void
825 yyerror(const char *msg)
826 {
827 	(void) fprintf(stderr, "%s: %d: error: %s\n",
828 	    filename, lineno, msg);
829 	exit(4);
830 }
831 
832 void
833 errf(const char *fmt, ...)
834 {
835 	char	*msg;
836 
837 	va_list	va;
838 	va_start(va, fmt);
839 	(void) vasprintf(&msg, fmt, va);
840 	va_end(va);
841 
842 	(void) fprintf(stderr, "%s: %d: error: %s\n",
843 	    filename, lineno, msg);
844 	free(msg);
845 	exit(4);
846 }
847 
848 void
849 warn(const char *fmt, ...)
850 {
851 	char	*msg;
852 
853 	va_list	va;
854 	va_start(va, fmt);
855 	(void) vasprintf(&msg, fmt, va);
856 	va_end(va);
857 
858 	(void) fprintf(stderr, "%s: %d: warning: %s\n",
859 	    filename, lineno, msg);
860 	free(msg);
861 	warnings++;
862 	if (!warnok)
863 		exit(4);
864 }
865