xref: /freebsd/contrib/flex/src/parse.y (revision 19fae0f66023a97a9b464b3beeeabb2081f575b3)
1 /* parse.y - parser for flex input */
2 
3 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
4 %token TOK_OPTION TOK_OUTFILE TOK_PREFIX TOK_YYCLASS TOK_HEADER_FILE TOK_EXTRA_TYPE
5 %token TOK_TABLES_FILE
6 
7 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
8 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
9 
10 %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
11 %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
12 
13 %left CCL_OP_DIFF CCL_OP_UNION
14 
15 /*
16  * POSIX and AT&T lex place the
17  * precedence of the repeat operator, {}, below that of concatenation.
18  * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
19  * Regular Expression (ERE) precedence that has the repeat operator
20  * higher than concatenation.  This causes ab{3} to yield abbb.
21  *
22  * In order to support the POSIX and AT&T precedence and the flex
23  * precedence we define two token sets for the begin and end tokens of
24  * the repeat operator, '{' and '}'.  The lexical scanner chooses
25  * which tokens to return based on whether posix_compat or lex_compat
26  * are specified. Specifying either posix_compat or lex_compat will
27  * cause flex to parse scanner files as per the AT&T and
28  * POSIX-mandated behavior.
29  */
30 
31 %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
32 
33 
34 %{
35 /*  Copyright (c) 1990 The Regents of the University of California. */
36 /*  All rights reserved. */
37 
38 /*  This code is derived from software contributed to Berkeley by */
39 /*  Vern Paxson. */
40 
41 /*  The United States Government has rights in this work pursuant */
42 /*  to contract no. DE-AC03-76SF00098 between the United States */
43 /*  Department of Energy and the University of California. */
44 
45 /*  This file is part of flex. */
46 
47 /*  Redistribution and use in source and binary forms, with or without */
48 /*  modification, are permitted provided that the following conditions */
49 /*  are met: */
50 
51 /*  1. Redistributions of source code must retain the above copyright */
52 /*     notice, this list of conditions and the following disclaimer. */
53 /*  2. Redistributions in binary form must reproduce the above copyright */
54 /*     notice, this list of conditions and the following disclaimer in the */
55 /*     documentation and/or other materials provided with the distribution. */
56 
57 /*  Neither the name of the University nor the names of its contributors */
58 /*  may be used to endorse or promote products derived from this software */
59 /*  without specific prior written permission. */
60 
61 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
62 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
63 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
64 /*  PURPOSE. */
65 
66 #include "flexdef.h"
67 #include "tables.h"
68 
69 int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
70 int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;
71 
72 int *scon_stk;
73 int scon_stk_ptr;
74 
75 static int madeany = false;  /* whether we've made the '.' character class */
76 static int ccldot, cclany;
77 int previous_continued_action;	/* whether the previous rule's action was '|' */
78 
/* format_warn3 - format a warning from a printf-style format and two
 * arguments into a MAXLINE-sized buffer, then report it through lwarn().
 */
#define format_warn3(fmt, a1, a2) \
	do { \
		char fw3_msg[MAXLINE]; \
		snprintf( fw3_msg, sizeof(fw3_msg), (fmt), (a1), (a2) ); \
		lwarn( fw3_msg ); \
	} while (0)
85 
/* CCL_EXPR - expand a POSIX character class expression: add to currccl
 * every character code below csize that is ASCII and satisfies func().
 */
#define CCL_EXPR(func) \
	do { \
		int c = 0; \
		while ( c < csize ) { \
			if ( isascii(c) && func(c) ) \
				ccladd( currccl, c ); \
			++c; \
		} \
	} while (0)
94 
/* CCL_NEG_EXPR - expand a negated character class expression: add to
 * currccl every character code below csize for which func() is false.
 * Unlike CCL_EXPR there is no isascii() filter here.
 */
#define CCL_NEG_EXPR(func) \
	do { \
		int c = 0; \
		while ( c < csize ) { \
			if ( !func(c) ) \
				ccladd( currccl, c ); \
			++c; \
		} \
	} while (0)
103 
104 /* While POSIX defines isblank(), it's not ANSI C. */
105 #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
106 
107 /* On some over-ambitious machines, such as DEC Alpha's, the default
108  * token type is "long" instead of "int"; this leads to problems with
109  * declaring yylval in flexdef.h.  But so far, all the yacc's I've seen
110  * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
111  * following should ensure that the default token type is "int".
112  */
113 #define YYSTYPE int
114 
115 %}
116 
117 %%
goal		:  initlex sect1 sect1end sect2 initforrule
			{
			/* The whole input has been parsed; append the
			 * default rule, built from a negated empty
			 * character class.
			 */
			int dflt_nfa;

			pat = cclinit();
			cclnegate( pat );
			dflt_nfa = mkstate( -pat );

			/* Record the default rule's number so that no
			 * "can't match" warning is generated for it.
			 */
			default_rule = num_rules;

			finish_rule( dflt_nfa, false, 0, 0, 0 );

			/* Offer the default rule as an alternative in
			 * every start condition.
			 */
			i = 1;
			while ( i <= lastsc ) {
				scset[i] = mkbranch( scset[i], dflt_nfa );
				++i;
			}

			/* The default action is either a fatal error
			 * (when spprdflt is set) or ECHO.
			 */
			add_action( spprdflt
				? "YY_FATAL_ERROR( \"flex scanner jammed\" )"
				: "ECHO" );

			add_action( ";\n\tYY_BREAK]]\n" );
			}
		;
146 
147 initlex		:
148 			{ /* initialize for processing rules */
149 
150 			/* Create default DFA start condition. */
151 			scinstal( "INITIAL", false );
152 			}
153 		;
154 
155 sect1		:  sect1 startconddecl namelist1
156 		|  sect1 options
157 		|
158 		|  error
159 			{ synerr( _("unknown error processing section 1") ); }
160 		;
161 
sect1end	:  SECTEND
			{
			/* End of section 1: run check_options(), then
			 * allocate the start-condition stack (lastsc + 1
			 * integers) and mark it empty.
			 */
			check_options();

			scon_stk = allocate_integer_array( lastsc + 1 );
			scon_stk_ptr = 0;
			}
		;
169 
170 startconddecl	:  SCDECL
171 			{ xcluflg = false; }
172 
173 		|  XSCDECL
174 			{ xcluflg = true; }
175 		;
176 
177 namelist1	:  namelist1 NAME
178 			{ scinstal( nmstr, xcluflg ); }
179 
180 		|  NAME
181 			{ scinstal( nmstr, xcluflg ); }
182 
183 		|  error
184 			{ synerr( _("bad start condition list") ); }
185 		;
186 
187 options		:  TOK_OPTION optionlist
188 		;
189 
190 optionlist	:  optionlist option
191 		|
192 		;
193 
/* One "keyword = NAME" option.  Each alternative stores a private copy
 * of the NAME text (nmstr) in the corresponding global.
 */
option		:  TOK_OUTFILE '=' NAME
			{
			outfilename = xstrdup( nmstr );

			/* Note that an output file name was given. */
			did_outfilename = 1;
			}

		|  TOK_EXTRA_TYPE '=' NAME
			{
			extra_type = xstrdup( nmstr );
			}

		|  TOK_PREFIX '=' NAME
			{
			prefix = xstrdup( nmstr );

			/* Square brackets are not accepted in a prefix. */
			if ( strchr( prefix, '[' ) || strchr( prefix, ']' ) )
				flexerror( _("Prefix must not contain [ or ]") );
			}

		|  TOK_YYCLASS '=' NAME
			{
			yyclass = xstrdup( nmstr );
			}

		|  TOK_HEADER_FILE '=' NAME
			{
			headerfilename = xstrdup( nmstr );
			}

		|  TOK_TABLES_FILE '=' NAME
			{
			tablesext = true;
			tablesfilename = xstrdup( nmstr );
			}
		;
212 
/* Section 2: a sequence of rules, or of brace-enclosed groups of rules,
 * each optionally preceded by a start condition list.  The value of
 * scon is the start-condition stack depth from before that list was
 * pushed; assigning it back to scon_stk_ptr pops the list again once
 * the rule or group is finished.
 */
sect2		:  sect2 scon initforrule flexrule '\n'
			{
			scon_stk_ptr = $2;
			}

		|  sect2 scon '{' sect2 '}'
			{
			scon_stk_ptr = $2;
			}

		|  /* empty */
		;
219 
initforrule	:
			{
			/* Reset the per-rule state before parsing a rule. */
			varlength = false;
			variable_trail_rule = false;
			trlcontxt = false;

			rulelen = 0;
			headcnt = 0;
			trailcnt = 0;

			current_state_type = STATE_NORMAL;

			/* Remember whether the previous rule's action
			 * was '|'.
			 */
			previous_continued_action = continued_action;

			in_rule = true;

			new_rule();
			}
		;
232 
flexrule	:  '^' rule
			{
			/* Rule anchored with '^': finish it and add it to
			 * the scbol[] machines.
			 */
			pat = $2;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt, previous_continued_action );

			if ( scon_stk_ptr > 0 ) {
				/* Only the start conditions currently on
				 * the stack.
				 */
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scbol[scon_stk[i]] =
						mkbranch( scbol[scon_stk[i]], pat );
			} else {
				/* No explicit list: every non-exclusive
				 * start condition.
				 */
				for ( i = 1; i <= lastsc; ++i ) {
					if ( scxclu[i] )
						continue;

					scbol[i] = mkbranch( scbol[i], pat );
				}
			}

			if ( ! bol_needed ) {
				/* First '^' rule seen. */
				bol_needed = true;

				if ( performance_report > 1 )
					pinpoint_message(
			"'^' operator results in sub-optimal performance" );
			}
			}

		|  rule
			{
			/* Unanchored rule: finish it and add it to the
			 * scset[] machines.
			 */
			pat = $1;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt, previous_continued_action );

			if ( scon_stk_ptr > 0 ) {
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scset[scon_stk[i]] =
						mkbranch( scset[scon_stk[i]], pat );
			} else {
				for ( i = 1; i <= lastsc; ++i ) {
					if ( scxclu[i] )
						continue;

					scset[i] = mkbranch( scset[i], pat );
				}
			}
			}

		|  EOF_OP
			{
			if ( scon_stk_ptr > 0 ) {
				/* Explicit start condition list. */
				build_eof_action();
			} else {
				/* No list: the rule applies to every start
				 * condition that has no EOF action yet, so
				 * push each of those.
				 */
				for ( i = 1; i <= lastsc; ++i ) {
					if ( sceof[i] )
						continue;

					scon_stk[++scon_stk_ptr] = i;
				}

				if ( scon_stk_ptr != 0 )
					build_eof_action();
				else
					lwarn(
			"all start conditions already have <<EOF>> rules" );
			}
			}

		|  error
			{
			synerr( _("unrecognized rule") );
			}
		;
319 
320 scon_stk_ptr	:
321 			{ $$ = scon_stk_ptr; }
322 		;
323 
/* Optional start condition prefix of a rule.  In every alternative the
 * value is the start-condition stack depth from before this prefix
 * pushed anything.
 */
scon		:  '<' scon_stk_ptr namelist2 '>'
			{
			$$ = $2;
			}

		|  '<' '*' '>'
			{
			$$ = scon_stk_ptr;

			/* Push every start condition that is not on the
			 * stack already.
			 */
			for ( i = 1; i <= lastsc; ++i ) {
				int k;
				int on_stack = 0;

				for ( k = 1; k <= scon_stk_ptr && ! on_stack; ++k )
					on_stack = ( scon_stk[k] == i );

				if ( ! on_stack )
					scon_stk[++scon_stk_ptr] = i;
			}
			}

		|  /* no prefix */
			{
			$$ = scon_stk_ptr;
			}
		;
347 
348 namelist2	:  namelist2 ',' sconname
349 
350 		|  sconname
351 
352 		|  error
353 			{ synerr( _("bad start condition list") ); }
354 		;
355 
sconname	:  NAME
			{
			/* Look the name up; sclookup() returning 0 is
			 * treated as "not declared".
			 */
			scnum = sclookup( nmstr );

			if ( scnum == 0 ) {
				format_pinpoint_message(
					"undeclared start condition %s",
					nmstr );
			} else {
				/* Search the stack for this start condition. */
				i = 1;
				while ( i <= scon_stk_ptr && scon_stk[i] != scnum )
					++i;

				if ( i <= scon_stk_ptr )
					/* Already listed: warn, don't push. */
					format_warn(
						"<%s> specified twice",
						scname[scnum] );
				else
					scon_stk[++scon_stk_ptr] = scnum;
			}
			}
		;
378 
rule		:  re2 re
			{
			/* Head (re2) followed by trailing context (re). */

			/* Supply the final transition right away so it gets
			 * marked as a trailing context state.
			 */
			if ( transchar[lastst[$2]] != SYM_EPSILON )
				$2 = link_machines( $2,
						mkstate( SYM_EPSILON ) );

			mark_beginning_as_normal( $2 );
			current_state_type = STATE_NORMAL;

			if ( previous_continued_action ) {
				/* The rule has to be handled as variable
				 * trailing context so that the backup is done
				 * before the action switch statement rather
				 * than inside the action; otherwise rules
				 * "falling into" this action would wrongly
				 * perform the backup as well.
				 */
				if ( ! (varlength && headcnt == 0) )
					lwarn(
		"trailing context made variable due to preceding '|' action" );

				/* Force the variable form. */
				varlength = true;
				headcnt = 0;
			}

			if ( lex_compat || (varlength && headcnt == 0) ) {
				/* Variable trailing context: mark the head
				 * as the accepting "head" part of the rule.
				 * This is deliberately done here and not
				 * earlier, because until now
				 * current_state_type was set up for the trail
				 * and add_accept() may create a new state.
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
			} else {
				trailcnt = rulelen;
			}

			$$ = link_machines( $1, $2 );
			}

		|  re2 re '$'
			{
			synerr( _("trailing context used twice") );
			}

		|  re '$'
			{
			/* '$' acts as a one-character trailing context
			 * consisting of a newline.
			 */
			headcnt = 0;
			trailcnt = 1;
			rulelen = 1;
			varlength = false;

			current_state_type = STATE_TRAILING_CONTEXT;

			if ( trlcontxt ) {
				synerr( _("trailing context used twice") );
				$$ = mkstate( SYM_EPSILON );
			} else if ( previous_continued_action ) {
				/* Same reasoning as for the "re2 re" form
				 * of this rule.
				 */
				lwarn(
		"trailing context made variable due to preceding '|' action" );

				varlength = true;
			}

			if ( lex_compat || varlength ) {
				/* Again as for the "re2 re" form. */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
			}

			trlcontxt = true;

			eps = mkstate( SYM_EPSILON );
			$$ = link_machines( $1,
				link_machines( eps, mkstate( '\n' ) ) );
			}

		|  re
			{
			$$ = $1;

			if ( trlcontxt ) {
				if ( lex_compat || (varlength && headcnt == 0) )
					/* Head and trail are both
					 * variable-length.
					 */
					variable_trail_rule = true;
				else
					trailcnt = rulelen;
			}
			}
		;
497 
498 
499 re		:  re '|' series
500 			{
501 			varlength = true;
502 			$$ = mkor( $1, $3 );
503 			}
504 
505 		|  series
506 			{ $$ = $1; }
507 		;
508 
509 
re2		:  re '/'
			{
			/* Kept as a rule of its own so that this reduction
			 * happens before the trailing part is parsed.
			 */

			if ( ! trlcontxt )
				trlcontxt = true;
			else
				synerr( _("trailing context used twice") );

			if ( ! varlength )
				/* Fixed-length head: record its length. */
				headcnt = rulelen;
			else
				/* Variable-length head; start over in the
				 * hope that the trailing context is
				 * fixed-length.
				 */
				varlength = false;

			rulelen = 0;

			current_state_type = STATE_TRAILING_CONTEXT;
			$$ = $1;
			}
		;
536 
series		:  series singleton
			{
			/* Concatenation of adjacent patterns. */
			$$ = link_machines( $1, $2 );
			}

		|  singleton
			{
			$$ = $1;
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
			{
			/* Repeat of the whole series with a lower and an
			 * upper bound.
			 */
			varlength = true;

			if ( $3 < 0 || $3 > $5 || ($3 == 0 && $5 <= 0) ) {
				synerr( _("bad iteration values") );
				$$ = $1;
			} else if ( $3 == 0 ) {
				/* Lower bound of zero: an optional repeat
				 * of one up to the upper bound.
				 */
				$$ = mkopt( mkrep( $1, 1, $5 ) );
			} else {
				$$ = mkrep( $1, $3, $5 );
			}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
			{
			/* Repeat with a lower bound only. */
			varlength = true;

			if ( $3 > 0 ) {
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			} else {
				synerr( _("iteration value must be positive") );
				$$ = $1;
			}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
			{
			/* Exact repeat count.  The series may be something
			 * like "(foo)" whose length is unknown, so the rule
			 * is simply treated as variable-length.
			 */
			varlength = true;

			if ( $3 > 0 ) {
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			} else {
				synerr( _("iteration value must be positive") );
				$$ = $1;
			}
			}

		;
611 
singleton	:  singleton '*'
			{
			varlength = true;
			$$ = mkclos( $1 );
			}

		|  singleton '+'
			{
			varlength = true;
			$$ = mkposcl( $1 );
			}

		|  singleton '?'
			{
			varlength = true;
			$$ = mkopt( $1 );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
			{
			/* Repeat of one singleton with a lower and an upper
			 * bound.
			 */
			varlength = true;

			if ( $3 < 0 || $3 > $5 || ($3 == 0 && $5 <= 0) ) {
				synerr( _("bad iteration values") );
				$$ = $1;
			} else if ( $3 == 0 ) {
				/* Lower bound of zero: an optional repeat
				 * of one up to the upper bound.
				 */
				$$ = mkopt( mkrep( $1, 1, $5 ) );
			} else {
				$$ = mkrep( $1, $3, $5 );
			}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
			{
			/* Repeat with a lower bound only. */
			varlength = true;

			if ( $3 > 0 ) {
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			} else {
				synerr( _("iteration value must be positive") );
				$$ = $1;
			}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
			{
			/* Exact repeat count.  The singleton may be
			 * something like "(foo)" whose length is unknown,
			 * so the rule is simply treated as variable-length.
			 */
			varlength = true;

			if ( $3 > 0 ) {
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			} else {
				synerr( _("iteration value must be positive") );
				$$ = $1;
			}
			}

		|  '.'
			{
			if ( ! madeany ) {
				/* First use of '.': build both classes once.
				 * ccldot is the negation of a class holding
				 * only newline.
				 */
				ccldot = cclinit();
				ccladd( ccldot, '\n' );
				cclnegate( ccldot );

				if ( useecs )
					mkeccl( ccltbl + cclmap[ccldot],
						ccllen[ccldot], nextecm,
						ecgroup, csize, csize );

				/* cclany is the negation of an empty class;
				 * it serves '.' when sf_dot_all() is set.
				 */
				cclany = cclinit();
				cclnegate( cclany );

				if ( useecs )
					mkeccl( ccltbl + cclmap[cclany],
						ccllen[cclany], nextecm,
						ecgroup, csize, csize );

				madeany = true;
			}

			++rulelen;

			$$ = mkstate( sf_dot_all() ? -cclany : -ccldot );
			}

		|  fullccl
			{
			/* Sort the class members for fast searching. */
			qsort( ccltbl + cclmap[$1], (size_t) ccllen[$1],
				sizeof (*ccltbl), cclcmp );

			if ( useecs )
				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
					nextecm, ecgroup, csize, csize );

			++rulelen;

			if ( ccl_has_nl[$1] )
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  PREVCCL
			{
			/* A character class that was seen before. */
			++rulelen;

			if ( ccl_has_nl[$1] )
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  '"' string '"'
			{
			$$ = $2;
			}

		|  '(' re ')'
			{
			$$ = $2;
			}

		|  CHAR
			{
			++rulelen;

			if ( $1 == nlch )
				rule_has_nl[num_rules] = true;

			if ( sf_case_ins() && has_case( $1 ) )
				/* Case-insensitive: match either case, as
				 * in (a|A).
				 */
				$$ = mkor (mkstate($1), mkstate(reverse_case($1)));
			else
				$$ = mkstate( $1 );
			}
		;
774 fullccl:
775         fullccl CCL_OP_DIFF  braceccl  { $$ = ccl_set_diff  ($1, $3); }
776     |   fullccl CCL_OP_UNION braceccl  { $$ = ccl_set_union ($1, $3); }
777     |   braceccl
778     ;
779 
780 braceccl:
781 
782             '[' ccl ']' { $$ = $2; }
783 
784 		|  '[' '^' ccl ']'
785 			{
786 			cclnegate( $3 );
787 			$$ = $3;
788 			}
789 		;
790 
ccl		:  ccl CHAR '-' CHAR
			{
			/* Character range inside a class. */

			/* In a case-insensitive scanner, warn when it is
			 * unclear which range the user means:
			 *
			 *  - one end of the range has case and the other
			 *    does not, or the two ends differ in case;
			 *    examples: [@-z] or [S-t]
			 *
			 *  - neither end has case and the range is not
			 *    accepted by range_covers_case(), for instance
			 *    because it spans the letters of one case only;
			 *    example: [@-_] spans [A-Z] but not [a-z]
			 */
			if ( sf_case_ins() &&
			     ( has_case ($2) != has_case ($4)
			       || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
			       || (has_case ($2) && (b_isupper ($2) != b_isupper ($4)))
			       || (!has_case ($2) && !has_case ($4)
				   && !range_covers_case ($2, $4)) ) )
				format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					$2, $4);

			if ( $2 > $4 ) {
				synerr( _("negative range in character class") );
			} else {
				for ( i = $2; i <= $4; ++i )
					ccladd( $1, i );

				/* Track whether the class is still in
				 * alphabetical order.
				 */
				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $4;

				/* Add the same range in the other case. */
				if ( sf_case_ins() && has_case($2) && has_case($4) ) {
					$2 = reverse_case ($2);
					$4 = reverse_case ($4);

					for ( i = $2; i <= $4; ++i )
						ccladd( $1, i );

					cclsorted = cclsorted && ($2 > lastchar);
					lastchar = $4;
				}
			}

			$$ = $1;
			}

		|  ccl CHAR
			{
			/* Single character inside a class. */
			ccladd( $1, $2 );
			cclsorted = cclsorted && ($2 > lastchar);
			lastchar = $2;

			/* Add the character in the other case as well. */
			if ( sf_case_ins() && has_case($2) ) {
				$2 = reverse_case ($2);
				ccladd ($1, $2);

				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $2;
			}

			$$ = $1;
			}

		|  ccl ccl_expr
			{
			/* Keeping cclsorted accurate here is too hard, so
			 * just give up on it.
			 */
			cclsorted = false;
			$$ = $1;
			}

		|  /* empty: start a new class */
			{
			cclsorted = true;
			lastchar = 0;

			$$ = cclinit();
			currccl = $$;
			}
		;
883 
/* A named character class expression inside a class.  Each alternative
 * adds the matching characters to currccl via CCL_EXPR or CCL_NEG_EXPR.
 */
ccl_expr	:  CCE_ALNUM
			{ CCL_EXPR(isalnum); }

		|  CCE_ALPHA
			{ CCL_EXPR(isalpha); }

		|  CCE_BLANK
			{ CCL_EXPR(IS_BLANK); }

		|  CCE_CNTRL
			{ CCL_EXPR(iscntrl); }

		|  CCE_DIGIT
			{ CCL_EXPR(isdigit); }

		|  CCE_GRAPH
			{ CCL_EXPR(isgraph); }

		|  CCE_LOWER
			{
			CCL_EXPR(islower);

			/* Case-insensitive: include upper case too. */
			if ( sf_case_ins() ) {
				CCL_EXPR(isupper);
			}
			}

		|  CCE_PRINT
			{ CCL_EXPR(isprint); }

		|  CCE_PUNCT
			{ CCL_EXPR(ispunct); }

		|  CCE_SPACE
			{ CCL_EXPR(isspace); }

		|  CCE_XDIGIT
			{ CCL_EXPR(isxdigit); }

		|  CCE_UPPER
			{
			CCL_EXPR(isupper);

			/* Case-insensitive: include lower case too. */
			if ( sf_case_ins() ) {
				CCL_EXPR(islower);
			}
			}

		|  CCE_NEG_ALNUM
			{ CCL_NEG_EXPR(isalnum); }

		|  CCE_NEG_ALPHA
			{ CCL_NEG_EXPR(isalpha); }

		|  CCE_NEG_BLANK
			{ CCL_NEG_EXPR(IS_BLANK); }

		|  CCE_NEG_CNTRL
			{ CCL_NEG_EXPR(iscntrl); }

		|  CCE_NEG_DIGIT
			{ CCL_NEG_EXPR(isdigit); }

		|  CCE_NEG_GRAPH
			{ CCL_NEG_EXPR(isgraph); }

		|  CCE_NEG_PRINT
			{ CCL_NEG_EXPR(isprint); }

		|  CCE_NEG_PUNCT
			{ CCL_NEG_EXPR(ispunct); }

		|  CCE_NEG_SPACE
			{ CCL_NEG_EXPR(isspace); }

		|  CCE_NEG_XDIGIT
			{ CCL_NEG_EXPR(isxdigit); }

		|  CCE_NEG_LOWER
			{
			/* In a case-insensitive scanner only a warning is
			 * issued; nothing is added to the class.
			 */
			if ( ! sf_case_ins() )
				CCL_NEG_EXPR(islower);
			else
				lwarn(_("[:^lower:] is ambiguous in case insensitive scanner"));
			}

		|  CCE_NEG_UPPER
			{
			/* In a case-insensitive scanner only a warning is
			 * issued; nothing is added to the class.
			 */
			if ( ! sf_case_ins() )
				CCL_NEG_EXPR(isupper);
			else
				lwarn(_("[:^upper:] ambiguous in case insensitive scanner"));
			}
		;
929 
string		:  string CHAR
			{
			/* Append one character to a quoted string. */
			int ch_nfa;

			if ( $2 == nlch )
				rule_has_nl[num_rules] = true;

			++rulelen;

			if ( sf_case_ins() && has_case($2) )
				/* Case-insensitive: match either case. */
				ch_nfa = mkor (mkstate($2), mkstate(reverse_case($2)));
			else
				ch_nfa = mkstate ($2);

			$$ = link_machines( $1, ch_nfa );
			}

		|  /* empty string */
			{
			$$ = mkstate( SYM_EPSILON );
			}
		;
948 
949 %%
950 
951 
952 /* build_eof_action - build the "<<EOF>>" action for the active start
953  *                    conditions
954  */
955 
956 void build_eof_action(void)
957 	{
958 	int i;
959 	char action_text[MAXLINE];
960 
961 	for ( i = 1; i <= scon_stk_ptr; ++i )
962 		{
963 		if ( sceof[scon_stk[i]] )
964 			format_pinpoint_message(
965 				"multiple <<EOF>> rules for start condition %s",
966 				scname[scon_stk[i]] );
967 
968 		else
969 			{
970 			sceof[scon_stk[i]] = true;
971 
972 			if (previous_continued_action /* && previous action was regular */)
973 				add_action("YY_RULE_SETUP\n");
974 
975 			snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
976 				scname[scon_stk[i]] );
977 			add_action( action_text );
978 			}
979 		}
980 
981 	line_directive_out(NULL, 1);
982         add_action("[[");
983 
984 	/* This isn't a normal rule after all - don't count it as
985 	 * such, so we don't have any holes in the rule numbering
986 	 * (which make generating "rule can never match" warnings
987 	 * more difficult.
988 	 */
989 	--num_rules;
990 	++num_eof_rules;
991 	}
992 
993 
994 /* format_synerr - write out formatted syntax error */
995 
996 void format_synerr( const char *msg, const char arg[] )
997 	{
998 	char errmsg[MAXLINE];
999 
1000 	(void) snprintf( errmsg, sizeof(errmsg), msg, arg );
1001 	synerr( errmsg );
1002 	}
1003 
1004 
1005 /* synerr - report a syntax error */
1006 
1007 void synerr( const char *str )
1008 	{
1009 	syntaxerror = true;
1010 	pinpoint_message( str );
1011 	}
1012 
1013 
1014 /* format_warn - write out formatted warning */
1015 
1016 void format_warn( const char *msg, const char arg[] )
1017 	{
1018 	char warn_msg[MAXLINE];
1019 
1020 	snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1021 	lwarn( warn_msg );
1022 	}
1023 
1024 
1025 /* lwarn - report a warning, unless -w was given */
1026 
1027 void lwarn( const char *str )
1028 	{
1029 	line_warning( str, linenum );
1030 	}
1031 
1032 /* format_pinpoint_message - write out a message formatted with one string,
1033  *			     pinpointing its location
1034  */
1035 
1036 void format_pinpoint_message( const char *msg, const char arg[] )
1037 	{
1038 	char errmsg[MAXLINE];
1039 
1040 	snprintf( errmsg, sizeof(errmsg), msg, arg );
1041 	pinpoint_message( errmsg );
1042 	}
1043 
1044 
1045 /* pinpoint_message - write out a message, pinpointing its location */
1046 
1047 void pinpoint_message( const char *str )
1048 	{
1049 	line_pinpoint( str, linenum );
1050 	}
1051 
1052 
1053 /* line_warning - report a warning at a given line, unless -w was given */
1054 
1055 void line_warning( const char *str, int line )
1056 	{
1057 	char warning[MAXLINE];
1058 
1059 	if ( ! nowarn )
1060 		{
1061 		snprintf( warning, sizeof(warning), "warning, %s", str );
1062 		line_pinpoint( warning, line );
1063 		}
1064 	}
1065 
1066 
1067 /* line_pinpoint - write out a message, pinpointing it at the given line */
1068 
1069 void line_pinpoint( const char *str, int line )
1070 	{
1071 	fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1072 	}
1073 
1074 
/* yyerror - eat up an error message from the parser;
 *	     currently, messages are ignored
 */

void yyerror( const char *msg )
{
	/* Deliberately unused. */
	(void) msg;
}
1083