1 /* parse.y - parser for flex input */
2
3 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
4 %token TOK_OPTION TOK_OUTFILE TOK_PREFIX TOK_YYCLASS TOK_HEADER_FILE TOK_EXTRA_TYPE
5 %token TOK_TABLES_FILE
6
7 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
8 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT
9
10 %token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
11 %token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT
12
13 %left CCL_OP_DIFF CCL_OP_UNION
14
/*
 * POSIX and AT&T lex place the precedence of the repeat operator, {},
 * below that of concatenation.  Thus, ab{3} is ababab.  Most other POSIX
 * utilities use an Extended Regular Expression (ERE) precedence that has
 * the repeat operator higher than concatenation.  This causes ab{3} to
 * yield abbb.
 *
 * In order to support both the POSIX and AT&T precedence and the flex
 * precedence, we define two token sets for the begin and end tokens of
 * the repeat operator, '{' and '}'.  The lexical scanner chooses which
 * tokens to return based on whether posix_compat or lex_compat is
 * specified.  Specifying either posix_compat or lex_compat will cause
 * flex to parse scanner files as per the AT&T and POSIX-mandated
 * behavior.
 */
30
31 %token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX
32
33
34 %{
35 /* Copyright (c) 1990 The Regents of the University of California. */
36 /* All rights reserved. */
37
38 /* This code is derived from software contributed to Berkeley by */
39 /* Vern Paxson. */
40
41 /* The United States Government has rights in this work pursuant */
42 /* to contract no. DE-AC03-76SF00098 between the United States */
43 /* Department of Energy and the University of California. */
44
45 /* This file is part of flex. */
46
47 /* Redistribution and use in source and binary forms, with or without */
48 /* modification, are permitted provided that the following conditions */
49 /* are met: */
50
51 /* 1. Redistributions of source code must retain the above copyright */
52 /* notice, this list of conditions and the following disclaimer. */
53 /* 2. Redistributions in binary form must reproduce the above copyright */
54 /* notice, this list of conditions and the following disclaimer in the */
55 /* documentation and/or other materials provided with the distribution. */
56
57 /* Neither the name of the University nor the names of its contributors */
58 /* may be used to endorse or promote products derived from this software */
59 /* without specific prior written permission. */
60
61 /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
62 /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
63 /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
64 /* PURPOSE. */
65
66 #include "flexdef.h"
67 #include "tables.h"
68
/* Working state shared by the grammar actions below.  Note that "i" is a
 * file-scope loop counter reused by many actions.  headcnt, trailcnt and
 * rulelen, together with trlcontxt, varlength and variable_trail_rule, are
 * reset at the start of every rule (see initforrule).
 */
int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;

/* Stack of start condition numbers the current rule applies to.  Slots
 * 1 through scon_stk_ptr are in use; the array is allocated when the end
 * of section 1 is reached (see sect1end).
 */
int *scon_stk;
int scon_stk_ptr;

static int madeany = false;	/* whether we've made the '.' character class */
static int ccldot, cclany;	/* classes used for '.' without / with dot-all */
int previous_continued_action;	/* whether the previous rule's action was '|' */
78
/* format_warn3 - format a warning that takes two arguments into a local
 * MAXLINE-sized buffer and report it through lwarn().
 */
#define format_warn3(fmt, a1, a2) \
	do { \
		char fw3_text[MAXLINE]; \
		snprintf( fw3_text, sizeof fw3_text, (fmt), (a1), (a2) ); \
		lwarn( fw3_text ); \
	} while (0)
85
/* Expand a POSIX character class expression: every character code below
 * csize that is ASCII and satisfies func is added to the class currccl.
 */
#define CCL_EXPR(func) \
	do { \
		int ccl_ch = 0; \
		while ( ccl_ch < csize ) \
			{ \
			if ( isascii(ccl_ch) && func(ccl_ch) ) \
				ccladd( currccl, ccl_ch ); \
			++ccl_ch; \
			} \
	} while (0)
94
/* Expand a negated POSIX character class expression: add to currccl every
 * character code below csize that is NOT an ASCII character satisfying
 * func.
 *
 * The test is the exact negation of the one used by CCL_EXPR, so a negated
 * class is always the complement of the corresponding positive class.  In
 * particular a non-ASCII code is always added, even if func() happens to
 * accept it under the active ctype locale.
 */
#define CCL_NEG_EXPR(func) \
	do{ \
	int c; \
	for ( c = 0; c < csize; ++c ) \
		if ( !(isascii(c) && func(c)) ) \
		    ccladd( currccl, c ); \
	}while(0)
103
104 /* While POSIX defines isblank(), it's not ANSI C. */
105 #define IS_BLANK(c) ((c) == ' ' || (c) == '\t')
106
107 /* On some over-ambitious machines, such as DEC Alpha's, the default
108 * token type is "long" instead of "int"; this leads to problems with
109 * declaring yylval in flexdef.h. But so far, all the yacc's I've seen
110 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
111 * following should ensure that the default token type is "int".
112 */
113 #define YYSTYPE int
114
115 %}
116
117 %%
/* goal - the whole input: section 1, the section separator, then section 2.
 * Once all user rules have been parsed, the action appends the default rule
 * and its action.
 */
goal		:  initlex sect1 sect1end sect2 initforrule
			{ /* add default rule */
			int def_rule;

			/* The default rule's pattern is a freshly created
			 * character class, negated.
			 */
			pat = cclinit();
			cclnegate( pat );

			def_rule = mkstate( -pat );

			/* Remember the number of the default rule so we
			 * don't generate "can't match" warnings for it.
			 */
			default_rule = num_rules;

			finish_rule( def_rule, false, 0, 0, 0);

			/* Branch the default rule into every start condition. */
			for ( i = 1; i <= lastsc; ++i )
				scset[i] = mkbranch( scset[i], def_rule );

			/* spprdflt selects a fatal error instead of ECHO as
			 * the default action.
			 */
			if ( spprdflt )
				add_action(
				"YY_FATAL_ERROR( \"flex scanner jammed\" )" );
			else
				add_action( "ECHO" );

			add_action( ";\n\tYY_BREAK]]\n" );
			}
		;
146
/* initlex - empty rule reduced before anything else is parsed; installs the
 * INITIAL start condition as a non-exclusive one.
 */
initlex		:
			{ /* initialize for processing rules */

			/* Create default DFA start condition. */
			scinstal( "INITIAL", false );
			}
		;
154
/* sect1 - section 1: any sequence of start condition declarations and
 * option lines (possibly empty).  A parse error here is reported as a
 * generic section 1 error.
 */
sect1		:  sect1 startconddecl namelist1
		|  sect1 options
		|
		|  error
			{ synerr( _("unknown error processing section 1") ); }
		;
161
/* sect1end - the separator that ends section 1.  Options are checked, and
 * the start condition stack is allocated with lastsc + 1 slots and emptied.
 */
sect1end	:  SECTEND
			{
			check_options();
			scon_stk = allocate_integer_array( lastsc + 1 );
			scon_stk_ptr = 0;
			}
		;
169
/* startconddecl - keyword introducing a start condition declaration;
 * records in xcluflg whether the names that follow are exclusive.
 */
startconddecl	:  SCDECL
			{ xcluflg = false; }

		|  XSCDECL
			{ xcluflg = true; }
		;
176
/* namelist1 - one or more start condition names being declared; each name
 * (held in nmstr) is installed with the exclusivity recorded in xcluflg.
 */
namelist1	:  namelist1 NAME
			{ scinstal( nmstr, xcluflg ); }

		|  NAME
			{ scinstal( nmstr, xcluflg ); }

		|  error
			{ synerr( _("bad start condition list") ); }
		;
186
/* options - an option line: the option keyword followed by zero or more
 * individual option settings.
 */
options		:  TOK_OPTION optionlist
		;

optionlist	:  optionlist option
		|
		;
193
/* option - a single "keyword = NAME" option setting.  Each action stores a
 * private copy (xstrdup) of the name text in nmstr into the matching
 * global.
 */
option		:  TOK_OUTFILE '=' NAME
			{
			outfilename = xstrdup(nmstr);
			did_outfilename = 1;
			}
		|  TOK_EXTRA_TYPE '=' NAME
			{ extra_type = xstrdup(nmstr); }
		|  TOK_PREFIX '=' NAME
			{ prefix = xstrdup(nmstr);
			  /* Square brackets are rejected in a prefix. */
			  if (strchr(prefix, '[') || strchr(prefix, ']'))
			     flexerror(_("Prefix must not contain [ or ]")); }
		|  TOK_YYCLASS '=' NAME
			{ yyclass = xstrdup(nmstr); }
		|  TOK_HEADER_FILE '=' NAME
			{ headerfilename = xstrdup(nmstr); }
		|  TOK_TABLES_FILE '=' NAME
			{ tablesext = true; tablesfilename = xstrdup(nmstr); }
		;
212
/* sect2 - section 2: a sequence of rules, each optionally prefixed by a
 * start condition list, or a braced group of rules sharing such a prefix.
 * The value of scon is the stack depth saved before its start conditions
 * were pushed; restoring it pops them once the rule or group is done.
 */
sect2		:  sect2 scon initforrule flexrule '\n'
			{ scon_stk_ptr = $2; }
		|  sect2 scon '{' sect2 '}'
			{ scon_stk_ptr = $2; }
		|
		;
219
/* initforrule - empty rule reduced before each rule is parsed; resets the
 * per-rule bookkeeping and starts a new rule.
 */
initforrule	:
			{
			/* Initialize for a parse of one rule. */
			trlcontxt = variable_trail_rule = varlength = false;
			trailcnt = headcnt = rulelen = 0;
			current_state_type = STATE_NORMAL;
			/* Latch whether the preceding rule's action was '|'. */
			previous_continued_action = continued_action;
			in_rule = true;

			new_rule();
			}
		;
232
/* flexrule - one complete rule: a pattern anchored with '^', an unanchored
 * pattern, or an <<EOF>> rule.  Anchored patterns are branched into scbol[]
 * and unanchored ones into scset[], for either the start conditions on
 * scon_stk or, when that stack is empty, every non-exclusive one.
 */
flexrule	:  '^' rule
			{
			pat = $2;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scbol[scon_stk[i]] =
						mkbranch( scbol[scon_stk[i]],
								pat );
				}

			else
				{
				/* Add to all non-exclusive start conditions,
				 * including the default (0) start condition.
				 */

				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scbol[i] = mkbranch( scbol[i],
									pat );
				}

			/* Only the first '^' rule sets bol_needed and may
			 * emit the performance note.
			 */
			if ( ! bol_needed )
				{
				bol_needed = true;

				if ( performance_report > 1 )
					pinpoint_message(
			"'^' operator results in sub-optimal performance" );
				}
			}

		|  rule
			{
			pat = $1;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scset[scon_stk[i]] =
						mkbranch( scset[scon_stk[i]],
								pat );
				}

			else
				{
				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scset[i] =
							mkbranch( scset[i],
								pat );
				}
			}

		|  EOF_OP
			{
			if ( scon_stk_ptr > 0 )
				build_eof_action();

			else
				{
				/* This EOF applies to all start conditions
				 * which don't already have EOF actions.
				 */
				for ( i = 1; i <= lastsc; ++i )
					if ( ! sceof[i] )
						scon_stk[++scon_stk_ptr] = i;

				if ( scon_stk_ptr == 0 )
					lwarn(
			"all start conditions already have <<EOF>> rules" );

				else
					build_eof_action();
				}
			}

		|  error
			{ synerr( _("unrecognized rule") ); }
		;
319
/* scon_stk_ptr - empty marker rule whose value is the current depth of the
 * start condition stack; used by scon to remember the depth before the
 * names in a <...> list are pushed.
 */
scon_stk_ptr	:
			{ $$ = scon_stk_ptr; }
		;
323
/* scon - optional start condition prefix of a rule.  In every case the
 * value is the stack depth from before any start condition was pushed, so
 * the caller can restore it afterwards.
 */
scon		:  '<' scon_stk_ptr namelist2 '>'
			{ $$ = $2; }

		|  '<' '*' '>'
			{
			$$ = scon_stk_ptr;

			/* Push every start condition that is not already
			 * on the stack.
			 */
			for ( i = 1; i <= lastsc; ++i )
				{
				int j;

				for ( j = 1; j <= scon_stk_ptr; ++j )
					if ( scon_stk[j] == i )
						break;

				if ( j > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = i;
				}
			}

		|
			{ $$ = scon_stk_ptr; }
		;
347
/* namelist2 - comma-separated list of start condition names inside <...>. */
namelist2	:  namelist2 ',' sconname

		|  sconname

		|  error
			{ synerr( _("bad start condition list") ); }
		;
355
/* sconname - one start condition name in a <...> list.  An unknown name is
 * reported; a known one is pushed onto scon_stk unless it is already there,
 * in which case a warning is issued instead.
 */
sconname	:  NAME
			{
			/* sclookup() yields 0 for a name that was never
			 * declared.
			 */
			if ( (scnum = sclookup( nmstr )) == 0 )
				format_pinpoint_message(
					"undeclared start condition %s",
					nmstr );
			else
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					if ( scon_stk[i] == scnum )
						{
						format_warn(
							"<%s> specified twice",
							scname[scnum] );
						break;
						}

				/* The loop ran to completion: not a duplicate. */
				if ( i > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = scnum;
				}
			}
		;
378
/* rule - a complete pattern.  The forms are: head '/' trail ("re2 re"),
 * the same followed by '$' (an error), a pattern with '$' end-of-line
 * context, and a plain pattern.  The actions also settle headcnt, trailcnt
 * and variable_trail_rule, which flexrule later hands to finish_rule().
 */
rule		:  re2 re
			{
			if ( transchar[lastst[$2]] != SYM_EPSILON )
				/* Provide final transition \now/ so it
				 * will be marked as a trailing context
				 * state.
				 */
				$2 = link_machines( $2,
						mkstate( SYM_EPSILON ) );

			mark_beginning_as_normal( $2 );
			current_state_type = STATE_NORMAL;

			if ( previous_continued_action )
				{
				/* We need to treat this as variable trailing
				 * context so that the backup does not happen
				 * in the action but before the action switch
				 * statement.  If the backup happens in the
				 * action, then the rules "falling into" this
				 * one's action will *also* do the backup,
				 * erroneously.
				 */
				if ( ! varlength || headcnt != 0 )
					lwarn(
		"trailing context made variable due to preceding '|' action" );

				/* Mark as variable. */
				varlength = true;
				headcnt = 0;

				}

			if ( lex_compat || (varlength && headcnt == 0) )
				{ /* variable trailing context rule */
				/* Mark the first part of the rule as the
				 * accepting "head" part of a trailing
				 * context rule.
				 *
				 * By the way, we didn't do this at the
				 * beginning of this production because back
				 * then current_state_type was set up for a
				 * trail rule, and add_accept() can create
				 * a new state ...
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			else
				trailcnt = rulelen;

			$$ = link_machines( $1, $2 );
			}

		|  re2 re '$'
			{ synerr( _("trailing context used twice") ); }

		|  re '$'
			{
			/* '$' is handled as trailing context consisting of
			 * exactly one character, the newline linked in at
			 * the end of this action.
			 */
			headcnt = 0;
			trailcnt = 1;
			rulelen = 1;
			varlength = false;

			current_state_type = STATE_TRAILING_CONTEXT;

			if ( trlcontxt )
				{
				synerr( _("trailing context used twice") );
				/* NOTE(review): this value is overwritten by
				 * the assignment to $$ at the end of the
				 * action.
				 */
				$$ = mkstate( SYM_EPSILON );
				}

			else if ( previous_continued_action )
				{
				/* See the comment in the rule for "re2 re"
				 * above.
				 */
				lwarn(
		"trailing context made variable due to preceding '|' action" );

				varlength = true;
				}

			if ( lex_compat || varlength )
				{
				/* Again, see the comment in the rule for
				 * "re2 re" above.
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			trlcontxt = true;

			eps = mkstate( SYM_EPSILON );
			$$ = link_machines( $1,
				link_machines( eps, mkstate( '\n' ) ) );
			}

		|  re
			{
			$$ = $1;

			if ( trlcontxt )
				{
				if ( lex_compat || (varlength && headcnt == 0) )
					/* Both head and trail are
					 * variable-length.
					 */
					variable_trail_rule = true;
				else
					trailcnt = rulelen;
				}
			}
		;
497
498
/* re - a regular expression: one or more series joined by '|'.  Any
 * alternation marks the pattern as variable-length.
 */
re		:  re '|' series
			{
			varlength = true;
			$$ = mkor( $1, $3 );
			}

		|  series
			{ $$ = $1; }
		;
508
509
/* re2 - the head of a trailing context pattern, up to and including the
 * '/'.  Records the head length (when it is fixed) and resets the length
 * bookkeeping so the trail can be measured separately.
 */
re2		:  re '/'
			{
			/* This rule is written separately so the
			 * reduction will occur before the trailing
			 * series is parsed.
			 */

			if ( trlcontxt )
				synerr( _("trailing context used twice") );
			else
				trlcontxt = true;

			if ( varlength )
				/* We hope the trailing context is
				 * fixed-length.
				 */
				varlength = false;
			else
				headcnt = rulelen;

			rulelen = 0;

			current_state_type = STATE_TRAILING_CONTEXT;
			$$ = $1;
			}
		;
536
/* series - a concatenation of singletons.  The BEGIN_REPEAT_POSIX forms
 * apply a {n,m}, {n,} or {n} repeat to the whole series parsed so far
 * rather than to the last singleton only.
 */
series		:  series singleton
			{
			/* This is where concatenation of adjacent patterns
			 * gets done.
			 */
			$$ = link_machines( $1, $2 );
			}

		|  singleton
			{ $$ = $1; }

		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}
			else
				{
				if ( $3 == 0 )
					{
					if ( $5 <= 0 )
						{
						synerr(
						_("bad iteration values") );
						$$ = $1;
						}
					else
						/* {0,m} is built as an
						 * optional {1,m}.
						 */
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}

		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
			{
			/* The series could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive")
					);
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		;
611
/* singleton - the smallest pattern unit, optionally followed by a closure
 * or repeat operator: '.', a character class, a quoted string, a
 * parenthesized expression or a single character.  The primitive forms
 * bump rulelen by one; every closure or repeat marks the pattern as
 * variable-length.
 */
singleton	:  singleton '*'
			{
			varlength = true;

			$$ = mkclos( $1 );
			}

		|  singleton '+'
			{
			varlength = true;
			$$ = mkposcl( $1 );
			}

		|  singleton '?'
			{
			varlength = true;
			$$ = mkopt( $1 );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}
			else
				{
				if ( $3 == 0 )
					{
					if ( $5 <= 0 )
						{
						synerr(
						_("bad iteration values") );
						$$ = $1;
						}
					else
						/* {0,m} is built as an
						 * optional {1,m}.
						 */
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
			{
			/* The singleton could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		|  '.'
			{
			/* Both classes are built once, on first use, and
			 * then shared by every later '.'.
			 */
			if ( ! madeany )
				{
				/* Create the '.' character class. */
				ccldot = cclinit();
				ccladd( ccldot, '\n' );
				cclnegate( ccldot );

				if ( useecs )
					mkeccl( ccltbl + cclmap[ccldot],
						ccllen[ccldot], nextecm,
						ecgroup, csize, csize );

				/* Create the (?s:'.') character class. */
				cclany = cclinit();
				cclnegate( cclany );

				if ( useecs )
					mkeccl( ccltbl + cclmap[cclany],
						ccllen[cclany], nextecm,
						ecgroup, csize, csize );

				madeany = true;
				}

			++rulelen;

			if (sf_dot_all())
				$$ = mkstate( -cclany );
			else
				$$ = mkstate( -ccldot );
			}

		|  fullccl
			{
				/* Sort characters for fast searching.
				 */
				qsort( ccltbl + cclmap[$1], (size_t) ccllen[$1], sizeof (*ccltbl), cclcmp );

			if ( useecs )
				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
					nextecm, ecgroup, csize, csize );

			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  PREVCCL
			{
			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  '"' string '"'
			{ $$ = $2; }

		|  '(' re ')'
			{ $$ = $2; }

		|  CHAR
			{
			++rulelen;

			if ($1 == nlch)
				rule_has_nl[num_rules] = true;

			if (sf_case_ins() && has_case($1))
				/* create an alternation, as in (a|A) */
				$$ = mkor (mkstate($1), mkstate(reverse_case($1)));
			else
				$$ = mkstate( $1 );
			}
		;
/* fullccl - a character class expression: bracketed classes combined,
 * left to right, with the set difference and set union operators.
 */
fullccl:
		fullccl CCL_OP_DIFF  braceccl  { $$ = ccl_set_diff  ($1, $3); }
	|	fullccl CCL_OP_UNION braceccl  { $$ = ccl_set_union ($1, $3); }
	|	braceccl
	;
779
/* braceccl - a bracketed character class, plain or negated with a leading
 * '^'; the value is the class built by ccl.
 */
braceccl:

		'[' ccl ']' { $$ = $2; }

	|	'[' '^' ccl ']'
			{
			cclnegate( $3 );
			$$ = $3;
			}
	;
790
/* ccl - the contents of a bracketed character class, built up left to
 * right from ranges, single characters and [:class:] expressions.  The
 * empty alternative creates the class and makes it the current one
 * (currccl).  cclsorted and lastchar track whether the characters have been
 * added in ascending order.
 */
ccl		:  ccl CHAR '-' CHAR
			{

			if (sf_case_ins())
			  {

			    /* If one end of the range has case and the other
			     * does not, or the cases are different, then we're not
			     * sure what range the user is trying to express.
			     * Examples: [@-z] or [S-t]
			     */
			    if (has_case ($2) != has_case ($4)
				     || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
				     || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);

			    /* If the range spans uppercase characters but not
			     * lowercase (or vice-versa), then should we automatically
			     * include lowercase characters in the range?
			     * Example: [@-_] spans [A-Z] but not [a-z]
			     */
			    else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);
			  }

			if ( $2 > $4 )
				synerr( _("negative range in character class") );

			else
				{
				for ( i = $2; i <= $4; ++i )
					ccladd( $1, i );

				/* Keep track if this ccl is staying in
				 * alphabetical order.
				 */
				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $4;

				/* Do it again for upper/lowercase */
				if (sf_case_ins() && has_case($2) && has_case($4)){
					$2 = reverse_case ($2);
					$4 = reverse_case ($4);

					for ( i = $2; i <= $4; ++i )
						ccladd( $1, i );

					cclsorted = cclsorted && ($2 > lastchar);
					lastchar = $4;
				}

				}

			$$ = $1;
			}

		|  ccl CHAR
			{
			ccladd( $1, $2 );
			cclsorted = cclsorted && ($2 > lastchar);
			lastchar = $2;

			/* Do it again for upper/lowercase */
			if (sf_case_ins() && has_case($2)){
				$2 = reverse_case ($2);
				ccladd ($1, $2);

				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $2;
			}

			$$ = $1;
			}

		|  ccl ccl_expr
			{
			/* Too hard to properly maintain cclsorted. */
			cclsorted = false;
			$$ = $1;
			}

		|
			{
			cclsorted = true;
			lastchar = 0;
			currccl = $$ = cclinit();
			}
		;
883
/* ccl_expr - a POSIX [:class:] expression or its negated [:^class:] form
 * inside a bracketed character class.  The matching characters are added
 * to the class currently being built (currccl) through CCL_EXPR and
 * CCL_NEG_EXPR.
 *
 * In a case-insensitive scanner [:lower:] and [:upper:] each add both
 * cases, while the negated forms add nothing and only produce a warning.
 */
ccl_expr:
		CCE_ALNUM	{ CCL_EXPR(isalnum); }
	|	CCE_ALPHA	{ CCL_EXPR(isalpha); }
	|	CCE_BLANK	{ CCL_EXPR(IS_BLANK); }
	|	CCE_CNTRL	{ CCL_EXPR(iscntrl); }
	|	CCE_DIGIT	{ CCL_EXPR(isdigit); }
	|	CCE_GRAPH	{ CCL_EXPR(isgraph); }
	|	CCE_LOWER	{
				CCL_EXPR(islower);
				if (sf_case_ins())
					CCL_EXPR(isupper);
			}
	|	CCE_PRINT	{ CCL_EXPR(isprint); }
	|	CCE_PUNCT	{ CCL_EXPR(ispunct); }
	|	CCE_SPACE	{ CCL_EXPR(isspace); }
	|	CCE_XDIGIT	{ CCL_EXPR(isxdigit); }
	|	CCE_UPPER	{
				CCL_EXPR(isupper);
				if (sf_case_ins())
					CCL_EXPR(islower);
			}

	|	CCE_NEG_ALNUM	{ CCL_NEG_EXPR(isalnum); }
	|	CCE_NEG_ALPHA	{ CCL_NEG_EXPR(isalpha); }
	|	CCE_NEG_BLANK	{ CCL_NEG_EXPR(IS_BLANK); }
	|	CCE_NEG_CNTRL	{ CCL_NEG_EXPR(iscntrl); }
	|	CCE_NEG_DIGIT	{ CCL_NEG_EXPR(isdigit); }
	|	CCE_NEG_GRAPH	{ CCL_NEG_EXPR(isgraph); }
	|	CCE_NEG_PRINT	{ CCL_NEG_EXPR(isprint); }
	|	CCE_NEG_PUNCT	{ CCL_NEG_EXPR(ispunct); }
	|	CCE_NEG_SPACE	{ CCL_NEG_EXPR(isspace); }
	|	CCE_NEG_XDIGIT	{ CCL_NEG_EXPR(isxdigit); }
	|	CCE_NEG_LOWER	{
				if ( sf_case_ins() )
					lwarn(_("[:^lower:] is ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(islower);
			}
	|	CCE_NEG_UPPER	{
				/* Worded like the [:^lower:] warning above. */
				if ( sf_case_ins() )
					lwarn(_("[:^upper:] is ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(isupper);
			}
	;
929
/* string - the characters of a quoted string, concatenated left to right.
 * The empty string is a single epsilon state.
 */
string		:  string CHAR
			{
			if ( $2 == nlch )
				rule_has_nl[num_rules] = true;

			++rulelen;

			/* In a case-insensitive scanner a cased character
			 * matches either case.
			 */
			if (sf_case_ins() && has_case($2))
				$$ = mkor (mkstate($2), mkstate(reverse_case($2)));
			else
				$$ = mkstate ($2);

			$$ = link_machines( $1, $$);
			}

		|
			{ $$ = mkstate( SYM_EPSILON ); }
		;
948
949 %%
950
951
952 /* build_eof_action - build the "<<EOF>>" action for the active start
953 * conditions
954 */
955
956 void build_eof_action(void)
957 {
958 int i;
959 char action_text[MAXLINE];
960
961 for ( i = 1; i <= scon_stk_ptr; ++i )
962 {
963 if ( sceof[scon_stk[i]] )
964 format_pinpoint_message(
965 "multiple <<EOF>> rules for start condition %s",
966 scname[scon_stk[i]] );
967
968 else
969 {
970 sceof[scon_stk[i]] = true;
971
972 if (previous_continued_action /* && previous action was regular */)
973 add_action("YY_RULE_SETUP\n");
974
975 snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n",
976 scname[scon_stk[i]] );
977 add_action( action_text );
978 }
979 }
980
981 line_directive_out(NULL, 1);
982 add_action("[[");
983
984 /* This isn't a normal rule after all - don't count it as
985 * such, so we don't have any holes in the rule numbering
986 * (which make generating "rule can never match" warnings
987 * more difficult.
988 */
989 --num_rules;
990 ++num_eof_rules;
991 }
992
993
994 /* format_synerr - write out formatted syntax error */
995
format_synerr(const char * msg,const char arg[])996 void format_synerr( const char *msg, const char arg[] )
997 {
998 char errmsg[MAXLINE];
999
1000 (void) snprintf( errmsg, sizeof(errmsg), msg, arg );
1001 synerr( errmsg );
1002 }
1003
1004
1005 /* synerr - report a syntax error */
1006
synerr(const char * str)1007 void synerr( const char *str )
1008 {
1009 syntaxerror = true;
1010 pinpoint_message( str );
1011 }
1012
1013
1014 /* format_warn - write out formatted warning */
1015
format_warn(const char * msg,const char arg[])1016 void format_warn( const char *msg, const char arg[] )
1017 {
1018 char warn_msg[MAXLINE];
1019
1020 snprintf( warn_msg, sizeof(warn_msg), msg, arg );
1021 lwarn( warn_msg );
1022 }
1023
1024
1025 /* lwarn - report a warning, unless -w was given */
1026
lwarn(const char * str)1027 void lwarn( const char *str )
1028 {
1029 line_warning( str, linenum );
1030 }
1031
1032 /* format_pinpoint_message - write out a message formatted with one string,
1033 * pinpointing its location
1034 */
1035
format_pinpoint_message(const char * msg,const char arg[])1036 void format_pinpoint_message( const char *msg, const char arg[] )
1037 {
1038 char errmsg[MAXLINE];
1039
1040 snprintf( errmsg, sizeof(errmsg), msg, arg );
1041 pinpoint_message( errmsg );
1042 }
1043
1044
1045 /* pinpoint_message - write out a message, pinpointing its location */
1046
pinpoint_message(const char * str)1047 void pinpoint_message( const char *str )
1048 {
1049 line_pinpoint( str, linenum );
1050 }
1051
1052
1053 /* line_warning - report a warning at a given line, unless -w was given */
1054
line_warning(const char * str,int line)1055 void line_warning( const char *str, int line )
1056 {
1057 char warning[MAXLINE];
1058
1059 if ( ! nowarn )
1060 {
1061 snprintf( warning, sizeof(warning), "warning, %s", str );
1062 line_pinpoint( warning, line );
1063 }
1064 }
1065
1066
1067 /* line_pinpoint - write out a message, pinpointing it at the given line */
1068
line_pinpoint(const char * str,int line)1069 void line_pinpoint( const char *str, int line )
1070 {
1071 fprintf( stderr, "%s:%d: %s\n", infilename, line, str );
1072 }
1073
1074
/* yyerror - eat up an error message from the parser;
 * currently, messages are ignored
 */

void yyerror( const char *msg )
{
	/* Deliberately unused; the cast silences unused-parameter warnings. */
	(void) msg;
}
1083