/* parse.y - parser for flex input */

/* Tokens delivered by the lexical scanner for the three kinds of input
 * this grammar sees: section-1 declarations and %option lines, start
 * condition lists, and the regular-expression syntax of section 2.
 */
%token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP
%token TOK_OPTION TOK_OUTFILE TOK_PREFIX TOK_YYCLASS TOK_HEADER_FILE TOK_EXTRA_TYPE
%token TOK_TABLES_FILE

/* One token per POSIX character class expression usable inside [...];
 * see the ccl_expr rule for how each one is expanded.
 */
%token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH
%token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT

/* Negated forms of the character class expressions above. */
%token CCE_NEG_ALNUM CCE_NEG_ALPHA CCE_NEG_BLANK CCE_NEG_CNTRL CCE_NEG_DIGIT CCE_NEG_GRAPH
%token CCE_NEG_LOWER CCE_NEG_PRINT CCE_NEG_PUNCT CCE_NEG_SPACE CCE_NEG_UPPER CCE_NEG_XDIGIT

/* Character class set operators (see fullccl); both are left-associative
 * and share one precedence level.
 */
%left CCL_OP_DIFF CCL_OP_UNION

/*
 * POSIX and AT&T lex place the
 * precedence of the repeat operator, {}, below that of concatenation.
 * Thus, ab{3} is ababab.  Most other POSIX utilities use an Extended
 * Regular Expression (ERE) precedence that has the repeat operator
 * higher than concatenation.  This causes ab{3} to yield abbb.
 *
 * In order to support the POSIX and AT&T precedence and the flex
 * precedence we define two token sets for the begin and end tokens of
 * the repeat operator, '{' and '}'.  The lexical scanner chooses
 * which tokens to return based on whether posix_compat or lex_compat
 * are specified. Specifying either posix_compat or lex_compat will
 * cause flex to parse scanner files as per the AT&T and
 * POSIX-mandated behavior.
 */

%token BEGIN_REPEAT_POSIX END_REPEAT_POSIX BEGIN_REPEAT_FLEX END_REPEAT_FLEX


%{
/*  Copyright (c) 1990 The Regents of the University of California. */
/*  All rights reserved. */

/*  This code is derived from software contributed to Berkeley by */
/*  Vern Paxson. */

/*  The United States Government has rights in this work pursuant */
/*  to contract no. DE-AC03-76SF00098 between the United States */
/*  Department of Energy and the University of California. */

/*  This file is part of flex. */

/*  Redistribution and use in source and binary forms, with or without */
/*  modification, are permitted provided that the following conditions */
/*  are met: */

/*  1. Redistributions of source code must retain the above copyright */
/*     notice, this list of conditions and the following disclaimer. */
/*  2. Redistributions in binary form must reproduce the above copyright */
/*     notice, this list of conditions and the following disclaimer in the */
/*     documentation and/or other materials provided with the distribution. */

/*  Neither the name of the University nor the names of its contributors */
/*  may be used to endorse or promote products derived from this software */
/*  without specific prior written permission. */

/*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
/*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
/*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
/*  PURPOSE. */

#include "flexdef.h"
#include "tables.h"

/* Scratch state shared by the rule actions below.  Note that `i' is a
 * file-scope loop counter used by many actions.
 */
int pat, scnum, eps, headcnt, trailcnt, lastchar, i, rulelen;
int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule;

/* Stack of start condition numbers that apply to the rule being parsed.
 * Entries 1 .. scon_stk_ptr are in use; the array is allocated in the
 * sect1end action.
 */
int *scon_stk;
int scon_stk_ptr;

static int madeany = false;  /* whether we've made the '.' character class */
static int ccldot, cclany;   /* the '.' and (?s:'.') classes, built on first use */
int previous_continued_action;	/* whether the previous rule's action was '|' */

/* Format a warning that takes two arguments and hand it to lwarn().
 * The message is built in a MAXLINE-sized buffer via snprintf().
 */
#define format_warn3(fmt, a1, a2) \
	do{ \
        char fw3_msg[MAXLINE];\
        snprintf( fw3_msg, MAXLINE,(fmt), (a1), (a2) );\
        lwarn( fw3_msg );\
	}while(0)

/* Expand a POSIX character class expression: add to currccl every
 * character c in [0, csize) for which isascii(c) and func(c) both hold.
 */
#define CCL_EXPR(func) \
	do{ \
	int c; \
	for ( c = 0; c < csize; ++c ) \
		if ( isascii(c) && func(c) ) \
			ccladd( currccl, c ); \
	}while(0)

/* negated class: add to currccl every character c in [0, csize) for
 * which func(c) is false.  Unlike CCL_EXPR there is no isascii() test.
 */
#define CCL_NEG_EXPR(func) \
	do{ \
	int c; \
	for ( c = 0; c < csize; ++c ) \
		if ( !func(c) ) \
            ccladd( currccl, c ); \
	}while(0)

/* While POSIX defines isblank(), it's not ANSI C. */
#define IS_BLANK(c) ((c) == ' ' || (c) == '\t')

/* On some over-ambitious machines, such as DEC Alpha's, the default
 * token type is "long" instead of "int"; this leads to problems with
 * declaring yylval in flexdef.h.  But so far, all the yacc's I've seen
 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the
 * following should ensure that the default token type is "int".
 */
#define YYSTYPE int

%}

%%
/* goal - a complete flex input file.  Once everything else has been
 * parsed, the action builds the default rule from a negated empty
 * character class, branches every start condition's machine to it, and
 * emits its action text: a fatal error when spprdflt is set, otherwise
 * ECHO.
 */
goal		:  initlex sect1 sect1end sect2 initforrule
			{ /* add default rule */
			int def_rule;

			pat = cclinit();
			cclnegate( pat );

			def_rule = mkstate( -pat );

			/* Remember the number of the default rule so we
			 * don't generate "can't match" warnings for it.
			 */
			default_rule = num_rules;

			finish_rule( def_rule, false, 0, 0, 0);

			for ( i = 1; i <= lastsc; ++i )
				scset[i] = mkbranch( scset[i], def_rule );

			if ( spprdflt )
				add_action(
				"YY_FATAL_ERROR( \"flex scanner jammed\" )" );
			else
				add_action( "ECHO" );

			add_action( ";\n\tYY_BREAK]]\n" );
			}
		;

/* initlex - empty rule; its action runs before any input is parsed. */
initlex		:
			{ /* initialize for processing rules */

			/* Create default DFA start condition. */
			scinstal( "INITIAL", false );
			}
		;

/* sect1 - section 1: any mix of start condition declarations and
 * option lines.
 */
sect1		:  sect1 startconddecl namelist1
		|  sect1 options
		|
		|  error
			{ synerr( _("unknown error processing section 1") ); }
		;

/* sect1end - end of section 1.  All start conditions are known at this
 * point, so the stack used for per-rule start condition lists is sized
 * from lastsc here.
 */
sect1end	:  SECTEND
			{
			check_options();
			scon_stk = allocate_integer_array( lastsc + 1 );
			scon_stk_ptr = 0;
			}
		;

/* startconddecl - records in xcluflg which of the two declaration
 * tokens introduced the name list that follows.
 */
startconddecl	:  SCDECL
			{ xcluflg = false; }

		|  XSCDECL
			{ xcluflg = true; }
		;

/* namelist1 - names being declared as start conditions; each is
 * installed with the xcluflg value set by startconddecl.
 */
namelist1	:  namelist1 NAME
			{ scinstal( nmstr, xcluflg ); }

		|  NAME
			{ scinstal( nmstr, xcluflg ); }

		|  error
			{ synerr( _("bad start condition list") ); }
		;

/* options - an option line: TOK_OPTION followed by zero or more
 * "keyword = NAME" settings.
 */
options		:  TOK_OPTION optionlist
		;

optionlist	:  optionlist option
		|
		;

/* option - each alternative stores a copy of the NAME text (nmstr) in
 * the corresponding global.
 */
option		:  TOK_OUTFILE '=' NAME
			{
			outfilename = xstrdup(nmstr);
			did_outfilename = 1;
			}
		|  TOK_EXTRA_TYPE '=' NAME
			{ extra_type = xstrdup(nmstr); }
		|  TOK_PREFIX '=' NAME
			{ prefix = xstrdup(nmstr);
                          if (strchr(prefix, '[') || strchr(prefix, ']'))
                              flexerror(_("Prefix must not contain [ or ]")); }
		|  TOK_YYCLASS '=' NAME
			{ yyclass = xstrdup(nmstr); }
		|  TOK_HEADER_FILE '=' NAME
			{ headerfilename = xstrdup(nmstr); }
	    |  TOK_TABLES_FILE '=' NAME
            { tablesext = true; tablesfilename = xstrdup(nmstr); }
		;

/* sect2 - section 2: a sequence of rules, each optionally prefixed by a
 * start condition list, or of '{' ... '}' groups of rules sharing one
 * list.  $2 is the stack depth that scon saved before pushing its names;
 * assigning it back drops those entries once the rule or group is done.
 */
sect2		:  sect2 scon initforrule flexrule '\n'
			{ scon_stk_ptr = $2; }
		|  sect2 scon '{' sect2 '}'
			{ scon_stk_ptr = $2; }
		|
		;

/* initforrule - empty rule that resets the per-rule bookkeeping and
 * calls new_rule() before each rule is parsed.
 */
initforrule	:
			{
			/* Initialize for a parse of one rule. */
			trlcontxt = variable_trail_rule = varlength = false;
			trailcnt = headcnt = rulelen = 0;
			current_state_type = STATE_NORMAL;
			previous_continued_action = continued_action;
			in_rule = true;

			new_rule();
			}
		;

/* flexrule - one complete rule.  A rule starting with '^' is branched
 * into the scbol[] machines, a plain rule into the scset[] machines; in
 * both cases the targets are the start conditions on scon_stk, or, when
 * the stack is empty, every start condition whose scxclu[] flag is clear.
 */
flexrule	:  '^' rule
			{
			pat = $2;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scbol[scon_stk[i]] =
						mkbranch( scbol[scon_stk[i]],
								pat );
				}

			else
				{
				/* Add to all non-exclusive start conditions,
				 * including the default (0) start condition.
				 */

				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scbol[i] = mkbranch( scbol[i],
									pat );
				}

			if ( ! bol_needed )
				{
				bol_needed = true;

				if ( performance_report > 1 )
					pinpoint_message(
			"'^' operator results in sub-optimal performance" );
				}
			}

		|  rule
			{
			pat = $1;
			finish_rule( pat, variable_trail_rule,
				headcnt, trailcnt , previous_continued_action);

			if ( scon_stk_ptr > 0 )
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					scset[scon_stk[i]] =
						mkbranch( scset[scon_stk[i]],
								pat );
				}

			else
				{
				for ( i = 1; i <= lastsc; ++i )
					if ( ! scxclu[i] )
						scset[i] =
							mkbranch( scset[i],
								pat );
				}
			}

		|  EOF_OP
			{
			if ( scon_stk_ptr > 0 )
				build_eof_action();

			else
				{
				/* This EOF applies to all start conditions
				 * which don't already have EOF actions.
				 */
				for ( i = 1; i <= lastsc; ++i )
					if ( ! sceof[i] )
						scon_stk[++scon_stk_ptr] = i;

				if ( scon_stk_ptr == 0 )
					lwarn(
			"all start conditions already have <<EOF>> rules" );

				else
					build_eof_action();
				}
			}

		|  error
			{ synerr( _("unrecognized rule") ); }
		;

/* scon_stk_ptr - empty marker rule whose value is the stack depth at
 * the moment it is reduced, i.e. before namelist2 pushes anything.
 */
scon_stk_ptr	:
			{ $$ = scon_stk_ptr; }
		;

/* scon - optional start condition prefix of a rule.  Its value is
 * always the stack depth from before the prefix was processed, which
 * sect2 uses to restore the stack afterwards.
 */
scon		:  '<' scon_stk_ptr namelist2 '>'
			{ $$ = $2; }

		|  '<' '*' '>'
			{
			$$ = scon_stk_ptr;

			/* Push every start condition that is not already
			 * on the stack.
			 */
			for ( i = 1; i <= lastsc; ++i )
				{
				int j;

				for ( j = 1; j <= scon_stk_ptr; ++j )
					if ( scon_stk[j] == i )
						break;

				if ( j > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = i;
				}
			}

		|
			{ $$ = scon_stk_ptr; }
		;

/* namelist2 - comma-separated start condition names inside '<' ... '>'. */
namelist2	:  namelist2 ',' sconname

		|  sconname

		|  error
			{ synerr( _("bad start condition list") ); }
		;

/* sconname - a single start condition name in a rule prefix.  The name
 * is looked up and pushed on scon_stk; an unknown name is reported, and
 * a name already on the stack only produces a warning.
 */
sconname	:  NAME
			{
			if ( (scnum = sclookup( nmstr )) == 0 )
				format_pinpoint_message(
					"undeclared start condition %s",
					nmstr );
			else
				{
				for ( i = 1; i <= scon_stk_ptr; ++i )
					if ( scon_stk[i] == scnum )
						{
						format_warn(
							"<%s> specified twice",
							scname[scnum] );
						break;
						}

				if ( i > scon_stk_ptr )
					scon_stk[++scon_stk_ptr] = scnum;
				}
			}
		;

/* rule - the pattern part of a rule: head/trail ("re2 re", where re2 is
 * "re '/'"), a pattern ending in '$', or a plain pattern.  The actions
 * set headcnt, trailcnt and variable_trail_rule, which flexrule then
 * passes to finish_rule().
 */
rule		:  re2 re
			{
			if ( transchar[lastst[$2]] != SYM_EPSILON )
				/* Provide final transition \now/ so it
				 * will be marked as a trailing context
				 * state.
				 */
				$2 = link_machines( $2,
						mkstate( SYM_EPSILON ) );

			mark_beginning_as_normal( $2 );
			current_state_type = STATE_NORMAL;

			if ( previous_continued_action )
				{
				/* We need to treat this as variable trailing
				 * context so that the backup does not happen
				 * in the action but before the action switch
				 * statement.  If the backup happens in the
				 * action, then the rules "falling into" this
				 * one's action will *also* do the backup,
				 * erroneously.
				 */
				if ( ! varlength || headcnt != 0 )
					lwarn(
		"trailing context made variable due to preceding '|' action" );

				/* Mark as variable. */
				varlength = true;
				headcnt = 0;

				}

			if ( lex_compat || (varlength && headcnt == 0) )
				{ /* variable trailing context rule */
				/* Mark the first part of the rule as the
				 * accepting "head" part of a trailing
				 * context rule.
				 *
				 * By the way, we didn't do this at the
				 * beginning of this production because back
				 * then current_state_type was set up for a
				 * trail rule, and add_accept() can create
				 * a new state ...
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			else
				trailcnt = rulelen;

			$$ = link_machines( $1, $2 );
			}

		|  re2 re '$'
			{ synerr( _("trailing context used twice") ); }

		|  re '$'
			{
			/* The '$' is handled as trailing context made of
			 * exactly one character: the '\n' state linked
			 * onto the pattern at the end of this action.
			 */
			headcnt = 0;
			trailcnt = 1;
			rulelen = 1;
			varlength = false;

			current_state_type = STATE_TRAILING_CONTEXT;

			if ( trlcontxt )
				{
				synerr( _("trailing context used twice") );
				$$ = mkstate( SYM_EPSILON );
				}

			else if ( previous_continued_action )
				{
				/* See the comment in the rule for "re2 re"
				 * above.
				 */
				lwarn(
		"trailing context made variable due to preceding '|' action" );

				varlength = true;
				}

			if ( lex_compat || varlength )
				{
				/* Again, see the comment in the rule for
				 * "re2 re" above.
				 */
				add_accept( $1,
					num_rules | YY_TRAILING_HEAD_MASK );
				variable_trail_rule = true;
				}

			trlcontxt = true;

			eps = mkstate( SYM_EPSILON );
			$$ = link_machines( $1,
				link_machines( eps, mkstate( '\n' ) ) );
			}

		|  re
			{
			$$ = $1;

			if ( trlcontxt )
				{
				if ( lex_compat || (varlength && headcnt == 0) )
					/* Both head and trail are
					 * variable-length.
					 */
					variable_trail_rule = true;
				else
					trailcnt = rulelen;
				}
			}
		;


/* re - alternation of series.  Any use of '|' marks the pattern as
 * variable-length.
 */
re		:  re '|' series
			{
			varlength = true;
			$$ = mkor( $1, $3 );
			}

		|  series
			{ $$ = $1; }
		;


/* re2 - the head of a trailing context pattern, up to and including
 * the '/'.  The head's length is saved in headcnt (when it is fixed) and
 * rulelen is restarted so that it measures the trailing part.
 */
re2		:  re '/'
			{
			/* This rule is written separately so the
			 * reduction will occur before the trailing
			 * series is parsed.
			 */

			if ( trlcontxt )
				synerr( _("trailing context used twice") );
			else
				trlcontxt = true;

			if ( varlength )
				/* We hope the trailing context is
				 * fixed-length.
				 */
				varlength = false;
			else
				headcnt = rulelen;

			rulelen = 0;

			current_state_type = STATE_TRAILING_CONTEXT;
			$$ = $1;
			}
		;

/* series - concatenation of singletons.  The BEGIN_REPEAT_POSIX
 * alternatives apply the repeat operator to the whole series built so
 * far; the BEGIN_REPEAT_FLEX counterparts live in the singleton rule and
 * apply to one singleton only.
 */
series		:  series singleton
			{
			/* This is where concatenation of adjacent patterns
			 * gets done.
			 */
			$$ = link_machines( $1, $2 );
			}

		|  singleton
			{ $$ = $1; }

		|  series BEGIN_REPEAT_POSIX NUMBER ',' NUMBER END_REPEAT_POSIX
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}
			else
				{
				if ( $3 == 0 )
					{
					if ( $5 <= 0 )
						{
						synerr(
						_("bad iteration values") );
						$$ = $1;
						}
					else
						/* {0,n} is built as an
						 * optional {1,n}.
						 */
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  series BEGIN_REPEAT_POSIX NUMBER ',' END_REPEAT_POSIX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}

		|  series BEGIN_REPEAT_POSIX NUMBER END_REPEAT_POSIX
			{
			/* The series could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive")
					);
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		;

/* singleton - the smallest pattern unit: a character, '.', a character
 * class, a quoted string or a parenthesized expression, optionally
 * followed by a closure or repeat operator.  The alternatives that
 * match a single input character increment rulelen.
 */
singleton	:  singleton '*'
			{
			varlength = true;

			$$ = mkclos( $1 );
			}

		|  singleton '+'
			{
			varlength = true;
			$$ = mkposcl( $1 );
			}

		|  singleton '?'
			{
			varlength = true;
			$$ = mkopt( $1 );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' NUMBER END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 > $5 || $3 < 0 )
				{
				synerr( _("bad iteration values") );
				$$ = $1;
				}
			else
				{
				if ( $3 == 0 )
					{
					if ( $5 <= 0 )
						{
						synerr(
						_("bad iteration values") );
						$$ = $1;
						}
					else
						/* {0,n} is built as an
						 * optional {1,n}.
						 */
						$$ = mkopt(
							mkrep( $1, 1, $5 ) );
					}
				else
					$$ = mkrep( $1, $3, $5 );
				}
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER ',' END_REPEAT_FLEX
			{
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = mkrep( $1, $3, INFINITE_REPEAT );
			}

		|  singleton BEGIN_REPEAT_FLEX NUMBER END_REPEAT_FLEX
			{
			/* The singleton could be something like "(foo)",
			 * in which case we have no idea what its length
			 * is, so we punt here.
			 */
			varlength = true;

			if ( $3 <= 0 )
				{
				synerr( _("iteration value must be positive") );
				$$ = $1;
				}

			else
				$$ = link_machines( $1,
						copysingl( $1, $3 - 1 ) );
			}

		|  '.'
			{
			/* Both dot classes are built once, the first time
			 * a '.' is seen, and reused afterwards.
			 */
			if ( ! madeany )
				{
				/* Create the '.' character class. */
                    ccldot = cclinit();
                    ccladd( ccldot, '\n' );
                    cclnegate( ccldot );

                    if ( useecs )
                        mkeccl( ccltbl + cclmap[ccldot],
                            ccllen[ccldot], nextecm,
                            ecgroup, csize, csize );

				/* Create the (?s:'.') character class. */
                    cclany = cclinit();
                    cclnegate( cclany );

                    if ( useecs )
                        mkeccl( ccltbl + cclmap[cclany],
                            ccllen[cclany], nextecm,
                            ecgroup, csize, csize );

				madeany = true;
				}

			++rulelen;

            if (sf_dot_all())
                $$ = mkstate( -cclany );
            else
                $$ = mkstate( -ccldot );
			}

		|  fullccl
			{
				/* Sort characters for fast searching.
				 */
				qsort( ccltbl + cclmap[$1], (size_t) ccllen[$1], sizeof (*ccltbl), cclcmp );

			if ( useecs )
				mkeccl( ccltbl + cclmap[$1], ccllen[$1],
					nextecm, ecgroup, csize, csize );

			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  PREVCCL
			{
			++rulelen;

			if (ccl_has_nl[$1])
				rule_has_nl[num_rules] = true;

			$$ = mkstate( -$1 );
			}

		|  '"' string '"'
			{ $$ = $2; }

		|  '(' re ')'
			{ $$ = $2; }

		|  CHAR
			{
			++rulelen;

			if ($1 == nlch)
				rule_has_nl[num_rules] = true;

            if (sf_case_ins() && has_case($1))
                /* create an alternation, as in (a|A) */
                $$ = mkor (mkstate($1), mkstate(reverse_case($1)));
            else
                $$ = mkstate( $1 );
			}
		;

/* fullccl - one bracketed class, or several combined left to right
 * with the set difference / set union operators.
 */
fullccl:
        fullccl CCL_OP_DIFF  braceccl  { $$ = ccl_set_diff  ($1, $3); }
    |   fullccl CCL_OP_UNION braceccl  { $$ = ccl_set_union ($1, $3); }
    |   braceccl
    ;

/* braceccl - a single bracketed class; a leading '^' negates it. */
braceccl:

            '[' ccl ']' { $$ = $2; }

		|  '[' '^' ccl ']'
			{
			cclnegate( $3 );
			$$ = $3;
			}
		;

/* ccl - the contents of a bracketed class, accumulated left to right.
 * The empty alternative at the bottom is reduced first: it creates the
 * class and sets currccl, which the ccl_expr actions add to.  cclsorted
 * and lastchar track whether the characters were given in increasing
 * order.
 */
ccl		:  ccl CHAR '-' CHAR
			{

			if (sf_case_ins())
			  {

			    /* If one end of the range has case and the other
			     * does not, or the cases are different, then we're not
			     * sure what range the user is trying to express.
			     * Examples: [@-z] or [S-t]
			     */
			    if (has_case ($2) != has_case ($4)
				     || (has_case ($2) && (b_islower ($2) != b_islower ($4)))
				     || (has_case ($2) && (b_isupper ($2) != b_isupper ($4))))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);

			    /* If the range spans uppercase characters but not
			     * lowercase (or vice-versa), then should we automatically
			     * include lowercase characters in the range?
			     * Example: [@-_] spans [A-Z] but not [a-z]
			     */
			    else if (!has_case ($2) && !has_case ($4) && !range_covers_case ($2, $4))
			      format_warn3 (
			      _("the character range [%c-%c] is ambiguous in a case-insensitive scanner"),
					    $2, $4);
			  }

			if ( $2 > $4 )
				synerr( _("negative range in character class") );

			else
				{
				for ( i = $2; i <= $4; ++i )
					ccladd( $1, i );

				/* Keep track if this ccl is staying in
				 * alphabetical order.
				 */
				cclsorted = cclsorted && ($2 > lastchar);
				lastchar = $4;

                /* Do it again for upper/lowercase */
                if (sf_case_ins() && has_case($2) && has_case($4)){
                    $2 = reverse_case ($2);
                    $4 = reverse_case ($4);

                    for ( i = $2; i <= $4; ++i )
                        ccladd( $1, i );

                    cclsorted = cclsorted && ($2 > lastchar);
                    lastchar = $4;
                }

				}

			$$ = $1;
			}

		|  ccl CHAR
			{
			ccladd( $1, $2 );
			cclsorted = cclsorted && ($2 > lastchar);
			lastchar = $2;

            /* Do it again for upper/lowercase */
            if (sf_case_ins() && has_case($2)){
                $2 = reverse_case ($2);
                ccladd ($1, $2);

                cclsorted = cclsorted && ($2 > lastchar);
                lastchar = $2;
            }

			$$ = $1;
			}

		|  ccl ccl_expr
			{
			/* Too hard to properly maintain cclsorted. */
			cclsorted = false;
			$$ = $1;
			}

		|
			{
			cclsorted = true;
			lastchar = 0;
			currccl = $$ = cclinit();
			}
		;

/* ccl_expr - a character class expression token inside a bracketed
 * class.  Each action adds the matching characters to currccl using the
 * corresponding <ctype.h> predicate (IS_BLANK for the blank class).  In
 * a case-insensitive scanner the lower and upper classes add both cases,
 * and their negated forms only produce a warning and add nothing.
 */
ccl_expr:
           CCE_ALNUM	{ CCL_EXPR(isalnum); }
		|  CCE_ALPHA	{ CCL_EXPR(isalpha); }
		|  CCE_BLANK	{ CCL_EXPR(IS_BLANK); }
		|  CCE_CNTRL	{ CCL_EXPR(iscntrl); }
		|  CCE_DIGIT	{ CCL_EXPR(isdigit); }
		|  CCE_GRAPH	{ CCL_EXPR(isgraph); }
		|  CCE_LOWER	{
                          CCL_EXPR(islower);
                          if (sf_case_ins())
                              CCL_EXPR(isupper);
                        }
		|  CCE_PRINT	{ CCL_EXPR(isprint); }
		|  CCE_PUNCT	{ CCL_EXPR(ispunct); }
		|  CCE_SPACE	{ CCL_EXPR(isspace); }
		|  CCE_XDIGIT	{ CCL_EXPR(isxdigit); }
		|  CCE_UPPER	{
                    CCL_EXPR(isupper);
                    if (sf_case_ins())
                        CCL_EXPR(islower);
				}

        |  CCE_NEG_ALNUM	{ CCL_NEG_EXPR(isalnum); }
		|  CCE_NEG_ALPHA	{ CCL_NEG_EXPR(isalpha); }
		|  CCE_NEG_BLANK	{ CCL_NEG_EXPR(IS_BLANK); }
		|  CCE_NEG_CNTRL	{ CCL_NEG_EXPR(iscntrl); }
		|  CCE_NEG_DIGIT	{ CCL_NEG_EXPR(isdigit); }
		|  CCE_NEG_GRAPH	{ CCL_NEG_EXPR(isgraph); }
		|  CCE_NEG_PRINT	{ CCL_NEG_EXPR(isprint); }
		|  CCE_NEG_PUNCT	{ CCL_NEG_EXPR(ispunct); }
		|  CCE_NEG_SPACE	{ CCL_NEG_EXPR(isspace); }
		|  CCE_NEG_XDIGIT	{ CCL_NEG_EXPR(isxdigit); }
		|  CCE_NEG_LOWER	{
				if ( sf_case_ins() )
					lwarn(_("[:^lower:] is ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(islower);
				}
		|  CCE_NEG_UPPER	{
				if ( sf_case_ins() )
					lwarn(_("[:^upper:] ambiguous in case insensitive scanner"));
				else
					CCL_NEG_EXPR(isupper);
				}
		;

/* string - the characters between double quotes, linked one state at a
 * time onto an initial epsilon state.  In a case-insensitive scanner a
 * character that has case becomes an alternation of both cases.
 */
string		:  string CHAR
			{
			if ( $2 == nlch )
				rule_has_nl[num_rules] = true;

			++rulelen;

            if (sf_case_ins() && has_case($2))
                $$ = mkor (mkstate($2), mkstate(reverse_case($2)));
            else
                $$ = mkstate ($2);

			$$ = link_machines( $1, $$);
			}

		|
			{ $$ = mkstate( SYM_EPSILON ); }
		;

%%


/* build_eof_action - build the "<<EOF>>" action for the active start
 *                    conditions
 */

void build_eof_action(void) 957 { 958 int i; 959 char action_text[MAXLINE]; 960 961 for ( i = 1; i <= scon_stk_ptr; ++i ) 962 { 963 if ( sceof[scon_stk[i]] ) 964 format_pinpoint_message( 965 "multiple <<EOF>> rules for start condition %s", 966 scname[scon_stk[i]] ); 967 968 else 969 { 970 sceof[scon_stk[i]] = true; 971 972 if (previous_continued_action /* && previous action was regular */) 973 add_action("YY_RULE_SETUP\n"); 974 975 snprintf( action_text, sizeof(action_text), "case YY_STATE_EOF(%s):\n", 976 scname[scon_stk[i]] ); 977 add_action( action_text ); 978 } 979 } 980 981 line_directive_out(NULL, 1); 982 add_action("[["); 983 984 /* This isn't a normal rule after all - don't count it as 985 * such, so we don't have any holes in the rule numbering 986 * (which make generating "rule can never match" warnings 987 * more difficult. 988 */ 989 --num_rules; 990 ++num_eof_rules; 991 } 992 993 994 /* format_synerr - write out formatted syntax error */ 995 996 void format_synerr( const char *msg, const char arg[] ) 997 { 998 char errmsg[MAXLINE]; 999 1000 (void) snprintf( errmsg, sizeof(errmsg), msg, arg ); 1001 synerr( errmsg ); 1002 } 1003 1004 1005 /* synerr - report a syntax error */ 1006 1007 void synerr( const char *str ) 1008 { 1009 syntaxerror = true; 1010 pinpoint_message( str ); 1011 } 1012 1013 1014 /* format_warn - write out formatted warning */ 1015 1016 void format_warn( const char *msg, const char arg[] ) 1017 { 1018 char warn_msg[MAXLINE]; 1019 1020 snprintf( warn_msg, sizeof(warn_msg), msg, arg ); 1021 lwarn( warn_msg ); 1022 } 1023 1024 1025 /* lwarn - report a warning, unless -w was given */ 1026 1027 void lwarn( const char *str ) 1028 { 1029 line_warning( str, linenum ); 1030 } 1031 1032 /* format_pinpoint_message - write out a message formatted with one string, 1033 * pinpointing its location 1034 */ 1035 1036 void format_pinpoint_message( const char *msg, const char arg[] ) 1037 { 1038 char errmsg[MAXLINE]; 1039 1040 snprintf( errmsg, 
sizeof(errmsg), msg, arg ); 1041 pinpoint_message( errmsg ); 1042 } 1043 1044 1045 /* pinpoint_message - write out a message, pinpointing its location */ 1046 1047 void pinpoint_message( const char *str ) 1048 { 1049 line_pinpoint( str, linenum ); 1050 } 1051 1052 1053 /* line_warning - report a warning at a given line, unless -w was given */ 1054 1055 void line_warning( const char *str, int line ) 1056 { 1057 char warning[MAXLINE]; 1058 1059 if ( ! nowarn ) 1060 { 1061 snprintf( warning, sizeof(warning), "warning, %s", str ); 1062 line_pinpoint( warning, line ); 1063 } 1064 } 1065 1066 1067 /* line_pinpoint - write out a message, pinpointing it at the given line */ 1068 1069 void line_pinpoint( const char *str, int line ) 1070 { 1071 fprintf( stderr, "%s:%d: %s\n", infilename, line, str ); 1072 } 1073 1074 1075 /* yyerror - eat up an error message from the parser; 1076 * currently, messages are ignore 1077 */ 1078 1079 void yyerror( const char *msg ) 1080 { 1081 (void)msg; 1082 } 1083