xref: /freebsd/contrib/flex/src/scan.l (revision 911f0260390e18cf85f3dbf2c719b593efdc1e3c)
1 /* scan.l - scanner for flex input -*-C-*- */
2 
3 %{
4 /*  Copyright (c) 1990 The Regents of the University of California. */
5 /*  All rights reserved. */
6 
7 /*  This code is derived from software contributed to Berkeley by */
8 /*  Vern Paxson. */
9 
10 /*  The United States Government has rights in this work pursuant */
11 /*  to contract no. DE-AC03-76SF00098 between the United States */
12 /*  Department of Energy and the University of California. */
13 
14 /*  This file is part of flex. */
15 
16 /*  Redistribution and use in source and binary forms, with or without */
17 /*  modification, are permitted provided that the following conditions */
18 /*  are met: */
19 
20 /*  1. Redistributions of source code must retain the above copyright */
21 /*     notice, this list of conditions and the following disclaimer. */
22 /*  2. Redistributions in binary form must reproduce the above copyright */
23 /*     notice, this list of conditions and the following disclaimer in the */
24 /*     documentation and/or other materials provided with the distribution. */
25 
26 /*  Neither the name of the University nor the names of its contributors */
27 /*  may be used to endorse or promote products derived from this software */
28 /*  without specific prior written permission. */
29 
30 /*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
31 /*  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
32 /*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
33 /*  PURPOSE. */
34 
35 #include "flexdef.h"
36 #include "parse.h"
37 extern bool tablesverify, tablesext;
38 extern int trlcontxt; /* Set in  parse.y for each rule. */
39 extern const char *escaped_qstart, *escaped_qend;
40 extern int yylval;
41 
42 #define M4QSTART "[""["
43 #define M4QEND "]""]"
44 
45 #define ESCAPED_QSTART "[" M4QEND M4QSTART "[" M4QEND M4QSTART
46 #define ESCAPED_QEND M4QEND "]" M4QSTART M4QEND "]" M4QSTART
47 
48 #define ACTION_ECHO add_action( yytext )
/* ACTION_IFDEF - call action_define( def, 1 ) when should_define is non-zero.
 *
 * Wrapped in do { } while (0), like ACTION_M4_IFDEF, so that the macro
 * followed by ';' is exactly one statement and can sit directly under an
 * if/else without braces.
 */
#define ACTION_IFDEF(def, should_define) \
	do { \
		if ( should_define ) \
			action_define( def, 1 ); \
	} while (0)
54 
55 #define ACTION_ECHO_QSTART add_action (ESCAPED_QSTART)
56 #define ACTION_ECHO_QEND   add_action (ESCAPED_QEND)
57 
58 #define ACTION_M4_IFDEF(def, should_define) \
59     do{ \
60         if ( should_define ) \
61             buf_m4_define( &m4defs_buf, def, NULL);\
62         else \
63             buf_m4_undefine( &m4defs_buf, def);\
64     } while(0)
65 
66 #define MARK_END_OF_PROLOG mark_prolog();
67 
68 #define YY_DECL \
69 	int flexscan(void)
70 
/* RETURNCHAR - store the first character of yytext in yylval (converted
 * through unsigned char, so the value is never negative) and return the
 * CHAR token from the scanner function.
 *
 * The two statements are wrapped in do { } while (0) so that a guarding
 * `if` covers the return as well as the assignment.
 */
#define RETURNCHAR \
	do { \
		yylval = (unsigned char) yytext[0]; \
		return CHAR; \
	} while (0)
74 
75 #define RETURNNAME \
76 	if(yyleng < MAXLINE) \
77          { \
78 	strncpy( nmstr, yytext, sizeof(nmstr) ); \
79 	return NAME; \
80 	 } \
81 	else \
82 	 do { \
83 	   synerr(_("Input line too long\n")); \
84 	   FLEX_EXIT(EXIT_FAILURE);  \
85 	 } while (0)
86 
/* PUT_BACK_STRING - unput() the characters str[start] .. end of str, last
 * character first, stopping once index `start` has been pushed back.
 *
 * str   - NUL-terminated string; evaluated more than once, so it must not
 *         have side effects.
 * start - index of the first character to push back; 0 pushes the whole
 *         string.
 *
 * The body is a single do { } while (0) statement, both arguments are
 * parenthesized, and the loop index has a macro-specific name so it cannot
 * capture a caller's variable that appears in str or start.
 */
#define PUT_BACK_STRING(str, start) \
	do { \
		size_t pbs_idx_ = strlen( (str) ); \
		while ( pbs_idx_ > (start) ) \
			unput( (str)[--pbs_idx_] ); \
	} while (0)
92 
/* CHECK_REJECT - set the `reject` flag when all_upper( str ) is true.
 *
 * The scanner is generated with %option caseless, so the "reject" rule that
 * invokes this macro matches any capitalisation; the all_upper() test
 * (defined elsewhere) decides whether the matched text counts.
 *
 * Wrapped in do { } while (0) with no trailing semicolon, so the macro is a
 * single statement and is safe directly under an if/else.
 */
#define CHECK_REJECT(str) \
	do { \
		if ( all_upper( str ) ) \
			reject = true; \
	} while (0)
96 
/* CHECK_YYMORE - set the `yymore_used` flag when all_lower( str ) is true.
 *
 * The scanner is generated with %option caseless, so the "yymore" rule that
 * invokes this macro matches any capitalisation; the all_lower() test
 * (defined elsewhere) decides whether the matched text counts.
 *
 * Wrapped in do { } while (0) with no trailing semicolon, so the macro is a
 * single statement and is safe directly under an if/else.
 */
#define CHECK_YYMORE(str) \
	do { \
		if ( all_lower( str ) ) \
			yymore_used = true; \
	} while (0)
100 
/* YY_USER_INIT - scanner initialisation hook: turn on posix_compat when the
 * POSIXLY_CORRECT environment variable is set (its value is not examined).
 *
 * Wrapped in do { } while (0) with no trailing semicolon so the expansion is
 * one statement wherever the generated scanner places it.
 */
#define YY_USER_INIT \
	do { \
		if ( getenv("POSIXLY_CORRECT") ) \
			posix_compat = true; \
	} while (0)
104 
105 #define START_CODEBLOCK(x) do { \
106     /* Emit the needed line directive... */\
107     if (indented_code == false) { \
108         linenum++; \
109         line_directive_out(NULL, 1); \
110     } \
111     add_action(M4QSTART); \
112     yy_push_state(CODEBLOCK); \
113     if ((indented_code = x)) ACTION_ECHO; \
114 } while(0)
115 
116 #define END_CODEBLOCK do { \
117     yy_pop_state();\
118     add_action(M4QEND); \
119     if (!indented_code) line_directive_out(NULL, 0);\
120 } while (0)
121 
122 %}
123 
124 %option caseless nodefault noreject stack noyy_top_state
125 %option nostdinit
126 
127 %x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
128 %x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
129 %x OPTION LINEDIR CODEBLOCK_MATCH_BRACE
130 %x GROUP_WITH_PARAMS
131 %x GROUP_MINUS_PARAMS
132 %x EXTENDED_COMMENT
133 %x COMMENT_DISCARD CODE_COMMENT
134 %x SECT3_NOESCAPE
135 %x CHARACTER_CONSTANT
136 
137 WS		[[:blank:]]+
138 OPTWS		[[:blank:]]*
139 NOT_WS		[^[:blank:]\r\n]
140 
141 NL		\r?\n
142 
143 NAME		([[:alpha:]_][[:alnum:]_-]*)
144 NOT_NAME	[^[:alpha:]_*\n]+
145 
146 SCNAME		{NAME}
147 
148 ESCSEQ		(\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))
149 
150 FIRST_CCL_CHAR	([^\\\n]|{ESCSEQ})
151 CCL_CHAR	([^\\\n\]]|{ESCSEQ})
152 CCL_EXPR	("[:"^?[[:alpha:]]+":]")
153 
154 LEXOPT		[aceknopr]
155 
156 M4QSTART    "[""["
157 M4QEND      "]""]"
158 
159 %%
160 	static int bracelevel, didadef, indented_code;
161 	static int doing_rule_action = false;
162 	static int option_sense;
163 
164 	int doing_codeblock = false;
165 	int brace_depth=0, brace_start_line=0;
166 	char nmdef[MAXLINE];
167 
168 
169 <INITIAL>{
170 	^{WS}		START_CODEBLOCK(true);
171 	^"/*"		add_action("/*[""["); yy_push_state( COMMENT );
172 	^#{OPTWS}line{WS}	yy_push_state( LINEDIR );
173 	^"%s"{NAME}?	return SCDECL;
174 	^"%x"{NAME}?	return XSCDECL;
175 	^"%{".*{NL}	START_CODEBLOCK(false);
176     ^"%top"[[:blank:]]*"{"[[:blank:]]*{NL}    {
177                 brace_start_line = linenum;
178                 ++linenum;
179                 buf_linedir( &top_buf, infilename?infilename:"<stdin>", linenum);
180                 brace_depth = 1;
181                 yy_push_state(CODEBLOCK_MATCH_BRACE);
182             }
183 
184     ^"%top".*   synerr( _("malformed '%top' directive") );
185 
186 	{WS}		/* discard */
187 
188 	^"%%".*		{
189 			sectnum = 2;
190 			bracelevel = 0;
191 			mark_defs1();
192 			line_directive_out(NULL, 1);
193 			BEGIN(SECT2PROLOG);
194 			return SECTEND;
195 			}
196 
197 	^"%pointer".*{NL}	yytext_is_array = false; ++linenum;
198 	^"%array".*{NL}		yytext_is_array = true; ++linenum;
199 
200 	^"%option"	BEGIN(OPTION); return TOK_OPTION;
201 
202 	^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL}	++linenum; /* ignore */
203 	^"%"{LEXOPT}{WS}.*{NL}	++linenum;	/* ignore */
204 
205 	/* xgettext: no-c-format */
206 	^"%"[^sxaceknopr{}].*	synerr( _( "unrecognized '%' directive" ) );
207 
208 	^{NAME}		{
209 			if(yyleng < MAXLINE)
210         		 {
211 			strncpy( nmstr, yytext, sizeof(nmstr) );
212 			 }
213 			else
214 			 {
215 			   synerr( _("Definition name too long\n"));
216 			   FLEX_EXIT(EXIT_FAILURE);
217 			 }
218 
219 			didadef = false;
220 			BEGIN(PICKUPDEF);
221 			}
222 
223 	{SCNAME}	RETURNNAME;
224 	^{OPTWS}{NL}	++linenum; /* allows blank lines in section 1 */
225 	{OPTWS}{NL}	ACTION_ECHO; ++linenum; /* maybe end of comment line */
226 }
227 
228 
229 <COMMENT,CODE_COMMENT>{ /* */
230         [^\[\]\*\n]*  ACTION_ECHO;
231         .           ACTION_ECHO;
232 
233 	{NL}	    ++linenum; ACTION_ECHO;
234 }
235 <COMMENT>{
236 	"*/"	    add_action("*/]""]"); yy_pop_state();
237 }
238 <CODE_COMMENT>{
239         "*/"        ACTION_ECHO; yy_pop_state();
240 }
241 
242 <COMMENT_DISCARD>{
243         /* This is the same as COMMENT, but is discarded rather than output.
         * The state is pushed from SECT2 when a C-style comment opens while
         * sf_skip_ws() is true, and the closing delimiter pops back to the
         * state that was active before.  Nothing is passed to add_action();
         * newlines are still counted so linenum stays accurate.
         */
244 	"*/"		yy_pop_state();
        /* A '*' that does not start the closing delimiter, and any other
         * non-newline character, is dropped one character at a time.
         */
245     "*"         ;
246 	[^*\n]      ;
247 	{NL}	    ++linenum;
248 }
249 
250 <EXTENDED_COMMENT>{
    /* Body of a "(?#" comment inside a pattern.  The state is pushed from
     * SECT2 unless lex_compat or posix_compat is set.  Text up to the first
     * ")" is discarded, newlines are counted in linenum, and ")" pops back
     * to the previous state.  There is no nesting: the first ")" ends it.
     */
251     ")"         yy_pop_state();
252     [^\n\)]+      ;
253     {NL}        ++linenum;
254 }
255 
256 <LINEDIR>{
	/* Remainder of a "#line" directive; pushed from INITIAL once the
	 * leading "#", optional blanks, "line" and following blanks have
	 * been matched.
	 */

	/* End of the directive: return to the previous state.  linenum is
	 * deliberately not incremented here; it was set from the directive.
	 */
257 	\n		yy_pop_state();
	/* The line number given in the directive replaces the current one. */
258 	[[:digit:]]+	linenum = myctoi( yytext );
259 
	/* Quoted file name: replace infilename with a heap copy of the text
	 * between the quotes.
	 */
260 	\"[^"\n]*\"	{
261 			free(infilename);
			/* Skip the opening quote ... */
262 			infilename = xstrdup(yytext + 1);
			/* ... and overwrite the closing quote with the terminator. */
263 			infilename[strlen( infilename ) - 1] = '\0';
264 			}
265 	.		/* ignore spurious characters */
266 }
267 <ACTION,CODEBLOCK,ACTION_STRING,PERCENT_BRACE_ACTION,CHARACTER_CONSTANT,COMMENT,CODE_COMMENT>{
268    {M4QSTART}   ACTION_ECHO_QSTART;
269    {M4QEND}     ACTION_ECHO_QEND;
270 }
271 
272 <CODEBLOCK>{
273 	^"%}".*{NL}	++linenum; END_CODEBLOCK;
274 	[^\n%\[\]]*         ACTION_ECHO;
275         .		ACTION_ECHO;
276 	{NL}		{
277 			++linenum;
278 			ACTION_ECHO;
279 			if ( indented_code ) END_CODEBLOCK;
280 			}
281 }
282 
283 <CODEBLOCK_MATCH_BRACE>{
284     "}"     {
285                 if( --brace_depth == 0){
286                     /* TODO: Matched. */
287                     yy_pop_state();
288                 }else
289                     buf_strnappend(&top_buf, yytext, yyleng);
290             }
291 
292     "{"     {
293                 brace_depth++;
294                 buf_strnappend(&top_buf, yytext, yyleng);
295             }
296 
297     {NL}    {
298                 ++linenum;
299                 buf_strnappend(&top_buf, yytext, yyleng);
300             }
301 
302     {M4QSTART}  buf_strnappend(&top_buf, escaped_qstart, (int) strlen(escaped_qstart));
303     {M4QEND}    buf_strnappend(&top_buf, escaped_qend, (int) strlen(escaped_qend));
304     ([^{}\r\n\[\]]+)|[^{}\r\n]  {
305        buf_strnappend(&top_buf, yytext, yyleng);
306     }
307 
308     <<EOF>>     {
309                 linenum = brace_start_line;
310                 synerr(_("Unmatched '{'"));
311                 yyterminate();
312                 }
313 }
314 
315 
316 <PICKUPDEF>{
317 	{WS}		/* separates name and definition */
318 
319 	{NOT_WS}[^\r\n]*	{
320  		        if(yyleng < MAXLINE)
321  		         {
322 			strncpy( nmdef, yytext, sizeof(nmdef) );
323  		         }
324  		        else
325  		         {
326  		           format_synerr( _("Definition value for {%s} too long\n"), nmstr);
327  		           FLEX_EXIT(EXIT_FAILURE);
328 			 }
329 			/* Skip trailing whitespace. */
330 			{
331 			    size_t i = strlen( nmdef );
332 			    while (i > 0 && (nmdef[i-1] == ' ' || nmdef[i-1] == '\t'))
333 			       --i;
334 			    nmdef[i] = '\0';
335 			}
336 
337 			ndinstal( nmstr, nmdef );
338 			didadef = true;
339 			}
340 
341 	{NL}		{
342 			if ( ! didadef )
343 				synerr( _( "incomplete name definition" ) );
344 			BEGIN(INITIAL);
345 			++linenum;
346 			}
347 }
348 
349 
350 <OPTION>{
351 	{NL}		++linenum; BEGIN(INITIAL);
352 	{WS}		option_sense = true;
353 
354 	"="		return '=';
355 
356 	no		option_sense = ! option_sense;
357 
358 	7bit		csize = option_sense ? 128 : 256;
359 	8bit		csize = option_sense ? 256 : 128;
360 
361 	align		long_align = option_sense;
362 	always-interactive	{
363 			ACTION_M4_IFDEF( "M4""_YY_ALWAYS_INTERACTIVE", option_sense );
364             interactive = option_sense;
365 			}
366 	array		yytext_is_array = option_sense;
367 	backup		backing_up_report = option_sense;
368 	batch		interactive = ! option_sense;
369     bison-bridge     bison_bridge_lval = option_sense;
370     bison-locations  { if((bison_bridge_lloc = option_sense))
371                             bison_bridge_lval = true;
372                      }
373 	"c++"		C_plus_plus = option_sense;
374 	caseful|case-sensitive		sf_set_case_ins(!option_sense);
375 	caseless|case-insensitive	sf_set_case_ins(option_sense);
376 	debug		ddebug = option_sense;
377 	default		spprdflt = ! option_sense;
378 	ecs		useecs = option_sense;
379 	fast		{
380 			useecs = usemecs = false;
381 			use_read = fullspd = true;
382 			}
383 	full		{
384 			useecs = usemecs = false;
385 			use_read = fulltbl = true;
386 			}
387 	input		ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
388 	interactive	interactive = option_sense;
389 	lex-compat	lex_compat = option_sense;
390 	posix-compat	posix_compat = option_sense;
391 	line		gen_line_dirs = option_sense;
392 	main		{
393 			ACTION_M4_IFDEF( "M4""_YY_MAIN", option_sense);
394             /* Override yywrap */
395             if( option_sense == true )
396                 do_yywrap = false;
397 			}
398 	meta-ecs	usemecs = option_sense;
399 	never-interactive	{
400 			ACTION_M4_IFDEF( "M4""_YY_NEVER_INTERACTIVE", option_sense );
401             interactive = !option_sense;
402 			}
403 	perf-report	performance_report += option_sense ? 1 : -1;
404 	pointer		yytext_is_array = ! option_sense;
405 	read		use_read = option_sense;
406     reentrant   reentrant = option_sense;
407 	reject		reject_really_used = option_sense;
408 	stack		ACTION_M4_IFDEF( "M4""_YY_STACK_USED", option_sense );
409 	stdinit		do_stdinit = option_sense;
410 	stdout		use_stdout = option_sense;
411     unistd      ACTION_IFDEF("YY_NO_UNISTD_H", ! option_sense);
412 	unput		ACTION_M4_IFDEF("M4""_YY_NO_UNPUT", ! option_sense);
413 	verbose		printstats = option_sense;
414 	warn		nowarn = ! option_sense;
415 	yylineno	do_yylineno = option_sense; ACTION_M4_IFDEF("M4""_YY_USE_LINENO", option_sense);
416 	yymore		yymore_really_used = option_sense;
417 	yywrap      do_yywrap = option_sense;
418 
419 	yy_push_state	ACTION_M4_IFDEF("M4""_YY_NO_PUSH_STATE", ! option_sense);
420 	yy_pop_state	ACTION_M4_IFDEF("M4""_YY_NO_POP_STATE", ! option_sense);
421 	yy_top_state	ACTION_M4_IFDEF("M4""_YY_NO_TOP_STATE", ! option_sense);
422 
423 	yy_scan_buffer	ACTION_M4_IFDEF("M4""_YY_NO_SCAN_BUFFER", ! option_sense);
424 	yy_scan_bytes	ACTION_M4_IFDEF("M4""_YY_NO_SCAN_BYTES", ! option_sense);
425 	yy_scan_string	ACTION_M4_IFDEF("M4""_YY_NO_SCAN_STRING", ! option_sense);
426 
427     yyalloc         ACTION_M4_IFDEF("M4""_YY_NO_FLEX_ALLOC", ! option_sense);
428     yyrealloc       ACTION_M4_IFDEF("M4""_YY_NO_FLEX_REALLOC", ! option_sense);
429     yyfree          ACTION_M4_IFDEF("M4""_YY_NO_FLEX_FREE", ! option_sense);
430 
431     yyget_debug     ACTION_M4_IFDEF("M4""_YY_NO_GET_DEBUG", ! option_sense);
432     yyset_debug     ACTION_M4_IFDEF("M4""_YY_NO_SET_DEBUG", ! option_sense);
433     yyget_extra     ACTION_M4_IFDEF("M4""_YY_NO_GET_EXTRA", ! option_sense);
434     yyset_extra     ACTION_M4_IFDEF("M4""_YY_NO_SET_EXTRA", ! option_sense);
435     yyget_leng      ACTION_M4_IFDEF("M4""_YY_NO_GET_LENG", ! option_sense);
436     yyget_text      ACTION_M4_IFDEF("M4""_YY_NO_GET_TEXT", ! option_sense);
437     yyget_lineno    ACTION_M4_IFDEF("M4""_YY_NO_GET_LINENO", ! option_sense);
438     yyset_lineno    ACTION_M4_IFDEF("M4""_YY_NO_SET_LINENO", ! option_sense);
439     yyget_in        ACTION_M4_IFDEF("M4""_YY_NO_GET_IN", ! option_sense);
440     yyset_in        ACTION_M4_IFDEF("M4""_YY_NO_SET_IN", ! option_sense);
441     yyget_out       ACTION_M4_IFDEF("M4""_YY_NO_GET_OUT", ! option_sense);
442     yyset_out       ACTION_M4_IFDEF("M4""_YY_NO_SET_OUT", ! option_sense);
443     yyget_lval      ACTION_M4_IFDEF("M4""_YY_NO_GET_LVAL", ! option_sense);
444     yyset_lval      ACTION_M4_IFDEF("M4""_YY_NO_SET_LVAL", ! option_sense);
445     yyget_lloc      ACTION_M4_IFDEF("M4""_YY_NO_GET_LLOC", ! option_sense);
446     yyset_lloc      ACTION_M4_IFDEF("M4""_YY_NO_SET_LLOC", ! option_sense);
447 
448 	extra-type	return TOK_EXTRA_TYPE;
449 	outfile		return TOK_OUTFILE;
450 	prefix		return TOK_PREFIX;
451 	yyclass		return TOK_YYCLASS;
452 	header(-file)?      return TOK_HEADER_FILE;
453 	tables-file         return TOK_TABLES_FILE;
454 	tables-verify   {
455                     tablesverify = option_sense;
456                     if(!tablesext && option_sense)
457                         tablesext = true;
458                     }
459 
460 
461 	\"[^"\n]*\"	{
462 			if(yyleng-1 < MAXLINE)
463         		 {
464 			strncpy( nmstr, yytext + 1, sizeof(nmstr) );
465 			 }
466 			else
467 			 {
468 			   synerr( _("Option line too long\n"));
469 			   FLEX_EXIT(EXIT_FAILURE);
470 			 }
471 			nmstr[strlen( nmstr ) - 1] = '\0';
472 			return NAME;
473 			}
474 
475 	(([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|.	{
476 			format_synerr( _( "unrecognized %%option: %s" ),
477 				yytext );
478 			BEGIN(RECOVER);
479 			}
480 }
481 
482 <RECOVER>.*{NL}		++linenum; BEGIN(INITIAL);
483 
484 
485 <SECT2PROLOG>{
486 	^"%{".*	++bracelevel; yyless( 2 );	/* eat only %{ */
487 	^"%}".*	--bracelevel; yyless( 2 );	/* eat only %} */
488 
489 	^{WS} START_CODEBLOCK(true); /* indented code in prolog */
490 
491 	^{NOT_WS}.*	{
492         /* non-indented code */
493 		if ( bracelevel <= 0 ) {
494             /* not in %{ ... %} */
495             yyless( 0 );	/* put it all back */
496             yy_set_bol( 1 );
497             mark_prolog();
498             BEGIN(SECT2);
499         } else {
500             START_CODEBLOCK(true);
501         }
502     }
503 
504 	.		ACTION_ECHO;
505 	{NL}	++linenum; ACTION_ECHO;
506 
507 	<<EOF>>		{
508 			mark_prolog();
509 			sectnum = 0;
510 			yyterminate(); /* to stop the parser */
511 			}
512 }
513 
514 <SECT2>{
515 	^{OPTWS}{NL}	++linenum; /* allow blank lines in section 2 */
516 
517 	^{OPTWS}"%{"	{
518 			indented_code = false;
519 			doing_codeblock = true;
520 			bracelevel = 1;
521 			BEGIN(PERCENT_BRACE_ACTION);
522 			}
523 
524 	^{OPTWS}"<"	    {
525                         /* Allow "<" to appear in (?x) patterns. */
526                         if (!sf_skip_ws())
527                             BEGIN(SC);
528                         return '<';
529                     }
530 	^{OPTWS}"^"	return '^';
531 	\"		BEGIN(QUOTE); return '"';
532 	"{"/[[:digit:]]	{
533 			BEGIN(NUM);
534 			if ( lex_compat || posix_compat )
535 				return BEGIN_REPEAT_POSIX;
536 			else
537 				return BEGIN_REPEAT_FLEX;
538 			}
539 	"$"/([[:blank:]]|{NL})	return '$';
540 
541 	{WS}"%{"		{
542 			bracelevel = 1;
543 			BEGIN(PERCENT_BRACE_ACTION);
544 
545 			if ( in_rule )
546 				{
547 				doing_rule_action = true;
548 				in_rule = false;
549 				return '\n';
550 				}
551 			}
552 	{WS}"|".*{NL}	{
553                         if (sf_skip_ws()){
554                             /* We're in the middle of a (?x: ) pattern. */
555                             /* Push back everything starting at the "|" */
556                             int amt = (int) (strchr (yytext, '|') - yytext);
557                             yyless(amt);
558                         }
559                         else {
560                             add_action("]""]");
561                             continued_action = true;
562                             ++linenum;
563                             return '\n';
564                         }
565                     }
566 
567 	^{WS}"/*"	{
568 
569                 if (sf_skip_ws()){
570                     /* We're in the middle of a (?x: ) pattern. */
571                     yy_push_state(COMMENT_DISCARD);
572                 }
573                 else{
574                     yyless( yyleng - 2 );	/* put back '/', '*' */
575                     bracelevel = 0;
576                     continued_action = false;
577                     BEGIN(ACTION);
578                 }
579 			}
580 
581 	^{WS}		/* allow indented rules */ ;
582 
583 	{WS}		{
584             if (sf_skip_ws()){
585                 /* We're in the middle of a (?x: ) pattern. */
586             }
587             else{
588                 /* This rule is separate from the one below because
589                  * otherwise we get variable trailing context, so
590                  * we can't build the scanner using -{f,F}.
591                  */
592                 bracelevel = 0;
593                 continued_action = false;
594                 BEGIN(ACTION);
595 
596                 if ( in_rule )
597                     {
598                     doing_rule_action = true;
599                     in_rule = false;
600                     return '\n';
601                     }
602             }
603 			}
604 
605 	{OPTWS}{NL}	{
606             if (sf_skip_ws()){
607                 /* We're in the middle of a (?x: ) pattern. */
608                 ++linenum;
609             }
610             else{
611                 bracelevel = 0;
612                 continued_action = false;
613                 BEGIN(ACTION);
614                 unput( '\n' );	/* so <ACTION> sees it */
615 
616                 if ( in_rule )
617                     {
618                     doing_rule_action = true;
619                     in_rule = false;
620                     return '\n';
621                     }
622             }
623 			}
624 
625 	^{OPTWS}"<<EOF>>"	|
626 	"<<EOF>>"	return EOF_OP;
627 
628 	^"%%".*		{
629 			sectnum = 3;
630 			BEGIN(no_section3_escape ? SECT3_NOESCAPE : SECT3);
631 			outn("/* Begin user sect3 */");
632 			yyterminate(); /* to stop the parser */
633 
634 			}
635 
636 	"["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})*	{
637 			int cclval;
638 
639 			if(yyleng < MAXLINE)
640         		 {
641 			strncpy( nmstr, yytext, sizeof(nmstr) );
642 			 }
643 			else
644 			 {
645 			   synerr( _("Input line too long\n"));
646 			   FLEX_EXIT(EXIT_FAILURE);
647 			 }
648 
649 			/* Check to see if we've already encountered this
650 			 * ccl.
651 			 */
652 			if (0 /* <--- This "0" effectively disables the reuse of a
653                    * character class (purely based on its source text).
654                    * The reason it was disabled is so yacc/bison can parse
655                    * ccl operations, such as ccl difference and union.
656                    */
657                 &&  (cclval = ccllookup( nmstr )) != 0 )
658 				{
659 				if ( input() != ']' )
660 					synerr( _( "bad character class" ) );
661 
662 				yylval = cclval;
663 				++cclreuse;
664 				return PREVCCL;
665 				}
666 			else
667 				{
668 				/* We fudge a bit.  We know that this ccl will
669 				 * soon be numbered as lastccl + 1 by cclinit.
670 				 */
671 				cclinstal( nmstr, lastccl + 1 );
672 
673 				/* Push back everything but the leading bracket
674 				 * so the ccl can be rescanned.
675 				 */
676 				yyless( 1 );
677 
678 				BEGIN(FIRSTCCL);
679 				return '[';
680 				}
681 			}
682     "{-}"       return CCL_OP_DIFF;
683     "{+}"       return CCL_OP_UNION;
684 
685 
686     /* Check for :space: at the end of the rule so we don't
687      * wrap the expanded regex in '(' ')' -- breaking trailing
688      * context.
689      */
690 	"{"{NAME}"}"[[:space:]]?	 {
691 			char *nmdefptr;
692             int end_is_ws, end_ch;
693 
694             end_ch = yytext[yyleng-1];
695             end_is_ws = end_ch != '}' ? 1 : 0;
696 
697  			if(yyleng-1 < MAXLINE)
698          		 {
699 			strncpy( nmstr, yytext + 1, sizeof(nmstr) );
700  			 }
701  			else
702  			 {
703  			   synerr( _("Input line too long\n"));
704  			   FLEX_EXIT(EXIT_FAILURE);
705  			 }
706 nmstr[yyleng - 2 - end_is_ws] = '\0';  /* chop trailing brace */
707 
708 			if ( (nmdefptr = ndlookup( nmstr )) == NULL )
709 				format_synerr(
710 					_( "undefined definition {%s}" ),
711 						nmstr );
712 
713 			else
714 				{ /* push back name surrounded by ()'s */
715 				size_t len = strlen( nmdefptr );
716                 if (end_is_ws)
717                     unput(end_ch);
718 
719 				if ( lex_compat || nmdefptr[0] == '^' ||
720 				     (len > 0 && nmdefptr[len - 1] == '$')
721                      || (end_is_ws && trlcontxt && !sf_skip_ws()))
722 					{ /* don't use ()'s after all */
723 					PUT_BACK_STRING(nmdefptr, 0);
724 
725 					if ( nmdefptr[0] == '^' )
726 						BEGIN(CARETISBOL);
727 					}
728 
729 				else
730 					{
731 					unput(')');
732 					PUT_BACK_STRING(nmdefptr, 0);
733 					unput('(');
734 					}
735 				}
736 			}
737 
738     "/*"        {
739                     if (sf_skip_ws())
740                         yy_push_state(COMMENT_DISCARD);
741                     else{
742                         /* Push back the "*" and return "/" as usual. */
743                         yyless(1);
744                         return '/';
745                     }
746                 }
747 
748     "(?#"       {
749                     if (lex_compat || posix_compat){
750                         /* Push back the "?#" and treat it like a normal parens. */
751                         yyless(1);
752                         sf_push();
753                         return '(';
754                     }
755                     else
756                         yy_push_state(EXTENDED_COMMENT);
757                 }
758     "(?"        {
759                     sf_push();
760                     if (lex_compat || posix_compat)
761                         /* Push back the "?" and treat it like a normal parens. */
762                         yyless(1);
763                     else
764                         BEGIN(GROUP_WITH_PARAMS);
765                     return '(';
766                 }
767     "("         sf_push(); return '(';
768     ")"         {
769                     if (_sf_top_ix > 0) {
770                         sf_pop();
771                         return ')';
772                     } else
773                         synerr(_("unbalanced parenthesis"));
774                 }
775 
776 	[/|*+?.(){}]	return (unsigned char) yytext[0];
777 	.		RETURNCHAR;
778 }
779 
780 
781 <SC>{
782 	{OPTWS}{NL}{OPTWS}	++linenum;	/* Allow blank lines & continuations */
783 	[,*]		return (unsigned char) yytext[0];
784 	">"		BEGIN(SECT2); return '>';
785 	">"/^		BEGIN(CARETISBOL); return '>';
786 	{SCNAME}	RETURNNAME;
787 	.		{
788 			format_synerr( _( "bad <start condition>: %s" ),
789 				yytext );
790 			}
791 }
792 
793 <CARETISBOL>"^"		BEGIN(SECT2); return '^';
794 
795 
796 <QUOTE>{
797 	[^"\n]		RETURNCHAR;
798 	\"		BEGIN(SECT2); return '"';
799 
800 	{NL}		{
801 			synerr( _( "missing quote" ) );
802 			BEGIN(SECT2);
803 			++linenum;
804 			return '"';
805 			}
806 }
807 
808 <GROUP_WITH_PARAMS>{
    /* Option letters following "(?" in a pattern.  SECT2 enters this state
     * after calling sf_push() and returning '(' when neither lex_compat nor
     * posix_compat is set.  Each letter turns the corresponding scanner flag
     * on, "-" switches to the letters that turn flags off, and ":" ends the
     * option list and resumes normal pattern scanning.  Any other character
     * falls through to the catch-all "bad character" rule.
     */
809     ":"     BEGIN(SECT2);
810     "-"     BEGIN(GROUP_MINUS_PARAMS);
811     i       sf_set_case_ins(1);
812     s       sf_set_dot_all(1);
813     x       sf_set_skip_ws(1);
814 }
815 <GROUP_MINUS_PARAMS>{
    /* Option letters following the "-" in a "(?" group; entered from
     * GROUP_WITH_PARAMS.  Each letter turns the corresponding scanner flag
     * off, and ":" ends the option list and resumes normal pattern scanning.
     * There is no rule for a second "-" here, so it falls through to the
     * catch-all "bad character" rule.
     */
816     ":"     BEGIN(SECT2);
817     i       sf_set_case_ins(0);
818     s       sf_set_dot_all(0);
819     x       sf_set_skip_ws(0);
820 }
821 
822 <FIRSTCCL>{
823 	"^"/[^-\]\n]	BEGIN(CCL); return '^';
824 	"^"/("-"|"]")	return '^';
825 	.		BEGIN(CCL); RETURNCHAR;
826 }
827 
828 <CCL>{
829 	-/[^\]\n]	return '-';
830 	[^\]\n]		RETURNCHAR;
831 	"]"		BEGIN(SECT2); return ']';
832 	.|{NL}		{
833 			synerr( _( "bad character class" ) );
834 			BEGIN(SECT2);
835 			return ']';
836 			}
837 }
838 
839 <FIRSTCCL,CCL>{
840 	"[:alnum:]"	BEGIN(CCL); return CCE_ALNUM;
841 	"[:alpha:]"	BEGIN(CCL); return CCE_ALPHA;
842 	"[:blank:]"	BEGIN(CCL); return CCE_BLANK;
843 	"[:cntrl:]"	BEGIN(CCL); return CCE_CNTRL;
844 	"[:digit:]"	BEGIN(CCL); return CCE_DIGIT;
845 	"[:graph:]"	BEGIN(CCL); return CCE_GRAPH;
846 	"[:lower:]"	BEGIN(CCL); return CCE_LOWER;
847 	"[:print:]"	BEGIN(CCL); return CCE_PRINT;
848 	"[:punct:]"	BEGIN(CCL); return CCE_PUNCT;
849 	"[:space:]"	BEGIN(CCL); return CCE_SPACE;
850 	"[:upper:]"	BEGIN(CCL); return CCE_UPPER;
851 	"[:xdigit:]"	BEGIN(CCL); return CCE_XDIGIT;
852 
853 	"[:^alnum:]"	BEGIN(CCL); return CCE_NEG_ALNUM;
854 	"[:^alpha:]"	BEGIN(CCL); return CCE_NEG_ALPHA;
855 	"[:^blank:]"	BEGIN(CCL); return CCE_NEG_BLANK;
856 	"[:^cntrl:]"	BEGIN(CCL); return CCE_NEG_CNTRL;
857 	"[:^digit:]"	BEGIN(CCL); return CCE_NEG_DIGIT;
858 	"[:^graph:]"	BEGIN(CCL); return CCE_NEG_GRAPH;
859 	"[:^lower:]"	BEGIN(CCL); return CCE_NEG_LOWER;
860 	"[:^print:]"	BEGIN(CCL); return CCE_NEG_PRINT;
861 	"[:^punct:]"	BEGIN(CCL); return CCE_NEG_PUNCT;
862 	"[:^space:]"	BEGIN(CCL); return CCE_NEG_SPACE;
863 	"[:^upper:]"	BEGIN(CCL); return CCE_NEG_UPPER;
864 	"[:^xdigit:]"	BEGIN(CCL); return CCE_NEG_XDIGIT;
865 	{CCL_EXPR}	{
866 			format_synerr(
867 				_( "bad character class expression: %s" ),
868 					yytext );
869 			BEGIN(CCL); return CCE_ALNUM;
870 			}
871 }
872 
873 <NUM>{
874 	[[:digit:]]+	{
875 			yylval = myctoi( yytext );
876 			return NUMBER;
877 			}
878 
879 	","		return ',';
880 	"}"		{
881 			BEGIN(SECT2);
882 			if ( lex_compat || posix_compat )
883 				return END_REPEAT_POSIX;
884 			else
885 				return END_REPEAT_FLEX;
886 			}
887 
888 	.		{
889 			synerr( _( "bad character inside {}'s" ) );
890 			BEGIN(SECT2);
891 			return '}';
892 			}
893 
894 	{NL}		{
895 			synerr( _( "missing }" ) );
896 			BEGIN(SECT2);
897 			++linenum;
898 			return '}';
899 			}
900 }
901 
902 
903 <PERCENT_BRACE_ACTION>{
904 	{OPTWS}"%}".*		bracelevel = 0;
905 
906 	<ACTION>"/*"		ACTION_ECHO; yy_push_state( CODE_COMMENT );
907 
908 	<CODEBLOCK,ACTION>{
909 		"reject" {
910             ACTION_ECHO;
911             CHECK_REJECT(yytext);
912         }
913 		"yymore" {
914             ACTION_ECHO;
915             CHECK_YYMORE(yytext);
916         }
917 	}
918 
919     .       ACTION_ECHO;
920 	{NL}	{
921 		++linenum;
922 		ACTION_ECHO;
923 		if (bracelevel <= 0 || (doing_codeblock && indented_code)) {
924             if ( doing_rule_action )
925                 add_action( "\tYY_BREAK]""]\n" );
926 
927             doing_rule_action = doing_codeblock = false;
928             BEGIN(SECT2);
929         }
930     }
931 }
932 
933 
934 	/* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
935 <ACTION>{
936 	"{"		ACTION_ECHO; ++bracelevel;
937 	"}"		ACTION_ECHO; --bracelevel;
938 	[^[:alpha:]_{}\"'/\n\[\]]+	ACTION_ECHO;
939         {NAME}		ACTION_ECHO;
940         "'"([^\'\\\n]|\\.)"'" ACTION_ECHO; /* character constant */
941         "'"             ACTION_ECHO; BEGIN(CHARACTER_CONSTANT);
942 	\"		ACTION_ECHO; BEGIN(ACTION_STRING);
943 	{NL} {
944                 ++linenum;
945                 ACTION_ECHO;
946                 if (bracelevel <= 0) {
947                    if ( doing_rule_action )
948                       add_action( "\tYY_BREAK]""]\n" );
949 
950                    doing_rule_action = false;
951                    BEGIN(SECT2);
952                 }
953              }
954         .      ACTION_ECHO;
955 }
956 
957 <ACTION_STRING>{
958 	[^\[\]\"\\\n]+	ACTION_ECHO;
959 	\"		ACTION_ECHO; BEGIN(ACTION);
960 }
961 <CHARACTER_CONSTANT>{
962 	[^\[\]\'\\\n]+  ACTION_ECHO;
963         \'              ACTION_ECHO; BEGIN(ACTION);
964 }
965 <ACTION_STRING,CHARACTER_CONSTANT>{
966         (\\\n)*         ACTION_ECHO;
967 	\\(\\\n)*.	ACTION_ECHO;
968 	{NL}	++linenum; ACTION_ECHO; if (bracelevel <= 0) { BEGIN(SECT2); } else { BEGIN(ACTION); }
969         .	ACTION_ECHO;
970 }
971 
972 <COMMENT,CODE_COMMENT,COMMENT_DISCARD,ACTION,ACTION_STRING,CHARACTER_CONSTANT><<EOF>>	{
973 			synerr( _( "EOF encountered inside an action" ) );
974 			yyterminate();
975 			}
976 
977 <EXTENDED_COMMENT,GROUP_WITH_PARAMS,GROUP_MINUS_PARAMS><<EOF>>	{
978 			synerr( _( "EOF encountered inside pattern" ) );
979 			yyterminate();
980 			}
981 
982 <SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ}	{
983 			yylval = myesc( (unsigned char *) yytext );
984 
985 			if ( YY_START == FIRSTCCL )
986 				BEGIN(CCL);
987 
988 			return CHAR;
989 			}
990 
991 <SECT3>{
992     {M4QSTART}   fputs(escaped_qstart, yyout);
993     {M4QEND}     fputs(escaped_qend, yyout);
994     [^\[\]]*     ECHO;
995     [][]         ECHO;
996     <<EOF>>      {
997         sectnum = 0;
998         yyterminate();
999     }
1000 }
1001 <SECT3_NOESCAPE>{
1002     {M4QSTART}  fprintf(yyout, "[""[%s]""]", escaped_qstart);
1003     {M4QEND}    fprintf(yyout, "[""[%s]""]", escaped_qend);
1004     [^][]*      ECHO;
1005     [][]        ECHO;
1006     <<EOF>>		{
1007        sectnum = 0;
1008        yyterminate();
1009     }
1010 }
1011 <*>.|\n			format_synerr( _( "bad character: %s" ), yytext );
1012 
1013 %%
1014 
1015 
1016 int yywrap(void)
1017 	{
1018 	if ( --num_input_files > 0 )
1019 		{
1020 		set_input_file( *++input_files );
1021 		return 0;
1022 		}
1023 
1024 	else
1025 		return 1;
1026 	}
1027 
1028 
1029 /* set_input_file - open the given file (if NULL, stdin) for scanning */
1030 
1031 void set_input_file( char *file )
1032 	{
1033 	if ( file && strcmp( file, "-" ) )
1034 		{
1035 		infilename = xstrdup(file);
1036 		yyin = fopen( infilename, "r" );
1037 
1038 		if ( yyin == NULL )
1039 			lerr( _( "can't open %s" ), file );
1040 		}
1041 
1042 	else
1043 		{
1044 		yyin = stdin;
1045 		infilename = xstrdup("<stdin>");
1046 		}
1047 
1048 	linenum = 1;
1049 	}
1050