xref: /titanic_51/usr/src/cmd/oawk/awk.lx.l (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1*7c478bd9Sstevel@tonic-gate %{
2*7c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
3*7c478bd9Sstevel@tonic-gate %}
4*7c478bd9Sstevel@tonic-gate %{
5*7c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
6*7c478bd9Sstevel@tonic-gate %}
7*7c478bd9Sstevel@tonic-gate %{
8*7c478bd9Sstevel@tonic-gate /*
9*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
10*7c478bd9Sstevel@tonic-gate  *
11*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
12*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
13*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
14*7c478bd9Sstevel@tonic-gate  * with the License.
15*7c478bd9Sstevel@tonic-gate  *
16*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
17*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
18*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
19*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
20*7c478bd9Sstevel@tonic-gate  *
21*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
22*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
23*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
24*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
25*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
26*7c478bd9Sstevel@tonic-gate  *
27*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
28*7c478bd9Sstevel@tonic-gate  */
29*7c478bd9Sstevel@tonic-gate /* Copyright (c) 1996, by Sun Microsystems, Inc.                */
30*7c478bd9Sstevel@tonic-gate /* All rights reserved.                                         */
31*7c478bd9Sstevel@tonic-gate %}
32*7c478bd9Sstevel@tonic-gate 
33*7c478bd9Sstevel@tonic-gate %{
34*7c478bd9Sstevel@tonic-gate #ident	"%Z%%M%	%I%	%E% SMI"
35*7c478bd9Sstevel@tonic-gate %}
36*7c478bd9Sstevel@tonic-gate 
%Start A str chc sc reg comment

%{
/*
 * Scanner prologue.
 *
 * Start conditions declared above, as used by the rules in this file:
 *	A	ordinary program text
 *	str	inside a double-quoted string constant
 *	chc	inside a bracketed character class of a regular expression
 *	sc	a closing brace has been seen; yylex() hands the brace itself
 *		back on its next call
 *	reg	inside a regular expression (entered through startreg())
 *	comment	inside a trailing # comment
 */
#include	"awk.h"
#include	"awk.def"
#undef	input	/* defeat lex: this file supplies its own input() */
extern int	yylval;
extern int	mustfld;

/* Current input line; bumped by every rule that consumes a newline. */
long long	lineno	= 1;

/*
 * RETURN(x) hands token x back from yylex().  In DEBUG builds the token is
 * first shown through ptoken() when dbg is set.  The do/while wrapper makes
 * the macro behave as one statement, so it is safe in an unbraced if/else.
 */
#ifdef	DEBUG
#	define	RETURN(x)	do { if (dbg) ptoken(x); return (x); } while (0)
#else
#	define	RETURN(x)	return (x)
#endif

/* Capacity, in wide characters, of the string/character-class buffer. */
#define	CBUFLEN	150

/*
 * CADD appends the first character of the current match to cbuf.  Once the
 * buffer is one short of full it reports the error and drops back to start
 * condition A.
 */
#define	CADD	do { \
		cbuf[clen++] = yytext[0]; \
		if (clen >= CBUFLEN-1) { \
			yyerror("string too long", cbuf); \
			BEGIN A; \
		} \
	} while (0)

wchar_t cbuf[CBUFLEN];	/* text of the string or character class so far */
int	clen;		/* number of characters currently held in cbuf */
int	cflag;		/* 1 when the character class opened with [^, else 0 */
%}
58*7c478bd9Sstevel@tonic-gate 
%{
/*
 * Internal lex table sizes.  Per the lex documentation, %a sets the number
 * of transitions and %o the size of the output array.
 */
%}
%a	50000
%o	50000

%{
/*
 * Named patterns used by the rules:
 *	A	first character of an identifier (letter or underscore)
 *	B	any later identifier character (letter, digit or underscore)
 *	D	a decimal digit
 *	WS	a blank or a tab
 */
%}
A	[A-Za-z_]
B	[A-Za-z0-9_]
D	[0-9]
WS	[\t ]
66*7c478bd9Sstevel@tonic-gate 
67*7c478bd9Sstevel@tonic-gate %%
68*7c478bd9Sstevel@tonic-gate 	switch (yybgin-yysvec-1) {	/* witchcraft */
69*7c478bd9Sstevel@tonic-gate 	case 0:
70*7c478bd9Sstevel@tonic-gate 		BEGIN A;
71*7c478bd9Sstevel@tonic-gate 		break;
72*7c478bd9Sstevel@tonic-gate 	case sc:
73*7c478bd9Sstevel@tonic-gate 		BEGIN A;
74*7c478bd9Sstevel@tonic-gate 		RETURN('}');
75*7c478bd9Sstevel@tonic-gate 	}
76*7c478bd9Sstevel@tonic-gate 
<A>^\n		{ /* empty line: count it, no token */ lineno++; }
<A>^{WS}*#.*\n	{ /* line holding only a comment: count it, no token */ lineno++; }
<A>{WS}		{ /* blanks and tabs between tokens produce nothing */ ; }
<A>"\\"\n	{ /* backslash-newline: count the line, no token */ lineno++; }
<reg>"\\"\n	{ /* the same inside a regular expression */ lineno++; }
<A>"||"		{ RETURN(BOR); }
<A>BEGIN	{ RETURN(XBEGIN); }
<A>END		{ RETURN(XEND); }
<A>PROGEND	{ RETURN(EOF); }
<A>"&&"		{ RETURN(AND); }
<A>"!"		{ RETURN(NOT); }
<A>"!="		{ yylval = NE;
		RETURN(RELOP); }
<A>"~"		{ yylval = MATCH;
		RETURN(MATCHOP); }
<A>"!~"		{ yylval = NOTMATCH;
		RETURN(MATCHOP); }
<A>"<"		{ yylval = LT;
		RETURN(RELOP); }
<A>"<="		{ yylval = LE;
		RETURN(RELOP); }
<A>"=="		{ yylval = EQ;
		RETURN(RELOP); }
<A>">="		{ yylval = GE;
		RETURN(RELOP); }
<A>">"		{ yylval = GT;
		RETURN(RELOP); }
<A>">>"		{ yylval = APPEND;
		RETURN(RELOP); }
<A>"++"		{ yylval = INCR;
		RETURN(INCR); }
<A>"--"		{ yylval = DECR;
		RETURN(DECR); }
<A>"+="		{ yylval = ADDEQ;
		RETURN(ASGNOP); }
<A>"-="		{ yylval = SUBEQ;
		RETURN(ASGNOP); }
<A>"*="		{ yylval = MULTEQ;
		RETURN(ASGNOP); }
<A>"/="		{ yylval = DIVEQ;
		RETURN(ASGNOP); }
<A>"%="		{ yylval = MODEQ;
		RETURN(ASGNOP); }
<A>"="		{ yylval = ASSIGN;
		RETURN(ASGNOP); }
105*7c478bd9Sstevel@tonic-gate 
<A>"$"{D}+	{
		/*
		 * Dollar followed by digits.  A non-zero number is a field
		 * reference; zero is returned as the symbol named $record.
		 */
		static wchar_t L_record[] = L"$record";

		if (watoi(yytext+1) != 0) {
			yylval = fieldadr(watoi(yytext+1));
			RETURN(FIELD);
		}
		yylval = (int)lookup(L_record, symtab, 0);
		RETURN(STRING);
		}
<A>"$"{WS}*	{
		/* dollar not followed by digits */
		RETURN(INDIRECT);
		}
<A>NF		{
		/* NF sets mustfld before the variable is entered in symtab */
		mustfld = 1;
		yylval = (int)setsymtab(yytext, NULL, 0.0, NUM, symtab);
		RETURN(VAR);
		}
<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		/* numeric constant, entered under its own spelling */
		yylval = (int)setsymtab(yytext, NULL, watof(yytext),
		    CON|NUM, symtab);
		RETURN(NUMBER);
		}
<A>"}"{WS}*\n	{
		/*
		 * Closing brace ending a line: a semicolon is returned now,
		 * and condition sc makes the next yylex() call return the
		 * brace itself.
		 */
		BEGIN sc;
		lineno++;
		RETURN(';');
		}
<A>"}"		{
		/* closing brace elsewhere: same two-step hand-off */
		BEGIN sc;
		RETURN(';');
		}
<A>";"\n		{
		/* semicolon at end of line: the newline is absorbed */
		lineno++;
		RETURN(';');
		}
<A>\n		{
		lineno++;
		RETURN(NL);
		}
<A>while	{ RETURN(WHILE); }
<A>for		{ RETURN(FOR); }
<A>if		{ RETURN(IF); }
<A>else		{ RETURN(ELSE); }
<A>next		{ RETURN(NEXT); }
<A>exit		{ RETURN(EXIT); }
<A>break	{ RETURN(BREAK); }
<A>continue	{ RETURN(CONTINUE); }
<A>print	{ yylval = PRINT;
		RETURN(PRINT); }
<A>printf	{ yylval = PRINTF;
		RETURN(PRINTF); }
<A>sprintf	{ yylval = SPRINTF;
		RETURN(SPRINTF); }
<A>split	{ yylval = SPLIT;
		RETURN(SPLIT); }
<A>substr	{ RETURN(SUBSTR); }
<A>index	{ RETURN(INDEX); }
<A>in		{ RETURN(IN); }
<A>getline	{ RETURN(GETLINE); }
<A>length	{ yylval = FLENGTH;
		RETURN(FNCN); }
<A>log		{ yylval = FLOG;
		RETURN(FNCN); }
<A>int		{ yylval = FINT;
		RETURN(FNCN); }
<A>exp		{ yylval = FEXP;
		RETURN(FNCN); }
<A>sqrt		{ yylval = FSQRT;
		RETURN(FNCN); }
<A>{A}{B}*	{
		/*
		 * Any other identifier is a variable.  This rule has to stay
		 * below the keyword rules: on a tie in match length lex picks
		 * the rule listed first.
		 */
		static wchar_t L_0[] = L"";

		yylval = (int)setsymtab(yytext, tostring(L_0), 0.0, STR|NUM,
		    symtab);
		RETURN(VAR);
		}
<A>\"		{
		/* opening quote: start collecting a string in cbuf */
		clen = 0;
		BEGIN str;
		}

<A>#		{
		/* comment following other text on the line */
		BEGIN comment;
		}
<comment>\n	{
		/* the newline ending the comment still counts as NL */
		BEGIN A;
		lineno++;
		RETURN(NL);
		}
<comment>.	{ /* comment text is discarded */ ; }

<A>.		{
		/* any other character is its own token */
		yylval = yytext[0];
		RETURN(yytext[0]);
		}
161*7c478bd9Sstevel@tonic-gate 
<reg>"["	{
		/* start of a character class */
		cflag = 0;
		clen = 0;
		BEGIN chc;
		}
<reg>"[^"	{
		/* start of a negated character class */
		cflag = 1;
		clen = 0;
		BEGIN chc;
		}

<reg>"?"	{ RETURN(QUEST); }
<reg>"+"	{ RETURN(PLUS); }
<reg>"*"	{ RETURN(STAR); }
<reg>"|"	{ RETURN(OR); }
<reg>"."	{ RETURN(DOT); }
<reg>"("	{ RETURN('('); }
<reg>")"	{ RETURN(')'); }
<reg>"^"	{ RETURN('^'); }
<reg>"$"	{ RETURN('$'); }
<reg>\\{D}{D}{D}	{
		/*
		 * Backslash and three digits, read as an octal value.
		 * NOTE(review): the pattern also admits 8 and 9, which the
		 * octal conversion will not consume; the result of the
		 * conversion is not checked.
		 */
		wsscanf(yytext+1, "%o", &yylval);
		RETURN(CHAR);
		}
<reg>\\.	{
		/* backslash escape: known letters map to control characters */
		switch (yytext[1]) {
		case 'n':
			yylval = '\n';
			break;
		case 't':
			yylval = '\t';
			break;
		case 'b':
			yylval = '\b';
			break;
		case 'r':
			yylval = '\r';
			break;
		case 'f':
			yylval = '\f';
			break;
		default:
			/* anything else stands for itself */
			yylval = yytext[1];
			break;
		}
		RETURN(CHAR);
		}
<reg>"/"	{
		/* end of the expression: leave reg and let A see the slash */
		BEGIN A;
		unput('/');
		}
<reg>\n		{
		yyerror("newline in regular expression");
		lineno++;
		BEGIN A;
		}
<reg>.		{
		/* ordinary character */
		yylval = yytext[0];
		RETURN(CHAR);
		}
186*7c478bd9Sstevel@tonic-gate 
<str>\"		{
		/*
		 * End of a string constant.  A copy of the collected text is
		 * made with tostring(); the text followed by one blank is
		 * passed as the first argument of setsymtab() and the copy as
		 * the second.  Every append goes through CADD, so clen is at
		 * most CBUFLEN-2 here and both stores below stay in bounds.
		 */
		wchar_t *s;

		BEGIN A;
		cbuf[clen] = 0;
		s = tostring(cbuf);
		cbuf[clen] = ' ';
		cbuf[++clen] = 0;
		yylval = (int)setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		RETURN(STRING);
		}
<str>\n		{ yyerror("newline in string"); lineno++; BEGIN A; }
<str>"\\\""	{
		/*
		 * Escape sequences.  The decoded character is stored in
		 * yytext[0] and appended with CADD, so escapes get the same
		 * length check as ordinary characters.  Previously they were
		 * stored with an unchecked cbuf[clen++], which let a long run
		 * of escapes write past the end of cbuf.
		 */
		yytext[0] = '"';
		CADD;
		}
<str>"\\"n	{ yytext[0] = '\n'; CADD; }
<chc>"\\"n	{ yytext[0] = '\n'; CADD; }
<str>"\\"t	{ yytext[0] = '\t'; CADD; }
<chc>"\\"t	{ yytext[0] = '\t'; CADD; }
<str>"\\"b	{ yytext[0] = '\b'; CADD; }
<chc>"\\"b	{ yytext[0] = '\b'; CADD; }
<str>"\\"r	{ yytext[0] = '\r'; CADD; }
<chc>"\\"r	{ yytext[0] = '\r'; CADD; }
<str>"\\"f 	{ yytext[0] = '\f'; CADD; }
<chc>"\\"f 	{ yytext[0] = '\f'; CADD; }
<str>"\\\\"	{ yytext[0] = '\\'; CADD; }
<chc>"\\\\"	{ yytext[0] = '\\'; CADD; }
<str>.		{ CADD; }

<chc>"\\""]"	{ yytext[0] = ']'; CADD; }
<chc>"]"	{
		/*
		 * End of a character class: return a copy of its text as
		 * CCL, or as NCCL when the class opened with a caret.
		 */
		BEGIN reg;
		cbuf[clen] = 0;
		yylval = (int)tostring(cbuf);
		if (cflag == 0) {
			RETURN(CCL);
		} else {
			RETURN(NCCL);
		}
		}
<chc>\n		{ yyerror("newline in character class"); lineno++; BEGIN A; }
<chc>.		{ CADD; }
213*7c478bd9Sstevel@tonic-gate 
214*7c478bd9Sstevel@tonic-gate %%
215*7c478bd9Sstevel@tonic-gate 
216*7c478bd9Sstevel@tonic-gate input()
217*7c478bd9Sstevel@tonic-gate {
218*7c478bd9Sstevel@tonic-gate 	register c;
219*7c478bd9Sstevel@tonic-gate 	extern wchar_t *lexprog;
220*7c478bd9Sstevel@tonic-gate 
221*7c478bd9Sstevel@tonic-gate 	if (yysptr > yysbuf)
222*7c478bd9Sstevel@tonic-gate 		c = U(*--yysptr);
223*7c478bd9Sstevel@tonic-gate 	else if (yyin == NULL)
224*7c478bd9Sstevel@tonic-gate 		c = *lexprog++;
225*7c478bd9Sstevel@tonic-gate 	else
226*7c478bd9Sstevel@tonic-gate 		c = getwc(yyin);
227*7c478bd9Sstevel@tonic-gate 	if (c == '\n')
228*7c478bd9Sstevel@tonic-gate 		yylineno++;
229*7c478bd9Sstevel@tonic-gate 	else if (c == EOF)
230*7c478bd9Sstevel@tonic-gate 		c = 0;
231*7c478bd9Sstevel@tonic-gate 	return (c);
232*7c478bd9Sstevel@tonic-gate }
233*7c478bd9Sstevel@tonic-gate 
234*7c478bd9Sstevel@tonic-gate startreg()
235*7c478bd9Sstevel@tonic-gate {
236*7c478bd9Sstevel@tonic-gate 	BEGIN reg;
237*7c478bd9Sstevel@tonic-gate }
238