xref: /titanic_52/usr/src/cmd/oawk/awk.lx.l (revision bdfc6d18da790deeec2e0eb09c625902defe2498)
1 %{
2 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
3 %}
4 %{
5 /*	  All Rights Reserved  	*/
6 %}
7 %{
8 /*
9  * CDDL HEADER START
10  *
11  * The contents of this file are subject to the terms of the
12  * Common Development and Distribution License, Version 1.0 only
13  * (the "License").  You may not use this file except in compliance
14  * with the License.
15  *
16  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
17  * or http://www.opensolaris.org/os/licensing.
18  * See the License for the specific language governing permissions
19  * and limitations under the License.
20  *
21  * When distributing Covered Code, include this CDDL HEADER in each
22  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
23  * If applicable, add the following below this CDDL HEADER, with the
24  * fields enclosed by brackets "[]" replaced with your own identifying
25  * information: Portions Copyright [yyyy] [name of copyright owner]
26  *
27  * CDDL HEADER END
28  */
29 /* Copyright (c) 1996, by Sun Microsystems, Inc.                */
30 /* All rights reserved.                                         */
31 %}
32 
33 %{
34 #ident	"%Z%%M%	%I%	%E% SMI"
35 %}
36 
%{
/*
 * Start conditions used by the rules below:
 *	A	ordinary program text
 *	str	inside a double-quoted string constant
 *	chc	inside a bracketed character class of a regular expression
 *	sc	a closing brace has been seen; a ';' was returned for it and
 *		the brace itself is returned on the next call to yylex
 *	reg	inside a regular expression (entered through startreg)
 *	comment	inside a '#' comment, up to the end of the line
 */
%}
%Start A str chc sc reg comment

%{
#include	"awk.h"
#include	"awk.def"
#undef	input	/* defeat lex */
extern int	yylval;
extern int	mustfld;

/* Current line of the program text; bumped by every rule that eats '\n'. */
long long	lineno	= 1;

/* RETURN(x) returns token x; with DEBUG it is printed first when dbg is set. */
#ifdef	DEBUG
#	define	RETURN(x)	{if (dbg) ptoken(x); return (x); }
#else
#	define	RETURN(x)	return (x)
#endif

/*
 * cbuf collects the text of the string constant or character class being
 * scanned; clen is the number of characters stored so far and cflag is
 * nonzero for a negated ("[^") character class.
 */
#define	CBUFLEN	150
wchar_t cbuf[CBUFLEN];
int	clen, cflag;

/*
 * CADD appends yytext[0] to cbuf.  The bound is tested BEFORE the store so
 * the macro can never write outside cbuf, whatever value clen has on entry.
 * At most CBUFLEN-2 characters are accepted, which leaves room for the blank
 * and the terminating 0 that the end-of-string rule appends.  On overflow
 * the error is reported and the scanner drops back to start condition A.
 */
#define	CADD	do { \
		if (clen >= CBUFLEN-2) { \
			yyerror("string too long", cbuf); \
			BEGIN A; \
		} else { \
			cbuf[clen++] = yytext[0]; \
		} \
	} while (0)
%}

%a	50000
%o	50000

A	[a-zA-Z_]
B	[a-zA-Z0-9_]
D	[0-9]
WS	[ \t]
66 
67 %%
68 	switch (yybgin-yysvec-1) {	/* witchcraft */
69 	case 0:
70 		BEGIN A;
71 		break;
72 	case sc:
73 		BEGIN A;
74 		RETURN('}');
75 	}
76 
<A>^\n		{ /* empty line */ lineno++; }
<A>^{WS}*#.*\n	{ /* strip comment lines */ lineno++; }
<A>{WS}		{ /* blanks and tabs between tokens are dropped */ ; }
<A>"\\"\n	{ /* backslash-newline continues the line */ lineno++; }
<reg>"\\"\n	{ /* same inside a regular expression */ lineno++; }
<A>"||"		{ RETURN(BOR); }
<A>BEGIN	{ RETURN(XBEGIN); }
<A>END		{ RETURN(XEND); }
<A>PROGEND	{ RETURN(EOF); }
<A>"&&"		{ RETURN(AND); }
<A>"!"		{ RETURN(NOT); }
<A>"!="		{
			yylval = NE;
			RETURN(RELOP);
		}
<A>"~"		{
			yylval = MATCH;
			RETURN(MATCHOP);
		}
<A>"!~"		{
			yylval = NOTMATCH;
			RETURN(MATCHOP);
		}
<A>"<"		{
			yylval = LT;
			RETURN(RELOP);
		}
<A>"<="		{
			yylval = LE;
			RETURN(RELOP);
		}
<A>"=="		{
			yylval = EQ;
			RETURN(RELOP);
		}
<A>">="		{
			yylval = GE;
			RETURN(RELOP);
		}
<A>">"		{
			yylval = GT;
			RETURN(RELOP);
		}
<A>">>"		{
			/* output redirection in append mode is lexed as a RELOP */
			yylval = APPEND;
			RETURN(RELOP);
		}
<A>"++"		{
			yylval = INCR;
			RETURN(INCR);
		}
<A>"--"		{
			yylval = DECR;
			RETURN(DECR);
		}
<A>"+="		{
			yylval = ADDEQ;
			RETURN(ASGNOP);
		}
<A>"-="		{
			yylval = SUBEQ;
			RETURN(ASGNOP);
		}
<A>"*="		{
			yylval = MULTEQ;
			RETURN(ASGNOP);
		}
<A>"/="		{
			yylval = DIVEQ;
			RETURN(ASGNOP);
		}
<A>"%="		{
			yylval = MODEQ;
			RETURN(ASGNOP);
		}
<A>"="		{
			yylval = ASSIGN;
			RETURN(ASGNOP);
		}
105 
<A>"$"{D}+	{
			/*
			 * Field reference with a literal number.  A nonzero
			 * number yields FIELD with the field address; zero is
			 * looked up as the symbol $record and yields STRING.
			 */
			static wchar_t L_record[] = L"$record";

			if (watoi(yytext+1) != 0) {
				yylval = fieldadr(watoi(yytext+1));
				RETURN(FIELD);
			} else {
				yylval = (int)lookup(L_record, symtab, 0);
				RETURN(STRING);
			}
		}
<A>"$"{WS}*	{
			/* dollar sign not followed by digits */
			RETURN(INDIRECT);
		}
<A>NF		{
			mustfld = 1;
			yylval = (int)setsymtab(yytext, NULL, 0.0, NUM, symtab);
			RETURN(VAR);
		}
<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
			/* numeric constant, entered under its own spelling */
			yylval = (int)setsymtab(yytext, NULL, watof(yytext),
			    CON|NUM, symtab);
			RETURN(NUMBER);
		}
<A>"}"{WS}*\n	{
			/*
			 * Closing brace: return a ';' now and enter sc so
			 * that the next call to yylex returns the brace.
			 */
			BEGIN sc;
			lineno++;
			RETURN(';');
		}
<A>"}"		{
			BEGIN sc;
			RETURN(';');
		}
<A>";"\n		{
			lineno++;
			RETURN(';');
		}
<A>\n		{
			lineno++;
			RETURN(NL);
		}
<A>while	{ RETURN(WHILE); }
<A>for		{ RETURN(FOR); }
<A>if		{ RETURN(IF); }
<A>else		{ RETURN(ELSE); }
<A>next		{ RETURN(NEXT); }
<A>exit		{ RETURN(EXIT); }
<A>break	{ RETURN(BREAK); }
<A>continue	{ RETURN(CONTINUE); }
<A>print	{
			yylval = PRINT;
			RETURN(PRINT);
		}
<A>printf	{
			yylval = PRINTF;
			RETURN(PRINTF);
		}
<A>sprintf	{
			yylval = SPRINTF;
			RETURN(SPRINTF);
		}
<A>split	{
			yylval = SPLIT;
			RETURN(SPLIT);
		}
<A>substr	{ RETURN(SUBSTR); }
<A>index	{ RETURN(INDEX); }
<A>in		{ RETURN(IN); }
<A>getline	{ RETURN(GETLINE); }
<A>length	{
			yylval = FLENGTH;
			RETURN(FNCN);
		}
<A>log		{
			yylval = FLOG;
			RETURN(FNCN);
		}
<A>int		{
			yylval = FINT;
			RETURN(FNCN);
		}
<A>exp		{
			yylval = FEXP;
			RETURN(FNCN);
		}
<A>sqrt		{
			yylval = FSQRT;
			RETURN(FNCN);
		}
<A>{A}{B}*	{
			/*
			 * Any other identifier is a variable; it is entered
			 * with a copy of the empty string and the value 0.0.
			 */
			static wchar_t L_0[] = { 0 };

			yylval = (int)setsymtab(yytext, tostring(L_0), 0.0,
			    STR|NUM, symtab);
			RETURN(VAR);
		}
<A>\"		{
			/* opening quote: collect the text in cbuf */
			BEGIN str;
			clen = 0;
		}

<A>#		{ BEGIN comment; }
<comment>\n	{
			BEGIN A;
			lineno++;
			RETURN(NL);
		}
<comment>.	{ /* comment text is discarded */ ; }

<A>.		{
			/* any other character is its own token */
			yylval = yytext[0];
			RETURN(yytext[0]);
		}
161 
<reg>"["	{
			/* start of a character class; cflag 0 = not negated */
			BEGIN chc;
			clen = 0;
			cflag = 0;
		}
<reg>"[^"	{
			/* start of a negated character class */
			BEGIN chc;
			clen = 0;
			cflag = 1;
		}

<reg>"?"	{ RETURN(QUEST); }
<reg>"+"	{ RETURN(PLUS); }
<reg>"*"	{ RETURN(STAR); }
<reg>"|"	{ RETURN(OR); }
<reg>"."	{ RETURN(DOT); }
<reg>"("	{ RETURN('('); }
<reg>")"	{ RETURN(')'); }
<reg>"^"	{ RETURN('^'); }
<reg>"$"	{ RETURN('$'); }
<reg>\\{D}{D}{D}	{
			/* backslash and three digits: read the digits as octal */
			wsscanf(yytext+1, "%o", &yylval);
			RETURN(CHAR);
		}
<reg>\\.	{
			/* backslash escape; unknown escapes stand for themselves */
			switch (yytext[1]) {
			case 'n':
				yylval = '\n';
				break;
			case 't':
				yylval = '\t';
				break;
			case 'b':
				yylval = '\b';
				break;
			case 'r':
				yylval = '\r';
				break;
			case 'f':
				yylval = '\f';
				break;
			default:
				yylval = yytext[1];
				break;
			}
			RETURN(CHAR);
		}
<reg>"/"	{
			/* end of the expression: push the slash back for A */
			BEGIN A;
			unput('/');
		}
<reg>\n		{
			yyerror("newline in regular expression");
			lineno++;
			BEGIN A;
		}
<reg>.		{
			yylval = yytext[0];
			RETURN(CHAR);
		}
186 
<str>\"		{
			/*
			 * Closing quote.  s is a copy of the collected text;
			 * the symbol is entered under that text followed by
			 * one blank, with s as its string value.
			 */
			wchar_t *s;

			BEGIN A;
			cbuf[clen] = 0;
			s = tostring(cbuf);
			cbuf[clen] = ' ';
			cbuf[++clen] = 0;
			yylval = (int)setsymtab(cbuf, s, 0.0, CON|STR, symtab);
			RETURN(STRING);
		}
<str>\n		{
			yyerror("newline in string");
			lineno++;
			BEGIN A;
		}
<str>"\\\""	{
			/*
			 * Escape sequences: the translated character is put
			 * in yytext[0] and stored with CADD, so the CBUFLEN
			 * limit applies to escapes exactly as it does to
			 * ordinary characters.  Storing with a bare
			 * cbuf[clen++] would run off the end of cbuf on a
			 * long run of escapes.
			 */
			yytext[0] = '"';
			CADD;
		}
<str>"\\"n	{ yytext[0] = '\n'; CADD; }
<chc>"\\"n	{ yytext[0] = '\n'; CADD; }
<str>"\\"t	{ yytext[0] = '\t'; CADD; }
<chc>"\\"t	{ yytext[0] = '\t'; CADD; }
<str>"\\"b	{ yytext[0] = '\b'; CADD; }
<chc>"\\"b	{ yytext[0] = '\b'; CADD; }
<str>"\\"r	{ yytext[0] = '\r'; CADD; }
<chc>"\\"r	{ yytext[0] = '\r'; CADD; }
<str>"\\"f	{ yytext[0] = '\f'; CADD; }
<chc>"\\"f	{ yytext[0] = '\f'; CADD; }
<str>"\\\\"	{ yytext[0] = '\\'; CADD; }
<chc>"\\\\"	{ yytext[0] = '\\'; CADD; }
<str>.		{ CADD; }

<chc>"\\""]"	{ yytext[0] = ']'; CADD; }
<chc>"]"	{
			/*
			 * End of the class: hand a copy of its members to the
			 * parser and resume scanning the regular expression.
			 */
			BEGIN reg;
			cbuf[clen] = 0;
			yylval = (int)tostring(cbuf);
			if (cflag == 0) {
				RETURN(CCL);
			} else {
				RETURN(NCCL);
			}
		}
<chc>\n		{
			yyerror("newline in character class");
			lineno++;
			BEGIN A;
		}
<chc>.		{ CADD; }
213 
214 %%
215 
216 input()
217 {
218 	register c;
219 	extern wchar_t *lexprog;
220 
221 	if (yysptr > yysbuf)
222 		c = U(*--yysptr);
223 	else if (yyin == NULL)
224 		c = *lexprog++;
225 	else
226 		c = getwc(yyin);
227 	if (c == '\n')
228 		yylineno++;
229 	else if (c == EOF)
230 		c = 0;
231 	return (c);
232 }
233 
234 startreg()
235 {
236 	BEGIN reg;
237 }
238