xref: /titanic_52/usr/src/cmd/awk/awk.lx.l (revision 1a7c1b724419d3cb5fa6eea75123c6b2060ba31b)
1 %{
2 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
3 %}
4 %{
5 /*	  All Rights Reserved  	*/
6 %}
7 
8 %{
9 /*								*/
10 %}
11 
12 %{
13 /*
14  * CDDL HEADER START
15  *
16  * The contents of this file are subject to the terms of the
17  * Common Development and Distribution License, Version 1.0 only
18  * (the "License").  You may not use this file except in compliance
19  * with the License.
20  *
21  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
22  * or http://www.opensolaris.org/os/licensing.
23  * See the License for the specific language governing permissions
24  * and limitations under the License.
25  *
26  * When distributing Covered Code, include this CDDL HEADER in each
27  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
28  * If applicable, add the following below this CDDL HEADER, with the
29  * fields enclosed by brackets "[]" replaced with your own identifying
30  * information: Portions Copyright [yyyy] [name of copyright owner]
31  *
32  * CDDL HEADER END
33  */
34 /* Copyright (c) 1996, 2001 by Sun Microsystems, Inc.           */
35 /* All rights reserved.                                         */
36 %}
37 %{
38 /*								*/
39 %}
40 
41 %{
42 #ident	"%Z%%M%	%I%	%E% SMI"	/* SVr4.0 2.9	*/
43 %}
44 
45 %Start A str sc reg comment
46 
47 %{
48 
49 #include	<sys/types.h>
50 #include	"awk.h"
51 #include	"y.tab.h"
52 
53 #undef	input	/* defeat lex */
54 #undef	unput
55 
56 extern YYSTYPE	yylval;
57 extern int	infunc;
58 
/* Current source line number and the open-bracket counts kept by the rules. */
off_t	lineno	= 1;
int	bracecnt = 0;
int	brackcnt  = 0;
int	parencnt = 0;

/*
 * RET(x) returns token x from yylex().  DEBUG is defined unconditionally
 * here, so the tracing form is the one in use: when dbg is set it prints
 * the token name and the matched text first.  Note that the tracing form
 * evaluates x twice.
 */
#define DEBUG
#ifdef	DEBUG
#	define	RET(x)	{if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); }
#else
#	define	RET(x)	return(x)
#endif

/*
 * CADD appends the first matched character to cbuf.  Once the buffer is
 * nearly full it reports a syntax error and drops back to start
 * condition A.  The body is wrapped in do { } while (0) so that "CADD;"
 * is a single statement wherever it is used.
 */
#define	CADD	do { \
		cbuf[clen++] = yytext[0]; \
		if (clen >= RECSIZE-1) { \
			ERROR "string/reg expr %.10s... too long", cbuf SYNTAX; \
			BEGIN A; \
		} \
	} while (0)

uchar	cbuf[RECSIZE];	/* string or regular expression being collected */
uchar	*s;		/* scratch pointer used by the string rule */
int	clen, cflag;	/* clen: number of characters currently in cbuf */
79 %}
80 
	/*
	 * Named character classes, referenced as {A}, {B}, ... in the
	 * patterns.  The class A is unrelated to the start condition A
	 * declared with %Start, which is written <A> in the rules.
	 */
A	[a-zA-Z_]
B	[a-zA-Z0-9_]
D	[0-9]
O	[0-7]
H	[0-9a-fA-F]
WS	[ \t]
87 
88 %%
89 	switch (yybgin-yysvec-1) {	/* witchcraft */
90 	case 0:
91 		BEGIN A;
92 		break;
93 	case sc:
94 		BEGIN A;
95 		RET('}');
96 	}
97 
	/* Rules for start condition A, the state used for ordinary program text. */
<A>\n		{ lineno++; RET(NL); }
<A>#.*		{ ; }	/* strip comments */
<A>{WS}+	{ ; }	/* discard runs of blanks and tabs */
<A>;		{ RET(';'); }

<A>"\\"\n	{ lineno++; }	/* backslash-newline: count the line, return no token */
<A>BEGIN	{ RET(XBEGIN); }
<A>END		{ RET(XEND); }
<A>func(tion)?	{ if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }
<A>return	{ if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
<A>"&&"		{ RET(AND); }
<A>"||"		{ RET(BOR); }
<A>"!"		{ RET(NOT); }
<A>"!="		{ yylval.i = NE; RET(NE); }	/* from here on, operators also record a code in yylval.i */
<A>"~"		{ yylval.i = MATCH; RET(MATCHOP); }
<A>"!~"		{ yylval.i = NOTMATCH; RET(MATCHOP); }
<A>"<"		{ yylval.i = LT; RET(LT); }
<A>"<="		{ yylval.i = LE; RET(LE); }
<A>"=="		{ yylval.i = EQ; RET(EQ); }
<A>">="		{ yylval.i = GE; RET(GE); }
<A>">"		{ yylval.i = GT; RET(GT); }
<A>">>"		{ yylval.i = APPEND; RET(APPEND); }
<A>"++"		{ yylval.i = INCR; RET(INCR); }
<A>"--"		{ yylval.i = DECR; RET(DECR); }
<A>"+="		{ yylval.i = ADDEQ; RET(ASGNOP); }	/* all assignments share token ASGNOP */
<A>"-="		{ yylval.i = SUBEQ; RET(ASGNOP); }
<A>"*="		{ yylval.i = MULTEQ; RET(ASGNOP); }
<A>"/="		{ yylval.i = DIVEQ; RET(ASGNOP); }
<A>"%="		{ yylval.i = MODEQ; RET(ASGNOP); }
<A>"^="		{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"**="	{ yylval.i = POWEQ; RET(ASGNOP); }	/* same result as ^= */
<A>"="		{ yylval.i = ASSIGN; RET(ASGNOP); }
<A>"**"		{ RET(POWER); }	/* same token as ^ */
<A>"^"		{ RET(POWER); }
132 
<A>"$"{D}+	{ yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }	/* $0, $1, ... */
<A>"$NF"	{ unputstr("(NF)"); RET(INDIRECT); }	/* rescanned as $ followed by (NF) */
<A>"$"{A}{B}*	{ int c;
		  c = input(); unput(c);	/* peek at the character after the name */
		  if (c == '(' || c == '[' || (infunc && isarg(yytext+1) >= 0)) {
			/*
			 * A call, a subscript or a function argument follows:
			 * push the name back so it is scanned on its own and
			 * return just the '$'.  RET keeps this path visible in
			 * the debug trace, as for the plain "$" rule.
			 */
			unputstr(yytext+1);
			RET(INDIRECT);
		  } else {
			yylval.cp = setsymtab(yytext+1,"",0.0,STR|NUM,symtab);
			RET(IVAR);
		  }
		}
<A>"$"		{ RET(INDIRECT); }
<A>NF		{ yylval.cp = setsymtab(yytext, "", 0.0, NUM, symtab); RET(VARNF); }
147 
<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		  /* numeric constant: entered in symtab under its own spelling */
		  yylval.cp = setsymtab(yytext, tostring(yytext), atof(yytext), CON|NUM, symtab);
		  RET(NUMBER); }

<A>while	{ RET(WHILE); }	/* reserved words follow */
<A>for		{ RET(FOR); }
<A>do		{ RET(DO); }
<A>if		{ RET(IF); }
<A>else		{ RET(ELSE); }
<A>next		{ RET(NEXT); }
<A>exit		{ RET(EXIT); }
<A>break	{ RET(BREAK); }
<A>continue	{ RET(CONTINUE); }
<A>print	{ yylval.i = PRINT; RET(PRINT); }
<A>printf	{ yylval.i = PRINTF; RET(PRINTF); }
<A>sprintf	{ yylval.i = SPRINTF; RET(SPRINTF); }
<A>split	{ yylval.i = SPLIT; RET(SPLIT); }
<A>substr	{ RET(SUBSTR); }
<A>sub		{ yylval.i = SUB; RET(SUB); }
<A>gsub		{ yylval.i = GSUB; RET(GSUB); }
<A>index	{ RET(INDEX); }
<A>match	{ RET(MATCHFCN); }
<A>in		{ RET(IN); }
<A>getline	{ RET(GETLINE); }
<A>close	{ RET(CLOSE); }
<A>delete	{ RET(DELETE); }
<A>length	{ yylval.i = FLENGTH; RET(BLTIN); }	/* the following share token BLTIN; yylval.i says which */
<A>log		{ yylval.i = FLOG; RET(BLTIN); }
<A>int		{ yylval.i = FINT; RET(BLTIN); }
<A>exp		{ yylval.i = FEXP; RET(BLTIN); }
<A>sqrt		{ yylval.i = FSQRT; RET(BLTIN); }
<A>sin		{ yylval.i = FSIN; RET(BLTIN); }
<A>cos		{ yylval.i = FCOS; RET(BLTIN); }
<A>atan2	{ yylval.i = FATAN; RET(BLTIN); }
<A>system	{ yylval.i = FSYSTEM; RET(BLTIN); }
<A>rand		{ yylval.i = FRAND; RET(BLTIN); }
<A>srand	{ yylval.i = FSRAND; RET(BLTIN); }
<A>toupper	{ yylval.i = FTOUPPER; RET(BLTIN); }
<A>tolower	{ yylval.i = FTOLOWER; RET(BLTIN); }
187 
<A>{A}{B}*	{ int argno, peek;
		  /*
		   * Any other name.  Peek at the next character: a name
		   * followed by '(' is a call.  Otherwise, inside a function
		   * body, a name that isarg() recognises is returned as ARG
		   * with its index; everything else is an ordinary variable.
		   */
		  peek = input(); unput(peek);
		  if (peek == '(' || !infunc || (argno = isarg(yytext)) < 0) {
			yylval.cp = setsymtab(yytext,"",0.0,STR|NUM,symtab);
			if (peek != '(') {
				RET(VAR);
			}
			RET(CALL);
		  }
		  yylval.i = argno;
		  RET(ARG);
		}
<A>\"		{ BEGIN str; clen = 0; }	/* opening quote: start collecting a string in cbuf */

<A>"}"		{ if (--bracecnt < 0) ERROR "extra }" SYNTAX; BEGIN sc; RET(';'); }	/* ';' now; condition sc makes the next yylex() call return '}' */
<A>"]"		{ if (--brackcnt < 0) ERROR "extra ]" SYNTAX; RET(']'); }
<A>")"		{ if (--parencnt < 0) ERROR "extra )" SYNTAX; RET(')'); }

<A>.		{ /* count openers so the closing rules above can detect extras */
		  if (yytext[0] == '{') bracecnt++;
		  else if (yytext[0] == '[') brackcnt++;
		  else if (yytext[0] == '(') parencnt++;
		  RET(yylval.i = yytext[0]); /* everything else */ }
212 
<reg>\\.	{ /*
		   * Keep both the backslash and the escaped character.  Each
		   * goes through CADD so the RECSIZE limit is enforced; the
		   * second is skipped if the first already filled the buffer.
		   */
		  CADD;
		  if (clen < RECSIZE-1) {
			yytext[0] = yytext[1];
			CADD;
		  }
		}
<reg>\n		{ cbuf[clen] = 0;	/* terminate so the message shows only this expression */
		  ERROR "newline in regular expression %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<reg>"/"	{ BEGIN A;
		  cbuf[clen] = 0;
		  yylval.s = tostring(cbuf);
		  unput('/');	/* the closing slash is rescanned in condition A */
		  RET(REGEXPR); }
<reg>.		{ CADD; }
221 
<str>\"		{ BEGIN A;
		  /*
		   * Closing quote.  The constant is entered in symtab under
		   * its text plus a trailing blank, with the text itself as
		   * the string value.
		   */
		  cbuf[clen] = 0; s = tostring(cbuf);
		  cbuf[clen] = ' '; cbuf[++clen] = 0;
		  yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		  RET(STRING); }
<str>\n		{ cbuf[clen] = 0;	/* terminate so the message shows only this string */
		  ERROR "newline in string %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
		/*
		 * Escape sequences.  Each rule puts the decoded character in
		 * yytext[0] and appends it with CADD, so escapes are held to
		 * the same RECSIZE limit as ordinary characters.
		 */
<str>"\\\""	{ yytext[0] = '"'; CADD; }
<str>"\\"n	{ yytext[0] = '\n'; CADD; }
<str>"\\"t	{ yytext[0] = '\t'; CADD; }
<str>"\\"f	{ yytext[0] = '\f'; CADD; }
<str>"\\"r	{ yytext[0] = '\r'; CADD; }
<str>"\\"b	{ yytext[0] = '\b'; CADD; }
<str>"\\"v	{ yytext[0] = '\v'; CADD; }	/* these ANSIisms may not be known by */
<str>"\\"a	{ yytext[0] = '\007'; CADD; }	/* your compiler. hence 007 for bell */
<str>"\\\\"	{ yytext[0] = '\\'; CADD; }
<str>"\\"({O}{O}{O}|{O}{O}|{O}) { unsigned int n = 0;	/* %o stores an unsigned int */
		  sscanf(yytext+1, "%o", &n); yytext[0] = n; CADD; }
<str>"\\"x({H}+) { unsigned int n = 0;	/* ANSI permits any number! */
		  sscanf(yytext+2, "%x", &n); yytext[0] = n; CADD; }
<str>"\\".	{ yytext[0] = yytext[1]; CADD; }	/* unknown escape: keep the character itself */
<str>.		{ CADD; }
243 
244 %%
245 
246 startreg()
247 {
248 	BEGIN reg;
249 	clen = 0;
250 }
251 
252 /* input() and unput() are transcriptions of the standard lex
253    macros for input and output with additions for error message
254    printing.  God help us all if someone changes how lex works.
255 */
256 
257 uchar	ebuf[300];
258 uchar	*ep = ebuf;
259 
260 input()
261 {
262 	register int c;
263 	extern uchar *lexprog;
264 
265 	if (yysptr > yysbuf)
266 		c = U(*--yysptr);
267 	else if (lexprog != NULL)	/* awk '...' */
268 		c = *lexprog++;
269 	else				/* awk -f ... */
270 		c = pgetc();
271 	if (c == '\n')
272 		yylineno++;
273 	else if (c == EOF)
274 		c = 0;
275 	if (ep >= ebuf + sizeof ebuf)
276 		ep = ebuf;
277 	return *ep++ = c;
278 }
279 
280 unput(c)
281 {
282 	yytchar = c;
283 	if (yytchar == '\n')
284 		yylineno--;
285 	*yysptr++ = yytchar;
286 	if (--ep < ebuf)
287 		ep = ebuf + sizeof(ebuf) - 1;
288 }
289 
290 
/*
 * Push the string s back onto the input, last character first, so that
 * it is read again in its original order.  An empty string pushes
 * nothing.  Always returns 0.
 */
int
unputstr(s)
	char *s;
{
	int i;

	/* count down from the length; avoids computing strlen(s)-1 on "" */
	for (i = (int)strlen(s); i > 0; i--)
		unput(s[i - 1]);
	return (0);
}
299