xref: /titanic_50/usr/src/cmd/awk/awk.lx.l (revision da2e3ebdc1edfbc5028edf1354e7dd2fa69a7968)
1 %{
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License, Version 1.0 only
7  * (the "License").  You may not use this file except in compliance
8  * with the License.
9  *
10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11  * or http://www.opensolaris.org/os/licensing.
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  *
15  * When distributing Covered Code, include this CDDL HEADER in each
16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  */
23 
24 /*
25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
26  * Use is subject to license terms.
27  */
28 
29 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
30 /*	  All Rights Reserved  	*/
31 %}
32 
33 %{
34 #pragma ident	"%Z%%M%	%I%	%E% SMI"
35 %}
36 
%Start A str sc reg comment

%{

#include	<sys/types.h>
#include	"awk.h"
#include	"y.tab.h"

#undef	input	/* defeat lex */
#undef	unput

static void unput(int);
static void unputstr(char *);

extern YYSTYPE	yylval;
extern int	infunc;

off_t	lineno	= 1;	/* bumped by the rules that consume a newline */
int	bracecnt = 0;	/* '{' seen minus '}' seen */
int	brackcnt  = 0;	/* '[' seen minus ']' seen */
int	parencnt = 0;	/* '(' seen minus ')' seen */

/*
 * RET(x) returns token x from yylex().  With DEBUG defined (it always is
 * here) the token name and the matched text are printed first when dbg is
 * set.  The macro is a single statement and evaluates x exactly once, so
 * it is safe in an unbraced if/else and with arguments such as
 * RET(yylval.i = yytext[0]).
 */
#define DEBUG
#ifdef	DEBUG
#	define	RET(x)	do { \
				int lex_tok = (x); \
				if (dbg) \
					printf("lex %s [%s]\n", \
					    tokname(lex_tok), yytext); \
				return (lex_tok); \
			} while (0)
#else
#	define	RET(x)	return(x)
#endif

/*
 * The standard (SUSv2) requires that the record size be at least LINE_MAX.
 * LINE_MAX is a standard constant defined in <limits.h>.
 * Though nawk is not standards compliant, we let RECSIZE -- and CBUFLEN
 * below -- grow with LINE_MAX instead of using the magic number 1024.
 */
#define	CBUFLEN	(3 * LINE_MAX)

/*
 * CADD appends yytext[0] to cbuf.  When that leaves fewer than two free
 * bytes (room is kept for the trailing ' ' and NUL added when a string
 * ends) it reports a syntax error and drops back to start condition A.
 * Wrapped in do/while so that it expands to exactly one statement.
 */
#define	CADD	do { \
			cbuf[clen++] = yytext[0]; \
			if (clen >= CBUFLEN-1) { \
				ERROR "string/reg expr %.10s... too long", cbuf SYNTAX; \
				BEGIN A; \
			} \
		} while (0)

static uchar	cbuf[CBUFLEN];	/* text of the string/regexp being collected */
static uchar	*s;
static int	clen, cflag;	/* clen: number of bytes currently in cbuf */
%}

A	[a-zA-Z_]
B	[a-zA-Z0-9_]
D	[0-9]
O	[0-7]
H	[0-9a-fA-F]
WS	[ \t]
90 
91 %%
92 	switch (yybgin-yysvec-1) {	/* witchcraft */
93 	case 0:
94 		BEGIN A;
95 		break;
96 	case sc:
97 		BEGIN A;
98 		RET('}');
99 	}
100 
<A>\n		{ lineno++; RET(NL); }	/* newline is a token; keep the line count */
<A>#.*		{ ; }	/* strip comments */
<A>{WS}+	{ ; }	/* skip blanks and tabs */
<A>;		{ RET(';'); }

<A>"\\"\n	{ lineno++; }	/* backslash-newline: count the line, return no token */
<A>BEGIN	{ RET(XBEGIN); }
<A>END		{ RET(XEND); }
<A>func(tion)?	{ if (infunc) ERROR "illegal nested function" SYNTAX; RET(FUNC); }	/* "func" or "function" */
<A>return	{ if (!infunc) ERROR "return not in function" SYNTAX; RET(RETURN); }
<A>"&&"		{ RET(AND); }
<A>"||"		{ RET(BOR); }
<A>"!"		{ RET(NOT); }
<A>"!="		{ yylval.i = NE; RET(NE); }
<A>"~"		{ yylval.i = MATCH; RET(MATCHOP); }	/* both match operators share MATCHOP; yylval.i tells them apart */
<A>"!~"		{ yylval.i = NOTMATCH; RET(MATCHOP); }
<A>"<"		{ yylval.i = LT; RET(LT); }
<A>"<="		{ yylval.i = LE; RET(LE); }
<A>"=="		{ yylval.i = EQ; RET(EQ); }
<A>">="		{ yylval.i = GE; RET(GE); }
<A>">"		{ yylval.i = GT; RET(GT); }
<A>">>"		{ yylval.i = APPEND; RET(APPEND); }
<A>"++"		{ yylval.i = INCR; RET(INCR); }
<A>"--"		{ yylval.i = DECR; RET(DECR); }
<A>"+="		{ yylval.i = ADDEQ; RET(ASGNOP); }	/* all assignments share ASGNOP; yylval.i selects the operation */
<A>"-="		{ yylval.i = SUBEQ; RET(ASGNOP); }
<A>"*="		{ yylval.i = MULTEQ; RET(ASGNOP); }
<A>"/="		{ yylval.i = DIVEQ; RET(ASGNOP); }
<A>"%="		{ yylval.i = MODEQ; RET(ASGNOP); }
<A>"^="		{ yylval.i = POWEQ; RET(ASGNOP); }
<A>"**="	{ yylval.i = POWEQ; RET(ASGNOP); }	/* same operation as "^=" */
<A>"="		{ yylval.i = ASSIGN; RET(ASGNOP); }
<A>"**"		{ RET(POWER); }	/* same token as "^" */
<A>"^"		{ RET(POWER); }
135 
<A>"$"{D}+	{ yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); }	/* $<digits>: field looked up by number */
<A>"$NF"	{ unputstr("(NF)"); return(INDIRECT); }	/* rescanned as $ followed by (NF) */
<A>"$"{A}{B}*	{ int c;
		  /* peek at the character following the name, then put it back */
		  c = input(); unput(c);
		  /*
		   * A call, a subscript, or (inside a function body) a
		   * parameter name: push the name back so it is scanned on
		   * its own, and return only the '$' as INDIRECT.
		   */
		  if (c == '(' || c == '[' ||
		      (infunc && isarg(yytext+1) >= 0)) {
			unputstr(yytext+1);
			return(INDIRECT);
		  } else {
			yylval.cp = setsymtab((uchar *)yytext+1,
				(uchar *)"",0.0,STR|NUM,symtab);
			RET(IVAR);
		  }
		}
<A>"$"		{ RET(INDIRECT); }
<A>NF		{ yylval.cp = setsymtab((uchar *)yytext, (uchar *)"", 0.0, NUM, symtab); RET(VARNF); }
151 
<A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		  /* numeric constant: entered in symtab under its own text, with the atof() value */
		  yylval.cp = setsymtab((uchar *)yytext, tostring((uchar *)yytext), atof(yytext), CON|NUM, symtab);
		  RET(NUMBER); }

<A>while	{ RET(WHILE); }
<A>for		{ RET(FOR); }
<A>do		{ RET(DO); }
<A>if		{ RET(IF); }
<A>else		{ RET(ELSE); }
<A>next		{ RET(NEXT); }
<A>exit		{ RET(EXIT); }
<A>break	{ RET(BREAK); }
<A>continue	{ RET(CONTINUE); }
<A>print	{ yylval.i = PRINT; RET(PRINT); }
<A>printf	{ yylval.i = PRINTF; RET(PRINTF); }
<A>sprintf	{ yylval.i = SPRINTF; RET(SPRINTF); }
<A>split	{ yylval.i = SPLIT; RET(SPLIT); }
<A>substr	{ RET(SUBSTR); }
<A>sub		{ yylval.i = SUB; RET(SUB); }
<A>gsub		{ yylval.i = GSUB; RET(GSUB); }
<A>index	{ RET(INDEX); }
<A>match	{ RET(MATCHFCN); }
<A>in		{ RET(IN); }
<A>getline	{ RET(GETLINE); }
<A>close	{ RET(CLOSE); }
<A>delete	{ RET(DELETE); }
<A>length	{ yylval.i = FLENGTH; RET(BLTIN); }	/* from here on: one BLTIN token, yylval.i selects the function */
<A>log		{ yylval.i = FLOG; RET(BLTIN); }
<A>int		{ yylval.i = FINT; RET(BLTIN); }
<A>exp		{ yylval.i = FEXP; RET(BLTIN); }
<A>sqrt		{ yylval.i = FSQRT; RET(BLTIN); }
<A>sin		{ yylval.i = FSIN; RET(BLTIN); }
<A>cos		{ yylval.i = FCOS; RET(BLTIN); }
<A>atan2	{ yylval.i = FATAN; RET(BLTIN); }
<A>system	{ yylval.i = FSYSTEM; RET(BLTIN); }
<A>rand		{ yylval.i = FRAND; RET(BLTIN); }
<A>srand	{ yylval.i = FSRAND; RET(BLTIN); }
<A>toupper	{ yylval.i = FTOUPPER; RET(BLTIN); }
<A>tolower	{ yylval.i = FTOLOWER; RET(BLTIN); }
191 
<A>{A}{B}*	{ int argno, peek;
		  /*
		   * Any other name.  One character of lookahead decides the
		   * token: a following '(' makes it CALL; otherwise, inside a
		   * function body, a name that isarg() recognises is ARG
		   * (yylval.i is the value isarg() returned); everything else
		   * is VAR.  CALL and VAR carry the symbol table entry.
		   */
		  peek = input();
		  unput(peek);
		  if (peek == '(') {
			yylval.cp = setsymtab((uchar *)yytext,
				(uchar *)"",0.0,STR|NUM,symtab);
			RET(CALL);
		  }
		  if (infunc && (argno = isarg(yytext)) >= 0) {
			yylval.i = argno;
			RET(ARG);
		  }
		  yylval.cp = setsymtab((uchar *)yytext,
			(uchar *)"",0.0,STR|NUM,symtab);
		  RET(VAR);
		}
<A>\"		{ /* opening quote: start collecting a string in cbuf */
		  clen = 0;
		  BEGIN str; }

<A>"}"		{ /* hand the parser a ';' now; the '}' follows from start condition sc */
		  bracecnt--;
		  if (bracecnt < 0)
			ERROR "extra }" SYNTAX;
		  BEGIN sc;
		  RET(';'); }
<A>"]"		{ brackcnt--;
		  if (brackcnt < 0)
			ERROR "extra ]" SYNTAX;
		  RET(']'); }
<A>")"		{ parencnt--;
		  if (parencnt < 0)
			ERROR "extra )" SYNTAX;
		  RET(')'); }

<A>.		{ /* everything else is returned as itself; openers are counted */
		  switch (yytext[0]) {
		  case '{':
			bracecnt++;
			break;
		  case '[':
			brackcnt++;
			break;
		  case '(':
			parencnt++;
			break;
		  default:
			break;
		  }
		  RET(yylval.i = yytext[0]); }
217 
<reg>\\.	{ /*
		   * An escape is copied through verbatim (backslash included).
		   * It needs two bytes, and clen must still be at most
		   * CBUFLEN-2 afterwards so that the terminating NUL fits;
		   * CADD enforces the same limit for single characters.
		   */
		  if (clen >= CBUFLEN-3) {
			ERROR "string/reg expr %.10s... too long", cbuf SYNTAX;
			BEGIN A;
		  } else {
			cbuf[clen++] = '\\';
			cbuf[clen++] = yytext[1];
		  }
		}
<reg>\n		{ cbuf[clen] = 0;	/* terminate so the message shows only this regexp */
		  ERROR "newline in regular expression %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<reg>"/"	{ /* closing slash: return the collected text, then rescan the '/' in state A */
		  BEGIN A;
		  cbuf[clen] = 0;
		  yylval.s = tostring(cbuf);
		  unput('/');
		  RET(REGEXPR); }
<reg>.		{ CADD; }
226 
<str>\"		{ /*
		   * Closing quote.  The constant is entered in symtab under
		   * its text followed by one blank, with the text itself as
		   * the string value.  clen <= CBUFLEN-2 here, so both the
		   * blank and the NUL fit.
		   */
		  BEGIN A;
		  cbuf[clen] = 0; s = tostring(cbuf);
		  cbuf[clen] = ' '; cbuf[++clen] = 0;
		  yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		  RET(STRING); }
<str>\n		{ cbuf[clen] = 0;	/* terminate so the message shows only this string */
		  ERROR "newline in string %.10s...", cbuf SYNTAX; lineno++; BEGIN A; }
<str>"\\\""	{ yytext[0] = '"'; CADD; }	/* escapes: put the decoded byte in yytext[0] so that */
<str>"\\"n	{ yytext[0] = '\n'; CADD; }	/* CADD appends it with its usual bounds check */
<str>"\\"t	{ yytext[0] = '\t'; CADD; }
<str>"\\"f	{ yytext[0] = '\f'; CADD; }
<str>"\\"r	{ yytext[0] = '\r'; CADD; }
<str>"\\"b	{ yytext[0] = '\b'; CADD; }
<str>"\\"v	{ yytext[0] = '\v'; CADD; }	/* these ANSIisms may not be known by */
<str>"\\"a	{ yytext[0] = '\007'; CADD; }	/* your compiler. hence 007 for bell */
<str>"\\\\"	{ yytext[0] = '\\'; CADD; }
<str>"\\"({O}{O}{O}|{O}{O}|{O}) { unsigned int n;	/* %o stores through an unsigned int * */
		  sscanf(yytext+1, "%o", &n); yytext[0] = (char)n; CADD; }
<str>"\\"x({H}+) { unsigned int n;	/* ANSI permits any number! */
		  sscanf(yytext+2, "%x", &n); yytext[0] = (char)n; CADD; }
<str>"\\".	{ yytext[0] = yytext[1]; CADD; }	/* unknown escape: keep the character itself */
<str>.		{ CADD; }
248 
249 %%
250 
251 void
252 startreg()
253 {
254 	BEGIN reg;
255 	clen = 0;
256 }
257 
258 /* input() and unput() are transcriptions of the standard lex
259    macros for input and output with additions for error message
260    printing.  God help us all if someone changes how lex works.
261 */
262 
263 uchar	ebuf[300];
264 uchar	*ep = ebuf;
265 
266 int
267 input(void)
268 {
269 	register int c;
270 	extern uchar *lexprog;
271 
272 	if (yysptr > yysbuf)
273 		c = U(*--yysptr);
274 	else if (lexprog != NULL)	/* awk '...' */
275 		c = *lexprog++;
276 	else				/* awk -f ... */
277 		c = pgetc();
278 	if (c == '\n')
279 		yylineno++;
280 	else if (c == EOF)
281 		c = 0;
282 	if (ep >= ebuf + sizeof ebuf)
283 		ep = ebuf;
284 	return *ep++ = c;
285 }
286 
287 static void
288 unput(int c)
289 {
290 	yytchar = c;
291 	if (yytchar == '\n')
292 		yylineno--;
293 	*yysptr++ = yytchar;
294 	if (--ep < ebuf)
295 		ep = ebuf + sizeof(ebuf) - 1;
296 }
297 
298 
/*
 * Push the whole string s back onto the input.  The characters are pushed
 * last-to-first so that subsequent input() calls return them in their
 * original order.
 */
static void
unputstr(char *s)
{
	char *p;

	for (p = s + strlen(s); p > s; )
		unput(*--p);
}
307