xref: /illumos-gate/usr/src/cmd/oawk/awk.lx.l (revision bea83d026ee1bd1b2a2419e1d0232f107a5d7d9b)
1 %{
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License, Version 1.0 only
7  * (the "License").  You may not use this file except in compliance
8  * with the License.
9  *
10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11  * or http://www.opensolaris.org/os/licensing.
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  *
15  * When distributing Covered Code, include this CDDL HEADER in each
16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  */
23 /*
24  * Copyright 1996 Sun Microsystems, Inc.  All rights reserved.
25  * Use is subject to license terms.
26  */
27 %}
28 %{
29 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
30 %}
31 %{
32 /*	  All Rights Reserved  	*/
33 %}
34 %{
35 #pragma ident	"%Z%%M%	%I%	%E% SMI"
36 %}
37 
38 %Start A str chc sc reg comment
39 
40 %{
/*
 * Lexer for oawk.  The start states declared above are used as follows
 * by the rules in this file:
 *	A	ordinary program text
 *	str	inside a double-quoted string constant
 *	chc	inside a bracketed character class of a regular expression
 *	sc	entered when a '}' is scanned (a ';' is returned for it);
 *		the next yylex() call returns the '}' and goes back to A
 *	reg	inside a regular expression, entered through startreg()
 *	comment	after a '#' not handled by the whole-line comment rule,
 *		up to the next newline
 */
41 #include	"awk.h"
42 #include	"awk.def"
/* input() is defined at the end of this file instead of using lex's macro */
43 #undef	input	/* defeat lex */
/*
 * Token value passed along with each token.  The rules store operator
 * codes, single characters and (int)-cast results of lookup(),
 * setsymtab() and tostring() in it.
 * NOTE(review): tostring() yields a wchar_t * (see the <str> rule), so the
 * (int) casts would truncate where pointers are wider than int -- confirm
 * against the build model.
 */
44 extern int	yylval;
/* set to 1 by the NF rule */
45 extern int	mustfld;
46 
/* line counter, starts at 1 and is incremented by the rules that consume a newline */
47 long long	lineno	= 1;
/*
 * RETURN(x) hands token x back to the caller of yylex().  When built with
 * DEBUG it first calls ptoken(x) if dbg is set.
 */
48 #ifdef	DEBUG
49 #	define	RETURN(x)	{if (dbg) ptoken(x); return (x); }
50 #else
51 #	define	RETURN(x)	return (x)
52 #endif
/*
 * CADD appends the first character of the current match to cbuf.  Once
 * clen reaches CBUFLEN-1 it reports "string too long" and switches the
 * lexer back to state A.
 */
53 #define	CADD	{ cbuf[clen++]=yytext[0]; if (clen>=CBUFLEN-1) { yyerror(\
54 		"string too long", cbuf); BEGIN A; } }
/* capacity of cbuf, in wide characters */
55 #define	CBUFLEN	150
/* text of the string constant or character class currently being collected */
56 wchar_t cbuf[CBUFLEN];
/*
 * clen: number of characters currently held in cbuf.
 * cflag: 1 when the character class was opened with "[^", 0 for "[".
 */
57 int	clen, cflag;
58 %}
59 
60 %a	50000
61 %o	50000
62 
63 A	[a-zA-Z_]
64 B	[a-zA-Z0-9_]
65 D	[0-9]
66 WS	[ \t]
67 
68 %%
69 	switch (yybgin-yysvec-1) {	/* witchcraft */
	/*
	 * Runs at the top of every yylex() call.  The switch expression
	 * appears to recover the current start-state number from lex's
	 * internal tables (TODO confirm against the generated scanner).
	 *
	 * case 0:  no start state selected yet, so enter state A.
	 * case sc: the previous call scanned a '}' and returned ';' for it;
	 *          go back to state A and deliver the '}' itself now.
	 */
70 	case 0:
71 		BEGIN A;
72 		break;
73 	case sc:
74 		BEGIN A;
75 		RETURN('}');
76 	}
77 
78 <A>^\n		lineno++;	/* state A rules start here; empty line: count it, no token */
79 <A>^{WS}*#.*\n	lineno++;	/* strip comment lines */
80 <A>{WS}		/* dummy for cstyle */;
81 <A>"\\"\n	lineno++;	/* backslash-newline: counted, no NL token */
82 <reg>"\\"\n	lineno++;	/* same inside a regular expression */
83 <A>"||"		RETURN(BOR);
84 <A>BEGIN	RETURN(XBEGIN);
85 <A>END		RETURN(XEND);
86 <A>PROGEND	RETURN(EOF);	/* the word PROGEND is returned as EOF */
87 <A>"&&"		RETURN(AND);
88 <A>"!"		RETURN(NOT);
89 <A>"!="		{ yylval = NE; RETURN(RELOP); }
90 <A>"~"		{ yylval = MATCH; RETURN(MATCHOP); }
91 <A>"!~"		{ yylval = NOTMATCH; RETURN(MATCHOP); }
92 <A>"<"		{ yylval = LT; RETURN(RELOP); }
93 <A>"<="		{ yylval = LE; RETURN(RELOP); }
94 <A>"=="		{ yylval = EQ; RETURN(RELOP); }
95 <A>">="		{ yylval = GE; RETURN(RELOP); }
96 <A>">"		{ yylval = GT; RETURN(RELOP); }
97 <A>">>"		{ yylval = APPEND; RETURN(RELOP); }	/* also delivered as a RELOP token */
98 <A>"++"		{ yylval = INCR; RETURN(INCR); }
99 <A>"--"		{ yylval = DECR; RETURN(DECR); }
100 <A>"+="		{ yylval = ADDEQ; RETURN(ASGNOP); }
101 <A>"-="		{ yylval = SUBEQ; RETURN(ASGNOP); }
102 <A>"*="		{ yylval = MULTEQ; RETURN(ASGNOP); }
103 <A>"/="		{ yylval = DIVEQ; RETURN(ASGNOP); }
104 <A>"%="		{ yylval = MODEQ; RETURN(ASGNOP); }
105 <A>"="		{ yylval = ASSIGN; RETURN(ASGNOP); }
106 
107 <A>"$"{D}+	{
		/*
		 * '$' followed by digits.  A value of 0 is looked up in
		 * symtab under the name "$record" and returned as STRING;
		 * any other value is turned into a field address by
		 * fieldadr() and returned as FIELD.
		 */
108 		static wchar_t L_record[] = L"$record";
109 		if (watoi(yytext+1)==0) {
110 				yylval = (int)lookup(L_record, symtab, 0);
111 				RETURN(STRING);
112 			} else {
113 				yylval = fieldadr(watoi(yytext+1));
114 				RETURN(FIELD);
115 			}
116 		}
117 <A>"$"{WS}*	{ RETURN(INDIRECT); }	/* '$' not followed by digits */
118 <A>NF		{ mustfld=1;
119 		yylval = (int)setsymtab(yytext, NULL, 0.0, NUM, symtab);
120 		RETURN(VAR); }	/* NF is a VAR; seeing it sets mustfld */
121 <A>({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)?	{
		/* numeric constant: entered in symtab under its own text */
122 		yylval = (int)setsymtab(yytext, NULL, watof(yytext),
123 			CON|NUM, symtab); RETURN(NUMBER); }
124 <A>"}"{WS}*\n	{ BEGIN sc; lineno++; RETURN(';'); }	/* ';' now; state sc makes the next call return '}' */
125 <A>"}"		{ BEGIN sc; RETURN(';'); }	/* same, without a trailing newline */
126 <A>";"\n		{ lineno++; RETURN(';'); }	/* ';' plus newline yields only ';' */
127 <A>\n		{ lineno++; RETURN(NL); }
128 <A>while	RETURN(WHILE);
129 <A>for		RETURN(FOR);
130 <A>if		RETURN(IF);
131 <A>else		RETURN(ELSE);
132 <A>next		RETURN(NEXT);
133 <A>exit		RETURN(EXIT);
134 <A>break	RETURN(BREAK);
135 <A>continue	RETURN(CONTINUE);
136 <A>print	{ yylval = PRINT; RETURN(PRINT); }
137 <A>printf	{ yylval = PRINTF; RETURN(PRINTF); }
138 <A>sprintf	{ yylval = SPRINTF; RETURN(SPRINTF); }
139 <A>split	{ yylval = SPLIT; RETURN(SPLIT); }
140 <A>substr	RETURN(SUBSTR);
141 <A>index	RETURN(INDEX);
142 <A>in		RETURN(IN);
143 <A>getline	RETURN(GETLINE);
144 <A>length	{ yylval = FLENGTH; RETURN(FNCN); }
145 <A>log		{ yylval = FLOG; RETURN(FNCN); }
146 <A>int		{ yylval = FINT; RETURN(FNCN); }
147 <A>exp		{ yylval = FEXP; RETURN(FNCN); }
148 <A>sqrt		{ yylval = FSQRT; RETURN(FNCN); }
149 <A>{A}{B}*	{
		/*
		 * Any other identifier.  This rule must stay below the
		 * keyword rules: for a match of equal length lex picks
		 * the rule listed first.  The name is entered in symtab
		 * with a copy of the empty string as its string value.
		 */
150 		static wchar_t L_0[] = { 0 };
151 		yylval = (int)setsymtab(yytext, tostring(L_0), 0.0, STR|NUM,
152 			symtab);
153 		RETURN(VAR);
154 		}
155 <A>\"		{ BEGIN str; clen=0; }	/* opening quote: start collecting into cbuf */
156 
157 <A>#		{ BEGIN comment; }	/* '#' elsewhere on a line: skip to newline */
158 <comment>\n	{ BEGIN A; lineno++; RETURN(NL); }
159 <comment>.	/* dummy */;
160 
161 <A>.		{ yylval = yytext[0]; RETURN(yytext[0]); }	/* anything else: the character is its own token */
162 
163 <reg>"["	{ BEGIN chc; clen=0; cflag=0; }	/* state reg rules start here; "[" opens a character class */
164 <reg>"[^"	{ BEGIN chc; clen=0; cflag=1; }	/* "[^" opens one too, with cflag set to 1 */
165 
166 <reg>"?"	RETURN(QUEST);
167 <reg>"+"	RETURN(PLUS);
168 <reg>"*"	RETURN(STAR);
169 <reg>"|"	RETURN(OR);
170 <reg>"."	RETURN(DOT);
171 <reg>"("	RETURN('(');
172 <reg>")"	RETURN(')');
173 <reg>"^"	RETURN('^');
174 <reg>"$"	RETURN('$');
175 <reg>\\{D}{D}{D}	{ wsscanf(yytext+1, "%o", &yylval); RETURN(CHAR); }	/* NOTE(review): {D} admits 8 and 9, which %o will not convert -- confirm intent */
176 <reg>\\.	{	if (yytext[1]=='n') yylval = '\n';
			/* backslash escapes; any other escaped character stands for itself */
177 			else if (yytext[1] == 't') yylval = '\t';
178 			else if (yytext[1] == 'b') yylval = '\b';
179 			else if (yytext[1] == 'r') yylval = '\r';
180 			else if (yytext[1] == 'f') yylval = '\f';
181 			else yylval = yytext[1];
182 			RETURN(CHAR);
183 		}
184 <reg>"/"	{ BEGIN A; unput('/'); }	/* closing slash: back to state A, where the '/' is scanned again */
185 <reg>\n		{ yyerror("newline in regular expression"); lineno++; BEGIN A; }
186 <reg>.		{ yylval = yytext[0]; RETURN(CHAR); }	/* every other character is a literal CHAR */
187 
<str>\"		{
		/*
		 * Closing quote of a string constant.  The collected text
		 * is copied with tostring(); the symbol-table name is the
		 * same text followed by one blank.  Every store into cbuf
		 * in the str and chc rules stops once clen is CBUFLEN-2,
		 * so the blank and the terminator written here always fit.
		 */
		wchar_t *s; BEGIN A; cbuf[clen]=0; s = tostring(cbuf);
		cbuf[clen] = ' '; cbuf[++clen] = 0;
		yylval = (int)setsymtab(cbuf, s, 0.0, CON|STR, symtab);
		RETURN(STRING); }
<str>\n		{ yyerror("newline in string"); lineno++; BEGIN A; }
<str>"\\\""	{
		/*
		 * Escape sequences.  Each one stores a single character,
		 * applying the same limit and the same error handling that
		 * CADD applies to ordinary characters; without the check a
		 * long run of escapes would write past the end of cbuf.
		 */
		if (clen < CBUFLEN-2) cbuf[clen++]='"';
		else { yyerror("string too long", cbuf); BEGIN A; } }
<str>"\\"n	{ if (clen < CBUFLEN-2) cbuf[clen++]='\n';
		else { yyerror("string too long", cbuf); BEGIN A; } }
<chc>"\\"n	{ if (clen < CBUFLEN-2) cbuf[clen++]='\n';
		else { yyerror("string too long", cbuf); BEGIN A; } }
<str>"\\"t	{ if (clen < CBUFLEN-2) cbuf[clen++]='\t';
		else { yyerror("string too long", cbuf); BEGIN A; } }
<chc>"\\"t	{ if (clen < CBUFLEN-2) cbuf[clen++]='\t';
		else { yyerror("string too long", cbuf); BEGIN A; } }
<str>"\\"b	{ if (clen < CBUFLEN-2) cbuf[clen++]='\b';
		else { yyerror("string too long", cbuf); BEGIN A; } }
<chc>"\\"b	{ if (clen < CBUFLEN-2) cbuf[clen++]='\b';
		else { yyerror("string too long", cbuf); BEGIN A; } }
<str>"\\"r	{ if (clen < CBUFLEN-2) cbuf[clen++]='\r';
		else { yyerror("string too long", cbuf); BEGIN A; } }
<chc>"\\"r	{ if (clen < CBUFLEN-2) cbuf[clen++]='\r';
		else { yyerror("string too long", cbuf); BEGIN A; } }
<str>"\\"f	{ if (clen < CBUFLEN-2) cbuf[clen++]='\f';
		else { yyerror("string too long", cbuf); BEGIN A; } }
<chc>"\\"f	{ if (clen < CBUFLEN-2) cbuf[clen++]='\f';
		else { yyerror("string too long", cbuf); BEGIN A; } }
<str>"\\\\"	{ if (clen < CBUFLEN-2) cbuf[clen++]='\\';
		else { yyerror("string too long", cbuf); BEGIN A; } }
<chc>"\\\\"	{ if (clen < CBUFLEN-2) cbuf[clen++]='\\';
		else { yyerror("string too long", cbuf); BEGIN A; } }
<str>.		{ CADD; }

<chc>"\\""]"	{ if (clen < CBUFLEN-2) cbuf[clen++]=']';
		else { yyerror("string too long", cbuf); BEGIN A; } }
<chc>"]"	{
		/*
		 * End of a character class: hand back a copy of the
		 * collected characters as CCL, or as NCCL when the class
		 * was opened with "[^", and resume scanning in state reg.
		 */
		BEGIN reg; cbuf[clen]=0; yylval = (int)tostring(cbuf);
		if (cflag==0) { RETURN(CCL); }
		else { RETURN(NCCL); } }
<chc>\n		{ yyerror("newline in character class"); lineno++; BEGIN A; }
<chc>.		{ CADD; }
214 
215 %%
216 
217 int
218 input()
219 {
220 	int c;
221 	extern wchar_t *lexprog;
222 
223 	if (yysptr > yysbuf)
224 		c = U(*--yysptr);
225 	else if (yyin == NULL)
226 		c = *lexprog++;
227 	else
228 		c = getwc(yyin);
229 	if (c == '\n')
230 		yylineno++;
231 	else if (c == EOF)
232 		c = 0;
233 	return (c);
234 }
235 
/*
 * startreg - switch the scanner into the reg start state, so that the
 * following input is scanned by the <reg> rules.
 * NOTE(review): presumably called from the grammar when a '/' opens a
 * regular expression -- confirm against the parser.
 */
236 void
237 startreg()
238 {
239 	BEGIN reg;
240 }
241