xref: /illumos-gate/usr/src/cmd/awk/awk.g.y (revision 88f8b78a88cbdc6d8c1af5c3e54bc49d25095c98)
1 %{
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License, Version 1.0 only
7  * (the "License").  You may not use this file except in compliance
8  * with the License.
9  *
10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11  * or http://www.opensolaris.org/os/licensing.
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  *
15  * When distributing Covered Code, include this CDDL HEADER in each
16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  */
23 %}
24 /*
25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
26  * Use is subject to license terms.
27  */
28 
29 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
30 /*	  All Rights Reserved  	*/
31 
/*
 * Identification string for this source file, attached with #ident.
 * NOTE(review): the %Z%%M% %I% %E% fields look like unexpanded SCCS
 * keywords -- confirm against the SCM/build tooling.
 */
32 %{
33 #ident	"%Z%%M%	%I%	%E% SMI"	/* SVr4.0 2.10	*/
34 %}
35 
/*
 * C prologue: parser-wide state shared with the grammar actions, plus
 * prototypes for the static helpers defined after the second %%.
 */
36 %{
37 #include "awk.h"
	/* Always returns 1 (lex convention: no further input to switch to). */
38 int yywrap(void) { return(1); }
	/* Without DEBUG, PUTS(x) expands to nothing. */
39 #ifndef	DEBUG
40 #	define	PUTS(x)
41 #endif
	/* Accumulated BEGIN and END action lists; linked up in pa_stat. */
42 Node	*beginloc = 0, *endloc = 0;
43 int	infunc	= 0;	/* = 1 if in arglist or body of func */
	/* Name of the function being defined; set by setfname(), cleared in pa_stat. */
44 uchar	*curfname = 0;
45 Node	*arglist = 0;	/* list of args for current function */
46 static void	setfname(Cell *);
47 static int	constnode(Node *);
48 static uchar	*strnode(Node *);
	/* Full prototype so that calls made from grammar actions are type-checked. */
49 static Node	*notnull(Node *);
50 %}
51 
/*
 * Semantic value type.  The member names are the <tag>s used by the
 * %token and %type declarations in this file.
 */
52 %union {
53 	Node	*p;	/* <p>: Node pointer */
54 	Cell	*cp;	/* <cp>: Cell pointer */
55 	int	i;	/* <i>: integer value */
56 	uchar	*s;	/* <s>: string (used for REGEXPR / reg_expr) */
57 }
58 
/*
 * Token declarations.  The <tag> selects the %union member that holds the
 * token's semantic value.  Several of these names (e.g. PROGRAM, PASTAT,
 * INTEST, CONDEXPR, POSTINCR, ADD) never appear on a rule's right-hand
 * side in this grammar; the actions use them only as node codes passed
 * to the stat*() / op*() constructors.
 */
59 %token	<i>	FIRSTTOKEN	/* must be first */
60 %token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
61 %token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
62 %token	<i>	ARRAY
63 %token	<i>	MATCH NOTMATCH MATCHOP
64 %token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS
65 %token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
66 %token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
67 %token	<i>	SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT
68 %token	<i>	ADD MINUS MULT DIVIDE MOD
69 %token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
70 %token	<i>	PRINT PRINTF SPRINTF
71 %token	<p>	ELSE INTEST CONDEXPR
72 %token	<i>	POSTINCR PREINCR POSTDECR PREDECR
73 %token	<cp>	VAR IVAR VARNF CALL NUMBER STRING FIELD
74 %token	<s>	REGEXPR
75 
/*
 * Value types of the nonterminals.  Nonterminals not listed here
 * (do, else, st, rbrace) are never referenced through $n or $$ in any
 * action in this grammar.
 */
76 %type	<p>	pas pattern ppattern plist pplist patlist prarg term
77 %type	<p>	pa_pat pa_stat pa_stats
78 %type	<s>	reg_expr
79 %type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
80 %type	<p>	var varname funcname varlist
81 %type	<p>	for if while
82 %type	<i>	pst opt_pst lbrace rparen comma nl opt_nl and bor
83 %type	<i>	subop print
84 
/*
 * Operator precedence and associativity.  Per yacc convention each line
 * binds tighter than the lines above it.  CAT and UMINUS never appear on
 * a rule's right-hand side here; they are used through %prec annotations
 * and as node codes in the actions.  Names first introduced in this table
 * (GETLINE, RETURN, SPLIT, SUBSTR, WHILE, NOT, POWER, DECR, INCR,
 * INDIRECT, ...) are declared without a <tag>.
 */
85 %right	ASGNOP
86 %right	'?'
87 %right	':'
88 %left	BOR
89 %left	AND
90 %left	GETLINE
91 %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
92 %left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FIELD FUNC
93 %left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
94 %left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
95 %left	REGEXPR VAR VARNF IVAR WHILE '('
96 %left	CAT
97 %left	'+' '-'
98 %left	'*' '/' '%'
99 %left	NOT UMINUS
100 %right	POWER
101 %right	DECR INCR
102 %left	INDIRECT
103 %token	LASTTOKEN	/* must be last */
104 
105 %%
106 
/*
 * Start symbol (first rule; there is no %start).  When no error has been
 * flagged, the parsed program is stored in `winner` as a PROGRAM node
 * built from the BEGIN list, the pattern-action list and the END list.
 * On a top-level syntax error the lookahead is discarded, bracecheck()
 * is called and "bailing out" is reported.
 */
107 program:
108 	  pas	{ if (errorflag==0)
109 			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
110 	| error	{ yyclearin; bracecheck(); ERROR "bailing out" SYNTAX; }
111 	;
112 
/* AND operator followed by any number of NL tokens. */
113 and:
114 	  AND | and NL
115 	;
116 
/* BOR operator followed by any number of NL tokens. */
117 bor:
118 	  BOR | bor NL
119 	;
120 
/* ',' followed by any number of NL tokens. */
121 comma:
122 	  ',' | comma NL
123 	;
124 
/* DO keyword followed by any number of NL tokens. */
125 do:
126 	  DO | do NL
127 	;
128 
/* ELSE keyword followed by any number of NL tokens. */
129 else:
130 	  ELSE | else NL
131 	;
132 
/*
 * for statements, in three forms:
 *   for (init; cond; incr) stmt  -- FOR node; cond is passed through notnull()
 *   for (init; ; incr) stmt      -- FOR node with NIL in the condition slot
 *   for (name in name) stmt      -- IN node; the second name goes through makearr()
 */
133 for:
134 	  FOR '(' opt_simple_stmt ';' pattern ';' opt_simple_stmt rparen stmt
135 		{ $$ = stat4(FOR, $3, notnull($5), $7, $9); }
136 	| FOR '(' opt_simple_stmt ';'  ';' opt_simple_stmt rparen stmt
137 		{ $$ = stat4(FOR, $3, NIL, $6, $8); }
138 	| FOR '(' varname IN varname rparen stmt
139 		{ $$ = stat3(IN, $3, makearr($5), $7); }
140 	;
141 
/*
 * Name in a function definition: a VAR or CALL token, handed to
 * setfname() for validation and recording.  Neither action assigns $$,
 * so the value is whatever yacc's default $$ = $1 leaves there; pa_stat
 * casts it back with (Cell *)$2.
 * NOTE(review): this relies on the <cp> and <p> union members aliasing.
 */
142 funcname:
143 	  VAR	{ setfname($1); }
144 	| CALL	{ setfname($1); }
145 	;
146 
/* Head of an if statement; the value is the condition passed through notnull(). */
147 if:
148 	  IF '(' pattern rparen		{ $$ = notnull($3); }
149 	;
150 
/* '{' followed by any number of NL tokens. */
151 lbrace:
152 	  '{' | lbrace NL
153 	;
154 
/* One or more NL tokens. */
155 nl:
156 	  NL | nl NL
157 	;
158 
/* Zero or more NL tokens; the empty alternative yields 0. */
159 opt_nl:
160 	  /* empty */	{ $$ = 0; }
161 	| nl
162 	;
163 
/* Optional run of separators (see pst); the empty alternative yields 0. */
164 opt_pst:
165 	  /* empty */	{ $$ = 0; }
166 	| pst
167 	;
168 
169 
/* Optional simple_stmt (used for the for-loop init/incr slots); empty yields 0. */
170 opt_simple_stmt:
171 	  /* empty */			{ $$ = 0; }
172 	| simple_stmt
173 	;
174 
/*
 * Whole program body: either only optional separators (value 0), or a
 * pa_stats list with optional separators on both sides.
 */
175 pas:
176 	  opt_pst			{ $$ = 0; }
177 	| opt_pst pa_stats opt_pst	{ $$ = $2; }
178 	;
179 
/* Pattern part of a pattern-action statement, passed through notnull(). */
180 pa_pat:
181 	  pattern	{ $$ = notnull($1); }
182 	;
183 
/*
 * One top-level item of the program:
 *   - pattern alone: PASTAT whose action is PRINT of rectonode()
 *   - pattern { stmts }: PASTAT with the given action
 *   - pat , pat [ { stmts } ]: range pattern, built by pa2stat(); the
 *     action is PRINT of rectonode() when no block is given
 *   - { stmts }: PASTAT with a NIL pattern
 *   - XBEGIN / XEND blocks: appended to beginloc / endloc; value 0
 *   - FUNC definition: registered through defn(); value 0
 * In the FUNC alternative {infunc++;} is a mid-rule action and occupies
 * $6, so lbrace is $7 and the body stmtlist is $8.
 */
184 pa_stat:
185 	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
186 	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
187 	| pa_pat ',' pa_pat		{ $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
188 	| pa_pat ',' pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $3, $5); }
189 	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
190 	| XBEGIN lbrace stmtlist '}'
191 		{ beginloc = linkum(beginloc, $3); $$ = 0; }
192 	| XEND lbrace stmtlist '}'
193 		{ endloc = linkum(endloc, $3); $$ = 0; }
194 	| FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
195 		{ infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
196 	;
197 
/* One or more pa_stat items, optionally separated by pst, joined with linkum(). */
198 pa_stats:
199 	  pa_stat
200 	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
201 	;
202 
/* One or more comma-separated patterns, joined with linkum(). */
203 patlist:
204 	  pattern
205 	| patlist comma pattern	{ $$ = linkum($1, $3); }
206 	;
207 
/*
 * Restricted expression form used for unparenthesized print arguments
 * (see pplist / prarg).  Compared with `pattern` it has no relational
 * alternatives (EQ GE GT LE LT NE) and no '|' GETLINE alternatives --
 * presumably so that GT and '|' after print arguments are taken as
 * output redirection (see simple_stmt).
 *
 * For MATCHOP, a literal /regex/ or a constant right operand is compiled
 * with makedfa() while parsing and the node's first slot is NIL;
 * otherwise the right operand is kept as an expression and the first
 * slot is (Node *)1.
 * NOTE(review): (Node *)1 looks like a "dynamic regex" flag -- confirm
 * against the code that evaluates MATCHOP nodes.
 */
208 ppattern:
209 	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
210 	| ppattern '?' ppattern ':' ppattern %prec '?'
211 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
212 	| ppattern bor ppattern %prec BOR
213 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
214 	| ppattern and ppattern %prec AND
215 		{ $$ = op2(AND, notnull($1), notnull($3)); }
216 	| NOT ppattern
217 		{ $$ = op1(NOT, notnull($2)); }
218 	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
219 	| ppattern MATCHOP ppattern
220 		{ if (constnode($3))
221 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
222 		  else
223 			$$ = op3($2, (Node *)1, $1, $3); }
224 	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
225 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
226 	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	/* A bare /regex/ is matched against rectonode(). */
227 	| reg_expr
228 		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
229 	| term
230 	;
231 
/*
 * General expression: assignment, ?:, boolean and relational operators,
 * regex match, `in` membership, `expr | getline`, concatenation (two
 * adjacent operands, precedence CAT), a bare /regex/, or a term.
 *
 * Operands of ?:, BOR and AND are passed through notnull().  NOT instead
 * always wraps its operand in an NE comparison against the "$zero&null"
 * symbol-table entry, unlike the NOT alternative of ppattern, which
 * calls notnull().
 *
 * For MATCHOP, a literal /regex/ or a constant right operand is compiled
 * with makedfa() while parsing and the node's first slot is NIL;
 * otherwise the right operand is kept as an expression and the first
 * slot is (Node *)1.
 * NOTE(review): (Node *)1 looks like a "dynamic regex" flag -- confirm
 * against the code that evaluates MATCHOP nodes.
 */
232 pattern:
233 	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
234 	| pattern '?' pattern ':' pattern %prec '?'
235 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
236 	| pattern bor pattern %prec BOR
237 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
238 	| pattern and pattern %prec AND
239 		{ $$ = op2(AND, notnull($1), notnull($3)); }
240 	| NOT pattern
241 		{ $$ = op1(NOT, op2(NE,$2,valtonode(lookup((uchar *)"$zero&null",symtab),CCON))); }
242 	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
243 	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
244 	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
245 	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
246 	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
247 	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
248 	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
249 	| pattern MATCHOP pattern
250 		{ if (constnode($3))
251 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
252 		  else
253 			$$ = op3($2, (Node *)1, $1, $3); }
254 	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
255 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
	/* GETLINE node: target var (or 0), the '|' token value, then the left-hand pattern. */
256 	| pattern '|' GETLINE var	{ $$ = op3(GETLINE, $4, (Node*)$2, $1); }
257 	| pattern '|' GETLINE		{ $$ = op3(GETLINE, (Node*)0, (Node*)$2, $1); }
258 	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	/* A bare /regex/ is matched against rectonode(). */
259 	| reg_expr
260 		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
261 	| term
262 	;
263 
/*
 * Two or more comma-separated patterns, joined with linkum().  Used only
 * inside parentheses in this grammar ('(' plist ')').
 */
264 plist:
265 	  pattern comma pattern		{ $$ = linkum($1, $3); }
266 	| plist comma pattern		{ $$ = linkum($1, $3); }
267 	;
268 
/* One or more comma-separated ppatterns (print arguments), joined with linkum(). */
269 pplist:
270 	  ppattern
271 	| pplist comma ppattern		{ $$ = linkum($1, $3); }
	;
272 
/*
 * Argument list of print/printf: nothing (value is rectonode()), an
 * unparenthesized pplist, or a parenthesized plist.
 */
273 prarg:
274 	  /* empty */			{ $$ = rectonode(); }
275 	| pplist
276 	| '(' plist ')'			{ $$ = $2; }
277 	;
278 
/* Either output keyword; the token value is used as the node code in simple_stmt. */
279 print:
280 	  PRINT | PRINTF
281 	;
282 
/* One or more separators between program items: any mix of NL and ';'. */
283 pst:
284 	  NL | ';' | pst NL | pst ';'
285 	;
286 
/* '}' followed by any number of NL tokens. */
287 rbrace:
288 	  '}' | rbrace NL
289 	;
290 
/*
 * Regular-expression literal.  startreg() runs as a mid-rule action right
 * after the opening '/' (presumably switching the lexer to regex
 * scanning -- confirm in the lexer); that action occupies $2, so the
 * REGEXPR string is $3.
 */
291 reg_expr:
292 	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
293 	;
294 
/* ')' followed by any number of NL tokens. */
295 rparen:
296 	  ')' | rparen NL
297 	;
298 
/*
 * Statements that need no compound syntax:
 *   - print/printf, optionally redirected with '|', APPEND or GT; the
 *     redirection token value is stored (cast to Node *) in the node,
 *     followed by the destination term
 *   - delete name[subscripts]
 *   - delete name without a subscript: reported as a syntax error, but a
 *     DELETE node is still built
 *   - an expression used as a statement (exptostat())
 *   - error recovery: discard the lookahead and report "illegal
 *     statement"; this action does not assign $$
 */
299 simple_stmt:
300 	  print prarg '|' term		{ $$ = stat3($1, $2, (Node *) $3, $4); }
301 	| print prarg APPEND term	{ $$ = stat3($1, $2, (Node *) $3, $4); }
302 	| print prarg GT term		{ $$ = stat3($1, $2, (Node *) $3, $4); }
303 	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
304 	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
305 	| DELETE varname		{ yyclearin; ERROR "you can only delete array[element]" SYNTAX; $$ = stat1(DELETE, $2); }
306 	| pattern			{ $$ = exptostat($1); }
307 	| error				{ yyclearin; ERROR "illegal statement" SYNTAX; }
308 	;
309 
/* Statement terminator: one or more newlines, or ';' with optional newlines. */
310 st:
311 	  nl | ';' opt_nl
312 	;
313 
/*
 * A single statement.  Notes on the less obvious alternatives:
 *   - do ... while: DO node holding the body and the notnull()'d condition
 *   - `for` and `simple_stmt st` have no action, so the value is $1
 *   - if without else stores NIL in the else slot
 *   - NEXT while infunc is nonzero is reported as a syntax error, but the
 *     NEXT node is still built
 *   - a lone ';' is an empty statement with value 0
 */
314 stmt:
315 	  BREAK st		{ $$ = stat1(BREAK, NIL); }
316 	| CLOSE pattern st	{ $$ = stat1(CLOSE, $2); }
317 	| CONTINUE st		{ $$ = stat1(CONTINUE, NIL); }
318 	| do stmt WHILE '(' pattern ')' st
319 		{ $$ = stat2(DO, $2, notnull($5)); }
320 	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
321 	| EXIT st		{ $$ = stat1(EXIT, NIL); }
322 	| for
323 	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
324 	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
325 	| lbrace stmtlist rbrace { $$ = $2; }
326 	| NEXT st	{ if (infunc)
327 				ERROR "next is illegal inside a function" SYNTAX;
328 			  $$ = stat1(NEXT, NIL); }
329 	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
330 	| RETURN st		{ $$ = stat1(RETURN, NIL); }
331 	| simple_stmt st
332 	| while stmt		{ $$ = stat2(WHILE, $1, $2); }
333 	| ';' opt_nl		{ $$ = 0; }
334 	;
335 
/* One or more statements, joined with linkum(). */
336 stmtlist:
337 	  stmt
338 	| stmtlist stmt		{ $$ = linkum($1, $2); }
339 	;
340 
/* Either substitution keyword; the token value is used as the node code in term. */
341 subop:
342 	  SUB | GSUB
343 	;
344 
/*
 * Operands and the operators that bind tighter than concatenation:
 * arithmetic, unary sign, built-in and user function calls, ++/--,
 * getline forms, the string functions, literals and variables.
 *
 *   - unary '+' yields its operand unchanged
 *   - BLTIN with no argument list, or with "()", gets rectonode() as its
 *     argument
 *   - index() with a /regex/ second argument is reported as a syntax
 *     error, though a node is still built
 *   - MATCHFCN and subop (SUB/GSUB): a literal /regex/ or a constant
 *     pattern is compiled with makedfa(..., 1) while parsing and the
 *     first slot is NIL; otherwise the expression is kept and the first
 *     slot is (Node *)1
 *     NOTE(review): (Node *)1 looks like a "dynamic regex" flag -- confirm
 *     in the code that evaluates these nodes
 *   - subop without a target operand uses rectonode() as the target
 *   - SPLIT's last slot records the separator kind: STRING for an
 *     expression or an omitted separator, REGEXPR for a literal /regex/
 */
345 term:
346 	  term '+' term			{ $$ = op2(ADD, $1, $3); }
347 	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
348 	| term '*' term			{ $$ = op2(MULT, $1, $3); }
349 	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
350 	| term '%' term			{ $$ = op2(MOD, $1, $3); }
351 	| term POWER term		{ $$ = op2(POWER, $1, $3); }
352 	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
353 	| '+' term %prec UMINUS		{ $$ = $2; }
354 	| BLTIN '(' ')'			{ $$ = op2(BLTIN, (Node *) $1, rectonode()); }
355 	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, (Node *) $1, $3); }
356 	| BLTIN				{ $$ = op2(BLTIN, (Node *) $1, rectonode()); }
357 	| CALL '(' ')'			{ $$ = op2(CALL, valtonode($1,CVAR), NIL); }
358 	| CALL '(' patlist ')'		{ $$ = op2(CALL, valtonode($1,CVAR), $3); }
359 	| DECR var			{ $$ = op1(PREDECR, $2); }
360 	| INCR var			{ $$ = op1(PREINCR, $2); }
361 	| var DECR			{ $$ = op1(POSTDECR, $1); }
362 	| var INCR			{ $$ = op1(POSTINCR, $1); }
363 	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, (Node *)$3, $4); }
364 	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, (Node *)$2, $3); }
365 	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
366 	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
367 	| INDEX '(' pattern comma pattern ')'
368 		{ $$ = op2(INDEX, $3, $5); }
369 	| INDEX '(' pattern comma reg_expr ')'
370 		{ ERROR "index() doesn't permit regular expressions" SYNTAX;
371 		  $$ = op2(INDEX, $3, (Node*)$5); }
372 	| '(' pattern ')'		{ $$ = $2; }
373 	| MATCHFCN '(' pattern comma reg_expr ')'
374 		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
375 	| MATCHFCN '(' pattern comma pattern ')'
376 		{ if (constnode($5))
377 			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
378 		  else
379 			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
380 	| NUMBER			{ $$ = valtonode($1, CCON); }
381 	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
382 		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
383 	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
384 		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
385 	| SPLIT '(' pattern comma varname ')'
386 		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
387 	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
388 	| STRING	 		{ $$ = valtonode($1, CCON); }
389 	| subop '(' reg_expr comma pattern ')'
390 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
391 	| subop '(' pattern comma pattern ')'
392 		{ if (constnode($3))
393 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
394 		  else
395 			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
396 	| subop '(' reg_expr comma pattern comma var ')'
397 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
398 	| subop '(' pattern comma pattern comma var ')'
399 		{ if (constnode($3))
400 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
401 		  else
402 			$$ = op4($1, (Node *)1, $3, $5, $7); }
403 	| SUBSTR '(' pattern comma pattern comma pattern ')'
404 		{ $$ = op3(SUBSTR, $3, $5, $7); }
405 	| SUBSTR '(' pattern comma pattern ')'
406 		{ $$ = op3(SUBSTR, $3, $5, NIL); }
407 	| var
408 	;
409 
/*
 * Variable reference: a plain name, a subscripted name (ARRAY node; the
 * name goes through makearr()), a FIELD token, an IVAR token wrapped in
 * an INDIRECT node, or INDIRECT applied to a term.
 */
410 var:
411 	  varname
412 	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
413 	| FIELD				{ $$ = valtonode($1, CFLD); }
414 	| IVAR				{ $$ = op1(INDIRECT, valtonode($1, CVAR)); }
415 	| INDIRECT term	 		{ $$ = op1(INDIRECT, $2); }
416 	;
417 
/*
 * Parameter list of a function definition: empty, or comma-separated VAR
 * tokens.  Every alternative also stores the list built so far in the
 * global arglist.
 */
418 varlist:
419 	  /* nothing */		{ arglist = $$ = 0; }
420 	| VAR			{ arglist = $$ = valtonode($1,CVAR); }
421 	| varlist comma VAR	{ arglist = $$ = linkum($1,valtonode($3,CVAR)); }
422 	;
423 
/*
 * A name: VAR becomes a CVAR value node; ARG and VARNF become op1 nodes
 * whose operand is the token value cast to Node *.
 */
424 varname:
425 	  VAR			{ $$ = valtonode($1, CVAR); }
426 	| ARG 			{ $$ = op1(ARG, (Node *) $1); }
427 	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
428 	;
429 
430 
/* Head of a while loop; the value is the condition passed through notnull(). */
431 while:
432 	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
433 	;
434 
435 %%
436 
/*
 * Record the name of the function being defined.
 *
 * Reports a syntax error if p is already an array or already a function.
 * In every case, including after an error, curfname is set to p->nval.
 */
437 static void
438 setfname(Cell *p)
439 {
440 	if (isarr(p))
441 		ERROR "%s is an array, not a function", p->nval SYNTAX;
442 	else if (isfunc(p))
443 		ERROR "you can't define function %s more than once", p->nval SYNTAX;
444 	curfname = p->nval;
445 }
446 
447 
/*
 * Return nonzero if p is an NVALUE node whose first argument, viewed as
 * a Cell, has csub == CCON; otherwise return 0.  The grammar uses this
 * to decide whether a regex operand can be compiled while parsing.
 */
448 static int
449 constnode(Node *p)
450 {
451 	return p->ntype == NVALUE && ((Cell *) (p->narg[0]))->csub == CCON;
452 }
453 
/*
 * Return the sval of the Cell stored in p->narg[0].  No checking is done
 * here; every call in this file is made only after constnode(p) has
 * returned nonzero.
 */
454 static uchar *
455 strnode(Node *p)
456 {
457 	return ((Cell *)(p->narg[0]))->sval;
458 }
459 
/*
 * Prepare an expression for use as a condition.
 *
 * If n's nobj is a relational operator (LE LT EQ NE GT GE) or a boolean
 * operator (BOR AND NOT), n is returned unchanged.  Any other node is
 * wrapped in an NE comparison against nullnode.  n is dereferenced
 * unconditionally, so it must not be NULL.
 */
460 static Node *
461 notnull(Node *n)
462 {
463 	switch (n->nobj) {
464 	case LE: case LT: case EQ: case NE: case GT: case GE:
465 	case BOR: case AND: case NOT:
466 		return n;
467 	default:
468 		return op2(NE, n, nullnode);
469 	}
470 }
471