xref: /illumos-gate/usr/src/cmd/awk_xpg4/awk.y (revision b8767451d156f585534afac0bf22721810d0dc63)
1 %{
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License, Version 1.0 only
7  * (the "License").  You may not use this file except in compliance
8  * with the License.
9  *
10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11  * or http://www.opensolaris.org/os/licensing.
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  *
15  * When distributing Covered Code, include this CDDL HEADER in each
16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  */
23 /*
24  * awk -- YACC grammar
25  *
26  * Copyright (c) 1995 by Sun Microsystems, Inc.
27  *
28  * Copyright 1986, 1992 by Mortice Kern Systems Inc.  All rights reserved.
29  *
30  * This Software is unpublished, valuable, confidential property of
31  * Mortice Kern Systems Inc.  Use is authorized only in accordance
32  * with the terms and conditions of the source licence agreement
33  * protecting this Software.  Any unauthorized use or disclosure of
34  * this Software is strictly prohibited and will result in the
35  * termination of the licence agreement.
36  *
37  * NOTE: this grammar correctly produces NO shift/reduce conflicts from YACC.
38  *
39  */
40 
41 /*
42  * Do not use any character constants as tokens, so the resulting C file
43  * is codeset independent.
44  */
45 
46 #ident	"%Z%%M%	%I%	%E% SMI"
47 #include "awk.h"
48 static NODE * fliplist ANSI((NODE *np));
49 %}
50 
51 %union	{
52 	NODE	*node;
53 };
54 
55 /*
56  * Do not use any character constants as tokens, so the resulting C file
57  * is codeset independent.
58  *
59  * Declare terminal symbols before their operator
60  * precedences to get them in a contiguous block
61  * for giant switches in action() and exprreduce().
62  */
63 /* Tokens from exprreduce() */
64 %token	<node>	PARM ARRAY UFUNC FIELD IN INDEX CONCAT
65 %token	<node>	NOT AND OR EXP QUEST
66 %token	<node>	EQ NE GE LE GT LT
67 %token	<node>	ADD SUB MUL DIV REM INC DEC PRE_INC PRE_DEC
68 %token	<node>	GETLINE CALLFUNC RE TILDE NRE
69 
70 /* Tokens shared by exprreduce() and action() */
71 %token		ASG
72 
73 /* Tokens from action() */
74 %token	<node>	PRINT PRINTF
75 %token	<node>	EXIT RETURN BREAK CONTINUE NEXT
76 %token	<node>	DELETE WHILE DO FOR FORIN IF
77 
78 /*
79  * Terminal symbols not used in action() and exprreduce()
80  * switch statements.
81  */
82 %token	<node>	CONSTANT VAR FUNC
83 %token	<node>	DEFFUNC BEGIN END CLOSE ELSE PACT
84 %right		ELSE
85 %token		DOT CALLUFUNC
86 
87 /*
88  * Tokens not used in grammar
89  */
90 %token		KEYWORD SVAR
91 %token		PIPESYM
92 
93 /*
94  * Tokens representing character constants
95  * TILDE, '~', taken care of above
96  */
97 %token BAR		/* '|' */
98        CARAT		/* '^' */
99        LANGLE		/* '<' */
100        RANGLE		/* '>' */
101        PLUSC		/* '+' */
102        HYPHEN		/* '-' */
103        STAR		/* '*' */
104        SLASH		/* '/' */
105        PERCENT		/* '%' */
106        EXCLAMATION	/* '!' */
107        DOLLAR		/* '$' */
108        LSQUARE		/* '[' */
109        RSQUARE		/* ']' */
110        LPAREN		/* '(' */
111        RPAREN		/* ')' */
112        SEMI		/* ';' */
113        LBRACE		/* '{' */
114        RBRACE		/* '}' */
115 
116 /*
117  * Priorities of operators
118  * Lowest to highest
119  */
120 %left	COMMA
121 %right	BAR PIPE WRITE APPEND
122 %right	ASG AADD ASUB AMUL ADIV AREM AEXP
123 %right	QUEST COLON
124 %left	OR
125 %left	AND
126 %left	IN
127 %left	CARAT
128 %left	TILDE NRE
129 %left	EQ NE LANGLE RANGLE GE LE
130 %left	CONCAT
131 %left	PLUSC HYPHEN
132 %left	STAR SLASH PERCENT
133 %right	UPLUS UMINUS
134 %right	EXCLAMATION
135 %right	EXP
136 %right	INC DEC URE
137 %left	DOLLAR LSQUARE RSQUARE
138 %left	LPAREN RPAREN
139 
140 %type	<node>	prog rule pattern expr rvalue lvalue fexpr varlist varlist2
141 %type	<node>	statement statlist fileout exprlist eexprlist simplepattern
142 %type	<node>	getline optvar var
143 %type	<node>	dummy
144 
145 %start	dummy
146 %%
147 
/*
 * Start symbol (see %start above).  Once the whole program has been
 * parsed and collected in the global yytree, pass that tree through
 * fliplist() and store the result back in yytree.
 */
148 dummy:
149 		prog			= {
150 			yytree = fliplist(yytree);
151 		}
152 		;
/*
 * A program is a SEMI-separated sequence of rules.  Neither action
 * sets $$; the result is accumulated in the global yytree instead.
 * The production is right-recursive, so the trailing prog has already
 * been reduced (and yytree assigned) when the second action runs.
 */
153 prog:
154 	  rule				= {
155 		yytree = $1;
156 	}
157 	| rule SEMI prog		= {
		/*
		 * A rule can reduce to NNULL (function definition or
		 * empty rule, see "rule" below); such a rule adds
		 * nothing.  Otherwise put it in front of what has been
		 * collected so far, using a COMMA node as the link.
		 */
158 		if ($1 != NNULL) {
159 			if (yytree != NNULL)
160 				yytree = node(COMMA, $1, yytree); else
161 				yytree = $1;
162 		}
163 	}
164 	;
165 
/*
 * One top-level item of the program.  Pattern/action forms yield a
 * PACT node (pattern on the left, action on the right); function
 * definitions and empty rules yield NNULL.
 */
165 rule:	  pattern LBRACE statlist RBRACE	= {
166 		$$ = node(PACT, $1, $3);
167 		doing_begin = 0;
168 	}
	/* Action with no pattern: the pattern slot is NNULL. */
169 	| LBRACE statlist RBRACE		= {
170 		npattern++;
171 		$$ = node(PACT, NNULL, $2);
172 	}
	/* Pattern with no action: the action is a PRINT with no arguments. */
173 	| pattern				= {
174 		$$ = node(PACT, $1, node(PRINT, NNULL, NNULL));
175 		doing_begin = 0;
176 	}
	/*
	 * Function definition.  The embedded actions each occupy a
	 * position, so the symbols are numbered:
	 *   $1 DEFFUNC  $2 VAR  $3 {action}  $4 LPAREN  $5 varlist
	 *   $6 RPAREN  $7 {action}  $8 LBRACE  $9 statlist
	 *   $10 {action}  $11 RBRACE
	 * The name's node is retyped to UFUNC as soon as it is seen,
	 * funparm is 1 only while the parameter list is being parsed,
	 * and uexit() is called with the parameter list once the body
	 * has been parsed.  The definition (parameters, flipped body) is
	 * stored in the name's n_ufunc; nothing is added to the tree.
	 */
177 	| DEFFUNC VAR
178 		{ $2->n_type = UFUNC; funparm = 1; }
179 	    LPAREN varlist RPAREN
180 		{ funparm = 0; }
181 	    LBRACE statlist { uexit($5); } RBRACE = {
182 		$2->n_ufunc = node(DEFFUNC, $5, fliplist($9));
183 		$$ = NNULL;
184 	}
	/* The name is already a UFUNC: report a redefinition. */
185 	| DEFFUNC UFUNC				= {
186 		awkerr((char *) gettext("function \"%S\" redefined"), $2->n_name);
187 		/* NOTREACHED */
188 	}
	/* Empty rule. */
189 	|					= {
190 		$$ = NNULL;
191 	}
192 	;
194 
/*
 * A pattern is either a simple pattern or a pair "expr COMMA expr".
 * The pair form is stored as a COMMA node holding both expressions
 * and counts towards npattern.
 */
195 pattern:
196 	  simplepattern
197 	| expr COMMA expr			= {
198 		++npattern;
199 		$$ = node(COMMA, $1, $3);
200 	}
201 	;
202 
/*
 * BEGIN, END or a single expression.  BEGIN increments doing_begin
 * (cleared again in "rule") and does not count towards npattern;
 * END and expression patterns do.
 */
203 simplepattern:
204 	  BEGIN					= {
205 		$$ = node(BEGIN, NNULL, NNULL);
206 		doing_begin++;
207 	}
208 	| END					= {
209 		++npattern;
210 		$$ = node(END, NNULL, NNULL);
211 	}
212 	| expr					 = {
213 		++npattern;
214 		$$ = $1;
215 	}
216 	;
217 
/*
 * Optional expression list: an exprlist, or NNULL when empty.
 */
218 eexprlist:
219 	  exprlist
220 	|					= {
221 		$$ = NNULL;
222 	}
223 	;
224 
/*
 * One or more expressions separated by COMMA.  The production is
 * left-recursive, so a list of three or more is built as a chain of
 * COMMA nodes nested down the left side.  The single-expression
 * alternative is given the precedence of COMMA.
 */
225 exprlist:
226 	  expr %prec COMMA
227 	| exprlist COMMA expr			= {
228 		$$ = node(COMMA, $1, $3);
229 	}
230 	;
231 
/*
 * Optional list of variables (used for the parameter list of a
 * function definition in "rule"): NNULL when empty, otherwise a
 * varlist2.
 */
232 varlist:
233 	  					= {
234 		$$ = NNULL;
235 	}
236 	| varlist2
237 	;
238 
/*
 * One or more variables separated by COMMA.  Unlike exprlist this is
 * right-recursive: each COMMA node has a var on the left and the rest
 * of the list on the right.
 */
239 varlist2:
240 	  var
241 	| var COMMA varlist2			= {
242 		$$ = node(COMMA, $1, $3);
243 	}
244 	;
245 
/*
 * Optional expression: an expr, or NNULL when omitted.  Used for the
 * three clauses of FOR and for the operand of RETURN and EXIT.
 */
246 fexpr:
247 	  expr
248 	|					= {
249 		$$ = NNULL;
250 	}
251 	;
252 
253 /*
254  * Normal expression (includes regular expression)
 *
 * Binary operators build node(OP, left operand, right operand) and
 * rely on the precedence table above for grouping.  Assignment and
 * post-increment/decrement require an lvalue on the left.  The
 * character-constant tokens (PLUSC, HYPHEN, STAR, SLASH, PERCENT,
 * RANGLE, LANGLE) are mapped to the operator node types ADD, SUB, MUL,
 * DIV, REM, GT and LT.
255  */
256 expr:
257 	  expr PLUSC expr			= {
258 		$$ = node(ADD, $1, $3);
259 	}
260 	| expr HYPHEN expr			= {
261 		$$ = node(SUB, $1, $3);
262 	}
263 	| expr STAR expr			= {
264 		$$ = node(MUL, $1, $3);
265 	}
266 	| expr SLASH expr			= {
267 		$$ = node(DIV, $1, $3);
268 	}
269 	| expr PERCENT expr			= {
270 		$$ = node(REM, $1, $3);
271 	}
272 	| expr EXP expr				= {
273 		$$ = node(EXP, $1, $3);
274 	}
275 	| expr AND expr				= {
276 		$$ = node(AND, $1, $3);
277 	}
278 	| expr OR expr				= {
279 		$$ = node(OR, $1, $3);
280 	}
	/* Conditional: the two result branches hang off a COLON node. */
281 	| expr QUEST expr COLON expr		= {
282 		$$ = node(QUEST, $1, node(COLON, $3, $5));
283 	}
284 	| lvalue ASG expr			= {
285 		$$ = node(ASG, $1, $3);
286 	}
287 	| lvalue AADD expr			= {
288 		$$ = node(AADD, $1, $3);
289 	}
290 	| lvalue ASUB expr			= {
291 		$$ = node(ASUB, $1, $3);
292 	}
293 	| lvalue AMUL expr			= {
294 		$$ = node(AMUL, $1, $3);
295 	}
296 	| lvalue ADIV expr			= {
297 		$$ = node(ADIV, $1, $3);
298 	}
299 	| lvalue AREM expr			= {
300 		$$ = node(AREM, $1, $3);
301 	}
302 	| lvalue AEXP expr			= {
303 		$$ = node(AEXP, $1, $3);
304 	}
	/* Post-increment / post-decrement (the prefix forms are in rvalue). */
305 	| lvalue INC				= {
306 		$$ = node(INC, $1, NNULL);
307 	}
308 	| lvalue DEC				= {
309 		$$ = node(DEC, $1, NNULL);
310 	}
311 	| expr EQ expr				= {
312 		$$ = node(EQ, $1, $3);
313 	}
314 	| expr NE expr				= {
315 		$$ = node(NE, $1, $3);
316 	}
317 	| expr RANGLE expr			= {
318 		$$ = node(GT, $1, $3);
319 	}
320 	| expr LANGLE expr			= {
321 		$$ = node(LT, $1, $3);
322 	}
323 	| expr GE expr				= {
324 		$$ = node(GE, $1, $3);
325 	}
326 	| expr LE expr				= {
327 		$$ = node(LE, $1, $3);
328 	}
329 	| expr TILDE expr			= {
330 		$$ = node(TILDE, $1, $3);
331 	}
332 	| expr NRE expr				= {
333 		$$ = node(NRE, $1, $3);
334 	}
	/*
	 * Membership test.  Note the operands are swapped in the node:
	 * the variable after IN is the left child, the subscript
	 * expression (or parenthesized subscript list) is the right.
	 */
335 	| expr IN var				= {
336 		$$ = node(IN, $3, $1);
337 	}
338 	| LPAREN exprlist RPAREN IN var		= {
339 		$$ = node(IN, $5, $2);
340 	}
341 	| getline
342 	| rvalue
343 	| expr CONCAT expr			= {
344 		$$ = node(CONCAT, $1, $3);
345 	}
346 	;
347 
/*
 * Something that can appear on the left of an assignment: a field
 * reference (DOLLAR rvalue, stored as a FIELD node), a plain variable,
 * or a subscripted variable (INDEX node: variable on the left,
 * subscript list on the right).
 */
347 lvalue:
348 	  DOLLAR rvalue				= {
349 		$$ = node(FIELD, $2, NNULL);
350 	}
351 	/*
352 	 * Prevents conflict with FOR LPAREN var IN var RPAREN production
353 	 */
354 	| var %prec COMMA
355 	| var LSQUARE exprlist RSQUARE		= {
356 		$$ = node(INDEX, $1, $3);
357 	}
358 	;
360 
/*
 * A variable name: either a VAR token or a PARM token.
 */
360 var:
361 	  VAR
362 	| PARM
363 	;
365 
/*
 * Primary and unary expressions.  Productions that end in RPAREN are
 * followed by the empty nonterminal "term", whose action sets catterm.
 */
365 rvalue:
366 	  lvalue %prec COMMA
367 	| CONSTANT
368 	| LPAREN expr RPAREN term		= {
369 		$$ = $2;
370 	}
371 	| EXCLAMATION expr			= {
372 		$$ = node(NOT, $2, NNULL);
373 	}
	/* Unary minus is represented as the subtraction const0 - expr. */
374 	| HYPHEN expr %prec UMINUS		= {
375 		$$ = node(SUB, const0, $2);
376 	}
	/* Unary plus produces no node of its own. */
377 	| PLUSC expr %prec UPLUS		= {
378 		$$ = $2;
379 	}
380 	| DEC lvalue				= {
381 		$$ = node(PRE_DEC, $2, NNULL);
382 	}
383 	| INC lvalue				= {
384 		$$ = node(PRE_INC, $2, NNULL);
385 	}
	/* FUNC token used with or without a parenthesized argument list. */
386 	| FUNC					= {
387 		$$ = node(CALLFUNC, $1, NNULL);
388 	}
389 	| FUNC LPAREN eexprlist RPAREN term	= {
390 		$$ = node(CALLFUNC, $1, $3);
391 	}
392 	| UFUNC LPAREN eexprlist RPAREN term	= {
393 		$$ = node(CALLUFUNC, $1, $3);
394 	}
	/*
	 * A VAR followed by an argument list is also treated as a
	 * user-function call.
	 * NOTE(review): presumably this covers calls to functions whose
	 * definition has not been parsed yet -- confirm.
	 */
395 	| VAR LPAREN eexprlist RPAREN term	= {
396 		$$ = node(CALLUFUNC, $1, $3);
397 	}
	/*
	 * Regular expression literal.  The embedded action sets redelim
	 * to '/' before the URE token is read and occupies position $2,
	 * so URE is $3.  URE has no declared value type, hence the
	 * explicit <node> tag.
	 */
398 	| SLASH {redelim='/';} URE SLASH %prec URE	= {
399 		$$ = $<node>3;
400 	}
401 	;
403 
/*
 * A single statement.  Each form builds a node whose type is the
 * statement keyword; an empty statement (lone SEMI) yields NNULL and
 * a braced block yields its statlist unchanged.
 */
403 statement:
	/* The three FOR clauses are packed as COMMA($3, COMMA($5, $7)). */
404 	  FOR LPAREN fexpr SEMI fexpr SEMI fexpr RPAREN statement = {
405 		$$ = node(FOR, node(COMMA, $3, node(COMMA, $5, $7)), $9);
406 	}
407 	| FOR LPAREN var IN var RPAREN statement = {
408 		register NODE *np;
409 
410 		/*
411 		 * attempt to optimize statements for the form
412 		 *    for (i in x) delete x[i]
413 		 * to
414 		 *    delete x
415 		 */
416 		np = $7;
		/*
		 * The body must be exactly one DELETE whose operand is an
		 * INDEX node with the loop's array ($5) on the left and
		 * the loop variable ($3) on the right; the comparison is
		 * by node pointer.  np is re-pointed at the DELETE operand
		 * part way through the condition.
		 */
417 		if (np != NNULL
418 		 && np->n_type == DELETE
419 		 && (np = np->n_left)->n_type == INDEX
420 		 && np->n_left == $5
421 		 && np->n_right == $3)
422 			$$ = node(DELETE, $5, NNULL);
423 		else
424 			$$ = node(FORIN, node(IN, $3, $5), $7);
425 	}
426 	| WHILE LPAREN expr RPAREN statement	= {
427 		$$ = node(WHILE, $3, $5);
428 	}
	/* As with WHILE, the condition is the left child, the body the right. */
429 	| DO statement WHILE LPAREN expr RPAREN	= {
430 		$$ = node(DO, $5, $2);
431 	}
	/*
	 * IF always carries an ELSE node on its right; the else branch
	 * is NNULL when absent.  The else-less form takes the precedence
	 * of ELSE, which is declared %right above, so a following ELSE
	 * is shifted and binds to the nearest IF.
	 */
432 	| IF LPAREN expr RPAREN statement ELSE statement = {
433 		$$ = node(IF, $3, node(ELSE, $5, $7));
434 	}
435 	| IF LPAREN expr RPAREN statement %prec ELSE	= {
436 		$$ = node(IF, $3, node(ELSE, $5, NNULL));
437 	}
438 	| CONTINUE SEMI				= {
439 		$$ = node(CONTINUE, NNULL, NNULL);
440 	}
441 	| BREAK SEMI				= {
442 		$$ = node(BREAK, NNULL, NNULL);
443 	}
444 	| NEXT SEMI				= {
445 		$$ = node(NEXT, NNULL, NNULL);
446 	}
447 	| DELETE lvalue SEMI			= {
448 		$$ = node(DELETE, $2, NNULL);
449 	}
450 	| RETURN fexpr SEMI			= {
451 		$$ = node(RETURN, $2, NNULL);
452 	}
453 	| EXIT fexpr SEMI			= {
454 		$$ = node(EXIT, $2, NNULL);
455 	}
	/*
	 * PRINT and PRINTF: argument list on the left, output
	 * redirection (fileout, possibly NNULL) on the right.  PRINT
	 * accepts an empty argument list; PRINTF requires at least one
	 * expression.  Both also have an explicitly parenthesized form.
	 */
456 	| PRINT eexprlist fileout SEMI		= {
457 		$$ = node(PRINT, $2, $3);
458 	}
459 	| PRINT LPAREN exprlist RPAREN fileout SEMI	= {
460 		$$ = node(PRINT, $3, $5);
461 	}
462 	| PRINTF exprlist fileout SEMI		= {
463 		$$ = node(PRINTF, $2, $3);
464 	}
465 	| PRINTF LPAREN exprlist RPAREN fileout SEMI	= {
466 		$$ = node(PRINTF, $3, $5);
467 	}
468 	| expr SEMI 				= {
469 		$$ = $1;
470 	}
471 	| SEMI					= {
472 		$$ = NNULL;
473 	}
474 	| LBRACE statlist RBRACE		= {
475 		$$ = $2;
476 	}
477 	;
479 
480 
/*
 * One or more statements.  Statements that reduced to NNULL are
 * dropped; the remaining ones are joined with COMMA nodes.  The
 * production is left-recursive, so the chain nests down the left.
 */
481 statlist:
482 	  statement
483 	| statlist statement			= {
484 		if ($1 == NNULL)
485 			$$ = $2;
486 		else if ($2 == NNULL)
487 			$$ = $1;
488 		else
489 			$$ = node(COMMA, $1, $2);
490 	}
491 	;
492 
/*
 * Optional output redirection for PRINT and PRINTF: a WRITE, APPEND
 * or PIPE token followed by the destination expression, stored as a
 * node of the same type with the expression on the left.  NNULL when
 * there is no redirection.
 */
493 fileout:
494 	  WRITE expr				= {
495 		$$ = node(WRITE, $2, NNULL);
496 	}
497 	| APPEND expr				= {
498 		$$ = node(APPEND, $2, NNULL);
499 	}
500 	| PIPE expr				= {
501 		$$ = node(PIPE, $2, NNULL);
502 	}
503 	|					= {
504 		$$ = NNULL;
505 	}
506 	;
507 
/*
 * The three getline forms.  All build a GETLINE node whose left child
 * is the optional target lvalue (NNULL if omitted).  The right child
 * describes the source: NNULL for the plain form, a PIPESYM node
 * holding the expression before BAR, or an LT node holding the
 * expression after LANGLE.
 */
508 getline:
509 	  GETLINE optvar %prec WRITE		= {
510 		$$ = node(GETLINE, $2, NNULL);
511 	}
512 	| expr BAR GETLINE optvar		= {
513 		$$ = node(GETLINE, $4, node(PIPESYM, $1, NNULL));
514 	}
515 	| GETLINE optvar LANGLE expr		= {
516 		$$ = node(GETLINE, $2, node(LT, $4, NNULL));
517 	}
518 	;
519 
/*
 * Optional lvalue (the target of getline): NNULL when omitted.
 */
520 optvar:
521 	  lvalue
522 	|					= {
523 		$$ = NNULL;
524 	}
525 	;
526 
/*
 * Empty production used after the closing RPAREN of parenthesized
 * expressions and function calls in rvalue; reducing it sets catterm.
 * NOTE(review): presumably catterm tells the lexer that an implicit
 * concatenation operand may follow -- confirm against the lexer.
 */
527 term:
528 	  {catterm = 1;}
529 	;
530 %%
531 /*
532  * Flip a left-recursively generated list
533  * so that it can easily be traversed from left
534  * to right without recursion.
 *
 * A COMMA chain nested down the left, e.g. COMMA(COMMA(a, b), c), is
 * rotated in place into one nested down the right, COMMA(a, COMMA(b, c)).
 * Non-leaf children are processed recursively.  No nodes are allocated;
 * the existing nodes are relinked.
 *
 * np is the root of the (sub)tree to flip and may be NNULL.
 * Returns the root of the flipped tree, which differs from np when a
 * rotation took place.
535  */
536 static NODE *
537 fliplist(np)
538 register NODE *np;
539 {
540 	int type;
541 
	/* Null pointers and nodes flagged as leaves are returned untouched. */
542 	if (np!=NNULL && !isleaf(np->n_flags)
543 #if 0
544 		 && (type = np->n_type)!=FUNC && type!=UFUNC
545 #endif
546 	) {
547 		np->n_right = fliplist(np->n_right);
548 		if ((type=np->n_type)==COMMA) {
549 			register NODE *lp;
550 
			/*
			 * While the left child is itself a COMMA, rotate:
			 * the left child (lp) becomes the new root and np
			 * is re-hung at the end of lp's right-hand chain.
			 */
551 			while ((lp = np->n_left)!=NNULL && lp->n_type==COMMA) {
552 				register NODE* *spp;
553 
554 				lp->n_right = fliplist(lp->n_right);
				/*
				 * Walk down the right links from lp until
				 * spp addresses the first slot that does
				 * not hold a COMMA node.
				 */
555 				for (spp = &lp->n_right;
556 				    *spp != NNULL && (*spp)->n_type==COMMA;
557 				     spp = &(*spp)->n_right)
558 					;
				/*
				 * The element found there becomes np's left
				 * child, np takes its place in the chain,
				 * and the loop continues from lp.
				 */
559 				np->n_left = *spp;
560 				*spp = np;
561 				np = lp;
562 			}
563 		}
		/*
		 * Finally flip the left subtree, unless its root is a
		 * FUNC or UFUNC node, which is left as it is.
		 */
564 		if (np->n_left != NULL &&
565 		    (type = np->n_left->n_type)!= FUNC && type!=UFUNC)
566 			np->n_left = fliplist(np->n_left);
567 	}
568 	return (np);
569 }
570