xref: /illumos-gate/usr/src/cmd/awk_xpg4/awk.y (revision cdd3e9a818787b4def17c9f707f435885ce0ed31)
1 %{
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License, Version 1.0 only
7  * (the "License").  You may not use this file except in compliance
8  * with the License.
9  *
10  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
11  * or http://www.opensolaris.org/os/licensing.
12  * See the License for the specific language governing permissions
13  * and limitations under the License.
14  *
15  * When distributing Covered Code, include this CDDL HEADER in each
16  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
17  * If applicable, add the following below this CDDL HEADER, with the
18  * fields enclosed by brackets "[]" replaced with your own identifying
19  * information: Portions Copyright [yyyy] [name of copyright owner]
20  *
21  * CDDL HEADER END
22  */
23 /*
24  * awk -- YACC grammar
25  *
26  * Copyright (c) 1995 by Sun Microsystems, Inc.
27  *
28  * Copyright 1986, 1992 by Mortice Kern Systems Inc.  All rights reserved.
29  *
30  * This Software is unpublished, valuable, confidential property of
31  * Mortice Kern Systems Inc.  Use is authorized only in accordance
32  * with the terms and conditions of the source licence agreement
33  * protecting this Software.  Any unauthorized use or disclosure of
34  * this Software is strictly prohibited and will result in the
35  * termination of the licence agreement.
36  *
37  * NOTE: this grammar correctly produces NO shift/reduce conflicts from YACC.
38  *
39  */
40 
41 /*
42  * Do not use any character constants as tokens, so the resulting C file
43  * is codeset independent.
44  */
45 
46 #include "awk.h"
47 static NODE * fliplist ANSI((NODE *np));
48 %}
49 
/*
 * Semantic value type.  The only member is a NODE pointer, so every
 * symbol tagged <node> in the declarations below carries a parse-tree
 * node as its value.
 */
50 %union	{
51 	NODE	*node;
52 };
53 
54 /*
55  * Do not use any character constants as tokens, so the resulting C file
56  * is codeset independent.
57  *
58  * Declare terminal symbols before their operator
59  * precedences to get them in a contiguous block
60  * for giant switches in action() and exprreduce().
61  */
62 /* Tokens from exprreduce() */
/*
 * Several of the names declared in this section (e.g. ADD, SUB, MUL,
 * DIV, REM, GT, LT, NOT, PRE_INC, PRE_DEC, FIELD, INDEX, CALLFUNC and
 * FORIN) never appear as symbols in the rules of this grammar; they
 * are used only as node types in the rule actions.  The rules
 * themselves match the character tokens (PLUSC, HYPHEN, ...) declared
 * further down.
 */
63 %token	<node>	PARM ARRAY UFUNC FIELD IN INDEX CONCAT
64 %token	<node>	NOT AND OR EXP QUEST
65 %token	<node>	EQ NE GE LE GT LT
66 %token	<node>	ADD SUB MUL DIV REM INC DEC PRE_INC PRE_DEC
67 %token	<node>	GETLINE CALLFUNC RE TILDE NRE
68 
69 /* Tokens shared by exprreduce() and action() */
70 %token		ASG
71 
72 /* Tokens from action() */
73 %token	<node>	PRINT PRINTF
74 %token	<node>	EXIT RETURN BREAK CONTINUE NEXT
75 %token	<node>	DELETE WHILE DO FOR FORIN IF
76 
77 /*
78  * Terminal symbols not used in action() and exprreduce()
79  * switch statements.
80  */
81 %token	<node>	CONSTANT VAR FUNC
/* PACT is the node type the rule actions use for a pattern/action pair. */
82 %token	<node>	DEFFUNC BEGIN END CLOSE ELSE PACT
/*
 * ELSE is given a precedence so that the else-less IF rule can name it
 * with %prec.  Because this declaration comes before the operator list
 * further down, ELSE ranks below every operator declared there.
 */
83 %right		ELSE
/* CALLUFUNC is used only as a node type, in the rvalue actions. */
84 %token		DOT CALLUFUNC
85 
86 /*
87  * Tokens not used in grammar
88  */
89 %token		KEYWORD SVAR
/* PIPESYM is matched by no rule; the getline action uses it as a node type. */
90 %token		PIPESYM
91 
92 /*
93  * Tokens representing character constants
94  * TILDE, '~', taken care of above
95  */
96 %token BAR		/* '|' */
97        CARAT		/* '^' */
98        LANGLE		/* '<' */
99        RANGLE		/* '>' */
100        PLUSC		/* '+' */
101        HYPHEN		/* '-' */
102        STAR		/* '*' */
103        SLASH		/* '/' */
104        PERCENT		/* '%' */
105        EXCLAMATION	/* '!' */
106        DOLLAR		/* '$' */
107        LSQUARE		/* '[' */
108        RSQUARE		/* ']' */
109        LPAREN		/* '(' */
110        RPAREN		/* ')' */
111        SEMI		/* ';' */
112        LBRACE		/* '{' */
113        RBRACE		/* '}' */
114 
115 /*
116  * Priorities of operators
117  * Lowest to highest
118  */
/*
 * COMMA, PIPE, WRITE, APPEND, the compound assignment operators
 * (AADD, ASUB, AMUL, ADIV, AREM, AEXP), COLON, UPLUS, UMINUS and URE
 * have no token declaration of their own in this file; the lines
 * below are what introduce them.  UPLUS and UMINUS never appear as
 * symbols in a rule: they exist only as %prec targets for the unary
 * PLUSC and HYPHEN alternatives of rvalue.
 */
119 %left	COMMA
120 %right	BAR PIPE WRITE APPEND
121 %right	ASG AADD ASUB AMUL ADIV AREM AEXP
122 %right	QUEST COLON
123 %left	OR
124 %left	AND
125 %left	IN
126 %left	CARAT
127 %left	TILDE NRE
128 %left	EQ NE LANGLE RANGLE GE LE
129 %left	CONCAT
130 %left	PLUSC HYPHEN
131 %left	STAR SLASH PERCENT
132 %right	UPLUS UMINUS
133 %right	EXCLAMATION
134 %right	EXP
135 %right	INC DEC URE
136 %left	DOLLAR LSQUARE RSQUARE
137 %left	LPAREN RPAREN
138 
/*
 * Every nonterminal of the grammar except term carries a <node> value.
 * term is an empty production that exists only for its action, so it
 * has no declared type.  Parsing starts at dummy.
 */
139 %type	<node>	prog rule pattern expr rvalue lvalue fexpr varlist varlist2
140 %type	<node>	statement statlist fileout exprlist eexprlist simplepattern
141 %type	<node>	getline optvar var
142 %type	<node>	dummy
143 
144 %start	dummy
145 %%
146 
/*
 * dummy: start symbol.  The prog actions leave the parsed program in
 * yytree; once the whole program has been reduced, the tree is passed
 * through fliplist() and the result is stored back in yytree.
 */
147 dummy:
148 		prog			{
149 			yytree = fliplist(yytree);
150 		}
151 		;
/*
 * prog: one or more rules separated by SEMI.
 *
 * The production is right-recursive, so the action for the last rule
 * runs first and initialises yytree.  Each earlier rule is then linked
 * in front of the tree built so far with a COMMA node.  A rule whose
 * value is NNULL (a function definition or an empty rule) is not
 * linked in.  The result is kept in yytree; neither action assigns
 * the rule's own value explicitly.
 */
152 prog:
153 	  rule				{
154 		yytree = $1;
155 	}
156 	| rule SEMI prog		{
157 		if ($1 != NNULL) {
158 			if (yytree != NNULL)
159 				yytree = node(COMMA, $1, yytree); else
160 				yytree = $1;
161 		}
162 	}
163 	;
164 
/*
 * rule: one top-level item of the program.
 *
 *   pattern + braced statlist  -> PACT(pattern, statements); clears
 *                                 doing_begin.
 *   braced statlist only       -> PACT(NNULL, statements); counts as a
 *                                 pattern (npattern is incremented).
 *   pattern only               -> PACT(pattern, PRINT with no operands);
 *                                 clears doing_begin.
 *   DEFFUNC VAR (...) body     -> function definition, see below.
 *   DEFFUNC UFUNC              -> the name is already a function:
 *                                 reported through awkerr().
 *   empty                      -> NNULL.
 *
 * Function definition: the three embedded actions each occupy a
 * position in the rule, which is why the parameter list is value 5
 * and the body is value 9.  The first action retypes the name node as
 * UFUNC and sets funparm while the parameter list is parsed; the
 * second clears funparm; the third calls uexit() on the parameter
 * list once the body has been parsed (uexit is defined elsewhere --
 * presumably it leaves the function's parameter scope; confirm there).
 * The final action stores DEFFUNC(parameters, flipped body) in the
 * name node's n_ufunc and yields NNULL, so prog does not link the
 * definition into the program tree.
 */
165 rule:	  pattern LBRACE statlist RBRACE	{
166 		$$ = node(PACT, $1, $3);
167 		doing_begin = 0;
168 	}
169 	| LBRACE statlist RBRACE		{
170 		npattern++;
171 		$$ = node(PACT, NNULL, $2);
172 	}
173 	| pattern				{
174 		$$ = node(PACT, $1, node(PRINT, NNULL, NNULL));
175 		doing_begin = 0;
176 	}
177 	| DEFFUNC VAR
178 		{ $2->n_type = UFUNC; funparm = 1; }
179 	    LPAREN varlist RPAREN
180 		{ funparm = 0; }
181 	    LBRACE statlist { uexit($5); } RBRACE {
182 		$2->n_ufunc = node(DEFFUNC, $5, fliplist($9));
183 		$$ = NNULL;
184 	}
185 	| DEFFUNC UFUNC				{
186 		awkerr((char *) gettext("function \"%S\" redefined"), $2->n_name);
187 		/* NOTREACHED */
188 	}
189 	|					{
190 		$$ = NNULL;
191 	}
192 	;
193 
/*
 * pattern: either a simplepattern, or a range pattern written as two
 * expressions separated by COMMA.  A range pattern becomes
 * COMMA(first, second) and increments npattern once.
 */
194 pattern:
195 	  simplepattern
196 	| expr COMMA expr			{
197 		++npattern;
198 		$$ = node(COMMA, $1, $3);
199 	}
200 	;
201 
/*
 * simplepattern: BEGIN, END or a single expression.
 *
 * BEGIN builds an operand-less BEGIN node and increments doing_begin;
 * it does not touch npattern.  END builds an operand-less END node and
 * an expression is passed through unchanged; both increment npattern.
 */
202 simplepattern:
203 	  BEGIN					{
204 		$$ = node(BEGIN, NNULL, NNULL);
205 		doing_begin++;
206 	}
207 	| END					{
208 		++npattern;
209 		$$ = node(END, NNULL, NNULL);
210 	}
211 	| expr					 {
212 		++npattern;
213 		$$ = $1;
214 	}
215 	;
216 
/*
 * eexprlist: an optional expression list.  Yields the exprlist when
 * one is present and NNULL when it is empty.
 */
217 eexprlist:
218 	  exprlist
219 	|					{
220 		$$ = NNULL;
221 	}
222 	;
223 
/*
 * exprlist: one or more expressions separated by COMMA.
 *
 * The production is left-recursive, so a list a, b, c is built as
 * COMMA(COMMA(a, b), c).  The single-expression alternative is given
 * the precedence of COMMA.
 */
224 exprlist:
225 	  expr %prec COMMA
226 	| exprlist COMMA expr			{
227 		$$ = node(COMMA, $1, $3);
228 	}
229 	;
230 
/*
 * varlist: the optional parameter list of a function definition.
 * Yields NNULL when empty, otherwise the varlist2 value.
 */
231 varlist:
232 						{
233 		$$ = NNULL;
234 	}
235 	| varlist2
236 	;
237 
/*
 * varlist2: one or more var symbols separated by COMMA.
 *
 * Unlike exprlist this production is right-recursive, so a, b, c is
 * built as COMMA(a, COMMA(b, c)).
 */
238 varlist2:
239 	  var
240 	| var COMMA varlist2			{
241 		$$ = node(COMMA, $1, $3);
242 	}
243 	;
244 
/*
 * fexpr: an optional expression; NNULL when absent.  Used for the
 * three clauses of FOR and for the operand of RETURN and EXIT.
 */
245 fexpr:
246 	  expr
247 	|					{
248 		$$ = NNULL;
249 	}
250 	;
251 
252 /*
253  * Normal expression (includes regular expression)
 *
 * Binary operators build node(type, left operand, right operand).
 * The character tokens are mapped to node types by the actions:
 * PLUSC to ADD, HYPHEN to SUB, STAR to MUL, SLASH to DIV, PERCENT to
 * REM, RANGLE to GT and LANGLE to LT.  The remaining operators use
 * their own token as the node type.
 *
 * The conditional operator builds QUEST(condition, COLON(a, b)).
 * Assignments and postfix INC/DEC require an lvalue on the left;
 * postfix nodes have NNULL as their right operand.
 *
 * For IN the operands are swapped relative to source order: the array
 * var becomes the left child and the subscript the right child.  The
 * parenthesised form takes an exprlist as the subscript.
 *
 * getline and rvalue values are passed through unchanged.
254  */
255 expr:
256 	  expr PLUSC expr			{
257 		$$ = node(ADD, $1, $3);
258 	}
259 	| expr HYPHEN expr			{
260 		$$ = node(SUB, $1, $3);
261 	}
262 	| expr STAR expr			{
263 		$$ = node(MUL, $1, $3);
264 	}
265 	| expr SLASH expr			{
266 		$$ = node(DIV, $1, $3);
267 	}
268 	| expr PERCENT expr			{
269 		$$ = node(REM, $1, $3);
270 	}
271 	| expr EXP expr				{
272 		$$ = node(EXP, $1, $3);
273 	}
274 	| expr AND expr				{
275 		$$ = node(AND, $1, $3);
276 	}
277 	| expr OR expr				{
278 		$$ = node(OR, $1, $3);
279 	}
280 	| expr QUEST expr COLON expr		{
281 		$$ = node(QUEST, $1, node(COLON, $3, $5));
282 	}
283 	| lvalue ASG expr			{
284 		$$ = node(ASG, $1, $3);
285 	}
286 	| lvalue AADD expr			{
287 		$$ = node(AADD, $1, $3);
288 	}
289 	| lvalue ASUB expr			{
290 		$$ = node(ASUB, $1, $3);
291 	}
292 	| lvalue AMUL expr			{
293 		$$ = node(AMUL, $1, $3);
294 	}
295 	| lvalue ADIV expr			{
296 		$$ = node(ADIV, $1, $3);
297 	}
298 	| lvalue AREM expr			{
299 		$$ = node(AREM, $1, $3);
300 	}
301 	| lvalue AEXP expr			{
302 		$$ = node(AEXP, $1, $3);
303 	}
304 	| lvalue INC				{
305 		$$ = node(INC, $1, NNULL);
306 	}
307 	| lvalue DEC				{
308 		$$ = node(DEC, $1, NNULL);
309 	}
310 	| expr EQ expr				{
311 		$$ = node(EQ, $1, $3);
312 	}
313 	| expr NE expr				{
314 		$$ = node(NE, $1, $3);
315 	}
316 	| expr RANGLE expr			{
317 		$$ = node(GT, $1, $3);
318 	}
319 	| expr LANGLE expr			{
320 		$$ = node(LT, $1, $3);
321 	}
322 	| expr GE expr				{
323 		$$ = node(GE, $1, $3);
324 	}
325 	| expr LE expr				{
326 		$$ = node(LE, $1, $3);
327 	}
328 	| expr TILDE expr			{
329 		$$ = node(TILDE, $1, $3);
330 	}
331 	| expr NRE expr				{
332 		$$ = node(NRE, $1, $3);
333 	}
334 	| expr IN var				{
335 		$$ = node(IN, $3, $1);
336 	}
337 	| LPAREN exprlist RPAREN IN var		{
338 		$$ = node(IN, $5, $2);
339 	}
340 	| getline
341 	| rvalue
342 	| expr CONCAT expr			{
343 		$$ = node(CONCAT, $1, $3);
344 	}
345 	;
346 
/*
 * lvalue: something that can be assigned to.
 *
 *   DOLLAR rvalue             -> FIELD(rvalue, NNULL)
 *   var                       -> the var itself
 *   var LSQUARE exprlist ...  -> INDEX(var, exprlist)
 */
347 lvalue:
348 	  DOLLAR rvalue				{
349 		$$ = node(FIELD, $2, NNULL);
350 	}
351 	/*
352 	 * Prevents conflict with FOR LPAREN var IN var RPAREN production
353 	 */
354 	| var %prec COMMA
355 	| var LSQUARE exprlist RSQUARE		{
356 		$$ = node(INDEX, $1, $3);
357 	}
358 	;
359 
/*
 * var: a variable name, returned by the lexer as either VAR or PARM.
 * The token's node is passed through unchanged.
 */
360 var:
361 	  VAR
362 	| PARM
363 	;
364 
/*
 * rvalue: primary and unary expressions.
 *
 *   lvalue, CONSTANT        passed through unchanged
 *   parenthesised expr      yields the inner expression
 *   EXCLAMATION expr        NOT(expr, NNULL)
 *   unary HYPHEN            SUB(const0, expr)
 *   unary PLUSC             the operand itself
 *   DEC / INC lvalue        PRE_DEC / PRE_INC(lvalue, NNULL)
 *   FUNC                    CALLFUNC(func, NNULL), no argument list
 *   FUNC ( args )           CALLFUNC(func, args)
 *   UFUNC ( args )          CALLUFUNC(func, args)
 *   VAR ( args )            CALLUFUNC(var, args); the name is still
 *                           typed VAR here -- presumably a function
 *                           called before it is defined (confirm
 *                           against the lexer)
 *   SLASH ... URE SLASH     regular expression literal
 *
 * Every alternative that ends in RPAREN is followed by the empty term
 * production, whose action sets catterm.
 *
 * In the regular expression alternative the embedded action sets
 * redelim before the URE token is read.  That action occupies position
 * 2, so URE is value 3; URE has no declared value type, hence the
 * explicit node tag on the reference.
 */
365 rvalue:
366 	  lvalue %prec COMMA
367 	| CONSTANT
368 	| LPAREN expr RPAREN term		{
369 		$$ = $2;
370 	}
371 	| EXCLAMATION expr			{
372 		$$ = node(NOT, $2, NNULL);
373 	}
374 	| HYPHEN expr %prec UMINUS		{
375 		$$ = node(SUB, const0, $2);
376 	}
377 	| PLUSC expr %prec UPLUS		{
378 		$$ = $2;
379 	}
380 	| DEC lvalue				{
381 		$$ = node(PRE_DEC, $2, NNULL);
382 	}
383 	| INC lvalue				{
384 		$$ = node(PRE_INC, $2, NNULL);
385 	}
386 	| FUNC					{
387 		$$ = node(CALLFUNC, $1, NNULL);
388 	}
389 	| FUNC LPAREN eexprlist RPAREN term	{
390 		$$ = node(CALLFUNC, $1, $3);
391 	}
392 	| UFUNC LPAREN eexprlist RPAREN term	{
393 		$$ = node(CALLUFUNC, $1, $3);
394 	}
395 	| VAR LPAREN eexprlist RPAREN term	{
396 		$$ = node(CALLUFUNC, $1, $3);
397 	}
398 	| SLASH {redelim='/';} URE SLASH %prec URE	{
399 		$$ = $<node>3;
400 	}
401 	;
402 
/*
 * statement: a single awk statement.  Node shapes built by the actions:
 *
 *   FOR      left = COMMA(init, COMMA(condition, increment)),
 *            right = body
 *   FORIN    left = IN(loop var, array var), right = body
 *   WHILE    left = condition, right = body
 *   DO       left = condition, right = body
 *   IF       left = condition, right = ELSE(then, else or NNULL)
 *   DELETE, RETURN, EXIT   left = operand, right = NNULL
 *   PRINT, PRINTF          left = argument list, right = fileout
 *
 * CONTINUE, BREAK and NEXT build operand-less nodes.  An expression
 * statement yields the expression, a bare SEMI yields NNULL and a
 * braced statlist yields the list itself.
 *
 * Note the operand order of the IN node under FORIN: the loop variable
 * is the left child here, whereas the IN expression in expr puts the
 * array on the left.
 *
 * A for-in loop whose body is exactly a DELETE of an INDEX node that
 * refers to the same two var nodes (compared by pointer) is replaced
 * by a DELETE of the whole array.
 *
 * The else-less IF carries the precedence of ELSE; together with the
 * right-associative declaration of ELSE this makes the parser shift a
 * following ELSE, so it binds to the nearest IF.
 *
 * PRINT takes an optional argument list, PRINTF requires at least one
 * argument; both also have a form with the list in parentheses.
 */
403 statement:
404 	  FOR LPAREN fexpr SEMI fexpr SEMI fexpr RPAREN statement {
405 		$$ = node(FOR, node(COMMA, $3, node(COMMA, $5, $7)), $9);
406 	}
407 	| FOR LPAREN var IN var RPAREN statement {
408 		register NODE *np;
409 
410 		/*
411 		 * attempt to optimize statements of the form
412 		 *    for (i in x) delete x[i]
413 		 * to
414 		 *    delete x
415 		 */
416 		np = $7;
417 		if (np != NNULL
418 		 && np->n_type == DELETE
419 		 && (np = np->n_left)->n_type == INDEX
420 		 && np->n_left == $5
421 		 && np->n_right == $3)
422 			$$ = node(DELETE, $5, NNULL);
423 		else
424 			$$ = node(FORIN, node(IN, $3, $5), $7);
425 	}
426 	| WHILE LPAREN expr RPAREN statement	{
427 		$$ = node(WHILE, $3, $5);
428 	}
429 	| DO statement WHILE LPAREN expr RPAREN	{
430 		$$ = node(DO, $5, $2);
431 	}
432 	| IF LPAREN expr RPAREN statement ELSE statement {
433 		$$ = node(IF, $3, node(ELSE, $5, $7));
434 	}
435 	| IF LPAREN expr RPAREN statement %prec ELSE	{
436 		$$ = node(IF, $3, node(ELSE, $5, NNULL));
437 	}
438 	| CONTINUE SEMI				{
439 		$$ = node(CONTINUE, NNULL, NNULL);
440 	}
441 	| BREAK SEMI				{
442 		$$ = node(BREAK, NNULL, NNULL);
443 	}
444 	| NEXT SEMI				{
445 		$$ = node(NEXT, NNULL, NNULL);
446 	}
447 	| DELETE lvalue SEMI			{
448 		$$ = node(DELETE, $2, NNULL);
449 	}
450 	| RETURN fexpr SEMI			{
451 		$$ = node(RETURN, $2, NNULL);
452 	}
453 	| EXIT fexpr SEMI			{
454 		$$ = node(EXIT, $2, NNULL);
455 	}
456 	| PRINT eexprlist fileout SEMI		{
457 		$$ = node(PRINT, $2, $3);
458 	}
459 	| PRINT LPAREN exprlist RPAREN fileout SEMI	{
460 		$$ = node(PRINT, $3, $5);
461 	}
462 	| PRINTF exprlist fileout SEMI		{
463 		$$ = node(PRINTF, $2, $3);
464 	}
465 	| PRINTF LPAREN exprlist RPAREN fileout SEMI	{
466 		$$ = node(PRINTF, $3, $5);
467 	}
468 	| expr SEMI				{
469 		$$ = $1;
470 	}
471 	| SEMI					{
472 		$$ = NNULL;
473 	}
474 	| LBRACE statlist RBRACE		{
475 		$$ = $2;
476 	}
477 	;
478 
479 
/*
 * statlist: one or more statements.
 *
 * The production is left-recursive and links statements with COMMA
 * nodes as COMMA(list so far, next statement).  A NNULL on either side
 * (for example from a bare SEMI) is dropped instead of being linked,
 * so the list may itself be NNULL.
 */
480 statlist:
481 	  statement
482 	| statlist statement			{
483 		if ($1 == NNULL)
484 			$$ = $2;
485 		else if ($2 == NNULL)
486 			$$ = $1;
487 		else
488 			$$ = node(COMMA, $1, $2);
489 	}
490 	;
491 
/*
 * fileout: the optional output destination of PRINT and PRINTF.
 *
 * WRITE, APPEND and PIPE each take an expression and build a node of
 * the same type with that expression as the left operand.  With no
 * destination the value is NNULL.
 */
492 fileout:
493 	  WRITE expr				{
494 		$$ = node(WRITE, $2, NNULL);
495 	}
496 	| APPEND expr				{
497 		$$ = node(APPEND, $2, NNULL);
498 	}
499 	| PIPE expr				{
500 		$$ = node(PIPE, $2, NNULL);
501 	}
502 	|					{
503 		$$ = NNULL;
504 	}
505 	;
506 
/*
 * getline: the three forms of GETLINE.  The left operand is always the
 * optional target lvalue (NNULL when omitted); the right operand
 * identifies the source:
 *
 *   GETLINE optvar              right = NNULL
 *   expr BAR GETLINE optvar     right = PIPESYM(expr, NNULL)
 *   GETLINE optvar LANGLE expr  right = LT(expr, NNULL)
 *
 * The plain form is given the precedence of WRITE.
 */
507 getline:
508 	  GETLINE optvar %prec WRITE		{
509 		$$ = node(GETLINE, $2, NNULL);
510 	}
511 	| expr BAR GETLINE optvar		{
512 		$$ = node(GETLINE, $4, node(PIPESYM, $1, NNULL));
513 	}
514 	| GETLINE optvar LANGLE expr		{
515 		$$ = node(GETLINE, $2, node(LT, $4, NNULL));
516 	}
517 	;
518 
/*
 * optvar: the optional target of GETLINE; an lvalue, or NNULL when
 * none is given.
 */
519 optvar:
520 	  lvalue
521 	|					{
522 		$$ = NNULL;
523 	}
524 	;
525 
/*
 * term: an empty production placed after the closing RPAREN of a
 * parenthesised expression or call in rvalue.  Its only effect is to
 * set catterm, which is defined outside this file (presumably it tells
 * the lexer that a concatenation may follow -- confirm there).
 */
526 term:
527 	  {catterm = 1;}
528 	;
529 %%
530 /*
531  * Flip a left-recursively generated list
532  * so that it can easily be traversed from left
533  * to right without recursion.
 *
 * np is the root of a tree or subtree.  A NNULL pointer or a node
 * for which isleaf(np->n_flags) is true is returned untouched.
 * Otherwise the right subtree is flipped first.  If np is a COMMA
 * node whose left child is also a COMMA node, the tree is rotated
 * until that is no longer the case, which turns COMMA(COMMA(a, b), c)
 * into COMMA(a, COMMA(b, c)).  Finally the left child is flipped as
 * well, unless it is a FUNC or UFUNC node; those are not descended
 * into.
 *
 * Returns the root of the flipped subtree, which may be a different
 * node from the one passed in.
534  */
535 static NODE *
536 fliplist(NODE *np)
537 {
538 	int type;
539 
540 	if (np!=NNULL && !isleaf(np->n_flags)
	/* The extra condition below is compiled out. */
541 #if 0
542 		 && (type = np->n_type)!=FUNC && type!=UFUNC
543 #endif
544 	) {
545 		np->n_right = fliplist(np->n_right);
546 		if ((type=np->n_type)==COMMA) {
547 			register NODE *lp;
548 
			/* Rotate while the left child is itself a COMMA node. */
549 			while ((lp = np->n_left)!=NNULL && lp->n_type==COMMA) {
550 				register NODE* *spp;
551 
552 				lp->n_right = fliplist(lp->n_right);
				/*
				 * Follow lp's right-hand COMMA chain; spp ends
				 * up pointing at the first link along it that
				 * holds NNULL or a non-COMMA node.
				 */
553 				for (spp = &lp->n_right;
554 				    *spp != NNULL && (*spp)->n_type==COMMA;
555 				     spp = &(*spp)->n_right)
556 					;
				/*
				 * Move that element to np's left, hang np in
				 * its place, and continue with lp as the root.
				 */
557 				np->n_left = *spp;
558 				*spp = np;
559 				np = lp;
560 			}
561 		}
		/* Flip the left child too, but do not descend into FUNC or UFUNC. */
562 		if (np->n_left != NULL &&
563 		    (type = np->n_left->n_type)!= FUNC && type!=UFUNC)
564 			np->n_left = fliplist(np->n_left);
565 	}
566 	return (np);
567 }
568