xref: /illumos-gate/usr/src/cmd/awk/awk.g.y (revision 9a63ec2733c93e3ee31e95779a4f2f45a57eda59)
1 %{
2 /*
3  * Copyright (C) Lucent Technologies 1997
4  * All Rights Reserved
5  *
6  * Permission to use, copy, modify, and distribute this software and
7  * its documentation for any purpose and without fee is hereby
8  * granted, provided that the above copyright notice appear in all
9  * copies and that both that the copyright notice and this
10  * permission notice and warranty disclaimer appear in supporting
11  * documentation, and that the name Lucent Technologies or any of
12  * its entities not be used in advertising or publicity pertaining
13  * to distribution of the software without specific, written prior
14  * permission.
15  *
16  * LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
17  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
18  * IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
19  * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
20  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
21  * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
22  * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
23  * THIS SOFTWARE.
24  */
25 
26 /*
27  * CDDL HEADER START
28  *
29  * The contents of this file are subject to the terms of the
30  * Common Development and Distribution License, Version 1.0 only
31  * (the "License").  You may not use this file except in compliance
32  * with the License.
33  *
34  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
35  * or http://www.opensolaris.org/os/licensing.
36  * See the License for the specific language governing permissions
37  * and limitations under the License.
38  *
39  * When distributing Covered Code, include this CDDL HEADER in each
40  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
41  * If applicable, add the following below this CDDL HEADER, with the
42  * fields enclosed by brackets "[]" replaced with your own identifying
43  * information: Portions Copyright [yyyy] [name of copyright owner]
44  *
45  * CDDL HEADER END
46  */
47 %}
48 /*
49  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
50  * Use is subject to license terms.
51  */
52 
53 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
54 /*	  All Rights Reserved  	*/
55 
56 %{
/*
 * C prologue: parser-wide state shared by the grammar actions below,
 * plus forward declarations for the helper functions that are defined
 * after the closing %% of the rules section.
 */
57 #include "awk.h"
58 
59 void checkdup(Node *list, Cell *item);
/* Always returns 1. */
60 int yywrap(void) { return(1); }
61 
/*
 * beginloc and endloc collect the statement lists of XBEGIN and XEND
 * blocks (see pa_stat); both are handed to stat3(PROGRAM, ...) by the
 * program rule.
 */
62 Node	*beginloc = NULL;
63 Node	*endloc = NULL;
/*
 * infunc and inloop are raised and lowered by mid-rule actions around
 * function bodies and loop bodies respectively.
 */
64 int	infunc	= 0;		/* = 1 if in arglist or body of func */
65 int	inloop	= 0;		/* = 1 if in while, for, do */
66 char	*curfname = NULL;	/* current function name */
67 Node	*arglist = NULL;	/* list of args for current function */
68 static void	setfname(Cell *);
69 static int	constnode(Node *);
70 static char	*strnode(Node *);
71 static Node	*notnull(Node *);
72 %}
73 
/*
 * Semantic value type.  The <p>, <cp>, <i> and <s> tags used in the
 * %token and %type declarations select one of these members.
 */
74 %union {
75 	Node	*p;	/* parse tree node */
76 	Cell	*cp;	/* Cell attached to VAR, CALL, NUMBER, STRING, ... tokens */
77 	int	i;	/* integer-valued tokens and nonterminals */
78 	char	*s;	/* text of a regular expression (REGEXPR, reg_expr) */
79 }
80 
81 %token	<i>	FIRSTTOKEN	/* must be first */
82 %token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
83 %token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
84 %token	<i>	ARRAY
85 %token	<i>	MATCH NOTMATCH MATCHOP
86 %token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE
87 %token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
88 %token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
89 %token	<i>	SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
90 %token	<i>	ADD MINUS MULT DIVIDE MOD
91 %token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
92 %token	<i>	PRINT PRINTF SPRINTF
93 %token	<p>	ELSE INTEST CONDEXPR
94 %token	<i>	POSTINCR PREINCR POSTDECR PREDECR
95 %token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
96 %token	<s>	REGEXPR
97 
98 %type	<p>	pas pattern ppattern plist pplist patlist prarg term re
99 %type	<p>	pa_pat pa_stat pa_stats
100 %type	<s>	reg_expr
101 %type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
102 %type	<p>	var varname funcname varlist
103 %type	<p>	for if else while
104 %type	<i>	do st
105 %type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
106 %type	<i>	subop print
107 
/*
 * Operator precedence and associativity.  Per yacc convention the
 * declarations run from lowest precedence (ASGNOP) to highest
 * (INDIRECT).  CAT, NOT, UMINUS and UPLUS appear in this file only here
 * and in %prec clauses or as node types passed to op1()/op2().
 */
108 %right	ASGNOP
109 %right	'?'
110 %right	':'
111 %left	BOR
112 %left	AND
113 %left	GETLINE
114 %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
115 %left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
116 %left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
117 %left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
118 %left	REGEXPR VAR VARNF IVAR WHILE '('
119 %left	CAT
120 %left	'+' '-'
121 %left	'*' '/' '%'
122 %left	NOT UMINUS UPLUS
123 %right	POWER
124 %right	DECR INCR
125 %left	INDIRECT
126 %token	LASTTOKEN	/* must be last */
127 
128 %%
129 
/*
 * program: first rule of the grammar and therefore its start symbol.
 * When no error has been flagged, the finished tree
 * PROGRAM(beginloc, pattern-action list, endloc) is stored in winner.
 * The error alternative discards the lookahead token, calls
 * bracecheck() and reports "bailing out".
 */
130 program:
131 	  pas	{ if (errorflag==0)
132 			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
133 	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }
134 	;
135 
/*
 * and, bor, comma, do, else: each is its token optionally followed by
 * any number of NL tokens, so a line break is accepted after these
 * tokens.  No actions are given; yacc's default $$ = $1 applies.
 */
136 and:
137 	  AND | and NL
138 	;
139 
140 bor:
141 	  BOR | bor NL
142 	;
143 
144 comma:
145 	  ',' | comma NL
146 	;
147 
148 do:
149 	  DO | do NL
150 	;
151 
152 else:
153 	  ELSE | else NL
154 	;
155 
/*
 * for: the three forms of the for statement.
 *   1. for (init; cond; step) body  - cond is wrapped by notnull()
 *   2. for (init; ; step) body      - condition slot is NIL
 *   3. for (var in array) body      - built as an IN node
 * The mid-rule action {inloop++;} occupies a $N slot of its own, which
 * is why the body is $12, $10 and $8 respectively.  inloop is lowered
 * again once the body has been reduced.
 */
156 for:
157 	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
158 		{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
159 	| FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
160 		{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
161 	| FOR '(' varname IN varname rparen {inloop++;} stmt
162 		{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
163 	;
164 
/*
 * funcname: the name in a function definition, lexed either as VAR or
 * as CALL.  Both are passed to setfname().  $$ is not assigned
 * explicitly, so the rule's value is yacc's default copy of $1; pa_stat
 * later casts it back to Cell * for defn().
 */
165 funcname:
166 	  VAR	{ setfname($1); }
167 	| CALL	{ setfname($1); }
168 	;
169 
/*
 * if: the "if (cond)" head of an if statement.  Its value is the
 * condition after notnull(); stmt combines it with the branches.
 */
170 if:
171 	  IF '(' pattern rparen		{ $$ = notnull($3); }
172 	;
173 
/* lbrace: '{' followed by any number of NL tokens. */
174 lbrace:
175 	  '{' | lbrace NL
176 	;
177 
/* nl: one or more NL tokens. */
178 nl:
179 	  NL | nl NL
180 	;
181 
/* opt_nl: zero or more NL tokens; the empty case yields 0. */
182 opt_nl:
183 	  /* empty */	{ $$ = 0; }
184 	| nl
185 	;
186 
/* opt_pst: optional run of statement terminators (see pst). */
187 opt_pst:
188 	  /* empty */	{ $$ = 0; }
189 	| pst
190 	;
191 
192 
/* opt_simple_stmt: a simple_stmt or nothing (0); used by the for rule. */
193 opt_simple_stmt:
194 	  /* empty */			{ $$ = 0; }
195 	| simple_stmt
196 	;
197 
/*
 * pas: the whole list of pattern-action statements, with optional
 * terminators before and after.  An input holding only terminators
 * yields 0.
 */
198 pas:
199 	  opt_pst			{ $$ = 0; }
200 	| opt_pst pa_stats opt_pst	{ $$ = $2; }
201 	;
202 
/* pa_pat: a pattern in pattern-action position, wrapped by notnull(). */
203 pa_pat:
204 	  pattern	{ $$ = notnull($1); }
205 	;
206 
/*
 * pa_stat: one top-level item of the program.
 *   - pattern alone, or a pattern range alone: the action defaults to
 *     stat2(PRINT, rectonode(), NIL).
 *   - pattern { ... } and range { ... }: explicit action.
 *   - { ... } with no pattern: PASTAT with a NIL pattern.
 *   - XBEGIN / XEND blocks: the statements are appended to beginloc or
 *     endloc with linkum(); the rule itself yields 0.
 *   - FUNC definition: infunc is raised by the mid-rule action before
 *     the body is parsed and lowered afterwards; curfname is cleared
 *     and defn() receives the name cell ($2), the parameter list ($4)
 *     and the body ($8; $6 is the mid-rule action's slot).
 */
207 pa_stat:
208 	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
209 	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
210 	| pa_pat ',' opt_nl pa_pat		{ $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
211 	| pa_pat ',' opt_nl pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $4, $6); }
212 	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
213 	| XBEGIN lbrace stmtlist '}'
214 		{ beginloc = linkum(beginloc, $3); $$ = 0; }
215 	| XEND lbrace stmtlist '}'
216 		{ endloc = linkum(endloc, $3); $$ = 0; }
217 	| FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
218 		{ infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
219 	;
220 
/*
 * pa_stats: one or more pa_stat items, optionally separated by
 * terminators, chained together with linkum().
 */
221 pa_stats:
222 	  pa_stat
223 	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
224 	;
225 
/* patlist: one or more comma-separated patterns chained with linkum(). */
226 patlist:
227 	  pattern
228 	| patlist comma pattern		{ $$ = linkum($1, $3); }
229 	;
230 
/*
 * ppattern: the expression form used for unparenthesized print
 * arguments (reached through pplist and prarg).  It mirrors pattern
 * but has no comparison operators (EQ, GE, GT, LE, LT, NE) and no
 * "| GETLINE" alternatives.
 * NOTE(review): presumably this keeps '>' and '|' free for the output
 * redirections in simple_stmt - confirm.
 *
 * For MATCHOP with a non-regex right operand: when constnode() says the
 * operand is a constant, its string is compiled immediately with
 * makedfa(); otherwise the operand node is kept and the first op3
 * argument is (Node *)1 rather than NIL.
 */
231 ppattern:
232 	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
233 	| ppattern '?' ppattern ':' ppattern %prec '?'
234 		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
235 	| ppattern bor ppattern %prec BOR
236 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
237 	| ppattern and ppattern %prec AND
238 		{ $$ = op2(AND, notnull($1), notnull($3)); }
239 	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
240 	| ppattern MATCHOP ppattern
241 		{ if (constnode($3))
242 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
243 		  else
244 			$$ = op3($2, (Node *)1, $1, $3); }
245 	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
246 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
247 	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
248 	| re
249 	| term
250 	;
251 
/*
 * pattern: the general expression nonterminal.
 *   - Operands of '?', BOR and AND that act as truth values are wrapped
 *     by notnull().
 *   - Comparison operators pass the token value ($2) straight to op2().
 *   - MATCHOP: a literal reg_expr, or an operand that constnode()
 *     reports as constant, is compiled at parse time with makedfa();
 *     any other operand is kept as a node and the first op3 argument
 *     is (Node *)1 rather than NIL.
 *   - "pattern | GETLINE [var]" is reported as a syntax error when
 *     `safe` is set; in that branch $$ is not assigned by the action.
 *   - Two adjacent expressions form a CAT node (precedence from
 *     %prec CAT).
 */
252 pattern:
253 	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
254 	| pattern '?' pattern ':' pattern %prec '?'
255 		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
256 	| pattern bor pattern %prec BOR
257 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
258 	| pattern and pattern %prec AND
259 		{ $$ = op2(AND, notnull($1), notnull($3)); }
260 	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
261 	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
262 	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
263 	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
264 	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
265 	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
266 	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
267 	| pattern MATCHOP pattern
268 		{ if (constnode($3))
269 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
270 		  else
271 			$$ = op3($2, (Node *)1, $1, $3); }
272 	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
273 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
274 	| pattern '|' GETLINE var	{
275 			if (safe) SYNTAX("cmd | getline is unsafe");
276 			else $$ = op3(GETLINE, $4, itonp($2), $1); }
277 	| pattern '|' GETLINE		{
278 			if (safe) SYNTAX("cmd | getline is unsafe");
279 			else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
280 	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
281 	| re
282 	| term
283 	;
284 
/*
 * plist: two or more comma-separated patterns chained with linkum().
 * Used inside parentheses: "(a, b) in arr" and "print (a, b)".
 */
285 plist:
286 	  pattern comma pattern		{ $$ = linkum($1, $3); }
287 	| plist comma pattern		{ $$ = linkum($1, $3); }
288 	;
289 
/* pplist: one or more comma-separated ppatterns chained with linkum(). */
290 pplist:
291 	  ppattern
292 	| pplist comma ppattern		{ $$ = linkum($1, $3); }
293 	;
294 
/*
 * prarg: argument list of print/printf.  With no arguments the value
 * is rectonode(); otherwise a pplist or a parenthesized plist.
 */
295 prarg:
296 	  /* empty */			{ $$ = rectonode(); }
297 	| pplist
298 	| '(' plist ')'			{ $$ = $2; }
299 	;
300 
/* print: either output keyword; the token value is the rule's value. */
301 print:
302 	  PRINT | PRINTF
303 	;
304 
/* pst: one or more statement terminators, each an NL or a ';'. */
305 pst:
306 	  NL | ';' | pst NL | pst ';'
307 	;
308 
/* rbrace: '}' followed by any number of NL tokens. */
309 rbrace:
310 	  '}' | rbrace NL
311 	;
312 
/*
 * re: a regular expression used on its own as an expression.  It
 * becomes a MATCH of rectonode() against the DFA compiled from the
 * regex text; a leading NOT negates the result.
 */
313 re:
314 	   reg_expr
315 		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
316 	| NOT re	{ $$ = op1(NOT, notnull($2)); }
317 	;
318 
/*
 * reg_expr: a /regex/ literal.  The mid-rule action calls startreg()
 * right after the opening '/' and before REGEXPR is read; the action
 * takes slot $2, so the regex text is $3.
 * NOTE(review): startreg() presumably switches the lexer into regex
 * mode - confirm.
 */
319 reg_expr:
320 	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
321 	;
322 
/* rparen: ')' followed by any number of NL tokens. */
323 rparen:
324 	  ')' | rparen NL
325 	;
326 
/*
 * simple_stmt: statements that carry no terminator of their own.
 *   - print/printf with output to a pipe ('|'), append (APPEND) or
 *     file (GT): a syntax error is reported when `safe` is set, and in
 *     that branch $$ is not assigned by the action; otherwise the
 *     redirection token value ($3) is stored via itonp().
 *   - plain print/printf: both redirection slots are NIL.
 *   - DELETE of one element (with subscripts) or of a whole array (0).
 *   - a bare expression, converted with exptostat().
 *   - error: discard the lookahead and report "illegal statement".
 */
327 simple_stmt:
328 	  print prarg '|' term		{
329 			if (safe) SYNTAX("print | is unsafe");
330 			else $$ = stat3($1, $2, itonp($3), $4); }
331 	| print prarg APPEND term	{
332 			if (safe) SYNTAX("print >> is unsafe");
333 			else $$ = stat3($1, $2, itonp($3), $4); }
334 	| print prarg GT term		{
335 			if (safe) SYNTAX("print > is unsafe");
336 			else $$ = stat3($1, $2, itonp($3), $4); }
337 	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
338 	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
339 	| DELETE varname		{ $$ = stat2(DELETE, makearr($2), 0); }
340 	| pattern			{ $$ = exptostat($1); }
341 	| error				{ yyclearin; SYNTAX("illegal statement"); }
342 	;
343 
/* st: statement terminator - one or more NLs, or ';' plus optional NLs. */
344 st:
345 	  nl
346 	| ';' opt_nl
347 	;
348 
/*
 * stmt: a complete statement.
 *   - BREAK / CONTINUE report a syntax error when inloop is zero, but
 *     the node is still built.
 *   - do ... while: inloop is raised only around the body; the two
 *     mid-rule actions take slots $2 and $4, so the body is $3 and the
 *     condition is $7.
 *   - NEXT / NEXTFILE report a syntax error when infunc is non-zero,
 *     but the node is still built.
 *   - "simple_stmt st" has no action; yacc's default $$ = $1 applies.
 *   - while: inloop is raised by the mid-rule action ($2), body is $3.
 *   - an empty statement (';') yields 0.
 */
349 stmt:
350 	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");
351 				  $$ = stat1(BREAK, NIL); }
352 	| CONTINUE st		{  if (!inloop) SYNTAX("continue illegal outside of loops");
353 				  $$ = stat1(CONTINUE, NIL); }
354 	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
355 		{ $$ = stat2(DO, $3, notnull($7)); }
356 	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
357 	| EXIT st		{ $$ = stat1(EXIT, NIL); }
358 	| for
359 	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
360 	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
361 	| lbrace stmtlist rbrace { $$ = $2; }
362 	| NEXT st	{ if (infunc)
363 				SYNTAX("next is illegal inside a function");
364 			  $$ = stat1(NEXT, NIL); }
365 	| NEXTFILE st	{ if (infunc)
366 				SYNTAX("nextfile is illegal inside a function");
367 			  $$ = stat1(NEXTFILE, NIL); }
368 	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
369 	| RETURN st		{ $$ = stat1(RETURN, NIL); }
370 	| simple_stmt st
371 	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }
372 	| ';' opt_nl		{ $$ = 0; }
373 	;
374 
/* stmtlist: one or more statements chained with linkum(). */
375 stmtlist:
376 	  stmt
377 	| stmtlist stmt		{ $$ = linkum($1, $2); }
378 	;
379 
/* subop: SUB or GSUB; the token value selects the node type in term. */
380 subop:
381 	  SUB | GSUB
382 	;
383 
/*
 * term: arithmetic, unary operators, built-in and user function calls,
 * getline forms, constants and variables.
 *   - "term '/' ASGNOP term" builds a DIVEQ node.
 *     NOTE(review): presumably this covers "/=" arriving as '/'
 *     followed by ASGNOP - confirm against the lexer.
 *   - Unary '-', unary '+' and NOT all take their precedence from
 *     %prec UMINUS.
 *   - BLTIN with "()" or with no parentheses gets rectonode() as its
 *     argument.
 *   - index() with a regex second argument reports a syntax error.
 *   - match(), sub() and gsub(): a literal reg_expr, or an argument
 *     that constnode() reports as constant, is compiled at parse time
 *     with makedfa(..., 1); any other argument is kept as a node and
 *     the first operand is (Node *)1 rather than NIL.
 *   - sub()/gsub() without a third argument operate on rectonode().
 *   - split(): the last op4 operand is the token constant STRING or
 *     REGEXPR cast to Node *, recording which kind of separator was
 *     given; with no separator it is STRING and the separator is NIL.
 */
384 term:
385 	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }
386 	| term '+' term			{ $$ = op2(ADD, $1, $3); }
387 	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
388 	| term '*' term			{ $$ = op2(MULT, $1, $3); }
389 	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
390 	| term '%' term			{ $$ = op2(MOD, $1, $3); }
391 	| term POWER term		{ $$ = op2(POWER, $1, $3); }
392 	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
393 	| '+' term %prec UMINUS		{ $$ = op1(UPLUS, $2); }
394 	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
395 	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
396 	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
397 	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }
398 	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
399 	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
400 	| CLOSE term			{ $$ = op1(CLOSE, $2); }
401 	| DECR var			{ $$ = op1(PREDECR, $2); }
402 	| INCR var			{ $$ = op1(PREINCR, $2); }
403 	| var DECR			{ $$ = op1(POSTDECR, $1); }
404 	| var INCR			{ $$ = op1(POSTINCR, $1); }
405 	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
406 	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
407 	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
408 	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
409 	| INDEX '(' pattern comma pattern ')'
410 		{ $$ = op2(INDEX, $3, $5); }
411 	| INDEX '(' pattern comma reg_expr ')'
412 		{ SYNTAX("index() doesn't permit regular expressions");
413 		  $$ = op2(INDEX, $3, (Node*)$5); }
414 	| '(' pattern ')'		{ $$ = $2; }
415 	| MATCHFCN '(' pattern comma reg_expr ')'
416 		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
417 	| MATCHFCN '(' pattern comma pattern ')'
418 		{ if (constnode($5))
419 			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
420 		  else
421 			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
422 	| NUMBER			{ $$ = celltonode($1, CCON); }
423 	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
424 		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
425 	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
426 		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
427 	| SPLIT '(' pattern comma varname ')'
428 		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
429 	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
430 	| STRING			{ $$ = celltonode($1, CCON); }
431 	| subop '(' reg_expr comma pattern ')'
432 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
433 	| subop '(' pattern comma pattern ')'
434 		{ if (constnode($3))
435 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
436 		  else
437 			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
438 	| subop '(' reg_expr comma pattern comma var ')'
439 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
440 	| subop '(' pattern comma pattern comma var ')'
441 		{ if (constnode($3))
442 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
443 		  else
444 			$$ = op4($1, (Node *)1, $3, $5, $7); }
445 	| SUBSTR '(' pattern comma pattern comma pattern ')'
446 		{ $$ = op3(SUBSTR, $3, $5, $7); }
447 	| SUBSTR '(' pattern comma pattern ')'
448 		{ $$ = op3(SUBSTR, $3, $5, NIL); }
449 	| var
450 	;
451 
/*
 * var: anything usable as a variable reference - a plain name, an
 * array element (the name is passed through makearr()), an IVAR token,
 * or INDIRECT applied to a term.
 */
452 var:
453 	  varname
454 	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
455 	| IVAR				{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
456 	| INDIRECT term			{ $$ = op1(INDIRECT, $2); }
457 	;
458 
/*
 * varlist: parameter list of a function definition; may be empty.
 * Every alternative also stores the list built so far in the global
 * arglist.  Each additional name is first checked against the names
 * already collected with checkdup().
 */
459 varlist:
460 	  /* nothing */		{ arglist = $$ = 0; }
461 	| VAR			{ arglist = $$ = celltonode($1,CVAR); }
462 	| varlist comma VAR	{
463 			checkdup($1, $3);
464 			arglist = $$ = linkum($1,celltonode($3,CVAR)); }
465 	;
466 
/*
 * varname: a simple variable name - a VAR cell turned into a CVAR
 * node, an ARG token wrapped in an ARG node, or a VARNF cell wrapped
 * in a VARNF node.
 */
467 varname:
468 	  VAR			{ $$ = celltonode($1, CVAR); }
469 	| ARG			{ $$ = op1(ARG, itonp($1)); }
470 	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
471 	;
472 
473 
/*
 * while: the "while (cond)" head of a while loop.  Its value is the
 * condition after notnull(); stmt attaches the body.
 */
474 while:
475 	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
476 	;
477 
478 %%
479 
480 static void
481 setfname(Cell *p)
482 {
483 	if (isarr(p))
484 		SYNTAX("%s is an array, not a function", p->nval);
485 	else if (isfcn(p))
486 		SYNTAX("you can't define function %s more than once", p->nval);
487 	curfname = p->nval;
488 	p->tval |= FCN;
489 }
490 
491 static int
492 constnode(Node *p)
493 {
494 	return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
495 }
496 
497 static char *
498 strnode(Node *p)
499 {
500 	return ((Cell *)(p->narg[0]))->sval;
501 }
502 
503 static Node *
504 notnull(Node *n)
505 {
506 	switch (n->nobj) {
507 	case LE: case LT: case EQ: case NE: case GT: case GE:
508 	case BOR: case AND: case NOT:
509 		return n;
510 	default:
511 		return op2(NE, n, nullnode);
512 	}
513 }
514 
515 void
516 checkdup(Node *vl, Cell *cp)	/* check if name already in list */
517 {
518 	char *s = cp->nval;
519 	for (; vl; vl = vl->nnext) {
520 		if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
521 			SYNTAX("duplicate argument %s", s);
522 			break;
523 		}
524 	}
525 }
526