xref: /freebsd/contrib/one-true-awk/awkgram.y (revision 3f0efe05432b1633991114ca4ca330102a561959)
1 /****************************************************************
2 Copyright (C) Lucent Technologies 1997
3 All Rights Reserved
4 
5 Permission to use, copy, modify, and distribute this software and
6 its documentation for any purpose and without fee is hereby
7 granted, provided that the above copyright notice appear in all
8 copies and that both that the copyright notice and this
9 permission notice and warranty disclaimer appear in supporting
10 documentation, and that the name Lucent Technologies or any of
11 its entities not be used in advertising or publicity pertaining
12 to distribution of the software without specific, written prior
13 permission.
14 
15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 THIS SOFTWARE.
23 ****************************************************************/
24 
25 %{
26 #include <stdio.h>
27 #include <string.h>
28 #include "awk.h"
29 
void checkdup(Node *list, Cell *item);

/* Always reports 1; by lex convention a non-zero yywrap() means no further input follows. */
int yywrap(void)
{
	return 1;
}

Node	*beginloc = NULL;	/* statement lists collected from BEGIN blocks */
Node	*endloc = NULL;		/* statement lists collected from END blocks */
bool	infunc = false;		/* set while the body of a function definition is parsed */
int	inloop = 0;		/* loop nesting depth (while, for, do); a counter, not a bool, because loops can nest */
char	*curfname = NULL;	/* name of the function currently being defined */
Node	*arglist = NULL;	/* parameter list of the function currently being defined */
39 %}
40 
/* Semantic value of a grammar symbol; the <tag> used in %token/%type picks the member. */
41 %union {
42 	Node	*p;
43 	Cell	*cp;
44 	int	i;
45 	char	*s;
46 }
47 
/* Token declarations; the <tag> names the %union member that carries each token's value. */
48 %token	<i>	FIRSTTOKEN	/* must be first */
49 %token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
50 %token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
51 %token	<i>	ARRAY
52 %token	<i>	MATCH NOTMATCH MATCHOP
53 %token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
54 %token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
55 %token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
56 %token	<i>	GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
57 %token	<i>	ADD MINUS MULT DIVIDE MOD
58 %token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
59 %token	<i>	PRINT PRINTF SPRINTF
60 %token	<p>	ELSE INTEST CONDEXPR
61 %token	<i>	POSTINCR PREINCR POSTDECR PREDECR
62 %token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
63 %token	<s>	REGEXPR
64 
/* Value types of the nonterminals: <p> node, <s> regex source text, <i> int, <cp> cell. */
65 %type	<p>	pas pattern ppattern plist pplist patlist prarg term re
66 %type	<p>	pa_pat pa_stat pa_stats
67 %type	<s>	reg_expr
68 %type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
69 %type	<p>	var varname funcname varlist
70 %type	<p>	for if else while
71 %type	<i>	do st
72 %type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
73 %type	<i>	subop print
74 %type	<cp>	string
75 
/*
 * Precedence and associativity table. In yacc each successive line binds
 * more tightly than the one before it, so the order of these lines is
 * significant. CAT, UMINUS and UPLUS are referenced by %prec in the rules.
 */
76 %right	ASGNOP
77 %right	'?'
78 %right	':'
79 %left	BOR
80 %left	AND
81 %left	GETLINE
82 %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
83 %left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
84 %left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
85 %left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
86 %left	REGEXPR VAR VARNF IVAR WHILE '('
87 %left	CAT
88 %left	'+' '-'
89 %left	'*' '/' '%'
90 %left	NOT UMINUS UPLUS
91 %right	POWER
92 %right	DECR INCR
93 %left	INDIRECT
94 %token	LASTTOKEN	/* must be last */
95 
96 %%
97 
/*
 * program: start symbol. If no error has been flagged, winner receives the
 * PROGRAM node built from the BEGIN list, the pattern-action list and the
 * END list. On a syntax error the lookahead is discarded, bracecheck() is
 * called and "bailing out" is reported.
 */
98 program:
99 	  pas	{ if (errorflag==0)
100 			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
101 	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }
102 	;
103 
/*
 * and, bor, comma, do, else: each of these tokens may be followed by any
 * number of NL tokens, which the left-recursive alternative absorbs.
 */
103 and:
104 	  AND | and NL
105 	;
106 
107 bor:
108 	  BOR | bor NL
109 	;
110 
111 comma:
112 	  ',' | comma NL
113 	;
114 
115 do:
116 	  DO | do NL
117 	;
118 
119 else:
120 	  ELSE | else NL
121 	;
123 
/*
 * for: the three loop forms - with a condition, without a condition, and
 * "for (v in array)". A mid-rule action increments inloop before the body
 * is parsed and the final action decrements it again. The mid-rule action
 * occupies a value slot of its own, which is why the body is $12, $10 and
 * $8 respectively.
 */
124 for:
125 	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
126 		{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
127 	| FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
128 		{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
129 	| FOR '(' varname IN varname rparen {inloop++;} stmt
130 		{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
131 	;
132 
/*
 * funcname: name in a function definition. setfname() validates and records
 * it. The actions assign no $$, so the rule relies on yacc's default
 * $$ = $1; pa_stat later casts the value back to Cell *.
 */
133 funcname:
134 	  VAR	{ setfname($1); }
135 	| CALL	{ setfname($1); }
136 	;
137 
/* if: the parenthesised condition of an if statement, passed through notnull(). */
138 if:
139 	  IF '(' pattern rparen		{ $$ = notnull($3); }
140 	;
141 
/* lbrace: '{' followed by any number of NL tokens. */
142 lbrace:
143 	  '{' | lbrace NL
144 	;
145 
/* nl: one or more NL tokens. */
146 nl:
147 	  NL | nl NL
148 	;
149 
/* opt_nl: zero or more NL tokens. */
150 opt_nl:
151 	  /* empty */	{ $$ = 0; }
152 	| nl
153 	;
154 
/* opt_pst: optional run of NL / ';' separators (see pst). */
155 opt_pst:
156 	  /* empty */	{ $$ = 0; }
157 	| pst
158 	;
159 
160 
/* opt_simple_stmt: a simple statement or nothing; used for the for-loop header slots. */
161 opt_simple_stmt:
162 	  /* empty */			{ $$ = 0; }
163 	| simple_stmt
164 	;
165 
/* pas: the whole list of pattern-action statements, or 0 when there is none. */
166 pas:
167 	  opt_pst			{ $$ = 0; }
168 	| opt_pst pa_stats opt_pst	{ $$ = $2; }
169 	;
170 
/* pa_pat: a pattern used to select records, passed through notnull(). */
171 pa_pat:
172 	  pattern	{ $$ = notnull($1); }
173 	;
174 
/*
 * pa_stat: one top-level item.
 *  - a pattern without an action gets a PRINT of rectonode() as its action;
 *  - "pat1, pat2" range patterns are built with pa2stat();
 *  - an action without a pattern is a PASTAT with a NIL pattern;
 *  - BEGIN / END bodies are appended to beginloc / endloc and yield 0;
 *  - a function definition sets infunc once its parameter list has been
 *    parsed, clears infunc and curfname afterwards, registers the function
 *    via defn() and yields 0. The mid-rule action takes slot $6, so the
 *    body is $8.
 */
175 pa_stat:
176 	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
177 	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
178 	| pa_pat ',' opt_nl pa_pat		{ $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
179 	| pa_pat ',' opt_nl pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $4, $6); }
180 	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
181 	| XBEGIN lbrace stmtlist '}'
182 		{ beginloc = linkum(beginloc, $3); $$ = 0; }
183 	| XEND lbrace stmtlist '}'
184 		{ endloc = linkum(endloc, $3); $$ = 0; }
185 	| FUNC funcname '(' varlist rparen {infunc = true;} lbrace stmtlist '}'
186 		{ infunc = false; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
187 	;
188 
/* pa_stats: pa_stat items, optionally separated by NL / ';', chained with linkum(). */
189 pa_stats:
190 	  pa_stat
191 	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
192 	;
193 
/* patlist: comma-separated patterns chained with linkum(). */
194 patlist:
195 	  pattern
196 	| patlist comma pattern		{ $$ = linkum($1, $3); }
197 	;
198 
/*
 * ppattern: the restricted form of pattern used for unparenthesised print
 * arguments (via pplist / prarg). Compared with pattern it has no
 * relational-operator alternatives (EQ GE GT LE LT NE) and no '|' GETLINE
 * forms - presumably so that '>' and '|' after print can be parsed as
 * output redirection; confirm against the lexer.
 *
 * For MATCHOP, a /regex/ literal or a constant right operand is compiled
 * with makedfa() and the literal text (or constant node) is freed;
 * otherwise (Node *)1 is stored in the first operand slot and the
 * expression node itself is kept.
 */
199 ppattern:
200 	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
201 	| ppattern '?' ppattern ':' ppattern %prec '?'
202 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
203 	| ppattern bor ppattern %prec BOR
204 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
205 	| ppattern and ppattern %prec AND
206 		{ $$ = op2(AND, notnull($1), notnull($3)); }
207 	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); free($3); }
208 	| ppattern MATCHOP ppattern
209 		{ if (constnode($3)) {
210 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
211 			free($3);
212 		  } else
213 			$$ = op3($2, (Node *)1, $1, $3); }
214 	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
215 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
216 	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
217 	| re
218 	| term
219 	;
220 
/*
 * pattern: the general expression form - assignment, ?:, || and &&,
 * relational operators, regex match, array membership ("in"),
 * "cmd | getline", concatenation (adjacent terms, precedence CAT), bare
 * regex and term.
 *
 * Operands of ?:, || and && that act as conditions are wrapped with
 * notnull(). For MATCHOP, a /regex/ literal or a constant right operand is
 * compiled with makedfa() and the literal text (or constant node) is freed;
 * otherwise (Node *)1 is stored in the first operand slot and the
 * expression node is kept. When the `safe` flag is set, "cmd | getline" is
 * reported as a syntax error and no node is built for it.
 */
221 pattern:
222 	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
223 	| pattern '?' pattern ':' pattern %prec '?'
224 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
225 	| pattern bor pattern %prec BOR
226 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
227 	| pattern and pattern %prec AND
228 		{ $$ = op2(AND, notnull($1), notnull($3)); }
229 	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
230 	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
231 	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
232 	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
233 	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
234 	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
235 	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); free($3); }
236 	| pattern MATCHOP pattern
237 		{ if (constnode($3)) {
238 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
239 			free($3);
240 		  } else
241 			$$ = op3($2, (Node *)1, $1, $3); }
242 	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
243 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
244 	| pattern '|' GETLINE var	{
245 			if (safe) SYNTAX("cmd | getline is unsafe");
246 			else $$ = op3(GETLINE, $4, itonp($2), $1); }
247 	| pattern '|' GETLINE		{
248 			if (safe) SYNTAX("cmd | getline is unsafe");
249 			else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
250 	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
251 	| re
252 	| term
253 	;
254 
/* plist: two or more comma-separated patterns, chained with linkum(). */
255 plist:
256 	  pattern comma pattern		{ $$ = linkum($1, $3); }
257 	| plist comma pattern		{ $$ = linkum($1, $3); }
258 	;
259 
/* pplist: one or more comma-separated ppatterns, chained with linkum(). */
260 pplist:
261 	  ppattern
262 	| pplist comma ppattern		{ $$ = linkum($1, $3); }
263 	;
264 
/*
 * prarg: argument list of print / printf - empty (rectonode() is used),
 * an unparenthesised pplist, or a parenthesised plist.
 */
265 prarg:
266 	  /* empty */			{ $$ = rectonode(); }
267 	| pplist
268 	| '(' plist ')'			{ $$ = $2; }
269 	;
270 
/* print: either output keyword; the token value is used as the statement type by simple_stmt. */
271 print:
272 	  PRINT | PRINTF
273 	;
274 
/* pst: one or more NL / ';' separators. */
275 pst:
276 	  NL | ';' | pst NL | pst ';'
277 	;
278 
/* rbrace: '}' followed by any number of NL tokens. */
279 rbrace:
280 	  '}' | rbrace NL
281 	;
282 
/*
 * re: a bare /regex/ used as an expression - a MATCH against rectonode(),
 * with the regex text compiled by makedfa() and then freed - or the
 * negation of such an expression.
 */
283 re:
284 	   reg_expr
285 		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); free($1); }
286 	| NOT re	{ $$ = op1(NOT, notnull($2)); }
287 	;
288 
/*
 * reg_expr: a /regex/ literal. startreg() is called right after the opening
 * '/' (presumably to switch the lexer into regex mode - confirm in the
 * lexer). The value is the REGEXPR token's string, which every consumer in
 * this grammar is expected to free().
 */
289 reg_expr:
290 	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
291 	;
292 
/* rparen: ')' followed by any number of NL tokens. */
293 rparen:
294 	  ')' | rparen NL
295 	;
296 
/*
 * simple_stmt: print / printf (optionally redirected with '|', APPEND or
 * GT), delete of an array element or a whole array, or an expression used
 * as a statement. When the `safe` flag is set the redirected print forms
 * are reported as syntax errors and no node is built for them. The error
 * alternative discards the lookahead and reports "illegal statement".
 */
297 simple_stmt:
298 	  print prarg '|' term		{
299 			if (safe) SYNTAX("print | is unsafe");
300 			else $$ = stat3($1, $2, itonp($3), $4); }
301 	| print prarg APPEND term	{
302 			if (safe) SYNTAX("print >> is unsafe");
303 			else $$ = stat3($1, $2, itonp($3), $4); }
304 	| print prarg GT term		{
305 			if (safe) SYNTAX("print > is unsafe");
306 			else $$ = stat3($1, $2, itonp($3), $4); }
307 	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
308 	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
309 	| DELETE varname		 { $$ = stat2(DELETE, makearr($2), 0); }
310 	| pattern			{ $$ = exptostat($1); }
311 	| error				{ yyclearin; SYNTAX("illegal statement"); }
312 	;
313 
/* st: statement terminator - one or more NL tokens, or ';' with optional NL tokens after it. */
314 st:
315 	  nl
316 	| ';' opt_nl
317 	;
318 
/*
 * stmt: a single statement.
 *  - break / continue are diagnosed when inloop is zero, but the node is
 *    still built;
 *  - next / nextfile are diagnosed when infunc is set, but the node is
 *    still built;
 *  - do-while and while adjust inloop around their bodies with mid-rule
 *    actions. In the do form the two mid-rule actions take slots $2 and
 *    $4, so the body is $3 and the condition is $7;
 *  - a lone ';' is the empty statement and yields 0.
 */
319 stmt:
320 	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");
321 				  $$ = stat1(BREAK, NIL); }
322 	| CONTINUE st		{  if (!inloop) SYNTAX("continue illegal outside of loops");
323 				  $$ = stat1(CONTINUE, NIL); }
324 	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
325 		{ $$ = stat2(DO, $3, notnull($7)); }
326 	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
327 	| EXIT st		{ $$ = stat1(EXIT, NIL); }
328 	| for
329 	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
330 	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
331 	| lbrace stmtlist rbrace { $$ = $2; }
332 	| NEXT st	{ if (infunc)
333 				SYNTAX("next is illegal inside a function");
334 			  $$ = stat1(NEXT, NIL); }
335 	| NEXTFILE st	{ if (infunc)
336 				SYNTAX("nextfile is illegal inside a function");
337 			  $$ = stat1(NEXTFILE, NIL); }
338 	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
339 	| RETURN st		{ $$ = stat1(RETURN, NIL); }
340 	| simple_stmt st
341 	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }
342 	| ';' opt_nl		{ $$ = 0; }
343 	;
344 
/* stmtlist: one or more statements chained with linkum(). */
345 stmtlist:
346 	  stmt
347 	| stmtlist stmt		{ $$ = linkum($1, $2); }
348 	;
349 
/* subop: either substitution keyword; the token value is used as the node type in term. */
350 subop:
351 	  SUB | GSUB
352 	;
353 
/* string: adjacent STRING tokens are merged with catstr(). */
354 string:
355 	  STRING
356 	| string STRING		{ $$ = catstr($1, $2); }
357 	;
358 
359 term:
360  	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }
361  	| term '+' term			{ $$ = op2(ADD, $1, $3); }
362 	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
363 	| term '*' term			{ $$ = op2(MULT, $1, $3); }
364 	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
365 	| term '%' term			{ $$ = op2(MOD, $1, $3); }
366 	| term POWER term		{ $$ = op2(POWER, $1, $3); }
367 	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
368 	| '+' term %prec UMINUS		{ $$ = op1(UPLUS, $2); }
369 	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
370 	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
371 	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
372 	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }
373 	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
374 	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
375 	| CLOSE term			{ $$ = op1(CLOSE, $2); }
376 	| DECR var			{ $$ = op1(PREDECR, $2); }
377 	| INCR var			{ $$ = op1(PREINCR, $2); }
378 	| var DECR			{ $$ = op1(POSTDECR, $1); }
379 	| var INCR			{ $$ = op1(POSTINCR, $1); }
380 	| GENSUB '(' reg_expr comma pattern comma pattern ')'
381 		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); }
382 	| GENSUB '(' pattern comma pattern comma pattern ')'
383 		{ if (constnode($3)) {
384 			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
385 			free($3);
386 		  } else
387 			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
388 		}
389 	| GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
390 		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); }
391 	| GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
392 		{ if (constnode($3)) {
393 			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
394 			free($3);
395 		  } else
396 			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
397 		}
398 	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
399 	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
400 	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
401 	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
402 	| INDEX '(' pattern comma pattern ')'
403 		{ $$ = op2(INDEX, $3, $5); }
404 	| INDEX '(' pattern comma reg_expr ')'
405 		{ SYNTAX("index() doesn't permit regular expressions");
406 		  $$ = op2(INDEX, $3, (Node*)$5); }
407 	| '(' pattern ')'		{ $$ = $2; }
408 	| MATCHFCN '(' pattern comma reg_expr ')'
409 		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); free($5); }
410 	| MATCHFCN '(' pattern comma pattern ')'
411 		{ if (constnode($5)) {
412 			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
413 			free($5);
414 		  } else
415 			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
416 	| NUMBER			{ $$ = celltonode($1, CCON); }
417 	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
418 		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
419 	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
420 		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); free($7); }
421 	| SPLIT '(' pattern comma varname ')'
422 		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
423 	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
424 	| string	 		{ $$ = celltonode($1, CCON); }
425 	| subop '(' reg_expr comma pattern ')'
426 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); free($3); }
427 	| subop '(' pattern comma pattern ')'
428 		{ if (constnode($3)) {
429 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
430 			free($3);
431 		  } else
432 			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
433 	| subop '(' reg_expr comma pattern comma var ')'
434 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); free($3); }
435 	| subop '(' pattern comma pattern comma var ')'
436 		{ if (constnode($3)) {
437 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
438 			free($3);
439 		  } else
440 			$$ = op4($1, (Node *)1, $3, $5, $7); }
441 	| SUBSTR '(' pattern comma pattern comma pattern ')'
442 		{ $$ = op3(SUBSTR, $3, $5, $7); }
443 	| SUBSTR '(' pattern comma pattern ')'
444 		{ $$ = op3(SUBSTR, $3, $5, NIL); }
445 	| var
446 	;
447 
/*
 * var: something that can be assigned to - a plain name, an array element,
 * or a field reference (IVAR, or INDIRECT applied to a term).
 */
448 var:
449 	  varname
450 	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
451 	| IVAR				{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
452 	| INDIRECT term	 		{ $$ = op1(INDIRECT, $2); }
453 	;
454 
/*
 * varlist: parameter list of a function definition. Every alternative also
 * stores the list built so far in the global arglist; checkdup() diagnoses
 * a repeated parameter name before it is appended.
 */
455 varlist:
456 	  /* nothing */		{ arglist = $$ = 0; }
457 	| VAR			{ arglist = $$ = celltonode($1,CVAR); }
458 	| varlist comma VAR	{
459 			checkdup($1, $3);
460 			arglist = $$ = linkum($1,celltonode($3,CVAR)); }
461 	;
462 
/* varname: an ordinary variable (VAR), a function argument (ARG) or VARNF. */
463 varname:
464 	  VAR			{ $$ = celltonode($1, CVAR); }
465 	| ARG 			{ $$ = op1(ARG, itonp($1)); }
466 	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
467 	;
468 
469 
/* while: the parenthesised condition of a while loop, passed through notnull(). */
470 while:
471 	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
472 	;
473 
474 %%
475 
476 void setfname(Cell *p)
477 {
478 	if (isarr(p))
479 		SYNTAX("%s is an array, not a function", p->nval);
480 	else if (isfcn(p))
481 		SYNTAX("you can't define function %s more than once", p->nval);
482 	curfname = p->nval;
483 }
484 
485 int constnode(Node *p)
486 {
487 	return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
488 }
489 
490 char *strnode(Node *p)
491 {
492 	return ((Cell *)(p->narg[0]))->sval;
493 }
494 
495 Node *notnull(Node *n)
496 {
497 	switch (n->nobj) {
498 	case LE: case LT: case EQ: case NE: case GT: case GE:
499 	case BOR: case AND: case NOT:
500 		return n;
501 	default:
502 		return op2(NE, n, nullnode);
503 	}
504 }
505 
506 void checkdup(Node *vl, Cell *cp)	/* check if name already in list */
507 {
508 	char *s = cp->nval;
509 	for ( ; vl; vl = vl->nnext) {
510 		if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
511 			SYNTAX("duplicate argument %s", s);
512 			break;
513 		}
514 	}
515 }
516