/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

/*
 * yacc grammar for awk.  The prologue below declares the parser-wide state
 * that the rule actions read and update while a program is being parsed.
 */
%{
#include <stdio.h>
#include <string.h>
#include "awk.h"

void checkdup(Node *list, Cell *item);
/* Always returns 1. */
int yywrap(void) { return(1); }

Node	*beginloc = 0;	/* BEGIN actions, accumulated with linkum() by pa_stat */
Node	*endloc = 0;	/* END actions, accumulated with linkum() by pa_stat */
bool	infunc	= false;	/* = true if in arglist or body of func */
int	inloop	= 0;	/* >= 1 if in while, for, do; can't be bool, since loops can nest */
char	*curfname = 0;	/* current function name */
Node	*arglist = 0;	/* list of args for current function */
%}

/* Semantic value of a grammar symbol; the <tag> on each %token/%type picks the member. */
%union {
	Node	*p;
	Cell	*cp;
	int	i;
	char	*s;
}

/* Tokens.  FIRSTTOKEN here and LASTTOKEN (declared after the precedence table) bracket the list. */
%token	<i>	FIRSTTOKEN	/* must be first */
%token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
%token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
%token	<i>	ARRAY
%token	<i>	MATCH NOTMATCH MATCHOP
%token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
%token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
%token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%token	<i>	GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token	<i>	ADD MINUS MULT DIVIDE MOD
%token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token	<i>	PRINT PRINTF SPRINTF
%token	<p>	ELSE INTEST CONDEXPR
%token	<i>	POSTINCR PREINCR POSTDECR PREDECR
%token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
%token	<s>	REGEXPR

/* Value types of the nonterminals. */
%type	<p>	pas pattern ppattern plist pplist patlist prarg term re
%type	<p>	pa_pat pa_stat pa_stats
%type	<s>	reg_expr
%type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
%type	<p>	var varname funcname varlist
%type	<p>	for if else while
%type	<i>	do st
%type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
%type	<i>	subop print
%type	<cp>	string

/*
 * Precedence and associativity table.  In yacc, symbols declared on later
 * lines bind more tightly than those declared on earlier lines.
 */
%right	ASGNOP
%right	'?'
%right	':'
%left	BOR
%left	AND
%left	GETLINE
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
%left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
%left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
%left	REGEXPR VAR VARNF IVAR WHILE '('
%left	CAT
%left	'+' '-'
%left	'*' '/' '%'
%left	NOT UMINUS UPLUS
%right	POWER
%right	DECR INCR
%left	INDIRECT
%token	LASTTOKEN	/* must be last */

%%

/*
 * Start symbol.  When no error has been flagged, the finished tree
 * (BEGIN actions, pattern-action statements, END actions) is stored in winner.
 */
program:
	  pas	{ if (errorflag==0)
			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }
	;

/*
 * and, bor, comma, do and else each accept their token followed by any
 * number of NL tokens, so a line break is allowed after them.
 */
and:
	  AND | and NL
	;

bor:
	  BOR | bor NL
	;

comma:
	  ',' | comma NL
	;

do:
	  DO | do NL
	;

else:
	  ELSE | else NL
	;

/*
 * The three forms of for: with a condition, without one, and
 * "for (var in array)".  The mid-rule action {inloop++;} runs before the
 * body is parsed and occupies a value slot of its own, which is why the
 * body is $12, $10 and $8 respectively.
 */
for:
	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
	| FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
		{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
	| FOR '(' varname IN varname rparen {inloop++;} stmt
		{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
	;

/* Name in a function definition; setfname() checks it and records it in curfname. */
funcname:
	  VAR	{ setfname($1); }
	| CALL	{ setfname($1); }
	;

/* Head of an if statement; the value is the condition passed through notnull(). */
if:
	  IF '(' pattern rparen		{ $$ = notnull($3); }
	;

/* '{' followed by any number of newlines. */
lbrace:
	  '{' | lbrace NL
	;

/* One or more newlines. */
nl:
	  NL | nl NL
	;

/* Zero or more newlines. */
opt_nl:
	  /* empty */	{ $$ = 0; }
	| nl
	;

/* Optional run of terminators (see pst). */
opt_pst:
	  /* empty */	{ $$ = 0; }
	| pst
	;


opt_simple_stmt:
	  /* empty */			{ $$ = 0; }
	| simple_stmt
	;

/* The pattern-action statements of the program, with optional terminators around them. */
pas:
	  opt_pst			{ $$ = 0; }
	| opt_pst pa_stats opt_pst	{ $$ = $2; }
	;

/* A pattern used to select records, passed through notnull(). */
pa_pat:
	  pattern	{ $$ = notnull($1); }
	;

/*
 * One top-level item.  Alternatives, in order:
 *   pattern alone             - action is a PRINT statement on rectonode()
 *   pattern { ... }
 *   pattern, pattern          - two-pattern form built by pa2stat(), PRINT action
 *   pattern, pattern { ... }
 *   { ... }                   - no pattern (NIL)
 *   BEGIN / END blocks        - appended to beginloc / endloc; yield no node
 *   function definition       - handed to defn(); yields no node
 */
pa_stat:
	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
	| pa_pat ',' opt_nl pa_pat		{ $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat ',' opt_nl pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $4, $6); }
	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
	| XBEGIN lbrace stmtlist '}'
		{ beginloc = linkum(beginloc, $3); $$ = 0; }
	| XEND lbrace stmtlist '}'
		{ endloc = linkum(endloc, $3); $$ = 0; }
	/* infunc is true while the body is parsed; the mid-rule action is $6, so the body is $8 */
	| FUNC funcname '(' varlist rparen {infunc = true;} lbrace stmtlist '}'
		{ infunc = false; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
	;

/* Sequence of top-level items chained with linkum(). */
pa_stats:
	  pa_stat
	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
	;

/* Comma-separated list of patterns chained with linkum(). */
patlist:
	  pattern
	| patlist comma pattern		{ $$ = linkum($1, $3); }
	;

/*
 * Restricted form of pattern: it has no EQ/GE/GT/LE/LT/NE alternatives and
 * no "| GETLINE" alternatives.  It is reached only through pplist, i.e. the
 * unparenthesised argument list of print/printf (see prarg), where GT and
 * '|' introduce a redirection in simple_stmt.
 */
ppattern:
	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
	| ppattern '?' ppattern ':' ppattern %prec '?'
	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| ppattern bor ppattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| ppattern and ppattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| ppattern MATCHOP ppattern
		/* a constant right operand is compiled with makedfa() here, at parse time */
		/* NOTE(review): (Node *)1 in the first slot appears to flag a non-constant
		   regex that has to be handled at run time - confirm in the MATCHOP code */
		{ if (constnode($3))
			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
		  else
			$$ = op3($2, (Node *)1, $1, $3); }
	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

/* General expression: assignment, ?:, logical, comparison, match, "in", getline from a pipe, concatenation. */
pattern:
	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
	| pattern '?' pattern ':' pattern %prec '?'
	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| pattern bor pattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| pattern and pattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| pattern MATCHOP pattern
		/* same constant / non-constant split as in ppattern */
		{ if (constnode($3))
			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
		  else
			$$ = op3($2, (Node *)1, $1, $3); }
	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
	/* reading from a command is rejected when safe is set; $$ is not assigned on that path */
	| pattern '|' GETLINE var	{
			if (safe) SYNTAX("cmd | getline is unsafe");
			else $$ = op3(GETLINE, $4, itonp($2), $1); }
	| pattern '|' GETLINE		{
			if (safe) SYNTAX("cmd | getline is unsafe");
			else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

/* Comma-separated list of at least two patterns (used inside parentheses). */
plist:
	  pattern comma pattern		{ $$ = linkum($1, $3); }
	| plist comma pattern		{ $$ = linkum($1, $3); }
	;

/* Comma-separated list of ppatterns. */
pplist:
	  ppattern
	| pplist comma ppattern		{ $$ = linkum($1, $3); }
	;

/* Arguments of print/printf; with none given the value is rectonode(). */
prarg:
	  /* empty */			{ $$ = rectonode(); }
	| pplist
	| '(' plist ')'			{ $$ = $2; }
	;

print:
	  PRINT | PRINTF
	;

/* One or more terminators: newlines and semicolons in any mix. */
pst:
	  NL | ';' | pst NL | pst ';'
	;

/* '}' followed by any number of newlines. */
rbrace:
	  '}' | rbrace NL
	;

/* A bare /regex/ used as an expression: a MATCH against rectonode(), optionally negated. */
re:
	   reg_expr
		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
	| NOT re	{ $$ = op1(NOT, notnull($2)); }
	;

/* startreg() is called right after the opening '/', before the REGEXPR token is read. */
reg_expr:
	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
	;

/* ')' followed by any number of newlines. */
rparen:
	  ')' | rparen NL
	;

/*
 * Statements that may appear in a for header: print/printf (with optional
 * redirection), delete, or an expression.  Redirected output is rejected
 * when safe is set; $$ is not assigned on that path.
 */
simple_stmt:
	  print prarg '|' term		{
			if (safe) SYNTAX("print | is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg APPEND term	{
			if (safe) SYNTAX("print >> is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg GT term		{
			if (safe) SYNTAX("print > is unsafe");
			else $$ = stat3($1, $2, itonp($3), $4); }
	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
	| DELETE varname		 { $$ = stat2(DELETE, makearr($2), 0); }
	| pattern			{ $$ = exptostat($1); }
	| error				{ yyclearin; SYNTAX("illegal statement"); }
	;

/* Statement terminator: newline(s), or ';' optionally followed by newlines. */
st:
	  nl
	| ';' opt_nl
	;

/*
 * A full statement.  break/continue are diagnosed when inloop is zero;
 * next/nextfile are diagnosed when infunc is set.  The node is still built
 * after the diagnostic in each of those cases.
 */
stmt:
	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");
				  $$ = stat1(BREAK, NIL); }
	| CONTINUE st		{  if (!inloop) SYNTAX("continue illegal outside of loops");
			 	  $$ = stat1(CONTINUE, NIL); }
	/* the two mid-rule actions are $2 and $4, so the body is $3 and the condition $7 */
	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
		{ $$ = stat2(DO, $3, notnull($7)); }
	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
	| EXIT st		{ $$ = stat1(EXIT, NIL); }
	| for
	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
	| lbrace stmtlist rbrace { $$ = $2; }
	| NEXT st	{ if (infunc)
				SYNTAX("next is illegal inside a function");
			  $$ = stat1(NEXT, NIL); }
	| NEXTFILE st	{ if (infunc)
				SYNTAX("nextfile is illegal inside a function");
			  $$ = stat1(NEXTFILE, NIL); }
	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
	| RETURN st		{ $$ = stat1(RETURN, NIL); }
	| simple_stmt st
	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }
	| ';' opt_nl		{ $$ = 0; }
	;

/* One or more statements chained with linkum(). */
stmtlist:
	  stmt
	| stmtlist stmt		{ $$ = linkum($1, $2); }
	;

subop:
	  SUB | GSUB
	;

/* Adjacent STRING tokens are joined with catstr(). */
string:
	  STRING
	| string STRING		{ $$ = catstr($1, $2); }
	;

/*
 * Arithmetic, unary operators, function calls, built-ins and primaries.
 * Wherever a regular expression argument is accepted there are two
 * alternatives: a literal reg_expr, compiled directly with makedfa(), and a
 * general pattern, which is compiled here only if constnode() says it is a
 * constant and is otherwise passed through with (Node *)1 in the first slot.
 */
term:
 	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }
 	| term '+' term			{ $$ = op2(ADD, $1, $3); }
	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
	| term '*' term			{ $$ = op2(MULT, $1, $3); }
	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
	| term '%' term			{ $$ = op2(MOD, $1, $3); }
	| term POWER term		{ $$ = op2(POWER, $1, $3); }
	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
	| '+' term %prec UMINUS		{ $$ = op1(UPLUS, $2); }
	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
	/* a built-in called with "()" or with no parentheses gets rectonode() as its argument */
	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
	| CLOSE term			{ $$ = op1(CLOSE, $2); }
	| DECR var			{ $$ = op1(PREDECR, $2); }
	| INCR var			{ $$ = op1(PREINCR, $2); }
	| var DECR			{ $$ = op1(POSTDECR, $1); }
	| var INCR			{ $$ = op1(POSTINCR, $1); }
	/* gensub with three arguments: rectonode() is supplied as the last operand */
	| GENSUB '(' reg_expr comma pattern comma pattern ')'
		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); }
	| GENSUB '(' pattern comma pattern comma pattern ')'
		{ if (constnode($3))
			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
		  else
			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
		}
	/* gensub with four arguments: the last operand is given explicitly */
	| GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
		{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); }
	| GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
		{ if (constnode($3))
			$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
		  else
			$$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
		}
	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
	| INDEX '(' pattern comma pattern ')'
		{ $$ = op2(INDEX, $3, $5); }
	/* accepted by the grammar only so that a specific diagnostic can be issued */
	| INDEX '(' pattern comma reg_expr ')'
		{ SYNTAX("index() doesn't permit regular expressions");
		  $$ = op2(INDEX, $3, (Node*)$5); }
	| '(' pattern ')'		{ $$ = $2; }
	| MATCHFCN '(' pattern comma reg_expr ')'
		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
	| MATCHFCN '(' pattern comma pattern ')'
		{ if (constnode($5))
			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
		  else
			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
	| NUMBER			{ $$ = celltonode($1, CCON); }
	/* the last operand of SPLIT is a token code (STRING or REGEXPR) cast to Node *, not a real node */
	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
	| SPLIT '(' pattern comma varname ')'
		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
	| string	 		{ $$ = celltonode($1, CCON); }
	/* sub/gsub with two arguments: rectonode() is supplied as the target */
	| subop '(' reg_expr comma pattern ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
	| subop '(' pattern comma pattern ')'
		{ if (constnode($3))
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
		  else
			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
	/* sub/gsub with an explicit target, which must be a var */
	| subop '(' reg_expr comma pattern comma var ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
	| subop '(' pattern comma pattern comma var ')'
		{ if (constnode($3))
			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
		  else
			$$ = op4($1, (Node *)1, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, NIL); }
	| var
	;

/* Something that can be assigned to: a name, an array element, or an INDIRECT reference. */
var:
	  varname
	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
	| IVAR				{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
	| INDIRECT term	 		{ $$ = op1(INDIRECT, $2); }
	;

/*
 * Parameter list of a function definition.  arglist is kept equal to the
 * list built so far; checkdup() diagnoses a repeated parameter name.
 */
varlist:
	  /* nothing */		{ arglist = $$ = 0; }
	| VAR			{ arglist = $$ = celltonode($1,CVAR); }
	| varlist comma VAR	{
			checkdup($1, $3);
			arglist = $$ = linkum($1,celltonode($3,CVAR)); }
	;

varname:
	  VAR			{ $$ = celltonode($1, CVAR); }
	| ARG 			{ $$ = op1(ARG, itonp($1)); }
	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
	;


/* Head of a while statement; the value is the condition passed through notnull(). */
while:
	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
	;

%%

/*
 * Record p as the function now being defined.  A syntax error is reported
 * if p is already an array or already a function; curfname is set to
 * p->nval in every case.
 */
void setfname(Cell *p)
{
	if (isarr(p))
		SYNTAX("%s is an array, not a function", p->nval);
	else if (isfcn(p))
		SYNTAX("you can't define function %s more than once", p->nval);
	curfname = p->nval;
}

/* Return nonzero when p is a value node (isvalue) whose cell in narg[0] has csub == CCON. */
int constnode(Node *p)
{
	return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
}

/* Return the sval of the cell held in p->narg[0]; callers in this file check constnode(p) first. */
char *strnode(Node *p)
{
	return ((Cell *)(p->narg[0]))->sval;
}

/*
 * Return n unchanged when its nobj is a comparison (LE LT EQ NE GT GE) or a
 * logical operator (BOR AND NOT); otherwise return a new NE node comparing
 * n with nullnode.  n is dereferenced without a null check.
 */
Node *notnull(Node *n)
{
	switch (n->nobj) {
	case LE: case LT: case EQ: case NE: case GT: case GE:
	case BOR: case AND: case NOT:
		return n;
	default:
		return op2(NE, n, nullnode);
	}
}

/*
 * Walk the list vl via nnext and report a syntax error, once, if any
 * entry's cell has the same nval as cp.
 */
void checkdup(Node *vl, Cell *cp)	/* check if name already in list */
{
	char *s = cp->nval;
	for ( ; vl; vl = vl->nnext) {
		if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
			SYNTAX("duplicate argument %s", s);
			break;
		}
	}
}