1 /**************************************************************** 2 Copyright (C) Lucent Technologies 1997 3 All Rights Reserved 4 5 Permission to use, copy, modify, and distribute this software and 6 its documentation for any purpose and without fee is hereby 7 granted, provided that the above copyright notice appear in all 8 copies and that both that the copyright notice and this 9 permission notice and warranty disclaimer appear in supporting 10 documentation, and that the name Lucent Technologies or any of 11 its entities not be used in advertising or publicity pertaining 12 to distribution of the software without specific, written prior 13 permission. 14 15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 THIS SOFTWARE. 23 ****************************************************************/ 24 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 #include <ctype.h> 29 #include "awk.h" 30 #include "awkgram.tab.h" 31 32 extern YYSTYPE yylval; 33 extern bool infunc; 34 35 int lineno = 1; 36 int bracecnt = 0; 37 int brackcnt = 0; 38 int parencnt = 0; 39 40 typedef struct Keyword { 41 const char *word; 42 int sub; 43 int type; 44 } Keyword; 45 46 const Keyword keywords[] = { /* keep sorted: binary searched */ 47 { "BEGIN", XBEGIN, XBEGIN }, 48 { "END", XEND, XEND }, 49 { "NF", VARNF, VARNF }, 50 { "and", FAND, BLTIN }, 51 { "atan2", FATAN, BLTIN }, 52 { "break", BREAK, BREAK }, 53 { "close", CLOSE, CLOSE }, 54 { "compl", FCOMPL, BLTIN }, 55 { "continue", CONTINUE, CONTINUE }, 56 { "cos", FCOS, BLTIN }, 57 { "delete", DELETE, DELETE }, 58 { "do", DO, DO }, 59 { "else", ELSE, ELSE }, 60 { "exit", EXIT, EXIT }, 61 { "exp", FEXP, BLTIN }, 62 { "fflush", FFLUSH, BLTIN }, 63 { "for", FOR, FOR }, 64 { "func", FUNC, FUNC }, 65 { "function", FUNC, FUNC }, 66 { "gensub", GENSUB, GENSUB }, 67 { "getline", GETLINE, GETLINE }, 68 { "gsub", GSUB, GSUB }, 69 { "if", IF, IF }, 70 { "in", IN, IN }, 71 { "index", INDEX, INDEX }, 72 { "int", FINT, BLTIN }, 73 { "length", FLENGTH, BLTIN }, 74 { "log", FLOG, BLTIN }, 75 { "lshift", FLSHIFT, BLTIN }, 76 { "match", MATCHFCN, MATCHFCN }, 77 { "next", NEXT, NEXT }, 78 { "nextfile", NEXTFILE, NEXTFILE }, 79 { "or", FFOR, BLTIN }, 80 { "print", PRINT, PRINT }, 81 { "printf", PRINTF, PRINTF }, 82 { "rand", FRAND, BLTIN }, 83 { "return", RETURN, RETURN }, 84 { "rshift", FRSHIFT, BLTIN }, 85 { "sin", FSIN, BLTIN }, 86 { "split", SPLIT, SPLIT }, 87 { "sprintf", SPRINTF, SPRINTF }, 88 { "sqrt", FSQRT, BLTIN }, 89 { "srand", FSRAND, BLTIN }, 90 { "strftime", FSTRFTIME, BLTIN }, 91 { "sub", SUB, SUB }, 92 { "substr", SUBSTR, SUBSTR }, 93 { "system", FSYSTEM, BLTIN }, 94 { "systime", FSYSTIME, BLTIN }, 95 { "tolower", FTOLOWER, BLTIN }, 96 { "toupper", FTOUPPER, BLTIN }, 97 { "while", WHILE, WHILE }, 98 { "xor", FXOR, BLTIN }, 99 }; 100 101 #define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); } 102 103 static int peek(void) 104 { 105 int c = input(); 106 unput(c); 107 return c; 108 } 109 110 static int gettok(char **pbuf, int *psz) /* get next input token */ 111 { 112 int c, retc; 113 char *buf = *pbuf; 114 int sz = *psz; 115 char *bp = buf; 116 117 c = input(); 118 if (c == 0) 119 return 0; 120 buf[0] = c; 121 buf[1] = 0; 122 if (!isalnum(c) && c != '.' && c != '_') 123 return c; 124 125 *bp++ = c; 126 if (isalpha(c) || c == '_') { /* it's a varname */ 127 for ( ; (c = input()) != 0; ) { 128 if (bp-buf >= sz) 129 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok")) 130 FATAL( "out of space for name %.10s...", buf ); 131 if (isalnum(c) || c == '_') 132 *bp++ = c; 133 else { 134 *bp = 0; 135 unput(c); 136 break; 137 } 138 } 139 *bp = 0; 140 retc = 'a'; /* alphanumeric */ 141 } else { /* maybe it's a number, but could be . */ 142 char *rem; 143 /* read input until can't be a number */ 144 for ( ; (c = input()) != 0; ) { 145 if (bp-buf >= sz) 146 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok")) 147 FATAL( "out of space for number %.10s...", buf ); 148 if (isdigit(c) || c == 'e' || c == 'E' 149 || c == '.' || c == '+' || c == '-') 150 *bp++ = c; 151 else { 152 unput(c); 153 break; 154 } 155 } 156 *bp = 0; 157 strtod(buf, &rem); /* parse the number */ 158 if (rem == buf) { /* it wasn't a valid number at all */ 159 buf[1] = 0; /* return one character as token */ 160 retc = (uschar)buf[0]; /* character is its own type */ 161 unputstr(rem+1); /* put rest back for later */ 162 } else { /* some prefix was a number */ 163 unputstr(rem); /* put rest back for later */ 164 rem[0] = 0; /* truncate buf after number part */ 165 retc = '0'; /* type is number */ 166 } 167 } 168 *pbuf = buf; 169 *psz = sz; 170 return retc; 171 } 172 173 int word(char *); 174 int string(void); 175 int regexpr(void); 176 bool sc = false; /* true => return a } right now */ 177 bool reg = false; /* true => return a REGEXPR now */ 178 179 int yylex(void) 180 { 181 int c; 182 static char *buf = NULL; 183 static int bufsize = 5; /* BUG: setting this small causes core dump! */ 184 185 if (buf == NULL && (buf = (char *) malloc(bufsize)) == NULL) 186 FATAL( "out of space in yylex" ); 187 if (sc) { 188 sc = false; 189 RET('}'); 190 } 191 if (reg) { 192 reg = false; 193 return regexpr(); 194 } 195 for (;;) { 196 c = gettok(&buf, &bufsize); 197 if (c == 0) 198 return 0; 199 if (isalpha(c) || c == '_') 200 return word(buf); 201 if (isdigit(c)) { 202 char *cp = tostring(buf); 203 double result; 204 205 if (is_number(cp, & result)) 206 yylval.cp = setsymtab(buf, cp, result, CON|NUM, symtab); 207 else 208 yylval.cp = setsymtab(buf, cp, 0.0, STR, symtab); 209 free(cp); 210 /* should this also have STR set? */ 211 RET(NUMBER); 212 } 213 214 yylval.i = c; 215 switch (c) { 216 case '\n': /* {EOL} */ 217 lineno++; 218 RET(NL); 219 case '\r': /* assume \n is coming */ 220 case ' ': /* {WS}+ */ 221 case '\t': 222 break; 223 case '#': /* #.* strip comments */ 224 while ((c = input()) != '\n' && c != 0) 225 ; 226 unput(c); 227 /* 228 * Next line is a hack, itcompensates for 229 * unput's treatment of \n. 230 */ 231 lineno++; 232 break; 233 case ';': 234 RET(';'); 235 case '\\': 236 if (peek() == '\n') { 237 input(); 238 lineno++; 239 } else if (peek() == '\r') { 240 input(); input(); /* \n */ 241 lineno++; 242 } else { 243 RET(c); 244 } 245 break; 246 case '&': 247 if (peek() == '&') { 248 input(); RET(AND); 249 } else 250 RET('&'); 251 case '|': 252 if (peek() == '|') { 253 input(); RET(BOR); 254 } else 255 RET('|'); 256 case '!': 257 if (peek() == '=') { 258 input(); yylval.i = NE; RET(NE); 259 } else if (peek() == '~') { 260 input(); yylval.i = NOTMATCH; RET(MATCHOP); 261 } else 262 RET(NOT); 263 case '~': 264 yylval.i = MATCH; 265 RET(MATCHOP); 266 case '<': 267 if (peek() == '=') { 268 input(); yylval.i = LE; RET(LE); 269 } else { 270 yylval.i = LT; RET(LT); 271 } 272 case '=': 273 if (peek() == '=') { 274 input(); yylval.i = EQ; RET(EQ); 275 } else { 276 yylval.i = ASSIGN; RET(ASGNOP); 277 } 278 case '>': 279 if (peek() == '=') { 280 input(); yylval.i = GE; RET(GE); 281 } else if (peek() == '>') { 282 input(); yylval.i = APPEND; RET(APPEND); 283 } else { 284 yylval.i = GT; RET(GT); 285 } 286 case '+': 287 if (peek() == '+') { 288 input(); yylval.i = INCR; RET(INCR); 289 } else if (peek() == '=') { 290 input(); yylval.i = ADDEQ; RET(ASGNOP); 291 } else 292 RET('+'); 293 case '-': 294 if (peek() == '-') { 295 input(); yylval.i = DECR; RET(DECR); 296 } else if (peek() == '=') { 297 input(); yylval.i = SUBEQ; RET(ASGNOP); 298 } else 299 RET('-'); 300 case '*': 301 if (peek() == '=') { /* *= */ 302 input(); yylval.i = MULTEQ; RET(ASGNOP); 303 } else if (peek() == '*') { /* ** or **= */ 304 input(); /* eat 2nd * */ 305 if (peek() == '=') { 306 input(); yylval.i = POWEQ; RET(ASGNOP); 307 } else { 308 RET(POWER); 309 } 310 } else 311 RET('*'); 312 case '/': 313 RET('/'); 314 case '%': 315 if (peek() == '=') { 316 input(); yylval.i = MODEQ; RET(ASGNOP); 317 } else 318 RET('%'); 319 case '^': 320 if (peek() == '=') { 321 input(); yylval.i = POWEQ; RET(ASGNOP); 322 } else 323 RET(POWER); 324 325 case '$': 326 /* BUG: awkward, if not wrong */ 327 c = gettok(&buf, &bufsize); 328 if (isalpha(c)) { 329 if (strcmp(buf, "NF") == 0) { /* very special */ 330 unputstr("(NF)"); 331 RET(INDIRECT); 332 } 333 c = peek(); 334 if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) { 335 unputstr(buf); 336 RET(INDIRECT); 337 } 338 yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab); 339 RET(IVAR); 340 } else if (c == 0) { /* */ 341 SYNTAX( "unexpected end of input after $" ); 342 RET(';'); 343 } else { 344 unputstr(buf); 345 RET(INDIRECT); 346 } 347 348 case '}': 349 if (--bracecnt < 0) 350 SYNTAX( "extra }" ); 351 sc = true; 352 RET(';'); 353 case ']': 354 if (--brackcnt < 0) 355 SYNTAX( "extra ]" ); 356 RET(']'); 357 case ')': 358 if (--parencnt < 0) 359 SYNTAX( "extra )" ); 360 RET(')'); 361 case '{': 362 bracecnt++; 363 RET('{'); 364 case '[': 365 brackcnt++; 366 RET('['); 367 case '(': 368 parencnt++; 369 RET('('); 370 371 case '"': 372 return string(); /* BUG: should be like tran.c ? */ 373 374 default: 375 RET(c); 376 } 377 } 378 } 379 380 int string(void) 381 { 382 int c, n; 383 char *s, *bp; 384 static char *buf = NULL; 385 static int bufsz = 500; 386 387 if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL) 388 FATAL("out of space for strings"); 389 for (bp = buf; (c = input()) != '"'; ) { 390 if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string")) 391 FATAL("out of space for string %.10s...", buf); 392 switch (c) { 393 case '\n': 394 case '\r': 395 case 0: 396 *bp = '\0'; 397 SYNTAX( "non-terminated string %.10s...", buf ); 398 if (c == 0) /* hopeless */ 399 FATAL( "giving up" ); 400 lineno++; 401 break; 402 case '\\': 403 c = input(); 404 switch (c) { 405 case '\n': break; 406 case '"': *bp++ = '"'; break; 407 case 'n': *bp++ = '\n'; break; 408 case 't': *bp++ = '\t'; break; 409 case 'f': *bp++ = '\f'; break; 410 case 'r': *bp++ = '\r'; break; 411 case 'b': *bp++ = '\b'; break; 412 case 'v': *bp++ = '\v'; break; 413 case 'a': *bp++ = '\a'; break; 414 case '\\': *bp++ = '\\'; break; 415 416 case '0': case '1': case '2': /* octal: \d \dd \ddd */ 417 case '3': case '4': case '5': case '6': case '7': 418 n = c - '0'; 419 if ((c = peek()) >= '0' && c < '8') { 420 n = 8 * n + input() - '0'; 421 if ((c = peek()) >= '0' && c < '8') 422 n = 8 * n + input() - '0'; 423 } 424 *bp++ = n; 425 break; 426 427 case 'x': /* hex \x0-9a-fA-F + */ 428 { char xbuf[100], *px; 429 for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) { 430 if (isdigit(c) 431 || (c >= 'a' && c <= 'f') 432 || (c >= 'A' && c <= 'F')) 433 *px++ = c; 434 else 435 break; 436 } 437 *px = 0; 438 unput(c); 439 sscanf(xbuf, "%x", (unsigned int *) &n); 440 *bp++ = n; 441 break; 442 } 443 444 default: 445 *bp++ = c; 446 break; 447 } 448 break; 449 default: 450 *bp++ = c; 451 break; 452 } 453 } 454 *bp = 0; 455 s = tostring(buf); 456 *bp++ = ' '; *bp++ = '\0'; 457 yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab); 458 free(s); 459 RET(STRING); 460 } 461 462 463 static int binsearch(char *w, const Keyword *kp, int n) 464 { 465 int cond, low, mid, high; 466 467 low = 0; 468 high = n - 1; 469 while (low <= high) { 470 mid = (low + high) / 2; 471 if ((cond = strcmp(w, kp[mid].word)) < 0) 472 high = mid - 1; 473 else if (cond > 0) 474 low = mid + 1; 475 else 476 return mid; 477 } 478 return -1; 479 } 480 481 int word(char *w) 482 { 483 const Keyword *kp; 484 int c, n; 485 486 n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0])); 487 if (n != -1) { /* found in table */ 488 kp = keywords + n; 489 yylval.i = kp->sub; 490 switch (kp->type) { /* special handling */ 491 case BLTIN: 492 if (kp->sub == FSYSTEM && safe) 493 SYNTAX( "system is unsafe" ); 494 RET(kp->type); 495 case FUNC: 496 if (infunc) 497 SYNTAX( "illegal nested function" ); 498 RET(kp->type); 499 case RETURN: 500 if (!infunc) 501 SYNTAX( "return not in function" ); 502 RET(kp->type); 503 case VARNF: 504 yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab); 505 RET(VARNF); 506 default: 507 RET(kp->type); 508 } 509 } 510 c = peek(); /* look for '(' */ 511 if (c != '(' && infunc && (n=isarg(w)) >= 0) { 512 yylval.i = n; 513 RET(ARG); 514 } else { 515 yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab); 516 if (c == '(') { 517 RET(CALL); 518 } else { 519 RET(VAR); 520 } 521 } 522 } 523 524 void startreg(void) /* next call to yylex will return a regular expression */ 525 { 526 reg = true; 527 } 528 529 int regexpr(void) 530 { 531 int c; 532 static char *buf = NULL; 533 static int bufsz = 500; 534 char *bp; 535 536 if (buf == NULL && (buf = (char *) malloc(bufsz)) == NULL) 537 FATAL("out of space for rex expr"); 538 bp = buf; 539 for ( ; (c = input()) != '/' && c != 0; ) { 540 if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr")) 541 FATAL("out of space for reg expr %.10s...", buf); 542 if (c == '\n') { 543 *bp = '\0'; 544 SYNTAX( "newline in regular expression %.10s...", buf ); 545 unput('\n'); 546 break; 547 } else if (c == '\\') { 548 *bp++ = '\\'; 549 *bp++ = input(); 550 } else { 551 *bp++ = c; 552 } 553 } 554 *bp = 0; 555 if (c == 0) 556 SYNTAX("non-terminated regular expression %.10s...", buf); 557 yylval.s = tostring(buf); 558 unput('/'); 559 RET(REGEXPR); 560 } 561 562 /* low-level lexical stuff, sort of inherited from lex */ 563 564 char ebuf[300]; 565 char *ep = ebuf; 566 char yysbuf[100]; /* pushback buffer */ 567 char *yysptr = yysbuf; 568 FILE *yyin = NULL; 569 570 int input(void) /* get next lexical input character */ 571 { 572 int c; 573 extern char *lexprog; 574 575 if (yysptr > yysbuf) 576 c = (uschar)*--yysptr; 577 else if (lexprog != NULL) { /* awk '...' */ 578 if ((c = (uschar)*lexprog) != 0) 579 lexprog++; 580 } else /* awk -f ... */ 581 c = pgetc(); 582 if (c == EOF) 583 c = 0; 584 if (ep >= ebuf + sizeof ebuf) 585 ep = ebuf; 586 *ep = c; 587 if (c != 0) { 588 ep++; 589 } 590 return (c); 591 } 592 593 void unput(int c) /* put lexical character back on input */ 594 { 595 if (c == '\n') 596 lineno--; 597 if (yysptr >= yysbuf + sizeof(yysbuf)) 598 FATAL("pushed back too much: %.20s...", yysbuf); 599 *yysptr++ = c; 600 if (--ep < ebuf) 601 ep = ebuf + sizeof(ebuf) - 1; 602 } 603 604 void unputstr(const char *s) /* put a string back on input */ 605 { 606 int i; 607 608 for (i = strlen(s)-1; i >= 0; i--) 609 unput(s[i]); 610 } 611