1 /**************************************************************** 2 Copyright (C) Lucent Technologies 1997 3 All Rights Reserved 4 5 Permission to use, copy, modify, and distribute this software and 6 its documentation for any purpose and without fee is hereby 7 granted, provided that the above copyright notice appear in all 8 copies and that both that the copyright notice and this 9 permission notice and warranty disclaimer appear in supporting 10 documentation, and that the name Lucent Technologies or any of 11 its entities not be used in advertising or publicity pertaining 12 to distribution of the software without specific, written prior 13 permission. 14 15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY 18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER 20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, 21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF 22 THIS SOFTWARE. 23 ****************************************************************/ 24 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 #include <ctype.h> 29 #include "awk.h" 30 #include "ytab.h" 31 32 extern YYSTYPE yylval; 33 extern int infunc; 34 35 int lineno = 1; 36 int bracecnt = 0; 37 int brackcnt = 0; 38 int parencnt = 0; 39 40 typedef struct Keyword { 41 char *word; 42 int sub; 43 int type; 44 } Keyword; 45 46 Keyword keywords[] ={ /* keep sorted: binary searched */ 47 { "BEGIN", XBEGIN, XBEGIN }, 48 { "END", XEND, XEND }, 49 { "NF", VARNF, VARNF }, 50 { "atan2", FATAN, BLTIN }, 51 { "break", BREAK, BREAK }, 52 { "close", CLOSE, CLOSE }, 53 { "continue", CONTINUE, CONTINUE }, 54 { "cos", FCOS, BLTIN }, 55 { "delete", DELETE, DELETE }, 56 { "do", DO, DO }, 57 { "else", ELSE, ELSE }, 58 { "exit", EXIT, EXIT }, 59 { "exp", FEXP, BLTIN }, 60 { "fflush", FFLUSH, BLTIN }, 61 { "for", FOR, FOR }, 62 { "func", FUNC, FUNC }, 63 { "function", FUNC, FUNC }, 64 { "getline", GETLINE, GETLINE }, 65 { "gsub", GSUB, GSUB }, 66 { "if", IF, IF }, 67 { "in", IN, IN }, 68 { "index", INDEX, INDEX }, 69 { "int", FINT, BLTIN }, 70 { "length", FLENGTH, BLTIN }, 71 { "log", FLOG, BLTIN }, 72 { "match", MATCHFCN, MATCHFCN }, 73 { "next", NEXT, NEXT }, 74 { "nextfile", NEXTFILE, NEXTFILE }, 75 { "print", PRINT, PRINT }, 76 { "printf", PRINTF, PRINTF }, 77 { "rand", FRAND, BLTIN }, 78 { "return", RETURN, RETURN }, 79 { "sin", FSIN, BLTIN }, 80 { "split", SPLIT, SPLIT }, 81 { "sprintf", SPRINTF, SPRINTF }, 82 { "sqrt", FSQRT, BLTIN }, 83 { "srand", FSRAND, BLTIN }, 84 { "sub", SUB, SUB }, 85 { "substr", SUBSTR, SUBSTR }, 86 { "system", FSYSTEM, BLTIN }, 87 { "tolower", FTOLOWER, BLTIN }, 88 { "toupper", FTOUPPER, BLTIN }, 89 { "while", WHILE, WHILE }, 90 }; 91 92 #define DEBUG 93 #ifdef DEBUG 94 #define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); } 95 #else 96 #define RET(x) return(x) 97 #endif 98 99 int peek(void) 100 { 101 int c = input(); 102 unput(c); 103 return c; 104 } 105 106 int gettok(char **pbuf, int *psz) /* get next input token */ 107 { 108 int c; 109 char *buf = *pbuf; 110 int sz = *psz; 111 char *bp = buf; 112 113 c = input(); 114 if (c == 0) 115 return 0; 116 buf[0] = c; 117 buf[1] = 0; 118 if (!isalnum(c) && c != '.' && c != '_') 119 return c; 120 121 *bp++ = c; 122 if (isalpha(c) || c == '_') { /* it's a varname */ 123 for ( ; (c = input()) != 0; ) { 124 if (bp-buf >= sz) 125 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0)) 126 FATAL( "out of space for name %.10s...", buf ); 127 if (isalnum(c) || c == '_') 128 *bp++ = c; 129 else { 130 *bp = 0; 131 unput(c); 132 break; 133 } 134 } 135 *bp = 0; 136 } else { /* it's a number */ 137 char *rem; 138 /* read input until can't be a number */ 139 for ( ; (c = input()) != 0; ) { 140 if (bp-buf >= sz) 141 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0)) 142 FATAL( "out of space for number %.10s...", buf ); 143 if (isdigit(c) || c == 'e' || c == 'E' 144 || c == '.' || c == '+' || c == '-') 145 *bp++ = c; 146 else { 147 unput(c); 148 break; 149 } 150 } 151 *bp = 0; 152 strtod(buf, &rem); /* parse the number */ 153 unputstr(rem); /* put rest back for later */ 154 rem[0] = 0; 155 } 156 *pbuf = buf; 157 *psz = sz; 158 return buf[0]; 159 } 160 161 int word(char *); 162 int string(void); 163 int regexpr(void); 164 int sc = 0; /* 1 => return a } right now */ 165 int reg = 0; /* 1 => return a REGEXPR now */ 166 167 int yylex(void) 168 { 169 int c; 170 static char *buf = 0; 171 static int bufsize = 500; 172 173 if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL) 174 FATAL( "out of space in yylex" ); 175 if (sc) { 176 sc = 0; 177 RET('}'); 178 } 179 if (reg) { 180 reg = 0; 181 return regexpr(); 182 } 183 for (;;) { 184 c = gettok(&buf, &bufsize); 185 if (c == 0) 186 return 0; 187 if (isalpha(c) || c == '_') 188 return word(buf); 189 if (isdigit(c) || c == '.') { 190 yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab); 191 /* should this also have STR set? */ 192 RET(NUMBER); 193 } 194 195 yylval.i = c; 196 switch (c) { 197 case '\n': /* {EOL} */ 198 RET(NL); 199 case '\r': /* assume \n is coming */ 200 case ' ': /* {WS}+ */ 201 case '\t': 202 break; 203 case '#': /* #.* strip comments */ 204 while ((c = input()) != '\n' && c != 0) 205 ; 206 unput(c); 207 break; 208 case ';': 209 RET(';'); 210 case '\\': 211 if (peek() == '\n') { 212 input(); 213 } else if (peek() == '\r') { 214 input(); input(); /* \n */ 215 lineno++; 216 } else { 217 RET(c); 218 } 219 break; 220 case '&': 221 if (peek() == '&') { 222 input(); RET(AND); 223 } else 224 RET('&'); 225 case '|': 226 if (peek() == '|') { 227 input(); RET(BOR); 228 } else 229 RET('|'); 230 case '!': 231 if (peek() == '=') { 232 input(); yylval.i = NE; RET(NE); 233 } else if (peek() == '~') { 234 input(); yylval.i = NOTMATCH; RET(MATCHOP); 235 } else 236 RET(NOT); 237 case '~': 238 yylval.i = MATCH; 239 RET(MATCHOP); 240 case '<': 241 if (peek() == '=') { 242 input(); yylval.i = LE; RET(LE); 243 } else { 244 yylval.i = LT; RET(LT); 245 } 246 case '=': 247 if (peek() == '=') { 248 input(); yylval.i = EQ; RET(EQ); 249 } else { 250 yylval.i = ASSIGN; RET(ASGNOP); 251 } 252 case '>': 253 if (peek() == '=') { 254 input(); yylval.i = GE; RET(GE); 255 } else if (peek() == '>') { 256 input(); yylval.i = APPEND; RET(APPEND); 257 } else { 258 yylval.i = GT; RET(GT); 259 } 260 case '+': 261 if (peek() == '+') { 262 input(); yylval.i = INCR; RET(INCR); 263 } else if (peek() == '=') { 264 input(); yylval.i = ADDEQ; RET(ASGNOP); 265 } else 266 RET('+'); 267 case '-': 268 if (peek() == '-') { 269 input(); yylval.i = DECR; RET(DECR); 270 } else if (peek() == '=') { 271 input(); yylval.i = SUBEQ; RET(ASGNOP); 272 } else 273 RET('-'); 274 case '*': 275 if (peek() == '=') { /* *= */ 276 input(); yylval.i = MULTEQ; RET(ASGNOP); 277 } else if (peek() == '*') { /* ** or **= */ 278 input(); /* eat 2nd * */ 279 if (peek() == '=') { 280 input(); yylval.i = POWEQ; RET(ASGNOP); 281 } else { 282 RET(POWER); 283 } 284 } else 285 RET('*'); 286 case '/': 287 RET('/'); 288 case '%': 289 if (peek() == '=') { 290 input(); yylval.i = MODEQ; RET(ASGNOP); 291 } else 292 RET('%'); 293 case '^': 294 if (peek() == '=') { 295 input(); yylval.i = POWEQ; RET(ASGNOP); 296 } else 297 RET(POWER); 298 299 case '$': 300 /* BUG: awkward, if not wrong */ 301 c = gettok(&buf, &bufsize); 302 if (isalpha(c)) { 303 if (strcmp(buf, "NF") == 0) { /* very special */ 304 unputstr("(NF)"); 305 RET(INDIRECT); 306 } 307 c = peek(); 308 if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) { 309 unputstr(buf); 310 RET(INDIRECT); 311 } 312 yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab); 313 RET(IVAR); 314 } else { 315 unputstr(buf); 316 RET(INDIRECT); 317 } 318 319 case '}': 320 if (--bracecnt < 0) 321 SYNTAX( "extra }" ); 322 sc = 1; 323 RET(';'); 324 case ']': 325 if (--brackcnt < 0) 326 SYNTAX( "extra ]" ); 327 RET(']'); 328 case ')': 329 if (--parencnt < 0) 330 SYNTAX( "extra )" ); 331 RET(')'); 332 case '{': 333 bracecnt++; 334 RET('{'); 335 case '[': 336 brackcnt++; 337 RET('['); 338 case '(': 339 parencnt++; 340 RET('('); 341 342 case '"': 343 return string(); /* BUG: should be like tran.c ? */ 344 345 default: 346 RET(c); 347 } 348 } 349 } 350 351 int string(void) 352 { 353 int c, n; 354 char *s, *bp; 355 static char *buf = 0; 356 static int bufsz = 500; 357 358 if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) 359 FATAL("out of space for strings"); 360 for (bp = buf; (c = input()) != '"'; ) { 361 if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, 0)) 362 FATAL("out of space for string %.10s...", buf); 363 switch (c) { 364 case '\n': 365 case '\r': 366 case 0: 367 SYNTAX( "non-terminated string %.10s...", buf ); 368 lineno++; 369 break; 370 case '\\': 371 c = input(); 372 switch (c) { 373 case '"': *bp++ = '"'; break; 374 case 'n': *bp++ = '\n'; break; 375 case 't': *bp++ = '\t'; break; 376 case 'f': *bp++ = '\f'; break; 377 case 'r': *bp++ = '\r'; break; 378 case 'b': *bp++ = '\b'; break; 379 case 'v': *bp++ = '\v'; break; 380 case 'a': *bp++ = '\007'; break; 381 case '\\': *bp++ = '\\'; break; 382 383 case '0': case '1': case '2': /* octal: \d \dd \ddd */ 384 case '3': case '4': case '5': case '6': case '7': 385 n = c - '0'; 386 if ((c = peek()) >= '0' && c < '8') { 387 n = 8 * n + input() - '0'; 388 if ((c = peek()) >= '0' && c < '8') 389 n = 8 * n + input() - '0'; 390 } 391 *bp++ = n; 392 break; 393 394 case 'x': /* hex \x0-9a-fA-F + */ 395 { char xbuf[100], *px; 396 for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) { 397 if (isdigit(c) 398 || (c >= 'a' && c <= 'f') 399 || (c >= 'A' && c <= 'F')) 400 *px++ = c; 401 else 402 break; 403 } 404 *px = 0; 405 unput(c); 406 sscanf(xbuf, "%x", &n); 407 *bp++ = n; 408 break; 409 } 410 411 default: 412 *bp++ = c; 413 break; 414 } 415 break; 416 default: 417 *bp++ = c; 418 break; 419 } 420 } 421 *bp = 0; 422 s = tostring(buf); 423 *bp++ = ' '; *bp++ = 0; 424 yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab); 425 RET(STRING); 426 } 427 428 429 int binsearch(char *w, Keyword *kp, int n) 430 { 431 int cond, low, mid, high; 432 433 low = 0; 434 high = n - 1; 435 while (low <= high) { 436 mid = (low + high) / 2; 437 if ((cond = strcmp(w, kp[mid].word)) < 0) 438 high = mid - 1; 439 else if (cond > 0) 440 low = mid + 1; 441 else 442 return mid; 443 } 444 return -1; 445 } 446 447 int word(char *w) 448 { 449 Keyword *kp; 450 int c, n; 451 452 n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0])); 453 kp = keywords + n; 454 if (n != -1) { /* found in table */ 455 yylval.i = kp->sub; 456 switch (kp->type) { /* special handling */ 457 case FSYSTEM: 458 if (safe) 459 SYNTAX( "system is unsafe" ); 460 RET(kp->type); 461 case FUNC: 462 if (infunc) 463 SYNTAX( "illegal nested function" ); 464 RET(kp->type); 465 case RETURN: 466 if (!infunc) 467 SYNTAX( "return not in function" ); 468 RET(kp->type); 469 case VARNF: 470 yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab); 471 RET(VARNF); 472 default: 473 RET(kp->type); 474 } 475 } 476 c = peek(); /* look for '(' */ 477 if (c != '(' && infunc && (n=isarg(w)) >= 0) { 478 yylval.i = n; 479 RET(ARG); 480 } else { 481 yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab); 482 if (c == '(') { 483 RET(CALL); 484 } else { 485 RET(VAR); 486 } 487 } 488 } 489 490 void startreg(void) /* next call to yyles will return a regular expression */ 491 { 492 reg = 1; 493 } 494 495 int regexpr(void) 496 { 497 int c; 498 static char *buf = 0; 499 static int bufsz = 500; 500 char *bp; 501 502 if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL) 503 FATAL("out of space for rex expr"); 504 bp = buf; 505 for ( ; (c = input()) != '/' && c != 0; ) { 506 if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, 0)) 507 FATAL("out of space for reg expr %.10s...", buf); 508 if (c == '\n') { 509 SYNTAX( "newline in regular expression %.10s...", buf ); 510 unput('\n'); 511 break; 512 } else if (c == '\\') { 513 *bp++ = '\\'; 514 *bp++ = input(); 515 } else { 516 *bp++ = c; 517 } 518 } 519 *bp = 0; 520 yylval.s = tostring(buf); 521 unput('/'); 522 RET(REGEXPR); 523 } 524 525 /* low-level lexical stuff, sort of inherited from lex */ 526 527 char ebuf[300]; 528 char *ep = ebuf; 529 char yysbuf[100]; /* pushback buffer */ 530 char *yysptr = yysbuf; 531 FILE *yyin = 0; 532 533 int input(void) /* get next lexical input character */ 534 { 535 int c; 536 extern char *lexprog; 537 538 if (yysptr > yysbuf) 539 c = *--yysptr; 540 else if (lexprog != NULL) { /* awk '...' */ 541 if ((c = *lexprog) != 0) 542 lexprog++; 543 } else /* awk -f ... */ 544 c = pgetc(); 545 if (c == '\n') 546 lineno++; 547 else if (c == EOF) 548 c = 0; 549 if (ep >= ebuf + sizeof ebuf) 550 ep = ebuf; 551 return *ep++ = c; 552 } 553 554 void unput(int c) /* put lexical character back on input */ 555 { 556 if (c == '\n') 557 lineno--; 558 if (yysptr >= yysbuf + sizeof(yysbuf)) 559 FATAL("pushed back too much: %.20s...", yysbuf); 560 *yysptr++ = c; 561 if (--ep < ebuf) 562 ep = ebuf + sizeof(ebuf) - 1; 563 } 564 565 void unputstr(char *s) /* put a string back on input */ 566 { 567 int i; 568 569 for (i = strlen(s)-1; i >= 0; i--) 570 unput(s[i]); 571 } 572