1 /*- 2 * Copyright (c) 2013 David Chisnall 3 * All rights reserved. 4 * 5 * This software was developed by SRI International and the University of 6 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) 7 * ("CTSRD"), as part of the DARPA CRASH research programme. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 * $FreeBSD$ 31 */ 32 33 #include "input_buffer.hh" 34 #include <ctype.h> 35 #include <limits.h> 36 #include <stdint.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <functional> 41 #ifndef NDEBUG 42 #include <iostream> 43 #endif 44 45 46 #include <sys/stat.h> 47 #include <sys/mman.h> 48 #include <assert.h> 49 50 #ifndef MAP_PREFAULT_READ 51 #define MAP_PREFAULT_READ 0 52 #endif 53 54 namespace dtc 55 { 56 void 57 input_buffer::skip_spaces() 58 { 59 if (cursor >= size) { return; } 60 if (cursor < 0) { return; } 61 char c = buffer[cursor]; 62 while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f') 63 || (c == '\v') || (c == '\r')) 64 { 65 cursor++; 66 if (cursor > size) 67 { 68 c = '\0'; 69 } 70 else 71 { 72 c = buffer[cursor]; 73 } 74 } 75 } 76 77 input_buffer 78 input_buffer::buffer_from_offset(int offset, int s) 79 { 80 if (s == 0) 81 { 82 s = size - offset; 83 } 84 if (offset > size) 85 { 86 return input_buffer(); 87 } 88 if (s > (size-offset)) 89 { 90 return input_buffer(); 91 } 92 return input_buffer(&buffer[offset], s); 93 } 94 95 bool 96 input_buffer::consume(const char *str) 97 { 98 int len = strlen(str); 99 if (len > size - cursor) 100 { 101 return false; 102 } 103 else 104 { 105 for (int i=0 ; i<len ; ++i) 106 { 107 if (str[i] != buffer[cursor + i]) 108 { 109 return false; 110 } 111 } 112 cursor += len; 113 return true; 114 } 115 return false; 116 } 117 118 bool 119 input_buffer::consume_integer(unsigned long long &outInt) 120 { 121 // The first character must be a digit. Hex and octal strings 122 // are prefixed by 0 and 0x, respectively. 123 if (!isdigit((*this)[0])) 124 { 125 return false; 126 } 127 char *end=0; 128 outInt = strtoull(&buffer[cursor], &end, 0); 129 if (end == &buffer[cursor]) 130 { 131 return false; 132 } 133 cursor = end - buffer; 134 return true; 135 } 136 137 namespace { 138 139 /** 140 * Convenience typedef for the type that we use for all values. 141 */ 142 typedef unsigned long long valty; 143 144 /** 145 * Expression tree currently being parsed. 146 */ 147 struct expression 148 { 149 /** 150 * Evaluate this node, taking into account operator precedence. 151 */ 152 virtual valty operator()() = 0; 153 /** 154 * Returns the precedence of this node. Lower values indicate higher 155 * precedence. 156 */ 157 virtual int precedence() = 0; 158 virtual ~expression() {} 159 #ifndef NDEBUG 160 /** 161 * Dumps this expression to `std::cerr`, appending a newline if `nl` is 162 * `true`. 163 */ 164 void dump(bool nl=false) 165 { 166 if (this == nullptr) 167 { 168 std::cerr << "{nullptr}\n"; 169 return; 170 } 171 dump_impl(); 172 if (nl) 173 { 174 std::cerr << '\n'; 175 } 176 } 177 private: 178 /** 179 * Method that sublcasses override to implement the behaviour of `dump()`. 180 */ 181 virtual void dump_impl() = 0; 182 #endif 183 }; 184 185 /** 186 * Expression wrapping a single integer. Leaf nodes in the expression tree. 187 */ 188 class terminal_expr : public expression 189 { 190 /** 191 * The value that this wraps. 192 */ 193 valty val; 194 /** 195 * Evaluate. Trivially returns the value that this class wraps. 196 */ 197 valty operator()() override 198 { 199 return val; 200 } 201 int precedence() override 202 { 203 return 0; 204 } 205 public: 206 /** 207 * Constructor. 208 */ 209 terminal_expr(valty v) : val(v) {} 210 #ifndef NDEBUG 211 void dump_impl() override { std::cerr << val; } 212 #endif 213 }; 214 215 /** 216 * Parenthetical expression. Exists to make the contents opaque. 217 */ 218 struct paren_expression : public expression 219 { 220 /** 221 * The expression within the parentheses. 222 */ 223 expression_ptr subexpr; 224 /** 225 * Constructor. Takes the child expression as the only argument. 226 */ 227 paren_expression(expression_ptr p) : subexpr(std::move(p)) {} 228 int precedence() override 229 { 230 return 0; 231 } 232 /** 233 * Evaluate - just forwards to the underlying expression. 234 */ 235 valty operator()() override 236 { 237 return (*subexpr)(); 238 } 239 #ifndef NDEBUG 240 void dump_impl() override 241 { 242 std::cerr << " ("; 243 subexpr->dump(); 244 std::cerr << ") "; 245 } 246 #endif 247 }; 248 249 /** 250 * Template class for unary operators. The `OpChar` template parameter is 251 * solely for debugging and makes it easy to print the expression. The `Op` 252 * template parameter is a function object that implements the operator that 253 * this class provides. Most of these are provided by the `<functional>` 254 * header. 255 */ 256 template<char OpChar, class Op> 257 class unary_operator : public expression 258 { 259 /** 260 * The subexpression for this unary operator. 261 */ 262 expression_ptr subexpr; 263 valty operator()() override 264 { 265 Op op; 266 return op((*subexpr)()); 267 } 268 /** 269 * All unary operators have the same precedence. They are all evaluated 270 * before binary expressions, but after parentheses. 271 */ 272 int precedence() override 273 { 274 return 3; 275 } 276 public: 277 unary_operator(expression_ptr p) : subexpr(std::move(p)) {} 278 #ifndef NDEBUG 279 void dump_impl() override 280 { 281 std::cerr << OpChar; 282 subexpr->dump(); 283 } 284 #endif 285 }; 286 287 /** 288 * Abstract base class for binary operators. Allows the tree to be modified 289 * without knowing what the operations actually are. 290 */ 291 struct binary_operator_base : public expression 292 { 293 /** 294 * The left side of the expression. 295 */ 296 expression_ptr lhs; 297 /** 298 * The right side of the expression. 299 */ 300 expression_ptr rhs; 301 /** 302 * Insert a node somewhere down the path of left children, until it would 303 * be preempting something that should execute first. 304 */ 305 void insert_left(binary_operator_base *new_left) 306 { 307 if (lhs->precedence() < new_left->precedence()) 308 { 309 new_left->rhs = std::move(lhs); 310 lhs.reset(new_left); 311 } 312 else 313 { 314 static_cast<binary_operator_base*>(lhs.get())->insert_left(new_left); 315 } 316 } 317 }; 318 319 /** 320 * Template class for binary operators. The precedence and the operation are 321 * provided as template parameters. 322 */ 323 template<int Precedence, class Op> 324 struct binary_operator : public binary_operator_base 325 { 326 valty operator()() override 327 { 328 Op op; 329 return op((*lhs)(), (*rhs)()); 330 } 331 int precedence() override 332 { 333 return Precedence; 334 } 335 #ifdef NDEBUG 336 /** 337 * Constructor. Takes the name of the operator as an argument, for 338 * debugging. Only stores it in debug mode. 339 */ 340 binary_operator(const char *) {} 341 #else 342 const char *opName; 343 binary_operator(const char *o) : opName(o) {} 344 void dump_impl() override 345 { 346 lhs->dump(); 347 std::cerr << opName; 348 rhs->dump(); 349 } 350 #endif 351 }; 352 353 /** 354 * Ternary conditional operators (`cond ? true : false`) are a special case - 355 * there are no other ternary operators. 356 */ 357 class ternary_conditional_operator : public expression 358 { 359 /** 360 * The condition for the clause. 361 */ 362 expression_ptr cond; 363 /** 364 * The expression that this evaluates to if the condition is true. 365 */ 366 expression_ptr lhs; 367 /** 368 * The expression that this evaluates to if the condition is false. 369 */ 370 expression_ptr rhs; 371 valty operator()() override 372 { 373 return (*cond)() ? (*lhs)() : (*rhs)(); 374 } 375 int precedence() override 376 { 377 // The actual precedence of a ternary conditional operator is 15, but 378 // its associativity is the opposite way around to the other operators, 379 // so we fudge it slightly. 380 return 3; 381 } 382 #ifndef NDEBUG 383 void dump_impl() override 384 { 385 cond->dump(); 386 std::cerr << " ? "; 387 lhs->dump(); 388 std::cerr << " : "; 389 rhs->dump(); 390 } 391 #endif 392 public: 393 ternary_conditional_operator(expression_ptr c, 394 expression_ptr l, 395 expression_ptr r) : 396 cond(std::move(c)), lhs(std::move(l)), rhs(std::move(r)) {} 397 }; 398 399 template<typename T> 400 struct lshift 401 { 402 constexpr T operator()(const T &lhs, const T &rhs) const 403 { 404 return lhs << rhs; 405 } 406 }; 407 template<typename T> 408 struct rshift 409 { 410 constexpr T operator()(const T &lhs, const T &rhs) const 411 { 412 return lhs >> rhs; 413 } 414 }; 415 template<typename T> 416 struct unary_plus 417 { 418 constexpr T operator()(const T &val) const 419 { 420 return +val; 421 } 422 }; 423 // TODO: Replace with std::bit_not once we can guarantee C++14 as a baseline. 424 template<typename T> 425 struct bit_not 426 { 427 constexpr T operator()(const T &val) const 428 { 429 return ~val; 430 } 431 }; 432 433 } // anonymous namespace 434 435 436 expression_ptr input_buffer::parse_binary_expression(expression_ptr lhs) 437 { 438 next_token(); 439 binary_operator_base *expr = nullptr; 440 char op = ((*this)[0]); 441 switch (op) 442 { 443 default: 444 return lhs; 445 case '+': 446 expr = new binary_operator<6, std::plus<valty>>("+"); 447 break; 448 case '-': 449 expr = new binary_operator<6, std::minus<valty>>("-"); 450 break; 451 case '%': 452 expr = new binary_operator<5, std::modulus<valty>>("%"); 453 break; 454 case '*': 455 expr = new binary_operator<5, std::multiplies<valty>>("*"); 456 break; 457 case '/': 458 expr = new binary_operator<5, std::divides<valty>>("/"); 459 break; 460 case '<': 461 cursor++; 462 switch ((*this)[0]) 463 { 464 default: 465 parse_error("Invalid operator"); 466 return nullptr; 467 case ' ': 468 case '(': 469 case '0'...'9': 470 cursor--; 471 expr = new binary_operator<8, std::less<valty>>("<"); 472 break; 473 case '=': 474 expr = new binary_operator<8, std::less_equal<valty>>("<="); 475 break; 476 case '<': 477 expr = new binary_operator<7, lshift<valty>>("<<"); 478 break; 479 } 480 break; 481 case '>': 482 cursor++; 483 switch ((*this)[0]) 484 { 485 default: 486 parse_error("Invalid operator"); 487 return nullptr; 488 case '(': 489 case ' ': 490 case '0'...'9': 491 cursor--; 492 expr = new binary_operator<8, std::greater<valty>>(">"); 493 break; 494 case '=': 495 expr = new binary_operator<8, std::greater_equal<valty>>(">="); 496 break; 497 case '>': 498 expr = new binary_operator<7, rshift<valty>>(">>"); 499 break; 500 return lhs; 501 } 502 break; 503 case '=': 504 if ((*this)[1] != '=') 505 { 506 parse_error("Invalid operator"); 507 return nullptr; 508 } 509 consume('='); 510 expr = new binary_operator<9, std::equal_to<valty>>("=="); 511 break; 512 case '!': 513 if ((*this)[1] != '=') 514 { 515 parse_error("Invalid operator"); 516 return nullptr; 517 } 518 cursor++; 519 expr = new binary_operator<9, std::not_equal_to<valty>>("!="); 520 break; 521 case '&': 522 if ((*this)[1] == '&') 523 { 524 expr = new binary_operator<13, std::logical_and<valty>>("&&"); 525 } 526 else 527 { 528 expr = new binary_operator<10, std::bit_and<valty>>("&"); 529 } 530 break; 531 case '|': 532 if ((*this)[1] == '|') 533 { 534 expr = new binary_operator<12, std::logical_or<valty>>("||"); 535 } 536 else 537 { 538 expr = new binary_operator<14, std::bit_or<valty>>("|"); 539 } 540 break; 541 case '?': 542 { 543 consume('?'); 544 expression_ptr true_case = parse_expression(); 545 next_token(); 546 if (!true_case || !consume(':')) 547 { 548 parse_error("Expected : in ternary conditional operator"); 549 return nullptr; 550 } 551 expression_ptr false_case = parse_expression(); 552 if (!false_case) 553 { 554 parse_error("Expected false condition for ternary operator"); 555 return nullptr; 556 } 557 return expression_ptr(new ternary_conditional_operator(std::move(lhs), 558 std::move(true_case), std::move(false_case))); 559 } 560 } 561 cursor++; 562 next_token(); 563 expression_ptr e(expr); 564 expression_ptr rhs(parse_expression()); 565 if (!rhs) 566 { 567 return nullptr; 568 } 569 expr->lhs = std::move(lhs); 570 if (rhs->precedence() < expr->precedence()) 571 { 572 expr->rhs = std::move(rhs); 573 } 574 else 575 { 576 // If we're a normal left-to-right expression, then we need to insert 577 // this as the far-left child node of the rhs expression 578 binary_operator_base *rhs_op = 579 static_cast<binary_operator_base*>(rhs.get()); 580 rhs_op->insert_left(expr); 581 e.release(); 582 return rhs; 583 } 584 return e; 585 } 586 587 expression_ptr input_buffer::parse_expression(bool stopAtParen) 588 { 589 next_token(); 590 unsigned long long leftVal; 591 expression_ptr lhs; 592 switch ((*this)[0]) 593 { 594 case '0'...'9': 595 if (!consume_integer(leftVal)) 596 { 597 return nullptr; 598 } 599 lhs.reset(new terminal_expr(leftVal)); 600 break; 601 case '(': 602 { 603 consume('('); 604 expression_ptr &&subexpr = parse_expression(); 605 if (!subexpr) 606 { 607 return nullptr; 608 } 609 lhs.reset(new paren_expression(std::move(subexpr))); 610 if (!consume(')')) 611 { 612 return nullptr; 613 } 614 if (stopAtParen) 615 { 616 return lhs; 617 } 618 break; 619 } 620 case '+': 621 { 622 consume('+'); 623 expression_ptr &&subexpr = parse_expression(); 624 if (!subexpr) 625 { 626 return nullptr; 627 } 628 lhs.reset(new unary_operator<'+', unary_plus<valty>>(std::move(subexpr))); 629 break; 630 } 631 case '-': 632 { 633 consume('-'); 634 expression_ptr &&subexpr = parse_expression(); 635 if (!subexpr) 636 { 637 return nullptr; 638 } 639 lhs.reset(new unary_operator<'-', std::negate<valty>>(std::move(subexpr))); 640 break; 641 } 642 case '!': 643 { 644 consume('!'); 645 expression_ptr &&subexpr = parse_expression(); 646 if (!subexpr) 647 { 648 return nullptr; 649 } 650 lhs.reset(new unary_operator<'!', std::logical_not<valty>>(std::move(subexpr))); 651 break; 652 } 653 case '~': 654 { 655 consume('~'); 656 expression_ptr &&subexpr = parse_expression(); 657 if (!subexpr) 658 { 659 return nullptr; 660 } 661 lhs.reset(new unary_operator<'~', bit_not<valty>>(std::move(subexpr))); 662 break; 663 } 664 } 665 if (!lhs) 666 { 667 return nullptr; 668 } 669 return parse_binary_expression(std::move(lhs)); 670 } 671 672 bool 673 input_buffer::consume_integer_expression(unsigned long long &outInt) 674 { 675 switch ((*this)[0]) 676 { 677 case '(': 678 { 679 expression_ptr e(parse_expression(true)); 680 if (!e) 681 { 682 return false; 683 } 684 outInt = (*e)(); 685 return true; 686 } 687 case '0'...'9': 688 return consume_integer(outInt); 689 default: 690 return false; 691 } 692 } 693 694 bool 695 input_buffer::consume_hex_byte(uint8_t &outByte) 696 { 697 if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1])) 698 { 699 return false; 700 } 701 outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]); 702 cursor += 2; 703 return true; 704 } 705 706 input_buffer& 707 input_buffer::next_token() 708 { 709 int start; 710 do { 711 start = cursor; 712 skip_spaces(); 713 // Parse /* comments 714 if ((*this)[0] == '/' && (*this)[1] == '*') 715 { 716 // eat the start of the comment 717 ++(*this); 718 ++(*this); 719 do { 720 // Find the ending * of */ 721 while ((**this != '\0') && (**this != '*')) 722 { 723 ++(*this); 724 } 725 // Eat the * 726 ++(*this); 727 } while ((**this != '\0') && (**this != '/')); 728 // Eat the / 729 ++(*this); 730 } 731 // Parse // comments 732 if (((*this)[0] == '/' && (*this)[1] == '/')) 733 { 734 // eat the start of the comment 735 ++(*this); 736 ++(*this); 737 // Find the ending of the line 738 while (**this != '\n') 739 { 740 ++(*this); 741 } 742 // Eat the \n 743 ++(*this); 744 } 745 } while (start != cursor); 746 return *this; 747 } 748 749 void 750 input_buffer::parse_error(const char *msg) 751 { 752 int line_count = 1; 753 int line_start = 0; 754 int line_end = cursor; 755 for (int i=cursor ; i>0 ; --i) 756 { 757 if (buffer[i] == '\n') 758 { 759 line_count++; 760 if (line_start == 0) 761 { 762 line_start = i+1; 763 } 764 } 765 } 766 for (int i=cursor+1 ; i<size ; ++i) 767 { 768 if (buffer[i] == '\n') 769 { 770 line_end = i; 771 break; 772 } 773 } 774 fprintf(stderr, "Error on line %d: %s\n", line_count, msg); 775 fwrite(&buffer[line_start], line_end-line_start, 1, stderr); 776 putc('\n', stderr); 777 for (int i=0 ; i<(cursor-line_start) ; ++i) 778 { 779 char c = (buffer[i+line_start] == '\t') ? '\t' : ' '; 780 putc(c, stderr); 781 } 782 putc('^', stderr); 783 putc('\n', stderr); 784 } 785 #ifndef NDEBUG 786 void 787 input_buffer::dump() 788 { 789 fprintf(stderr, "Current cursor: %d\n", cursor); 790 fwrite(&buffer[cursor], size-cursor, 1, stderr); 791 } 792 #endif 793 794 mmap_input_buffer::mmap_input_buffer(int fd) : input_buffer(0, 0) 795 { 796 struct stat sb; 797 if (fstat(fd, &sb)) 798 { 799 perror("Failed to stat file"); 800 } 801 size = sb.st_size; 802 buffer = (const char*)mmap(0, size, PROT_READ, MAP_PRIVATE | 803 MAP_PREFAULT_READ, fd, 0); 804 if (buffer == MAP_FAILED) 805 { 806 perror("Failed to mmap file"); 807 exit(EXIT_FAILURE); 808 } 809 } 810 811 mmap_input_buffer::~mmap_input_buffer() 812 { 813 if (buffer != 0) 814 { 815 munmap((void*)buffer, size); 816 } 817 } 818 819 stream_input_buffer::stream_input_buffer() : input_buffer(0, 0) 820 { 821 int c; 822 while ((c = fgetc(stdin)) != EOF) 823 { 824 b.push_back(c); 825 } 826 buffer = b.data(); 827 size = b.size(); 828 } 829 830 } // namespace dtc 831 832