1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2013 David Chisnall 5 * All rights reserved. 6 * 7 * This software was developed by SRI International and the University of 8 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) 9 * ("CTSRD"), as part of the DARPA CRASH research programme. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #include "input_buffer.hh" 34 #include <ctype.h> 35 #include <errno.h> 36 #include <stdint.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <functional> 41 #ifndef NDEBUG 42 #include <iostream> 43 #endif 44 #include <limits> 45 46 47 #include <sys/stat.h> 48 #include <sys/mman.h> 49 #include <assert.h> 50 #include <fcntl.h> 51 #include <unistd.h> 52 53 #ifndef MAP_PREFAULT_READ 54 #define MAP_PREFAULT_READ 0 55 #endif 56 57 using std::string; 58 59 namespace 60 { 61 /** 62 * Subclass of input_buffer that mmap()s a file and owns the resulting memory. 63 * When this object is destroyed, the memory is unmapped. 64 */ 65 struct mmap_input_buffer : public dtc::input_buffer 66 { 67 string fn; 68 const string &filename() const override 69 { 70 return fn; 71 } 72 /** 73 * Constructs a new buffer from the file passed in as a file 74 * descriptor. 75 */ 76 mmap_input_buffer(int fd, string &&filename); 77 /** 78 * Unmaps the buffer, if one exists. 79 */ 80 virtual ~mmap_input_buffer(); 81 }; 82 /** 83 * Input buffer read from standard input. This is used for reading device tree 84 * blobs and source from standard input. It reads the entire input into 85 * malloc'd memory, so will be very slow for large inputs. DTS and DTB files 86 * are very rarely more than 10KB though, so this is probably not a problem. 87 */ 88 struct stream_input_buffer : public dtc::input_buffer 89 { 90 const string &filename() const override 91 { 92 static string n = "<standard input>"; 93 return n; 94 } 95 /** 96 * The buffer that will store the data read from the standard input. 97 */ 98 std::vector<char> b; 99 /** 100 * Constructs a new buffer from the standard input. 101 */ 102 stream_input_buffer(); 103 }; 104 105 mmap_input_buffer::mmap_input_buffer(int fd, string &&filename) 106 : input_buffer(0, 0), fn(filename) 107 { 108 struct stat sb; 109 if (fstat(fd, &sb)) 110 { 111 perror("Failed to stat file"); 112 } 113 size = sb.st_size; 114 buffer = (const char*)mmap(0, size, PROT_READ, MAP_PRIVATE | 115 MAP_PREFAULT_READ, fd, 0); 116 if (buffer == MAP_FAILED) 117 { 118 perror("Failed to mmap file"); 119 exit(EXIT_FAILURE); 120 } 121 } 122 123 mmap_input_buffer::~mmap_input_buffer() 124 { 125 if (buffer != 0) 126 { 127 munmap(const_cast<char*>(buffer), size); 128 } 129 } 130 131 stream_input_buffer::stream_input_buffer() : input_buffer(0, 0) 132 { 133 int c; 134 while ((c = fgetc(stdin)) != EOF) 135 { 136 b.push_back(c); 137 } 138 buffer = b.data(); 139 size = b.size(); 140 } 141 142 } // Anonymous namespace 143 144 145 namespace dtc 146 { 147 148 void 149 input_buffer::skip_to(char c) 150 { 151 while ((cursor < size) && (buffer[cursor] != c)) 152 { 153 cursor++; 154 } 155 } 156 157 void 158 text_input_buffer::skip_to(char c) 159 { 160 while (!finished() && (*(*this) != c)) 161 { 162 ++(*this); 163 } 164 } 165 166 void 167 text_input_buffer::skip_spaces() 168 { 169 if (finished()) { return; } 170 char c = *(*this); 171 bool last_nl = false; 172 while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f') 173 || (c == '\v') || (c == '\r')) 174 { 175 last_nl = ((c == '\n') || (c == '\r')); 176 ++(*this); 177 if (finished()) 178 { 179 c = '\0'; 180 } 181 else 182 { 183 c = *(*this); 184 } 185 } 186 // Skip C preprocessor leftovers 187 if ((c == '#') && ((cursor == 0) || last_nl)) 188 { 189 skip_to('\n'); 190 skip_spaces(); 191 } 192 if (consume("/include/")) 193 { 194 handle_include(); 195 skip_spaces(); 196 } 197 } 198 199 void 200 text_input_buffer::handle_include() 201 { 202 bool reallyInclude = true; 203 if (consume("if ")) 204 { 205 next_token(); 206 string name = parse_property_name(); 207 if (defines.count(name) == 0) 208 { 209 reallyInclude = false; 210 } 211 consume('/'); 212 } 213 next_token(); 214 if (!consume('"')) 215 { 216 parse_error("Expected quoted filename"); 217 return; 218 } 219 auto loc = location(); 220 string file = parse_to('"'); 221 consume('"'); 222 if (!reallyInclude) 223 { 224 return; 225 } 226 string include_file = dir + '/' + file; 227 auto include_buffer = input_buffer::buffer_for_file(include_file, false); 228 if (include_buffer == 0) 229 { 230 for (auto i : include_paths) 231 { 232 include_file = i + '/' + file; 233 include_buffer = input_buffer::buffer_for_file(include_file, false); 234 if (include_buffer != 0) 235 { 236 break; 237 } 238 } 239 } 240 if (depfile) 241 { 242 putc(' ', depfile); 243 fputs(include_file.c_str(), depfile); 244 } 245 if (!include_buffer) 246 { 247 loc.report_error("Unable to locate input file"); 248 return; 249 } 250 input_stack.push(std::move(include_buffer)); 251 } 252 253 bool text_input_buffer::read_binary_file(const std::string &filename, byte_buffer &b) 254 { 255 bool try_include_paths = true; 256 string include_file; 257 if (filename[0] == '/') 258 { 259 include_file = filename; 260 // Don't try include paths if we're given an absolute path. 261 // Failing is better so that we don't accidentally do the wrong thing, 262 // but make it seem like everything is alright. 263 try_include_paths = false; 264 } 265 else 266 { 267 include_file = dir + '/' + filename; 268 } 269 auto include_buffer = input_buffer::buffer_for_file(include_file, false); 270 if (include_buffer == 0 && try_include_paths) 271 { 272 for (auto i : include_paths) 273 { 274 include_file = i + '/' + filename; 275 include_buffer = input_buffer::buffer_for_file(include_file, false); 276 if (include_buffer != 0) 277 { 278 break; 279 } 280 } 281 } 282 if (!include_buffer) 283 { 284 return false; 285 } 286 if (depfile) 287 { 288 putc(' ', depfile); 289 fputs(include_file.c_str(), depfile); 290 } 291 b.insert(b.begin(), include_buffer->begin(), include_buffer->end()); 292 return true; 293 } 294 295 input_buffer 296 input_buffer::buffer_from_offset(int offset, int s) 297 { 298 if (offset < 0) 299 { 300 return input_buffer(); 301 } 302 if (s == 0) 303 { 304 s = size - offset; 305 } 306 if (offset > size) 307 { 308 return input_buffer(); 309 } 310 if (s > (size-offset)) 311 { 312 return input_buffer(); 313 } 314 return input_buffer(&buffer[offset], s); 315 } 316 317 bool 318 input_buffer::consume(const char *str) 319 { 320 int len = strlen(str); 321 if (len > size - cursor) 322 { 323 return false; 324 } 325 else 326 { 327 for (int i=0 ; i<len ; ++i) 328 { 329 if (str[i] != (*this)[i]) 330 { 331 return false; 332 } 333 } 334 cursor += len; 335 return true; 336 } 337 return false; 338 } 339 340 bool 341 input_buffer::consume_char_literal(unsigned long long &outInt) 342 { 343 outInt = (unsigned char)((*this)[0]); 344 cursor++; 345 346 if(outInt != '\\') 347 { 348 return true; 349 } 350 else if(cursor >= size) 351 { 352 return false; 353 } 354 355 outInt = (unsigned char)((*this)[0]); 356 cursor++; 357 358 switch (outInt) { 359 default: 360 return false; 361 case 'n': 362 outInt = (unsigned char)'\n'; 363 break; 364 case 'r': 365 outInt = (unsigned char)'\r'; 366 break; 367 case 't': 368 outInt = (unsigned char)'\t'; 369 break; 370 case '0': 371 outInt = 0; 372 break; 373 case '\'': 374 case '\\': 375 break; 376 } 377 378 return true; 379 } 380 381 bool 382 input_buffer::consume_integer(unsigned long long &outInt) 383 { 384 // The first character must be a digit. Hex and octal strings 385 // are prefixed by 0 and 0x, respectively. 386 if (!isdigit((*this)[0])) 387 { 388 return false; 389 } 390 char *end= const_cast<char*>(&buffer[size]); 391 errno = 0; 392 outInt = strtoull(&buffer[cursor], &end, 0); 393 if (end == &buffer[cursor] || 394 (outInt == std::numeric_limits<unsigned long long>::max() && 395 errno == ERANGE)) 396 { 397 return false; 398 } 399 cursor = end - buffer; 400 return true; 401 } 402 403 namespace { 404 405 /** 406 * Convenience typedef for the type that we use for all values. 407 */ 408 typedef unsigned long long valty; 409 410 /** 411 * Expression tree currently being parsed. 412 */ 413 struct expression 414 { 415 typedef text_input_buffer::source_location source_location; 416 /** 417 * The type that is returned when computing the result. The boolean value 418 * indicates whether this is a valid expression. 419 * 420 * FIXME: Once we can use C++17, this should be `std::optional`. 421 */ 422 typedef std::pair<valty, bool> result; 423 /** 424 * Evaluate this node, taking into account operator precedence. 425 */ 426 virtual result operator()() = 0; 427 /** 428 * Returns the precedence of this node. Lower values indicate higher 429 * precedence. 430 */ 431 virtual int precedence() = 0; 432 /** 433 * Constructs an expression, storing the location where it was created. 434 */ 435 expression(source_location l) : loc(l) {} 436 virtual ~expression() {} 437 #ifndef NDEBUG 438 /** 439 * Dumps this expression to `std::cerr`, appending a newline if `nl` is 440 * `true`. 441 */ 442 void dump(bool nl=false) 443 { 444 void *ptr = this; 445 if (ptr == nullptr) 446 { 447 std::cerr << "{nullptr}\n"; 448 return; 449 } 450 dump_impl(); 451 if (nl) 452 { 453 std::cerr << '\n'; 454 } 455 } 456 private: 457 /** 458 * Method that sublcasses override to implement the behaviour of `dump()`. 459 */ 460 virtual void dump_impl() = 0; 461 #endif 462 protected: 463 source_location loc; 464 }; 465 466 /** 467 * Expression wrapping a single integer. Leaf nodes in the expression tree. 468 */ 469 class terminal_expr : public expression 470 { 471 /** 472 * The value that this wraps. 473 */ 474 valty val; 475 /** 476 * Evaluate. Trivially returns the value that this class wraps. 477 */ 478 result operator()() override 479 { 480 return {val, true}; 481 } 482 int precedence() override 483 { 484 return 0; 485 } 486 public: 487 /** 488 * Constructor. 489 */ 490 terminal_expr(source_location l, valty v) : expression(l), val(v) {} 491 #ifndef NDEBUG 492 void dump_impl() override { std::cerr << val; } 493 #endif 494 }; 495 496 /** 497 * Parenthetical expression. Exists to make the contents opaque. 498 */ 499 struct paren_expression : public expression 500 { 501 /** 502 * The expression within the parentheses. 503 */ 504 expression_ptr subexpr; 505 /** 506 * Constructor. Takes the child expression as the only argument. 507 */ 508 paren_expression(source_location l, expression_ptr p) : expression(l), 509 subexpr(std::move(p)) {} 510 int precedence() override 511 { 512 return 0; 513 } 514 /** 515 * Evaluate - just forwards to the underlying expression. 516 */ 517 result operator()() override 518 { 519 return (*subexpr)(); 520 } 521 #ifndef NDEBUG 522 void dump_impl() override 523 { 524 std::cerr << " ("; 525 subexpr->dump(); 526 std::cerr << ") "; 527 } 528 #endif 529 }; 530 531 /** 532 * Template class for unary operators. The `OpChar` template parameter is 533 * solely for debugging and makes it easy to print the expression. The `Op` 534 * template parameter is a function object that implements the operator that 535 * this class provides. Most of these are provided by the `<functional>` 536 * header. 537 */ 538 template<char OpChar, class Op> 539 class unary_operator : public expression 540 { 541 /** 542 * The subexpression for this unary operator. 543 */ 544 expression_ptr subexpr; 545 result operator()() override 546 { 547 Op op; 548 result s = (*subexpr)(); 549 if (!s.second) 550 { 551 return s; 552 } 553 return {op(s.first), true}; 554 } 555 /** 556 * All unary operators have the same precedence. They are all evaluated 557 * before binary expressions, but after parentheses. 558 */ 559 int precedence() override 560 { 561 return 3; 562 } 563 public: 564 unary_operator(source_location l, expression_ptr p) : 565 expression(l), subexpr(std::move(p)) {} 566 #ifndef NDEBUG 567 void dump_impl() override 568 { 569 std::cerr << OpChar; 570 subexpr->dump(); 571 } 572 #endif 573 }; 574 575 /** 576 * Abstract base class for binary operators. Allows the tree to be modified 577 * without knowing what the operations actually are. 578 */ 579 struct binary_operator_base : public expression 580 { 581 using expression::expression; 582 /** 583 * The left side of the expression. 584 */ 585 expression_ptr lhs; 586 /** 587 * The right side of the expression. 588 */ 589 expression_ptr rhs; 590 /** 591 * Insert a node somewhere down the path of left children, until it would 592 * be preempting something that should execute first. 593 */ 594 void insert_left(binary_operator_base *new_left) 595 { 596 if (lhs->precedence() < new_left->precedence()) 597 { 598 new_left->rhs = std::move(lhs); 599 lhs.reset(new_left); 600 } 601 else 602 { 603 static_cast<binary_operator_base*>(lhs.get())->insert_left(new_left); 604 } 605 } 606 }; 607 608 /** 609 * Template class for binary operators. The precedence and the operation are 610 * provided as template parameters. 611 */ 612 template<int Precedence, class Op> 613 struct binary_operator : public binary_operator_base 614 { 615 result operator()() override 616 { 617 Op op; 618 result l = (*lhs)(); 619 result r = (*rhs)(); 620 if (!(l.second && r.second)) 621 { 622 return {0, false}; 623 } 624 return {op(l.first, r.first), true}; 625 } 626 int precedence() override 627 { 628 return Precedence; 629 } 630 #ifdef NDEBUG 631 /** 632 * Constructor. Takes the name of the operator as an argument, for 633 * debugging. Only stores it in debug mode. 634 */ 635 binary_operator(source_location l, const char *) : 636 binary_operator_base(l) {} 637 #else 638 const char *opName; 639 binary_operator(source_location l, const char *o) : 640 binary_operator_base(l), opName(o) {} 641 void dump_impl() override 642 { 643 lhs->dump(); 644 std::cerr << opName; 645 rhs->dump(); 646 } 647 #endif 648 }; 649 650 /** 651 * Ternary conditional operators (`cond ? true : false`) are a special case - 652 * there are no other ternary operators. 653 */ 654 class ternary_conditional_operator : public expression 655 { 656 /** 657 * The condition for the clause. 658 */ 659 expression_ptr cond; 660 /** 661 * The expression that this evaluates to if the condition is true. 662 */ 663 expression_ptr lhs; 664 /** 665 * The expression that this evaluates to if the condition is false. 666 */ 667 expression_ptr rhs; 668 result operator()() override 669 { 670 result c = (*cond)(); 671 result l = (*lhs)(); 672 result r = (*rhs)(); 673 if (!(l.second && r.second && c.second)) 674 { 675 return {0, false}; 676 } 677 return c.first ? l : r; 678 } 679 int precedence() override 680 { 681 // The actual precedence of a ternary conditional operator is 15, but 682 // its associativity is the opposite way around to the other operators, 683 // so we fudge it slightly. 684 return 3; 685 } 686 #ifndef NDEBUG 687 void dump_impl() override 688 { 689 cond->dump(); 690 std::cerr << " ? "; 691 lhs->dump(); 692 std::cerr << " : "; 693 rhs->dump(); 694 } 695 #endif 696 public: 697 ternary_conditional_operator(source_location sl, 698 expression_ptr c, 699 expression_ptr l, 700 expression_ptr r) : 701 expression(sl), cond(std::move(c)), lhs(std::move(l)), 702 rhs(std::move(r)) {} 703 }; 704 705 template<typename T> 706 struct lshift 707 { 708 constexpr T operator()(const T &lhs, const T &rhs) const 709 { 710 return lhs << rhs; 711 } 712 }; 713 template<typename T> 714 struct rshift 715 { 716 constexpr T operator()(const T &lhs, const T &rhs) const 717 { 718 return lhs >> rhs; 719 } 720 }; 721 template<typename T> 722 struct unary_plus 723 { 724 constexpr T operator()(const T &val) const 725 { 726 return +val; 727 } 728 }; 729 // TODO: Replace with std::bit_not once we can guarantee C++14 as a baseline. 730 template<typename T> 731 struct bit_not 732 { 733 constexpr T operator()(const T &val) const 734 { 735 return ~val; 736 } 737 }; 738 739 template<typename T> 740 struct divmod : public binary_operator<5, T> 741 { 742 using binary_operator<5, T>::binary_operator; 743 using typename binary_operator_base::result; 744 result operator()() override 745 { 746 result r = (*binary_operator_base::rhs)(); 747 if (r.second && (r.first == 0)) 748 { 749 expression::loc.report_error("Division by zero"); 750 return {0, false}; 751 } 752 return binary_operator<5, T>::operator()(); 753 } 754 }; 755 756 } // anonymous namespace 757 758 759 expression_ptr text_input_buffer::parse_binary_expression(expression_ptr lhs) 760 { 761 next_token(); 762 binary_operator_base *expr = nullptr; 763 char op = *(*this); 764 source_location l = location(); 765 switch (op) 766 { 767 default: 768 return lhs; 769 case '+': 770 expr = new binary_operator<6, std::plus<valty>>(l, "+"); 771 break; 772 case '-': 773 expr = new binary_operator<6, std::minus<valty>>(l, "-"); 774 break; 775 case '%': 776 expr = new divmod<std::modulus<valty>>(l, "/"); 777 break; 778 case '*': 779 expr = new binary_operator<5, std::multiplies<valty>>(l, "*"); 780 break; 781 case '/': 782 expr = new divmod<std::divides<valty>>(l, "/"); 783 break; 784 case '<': 785 switch (peek()) 786 { 787 default: 788 parse_error("Invalid operator"); 789 return nullptr; 790 case ' ': 791 case '(': 792 case '0'...'9': 793 expr = new binary_operator<8, std::less<valty>>(l, "<"); 794 break; 795 case '=': 796 ++(*this); 797 expr = new binary_operator<8, std::less_equal<valty>>(l, "<="); 798 break; 799 case '<': 800 ++(*this); 801 expr = new binary_operator<7, lshift<valty>>(l, "<<"); 802 break; 803 } 804 break; 805 case '>': 806 switch (peek()) 807 { 808 default: 809 parse_error("Invalid operator"); 810 return nullptr; 811 case '(': 812 case ' ': 813 case '0'...'9': 814 expr = new binary_operator<8, std::greater<valty>>(l, ">"); 815 break; 816 case '=': 817 ++(*this); 818 expr = new binary_operator<8, std::greater_equal<valty>>(l, ">="); 819 break; 820 case '>': 821 ++(*this); 822 expr = new binary_operator<7, rshift<valty>>(l, ">>"); 823 break; 824 return lhs; 825 } 826 break; 827 case '=': 828 if (peek() != '=') 829 { 830 parse_error("Invalid operator"); 831 return nullptr; 832 } 833 expr = new binary_operator<9, std::equal_to<valty>>(l, "=="); 834 break; 835 case '!': 836 if (peek() != '=') 837 { 838 parse_error("Invalid operator"); 839 return nullptr; 840 } 841 cursor++; 842 expr = new binary_operator<9, std::not_equal_to<valty>>(l, "!="); 843 break; 844 case '&': 845 if (peek() == '&') 846 { 847 expr = new binary_operator<13, std::logical_and<valty>>(l, "&&"); 848 } 849 else 850 { 851 expr = new binary_operator<10, std::bit_and<valty>>(l, "&"); 852 } 853 break; 854 case '|': 855 if (peek() == '|') 856 { 857 expr = new binary_operator<12, std::logical_or<valty>>(l, "||"); 858 } 859 else 860 { 861 expr = new binary_operator<14, std::bit_or<valty>>(l, "|"); 862 } 863 break; 864 case '?': 865 { 866 consume('?'); 867 expression_ptr true_case = parse_expression(); 868 next_token(); 869 if (!true_case || !consume(':')) 870 { 871 parse_error("Expected : in ternary conditional operator"); 872 return nullptr; 873 } 874 expression_ptr false_case = parse_expression(); 875 if (!false_case) 876 { 877 parse_error("Expected false condition for ternary operator"); 878 return nullptr; 879 } 880 return expression_ptr(new ternary_conditional_operator(l, std::move(lhs), 881 std::move(true_case), std::move(false_case))); 882 } 883 } 884 ++(*this); 885 next_token(); 886 expression_ptr e(expr); 887 expression_ptr rhs(parse_expression()); 888 if (!rhs) 889 { 890 return nullptr; 891 } 892 expr->lhs = std::move(lhs); 893 if (rhs->precedence() < expr->precedence()) 894 { 895 expr->rhs = std::move(rhs); 896 } 897 else 898 { 899 // If we're a normal left-to-right expression, then we need to insert 900 // this as the far-left child node of the rhs expression 901 binary_operator_base *rhs_op = 902 static_cast<binary_operator_base*>(rhs.get()); 903 rhs_op->insert_left(expr); 904 e.release(); 905 return rhs; 906 } 907 return e; 908 } 909 910 expression_ptr text_input_buffer::parse_expression(bool stopAtParen) 911 { 912 next_token(); 913 unsigned long long leftVal; 914 expression_ptr lhs; 915 source_location l = location(); 916 switch (*(*this)) 917 { 918 case '\'': 919 consume('\''); 920 if(!consume_char_literal(leftVal)) 921 { 922 return nullptr; 923 } 924 if (!consume('\'')) 925 { 926 return nullptr; 927 } 928 lhs.reset(new terminal_expr(l, leftVal)); 929 break; 930 case '0'...'9': 931 if (!consume_integer(leftVal)) 932 { 933 return nullptr; 934 } 935 lhs.reset(new terminal_expr(l, leftVal)); 936 break; 937 case '(': 938 { 939 consume('('); 940 expression_ptr &&subexpr = parse_expression(); 941 if (!subexpr) 942 { 943 return nullptr; 944 } 945 lhs.reset(new paren_expression(l, std::move(subexpr))); 946 if (!consume(')')) 947 { 948 return nullptr; 949 } 950 if (stopAtParen) 951 { 952 return lhs; 953 } 954 break; 955 } 956 case '+': 957 { 958 consume('+'); 959 expression_ptr &&subexpr = parse_expression(); 960 if (!subexpr) 961 { 962 return nullptr; 963 } 964 lhs.reset(new unary_operator<'+', unary_plus<valty>>(l, std::move(subexpr))); 965 break; 966 } 967 case '-': 968 { 969 consume('-'); 970 expression_ptr &&subexpr = parse_expression(); 971 if (!subexpr) 972 { 973 return nullptr; 974 } 975 lhs.reset(new unary_operator<'-', std::negate<valty>>(l, std::move(subexpr))); 976 break; 977 } 978 case '!': 979 { 980 consume('!'); 981 expression_ptr &&subexpr = parse_expression(); 982 if (!subexpr) 983 { 984 return nullptr; 985 } 986 lhs.reset(new unary_operator<'!', std::logical_not<valty>>(l, std::move(subexpr))); 987 break; 988 } 989 case '~': 990 { 991 consume('~'); 992 expression_ptr &&subexpr = parse_expression(); 993 if (!subexpr) 994 { 995 return nullptr; 996 } 997 lhs.reset(new unary_operator<'~', bit_not<valty>>(l, std::move(subexpr))); 998 break; 999 } 1000 } 1001 if (!lhs) 1002 { 1003 return nullptr; 1004 } 1005 return parse_binary_expression(std::move(lhs)); 1006 } 1007 1008 bool 1009 text_input_buffer::consume_integer_expression(unsigned long long &outInt) 1010 { 1011 switch (*(*this)) 1012 { 1013 case '(': 1014 { 1015 expression_ptr e(parse_expression(true)); 1016 if (!e) 1017 { 1018 return false; 1019 } 1020 auto r = (*e)(); 1021 if (r.second) 1022 { 1023 outInt = r.first; 1024 return true; 1025 } 1026 return false; 1027 } 1028 case '0'...'9': 1029 return consume_integer(outInt); 1030 default: 1031 return false; 1032 } 1033 } 1034 1035 bool 1036 input_buffer::consume_hex_byte(uint8_t &outByte) 1037 { 1038 if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1])) 1039 { 1040 return false; 1041 } 1042 outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]); 1043 cursor += 2; 1044 return true; 1045 } 1046 1047 text_input_buffer& 1048 text_input_buffer::next_token() 1049 { 1050 auto &self = *this; 1051 int start; 1052 do { 1053 start = cursor; 1054 skip_spaces(); 1055 if (finished()) 1056 { 1057 return self; 1058 } 1059 // Parse /* comments 1060 if (*self == '/' && peek() == '*') 1061 { 1062 // eat the start of the comment 1063 ++self; 1064 ++self; 1065 do { 1066 // Find the ending * of */ 1067 while ((*self != '\0') && (*self != '*') && !finished()) 1068 { 1069 ++self; 1070 } 1071 // Eat the * 1072 ++self; 1073 } while ((*self != '\0') && (*self != '/') && !finished()); 1074 // Eat the / 1075 ++self; 1076 } 1077 // Parse // comments 1078 if ((*self == '/' && peek() == '/')) 1079 { 1080 // eat the start of the comment 1081 ++self; 1082 ++self; 1083 // Find the ending of the line 1084 while (*self != '\n' && !finished()) 1085 { 1086 ++self; 1087 } 1088 // Eat the \n 1089 ++self; 1090 } 1091 } while (start != cursor); 1092 return self; 1093 } 1094 1095 void 1096 text_input_buffer::parse_error(const char *msg) 1097 { 1098 if (input_stack.empty()) 1099 { 1100 fprintf(stderr, "Error: %s\n", msg); 1101 return; 1102 } 1103 input_buffer &b = *input_stack.top(); 1104 parse_error(msg, b, b.cursor); 1105 } 1106 void 1107 text_input_buffer::parse_error(const char *msg, 1108 input_buffer &b, 1109 int loc) 1110 { 1111 int line_count = 1; 1112 int line_start = 0; 1113 int line_end = loc; 1114 if (loc < 0 || loc > b.size) 1115 { 1116 return; 1117 } 1118 for (int i=loc ; i>0 ; --i) 1119 { 1120 if (b.buffer[i] == '\n') 1121 { 1122 line_count++; 1123 if (line_start == 0) 1124 { 1125 line_start = i+1; 1126 } 1127 } 1128 } 1129 for (int i=loc+1 ; i<b.size ; ++i) 1130 { 1131 if (b.buffer[i] == '\n') 1132 { 1133 line_end = i; 1134 break; 1135 } 1136 } 1137 fprintf(stderr, "Error at %s:%d:%d: %s\n", b.filename().c_str(), line_count, loc - line_start, msg); 1138 fwrite(&b.buffer[line_start], line_end-line_start, 1, stderr); 1139 putc('\n', stderr); 1140 for (int i=0 ; i<(loc-line_start) ; ++i) 1141 { 1142 char c = (b.buffer[i+line_start] == '\t') ? '\t' : ' '; 1143 putc(c, stderr); 1144 } 1145 putc('^', stderr); 1146 putc('\n', stderr); 1147 } 1148 #ifndef NDEBUG 1149 void 1150 input_buffer::dump() 1151 { 1152 fprintf(stderr, "Current cursor: %d\n", cursor); 1153 fwrite(&buffer[cursor], size-cursor, 1, stderr); 1154 } 1155 #endif 1156 1157 1158 namespace 1159 { 1160 /** 1161 * The source files are ASCII, so we provide a non-locale-aware version of 1162 * isalpha. This is a class so that it can be used with a template function 1163 * for parsing strings. 1164 */ 1165 struct is_alpha 1166 { 1167 static inline bool check(const char c) 1168 { 1169 return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && 1170 (c <= 'Z')); 1171 } 1172 }; 1173 /** 1174 * Check whether a character is in the set allowed for node names. This is a 1175 * class so that it can be used with a template function for parsing strings. 1176 */ 1177 struct is_node_name_character 1178 { 1179 static inline bool check(const char c) 1180 { 1181 switch(c) 1182 { 1183 default: 1184 return false; 1185 case 'a'...'z': case 'A'...'Z': case '0'...'9': 1186 case ',': case '.': case '+': case '-': 1187 case '_': 1188 return true; 1189 } 1190 } 1191 }; 1192 /** 1193 * Check whether a character is in the set allowed for property names. This is 1194 * a class so that it can be used with a template function for parsing strings. 1195 */ 1196 struct is_property_name_character 1197 { 1198 static inline bool check(const char c) 1199 { 1200 switch(c) 1201 { 1202 default: 1203 return false; 1204 case 'a'...'z': case 'A'...'Z': case '0'...'9': 1205 case ',': case '.': case '+': case '-': 1206 case '_': case '#': 1207 return true; 1208 } 1209 } 1210 }; 1211 1212 template<class T> 1213 string parse(text_input_buffer &s) 1214 { 1215 std::vector<char> bytes; 1216 for (char c=*s ; T::check(c) ; c=*(++s)) 1217 { 1218 bytes.push_back(c); 1219 } 1220 return string(bytes.begin(), bytes.end()); 1221 } 1222 1223 } 1224 1225 string 1226 text_input_buffer::parse_node_name() 1227 { 1228 return parse<is_node_name_character>(*this); 1229 } 1230 1231 string 1232 text_input_buffer::parse_property_name() 1233 { 1234 return parse<is_property_name_character>(*this); 1235 } 1236 1237 string 1238 text_input_buffer::parse_node_or_property_name(bool &is_property) 1239 { 1240 if (is_property) 1241 { 1242 return parse_property_name(); 1243 } 1244 std::vector<char> bytes; 1245 for (char c=*(*this) ; is_node_name_character::check(c) ; c=*(++(*this))) 1246 { 1247 bytes.push_back(c); 1248 } 1249 for (char c=*(*this) ; is_property_name_character::check(c) ; c=*(++(*this))) 1250 { 1251 bytes.push_back(c); 1252 is_property = true; 1253 } 1254 return string(bytes.begin(), bytes.end()); 1255 } 1256 1257 string 1258 input_buffer::parse_to(char stop) 1259 { 1260 std::vector<char> bytes; 1261 for (char c=*(*this) ; c != stop ; c=*(++(*this))) 1262 { 1263 bytes.push_back(c); 1264 } 1265 return string(bytes.begin(), bytes.end()); 1266 } 1267 1268 string 1269 text_input_buffer::parse_to(char stop) 1270 { 1271 std::vector<char> bytes; 1272 for (char c=*(*this) ; c != stop ; c=*(++(*this))) 1273 { 1274 if (finished()) 1275 { 1276 break; 1277 } 1278 bytes.push_back(c); 1279 } 1280 return string(bytes.begin(), bytes.end()); 1281 } 1282 1283 char 1284 text_input_buffer::peek() 1285 { 1286 return (*input_stack.top())[1]; 1287 } 1288 1289 std::unique_ptr<input_buffer> 1290 input_buffer::buffer_for_file(const string &path, bool warn) 1291 { 1292 if (path == "-") 1293 { 1294 std::unique_ptr<input_buffer> b(new stream_input_buffer()); 1295 return b; 1296 } 1297 int source = open(path.c_str(), O_RDONLY); 1298 if (source == -1) 1299 { 1300 if (warn) 1301 { 1302 fprintf(stderr, "Unable to open file '%s'. %s\n", path.c_str(), strerror(errno)); 1303 } 1304 return 0; 1305 } 1306 struct stat st; 1307 if (fstat(source, &st) == 0 && S_ISDIR(st.st_mode)) 1308 { 1309 if (warn) 1310 { 1311 fprintf(stderr, "File %s is a directory\n", path.c_str()); 1312 } 1313 close(source); 1314 return 0; 1315 } 1316 std::unique_ptr<input_buffer> b(new mmap_input_buffer(source, string(path))); 1317 close(source); 1318 return b; 1319 } 1320 1321 } // namespace dtc 1322 1323