1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2013 David Chisnall 5 * All rights reserved. 6 * 7 * This software was developed by SRI International and the University of 8 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) 9 * ("CTSRD"), as part of the DARPA CRASH research programme. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * $FreeBSD$ 33 */ 34 35 #include "input_buffer.hh" 36 #include <ctype.h> 37 #include <errno.h> 38 #include <limits.h> 39 #include <stdint.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <functional> 44 #ifndef NDEBUG 45 #include <iostream> 46 #endif 47 48 49 #include <sys/stat.h> 50 #include <sys/mman.h> 51 #include <assert.h> 52 #include <fcntl.h> 53 #include <unistd.h> 54 55 #ifndef MAP_PREFAULT_READ 56 #define MAP_PREFAULT_READ 0 57 #endif 58 59 using std::string; 60 61 namespace 62 { 63 /** 64 * Subclass of input_buffer that mmap()s a file and owns the resulting memory. 65 * When this object is destroyed, the memory is unmapped. 66 */ 67 struct mmap_input_buffer : public dtc::input_buffer 68 { 69 string fn; 70 const string &filename() const override 71 { 72 return fn; 73 } 74 /** 75 * Constructs a new buffer from the file passed in as a file 76 * descriptor. 77 */ 78 mmap_input_buffer(int fd, string &&filename); 79 /** 80 * Unmaps the buffer, if one exists. 81 */ 82 virtual ~mmap_input_buffer(); 83 }; 84 /** 85 * Input buffer read from standard input. This is used for reading device tree 86 * blobs and source from standard input. It reads the entire input into 87 * malloc'd memory, so will be very slow for large inputs. DTS and DTB files 88 * are very rarely more than 10KB though, so this is probably not a problem. 89 */ 90 struct stream_input_buffer : public dtc::input_buffer 91 { 92 const string &filename() const override 93 { 94 static string n = "<standard input>"; 95 return n; 96 } 97 /** 98 * The buffer that will store the data read from the standard input. 99 */ 100 std::vector<char> b; 101 /** 102 * Constructs a new buffer from the standard input. 103 */ 104 stream_input_buffer(); 105 }; 106 107 mmap_input_buffer::mmap_input_buffer(int fd, string &&filename) 108 : input_buffer(0, 0), fn(filename) 109 { 110 struct stat sb; 111 if (fstat(fd, &sb)) 112 { 113 perror("Failed to stat file"); 114 } 115 size = sb.st_size; 116 buffer = (const char*)mmap(0, size, PROT_READ, MAP_PRIVATE | 117 MAP_PREFAULT_READ, fd, 0); 118 if (buffer == MAP_FAILED) 119 { 120 perror("Failed to mmap file"); 121 exit(EXIT_FAILURE); 122 } 123 } 124 125 mmap_input_buffer::~mmap_input_buffer() 126 { 127 if (buffer != 0) 128 { 129 munmap(const_cast<char*>(buffer), size); 130 } 131 } 132 133 stream_input_buffer::stream_input_buffer() : input_buffer(0, 0) 134 { 135 int c; 136 while ((c = fgetc(stdin)) != EOF) 137 { 138 b.push_back(c); 139 } 140 buffer = b.data(); 141 size = b.size(); 142 } 143 144 } // Anonymous namespace 145 146 147 namespace dtc 148 { 149 150 void 151 input_buffer::skip_to(char c) 152 { 153 while ((cursor < size) && (buffer[cursor] != c)) 154 { 155 cursor++; 156 } 157 } 158 159 void 160 text_input_buffer::skip_to(char c) 161 { 162 while (!finished() && (*(*this) != c)) 163 { 164 ++(*this); 165 } 166 } 167 168 void 169 text_input_buffer::skip_spaces() 170 { 171 if (finished()) { return; } 172 char c = *(*this); 173 bool last_nl = false; 174 while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f') 175 || (c == '\v') || (c == '\r')) 176 { 177 last_nl = ((c == '\n') || (c == '\r')); 178 ++(*this); 179 if (finished()) 180 { 181 c = '\0'; 182 } 183 else 184 { 185 c = *(*this); 186 } 187 } 188 // Skip C preprocessor leftovers 189 if ((c == '#') && ((cursor == 0) || last_nl)) 190 { 191 skip_to('\n'); 192 skip_spaces(); 193 } 194 if (consume("/include/")) 195 { 196 handle_include(); 197 skip_spaces(); 198 } 199 } 200 201 void 202 text_input_buffer::handle_include() 203 { 204 bool reallyInclude = true; 205 if (consume("if ")) 206 { 207 next_token(); 208 string name = parse_property_name(); 209 if (defines.count(name) == 0) 210 { 211 reallyInclude = false; 212 } 213 consume('/'); 214 } 215 next_token(); 216 if (!consume('"')) 217 { 218 parse_error("Expected quoted filename"); 219 return; 220 } 221 auto loc = location(); 222 string file = parse_to('"'); 223 consume('"'); 224 if (!reallyInclude) 225 { 226 return; 227 } 228 string include_file = dir + '/' + file; 229 auto include_buffer = input_buffer::buffer_for_file(include_file, false); 230 if (include_buffer == 0) 231 { 232 for (auto i : include_paths) 233 { 234 include_file = i + '/' + file; 235 include_buffer = input_buffer::buffer_for_file(include_file, false); 236 if (include_buffer != 0) 237 { 238 break; 239 } 240 } 241 } 242 if (depfile) 243 { 244 putc(' ', depfile); 245 fputs(include_file.c_str(), depfile); 246 } 247 if (!include_buffer) 248 { 249 loc.report_error("Unable to locate input file"); 250 return; 251 } 252 input_stack.push(std::move(include_buffer)); 253 } 254 255 bool text_input_buffer::read_binary_file(const std::string &filename, byte_buffer &b) 256 { 257 bool try_include_paths = true; 258 string include_file; 259 if (filename[0] == '/') 260 { 261 include_file = filename; 262 // Don't try include paths if we're given an absolute path. 263 // Failing is better so that we don't accidentally do the wrong thing, 264 // but make it seem like everything is alright. 265 try_include_paths = false; 266 } 267 else 268 { 269 include_file = dir + '/' + filename; 270 } 271 auto include_buffer = input_buffer::buffer_for_file(include_file, false); 272 if (include_buffer == 0 && try_include_paths) 273 { 274 for (auto i : include_paths) 275 { 276 include_file = i + '/' + filename; 277 include_buffer = input_buffer::buffer_for_file(include_file, false); 278 if (include_buffer != 0) 279 { 280 break; 281 } 282 } 283 } 284 if (!include_buffer) 285 { 286 return false; 287 } 288 if (depfile) 289 { 290 putc(' ', depfile); 291 fputs(include_file.c_str(), depfile); 292 } 293 b.insert(b.begin(), include_buffer->begin(), include_buffer->end()); 294 return true; 295 } 296 297 input_buffer 298 input_buffer::buffer_from_offset(int offset, int s) 299 { 300 if (offset < 0) 301 { 302 return input_buffer(); 303 } 304 if (s == 0) 305 { 306 s = size - offset; 307 } 308 if (offset > size) 309 { 310 return input_buffer(); 311 } 312 if (s > (size-offset)) 313 { 314 return input_buffer(); 315 } 316 return input_buffer(&buffer[offset], s); 317 } 318 319 bool 320 input_buffer::consume(const char *str) 321 { 322 int len = strlen(str); 323 if (len > size - cursor) 324 { 325 return false; 326 } 327 else 328 { 329 for (int i=0 ; i<len ; ++i) 330 { 331 if (str[i] != (*this)[i]) 332 { 333 return false; 334 } 335 } 336 cursor += len; 337 return true; 338 } 339 return false; 340 } 341 342 bool 343 input_buffer::consume_integer(unsigned long long &outInt) 344 { 345 // The first character must be a digit. Hex and octal strings 346 // are prefixed by 0 and 0x, respectively. 347 if (!isdigit((*this)[0])) 348 { 349 return false; 350 } 351 char *end= const_cast<char*>(&buffer[size]); 352 errno = 0; 353 outInt = strtoull(&buffer[cursor], &end, 0); 354 if (end == &buffer[cursor] || 355 (outInt == std::numeric_limits<unsigned long long>::max() && 356 errno == ERANGE)) 357 { 358 return false; 359 } 360 cursor = end - buffer; 361 return true; 362 } 363 364 namespace { 365 366 /** 367 * Convenience typedef for the type that we use for all values. 368 */ 369 typedef unsigned long long valty; 370 371 /** 372 * Expression tree currently being parsed. 373 */ 374 struct expression 375 { 376 typedef text_input_buffer::source_location source_location; 377 /** 378 * The type that is returned when computing the result. The boolean value 379 * indicates whether this is a valid expression. 380 * 381 * FIXME: Once we can use C++17, this should be `std::optional`. 382 */ 383 typedef std::pair<valty, bool> result; 384 /** 385 * Evaluate this node, taking into account operator precedence. 386 */ 387 virtual result operator()() = 0; 388 /** 389 * Returns the precedence of this node. Lower values indicate higher 390 * precedence. 391 */ 392 virtual int precedence() = 0; 393 /** 394 * Constructs an expression, storing the location where it was created. 395 */ 396 expression(source_location l) : loc(l) {} 397 virtual ~expression() {} 398 #ifndef NDEBUG 399 /** 400 * Dumps this expression to `std::cerr`, appending a newline if `nl` is 401 * `true`. 402 */ 403 void dump(bool nl=false) 404 { 405 void *ptr = this; 406 if (ptr == nullptr) 407 { 408 std::cerr << "{nullptr}\n"; 409 return; 410 } 411 dump_impl(); 412 if (nl) 413 { 414 std::cerr << '\n'; 415 } 416 } 417 private: 418 /** 419 * Method that sublcasses override to implement the behaviour of `dump()`. 420 */ 421 virtual void dump_impl() = 0; 422 #endif 423 protected: 424 source_location loc; 425 }; 426 427 /** 428 * Expression wrapping a single integer. Leaf nodes in the expression tree. 429 */ 430 class terminal_expr : public expression 431 { 432 /** 433 * The value that this wraps. 434 */ 435 valty val; 436 /** 437 * Evaluate. Trivially returns the value that this class wraps. 438 */ 439 result operator()() override 440 { 441 return {val, true}; 442 } 443 int precedence() override 444 { 445 return 0; 446 } 447 public: 448 /** 449 * Constructor. 450 */ 451 terminal_expr(source_location l, valty v) : expression(l), val(v) {} 452 #ifndef NDEBUG 453 void dump_impl() override { std::cerr << val; } 454 #endif 455 }; 456 457 /** 458 * Parenthetical expression. Exists to make the contents opaque. 459 */ 460 struct paren_expression : public expression 461 { 462 /** 463 * The expression within the parentheses. 464 */ 465 expression_ptr subexpr; 466 /** 467 * Constructor. Takes the child expression as the only argument. 468 */ 469 paren_expression(source_location l, expression_ptr p) : expression(l), 470 subexpr(std::move(p)) {} 471 int precedence() override 472 { 473 return 0; 474 } 475 /** 476 * Evaluate - just forwards to the underlying expression. 477 */ 478 result operator()() override 479 { 480 return (*subexpr)(); 481 } 482 #ifndef NDEBUG 483 void dump_impl() override 484 { 485 std::cerr << " ("; 486 subexpr->dump(); 487 std::cerr << ") "; 488 } 489 #endif 490 }; 491 492 /** 493 * Template class for unary operators. The `OpChar` template parameter is 494 * solely for debugging and makes it easy to print the expression. The `Op` 495 * template parameter is a function object that implements the operator that 496 * this class provides. Most of these are provided by the `<functional>` 497 * header. 498 */ 499 template<char OpChar, class Op> 500 class unary_operator : public expression 501 { 502 /** 503 * The subexpression for this unary operator. 504 */ 505 expression_ptr subexpr; 506 result operator()() override 507 { 508 Op op; 509 result s = (*subexpr)(); 510 if (!s.second) 511 { 512 return s; 513 } 514 return {op(s.first), true}; 515 } 516 /** 517 * All unary operators have the same precedence. They are all evaluated 518 * before binary expressions, but after parentheses. 519 */ 520 int precedence() override 521 { 522 return 3; 523 } 524 public: 525 unary_operator(source_location l, expression_ptr p) : 526 expression(l), subexpr(std::move(p)) {} 527 #ifndef NDEBUG 528 void dump_impl() override 529 { 530 std::cerr << OpChar; 531 subexpr->dump(); 532 } 533 #endif 534 }; 535 536 /** 537 * Abstract base class for binary operators. Allows the tree to be modified 538 * without knowing what the operations actually are. 539 */ 540 struct binary_operator_base : public expression 541 { 542 using expression::expression; 543 /** 544 * The left side of the expression. 545 */ 546 expression_ptr lhs; 547 /** 548 * The right side of the expression. 549 */ 550 expression_ptr rhs; 551 /** 552 * Insert a node somewhere down the path of left children, until it would 553 * be preempting something that should execute first. 554 */ 555 void insert_left(binary_operator_base *new_left) 556 { 557 if (lhs->precedence() < new_left->precedence()) 558 { 559 new_left->rhs = std::move(lhs); 560 lhs.reset(new_left); 561 } 562 else 563 { 564 static_cast<binary_operator_base*>(lhs.get())->insert_left(new_left); 565 } 566 } 567 }; 568 569 /** 570 * Template class for binary operators. The precedence and the operation are 571 * provided as template parameters. 572 */ 573 template<int Precedence, class Op> 574 struct binary_operator : public binary_operator_base 575 { 576 result operator()() override 577 { 578 Op op; 579 result l = (*lhs)(); 580 result r = (*rhs)(); 581 if (!(l.second && r.second)) 582 { 583 return {0, false}; 584 } 585 return {op(l.first, r.first), true}; 586 } 587 int precedence() override 588 { 589 return Precedence; 590 } 591 #ifdef NDEBUG 592 /** 593 * Constructor. Takes the name of the operator as an argument, for 594 * debugging. Only stores it in debug mode. 595 */ 596 binary_operator(source_location l, const char *) : 597 binary_operator_base(l) {} 598 #else 599 const char *opName; 600 binary_operator(source_location l, const char *o) : 601 binary_operator_base(l), opName(o) {} 602 void dump_impl() override 603 { 604 lhs->dump(); 605 std::cerr << opName; 606 rhs->dump(); 607 } 608 #endif 609 }; 610 611 /** 612 * Ternary conditional operators (`cond ? true : false`) are a special case - 613 * there are no other ternary operators. 614 */ 615 class ternary_conditional_operator : public expression 616 { 617 /** 618 * The condition for the clause. 619 */ 620 expression_ptr cond; 621 /** 622 * The expression that this evaluates to if the condition is true. 623 */ 624 expression_ptr lhs; 625 /** 626 * The expression that this evaluates to if the condition is false. 627 */ 628 expression_ptr rhs; 629 result operator()() override 630 { 631 result c = (*cond)(); 632 result l = (*lhs)(); 633 result r = (*rhs)(); 634 if (!(l.second && r.second && c.second)) 635 { 636 return {0, false}; 637 } 638 return c.first ? l : r; 639 } 640 int precedence() override 641 { 642 // The actual precedence of a ternary conditional operator is 15, but 643 // its associativity is the opposite way around to the other operators, 644 // so we fudge it slightly. 645 return 3; 646 } 647 #ifndef NDEBUG 648 void dump_impl() override 649 { 650 cond->dump(); 651 std::cerr << " ? "; 652 lhs->dump(); 653 std::cerr << " : "; 654 rhs->dump(); 655 } 656 #endif 657 public: 658 ternary_conditional_operator(source_location sl, 659 expression_ptr c, 660 expression_ptr l, 661 expression_ptr r) : 662 expression(sl), cond(std::move(c)), lhs(std::move(l)), 663 rhs(std::move(r)) {} 664 }; 665 666 template<typename T> 667 struct lshift 668 { 669 constexpr T operator()(const T &lhs, const T &rhs) const 670 { 671 return lhs << rhs; 672 } 673 }; 674 template<typename T> 675 struct rshift 676 { 677 constexpr T operator()(const T &lhs, const T &rhs) const 678 { 679 return lhs >> rhs; 680 } 681 }; 682 template<typename T> 683 struct unary_plus 684 { 685 constexpr T operator()(const T &val) const 686 { 687 return +val; 688 } 689 }; 690 // TODO: Replace with std::bit_not once we can guarantee C++14 as a baseline. 691 template<typename T> 692 struct bit_not 693 { 694 constexpr T operator()(const T &val) const 695 { 696 return ~val; 697 } 698 }; 699 700 template<typename T> 701 struct divmod : public binary_operator<5, T> 702 { 703 using binary_operator<5, T>::binary_operator; 704 using typename binary_operator_base::result; 705 result operator()() override 706 { 707 result r = (*binary_operator_base::rhs)(); 708 if (r.second && (r.first == 0)) 709 { 710 expression::loc.report_error("Division by zero"); 711 return {0, false}; 712 } 713 return binary_operator<5, T>::operator()(); 714 } 715 }; 716 717 } // anonymous namespace 718 719 720 expression_ptr text_input_buffer::parse_binary_expression(expression_ptr lhs) 721 { 722 next_token(); 723 binary_operator_base *expr = nullptr; 724 char op = *(*this); 725 source_location l = location(); 726 switch (op) 727 { 728 default: 729 return lhs; 730 case '+': 731 expr = new binary_operator<6, std::plus<valty>>(l, "+"); 732 break; 733 case '-': 734 expr = new binary_operator<6, std::minus<valty>>(l, "-"); 735 break; 736 case '%': 737 expr = new divmod<std::modulus<valty>>(l, "/"); 738 break; 739 case '*': 740 expr = new binary_operator<5, std::multiplies<valty>>(l, "*"); 741 break; 742 case '/': 743 expr = new divmod<std::divides<valty>>(l, "/"); 744 break; 745 case '<': 746 switch (peek()) 747 { 748 default: 749 parse_error("Invalid operator"); 750 return nullptr; 751 case ' ': 752 case '(': 753 case '0'...'9': 754 expr = new binary_operator<8, std::less<valty>>(l, "<"); 755 break; 756 case '=': 757 ++(*this); 758 expr = new binary_operator<8, std::less_equal<valty>>(l, "<="); 759 break; 760 case '<': 761 ++(*this); 762 expr = new binary_operator<7, lshift<valty>>(l, "<<"); 763 break; 764 } 765 break; 766 case '>': 767 switch (peek()) 768 { 769 default: 770 parse_error("Invalid operator"); 771 return nullptr; 772 case '(': 773 case ' ': 774 case '0'...'9': 775 expr = new binary_operator<8, std::greater<valty>>(l, ">"); 776 break; 777 case '=': 778 ++(*this); 779 expr = new binary_operator<8, std::greater_equal<valty>>(l, ">="); 780 break; 781 case '>': 782 ++(*this); 783 expr = new binary_operator<7, rshift<valty>>(l, ">>"); 784 break; 785 return lhs; 786 } 787 break; 788 case '=': 789 if (peek() != '=') 790 { 791 parse_error("Invalid operator"); 792 return nullptr; 793 } 794 expr = new binary_operator<9, std::equal_to<valty>>(l, "=="); 795 break; 796 case '!': 797 if (peek() != '=') 798 { 799 parse_error("Invalid operator"); 800 return nullptr; 801 } 802 cursor++; 803 expr = new binary_operator<9, std::not_equal_to<valty>>(l, "!="); 804 break; 805 case '&': 806 if (peek() == '&') 807 { 808 expr = new binary_operator<13, std::logical_and<valty>>(l, "&&"); 809 } 810 else 811 { 812 expr = new binary_operator<10, std::bit_and<valty>>(l, "&"); 813 } 814 break; 815 case '|': 816 if (peek() == '|') 817 { 818 expr = new binary_operator<12, std::logical_or<valty>>(l, "||"); 819 } 820 else 821 { 822 expr = new binary_operator<14, std::bit_or<valty>>(l, "|"); 823 } 824 break; 825 case '?': 826 { 827 consume('?'); 828 expression_ptr true_case = parse_expression(); 829 next_token(); 830 if (!true_case || !consume(':')) 831 { 832 parse_error("Expected : in ternary conditional operator"); 833 return nullptr; 834 } 835 expression_ptr false_case = parse_expression(); 836 if (!false_case) 837 { 838 parse_error("Expected false condition for ternary operator"); 839 return nullptr; 840 } 841 return expression_ptr(new ternary_conditional_operator(l, std::move(lhs), 842 std::move(true_case), std::move(false_case))); 843 } 844 } 845 ++(*this); 846 next_token(); 847 expression_ptr e(expr); 848 expression_ptr rhs(parse_expression()); 849 if (!rhs) 850 { 851 return nullptr; 852 } 853 expr->lhs = std::move(lhs); 854 if (rhs->precedence() < expr->precedence()) 855 { 856 expr->rhs = std::move(rhs); 857 } 858 else 859 { 860 // If we're a normal left-to-right expression, then we need to insert 861 // this as the far-left child node of the rhs expression 862 binary_operator_base *rhs_op = 863 static_cast<binary_operator_base*>(rhs.get()); 864 rhs_op->insert_left(expr); 865 e.release(); 866 return rhs; 867 } 868 return e; 869 } 870 871 expression_ptr text_input_buffer::parse_expression(bool stopAtParen) 872 { 873 next_token(); 874 unsigned long long leftVal; 875 expression_ptr lhs; 876 source_location l = location(); 877 switch (*(*this)) 878 { 879 case '0'...'9': 880 if (!consume_integer(leftVal)) 881 { 882 return nullptr; 883 } 884 lhs.reset(new terminal_expr(l, leftVal)); 885 break; 886 case '(': 887 { 888 consume('('); 889 expression_ptr &&subexpr = parse_expression(); 890 if (!subexpr) 891 { 892 return nullptr; 893 } 894 lhs.reset(new paren_expression(l, std::move(subexpr))); 895 if (!consume(')')) 896 { 897 return nullptr; 898 } 899 if (stopAtParen) 900 { 901 return lhs; 902 } 903 break; 904 } 905 case '+': 906 { 907 consume('+'); 908 expression_ptr &&subexpr = parse_expression(); 909 if (!subexpr) 910 { 911 return nullptr; 912 } 913 lhs.reset(new unary_operator<'+', unary_plus<valty>>(l, std::move(subexpr))); 914 break; 915 } 916 case '-': 917 { 918 consume('-'); 919 expression_ptr &&subexpr = parse_expression(); 920 if (!subexpr) 921 { 922 return nullptr; 923 } 924 lhs.reset(new unary_operator<'-', std::negate<valty>>(l, std::move(subexpr))); 925 break; 926 } 927 case '!': 928 { 929 consume('!'); 930 expression_ptr &&subexpr = parse_expression(); 931 if (!subexpr) 932 { 933 return nullptr; 934 } 935 lhs.reset(new unary_operator<'!', std::logical_not<valty>>(l, std::move(subexpr))); 936 break; 937 } 938 case '~': 939 { 940 consume('~'); 941 expression_ptr &&subexpr = parse_expression(); 942 if (!subexpr) 943 { 944 return nullptr; 945 } 946 lhs.reset(new unary_operator<'~', bit_not<valty>>(l, std::move(subexpr))); 947 break; 948 } 949 } 950 if (!lhs) 951 { 952 return nullptr; 953 } 954 return parse_binary_expression(std::move(lhs)); 955 } 956 957 bool 958 text_input_buffer::consume_integer_expression(unsigned long long &outInt) 959 { 960 switch (*(*this)) 961 { 962 case '(': 963 { 964 expression_ptr e(parse_expression(true)); 965 if (!e) 966 { 967 return false; 968 } 969 auto r = (*e)(); 970 if (r.second) 971 { 972 outInt = r.first; 973 return true; 974 } 975 return false; 976 } 977 case '0'...'9': 978 return consume_integer(outInt); 979 default: 980 return false; 981 } 982 } 983 984 bool 985 input_buffer::consume_hex_byte(uint8_t &outByte) 986 { 987 if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1])) 988 { 989 return false; 990 } 991 outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]); 992 cursor += 2; 993 return true; 994 } 995 996 text_input_buffer& 997 text_input_buffer::next_token() 998 { 999 auto &self = *this; 1000 int start; 1001 do { 1002 start = cursor; 1003 skip_spaces(); 1004 if (finished()) 1005 { 1006 return self; 1007 } 1008 // Parse /* comments 1009 if (*self == '/' && peek() == '*') 1010 { 1011 // eat the start of the comment 1012 ++self; 1013 ++self; 1014 do { 1015 // Find the ending * of */ 1016 while ((*self != '\0') && (*self != '*') && !finished()) 1017 { 1018 ++self; 1019 } 1020 // Eat the * 1021 ++self; 1022 } while ((*self != '\0') && (*self != '/') && !finished()); 1023 // Eat the / 1024 ++self; 1025 } 1026 // Parse // comments 1027 if ((*self == '/' && peek() == '/')) 1028 { 1029 // eat the start of the comment 1030 ++self; 1031 ++self; 1032 // Find the ending of the line 1033 while (*self != '\n' && !finished()) 1034 { 1035 ++self; 1036 } 1037 // Eat the \n 1038 ++self; 1039 } 1040 } while (start != cursor); 1041 return self; 1042 } 1043 1044 void 1045 text_input_buffer::parse_error(const char *msg) 1046 { 1047 if (input_stack.empty()) 1048 { 1049 fprintf(stderr, "Error: %s\n", msg); 1050 return; 1051 } 1052 input_buffer &b = *input_stack.top(); 1053 parse_error(msg, b, b.cursor); 1054 } 1055 void 1056 text_input_buffer::parse_error(const char *msg, 1057 input_buffer &b, 1058 int loc) 1059 { 1060 int line_count = 1; 1061 int line_start = 0; 1062 int line_end = loc; 1063 if (loc < 0 || loc > b.size) 1064 { 1065 return; 1066 } 1067 for (int i=loc ; i>0 ; --i) 1068 { 1069 if (b.buffer[i] == '\n') 1070 { 1071 line_count++; 1072 if (line_start == 0) 1073 { 1074 line_start = i+1; 1075 } 1076 } 1077 } 1078 for (int i=loc+1 ; i<b.size ; ++i) 1079 { 1080 if (b.buffer[i] == '\n') 1081 { 1082 line_end = i; 1083 break; 1084 } 1085 } 1086 fprintf(stderr, "Error at %s:%d:%d: %s\n", b.filename().c_str(), line_count, loc - line_start, msg); 1087 fwrite(&b.buffer[line_start], line_end-line_start, 1, stderr); 1088 putc('\n', stderr); 1089 for (int i=0 ; i<(loc-line_start) ; ++i) 1090 { 1091 char c = (b.buffer[i+line_start] == '\t') ? '\t' : ' '; 1092 putc(c, stderr); 1093 } 1094 putc('^', stderr); 1095 putc('\n', stderr); 1096 } 1097 #ifndef NDEBUG 1098 void 1099 input_buffer::dump() 1100 { 1101 fprintf(stderr, "Current cursor: %d\n", cursor); 1102 fwrite(&buffer[cursor], size-cursor, 1, stderr); 1103 } 1104 #endif 1105 1106 1107 namespace 1108 { 1109 /** 1110 * The source files are ASCII, so we provide a non-locale-aware version of 1111 * isalpha. This is a class so that it can be used with a template function 1112 * for parsing strings. 1113 */ 1114 struct is_alpha 1115 { 1116 static inline bool check(const char c) 1117 { 1118 return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && 1119 (c <= 'Z')); 1120 } 1121 }; 1122 /** 1123 * Check whether a character is in the set allowed for node names. This is a 1124 * class so that it can be used with a template function for parsing strings. 1125 */ 1126 struct is_node_name_character 1127 { 1128 static inline bool check(const char c) 1129 { 1130 switch(c) 1131 { 1132 default: 1133 return false; 1134 case 'a'...'z': case 'A'...'Z': case '0'...'9': 1135 case ',': case '.': case '+': case '-': 1136 case '_': 1137 return true; 1138 } 1139 } 1140 }; 1141 /** 1142 * Check whether a character is in the set allowed for property names. This is 1143 * a class so that it can be used with a template function for parsing strings. 1144 */ 1145 struct is_property_name_character 1146 { 1147 static inline bool check(const char c) 1148 { 1149 switch(c) 1150 { 1151 default: 1152 return false; 1153 case 'a'...'z': case 'A'...'Z': case '0'...'9': 1154 case ',': case '.': case '+': case '-': 1155 case '_': case '#': 1156 return true; 1157 } 1158 } 1159 }; 1160 1161 template<class T> 1162 string parse(text_input_buffer &s) 1163 { 1164 std::vector<char> bytes; 1165 for (char c=*s ; T::check(c) ; c=*(++s)) 1166 { 1167 bytes.push_back(c); 1168 } 1169 return string(bytes.begin(), bytes.end()); 1170 } 1171 1172 } 1173 1174 string 1175 text_input_buffer::parse_node_name() 1176 { 1177 return parse<is_node_name_character>(*this); 1178 } 1179 1180 string 1181 text_input_buffer::parse_property_name() 1182 { 1183 return parse<is_property_name_character>(*this); 1184 } 1185 1186 string 1187 text_input_buffer::parse_node_or_property_name(bool &is_property) 1188 { 1189 if (is_property) 1190 { 1191 return parse_property_name(); 1192 } 1193 std::vector<char> bytes; 1194 for (char c=*(*this) ; is_node_name_character::check(c) ; c=*(++(*this))) 1195 { 1196 bytes.push_back(c); 1197 } 1198 for (char c=*(*this) ; is_property_name_character::check(c) ; c=*(++(*this))) 1199 { 1200 bytes.push_back(c); 1201 is_property = true; 1202 } 1203 return string(bytes.begin(), bytes.end()); 1204 } 1205 1206 string 1207 input_buffer::parse_to(char stop) 1208 { 1209 std::vector<char> bytes; 1210 for (char c=*(*this) ; c != stop ; c=*(++(*this))) 1211 { 1212 bytes.push_back(c); 1213 } 1214 return string(bytes.begin(), bytes.end()); 1215 } 1216 1217 string 1218 text_input_buffer::parse_to(char stop) 1219 { 1220 std::vector<char> bytes; 1221 for (char c=*(*this) ; c != stop ; c=*(++(*this))) 1222 { 1223 if (finished()) 1224 { 1225 break; 1226 } 1227 bytes.push_back(c); 1228 } 1229 return string(bytes.begin(), bytes.end()); 1230 } 1231 1232 char 1233 text_input_buffer::peek() 1234 { 1235 return (*input_stack.top())[1]; 1236 } 1237 1238 std::unique_ptr<input_buffer> 1239 input_buffer::buffer_for_file(const string &path, bool warn) 1240 { 1241 if (path == "-") 1242 { 1243 std::unique_ptr<input_buffer> b(new stream_input_buffer()); 1244 return b; 1245 } 1246 int source = open(path.c_str(), O_RDONLY); 1247 if (source == -1) 1248 { 1249 if (warn) 1250 { 1251 fprintf(stderr, "Unable to open file '%s'. %s\n", path.c_str(), strerror(errno)); 1252 } 1253 return 0; 1254 } 1255 struct stat st; 1256 if (fstat(source, &st) == 0 && S_ISDIR(st.st_mode)) 1257 { 1258 if (warn) 1259 { 1260 fprintf(stderr, "File %s is a directory\n", path.c_str()); 1261 } 1262 close(source); 1263 return 0; 1264 } 1265 std::unique_ptr<input_buffer> b(new mmap_input_buffer(source, string(path))); 1266 close(source); 1267 return b; 1268 } 1269 1270 } // namespace dtc 1271 1272