1 /*- 2 * Copyright (c) 2013 David Chisnall 3 * All rights reserved. 4 * 5 * This software was developed by SRI International and the University of 6 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) 7 * ("CTSRD"), as part of the DARPA CRASH research programme. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 * $FreeBSD$ 31 */ 32 33 #include "input_buffer.hh" 34 #include <ctype.h> 35 #include <limits.h> 36 #include <stdint.h> 37 #include <stdio.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <functional> 41 #ifndef NDEBUG 42 #include <iostream> 43 #endif 44 45 46 #include <sys/stat.h> 47 #include <sys/mman.h> 48 #include <assert.h> 49 #include <fcntl.h> 50 #include <unistd.h> 51 52 #ifndef MAP_PREFAULT_READ 53 #define MAP_PREFAULT_READ 0 54 #endif 55 56 using std::string; 57 58 namespace 59 { 60 /** 61 * Subclass of input_buffer that mmap()s a file and owns the resulting memory. 62 * When this object is destroyed, the memory is unmapped. 63 */ 64 struct mmap_input_buffer : public dtc::input_buffer 65 { 66 string fn; 67 const string &filename() const override 68 { 69 return fn; 70 } 71 /** 72 * Constructs a new buffer from the file passed in as a file 73 * descriptor. 74 */ 75 mmap_input_buffer(int fd, string &&filename); 76 /** 77 * Unmaps the buffer, if one exists. 78 */ 79 virtual ~mmap_input_buffer(); 80 }; 81 /** 82 * Input buffer read from standard input. This is used for reading device tree 83 * blobs and source from standard input. It reads the entire input into 84 * malloc'd memory, so will be very slow for large inputs. DTS and DTB files 85 * are very rarely more than 10KB though, so this is probably not a problem. 86 */ 87 struct stream_input_buffer : public dtc::input_buffer 88 { 89 const string &filename() const override 90 { 91 static string n = "<standard input>"; 92 return n; 93 } 94 /** 95 * The buffer that will store the data read from the standard input. 96 */ 97 std::vector<char> b; 98 /** 99 * Constructs a new buffer from the standard input. 100 */ 101 stream_input_buffer(); 102 }; 103 104 mmap_input_buffer::mmap_input_buffer(int fd, std::string &&filename) 105 : input_buffer(0, 0), fn(filename) 106 { 107 struct stat sb; 108 if (fstat(fd, &sb)) 109 { 110 perror("Failed to stat file"); 111 } 112 size = sb.st_size; 113 buffer = (const char*)mmap(0, size, PROT_READ, MAP_PRIVATE | 114 MAP_PREFAULT_READ, fd, 0); 115 if (buffer == MAP_FAILED) 116 { 117 perror("Failed to mmap file"); 118 exit(EXIT_FAILURE); 119 } 120 } 121 122 mmap_input_buffer::~mmap_input_buffer() 123 { 124 if (buffer != 0) 125 { 126 munmap((void*)buffer, size); 127 } 128 } 129 130 stream_input_buffer::stream_input_buffer() : input_buffer(0, 0) 131 { 132 int c; 133 while ((c = fgetc(stdin)) != EOF) 134 { 135 b.push_back(c); 136 } 137 buffer = b.data(); 138 size = b.size(); 139 } 140 141 } // Anonymous namespace 142 143 144 namespace dtc 145 { 146 147 void 148 input_buffer::skip_to(char c) 149 { 150 while ((cursor < size) && (buffer[cursor] != c)) 151 { 152 cursor++; 153 } 154 } 155 156 void 157 text_input_buffer::skip_to(char c) 158 { 159 while (!finished() && (*(*this) != c)) 160 { 161 ++(*this); 162 } 163 } 164 165 void 166 text_input_buffer::skip_spaces() 167 { 168 if (finished()) { return; } 169 char c = *(*this); 170 bool last_nl = false; 171 while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f') 172 || (c == '\v') || (c == '\r')) 173 { 174 last_nl = ((c == '\n') || (c == '\r')); 175 ++(*this); 176 if (finished()) 177 { 178 c = '\0'; 179 } 180 else 181 { 182 c = *(*this); 183 } 184 } 185 // Skip C preprocessor leftovers 186 if ((c == '#') && ((cursor == 0) || last_nl)) 187 { 188 skip_to('\n'); 189 skip_spaces(); 190 } 191 if (consume("/include/")) 192 { 193 handle_include(); 194 skip_spaces(); 195 } 196 } 197 198 void 199 text_input_buffer::handle_include() 200 { 201 bool reallyInclude = true; 202 if (consume("if ")) 203 { 204 next_token(); 205 string name = parse_property_name(); 206 if (defines.count(name) > 0) 207 { 208 reallyInclude = true; 209 } 210 consume('/'); 211 } 212 next_token(); 213 if (!consume('"')) 214 { 215 parse_error("Expected quoted filename"); 216 return; 217 } 218 string file = parse_to('"'); 219 consume('"'); 220 if (!reallyInclude) 221 { 222 return; 223 } 224 string include_file = dir + '/' + file; 225 auto include_buffer = input_buffer::buffer_for_file(include_file, false); 226 if (include_buffer == 0) 227 { 228 for (auto i : include_paths) 229 { 230 include_file = i + '/' + file; 231 include_buffer = input_buffer::buffer_for_file(include_file, false); 232 if (include_buffer != 0) 233 { 234 break; 235 } 236 } 237 } 238 if (depfile) 239 { 240 putc(' ', depfile); 241 fputs(include_file.c_str(), depfile); 242 } 243 if (!include_buffer) 244 { 245 parse_error("Unable to locate input file"); 246 return; 247 } 248 input_stack.push(std::move(include_buffer)); 249 } 250 251 input_buffer 252 input_buffer::buffer_from_offset(int offset, int s) 253 { 254 if (offset < 0) 255 { 256 return input_buffer(); 257 } 258 if (s == 0) 259 { 260 s = size - offset; 261 } 262 if (offset > size) 263 { 264 return input_buffer(); 265 } 266 if (s > (size-offset)) 267 { 268 return input_buffer(); 269 } 270 return input_buffer(&buffer[offset], s); 271 } 272 273 bool 274 input_buffer::consume(const char *str) 275 { 276 int len = strlen(str); 277 if (len > size - cursor) 278 { 279 return false; 280 } 281 else 282 { 283 for (int i=0 ; i<len ; ++i) 284 { 285 if (str[i] != (*this)[i]) 286 { 287 return false; 288 } 289 } 290 cursor += len; 291 return true; 292 } 293 return false; 294 } 295 296 bool 297 input_buffer::consume_integer(unsigned long long &outInt) 298 { 299 // The first character must be a digit. Hex and octal strings 300 // are prefixed by 0 and 0x, respectively. 301 if (!isdigit((*this)[0])) 302 { 303 return false; 304 } 305 char *end= const_cast<char*>(&buffer[size]); 306 outInt = strtoull(&buffer[cursor], &end, 0); 307 if (end == &buffer[cursor]) 308 { 309 return false; 310 } 311 cursor = end - buffer; 312 return true; 313 } 314 315 namespace { 316 317 /** 318 * Convenience typedef for the type that we use for all values. 319 */ 320 typedef unsigned long long valty; 321 322 /** 323 * Expression tree currently being parsed. 324 */ 325 struct expression 326 { 327 typedef text_input_buffer::source_location source_location; 328 /** 329 * The type that is returned when computing the result. The boolean value 330 * indicates whether this is a valid expression. 331 * 332 * FIXME: Once we can use C++17, this should be `std::optional`. 333 */ 334 typedef std::pair<valty, bool> result; 335 /** 336 * Evaluate this node, taking into account operator precedence. 337 */ 338 virtual result operator()() = 0; 339 /** 340 * Returns the precedence of this node. Lower values indicate higher 341 * precedence. 342 */ 343 virtual int precedence() = 0; 344 /** 345 * Constructs an expression, storing the location where it was created. 346 */ 347 expression(source_location l) : loc(l) {} 348 virtual ~expression() {} 349 #ifndef NDEBUG 350 /** 351 * Dumps this expression to `std::cerr`, appending a newline if `nl` is 352 * `true`. 353 */ 354 void dump(bool nl=false) 355 { 356 void *ptr = this; 357 if (ptr == nullptr) 358 { 359 std::cerr << "{nullptr}\n"; 360 return; 361 } 362 dump_impl(); 363 if (nl) 364 { 365 std::cerr << '\n'; 366 } 367 } 368 private: 369 /** 370 * Method that sublcasses override to implement the behaviour of `dump()`. 371 */ 372 virtual void dump_impl() = 0; 373 #endif 374 protected: 375 source_location loc; 376 }; 377 378 /** 379 * Expression wrapping a single integer. Leaf nodes in the expression tree. 380 */ 381 class terminal_expr : public expression 382 { 383 /** 384 * The value that this wraps. 385 */ 386 valty val; 387 /** 388 * Evaluate. Trivially returns the value that this class wraps. 389 */ 390 result operator()() override 391 { 392 return {val, true}; 393 } 394 int precedence() override 395 { 396 return 0; 397 } 398 public: 399 /** 400 * Constructor. 401 */ 402 terminal_expr(source_location l, valty v) : expression(l), val(v) {} 403 #ifndef NDEBUG 404 void dump_impl() override { std::cerr << val; } 405 #endif 406 }; 407 408 /** 409 * Parenthetical expression. Exists to make the contents opaque. 410 */ 411 struct paren_expression : public expression 412 { 413 /** 414 * The expression within the parentheses. 415 */ 416 expression_ptr subexpr; 417 /** 418 * Constructor. Takes the child expression as the only argument. 419 */ 420 paren_expression(source_location l, expression_ptr p) : expression(l), 421 subexpr(std::move(p)) {} 422 int precedence() override 423 { 424 return 0; 425 } 426 /** 427 * Evaluate - just forwards to the underlying expression. 428 */ 429 result operator()() override 430 { 431 return (*subexpr)(); 432 } 433 #ifndef NDEBUG 434 void dump_impl() override 435 { 436 std::cerr << " ("; 437 subexpr->dump(); 438 std::cerr << ") "; 439 } 440 #endif 441 }; 442 443 /** 444 * Template class for unary operators. The `OpChar` template parameter is 445 * solely for debugging and makes it easy to print the expression. The `Op` 446 * template parameter is a function object that implements the operator that 447 * this class provides. Most of these are provided by the `<functional>` 448 * header. 449 */ 450 template<char OpChar, class Op> 451 class unary_operator : public expression 452 { 453 /** 454 * The subexpression for this unary operator. 455 */ 456 expression_ptr subexpr; 457 result operator()() override 458 { 459 Op op; 460 result s = (*subexpr)(); 461 if (!s.second) 462 { 463 return s; 464 } 465 return {op(s.first), true}; 466 } 467 /** 468 * All unary operators have the same precedence. They are all evaluated 469 * before binary expressions, but after parentheses. 470 */ 471 int precedence() override 472 { 473 return 3; 474 } 475 public: 476 unary_operator(source_location l, expression_ptr p) : 477 expression(l), subexpr(std::move(p)) {} 478 #ifndef NDEBUG 479 void dump_impl() override 480 { 481 std::cerr << OpChar; 482 subexpr->dump(); 483 } 484 #endif 485 }; 486 487 /** 488 * Abstract base class for binary operators. Allows the tree to be modified 489 * without knowing what the operations actually are. 490 */ 491 struct binary_operator_base : public expression 492 { 493 using expression::expression; 494 /** 495 * The left side of the expression. 496 */ 497 expression_ptr lhs; 498 /** 499 * The right side of the expression. 500 */ 501 expression_ptr rhs; 502 /** 503 * Insert a node somewhere down the path of left children, until it would 504 * be preempting something that should execute first. 505 */ 506 void insert_left(binary_operator_base *new_left) 507 { 508 if (lhs->precedence() < new_left->precedence()) 509 { 510 new_left->rhs = std::move(lhs); 511 lhs.reset(new_left); 512 } 513 else 514 { 515 static_cast<binary_operator_base*>(lhs.get())->insert_left(new_left); 516 } 517 } 518 }; 519 520 /** 521 * Template class for binary operators. The precedence and the operation are 522 * provided as template parameters. 523 */ 524 template<int Precedence, class Op> 525 struct binary_operator : public binary_operator_base 526 { 527 result operator()() override 528 { 529 Op op; 530 result l = (*lhs)(); 531 result r = (*rhs)(); 532 if (!(l.second && r.second)) 533 { 534 return {0, false}; 535 } 536 return {op(l.first, r.first), true}; 537 } 538 int precedence() override 539 { 540 return Precedence; 541 } 542 #ifdef NDEBUG 543 /** 544 * Constructor. Takes the name of the operator as an argument, for 545 * debugging. Only stores it in debug mode. 546 */ 547 binary_operator(source_location l, const char *) : expression(l) {} 548 #else 549 const char *opName; 550 binary_operator(source_location l, const char *o) : 551 binary_operator_base(l), opName(o) {} 552 void dump_impl() override 553 { 554 lhs->dump(); 555 std::cerr << opName; 556 rhs->dump(); 557 } 558 #endif 559 }; 560 561 /** 562 * Ternary conditional operators (`cond ? true : false`) are a special case - 563 * there are no other ternary operators. 564 */ 565 class ternary_conditional_operator : public expression 566 { 567 /** 568 * The condition for the clause. 569 */ 570 expression_ptr cond; 571 /** 572 * The expression that this evaluates to if the condition is true. 573 */ 574 expression_ptr lhs; 575 /** 576 * The expression that this evaluates to if the condition is false. 577 */ 578 expression_ptr rhs; 579 result operator()() override 580 { 581 result c = (*cond)(); 582 result l = (*lhs)(); 583 result r = (*rhs)(); 584 if (!(l.second && r.second && c.second)) 585 { 586 return {0, false}; 587 } 588 return c.first ? l : r; 589 } 590 int precedence() override 591 { 592 // The actual precedence of a ternary conditional operator is 15, but 593 // its associativity is the opposite way around to the other operators, 594 // so we fudge it slightly. 595 return 3; 596 } 597 #ifndef NDEBUG 598 void dump_impl() override 599 { 600 cond->dump(); 601 std::cerr << " ? "; 602 lhs->dump(); 603 std::cerr << " : "; 604 rhs->dump(); 605 } 606 #endif 607 public: 608 ternary_conditional_operator(source_location sl, 609 expression_ptr c, 610 expression_ptr l, 611 expression_ptr r) : 612 expression(sl), cond(std::move(c)), lhs(std::move(l)), 613 rhs(std::move(r)) {} 614 }; 615 616 template<typename T> 617 struct lshift 618 { 619 constexpr T operator()(const T &lhs, const T &rhs) const 620 { 621 return lhs << rhs; 622 } 623 }; 624 template<typename T> 625 struct rshift 626 { 627 constexpr T operator()(const T &lhs, const T &rhs) const 628 { 629 return lhs >> rhs; 630 } 631 }; 632 template<typename T> 633 struct unary_plus 634 { 635 constexpr T operator()(const T &val) const 636 { 637 return +val; 638 } 639 }; 640 // TODO: Replace with std::bit_not once we can guarantee C++14 as a baseline. 641 template<typename T> 642 struct bit_not 643 { 644 constexpr T operator()(const T &val) const 645 { 646 return ~val; 647 } 648 }; 649 650 template<typename T> 651 struct divmod : public binary_operator<5, T> 652 { 653 using binary_operator<5, T>::binary_operator; 654 using binary_operator_base::result; 655 result operator()() override 656 { 657 result r = (*binary_operator_base::rhs)(); 658 if (r.second && (r.first == 0)) 659 { 660 expression::loc.report_error("Division by zero"); 661 return {0, false}; 662 } 663 return binary_operator<5, T>::operator()(); 664 } 665 }; 666 667 } // anonymous namespace 668 669 670 expression_ptr text_input_buffer::parse_binary_expression(expression_ptr lhs) 671 { 672 next_token(); 673 binary_operator_base *expr = nullptr; 674 char op = *(*this); 675 source_location l = location(); 676 switch (op) 677 { 678 default: 679 return lhs; 680 case '+': 681 expr = new binary_operator<6, std::plus<valty>>(l, "+"); 682 break; 683 case '-': 684 expr = new binary_operator<6, std::minus<valty>>(l, "-"); 685 break; 686 case '%': 687 expr = new divmod<std::modulus<valty>>(l, "/"); 688 break; 689 case '*': 690 expr = new binary_operator<5, std::multiplies<valty>>(l, "*"); 691 break; 692 case '/': 693 expr = new divmod<std::divides<valty>>(l, "/"); 694 break; 695 case '<': 696 switch (peek()) 697 { 698 default: 699 parse_error("Invalid operator"); 700 return nullptr; 701 case ' ': 702 case '(': 703 case '0'...'9': 704 expr = new binary_operator<8, std::less<valty>>(l, "<"); 705 break; 706 case '=': 707 ++(*this); 708 expr = new binary_operator<8, std::less_equal<valty>>(l, "<="); 709 break; 710 case '<': 711 ++(*this); 712 expr = new binary_operator<7, lshift<valty>>(l, "<<"); 713 break; 714 } 715 break; 716 case '>': 717 switch (peek()) 718 { 719 default: 720 parse_error("Invalid operator"); 721 return nullptr; 722 case '(': 723 case ' ': 724 case '0'...'9': 725 expr = new binary_operator<8, std::greater<valty>>(l, ">"); 726 break; 727 case '=': 728 ++(*this); 729 expr = new binary_operator<8, std::greater_equal<valty>>(l, ">="); 730 break; 731 case '>': 732 ++(*this); 733 expr = new binary_operator<7, rshift<valty>>(l, ">>"); 734 break; 735 return lhs; 736 } 737 break; 738 case '=': 739 if (peek() != '=') 740 { 741 parse_error("Invalid operator"); 742 return nullptr; 743 } 744 expr = new binary_operator<9, std::equal_to<valty>>(l, "=="); 745 break; 746 case '!': 747 if (peek() != '=') 748 { 749 parse_error("Invalid operator"); 750 return nullptr; 751 } 752 cursor++; 753 expr = new binary_operator<9, std::not_equal_to<valty>>(l, "!="); 754 break; 755 case '&': 756 if (peek() == '&') 757 { 758 expr = new binary_operator<13, std::logical_and<valty>>(l, "&&"); 759 } 760 else 761 { 762 expr = new binary_operator<10, std::bit_and<valty>>(l, "&"); 763 } 764 break; 765 case '|': 766 if (peek() == '|') 767 { 768 expr = new binary_operator<12, std::logical_or<valty>>(l, "||"); 769 } 770 else 771 { 772 expr = new binary_operator<14, std::bit_or<valty>>(l, "|"); 773 } 774 break; 775 case '?': 776 { 777 consume('?'); 778 expression_ptr true_case = parse_expression(); 779 next_token(); 780 if (!true_case || !consume(':')) 781 { 782 parse_error("Expected : in ternary conditional operator"); 783 return nullptr; 784 } 785 expression_ptr false_case = parse_expression(); 786 if (!false_case) 787 { 788 parse_error("Expected false condition for ternary operator"); 789 return nullptr; 790 } 791 return expression_ptr(new ternary_conditional_operator(l, std::move(lhs), 792 std::move(true_case), std::move(false_case))); 793 } 794 } 795 ++(*this); 796 next_token(); 797 expression_ptr e(expr); 798 expression_ptr rhs(parse_expression()); 799 if (!rhs) 800 { 801 return nullptr; 802 } 803 expr->lhs = std::move(lhs); 804 if (rhs->precedence() < expr->precedence()) 805 { 806 expr->rhs = std::move(rhs); 807 } 808 else 809 { 810 // If we're a normal left-to-right expression, then we need to insert 811 // this as the far-left child node of the rhs expression 812 binary_operator_base *rhs_op = 813 static_cast<binary_operator_base*>(rhs.get()); 814 rhs_op->insert_left(expr); 815 e.release(); 816 return rhs; 817 } 818 return e; 819 } 820 821 expression_ptr text_input_buffer::parse_expression(bool stopAtParen) 822 { 823 next_token(); 824 unsigned long long leftVal; 825 expression_ptr lhs; 826 source_location l = location(); 827 switch (*(*this)) 828 { 829 case '0'...'9': 830 if (!consume_integer(leftVal)) 831 { 832 return nullptr; 833 } 834 lhs.reset(new terminal_expr(l, leftVal)); 835 break; 836 case '(': 837 { 838 consume('('); 839 expression_ptr &&subexpr = parse_expression(); 840 if (!subexpr) 841 { 842 return nullptr; 843 } 844 lhs.reset(new paren_expression(l, std::move(subexpr))); 845 if (!consume(')')) 846 { 847 return nullptr; 848 } 849 if (stopAtParen) 850 { 851 return lhs; 852 } 853 break; 854 } 855 case '+': 856 { 857 consume('+'); 858 expression_ptr &&subexpr = parse_expression(); 859 if (!subexpr) 860 { 861 return nullptr; 862 } 863 lhs.reset(new unary_operator<'+', unary_plus<valty>>(l, std::move(subexpr))); 864 break; 865 } 866 case '-': 867 { 868 consume('-'); 869 expression_ptr &&subexpr = parse_expression(); 870 if (!subexpr) 871 { 872 return nullptr; 873 } 874 lhs.reset(new unary_operator<'-', std::negate<valty>>(l, std::move(subexpr))); 875 break; 876 } 877 case '!': 878 { 879 consume('!'); 880 expression_ptr &&subexpr = parse_expression(); 881 if (!subexpr) 882 { 883 return nullptr; 884 } 885 lhs.reset(new unary_operator<'!', std::logical_not<valty>>(l, std::move(subexpr))); 886 break; 887 } 888 case '~': 889 { 890 consume('~'); 891 expression_ptr &&subexpr = parse_expression(); 892 if (!subexpr) 893 { 894 return nullptr; 895 } 896 lhs.reset(new unary_operator<'~', bit_not<valty>>(l, std::move(subexpr))); 897 break; 898 } 899 } 900 if (!lhs) 901 { 902 return nullptr; 903 } 904 return parse_binary_expression(std::move(lhs)); 905 } 906 907 bool 908 text_input_buffer::consume_integer_expression(unsigned long long &outInt) 909 { 910 switch (*(*this)) 911 { 912 case '(': 913 { 914 expression_ptr e(parse_expression(true)); 915 if (!e) 916 { 917 return false; 918 } 919 auto r = (*e)(); 920 if (r.second) 921 { 922 outInt = r.first; 923 return true; 924 } 925 return false; 926 } 927 case '0'...'9': 928 return consume_integer(outInt); 929 default: 930 return false; 931 } 932 } 933 934 bool 935 input_buffer::consume_hex_byte(uint8_t &outByte) 936 { 937 if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1])) 938 { 939 return false; 940 } 941 outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]); 942 cursor += 2; 943 return true; 944 } 945 946 text_input_buffer& 947 text_input_buffer::next_token() 948 { 949 auto &self = *this; 950 int start; 951 do { 952 start = cursor; 953 skip_spaces(); 954 if (finished()) 955 { 956 return self; 957 } 958 // Parse /* comments 959 if (*self == '/' && peek() == '*') 960 { 961 // eat the start of the comment 962 ++self; 963 ++self; 964 do { 965 // Find the ending * of */ 966 while ((*self != '\0') && (*self != '*') && !finished()) 967 { 968 ++self; 969 } 970 // Eat the * 971 ++self; 972 } while ((*self != '\0') && (*self != '/') && !finished()); 973 // Eat the / 974 ++self; 975 } 976 // Parse // comments 977 if ((*self == '/' && peek() == '/')) 978 { 979 // eat the start of the comment 980 ++self; 981 ++self; 982 // Find the ending of the line 983 while (*self != '\n' && !finished()) 984 { 985 ++self; 986 } 987 // Eat the \n 988 ++self; 989 } 990 } while (start != cursor); 991 return self; 992 } 993 994 void 995 text_input_buffer::parse_error(const char *msg) 996 { 997 if (input_stack.empty()) 998 { 999 fprintf(stderr, "Error: %s\n", msg); 1000 return; 1001 } 1002 input_buffer &b = *input_stack.top(); 1003 parse_error(msg, b, b.cursor); 1004 } 1005 void 1006 text_input_buffer::parse_error(const char *msg, 1007 input_buffer &b, 1008 int loc) 1009 { 1010 int line_count = 1; 1011 int line_start = 0; 1012 int line_end = loc; 1013 if (loc < 0 || loc > b.size) 1014 { 1015 return; 1016 } 1017 for (int i=loc ; i>0 ; --i) 1018 { 1019 if (b.buffer[i] == '\n') 1020 { 1021 line_count++; 1022 if (line_start == 0) 1023 { 1024 line_start = i+1; 1025 } 1026 } 1027 } 1028 for (int i=loc+1 ; i<b.size ; ++i) 1029 { 1030 if (b.buffer[i] == '\n') 1031 { 1032 line_end = i; 1033 break; 1034 } 1035 } 1036 fprintf(stderr, "Error at %s:%d:%d: %s\n", b.filename().c_str(), line_count, loc - line_start, msg); 1037 fwrite(&b.buffer[line_start], line_end-line_start, 1, stderr); 1038 putc('\n', stderr); 1039 for (int i=0 ; i<(loc-line_start) ; ++i) 1040 { 1041 char c = (b.buffer[i+line_start] == '\t') ? '\t' : ' '; 1042 putc(c, stderr); 1043 } 1044 putc('^', stderr); 1045 putc('\n', stderr); 1046 } 1047 #ifndef NDEBUG 1048 void 1049 input_buffer::dump() 1050 { 1051 fprintf(stderr, "Current cursor: %d\n", cursor); 1052 fwrite(&buffer[cursor], size-cursor, 1, stderr); 1053 } 1054 #endif 1055 1056 1057 namespace 1058 { 1059 /** 1060 * The source files are ASCII, so we provide a non-locale-aware version of 1061 * isalpha. This is a class so that it can be used with a template function 1062 * for parsing strings. 1063 */ 1064 struct is_alpha 1065 { 1066 static inline bool check(const char c) 1067 { 1068 return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && 1069 (c <= 'Z')); 1070 } 1071 }; 1072 /** 1073 * Check whether a character is in the set allowed for node names. This is a 1074 * class so that it can be used with a template function for parsing strings. 1075 */ 1076 struct is_node_name_character 1077 { 1078 static inline bool check(const char c) 1079 { 1080 switch(c) 1081 { 1082 default: 1083 return false; 1084 case 'a'...'z': case 'A'...'Z': case '0'...'9': 1085 case ',': case '.': case '+': case '-': 1086 case '_': 1087 return true; 1088 } 1089 } 1090 }; 1091 /** 1092 * Check whether a character is in the set allowed for property names. This is 1093 * a class so that it can be used with a template function for parsing strings. 1094 */ 1095 struct is_property_name_character 1096 { 1097 static inline bool check(const char c) 1098 { 1099 switch(c) 1100 { 1101 default: 1102 return false; 1103 case 'a'...'z': case 'A'...'Z': case '0'...'9': 1104 case ',': case '.': case '+': case '-': 1105 case '_': case '#': 1106 return true; 1107 } 1108 } 1109 }; 1110 1111 template<class T> 1112 string parse(text_input_buffer &s) 1113 { 1114 std::vector<char> bytes; 1115 for (char c=*s ; T::check(c) ; c=*(++s)) 1116 { 1117 bytes.push_back(c); 1118 } 1119 return string(bytes.begin(), bytes.end()); 1120 } 1121 1122 } 1123 1124 string 1125 text_input_buffer::parse_node_name() 1126 { 1127 return parse<is_node_name_character>(*this); 1128 } 1129 1130 string 1131 text_input_buffer::parse_property_name() 1132 { 1133 return parse<is_property_name_character>(*this); 1134 } 1135 1136 string 1137 text_input_buffer::parse_node_or_property_name(bool &is_property) 1138 { 1139 if (is_property) 1140 { 1141 return parse_property_name(); 1142 } 1143 std::vector<char> bytes; 1144 for (char c=*(*this) ; is_node_name_character::check(c) ; c=*(++(*this))) 1145 { 1146 bytes.push_back(c); 1147 } 1148 for (char c=*(*this) ; is_property_name_character::check(c) ; c=*(++(*this))) 1149 { 1150 bytes.push_back(c); 1151 is_property = true; 1152 } 1153 return string(bytes.begin(), bytes.end()); 1154 } 1155 1156 string 1157 input_buffer::parse_to(char stop) 1158 { 1159 std::vector<char> bytes; 1160 for (char c=*(*this) ; c != stop ; c=*(++(*this))) 1161 { 1162 bytes.push_back(c); 1163 } 1164 return string(bytes.begin(), bytes.end()); 1165 } 1166 1167 string 1168 text_input_buffer::parse_to(char stop) 1169 { 1170 std::vector<char> bytes; 1171 for (char c=*(*this) ; c != stop ; c=*(++(*this))) 1172 { 1173 if (finished()) 1174 { 1175 break; 1176 } 1177 bytes.push_back(c); 1178 } 1179 return string(bytes.begin(), bytes.end()); 1180 } 1181 1182 char 1183 text_input_buffer::peek() 1184 { 1185 return (*input_stack.top())[1]; 1186 } 1187 1188 std::unique_ptr<input_buffer> 1189 input_buffer::buffer_for_file(const string &path, bool warn) 1190 { 1191 if (path == "-") 1192 { 1193 std::unique_ptr<input_buffer> b(new stream_input_buffer()); 1194 return b; 1195 } 1196 int source = open(path.c_str(), O_RDONLY); 1197 if (source == -1) 1198 { 1199 if (warn) 1200 { 1201 fprintf(stderr, "Unable to open file '%s'. %s\n", path.c_str(), strerror(errno)); 1202 } 1203 return 0; 1204 } 1205 struct stat st; 1206 if (fstat(source, &st) == 0 && S_ISDIR(st.st_mode)) 1207 { 1208 if (warn) 1209 { 1210 fprintf(stderr, "File %s is a directory\n", path.c_str()); 1211 } 1212 close(source); 1213 return 0; 1214 } 1215 std::unique_ptr<input_buffer> b(new mmap_input_buffer(source, std::string(path))); 1216 close(source); 1217 return b; 1218 } 1219 1220 } // namespace dtc 1221 1222