1 /*- 2 * Copyright (c) 2013 David Chisnall 3 * All rights reserved. 4 * 5 * This software was developed by SRI International and the University of 6 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) 7 * ("CTSRD"), as part of the DARPA CRASH research programme. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 * $FreeBSD$ 31 */ 32 33 #include "input_buffer.hh" 34 #include <ctype.h> 35 #include <errno.h> 36 #include <limits.h> 37 #include <stdint.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <functional> 42 #ifndef NDEBUG 43 #include <iostream> 44 #endif 45 46 47 #include <sys/stat.h> 48 #include <sys/mman.h> 49 #include <assert.h> 50 #include <fcntl.h> 51 #include <unistd.h> 52 53 #ifndef MAP_PREFAULT_READ 54 #define MAP_PREFAULT_READ 0 55 #endif 56 57 using std::string; 58 59 namespace 60 { 61 /** 62 * Subclass of input_buffer that mmap()s a file and owns the resulting memory. 63 * When this object is destroyed, the memory is unmapped. 64 */ 65 struct mmap_input_buffer : public dtc::input_buffer 66 { 67 string fn; 68 const string &filename() const override 69 { 70 return fn; 71 } 72 /** 73 * Constructs a new buffer from the file passed in as a file 74 * descriptor. 75 */ 76 mmap_input_buffer(int fd, string &&filename); 77 /** 78 * Unmaps the buffer, if one exists. 79 */ 80 virtual ~mmap_input_buffer(); 81 }; 82 /** 83 * Input buffer read from standard input. This is used for reading device tree 84 * blobs and source from standard input. It reads the entire input into 85 * malloc'd memory, so will be very slow for large inputs. DTS and DTB files 86 * are very rarely more than 10KB though, so this is probably not a problem. 87 */ 88 struct stream_input_buffer : public dtc::input_buffer 89 { 90 const string &filename() const override 91 { 92 static string n = "<standard input>"; 93 return n; 94 } 95 /** 96 * The buffer that will store the data read from the standard input. 97 */ 98 std::vector<char> b; 99 /** 100 * Constructs a new buffer from the standard input. 101 */ 102 stream_input_buffer(); 103 }; 104 105 mmap_input_buffer::mmap_input_buffer(int fd, std::string &&filename) 106 : input_buffer(0, 0), fn(filename) 107 { 108 struct stat sb; 109 if (fstat(fd, &sb)) 110 { 111 perror("Failed to stat file"); 112 } 113 size = sb.st_size; 114 buffer = (const char*)mmap(0, size, PROT_READ, MAP_PRIVATE | 115 MAP_PREFAULT_READ, fd, 0); 116 if (buffer == MAP_FAILED) 117 { 118 perror("Failed to mmap file"); 119 exit(EXIT_FAILURE); 120 } 121 } 122 123 mmap_input_buffer::~mmap_input_buffer() 124 { 125 if (buffer != 0) 126 { 127 munmap((void*)buffer, size); 128 } 129 } 130 131 stream_input_buffer::stream_input_buffer() : input_buffer(0, 0) 132 { 133 int c; 134 while ((c = fgetc(stdin)) != EOF) 135 { 136 b.push_back(c); 137 } 138 buffer = b.data(); 139 size = b.size(); 140 } 141 142 } // Anonymous namespace 143 144 145 namespace dtc 146 { 147 148 void 149 input_buffer::skip_to(char c) 150 { 151 while ((cursor < size) && (buffer[cursor] != c)) 152 { 153 cursor++; 154 } 155 } 156 157 void 158 text_input_buffer::skip_to(char c) 159 { 160 while (!finished() && (*(*this) != c)) 161 { 162 ++(*this); 163 } 164 } 165 166 void 167 text_input_buffer::skip_spaces() 168 { 169 if (finished()) { return; } 170 char c = *(*this); 171 bool last_nl = false; 172 while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f') 173 || (c == '\v') || (c == '\r')) 174 { 175 last_nl = ((c == '\n') || (c == '\r')); 176 ++(*this); 177 if (finished()) 178 { 179 c = '\0'; 180 } 181 else 182 { 183 c = *(*this); 184 } 185 } 186 // Skip C preprocessor leftovers 187 if ((c == '#') && ((cursor == 0) || last_nl)) 188 { 189 skip_to('\n'); 190 skip_spaces(); 191 } 192 if (consume("/include/")) 193 { 194 handle_include(); 195 skip_spaces(); 196 } 197 } 198 199 void 200 text_input_buffer::handle_include() 201 { 202 bool reallyInclude = true; 203 if (consume("if ")) 204 { 205 next_token(); 206 string name = parse_property_name(); 207 if (defines.count(name) > 0) 208 { 209 reallyInclude = true; 210 } 211 consume('/'); 212 } 213 next_token(); 214 if (!consume('"')) 215 { 216 parse_error("Expected quoted filename"); 217 return; 218 } 219 string file = parse_to('"'); 220 consume('"'); 221 if (!reallyInclude) 222 { 223 return; 224 } 225 string include_file = dir + '/' + file; 226 auto include_buffer = input_buffer::buffer_for_file(include_file, false); 227 if (include_buffer == 0) 228 { 229 for (auto i : include_paths) 230 { 231 include_file = i + '/' + file; 232 include_buffer = input_buffer::buffer_for_file(include_file, false); 233 if (include_buffer != 0) 234 { 235 break; 236 } 237 } 238 } 239 if (depfile) 240 { 241 putc(' ', depfile); 242 fputs(include_file.c_str(), depfile); 243 } 244 if (!include_buffer) 245 { 246 parse_error("Unable to locate input file"); 247 return; 248 } 249 input_stack.push(std::move(include_buffer)); 250 } 251 252 input_buffer 253 input_buffer::buffer_from_offset(int offset, int s) 254 { 255 if (offset < 0) 256 { 257 return input_buffer(); 258 } 259 if (s == 0) 260 { 261 s = size - offset; 262 } 263 if (offset > size) 264 { 265 return input_buffer(); 266 } 267 if (s > (size-offset)) 268 { 269 return input_buffer(); 270 } 271 return input_buffer(&buffer[offset], s); 272 } 273 274 bool 275 input_buffer::consume(const char *str) 276 { 277 int len = strlen(str); 278 if (len > size - cursor) 279 { 280 return false; 281 } 282 else 283 { 284 for (int i=0 ; i<len ; ++i) 285 { 286 if (str[i] != (*this)[i]) 287 { 288 return false; 289 } 290 } 291 cursor += len; 292 return true; 293 } 294 return false; 295 } 296 297 bool 298 input_buffer::consume_integer(unsigned long long &outInt) 299 { 300 // The first character must be a digit. Hex and octal strings 301 // are prefixed by 0 and 0x, respectively. 302 if (!isdigit((*this)[0])) 303 { 304 return false; 305 } 306 char *end= const_cast<char*>(&buffer[size]); 307 outInt = strtoull(&buffer[cursor], &end, 0); 308 if (end == &buffer[cursor]) 309 { 310 return false; 311 } 312 cursor = end - buffer; 313 return true; 314 } 315 316 namespace { 317 318 /** 319 * Convenience typedef for the type that we use for all values. 320 */ 321 typedef unsigned long long valty; 322 323 /** 324 * Expression tree currently being parsed. 325 */ 326 struct expression 327 { 328 typedef text_input_buffer::source_location source_location; 329 /** 330 * The type that is returned when computing the result. The boolean value 331 * indicates whether this is a valid expression. 332 * 333 * FIXME: Once we can use C++17, this should be `std::optional`. 334 */ 335 typedef std::pair<valty, bool> result; 336 /** 337 * Evaluate this node, taking into account operator precedence. 338 */ 339 virtual result operator()() = 0; 340 /** 341 * Returns the precedence of this node. Lower values indicate higher 342 * precedence. 343 */ 344 virtual int precedence() = 0; 345 /** 346 * Constructs an expression, storing the location where it was created. 347 */ 348 expression(source_location l) : loc(l) {} 349 virtual ~expression() {} 350 #ifndef NDEBUG 351 /** 352 * Dumps this expression to `std::cerr`, appending a newline if `nl` is 353 * `true`. 354 */ 355 void dump(bool nl=false) 356 { 357 void *ptr = this; 358 if (ptr == nullptr) 359 { 360 std::cerr << "{nullptr}\n"; 361 return; 362 } 363 dump_impl(); 364 if (nl) 365 { 366 std::cerr << '\n'; 367 } 368 } 369 private: 370 /** 371 * Method that sublcasses override to implement the behaviour of `dump()`. 372 */ 373 virtual void dump_impl() = 0; 374 #endif 375 protected: 376 source_location loc; 377 }; 378 379 /** 380 * Expression wrapping a single integer. Leaf nodes in the expression tree. 381 */ 382 class terminal_expr : public expression 383 { 384 /** 385 * The value that this wraps. 386 */ 387 valty val; 388 /** 389 * Evaluate. Trivially returns the value that this class wraps. 390 */ 391 result operator()() override 392 { 393 return {val, true}; 394 } 395 int precedence() override 396 { 397 return 0; 398 } 399 public: 400 /** 401 * Constructor. 402 */ 403 terminal_expr(source_location l, valty v) : expression(l), val(v) {} 404 #ifndef NDEBUG 405 void dump_impl() override { std::cerr << val; } 406 #endif 407 }; 408 409 /** 410 * Parenthetical expression. Exists to make the contents opaque. 411 */ 412 struct paren_expression : public expression 413 { 414 /** 415 * The expression within the parentheses. 416 */ 417 expression_ptr subexpr; 418 /** 419 * Constructor. Takes the child expression as the only argument. 420 */ 421 paren_expression(source_location l, expression_ptr p) : expression(l), 422 subexpr(std::move(p)) {} 423 int precedence() override 424 { 425 return 0; 426 } 427 /** 428 * Evaluate - just forwards to the underlying expression. 429 */ 430 result operator()() override 431 { 432 return (*subexpr)(); 433 } 434 #ifndef NDEBUG 435 void dump_impl() override 436 { 437 std::cerr << " ("; 438 subexpr->dump(); 439 std::cerr << ") "; 440 } 441 #endif 442 }; 443 444 /** 445 * Template class for unary operators. The `OpChar` template parameter is 446 * solely for debugging and makes it easy to print the expression. The `Op` 447 * template parameter is a function object that implements the operator that 448 * this class provides. Most of these are provided by the `<functional>` 449 * header. 450 */ 451 template<char OpChar, class Op> 452 class unary_operator : public expression 453 { 454 /** 455 * The subexpression for this unary operator. 456 */ 457 expression_ptr subexpr; 458 result operator()() override 459 { 460 Op op; 461 result s = (*subexpr)(); 462 if (!s.second) 463 { 464 return s; 465 } 466 return {op(s.first), true}; 467 } 468 /** 469 * All unary operators have the same precedence. They are all evaluated 470 * before binary expressions, but after parentheses. 471 */ 472 int precedence() override 473 { 474 return 3; 475 } 476 public: 477 unary_operator(source_location l, expression_ptr p) : 478 expression(l), subexpr(std::move(p)) {} 479 #ifndef NDEBUG 480 void dump_impl() override 481 { 482 std::cerr << OpChar; 483 subexpr->dump(); 484 } 485 #endif 486 }; 487 488 /** 489 * Abstract base class for binary operators. Allows the tree to be modified 490 * without knowing what the operations actually are. 491 */ 492 struct binary_operator_base : public expression 493 { 494 using expression::expression; 495 /** 496 * The left side of the expression. 497 */ 498 expression_ptr lhs; 499 /** 500 * The right side of the expression. 501 */ 502 expression_ptr rhs; 503 /** 504 * Insert a node somewhere down the path of left children, until it would 505 * be preempting something that should execute first. 506 */ 507 void insert_left(binary_operator_base *new_left) 508 { 509 if (lhs->precedence() < new_left->precedence()) 510 { 511 new_left->rhs = std::move(lhs); 512 lhs.reset(new_left); 513 } 514 else 515 { 516 static_cast<binary_operator_base*>(lhs.get())->insert_left(new_left); 517 } 518 } 519 }; 520 521 /** 522 * Template class for binary operators. The precedence and the operation are 523 * provided as template parameters. 524 */ 525 template<int Precedence, class Op> 526 struct binary_operator : public binary_operator_base 527 { 528 result operator()() override 529 { 530 Op op; 531 result l = (*lhs)(); 532 result r = (*rhs)(); 533 if (!(l.second && r.second)) 534 { 535 return {0, false}; 536 } 537 return {op(l.first, r.first), true}; 538 } 539 int precedence() override 540 { 541 return Precedence; 542 } 543 #ifdef NDEBUG 544 /** 545 * Constructor. Takes the name of the operator as an argument, for 546 * debugging. Only stores it in debug mode. 547 */ 548 binary_operator(source_location l, const char *) : 549 binary_operator_base(l) {} 550 #else 551 const char *opName; 552 binary_operator(source_location l, const char *o) : 553 binary_operator_base(l), opName(o) {} 554 void dump_impl() override 555 { 556 lhs->dump(); 557 std::cerr << opName; 558 rhs->dump(); 559 } 560 #endif 561 }; 562 563 /** 564 * Ternary conditional operators (`cond ? true : false`) are a special case - 565 * there are no other ternary operators. 566 */ 567 class ternary_conditional_operator : public expression 568 { 569 /** 570 * The condition for the clause. 571 */ 572 expression_ptr cond; 573 /** 574 * The expression that this evaluates to if the condition is true. 575 */ 576 expression_ptr lhs; 577 /** 578 * The expression that this evaluates to if the condition is false. 579 */ 580 expression_ptr rhs; 581 result operator()() override 582 { 583 result c = (*cond)(); 584 result l = (*lhs)(); 585 result r = (*rhs)(); 586 if (!(l.second && r.second && c.second)) 587 { 588 return {0, false}; 589 } 590 return c.first ? l : r; 591 } 592 int precedence() override 593 { 594 // The actual precedence of a ternary conditional operator is 15, but 595 // its associativity is the opposite way around to the other operators, 596 // so we fudge it slightly. 597 return 3; 598 } 599 #ifndef NDEBUG 600 void dump_impl() override 601 { 602 cond->dump(); 603 std::cerr << " ? "; 604 lhs->dump(); 605 std::cerr << " : "; 606 rhs->dump(); 607 } 608 #endif 609 public: 610 ternary_conditional_operator(source_location sl, 611 expression_ptr c, 612 expression_ptr l, 613 expression_ptr r) : 614 expression(sl), cond(std::move(c)), lhs(std::move(l)), 615 rhs(std::move(r)) {} 616 }; 617 618 template<typename T> 619 struct lshift 620 { 621 constexpr T operator()(const T &lhs, const T &rhs) const 622 { 623 return lhs << rhs; 624 } 625 }; 626 template<typename T> 627 struct rshift 628 { 629 constexpr T operator()(const T &lhs, const T &rhs) const 630 { 631 return lhs >> rhs; 632 } 633 }; 634 template<typename T> 635 struct unary_plus 636 { 637 constexpr T operator()(const T &val) const 638 { 639 return +val; 640 } 641 }; 642 // TODO: Replace with std::bit_not once we can guarantee C++14 as a baseline. 643 template<typename T> 644 struct bit_not 645 { 646 constexpr T operator()(const T &val) const 647 { 648 return ~val; 649 } 650 }; 651 652 template<typename T> 653 struct divmod : public binary_operator<5, T> 654 { 655 using binary_operator<5, T>::binary_operator; 656 using binary_operator_base::result; 657 result operator()() override 658 { 659 result r = (*binary_operator_base::rhs)(); 660 if (r.second && (r.first == 0)) 661 { 662 expression::loc.report_error("Division by zero"); 663 return {0, false}; 664 } 665 return binary_operator<5, T>::operator()(); 666 } 667 }; 668 669 } // anonymous namespace 670 671 672 expression_ptr text_input_buffer::parse_binary_expression(expression_ptr lhs) 673 { 674 next_token(); 675 binary_operator_base *expr = nullptr; 676 char op = *(*this); 677 source_location l = location(); 678 switch (op) 679 { 680 default: 681 return lhs; 682 case '+': 683 expr = new binary_operator<6, std::plus<valty>>(l, "+"); 684 break; 685 case '-': 686 expr = new binary_operator<6, std::minus<valty>>(l, "-"); 687 break; 688 case '%': 689 expr = new divmod<std::modulus<valty>>(l, "/"); 690 break; 691 case '*': 692 expr = new binary_operator<5, std::multiplies<valty>>(l, "*"); 693 break; 694 case '/': 695 expr = new divmod<std::divides<valty>>(l, "/"); 696 break; 697 case '<': 698 switch (peek()) 699 { 700 default: 701 parse_error("Invalid operator"); 702 return nullptr; 703 case ' ': 704 case '(': 705 case '0'...'9': 706 expr = new binary_operator<8, std::less<valty>>(l, "<"); 707 break; 708 case '=': 709 ++(*this); 710 expr = new binary_operator<8, std::less_equal<valty>>(l, "<="); 711 break; 712 case '<': 713 ++(*this); 714 expr = new binary_operator<7, lshift<valty>>(l, "<<"); 715 break; 716 } 717 break; 718 case '>': 719 switch (peek()) 720 { 721 default: 722 parse_error("Invalid operator"); 723 return nullptr; 724 case '(': 725 case ' ': 726 case '0'...'9': 727 expr = new binary_operator<8, std::greater<valty>>(l, ">"); 728 break; 729 case '=': 730 ++(*this); 731 expr = new binary_operator<8, std::greater_equal<valty>>(l, ">="); 732 break; 733 case '>': 734 ++(*this); 735 expr = new binary_operator<7, rshift<valty>>(l, ">>"); 736 break; 737 return lhs; 738 } 739 break; 740 case '=': 741 if (peek() != '=') 742 { 743 parse_error("Invalid operator"); 744 return nullptr; 745 } 746 expr = new binary_operator<9, std::equal_to<valty>>(l, "=="); 747 break; 748 case '!': 749 if (peek() != '=') 750 { 751 parse_error("Invalid operator"); 752 return nullptr; 753 } 754 cursor++; 755 expr = new binary_operator<9, std::not_equal_to<valty>>(l, "!="); 756 break; 757 case '&': 758 if (peek() == '&') 759 { 760 expr = new binary_operator<13, std::logical_and<valty>>(l, "&&"); 761 } 762 else 763 { 764 expr = new binary_operator<10, std::bit_and<valty>>(l, "&"); 765 } 766 break; 767 case '|': 768 if (peek() == '|') 769 { 770 expr = new binary_operator<12, std::logical_or<valty>>(l, "||"); 771 } 772 else 773 { 774 expr = new binary_operator<14, std::bit_or<valty>>(l, "|"); 775 } 776 break; 777 case '?': 778 { 779 consume('?'); 780 expression_ptr true_case = parse_expression(); 781 next_token(); 782 if (!true_case || !consume(':')) 783 { 784 parse_error("Expected : in ternary conditional operator"); 785 return nullptr; 786 } 787 expression_ptr false_case = parse_expression(); 788 if (!false_case) 789 { 790 parse_error("Expected false condition for ternary operator"); 791 return nullptr; 792 } 793 return expression_ptr(new ternary_conditional_operator(l, std::move(lhs), 794 std::move(true_case), std::move(false_case))); 795 } 796 } 797 ++(*this); 798 next_token(); 799 expression_ptr e(expr); 800 expression_ptr rhs(parse_expression()); 801 if (!rhs) 802 { 803 return nullptr; 804 } 805 expr->lhs = std::move(lhs); 806 if (rhs->precedence() < expr->precedence()) 807 { 808 expr->rhs = std::move(rhs); 809 } 810 else 811 { 812 // If we're a normal left-to-right expression, then we need to insert 813 // this as the far-left child node of the rhs expression 814 binary_operator_base *rhs_op = 815 static_cast<binary_operator_base*>(rhs.get()); 816 rhs_op->insert_left(expr); 817 e.release(); 818 return rhs; 819 } 820 return e; 821 } 822 823 expression_ptr text_input_buffer::parse_expression(bool stopAtParen) 824 { 825 next_token(); 826 unsigned long long leftVal; 827 expression_ptr lhs; 828 source_location l = location(); 829 switch (*(*this)) 830 { 831 case '0'...'9': 832 if (!consume_integer(leftVal)) 833 { 834 return nullptr; 835 } 836 lhs.reset(new terminal_expr(l, leftVal)); 837 break; 838 case '(': 839 { 840 consume('('); 841 expression_ptr &&subexpr = parse_expression(); 842 if (!subexpr) 843 { 844 return nullptr; 845 } 846 lhs.reset(new paren_expression(l, std::move(subexpr))); 847 if (!consume(')')) 848 { 849 return nullptr; 850 } 851 if (stopAtParen) 852 { 853 return lhs; 854 } 855 break; 856 } 857 case '+': 858 { 859 consume('+'); 860 expression_ptr &&subexpr = parse_expression(); 861 if (!subexpr) 862 { 863 return nullptr; 864 } 865 lhs.reset(new unary_operator<'+', unary_plus<valty>>(l, std::move(subexpr))); 866 break; 867 } 868 case '-': 869 { 870 consume('-'); 871 expression_ptr &&subexpr = parse_expression(); 872 if (!subexpr) 873 { 874 return nullptr; 875 } 876 lhs.reset(new unary_operator<'-', std::negate<valty>>(l, std::move(subexpr))); 877 break; 878 } 879 case '!': 880 { 881 consume('!'); 882 expression_ptr &&subexpr = parse_expression(); 883 if (!subexpr) 884 { 885 return nullptr; 886 } 887 lhs.reset(new unary_operator<'!', std::logical_not<valty>>(l, std::move(subexpr))); 888 break; 889 } 890 case '~': 891 { 892 consume('~'); 893 expression_ptr &&subexpr = parse_expression(); 894 if (!subexpr) 895 { 896 return nullptr; 897 } 898 lhs.reset(new unary_operator<'~', bit_not<valty>>(l, std::move(subexpr))); 899 break; 900 } 901 } 902 if (!lhs) 903 { 904 return nullptr; 905 } 906 return parse_binary_expression(std::move(lhs)); 907 } 908 909 bool 910 text_input_buffer::consume_integer_expression(unsigned long long &outInt) 911 { 912 switch (*(*this)) 913 { 914 case '(': 915 { 916 expression_ptr e(parse_expression(true)); 917 if (!e) 918 { 919 return false; 920 } 921 auto r = (*e)(); 922 if (r.second) 923 { 924 outInt = r.first; 925 return true; 926 } 927 return false; 928 } 929 case '0'...'9': 930 return consume_integer(outInt); 931 default: 932 return false; 933 } 934 } 935 936 bool 937 input_buffer::consume_hex_byte(uint8_t &outByte) 938 { 939 if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1])) 940 { 941 return false; 942 } 943 outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]); 944 cursor += 2; 945 return true; 946 } 947 948 text_input_buffer& 949 text_input_buffer::next_token() 950 { 951 auto &self = *this; 952 int start; 953 do { 954 start = cursor; 955 skip_spaces(); 956 if (finished()) 957 { 958 return self; 959 } 960 // Parse /* comments 961 if (*self == '/' && peek() == '*') 962 { 963 // eat the start of the comment 964 ++self; 965 ++self; 966 do { 967 // Find the ending * of */ 968 while ((*self != '\0') && (*self != '*') && !finished()) 969 { 970 ++self; 971 } 972 // Eat the * 973 ++self; 974 } while ((*self != '\0') && (*self != '/') && !finished()); 975 // Eat the / 976 ++self; 977 } 978 // Parse // comments 979 if ((*self == '/' && peek() == '/')) 980 { 981 // eat the start of the comment 982 ++self; 983 ++self; 984 // Find the ending of the line 985 while (*self != '\n' && !finished()) 986 { 987 ++self; 988 } 989 // Eat the \n 990 ++self; 991 } 992 } while (start != cursor); 993 return self; 994 } 995 996 void 997 text_input_buffer::parse_error(const char *msg) 998 { 999 if (input_stack.empty()) 1000 { 1001 fprintf(stderr, "Error: %s\n", msg); 1002 return; 1003 } 1004 input_buffer &b = *input_stack.top(); 1005 parse_error(msg, b, b.cursor); 1006 } 1007 void 1008 text_input_buffer::parse_error(const char *msg, 1009 input_buffer &b, 1010 int loc) 1011 { 1012 int line_count = 1; 1013 int line_start = 0; 1014 int line_end = loc; 1015 if (loc < 0 || loc > b.size) 1016 { 1017 return; 1018 } 1019 for (int i=loc ; i>0 ; --i) 1020 { 1021 if (b.buffer[i] == '\n') 1022 { 1023 line_count++; 1024 if (line_start == 0) 1025 { 1026 line_start = i+1; 1027 } 1028 } 1029 } 1030 for (int i=loc+1 ; i<b.size ; ++i) 1031 { 1032 if (b.buffer[i] == '\n') 1033 { 1034 line_end = i; 1035 break; 1036 } 1037 } 1038 fprintf(stderr, "Error at %s:%d:%d: %s\n", b.filename().c_str(), line_count, loc - line_start, msg); 1039 fwrite(&b.buffer[line_start], line_end-line_start, 1, stderr); 1040 putc('\n', stderr); 1041 for (int i=0 ; i<(loc-line_start) ; ++i) 1042 { 1043 char c = (b.buffer[i+line_start] == '\t') ? '\t' : ' '; 1044 putc(c, stderr); 1045 } 1046 putc('^', stderr); 1047 putc('\n', stderr); 1048 } 1049 #ifndef NDEBUG 1050 void 1051 input_buffer::dump() 1052 { 1053 fprintf(stderr, "Current cursor: %d\n", cursor); 1054 fwrite(&buffer[cursor], size-cursor, 1, stderr); 1055 } 1056 #endif 1057 1058 1059 namespace 1060 { 1061 /** 1062 * The source files are ASCII, so we provide a non-locale-aware version of 1063 * isalpha. This is a class so that it can be used with a template function 1064 * for parsing strings. 1065 */ 1066 struct is_alpha 1067 { 1068 static inline bool check(const char c) 1069 { 1070 return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && 1071 (c <= 'Z')); 1072 } 1073 }; 1074 /** 1075 * Check whether a character is in the set allowed for node names. This is a 1076 * class so that it can be used with a template function for parsing strings. 1077 */ 1078 struct is_node_name_character 1079 { 1080 static inline bool check(const char c) 1081 { 1082 switch(c) 1083 { 1084 default: 1085 return false; 1086 case 'a'...'z': case 'A'...'Z': case '0'...'9': 1087 case ',': case '.': case '+': case '-': 1088 case '_': 1089 return true; 1090 } 1091 } 1092 }; 1093 /** 1094 * Check whether a character is in the set allowed for property names. This is 1095 * a class so that it can be used with a template function for parsing strings. 1096 */ 1097 struct is_property_name_character 1098 { 1099 static inline bool check(const char c) 1100 { 1101 switch(c) 1102 { 1103 default: 1104 return false; 1105 case 'a'...'z': case 'A'...'Z': case '0'...'9': 1106 case ',': case '.': case '+': case '-': 1107 case '_': case '#': 1108 return true; 1109 } 1110 } 1111 }; 1112 1113 template<class T> 1114 string parse(text_input_buffer &s) 1115 { 1116 std::vector<char> bytes; 1117 for (char c=*s ; T::check(c) ; c=*(++s)) 1118 { 1119 bytes.push_back(c); 1120 } 1121 return string(bytes.begin(), bytes.end()); 1122 } 1123 1124 } 1125 1126 string 1127 text_input_buffer::parse_node_name() 1128 { 1129 return parse<is_node_name_character>(*this); 1130 } 1131 1132 string 1133 text_input_buffer::parse_property_name() 1134 { 1135 return parse<is_property_name_character>(*this); 1136 } 1137 1138 string 1139 text_input_buffer::parse_node_or_property_name(bool &is_property) 1140 { 1141 if (is_property) 1142 { 1143 return parse_property_name(); 1144 } 1145 std::vector<char> bytes; 1146 for (char c=*(*this) ; is_node_name_character::check(c) ; c=*(++(*this))) 1147 { 1148 bytes.push_back(c); 1149 } 1150 for (char c=*(*this) ; is_property_name_character::check(c) ; c=*(++(*this))) 1151 { 1152 bytes.push_back(c); 1153 is_property = true; 1154 } 1155 return string(bytes.begin(), bytes.end()); 1156 } 1157 1158 string 1159 input_buffer::parse_to(char stop) 1160 { 1161 std::vector<char> bytes; 1162 for (char c=*(*this) ; c != stop ; c=*(++(*this))) 1163 { 1164 bytes.push_back(c); 1165 } 1166 return string(bytes.begin(), bytes.end()); 1167 } 1168 1169 string 1170 text_input_buffer::parse_to(char stop) 1171 { 1172 std::vector<char> bytes; 1173 for (char c=*(*this) ; c != stop ; c=*(++(*this))) 1174 { 1175 if (finished()) 1176 { 1177 break; 1178 } 1179 bytes.push_back(c); 1180 } 1181 return string(bytes.begin(), bytes.end()); 1182 } 1183 1184 char 1185 text_input_buffer::peek() 1186 { 1187 return (*input_stack.top())[1]; 1188 } 1189 1190 std::unique_ptr<input_buffer> 1191 input_buffer::buffer_for_file(const string &path, bool warn) 1192 { 1193 if (path == "-") 1194 { 1195 std::unique_ptr<input_buffer> b(new stream_input_buffer()); 1196 return b; 1197 } 1198 int source = open(path.c_str(), O_RDONLY); 1199 if (source == -1) 1200 { 1201 if (warn) 1202 { 1203 fprintf(stderr, "Unable to open file '%s'. %s\n", path.c_str(), strerror(errno)); 1204 } 1205 return 0; 1206 } 1207 struct stat st; 1208 if (fstat(source, &st) == 0 && S_ISDIR(st.st_mode)) 1209 { 1210 if (warn) 1211 { 1212 fprintf(stderr, "File %s is a directory\n", path.c_str()); 1213 } 1214 close(source); 1215 return 0; 1216 } 1217 std::unique_ptr<input_buffer> b(new mmap_input_buffer(source, std::string(path))); 1218 close(source); 1219 return b; 1220 } 1221 1222 } // namespace dtc 1223 1224