1 /*- 2 * Copyright (c) 2013 David Chisnall 3 * All rights reserved. 4 * 5 * This software was developed by SRI International and the University of 6 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) 7 * ("CTSRD"), as part of the DARPA CRASH research programme. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 * $FreeBSD$ 31 */ 32 33 #include "input_buffer.hh" 34 #include <ctype.h> 35 #include <errno.h> 36 #include <limits.h> 37 #include <stdint.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <functional> 42 #ifndef NDEBUG 43 #include <iostream> 44 #endif 45 46 47 #include <sys/stat.h> 48 #include <sys/mman.h> 49 #include <assert.h> 50 #include <fcntl.h> 51 #include <unistd.h> 52 53 #ifndef MAP_PREFAULT_READ 54 #define MAP_PREFAULT_READ 0 55 #endif 56 57 using std::string; 58 59 namespace 60 { 61 /** 62 * Subclass of input_buffer that mmap()s a file and owns the resulting memory. 63 * When this object is destroyed, the memory is unmapped. 64 */ 65 struct mmap_input_buffer : public dtc::input_buffer 66 { 67 string fn; 68 const string &filename() const override 69 { 70 return fn; 71 } 72 /** 73 * Constructs a new buffer from the file passed in as a file 74 * descriptor. 75 */ 76 mmap_input_buffer(int fd, string &&filename); 77 /** 78 * Unmaps the buffer, if one exists. 79 */ 80 virtual ~mmap_input_buffer(); 81 }; 82 /** 83 * Input buffer read from standard input. This is used for reading device tree 84 * blobs and source from standard input. It reads the entire input into 85 * malloc'd memory, so will be very slow for large inputs. DTS and DTB files 86 * are very rarely more than 10KB though, so this is probably not a problem. 87 */ 88 struct stream_input_buffer : public dtc::input_buffer 89 { 90 const string &filename() const override 91 { 92 static string n = "<standard input>"; 93 return n; 94 } 95 /** 96 * The buffer that will store the data read from the standard input. 97 */ 98 std::vector<char> b; 99 /** 100 * Constructs a new buffer from the standard input. 101 */ 102 stream_input_buffer(); 103 }; 104 105 mmap_input_buffer::mmap_input_buffer(int fd, string &&filename) 106 : input_buffer(0, 0), fn(filename) 107 { 108 struct stat sb; 109 if (fstat(fd, &sb)) 110 { 111 perror("Failed to stat file"); 112 } 113 size = sb.st_size; 114 buffer = (const char*)mmap(0, size, PROT_READ, MAP_PRIVATE | 115 MAP_PREFAULT_READ, fd, 0); 116 if (buffer == MAP_FAILED) 117 { 118 perror("Failed to mmap file"); 119 exit(EXIT_FAILURE); 120 } 121 } 122 123 mmap_input_buffer::~mmap_input_buffer() 124 { 125 if (buffer != 0) 126 { 127 munmap((void*)buffer, size); 128 } 129 } 130 131 stream_input_buffer::stream_input_buffer() : input_buffer(0, 0) 132 { 133 int c; 134 while ((c = fgetc(stdin)) != EOF) 135 { 136 b.push_back(c); 137 } 138 buffer = b.data(); 139 size = b.size(); 140 } 141 142 } // Anonymous namespace 143 144 145 namespace dtc 146 { 147 148 void 149 input_buffer::skip_to(char c) 150 { 151 while ((cursor < size) && (buffer[cursor] != c)) 152 { 153 cursor++; 154 } 155 } 156 157 void 158 text_input_buffer::skip_to(char c) 159 { 160 while (!finished() && (*(*this) != c)) 161 { 162 ++(*this); 163 } 164 } 165 166 void 167 text_input_buffer::skip_spaces() 168 { 169 if (finished()) { return; } 170 char c = *(*this); 171 bool last_nl = false; 172 while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f') 173 || (c == '\v') || (c == '\r')) 174 { 175 last_nl = ((c == '\n') || (c == '\r')); 176 ++(*this); 177 if (finished()) 178 { 179 c = '\0'; 180 } 181 else 182 { 183 c = *(*this); 184 } 185 } 186 // Skip C preprocessor leftovers 187 if ((c == '#') && ((cursor == 0) || last_nl)) 188 { 189 skip_to('\n'); 190 skip_spaces(); 191 } 192 if (consume("/include/")) 193 { 194 handle_include(); 195 skip_spaces(); 196 } 197 } 198 199 void 200 text_input_buffer::handle_include() 201 { 202 bool reallyInclude = true; 203 if (consume("if ")) 204 { 205 next_token(); 206 string name = parse_property_name(); 207 if (defines.count(name) > 0) 208 { 209 reallyInclude = true; 210 } 211 consume('/'); 212 } 213 next_token(); 214 if (!consume('"')) 215 { 216 parse_error("Expected quoted filename"); 217 return; 218 } 219 auto loc = location(); 220 string file = parse_to('"'); 221 consume('"'); 222 if (!reallyInclude) 223 { 224 return; 225 } 226 string include_file = dir + '/' + file; 227 auto include_buffer = input_buffer::buffer_for_file(include_file, false); 228 if (include_buffer == 0) 229 { 230 for (auto i : include_paths) 231 { 232 include_file = i + '/' + file; 233 include_buffer = input_buffer::buffer_for_file(include_file, false); 234 if (include_buffer != 0) 235 { 236 break; 237 } 238 } 239 } 240 if (depfile) 241 { 242 putc(' ', depfile); 243 fputs(include_file.c_str(), depfile); 244 } 245 if (!include_buffer) 246 { 247 loc.report_error("Unable to locate input file"); 248 return; 249 } 250 input_stack.push(std::move(include_buffer)); 251 } 252 253 input_buffer 254 input_buffer::buffer_from_offset(int offset, int s) 255 { 256 if (offset < 0) 257 { 258 return input_buffer(); 259 } 260 if (s == 0) 261 { 262 s = size - offset; 263 } 264 if (offset > size) 265 { 266 return input_buffer(); 267 } 268 if (s > (size-offset)) 269 { 270 return input_buffer(); 271 } 272 return input_buffer(&buffer[offset], s); 273 } 274 275 bool 276 input_buffer::consume(const char *str) 277 { 278 int len = strlen(str); 279 if (len > size - cursor) 280 { 281 return false; 282 } 283 else 284 { 285 for (int i=0 ; i<len ; ++i) 286 { 287 if (str[i] != (*this)[i]) 288 { 289 return false; 290 } 291 } 292 cursor += len; 293 return true; 294 } 295 return false; 296 } 297 298 bool 299 input_buffer::consume_integer(unsigned long long &outInt) 300 { 301 // The first character must be a digit. Hex and octal strings 302 // are prefixed by 0 and 0x, respectively. 303 if (!isdigit((*this)[0])) 304 { 305 return false; 306 } 307 char *end= const_cast<char*>(&buffer[size]); 308 outInt = strtoull(&buffer[cursor], &end, 0); 309 if (end == &buffer[cursor]) 310 { 311 return false; 312 } 313 cursor = end - buffer; 314 return true; 315 } 316 317 namespace { 318 319 /** 320 * Convenience typedef for the type that we use for all values. 321 */ 322 typedef unsigned long long valty; 323 324 /** 325 * Expression tree currently being parsed. 326 */ 327 struct expression 328 { 329 typedef text_input_buffer::source_location source_location; 330 /** 331 * The type that is returned when computing the result. The boolean value 332 * indicates whether this is a valid expression. 333 * 334 * FIXME: Once we can use C++17, this should be `std::optional`. 335 */ 336 typedef std::pair<valty, bool> result; 337 /** 338 * Evaluate this node, taking into account operator precedence. 339 */ 340 virtual result operator()() = 0; 341 /** 342 * Returns the precedence of this node. Lower values indicate higher 343 * precedence. 344 */ 345 virtual int precedence() = 0; 346 /** 347 * Constructs an expression, storing the location where it was created. 348 */ 349 expression(source_location l) : loc(l) {} 350 virtual ~expression() {} 351 #ifndef NDEBUG 352 /** 353 * Dumps this expression to `std::cerr`, appending a newline if `nl` is 354 * `true`. 355 */ 356 void dump(bool nl=false) 357 { 358 void *ptr = this; 359 if (ptr == nullptr) 360 { 361 std::cerr << "{nullptr}\n"; 362 return; 363 } 364 dump_impl(); 365 if (nl) 366 { 367 std::cerr << '\n'; 368 } 369 } 370 private: 371 /** 372 * Method that sublcasses override to implement the behaviour of `dump()`. 373 */ 374 virtual void dump_impl() = 0; 375 #endif 376 protected: 377 source_location loc; 378 }; 379 380 /** 381 * Expression wrapping a single integer. Leaf nodes in the expression tree. 382 */ 383 class terminal_expr : public expression 384 { 385 /** 386 * The value that this wraps. 387 */ 388 valty val; 389 /** 390 * Evaluate. Trivially returns the value that this class wraps. 391 */ 392 result operator()() override 393 { 394 return {val, true}; 395 } 396 int precedence() override 397 { 398 return 0; 399 } 400 public: 401 /** 402 * Constructor. 403 */ 404 terminal_expr(source_location l, valty v) : expression(l), val(v) {} 405 #ifndef NDEBUG 406 void dump_impl() override { std::cerr << val; } 407 #endif 408 }; 409 410 /** 411 * Parenthetical expression. Exists to make the contents opaque. 412 */ 413 struct paren_expression : public expression 414 { 415 /** 416 * The expression within the parentheses. 417 */ 418 expression_ptr subexpr; 419 /** 420 * Constructor. Takes the child expression as the only argument. 421 */ 422 paren_expression(source_location l, expression_ptr p) : expression(l), 423 subexpr(std::move(p)) {} 424 int precedence() override 425 { 426 return 0; 427 } 428 /** 429 * Evaluate - just forwards to the underlying expression. 430 */ 431 result operator()() override 432 { 433 return (*subexpr)(); 434 } 435 #ifndef NDEBUG 436 void dump_impl() override 437 { 438 std::cerr << " ("; 439 subexpr->dump(); 440 std::cerr << ") "; 441 } 442 #endif 443 }; 444 445 /** 446 * Template class for unary operators. The `OpChar` template parameter is 447 * solely for debugging and makes it easy to print the expression. The `Op` 448 * template parameter is a function object that implements the operator that 449 * this class provides. Most of these are provided by the `<functional>` 450 * header. 451 */ 452 template<char OpChar, class Op> 453 class unary_operator : public expression 454 { 455 /** 456 * The subexpression for this unary operator. 457 */ 458 expression_ptr subexpr; 459 result operator()() override 460 { 461 Op op; 462 result s = (*subexpr)(); 463 if (!s.second) 464 { 465 return s; 466 } 467 return {op(s.first), true}; 468 } 469 /** 470 * All unary operators have the same precedence. They are all evaluated 471 * before binary expressions, but after parentheses. 472 */ 473 int precedence() override 474 { 475 return 3; 476 } 477 public: 478 unary_operator(source_location l, expression_ptr p) : 479 expression(l), subexpr(std::move(p)) {} 480 #ifndef NDEBUG 481 void dump_impl() override 482 { 483 std::cerr << OpChar; 484 subexpr->dump(); 485 } 486 #endif 487 }; 488 489 /** 490 * Abstract base class for binary operators. Allows the tree to be modified 491 * without knowing what the operations actually are. 492 */ 493 struct binary_operator_base : public expression 494 { 495 using expression::expression; 496 /** 497 * The left side of the expression. 498 */ 499 expression_ptr lhs; 500 /** 501 * The right side of the expression. 502 */ 503 expression_ptr rhs; 504 /** 505 * Insert a node somewhere down the path of left children, until it would 506 * be preempting something that should execute first. 507 */ 508 void insert_left(binary_operator_base *new_left) 509 { 510 if (lhs->precedence() < new_left->precedence()) 511 { 512 new_left->rhs = std::move(lhs); 513 lhs.reset(new_left); 514 } 515 else 516 { 517 static_cast<binary_operator_base*>(lhs.get())->insert_left(new_left); 518 } 519 } 520 }; 521 522 /** 523 * Template class for binary operators. The precedence and the operation are 524 * provided as template parameters. 525 */ 526 template<int Precedence, class Op> 527 struct binary_operator : public binary_operator_base 528 { 529 result operator()() override 530 { 531 Op op; 532 result l = (*lhs)(); 533 result r = (*rhs)(); 534 if (!(l.second && r.second)) 535 { 536 return {0, false}; 537 } 538 return {op(l.first, r.first), true}; 539 } 540 int precedence() override 541 { 542 return Precedence; 543 } 544 #ifdef NDEBUG 545 /** 546 * Constructor. Takes the name of the operator as an argument, for 547 * debugging. Only stores it in debug mode. 548 */ 549 binary_operator(source_location l, const char *) : 550 binary_operator_base(l) {} 551 #else 552 const char *opName; 553 binary_operator(source_location l, const char *o) : 554 binary_operator_base(l), opName(o) {} 555 void dump_impl() override 556 { 557 lhs->dump(); 558 std::cerr << opName; 559 rhs->dump(); 560 } 561 #endif 562 }; 563 564 /** 565 * Ternary conditional operators (`cond ? true : false`) are a special case - 566 * there are no other ternary operators. 567 */ 568 class ternary_conditional_operator : public expression 569 { 570 /** 571 * The condition for the clause. 572 */ 573 expression_ptr cond; 574 /** 575 * The expression that this evaluates to if the condition is true. 576 */ 577 expression_ptr lhs; 578 /** 579 * The expression that this evaluates to if the condition is false. 580 */ 581 expression_ptr rhs; 582 result operator()() override 583 { 584 result c = (*cond)(); 585 result l = (*lhs)(); 586 result r = (*rhs)(); 587 if (!(l.second && r.second && c.second)) 588 { 589 return {0, false}; 590 } 591 return c.first ? l : r; 592 } 593 int precedence() override 594 { 595 // The actual precedence of a ternary conditional operator is 15, but 596 // its associativity is the opposite way around to the other operators, 597 // so we fudge it slightly. 598 return 3; 599 } 600 #ifndef NDEBUG 601 void dump_impl() override 602 { 603 cond->dump(); 604 std::cerr << " ? "; 605 lhs->dump(); 606 std::cerr << " : "; 607 rhs->dump(); 608 } 609 #endif 610 public: 611 ternary_conditional_operator(source_location sl, 612 expression_ptr c, 613 expression_ptr l, 614 expression_ptr r) : 615 expression(sl), cond(std::move(c)), lhs(std::move(l)), 616 rhs(std::move(r)) {} 617 }; 618 619 template<typename T> 620 struct lshift 621 { 622 constexpr T operator()(const T &lhs, const T &rhs) const 623 { 624 return lhs << rhs; 625 } 626 }; 627 template<typename T> 628 struct rshift 629 { 630 constexpr T operator()(const T &lhs, const T &rhs) const 631 { 632 return lhs >> rhs; 633 } 634 }; 635 template<typename T> 636 struct unary_plus 637 { 638 constexpr T operator()(const T &val) const 639 { 640 return +val; 641 } 642 }; 643 // TODO: Replace with std::bit_not once we can guarantee C++14 as a baseline. 644 template<typename T> 645 struct bit_not 646 { 647 constexpr T operator()(const T &val) const 648 { 649 return ~val; 650 } 651 }; 652 653 template<typename T> 654 struct divmod : public binary_operator<5, T> 655 { 656 using binary_operator<5, T>::binary_operator; 657 using binary_operator_base::result; 658 result operator()() override 659 { 660 result r = (*binary_operator_base::rhs)(); 661 if (r.second && (r.first == 0)) 662 { 663 expression::loc.report_error("Division by zero"); 664 return {0, false}; 665 } 666 return binary_operator<5, T>::operator()(); 667 } 668 }; 669 670 } // anonymous namespace 671 672 673 expression_ptr text_input_buffer::parse_binary_expression(expression_ptr lhs) 674 { 675 next_token(); 676 binary_operator_base *expr = nullptr; 677 char op = *(*this); 678 source_location l = location(); 679 switch (op) 680 { 681 default: 682 return lhs; 683 case '+': 684 expr = new binary_operator<6, std::plus<valty>>(l, "+"); 685 break; 686 case '-': 687 expr = new binary_operator<6, std::minus<valty>>(l, "-"); 688 break; 689 case '%': 690 expr = new divmod<std::modulus<valty>>(l, "/"); 691 break; 692 case '*': 693 expr = new binary_operator<5, std::multiplies<valty>>(l, "*"); 694 break; 695 case '/': 696 expr = new divmod<std::divides<valty>>(l, "/"); 697 break; 698 case '<': 699 switch (peek()) 700 { 701 default: 702 parse_error("Invalid operator"); 703 return nullptr; 704 case ' ': 705 case '(': 706 case '0'...'9': 707 expr = new binary_operator<8, std::less<valty>>(l, "<"); 708 break; 709 case '=': 710 ++(*this); 711 expr = new binary_operator<8, std::less_equal<valty>>(l, "<="); 712 break; 713 case '<': 714 ++(*this); 715 expr = new binary_operator<7, lshift<valty>>(l, "<<"); 716 break; 717 } 718 break; 719 case '>': 720 switch (peek()) 721 { 722 default: 723 parse_error("Invalid operator"); 724 return nullptr; 725 case '(': 726 case ' ': 727 case '0'...'9': 728 expr = new binary_operator<8, std::greater<valty>>(l, ">"); 729 break; 730 case '=': 731 ++(*this); 732 expr = new binary_operator<8, std::greater_equal<valty>>(l, ">="); 733 break; 734 case '>': 735 ++(*this); 736 expr = new binary_operator<7, rshift<valty>>(l, ">>"); 737 break; 738 return lhs; 739 } 740 break; 741 case '=': 742 if (peek() != '=') 743 { 744 parse_error("Invalid operator"); 745 return nullptr; 746 } 747 expr = new binary_operator<9, std::equal_to<valty>>(l, "=="); 748 break; 749 case '!': 750 if (peek() != '=') 751 { 752 parse_error("Invalid operator"); 753 return nullptr; 754 } 755 cursor++; 756 expr = new binary_operator<9, std::not_equal_to<valty>>(l, "!="); 757 break; 758 case '&': 759 if (peek() == '&') 760 { 761 expr = new binary_operator<13, std::logical_and<valty>>(l, "&&"); 762 } 763 else 764 { 765 expr = new binary_operator<10, std::bit_and<valty>>(l, "&"); 766 } 767 break; 768 case '|': 769 if (peek() == '|') 770 { 771 expr = new binary_operator<12, std::logical_or<valty>>(l, "||"); 772 } 773 else 774 { 775 expr = new binary_operator<14, std::bit_or<valty>>(l, "|"); 776 } 777 break; 778 case '?': 779 { 780 consume('?'); 781 expression_ptr true_case = parse_expression(); 782 next_token(); 783 if (!true_case || !consume(':')) 784 { 785 parse_error("Expected : in ternary conditional operator"); 786 return nullptr; 787 } 788 expression_ptr false_case = parse_expression(); 789 if (!false_case) 790 { 791 parse_error("Expected false condition for ternary operator"); 792 return nullptr; 793 } 794 return expression_ptr(new ternary_conditional_operator(l, std::move(lhs), 795 std::move(true_case), std::move(false_case))); 796 } 797 } 798 ++(*this); 799 next_token(); 800 expression_ptr e(expr); 801 expression_ptr rhs(parse_expression()); 802 if (!rhs) 803 { 804 return nullptr; 805 } 806 expr->lhs = std::move(lhs); 807 if (rhs->precedence() < expr->precedence()) 808 { 809 expr->rhs = std::move(rhs); 810 } 811 else 812 { 813 // If we're a normal left-to-right expression, then we need to insert 814 // this as the far-left child node of the rhs expression 815 binary_operator_base *rhs_op = 816 static_cast<binary_operator_base*>(rhs.get()); 817 rhs_op->insert_left(expr); 818 e.release(); 819 return rhs; 820 } 821 return e; 822 } 823 824 expression_ptr text_input_buffer::parse_expression(bool stopAtParen) 825 { 826 next_token(); 827 unsigned long long leftVal; 828 expression_ptr lhs; 829 source_location l = location(); 830 switch (*(*this)) 831 { 832 case '0'...'9': 833 if (!consume_integer(leftVal)) 834 { 835 return nullptr; 836 } 837 lhs.reset(new terminal_expr(l, leftVal)); 838 break; 839 case '(': 840 { 841 consume('('); 842 expression_ptr &&subexpr = parse_expression(); 843 if (!subexpr) 844 { 845 return nullptr; 846 } 847 lhs.reset(new paren_expression(l, std::move(subexpr))); 848 if (!consume(')')) 849 { 850 return nullptr; 851 } 852 if (stopAtParen) 853 { 854 return lhs; 855 } 856 break; 857 } 858 case '+': 859 { 860 consume('+'); 861 expression_ptr &&subexpr = parse_expression(); 862 if (!subexpr) 863 { 864 return nullptr; 865 } 866 lhs.reset(new unary_operator<'+', unary_plus<valty>>(l, std::move(subexpr))); 867 break; 868 } 869 case '-': 870 { 871 consume('-'); 872 expression_ptr &&subexpr = parse_expression(); 873 if (!subexpr) 874 { 875 return nullptr; 876 } 877 lhs.reset(new unary_operator<'-', std::negate<valty>>(l, std::move(subexpr))); 878 break; 879 } 880 case '!': 881 { 882 consume('!'); 883 expression_ptr &&subexpr = parse_expression(); 884 if (!subexpr) 885 { 886 return nullptr; 887 } 888 lhs.reset(new unary_operator<'!', std::logical_not<valty>>(l, std::move(subexpr))); 889 break; 890 } 891 case '~': 892 { 893 consume('~'); 894 expression_ptr &&subexpr = parse_expression(); 895 if (!subexpr) 896 { 897 return nullptr; 898 } 899 lhs.reset(new unary_operator<'~', bit_not<valty>>(l, std::move(subexpr))); 900 break; 901 } 902 } 903 if (!lhs) 904 { 905 return nullptr; 906 } 907 return parse_binary_expression(std::move(lhs)); 908 } 909 910 bool 911 text_input_buffer::consume_integer_expression(unsigned long long &outInt) 912 { 913 switch (*(*this)) 914 { 915 case '(': 916 { 917 expression_ptr e(parse_expression(true)); 918 if (!e) 919 { 920 return false; 921 } 922 auto r = (*e)(); 923 if (r.second) 924 { 925 outInt = r.first; 926 return true; 927 } 928 return false; 929 } 930 case '0'...'9': 931 return consume_integer(outInt); 932 default: 933 return false; 934 } 935 } 936 937 bool 938 input_buffer::consume_hex_byte(uint8_t &outByte) 939 { 940 if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1])) 941 { 942 return false; 943 } 944 outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]); 945 cursor += 2; 946 return true; 947 } 948 949 text_input_buffer& 950 text_input_buffer::next_token() 951 { 952 auto &self = *this; 953 int start; 954 do { 955 start = cursor; 956 skip_spaces(); 957 if (finished()) 958 { 959 return self; 960 } 961 // Parse /* comments 962 if (*self == '/' && peek() == '*') 963 { 964 // eat the start of the comment 965 ++self; 966 ++self; 967 do { 968 // Find the ending * of */ 969 while ((*self != '\0') && (*self != '*') && !finished()) 970 { 971 ++self; 972 } 973 // Eat the * 974 ++self; 975 } while ((*self != '\0') && (*self != '/') && !finished()); 976 // Eat the / 977 ++self; 978 } 979 // Parse // comments 980 if ((*self == '/' && peek() == '/')) 981 { 982 // eat the start of the comment 983 ++self; 984 ++self; 985 // Find the ending of the line 986 while (*self != '\n' && !finished()) 987 { 988 ++self; 989 } 990 // Eat the \n 991 ++self; 992 } 993 } while (start != cursor); 994 return self; 995 } 996 997 void 998 text_input_buffer::parse_error(const char *msg) 999 { 1000 if (input_stack.empty()) 1001 { 1002 fprintf(stderr, "Error: %s\n", msg); 1003 return; 1004 } 1005 input_buffer &b = *input_stack.top(); 1006 parse_error(msg, b, b.cursor); 1007 } 1008 void 1009 text_input_buffer::parse_error(const char *msg, 1010 input_buffer &b, 1011 int loc) 1012 { 1013 int line_count = 1; 1014 int line_start = 0; 1015 int line_end = loc; 1016 if (loc < 0 || loc > b.size) 1017 { 1018 return; 1019 } 1020 for (int i=loc ; i>0 ; --i) 1021 { 1022 if (b.buffer[i] == '\n') 1023 { 1024 line_count++; 1025 if (line_start == 0) 1026 { 1027 line_start = i+1; 1028 } 1029 } 1030 } 1031 for (int i=loc+1 ; i<b.size ; ++i) 1032 { 1033 if (b.buffer[i] == '\n') 1034 { 1035 line_end = i; 1036 break; 1037 } 1038 } 1039 fprintf(stderr, "Error at %s:%d:%d: %s\n", b.filename().c_str(), line_count, loc - line_start, msg); 1040 fwrite(&b.buffer[line_start], line_end-line_start, 1, stderr); 1041 putc('\n', stderr); 1042 for (int i=0 ; i<(loc-line_start) ; ++i) 1043 { 1044 char c = (b.buffer[i+line_start] == '\t') ? '\t' : ' '; 1045 putc(c, stderr); 1046 } 1047 putc('^', stderr); 1048 putc('\n', stderr); 1049 } 1050 #ifndef NDEBUG 1051 void 1052 input_buffer::dump() 1053 { 1054 fprintf(stderr, "Current cursor: %d\n", cursor); 1055 fwrite(&buffer[cursor], size-cursor, 1, stderr); 1056 } 1057 #endif 1058 1059 1060 namespace 1061 { 1062 /** 1063 * The source files are ASCII, so we provide a non-locale-aware version of 1064 * isalpha. This is a class so that it can be used with a template function 1065 * for parsing strings. 1066 */ 1067 struct is_alpha 1068 { 1069 static inline bool check(const char c) 1070 { 1071 return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && 1072 (c <= 'Z')); 1073 } 1074 }; 1075 /** 1076 * Check whether a character is in the set allowed for node names. This is a 1077 * class so that it can be used with a template function for parsing strings. 1078 */ 1079 struct is_node_name_character 1080 { 1081 static inline bool check(const char c) 1082 { 1083 switch(c) 1084 { 1085 default: 1086 return false; 1087 case 'a'...'z': case 'A'...'Z': case '0'...'9': 1088 case ',': case '.': case '+': case '-': 1089 case '_': 1090 return true; 1091 } 1092 } 1093 }; 1094 /** 1095 * Check whether a character is in the set allowed for property names. This is 1096 * a class so that it can be used with a template function for parsing strings. 1097 */ 1098 struct is_property_name_character 1099 { 1100 static inline bool check(const char c) 1101 { 1102 switch(c) 1103 { 1104 default: 1105 return false; 1106 case 'a'...'z': case 'A'...'Z': case '0'...'9': 1107 case ',': case '.': case '+': case '-': 1108 case '_': case '#': 1109 return true; 1110 } 1111 } 1112 }; 1113 1114 template<class T> 1115 string parse(text_input_buffer &s) 1116 { 1117 std::vector<char> bytes; 1118 for (char c=*s ; T::check(c) ; c=*(++s)) 1119 { 1120 bytes.push_back(c); 1121 } 1122 return string(bytes.begin(), bytes.end()); 1123 } 1124 1125 } 1126 1127 string 1128 text_input_buffer::parse_node_name() 1129 { 1130 return parse<is_node_name_character>(*this); 1131 } 1132 1133 string 1134 text_input_buffer::parse_property_name() 1135 { 1136 return parse<is_property_name_character>(*this); 1137 } 1138 1139 string 1140 text_input_buffer::parse_node_or_property_name(bool &is_property) 1141 { 1142 if (is_property) 1143 { 1144 return parse_property_name(); 1145 } 1146 std::vector<char> bytes; 1147 for (char c=*(*this) ; is_node_name_character::check(c) ; c=*(++(*this))) 1148 { 1149 bytes.push_back(c); 1150 } 1151 for (char c=*(*this) ; is_property_name_character::check(c) ; c=*(++(*this))) 1152 { 1153 bytes.push_back(c); 1154 is_property = true; 1155 } 1156 return string(bytes.begin(), bytes.end()); 1157 } 1158 1159 string 1160 input_buffer::parse_to(char stop) 1161 { 1162 std::vector<char> bytes; 1163 for (char c=*(*this) ; c != stop ; c=*(++(*this))) 1164 { 1165 bytes.push_back(c); 1166 } 1167 return string(bytes.begin(), bytes.end()); 1168 } 1169 1170 string 1171 text_input_buffer::parse_to(char stop) 1172 { 1173 std::vector<char> bytes; 1174 for (char c=*(*this) ; c != stop ; c=*(++(*this))) 1175 { 1176 if (finished()) 1177 { 1178 break; 1179 } 1180 bytes.push_back(c); 1181 } 1182 return string(bytes.begin(), bytes.end()); 1183 } 1184 1185 char 1186 text_input_buffer::peek() 1187 { 1188 return (*input_stack.top())[1]; 1189 } 1190 1191 std::unique_ptr<input_buffer> 1192 input_buffer::buffer_for_file(const string &path, bool warn) 1193 { 1194 if (path == "-") 1195 { 1196 std::unique_ptr<input_buffer> b(new stream_input_buffer()); 1197 return b; 1198 } 1199 int source = open(path.c_str(), O_RDONLY); 1200 if (source == -1) 1201 { 1202 if (warn) 1203 { 1204 fprintf(stderr, "Unable to open file '%s'. %s\n", path.c_str(), strerror(errno)); 1205 } 1206 return 0; 1207 } 1208 struct stat st; 1209 if (fstat(source, &st) == 0 && S_ISDIR(st.st_mode)) 1210 { 1211 if (warn) 1212 { 1213 fprintf(stderr, "File %s is a directory\n", path.c_str()); 1214 } 1215 close(source); 1216 return 0; 1217 } 1218 std::unique_ptr<input_buffer> b(new mmap_input_buffer(source, string(path))); 1219 close(source); 1220 return b; 1221 } 1222 1223 } // namespace dtc 1224 1225