1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2013 David Chisnall 5 * All rights reserved. 6 * 7 * This software was developed by SRI International and the University of 8 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) 9 * ("CTSRD"), as part of the DARPA CRASH research programme. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * $FreeBSD$ 33 */ 34 35 #include "input_buffer.hh" 36 #include <ctype.h> 37 #include <errno.h> 38 #include <limits.h> 39 #include <stdint.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <functional> 44 #ifndef NDEBUG 45 #include <iostream> 46 #endif 47 48 49 #include <sys/stat.h> 50 #include <sys/mman.h> 51 #include <assert.h> 52 #include <fcntl.h> 53 #include <unistd.h> 54 55 #ifndef MAP_PREFAULT_READ 56 #define MAP_PREFAULT_READ 0 57 #endif 58 59 using std::string; 60 61 namespace 62 { 63 /** 64 * Subclass of input_buffer that mmap()s a file and owns the resulting memory. 65 * When this object is destroyed, the memory is unmapped. 66 */ 67 struct mmap_input_buffer : public dtc::input_buffer 68 { 69 string fn; 70 const string &filename() const override 71 { 72 return fn; 73 } 74 /** 75 * Constructs a new buffer from the file passed in as a file 76 * descriptor. 77 */ 78 mmap_input_buffer(int fd, string &&filename); 79 /** 80 * Unmaps the buffer, if one exists. 81 */ 82 virtual ~mmap_input_buffer(); 83 }; 84 /** 85 * Input buffer read from standard input. This is used for reading device tree 86 * blobs and source from standard input. It reads the entire input into 87 * malloc'd memory, so will be very slow for large inputs. DTS and DTB files 88 * are very rarely more than 10KB though, so this is probably not a problem. 89 */ 90 struct stream_input_buffer : public dtc::input_buffer 91 { 92 const string &filename() const override 93 { 94 static string n = "<standard input>"; 95 return n; 96 } 97 /** 98 * The buffer that will store the data read from the standard input. 99 */ 100 std::vector<char> b; 101 /** 102 * Constructs a new buffer from the standard input. 103 */ 104 stream_input_buffer(); 105 }; 106 107 mmap_input_buffer::mmap_input_buffer(int fd, string &&filename) 108 : input_buffer(0, 0), fn(filename) 109 { 110 struct stat sb; 111 if (fstat(fd, &sb)) 112 { 113 perror("Failed to stat file"); 114 } 115 size = sb.st_size; 116 buffer = (const char*)mmap(0, size, PROT_READ, MAP_PRIVATE | 117 MAP_PREFAULT_READ, fd, 0); 118 if (buffer == MAP_FAILED) 119 { 120 perror("Failed to mmap file"); 121 exit(EXIT_FAILURE); 122 } 123 } 124 125 mmap_input_buffer::~mmap_input_buffer() 126 { 127 if (buffer != 0) 128 { 129 munmap((void*)buffer, size); 130 } 131 } 132 133 stream_input_buffer::stream_input_buffer() : input_buffer(0, 0) 134 { 135 int c; 136 while ((c = fgetc(stdin)) != EOF) 137 { 138 b.push_back(c); 139 } 140 buffer = b.data(); 141 size = b.size(); 142 } 143 144 } // Anonymous namespace 145 146 147 namespace dtc 148 { 149 150 void 151 input_buffer::skip_to(char c) 152 { 153 while ((cursor < size) && (buffer[cursor] != c)) 154 { 155 cursor++; 156 } 157 } 158 159 void 160 text_input_buffer::skip_to(char c) 161 { 162 while (!finished() && (*(*this) != c)) 163 { 164 ++(*this); 165 } 166 } 167 168 void 169 text_input_buffer::skip_spaces() 170 { 171 if (finished()) { return; } 172 char c = *(*this); 173 bool last_nl = false; 174 while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f') 175 || (c == '\v') || (c == '\r')) 176 { 177 last_nl = ((c == '\n') || (c == '\r')); 178 ++(*this); 179 if (finished()) 180 { 181 c = '\0'; 182 } 183 else 184 { 185 c = *(*this); 186 } 187 } 188 // Skip C preprocessor leftovers 189 if ((c == '#') && ((cursor == 0) || last_nl)) 190 { 191 skip_to('\n'); 192 skip_spaces(); 193 } 194 if (consume("/include/")) 195 { 196 handle_include(); 197 skip_spaces(); 198 } 199 } 200 201 void 202 text_input_buffer::handle_include() 203 { 204 bool reallyInclude = true; 205 if (consume("if ")) 206 { 207 next_token(); 208 string name = parse_property_name(); 209 if (defines.count(name) > 0) 210 { 211 reallyInclude = true; 212 } 213 consume('/'); 214 } 215 next_token(); 216 if (!consume('"')) 217 { 218 parse_error("Expected quoted filename"); 219 return; 220 } 221 auto loc = location(); 222 string file = parse_to('"'); 223 consume('"'); 224 if (!reallyInclude) 225 { 226 return; 227 } 228 string include_file = dir + '/' + file; 229 auto include_buffer = input_buffer::buffer_for_file(include_file, false); 230 if (include_buffer == 0) 231 { 232 for (auto i : include_paths) 233 { 234 include_file = i + '/' + file; 235 include_buffer = input_buffer::buffer_for_file(include_file, false); 236 if (include_buffer != 0) 237 { 238 break; 239 } 240 } 241 } 242 if (depfile) 243 { 244 putc(' ', depfile); 245 fputs(include_file.c_str(), depfile); 246 } 247 if (!include_buffer) 248 { 249 loc.report_error("Unable to locate input file"); 250 return; 251 } 252 input_stack.push(std::move(include_buffer)); 253 } 254 255 input_buffer 256 input_buffer::buffer_from_offset(int offset, int s) 257 { 258 if (offset < 0) 259 { 260 return input_buffer(); 261 } 262 if (s == 0) 263 { 264 s = size - offset; 265 } 266 if (offset > size) 267 { 268 return input_buffer(); 269 } 270 if (s > (size-offset)) 271 { 272 return input_buffer(); 273 } 274 return input_buffer(&buffer[offset], s); 275 } 276 277 bool 278 input_buffer::consume(const char *str) 279 { 280 int len = strlen(str); 281 if (len > size - cursor) 282 { 283 return false; 284 } 285 else 286 { 287 for (int i=0 ; i<len ; ++i) 288 { 289 if (str[i] != (*this)[i]) 290 { 291 return false; 292 } 293 } 294 cursor += len; 295 return true; 296 } 297 return false; 298 } 299 300 bool 301 input_buffer::consume_integer(unsigned long long &outInt) 302 { 303 // The first character must be a digit. Hex and octal strings 304 // are prefixed by 0 and 0x, respectively. 305 if (!isdigit((*this)[0])) 306 { 307 return false; 308 } 309 char *end= const_cast<char*>(&buffer[size]); 310 outInt = strtoull(&buffer[cursor], &end, 0); 311 if (end == &buffer[cursor]) 312 { 313 return false; 314 } 315 cursor = end - buffer; 316 return true; 317 } 318 319 namespace { 320 321 /** 322 * Convenience typedef for the type that we use for all values. 323 */ 324 typedef unsigned long long valty; 325 326 /** 327 * Expression tree currently being parsed. 328 */ 329 struct expression 330 { 331 typedef text_input_buffer::source_location source_location; 332 /** 333 * The type that is returned when computing the result. The boolean value 334 * indicates whether this is a valid expression. 335 * 336 * FIXME: Once we can use C++17, this should be `std::optional`. 337 */ 338 typedef std::pair<valty, bool> result; 339 /** 340 * Evaluate this node, taking into account operator precedence. 341 */ 342 virtual result operator()() = 0; 343 /** 344 * Returns the precedence of this node. Lower values indicate higher 345 * precedence. 346 */ 347 virtual int precedence() = 0; 348 /** 349 * Constructs an expression, storing the location where it was created. 350 */ 351 expression(source_location l) : loc(l) {} 352 virtual ~expression() {} 353 #ifndef NDEBUG 354 /** 355 * Dumps this expression to `std::cerr`, appending a newline if `nl` is 356 * `true`. 357 */ 358 void dump(bool nl=false) 359 { 360 void *ptr = this; 361 if (ptr == nullptr) 362 { 363 std::cerr << "{nullptr}\n"; 364 return; 365 } 366 dump_impl(); 367 if (nl) 368 { 369 std::cerr << '\n'; 370 } 371 } 372 private: 373 /** 374 * Method that sublcasses override to implement the behaviour of `dump()`. 375 */ 376 virtual void dump_impl() = 0; 377 #endif 378 protected: 379 source_location loc; 380 }; 381 382 /** 383 * Expression wrapping a single integer. Leaf nodes in the expression tree. 384 */ 385 class terminal_expr : public expression 386 { 387 /** 388 * The value that this wraps. 389 */ 390 valty val; 391 /** 392 * Evaluate. Trivially returns the value that this class wraps. 393 */ 394 result operator()() override 395 { 396 return {val, true}; 397 } 398 int precedence() override 399 { 400 return 0; 401 } 402 public: 403 /** 404 * Constructor. 405 */ 406 terminal_expr(source_location l, valty v) : expression(l), val(v) {} 407 #ifndef NDEBUG 408 void dump_impl() override { std::cerr << val; } 409 #endif 410 }; 411 412 /** 413 * Parenthetical expression. Exists to make the contents opaque. 414 */ 415 struct paren_expression : public expression 416 { 417 /** 418 * The expression within the parentheses. 419 */ 420 expression_ptr subexpr; 421 /** 422 * Constructor. Takes the child expression as the only argument. 423 */ 424 paren_expression(source_location l, expression_ptr p) : expression(l), 425 subexpr(std::move(p)) {} 426 int precedence() override 427 { 428 return 0; 429 } 430 /** 431 * Evaluate - just forwards to the underlying expression. 432 */ 433 result operator()() override 434 { 435 return (*subexpr)(); 436 } 437 #ifndef NDEBUG 438 void dump_impl() override 439 { 440 std::cerr << " ("; 441 subexpr->dump(); 442 std::cerr << ") "; 443 } 444 #endif 445 }; 446 447 /** 448 * Template class for unary operators. The `OpChar` template parameter is 449 * solely for debugging and makes it easy to print the expression. The `Op` 450 * template parameter is a function object that implements the operator that 451 * this class provides. Most of these are provided by the `<functional>` 452 * header. 453 */ 454 template<char OpChar, class Op> 455 class unary_operator : public expression 456 { 457 /** 458 * The subexpression for this unary operator. 459 */ 460 expression_ptr subexpr; 461 result operator()() override 462 { 463 Op op; 464 result s = (*subexpr)(); 465 if (!s.second) 466 { 467 return s; 468 } 469 return {op(s.first), true}; 470 } 471 /** 472 * All unary operators have the same precedence. They are all evaluated 473 * before binary expressions, but after parentheses. 474 */ 475 int precedence() override 476 { 477 return 3; 478 } 479 public: 480 unary_operator(source_location l, expression_ptr p) : 481 expression(l), subexpr(std::move(p)) {} 482 #ifndef NDEBUG 483 void dump_impl() override 484 { 485 std::cerr << OpChar; 486 subexpr->dump(); 487 } 488 #endif 489 }; 490 491 /** 492 * Abstract base class for binary operators. Allows the tree to be modified 493 * without knowing what the operations actually are. 494 */ 495 struct binary_operator_base : public expression 496 { 497 using expression::expression; 498 /** 499 * The left side of the expression. 500 */ 501 expression_ptr lhs; 502 /** 503 * The right side of the expression. 504 */ 505 expression_ptr rhs; 506 /** 507 * Insert a node somewhere down the path of left children, until it would 508 * be preempting something that should execute first. 509 */ 510 void insert_left(binary_operator_base *new_left) 511 { 512 if (lhs->precedence() < new_left->precedence()) 513 { 514 new_left->rhs = std::move(lhs); 515 lhs.reset(new_left); 516 } 517 else 518 { 519 static_cast<binary_operator_base*>(lhs.get())->insert_left(new_left); 520 } 521 } 522 }; 523 524 /** 525 * Template class for binary operators. The precedence and the operation are 526 * provided as template parameters. 527 */ 528 template<int Precedence, class Op> 529 struct binary_operator : public binary_operator_base 530 { 531 result operator()() override 532 { 533 Op op; 534 result l = (*lhs)(); 535 result r = (*rhs)(); 536 if (!(l.second && r.second)) 537 { 538 return {0, false}; 539 } 540 return {op(l.first, r.first), true}; 541 } 542 int precedence() override 543 { 544 return Precedence; 545 } 546 #ifdef NDEBUG 547 /** 548 * Constructor. Takes the name of the operator as an argument, for 549 * debugging. Only stores it in debug mode. 550 */ 551 binary_operator(source_location l, const char *) : 552 binary_operator_base(l) {} 553 #else 554 const char *opName; 555 binary_operator(source_location l, const char *o) : 556 binary_operator_base(l), opName(o) {} 557 void dump_impl() override 558 { 559 lhs->dump(); 560 std::cerr << opName; 561 rhs->dump(); 562 } 563 #endif 564 }; 565 566 /** 567 * Ternary conditional operators (`cond ? true : false`) are a special case - 568 * there are no other ternary operators. 569 */ 570 class ternary_conditional_operator : public expression 571 { 572 /** 573 * The condition for the clause. 574 */ 575 expression_ptr cond; 576 /** 577 * The expression that this evaluates to if the condition is true. 578 */ 579 expression_ptr lhs; 580 /** 581 * The expression that this evaluates to if the condition is false. 582 */ 583 expression_ptr rhs; 584 result operator()() override 585 { 586 result c = (*cond)(); 587 result l = (*lhs)(); 588 result r = (*rhs)(); 589 if (!(l.second && r.second && c.second)) 590 { 591 return {0, false}; 592 } 593 return c.first ? l : r; 594 } 595 int precedence() override 596 { 597 // The actual precedence of a ternary conditional operator is 15, but 598 // its associativity is the opposite way around to the other operators, 599 // so we fudge it slightly. 600 return 3; 601 } 602 #ifndef NDEBUG 603 void dump_impl() override 604 { 605 cond->dump(); 606 std::cerr << " ? "; 607 lhs->dump(); 608 std::cerr << " : "; 609 rhs->dump(); 610 } 611 #endif 612 public: 613 ternary_conditional_operator(source_location sl, 614 expression_ptr c, 615 expression_ptr l, 616 expression_ptr r) : 617 expression(sl), cond(std::move(c)), lhs(std::move(l)), 618 rhs(std::move(r)) {} 619 }; 620 621 template<typename T> 622 struct lshift 623 { 624 constexpr T operator()(const T &lhs, const T &rhs) const 625 { 626 return lhs << rhs; 627 } 628 }; 629 template<typename T> 630 struct rshift 631 { 632 constexpr T operator()(const T &lhs, const T &rhs) const 633 { 634 return lhs >> rhs; 635 } 636 }; 637 template<typename T> 638 struct unary_plus 639 { 640 constexpr T operator()(const T &val) const 641 { 642 return +val; 643 } 644 }; 645 // TODO: Replace with std::bit_not once we can guarantee C++14 as a baseline. 646 template<typename T> 647 struct bit_not 648 { 649 constexpr T operator()(const T &val) const 650 { 651 return ~val; 652 } 653 }; 654 655 template<typename T> 656 struct divmod : public binary_operator<5, T> 657 { 658 using binary_operator<5, T>::binary_operator; 659 using typename binary_operator_base::result; 660 result operator()() override 661 { 662 result r = (*binary_operator_base::rhs)(); 663 if (r.second && (r.first == 0)) 664 { 665 expression::loc.report_error("Division by zero"); 666 return {0, false}; 667 } 668 return binary_operator<5, T>::operator()(); 669 } 670 }; 671 672 } // anonymous namespace 673 674 675 expression_ptr text_input_buffer::parse_binary_expression(expression_ptr lhs) 676 { 677 next_token(); 678 binary_operator_base *expr = nullptr; 679 char op = *(*this); 680 source_location l = location(); 681 switch (op) 682 { 683 default: 684 return lhs; 685 case '+': 686 expr = new binary_operator<6, std::plus<valty>>(l, "+"); 687 break; 688 case '-': 689 expr = new binary_operator<6, std::minus<valty>>(l, "-"); 690 break; 691 case '%': 692 expr = new divmod<std::modulus<valty>>(l, "/"); 693 break; 694 case '*': 695 expr = new binary_operator<5, std::multiplies<valty>>(l, "*"); 696 break; 697 case '/': 698 expr = new divmod<std::divides<valty>>(l, "/"); 699 break; 700 case '<': 701 switch (peek()) 702 { 703 default: 704 parse_error("Invalid operator"); 705 return nullptr; 706 case ' ': 707 case '(': 708 case '0'...'9': 709 expr = new binary_operator<8, std::less<valty>>(l, "<"); 710 break; 711 case '=': 712 ++(*this); 713 expr = new binary_operator<8, std::less_equal<valty>>(l, "<="); 714 break; 715 case '<': 716 ++(*this); 717 expr = new binary_operator<7, lshift<valty>>(l, "<<"); 718 break; 719 } 720 break; 721 case '>': 722 switch (peek()) 723 { 724 default: 725 parse_error("Invalid operator"); 726 return nullptr; 727 case '(': 728 case ' ': 729 case '0'...'9': 730 expr = new binary_operator<8, std::greater<valty>>(l, ">"); 731 break; 732 case '=': 733 ++(*this); 734 expr = new binary_operator<8, std::greater_equal<valty>>(l, ">="); 735 break; 736 case '>': 737 ++(*this); 738 expr = new binary_operator<7, rshift<valty>>(l, ">>"); 739 break; 740 return lhs; 741 } 742 break; 743 case '=': 744 if (peek() != '=') 745 { 746 parse_error("Invalid operator"); 747 return nullptr; 748 } 749 expr = new binary_operator<9, std::equal_to<valty>>(l, "=="); 750 break; 751 case '!': 752 if (peek() != '=') 753 { 754 parse_error("Invalid operator"); 755 return nullptr; 756 } 757 cursor++; 758 expr = new binary_operator<9, std::not_equal_to<valty>>(l, "!="); 759 break; 760 case '&': 761 if (peek() == '&') 762 { 763 expr = new binary_operator<13, std::logical_and<valty>>(l, "&&"); 764 } 765 else 766 { 767 expr = new binary_operator<10, std::bit_and<valty>>(l, "&"); 768 } 769 break; 770 case '|': 771 if (peek() == '|') 772 { 773 expr = new binary_operator<12, std::logical_or<valty>>(l, "||"); 774 } 775 else 776 { 777 expr = new binary_operator<14, std::bit_or<valty>>(l, "|"); 778 } 779 break; 780 case '?': 781 { 782 consume('?'); 783 expression_ptr true_case = parse_expression(); 784 next_token(); 785 if (!true_case || !consume(':')) 786 { 787 parse_error("Expected : in ternary conditional operator"); 788 return nullptr; 789 } 790 expression_ptr false_case = parse_expression(); 791 if (!false_case) 792 { 793 parse_error("Expected false condition for ternary operator"); 794 return nullptr; 795 } 796 return expression_ptr(new ternary_conditional_operator(l, std::move(lhs), 797 std::move(true_case), std::move(false_case))); 798 } 799 } 800 ++(*this); 801 next_token(); 802 expression_ptr e(expr); 803 expression_ptr rhs(parse_expression()); 804 if (!rhs) 805 { 806 return nullptr; 807 } 808 expr->lhs = std::move(lhs); 809 if (rhs->precedence() < expr->precedence()) 810 { 811 expr->rhs = std::move(rhs); 812 } 813 else 814 { 815 // If we're a normal left-to-right expression, then we need to insert 816 // this as the far-left child node of the rhs expression 817 binary_operator_base *rhs_op = 818 static_cast<binary_operator_base*>(rhs.get()); 819 rhs_op->insert_left(expr); 820 e.release(); 821 return rhs; 822 } 823 return e; 824 } 825 826 expression_ptr text_input_buffer::parse_expression(bool stopAtParen) 827 { 828 next_token(); 829 unsigned long long leftVal; 830 expression_ptr lhs; 831 source_location l = location(); 832 switch (*(*this)) 833 { 834 case '0'...'9': 835 if (!consume_integer(leftVal)) 836 { 837 return nullptr; 838 } 839 lhs.reset(new terminal_expr(l, leftVal)); 840 break; 841 case '(': 842 { 843 consume('('); 844 expression_ptr &&subexpr = parse_expression(); 845 if (!subexpr) 846 { 847 return nullptr; 848 } 849 lhs.reset(new paren_expression(l, std::move(subexpr))); 850 if (!consume(')')) 851 { 852 return nullptr; 853 } 854 if (stopAtParen) 855 { 856 return lhs; 857 } 858 break; 859 } 860 case '+': 861 { 862 consume('+'); 863 expression_ptr &&subexpr = parse_expression(); 864 if (!subexpr) 865 { 866 return nullptr; 867 } 868 lhs.reset(new unary_operator<'+', unary_plus<valty>>(l, std::move(subexpr))); 869 break; 870 } 871 case '-': 872 { 873 consume('-'); 874 expression_ptr &&subexpr = parse_expression(); 875 if (!subexpr) 876 { 877 return nullptr; 878 } 879 lhs.reset(new unary_operator<'-', std::negate<valty>>(l, std::move(subexpr))); 880 break; 881 } 882 case '!': 883 { 884 consume('!'); 885 expression_ptr &&subexpr = parse_expression(); 886 if (!subexpr) 887 { 888 return nullptr; 889 } 890 lhs.reset(new unary_operator<'!', std::logical_not<valty>>(l, std::move(subexpr))); 891 break; 892 } 893 case '~': 894 { 895 consume('~'); 896 expression_ptr &&subexpr = parse_expression(); 897 if (!subexpr) 898 { 899 return nullptr; 900 } 901 lhs.reset(new unary_operator<'~', bit_not<valty>>(l, std::move(subexpr))); 902 break; 903 } 904 } 905 if (!lhs) 906 { 907 return nullptr; 908 } 909 return parse_binary_expression(std::move(lhs)); 910 } 911 912 bool 913 text_input_buffer::consume_integer_expression(unsigned long long &outInt) 914 { 915 switch (*(*this)) 916 { 917 case '(': 918 { 919 expression_ptr e(parse_expression(true)); 920 if (!e) 921 { 922 return false; 923 } 924 auto r = (*e)(); 925 if (r.second) 926 { 927 outInt = r.first; 928 return true; 929 } 930 return false; 931 } 932 case '0'...'9': 933 return consume_integer(outInt); 934 default: 935 return false; 936 } 937 } 938 939 bool 940 input_buffer::consume_hex_byte(uint8_t &outByte) 941 { 942 if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1])) 943 { 944 return false; 945 } 946 outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]); 947 cursor += 2; 948 return true; 949 } 950 951 text_input_buffer& 952 text_input_buffer::next_token() 953 { 954 auto &self = *this; 955 int start; 956 do { 957 start = cursor; 958 skip_spaces(); 959 if (finished()) 960 { 961 return self; 962 } 963 // Parse /* comments 964 if (*self == '/' && peek() == '*') 965 { 966 // eat the start of the comment 967 ++self; 968 ++self; 969 do { 970 // Find the ending * of */ 971 while ((*self != '\0') && (*self != '*') && !finished()) 972 { 973 ++self; 974 } 975 // Eat the * 976 ++self; 977 } while ((*self != '\0') && (*self != '/') && !finished()); 978 // Eat the / 979 ++self; 980 } 981 // Parse // comments 982 if ((*self == '/' && peek() == '/')) 983 { 984 // eat the start of the comment 985 ++self; 986 ++self; 987 // Find the ending of the line 988 while (*self != '\n' && !finished()) 989 { 990 ++self; 991 } 992 // Eat the \n 993 ++self; 994 } 995 } while (start != cursor); 996 return self; 997 } 998 999 void 1000 text_input_buffer::parse_error(const char *msg) 1001 { 1002 if (input_stack.empty()) 1003 { 1004 fprintf(stderr, "Error: %s\n", msg); 1005 return; 1006 } 1007 input_buffer &b = *input_stack.top(); 1008 parse_error(msg, b, b.cursor); 1009 } 1010 void 1011 text_input_buffer::parse_error(const char *msg, 1012 input_buffer &b, 1013 int loc) 1014 { 1015 int line_count = 1; 1016 int line_start = 0; 1017 int line_end = loc; 1018 if (loc < 0 || loc > b.size) 1019 { 1020 return; 1021 } 1022 for (int i=loc ; i>0 ; --i) 1023 { 1024 if (b.buffer[i] == '\n') 1025 { 1026 line_count++; 1027 if (line_start == 0) 1028 { 1029 line_start = i+1; 1030 } 1031 } 1032 } 1033 for (int i=loc+1 ; i<b.size ; ++i) 1034 { 1035 if (b.buffer[i] == '\n') 1036 { 1037 line_end = i; 1038 break; 1039 } 1040 } 1041 fprintf(stderr, "Error at %s:%d:%d: %s\n", b.filename().c_str(), line_count, loc - line_start, msg); 1042 fwrite(&b.buffer[line_start], line_end-line_start, 1, stderr); 1043 putc('\n', stderr); 1044 for (int i=0 ; i<(loc-line_start) ; ++i) 1045 { 1046 char c = (b.buffer[i+line_start] == '\t') ? '\t' : ' '; 1047 putc(c, stderr); 1048 } 1049 putc('^', stderr); 1050 putc('\n', stderr); 1051 } 1052 #ifndef NDEBUG 1053 void 1054 input_buffer::dump() 1055 { 1056 fprintf(stderr, "Current cursor: %d\n", cursor); 1057 fwrite(&buffer[cursor], size-cursor, 1, stderr); 1058 } 1059 #endif 1060 1061 1062 namespace 1063 { 1064 /** 1065 * The source files are ASCII, so we provide a non-locale-aware version of 1066 * isalpha. This is a class so that it can be used with a template function 1067 * for parsing strings. 1068 */ 1069 struct is_alpha 1070 { 1071 static inline bool check(const char c) 1072 { 1073 return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && 1074 (c <= 'Z')); 1075 } 1076 }; 1077 /** 1078 * Check whether a character is in the set allowed for node names. This is a 1079 * class so that it can be used with a template function for parsing strings. 1080 */ 1081 struct is_node_name_character 1082 { 1083 static inline bool check(const char c) 1084 { 1085 switch(c) 1086 { 1087 default: 1088 return false; 1089 case 'a'...'z': case 'A'...'Z': case '0'...'9': 1090 case ',': case '.': case '+': case '-': 1091 case '_': 1092 return true; 1093 } 1094 } 1095 }; 1096 /** 1097 * Check whether a character is in the set allowed for property names. This is 1098 * a class so that it can be used with a template function for parsing strings. 1099 */ 1100 struct is_property_name_character 1101 { 1102 static inline bool check(const char c) 1103 { 1104 switch(c) 1105 { 1106 default: 1107 return false; 1108 case 'a'...'z': case 'A'...'Z': case '0'...'9': 1109 case ',': case '.': case '+': case '-': 1110 case '_': case '#': 1111 return true; 1112 } 1113 } 1114 }; 1115 1116 template<class T> 1117 string parse(text_input_buffer &s) 1118 { 1119 std::vector<char> bytes; 1120 for (char c=*s ; T::check(c) ; c=*(++s)) 1121 { 1122 bytes.push_back(c); 1123 } 1124 return string(bytes.begin(), bytes.end()); 1125 } 1126 1127 } 1128 1129 string 1130 text_input_buffer::parse_node_name() 1131 { 1132 return parse<is_node_name_character>(*this); 1133 } 1134 1135 string 1136 text_input_buffer::parse_property_name() 1137 { 1138 return parse<is_property_name_character>(*this); 1139 } 1140 1141 string 1142 text_input_buffer::parse_node_or_property_name(bool &is_property) 1143 { 1144 if (is_property) 1145 { 1146 return parse_property_name(); 1147 } 1148 std::vector<char> bytes; 1149 for (char c=*(*this) ; is_node_name_character::check(c) ; c=*(++(*this))) 1150 { 1151 bytes.push_back(c); 1152 } 1153 for (char c=*(*this) ; is_property_name_character::check(c) ; c=*(++(*this))) 1154 { 1155 bytes.push_back(c); 1156 is_property = true; 1157 } 1158 return string(bytes.begin(), bytes.end()); 1159 } 1160 1161 string 1162 input_buffer::parse_to(char stop) 1163 { 1164 std::vector<char> bytes; 1165 for (char c=*(*this) ; c != stop ; c=*(++(*this))) 1166 { 1167 bytes.push_back(c); 1168 } 1169 return string(bytes.begin(), bytes.end()); 1170 } 1171 1172 string 1173 text_input_buffer::parse_to(char stop) 1174 { 1175 std::vector<char> bytes; 1176 for (char c=*(*this) ; c != stop ; c=*(++(*this))) 1177 { 1178 if (finished()) 1179 { 1180 break; 1181 } 1182 bytes.push_back(c); 1183 } 1184 return string(bytes.begin(), bytes.end()); 1185 } 1186 1187 char 1188 text_input_buffer::peek() 1189 { 1190 return (*input_stack.top())[1]; 1191 } 1192 1193 std::unique_ptr<input_buffer> 1194 input_buffer::buffer_for_file(const string &path, bool warn) 1195 { 1196 if (path == "-") 1197 { 1198 std::unique_ptr<input_buffer> b(new stream_input_buffer()); 1199 return b; 1200 } 1201 int source = open(path.c_str(), O_RDONLY); 1202 if (source == -1) 1203 { 1204 if (warn) 1205 { 1206 fprintf(stderr, "Unable to open file '%s'. %s\n", path.c_str(), strerror(errno)); 1207 } 1208 return 0; 1209 } 1210 struct stat st; 1211 if (fstat(source, &st) == 0 && S_ISDIR(st.st_mode)) 1212 { 1213 if (warn) 1214 { 1215 fprintf(stderr, "File %s is a directory\n", path.c_str()); 1216 } 1217 close(source); 1218 return 0; 1219 } 1220 std::unique_ptr<input_buffer> b(new mmap_input_buffer(source, string(path))); 1221 close(source); 1222 return b; 1223 } 1224 1225 } // namespace dtc 1226 1227