1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2013 David Chisnall 5 * All rights reserved. 6 * 7 * This software was developed by SRI International and the University of 8 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) 9 * ("CTSRD"), as part of the DARPA CRASH research programme. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #ifndef _INPUT_BUFFER_HH_ 34 #define _INPUT_BUFFER_HH_ 35 #include "util.hh" 36 #include <assert.h> 37 #include <stack> 38 #include <string> 39 #include <unordered_set> 40 41 namespace dtc 42 { 43 44 namespace { 45 struct expression; 46 typedef std::unique_ptr<expression> expression_ptr; 47 } 48 49 /** 50 * Class encapsulating the input file. Can be used as a const char*, but has 51 * range checking. Attempting to access anything out of range will return a 0 52 * byte. The input buffer can be cheaply copied, without copying the 53 * underlying memory, however it is the user's responsibility to ensure that 54 * such copies do not persist beyond the lifetime of the underlying memory. 55 * 56 * This also contains methods for reporting errors and for consuming the token 57 * stream. 58 */ 59 class input_buffer 60 { 61 friend class text_input_buffer; 62 protected: 63 /** 64 * The buffer. This class doesn't own the buffer, but the 65 * mmap_input_buffer subclass does. 66 */ 67 const char* buffer; 68 /** 69 * The size of the buffer. 70 */ 71 int size; 72 private: 73 /** 74 * The current place in the buffer where we are reading. This class 75 * keeps a separate size, pointer, and cursor so that we can move 76 * forwards and backwards and still have checks that we haven't fallen 77 * off either end. 78 */ 79 int cursor; 80 /** 81 * Private constructor. This is used to create input buffers that 82 * refer to the same memory, but have different cursors. 83 */ 84 input_buffer(const char* b, int s, int c) : buffer(b), size(s), 85 cursor(c) {} 86 public: 87 /** 88 * Returns the file name associated with this buffer. 89 */ 90 virtual const std::string &filename() const 91 { 92 static std::string s; 93 return s; 94 } 95 static std::unique_ptr<input_buffer> buffer_for_file(const std::string &path, 96 bool warn=true); 97 /** 98 * Skips all characters in the input until the specified character is 99 * encountered. 100 */ 101 void skip_to(char); 102 /** 103 * Parses up to a specified character and returns the intervening 104 * characters as a string. 105 */ 106 std::string parse_to(char); 107 /** 108 * Return whether all input has been consumed. 109 */ 110 bool finished() { return cursor >= size; } 111 /** 112 * Virtual destructor. Does nothing, but exists so that subclasses 113 * that own the memory can run cleanup code for deallocating it. 114 */ 115 virtual ~input_buffer() {}; 116 /** 117 * Constructs an empty buffer. 118 */ 119 input_buffer() : buffer(0), size(0), cursor(0) {} 120 /** 121 * Constructs a new buffer with a specified memory region and size. 122 */ 123 input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0){} 124 /** 125 * Returns a new input buffer referring into this input, clamped to the 126 * specified size. If the requested buffer would fall outside the 127 * range of this one, then it returns an empty buffer. 128 * 129 * The returned buffer shares the same underlying storage as the 130 * original. This is intended to be used for splitting up the various 131 * sections of a device tree blob. Requesting a size of 0 will give a 132 * buffer that extends to the end of the available memory. 133 */ 134 input_buffer buffer_from_offset(int offset, int s=0); 135 /** 136 * Dereferencing operator, allows the buffer to be treated as a char* 137 * and dereferenced to give a character. This returns a null byte if 138 * the cursor is out of range. 139 */ 140 inline char operator*() 141 { 142 if (cursor >= size) { return '\0'; } 143 if (cursor < 0) { return '\0'; } 144 return buffer[cursor]; 145 } 146 /** 147 * Array subscripting operator, returns a character at the specified 148 * index offset from the current cursor. The offset may be negative, 149 * to reread characters that have already been read. If the current 150 * cursor plus offset is outside of the range, this returns a nul 151 * byte. 152 */ 153 inline char operator[](int offset) 154 { 155 if (cursor + offset >= size) { return '\0'; } 156 if (cursor + offset < 0) { return '\0'; } 157 return buffer[cursor + offset]; 158 } 159 /** 160 * Increments the cursor, iterating forward in the buffer. 161 */ 162 inline input_buffer &operator++() 163 { 164 cursor++; 165 return *this; 166 } 167 const char *begin() 168 { 169 return buffer; 170 } 171 const char *end() 172 { 173 return buffer + size; 174 } 175 /** 176 * Consumes a character. Moves the cursor one character forward if the 177 * next character matches the argument, returning true. If the current 178 * character does not match the argument, returns false. 179 */ 180 inline bool consume(char c) 181 { 182 if (*(*this) == c) 183 { 184 ++(*this); 185 return true; 186 } 187 return false; 188 } 189 /** 190 * Consumes a string. If the (null-terminated) string passed as the 191 * argument appears in the input, advances the cursor to the end and 192 * returns true. Returns false if the string does not appear at the 193 * current point in the input. 194 */ 195 bool consume(const char *str); 196 /** 197 * Reads unsigned from char literal. Returns true and advances 198 * the cursor to next char. 199 * 200 * The parsed value is returned via the argument. 201 */ 202 bool consume_char_literal(unsigned long long &outInt); 203 /** 204 * Reads an integer in base 8, 10, or 16. Returns true and advances 205 * the cursor to the end of the integer if the cursor points to an 206 * integer, returns false and does not move the cursor otherwise. 207 * 208 * The parsed value is returned via the argument. 209 */ 210 bool consume_integer(unsigned long long &outInt); 211 /** 212 * Reads an arithmetic expression (containing any of the normal C 213 * operators), evaluates it, and returns the result. 214 */ 215 bool consume_integer_expression(unsigned long long &outInt); 216 /** 217 * Consumes two hex digits and return the resulting byte via the first 218 * argument. If the next two characters are hex digits, returns true 219 * and advances the cursor. If not, then returns false and leaves the 220 * cursor in place. 221 */ 222 bool consume_hex_byte(uint8_t &outByte); 223 /** 224 * Template function that consumes a binary value in big-endian format 225 * from the input stream. Returns true and advances the cursor if 226 * there is a value of the correct size. This function assumes that 227 * all values must be natively aligned, and so advances the cursor to 228 * the correct alignment before reading. 229 */ 230 template<typename T> 231 bool consume_binary(T &out) 232 { 233 int align = 0; 234 int type_size = sizeof(T); 235 if (cursor % type_size != 0) 236 { 237 align = type_size - (cursor % type_size); 238 } 239 if (size < cursor + align + type_size) 240 { 241 return false; 242 } 243 cursor += align; 244 assert(cursor % type_size == 0); 245 out = 0; 246 for (int i=0 ; i<type_size ; ++i) 247 { 248 if (size < cursor) 249 { 250 return false; 251 } 252 out <<= 8; 253 out |= (((T)buffer[cursor++]) & 0xff); 254 } 255 return true; 256 } 257 #ifndef NDEBUG 258 /** 259 * Dumps the current cursor value and the unconsumed values in the 260 * input buffer to the standard error. This method is intended solely 261 * for debugging. 262 */ 263 void dump(); 264 #endif 265 }; 266 /** 267 * Explicit specialisation for reading a single byte. 268 */ 269 template<> 270 inline bool input_buffer::consume_binary(uint8_t &out) 271 { 272 if (size < cursor + 1) 273 { 274 return false; 275 } 276 out = buffer[cursor++]; 277 return true; 278 } 279 280 /** 281 * An input buffer subclass used for parsing DTS files. This manages a stack 282 * of input buffers to handle /input/ operations. 283 */ 284 class text_input_buffer 285 { 286 std::unordered_set<std::string> defines; 287 /** 288 * The cursor is the input into the input stream where we are currently reading. 289 */ 290 int cursor = 0; 291 /** 292 * The current stack of includes. The current input is always from the top 293 * of the stack. 294 */ 295 std::stack<std::shared_ptr<input_buffer>> input_stack; 296 /** 297 * 298 */ 299 const std::vector<std::string> include_paths; 300 /** 301 * Reads forward past any spaces. The DTS format is not whitespace 302 * sensitive and so we want to scan past whitespace when reading it. 303 */ 304 void skip_spaces(); 305 /** 306 * Returns the character immediately after the current one. 307 * 308 * This method does not look between files. 309 */ 310 char peek(); 311 /** 312 * If a /include/ token is encountered, then look up the corresponding 313 * input file, push it onto the input stack, and continue. 314 */ 315 void handle_include(); 316 /** 317 * The base directory for this file. 318 */ 319 const std::string dir; 320 /** 321 * The file where dependencies should be output. 322 */ 323 FILE *depfile; 324 public: 325 /** 326 * Construct a new text input buffer with the specified buffer as the start 327 * of parsing and the specified set of input paths for handling new 328 * inclusions. 329 */ 330 text_input_buffer(std::unique_ptr<input_buffer> &&b, 331 std::unordered_set<std::string> &&d, 332 std::vector<std::string> &&i, 333 const std::string directory, 334 FILE *deps) 335 : defines(d), include_paths(i), dir(directory), depfile(deps) 336 { 337 input_stack.push(std::move(b)); 338 } 339 /** 340 * Skips all characters in the input until the specified character is 341 * encountered. 342 */ 343 void skip_to(char); 344 /** 345 * Parse an expression. If `stopAtParen` is set, then only parse a number 346 * or a parenthetical expression, otherwise assume that either is the 347 * left-hand side of a binary expression and try to parse the right-hand 348 * side. 349 */ 350 expression_ptr parse_expression(bool stopAtParen=false); 351 /** 352 * Parse a binary expression, having already parsed the right-hand side. 353 */ 354 expression_ptr parse_binary_expression(expression_ptr lhs); 355 /** 356 * Return whether all input has been consumed. 357 */ 358 bool finished() 359 { 360 return input_stack.empty() || 361 ((input_stack.size() == 1) && input_stack.top()->finished()); 362 } 363 /** 364 * Dereferencing operator. Returns the current character in the top input buffer. 365 */ 366 inline char operator*() 367 { 368 if (input_stack.empty()) 369 { 370 return 0; 371 } 372 return *(*input_stack.top()); 373 } 374 /** 375 * Increments the cursor, iterating forward in the buffer. 376 */ 377 inline text_input_buffer &operator++() 378 { 379 if (input_stack.empty()) 380 { 381 return *this; 382 } 383 cursor++; 384 auto &top = *input_stack.top(); 385 ++top; 386 if (top.finished()) 387 { 388 input_stack.pop(); 389 } 390 return *this; 391 } 392 /** 393 * Consumes a character. Moves the cursor one character forward if the 394 * next character matches the argument, returning true. If the current 395 * character does not match the argument, returns false. 396 */ 397 inline bool consume(char c) 398 { 399 if (*(*this) == c) 400 { 401 ++(*this); 402 return true; 403 } 404 return false; 405 } 406 /** 407 * Consumes a string. If the (null-terminated) string passed as the 408 * argument appears in the input, advances the cursor to the end and 409 * returns true. Returns false if the string does not appear at the 410 * current point in the input. 411 * 412 * This method does not scan between files. 413 */ 414 bool consume(const char *str) 415 { 416 if (input_stack.empty()) 417 { 418 return false; 419 } 420 return input_stack.top()->consume(str); 421 } 422 /** 423 * Converts next char into unsigned 424 * 425 * The parsed value is returned via the argument. 426 * 427 * This method does not scan between files. 428 */ 429 bool consume_char_literal(unsigned long long &outInt) 430 { 431 if (input_stack.empty()) 432 { 433 return false; 434 } 435 return input_stack.top()->consume_char_literal(outInt); 436 } 437 /** 438 * Reads an integer in base 8, 10, or 16. Returns true and advances 439 * the cursor to the end of the integer if the cursor points to an 440 * integer, returns false and does not move the cursor otherwise. 441 * 442 * The parsed value is returned via the argument. 443 * 444 * This method does not scan between files. 445 */ 446 bool consume_integer(unsigned long long &outInt) 447 { 448 if (input_stack.empty()) 449 { 450 return false; 451 } 452 return input_stack.top()->consume_integer(outInt); 453 } 454 /** 455 * Reads an arithmetic expression (containing any of the normal C 456 * operators), evaluates it, and returns the result. 457 */ 458 bool consume_integer_expression(unsigned long long &outInt); 459 /** 460 * Consumes two hex digits and return the resulting byte via the first 461 * argument. If the next two characters are hex digits, returns true 462 * and advances the cursor. If not, then returns false and leaves the 463 * cursor in place. 464 * 465 * This method does not scan between files. 466 */ 467 bool consume_hex_byte(uint8_t &outByte) 468 { 469 if (input_stack.empty()) 470 { 471 return false; 472 } 473 return input_stack.top()->consume_hex_byte(outByte); 474 } 475 /** 476 * Returns the longest string in the input buffer starting at the 477 * current cursor and composed entirely of characters that are valid in 478 * node names. 479 */ 480 std::string parse_node_name(); 481 /** 482 * Returns the longest string in the input buffer starting at the 483 * current cursor and composed entirely of characters that are valid in 484 * property names. 485 */ 486 std::string parse_property_name(); 487 /** 488 * Parses either a node or a property name. If is_property is true on 489 * entry, then only property names are parsed. If it is false, then it 490 * will be set, on return, to indicate whether the parsed name is only 491 * valid as a property. 492 */ 493 std::string parse_node_or_property_name(bool &is_property); 494 /** 495 * Parses up to a specified character and returns the intervening 496 * characters as a string. 497 */ 498 std::string parse_to(char); 499 /** 500 * Advances the cursor to the start of the next token, skipping 501 * comments and whitespace. If the cursor already points to the start 502 * of a token, then this function does nothing. 503 */ 504 text_input_buffer &next_token(); 505 /** 506 * Location in the source file. This should never be interpreted by 507 * anything other than error reporting functions of this class. It will 508 * eventually become something more complex than an `int`. 509 */ 510 class source_location 511 { 512 friend class text_input_buffer; 513 /** 514 * The text buffer object that included `b`. 515 */ 516 text_input_buffer &buffer; 517 /** 518 * The underlying buffer that contains this location. 519 */ 520 std::shared_ptr<input_buffer> b; 521 /** 522 * The offset within the current buffer of the source location. 523 */ 524 int cursor; 525 source_location(text_input_buffer &buf) 526 : buffer(buf), 527 b(buf.input_stack.empty() ? nullptr : buf.input_stack.top()), 528 cursor(b ? b->cursor : 0) {} 529 public: 530 /** 531 * Report an error at this location. 532 */ 533 void report_error(const char *msg) 534 { 535 if (b) 536 { 537 buffer.parse_error(msg, *b, cursor); 538 } 539 else 540 { 541 buffer.parse_error(msg); 542 } 543 } 544 }; 545 /** 546 * Returns the current source location. 547 */ 548 source_location location() 549 { 550 return { *this }; 551 } 552 /** 553 * Prints a message indicating the location of a parse error. 554 */ 555 void parse_error(const char *msg); 556 /** 557 * Reads the contents of a binary file into `b`. The file name is assumed 558 * to be relative to one of the include paths. 559 * 560 * Returns true if the file exists and can be read, false otherwise. 561 */ 562 bool read_binary_file(const std::string &filename, byte_buffer &b); 563 private: 564 /** 565 * Prints a message indicating the location of a parse error, given a 566 * specified location. This is used when input has already moved beyond 567 * the location that caused the failure. 568 */ 569 void parse_error(const char *msg, input_buffer &b, int loc); 570 }; 571 572 } // namespace dtc 573 574 #endif // !_INPUT_BUFFER_HH_ 575