1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2013 David Chisnall 5 * All rights reserved. 6 * 7 * This software was developed by SRI International and the University of 8 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) 9 * ("CTSRD"), as part of the DARPA CRASH research programme. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #ifndef _INPUT_BUFFER_HH_ 34 #define _INPUT_BUFFER_HH_ 35 #include "util.hh" 36 #include <assert.h> 37 #include <stack> 38 #include <string> 39 #include <unordered_set> 40 41 namespace dtc 42 { 43 44 namespace { 45 struct expression; 46 typedef std::unique_ptr<expression> expression_ptr; 47 } 48 49 /** 50 * Class encapsulating the input file. Can be used as a const char*, but has 51 * range checking. Attempting to access anything out of range will return a 0 52 * byte. The input buffer can be cheaply copied, without copying the 53 * underlying memory, however it is the user's responsibility to ensure that 54 * such copies do not persist beyond the lifetime of the underlying memory. 55 * 56 * This also contains methods for reporting errors and for consuming the token 57 * stream. 58 */ 59 class input_buffer 60 { 61 friend class text_input_buffer; 62 protected: 63 /** 64 * The buffer. This class doesn't own the buffer, but the 65 * mmap_input_buffer subclass does. 66 */ 67 const char* buffer; 68 /** 69 * The size of the buffer. 70 */ 71 int size; 72 private: 73 /** 74 * The current place in the buffer where we are reading. This class 75 * keeps a separate size, pointer, and cursor so that we can move 76 * forwards and backwards and still have checks that we haven't fallen 77 * off either end. 78 */ 79 int cursor; 80 /** 81 * Private constructor. This is used to create input buffers that 82 * refer to the same memory, but have different cursors. 83 */ 84 input_buffer(const char* b, int s, int c) : buffer(b), size(s), 85 cursor(c) {} 86 public: 87 /** 88 * Returns the file name associated with this buffer. 89 */ 90 virtual const std::string &filename() const 91 { 92 static std::string s; 93 return s; 94 } 95 static std::unique_ptr<input_buffer> buffer_for_file(const std::string &path, 96 bool warn=true); 97 /** 98 * Skips all characters in the input until the specified character is 99 * encountered. 100 */ 101 void skip_to(char); 102 /** 103 * Parses up to a specified character and returns the intervening 104 * characters as a string. 105 */ 106 std::string parse_to(char); 107 /** 108 * Return whether all input has been consumed. 109 */ 110 bool finished() { return cursor >= size; } 111 /** 112 * Virtual destructor. Does nothing, but exists so that subclasses 113 * that own the memory can run cleanup code for deallocating it. 114 */ 115 virtual ~input_buffer() {}; 116 /** 117 * Constructs an empty buffer. 118 */ 119 input_buffer() : buffer(0), size(0), cursor(0) {} 120 /** 121 * Constructs a new buffer with a specified memory region and size. 122 */ 123 input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0){} 124 /** 125 * Returns a new input buffer referring into this input, clamped to the 126 * specified size. If the requested buffer would fall outside the 127 * range of this one, then it returns an empty buffer. 128 * 129 * The returned buffer shares the same underlying storage as the 130 * original. This is intended to be used for splitting up the various 131 * sections of a device tree blob. Requesting a size of 0 will give a 132 * buffer that extends to the end of the available memory. 133 */ 134 input_buffer buffer_from_offset(int offset, int s=0); 135 /** 136 * Dereferencing operator, allows the buffer to be treated as a char* 137 * and dereferenced to give a character. This returns a null byte if 138 * the cursor is out of range. 139 */ 140 inline char operator*() 141 { 142 if (cursor >= size) { return '\0'; } 143 if (cursor < 0) { return '\0'; } 144 return buffer[cursor]; 145 } 146 /** 147 * Array subscripting operator, returns a character at the specified 148 * index offset from the current cursor. The offset may be negative, 149 * to reread characters that have already been read. If the current 150 * cursor plus offset is outside of the range, this returns a nul 151 * byte. 152 */ 153 inline char operator[](int offset) 154 { 155 if (cursor + offset >= size) { return '\0'; } 156 if (cursor + offset < 0) { return '\0'; } 157 return buffer[cursor + offset]; 158 } 159 /** 160 * Increments the cursor, iterating forward in the buffer. 161 */ 162 inline input_buffer &operator++() 163 { 164 cursor++; 165 return *this; 166 } 167 const char *begin() 168 { 169 return buffer; 170 } 171 const char *end() 172 { 173 return buffer + size; 174 } 175 /** 176 * Consumes a character. Moves the cursor one character forward if the 177 * next character matches the argument, returning true. If the current 178 * character does not match the argument, returns false. 179 */ 180 inline bool consume(char c) 181 { 182 if (*(*this) == c) 183 { 184 ++(*this); 185 return true; 186 } 187 return false; 188 } 189 /** 190 * Consumes a string. If the (null-terminated) string passed as the 191 * argument appears in the input, advances the cursor to the end and 192 * returns true. Returns false if the string does not appear at the 193 * current point in the input. 194 */ 195 bool consume(const char *str); 196 /** 197 * Reads an integer in base 8, 10, or 16. Returns true and advances 198 * the cursor to the end of the integer if the cursor points to an 199 * integer, returns false and does not move the cursor otherwise. 200 * 201 * The parsed value is returned via the argument. 202 */ 203 bool consume_integer(unsigned long long &outInt); 204 /** 205 * Reads an arithmetic expression (containing any of the normal C 206 * operators), evaluates it, and returns the result. 207 */ 208 bool consume_integer_expression(unsigned long long &outInt); 209 /** 210 * Consumes two hex digits and return the resulting byte via the first 211 * argument. If the next two characters are hex digits, returns true 212 * and advances the cursor. If not, then returns false and leaves the 213 * cursor in place. 214 */ 215 bool consume_hex_byte(uint8_t &outByte); 216 /** 217 * Template function that consumes a binary value in big-endian format 218 * from the input stream. Returns true and advances the cursor if 219 * there is a value of the correct size. This function assumes that 220 * all values must be natively aligned, and so advances the cursor to 221 * the correct alignment before reading. 222 */ 223 template<typename T> 224 bool consume_binary(T &out) 225 { 226 int align = 0; 227 int type_size = sizeof(T); 228 if (cursor % type_size != 0) 229 { 230 align = type_size - (cursor % type_size); 231 } 232 if (size < cursor + align + type_size) 233 { 234 return false; 235 } 236 cursor += align; 237 assert(cursor % type_size == 0); 238 out = 0; 239 for (int i=0 ; i<type_size ; ++i) 240 { 241 if (size < cursor) 242 { 243 return false; 244 } 245 out <<= 8; 246 out |= (((T)buffer[cursor++]) & 0xff); 247 } 248 return true; 249 } 250 #ifndef NDEBUG 251 /** 252 * Dumps the current cursor value and the unconsumed values in the 253 * input buffer to the standard error. This method is intended solely 254 * for debugging. 255 */ 256 void dump(); 257 #endif 258 }; 259 /** 260 * Explicit specialisation for reading a single byte. 261 */ 262 template<> 263 inline bool input_buffer::consume_binary(uint8_t &out) 264 { 265 if (size < cursor + 1) 266 { 267 return false; 268 } 269 out = buffer[cursor++]; 270 return true; 271 } 272 273 /** 274 * An input buffer subclass used for parsing DTS files. This manages a stack 275 * of input buffers to handle /input/ operations. 276 */ 277 class text_input_buffer 278 { 279 std::unordered_set<std::string> defines; 280 /** 281 * The cursor is the input into the input stream where we are currently reading. 282 */ 283 int cursor = 0; 284 /** 285 * The current stack of includes. The current input is always from the top 286 * of the stack. 287 */ 288 std::stack<std::shared_ptr<input_buffer>> input_stack; 289 /** 290 * 291 */ 292 const std::vector<std::string> include_paths; 293 /** 294 * Reads forward past any spaces. The DTS format is not whitespace 295 * sensitive and so we want to scan past whitespace when reading it. 296 */ 297 void skip_spaces(); 298 /** 299 * Returns the character immediately after the current one. 300 * 301 * This method does not look between files. 302 */ 303 char peek(); 304 /** 305 * If a /include/ token is encountered, then look up the corresponding 306 * input file, push it onto the input stack, and continue. 307 */ 308 void handle_include(); 309 /** 310 * The base directory for this file. 311 */ 312 const std::string dir; 313 /** 314 * The file where dependencies should be output. 315 */ 316 FILE *depfile; 317 public: 318 /** 319 * Construct a new text input buffer with the specified buffer as the start 320 * of parsing and the specified set of input paths for handling new 321 * inclusions. 322 */ 323 text_input_buffer(std::unique_ptr<input_buffer> &&b, 324 std::unordered_set<std::string> &&d, 325 std::vector<std::string> &&i, 326 const std::string directory, 327 FILE *deps) 328 : defines(d), include_paths(i), dir(directory), depfile(deps) 329 { 330 input_stack.push(std::move(b)); 331 } 332 /** 333 * Skips all characters in the input until the specified character is 334 * encountered. 335 */ 336 void skip_to(char); 337 /** 338 * Parse an expression. If `stopAtParen` is set, then only parse a number 339 * or a parenthetical expression, otherwise assume that either is the 340 * left-hand side of a binary expression and try to parse the right-hand 341 * side. 342 */ 343 expression_ptr parse_expression(bool stopAtParen=false); 344 /** 345 * Parse a binary expression, having already parsed the right-hand side. 346 */ 347 expression_ptr parse_binary_expression(expression_ptr lhs); 348 /** 349 * Return whether all input has been consumed. 350 */ 351 bool finished() 352 { 353 return input_stack.empty() || 354 ((input_stack.size() == 1) && input_stack.top()->finished()); 355 } 356 /** 357 * Dereferencing operator. Returns the current character in the top input buffer. 358 */ 359 inline char operator*() 360 { 361 if (input_stack.empty()) 362 { 363 return 0; 364 } 365 return *(*input_stack.top()); 366 } 367 /** 368 * Increments the cursor, iterating forward in the buffer. 369 */ 370 inline text_input_buffer &operator++() 371 { 372 if (input_stack.empty()) 373 { 374 return *this; 375 } 376 cursor++; 377 auto &top = *input_stack.top(); 378 ++top; 379 if (top.finished()) 380 { 381 input_stack.pop(); 382 } 383 return *this; 384 } 385 /** 386 * Consumes a character. Moves the cursor one character forward if the 387 * next character matches the argument, returning true. If the current 388 * character does not match the argument, returns false. 389 */ 390 inline bool consume(char c) 391 { 392 if (*(*this) == c) 393 { 394 ++(*this); 395 return true; 396 } 397 return false; 398 } 399 /** 400 * Consumes a string. If the (null-terminated) string passed as the 401 * argument appears in the input, advances the cursor to the end and 402 * returns true. Returns false if the string does not appear at the 403 * current point in the input. 404 * 405 * This method does not scan between files. 406 */ 407 bool consume(const char *str) 408 { 409 if (input_stack.empty()) 410 { 411 return false; 412 } 413 return input_stack.top()->consume(str); 414 } 415 /** 416 * Reads an integer in base 8, 10, or 16. Returns true and advances 417 * the cursor to the end of the integer if the cursor points to an 418 * integer, returns false and does not move the cursor otherwise. 419 * 420 * The parsed value is returned via the argument. 421 * 422 * This method does not scan between files. 423 */ 424 bool consume_integer(unsigned long long &outInt) 425 { 426 if (input_stack.empty()) 427 { 428 return false; 429 } 430 return input_stack.top()->consume_integer(outInt); 431 } 432 /** 433 * Reads an arithmetic expression (containing any of the normal C 434 * operators), evaluates it, and returns the result. 435 */ 436 bool consume_integer_expression(unsigned long long &outInt); 437 /** 438 * Consumes two hex digits and return the resulting byte via the first 439 * argument. If the next two characters are hex digits, returns true 440 * and advances the cursor. If not, then returns false and leaves the 441 * cursor in place. 442 * 443 * This method does not scan between files. 444 */ 445 bool consume_hex_byte(uint8_t &outByte) 446 { 447 if (input_stack.empty()) 448 { 449 return false; 450 } 451 return input_stack.top()->consume_hex_byte(outByte); 452 } 453 /** 454 * Returns the longest string in the input buffer starting at the 455 * current cursor and composed entirely of characters that are valid in 456 * node names. 457 */ 458 std::string parse_node_name(); 459 /** 460 * Returns the longest string in the input buffer starting at the 461 * current cursor and composed entirely of characters that are valid in 462 * property names. 463 */ 464 std::string parse_property_name(); 465 /** 466 * Parses either a node or a property name. If is_property is true on 467 * entry, then only property names are parsed. If it is false, then it 468 * will be set, on return, to indicate whether the parsed name is only 469 * valid as a property. 470 */ 471 std::string parse_node_or_property_name(bool &is_property); 472 /** 473 * Parses up to a specified character and returns the intervening 474 * characters as a string. 475 */ 476 std::string parse_to(char); 477 /** 478 * Advances the cursor to the start of the next token, skipping 479 * comments and whitespace. If the cursor already points to the start 480 * of a token, then this function does nothing. 481 */ 482 text_input_buffer &next_token(); 483 /** 484 * Location in the source file. This should never be interpreted by 485 * anything other than error reporting functions of this class. It will 486 * eventually become something more complex than an `int`. 487 */ 488 class source_location 489 { 490 friend class text_input_buffer; 491 /** 492 * The text buffer object that included `b`. 493 */ 494 text_input_buffer &buffer; 495 /** 496 * The underlying buffer that contains this location. 497 */ 498 std::shared_ptr<input_buffer> b; 499 /** 500 * The offset within the current buffer of the source location. 501 */ 502 int cursor; 503 source_location(text_input_buffer &buf) 504 : buffer(buf), 505 b(buf.input_stack.empty() ? nullptr : buf.input_stack.top()), 506 cursor(b ? b->cursor : 0) {} 507 public: 508 /** 509 * Report an error at this location. 510 */ 511 void report_error(const char *msg) 512 { 513 if (b) 514 { 515 buffer.parse_error(msg, *b, cursor); 516 } 517 else 518 { 519 buffer.parse_error(msg); 520 } 521 } 522 }; 523 /** 524 * Returns the current source location. 525 */ 526 source_location location() 527 { 528 return { *this }; 529 } 530 /** 531 * Prints a message indicating the location of a parse error. 532 */ 533 void parse_error(const char *msg); 534 /** 535 * Reads the contents of a binary file into `b`. The file name is assumed 536 * to be relative to one of the include paths. 537 * 538 * Returns true if the file exists and can be read, false otherwise. 539 */ 540 bool read_binary_file(const std::string &filename, byte_buffer &b); 541 private: 542 /** 543 * Prints a message indicating the location of a parse error, given a 544 * specified location. This is used when input has already moved beyond 545 * the location that caused the failure. 546 */ 547 void parse_error(const char *msg, input_buffer &b, int loc); 548 }; 549 550 } // namespace dtc 551 552 #endif // !_INPUT_BUFFER_HH_ 553