1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2013 David Chisnall 5 * All rights reserved. 6 * 7 * This software was developed by SRI International and the University of 8 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) 9 * ("CTSRD"), as part of the DARPA CRASH research programme. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * $FreeBSD$ 33 */ 34 35 #ifndef _INPUT_BUFFER_HH_ 36 #define _INPUT_BUFFER_HH_ 37 #include "util.hh" 38 #include <assert.h> 39 #include <stack> 40 #include <string> 41 #include <unordered_set> 42 43 namespace dtc 44 { 45 46 namespace { 47 struct expression; 48 typedef std::unique_ptr<expression> expression_ptr; 49 } 50 51 /** 52 * Class encapsulating the input file. Can be used as a const char*, but has 53 * range checking. Attempting to access anything out of range will return a 0 54 * byte. The input buffer can be cheaply copied, without copying the 55 * underlying memory, however it is the user's responsibility to ensure that 56 * such copies do not persist beyond the lifetime of the underlying memory. 57 * 58 * This also contains methods for reporting errors and for consuming the token 59 * stream. 60 */ 61 class input_buffer 62 { 63 friend class text_input_buffer; 64 protected: 65 /** 66 * The buffer. This class doesn't own the buffer, but the 67 * mmap_input_buffer subclass does. 68 */ 69 const char* buffer; 70 /** 71 * The size of the buffer. 72 */ 73 int size; 74 private: 75 /** 76 * The current place in the buffer where we are reading. This class 77 * keeps a separate size, pointer, and cursor so that we can move 78 * forwards and backwards and still have checks that we haven't fallen 79 * off either end. 80 */ 81 int cursor; 82 /** 83 * Private constructor. This is used to create input buffers that 84 * refer to the same memory, but have different cursors. 85 */ 86 input_buffer(const char* b, int s, int c) : buffer(b), size(s), 87 cursor(c) {} 88 public: 89 /** 90 * Returns the file name associated with this buffer. 91 */ 92 virtual const std::string &filename() const 93 { 94 static std::string s; 95 return s; 96 } 97 static std::unique_ptr<input_buffer> buffer_for_file(const std::string &path, 98 bool warn=true); 99 /** 100 * Skips all characters in the input until the specified character is 101 * encountered. 102 */ 103 void skip_to(char); 104 /** 105 * Parses up to a specified character and returns the intervening 106 * characters as a string. 107 */ 108 std::string parse_to(char); 109 /** 110 * Return whether all input has been consumed. 111 */ 112 bool finished() { return cursor >= size; } 113 /** 114 * Virtual destructor. Does nothing, but exists so that subclasses 115 * that own the memory can run cleanup code for deallocating it. 116 */ 117 virtual ~input_buffer() {}; 118 /** 119 * Constructs an empty buffer. 120 */ 121 input_buffer() : buffer(0), size(0), cursor(0) {} 122 /** 123 * Constructs a new buffer with a specified memory region and size. 124 */ 125 input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0){} 126 /** 127 * Returns a new input buffer referring into this input, clamped to the 128 * specified size. If the requested buffer would fall outside the 129 * range of this one, then it returns an empty buffer. 130 * 131 * The returned buffer shares the same underlying storage as the 132 * original. This is intended to be used for splitting up the various 133 * sections of a device tree blob. Requesting a size of 0 will give a 134 * buffer that extends to the end of the available memory. 135 */ 136 input_buffer buffer_from_offset(int offset, int s=0); 137 /** 138 * Dereferencing operator, allows the buffer to be treated as a char* 139 * and dereferenced to give a character. This returns a null byte if 140 * the cursor is out of range. 141 */ 142 inline char operator*() 143 { 144 if (cursor >= size) { return '\0'; } 145 if (cursor < 0) { return '\0'; } 146 return buffer[cursor]; 147 } 148 /** 149 * Array subscripting operator, returns a character at the specified 150 * index offset from the current cursor. The offset may be negative, 151 * to reread characters that have already been read. If the current 152 * cursor plus offset is outside of the range, this returns a nul 153 * byte. 154 */ 155 inline char operator[](int offset) 156 { 157 if (cursor + offset >= size) { return '\0'; } 158 if (cursor + offset < 0) { return '\0'; } 159 return buffer[cursor + offset]; 160 } 161 /** 162 * Increments the cursor, iterating forward in the buffer. 163 */ 164 inline input_buffer &operator++() 165 { 166 cursor++; 167 return *this; 168 } 169 const char *begin() 170 { 171 return buffer; 172 } 173 const char *end() 174 { 175 return buffer + size; 176 } 177 /** 178 * Consumes a character. Moves the cursor one character forward if the 179 * next character matches the argument, returning true. If the current 180 * character does not match the argument, returns false. 181 */ 182 inline bool consume(char c) 183 { 184 if (*(*this) == c) 185 { 186 ++(*this); 187 return true; 188 } 189 return false; 190 } 191 /** 192 * Consumes a string. If the (null-terminated) string passed as the 193 * argument appears in the input, advances the cursor to the end and 194 * returns true. Returns false if the string does not appear at the 195 * current point in the input. 196 */ 197 bool consume(const char *str); 198 /** 199 * Reads an integer in base 8, 10, or 16. Returns true and advances 200 * the cursor to the end of the integer if the cursor points to an 201 * integer, returns false and does not move the cursor otherwise. 202 * 203 * The parsed value is returned via the argument. 204 */ 205 bool consume_integer(unsigned long long &outInt); 206 /** 207 * Reads an arithmetic expression (containing any of the normal C 208 * operators), evaluates it, and returns the result. 209 */ 210 bool consume_integer_expression(unsigned long long &outInt); 211 /** 212 * Consumes two hex digits and return the resulting byte via the first 213 * argument. If the next two characters are hex digits, returns true 214 * and advances the cursor. If not, then returns false and leaves the 215 * cursor in place. 216 */ 217 bool consume_hex_byte(uint8_t &outByte); 218 /** 219 * Template function that consumes a binary value in big-endian format 220 * from the input stream. Returns true and advances the cursor if 221 * there is a value of the correct size. This function assumes that 222 * all values must be natively aligned, and so advances the cursor to 223 * the correct alignment before reading. 224 */ 225 template<typename T> 226 bool consume_binary(T &out) 227 { 228 int align = 0; 229 int type_size = sizeof(T); 230 if (cursor % type_size != 0) 231 { 232 align = type_size - (cursor % type_size); 233 } 234 if (size < cursor + align + type_size) 235 { 236 return false; 237 } 238 cursor += align; 239 assert(cursor % type_size == 0); 240 out = 0; 241 for (int i=0 ; i<type_size ; ++i) 242 { 243 if (size < cursor) 244 { 245 return false; 246 } 247 out <<= 8; 248 out |= (((T)buffer[cursor++]) & 0xff); 249 } 250 return true; 251 } 252 #ifndef NDEBUG 253 /** 254 * Dumps the current cursor value and the unconsumed values in the 255 * input buffer to the standard error. This method is intended solely 256 * for debugging. 257 */ 258 void dump(); 259 #endif 260 }; 261 /** 262 * Explicit specialisation for reading a single byte. 263 */ 264 template<> 265 inline bool input_buffer::consume_binary(uint8_t &out) 266 { 267 if (size < cursor + 1) 268 { 269 return false; 270 } 271 out = buffer[cursor++]; 272 return true; 273 } 274 275 /** 276 * An input buffer subclass used for parsing DTS files. This manages a stack 277 * of input buffers to handle /input/ operations. 278 */ 279 class text_input_buffer 280 { 281 std::unordered_set<std::string> defines; 282 /** 283 * The cursor is the input into the input stream where we are currently reading. 284 */ 285 int cursor = 0; 286 /** 287 * The current stack of includes. The current input is always from the top 288 * of the stack. 289 */ 290 std::stack<std::shared_ptr<input_buffer>> input_stack; 291 /** 292 * 293 */ 294 const std::vector<std::string> include_paths; 295 /** 296 * Reads forward past any spaces. The DTS format is not whitespace 297 * sensitive and so we want to scan past whitespace when reading it. 298 */ 299 void skip_spaces(); 300 /** 301 * Returns the character immediately after the current one. 302 * 303 * This method does not look between files. 304 */ 305 char peek(); 306 /** 307 * If a /include/ token is encountered, then look up the corresponding 308 * input file, push it onto the input stack, and continue. 309 */ 310 void handle_include(); 311 /** 312 * The base directory for this file. 313 */ 314 const std::string dir; 315 /** 316 * The file where dependencies should be output. 317 */ 318 FILE *depfile; 319 public: 320 /** 321 * Construct a new text input buffer with the specified buffer as the start 322 * of parsing and the specified set of input paths for handling new 323 * inclusions. 324 */ 325 text_input_buffer(std::unique_ptr<input_buffer> &&b, 326 std::unordered_set<std::string> &&d, 327 std::vector<std::string> &&i, 328 const std::string directory, 329 FILE *deps) 330 : defines(d), include_paths(i), dir(directory), depfile(deps) 331 { 332 input_stack.push(std::move(b)); 333 } 334 /** 335 * Skips all characters in the input until the specified character is 336 * encountered. 337 */ 338 void skip_to(char); 339 /** 340 * Parse an expression. If `stopAtParen` is set, then only parse a number 341 * or a parenthetical expression, otherwise assume that either is the 342 * left-hand side of a binary expression and try to parse the right-hand 343 * side. 344 */ 345 expression_ptr parse_expression(bool stopAtParen=false); 346 /** 347 * Parse a binary expression, having already parsed the right-hand side. 348 */ 349 expression_ptr parse_binary_expression(expression_ptr lhs); 350 /** 351 * Return whether all input has been consumed. 352 */ 353 bool finished() 354 { 355 return input_stack.empty() || 356 ((input_stack.size() == 1) && input_stack.top()->finished()); 357 } 358 /** 359 * Dereferencing operator. Returns the current character in the top input buffer. 360 */ 361 inline char operator*() 362 { 363 if (input_stack.empty()) 364 { 365 return 0; 366 } 367 return *(*input_stack.top()); 368 } 369 /** 370 * Increments the cursor, iterating forward in the buffer. 371 */ 372 inline text_input_buffer &operator++() 373 { 374 if (input_stack.empty()) 375 { 376 return *this; 377 } 378 cursor++; 379 auto &top = *input_stack.top(); 380 ++top; 381 if (top.finished()) 382 { 383 input_stack.pop(); 384 } 385 return *this; 386 } 387 /** 388 * Consumes a character. Moves the cursor one character forward if the 389 * next character matches the argument, returning true. If the current 390 * character does not match the argument, returns false. 391 */ 392 inline bool consume(char c) 393 { 394 if (*(*this) == c) 395 { 396 ++(*this); 397 return true; 398 } 399 return false; 400 } 401 /** 402 * Consumes a string. If the (null-terminated) string passed as the 403 * argument appears in the input, advances the cursor to the end and 404 * returns true. Returns false if the string does not appear at the 405 * current point in the input. 406 * 407 * This method does not scan between files. 408 */ 409 bool consume(const char *str) 410 { 411 if (input_stack.empty()) 412 { 413 return false; 414 } 415 return input_stack.top()->consume(str); 416 } 417 /** 418 * Reads an integer in base 8, 10, or 16. Returns true and advances 419 * the cursor to the end of the integer if the cursor points to an 420 * integer, returns false and does not move the cursor otherwise. 421 * 422 * The parsed value is returned via the argument. 423 * 424 * This method does not scan between files. 425 */ 426 bool consume_integer(unsigned long long &outInt) 427 { 428 if (input_stack.empty()) 429 { 430 return false; 431 } 432 return input_stack.top()->consume_integer(outInt); 433 } 434 /** 435 * Reads an arithmetic expression (containing any of the normal C 436 * operators), evaluates it, and returns the result. 437 */ 438 bool consume_integer_expression(unsigned long long &outInt); 439 /** 440 * Consumes two hex digits and return the resulting byte via the first 441 * argument. If the next two characters are hex digits, returns true 442 * and advances the cursor. If not, then returns false and leaves the 443 * cursor in place. 444 * 445 * This method does not scan between files. 446 */ 447 bool consume_hex_byte(uint8_t &outByte) 448 { 449 if (input_stack.empty()) 450 { 451 return false; 452 } 453 return input_stack.top()->consume_hex_byte(outByte); 454 } 455 /** 456 * Returns the longest string in the input buffer starting at the 457 * current cursor and composed entirely of characters that are valid in 458 * node names. 459 */ 460 std::string parse_node_name(); 461 /** 462 * Returns the longest string in the input buffer starting at the 463 * current cursor and composed entirely of characters that are valid in 464 * property names. 465 */ 466 std::string parse_property_name(); 467 /** 468 * Parses either a node or a property name. If is_property is true on 469 * entry, then only property names are parsed. If it is false, then it 470 * will be set, on return, to indicate whether the parsed name is only 471 * valid as a property. 472 */ 473 std::string parse_node_or_property_name(bool &is_property); 474 /** 475 * Parses up to a specified character and returns the intervening 476 * characters as a string. 477 */ 478 std::string parse_to(char); 479 /** 480 * Advances the cursor to the start of the next token, skipping 481 * comments and whitespace. If the cursor already points to the start 482 * of a token, then this function does nothing. 483 */ 484 text_input_buffer &next_token(); 485 /** 486 * Location in the source file. This should never be interpreted by 487 * anything other than error reporting functions of this class. It will 488 * eventually become something more complex than an `int`. 489 */ 490 class source_location 491 { 492 friend class text_input_buffer; 493 /** 494 * The text buffer object that included `b`. 495 */ 496 text_input_buffer &buffer; 497 /** 498 * The underlying buffer that contains this location. 499 */ 500 std::shared_ptr<input_buffer> b; 501 /** 502 * The offset within the current buffer of the source location. 503 */ 504 int cursor; 505 source_location(text_input_buffer &buf) 506 : buffer(buf), 507 b(buf.input_stack.empty() ? nullptr : buf.input_stack.top()), 508 cursor(b ? b->cursor : 0) {} 509 public: 510 /** 511 * Report an error at this location. 512 */ 513 void report_error(const char *msg) 514 { 515 if (b) 516 { 517 buffer.parse_error(msg, *b, cursor); 518 } 519 else 520 { 521 buffer.parse_error(msg); 522 } 523 } 524 }; 525 /** 526 * Returns the current source location. 527 */ 528 source_location location() 529 { 530 return { *this }; 531 } 532 /** 533 * Prints a message indicating the location of a parse error. 534 */ 535 void parse_error(const char *msg); 536 /** 537 * Reads the contents of a binary file into `b`. The file name is assumed 538 * to be relative to one of the include paths. 539 * 540 * Returns true if the file exists and can be read, false otherwise. 541 */ 542 bool read_binary_file(const std::string &filename, byte_buffer &b); 543 private: 544 /** 545 * Prints a message indicating the location of a parse error, given a 546 * specified location. This is used when input has already moved beyond 547 * the location that caused the failure. 548 */ 549 void parse_error(const char *msg, input_buffer &b, int loc); 550 }; 551 552 } // namespace dtc 553 554 #endif // !_INPUT_BUFFER_HH_ 555