input_buffer.hh (009e81b16465ea457c0e63fd49fe77f47cc27a5a) | input_buffer.hh (bbe31b709a653884e18995a1c97cdafd7392999a) |
---|---|
1/*- 2 * Copyright (c) 2013 David Chisnall 3 * All rights reserved. 4 * 5 * This software was developed by SRI International and the University of 6 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) 7 * ("CTSRD"), as part of the DARPA CRASH research programme. 8 * --- 20 unchanged lines hidden (view full) --- 29 * 30 * $FreeBSD$ 31 */ 32 33#ifndef _INPUT_BUFFER_HH_ 34#define _INPUT_BUFFER_HH_ 35#include "util.hh" 36#include <assert.h> | 1/*- 2 * Copyright (c) 2013 David Chisnall 3 * All rights reserved. 4 * 5 * This software was developed by SRI International and the University of 6 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) 7 * ("CTSRD"), as part of the DARPA CRASH research programme. 8 * --- 20 unchanged lines hidden (view full) --- 29 * 30 * $FreeBSD$ 31 */ 32 33#ifndef _INPUT_BUFFER_HH_ 34#define _INPUT_BUFFER_HH_ 35#include "util.hh" 36#include <assert.h> |
37#include <stack> 38#include <string> 39#include <unordered_set> |
|
37 38namespace dtc 39{ 40 41namespace { 42struct expression; 43typedef std::unique_ptr<expression> expression_ptr; 44} --- 5 unchanged lines hidden (view full) --- 50 * underlying memory, however it is the user's responsibility to ensure that 51 * such copies do not persist beyond the lifetime of the underlying memory. 52 * 53 * This also contains methods for reporting errors and for consuming the token 54 * stream. 55 */ 56class input_buffer 57{ | 40 41namespace dtc 42{ 43 44namespace { 45struct expression; 46typedef std::unique_ptr<expression> expression_ptr; 47} --- 5 unchanged lines hidden (view full) --- 53 * underlying memory, however it is the user's responsibility to ensure that 54 * such copies do not persist beyond the lifetime of the underlying memory. 55 * 56 * This also contains methods for reporting errors and for consuming the token 57 * stream. 58 */ 59class input_buffer 60{ |
61 friend class text_input_buffer; |
|
58 protected: 59 /** 60 * The buffer. This class doesn't own the buffer, but the 61 * mmap_input_buffer subclass does. 62 */ 63 const char* buffer; 64 /** 65 * The size of the buffer. 66 */ 67 int size; 68 private: 69 /** | 62 protected: 63 /** 64 * The buffer. This class doesn't own the buffer, but the 65 * mmap_input_buffer subclass does. 66 */ 67 const char* buffer; 68 /** 69 * The size of the buffer. 70 */ 71 int size; 72 private: 73 /** |
70 * Parse an expression. If `stopAtParen` is set, then only parse a number 71 * or a parenthetical expression, otherwise assume that either is the 72 * left-hand side of a binary expression and try to parse the right-hand 73 * side. 74 */ 75 expression_ptr parse_expression(bool stopAtParen=false); 76 /** 77 * Parse a binary expression, having already parsed the right-hand side. 78 */ 79 expression_ptr parse_binary_expression(expression_ptr lhs); 80 /** | |
81 * The current place in the buffer where we are reading. This class 82 * keeps a separate size, pointer, and cursor so that we can move 83 * forwards and backwards and still have checks that we haven't fallen 84 * off either end. 85 */ 86 int cursor; 87 /** 88 * Private constructor. This is used to create input buffers that 89 * refer to the same memory, but have different cursors. 90 */ 91 input_buffer(const char* b, int s, int c) : buffer(b), size(s), 92 cursor(c) {} | 74 * The current place in the buffer where we are reading. This class 75 * keeps a separate size, pointer, and cursor so that we can move 76 * forwards and backwards and still have checks that we haven't fallen 77 * off either end. 78 */ 79 int cursor; 80 /** 81 * Private constructor. This is used to create input buffers that 82 * refer to the same memory, but have different cursors. 83 */ 84 input_buffer(const char* b, int s, int c) : buffer(b), size(s), 85 cursor(c) {} |
86 public: |
|
93 /** | 87 /** |
94 * Reads forward past any spaces. The DTS format is not whitespace 95 * sensitive and so we want to scan past whitespace when reading it. | 88 * Returns the file name associated with this buffer. |
96 */ | 89 */ |
97 void skip_spaces(); 98 public: | 90 virtual const std::string &filename() const 91 { 92 static std::string s; 93 return s; 94 } 95 static std::unique_ptr<input_buffer> buffer_for_file(const std::string &path, 96 bool warn=true); |
99 /** | 97 /** |
98 * Skips all characters in the input until the specified character is 99 * encountered. 100 */ 101 void skip_to(char); 102 /** 103 * Parses up to a specified character and returns the intervening 104 * characters as a string. 105 */ 106 std::string parse_to(char); 107 /** |
|
100 * Return whether all input has been consumed. 101 */ 102 bool finished() { return cursor >= size; } 103 /** 104 * Virtual destructor. Does nothing, but exists so that subclasses 105 * that own the memory can run cleanup code for deallocating it. 106 */ 107 virtual ~input_buffer() {}; --- 12 unchanged lines hidden (view full) --- 120 * 121 * The returned buffer shares the same underlying storage as the 122 * original. This is intended to be used for splitting up the various 123 * sections of a device tree blob. Requesting a size of 0 will give a 124 * buffer that extends to the end of the available memory. 125 */ 126 input_buffer buffer_from_offset(int offset, int s=0); 127 /** | 108 * Return whether all input has been consumed. 109 */ 110 bool finished() { return cursor >= size; } 111 /** 112 * Virtual destructor. Does nothing, but exists so that subclasses 113 * that own the memory can run cleanup code for deallocating it. 114 */ 115 virtual ~input_buffer() {}; --- 12 unchanged lines hidden (view full) --- 128 * 129 * The returned buffer shares the same underlying storage as the 130 * original. This is intended to be used for splitting up the various 131 * sections of a device tree blob. Requesting a size of 0 will give a 132 * buffer that extends to the end of the available memory. 133 */ 134 input_buffer buffer_from_offset(int offset, int s=0); 135 /** |
128 * Returns true if this buffer has no unconsumed space in it. 129 */ 130 inline bool empty() 131 { 132 return cursor >= size; 133 } 134 /** | |
135 * Dereferencing operator, allows the buffer to be treated as a char* 136 * and dereferenced to give a character. This returns a null byte if 137 * the cursor is out of range. 138 */ 139 inline char operator*() 140 { 141 if (cursor >= size) { return '\0'; } 142 if (cursor < 0) { return '\0'; } --- 16 unchanged lines hidden (view full) --- 159 * Increments the cursor, iterating forward in the buffer. 160 */ 161 inline input_buffer &operator++() 162 { 163 cursor++; 164 return *this; 165 } 166 /** | 136 * Dereferencing operator, allows the buffer to be treated as a char* 137 * and dereferenced to give a character. This returns a null byte if 138 * the cursor is out of range. 139 */ 140 inline char operator*() 141 { 142 if (cursor >= size) { return '\0'; } 143 if (cursor < 0) { return '\0'; } --- 16 unchanged lines hidden (view full) --- 160 * Increments the cursor, iterating forward in the buffer. 161 */ 162 inline input_buffer &operator++() 163 { 164 cursor++; 165 return *this; 166 } 167 /** |
167 * Cast to char* operator. Returns a pointer into the buffer that can 168 * be used for constructing strings. 169 */ 170 inline operator const char*() 171 { 172 if (cursor >= size) { return 0; } 173 if (cursor < 0) { return 0; } 174 return &buffer[cursor]; 175 } 176 /** | |
177 * Consumes a character. Moves the cursor one character forward if the 178 * next character matches the argument, returning true. If the current 179 * character does not match the argument, returns false. 180 */ 181 inline bool consume(char c) 182 { | 168 * Consumes a character. Moves the cursor one character forward if the 169 * next character matches the argument, returning true. If the current 170 * character does not match the argument, returns false. 171 */ 172 inline bool consume(char c) 173 { |
183 if ((*this)[0] == c) | 174 if (*(*this) == c) |
184 { 185 ++(*this); 186 return true; 187 } 188 return false; 189 } 190 /** 191 * Consumes a string. If the (null-terminated) string passed as the --- 11 unchanged lines hidden (view full) --- 203 */ 204 bool consume_integer(unsigned long long &outInt); 205 /** 206 * Reads an arithmetic expression (containing any of the normal C 207 * operators), evaluates it, and returns the result. 208 */ 209 bool consume_integer_expression(unsigned long long &outInt); 210 /** | 175 { 176 ++(*this); 177 return true; 178 } 179 return false; 180 } 181 /** 182 * Consumes a string. If the (null-terminated) string passed as the --- 11 unchanged lines hidden (view full) --- 194 */ 195 bool consume_integer(unsigned long long &outInt); 196 /** 197 * Reads an arithmetic expression (containing any of the normal C 198 * operators), evaluates it, and returns the result. 199 */ 200 bool consume_integer_expression(unsigned long long &outInt); 201 /** |
202 * Consumes two hex digits and return the resulting byte via the first 203 * argument. If the next two characters are hex digits, returns true 204 * and advances the cursor. If not, then returns false and leaves the 205 * cursor in place. 206 */ 207 bool consume_hex_byte(uint8_t &outByte); 208 /** |
|
211 * Template function that consumes a binary value in big-endian format 212 * from the input stream. Returns true and advances the cursor if 213 * there is a value of the correct size. This function assumes that 214 * all values must be natively aligned, and so advances the cursor to 215 * the correct alignment before reading. 216 */ 217 template<typename T> 218 bool consume_binary(T &out) --- 8 unchanged lines hidden (view full) --- 227 { 228 return false; 229 } 230 cursor += align; 231 assert(cursor % type_size == 0); 232 out = 0; 233 for (int i=0 ; i<type_size ; ++i) 234 { | 209 * Template function that consumes a binary value in big-endian format 210 * from the input stream. Returns true and advances the cursor if 211 * there is a value of the correct size. This function assumes that 212 * all values must be natively aligned, and so advances the cursor to 213 * the correct alignment before reading. 214 */ 215 template<typename T> 216 bool consume_binary(T &out) --- 8 unchanged lines hidden (view full) --- 225 { 226 return false; 227 } 228 cursor += align; 229 assert(cursor % type_size == 0); 230 out = 0; 231 for (int i=0 ; i<type_size ; ++i) 232 { |
233 if (size < cursor) 234 { 235 return false; 236 } |
|
235 out <<= 8; 236 out |= (((T)buffer[cursor++]) & 0xff); 237 } 238 return true; 239 } | 237 out <<= 8; 238 out |= (((T)buffer[cursor++]) & 0xff); 239 } 240 return true; 241 } |
240 /** 241 * Consumes two hex digits and return the resulting byte via the first 242 * argument. If the next two characters are hex digits, returns true 243 * and advances the cursor. If not, then returns false and leaves the 244 * cursor in place. 245 */ 246 bool consume_hex_byte(uint8_t &outByte); 247 /** 248 * Advances the cursor to the start of the next token, skipping 249 * comments and whitespace. If the cursor already points to the start 250 * of a token, then this function does nothing. 251 */ 252 input_buffer &next_token(); 253 /** 254 * Prints a message indicating the location of a parse error. 255 */ 256 void parse_error(const char *msg); | |
257#ifndef NDEBUG 258 /** 259 * Dumps the current cursor value and the unconsumed values in the 260 * input buffer to the standard error. This method is intended solely 261 * for debugging. 262 */ 263 void dump(); 264#endif --- 8 unchanged lines hidden (view full) --- 273 { 274 return false; 275 } 276 out = buffer[cursor++]; 277 return true; 278} 279 280/** | 242#ifndef NDEBUG 243 /** 244 * Dumps the current cursor value and the unconsumed values in the 245 * input buffer to the standard error. This method is intended solely 246 * for debugging. 247 */ 248 void dump(); 249#endif --- 8 unchanged lines hidden (view full) --- 258 { 259 return false; 260 } 261 out = buffer[cursor++]; 262 return true; 263} 264 265/** |
281 * Subclass of input_buffer that mmap()s a file and owns the resulting memory. 282 * When this object is destroyed, the memory is unmapped. | 266 * A text input buffer used for parsing DTS files. This manages a stack 267 * of input buffers to handle /include/ operations. |
283 */ | 268 */ |
284struct mmap_input_buffer : public input_buffer | 269class text_input_buffer |
285{ | 270{ |
271 std::unordered_set<std::string> defines; |
|
286 /** | 272 /** |
287 * Constructs a new buffer from the file passed in as a file 288 * descriptor. | 273 * The cursor is the index into the input stream where we are currently reading. |
289 */ | 274 */ |
290 mmap_input_buffer(int fd); | 275 int cursor = 0; |
291 /** | 276 /** |
292 * Unmaps the buffer, if one exists. | 277 * The current stack of includes. The current input is always from the top 278 * of the stack. |
293 */ | 279 */ |
294 virtual ~mmap_input_buffer(); 295}; 296/** 297 * Input buffer read from standard input. This is used for reading device tree 298 * blobs and source from standard input. It reads the entire input into 299 * malloc'd memory, so will be very slow for large inputs. DTS and DTB files 300 * are very rarely more than 10KB though, so this is probably not a problem. 301 */ 302struct stream_input_buffer : public input_buffer 303{ | 280 std::stack<std::shared_ptr<input_buffer>> input_stack; |
304 /** | 281 /** |
305 * The buffer that will store the data read from the standard input. | 282 * The set of paths searched when handling new inclusions. |
306 */ | 283 */ |
307 std::vector<char> b; | 284 const std::vector<std::string> include_paths; |
308 /** | 285 /** |
309 * Constructs a new buffer from the standard input. | 286 * Reads forward past any spaces. The DTS format is not whitespace 287 * sensitive and so we want to scan past whitespace when reading it. |
310 */ | 288 */ |
311 stream_input_buffer(); | 289 void skip_spaces(); 290 /** 291 * Returns the character immediately after the current one. 292 * 293 * This method does not look between files. 294 */ 295 char peek(); 296 /** 297 * If a /include/ token is encountered, then look up the corresponding 298 * input file, push it onto the input stack, and continue. 299 */ 300 void handle_include(); 301 /** 302 * The base directory for this file. 303 */ 304 const std::string dir; 305 /** 306 * The file where dependencies should be output. 307 */ 308 FILE *depfile; 309 public: 310 /** 311 * Construct a new text input buffer with the specified buffer as the start 312 * of parsing and the specified set of input paths for handling new 313 * inclusions. 314 */ 315 text_input_buffer(std::unique_ptr<input_buffer> &&b, 316 std::unordered_set<std::string> &&d, 317 std::vector<std::string> &&i, 318 const std::string directory, 319 FILE *deps) 320 : defines(d), include_paths(i), dir(directory), depfile(deps) 321 { 322 input_stack.push(std::move(b)); 323 } 324 /** 325 * Skips all characters in the input until the specified character is 326 * encountered. 327 */ 328 void skip_to(char); 329 /** 330 * Parse an expression. If `stopAtParen` is set, then only parse a number 331 * or a parenthetical expression, otherwise assume that either is the 332 * left-hand side of a binary expression and try to parse the right-hand 333 * side. 334 */ 335 expression_ptr parse_expression(bool stopAtParen=false); 336 /** 337 * Parse a binary expression, having already parsed the right-hand side. 338 */ 339 expression_ptr parse_binary_expression(expression_ptr lhs); 340 /** 341 * Return whether all input has been consumed. 342 */ 343 bool finished() 344 { 345 return input_stack.empty() || 346 ((input_stack.size() == 1) && input_stack.top()->finished()); 347 } 348 /** 349 * Dereferencing operator. Returns the current character in the top input buffer. 
350 */ 351 inline char operator*() 352 { 353 if (input_stack.empty()) 354 { 355 return 0; 356 } 357 return *(*input_stack.top()); 358 } 359 /** 360 * Increments the cursor, iterating forward in the buffer. 361 */ 362 inline text_input_buffer &operator++() 363 { 364 if (input_stack.empty()) 365 { 366 return *this; 367 } 368 cursor++; 369 auto &top = *input_stack.top(); 370 ++top; 371 if (top.finished()) 372 { 373 input_stack.pop(); 374 } 375 return *this; 376 } 377 /** 378 * Consumes a character. Moves the cursor one character forward if the 379 * next character matches the argument, returning true. If the current 380 * character does not match the argument, returns false. 381 */ 382 inline bool consume(char c) 383 { 384 if (*(*this) == c) 385 { 386 ++(*this); 387 return true; 388 } 389 return false; 390 } 391 /** 392 * Consumes a string. If the (null-terminated) string passed as the 393 * argument appears in the input, advances the cursor to the end and 394 * returns true. Returns false if the string does not appear at the 395 * current point in the input. 396 * 397 * This method does not scan between files. 398 */ 399 bool consume(const char *str) 400 { 401 if (input_stack.empty()) 402 { 403 return false; 404 } 405 return input_stack.top()->consume(str); 406 } 407 /** 408 * Reads an integer in base 8, 10, or 16. Returns true and advances 409 * the cursor to the end of the integer if the cursor points to an 410 * integer, returns false and does not move the cursor otherwise. 411 * 412 * The parsed value is returned via the argument. 413 * 414 * This method does not scan between files. 415 */ 416 bool consume_integer(unsigned long long &outInt) 417 { 418 if (input_stack.empty()) 419 { 420 return false; 421 } 422 return input_stack.top()->consume_integer(outInt); 423 } 424 /** 425 * Reads an arithmetic expression (containing any of the normal C 426 * operators), evaluates it, and returns the result. 
427 */ 428 bool consume_integer_expression(unsigned long long &outInt); 429 /** 430 * Consumes two hex digits and return the resulting byte via the first 431 * argument. If the next two characters are hex digits, returns true 432 * and advances the cursor. If not, then returns false and leaves the 433 * cursor in place. 434 * 435 * This method does not scan between files. 436 */ 437 bool consume_hex_byte(uint8_t &outByte) 438 { 439 if (input_stack.empty()) 440 { 441 return false; 442 } 443 return input_stack.top()->consume_hex_byte(outByte); 444 } 445 /** 446 * Returns the longest string in the input buffer starting at the 447 * current cursor and composed entirely of characters that are valid in 448 * node names. 449 */ 450 std::string parse_node_name(); 451 /** 452 * Returns the longest string in the input buffer starting at the 453 * current cursor and composed entirely of characters that are valid in 454 * property names. 455 */ 456 std::string parse_property_name(); 457 /** 458 * Parses either a node or a property name. If is_property is true on 459 * entry, then only property names are parsed. If it is false, then it 460 * will be set, on return, to indicate whether the parsed name is only 461 * valid as a property. 462 */ 463 std::string parse_node_or_property_name(bool &is_property); 464 /** 465 * Parses up to a specified character and returns the intervening 466 * characters as a string. 467 */ 468 std::string parse_to(char); 469 /** 470 * Advances the cursor to the start of the next token, skipping 471 * comments and whitespace. If the cursor already points to the start 472 * of a token, then this function does nothing. 473 */ 474 text_input_buffer &next_token(); 475 /** 476 * Location in the source file. This should never be interpreted by 477 * anything other than error reporting functions of this class. It will 478 * eventually become something more complex than an `int`. 
479 */ 480 class source_location 481 { 482 friend class text_input_buffer; 483 /** 484 * The text buffer object that included `b`. 485 */ 486 text_input_buffer &buffer; 487 /** 488 * The underlying buffer that contains this location. 489 */ 490 std::shared_ptr<input_buffer> b; 491 /** 492 * The offset within the current buffer of the source location. 493 */ 494 int cursor; 495 source_location(text_input_buffer &buf) 496 : buffer(buf), 497 b(buf.input_stack.empty() ? nullptr : buf.input_stack.top()), 498 cursor(b ? b->cursor : 0) {} 499 public: 500 /** 501 * Report an error at this location. 502 */ 503 void report_error(const char *msg) 504 { 505 if (b) 506 { 507 buffer.parse_error(msg, *b, cursor); 508 } 509 else 510 { 511 buffer.parse_error(msg); 512 } 513 } 514 }; 515 /** 516 * Returns the current source location. 517 */ 518 source_location location() 519 { 520 return { *this }; 521 } 522 /** 523 * Prints a message indicating the location of a parse error. 524 */ 525 void parse_error(const char *msg); 526 private: 527 /** 528 * Prints a message indicating the location of a parse error, given a 529 * specified location. This is used when input has already moved beyond 530 * the location that caused the failure. 531 */ 532 void parse_error(const char *msg, input_buffer &b, int loc); |
312}; 313 314} // namespace dtc 315 316#endif // !_INPUT_BUFFER_HH_ | 533}; 534 535} // namespace dtc 536 537#endif // !_INPUT_BUFFER_HH_ |