input_buffer.hh (009e81b16465ea457c0e63fd49fe77f47cc27a5a) input_buffer.hh (bbe31b709a653884e18995a1c97cdafd7392999a)
1/*-
2 * Copyright (c) 2013 David Chisnall
3 * All rights reserved.
4 *
5 * This software was developed by SRI International and the University of
6 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
7 * ("CTSRD"), as part of the DARPA CRASH research programme.
8 *

--- 20 unchanged lines hidden (view full) ---

29 *
30 * $FreeBSD$
31 */
32
33#ifndef _INPUT_BUFFER_HH_
34#define _INPUT_BUFFER_HH_
35#include "util.hh"
36#include <assert.h>
1/*-
2 * Copyright (c) 2013 David Chisnall
3 * All rights reserved.
4 *
5 * This software was developed by SRI International and the University of
6 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
7 * ("CTSRD"), as part of the DARPA CRASH research programme.
8 *

--- 20 unchanged lines hidden (view full) ---

29 *
30 * $FreeBSD$
31 */
32
33#ifndef _INPUT_BUFFER_HH_
34#define _INPUT_BUFFER_HH_
35#include "util.hh"
36#include <assert.h>
37#include <stack>
38#include <string>
39#include <unordered_set>
37
38namespace dtc
39{
40
41namespace {
42struct expression;
43typedef std::unique_ptr<expression> expression_ptr;
44}

--- 5 unchanged lines hidden (view full) ---

50 * underlying memory, however it is the user's responsibility to ensure that
51 * such copies do not persist beyond the lifetime of the underlying memory.
52 *
53 * This also contains methods for reporting errors and for consuming the token
54 * stream.
55 */
56class input_buffer
57{
40
41namespace dtc
42{
43
44namespace {
45struct expression;
46typedef std::unique_ptr<expression> expression_ptr;
47}

--- 5 unchanged lines hidden (view full) ---

53 * underlying memory, however it is the user's responsibility to ensure that
54 * such copies do not persist beyond the lifetime of the underlying memory.
55 *
56 * This also contains methods for reporting errors and for consuming the token
57 * stream.
58 */
59class input_buffer
60{
61 friend class text_input_buffer;
58 protected:
59 /**
60 * The buffer. This class doesn't own the buffer, but the
61 * mmap_input_buffer subclass does.
62 */
63 const char* buffer;
64 /**
65 * The size of the buffer.
66 */
67 int size;
68 private:
69 /**
62 protected:
63 /**
64 * The buffer. This class doesn't own the buffer, but the
65 * mmap_input_buffer subclass does.
66 */
67 const char* buffer;
68 /**
69 * The size of the buffer.
70 */
71 int size;
72 private:
73 /**
70 * Parse an expression. If `stopAtParen` is set, then only parse a number
71 * or a parenthetical expression, otherwise assume that either is the
72 * left-hand side of a binary expression and try to parse the right-hand
73 * side.
74 */
75 expression_ptr parse_expression(bool stopAtParen=false);
76 /**
77 * Parse a binary expression, having already parsed the left-hand side.
78 */
79 expression_ptr parse_binary_expression(expression_ptr lhs);
80 /**
81 * The current place in the buffer where we are reading. This class
82 * keeps a separate size, pointer, and cursor so that we can move
83 * forwards and backwards and still have checks that we haven't fallen
84 * off either end.
85 */
86 int cursor;
87 /**
88 * Private constructor. This is used to create input buffers that
89 * refer to the same memory, but have different cursors.
90 */
91 input_buffer(const char* b, int s, int c) : buffer(b), size(s),
92 cursor(c) {}
74 * The current place in the buffer where we are reading. This class
75 * keeps a separate size, pointer, and cursor so that we can move
76 * forwards and backwards and still have checks that we haven't fallen
77 * off either end.
78 */
79 int cursor;
80 /**
81 * Private constructor. This is used to create input buffers that
82 * refer to the same memory, but have different cursors.
83 */
84 input_buffer(const char* b, int s, int c) : buffer(b), size(s),
85 cursor(c) {}
86 public:
93 /**
87 /**
94 * Reads forward past any spaces. The DTS format is not whitespace
95 * sensitive and so we want to scan past whitespace when reading it.
88 * Returns the file name associated with this buffer.
96 */
89 */
97 void skip_spaces();
98 public:
90 virtual const std::string &filename() const
91 {
92 static std::string s;
93 return s;
94 }
95 static std::unique_ptr<input_buffer> buffer_for_file(const std::string &path,
96 bool warn=true);
99 /**
97 /**
98 * Skips all characters in the input until the specified character is
99 * encountered.
100 */
101 void skip_to(char);
102 /**
103 * Parses up to a specified character and returns the intervening
104 * characters as a string.
105 */
106 std::string parse_to(char);
107 /**
100 * Return whether all input has been consumed.
101 */
102 bool finished() { return cursor >= size; }
103 /**
104 * Virtual destructor. Does nothing, but exists so that subclasses
105 * that own the memory can run cleanup code for deallocating it.
106 */
107 virtual ~input_buffer() {};

--- 12 unchanged lines hidden (view full) ---

120 *
121 * The returned buffer shares the same underlying storage as the
122 * original. This is intended to be used for splitting up the various
123 * sections of a device tree blob. Requesting a size of 0 will give a
124 * buffer that extends to the end of the available memory.
125 */
126 input_buffer buffer_from_offset(int offset, int s=0);
127 /**
108 * Return whether all input has been consumed.
109 */
110 bool finished() { return cursor >= size; }
111 /**
112 * Virtual destructor. Does nothing, but exists so that subclasses
113 * that own the memory can run cleanup code for deallocating it.
114 */
115 virtual ~input_buffer() {};

--- 12 unchanged lines hidden (view full) ---

128 *
129 * The returned buffer shares the same underlying storage as the
130 * original. This is intended to be used for splitting up the various
131 * sections of a device tree blob. Requesting a size of 0 will give a
132 * buffer that extends to the end of the available memory.
133 */
134 input_buffer buffer_from_offset(int offset, int s=0);
135 /**
128 * Returns true if this buffer has no unconsumed space in it.
129 */
130 inline bool empty()
131 {
132 return cursor >= size;
133 }
134 /**
135 * Dereferencing operator, allows the buffer to be treated as a char*
136 * and dereferenced to give a character. This returns a null byte if
137 * the cursor is out of range.
138 */
139 inline char operator*()
140 {
141 if (cursor >= size) { return '\0'; }
142 if (cursor < 0) { return '\0'; }

--- 16 unchanged lines hidden (view full) ---

159 * Increments the cursor, iterating forward in the buffer.
160 */
161 inline input_buffer &operator++()
162 {
163 cursor++;
164 return *this;
165 }
166 /**
136 * Dereferencing operator, allows the buffer to be treated as a char*
137 * and dereferenced to give a character. This returns a null byte if
138 * the cursor is out of range.
139 */
140 inline char operator*()
141 {
142 if (cursor >= size) { return '\0'; }
143 if (cursor < 0) { return '\0'; }

--- 16 unchanged lines hidden (view full) ---

160 * Increments the cursor, iterating forward in the buffer.
161 */
162 inline input_buffer &operator++()
163 {
164 cursor++;
165 return *this;
166 }
167 /**
167 * Cast to char* operator. Returns a pointer into the buffer that can
168 * be used for constructing strings.
169 */
170 inline operator const char*()
171 {
172 if (cursor >= size) { return 0; }
173 if (cursor < 0) { return 0; }
174 return &buffer[cursor];
175 }
176 /**
177 * Consumes a character. Moves the cursor one character forward if the
178 * next character matches the argument, returning true. If the current
179 * character does not match the argument, returns false.
180 */
181 inline bool consume(char c)
182 {
168 * Consumes a character. Moves the cursor one character forward if the
169 * next character matches the argument, returning true. If the current
170 * character does not match the argument, returns false.
171 */
172 inline bool consume(char c)
173 {
183 if ((*this)[0] == c)
174 if (*(*this) == c)
184 {
185 ++(*this);
186 return true;
187 }
188 return false;
189 }
190 /**
191 * Consumes a string. If the (null-terminated) string passed as the

--- 11 unchanged lines hidden (view full) ---

203 */
204 bool consume_integer(unsigned long long &outInt);
205 /**
206 * Reads an arithmetic expression (containing any of the normal C
207 * operators), evaluates it, and returns the result.
208 */
209 bool consume_integer_expression(unsigned long long &outInt);
210 /**
175 {
176 ++(*this);
177 return true;
178 }
179 return false;
180 }
181 /**
182 * Consumes a string. If the (null-terminated) string passed as the

--- 11 unchanged lines hidden (view full) ---

194 */
195 bool consume_integer(unsigned long long &outInt);
196 /**
197 * Reads an arithmetic expression (containing any of the normal C
198 * operators), evaluates it, and returns the result.
199 */
200 bool consume_integer_expression(unsigned long long &outInt);
201 /**
202 * Consumes two hex digits and returns the resulting byte via the first
203 * argument. If the next two characters are hex digits, returns true
204 * and advances the cursor. If not, then returns false and leaves the
205 * cursor in place.
206 */
207 bool consume_hex_byte(uint8_t &outByte);
208 /**
211 * Template function that consumes a binary value in big-endian format
212 * from the input stream. Returns true and advances the cursor if
213 * there is a value of the correct size. This function assumes that
214 * all values must be natively aligned, and so advances the cursor to
215 * the correct alignment before reading.
216 */
217 template<typename T>
218 bool consume_binary(T &out)

--- 8 unchanged lines hidden (view full) ---

227 {
228 return false;
229 }
230 cursor += align;
231 assert(cursor % type_size == 0);
232 out = 0;
233 for (int i=0 ; i<type_size ; ++i)
234 {
209 * Template function that consumes a binary value in big-endian format
210 * from the input stream. Returns true and advances the cursor if
211 * there is a value of the correct size. This function assumes that
212 * all values must be natively aligned, and so advances the cursor to
213 * the correct alignment before reading.
214 */
215 template<typename T>
216 bool consume_binary(T &out)

--- 8 unchanged lines hidden (view full) ---

225 {
226 return false;
227 }
228 cursor += align;
229 assert(cursor % type_size == 0);
230 out = 0;
231 for (int i=0 ; i<type_size ; ++i)
232 {
233 if (size < cursor)
234 {
235 return false;
236 }
235 out <<= 8;
236 out |= (((T)buffer[cursor++]) & 0xff);
237 }
238 return true;
239 }
237 out <<= 8;
238 out |= (((T)buffer[cursor++]) & 0xff);
239 }
240 return true;
241 }
240 /**
241 * Consumes two hex digits and returns the resulting byte via the first
242 * argument. If the next two characters are hex digits, returns true
243 * and advances the cursor. If not, then returns false and leaves the
244 * cursor in place.
245 */
246 bool consume_hex_byte(uint8_t &outByte);
247 /**
248 * Advances the cursor to the start of the next token, skipping
249 * comments and whitespace. If the cursor already points to the start
250 * of a token, then this function does nothing.
251 */
252 input_buffer &next_token();
253 /**
254 * Prints a message indicating the location of a parse error.
255 */
256 void parse_error(const char *msg);
257#ifndef NDEBUG
258 /**
259 * Dumps the current cursor value and the unconsumed values in the
260 * input buffer to the standard error. This method is intended solely
261 * for debugging.
262 */
263 void dump();
264#endif

--- 8 unchanged lines hidden (view full) ---

273 {
274 return false;
275 }
276 out = buffer[cursor++];
277 return true;
278}
279
280/**
242#ifndef NDEBUG
243 /**
244 * Dumps the current cursor value and the unconsumed values in the
245 * input buffer to the standard error. This method is intended solely
246 * for debugging.
247 */
248 void dump();
249#endif

--- 8 unchanged lines hidden (view full) ---

258 {
259 return false;
260 }
261 out = buffer[cursor++];
262 return true;
263}
264
265/**
281 * Subclass of input_buffer that mmap()s a file and owns the resulting memory.
282 * When this object is destroyed, the memory is unmapped.
266 * An input buffer subclass used for parsing DTS files. This manages a stack
267 * of input buffers to handle /include/ operations.
283 */
268 */
284struct mmap_input_buffer : public input_buffer
269class text_input_buffer
285{
270{
271 std::unordered_set<std::string> defines;
286 /**
272 /**
287 * Constructs a new buffer from the file passed in as a file
288 * descriptor.
273 * The cursor is the index into the input stream where we are currently reading.
289 */
274 */
290 mmap_input_buffer(int fd);
275 int cursor = 0;
291 /**
276 /**
292 * Unmaps the buffer, if one exists.
277 * The current stack of includes. The current input is always from the top
278 * of the stack.
293 */
279 */
294 virtual ~mmap_input_buffer();
295};
296/**
297 * Input buffer read from standard input. This is used for reading device tree
298 * blobs and source from standard input. It reads the entire input into
299 * malloc'd memory, so will be very slow for large inputs. DTS and DTB files
300 * are very rarely more than 10KB though, so this is probably not a problem.
301 */
302struct stream_input_buffer : public input_buffer
303{
280 std::stack<std::shared_ptr<input_buffer>> input_stack;
304 /**
281 /**
305 * The buffer that will store the data read from the standard input.
282 * The set of paths used when handling new inclusions.
306 */
283 */
307 std::vector<char> b;
284 const std::vector<std::string> include_paths;
308 /**
285 /**
309 * Constructs a new buffer from the standard input.
286 * Reads forward past any spaces. The DTS format is not whitespace
287 * sensitive and so we want to scan past whitespace when reading it.
310 */
288 */
311 stream_input_buffer();
289 void skip_spaces();
290 /**
291 * Returns the character immediately after the current one.
292 *
293 * This method does not look between files.
294 */
295 char peek();
296 /**
297 * If a /include/ token is encountered, then look up the corresponding
298 * input file, push it onto the input stack, and continue.
299 */
300 void handle_include();
301 /**
302 * The base directory for this file.
303 */
304 const std::string dir;
305 /**
306 * The file where dependencies should be output.
307 */
308 FILE *depfile;
309 public:
310 /**
311 * Construct a new text input buffer with the specified buffer as the start
312 * of parsing and the specified set of input paths for handling new
313 * inclusions.
314 */
315 text_input_buffer(std::unique_ptr<input_buffer> &&b,
316 std::unordered_set<std::string> &&d,
317 std::vector<std::string> &&i,
318 const std::string directory,
319 FILE *deps)
320 : defines(d), include_paths(i), dir(directory), depfile(deps)
321 {
322 input_stack.push(std::move(b));
323 }
324 /**
325 * Skips all characters in the input until the specified character is
326 * encountered.
327 */
328 void skip_to(char);
329 /**
330 * Parse an expression. If `stopAtParen` is set, then only parse a number
331 * or a parenthetical expression, otherwise assume that either is the
332 * left-hand side of a binary expression and try to parse the right-hand
333 * side.
334 */
335 expression_ptr parse_expression(bool stopAtParen=false);
336 /**
337 * Parse a binary expression, having already parsed the left-hand side.
338 */
339 expression_ptr parse_binary_expression(expression_ptr lhs);
340 /**
341 * Return whether all input has been consumed.
342 */
343 bool finished()
344 {
345 return input_stack.empty() ||
346 ((input_stack.size() == 1) && input_stack.top()->finished());
347 }
348 /**
349 * Dereferencing operator. Returns the current character in the top input buffer.
350 */
351 inline char operator*()
352 {
353 if (input_stack.empty())
354 {
355 return 0;
356 }
357 return *(*input_stack.top());
358 }
359 /**
360 * Increments the cursor, iterating forward in the buffer.
361 */
362 inline text_input_buffer &operator++()
363 {
364 if (input_stack.empty())
365 {
366 return *this;
367 }
368 cursor++;
369 auto &top = *input_stack.top();
370 ++top;
371 if (top.finished())
372 {
373 input_stack.pop();
374 }
375 return *this;
376 }
377 /**
378 * Consumes a character. Moves the cursor one character forward if the
379 * next character matches the argument, returning true. If the current
380 * character does not match the argument, returns false.
381 */
382 inline bool consume(char c)
383 {
384 if (*(*this) == c)
385 {
386 ++(*this);
387 return true;
388 }
389 return false;
390 }
391 /**
392 * Consumes a string. If the (null-terminated) string passed as the
393 * argument appears in the input, advances the cursor to the end and
394 * returns true. Returns false if the string does not appear at the
395 * current point in the input.
396 *
397 * This method does not scan between files.
398 */
399 bool consume(const char *str)
400 {
401 if (input_stack.empty())
402 {
403 return false;
404 }
405 return input_stack.top()->consume(str);
406 }
407 /**
408 * Reads an integer in base 8, 10, or 16. Returns true and advances
409 * the cursor to the end of the integer if the cursor points to an
410 * integer, returns false and does not move the cursor otherwise.
411 *
412 * The parsed value is returned via the argument.
413 *
414 * This method does not scan between files.
415 */
416 bool consume_integer(unsigned long long &outInt)
417 {
418 if (input_stack.empty())
419 {
420 return false;
421 }
422 return input_stack.top()->consume_integer(outInt);
423 }
424 /**
425 * Reads an arithmetic expression (containing any of the normal C
426 * operators), evaluates it, and returns the result.
427 */
428 bool consume_integer_expression(unsigned long long &outInt);
429 /**
430 * Consumes two hex digits and returns the resulting byte via the first
431 * argument. If the next two characters are hex digits, returns true
432 * and advances the cursor. If not, then returns false and leaves the
433 * cursor in place.
434 *
435 * This method does not scan between files.
436 */
437 bool consume_hex_byte(uint8_t &outByte)
438 {
439 if (input_stack.empty())
440 {
441 return false;
442 }
443 return input_stack.top()->consume_hex_byte(outByte);
444 }
445 /**
446 * Returns the longest string in the input buffer starting at the
447 * current cursor and composed entirely of characters that are valid in
448 * node names.
449 */
450 std::string parse_node_name();
451 /**
452 * Returns the longest string in the input buffer starting at the
453 * current cursor and composed entirely of characters that are valid in
454 * property names.
455 */
456 std::string parse_property_name();
457 /**
458 * Parses either a node or a property name. If is_property is true on
459 * entry, then only property names are parsed. If it is false, then it
460 * will be set, on return, to indicate whether the parsed name is only
461 * valid as a property.
462 */
463 std::string parse_node_or_property_name(bool &is_property);
464 /**
465 * Parses up to a specified character and returns the intervening
466 * characters as a string.
467 */
468 std::string parse_to(char);
469 /**
470 * Advances the cursor to the start of the next token, skipping
471 * comments and whitespace. If the cursor already points to the start
472 * of a token, then this function does nothing.
473 */
474 text_input_buffer &next_token();
475 /**
476 * Location in the source file. This should never be interpreted by
477 * anything other than error reporting functions of this class. It will
478 * eventually become something more complex than an `int`.
479 */
480 class source_location
481 {
482 friend class text_input_buffer;
483 /**
484 * The text buffer object that included `b`.
485 */
486 text_input_buffer &buffer;
487 /**
488 * The underlying buffer that contains this location.
489 */
490 std::shared_ptr<input_buffer> b;
491 /**
492 * The offset within the current buffer of the source location.
493 */
494 int cursor;
495 source_location(text_input_buffer &buf)
496 : buffer(buf),
497 b(buf.input_stack.empty() ? nullptr : buf.input_stack.top()),
498 cursor(b ? b->cursor : 0) {}
499 public:
500 /**
501 * Report an error at this location.
502 */
503 void report_error(const char *msg)
504 {
505 if (b)
506 {
507 buffer.parse_error(msg, *b, cursor);
508 }
509 else
510 {
511 buffer.parse_error(msg);
512 }
513 }
514 };
515 /**
516 * Returns the current source location.
517 */
518 source_location location()
519 {
520 return { *this };
521 }
522 /**
523 * Prints a message indicating the location of a parse error.
524 */
525 void parse_error(const char *msg);
526 private:
527 /**
528 * Prints a message indicating the location of a parse error, given a
529 * specified location. This is used when input has already moved beyond
530 * the location that caused the failure.
531 */
532 void parse_error(const char *msg, input_buffer &b, int loc);
312};
313
314} // namespace dtc
315
316#endif // !_INPUT_BUFFER_HH_
533};
534
535} // namespace dtc
536
537#endif // !_INPUT_BUFFER_HH_