1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2013 David Chisnall
5 * All rights reserved.
6 *
7 * This software was developed by SRI International and the University of
8 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
9 * ("CTSRD"), as part of the DARPA CRASH research programme.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33 #ifndef _INPUT_BUFFER_HH_
34 #define _INPUT_BUFFER_HH_
35 #include "util.hh"
36 #include <assert.h>
37 #include <stack>
38 #include <string>
39 #include <unordered_set>
40
41 namespace dtc
42 {
43
namespace {
/**
 * Forward declaration of the expression type.  Only the name is needed in
 * this header; it is used as the pointee of `expression_ptr` below.
 */
struct expression;
/**
 * Owning pointer to an expression, used as the return and parameter type of
 * the expression-parsing methods declared in this header.
 */
using expression_ptr = std::unique_ptr<expression>;
}
48
/**
 * Class encapsulating the input file.  Can be used as a const char*, but has
 * range checking.  Attempting to access anything out of range will return a 0
 * byte.  The input buffer can be cheaply copied, without copying the
 * underlying memory, however it is the user's responsibility to ensure that
 * such copies do not persist beyond the lifetime of the underlying memory.
 *
 * This also contains methods for reporting errors and for consuming the token
 * stream.
 */
class input_buffer
{
	// text_input_buffer reads the private cursor when it records source
	// locations for error reporting.
	friend class text_input_buffer;
	protected:
	/**
	 * The buffer.  This class doesn't own the buffer, but the
	 * mmap_input_buffer subclass does.
	 */
	const char* buffer;
	/**
	 * The size of the buffer.
	 */
	int size;
	private:
	/**
	 * The current place in the buffer where we are reading.  This class
	 * keeps a separate size, pointer, and cursor so that we can move
	 * forwards and backwards and still have checks that we haven't fallen
	 * off either end.
	 */
	int cursor;
	/**
	 * Private constructor.  This is used to create input buffers that
	 * refer to the same memory, but have different cursors.
	 */
	input_buffer(const char* b, int s, int c) : buffer(b), size(s),
		cursor(c) {}
	public:
	/**
	 * Returns the file name associated with this buffer.  The base class
	 * has no associated file and returns a reference to an empty string;
	 * the method is virtual so that subclasses can supply a real name.
	 */
	virtual const std::string &filename() const
	{
		static const std::string s;
		return s;
	}
	/**
	 * Factory that returns an owning pointer to a buffer for `path`.
	 * Defined out of line; presumably `warn` controls whether a failure to
	 * open the file is reported -- confirm against the definition.
	 */
	static std::unique_ptr<input_buffer> buffer_for_file(const std::string &path,
	                                                     bool warn=true);
	/**
	 * Skips all characters in the input until the specified character is
	 * encountered.
	 */
	void skip_to(char);
	/**
	 * Parses up to a specified character and returns the intervening
	 * characters as a string.
	 */
	std::string parse_to(char);
	/**
	 * Return whether all input has been consumed, i.e. whether the cursor
	 * has reached (or passed) the end of the buffer.
	 */
	bool finished() const { return cursor >= size; }
	/**
	 * Virtual destructor.  Does nothing, but exists so that subclasses
	 * that own the memory can run cleanup code for deallocating it.
	 */
	virtual ~input_buffer() = default;
	/**
	 * Constructs an empty buffer.
	 */
	input_buffer() : buffer(nullptr), size(0), cursor(0) {}
	/**
	 * Constructs a new buffer with a specified memory region and size.
	 * The cursor starts at the beginning of the region.
	 */
	input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0){}
	/**
	 * Returns a new input buffer referring into this input, clamped to the
	 * specified size.  If the requested buffer would fall outside the
	 * range of this one, then it returns an empty buffer.
	 *
	 * The returned buffer shares the same underlying storage as the
	 * original.  This is intended to be used for splitting up the various
	 * sections of a device tree blob.  Requesting a size of 0 will give a
	 * buffer that extends to the end of the available memory.
	 */
	input_buffer buffer_from_offset(int offset, int s=0);
	/**
	 * Dereferencing operator, allows the buffer to be treated as a char*
	 * and dereferenced to give a character.  This returns a null byte if
	 * the cursor is out of range.
	 */
	inline char operator*() const
	{
		if (cursor < 0 || cursor >= size)
		{
			return '\0';
		}
		return buffer[cursor];
	}
	/**
	 * Array subscripting operator, returns a character at the specified
	 * index offset from the current cursor.  The offset may be negative,
	 * to reread characters that have already been read.  If the current
	 * cursor plus offset is outside of the range, this returns a nul
	 * byte.
	 */
	inline char operator[](int offset) const
	{
		// Do the addition in a wider type so that an extreme offset
		// cannot overflow `int` before the range check happens.
		const long long index = static_cast<long long>(cursor) + offset;
		if (index < 0 || index >= size)
		{
			return '\0';
		}
		return buffer[index];
	}
	/**
	 * Increments the cursor, iterating forward in the buffer.  No range
	 * check is performed here; the accessors return a nul byte once the
	 * cursor is past the end.
	 */
	inline input_buffer &operator++()
	{
		cursor++;
		return *this;
	}
	/**
	 * Returns a pointer to the first byte of the underlying memory
	 * (independent of the cursor).
	 */
	const char *begin() const
	{
		return buffer;
	}
	/**
	 * Returns a pointer one past the last byte of the underlying memory.
	 */
	const char *end() const
	{
		return buffer + size;
	}
	/**
	 * Consumes a character.  Moves the cursor one character forward if the
	 * next character matches the argument, returning true.  If the current
	 * character does not match the argument, returns false.
	 */
	inline bool consume(char c)
	{
		if (*(*this) == c)
		{
			++(*this);
			return true;
		}
		return false;
	}
	/**
	 * Consumes a string.  If the (null-terminated) string passed as the
	 * argument appears in the input, advances the cursor to the end and
	 * returns true.  Returns false if the string does not appear at the
	 * current point in the input.
	 */
	bool consume(const char *str);
	/**
	 * Reads an unsigned value from a character literal.  Returns true and
	 * advances the cursor to the next character.
	 *
	 * The parsed value is returned via the argument.
	 */
	bool consume_char_literal(unsigned long long &outInt);
	/**
	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
	 * the cursor to the end of the integer if the cursor points to an
	 * integer, returns false and does not move the cursor otherwise.
	 *
	 * The parsed value is returned via the argument.
	 */
	bool consume_integer(unsigned long long &outInt);
	/**
	 * Reads an arithmetic expression (containing any of the normal C
	 * operators), evaluates it, and returns the result.
	 */
	bool consume_integer_expression(unsigned long long &outInt);
	/**
	 * Consumes two hex digits and return the resulting byte via the first
	 * argument.  If the next two characters are hex digits, returns true
	 * and advances the cursor.  If not, then returns false and leaves the
	 * cursor in place.
	 */
	bool consume_hex_byte(uint8_t &outByte);
	/**
	 * Template function that consumes a binary value in big-endian format
	 * from the input stream.  Returns true and advances the cursor if
	 * there is a value of the correct size.  This function assumes that
	 * all values must be natively aligned, and so advances the cursor to
	 * the correct alignment before reading.
	 *
	 * If there are not enough bytes left for the padding plus the value,
	 * returns false and leaves both the cursor and `out` untouched.
	 */
	template<typename T>
	bool consume_binary(T &out)
	{
		const int type_size = static_cast<int>(sizeof(T));
		const int misalignment = cursor % type_size;
		const int align = (misalignment != 0) ? (type_size - misalignment) : 0;
		// Compare the bytes that remain against the bytes that are
		// required.  Written as a subtraction so that a cursor close
		// to INT_MAX cannot overflow the sum.
		if (size - cursor < align + type_size)
		{
			return false;
		}
		cursor += align;
		assert(cursor % type_size == 0);
		out = 0;
		// The check above guarantees that all type_size bytes are in
		// range, so no per-byte bounds check is needed here.
		for (int i=0 ; i<type_size ; ++i)
		{
			out <<= 8;
			// Go via unsigned char so that bytes >= 0x80 are not
			// sign-extended when char is signed.
			out |= static_cast<T>(static_cast<unsigned char>(buffer[cursor++]));
		}
		return true;
	}
#ifndef NDEBUG
	/**
	 * Dumps the current cursor value and the unconsumed values in the
	 * input buffer to the standard error.  This method is intended solely
	 * for debugging.
	 */
	void dump();
#endif
};
266 /**
267 * Explicit specialisation for reading a single byte.
268 */
269 template<>
consume_binary(uint8_t & out)270 inline bool input_buffer::consume_binary(uint8_t &out)
271 {
272 if (size < cursor + 1)
273 {
274 return false;
275 }
276 out = buffer[cursor++];
277 return true;
278 }
279
280 /**
281 * An input buffer subclass used for parsing DTS files. This manages a stack
282 * of input buffers to handle /input/ operations.
283 */
284 class text_input_buffer
285 {
286 std::unordered_set<std::string> defines;
287 /**
288 * The cursor is the input into the input stream where we are currently reading.
289 */
290 int cursor = 0;
291 /**
292 * The current stack of includes. The current input is always from the top
293 * of the stack.
294 */
295 std::stack<std::shared_ptr<input_buffer>> input_stack;
296 /**
297 *
298 */
299 const std::vector<std::string> include_paths;
300 /**
301 * Reads forward past any spaces. The DTS format is not whitespace
302 * sensitive and so we want to scan past whitespace when reading it.
303 */
304 void skip_spaces();
305 /**
306 * Returns the character immediately after the current one.
307 *
308 * This method does not look between files.
309 */
310 char peek();
311 /**
312 * If a /include/ token is encountered, then look up the corresponding
313 * input file, push it onto the input stack, and continue.
314 */
315 void handle_include();
316 /**
317 * The base directory for this file.
318 */
319 const std::string dir;
320 /**
321 * The file where dependencies should be output.
322 */
323 FILE *depfile;
324 public:
325 /**
326 * Construct a new text input buffer with the specified buffer as the start
327 * of parsing and the specified set of input paths for handling new
328 * inclusions.
329 */
text_input_buffer(std::unique_ptr<input_buffer> && b,std::unordered_set<std::string> && d,std::vector<std::string> && i,const std::string directory,FILE * deps)330 text_input_buffer(std::unique_ptr<input_buffer> &&b,
331 std::unordered_set<std::string> &&d,
332 std::vector<std::string> &&i,
333 const std::string directory,
334 FILE *deps)
335 : defines(d), include_paths(i), dir(directory), depfile(deps)
336 {
337 input_stack.push(std::move(b));
338 }
339 /**
340 * Skips all characters in the input until the specified character is
341 * encountered.
342 */
343 void skip_to(char);
344 /**
345 * Parse an expression. If `stopAtParen` is set, then only parse a number
346 * or a parenthetical expression, otherwise assume that either is the
347 * left-hand side of a binary expression and try to parse the right-hand
348 * side.
349 */
350 expression_ptr parse_expression(bool stopAtParen=false);
351 /**
352 * Parse a binary expression, having already parsed the right-hand side.
353 */
354 expression_ptr parse_binary_expression(expression_ptr lhs);
355 /**
356 * Return whether all input has been consumed.
357 */
finished()358 bool finished()
359 {
360 return input_stack.empty() ||
361 ((input_stack.size() == 1) && input_stack.top()->finished());
362 }
363 /**
364 * Dereferencing operator. Returns the current character in the top input buffer.
365 */
operator *()366 inline char operator*()
367 {
368 if (input_stack.empty())
369 {
370 return 0;
371 }
372 return *(*input_stack.top());
373 }
374 /**
375 * Increments the cursor, iterating forward in the buffer.
376 */
operator ++()377 inline text_input_buffer &operator++()
378 {
379 if (input_stack.empty())
380 {
381 return *this;
382 }
383 cursor++;
384 auto &top = *input_stack.top();
385 ++top;
386 if (top.finished())
387 {
388 input_stack.pop();
389 }
390 return *this;
391 }
392 /**
393 * Consumes a character. Moves the cursor one character forward if the
394 * next character matches the argument, returning true. If the current
395 * character does not match the argument, returns false.
396 */
consume(char c)397 inline bool consume(char c)
398 {
399 if (*(*this) == c)
400 {
401 ++(*this);
402 return true;
403 }
404 return false;
405 }
406 /**
407 * Consumes a string. If the (null-terminated) string passed as the
408 * argument appears in the input, advances the cursor to the end and
409 * returns true. Returns false if the string does not appear at the
410 * current point in the input.
411 *
412 * This method does not scan between files.
413 */
consume(const char * str)414 bool consume(const char *str)
415 {
416 if (input_stack.empty())
417 {
418 return false;
419 }
420 return input_stack.top()->consume(str);
421 }
422 /**
423 * Converts next char into unsigned
424 *
425 * The parsed value is returned via the argument.
426 *
427 * This method does not scan between files.
428 */
consume_char_literal(unsigned long long & outInt)429 bool consume_char_literal(unsigned long long &outInt)
430 {
431 if (input_stack.empty())
432 {
433 return false;
434 }
435 return input_stack.top()->consume_char_literal(outInt);
436 }
437 /**
438 * Reads an integer in base 8, 10, or 16. Returns true and advances
439 * the cursor to the end of the integer if the cursor points to an
440 * integer, returns false and does not move the cursor otherwise.
441 *
442 * The parsed value is returned via the argument.
443 *
444 * This method does not scan between files.
445 */
consume_integer(unsigned long long & outInt)446 bool consume_integer(unsigned long long &outInt)
447 {
448 if (input_stack.empty())
449 {
450 return false;
451 }
452 return input_stack.top()->consume_integer(outInt);
453 }
454 /**
455 * Reads an arithmetic expression (containing any of the normal C
456 * operators), evaluates it, and returns the result.
457 */
458 bool consume_integer_expression(unsigned long long &outInt);
459 /**
460 * Consumes two hex digits and return the resulting byte via the first
461 * argument. If the next two characters are hex digits, returns true
462 * and advances the cursor. If not, then returns false and leaves the
463 * cursor in place.
464 *
465 * This method does not scan between files.
466 */
consume_hex_byte(uint8_t & outByte)467 bool consume_hex_byte(uint8_t &outByte)
468 {
469 if (input_stack.empty())
470 {
471 return false;
472 }
473 return input_stack.top()->consume_hex_byte(outByte);
474 }
475 /**
476 * Returns the longest string in the input buffer starting at the
477 * current cursor and composed entirely of characters that are valid in
478 * node names.
479 */
480 std::string parse_node_name();
481 /**
482 * Returns the longest string in the input buffer starting at the
483 * current cursor and composed entirely of characters that are valid in
484 * property names.
485 */
486 std::string parse_property_name();
487 /**
488 * Parses either a node or a property name. If is_property is true on
489 * entry, then only property names are parsed. If it is false, then it
490 * will be set, on return, to indicate whether the parsed name is only
491 * valid as a property.
492 */
493 std::string parse_node_or_property_name(bool &is_property);
494 /**
495 * Parses up to a specified character and returns the intervening
496 * characters as a string.
497 */
498 std::string parse_to(char);
499 /**
500 * Advances the cursor to the start of the next token, skipping
501 * comments and whitespace. If the cursor already points to the start
502 * of a token, then this function does nothing.
503 */
504 text_input_buffer &next_token();
505 /**
506 * Location in the source file. This should never be interpreted by
507 * anything other than error reporting functions of this class. It will
508 * eventually become something more complex than an `int`.
509 */
510 class source_location
511 {
512 friend class text_input_buffer;
513 /**
514 * The text buffer object that included `b`.
515 */
516 text_input_buffer &buffer;
517 /**
518 * The underlying buffer that contains this location.
519 */
520 std::shared_ptr<input_buffer> b;
521 /**
522 * The offset within the current buffer of the source location.
523 */
524 int cursor;
source_location(text_input_buffer & buf)525 source_location(text_input_buffer &buf)
526 : buffer(buf),
527 b(buf.input_stack.empty() ? nullptr : buf.input_stack.top()),
528 cursor(b ? b->cursor : 0) {}
529 public:
530 /**
531 * Report an error at this location.
532 */
report_error(const char * msg)533 void report_error(const char *msg)
534 {
535 if (b)
536 {
537 buffer.parse_error(msg, *b, cursor);
538 }
539 else
540 {
541 buffer.parse_error(msg);
542 }
543 }
544 };
545 /**
546 * Returns the current source location.
547 */
location()548 source_location location()
549 {
550 return { *this };
551 }
552 /**
553 * Prints a message indicating the location of a parse error.
554 */
555 void parse_error(const char *msg);
556 /**
557 * Reads the contents of a binary file into `b`. The file name is assumed
558 * to be relative to one of the include paths.
559 *
560 * Returns true if the file exists and can be read, false otherwise.
561 */
562 bool read_binary_file(const std::string &filename, byte_buffer &b);
563 private:
564 /**
565 * Prints a message indicating the location of a parse error, given a
566 * specified location. This is used when input has already moved beyond
567 * the location that caused the failure.
568 */
569 void parse_error(const char *msg, input_buffer &b, int loc);
570 };
571
572 } // namespace dtc
573
574 #endif // !_INPUT_BUFFER_HH_
575