xref: /freebsd/usr.bin/dtc/input_buffer.hh (revision 4dfbc03d6492d9fccb781700cc17d58111dff456)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 David Chisnall
5  * All rights reserved.
6  *
7  * This software was developed by SRI International and the University of
8  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
9  * ("CTSRD"), as part of the DARPA CRASH research programme.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #ifndef _INPUT_BUFFER_HH_
34 #define _INPUT_BUFFER_HH_
35 #include "util.hh"
36 #include <assert.h>
37 #include <stack>
38 #include <string>
39 #include <unordered_set>
40 
41 namespace dtc
42 {
43 
/*
 * Forward declaration of the expression node type, plus the owning pointer
 * alias used as the return type of the expression-parsing methods declared
 * further down in this header.  `expression` is left incomplete here; its
 * definition is not part of this header.  Because these names live in an
 * unnamed namespace, they have internal linkage in every translation unit
 * that includes this file.
 */
namespace {
struct expression;
using expression_ptr = std::unique_ptr<expression>;
}
48 
49 /**
50  * Class encapsulating the input file.  Can be used as a const char*, but has
51  * range checking.  Attempting to access anything out of range will return a 0
52  * byte.  The input buffer can be cheaply copied, without copying the
53  * underlying memory, however it is the user's responsibility to ensure that
54  * such copies do not persist beyond the lifetime of the underlying memory.
55  *
56  * This also contains methods for reporting errors and for consuming the token
57  * stream.
58  */
class input_buffer
{
	friend class text_input_buffer;
	protected:
	/**
	 * The buffer.  This class doesn't own the buffer, but the
	 * mmap_input_buffer subclass does.
	 */
	const char* buffer;
	/**
	 * The size of the buffer, in bytes.
	 */
	int size;
	private:
	/**
	 * The current place in the buffer where we are reading.  This class
	 * keeps a separate size, pointer, and cursor so that we can move
	 * forwards and backwards and still have checks that we haven't fallen
	 * off either end.
	 */
	int cursor;
	/**
	 * Private constructor.  This is used to create input buffers that
	 * refer to the same memory, but have different cursors.
	 */
	input_buffer(const char* b, int s, int c) : buffer(b), size(s),
		cursor(c) {}
	public:
	/**
	 * Returns the file name associated with this buffer.  The base class
	 * has no associated file and returns a reference to a shared empty
	 * string; the method is virtual so that subclasses can supply a name.
	 */
	virtual const std::string &filename() const
	{
		static std::string s;
		return s;
	}
	/**
	 * Creates an input buffer for the file at `path`.  Only declared here;
	 * the meaning of `warn` is defined by the out-of-line implementation
	 * (presumably it controls whether a diagnostic is printed on failure --
	 * confirm against the definition).
	 */
	static std::unique_ptr<input_buffer> buffer_for_file(const std::string &path,
	                                                     bool warn=true);
	/**
	 * Skips all characters in the input until the specified character is
	 * encountered.
	 */
	void skip_to(char);
	/**
	 * Parses up to a specified character and returns the intervening
	 * characters as a string.
	 */
	std::string parse_to(char);
	/**
	 * Return whether all input has been consumed, i.e. whether the cursor
	 * has reached or passed the end of the buffer.
	 */
	bool finished() const { return cursor >= size; }
	/**
	 * Virtual destructor.  Does nothing, but exists so that subclasses
	 * that own the memory can run cleanup code for deallocating it.
	 */
	virtual ~input_buffer() = default;
	/**
	 * Constructs an empty buffer.
	 */
	input_buffer() : buffer(nullptr), size(0), cursor(0) {}
	/**
	 * Constructs a new buffer with a specified memory region and size.
	 * The cursor starts at the beginning of the region.
	 */
	input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0){}
	/**
	 * Returns a new input buffer referring into this input, clamped to the
	 * specified size.  If the requested buffer would fall outside the
	 * range of this one, then it returns an empty buffer.
	 *
	 * The returned buffer shares the same underlying storage as the
	 * original.  This is intended to be used for splitting up the various
	 * sections of a device tree blob.  Requesting a size of 0 will give a
	 * buffer that extends to the end of the available memory.
	 */
	input_buffer buffer_from_offset(int offset, int s=0);
	/**
	 * Dereferencing operator, allows the buffer to be treated as a char*
	 * and dereferenced to give a character.  This returns a null byte if
	 * the cursor is out of range.
	 */
	inline char operator*() const
	{
		if (cursor < 0 || cursor >= size)
		{
			return '\0';
		}
		return buffer[cursor];
	}
	/**
	 * Array subscripting operator, returns a character at the specified
	 * index offset from the current cursor.  The offset may be negative,
	 * to reread characters that have already been read.  If the current
	 * cursor plus offset is outside of the range, this returns a nul
	 * byte.
	 */
	inline char operator[](int offset) const
	{
		// Do the addition in a wider type: cursor + offset can exceed the
		// range of int for extreme offsets, and signed overflow is
		// undefined behaviour.
		const long long index = static_cast<long long>(cursor) + offset;
		if (index < 0 || index >= size)
		{
			return '\0';
		}
		return buffer[index];
	}
	/**
	 * Increments the cursor, iterating forward in the buffer.  No bounds
	 * check is performed here; the accessors check the cursor on read.
	 */
	inline input_buffer &operator++()
	{
		cursor++;
		return *this;
	}
	/**
	 * Returns a pointer to the first byte of the underlying storage,
	 * independent of the cursor position.
	 */
	const char *begin() const
	{
		return buffer;
	}
	/**
	 * Returns a pointer one past the last byte of the underlying storage.
	 */
	const char *end() const
	{
		return buffer + size;
	}
	/**
	 * Consumes a character.  Moves the cursor one character forward if the
	 * next character matches the argument, returning true.  If the current
	 * character does not match the argument, returns false.
	 */
	inline bool consume(char c)
	{
		if (*(*this) != c)
		{
			return false;
		}
		++(*this);
		return true;
	}
	/**
	 * Consumes a string.  If the (null-terminated) string passed as the
	 * argument appears in the input, advances the cursor to the end and
	 * returns true.  Returns false if the string does not appear at the
	 * current point in the input.
	 */
	bool consume(const char *str);
	/**
	 * Reads unsigned from char literal.  Returns true and advances
	 * the cursor to next char.
	 *
	 * The parsed value is returned via the argument.
	 */
	bool consume_char_literal(unsigned long long &outInt);
	/**
	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
	 * the cursor to the end of the integer if the cursor points to an
	 * integer, returns false and does not move the cursor otherwise.
	 *
	 * The parsed value is returned via the argument.
	 */
	bool consume_integer(unsigned long long &outInt);
	/**
	 * Reads an arithmetic expression (containing any of the normal C
	 * operators), evaluates it, and returns the result.
	 */
	bool consume_integer_expression(unsigned long long &outInt);
	/**
	 * Consumes two hex digits and return the resulting byte via the first
	 * argument.  If the next two characters are hex digits, returns true
	 * and advances the cursor.  If not, then returns false and leaves the
	 * cursor in place.
	 */
	bool consume_hex_byte(uint8_t &outByte);
	/**
	 * Template function that consumes a binary value in big-endian format
	 * from the input stream.  Returns true and advances the cursor if
	 * there is a value of the correct size.  This function assumes that
	 * all values must be natively aligned, and so advances the cursor to
	 * the correct alignment before reading.
	 *
	 * Returns false, leaving both the cursor and `out` untouched, if the
	 * cursor is negative or if fewer than the required number of bytes
	 * (alignment padding plus sizeof(T)) remain in the buffer.
	 */
	template<typename T>
	bool consume_binary(T &out)
	{
		const int type_size = sizeof(T);
		// A negative cursor would defeat the alignment arithmetic below
		// and could index in front of the buffer, so refuse it in the same
		// way that operator* and operator[] do.
		if (cursor < 0)
		{
			return false;
		}
		const int misalignment = cursor % type_size;
		const int align = (misalignment == 0) ? 0 : type_size - misalignment;
		// Compare against the space that is left rather than adding onto
		// the cursor, so that the check itself cannot overflow.
		if (size - cursor < align + type_size)
		{
			return false;
		}
		cursor += align;
		assert(cursor % type_size == 0);
		out = 0;
		// The check above guarantees that type_size bytes are available,
		// so the loop needs no per-byte bounds test.
		for (int i=0 ; i<type_size ; ++i)
		{
			out <<= 8;
			// Mask after the conversion so that a negative char does not
			// leak sign-extension bits into the result.
			out |= (static_cast<T>(buffer[cursor++]) & 0xff);
		}
		return true;
	}
#ifndef NDEBUG
	/**
	 * Dumps the current cursor value and the unconsumed values in the
	 * input buffer to the standard error.  This method is intended solely
	 * for debugging.
	 */
	void dump();
#endif
};
266 /**
267  * Explicit specialisation for reading a single byte.
268  */
269 template<>
consume_binary(uint8_t & out)270 inline bool input_buffer::consume_binary(uint8_t &out)
271 {
272 	if (size < cursor + 1)
273 	{
274 		return false;
275 	}
276 	out = buffer[cursor++];
277 	return true;
278 }
279 
280 /**
281  * A wrapper around input buffers used for parsing DTS files.  This manages a
282  * stack of input buffers to handle /include/ operations.
283  */
284 class text_input_buffer
285 {
286 	std::unordered_set<std::string> defines;
287 	/**
288 	 * The cursor is the input into the input stream where we are currently reading.
289 	 */
290 	int cursor = 0;
291 	/**
292 	 * The current stack of includes.  The current input is always from the top
293 	 * of the stack.
294 	 */
295 	std::stack<std::shared_ptr<input_buffer>> input_stack;
296 	/**
297 	 *
298 	 */
299 	const std::vector<std::string> include_paths;
300 	/**
301 	 * Reads forward past any spaces.  The DTS format is not whitespace
302 	 * sensitive and so we want to scan past whitespace when reading it.
303 	 */
304 	void skip_spaces();
305 	/**
306 	 * Returns the character immediately after the current one.
307 	 *
308 	 * This method does not look between files.
309 	 */
310 	char peek();
311 	/**
312 	 * If a /include/ token is encountered, then look up the corresponding
313 	 * input file, push it onto the input stack, and continue.
314 	 */
315 	void handle_include();
316 	/**
317 	 * The base directory for this file.
318 	 */
319 	const std::string dir;
320 	/**
321 	 * The file where dependencies should be output.
322 	 */
323 	FILE *depfile;
324 	public:
325 	/**
326 	 * Construct a new text input buffer with the specified buffer as the start
327 	 * of parsing and the specified set of input paths for handling new
328 	 * inclusions.
329 	 */
text_input_buffer(std::unique_ptr<input_buffer> && b,std::unordered_set<std::string> && d,std::vector<std::string> && i,const std::string directory,FILE * deps)330 	text_input_buffer(std::unique_ptr<input_buffer> &&b,
331 	                  std::unordered_set<std::string> &&d,
332 	                  std::vector<std::string> &&i,
333 	                  const std::string directory,
334 	                  FILE *deps)
335 		: defines(d), include_paths(i), dir(directory), depfile(deps)
336 	{
337 		input_stack.push(std::move(b));
338 	}
339 	/**
340 	 * Skips all characters in the input until the specified character is
341 	 * encountered.
342 	 */
343 	void skip_to(char);
344 	/**
345 	 * Parse an expression.  If `stopAtParen` is set, then only parse a number
346 	 * or a parenthetical expression, otherwise assume that either is the
347 	 * left-hand side of a binary expression and try to parse the right-hand
348 	 * side.
349 	 */
350 	expression_ptr parse_expression(bool stopAtParen=false);
351 	/**
352 	 * Parse a binary expression, having already parsed the right-hand side.
353 	 */
354 	expression_ptr parse_binary_expression(expression_ptr lhs);
355 	/**
356 	 * Return whether all input has been consumed.
357 	 */
finished()358 	bool finished()
359 	{
360 		return input_stack.empty() ||
361 			((input_stack.size() == 1) && input_stack.top()->finished());
362 	}
363 	/**
364 	 * Dereferencing operator.  Returns the current character in the top input buffer.
365 	 */
operator *()366 	inline char operator*()
367 	{
368 		if (input_stack.empty())
369 		{
370 			return 0;
371 		}
372 		return *(*input_stack.top());
373 	}
374 	/**
375 	 * Increments the cursor, iterating forward in the buffer.
376 	 */
operator ++()377 	inline text_input_buffer &operator++()
378 	{
379 		if (input_stack.empty())
380 		{
381 			return *this;
382 		}
383 		cursor++;
384 		auto &top = *input_stack.top();
385 		++top;
386 		if (top.finished())
387 		{
388 			input_stack.pop();
389 		}
390 		return *this;
391 	}
392 	/**
393 	 * Consumes a character.  Moves the cursor one character forward if the
394 	 * next character matches the argument, returning true.  If the current
395 	 * character does not match the argument, returns false.
396 	 */
consume(char c)397 	inline bool consume(char c)
398 	{
399 		if (*(*this) == c)
400 		{
401 			++(*this);
402 			return true;
403 		}
404 		return false;
405 	}
406 	/**
407 	 * Consumes a string.  If the (null-terminated) string passed as the
408 	 * argument appears in the input, advances the cursor to the end and
409 	 * returns true.  Returns false if the string does not appear at the
410 	 * current point in the input.
411 	 *
412 	 * This method does not scan between files.
413 	 */
consume(const char * str)414 	bool consume(const char *str)
415 	{
416 		if (input_stack.empty())
417 		{
418 			return false;
419 		}
420 		return input_stack.top()->consume(str);
421 	}
422 	/**
423 	 * Converts next char into unsigned
424 	 *
425 	 * The parsed value is returned via the argument.
426 	 *
427 	 * This method does not scan between files.
428 	 */
consume_char_literal(unsigned long long & outInt)429 	bool consume_char_literal(unsigned long long &outInt)
430 	{
431 		if (input_stack.empty())
432 		{
433 			return false;
434 		}
435 		return input_stack.top()->consume_char_literal(outInt);
436 	}
437 	/**
438 	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
439 	 * the cursor to the end of the integer if the cursor points to an
440 	 * integer, returns false and does not move the cursor otherwise.
441 	 *
442 	 * The parsed value is returned via the argument.
443 	 *
444 	 * This method does not scan between files.
445 	 */
consume_integer(unsigned long long & outInt)446 	bool consume_integer(unsigned long long &outInt)
447 	{
448 		if (input_stack.empty())
449 		{
450 			return false;
451 		}
452 		return input_stack.top()->consume_integer(outInt);
453 	}
454 	/**
455 	 * Reads an arithmetic expression (containing any of the normal C
456 	 * operators), evaluates it, and returns the result.
457 	 */
458 	bool consume_integer_expression(unsigned long long &outInt);
459 	/**
460 	 * Consumes two hex digits and return the resulting byte via the first
461 	 * argument.  If the next two characters are hex digits, returns true
462 	 * and advances the cursor.  If not, then returns false and leaves the
463 	 * cursor in place.
464 	 *
465 	 * This method does not scan between files.
466 	 */
consume_hex_byte(uint8_t & outByte)467 	bool consume_hex_byte(uint8_t &outByte)
468 	{
469 		if (input_stack.empty())
470 		{
471 			return false;
472 		}
473 		return input_stack.top()->consume_hex_byte(outByte);
474 	}
475 	/**
476 	 * Returns the longest string in the input buffer starting at the
477 	 * current cursor and composed entirely of characters that are valid in
478 	 * node names.
479 	*/
480 	std::string parse_node_name();
481 	/**
482 	 * Returns the longest string in the input buffer starting at the
483 	 * current cursor and composed entirely of characters that are valid in
484 	 * property names.
485 	 */
486 	std::string parse_property_name();
487 	/**
488 	 * Parses either a node or a property name.  If is_property is true on
489 	 * entry, then only property names are parsed.  If it is false, then it
490 	 * will be set, on return, to indicate whether the parsed name is only
491 	 * valid as a property.
492 	 */
493 	std::string parse_node_or_property_name(bool &is_property);
494 	/**
495 	 * Parses up to a specified character and returns the intervening
496 	 * characters as a string.
497 	 */
498 	std::string parse_to(char);
499 	/**
500 	 * Advances the cursor to the start of the next token, skipping
501 	 * comments and whitespace.  If the cursor already points to the start
502 	 * of a token, then this function does nothing.
503 	 */
504 	text_input_buffer &next_token();
505 	/**
506 	 * Location in the source file.  This should never be interpreted by
507 	 * anything other than error reporting functions of this class.  It will
508 	 * eventually become something more complex than an `int`.
509 	 */
510 	class source_location
511 	{
512 		friend class text_input_buffer;
513 		/**
514 		 * The text buffer object that included `b`.
515 		 */
516 		text_input_buffer &buffer;
517 		/**
518 		 * The underlying buffer that contains this location.
519 		 */
520 		std::shared_ptr<input_buffer> b;
521 		/**
522 		 * The offset within the current buffer of the source location.
523 		 */
524 		int cursor;
source_location(text_input_buffer & buf)525 		source_location(text_input_buffer &buf)
526 			: buffer(buf),
527 			  b(buf.input_stack.empty() ? nullptr : buf.input_stack.top()),
528 			  cursor(b ? b->cursor : 0) {}
529 		public:
530 		/**
531 		 * Report an error at this location.
532 		 */
report_error(const char * msg)533 		void report_error(const char *msg)
534 		{
535 			if (b)
536 			{
537 				buffer.parse_error(msg, *b, cursor);
538 			}
539 			else
540 			{
541 				buffer.parse_error(msg);
542 			}
543 		}
544 	};
545 	/**
546 	 * Returns the current source location.
547 	 */
location()548 	source_location location()
549 	{
550 		return { *this };
551 	}
552 	/**
553 	 * Prints a message indicating the location of a parse error.
554 	 */
555 	void parse_error(const char *msg);
556 	/**
557 	 * Reads the contents of a binary file into `b`.  The file name is assumed
558 	 * to be relative to one of the include paths.
559 	 *
560 	 * Returns true if the file exists and can be read, false otherwise.
561 	 */
562 	bool read_binary_file(const std::string &filename, byte_buffer &b);
563 	private:
564 	/**
565 	 * Prints a message indicating the location of a parse error, given a
566 	 * specified location.  This is used when input has already moved beyond
567 	 * the location that caused the failure.
568 	 */
569 	void parse_error(const char *msg, input_buffer &b, int loc);
570 };
571 
572 } // namespace dtc
573 
574 #endif // !_INPUT_BUFFER_HH_
575