xref: /freebsd/usr.bin/dtc/input_buffer.hh (revision a64729f5077d77e13b9497cb33ecb3c82e606ee8)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 David Chisnall
5  * All rights reserved.
6  *
7  * This software was developed by SRI International and the University of
8  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
9  * ("CTSRD"), as part of the DARPA CRASH research programme.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #ifndef _INPUT_BUFFER_HH_
34 #define _INPUT_BUFFER_HH_
35 #include "util.hh"
36 #include <assert.h>
37 #include <stack>
38 #include <string>
39 #include <unordered_set>
40 
41 namespace dtc
42 {
43 
namespace {
// Only a forward declaration of the expression node type is needed by this
// header; the type is used here solely through an owning pointer alias.
struct expression;
using expression_ptr = std::unique_ptr<expression>;
}
48 
/**
 * Class encapsulating the input file.  Can be used as a const char*, but has
 * range checking.  Attempting to access anything out of range will return a 0
 * byte.  The input buffer can be cheaply copied, without copying the
 * underlying memory, however it is the user's responsibility to ensure that
 * such copies do not persist beyond the lifetime of the underlying memory.
 *
 * This also contains methods for reporting errors and for consuming the token
 * stream.
 */
class input_buffer
{
	friend class text_input_buffer;
	protected:
	/**
	 * The buffer.  This class doesn't own the buffer, but the
	 * mmap_input_buffer subclass does.
	 */
	const char* buffer;
	/**
	 * The size of the buffer, in bytes.
	 */
	int size;
	private:
	/**
	 * The current place in the buffer where we are reading.  This class
	 * keeps a separate size, pointer, and cursor so that we can move
	 * forwards and backwards and still have checks that we haven't fallen
	 * off either end.
	 */
	int cursor;
	/**
	 * Private constructor.  This is used to create input buffers that
	 * refer to the same memory, but have different cursors.
	 */
	input_buffer(const char* b, int s, int c) : buffer(b), size(s),
		cursor(c) {}
	/**
	 * Returns true if `idx` is a valid index into the buffer.  The index
	 * is taken as a 64-bit value so that callers can add an arbitrary
	 * `int` offset to the cursor without risking signed overflow.
	 */
	bool in_range(long long idx) const
	{
		return (idx >= 0) && (idx < size);
	}
	public:
	/**
	 * Returns the file name associated with this buffer.  The base class
	 * has no file and so returns an empty string.
	 */
	virtual const std::string &filename() const
	{
		static const std::string empty_name;
		return empty_name;
	}
	static std::unique_ptr<input_buffer> buffer_for_file(const std::string &path,
	                                                     bool warn=true);
	/**
	 * Skips all characters in the input until the specified character is
	 * encountered.
	 */
	void skip_to(char);
	/**
	 * Parses up to a specified character and returns the intervening
	 * characters as a string.
	 */
	std::string parse_to(char);
	/**
	 * Return whether all input has been consumed.
	 */
	bool finished() const { return cursor >= size; }
	/**
	 * Virtual destructor.  Does nothing, but exists so that subclasses
	 * that own the memory can run cleanup code for deallocating it.
	 */
	virtual ~input_buffer() = default;
	/**
	 * Constructs an empty buffer.
	 */
	input_buffer() : buffer(nullptr), size(0), cursor(0) {}
	/**
	 * Constructs a new buffer with a specified memory region and size.
	 */
	input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0){}
	/**
	 * Returns a new input buffer referring into this input, clamped to the
	 * specified size.  If the requested buffer would fall outside the
	 * range of this one, then it returns an empty buffer.
	 *
	 * The returned buffer shares the same underlying storage as the
	 * original.  This is intended to be used for splitting up the various
	 * sections of a device tree blob.  Requesting a size of 0 will give a
	 * buffer that extends to the end of the available memory.
	 */
	input_buffer buffer_from_offset(int offset, int s=0);
	/**
	 * Dereferencing operator, allows the buffer to be treated as a char*
	 * and dereferenced to give a character.  This returns a null byte if
	 * the cursor is out of range.
	 */
	inline char operator*() const
	{
		return in_range(cursor) ? buffer[cursor] : '\0';
	}
	/**
	 * Array subscripting operator, returns a character at the specified
	 * index offset from the current cursor.  The offset may be negative,
	 * to reread characters that have already been read.  If the current
	 * cursor plus offset is outside of the range, this returns a nul
	 * byte.
	 */
	inline char operator[](int offset) const
	{
		// Widen before adding: cursor + offset may not fit in an int.
		const long long idx = static_cast<long long>(cursor) + offset;
		return in_range(idx) ? buffer[idx] : '\0';
	}
	/**
	 * Increments the cursor, iterating forward in the buffer.
	 */
	inline input_buffer &operator++()
	{
		cursor++;
		return *this;
	}
	/**
	 * Returns a pointer to the first byte of the underlying memory,
	 * irrespective of the cursor position.
	 */
	const char *begin() const
	{
		return buffer;
	}
	/**
	 * Returns a pointer one past the last byte of the underlying memory.
	 */
	const char *end() const
	{
		return buffer + size;
	}
	/**
	 * Consumes a character.  Moves the cursor one character forward if the
	 * next character matches the argument, returning true.  If the current
	 * character does not match the argument, returns false.
	 */
	inline bool consume(char c)
	{
		if (*(*this) == c)
		{
			++(*this);
			return true;
		}
		return false;
	}
	/**
	 * Consumes a string.  If the (null-terminated) string passed as the
	 * argument appears in the input, advances the cursor to the end and
	 * returns true.  Returns false if the string does not appear at the
	 * current point in the input.
	 */
	bool consume(const char *str);
	/**
	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
	 * the cursor to the end of the integer if the cursor points to an
	 * integer, returns false and does not move the cursor otherwise.
	 *
	 * The parsed value is returned via the argument.
	 */
	bool consume_integer(unsigned long long &outInt);
	/**
	 * Reads an arithmetic expression (containing any of the normal C
	 * operators), evaluates it, and returns the result.
	 */
	bool consume_integer_expression(unsigned long long &outInt);
	/**
	 * Consumes two hex digits and return the resulting byte via the first
	 * argument.  If the next two characters are hex digits, returns true
	 * and advances the cursor.  If not, then returns false and leaves the
	 * cursor in place.
	 */
	bool consume_hex_byte(uint8_t &outByte);
	/**
	 * Template function that consumes a binary value in big-endian format
	 * from the input stream.  Returns true and advances the cursor if
	 * there is a value of the correct size.  This function assumes that
	 * all values must be natively aligned, and so advances the cursor to
	 * the next multiple of sizeof(T) (measured from the start of the
	 * buffer) before reading.
	 *
	 * If there is not enough data left for the padding plus the value,
	 * returns false and leaves both the cursor and `out` untouched.
	 */
	template<typename T>
	bool consume_binary(T &out)
	{
		const int type_size = sizeof(T);
		// A cursor before the start of the buffer has nothing to read.
		if (cursor < 0)
		{
			return false;
		}
		const int misalign = cursor % type_size;
		const int align = (misalign != 0) ? (type_size - misalign) : 0;
		// Do the bounds arithmetic in 64 bits so that a cursor close to
		// INT_MAX cannot overflow.
		if (static_cast<long long>(cursor) + align + type_size > size)
		{
			return false;
		}
		cursor += align;
		assert(cursor % type_size == 0);
		out = 0;
		// Most significant byte first.  The bounds check above already
		// guarantees that all type_size bytes are available.
		for (int i=0 ; i<type_size ; ++i)
		{
			out <<= 8;
			// Go via unsigned char so that a signed char is never
			// sign-extended into the upper bits of the result.
			out |= static_cast<T>(
				static_cast<unsigned char>(buffer[cursor++]));
		}
		return true;
	}
#ifndef NDEBUG
	/**
	 * Dumps the current cursor value and the unconsumed values in the
	 * input buffer to the standard error.  This method is intended solely
	 * for debugging.
	 */
	void dump();
#endif
};
259 /**
260  * Explicit specialisation for reading a single byte.
261  */
262 template<>
263 inline bool input_buffer::consume_binary(uint8_t &out)
264 {
265 	if (size < cursor + 1)
266 	{
267 		return false;
268 	}
269 	out = buffer[cursor++];
270 	return true;
271 }
272 
273 /**
274  * An input buffer subclass used for parsing DTS files.  This manages a stack
275  * of input buffers to handle /input/ operations.
276  */
277 class text_input_buffer
278 {
279 	std::unordered_set<std::string> defines;
280 	/**
281 	 * The cursor is the input into the input stream where we are currently reading.
282 	 */
283 	int cursor = 0;
284 	/**
285 	 * The current stack of includes.  The current input is always from the top
286 	 * of the stack.
287 	 */
288 	std::stack<std::shared_ptr<input_buffer>> input_stack;
289 	/**
290 	 *
291 	 */
292 	const std::vector<std::string> include_paths;
293 	/**
294 	 * Reads forward past any spaces.  The DTS format is not whitespace
295 	 * sensitive and so we want to scan past whitespace when reading it.
296 	 */
297 	void skip_spaces();
298 	/**
299 	 * Returns the character immediately after the current one.
300 	 *
301 	 * This method does not look between files.
302 	 */
303 	char peek();
304 	/**
305 	 * If a /include/ token is encountered, then look up the corresponding
306 	 * input file, push it onto the input stack, and continue.
307 	 */
308 	void handle_include();
309 	/**
310 	 * The base directory for this file.
311 	 */
312 	const std::string dir;
313 	/**
314 	 * The file where dependencies should be output.
315 	 */
316 	FILE *depfile;
317 	public:
318 	/**
319 	 * Construct a new text input buffer with the specified buffer as the start
320 	 * of parsing and the specified set of input paths for handling new
321 	 * inclusions.
322 	 */
323 	text_input_buffer(std::unique_ptr<input_buffer> &&b,
324 	                  std::unordered_set<std::string> &&d,
325 	                  std::vector<std::string> &&i,
326 	                  const std::string directory,
327 	                  FILE *deps)
328 		: defines(d), include_paths(i), dir(directory), depfile(deps)
329 	{
330 		input_stack.push(std::move(b));
331 	}
332 	/**
333 	 * Skips all characters in the input until the specified character is
334 	 * encountered.
335 	 */
336 	void skip_to(char);
337 	/**
338 	 * Parse an expression.  If `stopAtParen` is set, then only parse a number
339 	 * or a parenthetical expression, otherwise assume that either is the
340 	 * left-hand side of a binary expression and try to parse the right-hand
341 	 * side.
342 	 */
343 	expression_ptr parse_expression(bool stopAtParen=false);
344 	/**
345 	 * Parse a binary expression, having already parsed the right-hand side.
346 	 */
347 	expression_ptr parse_binary_expression(expression_ptr lhs);
348 	/**
349 	 * Return whether all input has been consumed.
350 	 */
351 	bool finished()
352 	{
353 		return input_stack.empty() ||
354 			((input_stack.size() == 1) && input_stack.top()->finished());
355 	}
356 	/**
357 	 * Dereferencing operator.  Returns the current character in the top input buffer.
358 	 */
359 	inline char operator*()
360 	{
361 		if (input_stack.empty())
362 		{
363 			return 0;
364 		}
365 		return *(*input_stack.top());
366 	}
367 	/**
368 	 * Increments the cursor, iterating forward in the buffer.
369 	 */
370 	inline text_input_buffer &operator++()
371 	{
372 		if (input_stack.empty())
373 		{
374 			return *this;
375 		}
376 		cursor++;
377 		auto &top = *input_stack.top();
378 		++top;
379 		if (top.finished())
380 		{
381 			input_stack.pop();
382 		}
383 		return *this;
384 	}
385 	/**
386 	 * Consumes a character.  Moves the cursor one character forward if the
387 	 * next character matches the argument, returning true.  If the current
388 	 * character does not match the argument, returns false.
389 	 */
390 	inline bool consume(char c)
391 	{
392 		if (*(*this) == c)
393 		{
394 			++(*this);
395 			return true;
396 		}
397 		return false;
398 	}
399 	/**
400 	 * Consumes a string.  If the (null-terminated) string passed as the
401 	 * argument appears in the input, advances the cursor to the end and
402 	 * returns true.  Returns false if the string does not appear at the
403 	 * current point in the input.
404 	 *
405 	 * This method does not scan between files.
406 	 */
407 	bool consume(const char *str)
408 	{
409 		if (input_stack.empty())
410 		{
411 			return false;
412 		}
413 		return input_stack.top()->consume(str);
414 	}
415 	/**
416 	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
417 	 * the cursor to the end of the integer if the cursor points to an
418 	 * integer, returns false and does not move the cursor otherwise.
419 	 *
420 	 * The parsed value is returned via the argument.
421 	 *
422 	 * This method does not scan between files.
423 	 */
424 	bool consume_integer(unsigned long long &outInt)
425 	{
426 		if (input_stack.empty())
427 		{
428 			return false;
429 		}
430 		return input_stack.top()->consume_integer(outInt);
431 	}
432 	/**
433 	 * Reads an arithmetic expression (containing any of the normal C
434 	 * operators), evaluates it, and returns the result.
435 	 */
436 	bool consume_integer_expression(unsigned long long &outInt);
437 	/**
438 	 * Consumes two hex digits and return the resulting byte via the first
439 	 * argument.  If the next two characters are hex digits, returns true
440 	 * and advances the cursor.  If not, then returns false and leaves the
441 	 * cursor in place.
442 	 *
443 	 * This method does not scan between files.
444 	 */
445 	bool consume_hex_byte(uint8_t &outByte)
446 	{
447 		if (input_stack.empty())
448 		{
449 			return false;
450 		}
451 		return input_stack.top()->consume_hex_byte(outByte);
452 	}
453 	/**
454 	 * Returns the longest string in the input buffer starting at the
455 	 * current cursor and composed entirely of characters that are valid in
456 	 * node names.
457 	*/
458 	std::string parse_node_name();
459 	/**
460 	 * Returns the longest string in the input buffer starting at the
461 	 * current cursor and composed entirely of characters that are valid in
462 	 * property names.
463 	 */
464 	std::string parse_property_name();
465 	/**
466 	 * Parses either a node or a property name.  If is_property is true on
467 	 * entry, then only property names are parsed.  If it is false, then it
468 	 * will be set, on return, to indicate whether the parsed name is only
469 	 * valid as a property.
470 	 */
471 	std::string parse_node_or_property_name(bool &is_property);
472 	/**
473 	 * Parses up to a specified character and returns the intervening
474 	 * characters as a string.
475 	 */
476 	std::string parse_to(char);
477 	/**
478 	 * Advances the cursor to the start of the next token, skipping
479 	 * comments and whitespace.  If the cursor already points to the start
480 	 * of a token, then this function does nothing.
481 	 */
482 	text_input_buffer &next_token();
483 	/**
484 	 * Location in the source file.  This should never be interpreted by
485 	 * anything other than error reporting functions of this class.  It will
486 	 * eventually become something more complex than an `int`.
487 	 */
488 	class source_location
489 	{
490 		friend class text_input_buffer;
491 		/**
492 		 * The text buffer object that included `b`.
493 		 */
494 		text_input_buffer &buffer;
495 		/**
496 		 * The underlying buffer that contains this location.
497 		 */
498 		std::shared_ptr<input_buffer> b;
499 		/**
500 		 * The offset within the current buffer of the source location.
501 		 */
502 		int cursor;
503 		source_location(text_input_buffer &buf)
504 			: buffer(buf),
505 			  b(buf.input_stack.empty() ? nullptr : buf.input_stack.top()),
506 			  cursor(b ? b->cursor : 0) {}
507 		public:
508 		/**
509 		 * Report an error at this location.
510 		 */
511 		void report_error(const char *msg)
512 		{
513 			if (b)
514 			{
515 				buffer.parse_error(msg, *b, cursor);
516 			}
517 			else
518 			{
519 				buffer.parse_error(msg);
520 			}
521 		}
522 	};
523 	/**
524 	 * Returns the current source location.
525 	 */
526 	source_location location()
527 	{
528 		return { *this };
529 	}
530 	/**
531 	 * Prints a message indicating the location of a parse error.
532 	 */
533 	void parse_error(const char *msg);
534 	/**
535 	 * Reads the contents of a binary file into `b`.  The file name is assumed
536 	 * to be relative to one of the include paths.
537 	 *
538 	 * Returns true if the file exists and can be read, false otherwise.
539 	 */
540 	bool read_binary_file(const std::string &filename, byte_buffer &b);
541 	private:
542 	/**
543 	 * Prints a message indicating the location of a parse error, given a
544 	 * specified location.  This is used when input has already moved beyond
545 	 * the location that caused the failure.
546 	 */
547 	void parse_error(const char *msg, input_buffer &b, int loc);
548 };
549 
550 } // namespace dtc
551 
552 #endif // !_INPUT_BUFFER_HH_
553