xref: /freebsd/usr.bin/dtc/input_buffer.hh (revision 6829dae12bb055451fa467da4589c43bd03b1e64)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2013 David Chisnall
5  * All rights reserved.
6  *
7  * This software was developed by SRI International and the University of
8  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
9  * ("CTSRD"), as part of the DARPA CRASH research programme.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $FreeBSD$
33  */
34 
35 #ifndef _INPUT_BUFFER_HH_
36 #define _INPUT_BUFFER_HH_
37 #include "util.hh"
38 #include <assert.h>
39 #include <stack>
40 #include <string>
41 #include <unordered_set>
42 
43 namespace dtc
44 {
45 
namespace {
/**
 * Forward declaration of the expression node type.  Only the name is needed
 * in this header; the definition is not visible here.
 */
struct expression;
/**
 * Owning pointer to an expression.
 */
using expression_ptr = std::unique_ptr<expression>;
}
50 
/**
 * Class encapsulating the input file.  Can be used as a const char*, but has
 * range checking.  Attempting to access anything out of range will return a 0
 * byte.  The input buffer can be cheaply copied, without copying the
 * underlying memory, however it is the user's responsibility to ensure that
 * such copies do not persist beyond the lifetime of the underlying memory.
 *
 * This also contains methods for reporting errors and for consuming the token
 * stream.
 */
class input_buffer
{
	friend class text_input_buffer;
	protected:
	/**
	 * The buffer.  This class doesn't own the buffer, but the
	 * mmap_input_buffer subclass does.
	 */
	const char* buffer;
	/**
	 * The size of the buffer, in bytes.
	 */
	int size;
	private:
	/**
	 * The current place in the buffer where we are reading.  This class
	 * keeps a separate size, pointer, and cursor so that we can move
	 * forwards and backwards and still have checks that we haven't fallen
	 * off either end.
	 */
	int cursor;
	/**
	 * Private constructor.  This is used to create input buffers that
	 * refer to the same memory, but have different cursors.
	 */
	input_buffer(const char* b, int s, int c) : buffer(b), size(s),
		cursor(c) {}
	public:
	/**
	 * Returns the file name associated with this buffer.  The base class
	 * has no file, so it returns a reference to a shared empty string;
	 * being virtual, subclasses may override it.
	 */
	virtual const std::string &filename() const
	{
		static std::string s;
		return s;
	}
	/**
	 * Factory returning a buffer for the file at `path`.  The definition
	 * is not in this header; presumably `warn` controls whether a
	 * diagnostic is printed on failure -- confirm against the
	 * implementation.
	 */
	static std::unique_ptr<input_buffer> buffer_for_file(const std::string &path,
	                                                     bool warn=true);
	/**
	 * Skips all characters in the input until the specified character is
	 * encountered.
	 */
	void skip_to(char);
	/**
	 * Parses up to a specified character and returns the intervening
	 * characters as a string.
	 */
	std::string parse_to(char);
	/**
	 * Return whether all input has been consumed, i.e. whether the cursor
	 * is at or beyond the end of the buffer.
	 */
	bool finished() const { return cursor >= size; }
	/**
	 * Virtual destructor.  Does nothing, but exists so that subclasses
	 * that own the memory can run cleanup code for deallocating it.
	 */
	virtual ~input_buffer() {}
	/**
	 * Constructs an empty buffer.
	 */
	input_buffer() : buffer(nullptr), size(0), cursor(0) {}
	/**
	 * Constructs a new buffer with a specified memory region and size.
	 * The cursor starts at the beginning of the region.
	 */
	input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0) {}
	/**
	 * Returns a new input buffer referring into this input, clamped to the
	 * specified size.  If the requested buffer would fall outside the
	 * range of this one, then it returns an empty buffer.
	 *
	 * The returned buffer shares the same underlying storage as the
	 * original.  This is intended to be used for splitting up the various
	 * sections of a device tree blob.  Requesting a size of 0 will give a
	 * buffer that extends to the end of the available memory.
	 */
	input_buffer buffer_from_offset(int offset, int s=0);
	/**
	 * Dereferencing operator, allows the buffer to be treated as a char*
	 * and dereferenced to give a character.  This returns a null byte if
	 * the cursor is out of range.
	 */
	inline char operator*() const
	{
		if (cursor < 0 || cursor >= size)
		{
			return '\0';
		}
		return buffer[cursor];
	}
	/**
	 * Array subscripting operator, returns a character at the specified
	 * index offset from the current cursor.  The offset may be negative,
	 * to reread characters that have already been read.  If the current
	 * cursor plus offset is outside of the range, this returns a nul
	 * byte.
	 */
	inline char operator[](int offset) const
	{
		// Do the addition in a wider type so that an extreme offset
		// cannot trigger signed overflow before the range check runs.
		const long long index = static_cast<long long>(cursor) + offset;
		if (index < 0 || index >= size)
		{
			return '\0';
		}
		return buffer[index];
	}
	/**
	 * Increments the cursor, iterating forward in the buffer.  No bounds
	 * check is performed here; reads past the end yield a nul byte.
	 */
	inline input_buffer &operator++()
	{
		cursor++;
		return *this;
	}
	/**
	 * Returns a pointer to the first byte of the underlying memory
	 * (independent of the cursor).
	 */
	const char *begin() const
	{
		return buffer;
	}
	/**
	 * Returns a pointer one past the last byte of the underlying memory.
	 */
	const char *end() const
	{
		return buffer + size;
	}
	/**
	 * Consumes a character.  Moves the cursor one character forward if the
	 * current character matches the argument, returning true.  If the
	 * current character does not match the argument, returns false and
	 * leaves the cursor in place.
	 */
	inline bool consume(char c)
	{
		if (*(*this) == c)
		{
			++(*this);
			return true;
		}
		return false;
	}
	/**
	 * Consumes a string.  If the (null-terminated) string passed as the
	 * argument appears in the input, advances the cursor to the end and
	 * returns true.  Returns false if the string does not appear at the
	 * current point in the input.
	 */
	bool consume(const char *str);
	/**
	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
	 * the cursor to the end of the integer if the cursor points to an
	 * integer, returns false and does not move the cursor otherwise.
	 *
	 * The parsed value is returned via the argument.
	 */
	bool consume_integer(unsigned long long &outInt);
	/**
	 * Reads an arithmetic expression (containing any of the normal C
	 * operators), evaluates it, and returns the result via the argument.
	 */
	bool consume_integer_expression(unsigned long long &outInt);
	/**
	 * Consumes two hex digits and return the resulting byte via the first
	 * argument.  If the next two characters are hex digits, returns true
	 * and advances the cursor.  If not, then returns false and leaves the
	 * cursor in place.
	 */
	bool consume_hex_byte(uint8_t &outByte);
	/**
	 * Template function that consumes a binary value in big-endian format
	 * from the input stream.  Returns true and advances the cursor if
	 * there is a value of the correct size.  This function assumes that
	 * all values must be natively aligned, and so advances the cursor to
	 * the correct alignment before reading.
	 *
	 * On failure (not enough bytes remaining after alignment, or a cursor
	 * that is outside of the buffer) this returns false and neither the
	 * cursor nor `out` is modified.
	 */
	template<typename T>
	bool consume_binary(T &out)
	{
		const int type_size = static_cast<int>(sizeof(T));
		// A cursor before the start of the buffer can never be read from.
		if (cursor < 0)
		{
			return false;
		}
		const int misalignment = cursor % type_size;
		const int align =
			(misalignment == 0) ? 0 : (type_size - misalignment);
		// Compare against the remaining space rather than summing onto
		// the cursor, so that the check itself cannot overflow.
		if (cursor > size || (size - cursor) < (align + type_size))
		{
			return false;
		}
		cursor += align;
		assert(cursor % type_size == 0);
		out = 0;
		for (int i=0 ; i<type_size ; ++i)
		{
			out <<= 8;
			// Go via unsigned char so that bytes with the top bit set
			// are not sign extended into the higher-order bits.
			out |= static_cast<T>(
				static_cast<unsigned char>(buffer[cursor++]));
		}
		return true;
	}
#ifndef NDEBUG
	/**
	 * Dumps the current cursor value and the unconsumed values in the
	 * input buffer to the standard error.  This method is intended solely
	 * for debugging.
	 */
	void dump();
#endif
};
261 /**
262  * Explicit specialisation for reading a single byte.
263  */
264 template<>
265 inline bool input_buffer::consume_binary(uint8_t &out)
266 {
267 	if (size < cursor + 1)
268 	{
269 		return false;
270 	}
271 	out = buffer[cursor++];
272 	return true;
273 }
274 
275 /**
276  * An input buffer subclass used for parsing DTS files.  This manages a stack
277  * of input buffers to handle /input/ operations.
278  */
279 class text_input_buffer
280 {
281 	std::unordered_set<std::string> defines;
282 	/**
283 	 * The cursor is the input into the input stream where we are currently reading.
284 	 */
285 	int cursor = 0;
286 	/**
287 	 * The current stack of includes.  The current input is always from the top
288 	 * of the stack.
289 	 */
290 	std::stack<std::shared_ptr<input_buffer>> input_stack;
291 	/**
292 	 *
293 	 */
294 	const std::vector<std::string> include_paths;
295 	/**
296 	 * Reads forward past any spaces.  The DTS format is not whitespace
297 	 * sensitive and so we want to scan past whitespace when reading it.
298 	 */
299 	void skip_spaces();
300 	/**
301 	 * Returns the character immediately after the current one.
302 	 *
303 	 * This method does not look between files.
304 	 */
305 	char peek();
306 	/**
307 	 * If a /include/ token is encountered, then look up the corresponding
308 	 * input file, push it onto the input stack, and continue.
309 	 */
310 	void handle_include();
311 	/**
312 	 * The base directory for this file.
313 	 */
314 	const std::string dir;
315 	/**
316 	 * The file where dependencies should be output.
317 	 */
318 	FILE *depfile;
319 	public:
320 	/**
321 	 * Construct a new text input buffer with the specified buffer as the start
322 	 * of parsing and the specified set of input paths for handling new
323 	 * inclusions.
324 	 */
325 	text_input_buffer(std::unique_ptr<input_buffer> &&b,
326 	                  std::unordered_set<std::string> &&d,
327 	                  std::vector<std::string> &&i,
328 	                  const std::string directory,
329 	                  FILE *deps)
330 		: defines(d), include_paths(i), dir(directory), depfile(deps)
331 	{
332 		input_stack.push(std::move(b));
333 	}
334 	/**
335 	 * Skips all characters in the input until the specified character is
336 	 * encountered.
337 	 */
338 	void skip_to(char);
339 	/**
340 	 * Parse an expression.  If `stopAtParen` is set, then only parse a number
341 	 * or a parenthetical expression, otherwise assume that either is the
342 	 * left-hand side of a binary expression and try to parse the right-hand
343 	 * side.
344 	 */
345 	expression_ptr parse_expression(bool stopAtParen=false);
346 	/**
347 	 * Parse a binary expression, having already parsed the right-hand side.
348 	 */
349 	expression_ptr parse_binary_expression(expression_ptr lhs);
350 	/**
351 	 * Return whether all input has been consumed.
352 	 */
353 	bool finished()
354 	{
355 		return input_stack.empty() ||
356 			((input_stack.size() == 1) && input_stack.top()->finished());
357 	}
358 	/**
359 	 * Dereferencing operator.  Returns the current character in the top input buffer.
360 	 */
361 	inline char operator*()
362 	{
363 		if (input_stack.empty())
364 		{
365 			return 0;
366 		}
367 		return *(*input_stack.top());
368 	}
369 	/**
370 	 * Increments the cursor, iterating forward in the buffer.
371 	 */
372 	inline text_input_buffer &operator++()
373 	{
374 		if (input_stack.empty())
375 		{
376 			return *this;
377 		}
378 		cursor++;
379 		auto &top = *input_stack.top();
380 		++top;
381 		if (top.finished())
382 		{
383 			input_stack.pop();
384 		}
385 		return *this;
386 	}
387 	/**
388 	 * Consumes a character.  Moves the cursor one character forward if the
389 	 * next character matches the argument, returning true.  If the current
390 	 * character does not match the argument, returns false.
391 	 */
392 	inline bool consume(char c)
393 	{
394 		if (*(*this) == c)
395 		{
396 			++(*this);
397 			return true;
398 		}
399 		return false;
400 	}
401 	/**
402 	 * Consumes a string.  If the (null-terminated) string passed as the
403 	 * argument appears in the input, advances the cursor to the end and
404 	 * returns true.  Returns false if the string does not appear at the
405 	 * current point in the input.
406 	 *
407 	 * This method does not scan between files.
408 	 */
409 	bool consume(const char *str)
410 	{
411 		if (input_stack.empty())
412 		{
413 			return false;
414 		}
415 		return input_stack.top()->consume(str);
416 	}
417 	/**
418 	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
419 	 * the cursor to the end of the integer if the cursor points to an
420 	 * integer, returns false and does not move the cursor otherwise.
421 	 *
422 	 * The parsed value is returned via the argument.
423 	 *
424 	 * This method does not scan between files.
425 	 */
426 	bool consume_integer(unsigned long long &outInt)
427 	{
428 		if (input_stack.empty())
429 		{
430 			return false;
431 		}
432 		return input_stack.top()->consume_integer(outInt);
433 	}
434 	/**
435 	 * Reads an arithmetic expression (containing any of the normal C
436 	 * operators), evaluates it, and returns the result.
437 	 */
438 	bool consume_integer_expression(unsigned long long &outInt);
439 	/**
440 	 * Consumes two hex digits and return the resulting byte via the first
441 	 * argument.  If the next two characters are hex digits, returns true
442 	 * and advances the cursor.  If not, then returns false and leaves the
443 	 * cursor in place.
444 	 *
445 	 * This method does not scan between files.
446 	 */
447 	bool consume_hex_byte(uint8_t &outByte)
448 	{
449 		if (input_stack.empty())
450 		{
451 			return false;
452 		}
453 		return input_stack.top()->consume_hex_byte(outByte);
454 	}
455 	/**
456 	 * Returns the longest string in the input buffer starting at the
457 	 * current cursor and composed entirely of characters that are valid in
458 	 * node names.
459 	*/
460 	std::string parse_node_name();
461 	/**
462 	 * Returns the longest string in the input buffer starting at the
463 	 * current cursor and composed entirely of characters that are valid in
464 	 * property names.
465 	 */
466 	std::string parse_property_name();
467 	/**
468 	 * Parses either a node or a property name.  If is_property is true on
469 	 * entry, then only property names are parsed.  If it is false, then it
470 	 * will be set, on return, to indicate whether the parsed name is only
471 	 * valid as a property.
472 	 */
473 	std::string parse_node_or_property_name(bool &is_property);
474 	/**
475 	 * Parses up to a specified character and returns the intervening
476 	 * characters as a string.
477 	 */
478 	std::string parse_to(char);
479 	/**
480 	 * Advances the cursor to the start of the next token, skipping
481 	 * comments and whitespace.  If the cursor already points to the start
482 	 * of a token, then this function does nothing.
483 	 */
484 	text_input_buffer &next_token();
485 	/**
486 	 * Location in the source file.  This should never be interpreted by
487 	 * anything other than error reporting functions of this class.  It will
488 	 * eventually become something more complex than an `int`.
489 	 */
490 	class source_location
491 	{
492 		friend class text_input_buffer;
493 		/**
494 		 * The text buffer object that included `b`.
495 		 */
496 		text_input_buffer &buffer;
497 		/**
498 		 * The underlying buffer that contains this location.
499 		 */
500 		std::shared_ptr<input_buffer> b;
501 		/**
502 		 * The offset within the current buffer of the source location.
503 		 */
504 		int cursor;
505 		source_location(text_input_buffer &buf)
506 			: buffer(buf),
507 			  b(buf.input_stack.empty() ? nullptr : buf.input_stack.top()),
508 			  cursor(b ? b->cursor : 0) {}
509 		public:
510 		/**
511 		 * Report an error at this location.
512 		 */
513 		void report_error(const char *msg)
514 		{
515 			if (b)
516 			{
517 				buffer.parse_error(msg, *b, cursor);
518 			}
519 			else
520 			{
521 				buffer.parse_error(msg);
522 			}
523 		}
524 	};
525 	/**
526 	 * Returns the current source location.
527 	 */
528 	source_location location()
529 	{
530 		return { *this };
531 	}
532 	/**
533 	 * Prints a message indicating the location of a parse error.
534 	 */
535 	void parse_error(const char *msg);
536 	/**
537 	 * Reads the contents of a binary file into `b`.  The file name is assumed
538 	 * to be relative to one of the include paths.
539 	 *
540 	 * Returns true if the file exists and can be read, false otherwise.
541 	 */
542 	bool read_binary_file(const std::string &filename, byte_buffer &b);
543 	private:
544 	/**
545 	 * Prints a message indicating the location of a parse error, given a
546 	 * specified location.  This is used when input has already moved beyond
547 	 * the location that caused the failure.
548 	 */
549 	void parse_error(const char *msg, input_buffer &b, int loc);
550 };
551 
552 } // namespace dtc
553 
554 #endif // !_INPUT_BUFFER_HH_
555