xref: /freebsd/usr.bin/dtc/input_buffer.hh (revision 97cb52fa9aefd90fad38790fded50905aeeb9b9e)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2013 David Chisnall
5  * All rights reserved.
6  *
7  * This software was developed by SRI International and the University of
8  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
9  * ("CTSRD"), as part of the DARPA CRASH research programme.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $FreeBSD$
33  */
34 
35 #ifndef _INPUT_BUFFER_HH_
36 #define _INPUT_BUFFER_HH_
37 #include "util.hh"
38 #include <assert.h>
39 #include <stack>
40 #include <string>
41 #include <unordered_set>
42 
43 namespace dtc
44 {
45 
namespace {
/**
 * Forward declaration of the expression node type.  Only the name is needed
 * in this header; the definition is not visible here.
 */
struct expression;
/**
 * Owning pointer to an expression, as returned by the expression parsing
 * methods of text_input_buffer.
 */
using expression_ptr = std::unique_ptr<expression>;
}
50 
51 /**
52  * Class encapsulating the input file.  Can be used as a const char*, but has
53  * range checking.  Attempting to access anything out of range will return a 0
54  * byte.  The input buffer can be cheaply copied, without copying the
55  * underlying memory, however it is the user's responsibility to ensure that
56  * such copies do not persist beyond the lifetime of the underlying memory.
57  *
58  * This also contains methods for reporting errors and for consuming the token
59  * stream.
60  */
class input_buffer
{
	friend class text_input_buffer;
	protected:
	/**
	 * The buffer.  This class doesn't own the buffer, but the
	 * mmap_input_buffer subclass does.
	 */
	const char* buffer;
	/**
	 * The size of the buffer, in bytes.
	 */
	int size;
	private:
	/**
	 * The current place in the buffer where we are reading.  This class
	 * keeps a separate size, pointer, and cursor so that we can move
	 * forwards and backwards and still have checks that we haven't fallen
	 * off either end.
	 */
	int cursor;
	/**
	 * Private constructor.  This is used to create input buffers that
	 * refer to the same memory, but have different cursors.
	 */
	input_buffer(const char* b, int s, int c) : buffer(b), size(s),
		cursor(c) {}
	public:
	/**
	 * Returns the file name associated with this buffer.  This base
	 * implementation has no file and returns a reference to a shared empty
	 * string; the method is virtual so that subclasses can override it.
	 */
	virtual const std::string &filename() const
	{
		static const std::string s;
		return s;
	}
	/**
	 * Factory that creates an input buffer for the file at `path`.  Defined
	 * out of line; presumably `warn` controls whether a failure to open the
	 * file is reported -- confirm against the implementation.
	 */
	static std::unique_ptr<input_buffer> buffer_for_file(const std::string &path,
	                                                     bool warn=true);
	/**
	 * Skips all characters in the input until the specified character is
	 * encountered.
	 */
	void skip_to(char);
	/**
	 * Parses up to a specified character and returns the intervening
	 * characters as a string.
	 */
	std::string parse_to(char);
	/**
	 * Return whether all input has been consumed, i.e. whether the cursor
	 * is at or beyond the end of the buffer.
	 */
	bool finished() const { return cursor >= size; }
	/**
	 * Virtual destructor.  Does nothing, but exists so that subclasses
	 * that own the memory can run cleanup code for deallocating it.
	 */
	virtual ~input_buffer() = default;
	/**
	 * Constructs an empty buffer.
	 */
	input_buffer() : buffer(nullptr), size(0), cursor(0) {}
	/**
	 * Constructs a new buffer with a specified memory region and size.
	 * Reading starts at the beginning of the region.
	 */
	input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0){}
	/**
	 * Returns a new input buffer referring into this input, clamped to the
	 * specified size.  If the requested buffer would fall outside the
	 * range of this one, then it returns an empty buffer.
	 *
	 * The returned buffer shares the same underlying storage as the
	 * original.  This is intended to be used for splitting up the various
	 * sections of a device tree blob.  Requesting a size of 0 will give a
	 * buffer that extends to the end of the available memory.
	 */
	input_buffer buffer_from_offset(int offset, int s=0);
	/**
	 * Dereferencing operator, allows the buffer to be treated as a char*
	 * and dereferenced to give a character.  This returns a null byte if
	 * the cursor is out of range.
	 */
	char operator*() const
	{
		if (cursor < 0 || cursor >= size) { return '\0'; }
		return buffer[cursor];
	}
	/**
	 * Array subscripting operator, returns a character at the specified
	 * index offset from the current cursor.  The offset may be negative,
	 * to reread characters that have already been read.  If the current
	 * cursor plus offset is outside of the range, this returns a nul
	 * byte.
	 */
	char operator[](int offset) const
	{
		// Widen before adding: `cursor + offset` evaluated in int is
		// undefined behaviour when the sum overflows, which would defeat
		// the range check for extreme offsets.
		const long long index = static_cast<long long>(cursor) + offset;
		if (index < 0 || index >= size) { return '\0'; }
		return buffer[index];
	}
	/**
	 * Increments the cursor, iterating forward in the buffer.  No bounds
	 * check is made here; reads past the end yield nul bytes instead.
	 */
	input_buffer &operator++()
	{
		cursor++;
		return *this;
	}
	/**
	 * Consumes a character.  Moves the cursor one character forward if the
	 * next character matches the argument, returning true.  If the current
	 * character does not match the argument, returns false.
	 */
	bool consume(char c)
	{
		if (*(*this) != c)
		{
			return false;
		}
		++(*this);
		return true;
	}
	/**
	 * Consumes a string.  If the (null-terminated) string passed as the
	 * argument appears in the input, advances the cursor to the end and
	 * returns true.  Returns false if the string does not appear at the
	 * current point in the input.
	 */
	bool consume(const char *str);
	/**
	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
	 * the cursor to the end of the integer if the cursor points to an
	 * integer, returns false and does not move the cursor otherwise.
	 *
	 * The parsed value is returned via the argument.
	 */
	bool consume_integer(unsigned long long &outInt);
	/**
	 * Reads an arithmetic expression (containing any of the normal C
	 * operators), evaluates it, and returns the result.
	 */
	bool consume_integer_expression(unsigned long long &outInt);
	/**
	 * Consumes two hex digits and return the resulting byte via the first
	 * argument.  If the next two characters are hex digits, returns true
	 * and advances the cursor.  If not, then returns false and leaves the
	 * cursor in place.
	 */
	bool consume_hex_byte(uint8_t &outByte);
	/**
	 * Template function that consumes a binary value in big-endian format
	 * from the input stream.  Returns true and advances the cursor if
	 * there is a value of the correct size.  This function assumes that
	 * all values must be natively aligned, and so advances the cursor to
	 * the correct alignment before reading.
	 *
	 * If there is not enough input left for the padding plus the value,
	 * returns false and leaves both the cursor and `out` untouched.
	 */
	template<typename T>
	bool consume_binary(T &out)
	{
		const int type_size = sizeof(T);
		// Number of padding bytes needed to reach a multiple of sizeof(T).
		const int misalignment = cursor % type_size;
		const int align = (misalignment != 0) ? (type_size - misalignment) : 0;
		// This single check covers every byte read below, so the loop
		// needs no per-byte bounds test.
		if (size < cursor + align + type_size)
		{
			return false;
		}
		cursor += align;
		assert(cursor % type_size == 0);
		out = 0;
		for (int i=0 ; i<type_size ; ++i)
		{
			// Most significant byte first.  The mask discards any sign
			// extension from a negative char.
			out <<= 8;
			out |= (static_cast<T>(buffer[cursor++]) & 0xff);
		}
		return true;
	}
#ifndef NDEBUG
	/**
	 * Dumps the current cursor value and the unconsumed values in the
	 * input buffer to the standard error.  This method is intended solely
	 * for debugging.
	 */
	void dump();
#endif
};
253 /**
254  * Explicit specialisation for reading a single byte.
255  */
256 template<>
257 inline bool input_buffer::consume_binary(uint8_t &out)
258 {
259 	if (size < cursor + 1)
260 	{
261 		return false;
262 	}
263 	out = buffer[cursor++];
264 	return true;
265 }
266 
/**
 * A wrapper around input buffers used for parsing DTS files.  This manages a
 * stack of input buffers to handle /include/ operations.
 */
271 class text_input_buffer
272 {
273 	std::unordered_set<std::string> defines;
274 	/**
275 	 * The cursor is the input into the input stream where we are currently reading.
276 	 */
277 	int cursor = 0;
278 	/**
279 	 * The current stack of includes.  The current input is always from the top
280 	 * of the stack.
281 	 */
282 	std::stack<std::shared_ptr<input_buffer>> input_stack;
283 	/**
284 	 *
285 	 */
286 	const std::vector<std::string> include_paths;
287 	/**
288 	 * Reads forward past any spaces.  The DTS format is not whitespace
289 	 * sensitive and so we want to scan past whitespace when reading it.
290 	 */
291 	void skip_spaces();
292 	/**
293 	 * Returns the character immediately after the current one.
294 	 *
295 	 * This method does not look between files.
296 	 */
297 	char peek();
298 	/**
299 	 * If a /include/ token is encountered, then look up the corresponding
300 	 * input file, push it onto the input stack, and continue.
301 	 */
302 	void handle_include();
303 	/**
304 	 * The base directory for this file.
305 	 */
306 	const std::string dir;
307 	/**
308 	 * The file where dependencies should be output.
309 	 */
310 	FILE *depfile;
311 	public:
312 	/**
313 	 * Construct a new text input buffer with the specified buffer as the start
314 	 * of parsing and the specified set of input paths for handling new
315 	 * inclusions.
316 	 */
317 	text_input_buffer(std::unique_ptr<input_buffer> &&b,
318 	                  std::unordered_set<std::string> &&d,
319 	                  std::vector<std::string> &&i,
320 	                  const std::string directory,
321 	                  FILE *deps)
322 		: defines(d), include_paths(i), dir(directory), depfile(deps)
323 	{
324 		input_stack.push(std::move(b));
325 	}
326 	/**
327 	 * Skips all characters in the input until the specified character is
328 	 * encountered.
329 	 */
330 	void skip_to(char);
331 	/**
332 	 * Parse an expression.  If `stopAtParen` is set, then only parse a number
333 	 * or a parenthetical expression, otherwise assume that either is the
334 	 * left-hand side of a binary expression and try to parse the right-hand
335 	 * side.
336 	 */
337 	expression_ptr parse_expression(bool stopAtParen=false);
338 	/**
339 	 * Parse a binary expression, having already parsed the right-hand side.
340 	 */
341 	expression_ptr parse_binary_expression(expression_ptr lhs);
342 	/**
343 	 * Return whether all input has been consumed.
344 	 */
345 	bool finished()
346 	{
347 		return input_stack.empty() ||
348 			((input_stack.size() == 1) && input_stack.top()->finished());
349 	}
350 	/**
351 	 * Dereferencing operator.  Returns the current character in the top input buffer.
352 	 */
353 	inline char operator*()
354 	{
355 		if (input_stack.empty())
356 		{
357 			return 0;
358 		}
359 		return *(*input_stack.top());
360 	}
361 	/**
362 	 * Increments the cursor, iterating forward in the buffer.
363 	 */
364 	inline text_input_buffer &operator++()
365 	{
366 		if (input_stack.empty())
367 		{
368 			return *this;
369 		}
370 		cursor++;
371 		auto &top = *input_stack.top();
372 		++top;
373 		if (top.finished())
374 		{
375 			input_stack.pop();
376 		}
377 		return *this;
378 	}
379 	/**
380 	 * Consumes a character.  Moves the cursor one character forward if the
381 	 * next character matches the argument, returning true.  If the current
382 	 * character does not match the argument, returns false.
383 	 */
384 	inline bool consume(char c)
385 	{
386 		if (*(*this) == c)
387 		{
388 			++(*this);
389 			return true;
390 		}
391 		return false;
392 	}
393 	/**
394 	 * Consumes a string.  If the (null-terminated) string passed as the
395 	 * argument appears in the input, advances the cursor to the end and
396 	 * returns true.  Returns false if the string does not appear at the
397 	 * current point in the input.
398 	 *
399 	 * This method does not scan between files.
400 	 */
401 	bool consume(const char *str)
402 	{
403 		if (input_stack.empty())
404 		{
405 			return false;
406 		}
407 		return input_stack.top()->consume(str);
408 	}
409 	/**
410 	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
411 	 * the cursor to the end of the integer if the cursor points to an
412 	 * integer, returns false and does not move the cursor otherwise.
413 	 *
414 	 * The parsed value is returned via the argument.
415 	 *
416 	 * This method does not scan between files.
417 	 */
418 	bool consume_integer(unsigned long long &outInt)
419 	{
420 		if (input_stack.empty())
421 		{
422 			return false;
423 		}
424 		return input_stack.top()->consume_integer(outInt);
425 	}
426 	/**
427 	 * Reads an arithmetic expression (containing any of the normal C
428 	 * operators), evaluates it, and returns the result.
429 	 */
430 	bool consume_integer_expression(unsigned long long &outInt);
431 	/**
432 	 * Consumes two hex digits and return the resulting byte via the first
433 	 * argument.  If the next two characters are hex digits, returns true
434 	 * and advances the cursor.  If not, then returns false and leaves the
435 	 * cursor in place.
436 	 *
437 	 * This method does not scan between files.
438 	 */
439 	bool consume_hex_byte(uint8_t &outByte)
440 	{
441 		if (input_stack.empty())
442 		{
443 			return false;
444 		}
445 		return input_stack.top()->consume_hex_byte(outByte);
446 	}
447 	/**
448 	 * Returns the longest string in the input buffer starting at the
449 	 * current cursor and composed entirely of characters that are valid in
450 	 * node names.
451 	*/
452 	std::string parse_node_name();
453 	/**
454 	 * Returns the longest string in the input buffer starting at the
455 	 * current cursor and composed entirely of characters that are valid in
456 	 * property names.
457 	 */
458 	std::string parse_property_name();
459 	/**
460 	 * Parses either a node or a property name.  If is_property is true on
461 	 * entry, then only property names are parsed.  If it is false, then it
462 	 * will be set, on return, to indicate whether the parsed name is only
463 	 * valid as a property.
464 	 */
465 	std::string parse_node_or_property_name(bool &is_property);
466 	/**
467 	 * Parses up to a specified character and returns the intervening
468 	 * characters as a string.
469 	 */
470 	std::string parse_to(char);
471 	/**
472 	 * Advances the cursor to the start of the next token, skipping
473 	 * comments and whitespace.  If the cursor already points to the start
474 	 * of a token, then this function does nothing.
475 	 */
476 	text_input_buffer &next_token();
477 	/**
478 	 * Location in the source file.  This should never be interpreted by
479 	 * anything other than error reporting functions of this class.  It will
480 	 * eventually become something more complex than an `int`.
481 	 */
482 	class source_location
483 	{
484 		friend class text_input_buffer;
485 		/**
486 		 * The text buffer object that included `b`.
487 		 */
488 		text_input_buffer &buffer;
489 		/**
490 		 * The underlying buffer that contains this location.
491 		 */
492 		std::shared_ptr<input_buffer> b;
493 		/**
494 		 * The offset within the current buffer of the source location.
495 		 */
496 		int cursor;
497 		source_location(text_input_buffer &buf)
498 			: buffer(buf),
499 			  b(buf.input_stack.empty() ? nullptr : buf.input_stack.top()),
500 			  cursor(b ? b->cursor : 0) {}
501 		public:
502 		/**
503 		 * Report an error at this location.
504 		 */
505 		void report_error(const char *msg)
506 		{
507 			if (b)
508 			{
509 				buffer.parse_error(msg, *b, cursor);
510 			}
511 			else
512 			{
513 				buffer.parse_error(msg);
514 			}
515 		}
516 	};
517 	/**
518 	 * Returns the current source location.
519 	 */
520 	source_location location()
521 	{
522 		return { *this };
523 	}
524 	/**
525 	 * Prints a message indicating the location of a parse error.
526 	 */
527 	void parse_error(const char *msg);
528 	private:
529 	/**
530 	 * Prints a message indicating the location of a parse error, given a
531 	 * specified location.  This is used when input has already moved beyond
532 	 * the location that caused the failure.
533 	 */
534 	void parse_error(const char *msg, input_buffer &b, int loc);
535 };
536 
537 } // namespace dtc
538 
539 #endif // !_INPUT_BUFFER_HH_
540