xref: /freebsd/usr.bin/dtc/input_buffer.hh (revision 792bbaba989533a1fc93823df1720c8c4aaf0442)
1 /*-
2  * Copyright (c) 2013 David Chisnall
3  * All rights reserved.
4  *
5  * This software was developed by SRI International and the University of
6  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
7  * ("CTSRD"), as part of the DARPA CRASH research programme.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  * $FreeBSD$
31  */
32 
33 #ifndef _INPUT_BUFFER_HH_
34 #define _INPUT_BUFFER_HH_
35 #include "util.hh"
36 #include <assert.h>
37 #include <stack>
38 #include <string>
39 #include <unordered_set>
40 
41 namespace dtc
42 {
43 
44 namespace {
45 struct expression;
46 typedef std::unique_ptr<expression> expression_ptr;
47 }
48 
/**
 * Class encapsulating the input file.  Can be used much like a const char*,
 * but with range checking: reading anything out of range yields a 0 byte
 * instead of touching memory outside the buffer.  The input buffer can be
 * cheaply copied, without copying the underlying memory, however it is the
 * user's responsibility to ensure that such copies do not persist beyond the
 * lifetime of the underlying memory.
 *
 * This also contains methods for reporting errors and for consuming the token
 * stream.
 */
class input_buffer
{
	friend class text_input_buffer;
	protected:
	/**
	 * The buffer.  This class doesn't own the buffer, but the
	 * mmap_input_buffer subclass does.
	 */
	const char* buffer;
	/**
	 * The size of the buffer, in bytes.
	 */
	int size;
	private:
	/**
	 * The current place in the buffer where we are reading.  This class
	 * keeps a separate size, pointer, and cursor so that we can move
	 * forwards and backwards and still have checks that we haven't fallen
	 * off either end.
	 */
	int cursor;
	/**
	 * Private constructor.  This is used to create input buffers that
	 * refer to the same memory, but have different cursors.
	 */
	input_buffer(const char* b, int s, int c) : buffer(b), size(s),
		cursor(c) {}
	public:
	/**
	 * Returns the file name associated with this buffer.  The base class
	 * has no file, so it returns a reference to a shared empty string;
	 * the method is virtual so that subclasses can supply a real name.
	 */
	virtual const std::string &filename() const
	{
		static const std::string s;
		return s;
	}
	/**
	 * Creates an input buffer for the file at `path`.  Defined outside
	 * this header.
	 */
	static std::unique_ptr<input_buffer> buffer_for_file(const std::string &path,
	                                                     bool warn=true);
	/**
	 * Skips all characters in the input until the specified character is
	 * encountered.
	 */
	void skip_to(char);
	/**
	 * Parses up to a specified character and returns the intervening
	 * characters as a string.
	 */
	std::string parse_to(char);
	/**
	 * Return whether all input has been consumed, i.e. whether the cursor
	 * has reached or passed the end of the buffer.
	 */
	bool finished() const { return cursor >= size; }
	/**
	 * Virtual destructor.  Does nothing, but exists so that subclasses
	 * that own the memory can run cleanup code for deallocating it.
	 */
	virtual ~input_buffer() = default;
	/**
	 * Constructs an empty buffer.
	 */
	input_buffer() : buffer(nullptr), size(0), cursor(0) {}
	/**
	 * Constructs a new buffer with a specified memory region and size.
	 * The cursor starts at the beginning of the region.
	 */
	input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0) {}
	/**
	 * Returns a new input buffer referring into this input, clamped to the
	 * specified size.  If the requested buffer would fall outside the
	 * range of this one, then it returns an empty buffer.
	 *
	 * The returned buffer shares the same underlying storage as the
	 * original.  This is intended to be used for splitting up the various
	 * sections of a device tree blob.  Requesting a size of 0 will give a
	 * buffer that extends to the end of the available memory.
	 */
	input_buffer buffer_from_offset(int offset, int s=0);
	/**
	 * Dereferencing operator, allows the buffer to be treated as a char*
	 * and dereferenced to give a character.  This returns a null byte if
	 * the cursor is out of range.
	 */
	char operator*() const
	{
		if (cursor < 0 || cursor >= size) { return '\0'; }
		return buffer[cursor];
	}
	/**
	 * Array subscripting operator, returns a character at the specified
	 * index offset from the current cursor.  The offset may be negative,
	 * to reread characters that have already been read.  If the current
	 * cursor plus offset is outside of the range, this returns a nul
	 * byte.
	 */
	char operator[](int offset) const
	{
		// Widen before adding so that an extreme offset cannot overflow
		// `int` (signed overflow is undefined behaviour).
		const long long index = static_cast<long long>(cursor) + offset;
		if (index < 0 || index >= size) { return '\0'; }
		return buffer[index];
	}
	/**
	 * Increments the cursor, iterating forward in the buffer.  No range
	 * check is made here; the read operations check the cursor instead.
	 */
	input_buffer &operator++()
	{
		cursor++;
		return *this;
	}
	/**
	 * Consumes a character.  Moves the cursor one character forward if the
	 * current character matches the argument, returning true.  If the
	 * current character does not match the argument, returns false and
	 * leaves the cursor in place.
	 */
	bool consume(char c)
	{
		if (*(*this) != c)
		{
			return false;
		}
		++(*this);
		return true;
	}
	/**
	 * Consumes a string.  If the (null-terminated) string passed as the
	 * argument appears in the input, advances the cursor to the end and
	 * returns true.  Returns false if the string does not appear at the
	 * current point in the input.
	 */
	bool consume(const char *str);
	/**
	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
	 * the cursor to the end of the integer if the cursor points to an
	 * integer, returns false and does not move the cursor otherwise.
	 *
	 * The parsed value is returned via the argument.
	 */
	bool consume_integer(unsigned long long &outInt);
	/**
	 * Reads an arithmetic expression (containing any of the normal C
	 * operators), evaluates it, and returns the result via the argument.
	 */
	bool consume_integer_expression(unsigned long long &outInt);
	/**
	 * Consumes two hex digits and return the resulting byte via the first
	 * argument.  If the next two characters are hex digits, returns true
	 * and advances the cursor.  If not, then returns false and leaves the
	 * cursor in place.
	 */
	bool consume_hex_byte(uint8_t &outByte);
	/**
	 * Template function that consumes a binary value in big-endian format
	 * from the input stream.  Returns true and advances the cursor if
	 * there is a value of the correct size.  This function assumes that
	 * all values must be natively aligned, and so advances the cursor to
	 * the correct alignment before reading.
	 *
	 * On failure the cursor and `out` are left untouched.
	 */
	template<typename T>
	bool consume_binary(T &out)
	{
		// A negative cursor would index in front of the buffer; the
		// dereference operators treat that as out of range, so do the
		// same here.
		if (cursor < 0)
		{
			return false;
		}
		const int type_size = static_cast<int>(sizeof(T));
		const int misalignment = cursor % type_size;
		const int align =
			(misalignment != 0) ? (type_size - misalignment) : 0;
		// Compare against the remaining space (a subtraction) rather than
		// summing cursor, padding and size, so the check cannot overflow.
		if (size - cursor < align + type_size)
		{
			return false;
		}
		cursor += align;
		assert(cursor % type_size == 0);
		out = 0;
		// Most significant byte first.  The check above guarantees that
		// all `type_size` bytes are inside the buffer.
		for (int i=0 ; i<type_size ; ++i)
		{
			out <<= 8;
			// Go through unsigned char so that bytes >= 0x80 are not
			// sign-extended into the higher bits.
			out |= static_cast<T>(
				static_cast<unsigned char>(buffer[cursor++]));
		}
		return true;
	}
#ifndef NDEBUG
	/**
	 * Dumps the current cursor value and the unconsumed values in the
	 * input buffer to the standard error.  This method is intended solely
	 * for debugging.
	 */
	void dump();
#endif
};
251 /**
252  * Explicit specialisation for reading a single byte.
253  */
254 template<>
255 inline bool input_buffer::consume_binary(uint8_t &out)
256 {
257 	if (size < cursor + 1)
258 	{
259 		return false;
260 	}
261 	out = buffer[cursor++];
262 	return true;
263 }
264 
265 /**
266  * An input buffer subclass used for parsing DTS files.  This manages a stack
267  * of input buffers to handle /input/ operations.
268  */
269 class text_input_buffer
270 {
271 	std::unordered_set<std::string> defines;
272 	/**
273 	 * The cursor is the input into the input stream where we are currently reading.
274 	 */
275 	int cursor = 0;
276 	/**
277 	 * The current stack of includes.  The current input is always from the top
278 	 * of the stack.
279 	 */
280 	std::stack<std::shared_ptr<input_buffer>> input_stack;
281 	/**
282 	 *
283 	 */
284 	const std::vector<std::string> include_paths;
285 	/**
286 	 * Reads forward past any spaces.  The DTS format is not whitespace
287 	 * sensitive and so we want to scan past whitespace when reading it.
288 	 */
289 	void skip_spaces();
290 	/**
291 	 * Returns the character immediately after the current one.
292 	 *
293 	 * This method does not look between files.
294 	 */
295 	char peek();
296 	/**
297 	 * If a /include/ token is encountered, then look up the corresponding
298 	 * input file, push it onto the input stack, and continue.
299 	 */
300 	void handle_include();
301 	/**
302 	 * The base directory for this file.
303 	 */
304 	const std::string dir;
305 	/**
306 	 * The file where dependencies should be output.
307 	 */
308 	FILE *depfile;
309 	public:
310 	/**
311 	 * Construct a new text input buffer with the specified buffer as the start
312 	 * of parsing and the specified set of input paths for handling new
313 	 * inclusions.
314 	 */
315 	text_input_buffer(std::unique_ptr<input_buffer> &&b,
316 	                  std::unordered_set<std::string> &&d,
317 	                  std::vector<std::string> &&i,
318 	                  const std::string directory,
319 	                  FILE *deps)
320 		: defines(d), include_paths(i), dir(directory), depfile(deps)
321 	{
322 		input_stack.push(std::move(b));
323 	}
324 	/**
325 	 * Skips all characters in the input until the specified character is
326 	 * encountered.
327 	 */
328 	void skip_to(char);
329 	/**
330 	 * Parse an expression.  If `stopAtParen` is set, then only parse a number
331 	 * or a parenthetical expression, otherwise assume that either is the
332 	 * left-hand side of a binary expression and try to parse the right-hand
333 	 * side.
334 	 */
335 	expression_ptr parse_expression(bool stopAtParen=false);
336 	/**
337 	 * Parse a binary expression, having already parsed the right-hand side.
338 	 */
339 	expression_ptr parse_binary_expression(expression_ptr lhs);
340 	/**
341 	 * Return whether all input has been consumed.
342 	 */
343 	bool finished()
344 	{
345 		return input_stack.empty() ||
346 			((input_stack.size() == 1) && input_stack.top()->finished());
347 	}
348 	/**
349 	 * Dereferencing operator.  Returns the current character in the top input buffer.
350 	 */
351 	inline char operator*()
352 	{
353 		if (input_stack.empty())
354 		{
355 			return 0;
356 		}
357 		return *(*input_stack.top());
358 	}
359 	/**
360 	 * Increments the cursor, iterating forward in the buffer.
361 	 */
362 	inline text_input_buffer &operator++()
363 	{
364 		if (input_stack.empty())
365 		{
366 			return *this;
367 		}
368 		cursor++;
369 		auto &top = *input_stack.top();
370 		++top;
371 		if (top.finished())
372 		{
373 			input_stack.pop();
374 		}
375 		return *this;
376 	}
377 	/**
378 	 * Consumes a character.  Moves the cursor one character forward if the
379 	 * next character matches the argument, returning true.  If the current
380 	 * character does not match the argument, returns false.
381 	 */
382 	inline bool consume(char c)
383 	{
384 		if (*(*this) == c)
385 		{
386 			++(*this);
387 			return true;
388 		}
389 		return false;
390 	}
391 	/**
392 	 * Consumes a string.  If the (null-terminated) string passed as the
393 	 * argument appears in the input, advances the cursor to the end and
394 	 * returns true.  Returns false if the string does not appear at the
395 	 * current point in the input.
396 	 *
397 	 * This method does not scan between files.
398 	 */
399 	bool consume(const char *str)
400 	{
401 		if (input_stack.empty())
402 		{
403 			return false;
404 		}
405 		return input_stack.top()->consume(str);
406 	}
407 	/**
408 	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
409 	 * the cursor to the end of the integer if the cursor points to an
410 	 * integer, returns false and does not move the cursor otherwise.
411 	 *
412 	 * The parsed value is returned via the argument.
413 	 *
414 	 * This method does not scan between files.
415 	 */
416 	bool consume_integer(unsigned long long &outInt)
417 	{
418 		if (input_stack.empty())
419 		{
420 			return false;
421 		}
422 		return input_stack.top()->consume_integer(outInt);
423 	}
424 	/**
425 	 * Reads an arithmetic expression (containing any of the normal C
426 	 * operators), evaluates it, and returns the result.
427 	 */
428 	bool consume_integer_expression(unsigned long long &outInt);
429 	/**
430 	 * Consumes two hex digits and return the resulting byte via the first
431 	 * argument.  If the next two characters are hex digits, returns true
432 	 * and advances the cursor.  If not, then returns false and leaves the
433 	 * cursor in place.
434 	 *
435 	 * This method does not scan between files.
436 	 */
437 	bool consume_hex_byte(uint8_t &outByte)
438 	{
439 		if (input_stack.empty())
440 		{
441 			return false;
442 		}
443 		return input_stack.top()->consume_hex_byte(outByte);
444 	}
445 	/**
446 	 * Returns the longest string in the input buffer starting at the
447 	 * current cursor and composed entirely of characters that are valid in
448 	 * node names.
449 	*/
450 	std::string parse_node_name();
451 	/**
452 	 * Returns the longest string in the input buffer starting at the
453 	 * current cursor and composed entirely of characters that are valid in
454 	 * property names.
455 	 */
456 	std::string parse_property_name();
457 	/**
458 	 * Parses either a node or a property name.  If is_property is true on
459 	 * entry, then only property names are parsed.  If it is false, then it
460 	 * will be set, on return, to indicate whether the parsed name is only
461 	 * valid as a property.
462 	 */
463 	std::string parse_node_or_property_name(bool &is_property);
464 	/**
465 	 * Parses up to a specified character and returns the intervening
466 	 * characters as a string.
467 	 */
468 	std::string parse_to(char);
469 	/**
470 	 * Advances the cursor to the start of the next token, skipping
471 	 * comments and whitespace.  If the cursor already points to the start
472 	 * of a token, then this function does nothing.
473 	 */
474 	text_input_buffer &next_token();
475 	/**
476 	 * Location in the source file.  This should never be interpreted by
477 	 * anything other than error reporting functions of this class.  It will
478 	 * eventually become something more complex than an `int`.
479 	 */
480 	class source_location
481 	{
482 		friend class text_input_buffer;
483 		/**
484 		 * The text buffer object that included `b`.
485 		 */
486 		text_input_buffer &buffer;
487 		/**
488 		 * The underlying buffer that contains this location.
489 		 */
490 		std::shared_ptr<input_buffer> b;
491 		/**
492 		 * The offset within the current buffer of the source location.
493 		 */
494 		int cursor;
495 		source_location(text_input_buffer &buf)
496 			: buffer(buf),
497 			  b(buf.input_stack.empty() ? nullptr : buf.input_stack.top()),
498 			  cursor(b ? b->cursor : 0) {}
499 		public:
500 		/**
501 		 * Report an error at this location.
502 		 */
503 		void report_error(const char *msg)
504 		{
505 			if (b)
506 			{
507 				buffer.parse_error(msg, *b, cursor);
508 			}
509 			else
510 			{
511 				buffer.parse_error(msg);
512 			}
513 		}
514 	};
515 	/**
516 	 * Returns the current source location.
517 	 */
518 	source_location location()
519 	{
520 		return { *this };
521 	}
522 	/**
523 	 * Prints a message indicating the location of a parse error.
524 	 */
525 	void parse_error(const char *msg);
526 	private:
527 	/**
528 	 * Prints a message indicating the location of a parse error, given a
529 	 * specified location.  This is used when input has already moved beyond
530 	 * the location that caused the failure.
531 	 */
532 	void parse_error(const char *msg, input_buffer &b, int loc);
533 };
534 
535 } // namespace dtc
536 
537 #endif // !_INPUT_BUFFER_HH_
538