xref: /freebsd/usr.bin/dtc/input_buffer.hh (revision 4f0a4502a1f33fef287ac558c98e5ef99a32216f)
1 /*-
2  * Copyright (c) 2013 David Chisnall
3  * All rights reserved.
4  *
5  * This software was developed by SRI International and the University of
6  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
7  * ("CTSRD"), as part of the DARPA CRASH research programme.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  * $FreeBSD$
31  */
32 
33 #ifndef _INPUT_BUFFER_HH_
34 #define _INPUT_BUFFER_HH_
35 #include "util.hh"
36 #include <assert.h>
37 
38 namespace dtc
39 {
40 
namespace {
/**
 * Forward declaration of the expression node type used by the integer
 * expression parser.  Only the name is required in this header; the
 * definition is not visible here (presumably it lives alongside the parser
 * implementation).
 */
struct expression;
/**
 * Owning pointer to an expression.
 */
using expression_ptr = std::unique_ptr<expression>;
}

/**
 * Class encapsulating the input file.  Can be used as a const char*, but has
 * range checking.  Attempting to access anything out of range will return a 0
 * byte.  The input buffer can be cheaply copied, without copying the
 * underlying memory; however, it is the user's responsibility to ensure that
 * such copies do not persist beyond the lifetime of the underlying memory.
 *
 * This also contains methods for reporting errors and for consuming the token
 * stream.
 */
class input_buffer
{
	protected:
	/**
	 * The buffer.  This class doesn't own the buffer, but the
	 * mmap_input_buffer subclass does.
	 */
	const char* buffer;
	/**
	 * The size of the buffer, in bytes.
	 */
	int size;
	private:
	/**
	 * Parse an expression.  If `stopAtParen` is set, then only parse a number
	 * or a parenthetical expression, otherwise assume that either is the
	 * left-hand side of a binary expression and try to parse the right-hand
	 * side.
	 */
	expression_ptr parse_expression(bool stopAtParen=false);
	/**
	 * Parse a binary expression, having already parsed the left-hand side
	 * (which is passed in as `lhs`).
	 */
	expression_ptr parse_binary_expression(expression_ptr lhs);
	/**
	 * The current place in the buffer where we are reading.  This class
	 * keeps a separate size, pointer, and cursor so that we can move
	 * forwards and backwards and still have checks that we haven't fallen
	 * off either end.
	 */
	int cursor;
	/**
	 * Private constructor.  This is used to create input buffers that
	 * refer to the same memory, but have different cursors.
	 */
	input_buffer(const char* b, int s, int c) : buffer(b), size(s),
		cursor(c) {}
	/**
	 * Reads forward past any spaces.  The DTS format is not whitespace
	 * sensitive and so we want to scan past whitespace when reading it.
	 */
	void skip_spaces();
	public:
	/**
	 * Return whether all input has been consumed.  Equivalent to empty().
	 */
	bool finished() const { return cursor >= size; }
	/**
	 * Virtual destructor.  Does nothing, but exists so that subclasses
	 * that own the memory can run cleanup code for deallocating it.
	 */
	virtual ~input_buffer() = default;
	/**
	 * Constructs an empty buffer.
	 */
	input_buffer() : buffer(nullptr), size(0), cursor(0) {}
	/**
	 * Constructs a new buffer with a specified memory region and size.
	 * The cursor starts at the beginning of the region.
	 */
	input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0) {}
	/**
	 * Returns a new input buffer referring into this input, clamped to the
	 * specified size.  If the requested buffer would fall outside the
	 * range of this one, then it returns an empty buffer.
	 *
	 * The returned buffer shares the same underlying storage as the
	 * original.  This is intended to be used for splitting up the various
	 * sections of a device tree blob.  Requesting a size of 0 will give a
	 * buffer that extends to the end of the available memory.
	 */
	input_buffer buffer_from_offset(int offset, int s=0);
	/**
	 * Returns true if this buffer has no unconsumed space in it.
	 */
	bool empty() const
	{
		return cursor >= size;
	}
	/**
	 * Dereferencing operator, allows the buffer to be treated as a char*
	 * and dereferenced to give a character.  This returns a null byte if
	 * the cursor is out of range.
	 */
	char operator*() const
	{
		if (cursor < 0 || cursor >= size) { return '\0'; }
		return buffer[cursor];
	}
	/**
	 * Array subscripting operator, returns a character at the specified
	 * index offset from the current cursor.  The offset may be negative,
	 * to reread characters that have already been read.  If the current
	 * cursor plus offset is outside of the range, this returns a nul
	 * byte.
	 */
	char operator[](int offset) const
	{
		// Widen before adding: `cursor + offset` in int is undefined
		// behaviour if it overflows, which would defeat the range check.
		const long long index = static_cast<long long>(cursor) + offset;
		if (index < 0 || index >= size) { return '\0'; }
		return buffer[index];
	}
	/**
	 * Increments the cursor, iterating forward in the buffer.  The cursor
	 * is allowed to move past the end; reads from there yield nul bytes.
	 */
	input_buffer &operator++()
	{
		cursor++;
		return *this;
	}
	/**
	 * Cast to char* operator.  Returns a pointer into the buffer that can
	 * be used for constructing strings, or a null pointer if the cursor is
	 * out of range.
	 */
	operator const char*() const
	{
		if (cursor < 0 || cursor >= size) { return nullptr; }
		return buffer + cursor;
	}
	/**
	 * Consumes a character.  Moves the cursor one character forward if the
	 * next character matches the argument, returning true.  If the current
	 * character does not match the argument, returns false.
	 */
	bool consume(char c)
	{
		if ((*this)[0] != c)
		{
			return false;
		}
		++(*this);
		return true;
	}
	/**
	 * Consumes a string.  If the (null-terminated) string passed as the
	 * argument appears in the input, advances the cursor to the end and
	 * returns true.  Returns false if the string does not appear at the
	 * current point in the input.
	 */
	bool consume(const char *str);
	/**
	 * Reads an integer in base 8, 10, or 16.  Returns true and advances
	 * the cursor to the end of the integer if the cursor points to an
	 * integer, returns false and does not move the cursor otherwise.
	 *
	 * The parsed value is returned via the argument.
	 */
	bool consume_integer(unsigned long long &outInt);
	/**
	 * Reads an arithmetic expression (containing any of the normal C
	 * operators), evaluates it, and returns the result via the argument.
	 */
	bool consume_integer_expression(unsigned long long &outInt);
	/**
	 * Template function that consumes a binary value in big-endian format
	 * from the input stream.  Returns true and advances the cursor if
	 * there is a value of the correct size.  This function assumes that
	 * all values must be natively aligned, and so advances the cursor to
	 * the correct alignment before reading.
	 *
	 * If there is not enough input left, returns false and leaves both the
	 * cursor and `out` untouched.
	 */
	template<typename T>
	bool consume_binary(T &out)
	{
		const int type_size = static_cast<int>(sizeof(T));
		const int misalignment = cursor % type_size;
		const int align =
			(misalignment != 0) ? (type_size - misalignment) : 0;
		// Do the bounds check in a wider type so that a cursor close to
		// INT_MAX cannot overflow and slip past the check.
		if (static_cast<long long>(cursor) + align + type_size > size)
		{
			return false;
		}
		cursor += align;
		assert(cursor % type_size == 0);
		out = 0;
		for (int i=0 ; i<type_size ; ++i)
		{
			out <<= 8;
			// Go via unsigned char so that bytes >= 0x80 are not
			// sign-extended into the higher bits of the result.
			out |= static_cast<T>(
				static_cast<unsigned char>(buffer[cursor++]));
		}
		return true;
	}
	/**
	 * Consumes two hex digits and return the resulting byte via the first
	 * argument.  If the next two characters are hex digits, returns true
	 * and advances the cursor.  If not, then returns false and leaves the
	 * cursor in place.
	 */
	bool consume_hex_byte(uint8_t &outByte);
	/**
	 * Advances the cursor to the start of the next token, skipping
	 * comments and whitespace.  If the cursor already points to the start
	 * of a token, then this function does nothing.
	 */
	input_buffer &next_token();
	/**
	 * Prints a message indicating the location of a parse error.
	 */
	void parse_error(const char *msg);
#ifndef NDEBUG
	/**
	 * Dumps the current cursor value and the unconsumed values in the
	 * input buffer to the standard error.  This method is intended solely
	 * for debugging.
	 */
	void dump();
#endif
};
266 /**
267  * Explicit specialisation for reading a single byte.
268  */
269 template<>
270 inline bool input_buffer::consume_binary(uint8_t &out)
271 {
272 	if (size < cursor + 1)
273 	{
274 		return false;
275 	}
276 	out = buffer[cursor++];
277 	return true;
278 }
279 
280 /**
281  * Subclass of input_buffer that mmap()s a file and owns the resulting memory.
282  * When this object is destroyed, the memory is unmapped.
283  */
284 struct mmap_input_buffer : public input_buffer
285 {
286 	/**
287 	 * Constructs a new buffer from the file passed in as a file
288 	 * descriptor.
289 	 */
290 	mmap_input_buffer(int fd);
291 	/**
292 	 * Unmaps the buffer, if one exists.
293 	 */
294 	virtual ~mmap_input_buffer();
295 };
296 /**
297  * Input buffer read from standard input.  This is used for reading device tree
298  * blobs and source from standard input.  It reads the entire input into
299  * malloc'd memory, so will be very slow for large inputs.  DTS and DTB files
300  * are very rarely more than 10KB though, so this is probably not a problem.
301  */
302 struct stream_input_buffer : public input_buffer
303 {
304 	/**
305 	 * The buffer that will store the data read from the standard input.
306 	 */
307 	std::vector<char> b;
308 	/**
309 	 * Constructs a new buffer from the standard input.
310 	 */
311 	stream_input_buffer();
312 };
313 
314 } // namespace dtc
315 
316 #endif // !_INPUT_BUFFER_HH_
317