1 /*- 2 * Copyright (c) 2013 David Chisnall 3 * All rights reserved. 4 * 5 * This software was developed by SRI International and the University of 6 * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237) 7 * ("CTSRD"), as part of the DARPA CRASH research programme. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 * $FreeBSD$ 31 */ 32 33 #ifndef _INPUT_BUFFER_HH_ 34 #define _INPUT_BUFFER_HH_ 35 #include "util.hh" 36 #include <assert.h> 37 38 namespace dtc 39 { 40 41 namespace { 42 struct expression; 43 typedef std::unique_ptr<expression> expression_ptr; 44 } 45 46 /** 47 * Class encapsulating the input file. Can be used as a const char*, but has 48 * range checking. Attempting to access anything out of range will return a 0 49 * byte. The input buffer can be cheaply copied, without copying the 50 * underlying memory, however it is the user's responsibility to ensure that 51 * such copies do not persist beyond the lifetime of the underlying memory. 52 * 53 * This also contains methods for reporting errors and for consuming the token 54 * stream. 55 */ 56 class input_buffer 57 { 58 protected: 59 /** 60 * The buffer. This class doesn't own the buffer, but the 61 * mmap_input_buffer subclass does. 62 */ 63 const char* buffer; 64 /** 65 * The size of the buffer. 66 */ 67 int size; 68 private: 69 /** 70 * Parse an expression. If `stopAtParen` is set, then only parse a number 71 * or a parenthetical expression, otherwise assume that either is the 72 * left-hand side of a binary expression and try to parse the right-hand 73 * side. 74 */ 75 expression_ptr parse_expression(bool stopAtParen=false); 76 /** 77 * Parse a binary expression, having already parsed the right-hand side. 78 */ 79 expression_ptr parse_binary_expression(expression_ptr lhs); 80 /** 81 * The current place in the buffer where we are reading. This class 82 * keeps a separate size, pointer, and cursor so that we can move 83 * forwards and backwards and still have checks that we haven't fallen 84 * off either end. 85 */ 86 int cursor; 87 /** 88 * Private constructor. This is used to create input buffers that 89 * refer to the same memory, but have different cursors. 90 */ 91 input_buffer(const char* b, int s, int c) : buffer(b), size(s), 92 cursor(c) {} 93 /** 94 * Reads forward past any spaces. The DTS format is not whitespace 95 * sensitive and so we want to scan past whitespace when reading it. 96 */ 97 void skip_spaces(); 98 public: 99 /** 100 * Return whether all input has been consumed. 101 */ 102 bool finished() { return cursor >= size; } 103 /** 104 * Virtual destructor. Does nothing, but exists so that subclasses 105 * that own the memory can run cleanup code for deallocating it. 106 */ 107 virtual ~input_buffer() {}; 108 /** 109 * Constructs an empty buffer. 110 */ 111 input_buffer() : buffer(0), size(0), cursor(0) {} 112 /** 113 * Constructs a new buffer with a specified memory region and size. 114 */ 115 input_buffer(const char* b, int s) : buffer(b), size(s), cursor(0){} 116 /** 117 * Returns a new input buffer referring into this input, clamped to the 118 * specified size. If the requested buffer would fall outside the 119 * range of this one, then it returns an empty buffer. 120 * 121 * The returned buffer shares the same underlying storage as the 122 * original. This is intended to be used for splitting up the various 123 * sections of a device tree blob. Requesting a size of 0 will give a 124 * buffer that extends to the end of the available memory. 125 */ 126 input_buffer buffer_from_offset(int offset, int s=0); 127 /** 128 * Returns true if this buffer has no unconsumed space in it. 129 */ 130 inline bool empty() 131 { 132 return cursor >= size; 133 } 134 /** 135 * Dereferencing operator, allows the buffer to be treated as a char* 136 * and dereferenced to give a character. This returns a null byte if 137 * the cursor is out of range. 138 */ 139 inline char operator*() 140 { 141 if (cursor >= size) { return '\0'; } 142 if (cursor < 0) { return '\0'; } 143 return buffer[cursor]; 144 } 145 /** 146 * Array subscripting operator, returns a character at the specified 147 * index offset from the current cursor. The offset may be negative, 148 * to reread characters that have already been read. If the current 149 * cursor plus offset is outside of the range, this returns a nul 150 * byte. 151 */ 152 inline char operator[](int offset) 153 { 154 if (cursor + offset >= size) { return '\0'; } 155 if (cursor + offset < 0) { return '\0'; } 156 return buffer[cursor + offset]; 157 } 158 /** 159 * Increments the cursor, iterating forward in the buffer. 160 */ 161 inline input_buffer &operator++() 162 { 163 cursor++; 164 return *this; 165 } 166 /** 167 * Cast to char* operator. Returns a pointer into the buffer that can 168 * be used for constructing strings. 169 */ 170 inline operator const char*() 171 { 172 if (cursor >= size) { return 0; } 173 if (cursor < 0) { return 0; } 174 return &buffer[cursor]; 175 } 176 /** 177 * Consumes a character. Moves the cursor one character forward if the 178 * next character matches the argument, returning true. If the current 179 * character does not match the argument, returns false. 180 */ 181 inline bool consume(char c) 182 { 183 if ((*this)[0] == c) 184 { 185 ++(*this); 186 return true; 187 } 188 return false; 189 } 190 /** 191 * Consumes a string. If the (null-terminated) string passed as the 192 * argument appears in the input, advances the cursor to the end and 193 * returns true. Returns false if the string does not appear at the 194 * current point in the input. 195 */ 196 bool consume(const char *str); 197 /** 198 * Reads an integer in base 8, 10, or 16. Returns true and advances 199 * the cursor to the end of the integer if the cursor points to an 200 * integer, returns false and does not move the cursor otherwise. 201 * 202 * The parsed value is returned via the argument. 203 */ 204 bool consume_integer(unsigned long long &outInt); 205 /** 206 * Reads an arithmetic expression (containing any of the normal C 207 * operators), evaluates it, and returns the result. 208 */ 209 bool consume_integer_expression(unsigned long long &outInt); 210 /** 211 * Template function that consumes a binary value in big-endian format 212 * from the input stream. Returns true and advances the cursor if 213 * there is a value of the correct size. This function assumes that 214 * all values must be natively aligned, and so advances the cursor to 215 * the correct alignment before reading. 216 */ 217 template<typename T> 218 bool consume_binary(T &out) 219 { 220 int align = 0; 221 int type_size = sizeof(T); 222 if (cursor % type_size != 0) 223 { 224 align = type_size - (cursor % type_size); 225 } 226 if (size < cursor + align + type_size) 227 { 228 return false; 229 } 230 cursor += align; 231 assert(cursor % type_size == 0); 232 out = 0; 233 for (int i=0 ; i<type_size ; ++i) 234 { 235 out <<= 8; 236 out |= (((T)buffer[cursor++]) & 0xff); 237 } 238 return true; 239 } 240 /** 241 * Consumes two hex digits and return the resulting byte via the first 242 * argument. If the next two characters are hex digits, returns true 243 * and advances the cursor. If not, then returns false and leaves the 244 * cursor in place. 245 */ 246 bool consume_hex_byte(uint8_t &outByte); 247 /** 248 * Advances the cursor to the start of the next token, skipping 249 * comments and whitespace. If the cursor already points to the start 250 * of a token, then this function does nothing. 251 */ 252 input_buffer &next_token(); 253 /** 254 * Prints a message indicating the location of a parse error. 255 */ 256 void parse_error(const char *msg); 257 #ifndef NDEBUG 258 /** 259 * Dumps the current cursor value and the unconsumed values in the 260 * input buffer to the standard error. This method is intended solely 261 * for debugging. 262 */ 263 void dump(); 264 #endif 265 }; 266 /** 267 * Explicit specialisation for reading a single byte. 268 */ 269 template<> 270 inline bool input_buffer::consume_binary(uint8_t &out) 271 { 272 if (size < cursor + 1) 273 { 274 return false; 275 } 276 out = buffer[cursor++]; 277 return true; 278 } 279 280 /** 281 * Subclass of input_buffer that mmap()s a file and owns the resulting memory. 282 * When this object is destroyed, the memory is unmapped. 283 */ 284 struct mmap_input_buffer : public input_buffer 285 { 286 /** 287 * Constructs a new buffer from the file passed in as a file 288 * descriptor. 289 */ 290 mmap_input_buffer(int fd); 291 /** 292 * Unmaps the buffer, if one exists. 293 */ 294 virtual ~mmap_input_buffer(); 295 }; 296 /** 297 * Input buffer read from standard input. This is used for reading device tree 298 * blobs and source from standard input. It reads the entire input into 299 * malloc'd memory, so will be very slow for large inputs. DTS and DTB files 300 * are very rarely more than 10KB though, so this is probably not a problem. 301 */ 302 struct stream_input_buffer : public input_buffer 303 { 304 /** 305 * The buffer that will store the data read from the standard input. 306 */ 307 std::vector<char> b; 308 /** 309 * Constructs a new buffer from the standard input. 310 */ 311 stream_input_buffer(); 312 }; 313 314 } // namespace dtc 315 316 #endif // !_INPUT_BUFFER_HH_ 317