xref: /freebsd/usr.bin/dtc/input_buffer.cc (revision 1de7b4b805ddbf2429da511c053686ac4591ed89)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2013 David Chisnall
5  * All rights reserved.
6  *
7  * This software was developed by SRI International and the University of
8  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
9  * ("CTSRD"), as part of the DARPA CRASH research programme.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $FreeBSD$
33  */
34 
35 #include "input_buffer.hh"
36 #include <ctype.h>
37 #include <errno.h>
38 #include <limits.h>
39 #include <stdint.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43 #include <functional>
44 #ifndef NDEBUG
45 #include <iostream>
46 #endif
47 
48 
49 #include <sys/stat.h>
50 #include <sys/mman.h>
51 #include <assert.h>
52 #include <fcntl.h>
53 #include <unistd.h>
54 
55 #ifndef MAP_PREFAULT_READ
56 #define MAP_PREFAULT_READ 0
57 #endif
58 
59 using std::string;
60 
61 namespace
62 {
63 /**
64  * Subclass of input_buffer that mmap()s a file and owns the resulting memory.
65  * When this object is destroyed, the memory is unmapped.
66  */
67 struct mmap_input_buffer : public dtc::input_buffer
68 {
69 	string fn;
70 	const string &filename() const override
71 	{
72 		return fn;
73 	}
74 	/**
75 	 * Constructs a new buffer from the file passed in as a file
76 	 * descriptor.
77 	 */
78 	mmap_input_buffer(int fd, string &&filename);
79 	/**
80 	 * Unmaps the buffer, if one exists.
81 	 */
82 	virtual ~mmap_input_buffer();
83 };
84 /**
85  * Input buffer read from standard input.  This is used for reading device tree
86  * blobs and source from standard input.  It reads the entire input into
87  * malloc'd memory, so will be very slow for large inputs.  DTS and DTB files
88  * are very rarely more than 10KB though, so this is probably not a problem.
89  */
90 struct stream_input_buffer : public dtc::input_buffer
91 {
92 	const string &filename() const override
93 	{
94 		static string n = "<standard input>";
95 		return n;
96 	}
97 	/**
98 	 * The buffer that will store the data read from the standard input.
99 	 */
100 	std::vector<char> b;
101 	/**
102 	 * Constructs a new buffer from the standard input.
103 	 */
104 	stream_input_buffer();
105 };
106 
107 mmap_input_buffer::mmap_input_buffer(int fd, string &&filename)
108 	: input_buffer(0, 0), fn(filename)
109 {
110 	struct stat sb;
111 	if (fstat(fd, &sb))
112 	{
113 		perror("Failed to stat file");
114 	}
115 	size = sb.st_size;
116 	buffer = (const char*)mmap(0, size, PROT_READ, MAP_PRIVATE |
117 			MAP_PREFAULT_READ, fd, 0);
118 	if (buffer == MAP_FAILED)
119 	{
120 		perror("Failed to mmap file");
121 		exit(EXIT_FAILURE);
122 	}
123 }
124 
125 mmap_input_buffer::~mmap_input_buffer()
126 {
127 	if (buffer != 0)
128 	{
129 		munmap((void*)buffer, size);
130 	}
131 }
132 
133 stream_input_buffer::stream_input_buffer() : input_buffer(0, 0)
134 {
135 	int c;
136 	while ((c = fgetc(stdin)) != EOF)
137 	{
138 		b.push_back(c);
139 	}
140 	buffer = b.data();
141 	size = b.size();
142 }
143 
144 } // Anonymous namespace
145 
146 
147 namespace dtc
148 {
149 
150 void
151 input_buffer::skip_to(char c)
152 {
153 	while ((cursor < size) && (buffer[cursor] != c))
154 	{
155 		cursor++;
156 	}
157 }
158 
159 void
160 text_input_buffer::skip_to(char c)
161 {
162 	while (!finished() && (*(*this) != c))
163 	{
164 		++(*this);
165 	}
166 }
167 
168 void
169 text_input_buffer::skip_spaces()
170 {
171 	if (finished()) { return; }
172 	char c = *(*this);
173 	bool last_nl = false;
174 	while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f')
175 	       || (c == '\v') || (c == '\r'))
176 	{
177 		last_nl = ((c == '\n') || (c == '\r'));
178 		++(*this);
179 		if (finished())
180 		{
181 			c = '\0';
182 		}
183 		else
184 		{
185 			c = *(*this);
186 		}
187 	}
188 	// Skip C preprocessor leftovers
189 	if ((c == '#') && ((cursor == 0) || last_nl))
190 	{
191 		skip_to('\n');
192 		skip_spaces();
193 	}
194 	if (consume("/include/"))
195 	{
196 		handle_include();
197 		skip_spaces();
198 	}
199 }
200 
201 void
202 text_input_buffer::handle_include()
203 {
204 	bool reallyInclude = true;
205 	if (consume("if "))
206 	{
207 		next_token();
208 		string name = parse_property_name();
209 		if (defines.count(name) > 0)
210 		{
211 			reallyInclude = true;
212 		}
213 		consume('/');
214 	}
215 	next_token();
216 	if (!consume('"'))
217 	{
218 		parse_error("Expected quoted filename");
219 		return;
220 	}
221 	auto loc = location();
222 	string file = parse_to('"');
223 	consume('"');
224 	if (!reallyInclude)
225 	{
226 		return;
227 	}
228 	string include_file = dir + '/' + file;
229 	auto include_buffer = input_buffer::buffer_for_file(include_file, false);
230 	if (include_buffer == 0)
231 	{
232 		for (auto i : include_paths)
233 		{
234 			include_file = i + '/' + file;
235 			include_buffer = input_buffer::buffer_for_file(include_file, false);
236 			if (include_buffer != 0)
237 			{
238 				break;
239 			}
240 		}
241 	}
242 	if (depfile)
243 	{
244 		putc(' ', depfile);
245 		fputs(include_file.c_str(), depfile);
246 	}
247 	if (!include_buffer)
248 	{
249 		loc.report_error("Unable to locate input file");
250 		return;
251 	}
252 	input_stack.push(std::move(include_buffer));
253 }
254 
255 input_buffer
256 input_buffer::buffer_from_offset(int offset, int s)
257 {
258 	if (offset < 0)
259 	{
260 		return input_buffer();
261 	}
262 	if (s == 0)
263 	{
264 		s = size - offset;
265 	}
266 	if (offset > size)
267 	{
268 		return input_buffer();
269 	}
270 	if (s > (size-offset))
271 	{
272 		return input_buffer();
273 	}
274 	return input_buffer(&buffer[offset], s);
275 }
276 
277 bool
278 input_buffer::consume(const char *str)
279 {
280 	int len = strlen(str);
281 	if (len > size - cursor)
282 	{
283 		return false;
284 	}
285 	else
286 	{
287 		for (int i=0 ; i<len ; ++i)
288 		{
289 			if (str[i] != (*this)[i])
290 			{
291 				return false;
292 			}
293 		}
294 		cursor += len;
295 		return true;
296 	}
297 	return false;
298 }
299 
300 bool
301 input_buffer::consume_integer(unsigned long long &outInt)
302 {
303 	// The first character must be a digit.  Hex and octal strings
304 	// are prefixed by 0 and 0x, respectively.
305 	if (!isdigit((*this)[0]))
306 	{
307 		return false;
308 	}
309 	char *end= const_cast<char*>(&buffer[size]);
310 	outInt = strtoull(&buffer[cursor], &end, 0);
311 	if (end == &buffer[cursor])
312 	{
313 		return false;
314 	}
315 	cursor = end - buffer;
316 	return true;
317 }
318 
319 namespace {
320 
321 /**
322  * Convenience typedef for the type that we use for all values.
323  */
324 typedef unsigned long long valty;
325 
326 /**
327  * Expression tree currently being parsed.
328  */
329 struct expression
330 {
331 	typedef text_input_buffer::source_location source_location;
332 	/**
333 	 * The type that is returned when computing the result.  The boolean value
334 	 * indicates whether this is a valid expression.
335 	 *
336 	 * FIXME: Once we can use C++17, this should be `std::optional`.
337 	 */
338 	typedef std::pair<valty, bool> result;
339 	/**
340 	 * Evaluate this node, taking into account operator precedence.
341 	 */
342 	virtual result operator()() = 0;
343 	/**
344 	 * Returns the precedence of this node.  Lower values indicate higher
345 	 * precedence.
346 	 */
347 	virtual int precedence() = 0;
348 	/**
349 	 * Constructs an expression, storing the location where it was created.
350 	 */
351 	expression(source_location l) : loc(l) {}
352 	virtual ~expression() {}
353 #ifndef NDEBUG
354 	/**
355 	 * Dumps this expression to `std::cerr`, appending a newline if `nl` is
356 	 * `true`.
357 	 */
358 	void dump(bool nl=false)
359 	{
360 		void *ptr = this;
361 		if (ptr == nullptr)
362 		{
363 			std::cerr << "{nullptr}\n";
364 			return;
365 		}
366 		dump_impl();
367 		if (nl)
368 		{
369 			std::cerr << '\n';
370 		}
371 	}
372 	private:
373 	/**
374 	 * Method that sublcasses override to implement the behaviour of `dump()`.
375 	 */
376 	virtual void dump_impl() = 0;
377 #endif
378 	protected:
379 	source_location loc;
380 };
381 
382 /**
383  * Expression wrapping a single integer.  Leaf nodes in the expression tree.
384  */
385 class terminal_expr : public expression
386 {
387 	/**
388 	 * The value that this wraps.
389 	 */
390 	valty val;
391 	/**
392 	 * Evaluate.  Trivially returns the value that this class wraps.
393 	 */
394 	result operator()() override
395 	{
396 		return {val, true};
397 	}
398 	int precedence() override
399 	{
400 		return 0;
401 	}
402 	public:
403 	/**
404 	 * Constructor.
405 	 */
406 	terminal_expr(source_location l, valty v) : expression(l), val(v) {}
407 #ifndef NDEBUG
408 	void dump_impl() override { std::cerr << val; }
409 #endif
410 };
411 
412 /**
413  * Parenthetical expression.  Exists to make the contents opaque.
414  */
415 struct paren_expression : public expression
416 {
417 	/**
418 	 * The expression within the parentheses.
419 	 */
420 	expression_ptr subexpr;
421 	/**
422 	 * Constructor.  Takes the child expression as the only argument.
423 	 */
424 	paren_expression(source_location l, expression_ptr p) : expression(l),
425 	subexpr(std::move(p)) {}
426 	int precedence() override
427 	{
428 		return 0;
429 	}
430 	/**
431 	 * Evaluate - just forwards to the underlying expression.
432 	 */
433 	result operator()() override
434 	{
435 		return (*subexpr)();
436 	}
437 #ifndef NDEBUG
438 	void dump_impl() override
439 	{
440 		std::cerr << " (";
441 		subexpr->dump();
442 		std::cerr << ") ";
443 	}
444 #endif
445 };
446 
447 /**
448  * Template class for unary operators.  The `OpChar` template parameter is
449  * solely for debugging and makes it easy to print the expression.  The `Op`
450  * template parameter is a function object that implements the operator that
451  * this class provides.  Most of these are provided by the `<functional>`
452  * header.
453  */
454 template<char OpChar, class Op>
455 class unary_operator : public expression
456 {
457 	/**
458 	 * The subexpression for this unary operator.
459 	 */
460 	expression_ptr subexpr;
461 	result operator()() override
462 	{
463 		Op op;
464 		result s = (*subexpr)();
465 		if (!s.second)
466 		{
467 			return s;
468 		}
469 		return {op(s.first), true};
470 	}
471 	/**
472 	 * All unary operators have the same precedence.  They are all evaluated
473 	 * before binary expressions, but after parentheses.
474 	 */
475 	int precedence() override
476 	{
477 		return 3;
478 	}
479 	public:
480 	unary_operator(source_location l, expression_ptr p) :
481 		expression(l), subexpr(std::move(p)) {}
482 #ifndef NDEBUG
483 	void dump_impl() override
484 	{
485 		std::cerr << OpChar;
486 		subexpr->dump();
487 	}
488 #endif
489 };
490 
491 /**
492  * Abstract base class for binary operators.  Allows the tree to be modified
493  * without knowing what the operations actually are.
494  */
495 struct binary_operator_base : public expression
496 {
497 	using expression::expression;
498 	/**
499 	 * The left side of the expression.
500 	 */
501 	expression_ptr lhs;
502 	/**
503 	 * The right side of the expression.
504 	 */
505 	expression_ptr rhs;
506 	/**
507 	 * Insert a node somewhere down the path of left children, until it would
508 	 * be preempting something that should execute first.
509 	 */
510 	void insert_left(binary_operator_base *new_left)
511 	{
512 		if (lhs->precedence() < new_left->precedence())
513 		{
514 			new_left->rhs = std::move(lhs);
515 			lhs.reset(new_left);
516 		}
517 		else
518 		{
519 			static_cast<binary_operator_base*>(lhs.get())->insert_left(new_left);
520 		}
521 	}
522 };
523 
524 /**
525  * Template class for binary operators.  The precedence and the operation are
526  * provided as template parameters.
527  */
528 template<int Precedence, class Op>
529 struct binary_operator : public binary_operator_base
530 {
531 	result operator()() override
532 	{
533 		Op op;
534 		result l = (*lhs)();
535 		result r = (*rhs)();
536 		if (!(l.second && r.second))
537 		{
538 			return {0, false};
539 		}
540 		return {op(l.first, r.first), true};
541 	}
542 	int precedence() override
543 	{
544 		return Precedence;
545 	}
546 #ifdef NDEBUG
547 	/**
548 	 * Constructor.  Takes the name of the operator as an argument, for
549 	 * debugging.  Only stores it in debug mode.
550 	 */
551 	binary_operator(source_location l, const char *) :
552 		binary_operator_base(l) {}
553 #else
554 	const char *opName;
555 	binary_operator(source_location l, const char *o) :
556 		binary_operator_base(l), opName(o) {}
557 	void dump_impl() override
558 	{
559 		lhs->dump();
560 		std::cerr << opName;
561 		rhs->dump();
562 	}
563 #endif
564 };
565 
566 /**
567  * Ternary conditional operators (`cond ? true : false`) are a special case -
568  * there are no other ternary operators.
569  */
570 class ternary_conditional_operator : public expression
571 {
572 	/**
573 	 * The condition for the clause.
574 	 */
575 	expression_ptr cond;
576 	/**
577 	 * The expression that this evaluates to if the condition is true.
578 	 */
579 	expression_ptr lhs;
580 	/**
581 	 * The expression that this evaluates to if the condition is false.
582 	 */
583 	expression_ptr rhs;
584 	result operator()() override
585 	{
586 		result c = (*cond)();
587 		result l = (*lhs)();
588 		result r = (*rhs)();
589 		if (!(l.second && r.second && c.second))
590 		{
591 			return {0, false};
592 		}
593 		return c.first ? l : r;
594 	}
595 	int precedence() override
596 	{
597 		// The actual precedence of a ternary conditional operator is 15, but
598 		// its associativity is the opposite way around to the other operators,
599 		// so we fudge it slightly.
600 		return 3;
601 	}
602 #ifndef NDEBUG
603 	void dump_impl() override
604 	{
605 		cond->dump();
606 		std::cerr << " ? ";
607 		lhs->dump();
608 		std::cerr << " : ";
609 		rhs->dump();
610 	}
611 #endif
612 	public:
613 	ternary_conditional_operator(source_location sl,
614 	                             expression_ptr c,
615 	                             expression_ptr l,
616 	                             expression_ptr r) :
617 		expression(sl), cond(std::move(c)), lhs(std::move(l)),
618 		rhs(std::move(r)) {}
619 };
620 
621 template<typename T>
622 struct lshift
623 {
624 	constexpr T operator()(const T &lhs, const T &rhs) const
625 	{
626 		return lhs << rhs;
627 	}
628 };
629 template<typename T>
630 struct rshift
631 {
632 	constexpr T operator()(const T &lhs, const T &rhs) const
633 	{
634 		return lhs >> rhs;
635 	}
636 };
637 template<typename T>
638 struct unary_plus
639 {
640 	constexpr T operator()(const T &val) const
641 	{
642 		return +val;
643 	}
644 };
645 // TODO: Replace with std::bit_not once we can guarantee C++14 as a baseline.
646 template<typename T>
647 struct bit_not
648 {
649 	constexpr T operator()(const T &val) const
650 	{
651 		return ~val;
652 	}
653 };
654 
655 template<typename T>
656 struct divmod : public binary_operator<5, T>
657 {
658 	using binary_operator<5, T>::binary_operator;
659 	using typename binary_operator_base::result;
660 	result operator()() override
661 	{
662 		result r = (*binary_operator_base::rhs)();
663 		if (r.second && (r.first == 0))
664 		{
665 			expression::loc.report_error("Division by zero");
666 			return {0, false};
667 		}
668 		return binary_operator<5, T>::operator()();
669 	}
670 };
671 
672 } // anonymous namespace
673 
674 
675 expression_ptr text_input_buffer::parse_binary_expression(expression_ptr lhs)
676 {
677 	next_token();
678 	binary_operator_base *expr = nullptr;
679 	char op = *(*this);
680 	source_location l = location();
681 	switch (op)
682 	{
683 		default:
684 			return lhs;
685 		case '+':
686 			expr = new binary_operator<6, std::plus<valty>>(l, "+");
687 			break;
688 		case '-':
689 			expr = new binary_operator<6, std::minus<valty>>(l, "-");
690 			break;
691 		case '%':
692 			expr = new divmod<std::modulus<valty>>(l, "/");
693 			break;
694 		case '*':
695 			expr = new binary_operator<5, std::multiplies<valty>>(l, "*");
696 			break;
697 		case '/':
698 			expr = new divmod<std::divides<valty>>(l, "/");
699 			break;
700 		case '<':
701 			switch (peek())
702 			{
703 				default:
704 					parse_error("Invalid operator");
705 					return nullptr;
706 				case ' ':
707 				case '(':
708 				case '0'...'9':
709 					expr = new binary_operator<8, std::less<valty>>(l, "<");
710 					break;
711 				case '=':
712 					++(*this);
713 					expr = new binary_operator<8, std::less_equal<valty>>(l, "<=");
714 					break;
715 				case '<':
716 					++(*this);
717 					expr = new binary_operator<7, lshift<valty>>(l, "<<");
718 					break;
719 			}
720 			break;
721 		case '>':
722 			switch (peek())
723 			{
724 				default:
725 					parse_error("Invalid operator");
726 					return nullptr;
727 				case '(':
728 				case ' ':
729 				case '0'...'9':
730 					expr = new binary_operator<8, std::greater<valty>>(l, ">");
731 					break;
732 				case '=':
733 					++(*this);
734 					expr = new binary_operator<8, std::greater_equal<valty>>(l, ">=");
735 					break;
736 				case '>':
737 					++(*this);
738 					expr = new binary_operator<7, rshift<valty>>(l, ">>");
739 					break;
740 					return lhs;
741 			}
742 			break;
743 		case '=':
744 			if (peek() != '=')
745 			{
746 				parse_error("Invalid operator");
747 				return nullptr;
748 			}
749 			expr = new binary_operator<9, std::equal_to<valty>>(l, "==");
750 			break;
751 		case '!':
752 			if (peek() != '=')
753 			{
754 				parse_error("Invalid operator");
755 				return nullptr;
756 			}
757 			cursor++;
758 			expr = new binary_operator<9, std::not_equal_to<valty>>(l, "!=");
759 			break;
760 		case '&':
761 			if (peek() == '&')
762 			{
763 				expr = new binary_operator<13, std::logical_and<valty>>(l, "&&");
764 			}
765 			else
766 			{
767 				expr = new binary_operator<10, std::bit_and<valty>>(l, "&");
768 			}
769 			break;
770 		case '|':
771 			if (peek() == '|')
772 			{
773 				expr = new binary_operator<12, std::logical_or<valty>>(l, "||");
774 			}
775 			else
776 			{
777 				expr = new binary_operator<14, std::bit_or<valty>>(l, "|");
778 			}
779 			break;
780 		case '?':
781 		{
782 			consume('?');
783 			expression_ptr true_case = parse_expression();
784 			next_token();
785 			if (!true_case || !consume(':'))
786 			{
787 				parse_error("Expected : in ternary conditional operator");
788 				return nullptr;
789 			}
790 			expression_ptr false_case = parse_expression();
791 			if (!false_case)
792 			{
793 				parse_error("Expected false condition for ternary operator");
794 				return nullptr;
795 			}
796 			return expression_ptr(new ternary_conditional_operator(l, std::move(lhs),
797 						std::move(true_case), std::move(false_case)));
798 		}
799 	}
800 	++(*this);
801 	next_token();
802 	expression_ptr e(expr);
803 	expression_ptr rhs(parse_expression());
804 	if (!rhs)
805 	{
806 		return nullptr;
807 	}
808 	expr->lhs = std::move(lhs);
809 	if (rhs->precedence() < expr->precedence())
810 	{
811 		expr->rhs = std::move(rhs);
812 	}
813 	else
814 	{
815 		// If we're a normal left-to-right expression, then we need to insert
816 		// this as the far-left child node of the rhs expression
817 		binary_operator_base *rhs_op =
818 			static_cast<binary_operator_base*>(rhs.get());
819 		rhs_op->insert_left(expr);
820 		e.release();
821 		return rhs;
822 	}
823 	return e;
824 }
825 
826 expression_ptr text_input_buffer::parse_expression(bool stopAtParen)
827 {
828 	next_token();
829 	unsigned long long leftVal;
830 	expression_ptr lhs;
831 	source_location l = location();
832 	switch (*(*this))
833 	{
834 		case '0'...'9':
835 			if (!consume_integer(leftVal))
836 			{
837 				return nullptr;
838 			}
839 			lhs.reset(new terminal_expr(l, leftVal));
840 			break;
841 		case '(':
842 		{
843 			consume('(');
844 			expression_ptr &&subexpr = parse_expression();
845 			if (!subexpr)
846 			{
847 				return nullptr;
848 			}
849 			lhs.reset(new paren_expression(l, std::move(subexpr)));
850 			if (!consume(')'))
851 			{
852 				return nullptr;
853 			}
854 			if (stopAtParen)
855 			{
856 				return lhs;
857 			}
858 			break;
859 		}
860 		case '+':
861 		{
862 			consume('+');
863 			expression_ptr &&subexpr = parse_expression();
864 			if (!subexpr)
865 			{
866 				return nullptr;
867 			}
868 			lhs.reset(new unary_operator<'+', unary_plus<valty>>(l, std::move(subexpr)));
869 			break;
870 		}
871 		case '-':
872 		{
873 			consume('-');
874 			expression_ptr &&subexpr = parse_expression();
875 			if (!subexpr)
876 			{
877 				return nullptr;
878 			}
879 			lhs.reset(new unary_operator<'-', std::negate<valty>>(l, std::move(subexpr)));
880 			break;
881 		}
882 		case '!':
883 		{
884 			consume('!');
885 			expression_ptr &&subexpr = parse_expression();
886 			if (!subexpr)
887 			{
888 				return nullptr;
889 			}
890 			lhs.reset(new unary_operator<'!', std::logical_not<valty>>(l, std::move(subexpr)));
891 			break;
892 		}
893 		case '~':
894 		{
895 			consume('~');
896 			expression_ptr &&subexpr = parse_expression();
897 			if (!subexpr)
898 			{
899 				return nullptr;
900 			}
901 			lhs.reset(new unary_operator<'~', bit_not<valty>>(l, std::move(subexpr)));
902 			break;
903 		}
904 	}
905 	if (!lhs)
906 	{
907 		return nullptr;
908 	}
909 	return parse_binary_expression(std::move(lhs));
910 }
911 
912 bool
913 text_input_buffer::consume_integer_expression(unsigned long long &outInt)
914 {
915 	switch (*(*this))
916 	{
917 		case '(':
918 		{
919 			expression_ptr e(parse_expression(true));
920 			if (!e)
921 			{
922 				return false;
923 			}
924 			auto r = (*e)();
925 			if (r.second)
926 			{
927 				outInt = r.first;
928 				return true;
929 			}
930 			return false;
931 		}
932 		case '0'...'9':
933 			return consume_integer(outInt);
934 		default:
935 			return false;
936 	}
937 }
938 
939 bool
940 input_buffer::consume_hex_byte(uint8_t &outByte)
941 {
942 	if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1]))
943 	{
944 		return false;
945 	}
946 	outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]);
947 	cursor += 2;
948 	return true;
949 }
950 
951 text_input_buffer&
952 text_input_buffer::next_token()
953 {
954 	auto &self = *this;
955 	int start;
956 	do {
957 		start = cursor;
958 		skip_spaces();
959 		if (finished())
960 		{
961 			return self;
962 		}
963 		// Parse /* comments
964 		if (*self == '/' && peek() == '*')
965 		{
966 			// eat the start of the comment
967 			++self;
968 			++self;
969 			do {
970 				// Find the ending * of */
971 				while ((*self != '\0') && (*self != '*') && !finished())
972 				{
973 					++self;
974 				}
975 				// Eat the *
976 				++self;
977 			} while ((*self != '\0') && (*self != '/') && !finished());
978 			// Eat the /
979 			++self;
980 		}
981 		// Parse // comments
982 		if ((*self == '/' && peek() == '/'))
983 		{
984 			// eat the start of the comment
985 			++self;
986 			++self;
987 			// Find the ending of the line
988 			while (*self != '\n' && !finished())
989 			{
990 				++self;
991 			}
992 			// Eat the \n
993 			++self;
994 		}
995 	} while (start != cursor);
996 	return self;
997 }
998 
999 void
1000 text_input_buffer::parse_error(const char *msg)
1001 {
1002 	if (input_stack.empty())
1003 	{
1004 		fprintf(stderr, "Error: %s\n", msg);
1005 		return;
1006 	}
1007 	input_buffer &b = *input_stack.top();
1008 	parse_error(msg, b, b.cursor);
1009 }
1010 void
1011 text_input_buffer::parse_error(const char *msg,
1012                                input_buffer &b,
1013                                int loc)
1014 {
1015 	int line_count = 1;
1016 	int line_start = 0;
1017 	int line_end = loc;
1018 	if (loc < 0 || loc > b.size)
1019 	{
1020 		return;
1021 	}
1022 	for (int i=loc ; i>0 ; --i)
1023 	{
1024 		if (b.buffer[i] == '\n')
1025 		{
1026 			line_count++;
1027 			if (line_start == 0)
1028 			{
1029 				line_start = i+1;
1030 			}
1031 		}
1032 	}
1033 	for (int i=loc+1 ; i<b.size ; ++i)
1034 	{
1035 		if (b.buffer[i] == '\n')
1036 		{
1037 			line_end = i;
1038 			break;
1039 		}
1040 	}
1041 	fprintf(stderr, "Error at %s:%d:%d: %s\n", b.filename().c_str(), line_count, loc - line_start, msg);
1042 	fwrite(&b.buffer[line_start], line_end-line_start, 1, stderr);
1043 	putc('\n', stderr);
1044 	for (int i=0 ; i<(loc-line_start) ; ++i)
1045 	{
1046 		char c = (b.buffer[i+line_start] == '\t') ? '\t' : ' ';
1047 		putc(c, stderr);
1048 	}
1049 	putc('^', stderr);
1050 	putc('\n', stderr);
1051 }
1052 #ifndef NDEBUG
1053 void
1054 input_buffer::dump()
1055 {
1056 	fprintf(stderr, "Current cursor: %d\n", cursor);
1057 	fwrite(&buffer[cursor], size-cursor, 1, stderr);
1058 }
1059 #endif
1060 
1061 
1062 namespace
1063 {
1064 /**
1065  * The source files are ASCII, so we provide a non-locale-aware version of
1066  * isalpha.  This is a class so that it can be used with a template function
1067  * for parsing strings.
1068  */
1069 struct is_alpha
1070 {
1071 	static inline bool check(const char c)
1072 	{
1073 		return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') &&
1074 			(c <= 'Z'));
1075 	}
1076 };
1077 /**
1078  * Check whether a character is in the set allowed for node names.  This is a
1079  * class so that it can be used with a template function for parsing strings.
1080  */
1081 struct is_node_name_character
1082 {
1083 	static inline bool check(const char c)
1084 	{
1085 		switch(c)
1086 		{
1087 			default:
1088 				return false;
1089 			case 'a'...'z': case 'A'...'Z': case '0'...'9':
1090 			case ',': case '.': case '+': case '-':
1091 			case '_':
1092 				return true;
1093 		}
1094 	}
1095 };
1096 /**
1097  * Check whether a character is in the set allowed for property names.  This is
1098  * a class so that it can be used with a template function for parsing strings.
1099  */
1100 struct is_property_name_character
1101 {
1102 	static inline bool check(const char c)
1103 	{
1104 		switch(c)
1105 		{
1106 			default:
1107 				return false;
1108 			case 'a'...'z': case 'A'...'Z': case '0'...'9':
1109 			case ',': case '.': case '+': case '-':
1110 			case '_': case '#':
1111 				return true;
1112 		}
1113 	}
1114 };
1115 
1116 template<class T>
1117 string parse(text_input_buffer &s)
1118 {
1119 	std::vector<char> bytes;
1120 	for (char c=*s ; T::check(c) ; c=*(++s))
1121 	{
1122 		bytes.push_back(c);
1123 	}
1124 	return string(bytes.begin(), bytes.end());
1125 }
1126 
1127 }
1128 
1129 string
1130 text_input_buffer::parse_node_name()
1131 {
1132 	return parse<is_node_name_character>(*this);
1133 }
1134 
1135 string
1136 text_input_buffer::parse_property_name()
1137 {
1138 	return parse<is_property_name_character>(*this);
1139 }
1140 
1141 string
1142 text_input_buffer::parse_node_or_property_name(bool &is_property)
1143 {
1144 	if (is_property)
1145 	{
1146 		return parse_property_name();
1147 	}
1148 	std::vector<char> bytes;
1149 	for (char c=*(*this) ; is_node_name_character::check(c) ; c=*(++(*this)))
1150 	{
1151 		bytes.push_back(c);
1152 	}
1153 	for (char c=*(*this) ; is_property_name_character::check(c) ; c=*(++(*this)))
1154 	{
1155 		bytes.push_back(c);
1156 		is_property = true;
1157 	}
1158 	return string(bytes.begin(), bytes.end());
1159 }
1160 
1161 string
1162 input_buffer::parse_to(char stop)
1163 {
1164 	std::vector<char> bytes;
1165 	for (char c=*(*this) ; c != stop ; c=*(++(*this)))
1166 	{
1167 		bytes.push_back(c);
1168 	}
1169 	return string(bytes.begin(), bytes.end());
1170 }
1171 
1172 string
1173 text_input_buffer::parse_to(char stop)
1174 {
1175 	std::vector<char> bytes;
1176 	for (char c=*(*this) ; c != stop ; c=*(++(*this)))
1177 	{
1178 		if (finished())
1179 		{
1180 			break;
1181 		}
1182 		bytes.push_back(c);
1183 	}
1184 	return string(bytes.begin(), bytes.end());
1185 }
1186 
1187 char
1188 text_input_buffer::peek()
1189 {
1190 	return (*input_stack.top())[1];
1191 }
1192 
1193 std::unique_ptr<input_buffer>
1194 input_buffer::buffer_for_file(const string &path, bool warn)
1195 {
1196 	if (path == "-")
1197 	{
1198 		std::unique_ptr<input_buffer> b(new stream_input_buffer());
1199 		return b;
1200 	}
1201 	int source = open(path.c_str(), O_RDONLY);
1202 	if (source == -1)
1203 	{
1204 		if (warn)
1205 		{
1206 			fprintf(stderr, "Unable to open file '%s'.  %s\n", path.c_str(), strerror(errno));
1207 		}
1208 		return 0;
1209 	}
1210 	struct stat st;
1211 	if (fstat(source, &st) == 0 && S_ISDIR(st.st_mode))
1212 	{
1213 		if (warn)
1214 		{
1215 			fprintf(stderr, "File %s is a directory\n", path.c_str());
1216 		}
1217 		close(source);
1218 		return 0;
1219 	}
1220 	std::unique_ptr<input_buffer> b(new mmap_input_buffer(source, string(path)));
1221 	close(source);
1222 	return b;
1223 }
1224 
1225 } // namespace dtc
1226 
1227