xref: /freebsd/usr.bin/dtc/input_buffer.cc (revision fd45b686f9d92f583366c75b22c04c7ee49709c0)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2013 David Chisnall
5  * All rights reserved.
6  *
7  * This software was developed by SRI International and the University of
8  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
9  * ("CTSRD"), as part of the DARPA CRASH research programme.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 #include "input_buffer.hh"
34 #include <ctype.h>
35 #include <errno.h>
36 #include <stdint.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <functional>
41 #ifndef NDEBUG
42 #include <iostream>
43 #endif
44 #include <limits>
45 
46 
47 #include <sys/stat.h>
48 #include <sys/mman.h>
49 #include <assert.h>
50 #include <fcntl.h>
51 #include <unistd.h>
52 
53 #ifndef MAP_PREFAULT_READ
54 #define MAP_PREFAULT_READ 0
55 #endif
56 
57 using std::string;
58 
59 namespace
60 {
61 /**
62  * Subclass of input_buffer that mmap()s a file and owns the resulting memory.
63  * When this object is destroyed, the memory is unmapped.
64  */
65 struct mmap_input_buffer : public dtc::input_buffer
66 {
67 	string fn;
68 	const string &filename() const override
69 	{
70 		return fn;
71 	}
72 	/**
73 	 * Constructs a new buffer from the file passed in as a file
74 	 * descriptor.
75 	 */
76 	mmap_input_buffer(int fd, string &&filename);
77 	/**
78 	 * Unmaps the buffer, if one exists.
79 	 */
80 	virtual ~mmap_input_buffer();
81 };
82 /**
83  * Input buffer read from standard input.  This is used for reading device tree
84  * blobs and source from standard input.  It reads the entire input into
85  * malloc'd memory, so will be very slow for large inputs.  DTS and DTB files
86  * are very rarely more than 10KB though, so this is probably not a problem.
87  */
88 struct stream_input_buffer : public dtc::input_buffer
89 {
90 	const string &filename() const override
91 	{
92 		static string n = "<standard input>";
93 		return n;
94 	}
95 	/**
96 	 * The buffer that will store the data read from the standard input.
97 	 */
98 	std::vector<char> b;
99 	/**
100 	 * Constructs a new buffer from the standard input.
101 	 */
102 	stream_input_buffer();
103 };
104 
105 mmap_input_buffer::mmap_input_buffer(int fd, string &&filename)
106 	: input_buffer(0, 0), fn(filename)
107 {
108 	struct stat sb;
109 	if (fstat(fd, &sb))
110 	{
111 		perror("Failed to stat file");
112 	}
113 	size = sb.st_size;
114 	buffer = (const char*)mmap(0, size, PROT_READ, MAP_PRIVATE |
115 			MAP_PREFAULT_READ, fd, 0);
116 	if (buffer == MAP_FAILED)
117 	{
118 		perror("Failed to mmap file");
119 		exit(EXIT_FAILURE);
120 	}
121 }
122 
123 mmap_input_buffer::~mmap_input_buffer()
124 {
125 	if (buffer != 0)
126 	{
127 		munmap(const_cast<char*>(buffer), size);
128 	}
129 }
130 
131 stream_input_buffer::stream_input_buffer() : input_buffer(0, 0)
132 {
133 	int c;
134 	while ((c = fgetc(stdin)) != EOF)
135 	{
136 		b.push_back(c);
137 	}
138 	buffer = b.data();
139 	size = b.size();
140 }
141 
142 } // Anonymous namespace
143 
144 
145 namespace dtc
146 {
147 
148 void
149 input_buffer::skip_to(char c)
150 {
151 	while ((cursor < size) && (buffer[cursor] != c))
152 	{
153 		cursor++;
154 	}
155 }
156 
157 void
158 text_input_buffer::skip_to(char c)
159 {
160 	while (!finished() && (*(*this) != c))
161 	{
162 		++(*this);
163 	}
164 }
165 
166 void
167 text_input_buffer::skip_spaces()
168 {
169 	if (finished()) { return; }
170 	char c = *(*this);
171 	bool last_nl = false;
172 	while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f')
173 	       || (c == '\v') || (c == '\r'))
174 	{
175 		last_nl = ((c == '\n') || (c == '\r'));
176 		++(*this);
177 		if (finished())
178 		{
179 			c = '\0';
180 		}
181 		else
182 		{
183 			c = *(*this);
184 		}
185 	}
186 	// Skip C preprocessor leftovers
187 	if ((c == '#') && ((cursor == 0) || last_nl))
188 	{
189 		skip_to('\n');
190 		skip_spaces();
191 	}
192 	if (consume("/include/"))
193 	{
194 		handle_include();
195 		skip_spaces();
196 	}
197 }
198 
199 void
200 text_input_buffer::handle_include()
201 {
202 	bool reallyInclude = true;
203 	if (consume("if "))
204 	{
205 		next_token();
206 		string name = parse_property_name();
207 		if (defines.count(name) == 0)
208 		{
209 			reallyInclude = false;
210 		}
211 		consume('/');
212 	}
213 	next_token();
214 	if (!consume('"'))
215 	{
216 		parse_error("Expected quoted filename");
217 		return;
218 	}
219 	auto loc = location();
220 	string file = parse_to('"');
221 	consume('"');
222 	if (!reallyInclude)
223 	{
224 		return;
225 	}
226 	string include_file = dir + '/' + file;
227 	auto include_buffer = input_buffer::buffer_for_file(include_file, false);
228 	if (include_buffer == 0)
229 	{
230 		for (auto i : include_paths)
231 		{
232 			include_file = i + '/' + file;
233 			include_buffer = input_buffer::buffer_for_file(include_file, false);
234 			if (include_buffer != 0)
235 			{
236 				break;
237 			}
238 		}
239 	}
240 	if (depfile)
241 	{
242 		putc(' ', depfile);
243 		fputs(include_file.c_str(), depfile);
244 	}
245 	if (!include_buffer)
246 	{
247 		loc.report_error("Unable to locate input file");
248 		return;
249 	}
250 	input_stack.push(std::move(include_buffer));
251 }
252 
253 bool text_input_buffer::read_binary_file(const std::string &filename, byte_buffer &b)
254 {
255 	bool try_include_paths = true;
256 	string include_file;
257 	if (filename[0] == '/')
258 	{
259 		include_file = filename;
260 		// Don't try include paths if we're given an absolute path.
261 		// Failing is better so that we don't accidentally do the wrong thing,
262 		// but make it seem like everything is alright.
263 		try_include_paths = false;
264 	}
265 	else
266 	{
267 		include_file = dir + '/' + filename;
268 	}
269 	auto include_buffer = input_buffer::buffer_for_file(include_file, false);
270 	if (include_buffer == 0 && try_include_paths)
271 	{
272 		for (auto i : include_paths)
273 		{
274 			include_file = i + '/' + filename;
275 			include_buffer = input_buffer::buffer_for_file(include_file, false);
276 			if (include_buffer != 0)
277 			{
278 				break;
279 			}
280 		}
281 	}
282 	if (!include_buffer)
283 	{
284 		return false;
285 	}
286 	if (depfile)
287 	{
288 		putc(' ', depfile);
289 		fputs(include_file.c_str(), depfile);
290 	}
291 	b.insert(b.begin(), include_buffer->begin(), include_buffer->end());
292 	return true;
293 }
294 
295 input_buffer
296 input_buffer::buffer_from_offset(int offset, int s)
297 {
298 	if (offset < 0)
299 	{
300 		return input_buffer();
301 	}
302 	if (s == 0)
303 	{
304 		s = size - offset;
305 	}
306 	if (offset > size)
307 	{
308 		return input_buffer();
309 	}
310 	if (s > (size-offset))
311 	{
312 		return input_buffer();
313 	}
314 	return input_buffer(&buffer[offset], s);
315 }
316 
317 bool
318 input_buffer::consume(const char *str)
319 {
320 	int len = strlen(str);
321 	if (len > size - cursor)
322 	{
323 		return false;
324 	}
325 	else
326 	{
327 		for (int i=0 ; i<len ; ++i)
328 		{
329 			if (str[i] != (*this)[i])
330 			{
331 				return false;
332 			}
333 		}
334 		cursor += len;
335 		return true;
336 	}
337 	return false;
338 }
339 
340 bool
341 input_buffer::consume_integer(unsigned long long &outInt)
342 {
343 	// The first character must be a digit.  Hex and octal strings
344 	// are prefixed by 0 and 0x, respectively.
345 	if (!isdigit((*this)[0]))
346 	{
347 		return false;
348 	}
349 	char *end= const_cast<char*>(&buffer[size]);
350 	errno = 0;
351 	outInt = strtoull(&buffer[cursor], &end, 0);
352 	if (end == &buffer[cursor] ||
353 	    (outInt == std::numeric_limits<unsigned long long>::max() &&
354 	     errno == ERANGE))
355 	{
356 		return false;
357 	}
358 	cursor = end - buffer;
359 	return true;
360 }
361 
362 namespace {
363 
364 /**
365  * Convenience typedef for the type that we use for all values.
366  */
367 typedef unsigned long long valty;
368 
369 /**
370  * Expression tree currently being parsed.
371  */
372 struct expression
373 {
374 	typedef text_input_buffer::source_location source_location;
375 	/**
376 	 * The type that is returned when computing the result.  The boolean value
377 	 * indicates whether this is a valid expression.
378 	 *
379 	 * FIXME: Once we can use C++17, this should be `std::optional`.
380 	 */
381 	typedef std::pair<valty, bool> result;
382 	/**
383 	 * Evaluate this node, taking into account operator precedence.
384 	 */
385 	virtual result operator()() = 0;
386 	/**
387 	 * Returns the precedence of this node.  Lower values indicate higher
388 	 * precedence.
389 	 */
390 	virtual int precedence() = 0;
391 	/**
392 	 * Constructs an expression, storing the location where it was created.
393 	 */
394 	expression(source_location l) : loc(l) {}
395 	virtual ~expression() {}
396 #ifndef NDEBUG
397 	/**
398 	 * Dumps this expression to `std::cerr`, appending a newline if `nl` is
399 	 * `true`.
400 	 */
401 	void dump(bool nl=false)
402 	{
403 		void *ptr = this;
404 		if (ptr == nullptr)
405 		{
406 			std::cerr << "{nullptr}\n";
407 			return;
408 		}
409 		dump_impl();
410 		if (nl)
411 		{
412 			std::cerr << '\n';
413 		}
414 	}
415 	private:
416 	/**
417 	 * Method that sublcasses override to implement the behaviour of `dump()`.
418 	 */
419 	virtual void dump_impl() = 0;
420 #endif
421 	protected:
422 	source_location loc;
423 };
424 
425 /**
426  * Expression wrapping a single integer.  Leaf nodes in the expression tree.
427  */
428 class terminal_expr : public expression
429 {
430 	/**
431 	 * The value that this wraps.
432 	 */
433 	valty val;
434 	/**
435 	 * Evaluate.  Trivially returns the value that this class wraps.
436 	 */
437 	result operator()() override
438 	{
439 		return {val, true};
440 	}
441 	int precedence() override
442 	{
443 		return 0;
444 	}
445 	public:
446 	/**
447 	 * Constructor.
448 	 */
449 	terminal_expr(source_location l, valty v) : expression(l), val(v) {}
450 #ifndef NDEBUG
451 	void dump_impl() override { std::cerr << val; }
452 #endif
453 };
454 
455 /**
456  * Parenthetical expression.  Exists to make the contents opaque.
457  */
458 struct paren_expression : public expression
459 {
460 	/**
461 	 * The expression within the parentheses.
462 	 */
463 	expression_ptr subexpr;
464 	/**
465 	 * Constructor.  Takes the child expression as the only argument.
466 	 */
467 	paren_expression(source_location l, expression_ptr p) : expression(l),
468 	subexpr(std::move(p)) {}
469 	int precedence() override
470 	{
471 		return 0;
472 	}
473 	/**
474 	 * Evaluate - just forwards to the underlying expression.
475 	 */
476 	result operator()() override
477 	{
478 		return (*subexpr)();
479 	}
480 #ifndef NDEBUG
481 	void dump_impl() override
482 	{
483 		std::cerr << " (";
484 		subexpr->dump();
485 		std::cerr << ") ";
486 	}
487 #endif
488 };
489 
490 /**
491  * Template class for unary operators.  The `OpChar` template parameter is
492  * solely for debugging and makes it easy to print the expression.  The `Op`
493  * template parameter is a function object that implements the operator that
494  * this class provides.  Most of these are provided by the `<functional>`
495  * header.
496  */
497 template<char OpChar, class Op>
498 class unary_operator : public expression
499 {
500 	/**
501 	 * The subexpression for this unary operator.
502 	 */
503 	expression_ptr subexpr;
504 	result operator()() override
505 	{
506 		Op op;
507 		result s = (*subexpr)();
508 		if (!s.second)
509 		{
510 			return s;
511 		}
512 		return {op(s.first), true};
513 	}
514 	/**
515 	 * All unary operators have the same precedence.  They are all evaluated
516 	 * before binary expressions, but after parentheses.
517 	 */
518 	int precedence() override
519 	{
520 		return 3;
521 	}
522 	public:
523 	unary_operator(source_location l, expression_ptr p) :
524 		expression(l), subexpr(std::move(p)) {}
525 #ifndef NDEBUG
526 	void dump_impl() override
527 	{
528 		std::cerr << OpChar;
529 		subexpr->dump();
530 	}
531 #endif
532 };
533 
534 /**
535  * Abstract base class for binary operators.  Allows the tree to be modified
536  * without knowing what the operations actually are.
537  */
538 struct binary_operator_base : public expression
539 {
540 	using expression::expression;
541 	/**
542 	 * The left side of the expression.
543 	 */
544 	expression_ptr lhs;
545 	/**
546 	 * The right side of the expression.
547 	 */
548 	expression_ptr rhs;
549 	/**
550 	 * Insert a node somewhere down the path of left children, until it would
551 	 * be preempting something that should execute first.
552 	 */
553 	void insert_left(binary_operator_base *new_left)
554 	{
555 		if (lhs->precedence() < new_left->precedence())
556 		{
557 			new_left->rhs = std::move(lhs);
558 			lhs.reset(new_left);
559 		}
560 		else
561 		{
562 			static_cast<binary_operator_base*>(lhs.get())->insert_left(new_left);
563 		}
564 	}
565 };
566 
567 /**
568  * Template class for binary operators.  The precedence and the operation are
569  * provided as template parameters.
570  */
571 template<int Precedence, class Op>
572 struct binary_operator : public binary_operator_base
573 {
574 	result operator()() override
575 	{
576 		Op op;
577 		result l = (*lhs)();
578 		result r = (*rhs)();
579 		if (!(l.second && r.second))
580 		{
581 			return {0, false};
582 		}
583 		return {op(l.first, r.first), true};
584 	}
585 	int precedence() override
586 	{
587 		return Precedence;
588 	}
589 #ifdef NDEBUG
590 	/**
591 	 * Constructor.  Takes the name of the operator as an argument, for
592 	 * debugging.  Only stores it in debug mode.
593 	 */
594 	binary_operator(source_location l, const char *) :
595 		binary_operator_base(l) {}
596 #else
597 	const char *opName;
598 	binary_operator(source_location l, const char *o) :
599 		binary_operator_base(l), opName(o) {}
600 	void dump_impl() override
601 	{
602 		lhs->dump();
603 		std::cerr << opName;
604 		rhs->dump();
605 	}
606 #endif
607 };
608 
609 /**
610  * Ternary conditional operators (`cond ? true : false`) are a special case -
611  * there are no other ternary operators.
612  */
613 class ternary_conditional_operator : public expression
614 {
615 	/**
616 	 * The condition for the clause.
617 	 */
618 	expression_ptr cond;
619 	/**
620 	 * The expression that this evaluates to if the condition is true.
621 	 */
622 	expression_ptr lhs;
623 	/**
624 	 * The expression that this evaluates to if the condition is false.
625 	 */
626 	expression_ptr rhs;
627 	result operator()() override
628 	{
629 		result c = (*cond)();
630 		result l = (*lhs)();
631 		result r = (*rhs)();
632 		if (!(l.second && r.second && c.second))
633 		{
634 			return {0, false};
635 		}
636 		return c.first ? l : r;
637 	}
638 	int precedence() override
639 	{
640 		// The actual precedence of a ternary conditional operator is 15, but
641 		// its associativity is the opposite way around to the other operators,
642 		// so we fudge it slightly.
643 		return 3;
644 	}
645 #ifndef NDEBUG
646 	void dump_impl() override
647 	{
648 		cond->dump();
649 		std::cerr << " ? ";
650 		lhs->dump();
651 		std::cerr << " : ";
652 		rhs->dump();
653 	}
654 #endif
655 	public:
656 	ternary_conditional_operator(source_location sl,
657 	                             expression_ptr c,
658 	                             expression_ptr l,
659 	                             expression_ptr r) :
660 		expression(sl), cond(std::move(c)), lhs(std::move(l)),
661 		rhs(std::move(r)) {}
662 };
663 
664 template<typename T>
665 struct lshift
666 {
667 	constexpr T operator()(const T &lhs, const T &rhs) const
668 	{
669 		return lhs << rhs;
670 	}
671 };
672 template<typename T>
673 struct rshift
674 {
675 	constexpr T operator()(const T &lhs, const T &rhs) const
676 	{
677 		return lhs >> rhs;
678 	}
679 };
680 template<typename T>
681 struct unary_plus
682 {
683 	constexpr T operator()(const T &val) const
684 	{
685 		return +val;
686 	}
687 };
688 // TODO: Replace with std::bit_not once we can guarantee C++14 as a baseline.
689 template<typename T>
690 struct bit_not
691 {
692 	constexpr T operator()(const T &val) const
693 	{
694 		return ~val;
695 	}
696 };
697 
698 template<typename T>
699 struct divmod : public binary_operator<5, T>
700 {
701 	using binary_operator<5, T>::binary_operator;
702 	using typename binary_operator_base::result;
703 	result operator()() override
704 	{
705 		result r = (*binary_operator_base::rhs)();
706 		if (r.second && (r.first == 0))
707 		{
708 			expression::loc.report_error("Division by zero");
709 			return {0, false};
710 		}
711 		return binary_operator<5, T>::operator()();
712 	}
713 };
714 
715 } // anonymous namespace
716 
717 
718 expression_ptr text_input_buffer::parse_binary_expression(expression_ptr lhs)
719 {
720 	next_token();
721 	binary_operator_base *expr = nullptr;
722 	char op = *(*this);
723 	source_location l = location();
724 	switch (op)
725 	{
726 		default:
727 			return lhs;
728 		case '+':
729 			expr = new binary_operator<6, std::plus<valty>>(l, "+");
730 			break;
731 		case '-':
732 			expr = new binary_operator<6, std::minus<valty>>(l, "-");
733 			break;
734 		case '%':
735 			expr = new divmod<std::modulus<valty>>(l, "/");
736 			break;
737 		case '*':
738 			expr = new binary_operator<5, std::multiplies<valty>>(l, "*");
739 			break;
740 		case '/':
741 			expr = new divmod<std::divides<valty>>(l, "/");
742 			break;
743 		case '<':
744 			switch (peek())
745 			{
746 				default:
747 					parse_error("Invalid operator");
748 					return nullptr;
749 				case ' ':
750 				case '(':
751 				case '0'...'9':
752 					expr = new binary_operator<8, std::less<valty>>(l, "<");
753 					break;
754 				case '=':
755 					++(*this);
756 					expr = new binary_operator<8, std::less_equal<valty>>(l, "<=");
757 					break;
758 				case '<':
759 					++(*this);
760 					expr = new binary_operator<7, lshift<valty>>(l, "<<");
761 					break;
762 			}
763 			break;
764 		case '>':
765 			switch (peek())
766 			{
767 				default:
768 					parse_error("Invalid operator");
769 					return nullptr;
770 				case '(':
771 				case ' ':
772 				case '0'...'9':
773 					expr = new binary_operator<8, std::greater<valty>>(l, ">");
774 					break;
775 				case '=':
776 					++(*this);
777 					expr = new binary_operator<8, std::greater_equal<valty>>(l, ">=");
778 					break;
779 				case '>':
780 					++(*this);
781 					expr = new binary_operator<7, rshift<valty>>(l, ">>");
782 					break;
783 					return lhs;
784 			}
785 			break;
786 		case '=':
787 			if (peek() != '=')
788 			{
789 				parse_error("Invalid operator");
790 				return nullptr;
791 			}
792 			expr = new binary_operator<9, std::equal_to<valty>>(l, "==");
793 			break;
794 		case '!':
795 			if (peek() != '=')
796 			{
797 				parse_error("Invalid operator");
798 				return nullptr;
799 			}
800 			cursor++;
801 			expr = new binary_operator<9, std::not_equal_to<valty>>(l, "!=");
802 			break;
803 		case '&':
804 			if (peek() == '&')
805 			{
806 				expr = new binary_operator<13, std::logical_and<valty>>(l, "&&");
807 			}
808 			else
809 			{
810 				expr = new binary_operator<10, std::bit_and<valty>>(l, "&");
811 			}
812 			break;
813 		case '|':
814 			if (peek() == '|')
815 			{
816 				expr = new binary_operator<12, std::logical_or<valty>>(l, "||");
817 			}
818 			else
819 			{
820 				expr = new binary_operator<14, std::bit_or<valty>>(l, "|");
821 			}
822 			break;
823 		case '?':
824 		{
825 			consume('?');
826 			expression_ptr true_case = parse_expression();
827 			next_token();
828 			if (!true_case || !consume(':'))
829 			{
830 				parse_error("Expected : in ternary conditional operator");
831 				return nullptr;
832 			}
833 			expression_ptr false_case = parse_expression();
834 			if (!false_case)
835 			{
836 				parse_error("Expected false condition for ternary operator");
837 				return nullptr;
838 			}
839 			return expression_ptr(new ternary_conditional_operator(l, std::move(lhs),
840 						std::move(true_case), std::move(false_case)));
841 		}
842 	}
843 	++(*this);
844 	next_token();
845 	expression_ptr e(expr);
846 	expression_ptr rhs(parse_expression());
847 	if (!rhs)
848 	{
849 		return nullptr;
850 	}
851 	expr->lhs = std::move(lhs);
852 	if (rhs->precedence() < expr->precedence())
853 	{
854 		expr->rhs = std::move(rhs);
855 	}
856 	else
857 	{
858 		// If we're a normal left-to-right expression, then we need to insert
859 		// this as the far-left child node of the rhs expression
860 		binary_operator_base *rhs_op =
861 			static_cast<binary_operator_base*>(rhs.get());
862 		rhs_op->insert_left(expr);
863 		e.release();
864 		return rhs;
865 	}
866 	return e;
867 }
868 
869 expression_ptr text_input_buffer::parse_expression(bool stopAtParen)
870 {
871 	next_token();
872 	unsigned long long leftVal;
873 	expression_ptr lhs;
874 	source_location l = location();
875 	switch (*(*this))
876 	{
877 		case '0'...'9':
878 			if (!consume_integer(leftVal))
879 			{
880 				return nullptr;
881 			}
882 			lhs.reset(new terminal_expr(l, leftVal));
883 			break;
884 		case '(':
885 		{
886 			consume('(');
887 			expression_ptr &&subexpr = parse_expression();
888 			if (!subexpr)
889 			{
890 				return nullptr;
891 			}
892 			lhs.reset(new paren_expression(l, std::move(subexpr)));
893 			if (!consume(')'))
894 			{
895 				return nullptr;
896 			}
897 			if (stopAtParen)
898 			{
899 				return lhs;
900 			}
901 			break;
902 		}
903 		case '+':
904 		{
905 			consume('+');
906 			expression_ptr &&subexpr = parse_expression();
907 			if (!subexpr)
908 			{
909 				return nullptr;
910 			}
911 			lhs.reset(new unary_operator<'+', unary_plus<valty>>(l, std::move(subexpr)));
912 			break;
913 		}
914 		case '-':
915 		{
916 			consume('-');
917 			expression_ptr &&subexpr = parse_expression();
918 			if (!subexpr)
919 			{
920 				return nullptr;
921 			}
922 			lhs.reset(new unary_operator<'-', std::negate<valty>>(l, std::move(subexpr)));
923 			break;
924 		}
925 		case '!':
926 		{
927 			consume('!');
928 			expression_ptr &&subexpr = parse_expression();
929 			if (!subexpr)
930 			{
931 				return nullptr;
932 			}
933 			lhs.reset(new unary_operator<'!', std::logical_not<valty>>(l, std::move(subexpr)));
934 			break;
935 		}
936 		case '~':
937 		{
938 			consume('~');
939 			expression_ptr &&subexpr = parse_expression();
940 			if (!subexpr)
941 			{
942 				return nullptr;
943 			}
944 			lhs.reset(new unary_operator<'~', bit_not<valty>>(l, std::move(subexpr)));
945 			break;
946 		}
947 	}
948 	if (!lhs)
949 	{
950 		return nullptr;
951 	}
952 	return parse_binary_expression(std::move(lhs));
953 }
954 
955 bool
956 text_input_buffer::consume_integer_expression(unsigned long long &outInt)
957 {
958 	switch (*(*this))
959 	{
960 		case '(':
961 		{
962 			expression_ptr e(parse_expression(true));
963 			if (!e)
964 			{
965 				return false;
966 			}
967 			auto r = (*e)();
968 			if (r.second)
969 			{
970 				outInt = r.first;
971 				return true;
972 			}
973 			return false;
974 		}
975 		case '0'...'9':
976 			return consume_integer(outInt);
977 		default:
978 			return false;
979 	}
980 }
981 
982 bool
983 input_buffer::consume_hex_byte(uint8_t &outByte)
984 {
985 	if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1]))
986 	{
987 		return false;
988 	}
989 	outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]);
990 	cursor += 2;
991 	return true;
992 }
993 
994 text_input_buffer&
995 text_input_buffer::next_token()
996 {
997 	auto &self = *this;
998 	int start;
999 	do {
1000 		start = cursor;
1001 		skip_spaces();
1002 		if (finished())
1003 		{
1004 			return self;
1005 		}
1006 		// Parse /* comments
1007 		if (*self == '/' && peek() == '*')
1008 		{
1009 			// eat the start of the comment
1010 			++self;
1011 			++self;
1012 			do {
1013 				// Find the ending * of */
1014 				while ((*self != '\0') && (*self != '*') && !finished())
1015 				{
1016 					++self;
1017 				}
1018 				// Eat the *
1019 				++self;
1020 			} while ((*self != '\0') && (*self != '/') && !finished());
1021 			// Eat the /
1022 			++self;
1023 		}
1024 		// Parse // comments
1025 		if ((*self == '/' && peek() == '/'))
1026 		{
1027 			// eat the start of the comment
1028 			++self;
1029 			++self;
1030 			// Find the ending of the line
1031 			while (*self != '\n' && !finished())
1032 			{
1033 				++self;
1034 			}
1035 			// Eat the \n
1036 			++self;
1037 		}
1038 	} while (start != cursor);
1039 	return self;
1040 }
1041 
1042 void
1043 text_input_buffer::parse_error(const char *msg)
1044 {
1045 	if (input_stack.empty())
1046 	{
1047 		fprintf(stderr, "Error: %s\n", msg);
1048 		return;
1049 	}
1050 	input_buffer &b = *input_stack.top();
1051 	parse_error(msg, b, b.cursor);
1052 }
1053 void
1054 text_input_buffer::parse_error(const char *msg,
1055                                input_buffer &b,
1056                                int loc)
1057 {
1058 	int line_count = 1;
1059 	int line_start = 0;
1060 	int line_end = loc;
1061 	if (loc < 0 || loc > b.size)
1062 	{
1063 		return;
1064 	}
1065 	for (int i=loc ; i>0 ; --i)
1066 	{
1067 		if (b.buffer[i] == '\n')
1068 		{
1069 			line_count++;
1070 			if (line_start == 0)
1071 			{
1072 				line_start = i+1;
1073 			}
1074 		}
1075 	}
1076 	for (int i=loc+1 ; i<b.size ; ++i)
1077 	{
1078 		if (b.buffer[i] == '\n')
1079 		{
1080 			line_end = i;
1081 			break;
1082 		}
1083 	}
1084 	fprintf(stderr, "Error at %s:%d:%d: %s\n", b.filename().c_str(), line_count, loc - line_start, msg);
1085 	fwrite(&b.buffer[line_start], line_end-line_start, 1, stderr);
1086 	putc('\n', stderr);
1087 	for (int i=0 ; i<(loc-line_start) ; ++i)
1088 	{
1089 		char c = (b.buffer[i+line_start] == '\t') ? '\t' : ' ';
1090 		putc(c, stderr);
1091 	}
1092 	putc('^', stderr);
1093 	putc('\n', stderr);
1094 }
1095 #ifndef NDEBUG
1096 void
1097 input_buffer::dump()
1098 {
1099 	fprintf(stderr, "Current cursor: %d\n", cursor);
1100 	fwrite(&buffer[cursor], size-cursor, 1, stderr);
1101 }
1102 #endif
1103 
1104 
1105 namespace
1106 {
1107 /**
1108  * The source files are ASCII, so we provide a non-locale-aware version of
1109  * isalpha.  This is a class so that it can be used with a template function
1110  * for parsing strings.
1111  */
1112 struct is_alpha
1113 {
1114 	static inline bool check(const char c)
1115 	{
1116 		return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') &&
1117 			(c <= 'Z'));
1118 	}
1119 };
1120 /**
1121  * Check whether a character is in the set allowed for node names.  This is a
1122  * class so that it can be used with a template function for parsing strings.
1123  */
1124 struct is_node_name_character
1125 {
1126 	static inline bool check(const char c)
1127 	{
1128 		switch(c)
1129 		{
1130 			default:
1131 				return false;
1132 			case 'a'...'z': case 'A'...'Z': case '0'...'9':
1133 			case ',': case '.': case '+': case '-':
1134 			case '_':
1135 				return true;
1136 		}
1137 	}
1138 };
1139 /**
1140  * Check whether a character is in the set allowed for property names.  This is
1141  * a class so that it can be used with a template function for parsing strings.
1142  */
1143 struct is_property_name_character
1144 {
1145 	static inline bool check(const char c)
1146 	{
1147 		switch(c)
1148 		{
1149 			default:
1150 				return false;
1151 			case 'a'...'z': case 'A'...'Z': case '0'...'9':
1152 			case ',': case '.': case '+': case '-':
1153 			case '_': case '#':
1154 				return true;
1155 		}
1156 	}
1157 };
1158 
1159 template<class T>
1160 string parse(text_input_buffer &s)
1161 {
1162 	std::vector<char> bytes;
1163 	for (char c=*s ; T::check(c) ; c=*(++s))
1164 	{
1165 		bytes.push_back(c);
1166 	}
1167 	return string(bytes.begin(), bytes.end());
1168 }
1169 
1170 }
1171 
1172 string
1173 text_input_buffer::parse_node_name()
1174 {
1175 	return parse<is_node_name_character>(*this);
1176 }
1177 
1178 string
1179 text_input_buffer::parse_property_name()
1180 {
1181 	return parse<is_property_name_character>(*this);
1182 }
1183 
1184 string
1185 text_input_buffer::parse_node_or_property_name(bool &is_property)
1186 {
1187 	if (is_property)
1188 	{
1189 		return parse_property_name();
1190 	}
1191 	std::vector<char> bytes;
1192 	for (char c=*(*this) ; is_node_name_character::check(c) ; c=*(++(*this)))
1193 	{
1194 		bytes.push_back(c);
1195 	}
1196 	for (char c=*(*this) ; is_property_name_character::check(c) ; c=*(++(*this)))
1197 	{
1198 		bytes.push_back(c);
1199 		is_property = true;
1200 	}
1201 	return string(bytes.begin(), bytes.end());
1202 }
1203 
1204 string
1205 input_buffer::parse_to(char stop)
1206 {
1207 	std::vector<char> bytes;
1208 	for (char c=*(*this) ; c != stop ; c=*(++(*this)))
1209 	{
1210 		bytes.push_back(c);
1211 	}
1212 	return string(bytes.begin(), bytes.end());
1213 }
1214 
1215 string
1216 text_input_buffer::parse_to(char stop)
1217 {
1218 	std::vector<char> bytes;
1219 	for (char c=*(*this) ; c != stop ; c=*(++(*this)))
1220 	{
1221 		if (finished())
1222 		{
1223 			break;
1224 		}
1225 		bytes.push_back(c);
1226 	}
1227 	return string(bytes.begin(), bytes.end());
1228 }
1229 
1230 char
1231 text_input_buffer::peek()
1232 {
1233 	return (*input_stack.top())[1];
1234 }
1235 
1236 std::unique_ptr<input_buffer>
1237 input_buffer::buffer_for_file(const string &path, bool warn)
1238 {
1239 	if (path == "-")
1240 	{
1241 		std::unique_ptr<input_buffer> b(new stream_input_buffer());
1242 		return b;
1243 	}
1244 	int source = open(path.c_str(), O_RDONLY);
1245 	if (source == -1)
1246 	{
1247 		if (warn)
1248 		{
1249 			fprintf(stderr, "Unable to open file '%s'.  %s\n", path.c_str(), strerror(errno));
1250 		}
1251 		return 0;
1252 	}
1253 	struct stat st;
1254 	if (fstat(source, &st) == 0 && S_ISDIR(st.st_mode))
1255 	{
1256 		if (warn)
1257 		{
1258 			fprintf(stderr, "File %s is a directory\n", path.c_str());
1259 		}
1260 		close(source);
1261 		return 0;
1262 	}
1263 	std::unique_ptr<input_buffer> b(new mmap_input_buffer(source, string(path)));
1264 	close(source);
1265 	return b;
1266 }
1267 
1268 } // namespace dtc
1269 
1270