xref: /freebsd/usr.bin/dtc/input_buffer.cc (revision d5b0e70f7e04d971691517ce1304d86a1e367e2e)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2013 David Chisnall
5  * All rights reserved.
6  *
7  * This software was developed by SRI International and the University of
8  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
9  * ("CTSRD"), as part of the DARPA CRASH research programme.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $FreeBSD$
33  */
34 
35 #include "input_buffer.hh"
36 #include <ctype.h>
37 #include <errno.h>
38 #include <stdint.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <functional>
43 #ifndef NDEBUG
44 #include <iostream>
45 #endif
46 #include <limits>
47 
48 
49 #include <sys/stat.h>
50 #include <sys/mman.h>
51 #include <assert.h>
52 #include <fcntl.h>
53 #include <unistd.h>
54 
55 #ifndef MAP_PREFAULT_READ
56 #define MAP_PREFAULT_READ 0
57 #endif
58 
59 using std::string;
60 
61 namespace
62 {
63 /**
64  * Subclass of input_buffer that mmap()s a file and owns the resulting memory.
65  * When this object is destroyed, the memory is unmapped.
66  */
67 struct mmap_input_buffer : public dtc::input_buffer
68 {
69 	string fn;
70 	const string &filename() const override
71 	{
72 		return fn;
73 	}
74 	/**
75 	 * Constructs a new buffer from the file passed in as a file
76 	 * descriptor.
77 	 */
78 	mmap_input_buffer(int fd, string &&filename);
79 	/**
80 	 * Unmaps the buffer, if one exists.
81 	 */
82 	virtual ~mmap_input_buffer();
83 };
84 /**
85  * Input buffer read from standard input.  This is used for reading device tree
86  * blobs and source from standard input.  It reads the entire input into
87  * malloc'd memory, so will be very slow for large inputs.  DTS and DTB files
88  * are very rarely more than 10KB though, so this is probably not a problem.
89  */
90 struct stream_input_buffer : public dtc::input_buffer
91 {
92 	const string &filename() const override
93 	{
94 		static string n = "<standard input>";
95 		return n;
96 	}
97 	/**
98 	 * The buffer that will store the data read from the standard input.
99 	 */
100 	std::vector<char> b;
101 	/**
102 	 * Constructs a new buffer from the standard input.
103 	 */
104 	stream_input_buffer();
105 };
106 
107 mmap_input_buffer::mmap_input_buffer(int fd, string &&filename)
108 	: input_buffer(0, 0), fn(filename)
109 {
110 	struct stat sb;
111 	if (fstat(fd, &sb))
112 	{
113 		perror("Failed to stat file");
114 	}
115 	size = sb.st_size;
116 	buffer = (const char*)mmap(0, size, PROT_READ, MAP_PRIVATE |
117 			MAP_PREFAULT_READ, fd, 0);
118 	if (buffer == MAP_FAILED)
119 	{
120 		perror("Failed to mmap file");
121 		exit(EXIT_FAILURE);
122 	}
123 }
124 
125 mmap_input_buffer::~mmap_input_buffer()
126 {
127 	if (buffer != 0)
128 	{
129 		munmap(const_cast<char*>(buffer), size);
130 	}
131 }
132 
133 stream_input_buffer::stream_input_buffer() : input_buffer(0, 0)
134 {
135 	int c;
136 	while ((c = fgetc(stdin)) != EOF)
137 	{
138 		b.push_back(c);
139 	}
140 	buffer = b.data();
141 	size = b.size();
142 }
143 
144 } // Anonymous namespace
145 
146 
147 namespace dtc
148 {
149 
150 void
151 input_buffer::skip_to(char c)
152 {
153 	while ((cursor < size) && (buffer[cursor] != c))
154 	{
155 		cursor++;
156 	}
157 }
158 
159 void
160 text_input_buffer::skip_to(char c)
161 {
162 	while (!finished() && (*(*this) != c))
163 	{
164 		++(*this);
165 	}
166 }
167 
168 void
169 text_input_buffer::skip_spaces()
170 {
171 	if (finished()) { return; }
172 	char c = *(*this);
173 	bool last_nl = false;
174 	while ((c == ' ') || (c == '\t') || (c == '\n') || (c == '\f')
175 	       || (c == '\v') || (c == '\r'))
176 	{
177 		last_nl = ((c == '\n') || (c == '\r'));
178 		++(*this);
179 		if (finished())
180 		{
181 			c = '\0';
182 		}
183 		else
184 		{
185 			c = *(*this);
186 		}
187 	}
188 	// Skip C preprocessor leftovers
189 	if ((c == '#') && ((cursor == 0) || last_nl))
190 	{
191 		skip_to('\n');
192 		skip_spaces();
193 	}
194 	if (consume("/include/"))
195 	{
196 		handle_include();
197 		skip_spaces();
198 	}
199 }
200 
201 void
202 text_input_buffer::handle_include()
203 {
204 	bool reallyInclude = true;
205 	if (consume("if "))
206 	{
207 		next_token();
208 		string name = parse_property_name();
209 		if (defines.count(name) == 0)
210 		{
211 			reallyInclude = false;
212 		}
213 		consume('/');
214 	}
215 	next_token();
216 	if (!consume('"'))
217 	{
218 		parse_error("Expected quoted filename");
219 		return;
220 	}
221 	auto loc = location();
222 	string file = parse_to('"');
223 	consume('"');
224 	if (!reallyInclude)
225 	{
226 		return;
227 	}
228 	string include_file = dir + '/' + file;
229 	auto include_buffer = input_buffer::buffer_for_file(include_file, false);
230 	if (include_buffer == 0)
231 	{
232 		for (auto i : include_paths)
233 		{
234 			include_file = i + '/' + file;
235 			include_buffer = input_buffer::buffer_for_file(include_file, false);
236 			if (include_buffer != 0)
237 			{
238 				break;
239 			}
240 		}
241 	}
242 	if (depfile)
243 	{
244 		putc(' ', depfile);
245 		fputs(include_file.c_str(), depfile);
246 	}
247 	if (!include_buffer)
248 	{
249 		loc.report_error("Unable to locate input file");
250 		return;
251 	}
252 	input_stack.push(std::move(include_buffer));
253 }
254 
255 bool text_input_buffer::read_binary_file(const std::string &filename, byte_buffer &b)
256 {
257 	bool try_include_paths = true;
258 	string include_file;
259 	if (filename[0] == '/')
260 	{
261 		include_file = filename;
262 		// Don't try include paths if we're given an absolute path.
263 		// Failing is better so that we don't accidentally do the wrong thing,
264 		// but make it seem like everything is alright.
265 		try_include_paths = false;
266 	}
267 	else
268 	{
269 		include_file = dir + '/' + filename;
270 	}
271 	auto include_buffer = input_buffer::buffer_for_file(include_file, false);
272 	if (include_buffer == 0 && try_include_paths)
273 	{
274 		for (auto i : include_paths)
275 		{
276 			include_file = i + '/' + filename;
277 			include_buffer = input_buffer::buffer_for_file(include_file, false);
278 			if (include_buffer != 0)
279 			{
280 				break;
281 			}
282 		}
283 	}
284 	if (!include_buffer)
285 	{
286 		return false;
287 	}
288 	if (depfile)
289 	{
290 		putc(' ', depfile);
291 		fputs(include_file.c_str(), depfile);
292 	}
293 	b.insert(b.begin(), include_buffer->begin(), include_buffer->end());
294 	return true;
295 }
296 
297 input_buffer
298 input_buffer::buffer_from_offset(int offset, int s)
299 {
300 	if (offset < 0)
301 	{
302 		return input_buffer();
303 	}
304 	if (s == 0)
305 	{
306 		s = size - offset;
307 	}
308 	if (offset > size)
309 	{
310 		return input_buffer();
311 	}
312 	if (s > (size-offset))
313 	{
314 		return input_buffer();
315 	}
316 	return input_buffer(&buffer[offset], s);
317 }
318 
319 bool
320 input_buffer::consume(const char *str)
321 {
322 	int len = strlen(str);
323 	if (len > size - cursor)
324 	{
325 		return false;
326 	}
327 	else
328 	{
329 		for (int i=0 ; i<len ; ++i)
330 		{
331 			if (str[i] != (*this)[i])
332 			{
333 				return false;
334 			}
335 		}
336 		cursor += len;
337 		return true;
338 	}
339 	return false;
340 }
341 
342 bool
343 input_buffer::consume_integer(unsigned long long &outInt)
344 {
345 	// The first character must be a digit.  Hex and octal strings
346 	// are prefixed by 0 and 0x, respectively.
347 	if (!isdigit((*this)[0]))
348 	{
349 		return false;
350 	}
351 	char *end= const_cast<char*>(&buffer[size]);
352 	errno = 0;
353 	outInt = strtoull(&buffer[cursor], &end, 0);
354 	if (end == &buffer[cursor] ||
355 	    (outInt == std::numeric_limits<unsigned long long>::max() &&
356 	     errno == ERANGE))
357 	{
358 		return false;
359 	}
360 	cursor = end - buffer;
361 	return true;
362 }
363 
364 namespace {
365 
366 /**
367  * Convenience typedef for the type that we use for all values.
368  */
369 typedef unsigned long long valty;
370 
371 /**
372  * Expression tree currently being parsed.
373  */
374 struct expression
375 {
376 	typedef text_input_buffer::source_location source_location;
377 	/**
378 	 * The type that is returned when computing the result.  The boolean value
379 	 * indicates whether this is a valid expression.
380 	 *
381 	 * FIXME: Once we can use C++17, this should be `std::optional`.
382 	 */
383 	typedef std::pair<valty, bool> result;
384 	/**
385 	 * Evaluate this node, taking into account operator precedence.
386 	 */
387 	virtual result operator()() = 0;
388 	/**
389 	 * Returns the precedence of this node.  Lower values indicate higher
390 	 * precedence.
391 	 */
392 	virtual int precedence() = 0;
393 	/**
394 	 * Constructs an expression, storing the location where it was created.
395 	 */
396 	expression(source_location l) : loc(l) {}
397 	virtual ~expression() {}
398 #ifndef NDEBUG
399 	/**
400 	 * Dumps this expression to `std::cerr`, appending a newline if `nl` is
401 	 * `true`.
402 	 */
403 	void dump(bool nl=false)
404 	{
405 		void *ptr = this;
406 		if (ptr == nullptr)
407 		{
408 			std::cerr << "{nullptr}\n";
409 			return;
410 		}
411 		dump_impl();
412 		if (nl)
413 		{
414 			std::cerr << '\n';
415 		}
416 	}
417 	private:
418 	/**
419 	 * Method that sublcasses override to implement the behaviour of `dump()`.
420 	 */
421 	virtual void dump_impl() = 0;
422 #endif
423 	protected:
424 	source_location loc;
425 };
426 
427 /**
428  * Expression wrapping a single integer.  Leaf nodes in the expression tree.
429  */
430 class terminal_expr : public expression
431 {
432 	/**
433 	 * The value that this wraps.
434 	 */
435 	valty val;
436 	/**
437 	 * Evaluate.  Trivially returns the value that this class wraps.
438 	 */
439 	result operator()() override
440 	{
441 		return {val, true};
442 	}
443 	int precedence() override
444 	{
445 		return 0;
446 	}
447 	public:
448 	/**
449 	 * Constructor.
450 	 */
451 	terminal_expr(source_location l, valty v) : expression(l), val(v) {}
452 #ifndef NDEBUG
453 	void dump_impl() override { std::cerr << val; }
454 #endif
455 };
456 
457 /**
458  * Parenthetical expression.  Exists to make the contents opaque.
459  */
460 struct paren_expression : public expression
461 {
462 	/**
463 	 * The expression within the parentheses.
464 	 */
465 	expression_ptr subexpr;
466 	/**
467 	 * Constructor.  Takes the child expression as the only argument.
468 	 */
469 	paren_expression(source_location l, expression_ptr p) : expression(l),
470 	subexpr(std::move(p)) {}
471 	int precedence() override
472 	{
473 		return 0;
474 	}
475 	/**
476 	 * Evaluate - just forwards to the underlying expression.
477 	 */
478 	result operator()() override
479 	{
480 		return (*subexpr)();
481 	}
482 #ifndef NDEBUG
483 	void dump_impl() override
484 	{
485 		std::cerr << " (";
486 		subexpr->dump();
487 		std::cerr << ") ";
488 	}
489 #endif
490 };
491 
492 /**
493  * Template class for unary operators.  The `OpChar` template parameter is
494  * solely for debugging and makes it easy to print the expression.  The `Op`
495  * template parameter is a function object that implements the operator that
496  * this class provides.  Most of these are provided by the `<functional>`
497  * header.
498  */
499 template<char OpChar, class Op>
500 class unary_operator : public expression
501 {
502 	/**
503 	 * The subexpression for this unary operator.
504 	 */
505 	expression_ptr subexpr;
506 	result operator()() override
507 	{
508 		Op op;
509 		result s = (*subexpr)();
510 		if (!s.second)
511 		{
512 			return s;
513 		}
514 		return {op(s.first), true};
515 	}
516 	/**
517 	 * All unary operators have the same precedence.  They are all evaluated
518 	 * before binary expressions, but after parentheses.
519 	 */
520 	int precedence() override
521 	{
522 		return 3;
523 	}
524 	public:
525 	unary_operator(source_location l, expression_ptr p) :
526 		expression(l), subexpr(std::move(p)) {}
527 #ifndef NDEBUG
528 	void dump_impl() override
529 	{
530 		std::cerr << OpChar;
531 		subexpr->dump();
532 	}
533 #endif
534 };
535 
536 /**
537  * Abstract base class for binary operators.  Allows the tree to be modified
538  * without knowing what the operations actually are.
539  */
540 struct binary_operator_base : public expression
541 {
542 	using expression::expression;
543 	/**
544 	 * The left side of the expression.
545 	 */
546 	expression_ptr lhs;
547 	/**
548 	 * The right side of the expression.
549 	 */
550 	expression_ptr rhs;
551 	/**
552 	 * Insert a node somewhere down the path of left children, until it would
553 	 * be preempting something that should execute first.
554 	 */
555 	void insert_left(binary_operator_base *new_left)
556 	{
557 		if (lhs->precedence() < new_left->precedence())
558 		{
559 			new_left->rhs = std::move(lhs);
560 			lhs.reset(new_left);
561 		}
562 		else
563 		{
564 			static_cast<binary_operator_base*>(lhs.get())->insert_left(new_left);
565 		}
566 	}
567 };
568 
569 /**
570  * Template class for binary operators.  The precedence and the operation are
571  * provided as template parameters.
572  */
573 template<int Precedence, class Op>
574 struct binary_operator : public binary_operator_base
575 {
576 	result operator()() override
577 	{
578 		Op op;
579 		result l = (*lhs)();
580 		result r = (*rhs)();
581 		if (!(l.second && r.second))
582 		{
583 			return {0, false};
584 		}
585 		return {op(l.first, r.first), true};
586 	}
587 	int precedence() override
588 	{
589 		return Precedence;
590 	}
591 #ifdef NDEBUG
592 	/**
593 	 * Constructor.  Takes the name of the operator as an argument, for
594 	 * debugging.  Only stores it in debug mode.
595 	 */
596 	binary_operator(source_location l, const char *) :
597 		binary_operator_base(l) {}
598 #else
599 	const char *opName;
600 	binary_operator(source_location l, const char *o) :
601 		binary_operator_base(l), opName(o) {}
602 	void dump_impl() override
603 	{
604 		lhs->dump();
605 		std::cerr << opName;
606 		rhs->dump();
607 	}
608 #endif
609 };
610 
611 /**
612  * Ternary conditional operators (`cond ? true : false`) are a special case -
613  * there are no other ternary operators.
614  */
615 class ternary_conditional_operator : public expression
616 {
617 	/**
618 	 * The condition for the clause.
619 	 */
620 	expression_ptr cond;
621 	/**
622 	 * The expression that this evaluates to if the condition is true.
623 	 */
624 	expression_ptr lhs;
625 	/**
626 	 * The expression that this evaluates to if the condition is false.
627 	 */
628 	expression_ptr rhs;
629 	result operator()() override
630 	{
631 		result c = (*cond)();
632 		result l = (*lhs)();
633 		result r = (*rhs)();
634 		if (!(l.second && r.second && c.second))
635 		{
636 			return {0, false};
637 		}
638 		return c.first ? l : r;
639 	}
640 	int precedence() override
641 	{
642 		// The actual precedence of a ternary conditional operator is 15, but
643 		// its associativity is the opposite way around to the other operators,
644 		// so we fudge it slightly.
645 		return 3;
646 	}
647 #ifndef NDEBUG
648 	void dump_impl() override
649 	{
650 		cond->dump();
651 		std::cerr << " ? ";
652 		lhs->dump();
653 		std::cerr << " : ";
654 		rhs->dump();
655 	}
656 #endif
657 	public:
658 	ternary_conditional_operator(source_location sl,
659 	                             expression_ptr c,
660 	                             expression_ptr l,
661 	                             expression_ptr r) :
662 		expression(sl), cond(std::move(c)), lhs(std::move(l)),
663 		rhs(std::move(r)) {}
664 };
665 
666 template<typename T>
667 struct lshift
668 {
669 	constexpr T operator()(const T &lhs, const T &rhs) const
670 	{
671 		return lhs << rhs;
672 	}
673 };
674 template<typename T>
675 struct rshift
676 {
677 	constexpr T operator()(const T &lhs, const T &rhs) const
678 	{
679 		return lhs >> rhs;
680 	}
681 };
682 template<typename T>
683 struct unary_plus
684 {
685 	constexpr T operator()(const T &val) const
686 	{
687 		return +val;
688 	}
689 };
690 // TODO: Replace with std::bit_not once we can guarantee C++14 as a baseline.
691 template<typename T>
692 struct bit_not
693 {
694 	constexpr T operator()(const T &val) const
695 	{
696 		return ~val;
697 	}
698 };
699 
700 template<typename T>
701 struct divmod : public binary_operator<5, T>
702 {
703 	using binary_operator<5, T>::binary_operator;
704 	using typename binary_operator_base::result;
705 	result operator()() override
706 	{
707 		result r = (*binary_operator_base::rhs)();
708 		if (r.second && (r.first == 0))
709 		{
710 			expression::loc.report_error("Division by zero");
711 			return {0, false};
712 		}
713 		return binary_operator<5, T>::operator()();
714 	}
715 };
716 
717 } // anonymous namespace
718 
719 
720 expression_ptr text_input_buffer::parse_binary_expression(expression_ptr lhs)
721 {
722 	next_token();
723 	binary_operator_base *expr = nullptr;
724 	char op = *(*this);
725 	source_location l = location();
726 	switch (op)
727 	{
728 		default:
729 			return lhs;
730 		case '+':
731 			expr = new binary_operator<6, std::plus<valty>>(l, "+");
732 			break;
733 		case '-':
734 			expr = new binary_operator<6, std::minus<valty>>(l, "-");
735 			break;
736 		case '%':
737 			expr = new divmod<std::modulus<valty>>(l, "/");
738 			break;
739 		case '*':
740 			expr = new binary_operator<5, std::multiplies<valty>>(l, "*");
741 			break;
742 		case '/':
743 			expr = new divmod<std::divides<valty>>(l, "/");
744 			break;
745 		case '<':
746 			switch (peek())
747 			{
748 				default:
749 					parse_error("Invalid operator");
750 					return nullptr;
751 				case ' ':
752 				case '(':
753 				case '0'...'9':
754 					expr = new binary_operator<8, std::less<valty>>(l, "<");
755 					break;
756 				case '=':
757 					++(*this);
758 					expr = new binary_operator<8, std::less_equal<valty>>(l, "<=");
759 					break;
760 				case '<':
761 					++(*this);
762 					expr = new binary_operator<7, lshift<valty>>(l, "<<");
763 					break;
764 			}
765 			break;
766 		case '>':
767 			switch (peek())
768 			{
769 				default:
770 					parse_error("Invalid operator");
771 					return nullptr;
772 				case '(':
773 				case ' ':
774 				case '0'...'9':
775 					expr = new binary_operator<8, std::greater<valty>>(l, ">");
776 					break;
777 				case '=':
778 					++(*this);
779 					expr = new binary_operator<8, std::greater_equal<valty>>(l, ">=");
780 					break;
781 				case '>':
782 					++(*this);
783 					expr = new binary_operator<7, rshift<valty>>(l, ">>");
784 					break;
785 					return lhs;
786 			}
787 			break;
788 		case '=':
789 			if (peek() != '=')
790 			{
791 				parse_error("Invalid operator");
792 				return nullptr;
793 			}
794 			expr = new binary_operator<9, std::equal_to<valty>>(l, "==");
795 			break;
796 		case '!':
797 			if (peek() != '=')
798 			{
799 				parse_error("Invalid operator");
800 				return nullptr;
801 			}
802 			cursor++;
803 			expr = new binary_operator<9, std::not_equal_to<valty>>(l, "!=");
804 			break;
805 		case '&':
806 			if (peek() == '&')
807 			{
808 				expr = new binary_operator<13, std::logical_and<valty>>(l, "&&");
809 			}
810 			else
811 			{
812 				expr = new binary_operator<10, std::bit_and<valty>>(l, "&");
813 			}
814 			break;
815 		case '|':
816 			if (peek() == '|')
817 			{
818 				expr = new binary_operator<12, std::logical_or<valty>>(l, "||");
819 			}
820 			else
821 			{
822 				expr = new binary_operator<14, std::bit_or<valty>>(l, "|");
823 			}
824 			break;
825 		case '?':
826 		{
827 			consume('?');
828 			expression_ptr true_case = parse_expression();
829 			next_token();
830 			if (!true_case || !consume(':'))
831 			{
832 				parse_error("Expected : in ternary conditional operator");
833 				return nullptr;
834 			}
835 			expression_ptr false_case = parse_expression();
836 			if (!false_case)
837 			{
838 				parse_error("Expected false condition for ternary operator");
839 				return nullptr;
840 			}
841 			return expression_ptr(new ternary_conditional_operator(l, std::move(lhs),
842 						std::move(true_case), std::move(false_case)));
843 		}
844 	}
845 	++(*this);
846 	next_token();
847 	expression_ptr e(expr);
848 	expression_ptr rhs(parse_expression());
849 	if (!rhs)
850 	{
851 		return nullptr;
852 	}
853 	expr->lhs = std::move(lhs);
854 	if (rhs->precedence() < expr->precedence())
855 	{
856 		expr->rhs = std::move(rhs);
857 	}
858 	else
859 	{
860 		// If we're a normal left-to-right expression, then we need to insert
861 		// this as the far-left child node of the rhs expression
862 		binary_operator_base *rhs_op =
863 			static_cast<binary_operator_base*>(rhs.get());
864 		rhs_op->insert_left(expr);
865 		e.release();
866 		return rhs;
867 	}
868 	return e;
869 }
870 
871 expression_ptr text_input_buffer::parse_expression(bool stopAtParen)
872 {
873 	next_token();
874 	unsigned long long leftVal;
875 	expression_ptr lhs;
876 	source_location l = location();
877 	switch (*(*this))
878 	{
879 		case '0'...'9':
880 			if (!consume_integer(leftVal))
881 			{
882 				return nullptr;
883 			}
884 			lhs.reset(new terminal_expr(l, leftVal));
885 			break;
886 		case '(':
887 		{
888 			consume('(');
889 			expression_ptr &&subexpr = parse_expression();
890 			if (!subexpr)
891 			{
892 				return nullptr;
893 			}
894 			lhs.reset(new paren_expression(l, std::move(subexpr)));
895 			if (!consume(')'))
896 			{
897 				return nullptr;
898 			}
899 			if (stopAtParen)
900 			{
901 				return lhs;
902 			}
903 			break;
904 		}
905 		case '+':
906 		{
907 			consume('+');
908 			expression_ptr &&subexpr = parse_expression();
909 			if (!subexpr)
910 			{
911 				return nullptr;
912 			}
913 			lhs.reset(new unary_operator<'+', unary_plus<valty>>(l, std::move(subexpr)));
914 			break;
915 		}
916 		case '-':
917 		{
918 			consume('-');
919 			expression_ptr &&subexpr = parse_expression();
920 			if (!subexpr)
921 			{
922 				return nullptr;
923 			}
924 			lhs.reset(new unary_operator<'-', std::negate<valty>>(l, std::move(subexpr)));
925 			break;
926 		}
927 		case '!':
928 		{
929 			consume('!');
930 			expression_ptr &&subexpr = parse_expression();
931 			if (!subexpr)
932 			{
933 				return nullptr;
934 			}
935 			lhs.reset(new unary_operator<'!', std::logical_not<valty>>(l, std::move(subexpr)));
936 			break;
937 		}
938 		case '~':
939 		{
940 			consume('~');
941 			expression_ptr &&subexpr = parse_expression();
942 			if (!subexpr)
943 			{
944 				return nullptr;
945 			}
946 			lhs.reset(new unary_operator<'~', bit_not<valty>>(l, std::move(subexpr)));
947 			break;
948 		}
949 	}
950 	if (!lhs)
951 	{
952 		return nullptr;
953 	}
954 	return parse_binary_expression(std::move(lhs));
955 }
956 
957 bool
958 text_input_buffer::consume_integer_expression(unsigned long long &outInt)
959 {
960 	switch (*(*this))
961 	{
962 		case '(':
963 		{
964 			expression_ptr e(parse_expression(true));
965 			if (!e)
966 			{
967 				return false;
968 			}
969 			auto r = (*e)();
970 			if (r.second)
971 			{
972 				outInt = r.first;
973 				return true;
974 			}
975 			return false;
976 		}
977 		case '0'...'9':
978 			return consume_integer(outInt);
979 		default:
980 			return false;
981 	}
982 }
983 
984 bool
985 input_buffer::consume_hex_byte(uint8_t &outByte)
986 {
987 	if (!ishexdigit((*this)[0]) && !ishexdigit((*this)[1]))
988 	{
989 		return false;
990 	}
991 	outByte = (digittoint((*this)[0]) << 4) | digittoint((*this)[1]);
992 	cursor += 2;
993 	return true;
994 }
995 
996 text_input_buffer&
997 text_input_buffer::next_token()
998 {
999 	auto &self = *this;
1000 	int start;
1001 	do {
1002 		start = cursor;
1003 		skip_spaces();
1004 		if (finished())
1005 		{
1006 			return self;
1007 		}
1008 		// Parse /* comments
1009 		if (*self == '/' && peek() == '*')
1010 		{
1011 			// eat the start of the comment
1012 			++self;
1013 			++self;
1014 			do {
1015 				// Find the ending * of */
1016 				while ((*self != '\0') && (*self != '*') && !finished())
1017 				{
1018 					++self;
1019 				}
1020 				// Eat the *
1021 				++self;
1022 			} while ((*self != '\0') && (*self != '/') && !finished());
1023 			// Eat the /
1024 			++self;
1025 		}
1026 		// Parse // comments
1027 		if ((*self == '/' && peek() == '/'))
1028 		{
1029 			// eat the start of the comment
1030 			++self;
1031 			++self;
1032 			// Find the ending of the line
1033 			while (*self != '\n' && !finished())
1034 			{
1035 				++self;
1036 			}
1037 			// Eat the \n
1038 			++self;
1039 		}
1040 	} while (start != cursor);
1041 	return self;
1042 }
1043 
1044 void
1045 text_input_buffer::parse_error(const char *msg)
1046 {
1047 	if (input_stack.empty())
1048 	{
1049 		fprintf(stderr, "Error: %s\n", msg);
1050 		return;
1051 	}
1052 	input_buffer &b = *input_stack.top();
1053 	parse_error(msg, b, b.cursor);
1054 }
1055 void
1056 text_input_buffer::parse_error(const char *msg,
1057                                input_buffer &b,
1058                                int loc)
1059 {
1060 	int line_count = 1;
1061 	int line_start = 0;
1062 	int line_end = loc;
1063 	if (loc < 0 || loc > b.size)
1064 	{
1065 		return;
1066 	}
1067 	for (int i=loc ; i>0 ; --i)
1068 	{
1069 		if (b.buffer[i] == '\n')
1070 		{
1071 			line_count++;
1072 			if (line_start == 0)
1073 			{
1074 				line_start = i+1;
1075 			}
1076 		}
1077 	}
1078 	for (int i=loc+1 ; i<b.size ; ++i)
1079 	{
1080 		if (b.buffer[i] == '\n')
1081 		{
1082 			line_end = i;
1083 			break;
1084 		}
1085 	}
1086 	fprintf(stderr, "Error at %s:%d:%d: %s\n", b.filename().c_str(), line_count, loc - line_start, msg);
1087 	fwrite(&b.buffer[line_start], line_end-line_start, 1, stderr);
1088 	putc('\n', stderr);
1089 	for (int i=0 ; i<(loc-line_start) ; ++i)
1090 	{
1091 		char c = (b.buffer[i+line_start] == '\t') ? '\t' : ' ';
1092 		putc(c, stderr);
1093 	}
1094 	putc('^', stderr);
1095 	putc('\n', stderr);
1096 }
1097 #ifndef NDEBUG
1098 void
1099 input_buffer::dump()
1100 {
1101 	fprintf(stderr, "Current cursor: %d\n", cursor);
1102 	fwrite(&buffer[cursor], size-cursor, 1, stderr);
1103 }
1104 #endif
1105 
1106 
1107 namespace
1108 {
1109 /**
1110  * The source files are ASCII, so we provide a non-locale-aware version of
1111  * isalpha.  This is a class so that it can be used with a template function
1112  * for parsing strings.
1113  */
1114 struct is_alpha
1115 {
1116 	static inline bool check(const char c)
1117 	{
1118 		return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') &&
1119 			(c <= 'Z'));
1120 	}
1121 };
1122 /**
1123  * Check whether a character is in the set allowed for node names.  This is a
1124  * class so that it can be used with a template function for parsing strings.
1125  */
1126 struct is_node_name_character
1127 {
1128 	static inline bool check(const char c)
1129 	{
1130 		switch(c)
1131 		{
1132 			default:
1133 				return false;
1134 			case 'a'...'z': case 'A'...'Z': case '0'...'9':
1135 			case ',': case '.': case '+': case '-':
1136 			case '_':
1137 				return true;
1138 		}
1139 	}
1140 };
1141 /**
1142  * Check whether a character is in the set allowed for property names.  This is
1143  * a class so that it can be used with a template function for parsing strings.
1144  */
1145 struct is_property_name_character
1146 {
1147 	static inline bool check(const char c)
1148 	{
1149 		switch(c)
1150 		{
1151 			default:
1152 				return false;
1153 			case 'a'...'z': case 'A'...'Z': case '0'...'9':
1154 			case ',': case '.': case '+': case '-':
1155 			case '_': case '#':
1156 				return true;
1157 		}
1158 	}
1159 };
1160 
1161 template<class T>
1162 string parse(text_input_buffer &s)
1163 {
1164 	std::vector<char> bytes;
1165 	for (char c=*s ; T::check(c) ; c=*(++s))
1166 	{
1167 		bytes.push_back(c);
1168 	}
1169 	return string(bytes.begin(), bytes.end());
1170 }
1171 
1172 }
1173 
1174 string
1175 text_input_buffer::parse_node_name()
1176 {
1177 	return parse<is_node_name_character>(*this);
1178 }
1179 
1180 string
1181 text_input_buffer::parse_property_name()
1182 {
1183 	return parse<is_property_name_character>(*this);
1184 }
1185 
1186 string
1187 text_input_buffer::parse_node_or_property_name(bool &is_property)
1188 {
1189 	if (is_property)
1190 	{
1191 		return parse_property_name();
1192 	}
1193 	std::vector<char> bytes;
1194 	for (char c=*(*this) ; is_node_name_character::check(c) ; c=*(++(*this)))
1195 	{
1196 		bytes.push_back(c);
1197 	}
1198 	for (char c=*(*this) ; is_property_name_character::check(c) ; c=*(++(*this)))
1199 	{
1200 		bytes.push_back(c);
1201 		is_property = true;
1202 	}
1203 	return string(bytes.begin(), bytes.end());
1204 }
1205 
1206 string
1207 input_buffer::parse_to(char stop)
1208 {
1209 	std::vector<char> bytes;
1210 	for (char c=*(*this) ; c != stop ; c=*(++(*this)))
1211 	{
1212 		bytes.push_back(c);
1213 	}
1214 	return string(bytes.begin(), bytes.end());
1215 }
1216 
1217 string
1218 text_input_buffer::parse_to(char stop)
1219 {
1220 	std::vector<char> bytes;
1221 	for (char c=*(*this) ; c != stop ; c=*(++(*this)))
1222 	{
1223 		if (finished())
1224 		{
1225 			break;
1226 		}
1227 		bytes.push_back(c);
1228 	}
1229 	return string(bytes.begin(), bytes.end());
1230 }
1231 
1232 char
1233 text_input_buffer::peek()
1234 {
1235 	return (*input_stack.top())[1];
1236 }
1237 
1238 std::unique_ptr<input_buffer>
1239 input_buffer::buffer_for_file(const string &path, bool warn)
1240 {
1241 	if (path == "-")
1242 	{
1243 		std::unique_ptr<input_buffer> b(new stream_input_buffer());
1244 		return b;
1245 	}
1246 	int source = open(path.c_str(), O_RDONLY);
1247 	if (source == -1)
1248 	{
1249 		if (warn)
1250 		{
1251 			fprintf(stderr, "Unable to open file '%s'.  %s\n", path.c_str(), strerror(errno));
1252 		}
1253 		return 0;
1254 	}
1255 	struct stat st;
1256 	if (fstat(source, &st) == 0 && S_ISDIR(st.st_mode))
1257 	{
1258 		if (warn)
1259 		{
1260 			fprintf(stderr, "File %s is a directory\n", path.c_str());
1261 		}
1262 		close(source);
1263 		return 0;
1264 	}
1265 	std::unique_ptr<input_buffer> b(new mmap_input_buffer(source, string(path)));
1266 	close(source);
1267 	return b;
1268 }
1269 
1270 } // namespace dtc
1271 
1272