xref: /freebsd/contrib/libucl/src/ucl_parser.c (revision 864c53ead899f7838cd2e1cca3b485a4a82f5cdc)
1 /* Copyright (c) 2013, Vsevolod Stakhov
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *       * Redistributions of source code must retain the above copyright
7  *         notice, this list of conditions and the following disclaimer.
8  *       * Redistributions in binary form must reproduce the above copyright
9  *         notice, this list of conditions and the following disclaimer in the
10  *         documentation and/or other materials provided with the distribution.
11  *
12  * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15  * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22  */
23 
24 #include "ucl.h"
25 #include "ucl_internal.h"
26 #include "ucl_chartable.h"
27 
/**
 * @file ucl_parser.c
 * The implementation of the ucl parser
 */
32 
/**
 * Position snapshot holding the same four counters that ucl_chunk_skipc()
 * maintains on a chunk (line, column, remaining bytes, read pointer).
 * NOTE(review): no user of this structure is visible in this part of the
 * file; presumably it is used to save and restore a chunk position.
 */
struct ucl_parser_saved_state {
	unsigned line;            /* line counter */
	unsigned column;          /* column counter */
	size_t remain;            /* number of bytes left */
	const unsigned char *pos; /* read pointer */
};
39 
/**
 * Advance the read cursor of a chunk by exactly one character.
 *
 * A '\n' under the cursor increments chunk->line and resets chunk->column
 * to 0; any other character increments chunk->column. Then the caller's
 * cursor and chunk->pos are both incremented and chunk->remain is
 * decremented. No bounds check is performed: the caller must make sure that
 * the cursor points to a readable character before using the macro.
 *
 * @param chunk pointer to the chunk whose counters are updated
 * @param p modifiable pointer lvalue (the caller's cursor); it is
 *          dereferenced once and then incremented
 */
#define ucl_chunk_skipc(chunk, p)    do{					\
    if (*(p) == '\n') {										\
        (chunk)->line ++;									\
        (chunk)->column = 0;								\
    }														\
    else (chunk)->column ++;								\
    (p)++;													\
    (chunk)->pos ++;										\
    (chunk)->remain --;										\
    } while (0)
57 
58 static inline void
59 ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err)
60 {
61 	if (chunk->pos < chunk->end) {
62 		if (isgraph (*chunk->pos)) {
63 			ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'",
64 					chunk->line, chunk->column, str, *chunk->pos);
65 		}
66 		else {
67 			ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'",
68 					chunk->line, chunk->column, str, (int)*chunk->pos);
69 		}
70 	}
71 	else {
72 		ucl_create_err (err, "error at the end of chunk: %s", str);
73 	}
74 }
75 
76 /**
77  * Skip all comments from the current pos resolving nested and multiline comments
78  * @param parser
79  * @return
80  */
81 static bool
82 ucl_skip_comments (struct ucl_parser *parser)
83 {
84 	struct ucl_chunk *chunk = parser->chunks;
85 	const unsigned char *p;
86 	int comments_nested = 0;
87 
88 	p = chunk->pos;
89 
90 start:
91 	if (*p == '#') {
92 		if (parser->state != UCL_STATE_SCOMMENT &&
93 				parser->state != UCL_STATE_MCOMMENT) {
94 			while (p < chunk->end) {
95 				if (*p == '\n') {
96 					ucl_chunk_skipc (chunk, p);
97 					goto start;
98 				}
99 				ucl_chunk_skipc (chunk, p);
100 			}
101 		}
102 	}
103 	else if (*p == '/' && chunk->remain >= 2) {
104 		if (p[1] == '*') {
105 			ucl_chunk_skipc (chunk, p);
106 			comments_nested ++;
107 			ucl_chunk_skipc (chunk, p);
108 
109 			while (p < chunk->end) {
110 				if (*p == '*') {
111 					ucl_chunk_skipc (chunk, p);
112 					if (*p == '/') {
113 						comments_nested --;
114 						if (comments_nested == 0) {
115 							ucl_chunk_skipc (chunk, p);
116 							goto start;
117 						}
118 					}
119 					ucl_chunk_skipc (chunk, p);
120 				}
121 				else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
122 					comments_nested ++;
123 					ucl_chunk_skipc (chunk, p);
124 					ucl_chunk_skipc (chunk, p);
125 					continue;
126 				}
127 				ucl_chunk_skipc (chunk, p);
128 			}
129 			if (comments_nested != 0) {
130 				ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err);
131 				return false;
132 			}
133 		}
134 	}
135 
136 	return true;
137 }
138 
/**
 * Translate a size suffix character into its numeric multiplier.
 *
 * Recognised suffixes (case insensitive) are 'k', 'm' and 'g'.
 *
 * @param c suffix character
 * @param is_bytes if true powers of 1024 are used, powers of 1000 otherwise
 * @return the multiplier, or 1 when the character is not a known suffix
 */
static inline unsigned long
ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
	long decimal, binary;

	switch (tolower (c)) {
	case 'm':
		decimal = 1000 * 1000;
		binary = 1024 * 1024;
		break;
	case 'k':
		decimal = 1000;
		binary = 1024;
		break;
	case 'g':
		decimal = 1000 * 1000 * 1000;
		binary = 1024 * 1024 * 1024;
		break;
	default:
		/* Unknown suffix: leave the number untouched */
		return 1;
	}

	return is_bytes ? binary : decimal;
}
169 
170 
/**
 * Return the number of seconds in the time unit named by a suffix.
 *
 * Recognised suffixes (case insensitive): 'm' minute, 'h' hour, 'd' day,
 * 'w' week (7 days) and 'y' year (365 days).
 *
 * @param c suffix character
 * @return seconds per unit, or 1 when the character is not a known suffix
 */
static inline double
ucl_lex_time_multiplier (const unsigned char c) {
	static const struct {
		char c;
		double mult;
	} multipliers[] = {
			{'m', 60},
			{'h', 60 * 60},
			{'d', 60 * 60 * 24},
			{'w', 60 * 60 * 24 * 7},
			/* A year is 365 days, not 365 weeks */
			{'y', 60 * 60 * 24 * 365}
	};
	const int lc = tolower (c);
	size_t i;

	for (i = 0; i < sizeof (multipliers) / sizeof (multipliers[0]); i ++) {
		if (lc == multipliers[i].c) {
			return multipliers[i].mult;
		}
	}

	return 1;
}
198 
199 /**
200  * Return true if a character is a end of an atom
201  * @param c
202  * @return
203  */
204 static inline bool
205 ucl_lex_is_atom_end (const unsigned char c)
206 {
207 	return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
208 }
209 
/**
 * Tell whether two consecutive characters open a comment.
 *
 * @param c1 first character
 * @param c2 character following c1 (only examined when c1 is '/')
 * @return true for '#' or for a slash immediately followed by a star
 */
static inline bool
ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
{
	return c1 == '#' || (c1 == '/' && c2 == '*');
}
223 
224 /**
225  * Check variable found
226  * @param parser
227  * @param ptr
228  * @param remain
229  * @param out_len
230  * @param strict
231  * @param found
232  * @return
233  */
234 static inline const char *
235 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
236 		size_t *out_len, bool strict, bool *found)
237 {
238 	struct ucl_variable *var;
239 	unsigned char *dst;
240 	size_t dstlen;
241 	bool need_free = false;
242 
243 	LL_FOREACH (parser->variables, var) {
244 		if (strict) {
245 			if (remain == var->var_len) {
246 				if (memcmp (ptr, var->var, var->var_len) == 0) {
247 					*out_len += var->value_len;
248 					*found = true;
249 					return (ptr + var->var_len);
250 				}
251 			}
252 		}
253 		else {
254 			if (remain >= var->var_len) {
255 				if (memcmp (ptr, var->var, var->var_len) == 0) {
256 					*out_len += var->value_len;
257 					*found = true;
258 					return (ptr + var->var_len);
259 				}
260 			}
261 		}
262 	}
263 
264 	/* XXX: can only handle ${VAR} */
265 	if (!(*found) && parser->var_handler != NULL && strict) {
266 		/* Call generic handler */
267 		if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
268 				parser->var_data)) {
269 			*found = true;
270 			if (need_free) {
271 				free (dst);
272 			}
273 			return (ptr + remain);
274 		}
275 	}
276 
277 	return ptr;
278 }
279 
/**
 * Sizing pass for one '$' occurrence: work out how many output bytes the
 * text following the dollar sign will need and where scanning resumes.
 *
 * The '$' itself has already been consumed by the caller and has not been
 * counted yet. Three forms are handled: a braced variable, a bare variable
 * and a doubled dollar sign (counted as a single character).
 *
 * @param parser parser object
 * @param ptr first character after the '$'
 * @param remain number of bytes available at ptr
 * @param out_len accumulator of the required output size
 * @param vars_found set to true when a variable has been recognised
 * @return position from which the caller continues scanning
 */
static const char *
ucl_check_variable (struct ucl_parser *parser, const char *ptr,
		size_t remain, size_t *out_len, bool *vars_found)
{
	const char *p, *end, *ret = ptr;
	bool found = false, closed = false;

	if (remain == 0) {
		/* A '$' at the very end of the input is an ordinary character */
		(*out_len) ++;
		return ret;
	}

	if (*ptr == '{') {
		/* We need to match the variable enclosed in braces */
		p = ptr + 1;
		end = ptr + remain;
		while (p < end) {
			if (*p == '}') {
				closed = true;
				ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1,
						out_len, true, &found);
				if (found) {
					/* {} must be excluded actually */
					ret ++;
					*vars_found = true;
				}
				else {
					/* Unknown variable: the dollar-brace pair is kept as is */
					*out_len += 2;
				}
				break;
			}
			p ++;
		}
		if (!closed) {
			/*
			 * No closing brace: the text is kept verbatim. The caller counts
			 * everything from the brace on, so only the '$' is added here.
			 */
			(*out_len) ++;
		}
	}
	else if (*ptr != '$') {
		/* Not count escaped dollar sign */
		ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
		if (found) {
			*vars_found = true;
		}
		else {
			/* Unknown variable: the '$' is kept as is */
			(*out_len) ++;
		}
	}
	else {
		/* Doubled dollar sign produces a single one */
		ret ++;
		(*out_len) ++;
	}

	return ret;
}
336 
337 /**
338  * Expand a single variable
339  * @param parser
340  * @param ptr
341  * @param remain
342  * @param dest
343  * @return
344  */
345 static const char *
346 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
347 		size_t remain, unsigned char **dest)
348 {
349 	unsigned char *d = *dest, *dst;
350 	const char *p = ptr + 1, *ret;
351 	struct ucl_variable *var;
352 	size_t dstlen;
353 	bool need_free = false;
354 	bool found = false;
355 	bool strict = false;
356 
357 	ret = ptr + 1;
358 	remain --;
359 
360 	if (*p == '$') {
361 		*d++ = *p++;
362 		*dest = d;
363 		return p;
364 	}
365 	else if (*p == '{') {
366 		p ++;
367 		strict = true;
368 		ret += 2;
369 		remain -= 2;
370 	}
371 
372 	LL_FOREACH (parser->variables, var) {
373 		if (remain >= var->var_len) {
374 			if (memcmp (p, var->var, var->var_len) == 0) {
375 				memcpy (d, var->value, var->value_len);
376 				ret += var->var_len;
377 				d += var->value_len;
378 				found = true;
379 				break;
380 			}
381 		}
382 	}
383 	if (!found) {
384 		if (strict && parser->var_handler != NULL) {
385 			if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
386 							parser->var_data)) {
387 				memcpy (d, dst, dstlen);
388 				ret += dstlen;
389 				d += remain;
390 				found = true;
391 			}
392 		}
393 
394 		/* Leave variable as is */
395 		if (!found) {
396 			memcpy (d, ptr, 2);
397 			d += 2;
398 			ret --;
399 		}
400 	}
401 
402 	*dest = d;
403 	return ret;
404 }
405 
/**
 * Expand all variables found in a string.
 *
 * The input is scanned twice: the first pass (ucl_check_variable) computes
 * an upper bound of the output size, the second one
 * (ucl_expand_single_variable) writes the expanded text into a freshly
 * allocated, NUL terminated buffer.
 *
 * @param parser parser object
 * @param dst receives the new buffer, or NULL when the input contains no
 *            known variable or the allocation failed
 * @param src input string
 * @param in_len length of the input string
 * @return in_len when *dst is NULL, otherwise the length of the expanded
 *         string (excluding the terminating NUL)
 */
static ssize_t
ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
		const char *src, size_t in_len)
{
	const char *p, *end = src + in_len;
	unsigned char *d;
	size_t out_len = 0;
	bool vars_found = false;

	/* Pass 1: estimate the output size */
	p = src;
	while (p < end) {
		if (*p == '$') {
			p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found);
		}
		else {
			p ++;
			out_len ++;
		}
	}

	if (!vars_found) {
		/* Trivial case */
		*dst = NULL;
		return in_len;
	}

	*dst = UCL_ALLOC (out_len + 1);
	if (*dst == NULL) {
		return in_len;
	}

	/* Pass 2: produce the output */
	d = *dst;
	p = src;
	while (p < end) {
		if (*p == '$') {
			p = ucl_expand_single_variable (parser, p, end - p, &d);
		}
		else {
			*d++ = *p++;
		}
	}

	*d = '\0';

	/*
	 * Report what has really been written: the estimate of the first pass
	 * may be larger than the final string.
	 */
	return d - *dst;
}
460 
461 /**
462  * Store or copy pointer to the trash stack
463  * @param parser parser object
464  * @param src src string
465  * @param dst destination buffer (trash stack pointer)
466  * @param dst_const const destination pointer (e.g. value of object)
467  * @param in_len input length
468  * @param need_unescape need to unescape source (and copy it)
469  * @param need_lowercase need to lowercase value (and copy)
470  * @param need_expand need to expand variables (and copy as well)
471  * @return output length (excluding \0 symbol)
472  */
473 static inline ssize_t
474 ucl_copy_or_store_ptr (struct ucl_parser *parser,
475 		const unsigned char *src, unsigned char **dst,
476 		const char **dst_const, size_t in_len,
477 		bool need_unescape, bool need_lowercase, bool need_expand)
478 {
479 	ssize_t ret = -1, tret;
480 	unsigned char *tmp;
481 
482 	if (need_unescape || need_lowercase ||
483 			(need_expand && parser->variables != NULL) ||
484 			!(parser->flags & UCL_PARSER_ZEROCOPY)) {
485 		/* Copy string */
486 		*dst = UCL_ALLOC (in_len + 1);
487 		if (*dst == NULL) {
488 			ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err);
489 			return false;
490 		}
491 		if (need_lowercase) {
492 			ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
493 		}
494 		else {
495 			ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
496 		}
497 
498 		if (need_unescape) {
499 			ret = ucl_unescape_json_string (*dst, ret);
500 		}
501 		if (need_expand) {
502 			tmp = *dst;
503 			tret = ret;
504 			ret = ucl_expand_variable (parser, dst, tmp, ret);
505 			if (*dst == NULL) {
506 				/* Nothing to expand */
507 				*dst = tmp;
508 				ret = tret;
509 			}
510 		}
511 		*dst_const = *dst;
512 	}
513 	else {
514 		*dst_const = src;
515 		ret = in_len;
516 	}
517 
518 	return ret;
519 }
520 
521 /**
522  * Create and append an object at the specified level
523  * @param parser
524  * @param is_array
525  * @param level
526  * @return
527  */
528 static inline ucl_object_t *
529 ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
530 {
531 	struct ucl_stack *st;
532 
533 	if (!is_array) {
534 		if (obj == NULL) {
535 			obj = ucl_object_typed_new (UCL_OBJECT);
536 		}
537 		else {
538 			obj->type = UCL_OBJECT;
539 		}
540 		obj->value.ov = ucl_hash_create ();
541 		parser->state = UCL_STATE_KEY;
542 	}
543 	else {
544 		if (obj == NULL) {
545 			obj = ucl_object_typed_new (UCL_ARRAY);
546 		}
547 		else {
548 			obj->type = UCL_ARRAY;
549 		}
550 		parser->state = UCL_STATE_VALUE;
551 	}
552 
553 	st = UCL_ALLOC (sizeof (struct ucl_stack));
554 	if (st == NULL) {
555 		ucl_set_err (parser->chunks, 0, "cannot allocate memory for an object", &parser->err);
556 		return NULL;
557 	}
558 	st->obj = obj;
559 	st->level = level;
560 	LL_PREPEND (parser->stack, st);
561 	parser->cur_obj = obj;
562 
563 	return obj;
564 }
565 
566 int
567 ucl_maybe_parse_number (ucl_object_t *obj,
568 		const char *start, const char *end, const char **pos,
569 		bool allow_double, bool number_bytes, bool allow_time)
570 {
571 	const char *p = start, *c = start;
572 	char *endptr;
573 	bool got_dot = false, got_exp = false, need_double = false,
574 			is_time = false, valid_start = false, is_hex = false,
575 			is_neg = false;
576 	double dv = 0;
577 	int64_t lv = 0;
578 
579 	if (*p == '-') {
580 		is_neg = true;
581 		c ++;
582 		p ++;
583 	}
584 	while (p < end) {
585 		if (is_hex && isxdigit (*p)) {
586 			p ++;
587 		}
588 		else if (isdigit (*p)) {
589 			valid_start = true;
590 			p ++;
591 		}
592 		else if (!is_hex && (*p == 'x' || *p == 'X')) {
593 			is_hex = true;
594 			allow_double = false;
595 			c = p + 1;
596 		}
597 		else if (allow_double) {
598 			if (p == c) {
599 				/* Empty digits sequence, not a number */
600 				*pos = start;
601 				return EINVAL;
602 			}
603 			else if (*p == '.') {
604 				if (got_dot) {
605 					/* Double dots, not a number */
606 					*pos = start;
607 					return EINVAL;
608 				}
609 				else {
610 					got_dot = true;
611 					need_double = true;
612 					p ++;
613 				}
614 			}
615 			else if (*p == 'e' || *p == 'E') {
616 				if (got_exp) {
617 					/* Double exp, not a number */
618 					*pos = start;
619 					return EINVAL;
620 				}
621 				else {
622 					got_exp = true;
623 					need_double = true;
624 					p ++;
625 					if (p >= end) {
626 						*pos = start;
627 						return EINVAL;
628 					}
629 					if (!isdigit (*p) && *p != '+' && *p != '-') {
630 						/* Wrong exponent sign */
631 						*pos = start;
632 						return EINVAL;
633 					}
634 					else {
635 						p ++;
636 					}
637 				}
638 			}
639 			else {
640 				/* Got the end of the number, need to check */
641 				break;
642 			}
643 		}
644 		else {
645 			break;
646 		}
647 	}
648 
649 	if (!valid_start) {
650 		*pos = start;
651 		return EINVAL;
652 	}
653 
654 	errno = 0;
655 	if (need_double) {
656 		dv = strtod (c, &endptr);
657 	}
658 	else {
659 		if (is_hex) {
660 			lv = strtoimax (c, &endptr, 16);
661 		}
662 		else {
663 			lv = strtoimax (c, &endptr, 10);
664 		}
665 	}
666 	if (errno == ERANGE) {
667 		*pos = start;
668 		return ERANGE;
669 	}
670 
671 	/* Now check endptr */
672 	if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0' ||
673 			ucl_test_character (*endptr, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
674 		p = endptr;
675 		goto set_obj;
676 	}
677 
678 	if (endptr < end && endptr != start) {
679 		p = endptr;
680 		switch (*p) {
681 		case 'm':
682 		case 'M':
683 		case 'g':
684 		case 'G':
685 		case 'k':
686 		case 'K':
687 			if (end - p >= 2) {
688 				if (p[1] == 's' || p[1] == 'S') {
689 					/* Milliseconds */
690 					if (!need_double) {
691 						need_double = true;
692 						dv = lv;
693 					}
694 					is_time = true;
695 					if (p[0] == 'm' || p[0] == 'M') {
696 						dv /= 1000.;
697 					}
698 					else {
699 						dv *= ucl_lex_num_multiplier (*p, false);
700 					}
701 					p += 2;
702 					goto set_obj;
703 				}
704 				else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
705 					/* Bytes */
706 					if (need_double) {
707 						need_double = false;
708 						lv = dv;
709 					}
710 					lv *= ucl_lex_num_multiplier (*p, true);
711 					p += 2;
712 					goto set_obj;
713 				}
714 				else if (ucl_lex_is_atom_end (p[1])) {
715 					if (need_double) {
716 						dv *= ucl_lex_num_multiplier (*p, false);
717 					}
718 					else {
719 						lv *= ucl_lex_num_multiplier (*p, number_bytes);
720 					}
721 					p ++;
722 					goto set_obj;
723 				}
724 				else if (allow_time && end - p >= 3) {
725 					if (tolower (p[0]) == 'm' &&
726 							tolower (p[1]) == 'i' &&
727 							tolower (p[2]) == 'n') {
728 						/* Minutes */
729 						if (!need_double) {
730 							need_double = true;
731 							dv = lv;
732 						}
733 						is_time = true;
734 						dv *= 60.;
735 						p += 3;
736 						goto set_obj;
737 					}
738 				}
739 			}
740 			else {
741 				if (need_double) {
742 					dv *= ucl_lex_num_multiplier (*p, false);
743 				}
744 				else {
745 					lv *= ucl_lex_num_multiplier (*p, number_bytes);
746 				}
747 				p ++;
748 				goto set_obj;
749 			}
750 			break;
751 		case 'S':
752 		case 's':
753 			if (allow_time &&
754 					(p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
755 				if (!need_double) {
756 					need_double = true;
757 					dv = lv;
758 				}
759 				p ++;
760 				is_time = true;
761 				goto set_obj;
762 			}
763 			break;
764 		case 'h':
765 		case 'H':
766 		case 'd':
767 		case 'D':
768 		case 'w':
769 		case 'W':
770 		case 'Y':
771 		case 'y':
772 			if (allow_time &&
773 					(p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
774 				if (!need_double) {
775 					need_double = true;
776 					dv = lv;
777 				}
778 				is_time = true;
779 				dv *= ucl_lex_time_multiplier (*p);
780 				p ++;
781 				goto set_obj;
782 			}
783 			break;
784 		}
785 	}
786 
787 	*pos = c;
788 	return EINVAL;
789 
790 	set_obj:
791 	if (allow_double && (need_double || is_time)) {
792 		if (!is_time) {
793 			obj->type = UCL_FLOAT;
794 		}
795 		else {
796 			obj->type = UCL_TIME;
797 		}
798 		obj->value.dv = is_neg ? (-dv) : dv;
799 	}
800 	else {
801 		obj->type = UCL_INT;
802 		obj->value.iv = is_neg ? (-lv) : lv;
803 	}
804 	*pos = p;
805 	return 0;
806 }
807 
808 /**
809  * Parse possible number
810  * @param parser
811  * @param chunk
812  * @return true if a number has been parsed
813  */
814 static bool
815 ucl_lex_number (struct ucl_parser *parser,
816 		struct ucl_chunk *chunk, ucl_object_t *obj)
817 {
818 	const unsigned char *pos;
819 	int ret;
820 
821 	ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
822 			true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
823 
824 	if (ret == 0) {
825 		chunk->remain -= pos - chunk->pos;
826 		chunk->column += pos - chunk->pos;
827 		chunk->pos = pos;
828 		return true;
829 	}
830 	else if (ret == ERANGE) {
831 		ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err);
832 	}
833 
834 	return false;
835 }
836 
837 /**
838  * Parse quoted string with possible escapes
839  * @param parser
840  * @param chunk
841  * @return true if a string has been parsed
842  */
843 static bool
844 ucl_lex_json_string (struct ucl_parser *parser,
845 		struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
846 {
847 	const unsigned char *p = chunk->pos;
848 	unsigned char c;
849 	int i;
850 
851 	while (p < chunk->end) {
852 		c = *p;
853 		if (c < 0x1F) {
854 			/* Unmasked control character */
855 			if (c == '\n') {
856 				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err);
857 			}
858 			else {
859 				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err);
860 			}
861 			return false;
862 		}
863 		else if (c == '\\') {
864 			ucl_chunk_skipc (chunk, p);
865 			c = *p;
866 			if (p >= chunk->end) {
867 				ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
868 				return false;
869 			}
870 			else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
871 				if (c == 'u') {
872 					ucl_chunk_skipc (chunk, p);
873 					for (i = 0; i < 4 && p < chunk->end; i ++) {
874 						if (!isxdigit (*p)) {
875 							ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err);
876 							return false;
877 						}
878 						ucl_chunk_skipc (chunk, p);
879 					}
880 					if (p >= chunk->end) {
881 						ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
882 						return false;
883 					}
884 				}
885 				else {
886 					ucl_chunk_skipc (chunk, p);
887 				}
888 			}
889 			*need_unescape = true;
890 			*ucl_escape = true;
891 			continue;
892 		}
893 		else if (c == '"') {
894 			ucl_chunk_skipc (chunk, p);
895 			return true;
896 		}
897 		else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
898 			*ucl_escape = true;
899 		}
900 		else if (c == '$') {
901 			*var_expand = true;
902 		}
903 		ucl_chunk_skipc (chunk, p);
904 	}
905 
906 	ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err);
907 	return false;
908 }
909 
910 /**
911  * Parse a key in an object
912  * @param parser
913  * @param chunk
914  * @return true if a key has been parsed
915  */
916 static bool
917 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
918 {
919 	const unsigned char *p, *c = NULL, *end, *t;
920 	const char *key = NULL;
921 	bool got_quote = false, got_eq = false, got_semicolon = false,
922 			need_unescape = false, ucl_escape = false, var_expand = false,
923 			got_content = false, got_sep = false;
924 	ucl_object_t *nobj, *tobj;
925 	ucl_hash_t *container;
926 	ssize_t keylen;
927 
928 	p = chunk->pos;
929 
930 	if (*p == '.') {
931 		/* It is macro actually */
932 		ucl_chunk_skipc (chunk, p);
933 		parser->prev_state = parser->state;
934 		parser->state = UCL_STATE_MACRO_NAME;
935 		return true;
936 	}
937 	while (p < chunk->end) {
938 		/*
939 		 * A key must start with alpha, number, '/' or '_' and end with space character
940 		 */
941 		if (c == NULL) {
942 			if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
943 				if (!ucl_skip_comments (parser)) {
944 					return false;
945 				}
946 				p = chunk->pos;
947 			}
948 			else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
949 				ucl_chunk_skipc (chunk, p);
950 			}
951 			else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
952 				/* The first symbol */
953 				c = p;
954 				ucl_chunk_skipc (chunk, p);
955 				got_content = true;
956 			}
957 			else if (*p == '"') {
958 				/* JSON style key */
959 				c = p + 1;
960 				got_quote = true;
961 				got_content = true;
962 				ucl_chunk_skipc (chunk, p);
963 			}
964 			else if (*p == '}') {
965 				/* We have actually end of an object */
966 				*end_of_object = true;
967 				return true;
968 			}
969 			else if (*p == '.') {
970 				ucl_chunk_skipc (chunk, p);
971 				parser->prev_state = parser->state;
972 				parser->state = UCL_STATE_MACRO_NAME;
973 				return true;
974 			}
975 			else {
976 				/* Invalid identifier */
977 				ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err);
978 				return false;
979 			}
980 		}
981 		else {
982 			/* Parse the body of a key */
983 			if (!got_quote) {
984 				if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
985 					got_content = true;
986 					ucl_chunk_skipc (chunk, p);
987 				}
988 				else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
989 					end = p;
990 					break;
991 				}
992 				else {
993 					ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err);
994 					return false;
995 				}
996 			}
997 			else {
998 				/* We need to parse json like quoted string */
999 				if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1000 					return false;
1001 				}
1002 				/* Always escape keys obtained via json */
1003 				end = chunk->pos - 1;
1004 				p = chunk->pos;
1005 				break;
1006 			}
1007 		}
1008 	}
1009 
1010 	if (p >= chunk->end && got_content) {
1011 		ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1012 		return false;
1013 	}
1014 	else if (!got_content) {
1015 		return true;
1016 	}
1017 	*end_of_object = false;
1018 	/* We are now at the end of the key, need to parse the rest */
1019 	while (p < chunk->end) {
1020 		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1021 			ucl_chunk_skipc (chunk, p);
1022 		}
1023 		else if (*p == '=') {
1024 			if (!got_eq && !got_semicolon) {
1025 				ucl_chunk_skipc (chunk, p);
1026 				got_eq = true;
1027 			}
1028 			else {
1029 				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err);
1030 				return false;
1031 			}
1032 		}
1033 		else if (*p == ':') {
1034 			if (!got_eq && !got_semicolon) {
1035 				ucl_chunk_skipc (chunk, p);
1036 				got_semicolon = true;
1037 			}
1038 			else {
1039 				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err);
1040 				return false;
1041 			}
1042 		}
1043 		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1044 			/* Check for comment */
1045 			if (!ucl_skip_comments (parser)) {
1046 				return false;
1047 			}
1048 			p = chunk->pos;
1049 		}
1050 		else {
1051 			/* Start value */
1052 			break;
1053 		}
1054 	}
1055 
1056 	if (p >= chunk->end && got_content) {
1057 		ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1058 		return false;
1059 	}
1060 
1061 	got_sep = got_semicolon || got_eq;
1062 
1063 	if (!got_sep) {
1064 		/*
1065 		 * Maybe we have more keys nested, so search for termination character.
1066 		 * Possible choices:
1067 		 * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1068 		 * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1069 		 * 3) key1 value[;,\n] <- we treat that as linear object
1070 		 */
1071 		t = p;
1072 		*next_key = false;
1073 		while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1074 			t ++;
1075 		}
1076 		/* Check first non-space character after a key */
1077 		if (*t != '{' && *t != '[') {
1078 			while (t < chunk->end) {
1079 				if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1080 					break;
1081 				}
1082 				else if (*t == '{' || *t == '[') {
1083 					*next_key = true;
1084 					break;
1085 				}
1086 				t ++;
1087 			}
1088 		}
1089 	}
1090 
1091 	/* Create a new object */
1092 	nobj = ucl_object_new ();
1093 	keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1094 			&key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1095 	if (keylen == -1) {
1096 		ucl_object_unref (nobj);
1097 		return false;
1098 	}
1099 	else if (keylen == 0) {
1100 		ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1101 		ucl_object_unref (nobj);
1102 		return false;
1103 	}
1104 
1105 	container = parser->stack->obj->value.ov;
1106 	nobj->key = key;
1107 	nobj->keylen = keylen;
1108 	tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj));
1109 	if (tobj == NULL) {
1110 		container = ucl_hash_insert_object (container, nobj);
1111 		nobj->prev = nobj;
1112 		nobj->next = NULL;
1113 		parser->stack->obj->len ++;
1114 	}
1115 	else {
1116 		DL_APPEND (tobj, nobj);
1117 	}
1118 
1119 	if (ucl_escape) {
1120 		nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1121 	}
1122 	parser->stack->obj->value.ov = container;
1123 
1124 	parser->cur_obj = nobj;
1125 
1126 	return true;
1127 }
1128 
1129 /**
1130  * Parse a cl string
1131  * @param parser
1132  * @param chunk
1133  * @return true if a key has been parsed
1134  */
1135 static bool
1136 ucl_parse_string_value (struct ucl_parser *parser,
1137 		struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1138 {
1139 	const unsigned char *p;
1140 	enum {
1141 		UCL_BRACE_ROUND = 0,
1142 		UCL_BRACE_SQUARE,
1143 		UCL_BRACE_FIGURE
1144 	};
1145 	int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1146 
1147 	p = chunk->pos;
1148 
1149 	while (p < chunk->end) {
1150 
1151 		/* Skip pairs of figure braces */
1152 		if (*p == '{') {
1153 			braces[UCL_BRACE_FIGURE][0] ++;
1154 		}
1155 		else if (*p == '}') {
1156 			braces[UCL_BRACE_FIGURE][1] ++;
1157 			if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1158 				/* This is not a termination symbol, continue */
1159 				ucl_chunk_skipc (chunk, p);
1160 				continue;
1161 			}
1162 		}
1163 		/* Skip pairs of square braces */
1164 		else if (*p == '[') {
1165 			braces[UCL_BRACE_SQUARE][0] ++;
1166 		}
1167 		else if (*p == ']') {
1168 			braces[UCL_BRACE_SQUARE][1] ++;
1169 			if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1170 				/* This is not a termination symbol, continue */
1171 				ucl_chunk_skipc (chunk, p);
1172 				continue;
1173 			}
1174 		}
1175 		else if (*p == '$') {
1176 			*var_expand = true;
1177 		}
1178 		else if (*p == '\\') {
1179 			*need_unescape = true;
1180 			ucl_chunk_skipc (chunk, p);
1181 			if (p < chunk->end) {
1182 				ucl_chunk_skipc (chunk, p);
1183 			}
1184 			continue;
1185 		}
1186 
1187 		if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1188 			break;
1189 		}
1190 		ucl_chunk_skipc (chunk, p);
1191 	}
1192 
1193 	if (p >= chunk->end) {
1194 		ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err);
1195 		return false;
1196 	}
1197 
1198 	return true;
1199 }
1200 
1201 /**
1202  * Parse multiline string ending with \n{term}\n
1203  * @param parser
1204  * @param chunk
1205  * @param term
1206  * @param term_len
1207  * @return size of multiline string or 0 in case of error
1208  */
1209 static int
1210 ucl_parse_multiline_string (struct ucl_parser *parser,
1211 		struct ucl_chunk *chunk, const unsigned char *term,
1212 		int term_len, unsigned char const **beg,
1213 		bool *var_expand)
1214 {
1215 	const unsigned char *p, *c;
1216 	bool newline = false;
1217 	int len = 0;
1218 
1219 	p = chunk->pos;
1220 
1221 	c = p;
1222 
1223 	while (p < chunk->end) {
1224 		if (newline) {
1225 			if (chunk->end - p < term_len) {
1226 				return 0;
1227 			}
1228 			else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) {
1229 				len = p - c;
1230 				chunk->remain -= term_len;
1231 				chunk->pos = p + term_len;
1232 				chunk->column = term_len;
1233 				*beg = c;
1234 				break;
1235 			}
1236 		}
1237 		if (*p == '\n') {
1238 			newline = true;
1239 		}
1240 		else {
1241 			if (*p == '$') {
1242 				*var_expand = true;
1243 			}
1244 			newline = false;
1245 		}
1246 		ucl_chunk_skipc (chunk, p);
1247 	}
1248 
1249 	return len;
1250 }
1251 
1252 static ucl_object_t*
1253 ucl_get_value_object (struct ucl_parser *parser)
1254 {
1255 	ucl_object_t *t, *obj = NULL;
1256 
1257 	if (parser->stack->obj->type == UCL_ARRAY) {
1258 		/* Object must be allocated */
1259 		obj = ucl_object_new ();
1260 		t = parser->stack->obj->value.av;
1261 		DL_APPEND (t, obj);
1262 		parser->cur_obj = obj;
1263 		parser->stack->obj->value.av = t;
1264 		parser->stack->obj->len ++;
1265 	}
1266 	else {
1267 		/* Object has been already allocated */
1268 		obj = parser->cur_obj;
1269 	}
1270 
1271 	return obj;
1272 }
1273 
1274 /**
1275  * Handle value data
1276  * @param parser
1277  * @param chunk
1278  * @return
1279  */
1280 static bool
1281 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1282 {
1283 	const unsigned char *p, *c;
1284 	ucl_object_t *obj = NULL;
1285 	unsigned int stripped_spaces;
1286 	int str_len;
1287 	bool need_unescape = false, ucl_escape = false, var_expand = false;
1288 
1289 	p = chunk->pos;
1290 
1291 	/* Skip any spaces and comments */
1292 	if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1293 			(chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1294 		while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1295 			ucl_chunk_skipc (chunk, p);
1296 		}
1297 		if (!ucl_skip_comments (parser)) {
1298 			return false;
1299 		}
1300 		p = chunk->pos;
1301 	}
1302 
1303 	while (p < chunk->end) {
1304 		c = p;
1305 		switch (*p) {
1306 		case '"':
1307 			obj = ucl_get_value_object (parser);
1308 			ucl_chunk_skipc (chunk, p);
1309 			if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1310 				return false;
1311 			}
1312 			str_len = chunk->pos - c - 2;
1313 			obj->type = UCL_STRING;
1314 			if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1315 					&obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1316 				return false;
1317 			}
1318 			obj->len = str_len;
1319 			parser->state = UCL_STATE_AFTER_VALUE;
1320 			p = chunk->pos;
1321 			return true;
1322 			break;
1323 		case '{':
1324 			obj = ucl_get_value_object (parser);
1325 			/* We have a new object */
1326 			obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
1327 			if (obj == NULL) {
1328 				return false;
1329 			}
1330 
1331 			ucl_chunk_skipc (chunk, p);
1332 			return true;
1333 			break;
1334 		case '[':
1335 			obj = ucl_get_value_object (parser);
1336 			/* We have a new array */
1337 			obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1338 			if (obj == NULL) {
1339 				return false;
1340 			}
1341 
1342 			ucl_chunk_skipc (chunk, p);
1343 			return true;
1344 			break;
1345 		case ']':
1346 			/* We have the array ending */
1347 			if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1348 				parser->state = UCL_STATE_AFTER_VALUE;
1349 				return true;
1350 			}
1351 			else {
1352 				goto parse_string;
1353 			}
1354 			break;
1355 		case '<':
1356 			obj = ucl_get_value_object (parser);
1357 			/* We have something like multiline value, which must be <<[A-Z]+\n */
1358 			if (chunk->end - p > 3) {
1359 				if (memcmp (p, "<<", 2) == 0) {
1360 					p += 2;
1361 					/* We allow only uppercase characters in multiline definitions */
1362 					while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1363 						p ++;
1364 					}
1365 					if (*p =='\n') {
1366 						/* Set chunk positions and start multiline parsing */
1367 						c += 2;
1368 						chunk->remain -= p - c;
1369 						chunk->pos = p + 1;
1370 						chunk->column = 0;
1371 						chunk->line ++;
1372 						if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1373 								p - c, &c, &var_expand)) == 0) {
1374 							ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err);
1375 							return false;
1376 						}
1377 						obj->type = UCL_STRING;
1378 						if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1379 							&obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1380 							return false;
1381 						}
1382 						obj->len = str_len;
1383 						parser->state = UCL_STATE_AFTER_VALUE;
1384 						return true;
1385 					}
1386 				}
1387 			}
1388 			/* Fallback to ordinary strings */
1389 		default:
1390 parse_string:
1391 			if (obj == NULL) {
1392 				obj = ucl_get_value_object (parser);
1393 			}
1394 			/* Parse atom */
1395 			if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1396 				if (!ucl_lex_number (parser, chunk, obj)) {
1397 					if (parser->state == UCL_STATE_ERROR) {
1398 						return false;
1399 					}
1400 				}
1401 				else {
1402 					parser->state = UCL_STATE_AFTER_VALUE;
1403 					return true;
1404 				}
1405 				/* Fallback to normal string */
1406 			}
1407 
1408 			if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1409 				return false;
1410 			}
1411 			/* Cut trailing spaces */
1412 			stripped_spaces = 0;
1413 			while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1414 					UCL_CHARACTER_WHITESPACE)) {
1415 				stripped_spaces ++;
1416 			}
1417 			str_len = chunk->pos - c - stripped_spaces;
1418 			if (str_len <= 0) {
1419 				ucl_set_err (chunk, 0, "string value must not be empty", &parser->err);
1420 				return false;
1421 			}
1422 			else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1423 				obj->len = 0;
1424 				obj->type = UCL_NULL;
1425 			}
1426 			else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1427 				obj->type = UCL_STRING;
1428 				if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1429 						&obj->value.sv, str_len, need_unescape,
1430 						false, var_expand)) == -1) {
1431 					return false;
1432 				}
1433 				obj->len = str_len;
1434 			}
1435 			parser->state = UCL_STATE_AFTER_VALUE;
1436 			p = chunk->pos;
1437 
1438 			return true;
1439 			break;
1440 		}
1441 	}
1442 
1443 	return true;
1444 }
1445 
1446 /**
1447  * Handle after value data
1448  * @param parser
1449  * @param chunk
1450  * @return
1451  */
1452 static bool
1453 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1454 {
1455 	const unsigned char *p;
1456 	bool got_sep = false;
1457 	struct ucl_stack *st;
1458 
1459 	p = chunk->pos;
1460 
1461 	while (p < chunk->end) {
1462 		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1463 			/* Skip whitespaces */
1464 			ucl_chunk_skipc (chunk, p);
1465 		}
1466 		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1467 			/* Skip comment */
1468 			if (!ucl_skip_comments (parser)) {
1469 				return false;
1470 			}
1471 			/* Treat comment as a separator */
1472 			got_sep = true;
1473 			p = chunk->pos;
1474 		}
1475 		else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1476 			if (*p == '}' || *p == ']') {
1477 				if (parser->stack == NULL) {
1478 					ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err);
1479 					return false;
1480 				}
1481 				if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1482 						(*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1483 
1484 					/* Pop all nested objects from a stack */
1485 					st = parser->stack;
1486 					parser->stack = st->next;
1487 					UCL_FREE (sizeof (struct ucl_stack), st);
1488 
1489 					while (parser->stack != NULL) {
1490 						st = parser->stack;
1491 						if (st->next == NULL || st->next->level == st->level) {
1492 							break;
1493 						}
1494 						parser->stack = st->next;
1495 						UCL_FREE (sizeof (struct ucl_stack), st);
1496 					}
1497 				}
1498 				else {
1499 					ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err);
1500 					return false;
1501 				}
1502 
1503 				if (parser->stack == NULL) {
1504 					/* Ignore everything after a top object */
1505 					return true;
1506 				}
1507 				else {
1508 					ucl_chunk_skipc (chunk, p);
1509 				}
1510 				got_sep = true;
1511 			}
1512 			else {
1513 				/* Got a separator */
1514 				got_sep = true;
1515 				ucl_chunk_skipc (chunk, p);
1516 			}
1517 		}
1518 		else {
1519 			/* Anything else */
1520 			if (!got_sep) {
1521 				ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err);
1522 				return false;
1523 			}
1524 			return true;
1525 		}
1526 	}
1527 
1528 	return true;
1529 }
1530 
1531 /**
1532  * Handle macro data
1533  * @param parser
1534  * @param chunk
1535  * @return
1536  */
1537 static bool
1538 ucl_parse_macro_value (struct ucl_parser *parser,
1539 		struct ucl_chunk *chunk, struct ucl_macro *macro,
1540 		unsigned char const **macro_start, size_t *macro_len)
1541 {
1542 	const unsigned char *p, *c;
1543 	bool need_unescape = false, ucl_escape = false, var_expand = false;
1544 
1545 	p = chunk->pos;
1546 
1547 	switch (*p) {
1548 	case '"':
1549 		/* We have macro value encoded in quotes */
1550 		c = p;
1551 		ucl_chunk_skipc (chunk, p);
1552 		if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1553 			return false;
1554 		}
1555 
1556 		*macro_start = c + 1;
1557 		*macro_len = chunk->pos - c - 2;
1558 		p = chunk->pos;
1559 		break;
1560 	case '{':
1561 		/* We got a multiline macro body */
1562 		ucl_chunk_skipc (chunk, p);
1563 		/* Skip spaces at the beginning */
1564 		while (p < chunk->end) {
1565 			if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1566 				ucl_chunk_skipc (chunk, p);
1567 			}
1568 			else {
1569 				break;
1570 			}
1571 		}
1572 		c = p;
1573 		while (p < chunk->end) {
1574 			if (*p == '}') {
1575 				break;
1576 			}
1577 			ucl_chunk_skipc (chunk, p);
1578 		}
1579 		*macro_start = c;
1580 		*macro_len = p - c;
1581 		ucl_chunk_skipc (chunk, p);
1582 		break;
1583 	default:
1584 		/* Macro is not enclosed in quotes or braces */
1585 		c = p;
1586 		while (p < chunk->end) {
1587 			if (ucl_lex_is_atom_end (*p)) {
1588 				break;
1589 			}
1590 			ucl_chunk_skipc (chunk, p);
1591 		}
1592 		*macro_start = c;
1593 		*macro_len = p - c;
1594 		break;
1595 	}
1596 
1597 	/* We are at the end of a macro */
1598 	/* Skip ';' and space characters and return to previous state */
1599 	while (p < chunk->end) {
1600 		if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1601 			break;
1602 		}
1603 		ucl_chunk_skipc (chunk, p);
1604 	}
1605 	return true;
1606 }
1607 
1608 /**
1609  * Handle the main states of rcl parser
1610  * @param parser parser structure
1611  * @param data the pointer to the beginning of a chunk
1612  * @param len the length of a chunk
1613  * @return true if chunk has been parsed and false in case of error
1614  */
1615 static bool
1616 ucl_state_machine (struct ucl_parser *parser)
1617 {
1618 	ucl_object_t *obj;
1619 	struct ucl_chunk *chunk = parser->chunks;
1620 	const unsigned char *p, *c = NULL, *macro_start = NULL;
1621 	unsigned char *macro_escaped;
1622 	size_t macro_len = 0;
1623 	struct ucl_macro *macro = NULL;
1624 	bool next_key = false, end_of_object = false;
1625 
1626 	if (parser->top_obj == NULL) {
1627 		if (*chunk->pos == '[') {
1628 			obj = ucl_add_parser_stack (NULL, parser, true, 0);
1629 		}
1630 		else {
1631 			obj = ucl_add_parser_stack (NULL, parser, false, 0);
1632 		}
1633 		if (obj == NULL) {
1634 			return false;
1635 		}
1636 		parser->top_obj = obj;
1637 		parser->cur_obj = obj;
1638 		parser->state = UCL_STATE_INIT;
1639 	}
1640 
1641 	p = chunk->pos;
1642 	while (chunk->pos < chunk->end) {
1643 		switch (parser->state) {
1644 		case UCL_STATE_INIT:
1645 			/*
1646 			 * At the init state we can either go to the parse array or object
1647 			 * if we got [ or { correspondingly or can just treat new data as
1648 			 * a key of newly created object
1649 			 */
1650 			obj = parser->cur_obj;
1651 			if (!ucl_skip_comments (parser)) {
1652 				parser->prev_state = parser->state;
1653 				parser->state = UCL_STATE_ERROR;
1654 				return false;
1655 			}
1656 			else {
1657 				p = chunk->pos;
1658 				if (*p == '[') {
1659 					parser->state = UCL_STATE_VALUE;
1660 					ucl_chunk_skipc (chunk, p);
1661 				}
1662 				else {
1663 					parser->state = UCL_STATE_KEY;
1664 					if (*p == '{') {
1665 						ucl_chunk_skipc (chunk, p);
1666 					}
1667 				}
1668 			}
1669 			break;
1670 		case UCL_STATE_KEY:
1671 			/* Skip any spaces */
1672 			while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1673 				ucl_chunk_skipc (chunk, p);
1674 			}
1675 			if (*p == '}') {
1676 				/* We have the end of an object */
1677 				parser->state = UCL_STATE_AFTER_VALUE;
1678 				continue;
1679 			}
1680 			if (parser->stack == NULL) {
1681 				/* No objects are on stack, but we want to parse a key */
1682 				ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser "
1683 						"expects a key", &parser->err);
1684 				parser->prev_state = parser->state;
1685 				parser->state = UCL_STATE_ERROR;
1686 				return false;
1687 			}
1688 			if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1689 				parser->prev_state = parser->state;
1690 				parser->state = UCL_STATE_ERROR;
1691 				return false;
1692 			}
1693 			if (end_of_object) {
1694 				p = chunk->pos;
1695 				parser->state = UCL_STATE_AFTER_VALUE;
1696 				continue;
1697 			}
1698 			else if (parser->state != UCL_STATE_MACRO_NAME) {
1699 				if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1700 					/* Parse more keys and nest objects accordingly */
1701 					obj = ucl_add_parser_stack (parser->cur_obj, parser, false,
1702 							parser->stack->level + 1);
1703 					if (obj == NULL) {
1704 						return false;
1705 					}
1706 				}
1707 				else {
1708 					parser->state = UCL_STATE_VALUE;
1709 				}
1710 			}
1711 			else {
1712 				c = chunk->pos;
1713 			}
1714 			p = chunk->pos;
1715 			break;
1716 		case UCL_STATE_VALUE:
1717 			/* We need to check what we do have */
1718 			if (!ucl_parse_value (parser, chunk)) {
1719 				parser->prev_state = parser->state;
1720 				parser->state = UCL_STATE_ERROR;
1721 				return false;
1722 			}
1723 			/* State is set in ucl_parse_value call */
1724 			p = chunk->pos;
1725 			break;
1726 		case UCL_STATE_AFTER_VALUE:
1727 			if (!ucl_parse_after_value (parser, chunk)) {
1728 				parser->prev_state = parser->state;
1729 				parser->state = UCL_STATE_ERROR;
1730 				return false;
1731 			}
1732 			if (parser->stack != NULL) {
1733 				if (parser->stack->obj->type == UCL_OBJECT) {
1734 					parser->state = UCL_STATE_KEY;
1735 				}
1736 				else {
1737 					/* Array */
1738 					parser->state = UCL_STATE_VALUE;
1739 				}
1740 			}
1741 			else {
1742 				/* Skip everything at the end */
1743 				return true;
1744 			}
1745 			p = chunk->pos;
1746 			break;
1747 		case UCL_STATE_MACRO_NAME:
1748 			if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1749 				ucl_chunk_skipc (chunk, p);
1750 			}
1751 			else if (p - c > 0) {
1752 				/* We got macro name */
1753 				macro_len = (size_t)(p - c);
1754 				HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1755 				if (macro == NULL) {
1756 					ucl_create_err (&parser->err, "error on line %d at column %d: "
1757 							"unknown macro: '%.*s', character: '%c'",
1758 								chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1759 					parser->state = UCL_STATE_ERROR;
1760 					return false;
1761 				}
1762 				/* Now we need to skip all spaces */
1763 				while (p < chunk->end) {
1764 					if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1765 						if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1766 							/* Skip comment */
1767 							if (!ucl_skip_comments (parser)) {
1768 								return false;
1769 							}
1770 							p = chunk->pos;
1771 						}
1772 						break;
1773 					}
1774 					ucl_chunk_skipc (chunk, p);
1775 				}
1776 				parser->state = UCL_STATE_MACRO;
1777 			}
1778 			break;
1779 		case UCL_STATE_MACRO:
1780 			if (!ucl_parse_macro_value (parser, chunk, macro,
1781 					&macro_start, &macro_len)) {
1782 				parser->prev_state = parser->state;
1783 				parser->state = UCL_STATE_ERROR;
1784 				return false;
1785 			}
1786 			macro_len = ucl_expand_variable (parser, &macro_escaped, macro_start, macro_len);
1787 			parser->state = parser->prev_state;
1788 			if (macro_escaped == NULL) {
1789 				if (!macro->handler (macro_start, macro_len, macro->ud)) {
1790 					return false;
1791 				}
1792 			}
1793 			else {
1794 				if (!macro->handler (macro_escaped, macro_len, macro->ud)) {
1795 					UCL_FREE (macro_len + 1, macro_escaped);
1796 					return false;
1797 				}
1798 				UCL_FREE (macro_len + 1, macro_escaped);
1799 			}
1800 			p = chunk->pos;
1801 			break;
1802 		default:
1803 			/* TODO: add all states */
1804 			ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err);
1805 			parser->state = UCL_STATE_ERROR;
1806 			return false;
1807 		}
1808 	}
1809 
1810 	return true;
1811 }
1812 
1813 struct ucl_parser*
1814 ucl_parser_new (int flags)
1815 {
1816 	struct ucl_parser *new;
1817 
1818 	new = UCL_ALLOC (sizeof (struct ucl_parser));
1819 	if (new == NULL) {
1820 		return NULL;
1821 	}
1822 	memset (new, 0, sizeof (struct ucl_parser));
1823 
1824 	ucl_parser_register_macro (new, "include", ucl_include_handler, new);
1825 	ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
1826 	ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
1827 
1828 	new->flags = flags;
1829 
1830 	/* Initial assumption about filevars */
1831 	ucl_parser_set_filevars (new, NULL, false);
1832 
1833 	return new;
1834 }
1835 
1836 
1837 void
1838 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
1839 		ucl_macro_handler handler, void* ud)
1840 {
1841 	struct ucl_macro *new;
1842 
1843 	if (macro == NULL || handler == NULL) {
1844 		return;
1845 	}
1846 	new = UCL_ALLOC (sizeof (struct ucl_macro));
1847 	if (new == NULL) {
1848 		return;
1849 	}
1850 	memset (new, 0, sizeof (struct ucl_macro));
1851 	new->handler = handler;
1852 	new->name = strdup (macro);
1853 	new->ud = ud;
1854 	HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
1855 }
1856 
1857 void
1858 ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
1859 		const char *value)
1860 {
1861 	struct ucl_variable *new = NULL, *cur;
1862 
1863 	if (var == NULL) {
1864 		return;
1865 	}
1866 
1867 	/* Find whether a variable already exists */
1868 	LL_FOREACH (parser->variables, cur) {
1869 		if (strcmp (cur->var, var) == 0) {
1870 			new = cur;
1871 			break;
1872 		}
1873 	}
1874 
1875 	if (value == NULL) {
1876 
1877 		if (new != NULL) {
1878 			/* Remove variable */
1879 			LL_DELETE (parser->variables, new);
1880 			free (new->var);
1881 			free (new->value);
1882 			UCL_FREE (sizeof (struct ucl_variable), new);
1883 		}
1884 		else {
1885 			/* Do nothing */
1886 			return;
1887 		}
1888 	}
1889 	else {
1890 		if (new == NULL) {
1891 			new = UCL_ALLOC (sizeof (struct ucl_variable));
1892 			if (new == NULL) {
1893 				return;
1894 			}
1895 			memset (new, 0, sizeof (struct ucl_variable));
1896 			new->var = strdup (var);
1897 			new->var_len = strlen (var);
1898 			new->value = strdup (value);
1899 			new->value_len = strlen (value);
1900 
1901 			LL_PREPEND (parser->variables, new);
1902 		}
1903 		else {
1904 			free (new->value);
1905 			new->value = strdup (value);
1906 			new->value_len = strlen (value);
1907 		}
1908 	}
1909 }
1910 
1911 void
1912 ucl_parser_set_variables_handler (struct ucl_parser *parser,
1913 		ucl_variable_handler handler, void *ud)
1914 {
1915 	parser->var_handler = handler;
1916 	parser->var_data = ud;
1917 }
1918 
1919 bool
1920 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
1921 		size_t len)
1922 {
1923 	struct ucl_chunk *chunk;
1924 
1925 	if (data == NULL || len == 0) {
1926 		ucl_create_err (&parser->err, "invalid chunk added");
1927 		return false;
1928 	}
1929 	if (parser->state != UCL_STATE_ERROR) {
1930 		chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
1931 		if (chunk == NULL) {
1932 			ucl_create_err (&parser->err, "cannot allocate chunk structure");
1933 			return false;
1934 		}
1935 		chunk->begin = data;
1936 		chunk->remain = len;
1937 		chunk->pos = chunk->begin;
1938 		chunk->end = chunk->begin + len;
1939 		chunk->line = 1;
1940 		chunk->column = 0;
1941 		LL_PREPEND (parser->chunks, chunk);
1942 		parser->recursion ++;
1943 		if (parser->recursion > UCL_MAX_RECURSION) {
1944 			ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
1945 					parser->recursion);
1946 			return false;
1947 		}
1948 		return ucl_state_machine (parser);
1949 	}
1950 
1951 	ucl_create_err (&parser->err, "a parser is in an invalid state");
1952 
1953 	return false;
1954 }
1955 
1956 bool
1957 ucl_parser_add_string (struct ucl_parser *parser, const char *data,
1958 		size_t len)
1959 {
1960 	if (data == NULL) {
1961 		ucl_create_err (&parser->err, "invalid string added");
1962 		return false;
1963 	}
1964 	if (len == 0) {
1965 		len = strlen (data);
1966 	}
1967 
1968 	return ucl_parser_add_chunk (parser, (const unsigned char *)data, len);
1969 }
1970