xref: /freebsd/contrib/libucl/src/ucl_parser.c (revision 38f0b757fd84d17d0fc24739a7cda160c4516d81)
1 /* Copyright (c) 2013, Vsevolod Stakhov
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *       * Redistributions of source code must retain the above copyright
7  *         notice, this list of conditions and the following disclaimer.
8  *       * Redistributions in binary form must reproduce the above copyright
9  *         notice, this list of conditions and the following disclaimer in the
10  *         documentation and/or other materials provided with the distribution.
11  *
12  * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15  * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22  */
23 
24 #include "ucl.h"
25 #include "ucl_internal.h"
26 #include "ucl_chartable.h"
27 
/**
 * @file ucl_parser.c
 * The implementation of the UCL parser
 */
32 
/*
 * Snapshot of a chunk cursor. ucl_chunk_save_state () fills it from a chunk
 * and ucl_chunk_restore_state () writes the same four fields back.
 */
struct ucl_parser_saved_state {
	unsigned int line;		/* copy of chunk->line */
	unsigned int column;		/* copy of chunk->column */
	size_t remain;			/* copy of chunk->remain */
	const unsigned char *pos;	/* copy of chunk->pos */
};
39 
/**
 * Advance a chunk cursor by exactly one character.
 *
 * If the character under `p` is a newline the chunk line counter is
 * incremented and the column is reset to 0, otherwise the column is
 * incremented. Afterwards `p` and chunk->pos are moved forward by one and
 * chunk->remain is decremented.
 *
 * Both arguments are evaluated more than once, so they must not have side
 * effects. `p` must be a modifiable lvalue; it is parenthesised before the
 * increment so that expressions such as `*pp` advance the intended object.
 *
 * @param chunk pointer to the chunk whose line/column/pos/remain are updated
 * @param p modifiable pointer to the current character
 */
#define ucl_chunk_skipc(chunk, p)    do{					\
    if (*(p) == '\n') {										\
        (chunk)->line ++;									\
        (chunk)->column = 0;								\
    }														\
    else (chunk)->column ++;								\
    (p) ++;													\
    (chunk)->pos ++;										\
    (chunk)->remain --;										\
    } while (0)
57 
58 /**
59  * Save parser state
60  * @param chunk
61  * @param s
62  */
63 static inline void
64 ucl_chunk_save_state (struct ucl_chunk *chunk, struct ucl_parser_saved_state *s)
65 {
66 	s->column = chunk->column;
67 	s->pos = chunk->pos;
68 	s->line = chunk->line;
69 	s->remain = chunk->remain;
70 }
71 
72 /**
73  * Restore parser state
74  * @param chunk
75  * @param s
76  */
77 static inline void
78 ucl_chunk_restore_state (struct ucl_chunk *chunk, struct ucl_parser_saved_state *s)
79 {
80 	chunk->column = s->column;
81 	chunk->pos = s->pos;
82 	chunk->line = s->line;
83 	chunk->remain = s->remain;
84 }
85 
86 static inline void
87 ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err)
88 {
89 	if (chunk->pos < chunk->end) {
90 		if (isgraph (*chunk->pos)) {
91 			ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'",
92 					chunk->line, chunk->column, str, *chunk->pos);
93 		}
94 		else {
95 			ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'",
96 					chunk->line, chunk->column, str, (int)*chunk->pos);
97 		}
98 	}
99 	else {
100 		ucl_create_err (err, "error at the end of chunk: %s", str);
101 	}
102 }
103 
104 /**
105  * Skip all comments from the current pos resolving nested and multiline comments
106  * @param parser
107  * @return
108  */
109 static bool
110 ucl_skip_comments (struct ucl_parser *parser)
111 {
112 	struct ucl_chunk *chunk = parser->chunks;
113 	const unsigned char *p;
114 	int comments_nested = 0;
115 
116 	p = chunk->pos;
117 
118 start:
119 	if (*p == '#') {
120 		if (parser->state != UCL_STATE_SCOMMENT &&
121 				parser->state != UCL_STATE_MCOMMENT) {
122 			while (p < chunk->end) {
123 				if (*p == '\n') {
124 					ucl_chunk_skipc (chunk, p);
125 					goto start;
126 				}
127 				ucl_chunk_skipc (chunk, p);
128 			}
129 		}
130 	}
131 	else if (*p == '/' && chunk->remain >= 2) {
132 		if (p[1] == '*') {
133 			ucl_chunk_skipc (chunk, p);
134 			comments_nested ++;
135 			ucl_chunk_skipc (chunk, p);
136 
137 			while (p < chunk->end) {
138 				if (*p == '*') {
139 					ucl_chunk_skipc (chunk, p);
140 					if (*p == '/') {
141 						comments_nested --;
142 						if (comments_nested == 0) {
143 							ucl_chunk_skipc (chunk, p);
144 							goto start;
145 						}
146 					}
147 					ucl_chunk_skipc (chunk, p);
148 				}
149 				else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
150 					comments_nested ++;
151 					ucl_chunk_skipc (chunk, p);
152 					ucl_chunk_skipc (chunk, p);
153 					continue;
154 				}
155 				ucl_chunk_skipc (chunk, p);
156 			}
157 			if (comments_nested != 0) {
158 				ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err);
159 				return false;
160 			}
161 		}
162 	}
163 
164 	return true;
165 }
166 
/**
 * Map a size suffix character to its numeric multiplier
 * @param c suffix character, compared case-insensitively ('k', 'm' or 'g')
 * @param is_bytes if true use powers of 1024, otherwise powers of 1000
 * @return the multiplier, or 1 when the character is not a known suffix
 */
static inline unsigned long
ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
	long decimal, binary;

	switch (tolower (c)) {
	case 'm':
		decimal = 1000 * 1000;
		binary = 1024 * 1024;
		break;
	case 'k':
		decimal = 1000;
		binary = 1024;
		break;
	case 'g':
		decimal = 1000 * 1000 * 1000;
		binary = 1024 * 1024 * 1024;
		break;
	default:
		return 1;
	}

	return is_bytes ? binary : decimal;
}
197 
198 
/**
 * Return the number of seconds represented by a time suffix character
 * @param c suffix character, compared case-insensitively: 'm' (minute),
 * 'h' (hour), 'd' (day), 'w' (week) or 'y' (year of 365 days)
 * @return seconds per unit, or 1 when the character is not a known suffix
 */
static inline double
ucl_lex_time_multiplier (const unsigned char c) {
	const struct {
		char c;
		double mult;
	} multipliers[] = {
			{'m', 60},
			{'h', 60 * 60},
			{'d', 60 * 60 * 24},
			{'w', 60 * 60 * 24 * 7},
			/* A year is 365 days, not 365 weeks */
			{'y', 60 * 60 * 24 * 365}
	};
	size_t i;

	for (i = 0; i < sizeof (multipliers) / sizeof (multipliers[0]); i ++) {
		if (tolower (c) == multipliers[i].c) {
			return multipliers[i].mult;
		}
	}

	return 1;
}
226 
227 /**
228  * Return true if a character is a end of an atom
229  * @param c
230  * @return
231  */
232 static inline bool
233 ucl_lex_is_atom_end (const unsigned char c)
234 {
235 	return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
236 }
237 
/**
 * Tell whether two consecutive characters open a comment
 * @param c1 first character
 * @param c2 character following c1 (only inspected when c1 is '/')
 * @return true for '#' or for the slash-star pair, false otherwise
 */
static inline bool
ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
{
	return c1 == '#' || (c1 == '/' && c2 == '*');
}
251 
252 /**
253  * Check variable found
254  * @param parser
255  * @param ptr
256  * @param remain
257  * @param out_len
258  * @param strict
259  * @param found
260  * @return
261  */
262 static inline const char *
263 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
264 		size_t *out_len, bool strict, bool *found)
265 {
266 	struct ucl_variable *var;
267 
268 	LL_FOREACH (parser->variables, var) {
269 		if (strict) {
270 			if (remain == var->var_len) {
271 				if (memcmp (ptr, var->var, var->var_len) == 0) {
272 					*out_len += var->value_len;
273 					*found = true;
274 					return (ptr + var->var_len);
275 				}
276 			}
277 		}
278 		else {
279 			if (remain >= var->var_len) {
280 				if (memcmp (ptr, var->var, var->var_len) == 0) {
281 					*out_len += var->value_len;
282 					*found = true;
283 					return (ptr + var->var_len);
284 				}
285 			}
286 		}
287 	}
288 
289 	return ptr;
290 }
291 
292 /**
293  * Check for a variable in a given string
294  * @param parser
295  * @param ptr
296  * @param remain
297  * @param out_len
298  * @param vars_found
299  * @return
300  */
301 static const char *
302 ucl_check_variable (struct ucl_parser *parser, const char *ptr, size_t remain, size_t *out_len, bool *vars_found)
303 {
304 	const char *p, *end, *ret = ptr;
305 	bool found = false;
306 
307 	if (*ptr == '{') {
308 		/* We need to match the variable enclosed in braces */
309 		p = ptr + 1;
310 		end = ptr + remain;
311 		while (p < end) {
312 			if (*p == '}') {
313 				ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, out_len, true, &found);
314 				if (found) {
315 					/* {} must be excluded actually */
316 					ret ++;
317 					if (!*vars_found) {
318 						*vars_found = true;
319 					}
320 				}
321 				else {
322 					*out_len += 2;
323 				}
324 				break;
325 			}
326 			p ++;
327 		}
328 	}
329 	else if (*ptr != '$') {
330 		/* Not count escaped dollar sign */
331 		ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
332 		if (found && !*vars_found) {
333 			*vars_found = true;
334 		}
335 		if (!found) {
336 			(*out_len) ++;
337 		}
338 	}
339 	else {
340 		ret ++;
341 		(*out_len) ++;
342 	}
343 
344 	return ret;
345 }
346 
347 /**
348  * Expand a single variable
349  * @param parser
350  * @param ptr
351  * @param remain
352  * @param dest
353  * @return
354  */
355 static const char *
356 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
357 		size_t remain, unsigned char **dest)
358 {
359 	unsigned char *d = *dest;
360 	const char *p = ptr + 1, *ret;
361 	struct ucl_variable *var;
362 	bool found = false;
363 
364 	ret = ptr + 1;
365 	remain --;
366 
367 	if (*p == '$') {
368 		*d++ = *p++;
369 		*dest = d;
370 		return p;
371 	}
372 	else if (*p == '{') {
373 		p ++;
374 		ret += 2;
375 		remain -= 2;
376 	}
377 
378 	LL_FOREACH (parser->variables, var) {
379 		if (remain >= var->var_len) {
380 			if (memcmp (p, var->var, var->var_len) == 0) {
381 				memcpy (d, var->value, var->value_len);
382 				ret += var->var_len;
383 				d += var->value_len;
384 				found = true;
385 				break;
386 			}
387 		}
388 	}
389 	if (!found) {
390 		memcpy (d, ptr, 2);
391 		d += 2;
392 		ret --;
393 	}
394 
395 	*dest = d;
396 	return ret;
397 }
398 
/**
 * Substitute variables in a string using two passes: the first one computes
 * the output length with ucl_check_variable (), the second one writes the
 * result with ucl_expand_single_variable ().
 * @param parser parser holding the variables
 * @param dst receives a newly allocated, NUL-terminated result, or NULL when
 * no variable was substituted or the allocation failed
 * @param src input string
 * @param in_len number of bytes of src to process
 * @return length of the expanded string, or in_len when *dst is NULL
 */
static ssize_t
ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
		const char *src, size_t in_len)
{
	const char *cur, *end = src + in_len;
	unsigned char *out;
	size_t out_len = 0;
	bool vars_found = false;

	/* Pass 1: measure */
	for (cur = src; cur != end; ) {
		if (*cur != '$') {
			cur ++;
			out_len ++;
			continue;
		}
		cur = ucl_check_variable (parser, cur + 1, end - cur - 1, &out_len, &vars_found);
	}

	if (!vars_found) {
		/* Trivial case: the caller keeps using the input */
		*dst = NULL;
		return in_len;
	}

	*dst = UCL_ALLOC (out_len + 1);
	if (*dst == NULL) {
		return in_len;
	}

	/* Pass 2: copy and substitute */
	out = *dst;
	for (cur = src; cur != end; ) {
		if (*cur != '$') {
			*out++ = *cur++;
			continue;
		}
		cur = ucl_expand_single_variable (parser, cur, end - cur, &out);
	}

	*out = '\0';

	return out_len;
}
453 
454 /**
455  * Store or copy pointer to the trash stack
456  * @param parser parser object
457  * @param src src string
458  * @param dst destination buffer (trash stack pointer)
459  * @param dst_const const destination pointer (e.g. value of object)
460  * @param in_len input length
461  * @param need_unescape need to unescape source (and copy it)
462  * @param need_lowercase need to lowercase value (and copy)
463  * @param need_expand need to expand variables (and copy as well)
464  * @return output length (excluding \0 symbol)
465  */
466 static inline ssize_t
467 ucl_copy_or_store_ptr (struct ucl_parser *parser,
468 		const unsigned char *src, unsigned char **dst,
469 		const char **dst_const, size_t in_len,
470 		bool need_unescape, bool need_lowercase, bool need_expand)
471 {
472 	ssize_t ret = -1, tret;
473 	unsigned char *tmp;
474 
475 	if (need_unescape || need_lowercase ||
476 			(need_expand && parser->variables != NULL) ||
477 			!(parser->flags & UCL_PARSER_ZEROCOPY)) {
478 		/* Copy string */
479 		*dst = UCL_ALLOC (in_len + 1);
480 		if (*dst == NULL) {
481 			ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err);
482 			return false;
483 		}
484 		if (need_lowercase) {
485 			ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
486 		}
487 		else {
488 			ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
489 		}
490 
491 		if (need_unescape) {
492 			ret = ucl_unescape_json_string (*dst, ret);
493 		}
494 		if (need_expand) {
495 			tmp = *dst;
496 			tret = ret;
497 			ret = ucl_expand_variable (parser, dst, tmp, ret);
498 			if (*dst == NULL) {
499 				/* Nothing to expand */
500 				*dst = tmp;
501 				ret = tret;
502 			}
503 		}
504 		*dst_const = *dst;
505 	}
506 	else {
507 		*dst_const = src;
508 		ret = in_len;
509 	}
510 
511 	return ret;
512 }
513 
514 /**
515  * Create and append an object at the specified level
516  * @param parser
517  * @param is_array
518  * @param level
519  * @return
520  */
521 static inline ucl_object_t *
522 ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
523 {
524 	struct ucl_stack *st;
525 
526 	if (!is_array) {
527 		if (obj == NULL) {
528 			obj = ucl_object_typed_new (UCL_OBJECT);
529 		}
530 		else {
531 			obj->type = UCL_OBJECT;
532 		}
533 		obj->value.ov = ucl_hash_create ();
534 		parser->state = UCL_STATE_KEY;
535 	}
536 	else {
537 		if (obj == NULL) {
538 			obj = ucl_object_typed_new (UCL_ARRAY);
539 		}
540 		else {
541 			obj->type = UCL_ARRAY;
542 		}
543 		parser->state = UCL_STATE_VALUE;
544 	}
545 
546 	st = UCL_ALLOC (sizeof (struct ucl_stack));
547 	if (st == NULL) {
548 		ucl_set_err (parser->chunks, 0, "cannot allocate memory for an object", &parser->err);
549 		return NULL;
550 	}
551 	st->obj = obj;
552 	st->level = level;
553 	LL_PREPEND (parser->stack, st);
554 	parser->cur_obj = obj;
555 
556 	return obj;
557 }
558 
559 int
560 ucl_maybe_parse_number (ucl_object_t *obj,
561 		const char *start, const char *end, const char **pos,
562 		bool allow_double, bool number_bytes, bool allow_time)
563 {
564 	const char *p = start, *c = start;
565 	char *endptr;
566 	bool got_dot = false, got_exp = false, need_double = false,
567 			is_time = false, valid_start = false, is_hex = false,
568 			is_neg = false;
569 	double dv = 0;
570 	int64_t lv = 0;
571 
572 	if (*p == '-') {
573 		is_neg = true;
574 		c ++;
575 		p ++;
576 	}
577 	while (p < end) {
578 		if (is_hex && isxdigit (*p)) {
579 			p ++;
580 		}
581 		else if (isdigit (*p)) {
582 			valid_start = true;
583 			p ++;
584 		}
585 		else if (!is_hex && (*p == 'x' || *p == 'X')) {
586 			is_hex = true;
587 			allow_double = false;
588 			c = p + 1;
589 		}
590 		else if (allow_double) {
591 			if (p == c) {
592 				/* Empty digits sequence, not a number */
593 				*pos = start;
594 				return EINVAL;
595 			}
596 			else if (*p == '.') {
597 				if (got_dot) {
598 					/* Double dots, not a number */
599 					*pos = start;
600 					return EINVAL;
601 				}
602 				else {
603 					got_dot = true;
604 					need_double = true;
605 					p ++;
606 				}
607 			}
608 			else if (*p == 'e' || *p == 'E') {
609 				if (got_exp) {
610 					/* Double exp, not a number */
611 					*pos = start;
612 					return EINVAL;
613 				}
614 				else {
615 					got_exp = true;
616 					need_double = true;
617 					p ++;
618 					if (p >= end) {
619 						*pos = start;
620 						return EINVAL;
621 					}
622 					if (!isdigit (*p) && *p != '+' && *p != '-') {
623 						/* Wrong exponent sign */
624 						*pos = start;
625 						return EINVAL;
626 					}
627 					else {
628 						p ++;
629 					}
630 				}
631 			}
632 			else {
633 				/* Got the end of the number, need to check */
634 				break;
635 			}
636 		}
637 		else {
638 			break;
639 		}
640 	}
641 
642 	if (!valid_start) {
643 		*pos = start;
644 		return EINVAL;
645 	}
646 
647 	errno = 0;
648 	if (need_double) {
649 		dv = strtod (c, &endptr);
650 	}
651 	else {
652 		if (is_hex) {
653 			lv = strtoimax (c, &endptr, 16);
654 		}
655 		else {
656 			lv = strtoimax (c, &endptr, 10);
657 		}
658 	}
659 	if (errno == ERANGE) {
660 		*pos = start;
661 		return ERANGE;
662 	}
663 
664 	/* Now check endptr */
665 	if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0' ||
666 			ucl_test_character (*endptr, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
667 		p = endptr;
668 		goto set_obj;
669 	}
670 
671 	if (endptr < end && endptr != start) {
672 		p = endptr;
673 		switch (*p) {
674 		case 'm':
675 		case 'M':
676 		case 'g':
677 		case 'G':
678 		case 'k':
679 		case 'K':
680 			if (end - p >= 2) {
681 				if (p[1] == 's' || p[1] == 'S') {
682 					/* Milliseconds */
683 					if (!need_double) {
684 						need_double = true;
685 						dv = lv;
686 					}
687 					is_time = true;
688 					if (p[0] == 'm' || p[0] == 'M') {
689 						dv /= 1000.;
690 					}
691 					else {
692 						dv *= ucl_lex_num_multiplier (*p, false);
693 					}
694 					p += 2;
695 					goto set_obj;
696 				}
697 				else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
698 					/* Bytes */
699 					if (need_double) {
700 						need_double = false;
701 						lv = dv;
702 					}
703 					lv *= ucl_lex_num_multiplier (*p, true);
704 					p += 2;
705 					goto set_obj;
706 				}
707 				else if (ucl_lex_is_atom_end (p[1])) {
708 					if (need_double) {
709 						dv *= ucl_lex_num_multiplier (*p, false);
710 					}
711 					else {
712 						lv *= ucl_lex_num_multiplier (*p, number_bytes);
713 					}
714 					p ++;
715 					goto set_obj;
716 				}
717 				else if (allow_time && end - p >= 3) {
718 					if (tolower (p[0]) == 'm' &&
719 							tolower (p[1]) == 'i' &&
720 							tolower (p[2]) == 'n') {
721 						/* Minutes */
722 						if (!need_double) {
723 							need_double = true;
724 							dv = lv;
725 						}
726 						is_time = true;
727 						dv *= 60.;
728 						p += 3;
729 						goto set_obj;
730 					}
731 				}
732 			}
733 			else {
734 				if (need_double) {
735 					dv *= ucl_lex_num_multiplier (*p, false);
736 				}
737 				else {
738 					lv *= ucl_lex_num_multiplier (*p, number_bytes);
739 				}
740 				p ++;
741 				goto set_obj;
742 			}
743 			break;
744 		case 'S':
745 		case 's':
746 			if (allow_time &&
747 					(p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
748 				if (!need_double) {
749 					need_double = true;
750 					dv = lv;
751 				}
752 				p ++;
753 				is_time = true;
754 				goto set_obj;
755 			}
756 			break;
757 		case 'h':
758 		case 'H':
759 		case 'd':
760 		case 'D':
761 		case 'w':
762 		case 'W':
763 		case 'Y':
764 		case 'y':
765 			if (allow_time &&
766 					(p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
767 				if (!need_double) {
768 					need_double = true;
769 					dv = lv;
770 				}
771 				is_time = true;
772 				dv *= ucl_lex_time_multiplier (*p);
773 				p ++;
774 				goto set_obj;
775 			}
776 			break;
777 		}
778 	}
779 
780 	*pos = c;
781 	return EINVAL;
782 
783 	set_obj:
784 	if (allow_double && (need_double || is_time)) {
785 		if (!is_time) {
786 			obj->type = UCL_FLOAT;
787 		}
788 		else {
789 			obj->type = UCL_TIME;
790 		}
791 		obj->value.dv = is_neg ? (-dv) : dv;
792 	}
793 	else {
794 		obj->type = UCL_INT;
795 		obj->value.iv = is_neg ? (-lv) : lv;
796 	}
797 	*pos = p;
798 	return 0;
799 }
800 
801 /**
802  * Parse possible number
803  * @param parser
804  * @param chunk
805  * @return true if a number has been parsed
806  */
807 static bool
808 ucl_lex_number (struct ucl_parser *parser,
809 		struct ucl_chunk *chunk, ucl_object_t *obj)
810 {
811 	const unsigned char *pos;
812 	int ret;
813 
814 	ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
815 			true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
816 
817 	if (ret == 0) {
818 		chunk->remain -= pos - chunk->pos;
819 		chunk->column += pos - chunk->pos;
820 		chunk->pos = pos;
821 		return true;
822 	}
823 	else if (ret == ERANGE) {
824 		ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err);
825 	}
826 
827 	return false;
828 }
829 
830 /**
831  * Parse quoted string with possible escapes
832  * @param parser
833  * @param chunk
834  * @return true if a string has been parsed
835  */
836 static bool
837 ucl_lex_json_string (struct ucl_parser *parser,
838 		struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
839 {
840 	const unsigned char *p = chunk->pos;
841 	unsigned char c;
842 	int i;
843 
844 	while (p < chunk->end) {
845 		c = *p;
846 		if (c < 0x1F) {
847 			/* Unmasked control character */
848 			if (c == '\n') {
849 				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err);
850 			}
851 			else {
852 				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err);
853 			}
854 			return false;
855 		}
856 		else if (c == '\\') {
857 			ucl_chunk_skipc (chunk, p);
858 			c = *p;
859 			if (p >= chunk->end) {
860 				ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
861 				return false;
862 			}
863 			else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
864 				if (c == 'u') {
865 					ucl_chunk_skipc (chunk, p);
866 					for (i = 0; i < 4 && p < chunk->end; i ++) {
867 						if (!isxdigit (*p)) {
868 							ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err);
869 							return false;
870 						}
871 						ucl_chunk_skipc (chunk, p);
872 					}
873 					if (p >= chunk->end) {
874 						ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err);
875 						return false;
876 					}
877 				}
878 				else {
879 					ucl_chunk_skipc (chunk, p);
880 				}
881 			}
882 			*need_unescape = true;
883 			*ucl_escape = true;
884 			continue;
885 		}
886 		else if (c == '"') {
887 			ucl_chunk_skipc (chunk, p);
888 			return true;
889 		}
890 		else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
891 			*ucl_escape = true;
892 		}
893 		else if (c == '$') {
894 			*var_expand = true;
895 		}
896 		ucl_chunk_skipc (chunk, p);
897 	}
898 
899 	ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err);
900 	return false;
901 }
902 
903 /**
904  * Parse a key in an object
905  * @param parser
906  * @param chunk
907  * @return true if a key has been parsed
908  */
909 static bool
910 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
911 {
912 	const unsigned char *p, *c = NULL, *end, *t;
913 	const char *key = NULL;
914 	bool got_quote = false, got_eq = false, got_semicolon = false,
915 			need_unescape = false, ucl_escape = false, var_expand = false,
916 			got_content = false, got_sep = false;
917 	ucl_object_t *nobj, *tobj;
918 	ucl_hash_t *container;
919 	ssize_t keylen;
920 
921 	p = chunk->pos;
922 
923 	if (*p == '.') {
924 		/* It is macro actually */
925 		ucl_chunk_skipc (chunk, p);
926 		parser->prev_state = parser->state;
927 		parser->state = UCL_STATE_MACRO_NAME;
928 		return true;
929 	}
930 	while (p < chunk->end) {
931 		/*
932 		 * A key must start with alpha, number, '/' or '_' and end with space character
933 		 */
934 		if (c == NULL) {
935 			if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
936 				if (!ucl_skip_comments (parser)) {
937 					return false;
938 				}
939 				p = chunk->pos;
940 			}
941 			else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
942 				ucl_chunk_skipc (chunk, p);
943 			}
944 			else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
945 				/* The first symbol */
946 				c = p;
947 				ucl_chunk_skipc (chunk, p);
948 				got_content = true;
949 			}
950 			else if (*p == '"') {
951 				/* JSON style key */
952 				c = p + 1;
953 				got_quote = true;
954 				got_content = true;
955 				ucl_chunk_skipc (chunk, p);
956 			}
957 			else if (*p == '}') {
958 				/* We have actually end of an object */
959 				*end_of_object = true;
960 				return true;
961 			}
962 			else if (*p == '.') {
963 				ucl_chunk_skipc (chunk, p);
964 				parser->prev_state = parser->state;
965 				parser->state = UCL_STATE_MACRO_NAME;
966 				return true;
967 			}
968 			else {
969 				/* Invalid identifier */
970 				ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err);
971 				return false;
972 			}
973 		}
974 		else {
975 			/* Parse the body of a key */
976 			if (!got_quote) {
977 				if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
978 					got_content = true;
979 					ucl_chunk_skipc (chunk, p);
980 				}
981 				else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
982 					end = p;
983 					break;
984 				}
985 				else {
986 					ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err);
987 					return false;
988 				}
989 			}
990 			else {
991 				/* We need to parse json like quoted string */
992 				if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
993 					return false;
994 				}
995 				/* Always escape keys obtained via json */
996 				end = chunk->pos - 1;
997 				p = chunk->pos;
998 				break;
999 			}
1000 		}
1001 	}
1002 
1003 	if (p >= chunk->end && got_content) {
1004 		ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1005 		return false;
1006 	}
1007 	else if (!got_content) {
1008 		return true;
1009 	}
1010 	*end_of_object = false;
1011 	/* We are now at the end of the key, need to parse the rest */
1012 	while (p < chunk->end) {
1013 		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1014 			ucl_chunk_skipc (chunk, p);
1015 		}
1016 		else if (*p == '=') {
1017 			if (!got_eq && !got_semicolon) {
1018 				ucl_chunk_skipc (chunk, p);
1019 				got_eq = true;
1020 			}
1021 			else {
1022 				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err);
1023 				return false;
1024 			}
1025 		}
1026 		else if (*p == ':') {
1027 			if (!got_eq && !got_semicolon) {
1028 				ucl_chunk_skipc (chunk, p);
1029 				got_semicolon = true;
1030 			}
1031 			else {
1032 				ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err);
1033 				return false;
1034 			}
1035 		}
1036 		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1037 			/* Check for comment */
1038 			if (!ucl_skip_comments (parser)) {
1039 				return false;
1040 			}
1041 			p = chunk->pos;
1042 		}
1043 		else {
1044 			/* Start value */
1045 			break;
1046 		}
1047 	}
1048 
1049 	if (p >= chunk->end && got_content) {
1050 		ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err);
1051 		return false;
1052 	}
1053 
1054 	got_sep = got_semicolon || got_eq;
1055 
1056 	if (!got_sep) {
1057 		/*
1058 		 * Maybe we have more keys nested, so search for termination character.
1059 		 * Possible choices:
1060 		 * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1061 		 * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1062 		 * 3) key1 value[;,\n] <- we treat that as linear object
1063 		 */
1064 		t = p;
1065 		*next_key = false;
1066 		while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1067 			t ++;
1068 		}
1069 		/* Check first non-space character after a key */
1070 		if (*t != '{' && *t != '[') {
1071 			while (t < chunk->end) {
1072 				if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1073 					break;
1074 				}
1075 				else if (*t == '{' || *t == '[') {
1076 					*next_key = true;
1077 					break;
1078 				}
1079 				t ++;
1080 			}
1081 		}
1082 	}
1083 
1084 	/* Create a new object */
1085 	nobj = ucl_object_new ();
1086 	keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1087 			&key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1088 	if (keylen == -1) {
1089 		ucl_object_free(nobj);
1090 		return false;
1091 	}
1092 	else if (keylen == 0) {
1093 		ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1094 		ucl_object_free(nobj);
1095 		return false;
1096 	}
1097 
1098 	container = parser->stack->obj->value.ov;
1099 	nobj->key = key;
1100 	nobj->keylen = keylen;
1101 	tobj = ucl_hash_search_obj (container, nobj);
1102 	if (tobj == NULL) {
1103 		container = ucl_hash_insert_object (container, nobj);
1104 		nobj->prev = nobj;
1105 		nobj->next = NULL;
1106 		parser->stack->obj->len ++;
1107 	}
1108 	else {
1109 		DL_APPEND (tobj, nobj);
1110 	}
1111 
1112 	if (ucl_escape) {
1113 		nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1114 	}
1115 	parser->stack->obj->value.ov = container;
1116 
1117 	parser->cur_obj = nobj;
1118 
1119 	return true;
1120 }
1121 
1122 /**
1123  * Parse a cl string
1124  * @param parser
1125  * @param chunk
1126  * @return true if a key has been parsed
1127  */
1128 static bool
1129 ucl_parse_string_value (struct ucl_parser *parser,
1130 		struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1131 {
1132 	const unsigned char *p;
1133 	enum {
1134 		UCL_BRACE_ROUND = 0,
1135 		UCL_BRACE_SQUARE,
1136 		UCL_BRACE_FIGURE
1137 	};
1138 	int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1139 
1140 	p = chunk->pos;
1141 
1142 	while (p < chunk->end) {
1143 
1144 		/* Skip pairs of figure braces */
1145 		if (*p == '{') {
1146 			braces[UCL_BRACE_FIGURE][0] ++;
1147 		}
1148 		else if (*p == '}') {
1149 			braces[UCL_BRACE_FIGURE][1] ++;
1150 			if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1151 				/* This is not a termination symbol, continue */
1152 				ucl_chunk_skipc (chunk, p);
1153 				continue;
1154 			}
1155 		}
1156 		/* Skip pairs of square braces */
1157 		else if (*p == '[') {
1158 			braces[UCL_BRACE_SQUARE][0] ++;
1159 		}
1160 		else if (*p == ']') {
1161 			braces[UCL_BRACE_SQUARE][1] ++;
1162 			if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1163 				/* This is not a termination symbol, continue */
1164 				ucl_chunk_skipc (chunk, p);
1165 				continue;
1166 			}
1167 		}
1168 		else if (*p == '$') {
1169 			*var_expand = true;
1170 		}
1171 		else if (*p == '\\') {
1172 			*need_unescape = true;
1173 			ucl_chunk_skipc (chunk, p);
1174 			if (p < chunk->end) {
1175 				ucl_chunk_skipc (chunk, p);
1176 			}
1177 			continue;
1178 		}
1179 
1180 		if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1181 			break;
1182 		}
1183 		ucl_chunk_skipc (chunk, p);
1184 	}
1185 
1186 	if (p >= chunk->end) {
1187 		ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err);
1188 		return false;
1189 	}
1190 
1191 	return true;
1192 }
1193 
1194 /**
1195  * Parse multiline string ending with \n{term}\n
1196  * @param parser
1197  * @param chunk
1198  * @param term
1199  * @param term_len
1200  * @return size of multiline string or 0 in case of error
1201  */
1202 static int
1203 ucl_parse_multiline_string (struct ucl_parser *parser,
1204 		struct ucl_chunk *chunk, const unsigned char *term,
1205 		int term_len, unsigned char const **beg,
1206 		bool *var_expand)
1207 {
1208 	const unsigned char *p, *c;
1209 	bool newline = false;
1210 	int len = 0;
1211 
1212 	p = chunk->pos;
1213 
1214 	c = p;
1215 
1216 	while (p < chunk->end) {
1217 		if (newline) {
1218 			if (chunk->end - p < term_len) {
1219 				return 0;
1220 			}
1221 			else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) {
1222 				len = p - c;
1223 				chunk->remain -= term_len;
1224 				chunk->pos = p + term_len;
1225 				chunk->column = term_len;
1226 				*beg = c;
1227 				break;
1228 			}
1229 		}
1230 		if (*p == '\n') {
1231 			newline = true;
1232 		}
1233 		else {
1234 			if (*p == '$') {
1235 				*var_expand = true;
1236 			}
1237 			newline = false;
1238 		}
1239 		ucl_chunk_skipc (chunk, p);
1240 	}
1241 
1242 	return len;
1243 }
1244 
1245 static ucl_object_t*
1246 ucl_get_value_object (struct ucl_parser *parser)
1247 {
1248 	ucl_object_t *t, *obj = NULL;
1249 
1250 	if (parser->stack->obj->type == UCL_ARRAY) {
1251 		/* Object must be allocated */
1252 		obj = ucl_object_new ();
1253 		t = parser->stack->obj->value.av;
1254 		DL_APPEND (t, obj);
1255 		parser->cur_obj = obj;
1256 		parser->stack->obj->value.av = t;
1257 		parser->stack->obj->len ++;
1258 	}
1259 	else {
1260 		/* Object has been already allocated */
1261 		obj = parser->cur_obj;
1262 	}
1263 
1264 	return obj;
1265 }
1266 
1267 /**
1268  * Handle value data
1269  * @param parser
1270  * @param chunk
1271  * @return
1272  */
1273 static bool
1274 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1275 {
1276 	const unsigned char *p, *c;
1277 	ucl_object_t *obj = NULL;
1278 	unsigned int stripped_spaces;
1279 	int str_len;
1280 	bool need_unescape = false, ucl_escape = false, var_expand = false;
1281 
1282 	p = chunk->pos;
1283 
1284 	/* Skip any spaces and comments */
1285 	if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1286 			(chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1287 		while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1288 			ucl_chunk_skipc (chunk, p);
1289 		}
1290 		if (!ucl_skip_comments (parser)) {
1291 			return false;
1292 		}
1293 		p = chunk->pos;
1294 	}
1295 
1296 	while (p < chunk->end) {
1297 		c = p;
1298 		switch (*p) {
1299 		case '"':
1300 			obj = ucl_get_value_object (parser);
1301 			ucl_chunk_skipc (chunk, p);
1302 			if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1303 				return false;
1304 			}
1305 			str_len = chunk->pos - c - 2;
1306 			obj->type = UCL_STRING;
1307 			if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1308 					&obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1309 				return false;
1310 			}
1311 			obj->len = str_len;
1312 			parser->state = UCL_STATE_AFTER_VALUE;
1313 			p = chunk->pos;
1314 			return true;
1315 			break;
1316 		case '{':
1317 			obj = ucl_get_value_object (parser);
1318 			/* We have a new object */
1319 			obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
1320 			if (obj == NULL) {
1321 				return false;
1322 			}
1323 
1324 			ucl_chunk_skipc (chunk, p);
1325 			return true;
1326 			break;
1327 		case '[':
1328 			obj = ucl_get_value_object (parser);
1329 			/* We have a new array */
1330 			obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1331 			if (obj == NULL) {
1332 				return false;
1333 			}
1334 
1335 			ucl_chunk_skipc (chunk, p);
1336 			return true;
1337 			break;
1338 		case ']':
1339 			/* We have the array ending */
1340 			if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1341 				parser->state = UCL_STATE_AFTER_VALUE;
1342 				return true;
1343 			}
1344 			else {
1345 				goto parse_string;
1346 			}
1347 			break;
1348 		case '<':
1349 			obj = ucl_get_value_object (parser);
1350 			/* We have something like multiline value, which must be <<[A-Z]+\n */
1351 			if (chunk->end - p > 3) {
1352 				if (memcmp (p, "<<", 2) == 0) {
1353 					p += 2;
1354 					/* We allow only uppercase characters in multiline definitions */
1355 					while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1356 						p ++;
1357 					}
1358 					if (*p =='\n') {
1359 						/* Set chunk positions and start multiline parsing */
1360 						c += 2;
1361 						chunk->remain -= p - c;
1362 						chunk->pos = p + 1;
1363 						chunk->column = 0;
1364 						chunk->line ++;
1365 						if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1366 								p - c, &c, &var_expand)) == 0) {
1367 							ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err);
1368 							return false;
1369 						}
1370 						obj->type = UCL_STRING;
1371 						if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1372 							&obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1373 							return false;
1374 						}
1375 						obj->len = str_len;
1376 						parser->state = UCL_STATE_AFTER_VALUE;
1377 						return true;
1378 					}
1379 				}
1380 			}
1381 			/* Fallback to ordinary strings */
1382 		default:
1383 parse_string:
1384 			if (obj == NULL) {
1385 				obj = ucl_get_value_object (parser);
1386 			}
1387 			/* Parse atom */
1388 			if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1389 				if (!ucl_lex_number (parser, chunk, obj)) {
1390 					if (parser->state == UCL_STATE_ERROR) {
1391 						return false;
1392 					}
1393 				}
1394 				else {
1395 					parser->state = UCL_STATE_AFTER_VALUE;
1396 					return true;
1397 				}
1398 				/* Fallback to normal string */
1399 			}
1400 
1401 			if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1402 				return false;
1403 			}
1404 			/* Cut trailing spaces */
1405 			stripped_spaces = 0;
1406 			while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1407 					UCL_CHARACTER_WHITESPACE)) {
1408 				stripped_spaces ++;
1409 			}
1410 			str_len = chunk->pos - c - stripped_spaces;
1411 			if (str_len <= 0) {
1412 				ucl_set_err (chunk, 0, "string value must not be empty", &parser->err);
1413 				return false;
1414 			}
1415 			else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1416 				obj->len = 0;
1417 				obj->type = UCL_NULL;
1418 			}
1419 			else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1420 				obj->type = UCL_STRING;
1421 				if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1422 						&obj->value.sv, str_len, need_unescape,
1423 						false, var_expand)) == -1) {
1424 					return false;
1425 				}
1426 				obj->len = str_len;
1427 			}
1428 			parser->state = UCL_STATE_AFTER_VALUE;
1429 			p = chunk->pos;
1430 
1431 			return true;
1432 			break;
1433 		}
1434 	}
1435 
1436 	return true;
1437 }
1438 
1439 /**
1440  * Handle after value data
1441  * @param parser
1442  * @param chunk
1443  * @return
1444  */
1445 static bool
1446 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1447 {
1448 	const unsigned char *p;
1449 	bool got_sep = false;
1450 	struct ucl_stack *st;
1451 
1452 	p = chunk->pos;
1453 
1454 	while (p < chunk->end) {
1455 		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1456 			/* Skip whitespaces */
1457 			ucl_chunk_skipc (chunk, p);
1458 		}
1459 		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1460 			/* Skip comment */
1461 			if (!ucl_skip_comments (parser)) {
1462 				return false;
1463 			}
1464 			/* Treat comment as a separator */
1465 			got_sep = true;
1466 			p = chunk->pos;
1467 		}
1468 		else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1469 			if (*p == '}' || *p == ']') {
1470 				if (parser->stack == NULL) {
1471 					ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err);
1472 					return false;
1473 				}
1474 				if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1475 						(*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1476 
1477 					/* Pop all nested objects from a stack */
1478 					st = parser->stack;
1479 					parser->stack = st->next;
1480 					UCL_FREE (sizeof (struct ucl_stack), st);
1481 
1482 					while (parser->stack != NULL) {
1483 						st = parser->stack;
1484 						if (st->next == NULL || st->next->level == st->level) {
1485 							break;
1486 						}
1487 						parser->stack = st->next;
1488 						UCL_FREE (sizeof (struct ucl_stack), st);
1489 					}
1490 				}
1491 				else {
1492 					ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err);
1493 					return false;
1494 				}
1495 
1496 				if (parser->stack == NULL) {
1497 					/* Ignore everything after a top object */
1498 					return true;
1499 				}
1500 				else {
1501 					ucl_chunk_skipc (chunk, p);
1502 				}
1503 				got_sep = true;
1504 			}
1505 			else {
1506 				/* Got a separator */
1507 				got_sep = true;
1508 				ucl_chunk_skipc (chunk, p);
1509 			}
1510 		}
1511 		else {
1512 			/* Anything else */
1513 			if (!got_sep) {
1514 				ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err);
1515 				return false;
1516 			}
1517 			return true;
1518 		}
1519 	}
1520 
1521 	return true;
1522 }
1523 
1524 /**
1525  * Handle macro data
1526  * @param parser
1527  * @param chunk
1528  * @return
1529  */
1530 static bool
1531 ucl_parse_macro_value (struct ucl_parser *parser,
1532 		struct ucl_chunk *chunk, struct ucl_macro *macro,
1533 		unsigned char const **macro_start, size_t *macro_len)
1534 {
1535 	const unsigned char *p, *c;
1536 	bool need_unescape = false, ucl_escape = false, var_expand = false;
1537 
1538 	p = chunk->pos;
1539 
1540 	switch (*p) {
1541 	case '"':
1542 		/* We have macro value encoded in quotes */
1543 		c = p;
1544 		ucl_chunk_skipc (chunk, p);
1545 		if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1546 			return false;
1547 		}
1548 
1549 		*macro_start = c + 1;
1550 		*macro_len = chunk->pos - c - 2;
1551 		p = chunk->pos;
1552 		break;
1553 	case '{':
1554 		/* We got a multiline macro body */
1555 		ucl_chunk_skipc (chunk, p);
1556 		/* Skip spaces at the beginning */
1557 		while (p < chunk->end) {
1558 			if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1559 				ucl_chunk_skipc (chunk, p);
1560 			}
1561 			else {
1562 				break;
1563 			}
1564 		}
1565 		c = p;
1566 		while (p < chunk->end) {
1567 			if (*p == '}') {
1568 				break;
1569 			}
1570 			ucl_chunk_skipc (chunk, p);
1571 		}
1572 		*macro_start = c;
1573 		*macro_len = p - c;
1574 		ucl_chunk_skipc (chunk, p);
1575 		break;
1576 	default:
1577 		/* Macro is not enclosed in quotes or braces */
1578 		c = p;
1579 		while (p < chunk->end) {
1580 			if (ucl_lex_is_atom_end (*p)) {
1581 				break;
1582 			}
1583 			ucl_chunk_skipc (chunk, p);
1584 		}
1585 		*macro_start = c;
1586 		*macro_len = p - c;
1587 		break;
1588 	}
1589 
1590 	/* We are at the end of a macro */
1591 	/* Skip ';' and space characters and return to previous state */
1592 	while (p < chunk->end) {
1593 		if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1594 			break;
1595 		}
1596 		ucl_chunk_skipc (chunk, p);
1597 	}
1598 	return true;
1599 }
1600 
1601 /**
1602  * Handle the main states of rcl parser
1603  * @param parser parser structure
1604  * @param data the pointer to the beginning of a chunk
1605  * @param len the length of a chunk
1606  * @return true if chunk has been parsed and false in case of error
1607  */
1608 static bool
1609 ucl_state_machine (struct ucl_parser *parser)
1610 {
1611 	ucl_object_t *obj;
1612 	struct ucl_chunk *chunk = parser->chunks;
1613 	const unsigned char *p, *c = NULL, *macro_start = NULL;
1614 	unsigned char *macro_escaped;
1615 	size_t macro_len = 0;
1616 	struct ucl_macro *macro = NULL;
1617 	bool next_key = false, end_of_object = false;
1618 
1619 	if (parser->top_obj == NULL) {
1620 		if (*chunk->pos == '[') {
1621 			obj = ucl_add_parser_stack (NULL, parser, true, 0);
1622 		}
1623 		else {
1624 			obj = ucl_add_parser_stack (NULL, parser, false, 0);
1625 		}
1626 		if (obj == NULL) {
1627 			return false;
1628 		}
1629 		parser->top_obj = obj;
1630 		parser->cur_obj = obj;
1631 		parser->state = UCL_STATE_INIT;
1632 	}
1633 
1634 	p = chunk->pos;
1635 	while (chunk->pos < chunk->end) {
1636 		switch (parser->state) {
1637 		case UCL_STATE_INIT:
1638 			/*
1639 			 * At the init state we can either go to the parse array or object
1640 			 * if we got [ or { correspondingly or can just treat new data as
1641 			 * a key of newly created object
1642 			 */
1643 			obj = parser->cur_obj;
1644 			if (!ucl_skip_comments (parser)) {
1645 				parser->prev_state = parser->state;
1646 				parser->state = UCL_STATE_ERROR;
1647 				return false;
1648 			}
1649 			else {
1650 				p = chunk->pos;
1651 				if (*p == '[') {
1652 					parser->state = UCL_STATE_VALUE;
1653 					ucl_chunk_skipc (chunk, p);
1654 				}
1655 				else {
1656 					parser->state = UCL_STATE_KEY;
1657 					if (*p == '{') {
1658 						ucl_chunk_skipc (chunk, p);
1659 					}
1660 				}
1661 			}
1662 			break;
1663 		case UCL_STATE_KEY:
1664 			/* Skip any spaces */
1665 			while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1666 				ucl_chunk_skipc (chunk, p);
1667 			}
1668 			if (*p == '}') {
1669 				/* We have the end of an object */
1670 				parser->state = UCL_STATE_AFTER_VALUE;
1671 				continue;
1672 			}
1673 			if (parser->stack == NULL) {
1674 				/* No objects are on stack, but we want to parse a key */
1675 				ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser "
1676 						"expects a key", &parser->err);
1677 				parser->prev_state = parser->state;
1678 				parser->state = UCL_STATE_ERROR;
1679 				return false;
1680 			}
1681 			if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1682 				parser->prev_state = parser->state;
1683 				parser->state = UCL_STATE_ERROR;
1684 				return false;
1685 			}
1686 			if (end_of_object) {
1687 				p = chunk->pos;
1688 				parser->state = UCL_STATE_AFTER_VALUE;
1689 				continue;
1690 			}
1691 			else if (parser->state != UCL_STATE_MACRO_NAME) {
1692 				if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1693 					/* Parse more keys and nest objects accordingly */
1694 					obj = ucl_add_parser_stack (parser->cur_obj, parser, false,
1695 							parser->stack->level + 1);
1696 					if (obj == NULL) {
1697 						return false;
1698 					}
1699 				}
1700 				else {
1701 					parser->state = UCL_STATE_VALUE;
1702 				}
1703 			}
1704 			else {
1705 				c = chunk->pos;
1706 			}
1707 			p = chunk->pos;
1708 			break;
1709 		case UCL_STATE_VALUE:
1710 			/* We need to check what we do have */
1711 			if (!ucl_parse_value (parser, chunk)) {
1712 				parser->prev_state = parser->state;
1713 				parser->state = UCL_STATE_ERROR;
1714 				return false;
1715 			}
1716 			/* State is set in ucl_parse_value call */
1717 			p = chunk->pos;
1718 			break;
1719 		case UCL_STATE_AFTER_VALUE:
1720 			if (!ucl_parse_after_value (parser, chunk)) {
1721 				parser->prev_state = parser->state;
1722 				parser->state = UCL_STATE_ERROR;
1723 				return false;
1724 			}
1725 			if (parser->stack != NULL) {
1726 				if (parser->stack->obj->type == UCL_OBJECT) {
1727 					parser->state = UCL_STATE_KEY;
1728 				}
1729 				else {
1730 					/* Array */
1731 					parser->state = UCL_STATE_VALUE;
1732 				}
1733 			}
1734 			else {
1735 				/* Skip everything at the end */
1736 				return true;
1737 			}
1738 			p = chunk->pos;
1739 			break;
1740 		case UCL_STATE_MACRO_NAME:
1741 			if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1742 				ucl_chunk_skipc (chunk, p);
1743 			}
1744 			else if (p - c > 0) {
1745 				/* We got macro name */
1746 				macro_len = (size_t)(p - c);
1747 				HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1748 				if (macro == NULL) {
1749 					ucl_create_err (&parser->err, "error on line %d at column %d: "
1750 							"unknown macro: '%.*s', character: '%c'",
1751 								chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1752 					parser->state = UCL_STATE_ERROR;
1753 					return false;
1754 				}
1755 				/* Now we need to skip all spaces */
1756 				while (p < chunk->end) {
1757 					if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1758 						if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1759 							/* Skip comment */
1760 							if (!ucl_skip_comments (parser)) {
1761 								return false;
1762 							}
1763 							p = chunk->pos;
1764 						}
1765 						break;
1766 					}
1767 					ucl_chunk_skipc (chunk, p);
1768 				}
1769 				parser->state = UCL_STATE_MACRO;
1770 			}
1771 			break;
1772 		case UCL_STATE_MACRO:
1773 			if (!ucl_parse_macro_value (parser, chunk, macro,
1774 					&macro_start, &macro_len)) {
1775 				parser->prev_state = parser->state;
1776 				parser->state = UCL_STATE_ERROR;
1777 				return false;
1778 			}
1779 			macro_len = ucl_expand_variable (parser, &macro_escaped, macro_start, macro_len);
1780 			parser->state = parser->prev_state;
1781 			if (macro_escaped == NULL) {
1782 				if (!macro->handler (macro_start, macro_len, macro->ud)) {
1783 					return false;
1784 				}
1785 			}
1786 			else {
1787 				if (!macro->handler (macro_escaped, macro_len, macro->ud)) {
1788 					UCL_FREE (macro_len + 1, macro_escaped);
1789 					return false;
1790 				}
1791 				UCL_FREE (macro_len + 1, macro_escaped);
1792 			}
1793 			p = chunk->pos;
1794 			break;
1795 		default:
1796 			/* TODO: add all states */
1797 			ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err);
1798 			parser->state = UCL_STATE_ERROR;
1799 			return false;
1800 		}
1801 	}
1802 
1803 	return true;
1804 }
1805 
1806 struct ucl_parser*
1807 ucl_parser_new (int flags)
1808 {
1809 	struct ucl_parser *new;
1810 
1811 	new = UCL_ALLOC (sizeof (struct ucl_parser));
1812 	if (new == NULL) {
1813 		return NULL;
1814 	}
1815 	memset (new, 0, sizeof (struct ucl_parser));
1816 
1817 	ucl_parser_register_macro (new, "include", ucl_include_handler, new);
1818 	ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
1819 	ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
1820 
1821 	new->flags = flags;
1822 
1823 	/* Initial assumption about filevars */
1824 	ucl_parser_set_filevars (new, NULL, false);
1825 
1826 	return new;
1827 }
1828 
1829 
1830 void
1831 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
1832 		ucl_macro_handler handler, void* ud)
1833 {
1834 	struct ucl_macro *new;
1835 
1836 	if (macro == NULL || handler == NULL) {
1837 		return;
1838 	}
1839 	new = UCL_ALLOC (sizeof (struct ucl_macro));
1840 	if (new == NULL) {
1841 		return;
1842 	}
1843 	memset (new, 0, sizeof (struct ucl_macro));
1844 	new->handler = handler;
1845 	new->name = strdup (macro);
1846 	new->ud = ud;
1847 	HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
1848 }
1849 
1850 void
1851 ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
1852 		const char *value)
1853 {
1854 	struct ucl_variable *new = NULL, *cur;
1855 
1856 	if (var == NULL) {
1857 		return;
1858 	}
1859 
1860 	/* Find whether a variable already exists */
1861 	LL_FOREACH (parser->variables, cur) {
1862 		if (strcmp (cur->var, var) == 0) {
1863 			new = cur;
1864 			break;
1865 		}
1866 	}
1867 
1868 	if (value == NULL) {
1869 
1870 		if (new != NULL) {
1871 			/* Remove variable */
1872 			LL_DELETE (parser->variables, new);
1873 			free (new->var);
1874 			free (new->value);
1875 			UCL_FREE (sizeof (struct ucl_variable), new);
1876 		}
1877 		else {
1878 			/* Do nothing */
1879 			return;
1880 		}
1881 	}
1882 	else {
1883 		if (new == NULL) {
1884 			new = UCL_ALLOC (sizeof (struct ucl_variable));
1885 			if (new == NULL) {
1886 				return;
1887 			}
1888 			memset (new, 0, sizeof (struct ucl_variable));
1889 			new->var = strdup (var);
1890 			new->var_len = strlen (var);
1891 			new->value = strdup (value);
1892 			new->value_len = strlen (value);
1893 
1894 			LL_PREPEND (parser->variables, new);
1895 		}
1896 		else {
1897 			free (new->value);
1898 			new->value = strdup (value);
1899 			new->value_len = strlen (value);
1900 		}
1901 	}
1902 }
1903 
1904 bool
1905 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
1906 		size_t len)
1907 {
1908 	struct ucl_chunk *chunk;
1909 
1910 	if (data == NULL || len == 0) {
1911 		ucl_create_err (&parser->err, "invalid chunk added");
1912 		return false;
1913 	}
1914 	if (parser->state != UCL_STATE_ERROR) {
1915 		chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
1916 		if (chunk == NULL) {
1917 			ucl_create_err (&parser->err, "cannot allocate chunk structure");
1918 			return false;
1919 		}
1920 		chunk->begin = data;
1921 		chunk->remain = len;
1922 		chunk->pos = chunk->begin;
1923 		chunk->end = chunk->begin + len;
1924 		chunk->line = 1;
1925 		chunk->column = 0;
1926 		LL_PREPEND (parser->chunks, chunk);
1927 		parser->recursion ++;
1928 		if (parser->recursion > UCL_MAX_RECURSION) {
1929 			ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
1930 					parser->recursion);
1931 			return false;
1932 		}
1933 		return ucl_state_machine (parser);
1934 	}
1935 
1936 	ucl_create_err (&parser->err, "a parser is in an invalid state");
1937 
1938 	return false;
1939 }
1940 
1941 bool
1942 ucl_parser_add_string (struct ucl_parser *parser, const char *data,
1943 		size_t len)
1944 {
1945 	if (data == NULL) {
1946 		ucl_create_err (&parser->err, "invalid string added");
1947 		return false;
1948 	}
1949 	if (len == 0) {
1950 		len = strlen (data);
1951 	}
1952 
1953 	return ucl_parser_add_chunk (parser, (const unsigned char *)data, len);
1954 }
1955