xref: /freebsd/contrib/libucl/src/ucl_parser.c (revision ec0e626bafb335b30c499d06066997f54b10c092)
1 /* Copyright (c) 2013, Vsevolod Stakhov
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *       * Redistributions of source code must retain the above copyright
7  *         notice, this list of conditions and the following disclaimer.
8  *       * Redistributions in binary form must reproduce the above copyright
9  *         notice, this list of conditions and the following disclaimer in the
10  *         documentation and/or other materials provided with the distribution.
11  *
12  * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15  * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22  */
23 
24 #include "ucl.h"
25 #include "ucl_internal.h"
26 #include "ucl_chartable.h"
27 
28 /**
29  * @file ucl_parser.c
30  * The implementation of ucl parser
31  */
32 
/**
 * Snapshot of a scanning position: the line/column counters, the number of
 * bytes left and the read pointer.
 * NOTE(review): presumably mirrors the matching fields of struct ucl_chunk so
 * a position can be saved and restored later; no user is visible here.
 */
struct ucl_parser_saved_state {
	unsigned int line;			/* line counter at the time of the snapshot */
	unsigned int column;		/* column counter at the time of the snapshot */
	size_t remain;				/* bytes that were still unread */
	const unsigned char *pos;	/* read pointer at the time of the snapshot */
};
39 
/**
 * Advance the scanning position by exactly one character.
 *
 * The character under (p) decides how the position counters change: a '\n'
 * increments chunk->line and resets chunk->column to 0, anything else
 * increments chunk->column. Afterwards (p) and chunk->pos are both moved one
 * character forward and chunk->remain is decremented.
 *
 * No bounds checking is done here: the caller must make sure that (p) may be
 * dereferenced before invoking the macro.
 *
 * @param chunk pointer to the chunk being scanned (evaluated several times)
 * @param p modifiable lvalue used as the caller's cursor; it is evaluated more
 *          than once, so it must not have side effects
 */
#define ucl_chunk_skipc(chunk, p)    do{					\
    if (*(p) == '\n') {										\
        (chunk)->line ++;									\
        (chunk)->column = 0;								\
    }														\
    else (chunk)->column ++;								\
    (p) ++;													\
    (chunk)->pos ++;										\
    (chunk)->remain --;										\
    } while (0)
57 
58 static inline void
59 ucl_set_err (struct ucl_parser *parser, int code, const char *str, UT_string **err)
60 {
61 	const char *fmt_string, *filename;
62 	struct ucl_chunk *chunk = parser->chunks;
63 
64 	if (parser->cur_file) {
65 		filename = parser->cur_file;
66 	}
67 	else {
68 		filename = "<unknown>";
69 	}
70 	if (chunk->pos < chunk->end) {
71 		if (isgraph (*chunk->pos)) {
72 			fmt_string = "error while parsing %s: "
73 					"line: %d, column: %d - '%s', character: '%c'";
74 		}
75 		else {
76 			fmt_string = "error while parsing %s: "
77 					"line: %d, column: %d - '%s', character: '0x%02x'";
78 		}
79 		ucl_create_err (err, fmt_string,
80 			filename, chunk->line, chunk->column,
81 			str, *chunk->pos);
82 	}
83 	else {
84 		ucl_create_err (err, "error while parsing %s: at the end of chunk: %s",
85 			filename, str);
86 	}
87 }
88 
89 /**
90  * Skip all comments from the current pos resolving nested and multiline comments
91  * @param parser
92  * @return
93  */
94 static bool
95 ucl_skip_comments (struct ucl_parser *parser)
96 {
97 	struct ucl_chunk *chunk = parser->chunks;
98 	const unsigned char *p;
99 	int comments_nested = 0;
100 	bool quoted = false;
101 
102 	p = chunk->pos;
103 
104 start:
105 	if (chunk->remain > 0 && *p == '#') {
106 		if (parser->state != UCL_STATE_SCOMMENT &&
107 				parser->state != UCL_STATE_MCOMMENT) {
108 			while (p < chunk->end) {
109 				if (*p == '\n') {
110 					ucl_chunk_skipc (chunk, p);
111 					goto start;
112 				}
113 				ucl_chunk_skipc (chunk, p);
114 			}
115 		}
116 	}
117 	else if (chunk->remain >= 2 && *p == '/') {
118 		if (p[1] == '*') {
119 			ucl_chunk_skipc (chunk, p);
120 			comments_nested ++;
121 			ucl_chunk_skipc (chunk, p);
122 
123 			while (p < chunk->end) {
124 				if (*p == '"' && *(p - 1) != '\\') {
125 					quoted = !quoted;
126 				}
127 
128 				if (!quoted) {
129 					if (*p == '*') {
130 						ucl_chunk_skipc (chunk, p);
131 						if (*p == '/') {
132 							comments_nested --;
133 							if (comments_nested == 0) {
134 								ucl_chunk_skipc (chunk, p);
135 								goto start;
136 							}
137 						}
138 						ucl_chunk_skipc (chunk, p);
139 					}
140 					else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
141 						comments_nested ++;
142 						ucl_chunk_skipc (chunk, p);
143 						ucl_chunk_skipc (chunk, p);
144 						continue;
145 					}
146 				}
147 				ucl_chunk_skipc (chunk, p);
148 			}
149 			if (comments_nested != 0) {
150 				ucl_set_err (parser, UCL_ENESTED,
151 						"unfinished multiline comment", &parser->err);
152 				return false;
153 			}
154 		}
155 	}
156 
157 	return true;
158 }
159 
/**
 * Map a size suffix character to its numeric multiplier.
 *
 * The suffix is matched case-insensitively: 'k', 'm' and 'g' select
 * kilo/mega/giga. Any other character yields 1.
 *
 * @param c suffix character
 * @param is_bytes when true powers of 1024 are used, otherwise powers of 1000
 * @return the multiplier, or 1 for an unknown suffix
 */
static inline unsigned long
ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) {
	long decimal, binary;

	switch (tolower (c)) {
	case 'm':
		decimal = 1000 * 1000;
		binary = 1024 * 1024;
		break;
	case 'k':
		decimal = 1000;
		binary = 1024;
		break;
	case 'g':
		decimal = 1000 * 1000 * 1000;
		binary = 1024 * 1024 * 1024;
		break;
	default:
		/* Not a known suffix */
		return 1;
	}

	return is_bytes ? binary : decimal;
}
190 
191 
/**
 * Map a time suffix character to the number of seconds it stands for.
 *
 * The suffix is matched case-insensitively: 'm' minutes, 'h' hours, 'd' days,
 * 'w' weeks and 'y' years (365 days). Any other character yields 1.
 *
 * @param c suffix character
 * @return seconds per unit, or 1 for an unknown suffix
 */
static inline double
ucl_lex_time_multiplier (const unsigned char c) {
	const struct {
		char c;
		double mult;
	} multipliers[] = {
			{'m', 60},
			{'h', 60 * 60},
			{'d', 60 * 60 * 24},
			{'w', 60 * 60 * 24 * 7},
			/* A year is 365 days, not 365 weeks */
			{'y', 60 * 60 * 24 * 365}
	};
	size_t i;

	for (i = 0; i < sizeof (multipliers) / sizeof (multipliers[0]); i ++) {
		if (tolower (c) == multipliers[i].c) {
			return multipliers[i].mult;
		}
	}

	return 1;
}
219 
220 /**
221  * Return true if a character is a end of an atom
222  * @param c
223  * @return
224  */
225 static inline bool
226 ucl_lex_is_atom_end (const unsigned char c)
227 {
228 	return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
229 }
230 
/**
 * Tell whether two consecutive characters open a comment.
 *
 * @param c1 first character
 * @param c2 character following c1
 * @return true for '#' (regardless of c2) and for the C style opening
 *         delimiter (slash followed by asterisk), false otherwise
 */
static inline bool
ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
{
	switch (c1) {
	case '#':
		return true;
	case '/':
		return c2 == '*';
	default:
		return false;
	}
}
244 
245 /**
246  * Check variable found
247  * @param parser
248  * @param ptr
249  * @param remain
250  * @param out_len
251  * @param strict
252  * @param found
253  * @return
254  */
255 static inline const char *
256 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
257 		size_t *out_len, bool strict, bool *found)
258 {
259 	struct ucl_variable *var;
260 	unsigned char *dst;
261 	size_t dstlen;
262 	bool need_free = false;
263 
264 	LL_FOREACH (parser->variables, var) {
265 		if (strict) {
266 			if (remain == var->var_len) {
267 				if (memcmp (ptr, var->var, var->var_len) == 0) {
268 					*out_len += var->value_len;
269 					*found = true;
270 					return (ptr + var->var_len);
271 				}
272 			}
273 		}
274 		else {
275 			if (remain >= var->var_len) {
276 				if (memcmp (ptr, var->var, var->var_len) == 0) {
277 					*out_len += var->value_len;
278 					*found = true;
279 					return (ptr + var->var_len);
280 				}
281 			}
282 		}
283 	}
284 
285 	/* XXX: can only handle ${VAR} */
286 	if (!(*found) && parser->var_handler != NULL && strict) {
287 		/* Call generic handler */
288 		if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
289 				parser->var_data)) {
290 			*found = true;
291 			if (need_free) {
292 				free (dst);
293 			}
294 			return (ptr + remain);
295 		}
296 	}
297 
298 	return ptr;
299 }
300 
/**
 * Measure one '$' construct during the first (length computing) pass of
 * variable expansion.
 *
 * Three forms are handled, based on the character that follows the '$':
 *  - '{' : the name up to the first '}' is looked up strictly; when it is
 *          unknown the two characters '$' and '{' are counted and scanning
 *          resumes right after the '{'; when no '}' exists only the '$' is
 *          counted and scanning resumes at the '{';
 *  - '$' : an escaped dollar sign, counted as one output character;
 *  - anything else: a non-strict (prefix) lookup; when it fails the '$' itself
 *          is counted.
 *
 * @param parser parser object
 * @param ptr position right after the '$'
 * @param remain number of bytes available at ptr
 * @param out_len running output length, updated by this call
 * @param vars_found set to true when a variable was resolved
 * @return position from which the caller should continue scanning
 */
static const char *
ucl_check_variable (struct ucl_parser *parser, const char *ptr,
		size_t remain, size_t *out_len, bool *vars_found)
{
	const char *p, *end, *ret = ptr;
	bool found = false;
	bool closed = false;

	if (remain == 0) {
		/* A '$' at the very end of the input is copied as is */
		(*out_len) ++;
		return ptr;
	}

	if (*ptr == '{') {
		/* We need to match the variable enclosed in braces */
		p = ptr + 1;
		end = ptr + remain;
		while (p < end) {
			if (*p == '}') {
				closed = true;
				ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1,
						out_len, true, &found);
				if (found) {
					/* {} must be excluded actually */
					ret ++;
					*vars_found = true;
				}
				else {
					/* '$' and '{' are copied literally */
					*out_len += 2;
				}
				break;
			}
			p ++;
		}
		if (!closed) {
			/*
			 * No closing brace: the '$' is emitted verbatim and must be
			 * counted; the '{' and the rest are counted by the caller as
			 * ordinary characters because ret still points at the '{'.
			 */
			(*out_len) ++;
		}
	}
	else if (*ptr != '$') {
		/* Not count escaped dollar sign */
		ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found);
		if (found) {
			*vars_found = true;
		}
		else {
			(*out_len) ++;
		}
	}
	else {
		/* "$$" produces a single '$' */
		ret ++;
		(*out_len) ++;
	}

	return ret;
}
357 
358 /**
359  * Expand a single variable
360  * @param parser
361  * @param ptr
362  * @param remain
363  * @param dest
364  * @return
365  */
366 static const char *
367 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
368 		size_t remain, unsigned char **dest)
369 {
370 	unsigned char *d = *dest, *dst;
371 	const char *p = ptr + 1, *ret;
372 	struct ucl_variable *var;
373 	size_t dstlen;
374 	bool need_free = false;
375 	bool found = false;
376 	bool strict = false;
377 
378 	ret = ptr + 1;
379 	remain --;
380 
381 	if (*p == '$') {
382 		*d++ = *p++;
383 		*dest = d;
384 		return p;
385 	}
386 	else if (*p == '{') {
387 		p ++;
388 		strict = true;
389 		ret += 2;
390 		remain -= 2;
391 	}
392 
393 	LL_FOREACH (parser->variables, var) {
394 		if (remain >= var->var_len) {
395 			if (memcmp (p, var->var, var->var_len) == 0) {
396 				memcpy (d, var->value, var->value_len);
397 				ret += var->var_len;
398 				d += var->value_len;
399 				found = true;
400 				break;
401 			}
402 		}
403 	}
404 	if (!found) {
405 		if (strict && parser->var_handler != NULL) {
406 			if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
407 							parser->var_data)) {
408 				memcpy (d, dst, dstlen);
409 				ret += dstlen;
410 				d += remain;
411 				found = true;
412 			}
413 		}
414 
415 		/* Leave variable as is */
416 		if (!found) {
417 			if (strict) {
418 				/* Copy '${' */
419 				memcpy (d, ptr, 2);
420 				d += 2;
421 				ret --;
422 			}
423 			else {
424 				memcpy (d, ptr, 1);
425 				d ++;
426 			}
427 		}
428 	}
429 
430 	*dest = d;
431 	return ret;
432 }
433 
/**
 * Expand all variables found in a string.
 *
 * Works in two passes: the first one only measures the expanded length and
 * detects whether any variable resolves at all; the second one allocates a
 * buffer and writes the expanded, NUL terminated string into it.
 *
 * @param parser parser object
 * @param dst receives the newly allocated expanded string, or NULL when no
 *            variable was resolved or the allocation failed
 * @param src input string
 * @param in_len length of the input string
 * @return length of the expanded string, or in_len when *dst is NULL
 */
static ssize_t
ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
		const char *src, size_t in_len)
{
	const char *cur, *stop = src + in_len;
	unsigned char *out;
	size_t expanded_len = 0;
	bool have_vars = false;

	/* Pass 1: compute the output length */
	for (cur = src; cur != stop; ) {
		if (*cur != '$') {
			cur ++;
			expanded_len ++;
			continue;
		}
		cur = ucl_check_variable (parser, cur + 1, stop - cur - 1,
				&expanded_len, &have_vars);
	}

	if (!have_vars) {
		/* Trivial case */
		*dst = NULL;
		return in_len;
	}

	*dst = UCL_ALLOC (expanded_len + 1);
	if (*dst == NULL) {
		return in_len;
	}

	/* Pass 2: produce the output */
	out = *dst;
	for (cur = src; cur != stop; ) {
		if (*cur != '$') {
			*out++ = *cur++;
			continue;
		}
		cur = ucl_expand_single_variable (parser, cur, stop - cur, &out);
	}

	*out = '\0';

	return expanded_len;
}
488 
489 /**
490  * Store or copy pointer to the trash stack
491  * @param parser parser object
492  * @param src src string
493  * @param dst destination buffer (trash stack pointer)
494  * @param dst_const const destination pointer (e.g. value of object)
495  * @param in_len input length
496  * @param need_unescape need to unescape source (and copy it)
497  * @param need_lowercase need to lowercase value (and copy)
498  * @param need_expand need to expand variables (and copy as well)
499  * @return output length (excluding \0 symbol)
500  */
501 static inline ssize_t
502 ucl_copy_or_store_ptr (struct ucl_parser *parser,
503 		const unsigned char *src, unsigned char **dst,
504 		const char **dst_const, size_t in_len,
505 		bool need_unescape, bool need_lowercase, bool need_expand)
506 {
507 	ssize_t ret = -1, tret;
508 	unsigned char *tmp;
509 
510 	if (need_unescape || need_lowercase ||
511 			(need_expand && parser->variables != NULL) ||
512 			!(parser->flags & UCL_PARSER_ZEROCOPY)) {
513 		/* Copy string */
514 		*dst = UCL_ALLOC (in_len + 1);
515 		if (*dst == NULL) {
516 			ucl_set_err (parser, 0, "cannot allocate memory for a string",
517 					&parser->err);
518 			return false;
519 		}
520 		if (need_lowercase) {
521 			ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
522 		}
523 		else {
524 			ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
525 		}
526 
527 		if (need_unescape) {
528 			ret = ucl_unescape_json_string (*dst, ret);
529 		}
530 		if (need_expand) {
531 			tmp = *dst;
532 			tret = ret;
533 			ret = ucl_expand_variable (parser, dst, tmp, ret);
534 			if (*dst == NULL) {
535 				/* Nothing to expand */
536 				*dst = tmp;
537 				ret = tret;
538 			}
539 			else {
540 				/* Free unexpanded value */
541 				UCL_FREE (in_len + 1, tmp);
542 			}
543 		}
544 		*dst_const = *dst;
545 	}
546 	else {
547 		*dst_const = src;
548 		ret = in_len;
549 	}
550 
551 	return ret;
552 }
553 
554 /**
555  * Create and append an object at the specified level
556  * @param parser
557  * @param is_array
558  * @param level
559  * @return
560  */
561 static inline ucl_object_t *
562 ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level)
563 {
564 	struct ucl_stack *st;
565 
566 	if (!is_array) {
567 		if (obj == NULL) {
568 			obj = ucl_object_new_full (UCL_OBJECT, parser->chunks->priority);
569 		}
570 		else {
571 			obj->type = UCL_OBJECT;
572 		}
573 		obj->value.ov = ucl_hash_create (parser->flags & UCL_PARSER_KEY_LOWERCASE);
574 		parser->state = UCL_STATE_KEY;
575 	}
576 	else {
577 		if (obj == NULL) {
578 			obj = ucl_object_new_full (UCL_ARRAY, parser->chunks->priority);
579 		}
580 		else {
581 			obj->type = UCL_ARRAY;
582 		}
583 		parser->state = UCL_STATE_VALUE;
584 	}
585 
586 	st = UCL_ALLOC (sizeof (struct ucl_stack));
587 	if (st == NULL) {
588 		ucl_set_err (parser, 0, "cannot allocate memory for an object",
589 				&parser->err);
590 		ucl_object_unref (obj);
591 		return NULL;
592 	}
593 	st->obj = obj;
594 	st->level = level;
595 	LL_PREPEND (parser->stack, st);
596 	parser->cur_obj = obj;
597 
598 	return obj;
599 }
600 
601 int
602 ucl_maybe_parse_number (ucl_object_t *obj,
603 		const char *start, const char *end, const char **pos,
604 		bool allow_double, bool number_bytes, bool allow_time)
605 {
606 	const char *p = start, *c = start;
607 	char *endptr;
608 	bool got_dot = false, got_exp = false, need_double = false,
609 			is_time = false, valid_start = false, is_hex = false,
610 			is_neg = false;
611 	double dv = 0;
612 	int64_t lv = 0;
613 
614 	if (*p == '-') {
615 		is_neg = true;
616 		c ++;
617 		p ++;
618 	}
619 	while (p < end) {
620 		if (is_hex && isxdigit (*p)) {
621 			p ++;
622 		}
623 		else if (isdigit (*p)) {
624 			valid_start = true;
625 			p ++;
626 		}
627 		else if (!is_hex && (*p == 'x' || *p == 'X')) {
628 			is_hex = true;
629 			allow_double = false;
630 			c = p + 1;
631 		}
632 		else if (allow_double) {
633 			if (p == c) {
634 				/* Empty digits sequence, not a number */
635 				*pos = start;
636 				return EINVAL;
637 			}
638 			else if (*p == '.') {
639 				if (got_dot) {
640 					/* Double dots, not a number */
641 					*pos = start;
642 					return EINVAL;
643 				}
644 				else {
645 					got_dot = true;
646 					need_double = true;
647 					p ++;
648 				}
649 			}
650 			else if (*p == 'e' || *p == 'E') {
651 				if (got_exp) {
652 					/* Double exp, not a number */
653 					*pos = start;
654 					return EINVAL;
655 				}
656 				else {
657 					got_exp = true;
658 					need_double = true;
659 					p ++;
660 					if (p >= end) {
661 						*pos = start;
662 						return EINVAL;
663 					}
664 					if (!isdigit (*p) && *p != '+' && *p != '-') {
665 						/* Wrong exponent sign */
666 						*pos = start;
667 						return EINVAL;
668 					}
669 					else {
670 						p ++;
671 					}
672 				}
673 			}
674 			else {
675 				/* Got the end of the number, need to check */
676 				break;
677 			}
678 		}
679 		else {
680 			break;
681 		}
682 	}
683 
684 	if (!valid_start) {
685 		*pos = start;
686 		return EINVAL;
687 	}
688 
689 	errno = 0;
690 	if (need_double) {
691 		dv = strtod (c, &endptr);
692 	}
693 	else {
694 		if (is_hex) {
695 			lv = strtoimax (c, &endptr, 16);
696 		}
697 		else {
698 			lv = strtoimax (c, &endptr, 10);
699 		}
700 	}
701 	if (errno == ERANGE) {
702 		*pos = start;
703 		return ERANGE;
704 	}
705 
706 	/* Now check endptr */
707 	if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') {
708 		p = endptr;
709 		goto set_obj;
710 	}
711 
712 	if (endptr < end && endptr != start) {
713 		p = endptr;
714 		switch (*p) {
715 		case 'm':
716 		case 'M':
717 		case 'g':
718 		case 'G':
719 		case 'k':
720 		case 'K':
721 			if (end - p >= 2) {
722 				if (p[1] == 's' || p[1] == 'S') {
723 					/* Milliseconds */
724 					if (!need_double) {
725 						need_double = true;
726 						dv = lv;
727 					}
728 					is_time = true;
729 					if (p[0] == 'm' || p[0] == 'M') {
730 						dv /= 1000.;
731 					}
732 					else {
733 						dv *= ucl_lex_num_multiplier (*p, false);
734 					}
735 					p += 2;
736 					goto set_obj;
737 				}
738 				else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
739 					/* Bytes */
740 					if (need_double) {
741 						need_double = false;
742 						lv = dv;
743 					}
744 					lv *= ucl_lex_num_multiplier (*p, true);
745 					p += 2;
746 					goto set_obj;
747 				}
748 				else if (ucl_lex_is_atom_end (p[1])) {
749 					if (need_double) {
750 						dv *= ucl_lex_num_multiplier (*p, false);
751 					}
752 					else {
753 						lv *= ucl_lex_num_multiplier (*p, number_bytes);
754 					}
755 					p ++;
756 					goto set_obj;
757 				}
758 				else if (allow_time && end - p >= 3) {
759 					if (tolower (p[0]) == 'm' &&
760 							tolower (p[1]) == 'i' &&
761 							tolower (p[2]) == 'n') {
762 						/* Minutes */
763 						if (!need_double) {
764 							need_double = true;
765 							dv = lv;
766 						}
767 						is_time = true;
768 						dv *= 60.;
769 						p += 3;
770 						goto set_obj;
771 					}
772 				}
773 			}
774 			else {
775 				if (need_double) {
776 					dv *= ucl_lex_num_multiplier (*p, false);
777 				}
778 				else {
779 					lv *= ucl_lex_num_multiplier (*p, number_bytes);
780 				}
781 				p ++;
782 				goto set_obj;
783 			}
784 			break;
785 		case 'S':
786 		case 's':
787 			if (allow_time &&
788 					(p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
789 				if (!need_double) {
790 					need_double = true;
791 					dv = lv;
792 				}
793 				p ++;
794 				is_time = true;
795 				goto set_obj;
796 			}
797 			break;
798 		case 'h':
799 		case 'H':
800 		case 'd':
801 		case 'D':
802 		case 'w':
803 		case 'W':
804 		case 'Y':
805 		case 'y':
806 			if (allow_time &&
807 					(p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
808 				if (!need_double) {
809 					need_double = true;
810 					dv = lv;
811 				}
812 				is_time = true;
813 				dv *= ucl_lex_time_multiplier (*p);
814 				p ++;
815 				goto set_obj;
816 			}
817 			break;
818 		case '\t':
819 		case ' ':
820 			while (p < end && ucl_test_character(*p, UCL_CHARACTER_WHITESPACE)) {
821 				p++;
822 			}
823 			if (ucl_lex_is_atom_end(*p))
824 				goto set_obj;
825 			break;
826 		}
827 	}
828 	else if (endptr == end) {
829 		/* Just a number at the end of chunk */
830 		p = endptr;
831 		goto set_obj;
832 	}
833 
834 	*pos = c;
835 	return EINVAL;
836 
837 	set_obj:
838 	if (allow_double && (need_double || is_time)) {
839 		if (!is_time) {
840 			obj->type = UCL_FLOAT;
841 		}
842 		else {
843 			obj->type = UCL_TIME;
844 		}
845 		obj->value.dv = is_neg ? (-dv) : dv;
846 	}
847 	else {
848 		obj->type = UCL_INT;
849 		obj->value.iv = is_neg ? (-lv) : lv;
850 	}
851 	*pos = p;
852 	return 0;
853 }
854 
855 /**
856  * Parse possible number
857  * @param parser
858  * @param chunk
859  * @return true if a number has been parsed
860  */
861 static bool
862 ucl_lex_number (struct ucl_parser *parser,
863 		struct ucl_chunk *chunk, ucl_object_t *obj)
864 {
865 	const unsigned char *pos;
866 	int ret;
867 
868 	ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
869 			true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
870 
871 	if (ret == 0) {
872 		chunk->remain -= pos - chunk->pos;
873 		chunk->column += pos - chunk->pos;
874 		chunk->pos = pos;
875 		return true;
876 	}
877 	else if (ret == ERANGE) {
878 		ucl_set_err (parser, ERANGE, "numeric value out of range", &parser->err);
879 	}
880 
881 	return false;
882 }
883 
884 /**
885  * Parse quoted string with possible escapes
886  * @param parser
887  * @param chunk
888  * @return true if a string has been parsed
889  */
890 static bool
891 ucl_lex_json_string (struct ucl_parser *parser,
892 		struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand)
893 {
894 	const unsigned char *p = chunk->pos;
895 	unsigned char c;
896 	int i;
897 
898 	while (p < chunk->end) {
899 		c = *p;
900 		if (c < 0x1F) {
901 			/* Unmasked control character */
902 			if (c == '\n') {
903 				ucl_set_err (parser, UCL_ESYNTAX, "unexpected newline",
904 						&parser->err);
905 			}
906 			else {
907 				ucl_set_err (parser, UCL_ESYNTAX, "unexpected control character",
908 						&parser->err);
909 			}
910 			return false;
911 		}
912 		else if (c == '\\') {
913 			ucl_chunk_skipc (chunk, p);
914 			c = *p;
915 			if (p >= chunk->end) {
916 				ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character",
917 						&parser->err);
918 				return false;
919 			}
920 			else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
921 				if (c == 'u') {
922 					ucl_chunk_skipc (chunk, p);
923 					for (i = 0; i < 4 && p < chunk->end; i ++) {
924 						if (!isxdigit (*p)) {
925 							ucl_set_err (parser, UCL_ESYNTAX, "invalid utf escape",
926 									&parser->err);
927 							return false;
928 						}
929 						ucl_chunk_skipc (chunk, p);
930 					}
931 					if (p >= chunk->end) {
932 						ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character",
933 								&parser->err);
934 						return false;
935 					}
936 				}
937 				else {
938 					ucl_chunk_skipc (chunk, p);
939 				}
940 			}
941 			*need_unescape = true;
942 			*ucl_escape = true;
943 			continue;
944 		}
945 		else if (c == '"') {
946 			ucl_chunk_skipc (chunk, p);
947 			return true;
948 		}
949 		else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
950 			*ucl_escape = true;
951 		}
952 		else if (c == '$') {
953 			*var_expand = true;
954 		}
955 		ucl_chunk_skipc (chunk, p);
956 	}
957 
958 	ucl_set_err (parser, UCL_ESYNTAX, "no quote at the end of json string",
959 			&parser->err);
960 	return false;
961 }
962 
963 static void
964 ucl_parser_append_elt (struct ucl_parser *parser, ucl_hash_t *cont,
965 		ucl_object_t *top,
966 		ucl_object_t *elt)
967 {
968 	ucl_object_t *nobj;
969 
970 	if ((parser->flags & UCL_PARSER_NO_IMPLICIT_ARRAYS) == 0) {
971 		/* Implicit array */
972 		top->flags |= UCL_OBJECT_MULTIVALUE;
973 		DL_APPEND (top, elt);
974 	}
975 	else {
976 		if ((top->flags & UCL_OBJECT_MULTIVALUE) != 0) {
977 			/* Just add to the explicit array */
978 			ucl_array_append (top, elt);
979 		}
980 		else {
981 			/* Convert to an array */
982 			ucl_hash_delete (cont, top);
983 			nobj = ucl_object_typed_new (UCL_ARRAY);
984 			nobj->key = top->key;
985 			nobj->keylen = top->keylen;
986 			nobj->flags |= UCL_OBJECT_MULTIVALUE;
987 			ucl_array_append (nobj, top);
988 			ucl_array_append (nobj, elt);
989 			ucl_hash_insert (cont, nobj, nobj->key, nobj->keylen);
990 		}
991 	}
992 }
993 
994 /**
995  * Parse a key in an object
996  * @param parser
997  * @param chunk
998  * @return true if a key has been parsed
999  */
1000 static bool
1001 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object)
1002 {
1003 	const unsigned char *p, *c = NULL, *end, *t;
1004 	const char *key = NULL;
1005 	bool got_quote = false, got_eq = false, got_semicolon = false,
1006 			need_unescape = false, ucl_escape = false, var_expand = false,
1007 			got_content = false, got_sep = false;
1008 	ucl_object_t *nobj, *tobj;
1009 	ucl_hash_t *container;
1010 	ssize_t keylen;
1011 
1012 	p = chunk->pos;
1013 
1014 	if (*p == '.') {
1015 		/* It is macro actually */
1016 		ucl_chunk_skipc (chunk, p);
1017 		parser->prev_state = parser->state;
1018 		parser->state = UCL_STATE_MACRO_NAME;
1019 		*end_of_object = false;
1020 		return true;
1021 	}
1022 	while (p < chunk->end) {
1023 		/*
1024 		 * A key must start with alpha, number, '/' or '_' and end with space character
1025 		 */
1026 		if (c == NULL) {
1027 			if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1028 				if (!ucl_skip_comments (parser)) {
1029 					return false;
1030 				}
1031 				p = chunk->pos;
1032 			}
1033 			else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1034 				ucl_chunk_skipc (chunk, p);
1035 			}
1036 			else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
1037 				/* The first symbol */
1038 				c = p;
1039 				ucl_chunk_skipc (chunk, p);
1040 				got_content = true;
1041 			}
1042 			else if (*p == '"') {
1043 				/* JSON style key */
1044 				c = p + 1;
1045 				got_quote = true;
1046 				got_content = true;
1047 				ucl_chunk_skipc (chunk, p);
1048 			}
1049 			else if (*p == '}') {
1050 				/* We have actually end of an object */
1051 				*end_of_object = true;
1052 				return true;
1053 			}
1054 			else if (*p == '.') {
1055 				ucl_chunk_skipc (chunk, p);
1056 				parser->prev_state = parser->state;
1057 				parser->state = UCL_STATE_MACRO_NAME;
1058 				return true;
1059 			}
1060 			else {
1061 				/* Invalid identifier */
1062 				ucl_set_err (parser, UCL_ESYNTAX, "key must begin with a letter",
1063 						&parser->err);
1064 				return false;
1065 			}
1066 		}
1067 		else {
1068 			/* Parse the body of a key */
1069 			if (!got_quote) {
1070 				if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
1071 					got_content = true;
1072 					ucl_chunk_skipc (chunk, p);
1073 				}
1074 				else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
1075 					end = p;
1076 					break;
1077 				}
1078 				else {
1079 					ucl_set_err (parser, UCL_ESYNTAX, "invalid character in a key",
1080 							&parser->err);
1081 					return false;
1082 				}
1083 			}
1084 			else {
1085 				/* We need to parse json like quoted string */
1086 				if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1087 					return false;
1088 				}
1089 				/* Always escape keys obtained via json */
1090 				end = chunk->pos - 1;
1091 				p = chunk->pos;
1092 				break;
1093 			}
1094 		}
1095 	}
1096 
1097 	if (p >= chunk->end && got_content) {
1098 		ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err);
1099 		return false;
1100 	}
1101 	else if (!got_content) {
1102 		return true;
1103 	}
1104 	*end_of_object = false;
1105 	/* We are now at the end of the key, need to parse the rest */
1106 	while (p < chunk->end) {
1107 		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1108 			ucl_chunk_skipc (chunk, p);
1109 		}
1110 		else if (*p == '=') {
1111 			if (!got_eq && !got_semicolon) {
1112 				ucl_chunk_skipc (chunk, p);
1113 				got_eq = true;
1114 			}
1115 			else {
1116 				ucl_set_err (parser, UCL_ESYNTAX, "unexpected '=' character",
1117 						&parser->err);
1118 				return false;
1119 			}
1120 		}
1121 		else if (*p == ':') {
1122 			if (!got_eq && !got_semicolon) {
1123 				ucl_chunk_skipc (chunk, p);
1124 				got_semicolon = true;
1125 			}
1126 			else {
1127 				ucl_set_err (parser, UCL_ESYNTAX, "unexpected ':' character",
1128 						&parser->err);
1129 				return false;
1130 			}
1131 		}
1132 		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1133 			/* Check for comment */
1134 			if (!ucl_skip_comments (parser)) {
1135 				return false;
1136 			}
1137 			p = chunk->pos;
1138 		}
1139 		else {
1140 			/* Start value */
1141 			break;
1142 		}
1143 	}
1144 
1145 	if (p >= chunk->end && got_content) {
1146 		ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err);
1147 		return false;
1148 	}
1149 
1150 	got_sep = got_semicolon || got_eq;
1151 
1152 	if (!got_sep) {
1153 		/*
1154 		 * Maybe we have more keys nested, so search for termination character.
1155 		 * Possible choices:
1156 		 * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1157 		 * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1158 		 * 3) key1 value[;,\n] <- we treat that as linear object
1159 		 */
1160 		t = p;
1161 		*next_key = false;
1162 		while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1163 			t ++;
1164 		}
1165 		/* Check first non-space character after a key */
1166 		if (*t != '{' && *t != '[') {
1167 			while (t < chunk->end) {
1168 				if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1169 					break;
1170 				}
1171 				else if (*t == '{' || *t == '[') {
1172 					*next_key = true;
1173 					break;
1174 				}
1175 				t ++;
1176 			}
1177 		}
1178 	}
1179 
1180 	/* Create a new object */
1181 	nobj = ucl_object_new_full (UCL_NULL, parser->chunks->priority);
1182 	keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1183 			&key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false);
1184 	if (keylen == -1) {
1185 		ucl_object_unref (nobj);
1186 		return false;
1187 	}
1188 	else if (keylen == 0) {
1189 		ucl_set_err (parser, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1190 		ucl_object_unref (nobj);
1191 		return false;
1192 	}
1193 
1194 	container = parser->stack->obj->value.ov;
1195 	nobj->key = key;
1196 	nobj->keylen = keylen;
1197 	tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj));
1198 	if (tobj == NULL) {
1199 		container = ucl_hash_insert_object (container, nobj,
1200 				parser->flags & UCL_PARSER_KEY_LOWERCASE);
1201 		nobj->prev = nobj;
1202 		nobj->next = NULL;
1203 		parser->stack->obj->len ++;
1204 	}
1205 	else {
1206 		/*
1207 		 * The logic here is the following:
1208 		 *
1209 		 * - if we have two objects with the same priority, then we form an
1210 		 * implicit or explicit array
1211 		 * - if a new object has bigger priority, then we overwrite an old one
1212 		 * - if a new object has lower priority, then we ignore it
1213 		 */
1214 		unsigned priold = ucl_object_get_priority (tobj),
1215 				prinew = ucl_object_get_priority (nobj);
1216 		if (priold == prinew) {
1217 			ucl_parser_append_elt (parser, container, tobj, nobj);
1218 		}
1219 		else if (priold > prinew) {
1220 			ucl_object_unref (nobj);
1221 			return true;
1222 		}
1223 		else {
1224 			ucl_hash_replace (container, tobj, nobj);
1225 			ucl_object_unref (tobj);
1226 		}
1227 	}
1228 
1229 	if (ucl_escape) {
1230 		nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1231 	}
1232 	parser->stack->obj->value.ov = container;
1233 
1234 	parser->cur_obj = nobj;
1235 
1236 	return true;
1237 }
1238 
1239 /**
1240  * Parse a cl string
1241  * @param parser
1242  * @param chunk
1243  * @return true if a key has been parsed
1244  */
1245 static bool
1246 ucl_parse_string_value (struct ucl_parser *parser,
1247 		struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1248 {
1249 	const unsigned char *p;
1250 	enum {
1251 		UCL_BRACE_ROUND = 0,
1252 		UCL_BRACE_SQUARE,
1253 		UCL_BRACE_FIGURE
1254 	};
1255 	int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1256 
1257 	p = chunk->pos;
1258 
1259 	while (p < chunk->end) {
1260 
1261 		/* Skip pairs of figure braces */
1262 		if (*p == '{') {
1263 			braces[UCL_BRACE_FIGURE][0] ++;
1264 		}
1265 		else if (*p == '}') {
1266 			braces[UCL_BRACE_FIGURE][1] ++;
1267 			if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1268 				/* This is not a termination symbol, continue */
1269 				ucl_chunk_skipc (chunk, p);
1270 				continue;
1271 			}
1272 		}
1273 		/* Skip pairs of square braces */
1274 		else if (*p == '[') {
1275 			braces[UCL_BRACE_SQUARE][0] ++;
1276 		}
1277 		else if (*p == ']') {
1278 			braces[UCL_BRACE_SQUARE][1] ++;
1279 			if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1280 				/* This is not a termination symbol, continue */
1281 				ucl_chunk_skipc (chunk, p);
1282 				continue;
1283 			}
1284 		}
1285 		else if (*p == '$') {
1286 			*var_expand = true;
1287 		}
1288 		else if (*p == '\\') {
1289 			*need_unescape = true;
1290 			ucl_chunk_skipc (chunk, p);
1291 			if (p < chunk->end) {
1292 				ucl_chunk_skipc (chunk, p);
1293 			}
1294 			continue;
1295 		}
1296 
1297 		if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1298 			break;
1299 		}
1300 		ucl_chunk_skipc (chunk, p);
1301 	}
1302 
1303 	return true;
1304 }
1305 
1306 /**
1307  * Parse multiline string ending with \n{term}\n
1308  * @param parser
1309  * @param chunk
1310  * @param term
1311  * @param term_len
1312  * @return size of multiline string or 0 in case of error
1313  */
1314 static int
1315 ucl_parse_multiline_string (struct ucl_parser *parser,
1316 		struct ucl_chunk *chunk, const unsigned char *term,
1317 		int term_len, unsigned char const **beg,
1318 		bool *var_expand)
1319 {
1320 	const unsigned char *p, *c, *tend;
1321 	bool newline = false;
1322 	int len = 0;
1323 
1324 	p = chunk->pos;
1325 
1326 	c = p;
1327 
1328 	while (p < chunk->end) {
1329 		if (newline) {
1330 			if (chunk->end - p < term_len) {
1331 				return 0;
1332 			}
1333 			else if (memcmp (p, term, term_len) == 0) {
1334 				tend = p + term_len;
1335 				if (*tend != '\n' && *tend != ';' && *tend != ',') {
1336 					/* Incomplete terminator */
1337 					ucl_chunk_skipc (chunk, p);
1338 					continue;
1339 				}
1340 				len = p - c;
1341 				chunk->remain -= term_len;
1342 				chunk->pos = p + term_len;
1343 				chunk->column = term_len;
1344 				*beg = c;
1345 				break;
1346 			}
1347 		}
1348 		if (*p == '\n') {
1349 			newline = true;
1350 		}
1351 		else {
1352 			if (*p == '$') {
1353 				*var_expand = true;
1354 			}
1355 			newline = false;
1356 		}
1357 		ucl_chunk_skipc (chunk, p);
1358 	}
1359 
1360 	return len;
1361 }
1362 
1363 static ucl_object_t*
1364 ucl_get_value_object (struct ucl_parser *parser)
1365 {
1366 	ucl_object_t *t, *obj = NULL;
1367 
1368 	if (parser == NULL || parser->stack == NULL || parser->stack->obj == NULL) {
1369 		return NULL;
1370 	}
1371 
1372 	if (parser->stack->obj->type == UCL_ARRAY) {
1373 		/* Object must be allocated */
1374 		obj = ucl_object_new_full (UCL_NULL, parser->chunks->priority);
1375 		t = parser->stack->obj;
1376 		ucl_array_append (t, obj);
1377 		parser->cur_obj = obj;
1378 	}
1379 	else {
1380 		/* Object has been already allocated */
1381 		obj = parser->cur_obj;
1382 	}
1383 
1384 	return obj;
1385 }
1386 
1387 /**
1388  * Handle value data
1389  * @param parser
1390  * @param chunk
1391  * @return
1392  */
1393 static bool
1394 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1395 {
1396 	const unsigned char *p, *c;
1397 	ucl_object_t *obj = NULL;
1398 	unsigned int stripped_spaces;
1399 	int str_len;
1400 	bool need_unescape = false, ucl_escape = false, var_expand = false;
1401 
1402 	p = chunk->pos;
1403 
1404 	/* Skip any spaces and comments */
1405 	if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1406 			(chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1407 		while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1408 			ucl_chunk_skipc (chunk, p);
1409 		}
1410 		if (!ucl_skip_comments (parser)) {
1411 			return false;
1412 		}
1413 		p = chunk->pos;
1414 	}
1415 
1416 	while (p < chunk->end) {
1417 		c = p;
1418 		switch (*p) {
1419 		case '"':
1420 			obj = ucl_get_value_object (parser);
1421 			ucl_chunk_skipc (chunk, p);
1422 			if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1423 				return false;
1424 			}
1425 			str_len = chunk->pos - c - 2;
1426 			obj->type = UCL_STRING;
1427 			if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE],
1428 					&obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) {
1429 				return false;
1430 			}
1431 			obj->len = str_len;
1432 			parser->state = UCL_STATE_AFTER_VALUE;
1433 			p = chunk->pos;
1434 			return true;
1435 			break;
1436 		case '{':
1437 			obj = ucl_get_value_object (parser);
1438 			/* We have a new object */
1439 			obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level);
1440 			if (obj == NULL) {
1441 				return false;
1442 			}
1443 
1444 			ucl_chunk_skipc (chunk, p);
1445 			return true;
1446 			break;
1447 		case '[':
1448 			obj = ucl_get_value_object (parser);
1449 			/* We have a new array */
1450 			obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level);
1451 			if (obj == NULL) {
1452 				return false;
1453 			}
1454 
1455 			ucl_chunk_skipc (chunk, p);
1456 			return true;
1457 			break;
1458 		case ']':
1459 			/* We have the array ending */
1460 			if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1461 				parser->state = UCL_STATE_AFTER_VALUE;
1462 				return true;
1463 			}
1464 			else {
1465 				goto parse_string;
1466 			}
1467 			break;
1468 		case '<':
1469 			obj = ucl_get_value_object (parser);
1470 			/* We have something like multiline value, which must be <<[A-Z]+\n */
1471 			if (chunk->end - p > 3) {
1472 				if (memcmp (p, "<<", 2) == 0) {
1473 					p += 2;
1474 					/* We allow only uppercase characters in multiline definitions */
1475 					while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1476 						p ++;
1477 					}
1478 					if (*p =='\n') {
1479 						/* Set chunk positions and start multiline parsing */
1480 						c += 2;
1481 						chunk->remain -= p - c;
1482 						chunk->pos = p + 1;
1483 						chunk->column = 0;
1484 						chunk->line ++;
1485 						if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1486 								p - c, &c, &var_expand)) == 0) {
1487 							ucl_set_err (parser, UCL_ESYNTAX,
1488 									"unterminated multiline value", &parser->err);
1489 							return false;
1490 						}
1491 						obj->type = UCL_STRING;
1492 						if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1493 							&obj->value.sv, str_len - 1, false, false, var_expand)) == -1) {
1494 							return false;
1495 						}
1496 						obj->len = str_len;
1497 						parser->state = UCL_STATE_AFTER_VALUE;
1498 						return true;
1499 					}
1500 				}
1501 			}
1502 			/* Fallback to ordinary strings */
1503 		default:
1504 parse_string:
1505 			if (obj == NULL) {
1506 				obj = ucl_get_value_object (parser);
1507 			}
1508 			/* Parse atom */
1509 			if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1510 				if (!ucl_lex_number (parser, chunk, obj)) {
1511 					if (parser->state == UCL_STATE_ERROR) {
1512 						return false;
1513 					}
1514 				}
1515 				else {
1516 					parser->state = UCL_STATE_AFTER_VALUE;
1517 					return true;
1518 				}
1519 				/* Fallback to normal string */
1520 			}
1521 
1522 			if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) {
1523 				return false;
1524 			}
1525 			/* Cut trailing spaces */
1526 			stripped_spaces = 0;
1527 			while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1528 					UCL_CHARACTER_WHITESPACE)) {
1529 				stripped_spaces ++;
1530 			}
1531 			str_len = chunk->pos - c - stripped_spaces;
1532 			if (str_len <= 0) {
1533 				ucl_set_err (parser, 0, "string value must not be empty",
1534 						&parser->err);
1535 				return false;
1536 			}
1537 			else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1538 				obj->len = 0;
1539 				obj->type = UCL_NULL;
1540 			}
1541 			else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1542 				obj->type = UCL_STRING;
1543 				if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE],
1544 						&obj->value.sv, str_len, need_unescape,
1545 						false, var_expand)) == -1) {
1546 					return false;
1547 				}
1548 				obj->len = str_len;
1549 			}
1550 			parser->state = UCL_STATE_AFTER_VALUE;
1551 			p = chunk->pos;
1552 
1553 			return true;
1554 			break;
1555 		}
1556 	}
1557 
1558 	return true;
1559 }
1560 
1561 /**
1562  * Handle after value data
1563  * @param parser
1564  * @param chunk
1565  * @return
1566  */
1567 static bool
1568 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1569 {
1570 	const unsigned char *p;
1571 	bool got_sep = false;
1572 	struct ucl_stack *st;
1573 
1574 	p = chunk->pos;
1575 
1576 	while (p < chunk->end) {
1577 		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1578 			/* Skip whitespaces */
1579 			ucl_chunk_skipc (chunk, p);
1580 		}
1581 		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1582 			/* Skip comment */
1583 			if (!ucl_skip_comments (parser)) {
1584 				return false;
1585 			}
1586 			/* Treat comment as a separator */
1587 			got_sep = true;
1588 			p = chunk->pos;
1589 		}
1590 		else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
1591 			if (*p == '}' || *p == ']') {
1592 				if (parser->stack == NULL) {
1593 					ucl_set_err (parser, UCL_ESYNTAX,
1594 							"end of array or object detected without corresponding start",
1595 							&parser->err);
1596 					return false;
1597 				}
1598 				if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
1599 						(*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
1600 
1601 					/* Pop all nested objects from a stack */
1602 					st = parser->stack;
1603 					parser->stack = st->next;
1604 					UCL_FREE (sizeof (struct ucl_stack), st);
1605 
1606 					while (parser->stack != NULL) {
1607 						st = parser->stack;
1608 						if (st->next == NULL || st->next->level == st->level) {
1609 							break;
1610 						}
1611 						parser->stack = st->next;
1612 						UCL_FREE (sizeof (struct ucl_stack), st);
1613 					}
1614 				}
1615 				else {
1616 					ucl_set_err (parser, UCL_ESYNTAX,
1617 							"unexpected terminating symbol detected",
1618 							&parser->err);
1619 					return false;
1620 				}
1621 
1622 				if (parser->stack == NULL) {
1623 					/* Ignore everything after a top object */
1624 					return true;
1625 				}
1626 				else {
1627 					ucl_chunk_skipc (chunk, p);
1628 				}
1629 				got_sep = true;
1630 			}
1631 			else {
1632 				/* Got a separator */
1633 				got_sep = true;
1634 				ucl_chunk_skipc (chunk, p);
1635 			}
1636 		}
1637 		else {
1638 			/* Anything else */
1639 			if (!got_sep) {
1640 				ucl_set_err (parser, UCL_ESYNTAX, "delimiter is missing",
1641 						&parser->err);
1642 				return false;
1643 			}
1644 			return true;
1645 		}
1646 	}
1647 
1648 	return true;
1649 }
1650 
1651 /**
1652  * Handle macro data
1653  * @param parser
1654  * @param chunk
1655  * @return
1656  */
1657 static bool
1658 ucl_parse_macro_value (struct ucl_parser *parser,
1659 		struct ucl_chunk *chunk, struct ucl_macro *macro,
1660 		unsigned char const **macro_start, size_t *macro_len)
1661 {
1662 	const unsigned char *p, *c;
1663 	bool need_unescape = false, ucl_escape = false, var_expand = false;
1664 
1665 	p = chunk->pos;
1666 
1667 	switch (*p) {
1668 	case '"':
1669 		/* We have macro value encoded in quotes */
1670 		c = p;
1671 		ucl_chunk_skipc (chunk, p);
1672 		if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1673 			return false;
1674 		}
1675 
1676 		*macro_start = c + 1;
1677 		*macro_len = chunk->pos - c - 2;
1678 		p = chunk->pos;
1679 		break;
1680 	case '{':
1681 		/* We got a multiline macro body */
1682 		ucl_chunk_skipc (chunk, p);
1683 		/* Skip spaces at the beginning */
1684 		while (p < chunk->end) {
1685 			if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1686 				ucl_chunk_skipc (chunk, p);
1687 			}
1688 			else {
1689 				break;
1690 			}
1691 		}
1692 		c = p;
1693 		while (p < chunk->end) {
1694 			if (*p == '}') {
1695 				break;
1696 			}
1697 			ucl_chunk_skipc (chunk, p);
1698 		}
1699 		*macro_start = c;
1700 		*macro_len = p - c;
1701 		ucl_chunk_skipc (chunk, p);
1702 		break;
1703 	default:
1704 		/* Macro is not enclosed in quotes or braces */
1705 		c = p;
1706 		while (p < chunk->end) {
1707 			if (ucl_lex_is_atom_end (*p)) {
1708 				break;
1709 			}
1710 			ucl_chunk_skipc (chunk, p);
1711 		}
1712 		*macro_start = c;
1713 		*macro_len = p - c;
1714 		break;
1715 	}
1716 
1717 	/* We are at the end of a macro */
1718 	/* Skip ';' and space characters and return to previous state */
1719 	while (p < chunk->end) {
1720 		if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
1721 			break;
1722 		}
1723 		ucl_chunk_skipc (chunk, p);
1724 	}
1725 	return true;
1726 }
1727 
1728 /**
1729  * Parse macro arguments as UCL object
1730  * @param parser parser structure
1731  * @param chunk the current data chunk
1732  * @return
1733  */
1734 static ucl_object_t *
1735 ucl_parse_macro_arguments (struct ucl_parser *parser,
1736 		struct ucl_chunk *chunk)
1737 {
1738 	ucl_object_t *res = NULL;
1739 	struct ucl_parser *params_parser;
1740 	int obraces = 1, ebraces = 0, state = 0;
1741 	const unsigned char *p, *c;
1742 	size_t args_len = 0;
1743 	struct ucl_parser_saved_state saved;
1744 
1745 	saved.column = chunk->column;
1746 	saved.line = chunk->line;
1747 	saved.pos = chunk->pos;
1748 	saved.remain = chunk->remain;
1749 	p = chunk->pos;
1750 
1751 	if (*p != '(' || chunk->remain < 2) {
1752 		return NULL;
1753 	}
1754 
1755 	/* Set begin and start */
1756 	ucl_chunk_skipc (chunk, p);
1757 	c = p;
1758 
1759 	while ((p) < (chunk)->end) {
1760 		switch (state) {
1761 		case 0:
1762 			/* Parse symbols and check for '(', ')' and '"' */
1763 			if (*p == '(') {
1764 				obraces ++;
1765 			}
1766 			else if (*p == ')') {
1767 				ebraces ++;
1768 			}
1769 			else if (*p == '"') {
1770 				state = 1;
1771 			}
1772 			/* Check pairing */
1773 			if (obraces == ebraces) {
1774 				state = 99;
1775 			}
1776 			else {
1777 				args_len ++;
1778 			}
1779 			/* Check overflow */
1780 			if (chunk->remain == 0) {
1781 				goto restore_chunk;
1782 			}
1783 			ucl_chunk_skipc (chunk, p);
1784 			break;
1785 		case 1:
1786 			/* We have quote character, so skip all but quotes */
1787 			if (*p == '"' && *(p - 1) != '\\') {
1788 				state = 0;
1789 			}
1790 			if (chunk->remain == 0) {
1791 				goto restore_chunk;
1792 			}
1793 			ucl_chunk_skipc (chunk, p);
1794 			break;
1795 		case 99:
1796 			/*
1797 			 * We have read the full body of arguments, so we need to parse and set
1798 			 * object from that
1799 			 */
1800 			params_parser = ucl_parser_new (parser->flags);
1801 			if (!ucl_parser_add_chunk (params_parser, c, args_len)) {
1802 				ucl_set_err (parser, UCL_ESYNTAX, "macro arguments parsing error",
1803 						&parser->err);
1804 			}
1805 			else {
1806 				res = ucl_parser_get_object (params_parser);
1807 			}
1808 			ucl_parser_free (params_parser);
1809 
1810 			return res;
1811 
1812 			break;
1813 		}
1814 	}
1815 
1816 	return res;
1817 
1818 restore_chunk:
1819 	chunk->column = saved.column;
1820 	chunk->line = saved.line;
1821 	chunk->pos = saved.pos;
1822 	chunk->remain = saved.remain;
1823 
1824 	return NULL;
1825 }
1826 
/*
 * Advance p (and the chunk) over UCL_CHARACTER_WHITESPACE_UNSAFE characters.
 * At the first other character: if it starts a comment, ucl_skip_comments()
 * is called and p is reloaded from chunk->pos; then the scan stops.
 *
 * NOTE: when ucl_skip_comments() fails, this macro executes `return false`
 * in the function that uses it, so it may only be used inside functions
 * returning bool.
 */
#define SKIP_SPACES_COMMENTS(parser, chunk, p) do {								\
	while ((p) < (chunk)->end) {												\
		if (!ucl_test_character (*(p), UCL_CHARACTER_WHITESPACE_UNSAFE)) {		\
			if ((chunk)->remain >= 2 && ucl_lex_is_comment ((p)[0], (p)[1])) {	\
				if (!ucl_skip_comments (parser)) {								\
					return false;												\
				}																\
				p = (chunk)->pos;												\
			}																	\
			break;																\
		}																		\
		ucl_chunk_skipc (chunk, p);												\
	}																			\
} while(0)
1841 
1842 /**
1843  * Handle the main states of rcl parser
1844  * @param parser parser structure
1845  * @param data the pointer to the beginning of a chunk
1846  * @param len the length of a chunk
1847  * @return true if chunk has been parsed and false in case of error
1848  */
1849 static bool
1850 ucl_state_machine (struct ucl_parser *parser)
1851 {
1852 	ucl_object_t *obj, *macro_args;
1853 	struct ucl_chunk *chunk = parser->chunks;
1854 	const unsigned char *p, *c = NULL, *macro_start = NULL;
1855 	unsigned char *macro_escaped;
1856 	size_t macro_len = 0;
1857 	struct ucl_macro *macro = NULL;
1858 	bool next_key = false, end_of_object = false, ret;
1859 
1860 	if (parser->top_obj == NULL) {
1861 		if (*chunk->pos == '[') {
1862 			obj = ucl_add_parser_stack (NULL, parser, true, 0);
1863 		}
1864 		else {
1865 			obj = ucl_add_parser_stack (NULL, parser, false, 0);
1866 		}
1867 		if (obj == NULL) {
1868 			return false;
1869 		}
1870 		parser->top_obj = obj;
1871 		parser->cur_obj = obj;
1872 		parser->state = UCL_STATE_INIT;
1873 	}
1874 
1875 	p = chunk->pos;
1876 	while (chunk->pos < chunk->end) {
1877 		switch (parser->state) {
1878 		case UCL_STATE_INIT:
1879 			/*
1880 			 * At the init state we can either go to the parse array or object
1881 			 * if we got [ or { correspondingly or can just treat new data as
1882 			 * a key of newly created object
1883 			 */
1884 			if (!ucl_skip_comments (parser)) {
1885 				parser->prev_state = parser->state;
1886 				parser->state = UCL_STATE_ERROR;
1887 				return false;
1888 			}
1889 			else {
1890 				/* Skip any spaces */
1891 				while (p < chunk->end && ucl_test_character (*p,
1892 						UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1893 					ucl_chunk_skipc (chunk, p);
1894 				}
1895 				p = chunk->pos;
1896 				if (*p == '[') {
1897 					parser->state = UCL_STATE_VALUE;
1898 					ucl_chunk_skipc (chunk, p);
1899 				}
1900 				else {
1901 					parser->state = UCL_STATE_KEY;
1902 					if (*p == '{') {
1903 						ucl_chunk_skipc (chunk, p);
1904 					}
1905 				}
1906 			}
1907 			break;
1908 		case UCL_STATE_KEY:
1909 			/* Skip any spaces */
1910 			while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1911 				ucl_chunk_skipc (chunk, p);
1912 			}
1913 			if (*p == '}') {
1914 				/* We have the end of an object */
1915 				parser->state = UCL_STATE_AFTER_VALUE;
1916 				continue;
1917 			}
1918 			if (parser->stack == NULL) {
1919 				/* No objects are on stack, but we want to parse a key */
1920 				ucl_set_err (parser, UCL_ESYNTAX, "top object is finished but the parser "
1921 						"expects a key", &parser->err);
1922 				parser->prev_state = parser->state;
1923 				parser->state = UCL_STATE_ERROR;
1924 				return false;
1925 			}
1926 			if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) {
1927 				parser->prev_state = parser->state;
1928 				parser->state = UCL_STATE_ERROR;
1929 				return false;
1930 			}
1931 			if (end_of_object) {
1932 				p = chunk->pos;
1933 				parser->state = UCL_STATE_AFTER_VALUE;
1934 				continue;
1935 			}
1936 			else if (parser->state != UCL_STATE_MACRO_NAME) {
1937 				if (next_key && parser->stack->obj->type == UCL_OBJECT) {
1938 					/* Parse more keys and nest objects accordingly */
1939 					obj = ucl_add_parser_stack (parser->cur_obj, parser, false,
1940 							parser->stack->level + 1);
1941 					if (obj == NULL) {
1942 						return false;
1943 					}
1944 				}
1945 				else {
1946 					parser->state = UCL_STATE_VALUE;
1947 				}
1948 			}
1949 			else {
1950 				c = chunk->pos;
1951 			}
1952 			p = chunk->pos;
1953 			break;
1954 		case UCL_STATE_VALUE:
1955 			/* We need to check what we do have */
1956 			if (!ucl_parse_value (parser, chunk)) {
1957 				parser->prev_state = parser->state;
1958 				parser->state = UCL_STATE_ERROR;
1959 				return false;
1960 			}
1961 			/* State is set in ucl_parse_value call */
1962 			p = chunk->pos;
1963 			break;
1964 		case UCL_STATE_AFTER_VALUE:
1965 			if (!ucl_parse_after_value (parser, chunk)) {
1966 				parser->prev_state = parser->state;
1967 				parser->state = UCL_STATE_ERROR;
1968 				return false;
1969 			}
1970 			if (parser->stack != NULL) {
1971 				if (parser->stack->obj->type == UCL_OBJECT) {
1972 					parser->state = UCL_STATE_KEY;
1973 				}
1974 				else {
1975 					/* Array */
1976 					parser->state = UCL_STATE_VALUE;
1977 				}
1978 			}
1979 			else {
1980 				/* Skip everything at the end */
1981 				return true;
1982 			}
1983 			p = chunk->pos;
1984 			break;
1985 		case UCL_STATE_MACRO_NAME:
1986 			if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) &&
1987 					*p != '(') {
1988 				ucl_chunk_skipc (chunk, p);
1989 			}
1990 			else if (p - c > 0) {
1991 				/* We got macro name */
1992 				macro_len = (size_t)(p - c);
1993 				HASH_FIND (hh, parser->macroes, c, macro_len, macro);
1994 				if (macro == NULL) {
1995 					ucl_create_err (&parser->err, "error on line %d at column %d: "
1996 							"unknown macro: '%.*s', character: '%c'",
1997 								chunk->line, chunk->column, (int)(p - c), c, *chunk->pos);
1998 					parser->state = UCL_STATE_ERROR;
1999 					return false;
2000 				}
2001 				/* Now we need to skip all spaces */
2002 				SKIP_SPACES_COMMENTS(parser, chunk, p);
2003 				parser->state = UCL_STATE_MACRO;
2004 			}
2005 			break;
2006 		case UCL_STATE_MACRO:
2007 			if (*chunk->pos == '(') {
2008 				macro_args = ucl_parse_macro_arguments (parser, chunk);
2009 				p = chunk->pos;
2010 				if (macro_args) {
2011 					SKIP_SPACES_COMMENTS(parser, chunk, p);
2012 				}
2013 			}
2014 			else {
2015 				macro_args = NULL;
2016 			}
2017 			if (!ucl_parse_macro_value (parser, chunk, macro,
2018 					&macro_start, &macro_len)) {
2019 				parser->prev_state = parser->state;
2020 				parser->state = UCL_STATE_ERROR;
2021 				return false;
2022 			}
2023 			macro_len = ucl_expand_variable (parser, &macro_escaped,
2024 					macro_start, macro_len);
2025 			parser->state = parser->prev_state;
2026 			if (macro_escaped == NULL) {
2027 				ret = macro->handler (macro_start, macro_len, macro_args,
2028 						macro->ud);
2029 			}
2030 			else {
2031 				ret = macro->handler (macro_escaped, macro_len, macro_args,
2032 						macro->ud);
2033 				UCL_FREE (macro_len + 1, macro_escaped);
2034 			}
2035 			p = chunk->pos;
2036 			if (macro_args) {
2037 				ucl_object_unref (macro_args);
2038 			}
2039 			if (!ret) {
2040 				return false;
2041 			}
2042 			break;
2043 		default:
2044 			/* TODO: add all states */
2045 			ucl_set_err (parser, UCL_EINTERNAL,
2046 					"internal error: parser is in an unknown state", &parser->err);
2047 			parser->state = UCL_STATE_ERROR;
2048 			return false;
2049 		}
2050 	}
2051 
2052 	return true;
2053 }
2054 
2055 struct ucl_parser*
2056 ucl_parser_new (int flags)
2057 {
2058 	struct ucl_parser *new;
2059 
2060 	new = UCL_ALLOC (sizeof (struct ucl_parser));
2061 	if (new == NULL) {
2062 		return NULL;
2063 	}
2064 	memset (new, 0, sizeof (struct ucl_parser));
2065 
2066 	ucl_parser_register_macro (new, "include", ucl_include_handler, new);
2067 	ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new);
2068 	ucl_parser_register_macro (new, "includes", ucl_includes_handler, new);
2069 
2070 	new->flags = flags;
2071 
2072 	/* Initial assumption about filevars */
2073 	ucl_parser_set_filevars (new, NULL, false);
2074 
2075 	return new;
2076 }
2077 
2078 
2079 void
2080 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
2081 		ucl_macro_handler handler, void* ud)
2082 {
2083 	struct ucl_macro *new;
2084 
2085 	if (macro == NULL || handler == NULL) {
2086 		return;
2087 	}
2088 	new = UCL_ALLOC (sizeof (struct ucl_macro));
2089 	if (new == NULL) {
2090 		return;
2091 	}
2092 	memset (new, 0, sizeof (struct ucl_macro));
2093 	new->handler = handler;
2094 	new->name = strdup (macro);
2095 	new->ud = ud;
2096 	HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
2097 }
2098 
2099 void
2100 ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
2101 		const char *value)
2102 {
2103 	struct ucl_variable *new = NULL, *cur;
2104 
2105 	if (var == NULL) {
2106 		return;
2107 	}
2108 
2109 	/* Find whether a variable already exists */
2110 	LL_FOREACH (parser->variables, cur) {
2111 		if (strcmp (cur->var, var) == 0) {
2112 			new = cur;
2113 			break;
2114 		}
2115 	}
2116 
2117 	if (value == NULL) {
2118 
2119 		if (new != NULL) {
2120 			/* Remove variable */
2121 			DL_DELETE (parser->variables, new);
2122 			free (new->var);
2123 			free (new->value);
2124 			UCL_FREE (sizeof (struct ucl_variable), new);
2125 		}
2126 		else {
2127 			/* Do nothing */
2128 			return;
2129 		}
2130 	}
2131 	else {
2132 		if (new == NULL) {
2133 			new = UCL_ALLOC (sizeof (struct ucl_variable));
2134 			if (new == NULL) {
2135 				return;
2136 			}
2137 			memset (new, 0, sizeof (struct ucl_variable));
2138 			new->var = strdup (var);
2139 			new->var_len = strlen (var);
2140 			new->value = strdup (value);
2141 			new->value_len = strlen (value);
2142 
2143 			DL_APPEND (parser->variables, new);
2144 		}
2145 		else {
2146 			free (new->value);
2147 			new->value = strdup (value);
2148 			new->value_len = strlen (value);
2149 		}
2150 	}
2151 }
2152 
2153 void
2154 ucl_parser_set_variables_handler (struct ucl_parser *parser,
2155 		ucl_variable_handler handler, void *ud)
2156 {
2157 	parser->var_handler = handler;
2158 	parser->var_data = ud;
2159 }
2160 
2161 bool
2162 ucl_parser_add_chunk_priority (struct ucl_parser *parser, const unsigned char *data,
2163 		size_t len, unsigned priority)
2164 {
2165 	struct ucl_chunk *chunk;
2166 
2167 	if (data == NULL) {
2168 		ucl_create_err (&parser->err, "invalid chunk added");
2169 		return false;
2170 	}
2171 	if (len == 0) {
2172 		parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority);
2173 		return true;
2174 	}
2175 	if (parser->state != UCL_STATE_ERROR) {
2176 		chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
2177 		if (chunk == NULL) {
2178 			ucl_create_err (&parser->err, "cannot allocate chunk structure");
2179 			return false;
2180 		}
2181 		chunk->begin = data;
2182 		chunk->remain = len;
2183 		chunk->pos = chunk->begin;
2184 		chunk->end = chunk->begin + len;
2185 		chunk->line = 1;
2186 		chunk->column = 0;
2187 		chunk->priority = priority;
2188 		LL_PREPEND (parser->chunks, chunk);
2189 		parser->recursion ++;
2190 		if (parser->recursion > UCL_MAX_RECURSION) {
2191 			ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
2192 					parser->recursion);
2193 			return false;
2194 		}
2195 		return ucl_state_machine (parser);
2196 	}
2197 
2198 	ucl_create_err (&parser->err, "a parser is in an invalid state");
2199 
2200 	return false;
2201 }
2202 
/**
 * Add a chunk of data with priority 0 and parse it.
 *
 * @param parser parser structure
 * @param data the beginning of the chunk
 * @param len length of the chunk
 * @return result of ucl_parser_add_chunk_priority()
 */
bool
ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
		size_t len)
{
	const unsigned default_priority = 0;

	return ucl_parser_add_chunk_priority (parser, data, len, default_priority);
}
2209 
2210 bool
2211 ucl_parser_add_string (struct ucl_parser *parser, const char *data,
2212 		size_t len)
2213 {
2214 	if (data == NULL) {
2215 		ucl_create_err (&parser->err, "invalid string added");
2216 		return false;
2217 	}
2218 	if (len == 0) {
2219 		len = strlen (data);
2220 	}
2221 
2222 	return ucl_parser_add_chunk (parser, (const unsigned char *)data, len);
2223 }
2224