xref: /freebsd/contrib/libucl/src/ucl_parser.c (revision abda442d92fdbadcf81c79bc9ddba001d133c429)
1 /* Copyright (c) 2013, Vsevolod Stakhov
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *       * Redistributions of source code must retain the above copyright
7  *         notice, this list of conditions and the following disclaimer.
8  *       * Redistributions in binary form must reproduce the above copyright
9  *         notice, this list of conditions and the following disclaimer in the
10  *         documentation and/or other materials provided with the distribution.
11  *
12  * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15  * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22  */
23 
24 #include <math.h>
25 #include "ucl.h"
26 #include "ucl_internal.h"
27 #include "ucl_chartable.h"
28 
29 /**
30  * @file ucl_parser.c
31  * The implementation of ucl parser
32  */
33 
/*
 * Position bookkeeping for a saved parser state.
 * NOTE(review): no user of this struct is visible in this part of the file;
 * the field roles below are inferred from their names and types only and
 * should be confirmed against the code that fills them in.
 */
struct ucl_parser_saved_state {
	unsigned int line;        /* presumably the line number at the saved point */
	unsigned int column;      /* presumably the column at the saved point */
	size_t remain;            /* presumably the number of bytes still unread */
	const unsigned char *pos; /* presumably the read pointer into the input */
};
40 
/**
 * Advance the cursor `p` by one character inside `chunk`, keeping the chunk
 * bookkeeping (line, column, pos, remain) in sync with it. Does nothing when
 * `p` already equals the chunk end.
 *
 * Both arguments are evaluated more than once, so they must be free of side
 * effects; `p` must be a modifiable lvalue.
 *
 * @param chunk pointer to the chunk being scanned
 * @param p cursor into the chunk, incremented in place
 */
#define ucl_chunk_skipc(chunk, p)    \
	do {                             \
		if ((p) == (chunk)->end) {   \
			break;                   \
		}                            \
		if (*(p) == '\n') {          \
			(chunk)->line++;         \
			(chunk)->column = 0;     \
		}                            \
		else {                       \
			(chunk)->column++;       \
		}                            \
		(p)++;                       \
		(chunk)->pos++;              \
		(chunk)->remain--;           \
	} while (0)
63 
64 static inline void
ucl_set_err(struct ucl_parser * parser,int code,const char * str,UT_string ** err)65 ucl_set_err(struct ucl_parser *parser, int code, const char *str, UT_string **err)
66 {
67 	const char *fmt_string, *filename;
68 	struct ucl_chunk *chunk = parser->chunks;
69 
70 	if (parser->cur_file) {
71 		filename = parser->cur_file;
72 	}
73 	else {
74 		filename = "<unknown>";
75 	}
76 
77 	if (chunk->pos < chunk->end) {
78 		if (isgraph(*chunk->pos)) {
79 			fmt_string = "error while parsing %s: "
80 						 "line: %d, column: %d - '%s', character: '%c'";
81 		}
82 		else {
83 			fmt_string = "error while parsing %s: "
84 						 "line: %d, column: %d - '%s', character: '0x%02x'";
85 		}
86 		ucl_create_err(err, fmt_string,
87 					   filename, chunk->line, chunk->column,
88 					   str, *chunk->pos);
89 	}
90 	else {
91 		ucl_create_err(err, "error while parsing %s: at the end of chunk: %s",
92 					   filename, str);
93 	}
94 
95 	parser->err_code = code;
96 	parser->state = UCL_STATE_ERROR;
97 }
98 
99 static void
ucl_save_comment(struct ucl_parser * parser,const char * begin,size_t len)100 ucl_save_comment(struct ucl_parser *parser, const char *begin, size_t len)
101 {
102 	ucl_object_t *nobj;
103 
104 	if (len > 0 && begin != NULL) {
105 		nobj = ucl_object_fromstring_common(begin, len, 0);
106 
107 		if (parser->last_comment) {
108 			/* We need to append data to an existing object */
109 			DL_APPEND(parser->last_comment, nobj);
110 		}
111 		else {
112 			parser->last_comment = nobj;
113 		}
114 	}
115 }
116 
117 static void
ucl_attach_comment(struct ucl_parser * parser,ucl_object_t * obj,bool before)118 ucl_attach_comment(struct ucl_parser *parser, ucl_object_t *obj, bool before)
119 {
120 	if (parser->last_comment) {
121 		ucl_object_insert_key(parser->comments, parser->last_comment,
122 							  (const char *) &obj, sizeof(void *), true);
123 
124 		if (before) {
125 			parser->last_comment->flags |= UCL_OBJECT_INHERITED;
126 		}
127 
128 		parser->last_comment = NULL;
129 	}
130 }
131 
132 /**
133  * Skip all comments from the current pos resolving nested and multiline comments
134  * @param parser
135  * @return
136  */
137 static bool
ucl_skip_comments(struct ucl_parser * parser)138 ucl_skip_comments(struct ucl_parser *parser)
139 {
140 	struct ucl_chunk *chunk = parser->chunks;
141 	const unsigned char *p, *beg = NULL;
142 	int comments_nested = 0;
143 	bool quoted = false;
144 
145 	p = chunk->pos;
146 
147 start:
148 	if (chunk->remain > 0 && *p == '#') {
149 		if (parser->state != UCL_STATE_SCOMMENT &&
150 			parser->state != UCL_STATE_MCOMMENT) {
151 			beg = p;
152 
153 			while (p < chunk->end) {
154 				if (*p == '\n') {
155 					if (parser->flags & UCL_PARSER_SAVE_COMMENTS) {
156 						ucl_save_comment(parser, beg, p - beg);
157 						beg = NULL;
158 					}
159 
160 					ucl_chunk_skipc(chunk, p);
161 
162 					goto start;
163 				}
164 				ucl_chunk_skipc(chunk, p);
165 			}
166 		}
167 	}
168 	else if (chunk->remain >= 2 && *p == '/' && p[1] == '*') {
169 		beg = p;
170 		comments_nested ++;
171 		ucl_chunk_skipc (chunk, p);
172 		ucl_chunk_skipc (chunk, p);
173 		while (p < chunk->end) {
174 			if (*p == '"' && *(p - 1) != '\\') {
175 				/* begin or end double-quoted string */
176 				quoted = !quoted;
177 				ucl_chunk_skipc (chunk, p);
178 			}
179 			else if (quoted) {
180 				/* quoted character */
181 				ucl_chunk_skipc (chunk, p);
182 			}
183 			else if (chunk->remain >= 2 && *p == '*' && p[1] == '/') {
184 				/* end of comment */
185 				ucl_chunk_skipc (chunk, p);
186 				ucl_chunk_skipc (chunk, p);
187 				comments_nested --;
188 				if (comments_nested == 0) {
189 					if (parser->flags & UCL_PARSER_SAVE_COMMENTS) {
190 						ucl_save_comment (parser, beg, p - beg + 1);
191 						beg = NULL;
192 					}
193 					goto start;
194 				}
195 			}
196 			else if (chunk->remain >= 2 && *p == '/' && p[1] == '*') {
197 				/* start of nested comment */
198 				comments_nested ++;
199 				ucl_chunk_skipc (chunk, p);
200 				ucl_chunk_skipc (chunk, p);
201 			}
202 			else {
203 				/* anything else */
204 				ucl_chunk_skipc (chunk, p);
205 			}
206 		}
207 		if (comments_nested != 0) {
208 			ucl_set_err(parser, UCL_ENESTED,
209 						"unfinished multiline comment", &parser->err);
210 			return false;
211 		}
212 	}
213 
214 	if (beg && p > beg && (parser->flags & UCL_PARSER_SAVE_COMMENTS)) {
215 		ucl_save_comment(parser, beg, p - beg);
216 	}
217 
218 	return true;
219 }
220 
/**
 * Map a size suffix character to its multiplier.
 *
 * 'k', 'm' and 'g' (case-insensitive) select kilo, mega and giga; any other
 * character yields 1.
 *
 * @param c suffix character
 * @param is_bytes if true use powers of 1024, otherwise powers of 1000
 * @return multiplier for the suffix
 */
static inline unsigned long
ucl_lex_num_multiplier(const unsigned char c, bool is_bytes)
{
	switch (tolower(c)) {
	case 'k':
		return is_bytes ? 1024 : 1000;
	case 'm':
		return is_bytes ? 1024 * 1024 : 1000 * 1000;
	case 'g':
		return is_bytes ? 1024 * 1024 * 1024 : 1000 * 1000 * 1000;
	default:
		return 1;
	}
}
251 
252 
/**
 * Map a time suffix character to the number of seconds it stands for.
 *
 * Recognised (case-insensitive): 'm' minutes, 'h' hours, 'd' days,
 * 'w' weeks, 'y' years of 365 days. Any other character yields 1.
 *
 * @param c suffix character
 * @return seconds per unit
 */
static inline double
ucl_lex_time_multiplier(const unsigned char c)
{
	switch (tolower(c)) {
	case 'm':
		return 60;
	case 'h':
		return 60 * 60;
	case 'd':
		return 60 * 60 * 24;
	case 'w':
		return 60 * 60 * 24 * 7;
	case 'y':
		return 60 * 60 * 24 * 365;
	default:
		return 1;
	}
}
280 
/**
 * Tell whether a character terminates an atom.
 *
 * Thin wrapper: the decision is delegated to ucl_test_character() with the
 * UCL_CHARACTER_VALUE_END class (both defined outside this part of the file).
 *
 * @param c character to classify
 * @return true if `c` belongs to the UCL_CHARACTER_VALUE_END class
 */
static inline bool
ucl_lex_is_atom_end(const unsigned char c)
{
	return ucl_test_character(c, UCL_CHARACTER_VALUE_END);
}
291 
/**
 * Tell whether two consecutive characters open a comment.
 *
 * @param c1 first character
 * @param c2 character following c1
 * @return true for '#' (c2 is ignored) or for the pair "/ *" without the
 *         space, false otherwise
 */
static inline bool
ucl_lex_is_comment(const unsigned char c1, const unsigned char c2)
{
	return c1 == '#' || (c1 == '/' && c2 == '*');
}
305 
306 /**
307  * Check variable found
308  * @param parser
309  * @param ptr
310  * @param remain
311  * @param out_len
312  * @param strict
313  * @param found
314  * @return
315  */
316 static inline const char *
ucl_check_variable_safe(struct ucl_parser * parser,const char * ptr,size_t remain,size_t * out_len,bool strict,bool * found)317 ucl_check_variable_safe(struct ucl_parser *parser, const char *ptr, size_t remain,
318 						size_t *out_len, bool strict, bool *found)
319 {
320 	struct ucl_variable *var;
321 	unsigned char *dst;
322 	size_t dstlen;
323 	bool need_free = false;
324 
325 	LL_FOREACH(parser->variables, var)
326 	{
327 		if (strict) {
328 			if (remain == var->var_len) {
329 				if (memcmp(ptr, var->var, var->var_len) == 0) {
330 					*out_len += var->value_len;
331 					*found = true;
332 					return (ptr + var->var_len);
333 				}
334 			}
335 		}
336 		else {
337 			if (remain >= var->var_len) {
338 				if (memcmp(ptr, var->var, var->var_len) == 0) {
339 					*out_len += var->value_len;
340 					*found = true;
341 					return (ptr + var->var_len);
342 				}
343 			}
344 		}
345 	}
346 
347 	/* XXX: can only handle ${VAR} */
348 	if (!(*found) && parser->var_handler != NULL && strict) {
349 		/* Call generic handler */
350 		if (parser->var_handler(ptr, remain, &dst, &dstlen, &need_free,
351 								parser->var_data)) {
352 			*found = true;
353 			*out_len = dstlen;
354 
355 			if (need_free) {
356 				free(dst);
357 			}
358 			return (ptr + remain);
359 		}
360 	}
361 
362 	return ptr;
363 }
364 
/**
 * Measure one '$' construct; `ptr` points at the character after the '$'.
 *
 * Three forms are handled:
 *  - a second '$': an escaped dollar sign, counted as one output byte;
 *  - '{': a braced name, resolved strictly between the braces; without a
 *    closing brace only the '$' is counted;
 *  - anything else: a bare name matched against the known variables.
 * Unresolved constructs add the bytes that will be copied literally.
 *
 * @param parser parser holding the variables
 * @param ptr character following the '$'
 * @param remain number of bytes available at `ptr`
 * @param out_len running output length, updated in place
 * @param vars_found set to true when a variable was resolved
 * @return position from which the caller should continue scanning
 */
static const char *
ucl_check_variable(struct ucl_parser *parser, const char *ptr,
				   size_t remain, size_t *out_len, bool *vars_found)
{
	const char *brace, *end, *ret;
	bool found = false;

	if (*ptr == '$') {
		/* Escaped dollar sign: skip the second '$', emit one character */
		(*out_len)++;
		return ptr + 1;
	}

	if (*ptr != '{') {
		ret = ucl_check_variable_safe(parser, ptr, remain, out_len, false, &found);

		if (found) {
			*vars_found = true;
		}
		else {
			/* Only the '$' itself is accounted for here */
			(*out_len)++;
		}

		return ret;
	}

	/* We need to match the variable enclosed in braces */
	end = ptr + remain;

	for (brace = ptr + 1; brace < end; brace++) {
		if (*brace != '}') {
			continue;
		}

		ret = ucl_check_variable_safe(parser, ptr + 1, brace - ptr - 1,
									  out_len, true, &found);

		if (found) {
			/* Step over the closing brace as well */
			ret++;
			*vars_found = true;
		}
		else {
			/* The literal "${" is kept */
			*out_len += 2;
		}

		return ret;
	}

	if (brace == end) {
		/* No closing brace: count the '$' and rescan from the '{' */
		(*out_len)++;
	}

	return ptr;
}
424 
425 /**
426  * Expand a single variable
427  * @param parser
428  * @param ptr
429  * @param in_len
430  * @param dest
431  * @param out_len
432  * @return
433  */
434 static const char *
ucl_expand_single_variable(struct ucl_parser * parser,const char * ptr,size_t in_len,unsigned char ** dest,size_t out_len)435 ucl_expand_single_variable(struct ucl_parser *parser, const char *ptr,
436 						   size_t in_len, unsigned char **dest, size_t out_len)
437 {
438 	unsigned char *d = *dest, *dst;
439 	const char *p = ptr + 1, *ret;
440 	struct ucl_variable *var;
441 	size_t dstlen;
442 	bool need_free = false;
443 	bool found = false;
444 	bool strict = false;
445 
446 	ret = ptr + 1;
447 	/* For the $ sign */
448 	in_len--;
449 
450 	if (*p == '$') {
451 		*d++ = *p++;
452 		*dest = d;
453 		return p;
454 	}
455 	else if (*p == '{') {
456 		p++;
457 		in_len--;
458 		strict = true;
459 		ret += 2;
460 	}
461 
462 	LL_FOREACH(parser->variables, var)
463 	{
464 		if (out_len >= var->value_len && in_len >= (var->var_len + (strict ? 1 : 0))) {
465 			if (memcmp(p, var->var, var->var_len) == 0) {
466 				if (!strict || p[var->var_len] == '}') {
467 					memcpy(d, var->value, var->value_len);
468 					ret += var->var_len;
469 					d += var->value_len;
470 					found = true;
471 					break;
472 				}
473 			}
474 		}
475 	}
476 
477 	if (!found) {
478 		if (strict && parser->var_handler != NULL) {
479 			dstlen = out_len;
480 
481 			if (parser->var_handler(p, in_len, &dst, &dstlen, &need_free,
482 									parser->var_data)) {
483 				if (dstlen > out_len) {
484 					/* We do not have enough space! */
485 					if (need_free) {
486 						free(dst);
487 					}
488 				}
489 				else {
490 					memcpy(d, dst, dstlen);
491 					ret += in_len;
492 					d += dstlen;
493 					found = true;
494 
495 					if (need_free) {
496 						free(dst);
497 					}
498 				}
499 			}
500 		}
501 
502 		/* Leave variable as is, in this case we use dest */
503 		if (!found) {
504 			if (strict && out_len >= 2) {
505 				/* Copy '${' */
506 				memcpy(d, ptr, 2);
507 				d += 2;
508 				ret--;
509 			}
510 			else {
511 				memcpy(d, ptr, 1);
512 				d++;
513 			}
514 		}
515 	}
516 
517 	*dest = d;
518 	return ret;
519 }
520 
521 /**
522  * Expand variables in string
523  * @param parser
524  * @param dst
525  * @param src
526  * @param in_len
527  * @return
528  */
529 static ssize_t
ucl_expand_variable(struct ucl_parser * parser,unsigned char ** dst,const char * src,size_t in_len)530 ucl_expand_variable(struct ucl_parser *parser, unsigned char **dst,
531 					const char *src, size_t in_len)
532 {
533 	const char *p, *end = src + in_len;
534 	unsigned char *d, *d_end;
535 	size_t out_len = 0;
536 	bool vars_found = false;
537 
538 	if (parser->flags & UCL_PARSER_DISABLE_MACRO) {
539 		*dst = NULL;
540 		return in_len;
541 	}
542 
543 	p = src;
544 	while (p != end) {
545 		if (*p == '$' && p + 1 != end) {
546 			p = ucl_check_variable(parser, p + 1, end - p - 1, &out_len, &vars_found);
547 		}
548 		else {
549 			p++;
550 			out_len++;
551 		}
552 	}
553 
554 	if (!vars_found) {
555 		/* Trivial case */
556 		*dst = NULL;
557 		return in_len;
558 	}
559 
560 	*dst = UCL_ALLOC(out_len + 1);
561 	if (*dst == NULL) {
562 		return in_len;
563 	}
564 
565 	d = *dst;
566 	d_end = d + out_len;
567 	p = src;
568 	while (p != end && d != d_end) {
569 		if (*p == '$' && p + 1 != end) {
570 			p = ucl_expand_single_variable(parser, p, end - p, &d, d_end - d);
571 		}
572 		else {
573 			*d++ = *p++;
574 		}
575 	}
576 
577 	*d = '\0';
578 
579 	return out_len;
580 }
581 
582 /**
583  * Store or copy pointer to the trash stack
584  * @param parser parser object
585  * @param src src string
586  * @param dst destination buffer (trash stack pointer)
587  * @param dst_const const destination pointer (e.g. value of object)
588  * @param in_len input length
589  * @param need_unescape need to unescape source (and copy it)
590  * @param need_lowercase need to lowercase value (and copy)
591  * @param need_expand need to expand variables (and copy as well)
592  * @param unescape_squote unescape single quoted string
593  * @return output length (excluding \0 symbol)
594  */
595 static inline ssize_t
ucl_copy_or_store_ptr(struct ucl_parser * parser,const unsigned char * src,unsigned char ** dst,const char ** dst_const,size_t in_len,bool need_unescape,bool need_lowercase,bool need_expand,bool unescape_squote)596 ucl_copy_or_store_ptr(struct ucl_parser *parser,
597 					  const unsigned char *src, unsigned char **dst,
598 					  const char **dst_const, size_t in_len,
599 					  bool need_unescape, bool need_lowercase, bool need_expand,
600 					  bool unescape_squote)
601 {
602 	ssize_t ret = -1, tret;
603 	unsigned char *tmp;
604 
605 	if (need_unescape || need_lowercase ||
606 		(need_expand && parser->variables != NULL) ||
607 		!(parser->flags & UCL_PARSER_ZEROCOPY)) {
608 		/* Copy string */
609 		*dst = UCL_ALLOC(in_len + 1);
610 		if (*dst == NULL) {
611 			ucl_set_err(parser, UCL_EINTERNAL, "cannot allocate memory for a string",
612 						&parser->err);
613 			return false;
614 		}
615 		if (need_lowercase) {
616 			ret = ucl_strlcpy_tolower(*dst, src, in_len + 1);
617 		}
618 		else {
619 			ret = ucl_strlcpy_unsafe(*dst, src, in_len + 1);
620 		}
621 
622 		if (need_unescape) {
623 			if (!unescape_squote) {
624 				ret = ucl_unescape_json_string(*dst, ret);
625 			}
626 			else {
627 				ret = ucl_unescape_squoted_string(*dst, ret);
628 			}
629 		}
630 
631 		if (need_expand) {
632 			tmp = *dst;
633 			tret = ret;
634 			ret = ucl_expand_variable(parser, dst, tmp, ret);
635 			if (*dst == NULL) {
636 				/* Nothing to expand */
637 				*dst = tmp;
638 				ret = tret;
639 			}
640 			else {
641 				/* Free unexpanded value */
642 				UCL_FREE(in_len + 1, tmp);
643 			}
644 		}
645 		*dst_const = *dst;
646 	}
647 	else {
648 		*dst_const = src;
649 		ret = in_len;
650 	}
651 
652 	return ret;
653 }
654 
655 /**
656  * Create and append an object at the specified level
657  * @param parser
658  * @param is_array
659  * @param level
660  * @return
661  */
662 static inline ucl_object_t *
ucl_parser_add_container(ucl_object_t * obj,struct ucl_parser * parser,bool is_array,uint32_t level,bool has_obrace)663 ucl_parser_add_container(ucl_object_t *obj, struct ucl_parser *parser,
664 						 bool is_array, uint32_t level, bool has_obrace)
665 {
666 	struct ucl_stack *st;
667 	ucl_object_t *nobj;
668 
669 	if (obj == NULL) {
670 		nobj = ucl_object_new_full(is_array ? UCL_ARRAY : UCL_OBJECT, parser->chunks->priority);
671 		if (nobj == NULL) {
672 			goto enomem0;
673 		}
674 	}
675 	else {
676 		if (obj->type == (is_array ? UCL_OBJECT : UCL_ARRAY)) {
677 			/* Bad combination for merge: array and object */
678 			ucl_set_err(parser, UCL_EMERGE,
679 						"cannot merge an object with an array",
680 						&parser->err);
681 
682 			return NULL;
683 		}
684 		nobj = obj;
685 		nobj->type = is_array ? UCL_ARRAY : UCL_OBJECT;
686 	}
687 
688 	if (!is_array) {
689 		if (nobj->value.ov == NULL) {
690 			nobj->value.ov = ucl_hash_create(parser->flags & UCL_PARSER_KEY_LOWERCASE);
691 			if (nobj->value.ov == NULL) {
692 				goto enomem1;
693 			}
694 		}
695 		parser->state = UCL_STATE_KEY;
696 	}
697 	else {
698 		parser->state = UCL_STATE_VALUE;
699 	}
700 
701 	st = UCL_ALLOC(sizeof(struct ucl_stack));
702 
703 	if (st == NULL) {
704 		goto enomem1;
705 	}
706 
707 	st->obj = nobj;
708 
709 	if (level >= UINT16_MAX) {
710 		ucl_set_err(parser, UCL_ENESTED,
711 					"objects are nesting too deep (over 65535 limit)",
712 					&parser->err);
713 		if (nobj != obj) {
714 			ucl_object_unref(obj);
715 		}
716 
717 		UCL_FREE(sizeof(struct ucl_stack), st);
718 
719 		return NULL;
720 	}
721 
722 
723 	st->e.params.level = level;
724 	st->e.params.line = parser->chunks->line;
725 	st->chunk = parser->chunks;
726 
727 	if (has_obrace) {
728 		st->e.params.flags = UCL_STACK_HAS_OBRACE;
729 	}
730 	else {
731 		st->e.params.flags = 0;
732 	}
733 
734 	LL_PREPEND(parser->stack, st);
735 	parser->cur_obj = nobj;
736 
737 	return nobj;
738 enomem1:
739 	if (nobj != obj)
740 		ucl_object_unref(nobj);
741 enomem0:
742 	ucl_set_err(parser, UCL_EINTERNAL, "cannot allocate memory for an object",
743 				&parser->err);
744 	return NULL;
745 }
746 
/**
 * Try to parse a number, optionally followed by a size or time suffix.
 *
 * Accepted input: an optional '-', decimal digits, a hexadecimal part
 * introduced by 'x'/'X', and (when allow_double is set) a fraction and an
 * exponent. After the digits the following suffixes are understood:
 *  - k/m/g followed by 's': a time value ("ms" divides by 1000, the others
 *    multiply by the decimal multiplier);
 *  - k/m/g followed by 'b', or by anything when number_bytes is set: an
 *    integer scaled by the 1024-based multiplier;
 *  - k/m/g alone before an atom end: scaled by the multiplier;
 *  - "min" and s/h/d/w/y, when allow_time is set: a time value in seconds;
 *  - blanks followed by an atom end.
 *
 * @param obj object that receives the type and value, may be NULL
 * @param start first character to examine
 * @param end one past the last character available
 * @param pos on success set just past the consumed text; on failure set to
 *            `start` (or to the first digit in the final fall-through case)
 * @param allow_double accept fractions/exponents and store floats or times
 * @param number_bytes treat bare k/m/g suffixes as 1024-based
 * @param allow_time accept the time suffixes
 * @return 0 on success, EINVAL if the text is not a number, ERANGE if the
 *         conversion overflowed
 */
int ucl_maybe_parse_number(ucl_object_t *obj,
						   const char *start, const char *end, const char **pos,
						   bool allow_double, bool number_bytes, bool allow_time)
{
	const char *p = start, *c = start;
	char *endptr;
	bool got_dot = false, got_exp = false, need_double = false,
		 is_time = false, valid_start = false, is_hex = false;
	int is_neg = 0;
	double dv = 0;
	int64_t lv = 0;

	if (*p == '-') {
		is_neg = 1;
		c++;
		p++;
	}
	/*
	 * The <ctype.h> classifiers are only defined for values representable as
	 * unsigned char (or EOF), hence the casts on every plain char below.
	 */
	while (p < end) {
		if (is_hex && isxdigit((unsigned char) *p)) {
			p++;
		}
		else if (isdigit((unsigned char) *p)) {
			valid_start = true;
			p++;
		}
		else if (!is_hex && (*p == 'x' || *p == 'X')) {
			is_hex = true;
			allow_double = false;
			c = p + 1;
			p++;
		}
		else if (allow_double) {
			if (p == c) {
				/* Empty digits sequence, not a number */
				*pos = start;
				return EINVAL;
			}
			else if (*p == '.') {
				if (got_dot) {
					/* Double dots, not a number */
					*pos = start;
					return EINVAL;
				}
				else {
					got_dot = true;
					need_double = true;
					p++;
				}
			}
			else if (*p == 'e' || *p == 'E') {
				if (got_exp) {
					/* Double exp, not a number */
					*pos = start;
					return EINVAL;
				}
				else {
					got_exp = true;
					need_double = true;
					p++;
					if (p >= end) {
						*pos = start;
						return EINVAL;
					}
					if (!isdigit((unsigned char) *p) && *p != '+' && *p != '-') {
						/* Wrong exponent sign */
						*pos = start;
						return EINVAL;
					}
					else {
						p++;
					}
				}
			}
			else {
				/* Got the end of the number, need to check */
				break;
			}
		}
		else if (!allow_double && *p == '.') {
			/* Unexpected dot */
			*pos = start;
			return EINVAL;
		}
		else {
			break;
		}
	}

	if (!valid_start || p == c) {
		*pos = start;
		return EINVAL;
	}

	/* The digits are copied out so the libc converters see a NUL terminator */
	char numbuf[128];

	if ((size_t) (p - c + 1) >= sizeof(numbuf)) {
		*pos = start;
		return EINVAL;
	}

	if (is_neg) {
		numbuf[0] = '-';
		ucl_strlcpy(&numbuf[1], c, p - c + 1);
	}
	else {
		ucl_strlcpy(numbuf, c, p - c + 1);
	}

	errno = 0;
	if (need_double) {
		dv = strtod(numbuf, &endptr);
	}
	else {
		if (is_hex) {
			lv = strtoimax(numbuf, &endptr, 16);
		}
		else {
			lv = strtoimax(numbuf, &endptr, 10);
		}
	}
	if (errno == ERANGE) {
		*pos = start;
		return ERANGE;
	}

	/* Now check endptr and move it from numbuf to the real ending */
	if (endptr != NULL) {
		long shift = endptr - numbuf - is_neg;
		endptr = (char *) c + shift;
	}
	if (endptr >= end) {
		p = end;
		goto set_obj;
	}
	if (endptr == NULL || ucl_lex_is_atom_end(*endptr) || *endptr == '\0') {
		p = endptr;
		goto set_obj;
	}

	if (endptr < end && endptr != start) {
		p = endptr;
		switch (*p) {
		case 'm':
		case 'M':
		case 'g':
		case 'G':
		case 'k':
		case 'K':
			if (end - p >= 2) {
				if (p[1] == 's' || p[1] == 'S') {
					/* Milliseconds */
					if (!need_double) {
						need_double = true;
						dv = lv;
					}
					is_time = true;
					if (p[0] == 'm' || p[0] == 'M') {
						dv /= 1000.;
					}
					else {
						dv *= ucl_lex_num_multiplier(*p, false);
					}
					p += 2;
					if (end - p > 0 && !ucl_lex_is_atom_end(*p)) {
						*pos = start;
						return EINVAL;
					}
					goto set_obj;
				}
				else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
					/* Bytes */
					if (need_double) {
						need_double = false;
						lv = dv;
					}
					lv *= ucl_lex_num_multiplier(*p, true);
					p += 2;
					if (end - p > 0 && !ucl_lex_is_atom_end(*p)) {
						*pos = start;
						return EINVAL;
					}
					goto set_obj;
				}
				else if (ucl_lex_is_atom_end(p[1])) {
					if (need_double) {
						dv *= ucl_lex_num_multiplier(*p, false);
					}
					else {
						lv *= ucl_lex_num_multiplier(*p, number_bytes);
					}
					p++;
					goto set_obj;
				}
				else if (allow_time && end - p >= 3) {
					if (tolower((unsigned char) p[0]) == 'm' &&
						tolower((unsigned char) p[1]) == 'i' &&
						tolower((unsigned char) p[2]) == 'n') {
						/* Minutes */
						if (!need_double) {
							need_double = true;
							dv = lv;
						}
						is_time = true;
						dv *= 60.;
						p += 3;
						if (end - p > 0 && !ucl_lex_is_atom_end(*p)) {
							*pos = start;
							return EINVAL;
						}
						goto set_obj;
					}
				}
			}
			else {
				if (need_double) {
					dv *= ucl_lex_num_multiplier(*p, false);
				}
				else {
					lv *= ucl_lex_num_multiplier(*p, number_bytes);
				}
				p++;
				if (end - p > 0 && !ucl_lex_is_atom_end(*p)) {
					*pos = start;
					return EINVAL;
				}
				goto set_obj;
			}
			break;
		case 'S':
		case 's':
			if (allow_time &&
				(p == end - 1 || ucl_lex_is_atom_end(p[1]))) {
				if (!need_double) {
					need_double = true;
					dv = lv;
				}
				p++;
				is_time = true;
				goto set_obj;
			}
			break;
		case 'h':
		case 'H':
		case 'd':
		case 'D':
		case 'w':
		case 'W':
		case 'Y':
		case 'y':
			if (allow_time &&
				(p == end - 1 || ucl_lex_is_atom_end(p[1]))) {
				if (!need_double) {
					need_double = true;
					dv = lv;
				}
				is_time = true;
				dv *= ucl_lex_time_multiplier(*p);
				p++;
				goto set_obj;
			}
			break;
		case '\t':
		case ' ':
			while (p < end && ucl_test_character((unsigned char) *p, UCL_CHARACTER_WHITESPACE)) {
				p++;
			}
			if (p == end || ucl_lex_is_atom_end(*p))
				goto set_obj;
			break;
		}
	}
	else if (endptr == end) {
		/* Just a number at the end of chunk */
		p = end;
		goto set_obj;
	}

	*pos = c;
	return EINVAL;

set_obj:
	if (obj != NULL) {
		if (allow_double && (need_double || is_time)) {
			if (!is_time) {
				obj->type = UCL_FLOAT;
			}
			else {
				obj->type = UCL_TIME;
			}
			obj->value.dv = dv;
		}
		else {
			obj->type = UCL_INT;
			obj->value.iv = lv;
		}
	}
	*pos = p;
	return 0;
}
1046 
1047 /**
1048  * Parse possible number
1049  * @param parser
1050  * @param chunk
1051  * @param obj
1052  * @return true if a number has been parsed
1053  */
1054 static bool
ucl_lex_number(struct ucl_parser * parser,struct ucl_chunk * chunk,ucl_object_t * obj)1055 ucl_lex_number(struct ucl_parser *parser,
1056 			   struct ucl_chunk *chunk, ucl_object_t *obj)
1057 {
1058 	const unsigned char *pos;
1059 	int ret;
1060 
1061 	ret = ucl_maybe_parse_number(obj, chunk->pos, chunk->end, (const char **) &pos,
1062 								 true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
1063 
1064 	if (ret == 0) {
1065 		chunk->remain -= pos - chunk->pos;
1066 		chunk->column += pos - chunk->pos;
1067 		chunk->pos = pos;
1068 		return true;
1069 	}
1070 	else if (ret == ERANGE) {
1071 		ucl_set_err(parser, UCL_ESYNTAX, "numeric value out of range",
1072 					&parser->err);
1073 	}
1074 
1075 	return false;
1076 }
1077 
1078 /**
1079  * Parse quoted string with possible escapes
1080  * @param parser
1081  * @param chunk
1082  * @param need_unescape
1083  * @param ucl_escape
1084  * @param var_expand
1085  * @return true if a string has been parsed
1086  */
1087 static bool
ucl_lex_json_string(struct ucl_parser * parser,struct ucl_chunk * chunk,bool * need_unescape,bool * ucl_escape,bool * var_expand)1088 ucl_lex_json_string(struct ucl_parser *parser,
1089 					struct ucl_chunk *chunk,
1090 					bool *need_unescape,
1091 					bool *ucl_escape,
1092 					bool *var_expand)
1093 {
1094 	const unsigned char *p = chunk->pos;
1095 	unsigned char c;
1096 	int i;
1097 
1098 	while (p < chunk->end) {
1099 		c = *p;
1100 		if (c < 0x1F) {
1101 			/* Unmasked control character */
1102 			if (c == '\n') {
1103 				ucl_set_err(parser, UCL_ESYNTAX, "unexpected newline",
1104 							&parser->err);
1105 			}
1106 			else {
1107 				ucl_set_err(parser, UCL_ESYNTAX, "unexpected control character",
1108 							&parser->err);
1109 			}
1110 			return false;
1111 		}
1112 		else if (c == '\\') {
1113 			ucl_chunk_skipc(chunk, p);
1114 			if (p >= chunk->end) {
1115 				ucl_set_err(parser, UCL_ESYNTAX, "unfinished escape character",
1116 							&parser->err);
1117 				return false;
1118 			}
1119 			c = *p;
1120 			if (ucl_test_character(c, UCL_CHARACTER_ESCAPE)) {
1121 				if (c == 'u') {
1122 					ucl_chunk_skipc(chunk, p);
1123 					for (i = 0; i < 4 && p < chunk->end; i++) {
1124 						if (!isxdigit(*p)) {
1125 							ucl_set_err(parser, UCL_ESYNTAX, "invalid utf escape",
1126 										&parser->err);
1127 							return false;
1128 						}
1129 						ucl_chunk_skipc(chunk, p);
1130 					}
1131 					if (p >= chunk->end) {
1132 						ucl_set_err(parser, UCL_ESYNTAX,
1133 									"unfinished escape character",
1134 									&parser->err);
1135 						return false;
1136 					}
1137 				}
1138 				else {
1139 					ucl_chunk_skipc(chunk, p);
1140 				}
1141 			}
1142 			*need_unescape = true;
1143 			*ucl_escape = true;
1144 			continue;
1145 		}
1146 		else if (c == '"') {
1147 			ucl_chunk_skipc(chunk, p);
1148 			return true;
1149 		}
1150 		else if (ucl_test_character(c, UCL_CHARACTER_UCL_UNSAFE)) {
1151 			*ucl_escape = true;
1152 		}
1153 		else if (c == '$') {
1154 			*var_expand = true;
1155 		}
1156 		ucl_chunk_skipc(chunk, p);
1157 	}
1158 
1159 	ucl_set_err(parser, UCL_ESYNTAX,
1160 				"no quote at the end of json string",
1161 				&parser->err);
1162 	return false;
1163 }
1164 
1165 /**
1166  * Process single quoted string
1167  * @param parser
1168  * @param chunk
1169  * @param need_unescape
1170  * @return
1171  */
1172 static bool
ucl_lex_squoted_string(struct ucl_parser * parser,struct ucl_chunk * chunk,bool * need_unescape)1173 ucl_lex_squoted_string(struct ucl_parser *parser,
1174 					   struct ucl_chunk *chunk, bool *need_unescape)
1175 {
1176 	const unsigned char *p = chunk->pos;
1177 	unsigned char c;
1178 
1179 	while (p < chunk->end) {
1180 		c = *p;
1181 		if (c == '\\') {
1182 			ucl_chunk_skipc(chunk, p);
1183 
1184 			if (p >= chunk->end) {
1185 				ucl_set_err(parser, UCL_ESYNTAX,
1186 							"unfinished escape character",
1187 							&parser->err);
1188 				return false;
1189 			}
1190 			else {
1191 				ucl_chunk_skipc(chunk, p);
1192 			}
1193 
1194 			*need_unescape = true;
1195 			continue;
1196 		}
1197 		else if (c == '\'') {
1198 			ucl_chunk_skipc(chunk, p);
1199 			return true;
1200 		}
1201 
1202 		ucl_chunk_skipc(chunk, p);
1203 	}
1204 
1205 	ucl_set_err(parser, UCL_ESYNTAX,
1206 				"no quote at the end of single quoted string",
1207 				&parser->err);
1208 	return false;
1209 }
1210 
1211 static void
ucl_parser_append_elt(struct ucl_parser * parser,ucl_hash_t * cont,ucl_object_t * top,ucl_object_t * elt)1212 ucl_parser_append_elt(struct ucl_parser *parser, ucl_hash_t *cont,
1213 					  ucl_object_t *top,
1214 					  ucl_object_t *elt)
1215 {
1216 	ucl_object_t *nobj;
1217 
1218 	if ((parser->flags & UCL_PARSER_NO_IMPLICIT_ARRAYS) == 0) {
1219 		/* Implicit array */
1220 		top->flags |= UCL_OBJECT_MULTIVALUE;
1221 		DL_APPEND(top, elt);
1222 		parser->stack->obj->len++;
1223 	}
1224 	else {
1225 		if ((top->flags & UCL_OBJECT_MULTIVALUE) != 0) {
1226 			/* Just add to the explicit array */
1227 			ucl_array_append(top, elt);
1228 		}
1229 		else {
1230 			/* Convert to an array */
1231 			nobj = ucl_object_typed_new(UCL_ARRAY);
1232 			nobj->key = top->key;
1233 			nobj->keylen = top->keylen;
1234 			nobj->flags |= UCL_OBJECT_MULTIVALUE;
1235 			ucl_array_append(nobj, top);
1236 			ucl_array_append(nobj, elt);
1237 			ucl_hash_replace(cont, top, nobj);
1238 		}
1239 	}
1240 }
1241 
/**
 * Insert a keyed element into the object on top of the parser stack,
 * resolving a clash with an already stored key according to the duplicate
 * strategy of the current chunk. On success parser->cur_obj points to the
 * resulting element (the already stored object when a container is merged).
 * @param parser parser whose stack top receives the element
 * @param nobj new element; its key and keylen must already be set
 * @return false if the hash insertion fails or a duplicate is found under
 * UCL_DUPLICATE_ERROR (a parser error is set in the latter case), true otherwise
 */
bool ucl_parser_process_object_element(struct ucl_parser *parser, ucl_object_t *nobj)
{
	ucl_hash_t *container;
	ucl_object_t *tobj = NULL, *cur;
	char errmsg[256];

	container = parser->stack->obj->value.ov;

	/* Look for an element with the same key in the chain of the top object */
	DL_FOREACH(parser->stack->obj, cur)
	{
		if (cur->type == UCL_OBJECT) {
			tobj = __DECONST(ucl_object_t *, ucl_hash_search_obj(cur->value.ov, nobj));

			if (tobj != NULL) {
				/*
				 * Check if we have found an object in the same container.
				 * If not, we should probably ignore it as we cannot replace it
				 * effectively and we definitely should not unref it.
				 */
				if (cur->value.ov != container) {
					tobj = NULL;
					continue;
				}
				break;
			}
		}
	}


	if (tobj == NULL) {
		/* No clash: plain insertion */
		container = ucl_hash_insert_object(container, nobj,
										   parser->flags & UCL_PARSER_KEY_LOWERCASE);
		if (container == NULL) {
			return false;
		}
		nobj->prev = nobj;
		nobj->next = NULL;
		parser->stack->obj->len++;
	}
	else {
		unsigned priold = ucl_object_get_priority(tobj),
				 prinew = ucl_object_get_priority(nobj);
		switch (parser->chunks->strategy) {

		case UCL_DUPLICATE_APPEND:
			/*
			 * The logic here is the following:
			 *
			 * - if we have two objects with the same priority, then we form an
			 * implicit or explicit array
			 * - if a new object has bigger priority, then we overwrite an old one
			 * - if a new object has lower priority, then we ignore it
			 */
			/* Special case for inherited objects */
			if (tobj->flags & UCL_OBJECT_INHERITED) {
				prinew = priold + 1;
			}

			if (priold == prinew) {
				ucl_parser_append_elt(parser, container, tobj, nobj);
			}
			else if (priold > prinew) {
				/*
				 * We add this new object to a list of trash objects just to ensure
				 * that it won't come to any real object
				 * XXX: rather inefficient approach
				 */
				DL_APPEND(parser->trash_objs, nobj);
			}
			else {
				ucl_hash_replace(container, tobj, nobj);
				ucl_object_unref(tobj);
			}

			break;

		case UCL_DUPLICATE_REWRITE:
			/* We just rewrite old values regardless of priority */
			ucl_hash_replace(container, tobj, nobj);
			ucl_object_unref(tobj);

			break;

		case UCL_DUPLICATE_ERROR:
			/*
			 * Keys are tracked as pointer + keylen, so bound the output by
			 * keylen instead of relying on a terminating NUL after the key.
			 */
			snprintf(errmsg, sizeof(errmsg),
					 "duplicate element for key '%.*s' found",
					 (int) nobj->keylen, nobj->key);
			ucl_set_err(parser, UCL_EMERGE, errmsg, &parser->err);
			return false;

		case UCL_DUPLICATE_MERGE:
			/*
			 * Here we do have some old object so we just push it on top of objects stack
			 * Check priority and then perform the merge on the remaining objects
			 */
			if (tobj->type == UCL_OBJECT || tobj->type == UCL_ARRAY) {
				/* Drop the new element and continue with the stored container */
				ucl_object_unref(nobj);
				nobj = tobj;
			}
			else if (priold == prinew) {
				ucl_parser_append_elt(parser, container, tobj, nobj);
			}
			else if (priold > prinew) {
				/*
				 * We add this new object to a list of trash objects just to ensure
				 * that it won't come to any real object
				 * XXX: rather inefficient approach
				 */
				DL_APPEND(parser->trash_objs, nobj);
			}
			else {
				ucl_hash_replace(container, tobj, nobj);
				ucl_object_unref(tobj);
			}
			break;
		}
	}

	/* The hash pointer may have changed on insertion, store it back */
	parser->stack->obj->value.ov = container;
	parser->cur_obj = nobj;
	ucl_attach_comment(parser, nobj, false);

	return true;
}
1366 
1367 /**
1368  * Parse a key in an object
1369  * @param parser
1370  * @param chunk
1371  * @param next_key
1372  * @param end_of_object
1373  * @return true if a key has been parsed
1374  */
1375 static bool
ucl_parse_key(struct ucl_parser * parser,struct ucl_chunk * chunk,bool * next_key,bool * end_of_object,bool * got_content)1376 ucl_parse_key(struct ucl_parser *parser, struct ucl_chunk *chunk,
1377 			  bool *next_key, bool *end_of_object, bool *got_content)
1378 {
1379 	const unsigned char *p, *c = NULL, *end, *t;
1380 	const char *key = NULL;
1381 	bool got_quote = false, got_eq = false, got_semicolon = false,
1382 		 need_unescape = false, ucl_escape = false, var_expand = false,
1383 		 got_sep = false;
1384 	ucl_object_t *nobj;
1385 	ssize_t keylen;
1386 
1387 	p = chunk->pos;
1388 
1389 	if (*p == '.' && !(parser->flags & UCL_PARSER_DISABLE_MACRO)) {
1390 		ucl_chunk_skipc(chunk, p);
1391 		parser->prev_state = parser->state;
1392 		parser->state = UCL_STATE_MACRO_NAME;
1393 		*end_of_object = false;
1394 		return true;
1395 	}
1396 	while (p < chunk->end) {
1397 		/*
1398 		 * A key must start with alpha, number, '/' or '_' and end with space character
1399 		 */
1400 		if (c == NULL) {
1401 			if (chunk->remain >= 2 && ucl_lex_is_comment(p[0], p[1])) {
1402 				if (!ucl_skip_comments(parser)) {
1403 					return false;
1404 				}
1405 				p = chunk->pos;
1406 			}
1407 			else if (ucl_test_character(*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1408 				ucl_chunk_skipc(chunk, p);
1409 			}
1410 			else if (ucl_test_character(*p, UCL_CHARACTER_KEY_START)) {
1411 				/* The first symbol */
1412 				c = p;
1413 				ucl_chunk_skipc(chunk, p);
1414 				*got_content = true;
1415 			}
1416 			else if (*p == '"') {
1417 				/* JSON style key */
1418 				c = p + 1;
1419 				got_quote = true;
1420 				*got_content = true;
1421 				ucl_chunk_skipc(chunk, p);
1422 			}
1423 			else if (*p == '}') {
1424 				/* We have actually end of an object */
1425 				*end_of_object = true;
1426 				return true;
1427 			}
1428 			else if (*p == '.' && !(parser->flags & UCL_PARSER_DISABLE_MACRO)) {
1429 				ucl_chunk_skipc(chunk, p);
1430 				parser->prev_state = parser->state;
1431 				parser->state = UCL_STATE_MACRO_NAME;
1432 				return true;
1433 			}
1434 			else {
1435 				/* Invalid identifier */
1436 				ucl_set_err(parser, UCL_ESYNTAX, "key must begin with a letter",
1437 							&parser->err);
1438 				return false;
1439 			}
1440 		}
1441 		else {
1442 			/* Parse the body of a key */
1443 			if (!got_quote) {
1444 				if (ucl_test_character(*p, UCL_CHARACTER_KEY)) {
1445 					*got_content = true;
1446 					ucl_chunk_skipc(chunk, p);
1447 				}
1448 				else if (ucl_test_character(*p, UCL_CHARACTER_KEY_SEP)) {
1449 					end = p;
1450 					break;
1451 				}
1452 				else {
1453 					ucl_set_err(parser, UCL_ESYNTAX, "invalid character in a key",
1454 								&parser->err);
1455 					return false;
1456 				}
1457 			}
1458 			else {
1459 				/* We need to parse json like quoted string */
1460 				if (!ucl_lex_json_string(parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1461 					return false;
1462 				}
1463 				/* Always escape keys obtained via json */
1464 				end = chunk->pos - 1;
1465 				p = chunk->pos;
1466 				break;
1467 			}
1468 		}
1469 	}
1470 
1471 	if (p >= chunk->end && *got_content) {
1472 		ucl_set_err(parser, UCL_ESYNTAX, "unfinished key", &parser->err);
1473 		return false;
1474 	}
1475 	else if (!*got_content) {
1476 		return true;
1477 	}
1478 	*end_of_object = false;
1479 	/* We are now at the end of the key, need to parse the rest */
1480 	while (p < chunk->end) {
1481 		if (ucl_test_character(*p, UCL_CHARACTER_WHITESPACE)) {
1482 			ucl_chunk_skipc(chunk, p);
1483 		}
1484 		else if (*p == '=') {
1485 			if (!got_eq && !got_semicolon) {
1486 				ucl_chunk_skipc(chunk, p);
1487 				got_eq = true;
1488 			}
1489 			else {
1490 				ucl_set_err(parser, UCL_ESYNTAX, "unexpected '=' character",
1491 							&parser->err);
1492 				return false;
1493 			}
1494 		}
1495 		else if (*p == ':') {
1496 			if (!got_eq && !got_semicolon) {
1497 				ucl_chunk_skipc(chunk, p);
1498 				got_semicolon = true;
1499 			}
1500 			else {
1501 				ucl_set_err(parser, UCL_ESYNTAX, "unexpected ':' character",
1502 							&parser->err);
1503 				return false;
1504 			}
1505 		}
1506 		else if (chunk->remain >= 2 && ucl_lex_is_comment(p[0], p[1])) {
1507 			/* Check for comment */
1508 			if (!ucl_skip_comments(parser)) {
1509 				return false;
1510 			}
1511 			p = chunk->pos;
1512 		}
1513 		else {
1514 			/* Start value */
1515 			break;
1516 		}
1517 	}
1518 
1519 	if (p >= chunk->end && got_content) {
1520 		ucl_set_err(parser, UCL_ESYNTAX, "unfinished key", &parser->err);
1521 		return false;
1522 	}
1523 
1524 	got_sep = got_semicolon || got_eq;
1525 
1526 	if (!got_sep) {
1527 		/*
1528 		 * Maybe we have more keys nested, so search for termination character.
1529 		 * Possible choices:
1530 		 * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1531 		 * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1532 		 * 3) key1 value[;,\n] <- we treat that as linear object
1533 		 */
1534 		t = p;
1535 		*next_key = false;
1536 		while (ucl_test_character(*t, UCL_CHARACTER_WHITESPACE)) {
1537 			t++;
1538 		}
1539 		/* Check first non-space character after a key */
1540 		if (*t != '{' && *t != '[') {
1541 			while (t < chunk->end) {
1542 				if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1543 					break;
1544 				}
1545 				else if (*t == '{' || *t == '[') {
1546 					*next_key = true;
1547 					break;
1548 				}
1549 				t++;
1550 			}
1551 		}
1552 	}
1553 
1554 	/* Create a new object */
1555 	nobj = ucl_object_new_full(UCL_NULL, parser->chunks->priority);
1556 	if (nobj == NULL) {
1557 		return false;
1558 	}
1559 	keylen = ucl_copy_or_store_ptr(parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1560 								   &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE,
1561 								   false, false);
1562 	if (keylen == -1) {
1563 		ucl_object_unref(nobj);
1564 		return false;
1565 	}
1566 	else if (keylen == 0) {
1567 		ucl_set_err(parser, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1568 		ucl_object_unref(nobj);
1569 		return false;
1570 	}
1571 
1572 	nobj->key = key;
1573 	nobj->keylen = keylen;
1574 
1575 	if (!ucl_parser_process_object_element(parser, nobj)) {
1576 		return false;
1577 	}
1578 
1579 	if (ucl_escape) {
1580 		nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1581 	}
1582 
1583 
1584 	return true;
1585 }
1586 
1587 /**
1588  * Parse a cl string
1589  * @param parser
1590  * @param chunk
1591  * @param var_expand
1592  * @param need_unescape
1593  * @return true if a key has been parsed
1594  */
1595 static bool
ucl_parse_string_value(struct ucl_parser * parser,struct ucl_chunk * chunk,bool * var_expand,bool * need_unescape)1596 ucl_parse_string_value(struct ucl_parser *parser,
1597 					   struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1598 {
1599 	const unsigned char *p;
1600 	enum {
1601 		UCL_BRACE_ROUND = 0,
1602 		UCL_BRACE_SQUARE,
1603 		UCL_BRACE_FIGURE
1604 	};
1605 	int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1606 
1607 	p = chunk->pos;
1608 
1609 	while (p < chunk->end) {
1610 
1611 		/* Skip pairs of figure braces */
1612 		if (*p == '{') {
1613 			braces[UCL_BRACE_FIGURE][0]++;
1614 		}
1615 		else if (*p == '}') {
1616 			braces[UCL_BRACE_FIGURE][1]++;
1617 			if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1618 				/* This is not a termination symbol, continue */
1619 				ucl_chunk_skipc(chunk, p);
1620 				continue;
1621 			}
1622 		}
1623 		/* Skip pairs of square braces */
1624 		else if (*p == '[') {
1625 			braces[UCL_BRACE_SQUARE][0]++;
1626 		}
1627 		else if (*p == ']') {
1628 			braces[UCL_BRACE_SQUARE][1]++;
1629 			if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1630 				/* This is not a termination symbol, continue */
1631 				ucl_chunk_skipc(chunk, p);
1632 				continue;
1633 			}
1634 		}
1635 		else if (*p == '$') {
1636 			*var_expand = true;
1637 		}
1638 		else if (*p == '\\') {
1639 			*need_unescape = true;
1640 			ucl_chunk_skipc(chunk, p);
1641 			if (p < chunk->end) {
1642 				ucl_chunk_skipc(chunk, p);
1643 			}
1644 			continue;
1645 		}
1646 
1647 		if (ucl_lex_is_atom_end(*p) || (chunk->remain >= 2 && ucl_lex_is_comment(p[0], p[1]))) {
1648 			break;
1649 		}
1650 		ucl_chunk_skipc(chunk, p);
1651 	}
1652 
1653 	return true;
1654 }
1655 
1656 /**
1657  * Parse multiline string ending with \n{term}\n
1658  * @param parser
1659  * @param chunk
1660  * @param term
1661  * @param term_len
1662  * @param beg
1663  * @param var_expand
1664  * @return size of multiline string or 0 in case of error
1665  */
1666 static int
ucl_parse_multiline_string(struct ucl_parser * parser,struct ucl_chunk * chunk,const unsigned char * term,int term_len,unsigned char const ** beg,bool * var_expand)1667 ucl_parse_multiline_string(struct ucl_parser *parser,
1668 						   struct ucl_chunk *chunk, const unsigned char *term,
1669 						   int term_len, unsigned char const **beg,
1670 						   bool *var_expand)
1671 {
1672 	const unsigned char *p, *c, *tend;
1673 	bool newline = false;
1674 	int len = 0;
1675 
1676 	p = chunk->pos;
1677 
1678 	c = p;
1679 
1680 	while (p < chunk->end) {
1681 		if (newline) {
1682 			if (chunk->end - p < term_len) {
1683 				return 0;
1684 			}
1685 			else if (memcmp(p, term, term_len) == 0) {
1686 				tend = p + term_len;
1687 				if (tend < chunk->end && *tend != '\n' && *tend != ';' && *tend != ',') {
1688 					/* Incomplete terminator */
1689 					ucl_chunk_skipc(chunk, p);
1690 					continue;
1691 				}
1692 				len = p - c;
1693 				chunk->remain -= term_len;
1694 				chunk->pos = p + term_len;
1695 				chunk->column = term_len;
1696 				*beg = c;
1697 				break;
1698 			}
1699 		}
1700 		if (*p == '\n') {
1701 			newline = true;
1702 		}
1703 		else {
1704 			if (*p == '$') {
1705 				*var_expand = true;
1706 			}
1707 			newline = false;
1708 		}
1709 		ucl_chunk_skipc(chunk, p);
1710 	}
1711 
1712 	return len;
1713 }
1714 
1715 static inline ucl_object_t *
ucl_parser_get_container(struct ucl_parser * parser)1716 ucl_parser_get_container(struct ucl_parser *parser)
1717 {
1718 	ucl_object_t *t, *obj = NULL;
1719 
1720 	if (parser == NULL || parser->stack == NULL || parser->stack->obj == NULL) {
1721 		return NULL;
1722 	}
1723 
1724 	if (parser->stack->obj->type == UCL_ARRAY) {
1725 		/* Object must be allocated */
1726 		obj = ucl_object_new_full(UCL_NULL, parser->chunks->priority);
1727 		t = parser->stack->obj;
1728 
1729 		if (!ucl_array_append(t, obj)) {
1730 			ucl_object_unref(obj);
1731 			return NULL;
1732 		}
1733 
1734 		parser->cur_obj = obj;
1735 		ucl_attach_comment(parser, obj, false);
1736 	}
1737 	else {
1738 		/* Object has been already allocated */
1739 		obj = parser->cur_obj;
1740 	}
1741 
1742 	return obj;
1743 }
1744 
1745 /**
1746  * Handle value data
1747  * @param parser
1748  * @param chunk
1749  * @return
1750  */
1751 static bool
ucl_parse_value(struct ucl_parser * parser,struct ucl_chunk * chunk)1752 ucl_parse_value(struct ucl_parser *parser, struct ucl_chunk *chunk)
1753 {
1754 	const unsigned char *p, *c;
1755 	ucl_object_t *obj = NULL;
1756 	unsigned int stripped_spaces;
1757 	ssize_t str_len;
1758 	bool need_unescape = false, ucl_escape = false, var_expand = false;
1759 
1760 	p = chunk->pos;
1761 
1762 	/* Skip any spaces and comments */
1763 	if (ucl_test_character(*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1764 		(chunk->remain >= 2 && ucl_lex_is_comment(p[0], p[1]))) {
1765 		while (p < chunk->end && ucl_test_character(*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1766 			ucl_chunk_skipc(chunk, p);
1767 		}
1768 		if (!ucl_skip_comments(parser)) {
1769 			return false;
1770 		}
1771 		p = chunk->pos;
1772 	}
1773 
1774 	while (p < chunk->end) {
1775 		c = p;
1776 		switch (*p) {
1777 		case '"':
1778 			ucl_chunk_skipc(chunk, p);
1779 
1780 			if (!ucl_lex_json_string(parser, chunk, &need_unescape, &ucl_escape,
1781 									 &var_expand)) {
1782 				return false;
1783 			}
1784 
1785 			obj = ucl_parser_get_container(parser);
1786 			if (!obj) {
1787 				return false;
1788 			}
1789 
1790 			str_len = chunk->pos - c - 2;
1791 			obj->type = UCL_STRING;
1792 			if ((str_len = ucl_copy_or_store_ptr(parser, c + 1,
1793 												 &obj->trash_stack[UCL_TRASH_VALUE],
1794 												 &obj->value.sv, str_len, need_unescape, false,
1795 												 var_expand, false)) == -1) {
1796 				return false;
1797 			}
1798 
1799 			obj->len = str_len;
1800 			parser->state = UCL_STATE_AFTER_VALUE;
1801 
1802 			return true;
1803 			break;
1804 		case '\'':
1805 			ucl_chunk_skipc(chunk, p);
1806 
1807 			if (!ucl_lex_squoted_string(parser, chunk, &need_unescape)) {
1808 				return false;
1809 			}
1810 
1811 			obj = ucl_parser_get_container(parser);
1812 			if (!obj) {
1813 				return false;
1814 			}
1815 
1816 			str_len = chunk->pos - c - 2;
1817 			obj->type = UCL_STRING;
1818 			obj->flags |= UCL_OBJECT_SQUOTED;
1819 
1820 			if ((str_len = ucl_copy_or_store_ptr(parser, c + 1,
1821 												 &obj->trash_stack[UCL_TRASH_VALUE],
1822 												 &obj->value.sv, str_len, need_unescape, false,
1823 												 var_expand, true)) == -1) {
1824 				return false;
1825 			}
1826 
1827 			obj->len = str_len;
1828 
1829 			parser->state = UCL_STATE_AFTER_VALUE;
1830 
1831 			return true;
1832 			break;
1833 		case '{':
1834 			obj = ucl_parser_get_container(parser);
1835 			if (obj == NULL) {
1836 				parser->state = UCL_STATE_ERROR;
1837 				ucl_set_err(parser, UCL_ESYNTAX, "object value must be a part of an object",
1838 							&parser->err);
1839 				return false;
1840 			}
1841 			/* We have a new object */
1842 			if (parser->stack) {
1843 				obj = ucl_parser_add_container(obj, parser, false,
1844 											   parser->stack->e.params.level, true);
1845 			}
1846 			else {
1847 				return false;
1848 			}
1849 			if (obj == NULL) {
1850 				return false;
1851 			}
1852 
1853 			ucl_chunk_skipc(chunk, p);
1854 
1855 			return true;
1856 			break;
1857 		case '[':
1858 			obj = ucl_parser_get_container(parser);
1859 			if (obj == NULL) {
1860 				parser->state = UCL_STATE_ERROR;
1861 				ucl_set_err(parser, UCL_ESYNTAX, "array value must be a part of an object",
1862 							&parser->err);
1863 				return false;
1864 			}
1865 			/* We have a new array */
1866 			if (parser->stack) {
1867 				obj = ucl_parser_add_container(obj, parser, true,
1868 											   parser->stack->e.params.level, true);
1869 			}
1870 			else {
1871 				return false;
1872 			}
1873 
1874 			if (obj == NULL) {
1875 				return false;
1876 			}
1877 
1878 			ucl_chunk_skipc(chunk, p);
1879 
1880 			return true;
1881 			break;
1882 		case ']':
1883 			/* We have the array ending */
1884 			if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1885 				parser->state = UCL_STATE_AFTER_VALUE;
1886 				return true;
1887 			}
1888 			else {
1889 				goto parse_string;
1890 			}
1891 			break;
1892 		case '<':
1893 			obj = ucl_parser_get_container(parser);
1894 			if (obj == NULL) {
1895 				parser->state = UCL_STATE_ERROR;
1896 				ucl_set_err(parser, UCL_ESYNTAX, "multiline value must be a part of an object",
1897 							&parser->err);
1898 				return false;
1899 			}
1900 			/* We have something like multiline value, which must be <<[A-Z]+\n */
1901 			if (chunk->end - p > 3) {
1902 				if (memcmp(p, "<<", 2) == 0) {
1903 					p += 2;
1904 					/* We allow only uppercase characters in multiline definitions */
1905 					while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1906 						p++;
1907 					}
1908 					if (p == chunk->end) {
1909 						ucl_set_err(parser, UCL_ESYNTAX,
1910 									"unterminated multiline value", &parser->err);
1911 						return false;
1912 					}
1913 					if (*p == '\n') {
1914 						/* Set chunk positions and start multiline parsing */
1915 						chunk->remain -= p - c + 1;
1916 						c += 2;
1917 						chunk->pos = p + 1;
1918 						chunk->column = 0;
1919 						chunk->line++;
1920 						if ((str_len = ucl_parse_multiline_string(parser, chunk, c,
1921 																  p - c, &c, &var_expand)) == 0) {
1922 							ucl_set_err(parser, UCL_ESYNTAX,
1923 										"unterminated multiline value", &parser->err);
1924 							return false;
1925 						}
1926 
1927 						obj->type = UCL_STRING;
1928 						obj->flags |= UCL_OBJECT_MULTILINE;
1929 						if ((str_len = ucl_copy_or_store_ptr(parser, c,
1930 															 &obj->trash_stack[UCL_TRASH_VALUE],
1931 															 &obj->value.sv, str_len - 1, false,
1932 															 false, var_expand, false)) == -1) {
1933 							return false;
1934 						}
1935 						obj->len = str_len;
1936 
1937 						parser->state = UCL_STATE_AFTER_VALUE;
1938 
1939 						return true;
1940 					}
1941 				}
1942 			}
1943 			/* Fallback to ordinary strings */
1944 			/* FALLTHRU */
1945 		default:
1946 		parse_string:
1947 			if (obj == NULL) {
1948 				obj = ucl_parser_get_container(parser);
1949 			}
1950 
1951 			if (obj == NULL) {
1952 				parser->state = UCL_STATE_ERROR;
1953 				ucl_set_err(parser, UCL_ESYNTAX, "value must be a part of an object",
1954 							&parser->err);
1955 				return false;
1956 			}
1957 
1958 			/* Parse atom */
1959 			if (ucl_test_character(*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1960 				if (!ucl_lex_number(parser, chunk, obj)) {
1961 					if (parser->state == UCL_STATE_ERROR) {
1962 						return false;
1963 					}
1964 				}
1965 				else {
1966 					parser->state = UCL_STATE_AFTER_VALUE;
1967 					return true;
1968 				}
1969 				/* Fallback to normal string */
1970 			}
1971 
1972 			if (!ucl_parse_string_value(parser, chunk, &var_expand,
1973 										&need_unescape)) {
1974 				return false;
1975 			}
1976 			/* Cut trailing spaces */
1977 			stripped_spaces = 0;
1978 			while (ucl_test_character(*(chunk->pos - 1 - stripped_spaces),
1979 									  UCL_CHARACTER_WHITESPACE)) {
1980 				stripped_spaces++;
1981 			}
1982 			str_len = chunk->pos - c - stripped_spaces;
1983 			if (str_len <= 0) {
1984 				ucl_set_err(parser, UCL_ESYNTAX, "string value must not be empty",
1985 							&parser->err);
1986 				return false;
1987 			}
1988 			else if (str_len == 4 && memcmp(c, "null", 4) == 0) {
1989 				obj->len = 0;
1990 				obj->type = UCL_NULL;
1991 			}
1992 			else if (str_len == 3 && memcmp(c, "nan", 3) == 0) {
1993 				obj->len = 0;
1994 				obj->type = UCL_FLOAT;
1995 				obj->value.dv = NAN;
1996 			}
1997 			else if (str_len == 3 && memcmp(c, "inf", 3) == 0) {
1998 				obj->len = 0;
1999 				obj->type = UCL_FLOAT;
2000 				obj->value.dv = INFINITY;
2001 			}
2002 			else if (!ucl_maybe_parse_boolean(obj, c, str_len)) {
2003 				obj->type = UCL_STRING;
2004 				if ((str_len = ucl_copy_or_store_ptr(parser, c,
2005 													 &obj->trash_stack[UCL_TRASH_VALUE],
2006 													 &obj->value.sv, str_len, need_unescape,
2007 													 false, var_expand, false)) == -1) {
2008 					return false;
2009 				}
2010 				obj->len = str_len;
2011 			}
2012 
2013 			parser->state = UCL_STATE_AFTER_VALUE;
2014 
2015 			return true;
2016 			break;
2017 		}
2018 	}
2019 
2020 	return true;
2021 }
2022 
2023 /**
2024  * Handle after value data
2025  * @param parser
2026  * @param chunk
2027  * @return
2028  */
2029 static bool
ucl_parse_after_value(struct ucl_parser * parser,struct ucl_chunk * chunk)2030 ucl_parse_after_value(struct ucl_parser *parser, struct ucl_chunk *chunk)
2031 {
2032 	const unsigned char *p;
2033 	bool got_sep = false;
2034 	struct ucl_stack *st;
2035 
2036 	p = chunk->pos;
2037 
2038 	while (p < chunk->end) {
2039 		if (ucl_test_character(*p, UCL_CHARACTER_WHITESPACE)) {
2040 			/* Skip whitespaces */
2041 			ucl_chunk_skipc(chunk, p);
2042 		}
2043 		else if (chunk->remain >= 2 && ucl_lex_is_comment(p[0], p[1])) {
2044 			/* Skip comment */
2045 			if (!ucl_skip_comments(parser)) {
2046 				return false;
2047 			}
2048 			/* Treat comment as a separator */
2049 			got_sep = true;
2050 			p = chunk->pos;
2051 		}
2052 		else if (ucl_test_character(*p, UCL_CHARACTER_VALUE_END)) {
2053 			if (*p == '}' || *p == ']') {
2054 				if (parser->stack == NULL) {
2055 					ucl_set_err(parser, UCL_ESYNTAX,
2056 								"end of array or object detected without corresponding start",
2057 								&parser->err);
2058 					return false;
2059 				}
2060 				if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
2061 					(*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
2062 
2063 					/* Pop all nested objects from a stack */
2064 					st = parser->stack;
2065 
2066 					if (!(st->e.params.flags & UCL_STACK_HAS_OBRACE)) {
2067 						parser->err_code = UCL_EUNPAIRED;
2068 						ucl_create_err(&parser->err,
2069 									   "%s:%d object closed with } is not opened with { at line %d",
2070 									   chunk->fname ? chunk->fname : "memory",
2071 									   parser->chunks->line, st->e.params.line);
2072 
2073 						return false;
2074 					}
2075 
2076 					if ((st->e.params.flags & UCL_STACK_AUTOMATIC)) {
2077 						st->e.params.flags = 0;
2078 					}
2079 					else {
2080 						parser->stack = st->next;
2081 						UCL_FREE(sizeof(struct ucl_stack), st);
2082 					}
2083 
2084 					if (parser->cur_obj) {
2085 						ucl_attach_comment(parser, parser->cur_obj, true);
2086 					}
2087 
2088 					while (parser->stack != NULL) {
2089 						st = parser->stack;
2090 
2091 						if (st->next == NULL) {
2092 							break;
2093 						}
2094 						else if (st->next->e.params.level == st->e.params.level) {
2095 							break;
2096 						}
2097 
2098 
2099 						parser->stack = st->next;
2100 						parser->cur_obj = st->obj;
2101 						UCL_FREE(sizeof(struct ucl_stack), st);
2102 					}
2103 				}
2104 				else {
2105 					ucl_set_err(parser, UCL_ESYNTAX,
2106 								"unexpected terminating symbol detected",
2107 								&parser->err);
2108 					return false;
2109 				}
2110 
2111 				if (parser->stack == NULL) {
2112 					/* Ignore everything after a top object */
2113 					return true;
2114 				}
2115 				else {
2116 					ucl_chunk_skipc(chunk, p);
2117 				}
2118 				got_sep = true;
2119 			}
2120 			else {
2121 				/* Got a separator */
2122 				got_sep = true;
2123 				ucl_chunk_skipc(chunk, p);
2124 			}
2125 		}
2126 		else {
2127 			/* Anything else */
2128 			if (!got_sep) {
2129 				ucl_set_err(parser, UCL_ESYNTAX, "delimiter is missing",
2130 							&parser->err);
2131 				return false;
2132 			}
2133 			return true;
2134 		}
2135 	}
2136 
2137 	return true;
2138 }
2139 
2140 static bool
ucl_skip_macro_as_comment(struct ucl_parser * parser,struct ucl_chunk * chunk)2141 ucl_skip_macro_as_comment(struct ucl_parser *parser,
2142 						  struct ucl_chunk *chunk)
2143 {
2144 	const unsigned char *p, *c;
2145 	enum {
2146 		macro_skip_start = 0,
2147 		macro_has_symbols,
2148 		macro_has_obrace,
2149 		macro_has_quote,
2150 		macro_has_backslash,
2151 		macro_has_sqbrace,
2152 		macro_save
2153 	} state = macro_skip_start,
2154 	  prev_state = macro_skip_start;
2155 
2156 	p = chunk->pos;
2157 	c = chunk->pos;
2158 
2159 	while (p < chunk->end) {
2160 		switch (state) {
2161 		case macro_skip_start:
2162 			if (!ucl_test_character(*p, UCL_CHARACTER_WHITESPACE)) {
2163 				state = macro_has_symbols;
2164 			}
2165 			else if (ucl_test_character(*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
2166 				state = macro_save;
2167 				continue;
2168 			}
2169 
2170 			ucl_chunk_skipc(chunk, p);
2171 			break;
2172 
2173 		case macro_has_symbols:
2174 			if (*p == '{') {
2175 				state = macro_has_sqbrace;
2176 			}
2177 			else if (*p == '(') {
2178 				state = macro_has_obrace;
2179 			}
2180 			else if (*p == '"') {
2181 				state = macro_has_quote;
2182 			}
2183 			else if (*p == '\n') {
2184 				state = macro_save;
2185 				continue;
2186 			}
2187 
2188 			ucl_chunk_skipc(chunk, p);
2189 			break;
2190 
2191 		case macro_has_obrace:
2192 			if (*p == '\\') {
2193 				prev_state = state;
2194 				state = macro_has_backslash;
2195 			}
2196 			else if (*p == ')') {
2197 				state = macro_has_symbols;
2198 			}
2199 
2200 			ucl_chunk_skipc(chunk, p);
2201 			break;
2202 
2203 		case macro_has_sqbrace:
2204 			if (*p == '\\') {
2205 				prev_state = state;
2206 				state = macro_has_backslash;
2207 			}
2208 			else if (*p == '}') {
2209 				state = macro_save;
2210 			}
2211 
2212 			ucl_chunk_skipc(chunk, p);
2213 			break;
2214 
2215 		case macro_has_quote:
2216 			if (*p == '\\') {
2217 				prev_state = state;
2218 				state = macro_has_backslash;
2219 			}
2220 			else if (*p == '"') {
2221 				state = macro_save;
2222 			}
2223 
2224 			ucl_chunk_skipc(chunk, p);
2225 			break;
2226 
2227 		case macro_has_backslash:
2228 			state = prev_state;
2229 			ucl_chunk_skipc(chunk, p);
2230 			break;
2231 
2232 		case macro_save:
2233 			if (parser->flags & UCL_PARSER_SAVE_COMMENTS) {
2234 				ucl_save_comment(parser, c, p - c);
2235 			}
2236 
2237 			return true;
2238 		}
2239 	}
2240 
2241 	return false;
2242 }
2243 
2244 /**
2245  * Handle macro data
2246  * @param parser
2247  * @param chunk
2248  * @param marco
2249  * @param macro_start
2250  * @param macro_len
2251  * @return
2252  */
2253 static bool
ucl_parse_macro_value(struct ucl_parser * parser,struct ucl_chunk * chunk,struct ucl_macro * macro,unsigned char const ** macro_start,size_t * macro_len)2254 ucl_parse_macro_value(struct ucl_parser *parser,
2255 					  struct ucl_chunk *chunk, struct ucl_macro *macro,
2256 					  unsigned char const **macro_start, size_t *macro_len)
2257 {
2258 	const unsigned char *p, *c;
2259 	bool need_unescape = false, ucl_escape = false, var_expand = false;
2260 
2261 	p = chunk->pos;
2262 
2263 	switch (*p) {
2264 	case '"':
2265 		/* We have macro value encoded in quotes */
2266 		c = p;
2267 		ucl_chunk_skipc(chunk, p);
2268 		if (!ucl_lex_json_string(parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
2269 			return false;
2270 		}
2271 
2272 		*macro_start = c + 1;
2273 		*macro_len = chunk->pos - c - 2;
2274 		p = chunk->pos;
2275 		break;
2276 	case '{':
2277 		/* We got a multiline macro body */
2278 		ucl_chunk_skipc(chunk, p);
2279 		/* Skip spaces at the beginning */
2280 		while (p < chunk->end) {
2281 			if (ucl_test_character(*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
2282 				ucl_chunk_skipc(chunk, p);
2283 			}
2284 			else {
2285 				break;
2286 			}
2287 		}
2288 		c = p;
2289 		while (p < chunk->end) {
2290 			if (*p == '}') {
2291 				break;
2292 			}
2293 			ucl_chunk_skipc(chunk, p);
2294 		}
2295 		*macro_start = c;
2296 		*macro_len = p - c;
2297 		ucl_chunk_skipc(chunk, p);
2298 		break;
2299 	default:
2300 		/* Macro is not enclosed in quotes or braces */
2301 		c = p;
2302 		while (p < chunk->end) {
2303 			if (ucl_lex_is_atom_end(*p)) {
2304 				break;
2305 			}
2306 			ucl_chunk_skipc(chunk, p);
2307 		}
2308 		*macro_start = c;
2309 		*macro_len = p - c;
2310 		break;
2311 	}
2312 
2313 	/* We are at the end of a macro */
2314 	/* Skip ';' and space characters and return to previous state */
2315 	while (p < chunk->end) {
2316 		if (!ucl_test_character(*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
2317 			break;
2318 		}
2319 		ucl_chunk_skipc(chunk, p);
2320 	}
2321 	return true;
2322 }
2323 
2324 /**
2325  * Parse macro arguments as UCL object
2326  * @param parser parser structure
2327  * @param chunk the current data chunk
2328  * @return
2329  */
2330 static ucl_object_t *
ucl_parse_macro_arguments(struct ucl_parser * parser,struct ucl_chunk * chunk)2331 ucl_parse_macro_arguments(struct ucl_parser *parser,
2332 						  struct ucl_chunk *chunk)
2333 {
2334 	ucl_object_t *res = NULL;
2335 	struct ucl_parser *params_parser;
2336 	int obraces = 1, ebraces = 0, state = 0;
2337 	const unsigned char *p, *c;
2338 	size_t args_len = 0;
2339 	struct ucl_parser_saved_state saved;
2340 
2341 	saved.column = chunk->column;
2342 	saved.line = chunk->line;
2343 	saved.pos = chunk->pos;
2344 	saved.remain = chunk->remain;
2345 	p = chunk->pos;
2346 
2347 	if (*p != '(' || chunk->remain < 2) {
2348 		return NULL;
2349 	}
2350 
2351 	/* Set begin and start */
2352 	ucl_chunk_skipc(chunk, p);
2353 	c = p;
2354 
2355 	while ((p) < (chunk)->end) {
2356 		switch (state) {
2357 		case 0:
2358 			/* Parse symbols and check for '(', ')' and '"' */
2359 			if (*p == '(') {
2360 				obraces++;
2361 			}
2362 			else if (*p == ')') {
2363 				ebraces++;
2364 			}
2365 			else if (*p == '"') {
2366 				state = 1;
2367 			}
2368 			/* Check pairing */
2369 			if (obraces == ebraces) {
2370 				state = 99;
2371 			}
2372 			else {
2373 				args_len++;
2374 			}
2375 			/* Check overflow */
2376 			if (chunk->remain == 0) {
2377 				goto restore_chunk;
2378 			}
2379 			ucl_chunk_skipc(chunk, p);
2380 			break;
2381 		case 1:
2382 			/* We have quote character, so skip all but quotes */
2383 			if (*p == '"' && *(p - 1) != '\\') {
2384 				state = 0;
2385 			}
2386 			if (chunk->remain == 0) {
2387 				goto restore_chunk;
2388 			}
2389 			args_len++;
2390 			ucl_chunk_skipc(chunk, p);
2391 			break;
2392 		case 99:
2393 			/*
2394 			 * We have read the full body of arguments, so we need to parse and set
2395 			 * object from that
2396 			 */
2397 			params_parser = ucl_parser_new(parser->flags);
2398 			if (!ucl_parser_add_chunk(params_parser, c, args_len)) {
2399 				ucl_set_err(parser, UCL_ESYNTAX, "macro arguments parsing error",
2400 							&parser->err);
2401 			}
2402 			else {
2403 				res = ucl_parser_get_object(params_parser);
2404 			}
2405 			ucl_parser_free(params_parser);
2406 
2407 			return res;
2408 
2409 			break;
2410 		}
2411 	}
2412 
2413 	return res;
2414 
2415 restore_chunk:
2416 	chunk->column = saved.column;
2417 	chunk->line = saved.line;
2418 	chunk->pos = saved.pos;
2419 	chunk->remain = saved.remain;
2420 
2421 	return NULL;
2422 }
2423 
/*
 * Advance p (together with the chunk position) over whitespace. If the first
 * non-whitespace bytes open a comment, hand over to ucl_skip_comments() and
 * resynchronise p with the chunk afterwards.
 * NB: expands to a `return false` from the enclosing function when comment
 * skipping fails, and evaluates its arguments more than once.
 */
#define SKIP_SPACES_COMMENTS(parser, chunk, p)                                \
	do {                                                                      \
		while ((p) < (chunk)->end &&                                          \
			   ucl_test_character(*(p), UCL_CHARACTER_WHITESPACE_UNSAFE)) {   \
			ucl_chunk_skipc(chunk, p);                                        \
		}                                                                     \
		if ((p) < (chunk)->end && (chunk)->remain >= 2 &&                     \
			ucl_lex_is_comment((p)[0], (p)[1])) {                             \
			if (!ucl_skip_comments(parser)) {                                 \
				return false;                                                 \
			}                                                                 \
			p = (chunk)->pos;                                                 \
		}                                                                     \
	} while (0)
2439 
2440 /**
2441  * Handle the main states of rcl parser
2442  * @param parser parser structure
2443  * @return true if chunk has been parsed and false in case of error
2444  */
2445 static bool
ucl_state_machine(struct ucl_parser * parser)2446 ucl_state_machine(struct ucl_parser *parser)
2447 {
2448 	ucl_object_t *obj, *macro_args;
2449 	struct ucl_chunk *chunk = parser->chunks;
2450 	const unsigned char *p, *c = NULL, *macro_start = NULL;
2451 	unsigned char *macro_escaped;
2452 	size_t macro_len = 0;
2453 	struct ucl_macro *macro = NULL;
2454 	bool next_key = false, end_of_object = false, got_content = false, ret;
2455 
2456 	if (parser->top_obj == NULL) {
2457 		parser->state = UCL_STATE_INIT;
2458 	}
2459 
2460 	p = chunk->pos;
2461 	while (chunk->pos < chunk->end) {
2462 		switch (parser->state) {
2463 		case UCL_STATE_INIT:
2464 			/*
2465 			 * At the init state we can either go to the parse array or object
2466 			 * if we got [ or { correspondingly or can just treat new data as
2467 			 * a key of newly created object
2468 			 */
2469 			if (!ucl_skip_comments(parser)) {
2470 				parser->prev_state = parser->state;
2471 				parser->state = UCL_STATE_ERROR;
2472 				return false;
2473 			}
2474 			else {
2475 				bool seen_obrace = false;
2476 
2477 				/* Skip any spaces */
2478 				while (p < chunk->end && ucl_test_character(*p,
2479 															UCL_CHARACTER_WHITESPACE_UNSAFE)) {
2480 					ucl_chunk_skipc(chunk, p);
2481 				}
2482 
2483 				p = chunk->pos;
2484 
2485 				if (p < chunk->end) {
2486 					if (*p == '[') {
2487 						parser->state = UCL_STATE_VALUE;
2488 						ucl_chunk_skipc(chunk, p);
2489 						seen_obrace = true;
2490 					}
2491 					else {
2492 
2493 						if (*p == '{') {
2494 							ucl_chunk_skipc(chunk, p);
2495 							parser->state = UCL_STATE_KEY_OBRACE;
2496 							seen_obrace = true;
2497 						}
2498 						else {
2499 							parser->state = UCL_STATE_KEY;
2500 						}
2501 					}
2502 				}
2503 
2504 				if (parser->top_obj == NULL) {
2505 					if (parser->state == UCL_STATE_VALUE) {
2506 						obj = ucl_parser_add_container(NULL, parser, true, 0,
2507 													   seen_obrace);
2508 					}
2509 					else {
2510 						obj = ucl_parser_add_container(NULL, parser, false, 0,
2511 													   seen_obrace);
2512 					}
2513 
2514 					if (obj == NULL) {
2515 						return false;
2516 					}
2517 
2518 					parser->top_obj = obj;
2519 					parser->cur_obj = obj;
2520 				}
2521 			}
2522 			break;
2523 		case UCL_STATE_KEY_OBRACE:
2524 			parser->stack->e.params.flags |= UCL_STACK_HAS_OBRACE | UCL_STACK_AUTOMATIC;
2525 			/* FALLTHROUGHT */
2526 		case UCL_STATE_KEY:
2527 			/* Skip any spaces */
2528 			while (p < chunk->end && ucl_test_character(*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
2529 				ucl_chunk_skipc(chunk, p);
2530 			}
2531 			if (p == chunk->end || *p == '}') {
2532 				/* We have the end of an object */
2533 				parser->state = UCL_STATE_AFTER_VALUE;
2534 				continue;
2535 			}
2536 			if (parser->stack == NULL) {
2537 				/* No objects are on stack, but we want to parse a key */
2538 				ucl_set_err(parser, UCL_ESYNTAX, "top object is finished but the parser "
2539 												 "expects a key",
2540 							&parser->err);
2541 				parser->prev_state = parser->state;
2542 				parser->state = UCL_STATE_ERROR;
2543 				return false;
2544 			}
2545 
2546 			got_content = false;
2547 
2548 			if (!ucl_parse_key(parser, chunk, &next_key, &end_of_object, &got_content)) {
2549 				parser->prev_state = parser->state;
2550 				parser->state = UCL_STATE_ERROR;
2551 				return false;
2552 			}
2553 
2554 			if (end_of_object) {
2555 				p = chunk->pos;
2556 				parser->state = UCL_STATE_AFTER_VALUE;
2557 				continue;
2558 			}
2559 			else if (parser->state != UCL_STATE_MACRO_NAME) {
2560 				if (next_key && parser->stack->obj->type == UCL_OBJECT) {
2561 					/* Parse more keys and nest objects accordingly */
2562 					obj = ucl_parser_add_container(parser->cur_obj,
2563 												   parser,
2564 												   false,
2565 												   parser->stack->e.params.level + 1,
2566 												   parser->state == UCL_STATE_KEY_OBRACE);
2567 					if (obj == NULL) {
2568 						return false;
2569 					}
2570 				}
2571 				else if (got_content) {
2572 					/* Do not switch state if we have not read any content */
2573 					parser->state = UCL_STATE_VALUE;
2574 				}
2575 			}
2576 			else {
2577 				c = chunk->pos;
2578 			}
2579 			p = chunk->pos;
2580 			break;
2581 		case UCL_STATE_VALUE:
2582 			/* We need to check what we do have */
2583 			if (!parser->cur_obj || !ucl_parse_value(parser, chunk)) {
2584 				parser->prev_state = parser->state;
2585 				parser->state = UCL_STATE_ERROR;
2586 				return false;
2587 			}
2588 			/* State is set in ucl_parse_value call */
2589 			p = chunk->pos;
2590 			break;
2591 		case UCL_STATE_AFTER_VALUE:
2592 			if (!ucl_parse_after_value(parser, chunk)) {
2593 				parser->prev_state = parser->state;
2594 				parser->state = UCL_STATE_ERROR;
2595 				return false;
2596 			}
2597 
2598 			if (parser->stack != NULL) {
2599 				if (parser->stack->obj->type == UCL_OBJECT) {
2600 					parser->state = UCL_STATE_KEY;
2601 				}
2602 				else {
2603 					/* Array */
2604 					parser->state = UCL_STATE_VALUE;
2605 				}
2606 			}
2607 			else {
2608 				/* Skip everything at the end */
2609 				return true;
2610 			}
2611 
2612 			p = chunk->pos;
2613 			break;
2614 		case UCL_STATE_MACRO_NAME:
2615 			if (parser->flags & UCL_PARSER_DISABLE_MACRO) {
2616 				if (!ucl_skip_macro_as_comment(parser, chunk)) {
2617 					/* We have invalid macro */
2618 					ucl_create_err(&parser->err,
2619 								   "error at %s:%d at column %d: invalid macro",
2620 								   chunk->fname ? chunk->fname : "memory",
2621 								   chunk->line,
2622 								   chunk->column);
2623 					parser->state = UCL_STATE_ERROR;
2624 					return false;
2625 				}
2626 				else {
2627 					p = chunk->pos;
2628 					parser->state = parser->prev_state;
2629 				}
2630 			}
2631 			else {
2632 				if (!ucl_test_character(*p, UCL_CHARACTER_WHITESPACE_UNSAFE) &&
2633 					*p != '(') {
2634 					ucl_chunk_skipc(chunk, p);
2635 				}
2636 				else {
2637 					if (c != NULL && p - c > 0) {
2638 						/* We got macro name */
2639 						macro_len = (size_t) (p - c);
2640 						HASH_FIND(hh, parser->macroes, c, macro_len, macro);
2641 						if (macro == NULL) {
2642 							ucl_create_err(&parser->err,
2643 										   "error at %s:%d at column %d: "
2644 										   "unknown macro: '%.*s', character: '%c'",
2645 										   chunk->fname ? chunk->fname : "memory",
2646 										   chunk->line,
2647 										   chunk->column,
2648 										   (int) (p - c),
2649 										   c,
2650 										   *chunk->pos);
2651 							parser->state = UCL_STATE_ERROR;
2652 							return false;
2653 						}
2654 						/* Now we need to skip all spaces */
2655 						SKIP_SPACES_COMMENTS(parser, chunk, p);
2656 						parser->state = UCL_STATE_MACRO;
2657 					}
2658 					else {
2659 						/* We have invalid macro name */
2660 						ucl_create_err(&parser->err,
2661 									   "error at %s:%d at column %d: invalid macro name",
2662 									   chunk->fname ? chunk->fname : "memory",
2663 									   chunk->line,
2664 									   chunk->column);
2665 						parser->state = UCL_STATE_ERROR;
2666 						return false;
2667 					}
2668 				}
2669 			}
2670 			break;
2671 		case UCL_STATE_MACRO:
2672 			if (*chunk->pos == '(') {
2673 				macro_args = ucl_parse_macro_arguments(parser, chunk);
2674 				p = chunk->pos;
2675 				if (macro_args) {
2676 					SKIP_SPACES_COMMENTS(parser, chunk, p);
2677 				}
2678 			}
2679 			else {
2680 				macro_args = NULL;
2681 			}
2682 			if (!ucl_parse_macro_value(parser, chunk, macro,
2683 									   &macro_start, &macro_len)) {
2684 				parser->prev_state = parser->state;
2685 				parser->state = UCL_STATE_ERROR;
2686 				return false;
2687 			}
2688 			macro_len = ucl_expand_variable(parser, &macro_escaped,
2689 											macro_start, macro_len);
2690 			parser->state = parser->prev_state;
2691 
2692 			if (macro_escaped == NULL && macro != NULL) {
2693 				if (macro->is_context) {
2694 					ret = macro->h.context_handler(macro_start, macro_len,
2695 												   macro_args,
2696 												   parser->top_obj,
2697 												   macro->ud);
2698 				}
2699 				else {
2700 					ret = macro->h.handler(macro_start, macro_len, macro_args,
2701 										   macro->ud);
2702 				}
2703 			}
2704 			else if (macro != NULL) {
2705 				if (macro->is_context) {
2706 					ret = macro->h.context_handler(macro_escaped, macro_len,
2707 												   macro_args,
2708 												   parser->top_obj,
2709 												   macro->ud);
2710 				}
2711 				else {
2712 					ret = macro->h.handler(macro_escaped, macro_len, macro_args,
2713 										   macro->ud);
2714 				}
2715 
2716 				UCL_FREE(macro_len + 1, macro_escaped);
2717 			}
2718 			else {
2719 				ret = false;
2720 				ucl_set_err(parser, UCL_EINTERNAL,
2721 							"internal error: parser has macro undefined", &parser->err);
2722 			}
2723 
2724 			/*
2725 			 * Chunk can be modified within macro handler
2726 			 */
2727 			chunk = parser->chunks;
2728 			p = chunk->pos;
2729 
2730 			if (macro_args) {
2731 				ucl_object_unref(macro_args);
2732 			}
2733 
2734 			if (!ret) {
2735 				return false;
2736 			}
2737 			break;
2738 		case UCL_STATE_ERROR:
2739 			/* Already in the error state */
2740 			return false;
2741 		default:
2742 			ucl_set_err(parser, UCL_EINTERNAL,
2743 						"internal error: parser is in an unknown state", &parser->err);
2744 			parser->state = UCL_STATE_ERROR;
2745 			return false;
2746 		}
2747 	}
2748 
2749 	if (parser->last_comment) {
2750 		if (parser->cur_obj) {
2751 			ucl_attach_comment(parser, parser->cur_obj, true);
2752 		}
2753 		else if (parser->stack && parser->stack->obj) {
2754 			ucl_attach_comment(parser, parser->stack->obj, true);
2755 		}
2756 		else if (parser->top_obj) {
2757 			ucl_attach_comment(parser, parser->top_obj, true);
2758 		}
2759 		else {
2760 			ucl_object_unref(parser->last_comment);
2761 		}
2762 	}
2763 
2764 	if (parser->stack != NULL && parser->state != UCL_STATE_ERROR) {
2765 		struct ucl_stack *st;
2766 		bool has_error = false;
2767 
2768 		LL_FOREACH(parser->stack, st)
2769 		{
2770 			if (st->chunk != parser->chunks) {
2771 				break; /* Not our chunk, give up */
2772 			}
2773 			if (st->e.params.flags & UCL_STACK_HAS_OBRACE) {
2774 				if (parser->err == NULL) {
2775 					utstring_new(parser->err);
2776 				}
2777 
2778 				utstring_printf(parser->err, "%s:%d unmatched open brace at %d; ",
2779 								chunk->fname ? chunk->fname : "memory",
2780 								parser->chunks->line,
2781 								st->e.params.line);
2782 
2783 				has_error = true;
2784 			}
2785 		}
2786 
2787 		if (has_error) {
2788 			parser->err_code = UCL_EUNPAIRED;
2789 
2790 			return false;
2791 		}
2792 	}
2793 
2794 	return true;
2795 }
2796 
/*
 * Call registration function `fn` as fn(a, b, c, a), i.e. with the first
 * argument repeated as the last one, and jump to label `el` when it fails.
 */
#define UPRM_SAFE(fn, a, b, c, el)  \
	do {                            \
		if (!fn(a, b, c, a)) {      \
			goto el;                \
		}                           \
	} while (0)
2802 
2803 struct ucl_parser *
ucl_parser_new(int flags)2804 ucl_parser_new(int flags)
2805 {
2806 	struct ucl_parser *parser;
2807 
2808 	parser = UCL_ALLOC(sizeof(struct ucl_parser));
2809 	if (parser == NULL) {
2810 		return NULL;
2811 	}
2812 
2813 	memset(parser, 0, sizeof(struct ucl_parser));
2814 
2815 	UPRM_SAFE(ucl_parser_register_macro, parser, "include", ucl_include_handler, e0);
2816 	UPRM_SAFE(ucl_parser_register_macro, parser, "try_include", ucl_try_include_handler, e0);
2817 	UPRM_SAFE(ucl_parser_register_macro, parser, "includes", ucl_includes_handler, e0);
2818 	UPRM_SAFE(ucl_parser_register_macro, parser, "priority", ucl_priority_handler, e0);
2819 	UPRM_SAFE(ucl_parser_register_macro, parser, "load", ucl_load_handler, e0);
2820 	UPRM_SAFE(ucl_parser_register_context_macro, parser, "inherit", ucl_inherit_handler, e0);
2821 
2822 	parser->flags = flags;
2823 	parser->includepaths = NULL;
2824 
2825 	if (flags & UCL_PARSER_SAVE_COMMENTS) {
2826 		parser->comments = ucl_object_typed_new(UCL_OBJECT);
2827 	}
2828 
2829 	if (!(flags & UCL_PARSER_NO_FILEVARS)) {
2830 		/* Initial assumption about filevars */
2831 		ucl_parser_set_filevars(parser, NULL, false);
2832 	}
2833 
2834 	return parser;
2835 e0:
2836 	ucl_parser_free(parser);
2837 	return NULL;
2838 }
2839 
ucl_parser_set_default_priority(struct ucl_parser * parser,unsigned prio)2840 bool ucl_parser_set_default_priority(struct ucl_parser *parser, unsigned prio)
2841 {
2842 	if (parser == NULL) {
2843 		return false;
2844 	}
2845 
2846 	parser->default_priority = prio;
2847 
2848 	return true;
2849 }
2850 
ucl_parser_get_default_priority(struct ucl_parser * parser)2851 int ucl_parser_get_default_priority(struct ucl_parser *parser)
2852 {
2853 	if (parser == NULL) {
2854 		return -1;
2855 	}
2856 
2857 	return parser->default_priority;
2858 }
2859 
/**
 * Register a macro handler under the given name
 * @param parser parser object
 * @param macro macro name (copied)
 * @param handler handler invoked for the macro
 * @param ud opaque user data stored with the macro
 * @return true on success, false if any mandatory argument is NULL or memory
 * cannot be allocated
 */
bool ucl_parser_register_macro(struct ucl_parser *parser, const char *macro,
							   ucl_macro_handler handler, void *ud)
{
	struct ucl_macro *new;

	/* parser is dereferenced below, so reject NULL like the other API calls */
	if (parser == NULL || macro == NULL || handler == NULL) {
		return false;
	}

	new = UCL_ALLOC(sizeof(struct ucl_macro));
	if (new == NULL) {
		return false;
	}

	memset(new, 0, sizeof(struct ucl_macro));
	new->h.handler = handler;
	new->name = UCL_STRDUP(macro);
	if (new->name == NULL) {
		UCL_FREE(sizeof(struct ucl_macro), new);
		return false;
	}
	new->ud = ud;
	/* The hash is keyed by the private copy of the name */
	HASH_ADD_KEYPTR(hh, parser->macroes, new->name, strlen(new->name), new);
	return true;
}
2885 
/**
 * Register a context macro handler under the given name
 * @param parser parser object
 * @param macro macro name (copied)
 * @param handler context handler invoked for the macro
 * @param ud opaque user data stored with the macro
 * @return true on success, false if any mandatory argument is NULL or memory
 * cannot be allocated
 */
bool ucl_parser_register_context_macro(struct ucl_parser *parser, const char *macro,
									   ucl_context_macro_handler handler, void *ud)
{
	struct ucl_macro *new;

	/* parser is dereferenced below, so reject NULL like the other API calls */
	if (parser == NULL || macro == NULL || handler == NULL) {
		return false;
	}

	new = UCL_ALLOC(sizeof(struct ucl_macro));
	if (new == NULL) {
		return false;
	}

	memset(new, 0, sizeof(struct ucl_macro));
	new->h.context_handler = handler;
	new->name = UCL_STRDUP(macro);
	if (new->name == NULL) {
		UCL_FREE(sizeof(struct ucl_macro), new);
		return false;
	}
	new->ud = ud;
	/* Marks which member of the handler union is valid */
	new->is_context = true;
	/* The hash is keyed by the private copy of the name */
	HASH_ADD_KEYPTR(hh, parser->macroes, new->name, strlen(new->name), new);
	return true;
}
2912 
ucl_parser_register_variable(struct ucl_parser * parser,const char * var,const char * value)2913 void ucl_parser_register_variable(struct ucl_parser *parser, const char *var,
2914 								  const char *value)
2915 {
2916 	struct ucl_variable *new = NULL, *cur;
2917 
2918 	if (var == NULL) {
2919 		return;
2920 	}
2921 
2922 	/* Find whether a variable already exists */
2923 	LL_FOREACH(parser->variables, cur)
2924 	{
2925 		if (strcmp(cur->var, var) == 0) {
2926 			new = cur;
2927 			break;
2928 		}
2929 	}
2930 
2931 	if (value == NULL) {
2932 
2933 		if (new != NULL) {
2934 			/* Remove variable */
2935 			DL_DELETE(parser->variables, new);
2936 			UCL_FREE(new->var_len + 1, new->var);
2937 			UCL_FREE(new->value_len + 1, new->value);
2938 			UCL_FREE(sizeof(struct ucl_variable), new);
2939 		}
2940 		else {
2941 			/* Do nothing */
2942 			return;
2943 		}
2944 	}
2945 	else {
2946 		if (new == NULL) {
2947 			new = UCL_ALLOC(sizeof(struct ucl_variable));
2948 			if (new == NULL) {
2949 				return;
2950 			}
2951 			memset(new, 0, sizeof(struct ucl_variable));
2952 			new->var = UCL_STRDUP(var);
2953 			new->var_len = strlen(var);
2954 			new->value = UCL_STRDUP(value);
2955 			new->value_len = strlen(value);
2956 
2957 			DL_APPEND(parser->variables, new);
2958 		}
2959 		else {
2960 			UCL_FREE(new->value_len + 1, new->value);
2961 			new->value = UCL_STRDUP(value);
2962 			new->value_len = strlen(value);
2963 		}
2964 	}
2965 }
2966 
/**
 * Store the variables handler and its user data in the parser
 * @param parser parser object (a NULL parser is ignored)
 * @param handler handler to store
 * @param ud opaque user data stored alongside the handler
 */
void ucl_parser_set_variables_handler(struct ucl_parser *parser,
									  ucl_variable_handler handler, void *ud)
{
	/* Consistent with the other API entry points: never dereference NULL */
	if (parser == NULL) {
		return;
	}

	parser->var_handler = handler;
	parser->var_data = ud;
}
2973 
ucl_parser_add_chunk_full(struct ucl_parser * parser,const unsigned char * data,size_t len,unsigned priority,enum ucl_duplicate_strategy strat,enum ucl_parse_type parse_type)2974 bool ucl_parser_add_chunk_full(struct ucl_parser *parser, const unsigned char *data,
2975 							   size_t len, unsigned priority, enum ucl_duplicate_strategy strat,
2976 							   enum ucl_parse_type parse_type)
2977 {
2978 	struct ucl_chunk *chunk;
2979 	struct ucl_parser_special_handler *special_handler;
2980 
2981 	if (parser == NULL) {
2982 		return false;
2983 	}
2984 
2985 	if (data == NULL && len != 0) {
2986 		ucl_create_err(&parser->err, "invalid chunk added");
2987 		return false;
2988 	}
2989 
2990 	if (parser->state != UCL_STATE_ERROR) {
2991 		chunk = UCL_ALLOC(sizeof(struct ucl_chunk));
2992 		if (chunk == NULL) {
2993 			ucl_create_err(&parser->err, "cannot allocate chunk structure");
2994 			return false;
2995 		}
2996 
2997 		memset(chunk, 0, sizeof(*chunk));
2998 
2999 		/* Apply all matching handlers from the first to the last */
3000 		LL_FOREACH(parser->special_handlers, special_handler)
3001 		{
3002 			if ((special_handler->flags & UCL_SPECIAL_HANDLER_PREPROCESS_ALL) ||
3003 				(len >= special_handler->magic_len &&
3004 				 memcmp(data, special_handler->magic, special_handler->magic_len) == 0)) {
3005 				unsigned char *ndata = NULL;
3006 				size_t nlen = 0;
3007 
3008 				if (!special_handler->handler(parser, data, len, &ndata, &nlen,
3009 											  special_handler->user_data)) {
3010 					UCL_FREE(sizeof(struct ucl_chunk), chunk);
3011 					ucl_create_err(&parser->err, "call for external handler failed");
3012 
3013 					return false;
3014 				}
3015 
3016 				struct ucl_parser_special_handler_chain *nchain;
3017 				nchain = UCL_ALLOC(sizeof(*nchain));
3018 				nchain->begin = ndata;
3019 				nchain->len = nlen;
3020 				nchain->special_handler = special_handler;
3021 
3022 				/* Free order is reversed */
3023 				LL_PREPEND(chunk->special_handlers, nchain);
3024 
3025 				data = ndata;
3026 				len = nlen;
3027 			}
3028 		}
3029 
3030 		if (parse_type == UCL_PARSE_AUTO && len > 0) {
3031 			/* We need to detect parse type by the first symbol */
3032 			if ((*data & 0x80) == 0x80) {
3033 				parse_type = UCL_PARSE_MSGPACK;
3034 			}
3035 			else if (*data == '(') {
3036 				parse_type = UCL_PARSE_CSEXP;
3037 			}
3038 			else {
3039 				parse_type = UCL_PARSE_UCL;
3040 			}
3041 		}
3042 
3043 		chunk->begin = data;
3044 		chunk->remain = len;
3045 		chunk->pos = chunk->begin;
3046 		chunk->end = chunk->begin + len;
3047 		chunk->line = 1;
3048 		chunk->column = 0;
3049 		chunk->priority = priority;
3050 		chunk->strategy = strat;
3051 		chunk->parse_type = parse_type;
3052 
3053 		if (parser->cur_file) {
3054 			chunk->fname = strdup(parser->cur_file);
3055 		}
3056 
3057 		LL_PREPEND(parser->chunks, chunk);
3058 		parser->recursion++;
3059 
3060 		if (parser->recursion > UCL_MAX_RECURSION) {
3061 			ucl_create_err(&parser->err, "maximum include nesting limit is reached: %d",
3062 						   parser->recursion);
3063 			return false;
3064 		}
3065 
3066 		if (len > 0) {
3067 			/* Need to parse something */
3068 			switch (parse_type) {
3069 			default:
3070 			case UCL_PARSE_UCL:
3071 				return ucl_state_machine(parser);
3072 			case UCL_PARSE_MSGPACK:
3073 				return ucl_parse_msgpack(parser);
3074 			case UCL_PARSE_CSEXP:
3075 				return ucl_parse_csexp(parser);
3076 			}
3077 		}
3078 		else {
3079 			/* Just add empty chunk and go forward */
3080 			if (parser->top_obj == NULL) {
3081 				/*
3082 				 * In case of empty object, create one to indicate that we've
3083 				 * read something
3084 				 */
3085 				parser->top_obj = ucl_object_new_full(UCL_OBJECT, priority);
3086 			}
3087 
3088 			return true;
3089 		}
3090 	}
3091 
3092 	ucl_create_err(&parser->err, "a parser is in an invalid state");
3093 
3094 	return false;
3095 }
3096 
ucl_parser_add_chunk_priority(struct ucl_parser * parser,const unsigned char * data,size_t len,unsigned priority)3097 bool ucl_parser_add_chunk_priority(struct ucl_parser *parser,
3098 								   const unsigned char *data, size_t len, unsigned priority)
3099 {
3100 	/* We dereference parser, so this check is essential */
3101 	if (parser == NULL) {
3102 		return false;
3103 	}
3104 
3105 	return ucl_parser_add_chunk_full(parser, data, len,
3106 									 priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL);
3107 }
3108 
ucl_parser_add_chunk(struct ucl_parser * parser,const unsigned char * data,size_t len)3109 bool ucl_parser_add_chunk(struct ucl_parser *parser, const unsigned char *data,
3110 						  size_t len)
3111 {
3112 	if (parser == NULL) {
3113 		return false;
3114 	}
3115 
3116 	return ucl_parser_add_chunk_full(parser, data, len,
3117 									 parser->default_priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL);
3118 }
3119 
ucl_parser_insert_chunk(struct ucl_parser * parser,const unsigned char * data,size_t len)3120 bool ucl_parser_insert_chunk(struct ucl_parser *parser, const unsigned char *data,
3121 							 size_t len)
3122 {
3123 	if (parser == NULL || parser->top_obj == NULL) {
3124 		return false;
3125 	}
3126 
3127 	bool res;
3128 	struct ucl_chunk *chunk;
3129 
3130 	int state = parser->state;
3131 	parser->state = UCL_STATE_INIT;
3132 
3133 	/* Prevent inserted chunks from unintentionally closing the current object */
3134 	if (parser->stack != NULL && parser->stack->next != NULL) {
3135 		parser->stack->e.params.level = parser->stack->next->e.params.level;
3136 	}
3137 
3138 	res = ucl_parser_add_chunk_full(parser, data, len, parser->chunks->priority,
3139 									parser->chunks->strategy, parser->chunks->parse_type);
3140 
3141 	/* Remove chunk from the stack */
3142 	chunk = parser->chunks;
3143 	if (chunk != NULL) {
3144 		parser->chunks = chunk->next;
3145 		ucl_chunk_free(chunk);
3146 		parser->recursion--;
3147 	}
3148 
3149 	parser->state = state;
3150 
3151 	return res;
3152 }
3153 
ucl_parser_add_string_priority(struct ucl_parser * parser,const char * data,size_t len,unsigned priority)3154 bool ucl_parser_add_string_priority(struct ucl_parser *parser, const char *data,
3155 									size_t len, unsigned priority)
3156 {
3157 	if (data == NULL) {
3158 		ucl_create_err(&parser->err, "invalid string added");
3159 		return false;
3160 	}
3161 	if (len == 0) {
3162 		len = strlen(data);
3163 	}
3164 
3165 	return ucl_parser_add_chunk_priority(parser,
3166 										 (const unsigned char *) data, len, priority);
3167 }
3168 
ucl_parser_add_string(struct ucl_parser * parser,const char * data,size_t len)3169 bool ucl_parser_add_string(struct ucl_parser *parser, const char *data,
3170 						   size_t len)
3171 {
3172 	if (parser == NULL) {
3173 		return false;
3174 	}
3175 
3176 	return ucl_parser_add_string_priority(parser,
3177 										  (const unsigned char *) data, len, parser->default_priority);
3178 }
3179 
/**
 * Replace the include paths of the parser with a copy of the given array
 * @param parser parser object
 * @param paths UCL array to copy
 * @return true if the copy has been stored, false on invalid arguments or if
 * the copy could not be made
 */
bool ucl_set_include_path(struct ucl_parser *parser, ucl_object_t *paths)
{
	if (parser == NULL || paths == NULL || paths->type != UCL_ARRAY) {
		return false;
	}

	/* Drop the previous paths, if any, before storing the new copy */
	if (parser->includepaths != NULL) {
		ucl_object_unref(parser->includepaths);
	}
	parser->includepaths = ucl_object_copy(paths);

	return parser->includepaths != NULL;
}
3200 
ucl_parser_chunk_peek(struct ucl_parser * parser)3201 unsigned char ucl_parser_chunk_peek(struct ucl_parser *parser)
3202 {
3203 	if (parser == NULL || parser->chunks == NULL || parser->chunks->pos == NULL || parser->chunks->end == NULL ||
3204 		parser->chunks->pos == parser->chunks->end) {
3205 		return 0;
3206 	}
3207 
3208 	return (*parser->chunks->pos);
3209 }
3210 
ucl_parser_chunk_skip(struct ucl_parser * parser)3211 bool ucl_parser_chunk_skip(struct ucl_parser *parser)
3212 {
3213 	if (parser == NULL || parser->chunks == NULL || parser->chunks->pos == NULL || parser->chunks->end == NULL ||
3214 		parser->chunks->pos == parser->chunks->end) {
3215 		return false;
3216 	}
3217 
3218 	const unsigned char *p = parser->chunks->pos;
3219 	ucl_chunk_skipc(parser->chunks, p);
3220 	if (parser->chunks->pos != NULL) return true;
3221 	return false;
3222 }
3223 
3224 ucl_object_t *
ucl_parser_get_current_stack_object(struct ucl_parser * parser,unsigned int depth)3225 ucl_parser_get_current_stack_object(struct ucl_parser *parser, unsigned int depth)
3226 {
3227 	ucl_object_t *obj;
3228 
3229 	if (parser == NULL || parser->stack == NULL) {
3230 		return NULL;
3231 	}
3232 
3233 	struct ucl_stack *stack = parser->stack;
3234 	if (stack == NULL || stack->obj == NULL || ucl_object_type(stack->obj) != UCL_OBJECT) {
3235 		return NULL;
3236 	}
3237 
3238 	for (unsigned int i = 0; i < depth; ++i) {
3239 		stack = stack->next;
3240 		if (stack == NULL || stack->obj == NULL || ucl_object_type(stack->obj) != UCL_OBJECT) {
3241 			return NULL;
3242 		}
3243 	}
3244 
3245 	obj = ucl_object_ref(stack->obj);
3246 	return obj;
3247 }
3248