xref: /freebsd/contrib/libucl/src/ucl_parser.c (revision 2326db40a1d2dd98631d70aae200ca52575139fb)
1 /* Copyright (c) 2013, Vsevolod Stakhov
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *       * Redistributions of source code must retain the above copyright
7  *         notice, this list of conditions and the following disclaimer.
8  *       * Redistributions in binary form must reproduce the above copyright
9  *         notice, this list of conditions and the following disclaimer in the
10  *         documentation and/or other materials provided with the distribution.
11  *
12  * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15  * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22  */
23 
24 #include <math.h>
25 #include "ucl.h"
26 #include "ucl_internal.h"
27 #include "ucl_chartable.h"
28 
29 /**
30  * @file ucl_parser.c
31  * The implementation of ucl parser
32  */
33 
/*
 * Snapshot of a scanning position.
 * NOTE(review): the structure is not referenced in the visible part of this
 * file; the field descriptions mirror the identically named chunk fields and
 * should be confirmed against the code that uses it.
 */
struct ucl_parser_saved_state {
	unsigned line;              /* line counter */
	unsigned column;            /* column counter */
	size_t remain;              /* presumably the number of unread bytes */
	const unsigned char *pos;   /* presumably the read cursor */
};
40 
/**
 * Advance the cursor by exactly one character, keeping the chunk's line and
 * column counters, its position and its remaining-byte count in sync.
 * Does nothing when the cursor already equals the end of the chunk.
 * A newline increments the line counter and resets the column to zero.
 * @param chunk pointer to the chunk being scanned (evaluated several times,
 * so it must be free of side effects)
 * @param p cursor lvalue inside the chunk (evaluated several times)
 */
#define ucl_chunk_skipc(chunk, p)    \
do {                                 \
	if ((p) == (chunk)->end) {   \
		break;               \
	}                            \
	if (*(p) == '\n') {          \
		(chunk)->line ++;    \
		(chunk)->column = 0; \
	}                            \
	else {                       \
		(chunk)->column ++;  \
	}                            \
	(p) ++;                      \
	(chunk)->pos ++;             \
	(chunk)->remain --;          \
} while (0)
62 
63 static inline void
ucl_set_err(struct ucl_parser * parser,int code,const char * str,UT_string ** err)64 ucl_set_err (struct ucl_parser *parser, int code, const char *str, UT_string **err)
65 {
66 	const char *fmt_string, *filename;
67 	struct ucl_chunk *chunk = parser->chunks;
68 
69 	if (parser->cur_file) {
70 		filename = parser->cur_file;
71 	}
72 	else {
73 		filename = "<unknown>";
74 	}
75 
76 	if (chunk->pos < chunk->end) {
77 		if (isgraph (*chunk->pos)) {
78 			fmt_string = "error while parsing %s: "
79 					"line: %d, column: %d - '%s', character: '%c'";
80 		}
81 		else {
82 			fmt_string = "error while parsing %s: "
83 					"line: %d, column: %d - '%s', character: '0x%02x'";
84 		}
85 		ucl_create_err (err, fmt_string,
86 			filename, chunk->line, chunk->column,
87 			str, *chunk->pos);
88 	}
89 	else {
90 		ucl_create_err (err, "error while parsing %s: at the end of chunk: %s",
91 			filename, str);
92 	}
93 
94 	parser->err_code = code;
95 	parser->state = UCL_STATE_ERROR;
96 }
97 
98 static void
ucl_save_comment(struct ucl_parser * parser,const char * begin,size_t len)99 ucl_save_comment (struct ucl_parser *parser, const char *begin, size_t len)
100 {
101 	ucl_object_t *nobj;
102 
103 	if (len > 0 && begin != NULL) {
104 		nobj = ucl_object_fromstring_common (begin, len, 0);
105 
106 		if (parser->last_comment) {
107 			/* We need to append data to an existing object */
108 			DL_APPEND (parser->last_comment, nobj);
109 		}
110 		else {
111 			parser->last_comment = nobj;
112 		}
113 	}
114 }
115 
116 static void
ucl_attach_comment(struct ucl_parser * parser,ucl_object_t * obj,bool before)117 ucl_attach_comment (struct ucl_parser *parser, ucl_object_t *obj, bool before)
118 {
119 	if (parser->last_comment) {
120 		ucl_object_insert_key (parser->comments, parser->last_comment,
121 				(const char *)&obj, sizeof (void *), true);
122 
123 		if (before) {
124 			parser->last_comment->flags |= UCL_OBJECT_INHERITED;
125 		}
126 
127 		parser->last_comment = NULL;
128 	}
129 }
130 
131 /**
132  * Skip all comments from the current pos resolving nested and multiline comments
133  * @param parser
134  * @return
135  */
136 static bool
ucl_skip_comments(struct ucl_parser * parser)137 ucl_skip_comments (struct ucl_parser *parser)
138 {
139 	struct ucl_chunk *chunk = parser->chunks;
140 	const unsigned char *p, *beg = NULL;
141 	int comments_nested = 0;
142 	bool quoted = false;
143 
144 	p = chunk->pos;
145 
146 start:
147 	if (chunk->remain > 0 && *p == '#') {
148 		if (parser->state != UCL_STATE_SCOMMENT &&
149 				parser->state != UCL_STATE_MCOMMENT) {
150 			beg = p;
151 
152 			while (p < chunk->end) {
153 				if (*p == '\n') {
154 					if (parser->flags & UCL_PARSER_SAVE_COMMENTS) {
155 						ucl_save_comment (parser, beg, p - beg);
156 						beg = NULL;
157 					}
158 
159 					ucl_chunk_skipc (chunk, p);
160 
161 					goto start;
162 				}
163 				ucl_chunk_skipc (chunk, p);
164 			}
165 		}
166 	}
167 	else if (chunk->remain >= 2 && *p == '/') {
168 		if (p[1] == '*') {
169 			beg = p;
170 			ucl_chunk_skipc (chunk, p);
171 			comments_nested ++;
172 			ucl_chunk_skipc (chunk, p);
173 
174 			while (p < chunk->end) {
175 				if (*p == '"' && *(p - 1) != '\\') {
176 					quoted = !quoted;
177 				}
178 
179 				if (!quoted) {
180 					if (*p == '*') {
181 						ucl_chunk_skipc (chunk, p);
182 						if (chunk->remain > 0 && *p == '/') {
183 							comments_nested --;
184 							if (comments_nested == 0) {
185 								if (parser->flags & UCL_PARSER_SAVE_COMMENTS) {
186 									ucl_save_comment (parser, beg, p - beg + 1);
187 									beg = NULL;
188 								}
189 
190 								ucl_chunk_skipc (chunk, p);
191 								goto start;
192 							}
193 						}
194 						ucl_chunk_skipc (chunk, p);
195 					}
196 					else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') {
197 						comments_nested ++;
198 						ucl_chunk_skipc (chunk, p);
199 						ucl_chunk_skipc (chunk, p);
200 						continue;
201 					}
202 				}
203 
204 				ucl_chunk_skipc (chunk, p);
205 			}
206 			if (comments_nested != 0) {
207 				ucl_set_err (parser, UCL_ENESTED,
208 						"unfinished multiline comment", &parser->err);
209 				return false;
210 			}
211 		}
212 	}
213 
214 	if (beg && p > beg && (parser->flags & UCL_PARSER_SAVE_COMMENTS)) {
215 		ucl_save_comment (parser, beg, p - beg);
216 	}
217 
218 	return true;
219 }
220 
/**
 * Map a size suffix character to its numeric multiplier.
 * @param c suffix character, compared case-insensitively ('k', 'm' or 'g')
 * @param is_bytes if true use powers of 1024 instead of powers of 1000
 * @return the multiplier, or 1 if the character is not a known suffix
 */
static inline unsigned long
ucl_lex_num_multiplier (const unsigned char c, bool is_bytes)
{
	switch (tolower (c)) {
	case 'k':
		return is_bytes ? 1024 : 1000;
	case 'm':
		return is_bytes ? 1024 * 1024 : 1000 * 1000;
	case 'g':
		return is_bytes ? 1024 * 1024 * 1024 : 1000 * 1000 * 1000;
	default:
		return 1;
	}
}
251 
252 
/**
 * Map a time suffix character to the number of seconds it stands for.
 * @param c suffix character, compared case-insensitively
 * ('m' minute, 'h' hour, 'd' day, 'w' week, 'y' 365-day year)
 * @return the multiplier in seconds, or 1 if the character is not a known
 * suffix
 */
static inline double
ucl_lex_time_multiplier (const unsigned char c)
{
	switch (tolower (c)) {
	case 'm':
		return 60;
	case 'h':
		return 60 * 60;
	case 'd':
		return 60 * 60 * 24;
	case 'w':
		return 60 * 60 * 24 * 7;
	case 'y':
		return 60 * 60 * 24 * 365;
	default:
		return 1;
	}
}
280 
281 /**
282  * Return true if a character is a end of an atom
283  * @param c
284  * @return
285  */
286 static inline bool
ucl_lex_is_atom_end(const unsigned char c)287 ucl_lex_is_atom_end (const unsigned char c)
288 {
289 	return ucl_test_character (c, UCL_CHARACTER_VALUE_END);
290 }
291 
/*
 * Tell whether two consecutive characters open a comment: a '#' alone
 * (the second character is ignored) or a slash followed by a star.
 */
static inline bool
ucl_lex_is_comment (const unsigned char c1, const unsigned char c2)
{
	return c1 == '#' || (c1 == '/' && c2 == '*');
}
305 
306 /**
307  * Check variable found
308  * @param parser
309  * @param ptr
310  * @param remain
311  * @param out_len
312  * @param strict
313  * @param found
314  * @return
315  */
316 static inline const char *
ucl_check_variable_safe(struct ucl_parser * parser,const char * ptr,size_t remain,size_t * out_len,bool strict,bool * found)317 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain,
318 		size_t *out_len, bool strict, bool *found)
319 {
320 	struct ucl_variable *var;
321 	unsigned char *dst;
322 	size_t dstlen;
323 	bool need_free = false;
324 
325 	LL_FOREACH (parser->variables, var) {
326 		if (strict) {
327 			if (remain == var->var_len) {
328 				if (memcmp (ptr, var->var, var->var_len) == 0) {
329 					*out_len += var->value_len;
330 					*found = true;
331 					return (ptr + var->var_len);
332 				}
333 			}
334 		}
335 		else {
336 			if (remain >= var->var_len) {
337 				if (memcmp (ptr, var->var, var->var_len) == 0) {
338 					*out_len += var->value_len;
339 					*found = true;
340 					return (ptr + var->var_len);
341 				}
342 			}
343 		}
344 	}
345 
346 	/* XXX: can only handle ${VAR} */
347 	if (!(*found) && parser->var_handler != NULL && strict) {
348 		/* Call generic handler */
349 		if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free,
350 				parser->var_data)) {
351 			*found = true;
352 			*out_len = dstlen;
353 
354 			if (need_free) {
355 				free (dst);
356 			}
357 			return (ptr + remain);
358 		}
359 	}
360 
361 	return ptr;
362 }
363 
/**
 * Sizing helper: inspect the text that follows a '$' sign and account for it
 * in the running output length.
 * @param parser parser object
 * @param ptr first character after the '$' sign
 * @param remain number of characters available at `ptr`
 * @param out_len running output length, updated in place
 * @param vars_found set to true once any variable has been matched
 * @return position from which the caller should continue scanning
 */
static const char *
ucl_check_variable (struct ucl_parser *parser, const char *ptr,
		size_t remain, size_t *out_len, bool *vars_found)
{
	const char *next = ptr, *cur, *limit;
	bool matched = false;

	if (*ptr == '$') {
		/* Escaped dollar sign: skip it and count one output character */
		(*out_len) ++;
		return ptr + 1;
	}

	if (*ptr != '{') {
		/* Bare form: a registered name may be a prefix of the text */
		next = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &matched);
		if (matched) {
			*vars_found = true;
		}
		else {
			(*out_len) ++;
		}
		return next;
	}

	/* Braced form: the name is everything up to the closing brace */
	limit = ptr + remain;
	for (cur = ptr + 1; cur < limit; cur ++) {
		if (*cur != '}') {
			continue;
		}

		next = ucl_check_variable_safe (parser, ptr + 1, cur - ptr - 1,
				out_len, true, &matched);
		if (matched) {
			/* {} must be excluded actually */
			next ++;
			*vars_found = true;
		}
		else {
			*out_len += 2;
		}
		break;
	}

	if (cur == limit) {
		/* No closing brace was found */
		(*out_len) ++;
	}

	return next;
}
423 
424 /**
425  * Expand a single variable
426  * @param parser
427  * @param ptr
428  * @param in_len
429  * @param dest
430  * @param out_len
431  * @return
432  */
433 static const char *
ucl_expand_single_variable(struct ucl_parser * parser,const char * ptr,size_t in_len,unsigned char ** dest,size_t out_len)434 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr,
435 		size_t in_len, unsigned char **dest, size_t out_len)
436 {
437 	unsigned char *d = *dest, *dst;
438 	const char *p = ptr + 1, *ret;
439 	struct ucl_variable *var;
440 	size_t dstlen;
441 	bool need_free = false;
442 	bool found = false;
443 	bool strict = false;
444 
445 	ret = ptr + 1;
446 	/* For the $ sign */
447 	in_len --;
448 
449 	if (*p == '$') {
450 		*d++ = *p++;
451 		*dest = d;
452 		return p;
453 	}
454 	else if (*p == '{') {
455 		p ++;
456 		in_len --;
457 		strict = true;
458 		ret += 2;
459 	}
460 
461 	LL_FOREACH (parser->variables, var) {
462 		if (out_len >= var->value_len && in_len >= (var->var_len + (strict ? 1 : 0))) {
463 			if (memcmp (p, var->var, var->var_len) == 0) {
464 				if (!strict || p[var->var_len] == '}') {
465 					memcpy (d, var->value, var->value_len);
466 					ret += var->var_len;
467 					d += var->value_len;
468 					found = true;
469 					break;
470 				}
471 			}
472 		}
473 	}
474 
475 	if (!found) {
476 		if (strict && parser->var_handler != NULL) {
477 			dstlen = out_len;
478 
479 			if (parser->var_handler (p, in_len, &dst, &dstlen, &need_free,
480 							parser->var_data)) {
481 				if (dstlen > out_len) {
482 					/* We do not have enough space! */
483 					if (need_free) {
484 						free (dst);
485 					}
486 				}
487 				else {
488 					memcpy(d, dst, dstlen);
489 					ret += in_len;
490 					d += dstlen;
491 					found = true;
492 
493 					if (need_free) {
494 						free(dst);
495 					}
496 				}
497 			}
498 		}
499 
500 		/* Leave variable as is, in this case we use dest */
501 		if (!found) {
502 			if (strict && out_len >= 2) {
503 				/* Copy '${' */
504 				memcpy (d, ptr, 2);
505 				d += 2;
506 				ret --;
507 			}
508 			else {
509 				memcpy (d, ptr, 1);
510 				d ++;
511 			}
512 		}
513 	}
514 
515 	*dest = d;
516 	return ret;
517 }
518 
519 /**
520  * Expand variables in string
521  * @param parser
522  * @param dst
523  * @param src
524  * @param in_len
525  * @return
526  */
527 static ssize_t
ucl_expand_variable(struct ucl_parser * parser,unsigned char ** dst,const char * src,size_t in_len)528 ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst,
529 		const char *src, size_t in_len)
530 {
531 	const char *p, *end = src + in_len;
532 	unsigned char *d, *d_end;
533 	size_t out_len = 0;
534 	bool vars_found = false;
535 
536 	if (parser->flags & UCL_PARSER_DISABLE_MACRO) {
537 		*dst = NULL;
538 		return in_len;
539 	}
540 
541 	p = src;
542 	while (p != end) {
543 		if (*p == '$' && p + 1 != end) {
544 			p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found);
545 		}
546 		else {
547 			p ++;
548 			out_len ++;
549 		}
550 	}
551 
552 	if (!vars_found) {
553 		/* Trivial case */
554 		*dst = NULL;
555 		return in_len;
556 	}
557 
558 	*dst = UCL_ALLOC (out_len + 1);
559 	if (*dst == NULL) {
560 		return in_len;
561 	}
562 
563 	d = *dst;
564 	d_end = d + out_len;
565 	p = src;
566 	while (p != end && d != d_end) {
567 		if (*p == '$' && p + 1 != end) {
568 			p = ucl_expand_single_variable (parser, p, end - p, &d, d_end - d);
569 		}
570 		else {
571 			*d++ = *p++;
572 		}
573 	}
574 
575 	*d = '\0';
576 
577 	return out_len;
578 }
579 
580 /**
581  * Store or copy pointer to the trash stack
582  * @param parser parser object
583  * @param src src string
584  * @param dst destination buffer (trash stack pointer)
585  * @param dst_const const destination pointer (e.g. value of object)
586  * @param in_len input length
587  * @param need_unescape need to unescape source (and copy it)
588  * @param need_lowercase need to lowercase value (and copy)
589  * @param need_expand need to expand variables (and copy as well)
590  * @param unescape_squote unescape single quoted string
591  * @return output length (excluding \0 symbol)
592  */
593 static inline ssize_t
ucl_copy_or_store_ptr(struct ucl_parser * parser,const unsigned char * src,unsigned char ** dst,const char ** dst_const,size_t in_len,bool need_unescape,bool need_lowercase,bool need_expand,bool unescape_squote)594 ucl_copy_or_store_ptr (struct ucl_parser *parser,
595 		const unsigned char *src, unsigned char **dst,
596 		const char **dst_const, size_t in_len,
597 		bool need_unescape, bool need_lowercase, bool need_expand,
598 		bool unescape_squote)
599 {
600 	ssize_t ret = -1, tret;
601 	unsigned char *tmp;
602 
603 	if (need_unescape || need_lowercase ||
604 			(need_expand && parser->variables != NULL) ||
605 			!(parser->flags & UCL_PARSER_ZEROCOPY)) {
606 		/* Copy string */
607 		*dst = UCL_ALLOC (in_len + 1);
608 		if (*dst == NULL) {
609 			ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for a string",
610 					&parser->err);
611 			return false;
612 		}
613 		if (need_lowercase) {
614 			ret = ucl_strlcpy_tolower (*dst, src, in_len + 1);
615 		}
616 		else {
617 			ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1);
618 		}
619 
620 		if (need_unescape) {
621 			if (!unescape_squote) {
622 				ret = ucl_unescape_json_string (*dst, ret);
623 			}
624 			else {
625 				ret = ucl_unescape_squoted_string (*dst, ret);
626 			}
627 		}
628 
629 		if (need_expand) {
630 			tmp = *dst;
631 			tret = ret;
632 			ret = ucl_expand_variable (parser, dst, tmp, ret);
633 			if (*dst == NULL) {
634 				/* Nothing to expand */
635 				*dst = tmp;
636 				ret = tret;
637 			}
638 			else {
639 				/* Free unexpanded value */
640 				UCL_FREE (in_len + 1, tmp);
641 			}
642 		}
643 		*dst_const = *dst;
644 	}
645 	else {
646 		*dst_const = src;
647 		ret = in_len;
648 	}
649 
650 	return ret;
651 }
652 
653 /**
654  * Create and append an object at the specified level
655  * @param parser
656  * @param is_array
657  * @param level
658  * @return
659  */
660 static inline ucl_object_t *
ucl_parser_add_container(ucl_object_t * obj,struct ucl_parser * parser,bool is_array,uint32_t level,bool has_obrace)661 ucl_parser_add_container (ucl_object_t *obj, struct ucl_parser *parser,
662 		bool is_array, uint32_t level, bool has_obrace)
663 {
664 	struct ucl_stack *st;
665 	ucl_object_t *nobj;
666 
667 	if (obj == NULL) {
668 		nobj = ucl_object_new_full (is_array ? UCL_ARRAY : UCL_OBJECT, parser->chunks->priority);
669 		if (nobj == NULL) {
670 			goto enomem0;
671 		}
672 	} else {
673 		if (obj->type == (is_array ? UCL_OBJECT : UCL_ARRAY)) {
674 			/* Bad combination for merge: array and object */
675 			ucl_set_err (parser, UCL_EMERGE,
676 					"cannot merge an object with an array",
677 					&parser->err);
678 
679 			return NULL;
680 		}
681 		nobj = obj;
682 		nobj->type = is_array ? UCL_ARRAY : UCL_OBJECT;
683 	}
684 
685 	if (!is_array) {
686 		if (nobj->value.ov == NULL) {
687 			nobj->value.ov = ucl_hash_create (parser->flags & UCL_PARSER_KEY_LOWERCASE);
688 			if (nobj->value.ov == NULL) {
689 				goto enomem1;
690 			}
691 		}
692 		parser->state = UCL_STATE_KEY;
693 	} else {
694 		parser->state = UCL_STATE_VALUE;
695 	}
696 
697 	st = UCL_ALLOC (sizeof (struct ucl_stack));
698 
699 	if (st == NULL) {
700 		goto enomem1;
701 	}
702 
703 	st->obj = nobj;
704 
705 	if (level >= UINT16_MAX) {
706 		ucl_set_err (parser, UCL_ENESTED,
707 				"objects are nesting too deep (over 65535 limit)",
708 				&parser->err);
709 		if (nobj != obj) {
710 			ucl_object_unref (obj);
711 		}
712 
713 		UCL_FREE(sizeof (struct ucl_stack), st);
714 
715 		return NULL;
716 	}
717 
718 
719 	st->e.params.level = level;
720 	st->e.params.line = parser->chunks->line;
721 	st->chunk = parser->chunks;
722 
723 	if (has_obrace) {
724 		st->e.params.flags = UCL_STACK_HAS_OBRACE;
725 	}
726 	else {
727 		st->e.params.flags = 0;
728 	}
729 
730 	LL_PREPEND (parser->stack, st);
731 	parser->cur_obj = nobj;
732 
733 	return nobj;
734 enomem1:
735 	if (nobj != obj)
736 		ucl_object_unref (nobj);
737 enomem0:
738 	ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for an object",
739 			&parser->err);
740 	return NULL;
741 }
742 
743 int
ucl_maybe_parse_number(ucl_object_t * obj,const char * start,const char * end,const char ** pos,bool allow_double,bool number_bytes,bool allow_time)744 ucl_maybe_parse_number (ucl_object_t *obj,
745 		const char *start, const char *end, const char **pos,
746 		bool allow_double, bool number_bytes, bool allow_time)
747 {
748 	const char *p = start, *c = start;
749 	char *endptr;
750 	bool got_dot = false, got_exp = false, need_double = false,
751 			is_time = false, valid_start = false, is_hex = false;
752 	int is_neg = 0;
753 	double dv = 0;
754 	int64_t lv = 0;
755 
756 	if (*p == '-') {
757 		is_neg = 1;
758 		c ++;
759 		p ++;
760 	}
761 	while (p < end) {
762 		if (is_hex && isxdigit (*p)) {
763 			p ++;
764 		}
765 		else if (isdigit (*p)) {
766 			valid_start = true;
767 			p ++;
768 		}
769 		else if (!is_hex && (*p == 'x' || *p == 'X')) {
770 			is_hex = true;
771 			allow_double = false;
772 			c = p + 1;
773 			p ++;
774 		}
775 		else if (allow_double) {
776 			if (p == c) {
777 				/* Empty digits sequence, not a number */
778 				*pos = start;
779 				return EINVAL;
780 			}
781 			else if (*p == '.') {
782 				if (got_dot) {
783 					/* Double dots, not a number */
784 					*pos = start;
785 					return EINVAL;
786 				}
787 				else {
788 					got_dot = true;
789 					need_double = true;
790 					p ++;
791 				}
792 			}
793 			else if (*p == 'e' || *p == 'E') {
794 				if (got_exp) {
795 					/* Double exp, not a number */
796 					*pos = start;
797 					return EINVAL;
798 				}
799 				else {
800 					got_exp = true;
801 					need_double = true;
802 					p ++;
803 					if (p >= end) {
804 						*pos = start;
805 						return EINVAL;
806 					}
807 					if (!isdigit (*p) && *p != '+' && *p != '-') {
808 						/* Wrong exponent sign */
809 						*pos = start;
810 						return EINVAL;
811 					}
812 					else {
813 						p ++;
814 					}
815 				}
816 			}
817 			else {
818 				/* Got the end of the number, need to check */
819 				break;
820 			}
821 		}
822 		else if (!allow_double && *p == '.') {
823 			/* Unexpected dot */
824 			*pos = start;
825 			return EINVAL;
826 		}
827 		else {
828 			break;
829 		}
830 	}
831 
832 	if (!valid_start || p == c) {
833 		*pos = start;
834 		return EINVAL;
835 	}
836 
837 	char numbuf[128];
838 
839 	if ((size_t)(p - c + 1) >= sizeof(numbuf)) {
840 		*pos = start;
841 		return EINVAL;
842 	}
843 
844 	if (is_neg) {
845 		numbuf[0] = '-';
846 		ucl_strlcpy (&numbuf[1], c, p - c + 1);
847 	}
848 	else {
849 		ucl_strlcpy (numbuf, c, p - c + 1);
850 	}
851 
852 	errno = 0;
853 	if (need_double) {
854 		dv = strtod (numbuf, &endptr);
855 	}
856 	else {
857 		if (is_hex) {
858 			lv = strtoimax (numbuf, &endptr, 16);
859 		}
860 		else {
861 			lv = strtoimax (numbuf, &endptr, 10);
862 		}
863 	}
864 	if (errno == ERANGE) {
865 		*pos = start;
866 		return ERANGE;
867 	}
868 
869 	/* Now check endptr and move it from numbuf to the real ending */
870 	if (endptr != NULL) {
871 		long shift = endptr - numbuf - is_neg;
872 		endptr = (char *)c + shift;
873 	}
874 	if (endptr >= end) {
875 		p = end;
876 		goto set_obj;
877 	}
878 	if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') {
879 		p = endptr;
880 		goto set_obj;
881 	}
882 
883 	if (endptr < end && endptr != start) {
884 		p = endptr;
885 		switch (*p) {
886 		case 'm':
887 		case 'M':
888 		case 'g':
889 		case 'G':
890 		case 'k':
891 		case 'K':
892 			if (end - p >= 2) {
893 				if (p[1] == 's' || p[1] == 'S') {
894 					/* Milliseconds */
895 					if (!need_double) {
896 						need_double = true;
897 						dv = lv;
898 					}
899 					is_time = true;
900 					if (p[0] == 'm' || p[0] == 'M') {
901 						dv /= 1000.;
902 					}
903 					else {
904 						dv *= ucl_lex_num_multiplier (*p, false);
905 					}
906 					p += 2;
907 					if (end - p > 0 && !ucl_lex_is_atom_end (*p)) {
908 						*pos = start;
909 						return EINVAL;
910 					}
911 					goto set_obj;
912 				}
913 				else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) {
914 					/* Bytes */
915 					if (need_double) {
916 						need_double = false;
917 						lv = dv;
918 					}
919 					lv *= ucl_lex_num_multiplier (*p, true);
920 					p += 2;
921 					if (end - p > 0 && !ucl_lex_is_atom_end (*p)) {
922 						*pos = start;
923 						return EINVAL;
924 					}
925 					goto set_obj;
926 				}
927 				else if (ucl_lex_is_atom_end (p[1])) {
928 					if (need_double) {
929 						dv *= ucl_lex_num_multiplier (*p, false);
930 					}
931 					else {
932 						lv *= ucl_lex_num_multiplier (*p, number_bytes);
933 					}
934 					p ++;
935 					goto set_obj;
936 				}
937 				else if (allow_time && end - p >= 3) {
938 					if (tolower (p[0]) == 'm' &&
939 							tolower (p[1]) == 'i' &&
940 							tolower (p[2]) == 'n') {
941 						/* Minutes */
942 						if (!need_double) {
943 							need_double = true;
944 							dv = lv;
945 						}
946 						is_time = true;
947 						dv *= 60.;
948 						p += 3;
949 						if (end - p > 0 && !ucl_lex_is_atom_end (*p)) {
950 							*pos = start;
951 							return EINVAL;
952 						}
953 						goto set_obj;
954 					}
955 				}
956 			}
957 			else {
958 				if (need_double) {
959 					dv *= ucl_lex_num_multiplier (*p, false);
960 				}
961 				else {
962 					lv *= ucl_lex_num_multiplier (*p, number_bytes);
963 				}
964 				p ++;
965 				if (end - p > 0 && !ucl_lex_is_atom_end (*p)) {
966 					*pos = start;
967 					return EINVAL;
968 				}
969 				goto set_obj;
970 			}
971 			break;
972 		case 'S':
973 		case 's':
974 			if (allow_time &&
975 					(p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
976 				if (!need_double) {
977 					need_double = true;
978 					dv = lv;
979 				}
980 				p ++;
981 				is_time = true;
982 				goto set_obj;
983 			}
984 			break;
985 		case 'h':
986 		case 'H':
987 		case 'd':
988 		case 'D':
989 		case 'w':
990 		case 'W':
991 		case 'Y':
992 		case 'y':
993 			if (allow_time &&
994 					(p == end - 1 || ucl_lex_is_atom_end (p[1]))) {
995 				if (!need_double) {
996 					need_double = true;
997 					dv = lv;
998 				}
999 				is_time = true;
1000 				dv *= ucl_lex_time_multiplier (*p);
1001 				p ++;
1002 				goto set_obj;
1003 			}
1004 			break;
1005 		case '\t':
1006 		case ' ':
1007 			while (p < end && ucl_test_character(*p, UCL_CHARACTER_WHITESPACE)) {
1008 				p++;
1009 			}
1010 			if (ucl_lex_is_atom_end(*p))
1011 				goto set_obj;
1012 			break;
1013 		}
1014 	}
1015 	else if (endptr == end) {
1016 		/* Just a number at the end of chunk */
1017 		p = end;
1018 		goto set_obj;
1019 	}
1020 
1021 	*pos = c;
1022 	return EINVAL;
1023 
1024 set_obj:
1025 	if (obj != NULL) {
1026 		if (allow_double && (need_double || is_time)) {
1027 			if (!is_time) {
1028 				obj->type = UCL_FLOAT;
1029 			}
1030 			else {
1031 				obj->type = UCL_TIME;
1032 			}
1033 			obj->value.dv = dv;
1034 		}
1035 		else {
1036 			obj->type = UCL_INT;
1037 			obj->value.iv = lv;
1038 		}
1039 	}
1040 	*pos = p;
1041 	return 0;
1042 }
1043 
1044 /**
1045  * Parse possible number
1046  * @param parser
1047  * @param chunk
1048  * @param obj
1049  * @return true if a number has been parsed
1050  */
1051 static bool
ucl_lex_number(struct ucl_parser * parser,struct ucl_chunk * chunk,ucl_object_t * obj)1052 ucl_lex_number (struct ucl_parser *parser,
1053 		struct ucl_chunk *chunk, ucl_object_t *obj)
1054 {
1055 	const unsigned char *pos;
1056 	int ret;
1057 
1058 	ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos,
1059 			true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0));
1060 
1061 	if (ret == 0) {
1062 		chunk->remain -= pos - chunk->pos;
1063 		chunk->column += pos - chunk->pos;
1064 		chunk->pos = pos;
1065 		return true;
1066 	}
1067 	else if (ret == ERANGE) {
1068 		ucl_set_err (parser, UCL_ESYNTAX, "numeric value out of range",
1069 				&parser->err);
1070 	}
1071 
1072 	return false;
1073 }
1074 
1075 /**
1076  * Parse quoted string with possible escapes
1077  * @param parser
1078  * @param chunk
1079  * @param need_unescape
1080  * @param ucl_escape
1081  * @param var_expand
1082  * @return true if a string has been parsed
1083  */
1084 static bool
ucl_lex_json_string(struct ucl_parser * parser,struct ucl_chunk * chunk,bool * need_unescape,bool * ucl_escape,bool * var_expand)1085 ucl_lex_json_string (struct ucl_parser *parser,
1086 		struct ucl_chunk *chunk,
1087 		bool *need_unescape,
1088 		bool *ucl_escape,
1089 		bool *var_expand)
1090 {
1091 	const unsigned char *p = chunk->pos;
1092 	unsigned char c;
1093 	int i;
1094 
1095 	while (p < chunk->end) {
1096 		c = *p;
1097 		if (c < 0x1F) {
1098 			/* Unmasked control character */
1099 			if (c == '\n') {
1100 				ucl_set_err (parser, UCL_ESYNTAX, "unexpected newline",
1101 						&parser->err);
1102 			}
1103 			else {
1104 				ucl_set_err (parser, UCL_ESYNTAX, "unexpected control character",
1105 						&parser->err);
1106 			}
1107 			return false;
1108 		}
1109 		else if (c == '\\') {
1110 			ucl_chunk_skipc (chunk, p);
1111 			if (p >= chunk->end) {
1112 				ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character",
1113 						&parser->err);
1114 				return false;
1115 			}
1116 			c = *p;
1117 			if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) {
1118 				if (c == 'u') {
1119 					ucl_chunk_skipc (chunk, p);
1120 					for (i = 0; i < 4 && p < chunk->end; i ++) {
1121 						if (!isxdigit (*p)) {
1122 							ucl_set_err (parser, UCL_ESYNTAX, "invalid utf escape",
1123 									&parser->err);
1124 							return false;
1125 						}
1126 						ucl_chunk_skipc (chunk, p);
1127 					}
1128 					if (p >= chunk->end) {
1129 						ucl_set_err (parser, UCL_ESYNTAX,
1130 								"unfinished escape character",
1131 								&parser->err);
1132 						return false;
1133 					}
1134 				}
1135 				else {
1136 					ucl_chunk_skipc (chunk, p);
1137 				}
1138 			}
1139 			*need_unescape = true;
1140 			*ucl_escape = true;
1141 			continue;
1142 		}
1143 		else if (c == '"') {
1144 			ucl_chunk_skipc (chunk, p);
1145 			return true;
1146 		}
1147 		else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) {
1148 			*ucl_escape = true;
1149 		}
1150 		else if (c == '$') {
1151 			*var_expand = true;
1152 		}
1153 		ucl_chunk_skipc (chunk, p);
1154 	}
1155 
1156 	ucl_set_err (parser, UCL_ESYNTAX,
1157 			"no quote at the end of json string",
1158 			&parser->err);
1159 	return false;
1160 }
1161 
1162 /**
1163  * Process single quoted string
1164  * @param parser
1165  * @param chunk
1166  * @param need_unescape
1167  * @return
1168  */
1169 static bool
ucl_lex_squoted_string(struct ucl_parser * parser,struct ucl_chunk * chunk,bool * need_unescape)1170 ucl_lex_squoted_string (struct ucl_parser *parser,
1171 		struct ucl_chunk *chunk, bool *need_unescape)
1172 {
1173 	const unsigned char *p = chunk->pos;
1174 	unsigned char c;
1175 
1176 	while (p < chunk->end) {
1177 		c = *p;
1178 		if (c == '\\') {
1179 			ucl_chunk_skipc (chunk, p);
1180 
1181 			if (p >= chunk->end) {
1182 				ucl_set_err (parser, UCL_ESYNTAX,
1183 						"unfinished escape character",
1184 						&parser->err);
1185 				return false;
1186 			}
1187 			else {
1188 				ucl_chunk_skipc (chunk, p);
1189 			}
1190 
1191 			*need_unescape = true;
1192 			continue;
1193 		}
1194 		else if (c == '\'') {
1195 			ucl_chunk_skipc (chunk, p);
1196 			return true;
1197 		}
1198 
1199 		ucl_chunk_skipc (chunk, p);
1200 	}
1201 
1202 	ucl_set_err (parser, UCL_ESYNTAX,
1203 			"no quote at the end of single quoted string",
1204 			&parser->err);
1205 	return false;
1206 }
1207 
1208 static void
ucl_parser_append_elt(struct ucl_parser * parser,ucl_hash_t * cont,ucl_object_t * top,ucl_object_t * elt)1209 ucl_parser_append_elt (struct ucl_parser *parser, ucl_hash_t *cont,
1210 		ucl_object_t *top,
1211 		ucl_object_t *elt)
1212 {
1213 	ucl_object_t *nobj;
1214 
1215 	if ((parser->flags & UCL_PARSER_NO_IMPLICIT_ARRAYS) == 0) {
1216 		/* Implicit array */
1217 		top->flags |= UCL_OBJECT_MULTIVALUE;
1218 		DL_APPEND (top, elt);
1219 		parser->stack->obj->len ++;
1220 	}
1221 	else {
1222 		if ((top->flags & UCL_OBJECT_MULTIVALUE) != 0) {
1223 			/* Just add to the explicit array */
1224 			ucl_array_append (top, elt);
1225 		}
1226 		else {
1227 			/* Convert to an array */
1228 			nobj = ucl_object_typed_new (UCL_ARRAY);
1229 			nobj->key = top->key;
1230 			nobj->keylen = top->keylen;
1231 			nobj->flags |= UCL_OBJECT_MULTIVALUE;
1232 			ucl_array_append (nobj, top);
1233 			ucl_array_append (nobj, elt);
1234 			ucl_hash_replace (cont, top, nobj);
1235 		}
1236 	}
1237 }
1238 
1239 bool
ucl_parser_process_object_element(struct ucl_parser * parser,ucl_object_t * nobj)1240 ucl_parser_process_object_element (struct ucl_parser *parser, ucl_object_t *nobj)
1241 {
1242 	ucl_hash_t *container;
1243 	ucl_object_t *tobj = NULL, *cur;
1244 	char errmsg[256];
1245 
1246 	container = parser->stack->obj->value.ov;
1247 
1248 	DL_FOREACH (parser->stack->obj, cur) {
1249 		tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (cur->value.ov, nobj));
1250 
1251 		if (tobj != NULL) {
1252 			break;
1253 		}
1254 	}
1255 
1256 
1257 	if (tobj == NULL) {
1258 		container = ucl_hash_insert_object (container, nobj,
1259 				parser->flags & UCL_PARSER_KEY_LOWERCASE);
1260 		if (container == NULL) {
1261 			return false;
1262 		}
1263 		nobj->prev = nobj;
1264 		nobj->next = NULL;
1265 		parser->stack->obj->len ++;
1266 	}
1267 	else {
1268 		unsigned priold = ucl_object_get_priority (tobj),
1269 				prinew = ucl_object_get_priority (nobj);
1270 		switch (parser->chunks->strategy) {
1271 
1272 		case UCL_DUPLICATE_APPEND:
1273 			/*
1274 			 * The logic here is the following:
1275 			 *
1276 			 * - if we have two objects with the same priority, then we form an
1277 			 * implicit or explicit array
1278 			 * - if a new object has bigger priority, then we overwrite an old one
1279 			 * - if a new object has lower priority, then we ignore it
1280 			 */
1281 			/* Special case for inherited objects */
1282 			if (tobj->flags & UCL_OBJECT_INHERITED) {
1283 				prinew = priold + 1;
1284 			}
1285 
1286 			if (priold == prinew) {
1287 				ucl_parser_append_elt (parser, container, tobj, nobj);
1288 			}
1289 			else if (priold > prinew) {
1290 				/*
1291 				 * We add this new object to a list of trash objects just to ensure
1292 				 * that it won't come to any real object
1293 				 * XXX: rather inefficient approach
1294 				 */
1295 				DL_APPEND (parser->trash_objs, nobj);
1296 			}
1297 			else {
1298 				ucl_hash_replace (container, tobj, nobj);
1299 				ucl_object_unref (tobj);
1300 			}
1301 
1302 			break;
1303 
1304 		case UCL_DUPLICATE_REWRITE:
1305 			/* We just rewrite old values regardless of priority */
1306 			ucl_hash_replace (container, tobj, nobj);
1307 			ucl_object_unref (tobj);
1308 
1309 			break;
1310 
1311 		case UCL_DUPLICATE_ERROR:
1312 			snprintf(errmsg, sizeof(errmsg),
1313 					"duplicate element for key '%s' found",
1314 					nobj->key);
1315 			ucl_set_err (parser, UCL_EMERGE, errmsg, &parser->err);
1316 			return false;
1317 
1318 		case UCL_DUPLICATE_MERGE:
1319 			/*
1320 			 * Here we do have some old object so we just push it on top of objects stack
1321 			 * Check priority and then perform the merge on the remaining objects
1322 			 */
1323 			if (tobj->type == UCL_OBJECT || tobj->type == UCL_ARRAY) {
1324 				ucl_object_unref (nobj);
1325 				nobj = tobj;
1326 			}
1327 			else if (priold == prinew) {
1328 				ucl_parser_append_elt (parser, container, tobj, nobj);
1329 			}
1330 			else if (priold > prinew) {
1331 				/*
1332 				 * We add this new object to a list of trash objects just to ensure
1333 				 * that it won't come to any real object
1334 				 * XXX: rather inefficient approach
1335 				 */
1336 				DL_APPEND (parser->trash_objs, nobj);
1337 			}
1338 			else {
1339 				ucl_hash_replace (container, tobj, nobj);
1340 				ucl_object_unref (tobj);
1341 			}
1342 			break;
1343 		}
1344 	}
1345 
1346 	parser->stack->obj->value.ov = container;
1347 	parser->cur_obj = nobj;
1348 	ucl_attach_comment (parser, nobj, false);
1349 
1350 	return true;
1351 }
1352 
1353 /**
1354  * Parse a key in an object
1355  * @param parser
1356  * @param chunk
1357  * @param next_key
1358  * @param end_of_object
1359  * @return true if a key has been parsed
1360  */
1361 static bool
ucl_parse_key(struct ucl_parser * parser,struct ucl_chunk * chunk,bool * next_key,bool * end_of_object,bool * got_content)1362 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk,
1363 		bool *next_key, bool *end_of_object, bool *got_content)
1364 {
1365 	const unsigned char *p, *c = NULL, *end, *t;
1366 	const char *key = NULL;
1367 	bool got_quote = false, got_eq = false, got_semicolon = false,
1368 			need_unescape = false, ucl_escape = false, var_expand = false,
1369 			got_sep = false;
1370 	ucl_object_t *nobj;
1371 	ssize_t keylen;
1372 
1373 	p = chunk->pos;
1374 
1375 	if (*p == '.' && !(parser->flags & UCL_PARSER_DISABLE_MACRO)) {
1376 		ucl_chunk_skipc (chunk, p);
1377 		parser->prev_state = parser->state;
1378 		parser->state = UCL_STATE_MACRO_NAME;
1379 		*end_of_object = false;
1380 		return true;
1381 	}
1382 	while (p < chunk->end) {
1383 		/*
1384 		 * A key must start with alpha, number, '/' or '_' and end with space character
1385 		 */
1386 		if (c == NULL) {
1387 			if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1388 				if (!ucl_skip_comments (parser)) {
1389 					return false;
1390 				}
1391 				p = chunk->pos;
1392 			}
1393 			else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1394 				ucl_chunk_skipc (chunk, p);
1395 			}
1396 			else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) {
1397 				/* The first symbol */
1398 				c = p;
1399 				ucl_chunk_skipc (chunk, p);
1400 				*got_content = true;
1401 			}
1402 			else if (*p == '"') {
1403 				/* JSON style key */
1404 				c = p + 1;
1405 				got_quote = true;
1406 				*got_content = true;
1407 				ucl_chunk_skipc (chunk, p);
1408 			}
1409 			else if (*p == '}') {
1410 				/* We have actually end of an object */
1411 				*end_of_object = true;
1412 				return true;
1413 			}
1414 			else if (*p == '.' && !(parser->flags & UCL_PARSER_DISABLE_MACRO)) {
1415 				ucl_chunk_skipc (chunk, p);
1416 				parser->prev_state = parser->state;
1417 				parser->state = UCL_STATE_MACRO_NAME;
1418 				return true;
1419 			}
1420 			else {
1421 				/* Invalid identifier */
1422 				ucl_set_err (parser, UCL_ESYNTAX, "key must begin with a letter",
1423 						&parser->err);
1424 				return false;
1425 			}
1426 		}
1427 		else {
1428 			/* Parse the body of a key */
1429 			if (!got_quote) {
1430 				if (ucl_test_character (*p, UCL_CHARACTER_KEY)) {
1431 					*got_content = true;
1432 					ucl_chunk_skipc (chunk, p);
1433 				}
1434 				else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) {
1435 					end = p;
1436 					break;
1437 				}
1438 				else {
1439 					ucl_set_err (parser, UCL_ESYNTAX, "invalid character in a key",
1440 							&parser->err);
1441 					return false;
1442 				}
1443 			}
1444 			else {
1445 				/* We need to parse json like quoted string */
1446 				if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
1447 					return false;
1448 				}
1449 				/* Always escape keys obtained via json */
1450 				end = chunk->pos - 1;
1451 				p = chunk->pos;
1452 				break;
1453 			}
1454 		}
1455 	}
1456 
1457 	if (p >= chunk->end && *got_content) {
1458 		ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err);
1459 		return false;
1460 	}
1461 	else if (!*got_content) {
1462 		return true;
1463 	}
1464 	*end_of_object = false;
1465 	/* We are now at the end of the key, need to parse the rest */
1466 	while (p < chunk->end) {
1467 		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
1468 			ucl_chunk_skipc (chunk, p);
1469 		}
1470 		else if (*p == '=') {
1471 			if (!got_eq && !got_semicolon) {
1472 				ucl_chunk_skipc (chunk, p);
1473 				got_eq = true;
1474 			}
1475 			else {
1476 				ucl_set_err (parser, UCL_ESYNTAX, "unexpected '=' character",
1477 						&parser->err);
1478 				return false;
1479 			}
1480 		}
1481 		else if (*p == ':') {
1482 			if (!got_eq && !got_semicolon) {
1483 				ucl_chunk_skipc (chunk, p);
1484 				got_semicolon = true;
1485 			}
1486 			else {
1487 				ucl_set_err (parser, UCL_ESYNTAX, "unexpected ':' character",
1488 						&parser->err);
1489 				return false;
1490 			}
1491 		}
1492 		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
1493 			/* Check for comment */
1494 			if (!ucl_skip_comments (parser)) {
1495 				return false;
1496 			}
1497 			p = chunk->pos;
1498 		}
1499 		else {
1500 			/* Start value */
1501 			break;
1502 		}
1503 	}
1504 
1505 	if (p >= chunk->end && got_content) {
1506 		ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err);
1507 		return false;
1508 	}
1509 
1510 	got_sep = got_semicolon || got_eq;
1511 
1512 	if (!got_sep) {
1513 		/*
1514 		 * Maybe we have more keys nested, so search for termination character.
1515 		 * Possible choices:
1516 		 * 1) key1 key2 ... keyN [:=] value <- we treat that as error
1517 		 * 2) key1 ... keyN {} or [] <- we treat that as nested objects
1518 		 * 3) key1 value[;,\n] <- we treat that as linear object
1519 		 */
1520 		t = p;
1521 		*next_key = false;
1522 		while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) {
1523 			t ++;
1524 		}
1525 		/* Check first non-space character after a key */
1526 		if (*t != '{' && *t != '[') {
1527 			while (t < chunk->end) {
1528 				if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') {
1529 					break;
1530 				}
1531 				else if (*t == '{' || *t == '[') {
1532 					*next_key = true;
1533 					break;
1534 				}
1535 				t ++;
1536 			}
1537 		}
1538 	}
1539 
1540 	/* Create a new object */
1541 	nobj = ucl_object_new_full (UCL_NULL, parser->chunks->priority);
1542 	if (nobj == NULL) {
1543 		return false;
1544 	}
1545 	keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY],
1546 			&key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE,
1547 			false, false);
1548 	if (keylen == -1) {
1549 		ucl_object_unref (nobj);
1550 		return false;
1551 	}
1552 	else if (keylen == 0) {
1553 		ucl_set_err (parser, UCL_ESYNTAX, "empty keys are not allowed", &parser->err);
1554 		ucl_object_unref (nobj);
1555 		return false;
1556 	}
1557 
1558 	nobj->key = key;
1559 	nobj->keylen = keylen;
1560 
1561 	if (!ucl_parser_process_object_element (parser, nobj)) {
1562 		return false;
1563 	}
1564 
1565 	if (ucl_escape) {
1566 		nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE;
1567 	}
1568 
1569 
1570 	return true;
1571 }
1572 
1573 /**
1574  * Parse a cl string
1575  * @param parser
1576  * @param chunk
1577  * @param var_expand
1578  * @param need_unescape
1579  * @return true if a key has been parsed
1580  */
1581 static bool
ucl_parse_string_value(struct ucl_parser * parser,struct ucl_chunk * chunk,bool * var_expand,bool * need_unescape)1582 ucl_parse_string_value (struct ucl_parser *parser,
1583 		struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape)
1584 {
1585 	const unsigned char *p;
1586 	enum {
1587 		UCL_BRACE_ROUND = 0,
1588 		UCL_BRACE_SQUARE,
1589 		UCL_BRACE_FIGURE
1590 	};
1591 	int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}};
1592 
1593 	p = chunk->pos;
1594 
1595 	while (p < chunk->end) {
1596 
1597 		/* Skip pairs of figure braces */
1598 		if (*p == '{') {
1599 			braces[UCL_BRACE_FIGURE][0] ++;
1600 		}
1601 		else if (*p == '}') {
1602 			braces[UCL_BRACE_FIGURE][1] ++;
1603 			if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) {
1604 				/* This is not a termination symbol, continue */
1605 				ucl_chunk_skipc (chunk, p);
1606 				continue;
1607 			}
1608 		}
1609 		/* Skip pairs of square braces */
1610 		else if (*p == '[') {
1611 			braces[UCL_BRACE_SQUARE][0] ++;
1612 		}
1613 		else if (*p == ']') {
1614 			braces[UCL_BRACE_SQUARE][1] ++;
1615 			if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) {
1616 				/* This is not a termination symbol, continue */
1617 				ucl_chunk_skipc (chunk, p);
1618 				continue;
1619 			}
1620 		}
1621 		else if (*p == '$') {
1622 			*var_expand = true;
1623 		}
1624 		else if (*p == '\\') {
1625 			*need_unescape = true;
1626 			ucl_chunk_skipc (chunk, p);
1627 			if (p < chunk->end) {
1628 				ucl_chunk_skipc (chunk, p);
1629 			}
1630 			continue;
1631 		}
1632 
1633 		if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1634 			break;
1635 		}
1636 		ucl_chunk_skipc (chunk, p);
1637 	}
1638 
1639 	return true;
1640 }
1641 
1642 /**
1643  * Parse multiline string ending with \n{term}\n
1644  * @param parser
1645  * @param chunk
1646  * @param term
1647  * @param term_len
1648  * @param beg
1649  * @param var_expand
1650  * @return size of multiline string or 0 in case of error
1651  */
1652 static int
ucl_parse_multiline_string(struct ucl_parser * parser,struct ucl_chunk * chunk,const unsigned char * term,int term_len,unsigned char const ** beg,bool * var_expand)1653 ucl_parse_multiline_string (struct ucl_parser *parser,
1654 		struct ucl_chunk *chunk, const unsigned char *term,
1655 		int term_len, unsigned char const **beg,
1656 		bool *var_expand)
1657 {
1658 	const unsigned char *p, *c, *tend;
1659 	bool newline = false;
1660 	int len = 0;
1661 
1662 	p = chunk->pos;
1663 
1664 	c = p;
1665 
1666 	while (p < chunk->end) {
1667 		if (newline) {
1668 			if (chunk->end - p < term_len) {
1669 				return 0;
1670 			}
1671 			else if (memcmp (p, term, term_len) == 0) {
1672 				tend = p + term_len;
1673 				if (*tend != '\n' && *tend != ';' && *tend != ',') {
1674 					/* Incomplete terminator */
1675 					ucl_chunk_skipc (chunk, p);
1676 					continue;
1677 				}
1678 				len = p - c;
1679 				chunk->remain -= term_len;
1680 				chunk->pos = p + term_len;
1681 				chunk->column = term_len;
1682 				*beg = c;
1683 				break;
1684 			}
1685 		}
1686 		if (*p == '\n') {
1687 			newline = true;
1688 		}
1689 		else {
1690 			if (*p == '$') {
1691 				*var_expand = true;
1692 			}
1693 			newline = false;
1694 		}
1695 		ucl_chunk_skipc (chunk, p);
1696 	}
1697 
1698 	return len;
1699 }
1700 
1701 static inline ucl_object_t*
ucl_parser_get_container(struct ucl_parser * parser)1702 ucl_parser_get_container (struct ucl_parser *parser)
1703 {
1704 	ucl_object_t *t, *obj = NULL;
1705 
1706 	if (parser == NULL || parser->stack == NULL || parser->stack->obj == NULL) {
1707 		return NULL;
1708 	}
1709 
1710 	if (parser->stack->obj->type == UCL_ARRAY) {
1711 		/* Object must be allocated */
1712 		obj = ucl_object_new_full (UCL_NULL, parser->chunks->priority);
1713 		t = parser->stack->obj;
1714 
1715 		if (!ucl_array_append (t, obj)) {
1716 			ucl_object_unref (obj);
1717 			return NULL;
1718 		}
1719 
1720 		parser->cur_obj = obj;
1721 		ucl_attach_comment (parser, obj, false);
1722 	}
1723 	else {
1724 		/* Object has been already allocated */
1725 		obj = parser->cur_obj;
1726 	}
1727 
1728 	return obj;
1729 }
1730 
1731 /**
1732  * Handle value data
1733  * @param parser
1734  * @param chunk
1735  * @return
1736  */
1737 static bool
ucl_parse_value(struct ucl_parser * parser,struct ucl_chunk * chunk)1738 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
1739 {
1740 	const unsigned char *p, *c;
1741 	ucl_object_t *obj = NULL;
1742 	unsigned int stripped_spaces;
1743 	ssize_t str_len;
1744 	bool need_unescape = false, ucl_escape = false, var_expand = false;
1745 
1746 	p = chunk->pos;
1747 
1748 	/* Skip any spaces and comments */
1749 	if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) ||
1750 			(chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) {
1751 		while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
1752 			ucl_chunk_skipc (chunk, p);
1753 		}
1754 		if (!ucl_skip_comments (parser)) {
1755 			return false;
1756 		}
1757 		p = chunk->pos;
1758 	}
1759 
1760 	while (p < chunk->end) {
1761 		c = p;
1762 		switch (*p) {
1763 		case '"':
1764 			ucl_chunk_skipc (chunk, p);
1765 
1766 			if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape,
1767 					&var_expand)) {
1768 				return false;
1769 			}
1770 
1771 			obj = ucl_parser_get_container (parser);
1772 			if (!obj) {
1773 				return false;
1774 			}
1775 
1776 			str_len = chunk->pos - c - 2;
1777 			obj->type = UCL_STRING;
1778 			if ((str_len = ucl_copy_or_store_ptr (parser, c + 1,
1779 					&obj->trash_stack[UCL_TRASH_VALUE],
1780 					&obj->value.sv, str_len, need_unescape, false,
1781 					var_expand, false)) == -1) {
1782 				return false;
1783 			}
1784 
1785 			obj->len = str_len;
1786 			parser->state = UCL_STATE_AFTER_VALUE;
1787 
1788 			return true;
1789 			break;
1790 		case '\'':
1791 			ucl_chunk_skipc (chunk, p);
1792 
1793 			if (!ucl_lex_squoted_string (parser, chunk, &need_unescape)) {
1794 				return false;
1795 			}
1796 
1797 			obj = ucl_parser_get_container (parser);
1798 			if (!obj) {
1799 				return false;
1800 			}
1801 
1802 			str_len = chunk->pos - c - 2;
1803 			obj->type = UCL_STRING;
1804 			obj->flags |= UCL_OBJECT_SQUOTED;
1805 
1806 			if ((str_len = ucl_copy_or_store_ptr (parser, c + 1,
1807 					&obj->trash_stack[UCL_TRASH_VALUE],
1808 					&obj->value.sv, str_len, need_unescape, false,
1809 					var_expand, true)) == -1) {
1810 				return false;
1811 			}
1812 
1813 			obj->len = str_len;
1814 
1815 			parser->state = UCL_STATE_AFTER_VALUE;
1816 
1817 			return true;
1818 			break;
1819 		case '{':
1820 			obj = ucl_parser_get_container (parser);
1821 			if (obj == NULL) {
1822 				parser->state = UCL_STATE_ERROR;
1823 				ucl_set_err(parser, UCL_ESYNTAX, "object value must be a part of an object",
1824 					&parser->err);
1825 				return false;
1826 			}
1827 			/* We have a new object */
1828 			if (parser->stack) {
1829 				obj = ucl_parser_add_container (obj, parser, false,
1830 						parser->stack->e.params.level, true);
1831 			}
1832 			else {
1833 				return false;
1834 			}
1835 			if (obj == NULL) {
1836 				return false;
1837 			}
1838 
1839 			ucl_chunk_skipc (chunk, p);
1840 
1841 			return true;
1842 			break;
1843 		case '[':
1844 			obj = ucl_parser_get_container (parser);
1845 			if (obj == NULL) {
1846 				parser->state = UCL_STATE_ERROR;
1847 				ucl_set_err(parser, UCL_ESYNTAX, "array value must be a part of an object",
1848 					&parser->err);
1849 				return false;
1850 			}
1851 			/* We have a new array */
1852 			if (parser->stack) {
1853 				obj = ucl_parser_add_container (obj, parser, true,
1854 						parser->stack->e.params.level, true);
1855 			}
1856 			else {
1857 				return false;
1858 			}
1859 
1860 			if (obj == NULL) {
1861 				return false;
1862 			}
1863 
1864 			ucl_chunk_skipc (chunk, p);
1865 
1866 			return true;
1867 			break;
1868 		case ']':
1869 			/* We have the array ending */
1870 			if (parser->stack && parser->stack->obj->type == UCL_ARRAY) {
1871 				parser->state = UCL_STATE_AFTER_VALUE;
1872 				return true;
1873 			}
1874 			else {
1875 				goto parse_string;
1876 			}
1877 			break;
1878 		case '<':
1879 			obj = ucl_parser_get_container (parser);
1880 			if (obj == NULL) {
1881 				parser->state = UCL_STATE_ERROR;
1882 				ucl_set_err(parser, UCL_ESYNTAX, "multiline value must be a part of an object",
1883 						&parser->err);
1884 				return false;
1885 			}
1886 			/* We have something like multiline value, which must be <<[A-Z]+\n */
1887 			if (chunk->end - p > 3) {
1888 				if (memcmp (p, "<<", 2) == 0) {
1889 					p += 2;
1890 					/* We allow only uppercase characters in multiline definitions */
1891 					while (p < chunk->end && *p >= 'A' && *p <= 'Z') {
1892 						p ++;
1893 					}
1894 					if(p == chunk->end) {
1895 						ucl_set_err (parser, UCL_ESYNTAX,
1896 								"unterminated multiline value", &parser->err);
1897 						return false;
1898 					}
1899 					if (*p =='\n') {
1900 						/* Set chunk positions and start multiline parsing */
1901 						chunk->remain -= p - c + 1;
1902 						c += 2;
1903 						chunk->pos = p + 1;
1904 						chunk->column = 0;
1905 						chunk->line ++;
1906 						if ((str_len = ucl_parse_multiline_string (parser, chunk, c,
1907 								p - c, &c, &var_expand)) == 0) {
1908 							ucl_set_err (parser, UCL_ESYNTAX,
1909 									"unterminated multiline value", &parser->err);
1910 							return false;
1911 						}
1912 
1913 						obj->type = UCL_STRING;
1914 						obj->flags |= UCL_OBJECT_MULTILINE;
1915 						if ((str_len = ucl_copy_or_store_ptr (parser, c,
1916 								&obj->trash_stack[UCL_TRASH_VALUE],
1917 								&obj->value.sv, str_len - 1, false,
1918 								false, var_expand, false)) == -1) {
1919 							return false;
1920 						}
1921 						obj->len = str_len;
1922 
1923 						parser->state = UCL_STATE_AFTER_VALUE;
1924 
1925 						return true;
1926 					}
1927 				}
1928 			}
1929 			/* Fallback to ordinary strings */
1930 			/* FALLTHRU */
1931 		default:
1932 parse_string:
1933 			if (obj == NULL) {
1934 				obj = ucl_parser_get_container (parser);
1935 			}
1936 
1937 			if (obj == NULL) {
1938 				parser->state = UCL_STATE_ERROR;
1939 				ucl_set_err(parser, UCL_ESYNTAX, "value must be a part of an object",
1940 					&parser->err);
1941 				return false;
1942 			}
1943 
1944 			/* Parse atom */
1945 			if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) {
1946 				if (!ucl_lex_number (parser, chunk, obj)) {
1947 					if (parser->state == UCL_STATE_ERROR) {
1948 						return false;
1949 					}
1950 				}
1951 				else {
1952 					parser->state = UCL_STATE_AFTER_VALUE;
1953 					return true;
1954 				}
1955 				/* Fallback to normal string */
1956 			}
1957 
1958 			if (!ucl_parse_string_value (parser, chunk, &var_expand,
1959 					&need_unescape)) {
1960 				return false;
1961 			}
1962 			/* Cut trailing spaces */
1963 			stripped_spaces = 0;
1964 			while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces),
1965 					UCL_CHARACTER_WHITESPACE)) {
1966 				stripped_spaces ++;
1967 			}
1968 			str_len = chunk->pos - c - stripped_spaces;
1969 			if (str_len <= 0) {
1970 				ucl_set_err (parser, UCL_ESYNTAX, "string value must not be empty",
1971 						&parser->err);
1972 				return false;
1973 			}
1974 			else if (str_len == 4 && memcmp (c, "null", 4) == 0) {
1975 				obj->len = 0;
1976 				obj->type = UCL_NULL;
1977 			}
1978 			else if (str_len == 3 && memcmp (c, "nan", 3) == 0) {
1979 				obj->len = 0;
1980 				obj->type = UCL_FLOAT;
1981 				obj->value.dv = NAN;
1982 			}
1983 			else if (str_len == 3 && memcmp (c, "inf", 3) == 0) {
1984 				obj->len = 0;
1985 				obj->type = UCL_FLOAT;
1986 				obj->value.dv = INFINITY;
1987 			}
1988 			else if (!ucl_maybe_parse_boolean (obj, c, str_len)) {
1989 				obj->type = UCL_STRING;
1990 				if ((str_len = ucl_copy_or_store_ptr (parser, c,
1991 						&obj->trash_stack[UCL_TRASH_VALUE],
1992 						&obj->value.sv, str_len, need_unescape,
1993 						false, var_expand, false)) == -1) {
1994 					return false;
1995 				}
1996 				obj->len = str_len;
1997 			}
1998 
1999 			parser->state = UCL_STATE_AFTER_VALUE;
2000 
2001 			return true;
2002 			break;
2003 		}
2004 	}
2005 
2006 	return true;
2007 }
2008 
2009 /**
2010  * Handle after value data
2011  * @param parser
2012  * @param chunk
2013  * @return
2014  */
2015 static bool
ucl_parse_after_value(struct ucl_parser * parser,struct ucl_chunk * chunk)2016 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
2017 {
2018 	const unsigned char *p;
2019 	bool got_sep = false;
2020 	struct ucl_stack *st;
2021 
2022 	p = chunk->pos;
2023 
2024 	while (p < chunk->end) {
2025 		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
2026 			/* Skip whitespaces */
2027 			ucl_chunk_skipc (chunk, p);
2028 		}
2029 		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
2030 			/* Skip comment */
2031 			if (!ucl_skip_comments (parser)) {
2032 				return false;
2033 			}
2034 			/* Treat comment as a separator */
2035 			got_sep = true;
2036 			p = chunk->pos;
2037 		}
2038 		else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
2039 			if (*p == '}' || *p == ']') {
2040 				if (parser->stack == NULL) {
2041 					ucl_set_err (parser, UCL_ESYNTAX,
2042 							"end of array or object detected without corresponding start",
2043 							&parser->err);
2044 					return false;
2045 				}
2046 				if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
2047 						(*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {
2048 
2049 					/* Pop all nested objects from a stack */
2050 					st = parser->stack;
2051 
2052 					if (!(st->e.params.flags & UCL_STACK_HAS_OBRACE)) {
2053 						parser->err_code = UCL_EUNPAIRED;
2054 						ucl_create_err (&parser->err,
2055 								"%s:%d object closed with } is not opened with { at line %d",
2056 								chunk->fname ? chunk->fname : "memory",
2057 								parser->chunks->line, st->e.params.line);
2058 
2059 						return false;
2060 					}
2061 
2062 					parser->stack = st->next;
2063 					UCL_FREE (sizeof (struct ucl_stack), st);
2064 
2065 					if (parser->cur_obj) {
2066 						ucl_attach_comment (parser, parser->cur_obj, true);
2067 					}
2068 
2069 					while (parser->stack != NULL) {
2070 						st = parser->stack;
2071 
2072 						if (st->next == NULL) {
2073 							break;
2074 						}
2075 						else if (st->next->e.params.level == st->e.params.level) {
2076 							break;
2077 						}
2078 
2079 
2080 						parser->stack = st->next;
2081 						parser->cur_obj = st->obj;
2082 						UCL_FREE (sizeof (struct ucl_stack), st);
2083 					}
2084 				}
2085 				else {
2086 					ucl_set_err (parser, UCL_ESYNTAX,
2087 							"unexpected terminating symbol detected",
2088 							&parser->err);
2089 					return false;
2090 				}
2091 
2092 				if (parser->stack == NULL) {
2093 					/* Ignore everything after a top object */
2094 					return true;
2095 				}
2096 				else {
2097 					ucl_chunk_skipc (chunk, p);
2098 				}
2099 				got_sep = true;
2100 			}
2101 			else {
2102 				/* Got a separator */
2103 				got_sep = true;
2104 				ucl_chunk_skipc (chunk, p);
2105 			}
2106 		}
2107 		else {
2108 			/* Anything else */
2109 			if (!got_sep) {
2110 				ucl_set_err (parser, UCL_ESYNTAX, "delimiter is missing",
2111 						&parser->err);
2112 				return false;
2113 			}
2114 			return true;
2115 		}
2116 	}
2117 
2118 	return true;
2119 }
2120 
2121 static bool
ucl_skip_macro_as_comment(struct ucl_parser * parser,struct ucl_chunk * chunk)2122 ucl_skip_macro_as_comment (struct ucl_parser *parser,
2123 		struct ucl_chunk *chunk)
2124 {
2125 	const unsigned char *p, *c;
2126 	enum {
2127 		macro_skip_start = 0,
2128 		macro_has_symbols,
2129 		macro_has_obrace,
2130 		macro_has_quote,
2131 		macro_has_backslash,
2132 		macro_has_sqbrace,
2133 		macro_save
2134 	} state = macro_skip_start, prev_state = macro_skip_start;
2135 
2136 	p = chunk->pos;
2137 	c = chunk->pos;
2138 
2139 	while (p < chunk->end) {
2140 		switch (state) {
2141 		case macro_skip_start:
2142 			if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
2143 				state = macro_has_symbols;
2144 			}
2145 			else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
2146 				state = macro_save;
2147 				continue;
2148 			}
2149 
2150 			ucl_chunk_skipc (chunk, p);
2151 			break;
2152 
2153 		case macro_has_symbols:
2154 			if (*p == '{') {
2155 				state = macro_has_sqbrace;
2156 			}
2157 			else if (*p == '(') {
2158 				state = macro_has_obrace;
2159 			}
2160 			else if (*p == '"') {
2161 				state = macro_has_quote;
2162 			}
2163 			else if (*p == '\n') {
2164 				state = macro_save;
2165 				continue;
2166 			}
2167 
2168 			ucl_chunk_skipc (chunk, p);
2169 			break;
2170 
2171 		case macro_has_obrace:
2172 			if (*p == '\\') {
2173 				prev_state = state;
2174 				state = macro_has_backslash;
2175 			}
2176 			else if (*p == ')') {
2177 				state = macro_has_symbols;
2178 			}
2179 
2180 			ucl_chunk_skipc (chunk, p);
2181 			break;
2182 
2183 		case macro_has_sqbrace:
2184 			if (*p == '\\') {
2185 				prev_state = state;
2186 				state = macro_has_backslash;
2187 			}
2188 			else if (*p == '}') {
2189 				state = macro_save;
2190 			}
2191 
2192 			ucl_chunk_skipc (chunk, p);
2193 			break;
2194 
2195 		case macro_has_quote:
2196 			if (*p == '\\') {
2197 				prev_state = state;
2198 				state = macro_has_backslash;
2199 			}
2200 			else if (*p == '"') {
2201 				state = macro_save;
2202 			}
2203 
2204 			ucl_chunk_skipc (chunk, p);
2205 			break;
2206 
2207 		case macro_has_backslash:
2208 			state = prev_state;
2209 			ucl_chunk_skipc (chunk, p);
2210 			break;
2211 
2212 		case macro_save:
2213 			if (parser->flags & UCL_PARSER_SAVE_COMMENTS) {
2214 				ucl_save_comment (parser, c, p - c);
2215 			}
2216 
2217 			return true;
2218 		}
2219 	}
2220 
2221 	return false;
2222 }
2223 
2224 /**
2225  * Handle macro data
2226  * @param parser
2227  * @param chunk
2228  * @param marco
2229  * @param macro_start
2230  * @param macro_len
2231  * @return
2232  */
2233 static bool
ucl_parse_macro_value(struct ucl_parser * parser,struct ucl_chunk * chunk,struct ucl_macro * macro,unsigned char const ** macro_start,size_t * macro_len)2234 ucl_parse_macro_value (struct ucl_parser *parser,
2235 		struct ucl_chunk *chunk, struct ucl_macro *macro,
2236 		unsigned char const **macro_start, size_t *macro_len)
2237 {
2238 	const unsigned char *p, *c;
2239 	bool need_unescape = false, ucl_escape = false, var_expand = false;
2240 
2241 	p = chunk->pos;
2242 
2243 	switch (*p) {
2244 	case '"':
2245 		/* We have macro value encoded in quotes */
2246 		c = p;
2247 		ucl_chunk_skipc (chunk, p);
2248 		if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) {
2249 			return false;
2250 		}
2251 
2252 		*macro_start = c + 1;
2253 		*macro_len = chunk->pos - c - 2;
2254 		p = chunk->pos;
2255 		break;
2256 	case '{':
2257 		/* We got a multiline macro body */
2258 		ucl_chunk_skipc (chunk, p);
2259 		/* Skip spaces at the beginning */
2260 		while (p < chunk->end) {
2261 			if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
2262 				ucl_chunk_skipc (chunk, p);
2263 			}
2264 			else {
2265 				break;
2266 			}
2267 		}
2268 		c = p;
2269 		while (p < chunk->end) {
2270 			if (*p == '}') {
2271 				break;
2272 			}
2273 			ucl_chunk_skipc (chunk, p);
2274 		}
2275 		*macro_start = c;
2276 		*macro_len = p - c;
2277 		ucl_chunk_skipc (chunk, p);
2278 		break;
2279 	default:
2280 		/* Macro is not enclosed in quotes or braces */
2281 		c = p;
2282 		while (p < chunk->end) {
2283 			if (ucl_lex_is_atom_end (*p)) {
2284 				break;
2285 			}
2286 			ucl_chunk_skipc (chunk, p);
2287 		}
2288 		*macro_start = c;
2289 		*macro_len = p - c;
2290 		break;
2291 	}
2292 
2293 	/* We are at the end of a macro */
2294 	/* Skip ';' and space characters and return to previous state */
2295 	while (p < chunk->end) {
2296 		if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') {
2297 			break;
2298 		}
2299 		ucl_chunk_skipc (chunk, p);
2300 	}
2301 	return true;
2302 }
2303 
2304 /**
2305  * Parse macro arguments as UCL object
2306  * @param parser parser structure
2307  * @param chunk the current data chunk
2308  * @return
2309  */
2310 static ucl_object_t *
ucl_parse_macro_arguments(struct ucl_parser * parser,struct ucl_chunk * chunk)2311 ucl_parse_macro_arguments (struct ucl_parser *parser,
2312 		struct ucl_chunk *chunk)
2313 {
2314 	ucl_object_t *res = NULL;
2315 	struct ucl_parser *params_parser;
2316 	int obraces = 1, ebraces = 0, state = 0;
2317 	const unsigned char *p, *c;
2318 	size_t args_len = 0;
2319 	struct ucl_parser_saved_state saved;
2320 
2321 	saved.column = chunk->column;
2322 	saved.line = chunk->line;
2323 	saved.pos = chunk->pos;
2324 	saved.remain = chunk->remain;
2325 	p = chunk->pos;
2326 
2327 	if (*p != '(' || chunk->remain < 2) {
2328 		return NULL;
2329 	}
2330 
2331 	/* Set begin and start */
2332 	ucl_chunk_skipc (chunk, p);
2333 	c = p;
2334 
2335 	while ((p) < (chunk)->end) {
2336 		switch (state) {
2337 		case 0:
2338 			/* Parse symbols and check for '(', ')' and '"' */
2339 			if (*p == '(') {
2340 				obraces ++;
2341 			}
2342 			else if (*p == ')') {
2343 				ebraces ++;
2344 			}
2345 			else if (*p == '"') {
2346 				state = 1;
2347 			}
2348 			/* Check pairing */
2349 			if (obraces == ebraces) {
2350 				state = 99;
2351 			}
2352 			else {
2353 				args_len ++;
2354 			}
2355 			/* Check overflow */
2356 			if (chunk->remain == 0) {
2357 				goto restore_chunk;
2358 			}
2359 			ucl_chunk_skipc (chunk, p);
2360 			break;
2361 		case 1:
2362 			/* We have quote character, so skip all but quotes */
2363 			if (*p == '"' && *(p - 1) != '\\') {
2364 				state = 0;
2365 			}
2366 			if (chunk->remain == 0) {
2367 				goto restore_chunk;
2368 			}
2369 			args_len ++;
2370 			ucl_chunk_skipc (chunk, p);
2371 			break;
2372 		case 99:
2373 			/*
2374 			 * We have read the full body of arguments, so we need to parse and set
2375 			 * object from that
2376 			 */
2377 			params_parser = ucl_parser_new (parser->flags);
2378 			if (!ucl_parser_add_chunk (params_parser, c, args_len)) {
2379 				ucl_set_err (parser, UCL_ESYNTAX, "macro arguments parsing error",
2380 						&parser->err);
2381 			}
2382 			else {
2383 				res = ucl_parser_get_object (params_parser);
2384 			}
2385 			ucl_parser_free (params_parser);
2386 
2387 			return res;
2388 
2389 			break;
2390 		}
2391 	}
2392 
2393 	return res;
2394 
2395 restore_chunk:
2396 	chunk->column = saved.column;
2397 	chunk->line = saved.line;
2398 	chunk->pos = saved.pos;
2399 	chunk->remain = saved.remain;
2400 
2401 	return NULL;
2402 }
2403 
/*
 * Advance p (and the chunk) over whitespace. If the first non-whitespace
 * position starts a comment, ucl_skip_comments is called and p is resynced
 * with the chunk position. Expands to `return false` in the enclosing
 * function when ucl_skip_comments fails.
 */
#define SKIP_SPACES_COMMENTS(parser, chunk, p) do { \
	while ((p) < (chunk)->end && \
			ucl_test_character (*(p), UCL_CHARACTER_WHITESPACE_UNSAFE)) { \
		ucl_chunk_skipc (chunk, p); \
	} \
	if ((p) < (chunk)->end && (chunk)->remain >= 2 && \
			ucl_lex_is_comment ((p)[0], (p)[1])) { \
		if (!ucl_skip_comments (parser)) { \
			return false; \
		} \
		p = (chunk)->pos; \
	} \
} while(0)
2418 
2419 /**
2420  * Handle the main states of rcl parser
2421  * @param parser parser structure
2422  * @return true if chunk has been parsed and false in case of error
2423  */
2424 static bool
ucl_state_machine(struct ucl_parser * parser)2425 ucl_state_machine (struct ucl_parser *parser)
2426 {
2427 	ucl_object_t *obj, *macro_args;
2428 	struct ucl_chunk *chunk = parser->chunks;
2429 	const unsigned char *p, *c = NULL, *macro_start = NULL;
2430 	unsigned char *macro_escaped;
2431 	size_t macro_len = 0;
2432 	struct ucl_macro *macro = NULL;
2433 	bool next_key = false, end_of_object = false, got_content = false, ret;
2434 
2435 	if (parser->top_obj == NULL) {
2436 		parser->state = UCL_STATE_INIT;
2437 	}
2438 
2439 	p = chunk->pos;
2440 	while (chunk->pos < chunk->end) {
2441 		switch (parser->state) {
2442 		case UCL_STATE_INIT:
2443 			/*
2444 			 * At the init state we can either go to the parse array or object
2445 			 * if we got [ or { correspondingly or can just treat new data as
2446 			 * a key of newly created object
2447 			 */
2448 			if (!ucl_skip_comments (parser)) {
2449 				parser->prev_state = parser->state;
2450 				parser->state = UCL_STATE_ERROR;
2451 				return false;
2452 			}
2453 			else {
2454 				bool seen_obrace = false;
2455 
2456 				/* Skip any spaces */
2457 				while (p < chunk->end && ucl_test_character (*p,
2458 						UCL_CHARACTER_WHITESPACE_UNSAFE)) {
2459 					ucl_chunk_skipc (chunk, p);
2460 				}
2461 
2462 				p = chunk->pos;
2463 
2464 				if (p < chunk->end) {
2465 					if (*p == '[') {
2466 						parser->state = UCL_STATE_VALUE;
2467 						ucl_chunk_skipc (chunk, p);
2468 						seen_obrace = true;
2469 					}
2470 					else {
2471 
2472 						if (*p == '{') {
2473 							ucl_chunk_skipc (chunk, p);
2474 							parser->state = UCL_STATE_KEY_OBRACE;
2475 							seen_obrace = true;
2476 						}
2477 						else {
2478 							parser->state = UCL_STATE_KEY;
2479 						}
2480 					}
2481 				}
2482 
2483 				if (parser->top_obj == NULL) {
2484 					if (parser->state == UCL_STATE_VALUE) {
2485 						obj = ucl_parser_add_container (NULL, parser, true, 0,
2486 								seen_obrace);
2487 					}
2488 					else {
2489 						obj = ucl_parser_add_container (NULL, parser, false, 0,
2490 								seen_obrace);
2491 					}
2492 
2493 					if (obj == NULL) {
2494 						return false;
2495 					}
2496 
2497 					parser->top_obj = obj;
2498 					parser->cur_obj = obj;
2499 				}
2500 
2501 			}
2502 			break;
2503 		case UCL_STATE_KEY:
2504 		case UCL_STATE_KEY_OBRACE:
2505 			/* Skip any spaces */
2506 			while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) {
2507 				ucl_chunk_skipc (chunk, p);
2508 			}
2509 			if (p == chunk->end || *p == '}') {
2510 				/* We have the end of an object */
2511 				parser->state = UCL_STATE_AFTER_VALUE;
2512 				continue;
2513 			}
2514 			if (parser->stack == NULL) {
2515 				/* No objects are on stack, but we want to parse a key */
2516 				ucl_set_err (parser, UCL_ESYNTAX, "top object is finished but the parser "
2517 						"expects a key", &parser->err);
2518 				parser->prev_state = parser->state;
2519 				parser->state = UCL_STATE_ERROR;
2520 				return false;
2521 			}
2522 
2523 			got_content = false;
2524 
2525 			if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object, &got_content)) {
2526 				parser->prev_state = parser->state;
2527 				parser->state = UCL_STATE_ERROR;
2528 				return false;
2529 			}
2530 
2531 			if (end_of_object) {
2532 				p = chunk->pos;
2533 				parser->state = UCL_STATE_AFTER_VALUE;
2534 				continue;
2535 			}
2536 			else if (parser->state != UCL_STATE_MACRO_NAME) {
2537 				if (next_key && parser->stack->obj->type == UCL_OBJECT) {
2538 					/* Parse more keys and nest objects accordingly */
2539 					obj = ucl_parser_add_container (parser->cur_obj,
2540 							parser,
2541 							false,
2542 							parser->stack->e.params.level + 1,
2543 							parser->state == UCL_STATE_KEY_OBRACE);
2544 					if (obj == NULL) {
2545 						return false;
2546 					}
2547 				}
2548 				else if (got_content) {
2549 					/* Do not switch state if we have not read any content */
2550 					parser->state = UCL_STATE_VALUE;
2551 				}
2552 			}
2553 			else {
2554 				c = chunk->pos;
2555 			}
2556 			p = chunk->pos;
2557 			break;
2558 		case UCL_STATE_VALUE:
2559 			/* We need to check what we do have */
2560 			if (!parser->cur_obj || !ucl_parse_value (parser, chunk)) {
2561 				parser->prev_state = parser->state;
2562 				parser->state = UCL_STATE_ERROR;
2563 				return false;
2564 			}
2565 			/* State is set in ucl_parse_value call */
2566 			p = chunk->pos;
2567 			break;
2568 		case UCL_STATE_AFTER_VALUE:
2569 			if (!ucl_parse_after_value (parser, chunk)) {
2570 				parser->prev_state = parser->state;
2571 				parser->state = UCL_STATE_ERROR;
2572 				return false;
2573 			}
2574 
2575 			if (parser->stack != NULL) {
2576 				if (parser->stack->obj->type == UCL_OBJECT) {
2577 					parser->state = UCL_STATE_KEY;
2578 				}
2579 				else {
2580 					/* Array */
2581 					parser->state = UCL_STATE_VALUE;
2582 				}
2583 			}
2584 			else {
2585 				/* Skip everything at the end */
2586 				return true;
2587 			}
2588 
2589 			p = chunk->pos;
2590 			break;
2591 		case UCL_STATE_MACRO_NAME:
2592 			if (parser->flags & UCL_PARSER_DISABLE_MACRO) {
2593 				if (!ucl_skip_macro_as_comment (parser, chunk)) {
2594 					/* We have invalid macro */
2595 					ucl_create_err (&parser->err,
2596 							"error at %s:%d at column %d: invalid macro",
2597 							chunk->fname ? chunk->fname : "memory",
2598 							chunk->line,
2599 							chunk->column);
2600 					parser->state = UCL_STATE_ERROR;
2601 					return false;
2602 				}
2603 				else {
2604 					p = chunk->pos;
2605 					parser->state = parser->prev_state;
2606 				}
2607 			}
2608 			else {
2609 				if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) &&
2610 						*p != '(') {
2611 					ucl_chunk_skipc (chunk, p);
2612 				}
2613 				else {
2614 					if (c != NULL && p - c > 0) {
2615 						/* We got macro name */
2616 						macro_len = (size_t) (p - c);
2617 						HASH_FIND (hh, parser->macroes, c, macro_len, macro);
2618 						if (macro == NULL) {
2619 							ucl_create_err (&parser->err,
2620 									"error at %s:%d at column %d: "
2621 									"unknown macro: '%.*s', character: '%c'",
2622 									chunk->fname ? chunk->fname : "memory",
2623 									chunk->line,
2624 									chunk->column,
2625 									(int) (p - c),
2626 									c,
2627 									*chunk->pos);
2628 							parser->state = UCL_STATE_ERROR;
2629 							return false;
2630 						}
2631 						/* Now we need to skip all spaces */
2632 						SKIP_SPACES_COMMENTS(parser, chunk, p);
2633 						parser->state = UCL_STATE_MACRO;
2634 					}
2635 					else {
2636 						/* We have invalid macro name */
2637 						ucl_create_err (&parser->err,
2638 								"error at %s:%d at column %d: invalid macro name",
2639 								chunk->fname ? chunk->fname : "memory",
2640 								chunk->line,
2641 								chunk->column);
2642 						parser->state = UCL_STATE_ERROR;
2643 						return false;
2644 					}
2645 				}
2646 			}
2647 			break;
2648 		case UCL_STATE_MACRO:
2649 			if (*chunk->pos == '(') {
2650 				macro_args = ucl_parse_macro_arguments (parser, chunk);
2651 				p = chunk->pos;
2652 				if (macro_args) {
2653 					SKIP_SPACES_COMMENTS(parser, chunk, p);
2654 				}
2655 			}
2656 			else {
2657 				macro_args = NULL;
2658 			}
2659 			if (!ucl_parse_macro_value (parser, chunk, macro,
2660 					&macro_start, &macro_len)) {
2661 				parser->prev_state = parser->state;
2662 				parser->state = UCL_STATE_ERROR;
2663 				return false;
2664 			}
2665 			macro_len = ucl_expand_variable (parser, &macro_escaped,
2666 					macro_start, macro_len);
2667 			parser->state = parser->prev_state;
2668 
2669 			if (macro_escaped == NULL && macro != NULL) {
2670 				if (macro->is_context) {
2671 					ret = macro->h.context_handler (macro_start, macro_len,
2672 							macro_args,
2673 							parser->top_obj,
2674 							macro->ud);
2675 				}
2676 				else {
2677 					ret = macro->h.handler (macro_start, macro_len, macro_args,
2678 							macro->ud);
2679 				}
2680 			}
2681 			else if (macro != NULL) {
2682 				if (macro->is_context) {
2683 					ret = macro->h.context_handler (macro_escaped, macro_len,
2684 							macro_args,
2685 							parser->top_obj,
2686 							macro->ud);
2687 				}
2688 				else {
2689 					ret = macro->h.handler (macro_escaped, macro_len, macro_args,
2690 						macro->ud);
2691 				}
2692 
2693 				UCL_FREE (macro_len + 1, macro_escaped);
2694 			}
2695 			else {
2696 				ret = false;
2697 				ucl_set_err (parser, UCL_EINTERNAL,
2698 						"internal error: parser has macro undefined", &parser->err);
2699 			}
2700 
2701 			/*
2702 			 * Chunk can be modified within macro handler
2703 			 */
2704 			chunk = parser->chunks;
2705 			p = chunk->pos;
2706 
2707 			if (macro_args) {
2708 				ucl_object_unref (macro_args);
2709 			}
2710 
2711 			if (!ret) {
2712 				return false;
2713 			}
2714 			break;
2715 		case UCL_STATE_ERROR:
2716 			/* Already in the error state */
2717 			return false;
2718 		default:
2719 			ucl_set_err (parser, UCL_EINTERNAL,
2720 					"internal error: parser is in an unknown state", &parser->err);
2721 			parser->state = UCL_STATE_ERROR;
2722 			return false;
2723 		}
2724 	}
2725 
2726 	if (parser->last_comment) {
2727 		if (parser->cur_obj) {
2728 			ucl_attach_comment (parser, parser->cur_obj, true);
2729 		}
2730 		else if (parser->stack && parser->stack->obj) {
2731 			ucl_attach_comment (parser, parser->stack->obj, true);
2732 		}
2733 		else if (parser->top_obj) {
2734 			ucl_attach_comment (parser, parser->top_obj, true);
2735 		}
2736 		else {
2737 			ucl_object_unref (parser->last_comment);
2738 		}
2739 	}
2740 
2741 	if (parser->stack != NULL && parser->state != UCL_STATE_ERROR) {
2742 		struct ucl_stack *st;
2743 		bool has_error = false;
2744 
2745 		LL_FOREACH (parser->stack, st) {
2746 			if (st->chunk != parser->chunks) {
2747 				break; /* Not our chunk, give up */
2748 			}
2749 			if (st->e.params.flags & UCL_STACK_HAS_OBRACE) {
2750 				if (parser->err == NULL) {
2751 					utstring_new (parser->err);
2752 				}
2753 
2754 				utstring_printf (parser->err, "%s:%d unmatched open brace at %d; ",
2755 						chunk->fname ? chunk->fname : "memory",
2756 						parser->chunks->line,
2757 						st->e.params.line);
2758 
2759 				has_error = true;
2760 			}
2761 		}
2762 
2763 		if (has_error) {
2764 			parser->err_code = UCL_EUNPAIRED;
2765 
2766 			return false;
2767 		}
2768 	}
2769 
2770 	return true;
2771 }
2772 
/*
 * Call fn as fn(a, b, c, a) -- the first argument is passed once more as the
 * last one -- and jump to the label el if the call returns false.
 */
#define UPRM_SAFE(fn, a, b, c, el) do { \
		if (fn(a, b, c, a)) \
			break; \
		goto el; \
	} while (0)
2777 
2778 struct ucl_parser*
ucl_parser_new(int flags)2779 ucl_parser_new (int flags)
2780 {
2781 	struct ucl_parser *parser;
2782 
2783 	parser = UCL_ALLOC (sizeof (struct ucl_parser));
2784 	if (parser == NULL) {
2785 		return NULL;
2786 	}
2787 
2788 	memset (parser, 0, sizeof (struct ucl_parser));
2789 
2790 	UPRM_SAFE(ucl_parser_register_macro, parser, "include", ucl_include_handler, e0);
2791 	UPRM_SAFE(ucl_parser_register_macro, parser, "try_include", ucl_try_include_handler, e0);
2792 	UPRM_SAFE(ucl_parser_register_macro, parser, "includes", ucl_includes_handler, e0);
2793 	UPRM_SAFE(ucl_parser_register_macro, parser, "priority", ucl_priority_handler, e0);
2794 	UPRM_SAFE(ucl_parser_register_macro, parser, "load", ucl_load_handler, e0);
2795 	UPRM_SAFE(ucl_parser_register_context_macro, parser, "inherit", ucl_inherit_handler, e0);
2796 
2797 	parser->flags = flags;
2798 	parser->includepaths = NULL;
2799 
2800 	if (flags & UCL_PARSER_SAVE_COMMENTS) {
2801 		parser->comments = ucl_object_typed_new (UCL_OBJECT);
2802 	}
2803 
2804 	if (!(flags & UCL_PARSER_NO_FILEVARS)) {
2805 		/* Initial assumption about filevars */
2806 		ucl_parser_set_filevars (parser, NULL, false);
2807 	}
2808 
2809 	return parser;
2810 e0:
2811 	ucl_parser_free(parser);
2812 	return NULL;
2813 }
2814 
2815 bool
ucl_parser_set_default_priority(struct ucl_parser * parser,unsigned prio)2816 ucl_parser_set_default_priority (struct ucl_parser *parser, unsigned prio)
2817 {
2818 	if (parser == NULL) {
2819 		return false;
2820 	}
2821 
2822 	parser->default_priority = prio;
2823 
2824 	return true;
2825 }
2826 
2827 int
ucl_parser_get_default_priority(struct ucl_parser * parser)2828 ucl_parser_get_default_priority (struct ucl_parser *parser)
2829 {
2830 	if (parser == NULL) {
2831 		return -1;
2832 	}
2833 
2834 	return parser->default_priority;
2835 }
2836 
2837 bool
ucl_parser_register_macro(struct ucl_parser * parser,const char * macro,ucl_macro_handler handler,void * ud)2838 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro,
2839 		ucl_macro_handler handler, void* ud)
2840 {
2841 	struct ucl_macro *new;
2842 
2843 	if (macro == NULL || handler == NULL) {
2844 		return false;
2845 	}
2846 
2847 	new = UCL_ALLOC (sizeof (struct ucl_macro));
2848 	if (new == NULL) {
2849 		return false;
2850 	}
2851 
2852 	memset (new, 0, sizeof (struct ucl_macro));
2853 	new->h.handler = handler;
2854 	new->name = strdup (macro);
2855 	if (new->name == NULL) {
2856 		UCL_FREE (sizeof (struct ucl_macro), new);
2857 		return false;
2858 	}
2859 	new->ud = ud;
2860 	HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
2861 	return true;
2862 }
2863 
2864 bool
ucl_parser_register_context_macro(struct ucl_parser * parser,const char * macro,ucl_context_macro_handler handler,void * ud)2865 ucl_parser_register_context_macro (struct ucl_parser *parser, const char *macro,
2866 		ucl_context_macro_handler handler, void* ud)
2867 {
2868 	struct ucl_macro *new;
2869 
2870 	if (macro == NULL || handler == NULL) {
2871 		return false;
2872 	}
2873 
2874 	new = UCL_ALLOC (sizeof (struct ucl_macro));
2875 	if (new == NULL) {
2876 		return false;
2877 	}
2878 
2879 	memset (new, 0, sizeof (struct ucl_macro));
2880 	new->h.context_handler = handler;
2881 	new->name = strdup (macro);
2882 	if (new->name == NULL) {
2883 		UCL_FREE (sizeof (struct ucl_macro), new);
2884 		return false;
2885 	}
2886 	new->ud = ud;
2887 	new->is_context = true;
2888 	HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new);
2889 	return true;
2890 }
2891 
2892 void
ucl_parser_register_variable(struct ucl_parser * parser,const char * var,const char * value)2893 ucl_parser_register_variable (struct ucl_parser *parser, const char *var,
2894 		const char *value)
2895 {
2896 	struct ucl_variable *new = NULL, *cur;
2897 
2898 	if (var == NULL) {
2899 		return;
2900 	}
2901 
2902 	/* Find whether a variable already exists */
2903 	LL_FOREACH (parser->variables, cur) {
2904 		if (strcmp (cur->var, var) == 0) {
2905 			new = cur;
2906 			break;
2907 		}
2908 	}
2909 
2910 	if (value == NULL) {
2911 
2912 		if (new != NULL) {
2913 			/* Remove variable */
2914 			DL_DELETE (parser->variables, new);
2915 			free (new->var);
2916 			free (new->value);
2917 			UCL_FREE (sizeof (struct ucl_variable), new);
2918 		}
2919 		else {
2920 			/* Do nothing */
2921 			return;
2922 		}
2923 	}
2924 	else {
2925 		if (new == NULL) {
2926 			new = UCL_ALLOC (sizeof (struct ucl_variable));
2927 			if (new == NULL) {
2928 				return;
2929 			}
2930 			memset (new, 0, sizeof (struct ucl_variable));
2931 			new->var = strdup (var);
2932 			new->var_len = strlen (var);
2933 			new->value = strdup (value);
2934 			new->value_len = strlen (value);
2935 
2936 			DL_APPEND (parser->variables, new);
2937 		}
2938 		else {
2939 			free (new->value);
2940 			new->value = strdup (value);
2941 			new->value_len = strlen (value);
2942 		}
2943 	}
2944 }
2945 
2946 void
ucl_parser_set_variables_handler(struct ucl_parser * parser,ucl_variable_handler handler,void * ud)2947 ucl_parser_set_variables_handler (struct ucl_parser *parser,
2948 		ucl_variable_handler handler, void *ud)
2949 {
2950 	parser->var_handler = handler;
2951 	parser->var_data = ud;
2952 }
2953 
2954 bool
ucl_parser_add_chunk_full(struct ucl_parser * parser,const unsigned char * data,size_t len,unsigned priority,enum ucl_duplicate_strategy strat,enum ucl_parse_type parse_type)2955 ucl_parser_add_chunk_full (struct ucl_parser *parser, const unsigned char *data,
2956 		size_t len, unsigned priority, enum ucl_duplicate_strategy strat,
2957 		enum ucl_parse_type parse_type)
2958 {
2959 	struct ucl_chunk *chunk;
2960 	struct ucl_parser_special_handler *special_handler;
2961 
2962 	if (parser == NULL) {
2963 		return false;
2964 	}
2965 
2966 	if (data == NULL && len != 0) {
2967 		ucl_create_err (&parser->err, "invalid chunk added");
2968 		return false;
2969 	}
2970 
2971 	if (parser->state != UCL_STATE_ERROR) {
2972 		chunk = UCL_ALLOC (sizeof (struct ucl_chunk));
2973 		if (chunk == NULL) {
2974 			ucl_create_err (&parser->err, "cannot allocate chunk structure");
2975 			return false;
2976 		}
2977 
2978 		memset (chunk, 0, sizeof (*chunk));
2979 
2980 		/* Apply all matching handlers from the first to the last */
2981 		LL_FOREACH (parser->special_handlers, special_handler) {
2982 			if ((special_handler->flags & UCL_SPECIAL_HANDLER_PREPROCESS_ALL) ||
2983 					(len >= special_handler->magic_len &&
2984 					 memcmp (data, special_handler->magic, special_handler->magic_len) == 0)) {
2985 				unsigned char *ndata = NULL;
2986 				size_t nlen = 0;
2987 
2988 				if (!special_handler->handler (parser, data, len, &ndata, &nlen,
2989 						special_handler->user_data)) {
2990 					UCL_FREE(sizeof (struct ucl_chunk), chunk);
2991 					ucl_create_err (&parser->err, "call for external handler failed");
2992 
2993 					return false;
2994 				}
2995 
2996 				struct ucl_parser_special_handler_chain *nchain;
2997 				nchain = UCL_ALLOC (sizeof (*nchain));
2998 				nchain->begin = ndata;
2999 				nchain->len = nlen;
3000 				nchain->special_handler = special_handler;
3001 
3002 				/* Free order is reversed */
3003 				LL_PREPEND (chunk->special_handlers, nchain);
3004 
3005 				data = ndata;
3006 				len = nlen;
3007 			}
3008 		}
3009 
3010 		if (parse_type == UCL_PARSE_AUTO && len > 0) {
3011 			/* We need to detect parse type by the first symbol */
3012 			if ((*data & 0x80) == 0x80) {
3013 				parse_type = UCL_PARSE_MSGPACK;
3014 			}
3015 			else if (*data == '(') {
3016 				parse_type = UCL_PARSE_CSEXP;
3017 			}
3018 			else {
3019 				parse_type = UCL_PARSE_UCL;
3020 			}
3021 		}
3022 
3023 		chunk->begin = data;
3024 		chunk->remain = len;
3025 		chunk->pos = chunk->begin;
3026 		chunk->end = chunk->begin + len;
3027 		chunk->line = 1;
3028 		chunk->column = 0;
3029 		chunk->priority = priority;
3030 		chunk->strategy = strat;
3031 		chunk->parse_type = parse_type;
3032 
3033 		if (parser->cur_file) {
3034 			chunk->fname = strdup (parser->cur_file);
3035 		}
3036 
3037 		LL_PREPEND (parser->chunks, chunk);
3038 		parser->recursion ++;
3039 
3040 		if (parser->recursion > UCL_MAX_RECURSION) {
3041 			ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d",
3042 					parser->recursion);
3043 			return false;
3044 		}
3045 
3046 		if (len > 0) {
3047 			/* Need to parse something */
3048 			switch (parse_type) {
3049 			default:
3050 			case UCL_PARSE_UCL:
3051 				return ucl_state_machine (parser);
3052 			case UCL_PARSE_MSGPACK:
3053 				return ucl_parse_msgpack (parser);
3054 			case UCL_PARSE_CSEXP:
3055 				return ucl_parse_csexp (parser);
3056 			}
3057 		}
3058 		else {
3059 			/* Just add empty chunk and go forward */
3060 			if (parser->top_obj == NULL) {
3061 				/*
3062 				 * In case of empty object, create one to indicate that we've
3063 				 * read something
3064 				 */
3065 				parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority);
3066 			}
3067 
3068 			return true;
3069 		}
3070 	}
3071 
3072 	ucl_create_err (&parser->err, "a parser is in an invalid state");
3073 
3074 	return false;
3075 }
3076 
3077 bool
ucl_parser_add_chunk_priority(struct ucl_parser * parser,const unsigned char * data,size_t len,unsigned priority)3078 ucl_parser_add_chunk_priority (struct ucl_parser *parser,
3079 		const unsigned char *data, size_t len, unsigned priority)
3080 {
3081 	/* We dereference parser, so this check is essential */
3082 	if (parser == NULL) {
3083 		return false;
3084 	}
3085 
3086 	return ucl_parser_add_chunk_full (parser, data, len,
3087 				priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL);
3088 }
3089 
3090 bool
ucl_parser_add_chunk(struct ucl_parser * parser,const unsigned char * data,size_t len)3091 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data,
3092 		size_t len)
3093 {
3094 	if (parser == NULL) {
3095 		return false;
3096 	}
3097 
3098 	return ucl_parser_add_chunk_full (parser, data, len,
3099 			parser->default_priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL);
3100 }
3101 
3102 bool
ucl_parser_insert_chunk(struct ucl_parser * parser,const unsigned char * data,size_t len)3103 ucl_parser_insert_chunk (struct ucl_parser *parser, const unsigned char *data,
3104 		size_t len)
3105 {
3106 	if (parser == NULL || parser->top_obj == NULL) {
3107 		return false;
3108 	}
3109 
3110 	bool res;
3111 	struct ucl_chunk *chunk;
3112 
3113 	int state = parser->state;
3114 	parser->state = UCL_STATE_INIT;
3115 
3116 	/* Prevent inserted chunks from unintentionally closing the current object */
3117 	if (parser->stack != NULL && parser->stack->next != NULL) {
3118 		parser->stack->e.params.level = parser->stack->next->e.params.level;
3119 	}
3120 
3121 	res = ucl_parser_add_chunk_full (parser, data, len, parser->chunks->priority,
3122 					parser->chunks->strategy, parser->chunks->parse_type);
3123 
3124 	/* Remove chunk from the stack */
3125 	chunk = parser->chunks;
3126 	if (chunk != NULL) {
3127 		parser->chunks = chunk->next;
3128 		ucl_chunk_free (chunk);
3129 		parser->recursion --;
3130 	}
3131 
3132 	parser->state = state;
3133 
3134 	return res;
3135 }
3136 
3137 bool
ucl_parser_add_string_priority(struct ucl_parser * parser,const char * data,size_t len,unsigned priority)3138 ucl_parser_add_string_priority (struct ucl_parser *parser, const char *data,
3139 		size_t len, unsigned priority)
3140 {
3141 	if (data == NULL) {
3142 		ucl_create_err (&parser->err, "invalid string added");
3143 		return false;
3144 	}
3145 	if (len == 0) {
3146 		len = strlen (data);
3147 	}
3148 
3149 	return ucl_parser_add_chunk_priority (parser,
3150 			(const unsigned char *)data, len, priority);
3151 }
3152 
3153 bool
ucl_parser_add_string(struct ucl_parser * parser,const char * data,size_t len)3154 ucl_parser_add_string (struct ucl_parser *parser, const char *data,
3155 		size_t len)
3156 {
3157 	if (parser == NULL) {
3158 		return false;
3159 	}
3160 
3161 	return ucl_parser_add_string_priority (parser,
3162 			(const unsigned char *)data, len, parser->default_priority);
3163 }
3164 
3165 bool
ucl_set_include_path(struct ucl_parser * parser,ucl_object_t * paths)3166 ucl_set_include_path (struct ucl_parser *parser, ucl_object_t *paths)
3167 {
3168 	if (parser == NULL || paths == NULL) {
3169 		return false;
3170 	}
3171 
3172 	if (parser->includepaths == NULL) {
3173 		parser->includepaths = ucl_object_copy (paths);
3174 	}
3175 	else {
3176 		ucl_object_unref (parser->includepaths);
3177 		parser->includepaths = ucl_object_copy (paths);
3178 	}
3179 
3180 	if (parser->includepaths == NULL) {
3181 		return false;
3182 	}
3183 
3184 	return true;
3185 }
3186 
ucl_parser_chunk_peek(struct ucl_parser * parser)3187 unsigned char ucl_parser_chunk_peek (struct ucl_parser *parser)
3188 {
3189 	if (parser == NULL || parser->chunks == NULL || parser->chunks->pos == NULL || parser->chunks->end == NULL ||
3190 		parser->chunks->pos == parser->chunks->end) {
3191 		return 0;
3192 	}
3193 
3194 	return( *parser->chunks->pos );
3195 }
3196 
ucl_parser_chunk_skip(struct ucl_parser * parser)3197 bool ucl_parser_chunk_skip (struct ucl_parser *parser)
3198 {
3199 	if (parser == NULL || parser->chunks == NULL || parser->chunks->pos == NULL || parser->chunks->end == NULL ||
3200 		parser->chunks->pos == parser->chunks->end) {
3201 		return false;
3202 	}
3203 
3204 	const unsigned char *p = parser->chunks->pos;
3205 	ucl_chunk_skipc( parser->chunks, p );
3206 	if( parser->chunks->pos != NULL ) return true;
3207 	return false;
3208 }
3209 
3210 ucl_object_t*
ucl_parser_get_current_stack_object(struct ucl_parser * parser,unsigned int depth)3211 ucl_parser_get_current_stack_object (struct ucl_parser *parser, unsigned int depth)
3212 {
3213 	ucl_object_t *obj;
3214 
3215 	if (parser == NULL || parser->stack == NULL) {
3216 		return NULL;
3217 	}
3218 
3219 	struct ucl_stack *stack = parser->stack;
3220 	if(stack == NULL || stack->obj == NULL || ucl_object_type (stack->obj) != UCL_OBJECT)
3221 	{
3222 		return NULL;
3223 	}
3224 
3225 	for( unsigned int i = 0; i < depth; ++i )
3226 	{
3227 		stack = stack->next;
3228 		if(stack == NULL || stack->obj == NULL || ucl_object_type (stack->obj) != UCL_OBJECT)
3229 		{
3230 			return NULL;
3231 		}
3232 	}
3233 
3234 	obj = ucl_object_ref (stack->obj);
3235 	return obj;
3236 }
3237 
3238