xref: /freebsd/contrib/libucl/src/ucl_internal.h (revision abda442d92fdbadcf81c79bc9ddba001d133c429)
1 /* Copyright (c) 2013, Vsevolod Stakhov
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *       * Redistributions of source code must retain the above copyright
7  *         notice, this list of conditions and the following disclaimer.
8  *       * Redistributions in binary form must reproduce the above copyright
9  *         notice, this list of conditions and the following disclaimer in the
10  *         documentation and/or other materials provided with the distribution.
11  *
12  * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15  * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22  */
23 
24 #ifndef UCL_INTERNAL_H_
25 #define UCL_INTERNAL_H_
26 
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #else
30 /* Help embedded builds */
31 #define HAVE_SYS_TYPES_H
32 #define HAVE_SYS_MMAN_H
33 #define HAVE_SYS_STAT_H
34 #define HAVE_SYS_PARAM_H
35 #define HAVE_LIMITS_H
36 #define HAVE_FCNTL_H
37 #define HAVE_ERRNO_H
38 #define HAVE_UNISTD_H
39 #define HAVE_CTYPE_H
40 #define HAVE_STDIO_H
41 #define HAVE_STRING_H
42 #define HAVE_FLOAT_H
43 #define HAVE_LIBGEN_H
44 #define HAVE_MATH_H
45 #define HAVE_STDBOOL_H
46 #define HAVE_STDINT_H
47 #define HAVE_STDARG_H
48 #ifndef _WIN32
49 #define HAVE_REGEX_H
50 #endif
51 #endif
52 
53 #ifdef HAVE_SYS_TYPES_H
54 #include <sys/types.h>
55 #endif
56 
57 #ifdef HAVE_SYS_MMAN_H
58 #ifndef _WIN32
59 #include <sys/mman.h>
60 #endif
61 #endif
62 #ifdef HAVE_SYS_STAT_H
63 #include <sys/stat.h>
64 #endif
65 #ifdef HAVE_SYS_PARAM_H
66 #ifndef _WIN32
67 #include <sys/param.h>
68 #endif
69 #endif
70 
71 #ifdef HAVE_LIMITS_H
72 #include <limits.h>
73 #endif
74 #ifdef HAVE_FCNTL_H
75 #include <fcntl.h>
76 #endif
77 #ifdef HAVE_ERRNO_H
78 #include <errno.h>
79 #endif
80 #ifdef HAVE_UNISTD_H
81 #ifndef _WIN32
82 #include <unistd.h>
83 #endif
84 #endif
85 #ifdef HAVE_CTYPE_H
86 #include <ctype.h>
87 #endif
88 #ifdef HAVE_STDIO_H
89 #include <stdio.h>
90 #endif
91 #ifdef HAVE_STRING_H
92 #include <string.h>
93 #endif
94 #ifdef HAVE_STRINGS_H
95 #include <strings.h>
96 #endif
97 
98 #if defined(_MSC_VER)
99 /* Windows hacks */
100 #include <BaseTsd.h>
101 #include <inttypes.h>
102 typedef SSIZE_T ssize_t;
103 #define strdup _strdup
104 #define snprintf _snprintf
105 #define vsnprintf _vsnprintf
106 #define strcasecmp _stricmp
107 #define strncasecmp _strnicmp
108 #define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
109 #define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
110 #if _MSC_VER >= 1900
111 #include <../ucrt/stdlib.h>
112 #else
113 #include <../include/stdlib.h>
114 #endif
115 #ifndef PATH_MAX
116 #define PATH_MAX _MAX_PATH
117 #endif
118 
119 /* Dirname, basename implementations */
120 
121 
122 #endif
123 
124 #include "utlist.h"
125 #include "utstring.h"
126 #include "uthash.h"
127 #include "ucl.h"
128 #include "ucl_hash.h"
129 
130 #ifdef HAVE_OPENSSL
131 #include <openssl/evp.h>
132 #endif
133 
134 #ifndef __DECONST
135 #define __DECONST(type, var) ((type) (uintptr_t) (const void *) (var))
136 #endif
137 
/**
 * @file ucl_internal.h
 * Internal structures and functions of the UCL library
 */
142 
143 #define UCL_MAX_RECURSION 16
144 #define UCL_TRASH_KEY 0
145 #define UCL_TRASH_VALUE 1
146 
/**
 * States of the UCL parser. A value of this type is kept in both the
 * `state` and `prev_state` members of struct ucl_parser.
 * UCL_STATE_INIT is explicitly 0; the remaining values follow sequentially.
 */
enum ucl_parser_state {
	UCL_STATE_INIT = 0,
	UCL_STATE_OBJECT,
	UCL_STATE_ARRAY,
	UCL_STATE_KEY,
	UCL_STATE_KEY_OBRACE,
	UCL_STATE_VALUE,
	UCL_STATE_AFTER_VALUE,
	UCL_STATE_ARRAY_VALUE,
	UCL_STATE_SCOMMENT,
	UCL_STATE_MCOMMENT,
	UCL_STATE_MACRO_NAME,
	UCL_STATE_MACRO,
	UCL_STATE_ERROR
};
162 
/**
 * Character classification flags. Every constant occupies its own bit
 * (bits 0..12), so several classes can be combined in one mask with
 * bitwise OR and tested with bitwise AND.
 * NOTE(review): presumably used with a per-character lookup table in the
 * parser/emitter - confirm against the implementation files.
 */
enum ucl_character_type {
	UCL_CHARACTER_DENIED = (1 << 0),
	UCL_CHARACTER_KEY = (1 << 1),
	UCL_CHARACTER_KEY_START = (1 << 2),
	UCL_CHARACTER_WHITESPACE = (1 << 3),
	UCL_CHARACTER_WHITESPACE_UNSAFE = (1 << 4),
	UCL_CHARACTER_VALUE_END = (1 << 5),
	UCL_CHARACTER_VALUE_STR = (1 << 6),
	UCL_CHARACTER_VALUE_DIGIT = (1 << 7),
	UCL_CHARACTER_VALUE_DIGIT_START = (1 << 8),
	UCL_CHARACTER_ESCAPE = (1 << 9),
	UCL_CHARACTER_KEY_SEP = (1 << 10),
	UCL_CHARACTER_JSON_UNSAFE = (1 << 11),
	UCL_CHARACTER_UCL_UNSAFE = (1 << 12)
};
178 
/**
 * A macro record: a name, a handler callback and the user data pointer
 * that accompanies it.
 */
struct ucl_macro {
	char *name;
	/* Two alternative callback signatures sharing the same storage */
	union _ucl_macro {
		ucl_macro_handler handler;
		ucl_context_macro_handler context_handler;
	} h;
	void *ud;
	/* NOTE(review): presumably selects which member of `h` is valid
	 * (true => context_handler) - confirm against the registration code */
	bool is_context;
	/* uthash bookkeeping handle; lets the record live in a uthash table */
	UT_hash_handle hh;
};
189 
/**
 * Single-bit flags (unsigned constants, one bit each).
 * NOTE(review): presumably stored in ucl_stack.e.params.flags - confirm
 * against the parser sources.
 */
enum ucl_stack_flags {
	UCL_STACK_HAS_OBRACE = (1u << 0),
	UCL_STACK_AUTOMATIC = (1u << 1),
	UCL_STACK_MAX = (1u << 2),
};
195 
/**
 * One entry of a singly linked stack (linked through `next`) that refers
 * to an object and to a chunk.
 */
struct ucl_stack {
	ucl_object_t *obj;
	struct ucl_stack *next;
	/*
	 * Two views of the same 64 bits: either the level/flags/line triple
	 * (16 + 16 + 32 bits) or a single 64-bit length. Only one view is
	 * meaningful at a time.
	 * NOTE(review): which view applies in which parser mode is not visible
	 * in this header - confirm against the parser sources.
	 */
	union {
		struct {
			uint16_t level;
			uint16_t flags;
			uint32_t line;
		} params;
		uint64_t len;
	} e;
	struct ucl_chunk *chunk;
};
209 
/**
 * Node of a singly linked list (linked through `next`) that pairs a byte
 * buffer (`begin`, `len`) with a special handler.
 * NOTE(review): buffer ownership is not visible in this header - confirm
 * who frees `begin`.
 */
struct ucl_parser_special_handler_chain {
	unsigned char *begin;
	size_t len;
	struct ucl_parser_special_handler *special_handler;
	struct ucl_parser_special_handler_chain *next;
};
216 
/**
 * A chunk of input data handled by the parser. Chunks are linked through
 * `next`; released with ucl_chunk_free().
 */
struct ucl_chunk {
	/* Read-only pointers into the chunk's data */
	const unsigned char *begin;
	const unsigned char *end;
	const unsigned char *pos;
	char *fname;
	size_t remain;
	/* Position counters for the chunk */
	unsigned int line;
	unsigned int column;
	unsigned priority;
	enum ucl_duplicate_strategy strategy;
	enum ucl_parse_type parse_type;
	/* Head of the special handler chain attached to this chunk */
	struct ucl_parser_special_handler_chain *special_handlers;
	struct ucl_chunk *next;
};
231 
/*
 * Node of a singly linked list of public keys. The key itself (an OpenSSL
 * EVP_PKEY) is only present when built with HAVE_OPENSSL; otherwise the
 * structure degenerates to a bare list link.
 */
#ifdef HAVE_OPENSSL
struct ucl_pubkey {
	EVP_PKEY *key;
	struct ucl_pubkey *next;
};
#else
struct ucl_pubkey {
	struct ucl_pubkey *next;
};
#endif
242 
/**
 * A variable: name and value strings with their lengths stored alongside.
 * Nodes carry both `prev` and `next` links.
 */
struct ucl_variable {
	char *var;
	char *value;
	size_t var_len;
	size_t value_len;
	struct ucl_variable *prev, *next;
};
250 
/**
 * Parser instance. Holds the current and previous state, object pointers
 * (top, current, trash, include paths, comments), macros, variables and
 * special handlers, the stack, the input chunks, public keys and the error
 * string.
 */
struct ucl_parser {
	enum ucl_parser_state state;
	enum ucl_parser_state prev_state;
	/* NOTE(review): presumably bounded by UCL_MAX_RECURSION - confirm */
	unsigned int recursion;
	int flags;
	unsigned default_priority;
	int err_code;
	ucl_object_t *top_obj;
	ucl_object_t *cur_obj;
	ucl_object_t *trash_objs;
	ucl_object_t *includepaths;
	char *cur_file;
	struct ucl_macro *macroes;
	struct ucl_stack *stack;
	struct ucl_chunk *chunks;
	struct ucl_pubkey *keys;
	struct ucl_parser_special_handler *special_handlers;
	/* Include trace callback and the user data pointer that goes with it */
	ucl_include_trace_func_t *include_trace_func;
	void *include_trace_ud;
	struct ucl_variable *variables;
	/* Variable handler callback and the user data pointer that goes with it */
	ucl_variable_handler var_handler;
	void *var_data;
	ucl_object_t *comments;
	ucl_object_t *last_comment;
	/* Error text; same type that ucl_create_err() fills through UT_string ** */
	UT_string *err;
};
277 
/**
 * A UCL object extended with userdata callbacks. The embedded object is
 * the first member, so a pointer to this structure may be converted to a
 * pointer to its ucl_object_t (and back).
 */
struct ucl_object_userdata {
	ucl_object_t obj;
	ucl_userdata_dtor dtor;
	ucl_userdata_emitter emitter;
};
283 
284 /**
285  * Unescape json string inplace
286  * @param str
287  */
288 size_t ucl_unescape_json_string(char *str, size_t len);
289 
290 
291 /**
292  * Unescape single quoted string inplace
293  * @param str
294  */
295 size_t ucl_unescape_squoted_string(char *str, size_t len);
296 
297 /**
298  * Handle include macro
299  * @param data include data
300  * @param len length of data
301  * @param args UCL object representing arguments to the macro
302  * @param ud user data
303  * @return
304  */
305 bool ucl_include_handler(const unsigned char *data, size_t len,
306 						 const ucl_object_t *args, void *ud);
307 
308 /**
309  * Handle tryinclude macro
310  * @param data include data
311  * @param len length of data
312  * @param args UCL object representing arguments to the macro
313  * @param ud user data
314  * @return
315  */
316 bool ucl_try_include_handler(const unsigned char *data, size_t len,
317 							 const ucl_object_t *args, void *ud);
318 
319 /**
320  * Handle includes macro
321  * @param data include data
322  * @param len length of data
323  * @param args UCL object representing arguments to the macro
324  * @param ud user data
325  * @return
326  */
327 bool ucl_includes_handler(const unsigned char *data, size_t len,
328 						  const ucl_object_t *args, void *ud);
329 
330 /**
331  * Handle priority macro
332  * @param data include data
333  * @param len length of data
334  * @param args UCL object representing arguments to the macro
335  * @param ud user data
336  * @return
337  */
338 bool ucl_priority_handler(const unsigned char *data, size_t len,
339 						  const ucl_object_t *args, void *ud);
340 
341 /**
342  * Handle load macro
343  * @param data include data
344  * @param len length of data
345  * @param args UCL object representing arguments to the macro
346  * @param ud user data
347  * @return
348  */
349 bool ucl_load_handler(const unsigned char *data, size_t len,
350 					  const ucl_object_t *args, void *ud);
351 /**
352  * Handle inherit macro
353  * @param data include data
354  * @param len length of data
355  * @param args UCL object representing arguments to the macro
356  * @param ctx the current context object
357  * @param ud user data
358  * @return
359  */
360 bool ucl_inherit_handler(const unsigned char *data, size_t len,
361 						 const ucl_object_t *args, const ucl_object_t *ctx, void *ud);
362 
363 size_t ucl_strlcpy(char *dst, const char *src, size_t siz);
364 size_t ucl_strlcpy_unsafe(char *dst, const char *src, size_t siz);
365 size_t ucl_strlcpy_tolower(char *dst, const char *src, size_t siz);
366 
367 char *ucl_strnstr(const char *s, const char *find, int len);
368 char *ucl_strncasestr(const char *s, const char *find, int len);
369 
370 #ifdef __GNUC__
371 static inline void
372 ucl_create_err(UT_string **err, const char *fmt, ...)
373 	__attribute__((format(printf, 2, 3)));
374 #endif
375 
376 #undef UCL_FATAL_ERRORS
377 
378 static inline void
ucl_create_err(UT_string ** err,const char * fmt,...)379 ucl_create_err(UT_string **err, const char *fmt, ...)
380 {
381 	if (*err == NULL) {
382 		utstring_new(*err);
383 		va_list ap;
384 		va_start(ap, fmt);
385 		utstring_printf_va(*err, fmt, ap);
386 		va_end(ap);
387 	}
388 
389 #ifdef UCL_FATAL_ERRORS
390 	assert(0);
391 #endif
392 }
393 
394 /**
395  * Check whether a given string contains a boolean value
396  * @param obj object to set
397  * @param start start of a string
398  * @param len length of a string
399  * @return true if a string is a boolean value
400  */
401 static inline bool
ucl_maybe_parse_boolean(ucl_object_t * obj,const unsigned char * start,size_t len)402 ucl_maybe_parse_boolean(ucl_object_t *obj, const unsigned char *start, size_t len)
403 {
404 	const char *p = (const char *) start;
405 	bool ret = false, val = false;
406 
407 	if (len == 5) {
408 		if ((p[0] == 'f' || p[0] == 'F') && strncasecmp(p, "false", 5) == 0) {
409 			ret = true;
410 			val = false;
411 		}
412 	}
413 	else if (len == 4) {
414 		if ((p[0] == 't' || p[0] == 'T') && strncasecmp(p, "true", 4) == 0) {
415 			ret = true;
416 			val = true;
417 		}
418 	}
419 	else if (len == 3) {
420 		if ((p[0] == 'y' || p[0] == 'Y') && strncasecmp(p, "yes", 3) == 0) {
421 			ret = true;
422 			val = true;
423 		}
424 		else if ((p[0] == 'o' || p[0] == 'O') && strncasecmp(p, "off", 3) == 0) {
425 			ret = true;
426 			val = false;
427 		}
428 	}
429 	else if (len == 2) {
430 		if ((p[0] == 'n' || p[0] == 'N') && strncasecmp(p, "no", 2) == 0) {
431 			ret = true;
432 			val = false;
433 		}
434 		else if ((p[0] == 'o' || p[0] == 'O') && strncasecmp(p, "on", 2) == 0) {
435 			ret = true;
436 			val = true;
437 		}
438 	}
439 
440 	if (ret && obj != NULL) {
441 		obj->type = UCL_BOOLEAN;
442 		obj->value.iv = val;
443 	}
444 
445 	return ret;
446 }
447 
448 /**
449  * Check numeric string
450  * @param obj object to set if a string is numeric
451  * @param start start of string
452  * @param end end of string
453  * @param pos position where parsing has stopped
454  * @param allow_double allow parsing of floating point values
455  * @return 0 if string is numeric and error code (EINVAL or ERANGE) in case of conversion error
456  */
457 int ucl_maybe_parse_number(ucl_object_t *obj,
458 						   const char *start, const char *end, const char **pos,
459 						   bool allow_double, bool number_bytes, bool allow_time);
460 
461 
462 static inline const ucl_object_t *
ucl_hash_search_obj(ucl_hash_t * hashlin,ucl_object_t * obj)463 ucl_hash_search_obj(ucl_hash_t *hashlin, ucl_object_t *obj)
464 {
465 	return (const ucl_object_t *) ucl_hash_search(hashlin, obj->key, obj->keylen);
466 }
467 
468 static inline ucl_hash_t *ucl_hash_insert_object(ucl_hash_t *hashlin,
469 												 const ucl_object_t *obj,
470 												 bool ignore_case) UCL_WARN_UNUSED_RESULT;
471 
472 static inline ucl_hash_t *
ucl_hash_insert_object(ucl_hash_t * hashlin,const ucl_object_t * obj,bool ignore_case)473 ucl_hash_insert_object(ucl_hash_t *hashlin,
474 					   const ucl_object_t *obj,
475 					   bool ignore_case)
476 {
477 	ucl_hash_t *nhp;
478 
479 	if (hashlin == NULL) {
480 		nhp = ucl_hash_create(ignore_case);
481 		if (nhp == NULL) {
482 			return NULL;
483 		}
484 	}
485 	else {
486 		nhp = hashlin;
487 	}
488 	if (!ucl_hash_insert(nhp, obj, obj->key, obj->keylen)) {
489 		if (nhp != hashlin) {
490 			ucl_hash_destroy(nhp, NULL);
491 		}
492 		return NULL;
493 	}
494 
495 	return nhp;
496 }
497 
498 /**
499  * Get standard emitter context for a specified emit_type
500  * @param emit_type type of emitter
501  * @return context or NULL if input is invalid
502  */
503 const struct ucl_emitter_context *
504 ucl_emit_get_standard_context(enum ucl_emitter emit_type);
505 
/**
 * Serialize string as JSON string
 * @param str string to emit
 * @param size length of the string
 * @param ctx emitter context to write to
 */
511 void ucl_elt_string_write_json(const char *str, size_t size,
512 							   struct ucl_emitter_context *ctx);
513 
514 
/**
 * Serialize string as single quoted string
 * @param str string to emit
 * @param size length of the string
 * @param ctx emitter context to write to
 */
520 void ucl_elt_string_write_squoted(const char *str, size_t size,
521 								  struct ucl_emitter_context *ctx);
522 
523 /**
524  * Write multiline string using `EOD` as string terminator
525  * @param str
526  * @param size
527  * @param ctx
528  */
529 void ucl_elt_string_write_multiline(const char *str, size_t size,
530 									struct ucl_emitter_context *ctx);
531 
532 /**
533  * Emit a single object to string
534  * @param obj
535  * @return
536  */
537 unsigned char *ucl_object_emit_single_json(const ucl_object_t *obj);
538 
539 /**
540  * Check whether a specified string is long and should be likely printed in
541  * multiline mode
542  * @param obj
543  * @return
544  */
545 bool ucl_maybe_long_string(const ucl_object_t *obj);
546 
547 /**
548  * Print integer to the msgpack output
549  * @param ctx
550  * @param val
551  */
552 void ucl_emitter_print_int_msgpack(struct ucl_emitter_context *ctx,
553 								   int64_t val);
/**
 * Print double to the msgpack output
 * @param ctx
 * @param val
 */
559 void ucl_emitter_print_double_msgpack(struct ucl_emitter_context *ctx,
560 									  double val);
/**
 * Print boolean to the msgpack output
 * @param ctx
 * @param val
 */
566 void ucl_emitter_print_bool_msgpack(struct ucl_emitter_context *ctx,
567 									bool val);
568 /**
569  * Print string to the msgpack output
570  * @param ctx
571  * @param s
572  * @param len
573  */
574 void ucl_emitter_print_string_msgpack(struct ucl_emitter_context *ctx,
575 									  const char *s, size_t len);
576 
577 /**
578  * Print binary string to the msgpack output
579  * @param ctx
580  * @param s
581  * @param len
582  */
583 void ucl_emitter_print_binary_string_msgpack(struct ucl_emitter_context *ctx,
584 											 const char *s, size_t len);
585 
586 /**
587  * Print array preamble for msgpack
588  * @param ctx
589  * @param len
590  */
591 void ucl_emitter_print_array_msgpack(struct ucl_emitter_context *ctx,
592 									 size_t len);
593 
594 /**
595  * Print object preamble for msgpack
596  * @param ctx
597  * @param len
598  */
599 void ucl_emitter_print_object_msgpack(struct ucl_emitter_context *ctx,
600 									  size_t len);
601 /**
602  * Print NULL to the msgpack output
603  * @param ctx
604  */
605 void ucl_emitter_print_null_msgpack(struct ucl_emitter_context *ctx);
606 /**
607  * Print object's key if needed to the msgpack output
608  * @param print_key
609  * @param ctx
610  * @param obj
611  */
612 void ucl_emitter_print_key_msgpack(bool print_key,
613 								   struct ucl_emitter_context *ctx,
614 								   const ucl_object_t *obj);
615 
/**
 * Fetch URL into a buffer
 * @param url url to fetch
 * @param buf pointer to buffer (must be freed by caller)
 * @param buflen pointer to buffer length
 * @param err pointer to error argument
 * @param must_exist fail if cannot find a url
 */
623  */
624 bool ucl_fetch_url(const unsigned char *url,
625 				   unsigned char **buf,
626 				   size_t *buflen,
627 				   UT_string **err,
628 				   bool must_exist);
629 
/**
 * Fetch a file and save results to the memory buffer
 * @param filename filename to fetch
 * @param buf target buffer
 * @param buflen target length
 * @param err pointer to error argument
 * @param must_exist fail if cannot find a file
 * @return
 */
637  */
638 bool ucl_fetch_file(const unsigned char *filename,
639 					unsigned char **buf,
640 					size_t *buflen,
641 					UT_string **err,
642 					bool must_exist);
643 
644 /**
645  * Add new element to an object using the current merge strategy and priority
646  * @param parser
647  * @param nobj
648  * @return
649  */
650 bool ucl_parser_process_object_element(struct ucl_parser *parser,
651 									   ucl_object_t *nobj);
652 
653 /**
654  * Parse msgpack chunk
655  * @param parser
656  * @return
657  */
658 bool ucl_parse_msgpack(struct ucl_parser *parser);
659 
660 bool ucl_parse_csexp(struct ucl_parser *parser);
661 
662 /**
663  * Free ucl chunk
664  * @param chunk
665  */
666 void ucl_chunk_free(struct ucl_chunk *chunk);
667 
668 #endif /* UCL_INTERNAL_H_ */
669