xref: /freebsd/contrib/libucl/src/ucl_internal.h (revision 59c8e88e72633afbc47a4ace0d2170d00d51f7dc)
1 /* Copyright (c) 2013, Vsevolod Stakhov
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *       * Redistributions of source code must retain the above copyright
7  *         notice, this list of conditions and the following disclaimer.
8  *       * Redistributions in binary form must reproduce the above copyright
9  *         notice, this list of conditions and the following disclaimer in the
10  *         documentation and/or other materials provided with the distribution.
11  *
12  * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15  * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22  */
23 
24 #ifndef UCL_INTERNAL_H_
25 #define UCL_INTERNAL_H_
26 
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #else
30 /* Help embedded builds */
31 #define HAVE_SYS_TYPES_H
32 #define HAVE_SYS_MMAN_H
33 #define HAVE_SYS_STAT_H
34 #define HAVE_SYS_PARAM_H
35 #define HAVE_LIMITS_H
36 #define HAVE_FCNTL_H
37 #define HAVE_ERRNO_H
38 #define HAVE_UNISTD_H
39 #define HAVE_CTYPE_H
40 #define HAVE_STDIO_H
41 #define HAVE_STRING_H
42 #define HAVE_FLOAT_H
43 #define HAVE_LIBGEN_H
44 #define HAVE_MATH_H
45 #define HAVE_STDBOOL_H
46 #define HAVE_STDINT_H
47 #define HAVE_STDARG_H
48 #ifndef _WIN32
49 # define HAVE_REGEX_H
50 #endif
51 #endif
52 
53 #ifdef HAVE_SYS_TYPES_H
54 #include <sys/types.h>
55 #endif
56 
57 #ifdef HAVE_SYS_MMAN_H
58 # ifndef _WIN32
59 #  include <sys/mman.h>
60 # endif
61 #endif
62 #ifdef HAVE_SYS_STAT_H
63 #include <sys/stat.h>
64 #endif
65 #ifdef HAVE_SYS_PARAM_H
66 # ifndef _WIN32
67 # include <sys/param.h>
68 # endif
69 #endif
70 
71 #ifdef HAVE_LIMITS_H
72 #include <limits.h>
73 #endif
74 #ifdef HAVE_FCNTL_H
75 #include <fcntl.h>
76 #endif
77 #ifdef HAVE_ERRNO_H
78 #include <errno.h>
79 #endif
80 #ifdef HAVE_UNISTD_H
81 # ifndef _WIN32
82 # include <unistd.h>
83 # endif
84 #endif
85 #ifdef HAVE_CTYPE_H
86 #include <ctype.h>
87 #endif
88 #ifdef HAVE_STDIO_H
89 #include <stdio.h>
90 #endif
91 #ifdef HAVE_STRING_H
92 #include <string.h>
93 #endif
94 #ifdef HAVE_STRINGS_H
95 #include <strings.h>
96 #endif
97 
98 #if defined(_MSC_VER)
99 /* Windows hacks */
100 #include <BaseTsd.h>
101 #include <inttypes.h>
102 typedef SSIZE_T ssize_t;
103 #define strdup _strdup
104 #define snprintf _snprintf
105 #define vsnprintf _vsnprintf
106 #define strcasecmp _stricmp
107 #define strncasecmp _strnicmp
108 #define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
109 #define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR)
110 #if _MSC_VER >= 1900
111 #include <../ucrt/stdlib.h>
112 #else
113 #include <../include/stdlib.h>
114 #endif
115 #ifndef PATH_MAX
116 #define PATH_MAX _MAX_PATH
117 #endif
118 
119 /* Dirname, basename implementations */
120 
121 
122 #endif
123 
124 #include "utlist.h"
125 #include "utstring.h"
126 #include "uthash.h"
127 #include "ucl.h"
128 #include "ucl_hash.h"
129 
130 #ifdef HAVE_OPENSSL
131 #include <openssl/evp.h>
132 #endif
133 
134 #ifndef __DECONST
135 #define __DECONST(type, var)    ((type)(uintptr_t)(const void *)(var))
136 #endif
137 
/**
 * @file ucl_internal.h
 * Internal structures and functions of the UCL library
 */
142 
/*
 * NOTE(review): presumably the upper bound for the `recursion` counter kept
 * in struct ucl_parser; the check itself is not in this header - confirm.
 */
#define UCL_MAX_RECURSION 16
/*
 * Two consecutive indices (0 and 1), one for keys and one for values.
 * NOTE(review): presumably they index a two-slot "trash" array declared
 * outside this header - confirm where they are used.
 */
#define UCL_TRASH_KEY 0
#define UCL_TRASH_VALUE 1
146 
/**
 * Parser states. struct ucl_parser stores the current one in `state` and
 * the previous one in `prev_state`.
 */
enum ucl_parser_state {
	UCL_STATE_INIT = 0,	/* explicit zero: the value found in a zero-filled parser */
	UCL_STATE_OBJECT,
	UCL_STATE_ARRAY,
	UCL_STATE_KEY,
	UCL_STATE_KEY_OBRACE,	/* NOTE(review): presumably a key followed by an opening brace - confirm */
	UCL_STATE_VALUE,
	UCL_STATE_AFTER_VALUE,
	UCL_STATE_ARRAY_VALUE,
	UCL_STATE_SCOMMENT,	/* NOTE(review): presumably a single-line comment - confirm */
	UCL_STATE_MCOMMENT,	/* NOTE(review): presumably a multi-line comment - confirm */
	UCL_STATE_MACRO_NAME,
	UCL_STATE_MACRO,
	UCL_STATE_ERROR
};
162 
/**
 * Character class flags. Every enumerator is a distinct single bit
 * (1 << 0 through 1 << 12), so several classes can be combined in, or
 * tested against, one integer mask.
 * NOTE(review): presumably looked up per input byte in a classification
 * table defined elsewhere - the table is not visible in this header.
 */
enum ucl_character_type {
	UCL_CHARACTER_DENIED = (1 << 0),
	UCL_CHARACTER_KEY = (1 << 1),
	UCL_CHARACTER_KEY_START = (1 << 2),
	UCL_CHARACTER_WHITESPACE = (1 << 3),
	UCL_CHARACTER_WHITESPACE_UNSAFE = (1 << 4),
	UCL_CHARACTER_VALUE_END = (1 << 5),
	UCL_CHARACTER_VALUE_STR = (1 << 6),
	UCL_CHARACTER_VALUE_DIGIT = (1 << 7),
	UCL_CHARACTER_VALUE_DIGIT_START = (1 << 8),
	UCL_CHARACTER_ESCAPE = (1 << 9),
	UCL_CHARACTER_KEY_SEP = (1 << 10),
	UCL_CHARACTER_JSON_UNSAFE = (1 << 11),
	UCL_CHARACTER_UCL_UNSAFE = (1 << 12)
};
178 
/**
 * A registered macro: its name, one of two kinds of handler callback and an
 * opaque user pointer. struct ucl_parser refers to these through `macroes`.
 */
struct ucl_macro {
	char *name;	/* macro name */
	union _ucl_macro {
		ucl_macro_handler handler;	/* plain handler */
		ucl_context_macro_handler context_handler;	/* handler variant with a context */
	} h;	/* only one member is meaningful at a time (union) */
	void* ud;	/* NOTE(review): presumably handed to the handler as its `ud` argument - confirm */
	bool is_context;	/* NOTE(review): presumably true when h.context_handler is the active member - confirm */
	UT_hash_handle hh;	/* uthash bookkeeping handle */
};
189 
/**
 * Single-bit flags (unsigned constants).
 * NOTE(review): presumably stored in ucl_stack.e.params.flags - confirm at
 * the use sites, which are outside this header.
 */
enum ucl_stack_flags {
	UCL_STACK_HAS_OBRACE = (1u << 0),	/* NOTE(review): presumably "container was opened with a brace" - confirm */
	UCL_STACK_MAX = (1u << 1),	/* NOTE(review): looks like an end marker rather than a real flag - confirm */
};
194 
/**
 * One entry of the parser stack (struct ucl_parser.stack). Entries are
 * chained through `next`.
 */
struct ucl_stack {
	ucl_object_t *obj;	/* object associated with this stack entry */
	struct ucl_stack *next;	/* next entry in the chain */
	union {
		struct {
			uint16_t level;
			uint16_t flags;	/* NOTE(review): presumably enum ucl_stack_flags bits - confirm */
			uint32_t line;
		} params;
		uint64_t len;
	} e;	/* `params` (16+16+32 bits) and `len` (64 bits) share the same storage */
	struct ucl_chunk *chunk;	/* chunk associated with this entry */
};
208 
/**
 * Link in a chain of special-handler records; struct ucl_chunk holds the
 * head of such a chain in `special_handlers`.
 */
struct ucl_parser_special_handler_chain {
	unsigned char *begin;	/* start of a mutable buffer */
	size_t len;	/* NOTE(review): presumably the length of the buffer at `begin` - confirm */
	struct ucl_parser_special_handler *special_handler;	/* handler this record belongs to */
	struct ucl_parser_special_handler_chain *next;	/* next record in the chain */
};
215 
/**
 * A piece of input handed to the parser, together with the position and
 * per-chunk settings. Chunks are chained through `next`; struct ucl_parser
 * keeps the head in `chunks`.
 */
struct ucl_chunk {
	const unsigned char *begin;	/* read-only pointers into the input; */
	const unsigned char *end;	/* NOTE(review): presumably start, one-past-end */
	const unsigned char *pos;	/* and current read position - confirm */
	char *fname;	/* NOTE(review): presumably the originating file name, if any - confirm */
	size_t remain;	/* NOTE(review): presumably bytes left after `pos` - confirm */
	unsigned int line;	/* position counters */
	unsigned int column;
	unsigned priority;
	enum ucl_duplicate_strategy strategy;
	enum ucl_parse_type parse_type;
	struct ucl_parser_special_handler_chain *special_handlers;	/* head of the handler chain */
	struct ucl_chunk *next;	/* next chunk in the chain */
};
230 
/*
 * Entry in a chain of public keys (struct ucl_parser.keys). The key itself
 * is only present when built with HAVE_OPENSSL; without it the struct is
 * reduced to the `next` link so the type still exists.
 */
#ifdef HAVE_OPENSSL
struct ucl_pubkey {
	EVP_PKEY *key;	/* OpenSSL key handle */
	struct ucl_pubkey *next;	/* next entry in the chain */
};
#else
struct ucl_pubkey {
	struct ucl_pubkey *next;	/* next entry in the chain */
};
#endif
241 
/**
 * A variable: name and value strings with their lengths stored alongside.
 * Entries carry both `prev` and `next` links; struct ucl_parser keeps the
 * head in `variables`.
 */
struct ucl_variable {
	char *var;	/* variable name */
	char *value;	/* variable value */
	size_t var_len;	/* NOTE(review): presumably the byte length of `var` - confirm */
	size_t value_len;	/* NOTE(review): presumably the byte length of `value` - confirm */
	struct ucl_variable *prev, *next;	/* links to neighbouring entries */
};
249 
/**
 * Complete parser state: state machine position, objects being built,
 * registered macros/variables/keys/handlers, queued input and the pending
 * error message.
 */
struct ucl_parser {
	enum ucl_parser_state state;	/* current state */
	enum ucl_parser_state prev_state;	/* state before the current one */
	unsigned int recursion;	/* NOTE(review): presumably nesting depth checked against UCL_MAX_RECURSION - confirm */
	int flags;	/* NOTE(review): presumably parser option bits declared in ucl.h - confirm */
	unsigned default_priority;
	int err_code;
	ucl_object_t *top_obj;
	ucl_object_t *cur_obj;
	ucl_object_t *trash_objs;
	ucl_object_t *includepaths;
	char *cur_file;
	struct ucl_macro *macroes;	/* registered macros (struct ucl_macro carries a uthash handle) */
	struct ucl_stack *stack;	/* head of the ucl_stack chain */
	struct ucl_chunk *chunks;	/* head of the ucl_chunk chain */
	struct ucl_pubkey *keys;	/* head of the ucl_pubkey chain */
	struct ucl_parser_special_handler *special_handlers;
	ucl_include_trace_func_t *include_trace_func;	/* optional callback and its user pointer; */
	void *include_trace_ud;	/* NOTE(review): presumably invoked on includes - confirm */
	struct ucl_variable *variables;	/* head of the ucl_variable list */
	ucl_variable_handler var_handler;	/* callback and its user pointer; */
	void *var_data;	/* NOTE(review): presumably consulted for unknown variables - confirm */
	ucl_object_t *comments;
	ucl_object_t *last_comment;
	UT_string *err;	/* error message buffer; same type ucl_create_err() fills in */
};
276 
/**
 * An object extended with two userdata callbacks. `obj` is the first
 * member, so a pointer to this struct and a pointer to its `obj` refer to
 * the same address.
 */
struct ucl_object_userdata {
	ucl_object_t obj;	/* embedded base object (by value, not a pointer) */
	ucl_userdata_dtor dtor;	/* NOTE(review): presumably called when the object is destroyed - confirm */
	ucl_userdata_emitter emitter;	/* NOTE(review): presumably called when the object is emitted - confirm */
};
282 
/**
 * Unescape a JSON string in place
 * @param str string to unescape; the buffer itself is modified
 * @param len length of the data in str
 * @return a size_t; NOTE(review): presumably the length of the unescaped
 * result - confirm against the implementation
 */
size_t ucl_unescape_json_string (char *str, size_t len);


/**
 * Unescape a single quoted string in place
 * @param str string to unescape; the buffer itself is modified
 * @param len length of the data in str
 * @return a size_t; NOTE(review): presumably the length of the unescaped
 * result - confirm against the implementation
 */
size_t ucl_unescape_squoted_string (char *str, size_t len);
295 
/**
 * Handle the include macro
 * @param data include data
 * @param len length of data
 * @param args UCL object representing arguments to the macro
 * @param ud user data
 * @return a bool; NOTE(review): presumably true on success - confirm
 */
bool ucl_include_handler (const unsigned char *data, size_t len,
		const ucl_object_t *args, void* ud);

/**
 * Handle the tryinclude macro
 * @param data include data
 * @param len length of data
 * @param args UCL object representing arguments to the macro
 * @param ud user data
 * @return a bool; NOTE(review): presumably true on success - confirm
 */
bool ucl_try_include_handler (const unsigned char *data, size_t len,
		const ucl_object_t *args, void* ud);

/**
 * Handle the includes macro
 * @param data include data
 * @param len length of data
 * @param args UCL object representing arguments to the macro
 * @param ud user data
 * @return a bool; NOTE(review): presumably true on success - confirm
 */
bool ucl_includes_handler (const unsigned char *data, size_t len,
		const ucl_object_t *args, void* ud);

/**
 * Handle the priority macro
 * @param data data passed to the macro
 * @param len length of data
 * @param args UCL object representing arguments to the macro
 * @param ud user data
 * @return a bool; NOTE(review): presumably true on success - confirm
 */
bool ucl_priority_handler (const unsigned char *data, size_t len,
		const ucl_object_t *args, void* ud);

/**
 * Handle the load macro
 * @param data data passed to the macro
 * @param len length of data
 * @param args UCL object representing arguments to the macro
 * @param ud user data
 * @return a bool; NOTE(review): presumably true on success - confirm
 */
bool ucl_load_handler (const unsigned char *data, size_t len,
		const ucl_object_t *args, void* ud);
/**
 * Handle the inherit macro. Unlike the handlers above, this one also
 * receives the current context object.
 * @param data data passed to the macro
 * @param len length of data
 * @param args UCL object representing arguments to the macro
 * @param ctx the current context object
 * @param ud user data
 * @return a bool; NOTE(review): presumably true on success - confirm
 */
bool ucl_inherit_handler (const unsigned char *data, size_t len,
		const ucl_object_t *args, const ucl_object_t *ctx, void* ud);
361 
/*
 * Bounded string copy helpers taking a destination, a source and a size.
 * NOTE(review): the names suggest BSD strlcpy-like semantics, with
 * `_unsafe` and `_tolower` variants; the exact contracts (termination,
 * return value, what "unsafe" relaxes) are defined in the implementation,
 * not here - confirm before relying on them.
 */
size_t ucl_strlcpy (char *dst, const char *src, size_t siz);
size_t ucl_strlcpy_unsafe (char *dst, const char *src, size_t siz);
size_t ucl_strlcpy_tolower (char *dst, const char *src, size_t siz);

/*
 * Substring search helpers. Both take a const haystack and return a
 * non-const char pointer; `len` is a signed int.
 * NOTE(review): presumably `len` bounds how much of `s` is searched and
 * the `case` variant ignores letter case - confirm in the implementation.
 */
char *ucl_strnstr (const char *s, const char *find, int len);
char *ucl_strncasestr (const char *s, const char *find, int len);
368 
369 #ifdef __GNUC__
370 static inline void
371 ucl_create_err (UT_string **err, const char *fmt, ...)
372 __attribute__ (( format( printf, 2, 3) ));
373 #endif
374 
375 #undef UCL_FATAL_ERRORS
376 
377 static inline void
378 ucl_create_err (UT_string **err, const char *fmt, ...)
379 {
380 	if (*err == NULL) {
381 		utstring_new (*err);
382 		va_list ap;
383 		va_start (ap, fmt);
384 		utstring_printf_va (*err, fmt, ap);
385 		va_end (ap);
386 	}
387 
388 #ifdef UCL_FATAL_ERRORS
389 	assert (0);
390 #endif
391 }
392 
393 /**
394  * Check whether a given string contains a boolean value
395  * @param obj object to set
396  * @param start start of a string
397  * @param len length of a string
398  * @return true if a string is a boolean value
399  */
400 static inline bool
401 ucl_maybe_parse_boolean (ucl_object_t *obj, const unsigned char *start, size_t len)
402 {
403 	const char *p = (const char *)start;
404 	bool ret = false, val = false;
405 
406 	if (len == 5) {
407 		if ((p[0] == 'f' || p[0] == 'F') && strncasecmp (p, "false", 5) == 0) {
408 			ret = true;
409 			val = false;
410 		}
411 	}
412 	else if (len == 4) {
413 		if ((p[0] == 't' || p[0] == 'T') && strncasecmp (p, "true", 4) == 0) {
414 			ret = true;
415 			val = true;
416 		}
417 	}
418 	else if (len == 3) {
419 		if ((p[0] == 'y' || p[0] == 'Y') && strncasecmp (p, "yes", 3) == 0) {
420 			ret = true;
421 			val = true;
422 		}
423 		else if ((p[0] == 'o' || p[0] == 'O') && strncasecmp (p, "off", 3) == 0) {
424 			ret = true;
425 			val = false;
426 		}
427 	}
428 	else if (len == 2) {
429 		if ((p[0] == 'n' || p[0] == 'N') && strncasecmp (p, "no", 2) == 0) {
430 			ret = true;
431 			val = false;
432 		}
433 		else if ((p[0] == 'o' || p[0] == 'O') && strncasecmp (p, "on", 2) == 0) {
434 			ret = true;
435 			val = true;
436 		}
437 	}
438 
439 	if (ret && obj != NULL) {
440 		obj->type = UCL_BOOLEAN;
441 		obj->value.iv = val;
442 	}
443 
444 	return ret;
445 }
446 
/**
 * Check numeric string
 * @param obj object to set if a string is numeric
 * @param start start of string
 * @param end end of string
 * @param pos position where parsing has stopped
 * @param allow_double allow parsing of floating point values
 * @param number_bytes NOTE(review): presumably selects how size suffixes
 * are scaled (bytes vs. plain numbers) - confirm in the implementation
 * @param allow_time NOTE(review): presumably allows values with time
 * suffixes - confirm in the implementation
 * @return 0 if string is numeric and error code (EINVAL or ERANGE) in case of conversion error
 */
int ucl_maybe_parse_number (ucl_object_t *obj,
		const char *start, const char *end, const char **pos,
		bool allow_double, bool number_bytes, bool allow_time);
459 
460 
461 static inline const ucl_object_t *
462 ucl_hash_search_obj (ucl_hash_t* hashlin, ucl_object_t *obj)
463 {
464 	return (const ucl_object_t *)ucl_hash_search (hashlin, obj->key, obj->keylen);
465 }
466 
467 static inline ucl_hash_t * ucl_hash_insert_object (ucl_hash_t *hashlin,
468 		const ucl_object_t *obj,
469 		bool ignore_case) UCL_WARN_UNUSED_RESULT;
470 
471 static inline ucl_hash_t *
472 ucl_hash_insert_object (ucl_hash_t *hashlin,
473 		const ucl_object_t *obj,
474 		bool ignore_case)
475 {
476 	ucl_hash_t *nhp;
477 
478 	if (hashlin == NULL) {
479 		nhp = ucl_hash_create (ignore_case);
480 		if (nhp == NULL) {
481 			return NULL;
482 		}
483 	} else {
484 		nhp = hashlin;
485 	}
486 	if (!ucl_hash_insert (nhp, obj, obj->key, obj->keylen)) {
487 		if (nhp != hashlin) {
488 			ucl_hash_destroy(nhp, NULL);
489 		}
490 		return NULL;
491 	}
492 
493 	return nhp;
494 }
495 
/**
 * Get standard emitter context for a specified emit_type
 * @param emit_type type of emitter
 * @return context or NULL if input is invalid
 */
const struct ucl_emitter_context *
ucl_emit_get_standard_context (enum ucl_emitter emit_type);

/**
 * Serialize string as JSON string
 * @param str string to emit
 * @param size length of str
 * @param ctx emitter context that receives the output
 */
void ucl_elt_string_write_json (const char *str, size_t size,
		struct ucl_emitter_context *ctx);


/**
 * Serialize string as single quoted string
 * @param str string to emit
 * @param size length of str
 * @param ctx emitter context that receives the output
 */
void
ucl_elt_string_write_squoted (const char *str, size_t size,
		struct ucl_emitter_context *ctx);

/**
 * Write multiline string using `EOD` as string terminator
 * @param str string to emit
 * @param size length of str
 * @param ctx emitter context that receives the output
 */
void ucl_elt_string_write_multiline (const char *str, size_t size,
		struct ucl_emitter_context *ctx);

/**
 * Emit a single object to string
 * @param obj object to emit
 * @return the emitted string as unsigned char *; NOTE(review): presumably
 * heap-allocated and owned by the caller - confirm in the implementation
 */
unsigned char * ucl_object_emit_single_json (const ucl_object_t *obj);

/**
 * Check whether a specified string is long and should be likely printed in
 * multiline mode
 * @param obj object to inspect
 * @return a bool; NOTE(review): presumably true when multiline output is
 * preferable - confirm in the implementation
 */
bool ucl_maybe_long_string (const ucl_object_t *obj);
545 
/**
 * Print integer to the msgpack output
 * @param ctx emitter context
 * @param val integer value to print
 */
void ucl_emitter_print_int_msgpack (struct ucl_emitter_context *ctx,
		int64_t val);
/**
 * Print double to the msgpack output
 * @param ctx emitter context
 * @param val floating point value to print
 */
void ucl_emitter_print_double_msgpack (struct ucl_emitter_context *ctx,
		double val);
/**
 * Print boolean to the msgpack output
 * @param ctx emitter context
 * @param val boolean value to print
 */
void ucl_emitter_print_bool_msgpack (struct ucl_emitter_context *ctx,
		bool val);
/**
 * Print string to the msgpack output
 * @param ctx emitter context
 * @param s string data
 * @param len length of s
 */
void ucl_emitter_print_string_msgpack (struct ucl_emitter_context *ctx,
		const char *s, size_t len);

/**
 * Print binary string to the msgpack output
 * @param ctx emitter context
 * @param s binary data
 * @param len length of s
 */
void ucl_emitter_print_binary_string_msgpack (struct ucl_emitter_context *ctx,
		const char *s, size_t len);

/**
 * Print array preamble for msgpack
 * @param ctx emitter context
 * @param len NOTE(review): presumably the number of array elements that
 * will follow - confirm
 */
void ucl_emitter_print_array_msgpack (struct ucl_emitter_context *ctx,
		size_t len);

/**
 * Print object preamble for msgpack
 * @param ctx emitter context
 * @param len NOTE(review): presumably the number of key/value pairs that
 * will follow - confirm
 */
void ucl_emitter_print_object_msgpack (struct ucl_emitter_context *ctx,
		size_t len);
/**
 * Print NULL to the msgpack output
 * @param ctx emitter context
 */
void ucl_emitter_print_null_msgpack (struct ucl_emitter_context *ctx);
/**
 * Print object's key if needed to the msgpack output
 * @param print_key NOTE(review): presumably false suppresses the key - confirm
 * @param ctx emitter context
 * @param obj object whose key is printed
 */
void ucl_emitter_print_key_msgpack (bool print_key,
		struct ucl_emitter_context *ctx,
		const ucl_object_t *obj);
614 
/**
 * Fetch URL into a buffer
 * @param url url to fetch
 * @param buf pointer to buffer; NOTE(review): the original comment said
 * "must be freed by callee", presumably meaning the caller releases it -
 * confirm ownership and the release function in the implementation
 * @param buflen pointer to buffer length
 * @param err pointer to error argument
 * @param must_exist fail if cannot find a url
 * @return a bool; NOTE(review): presumably true on success - confirm
 */
bool ucl_fetch_url (const unsigned char *url,
		unsigned char **buf,
		size_t *buflen,
		UT_string **err,
		bool must_exist);

/**
 * Fetch a file and save results to the memory buffer
 * @param filename filename to fetch (no separate length is passed)
 * @param buf target buffer
 * @param buflen target length
 * @param err pointer to error argument
 * @param must_exist NOTE(review): presumably "fail if the file cannot be
 * found", mirroring ucl_fetch_url - confirm
 * @return a bool; NOTE(review): presumably true on success - confirm
 */
bool ucl_fetch_file (const unsigned char *filename,
		unsigned char **buf,
		size_t *buflen,
		UT_string **err,
		bool must_exist);
642 
/**
 * Add new element to an object using the current merge strategy and priority
 * @param parser parser whose current state is used
 * @param nobj element to add
 * @return a bool; NOTE(review): presumably true on success - confirm
 */
bool ucl_parser_process_object_element (struct ucl_parser *parser,
		ucl_object_t *nobj);

/**
 * Parse msgpack chunk
 * @param parser parser holding the chunk
 * @return a bool; NOTE(review): presumably true on success - confirm
 */
bool ucl_parse_msgpack (struct ucl_parser *parser);

/*
 * Same signature as ucl_parse_msgpack.
 * NOTE(review): presumably parses a chunk in "csexp" format (canonical
 * S-expressions?) - confirm in the implementation.
 */
bool ucl_parse_csexp (struct ucl_parser *parser);

/**
 * Free ucl chunk
 * @param chunk chunk to free
 */
void ucl_chunk_free (struct ucl_chunk *chunk);
666 
667 #endif /* UCL_INTERNAL_H_ */
668