xref: /freebsd/contrib/libucl/src/ucl_internal.h (revision 8ef24a0d4b28fe230e20637f56869cc4148cd2ca)
1 /* Copyright (c) 2013, Vsevolod Stakhov
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *       * Redistributions of source code must retain the above copyright
7  *         notice, this list of conditions and the following disclaimer.
8  *       * Redistributions in binary form must reproduce the above copyright
9  *         notice, this list of conditions and the following disclaimer in the
10  *         documentation and/or other materials provided with the distribution.
11  *
12  * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15  * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22  */
23 
24 #ifndef UCL_INTERNAL_H_
25 #define UCL_INTERNAL_H_
26 
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #else
30 /* Help embedded builds */
31 #define HAVE_SYS_TYPES_H
32 #define HAVE_SYS_MMAN_H
33 #define HAVE_SYS_STAT_H
34 #define HAVE_SYS_PARAM_H
35 #define HAVE_LIMITS_H
36 #define HAVE_FCNTL_H
37 #define HAVE_ERRNO_H
38 #define HAVE_UNISTD_H
39 #define HAVE_CTYPE_H
40 #define HAVE_STDIO_H
41 #define HAVE_STRING_H
42 #define HAVE_FLOAT_H
43 #define HAVE_LIBGEN_H
44 #define HAVE_MATH_H
45 #define HAVE_STDBOOL_H
46 #define HAVE_STDINT_H
47 #define HAVE_STDARG_H
48 #ifndef _WIN32
49 # define HAVE_REGEX_H
50 #endif
51 #endif
52 
53 #ifdef HAVE_SYS_TYPES_H
54 #include <sys/types.h>
55 #endif
56 
57 #ifdef HAVE_SYS_MMAN_H
58 # ifndef _WIN32
59 #  include <sys/mman.h>
60 # endif
61 #endif
62 #ifdef HAVE_SYS_STAT_H
63 #include <sys/stat.h>
64 #endif
65 #ifdef HAVE_SYS_PARAM_H
66 #include <sys/param.h>
67 #endif
68 
69 #ifdef HAVE_LIMITS_H
70 #include <limits.h>
71 #endif
72 #ifdef HAVE_FCNTL_H
73 #include <fcntl.h>
74 #endif
75 #ifdef HAVE_ERRNO_H
76 #include <errno.h>
77 #endif
78 #ifdef HAVE_UNISTD_H
79 #include <unistd.h>
80 #endif
81 #ifdef HAVE_CTYPE_H
82 #include <ctype.h>
83 #endif
84 #ifdef HAVE_STDIO_H
85 #include <stdio.h>
86 #endif
87 #ifdef HAVE_STRING_H
88 #include <string.h>
89 #endif
90 
91 #include "utlist.h"
92 #include "utstring.h"
93 #include "uthash.h"
94 #include "ucl.h"
95 #include "ucl_hash.h"
96 #include "xxhash.h"
97 
98 #ifdef HAVE_OPENSSL
99 #include <openssl/evp.h>
100 #endif
101 
/*
 * Cast away the const qualifier of a pointer: the value is converted to
 * `const void *`, then to uintptr_t and finally to the requested type.
 * Defined here only when no __DECONST macro is already visible.
 */
#ifndef __DECONST
#define __DECONST(type, var)    ((type)(uintptr_t)(const void *)(var))
#endif
105 
106 /**
 * @file ucl_internal.h
108  * Internal structures and functions of UCL library
109  */
110 
/* NOTE(review): presumably the limit checked against ucl_parser.recursion — confirm at the use sites */
#define UCL_MAX_RECURSION 16
/* NOTE(review): UCL_TRASH_KEY and UCL_TRASH_VALUE look like slot indices for trashed key/value storage — confirm at the use sites */
#define UCL_TRASH_KEY 0
#define UCL_TRASH_VALUE 1
114 
/**
 * Parser states.
 * Values of this type are kept in the `state` and `prev_state` fields of
 * struct ucl_parser.
 */
enum ucl_parser_state {
	UCL_STATE_INIT = 0,
	UCL_STATE_OBJECT,
	UCL_STATE_ARRAY,
	UCL_STATE_KEY,
	UCL_STATE_VALUE,
	UCL_STATE_AFTER_VALUE,
	UCL_STATE_ARRAY_VALUE,
	/* NOTE(review): SCOMMENT/MCOMMENT presumably stand for single-line and multi-line comments — confirm */
	UCL_STATE_SCOMMENT,
	UCL_STATE_MCOMMENT,
	UCL_STATE_MACRO_NAME,
	UCL_STATE_MACRO,
	UCL_STATE_ERROR
};
129 
/**
 * Character class flags.
 * UCL_CHARACTER_DENIED is zero; every other member is a distinct single bit.
 * NOTE(review): the flags are presumably OR-ed together to classify a
 * character — confirm in the parser.
 */
enum ucl_character_type {
	UCL_CHARACTER_DENIED = 0,
	UCL_CHARACTER_KEY = 1,
	UCL_CHARACTER_KEY_START = 1 << 1,
	UCL_CHARACTER_WHITESPACE = 1 << 2,
	UCL_CHARACTER_WHITESPACE_UNSAFE = 1 << 3,
	UCL_CHARACTER_VALUE_END = 1 << 4,
	UCL_CHARACTER_VALUE_STR = 1 << 5,
	UCL_CHARACTER_VALUE_DIGIT = 1 << 6,
	UCL_CHARACTER_VALUE_DIGIT_START = 1 << 7,
	UCL_CHARACTER_ESCAPE = 1 << 8,
	UCL_CHARACTER_KEY_SEP = 1 << 9,
	UCL_CHARACTER_JSON_UNSAFE = 1 << 10,
	UCL_CHARACTER_UCL_UNSAFE = 1 << 11
};
145 
/**
 * Description of a single macro: its name, a handler and opaque user data.
 */
struct ucl_macro {
	char *name;
	/* The handler is stored as one of two function pointer types */
	union {
		ucl_macro_handler handler;
		ucl_context_macro_handler context_handler;
	} h;
	/* NOTE(review): presumably handed to the handler as its `ud` argument — confirm */
	void* ud;
	/* NOTE(review): presumably true when h.context_handler is the member in use — confirm */
	bool is_context;
	/* uthash handle (see uthash.h) */
	UT_hash_handle hh;
};
156 
/**
 * Node of a singly linked stack of objects (linked through `next`).
 */
struct ucl_stack {
	ucl_object_t *obj;
	struct ucl_stack *next;
	/* NOTE(review): presumably the nesting level associated with obj — confirm */
	uint64_t level;
};
162 
/**
 * A piece of input handed to the parser; chunks are linked through `next`.
 */
struct ucl_chunk {
	/* NOTE(review): presumably the bounds of the chunk data and the current read position — confirm */
	const unsigned char *begin;
	const unsigned char *end;
	const unsigned char *pos;
	/* NOTE(review): presumably the number of bytes left after pos — confirm */
	size_t remain;
	/* NOTE(review): presumably the position used for error reporting — confirm */
	unsigned int line;
	unsigned int column;
	unsigned priority;
	enum ucl_duplicate_strategy strategy;
	enum ucl_parse_type parse_type;
	struct ucl_chunk *next;
};
175 
/*
 * Linked list node for a public key.
 * The EVP_PKEY member exists only in builds with HAVE_OPENSSL; otherwise
 * the structure contains nothing but the list link.
 */
#ifdef HAVE_OPENSSL
struct ucl_pubkey {
	EVP_PKEY *key;
	struct ucl_pubkey *next;
};
#else
struct ucl_pubkey {
	struct ucl_pubkey *next;
};
#endif
186 
/**
 * A variable name/value pair with explicit lengths, kept as a node of a
 * doubly linked list (`prev`/`next`).
 */
struct ucl_variable {
	char *var;
	char *value;
	/* Lengths of var and value respectively */
	size_t var_len;
	size_t value_len;
	struct ucl_variable *prev, *next;
};
194 
/**
 * State of a single parser instance.
 */
struct ucl_parser {
	/* Current and previous parser state */
	enum ucl_parser_state state;
	enum ucl_parser_state prev_state;
	/* NOTE(review): presumably the current nesting depth, limited by UCL_MAX_RECURSION — confirm */
	unsigned int recursion;
	int flags;
	unsigned default_priority;
	int err_code;
	ucl_object_t *top_obj;
	ucl_object_t *cur_obj;
	ucl_object_t *trash_objs;
	ucl_object_t *includepaths;
	char *cur_file;
	/* NOTE(review): struct ucl_macro carries a UT_hash_handle, so this is presumably the head of a uthash table — confirm */
	struct ucl_macro *macroes;
	/* Heads of the linked structures declared above (each has a `next` link) */
	struct ucl_stack *stack;
	struct ucl_chunk *chunks;
	struct ucl_pubkey *keys;
	struct ucl_variable *variables;
	/* NOTE(review): var_data is presumably the user data passed to var_handler — confirm */
	ucl_variable_handler var_handler;
	void *var_data;
	ucl_object_t *comments;
	ucl_object_t *last_comment;
	/* Error text as a UT_string (the type ucl_create_err () writes to) */
	UT_string *err;
};
218 
/**
 * An object bundled with a destructor callback and an emitter callback.
 * The embedded ucl_object_t is the first member.
 * NOTE(review): presumably so that a pointer to this structure can be
 * used where a ucl_object_t pointer is expected — confirm.
 */
struct ucl_object_userdata {
	ucl_object_t obj;
	ucl_userdata_dtor dtor;
	ucl_userdata_emitter emitter;
};
224 
225 /**
 * Unescape a JSON string in place
 * @param str string to unescape (modified in place)
 * @param len length of str
228  */
229 size_t ucl_unescape_json_string (char *str, size_t len);
230 
231 /**
232  * Handle include macro
233  * @param data include data
234  * @param len length of data
235  * @param args UCL object representing arguments to the macro
236  * @param ud user data
237  * @return
238  */
239 bool ucl_include_handler (const unsigned char *data, size_t len,
240 		const ucl_object_t *args, void* ud);
241 
242 /**
243  * Handle tryinclude macro
244  * @param data include data
245  * @param len length of data
246  * @param args UCL object representing arguments to the macro
247  * @param ud user data
248  * @return
249  */
250 bool ucl_try_include_handler (const unsigned char *data, size_t len,
251 		const ucl_object_t *args, void* ud);
252 
253 /**
254  * Handle includes macro
255  * @param data include data
256  * @param len length of data
257  * @param args UCL object representing arguments to the macro
258  * @param ud user data
259  * @return
260  */
261 bool ucl_includes_handler (const unsigned char *data, size_t len,
262 		const ucl_object_t *args, void* ud);
263 
264 /**
265  * Handle priority macro
266  * @param data include data
267  * @param len length of data
268  * @param args UCL object representing arguments to the macro
269  * @param ud user data
270  * @return
271  */
272 bool ucl_priority_handler (const unsigned char *data, size_t len,
273 		const ucl_object_t *args, void* ud);
274 
275 /**
276  * Handle load macro
277  * @param data include data
278  * @param len length of data
279  * @param args UCL object representing arguments to the macro
280  * @param ud user data
281  * @return
282  */
283 bool ucl_load_handler (const unsigned char *data, size_t len,
284 		const ucl_object_t *args, void* ud);
285 /**
286  * Handle inherit macro
287  * @param data include data
288  * @param len length of data
289  * @param args UCL object representing arguments to the macro
290  * @param ctx the current context object
291  * @param ud user data
292  * @return
293  */
294 bool ucl_inherit_handler (const unsigned char *data, size_t len,
295 		const ucl_object_t *args, const ucl_object_t *ctx, void* ud);
296 
297 size_t ucl_strlcpy (char *dst, const char *src, size_t siz);
298 size_t ucl_strlcpy_unsafe (char *dst, const char *src, size_t siz);
299 size_t ucl_strlcpy_tolower (char *dst, const char *src, size_t siz);
300 
301 char *ucl_strnstr (const char *s, const char *find, int len);
302 char *ucl_strncasestr (const char *s, const char *find, int len);
303 
304 #ifdef __GNUC__
305 static inline void
306 ucl_create_err (UT_string **err, const char *fmt, ...)
307 __attribute__ (( format( printf, 2, 3) ));
308 #endif
309 
310 #undef UCL_FATAL_ERRORS
311 
312 static inline void
313 ucl_create_err (UT_string **err, const char *fmt, ...)
314 {
315 	if (*err == NULL) {
316 		utstring_new (*err);
317 		va_list ap;
318 		va_start (ap, fmt);
319 		utstring_printf_va (*err, fmt, ap);
320 		va_end (ap);
321 	}
322 
323 #ifdef UCL_FATAL_ERRORS
324 	assert (0);
325 #endif
326 }
327 
328 /**
329  * Check whether a given string contains a boolean value
330  * @param obj object to set
331  * @param start start of a string
332  * @param len length of a string
333  * @return true if a string is a boolean value
334  */
335 static inline bool
336 ucl_maybe_parse_boolean (ucl_object_t *obj, const unsigned char *start, size_t len)
337 {
338 	const char *p = (const char *)start;
339 	bool ret = false, val = false;
340 
341 	if (len == 5) {
342 		if ((p[0] == 'f' || p[0] == 'F') && strncasecmp (p, "false", 5) == 0) {
343 			ret = true;
344 			val = false;
345 		}
346 	}
347 	else if (len == 4) {
348 		if ((p[0] == 't' || p[0] == 'T') && strncasecmp (p, "true", 4) == 0) {
349 			ret = true;
350 			val = true;
351 		}
352 	}
353 	else if (len == 3) {
354 		if ((p[0] == 'y' || p[0] == 'Y') && strncasecmp (p, "yes", 3) == 0) {
355 			ret = true;
356 			val = true;
357 		}
358 		else if ((p[0] == 'o' || p[0] == 'O') && strncasecmp (p, "off", 3) == 0) {
359 			ret = true;
360 			val = false;
361 		}
362 	}
363 	else if (len == 2) {
364 		if ((p[0] == 'n' || p[0] == 'N') && strncasecmp (p, "no", 2) == 0) {
365 			ret = true;
366 			val = false;
367 		}
368 		else if ((p[0] == 'o' || p[0] == 'O') && strncasecmp (p, "on", 2) == 0) {
369 			ret = true;
370 			val = true;
371 		}
372 	}
373 
374 	if (ret && obj != NULL) {
375 		obj->type = UCL_BOOLEAN;
376 		obj->value.iv = val;
377 	}
378 
379 	return ret;
380 }
381 
382 /**
383  * Check numeric string
384  * @param obj object to set if a string is numeric
385  * @param start start of string
386  * @param end end of string
387  * @param pos position where parsing has stopped
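 * @param allow_double allow parsing of floating point values
 * @param number_bytes NOTE(review): undocumented; presumably selects byte-size handling of numeric suffixes, confirm against the implementation
 * @param allow_time NOTE(review): undocumented; presumably allows time values, confirm against the implementation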
389  * @return 0 if string is numeric and error code (EINVAL or ERANGE) in case of conversion error
390  */
391 int ucl_maybe_parse_number (ucl_object_t *obj,
392 		const char *start, const char *end, const char **pos,
393 		bool allow_double, bool number_bytes, bool allow_time);
394 
395 
396 static inline const ucl_object_t *
397 ucl_hash_search_obj (ucl_hash_t* hashlin, ucl_object_t *obj)
398 {
399 	return (const ucl_object_t *)ucl_hash_search (hashlin, obj->key, obj->keylen);
400 }
401 
402 static inline ucl_hash_t * ucl_hash_insert_object (ucl_hash_t *hashlin,
403 		const ucl_object_t *obj,
404 		bool ignore_case) UCL_WARN_UNUSED_RESULT;
405 
406 static inline ucl_hash_t *
407 ucl_hash_insert_object (ucl_hash_t *hashlin,
408 		const ucl_object_t *obj,
409 		bool ignore_case)
410 {
411 	if (hashlin == NULL) {
412 		hashlin = ucl_hash_create (ignore_case);
413 	}
414 	ucl_hash_insert (hashlin, obj, obj->key, obj->keylen);
415 
416 	return hashlin;
417 }
418 
419 /**
420  * Get standard emitter context for a specified emit_type
421  * @param emit_type type of emitter
422  * @return context or NULL if input is invalid
423  */
424 const struct ucl_emitter_context *
425 ucl_emit_get_standard_context (enum ucl_emitter emit_type);
426 
427 /**
428  * Serialize string as JSON string
429  * @param str string to emit
 * @param size length of str
 * @param ctx emitter context used as the output target
431  */
432 void ucl_elt_string_write_json (const char *str, size_t size,
433 		struct ucl_emitter_context *ctx);
434 
435 /**
436  * Write multiline string using `EOD` as string terminator
437  * @param str
438  * @param size
439  * @param ctx
440  */
441 void ucl_elt_string_write_multiline (const char *str, size_t size,
442 		struct ucl_emitter_context *ctx);
443 
444 /**
445  * Emit a single object to string
446  * @param obj
447  * @return
448  */
449 unsigned char * ucl_object_emit_single_json (const ucl_object_t *obj);
450 
451 /**
452  * Check whether a specified string is long and should be likely printed in
453  * multiline mode
454  * @param obj
455  * @return
456  */
457 bool ucl_maybe_long_string (const ucl_object_t *obj);
458 
459 /**
460  * Print integer to the msgpack output
461  * @param ctx
462  * @param val
463  */
464 void ucl_emitter_print_int_msgpack (struct ucl_emitter_context *ctx,
465 		int64_t val);
466 /**
 * Print double to the msgpack output
468  * @param ctx
469  * @param val
470  */
471 void ucl_emitter_print_double_msgpack (struct ucl_emitter_context *ctx,
472 		double val);
473 /**
 * Print boolean to the msgpack output
475  * @param ctx
476  * @param val
477  */
478 void ucl_emitter_print_bool_msgpack (struct ucl_emitter_context *ctx,
479 		bool val);
480 /**
481  * Print string to the msgpack output
482  * @param ctx
483  * @param s
484  * @param len
485  */
486 void ucl_emitter_print_string_msgpack (struct ucl_emitter_context *ctx,
487 		const char *s, size_t len);
488 
489 /**
490  * Print binary string to the msgpack output
491  * @param ctx
492  * @param s
493  * @param len
494  */
495 void ucl_emitter_print_binary_string_msgpack (struct ucl_emitter_context *ctx,
496 		const char *s, size_t len);
497 
498 /**
499  * Print array preamble for msgpack
500  * @param ctx
501  * @param len
502  */
503 void ucl_emitter_print_array_msgpack (struct ucl_emitter_context *ctx,
504 		size_t len);
505 
506 /**
507  * Print object preamble for msgpack
508  * @param ctx
509  * @param len
510  */
511 void ucl_emitter_print_object_msgpack (struct ucl_emitter_context *ctx,
512 		size_t len);
513 /**
514  * Print NULL to the msgpack output
515  * @param ctx
516  */
517 void ucl_emitter_print_null_msgpack (struct ucl_emitter_context *ctx);
518 /**
519  * Print object's key if needed to the msgpack output
520  * @param print_key
521  * @param ctx
522  * @param obj
523  */
524 void ucl_emitter_print_key_msgpack (bool print_key,
525 		struct ucl_emitter_context *ctx,
526 		const ucl_object_t *obj);
527 
528 /**
529  * Fetch URL into a buffer
530  * @param url url to fetch
 * @param buf pointer to buffer (must be freed by caller)
532  * @param buflen pointer to buffer length
533  * @param err pointer to error argument
534  * @param must_exist fail if cannot find a url
535  */
536 bool ucl_fetch_url (const unsigned char *url,
537 		unsigned char **buf,
538 		size_t *buflen,
539 		UT_string **err,
540 		bool must_exist);
541 
542 /**
543  * Fetch a file and save results to the memory buffer
544  * @param filename filename to fetch
 * @param buf target buffer
 * @param buflen target length
 * @param err pointer to error argument
 * @param must_exist fail if the file cannot be found
548  * @return
549  */
550 bool ucl_fetch_file (const unsigned char *filename,
551 		unsigned char **buf,
552 		size_t *buflen,
553 		UT_string **err,
554 		bool must_exist);
555 
556 /**
557  * Add new element to an object using the current merge strategy and priority
558  * @param parser
559  * @param nobj
560  * @return
561  */
562 bool ucl_parser_process_object_element (struct ucl_parser *parser,
563 		ucl_object_t *nobj);
564 
565 /**
566  * Parse msgpack chunk
567  * @param parser
568  * @return
569  */
570 bool ucl_parse_msgpack (struct ucl_parser *parser);
571 
572 #endif /* UCL_INTERNAL_H_ */
573