xref: /freebsd/contrib/libucl/src/ucl_internal.h (revision eb69d1f144a6fcc765d1b9d44a5ae8082353e70b)
1 /* Copyright (c) 2013, Vsevolod Stakhov
2  * All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are met:
6  *       * Redistributions of source code must retain the above copyright
7  *         notice, this list of conditions and the following disclaimer.
8  *       * Redistributions in binary form must reproduce the above copyright
9  *         notice, this list of conditions and the following disclaimer in the
10  *         documentation and/or other materials provided with the distribution.
11  *
12  * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY
13  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
14  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
15  * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY
16  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
17  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
18  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
19  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
20  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22  */
23 
24 #ifndef UCL_INTERNAL_H_
25 #define UCL_INTERNAL_H_
26 
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #else
30 /* Help embedded builds */
31 #define HAVE_SYS_TYPES_H
32 #define HAVE_SYS_MMAN_H
33 #define HAVE_SYS_STAT_H
34 #define HAVE_SYS_PARAM_H
35 #define HAVE_LIMITS_H
36 #define HAVE_FCNTL_H
37 #define HAVE_ERRNO_H
38 #define HAVE_UNISTD_H
39 #define HAVE_CTYPE_H
40 #define HAVE_STDIO_H
41 #define HAVE_STRING_H
42 #define HAVE_FLOAT_H
43 #define HAVE_LIBGEN_H
44 #define HAVE_MATH_H
45 #define HAVE_STDBOOL_H
46 #define HAVE_STDINT_H
47 #define HAVE_STDARG_H
48 #ifndef _WIN32
49 # define HAVE_REGEX_H
50 #endif
51 #endif
52 
53 #ifdef HAVE_SYS_TYPES_H
54 #include <sys/types.h>
55 #endif
56 
57 #ifdef HAVE_SYS_MMAN_H
58 # ifndef _WIN32
59 #  include <sys/mman.h>
60 # endif
61 #endif
62 #ifdef HAVE_SYS_STAT_H
63 #include <sys/stat.h>
64 #endif
65 #ifdef HAVE_SYS_PARAM_H
66 #include <sys/param.h>
67 #endif
68 
69 #ifdef HAVE_LIMITS_H
70 #include <limits.h>
71 #endif
72 #ifdef HAVE_FCNTL_H
73 #include <fcntl.h>
74 #endif
75 #ifdef HAVE_ERRNO_H
76 #include <errno.h>
77 #endif
78 #ifdef HAVE_UNISTD_H
79 #include <unistd.h>
80 #endif
81 #ifdef HAVE_CTYPE_H
82 #include <ctype.h>
83 #endif
84 #ifdef HAVE_STDIO_H
85 #include <stdio.h>
86 #endif
87 #ifdef HAVE_STRING_H
88 #include <string.h>
89 #endif
90 #ifdef HAVE_STRINGS_H
91 #include <strings.h>
92 #endif
93 
94 #include "utlist.h"
95 #include "utstring.h"
96 #include "uthash.h"
97 #include "ucl.h"
98 #include "ucl_hash.h"
99 
100 #ifdef HAVE_OPENSSL
101 #include <openssl/evp.h>
102 #endif
103 
104 #ifndef __DECONST
105 #define __DECONST(type, var)    ((type)(uintptr_t)(const void *)(var))
106 #endif
107 
108 /**
109  * @file ucl_internal.h
110  * Internal structures and functions of UCL library
111  */
112 
113 #define UCL_MAX_RECURSION 16
114 #define UCL_TRASH_KEY 0
115 #define UCL_TRASH_VALUE 1
116 
/**
 * States of the parser state machine. struct ucl_parser keeps the current
 * and the previous state in its `state` and `prev_state` fields.
 * The numeric values are spelled out so the ordering is visible at a glance.
 */
enum ucl_parser_state {
	UCL_STATE_INIT        = 0,
	UCL_STATE_OBJECT      = 1,
	UCL_STATE_ARRAY       = 2,
	UCL_STATE_KEY         = 3,
	UCL_STATE_VALUE       = 4,
	UCL_STATE_AFTER_VALUE = 5,
	UCL_STATE_ARRAY_VALUE = 6,
	UCL_STATE_SCOMMENT    = 7,
	UCL_STATE_MCOMMENT    = 8,
	UCL_STATE_MACRO_NAME  = 9,
	UCL_STATE_MACRO       = 10,
	UCL_STATE_ERROR       = 11
};
131 
/**
 * Character class flags. Each constant owns exactly one bit, so several
 * classes can be combined into (and tested against) a single bit mask.
 */
enum ucl_character_type {
	UCL_CHARACTER_DENIED            = 0x0001,
	UCL_CHARACTER_KEY               = 0x0002,
	UCL_CHARACTER_KEY_START         = 0x0004,
	UCL_CHARACTER_WHITESPACE        = 0x0008,
	UCL_CHARACTER_WHITESPACE_UNSAFE = 0x0010,
	UCL_CHARACTER_VALUE_END         = 0x0020,
	UCL_CHARACTER_VALUE_STR         = 0x0040,
	UCL_CHARACTER_VALUE_DIGIT       = 0x0080,
	UCL_CHARACTER_VALUE_DIGIT_START = 0x0100,
	UCL_CHARACTER_ESCAPE            = 0x0200,
	UCL_CHARACTER_KEY_SEP           = 0x0400,
	UCL_CHARACTER_JSON_UNSAFE       = 0x0800,
	UCL_CHARACTER_UCL_UNSAFE        = 0x1000
};
147 
148 struct ucl_macro {
149 	char *name;
150 	union {
151 		ucl_macro_handler handler;
152 		ucl_context_macro_handler context_handler;
153 	} h;
154 	void* ud;
155 	bool is_context;
156 	UT_hash_handle hh;
157 };
158 
159 struct ucl_stack {
160 	ucl_object_t *obj;
161 	struct ucl_stack *next;
162 	uint64_t level;
163 };
164 
165 struct ucl_chunk {
166 	const unsigned char *begin;
167 	const unsigned char *end;
168 	const unsigned char *pos;
169 	size_t remain;
170 	unsigned int line;
171 	unsigned int column;
172 	unsigned priority;
173 	enum ucl_duplicate_strategy strategy;
174 	enum ucl_parse_type parse_type;
175 	struct ucl_chunk *next;
176 };
177 
/**
 * Element of a singly linked list of public keys (struct ucl_parser holds the
 * head in its `keys` field). The key itself is only present when the library
 * is built with OpenSSL; otherwise the element is a bare list link.
 */
struct ucl_pubkey {
#ifdef HAVE_OPENSSL
	EVP_PKEY *key;
#endif
	struct ucl_pubkey *next;
};
188 
/**
 * A variable: name and value strings with their lengths, linked into a
 * doubly linked list (struct ucl_parser holds the head in its `variables`
 * field).
 */
struct ucl_variable {
	/* Variable name */
	char *var;
	/* Variable value */
	char *value;
	/* Length of var */
	size_t var_len;
	/* Length of value */
	size_t value_len;
	/* List links */
	struct ucl_variable *prev;
	struct ucl_variable *next;
};
196 
197 struct ucl_parser {
198 	enum ucl_parser_state state;
199 	enum ucl_parser_state prev_state;
200 	unsigned int recursion;
201 	int flags;
202 	unsigned default_priority;
203 	int err_code;
204 	ucl_object_t *top_obj;
205 	ucl_object_t *cur_obj;
206 	ucl_object_t *trash_objs;
207 	ucl_object_t *includepaths;
208 	char *cur_file;
209 	struct ucl_macro *macroes;
210 	struct ucl_stack *stack;
211 	struct ucl_chunk *chunks;
212 	struct ucl_pubkey *keys;
213 	struct ucl_variable *variables;
214 	ucl_variable_handler var_handler;
215 	void *var_data;
216 	ucl_object_t *comments;
217 	ucl_object_t *last_comment;
218 	UT_string *err;
219 };
220 
221 struct ucl_object_userdata {
222 	ucl_object_t obj;
223 	ucl_userdata_dtor dtor;
224 	ucl_userdata_emitter emitter;
225 };
226 
227 /**
228  * Unescape json string inplace
229  * @param str
230  */
231 size_t ucl_unescape_json_string (char *str, size_t len);
232 
233 /**
234  * Handle include macro
235  * @param data include data
236  * @param len length of data
237  * @param args UCL object representing arguments to the macro
238  * @param ud user data
239  * @return
240  */
241 bool ucl_include_handler (const unsigned char *data, size_t len,
242 		const ucl_object_t *args, void* ud);
243 
244 /**
245  * Handle tryinclude macro
246  * @param data include data
247  * @param len length of data
248  * @param args UCL object representing arguments to the macro
249  * @param ud user data
250  * @return
251  */
252 bool ucl_try_include_handler (const unsigned char *data, size_t len,
253 		const ucl_object_t *args, void* ud);
254 
255 /**
256  * Handle includes macro
257  * @param data include data
258  * @param len length of data
259  * @param args UCL object representing arguments to the macro
260  * @param ud user data
261  * @return
262  */
263 bool ucl_includes_handler (const unsigned char *data, size_t len,
264 		const ucl_object_t *args, void* ud);
265 
266 /**
267  * Handle priority macro
268  * @param data include data
269  * @param len length of data
270  * @param args UCL object representing arguments to the macro
271  * @param ud user data
272  * @return
273  */
274 bool ucl_priority_handler (const unsigned char *data, size_t len,
275 		const ucl_object_t *args, void* ud);
276 
277 /**
278  * Handle load macro
279  * @param data include data
280  * @param len length of data
281  * @param args UCL object representing arguments to the macro
282  * @param ud user data
283  * @return
284  */
285 bool ucl_load_handler (const unsigned char *data, size_t len,
286 		const ucl_object_t *args, void* ud);
287 /**
288  * Handle inherit macro
289  * @param data include data
290  * @param len length of data
291  * @param args UCL object representing arguments to the macro
292  * @param ctx the current context object
293  * @param ud user data
294  * @return
295  */
296 bool ucl_inherit_handler (const unsigned char *data, size_t len,
297 		const ucl_object_t *args, const ucl_object_t *ctx, void* ud);
298 
299 size_t ucl_strlcpy (char *dst, const char *src, size_t siz);
300 size_t ucl_strlcpy_unsafe (char *dst, const char *src, size_t siz);
301 size_t ucl_strlcpy_tolower (char *dst, const char *src, size_t siz);
302 
303 char *ucl_strnstr (const char *s, const char *find, int len);
304 char *ucl_strncasestr (const char *s, const char *find, int len);
305 
306 #ifdef __GNUC__
307 static inline void
308 ucl_create_err (UT_string **err, const char *fmt, ...)
309 __attribute__ (( format( printf, 2, 3) ));
310 #endif
311 
312 #undef UCL_FATAL_ERRORS
313 
314 static inline void
315 ucl_create_err (UT_string **err, const char *fmt, ...)
316 {
317 	if (*err == NULL) {
318 		utstring_new (*err);
319 		va_list ap;
320 		va_start (ap, fmt);
321 		utstring_printf_va (*err, fmt, ap);
322 		va_end (ap);
323 	}
324 
325 #ifdef UCL_FATAL_ERRORS
326 	assert (0);
327 #endif
328 }
329 
330 /**
331  * Check whether a given string contains a boolean value
332  * @param obj object to set
333  * @param start start of a string
334  * @param len length of a string
335  * @return true if a string is a boolean value
336  */
337 static inline bool
338 ucl_maybe_parse_boolean (ucl_object_t *obj, const unsigned char *start, size_t len)
339 {
340 	const char *p = (const char *)start;
341 	bool ret = false, val = false;
342 
343 	if (len == 5) {
344 		if ((p[0] == 'f' || p[0] == 'F') && strncasecmp (p, "false", 5) == 0) {
345 			ret = true;
346 			val = false;
347 		}
348 	}
349 	else if (len == 4) {
350 		if ((p[0] == 't' || p[0] == 'T') && strncasecmp (p, "true", 4) == 0) {
351 			ret = true;
352 			val = true;
353 		}
354 	}
355 	else if (len == 3) {
356 		if ((p[0] == 'y' || p[0] == 'Y') && strncasecmp (p, "yes", 3) == 0) {
357 			ret = true;
358 			val = true;
359 		}
360 		else if ((p[0] == 'o' || p[0] == 'O') && strncasecmp (p, "off", 3) == 0) {
361 			ret = true;
362 			val = false;
363 		}
364 	}
365 	else if (len == 2) {
366 		if ((p[0] == 'n' || p[0] == 'N') && strncasecmp (p, "no", 2) == 0) {
367 			ret = true;
368 			val = false;
369 		}
370 		else if ((p[0] == 'o' || p[0] == 'O') && strncasecmp (p, "on", 2) == 0) {
371 			ret = true;
372 			val = true;
373 		}
374 	}
375 
376 	if (ret && obj != NULL) {
377 		obj->type = UCL_BOOLEAN;
378 		obj->value.iv = val;
379 	}
380 
381 	return ret;
382 }
383 
384 /**
385  * Check numeric string
386  * @param obj object to set if a string is numeric
387  * @param start start of string
388  * @param end end of string
389  * @param pos position where parsing has stopped
390  * @param allow_double allow parsing of floating point values
391  * @return 0 if string is numeric and error code (EINVAL or ERANGE) in case of conversion error
392  */
393 int ucl_maybe_parse_number (ucl_object_t *obj,
394 		const char *start, const char *end, const char **pos,
395 		bool allow_double, bool number_bytes, bool allow_time);
396 
397 
398 static inline const ucl_object_t *
399 ucl_hash_search_obj (ucl_hash_t* hashlin, ucl_object_t *obj)
400 {
401 	return (const ucl_object_t *)ucl_hash_search (hashlin, obj->key, obj->keylen);
402 }
403 
404 static inline ucl_hash_t * ucl_hash_insert_object (ucl_hash_t *hashlin,
405 		const ucl_object_t *obj,
406 		bool ignore_case) UCL_WARN_UNUSED_RESULT;
407 
408 static inline ucl_hash_t *
409 ucl_hash_insert_object (ucl_hash_t *hashlin,
410 		const ucl_object_t *obj,
411 		bool ignore_case)
412 {
413 	if (hashlin == NULL) {
414 		hashlin = ucl_hash_create (ignore_case);
415 	}
416 	ucl_hash_insert (hashlin, obj, obj->key, obj->keylen);
417 
418 	return hashlin;
419 }
420 
421 /**
422  * Get standard emitter context for a specified emit_type
423  * @param emit_type type of emitter
424  * @return context or NULL if input is invalid
425  */
426 const struct ucl_emitter_context *
427 ucl_emit_get_standard_context (enum ucl_emitter emit_type);
428 
429 /**
430  * Serialize string as JSON string
431  * @param str string to emit
432  * @param buf target buffer
433  */
434 void ucl_elt_string_write_json (const char *str, size_t size,
435 		struct ucl_emitter_context *ctx);
436 
437 /**
438  * Write multiline string using `EOD` as string terminator
439  * @param str
440  * @param size
441  * @param ctx
442  */
443 void ucl_elt_string_write_multiline (const char *str, size_t size,
444 		struct ucl_emitter_context *ctx);
445 
446 /**
447  * Emit a single object to string
448  * @param obj
449  * @return
450  */
451 unsigned char * ucl_object_emit_single_json (const ucl_object_t *obj);
452 
453 /**
454  * Check whether a specified string is long and should likely be printed in
455  * multiline mode
456  * @param obj
457  * @return
458  */
459 bool ucl_maybe_long_string (const ucl_object_t *obj);
460 
461 /**
462  * Print integer to the msgpack output
463  * @param ctx
464  * @param val
465  */
466 void ucl_emitter_print_int_msgpack (struct ucl_emitter_context *ctx,
467 		int64_t val);
468 /**
469  * Print double to the msgpack output
470  * @param ctx
471  * @param val
472  */
473 void ucl_emitter_print_double_msgpack (struct ucl_emitter_context *ctx,
474 		double val);
475 /**
476  * Print boolean to the msgpack output
477  * @param ctx
478  * @param val
479  */
480 void ucl_emitter_print_bool_msgpack (struct ucl_emitter_context *ctx,
481 		bool val);
482 /**
483  * Print string to the msgpack output
484  * @param ctx
485  * @param s
486  * @param len
487  */
488 void ucl_emitter_print_string_msgpack (struct ucl_emitter_context *ctx,
489 		const char *s, size_t len);
490 
491 /**
492  * Print binary string to the msgpack output
493  * @param ctx
494  * @param s
495  * @param len
496  */
497 void ucl_emitter_print_binary_string_msgpack (struct ucl_emitter_context *ctx,
498 		const char *s, size_t len);
499 
500 /**
501  * Print array preamble for msgpack
502  * @param ctx
503  * @param len
504  */
505 void ucl_emitter_print_array_msgpack (struct ucl_emitter_context *ctx,
506 		size_t len);
507 
508 /**
509  * Print object preamble for msgpack
510  * @param ctx
511  * @param len
512  */
513 void ucl_emitter_print_object_msgpack (struct ucl_emitter_context *ctx,
514 		size_t len);
515 /**
516  * Print NULL to the msgpack output
517  * @param ctx
518  */
519 void ucl_emitter_print_null_msgpack (struct ucl_emitter_context *ctx);
520 /**
521  * Print object's key if needed to the msgpack output
522  * @param print_key
523  * @param ctx
524  * @param obj
525  */
526 void ucl_emitter_print_key_msgpack (bool print_key,
527 		struct ucl_emitter_context *ctx,
528 		const ucl_object_t *obj);
529 
530 /**
531  * Fetch URL into a buffer
532  * @param url url to fetch
533  * @param buf pointer to buffer (must be freed by caller)
534  * @param buflen pointer to buffer length
535  * @param err pointer to error argument
536  * @param must_exist fail if cannot find a url
537  */
538 bool ucl_fetch_url (const unsigned char *url,
539 		unsigned char **buf,
540 		size_t *buflen,
541 		UT_string **err,
542 		bool must_exist);
543 
544 /**
545  * Fetch a file and save results to the memory buffer
546  * @param filename filename to fetch
547  * @param err pointer to error argument
548  * @param buf target buffer
549  * @param buflen target length
550  * @return
551  */
552 bool ucl_fetch_file (const unsigned char *filename,
553 		unsigned char **buf,
554 		size_t *buflen,
555 		UT_string **err,
556 		bool must_exist);
557 
558 /**
559  * Add new element to an object using the current merge strategy and priority
560  * @param parser
561  * @param nobj
562  * @return
563  */
564 bool ucl_parser_process_object_element (struct ucl_parser *parser,
565 		ucl_object_t *nobj);
566 
567 /**
568  * Parse msgpack chunk
569  * @param parser
570  * @return
571  */
572 bool ucl_parse_msgpack (struct ucl_parser *parser);
573 
574 bool ucl_parse_csexp (struct ucl_parser *parser);
575 
576 #endif /* UCL_INTERNAL_H_ */
577