1 /* Copyright (c) 2013, Vsevolod Stakhov 2 * All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * * Redistributions of source code must retain the above copyright 7 * notice, this list of conditions and the following disclaimer. 8 * * Redistributions in binary form must reproduce the above copyright 9 * notice, this list of conditions and the following disclaimer in the 10 * documentation and/or other materials provided with the distribution. 11 * 12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY 13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY 16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
22 */ 23 24 #include "ucl.h" 25 #include "ucl_internal.h" 26 #include "ucl_chartable.h" 27 28 /** 29 * @file ucl_parser.c 30 * The implementation of ucl parser 31 */ 32 33 struct ucl_parser_saved_state { 34 unsigned int line; 35 unsigned int column; 36 size_t remain; 37 const unsigned char *pos; 38 }; 39 40 /** 41 * Move up to len characters 42 * @param parser 43 * @param begin 44 * @param len 45 * @return new position in chunk 46 */ 47 #define ucl_chunk_skipc(chunk, p) do{ \ 48 if (*(p) == '\n') { \ 49 (chunk)->line ++; \ 50 (chunk)->column = 0; \ 51 } \ 52 else (chunk)->column ++; \ 53 (p++); \ 54 (chunk)->pos ++; \ 55 (chunk)->remain --; \ 56 } while (0) 57 58 static inline void 59 ucl_set_err (struct ucl_parser *parser, int code, const char *str, UT_string **err) 60 { 61 const char *fmt_string, *filename; 62 struct ucl_chunk *chunk = parser->chunks; 63 64 if (parser->cur_file) { 65 filename = parser->cur_file; 66 } 67 else { 68 filename = "<unknown>"; 69 } 70 71 if (chunk->pos < chunk->end) { 72 if (isgraph (*chunk->pos)) { 73 fmt_string = "error while parsing %s: " 74 "line: %d, column: %d - '%s', character: '%c'"; 75 } 76 else { 77 fmt_string = "error while parsing %s: " 78 "line: %d, column: %d - '%s', character: '0x%02x'"; 79 } 80 ucl_create_err (err, fmt_string, 81 filename, chunk->line, chunk->column, 82 str, *chunk->pos); 83 } 84 else { 85 ucl_create_err (err, "error while parsing %s: at the end of chunk: %s", 86 filename, str); 87 } 88 89 parser->err_code = code; 90 } 91 92 static void 93 ucl_save_comment (struct ucl_parser *parser, const char *begin, size_t len) 94 { 95 ucl_object_t *nobj; 96 97 if (len > 0 && begin != NULL) { 98 nobj = ucl_object_fromstring_common (begin, len, 0); 99 100 if (parser->last_comment) { 101 /* We need to append data to an existing object */ 102 DL_APPEND (parser->last_comment, nobj); 103 } 104 else { 105 parser->last_comment = nobj; 106 } 107 } 108 } 109 110 static void 111 ucl_attach_comment (struct ucl_parser *parser, 
ucl_object_t *obj, bool before) 112 { 113 if (parser->last_comment) { 114 ucl_object_insert_key (parser->comments, parser->last_comment, 115 (const char *)&obj, sizeof (void *), true); 116 117 if (before) { 118 parser->last_comment->flags |= UCL_OBJECT_INHERITED; 119 } 120 121 parser->last_comment = NULL; 122 } 123 } 124 125 /** 126 * Skip all comments from the current pos resolving nested and multiline comments 127 * @param parser 128 * @return 129 */ 130 static bool 131 ucl_skip_comments (struct ucl_parser *parser) 132 { 133 struct ucl_chunk *chunk = parser->chunks; 134 const unsigned char *p, *beg = NULL; 135 int comments_nested = 0; 136 bool quoted = false; 137 138 p = chunk->pos; 139 140 start: 141 if (chunk->remain > 0 && *p == '#') { 142 if (parser->state != UCL_STATE_SCOMMENT && 143 parser->state != UCL_STATE_MCOMMENT) { 144 beg = p; 145 146 while (p < chunk->end) { 147 if (*p == '\n') { 148 if (parser->flags & UCL_PARSER_SAVE_COMMENTS) { 149 ucl_save_comment (parser, beg, p - beg); 150 beg = NULL; 151 } 152 153 ucl_chunk_skipc (chunk, p); 154 155 goto start; 156 } 157 ucl_chunk_skipc (chunk, p); 158 } 159 } 160 } 161 else if (chunk->remain >= 2 && *p == '/') { 162 if (p[1] == '*') { 163 beg = p; 164 ucl_chunk_skipc (chunk, p); 165 comments_nested ++; 166 ucl_chunk_skipc (chunk, p); 167 168 while (p < chunk->end) { 169 if (*p == '"' && *(p - 1) != '\\') { 170 quoted = !quoted; 171 } 172 173 if (!quoted) { 174 if (*p == '*') { 175 ucl_chunk_skipc (chunk, p); 176 if (*p == '/') { 177 comments_nested --; 178 if (comments_nested == 0) { 179 if (parser->flags & UCL_PARSER_SAVE_COMMENTS) { 180 ucl_save_comment (parser, beg, p - beg + 1); 181 beg = NULL; 182 } 183 184 ucl_chunk_skipc (chunk, p); 185 goto start; 186 } 187 } 188 ucl_chunk_skipc (chunk, p); 189 } 190 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') { 191 comments_nested ++; 192 ucl_chunk_skipc (chunk, p); 193 ucl_chunk_skipc (chunk, p); 194 continue; 195 } 196 } 197 198 ucl_chunk_skipc 
(chunk, p); 199 } 200 if (comments_nested != 0) { 201 ucl_set_err (parser, UCL_ENESTED, 202 "unfinished multiline comment", &parser->err); 203 return false; 204 } 205 } 206 } 207 208 if (beg && p > beg && (parser->flags & UCL_PARSER_SAVE_COMMENTS)) { 209 ucl_save_comment (parser, beg, p - beg); 210 } 211 212 return true; 213 } 214 215 /** 216 * Return multiplier for a character 217 * @param c multiplier character 218 * @param is_bytes if true use 1024 multiplier 219 * @return multiplier 220 */ 221 static inline unsigned long 222 ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) { 223 const struct { 224 char c; 225 long mult_normal; 226 long mult_bytes; 227 } multipliers[] = { 228 {'m', 1000 * 1000, 1024 * 1024}, 229 {'k', 1000, 1024}, 230 {'g', 1000 * 1000 * 1000, 1024 * 1024 * 1024} 231 }; 232 int i; 233 234 for (i = 0; i < 3; i ++) { 235 if (tolower (c) == multipliers[i].c) { 236 if (is_bytes) { 237 return multipliers[i].mult_bytes; 238 } 239 return multipliers[i].mult_normal; 240 } 241 } 242 243 return 1; 244 } 245 246 247 /** 248 * Return multiplier for time scaling 249 * @param c 250 * @return 251 */ 252 static inline double 253 ucl_lex_time_multiplier (const unsigned char c) { 254 const struct { 255 char c; 256 double mult; 257 } multipliers[] = { 258 {'m', 60}, 259 {'h', 60 * 60}, 260 {'d', 60 * 60 * 24}, 261 {'w', 60 * 60 * 24 * 7}, 262 {'y', 60 * 60 * 24 * 365} 263 }; 264 int i; 265 266 for (i = 0; i < 5; i ++) { 267 if (tolower (c) == multipliers[i].c) { 268 return multipliers[i].mult; 269 } 270 } 271 272 return 1; 273 } 274 275 /** 276 * Return true if a character is a end of an atom 277 * @param c 278 * @return 279 */ 280 static inline bool 281 ucl_lex_is_atom_end (const unsigned char c) 282 { 283 return ucl_test_character (c, UCL_CHARACTER_VALUE_END); 284 } 285 286 static inline bool 287 ucl_lex_is_comment (const unsigned char c1, const unsigned char c2) 288 { 289 if (c1 == '/') { 290 if (c2 == '*') { 291 return true; 292 } 293 } 294 else 
if (c1 == '#') { 295 return true; 296 } 297 return false; 298 } 299 300 /** 301 * Check variable found 302 * @param parser 303 * @param ptr 304 * @param remain 305 * @param out_len 306 * @param strict 307 * @param found 308 * @return 309 */ 310 static inline const char * 311 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain, 312 size_t *out_len, bool strict, bool *found) 313 { 314 struct ucl_variable *var; 315 unsigned char *dst; 316 size_t dstlen; 317 bool need_free = false; 318 319 LL_FOREACH (parser->variables, var) { 320 if (strict) { 321 if (remain == var->var_len) { 322 if (memcmp (ptr, var->var, var->var_len) == 0) { 323 *out_len += var->value_len; 324 *found = true; 325 return (ptr + var->var_len); 326 } 327 } 328 } 329 else { 330 if (remain >= var->var_len) { 331 if (memcmp (ptr, var->var, var->var_len) == 0) { 332 *out_len += var->value_len; 333 *found = true; 334 return (ptr + var->var_len); 335 } 336 } 337 } 338 } 339 340 /* XXX: can only handle ${VAR} */ 341 if (!(*found) && parser->var_handler != NULL && strict) { 342 /* Call generic handler */ 343 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free, 344 parser->var_data)) { 345 *found = true; 346 if (need_free) { 347 free (dst); 348 } 349 return (ptr + remain); 350 } 351 } 352 353 return ptr; 354 } 355 356 /** 357 * Check for a variable in a given string 358 * @param parser 359 * @param ptr 360 * @param remain 361 * @param out_len 362 * @param vars_found 363 * @return 364 */ 365 static const char * 366 ucl_check_variable (struct ucl_parser *parser, const char *ptr, 367 size_t remain, size_t *out_len, bool *vars_found) 368 { 369 const char *p, *end, *ret = ptr; 370 bool found = false; 371 372 if (*ptr == '{') { 373 /* We need to match the variable enclosed in braces */ 374 p = ptr + 1; 375 end = ptr + remain; 376 while (p < end) { 377 if (*p == '}') { 378 ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, 379 out_len, true, &found); 380 if (found) 
{ 381 /* {} must be excluded actually */ 382 ret ++; 383 if (!*vars_found) { 384 *vars_found = true; 385 } 386 } 387 else { 388 *out_len += 2; 389 } 390 break; 391 } 392 p ++; 393 } 394 } 395 else if (*ptr != '$') { 396 /* Not count escaped dollar sign */ 397 ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found); 398 if (found && !*vars_found) { 399 *vars_found = true; 400 } 401 if (!found) { 402 (*out_len) ++; 403 } 404 } 405 else { 406 ret ++; 407 (*out_len) ++; 408 } 409 410 return ret; 411 } 412 413 /** 414 * Expand a single variable 415 * @param parser 416 * @param ptr 417 * @param remain 418 * @param dest 419 * @return 420 */ 421 static const char * 422 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr, 423 size_t remain, unsigned char **dest) 424 { 425 unsigned char *d = *dest, *dst; 426 const char *p = ptr + 1, *ret; 427 struct ucl_variable *var; 428 size_t dstlen; 429 bool need_free = false; 430 bool found = false; 431 bool strict = false; 432 433 ret = ptr + 1; 434 remain --; 435 436 if (*p == '$') { 437 *d++ = *p++; 438 *dest = d; 439 return p; 440 } 441 else if (*p == '{') { 442 p ++; 443 strict = true; 444 ret += 2; 445 remain -= 2; 446 } 447 448 LL_FOREACH (parser->variables, var) { 449 if (remain >= var->var_len) { 450 if (memcmp (p, var->var, var->var_len) == 0) { 451 memcpy (d, var->value, var->value_len); 452 ret += var->var_len; 453 d += var->value_len; 454 found = true; 455 break; 456 } 457 } 458 } 459 if (!found) { 460 if (strict && parser->var_handler != NULL) { 461 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free, 462 parser->var_data)) { 463 memcpy (d, dst, dstlen); 464 ret += dstlen; 465 d += remain; 466 found = true; 467 } 468 } 469 470 /* Leave variable as is */ 471 if (!found) { 472 if (strict) { 473 /* Copy '${' */ 474 memcpy (d, ptr, 2); 475 d += 2; 476 ret --; 477 } 478 else { 479 memcpy (d, ptr, 1); 480 d ++; 481 } 482 } 483 } 484 485 *dest = d; 486 return ret; 487 } 488 489 
/** 490 * Expand variables in string 491 * @param parser 492 * @param dst 493 * @param src 494 * @param in_len 495 * @return 496 */ 497 static ssize_t 498 ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst, 499 const char *src, size_t in_len) 500 { 501 const char *p, *end = src + in_len; 502 unsigned char *d; 503 size_t out_len = 0; 504 bool vars_found = false; 505 506 if (parser->flags & UCL_PARSER_DISABLE_MACRO) { 507 *dst = NULL; 508 return in_len; 509 } 510 511 p = src; 512 while (p != end) { 513 if (*p == '$') { 514 p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found); 515 } 516 else { 517 p ++; 518 out_len ++; 519 } 520 } 521 522 if (!vars_found) { 523 /* Trivial case */ 524 *dst = NULL; 525 return in_len; 526 } 527 528 *dst = UCL_ALLOC (out_len + 1); 529 if (*dst == NULL) { 530 return in_len; 531 } 532 533 d = *dst; 534 p = src; 535 while (p != end) { 536 if (*p == '$') { 537 p = ucl_expand_single_variable (parser, p, end - p, &d); 538 } 539 else { 540 *d++ = *p++; 541 } 542 } 543 544 *d = '\0'; 545 546 return out_len; 547 } 548 549 /** 550 * Store or copy pointer to the trash stack 551 * @param parser parser object 552 * @param src src string 553 * @param dst destination buffer (trash stack pointer) 554 * @param dst_const const destination pointer (e.g. 
value of object) 555 * @param in_len input length 556 * @param need_unescape need to unescape source (and copy it) 557 * @param need_lowercase need to lowercase value (and copy) 558 * @param need_expand need to expand variables (and copy as well) 559 * @return output length (excluding \0 symbol) 560 */ 561 static inline ssize_t 562 ucl_copy_or_store_ptr (struct ucl_parser *parser, 563 const unsigned char *src, unsigned char **dst, 564 const char **dst_const, size_t in_len, 565 bool need_unescape, bool need_lowercase, bool need_expand) 566 { 567 ssize_t ret = -1, tret; 568 unsigned char *tmp; 569 570 if (need_unescape || need_lowercase || 571 (need_expand && parser->variables != NULL) || 572 !(parser->flags & UCL_PARSER_ZEROCOPY)) { 573 /* Copy string */ 574 *dst = UCL_ALLOC (in_len + 1); 575 if (*dst == NULL) { 576 ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for a string", 577 &parser->err); 578 return false; 579 } 580 if (need_lowercase) { 581 ret = ucl_strlcpy_tolower (*dst, src, in_len + 1); 582 } 583 else { 584 ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1); 585 } 586 587 if (need_unescape) { 588 ret = ucl_unescape_json_string (*dst, ret); 589 } 590 if (need_expand) { 591 tmp = *dst; 592 tret = ret; 593 ret = ucl_expand_variable (parser, dst, tmp, ret); 594 if (*dst == NULL) { 595 /* Nothing to expand */ 596 *dst = tmp; 597 ret = tret; 598 } 599 else { 600 /* Free unexpanded value */ 601 UCL_FREE (in_len + 1, tmp); 602 } 603 } 604 *dst_const = *dst; 605 } 606 else { 607 *dst_const = src; 608 ret = in_len; 609 } 610 611 return ret; 612 } 613 614 /** 615 * Create and append an object at the specified level 616 * @param parser 617 * @param is_array 618 * @param level 619 * @return 620 */ 621 static inline ucl_object_t * 622 ucl_parser_add_container (ucl_object_t *obj, struct ucl_parser *parser, 623 bool is_array, int level) 624 { 625 struct ucl_stack *st; 626 627 if (!is_array) { 628 if (obj == NULL) { 629 obj = ucl_object_new_full (UCL_OBJECT, 
parser->chunks->priority); 630 } 631 else { 632 obj->type = UCL_OBJECT; 633 } 634 if (obj->value.ov == NULL) { 635 obj->value.ov = ucl_hash_create (parser->flags & UCL_PARSER_KEY_LOWERCASE); 636 } 637 parser->state = UCL_STATE_KEY; 638 } 639 else { 640 if (obj == NULL) { 641 obj = ucl_object_new_full (UCL_ARRAY, parser->chunks->priority); 642 } 643 else { 644 obj->type = UCL_ARRAY; 645 } 646 parser->state = UCL_STATE_VALUE; 647 } 648 649 st = UCL_ALLOC (sizeof (struct ucl_stack)); 650 651 if (st == NULL) { 652 ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for an object", 653 &parser->err); 654 ucl_object_unref (obj); 655 return NULL; 656 } 657 658 st->obj = obj; 659 st->level = level; 660 LL_PREPEND (parser->stack, st); 661 parser->cur_obj = obj; 662 663 return obj; 664 } 665 666 int 667 ucl_maybe_parse_number (ucl_object_t *obj, 668 const char *start, const char *end, const char **pos, 669 bool allow_double, bool number_bytes, bool allow_time) 670 { 671 const char *p = start, *c = start; 672 char *endptr; 673 bool got_dot = false, got_exp = false, need_double = false, 674 is_time = false, valid_start = false, is_hex = false, 675 is_neg = false; 676 double dv = 0; 677 int64_t lv = 0; 678 679 if (*p == '-') { 680 is_neg = true; 681 c ++; 682 p ++; 683 } 684 while (p < end) { 685 if (is_hex && isxdigit (*p)) { 686 p ++; 687 } 688 else if (isdigit (*p)) { 689 valid_start = true; 690 p ++; 691 } 692 else if (!is_hex && (*p == 'x' || *p == 'X')) { 693 is_hex = true; 694 allow_double = false; 695 c = p + 1; 696 } 697 else if (allow_double) { 698 if (p == c) { 699 /* Empty digits sequence, not a number */ 700 *pos = start; 701 return EINVAL; 702 } 703 else if (*p == '.') { 704 if (got_dot) { 705 /* Double dots, not a number */ 706 *pos = start; 707 return EINVAL; 708 } 709 else { 710 got_dot = true; 711 need_double = true; 712 p ++; 713 } 714 } 715 else if (*p == 'e' || *p == 'E') { 716 if (got_exp) { 717 /* Double exp, not a number */ 718 *pos = start; 719 
return EINVAL; 720 } 721 else { 722 got_exp = true; 723 need_double = true; 724 p ++; 725 if (p >= end) { 726 *pos = start; 727 return EINVAL; 728 } 729 if (!isdigit (*p) && *p != '+' && *p != '-') { 730 /* Wrong exponent sign */ 731 *pos = start; 732 return EINVAL; 733 } 734 else { 735 p ++; 736 } 737 } 738 } 739 else { 740 /* Got the end of the number, need to check */ 741 break; 742 } 743 } 744 else { 745 break; 746 } 747 } 748 749 if (!valid_start) { 750 *pos = start; 751 return EINVAL; 752 } 753 754 errno = 0; 755 if (need_double) { 756 dv = strtod (c, &endptr); 757 } 758 else { 759 if (is_hex) { 760 lv = strtoimax (c, &endptr, 16); 761 } 762 else { 763 lv = strtoimax (c, &endptr, 10); 764 } 765 } 766 if (errno == ERANGE) { 767 *pos = start; 768 return ERANGE; 769 } 770 771 /* Now check endptr */ 772 if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') { 773 p = endptr; 774 goto set_obj; 775 } 776 777 if (endptr < end && endptr != start) { 778 p = endptr; 779 switch (*p) { 780 case 'm': 781 case 'M': 782 case 'g': 783 case 'G': 784 case 'k': 785 case 'K': 786 if (end - p >= 2) { 787 if (p[1] == 's' || p[1] == 'S') { 788 /* Milliseconds */ 789 if (!need_double) { 790 need_double = true; 791 dv = lv; 792 } 793 is_time = true; 794 if (p[0] == 'm' || p[0] == 'M') { 795 dv /= 1000.; 796 } 797 else { 798 dv *= ucl_lex_num_multiplier (*p, false); 799 } 800 p += 2; 801 goto set_obj; 802 } 803 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) { 804 /* Bytes */ 805 if (need_double) { 806 need_double = false; 807 lv = dv; 808 } 809 lv *= ucl_lex_num_multiplier (*p, true); 810 p += 2; 811 goto set_obj; 812 } 813 else if (ucl_lex_is_atom_end (p[1])) { 814 if (need_double) { 815 dv *= ucl_lex_num_multiplier (*p, false); 816 } 817 else { 818 lv *= ucl_lex_num_multiplier (*p, number_bytes); 819 } 820 p ++; 821 goto set_obj; 822 } 823 else if (allow_time && end - p >= 3) { 824 if (tolower (p[0]) == 'm' && 825 tolower (p[1]) == 'i' && 826 tolower (p[2]) 
== 'n') { 827 /* Minutes */ 828 if (!need_double) { 829 need_double = true; 830 dv = lv; 831 } 832 is_time = true; 833 dv *= 60.; 834 p += 3; 835 goto set_obj; 836 } 837 } 838 } 839 else { 840 if (need_double) { 841 dv *= ucl_lex_num_multiplier (*p, false); 842 } 843 else { 844 lv *= ucl_lex_num_multiplier (*p, number_bytes); 845 } 846 p ++; 847 goto set_obj; 848 } 849 break; 850 case 'S': 851 case 's': 852 if (allow_time && 853 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { 854 if (!need_double) { 855 need_double = true; 856 dv = lv; 857 } 858 p ++; 859 is_time = true; 860 goto set_obj; 861 } 862 break; 863 case 'h': 864 case 'H': 865 case 'd': 866 case 'D': 867 case 'w': 868 case 'W': 869 case 'Y': 870 case 'y': 871 if (allow_time && 872 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { 873 if (!need_double) { 874 need_double = true; 875 dv = lv; 876 } 877 is_time = true; 878 dv *= ucl_lex_time_multiplier (*p); 879 p ++; 880 goto set_obj; 881 } 882 break; 883 case '\t': 884 case ' ': 885 while (p < end && ucl_test_character(*p, UCL_CHARACTER_WHITESPACE)) { 886 p++; 887 } 888 if (ucl_lex_is_atom_end(*p)) 889 goto set_obj; 890 break; 891 } 892 } 893 else if (endptr == end) { 894 /* Just a number at the end of chunk */ 895 p = endptr; 896 goto set_obj; 897 } 898 899 *pos = c; 900 return EINVAL; 901 902 set_obj: 903 if (obj != NULL) { 904 if (allow_double && (need_double || is_time)) { 905 if (!is_time) { 906 obj->type = UCL_FLOAT; 907 } 908 else { 909 obj->type = UCL_TIME; 910 } 911 obj->value.dv = is_neg ? (-dv) : dv; 912 } 913 else { 914 obj->type = UCL_INT; 915 obj->value.iv = is_neg ? 
(-lv) : lv; 916 } 917 } 918 *pos = p; 919 return 0; 920 } 921 922 /** 923 * Parse possible number 924 * @param parser 925 * @param chunk 926 * @param obj 927 * @return true if a number has been parsed 928 */ 929 static bool 930 ucl_lex_number (struct ucl_parser *parser, 931 struct ucl_chunk *chunk, ucl_object_t *obj) 932 { 933 const unsigned char *pos; 934 int ret; 935 936 ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos, 937 true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0)); 938 939 if (ret == 0) { 940 chunk->remain -= pos - chunk->pos; 941 chunk->column += pos - chunk->pos; 942 chunk->pos = pos; 943 return true; 944 } 945 else if (ret == ERANGE) { 946 ucl_set_err (parser, UCL_ESYNTAX, "numeric value out of range", 947 &parser->err); 948 } 949 950 return false; 951 } 952 953 /** 954 * Parse quoted string with possible escapes 955 * @param parser 956 * @param chunk 957 * @param need_unescape 958 * @param ucl_escape 959 * @param var_expand 960 * @return true if a string has been parsed 961 */ 962 static bool 963 ucl_lex_json_string (struct ucl_parser *parser, 964 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand) 965 { 966 const unsigned char *p = chunk->pos; 967 unsigned char c; 968 int i; 969 970 while (p < chunk->end) { 971 c = *p; 972 if (c < 0x1F) { 973 /* Unmasked control character */ 974 if (c == '\n') { 975 ucl_set_err (parser, UCL_ESYNTAX, "unexpected newline", 976 &parser->err); 977 } 978 else { 979 ucl_set_err (parser, UCL_ESYNTAX, "unexpected control character", 980 &parser->err); 981 } 982 return false; 983 } 984 else if (c == '\\') { 985 ucl_chunk_skipc (chunk, p); 986 c = *p; 987 if (p >= chunk->end) { 988 ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character", 989 &parser->err); 990 return false; 991 } 992 else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) { 993 if (c == 'u') { 994 ucl_chunk_skipc (chunk, p); 995 for (i = 0; i < 4 && p < chunk->end; i ++) { 996 if 
(!isxdigit (*p)) { 997 ucl_set_err (parser, UCL_ESYNTAX, "invalid utf escape", 998 &parser->err); 999 return false; 1000 } 1001 ucl_chunk_skipc (chunk, p); 1002 } 1003 if (p >= chunk->end) { 1004 ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character", 1005 &parser->err); 1006 return false; 1007 } 1008 } 1009 else { 1010 ucl_chunk_skipc (chunk, p); 1011 } 1012 } 1013 *need_unescape = true; 1014 *ucl_escape = true; 1015 continue; 1016 } 1017 else if (c == '"') { 1018 ucl_chunk_skipc (chunk, p); 1019 return true; 1020 } 1021 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) { 1022 *ucl_escape = true; 1023 } 1024 else if (c == '$') { 1025 *var_expand = true; 1026 } 1027 ucl_chunk_skipc (chunk, p); 1028 } 1029 1030 ucl_set_err (parser, UCL_ESYNTAX, "no quote at the end of json string", 1031 &parser->err); 1032 return false; 1033 } 1034 1035 static void 1036 ucl_parser_append_elt (struct ucl_parser *parser, ucl_hash_t *cont, 1037 ucl_object_t *top, 1038 ucl_object_t *elt) 1039 { 1040 ucl_object_t *nobj; 1041 1042 if ((parser->flags & UCL_PARSER_NO_IMPLICIT_ARRAYS) == 0) { 1043 /* Implicit array */ 1044 top->flags |= UCL_OBJECT_MULTIVALUE; 1045 DL_APPEND (top, elt); 1046 parser->stack->obj->len ++; 1047 } 1048 else { 1049 if ((top->flags & UCL_OBJECT_MULTIVALUE) != 0) { 1050 /* Just add to the explicit array */ 1051 ucl_array_append (top, elt); 1052 } 1053 else { 1054 /* Convert to an array */ 1055 nobj = ucl_object_typed_new (UCL_ARRAY); 1056 nobj->key = top->key; 1057 nobj->keylen = top->keylen; 1058 nobj->flags |= UCL_OBJECT_MULTIVALUE; 1059 ucl_array_append (nobj, top); 1060 ucl_array_append (nobj, elt); 1061 ucl_hash_replace (cont, top, nobj); 1062 } 1063 } 1064 } 1065 1066 bool 1067 ucl_parser_process_object_element (struct ucl_parser *parser, ucl_object_t *nobj) 1068 { 1069 ucl_hash_t *container; 1070 ucl_object_t *tobj; 1071 char errmsg[256]; 1072 1073 container = parser->stack->obj->value.ov; 1074 1075 tobj = __DECONST (ucl_object_t *, 
ucl_hash_search_obj (container, nobj)); 1076 if (tobj == NULL) { 1077 container = ucl_hash_insert_object (container, nobj, 1078 parser->flags & UCL_PARSER_KEY_LOWERCASE); 1079 nobj->prev = nobj; 1080 nobj->next = NULL; 1081 parser->stack->obj->len ++; 1082 } 1083 else { 1084 unsigned priold = ucl_object_get_priority (tobj), 1085 prinew = ucl_object_get_priority (nobj); 1086 switch (parser->chunks->strategy) { 1087 1088 case UCL_DUPLICATE_APPEND: 1089 /* 1090 * The logic here is the following: 1091 * 1092 * - if we have two objects with the same priority, then we form an 1093 * implicit or explicit array 1094 * - if a new object has bigger priority, then we overwrite an old one 1095 * - if a new object has lower priority, then we ignore it 1096 */ 1097 1098 1099 /* Special case for inherited objects */ 1100 if (tobj->flags & UCL_OBJECT_INHERITED) { 1101 prinew = priold + 1; 1102 } 1103 1104 if (priold == prinew) { 1105 ucl_parser_append_elt (parser, container, tobj, nobj); 1106 } 1107 else if (priold > prinew) { 1108 /* 1109 * We add this new object to a list of trash objects just to ensure 1110 * that it won't come to any real object 1111 * XXX: rather inefficient approach 1112 */ 1113 DL_APPEND (parser->trash_objs, nobj); 1114 } 1115 else { 1116 ucl_hash_replace (container, tobj, nobj); 1117 ucl_object_unref (tobj); 1118 } 1119 1120 break; 1121 1122 case UCL_DUPLICATE_REWRITE: 1123 /* We just rewrite old values regardless of priority */ 1124 ucl_hash_replace (container, tobj, nobj); 1125 ucl_object_unref (tobj); 1126 1127 break; 1128 1129 case UCL_DUPLICATE_ERROR: 1130 snprintf(errmsg, sizeof(errmsg), 1131 "duplicate element for key '%s' found", 1132 nobj->key); 1133 ucl_set_err (parser, UCL_EMERGE, errmsg, &parser->err); 1134 return false; 1135 1136 case UCL_DUPLICATE_MERGE: 1137 /* 1138 * Here we do have some old object so we just push it on top of objects stack 1139 * Check priority and then perform the merge on the remaining objects 1140 */ 1141 if (tobj->type 
== UCL_OBJECT || tobj->type == UCL_ARRAY) { 1142 ucl_object_unref (nobj); 1143 nobj = tobj; 1144 } 1145 else if (priold == prinew) { 1146 ucl_parser_append_elt (parser, container, tobj, nobj); 1147 } 1148 else if (priold > prinew) { 1149 /* 1150 * We add this new object to a list of trash objects just to ensure 1151 * that it won't come to any real object 1152 * XXX: rather inefficient approach 1153 */ 1154 DL_APPEND (parser->trash_objs, nobj); 1155 } 1156 else { 1157 ucl_hash_replace (container, tobj, nobj); 1158 ucl_object_unref (tobj); 1159 } 1160 break; 1161 } 1162 } 1163 1164 parser->stack->obj->value.ov = container; 1165 parser->cur_obj = nobj; 1166 ucl_attach_comment (parser, nobj, false); 1167 1168 return true; 1169 } 1170 1171 /** 1172 * Parse a key in an object 1173 * @param parser 1174 * @param chunk 1175 * @param next_key 1176 * @param end_of_object 1177 * @return true if a key has been parsed 1178 */ 1179 static bool 1180 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, 1181 bool *next_key, bool *end_of_object) 1182 { 1183 const unsigned char *p, *c = NULL, *end, *t; 1184 const char *key = NULL; 1185 bool got_quote = false, got_eq = false, got_semicolon = false, 1186 need_unescape = false, ucl_escape = false, var_expand = false, 1187 got_content = false, got_sep = false; 1188 ucl_object_t *nobj; 1189 ssize_t keylen; 1190 1191 p = chunk->pos; 1192 1193 if (*p == '.') { 1194 /* It is macro actually */ 1195 if (!(parser->flags & UCL_PARSER_DISABLE_MACRO)) { 1196 ucl_chunk_skipc (chunk, p); 1197 } 1198 1199 parser->prev_state = parser->state; 1200 parser->state = UCL_STATE_MACRO_NAME; 1201 *end_of_object = false; 1202 return true; 1203 } 1204 while (p < chunk->end) { 1205 /* 1206 * A key must start with alpha, number, '/' or '_' and end with space character 1207 */ 1208 if (c == NULL) { 1209 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1210 if (!ucl_skip_comments (parser)) { 1211 return false; 1212 } 1213 p = chunk->pos; 
1214 } 1215 else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1216 ucl_chunk_skipc (chunk, p); 1217 } 1218 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) { 1219 /* The first symbol */ 1220 c = p; 1221 ucl_chunk_skipc (chunk, p); 1222 got_content = true; 1223 } 1224 else if (*p == '"') { 1225 /* JSON style key */ 1226 c = p + 1; 1227 got_quote = true; 1228 got_content = true; 1229 ucl_chunk_skipc (chunk, p); 1230 } 1231 else if (*p == '}') { 1232 /* We have actually end of an object */ 1233 *end_of_object = true; 1234 return true; 1235 } 1236 else if (*p == '.') { 1237 ucl_chunk_skipc (chunk, p); 1238 parser->prev_state = parser->state; 1239 parser->state = UCL_STATE_MACRO_NAME; 1240 return true; 1241 } 1242 else { 1243 /* Invalid identifier */ 1244 ucl_set_err (parser, UCL_ESYNTAX, "key must begin with a letter", 1245 &parser->err); 1246 return false; 1247 } 1248 } 1249 else { 1250 /* Parse the body of a key */ 1251 if (!got_quote) { 1252 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) { 1253 got_content = true; 1254 ucl_chunk_skipc (chunk, p); 1255 } 1256 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) { 1257 end = p; 1258 break; 1259 } 1260 else { 1261 ucl_set_err (parser, UCL_ESYNTAX, "invalid character in a key", 1262 &parser->err); 1263 return false; 1264 } 1265 } 1266 else { 1267 /* We need to parse json like quoted string */ 1268 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1269 return false; 1270 } 1271 /* Always escape keys obtained via json */ 1272 end = chunk->pos - 1; 1273 p = chunk->pos; 1274 break; 1275 } 1276 } 1277 } 1278 1279 if (p >= chunk->end && got_content) { 1280 ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err); 1281 return false; 1282 } 1283 else if (!got_content) { 1284 return true; 1285 } 1286 *end_of_object = false; 1287 /* We are now at the end of the key, need to parse the rest */ 1288 while (p < chunk->end) { 1289 if (ucl_test_character 
(*p, UCL_CHARACTER_WHITESPACE)) { 1290 ucl_chunk_skipc (chunk, p); 1291 } 1292 else if (*p == '=') { 1293 if (!got_eq && !got_semicolon) { 1294 ucl_chunk_skipc (chunk, p); 1295 got_eq = true; 1296 } 1297 else { 1298 ucl_set_err (parser, UCL_ESYNTAX, "unexpected '=' character", 1299 &parser->err); 1300 return false; 1301 } 1302 } 1303 else if (*p == ':') { 1304 if (!got_eq && !got_semicolon) { 1305 ucl_chunk_skipc (chunk, p); 1306 got_semicolon = true; 1307 } 1308 else { 1309 ucl_set_err (parser, UCL_ESYNTAX, "unexpected ':' character", 1310 &parser->err); 1311 return false; 1312 } 1313 } 1314 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1315 /* Check for comment */ 1316 if (!ucl_skip_comments (parser)) { 1317 return false; 1318 } 1319 p = chunk->pos; 1320 } 1321 else { 1322 /* Start value */ 1323 break; 1324 } 1325 } 1326 1327 if (p >= chunk->end && got_content) { 1328 ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err); 1329 return false; 1330 } 1331 1332 got_sep = got_semicolon || got_eq; 1333 1334 if (!got_sep) { 1335 /* 1336 * Maybe we have more keys nested, so search for termination character. 1337 * Possible choices: 1338 * 1) key1 key2 ... keyN [:=] value <- we treat that as error 1339 * 2) key1 ... 
keyN {} or [] <- we treat that as nested objects 1340 * 3) key1 value[;,\n] <- we treat that as linear object 1341 */ 1342 t = p; 1343 *next_key = false; 1344 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) { 1345 t ++; 1346 } 1347 /* Check first non-space character after a key */ 1348 if (*t != '{' && *t != '[') { 1349 while (t < chunk->end) { 1350 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') { 1351 break; 1352 } 1353 else if (*t == '{' || *t == '[') { 1354 *next_key = true; 1355 break; 1356 } 1357 t ++; 1358 } 1359 } 1360 } 1361 1362 /* Create a new object */ 1363 nobj = ucl_object_new_full (UCL_NULL, parser->chunks->priority); 1364 keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY], 1365 &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false); 1366 if (keylen == -1) { 1367 ucl_object_unref (nobj); 1368 return false; 1369 } 1370 else if (keylen == 0) { 1371 ucl_set_err (parser, UCL_ESYNTAX, "empty keys are not allowed", &parser->err); 1372 ucl_object_unref (nobj); 1373 return false; 1374 } 1375 1376 nobj->key = key; 1377 nobj->keylen = keylen; 1378 1379 if (!ucl_parser_process_object_element (parser, nobj)) { 1380 return false; 1381 } 1382 1383 if (ucl_escape) { 1384 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE; 1385 } 1386 1387 1388 return true; 1389 } 1390 1391 /** 1392 * Parse a cl string 1393 * @param parser 1394 * @param chunk 1395 * @param var_expand 1396 * @param need_unescape 1397 * @return true if a key has been parsed 1398 */ 1399 static bool 1400 ucl_parse_string_value (struct ucl_parser *parser, 1401 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape) 1402 { 1403 const unsigned char *p; 1404 enum { 1405 UCL_BRACE_ROUND = 0, 1406 UCL_BRACE_SQUARE, 1407 UCL_BRACE_FIGURE 1408 }; 1409 int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}}; 1410 1411 p = chunk->pos; 1412 1413 while (p < chunk->end) { 1414 1415 /* Skip pairs of figure braces */ 1416 if (*p == '{') { 1417 
braces[UCL_BRACE_FIGURE][0] ++; 1418 } 1419 else if (*p == '}') { 1420 braces[UCL_BRACE_FIGURE][1] ++; 1421 if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) { 1422 /* This is not a termination symbol, continue */ 1423 ucl_chunk_skipc (chunk, p); 1424 continue; 1425 } 1426 } 1427 /* Skip pairs of square braces */ 1428 else if (*p == '[') { 1429 braces[UCL_BRACE_SQUARE][0] ++; 1430 } 1431 else if (*p == ']') { 1432 braces[UCL_BRACE_SQUARE][1] ++; 1433 if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) { 1434 /* This is not a termination symbol, continue */ 1435 ucl_chunk_skipc (chunk, p); 1436 continue; 1437 } 1438 } 1439 else if (*p == '$') { 1440 *var_expand = true; 1441 } 1442 else if (*p == '\\') { 1443 *need_unescape = true; 1444 ucl_chunk_skipc (chunk, p); 1445 if (p < chunk->end) { 1446 ucl_chunk_skipc (chunk, p); 1447 } 1448 continue; 1449 } 1450 1451 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1452 break; 1453 } 1454 ucl_chunk_skipc (chunk, p); 1455 } 1456 1457 return true; 1458 } 1459 1460 /** 1461 * Parse multiline string ending with \n{term}\n 1462 * @param parser 1463 * @param chunk 1464 * @param term 1465 * @param term_len 1466 * @param beg 1467 * @param var_expand 1468 * @return size of multiline string or 0 in case of error 1469 */ 1470 static int 1471 ucl_parse_multiline_string (struct ucl_parser *parser, 1472 struct ucl_chunk *chunk, const unsigned char *term, 1473 int term_len, unsigned char const **beg, 1474 bool *var_expand) 1475 { 1476 const unsigned char *p, *c, *tend; 1477 bool newline = false; 1478 int len = 0; 1479 1480 p = chunk->pos; 1481 1482 c = p; 1483 1484 while (p < chunk->end) { 1485 if (newline) { 1486 if (chunk->end - p < term_len) { 1487 return 0; 1488 } 1489 else if (memcmp (p, term, term_len) == 0) { 1490 tend = p + term_len; 1491 if (*tend != '\n' && *tend != ';' && *tend != ',') { 1492 /* Incomplete terminator */ 1493 ucl_chunk_skipc (chunk, p); 1494 
continue; 1495 } 1496 len = p - c; 1497 chunk->remain -= term_len; 1498 chunk->pos = p + term_len; 1499 chunk->column = term_len; 1500 *beg = c; 1501 break; 1502 } 1503 } 1504 if (*p == '\n') { 1505 newline = true; 1506 } 1507 else { 1508 if (*p == '$') { 1509 *var_expand = true; 1510 } 1511 newline = false; 1512 } 1513 ucl_chunk_skipc (chunk, p); 1514 } 1515 1516 return len; 1517 } 1518 1519 static inline ucl_object_t* 1520 ucl_parser_get_container (struct ucl_parser *parser) 1521 { 1522 ucl_object_t *t, *obj = NULL; 1523 1524 if (parser == NULL || parser->stack == NULL || parser->stack->obj == NULL) { 1525 return NULL; 1526 } 1527 1528 if (parser->stack->obj->type == UCL_ARRAY) { 1529 /* Object must be allocated */ 1530 obj = ucl_object_new_full (UCL_NULL, parser->chunks->priority); 1531 t = parser->stack->obj; 1532 1533 if (!ucl_array_append (t, obj)) { 1534 ucl_object_unref (obj); 1535 return NULL; 1536 } 1537 1538 parser->cur_obj = obj; 1539 ucl_attach_comment (parser, obj, false); 1540 } 1541 else { 1542 /* Object has been already allocated */ 1543 obj = parser->cur_obj; 1544 } 1545 1546 return obj; 1547 } 1548 1549 /** 1550 * Handle value data 1551 * @param parser 1552 * @param chunk 1553 * @return 1554 */ 1555 static bool 1556 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1557 { 1558 const unsigned char *p, *c; 1559 ucl_object_t *obj = NULL; 1560 unsigned int stripped_spaces; 1561 int str_len; 1562 bool need_unescape = false, ucl_escape = false, var_expand = false; 1563 1564 p = chunk->pos; 1565 1566 /* Skip any spaces and comments */ 1567 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) || 1568 (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1569 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1570 ucl_chunk_skipc (chunk, p); 1571 } 1572 if (!ucl_skip_comments (parser)) { 1573 return false; 1574 } 1575 p = chunk->pos; 1576 } 1577 1578 while (p < chunk->end) { 1579 c = p; 
1580 switch (*p) { 1581 case '"': 1582 ucl_chunk_skipc (chunk, p); 1583 1584 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, 1585 &var_expand)) { 1586 return false; 1587 } 1588 1589 obj = ucl_parser_get_container (parser); 1590 if (!obj) { 1591 return false; 1592 } 1593 1594 str_len = chunk->pos - c - 2; 1595 obj->type = UCL_STRING; 1596 if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, 1597 &obj->trash_stack[UCL_TRASH_VALUE], 1598 &obj->value.sv, str_len, need_unescape, false, 1599 var_expand)) == -1) { 1600 return false; 1601 } 1602 obj->len = str_len; 1603 1604 parser->state = UCL_STATE_AFTER_VALUE; 1605 p = chunk->pos; 1606 1607 return true; 1608 break; 1609 case '{': 1610 obj = ucl_parser_get_container (parser); 1611 /* We have a new object */ 1612 obj = ucl_parser_add_container (obj, parser, false, parser->stack->level); 1613 if (obj == NULL) { 1614 return false; 1615 } 1616 1617 ucl_chunk_skipc (chunk, p); 1618 1619 return true; 1620 break; 1621 case '[': 1622 obj = ucl_parser_get_container (parser); 1623 /* We have a new array */ 1624 obj = ucl_parser_add_container (obj, parser, true, parser->stack->level); 1625 if (obj == NULL) { 1626 return false; 1627 } 1628 1629 ucl_chunk_skipc (chunk, p); 1630 1631 return true; 1632 break; 1633 case ']': 1634 /* We have the array ending */ 1635 if (parser->stack && parser->stack->obj->type == UCL_ARRAY) { 1636 parser->state = UCL_STATE_AFTER_VALUE; 1637 return true; 1638 } 1639 else { 1640 goto parse_string; 1641 } 1642 break; 1643 case '<': 1644 obj = ucl_parser_get_container (parser); 1645 /* We have something like multiline value, which must be <<[A-Z]+\n */ 1646 if (chunk->end - p > 3) { 1647 if (memcmp (p, "<<", 2) == 0) { 1648 p += 2; 1649 /* We allow only uppercase characters in multiline definitions */ 1650 while (p < chunk->end && *p >= 'A' && *p <= 'Z') { 1651 p ++; 1652 } 1653 if (*p =='\n') { 1654 /* Set chunk positions and start multiline parsing */ 1655 c += 2; 1656 chunk->remain 
-= p - c; 1657 chunk->pos = p + 1; 1658 chunk->column = 0; 1659 chunk->line ++; 1660 if ((str_len = ucl_parse_multiline_string (parser, chunk, c, 1661 p - c, &c, &var_expand)) == 0) { 1662 ucl_set_err (parser, UCL_ESYNTAX, 1663 "unterminated multiline value", &parser->err); 1664 return false; 1665 } 1666 1667 obj->type = UCL_STRING; 1668 obj->flags |= UCL_OBJECT_MULTILINE; 1669 if ((str_len = ucl_copy_or_store_ptr (parser, c, 1670 &obj->trash_stack[UCL_TRASH_VALUE], 1671 &obj->value.sv, str_len - 1, false, 1672 false, var_expand)) == -1) { 1673 return false; 1674 } 1675 obj->len = str_len; 1676 1677 parser->state = UCL_STATE_AFTER_VALUE; 1678 1679 return true; 1680 } 1681 } 1682 } 1683 /* Fallback to ordinary strings */ 1684 default: 1685 parse_string: 1686 if (obj == NULL) { 1687 obj = ucl_parser_get_container (parser); 1688 } 1689 1690 /* Parse atom */ 1691 if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) { 1692 if (!ucl_lex_number (parser, chunk, obj)) { 1693 if (parser->state == UCL_STATE_ERROR) { 1694 return false; 1695 } 1696 } 1697 else { 1698 parser->state = UCL_STATE_AFTER_VALUE; 1699 return true; 1700 } 1701 /* Fallback to normal string */ 1702 } 1703 1704 if (!ucl_parse_string_value (parser, chunk, &var_expand, 1705 &need_unescape)) { 1706 return false; 1707 } 1708 /* Cut trailing spaces */ 1709 stripped_spaces = 0; 1710 while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces), 1711 UCL_CHARACTER_WHITESPACE)) { 1712 stripped_spaces ++; 1713 } 1714 str_len = chunk->pos - c - stripped_spaces; 1715 if (str_len <= 0) { 1716 ucl_set_err (parser, UCL_ESYNTAX, "string value must not be empty", 1717 &parser->err); 1718 return false; 1719 } 1720 else if (str_len == 4 && memcmp (c, "null", 4) == 0) { 1721 obj->len = 0; 1722 obj->type = UCL_NULL; 1723 } 1724 else if (!ucl_maybe_parse_boolean (obj, c, str_len)) { 1725 obj->type = UCL_STRING; 1726 if ((str_len = ucl_copy_or_store_ptr (parser, c, 1727 &obj->trash_stack[UCL_TRASH_VALUE], 1728 
&obj->value.sv, str_len, need_unescape, 1729 false, var_expand)) == -1) { 1730 return false; 1731 } 1732 obj->len = str_len; 1733 } 1734 parser->state = UCL_STATE_AFTER_VALUE; 1735 p = chunk->pos; 1736 1737 return true; 1738 break; 1739 } 1740 } 1741 1742 return true; 1743 } 1744 1745 /** 1746 * Handle after value data 1747 * @param parser 1748 * @param chunk 1749 * @return 1750 */ 1751 static bool 1752 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1753 { 1754 const unsigned char *p; 1755 bool got_sep = false; 1756 struct ucl_stack *st; 1757 1758 p = chunk->pos; 1759 1760 while (p < chunk->end) { 1761 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 1762 /* Skip whitespaces */ 1763 ucl_chunk_skipc (chunk, p); 1764 } 1765 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1766 /* Skip comment */ 1767 if (!ucl_skip_comments (parser)) { 1768 return false; 1769 } 1770 /* Treat comment as a separator */ 1771 got_sep = true; 1772 p = chunk->pos; 1773 } 1774 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) { 1775 if (*p == '}' || *p == ']') { 1776 if (parser->stack == NULL) { 1777 ucl_set_err (parser, UCL_ESYNTAX, 1778 "end of array or object detected without corresponding start", 1779 &parser->err); 1780 return false; 1781 } 1782 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) || 1783 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) { 1784 1785 /* Pop all nested objects from a stack */ 1786 st = parser->stack; 1787 parser->stack = st->next; 1788 UCL_FREE (sizeof (struct ucl_stack), st); 1789 1790 if (parser->cur_obj) { 1791 ucl_attach_comment (parser, parser->cur_obj, true); 1792 } 1793 1794 while (parser->stack != NULL) { 1795 st = parser->stack; 1796 1797 if (st->next == NULL || st->next->level == st->level) { 1798 break; 1799 } 1800 1801 parser->stack = st->next; 1802 parser->cur_obj = st->obj; 1803 UCL_FREE (sizeof (struct ucl_stack), st); 1804 } 1805 } 1806 else { 1807 ucl_set_err (parser, 
/**
 * Handle after value data: consume whitespace, comments, separators and
 * closing braces that follow a value.
 *
 * A comment counts as a separator. A closing '}' or ']' must match the type
 * of the container on top of the parser stack; that stack entry is then
 * released and further entries are unwound as described in the loop below.
 *
 * @param parser parser structure
 * @param chunk the current data chunk; its position is advanced
 * @return true on success (including when the top object has just been
 * closed), false on a syntax error
 */
static bool
ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk)
{
	const unsigned char *p;
	bool got_sep = false;
	struct ucl_stack *st;

	p = chunk->pos;

	while (p < chunk->end) {
		if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) {
			/* Skip whitespaces */
			ucl_chunk_skipc (chunk, p);
		}
		else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) {
			/* Skip comment */
			if (!ucl_skip_comments (parser)) {
				return false;
			}
			/* Treat comment as a separator */
			got_sep = true;
			p = chunk->pos;
		}
		else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) {
			if (*p == '}' || *p == ']') {
				if (parser->stack == NULL) {
					ucl_set_err (parser, UCL_ESYNTAX,
							"end of array or object detected without corresponding start",
							&parser->err);
					return false;
				}
				if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) ||
						(*p == ']' && parser->stack->obj->type == UCL_ARRAY)) {

					/* Pop all nested objects from a stack */
					st = parser->stack;
					parser->stack = st->next;
					UCL_FREE (sizeof (struct ucl_stack), st);

					if (parser->cur_obj) {
						ucl_attach_comment (parser, parser->cur_obj, true);
					}

					/*
					 * Keep popping while the next entry has a different
					 * level; each popped entry's object becomes cur_obj.
					 */
					while (parser->stack != NULL) {
						st = parser->stack;

						if (st->next == NULL || st->next->level == st->level) {
							break;
						}

						parser->stack = st->next;
						parser->cur_obj = st->obj;
						UCL_FREE (sizeof (struct ucl_stack), st);
					}
				}
				else {
					ucl_set_err (parser, UCL_ESYNTAX,
							"unexpected terminating symbol detected",
							&parser->err);
					return false;
				}

				if (parser->stack == NULL) {
					/* Ignore everything after a top object */
					return true;
				}
				else {
					ucl_chunk_skipc (chunk, p);
				}
				got_sep = true;
			}
			else {
				/* Got a separator */
				got_sep = true;
				ucl_chunk_skipc (chunk, p);
			}
		}
		else {
			/* Anything else */
			if (!got_sep) {
				ucl_set_err (parser, UCL_ESYNTAX, "delimiter is missing",
						&parser->err);
				return false;
			}
			return true;
		}
	}

	return true;
}

p); 1902 break; 1903 1904 case macro_has_sqbrace: 1905 if (*p == '\\') { 1906 prev_state = state; 1907 state = macro_has_backslash; 1908 } 1909 else if (*p == '}') { 1910 state = macro_save; 1911 } 1912 1913 ucl_chunk_skipc (chunk, p); 1914 break; 1915 1916 case macro_has_quote: 1917 if (*p == '\\') { 1918 prev_state = state; 1919 state = macro_has_backslash; 1920 } 1921 else if (*p == '"') { 1922 state = macro_save; 1923 } 1924 1925 ucl_chunk_skipc (chunk, p); 1926 break; 1927 1928 case macro_has_backslash: 1929 state = prev_state; 1930 ucl_chunk_skipc (chunk, p); 1931 break; 1932 1933 case macro_save: 1934 if (parser->flags & UCL_PARSER_SAVE_COMMENTS) { 1935 ucl_save_comment (parser, c, p - c); 1936 } 1937 1938 return true; 1939 } 1940 } 1941 1942 return false; 1943 } 1944 1945 /** 1946 * Handle macro data 1947 * @param parser 1948 * @param chunk 1949 * @param marco 1950 * @param macro_start 1951 * @param macro_len 1952 * @return 1953 */ 1954 static bool 1955 ucl_parse_macro_value (struct ucl_parser *parser, 1956 struct ucl_chunk *chunk, struct ucl_macro *macro, 1957 unsigned char const **macro_start, size_t *macro_len) 1958 { 1959 const unsigned char *p, *c; 1960 bool need_unescape = false, ucl_escape = false, var_expand = false; 1961 1962 p = chunk->pos; 1963 1964 switch (*p) { 1965 case '"': 1966 /* We have macro value encoded in quotes */ 1967 c = p; 1968 ucl_chunk_skipc (chunk, p); 1969 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1970 return false; 1971 } 1972 1973 *macro_start = c + 1; 1974 *macro_len = chunk->pos - c - 2; 1975 p = chunk->pos; 1976 break; 1977 case '{': 1978 /* We got a multiline macro body */ 1979 ucl_chunk_skipc (chunk, p); 1980 /* Skip spaces at the beginning */ 1981 while (p < chunk->end) { 1982 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1983 ucl_chunk_skipc (chunk, p); 1984 } 1985 else { 1986 break; 1987 } 1988 } 1989 c = p; 1990 while (p < chunk->end) { 1991 if (*p == '}') 
{ 1992 break; 1993 } 1994 ucl_chunk_skipc (chunk, p); 1995 } 1996 *macro_start = c; 1997 *macro_len = p - c; 1998 ucl_chunk_skipc (chunk, p); 1999 break; 2000 default: 2001 /* Macro is not enclosed in quotes or braces */ 2002 c = p; 2003 while (p < chunk->end) { 2004 if (ucl_lex_is_atom_end (*p)) { 2005 break; 2006 } 2007 ucl_chunk_skipc (chunk, p); 2008 } 2009 *macro_start = c; 2010 *macro_len = p - c; 2011 break; 2012 } 2013 2014 /* We are at the end of a macro */ 2015 /* Skip ';' and space characters and return to previous state */ 2016 while (p < chunk->end) { 2017 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') { 2018 break; 2019 } 2020 ucl_chunk_skipc (chunk, p); 2021 } 2022 return true; 2023 } 2024 2025 /** 2026 * Parse macro arguments as UCL object 2027 * @param parser parser structure 2028 * @param chunk the current data chunk 2029 * @return 2030 */ 2031 static ucl_object_t * 2032 ucl_parse_macro_arguments (struct ucl_parser *parser, 2033 struct ucl_chunk *chunk) 2034 { 2035 ucl_object_t *res = NULL; 2036 struct ucl_parser *params_parser; 2037 int obraces = 1, ebraces = 0, state = 0; 2038 const unsigned char *p, *c; 2039 size_t args_len = 0; 2040 struct ucl_parser_saved_state saved; 2041 2042 saved.column = chunk->column; 2043 saved.line = chunk->line; 2044 saved.pos = chunk->pos; 2045 saved.remain = chunk->remain; 2046 p = chunk->pos; 2047 2048 if (*p != '(' || chunk->remain < 2) { 2049 return NULL; 2050 } 2051 2052 /* Set begin and start */ 2053 ucl_chunk_skipc (chunk, p); 2054 c = p; 2055 2056 while ((p) < (chunk)->end) { 2057 switch (state) { 2058 case 0: 2059 /* Parse symbols and check for '(', ')' and '"' */ 2060 if (*p == '(') { 2061 obraces ++; 2062 } 2063 else if (*p == ')') { 2064 ebraces ++; 2065 } 2066 else if (*p == '"') { 2067 state = 1; 2068 } 2069 /* Check pairing */ 2070 if (obraces == ebraces) { 2071 state = 99; 2072 } 2073 else { 2074 args_len ++; 2075 } 2076 /* Check overflow */ 2077 if (chunk->remain == 0) { 
2078 goto restore_chunk; 2079 } 2080 ucl_chunk_skipc (chunk, p); 2081 break; 2082 case 1: 2083 /* We have quote character, so skip all but quotes */ 2084 if (*p == '"' && *(p - 1) != '\\') { 2085 state = 0; 2086 } 2087 if (chunk->remain == 0) { 2088 goto restore_chunk; 2089 } 2090 args_len ++; 2091 ucl_chunk_skipc (chunk, p); 2092 break; 2093 case 99: 2094 /* 2095 * We have read the full body of arguments, so we need to parse and set 2096 * object from that 2097 */ 2098 params_parser = ucl_parser_new (parser->flags); 2099 if (!ucl_parser_add_chunk (params_parser, c, args_len)) { 2100 ucl_set_err (parser, UCL_ESYNTAX, "macro arguments parsing error", 2101 &parser->err); 2102 } 2103 else { 2104 res = ucl_parser_get_object (params_parser); 2105 } 2106 ucl_parser_free (params_parser); 2107 2108 return res; 2109 2110 break; 2111 } 2112 } 2113 2114 return res; 2115 2116 restore_chunk: 2117 chunk->column = saved.column; 2118 chunk->line = saved.line; 2119 chunk->pos = saved.pos; 2120 chunk->remain = saved.remain; 2121 2122 return NULL; 2123 } 2124 2125 #define SKIP_SPACES_COMMENTS(parser, chunk, p) do { \ 2126 while ((p) < (chunk)->end) { \ 2127 if (!ucl_test_character (*(p), UCL_CHARACTER_WHITESPACE_UNSAFE)) { \ 2128 if ((chunk)->remain >= 2 && ucl_lex_is_comment ((p)[0], (p)[1])) { \ 2129 if (!ucl_skip_comments (parser)) { \ 2130 return false; \ 2131 } \ 2132 p = (chunk)->pos; \ 2133 } \ 2134 break; \ 2135 } \ 2136 ucl_chunk_skipc (chunk, p); \ 2137 } \ 2138 } while(0) 2139 2140 /** 2141 * Handle the main states of rcl parser 2142 * @param parser parser structure 2143 * @return true if chunk has been parsed and false in case of error 2144 */ 2145 static bool 2146 ucl_state_machine (struct ucl_parser *parser) 2147 { 2148 ucl_object_t *obj, *macro_args; 2149 struct ucl_chunk *chunk = parser->chunks; 2150 const unsigned char *p, *c = NULL, *macro_start = NULL; 2151 unsigned char *macro_escaped; 2152 size_t macro_len = 0; 2153 struct ucl_macro *macro = NULL; 2154 bool 
next_key = false, end_of_object = false, ret; 2155 2156 if (parser->top_obj == NULL) { 2157 parser->state = UCL_STATE_INIT; 2158 } 2159 2160 p = chunk->pos; 2161 while (chunk->pos < chunk->end) { 2162 switch (parser->state) { 2163 case UCL_STATE_INIT: 2164 /* 2165 * At the init state we can either go to the parse array or object 2166 * if we got [ or { correspondingly or can just treat new data as 2167 * a key of newly created object 2168 */ 2169 if (!ucl_skip_comments (parser)) { 2170 parser->prev_state = parser->state; 2171 parser->state = UCL_STATE_ERROR; 2172 return false; 2173 } 2174 else { 2175 /* Skip any spaces */ 2176 while (p < chunk->end && ucl_test_character (*p, 2177 UCL_CHARACTER_WHITESPACE_UNSAFE)) { 2178 ucl_chunk_skipc (chunk, p); 2179 } 2180 2181 p = chunk->pos; 2182 2183 if (*p == '[') { 2184 parser->state = UCL_STATE_VALUE; 2185 ucl_chunk_skipc (chunk, p); 2186 } 2187 else { 2188 parser->state = UCL_STATE_KEY; 2189 if (*p == '{') { 2190 ucl_chunk_skipc (chunk, p); 2191 } 2192 } 2193 2194 if (parser->top_obj == NULL) { 2195 if (parser->state == UCL_STATE_VALUE) { 2196 obj = ucl_parser_add_container (NULL, parser, true, 0); 2197 } 2198 else { 2199 obj = ucl_parser_add_container (NULL, parser, false, 0); 2200 } 2201 2202 if (obj == NULL) { 2203 return false; 2204 } 2205 2206 parser->top_obj = obj; 2207 parser->cur_obj = obj; 2208 } 2209 2210 } 2211 break; 2212 case UCL_STATE_KEY: 2213 /* Skip any spaces */ 2214 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 2215 ucl_chunk_skipc (chunk, p); 2216 } 2217 if (p == chunk->end || *p == '}') { 2218 /* We have the end of an object */ 2219 parser->state = UCL_STATE_AFTER_VALUE; 2220 continue; 2221 } 2222 if (parser->stack == NULL) { 2223 /* No objects are on stack, but we want to parse a key */ 2224 ucl_set_err (parser, UCL_ESYNTAX, "top object is finished but the parser " 2225 "expects a key", &parser->err); 2226 parser->prev_state = parser->state; 2227 parser->state = 
UCL_STATE_ERROR; 2228 return false; 2229 } 2230 if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) { 2231 parser->prev_state = parser->state; 2232 parser->state = UCL_STATE_ERROR; 2233 return false; 2234 } 2235 if (end_of_object) { 2236 p = chunk->pos; 2237 parser->state = UCL_STATE_AFTER_VALUE; 2238 continue; 2239 } 2240 else if (parser->state != UCL_STATE_MACRO_NAME) { 2241 if (next_key && parser->stack->obj->type == UCL_OBJECT) { 2242 /* Parse more keys and nest objects accordingly */ 2243 obj = ucl_parser_add_container (parser->cur_obj, parser, false, 2244 parser->stack->level + 1); 2245 if (obj == NULL) { 2246 return false; 2247 } 2248 } 2249 else { 2250 parser->state = UCL_STATE_VALUE; 2251 } 2252 } 2253 else { 2254 c = chunk->pos; 2255 } 2256 p = chunk->pos; 2257 break; 2258 case UCL_STATE_VALUE: 2259 /* We need to check what we do have */ 2260 if (!parser->cur_obj || !ucl_parse_value (parser, chunk)) { 2261 parser->prev_state = parser->state; 2262 parser->state = UCL_STATE_ERROR; 2263 return false; 2264 } 2265 /* State is set in ucl_parse_value call */ 2266 p = chunk->pos; 2267 break; 2268 case UCL_STATE_AFTER_VALUE: 2269 if (!ucl_parse_after_value (parser, chunk)) { 2270 parser->prev_state = parser->state; 2271 parser->state = UCL_STATE_ERROR; 2272 return false; 2273 } 2274 2275 if (parser->stack != NULL) { 2276 if (parser->stack->obj->type == UCL_OBJECT) { 2277 parser->state = UCL_STATE_KEY; 2278 } 2279 else { 2280 /* Array */ 2281 parser->state = UCL_STATE_VALUE; 2282 } 2283 } 2284 else { 2285 /* Skip everything at the end */ 2286 return true; 2287 } 2288 2289 p = chunk->pos; 2290 break; 2291 case UCL_STATE_MACRO_NAME: 2292 if (parser->flags & UCL_PARSER_DISABLE_MACRO) { 2293 if (!ucl_skip_macro_as_comment (parser, chunk)) { 2294 /* We have invalid macro */ 2295 ucl_create_err (&parser->err, 2296 "error on line %d at column %d: invalid macro", 2297 chunk->line, 2298 chunk->column); 2299 parser->state = UCL_STATE_ERROR; 2300 return false; 2301 
} 2302 else { 2303 p = chunk->pos; 2304 parser->state = parser->prev_state; 2305 } 2306 } 2307 else { 2308 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && 2309 *p != '(') { 2310 ucl_chunk_skipc (chunk, p); 2311 } 2312 else { 2313 if (c != NULL && p - c > 0) { 2314 /* We got macro name */ 2315 macro_len = (size_t) (p - c); 2316 HASH_FIND (hh, parser->macroes, c, macro_len, macro); 2317 if (macro == NULL) { 2318 ucl_create_err (&parser->err, 2319 "error on line %d at column %d: " 2320 "unknown macro: '%.*s', character: '%c'", 2321 chunk->line, 2322 chunk->column, 2323 (int) (p - c), 2324 c, 2325 *chunk->pos); 2326 parser->state = UCL_STATE_ERROR; 2327 return false; 2328 } 2329 /* Now we need to skip all spaces */ 2330 SKIP_SPACES_COMMENTS(parser, chunk, p); 2331 parser->state = UCL_STATE_MACRO; 2332 } 2333 else { 2334 /* We have invalid macro name */ 2335 ucl_create_err (&parser->err, 2336 "error on line %d at column %d: invalid macro name", 2337 chunk->line, 2338 chunk->column); 2339 parser->state = UCL_STATE_ERROR; 2340 return false; 2341 } 2342 } 2343 } 2344 break; 2345 case UCL_STATE_MACRO: 2346 if (*chunk->pos == '(') { 2347 macro_args = ucl_parse_macro_arguments (parser, chunk); 2348 p = chunk->pos; 2349 if (macro_args) { 2350 SKIP_SPACES_COMMENTS(parser, chunk, p); 2351 } 2352 } 2353 else { 2354 macro_args = NULL; 2355 } 2356 if (!ucl_parse_macro_value (parser, chunk, macro, 2357 ¯o_start, ¯o_len)) { 2358 parser->prev_state = parser->state; 2359 parser->state = UCL_STATE_ERROR; 2360 return false; 2361 } 2362 macro_len = ucl_expand_variable (parser, ¯o_escaped, 2363 macro_start, macro_len); 2364 parser->state = parser->prev_state; 2365 2366 if (macro_escaped == NULL && macro != NULL) { 2367 if (macro->is_context) { 2368 ret = macro->h.context_handler (macro_start, macro_len, 2369 macro_args, 2370 parser->top_obj, 2371 macro->ud); 2372 } 2373 else { 2374 ret = macro->h.handler (macro_start, macro_len, macro_args, 2375 macro->ud); 2376 } 2377 } 
2378 else if (macro != NULL) { 2379 if (macro->is_context) { 2380 ret = macro->h.context_handler (macro_escaped, macro_len, 2381 macro_args, 2382 parser->top_obj, 2383 macro->ud); 2384 } 2385 else { 2386 ret = macro->h.handler (macro_escaped, macro_len, macro_args, 2387 macro->ud); 2388 } 2389 2390 UCL_FREE (macro_len + 1, macro_escaped); 2391 } 2392 else { 2393 ret = false; 2394 ucl_set_err (parser, UCL_EINTERNAL, 2395 "internal error: parser has macro undefined", &parser->err); 2396 } 2397 2398 /* 2399 * Chunk can be modified within macro handler 2400 */ 2401 chunk = parser->chunks; 2402 p = chunk->pos; 2403 2404 if (macro_args) { 2405 ucl_object_unref (macro_args); 2406 } 2407 2408 if (!ret) { 2409 return false; 2410 } 2411 break; 2412 default: 2413 ucl_set_err (parser, UCL_EINTERNAL, 2414 "internal error: parser is in an unknown state", &parser->err); 2415 parser->state = UCL_STATE_ERROR; 2416 return false; 2417 } 2418 } 2419 2420 if (parser->last_comment) { 2421 if (parser->cur_obj) { 2422 ucl_attach_comment (parser, parser->cur_obj, true); 2423 } 2424 else if (parser->stack && parser->stack->obj) { 2425 ucl_attach_comment (parser, parser->stack->obj, true); 2426 } 2427 else if (parser->top_obj) { 2428 ucl_attach_comment (parser, parser->top_obj, true); 2429 } 2430 else { 2431 ucl_object_unref (parser->last_comment); 2432 } 2433 } 2434 2435 return true; 2436 } 2437 2438 struct ucl_parser* 2439 ucl_parser_new (int flags) 2440 { 2441 struct ucl_parser *parser; 2442 2443 parser = UCL_ALLOC (sizeof (struct ucl_parser)); 2444 if (parser == NULL) { 2445 return NULL; 2446 } 2447 2448 memset (parser, 0, sizeof (struct ucl_parser)); 2449 2450 ucl_parser_register_macro (parser, "include", ucl_include_handler, parser); 2451 ucl_parser_register_macro (parser, "try_include", ucl_try_include_handler, parser); 2452 ucl_parser_register_macro (parser, "includes", ucl_includes_handler, parser); 2453 ucl_parser_register_macro (parser, "priority", ucl_priority_handler, parser); 
2454 ucl_parser_register_macro (parser, "load", ucl_load_handler, parser); 2455 ucl_parser_register_context_macro (parser, "inherit", ucl_inherit_handler, parser); 2456 2457 parser->flags = flags; 2458 parser->includepaths = NULL; 2459 2460 if (flags & UCL_PARSER_SAVE_COMMENTS) { 2461 parser->comments = ucl_object_typed_new (UCL_OBJECT); 2462 } 2463 2464 /* Initial assumption about filevars */ 2465 ucl_parser_set_filevars (parser, NULL, false); 2466 2467 return parser; 2468 } 2469 2470 bool 2471 ucl_parser_set_default_priority (struct ucl_parser *parser, unsigned prio) 2472 { 2473 if (parser == NULL) { 2474 return false; 2475 } 2476 2477 parser->default_priority = prio; 2478 2479 return true; 2480 } 2481 2482 void 2483 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro, 2484 ucl_macro_handler handler, void* ud) 2485 { 2486 struct ucl_macro *new; 2487 2488 if (macro == NULL || handler == NULL) { 2489 return; 2490 } 2491 2492 new = UCL_ALLOC (sizeof (struct ucl_macro)); 2493 if (new == NULL) { 2494 return; 2495 } 2496 2497 memset (new, 0, sizeof (struct ucl_macro)); 2498 new->h.handler = handler; 2499 new->name = strdup (macro); 2500 new->ud = ud; 2501 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); 2502 } 2503 2504 void 2505 ucl_parser_register_context_macro (struct ucl_parser *parser, const char *macro, 2506 ucl_context_macro_handler handler, void* ud) 2507 { 2508 struct ucl_macro *new; 2509 2510 if (macro == NULL || handler == NULL) { 2511 return; 2512 } 2513 2514 new = UCL_ALLOC (sizeof (struct ucl_macro)); 2515 if (new == NULL) { 2516 return; 2517 } 2518 2519 memset (new, 0, sizeof (struct ucl_macro)); 2520 new->h.context_handler = handler; 2521 new->name = strdup (macro); 2522 new->ud = ud; 2523 new->is_context = true; 2524 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); 2525 } 2526 2527 void 2528 ucl_parser_register_variable (struct ucl_parser *parser, const char *var, 2529 const char 
*value) 2530 { 2531 struct ucl_variable *new = NULL, *cur; 2532 2533 if (var == NULL) { 2534 return; 2535 } 2536 2537 /* Find whether a variable already exists */ 2538 LL_FOREACH (parser->variables, cur) { 2539 if (strcmp (cur->var, var) == 0) { 2540 new = cur; 2541 break; 2542 } 2543 } 2544 2545 if (value == NULL) { 2546 2547 if (new != NULL) { 2548 /* Remove variable */ 2549 DL_DELETE (parser->variables, new); 2550 free (new->var); 2551 free (new->value); 2552 UCL_FREE (sizeof (struct ucl_variable), new); 2553 } 2554 else { 2555 /* Do nothing */ 2556 return; 2557 } 2558 } 2559 else { 2560 if (new == NULL) { 2561 new = UCL_ALLOC (sizeof (struct ucl_variable)); 2562 if (new == NULL) { 2563 return; 2564 } 2565 memset (new, 0, sizeof (struct ucl_variable)); 2566 new->var = strdup (var); 2567 new->var_len = strlen (var); 2568 new->value = strdup (value); 2569 new->value_len = strlen (value); 2570 2571 DL_APPEND (parser->variables, new); 2572 } 2573 else { 2574 free (new->value); 2575 new->value = strdup (value); 2576 new->value_len = strlen (value); 2577 } 2578 } 2579 } 2580 2581 void 2582 ucl_parser_set_variables_handler (struct ucl_parser *parser, 2583 ucl_variable_handler handler, void *ud) 2584 { 2585 parser->var_handler = handler; 2586 parser->var_data = ud; 2587 } 2588 2589 bool 2590 ucl_parser_add_chunk_full (struct ucl_parser *parser, const unsigned char *data, 2591 size_t len, unsigned priority, enum ucl_duplicate_strategy strat, 2592 enum ucl_parse_type parse_type) 2593 { 2594 struct ucl_chunk *chunk; 2595 2596 if (parser == NULL) { 2597 return false; 2598 } 2599 2600 if (len == 0) { 2601 parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority); 2602 return true; 2603 } 2604 2605 if (data == NULL) { 2606 ucl_create_err (&parser->err, "invalid chunk added"); 2607 return false; 2608 } 2609 2610 if (parser->state != UCL_STATE_ERROR) { 2611 chunk = UCL_ALLOC (sizeof (struct ucl_chunk)); 2612 if (chunk == NULL) { 2613 ucl_create_err (&parser->err, "cannot 
allocate chunk structure"); 2614 return false; 2615 } 2616 chunk->begin = data; 2617 chunk->remain = len; 2618 chunk->pos = chunk->begin; 2619 chunk->end = chunk->begin + len; 2620 chunk->line = 1; 2621 chunk->column = 0; 2622 chunk->priority = priority; 2623 chunk->strategy = strat; 2624 chunk->parse_type = parse_type; 2625 LL_PREPEND (parser->chunks, chunk); 2626 parser->recursion ++; 2627 2628 if (parser->recursion > UCL_MAX_RECURSION) { 2629 ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d", 2630 parser->recursion); 2631 return false; 2632 } 2633 2634 switch (parse_type) { 2635 default: 2636 case UCL_PARSE_UCL: 2637 return ucl_state_machine (parser); 2638 case UCL_PARSE_MSGPACK: 2639 return ucl_parse_msgpack (parser); 2640 } 2641 } 2642 2643 ucl_create_err (&parser->err, "a parser is in an invalid state"); 2644 2645 return false; 2646 } 2647 2648 bool 2649 ucl_parser_add_chunk_priority (struct ucl_parser *parser, 2650 const unsigned char *data, size_t len, unsigned priority) 2651 { 2652 /* We dereference parser, so this check is essential */ 2653 if (parser == NULL) { 2654 return false; 2655 } 2656 2657 return ucl_parser_add_chunk_full (parser, data, len, 2658 priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL); 2659 } 2660 2661 bool 2662 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data, 2663 size_t len) 2664 { 2665 if (parser == NULL) { 2666 return false; 2667 } 2668 2669 return ucl_parser_add_chunk_full (parser, data, len, 2670 parser->default_priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL); 2671 } 2672 2673 bool 2674 ucl_parser_add_string_priority (struct ucl_parser *parser, const char *data, 2675 size_t len, unsigned priority) 2676 { 2677 if (data == NULL) { 2678 ucl_create_err (&parser->err, "invalid string added"); 2679 return false; 2680 } 2681 if (len == 0) { 2682 len = strlen (data); 2683 } 2684 2685 return ucl_parser_add_chunk_priority (parser, 2686 (const unsigned char *)data, len, priority); 2687 } 2688 
2689 bool 2690 ucl_parser_add_string (struct ucl_parser *parser, const char *data, 2691 size_t len) 2692 { 2693 if (parser == NULL) { 2694 return false; 2695 } 2696 2697 return ucl_parser_add_string_priority (parser, 2698 (const unsigned char *)data, len, parser->default_priority); 2699 } 2700 2701 bool 2702 ucl_set_include_path (struct ucl_parser *parser, ucl_object_t *paths) 2703 { 2704 if (parser == NULL || paths == NULL) { 2705 return false; 2706 } 2707 2708 if (parser->includepaths == NULL) { 2709 parser->includepaths = ucl_object_copy (paths); 2710 } 2711 else { 2712 ucl_object_unref (parser->includepaths); 2713 parser->includepaths = ucl_object_copy (paths); 2714 } 2715 2716 if (parser->includepaths == NULL) { 2717 return false; 2718 } 2719 2720 return true; 2721 } 2722