1 /* Copyright (c) 2013, Vsevolod Stakhov 2 * All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * * Redistributions of source code must retain the above copyright 7 * notice, this list of conditions and the following disclaimer. 8 * * Redistributions in binary form must reproduce the above copyright 9 * notice, this list of conditions and the following disclaimer in the 10 * documentation and/or other materials provided with the distribution. 11 * 12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY 13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY 16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 */ 23 24 #include "ucl.h" 25 #include "ucl_internal.h" 26 #include "ucl_chartable.h" 27 28 /** 29 * @file ucl_parser.c 30 * The implementation of ucl parser 31 */ 32 33 struct ucl_parser_saved_state { 34 unsigned int line; 35 unsigned int column; 36 size_t remain; 37 const unsigned char *pos; 38 }; 39 40 /** 41 * Move up to len characters 42 * @param parser 43 * @param begin 44 * @param len 45 * @return new position in chunk 46 */ 47 #define ucl_chunk_skipc(chunk, p) do{ \ 48 if (*(p) == '\n') { \ 49 (chunk)->line ++; \ 50 (chunk)->column = 0; \ 51 } \ 52 else (chunk)->column ++; \ 53 (p++); \ 54 (chunk)->pos ++; \ 55 (chunk)->remain --; \ 56 } while (0) 57 58 static inline void 59 ucl_set_err (struct ucl_parser *parser, int code, const char *str, UT_string **err) 60 { 61 const char *fmt_string, *filename; 62 struct ucl_chunk *chunk = parser->chunks; 63 64 if (parser->cur_file) { 65 filename = parser->cur_file; 66 } 67 else { 68 filename = "<unknown>"; 69 } 70 71 if (chunk->pos < chunk->end) { 72 if (isgraph (*chunk->pos)) { 73 fmt_string = "error while parsing %s: " 74 "line: %d, column: %d - '%s', character: '%c'"; 75 } 76 else { 77 fmt_string = "error while parsing %s: " 78 "line: %d, column: %d - '%s', character: '0x%02x'"; 79 } 80 ucl_create_err (err, fmt_string, 81 filename, chunk->line, chunk->column, 82 str, *chunk->pos); 83 } 84 else { 85 ucl_create_err (err, "error while parsing %s: at the end of chunk: %s", 86 filename, str); 87 } 88 89 parser->err_code = code; 90 } 91 92 static void 93 ucl_save_comment (struct ucl_parser *parser, const char *begin, size_t len) 94 { 95 ucl_object_t *nobj; 96 97 if (len > 0 && begin != NULL) { 98 nobj = ucl_object_fromstring_common (begin, len, 0); 99 100 if (parser->last_comment) { 101 /* We need to append data to an existing object */ 102 DL_APPEND (parser->last_comment, nobj); 103 } 104 else { 105 parser->last_comment = nobj; 106 } 107 } 108 } 109 110 static void 111 ucl_attach_comment (struct ucl_parser *parser, ucl_object_t *obj, bool before) 112 { 113 if (parser->last_comment) { 114 ucl_object_insert_key (parser->comments, parser->last_comment, 115 (const char *)&obj, sizeof (void *), true); 116 117 if (before) { 118 parser->last_comment->flags |= UCL_OBJECT_INHERITED; 119 } 120 121 parser->last_comment = NULL; 122 } 123 } 124 125 /** 126 * Skip all comments from the current pos resolving nested and multiline comments 127 * @param parser 128 * @return 129 */ 130 static bool 131 ucl_skip_comments (struct ucl_parser *parser) 132 { 133 struct ucl_chunk *chunk = parser->chunks; 134 const unsigned char *p, *beg = NULL; 135 int comments_nested = 0; 136 bool quoted = false; 137 138 p = chunk->pos; 139 140 start: 141 if (chunk->remain > 0 && *p == '#') { 142 if (parser->state != UCL_STATE_SCOMMENT && 143 parser->state != UCL_STATE_MCOMMENT) { 144 beg = p; 145 146 while (p < chunk->end) { 147 if (*p == '\n') { 148 if (parser->flags & UCL_PARSER_SAVE_COMMENTS) { 149 ucl_save_comment (parser, beg, p - beg); 150 beg = NULL; 151 } 152 153 ucl_chunk_skipc (chunk, p); 154 155 goto start; 156 } 157 ucl_chunk_skipc (chunk, p); 158 } 159 } 160 } 161 else if (chunk->remain >= 2 && *p == '/') { 162 if (p[1] == '*') { 163 beg = p; 164 ucl_chunk_skipc (chunk, p); 165 comments_nested ++; 166 ucl_chunk_skipc (chunk, p); 167 168 while (p < chunk->end) { 169 if (*p == '"' && *(p - 1) != '\\') { 170 quoted = !quoted; 171 } 172 173 if (!quoted) { 174 if (*p == '*') { 175 ucl_chunk_skipc (chunk, p); 176 if (*p == '/') { 177 comments_nested --; 178 if (comments_nested == 0) { 179 if (parser->flags & UCL_PARSER_SAVE_COMMENTS) { 180 ucl_save_comment (parser, beg, p - beg + 1); 181 beg = NULL; 182 } 183 184 ucl_chunk_skipc (chunk, p); 185 goto start; 186 } 187 } 188 ucl_chunk_skipc (chunk, p); 189 } 190 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') { 191 comments_nested ++; 192 ucl_chunk_skipc (chunk, p); 193 ucl_chunk_skipc (chunk, p); 194 continue; 195 } 196 } 197 198 ucl_chunk_skipc (chunk, p); 199 } 200 if (comments_nested != 0) { 201 ucl_set_err (parser, UCL_ENESTED, 202 "unfinished multiline comment", &parser->err); 203 return false; 204 } 205 } 206 } 207 208 if (beg && p > beg && (parser->flags & UCL_PARSER_SAVE_COMMENTS)) { 209 ucl_save_comment (parser, beg, p - beg); 210 } 211 212 return true; 213 } 214 215 /** 216 * Return multiplier for a character 217 * @param c multiplier character 218 * @param is_bytes if true use 1024 multiplier 219 * @return multiplier 220 */ 221 static inline unsigned long 222 ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) { 223 const struct { 224 char c; 225 long mult_normal; 226 long mult_bytes; 227 } multipliers[] = { 228 {'m', 1000 * 1000, 1024 * 1024}, 229 {'k', 1000, 1024}, 230 {'g', 1000 * 1000 * 1000, 1024 * 1024 * 1024} 231 }; 232 int i; 233 234 for (i = 0; i < 3; i ++) { 235 if (tolower (c) == multipliers[i].c) { 236 if (is_bytes) { 237 return multipliers[i].mult_bytes; 238 } 239 return multipliers[i].mult_normal; 240 } 241 } 242 243 return 1; 244 } 245 246 247 /** 248 * Return multiplier for time scaling 249 * @param c 250 * @return 251 */ 252 static inline double 253 ucl_lex_time_multiplier (const unsigned char c) { 254 const struct { 255 char c; 256 double mult; 257 } multipliers[] = { 258 {'m', 60}, 259 {'h', 60 * 60}, 260 {'d', 60 * 60 * 24}, 261 {'w', 60 * 60 * 24 * 7}, 262 {'y', 60 * 60 * 24 * 365} 263 }; 264 int i; 265 266 for (i = 0; i < 5; i ++) { 267 if (tolower (c) == multipliers[i].c) { 268 return multipliers[i].mult; 269 } 270 } 271 272 return 1; 273 } 274 275 /** 276 * Return true if a character is a end of an atom 277 * @param c 278 * @return 279 */ 280 static inline bool 281 ucl_lex_is_atom_end (const unsigned char c) 282 { 283 return ucl_test_character (c, UCL_CHARACTER_VALUE_END); 284 } 285 286 static inline bool 287 ucl_lex_is_comment (const unsigned char c1, const unsigned char c2) 288 { 289 if (c1 == '/') { 290 if (c2 == '*') { 291 return true; 292 } 293 } 294 else if (c1 == '#') { 295 return true; 296 } 297 return false; 298 } 299 300 /** 301 * Check variable found 302 * @param parser 303 * @param ptr 304 * @param remain 305 * @param out_len 306 * @param strict 307 * @param found 308 * @return 309 */ 310 static inline const char * 311 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain, 312 size_t *out_len, bool strict, bool *found) 313 { 314 struct ucl_variable *var; 315 unsigned char *dst; 316 size_t dstlen; 317 bool need_free = false; 318 319 LL_FOREACH (parser->variables, var) { 320 if (strict) { 321 if (remain == var->var_len) { 322 if (memcmp (ptr, var->var, var->var_len) == 0) { 323 *out_len += var->value_len; 324 *found = true; 325 return (ptr + var->var_len); 326 } 327 } 328 } 329 else { 330 if (remain >= var->var_len) { 331 if (memcmp (ptr, var->var, var->var_len) == 0) { 332 *out_len += var->value_len; 333 *found = true; 334 return (ptr + var->var_len); 335 } 336 } 337 } 338 } 339 340 /* XXX: can only handle ${VAR} */ 341 if (!(*found) && parser->var_handler != NULL && strict) { 342 /* Call generic handler */ 343 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free, 344 parser->var_data)) { 345 *out_len += dstlen; 346 *found = true; 347 if (need_free) { 348 free (dst); 349 } 350 return (ptr + remain); 351 } 352 } 353 354 return ptr; 355 } 356 357 /** 358 * Check for a variable in a given string 359 * @param parser 360 * @param ptr 361 * @param remain 362 * @param out_len 363 * @param vars_found 364 * @return 365 */ 366 static const char * 367 ucl_check_variable (struct ucl_parser *parser, const char *ptr, 368 size_t remain, size_t *out_len, bool *vars_found) 369 { 370 const char *p, *end, *ret = ptr; 371 bool found = false; 372 373 if (*ptr == '{') { 374 /* We need to match the variable enclosed in braces */ 375 p = ptr + 1; 376 end = ptr + remain; 377 while (p < end) { 378 if (*p == '}') { 379 ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, 380 out_len, true, &found); 381 if (found) { 382 /* {} must be excluded actually */ 383 ret ++; 384 if (!*vars_found) { 385 *vars_found = true; 386 } 387 } 388 else { 389 *out_len += 2; 390 } 391 break; 392 } 393 p ++; 394 } 395 } 396 else if (*ptr != '$') { 397 /* Not count escaped dollar sign */ 398 ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found); 399 if (found && !*vars_found) { 400 *vars_found = true; 401 } 402 if (!found) { 403 (*out_len) ++; 404 } 405 } 406 else { 407 ret ++; 408 (*out_len) ++; 409 } 410 411 return ret; 412 } 413 414 /** 415 * Expand a single variable 416 * @param parser 417 * @param ptr 418 * @param remain 419 * @param dest 420 * @return 421 */ 422 static const char * 423 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr, 424 size_t remain, unsigned char **dest) 425 { 426 unsigned char *d = *dest, *dst; 427 const char *p = ptr + 1, *ret; 428 struct ucl_variable *var; 429 size_t dstlen; 430 bool need_free = false; 431 bool found = false; 432 bool strict = false; 433 434 ret = ptr + 1; 435 remain --; 436 437 if (*p == '$') { 438 *d++ = *p++; 439 *dest = d; 440 return p; 441 } 442 else if (*p == '{') { 443 p ++; 444 strict = true; 445 ret += 2; 446 remain -= 2; 447 } 448 449 LL_FOREACH (parser->variables, var) { 450 if (remain >= var->var_len) { 451 if (memcmp (p, var->var, var->var_len) == 0) { 452 memcpy (d, var->value, var->value_len); 453 ret += var->var_len; 454 d += var->value_len; 455 found = true; 456 break; 457 } 458 } 459 } 460 if (!found) { 461 if (strict && parser->var_handler != NULL) { 462 size_t var_len = 0; 463 while (var_len < remain && p[var_len] != '}') 464 var_len ++; 465 466 if (parser->var_handler (p, var_len, &dst, &dstlen, &need_free, 467 parser->var_data)) { 468 memcpy (d, dst, dstlen); 469 ret += var_len; 470 d += dstlen; 471 if (need_free) { 472 free (dst); 473 } 474 found = true; 475 } 476 } 477 478 /* Leave variable as is */ 479 if (!found) { 480 if (strict) { 481 /* Copy '${' */ 482 memcpy (d, ptr, 2); 483 d += 2; 484 ret --; 485 } 486 else { 487 memcpy (d, ptr, 1); 488 d ++; 489 } 490 } 491 } 492 493 *dest = d; 494 return ret; 495 } 496 497 /** 498 * Expand variables in string 499 * @param parser 500 * @param dst 501 * @param src 502 * @param in_len 503 * @return 504 */ 505 static ssize_t 506 ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst, 507 const char *src, size_t in_len) 508 { 509 const char *p, *end = src + in_len; 510 unsigned char *d; 511 size_t out_len = 0; 512 bool vars_found = false; 513 514 if (parser->flags & UCL_PARSER_DISABLE_MACRO) { 515 *dst = NULL; 516 return in_len; 517 } 518 519 p = src; 520 while (p != end) { 521 if (*p == '$') { 522 p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found); 523 } 524 else { 525 p ++; 526 out_len ++; 527 } 528 } 529 530 if (!vars_found) { 531 /* Trivial case */ 532 *dst = NULL; 533 return in_len; 534 } 535 536 *dst = UCL_ALLOC (out_len + 1); 537 if (*dst == NULL) { 538 return in_len; 539 } 540 541 d = *dst; 542 p = src; 543 while (p != end) { 544 if (*p == '$') { 545 p = ucl_expand_single_variable (parser, p, end - p, &d); 546 } 547 else { 548 *d++ = *p++; 549 } 550 } 551 552 *d = '\0'; 553 554 return out_len; 555 } 556 557 /** 558 * Store or copy pointer to the trash stack 559 * @param parser parser object 560 * @param src src string 561 * @param dst destination buffer (trash stack pointer) 562 * @param dst_const const destination pointer (e.g. value of object) 563 * @param in_len input length 564 * @param need_unescape need to unescape source (and copy it) 565 * @param need_lowercase need to lowercase value (and copy) 566 * @param need_expand need to expand variables (and copy as well) 567 * @return output length (excluding \0 symbol) 568 */ 569 static inline ssize_t 570 ucl_copy_or_store_ptr (struct ucl_parser *parser, 571 const unsigned char *src, unsigned char **dst, 572 const char **dst_const, size_t in_len, 573 bool need_unescape, bool need_lowercase, bool need_expand) 574 { 575 ssize_t ret = -1, tret; 576 unsigned char *tmp; 577 578 if (need_unescape || need_lowercase || 579 (need_expand && parser->variables != NULL) || 580 !(parser->flags & UCL_PARSER_ZEROCOPY)) { 581 /* Copy string */ 582 *dst = UCL_ALLOC (in_len + 1); 583 if (*dst == NULL) { 584 ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for a string", 585 &parser->err); 586 return false; 587 } 588 if (need_lowercase) { 589 ret = ucl_strlcpy_tolower (*dst, src, in_len + 1); 590 } 591 else { 592 ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1); 593 } 594 595 if (need_unescape) { 596 ret = ucl_unescape_json_string (*dst, ret); 597 } 598 if (need_expand) { 599 tmp = *dst; 600 tret = ret; 601 ret = ucl_expand_variable (parser, dst, tmp, ret); 602 if (*dst == NULL) { 603 /* Nothing to expand */ 604 *dst = tmp; 605 ret = tret; 606 } 607 else { 608 /* Free unexpanded value */ 609 UCL_FREE (in_len + 1, tmp); 610 } 611 } 612 *dst_const = *dst; 613 } 614 else { 615 *dst_const = src; 616 ret = in_len; 617 } 618 619 return ret; 620 } 621 622 /** 623 * Create and append an object at the specified level 624 * @param parser 625 * @param is_array 626 * @param level 627 * @return 628 */ 629 static inline ucl_object_t * 630 ucl_parser_add_container (ucl_object_t *obj, struct ucl_parser *parser, 631 bool is_array, int level) 632 { 633 struct ucl_stack *st; 634 635 if (!is_array) { 636 if (obj == NULL) { 637 obj = ucl_object_new_full (UCL_OBJECT, parser->chunks->priority); 638 } 639 else { 640 obj->type = UCL_OBJECT; 641 } 642 if (obj->value.ov == NULL) { 643 obj->value.ov = ucl_hash_create (parser->flags & UCL_PARSER_KEY_LOWERCASE); 644 } 645 parser->state = UCL_STATE_KEY; 646 } 647 else { 648 if (obj == NULL) { 649 obj = ucl_object_new_full (UCL_ARRAY, parser->chunks->priority); 650 } 651 else { 652 obj->type = UCL_ARRAY; 653 } 654 parser->state = UCL_STATE_VALUE; 655 } 656 657 st = UCL_ALLOC (sizeof (struct ucl_stack)); 658 659 if (st == NULL) { 660 ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for an object", 661 &parser->err); 662 ucl_object_unref (obj); 663 return NULL; 664 } 665 666 st->obj = obj; 667 st->level = level; 668 LL_PREPEND (parser->stack, st); 669 parser->cur_obj = obj; 670 671 return obj; 672 } 673 674 int 675 ucl_maybe_parse_number (ucl_object_t *obj, 676 const char *start, const char *end, const char **pos, 677 bool allow_double, bool number_bytes, bool allow_time) 678 { 679 const char *p = start, *c = start; 680 char *endptr; 681 bool got_dot = false, got_exp = false, need_double = false, 682 is_time = false, valid_start = false, is_hex = false, 683 is_neg = false; 684 double dv = 0; 685 int64_t lv = 0; 686 687 if (*p == '-') { 688 is_neg = true; 689 c ++; 690 p ++; 691 } 692 while (p < end) { 693 if (is_hex && isxdigit (*p)) { 694 p ++; 695 } 696 else if (isdigit (*p)) { 697 valid_start = true; 698 p ++; 699 } 700 else if (!is_hex && (*p == 'x' || *p == 'X')) { 701 is_hex = true; 702 allow_double = false; 703 c = p + 1; 704 } 705 else if (allow_double) { 706 if (p == c) { 707 /* Empty digits sequence, not a number */ 708 *pos = start; 709 return EINVAL; 710 } 711 else if (*p == '.') { 712 if (got_dot) { 713 /* Double dots, not a number */ 714 *pos = start; 715 return EINVAL; 716 } 717 else { 718 got_dot = true; 719 need_double = true; 720 p ++; 721 } 722 } 723 else if (*p == 'e' || *p == 'E') { 724 if (got_exp) { 725 /* Double exp, not a number */ 726 *pos = start; 727 return EINVAL; 728 } 729 else { 730 got_exp = true; 731 need_double = true; 732 p ++; 733 if (p >= end) { 734 *pos = start; 735 return EINVAL; 736 } 737 if (!isdigit (*p) && *p != '+' && *p != '-') { 738 /* Wrong exponent sign */ 739 *pos = start; 740 return EINVAL; 741 } 742 else { 743 p ++; 744 } 745 } 746 } 747 else { 748 /* Got the end of the number, need to check */ 749 break; 750 } 751 } 752 else { 753 break; 754 } 755 } 756 757 if (!valid_start) { 758 *pos = start; 759 return EINVAL; 760 } 761 762 errno = 0; 763 if (need_double) { 764 dv = strtod (c, &endptr); 765 } 766 else { 767 if (is_hex) { 768 lv = strtoimax (c, &endptr, 16); 769 } 770 else { 771 lv = strtoimax (c, &endptr, 10); 772 } 773 } 774 if (errno == ERANGE) { 775 *pos = start; 776 return ERANGE; 777 } 778 779 /* Now check endptr */ 780 if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') { 781 p = endptr; 782 goto set_obj; 783 } 784 785 if (endptr < end && endptr != start) { 786 p = endptr; 787 switch (*p) { 788 case 'm': 789 case 'M': 790 case 'g': 791 case 'G': 792 case 'k': 793 case 'K': 794 if (end - p >= 2) { 795 if (p[1] == 's' || p[1] == 'S') { 796 /* Milliseconds */ 797 if (!need_double) { 798 need_double = true; 799 dv = lv; 800 } 801 is_time = true; 802 if (p[0] == 'm' || p[0] == 'M') { 803 dv /= 1000.; 804 } 805 else { 806 dv *= ucl_lex_num_multiplier (*p, false); 807 } 808 p += 2; 809 goto set_obj; 810 } 811 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) { 812 /* Bytes */ 813 if (need_double) { 814 need_double = false; 815 lv = dv; 816 } 817 lv *= ucl_lex_num_multiplier (*p, true); 818 p += 2; 819 goto set_obj; 820 } 821 else if (ucl_lex_is_atom_end (p[1])) { 822 if (need_double) { 823 dv *= ucl_lex_num_multiplier (*p, false); 824 } 825 else { 826 lv *= ucl_lex_num_multiplier (*p, number_bytes); 827 } 828 p ++; 829 goto set_obj; 830 } 831 else if (allow_time && end - p >= 3) { 832 if (tolower (p[0]) == 'm' && 833 tolower (p[1]) == 'i' && 834 tolower (p[2]) == 'n') { 835 /* Minutes */ 836 if (!need_double) { 837 need_double = true; 838 dv = lv; 839 } 840 is_time = true; 841 dv *= 60.; 842 p += 3; 843 goto set_obj; 844 } 845 } 846 } 847 else { 848 if (need_double) { 849 dv *= ucl_lex_num_multiplier (*p, false); 850 } 851 else { 852 lv *= ucl_lex_num_multiplier (*p, number_bytes); 853 } 854 p ++; 855 goto set_obj; 856 } 857 break; 858 case 'S': 859 case 's': 860 if (allow_time && 861 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { 862 if (!need_double) { 863 need_double = true; 864 dv = lv; 865 } 866 p ++; 867 is_time = true; 868 goto set_obj; 869 } 870 break; 871 case 'h': 872 case 'H': 873 case 'd': 874 case 'D': 875 case 'w': 876 case 'W': 877 case 'Y': 878 case 'y': 879 if (allow_time && 880 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { 881 if (!need_double) { 882 need_double = true; 883 dv = lv; 884 } 885 is_time = true; 886 dv *= ucl_lex_time_multiplier (*p); 887 p ++; 888 goto set_obj; 889 } 890 break; 891 case '\t': 892 case ' ': 893 while (p < end && ucl_test_character(*p, UCL_CHARACTER_WHITESPACE)) { 894 p++; 895 } 896 if (ucl_lex_is_atom_end(*p)) 897 goto set_obj; 898 break; 899 } 900 } 901 else if (endptr == end) { 902 /* Just a number at the end of chunk */ 903 p = endptr; 904 goto set_obj; 905 } 906 907 *pos = c; 908 return EINVAL; 909 910 set_obj: 911 if (obj != NULL) { 912 if (allow_double && (need_double || is_time)) { 913 if (!is_time) { 914 obj->type = UCL_FLOAT; 915 } 916 else { 917 obj->type = UCL_TIME; 918 } 919 obj->value.dv = is_neg ? (-dv) : dv; 920 } 921 else { 922 obj->type = UCL_INT; 923 obj->value.iv = is_neg ? (-lv) : lv; 924 } 925 } 926 *pos = p; 927 return 0; 928 } 929 930 /** 931 * Parse possible number 932 * @param parser 933 * @param chunk 934 * @param obj 935 * @return true if a number has been parsed 936 */ 937 static bool 938 ucl_lex_number (struct ucl_parser *parser, 939 struct ucl_chunk *chunk, ucl_object_t *obj) 940 { 941 const unsigned char *pos; 942 int ret; 943 944 ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos, 945 true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0)); 946 947 if (ret == 0) { 948 chunk->remain -= pos - chunk->pos; 949 chunk->column += pos - chunk->pos; 950 chunk->pos = pos; 951 return true; 952 } 953 else if (ret == ERANGE) { 954 ucl_set_err (parser, UCL_ESYNTAX, "numeric value out of range", 955 &parser->err); 956 } 957 958 return false; 959 } 960 961 /** 962 * Parse quoted string with possible escapes 963 * @param parser 964 * @param chunk 965 * @param need_unescape 966 * @param ucl_escape 967 * @param var_expand 968 * @return true if a string has been parsed 969 */ 970 static bool 971 ucl_lex_json_string (struct ucl_parser *parser, 972 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand) 973 { 974 const unsigned char *p = chunk->pos; 975 unsigned char c; 976 int i; 977 978 while (p < chunk->end) { 979 c = *p; 980 if (c < 0x1F) { 981 /* Unmasked control character */ 982 if (c == '\n') { 983 ucl_set_err (parser, UCL_ESYNTAX, "unexpected newline", 984 &parser->err); 985 } 986 else { 987 ucl_set_err (parser, UCL_ESYNTAX, "unexpected control character", 988 &parser->err); 989 } 990 return false; 991 } 992 else if (c == '\\') { 993 ucl_chunk_skipc (chunk, p); 994 c = *p; 995 if (p >= chunk->end) { 996 ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character", 997 &parser->err); 998 return false; 999 } 1000 else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) { 1001 if (c == 'u') { 1002 ucl_chunk_skipc (chunk, p); 1003 for (i = 0; i < 4 && p < chunk->end; i ++) { 1004 if (!isxdigit (*p)) { 1005 ucl_set_err (parser, UCL_ESYNTAX, "invalid utf escape", 1006 &parser->err); 1007 return false; 1008 } 1009 ucl_chunk_skipc (chunk, p); 1010 } 1011 if (p >= chunk->end) { 1012 ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character", 1013 &parser->err); 1014 return false; 1015 } 1016 } 1017 else { 1018 ucl_chunk_skipc (chunk, p); 1019 } 1020 } 1021 *need_unescape = true; 1022 *ucl_escape = true; 1023 continue; 1024 } 1025 else if (c == '"') { 1026 ucl_chunk_skipc (chunk, p); 1027 return true; 1028 } 1029 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) { 1030 *ucl_escape = true; 1031 } 1032 else if (c == '$') { 1033 *var_expand = true; 1034 } 1035 ucl_chunk_skipc (chunk, p); 1036 } 1037 1038 ucl_set_err (parser, UCL_ESYNTAX, "no quote at the end of json string", 1039 &parser->err); 1040 return false; 1041 } 1042 1043 static void 1044 ucl_parser_append_elt (struct ucl_parser *parser, ucl_hash_t *cont, 1045 ucl_object_t *top, 1046 ucl_object_t *elt) 1047 { 1048 ucl_object_t *nobj; 1049 1050 if ((parser->flags & UCL_PARSER_NO_IMPLICIT_ARRAYS) == 0) { 1051 /* Implicit array */ 1052 top->flags |= UCL_OBJECT_MULTIVALUE; 1053 DL_APPEND (top, elt); 1054 parser->stack->obj->len ++; 1055 } 1056 else { 1057 if ((top->flags & UCL_OBJECT_MULTIVALUE) != 0) { 1058 /* Just add to the explicit array */ 1059 ucl_array_append (top, elt); 1060 } 1061 else { 1062 /* Convert to an array */ 1063 nobj = ucl_object_typed_new (UCL_ARRAY); 1064 nobj->key = top->key; 1065 nobj->keylen = top->keylen; 1066 nobj->flags |= UCL_OBJECT_MULTIVALUE; 1067 ucl_array_append (nobj, top); 1068 ucl_array_append (nobj, elt); 1069 ucl_hash_replace (cont, top, nobj); 1070 } 1071 } 1072 } 1073 1074 bool 1075 ucl_parser_process_object_element (struct ucl_parser *parser, ucl_object_t *nobj) 1076 { 1077 ucl_hash_t *container; 1078 ucl_object_t *tobj; 1079 char errmsg[256]; 1080 1081 container = parser->stack->obj->value.ov; 1082 1083 tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj)); 1084 if (tobj == NULL) { 1085 container = ucl_hash_insert_object (container, nobj, 1086 parser->flags & UCL_PARSER_KEY_LOWERCASE); 1087 nobj->prev = nobj; 1088 nobj->next = NULL; 1089 parser->stack->obj->len ++; 1090 } 1091 else { 1092 unsigned priold = ucl_object_get_priority (tobj), 1093 prinew = ucl_object_get_priority (nobj); 1094 switch (parser->chunks->strategy) { 1095 1096 case UCL_DUPLICATE_APPEND: 1097 /* 1098 * The logic here is the following: 1099 * 1100 * - if we have two objects with the same priority, then we form an 1101 * implicit or explicit array 1102 * - if a new object has bigger priority, then we overwrite an old one 1103 * - if a new object has lower priority, then we ignore it 1104 */ 1105 1106 1107 /* Special case for inherited objects */ 1108 if (tobj->flags & UCL_OBJECT_INHERITED) { 1109 prinew = priold + 1; 1110 } 1111 1112 if (priold == prinew) { 1113 ucl_parser_append_elt (parser, container, tobj, nobj); 1114 } 1115 else if (priold > prinew) { 1116 /* 1117 * We add this new object to a list of trash objects just to ensure 1118 * that it won't come to any real object 1119 * XXX: rather inefficient approach 1120 */ 1121 DL_APPEND (parser->trash_objs, nobj); 1122 } 1123 else { 1124 ucl_hash_replace (container, tobj, nobj); 1125 ucl_object_unref (tobj); 1126 } 1127 1128 break; 1129 1130 case UCL_DUPLICATE_REWRITE: 1131 /* We just rewrite old values regardless of priority */ 1132 ucl_hash_replace (container, tobj, nobj); 1133 ucl_object_unref (tobj); 1134 1135 break; 1136 1137 case UCL_DUPLICATE_ERROR: 1138 snprintf(errmsg, sizeof(errmsg), 1139 "duplicate element for key '%s' found", 1140 nobj->key); 1141 ucl_set_err (parser, UCL_EMERGE, errmsg, &parser->err); 1142 return false; 1143 1144 case UCL_DUPLICATE_MERGE: 1145 /* 1146 * Here we do have some old object so we just push it on top of objects stack 1147 * Check priority and then perform the merge on the remaining objects 1148 */ 1149 if (tobj->type == UCL_OBJECT || tobj->type == UCL_ARRAY) { 1150 ucl_object_unref (nobj); 1151 nobj = tobj; 1152 } 1153 else if (priold == prinew) { 1154 ucl_parser_append_elt (parser, container, tobj, nobj); 1155 } 1156 else if (priold > prinew) { 1157 /* 1158 * We add this new object to a list of trash objects just to ensure 1159 * that it won't come to any real object 1160 * XXX: rather inefficient approach 1161 */ 1162 DL_APPEND (parser->trash_objs, nobj); 1163 } 1164 else { 1165 ucl_hash_replace (container, tobj, nobj); 1166 ucl_object_unref (tobj); 1167 } 1168 break; 1169 } 1170 } 1171 1172 parser->stack->obj->value.ov = container; 1173 parser->cur_obj = nobj; 1174 ucl_attach_comment (parser, nobj, false); 1175 1176 return true; 1177 } 1178 1179 /** 1180 * Parse a key in an object 1181 * @param parser 1182 * @param chunk 1183 * @param next_key 1184 * @param end_of_object 1185 * @return true if a key has been parsed 1186 */ 1187 static bool 1188 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, 1189 bool *next_key, bool *end_of_object) 1190 { 1191 const unsigned char *p, *c = NULL, *end, *t; 1192 const char *key = NULL; 1193 bool got_quote = false, got_eq = false, got_semicolon = false, 1194 need_unescape = false, ucl_escape = false, var_expand = false, 1195 got_content = false, got_sep = false; 1196 ucl_object_t *nobj; 1197 ssize_t keylen; 1198 1199 p = chunk->pos; 1200 1201 if (*p == '.') { 1202 /* It is macro actually */ 1203 if (!(parser->flags & UCL_PARSER_DISABLE_MACRO)) { 1204 ucl_chunk_skipc (chunk, p); 1205 } 1206 1207 parser->prev_state = parser->state; 1208 parser->state = UCL_STATE_MACRO_NAME; 1209 *end_of_object = false; 1210 return true; 1211 } 1212 while (p < chunk->end) { 1213 /* 1214 * A key must start with alpha, number, '/' or '_' and end with space character 1215 */ 1216 if (c == NULL) { 1217 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1218 if (!ucl_skip_comments (parser)) { 1219 return false; 1220 } 1221 p = chunk->pos; 1222 } 1223 else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1224 ucl_chunk_skipc (chunk, p); 1225 } 1226 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) { 1227 /* The first symbol */ 1228 c = p; 1229 ucl_chunk_skipc (chunk, p); 1230 got_content = true; 1231 } 1232 else if (*p == '"') { 1233 /* JSON style key */ 1234 c = p + 1; 1235 got_quote = true; 1236 got_content = true; 1237 ucl_chunk_skipc (chunk, p); 1238 } 1239 else if (*p == '}') { 1240 /* We have actually end of an object */ 1241 *end_of_object = true; 1242 return true; 1243 } 1244 else if (*p == '.') { 1245 ucl_chunk_skipc (chunk, p); 1246 parser->prev_state = parser->state; 1247 parser->state = UCL_STATE_MACRO_NAME; 1248 return true; 1249 } 1250 else { 1251 /* Invalid identifier */ 1252 ucl_set_err (parser, UCL_ESYNTAX, "key must begin with a letter", 1253 &parser->err); 1254 return false; 1255 } 1256 } 1257 else { 1258 /* Parse the body of a key */ 1259 if (!got_quote) { 1260 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) { 1261 got_content = true; 1262 ucl_chunk_skipc (chunk, p); 1263 } 1264 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) { 1265 end = p; 1266 break; 1267 } 1268 else { 1269 ucl_set_err (parser, UCL_ESYNTAX, "invalid character in a key", 1270 &parser->err); 1271 return false; 1272 } 1273 } 1274 else { 1275 /* We need to parse json like quoted string */ 1276 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1277 return false; 1278 } 1279 /* Always escape keys obtained via json */ 1280 end = chunk->pos - 1; 1281 p = chunk->pos; 1282 break; 1283 } 1284 } 1285 } 1286 1287 if (p >= chunk->end && got_content) { 1288 ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err); 1289 return false; 1290 } 1291 else if (!got_content) { 1292 return true; 1293 } 1294 *end_of_object = false; 1295 /* We are now at the end of the key, need to parse the rest */ 1296 while (p < chunk->end) { 1297 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 1298 ucl_chunk_skipc (chunk, p); 1299 } 1300 else if (*p == '=') { 1301 if (!got_eq && !got_semicolon) { 1302 ucl_chunk_skipc (chunk, p); 1303 got_eq = true; 1304 } 1305 else { 1306 ucl_set_err (parser, UCL_ESYNTAX, "unexpected '=' character", 1307 &parser->err); 1308 return false; 1309 } 1310 } 1311 else if (*p == ':') { 1312 if (!got_eq && !got_semicolon) { 1313 ucl_chunk_skipc (chunk, p); 1314 got_semicolon = true; 1315 } 1316 else { 1317 ucl_set_err (parser, UCL_ESYNTAX, "unexpected ':' character", 1318 &parser->err); 1319 return false; 1320 } 1321 } 1322 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1323 /* Check for comment */ 1324 if (!ucl_skip_comments (parser)) { 1325 return false; 1326 } 1327 p = chunk->pos; 1328 } 1329 else { 1330 /* Start value */ 1331 break; 1332 } 1333 } 1334 1335 if (p >= chunk->end && got_content) { 1336 ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err); 1337 return false; 1338 } 1339 1340 got_sep = got_semicolon || got_eq; 1341 1342 if (!got_sep) { 1343 /* 1344 * Maybe we have more keys nested, so search for termination character. 1345 * Possible choices: 1346 * 1) key1 key2 ... keyN [:=] value <- we treat that as error 1347 * 2) key1 ... keyN {} or [] <- we treat that as nested objects 1348 * 3) key1 value[;,\n] <- we treat that as linear object 1349 */ 1350 t = p; 1351 *next_key = false; 1352 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) { 1353 t ++; 1354 } 1355 /* Check first non-space character after a key */ 1356 if (*t != '{' && *t != '[') { 1357 while (t < chunk->end) { 1358 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') { 1359 break; 1360 } 1361 else if (*t == '{' || *t == '[') { 1362 *next_key = true; 1363 break; 1364 } 1365 t ++; 1366 } 1367 } 1368 } 1369 1370 /* Create a new object */ 1371 nobj = ucl_object_new_full (UCL_NULL, parser->chunks->priority); 1372 keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY], 1373 &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false); 1374 if (keylen == -1) { 1375 ucl_object_unref (nobj); 1376 return false; 1377 } 1378 else if (keylen == 0) { 1379 ucl_set_err (parser, UCL_ESYNTAX, "empty keys are not allowed", &parser->err); 1380 ucl_object_unref (nobj); 1381 return false; 1382 } 1383 1384 nobj->key = key; 1385 nobj->keylen = keylen; 1386 1387 if (!ucl_parser_process_object_element (parser, nobj)) { 1388 return false; 1389 } 1390 1391 if (ucl_escape) { 1392 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE; 1393 } 1394 1395 1396 return true; 1397 } 1398 1399 /** 1400 * Parse a cl string 1401 * @param parser 1402 * @param chunk 1403 * @param var_expand 1404 * @param need_unescape 1405 * @return true if a key has been parsed 1406 */ 1407 static bool 1408 ucl_parse_string_value (struct ucl_parser *parser, 1409 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape) 1410 { 1411 const unsigned char *p; 1412 enum { 1413 UCL_BRACE_ROUND = 0, 1414 UCL_BRACE_SQUARE, 1415 UCL_BRACE_FIGURE 1416 }; 1417 int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}}; 1418 1419 p = chunk->pos; 1420 1421 while (p < chunk->end) { 1422 1423 /* Skip pairs of figure braces */ 1424 if (*p == '{') { 1425 braces[UCL_BRACE_FIGURE][0] ++; 1426 } 1427 else if (*p == '}') { 1428 braces[UCL_BRACE_FIGURE][1] ++; 1429 if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) { 1430 /* This is not a termination symbol, continue */ 1431 ucl_chunk_skipc (chunk, p); 1432 continue; 1433 } 1434 } 1435 /* Skip pairs of square braces */ 1436 else if (*p == '[') { 1437 braces[UCL_BRACE_SQUARE][0] ++; 1438 } 1439 else if (*p == ']') { 1440 braces[UCL_BRACE_SQUARE][1] ++; 1441 if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) { 1442 /* This is not a termination symbol, continue */ 1443 ucl_chunk_skipc (chunk, p); 1444 continue; 1445 } 1446 } 1447 else if (*p == '$') { 1448 *var_expand = true; 1449 } 1450 else if (*p == '\\') { 1451 *need_unescape = true; 1452 ucl_chunk_skipc (chunk, p); 1453 if (p < chunk->end) { 1454 ucl_chunk_skipc (chunk, p); 1455 } 1456 continue; 1457 } 1458 1459 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1460 break; 1461 } 1462 ucl_chunk_skipc (chunk, p); 1463 } 1464 1465 return true; 1466 } 1467 1468 /** 1469 * Parse multiline string ending with \n{term}\n 1470 * @param parser 1471 * @param chunk 1472 * @param term 1473 * @param term_len 1474 * @param beg 1475 * @param var_expand 1476 * @return size of multiline string or 0 in case of error 1477 */ 1478 static int 1479 ucl_parse_multiline_string (struct ucl_parser *parser, 1480 struct ucl_chunk *chunk, const unsigned char *term, 1481 int term_len, unsigned char const **beg, 1482 bool *var_expand) 1483 { 1484 const unsigned char *p, *c, *tend; 1485 bool newline = false; 1486 int len = 0; 1487 1488 p = chunk->pos; 1489 1490 c = p; 1491 1492 while (p < chunk->end) { 1493 if (newline) { 1494 if (chunk->end - p < term_len) { 1495 return 0; 1496 } 1497 else if (memcmp (p, term, term_len) == 0) { 1498 tend = p + term_len; 1499 if (*tend != '\n' && *tend != ';' && *tend != ',') { 1500 /* Incomplete terminator */ 1501 ucl_chunk_skipc (chunk, p); 1502 continue; 1503 } 1504 len = p - c; 1505 chunk->remain -= term_len; 1506 chunk->pos = p + term_len; 1507 chunk->column = term_len; 1508 *beg = c; 1509 break; 1510 } 1511 } 1512 if (*p == '\n') { 1513 newline = true; 1514 } 1515 else { 1516 if (*p == '$') { 1517 *var_expand = true; 1518 } 1519 newline = false; 1520 } 1521 ucl_chunk_skipc (chunk, p); 1522 } 1523 1524 return len; 1525 } 1526 1527 static inline ucl_object_t* 1528 ucl_parser_get_container (struct ucl_parser *parser) 1529 { 1530 ucl_object_t *t, *obj = NULL; 1531 1532 if (parser == NULL || parser->stack == NULL || parser->stack->obj == NULL) { 1533 return NULL; 1534 } 1535 1536 if (parser->stack->obj->type == UCL_ARRAY) { 1537 /* Object must be allocated */ 1538 obj = ucl_object_new_full (UCL_NULL, parser->chunks->priority); 1539 t = parser->stack->obj; 1540 1541 if (!ucl_array_append (t, obj)) { 1542 ucl_object_unref (obj); 1543 return NULL; 1544 } 1545 1546 parser->cur_obj = obj; 1547 ucl_attach_comment (parser, obj, false); 1548 } 1549 else { 1550 /* Object has been already allocated */ 1551 obj = parser->cur_obj; 1552 } 1553 1554 return obj; 1555 } 1556 1557 /** 1558 * Handle value data 1559 * @param parser 1560 * @param chunk 1561 * @return 1562 */ 1563 static bool 1564 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1565 { 1566 const unsigned char *p, *c; 1567 ucl_object_t *obj = NULL; 1568 unsigned int stripped_spaces; 1569 int str_len; 1570 bool need_unescape = false, ucl_escape = false, var_expand = false; 1571 1572 p = chunk->pos; 1573 1574 /* Skip any spaces and comments */ 1575 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) || 1576 (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1577 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1578 ucl_chunk_skipc (chunk, p); 1579 } 1580 if (!ucl_skip_comments (parser)) { 1581 return false; 1582 } 1583 p = chunk->pos; 1584 } 1585 1586 while (p < chunk->end) { 1587 c = p; 1588 switch (*p) { 1589 case '"': 1590 ucl_chunk_skipc (chunk, p); 1591 1592 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, 1593 &var_expand)) { 1594 return false; 1595 } 1596 1597 obj = ucl_parser_get_container (parser); 1598 if (!obj) { 1599 return false; 1600 } 1601 1602 str_len = chunk->pos - c - 2; 1603 obj->type = UCL_STRING; 1604 if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, 1605 &obj->trash_stack[UCL_TRASH_VALUE], 1606 &obj->value.sv, str_len, need_unescape, false, 1607 var_expand)) == -1) { 1608 return false; 1609 } 1610 obj->len = str_len; 1611 1612 parser->state = UCL_STATE_AFTER_VALUE; 1613 p = chunk->pos; 1614 1615 return true; 1616 break; 1617 case '{': 1618 obj = ucl_parser_get_container (parser); 1619 /* We have a new object */ 1620 obj = ucl_parser_add_container (obj, parser, false, parser->stack->level); 1621 if (obj == NULL) { 1622 return false; 1623 } 1624 1625 ucl_chunk_skipc (chunk, p); 1626 1627 return true; 1628 break; 1629 case '[': 1630 obj = ucl_parser_get_container (parser); 1631 /* We have a new array */ 1632 obj = ucl_parser_add_container (obj, parser, true, parser->stack->level); 1633 if (obj == NULL) { 1634 return false; 1635 } 1636 1637 ucl_chunk_skipc (chunk, p); 1638 1639 return true; 1640 break; 1641 case ']': 1642 /* We have the array ending */ 1643 if (parser->stack && parser->stack->obj->type == UCL_ARRAY) { 1644 parser->state = UCL_STATE_AFTER_VALUE; 1645 return true; 1646 } 1647 else { 1648 goto parse_string; 1649 } 1650 break; 1651 case '<': 1652 obj = ucl_parser_get_container (parser); 1653 /* We have something like multiline value, which must be <<[A-Z]+\n */ 1654 if (chunk->end - p > 3) { 1655 if (memcmp (p, "<<", 2) == 0) { 1656 p += 2; 1657 /* We allow only uppercase characters in multiline definitions */ 1658 while (p < chunk->end && *p >= 'A' && *p <= 'Z') { 1659 p ++; 1660 } 1661 if (*p =='\n') { 1662 /* Set chunk positions and start multiline parsing */ 1663 c += 2; 1664 chunk->remain -= p - c; 1665 chunk->pos = p + 1; 1666 chunk->column = 0; 1667 chunk->line ++; 1668 if ((str_len = ucl_parse_multiline_string (parser, chunk, c, 1669 p - c, &c, &var_expand)) == 0) { 1670 ucl_set_err (parser, UCL_ESYNTAX, 1671 "unterminated multiline value", &parser->err); 1672 return false; 1673 } 1674 1675 obj->type = UCL_STRING; 1676 obj->flags |= UCL_OBJECT_MULTILINE; 1677 if ((str_len = ucl_copy_or_store_ptr (parser, c, 1678 &obj->trash_stack[UCL_TRASH_VALUE], 1679 &obj->value.sv, str_len - 1, false, 1680 false, var_expand)) == -1) { 1681 return false; 1682 } 1683 obj->len = str_len; 1684 1685 parser->state = UCL_STATE_AFTER_VALUE; 1686 1687 return true; 1688 } 1689 } 1690 } 1691 /* Fallback to ordinary strings */ 1692 default: 1693 parse_string: 1694 if (obj == NULL) { 1695 obj = ucl_parser_get_container (parser); 1696 } 1697 1698 /* Parse atom */ 1699 if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) { 1700 if (!ucl_lex_number (parser, chunk, obj)) { 1701 if (parser->state == UCL_STATE_ERROR) { 1702 return false; 1703 } 1704 } 1705 else { 1706 parser->state = UCL_STATE_AFTER_VALUE; 1707 return true; 1708 } 1709 /* Fallback to normal string */ 1710 } 1711 1712 if (!ucl_parse_string_value (parser, chunk, &var_expand, 1713 &need_unescape)) { 1714 return false; 1715 } 1716 /* Cut trailing spaces */ 1717 stripped_spaces = 0; 1718 while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces), 1719 UCL_CHARACTER_WHITESPACE)) { 1720 stripped_spaces ++; 1721 } 1722 str_len = chunk->pos - c - stripped_spaces; 1723 if (str_len <= 0) { 1724 ucl_set_err (parser, UCL_ESYNTAX, "string value must not be empty", 1725 &parser->err); 1726 return false; 1727 } 1728 else if (str_len == 4 && memcmp (c, "null", 4) == 0) { 1729 obj->len = 0; 1730 obj->type = UCL_NULL; 1731 } 1732 else if (!ucl_maybe_parse_boolean (obj, c, str_len)) { 1733 obj->type = UCL_STRING; 1734 if ((str_len = ucl_copy_or_store_ptr (parser, c, 1735 &obj->trash_stack[UCL_TRASH_VALUE], 1736 &obj->value.sv, str_len, need_unescape, 1737 false, var_expand)) == -1) { 1738 return false; 1739 } 1740 obj->len = str_len; 1741 } 1742 parser->state = UCL_STATE_AFTER_VALUE; 1743 p = chunk->pos; 1744 1745 return true; 1746 break; 1747 } 1748 } 1749 1750 return true; 1751 } 1752 1753 /** 1754 * Handle after value data 1755 * @param parser 1756 * @param chunk 1757 * @return 1758 */ 1759 static bool 1760 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1761 { 1762 const unsigned char *p; 1763 bool got_sep = false; 1764 struct ucl_stack *st; 1765 1766 p = chunk->pos; 1767 1768 while (p < chunk->end) { 1769 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 1770 /* Skip whitespaces */ 1771 ucl_chunk_skipc (chunk, p); 1772 } 1773 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1774 /* Skip comment */ 1775 if (!ucl_skip_comments (parser)) { 1776 return false; 1777 } 1778 /* Treat comment as a separator */ 1779 got_sep = true; 1780 p = chunk->pos; 1781 } 1782 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) { 1783 if (*p == '}' || *p == ']') { 1784 if (parser->stack == NULL) { 1785 ucl_set_err (parser, UCL_ESYNTAX, 1786 "end of array or object detected without corresponding start", 1787 &parser->err); 1788 return false; 1789 } 1790 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) || 1791 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) { 1792 1793 /* Pop all nested objects from a stack */ 1794 st = parser->stack; 1795 parser->stack = st->next; 1796 UCL_FREE (sizeof (struct ucl_stack), st); 1797 1798 if (parser->cur_obj) { 1799 ucl_attach_comment (parser, parser->cur_obj, true); 1800 } 1801 1802 while (parser->stack != NULL) { 1803 st = parser->stack; 1804 1805 if (st->next == NULL || st->next->level == st->level) { 1806 break; 1807 } 1808 1809 parser->stack = st->next; 1810 parser->cur_obj = st->obj; 1811 UCL_FREE (sizeof (struct ucl_stack), st); 1812 } 1813 } 1814 else { 1815 ucl_set_err (parser, UCL_ESYNTAX, 1816 "unexpected terminating symbol detected", 1817 &parser->err); 1818 return false; 1819 } 1820 1821 if (parser->stack == NULL) { 1822 /* Ignore everything after a top object */ 1823 return true; 1824 } 1825 else { 1826 ucl_chunk_skipc (chunk, p); 1827 } 1828 got_sep = true; 1829 } 1830 else { 1831 /* Got a separator */ 1832 got_sep = true; 1833 ucl_chunk_skipc (chunk, p); 1834 } 1835 } 1836 else { 1837 /* Anything else */ 1838 if (!got_sep) { 1839 ucl_set_err (parser, UCL_ESYNTAX, "delimiter is missing", 1840 &parser->err); 1841 return false; 1842 } 1843 return true; 1844 } 1845 } 1846 1847 return true; 1848 } 1849 1850 static bool 1851 ucl_skip_macro_as_comment (struct ucl_parser *parser, 1852 struct ucl_chunk *chunk) 1853 { 1854 const unsigned char *p, *c; 1855 enum { 1856 macro_skip_start = 0, 1857 macro_has_symbols, 1858 macro_has_obrace, 1859 macro_has_quote, 1860 macro_has_backslash, 1861 macro_has_sqbrace, 1862 macro_save 1863 } state = macro_skip_start, prev_state = macro_skip_start; 1864 1865 p = chunk->pos; 1866 c = chunk->pos; 1867 1868 while (p < chunk->end) { 1869 switch (state) { 1870 case macro_skip_start: 1871 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 1872 state = macro_has_symbols; 1873 } 1874 else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1875 state = macro_save; 1876 continue; 1877 } 1878 1879 ucl_chunk_skipc (chunk, p); 1880 break; 1881 1882 case macro_has_symbols: 1883 if (*p == '{') { 1884 state = macro_has_sqbrace; 1885 } 1886 else if (*p == '(') { 1887 state = macro_has_obrace; 1888 } 1889 else if (*p == '"') { 1890 state = macro_has_quote; 1891 } 1892 else if (*p == '\n') { 1893 state = macro_save; 1894 continue; 1895 } 1896 1897 ucl_chunk_skipc (chunk, p); 1898 break; 1899 1900 case macro_has_obrace: 1901 if (*p == '\\') { 1902 prev_state = state; 1903 state = macro_has_backslash; 1904 } 1905 else if (*p == ')') { 1906 state = macro_has_symbols; 1907 } 1908 1909 ucl_chunk_skipc (chunk, p); 1910 break; 1911 1912 case macro_has_sqbrace: 1913 if (*p == '\\') { 1914 prev_state = state; 1915 state = macro_has_backslash; 1916 } 1917 else if (*p == '}') { 1918 state = macro_save; 1919 } 1920 1921 ucl_chunk_skipc (chunk, p); 1922 break; 1923 1924 case macro_has_quote: 1925 if (*p == '\\') { 1926 prev_state = state; 1927 state = macro_has_backslash; 1928 } 1929 else if (*p == '"') { 1930 state = macro_save; 1931 } 1932 1933 ucl_chunk_skipc (chunk, p); 1934 break; 1935 1936 case macro_has_backslash: 1937 state = prev_state; 1938 ucl_chunk_skipc (chunk, p); 1939 break; 1940 1941 case macro_save: 1942 if (parser->flags & UCL_PARSER_SAVE_COMMENTS) { 1943 ucl_save_comment (parser, c, p - c); 1944 } 1945 1946 return true; 1947 } 1948 } 1949 1950 return false; 1951 } 1952 1953 /** 1954 * Handle macro data 1955 * @param parser 1956 * @param chunk 1957 * @param marco 1958 * @param macro_start 1959 * @param macro_len 1960 * @return 1961 */ 1962 static bool 1963 ucl_parse_macro_value (struct ucl_parser *parser, 1964 struct ucl_chunk *chunk, struct ucl_macro *macro, 1965 unsigned char const **macro_start, size_t *macro_len) 1966 { 1967 const unsigned char *p, *c; 1968 bool need_unescape = false, ucl_escape = false, var_expand = false; 1969 1970 p = chunk->pos; 1971 1972 switch (*p) { 1973 case '"': 1974 /* We have macro value encoded in quotes */ 1975 c = p; 1976 ucl_chunk_skipc (chunk, p); 1977 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1978 return false; 1979 } 1980 1981 *macro_start = c + 1; 1982 *macro_len = chunk->pos - c - 2; 1983 p = chunk->pos; 1984 break; 1985 case '{': 1986 /* We got a multiline macro body */ 1987 ucl_chunk_skipc (chunk, p); 1988 /* Skip spaces at the beginning */ 1989 while (p < chunk->end) { 1990 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1991 ucl_chunk_skipc (chunk, p); 1992 } 1993 else { 1994 break; 1995 } 1996 } 1997 c = p; 1998 while (p < chunk->end) { 1999 if (*p == '}') { 2000 break; 2001 } 2002 ucl_chunk_skipc (chunk, p); 2003 } 2004 *macro_start = c; 2005 *macro_len = p - c; 2006 ucl_chunk_skipc (chunk, p); 2007 break; 2008 default: 2009 /* Macro is not enclosed in quotes or braces */ 2010 c = p; 2011 while (p < chunk->end) { 2012 if (ucl_lex_is_atom_end (*p)) { 2013 break; 2014 } 2015 ucl_chunk_skipc (chunk, p); 2016 } 2017 *macro_start = c; 2018 *macro_len = p - c; 2019 break; 2020 } 2021 2022 /* We are at the end of a macro */ 2023 /* Skip ';' and space characters and return to previous state */ 2024 while (p < chunk->end) { 2025 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') { 2026 break; 2027 } 2028 ucl_chunk_skipc (chunk, p); 2029 } 2030 return true; 2031 } 2032 2033 /** 2034 * Parse macro arguments as UCL object 2035 * @param parser parser structure 2036 * @param chunk the current data chunk 2037 * @return 2038 */ 2039 static ucl_object_t * 2040 ucl_parse_macro_arguments (struct ucl_parser *parser, 2041 struct ucl_chunk *chunk) 2042 { 2043 ucl_object_t *res = NULL; 2044 struct ucl_parser *params_parser; 2045 int obraces = 1, ebraces = 0, state = 0; 2046 const unsigned char *p, *c; 2047 size_t args_len = 0; 2048 struct ucl_parser_saved_state saved; 2049 2050 saved.column = chunk->column; 2051 saved.line = chunk->line; 2052 saved.pos = chunk->pos; 2053 saved.remain = chunk->remain; 2054 p = chunk->pos; 2055 2056 if (*p != '(' || chunk->remain < 2) { 2057 return NULL; 2058 } 2059 2060 /* Set begin and start */ 2061 ucl_chunk_skipc (chunk, p); 2062 c = p; 2063 2064 while ((p) < (chunk)->end) { 2065 switch (state) { 2066 case 0: 2067 /* Parse symbols and check for '(', ')' and '"' */ 2068 if (*p == '(') { 2069 obraces ++; 2070 } 2071 else if (*p == ')') { 2072 ebraces ++; 2073 } 2074 else if (*p == '"') { 2075 state = 1; 2076 } 2077 /* Check pairing */ 2078 if (obraces == ebraces) { 2079 state = 99; 2080 } 2081 else { 2082 args_len ++; 2083 } 2084 /* Check overflow */ 2085 if (chunk->remain == 0) { 2086 goto restore_chunk; 2087 } 2088 ucl_chunk_skipc (chunk, p); 2089 break; 2090 case 1: 2091 /* We have quote character, so skip all but quotes */ 2092 if (*p == '"' && *(p - 1) != '\\') { 2093 state = 0; 2094 } 2095 if (chunk->remain == 0) { 2096 goto restore_chunk; 2097 } 2098 args_len ++; 2099 ucl_chunk_skipc (chunk, p); 2100 break; 2101 case 99: 2102 /* 2103 * We have read the full body of arguments, so we need to parse and set 2104 * object from that 2105 */ 2106 params_parser = ucl_parser_new (parser->flags); 2107 if (!ucl_parser_add_chunk (params_parser, c, args_len)) { 2108 ucl_set_err (parser, UCL_ESYNTAX, "macro arguments parsing error", 2109 &parser->err); 2110 } 2111 else { 2112 res = ucl_parser_get_object (params_parser); 2113 } 2114 ucl_parser_free (params_parser); 2115 2116 return res; 2117 2118 break; 2119 } 2120 } 2121 2122 return res; 2123 2124 restore_chunk: 2125 chunk->column = saved.column; 2126 chunk->line = saved.line; 2127 chunk->pos = saved.pos; 2128 chunk->remain = saved.remain; 2129 2130 return NULL; 2131 } 2132 2133 #define SKIP_SPACES_COMMENTS(parser, chunk, p) do { \ 2134 while ((p) < (chunk)->end) { \ 2135 if (!ucl_test_character (*(p), UCL_CHARACTER_WHITESPACE_UNSAFE)) { \ 2136 if ((chunk)->remain >= 2 && ucl_lex_is_comment ((p)[0], (p)[1])) { \ 2137 if (!ucl_skip_comments (parser)) { \ 2138 return false; \ 2139 } \ 2140 p = (chunk)->pos; \ 2141 } \ 2142 break; \ 2143 } \ 2144 ucl_chunk_skipc (chunk, p); \ 2145 } \ 2146 } while(0) 2147 2148 /** 2149 * Handle the main states of rcl parser 2150 * @param parser parser structure 2151 * @return true if chunk has been parsed and false in case of error 2152 */ 2153 static bool 2154 ucl_state_machine (struct ucl_parser *parser) 2155 { 2156 ucl_object_t *obj, *macro_args; 2157 struct ucl_chunk *chunk = parser->chunks; 2158 const unsigned char *p, *c = NULL, *macro_start = NULL; 2159 unsigned char *macro_escaped; 2160 size_t macro_len = 0; 2161 struct ucl_macro *macro = NULL; 2162 bool next_key = false, end_of_object = false, ret; 2163 2164 if (parser->top_obj == NULL) { 2165 parser->state = UCL_STATE_INIT; 2166 } 2167 2168 p = chunk->pos; 2169 while (chunk->pos < chunk->end) { 2170 switch (parser->state) { 2171 case UCL_STATE_INIT: 2172 /* 2173 * At the init state we can either go to the parse array or object 2174 * if we got [ or { correspondingly or can just treat new data as 2175 * a key of newly created object 2176 */ 2177 if (!ucl_skip_comments (parser)) { 2178 parser->prev_state = parser->state; 2179 parser->state = UCL_STATE_ERROR; 2180 return false; 2181 } 2182 else { 2183 /* Skip any spaces */ 2184 while (p < chunk->end && ucl_test_character (*p, 2185 UCL_CHARACTER_WHITESPACE_UNSAFE)) { 2186 ucl_chunk_skipc (chunk, p); 2187 } 2188 2189 p = chunk->pos; 2190 2191 if (*p == '[') { 2192 parser->state = UCL_STATE_VALUE; 2193 ucl_chunk_skipc (chunk, p); 2194 } 2195 else { 2196 parser->state = UCL_STATE_KEY; 2197 if (*p == '{') { 2198 ucl_chunk_skipc (chunk, p); 2199 } 2200 } 2201 2202 if (parser->top_obj == NULL) { 2203 if (parser->state == UCL_STATE_VALUE) { 2204 obj = ucl_parser_add_container (NULL, parser, true, 0); 2205 } 2206 else { 2207 obj = ucl_parser_add_container (NULL, parser, false, 0); 2208 } 2209 2210 if (obj == NULL) { 2211 return false; 2212 } 2213 2214 parser->top_obj = obj; 2215 parser->cur_obj = obj; 2216 } 2217 2218 } 2219 break; 2220 case UCL_STATE_KEY: 2221 /* Skip any spaces */ 2222 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 2223 ucl_chunk_skipc (chunk, p); 2224 } 2225 if (p == chunk->end || *p == '}') { 2226 /* We have the end of an object */ 2227 parser->state = UCL_STATE_AFTER_VALUE; 2228 continue; 2229 } 2230 if (parser->stack == NULL) { 2231 /* No objects are on stack, but we want to parse a key */ 2232 ucl_set_err (parser, UCL_ESYNTAX, "top object is finished but the parser " 2233 "expects a key", &parser->err); 2234 parser->prev_state = parser->state; 2235 parser->state = UCL_STATE_ERROR; 2236 return false; 2237 } 2238 if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) { 2239 parser->prev_state = parser->state; 2240 parser->state = UCL_STATE_ERROR; 2241 return false; 2242 } 2243 if (end_of_object) { 2244 p = chunk->pos; 2245 parser->state = UCL_STATE_AFTER_VALUE; 2246 continue; 2247 } 2248 else if (parser->state != UCL_STATE_MACRO_NAME) { 2249 if (next_key && parser->stack->obj->type == UCL_OBJECT) { 2250 /* Parse more keys and nest objects accordingly */ 2251 obj = ucl_parser_add_container (parser->cur_obj, parser, false, 2252 parser->stack->level + 1); 2253 if (obj == NULL) { 2254 return false; 2255 } 2256 } 2257 else { 2258 parser->state = UCL_STATE_VALUE; 2259 } 2260 } 2261 else { 2262 c = chunk->pos; 2263 } 2264 p = chunk->pos; 2265 break; 2266 case UCL_STATE_VALUE: 2267 /* We need to check what we do have */ 2268 if (!parser->cur_obj || !ucl_parse_value (parser, chunk)) { 2269 parser->prev_state = parser->state; 2270 parser->state = UCL_STATE_ERROR; 2271 return false; 2272 } 2273 /* State is set in ucl_parse_value call */ 2274 p = chunk->pos; 2275 break; 2276 case UCL_STATE_AFTER_VALUE: 2277 if (!ucl_parse_after_value (parser, chunk)) { 2278 parser->prev_state = parser->state; 2279 parser->state = UCL_STATE_ERROR; 2280 return false; 2281 } 2282 2283 if (parser->stack != NULL) { 2284 if (parser->stack->obj->type == UCL_OBJECT) { 2285 parser->state = UCL_STATE_KEY; 2286 } 2287 else { 2288 /* Array */ 2289 parser->state = UCL_STATE_VALUE; 2290 } 2291 } 2292 else { 2293 /* Skip everything at the end */ 2294 return true; 2295 } 2296 2297 p = chunk->pos; 2298 break; 2299 case UCL_STATE_MACRO_NAME: 2300 if (parser->flags & UCL_PARSER_DISABLE_MACRO) { 2301 if (!ucl_skip_macro_as_comment (parser, chunk)) { 2302 /* We have invalid macro */ 2303 ucl_create_err (&parser->err, 2304 "error on line %d at column %d: invalid macro", 2305 chunk->line, 2306 chunk->column); 2307 parser->state = UCL_STATE_ERROR; 2308 return false; 2309 } 2310 else { 2311 p = chunk->pos; 2312 parser->state = parser->prev_state; 2313 } 2314 } 2315 else { 2316 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && 2317 *p != '(') { 2318 ucl_chunk_skipc (chunk, p); 2319 } 2320 else { 2321 if (c != NULL && p - c > 0) { 2322 /* We got macro name */ 2323 macro_len = (size_t) (p - c); 2324 HASH_FIND (hh, parser->macroes, c, macro_len, macro); 2325 if (macro == NULL) { 2326 ucl_create_err (&parser->err, 2327 "error on line %d at column %d: " 2328 "unknown macro: '%.*s', character: '%c'", 2329 chunk->line, 2330 chunk->column, 2331 (int) (p - c), 2332 c, 2333 *chunk->pos); 2334 parser->state = UCL_STATE_ERROR; 2335 return false; 2336 } 2337 /* Now we need to skip all spaces */ 2338 SKIP_SPACES_COMMENTS(parser, chunk, p); 2339 parser->state = UCL_STATE_MACRO; 2340 } 2341 else { 2342 /* We have invalid macro name */ 2343 ucl_create_err (&parser->err, 2344 "error on line %d at column %d: invalid macro name", 2345 chunk->line, 2346 chunk->column); 2347 parser->state = UCL_STATE_ERROR; 2348 return false; 2349 } 2350 } 2351 } 2352 break; 2353 case UCL_STATE_MACRO: 2354 if (*chunk->pos == '(') { 2355 macro_args = ucl_parse_macro_arguments (parser, chunk); 2356 p = chunk->pos; 2357 if (macro_args) { 2358 SKIP_SPACES_COMMENTS(parser, chunk, p); 2359 } 2360 } 2361 else { 2362 macro_args = NULL; 2363 } 2364 if (!ucl_parse_macro_value (parser, chunk, macro, 2365 ¯o_start, ¯o_len)) { 2366 parser->prev_state = parser->state; 2367 parser->state = UCL_STATE_ERROR; 2368 return false; 2369 } 2370 macro_len = ucl_expand_variable (parser, ¯o_escaped, 2371 macro_start, macro_len); 2372 parser->state = parser->prev_state; 2373 2374 if (macro_escaped == NULL && macro != NULL) { 2375 if (macro->is_context) { 2376 ret = macro->h.context_handler (macro_start, macro_len, 2377 macro_args, 2378 parser->top_obj, 2379 macro->ud); 2380 } 2381 else { 2382 ret = macro->h.handler (macro_start, macro_len, macro_args, 2383 macro->ud); 2384 } 2385 } 2386 else if (macro != NULL) { 2387 if (macro->is_context) { 2388 ret = macro->h.context_handler (macro_escaped, macro_len, 2389 macro_args, 2390 parser->top_obj, 2391 macro->ud); 2392 } 2393 else { 2394 ret = macro->h.handler (macro_escaped, macro_len, macro_args, 2395 macro->ud); 2396 } 2397 2398 UCL_FREE (macro_len + 1, macro_escaped); 2399 } 2400 else { 2401 ret = false; 2402 ucl_set_err (parser, UCL_EINTERNAL, 2403 "internal error: parser has macro undefined", &parser->err); 2404 } 2405 2406 /* 2407 * Chunk can be modified within macro handler 2408 */ 2409 chunk = parser->chunks; 2410 p = chunk->pos; 2411 2412 if (macro_args) { 2413 ucl_object_unref (macro_args); 2414 } 2415 2416 if (!ret) { 2417 return false; 2418 } 2419 break; 2420 default: 2421 ucl_set_err (parser, UCL_EINTERNAL, 2422 "internal error: parser is in an unknown state", &parser->err); 2423 parser->state = UCL_STATE_ERROR; 2424 return false; 2425 } 2426 } 2427 2428 if (parser->last_comment) { 2429 if (parser->cur_obj) { 2430 ucl_attach_comment (parser, parser->cur_obj, true); 2431 } 2432 else if (parser->stack && parser->stack->obj) { 2433 ucl_attach_comment (parser, parser->stack->obj, true); 2434 } 2435 else if (parser->top_obj) { 2436 ucl_attach_comment (parser, parser->top_obj, true); 2437 } 2438 else { 2439 ucl_object_unref (parser->last_comment); 2440 } 2441 } 2442 2443 return true; 2444 } 2445 2446 struct ucl_parser* 2447 ucl_parser_new (int flags) 2448 { 2449 struct ucl_parser *parser; 2450 2451 parser = UCL_ALLOC (sizeof (struct ucl_parser)); 2452 if (parser == NULL) { 2453 return NULL; 2454 } 2455 2456 memset (parser, 0, sizeof (struct ucl_parser)); 2457 2458 ucl_parser_register_macro (parser, "include", ucl_include_handler, parser); 2459 ucl_parser_register_macro (parser, "try_include", ucl_try_include_handler, parser); 2460 ucl_parser_register_macro (parser, "includes", ucl_includes_handler, parser); 2461 ucl_parser_register_macro (parser, "priority", ucl_priority_handler, parser); 2462 ucl_parser_register_macro (parser, "load", ucl_load_handler, parser); 2463 ucl_parser_register_context_macro (parser, "inherit", ucl_inherit_handler, parser); 2464 2465 parser->flags = flags; 2466 parser->includepaths = NULL; 2467 2468 if (flags & UCL_PARSER_SAVE_COMMENTS) { 2469 parser->comments = ucl_object_typed_new (UCL_OBJECT); 2470 } 2471 2472 if (!(flags & UCL_PARSER_NO_FILEVARS)) { 2473 /* Initial assumption about filevars */ 2474 ucl_parser_set_filevars (parser, NULL, false); 2475 } 2476 2477 return parser; 2478 } 2479 2480 bool 2481 ucl_parser_set_default_priority (struct ucl_parser *parser, unsigned prio) 2482 { 2483 if (parser == NULL) { 2484 return false; 2485 } 2486 2487 parser->default_priority = prio; 2488 2489 return true; 2490 } 2491 2492 void 2493 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro, 2494 ucl_macro_handler handler, void* ud) 2495 { 2496 struct ucl_macro *new; 2497 2498 if (macro == NULL || handler == NULL) { 2499 return; 2500 } 2501 2502 new = UCL_ALLOC (sizeof (struct ucl_macro)); 2503 if (new == NULL) { 2504 return; 2505 } 2506 2507 memset (new, 0, sizeof (struct ucl_macro)); 2508 new->h.handler = handler; 2509 new->name = strdup (macro); 2510 new->ud = ud; 2511 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); 2512 } 2513 2514 void 2515 ucl_parser_register_context_macro (struct ucl_parser *parser, const char *macro, 2516 ucl_context_macro_handler handler, void* ud) 2517 { 2518 struct ucl_macro *new; 2519 2520 if (macro == NULL || handler == NULL) { 2521 return; 2522 } 2523 2524 new = UCL_ALLOC (sizeof (struct ucl_macro)); 2525 if (new == NULL) { 2526 return; 2527 } 2528 2529 memset (new, 0, sizeof (struct ucl_macro)); 2530 new->h.context_handler = handler; 2531 new->name = strdup (macro); 2532 new->ud = ud; 2533 new->is_context = true; 2534 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); 2535 } 2536 2537 void 2538 ucl_parser_register_variable (struct ucl_parser *parser, const char *var, 2539 const char *value) 2540 { 2541 struct ucl_variable *new = NULL, *cur; 2542 2543 if (var == NULL) { 2544 return; 2545 } 2546 2547 /* Find whether a variable already exists */ 2548 LL_FOREACH (parser->variables, cur) { 2549 if (strcmp (cur->var, var) == 0) { 2550 new = cur; 2551 break; 2552 } 2553 } 2554 2555 if (value == NULL) { 2556 2557 if (new != NULL) { 2558 /* Remove variable */ 2559 DL_DELETE (parser->variables, new); 2560 free (new->var); 2561 free (new->value); 2562 UCL_FREE (sizeof (struct ucl_variable), new); 2563 } 2564 else { 2565 /* Do nothing */ 2566 return; 2567 } 2568 } 2569 else { 2570 if (new == NULL) { 2571 new = UCL_ALLOC (sizeof (struct ucl_variable)); 2572 if (new == NULL) { 2573 return; 2574 } 2575 memset (new, 0, sizeof (struct ucl_variable)); 2576 new->var = strdup (var); 2577 new->var_len = strlen (var); 2578 new->value = strdup (value); 2579 new->value_len = strlen (value); 2580 2581 DL_APPEND (parser->variables, new); 2582 } 2583 else { 2584 free (new->value); 2585 new->value = strdup (value); 2586 new->value_len = strlen (value); 2587 } 2588 } 2589 } 2590 2591 void 2592 ucl_parser_set_variables_handler (struct ucl_parser *parser, 2593 ucl_variable_handler handler, void *ud) 2594 { 2595 parser->var_handler = handler; 2596 parser->var_data = ud; 2597 } 2598 2599 bool 2600 ucl_parser_add_chunk_full (struct ucl_parser *parser, const unsigned char *data, 2601 size_t len, unsigned priority, enum ucl_duplicate_strategy strat, 2602 enum ucl_parse_type parse_type) 2603 { 2604 struct ucl_chunk *chunk; 2605 2606 if (parser == NULL) { 2607 return false; 2608 } 2609 2610 if (data == NULL && len != 0) { 2611 ucl_create_err (&parser->err, "invalid chunk added"); 2612 return false; 2613 } 2614 2615 if (parser->state != UCL_STATE_ERROR) { 2616 chunk = UCL_ALLOC (sizeof (struct ucl_chunk)); 2617 if (chunk == NULL) { 2618 ucl_create_err (&parser->err, "cannot allocate chunk structure"); 2619 return false; 2620 } 2621 2622 if (parse_type == UCL_PARSE_AUTO && len > 0) { 2623 /* We need to detect parse type by the first symbol */ 2624 if ((*data & 0x80) == 0x80 && (*data >= 0xdc && *data <= 0xdf)) { 2625 parse_type = UCL_PARSE_MSGPACK; 2626 } 2627 else if (*data == '(') { 2628 parse_type = UCL_PARSE_CSEXP; 2629 } 2630 else { 2631 parse_type = UCL_PARSE_UCL; 2632 } 2633 } 2634 2635 chunk->begin = data; 2636 chunk->remain = len; 2637 chunk->pos = chunk->begin; 2638 chunk->end = chunk->begin + len; 2639 chunk->line = 1; 2640 chunk->column = 0; 2641 chunk->priority = priority; 2642 chunk->strategy = strat; 2643 chunk->parse_type = parse_type; 2644 LL_PREPEND (parser->chunks, chunk); 2645 parser->recursion ++; 2646 2647 if (parser->recursion > UCL_MAX_RECURSION) { 2648 ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d", 2649 parser->recursion); 2650 return false; 2651 } 2652 2653 if (len > 0) { 2654 /* Need to parse something */ 2655 switch (parse_type) { 2656 default: 2657 case UCL_PARSE_UCL: 2658 return ucl_state_machine (parser); 2659 case UCL_PARSE_MSGPACK: 2660 return ucl_parse_msgpack (parser); 2661 case UCL_PARSE_CSEXP: 2662 return ucl_parse_csexp (parser); 2663 } 2664 } 2665 else { 2666 /* Just add empty chunk and go forward */ 2667 if (parser->top_obj == NULL) { 2668 /* 2669 * In case of empty object, create one to indicate that we've 2670 * read something 2671 */ 2672 parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority); 2673 } 2674 2675 return true; 2676 } 2677 } 2678 2679 ucl_create_err (&parser->err, "a parser is in an invalid state"); 2680 2681 return false; 2682 } 2683 2684 bool 2685 ucl_parser_add_chunk_priority (struct ucl_parser *parser, 2686 const unsigned char *data, size_t len, unsigned priority) 2687 { 2688 /* We dereference parser, so this check is essential */ 2689 if (parser == NULL) { 2690 return false; 2691 } 2692 2693 return ucl_parser_add_chunk_full (parser, data, len, 2694 priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL); 2695 } 2696 2697 bool 2698 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data, 2699 size_t len) 2700 { 2701 if (parser == NULL) { 2702 return false; 2703 } 2704 2705 return ucl_parser_add_chunk_full (parser, data, len, 2706 parser->default_priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL); 2707 } 2708 2709 bool 2710 ucl_parser_add_string_priority (struct ucl_parser *parser, const char *data, 2711 size_t len, unsigned priority) 2712 { 2713 if (data == NULL) { 2714 ucl_create_err (&parser->err, "invalid string added"); 2715 return false; 2716 } 2717 if (len == 0) { 2718 len = strlen (data); 2719 } 2720 2721 return ucl_parser_add_chunk_priority (parser, 2722 (const unsigned char *)data, len, priority); 2723 } 2724 2725 bool 2726 ucl_parser_add_string (struct ucl_parser *parser, const char *data, 2727 size_t len) 2728 { 2729 if (parser == NULL) { 2730 return false; 2731 } 2732 2733 return ucl_parser_add_string_priority (parser, 2734 (const unsigned char *)data, len, parser->default_priority); 2735 } 2736 2737 bool 2738 ucl_set_include_path (struct ucl_parser *parser, ucl_object_t *paths) 2739 { 2740 if (parser == NULL || paths == NULL) { 2741 return false; 2742 } 2743 2744 if (parser->includepaths == NULL) { 2745 parser->includepaths = ucl_object_copy (paths); 2746 } 2747 else { 2748 ucl_object_unref (parser->includepaths); 2749 parser->includepaths = ucl_object_copy (paths); 2750 } 2751 2752 if (parser->includepaths == NULL) { 2753 return false; 2754 } 2755 2756 return true; 2757 } 2758