1 /* Copyright (c) 2013, Vsevolod Stakhov 2 * All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * * Redistributions of source code must retain the above copyright 7 * notice, this list of conditions and the following disclaimer. 8 * * Redistributions in binary form must reproduce the above copyright 9 * notice, this list of conditions and the following disclaimer in the 10 * documentation and/or other materials provided with the distribution. 11 * 12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY 13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY 16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 */ 23 24 #include <math.h> 25 #include "ucl.h" 26 #include "ucl_internal.h" 27 #include "ucl_chartable.h" 28 29 /** 30 * @file ucl_parser.c 31 * The implementation of ucl parser 32 */ 33 34 struct ucl_parser_saved_state { 35 unsigned int line; 36 unsigned int column; 37 size_t remain; 38 const unsigned char *pos; 39 }; 40 41 /** 42 * Move up to len characters 43 * @param parser 44 * @param begin 45 * @param len 46 * @return new position in chunk 47 */ 48 #define ucl_chunk_skipc(chunk, p) \ 49 do { \ 50 if (*(p) == '\n') { \ 51 (chunk)->line ++; \ 52 (chunk)->column = 0; \ 53 } \ 54 else (chunk)->column ++; \ 55 (p++); \ 56 (chunk)->pos ++; \ 57 (chunk)->remain --; \ 58 } while (0) 59 60 static inline void 61 ucl_set_err (struct ucl_parser *parser, int code, const char *str, UT_string **err) 62 { 63 const char *fmt_string, *filename; 64 struct ucl_chunk *chunk = parser->chunks; 65 66 if (parser->cur_file) { 67 filename = parser->cur_file; 68 } 69 else { 70 filename = "<unknown>"; 71 } 72 73 if (chunk->pos < chunk->end) { 74 if (isgraph (*chunk->pos)) { 75 fmt_string = "error while parsing %s: " 76 "line: %d, column: %d - '%s', character: '%c'"; 77 } 78 else { 79 fmt_string = "error while parsing %s: " 80 "line: %d, column: %d - '%s', character: '0x%02x'"; 81 } 82 ucl_create_err (err, fmt_string, 83 filename, chunk->line, chunk->column, 84 str, *chunk->pos); 85 } 86 else { 87 ucl_create_err (err, "error while parsing %s: at the end of chunk: %s", 88 filename, str); 89 } 90 91 parser->err_code = code; 92 parser->state = UCL_STATE_ERROR; 93 } 94 95 static void 96 ucl_save_comment (struct ucl_parser *parser, const char *begin, size_t len) 97 { 98 ucl_object_t *nobj; 99 100 if (len > 0 && begin != NULL) { 101 nobj = ucl_object_fromstring_common (begin, len, 0); 102 103 if (parser->last_comment) { 104 /* We need to append data to an existing object */ 105 DL_APPEND (parser->last_comment, nobj); 106 } 107 else { 108 parser->last_comment = nobj; 109 } 110 } 111 } 112 113 static void 114 ucl_attach_comment (struct ucl_parser *parser, ucl_object_t *obj, bool before) 115 { 116 if (parser->last_comment) { 117 ucl_object_insert_key (parser->comments, parser->last_comment, 118 (const char *)&obj, sizeof (void *), true); 119 120 if (before) { 121 parser->last_comment->flags |= UCL_OBJECT_INHERITED; 122 } 123 124 parser->last_comment = NULL; 125 } 126 } 127 128 /** 129 * Skip all comments from the current pos resolving nested and multiline comments 130 * @param parser 131 * @return 132 */ 133 static bool 134 ucl_skip_comments (struct ucl_parser *parser) 135 { 136 struct ucl_chunk *chunk = parser->chunks; 137 const unsigned char *p, *beg = NULL; 138 int comments_nested = 0; 139 bool quoted = false; 140 141 p = chunk->pos; 142 143 start: 144 if (chunk->remain > 0 && *p == '#') { 145 if (parser->state != UCL_STATE_SCOMMENT && 146 parser->state != UCL_STATE_MCOMMENT) { 147 beg = p; 148 149 while (p < chunk->end) { 150 if (*p == '\n') { 151 if (parser->flags & UCL_PARSER_SAVE_COMMENTS) { 152 ucl_save_comment (parser, beg, p - beg); 153 beg = NULL; 154 } 155 156 ucl_chunk_skipc (chunk, p); 157 158 goto start; 159 } 160 ucl_chunk_skipc (chunk, p); 161 } 162 } 163 } 164 else if (chunk->remain >= 2 && *p == '/') { 165 if (p[1] == '*') { 166 beg = p; 167 ucl_chunk_skipc (chunk, p); 168 comments_nested ++; 169 ucl_chunk_skipc (chunk, p); 170 171 while (p < chunk->end) { 172 if (*p == '"' && *(p - 1) != '\\') { 173 quoted = !quoted; 174 } 175 176 if (!quoted) { 177 if (*p == '*') { 178 ucl_chunk_skipc (chunk, p); 179 if (*p == '/') { 180 comments_nested --; 181 if (comments_nested == 0) { 182 if (parser->flags & UCL_PARSER_SAVE_COMMENTS) { 183 ucl_save_comment (parser, beg, p - beg + 1); 184 beg = NULL; 185 } 186 187 ucl_chunk_skipc (chunk, p); 188 goto start; 189 } 190 } 191 ucl_chunk_skipc (chunk, p); 192 } 193 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') { 194 comments_nested ++; 195 ucl_chunk_skipc (chunk, p); 196 ucl_chunk_skipc (chunk, p); 197 continue; 198 } 199 } 200 201 ucl_chunk_skipc (chunk, p); 202 } 203 if (comments_nested != 0) { 204 ucl_set_err (parser, UCL_ENESTED, 205 "unfinished multiline comment", &parser->err); 206 return false; 207 } 208 } 209 } 210 211 if (beg && p > beg && (parser->flags & UCL_PARSER_SAVE_COMMENTS)) { 212 ucl_save_comment (parser, beg, p - beg); 213 } 214 215 return true; 216 } 217 218 /** 219 * Return multiplier for a character 220 * @param c multiplier character 221 * @param is_bytes if true use 1024 multiplier 222 * @return multiplier 223 */ 224 static inline unsigned long 225 ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) { 226 const struct { 227 char c; 228 long mult_normal; 229 long mult_bytes; 230 } multipliers[] = { 231 {'m', 1000 * 1000, 1024 * 1024}, 232 {'k', 1000, 1024}, 233 {'g', 1000 * 1000 * 1000, 1024 * 1024 * 1024} 234 }; 235 int i; 236 237 for (i = 0; i < 3; i ++) { 238 if (tolower (c) == multipliers[i].c) { 239 if (is_bytes) { 240 return multipliers[i].mult_bytes; 241 } 242 return multipliers[i].mult_normal; 243 } 244 } 245 246 return 1; 247 } 248 249 250 /** 251 * Return multiplier for time scaling 252 * @param c 253 * @return 254 */ 255 static inline double 256 ucl_lex_time_multiplier (const unsigned char c) { 257 const struct { 258 char c; 259 double mult; 260 } multipliers[] = { 261 {'m', 60}, 262 {'h', 60 * 60}, 263 {'d', 60 * 60 * 24}, 264 {'w', 60 * 60 * 24 * 7}, 265 {'y', 60 * 60 * 24 * 365} 266 }; 267 int i; 268 269 for (i = 0; i < 5; i ++) { 270 if (tolower (c) == multipliers[i].c) { 271 return multipliers[i].mult; 272 } 273 } 274 275 return 1; 276 } 277 278 /** 279 * Return true if a character is a end of an atom 280 * @param c 281 * @return 282 */ 283 static inline bool 284 ucl_lex_is_atom_end (const unsigned char c) 285 { 286 return ucl_test_character (c, UCL_CHARACTER_VALUE_END); 287 } 288 289 static inline bool 290 ucl_lex_is_comment (const unsigned char c1, const unsigned char c2) 291 { 292 if (c1 == '/') { 293 if (c2 == '*') { 294 return true; 295 } 296 } 297 else if (c1 == '#') { 298 return true; 299 } 300 return false; 301 } 302 303 /** 304 * Check variable found 305 * @param parser 306 * @param ptr 307 * @param remain 308 * @param out_len 309 * @param strict 310 * @param found 311 * @return 312 */ 313 static inline const char * 314 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain, 315 size_t *out_len, bool strict, bool *found) 316 { 317 struct ucl_variable *var; 318 unsigned char *dst; 319 size_t dstlen; 320 bool need_free = false; 321 322 LL_FOREACH (parser->variables, var) { 323 if (strict) { 324 if (remain == var->var_len) { 325 if (memcmp (ptr, var->var, var->var_len) == 0) { 326 *out_len += var->value_len; 327 *found = true; 328 return (ptr + var->var_len); 329 } 330 } 331 } 332 else { 333 if (remain >= var->var_len) { 334 if (memcmp (ptr, var->var, var->var_len) == 0) { 335 *out_len += var->value_len; 336 *found = true; 337 return (ptr + var->var_len); 338 } 339 } 340 } 341 } 342 343 /* XXX: can only handle ${VAR} */ 344 if (!(*found) && parser->var_handler != NULL && strict) { 345 /* Call generic handler */ 346 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free, 347 parser->var_data)) { 348 *out_len = dstlen; 349 *found = true; 350 if (need_free) { 351 free (dst); 352 } 353 return (ptr + remain); 354 } 355 } 356 357 return ptr; 358 } 359 360 /** 361 * Check for a variable in a given string 362 * @param parser 363 * @param ptr 364 * @param remain 365 * @param out_len 366 * @param vars_found 367 * @return 368 */ 369 static const char * 370 ucl_check_variable (struct ucl_parser *parser, const char *ptr, 371 size_t remain, size_t *out_len, bool *vars_found) 372 { 373 const char *p, *end, *ret = ptr; 374 bool found = false; 375 376 if (*ptr == '{') { 377 /* We need to match the variable enclosed in braces */ 378 p = ptr + 1; 379 end = ptr + remain; 380 while (p < end) { 381 if (*p == '}') { 382 ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, 383 out_len, true, &found); 384 if (found) { 385 /* {} must be excluded actually */ 386 ret ++; 387 if (!*vars_found) { 388 *vars_found = true; 389 } 390 } 391 else { 392 *out_len += 2; 393 } 394 break; 395 } 396 p ++; 397 } 398 } 399 else if (*ptr != '$') { 400 /* Not count escaped dollar sign */ 401 ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found); 402 if (found && !*vars_found) { 403 *vars_found = true; 404 } 405 if (!found) { 406 (*out_len) ++; 407 } 408 } 409 else { 410 ret ++; 411 (*out_len) ++; 412 } 413 414 return ret; 415 } 416 417 /** 418 * Expand a single variable 419 * @param parser 420 * @param ptr 421 * @param remain 422 * @param dest 423 * @return 424 */ 425 static const char * 426 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr, 427 size_t remain, unsigned char **dest) 428 { 429 unsigned char *d = *dest, *dst; 430 const char *p = ptr + 1, *ret; 431 struct ucl_variable *var; 432 size_t dstlen; 433 bool need_free = false; 434 bool found = false; 435 bool strict = false; 436 437 ret = ptr + 1; 438 remain --; 439 440 if (*p == '$') { 441 *d++ = *p++; 442 *dest = d; 443 return p; 444 } 445 else if (*p == '{') { 446 p ++; 447 strict = true; 448 ret += 2; 449 remain -= 2; 450 } 451 452 LL_FOREACH (parser->variables, var) { 453 if (remain >= var->var_len) { 454 if (memcmp (p, var->var, var->var_len) == 0) { 455 memcpy (d, var->value, var->value_len); 456 ret += var->var_len; 457 d += var->value_len; 458 found = true; 459 break; 460 } 461 } 462 } 463 if (!found) { 464 if (strict && parser->var_handler != NULL) { 465 if (parser->var_handler (p, remain, &dst, &dstlen, &need_free, 466 parser->var_data)) { 467 memcpy (d, dst, dstlen); 468 ret += remain; 469 d += dstlen; 470 found = true; 471 if (need_free) { 472 free (dst); 473 } 474 } 475 } 476 477 /* Leave variable as is */ 478 if (!found) { 479 if (strict) { 480 /* Copy '${' */ 481 memcpy (d, ptr, 2); 482 d += 2; 483 ret --; 484 } 485 else { 486 memcpy (d, ptr, 1); 487 d ++; 488 } 489 } 490 } 491 492 *dest = d; 493 return ret; 494 } 495 496 /** 497 * Expand variables in string 498 * @param parser 499 * @param dst 500 * @param src 501 * @param in_len 502 * @return 503 */ 504 static ssize_t 505 ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst, 506 const char *src, size_t in_len) 507 { 508 const char *p, *end = src + in_len; 509 unsigned char *d; 510 size_t out_len = 0; 511 bool vars_found = false; 512 513 if (parser->flags & UCL_PARSER_DISABLE_MACRO) { 514 *dst = NULL; 515 return in_len; 516 } 517 518 p = src; 519 while (p != end) { 520 if (*p == '$') { 521 p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found); 522 } 523 else { 524 p ++; 525 out_len ++; 526 } 527 } 528 529 if (!vars_found) { 530 /* Trivial case */ 531 *dst = NULL; 532 return in_len; 533 } 534 535 *dst = UCL_ALLOC (out_len + 1); 536 if (*dst == NULL) { 537 return in_len; 538 } 539 540 d = *dst; 541 p = src; 542 while (p != end) { 543 if (*p == '$') { 544 p = ucl_expand_single_variable (parser, p, end - p, &d); 545 } 546 else { 547 *d++ = *p++; 548 } 549 } 550 551 *d = '\0'; 552 553 return out_len; 554 } 555 556 /** 557 * Store or copy pointer to the trash stack 558 * @param parser parser object 559 * @param src src string 560 * @param dst destination buffer (trash stack pointer) 561 * @param dst_const const destination pointer (e.g. value of object) 562 * @param in_len input length 563 * @param need_unescape need to unescape source (and copy it) 564 * @param need_lowercase need to lowercase value (and copy) 565 * @param need_expand need to expand variables (and copy as well) 566 * @param unescape_squote unescape single quoted string 567 * @return output length (excluding \0 symbol) 568 */ 569 static inline ssize_t 570 ucl_copy_or_store_ptr (struct ucl_parser *parser, 571 const unsigned char *src, unsigned char **dst, 572 const char **dst_const, size_t in_len, 573 bool need_unescape, bool need_lowercase, bool need_expand, 574 bool unescape_squote) 575 { 576 ssize_t ret = -1, tret; 577 unsigned char *tmp; 578 579 if (need_unescape || need_lowercase || 580 (need_expand && parser->variables != NULL) || 581 !(parser->flags & UCL_PARSER_ZEROCOPY)) { 582 /* Copy string */ 583 *dst = UCL_ALLOC (in_len + 1); 584 if (*dst == NULL) { 585 ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for a string", 586 &parser->err); 587 return false; 588 } 589 if (need_lowercase) { 590 ret = ucl_strlcpy_tolower (*dst, src, in_len + 1); 591 } 592 else { 593 ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1); 594 } 595 596 if (need_unescape) { 597 if (!unescape_squote) { 598 ret = ucl_unescape_json_string (*dst, ret); 599 } 600 else { 601 ret = ucl_unescape_squoted_string (*dst, ret); 602 } 603 } 604 605 if (need_expand) { 606 tmp = *dst; 607 tret = ret; 608 ret = ucl_expand_variable (parser, dst, tmp, ret); 609 if (*dst == NULL) { 610 /* Nothing to expand */ 611 *dst = tmp; 612 ret = tret; 613 } 614 else { 615 /* Free unexpanded value */ 616 UCL_FREE (in_len + 1, tmp); 617 } 618 } 619 *dst_const = *dst; 620 } 621 else { 622 *dst_const = src; 623 ret = in_len; 624 } 625 626 return ret; 627 } 628 629 /** 630 * Create and append an object at the specified level 631 * @param parser 632 * @param is_array 633 * @param level 634 * @return 635 */ 636 static inline ucl_object_t * 637 ucl_parser_add_container (ucl_object_t *obj, struct ucl_parser *parser, 638 bool is_array, uint32_t level, bool has_obrace) 639 { 640 struct ucl_stack *st; 641 ucl_object_t *nobj; 642 643 if (obj == NULL) { 644 nobj = ucl_object_new_full (is_array ? UCL_ARRAY : UCL_OBJECT, parser->chunks->priority); 645 if (nobj == NULL) { 646 goto enomem0; 647 } 648 } else { 649 if (obj->type == (is_array ? UCL_OBJECT : UCL_ARRAY)) { 650 /* Bad combination for merge: array and object */ 651 ucl_set_err (parser, UCL_EMERGE, 652 "cannot merge an object with an array", 653 &parser->err); 654 655 return NULL; 656 } 657 nobj = obj; 658 nobj->type = is_array ? UCL_ARRAY : UCL_OBJECT; 659 } 660 661 if (!is_array) { 662 if (nobj->value.ov == NULL) { 663 nobj->value.ov = ucl_hash_create (parser->flags & UCL_PARSER_KEY_LOWERCASE); 664 if (nobj->value.ov == NULL) { 665 goto enomem1; 666 } 667 } 668 parser->state = UCL_STATE_KEY; 669 } else { 670 parser->state = UCL_STATE_VALUE; 671 } 672 673 st = UCL_ALLOC (sizeof (struct ucl_stack)); 674 675 if (st == NULL) { 676 goto enomem1; 677 } 678 679 st->obj = nobj; 680 681 if (level >= UINT16_MAX) { 682 ucl_set_err (parser, UCL_ENESTED, 683 "objects are nesting too deep (over 65535 limit)", 684 &parser->err); 685 if (nobj != obj) { 686 ucl_object_unref (obj); 687 } 688 689 return NULL; 690 } 691 692 693 st->e.params.level = level; 694 st->e.params.line = parser->chunks->line; 695 st->chunk = parser->chunks; 696 697 if (has_obrace) { 698 st->e.params.flags = UCL_STACK_HAS_OBRACE; 699 } 700 else { 701 st->e.params.flags = 0; 702 } 703 704 LL_PREPEND (parser->stack, st); 705 parser->cur_obj = nobj; 706 707 return nobj; 708 enomem1: 709 if (nobj != obj) 710 ucl_object_unref (nobj); 711 enomem0: 712 ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for an object", 713 &parser->err); 714 return NULL; 715 } 716 717 int 718 ucl_maybe_parse_number (ucl_object_t *obj, 719 const char *start, const char *end, const char **pos, 720 bool allow_double, bool number_bytes, bool allow_time) 721 { 722 const char *p = start, *c = start; 723 char *endptr; 724 bool got_dot = false, got_exp = false, need_double = false, 725 is_time = false, valid_start = false, is_hex = false, 726 is_neg = false; 727 double dv = 0; 728 int64_t lv = 0; 729 730 if (*p == '-') { 731 is_neg = true; 732 c ++; 733 p ++; 734 } 735 while (p < end) { 736 if (is_hex && isxdigit (*p)) { 737 p ++; 738 } 739 else if (isdigit (*p)) { 740 valid_start = true; 741 p ++; 742 } 743 else if (!is_hex && (*p == 'x' || *p == 'X')) { 744 is_hex = true; 745 allow_double = false; 746 c = p + 1; 747 } 748 else if (allow_double) { 749 if (p == c) { 750 /* Empty digits sequence, not a number */ 751 *pos = start; 752 return EINVAL; 753 } 754 else if (*p == '.') { 755 if (got_dot) { 756 /* Double dots, not a number */ 757 *pos = start; 758 return EINVAL; 759 } 760 else { 761 got_dot = true; 762 need_double = true; 763 p ++; 764 } 765 } 766 else if (*p == 'e' || *p == 'E') { 767 if (got_exp) { 768 /* Double exp, not a number */ 769 *pos = start; 770 return EINVAL; 771 } 772 else { 773 got_exp = true; 774 need_double = true; 775 p ++; 776 if (p >= end) { 777 *pos = start; 778 return EINVAL; 779 } 780 if (!isdigit (*p) && *p != '+' && *p != '-') { 781 /* Wrong exponent sign */ 782 *pos = start; 783 return EINVAL; 784 } 785 else { 786 p ++; 787 } 788 } 789 } 790 else { 791 /* Got the end of the number, need to check */ 792 break; 793 } 794 } 795 else { 796 break; 797 } 798 } 799 800 if (!valid_start) { 801 *pos = start; 802 return EINVAL; 803 } 804 805 errno = 0; 806 if (need_double) { 807 dv = strtod (c, &endptr); 808 } 809 else { 810 if (is_hex) { 811 lv = strtoimax (c, &endptr, 16); 812 } 813 else { 814 lv = strtoimax (c, &endptr, 10); 815 } 816 } 817 if (errno == ERANGE) { 818 *pos = start; 819 return ERANGE; 820 } 821 822 /* Now check endptr */ 823 if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') { 824 p = endptr; 825 goto set_obj; 826 } 827 828 if (endptr < end && endptr != start) { 829 p = endptr; 830 switch (*p) { 831 case 'm': 832 case 'M': 833 case 'g': 834 case 'G': 835 case 'k': 836 case 'K': 837 if (end - p >= 2) { 838 if (p[1] == 's' || p[1] == 'S') { 839 /* Milliseconds */ 840 if (!need_double) { 841 need_double = true; 842 dv = lv; 843 } 844 is_time = true; 845 if (p[0] == 'm' || p[0] == 'M') { 846 dv /= 1000.; 847 } 848 else { 849 dv *= ucl_lex_num_multiplier (*p, false); 850 } 851 p += 2; 852 goto set_obj; 853 } 854 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) { 855 /* Bytes */ 856 if (need_double) { 857 need_double = false; 858 lv = dv; 859 } 860 lv *= ucl_lex_num_multiplier (*p, true); 861 p += 2; 862 goto set_obj; 863 } 864 else if (ucl_lex_is_atom_end (p[1])) { 865 if (need_double) { 866 dv *= ucl_lex_num_multiplier (*p, false); 867 } 868 else { 869 lv *= ucl_lex_num_multiplier (*p, number_bytes); 870 } 871 p ++; 872 goto set_obj; 873 } 874 else if (allow_time && end - p >= 3) { 875 if (tolower (p[0]) == 'm' && 876 tolower (p[1]) == 'i' && 877 tolower (p[2]) == 'n') { 878 /* Minutes */ 879 if (!need_double) { 880 need_double = true; 881 dv = lv; 882 } 883 is_time = true; 884 dv *= 60.; 885 p += 3; 886 goto set_obj; 887 } 888 } 889 } 890 else { 891 if (need_double) { 892 dv *= ucl_lex_num_multiplier (*p, false); 893 } 894 else { 895 lv *= ucl_lex_num_multiplier (*p, number_bytes); 896 } 897 p ++; 898 goto set_obj; 899 } 900 break; 901 case 'S': 902 case 's': 903 if (allow_time && 904 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { 905 if (!need_double) { 906 need_double = true; 907 dv = lv; 908 } 909 p ++; 910 is_time = true; 911 goto set_obj; 912 } 913 break; 914 case 'h': 915 case 'H': 916 case 'd': 917 case 'D': 918 case 'w': 919 case 'W': 920 case 'Y': 921 case 'y': 922 if (allow_time && 923 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { 924 if (!need_double) { 925 need_double = true; 926 dv = lv; 927 } 928 is_time = true; 929 dv *= ucl_lex_time_multiplier (*p); 930 p ++; 931 goto set_obj; 932 } 933 break; 934 case '\t': 935 case ' ': 936 while (p < end && ucl_test_character(*p, UCL_CHARACTER_WHITESPACE)) { 937 p++; 938 } 939 if (ucl_lex_is_atom_end(*p)) 940 goto set_obj; 941 break; 942 } 943 } 944 else if (endptr == end) { 945 /* Just a number at the end of chunk */ 946 p = endptr; 947 goto set_obj; 948 } 949 950 *pos = c; 951 return EINVAL; 952 953 set_obj: 954 if (obj != NULL) { 955 if (allow_double && (need_double || is_time)) { 956 if (!is_time) { 957 obj->type = UCL_FLOAT; 958 } 959 else { 960 obj->type = UCL_TIME; 961 } 962 obj->value.dv = is_neg ? (-dv) : dv; 963 } 964 else { 965 obj->type = UCL_INT; 966 obj->value.iv = is_neg ? (-lv) : lv; 967 } 968 } 969 *pos = p; 970 return 0; 971 } 972 973 /** 974 * Parse possible number 975 * @param parser 976 * @param chunk 977 * @param obj 978 * @return true if a number has been parsed 979 */ 980 static bool 981 ucl_lex_number (struct ucl_parser *parser, 982 struct ucl_chunk *chunk, ucl_object_t *obj) 983 { 984 const unsigned char *pos; 985 int ret; 986 987 ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos, 988 true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0)); 989 990 if (ret == 0) { 991 chunk->remain -= pos - chunk->pos; 992 chunk->column += pos - chunk->pos; 993 chunk->pos = pos; 994 return true; 995 } 996 else if (ret == ERANGE) { 997 ucl_set_err (parser, UCL_ESYNTAX, "numeric value out of range", 998 &parser->err); 999 } 1000 1001 return false; 1002 } 1003 1004 /** 1005 * Parse quoted string with possible escapes 1006 * @param parser 1007 * @param chunk 1008 * @param need_unescape 1009 * @param ucl_escape 1010 * @param var_expand 1011 * @return true if a string has been parsed 1012 */ 1013 static bool 1014 ucl_lex_json_string (struct ucl_parser *parser, 1015 struct ucl_chunk *chunk, 1016 bool *need_unescape, 1017 bool *ucl_escape, 1018 bool *var_expand) 1019 { 1020 const unsigned char *p = chunk->pos; 1021 unsigned char c; 1022 int i; 1023 1024 while (p < chunk->end) { 1025 c = *p; 1026 if (c < 0x1F) { 1027 /* Unmasked control character */ 1028 if (c == '\n') { 1029 ucl_set_err (parser, UCL_ESYNTAX, "unexpected newline", 1030 &parser->err); 1031 } 1032 else { 1033 ucl_set_err (parser, UCL_ESYNTAX, "unexpected control character", 1034 &parser->err); 1035 } 1036 return false; 1037 } 1038 else if (c == '\\') { 1039 ucl_chunk_skipc (chunk, p); 1040 c = *p; 1041 if (p >= chunk->end) { 1042 ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character", 1043 &parser->err); 1044 return false; 1045 } 1046 else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) { 1047 if (c == 'u') { 1048 ucl_chunk_skipc (chunk, p); 1049 for (i = 0; i < 4 && p < chunk->end; i ++) { 1050 if (!isxdigit (*p)) { 1051 ucl_set_err (parser, UCL_ESYNTAX, "invalid utf escape", 1052 &parser->err); 1053 return false; 1054 } 1055 ucl_chunk_skipc (chunk, p); 1056 } 1057 if (p >= chunk->end) { 1058 ucl_set_err (parser, UCL_ESYNTAX, 1059 "unfinished escape character", 1060 &parser->err); 1061 return false; 1062 } 1063 } 1064 else { 1065 ucl_chunk_skipc (chunk, p); 1066 } 1067 } 1068 *need_unescape = true; 1069 *ucl_escape = true; 1070 continue; 1071 } 1072 else if (c == '"') { 1073 ucl_chunk_skipc (chunk, p); 1074 return true; 1075 } 1076 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) { 1077 *ucl_escape = true; 1078 } 1079 else if (c == '$') { 1080 *var_expand = true; 1081 } 1082 ucl_chunk_skipc (chunk, p); 1083 } 1084 1085 ucl_set_err (parser, UCL_ESYNTAX, 1086 "no quote at the end of json string", 1087 &parser->err); 1088 return false; 1089 } 1090 1091 /** 1092 * Process single quoted string 1093 * @param parser 1094 * @param chunk 1095 * @param need_unescape 1096 * @return 1097 */ 1098 static bool 1099 ucl_lex_squoted_string (struct ucl_parser *parser, 1100 struct ucl_chunk *chunk, bool *need_unescape) 1101 { 1102 const unsigned char *p = chunk->pos; 1103 unsigned char c; 1104 1105 while (p < chunk->end) { 1106 c = *p; 1107 if (c == '\\') { 1108 ucl_chunk_skipc (chunk, p); 1109 1110 if (p >= chunk->end) { 1111 ucl_set_err (parser, UCL_ESYNTAX, 1112 "unfinished escape character", 1113 &parser->err); 1114 return false; 1115 } 1116 else { 1117 ucl_chunk_skipc (chunk, p); 1118 } 1119 1120 *need_unescape = true; 1121 continue; 1122 } 1123 else if (c == '\'') { 1124 ucl_chunk_skipc (chunk, p); 1125 return true; 1126 } 1127 1128 ucl_chunk_skipc (chunk, p); 1129 } 1130 1131 ucl_set_err (parser, UCL_ESYNTAX, 1132 "no quote at the end of single quoted string", 1133 &parser->err); 1134 return false; 1135 } 1136 1137 static void 1138 ucl_parser_append_elt (struct ucl_parser *parser, ucl_hash_t *cont, 1139 ucl_object_t *top, 1140 ucl_object_t *elt) 1141 { 1142 ucl_object_t *nobj; 1143 1144 if ((parser->flags & UCL_PARSER_NO_IMPLICIT_ARRAYS) == 0) { 1145 /* Implicit array */ 1146 top->flags |= UCL_OBJECT_MULTIVALUE; 1147 DL_APPEND (top, elt); 1148 parser->stack->obj->len ++; 1149 } 1150 else { 1151 if ((top->flags & UCL_OBJECT_MULTIVALUE) != 0) { 1152 /* Just add to the explicit array */ 1153 ucl_array_append (top, elt); 1154 } 1155 else { 1156 /* Convert to an array */ 1157 nobj = ucl_object_typed_new (UCL_ARRAY); 1158 nobj->key = top->key; 1159 nobj->keylen = top->keylen; 1160 nobj->flags |= UCL_OBJECT_MULTIVALUE; 1161 ucl_array_append (nobj, top); 1162 ucl_array_append (nobj, elt); 1163 ucl_hash_replace (cont, top, nobj); 1164 } 1165 } 1166 } 1167 1168 bool 1169 ucl_parser_process_object_element (struct ucl_parser *parser, ucl_object_t *nobj) 1170 { 1171 ucl_hash_t *container; 1172 ucl_object_t *tobj = NULL, *cur; 1173 char errmsg[256]; 1174 1175 container = parser->stack->obj->value.ov; 1176 1177 DL_FOREACH (parser->stack->obj, cur) { 1178 tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (cur->value.ov, nobj)); 1179 1180 if (tobj != NULL) { 1181 break; 1182 } 1183 } 1184 1185 1186 if (tobj == NULL) { 1187 container = ucl_hash_insert_object (container, nobj, 1188 parser->flags & UCL_PARSER_KEY_LOWERCASE); 1189 if (container == NULL) { 1190 return false; 1191 } 1192 nobj->prev = nobj; 1193 nobj->next = NULL; 1194 parser->stack->obj->len ++; 1195 } 1196 else { 1197 unsigned priold = ucl_object_get_priority (tobj), 1198 prinew = ucl_object_get_priority (nobj); 1199 switch (parser->chunks->strategy) { 1200 1201 case UCL_DUPLICATE_APPEND: 1202 /* 1203 * The logic here is the following: 1204 * 1205 * - if we have two objects with the same priority, then we form an 1206 * implicit or explicit array 1207 * - if a new object has bigger priority, then we overwrite an old one 1208 * - if a new object has lower priority, then we ignore it 1209 */ 1210 /* Special case for inherited objects */ 1211 if (tobj->flags & UCL_OBJECT_INHERITED) { 1212 prinew = priold + 1; 1213 } 1214 1215 if (priold == prinew) { 1216 ucl_parser_append_elt (parser, container, tobj, nobj); 1217 } 1218 else if (priold > prinew) { 1219 /* 1220 * We add this new object to a list of trash objects just to ensure 1221 * that it won't come to any real object 1222 * XXX: rather inefficient approach 1223 */ 1224 DL_APPEND (parser->trash_objs, nobj); 1225 } 1226 else { 1227 ucl_hash_replace (container, tobj, nobj); 1228 ucl_object_unref (tobj); 1229 } 1230 1231 break; 1232 1233 case UCL_DUPLICATE_REWRITE: 1234 /* We just rewrite old values regardless of priority */ 1235 ucl_hash_replace (container, tobj, nobj); 1236 ucl_object_unref (tobj); 1237 1238 break; 1239 1240 case UCL_DUPLICATE_ERROR: 1241 snprintf(errmsg, sizeof(errmsg), 1242 "duplicate element for key '%s' found", 1243 nobj->key); 1244 ucl_set_err (parser, UCL_EMERGE, errmsg, &parser->err); 1245 return false; 1246 1247 case UCL_DUPLICATE_MERGE: 1248 /* 1249 * Here we do have some old object so we just push it on top of objects stack 1250 * Check priority and then perform the merge on the remaining objects 1251 */ 1252 if (tobj->type == UCL_OBJECT || tobj->type == UCL_ARRAY) { 1253 ucl_object_unref (nobj); 1254 nobj = tobj; 1255 } 1256 else if (priold == prinew) { 1257 ucl_parser_append_elt (parser, container, tobj, nobj); 1258 } 1259 else if (priold > prinew) { 1260 /* 1261 * We add this new object to a list of trash objects just to ensure 1262 * that it won't come to any real object 1263 * XXX: rather inefficient approach 1264 */ 1265 DL_APPEND (parser->trash_objs, nobj); 1266 } 1267 else { 1268 ucl_hash_replace (container, tobj, nobj); 1269 ucl_object_unref (tobj); 1270 } 1271 break; 1272 } 1273 } 1274 1275 parser->stack->obj->value.ov = container; 1276 parser->cur_obj = nobj; 1277 ucl_attach_comment (parser, nobj, false); 1278 1279 return true; 1280 } 1281 1282 /** 1283 * Parse a key in an object 1284 * @param parser 1285 * @param chunk 1286 * @param next_key 1287 * @param end_of_object 1288 * @return true if a key has been parsed 1289 */ 1290 static bool 1291 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, 1292 bool *next_key, bool *end_of_object) 1293 { 1294 const unsigned char *p, *c = NULL, *end, *t; 1295 const char *key = NULL; 1296 bool got_quote = false, got_eq = false, got_semicolon = false, 1297 need_unescape = false, ucl_escape = false, var_expand = false, 1298 got_content = false, got_sep = false; 1299 ucl_object_t *nobj; 1300 ssize_t keylen; 1301 1302 p = chunk->pos; 1303 1304 if (*p == '.') { 1305 /* It is macro actually */ 1306 if (!(parser->flags & UCL_PARSER_DISABLE_MACRO)) { 1307 ucl_chunk_skipc (chunk, p); 1308 } 1309 1310 parser->prev_state = parser->state; 1311 parser->state = UCL_STATE_MACRO_NAME; 1312 *end_of_object = false; 1313 return true; 1314 } 1315 while (p < chunk->end) { 1316 /* 1317 * A key must start with alpha, number, '/' or '_' and end with space character 1318 */ 1319 if (c == NULL) { 1320 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1321 if (!ucl_skip_comments (parser)) { 1322 return false; 1323 } 1324 p = chunk->pos; 1325 } 1326 else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1327 ucl_chunk_skipc (chunk, p); 1328 } 1329 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) { 1330 /* The first symbol */ 1331 c = p; 1332 ucl_chunk_skipc (chunk, p); 1333 got_content = true; 1334 } 1335 else if (*p == '"') { 1336 /* JSON style key */ 1337 c = p + 1; 1338 got_quote = true; 1339 got_content = true; 1340 ucl_chunk_skipc (chunk, p); 1341 } 1342 else if (*p == '}') { 1343 /* We have actually end of an object */ 1344 *end_of_object = true; 1345 return true; 1346 } 1347 else if (*p == '.') { 1348 ucl_chunk_skipc (chunk, p); 1349 parser->prev_state = parser->state; 1350 parser->state = UCL_STATE_MACRO_NAME; 1351 return true; 1352 } 1353 else { 1354 /* Invalid identifier */ 1355 ucl_set_err (parser, UCL_ESYNTAX, "key must begin with a letter", 1356 &parser->err); 1357 return false; 1358 } 1359 } 1360 else { 1361 /* Parse the body of a key */ 1362 if (!got_quote) { 1363 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) { 1364 got_content = true; 1365 ucl_chunk_skipc (chunk, p); 1366 } 1367 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) { 1368 end = p; 1369 break; 1370 } 1371 else { 1372 ucl_set_err (parser, UCL_ESYNTAX, "invalid character in a key", 1373 &parser->err); 1374 return false; 1375 } 1376 } 1377 else { 1378 /* We need to parse json like quoted string */ 1379 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1380 return false; 1381 } 1382 /* Always escape keys obtained via json */ 1383 end = chunk->pos - 1; 1384 p = chunk->pos; 1385 break; 1386 } 1387 } 1388 } 1389 1390 if (p >= chunk->end && got_content) { 1391 ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err); 1392 return false; 1393 } 1394 else if (!got_content) { 1395 return true; 1396 } 1397 *end_of_object = false; 1398 /* We are now at the end of the key, need to parse the rest */ 1399 while (p < chunk->end) { 1400 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 1401 ucl_chunk_skipc (chunk, p); 1402 } 1403 else if (*p == '=') { 1404 if (!got_eq && !got_semicolon) { 1405 ucl_chunk_skipc (chunk, p); 1406 got_eq = true; 1407 } 1408 else { 1409 ucl_set_err (parser, UCL_ESYNTAX, "unexpected '=' character", 1410 &parser->err); 1411 return false; 1412 } 1413 } 1414 else if (*p == ':') { 1415 if (!got_eq && !got_semicolon) { 1416 ucl_chunk_skipc (chunk, p); 1417 got_semicolon = true; 1418 } 1419 else { 1420 ucl_set_err (parser, UCL_ESYNTAX, "unexpected ':' character", 1421 &parser->err); 1422 return false; 1423 } 1424 } 1425 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1426 /* Check for comment */ 1427 if (!ucl_skip_comments (parser)) { 1428 return false; 1429 } 1430 p = chunk->pos; 1431 } 1432 else { 1433 /* Start value */ 1434 break; 1435 } 1436 } 1437 1438 if (p >= chunk->end && got_content) { 1439 ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err); 1440 return false; 1441 } 1442 1443 got_sep = got_semicolon || got_eq; 1444 1445 if (!got_sep) { 1446 /* 1447 * Maybe we have more keys nested, so search for termination character. 1448 * Possible choices: 1449 * 1) key1 key2 ... keyN [:=] value <- we treat that as error 1450 * 2) key1 ... keyN {} or [] <- we treat that as nested objects 1451 * 3) key1 value[;,\n] <- we treat that as linear object 1452 */ 1453 t = p; 1454 *next_key = false; 1455 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) { 1456 t ++; 1457 } 1458 /* Check first non-space character after a key */ 1459 if (*t != '{' && *t != '[') { 1460 while (t < chunk->end) { 1461 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') { 1462 break; 1463 } 1464 else if (*t == '{' || *t == '[') { 1465 *next_key = true; 1466 break; 1467 } 1468 t ++; 1469 } 1470 } 1471 } 1472 1473 /* Create a new object */ 1474 nobj = ucl_object_new_full (UCL_NULL, parser->chunks->priority); 1475 if (nobj == NULL) { 1476 return false; 1477 } 1478 keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY], 1479 &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, 1480 false, false); 1481 if (keylen == -1) { 1482 ucl_object_unref (nobj); 1483 return false; 1484 } 1485 else if (keylen == 0) { 1486 ucl_set_err (parser, UCL_ESYNTAX, "empty keys are not allowed", &parser->err); 1487 ucl_object_unref (nobj); 1488 return false; 1489 } 1490 1491 nobj->key = key; 1492 nobj->keylen = keylen; 1493 1494 if (!ucl_parser_process_object_element (parser, nobj)) { 1495 return false; 1496 } 1497 1498 if (ucl_escape) { 1499 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE; 1500 } 1501 1502 1503 return true; 1504 } 1505 1506 /** 1507 * Parse a cl string 1508 * @param parser 1509 * @param chunk 1510 * @param var_expand 1511 * @param need_unescape 1512 * @return true if a key has been parsed 1513 */ 1514 static bool 1515 ucl_parse_string_value (struct ucl_parser *parser, 1516 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape) 1517 { 1518 const unsigned char *p; 1519 enum { 1520 UCL_BRACE_ROUND = 0, 1521 UCL_BRACE_SQUARE, 1522 UCL_BRACE_FIGURE 1523 }; 1524 int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}}; 1525 1526 p = chunk->pos; 1527 1528 while (p < chunk->end) { 1529 1530 /* Skip pairs of figure braces */ 1531 if (*p == '{') { 1532 braces[UCL_BRACE_FIGURE][0] ++; 1533 } 1534 else if (*p == '}') { 1535 braces[UCL_BRACE_FIGURE][1] ++; 1536 if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) { 1537 /* This is not a termination symbol, continue */ 1538 ucl_chunk_skipc (chunk, p); 1539 continue; 1540 } 1541 } 1542 /* Skip pairs of square braces */ 1543 else if (*p == '[') { 1544 braces[UCL_BRACE_SQUARE][0] ++; 1545 } 1546 else if (*p == ']') { 1547 braces[UCL_BRACE_SQUARE][1] ++; 1548 if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) { 1549 /* This is not a termination symbol, continue */ 1550 ucl_chunk_skipc (chunk, p); 1551 continue; 1552 } 1553 } 1554 else if (*p == '$') { 1555 *var_expand = true; 1556 } 1557 else if (*p == '\\') { 1558 *need_unescape = true; 1559 ucl_chunk_skipc (chunk, p); 1560 if (p < chunk->end) { 1561 ucl_chunk_skipc (chunk, p); 1562 } 1563 continue; 1564 } 1565 1566 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1567 break; 1568 } 1569 ucl_chunk_skipc (chunk, p); 1570 } 1571 1572 return true; 1573 } 1574 1575 /** 1576 * Parse multiline string ending with \n{term}\n 1577 * @param parser 1578 * @param chunk 1579 * @param term 1580 * @param term_len 1581 * @param beg 1582 * @param var_expand 1583 * @return size of multiline string or 0 in case of error 1584 */ 1585 static int 1586 ucl_parse_multiline_string (struct ucl_parser *parser, 1587 struct ucl_chunk *chunk, const unsigned char *term, 1588 int term_len, unsigned char const **beg, 1589 bool *var_expand) 1590 { 1591 const unsigned char *p, *c, *tend; 1592 bool newline = false; 1593 int len = 0; 1594 1595 p = chunk->pos; 1596 1597 c = p; 1598 1599 while (p < chunk->end) { 1600 if (newline) { 1601 if (chunk->end - p < term_len) { 1602 return 0; 1603 } 1604 else if (memcmp (p, term, term_len) == 0) { 1605 tend = p + term_len; 1606 if (*tend != '\n' && *tend != ';' && *tend != ',') { 1607 /* Incomplete terminator */ 1608 ucl_chunk_skipc (chunk, p); 1609 continue; 1610 } 1611 len = p - c; 1612 chunk->remain -= term_len; 1613 chunk->pos = p + term_len; 1614 chunk->column = term_len; 1615 *beg = c; 1616 break; 1617 } 1618 } 1619 if (*p == '\n') { 1620 newline = true; 1621 } 1622 else { 1623 if (*p == '$') { 1624 *var_expand = true; 1625 } 1626 newline = false; 1627 } 1628 ucl_chunk_skipc (chunk, p); 1629 } 1630 1631 return len; 1632 } 1633 1634 static inline ucl_object_t* 1635 ucl_parser_get_container (struct ucl_parser *parser) 1636 { 1637 ucl_object_t *t, *obj = NULL; 1638 1639 if (parser == NULL || parser->stack == NULL || parser->stack->obj == NULL) { 1640 return NULL; 1641 } 1642 1643 if (parser->stack->obj->type == UCL_ARRAY) { 1644 /* Object must be allocated */ 1645 obj = ucl_object_new_full (UCL_NULL, parser->chunks->priority); 1646 t = parser->stack->obj; 1647 1648 if (!ucl_array_append (t, obj)) { 1649 ucl_object_unref (obj); 1650 return NULL; 1651 } 1652 1653 parser->cur_obj = obj; 1654 ucl_attach_comment (parser, obj, false); 1655 } 1656 else { 1657 /* Object has been already allocated */ 1658 obj = parser->cur_obj; 1659 } 1660 1661 return obj; 1662 } 1663 1664 /** 1665 * Handle value data 1666 * @param parser 1667 * @param chunk 1668 * @return 1669 */ 1670 static bool 1671 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1672 { 1673 const unsigned char *p, *c; 1674 ucl_object_t *obj = NULL; 1675 unsigned int stripped_spaces; 1676 ssize_t str_len; 1677 bool need_unescape = false, ucl_escape = false, var_expand = false; 1678 1679 p = chunk->pos; 1680 1681 /* Skip any spaces and comments */ 1682 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) || 1683 (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1684 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1685 ucl_chunk_skipc (chunk, p); 1686 } 1687 if (!ucl_skip_comments (parser)) { 1688 return false; 1689 } 1690 p = chunk->pos; 1691 } 1692 1693 while (p < chunk->end) { 1694 c = p; 1695 switch (*p) { 1696 case '"': 1697 ucl_chunk_skipc (chunk, p); 1698 1699 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, 1700 &var_expand)) { 1701 return false; 1702 } 1703 1704 obj = ucl_parser_get_container (parser); 1705 if (!obj) { 1706 return false; 1707 } 1708 1709 str_len = chunk->pos - c - 2; 1710 obj->type = UCL_STRING; 1711 if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, 1712 &obj->trash_stack[UCL_TRASH_VALUE], 1713 &obj->value.sv, str_len, need_unescape, false, 1714 var_expand, false)) == -1) { 1715 return false; 1716 } 1717 1718 obj->len = str_len; 1719 parser->state = UCL_STATE_AFTER_VALUE; 1720 1721 return true; 1722 break; 1723 case '\'': 1724 ucl_chunk_skipc (chunk, p); 1725 1726 if (!ucl_lex_squoted_string (parser, chunk, &need_unescape)) { 1727 return false; 1728 } 1729 1730 obj = ucl_parser_get_container (parser); 1731 if (!obj) { 1732 return false; 1733 } 1734 1735 str_len = chunk->pos - c - 2; 1736 obj->type = UCL_STRING; 1737 obj->flags |= UCL_OBJECT_SQUOTED; 1738 1739 if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, 1740 &obj->trash_stack[UCL_TRASH_VALUE], 1741 &obj->value.sv, str_len, need_unescape, false, 1742 var_expand, true)) == -1) { 1743 return false; 1744 } 1745 1746 obj->len = str_len; 1747 1748 parser->state = UCL_STATE_AFTER_VALUE; 1749 1750 return true; 1751 break; 1752 case '{': 1753 obj = ucl_parser_get_container (parser); 1754 if (obj == NULL) { 1755 return false; 1756 } 1757 /* We have a new object */ 1758 if (parser->stack) { 1759 obj = ucl_parser_add_container (obj, parser, false, 1760 parser->stack->e.params.level, true); 1761 } 1762 else { 1763 return false; 1764 } 1765 if (obj == NULL) { 1766 return false; 1767 } 1768 1769 ucl_chunk_skipc (chunk, p); 1770 1771 return true; 1772 break; 1773 case '[': 1774 obj = ucl_parser_get_container (parser); 1775 if (obj == NULL) { 1776 return false; 1777 } 1778 /* We have a new array */ 1779 if (parser->stack) { 1780 obj = ucl_parser_add_container (obj, parser, true, 1781 parser->stack->e.params.level, true); 1782 } 1783 else { 1784 return false; 1785 } 1786 1787 if (obj == NULL) { 1788 return false; 1789 } 1790 1791 ucl_chunk_skipc (chunk, p); 1792 1793 return true; 1794 break; 1795 case ']': 1796 /* We have the array ending */ 1797 if (parser->stack && parser->stack->obj->type == UCL_ARRAY) { 1798 parser->state = UCL_STATE_AFTER_VALUE; 1799 return true; 1800 } 1801 else { 1802 goto parse_string; 1803 } 1804 break; 1805 case '<': 1806 obj = ucl_parser_get_container (parser); 1807 /* We have something like multiline value, which must be <<[A-Z]+\n */ 1808 if (chunk->end - p > 3) { 1809 if (memcmp (p, "<<", 2) == 0) { 1810 p += 2; 1811 /* We allow only uppercase characters in multiline definitions */ 1812 while (p < chunk->end && *p >= 'A' && *p <= 'Z') { 1813 p ++; 1814 } 1815 if (*p =='\n') { 1816 /* Set chunk positions and start multiline parsing */ 1817 chunk->remain -= p - c + 1; 1818 c += 2; 1819 chunk->pos = p + 1; 1820 chunk->column = 0; 1821 chunk->line ++; 1822 if ((str_len = ucl_parse_multiline_string (parser, chunk, c, 1823 p - c, &c, &var_expand)) == 0) { 1824 ucl_set_err (parser, UCL_ESYNTAX, 1825 "unterminated multiline value", &parser->err); 1826 return false; 1827 } 1828 1829 obj->type = UCL_STRING; 1830 obj->flags |= UCL_OBJECT_MULTILINE; 1831 if ((str_len = ucl_copy_or_store_ptr (parser, c, 1832 &obj->trash_stack[UCL_TRASH_VALUE], 1833 &obj->value.sv, str_len - 1, false, 1834 false, var_expand, false)) == -1) { 1835 return false; 1836 } 1837 obj->len = str_len; 1838 1839 parser->state = UCL_STATE_AFTER_VALUE; 1840 1841 return true; 1842 } 1843 } 1844 } 1845 /* Fallback to ordinary strings */ 1846 /* FALLTHRU */ 1847 default: 1848 parse_string: 1849 if (obj == NULL) { 1850 obj = ucl_parser_get_container (parser); 1851 } 1852 1853 /* Parse atom */ 1854 if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) { 1855 if (!ucl_lex_number (parser, chunk, obj)) { 1856 if (parser->state == UCL_STATE_ERROR) { 1857 return false; 1858 } 1859 } 1860 else { 1861 parser->state = UCL_STATE_AFTER_VALUE; 1862 return true; 1863 } 1864 /* Fallback to normal string */ 1865 } 1866 1867 if (!ucl_parse_string_value (parser, chunk, &var_expand, 1868 &need_unescape)) { 1869 return false; 1870 } 1871 /* Cut trailing spaces */ 1872 stripped_spaces = 0; 1873 while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces), 1874 UCL_CHARACTER_WHITESPACE)) { 1875 stripped_spaces ++; 1876 } 1877 str_len = chunk->pos - c - stripped_spaces; 1878 if (str_len <= 0) { 1879 ucl_set_err (parser, UCL_ESYNTAX, "string value must not be empty", 1880 &parser->err); 1881 return false; 1882 } 1883 else if (str_len == 4 && memcmp (c, "null", 4) == 0) { 1884 obj->len = 0; 1885 obj->type = UCL_NULL; 1886 } 1887 else if (str_len == 3 && memcmp (c, "nan", 3) == 0) { 1888 obj->len = 0; 1889 obj->type = UCL_FLOAT; 1890 obj->value.dv = NAN; 1891 } 1892 else if (str_len == 3 && memcmp (c, "inf", 3) == 0) { 1893 obj->len = 0; 1894 obj->type = UCL_FLOAT; 1895 obj->value.dv = INFINITY; 1896 } 1897 else if (!ucl_maybe_parse_boolean (obj, c, str_len)) { 1898 obj->type = UCL_STRING; 1899 if ((str_len = ucl_copy_or_store_ptr (parser, c, 1900 &obj->trash_stack[UCL_TRASH_VALUE], 1901 &obj->value.sv, str_len, need_unescape, 1902 false, var_expand, false)) == -1) { 1903 return false; 1904 } 1905 obj->len = str_len; 1906 } 1907 1908 parser->state = UCL_STATE_AFTER_VALUE; 1909 1910 return true; 1911 break; 1912 } 1913 } 1914 1915 return true; 1916 } 1917 1918 /** 1919 * Handle after value data 1920 * @param parser 1921 * @param chunk 1922 * @return 1923 */ 1924 static bool 1925 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1926 { 1927 const unsigned char *p; 1928 bool got_sep = false; 1929 struct ucl_stack *st; 1930 1931 p = chunk->pos; 1932 1933 while (p < chunk->end) { 1934 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 1935 /* Skip whitespaces */ 1936 ucl_chunk_skipc (chunk, p); 1937 } 1938 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1939 /* Skip comment */ 1940 if (!ucl_skip_comments (parser)) { 1941 return false; 1942 } 1943 /* Treat comment as a separator */ 1944 got_sep = true; 1945 p = chunk->pos; 1946 } 1947 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) { 1948 if (*p == '}' || *p == ']') { 1949 if (parser->stack == NULL) { 1950 ucl_set_err (parser, UCL_ESYNTAX, 1951 "end of array or object detected without corresponding start", 1952 &parser->err); 1953 return false; 1954 } 1955 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) || 1956 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) { 1957 1958 /* Pop all nested objects from a stack */ 1959 st = parser->stack; 1960 1961 if (!(st->e.params.flags & UCL_STACK_HAS_OBRACE)) { 1962 parser->err_code = UCL_EUNPAIRED; 1963 ucl_create_err (&parser->err, 1964 "%s:%d object closed with } is not opened with { at line %d", 1965 chunk->fname ? chunk->fname : "memory", 1966 parser->chunks->line, st->e.params.line); 1967 1968 return false; 1969 } 1970 1971 parser->stack = st->next; 1972 UCL_FREE (sizeof (struct ucl_stack), st); 1973 1974 if (parser->cur_obj) { 1975 ucl_attach_comment (parser, parser->cur_obj, true); 1976 } 1977 1978 while (parser->stack != NULL) { 1979 st = parser->stack; 1980 1981 if (st->next == NULL) { 1982 break; 1983 } 1984 else if (st->next->e.params.level == st->e.params.level) { 1985 break; 1986 } 1987 1988 1989 parser->stack = st->next; 1990 parser->cur_obj = st->obj; 1991 UCL_FREE (sizeof (struct ucl_stack), st); 1992 } 1993 } 1994 else { 1995 ucl_set_err (parser, UCL_ESYNTAX, 1996 "unexpected terminating symbol detected", 1997 &parser->err); 1998 return false; 1999 } 2000 2001 if (parser->stack == NULL) { 2002 /* Ignore everything after a top object */ 2003 return true; 2004 } 2005 else { 2006 ucl_chunk_skipc (chunk, p); 2007 } 2008 got_sep = true; 2009 } 2010 else { 2011 /* Got a separator */ 2012 got_sep = true; 2013 ucl_chunk_skipc (chunk, p); 2014 } 2015 } 2016 else { 2017 /* Anything else */ 2018 if (!got_sep) { 2019 ucl_set_err (parser, UCL_ESYNTAX, "delimiter is missing", 2020 &parser->err); 2021 return false; 2022 } 2023 return true; 2024 } 2025 } 2026 2027 return true; 2028 } 2029 2030 static bool 2031 ucl_skip_macro_as_comment (struct ucl_parser *parser, 2032 struct ucl_chunk *chunk) 2033 { 2034 const unsigned char *p, *c; 2035 enum { 2036 macro_skip_start = 0, 2037 macro_has_symbols, 2038 macro_has_obrace, 2039 macro_has_quote, 2040 macro_has_backslash, 2041 macro_has_sqbrace, 2042 macro_save 2043 } state = macro_skip_start, prev_state = macro_skip_start; 2044 2045 p = chunk->pos; 2046 c = chunk->pos; 2047 2048 while (p < chunk->end) { 2049 switch (state) { 2050 case macro_skip_start: 2051 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 2052 state = macro_has_symbols; 2053 } 2054 else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 2055 state = macro_save; 2056 continue; 2057 } 2058 2059 ucl_chunk_skipc (chunk, p); 2060 break; 2061 2062 case macro_has_symbols: 2063 if (*p == '{') { 2064 state = macro_has_sqbrace; 2065 } 2066 else if (*p == '(') { 2067 state = macro_has_obrace; 2068 } 2069 else if (*p == '"') { 2070 state = macro_has_quote; 2071 } 2072 else if (*p == '\n') { 2073 state = macro_save; 2074 continue; 2075 } 2076 2077 ucl_chunk_skipc (chunk, p); 2078 break; 2079 2080 case macro_has_obrace: 2081 if (*p == '\\') { 2082 prev_state = state; 2083 state = macro_has_backslash; 2084 } 2085 else if (*p == ')') { 2086 state = macro_has_symbols; 2087 } 2088 2089 ucl_chunk_skipc (chunk, p); 2090 break; 2091 2092 case macro_has_sqbrace: 2093 if (*p == '\\') { 2094 prev_state = state; 2095 state = macro_has_backslash; 2096 } 2097 else if (*p == '}') { 2098 state = macro_save; 2099 } 2100 2101 ucl_chunk_skipc (chunk, p); 2102 break; 2103 2104 case macro_has_quote: 2105 if (*p == '\\') { 2106 prev_state = state; 2107 state = macro_has_backslash; 2108 } 2109 else if (*p == '"') { 2110 state = macro_save; 2111 } 2112 2113 ucl_chunk_skipc (chunk, p); 2114 break; 2115 2116 case macro_has_backslash: 2117 state = prev_state; 2118 ucl_chunk_skipc (chunk, p); 2119 break; 2120 2121 case macro_save: 2122 if (parser->flags & UCL_PARSER_SAVE_COMMENTS) { 2123 ucl_save_comment (parser, c, p - c); 2124 } 2125 2126 return true; 2127 } 2128 } 2129 2130 return false; 2131 } 2132 2133 /** 2134 * Handle macro data 2135 * @param parser 2136 * @param chunk 2137 * @param marco 2138 * @param macro_start 2139 * @param macro_len 2140 * @return 2141 */ 2142 static bool 2143 ucl_parse_macro_value (struct ucl_parser *parser, 2144 struct ucl_chunk *chunk, struct ucl_macro *macro, 2145 unsigned char const **macro_start, size_t *macro_len) 2146 { 2147 const unsigned char *p, *c; 2148 bool need_unescape = false, ucl_escape = false, var_expand = false; 2149 2150 p = chunk->pos; 2151 2152 switch (*p) { 2153 case '"': 2154 /* We have macro value encoded in quotes */ 2155 c = p; 2156 ucl_chunk_skipc (chunk, p); 2157 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 2158 return false; 2159 } 2160 2161 *macro_start = c + 1; 2162 *macro_len = chunk->pos - c - 2; 2163 p = chunk->pos; 2164 break; 2165 case '{': 2166 /* We got a multiline macro body */ 2167 ucl_chunk_skipc (chunk, p); 2168 /* Skip spaces at the beginning */ 2169 while (p < chunk->end) { 2170 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 2171 ucl_chunk_skipc (chunk, p); 2172 } 2173 else { 2174 break; 2175 } 2176 } 2177 c = p; 2178 while (p < chunk->end) { 2179 if (*p == '}') { 2180 break; 2181 } 2182 ucl_chunk_skipc (chunk, p); 2183 } 2184 *macro_start = c; 2185 *macro_len = p - c; 2186 ucl_chunk_skipc (chunk, p); 2187 break; 2188 default: 2189 /* Macro is not enclosed in quotes or braces */ 2190 c = p; 2191 while (p < chunk->end) { 2192 if (ucl_lex_is_atom_end (*p)) { 2193 break; 2194 } 2195 ucl_chunk_skipc (chunk, p); 2196 } 2197 *macro_start = c; 2198 *macro_len = p - c; 2199 break; 2200 } 2201 2202 /* We are at the end of a macro */ 2203 /* Skip ';' and space characters and return to previous state */ 2204 while (p < chunk->end) { 2205 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') { 2206 break; 2207 } 2208 ucl_chunk_skipc (chunk, p); 2209 } 2210 return true; 2211 } 2212 2213 /** 2214 * Parse macro arguments as UCL object 2215 * @param parser parser structure 2216 * @param chunk the current data chunk 2217 * @return 2218 */ 2219 static ucl_object_t * 2220 ucl_parse_macro_arguments (struct ucl_parser *parser, 2221 struct ucl_chunk *chunk) 2222 { 2223 ucl_object_t *res = NULL; 2224 struct ucl_parser *params_parser; 2225 int obraces = 1, ebraces = 0, state = 0; 2226 const unsigned char *p, *c; 2227 size_t args_len = 0; 2228 struct ucl_parser_saved_state saved; 2229 2230 saved.column = chunk->column; 2231 saved.line = chunk->line; 2232 saved.pos = chunk->pos; 2233 saved.remain = chunk->remain; 2234 p = chunk->pos; 2235 2236 if (*p != '(' || chunk->remain < 2) { 2237 return NULL; 2238 } 2239 2240 /* Set begin and start */ 2241 ucl_chunk_skipc (chunk, p); 2242 c = p; 2243 2244 while ((p) < (chunk)->end) { 2245 switch (state) { 2246 case 0: 2247 /* Parse symbols and check for '(', ')' and '"' */ 2248 if (*p == '(') { 2249 obraces ++; 2250 } 2251 else if (*p == ')') { 2252 ebraces ++; 2253 } 2254 else if (*p == '"') { 2255 state = 1; 2256 } 2257 /* Check pairing */ 2258 if (obraces == ebraces) { 2259 state = 99; 2260 } 2261 else { 2262 args_len ++; 2263 } 2264 /* Check overflow */ 2265 if (chunk->remain == 0) { 2266 goto restore_chunk; 2267 } 2268 ucl_chunk_skipc (chunk, p); 2269 break; 2270 case 1: 2271 /* We have quote character, so skip all but quotes */ 2272 if (*p == '"' && *(p - 1) != '\\') { 2273 state = 0; 2274 } 2275 if (chunk->remain == 0) { 2276 goto restore_chunk; 2277 } 2278 args_len ++; 2279 ucl_chunk_skipc (chunk, p); 2280 break; 2281 case 99: 2282 /* 2283 * We have read the full body of arguments, so we need to parse and set 2284 * object from that 2285 */ 2286 params_parser = ucl_parser_new (parser->flags); 2287 if (!ucl_parser_add_chunk (params_parser, c, args_len)) { 2288 ucl_set_err (parser, UCL_ESYNTAX, "macro arguments parsing error", 2289 &parser->err); 2290 } 2291 else { 2292 res = ucl_parser_get_object (params_parser); 2293 } 2294 ucl_parser_free (params_parser); 2295 2296 return res; 2297 2298 break; 2299 } 2300 } 2301 2302 return res; 2303 2304 restore_chunk: 2305 chunk->column = saved.column; 2306 chunk->line = saved.line; 2307 chunk->pos = saved.pos; 2308 chunk->remain = saved.remain; 2309 2310 return NULL; 2311 } 2312 2313 #define SKIP_SPACES_COMMENTS(parser, chunk, p) do { \ 2314 while ((p) < (chunk)->end) { \ 2315 if (!ucl_test_character (*(p), UCL_CHARACTER_WHITESPACE_UNSAFE)) { \ 2316 if ((chunk)->remain >= 2 && ucl_lex_is_comment ((p)[0], (p)[1])) { \ 2317 if (!ucl_skip_comments (parser)) { \ 2318 return false; \ 2319 } \ 2320 p = (chunk)->pos; \ 2321 } \ 2322 break; \ 2323 } \ 2324 ucl_chunk_skipc (chunk, p); \ 2325 } \ 2326 } while(0) 2327 2328 /** 2329 * Handle the main states of rcl parser 2330 * @param parser parser structure 2331 * @return true if chunk has been parsed and false in case of error 2332 */ 2333 static bool 2334 ucl_state_machine (struct ucl_parser *parser) 2335 { 2336 ucl_object_t *obj, *macro_args; 2337 struct ucl_chunk *chunk = parser->chunks; 2338 const unsigned char *p, *c = NULL, *macro_start = NULL; 2339 unsigned char *macro_escaped; 2340 size_t macro_len = 0; 2341 struct ucl_macro *macro = NULL; 2342 bool next_key = false, end_of_object = false, ret; 2343 2344 if (parser->top_obj == NULL) { 2345 parser->state = UCL_STATE_INIT; 2346 } 2347 2348 p = chunk->pos; 2349 while (chunk->pos < chunk->end) { 2350 switch (parser->state) { 2351 case UCL_STATE_INIT: 2352 /* 2353 * At the init state we can either go to the parse array or object 2354 * if we got [ or { correspondingly or can just treat new data as 2355 * a key of newly created object 2356 */ 2357 if (!ucl_skip_comments (parser)) { 2358 parser->prev_state = parser->state; 2359 parser->state = UCL_STATE_ERROR; 2360 return false; 2361 } 2362 else { 2363 bool seen_obrace = false; 2364 2365 /* Skip any spaces */ 2366 while (p < chunk->end && ucl_test_character (*p, 2367 UCL_CHARACTER_WHITESPACE_UNSAFE)) { 2368 ucl_chunk_skipc (chunk, p); 2369 } 2370 2371 p = chunk->pos; 2372 2373 if (p < chunk->end) { 2374 if (*p == '[') { 2375 parser->state = UCL_STATE_VALUE; 2376 ucl_chunk_skipc (chunk, p); 2377 seen_obrace = true; 2378 } 2379 else { 2380 2381 if (*p == '{') { 2382 ucl_chunk_skipc (chunk, p); 2383 parser->state = UCL_STATE_KEY_OBRACE; 2384 seen_obrace = true; 2385 } 2386 else { 2387 parser->state = UCL_STATE_KEY; 2388 } 2389 } 2390 } 2391 2392 if (parser->top_obj == NULL) { 2393 if (parser->state == UCL_STATE_VALUE) { 2394 obj = ucl_parser_add_container (NULL, parser, true, 0, 2395 seen_obrace); 2396 } 2397 else { 2398 obj = ucl_parser_add_container (NULL, parser, false, 0, 2399 seen_obrace); 2400 } 2401 2402 if (obj == NULL) { 2403 return false; 2404 } 2405 2406 parser->top_obj = obj; 2407 parser->cur_obj = obj; 2408 } 2409 2410 } 2411 break; 2412 case UCL_STATE_KEY: 2413 case UCL_STATE_KEY_OBRACE: 2414 /* Skip any spaces */ 2415 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 2416 ucl_chunk_skipc (chunk, p); 2417 } 2418 if (p == chunk->end || *p == '}') { 2419 /* We have the end of an object */ 2420 parser->state = UCL_STATE_AFTER_VALUE; 2421 continue; 2422 } 2423 if (parser->stack == NULL) { 2424 /* No objects are on stack, but we want to parse a key */ 2425 ucl_set_err (parser, UCL_ESYNTAX, "top object is finished but the parser " 2426 "expects a key", &parser->err); 2427 parser->prev_state = parser->state; 2428 parser->state = UCL_STATE_ERROR; 2429 return false; 2430 } 2431 if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) { 2432 parser->prev_state = parser->state; 2433 parser->state = UCL_STATE_ERROR; 2434 return false; 2435 } 2436 2437 if (end_of_object) { 2438 p = chunk->pos; 2439 parser->state = UCL_STATE_AFTER_VALUE; 2440 continue; 2441 } 2442 else if (parser->state != UCL_STATE_MACRO_NAME) { 2443 if (next_key && parser->stack->obj->type == UCL_OBJECT) { 2444 /* Parse more keys and nest objects accordingly */ 2445 obj = ucl_parser_add_container (parser->cur_obj, 2446 parser, 2447 false, 2448 parser->stack->e.params.level + 1, 2449 parser->state == UCL_STATE_KEY_OBRACE); 2450 if (obj == NULL) { 2451 return false; 2452 } 2453 } 2454 else { 2455 parser->state = UCL_STATE_VALUE; 2456 } 2457 } 2458 else { 2459 c = chunk->pos; 2460 } 2461 p = chunk->pos; 2462 break; 2463 case UCL_STATE_VALUE: 2464 /* We need to check what we do have */ 2465 if (!parser->cur_obj || !ucl_parse_value (parser, chunk)) { 2466 parser->prev_state = parser->state; 2467 parser->state = UCL_STATE_ERROR; 2468 return false; 2469 } 2470 /* State is set in ucl_parse_value call */ 2471 p = chunk->pos; 2472 break; 2473 case UCL_STATE_AFTER_VALUE: 2474 if (!ucl_parse_after_value (parser, chunk)) { 2475 parser->prev_state = parser->state; 2476 parser->state = UCL_STATE_ERROR; 2477 return false; 2478 } 2479 2480 if (parser->stack != NULL) { 2481 if (parser->stack->obj->type == UCL_OBJECT) { 2482 parser->state = UCL_STATE_KEY; 2483 } 2484 else { 2485 /* Array */ 2486 parser->state = UCL_STATE_VALUE; 2487 } 2488 } 2489 else { 2490 /* Skip everything at the end */ 2491 return true; 2492 } 2493 2494 p = chunk->pos; 2495 break; 2496 case UCL_STATE_MACRO_NAME: 2497 if (parser->flags & UCL_PARSER_DISABLE_MACRO) { 2498 if (!ucl_skip_macro_as_comment (parser, chunk)) { 2499 /* We have invalid macro */ 2500 ucl_create_err (&parser->err, 2501 "error at %s:%d at column %d: invalid macro", 2502 chunk->fname ? chunk->fname : "memory", 2503 chunk->line, 2504 chunk->column); 2505 parser->state = UCL_STATE_ERROR; 2506 return false; 2507 } 2508 else { 2509 p = chunk->pos; 2510 parser->state = parser->prev_state; 2511 } 2512 } 2513 else { 2514 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && 2515 *p != '(') { 2516 ucl_chunk_skipc (chunk, p); 2517 } 2518 else { 2519 if (c != NULL && p - c > 0) { 2520 /* We got macro name */ 2521 macro_len = (size_t) (p - c); 2522 HASH_FIND (hh, parser->macroes, c, macro_len, macro); 2523 if (macro == NULL) { 2524 ucl_create_err (&parser->err, 2525 "error at %s:%d at column %d: " 2526 "unknown macro: '%.*s', character: '%c'", 2527 chunk->fname ? chunk->fname : "memory", 2528 chunk->line, 2529 chunk->column, 2530 (int) (p - c), 2531 c, 2532 *chunk->pos); 2533 parser->state = UCL_STATE_ERROR; 2534 return false; 2535 } 2536 /* Now we need to skip all spaces */ 2537 SKIP_SPACES_COMMENTS(parser, chunk, p); 2538 parser->state = UCL_STATE_MACRO; 2539 } 2540 else { 2541 /* We have invalid macro name */ 2542 ucl_create_err (&parser->err, 2543 "error at %s:%d at column %d: invalid macro name", 2544 chunk->fname ? chunk->fname : "memory", 2545 chunk->line, 2546 chunk->column); 2547 parser->state = UCL_STATE_ERROR; 2548 return false; 2549 } 2550 } 2551 } 2552 break; 2553 case UCL_STATE_MACRO: 2554 if (*chunk->pos == '(') { 2555 macro_args = ucl_parse_macro_arguments (parser, chunk); 2556 p = chunk->pos; 2557 if (macro_args) { 2558 SKIP_SPACES_COMMENTS(parser, chunk, p); 2559 } 2560 } 2561 else { 2562 macro_args = NULL; 2563 } 2564 if (!ucl_parse_macro_value (parser, chunk, macro, 2565 ¯o_start, ¯o_len)) { 2566 parser->prev_state = parser->state; 2567 parser->state = UCL_STATE_ERROR; 2568 return false; 2569 } 2570 macro_len = ucl_expand_variable (parser, ¯o_escaped, 2571 macro_start, macro_len); 2572 parser->state = parser->prev_state; 2573 2574 if (macro_escaped == NULL && macro != NULL) { 2575 if (macro->is_context) { 2576 ret = macro->h.context_handler (macro_start, macro_len, 2577 macro_args, 2578 parser->top_obj, 2579 macro->ud); 2580 } 2581 else { 2582 ret = macro->h.handler (macro_start, macro_len, macro_args, 2583 macro->ud); 2584 } 2585 } 2586 else if (macro != NULL) { 2587 if (macro->is_context) { 2588 ret = macro->h.context_handler (macro_escaped, macro_len, 2589 macro_args, 2590 parser->top_obj, 2591 macro->ud); 2592 } 2593 else { 2594 ret = macro->h.handler (macro_escaped, macro_len, macro_args, 2595 macro->ud); 2596 } 2597 2598 UCL_FREE (macro_len + 1, macro_escaped); 2599 } 2600 else { 2601 ret = false; 2602 ucl_set_err (parser, UCL_EINTERNAL, 2603 "internal error: parser has macro undefined", &parser->err); 2604 } 2605 2606 /* 2607 * Chunk can be modified within macro handler 2608 */ 2609 chunk = parser->chunks; 2610 p = chunk->pos; 2611 2612 if (macro_args) { 2613 ucl_object_unref (macro_args); 2614 } 2615 2616 if (!ret) { 2617 return false; 2618 } 2619 break; 2620 default: 2621 ucl_set_err (parser, UCL_EINTERNAL, 2622 "internal error: parser is in an unknown state", &parser->err); 2623 parser->state = UCL_STATE_ERROR; 2624 return false; 2625 } 2626 } 2627 2628 if (parser->last_comment) { 2629 if (parser->cur_obj) { 2630 ucl_attach_comment (parser, parser->cur_obj, true); 2631 } 2632 else if (parser->stack && parser->stack->obj) { 2633 ucl_attach_comment (parser, parser->stack->obj, true); 2634 } 2635 else if (parser->top_obj) { 2636 ucl_attach_comment (parser, parser->top_obj, true); 2637 } 2638 else { 2639 ucl_object_unref (parser->last_comment); 2640 } 2641 } 2642 2643 if (parser->stack != NULL && parser->state != UCL_STATE_ERROR) { 2644 struct ucl_stack *st; 2645 bool has_error = false; 2646 2647 LL_FOREACH (parser->stack, st) { 2648 if (st->chunk != parser->chunks) { 2649 break; /* Not our chunk, give up */ 2650 } 2651 if (st->e.params.flags & UCL_STACK_HAS_OBRACE) { 2652 if (parser->err == NULL) { 2653 utstring_new (parser->err); 2654 } 2655 2656 utstring_printf (parser->err, "%s:%d unmatched open brace at %d; ", 2657 chunk->fname ? chunk->fname : "memory", 2658 parser->chunks->line, 2659 st->e.params.line); 2660 2661 has_error = true; 2662 } 2663 } 2664 2665 if (has_error) { 2666 parser->err_code = UCL_EUNPAIRED; 2667 2668 return false; 2669 } 2670 } 2671 2672 return true; 2673 } 2674 2675 #define UPRM_SAFE(fn, a, b, c, el) do { \ 2676 if (!fn(a, b, c, a)) \ 2677 goto el; \ 2678 } while (0) 2679 2680 struct ucl_parser* 2681 ucl_parser_new (int flags) 2682 { 2683 struct ucl_parser *parser; 2684 2685 parser = UCL_ALLOC (sizeof (struct ucl_parser)); 2686 if (parser == NULL) { 2687 return NULL; 2688 } 2689 2690 memset (parser, 0, sizeof (struct ucl_parser)); 2691 2692 UPRM_SAFE(ucl_parser_register_macro, parser, "include", ucl_include_handler, e0); 2693 UPRM_SAFE(ucl_parser_register_macro, parser, "try_include", ucl_try_include_handler, e0); 2694 UPRM_SAFE(ucl_parser_register_macro, parser, "includes", ucl_includes_handler, e0); 2695 UPRM_SAFE(ucl_parser_register_macro, parser, "priority", ucl_priority_handler, e0); 2696 UPRM_SAFE(ucl_parser_register_macro, parser, "load", ucl_load_handler, e0); 2697 UPRM_SAFE(ucl_parser_register_context_macro, parser, "inherit", ucl_inherit_handler, e0); 2698 2699 parser->flags = flags; 2700 parser->includepaths = NULL; 2701 2702 if (flags & UCL_PARSER_SAVE_COMMENTS) { 2703 parser->comments = ucl_object_typed_new (UCL_OBJECT); 2704 } 2705 2706 if (!(flags & UCL_PARSER_NO_FILEVARS)) { 2707 /* Initial assumption about filevars */ 2708 ucl_parser_set_filevars (parser, NULL, false); 2709 } 2710 2711 return parser; 2712 e0: 2713 ucl_parser_free(parser); 2714 return NULL; 2715 } 2716 2717 bool 2718 ucl_parser_set_default_priority (struct ucl_parser *parser, unsigned prio) 2719 { 2720 if (parser == NULL) { 2721 return false; 2722 } 2723 2724 parser->default_priority = prio; 2725 2726 return true; 2727 } 2728 2729 int 2730 ucl_parser_get_default_priority (struct ucl_parser *parser) 2731 { 2732 if (parser == NULL) { 2733 return -1; 2734 } 2735 2736 return parser->default_priority; 2737 } 2738 2739 bool 2740 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro, 2741 ucl_macro_handler handler, void* ud) 2742 { 2743 struct ucl_macro *new; 2744 2745 if (macro == NULL || handler == NULL) { 2746 return false; 2747 } 2748 2749 new = UCL_ALLOC (sizeof (struct ucl_macro)); 2750 if (new == NULL) { 2751 return false; 2752 } 2753 2754 memset (new, 0, sizeof (struct ucl_macro)); 2755 new->h.handler = handler; 2756 new->name = strdup (macro); 2757 if (new->name == NULL) { 2758 UCL_FREE (sizeof (struct ucl_macro), new); 2759 return false; 2760 } 2761 new->ud = ud; 2762 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); 2763 return true; 2764 } 2765 2766 bool 2767 ucl_parser_register_context_macro (struct ucl_parser *parser, const char *macro, 2768 ucl_context_macro_handler handler, void* ud) 2769 { 2770 struct ucl_macro *new; 2771 2772 if (macro == NULL || handler == NULL) { 2773 return false; 2774 } 2775 2776 new = UCL_ALLOC (sizeof (struct ucl_macro)); 2777 if (new == NULL) { 2778 return false; 2779 } 2780 2781 memset (new, 0, sizeof (struct ucl_macro)); 2782 new->h.context_handler = handler; 2783 new->name = strdup (macro); 2784 if (new->name == NULL) { 2785 UCL_FREE (sizeof (struct ucl_macro), new); 2786 return false; 2787 } 2788 new->ud = ud; 2789 new->is_context = true; 2790 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); 2791 return true; 2792 } 2793 2794 void 2795 ucl_parser_register_variable (struct ucl_parser *parser, const char *var, 2796 const char *value) 2797 { 2798 struct ucl_variable *new = NULL, *cur; 2799 2800 if (var == NULL) { 2801 return; 2802 } 2803 2804 /* Find whether a variable already exists */ 2805 LL_FOREACH (parser->variables, cur) { 2806 if (strcmp (cur->var, var) == 0) { 2807 new = cur; 2808 break; 2809 } 2810 } 2811 2812 if (value == NULL) { 2813 2814 if (new != NULL) { 2815 /* Remove variable */ 2816 DL_DELETE (parser->variables, new); 2817 free (new->var); 2818 free (new->value); 2819 UCL_FREE (sizeof (struct ucl_variable), new); 2820 } 2821 else { 2822 /* Do nothing */ 2823 return; 2824 } 2825 } 2826 else { 2827 if (new == NULL) { 2828 new = UCL_ALLOC (sizeof (struct ucl_variable)); 2829 if (new == NULL) { 2830 return; 2831 } 2832 memset (new, 0, sizeof (struct ucl_variable)); 2833 new->var = strdup (var); 2834 new->var_len = strlen (var); 2835 new->value = strdup (value); 2836 new->value_len = strlen (value); 2837 2838 DL_APPEND (parser->variables, new); 2839 } 2840 else { 2841 free (new->value); 2842 new->value = strdup (value); 2843 new->value_len = strlen (value); 2844 } 2845 } 2846 } 2847 2848 void 2849 ucl_parser_set_variables_handler (struct ucl_parser *parser, 2850 ucl_variable_handler handler, void *ud) 2851 { 2852 parser->var_handler = handler; 2853 parser->var_data = ud; 2854 } 2855 2856 bool 2857 ucl_parser_add_chunk_full (struct ucl_parser *parser, const unsigned char *data, 2858 size_t len, unsigned priority, enum ucl_duplicate_strategy strat, 2859 enum ucl_parse_type parse_type) 2860 { 2861 struct ucl_chunk *chunk; 2862 struct ucl_parser_special_handler *special_handler; 2863 2864 if (parser == NULL) { 2865 return false; 2866 } 2867 2868 if (data == NULL && len != 0) { 2869 ucl_create_err (&parser->err, "invalid chunk added"); 2870 return false; 2871 } 2872 2873 if (parser->state != UCL_STATE_ERROR) { 2874 chunk = UCL_ALLOC (sizeof (struct ucl_chunk)); 2875 if (chunk == NULL) { 2876 ucl_create_err (&parser->err, "cannot allocate chunk structure"); 2877 return false; 2878 } 2879 2880 memset (chunk, 0, sizeof (*chunk)); 2881 2882 /* Apply all matching handlers from the first to the last */ 2883 LL_FOREACH (parser->special_handlers, special_handler) { 2884 if ((special_handler->flags & UCL_SPECIAL_HANDLER_PREPROCESS_ALL) || 2885 (len >= special_handler->magic_len && 2886 memcmp (data, special_handler->magic, special_handler->magic_len) == 0)) { 2887 unsigned char *ndata = NULL; 2888 size_t nlen = 0; 2889 2890 if (!special_handler->handler (parser, data, len, &ndata, &nlen, 2891 special_handler->user_data)) { 2892 ucl_create_err (&parser->err, "call for external handler failed"); 2893 return false; 2894 } 2895 2896 struct ucl_parser_special_handler_chain *nchain; 2897 nchain = UCL_ALLOC (sizeof (*nchain)); 2898 nchain->begin = ndata; 2899 nchain->len = nlen; 2900 nchain->special_handler = special_handler; 2901 2902 /* Free order is reversed */ 2903 LL_PREPEND (chunk->special_handlers, nchain); 2904 2905 data = ndata; 2906 len = nlen; 2907 } 2908 } 2909 2910 if (parse_type == UCL_PARSE_AUTO && len > 0) { 2911 /* We need to detect parse type by the first symbol */ 2912 if ((*data & 0x80) == 0x80 && (*data >= 0xdc && *data <= 0xdf)) { 2913 parse_type = UCL_PARSE_MSGPACK; 2914 } 2915 else if (*data == '(') { 2916 parse_type = UCL_PARSE_CSEXP; 2917 } 2918 else { 2919 parse_type = UCL_PARSE_UCL; 2920 } 2921 } 2922 2923 chunk->begin = data; 2924 chunk->remain = len; 2925 chunk->pos = chunk->begin; 2926 chunk->end = chunk->begin + len; 2927 chunk->line = 1; 2928 chunk->column = 0; 2929 chunk->priority = priority; 2930 chunk->strategy = strat; 2931 chunk->parse_type = parse_type; 2932 2933 if (parser->cur_file) { 2934 chunk->fname = strdup (parser->cur_file); 2935 } 2936 2937 LL_PREPEND (parser->chunks, chunk); 2938 parser->recursion ++; 2939 2940 if (parser->recursion > UCL_MAX_RECURSION) { 2941 ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d", 2942 parser->recursion); 2943 return false; 2944 } 2945 2946 if (len > 0) { 2947 /* Need to parse something */ 2948 switch (parse_type) { 2949 default: 2950 case UCL_PARSE_UCL: 2951 return ucl_state_machine (parser); 2952 case UCL_PARSE_MSGPACK: 2953 return ucl_parse_msgpack (parser); 2954 case UCL_PARSE_CSEXP: 2955 return ucl_parse_csexp (parser); 2956 } 2957 } 2958 else { 2959 /* Just add empty chunk and go forward */ 2960 if (parser->top_obj == NULL) { 2961 /* 2962 * In case of empty object, create one to indicate that we've 2963 * read something 2964 */ 2965 parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority); 2966 } 2967 2968 return true; 2969 } 2970 } 2971 2972 ucl_create_err (&parser->err, "a parser is in an invalid state"); 2973 2974 return false; 2975 } 2976 2977 bool 2978 ucl_parser_add_chunk_priority (struct ucl_parser *parser, 2979 const unsigned char *data, size_t len, unsigned priority) 2980 { 2981 /* We dereference parser, so this check is essential */ 2982 if (parser == NULL) { 2983 return false; 2984 } 2985 2986 return ucl_parser_add_chunk_full (parser, data, len, 2987 priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL); 2988 } 2989 2990 bool 2991 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data, 2992 size_t len) 2993 { 2994 if (parser == NULL) { 2995 return false; 2996 } 2997 2998 return ucl_parser_add_chunk_full (parser, data, len, 2999 parser->default_priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL); 3000 } 3001 3002 bool 3003 ucl_parser_insert_chunk (struct ucl_parser *parser, const unsigned char *data, 3004 size_t len) 3005 { 3006 if (parser == NULL || parser->top_obj == NULL) { 3007 return false; 3008 } 3009 3010 bool res; 3011 struct ucl_chunk *chunk; 3012 3013 int state = parser->state; 3014 parser->state = UCL_STATE_INIT; 3015 3016 /* Prevent inserted chunks from unintentionally closing the current object */ 3017 if (parser->stack != NULL && parser->stack->next != NULL) { 3018 parser->stack->e.params.level = parser->stack->next->e.params.level; 3019 } 3020 3021 res = ucl_parser_add_chunk_full (parser, data, len, parser->chunks->priority, 3022 parser->chunks->strategy, parser->chunks->parse_type); 3023 3024 /* Remove chunk from the stack */ 3025 chunk = parser->chunks; 3026 if (chunk != NULL) { 3027 parser->chunks = chunk->next; 3028 ucl_chunk_free (chunk); 3029 parser->recursion --; 3030 } 3031 3032 parser->state = state; 3033 3034 return res; 3035 } 3036 3037 bool 3038 ucl_parser_add_string_priority (struct ucl_parser *parser, const char *data, 3039 size_t len, unsigned priority) 3040 { 3041 if (data == NULL) { 3042 ucl_create_err (&parser->err, "invalid string added"); 3043 return false; 3044 } 3045 if (len == 0) { 3046 len = strlen (data); 3047 } 3048 3049 return ucl_parser_add_chunk_priority (parser, 3050 (const unsigned char *)data, len, priority); 3051 } 3052 3053 bool 3054 ucl_parser_add_string (struct ucl_parser *parser, const char *data, 3055 size_t len) 3056 { 3057 if (parser == NULL) { 3058 return false; 3059 } 3060 3061 return ucl_parser_add_string_priority (parser, 3062 (const unsigned char *)data, len, parser->default_priority); 3063 } 3064 3065 bool 3066 ucl_set_include_path (struct ucl_parser *parser, ucl_object_t *paths) 3067 { 3068 if (parser == NULL || paths == NULL) { 3069 return false; 3070 } 3071 3072 if (parser->includepaths == NULL) { 3073 parser->includepaths = ucl_object_copy (paths); 3074 } 3075 else { 3076 ucl_object_unref (parser->includepaths); 3077 parser->includepaths = ucl_object_copy (paths); 3078 } 3079 3080 if (parser->includepaths == NULL) { 3081 return false; 3082 } 3083 3084 return true; 3085 } 3086 3087 unsigned char ucl_parser_chunk_peek (struct ucl_parser *parser) 3088 { 3089 if (parser == NULL || parser->chunks == NULL || parser->chunks->pos == NULL || parser->chunks->end == NULL || 3090 parser->chunks->pos == parser->chunks->end) { 3091 return 0; 3092 } 3093 3094 return( *parser->chunks->pos ); 3095 } 3096 3097 bool ucl_parser_chunk_skip (struct ucl_parser *parser) 3098 { 3099 if (parser == NULL || parser->chunks == NULL || parser->chunks->pos == NULL || parser->chunks->end == NULL || 3100 parser->chunks->pos == parser->chunks->end) { 3101 return false; 3102 } 3103 3104 const unsigned char *p = parser->chunks->pos; 3105 ucl_chunk_skipc( parser->chunks, p ); 3106 if( parser->chunks->pos != NULL ) return true; 3107 return false; 3108 } 3109 3110 ucl_object_t* 3111 ucl_parser_get_current_stack_object (struct ucl_parser *parser, unsigned int depth) 3112 { 3113 ucl_object_t *obj; 3114 3115 if (parser == NULL || parser->stack == NULL) { 3116 return NULL; 3117 } 3118 3119 struct ucl_stack *stack = parser->stack; 3120 if(stack == NULL || stack->obj == NULL || ucl_object_type (stack->obj) != UCL_OBJECT) 3121 { 3122 return NULL; 3123 } 3124 3125 for( unsigned int i = 0; i < depth; ++i ) 3126 { 3127 stack = stack->next; 3128 if(stack == NULL || stack->obj == NULL || ucl_object_type (stack->obj) != UCL_OBJECT) 3129 { 3130 return NULL; 3131 } 3132 } 3133 3134 obj = ucl_object_ref (stack->obj); 3135 return obj; 3136 } 3137 3138