1 /* Copyright (c) 2013, Vsevolod Stakhov 2 * All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * * Redistributions of source code must retain the above copyright 7 * notice, this list of conditions and the following disclaimer. 8 * * Redistributions in binary form must reproduce the above copyright 9 * notice, this list of conditions and the following disclaimer in the 10 * documentation and/or other materials provided with the distribution. 11 * 12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY 13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY 16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 */ 23 24 #include "ucl.h" 25 #include "ucl_internal.h" 26 #include "ucl_chartable.h" 27 28 /** 29 * @file ucl_parser.c 30 * The implementation of ucl parser 31 */ 32 33 struct ucl_parser_saved_state { 34 unsigned int line; 35 unsigned int column; 36 size_t remain; 37 const unsigned char *pos; 38 }; 39 40 /** 41 * Move up to len characters 42 * @param parser 43 * @param begin 44 * @param len 45 * @return new position in chunk 46 */ 47 #define ucl_chunk_skipc(chunk, p) do{ \ 48 if (*(p) == '\n') { \ 49 (chunk)->line ++; \ 50 (chunk)->column = 0; \ 51 } \ 52 else (chunk)->column ++; \ 53 (p++); \ 54 (chunk)->pos ++; \ 55 (chunk)->remain --; \ 56 } while (0) 57 58 static inline void 59 ucl_set_err (struct ucl_parser *parser, int code, const char *str, UT_string **err) 60 { 61 const char *fmt_string, *filename; 62 struct ucl_chunk *chunk = parser->chunks; 63 64 if (parser->cur_file) { 65 filename = parser->cur_file; 66 } 67 else { 68 filename = "<unknown>"; 69 } 70 if (chunk->pos < chunk->end) { 71 if (isgraph (*chunk->pos)) { 72 fmt_string = "error while parsing %s: " 73 "line: %d, column: %d - '%s', character: '%c'"; 74 } 75 else { 76 fmt_string = "error while parsing %s: " 77 "line: %d, column: %d - '%s', character: '0x%02x'"; 78 } 79 ucl_create_err (err, fmt_string, 80 filename, chunk->line, chunk->column, 81 str, *chunk->pos); 82 } 83 else { 84 ucl_create_err (err, "error while parsing %s: at the end of chunk: %s", 85 filename, str); 86 } 87 } 88 89 /** 90 * Skip all comments from the current pos resolving nested and multiline comments 91 * @param parser 92 * @return 93 */ 94 static bool 95 ucl_skip_comments (struct ucl_parser *parser) 96 { 97 struct ucl_chunk *chunk = parser->chunks; 98 const unsigned char *p; 99 int comments_nested = 0; 100 bool quoted = false; 101 102 p = chunk->pos; 103 104 start: 105 if (chunk->remain > 0 && *p == '#') { 106 if (parser->state != UCL_STATE_SCOMMENT && 107 parser->state != UCL_STATE_MCOMMENT) { 108 while (p < chunk->end) { 109 if (*p == '\n') { 110 ucl_chunk_skipc (chunk, p); 111 goto start; 112 } 113 ucl_chunk_skipc (chunk, p); 114 } 115 } 116 } 117 else if (chunk->remain >= 2 && *p == '/') { 118 if (p[1] == '*') { 119 ucl_chunk_skipc (chunk, p); 120 comments_nested ++; 121 ucl_chunk_skipc (chunk, p); 122 123 while (p < chunk->end) { 124 if (*p == '"' && *(p - 1) != '\\') { 125 quoted = !quoted; 126 } 127 128 if (!quoted) { 129 if (*p == '*') { 130 ucl_chunk_skipc (chunk, p); 131 if (*p == '/') { 132 comments_nested --; 133 if (comments_nested == 0) { 134 ucl_chunk_skipc (chunk, p); 135 goto start; 136 } 137 } 138 ucl_chunk_skipc (chunk, p); 139 } 140 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') { 141 comments_nested ++; 142 ucl_chunk_skipc (chunk, p); 143 ucl_chunk_skipc (chunk, p); 144 continue; 145 } 146 } 147 ucl_chunk_skipc (chunk, p); 148 } 149 if (comments_nested != 0) { 150 ucl_set_err (parser, UCL_ENESTED, 151 "unfinished multiline comment", &parser->err); 152 return false; 153 } 154 } 155 } 156 157 return true; 158 } 159 160 /** 161 * Return multiplier for a character 162 * @param c multiplier character 163 * @param is_bytes if true use 1024 multiplier 164 * @return multiplier 165 */ 166 static inline unsigned long 167 ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) { 168 const struct { 169 char c; 170 long mult_normal; 171 long mult_bytes; 172 } multipliers[] = { 173 {'m', 1000 * 1000, 1024 * 1024}, 174 {'k', 1000, 1024}, 175 {'g', 1000 * 1000 * 1000, 1024 * 1024 * 1024} 176 }; 177 int i; 178 179 for (i = 0; i < 3; i ++) { 180 if (tolower (c) == multipliers[i].c) { 181 if (is_bytes) { 182 return multipliers[i].mult_bytes; 183 } 184 return multipliers[i].mult_normal; 185 } 186 } 187 188 return 1; 189 } 190 191 192 /** 193 * Return multiplier for time scaling 194 * @param c 195 * @return 196 */ 197 static inline double 198 ucl_lex_time_multiplier (const unsigned char c) { 199 const struct { 200 char c; 201 double mult; 202 } multipliers[] = { 203 {'m', 60}, 204 {'h', 60 * 60}, 205 {'d', 60 * 60 * 24}, 206 {'w', 60 * 60 * 24 * 7}, 207 {'y', 60 * 60 * 24 * 7 * 365} 208 }; 209 int i; 210 211 for (i = 0; i < 5; i ++) { 212 if (tolower (c) == multipliers[i].c) { 213 return multipliers[i].mult; 214 } 215 } 216 217 return 1; 218 } 219 220 /** 221 * Return true if a character is a end of an atom 222 * @param c 223 * @return 224 */ 225 static inline bool 226 ucl_lex_is_atom_end (const unsigned char c) 227 { 228 return ucl_test_character (c, UCL_CHARACTER_VALUE_END); 229 } 230 231 static inline bool 232 ucl_lex_is_comment (const unsigned char c1, const unsigned char c2) 233 { 234 if (c1 == '/') { 235 if (c2 == '*') { 236 return true; 237 } 238 } 239 else if (c1 == '#') { 240 return true; 241 } 242 return false; 243 } 244 245 /** 246 * Check variable found 247 * @param parser 248 * @param ptr 249 * @param remain 250 * @param out_len 251 * @param strict 252 * @param found 253 * @return 254 */ 255 static inline const char * 256 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain, 257 size_t *out_len, bool strict, bool *found) 258 { 259 struct ucl_variable *var; 260 unsigned char *dst; 261 size_t dstlen; 262 bool need_free = false; 263 264 LL_FOREACH (parser->variables, var) { 265 if (strict) { 266 if (remain == var->var_len) { 267 if (memcmp (ptr, var->var, var->var_len) == 0) { 268 *out_len += var->value_len; 269 *found = true; 270 return (ptr + var->var_len); 271 } 272 } 273 } 274 else { 275 if (remain >= var->var_len) { 276 if (memcmp (ptr, var->var, var->var_len) == 0) { 277 *out_len += var->value_len; 278 *found = true; 279 return (ptr + var->var_len); 280 } 281 } 282 } 283 } 284 285 /* XXX: can only handle ${VAR} */ 286 if (!(*found) && parser->var_handler != NULL && strict) { 287 /* Call generic handler */ 288 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free, 289 parser->var_data)) { 290 *found = true; 291 if (need_free) { 292 free (dst); 293 } 294 return (ptr + remain); 295 } 296 } 297 298 return ptr; 299 } 300 301 /** 302 * Check for a variable in a given string 303 * @param parser 304 * @param ptr 305 * @param remain 306 * @param out_len 307 * @param vars_found 308 * @return 309 */ 310 static const char * 311 ucl_check_variable (struct ucl_parser *parser, const char *ptr, 312 size_t remain, size_t *out_len, bool *vars_found) 313 { 314 const char *p, *end, *ret = ptr; 315 bool found = false; 316 317 if (*ptr == '{') { 318 /* We need to match the variable enclosed in braces */ 319 p = ptr + 1; 320 end = ptr + remain; 321 while (p < end) { 322 if (*p == '}') { 323 ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, 324 out_len, true, &found); 325 if (found) { 326 /* {} must be excluded actually */ 327 ret ++; 328 if (!*vars_found) { 329 *vars_found = true; 330 } 331 } 332 else { 333 *out_len += 2; 334 } 335 break; 336 } 337 p ++; 338 } 339 } 340 else if (*ptr != '$') { 341 /* Not count escaped dollar sign */ 342 ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found); 343 if (found && !*vars_found) { 344 *vars_found = true; 345 } 346 if (!found) { 347 (*out_len) ++; 348 } 349 } 350 else { 351 ret ++; 352 (*out_len) ++; 353 } 354 355 return ret; 356 } 357 358 /** 359 * Expand a single variable 360 * @param parser 361 * @param ptr 362 * @param remain 363 * @param dest 364 * @return 365 */ 366 static const char * 367 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr, 368 size_t remain, unsigned char **dest) 369 { 370 unsigned char *d = *dest, *dst; 371 const char *p = ptr + 1, *ret; 372 struct ucl_variable *var; 373 size_t dstlen; 374 bool need_free = false; 375 bool found = false; 376 bool strict = false; 377 378 ret = ptr + 1; 379 remain --; 380 381 if (*p == '$') { 382 *d++ = *p++; 383 *dest = d; 384 return p; 385 } 386 else if (*p == '{') { 387 p ++; 388 strict = true; 389 ret += 2; 390 remain -= 2; 391 } 392 393 LL_FOREACH (parser->variables, var) { 394 if (remain >= var->var_len) { 395 if (memcmp (p, var->var, var->var_len) == 0) { 396 memcpy (d, var->value, var->value_len); 397 ret += var->var_len; 398 d += var->value_len; 399 found = true; 400 break; 401 } 402 } 403 } 404 if (!found) { 405 if (strict && parser->var_handler != NULL) { 406 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free, 407 parser->var_data)) { 408 memcpy (d, dst, dstlen); 409 ret += dstlen; 410 d += remain; 411 found = true; 412 } 413 } 414 415 /* Leave variable as is */ 416 if (!found) { 417 if (strict) { 418 /* Copy '${' */ 419 memcpy (d, ptr, 2); 420 d += 2; 421 ret --; 422 } 423 else { 424 memcpy (d, ptr, 1); 425 d ++; 426 } 427 } 428 } 429 430 *dest = d; 431 return ret; 432 } 433 434 /** 435 * Expand variables in string 436 * @param parser 437 * @param dst 438 * @param src 439 * @param in_len 440 * @return 441 */ 442 static ssize_t 443 ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst, 444 const char *src, size_t in_len) 445 { 446 const char *p, *end = src + in_len; 447 unsigned char *d; 448 size_t out_len = 0; 449 bool vars_found = false; 450 451 p = src; 452 while (p != end) { 453 if (*p == '$') { 454 p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found); 455 } 456 else { 457 p ++; 458 out_len ++; 459 } 460 } 461 462 if (!vars_found) { 463 /* Trivial case */ 464 *dst = NULL; 465 return in_len; 466 } 467 468 *dst = UCL_ALLOC (out_len + 1); 469 if (*dst == NULL) { 470 return in_len; 471 } 472 473 d = *dst; 474 p = src; 475 while (p != end) { 476 if (*p == '$') { 477 p = ucl_expand_single_variable (parser, p, end - p, &d); 478 } 479 else { 480 *d++ = *p++; 481 } 482 } 483 484 *d = '\0'; 485 486 return out_len; 487 } 488 489 /** 490 * Store or copy pointer to the trash stack 491 * @param parser parser object 492 * @param src src string 493 * @param dst destination buffer (trash stack pointer) 494 * @param dst_const const destination pointer (e.g. value of object) 495 * @param in_len input length 496 * @param need_unescape need to unescape source (and copy it) 497 * @param need_lowercase need to lowercase value (and copy) 498 * @param need_expand need to expand variables (and copy as well) 499 * @return output length (excluding \0 symbol) 500 */ 501 static inline ssize_t 502 ucl_copy_or_store_ptr (struct ucl_parser *parser, 503 const unsigned char *src, unsigned char **dst, 504 const char **dst_const, size_t in_len, 505 bool need_unescape, bool need_lowercase, bool need_expand) 506 { 507 ssize_t ret = -1, tret; 508 unsigned char *tmp; 509 510 if (need_unescape || need_lowercase || 511 (need_expand && parser->variables != NULL) || 512 !(parser->flags & UCL_PARSER_ZEROCOPY)) { 513 /* Copy string */ 514 *dst = UCL_ALLOC (in_len + 1); 515 if (*dst == NULL) { 516 ucl_set_err (parser, 0, "cannot allocate memory for a string", 517 &parser->err); 518 return false; 519 } 520 if (need_lowercase) { 521 ret = ucl_strlcpy_tolower (*dst, src, in_len + 1); 522 } 523 else { 524 ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1); 525 } 526 527 if (need_unescape) { 528 ret = ucl_unescape_json_string (*dst, ret); 529 } 530 if (need_expand) { 531 tmp = *dst; 532 tret = ret; 533 ret = ucl_expand_variable (parser, dst, tmp, ret); 534 if (*dst == NULL) { 535 /* Nothing to expand */ 536 *dst = tmp; 537 ret = tret; 538 } 539 else { 540 /* Free unexpanded value */ 541 UCL_FREE (in_len + 1, tmp); 542 } 543 } 544 *dst_const = *dst; 545 } 546 else { 547 *dst_const = src; 548 ret = in_len; 549 } 550 551 return ret; 552 } 553 554 /** 555 * Create and append an object at the specified level 556 * @param parser 557 * @param is_array 558 * @param level 559 * @return 560 */ 561 static inline ucl_object_t * 562 ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level) 563 { 564 struct ucl_stack *st; 565 566 if (!is_array) { 567 if (obj == NULL) { 568 obj = ucl_object_new_full (UCL_OBJECT, parser->chunks->priority); 569 } 570 else { 571 obj->type = UCL_OBJECT; 572 } 573 obj->value.ov = ucl_hash_create (parser->flags & UCL_PARSER_KEY_LOWERCASE); 574 parser->state = UCL_STATE_KEY; 575 } 576 else { 577 if (obj == NULL) { 578 obj = ucl_object_new_full (UCL_ARRAY, parser->chunks->priority); 579 } 580 else { 581 obj->type = UCL_ARRAY; 582 } 583 parser->state = UCL_STATE_VALUE; 584 } 585 586 st = UCL_ALLOC (sizeof (struct ucl_stack)); 587 if (st == NULL) { 588 ucl_set_err (parser, 0, "cannot allocate memory for an object", 589 &parser->err); 590 ucl_object_unref (obj); 591 return NULL; 592 } 593 st->obj = obj; 594 st->level = level; 595 LL_PREPEND (parser->stack, st); 596 parser->cur_obj = obj; 597 598 return obj; 599 } 600 601 int 602 ucl_maybe_parse_number (ucl_object_t *obj, 603 const char *start, const char *end, const char **pos, 604 bool allow_double, bool number_bytes, bool allow_time) 605 { 606 const char *p = start, *c = start; 607 char *endptr; 608 bool got_dot = false, got_exp = false, need_double = false, 609 is_time = false, valid_start = false, is_hex = false, 610 is_neg = false; 611 double dv = 0; 612 int64_t lv = 0; 613 614 if (*p == '-') { 615 is_neg = true; 616 c ++; 617 p ++; 618 } 619 while (p < end) { 620 if (is_hex && isxdigit (*p)) { 621 p ++; 622 } 623 else if (isdigit (*p)) { 624 valid_start = true; 625 p ++; 626 } 627 else if (!is_hex && (*p == 'x' || *p == 'X')) { 628 is_hex = true; 629 allow_double = false; 630 c = p + 1; 631 } 632 else if (allow_double) { 633 if (p == c) { 634 /* Empty digits sequence, not a number */ 635 *pos = start; 636 return EINVAL; 637 } 638 else if (*p == '.') { 639 if (got_dot) { 640 /* Double dots, not a number */ 641 *pos = start; 642 return EINVAL; 643 } 644 else { 645 got_dot = true; 646 need_double = true; 647 p ++; 648 } 649 } 650 else if (*p == 'e' || *p == 'E') { 651 if (got_exp) { 652 /* Double exp, not a number */ 653 *pos = start; 654 return EINVAL; 655 } 656 else { 657 got_exp = true; 658 need_double = true; 659 p ++; 660 if (p >= end) { 661 *pos = start; 662 return EINVAL; 663 } 664 if (!isdigit (*p) && *p != '+' && *p != '-') { 665 /* Wrong exponent sign */ 666 *pos = start; 667 return EINVAL; 668 } 669 else { 670 p ++; 671 } 672 } 673 } 674 else { 675 /* Got the end of the number, need to check */ 676 break; 677 } 678 } 679 else { 680 break; 681 } 682 } 683 684 if (!valid_start) { 685 *pos = start; 686 return EINVAL; 687 } 688 689 errno = 0; 690 if (need_double) { 691 dv = strtod (c, &endptr); 692 } 693 else { 694 if (is_hex) { 695 lv = strtoimax (c, &endptr, 16); 696 } 697 else { 698 lv = strtoimax (c, &endptr, 10); 699 } 700 } 701 if (errno == ERANGE) { 702 *pos = start; 703 return ERANGE; 704 } 705 706 /* Now check endptr */ 707 if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') { 708 p = endptr; 709 goto set_obj; 710 } 711 712 if (endptr < end && endptr != start) { 713 p = endptr; 714 switch (*p) { 715 case 'm': 716 case 'M': 717 case 'g': 718 case 'G': 719 case 'k': 720 case 'K': 721 if (end - p >= 2) { 722 if (p[1] == 's' || p[1] == 'S') { 723 /* Milliseconds */ 724 if (!need_double) { 725 need_double = true; 726 dv = lv; 727 } 728 is_time = true; 729 if (p[0] == 'm' || p[0] == 'M') { 730 dv /= 1000.; 731 } 732 else { 733 dv *= ucl_lex_num_multiplier (*p, false); 734 } 735 p += 2; 736 goto set_obj; 737 } 738 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) { 739 /* Bytes */ 740 if (need_double) { 741 need_double = false; 742 lv = dv; 743 } 744 lv *= ucl_lex_num_multiplier (*p, true); 745 p += 2; 746 goto set_obj; 747 } 748 else if (ucl_lex_is_atom_end (p[1])) { 749 if (need_double) { 750 dv *= ucl_lex_num_multiplier (*p, false); 751 } 752 else { 753 lv *= ucl_lex_num_multiplier (*p, number_bytes); 754 } 755 p ++; 756 goto set_obj; 757 } 758 else if (allow_time && end - p >= 3) { 759 if (tolower (p[0]) == 'm' && 760 tolower (p[1]) == 'i' && 761 tolower (p[2]) == 'n') { 762 /* Minutes */ 763 if (!need_double) { 764 need_double = true; 765 dv = lv; 766 } 767 is_time = true; 768 dv *= 60.; 769 p += 3; 770 goto set_obj; 771 } 772 } 773 } 774 else { 775 if (need_double) { 776 dv *= ucl_lex_num_multiplier (*p, false); 777 } 778 else { 779 lv *= ucl_lex_num_multiplier (*p, number_bytes); 780 } 781 p ++; 782 goto set_obj; 783 } 784 break; 785 case 'S': 786 case 's': 787 if (allow_time && 788 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { 789 if (!need_double) { 790 need_double = true; 791 dv = lv; 792 } 793 p ++; 794 is_time = true; 795 goto set_obj; 796 } 797 break; 798 case 'h': 799 case 'H': 800 case 'd': 801 case 'D': 802 case 'w': 803 case 'W': 804 case 'Y': 805 case 'y': 806 if (allow_time && 807 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { 808 if (!need_double) { 809 need_double = true; 810 dv = lv; 811 } 812 is_time = true; 813 dv *= ucl_lex_time_multiplier (*p); 814 p ++; 815 goto set_obj; 816 } 817 break; 818 case '\t': 819 case ' ': 820 while (p < end && ucl_test_character(*p, UCL_CHARACTER_WHITESPACE)) { 821 p++; 822 } 823 if (ucl_lex_is_atom_end(*p)) 824 goto set_obj; 825 break; 826 } 827 } 828 else if (endptr == end) { 829 /* Just a number at the end of chunk */ 830 p = endptr; 831 goto set_obj; 832 } 833 834 *pos = c; 835 return EINVAL; 836 837 set_obj: 838 if (allow_double && (need_double || is_time)) { 839 if (!is_time) { 840 obj->type = UCL_FLOAT; 841 } 842 else { 843 obj->type = UCL_TIME; 844 } 845 obj->value.dv = is_neg ? (-dv) : dv; 846 } 847 else { 848 obj->type = UCL_INT; 849 obj->value.iv = is_neg ? (-lv) : lv; 850 } 851 *pos = p; 852 return 0; 853 } 854 855 /** 856 * Parse possible number 857 * @param parser 858 * @param chunk 859 * @return true if a number has been parsed 860 */ 861 static bool 862 ucl_lex_number (struct ucl_parser *parser, 863 struct ucl_chunk *chunk, ucl_object_t *obj) 864 { 865 const unsigned char *pos; 866 int ret; 867 868 ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos, 869 true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0)); 870 871 if (ret == 0) { 872 chunk->remain -= pos - chunk->pos; 873 chunk->column += pos - chunk->pos; 874 chunk->pos = pos; 875 return true; 876 } 877 else if (ret == ERANGE) { 878 ucl_set_err (parser, ERANGE, "numeric value out of range", &parser->err); 879 } 880 881 return false; 882 } 883 884 /** 885 * Parse quoted string with possible escapes 886 * @param parser 887 * @param chunk 888 * @return true if a string has been parsed 889 */ 890 static bool 891 ucl_lex_json_string (struct ucl_parser *parser, 892 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand) 893 { 894 const unsigned char *p = chunk->pos; 895 unsigned char c; 896 int i; 897 898 while (p < chunk->end) { 899 c = *p; 900 if (c < 0x1F) { 901 /* Unmasked control character */ 902 if (c == '\n') { 903 ucl_set_err (parser, UCL_ESYNTAX, "unexpected newline", 904 &parser->err); 905 } 906 else { 907 ucl_set_err (parser, UCL_ESYNTAX, "unexpected control character", 908 &parser->err); 909 } 910 return false; 911 } 912 else if (c == '\\') { 913 ucl_chunk_skipc (chunk, p); 914 c = *p; 915 if (p >= chunk->end) { 916 ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character", 917 &parser->err); 918 return false; 919 } 920 else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) { 921 if (c == 'u') { 922 ucl_chunk_skipc (chunk, p); 923 for (i = 0; i < 4 && p < chunk->end; i ++) { 924 if (!isxdigit (*p)) { 925 ucl_set_err (parser, UCL_ESYNTAX, "invalid utf escape", 926 &parser->err); 927 return false; 928 } 929 ucl_chunk_skipc (chunk, p); 930 } 931 if (p >= chunk->end) { 932 ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character", 933 &parser->err); 934 return false; 935 } 936 } 937 else { 938 ucl_chunk_skipc (chunk, p); 939 } 940 } 941 *need_unescape = true; 942 *ucl_escape = true; 943 continue; 944 } 945 else if (c == '"') { 946 ucl_chunk_skipc (chunk, p); 947 return true; 948 } 949 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) { 950 *ucl_escape = true; 951 } 952 else if (c == '$') { 953 *var_expand = true; 954 } 955 ucl_chunk_skipc (chunk, p); 956 } 957 958 ucl_set_err (parser, UCL_ESYNTAX, "no quote at the end of json string", 959 &parser->err); 960 return false; 961 } 962 963 static void 964 ucl_parser_append_elt (struct ucl_parser *parser, ucl_hash_t *cont, 965 ucl_object_t *top, 966 ucl_object_t *elt) 967 { 968 ucl_object_t *nobj; 969 970 if ((parser->flags & UCL_PARSER_NO_IMPLICIT_ARRAYS) == 0) { 971 /* Implicit array */ 972 top->flags |= UCL_OBJECT_MULTIVALUE; 973 DL_APPEND (top, elt); 974 } 975 else { 976 if ((top->flags & UCL_OBJECT_MULTIVALUE) != 0) { 977 /* Just add to the explicit array */ 978 ucl_array_append (top, elt); 979 } 980 else { 981 /* Convert to an array */ 982 ucl_hash_delete (cont, top); 983 nobj = ucl_object_typed_new (UCL_ARRAY); 984 nobj->key = top->key; 985 nobj->keylen = top->keylen; 986 nobj->flags |= UCL_OBJECT_MULTIVALUE; 987 ucl_array_append (nobj, top); 988 ucl_array_append (nobj, elt); 989 ucl_hash_insert (cont, nobj, nobj->key, nobj->keylen); 990 } 991 } 992 } 993 994 /** 995 * Parse a key in an object 996 * @param parser 997 * @param chunk 998 * @return true if a key has been parsed 999 */ 1000 static bool 1001 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object) 1002 { 1003 const unsigned char *p, *c = NULL, *end, *t; 1004 const char *key = NULL; 1005 bool got_quote = false, got_eq = false, got_semicolon = false, 1006 need_unescape = false, ucl_escape = false, var_expand = false, 1007 got_content = false, got_sep = false; 1008 ucl_object_t *nobj, *tobj; 1009 ucl_hash_t *container; 1010 ssize_t keylen; 1011 1012 p = chunk->pos; 1013 1014 if (*p == '.') { 1015 /* It is macro actually */ 1016 ucl_chunk_skipc (chunk, p); 1017 parser->prev_state = parser->state; 1018 parser->state = UCL_STATE_MACRO_NAME; 1019 *end_of_object = false; 1020 return true; 1021 } 1022 while (p < chunk->end) { 1023 /* 1024 * A key must start with alpha, number, '/' or '_' and end with space character 1025 */ 1026 if (c == NULL) { 1027 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1028 if (!ucl_skip_comments (parser)) { 1029 return false; 1030 } 1031 p = chunk->pos; 1032 } 1033 else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1034 ucl_chunk_skipc (chunk, p); 1035 } 1036 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) { 1037 /* The first symbol */ 1038 c = p; 1039 ucl_chunk_skipc (chunk, p); 1040 got_content = true; 1041 } 1042 else if (*p == '"') { 1043 /* JSON style key */ 1044 c = p + 1; 1045 got_quote = true; 1046 got_content = true; 1047 ucl_chunk_skipc (chunk, p); 1048 } 1049 else if (*p == '}') { 1050 /* We have actually end of an object */ 1051 *end_of_object = true; 1052 return true; 1053 } 1054 else if (*p == '.') { 1055 ucl_chunk_skipc (chunk, p); 1056 parser->prev_state = parser->state; 1057 parser->state = UCL_STATE_MACRO_NAME; 1058 return true; 1059 } 1060 else { 1061 /* Invalid identifier */ 1062 ucl_set_err (parser, UCL_ESYNTAX, "key must begin with a letter", 1063 &parser->err); 1064 return false; 1065 } 1066 } 1067 else { 1068 /* Parse the body of a key */ 1069 if (!got_quote) { 1070 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) { 1071 got_content = true; 1072 ucl_chunk_skipc (chunk, p); 1073 } 1074 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) { 1075 end = p; 1076 break; 1077 } 1078 else { 1079 ucl_set_err (parser, UCL_ESYNTAX, "invalid character in a key", 1080 &parser->err); 1081 return false; 1082 } 1083 } 1084 else { 1085 /* We need to parse json like quoted string */ 1086 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1087 return false; 1088 } 1089 /* Always escape keys obtained via json */ 1090 end = chunk->pos - 1; 1091 p = chunk->pos; 1092 break; 1093 } 1094 } 1095 } 1096 1097 if (p >= chunk->end && got_content) { 1098 ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err); 1099 return false; 1100 } 1101 else if (!got_content) { 1102 return true; 1103 } 1104 *end_of_object = false; 1105 /* We are now at the end of the key, need to parse the rest */ 1106 while (p < chunk->end) { 1107 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 1108 ucl_chunk_skipc (chunk, p); 1109 } 1110 else if (*p == '=') { 1111 if (!got_eq && !got_semicolon) { 1112 ucl_chunk_skipc (chunk, p); 1113 got_eq = true; 1114 } 1115 else { 1116 ucl_set_err (parser, UCL_ESYNTAX, "unexpected '=' character", 1117 &parser->err); 1118 return false; 1119 } 1120 } 1121 else if (*p == ':') { 1122 if (!got_eq && !got_semicolon) { 1123 ucl_chunk_skipc (chunk, p); 1124 got_semicolon = true; 1125 } 1126 else { 1127 ucl_set_err (parser, UCL_ESYNTAX, "unexpected ':' character", 1128 &parser->err); 1129 return false; 1130 } 1131 } 1132 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1133 /* Check for comment */ 1134 if (!ucl_skip_comments (parser)) { 1135 return false; 1136 } 1137 p = chunk->pos; 1138 } 1139 else { 1140 /* Start value */ 1141 break; 1142 } 1143 } 1144 1145 if (p >= chunk->end && got_content) { 1146 ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err); 1147 return false; 1148 } 1149 1150 got_sep = got_semicolon || got_eq; 1151 1152 if (!got_sep) { 1153 /* 1154 * Maybe we have more keys nested, so search for termination character. 1155 * Possible choices: 1156 * 1) key1 key2 ... keyN [:=] value <- we treat that as error 1157 * 2) key1 ... keyN {} or [] <- we treat that as nested objects 1158 * 3) key1 value[;,\n] <- we treat that as linear object 1159 */ 1160 t = p; 1161 *next_key = false; 1162 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) { 1163 t ++; 1164 } 1165 /* Check first non-space character after a key */ 1166 if (*t != '{' && *t != '[') { 1167 while (t < chunk->end) { 1168 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') { 1169 break; 1170 } 1171 else if (*t == '{' || *t == '[') { 1172 *next_key = true; 1173 break; 1174 } 1175 t ++; 1176 } 1177 } 1178 } 1179 1180 /* Create a new object */ 1181 nobj = ucl_object_new_full (UCL_NULL, parser->chunks->priority); 1182 keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY], 1183 &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false); 1184 if (keylen == -1) { 1185 ucl_object_unref (nobj); 1186 return false; 1187 } 1188 else if (keylen == 0) { 1189 ucl_set_err (parser, UCL_ESYNTAX, "empty keys are not allowed", &parser->err); 1190 ucl_object_unref (nobj); 1191 return false; 1192 } 1193 1194 container = parser->stack->obj->value.ov; 1195 nobj->key = key; 1196 nobj->keylen = keylen; 1197 tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj)); 1198 if (tobj == NULL) { 1199 container = ucl_hash_insert_object (container, nobj, 1200 parser->flags & UCL_PARSER_KEY_LOWERCASE); 1201 nobj->prev = nobj; 1202 nobj->next = NULL; 1203 parser->stack->obj->len ++; 1204 } 1205 else { 1206 /* 1207 * The logic here is the following: 1208 * 1209 * - if we have two objects with the same priority, then we form an 1210 * implicit or explicit array 1211 * - if a new object has bigger priority, then we overwrite an old one 1212 * - if a new object has lower priority, then we ignore it 1213 */ 1214 unsigned priold = ucl_object_get_priority (tobj), 1215 prinew = ucl_object_get_priority (nobj); 1216 if (priold == prinew) { 1217 ucl_parser_append_elt (parser, container, tobj, nobj); 1218 } 1219 else if (priold > prinew) { 1220 ucl_object_unref (nobj); 1221 return true; 1222 } 1223 else { 1224 ucl_hash_replace (container, tobj, nobj); 1225 ucl_object_unref (tobj); 1226 } 1227 } 1228 1229 if (ucl_escape) { 1230 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE; 1231 } 1232 parser->stack->obj->value.ov = container; 1233 1234 parser->cur_obj = nobj; 1235 1236 return true; 1237 } 1238 1239 /** 1240 * Parse a cl string 1241 * @param parser 1242 * @param chunk 1243 * @return true if a key has been parsed 1244 */ 1245 static bool 1246 ucl_parse_string_value (struct ucl_parser *parser, 1247 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape) 1248 { 1249 const unsigned char *p; 1250 enum { 1251 UCL_BRACE_ROUND = 0, 1252 UCL_BRACE_SQUARE, 1253 UCL_BRACE_FIGURE 1254 }; 1255 int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}}; 1256 1257 p = chunk->pos; 1258 1259 while (p < chunk->end) { 1260 1261 /* Skip pairs of figure braces */ 1262 if (*p == '{') { 1263 braces[UCL_BRACE_FIGURE][0] ++; 1264 } 1265 else if (*p == '}') { 1266 braces[UCL_BRACE_FIGURE][1] ++; 1267 if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) { 1268 /* This is not a termination symbol, continue */ 1269 ucl_chunk_skipc (chunk, p); 1270 continue; 1271 } 1272 } 1273 /* Skip pairs of square braces */ 1274 else if (*p == '[') { 1275 braces[UCL_BRACE_SQUARE][0] ++; 1276 } 1277 else if (*p == ']') { 1278 braces[UCL_BRACE_SQUARE][1] ++; 1279 if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) { 1280 /* This is not a termination symbol, continue */ 1281 ucl_chunk_skipc (chunk, p); 1282 continue; 1283 } 1284 } 1285 else if (*p == '$') { 1286 *var_expand = true; 1287 } 1288 else if (*p == '\\') { 1289 *need_unescape = true; 1290 ucl_chunk_skipc (chunk, p); 1291 if (p < chunk->end) { 1292 ucl_chunk_skipc (chunk, p); 1293 } 1294 continue; 1295 } 1296 1297 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1298 break; 1299 } 1300 ucl_chunk_skipc (chunk, p); 1301 } 1302 1303 return true; 1304 } 1305 1306 /** 1307 * Parse multiline string ending with \n{term}\n 1308 * @param parser 1309 * @param chunk 1310 * @param term 1311 * @param term_len 1312 * @return size of multiline string or 0 in case of error 1313 */ 1314 static int 1315 ucl_parse_multiline_string (struct ucl_parser *parser, 1316 struct ucl_chunk *chunk, const unsigned char *term, 1317 int term_len, unsigned char const **beg, 1318 bool *var_expand) 1319 { 1320 const unsigned char *p, *c, *tend; 1321 bool newline = false; 1322 int len = 0; 1323 1324 p = chunk->pos; 1325 1326 c = p; 1327 1328 while (p < chunk->end) { 1329 if (newline) { 1330 if (chunk->end - p < term_len) { 1331 return 0; 1332 } 1333 else if (memcmp (p, term, term_len) == 0) { 1334 tend = p + term_len; 1335 if (*tend != '\n' && *tend != ';' && *tend != ',') { 1336 /* Incomplete terminator */ 1337 ucl_chunk_skipc (chunk, p); 1338 continue; 1339 } 1340 len = p - c; 1341 chunk->remain -= term_len; 1342 chunk->pos = p + term_len; 1343 chunk->column = term_len; 1344 *beg = c; 1345 break; 1346 } 1347 } 1348 if (*p == '\n') { 1349 newline = true; 1350 } 1351 else { 1352 if (*p == '$') { 1353 *var_expand = true; 1354 } 1355 newline = false; 1356 } 1357 ucl_chunk_skipc (chunk, p); 1358 } 1359 1360 return len; 1361 } 1362 1363 static ucl_object_t* 1364 ucl_get_value_object (struct ucl_parser *parser) 1365 { 1366 ucl_object_t *t, *obj = NULL; 1367 1368 if (parser == NULL || parser->stack == NULL || parser->stack->obj == NULL) { 1369 return NULL; 1370 } 1371 1372 if (parser->stack->obj->type == UCL_ARRAY) { 1373 /* Object must be allocated */ 1374 obj = ucl_object_new_full (UCL_NULL, parser->chunks->priority); 1375 t = parser->stack->obj; 1376 ucl_array_append (t, obj); 1377 parser->cur_obj = obj; 1378 } 1379 else { 1380 /* Object has been already allocated */ 1381 obj = parser->cur_obj; 1382 } 1383 1384 return obj; 1385 } 1386 1387 /** 1388 * Handle value data 1389 * @param parser 1390 * @param chunk 1391 * @return 1392 */ 1393 static bool 1394 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1395 { 1396 const unsigned char *p, *c; 1397 ucl_object_t *obj = NULL; 1398 unsigned int stripped_spaces; 1399 int str_len; 1400 bool need_unescape = false, ucl_escape = false, var_expand = false; 1401 1402 p = chunk->pos; 1403 1404 /* Skip any spaces and comments */ 1405 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) || 1406 (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1407 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1408 ucl_chunk_skipc (chunk, p); 1409 } 1410 if (!ucl_skip_comments (parser)) { 1411 return false; 1412 } 1413 p = chunk->pos; 1414 } 1415 1416 while (p < chunk->end) { 1417 c = p; 1418 switch (*p) { 1419 case '"': 1420 obj = ucl_get_value_object (parser); 1421 ucl_chunk_skipc (chunk, p); 1422 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1423 return false; 1424 } 1425 str_len = chunk->pos - c - 2; 1426 obj->type = UCL_STRING; 1427 if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE], 1428 &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) { 1429 return false; 1430 } 1431 obj->len = str_len; 1432 parser->state = UCL_STATE_AFTER_VALUE; 1433 p = chunk->pos; 1434 return true; 1435 break; 1436 case '{': 1437 obj = ucl_get_value_object (parser); 1438 /* We have a new object */ 1439 obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level); 1440 if (obj == NULL) { 1441 return false; 1442 } 1443 1444 ucl_chunk_skipc (chunk, p); 1445 return true; 1446 break; 1447 case '[': 1448 obj = ucl_get_value_object (parser); 1449 /* We have a new array */ 1450 obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level); 1451 if (obj == NULL) { 1452 return false; 1453 } 1454 1455 ucl_chunk_skipc (chunk, p); 1456 return true; 1457 break; 1458 case ']': 1459 /* We have the array ending */ 1460 if (parser->stack && parser->stack->obj->type == UCL_ARRAY) { 1461 parser->state = UCL_STATE_AFTER_VALUE; 1462 return true; 1463 } 1464 else { 1465 goto parse_string; 1466 } 1467 break; 1468 case '<': 1469 obj = ucl_get_value_object (parser); 1470 /* We have something like multiline value, which must be <<[A-Z]+\n */ 1471 if (chunk->end - p > 3) { 1472 if (memcmp (p, "<<", 2) == 0) { 1473 p += 2; 1474 /* We allow only uppercase characters in multiline definitions */ 1475 while (p < chunk->end && *p >= 'A' && *p <= 'Z') { 1476 p ++; 1477 } 1478 if (*p =='\n') { 1479 /* Set chunk positions and start multiline parsing */ 1480 c += 2; 1481 chunk->remain -= p - c; 1482 chunk->pos = p + 1; 1483 chunk->column = 0; 1484 chunk->line ++; 1485 if ((str_len = ucl_parse_multiline_string (parser, chunk, c, 1486 p - c, &c, &var_expand)) == 0) { 1487 ucl_set_err (parser, UCL_ESYNTAX, 1488 "unterminated multiline value", &parser->err); 1489 return false; 1490 } 1491 obj->type = UCL_STRING; 1492 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE], 1493 &obj->value.sv, str_len - 1, false, false, var_expand)) == -1) { 1494 return false; 1495 } 1496 obj->len = str_len; 1497 parser->state = UCL_STATE_AFTER_VALUE; 1498 return true; 1499 } 1500 } 1501 } 1502 /* Fallback to ordinary strings */ 1503 default: 1504 parse_string: 1505 if (obj == NULL) { 1506 obj = ucl_get_value_object (parser); 1507 } 1508 /* Parse atom */ 1509 if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) { 1510 if (!ucl_lex_number (parser, chunk, obj)) { 1511 if (parser->state == UCL_STATE_ERROR) { 1512 return false; 1513 } 1514 } 1515 else { 1516 parser->state = UCL_STATE_AFTER_VALUE; 1517 return true; 1518 } 1519 /* Fallback to normal string */ 1520 } 1521 1522 if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) { 1523 return false; 1524 } 1525 /* Cut trailing spaces */ 1526 stripped_spaces = 0; 1527 while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces), 1528 UCL_CHARACTER_WHITESPACE)) { 1529 stripped_spaces ++; 1530 } 1531 str_len = chunk->pos - c - stripped_spaces; 1532 if (str_len <= 0) { 1533 ucl_set_err (parser, 0, "string value must not be empty", 1534 &parser->err); 1535 return false; 1536 } 1537 else if (str_len == 4 && memcmp (c, "null", 4) == 0) { 1538 obj->len = 0; 1539 obj->type = UCL_NULL; 1540 } 1541 else if (!ucl_maybe_parse_boolean (obj, c, str_len)) { 1542 obj->type = UCL_STRING; 1543 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE], 1544 &obj->value.sv, str_len, need_unescape, 1545 false, var_expand)) == -1) { 1546 return false; 1547 } 1548 obj->len = str_len; 1549 } 1550 parser->state = UCL_STATE_AFTER_VALUE; 1551 p = chunk->pos; 1552 1553 return true; 1554 break; 1555 } 1556 } 1557 1558 return true; 1559 } 1560 1561 /** 1562 * Handle after value data 1563 * @param parser 1564 * @param chunk 1565 * @return 1566 */ 1567 static bool 1568 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1569 { 1570 const unsigned char *p; 1571 bool got_sep = false; 1572 struct ucl_stack *st; 1573 1574 p = chunk->pos; 1575 1576 while (p < chunk->end) { 1577 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 1578 /* Skip whitespaces */ 1579 ucl_chunk_skipc (chunk, p); 1580 } 1581 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1582 /* Skip comment */ 1583 if (!ucl_skip_comments (parser)) { 1584 return false; 1585 } 1586 /* Treat comment as a separator */ 1587 got_sep = true; 1588 p = chunk->pos; 1589 } 1590 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) { 1591 if (*p == '}' || *p == ']') { 1592 if (parser->stack == NULL) { 1593 ucl_set_err (parser, UCL_ESYNTAX, 1594 "end of array or object detected without corresponding start", 1595 &parser->err); 1596 return false; 1597 } 1598 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) || 1599 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) { 1600 1601 /* Pop all nested objects from a stack */ 1602 st = parser->stack; 1603 parser->stack = st->next; 1604 UCL_FREE (sizeof (struct ucl_stack), st); 1605 1606 while (parser->stack != NULL) { 1607 st = parser->stack; 1608 if (st->next == NULL || st->next->level == st->level) { 1609 break; 1610 } 1611 parser->stack = st->next; 1612 UCL_FREE (sizeof (struct ucl_stack), st); 1613 } 1614 } 1615 else { 1616 ucl_set_err (parser, UCL_ESYNTAX, 1617 "unexpected terminating symbol detected", 1618 &parser->err); 1619 return false; 1620 } 1621 1622 if (parser->stack == NULL) { 1623 /* Ignore everything after a top object */ 1624 return true; 1625 } 1626 else { 1627 ucl_chunk_skipc (chunk, p); 1628 } 1629 got_sep = true; 1630 } 1631 else { 1632 /* Got a separator */ 1633 got_sep = true; 1634 ucl_chunk_skipc (chunk, p); 1635 } 1636 } 1637 else { 1638 /* Anything else */ 1639 if (!got_sep) { 1640 ucl_set_err (parser, UCL_ESYNTAX, "delimiter is missing", 1641 &parser->err); 1642 return false; 1643 } 1644 return true; 1645 } 1646 } 1647 1648 return true; 1649 } 1650 1651 /** 1652 * Handle macro data 1653 * @param parser 1654 * @param chunk 1655 * @return 1656 */ 1657 static bool 1658 ucl_parse_macro_value (struct ucl_parser *parser, 1659 struct ucl_chunk *chunk, struct ucl_macro *macro, 1660 unsigned char const **macro_start, size_t *macro_len) 1661 { 1662 const unsigned char *p, *c; 1663 bool need_unescape = false, ucl_escape = false, var_expand = false; 1664 1665 p = chunk->pos; 1666 1667 switch (*p) { 1668 case '"': 1669 /* We have macro value encoded in quotes */ 1670 c = p; 1671 ucl_chunk_skipc (chunk, p); 1672 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1673 return false; 1674 } 1675 1676 *macro_start = c + 1; 1677 *macro_len = chunk->pos - c - 2; 1678 p = chunk->pos; 1679 break; 1680 case '{': 1681 /* We got a multiline macro body */ 1682 ucl_chunk_skipc (chunk, p); 1683 /* Skip spaces at the beginning */ 1684 while (p < chunk->end) { 1685 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1686 ucl_chunk_skipc (chunk, p); 1687 } 1688 else { 1689 break; 1690 } 1691 } 1692 c = p; 1693 while (p < chunk->end) { 1694 if (*p == '}') { 1695 break; 1696 } 1697 ucl_chunk_skipc (chunk, p); 1698 } 1699 *macro_start = c; 1700 *macro_len = p - c; 1701 ucl_chunk_skipc (chunk, p); 1702 break; 1703 default: 1704 /* Macro is not enclosed in quotes or braces */ 1705 c = p; 1706 while (p < chunk->end) { 1707 if (ucl_lex_is_atom_end (*p)) { 1708 break; 1709 } 1710 ucl_chunk_skipc (chunk, p); 1711 } 1712 *macro_start = c; 1713 *macro_len = p - c; 1714 break; 1715 } 1716 1717 /* We are at the end of a macro */ 1718 /* Skip ';' and space characters and return to previous state */ 1719 while (p < chunk->end) { 1720 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') { 1721 break; 1722 } 1723 ucl_chunk_skipc (chunk, p); 1724 } 1725 return true; 1726 } 1727 1728 /** 1729 * Parse macro arguments as UCL object 1730 * @param parser parser structure 1731 * @param chunk the current data chunk 1732 * @return 1733 */ 1734 static ucl_object_t * 1735 ucl_parse_macro_arguments (struct ucl_parser *parser, 1736 struct ucl_chunk *chunk) 1737 { 1738 ucl_object_t *res = NULL; 1739 struct ucl_parser *params_parser; 1740 int obraces = 1, ebraces = 0, state = 0; 1741 const unsigned char *p, *c; 1742 size_t args_len = 0; 1743 struct ucl_parser_saved_state saved; 1744 1745 saved.column = chunk->column; 1746 saved.line = chunk->line; 1747 saved.pos = chunk->pos; 1748 saved.remain = chunk->remain; 1749 p = chunk->pos; 1750 1751 if (*p != '(' || chunk->remain < 2) { 1752 return NULL; 1753 } 1754 1755 /* Set begin and start */ 1756 ucl_chunk_skipc (chunk, p); 1757 c = p; 1758 1759 while ((p) < (chunk)->end) { 1760 switch (state) { 1761 case 0: 1762 /* Parse symbols and check for '(', ')' and '"' */ 1763 if (*p == '(') { 1764 obraces ++; 1765 } 1766 else if (*p == ')') { 1767 ebraces ++; 1768 } 1769 else if (*p == '"') { 1770 state = 1; 1771 } 1772 /* Check pairing */ 1773 if (obraces == ebraces) { 1774 state = 99; 1775 } 1776 else { 1777 args_len ++; 1778 } 1779 /* Check overflow */ 1780 if (chunk->remain == 0) { 1781 goto restore_chunk; 1782 } 1783 ucl_chunk_skipc (chunk, p); 1784 break; 1785 case 1: 1786 /* We have quote character, so skip all but quotes */ 1787 if (*p == '"' && *(p - 1) != '\\') { 1788 state = 0; 1789 } 1790 if (chunk->remain == 0) { 1791 goto restore_chunk; 1792 } 1793 ucl_chunk_skipc (chunk, p); 1794 break; 1795 case 99: 1796 /* 1797 * We have read the full body of arguments, so we need to parse and set 1798 * object from that 1799 */ 1800 params_parser = ucl_parser_new (parser->flags); 1801 if (!ucl_parser_add_chunk (params_parser, c, args_len)) { 1802 ucl_set_err (parser, UCL_ESYNTAX, "macro arguments parsing error", 1803 &parser->err); 1804 } 1805 else { 1806 res = ucl_parser_get_object (params_parser); 1807 } 1808 ucl_parser_free (params_parser); 1809 1810 return res; 1811 1812 break; 1813 } 1814 } 1815 1816 return res; 1817 1818 restore_chunk: 1819 chunk->column = saved.column; 1820 chunk->line = saved.line; 1821 chunk->pos = saved.pos; 1822 chunk->remain = saved.remain; 1823 1824 return NULL; 1825 } 1826 1827 #define SKIP_SPACES_COMMENTS(parser, chunk, p) do { \ 1828 while ((p) < (chunk)->end) { \ 1829 if (!ucl_test_character (*(p), UCL_CHARACTER_WHITESPACE_UNSAFE)) { \ 1830 if ((chunk)->remain >= 2 && ucl_lex_is_comment ((p)[0], (p)[1])) { \ 1831 if (!ucl_skip_comments (parser)) { \ 1832 return false; \ 1833 } \ 1834 p = (chunk)->pos; \ 1835 } \ 1836 break; \ 1837 } \ 1838 ucl_chunk_skipc (chunk, p); \ 1839 } \ 1840 } while(0) 1841 1842 /** 1843 * Handle the main states of rcl parser 1844 * @param parser parser structure 1845 * @param data the pointer to the beginning of a chunk 1846 * @param len the length of a chunk 1847 * @return true if chunk has been parsed and false in case of error 1848 */ 1849 static bool 1850 ucl_state_machine (struct ucl_parser *parser) 1851 { 1852 ucl_object_t *obj, *macro_args; 1853 struct ucl_chunk *chunk = parser->chunks; 1854 const unsigned char *p, *c = NULL, *macro_start = NULL; 1855 unsigned char *macro_escaped; 1856 size_t macro_len = 0; 1857 struct ucl_macro *macro = NULL; 1858 bool next_key = false, end_of_object = false, ret; 1859 1860 if (parser->top_obj == NULL) { 1861 if (*chunk->pos == '[') { 1862 obj = ucl_add_parser_stack (NULL, parser, true, 0); 1863 } 1864 else { 1865 obj = ucl_add_parser_stack (NULL, parser, false, 0); 1866 } 1867 if (obj == NULL) { 1868 return false; 1869 } 1870 parser->top_obj = obj; 1871 parser->cur_obj = obj; 1872 parser->state = UCL_STATE_INIT; 1873 } 1874 1875 p = chunk->pos; 1876 while (chunk->pos < chunk->end) { 1877 switch (parser->state) { 1878 case UCL_STATE_INIT: 1879 /* 1880 * At the init state we can either go to the parse array or object 1881 * if we got [ or { correspondingly or can just treat new data as 1882 * a key of newly created object 1883 */ 1884 if (!ucl_skip_comments (parser)) { 1885 parser->prev_state = parser->state; 1886 parser->state = UCL_STATE_ERROR; 1887 return false; 1888 } 1889 else { 1890 /* Skip any spaces */ 1891 while (p < chunk->end && ucl_test_character (*p, 1892 UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1893 ucl_chunk_skipc (chunk, p); 1894 } 1895 p = chunk->pos; 1896 if (*p == '[') { 1897 parser->state = UCL_STATE_VALUE; 1898 ucl_chunk_skipc (chunk, p); 1899 } 1900 else { 1901 parser->state = UCL_STATE_KEY; 1902 if (*p == '{') { 1903 ucl_chunk_skipc (chunk, p); 1904 } 1905 } 1906 } 1907 break; 1908 case UCL_STATE_KEY: 1909 /* Skip any spaces */ 1910 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1911 ucl_chunk_skipc (chunk, p); 1912 } 1913 if (*p == '}') { 1914 /* We have the end of an object */ 1915 parser->state = UCL_STATE_AFTER_VALUE; 1916 continue; 1917 } 1918 if (parser->stack == NULL) { 1919 /* No objects are on stack, but we want to parse a key */ 1920 ucl_set_err (parser, UCL_ESYNTAX, "top object is finished but the parser " 1921 "expects a key", &parser->err); 1922 parser->prev_state = parser->state; 1923 parser->state = UCL_STATE_ERROR; 1924 return false; 1925 } 1926 if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) { 1927 parser->prev_state = parser->state; 1928 parser->state = UCL_STATE_ERROR; 1929 return false; 1930 } 1931 if (end_of_object) { 1932 p = chunk->pos; 1933 parser->state = UCL_STATE_AFTER_VALUE; 1934 continue; 1935 } 1936 else if (parser->state != UCL_STATE_MACRO_NAME) { 1937 if (next_key && parser->stack->obj->type == UCL_OBJECT) { 1938 /* Parse more keys and nest objects accordingly */ 1939 obj = ucl_add_parser_stack (parser->cur_obj, parser, false, 1940 parser->stack->level + 1); 1941 if (obj == NULL) { 1942 return false; 1943 } 1944 } 1945 else { 1946 parser->state = UCL_STATE_VALUE; 1947 } 1948 } 1949 else { 1950 c = chunk->pos; 1951 } 1952 p = chunk->pos; 1953 break; 1954 case UCL_STATE_VALUE: 1955 /* We need to check what we do have */ 1956 if (!ucl_parse_value (parser, chunk)) { 1957 parser->prev_state = parser->state; 1958 parser->state = UCL_STATE_ERROR; 1959 return false; 1960 } 1961 /* State is set in ucl_parse_value call */ 1962 p = chunk->pos; 1963 break; 1964 case UCL_STATE_AFTER_VALUE: 1965 if (!ucl_parse_after_value (parser, chunk)) { 1966 parser->prev_state = parser->state; 1967 parser->state = UCL_STATE_ERROR; 1968 return false; 1969 } 1970 if (parser->stack != NULL) { 1971 if (parser->stack->obj->type == UCL_OBJECT) { 1972 parser->state = UCL_STATE_KEY; 1973 } 1974 else { 1975 /* Array */ 1976 parser->state = UCL_STATE_VALUE; 1977 } 1978 } 1979 else { 1980 /* Skip everything at the end */ 1981 return true; 1982 } 1983 p = chunk->pos; 1984 break; 1985 case UCL_STATE_MACRO_NAME: 1986 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && 1987 *p != '(') { 1988 ucl_chunk_skipc (chunk, p); 1989 } 1990 else if (p - c > 0) { 1991 /* We got macro name */ 1992 macro_len = (size_t)(p - c); 1993 HASH_FIND (hh, parser->macroes, c, macro_len, macro); 1994 if (macro == NULL) { 1995 ucl_create_err (&parser->err, "error on line %d at column %d: " 1996 "unknown macro: '%.*s', character: '%c'", 1997 chunk->line, chunk->column, (int)(p - c), c, *chunk->pos); 1998 parser->state = UCL_STATE_ERROR; 1999 return false; 2000 } 2001 /* Now we need to skip all spaces */ 2002 SKIP_SPACES_COMMENTS(parser, chunk, p); 2003 parser->state = UCL_STATE_MACRO; 2004 } 2005 break; 2006 case UCL_STATE_MACRO: 2007 if (*chunk->pos == '(') { 2008 macro_args = ucl_parse_macro_arguments (parser, chunk); 2009 p = chunk->pos; 2010 if (macro_args) { 2011 SKIP_SPACES_COMMENTS(parser, chunk, p); 2012 } 2013 } 2014 else { 2015 macro_args = NULL; 2016 } 2017 if (!ucl_parse_macro_value (parser, chunk, macro, 2018 ¯o_start, ¯o_len)) { 2019 parser->prev_state = parser->state; 2020 parser->state = UCL_STATE_ERROR; 2021 return false; 2022 } 2023 macro_len = ucl_expand_variable (parser, ¯o_escaped, 2024 macro_start, macro_len); 2025 parser->state = parser->prev_state; 2026 if (macro_escaped == NULL) { 2027 ret = macro->handler (macro_start, macro_len, macro_args, 2028 macro->ud); 2029 } 2030 else { 2031 ret = macro->handler (macro_escaped, macro_len, macro_args, 2032 macro->ud); 2033 UCL_FREE (macro_len + 1, macro_escaped); 2034 } 2035 p = chunk->pos; 2036 if (macro_args) { 2037 ucl_object_unref (macro_args); 2038 } 2039 if (!ret) { 2040 return false; 2041 } 2042 break; 2043 default: 2044 /* TODO: add all states */ 2045 ucl_set_err (parser, UCL_EINTERNAL, 2046 "internal error: parser is in an unknown state", &parser->err); 2047 parser->state = UCL_STATE_ERROR; 2048 return false; 2049 } 2050 } 2051 2052 return true; 2053 } 2054 2055 struct ucl_parser* 2056 ucl_parser_new (int flags) 2057 { 2058 struct ucl_parser *new; 2059 2060 new = UCL_ALLOC (sizeof (struct ucl_parser)); 2061 if (new == NULL) { 2062 return NULL; 2063 } 2064 memset (new, 0, sizeof (struct ucl_parser)); 2065 2066 ucl_parser_register_macro (new, "include", ucl_include_handler, new); 2067 ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new); 2068 ucl_parser_register_macro (new, "includes", ucl_includes_handler, new); 2069 2070 new->flags = flags; 2071 2072 /* Initial assumption about filevars */ 2073 ucl_parser_set_filevars (new, NULL, false); 2074 2075 return new; 2076 } 2077 2078 2079 void 2080 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro, 2081 ucl_macro_handler handler, void* ud) 2082 { 2083 struct ucl_macro *new; 2084 2085 if (macro == NULL || handler == NULL) { 2086 return; 2087 } 2088 new = UCL_ALLOC (sizeof (struct ucl_macro)); 2089 if (new == NULL) { 2090 return; 2091 } 2092 memset (new, 0, sizeof (struct ucl_macro)); 2093 new->handler = handler; 2094 new->name = strdup (macro); 2095 new->ud = ud; 2096 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); 2097 } 2098 2099 void 2100 ucl_parser_register_variable (struct ucl_parser *parser, const char *var, 2101 const char *value) 2102 { 2103 struct ucl_variable *new = NULL, *cur; 2104 2105 if (var == NULL) { 2106 return; 2107 } 2108 2109 /* Find whether a variable already exists */ 2110 LL_FOREACH (parser->variables, cur) { 2111 if (strcmp (cur->var, var) == 0) { 2112 new = cur; 2113 break; 2114 } 2115 } 2116 2117 if (value == NULL) { 2118 2119 if (new != NULL) { 2120 /* Remove variable */ 2121 DL_DELETE (parser->variables, new); 2122 free (new->var); 2123 free (new->value); 2124 UCL_FREE (sizeof (struct ucl_variable), new); 2125 } 2126 else { 2127 /* Do nothing */ 2128 return; 2129 } 2130 } 2131 else { 2132 if (new == NULL) { 2133 new = UCL_ALLOC (sizeof (struct ucl_variable)); 2134 if (new == NULL) { 2135 return; 2136 } 2137 memset (new, 0, sizeof (struct ucl_variable)); 2138 new->var = strdup (var); 2139 new->var_len = strlen (var); 2140 new->value = strdup (value); 2141 new->value_len = strlen (value); 2142 2143 DL_APPEND (parser->variables, new); 2144 } 2145 else { 2146 free (new->value); 2147 new->value = strdup (value); 2148 new->value_len = strlen (value); 2149 } 2150 } 2151 } 2152 2153 void 2154 ucl_parser_set_variables_handler (struct ucl_parser *parser, 2155 ucl_variable_handler handler, void *ud) 2156 { 2157 parser->var_handler = handler; 2158 parser->var_data = ud; 2159 } 2160 2161 bool 2162 ucl_parser_add_chunk_priority (struct ucl_parser *parser, const unsigned char *data, 2163 size_t len, unsigned priority) 2164 { 2165 struct ucl_chunk *chunk; 2166 2167 if (data == NULL) { 2168 ucl_create_err (&parser->err, "invalid chunk added"); 2169 return false; 2170 } 2171 if (len == 0) { 2172 parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority); 2173 return true; 2174 } 2175 if (parser->state != UCL_STATE_ERROR) { 2176 chunk = UCL_ALLOC (sizeof (struct ucl_chunk)); 2177 if (chunk == NULL) { 2178 ucl_create_err (&parser->err, "cannot allocate chunk structure"); 2179 return false; 2180 } 2181 chunk->begin = data; 2182 chunk->remain = len; 2183 chunk->pos = chunk->begin; 2184 chunk->end = chunk->begin + len; 2185 chunk->line = 1; 2186 chunk->column = 0; 2187 chunk->priority = priority; 2188 LL_PREPEND (parser->chunks, chunk); 2189 parser->recursion ++; 2190 if (parser->recursion > UCL_MAX_RECURSION) { 2191 ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d", 2192 parser->recursion); 2193 return false; 2194 } 2195 return ucl_state_machine (parser); 2196 } 2197 2198 ucl_create_err (&parser->err, "a parser is in an invalid state"); 2199 2200 return false; 2201 } 2202 2203 bool 2204 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data, 2205 size_t len) 2206 { 2207 return ucl_parser_add_chunk_priority (parser, data, len, 0); 2208 } 2209 2210 bool 2211 ucl_parser_add_string (struct ucl_parser *parser, const char *data, 2212 size_t len) 2213 { 2214 if (data == NULL) { 2215 ucl_create_err (&parser->err, "invalid string added"); 2216 return false; 2217 } 2218 if (len == 0) { 2219 len = strlen (data); 2220 } 2221 2222 return ucl_parser_add_chunk (parser, (const unsigned char *)data, len); 2223 } 2224