1 /* Copyright (c) 2013, Vsevolod Stakhov 2 * All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * * Redistributions of source code must retain the above copyright 7 * notice, this list of conditions and the following disclaimer. 8 * * Redistributions in binary form must reproduce the above copyright 9 * notice, this list of conditions and the following disclaimer in the 10 * documentation and/or other materials provided with the distribution. 11 * 12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY 13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY 16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 */ 23 24 #include "ucl.h" 25 #include "ucl_internal.h" 26 #include "ucl_chartable.h" 27 28 /** 29 * @file rcl_parser.c 30 * The implementation of rcl parser 31 */ 32 33 struct ucl_parser_saved_state { 34 unsigned int line; 35 unsigned int column; 36 size_t remain; 37 const unsigned char *pos; 38 }; 39 40 /** 41 * Move up to len characters 42 * @param parser 43 * @param begin 44 * @param len 45 * @return new position in chunk 46 */ 47 #define ucl_chunk_skipc(chunk, p) do{ \ 48 if (*(p) == '\n') { \ 49 (chunk)->line ++; \ 50 (chunk)->column = 0; \ 51 } \ 52 else (chunk)->column ++; \ 53 (p++); \ 54 (chunk)->pos ++; \ 55 (chunk)->remain --; \ 56 } while (0) 57 58 static inline void 59 ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err) 60 { 61 if (chunk->pos < chunk->end) { 62 if (isgraph (*chunk->pos)) { 63 ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'", 64 chunk->line, chunk->column, str, *chunk->pos); 65 } 66 else { 67 ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'", 68 chunk->line, chunk->column, str, (int)*chunk->pos); 69 } 70 } 71 else { 72 ucl_create_err (err, "error at the end of chunk: %s", str); 73 } 74 } 75 76 /** 77 * Skip all comments from the current pos resolving nested and multiline comments 78 * @param parser 79 * @return 80 */ 81 static bool 82 ucl_skip_comments (struct ucl_parser *parser) 83 { 84 struct ucl_chunk *chunk = parser->chunks; 85 const unsigned char *p; 86 int comments_nested = 0; 87 88 p = chunk->pos; 89 90 start: 91 if (*p == '#') { 92 if (parser->state != UCL_STATE_SCOMMENT && 93 parser->state != UCL_STATE_MCOMMENT) { 94 while (p < chunk->end) { 95 if (*p == '\n') { 96 ucl_chunk_skipc (chunk, p); 97 goto start; 98 } 99 ucl_chunk_skipc (chunk, p); 100 } 101 } 102 } 103 else if (*p == '/' && chunk->remain >= 2) { 104 if (p[1] == '*') { 105 ucl_chunk_skipc (chunk, p); 106 comments_nested ++; 107 ucl_chunk_skipc (chunk, p); 108 109 while (p < chunk->end) { 110 if (*p == '*') { 111 ucl_chunk_skipc (chunk, p); 112 if (*p == '/') { 113 comments_nested --; 114 if (comments_nested == 0) { 115 ucl_chunk_skipc (chunk, p); 116 goto start; 117 } 118 } 119 ucl_chunk_skipc (chunk, p); 120 } 121 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') { 122 comments_nested ++; 123 ucl_chunk_skipc (chunk, p); 124 ucl_chunk_skipc (chunk, p); 125 continue; 126 } 127 ucl_chunk_skipc (chunk, p); 128 } 129 if (comments_nested != 0) { 130 ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err); 131 return false; 132 } 133 } 134 } 135 136 return true; 137 } 138 139 /** 140 * Return multiplier for a character 141 * @param c multiplier character 142 * @param is_bytes if true use 1024 multiplier 143 * @return multiplier 144 */ 145 static inline unsigned long 146 ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) { 147 const struct { 148 char c; 149 long mult_normal; 150 long mult_bytes; 151 } multipliers[] = { 152 {'m', 1000 * 1000, 1024 * 1024}, 153 {'k', 1000, 1024}, 154 {'g', 1000 * 1000 * 1000, 1024 * 1024 * 1024} 155 }; 156 int i; 157 158 for (i = 0; i < 3; i ++) { 159 if (tolower (c) == multipliers[i].c) { 160 if (is_bytes) { 161 return multipliers[i].mult_bytes; 162 } 163 return multipliers[i].mult_normal; 164 } 165 } 166 167 return 1; 168 } 169 170 171 /** 172 * Return multiplier for time scaling 173 * @param c 174 * @return 175 */ 176 static inline double 177 ucl_lex_time_multiplier (const unsigned char c) { 178 const struct { 179 char c; 180 double mult; 181 } multipliers[] = { 182 {'m', 60}, 183 {'h', 60 * 60}, 184 {'d', 60 * 60 * 24}, 185 {'w', 60 * 60 * 24 * 7}, 186 {'y', 60 * 60 * 24 * 7 * 365} 187 }; 188 int i; 189 190 for (i = 0; i < 5; i ++) { 191 if (tolower (c) == multipliers[i].c) { 192 return multipliers[i].mult; 193 } 194 } 195 196 return 1; 197 } 198 199 /** 200 * Return true if a character is a end of an atom 201 * @param c 202 * @return 203 */ 204 static inline bool 205 ucl_lex_is_atom_end (const unsigned char c) 206 { 207 return ucl_test_character (c, UCL_CHARACTER_VALUE_END); 208 } 209 210 static inline bool 211 ucl_lex_is_comment (const unsigned char c1, const unsigned char c2) 212 { 213 if (c1 == '/') { 214 if (c2 == '*') { 215 return true; 216 } 217 } 218 else if (c1 == '#') { 219 return true; 220 } 221 return false; 222 } 223 224 /** 225 * Check variable found 226 * @param parser 227 * @param ptr 228 * @param remain 229 * @param out_len 230 * @param strict 231 * @param found 232 * @return 233 */ 234 static inline const char * 235 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain, 236 size_t *out_len, bool strict, bool *found) 237 { 238 struct ucl_variable *var; 239 unsigned char *dst; 240 size_t dstlen; 241 bool need_free = false; 242 243 LL_FOREACH (parser->variables, var) { 244 if (strict) { 245 if (remain == var->var_len) { 246 if (memcmp (ptr, var->var, var->var_len) == 0) { 247 *out_len += var->value_len; 248 *found = true; 249 return (ptr + var->var_len); 250 } 251 } 252 } 253 else { 254 if (remain >= var->var_len) { 255 if (memcmp (ptr, var->var, var->var_len) == 0) { 256 *out_len += var->value_len; 257 *found = true; 258 return (ptr + var->var_len); 259 } 260 } 261 } 262 } 263 264 /* XXX: can only handle ${VAR} */ 265 if (!(*found) && parser->var_handler != NULL && strict) { 266 /* Call generic handler */ 267 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free, 268 parser->var_data)) { 269 *found = true; 270 if (need_free) { 271 free (dst); 272 } 273 return (ptr + remain); 274 } 275 } 276 277 return ptr; 278 } 279 280 /** 281 * Check for a variable in a given string 282 * @param parser 283 * @param ptr 284 * @param remain 285 * @param out_len 286 * @param vars_found 287 * @return 288 */ 289 static const char * 290 ucl_check_variable (struct ucl_parser *parser, const char *ptr, 291 size_t remain, size_t *out_len, bool *vars_found) 292 { 293 const char *p, *end, *ret = ptr; 294 bool found = false; 295 296 if (*ptr == '{') { 297 /* We need to match the variable enclosed in braces */ 298 p = ptr + 1; 299 end = ptr + remain; 300 while (p < end) { 301 if (*p == '}') { 302 ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, 303 out_len, true, &found); 304 if (found) { 305 /* {} must be excluded actually */ 306 ret ++; 307 if (!*vars_found) { 308 *vars_found = true; 309 } 310 } 311 else { 312 *out_len += 2; 313 } 314 break; 315 } 316 p ++; 317 } 318 } 319 else if (*ptr != '$') { 320 /* Not count escaped dollar sign */ 321 ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found); 322 if (found && !*vars_found) { 323 *vars_found = true; 324 } 325 if (!found) { 326 (*out_len) ++; 327 } 328 } 329 else { 330 ret ++; 331 (*out_len) ++; 332 } 333 334 return ret; 335 } 336 337 /** 338 * Expand a single variable 339 * @param parser 340 * @param ptr 341 * @param remain 342 * @param dest 343 * @return 344 */ 345 static const char * 346 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr, 347 size_t remain, unsigned char **dest) 348 { 349 unsigned char *d = *dest, *dst; 350 const char *p = ptr + 1, *ret; 351 struct ucl_variable *var; 352 size_t dstlen; 353 bool need_free = false; 354 bool found = false; 355 bool strict = false; 356 357 ret = ptr + 1; 358 remain --; 359 360 if (*p == '$') { 361 *d++ = *p++; 362 *dest = d; 363 return p; 364 } 365 else if (*p == '{') { 366 p ++; 367 strict = true; 368 ret += 2; 369 remain -= 2; 370 } 371 372 LL_FOREACH (parser->variables, var) { 373 if (remain >= var->var_len) { 374 if (memcmp (p, var->var, var->var_len) == 0) { 375 memcpy (d, var->value, var->value_len); 376 ret += var->var_len; 377 d += var->value_len; 378 found = true; 379 break; 380 } 381 } 382 } 383 if (!found) { 384 if (strict && parser->var_handler != NULL) { 385 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free, 386 parser->var_data)) { 387 memcpy (d, dst, dstlen); 388 ret += dstlen; 389 d += remain; 390 found = true; 391 } 392 } 393 394 /* Leave variable as is */ 395 if (!found) { 396 memcpy (d, ptr, 2); 397 d += 2; 398 ret --; 399 } 400 } 401 402 *dest = d; 403 return ret; 404 } 405 406 /** 407 * Expand variables in string 408 * @param parser 409 * @param dst 410 * @param src 411 * @param in_len 412 * @return 413 */ 414 static ssize_t 415 ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst, 416 const char *src, size_t in_len) 417 { 418 const char *p, *end = src + in_len; 419 unsigned char *d; 420 size_t out_len = 0; 421 bool vars_found = false; 422 423 p = src; 424 while (p != end) { 425 if (*p == '$') { 426 p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found); 427 } 428 else { 429 p ++; 430 out_len ++; 431 } 432 } 433 434 if (!vars_found) { 435 /* Trivial case */ 436 *dst = NULL; 437 return in_len; 438 } 439 440 *dst = UCL_ALLOC (out_len + 1); 441 if (*dst == NULL) { 442 return in_len; 443 } 444 445 d = *dst; 446 p = src; 447 while (p != end) { 448 if (*p == '$') { 449 p = ucl_expand_single_variable (parser, p, end - p, &d); 450 } 451 else { 452 *d++ = *p++; 453 } 454 } 455 456 *d = '\0'; 457 458 return out_len; 459 } 460 461 /** 462 * Store or copy pointer to the trash stack 463 * @param parser parser object 464 * @param src src string 465 * @param dst destination buffer (trash stack pointer) 466 * @param dst_const const destination pointer (e.g. value of object) 467 * @param in_len input length 468 * @param need_unescape need to unescape source (and copy it) 469 * @param need_lowercase need to lowercase value (and copy) 470 * @param need_expand need to expand variables (and copy as well) 471 * @return output length (excluding \0 symbol) 472 */ 473 static inline ssize_t 474 ucl_copy_or_store_ptr (struct ucl_parser *parser, 475 const unsigned char *src, unsigned char **dst, 476 const char **dst_const, size_t in_len, 477 bool need_unescape, bool need_lowercase, bool need_expand) 478 { 479 ssize_t ret = -1, tret; 480 unsigned char *tmp; 481 482 if (need_unescape || need_lowercase || 483 (need_expand && parser->variables != NULL) || 484 !(parser->flags & UCL_PARSER_ZEROCOPY)) { 485 /* Copy string */ 486 *dst = UCL_ALLOC (in_len + 1); 487 if (*dst == NULL) { 488 ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err); 489 return false; 490 } 491 if (need_lowercase) { 492 ret = ucl_strlcpy_tolower (*dst, src, in_len + 1); 493 } 494 else { 495 ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1); 496 } 497 498 if (need_unescape) { 499 ret = ucl_unescape_json_string (*dst, ret); 500 } 501 if (need_expand) { 502 tmp = *dst; 503 tret = ret; 504 ret = ucl_expand_variable (parser, dst, tmp, ret); 505 if (*dst == NULL) { 506 /* Nothing to expand */ 507 *dst = tmp; 508 ret = tret; 509 } 510 } 511 *dst_const = *dst; 512 } 513 else { 514 *dst_const = src; 515 ret = in_len; 516 } 517 518 return ret; 519 } 520 521 /** 522 * Create and append an object at the specified level 523 * @param parser 524 * @param is_array 525 * @param level 526 * @return 527 */ 528 static inline ucl_object_t * 529 ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level) 530 { 531 struct ucl_stack *st; 532 533 if (!is_array) { 534 if (obj == NULL) { 535 obj = ucl_object_typed_new (UCL_OBJECT); 536 } 537 else { 538 obj->type = UCL_OBJECT; 539 } 540 obj->value.ov = ucl_hash_create (); 541 parser->state = UCL_STATE_KEY; 542 } 543 else { 544 if (obj == NULL) { 545 obj = ucl_object_typed_new (UCL_ARRAY); 546 } 547 else { 548 obj->type = UCL_ARRAY; 549 } 550 parser->state = UCL_STATE_VALUE; 551 } 552 553 st = UCL_ALLOC (sizeof (struct ucl_stack)); 554 if (st == NULL) { 555 ucl_set_err (parser->chunks, 0, "cannot allocate memory for an object", &parser->err); 556 return NULL; 557 } 558 st->obj = obj; 559 st->level = level; 560 LL_PREPEND (parser->stack, st); 561 parser->cur_obj = obj; 562 563 return obj; 564 } 565 566 int 567 ucl_maybe_parse_number (ucl_object_t *obj, 568 const char *start, const char *end, const char **pos, 569 bool allow_double, bool number_bytes, bool allow_time) 570 { 571 const char *p = start, *c = start; 572 char *endptr; 573 bool got_dot = false, got_exp = false, need_double = false, 574 is_time = false, valid_start = false, is_hex = false, 575 is_neg = false; 576 double dv = 0; 577 int64_t lv = 0; 578 579 if (*p == '-') { 580 is_neg = true; 581 c ++; 582 p ++; 583 } 584 while (p < end) { 585 if (is_hex && isxdigit (*p)) { 586 p ++; 587 } 588 else if (isdigit (*p)) { 589 valid_start = true; 590 p ++; 591 } 592 else if (!is_hex && (*p == 'x' || *p == 'X')) { 593 is_hex = true; 594 allow_double = false; 595 c = p + 1; 596 } 597 else if (allow_double) { 598 if (p == c) { 599 /* Empty digits sequence, not a number */ 600 *pos = start; 601 return EINVAL; 602 } 603 else if (*p == '.') { 604 if (got_dot) { 605 /* Double dots, not a number */ 606 *pos = start; 607 return EINVAL; 608 } 609 else { 610 got_dot = true; 611 need_double = true; 612 p ++; 613 } 614 } 615 else if (*p == 'e' || *p == 'E') { 616 if (got_exp) { 617 /* Double exp, not a number */ 618 *pos = start; 619 return EINVAL; 620 } 621 else { 622 got_exp = true; 623 need_double = true; 624 p ++; 625 if (p >= end) { 626 *pos = start; 627 return EINVAL; 628 } 629 if (!isdigit (*p) && *p != '+' && *p != '-') { 630 /* Wrong exponent sign */ 631 *pos = start; 632 return EINVAL; 633 } 634 else { 635 p ++; 636 } 637 } 638 } 639 else { 640 /* Got the end of the number, need to check */ 641 break; 642 } 643 } 644 else { 645 break; 646 } 647 } 648 649 if (!valid_start) { 650 *pos = start; 651 return EINVAL; 652 } 653 654 errno = 0; 655 if (need_double) { 656 dv = strtod (c, &endptr); 657 } 658 else { 659 if (is_hex) { 660 lv = strtoimax (c, &endptr, 16); 661 } 662 else { 663 lv = strtoimax (c, &endptr, 10); 664 } 665 } 666 if (errno == ERANGE) { 667 *pos = start; 668 return ERANGE; 669 } 670 671 /* Now check endptr */ 672 if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0' || 673 ucl_test_character (*endptr, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 674 p = endptr; 675 goto set_obj; 676 } 677 678 if (endptr < end && endptr != start) { 679 p = endptr; 680 switch (*p) { 681 case 'm': 682 case 'M': 683 case 'g': 684 case 'G': 685 case 'k': 686 case 'K': 687 if (end - p >= 2) { 688 if (p[1] == 's' || p[1] == 'S') { 689 /* Milliseconds */ 690 if (!need_double) { 691 need_double = true; 692 dv = lv; 693 } 694 is_time = true; 695 if (p[0] == 'm' || p[0] == 'M') { 696 dv /= 1000.; 697 } 698 else { 699 dv *= ucl_lex_num_multiplier (*p, false); 700 } 701 p += 2; 702 goto set_obj; 703 } 704 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) { 705 /* Bytes */ 706 if (need_double) { 707 need_double = false; 708 lv = dv; 709 } 710 lv *= ucl_lex_num_multiplier (*p, true); 711 p += 2; 712 goto set_obj; 713 } 714 else if (ucl_lex_is_atom_end (p[1])) { 715 if (need_double) { 716 dv *= ucl_lex_num_multiplier (*p, false); 717 } 718 else { 719 lv *= ucl_lex_num_multiplier (*p, number_bytes); 720 } 721 p ++; 722 goto set_obj; 723 } 724 else if (allow_time && end - p >= 3) { 725 if (tolower (p[0]) == 'm' && 726 tolower (p[1]) == 'i' && 727 tolower (p[2]) == 'n') { 728 /* Minutes */ 729 if (!need_double) { 730 need_double = true; 731 dv = lv; 732 } 733 is_time = true; 734 dv *= 60.; 735 p += 3; 736 goto set_obj; 737 } 738 } 739 } 740 else { 741 if (need_double) { 742 dv *= ucl_lex_num_multiplier (*p, false); 743 } 744 else { 745 lv *= ucl_lex_num_multiplier (*p, number_bytes); 746 } 747 p ++; 748 goto set_obj; 749 } 750 break; 751 case 'S': 752 case 's': 753 if (allow_time && 754 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { 755 if (!need_double) { 756 need_double = true; 757 dv = lv; 758 } 759 p ++; 760 is_time = true; 761 goto set_obj; 762 } 763 break; 764 case 'h': 765 case 'H': 766 case 'd': 767 case 'D': 768 case 'w': 769 case 'W': 770 case 'Y': 771 case 'y': 772 if (allow_time && 773 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { 774 if (!need_double) { 775 need_double = true; 776 dv = lv; 777 } 778 is_time = true; 779 dv *= ucl_lex_time_multiplier (*p); 780 p ++; 781 goto set_obj; 782 } 783 break; 784 } 785 } 786 787 *pos = c; 788 return EINVAL; 789 790 set_obj: 791 if (allow_double && (need_double || is_time)) { 792 if (!is_time) { 793 obj->type = UCL_FLOAT; 794 } 795 else { 796 obj->type = UCL_TIME; 797 } 798 obj->value.dv = is_neg ? (-dv) : dv; 799 } 800 else { 801 obj->type = UCL_INT; 802 obj->value.iv = is_neg ? (-lv) : lv; 803 } 804 *pos = p; 805 return 0; 806 } 807 808 /** 809 * Parse possible number 810 * @param parser 811 * @param chunk 812 * @return true if a number has been parsed 813 */ 814 static bool 815 ucl_lex_number (struct ucl_parser *parser, 816 struct ucl_chunk *chunk, ucl_object_t *obj) 817 { 818 const unsigned char *pos; 819 int ret; 820 821 ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos, 822 true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0)); 823 824 if (ret == 0) { 825 chunk->remain -= pos - chunk->pos; 826 chunk->column += pos - chunk->pos; 827 chunk->pos = pos; 828 return true; 829 } 830 else if (ret == ERANGE) { 831 ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err); 832 } 833 834 return false; 835 } 836 837 /** 838 * Parse quoted string with possible escapes 839 * @param parser 840 * @param chunk 841 * @return true if a string has been parsed 842 */ 843 static bool 844 ucl_lex_json_string (struct ucl_parser *parser, 845 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand) 846 { 847 const unsigned char *p = chunk->pos; 848 unsigned char c; 849 int i; 850 851 while (p < chunk->end) { 852 c = *p; 853 if (c < 0x1F) { 854 /* Unmasked control character */ 855 if (c == '\n') { 856 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err); 857 } 858 else { 859 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err); 860 } 861 return false; 862 } 863 else if (c == '\\') { 864 ucl_chunk_skipc (chunk, p); 865 c = *p; 866 if (p >= chunk->end) { 867 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err); 868 return false; 869 } 870 else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) { 871 if (c == 'u') { 872 ucl_chunk_skipc (chunk, p); 873 for (i = 0; i < 4 && p < chunk->end; i ++) { 874 if (!isxdigit (*p)) { 875 ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err); 876 return false; 877 } 878 ucl_chunk_skipc (chunk, p); 879 } 880 if (p >= chunk->end) { 881 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err); 882 return false; 883 } 884 } 885 else { 886 ucl_chunk_skipc (chunk, p); 887 } 888 } 889 *need_unescape = true; 890 *ucl_escape = true; 891 continue; 892 } 893 else if (c == '"') { 894 ucl_chunk_skipc (chunk, p); 895 return true; 896 } 897 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) { 898 *ucl_escape = true; 899 } 900 else if (c == '$') { 901 *var_expand = true; 902 } 903 ucl_chunk_skipc (chunk, p); 904 } 905 906 ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err); 907 return false; 908 } 909 910 /** 911 * Parse a key in an object 912 * @param parser 913 * @param chunk 914 * @return true if a key has been parsed 915 */ 916 static bool 917 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object) 918 { 919 const unsigned char *p, *c = NULL, *end, *t; 920 const char *key = NULL; 921 bool got_quote = false, got_eq = false, got_semicolon = false, 922 need_unescape = false, ucl_escape = false, var_expand = false, 923 got_content = false, got_sep = false; 924 ucl_object_t *nobj, *tobj; 925 ucl_hash_t *container; 926 ssize_t keylen; 927 928 p = chunk->pos; 929 930 if (*p == '.') { 931 /* It is macro actually */ 932 ucl_chunk_skipc (chunk, p); 933 parser->prev_state = parser->state; 934 parser->state = UCL_STATE_MACRO_NAME; 935 return true; 936 } 937 while (p < chunk->end) { 938 /* 939 * A key must start with alpha, number, '/' or '_' and end with space character 940 */ 941 if (c == NULL) { 942 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 943 if (!ucl_skip_comments (parser)) { 944 return false; 945 } 946 p = chunk->pos; 947 } 948 else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 949 ucl_chunk_skipc (chunk, p); 950 } 951 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) { 952 /* The first symbol */ 953 c = p; 954 ucl_chunk_skipc (chunk, p); 955 got_content = true; 956 } 957 else if (*p == '"') { 958 /* JSON style key */ 959 c = p + 1; 960 got_quote = true; 961 got_content = true; 962 ucl_chunk_skipc (chunk, p); 963 } 964 else if (*p == '}') { 965 /* We have actually end of an object */ 966 *end_of_object = true; 967 return true; 968 } 969 else if (*p == '.') { 970 ucl_chunk_skipc (chunk, p); 971 parser->prev_state = parser->state; 972 parser->state = UCL_STATE_MACRO_NAME; 973 return true; 974 } 975 else { 976 /* Invalid identifier */ 977 ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err); 978 return false; 979 } 980 } 981 else { 982 /* Parse the body of a key */ 983 if (!got_quote) { 984 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) { 985 got_content = true; 986 ucl_chunk_skipc (chunk, p); 987 } 988 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) { 989 end = p; 990 break; 991 } 992 else { 993 ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err); 994 return false; 995 } 996 } 997 else { 998 /* We need to parse json like quoted string */ 999 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1000 return false; 1001 } 1002 /* Always escape keys obtained via json */ 1003 end = chunk->pos - 1; 1004 p = chunk->pos; 1005 break; 1006 } 1007 } 1008 } 1009 1010 if (p >= chunk->end && got_content) { 1011 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err); 1012 return false; 1013 } 1014 else if (!got_content) { 1015 return true; 1016 } 1017 *end_of_object = false; 1018 /* We are now at the end of the key, need to parse the rest */ 1019 while (p < chunk->end) { 1020 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 1021 ucl_chunk_skipc (chunk, p); 1022 } 1023 else if (*p == '=') { 1024 if (!got_eq && !got_semicolon) { 1025 ucl_chunk_skipc (chunk, p); 1026 got_eq = true; 1027 } 1028 else { 1029 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err); 1030 return false; 1031 } 1032 } 1033 else if (*p == ':') { 1034 if (!got_eq && !got_semicolon) { 1035 ucl_chunk_skipc (chunk, p); 1036 got_semicolon = true; 1037 } 1038 else { 1039 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err); 1040 return false; 1041 } 1042 } 1043 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1044 /* Check for comment */ 1045 if (!ucl_skip_comments (parser)) { 1046 return false; 1047 } 1048 p = chunk->pos; 1049 } 1050 else { 1051 /* Start value */ 1052 break; 1053 } 1054 } 1055 1056 if (p >= chunk->end && got_content) { 1057 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err); 1058 return false; 1059 } 1060 1061 got_sep = got_semicolon || got_eq; 1062 1063 if (!got_sep) { 1064 /* 1065 * Maybe we have more keys nested, so search for termination character. 1066 * Possible choices: 1067 * 1) key1 key2 ... keyN [:=] value <- we treat that as error 1068 * 2) key1 ... keyN {} or [] <- we treat that as nested objects 1069 * 3) key1 value[;,\n] <- we treat that as linear object 1070 */ 1071 t = p; 1072 *next_key = false; 1073 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) { 1074 t ++; 1075 } 1076 /* Check first non-space character after a key */ 1077 if (*t != '{' && *t != '[') { 1078 while (t < chunk->end) { 1079 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') { 1080 break; 1081 } 1082 else if (*t == '{' || *t == '[') { 1083 *next_key = true; 1084 break; 1085 } 1086 t ++; 1087 } 1088 } 1089 } 1090 1091 /* Create a new object */ 1092 nobj = ucl_object_new (); 1093 keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY], 1094 &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false); 1095 if (keylen == -1) { 1096 ucl_object_unref (nobj); 1097 return false; 1098 } 1099 else if (keylen == 0) { 1100 ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err); 1101 ucl_object_unref (nobj); 1102 return false; 1103 } 1104 1105 container = parser->stack->obj->value.ov; 1106 nobj->key = key; 1107 nobj->keylen = keylen; 1108 tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj)); 1109 if (tobj == NULL) { 1110 container = ucl_hash_insert_object (container, nobj); 1111 nobj->prev = nobj; 1112 nobj->next = NULL; 1113 parser->stack->obj->len ++; 1114 } 1115 else { 1116 DL_APPEND (tobj, nobj); 1117 } 1118 1119 if (ucl_escape) { 1120 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE; 1121 } 1122 parser->stack->obj->value.ov = container; 1123 1124 parser->cur_obj = nobj; 1125 1126 return true; 1127 } 1128 1129 /** 1130 * Parse a cl string 1131 * @param parser 1132 * @param chunk 1133 * @return true if a key has been parsed 1134 */ 1135 static bool 1136 ucl_parse_string_value (struct ucl_parser *parser, 1137 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape) 1138 { 1139 const unsigned char *p; 1140 enum { 1141 UCL_BRACE_ROUND = 0, 1142 UCL_BRACE_SQUARE, 1143 UCL_BRACE_FIGURE 1144 }; 1145 int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}}; 1146 1147 p = chunk->pos; 1148 1149 while (p < chunk->end) { 1150 1151 /* Skip pairs of figure braces */ 1152 if (*p == '{') { 1153 braces[UCL_BRACE_FIGURE][0] ++; 1154 } 1155 else if (*p == '}') { 1156 braces[UCL_BRACE_FIGURE][1] ++; 1157 if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) { 1158 /* This is not a termination symbol, continue */ 1159 ucl_chunk_skipc (chunk, p); 1160 continue; 1161 } 1162 } 1163 /* Skip pairs of square braces */ 1164 else if (*p == '[') { 1165 braces[UCL_BRACE_SQUARE][0] ++; 1166 } 1167 else if (*p == ']') { 1168 braces[UCL_BRACE_SQUARE][1] ++; 1169 if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) { 1170 /* This is not a termination symbol, continue */ 1171 ucl_chunk_skipc (chunk, p); 1172 continue; 1173 } 1174 } 1175 else if (*p == '$') { 1176 *var_expand = true; 1177 } 1178 else if (*p == '\\') { 1179 *need_unescape = true; 1180 ucl_chunk_skipc (chunk, p); 1181 if (p < chunk->end) { 1182 ucl_chunk_skipc (chunk, p); 1183 } 1184 continue; 1185 } 1186 1187 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1188 break; 1189 } 1190 ucl_chunk_skipc (chunk, p); 1191 } 1192 1193 if (p >= chunk->end) { 1194 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err); 1195 return false; 1196 } 1197 1198 return true; 1199 } 1200 1201 /** 1202 * Parse multiline string ending with \n{term}\n 1203 * @param parser 1204 * @param chunk 1205 * @param term 1206 * @param term_len 1207 * @return size of multiline string or 0 in case of error 1208 */ 1209 static int 1210 ucl_parse_multiline_string (struct ucl_parser *parser, 1211 struct ucl_chunk *chunk, const unsigned char *term, 1212 int term_len, unsigned char const **beg, 1213 bool *var_expand) 1214 { 1215 const unsigned char *p, *c; 1216 bool newline = false; 1217 int len = 0; 1218 1219 p = chunk->pos; 1220 1221 c = p; 1222 1223 while (p < chunk->end) { 1224 if (newline) { 1225 if (chunk->end - p < term_len) { 1226 return 0; 1227 } 1228 else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) { 1229 len = p - c; 1230 chunk->remain -= term_len; 1231 chunk->pos = p + term_len; 1232 chunk->column = term_len; 1233 *beg = c; 1234 break; 1235 } 1236 } 1237 if (*p == '\n') { 1238 newline = true; 1239 } 1240 else { 1241 if (*p == '$') { 1242 *var_expand = true; 1243 } 1244 newline = false; 1245 } 1246 ucl_chunk_skipc (chunk, p); 1247 } 1248 1249 return len; 1250 } 1251 1252 static ucl_object_t* 1253 ucl_get_value_object (struct ucl_parser *parser) 1254 { 1255 ucl_object_t *t, *obj = NULL; 1256 1257 if (parser->stack->obj->type == UCL_ARRAY) { 1258 /* Object must be allocated */ 1259 obj = ucl_object_new (); 1260 t = parser->stack->obj->value.av; 1261 DL_APPEND (t, obj); 1262 parser->cur_obj = obj; 1263 parser->stack->obj->value.av = t; 1264 parser->stack->obj->len ++; 1265 } 1266 else { 1267 /* Object has been already allocated */ 1268 obj = parser->cur_obj; 1269 } 1270 1271 return obj; 1272 } 1273 1274 /** 1275 * Handle value data 1276 * @param parser 1277 * @param chunk 1278 * @return 1279 */ 1280 static bool 1281 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1282 { 1283 const unsigned char *p, *c; 1284 ucl_object_t *obj = NULL; 1285 unsigned int stripped_spaces; 1286 int str_len; 1287 bool need_unescape = false, ucl_escape = false, var_expand = false; 1288 1289 p = chunk->pos; 1290 1291 /* Skip any spaces and comments */ 1292 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) || 1293 (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1294 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1295 ucl_chunk_skipc (chunk, p); 1296 } 1297 if (!ucl_skip_comments (parser)) { 1298 return false; 1299 } 1300 p = chunk->pos; 1301 } 1302 1303 while (p < chunk->end) { 1304 c = p; 1305 switch (*p) { 1306 case '"': 1307 obj = ucl_get_value_object (parser); 1308 ucl_chunk_skipc (chunk, p); 1309 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1310 return false; 1311 } 1312 str_len = chunk->pos - c - 2; 1313 obj->type = UCL_STRING; 1314 if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE], 1315 &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) { 1316 return false; 1317 } 1318 obj->len = str_len; 1319 parser->state = UCL_STATE_AFTER_VALUE; 1320 p = chunk->pos; 1321 return true; 1322 break; 1323 case '{': 1324 obj = ucl_get_value_object (parser); 1325 /* We have a new object */ 1326 obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level); 1327 if (obj == NULL) { 1328 return false; 1329 } 1330 1331 ucl_chunk_skipc (chunk, p); 1332 return true; 1333 break; 1334 case '[': 1335 obj = ucl_get_value_object (parser); 1336 /* We have a new array */ 1337 obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level); 1338 if (obj == NULL) { 1339 return false; 1340 } 1341 1342 ucl_chunk_skipc (chunk, p); 1343 return true; 1344 break; 1345 case ']': 1346 /* We have the array ending */ 1347 if (parser->stack && parser->stack->obj->type == UCL_ARRAY) { 1348 parser->state = UCL_STATE_AFTER_VALUE; 1349 return true; 1350 } 1351 else { 1352 goto parse_string; 1353 } 1354 break; 1355 case '<': 1356 obj = ucl_get_value_object (parser); 1357 /* We have something like multiline value, which must be <<[A-Z]+\n */ 1358 if (chunk->end - p > 3) { 1359 if (memcmp (p, "<<", 2) == 0) { 1360 p += 2; 1361 /* We allow only uppercase characters in multiline definitions */ 1362 while (p < chunk->end && *p >= 'A' && *p <= 'Z') { 1363 p ++; 1364 } 1365 if (*p =='\n') { 1366 /* Set chunk positions and start multiline parsing */ 1367 c += 2; 1368 chunk->remain -= p - c; 1369 chunk->pos = p + 1; 1370 chunk->column = 0; 1371 chunk->line ++; 1372 if ((str_len = ucl_parse_multiline_string (parser, chunk, c, 1373 p - c, &c, &var_expand)) == 0) { 1374 ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err); 1375 return false; 1376 } 1377 obj->type = UCL_STRING; 1378 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE], 1379 &obj->value.sv, str_len - 1, false, false, var_expand)) == -1) { 1380 return false; 1381 } 1382 obj->len = str_len; 1383 parser->state = UCL_STATE_AFTER_VALUE; 1384 return true; 1385 } 1386 } 1387 } 1388 /* Fallback to ordinary strings */ 1389 default: 1390 parse_string: 1391 if (obj == NULL) { 1392 obj = ucl_get_value_object (parser); 1393 } 1394 /* Parse atom */ 1395 if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) { 1396 if (!ucl_lex_number (parser, chunk, obj)) { 1397 if (parser->state == UCL_STATE_ERROR) { 1398 return false; 1399 } 1400 } 1401 else { 1402 parser->state = UCL_STATE_AFTER_VALUE; 1403 return true; 1404 } 1405 /* Fallback to normal string */ 1406 } 1407 1408 if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) { 1409 return false; 1410 } 1411 /* Cut trailing spaces */ 1412 stripped_spaces = 0; 1413 while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces), 1414 UCL_CHARACTER_WHITESPACE)) { 1415 stripped_spaces ++; 1416 } 1417 str_len = chunk->pos - c - stripped_spaces; 1418 if (str_len <= 0) { 1419 ucl_set_err (chunk, 0, "string value must not be empty", &parser->err); 1420 return false; 1421 } 1422 else if (str_len == 4 && memcmp (c, "null", 4) == 0) { 1423 obj->len = 0; 1424 obj->type = UCL_NULL; 1425 } 1426 else if (!ucl_maybe_parse_boolean (obj, c, str_len)) { 1427 obj->type = UCL_STRING; 1428 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE], 1429 &obj->value.sv, str_len, need_unescape, 1430 false, var_expand)) == -1) { 1431 return false; 1432 } 1433 obj->len = str_len; 1434 } 1435 parser->state = UCL_STATE_AFTER_VALUE; 1436 p = chunk->pos; 1437 1438 return true; 1439 break; 1440 } 1441 } 1442 1443 return true; 1444 } 1445 1446 /** 1447 * Handle after value data 1448 * @param parser 1449 * @param chunk 1450 * @return 1451 */ 1452 static bool 1453 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1454 { 1455 const unsigned char *p; 1456 bool got_sep = false; 1457 struct ucl_stack *st; 1458 1459 p = chunk->pos; 1460 1461 while (p < chunk->end) { 1462 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 1463 /* Skip whitespaces */ 1464 ucl_chunk_skipc (chunk, p); 1465 } 1466 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1467 /* Skip comment */ 1468 if (!ucl_skip_comments (parser)) { 1469 return false; 1470 } 1471 /* Treat comment as a separator */ 1472 got_sep = true; 1473 p = chunk->pos; 1474 } 1475 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) { 1476 if (*p == '}' || *p == ']') { 1477 if (parser->stack == NULL) { 1478 ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err); 1479 return false; 1480 } 1481 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) || 1482 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) { 1483 1484 /* Pop all nested objects from a stack */ 1485 st = parser->stack; 1486 parser->stack = st->next; 1487 UCL_FREE (sizeof (struct ucl_stack), st); 1488 1489 while (parser->stack != NULL) { 1490 st = parser->stack; 1491 if (st->next == NULL || st->next->level == st->level) { 1492 break; 1493 } 1494 parser->stack = st->next; 1495 UCL_FREE (sizeof (struct ucl_stack), st); 1496 } 1497 } 1498 else { 1499 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err); 1500 return false; 1501 } 1502 1503 if (parser->stack == NULL) { 1504 /* Ignore everything after a top object */ 1505 return true; 1506 } 1507 else { 1508 ucl_chunk_skipc (chunk, p); 1509 } 1510 got_sep = true; 1511 } 1512 else { 1513 /* Got a separator */ 1514 got_sep = true; 1515 ucl_chunk_skipc (chunk, p); 1516 } 1517 } 1518 else { 1519 /* Anything else */ 1520 if (!got_sep) { 1521 ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err); 1522 return false; 1523 } 1524 return true; 1525 } 1526 } 1527 1528 return true; 1529 } 1530 1531 /** 1532 * Handle macro data 1533 * @param parser 1534 * @param chunk 1535 * @return 1536 */ 1537 static bool 1538 ucl_parse_macro_value (struct ucl_parser *parser, 1539 struct ucl_chunk *chunk, struct ucl_macro *macro, 1540 unsigned char const **macro_start, size_t *macro_len) 1541 { 1542 const unsigned char *p, *c; 1543 bool need_unescape = false, ucl_escape = false, var_expand = false; 1544 1545 p = chunk->pos; 1546 1547 switch (*p) { 1548 case '"': 1549 /* We have macro value encoded in quotes */ 1550 c = p; 1551 ucl_chunk_skipc (chunk, p); 1552 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1553 return false; 1554 } 1555 1556 *macro_start = c + 1; 1557 *macro_len = chunk->pos - c - 2; 1558 p = chunk->pos; 1559 break; 1560 case '{': 1561 /* We got a multiline macro body */ 1562 ucl_chunk_skipc (chunk, p); 1563 /* Skip spaces at the beginning */ 1564 while (p < chunk->end) { 1565 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1566 ucl_chunk_skipc (chunk, p); 1567 } 1568 else { 1569 break; 1570 } 1571 } 1572 c = p; 1573 while (p < chunk->end) { 1574 if (*p == '}') { 1575 break; 1576 } 1577 ucl_chunk_skipc (chunk, p); 1578 } 1579 *macro_start = c; 1580 *macro_len = p - c; 1581 ucl_chunk_skipc (chunk, p); 1582 break; 1583 default: 1584 /* Macro is not enclosed in quotes or braces */ 1585 c = p; 1586 while (p < chunk->end) { 1587 if (ucl_lex_is_atom_end (*p)) { 1588 break; 1589 } 1590 ucl_chunk_skipc (chunk, p); 1591 } 1592 *macro_start = c; 1593 *macro_len = p - c; 1594 break; 1595 } 1596 1597 /* We are at the end of a macro */ 1598 /* Skip ';' and space characters and return to previous state */ 1599 while (p < chunk->end) { 1600 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') { 1601 break; 1602 } 1603 ucl_chunk_skipc (chunk, p); 1604 } 1605 return true; 1606 } 1607 1608 /** 1609 * Handle the main states of rcl parser 1610 * @param parser parser structure 1611 * @param data the pointer to the beginning of a chunk 1612 * @param len the length of a chunk 1613 * @return true if chunk has been parsed and false in case of error 1614 */ 1615 static bool 1616 ucl_state_machine (struct ucl_parser *parser) 1617 { 1618 ucl_object_t *obj; 1619 struct ucl_chunk *chunk = parser->chunks; 1620 const unsigned char *p, *c = NULL, *macro_start = NULL; 1621 unsigned char *macro_escaped; 1622 size_t macro_len = 0; 1623 struct ucl_macro *macro = NULL; 1624 bool next_key = false, end_of_object = false; 1625 1626 if (parser->top_obj == NULL) { 1627 if (*chunk->pos == '[') { 1628 obj = ucl_add_parser_stack (NULL, parser, true, 0); 1629 } 1630 else { 1631 obj = ucl_add_parser_stack (NULL, parser, false, 0); 1632 } 1633 if (obj == NULL) { 1634 return false; 1635 } 1636 parser->top_obj = obj; 1637 parser->cur_obj = obj; 1638 parser->state = UCL_STATE_INIT; 1639 } 1640 1641 p = chunk->pos; 1642 while (chunk->pos < chunk->end) { 1643 switch (parser->state) { 1644 case UCL_STATE_INIT: 1645 /* 1646 * At the init state we can either go to the parse array or object 1647 * if we got [ or { correspondingly or can just treat new data as 1648 * a key of newly created object 1649 */ 1650 obj = parser->cur_obj; 1651 if (!ucl_skip_comments (parser)) { 1652 parser->prev_state = parser->state; 1653 parser->state = UCL_STATE_ERROR; 1654 return false; 1655 } 1656 else { 1657 p = chunk->pos; 1658 if (*p == '[') { 1659 parser->state = UCL_STATE_VALUE; 1660 ucl_chunk_skipc (chunk, p); 1661 } 1662 else { 1663 parser->state = UCL_STATE_KEY; 1664 if (*p == '{') { 1665 ucl_chunk_skipc (chunk, p); 1666 } 1667 } 1668 } 1669 break; 1670 case UCL_STATE_KEY: 1671 /* Skip any spaces */ 1672 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1673 ucl_chunk_skipc (chunk, p); 1674 } 1675 if (*p == '}') { 1676 /* We have the end of an object */ 1677 parser->state = UCL_STATE_AFTER_VALUE; 1678 continue; 1679 } 1680 if (parser->stack == NULL) { 1681 /* No objects are on stack, but we want to parse a key */ 1682 ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser " 1683 "expects a key", &parser->err); 1684 parser->prev_state = parser->state; 1685 parser->state = UCL_STATE_ERROR; 1686 return false; 1687 } 1688 if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) { 1689 parser->prev_state = parser->state; 1690 parser->state = UCL_STATE_ERROR; 1691 return false; 1692 } 1693 if (end_of_object) { 1694 p = chunk->pos; 1695 parser->state = UCL_STATE_AFTER_VALUE; 1696 continue; 1697 } 1698 else if (parser->state != UCL_STATE_MACRO_NAME) { 1699 if (next_key && parser->stack->obj->type == UCL_OBJECT) { 1700 /* Parse more keys and nest objects accordingly */ 1701 obj = ucl_add_parser_stack (parser->cur_obj, parser, false, 1702 parser->stack->level + 1); 1703 if (obj == NULL) { 1704 return false; 1705 } 1706 } 1707 else { 1708 parser->state = UCL_STATE_VALUE; 1709 } 1710 } 1711 else { 1712 c = chunk->pos; 1713 } 1714 p = chunk->pos; 1715 break; 1716 case UCL_STATE_VALUE: 1717 /* We need to check what we do have */ 1718 if (!ucl_parse_value (parser, chunk)) { 1719 parser->prev_state = parser->state; 1720 parser->state = UCL_STATE_ERROR; 1721 return false; 1722 } 1723 /* State is set in ucl_parse_value call */ 1724 p = chunk->pos; 1725 break; 1726 case UCL_STATE_AFTER_VALUE: 1727 if (!ucl_parse_after_value (parser, chunk)) { 1728 parser->prev_state = parser->state; 1729 parser->state = UCL_STATE_ERROR; 1730 return false; 1731 } 1732 if (parser->stack != NULL) { 1733 if (parser->stack->obj->type == UCL_OBJECT) { 1734 parser->state = UCL_STATE_KEY; 1735 } 1736 else { 1737 /* Array */ 1738 parser->state = UCL_STATE_VALUE; 1739 } 1740 } 1741 else { 1742 /* Skip everything at the end */ 1743 return true; 1744 } 1745 p = chunk->pos; 1746 break; 1747 case UCL_STATE_MACRO_NAME: 1748 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1749 ucl_chunk_skipc (chunk, p); 1750 } 1751 else if (p - c > 0) { 1752 /* We got macro name */ 1753 macro_len = (size_t)(p - c); 1754 HASH_FIND (hh, parser->macroes, c, macro_len, macro); 1755 if (macro == NULL) { 1756 ucl_create_err (&parser->err, "error on line %d at column %d: " 1757 "unknown macro: '%.*s', character: '%c'", 1758 chunk->line, chunk->column, (int)(p - c), c, *chunk->pos); 1759 parser->state = UCL_STATE_ERROR; 1760 return false; 1761 } 1762 /* Now we need to skip all spaces */ 1763 while (p < chunk->end) { 1764 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1765 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1766 /* Skip comment */ 1767 if (!ucl_skip_comments (parser)) { 1768 return false; 1769 } 1770 p = chunk->pos; 1771 } 1772 break; 1773 } 1774 ucl_chunk_skipc (chunk, p); 1775 } 1776 parser->state = UCL_STATE_MACRO; 1777 } 1778 break; 1779 case UCL_STATE_MACRO: 1780 if (!ucl_parse_macro_value (parser, chunk, macro, 1781 ¯o_start, ¯o_len)) { 1782 parser->prev_state = parser->state; 1783 parser->state = UCL_STATE_ERROR; 1784 return false; 1785 } 1786 macro_len = ucl_expand_variable (parser, ¯o_escaped, macro_start, macro_len); 1787 parser->state = parser->prev_state; 1788 if (macro_escaped == NULL) { 1789 if (!macro->handler (macro_start, macro_len, macro->ud)) { 1790 return false; 1791 } 1792 } 1793 else { 1794 if (!macro->handler (macro_escaped, macro_len, macro->ud)) { 1795 UCL_FREE (macro_len + 1, macro_escaped); 1796 return false; 1797 } 1798 UCL_FREE (macro_len + 1, macro_escaped); 1799 } 1800 p = chunk->pos; 1801 break; 1802 default: 1803 /* TODO: add all states */ 1804 ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err); 1805 parser->state = UCL_STATE_ERROR; 1806 return false; 1807 } 1808 } 1809 1810 return true; 1811 } 1812 1813 struct ucl_parser* 1814 ucl_parser_new (int flags) 1815 { 1816 struct ucl_parser *new; 1817 1818 new = UCL_ALLOC (sizeof (struct ucl_parser)); 1819 if (new == NULL) { 1820 return NULL; 1821 } 1822 memset (new, 0, sizeof (struct ucl_parser)); 1823 1824 ucl_parser_register_macro (new, "include", ucl_include_handler, new); 1825 ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new); 1826 ucl_parser_register_macro (new, "includes", ucl_includes_handler, new); 1827 1828 new->flags = flags; 1829 1830 /* Initial assumption about filevars */ 1831 ucl_parser_set_filevars (new, NULL, false); 1832 1833 return new; 1834 } 1835 1836 1837 void 1838 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro, 1839 ucl_macro_handler handler, void* ud) 1840 { 1841 struct ucl_macro *new; 1842 1843 if (macro == NULL || handler == NULL) { 1844 return; 1845 } 1846 new = UCL_ALLOC (sizeof (struct ucl_macro)); 1847 if (new == NULL) { 1848 return; 1849 } 1850 memset (new, 0, sizeof (struct ucl_macro)); 1851 new->handler = handler; 1852 new->name = strdup (macro); 1853 new->ud = ud; 1854 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); 1855 } 1856 1857 void 1858 ucl_parser_register_variable (struct ucl_parser *parser, const char *var, 1859 const char *value) 1860 { 1861 struct ucl_variable *new = NULL, *cur; 1862 1863 if (var == NULL) { 1864 return; 1865 } 1866 1867 /* Find whether a variable already exists */ 1868 LL_FOREACH (parser->variables, cur) { 1869 if (strcmp (cur->var, var) == 0) { 1870 new = cur; 1871 break; 1872 } 1873 } 1874 1875 if (value == NULL) { 1876 1877 if (new != NULL) { 1878 /* Remove variable */ 1879 LL_DELETE (parser->variables, new); 1880 free (new->var); 1881 free (new->value); 1882 UCL_FREE (sizeof (struct ucl_variable), new); 1883 } 1884 else { 1885 /* Do nothing */ 1886 return; 1887 } 1888 } 1889 else { 1890 if (new == NULL) { 1891 new = UCL_ALLOC (sizeof (struct ucl_variable)); 1892 if (new == NULL) { 1893 return; 1894 } 1895 memset (new, 0, sizeof (struct ucl_variable)); 1896 new->var = strdup (var); 1897 new->var_len = strlen (var); 1898 new->value = strdup (value); 1899 new->value_len = strlen (value); 1900 1901 LL_PREPEND (parser->variables, new); 1902 } 1903 else { 1904 free (new->value); 1905 new->value = strdup (value); 1906 new->value_len = strlen (value); 1907 } 1908 } 1909 } 1910 1911 void 1912 ucl_parser_set_variables_handler (struct ucl_parser *parser, 1913 ucl_variable_handler handler, void *ud) 1914 { 1915 parser->var_handler = handler; 1916 parser->var_data = ud; 1917 } 1918 1919 bool 1920 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data, 1921 size_t len) 1922 { 1923 struct ucl_chunk *chunk; 1924 1925 if (data == NULL || len == 0) { 1926 ucl_create_err (&parser->err, "invalid chunk added"); 1927 return false; 1928 } 1929 if (parser->state != UCL_STATE_ERROR) { 1930 chunk = UCL_ALLOC (sizeof (struct ucl_chunk)); 1931 if (chunk == NULL) { 1932 ucl_create_err (&parser->err, "cannot allocate chunk structure"); 1933 return false; 1934 } 1935 chunk->begin = data; 1936 chunk->remain = len; 1937 chunk->pos = chunk->begin; 1938 chunk->end = chunk->begin + len; 1939 chunk->line = 1; 1940 chunk->column = 0; 1941 LL_PREPEND (parser->chunks, chunk); 1942 parser->recursion ++; 1943 if (parser->recursion > UCL_MAX_RECURSION) { 1944 ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d", 1945 parser->recursion); 1946 return false; 1947 } 1948 return ucl_state_machine (parser); 1949 } 1950 1951 ucl_create_err (&parser->err, "a parser is in an invalid state"); 1952 1953 return false; 1954 } 1955 1956 bool 1957 ucl_parser_add_string (struct ucl_parser *parser, const char *data, 1958 size_t len) 1959 { 1960 if (data == NULL) { 1961 ucl_create_err (&parser->err, "invalid string added"); 1962 return false; 1963 } 1964 if (len == 0) { 1965 len = strlen (data); 1966 } 1967 1968 return ucl_parser_add_chunk (parser, (const unsigned char *)data, len); 1969 } 1970