1 /* Copyright (c) 2013, Vsevolod Stakhov 2 * All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * * Redistributions of source code must retain the above copyright 7 * notice, this list of conditions and the following disclaimer. 8 * * Redistributions in binary form must reproduce the above copyright 9 * notice, this list of conditions and the following disclaimer in the 10 * documentation and/or other materials provided with the distribution. 11 * 12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY 13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY 16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 */ 23 24 #include "ucl.h" 25 #include "ucl_internal.h" 26 #include "ucl_chartable.h" 27 28 /** 29 * @file rcl_parser.c 30 * The implementation of rcl parser 31 */ 32 33 struct ucl_parser_saved_state { 34 unsigned int line; 35 unsigned int column; 36 size_t remain; 37 const unsigned char *pos; 38 }; 39 40 /** 41 * Move up to len characters 42 * @param parser 43 * @param begin 44 * @param len 45 * @return new position in chunk 46 */ 47 #define ucl_chunk_skipc(chunk, p) do{ \ 48 if (*(p) == '\n') { \ 49 (chunk)->line ++; \ 50 (chunk)->column = 0; \ 51 } \ 52 else (chunk)->column ++; \ 53 (p++); \ 54 (chunk)->pos ++; \ 55 (chunk)->remain --; \ 56 } while (0) 57 58 static inline void 59 ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err) 60 { 61 if (chunk->pos < chunk->end) { 62 if (isgraph (*chunk->pos)) { 63 ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'", 64 chunk->line, chunk->column, str, *chunk->pos); 65 } 66 else { 67 ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'", 68 chunk->line, chunk->column, str, (int)*chunk->pos); 69 } 70 } 71 else { 72 ucl_create_err (err, "error at the end of chunk: %s", str); 73 } 74 } 75 76 /** 77 * Skip all comments from the current pos resolving nested and multiline comments 78 * @param parser 79 * @return 80 */ 81 static bool 82 ucl_skip_comments (struct ucl_parser *parser) 83 { 84 struct ucl_chunk *chunk = parser->chunks; 85 const unsigned char *p; 86 int comments_nested = 0; 87 88 p = chunk->pos; 89 90 start: 91 if (*p == '#') { 92 if (parser->state != UCL_STATE_SCOMMENT && 93 parser->state != UCL_STATE_MCOMMENT) { 94 while (p < chunk->end) { 95 if (*p == '\n') { 96 ucl_chunk_skipc (chunk, p); 97 goto start; 98 } 99 ucl_chunk_skipc (chunk, p); 100 } 101 } 102 } 103 else if (*p == '/' && chunk->remain >= 2) { 104 if (p[1] == '*') { 105 ucl_chunk_skipc (chunk, p); 106 comments_nested ++; 107 ucl_chunk_skipc (chunk, p); 108 109 while (p < chunk->end) { 110 if (*p == '*') { 111 ucl_chunk_skipc (chunk, p); 112 if (*p == '/') { 113 comments_nested --; 114 if (comments_nested == 0) { 115 ucl_chunk_skipc (chunk, p); 116 goto start; 117 } 118 } 119 ucl_chunk_skipc (chunk, p); 120 } 121 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') { 122 comments_nested ++; 123 ucl_chunk_skipc (chunk, p); 124 ucl_chunk_skipc (chunk, p); 125 continue; 126 } 127 ucl_chunk_skipc (chunk, p); 128 } 129 if (comments_nested != 0) { 130 ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err); 131 return false; 132 } 133 } 134 } 135 136 return true; 137 } 138 139 /** 140 * Return multiplier for a character 141 * @param c multiplier character 142 * @param is_bytes if true use 1024 multiplier 143 * @return multiplier 144 */ 145 static inline unsigned long 146 ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) { 147 const struct { 148 char c; 149 long mult_normal; 150 long mult_bytes; 151 } multipliers[] = { 152 {'m', 1000 * 1000, 1024 * 1024}, 153 {'k', 1000, 1024}, 154 {'g', 1000 * 1000 * 1000, 1024 * 1024 * 1024} 155 }; 156 int i; 157 158 for (i = 0; i < 3; i ++) { 159 if (tolower (c) == multipliers[i].c) { 160 if (is_bytes) { 161 return multipliers[i].mult_bytes; 162 } 163 return multipliers[i].mult_normal; 164 } 165 } 166 167 return 1; 168 } 169 170 171 /** 172 * Return multiplier for time scaling 173 * @param c 174 * @return 175 */ 176 static inline double 177 ucl_lex_time_multiplier (const unsigned char c) { 178 const struct { 179 char c; 180 double mult; 181 } multipliers[] = { 182 {'m', 60}, 183 {'h', 60 * 60}, 184 {'d', 60 * 60 * 24}, 185 {'w', 60 * 60 * 24 * 7}, 186 {'y', 60 * 60 * 24 * 7 * 365} 187 }; 188 int i; 189 190 for (i = 0; i < 5; i ++) { 191 if (tolower (c) == multipliers[i].c) { 192 return multipliers[i].mult; 193 } 194 } 195 196 return 1; 197 } 198 199 /** 200 * Return true if a character is a end of an atom 201 * @param c 202 * @return 203 */ 204 static inline bool 205 ucl_lex_is_atom_end (const unsigned char c) 206 { 207 return ucl_test_character (c, UCL_CHARACTER_VALUE_END); 208 } 209 210 static inline bool 211 ucl_lex_is_comment (const unsigned char c1, const unsigned char c2) 212 { 213 if (c1 == '/') { 214 if (c2 == '*') { 215 return true; 216 } 217 } 218 else if (c1 == '#') { 219 return true; 220 } 221 return false; 222 } 223 224 /** 225 * Check variable found 226 * @param parser 227 * @param ptr 228 * @param remain 229 * @param out_len 230 * @param strict 231 * @param found 232 * @return 233 */ 234 static inline const char * 235 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain, 236 size_t *out_len, bool strict, bool *found) 237 { 238 struct ucl_variable *var; 239 240 LL_FOREACH (parser->variables, var) { 241 if (strict) { 242 if (remain == var->var_len) { 243 if (memcmp (ptr, var->var, var->var_len) == 0) { 244 *out_len += var->value_len; 245 *found = true; 246 return (ptr + var->var_len); 247 } 248 } 249 } 250 else { 251 if (remain >= var->var_len) { 252 if (memcmp (ptr, var->var, var->var_len) == 0) { 253 *out_len += var->value_len; 254 *found = true; 255 return (ptr + var->var_len); 256 } 257 } 258 } 259 } 260 261 return ptr; 262 } 263 264 /** 265 * Check for a variable in a given string 266 * @param parser 267 * @param ptr 268 * @param remain 269 * @param out_len 270 * @param vars_found 271 * @return 272 */ 273 static const char * 274 ucl_check_variable (struct ucl_parser *parser, const char *ptr, size_t remain, size_t *out_len, bool *vars_found) 275 { 276 const char *p, *end, *ret = ptr; 277 bool found = false; 278 279 if (*ptr == '{') { 280 /* We need to match the variable enclosed in braces */ 281 p = ptr + 1; 282 end = ptr + remain; 283 while (p < end) { 284 if (*p == '}') { 285 ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, out_len, true, &found); 286 if (found) { 287 /* {} must be excluded actually */ 288 ret ++; 289 if (!*vars_found) { 290 *vars_found = true; 291 } 292 } 293 else { 294 *out_len += 2; 295 } 296 break; 297 } 298 p ++; 299 } 300 } 301 else if (*ptr != '$') { 302 /* Not count escaped dollar sign */ 303 ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found); 304 if (found && !*vars_found) { 305 *vars_found = true; 306 } 307 if (!found) { 308 (*out_len) ++; 309 } 310 } 311 else { 312 ret ++; 313 (*out_len) ++; 314 } 315 316 return ret; 317 } 318 319 /** 320 * Expand a single variable 321 * @param parser 322 * @param ptr 323 * @param remain 324 * @param dest 325 * @return 326 */ 327 static const char * 328 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr, 329 size_t remain, unsigned char **dest) 330 { 331 unsigned char *d = *dest; 332 const char *p = ptr + 1, *ret; 333 struct ucl_variable *var; 334 bool found = false; 335 336 ret = ptr + 1; 337 remain --; 338 339 if (*p == '$') { 340 *d++ = *p++; 341 *dest = d; 342 return p; 343 } 344 else if (*p == '{') { 345 p ++; 346 ret += 2; 347 remain -= 2; 348 } 349 350 LL_FOREACH (parser->variables, var) { 351 if (remain >= var->var_len) { 352 if (memcmp (p, var->var, var->var_len) == 0) { 353 memcpy (d, var->value, var->value_len); 354 ret += var->var_len; 355 d += var->value_len; 356 found = true; 357 break; 358 } 359 } 360 } 361 if (!found) { 362 memcpy (d, ptr, 2); 363 d += 2; 364 ret --; 365 } 366 367 *dest = d; 368 return ret; 369 } 370 371 /** 372 * Expand variables in string 373 * @param parser 374 * @param dst 375 * @param src 376 * @param in_len 377 * @return 378 */ 379 static ssize_t 380 ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst, 381 const char *src, size_t in_len) 382 { 383 const char *p, *end = src + in_len; 384 unsigned char *d; 385 size_t out_len = 0; 386 bool vars_found = false; 387 388 p = src; 389 while (p != end) { 390 if (*p == '$') { 391 p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found); 392 } 393 else { 394 p ++; 395 out_len ++; 396 } 397 } 398 399 if (!vars_found) { 400 /* Trivial case */ 401 *dst = NULL; 402 return in_len; 403 } 404 405 *dst = UCL_ALLOC (out_len + 1); 406 if (*dst == NULL) { 407 return in_len; 408 } 409 410 d = *dst; 411 p = src; 412 while (p != end) { 413 if (*p == '$') { 414 p = ucl_expand_single_variable (parser, p, end - p, &d); 415 } 416 else { 417 *d++ = *p++; 418 } 419 } 420 421 *d = '\0'; 422 423 return out_len; 424 } 425 426 /** 427 * Store or copy pointer to the trash stack 428 * @param parser parser object 429 * @param src src string 430 * @param dst destination buffer (trash stack pointer) 431 * @param dst_const const destination pointer (e.g. value of object) 432 * @param in_len input length 433 * @param need_unescape need to unescape source (and copy it) 434 * @param need_lowercase need to lowercase value (and copy) 435 * @param need_expand need to expand variables (and copy as well) 436 * @return output length (excluding \0 symbol) 437 */ 438 static inline ssize_t 439 ucl_copy_or_store_ptr (struct ucl_parser *parser, 440 const unsigned char *src, unsigned char **dst, 441 const char **dst_const, size_t in_len, 442 bool need_unescape, bool need_lowercase, bool need_expand) 443 { 444 ssize_t ret = -1, tret; 445 unsigned char *tmp; 446 447 if (need_unescape || need_lowercase || 448 (need_expand && parser->variables != NULL) || 449 !(parser->flags & UCL_PARSER_ZEROCOPY)) { 450 /* Copy string */ 451 *dst = UCL_ALLOC (in_len + 1); 452 if (*dst == NULL) { 453 ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err); 454 return false; 455 } 456 if (need_lowercase) { 457 ret = ucl_strlcpy_tolower (*dst, src, in_len + 1); 458 } 459 else { 460 ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1); 461 } 462 463 if (need_unescape) { 464 ret = ucl_unescape_json_string (*dst, ret); 465 } 466 if (need_expand) { 467 tmp = *dst; 468 tret = ret; 469 ret = ucl_expand_variable (parser, dst, tmp, ret); 470 if (*dst == NULL) { 471 /* Nothing to expand */ 472 *dst = tmp; 473 ret = tret; 474 } 475 } 476 *dst_const = *dst; 477 } 478 else { 479 *dst_const = src; 480 ret = in_len; 481 } 482 483 return ret; 484 } 485 486 /** 487 * Create and append an object at the specified level 488 * @param parser 489 * @param is_array 490 * @param level 491 * @return 492 */ 493 static inline ucl_object_t * 494 ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level) 495 { 496 struct ucl_stack *st; 497 498 if (!is_array) { 499 if (obj == NULL) { 500 obj = ucl_object_typed_new (UCL_OBJECT); 501 } 502 else { 503 obj->type = UCL_OBJECT; 504 } 505 obj->value.ov = ucl_hash_create (); 506 parser->state = UCL_STATE_KEY; 507 } 508 else { 509 if (obj == NULL) { 510 obj = ucl_object_typed_new (UCL_ARRAY); 511 } 512 else { 513 obj->type = UCL_ARRAY; 514 } 515 parser->state = UCL_STATE_VALUE; 516 } 517 518 st = UCL_ALLOC (sizeof (struct ucl_stack)); 519 if (st == NULL) { 520 ucl_set_err (parser->chunks, 0, "cannot allocate memory for an object", &parser->err); 521 return NULL; 522 } 523 st->obj = obj; 524 st->level = level; 525 LL_PREPEND (parser->stack, st); 526 parser->cur_obj = obj; 527 528 return obj; 529 } 530 531 int 532 ucl_maybe_parse_number (ucl_object_t *obj, 533 const char *start, const char *end, const char **pos, 534 bool allow_double, bool number_bytes, bool allow_time) 535 { 536 const char *p = start, *c = start; 537 char *endptr; 538 bool got_dot = false, got_exp = false, need_double = false, 539 is_time = false, valid_start = false, is_hex = false, 540 is_neg = false; 541 double dv = 0; 542 int64_t lv = 0; 543 544 if (*p == '-') { 545 is_neg = true; 546 c ++; 547 p ++; 548 } 549 while (p < end) { 550 if (is_hex && isxdigit (*p)) { 551 p ++; 552 } 553 else if (isdigit (*p)) { 554 valid_start = true; 555 p ++; 556 } 557 else if (!is_hex && (*p == 'x' || *p == 'X')) { 558 is_hex = true; 559 allow_double = false; 560 c = p + 1; 561 } 562 else if (allow_double) { 563 if (p == c) { 564 /* Empty digits sequence, not a number */ 565 *pos = start; 566 return EINVAL; 567 } 568 else if (*p == '.') { 569 if (got_dot) { 570 /* Double dots, not a number */ 571 *pos = start; 572 return EINVAL; 573 } 574 else { 575 got_dot = true; 576 need_double = true; 577 p ++; 578 } 579 } 580 else if (*p == 'e' || *p == 'E') { 581 if (got_exp) { 582 /* Double exp, not a number */ 583 *pos = start; 584 return EINVAL; 585 } 586 else { 587 got_exp = true; 588 need_double = true; 589 p ++; 590 if (p >= end) { 591 *pos = start; 592 return EINVAL; 593 } 594 if (!isdigit (*p) && *p != '+' && *p != '-') { 595 /* Wrong exponent sign */ 596 *pos = start; 597 return EINVAL; 598 } 599 else { 600 p ++; 601 } 602 } 603 } 604 else { 605 /* Got the end of the number, need to check */ 606 break; 607 } 608 } 609 else { 610 break; 611 } 612 } 613 614 if (!valid_start) { 615 *pos = start; 616 return EINVAL; 617 } 618 619 errno = 0; 620 if (need_double) { 621 dv = strtod (c, &endptr); 622 } 623 else { 624 if (is_hex) { 625 lv = strtoimax (c, &endptr, 16); 626 } 627 else { 628 lv = strtoimax (c, &endptr, 10); 629 } 630 } 631 if (errno == ERANGE) { 632 *pos = start; 633 return ERANGE; 634 } 635 636 /* Now check endptr */ 637 if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0' || 638 ucl_test_character (*endptr, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 639 p = endptr; 640 goto set_obj; 641 } 642 643 if (endptr < end && endptr != start) { 644 p = endptr; 645 switch (*p) { 646 case 'm': 647 case 'M': 648 case 'g': 649 case 'G': 650 case 'k': 651 case 'K': 652 if (end - p >= 2) { 653 if (p[1] == 's' || p[1] == 'S') { 654 /* Milliseconds */ 655 if (!need_double) { 656 need_double = true; 657 dv = lv; 658 } 659 is_time = true; 660 if (p[0] == 'm' || p[0] == 'M') { 661 dv /= 1000.; 662 } 663 else { 664 dv *= ucl_lex_num_multiplier (*p, false); 665 } 666 p += 2; 667 goto set_obj; 668 } 669 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) { 670 /* Bytes */ 671 if (need_double) { 672 need_double = false; 673 lv = dv; 674 } 675 lv *= ucl_lex_num_multiplier (*p, true); 676 p += 2; 677 goto set_obj; 678 } 679 else if (ucl_lex_is_atom_end (p[1])) { 680 if (need_double) { 681 dv *= ucl_lex_num_multiplier (*p, false); 682 } 683 else { 684 lv *= ucl_lex_num_multiplier (*p, number_bytes); 685 } 686 p ++; 687 goto set_obj; 688 } 689 else if (allow_time && end - p >= 3) { 690 if (tolower (p[0]) == 'm' && 691 tolower (p[1]) == 'i' && 692 tolower (p[2]) == 'n') { 693 /* Minutes */ 694 if (!need_double) { 695 need_double = true; 696 dv = lv; 697 } 698 is_time = true; 699 dv *= 60.; 700 p += 3; 701 goto set_obj; 702 } 703 } 704 } 705 else { 706 if (need_double) { 707 dv *= ucl_lex_num_multiplier (*p, false); 708 } 709 else { 710 lv *= ucl_lex_num_multiplier (*p, number_bytes); 711 } 712 p ++; 713 goto set_obj; 714 } 715 break; 716 case 'S': 717 case 's': 718 if (allow_time && 719 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { 720 if (!need_double) { 721 need_double = true; 722 dv = lv; 723 } 724 p ++; 725 is_time = true; 726 goto set_obj; 727 } 728 break; 729 case 'h': 730 case 'H': 731 case 'd': 732 case 'D': 733 case 'w': 734 case 'W': 735 case 'Y': 736 case 'y': 737 if (allow_time && 738 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { 739 if (!need_double) { 740 need_double = true; 741 dv = lv; 742 } 743 is_time = true; 744 dv *= ucl_lex_time_multiplier (*p); 745 p ++; 746 goto set_obj; 747 } 748 break; 749 } 750 } 751 752 *pos = c; 753 return EINVAL; 754 755 set_obj: 756 if (allow_double && (need_double || is_time)) { 757 if (!is_time) { 758 obj->type = UCL_FLOAT; 759 } 760 else { 761 obj->type = UCL_TIME; 762 } 763 obj->value.dv = is_neg ? (-dv) : dv; 764 } 765 else { 766 obj->type = UCL_INT; 767 obj->value.iv = is_neg ? (-lv) : lv; 768 } 769 *pos = p; 770 return 0; 771 } 772 773 /** 774 * Parse possible number 775 * @param parser 776 * @param chunk 777 * @return true if a number has been parsed 778 */ 779 static bool 780 ucl_lex_number (struct ucl_parser *parser, 781 struct ucl_chunk *chunk, ucl_object_t *obj) 782 { 783 const unsigned char *pos; 784 int ret; 785 786 ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos, 787 true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0)); 788 789 if (ret == 0) { 790 chunk->remain -= pos - chunk->pos; 791 chunk->column += pos - chunk->pos; 792 chunk->pos = pos; 793 return true; 794 } 795 else if (ret == ERANGE) { 796 ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err); 797 } 798 799 return false; 800 } 801 802 /** 803 * Parse quoted string with possible escapes 804 * @param parser 805 * @param chunk 806 * @return true if a string has been parsed 807 */ 808 static bool 809 ucl_lex_json_string (struct ucl_parser *parser, 810 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand) 811 { 812 const unsigned char *p = chunk->pos; 813 unsigned char c; 814 int i; 815 816 while (p < chunk->end) { 817 c = *p; 818 if (c < 0x1F) { 819 /* Unmasked control character */ 820 if (c == '\n') { 821 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err); 822 } 823 else { 824 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err); 825 } 826 return false; 827 } 828 else if (c == '\\') { 829 ucl_chunk_skipc (chunk, p); 830 c = *p; 831 if (p >= chunk->end) { 832 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err); 833 return false; 834 } 835 else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) { 836 if (c == 'u') { 837 ucl_chunk_skipc (chunk, p); 838 for (i = 0; i < 4 && p < chunk->end; i ++) { 839 if (!isxdigit (*p)) { 840 ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err); 841 return false; 842 } 843 ucl_chunk_skipc (chunk, p); 844 } 845 if (p >= chunk->end) { 846 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err); 847 return false; 848 } 849 } 850 else { 851 ucl_chunk_skipc (chunk, p); 852 } 853 } 854 *need_unescape = true; 855 *ucl_escape = true; 856 continue; 857 } 858 else if (c == '"') { 859 ucl_chunk_skipc (chunk, p); 860 return true; 861 } 862 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) { 863 *ucl_escape = true; 864 } 865 else if (c == '$') { 866 *var_expand = true; 867 } 868 ucl_chunk_skipc (chunk, p); 869 } 870 871 ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err); 872 return false; 873 } 874 875 /** 876 * Parse a key in an object 877 * @param parser 878 * @param chunk 879 * @return true if a key has been parsed 880 */ 881 static bool 882 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object) 883 { 884 const unsigned char *p, *c = NULL, *end, *t; 885 const char *key = NULL; 886 bool got_quote = false, got_eq = false, got_semicolon = false, 887 need_unescape = false, ucl_escape = false, var_expand = false, 888 got_content = false, got_sep = false; 889 ucl_object_t *nobj, *tobj; 890 ucl_hash_t *container; 891 ssize_t keylen; 892 893 p = chunk->pos; 894 895 if (*p == '.') { 896 /* It is macro actually */ 897 ucl_chunk_skipc (chunk, p); 898 parser->prev_state = parser->state; 899 parser->state = UCL_STATE_MACRO_NAME; 900 return true; 901 } 902 while (p < chunk->end) { 903 /* 904 * A key must start with alpha, number, '/' or '_' and end with space character 905 */ 906 if (c == NULL) { 907 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 908 if (!ucl_skip_comments (parser)) { 909 return false; 910 } 911 p = chunk->pos; 912 } 913 else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 914 ucl_chunk_skipc (chunk, p); 915 } 916 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) { 917 /* The first symbol */ 918 c = p; 919 ucl_chunk_skipc (chunk, p); 920 got_content = true; 921 } 922 else if (*p == '"') { 923 /* JSON style key */ 924 c = p + 1; 925 got_quote = true; 926 got_content = true; 927 ucl_chunk_skipc (chunk, p); 928 } 929 else if (*p == '}') { 930 /* We have actually end of an object */ 931 *end_of_object = true; 932 return true; 933 } 934 else if (*p == '.') { 935 ucl_chunk_skipc (chunk, p); 936 parser->prev_state = parser->state; 937 parser->state = UCL_STATE_MACRO_NAME; 938 return true; 939 } 940 else { 941 /* Invalid identifier */ 942 ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err); 943 return false; 944 } 945 } 946 else { 947 /* Parse the body of a key */ 948 if (!got_quote) { 949 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) { 950 got_content = true; 951 ucl_chunk_skipc (chunk, p); 952 } 953 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) { 954 end = p; 955 break; 956 } 957 else { 958 ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err); 959 return false; 960 } 961 } 962 else { 963 /* We need to parse json like quoted string */ 964 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 965 return false; 966 } 967 /* Always escape keys obtained via json */ 968 end = chunk->pos - 1; 969 p = chunk->pos; 970 break; 971 } 972 } 973 } 974 975 if (p >= chunk->end && got_content) { 976 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err); 977 return false; 978 } 979 else if (!got_content) { 980 return true; 981 } 982 *end_of_object = false; 983 /* We are now at the end of the key, need to parse the rest */ 984 while (p < chunk->end) { 985 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 986 ucl_chunk_skipc (chunk, p); 987 } 988 else if (*p == '=') { 989 if (!got_eq && !got_semicolon) { 990 ucl_chunk_skipc (chunk, p); 991 got_eq = true; 992 } 993 else { 994 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err); 995 return false; 996 } 997 } 998 else if (*p == ':') { 999 if (!got_eq && !got_semicolon) { 1000 ucl_chunk_skipc (chunk, p); 1001 got_semicolon = true; 1002 } 1003 else { 1004 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err); 1005 return false; 1006 } 1007 } 1008 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1009 /* Check for comment */ 1010 if (!ucl_skip_comments (parser)) { 1011 return false; 1012 } 1013 p = chunk->pos; 1014 } 1015 else { 1016 /* Start value */ 1017 break; 1018 } 1019 } 1020 1021 if (p >= chunk->end && got_content) { 1022 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err); 1023 return false; 1024 } 1025 1026 got_sep = got_semicolon || got_eq; 1027 1028 if (!got_sep) { 1029 /* 1030 * Maybe we have more keys nested, so search for termination character. 1031 * Possible choices: 1032 * 1) key1 key2 ... keyN [:=] value <- we treat that as error 1033 * 2) key1 ... keyN {} or [] <- we treat that as nested objects 1034 * 3) key1 value[;,\n] <- we treat that as linear object 1035 */ 1036 t = p; 1037 *next_key = false; 1038 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) { 1039 t ++; 1040 } 1041 /* Check first non-space character after a key */ 1042 if (*t != '{' && *t != '[') { 1043 while (t < chunk->end) { 1044 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') { 1045 break; 1046 } 1047 else if (*t == '{' || *t == '[') { 1048 *next_key = true; 1049 break; 1050 } 1051 t ++; 1052 } 1053 } 1054 } 1055 1056 /* Create a new object */ 1057 nobj = ucl_object_new (); 1058 keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY], 1059 &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false); 1060 if (keylen == -1) { 1061 ucl_object_unref (nobj); 1062 return false; 1063 } 1064 else if (keylen == 0) { 1065 ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err); 1066 ucl_object_unref (nobj); 1067 return false; 1068 } 1069 1070 container = parser->stack->obj->value.ov; 1071 nobj->key = key; 1072 nobj->keylen = keylen; 1073 tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj)); 1074 if (tobj == NULL) { 1075 container = ucl_hash_insert_object (container, nobj); 1076 nobj->prev = nobj; 1077 nobj->next = NULL; 1078 parser->stack->obj->len ++; 1079 } 1080 else { 1081 DL_APPEND (tobj, nobj); 1082 } 1083 1084 if (ucl_escape) { 1085 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE; 1086 } 1087 parser->stack->obj->value.ov = container; 1088 1089 parser->cur_obj = nobj; 1090 1091 return true; 1092 } 1093 1094 /** 1095 * Parse a cl string 1096 * @param parser 1097 * @param chunk 1098 * @return true if a key has been parsed 1099 */ 1100 static bool 1101 ucl_parse_string_value (struct ucl_parser *parser, 1102 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape) 1103 { 1104 const unsigned char *p; 1105 enum { 1106 UCL_BRACE_ROUND = 0, 1107 UCL_BRACE_SQUARE, 1108 UCL_BRACE_FIGURE 1109 }; 1110 int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}}; 1111 1112 p = chunk->pos; 1113 1114 while (p < chunk->end) { 1115 1116 /* Skip pairs of figure braces */ 1117 if (*p == '{') { 1118 braces[UCL_BRACE_FIGURE][0] ++; 1119 } 1120 else if (*p == '}') { 1121 braces[UCL_BRACE_FIGURE][1] ++; 1122 if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) { 1123 /* This is not a termination symbol, continue */ 1124 ucl_chunk_skipc (chunk, p); 1125 continue; 1126 } 1127 } 1128 /* Skip pairs of square braces */ 1129 else if (*p == '[') { 1130 braces[UCL_BRACE_SQUARE][0] ++; 1131 } 1132 else if (*p == ']') { 1133 braces[UCL_BRACE_SQUARE][1] ++; 1134 if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) { 1135 /* This is not a termination symbol, continue */ 1136 ucl_chunk_skipc (chunk, p); 1137 continue; 1138 } 1139 } 1140 else if (*p == '$') { 1141 *var_expand = true; 1142 } 1143 else if (*p == '\\') { 1144 *need_unescape = true; 1145 ucl_chunk_skipc (chunk, p); 1146 if (p < chunk->end) { 1147 ucl_chunk_skipc (chunk, p); 1148 } 1149 continue; 1150 } 1151 1152 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1153 break; 1154 } 1155 ucl_chunk_skipc (chunk, p); 1156 } 1157 1158 if (p >= chunk->end) { 1159 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err); 1160 return false; 1161 } 1162 1163 return true; 1164 } 1165 1166 /** 1167 * Parse multiline string ending with \n{term}\n 1168 * @param parser 1169 * @param chunk 1170 * @param term 1171 * @param term_len 1172 * @return size of multiline string or 0 in case of error 1173 */ 1174 static int 1175 ucl_parse_multiline_string (struct ucl_parser *parser, 1176 struct ucl_chunk *chunk, const unsigned char *term, 1177 int term_len, unsigned char const **beg, 1178 bool *var_expand) 1179 { 1180 const unsigned char *p, *c; 1181 bool newline = false; 1182 int len = 0; 1183 1184 p = chunk->pos; 1185 1186 c = p; 1187 1188 while (p < chunk->end) { 1189 if (newline) { 1190 if (chunk->end - p < term_len) { 1191 return 0; 1192 } 1193 else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) { 1194 len = p - c; 1195 chunk->remain -= term_len; 1196 chunk->pos = p + term_len; 1197 chunk->column = term_len; 1198 *beg = c; 1199 break; 1200 } 1201 } 1202 if (*p == '\n') { 1203 newline = true; 1204 } 1205 else { 1206 if (*p == '$') { 1207 *var_expand = true; 1208 } 1209 newline = false; 1210 } 1211 ucl_chunk_skipc (chunk, p); 1212 } 1213 1214 return len; 1215 } 1216 1217 static ucl_object_t* 1218 ucl_get_value_object (struct ucl_parser *parser) 1219 { 1220 ucl_object_t *t, *obj = NULL; 1221 1222 if (parser->stack->obj->type == UCL_ARRAY) { 1223 /* Object must be allocated */ 1224 obj = ucl_object_new (); 1225 t = parser->stack->obj->value.av; 1226 DL_APPEND (t, obj); 1227 parser->cur_obj = obj; 1228 parser->stack->obj->value.av = t; 1229 parser->stack->obj->len ++; 1230 } 1231 else { 1232 /* Object has been already allocated */ 1233 obj = parser->cur_obj; 1234 } 1235 1236 return obj; 1237 } 1238 1239 /** 1240 * Handle value data 1241 * @param parser 1242 * @param chunk 1243 * @return 1244 */ 1245 static bool 1246 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1247 { 1248 const unsigned char *p, *c; 1249 ucl_object_t *obj = NULL; 1250 unsigned int stripped_spaces; 1251 int str_len; 1252 bool need_unescape = false, ucl_escape = false, var_expand = false; 1253 1254 p = chunk->pos; 1255 1256 /* Skip any spaces and comments */ 1257 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) || 1258 (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1259 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1260 ucl_chunk_skipc (chunk, p); 1261 } 1262 if (!ucl_skip_comments (parser)) { 1263 return false; 1264 } 1265 p = chunk->pos; 1266 } 1267 1268 while (p < chunk->end) { 1269 c = p; 1270 switch (*p) { 1271 case '"': 1272 obj = ucl_get_value_object (parser); 1273 ucl_chunk_skipc (chunk, p); 1274 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1275 return false; 1276 } 1277 str_len = chunk->pos - c - 2; 1278 obj->type = UCL_STRING; 1279 if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE], 1280 &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) { 1281 return false; 1282 } 1283 obj->len = str_len; 1284 parser->state = UCL_STATE_AFTER_VALUE; 1285 p = chunk->pos; 1286 return true; 1287 break; 1288 case '{': 1289 obj = ucl_get_value_object (parser); 1290 /* We have a new object */ 1291 obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level); 1292 if (obj == NULL) { 1293 return false; 1294 } 1295 1296 ucl_chunk_skipc (chunk, p); 1297 return true; 1298 break; 1299 case '[': 1300 obj = ucl_get_value_object (parser); 1301 /* We have a new array */ 1302 obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level); 1303 if (obj == NULL) { 1304 return false; 1305 } 1306 1307 ucl_chunk_skipc (chunk, p); 1308 return true; 1309 break; 1310 case ']': 1311 /* We have the array ending */ 1312 if (parser->stack && parser->stack->obj->type == UCL_ARRAY) { 1313 parser->state = UCL_STATE_AFTER_VALUE; 1314 return true; 1315 } 1316 else { 1317 goto parse_string; 1318 } 1319 break; 1320 case '<': 1321 obj = ucl_get_value_object (parser); 1322 /* We have something like multiline value, which must be <<[A-Z]+\n */ 1323 if (chunk->end - p > 3) { 1324 if (memcmp (p, "<<", 2) == 0) { 1325 p += 2; 1326 /* We allow only uppercase characters in multiline definitions */ 1327 while (p < chunk->end && *p >= 'A' && *p <= 'Z') { 1328 p ++; 1329 } 1330 if (*p =='\n') { 1331 /* Set chunk positions and start multiline parsing */ 1332 c += 2; 1333 chunk->remain -= p - c; 1334 chunk->pos = p + 1; 1335 chunk->column = 0; 1336 chunk->line ++; 1337 if ((str_len = ucl_parse_multiline_string (parser, chunk, c, 1338 p - c, &c, &var_expand)) == 0) { 1339 ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err); 1340 return false; 1341 } 1342 obj->type = UCL_STRING; 1343 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE], 1344 &obj->value.sv, str_len - 1, false, false, var_expand)) == -1) { 1345 return false; 1346 } 1347 obj->len = str_len; 1348 parser->state = UCL_STATE_AFTER_VALUE; 1349 return true; 1350 } 1351 } 1352 } 1353 /* Fallback to ordinary strings */ 1354 default: 1355 parse_string: 1356 if (obj == NULL) { 1357 obj = ucl_get_value_object (parser); 1358 } 1359 /* Parse atom */ 1360 if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) { 1361 if (!ucl_lex_number (parser, chunk, obj)) { 1362 if (parser->state == UCL_STATE_ERROR) { 1363 return false; 1364 } 1365 } 1366 else { 1367 parser->state = UCL_STATE_AFTER_VALUE; 1368 return true; 1369 } 1370 /* Fallback to normal string */ 1371 } 1372 1373 if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) { 1374 return false; 1375 } 1376 /* Cut trailing spaces */ 1377 stripped_spaces = 0; 1378 while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces), 1379 UCL_CHARACTER_WHITESPACE)) { 1380 stripped_spaces ++; 1381 } 1382 str_len = chunk->pos - c - stripped_spaces; 1383 if (str_len <= 0) { 1384 ucl_set_err (chunk, 0, "string value must not be empty", &parser->err); 1385 return false; 1386 } 1387 else if (str_len == 4 && memcmp (c, "null", 4) == 0) { 1388 obj->len = 0; 1389 obj->type = UCL_NULL; 1390 } 1391 else if (!ucl_maybe_parse_boolean (obj, c, str_len)) { 1392 obj->type = UCL_STRING; 1393 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE], 1394 &obj->value.sv, str_len, need_unescape, 1395 false, var_expand)) == -1) { 1396 return false; 1397 } 1398 obj->len = str_len; 1399 } 1400 parser->state = UCL_STATE_AFTER_VALUE; 1401 p = chunk->pos; 1402 1403 return true; 1404 break; 1405 } 1406 } 1407 1408 return true; 1409 } 1410 1411 /** 1412 * Handle after value data 1413 * @param parser 1414 * @param chunk 1415 * @return 1416 */ 1417 static bool 1418 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1419 { 1420 const unsigned char *p; 1421 bool got_sep = false; 1422 struct ucl_stack *st; 1423 1424 p = chunk->pos; 1425 1426 while (p < chunk->end) { 1427 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 1428 /* Skip whitespaces */ 1429 ucl_chunk_skipc (chunk, p); 1430 } 1431 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1432 /* Skip comment */ 1433 if (!ucl_skip_comments (parser)) { 1434 return false; 1435 } 1436 /* Treat comment as a separator */ 1437 got_sep = true; 1438 p = chunk->pos; 1439 } 1440 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) { 1441 if (*p == '}' || *p == ']') { 1442 if (parser->stack == NULL) { 1443 ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err); 1444 return false; 1445 } 1446 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) || 1447 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) { 1448 1449 /* Pop all nested objects from a stack */ 1450 st = parser->stack; 1451 parser->stack = st->next; 1452 UCL_FREE (sizeof (struct ucl_stack), st); 1453 1454 while (parser->stack != NULL) { 1455 st = parser->stack; 1456 if (st->next == NULL || st->next->level == st->level) { 1457 break; 1458 } 1459 parser->stack = st->next; 1460 UCL_FREE (sizeof (struct ucl_stack), st); 1461 } 1462 } 1463 else { 1464 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err); 1465 return false; 1466 } 1467 1468 if (parser->stack == NULL) { 1469 /* Ignore everything after a top object */ 1470 return true; 1471 } 1472 else { 1473 ucl_chunk_skipc (chunk, p); 1474 } 1475 got_sep = true; 1476 } 1477 else { 1478 /* Got a separator */ 1479 got_sep = true; 1480 ucl_chunk_skipc (chunk, p); 1481 } 1482 } 1483 else { 1484 /* Anything else */ 1485 if (!got_sep) { 1486 ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err); 1487 return false; 1488 } 1489 return true; 1490 } 1491 } 1492 1493 return true; 1494 } 1495 1496 /** 1497 * Handle macro data 1498 * @param parser 1499 * @param chunk 1500 * @return 1501 */ 1502 static bool 1503 ucl_parse_macro_value (struct ucl_parser *parser, 1504 struct ucl_chunk *chunk, struct ucl_macro *macro, 1505 unsigned char const **macro_start, size_t *macro_len) 1506 { 1507 const unsigned char *p, *c; 1508 bool need_unescape = false, ucl_escape = false, var_expand = false; 1509 1510 p = chunk->pos; 1511 1512 switch (*p) { 1513 case '"': 1514 /* We have macro value encoded in quotes */ 1515 c = p; 1516 ucl_chunk_skipc (chunk, p); 1517 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1518 return false; 1519 } 1520 1521 *macro_start = c + 1; 1522 *macro_len = chunk->pos - c - 2; 1523 p = chunk->pos; 1524 break; 1525 case '{': 1526 /* We got a multiline macro body */ 1527 ucl_chunk_skipc (chunk, p); 1528 /* Skip spaces at the beginning */ 1529 while (p < chunk->end) { 1530 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1531 ucl_chunk_skipc (chunk, p); 1532 } 1533 else { 1534 break; 1535 } 1536 } 1537 c = p; 1538 while (p < chunk->end) { 1539 if (*p == '}') { 1540 break; 1541 } 1542 ucl_chunk_skipc (chunk, p); 1543 } 1544 *macro_start = c; 1545 *macro_len = p - c; 1546 ucl_chunk_skipc (chunk, p); 1547 break; 1548 default: 1549 /* Macro is not enclosed in quotes or braces */ 1550 c = p; 1551 while (p < chunk->end) { 1552 if (ucl_lex_is_atom_end (*p)) { 1553 break; 1554 } 1555 ucl_chunk_skipc (chunk, p); 1556 } 1557 *macro_start = c; 1558 *macro_len = p - c; 1559 break; 1560 } 1561 1562 /* We are at the end of a macro */ 1563 /* Skip ';' and space characters and return to previous state */ 1564 while (p < chunk->end) { 1565 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') { 1566 break; 1567 } 1568 ucl_chunk_skipc (chunk, p); 1569 } 1570 return true; 1571 } 1572 1573 /** 1574 * Handle the main states of rcl parser 1575 * @param parser parser structure 1576 * @param data the pointer to the beginning of a chunk 1577 * @param len the length of a chunk 1578 * @return true if chunk has been parsed and false in case of error 1579 */ 1580 static bool 1581 ucl_state_machine (struct ucl_parser *parser) 1582 { 1583 ucl_object_t *obj; 1584 struct ucl_chunk *chunk = parser->chunks; 1585 const unsigned char *p, *c = NULL, *macro_start = NULL; 1586 unsigned char *macro_escaped; 1587 size_t macro_len = 0; 1588 struct ucl_macro *macro = NULL; 1589 bool next_key = false, end_of_object = false; 1590 1591 if (parser->top_obj == NULL) { 1592 if (*chunk->pos == '[') { 1593 obj = ucl_add_parser_stack (NULL, parser, true, 0); 1594 } 1595 else { 1596 obj = ucl_add_parser_stack (NULL, parser, false, 0); 1597 } 1598 if (obj == NULL) { 1599 return false; 1600 } 1601 parser->top_obj = obj; 1602 parser->cur_obj = obj; 1603 parser->state = UCL_STATE_INIT; 1604 } 1605 1606 p = chunk->pos; 1607 while (chunk->pos < chunk->end) { 1608 switch (parser->state) { 1609 case UCL_STATE_INIT: 1610 /* 1611 * At the init state we can either go to the parse array or object 1612 * if we got [ or { correspondingly or can just treat new data as 1613 * a key of newly created object 1614 */ 1615 obj = parser->cur_obj; 1616 if (!ucl_skip_comments (parser)) { 1617 parser->prev_state = parser->state; 1618 parser->state = UCL_STATE_ERROR; 1619 return false; 1620 } 1621 else { 1622 p = chunk->pos; 1623 if (*p == '[') { 1624 parser->state = UCL_STATE_VALUE; 1625 ucl_chunk_skipc (chunk, p); 1626 } 1627 else { 1628 parser->state = UCL_STATE_KEY; 1629 if (*p == '{') { 1630 ucl_chunk_skipc (chunk, p); 1631 } 1632 } 1633 } 1634 break; 1635 case UCL_STATE_KEY: 1636 /* Skip any spaces */ 1637 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1638 ucl_chunk_skipc (chunk, p); 1639 } 1640 if (*p == '}') { 1641 /* We have the end of an object */ 1642 parser->state = UCL_STATE_AFTER_VALUE; 1643 continue; 1644 } 1645 if (parser->stack == NULL) { 1646 /* No objects are on stack, but we want to parse a key */ 1647 ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser " 1648 "expects a key", &parser->err); 1649 parser->prev_state = parser->state; 1650 parser->state = UCL_STATE_ERROR; 1651 return false; 1652 } 1653 if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) { 1654 parser->prev_state = parser->state; 1655 parser->state = UCL_STATE_ERROR; 1656 return false; 1657 } 1658 if (end_of_object) { 1659 p = chunk->pos; 1660 parser->state = UCL_STATE_AFTER_VALUE; 1661 continue; 1662 } 1663 else if (parser->state != UCL_STATE_MACRO_NAME) { 1664 if (next_key && parser->stack->obj->type == UCL_OBJECT) { 1665 /* Parse more keys and nest objects accordingly */ 1666 obj = ucl_add_parser_stack (parser->cur_obj, parser, false, 1667 parser->stack->level + 1); 1668 if (obj == NULL) { 1669 return false; 1670 } 1671 } 1672 else { 1673 parser->state = UCL_STATE_VALUE; 1674 } 1675 } 1676 else { 1677 c = chunk->pos; 1678 } 1679 p = chunk->pos; 1680 break; 1681 case UCL_STATE_VALUE: 1682 /* We need to check what we do have */ 1683 if (!ucl_parse_value (parser, chunk)) { 1684 parser->prev_state = parser->state; 1685 parser->state = UCL_STATE_ERROR; 1686 return false; 1687 } 1688 /* State is set in ucl_parse_value call */ 1689 p = chunk->pos; 1690 break; 1691 case UCL_STATE_AFTER_VALUE: 1692 if (!ucl_parse_after_value (parser, chunk)) { 1693 parser->prev_state = parser->state; 1694 parser->state = UCL_STATE_ERROR; 1695 return false; 1696 } 1697 if (parser->stack != NULL) { 1698 if (parser->stack->obj->type == UCL_OBJECT) { 1699 parser->state = UCL_STATE_KEY; 1700 } 1701 else { 1702 /* Array */ 1703 parser->state = UCL_STATE_VALUE; 1704 } 1705 } 1706 else { 1707 /* Skip everything at the end */ 1708 return true; 1709 } 1710 p = chunk->pos; 1711 break; 1712 case UCL_STATE_MACRO_NAME: 1713 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1714 ucl_chunk_skipc (chunk, p); 1715 } 1716 else if (p - c > 0) { 1717 /* We got macro name */ 1718 macro_len = (size_t)(p - c); 1719 HASH_FIND (hh, parser->macroes, c, macro_len, macro); 1720 if (macro == NULL) { 1721 ucl_create_err (&parser->err, "error on line %d at column %d: " 1722 "unknown macro: '%.*s', character: '%c'", 1723 chunk->line, chunk->column, (int)(p - c), c, *chunk->pos); 1724 parser->state = UCL_STATE_ERROR; 1725 return false; 1726 } 1727 /* Now we need to skip all spaces */ 1728 while (p < chunk->end) { 1729 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1730 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1731 /* Skip comment */ 1732 if (!ucl_skip_comments (parser)) { 1733 return false; 1734 } 1735 p = chunk->pos; 1736 } 1737 break; 1738 } 1739 ucl_chunk_skipc (chunk, p); 1740 } 1741 parser->state = UCL_STATE_MACRO; 1742 } 1743 break; 1744 case UCL_STATE_MACRO: 1745 if (!ucl_parse_macro_value (parser, chunk, macro, 1746 ¯o_start, ¯o_len)) { 1747 parser->prev_state = parser->state; 1748 parser->state = UCL_STATE_ERROR; 1749 return false; 1750 } 1751 macro_len = ucl_expand_variable (parser, ¯o_escaped, macro_start, macro_len); 1752 parser->state = parser->prev_state; 1753 if (macro_escaped == NULL) { 1754 if (!macro->handler (macro_start, macro_len, macro->ud)) { 1755 return false; 1756 } 1757 } 1758 else { 1759 if (!macro->handler (macro_escaped, macro_len, macro->ud)) { 1760 UCL_FREE (macro_len + 1, macro_escaped); 1761 return false; 1762 } 1763 UCL_FREE (macro_len + 1, macro_escaped); 1764 } 1765 p = chunk->pos; 1766 break; 1767 default: 1768 /* TODO: add all states */ 1769 ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err); 1770 parser->state = UCL_STATE_ERROR; 1771 return false; 1772 } 1773 } 1774 1775 return true; 1776 } 1777 1778 struct ucl_parser* 1779 ucl_parser_new (int flags) 1780 { 1781 struct ucl_parser *new; 1782 1783 new = UCL_ALLOC (sizeof (struct ucl_parser)); 1784 if (new == NULL) { 1785 return NULL; 1786 } 1787 memset (new, 0, sizeof (struct ucl_parser)); 1788 1789 ucl_parser_register_macro (new, "include", ucl_include_handler, new); 1790 ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new); 1791 ucl_parser_register_macro (new, "includes", ucl_includes_handler, new); 1792 1793 new->flags = flags; 1794 1795 /* Initial assumption about filevars */ 1796 ucl_parser_set_filevars (new, NULL, false); 1797 1798 return new; 1799 } 1800 1801 1802 void 1803 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro, 1804 ucl_macro_handler handler, void* ud) 1805 { 1806 struct ucl_macro *new; 1807 1808 if (macro == NULL || handler == NULL) { 1809 return; 1810 } 1811 new = UCL_ALLOC (sizeof (struct ucl_macro)); 1812 if (new == NULL) { 1813 return; 1814 } 1815 memset (new, 0, sizeof (struct ucl_macro)); 1816 new->handler = handler; 1817 new->name = strdup (macro); 1818 new->ud = ud; 1819 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); 1820 } 1821 1822 void 1823 ucl_parser_register_variable (struct ucl_parser *parser, const char *var, 1824 const char *value) 1825 { 1826 struct ucl_variable *new = NULL, *cur; 1827 1828 if (var == NULL) { 1829 return; 1830 } 1831 1832 /* Find whether a variable already exists */ 1833 LL_FOREACH (parser->variables, cur) { 1834 if (strcmp (cur->var, var) == 0) { 1835 new = cur; 1836 break; 1837 } 1838 } 1839 1840 if (value == NULL) { 1841 1842 if (new != NULL) { 1843 /* Remove variable */ 1844 LL_DELETE (parser->variables, new); 1845 free (new->var); 1846 free (new->value); 1847 UCL_FREE (sizeof (struct ucl_variable), new); 1848 } 1849 else { 1850 /* Do nothing */ 1851 return; 1852 } 1853 } 1854 else { 1855 if (new == NULL) { 1856 new = UCL_ALLOC (sizeof (struct ucl_variable)); 1857 if (new == NULL) { 1858 return; 1859 } 1860 memset (new, 0, sizeof (struct ucl_variable)); 1861 new->var = strdup (var); 1862 new->var_len = strlen (var); 1863 new->value = strdup (value); 1864 new->value_len = strlen (value); 1865 1866 LL_PREPEND (parser->variables, new); 1867 } 1868 else { 1869 free (new->value); 1870 new->value = strdup (value); 1871 new->value_len = strlen (value); 1872 } 1873 } 1874 } 1875 1876 bool 1877 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data, 1878 size_t len) 1879 { 1880 struct ucl_chunk *chunk; 1881 1882 if (data == NULL || len == 0) { 1883 ucl_create_err (&parser->err, "invalid chunk added"); 1884 return false; 1885 } 1886 if (parser->state != UCL_STATE_ERROR) { 1887 chunk = UCL_ALLOC (sizeof (struct ucl_chunk)); 1888 if (chunk == NULL) { 1889 ucl_create_err (&parser->err, "cannot allocate chunk structure"); 1890 return false; 1891 } 1892 chunk->begin = data; 1893 chunk->remain = len; 1894 chunk->pos = chunk->begin; 1895 chunk->end = chunk->begin + len; 1896 chunk->line = 1; 1897 chunk->column = 0; 1898 LL_PREPEND (parser->chunks, chunk); 1899 parser->recursion ++; 1900 if (parser->recursion > UCL_MAX_RECURSION) { 1901 ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d", 1902 parser->recursion); 1903 return false; 1904 } 1905 return ucl_state_machine (parser); 1906 } 1907 1908 ucl_create_err (&parser->err, "a parser is in an invalid state"); 1909 1910 return false; 1911 } 1912 1913 bool 1914 ucl_parser_add_string (struct ucl_parser *parser, const char *data, 1915 size_t len) 1916 { 1917 if (data == NULL) { 1918 ucl_create_err (&parser->err, "invalid string added"); 1919 return false; 1920 } 1921 if (len == 0) { 1922 len = strlen (data); 1923 } 1924 1925 return ucl_parser_add_chunk (parser, (const unsigned char *)data, len); 1926 } 1927