1 /* Copyright (c) 2013, Vsevolod Stakhov 2 * All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * * Redistributions of source code must retain the above copyright 7 * notice, this list of conditions and the following disclaimer. 8 * * Redistributions in binary form must reproduce the above copyright 9 * notice, this list of conditions and the following disclaimer in the 10 * documentation and/or other materials provided with the distribution. 11 * 12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY 13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY 16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 */ 23 24 #include "ucl.h" 25 #include "ucl_internal.h" 26 #include "ucl_chartable.h" 27 28 /** 29 * @file rcl_parser.c 30 * The implementation of rcl parser 31 */ 32 33 struct ucl_parser_saved_state { 34 unsigned int line; 35 unsigned int column; 36 size_t remain; 37 const unsigned char *pos; 38 }; 39 40 /** 41 * Move up to len characters 42 * @param parser 43 * @param begin 44 * @param len 45 * @return new position in chunk 46 */ 47 #define ucl_chunk_skipc(chunk, p) do{ \ 48 if (*(p) == '\n') { \ 49 (chunk)->line ++; \ 50 (chunk)->column = 0; \ 51 } \ 52 else (chunk)->column ++; \ 53 (p++); \ 54 (chunk)->pos ++; \ 55 (chunk)->remain --; \ 56 } while (0) 57 58 static inline void 59 ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err) 60 { 61 if (chunk->pos < chunk->end) { 62 if (isgraph (*chunk->pos)) { 63 ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'", 64 chunk->line, chunk->column, str, *chunk->pos); 65 } 66 else { 67 ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'", 68 chunk->line, chunk->column, str, (int)*chunk->pos); 69 } 70 } 71 else { 72 ucl_create_err (err, "error at the end of chunk: %s", str); 73 } 74 } 75 76 /** 77 * Skip all comments from the current pos resolving nested and multiline comments 78 * @param parser 79 * @return 80 */ 81 static bool 82 ucl_skip_comments (struct ucl_parser *parser) 83 { 84 struct ucl_chunk *chunk = parser->chunks; 85 const unsigned char *p; 86 int comments_nested = 0; 87 88 p = chunk->pos; 89 90 start: 91 if (*p == '#') { 92 if (parser->state != UCL_STATE_SCOMMENT && 93 parser->state != UCL_STATE_MCOMMENT) { 94 while (p < chunk->end) { 95 if (*p == '\n') { 96 ucl_chunk_skipc (chunk, p); 97 goto start; 98 } 99 ucl_chunk_skipc (chunk, p); 100 } 101 } 102 } 103 else if (*p == '/' && chunk->remain >= 2) { 104 if (p[1] == '*') { 105 ucl_chunk_skipc (chunk, p); 106 comments_nested ++; 107 ucl_chunk_skipc (chunk, p); 108 109 while (p < chunk->end) { 110 if (*p == '*') { 111 ucl_chunk_skipc (chunk, p); 112 if (*p == '/') { 113 comments_nested --; 114 if (comments_nested == 0) { 115 ucl_chunk_skipc (chunk, p); 116 goto start; 117 } 118 } 119 ucl_chunk_skipc (chunk, p); 120 } 121 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') { 122 comments_nested ++; 123 ucl_chunk_skipc (chunk, p); 124 ucl_chunk_skipc (chunk, p); 125 continue; 126 } 127 ucl_chunk_skipc (chunk, p); 128 } 129 if (comments_nested != 0) { 130 ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err); 131 return false; 132 } 133 } 134 } 135 136 return true; 137 } 138 139 /** 140 * Return multiplier for a character 141 * @param c multiplier character 142 * @param is_bytes if true use 1024 multiplier 143 * @return multiplier 144 */ 145 static inline unsigned long 146 ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) { 147 const struct { 148 char c; 149 long mult_normal; 150 long mult_bytes; 151 } multipliers[] = { 152 {'m', 1000 * 1000, 1024 * 1024}, 153 {'k', 1000, 1024}, 154 {'g', 1000 * 1000 * 1000, 1024 * 1024 * 1024} 155 }; 156 int i; 157 158 for (i = 0; i < 3; i ++) { 159 if (tolower (c) == multipliers[i].c) { 160 if (is_bytes) { 161 return multipliers[i].mult_bytes; 162 } 163 return multipliers[i].mult_normal; 164 } 165 } 166 167 return 1; 168 } 169 170 171 /** 172 * Return multiplier for time scaling 173 * @param c 174 * @return 175 */ 176 static inline double 177 ucl_lex_time_multiplier (const unsigned char c) { 178 const struct { 179 char c; 180 double mult; 181 } multipliers[] = { 182 {'m', 60}, 183 {'h', 60 * 60}, 184 {'d', 60 * 60 * 24}, 185 {'w', 60 * 60 * 24 * 7}, 186 {'y', 60 * 60 * 24 * 7 * 365} 187 }; 188 int i; 189 190 for (i = 0; i < 5; i ++) { 191 if (tolower (c) == multipliers[i].c) { 192 return multipliers[i].mult; 193 } 194 } 195 196 return 1; 197 } 198 199 /** 200 * Return true if a character is a end of an atom 201 * @param c 202 * @return 203 */ 204 static inline bool 205 ucl_lex_is_atom_end (const unsigned char c) 206 { 207 return ucl_test_character (c, UCL_CHARACTER_VALUE_END); 208 } 209 210 static inline bool 211 ucl_lex_is_comment (const unsigned char c1, const unsigned char c2) 212 { 213 if (c1 == '/') { 214 if (c2 == '*') { 215 return true; 216 } 217 } 218 else if (c1 == '#') { 219 return true; 220 } 221 return false; 222 } 223 224 /** 225 * Check variable found 226 * @param parser 227 * @param ptr 228 * @param remain 229 * @param out_len 230 * @param strict 231 * @param found 232 * @return 233 */ 234 static inline const char * 235 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain, 236 size_t *out_len, bool strict, bool *found) 237 { 238 struct ucl_variable *var; 239 unsigned char *dst; 240 size_t dstlen; 241 bool need_free = false; 242 243 LL_FOREACH (parser->variables, var) { 244 if (strict) { 245 if (remain == var->var_len) { 246 if (memcmp (ptr, var->var, var->var_len) == 0) { 247 *out_len += var->value_len; 248 *found = true; 249 return (ptr + var->var_len); 250 } 251 } 252 } 253 else { 254 if (remain >= var->var_len) { 255 if (memcmp (ptr, var->var, var->var_len) == 0) { 256 *out_len += var->value_len; 257 *found = true; 258 return (ptr + var->var_len); 259 } 260 } 261 } 262 } 263 264 /* XXX: can only handle ${VAR} */ 265 if (!(*found) && parser->var_handler != NULL && strict) { 266 /* Call generic handler */ 267 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free, 268 parser->var_data)) { 269 *found = true; 270 if (need_free) { 271 free (dst); 272 } 273 return (ptr + remain); 274 } 275 } 276 277 return ptr; 278 } 279 280 /** 281 * Check for a variable in a given string 282 * @param parser 283 * @param ptr 284 * @param remain 285 * @param out_len 286 * @param vars_found 287 * @return 288 */ 289 static const char * 290 ucl_check_variable (struct ucl_parser *parser, const char *ptr, 291 size_t remain, size_t *out_len, bool *vars_found) 292 { 293 const char *p, *end, *ret = ptr; 294 bool found = false; 295 296 if (*ptr == '{') { 297 /* We need to match the variable enclosed in braces */ 298 p = ptr + 1; 299 end = ptr + remain; 300 while (p < end) { 301 if (*p == '}') { 302 ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, 303 out_len, true, &found); 304 if (found) { 305 /* {} must be excluded actually */ 306 ret ++; 307 if (!*vars_found) { 308 *vars_found = true; 309 } 310 } 311 else { 312 *out_len += 2; 313 } 314 break; 315 } 316 p ++; 317 } 318 } 319 else if (*ptr != '$') { 320 /* Not count escaped dollar sign */ 321 ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found); 322 if (found && !*vars_found) { 323 *vars_found = true; 324 } 325 if (!found) { 326 (*out_len) ++; 327 } 328 } 329 else { 330 ret ++; 331 (*out_len) ++; 332 } 333 334 return ret; 335 } 336 337 /** 338 * Expand a single variable 339 * @param parser 340 * @param ptr 341 * @param remain 342 * @param dest 343 * @return 344 */ 345 static const char * 346 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr, 347 size_t remain, unsigned char **dest) 348 { 349 unsigned char *d = *dest, *dst; 350 const char *p = ptr + 1, *ret; 351 struct ucl_variable *var; 352 size_t dstlen; 353 bool need_free = false; 354 bool found = false; 355 bool strict = false; 356 357 ret = ptr + 1; 358 remain --; 359 360 if (*p == '$') { 361 *d++ = *p++; 362 *dest = d; 363 return p; 364 } 365 else if (*p == '{') { 366 p ++; 367 strict = true; 368 ret += 2; 369 remain -= 2; 370 } 371 372 LL_FOREACH (parser->variables, var) { 373 if (remain >= var->var_len) { 374 if (memcmp (p, var->var, var->var_len) == 0) { 375 memcpy (d, var->value, var->value_len); 376 ret += var->var_len; 377 d += var->value_len; 378 found = true; 379 break; 380 } 381 } 382 } 383 if (!found) { 384 if (strict && parser->var_handler != NULL) { 385 if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free, 386 parser->var_data)) { 387 memcpy (d, dst, dstlen); 388 ret += dstlen; 389 d += remain; 390 found = true; 391 } 392 } 393 394 /* Leave variable as is */ 395 if (!found) { 396 if (strict) { 397 /* Copy '${' */ 398 memcpy (d, ptr, 2); 399 d += 2; 400 ret --; 401 } 402 else { 403 memcpy (d, ptr, 1); 404 d ++; 405 } 406 } 407 } 408 409 *dest = d; 410 return ret; 411 } 412 413 /** 414 * Expand variables in string 415 * @param parser 416 * @param dst 417 * @param src 418 * @param in_len 419 * @return 420 */ 421 static ssize_t 422 ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst, 423 const char *src, size_t in_len) 424 { 425 const char *p, *end = src + in_len; 426 unsigned char *d; 427 size_t out_len = 0; 428 bool vars_found = false; 429 430 p = src; 431 while (p != end) { 432 if (*p == '$') { 433 p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found); 434 } 435 else { 436 p ++; 437 out_len ++; 438 } 439 } 440 441 if (!vars_found) { 442 /* Trivial case */ 443 *dst = NULL; 444 return in_len; 445 } 446 447 *dst = UCL_ALLOC (out_len + 1); 448 if (*dst == NULL) { 449 return in_len; 450 } 451 452 d = *dst; 453 p = src; 454 while (p != end) { 455 if (*p == '$') { 456 p = ucl_expand_single_variable (parser, p, end - p, &d); 457 } 458 else { 459 *d++ = *p++; 460 } 461 } 462 463 *d = '\0'; 464 465 return out_len; 466 } 467 468 /** 469 * Store or copy pointer to the trash stack 470 * @param parser parser object 471 * @param src src string 472 * @param dst destination buffer (trash stack pointer) 473 * @param dst_const const destination pointer (e.g. value of object) 474 * @param in_len input length 475 * @param need_unescape need to unescape source (and copy it) 476 * @param need_lowercase need to lowercase value (and copy) 477 * @param need_expand need to expand variables (and copy as well) 478 * @return output length (excluding \0 symbol) 479 */ 480 static inline ssize_t 481 ucl_copy_or_store_ptr (struct ucl_parser *parser, 482 const unsigned char *src, unsigned char **dst, 483 const char **dst_const, size_t in_len, 484 bool need_unescape, bool need_lowercase, bool need_expand) 485 { 486 ssize_t ret = -1, tret; 487 unsigned char *tmp; 488 489 if (need_unescape || need_lowercase || 490 (need_expand && parser->variables != NULL) || 491 !(parser->flags & UCL_PARSER_ZEROCOPY)) { 492 /* Copy string */ 493 *dst = UCL_ALLOC (in_len + 1); 494 if (*dst == NULL) { 495 ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err); 496 return false; 497 } 498 if (need_lowercase) { 499 ret = ucl_strlcpy_tolower (*dst, src, in_len + 1); 500 } 501 else { 502 ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1); 503 } 504 505 if (need_unescape) { 506 ret = ucl_unescape_json_string (*dst, ret); 507 } 508 if (need_expand) { 509 tmp = *dst; 510 tret = ret; 511 ret = ucl_expand_variable (parser, dst, tmp, ret); 512 if (*dst == NULL) { 513 /* Nothing to expand */ 514 *dst = tmp; 515 ret = tret; 516 } 517 } 518 *dst_const = *dst; 519 } 520 else { 521 *dst_const = src; 522 ret = in_len; 523 } 524 525 return ret; 526 } 527 528 /** 529 * Create and append an object at the specified level 530 * @param parser 531 * @param is_array 532 * @param level 533 * @return 534 */ 535 static inline ucl_object_t * 536 ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level) 537 { 538 struct ucl_stack *st; 539 540 if (!is_array) { 541 if (obj == NULL) { 542 obj = ucl_object_typed_new (UCL_OBJECT); 543 } 544 else { 545 obj->type = UCL_OBJECT; 546 } 547 obj->value.ov = ucl_hash_create (); 548 parser->state = UCL_STATE_KEY; 549 } 550 else { 551 if (obj == NULL) { 552 obj = ucl_object_typed_new (UCL_ARRAY); 553 } 554 else { 555 obj->type = UCL_ARRAY; 556 } 557 parser->state = UCL_STATE_VALUE; 558 } 559 560 st = UCL_ALLOC (sizeof (struct ucl_stack)); 561 if (st == NULL) { 562 ucl_set_err (parser->chunks, 0, "cannot allocate memory for an object", &parser->err); 563 return NULL; 564 } 565 st->obj = obj; 566 st->level = level; 567 LL_PREPEND (parser->stack, st); 568 parser->cur_obj = obj; 569 570 return obj; 571 } 572 573 int 574 ucl_maybe_parse_number (ucl_object_t *obj, 575 const char *start, const char *end, const char **pos, 576 bool allow_double, bool number_bytes, bool allow_time) 577 { 578 const char *p = start, *c = start; 579 char *endptr; 580 bool got_dot = false, got_exp = false, need_double = false, 581 is_time = false, valid_start = false, is_hex = false, 582 is_neg = false; 583 double dv = 0; 584 int64_t lv = 0; 585 586 if (*p == '-') { 587 is_neg = true; 588 c ++; 589 p ++; 590 } 591 while (p < end) { 592 if (is_hex && isxdigit (*p)) { 593 p ++; 594 } 595 else if (isdigit (*p)) { 596 valid_start = true; 597 p ++; 598 } 599 else if (!is_hex && (*p == 'x' || *p == 'X')) { 600 is_hex = true; 601 allow_double = false; 602 c = p + 1; 603 } 604 else if (allow_double) { 605 if (p == c) { 606 /* Empty digits sequence, not a number */ 607 *pos = start; 608 return EINVAL; 609 } 610 else if (*p == '.') { 611 if (got_dot) { 612 /* Double dots, not a number */ 613 *pos = start; 614 return EINVAL; 615 } 616 else { 617 got_dot = true; 618 need_double = true; 619 p ++; 620 } 621 } 622 else if (*p == 'e' || *p == 'E') { 623 if (got_exp) { 624 /* Double exp, not a number */ 625 *pos = start; 626 return EINVAL; 627 } 628 else { 629 got_exp = true; 630 need_double = true; 631 p ++; 632 if (p >= end) { 633 *pos = start; 634 return EINVAL; 635 } 636 if (!isdigit (*p) && *p != '+' && *p != '-') { 637 /* Wrong exponent sign */ 638 *pos = start; 639 return EINVAL; 640 } 641 else { 642 p ++; 643 } 644 } 645 } 646 else { 647 /* Got the end of the number, need to check */ 648 break; 649 } 650 } 651 else { 652 break; 653 } 654 } 655 656 if (!valid_start) { 657 *pos = start; 658 return EINVAL; 659 } 660 661 errno = 0; 662 if (need_double) { 663 dv = strtod (c, &endptr); 664 } 665 else { 666 if (is_hex) { 667 lv = strtoimax (c, &endptr, 16); 668 } 669 else { 670 lv = strtoimax (c, &endptr, 10); 671 } 672 } 673 if (errno == ERANGE) { 674 *pos = start; 675 return ERANGE; 676 } 677 678 /* Now check endptr */ 679 if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0' || 680 ucl_test_character (*endptr, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 681 p = endptr; 682 goto set_obj; 683 } 684 685 if (endptr < end && endptr != start) { 686 p = endptr; 687 switch (*p) { 688 case 'm': 689 case 'M': 690 case 'g': 691 case 'G': 692 case 'k': 693 case 'K': 694 if (end - p >= 2) { 695 if (p[1] == 's' || p[1] == 'S') { 696 /* Milliseconds */ 697 if (!need_double) { 698 need_double = true; 699 dv = lv; 700 } 701 is_time = true; 702 if (p[0] == 'm' || p[0] == 'M') { 703 dv /= 1000.; 704 } 705 else { 706 dv *= ucl_lex_num_multiplier (*p, false); 707 } 708 p += 2; 709 goto set_obj; 710 } 711 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) { 712 /* Bytes */ 713 if (need_double) { 714 need_double = false; 715 lv = dv; 716 } 717 lv *= ucl_lex_num_multiplier (*p, true); 718 p += 2; 719 goto set_obj; 720 } 721 else if (ucl_lex_is_atom_end (p[1])) { 722 if (need_double) { 723 dv *= ucl_lex_num_multiplier (*p, false); 724 } 725 else { 726 lv *= ucl_lex_num_multiplier (*p, number_bytes); 727 } 728 p ++; 729 goto set_obj; 730 } 731 else if (allow_time && end - p >= 3) { 732 if (tolower (p[0]) == 'm' && 733 tolower (p[1]) == 'i' && 734 tolower (p[2]) == 'n') { 735 /* Minutes */ 736 if (!need_double) { 737 need_double = true; 738 dv = lv; 739 } 740 is_time = true; 741 dv *= 60.; 742 p += 3; 743 goto set_obj; 744 } 745 } 746 } 747 else { 748 if (need_double) { 749 dv *= ucl_lex_num_multiplier (*p, false); 750 } 751 else { 752 lv *= ucl_lex_num_multiplier (*p, number_bytes); 753 } 754 p ++; 755 goto set_obj; 756 } 757 break; 758 case 'S': 759 case 's': 760 if (allow_time && 761 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { 762 if (!need_double) { 763 need_double = true; 764 dv = lv; 765 } 766 p ++; 767 is_time = true; 768 goto set_obj; 769 } 770 break; 771 case 'h': 772 case 'H': 773 case 'd': 774 case 'D': 775 case 'w': 776 case 'W': 777 case 'Y': 778 case 'y': 779 if (allow_time && 780 (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { 781 if (!need_double) { 782 need_double = true; 783 dv = lv; 784 } 785 is_time = true; 786 dv *= ucl_lex_time_multiplier (*p); 787 p ++; 788 goto set_obj; 789 } 790 break; 791 } 792 } 793 794 *pos = c; 795 return EINVAL; 796 797 set_obj: 798 if (allow_double && (need_double || is_time)) { 799 if (!is_time) { 800 obj->type = UCL_FLOAT; 801 } 802 else { 803 obj->type = UCL_TIME; 804 } 805 obj->value.dv = is_neg ? (-dv) : dv; 806 } 807 else { 808 obj->type = UCL_INT; 809 obj->value.iv = is_neg ? (-lv) : lv; 810 } 811 *pos = p; 812 return 0; 813 } 814 815 /** 816 * Parse possible number 817 * @param parser 818 * @param chunk 819 * @return true if a number has been parsed 820 */ 821 static bool 822 ucl_lex_number (struct ucl_parser *parser, 823 struct ucl_chunk *chunk, ucl_object_t *obj) 824 { 825 const unsigned char *pos; 826 int ret; 827 828 ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos, 829 true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0)); 830 831 if (ret == 0) { 832 chunk->remain -= pos - chunk->pos; 833 chunk->column += pos - chunk->pos; 834 chunk->pos = pos; 835 return true; 836 } 837 else if (ret == ERANGE) { 838 ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err); 839 } 840 841 return false; 842 } 843 844 /** 845 * Parse quoted string with possible escapes 846 * @param parser 847 * @param chunk 848 * @return true if a string has been parsed 849 */ 850 static bool 851 ucl_lex_json_string (struct ucl_parser *parser, 852 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand) 853 { 854 const unsigned char *p = chunk->pos; 855 unsigned char c; 856 int i; 857 858 while (p < chunk->end) { 859 c = *p; 860 if (c < 0x1F) { 861 /* Unmasked control character */ 862 if (c == '\n') { 863 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err); 864 } 865 else { 866 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err); 867 } 868 return false; 869 } 870 else if (c == '\\') { 871 ucl_chunk_skipc (chunk, p); 872 c = *p; 873 if (p >= chunk->end) { 874 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err); 875 return false; 876 } 877 else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) { 878 if (c == 'u') { 879 ucl_chunk_skipc (chunk, p); 880 for (i = 0; i < 4 && p < chunk->end; i ++) { 881 if (!isxdigit (*p)) { 882 ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err); 883 return false; 884 } 885 ucl_chunk_skipc (chunk, p); 886 } 887 if (p >= chunk->end) { 888 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err); 889 return false; 890 } 891 } 892 else { 893 ucl_chunk_skipc (chunk, p); 894 } 895 } 896 *need_unescape = true; 897 *ucl_escape = true; 898 continue; 899 } 900 else if (c == '"') { 901 ucl_chunk_skipc (chunk, p); 902 return true; 903 } 904 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) { 905 *ucl_escape = true; 906 } 907 else if (c == '$') { 908 *var_expand = true; 909 } 910 ucl_chunk_skipc (chunk, p); 911 } 912 913 ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err); 914 return false; 915 } 916 917 /** 918 * Parse a key in an object 919 * @param parser 920 * @param chunk 921 * @return true if a key has been parsed 922 */ 923 static bool 924 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object) 925 { 926 const unsigned char *p, *c = NULL, *end, *t; 927 const char *key = NULL; 928 bool got_quote = false, got_eq = false, got_semicolon = false, 929 need_unescape = false, ucl_escape = false, var_expand = false, 930 got_content = false, got_sep = false; 931 ucl_object_t *nobj, *tobj; 932 ucl_hash_t *container; 933 ssize_t keylen; 934 935 p = chunk->pos; 936 937 if (*p == '.') { 938 /* It is macro actually */ 939 ucl_chunk_skipc (chunk, p); 940 parser->prev_state = parser->state; 941 parser->state = UCL_STATE_MACRO_NAME; 942 return true; 943 } 944 while (p < chunk->end) { 945 /* 946 * A key must start with alpha, number, '/' or '_' and end with space character 947 */ 948 if (c == NULL) { 949 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 950 if (!ucl_skip_comments (parser)) { 951 return false; 952 } 953 p = chunk->pos; 954 } 955 else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 956 ucl_chunk_skipc (chunk, p); 957 } 958 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) { 959 /* The first symbol */ 960 c = p; 961 ucl_chunk_skipc (chunk, p); 962 got_content = true; 963 } 964 else if (*p == '"') { 965 /* JSON style key */ 966 c = p + 1; 967 got_quote = true; 968 got_content = true; 969 ucl_chunk_skipc (chunk, p); 970 } 971 else if (*p == '}') { 972 /* We have actually end of an object */ 973 *end_of_object = true; 974 return true; 975 } 976 else if (*p == '.') { 977 ucl_chunk_skipc (chunk, p); 978 parser->prev_state = parser->state; 979 parser->state = UCL_STATE_MACRO_NAME; 980 return true; 981 } 982 else { 983 /* Invalid identifier */ 984 ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err); 985 return false; 986 } 987 } 988 else { 989 /* Parse the body of a key */ 990 if (!got_quote) { 991 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) { 992 got_content = true; 993 ucl_chunk_skipc (chunk, p); 994 } 995 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) { 996 end = p; 997 break; 998 } 999 else { 1000 ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err); 1001 return false; 1002 } 1003 } 1004 else { 1005 /* We need to parse json like quoted string */ 1006 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1007 return false; 1008 } 1009 /* Always escape keys obtained via json */ 1010 end = chunk->pos - 1; 1011 p = chunk->pos; 1012 break; 1013 } 1014 } 1015 } 1016 1017 if (p >= chunk->end && got_content) { 1018 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err); 1019 return false; 1020 } 1021 else if (!got_content) { 1022 return true; 1023 } 1024 *end_of_object = false; 1025 /* We are now at the end of the key, need to parse the rest */ 1026 while (p < chunk->end) { 1027 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 1028 ucl_chunk_skipc (chunk, p); 1029 } 1030 else if (*p == '=') { 1031 if (!got_eq && !got_semicolon) { 1032 ucl_chunk_skipc (chunk, p); 1033 got_eq = true; 1034 } 1035 else { 1036 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err); 1037 return false; 1038 } 1039 } 1040 else if (*p == ':') { 1041 if (!got_eq && !got_semicolon) { 1042 ucl_chunk_skipc (chunk, p); 1043 got_semicolon = true; 1044 } 1045 else { 1046 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err); 1047 return false; 1048 } 1049 } 1050 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1051 /* Check for comment */ 1052 if (!ucl_skip_comments (parser)) { 1053 return false; 1054 } 1055 p = chunk->pos; 1056 } 1057 else { 1058 /* Start value */ 1059 break; 1060 } 1061 } 1062 1063 if (p >= chunk->end && got_content) { 1064 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err); 1065 return false; 1066 } 1067 1068 got_sep = got_semicolon || got_eq; 1069 1070 if (!got_sep) { 1071 /* 1072 * Maybe we have more keys nested, so search for termination character. 1073 * Possible choices: 1074 * 1) key1 key2 ... keyN [:=] value <- we treat that as error 1075 * 2) key1 ... keyN {} or [] <- we treat that as nested objects 1076 * 3) key1 value[;,\n] <- we treat that as linear object 1077 */ 1078 t = p; 1079 *next_key = false; 1080 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) { 1081 t ++; 1082 } 1083 /* Check first non-space character after a key */ 1084 if (*t != '{' && *t != '[') { 1085 while (t < chunk->end) { 1086 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') { 1087 break; 1088 } 1089 else if (*t == '{' || *t == '[') { 1090 *next_key = true; 1091 break; 1092 } 1093 t ++; 1094 } 1095 } 1096 } 1097 1098 /* Create a new object */ 1099 nobj = ucl_object_new (); 1100 keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY], 1101 &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false); 1102 if (keylen == -1) { 1103 ucl_object_unref (nobj); 1104 return false; 1105 } 1106 else if (keylen == 0) { 1107 ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err); 1108 ucl_object_unref (nobj); 1109 return false; 1110 } 1111 1112 container = parser->stack->obj->value.ov; 1113 nobj->key = key; 1114 nobj->keylen = keylen; 1115 tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (container, nobj)); 1116 if (tobj == NULL) { 1117 container = ucl_hash_insert_object (container, nobj); 1118 nobj->prev = nobj; 1119 nobj->next = NULL; 1120 parser->stack->obj->len ++; 1121 } 1122 else { 1123 DL_APPEND (tobj, nobj); 1124 } 1125 1126 if (ucl_escape) { 1127 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE; 1128 } 1129 parser->stack->obj->value.ov = container; 1130 1131 parser->cur_obj = nobj; 1132 1133 return true; 1134 } 1135 1136 /** 1137 * Parse a cl string 1138 * @param parser 1139 * @param chunk 1140 * @return true if a key has been parsed 1141 */ 1142 static bool 1143 ucl_parse_string_value (struct ucl_parser *parser, 1144 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape) 1145 { 1146 const unsigned char *p; 1147 enum { 1148 UCL_BRACE_ROUND = 0, 1149 UCL_BRACE_SQUARE, 1150 UCL_BRACE_FIGURE 1151 }; 1152 int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}}; 1153 1154 p = chunk->pos; 1155 1156 while (p < chunk->end) { 1157 1158 /* Skip pairs of figure braces */ 1159 if (*p == '{') { 1160 braces[UCL_BRACE_FIGURE][0] ++; 1161 } 1162 else if (*p == '}') { 1163 braces[UCL_BRACE_FIGURE][1] ++; 1164 if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) { 1165 /* This is not a termination symbol, continue */ 1166 ucl_chunk_skipc (chunk, p); 1167 continue; 1168 } 1169 } 1170 /* Skip pairs of square braces */ 1171 else if (*p == '[') { 1172 braces[UCL_BRACE_SQUARE][0] ++; 1173 } 1174 else if (*p == ']') { 1175 braces[UCL_BRACE_SQUARE][1] ++; 1176 if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) { 1177 /* This is not a termination symbol, continue */ 1178 ucl_chunk_skipc (chunk, p); 1179 continue; 1180 } 1181 } 1182 else if (*p == '$') { 1183 *var_expand = true; 1184 } 1185 else if (*p == '\\') { 1186 *need_unescape = true; 1187 ucl_chunk_skipc (chunk, p); 1188 if (p < chunk->end) { 1189 ucl_chunk_skipc (chunk, p); 1190 } 1191 continue; 1192 } 1193 1194 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1195 break; 1196 } 1197 ucl_chunk_skipc (chunk, p); 1198 } 1199 1200 if (p >= chunk->end) { 1201 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err); 1202 return false; 1203 } 1204 1205 return true; 1206 } 1207 1208 /** 1209 * Parse multiline string ending with \n{term}\n 1210 * @param parser 1211 * @param chunk 1212 * @param term 1213 * @param term_len 1214 * @return size of multiline string or 0 in case of error 1215 */ 1216 static int 1217 ucl_parse_multiline_string (struct ucl_parser *parser, 1218 struct ucl_chunk *chunk, const unsigned char *term, 1219 int term_len, unsigned char const **beg, 1220 bool *var_expand) 1221 { 1222 const unsigned char *p, *c; 1223 bool newline = false; 1224 int len = 0; 1225 1226 p = chunk->pos; 1227 1228 c = p; 1229 1230 while (p < chunk->end) { 1231 if (newline) { 1232 if (chunk->end - p < term_len) { 1233 return 0; 1234 } 1235 else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) { 1236 len = p - c; 1237 chunk->remain -= term_len; 1238 chunk->pos = p + term_len; 1239 chunk->column = term_len; 1240 *beg = c; 1241 break; 1242 } 1243 } 1244 if (*p == '\n') { 1245 newline = true; 1246 } 1247 else { 1248 if (*p == '$') { 1249 *var_expand = true; 1250 } 1251 newline = false; 1252 } 1253 ucl_chunk_skipc (chunk, p); 1254 } 1255 1256 return len; 1257 } 1258 1259 static ucl_object_t* 1260 ucl_get_value_object (struct ucl_parser *parser) 1261 { 1262 ucl_object_t *t, *obj = NULL; 1263 1264 if (parser->stack->obj->type == UCL_ARRAY) { 1265 /* Object must be allocated */ 1266 obj = ucl_object_new (); 1267 t = parser->stack->obj->value.av; 1268 DL_APPEND (t, obj); 1269 parser->cur_obj = obj; 1270 parser->stack->obj->value.av = t; 1271 parser->stack->obj->len ++; 1272 } 1273 else { 1274 /* Object has been already allocated */ 1275 obj = parser->cur_obj; 1276 } 1277 1278 return obj; 1279 } 1280 1281 /** 1282 * Handle value data 1283 * @param parser 1284 * @param chunk 1285 * @return 1286 */ 1287 static bool 1288 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1289 { 1290 const unsigned char *p, *c; 1291 ucl_object_t *obj = NULL; 1292 unsigned int stripped_spaces; 1293 int str_len; 1294 bool need_unescape = false, ucl_escape = false, var_expand = false; 1295 1296 p = chunk->pos; 1297 1298 /* Skip any spaces and comments */ 1299 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) || 1300 (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1301 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1302 ucl_chunk_skipc (chunk, p); 1303 } 1304 if (!ucl_skip_comments (parser)) { 1305 return false; 1306 } 1307 p = chunk->pos; 1308 } 1309 1310 while (p < chunk->end) { 1311 c = p; 1312 switch (*p) { 1313 case '"': 1314 obj = ucl_get_value_object (parser); 1315 ucl_chunk_skipc (chunk, p); 1316 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1317 return false; 1318 } 1319 str_len = chunk->pos - c - 2; 1320 obj->type = UCL_STRING; 1321 if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE], 1322 &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) { 1323 return false; 1324 } 1325 obj->len = str_len; 1326 parser->state = UCL_STATE_AFTER_VALUE; 1327 p = chunk->pos; 1328 return true; 1329 break; 1330 case '{': 1331 obj = ucl_get_value_object (parser); 1332 /* We have a new object */ 1333 obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level); 1334 if (obj == NULL) { 1335 return false; 1336 } 1337 1338 ucl_chunk_skipc (chunk, p); 1339 return true; 1340 break; 1341 case '[': 1342 obj = ucl_get_value_object (parser); 1343 /* We have a new array */ 1344 obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level); 1345 if (obj == NULL) { 1346 return false; 1347 } 1348 1349 ucl_chunk_skipc (chunk, p); 1350 return true; 1351 break; 1352 case ']': 1353 /* We have the array ending */ 1354 if (parser->stack && parser->stack->obj->type == UCL_ARRAY) { 1355 parser->state = UCL_STATE_AFTER_VALUE; 1356 return true; 1357 } 1358 else { 1359 goto parse_string; 1360 } 1361 break; 1362 case '<': 1363 obj = ucl_get_value_object (parser); 1364 /* We have something like multiline value, which must be <<[A-Z]+\n */ 1365 if (chunk->end - p > 3) { 1366 if (memcmp (p, "<<", 2) == 0) { 1367 p += 2; 1368 /* We allow only uppercase characters in multiline definitions */ 1369 while (p < chunk->end && *p >= 'A' && *p <= 'Z') { 1370 p ++; 1371 } 1372 if (*p =='\n') { 1373 /* Set chunk positions and start multiline parsing */ 1374 c += 2; 1375 chunk->remain -= p - c; 1376 chunk->pos = p + 1; 1377 chunk->column = 0; 1378 chunk->line ++; 1379 if ((str_len = ucl_parse_multiline_string (parser, chunk, c, 1380 p - c, &c, &var_expand)) == 0) { 1381 ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err); 1382 return false; 1383 } 1384 obj->type = UCL_STRING; 1385 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE], 1386 &obj->value.sv, str_len - 1, false, false, var_expand)) == -1) { 1387 return false; 1388 } 1389 obj->len = str_len; 1390 parser->state = UCL_STATE_AFTER_VALUE; 1391 return true; 1392 } 1393 } 1394 } 1395 /* Fallback to ordinary strings */ 1396 default: 1397 parse_string: 1398 if (obj == NULL) { 1399 obj = ucl_get_value_object (parser); 1400 } 1401 /* Parse atom */ 1402 if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) { 1403 if (!ucl_lex_number (parser, chunk, obj)) { 1404 if (parser->state == UCL_STATE_ERROR) { 1405 return false; 1406 } 1407 } 1408 else { 1409 parser->state = UCL_STATE_AFTER_VALUE; 1410 return true; 1411 } 1412 /* Fallback to normal string */ 1413 } 1414 1415 if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) { 1416 return false; 1417 } 1418 /* Cut trailing spaces */ 1419 stripped_spaces = 0; 1420 while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces), 1421 UCL_CHARACTER_WHITESPACE)) { 1422 stripped_spaces ++; 1423 } 1424 str_len = chunk->pos - c - stripped_spaces; 1425 if (str_len <= 0) { 1426 ucl_set_err (chunk, 0, "string value must not be empty", &parser->err); 1427 return false; 1428 } 1429 else if (str_len == 4 && memcmp (c, "null", 4) == 0) { 1430 obj->len = 0; 1431 obj->type = UCL_NULL; 1432 } 1433 else if (!ucl_maybe_parse_boolean (obj, c, str_len)) { 1434 obj->type = UCL_STRING; 1435 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE], 1436 &obj->value.sv, str_len, need_unescape, 1437 false, var_expand)) == -1) { 1438 return false; 1439 } 1440 obj->len = str_len; 1441 } 1442 parser->state = UCL_STATE_AFTER_VALUE; 1443 p = chunk->pos; 1444 1445 return true; 1446 break; 1447 } 1448 } 1449 1450 return true; 1451 } 1452 1453 /** 1454 * Handle after value data 1455 * @param parser 1456 * @param chunk 1457 * @return 1458 */ 1459 static bool 1460 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1461 { 1462 const unsigned char *p; 1463 bool got_sep = false; 1464 struct ucl_stack *st; 1465 1466 p = chunk->pos; 1467 1468 while (p < chunk->end) { 1469 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 1470 /* Skip whitespaces */ 1471 ucl_chunk_skipc (chunk, p); 1472 } 1473 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1474 /* Skip comment */ 1475 if (!ucl_skip_comments (parser)) { 1476 return false; 1477 } 1478 /* Treat comment as a separator */ 1479 got_sep = true; 1480 p = chunk->pos; 1481 } 1482 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) { 1483 if (*p == '}' || *p == ']') { 1484 if (parser->stack == NULL) { 1485 ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err); 1486 return false; 1487 } 1488 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) || 1489 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) { 1490 1491 /* Pop all nested objects from a stack */ 1492 st = parser->stack; 1493 parser->stack = st->next; 1494 UCL_FREE (sizeof (struct ucl_stack), st); 1495 1496 while (parser->stack != NULL) { 1497 st = parser->stack; 1498 if (st->next == NULL || st->next->level == st->level) { 1499 break; 1500 } 1501 parser->stack = st->next; 1502 UCL_FREE (sizeof (struct ucl_stack), st); 1503 } 1504 } 1505 else { 1506 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err); 1507 return false; 1508 } 1509 1510 if (parser->stack == NULL) { 1511 /* Ignore everything after a top object */ 1512 return true; 1513 } 1514 else { 1515 ucl_chunk_skipc (chunk, p); 1516 } 1517 got_sep = true; 1518 } 1519 else { 1520 /* Got a separator */ 1521 got_sep = true; 1522 ucl_chunk_skipc (chunk, p); 1523 } 1524 } 1525 else { 1526 /* Anything else */ 1527 if (!got_sep) { 1528 ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err); 1529 return false; 1530 } 1531 return true; 1532 } 1533 } 1534 1535 return true; 1536 } 1537 1538 /** 1539 * Handle macro data 1540 * @param parser 1541 * @param chunk 1542 * @return 1543 */ 1544 static bool 1545 ucl_parse_macro_value (struct ucl_parser *parser, 1546 struct ucl_chunk *chunk, struct ucl_macro *macro, 1547 unsigned char const **macro_start, size_t *macro_len) 1548 { 1549 const unsigned char *p, *c; 1550 bool need_unescape = false, ucl_escape = false, var_expand = false; 1551 1552 p = chunk->pos; 1553 1554 switch (*p) { 1555 case '"': 1556 /* We have macro value encoded in quotes */ 1557 c = p; 1558 ucl_chunk_skipc (chunk, p); 1559 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1560 return false; 1561 } 1562 1563 *macro_start = c + 1; 1564 *macro_len = chunk->pos - c - 2; 1565 p = chunk->pos; 1566 break; 1567 case '{': 1568 /* We got a multiline macro body */ 1569 ucl_chunk_skipc (chunk, p); 1570 /* Skip spaces at the beginning */ 1571 while (p < chunk->end) { 1572 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1573 ucl_chunk_skipc (chunk, p); 1574 } 1575 else { 1576 break; 1577 } 1578 } 1579 c = p; 1580 while (p < chunk->end) { 1581 if (*p == '}') { 1582 break; 1583 } 1584 ucl_chunk_skipc (chunk, p); 1585 } 1586 *macro_start = c; 1587 *macro_len = p - c; 1588 ucl_chunk_skipc (chunk, p); 1589 break; 1590 default: 1591 /* Macro is not enclosed in quotes or braces */ 1592 c = p; 1593 while (p < chunk->end) { 1594 if (ucl_lex_is_atom_end (*p)) { 1595 break; 1596 } 1597 ucl_chunk_skipc (chunk, p); 1598 } 1599 *macro_start = c; 1600 *macro_len = p - c; 1601 break; 1602 } 1603 1604 /* We are at the end of a macro */ 1605 /* Skip ';' and space characters and return to previous state */ 1606 while (p < chunk->end) { 1607 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') { 1608 break; 1609 } 1610 ucl_chunk_skipc (chunk, p); 1611 } 1612 return true; 1613 } 1614 1615 /** 1616 * Handle the main states of rcl parser 1617 * @param parser parser structure 1618 * @param data the pointer to the beginning of a chunk 1619 * @param len the length of a chunk 1620 * @return true if chunk has been parsed and false in case of error 1621 */ 1622 static bool 1623 ucl_state_machine (struct ucl_parser *parser) 1624 { 1625 ucl_object_t *obj; 1626 struct ucl_chunk *chunk = parser->chunks; 1627 const unsigned char *p, *c = NULL, *macro_start = NULL; 1628 unsigned char *macro_escaped; 1629 size_t macro_len = 0; 1630 struct ucl_macro *macro = NULL; 1631 bool next_key = false, end_of_object = false; 1632 1633 if (parser->top_obj == NULL) { 1634 if (*chunk->pos == '[') { 1635 obj = ucl_add_parser_stack (NULL, parser, true, 0); 1636 } 1637 else { 1638 obj = ucl_add_parser_stack (NULL, parser, false, 0); 1639 } 1640 if (obj == NULL) { 1641 return false; 1642 } 1643 parser->top_obj = obj; 1644 parser->cur_obj = obj; 1645 parser->state = UCL_STATE_INIT; 1646 } 1647 1648 p = chunk->pos; 1649 while (chunk->pos < chunk->end) { 1650 switch (parser->state) { 1651 case UCL_STATE_INIT: 1652 /* 1653 * At the init state we can either go to the parse array or object 1654 * if we got [ or { correspondingly or can just treat new data as 1655 * a key of newly created object 1656 */ 1657 obj = parser->cur_obj; 1658 if (!ucl_skip_comments (parser)) { 1659 parser->prev_state = parser->state; 1660 parser->state = UCL_STATE_ERROR; 1661 return false; 1662 } 1663 else { 1664 /* Skip any spaces */ 1665 while (p < chunk->end && ucl_test_character (*p, 1666 UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1667 ucl_chunk_skipc (chunk, p); 1668 } 1669 p = chunk->pos; 1670 if (*p == '[') { 1671 parser->state = UCL_STATE_VALUE; 1672 ucl_chunk_skipc (chunk, p); 1673 } 1674 else { 1675 parser->state = UCL_STATE_KEY; 1676 if (*p == '{') { 1677 ucl_chunk_skipc (chunk, p); 1678 } 1679 } 1680 } 1681 break; 1682 case UCL_STATE_KEY: 1683 /* Skip any spaces */ 1684 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1685 ucl_chunk_skipc (chunk, p); 1686 } 1687 if (*p == '}') { 1688 /* We have the end of an object */ 1689 parser->state = UCL_STATE_AFTER_VALUE; 1690 continue; 1691 } 1692 if (parser->stack == NULL) { 1693 /* No objects are on stack, but we want to parse a key */ 1694 ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser " 1695 "expects a key", &parser->err); 1696 parser->prev_state = parser->state; 1697 parser->state = UCL_STATE_ERROR; 1698 return false; 1699 } 1700 if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) { 1701 parser->prev_state = parser->state; 1702 parser->state = UCL_STATE_ERROR; 1703 return false; 1704 } 1705 if (end_of_object) { 1706 p = chunk->pos; 1707 parser->state = UCL_STATE_AFTER_VALUE; 1708 continue; 1709 } 1710 else if (parser->state != UCL_STATE_MACRO_NAME) { 1711 if (next_key && parser->stack->obj->type == UCL_OBJECT) { 1712 /* Parse more keys and nest objects accordingly */ 1713 obj = ucl_add_parser_stack (parser->cur_obj, parser, false, 1714 parser->stack->level + 1); 1715 if (obj == NULL) { 1716 return false; 1717 } 1718 } 1719 else { 1720 parser->state = UCL_STATE_VALUE; 1721 } 1722 } 1723 else { 1724 c = chunk->pos; 1725 } 1726 p = chunk->pos; 1727 break; 1728 case UCL_STATE_VALUE: 1729 /* We need to check what we do have */ 1730 if (!ucl_parse_value (parser, chunk)) { 1731 parser->prev_state = parser->state; 1732 parser->state = UCL_STATE_ERROR; 1733 return false; 1734 } 1735 /* State is set in ucl_parse_value call */ 1736 p = chunk->pos; 1737 break; 1738 case UCL_STATE_AFTER_VALUE: 1739 if (!ucl_parse_after_value (parser, chunk)) { 1740 parser->prev_state = parser->state; 1741 parser->state = UCL_STATE_ERROR; 1742 return false; 1743 } 1744 if (parser->stack != NULL) { 1745 if (parser->stack->obj->type == UCL_OBJECT) { 1746 parser->state = UCL_STATE_KEY; 1747 } 1748 else { 1749 /* Array */ 1750 parser->state = UCL_STATE_VALUE; 1751 } 1752 } 1753 else { 1754 /* Skip everything at the end */ 1755 return true; 1756 } 1757 p = chunk->pos; 1758 break; 1759 case UCL_STATE_MACRO_NAME: 1760 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1761 ucl_chunk_skipc (chunk, p); 1762 } 1763 else if (p - c > 0) { 1764 /* We got macro name */ 1765 macro_len = (size_t)(p - c); 1766 HASH_FIND (hh, parser->macroes, c, macro_len, macro); 1767 if (macro == NULL) { 1768 ucl_create_err (&parser->err, "error on line %d at column %d: " 1769 "unknown macro: '%.*s', character: '%c'", 1770 chunk->line, chunk->column, (int)(p - c), c, *chunk->pos); 1771 parser->state = UCL_STATE_ERROR; 1772 return false; 1773 } 1774 /* Now we need to skip all spaces */ 1775 while (p < chunk->end) { 1776 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1777 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1778 /* Skip comment */ 1779 if (!ucl_skip_comments (parser)) { 1780 return false; 1781 } 1782 p = chunk->pos; 1783 } 1784 break; 1785 } 1786 ucl_chunk_skipc (chunk, p); 1787 } 1788 parser->state = UCL_STATE_MACRO; 1789 } 1790 break; 1791 case UCL_STATE_MACRO: 1792 if (!ucl_parse_macro_value (parser, chunk, macro, 1793 ¯o_start, ¯o_len)) { 1794 parser->prev_state = parser->state; 1795 parser->state = UCL_STATE_ERROR; 1796 return false; 1797 } 1798 macro_len = ucl_expand_variable (parser, ¯o_escaped, macro_start, macro_len); 1799 parser->state = parser->prev_state; 1800 if (macro_escaped == NULL) { 1801 if (!macro->handler (macro_start, macro_len, macro->ud)) { 1802 return false; 1803 } 1804 } 1805 else { 1806 if (!macro->handler (macro_escaped, macro_len, macro->ud)) { 1807 UCL_FREE (macro_len + 1, macro_escaped); 1808 return false; 1809 } 1810 UCL_FREE (macro_len + 1, macro_escaped); 1811 } 1812 p = chunk->pos; 1813 break; 1814 default: 1815 /* TODO: add all states */ 1816 ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err); 1817 parser->state = UCL_STATE_ERROR; 1818 return false; 1819 } 1820 } 1821 1822 return true; 1823 } 1824 1825 struct ucl_parser* 1826 ucl_parser_new (int flags) 1827 { 1828 struct ucl_parser *new; 1829 1830 new = UCL_ALLOC (sizeof (struct ucl_parser)); 1831 if (new == NULL) { 1832 return NULL; 1833 } 1834 memset (new, 0, sizeof (struct ucl_parser)); 1835 1836 ucl_parser_register_macro (new, "include", ucl_include_handler, new); 1837 ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new); 1838 ucl_parser_register_macro (new, "includes", ucl_includes_handler, new); 1839 1840 new->flags = flags; 1841 1842 /* Initial assumption about filevars */ 1843 ucl_parser_set_filevars (new, NULL, false); 1844 1845 return new; 1846 } 1847 1848 1849 void 1850 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro, 1851 ucl_macro_handler handler, void* ud) 1852 { 1853 struct ucl_macro *new; 1854 1855 if (macro == NULL || handler == NULL) { 1856 return; 1857 } 1858 new = UCL_ALLOC (sizeof (struct ucl_macro)); 1859 if (new == NULL) { 1860 return; 1861 } 1862 memset (new, 0, sizeof (struct ucl_macro)); 1863 new->handler = handler; 1864 new->name = strdup (macro); 1865 new->ud = ud; 1866 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); 1867 } 1868 1869 void 1870 ucl_parser_register_variable (struct ucl_parser *parser, const char *var, 1871 const char *value) 1872 { 1873 struct ucl_variable *new = NULL, *cur; 1874 1875 if (var == NULL) { 1876 return; 1877 } 1878 1879 /* Find whether a variable already exists */ 1880 LL_FOREACH (parser->variables, cur) { 1881 if (strcmp (cur->var, var) == 0) { 1882 new = cur; 1883 break; 1884 } 1885 } 1886 1887 if (value == NULL) { 1888 1889 if (new != NULL) { 1890 /* Remove variable */ 1891 LL_DELETE (parser->variables, new); 1892 free (new->var); 1893 free (new->value); 1894 UCL_FREE (sizeof (struct ucl_variable), new); 1895 } 1896 else { 1897 /* Do nothing */ 1898 return; 1899 } 1900 } 1901 else { 1902 if (new == NULL) { 1903 new = UCL_ALLOC (sizeof (struct ucl_variable)); 1904 if (new == NULL) { 1905 return; 1906 } 1907 memset (new, 0, sizeof (struct ucl_variable)); 1908 new->var = strdup (var); 1909 new->var_len = strlen (var); 1910 new->value = strdup (value); 1911 new->value_len = strlen (value); 1912 1913 LL_PREPEND (parser->variables, new); 1914 } 1915 else { 1916 free (new->value); 1917 new->value = strdup (value); 1918 new->value_len = strlen (value); 1919 } 1920 } 1921 } 1922 1923 void 1924 ucl_parser_set_variables_handler (struct ucl_parser *parser, 1925 ucl_variable_handler handler, void *ud) 1926 { 1927 parser->var_handler = handler; 1928 parser->var_data = ud; 1929 } 1930 1931 bool 1932 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data, 1933 size_t len) 1934 { 1935 struct ucl_chunk *chunk; 1936 1937 if (data == NULL || len == 0) { 1938 ucl_create_err (&parser->err, "invalid chunk added"); 1939 return false; 1940 } 1941 if (parser->state != UCL_STATE_ERROR) { 1942 chunk = UCL_ALLOC (sizeof (struct ucl_chunk)); 1943 if (chunk == NULL) { 1944 ucl_create_err (&parser->err, "cannot allocate chunk structure"); 1945 return false; 1946 } 1947 chunk->begin = data; 1948 chunk->remain = len; 1949 chunk->pos = chunk->begin; 1950 chunk->end = chunk->begin + len; 1951 chunk->line = 1; 1952 chunk->column = 0; 1953 LL_PREPEND (parser->chunks, chunk); 1954 parser->recursion ++; 1955 if (parser->recursion > UCL_MAX_RECURSION) { 1956 ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d", 1957 parser->recursion); 1958 return false; 1959 } 1960 return ucl_state_machine (parser); 1961 } 1962 1963 ucl_create_err (&parser->err, "a parser is in an invalid state"); 1964 1965 return false; 1966 } 1967 1968 bool 1969 ucl_parser_add_string (struct ucl_parser *parser, const char *data, 1970 size_t len) 1971 { 1972 if (data == NULL) { 1973 ucl_create_err (&parser->err, "invalid string added"); 1974 return false; 1975 } 1976 if (len == 0) { 1977 len = strlen (data); 1978 } 1979 1980 return ucl_parser_add_chunk (parser, (const unsigned char *)data, len); 1981 } 1982