1 /* Copyright (c) 2013, Vsevolod Stakhov 2 * All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * * Redistributions of source code must retain the above copyright 7 * notice, this list of conditions and the following disclaimer. 8 * * Redistributions in binary form must reproduce the above copyright 9 * notice, this list of conditions and the following disclaimer in the 10 * documentation and/or other materials provided with the distribution. 11 * 12 * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY 13 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 14 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 15 * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY 16 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 17 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 18 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 19 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 20 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 21 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 22 */ 23 24 #include "ucl.h" 25 #include "ucl_internal.h" 26 #include "ucl_chartable.h" 27 28 /** 29 * @file rcl_parser.c 30 * The implementation of rcl parser 31 */ 32 33 struct ucl_parser_saved_state { 34 unsigned int line; 35 unsigned int column; 36 size_t remain; 37 const unsigned char *pos; 38 }; 39 40 /** 41 * Move up to len characters 42 * @param parser 43 * @param begin 44 * @param len 45 * @return new position in chunk 46 */ 47 #define ucl_chunk_skipc(chunk, p) do{ \ 48 if (*(p) == '\n') { \ 49 (chunk)->line ++; \ 50 (chunk)->column = 0; \ 51 } \ 52 else (chunk)->column ++; \ 53 (p++); \ 54 (chunk)->pos ++; \ 55 (chunk)->remain --; \ 56 } while (0) 57 58 /** 59 * Save parser state 60 * @param chunk 61 * @param s 62 */ 63 static inline void 64 ucl_chunk_save_state (struct ucl_chunk *chunk, struct ucl_parser_saved_state *s) 65 { 66 s->column = chunk->column; 67 s->pos = chunk->pos; 68 s->line = chunk->line; 69 s->remain = chunk->remain; 70 } 71 72 /** 73 * Restore parser state 74 * @param chunk 75 * @param s 76 */ 77 static inline void 78 ucl_chunk_restore_state (struct ucl_chunk *chunk, struct ucl_parser_saved_state *s) 79 { 80 chunk->column = s->column; 81 chunk->pos = s->pos; 82 chunk->line = s->line; 83 chunk->remain = s->remain; 84 } 85 86 static inline void 87 ucl_set_err (struct ucl_chunk *chunk, int code, const char *str, UT_string **err) 88 { 89 if (chunk->pos < chunk->end) { 90 if (isgraph (*chunk->pos)) { 91 ucl_create_err (err, "error on line %d at column %d: '%s', character: '%c'", 92 chunk->line, chunk->column, str, *chunk->pos); 93 } 94 else { 95 ucl_create_err (err, "error on line %d at column %d: '%s', character: '0x%02x'", 96 chunk->line, chunk->column, str, (int)*chunk->pos); 97 } 98 } 99 else { 100 ucl_create_err (err, "error at the end of chunk: %s", str); 101 } 102 } 103 104 /** 105 * Skip all comments from the current pos resolving nested and multiline comments 106 * @param parser 107 * @return 108 */ 109 static bool 110 ucl_skip_comments (struct ucl_parser *parser) 111 { 112 struct ucl_chunk *chunk = parser->chunks; 113 const unsigned char *p; 114 int comments_nested = 0; 115 116 p = chunk->pos; 117 118 start: 119 if (*p == '#') { 120 if (parser->state != UCL_STATE_SCOMMENT && 121 parser->state != UCL_STATE_MCOMMENT) { 122 while (p < chunk->end) { 123 if (*p == '\n') { 124 ucl_chunk_skipc (chunk, p); 125 goto start; 126 } 127 ucl_chunk_skipc (chunk, p); 128 } 129 } 130 } 131 else if (*p == '/' && chunk->remain >= 2) { 132 if (p[1] == '*') { 133 ucl_chunk_skipc (chunk, p); 134 comments_nested ++; 135 ucl_chunk_skipc (chunk, p); 136 137 while (p < chunk->end) { 138 if (*p == '*') { 139 ucl_chunk_skipc (chunk, p); 140 if (*p == '/') { 141 comments_nested --; 142 if (comments_nested == 0) { 143 ucl_chunk_skipc (chunk, p); 144 goto start; 145 } 146 } 147 ucl_chunk_skipc (chunk, p); 148 } 149 else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') { 150 comments_nested ++; 151 ucl_chunk_skipc (chunk, p); 152 ucl_chunk_skipc (chunk, p); 153 continue; 154 } 155 ucl_chunk_skipc (chunk, p); 156 } 157 if (comments_nested != 0) { 158 ucl_set_err (chunk, UCL_ENESTED, "unfinished multiline comment", &parser->err); 159 return false; 160 } 161 } 162 } 163 164 return true; 165 } 166 167 /** 168 * Return multiplier for a character 169 * @param c multiplier character 170 * @param is_bytes if true use 1024 multiplier 171 * @return multiplier 172 */ 173 static inline unsigned long 174 ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) { 175 const struct { 176 char c; 177 long mult_normal; 178 long mult_bytes; 179 } multipliers[] = { 180 {'m', 1000 * 1000, 1024 * 1024}, 181 {'k', 1000, 1024}, 182 {'g', 1000 * 1000 * 1000, 1024 * 1024 * 1024} 183 }; 184 int i; 185 186 for (i = 0; i < 3; i ++) { 187 if (tolower (c) == multipliers[i].c) { 188 if (is_bytes) { 189 return multipliers[i].mult_bytes; 190 } 191 return multipliers[i].mult_normal; 192 } 193 } 194 195 return 1; 196 } 197 198 199 /** 200 * Return multiplier for time scaling 201 * @param c 202 * @return 203 */ 204 static inline double 205 ucl_lex_time_multiplier (const unsigned char c) { 206 const struct { 207 char c; 208 double mult; 209 } multipliers[] = { 210 {'m', 60}, 211 {'h', 60 * 60}, 212 {'d', 60 * 60 * 24}, 213 {'w', 60 * 60 * 24 * 7}, 214 {'y', 60 * 60 * 24 * 7 * 365} 215 }; 216 int i; 217 218 for (i = 0; i < 5; i ++) { 219 if (tolower (c) == multipliers[i].c) { 220 return multipliers[i].mult; 221 } 222 } 223 224 return 1; 225 } 226 227 /** 228 * Return true if a character is a end of an atom 229 * @param c 230 * @return 231 */ 232 static inline bool 233 ucl_lex_is_atom_end (const unsigned char c) 234 { 235 return ucl_test_character (c, UCL_CHARACTER_VALUE_END); 236 } 237 238 static inline bool 239 ucl_lex_is_comment (const unsigned char c1, const unsigned char c2) 240 { 241 if (c1 == '/') { 242 if (c2 == '*') { 243 return true; 244 } 245 } 246 else if (c1 == '#') { 247 return true; 248 } 249 return false; 250 } 251 252 /** 253 * Check variable found 254 * @param parser 255 * @param ptr 256 * @param remain 257 * @param out_len 258 * @param strict 259 * @param found 260 * @return 261 */ 262 static inline const char * 263 ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain, 264 size_t *out_len, bool strict, bool *found) 265 { 266 struct ucl_variable *var; 267 268 LL_FOREACH (parser->variables, var) { 269 if (strict) { 270 if (remain == var->var_len) { 271 if (memcmp (ptr, var->var, var->var_len) == 0) { 272 *out_len += var->value_len; 273 *found = true; 274 return (ptr + var->var_len); 275 } 276 } 277 } 278 else { 279 if (remain >= var->var_len) { 280 if (memcmp (ptr, var->var, var->var_len) == 0) { 281 *out_len += var->value_len; 282 *found = true; 283 return (ptr + var->var_len); 284 } 285 } 286 } 287 } 288 289 return ptr; 290 } 291 292 /** 293 * Check for a variable in a given string 294 * @param parser 295 * @param ptr 296 * @param remain 297 * @param out_len 298 * @param vars_found 299 * @return 300 */ 301 static const char * 302 ucl_check_variable (struct ucl_parser *parser, const char *ptr, size_t remain, size_t *out_len, bool *vars_found) 303 { 304 const char *p, *end, *ret = ptr; 305 bool found = false; 306 307 if (*ptr == '{') { 308 /* We need to match the variable enclosed in braces */ 309 p = ptr + 1; 310 end = ptr + remain; 311 while (p < end) { 312 if (*p == '}') { 313 ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, out_len, true, &found); 314 if (found) { 315 /* {} must be excluded actually */ 316 ret ++; 317 if (!*vars_found) { 318 *vars_found = true; 319 } 320 } 321 else { 322 *out_len += 2; 323 } 324 break; 325 } 326 p ++; 327 } 328 } 329 else if (*ptr != '$') { 330 /* Not count escaped dollar sign */ 331 ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found); 332 if (found && !*vars_found) { 333 *vars_found = true; 334 } 335 if (!found) { 336 (*out_len) ++; 337 } 338 } 339 else { 340 ret ++; 341 (*out_len) ++; 342 } 343 344 return ret; 345 } 346 347 /** 348 * Expand a single variable 349 * @param parser 350 * @param ptr 351 * @param remain 352 * @param dest 353 * @return 354 */ 355 static const char * 356 ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr, 357 size_t remain, unsigned char **dest) 358 { 359 unsigned char *d = *dest; 360 const char *p = ptr + 1, *ret; 361 struct ucl_variable *var; 362 bool found = false; 363 364 ret = ptr + 1; 365 remain --; 366 367 if (*p == '$') { 368 *d++ = *p++; 369 *dest = d; 370 return p; 371 } 372 else if (*p == '{') { 373 p ++; 374 ret += 2; 375 remain -= 2; 376 } 377 378 LL_FOREACH (parser->variables, var) { 379 if (remain >= var->var_len) { 380 if (memcmp (p, var->var, var->var_len) == 0) { 381 memcpy (d, var->value, var->value_len); 382 ret += var->var_len; 383 d += var->value_len; 384 found = true; 385 break; 386 } 387 } 388 } 389 if (!found) { 390 memcpy (d, ptr, 2); 391 d += 2; 392 ret --; 393 } 394 395 *dest = d; 396 return ret; 397 } 398 399 /** 400 * Expand variables in string 401 * @param parser 402 * @param dst 403 * @param src 404 * @param in_len 405 * @return 406 */ 407 static ssize_t 408 ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst, 409 const char *src, size_t in_len) 410 { 411 const char *p, *end = src + in_len; 412 unsigned char *d; 413 size_t out_len = 0; 414 bool vars_found = false; 415 416 p = src; 417 while (p != end) { 418 if (*p == '$') { 419 p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found); 420 } 421 else { 422 p ++; 423 out_len ++; 424 } 425 } 426 427 if (!vars_found) { 428 /* Trivial case */ 429 *dst = NULL; 430 return in_len; 431 } 432 433 *dst = UCL_ALLOC (out_len + 1); 434 if (*dst == NULL) { 435 return in_len; 436 } 437 438 d = *dst; 439 p = src; 440 while (p != end) { 441 if (*p == '$') { 442 p = ucl_expand_single_variable (parser, p, end - p, &d); 443 } 444 else { 445 *d++ = *p++; 446 } 447 } 448 449 *d = '\0'; 450 451 return out_len; 452 } 453 454 /** 455 * Store or copy pointer to the trash stack 456 * @param parser parser object 457 * @param src src string 458 * @param dst destination buffer (trash stack pointer) 459 * @param dst_const const destination pointer (e.g. value of object) 460 * @param in_len input length 461 * @param need_unescape need to unescape source (and copy it) 462 * @param need_lowercase need to lowercase value (and copy) 463 * @param need_expand need to expand variables (and copy as well) 464 * @return output length (excluding \0 symbol) 465 */ 466 static inline ssize_t 467 ucl_copy_or_store_ptr (struct ucl_parser *parser, 468 const unsigned char *src, unsigned char **dst, 469 const char **dst_const, size_t in_len, 470 bool need_unescape, bool need_lowercase, bool need_expand) 471 { 472 ssize_t ret = -1, tret; 473 unsigned char *tmp; 474 475 if (need_unescape || need_lowercase || 476 (need_expand && parser->variables != NULL) || 477 !(parser->flags & UCL_PARSER_ZEROCOPY)) { 478 /* Copy string */ 479 *dst = UCL_ALLOC (in_len + 1); 480 if (*dst == NULL) { 481 ucl_set_err (parser->chunks, 0, "cannot allocate memory for a string", &parser->err); 482 return false; 483 } 484 if (need_lowercase) { 485 ret = ucl_strlcpy_tolower (*dst, src, in_len + 1); 486 } 487 else { 488 ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1); 489 } 490 491 if (need_unescape) { 492 ret = ucl_unescape_json_string (*dst, ret); 493 } 494 if (need_expand) { 495 tmp = *dst; 496 tret = ret; 497 ret = ucl_expand_variable (parser, dst, tmp, ret); 498 if (*dst == NULL) { 499 /* Nothing to expand */ 500 *dst = tmp; 501 ret = tret; 502 } 503 } 504 *dst_const = *dst; 505 } 506 else { 507 *dst_const = src; 508 ret = in_len; 509 } 510 511 return ret; 512 } 513 514 /** 515 * Create and append an object at the specified level 516 * @param parser 517 * @param is_array 518 * @param level 519 * @return 520 */ 521 static inline ucl_object_t * 522 ucl_add_parser_stack (ucl_object_t *obj, struct ucl_parser *parser, bool is_array, int level) 523 { 524 struct ucl_stack *st; 525 526 if (!is_array) { 527 if (obj == NULL) { 528 obj = ucl_object_typed_new (UCL_OBJECT); 529 } 530 else { 531 obj->type = UCL_OBJECT; 532 } 533 obj->value.ov = ucl_hash_create (); 534 parser->state = UCL_STATE_KEY; 535 } 536 else { 537 if (obj == NULL) { 538 obj = ucl_object_typed_new (UCL_ARRAY); 539 } 540 else { 541 obj->type = UCL_ARRAY; 542 } 543 parser->state = UCL_STATE_VALUE; 544 } 545 546 st = UCL_ALLOC (sizeof (struct ucl_stack)); 547 st->obj = obj; 548 st->level = level; 549 LL_PREPEND (parser->stack, st); 550 parser->cur_obj = obj; 551 552 return obj; 553 } 554 555 int 556 ucl_maybe_parse_number (ucl_object_t *obj, 557 const char *start, const char *end, const char **pos, bool allow_double, bool number_bytes) 558 { 559 const char *p = start, *c = start; 560 char *endptr; 561 bool got_dot = false, got_exp = false, need_double = false, 562 is_date = false, valid_start = false, is_hex = false, 563 is_neg = false; 564 double dv = 0; 565 int64_t lv = 0; 566 567 if (*p == '-') { 568 is_neg = true; 569 c ++; 570 p ++; 571 } 572 while (p < end) { 573 if (is_hex && isxdigit (*p)) { 574 p ++; 575 } 576 else if (isdigit (*p)) { 577 valid_start = true; 578 p ++; 579 } 580 else if (!is_hex && (*p == 'x' || *p == 'X')) { 581 is_hex = true; 582 allow_double = false; 583 c = p + 1; 584 } 585 else if (allow_double) { 586 if (p == c) { 587 /* Empty digits sequence, not a number */ 588 *pos = start; 589 return EINVAL; 590 } 591 else if (*p == '.') { 592 if (got_dot) { 593 /* Double dots, not a number */ 594 *pos = start; 595 return EINVAL; 596 } 597 else { 598 got_dot = true; 599 need_double = true; 600 p ++; 601 } 602 } 603 else if (*p == 'e' || *p == 'E') { 604 if (got_exp) { 605 /* Double exp, not a number */ 606 *pos = start; 607 return EINVAL; 608 } 609 else { 610 got_exp = true; 611 need_double = true; 612 p ++; 613 if (p >= end) { 614 *pos = start; 615 return EINVAL; 616 } 617 if (!isdigit (*p) && *p != '+' && *p != '-') { 618 /* Wrong exponent sign */ 619 *pos = start; 620 return EINVAL; 621 } 622 else { 623 p ++; 624 } 625 } 626 } 627 else { 628 /* Got the end of the number, need to check */ 629 break; 630 } 631 } 632 else { 633 break; 634 } 635 } 636 637 if (!valid_start) { 638 *pos = start; 639 return EINVAL; 640 } 641 642 errno = 0; 643 if (need_double) { 644 dv = strtod (c, &endptr); 645 } 646 else { 647 if (is_hex) { 648 lv = strtoimax (c, &endptr, 16); 649 } 650 else { 651 lv = strtoimax (c, &endptr, 10); 652 } 653 } 654 if (errno == ERANGE) { 655 *pos = start; 656 return ERANGE; 657 } 658 659 /* Now check endptr */ 660 if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') { 661 p = endptr; 662 goto set_obj; 663 } 664 665 if (endptr < end && endptr != start) { 666 p = endptr; 667 switch (*p) { 668 case 'm': 669 case 'M': 670 case 'g': 671 case 'G': 672 case 'k': 673 case 'K': 674 if (end - p >= 2) { 675 if (p[1] == 's' || p[1] == 'S') { 676 /* Milliseconds */ 677 if (!need_double) { 678 need_double = true; 679 dv = lv; 680 } 681 is_date = true; 682 if (p[0] == 'm' || p[0] == 'M') { 683 dv /= 1000.; 684 } 685 else { 686 dv *= ucl_lex_num_multiplier (*p, false); 687 } 688 p += 2; 689 goto set_obj; 690 } 691 else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) { 692 /* Bytes */ 693 if (need_double) { 694 need_double = false; 695 lv = dv; 696 } 697 lv *= ucl_lex_num_multiplier (*p, true); 698 p += 2; 699 goto set_obj; 700 } 701 else if (ucl_lex_is_atom_end (p[1])) { 702 if (need_double) { 703 dv *= ucl_lex_num_multiplier (*p, false); 704 } 705 else { 706 lv *= ucl_lex_num_multiplier (*p, number_bytes); 707 } 708 p ++; 709 goto set_obj; 710 } 711 else if (end - p >= 3) { 712 if (tolower (p[0]) == 'm' && 713 tolower (p[1]) == 'i' && 714 tolower (p[2]) == 'n') { 715 /* Minutes */ 716 if (!need_double) { 717 need_double = true; 718 dv = lv; 719 } 720 is_date = true; 721 dv *= 60.; 722 p += 3; 723 goto set_obj; 724 } 725 } 726 } 727 else { 728 if (need_double) { 729 dv *= ucl_lex_num_multiplier (*p, false); 730 } 731 else { 732 lv *= ucl_lex_num_multiplier (*p, number_bytes); 733 } 734 p ++; 735 goto set_obj; 736 } 737 break; 738 case 'S': 739 case 's': 740 if (p == end - 1 || ucl_lex_is_atom_end (p[1])) { 741 if (!need_double) { 742 need_double = true; 743 dv = lv; 744 } 745 p ++; 746 is_date = true; 747 goto set_obj; 748 } 749 break; 750 case 'h': 751 case 'H': 752 case 'd': 753 case 'D': 754 case 'w': 755 case 'W': 756 case 'Y': 757 case 'y': 758 if (p == end - 1 || ucl_lex_is_atom_end (p[1])) { 759 if (!need_double) { 760 need_double = true; 761 dv = lv; 762 } 763 is_date = true; 764 dv *= ucl_lex_time_multiplier (*p); 765 p ++; 766 goto set_obj; 767 } 768 break; 769 } 770 } 771 772 *pos = c; 773 return EINVAL; 774 775 set_obj: 776 if (allow_double && (need_double || is_date)) { 777 if (!is_date) { 778 obj->type = UCL_FLOAT; 779 } 780 else { 781 obj->type = UCL_TIME; 782 } 783 obj->value.dv = is_neg ? (-dv) : dv; 784 } 785 else { 786 obj->type = UCL_INT; 787 obj->value.iv = is_neg ? (-lv) : lv; 788 } 789 *pos = p; 790 return 0; 791 } 792 793 /** 794 * Parse possible number 795 * @param parser 796 * @param chunk 797 * @return true if a number has been parsed 798 */ 799 static bool 800 ucl_lex_number (struct ucl_parser *parser, 801 struct ucl_chunk *chunk, ucl_object_t *obj) 802 { 803 const unsigned char *pos; 804 int ret; 805 806 ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos, true, false); 807 808 if (ret == 0) { 809 chunk->remain -= pos - chunk->pos; 810 chunk->column += pos - chunk->pos; 811 chunk->pos = pos; 812 return true; 813 } 814 else if (ret == ERANGE) { 815 ucl_set_err (chunk, ERANGE, "numeric value out of range", &parser->err); 816 } 817 818 return false; 819 } 820 821 /** 822 * Parse quoted string with possible escapes 823 * @param parser 824 * @param chunk 825 * @return true if a string has been parsed 826 */ 827 static bool 828 ucl_lex_json_string (struct ucl_parser *parser, 829 struct ucl_chunk *chunk, bool *need_unescape, bool *ucl_escape, bool *var_expand) 830 { 831 const unsigned char *p = chunk->pos; 832 unsigned char c; 833 int i; 834 835 while (p < chunk->end) { 836 c = *p; 837 if (c < 0x1F) { 838 /* Unmasked control character */ 839 if (c == '\n') { 840 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected newline", &parser->err); 841 } 842 else { 843 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected control character", &parser->err); 844 } 845 return false; 846 } 847 else if (c == '\\') { 848 ucl_chunk_skipc (chunk, p); 849 c = *p; 850 if (p >= chunk->end) { 851 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err); 852 return false; 853 } 854 else if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) { 855 if (c == 'u') { 856 ucl_chunk_skipc (chunk, p); 857 for (i = 0; i < 4 && p < chunk->end; i ++) { 858 if (!isxdigit (*p)) { 859 ucl_set_err (chunk, UCL_ESYNTAX, "invalid utf escape", &parser->err); 860 return false; 861 } 862 ucl_chunk_skipc (chunk, p); 863 } 864 if (p >= chunk->end) { 865 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished escape character", &parser->err); 866 return false; 867 } 868 } 869 else { 870 ucl_chunk_skipc (chunk, p); 871 } 872 } 873 *need_unescape = true; 874 *ucl_escape = true; 875 continue; 876 } 877 else if (c == '"') { 878 ucl_chunk_skipc (chunk, p); 879 return true; 880 } 881 else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) { 882 *ucl_escape = true; 883 } 884 else if (c == '$') { 885 *var_expand = true; 886 } 887 ucl_chunk_skipc (chunk, p); 888 } 889 890 ucl_set_err (chunk, UCL_ESYNTAX, "no quote at the end of json string", &parser->err); 891 return false; 892 } 893 894 /** 895 * Parse a key in an object 896 * @param parser 897 * @param chunk 898 * @return true if a key has been parsed 899 */ 900 static bool 901 ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, bool *next_key, bool *end_of_object) 902 { 903 const unsigned char *p, *c = NULL, *end, *t; 904 const char *key = NULL; 905 bool got_quote = false, got_eq = false, got_semicolon = false, 906 need_unescape = false, ucl_escape = false, var_expand = false, 907 got_content = false, got_sep = false; 908 ucl_object_t *nobj, *tobj; 909 ucl_hash_t *container; 910 ssize_t keylen; 911 912 p = chunk->pos; 913 914 if (*p == '.') { 915 /* It is macro actually */ 916 ucl_chunk_skipc (chunk, p); 917 parser->prev_state = parser->state; 918 parser->state = UCL_STATE_MACRO_NAME; 919 return true; 920 } 921 while (p < chunk->end) { 922 /* 923 * A key must start with alpha, number, '/' or '_' and end with space character 924 */ 925 if (c == NULL) { 926 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 927 if (!ucl_skip_comments (parser)) { 928 return false; 929 } 930 p = chunk->pos; 931 } 932 else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 933 ucl_chunk_skipc (chunk, p); 934 } 935 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) { 936 /* The first symbol */ 937 c = p; 938 ucl_chunk_skipc (chunk, p); 939 got_content = true; 940 } 941 else if (*p == '"') { 942 /* JSON style key */ 943 c = p + 1; 944 got_quote = true; 945 got_content = true; 946 ucl_chunk_skipc (chunk, p); 947 } 948 else if (*p == '}') { 949 /* We have actually end of an object */ 950 *end_of_object = true; 951 return true; 952 } 953 else if (*p == '.') { 954 ucl_chunk_skipc (chunk, p); 955 parser->prev_state = parser->state; 956 parser->state = UCL_STATE_MACRO_NAME; 957 return true; 958 } 959 else { 960 /* Invalid identifier */ 961 ucl_set_err (chunk, UCL_ESYNTAX, "key must begin with a letter", &parser->err); 962 return false; 963 } 964 } 965 else { 966 /* Parse the body of a key */ 967 if (!got_quote) { 968 if (ucl_test_character (*p, UCL_CHARACTER_KEY)) { 969 got_content = true; 970 ucl_chunk_skipc (chunk, p); 971 } 972 else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) { 973 end = p; 974 break; 975 } 976 else { 977 ucl_set_err (chunk, UCL_ESYNTAX, "invalid character in a key", &parser->err); 978 return false; 979 } 980 } 981 else { 982 /* We need to parse json like quoted string */ 983 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 984 return false; 985 } 986 /* Always escape keys obtained via json */ 987 end = chunk->pos - 1; 988 p = chunk->pos; 989 break; 990 } 991 } 992 } 993 994 if (p >= chunk->end && got_content) { 995 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err); 996 return false; 997 } 998 else if (!got_content) { 999 return true; 1000 } 1001 *end_of_object = false; 1002 /* We are now at the end of the key, need to parse the rest */ 1003 while (p < chunk->end) { 1004 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 1005 ucl_chunk_skipc (chunk, p); 1006 } 1007 else if (*p == '=') { 1008 if (!got_eq && !got_semicolon) { 1009 ucl_chunk_skipc (chunk, p); 1010 got_eq = true; 1011 } 1012 else { 1013 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected '=' character", &parser->err); 1014 return false; 1015 } 1016 } 1017 else if (*p == ':') { 1018 if (!got_eq && !got_semicolon) { 1019 ucl_chunk_skipc (chunk, p); 1020 got_semicolon = true; 1021 } 1022 else { 1023 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected ':' character", &parser->err); 1024 return false; 1025 } 1026 } 1027 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1028 /* Check for comment */ 1029 if (!ucl_skip_comments (parser)) { 1030 return false; 1031 } 1032 p = chunk->pos; 1033 } 1034 else { 1035 /* Start value */ 1036 break; 1037 } 1038 } 1039 1040 if (p >= chunk->end && got_content) { 1041 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished key", &parser->err); 1042 return false; 1043 } 1044 1045 got_sep = got_semicolon || got_eq; 1046 1047 if (!got_sep) { 1048 /* 1049 * Maybe we have more keys nested, so search for termination character. 1050 * Possible choices: 1051 * 1) key1 key2 ... keyN [:=] value <- we treat that as error 1052 * 2) key1 ... keyN {} or [] <- we treat that as nested objects 1053 * 3) key1 value[;,\n] <- we treat that as linear object 1054 */ 1055 t = p; 1056 *next_key = false; 1057 while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) { 1058 t ++; 1059 } 1060 /* Check first non-space character after a key */ 1061 if (*t != '{' && *t != '[') { 1062 while (t < chunk->end) { 1063 if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') { 1064 break; 1065 } 1066 else if (*t == '{' || *t == '[') { 1067 *next_key = true; 1068 break; 1069 } 1070 t ++; 1071 } 1072 } 1073 } 1074 1075 /* Create a new object */ 1076 nobj = ucl_object_new (); 1077 keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY], 1078 &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, false); 1079 if (keylen == -1) { 1080 ucl_object_free(nobj); 1081 return false; 1082 } 1083 else if (keylen == 0) { 1084 ucl_set_err (chunk, UCL_ESYNTAX, "empty keys are not allowed", &parser->err); 1085 ucl_object_free(nobj); 1086 return false; 1087 } 1088 1089 container = parser->stack->obj->value.ov; 1090 nobj->key = key; 1091 nobj->keylen = keylen; 1092 tobj = ucl_hash_search_obj (container, nobj); 1093 if (tobj == NULL) { 1094 container = ucl_hash_insert_object (container, nobj); 1095 nobj->prev = nobj; 1096 nobj->next = NULL; 1097 parser->stack->obj->len ++; 1098 } 1099 else { 1100 DL_APPEND (tobj, nobj); 1101 } 1102 1103 if (ucl_escape) { 1104 nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE; 1105 } 1106 parser->stack->obj->value.ov = container; 1107 1108 parser->cur_obj = nobj; 1109 1110 return true; 1111 } 1112 1113 /** 1114 * Parse a cl string 1115 * @param parser 1116 * @param chunk 1117 * @return true if a key has been parsed 1118 */ 1119 static bool 1120 ucl_parse_string_value (struct ucl_parser *parser, 1121 struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape) 1122 { 1123 const unsigned char *p; 1124 enum { 1125 UCL_BRACE_ROUND = 0, 1126 UCL_BRACE_SQUARE, 1127 UCL_BRACE_FIGURE 1128 }; 1129 int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}}; 1130 1131 p = chunk->pos; 1132 1133 while (p < chunk->end) { 1134 1135 /* Skip pairs of figure braces */ 1136 if (*p == '{') { 1137 braces[UCL_BRACE_FIGURE][0] ++; 1138 } 1139 else if (*p == '}') { 1140 braces[UCL_BRACE_FIGURE][1] ++; 1141 if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) { 1142 /* This is not a termination symbol, continue */ 1143 ucl_chunk_skipc (chunk, p); 1144 continue; 1145 } 1146 } 1147 /* Skip pairs of square braces */ 1148 else if (*p == '[') { 1149 braces[UCL_BRACE_SQUARE][0] ++; 1150 } 1151 else if (*p == ']') { 1152 braces[UCL_BRACE_SQUARE][1] ++; 1153 if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) { 1154 /* This is not a termination symbol, continue */ 1155 ucl_chunk_skipc (chunk, p); 1156 continue; 1157 } 1158 } 1159 else if (*p == '$') { 1160 *var_expand = true; 1161 } 1162 else if (*p == '\\') { 1163 *need_unescape = true; 1164 ucl_chunk_skipc (chunk, p); 1165 if (p < chunk->end) { 1166 ucl_chunk_skipc (chunk, p); 1167 } 1168 continue; 1169 } 1170 1171 if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1172 break; 1173 } 1174 ucl_chunk_skipc (chunk, p); 1175 } 1176 1177 if (p >= chunk->end) { 1178 ucl_set_err (chunk, UCL_ESYNTAX, "unfinished value", &parser->err); 1179 return false; 1180 } 1181 1182 return true; 1183 } 1184 1185 /** 1186 * Parse multiline string ending with \n{term}\n 1187 * @param parser 1188 * @param chunk 1189 * @param term 1190 * @param term_len 1191 * @return size of multiline string or 0 in case of error 1192 */ 1193 static int 1194 ucl_parse_multiline_string (struct ucl_parser *parser, 1195 struct ucl_chunk *chunk, const unsigned char *term, 1196 int term_len, unsigned char const **beg, 1197 bool *var_expand) 1198 { 1199 const unsigned char *p, *c; 1200 bool newline = false; 1201 int len = 0; 1202 1203 p = chunk->pos; 1204 1205 c = p; 1206 1207 while (p < chunk->end) { 1208 if (newline) { 1209 if (chunk->end - p < term_len) { 1210 return 0; 1211 } 1212 else if (memcmp (p, term, term_len) == 0 && (p[term_len] == '\n' || p[term_len] == '\r')) { 1213 len = p - c; 1214 chunk->remain -= term_len; 1215 chunk->pos = p + term_len; 1216 chunk->column = term_len; 1217 *beg = c; 1218 break; 1219 } 1220 } 1221 if (*p == '\n') { 1222 newline = true; 1223 } 1224 else { 1225 if (*p == '$') { 1226 *var_expand = true; 1227 } 1228 newline = false; 1229 } 1230 ucl_chunk_skipc (chunk, p); 1231 } 1232 1233 return len; 1234 } 1235 1236 /** 1237 * Handle value data 1238 * @param parser 1239 * @param chunk 1240 * @return 1241 */ 1242 static bool 1243 ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1244 { 1245 const unsigned char *p, *c; 1246 ucl_object_t *obj = NULL, *t; 1247 unsigned int stripped_spaces; 1248 int str_len; 1249 bool need_unescape = false, ucl_escape = false, var_expand = false; 1250 1251 p = chunk->pos; 1252 1253 while (p < chunk->end) { 1254 if (obj == NULL) { 1255 if (parser->stack->obj->type == UCL_ARRAY) { 1256 /* Object must be allocated */ 1257 obj = ucl_object_new (); 1258 t = parser->stack->obj->value.av; 1259 DL_APPEND (t, obj); 1260 parser->cur_obj = obj; 1261 parser->stack->obj->value.av = t; 1262 parser->stack->obj->len ++; 1263 } 1264 else { 1265 /* Object has been already allocated */ 1266 obj = parser->cur_obj; 1267 } 1268 } 1269 c = p; 1270 switch (*p) { 1271 case '"': 1272 ucl_chunk_skipc (chunk, p); 1273 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1274 return false; 1275 } 1276 str_len = chunk->pos - c - 2; 1277 obj->type = UCL_STRING; 1278 if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, &obj->trash_stack[UCL_TRASH_VALUE], 1279 &obj->value.sv, str_len, need_unescape, false, var_expand)) == -1) { 1280 return false; 1281 } 1282 obj->len = str_len; 1283 parser->state = UCL_STATE_AFTER_VALUE; 1284 p = chunk->pos; 1285 return true; 1286 break; 1287 case '{': 1288 /* We have a new object */ 1289 obj = ucl_add_parser_stack (obj, parser, false, parser->stack->level); 1290 1291 ucl_chunk_skipc (chunk, p); 1292 return true; 1293 break; 1294 case '[': 1295 /* We have a new array */ 1296 obj = ucl_add_parser_stack (obj, parser, true, parser->stack->level); 1297 1298 ucl_chunk_skipc (chunk, p); 1299 return true; 1300 break; 1301 case '<': 1302 /* We have something like multiline value, which must be <<[A-Z]+\n */ 1303 if (chunk->end - p > 3) { 1304 if (memcmp (p, "<<", 2) == 0) { 1305 p += 2; 1306 /* We allow only uppercase characters in multiline definitions */ 1307 while (p < chunk->end && *p >= 'A' && *p <= 'Z') { 1308 p ++; 1309 } 1310 if (*p =='\n') { 1311 /* Set chunk positions and start multiline parsing */ 1312 c += 2; 1313 chunk->remain -= p - c; 1314 chunk->pos = p + 1; 1315 chunk->column = 0; 1316 chunk->line ++; 1317 if ((str_len = ucl_parse_multiline_string (parser, chunk, c, 1318 p - c, &c, &var_expand)) == 0) { 1319 ucl_set_err (chunk, UCL_ESYNTAX, "unterminated multiline value", &parser->err); 1320 return false; 1321 } 1322 obj->type = UCL_STRING; 1323 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE], 1324 &obj->value.sv, str_len - 1, false, false, var_expand)) == -1) { 1325 return false; 1326 } 1327 obj->len = str_len; 1328 parser->state = UCL_STATE_AFTER_VALUE; 1329 return true; 1330 } 1331 } 1332 } 1333 /* Fallback to ordinary strings */ 1334 default: 1335 /* Skip any spaces and comments */ 1336 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) || 1337 (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { 1338 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1339 ucl_chunk_skipc (chunk, p); 1340 } 1341 if (!ucl_skip_comments (parser)) { 1342 return false; 1343 } 1344 p = chunk->pos; 1345 continue; 1346 } 1347 /* Parse atom */ 1348 if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) { 1349 if (!ucl_lex_number (parser, chunk, obj)) { 1350 if (parser->state == UCL_STATE_ERROR) { 1351 return false; 1352 } 1353 } 1354 else { 1355 parser->state = UCL_STATE_AFTER_VALUE; 1356 return true; 1357 } 1358 /* Fallback to normal string */ 1359 } 1360 1361 if (!ucl_parse_string_value (parser, chunk, &var_expand, &need_unescape)) { 1362 return false; 1363 } 1364 /* Cut trailing spaces */ 1365 stripped_spaces = 0; 1366 while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces), 1367 UCL_CHARACTER_WHITESPACE)) { 1368 stripped_spaces ++; 1369 } 1370 str_len = chunk->pos - c - stripped_spaces; 1371 if (str_len <= 0) { 1372 ucl_set_err (chunk, 0, "string value must not be empty", &parser->err); 1373 return false; 1374 } 1375 else if (str_len == 4 && memcmp (c, "null", 4) == 0) { 1376 obj->len = 0; 1377 obj->type = UCL_NULL; 1378 } 1379 else if (!ucl_maybe_parse_boolean (obj, c, str_len)) { 1380 obj->type = UCL_STRING; 1381 if ((str_len = ucl_copy_or_store_ptr (parser, c, &obj->trash_stack[UCL_TRASH_VALUE], 1382 &obj->value.sv, str_len, need_unescape, 1383 false, var_expand)) == -1) { 1384 return false; 1385 } 1386 obj->len = str_len; 1387 } 1388 parser->state = UCL_STATE_AFTER_VALUE; 1389 p = chunk->pos; 1390 1391 return true; 1392 break; 1393 } 1394 } 1395 1396 return true; 1397 } 1398 1399 /** 1400 * Handle after value data 1401 * @param parser 1402 * @param chunk 1403 * @return 1404 */ 1405 static bool 1406 ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk) 1407 { 1408 const unsigned char *p; 1409 bool got_sep = false; 1410 struct ucl_stack *st; 1411 1412 p = chunk->pos; 1413 1414 while (p < chunk->end) { 1415 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { 1416 /* Skip whitespaces */ 1417 ucl_chunk_skipc (chunk, p); 1418 } 1419 else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1420 /* Skip comment */ 1421 if (!ucl_skip_comments (parser)) { 1422 return false; 1423 } 1424 /* Treat comment as a separator */ 1425 got_sep = true; 1426 p = chunk->pos; 1427 } 1428 else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) { 1429 if (*p == '}' || *p == ']') { 1430 if (parser->stack == NULL) { 1431 ucl_set_err (chunk, UCL_ESYNTAX, "end of array or object detected without corresponding start", &parser->err); 1432 return false; 1433 } 1434 if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) || 1435 (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) { 1436 1437 /* Pop all nested objects from a stack */ 1438 st = parser->stack; 1439 parser->stack = st->next; 1440 UCL_FREE (sizeof (struct ucl_stack), st); 1441 1442 while (parser->stack != NULL) { 1443 st = parser->stack; 1444 if (st->next == NULL || st->next->level == st->level) { 1445 break; 1446 } 1447 parser->stack = st->next; 1448 UCL_FREE (sizeof (struct ucl_stack), st); 1449 } 1450 } 1451 else { 1452 ucl_set_err (chunk, UCL_ESYNTAX, "unexpected terminating symbol detected", &parser->err); 1453 return false; 1454 } 1455 1456 if (parser->stack == NULL) { 1457 /* Ignore everything after a top object */ 1458 return true; 1459 } 1460 else { 1461 ucl_chunk_skipc (chunk, p); 1462 } 1463 got_sep = true; 1464 } 1465 else { 1466 /* Got a separator */ 1467 got_sep = true; 1468 ucl_chunk_skipc (chunk, p); 1469 } 1470 } 1471 else { 1472 /* Anything else */ 1473 if (!got_sep) { 1474 ucl_set_err (chunk, UCL_ESYNTAX, "delimiter is missing", &parser->err); 1475 return false; 1476 } 1477 return true; 1478 } 1479 } 1480 1481 return true; 1482 } 1483 1484 /** 1485 * Handle macro data 1486 * @param parser 1487 * @param chunk 1488 * @return 1489 */ 1490 static bool 1491 ucl_parse_macro_value (struct ucl_parser *parser, 1492 struct ucl_chunk *chunk, struct ucl_macro *macro, 1493 unsigned char const **macro_start, size_t *macro_len) 1494 { 1495 const unsigned char *p, *c; 1496 bool need_unescape = false, ucl_escape = false, var_expand = false; 1497 1498 p = chunk->pos; 1499 1500 switch (*p) { 1501 case '"': 1502 /* We have macro value encoded in quotes */ 1503 c = p; 1504 ucl_chunk_skipc (chunk, p); 1505 if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { 1506 return false; 1507 } 1508 1509 *macro_start = c + 1; 1510 *macro_len = chunk->pos - c - 2; 1511 p = chunk->pos; 1512 break; 1513 case '{': 1514 /* We got a multiline macro body */ 1515 ucl_chunk_skipc (chunk, p); 1516 /* Skip spaces at the beginning */ 1517 while (p < chunk->end) { 1518 if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1519 ucl_chunk_skipc (chunk, p); 1520 } 1521 else { 1522 break; 1523 } 1524 } 1525 c = p; 1526 while (p < chunk->end) { 1527 if (*p == '}') { 1528 break; 1529 } 1530 ucl_chunk_skipc (chunk, p); 1531 } 1532 *macro_start = c; 1533 *macro_len = p - c; 1534 ucl_chunk_skipc (chunk, p); 1535 break; 1536 default: 1537 /* Macro is not enclosed in quotes or braces */ 1538 c = p; 1539 while (p < chunk->end) { 1540 if (ucl_lex_is_atom_end (*p)) { 1541 break; 1542 } 1543 ucl_chunk_skipc (chunk, p); 1544 } 1545 *macro_start = c; 1546 *macro_len = p - c; 1547 break; 1548 } 1549 1550 /* We are at the end of a macro */ 1551 /* Skip ';' and space characters and return to previous state */ 1552 while (p < chunk->end) { 1553 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') { 1554 break; 1555 } 1556 ucl_chunk_skipc (chunk, p); 1557 } 1558 return true; 1559 } 1560 1561 /** 1562 * Handle the main states of rcl parser 1563 * @param parser parser structure 1564 * @param data the pointer to the beginning of a chunk 1565 * @param len the length of a chunk 1566 * @return true if chunk has been parsed and false in case of error 1567 */ 1568 static bool 1569 ucl_state_machine (struct ucl_parser *parser) 1570 { 1571 ucl_object_t *obj; 1572 struct ucl_chunk *chunk = parser->chunks; 1573 const unsigned char *p, *c = NULL, *macro_start = NULL; 1574 unsigned char *macro_escaped; 1575 size_t macro_len = 0; 1576 struct ucl_macro *macro = NULL; 1577 bool next_key = false, end_of_object = false; 1578 1579 if (parser->top_obj == NULL) { 1580 if (*chunk->pos == '[') { 1581 obj = ucl_add_parser_stack (NULL, parser, true, 0); 1582 } 1583 else { 1584 obj = ucl_add_parser_stack (NULL, parser, false, 0); 1585 } 1586 parser->top_obj = obj; 1587 parser->cur_obj = obj; 1588 parser->state = UCL_STATE_INIT; 1589 } 1590 1591 p = chunk->pos; 1592 while (chunk->pos < chunk->end) { 1593 switch (parser->state) { 1594 case UCL_STATE_INIT: 1595 /* 1596 * At the init state we can either go to the parse array or object 1597 * if we got [ or { correspondingly or can just treat new data as 1598 * a key of newly created object 1599 */ 1600 obj = parser->cur_obj; 1601 if (!ucl_skip_comments (parser)) { 1602 parser->prev_state = parser->state; 1603 parser->state = UCL_STATE_ERROR; 1604 return false; 1605 } 1606 else { 1607 p = chunk->pos; 1608 if (*p == '[') { 1609 parser->state = UCL_STATE_VALUE; 1610 ucl_chunk_skipc (chunk, p); 1611 } 1612 else { 1613 parser->state = UCL_STATE_KEY; 1614 if (*p == '{') { 1615 ucl_chunk_skipc (chunk, p); 1616 } 1617 } 1618 } 1619 break; 1620 case UCL_STATE_KEY: 1621 /* Skip any spaces */ 1622 while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1623 ucl_chunk_skipc (chunk, p); 1624 } 1625 if (*p == '}') { 1626 /* We have the end of an object */ 1627 parser->state = UCL_STATE_AFTER_VALUE; 1628 continue; 1629 } 1630 if (parser->stack == NULL) { 1631 /* No objects are on stack, but we want to parse a key */ 1632 ucl_set_err (chunk, UCL_ESYNTAX, "top object is finished but the parser " 1633 "expects a key", &parser->err); 1634 parser->prev_state = parser->state; 1635 parser->state = UCL_STATE_ERROR; 1636 return false; 1637 } 1638 if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) { 1639 parser->prev_state = parser->state; 1640 parser->state = UCL_STATE_ERROR; 1641 return false; 1642 } 1643 if (end_of_object) { 1644 p = chunk->pos; 1645 parser->state = UCL_STATE_AFTER_VALUE; 1646 continue; 1647 } 1648 else if (parser->state != UCL_STATE_MACRO_NAME) { 1649 if (next_key && parser->stack->obj->type == UCL_OBJECT) { 1650 /* Parse more keys and nest objects accordingly */ 1651 obj = ucl_add_parser_stack (parser->cur_obj, parser, false, parser->stack->level + 1); 1652 } 1653 else { 1654 parser->state = UCL_STATE_VALUE; 1655 } 1656 } 1657 else { 1658 c = chunk->pos; 1659 } 1660 p = chunk->pos; 1661 break; 1662 case UCL_STATE_VALUE: 1663 /* We need to check what we do have */ 1664 if (!ucl_parse_value (parser, chunk)) { 1665 parser->prev_state = parser->state; 1666 parser->state = UCL_STATE_ERROR; 1667 return false; 1668 } 1669 /* State is set in ucl_parse_value call */ 1670 p = chunk->pos; 1671 break; 1672 case UCL_STATE_AFTER_VALUE: 1673 if (!ucl_parse_after_value (parser, chunk)) { 1674 parser->prev_state = parser->state; 1675 parser->state = UCL_STATE_ERROR; 1676 return false; 1677 } 1678 if (parser->stack != NULL) { 1679 if (parser->stack->obj->type == UCL_OBJECT) { 1680 parser->state = UCL_STATE_KEY; 1681 } 1682 else { 1683 /* Array */ 1684 parser->state = UCL_STATE_VALUE; 1685 } 1686 } 1687 else { 1688 /* Skip everything at the end */ 1689 return true; 1690 } 1691 p = chunk->pos; 1692 break; 1693 case UCL_STATE_MACRO_NAME: 1694 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1695 ucl_chunk_skipc (chunk, p); 1696 } 1697 else if (p - c > 0) { 1698 /* We got macro name */ 1699 macro_len = (size_t)(p - c); 1700 HASH_FIND (hh, parser->macroes, c, macro_len, macro); 1701 if (macro == NULL) { 1702 ucl_create_err (&parser->err, "error on line %d at column %d: " 1703 "unknown macro: '%.*s', character: '%c'", 1704 chunk->line, chunk->column, (int)(p - c), c, *chunk->pos); 1705 parser->state = UCL_STATE_ERROR; 1706 return false; 1707 } 1708 /* Now we need to skip all spaces */ 1709 while (p < chunk->end) { 1710 if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { 1711 if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { 1712 /* Skip comment */ 1713 if (!ucl_skip_comments (parser)) { 1714 return false; 1715 } 1716 p = chunk->pos; 1717 } 1718 break; 1719 } 1720 ucl_chunk_skipc (chunk, p); 1721 } 1722 parser->state = UCL_STATE_MACRO; 1723 } 1724 break; 1725 case UCL_STATE_MACRO: 1726 if (!ucl_parse_macro_value (parser, chunk, macro, 1727 ¯o_start, ¯o_len)) { 1728 parser->prev_state = parser->state; 1729 parser->state = UCL_STATE_ERROR; 1730 return false; 1731 } 1732 macro_len = ucl_expand_variable (parser, ¯o_escaped, macro_start, macro_len); 1733 parser->state = parser->prev_state; 1734 if (macro_escaped == NULL) { 1735 if (!macro->handler (macro_start, macro_len, macro->ud)) { 1736 return false; 1737 } 1738 } 1739 else { 1740 if (!macro->handler (macro_escaped, macro_len, macro->ud)) { 1741 UCL_FREE (macro_len + 1, macro_escaped); 1742 return false; 1743 } 1744 UCL_FREE (macro_len + 1, macro_escaped); 1745 } 1746 p = chunk->pos; 1747 break; 1748 default: 1749 /* TODO: add all states */ 1750 ucl_set_err (chunk, UCL_EINTERNAL, "internal error: parser is in an unknown state", &parser->err); 1751 parser->state = UCL_STATE_ERROR; 1752 return false; 1753 } 1754 } 1755 1756 return true; 1757 } 1758 1759 struct ucl_parser* 1760 ucl_parser_new (int flags) 1761 { 1762 struct ucl_parser *new; 1763 1764 new = UCL_ALLOC (sizeof (struct ucl_parser)); 1765 memset (new, 0, sizeof (struct ucl_parser)); 1766 1767 ucl_parser_register_macro (new, "include", ucl_include_handler, new); 1768 ucl_parser_register_macro (new, "try_include", ucl_try_include_handler, new); 1769 ucl_parser_register_macro (new, "includes", ucl_includes_handler, new); 1770 1771 new->flags = flags; 1772 1773 /* Initial assumption about filevars */ 1774 ucl_parser_set_filevars (new, NULL, false); 1775 1776 return new; 1777 } 1778 1779 1780 void 1781 ucl_parser_register_macro (struct ucl_parser *parser, const char *macro, 1782 ucl_macro_handler handler, void* ud) 1783 { 1784 struct ucl_macro *new; 1785 1786 new = UCL_ALLOC (sizeof (struct ucl_macro)); 1787 memset (new, 0, sizeof (struct ucl_macro)); 1788 new->handler = handler; 1789 new->name = strdup (macro); 1790 new->ud = ud; 1791 HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); 1792 } 1793 1794 void 1795 ucl_parser_register_variable (struct ucl_parser *parser, const char *var, 1796 const char *value) 1797 { 1798 struct ucl_variable *new = NULL, *cur; 1799 1800 if (var == NULL) { 1801 return; 1802 } 1803 1804 /* Find whether a variable already exists */ 1805 LL_FOREACH (parser->variables, cur) { 1806 if (strcmp (cur->var, var) == 0) { 1807 new = cur; 1808 break; 1809 } 1810 } 1811 1812 if (value == NULL) { 1813 1814 if (new != NULL) { 1815 /* Remove variable */ 1816 LL_DELETE (parser->variables, new); 1817 free (new->var); 1818 free (new->value); 1819 UCL_FREE (sizeof (struct ucl_variable), new); 1820 } 1821 else { 1822 /* Do nothing */ 1823 return; 1824 } 1825 } 1826 else { 1827 if (new == NULL) { 1828 new = UCL_ALLOC (sizeof (struct ucl_variable)); 1829 memset (new, 0, sizeof (struct ucl_variable)); 1830 new->var = strdup (var); 1831 new->var_len = strlen (var); 1832 new->value = strdup (value); 1833 new->value_len = strlen (value); 1834 1835 LL_PREPEND (parser->variables, new); 1836 } 1837 else { 1838 free (new->value); 1839 new->value = strdup (value); 1840 new->value_len = strlen (value); 1841 } 1842 } 1843 } 1844 1845 bool 1846 ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data, 1847 size_t len) 1848 { 1849 struct ucl_chunk *chunk; 1850 1851 if (parser->state != UCL_STATE_ERROR) { 1852 chunk = UCL_ALLOC (sizeof (struct ucl_chunk)); 1853 chunk->begin = data; 1854 chunk->remain = len; 1855 chunk->pos = chunk->begin; 1856 chunk->end = chunk->begin + len; 1857 chunk->line = 1; 1858 chunk->column = 0; 1859 LL_PREPEND (parser->chunks, chunk); 1860 parser->recursion ++; 1861 if (parser->recursion > UCL_MAX_RECURSION) { 1862 ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d", 1863 parser->recursion); 1864 return false; 1865 } 1866 return ucl_state_machine (parser); 1867 } 1868 1869 ucl_create_err (&parser->err, "a parser is in an invalid state"); 1870 1871 return false; 1872 } 1873