1
2 /*
3 * Introduction
4 * ************
5 *
6 * The following notes assume that you are familiar with the YAML specification
7 * (http://yaml.org/spec/cvs/current.html). We mostly follow it, although in
8 * some cases we are less restrictive than it requires.
9 *
10 * The process of transforming a YAML stream into a sequence of events is
11 * divided into two steps: Scanning and Parsing.
12 *
13 * The Scanner transforms the input stream into a sequence of tokens, while the
14 * Parser transforms the sequence of tokens produced by the Scanner into a
15 * sequence of parsing events.
16 *
17 * The Scanner is rather clever and complicated. The Parser, on the contrary,
18 * is a straightforward implementation of a recursive-descent parser (or,
19 * LL(1) parser, as it is usually called).
20 *
21 * Actually there are two issues of Scanning that might be called "clever", the
22 * rest is quite straightforward. The issues are "block collection start" and
23 * "simple keys". Both issues are explained below in detail.
24 *
25 * Here the Scanning step is explained and implemented. We start with the list
26 * of all the tokens produced by the Scanner together with short descriptions.
27 *
28 * Now, tokens:
29 *
30 * STREAM-START(encoding) # The stream start.
31 * STREAM-END # The stream end.
32 * VERSION-DIRECTIVE(major,minor) # The '%YAML' directive.
33 * TAG-DIRECTIVE(handle,prefix) # The '%TAG' directive.
34 * DOCUMENT-START # '---'
35 * DOCUMENT-END # '...'
36 * BLOCK-SEQUENCE-START # Indentation increase denoting a block
37 * BLOCK-MAPPING-START # sequence or a block mapping.
38 * BLOCK-END # Indentation decrease.
39 * FLOW-SEQUENCE-START # '['
40 * FLOW-SEQUENCE-END # ']'
41 * FLOW-MAPPING-START # '{'
42 * FLOW-MAPPING-END # '}'
43 * BLOCK-ENTRY # '-'
44 * FLOW-ENTRY # ','
45 * KEY # '?' or nothing (simple keys).
46 * VALUE # ':'
47 * ALIAS(anchor) # '*anchor'
48 * ANCHOR(anchor) # '&anchor'
49 * TAG(handle,suffix) # '!handle!suffix'
50 * SCALAR(value,style) # A scalar.
51 *
52 * The following two tokens are "virtual" tokens denoting the beginning and the
53 * end of the stream:
54 *
55 * STREAM-START(encoding)
56 * STREAM-END
57 *
58 * We pass the information about the input stream encoding with the
59 * STREAM-START token.
60 *
61 * The next two tokens are responsible for directives:
62 *
63 * VERSION-DIRECTIVE(major,minor)
64 * TAG-DIRECTIVE(handle,prefix)
65 *
66 * Example:
67 *
68 * %YAML 1.1
69 * %TAG ! !foo
70 * %TAG !yaml! tag:yaml.org,2002:
71 * ---
72 *
73 * The corresponding sequence of tokens:
74 *
75 * STREAM-START(utf-8)
76 * VERSION-DIRECTIVE(1,1)
77 * TAG-DIRECTIVE("!","!foo")
78 * TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
79 * DOCUMENT-START
80 * STREAM-END
81 *
82 * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
83 * line.
84 *
85 * The document start and end indicators are represented by:
86 *
87 * DOCUMENT-START
88 * DOCUMENT-END
89 *
90 * Note that if a YAML stream contains an implicit document (without '---'
91 * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
92 * produced.
93 *
94 * In the following examples, we present whole documents together with the
95 * produced tokens.
96 *
97 * 1. An implicit document:
98 *
99 * 'a scalar'
100 *
101 * Tokens:
102 *
103 * STREAM-START(utf-8)
104 * SCALAR("a scalar",single-quoted)
105 * STREAM-END
106 *
107 * 2. An explicit document:
108 *
109 * ---
110 * 'a scalar'
111 * ...
112 *
113 * Tokens:
114 *
115 * STREAM-START(utf-8)
116 * DOCUMENT-START
117 * SCALAR("a scalar",single-quoted)
118 * DOCUMENT-END
119 * STREAM-END
120 *
121 * 3. Several documents in a stream:
122 *
123 * 'a scalar'
124 * ---
125 * 'another scalar'
126 * ---
127 * 'yet another scalar'
128 *
129 * Tokens:
130 *
131 * STREAM-START(utf-8)
132 * SCALAR("a scalar",single-quoted)
133 * DOCUMENT-START
134 * SCALAR("another scalar",single-quoted)
135 * DOCUMENT-START
136 * SCALAR("yet another scalar",single-quoted)
137 * STREAM-END
138 *
139 * We have already introduced the SCALAR token above. The following tokens are
140 * used to describe aliases, anchors, tags, and scalars:
141 *
142 * ALIAS(anchor)
143 * ANCHOR(anchor)
144 * TAG(handle,suffix)
145 * SCALAR(value,style)
146 *
147 * The following series of examples illustrate the usage of these tokens:
148 *
149 * 1. A recursive sequence:
150 *
151 * &A [ *A ]
152 *
153 * Tokens:
154 *
155 * STREAM-START(utf-8)
156 * ANCHOR("A")
157 * FLOW-SEQUENCE-START
158 * ALIAS("A")
159 * FLOW-SEQUENCE-END
160 * STREAM-END
161 *
162 * 2. A tagged scalar:
163 *
164 * !!float "3.14" # A good approximation.
165 *
166 * Tokens:
167 *
168 * STREAM-START(utf-8)
169 * TAG("!!","float")
170 * SCALAR("3.14",double-quoted)
171 * STREAM-END
172 *
173 * 3. Various scalar styles:
174 *
175 * --- # Implicit empty plain scalars do not produce tokens.
176 * --- a plain scalar
177 * --- 'a single-quoted scalar'
178 * --- "a double-quoted scalar"
179 * --- |-
180 * a literal scalar
181 * --- >-
182 * a folded
183 * scalar
184 *
185 * Tokens:
186 *
187 * STREAM-START(utf-8)
188 * DOCUMENT-START
189 * DOCUMENT-START
190 * SCALAR("a plain scalar",plain)
191 * DOCUMENT-START
192 * SCALAR("a single-quoted scalar",single-quoted)
193 * DOCUMENT-START
194 * SCALAR("a double-quoted scalar",double-quoted)
195 * DOCUMENT-START
196 * SCALAR("a literal scalar",literal)
197 * DOCUMENT-START
198 * SCALAR("a folded scalar",folded)
199 * STREAM-END
200 *
201 * Now it's time to review collection-related tokens. We will start with
202 * flow collections:
203 *
204 * FLOW-SEQUENCE-START
205 * FLOW-SEQUENCE-END
206 * FLOW-MAPPING-START
207 * FLOW-MAPPING-END
208 * FLOW-ENTRY
209 * KEY
210 * VALUE
211 *
212 * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
213 * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
214 * correspondingly. FLOW-ENTRY represents the ',' indicator. Finally the
215 * indicators '?' and ':', which are used for denoting mapping keys and values,
216 * are represented by the KEY and VALUE tokens.
217 *
218 * The following examples show flow collections:
219 *
220 * 1. A flow sequence:
221 *
222 * [item 1, item 2, item 3]
223 *
224 * Tokens:
225 *
226 * STREAM-START(utf-8)
227 * FLOW-SEQUENCE-START
228 * SCALAR("item 1",plain)
229 * FLOW-ENTRY
230 * SCALAR("item 2",plain)
231 * FLOW-ENTRY
232 * SCALAR("item 3",plain)
233 * FLOW-SEQUENCE-END
234 * STREAM-END
235 *
236 * 2. A flow mapping:
237 *
238 * {
239 * a simple key: a value, # Note that the KEY token is produced.
240 * ? a complex key: another value,
241 * }
242 *
243 * Tokens:
244 *
245 * STREAM-START(utf-8)
246 * FLOW-MAPPING-START
247 * KEY
248 * SCALAR("a simple key",plain)
249 * VALUE
250 * SCALAR("a value",plain)
251 * FLOW-ENTRY
252 * KEY
253 * SCALAR("a complex key",plain)
254 * VALUE
255 * SCALAR("another value",plain)
256 * FLOW-ENTRY
257 * FLOW-MAPPING-END
258 * STREAM-END
259 *
260 * A simple key is a key which is not denoted by the '?' indicator. Note that
261 * the Scanner still produces the KEY token whenever it encounters a simple key.
262 *
263 * For scanning block collections, the following tokens are used (note that we
264 * repeat KEY and VALUE here):
265 *
266 * BLOCK-SEQUENCE-START
267 * BLOCK-MAPPING-START
268 * BLOCK-END
269 * BLOCK-ENTRY
270 * KEY
271 * VALUE
272 *
273 * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
274 * increase that precedes a block collection (cf. the INDENT token in Python).
275 * The token BLOCK-END denotes indentation decrease that ends a block collection
276 * (cf. the DEDENT token in Python). However YAML has some syntax peculiarities
277 * that make detection of these tokens more complex.
278 *
279 * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
280 * '-', '?', and ':' correspondingly.
281 *
282 * The following examples show how the tokens BLOCK-SEQUENCE-START,
283 * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
284 *
285 * 1. Block sequences:
286 *
287 * - item 1
288 * - item 2
289 * -
290 * - item 3.1
291 * - item 3.2
292 * -
293 * key 1: value 1
294 * key 2: value 2
295 *
296 * Tokens:
297 *
298 * STREAM-START(utf-8)
299 * BLOCK-SEQUENCE-START
300 * BLOCK-ENTRY
301 * SCALAR("item 1",plain)
302 * BLOCK-ENTRY
303 * SCALAR("item 2",plain)
304 * BLOCK-ENTRY
305 * BLOCK-SEQUENCE-START
306 * BLOCK-ENTRY
307 * SCALAR("item 3.1",plain)
308 * BLOCK-ENTRY
309 * SCALAR("item 3.2",plain)
310 * BLOCK-END
311 * BLOCK-ENTRY
312 * BLOCK-MAPPING-START
313 * KEY
314 * SCALAR("key 1",plain)
315 * VALUE
316 * SCALAR("value 1",plain)
317 * KEY
318 * SCALAR("key 2",plain)
319 * VALUE
320 * SCALAR("value 2",plain)
321 * BLOCK-END
322 * BLOCK-END
323 * STREAM-END
324 *
325 * 2. Block mappings:
326 *
327 * a simple key: a value # The KEY token is produced here.
328 * ? a complex key
329 * : another value
330 * a mapping:
331 * key 1: value 1
332 * key 2: value 2
333 * a sequence:
334 * - item 1
335 * - item 2
336 *
337 * Tokens:
338 *
339 * STREAM-START(utf-8)
340 * BLOCK-MAPPING-START
341 * KEY
342 * SCALAR("a simple key",plain)
343 * VALUE
344 * SCALAR("a value",plain)
345 * KEY
346 * SCALAR("a complex key",plain)
347 * VALUE
348 * SCALAR("another value",plain)
349 * KEY
350 * SCALAR("a mapping",plain)
351 * VALUE
352 * BLOCK-MAPPING-START
353 * KEY
354 * SCALAR("key 1",plain)
355 * VALUE
356 * SCALAR("value 1",plain)
357 * KEY
358 * SCALAR("key 2",plain)
359 * VALUE
360 * SCALAR("value 2",plain)
361 * BLOCK-END
362 * KEY
363 * SCALAR("a sequence",plain)
364 * VALUE
365 * BLOCK-SEQUENCE-START
366 * BLOCK-ENTRY
367 * SCALAR("item 1",plain)
368 * BLOCK-ENTRY
369 * SCALAR("item 2",plain)
370 * BLOCK-END
371 * BLOCK-END
372 * STREAM-END
373 *
374 * YAML does not always require a new block collection to start on a new
375 * line. If the current line contains only '-', '?', and ':' indicators, a new
376 * block collection may start at the current line. The following examples
377 * illustrate this case:
378 *
379 * 1. Collections in a sequence:
380 *
381 * - - item 1
382 * - item 2
383 * - key 1: value 1
384 * key 2: value 2
385 * - ? complex key
386 * : complex value
387 *
388 * Tokens:
389 *
390 * STREAM-START(utf-8)
391 * BLOCK-SEQUENCE-START
392 * BLOCK-ENTRY
393 * BLOCK-SEQUENCE-START
394 * BLOCK-ENTRY
395 * SCALAR("item 1",plain)
396 * BLOCK-ENTRY
397 * SCALAR("item 2",plain)
398 * BLOCK-END
399 * BLOCK-ENTRY
400 * BLOCK-MAPPING-START
401 * KEY
402 * SCALAR("key 1",plain)
403 * VALUE
404 * SCALAR("value 1",plain)
405 * KEY
406 * SCALAR("key 2",plain)
407 * VALUE
408 * SCALAR("value 2",plain)
409 * BLOCK-END
410 * BLOCK-ENTRY
411 * BLOCK-MAPPING-START
412 * KEY
413 * SCALAR("complex key",plain)
414 * VALUE
415 * SCALAR("complex value",plain)
416 * BLOCK-END
417 * BLOCK-END
418 * STREAM-END
419 *
420 * 2. Collections in a mapping:
421 *
422 * ? a sequence
423 * : - item 1
424 * - item 2
425 * ? a mapping
426 * : key 1: value 1
427 * key 2: value 2
428 *
429 * Tokens:
430 *
431 * STREAM-START(utf-8)
432 * BLOCK-MAPPING-START
433 * KEY
434 * SCALAR("a sequence",plain)
435 * VALUE
436 * BLOCK-SEQUENCE-START
437 * BLOCK-ENTRY
438 * SCALAR("item 1",plain)
439 * BLOCK-ENTRY
440 * SCALAR("item 2",plain)
441 * BLOCK-END
442 * KEY
443 * SCALAR("a mapping",plain)
444 * VALUE
445 * BLOCK-MAPPING-START
446 * KEY
447 * SCALAR("key 1",plain)
448 * VALUE
449 * SCALAR("value 1",plain)
450 * KEY
451 * SCALAR("key 2",plain)
452 * VALUE
453 * SCALAR("value 2",plain)
454 * BLOCK-END
455 * BLOCK-END
456 * STREAM-END
457 *
458 * YAML also permits non-indented sequences if they are included in a block
459 * mapping. In this case, the token BLOCK-SEQUENCE-START is not produced:
460 *
461 * key:
462 * - item 1 # BLOCK-SEQUENCE-START is NOT produced here.
463 * - item 2
464 *
465 * Tokens:
466 *
467 * STREAM-START(utf-8)
468 * BLOCK-MAPPING-START
469 * KEY
470 * SCALAR("key",plain)
471 * VALUE
472 * BLOCK-ENTRY
473 * SCALAR("item 1",plain)
474 * BLOCK-ENTRY
475 * SCALAR("item 2",plain)
476 * BLOCK-END
477 */
478
479 #include "yaml_private.h"
480
/*
 * Ensure that the buffer contains the required number of characters.
 *
 * Evaluates to 1 straight away when `parser->unread` already covers `length`;
 * otherwise evaluates to the result of yaml_parser_update_buffer(parser,
 * length).  Return 1 on success, 0 on failure (reader error or memory error).
 *
 * Every use of `parser` is parenthesized so that any pointer expression
 * (not only a plain identifier) expands correctly.  Both arguments may be
 * evaluated twice, so they must be free of side effects.
 */

#define CACHE(parser,length) \
    ((parser)->unread >= (length) \
        ? 1 \
        : yaml_parser_update_buffer((parser), (length)))
490
/*
 * Advance the buffer pointer past one (non-break) character.
 *
 * Bumps `mark.index` and `mark.column` by one, decrements `unread`, and moves
 * `buffer.pointer` forward by WIDTH(buffer).  `mark.line` is left untouched.
 *
 * `parser` is parenthesized at every use so that any pointer expression is
 * accepted; it is evaluated several times and must have no side effects.
 */

#define SKIP(parser) \
    ((parser)->mark.index ++, \
     (parser)->mark.column ++, \
     (parser)->unread --, \
     (parser)->buffer.pointer += WIDTH((parser)->buffer))
500
/*
 * Advance the buffer pointer past one line break, if there is one.
 *
 *  - CR LF:             index += 2, unread -= 2, pointer += 2;
 *  - any other IS_BREAK: index ++, unread --, pointer += WIDTH(buffer);
 *  - not at a break:     nothing is modified and the expression is 0.
 *
 * When a break is skipped, `mark.column` is reset to 0 and `mark.line` is
 * incremented.
 *
 * `parser` is parenthesized at every use so that any pointer expression is
 * accepted; it is evaluated several times and must have no side effects.
 */

#define SKIP_LINE(parser) \
    (IS_CRLF((parser)->buffer) ? \
     ((parser)->mark.index += 2, \
      (parser)->mark.column = 0, \
      (parser)->mark.line ++, \
      (parser)->unread -= 2, \
      (parser)->buffer.pointer += 2) : \
     IS_BREAK((parser)->buffer) ? \
     ((parser)->mark.index ++, \
      (parser)->mark.column = 0, \
      (parser)->mark.line ++, \
      (parser)->unread --, \
      (parser)->buffer.pointer += WIDTH((parser)->buffer)) : 0)
514
/*
 * Copy a character to a string buffer and advance pointers.
 *
 * STRING_EXTEND is tried first; if it fails the expression is 0 and nothing
 * is consumed.  Otherwise COPY moves one character from `parser->buffer`
 * into `string`, `mark.index` and `mark.column` are incremented, `unread`
 * is decremented, and the expression is 1.
 *
 * `parser` is parenthesized at every use (as `string` already was in the
 * sibling READ_LINE macro) so that any pointer expression is accepted.
 * Both arguments are evaluated several times and must have no side effects.
 */

#define READ(parser,string) \
    (STRING_EXTEND((parser),string) ? \
        (COPY(string,(parser)->buffer), \
         (parser)->mark.index ++, \
         (parser)->mark.column ++, \
         (parser)->unread --, \
         1) : 0)
526
/*
 * Copy a line break character to a string buffer and advance pointers.
 *
 * STRING_EXTEND is tried first; if it fails the expression is 0 and nothing
 * is consumed.  Otherwise the break at the buffer position is normalized:
 *
 *  - CR LF       -> a single LF is stored; 2 bytes / 2 characters consumed;
 *  - CR or LF    -> LF is stored; 1 byte / 1 character consumed;
 *  - NEL (C2 85) -> LF is stored; 2 bytes / 1 character consumed;
 *  - LS or PS (E2 80 A8 / E2 80 A9) -> the 3 bytes are copied unchanged;
 *                   3 bytes / 1 character consumed.
 *
 * In each of these cases `mark.column` is reset to 0 and `mark.line` is
 * incremented.  If the buffer is not positioned at one of these breaks,
 * nothing is consumed.  In both situations the expression is 1.
 *
 * `parser` is parenthesized at every use (as `string` already was) so that
 * any pointer expression is accepted.  Both arguments are evaluated several
 * times and must have no side effects.
 */

#define READ_LINE(parser,string) \
    (STRING_EXTEND((parser),string) ? \
    (((CHECK_AT((parser)->buffer,'\r',0) \
       && CHECK_AT((parser)->buffer,'\n',1)) ?         /* CR LF -> LF */ \
     (*((string).pointer++) = (yaml_char_t) '\n', \
      (parser)->buffer.pointer += 2, \
      (parser)->mark.index += 2, \
      (parser)->mark.column = 0, \
      (parser)->mark.line ++, \
      (parser)->unread -= 2) : \
     (CHECK_AT((parser)->buffer,'\r',0) \
      || CHECK_AT((parser)->buffer,'\n',0)) ?          /* CR|LF -> LF */ \
     (*((string).pointer++) = (yaml_char_t) '\n', \
      (parser)->buffer.pointer ++, \
      (parser)->mark.index ++, \
      (parser)->mark.column = 0, \
      (parser)->mark.line ++, \
      (parser)->unread --) : \
     (CHECK_AT((parser)->buffer,'\xC2',0) \
      && CHECK_AT((parser)->buffer,'\x85',1)) ?        /* NEL -> LF */ \
     (*((string).pointer++) = (yaml_char_t) '\n', \
      (parser)->buffer.pointer += 2, \
      (parser)->mark.index ++, \
      (parser)->mark.column = 0, \
      (parser)->mark.line ++, \
      (parser)->unread --) : \
     (CHECK_AT((parser)->buffer,'\xE2',0) && \
      CHECK_AT((parser)->buffer,'\x80',1) && \
      (CHECK_AT((parser)->buffer,'\xA8',2) || \
       CHECK_AT((parser)->buffer,'\xA9',2))) ?         /* LS|PS -> LS|PS */ \
     (*((string).pointer++) = *((parser)->buffer.pointer++), \
      *((string).pointer++) = *((parser)->buffer.pointer++), \
      *((string).pointer++) = *((parser)->buffer.pointer++), \
      (parser)->mark.index ++, \
      (parser)->mark.column = 0, \
      (parser)->mark.line ++, \
      (parser)->unread --) : 0), \
    1) : 0)
569
570 /*
571 * Public API declarations.
572 */
573
574 YAML_DECLARE(int)
575 yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token);
576
577 /*
578 * Error handling.
579 */
580
581 static int
582 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
583 yaml_mark_t context_mark, const char *problem);
584
585 /*
586 * High-level token API.
587 */
588
589 YAML_DECLARE(int)
590 yaml_parser_fetch_more_tokens(yaml_parser_t *parser);
591
592 static int
593 yaml_parser_fetch_next_token(yaml_parser_t *parser);
594
595 /*
596 * Potential simple keys.
597 */
598
599 static int
600 yaml_parser_stale_simple_keys(yaml_parser_t *parser);
601
602 static int
603 yaml_parser_save_simple_key(yaml_parser_t *parser);
604
605 static int
606 yaml_parser_remove_simple_key(yaml_parser_t *parser);
607
608 static int
609 yaml_parser_increase_flow_level(yaml_parser_t *parser);
610
611 static int
612 yaml_parser_decrease_flow_level(yaml_parser_t *parser);
613
614 /*
615 * Indentation treatment.
616 */
617
618 static int
619 yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
620 ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark);
621
622 static int
623 yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column);
624
625 /*
626 * Token fetchers.
627 */
628
629 static int
630 yaml_parser_fetch_stream_start(yaml_parser_t *parser);
631
632 static int
633 yaml_parser_fetch_stream_end(yaml_parser_t *parser);
634
635 static int
636 yaml_parser_fetch_directive(yaml_parser_t *parser);
637
638 static int
639 yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
640 yaml_token_type_t type);
641
642 static int
643 yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
644 yaml_token_type_t type);
645
646 static int
647 yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
648 yaml_token_type_t type);
649
650 static int
651 yaml_parser_fetch_flow_entry(yaml_parser_t *parser);
652
653 static int
654 yaml_parser_fetch_block_entry(yaml_parser_t *parser);
655
656 static int
657 yaml_parser_fetch_key(yaml_parser_t *parser);
658
659 static int
660 yaml_parser_fetch_value(yaml_parser_t *parser);
661
662 static int
663 yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type);
664
665 static int
666 yaml_parser_fetch_tag(yaml_parser_t *parser);
667
668 static int
669 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
670
671 static int
672 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
673
674 static int
675 yaml_parser_fetch_plain_scalar(yaml_parser_t *parser);
676
677 /*
678 * Token scanners.
679 */
680
681 static int
682 yaml_parser_scan_to_next_token(yaml_parser_t *parser);
683
684 static int
685 yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token);
686
687 static int
688 yaml_parser_scan_directive_name(yaml_parser_t *parser,
689 yaml_mark_t start_mark, yaml_char_t **name);
690
691 static int
692 yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
693 yaml_mark_t start_mark, int *major, int *minor);
694
695 static int
696 yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
697 yaml_mark_t start_mark, int *number);
698
699 static int
700 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
701 yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix);
702
703 static int
704 yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
705 yaml_token_type_t type);
706
707 static int
708 yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token);
709
710 static int
711 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
712 yaml_mark_t start_mark, yaml_char_t **handle);
713
714 static int
715 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int uri_char, int directive,
716 yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri);
717
718 static int
719 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
720 yaml_mark_t start_mark, yaml_string_t *string);
721
722 static int
723 yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
724 int literal);
725
726 static int
727 yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
728 int *indent, yaml_string_t *breaks,
729 yaml_mark_t start_mark, yaml_mark_t *end_mark);
730
731 static int
732 yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
733 int single);
734
735 static int
736 yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token);
737
738 /*
739 * Get the next token.
740 */
741
742 YAML_DECLARE(int)
yaml_parser_scan(yaml_parser_t * parser,yaml_token_t * token)743 yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token)
744 {
745 assert(parser); /* Non-NULL parser object is expected. */
746 assert(token); /* Non-NULL token object is expected. */
747
748 /* Erase the token object. */
749
750 memset(token, 0, sizeof(yaml_token_t));
751
752 /* No tokens after STREAM-END or error. */
753
754 if (parser->stream_end_produced || parser->error) {
755 return 1;
756 }
757
758 /* Ensure that the tokens queue contains enough tokens. */
759
760 if (!parser->token_available) {
761 if (!yaml_parser_fetch_more_tokens(parser))
762 return 0;
763 }
764
765 /* Fetch the next token from the queue. */
766
767 *token = DEQUEUE(parser, parser->tokens);
768 parser->token_available = 0;
769 parser->tokens_parsed ++;
770
771 if (token->type == YAML_STREAM_END_TOKEN) {
772 parser->stream_end_produced = 1;
773 }
774
775 return 1;
776 }
777
778 /*
779 * Set the scanner error and return 0.
780 */
781
782 static int
yaml_parser_set_scanner_error(yaml_parser_t * parser,const char * context,yaml_mark_t context_mark,const char * problem)783 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
784 yaml_mark_t context_mark, const char *problem)
785 {
786 parser->error = YAML_SCANNER_ERROR;
787 parser->context = context;
788 parser->context_mark = context_mark;
789 parser->problem = problem;
790 parser->problem_mark = parser->mark;
791
792 return 0;
793 }
794
795 /*
796 * Ensure that the tokens queue contains at least one token which can be
797 * returned to the Parser.
798 */
799
800 YAML_DECLARE(int)
yaml_parser_fetch_more_tokens(yaml_parser_t * parser)801 yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
802 {
803 int need_more_tokens;
804
805 /* While we need more tokens to fetch, do it. */
806
807 while (1)
808 {
809 /*
810 * Check if we really need to fetch more tokens.
811 */
812
813 need_more_tokens = 0;
814
815 if (parser->tokens.head == parser->tokens.tail)
816 {
817 /* Queue is empty. */
818
819 need_more_tokens = 1;
820 }
821 else
822 {
823 yaml_simple_key_t *simple_key;
824
825 /* Check if any potential simple key may occupy the head position. */
826
827 if (!yaml_parser_stale_simple_keys(parser))
828 return 0;
829
830 for (simple_key = parser->simple_keys.start;
831 simple_key != parser->simple_keys.top; simple_key++) {
832 if (simple_key->possible
833 && simple_key->token_number == parser->tokens_parsed) {
834 need_more_tokens = 1;
835 break;
836 }
837 }
838 }
839
840 /* We are finished. */
841
842 if (!need_more_tokens)
843 break;
844
845 /* Fetch the next token. */
846
847 if (!yaml_parser_fetch_next_token(parser))
848 return 0;
849 }
850
851 parser->token_available = 1;
852
853 return 1;
854 }
855
856 /*
857 * The dispatcher for token fetchers.
858 */
859
860 static int
yaml_parser_fetch_next_token(yaml_parser_t * parser)861 yaml_parser_fetch_next_token(yaml_parser_t *parser)
862 {
863 /* Ensure that the buffer is initialized. */
864
865 if (!CACHE(parser, 1))
866 return 0;
867
868 /* Check if we just started scanning. Fetch STREAM-START then. */
869
870 if (!parser->stream_start_produced)
871 return yaml_parser_fetch_stream_start(parser);
872
873 /* Eat whitespaces and comments until we reach the next token. */
874
875 if (!yaml_parser_scan_to_next_token(parser))
876 return 0;
877
878 /* Remove obsolete potential simple keys. */
879
880 if (!yaml_parser_stale_simple_keys(parser))
881 return 0;
882
883 /* Check the indentation level against the current column. */
884
885 if (!yaml_parser_unroll_indent(parser, parser->mark.column))
886 return 0;
887
888 /*
889 * Ensure that the buffer contains at least 4 characters. 4 is the length
890 * of the longest indicators ('--- ' and '... ').
891 */
892
893 if (!CACHE(parser, 4))
894 return 0;
895
896 /* Is it the end of the stream? */
897
898 if (IS_Z(parser->buffer))
899 return yaml_parser_fetch_stream_end(parser);
900
901 /* Is it a directive? */
902
903 if (parser->mark.column == 0 && CHECK(parser->buffer, '%'))
904 return yaml_parser_fetch_directive(parser);
905
906 /* Is it the document start indicator? */
907
908 if (parser->mark.column == 0
909 && CHECK_AT(parser->buffer, '-', 0)
910 && CHECK_AT(parser->buffer, '-', 1)
911 && CHECK_AT(parser->buffer, '-', 2)
912 && IS_BLANKZ_AT(parser->buffer, 3))
913 return yaml_parser_fetch_document_indicator(parser,
914 YAML_DOCUMENT_START_TOKEN);
915
916 /* Is it the document end indicator? */
917
918 if (parser->mark.column == 0
919 && CHECK_AT(parser->buffer, '.', 0)
920 && CHECK_AT(parser->buffer, '.', 1)
921 && CHECK_AT(parser->buffer, '.', 2)
922 && IS_BLANKZ_AT(parser->buffer, 3))
923 return yaml_parser_fetch_document_indicator(parser,
924 YAML_DOCUMENT_END_TOKEN);
925
926 /* Is it the flow sequence start indicator? */
927
928 if (CHECK(parser->buffer, '['))
929 return yaml_parser_fetch_flow_collection_start(parser,
930 YAML_FLOW_SEQUENCE_START_TOKEN);
931
932 /* Is it the flow mapping start indicator? */
933
934 if (CHECK(parser->buffer, '{'))
935 return yaml_parser_fetch_flow_collection_start(parser,
936 YAML_FLOW_MAPPING_START_TOKEN);
937
938 /* Is it the flow sequence end indicator? */
939
940 if (CHECK(parser->buffer, ']'))
941 return yaml_parser_fetch_flow_collection_end(parser,
942 YAML_FLOW_SEQUENCE_END_TOKEN);
943
944 /* Is it the flow mapping end indicator? */
945
946 if (CHECK(parser->buffer, '}'))
947 return yaml_parser_fetch_flow_collection_end(parser,
948 YAML_FLOW_MAPPING_END_TOKEN);
949
950 /* Is it the flow entry indicator? */
951
952 if (CHECK(parser->buffer, ','))
953 return yaml_parser_fetch_flow_entry(parser);
954
955 /* Is it the block entry indicator? */
956
957 if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1))
958 return yaml_parser_fetch_block_entry(parser);
959
960 /* Is it the key indicator? */
961
962 if (CHECK(parser->buffer, '?')
963 && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
964 return yaml_parser_fetch_key(parser);
965
966 /* Is it the value indicator? */
967
968 if (CHECK(parser->buffer, ':')
969 && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
970 return yaml_parser_fetch_value(parser);
971
972 /* Is it an alias? */
973
974 if (CHECK(parser->buffer, '*'))
975 return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN);
976
977 /* Is it an anchor? */
978
979 if (CHECK(parser->buffer, '&'))
980 return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN);
981
982 /* Is it a tag? */
983
984 if (CHECK(parser->buffer, '!'))
985 return yaml_parser_fetch_tag(parser);
986
987 /* Is it a literal scalar? */
988
989 if (CHECK(parser->buffer, '|') && !parser->flow_level)
990 return yaml_parser_fetch_block_scalar(parser, 1);
991
992 /* Is it a folded scalar? */
993
994 if (CHECK(parser->buffer, '>') && !parser->flow_level)
995 return yaml_parser_fetch_block_scalar(parser, 0);
996
997 /* Is it a single-quoted scalar? */
998
999 if (CHECK(parser->buffer, '\''))
1000 return yaml_parser_fetch_flow_scalar(parser, 1);
1001
1002 /* Is it a double-quoted scalar? */
1003
1004 if (CHECK(parser->buffer, '"'))
1005 return yaml_parser_fetch_flow_scalar(parser, 0);
1006
1007 /*
1008 * Is it a plain scalar?
1009 *
1010 * A plain scalar may start with any non-blank characters except
1011 *
1012 * '-', '?', ':', ',', '[', ']', '{', '}',
1013 * '#', '&', '*', '!', '|', '>', '\'', '\"',
1014 * '%', '@', '`'.
1015 *
1016 * In the block context (and, for the '-' indicator, in the flow context
1017 * too), it may also start with the characters
1018 *
1019 * '-', '?', ':'
1020 *
1021 * if it is followed by a non-space character.
1022 *
1023 * The last rule is more restrictive than the specification requires.
1024 */
1025
1026 if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-')
1027 || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')
1028 || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[')
1029 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
1030 || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#')
1031 || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*')
1032 || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|')
1033 || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'')
1034 || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%')
1035 || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) ||
1036 (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) ||
1037 (!parser->flow_level &&
1038 (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':'))
1039 && !IS_BLANKZ_AT(parser->buffer, 1)))
1040 return yaml_parser_fetch_plain_scalar(parser);
1041
1042 /*
1043 * If we don't determine the token type so far, it is an error.
1044 */
1045
1046 return yaml_parser_set_scanner_error(parser,
1047 "while scanning for the next token", parser->mark,
1048 "found character that cannot start any token");
1049 }
1050
1051 /*
1052 * Check the list of potential simple keys and remove the positions that
1053 * cannot contain simple keys anymore.
1054 */
1055
1056 static int
yaml_parser_stale_simple_keys(yaml_parser_t * parser)1057 yaml_parser_stale_simple_keys(yaml_parser_t *parser)
1058 {
1059 yaml_simple_key_t *simple_key;
1060
1061 /* Check for a potential simple key for each flow level. */
1062
1063 for (simple_key = parser->simple_keys.start;
1064 simple_key != parser->simple_keys.top; simple_key ++)
1065 {
1066 /*
1067 * The specification requires that a simple key
1068 *
1069 * - is limited to a single line,
1070 * - is shorter than 1024 characters.
1071 */
1072
1073 if (simple_key->possible
1074 && (simple_key->mark.line < parser->mark.line
1075 || simple_key->mark.index+1024 < parser->mark.index)) {
1076
1077 /* Check if the potential simple key to be removed is required. */
1078
1079 if (simple_key->required) {
1080 return yaml_parser_set_scanner_error(parser,
1081 "while scanning a simple key", simple_key->mark,
1082 "could not find expected ':'");
1083 }
1084
1085 simple_key->possible = 0;
1086 }
1087 }
1088
1089 return 1;
1090 }
1091
1092 /*
1093 * Check if a simple key may start at the current position and add it if
1094 * needed.
1095 */
1096
1097 static int
yaml_parser_save_simple_key(yaml_parser_t * parser)1098 yaml_parser_save_simple_key(yaml_parser_t *parser)
1099 {
1100 /*
1101 * A simple key is required at the current position if the scanner is in
1102 * the block context and the current column coincides with the indentation
1103 * level.
1104 */
1105
1106 int required = (!parser->flow_level
1107 && parser->indent == (ptrdiff_t)parser->mark.column);
1108
1109 /*
1110 * If the current position may start a simple key, save it.
1111 */
1112
1113 if (parser->simple_key_allowed)
1114 {
1115 yaml_simple_key_t simple_key;
1116 simple_key.possible = 1;
1117 simple_key.required = required;
1118 simple_key.token_number =
1119 parser->tokens_parsed + (parser->tokens.tail - parser->tokens.head);
1120 simple_key.mark = parser->mark;
1121
1122 if (!yaml_parser_remove_simple_key(parser)) return 0;
1123
1124 *(parser->simple_keys.top-1) = simple_key;
1125 }
1126
1127 return 1;
1128 }
1129
1130 /*
1131 * Remove a potential simple key at the current flow level.
1132 */
1133
1134 static int
yaml_parser_remove_simple_key(yaml_parser_t * parser)1135 yaml_parser_remove_simple_key(yaml_parser_t *parser)
1136 {
1137 yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1138
1139 if (simple_key->possible)
1140 {
1141 /* If the key is required, it is an error. */
1142
1143 if (simple_key->required) {
1144 return yaml_parser_set_scanner_error(parser,
1145 "while scanning a simple key", simple_key->mark,
1146 "could not find expected ':'");
1147 }
1148 }
1149
1150 /* Remove the key from the stack. */
1151
1152 simple_key->possible = 0;
1153
1154 return 1;
1155 }
1156
1157 /*
1158 * Increase the flow level and resize the simple key list if needed.
1159 */
1160
1161 static int
yaml_parser_increase_flow_level(yaml_parser_t * parser)1162 yaml_parser_increase_flow_level(yaml_parser_t *parser)
1163 {
1164 yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } };
1165
1166 /* Reset the simple key on the next level. */
1167
1168 if (!PUSH(parser, parser->simple_keys, empty_simple_key))
1169 return 0;
1170
1171 /* Increase the flow level. */
1172
1173 if (parser->flow_level == INT_MAX) {
1174 parser->error = YAML_MEMORY_ERROR;
1175 return 0;
1176 }
1177
1178 parser->flow_level++;
1179
1180 return 1;
1181 }
1182
1183 /*
1184 * Decrease the flow level.
1185 */
1186
1187 static int
yaml_parser_decrease_flow_level(yaml_parser_t * parser)1188 yaml_parser_decrease_flow_level(yaml_parser_t *parser)
1189 {
1190 if (parser->flow_level) {
1191 parser->flow_level --;
1192 (void)POP(parser, parser->simple_keys);
1193 }
1194
1195 return 1;
1196 }
1197
1198 /*
1199 * Push the current indentation level to the stack and set the new level
1200 * the current column is greater than the indentation level. In this case,
1201 * append or insert the specified token into the token queue.
1202 *
1203 */
1204
1205 static int
yaml_parser_roll_indent(yaml_parser_t * parser,ptrdiff_t column,ptrdiff_t number,yaml_token_type_t type,yaml_mark_t mark)1206 yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
1207 ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark)
1208 {
1209 yaml_token_t token;
1210
1211 /* In the flow context, do nothing. */
1212
1213 if (parser->flow_level)
1214 return 1;
1215
1216 if (parser->indent < column)
1217 {
1218 /*
1219 * Push the current indentation level to the stack and set the new
1220 * indentation level.
1221 */
1222
1223 if (!PUSH(parser, parser->indents, parser->indent))
1224 return 0;
1225
1226 if (column > INT_MAX) {
1227 parser->error = YAML_MEMORY_ERROR;
1228 return 0;
1229 }
1230
1231 parser->indent = column;
1232
1233 /* Create a token and insert it into the queue. */
1234
1235 TOKEN_INIT(token, type, mark, mark);
1236
1237 if (number == -1) {
1238 if (!ENQUEUE(parser, parser->tokens, token))
1239 return 0;
1240 }
1241 else {
1242 if (!QUEUE_INSERT(parser,
1243 parser->tokens, number - parser->tokens_parsed, token))
1244 return 0;
1245 }
1246 }
1247
1248 return 1;
1249 }
1250
1251 /*
1252 * Pop indentation levels from the indents stack until the current level
1253 * becomes less or equal to the column. For each indentation level, append
1254 * the BLOCK-END token.
1255 */
1256
1257
1258 static int
yaml_parser_unroll_indent(yaml_parser_t * parser,ptrdiff_t column)1259 yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column)
1260 {
1261 yaml_token_t token;
1262
1263 /* In the flow context, do nothing. */
1264
1265 if (parser->flow_level)
1266 return 1;
1267
1268 /* Loop through the indentation levels in the stack. */
1269
1270 while (parser->indent > column)
1271 {
1272 /* Create a token and append it to the queue. */
1273
1274 TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark);
1275
1276 if (!ENQUEUE(parser, parser->tokens, token))
1277 return 0;
1278
1279 /* Pop the indentation level. */
1280
1281 parser->indent = POP(parser, parser->indents);
1282 }
1283
1284 return 1;
1285 }
1286
1287 /*
1288 * Initialize the scanner and produce the STREAM-START token.
1289 */
1290
1291 static int
yaml_parser_fetch_stream_start(yaml_parser_t * parser)1292 yaml_parser_fetch_stream_start(yaml_parser_t *parser)
1293 {
1294 yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } };
1295 yaml_token_t token;
1296
1297 /* Set the initial indentation. */
1298
1299 parser->indent = -1;
1300
1301 /* Initialize the simple key stack. */
1302
1303 if (!PUSH(parser, parser->simple_keys, simple_key))
1304 return 0;
1305
1306 /* A simple key is allowed at the beginning of the stream. */
1307
1308 parser->simple_key_allowed = 1;
1309
1310 /* We have started. */
1311
1312 parser->stream_start_produced = 1;
1313
1314 /* Create the STREAM-START token and append it to the queue. */
1315
1316 STREAM_START_TOKEN_INIT(token, parser->encoding,
1317 parser->mark, parser->mark);
1318
1319 if (!ENQUEUE(parser, parser->tokens, token))
1320 return 0;
1321
1322 return 1;
1323 }
1324
1325 /*
1326 * Produce the STREAM-END token and shut down the scanner.
1327 */
1328
1329 static int
yaml_parser_fetch_stream_end(yaml_parser_t * parser)1330 yaml_parser_fetch_stream_end(yaml_parser_t *parser)
1331 {
1332 yaml_token_t token;
1333
1334 /* Force new line. */
1335
1336 if (parser->mark.column != 0) {
1337 parser->mark.column = 0;
1338 parser->mark.line ++;
1339 }
1340
1341 /* Reset the indentation level. */
1342
1343 if (!yaml_parser_unroll_indent(parser, -1))
1344 return 0;
1345
1346 /* Reset simple keys. */
1347
1348 if (!yaml_parser_remove_simple_key(parser))
1349 return 0;
1350
1351 parser->simple_key_allowed = 0;
1352
1353 /* Create the STREAM-END token and append it to the queue. */
1354
1355 STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark);
1356
1357 if (!ENQUEUE(parser, parser->tokens, token))
1358 return 0;
1359
1360 return 1;
1361 }
1362
1363 /*
1364 * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
1365 */
1366
1367 static int
yaml_parser_fetch_directive(yaml_parser_t * parser)1368 yaml_parser_fetch_directive(yaml_parser_t *parser)
1369 {
1370 yaml_token_t token;
1371
1372 /* Reset the indentation level. */
1373
1374 if (!yaml_parser_unroll_indent(parser, -1))
1375 return 0;
1376
1377 /* Reset simple keys. */
1378
1379 if (!yaml_parser_remove_simple_key(parser))
1380 return 0;
1381
1382 parser->simple_key_allowed = 0;
1383
1384 /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */
1385
1386 if (!yaml_parser_scan_directive(parser, &token))
1387 return 0;
1388
1389 /* Append the token to the queue. */
1390
1391 if (!ENQUEUE(parser, parser->tokens, token)) {
1392 yaml_token_delete(&token);
1393 return 0;
1394 }
1395
1396 return 1;
1397 }
1398
1399 /*
1400 * Produce the DOCUMENT-START or DOCUMENT-END token.
1401 */
1402
1403 static int
yaml_parser_fetch_document_indicator(yaml_parser_t * parser,yaml_token_type_t type)1404 yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
1405 yaml_token_type_t type)
1406 {
1407 yaml_mark_t start_mark, end_mark;
1408 yaml_token_t token;
1409
1410 /* Reset the indentation level. */
1411
1412 if (!yaml_parser_unroll_indent(parser, -1))
1413 return 0;
1414
1415 /* Reset simple keys. */
1416
1417 if (!yaml_parser_remove_simple_key(parser))
1418 return 0;
1419
1420 parser->simple_key_allowed = 0;
1421
1422 /* Consume the token. */
1423
1424 start_mark = parser->mark;
1425
1426 SKIP(parser);
1427 SKIP(parser);
1428 SKIP(parser);
1429
1430 end_mark = parser->mark;
1431
1432 /* Create the DOCUMENT-START or DOCUMENT-END token. */
1433
1434 TOKEN_INIT(token, type, start_mark, end_mark);
1435
1436 /* Append the token to the queue. */
1437
1438 if (!ENQUEUE(parser, parser->tokens, token))
1439 return 0;
1440
1441 return 1;
1442 }
1443
1444 /*
1445 * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
1446 */
1447
1448 static int
yaml_parser_fetch_flow_collection_start(yaml_parser_t * parser,yaml_token_type_t type)1449 yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
1450 yaml_token_type_t type)
1451 {
1452 yaml_mark_t start_mark, end_mark;
1453 yaml_token_t token;
1454
1455 /* The indicators '[' and '{' may start a simple key. */
1456
1457 if (!yaml_parser_save_simple_key(parser))
1458 return 0;
1459
1460 /* Increase the flow level. */
1461
1462 if (!yaml_parser_increase_flow_level(parser))
1463 return 0;
1464
1465 /* A simple key may follow the indicators '[' and '{'. */
1466
1467 parser->simple_key_allowed = 1;
1468
1469 /* Consume the token. */
1470
1471 start_mark = parser->mark;
1472 SKIP(parser);
1473 end_mark = parser->mark;
1474
1475 /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */
1476
1477 TOKEN_INIT(token, type, start_mark, end_mark);
1478
1479 /* Append the token to the queue. */
1480
1481 if (!ENQUEUE(parser, parser->tokens, token))
1482 return 0;
1483
1484 return 1;
1485 }
1486
1487 /*
1488 * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
1489 */
1490
1491 static int
yaml_parser_fetch_flow_collection_end(yaml_parser_t * parser,yaml_token_type_t type)1492 yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
1493 yaml_token_type_t type)
1494 {
1495 yaml_mark_t start_mark, end_mark;
1496 yaml_token_t token;
1497
1498 /* Reset any potential simple key on the current flow level. */
1499
1500 if (!yaml_parser_remove_simple_key(parser))
1501 return 0;
1502
1503 /* Decrease the flow level. */
1504
1505 if (!yaml_parser_decrease_flow_level(parser))
1506 return 0;
1507
1508 /* No simple keys after the indicators ']' and '}'. */
1509
1510 parser->simple_key_allowed = 0;
1511
1512 /* Consume the token. */
1513
1514 start_mark = parser->mark;
1515 SKIP(parser);
1516 end_mark = parser->mark;
1517
1518 /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */
1519
1520 TOKEN_INIT(token, type, start_mark, end_mark);
1521
1522 /* Append the token to the queue. */
1523
1524 if (!ENQUEUE(parser, parser->tokens, token))
1525 return 0;
1526
1527 return 1;
1528 }
1529
1530 /*
1531 * Produce the FLOW-ENTRY token.
1532 */
1533
1534 static int
yaml_parser_fetch_flow_entry(yaml_parser_t * parser)1535 yaml_parser_fetch_flow_entry(yaml_parser_t *parser)
1536 {
1537 yaml_mark_t start_mark, end_mark;
1538 yaml_token_t token;
1539
1540 /* Reset any potential simple keys on the current flow level. */
1541
1542 if (!yaml_parser_remove_simple_key(parser))
1543 return 0;
1544
1545 /* Simple keys are allowed after ','. */
1546
1547 parser->simple_key_allowed = 1;
1548
1549 /* Consume the token. */
1550
1551 start_mark = parser->mark;
1552 SKIP(parser);
1553 end_mark = parser->mark;
1554
1555 /* Create the FLOW-ENTRY token and append it to the queue. */
1556
1557 TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark);
1558
1559 if (!ENQUEUE(parser, parser->tokens, token))
1560 return 0;
1561
1562 return 1;
1563 }
1564
1565 /*
1566 * Produce the BLOCK-ENTRY token.
1567 */
1568
1569 static int
yaml_parser_fetch_block_entry(yaml_parser_t * parser)1570 yaml_parser_fetch_block_entry(yaml_parser_t *parser)
1571 {
1572 yaml_mark_t start_mark, end_mark;
1573 yaml_token_t token;
1574
1575 /* Check if the scanner is in the block context. */
1576
1577 if (!parser->flow_level)
1578 {
1579 /* Check if we are allowed to start a new entry. */
1580
1581 if (!parser->simple_key_allowed) {
1582 return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1583 "block sequence entries are not allowed in this context");
1584 }
1585
1586 /* Add the BLOCK-SEQUENCE-START token if needed. */
1587
1588 if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1589 YAML_BLOCK_SEQUENCE_START_TOKEN, parser->mark))
1590 return 0;
1591 }
1592 else
1593 {
1594 /*
1595 * It is an error for the '-' indicator to occur in the flow context,
1596 * but we let the Parser detect and report about it because the Parser
1597 * is able to point to the context.
1598 */
1599 }
1600
1601 /* Reset any potential simple keys on the current flow level. */
1602
1603 if (!yaml_parser_remove_simple_key(parser))
1604 return 0;
1605
1606 /* Simple keys are allowed after '-'. */
1607
1608 parser->simple_key_allowed = 1;
1609
1610 /* Consume the token. */
1611
1612 start_mark = parser->mark;
1613 SKIP(parser);
1614 end_mark = parser->mark;
1615
1616 /* Create the BLOCK-ENTRY token and append it to the queue. */
1617
1618 TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark);
1619
1620 if (!ENQUEUE(parser, parser->tokens, token))
1621 return 0;
1622
1623 return 1;
1624 }
1625
1626 /*
1627 * Produce the KEY token.
1628 */
1629
1630 static int
yaml_parser_fetch_key(yaml_parser_t * parser)1631 yaml_parser_fetch_key(yaml_parser_t *parser)
1632 {
1633 yaml_mark_t start_mark, end_mark;
1634 yaml_token_t token;
1635
1636 /* In the block context, additional checks are required. */
1637
1638 if (!parser->flow_level)
1639 {
1640 /* Check if we are allowed to start a new key (not necessary simple). */
1641
1642 if (!parser->simple_key_allowed) {
1643 return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1644 "mapping keys are not allowed in this context");
1645 }
1646
1647 /* Add the BLOCK-MAPPING-START token if needed. */
1648
1649 if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1650 YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
1651 return 0;
1652 }
1653
1654 /* Reset any potential simple keys on the current flow level. */
1655
1656 if (!yaml_parser_remove_simple_key(parser))
1657 return 0;
1658
1659 /* Simple keys are allowed after '?' in the block context. */
1660
1661 parser->simple_key_allowed = (!parser->flow_level);
1662
1663 /* Consume the token. */
1664
1665 start_mark = parser->mark;
1666 SKIP(parser);
1667 end_mark = parser->mark;
1668
1669 /* Create the KEY token and append it to the queue. */
1670
1671 TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark);
1672
1673 if (!ENQUEUE(parser, parser->tokens, token))
1674 return 0;
1675
1676 return 1;
1677 }
1678
1679 /*
1680 * Produce the VALUE token.
1681 */
1682
1683 static int
yaml_parser_fetch_value(yaml_parser_t * parser)1684 yaml_parser_fetch_value(yaml_parser_t *parser)
1685 {
1686 yaml_mark_t start_mark, end_mark;
1687 yaml_token_t token;
1688 yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1689
1690 /* Have we found a simple key? */
1691
1692 if (simple_key->possible)
1693 {
1694
1695 /* Create the KEY token and insert it into the queue. */
1696
1697 TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);
1698
1699 if (!QUEUE_INSERT(parser, parser->tokens,
1700 simple_key->token_number - parser->tokens_parsed, token))
1701 return 0;
1702
1703 /* In the block context, we may need to add the BLOCK-MAPPING-START token. */
1704
1705 if (!yaml_parser_roll_indent(parser, simple_key->mark.column,
1706 simple_key->token_number,
1707 YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
1708 return 0;
1709
1710 /* Remove the simple key. */
1711
1712 simple_key->possible = 0;
1713
1714 /* A simple key cannot follow another simple key. */
1715
1716 parser->simple_key_allowed = 0;
1717 }
1718 else
1719 {
1720 /* The ':' indicator follows a complex key. */
1721
1722 /* In the block context, extra checks are required. */
1723
1724 if (!parser->flow_level)
1725 {
1726 /* Check if we are allowed to start a complex value. */
1727
1728 if (!parser->simple_key_allowed) {
1729 return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1730 "mapping values are not allowed in this context");
1731 }
1732
1733 /* Add the BLOCK-MAPPING-START token if needed. */
1734
1735 if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1736 YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
1737 return 0;
1738 }
1739
1740 /* Simple keys after ':' are allowed in the block context. */
1741
1742 parser->simple_key_allowed = (!parser->flow_level);
1743 }
1744
1745 /* Consume the token. */
1746
1747 start_mark = parser->mark;
1748 SKIP(parser);
1749 end_mark = parser->mark;
1750
1751 /* Create the VALUE token and append it to the queue. */
1752
1753 TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark);
1754
1755 if (!ENQUEUE(parser, parser->tokens, token))
1756 return 0;
1757
1758 return 1;
1759 }
1760
1761 /*
1762 * Produce the ALIAS or ANCHOR token.
1763 */
1764
1765 static int
yaml_parser_fetch_anchor(yaml_parser_t * parser,yaml_token_type_t type)1766 yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type)
1767 {
1768 yaml_token_t token;
1769
1770 /* An anchor or an alias could be a simple key. */
1771
1772 if (!yaml_parser_save_simple_key(parser))
1773 return 0;
1774
1775 /* A simple key cannot follow an anchor or an alias. */
1776
1777 parser->simple_key_allowed = 0;
1778
1779 /* Create the ALIAS or ANCHOR token and append it to the queue. */
1780
1781 if (!yaml_parser_scan_anchor(parser, &token, type))
1782 return 0;
1783
1784 if (!ENQUEUE(parser, parser->tokens, token)) {
1785 yaml_token_delete(&token);
1786 return 0;
1787 }
1788 return 1;
1789 }
1790
1791 /*
1792 * Produce the TAG token.
1793 */
1794
1795 static int
yaml_parser_fetch_tag(yaml_parser_t * parser)1796 yaml_parser_fetch_tag(yaml_parser_t *parser)
1797 {
1798 yaml_token_t token;
1799
1800 /* A tag could be a simple key. */
1801
1802 if (!yaml_parser_save_simple_key(parser))
1803 return 0;
1804
1805 /* A simple key cannot follow a tag. */
1806
1807 parser->simple_key_allowed = 0;
1808
1809 /* Create the TAG token and append it to the queue. */
1810
1811 if (!yaml_parser_scan_tag(parser, &token))
1812 return 0;
1813
1814 if (!ENQUEUE(parser, parser->tokens, token)) {
1815 yaml_token_delete(&token);
1816 return 0;
1817 }
1818
1819 return 1;
1820 }
1821
1822 /*
1823 * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
1824 */
1825
1826 static int
yaml_parser_fetch_block_scalar(yaml_parser_t * parser,int literal)1827 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal)
1828 {
1829 yaml_token_t token;
1830
1831 /* Remove any potential simple keys. */
1832
1833 if (!yaml_parser_remove_simple_key(parser))
1834 return 0;
1835
1836 /* A simple key may follow a block scalar. */
1837
1838 parser->simple_key_allowed = 1;
1839
1840 /* Create the SCALAR token and append it to the queue. */
1841
1842 if (!yaml_parser_scan_block_scalar(parser, &token, literal))
1843 return 0;
1844
1845 if (!ENQUEUE(parser, parser->tokens, token)) {
1846 yaml_token_delete(&token);
1847 return 0;
1848 }
1849
1850 return 1;
1851 }
1852
1853 /*
1854 * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
1855 */
1856
1857 static int
yaml_parser_fetch_flow_scalar(yaml_parser_t * parser,int single)1858 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single)
1859 {
1860 yaml_token_t token;
1861
1862 /* A plain scalar could be a simple key. */
1863
1864 if (!yaml_parser_save_simple_key(parser))
1865 return 0;
1866
1867 /* A simple key cannot follow a flow scalar. */
1868
1869 parser->simple_key_allowed = 0;
1870
1871 /* Create the SCALAR token and append it to the queue. */
1872
1873 if (!yaml_parser_scan_flow_scalar(parser, &token, single))
1874 return 0;
1875
1876 if (!ENQUEUE(parser, parser->tokens, token)) {
1877 yaml_token_delete(&token);
1878 return 0;
1879 }
1880
1881 return 1;
1882 }
1883
1884 /*
1885 * Produce the SCALAR(...,plain) token.
1886 */
1887
1888 static int
yaml_parser_fetch_plain_scalar(yaml_parser_t * parser)1889 yaml_parser_fetch_plain_scalar(yaml_parser_t *parser)
1890 {
1891 yaml_token_t token;
1892
1893 /* A plain scalar could be a simple key. */
1894
1895 if (!yaml_parser_save_simple_key(parser))
1896 return 0;
1897
1898 /* A simple key cannot follow a flow scalar. */
1899
1900 parser->simple_key_allowed = 0;
1901
1902 /* Create the SCALAR token and append it to the queue. */
1903
1904 if (!yaml_parser_scan_plain_scalar(parser, &token))
1905 return 0;
1906
1907 if (!ENQUEUE(parser, parser->tokens, token)) {
1908 yaml_token_delete(&token);
1909 return 0;
1910 }
1911
1912 return 1;
1913 }
1914
1915 /*
1916 * Eat whitespaces and comments until the next token is found.
1917 */
1918
1919 static int
yaml_parser_scan_to_next_token(yaml_parser_t * parser)1920 yaml_parser_scan_to_next_token(yaml_parser_t *parser)
1921 {
1922 /* Until the next token is not found. */
1923
1924 while (1)
1925 {
1926 /* Allow the BOM mark to start a line. */
1927
1928 if (!CACHE(parser, 1)) return 0;
1929
1930 if (parser->mark.column == 0 && IS_BOM(parser->buffer))
1931 SKIP(parser);
1932
1933 /*
1934 * Eat whitespaces.
1935 *
1936 * Tabs are allowed:
1937 *
1938 * - in the flow context;
1939 * - in the block context, but not at the beginning of the line or
1940 * after '-', '?', or ':' (complex value).
1941 */
1942
1943 if (!CACHE(parser, 1)) return 0;
1944
1945 while (CHECK(parser->buffer,' ') ||
1946 ((parser->flow_level || !parser->simple_key_allowed) &&
1947 CHECK(parser->buffer, '\t'))) {
1948 SKIP(parser);
1949 if (!CACHE(parser, 1)) return 0;
1950 }
1951
1952 /* Eat a comment until a line break. */
1953
1954 if (CHECK(parser->buffer, '#')) {
1955 while (!IS_BREAKZ(parser->buffer)) {
1956 SKIP(parser);
1957 if (!CACHE(parser, 1)) return 0;
1958 }
1959 }
1960
1961 /* If it is a line break, eat it. */
1962
1963 if (IS_BREAK(parser->buffer))
1964 {
1965 if (!CACHE(parser, 2)) return 0;
1966 SKIP_LINE(parser);
1967
1968 /* In the block context, a new line may start a simple key. */
1969
1970 if (!parser->flow_level) {
1971 parser->simple_key_allowed = 1;
1972 }
1973 }
1974 else
1975 {
1976 /* We have found a token. */
1977
1978 break;
1979 }
1980 }
1981
1982 return 1;
1983 }
1984
1985 /*
1986 * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
1987 *
1988 * Scope:
1989 * %YAML 1.1 # a comment \n
1990 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1991 * %TAG !yaml! tag:yaml.org,2002: \n
1992 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1993 */
1994
1995 int
yaml_parser_scan_directive(yaml_parser_t * parser,yaml_token_t * token)1996 yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token)
1997 {
1998 yaml_mark_t start_mark, end_mark;
1999 yaml_char_t *name = NULL;
2000 int major, minor;
2001 yaml_char_t *handle = NULL, *prefix = NULL;
2002
2003 /* Eat '%'. */
2004
2005 start_mark = parser->mark;
2006
2007 SKIP(parser);
2008
2009 /* Scan the directive name. */
2010
2011 if (!yaml_parser_scan_directive_name(parser, start_mark, &name))
2012 goto error;
2013
2014 /* Is it a YAML directive? */
2015
2016 if (strcmp((char *)name, "YAML") == 0)
2017 {
2018 /* Scan the VERSION directive value. */
2019
2020 if (!yaml_parser_scan_version_directive_value(parser, start_mark,
2021 &major, &minor))
2022 goto error;
2023
2024 end_mark = parser->mark;
2025
2026 /* Create a VERSION-DIRECTIVE token. */
2027
2028 VERSION_DIRECTIVE_TOKEN_INIT(*token, major, minor,
2029 start_mark, end_mark);
2030 }
2031
2032 /* Is it a TAG directive? */
2033
2034 else if (strcmp((char *)name, "TAG") == 0)
2035 {
2036 /* Scan the TAG directive value. */
2037
2038 if (!yaml_parser_scan_tag_directive_value(parser, start_mark,
2039 &handle, &prefix))
2040 goto error;
2041
2042 end_mark = parser->mark;
2043
2044 /* Create a TAG-DIRECTIVE token. */
2045
2046 TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix,
2047 start_mark, end_mark);
2048 }
2049
2050 /* Unknown directive. */
2051
2052 else
2053 {
2054 yaml_parser_set_scanner_error(parser, "while scanning a directive",
2055 start_mark, "found unknown directive name");
2056 goto error;
2057 }
2058
2059 /* Eat the rest of the line including any comments. */
2060
2061 if (!CACHE(parser, 1)) goto error;
2062
2063 while (IS_BLANK(parser->buffer)) {
2064 SKIP(parser);
2065 if (!CACHE(parser, 1)) goto error;
2066 }
2067
2068 if (CHECK(parser->buffer, '#')) {
2069 while (!IS_BREAKZ(parser->buffer)) {
2070 SKIP(parser);
2071 if (!CACHE(parser, 1)) goto error;
2072 }
2073 }
2074
2075 /* Check if we are at the end of the line. */
2076
2077 if (!IS_BREAKZ(parser->buffer)) {
2078 yaml_parser_set_scanner_error(parser, "while scanning a directive",
2079 start_mark, "did not find expected comment or line break");
2080 goto error;
2081 }
2082
2083 /* Eat a line break. */
2084
2085 if (IS_BREAK(parser->buffer)) {
2086 if (!CACHE(parser, 2)) goto error;
2087 SKIP_LINE(parser);
2088 }
2089
2090 yaml_free(name);
2091
2092 return 1;
2093
2094 error:
2095 yaml_free(prefix);
2096 yaml_free(handle);
2097 yaml_free(name);
2098 return 0;
2099 }
2100
2101 /*
2102 * Scan the directive name.
2103 *
2104 * Scope:
2105 * %YAML 1.1 # a comment \n
2106 * ^^^^
2107 * %TAG !yaml! tag:yaml.org,2002: \n
2108 * ^^^
2109 */
2110
2111 static int
yaml_parser_scan_directive_name(yaml_parser_t * parser,yaml_mark_t start_mark,yaml_char_t ** name)2112 yaml_parser_scan_directive_name(yaml_parser_t *parser,
2113 yaml_mark_t start_mark, yaml_char_t **name)
2114 {
2115 yaml_string_t string = NULL_STRING;
2116
2117 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2118
2119 /* Consume the directive name. */
2120
2121 if (!CACHE(parser, 1)) goto error;
2122
2123 while (IS_ALPHA(parser->buffer))
2124 {
2125 if (!READ(parser, string)) goto error;
2126 if (!CACHE(parser, 1)) goto error;
2127 }
2128
2129 /* Check if the name is empty. */
2130
2131 if (string.start == string.pointer) {
2132 yaml_parser_set_scanner_error(parser, "while scanning a directive",
2133 start_mark, "could not find expected directive name");
2134 goto error;
2135 }
2136
2137 /* Check for an blank character after the name. */
2138
2139 if (!IS_BLANKZ(parser->buffer)) {
2140 yaml_parser_set_scanner_error(parser, "while scanning a directive",
2141 start_mark, "found unexpected non-alphabetical character");
2142 goto error;
2143 }
2144
2145 *name = string.start;
2146
2147 return 1;
2148
2149 error:
2150 STRING_DEL(parser, string);
2151 return 0;
2152 }
2153
2154 /*
2155 * Scan the value of VERSION-DIRECTIVE.
2156 *
2157 * Scope:
2158 * %YAML 1.1 # a comment \n
2159 * ^^^^^^
2160 */
2161
2162 static int
yaml_parser_scan_version_directive_value(yaml_parser_t * parser,yaml_mark_t start_mark,int * major,int * minor)2163 yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
2164 yaml_mark_t start_mark, int *major, int *minor)
2165 {
2166 /* Eat whitespaces. */
2167
2168 if (!CACHE(parser, 1)) return 0;
2169
2170 while (IS_BLANK(parser->buffer)) {
2171 SKIP(parser);
2172 if (!CACHE(parser, 1)) return 0;
2173 }
2174
2175 /* Consume the major version number. */
2176
2177 if (!yaml_parser_scan_version_directive_number(parser, start_mark, major))
2178 return 0;
2179
2180 /* Eat '.'. */
2181
2182 if (!CHECK(parser->buffer, '.')) {
2183 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2184 start_mark, "did not find expected digit or '.' character");
2185 }
2186
2187 SKIP(parser);
2188
2189 /* Consume the minor version number. */
2190
2191 if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor))
2192 return 0;
2193
2194 return 1;
2195 }
2196
2197 #define MAX_NUMBER_LENGTH 9
2198
2199 /*
2200 * Scan the version number of VERSION-DIRECTIVE.
2201 *
2202 * Scope:
2203 * %YAML 1.1 # a comment \n
2204 * ^
2205 * %YAML 1.1 # a comment \n
2206 * ^
2207 */
2208
2209 static int
yaml_parser_scan_version_directive_number(yaml_parser_t * parser,yaml_mark_t start_mark,int * number)2210 yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
2211 yaml_mark_t start_mark, int *number)
2212 {
2213 int value = 0;
2214 size_t length = 0;
2215
2216 /* Repeat while the next character is digit. */
2217
2218 if (!CACHE(parser, 1)) return 0;
2219
2220 while (IS_DIGIT(parser->buffer))
2221 {
2222 /* Check if the number is too long. */
2223
2224 if (++length > MAX_NUMBER_LENGTH) {
2225 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2226 start_mark, "found extremely long version number");
2227 }
2228
2229 value = value*10 + AS_DIGIT(parser->buffer);
2230
2231 SKIP(parser);
2232
2233 if (!CACHE(parser, 1)) return 0;
2234 }
2235
2236 /* Check if the number was present. */
2237
2238 if (!length) {
2239 return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2240 start_mark, "did not find expected version number");
2241 }
2242
2243 *number = value;
2244
2245 return 1;
2246 }
2247
2248 /*
2249 * Scan the value of a TAG-DIRECTIVE token.
2250 *
2251 * Scope:
2252 * %TAG !yaml! tag:yaml.org,2002: \n
2253 * ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2254 */
2255
2256 static int
yaml_parser_scan_tag_directive_value(yaml_parser_t * parser,yaml_mark_t start_mark,yaml_char_t ** handle,yaml_char_t ** prefix)2257 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
2258 yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix)
2259 {
2260 yaml_char_t *handle_value = NULL;
2261 yaml_char_t *prefix_value = NULL;
2262
2263 /* Eat whitespaces. */
2264
2265 if (!CACHE(parser, 1)) goto error;
2266
2267 while (IS_BLANK(parser->buffer)) {
2268 SKIP(parser);
2269 if (!CACHE(parser, 1)) goto error;
2270 }
2271
2272 /* Scan a handle. */
2273
2274 if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value))
2275 goto error;
2276
2277 /* Expect a whitespace. */
2278
2279 if (!CACHE(parser, 1)) goto error;
2280
2281 if (!IS_BLANK(parser->buffer)) {
2282 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2283 start_mark, "did not find expected whitespace");
2284 goto error;
2285 }
2286
2287 /* Eat whitespaces. */
2288
2289 while (IS_BLANK(parser->buffer)) {
2290 SKIP(parser);
2291 if (!CACHE(parser, 1)) goto error;
2292 }
2293
2294 /* Scan a prefix. */
2295
2296 if (!yaml_parser_scan_tag_uri(parser, 1, 1, NULL, start_mark, &prefix_value))
2297 goto error;
2298
2299 /* Expect a whitespace or line break. */
2300
2301 if (!CACHE(parser, 1)) goto error;
2302
2303 if (!IS_BLANKZ(parser->buffer)) {
2304 yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2305 start_mark, "did not find expected whitespace or line break");
2306 goto error;
2307 }
2308
2309 *handle = handle_value;
2310 *prefix = prefix_value;
2311
2312 return 1;
2313
2314 error:
2315 yaml_free(handle_value);
2316 yaml_free(prefix_value);
2317 return 0;
2318 }
2319
2320 static int
yaml_parser_scan_anchor(yaml_parser_t * parser,yaml_token_t * token,yaml_token_type_t type)2321 yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
2322 yaml_token_type_t type)
2323 {
2324 int length = 0;
2325 yaml_mark_t start_mark, end_mark;
2326 yaml_string_t string = NULL_STRING;
2327
2328 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2329
2330 /* Eat the indicator character. */
2331
2332 start_mark = parser->mark;
2333
2334 SKIP(parser);
2335
2336 /* Consume the value. */
2337
2338 if (!CACHE(parser, 1)) goto error;
2339
2340 while (IS_ALPHA(parser->buffer)) {
2341 if (!READ(parser, string)) goto error;
2342 if (!CACHE(parser, 1)) goto error;
2343 length ++;
2344 }
2345
2346 end_mark = parser->mark;
2347
2348 /*
2349 * Check if length of the anchor is greater than 0 and it is followed by
2350 * a whitespace character or one of the indicators:
2351 *
2352 * '?', ':', ',', ']', '}', '%', '@', '`'.
2353 */
2354
2355 if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?')
2356 || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',')
2357 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}')
2358 || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@')
2359 || CHECK(parser->buffer, '`'))) {
2360 yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ?
2361 "while scanning an anchor" : "while scanning an alias", start_mark,
2362 "did not find expected alphabetic or numeric character");
2363 goto error;
2364 }
2365
2366 /* Create a token. */
2367
2368 if (type == YAML_ANCHOR_TOKEN) {
2369 ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2370 }
2371 else {
2372 ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2373 }
2374
2375 return 1;
2376
2377 error:
2378 STRING_DEL(parser, string);
2379 return 0;
2380 }
2381
2382 /*
2383 * Scan a TAG token.
2384 */
2385
2386 static int
yaml_parser_scan_tag(yaml_parser_t * parser,yaml_token_t * token)2387 yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token)
2388 {
2389 yaml_char_t *handle = NULL;
2390 yaml_char_t *suffix = NULL;
2391 yaml_mark_t start_mark, end_mark;
2392
2393 start_mark = parser->mark;
2394
2395 /* Check if the tag is in the canonical form. */
2396
2397 if (!CACHE(parser, 2)) goto error;
2398
2399 if (CHECK_AT(parser->buffer, '<', 1))
2400 {
2401 /* Set the handle to '' */
2402
2403 handle = YAML_MALLOC(1);
2404 if (!handle) goto error;
2405 handle[0] = '\0';
2406
2407 /* Eat '!<' */
2408
2409 SKIP(parser);
2410 SKIP(parser);
2411
2412 /* Consume the tag value. */
2413
2414 if (!yaml_parser_scan_tag_uri(parser, 1, 0, NULL, start_mark, &suffix))
2415 goto error;
2416
2417 /* Check for '>' and eat it. */
2418
2419 if (!CHECK(parser->buffer, '>')) {
2420 yaml_parser_set_scanner_error(parser, "while scanning a tag",
2421 start_mark, "did not find the expected '>'");
2422 goto error;
2423 }
2424
2425 SKIP(parser);
2426 }
2427 else
2428 {
2429 /* The tag has either the '!suffix' or the '!handle!suffix' form. */
2430
2431 /* First, try to scan a handle. */
2432
2433 if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle))
2434 goto error;
2435
2436 /* Check if it is, indeed, handle. */
2437
2438 if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!')
2439 {
2440 /* Scan the suffix now. */
2441
2442 if (!yaml_parser_scan_tag_uri(parser, 0, 0, NULL, start_mark, &suffix))
2443 goto error;
2444 }
2445 else
2446 {
2447 /* It wasn't a handle after all. Scan the rest of the tag. */
2448
2449 if (!yaml_parser_scan_tag_uri(parser, 0, 0, handle, start_mark, &suffix))
2450 goto error;
2451
2452 /* Set the handle to '!'. */
2453
2454 yaml_free(handle);
2455 handle = YAML_MALLOC(2);
2456 if (!handle) goto error;
2457 handle[0] = '!';
2458 handle[1] = '\0';
2459
2460 /*
2461 * A special case: the '!' tag. Set the handle to '' and the
2462 * suffix to '!'.
2463 */
2464
2465 if (suffix[0] == '\0') {
2466 yaml_char_t *tmp = handle;
2467 handle = suffix;
2468 suffix = tmp;
2469 }
2470 }
2471 }
2472
2473 /* Check the character which ends the tag. */
2474
2475 if (!CACHE(parser, 1)) goto error;
2476
2477 if (!IS_BLANKZ(parser->buffer)) {
2478 if (!parser->flow_level || !CHECK(parser->buffer, ',') ) {
2479 yaml_parser_set_scanner_error(parser, "while scanning a tag",
2480 start_mark, "did not find expected whitespace or line break");
2481 goto error;
2482 }
2483 }
2484
2485 end_mark = parser->mark;
2486
2487 /* Create a token. */
2488
2489 TAG_TOKEN_INIT(*token, handle, suffix, start_mark, end_mark);
2490
2491 return 1;
2492
2493 error:
2494 yaml_free(handle);
2495 yaml_free(suffix);
2496 return 0;
2497 }
2498
2499 /*
2500 * Scan a tag handle.
2501 */
2502
2503 static int
yaml_parser_scan_tag_handle(yaml_parser_t * parser,int directive,yaml_mark_t start_mark,yaml_char_t ** handle)2504 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
2505 yaml_mark_t start_mark, yaml_char_t **handle)
2506 {
2507 yaml_string_t string = NULL_STRING;
2508
2509 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2510
2511 /* Check the initial '!' character. */
2512
2513 if (!CACHE(parser, 1)) goto error;
2514
2515 if (!CHECK(parser->buffer, '!')) {
2516 yaml_parser_set_scanner_error(parser, directive ?
2517 "while scanning a tag directive" : "while scanning a tag",
2518 start_mark, "did not find expected '!'");
2519 goto error;
2520 }
2521
2522 /* Copy the '!' character. */
2523
2524 if (!READ(parser, string)) goto error;
2525
2526 /* Copy all subsequent alphabetical and numerical characters. */
2527
2528 if (!CACHE(parser, 1)) goto error;
2529
2530 while (IS_ALPHA(parser->buffer))
2531 {
2532 if (!READ(parser, string)) goto error;
2533 if (!CACHE(parser, 1)) goto error;
2534 }
2535
2536 /* Check if the trailing character is '!' and copy it. */
2537
2538 if (CHECK(parser->buffer, '!'))
2539 {
2540 if (!READ(parser, string)) goto error;
2541 }
2542 else
2543 {
2544 /*
2545 * It's either the '!' tag or not really a tag handle. If it's a %TAG
2546 * directive, it's an error. If it's a tag token, it must be a part of
2547 * URI.
2548 */
2549
2550 if (directive && !(string.start[0] == '!' && string.start[1] == '\0')) {
2551 yaml_parser_set_scanner_error(parser, "while parsing a tag directive",
2552 start_mark, "did not find expected '!'");
2553 goto error;
2554 }
2555 }
2556
2557 *handle = string.start;
2558
2559 return 1;
2560
2561 error:
2562 STRING_DEL(parser, string);
2563 return 0;
2564 }
2565
2566 /*
2567 * Scan a tag.
2568 */
2569
2570 static int
yaml_parser_scan_tag_uri(yaml_parser_t * parser,int uri_char,int directive,yaml_char_t * head,yaml_mark_t start_mark,yaml_char_t ** uri)2571 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int uri_char, int directive,
2572 yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
2573 {
2574 size_t length = head ? strlen((char *)head) : 0;
2575 yaml_string_t string = NULL_STRING;
2576
2577 if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2578
2579 /* Resize the string to include the head. */
2580
2581 while ((size_t)(string.end - string.start) <= length) {
2582 if (!yaml_string_extend(&string.start, &string.pointer, &string.end)) {
2583 parser->error = YAML_MEMORY_ERROR;
2584 goto error;
2585 }
2586 }
2587
2588 /*
2589 * Copy the head if needed.
2590 *
2591 * Note that we don't copy the leading '!' character.
2592 */
2593
2594 if (length > 1) {
2595 memcpy(string.start, head+1, length-1);
2596 string.pointer += length-1;
2597 }
2598
2599 /* Scan the tag. */
2600
2601 if (!CACHE(parser, 1)) goto error;
2602
2603 /*
2604 * The set of characters that may appear in URI is as follows:
2605 *
2606 * '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
2607 * '=', '+', '$', '.', '!', '~', '*', '\'', '(', ')', '%'.
2608 *
2609 * If we are inside a verbatim tag <...> (parameter uri_char is true)
2610 * then also the following flow indicators are allowed:
2611 * ',', '[', ']'
2612 */
2613
2614 while (IS_ALPHA(parser->buffer) || CHECK(parser->buffer, ';')
2615 || CHECK(parser->buffer, '/') || CHECK(parser->buffer, '?')
2616 || CHECK(parser->buffer, ':') || CHECK(parser->buffer, '@')
2617 || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '=')
2618 || CHECK(parser->buffer, '+') || CHECK(parser->buffer, '$')
2619 || CHECK(parser->buffer, '.') || CHECK(parser->buffer, '%')
2620 || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '~')
2621 || CHECK(parser->buffer, '*') || CHECK(parser->buffer, '\'')
2622 || CHECK(parser->buffer, '(') || CHECK(parser->buffer, ')')
2623 || (uri_char && (
2624 CHECK(parser->buffer, ',')
2625 || CHECK(parser->buffer, '[') || CHECK(parser->buffer, ']')
2626 )
2627 ))
2628 {
2629 /* Check if it is a URI-escape sequence. */
2630
2631 if (CHECK(parser->buffer, '%')) {
2632 if (!STRING_EXTEND(parser, string))
2633 goto error;
2634
2635 if (!yaml_parser_scan_uri_escapes(parser,
2636 directive, start_mark, &string)) goto error;
2637 }
2638 else {
2639 if (!READ(parser, string)) goto error;
2640 }
2641
2642 length ++;
2643 if (!CACHE(parser, 1)) goto error;
2644 }
2645
2646 /* Check if the tag is non-empty. */
2647
2648 if (!length) {
2649 if (!STRING_EXTEND(parser, string))
2650 goto error;
2651
2652 yaml_parser_set_scanner_error(parser, directive ?
2653 "while parsing a %TAG directive" : "while parsing a tag",
2654 start_mark, "did not find expected tag URI");
2655 goto error;
2656 }
2657
2658 *uri = string.start;
2659
2660 return 1;
2661
2662 error:
2663 STRING_DEL(parser, string);
2664 return 0;
2665 }
2666
2667 /*
2668 * Decode an URI-escape sequence corresponding to a single UTF-8 character.
2669 */
2670
2671 static int
yaml_parser_scan_uri_escapes(yaml_parser_t * parser,int directive,yaml_mark_t start_mark,yaml_string_t * string)2672 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
2673 yaml_mark_t start_mark, yaml_string_t *string)
2674 {
2675 int width = 0;
2676
2677 /* Decode the required number of characters. */
2678
2679 do {
2680
2681 unsigned char octet = 0;
2682
2683 /* Check for a URI-escaped octet. */
2684
2685 if (!CACHE(parser, 3)) return 0;
2686
2687 if (!(CHECK(parser->buffer, '%')
2688 && IS_HEX_AT(parser->buffer, 1)
2689 && IS_HEX_AT(parser->buffer, 2))) {
2690 return yaml_parser_set_scanner_error(parser, directive ?
2691 "while parsing a %TAG directive" : "while parsing a tag",
2692 start_mark, "did not find URI escaped octet");
2693 }
2694
2695 /* Get the octet. */
2696
2697 octet = (AS_HEX_AT(parser->buffer, 1) << 4) + AS_HEX_AT(parser->buffer, 2);
2698
2699 /* If it is the leading octet, determine the length of the UTF-8 sequence. */
2700
2701 if (!width)
2702 {
2703 width = (octet & 0x80) == 0x00 ? 1 :
2704 (octet & 0xE0) == 0xC0 ? 2 :
2705 (octet & 0xF0) == 0xE0 ? 3 :
2706 (octet & 0xF8) == 0xF0 ? 4 : 0;
2707 if (!width) {
2708 return yaml_parser_set_scanner_error(parser, directive ?
2709 "while parsing a %TAG directive" : "while parsing a tag",
2710 start_mark, "found an incorrect leading UTF-8 octet");
2711 }
2712 }
2713 else
2714 {
2715 /* Check if the trailing octet is correct. */
2716
2717 if ((octet & 0xC0) != 0x80) {
2718 return yaml_parser_set_scanner_error(parser, directive ?
2719 "while parsing a %TAG directive" : "while parsing a tag",
2720 start_mark, "found an incorrect trailing UTF-8 octet");
2721 }
2722 }
2723
2724 /* Copy the octet and move the pointers. */
2725
2726 *(string->pointer++) = octet;
2727 SKIP(parser);
2728 SKIP(parser);
2729 SKIP(parser);
2730
2731 } while (--width);
2732
2733 return 1;
2734 }
2735
/*
 * Scan a block scalar.
 *
 * The parser is expected to be positioned on the '|' or '>' indicator, which
 * is skipped unconditionally.  `literal` selects the style stored in the
 * token (YAML_LITERAL_SCALAR_STYLE when non-zero, YAML_FOLDED_SCALAR_STYLE
 * otherwise); line folding is applied only when `literal` is zero.
 *
 * The header may carry a chomping indicator ('+' or '-') and an indentation
 * indicator (a digit other than '0'), in either order, followed by optional
 * blanks and a comment up to the end of the line.
 *
 * Returns 1 and initializes *token on success; the value buffer is handed
 * over to the token while the two auxiliary buffers are released.  Returns 0
 * on failure after releasing all three buffers.
 */

static int
yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
        int literal)
{
    yaml_mark_t start_mark;
    yaml_mark_t end_mark;
    yaml_string_t string = NULL_STRING;             /* The scalar value. */
    yaml_string_t leading_break = NULL_STRING;      /* Break ending the last content line. */
    yaml_string_t trailing_breaks = NULL_STRING;    /* Breaks of the empty lines after it. */
    int chomping = 0;           /* +1 for '+', -1 for '-', 0 if not given. */
    int increment = 0;          /* Indentation indicator, 0 if not given. */
    int indent = 0;             /* Content indentation, 0 until it is known. */
    int leading_blank = 0;      /* The previous content line began with a blank. */
    int trailing_blank = 0;     /* The current content line begins with a blank. */

    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
    if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
    if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;

    /* Eat the indicator '|' or '>'. */

    start_mark = parser->mark;

    SKIP(parser);

    /* Scan the additional block scalar indicators. */

    if (!CACHE(parser, 1)) goto error;

    /* Check for a chomping indicator. */

    if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-'))
    {
        /* Set the chomping method and eat the indicator. */

        chomping = CHECK(parser->buffer, '+') ? +1 : -1;

        SKIP(parser);

        /* Check for an indentation indicator. */

        if (!CACHE(parser, 1)) goto error;

        if (IS_DIGIT(parser->buffer))
        {
            /* Check that the indentation is greater than 0. */

            if (CHECK(parser->buffer, '0')) {
                yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
                        start_mark, "found an indentation indicator equal to 0");
                goto error;
            }

            /* Get the indentation level and eat the indicator. */

            increment = AS_DIGIT(parser->buffer);

            SKIP(parser);
        }
    }

    /* Do the same as above, but in the opposite order. */

    else if (IS_DIGIT(parser->buffer))
    {
        if (CHECK(parser->buffer, '0')) {
            yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
                    start_mark, "found an indentation indicator equal to 0");
            goto error;
        }

        increment = AS_DIGIT(parser->buffer);

        SKIP(parser);

        if (!CACHE(parser, 1)) goto error;

        if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) {
            chomping = CHECK(parser->buffer, '+') ? +1 : -1;

            SKIP(parser);
        }
    }

    /* Eat whitespaces and comments to the end of the line. */

    if (!CACHE(parser, 1)) goto error;

    while (IS_BLANK(parser->buffer)) {
        SKIP(parser);
        if (!CACHE(parser, 1)) goto error;
    }

    if (CHECK(parser->buffer, '#')) {
        while (!IS_BREAKZ(parser->buffer)) {
            SKIP(parser);
            if (!CACHE(parser, 1)) goto error;
        }
    }

    /* Check if we are at the end of the line. */

    if (!IS_BREAKZ(parser->buffer)) {
        yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
                start_mark, "did not find expected comment or line break");
        goto error;
    }

    /* Eat a line break (two characters are cached before skipping it). */

    if (IS_BREAK(parser->buffer)) {
        if (!CACHE(parser, 2)) goto error;
        SKIP_LINE(parser);
    }

    end_mark = parser->mark;

    /*
     * Set the indentation level if it was specified: the indicator is
     * relative to the current indentation, or absolute when parser->indent
     * is negative.
     */

    if (increment) {
        indent = parser->indent >= 0 ? parser->indent+increment : increment;
    }

    /* Scan the leading line breaks and determine the indentation level if needed. */

    if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks,
                start_mark, &end_mark)) goto error;

    /* Scan the block scalar content, one line per iteration. */

    if (!CACHE(parser, 1)) goto error;

    while ((int)parser->mark.column == indent && !(IS_Z(parser->buffer)))
    {
        /*
         * We are at the beginning of a non-empty line.
         */

        /* Is it a trailing whitespace? */

        trailing_blank = IS_BLANK(parser->buffer);

        /*
         * Check if we need to fold the leading line break.  Folding applies
         * only to folded scalars, only to a '\n' break, and only when
         * neither the previous nor the current line starts with a blank.
         */

        if (!literal && (*leading_break.start == '\n')
                && !leading_blank && !trailing_blank)
        {
            /* Do we need to join the lines by space? */

            if (*trailing_breaks.start == '\0') {
                if (!STRING_EXTEND(parser, string)) goto error;
                *(string.pointer ++) = ' ';
            }

            CLEAR(parser, leading_break);
        }
        else {
            if (!JOIN(parser, string, leading_break)) goto error;
            CLEAR(parser, leading_break);
        }

        /* Append the remaining line breaks. */

        if (!JOIN(parser, string, trailing_breaks)) goto error;
        CLEAR(parser, trailing_breaks);

        /* Is it a leading whitespace? (Consulted on the next iteration.) */

        leading_blank = IS_BLANK(parser->buffer);

        /* Consume the current line. */

        while (!IS_BREAKZ(parser->buffer)) {
            if (!READ(parser, string)) goto error;
            if (!CACHE(parser, 1)) goto error;
        }

        /* Consume the line break. */

        if (!CACHE(parser, 2)) goto error;

        if (!READ_LINE(parser, leading_break)) goto error;

        /* Eat the following indentation spaces and line breaks. */

        if (!yaml_parser_scan_block_scalar_breaks(parser,
                    &indent, &trailing_breaks, start_mark, &end_mark)) goto error;
    }

    /*
     * Chomp the tail: the final line break is kept unless '-' was given, and
     * the breaks of the trailing empty lines are kept only when '+' was
     * given.
     */

    if (chomping != -1) {
        if (!JOIN(parser, string, leading_break)) goto error;
    }
    if (chomping == 1) {
        if (!JOIN(parser, string, trailing_breaks)) goto error;
    }

    /* Create a token. */

    SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
            literal ? YAML_LITERAL_SCALAR_STYLE : YAML_FOLDED_SCALAR_STYLE,
            start_mark, end_mark);

    STRING_DEL(parser, leading_break);
    STRING_DEL(parser, trailing_breaks);

    return 1;

error:
    STRING_DEL(parser, string);
    STRING_DEL(parser, leading_break);
    STRING_DEL(parser, trailing_breaks);

    return 0;
}
2956
2957 /*
2958 * Scan indentation spaces and line breaks for a block scalar. Determine the
2959 * indentation level if needed.
2960 */
2961
2962 static int
yaml_parser_scan_block_scalar_breaks(yaml_parser_t * parser,int * indent,yaml_string_t * breaks,yaml_mark_t start_mark,yaml_mark_t * end_mark)2963 yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
2964 int *indent, yaml_string_t *breaks,
2965 yaml_mark_t start_mark, yaml_mark_t *end_mark)
2966 {
2967 int max_indent = 0;
2968
2969 *end_mark = parser->mark;
2970
2971 /* Eat the indentation spaces and line breaks. */
2972
2973 while (1)
2974 {
2975 /* Eat the indentation spaces. */
2976
2977 if (!CACHE(parser, 1)) return 0;
2978
2979 while ((!*indent || (int)parser->mark.column < *indent)
2980 && IS_SPACE(parser->buffer)) {
2981 SKIP(parser);
2982 if (!CACHE(parser, 1)) return 0;
2983 }
2984
2985 if ((int)parser->mark.column > max_indent)
2986 max_indent = (int)parser->mark.column;
2987
2988 /* Check for a tab character messing the indentation. */
2989
2990 if ((!*indent || (int)parser->mark.column < *indent)
2991 && IS_TAB(parser->buffer)) {
2992 return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2993 start_mark, "found a tab character where an indentation space is expected");
2994 }
2995
2996 /* Have we found a non-empty line? */
2997
2998 if (!IS_BREAK(parser->buffer)) break;
2999
3000 /* Consume the line break. */
3001
3002 if (!CACHE(parser, 2)) return 0;
3003 if (!READ_LINE(parser, *breaks)) return 0;
3004 *end_mark = parser->mark;
3005 }
3006
3007 /* Determine the indentation level if needed. */
3008
3009 if (!*indent) {
3010 *indent = max_indent;
3011 if (*indent < parser->indent + 1)
3012 *indent = parser->indent + 1;
3013 if (*indent < 1)
3014 *indent = 1;
3015 }
3016
3017 return 1;
3018 }
3019
/*
 * Scan a quoted scalar.
 *
 * The parser is expected to be positioned on the opening quote, which is
 * skipped unconditionally.  `single` is non-zero for a single-quoted scalar
 * and zero for a double-quoted one; it selects the closing quote character,
 * the escaping rules and the style stored in the token.
 *
 * In a single-quoted scalar '' stands for one quote character.  In a
 * double-quoted scalar backslash escapes are decoded; \x, \u and \U take 2,
 * 4 and 8 hexadecimal digits and the resulting code point is written out as
 * UTF-8.  Line breaks inside the scalar are folded.
 *
 * A document indicator ('---' or '...') at column 0 and the end of the
 * stream are errors while the scalar is still open.
 *
 * Returns 1 and initializes *token on success; the value buffer is handed
 * over to the token while the auxiliary buffers are released.  Returns 0 on
 * failure after releasing all four buffers.
 */

static int
yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
        int single)
{
    yaml_mark_t start_mark;
    yaml_mark_t end_mark;
    yaml_string_t string = NULL_STRING;             /* The scalar value. */
    yaml_string_t leading_break = NULL_STRING;      /* First break of a run of breaks. */
    yaml_string_t trailing_breaks = NULL_STRING;    /* The rest of the breaks in the run. */
    yaml_string_t whitespaces = NULL_STRING;        /* Blanks seen before any break. */
    int leading_blanks;     /* Set once a line break (or escaped break) was seen. */

    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
    if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
    if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
    if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;

    /* Eat the left quote. */

    start_mark = parser->mark;

    SKIP(parser);

    /*
     * Consume the content of the quoted scalar.  Each iteration handles a
     * run of non-blank characters followed by a run of blanks and breaks.
     */

    while (1)
    {
        /* Check that there are no document indicators at the beginning of the line. */

        if (!CACHE(parser, 4)) goto error;

        if (parser->mark.column == 0 &&
            ((CHECK_AT(parser->buffer, '-', 0) &&
              CHECK_AT(parser->buffer, '-', 1) &&
              CHECK_AT(parser->buffer, '-', 2)) ||
             (CHECK_AT(parser->buffer, '.', 0) &&
              CHECK_AT(parser->buffer, '.', 1) &&
              CHECK_AT(parser->buffer, '.', 2))) &&
            IS_BLANKZ_AT(parser->buffer, 3))
        {
            yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
                    start_mark, "found unexpected document indicator");
            goto error;
        }

        /* Check for EOF. */

        if (IS_Z(parser->buffer)) {
            yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
                    start_mark, "found unexpected end of stream");
            goto error;
        }

        /* Consume non-blank characters. */

        if (!CACHE(parser, 2)) goto error;

        leading_blanks = 0;

        while (!IS_BLANKZ(parser->buffer))
        {
            /* Check for an escaped single quote. */

            if (single && CHECK_AT(parser->buffer, '\'', 0)
                    && CHECK_AT(parser->buffer, '\'', 1))
            {
                if (!STRING_EXTEND(parser, string)) goto error;
                *(string.pointer++) = '\'';
                SKIP(parser);
                SKIP(parser);
            }

            /* Check for the right quote. */

            else if (CHECK(parser->buffer, single ? '\'' : '"'))
            {
                break;
            }

            /* Check for an escaped line break. */

            else if (!single && CHECK(parser->buffer, '\\')
                    && IS_BREAK_AT(parser->buffer, 1))
            {
                if (!CACHE(parser, 3)) goto error;
                SKIP(parser);
                SKIP_LINE(parser);
                leading_blanks = 1;
                break;
            }

            /* Check for an escape sequence. */

            else if (!single && CHECK(parser->buffer, '\\'))
            {
                /* Number of hex digits to read for \x, \u and \U; 0 otherwise. */
                size_t code_length = 0;

                /* Reserve room for the octets written directly below. */
                if (!STRING_EXTEND(parser, string)) goto error;

                /* Check the escape character. */

                switch (parser->buffer.pointer[1])
                {
                    case '0':
                        *(string.pointer++) = '\0';
                        break;

                    case 'a':
                        *(string.pointer++) = '\x07';
                        break;

                    case 'b':
                        *(string.pointer++) = '\x08';
                        break;

                    case 't':
                    case '\t':
                        *(string.pointer++) = '\x09';
                        break;

                    case 'n':
                        *(string.pointer++) = '\x0A';
                        break;

                    case 'v':
                        *(string.pointer++) = '\x0B';
                        break;

                    case 'f':
                        *(string.pointer++) = '\x0C';
                        break;

                    case 'r':
                        *(string.pointer++) = '\x0D';
                        break;

                    case 'e':
                        *(string.pointer++) = '\x1B';
                        break;

                    case ' ':
                        *(string.pointer++) = '\x20';
                        break;

                    case '"':
                        *(string.pointer++) = '"';
                        break;

                    case '/':
                        *(string.pointer++) = '/';
                        break;

                    case '\\':
                        *(string.pointer++) = '\\';
                        break;

                    case 'N':   /* NEL (#x85) */
                        *(string.pointer++) = '\xC2';
                        *(string.pointer++) = '\x85';
                        break;

                    case '_':   /* #xA0 */
                        *(string.pointer++) = '\xC2';
                        *(string.pointer++) = '\xA0';
                        break;

                    case 'L':   /* LS (#x2028) */
                        *(string.pointer++) = '\xE2';
                        *(string.pointer++) = '\x80';
                        *(string.pointer++) = '\xA8';
                        break;

                    case 'P':   /* PS (#x2029) */
                        *(string.pointer++) = '\xE2';
                        *(string.pointer++) = '\x80';
                        *(string.pointer++) = '\xA9';
                        break;

                    case 'x':
                        code_length = 2;
                        break;

                    case 'u':
                        code_length = 4;
                        break;

                    case 'U':
                        code_length = 8;
                        break;

                    default:
                        yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
                                start_mark, "found unknown escape character");
                        goto error;
                }

                /* Skip the backslash and the escape character. */

                SKIP(parser);
                SKIP(parser);

                /* Consume an arbitrary escape code. */

                if (code_length)
                {
                    unsigned int value = 0;
                    size_t k;

                    /* Scan the character value. */

                    if (!CACHE(parser, code_length)) goto error;

                    for (k = 0; k < code_length; k ++) {
                        if (!IS_HEX_AT(parser->buffer, k)) {
                            yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
                                    start_mark, "did not find expected hexdecimal number");
                            goto error;
                        }
                        value = (value << 4) + AS_HEX_AT(parser->buffer, k);
                    }

                    /*
                     * Check the value and write the character: surrogates
                     * and values above 0x10FFFF are rejected, everything
                     * else is encoded as UTF-8.
                     */

                    if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
                        yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
                                start_mark, "found invalid Unicode character escape code");
                        goto error;
                    }

                    if (value <= 0x7F) {
                        *(string.pointer++) = value;
                    }
                    else if (value <= 0x7FF) {
                        *(string.pointer++) = 0xC0 + (value >> 6);
                        *(string.pointer++) = 0x80 + (value & 0x3F);
                    }
                    else if (value <= 0xFFFF) {
                        *(string.pointer++) = 0xE0 + (value >> 12);
                        *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
                        *(string.pointer++) = 0x80 + (value & 0x3F);
                    }
                    else {
                        *(string.pointer++) = 0xF0 + (value >> 18);
                        *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F);
                        *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
                        *(string.pointer++) = 0x80 + (value & 0x3F);
                    }

                    /* Advance the pointer. */

                    for (k = 0; k < code_length; k ++) {
                        SKIP(parser);
                    }
                }
            }

            else
            {
                /* It is a non-escaped non-blank character. */

                if (!READ(parser, string)) goto error;
            }

            if (!CACHE(parser, 2)) goto error;
        }

        /* Check if we are at the end of the scalar. */

        /*
         * The buffer is re-cached before the closing quote is tested; this
         * is the fix for a crash on an uninitialized value.
         * Credit for the bug and input is to OSS Fuzz
         * Credit for the fix to Alex Gaynor
         */
        if (!CACHE(parser, 1)) goto error;
        if (CHECK(parser->buffer, single ? '\'' : '"'))
            break;

        /* Consume blank characters. */

        if (!CACHE(parser, 1)) goto error;

        while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
        {
            if (IS_BLANK(parser->buffer))
            {
                /*
                 * Consume a space or a tab character.  Blanks are kept only
                 * until the first line break is seen; after that they are
                 * dropped.
                 */

                if (!leading_blanks) {
                    if (!READ(parser, whitespaces)) goto error;
                }
                else {
                    SKIP(parser);
                }
            }
            else
            {
                if (!CACHE(parser, 2)) goto error;

                /* Check if it is a first line break. */

                if (!leading_blanks)
                {
                    /* Blanks in front of the break are discarded. */
                    CLEAR(parser, whitespaces);
                    if (!READ_LINE(parser, leading_break)) goto error;
                    leading_blanks = 1;
                }
                else
                {
                    if (!READ_LINE(parser, trailing_breaks)) goto error;
                }
            }
            if (!CACHE(parser, 1)) goto error;
        }

        /* Join the whitespaces or fold line breaks. */

        if (leading_blanks)
        {
            /*
             * Do we need to fold line breaks?  A single '\n' becomes a
             * space; when more breaks follow, the first one is dropped and
             * the rest are kept.
             */

            if (leading_break.start[0] == '\n') {
                if (trailing_breaks.start[0] == '\0') {
                    if (!STRING_EXTEND(parser, string)) goto error;
                    *(string.pointer++) = ' ';
                }
                else {
                    if (!JOIN(parser, string, trailing_breaks)) goto error;
                    CLEAR(parser, trailing_breaks);
                }
                CLEAR(parser, leading_break);
            }
            else {
                if (!JOIN(parser, string, leading_break)) goto error;
                if (!JOIN(parser, string, trailing_breaks)) goto error;
                CLEAR(parser, leading_break);
                CLEAR(parser, trailing_breaks);
            }
        }
        else
        {
            if (!JOIN(parser, string, whitespaces)) goto error;
            CLEAR(parser, whitespaces);
        }
    }

    /* Eat the right quote. */

    SKIP(parser);

    end_mark = parser->mark;

    /* Create a token. */

    SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
            single ? YAML_SINGLE_QUOTED_SCALAR_STYLE : YAML_DOUBLE_QUOTED_SCALAR_STYLE,
            start_mark, end_mark);

    STRING_DEL(parser, leading_break);
    STRING_DEL(parser, trailing_breaks);
    STRING_DEL(parser, whitespaces);

    return 1;

error:
    STRING_DEL(parser, string);
    STRING_DEL(parser, leading_break);
    STRING_DEL(parser, trailing_breaks);
    STRING_DEL(parser, whitespaces);

    return 0;
}
3392
/*
 * Scan a plain scalar.
 *
 * Scanning starts at the current position and stops at the first of:
 *
 *  - a document indicator ('---' or '...') at column 0;
 *  - a '#' found at the start of the scalar or right after blanks or
 *    line breaks;
 *  - a ':' followed by a position accepted by IS_BLANKZ;
 *  - in the flow context, one of ',', '[', ']', '{', '}';
 *  - a position that is neither a blank nor a line break once the run of
 *    non-blank characters has ended (e.g. the end of the stream);
 *  - in the block context, a line indented less than parser->indent + 1.
 *
 * In the flow context a ':' immediately followed by one of ',?[]{}' is
 * reported as an error.  Blanks and line breaks inside the scalar are kept
 * or folded only when more content follows them.
 *
 * parser->simple_key_allowed is set to 1 when the scan ended after
 * consuming a line break.
 *
 * Returns 1 and initializes *token on success; the value buffer is handed
 * over to the token while the auxiliary buffers are released.  Returns 0 on
 * failure after releasing all four buffers.
 */

static int
yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token)
{
    yaml_mark_t start_mark;
    yaml_mark_t end_mark;
    yaml_string_t string = NULL_STRING;             /* The scalar value. */
    yaml_string_t leading_break = NULL_STRING;      /* First break of a run of breaks. */
    yaml_string_t trailing_breaks = NULL_STRING;    /* The rest of the breaks in the run. */
    yaml_string_t whitespaces = NULL_STRING;        /* Blanks seen before any break. */
    int leading_blanks = 0;             /* Set once a line break was consumed. */
    int indent = parser->indent+1;      /* Minimal indentation of continuation lines. */

    if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
    if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
    if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
    if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;

    start_mark = end_mark = parser->mark;

    /*
     * Consume the content of the plain scalar.  Each iteration handles a
     * run of non-blank characters followed by a run of blanks and breaks.
     */

    while (1)
    {
        /* Check for a document indicator. */

        if (!CACHE(parser, 4)) goto error;

        if (parser->mark.column == 0 &&
            ((CHECK_AT(parser->buffer, '-', 0) &&
              CHECK_AT(parser->buffer, '-', 1) &&
              CHECK_AT(parser->buffer, '-', 2)) ||
             (CHECK_AT(parser->buffer, '.', 0) &&
              CHECK_AT(parser->buffer, '.', 1) &&
              CHECK_AT(parser->buffer, '.', 2))) &&
            IS_BLANKZ_AT(parser->buffer, 3)) break;

        /* Check for a comment. */

        if (CHECK(parser->buffer, '#'))
            break;

        /* Consume non-blank characters. */

        while (!IS_BLANKZ(parser->buffer))
        {
            /* Check for "x:" + one of ',?[]{}' in the flow context. TODO: Fix the test "spec-08-13".
             * This is not completely according to the spec
             * See http://yaml.org/spec/1.1/#id907281 9.1.3. Plain
             */

            if (parser->flow_level
                    && CHECK(parser->buffer, ':')
                    && (
                        CHECK_AT(parser->buffer, ',', 1)
                        || CHECK_AT(parser->buffer, '?', 1)
                        || CHECK_AT(parser->buffer, '[', 1)
                        || CHECK_AT(parser->buffer, ']', 1)
                        || CHECK_AT(parser->buffer, '{', 1)
                        || CHECK_AT(parser->buffer, '}', 1)
                    )
                    ) {
                yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
                        start_mark, "found unexpected ':'");
                goto error;
            }

            /* Check for indicators that may end a plain scalar. */

            if ((CHECK(parser->buffer, ':') && IS_BLANKZ_AT(parser->buffer, 1))
                    || (parser->flow_level &&
                        (CHECK(parser->buffer, ',')
                         || CHECK(parser->buffer, '[')
                         || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
                         || CHECK(parser->buffer, '}'))))
                break;

            /*
             * Check if we need to join whitespaces and breaks.  Blanks and
             * breaks collected after the previous run are appended only now,
             * when it is known that more content follows them.
             */

            if (leading_blanks || whitespaces.start != whitespaces.pointer)
            {
                if (leading_blanks)
                {
                    /*
                     * Do we need to fold line breaks?  A single '\n' becomes
                     * a space; when more breaks follow, the first one is
                     * dropped and the rest are kept.
                     */

                    if (leading_break.start[0] == '\n') {
                        if (trailing_breaks.start[0] == '\0') {
                            if (!STRING_EXTEND(parser, string)) goto error;
                            *(string.pointer++) = ' ';
                        }
                        else {
                            if (!JOIN(parser, string, trailing_breaks)) goto error;
                            CLEAR(parser, trailing_breaks);
                        }
                        CLEAR(parser, leading_break);
                    }
                    else {
                        if (!JOIN(parser, string, leading_break)) goto error;
                        if (!JOIN(parser, string, trailing_breaks)) goto error;
                        CLEAR(parser, leading_break);
                        CLEAR(parser, trailing_breaks);
                    }

                    leading_blanks = 0;
                }
                else
                {
                    if (!JOIN(parser, string, whitespaces)) goto error;
                    CLEAR(parser, whitespaces);
                }
            }

            /* Copy the character. */

            if (!READ(parser, string)) goto error;

            /* The token ends after the last content character read. */

            end_mark = parser->mark;

            if (!CACHE(parser, 2)) goto error;
        }

        /* Is it the end? */

        if (!(IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)))
            break;

        /* Consume blank characters. */

        if (!CACHE(parser, 1)) goto error;

        while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
        {
            if (IS_BLANK(parser->buffer))
            {
                /* Check for tab characters that abuse indentation. */

                if (leading_blanks && (int)parser->mark.column < indent
                        && IS_TAB(parser->buffer)) {
                    yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
                            start_mark, "found a tab character that violates indentation");
                    goto error;
                }

                /*
                 * Consume a space or a tab character.  Blanks are kept only
                 * until the first line break is seen; after that they are
                 * dropped.
                 */

                if (!leading_blanks) {
                    if (!READ(parser, whitespaces)) goto error;
                }
                else {
                    SKIP(parser);
                }
            }
            else
            {
                if (!CACHE(parser, 2)) goto error;

                /* Check if it is a first line break. */

                if (!leading_blanks)
                {
                    /* Blanks in front of the break are discarded. */
                    CLEAR(parser, whitespaces);
                    if (!READ_LINE(parser, leading_break)) goto error;
                    leading_blanks = 1;
                }
                else
                {
                    if (!READ_LINE(parser, trailing_breaks)) goto error;
                }
            }
            if (!CACHE(parser, 1)) goto error;
        }

        /* Check indentation level (block context only). */

        if (!parser->flow_level && (int)parser->mark.column < indent)
            break;
    }

    /* Create a token. */

    SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
            YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark);

    /* Note that we change the 'simple_key_allowed' flag. */

    if (leading_blanks) {
        parser->simple_key_allowed = 1;
    }

    STRING_DEL(parser, leading_break);
    STRING_DEL(parser, trailing_breaks);
    STRING_DEL(parser, whitespaces);

    return 1;

error:
    STRING_DEL(parser, string);
    STRING_DEL(parser, leading_break);
    STRING_DEL(parser, trailing_breaks);
    STRING_DEL(parser, whitespaces);

    return 0;
}
3599