xref: /freebsd/contrib/libyaml/src/scanner.c (revision 0f5c86ddb0257f4b7620f1d8e898289be30b19bf)
1 
2 /*
3  * Introduction
4  * ************
5  *
6  * The following notes assume that you are familiar with the YAML specification
7  * (http://yaml.org/spec/cvs/current.html).  We mostly follow it, although in
8  * some cases we are less restrictive than it requires.
9  *
10  * The process of transforming a YAML stream into a sequence of events is
11  * divided into two steps: Scanning and Parsing.
12  *
13  * The Scanner transforms the input stream into a sequence of tokens, while the
14  * Parser transforms the sequence of tokens produced by the Scanner into a
15  * sequence of parsing events.
16  *
17  * The Scanner is rather clever and complicated. The Parser, on the contrary,
18  * is a straightforward implementation of a recursive-descent parser (or,
19  * LL(1) parser, as it is usually called).
20  *
21  * Actually, only two aspects of Scanning might be called "clever"; the rest
22  * is quite straightforward.  These are "block collection start" and
23  * "simple keys".  Both are explained below in detail.
24  *
25  * Here the Scanning step is explained and implemented.  We start with the list
26  * of all the tokens produced by the Scanner together with short descriptions.
27  *
28  * Now, tokens:
29  *
30  *      STREAM-START(encoding)          # The stream start.
31  *      STREAM-END                      # The stream end.
32  *      VERSION-DIRECTIVE(major,minor)  # The '%YAML' directive.
33  *      TAG-DIRECTIVE(handle,prefix)    # The '%TAG' directive.
34  *      DOCUMENT-START                  # '---'
35  *      DOCUMENT-END                    # '...'
36  *      BLOCK-SEQUENCE-START            # Indentation increase denoting a block
37  *      BLOCK-MAPPING-START             # sequence or a block mapping.
38  *      BLOCK-END                       # Indentation decrease.
39  *      FLOW-SEQUENCE-START             # '['
40  *      FLOW-SEQUENCE-END               # ']'
41  *      FLOW-MAPPING-START              # '{'
42  *      FLOW-MAPPING-END                # '}'
43  *      BLOCK-ENTRY                     # '-'
44  *      FLOW-ENTRY                      # ','
45  *      KEY                             # '?' or nothing (simple keys).
46  *      VALUE                           # ':'
47  *      ALIAS(anchor)                   # '*anchor'
48  *      ANCHOR(anchor)                  # '&anchor'
49  *      TAG(handle,suffix)              # '!handle!suffix'
50  *      SCALAR(value,style)             # A scalar.
51  *
52  * The following two tokens are "virtual" tokens denoting the beginning and the
53  * end of the stream:
54  *
55  *      STREAM-START(encoding)
56  *      STREAM-END
57  *
58  * We pass the information about the input stream encoding with the
59  * STREAM-START token.
60  *
61  * The next two tokens are responsible for directives:
62  *
63  *      VERSION-DIRECTIVE(major,minor)
64  *      TAG-DIRECTIVE(handle,prefix)
65  *
66  * Example:
67  *
68  *      %YAML   1.1
69  *      %TAG    !   !foo
70  *      %TAG    !yaml!  tag:yaml.org,2002:
71  *      ---
72  *
73  * The corresponding sequence of tokens:
74  *
75  *      STREAM-START(utf-8)
76  *      VERSION-DIRECTIVE(1,1)
77  *      TAG-DIRECTIVE("!","!foo")
78  *      TAG-DIRECTIVE("!yaml","tag:yaml.org,2002:")
79  *      DOCUMENT-START
80  *      STREAM-END
81  *
82  * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
83  * line.
84  *
85  * The document start and end indicators are represented by:
86  *
87  *      DOCUMENT-START
88  *      DOCUMENT-END
89  *
90  * Note that if a YAML stream contains an implicit document (without '---'
91  * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
92  * produced.
93  *
94  * In the following examples, we present whole documents together with the
95  * produced tokens.
96  *
97  *      1. An implicit document:
98  *
99  *          'a scalar'
100  *
101  *      Tokens:
102  *
103  *          STREAM-START(utf-8)
104  *          SCALAR("a scalar",single-quoted)
105  *          STREAM-END
106  *
107  *      2. An explicit document:
108  *
109  *          ---
110  *          'a scalar'
111  *          ...
112  *
113  *      Tokens:
114  *
115  *          STREAM-START(utf-8)
116  *          DOCUMENT-START
117  *          SCALAR("a scalar",single-quoted)
118  *          DOCUMENT-END
119  *          STREAM-END
120  *
121  *      3. Several documents in a stream:
122  *
123  *          'a scalar'
124  *          ---
125  *          'another scalar'
126  *          ---
127  *          'yet another scalar'
128  *
129  *      Tokens:
130  *
131  *          STREAM-START(utf-8)
132  *          SCALAR("a scalar",single-quoted)
133  *          DOCUMENT-START
134  *          SCALAR("another scalar",single-quoted)
135  *          DOCUMENT-START
136  *          SCALAR("yet another scalar",single-quoted)
137  *          STREAM-END
138  *
139  * We have already introduced the SCALAR token above.  The following tokens are
140  * used to describe aliases, anchors, tags, and scalars:
141  *
142  *      ALIAS(anchor)
143  *      ANCHOR(anchor)
144  *      TAG(handle,suffix)
145  *      SCALAR(value,style)
146  *
147  * The following series of examples illustrates the usage of these tokens:
148  *
149  *      1. A recursive sequence:
150  *
151  *          &A [ *A ]
152  *
153  *      Tokens:
154  *
155  *          STREAM-START(utf-8)
156  *          ANCHOR("A")
157  *          FLOW-SEQUENCE-START
158  *          ALIAS("A")
159  *          FLOW-SEQUENCE-END
160  *          STREAM-END
161  *
162  *      2. A tagged scalar:
163  *
164  *          !!float "3.14"  # A good approximation.
165  *
166  *      Tokens:
167  *
168  *          STREAM-START(utf-8)
169  *          TAG("!!","float")
170  *          SCALAR("3.14",double-quoted)
171  *          STREAM-END
172  *
173  *      3. Various scalar styles:
174  *
175  *          --- # Implicit empty plain scalars do not produce tokens.
176  *          --- a plain scalar
177  *          --- 'a single-quoted scalar'
178  *          --- "a double-quoted scalar"
179  *          --- |-
180  *            a literal scalar
181  *          --- >-
182  *            a folded
183  *            scalar
184  *
185  *      Tokens:
186  *
187  *          STREAM-START(utf-8)
188  *          DOCUMENT-START
189  *          DOCUMENT-START
190  *          SCALAR("a plain scalar",plain)
191  *          DOCUMENT-START
192  *          SCALAR("a single-quoted scalar",single-quoted)
193  *          DOCUMENT-START
194  *          SCALAR("a double-quoted scalar",double-quoted)
195  *          DOCUMENT-START
196  *          SCALAR("a literal scalar",literal)
197  *          DOCUMENT-START
198  *          SCALAR("a folded scalar",folded)
199  *          STREAM-END
200  *
201  * Now it's time to review collection-related tokens. We will start with
202  * flow collections:
203  *
204  *      FLOW-SEQUENCE-START
205  *      FLOW-SEQUENCE-END
206  *      FLOW-MAPPING-START
207  *      FLOW-MAPPING-END
208  *      FLOW-ENTRY
209  *      KEY
210  *      VALUE
211  *
212  * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
213  * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
214  * respectively.  FLOW-ENTRY represents the ',' indicator.  Finally, the
215  * indicators '?' and ':', which are used for denoting mapping keys and values,
216  * are represented by the KEY and VALUE tokens.
217  *
218  * The following examples show flow collections:
219  *
220  *      1. A flow sequence:
221  *
222  *          [item 1, item 2, item 3]
223  *
224  *      Tokens:
225  *
226  *          STREAM-START(utf-8)
227  *          FLOW-SEQUENCE-START
228  *          SCALAR("item 1",plain)
229  *          FLOW-ENTRY
230  *          SCALAR("item 2",plain)
231  *          FLOW-ENTRY
232  *          SCALAR("item 3",plain)
233  *          FLOW-SEQUENCE-END
234  *          STREAM-END
235  *
236  *      2. A flow mapping:
237  *
238  *          {
239  *              a simple key: a value,  # Note that the KEY token is produced.
240  *              ? a complex key: another value,
241  *          }
242  *
243  *      Tokens:
244  *
245  *          STREAM-START(utf-8)
246  *          FLOW-MAPPING-START
247  *          KEY
248  *          SCALAR("a simple key",plain)
249  *          VALUE
250  *          SCALAR("a value",plain)
251  *          FLOW-ENTRY
252  *          KEY
253  *          SCALAR("a complex key",plain)
254  *          VALUE
255  *          SCALAR("another value",plain)
256  *          FLOW-ENTRY
257  *          FLOW-MAPPING-END
258  *          STREAM-END
259  *
260  * A simple key is a key which is not denoted by the '?' indicator.  Note that
261  * the Scanner still produces the KEY token whenever it encounters a simple key.
262  *
263  * For scanning block collections, the following tokens are used (note that we
264  * repeat KEY and VALUE here):
265  *
266  *      BLOCK-SEQUENCE-START
267  *      BLOCK-MAPPING-START
268  *      BLOCK-END
269  *      BLOCK-ENTRY
270  *      KEY
271  *      VALUE
272  *
273  * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
274  * increase that precedes a block collection (cf. the INDENT token in Python).
275  * The token BLOCK-END denotes an indentation decrease that ends a block
276  * collection (cf. the DEDENT token in Python).  However, YAML has some syntax
277  * peculiarities that make detection of these tokens more complex.
278  *
279  * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
280  * '-', '?', and ':' respectively.
281  *
282  * The following examples show how the tokens BLOCK-SEQUENCE-START,
283  * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
284  *
285  *      1. Block sequences:
286  *
287  *          - item 1
288  *          - item 2
289  *          -
290  *            - item 3.1
291  *            - item 3.2
292  *          -
293  *            key 1: value 1
294  *            key 2: value 2
295  *
296  *      Tokens:
297  *
298  *          STREAM-START(utf-8)
299  *          BLOCK-SEQUENCE-START
300  *          BLOCK-ENTRY
301  *          SCALAR("item 1",plain)
302  *          BLOCK-ENTRY
303  *          SCALAR("item 2",plain)
304  *          BLOCK-ENTRY
305  *          BLOCK-SEQUENCE-START
306  *          BLOCK-ENTRY
307  *          SCALAR("item 3.1",plain)
308  *          BLOCK-ENTRY
309  *          SCALAR("item 3.2",plain)
310  *          BLOCK-END
311  *          BLOCK-ENTRY
312  *          BLOCK-MAPPING-START
313  *          KEY
314  *          SCALAR("key 1",plain)
315  *          VALUE
316  *          SCALAR("value 1",plain)
317  *          KEY
318  *          SCALAR("key 2",plain)
319  *          VALUE
320  *          SCALAR("value 2",plain)
321  *          BLOCK-END
322  *          BLOCK-END
323  *          STREAM-END
324  *
325  *      2. Block mappings:
326  *
327  *          a simple key: a value   # The KEY token is produced here.
328  *          ? a complex key
329  *          : another value
330  *          a mapping:
331  *            key 1: value 1
332  *            key 2: value 2
333  *          a sequence:
334  *            - item 1
335  *            - item 2
336  *
337  *      Tokens:
338  *
339  *          STREAM-START(utf-8)
340  *          BLOCK-MAPPING-START
341  *          KEY
342  *          SCALAR("a simple key",plain)
343  *          VALUE
344  *          SCALAR("a value",plain)
345  *          KEY
346  *          SCALAR("a complex key",plain)
347  *          VALUE
348  *          SCALAR("another value",plain)
349  *          KEY
350  *          SCALAR("a mapping",plain)
351  *          VALUE
352  *          BLOCK-MAPPING-START
353  *          KEY
354  *          SCALAR("key 1",plain)
355  *          VALUE
356  *          SCALAR("value 1",plain)
357  *          KEY
358  *          SCALAR("key 2",plain)
359  *          VALUE
360  *          SCALAR("value 2",plain)
361  *          BLOCK-END
362  *          KEY
363  *          SCALAR("a sequence",plain)
364  *          VALUE
365  *          BLOCK-SEQUENCE-START
366  *          BLOCK-ENTRY
367  *          SCALAR("item 1",plain)
368  *          BLOCK-ENTRY
369  *          SCALAR("item 2",plain)
370  *          BLOCK-END
371  *          BLOCK-END
372  *          STREAM-END
373  *
374  * YAML does not always require a new block collection to start on a new
375  * line.  If the current line contains only '-', '?', and ':' indicators, a new
376  * block collection may start at the current line.  The following examples
377  * illustrate this case:
378  *
379  *      1. Collections in a sequence:
380  *
381  *          - - item 1
382  *            - item 2
383  *          - key 1: value 1
384  *            key 2: value 2
385  *          - ? complex key
386  *            : complex value
387  *
388  *      Tokens:
389  *
390  *          STREAM-START(utf-8)
391  *          BLOCK-SEQUENCE-START
392  *          BLOCK-ENTRY
393  *          BLOCK-SEQUENCE-START
394  *          BLOCK-ENTRY
395  *          SCALAR("item 1",plain)
396  *          BLOCK-ENTRY
397  *          SCALAR("item 2",plain)
398  *          BLOCK-END
399  *          BLOCK-ENTRY
400  *          BLOCK-MAPPING-START
401  *          KEY
402  *          SCALAR("key 1",plain)
403  *          VALUE
404  *          SCALAR("value 1",plain)
405  *          KEY
406  *          SCALAR("key 2",plain)
407  *          VALUE
408  *          SCALAR("value 2",plain)
409  *          BLOCK-END
410  *          BLOCK-ENTRY
411  *          BLOCK-MAPPING-START
412  *          KEY
413  *          SCALAR("complex key",plain)
414  *          VALUE
415  *          SCALAR("complex value",plain)
416  *          BLOCK-END
417  *          BLOCK-END
418  *          STREAM-END
419  *
420  *      2. Collections in a mapping:
421  *
422  *          ? a sequence
423  *          : - item 1
424  *            - item 2
425  *          ? a mapping
426  *          : key 1: value 1
427  *            key 2: value 2
428  *
429  *      Tokens:
430  *
431  *          STREAM-START(utf-8)
432  *          BLOCK-MAPPING-START
433  *          KEY
434  *          SCALAR("a sequence",plain)
435  *          VALUE
436  *          BLOCK-SEQUENCE-START
437  *          BLOCK-ENTRY
438  *          SCALAR("item 1",plain)
439  *          BLOCK-ENTRY
440  *          SCALAR("item 2",plain)
441  *          BLOCK-END
442  *          KEY
443  *          SCALAR("a mapping",plain)
444  *          VALUE
445  *          BLOCK-MAPPING-START
446  *          KEY
447  *          SCALAR("key 1",plain)
448  *          VALUE
449  *          SCALAR("value 1",plain)
450  *          KEY
451  *          SCALAR("key 2",plain)
452  *          VALUE
453  *          SCALAR("value 2",plain)
454  *          BLOCK-END
455  *          BLOCK-END
456  *          STREAM-END
457  *
458  * YAML also permits non-indented sequences if they are included in a block
459  * mapping.  In this case, the token BLOCK-SEQUENCE-START is not produced:
460  *
461  *      key:
462  *      - item 1    # BLOCK-SEQUENCE-START is NOT produced here.
463  *      - item 2
464  *
465  * Tokens:
466  *
467  *      STREAM-START(utf-8)
468  *      BLOCK-MAPPING-START
469  *      KEY
470  *      SCALAR("key",plain)
471  *      VALUE
472  *      BLOCK-ENTRY
473  *      SCALAR("item 1",plain)
474  *      BLOCK-ENTRY
475  *      SCALAR("item 2",plain)
476  *      BLOCK-END
477  */
478 
479 #include "yaml_private.h"
480 
/*
 * Ensure that the buffer contains the required number of characters.
 *
 * Evaluates to 1 when at least `length` characters are already unread in the
 * buffer.  Otherwise it evaluates to the result of
 * yaml_parser_update_buffer(), i.e. 1 on success, 0 on failure (reader error
 * or memory error).
 *
 * Both arguments may be evaluated more than once, so they must not have side
 * effects.
 */

#define CACHE(parser,length)                                                    \
    ((parser)->unread >= (length)                                               \
        ? 1                                                                     \
        : yaml_parser_update_buffer((parser), (length)))
490 
/*
 * Advance the buffer pointer past the current character.
 *
 * The mark index and column are incremented by one, the unread counter is
 * decremented by one, and the buffer pointer moves forward by
 * WIDTH(buffer).  No bounds or line-break checks are performed here.
 *
 * `parser` is evaluated several times and must not have side effects.
 */

#define SKIP(parser)                                                            \
     ((parser)->mark.index ++,                                                  \
      (parser)->mark.column ++,                                                 \
      (parser)->unread --,                                                      \
      (parser)->buffer.pointer += WIDTH((parser)->buffer))
500 
/*
 * Advance the buffer pointer past a line break, if one is at the current
 * position.
 *
 *  - CR LF: index, unread counter and buffer pointer all move by 2.
 *  - Any other break (IS_BREAK): index and unread counter move by 1 and the
 *    buffer pointer moves by WIDTH(buffer).
 *
 * In both cases the column is reset to 0 and the line number is incremented.
 * If the current character is not a line break, nothing is changed and the
 * expression evaluates to 0.
 *
 * `parser` is evaluated several times and must not have side effects.
 */

#define SKIP_LINE(parser)                                                       \
     (IS_CRLF((parser)->buffer) ?                                               \
      ((parser)->mark.index += 2,                                               \
       (parser)->mark.column = 0,                                               \
       (parser)->mark.line ++,                                                  \
       (parser)->unread -= 2,                                                   \
       (parser)->buffer.pointer += 2) :                                         \
      IS_BREAK((parser)->buffer) ?                                              \
      ((parser)->mark.index ++,                                                 \
       (parser)->mark.column = 0,                                               \
       (parser)->mark.line ++,                                                  \
       (parser)->unread --,                                                     \
       (parser)->buffer.pointer += WIDTH((parser)->buffer)) : 0)
514 
/*
 * Copy a character to a string buffer and advance pointers.
 *
 * Evaluates to 1 on success and to 0 when STRING_EXTEND() fails.  On success
 * the mark index and column are incremented and the unread counter is
 * decremented.  The macro does not move the pointers itself; it relies on
 * COPY() to do so (COPY is defined outside this file -- TODO confirm).
 *
 * `parser` is evaluated several times and must not have side effects.
 */

#define READ(parser,string)                                                     \
     (STRING_EXTEND(parser,string) ?                                            \
         (COPY(string,(parser)->buffer),                                        \
          (parser)->mark.index ++,                                              \
          (parser)->mark.column ++,                                             \
          (parser)->unread --,                                                  \
          1) : 0)
526 
/*
 * Copy a line break character to a string buffer and advance pointers.
 *
 * Evaluates to 0 when STRING_EXTEND() fails and to 1 otherwise, including
 * when the current position is not a line break (nothing is copied then).
 *
 * Line breaks are normalized as follows:
 *
 *      CR LF       -> LF       (2 bytes consumed, index and unread move by 2)
 *      CR or LF    -> LF       (1 byte consumed)
 *      NEL (C2 85) -> LF       (2 bytes consumed, index and unread move by 1)
 *      LS/PS (E2 80 A8/A9)     copied unchanged (3 bytes consumed, index and
 *                              unread move by 1)
 *
 * In every matched case the column is reset to 0 and the line number is
 * incremented.
 *
 * NOTE(review): the LS/PS branch writes three bytes after one STRING_EXTEND()
 * call; this assumes STRING_EXTEND() leaves enough room -- TODO confirm.
 *
 * `parser` and `string` are evaluated several times and must not have side
 * effects.
 */

#define READ_LINE(parser,string)                                                \
    (STRING_EXTEND(parser,string) ?                                             \
    (((CHECK_AT((parser)->buffer,'\r',0)                                        \
       && CHECK_AT((parser)->buffer,'\n',1)) ?      /* CR LF -> LF */           \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer += 2,                                            \
      (parser)->mark.index += 2,                                                \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread -= 2) :                                                  \
     (CHECK_AT((parser)->buffer,'\r',0)                                         \
      || CHECK_AT((parser)->buffer,'\n',0)) ?       /* CR|LF -> LF */           \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer ++,                                              \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) :                                                    \
     (CHECK_AT((parser)->buffer,'\xC2',0)                                       \
      && CHECK_AT((parser)->buffer,'\x85',1)) ?     /* NEL -> LF */             \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer += 2,                                            \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) :                                                    \
     (CHECK_AT((parser)->buffer,'\xE2',0) &&                                    \
      CHECK_AT((parser)->buffer,'\x80',1) &&                                    \
      (CHECK_AT((parser)->buffer,'\xA8',2) ||                                   \
       CHECK_AT((parser)->buffer,'\xA9',2))) ?      /* LS|PS -> LS|PS */        \
     (*((string).pointer++) = *((parser)->buffer.pointer++),                    \
      *((string).pointer++) = *((parser)->buffer.pointer++),                    \
      *((string).pointer++) = *((parser)->buffer.pointer++),                    \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) : 0),                                                \
    1) : 0)
569 
570 /*
571  * Public API declarations.
 *
 * yaml_parser_scan() is declared with YAML_DECLARE rather than `static`; it
 * is the entry point that hands one token at a time to the caller.
572  */
573 
574 YAML_DECLARE(int)
575 yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token);
576 
577 /*
578  * Error handling.
 *
 * Records a scanner error (context, context mark, problem) on the parser
 * and returns 0.
579  */
580 
581 static int
582 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
583         yaml_mark_t context_mark, const char *problem);
584 
585 /*
586  * High-level token API.
 *
 * yaml_parser_fetch_more_tokens() fills the token queue until a token can be
 * returned; yaml_parser_fetch_next_token() dispatches to the token fetchers
 * declared below.
587  */
588 
589 YAML_DECLARE(int)
590 yaml_parser_fetch_more_tokens(yaml_parser_t *parser);
591 
592 static int
593 yaml_parser_fetch_next_token(yaml_parser_t *parser);
594 
595 /*
596  * Potential simple keys.
 *
 * The flow-level helpers are declared in this group as well.
597  */
598 
599 static int
600 yaml_parser_stale_simple_keys(yaml_parser_t *parser);
601 
602 static int
603 yaml_parser_save_simple_key(yaml_parser_t *parser);
604 
605 static int
606 yaml_parser_remove_simple_key(yaml_parser_t *parser);
607 
608 static int
609 yaml_parser_increase_flow_level(yaml_parser_t *parser);
610 
611 static int
612 yaml_parser_decrease_flow_level(yaml_parser_t *parser);
613 
614 /*
615  * Indentation treatment.
616  */
617 
618 static int
619 yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
620         ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark);
621 
622 static int
623 yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column);
624 
625 /*
626  * Token fetchers.
 *
 * All of these are file-local and return int.
627  */
628 
629 static int
630 yaml_parser_fetch_stream_start(yaml_parser_t *parser);
631 
632 static int
633 yaml_parser_fetch_stream_end(yaml_parser_t *parser);
634 
635 static int
636 yaml_parser_fetch_directive(yaml_parser_t *parser);
637 
638 static int
639 yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
640         yaml_token_type_t type);
641 
642 static int
643 yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
644         yaml_token_type_t type);
645 
646 static int
647 yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
648         yaml_token_type_t type);
649 
650 static int
651 yaml_parser_fetch_flow_entry(yaml_parser_t *parser);
652 
653 static int
654 yaml_parser_fetch_block_entry(yaml_parser_t *parser);
655 
656 static int
657 yaml_parser_fetch_key(yaml_parser_t *parser);
658 
659 static int
660 yaml_parser_fetch_value(yaml_parser_t *parser);
661 
662 static int
663 yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type);
664 
665 static int
666 yaml_parser_fetch_tag(yaml_parser_t *parser);
667 
668 static int
669 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
670 
671 static int
672 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
673 
674 static int
675 yaml_parser_fetch_plain_scalar(yaml_parser_t *parser);
676 
677 /*
678  * Token scanners.
 *
 * All of these are file-local and return int; several write their result
 * through an output pointer (token, name, number, handle, prefix, uri).
679  */
680 
681 static int
682 yaml_parser_scan_to_next_token(yaml_parser_t *parser);
683 
684 static int
685 yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token);
686 
687 static int
688 yaml_parser_scan_directive_name(yaml_parser_t *parser,
689         yaml_mark_t start_mark, yaml_char_t **name);
690 
691 static int
692 yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
693         yaml_mark_t start_mark, int *major, int *minor);
694 
695 static int
696 yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
697         yaml_mark_t start_mark, int *number);
698 
699 static int
700 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
701         yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix);
702 
703 static int
704 yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
705         yaml_token_type_t type);
706 
707 static int
708 yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token);
709 
710 static int
711 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
712         yaml_mark_t start_mark, yaml_char_t **handle);
713 
714 static int
715 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int uri_char, int directive,
716         yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri);
717 
718 static int
719 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
720         yaml_mark_t start_mark, yaml_string_t *string);
721 
722 static int
723 yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
724         int literal);
725 
726 static int
727 yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
728         int *indent, yaml_string_t *breaks,
729         yaml_mark_t start_mark, yaml_mark_t *end_mark);
730 
731 static int
732 yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
733         int single);
734 
735 static int
736 yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token);
737 
738 /*
739  * Get the next token.
740  */
741 
742 YAML_DECLARE(int)
yaml_parser_scan(yaml_parser_t * parser,yaml_token_t * token)743 yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token)
744 {
745     assert(parser); /* Non-NULL parser object is expected. */
746     assert(token);  /* Non-NULL token object is expected. */
747 
748     /* Erase the token object. */
749 
750     memset(token, 0, sizeof(yaml_token_t));
751 
752     /* No tokens after STREAM-END or error. */
753 
754     if (parser->stream_end_produced || parser->error) {
755         return 1;
756     }
757 
758     /* Ensure that the tokens queue contains enough tokens. */
759 
760     if (!parser->token_available) {
761         if (!yaml_parser_fetch_more_tokens(parser))
762             return 0;
763     }
764 
765     /* Fetch the next token from the queue. */
766 
767     *token = DEQUEUE(parser, parser->tokens);
768     parser->token_available = 0;
769     parser->tokens_parsed ++;
770 
771     if (token->type == YAML_STREAM_END_TOKEN) {
772         parser->stream_end_produced = 1;
773     }
774 
775     return 1;
776 }
777 
778 /*
779  * Set the scanner error and return 0.
780  */
781 
782 static int
yaml_parser_set_scanner_error(yaml_parser_t * parser,const char * context,yaml_mark_t context_mark,const char * problem)783 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
784         yaml_mark_t context_mark, const char *problem)
785 {
786     parser->error = YAML_SCANNER_ERROR;
787     parser->context = context;
788     parser->context_mark = context_mark;
789     parser->problem = problem;
790     parser->problem_mark = parser->mark;
791 
792     return 0;
793 }
794 
795 /*
796  * Ensure that the tokens queue contains at least one token which can be
797  * returned to the Parser.
798  */
799 
800 YAML_DECLARE(int)
yaml_parser_fetch_more_tokens(yaml_parser_t * parser)801 yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
802 {
803     int need_more_tokens;
804 
805     /* While we need more tokens to fetch, do it. */
806 
807     while (1)
808     {
809         /*
810          * Check if we really need to fetch more tokens.
811          */
812 
813         need_more_tokens = 0;
814 
815         if (parser->tokens.head == parser->tokens.tail)
816         {
817             /* Queue is empty. */
818 
819             need_more_tokens = 1;
820         }
821         else
822         {
823             yaml_simple_key_t *simple_key;
824 
825             /* Check if any potential simple key may occupy the head position. */
826 
827             if (!yaml_parser_stale_simple_keys(parser))
828                 return 0;
829 
830             for (simple_key = parser->simple_keys.start;
831                     simple_key != parser->simple_keys.top; simple_key++) {
832                 if (simple_key->possible
833                         && simple_key->token_number == parser->tokens_parsed) {
834                     need_more_tokens = 1;
835                     break;
836                 }
837             }
838         }
839 
840         /* We are finished. */
841 
842         if (!need_more_tokens)
843             break;
844 
845         /* Fetch the next token. */
846 
847         if (!yaml_parser_fetch_next_token(parser))
848             return 0;
849     }
850 
851     parser->token_available = 1;
852 
853     return 1;
854 }
855 
856 /*
857  * The dispatcher for token fetchers.
858  */
859 
860 static int
yaml_parser_fetch_next_token(yaml_parser_t * parser)861 yaml_parser_fetch_next_token(yaml_parser_t *parser)
862 {
863     /* Ensure that the buffer is initialized. */
864 
865     if (!CACHE(parser, 1))
866         return 0;
867 
868     /* Check if we just started scanning.  Fetch STREAM-START then. */
869 
870     if (!parser->stream_start_produced)
871         return yaml_parser_fetch_stream_start(parser);
872 
873     /* Eat whitespaces and comments until we reach the next token. */
874 
875     if (!yaml_parser_scan_to_next_token(parser))
876         return 0;
877 
878     /* Remove obsolete potential simple keys. */
879 
880     if (!yaml_parser_stale_simple_keys(parser))
881         return 0;
882 
883     /* Check the indentation level against the current column. */
884 
885     if (!yaml_parser_unroll_indent(parser, parser->mark.column))
886         return 0;
887 
888     /*
889      * Ensure that the buffer contains at least 4 characters.  4 is the length
890      * of the longest indicators ('--- ' and '... ').
891      */
892 
893     if (!CACHE(parser, 4))
894         return 0;
895 
896     /* Is it the end of the stream? */
897 
898     if (IS_Z(parser->buffer))
899         return yaml_parser_fetch_stream_end(parser);
900 
901     /* Is it a directive? */
902 
903     if (parser->mark.column == 0 && CHECK(parser->buffer, '%'))
904         return yaml_parser_fetch_directive(parser);
905 
906     /* Is it the document start indicator? */
907 
908     if (parser->mark.column == 0
909             && CHECK_AT(parser->buffer, '-', 0)
910             && CHECK_AT(parser->buffer, '-', 1)
911             && CHECK_AT(parser->buffer, '-', 2)
912             && IS_BLANKZ_AT(parser->buffer, 3))
913         return yaml_parser_fetch_document_indicator(parser,
914                 YAML_DOCUMENT_START_TOKEN);
915 
916     /* Is it the document end indicator? */
917 
918     if (parser->mark.column == 0
919             && CHECK_AT(parser->buffer, '.', 0)
920             && CHECK_AT(parser->buffer, '.', 1)
921             && CHECK_AT(parser->buffer, '.', 2)
922             && IS_BLANKZ_AT(parser->buffer, 3))
923         return yaml_parser_fetch_document_indicator(parser,
924                 YAML_DOCUMENT_END_TOKEN);
925 
926     /* Is it the flow sequence start indicator? */
927 
928     if (CHECK(parser->buffer, '['))
929         return yaml_parser_fetch_flow_collection_start(parser,
930                 YAML_FLOW_SEQUENCE_START_TOKEN);
931 
932     /* Is it the flow mapping start indicator? */
933 
934     if (CHECK(parser->buffer, '{'))
935         return yaml_parser_fetch_flow_collection_start(parser,
936                 YAML_FLOW_MAPPING_START_TOKEN);
937 
938     /* Is it the flow sequence end indicator? */
939 
940     if (CHECK(parser->buffer, ']'))
941         return yaml_parser_fetch_flow_collection_end(parser,
942                 YAML_FLOW_SEQUENCE_END_TOKEN);
943 
944     /* Is it the flow mapping end indicator? */
945 
946     if (CHECK(parser->buffer, '}'))
947         return yaml_parser_fetch_flow_collection_end(parser,
948                 YAML_FLOW_MAPPING_END_TOKEN);
949 
950     /* Is it the flow entry indicator? */
951 
952     if (CHECK(parser->buffer, ','))
953         return yaml_parser_fetch_flow_entry(parser);
954 
955     /* Is it the block entry indicator? */
956 
957     if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1))
958         return yaml_parser_fetch_block_entry(parser);
959 
960     /* Is it the key indicator? */
961 
962     if (CHECK(parser->buffer, '?')
963             && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
964         return yaml_parser_fetch_key(parser);
965 
966     /* Is it the value indicator? */
967 
968     if (CHECK(parser->buffer, ':')
969             && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
970         return yaml_parser_fetch_value(parser);
971 
972     /* Is it an alias? */
973 
974     if (CHECK(parser->buffer, '*'))
975         return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN);
976 
977     /* Is it an anchor? */
978 
979     if (CHECK(parser->buffer, '&'))
980         return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN);
981 
982     /* Is it a tag? */
983 
984     if (CHECK(parser->buffer, '!'))
985         return yaml_parser_fetch_tag(parser);
986 
987     /* Is it a literal scalar? */
988 
989     if (CHECK(parser->buffer, '|') && !parser->flow_level)
990         return yaml_parser_fetch_block_scalar(parser, 1);
991 
992     /* Is it a folded scalar? */
993 
994     if (CHECK(parser->buffer, '>') && !parser->flow_level)
995         return yaml_parser_fetch_block_scalar(parser, 0);
996 
997     /* Is it a single-quoted scalar? */
998 
999     if (CHECK(parser->buffer, '\''))
1000         return yaml_parser_fetch_flow_scalar(parser, 1);
1001 
1002     /* Is it a double-quoted scalar? */
1003 
1004     if (CHECK(parser->buffer, '"'))
1005         return yaml_parser_fetch_flow_scalar(parser, 0);
1006 
1007     /*
1008      * Is it a plain scalar?
1009      *
1010      * A plain scalar may start with any non-blank characters except
1011      *
1012      *      '-', '?', ':', ',', '[', ']', '{', '}',
1013      *      '#', '&', '*', '!', '|', '>', '\'', '\"',
1014      *      '%', '@', '`'.
1015      *
1016      * In the block context (and, for the '-' indicator, in the flow context
1017      * too), it may also start with the characters
1018      *
1019      *      '-', '?', ':'
1020      *
1021      * if it is followed by a non-space character.
1022      *
1023      * The last rule is more restrictive than the specification requires.
1024      */
1025 
1026     if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-')
1027                 || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')
1028                 || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[')
1029                 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
1030                 || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#')
1031                 || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*')
1032                 || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|')
1033                 || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'')
1034                 || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%')
1035                 || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) ||
1036             (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) ||
1037             (!parser->flow_level &&
1038              (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':'))
1039              && !IS_BLANKZ_AT(parser->buffer, 1)))
1040         return yaml_parser_fetch_plain_scalar(parser);
1041 
1042     /*
1043      * If we don't determine the token type so far, it is an error.
1044      */
1045 
1046     return yaml_parser_set_scanner_error(parser,
1047             "while scanning for the next token", parser->mark,
1048             "found character that cannot start any token");
1049 }
1050 
1051 /*
1052  * Check the list of potential simple keys and remove the positions that
1053  * cannot contain simple keys anymore.
1054  */
1055 
1056 static int
yaml_parser_stale_simple_keys(yaml_parser_t * parser)1057 yaml_parser_stale_simple_keys(yaml_parser_t *parser)
1058 {
1059     yaml_simple_key_t *simple_key;
1060 
1061     /* Check for a potential simple key for each flow level. */
1062 
1063     for (simple_key = parser->simple_keys.start;
1064             simple_key != parser->simple_keys.top; simple_key ++)
1065     {
1066         /*
1067          * The specification requires that a simple key
1068          *
1069          *  - is limited to a single line,
1070          *  - is shorter than 1024 characters.
1071          */
1072 
1073         if (simple_key->possible
1074                 && (simple_key->mark.line < parser->mark.line
1075                     || simple_key->mark.index+1024 < parser->mark.index)) {
1076 
1077             /* Check if the potential simple key to be removed is required. */
1078 
1079             if (simple_key->required) {
1080                 return yaml_parser_set_scanner_error(parser,
1081                         "while scanning a simple key", simple_key->mark,
1082                         "could not find expected ':'");
1083             }
1084 
1085             simple_key->possible = 0;
1086         }
1087     }
1088 
1089     return 1;
1090 }
1091 
1092 /*
1093  * Check if a simple key may start at the current position and add it if
1094  * needed.
1095  */
1096 
1097 static int
yaml_parser_save_simple_key(yaml_parser_t * parser)1098 yaml_parser_save_simple_key(yaml_parser_t *parser)
1099 {
1100     /*
1101      * A simple key is required at the current position if the scanner is in
1102      * the block context and the current column coincides with the indentation
1103      * level.
1104      */
1105 
1106     int required = (!parser->flow_level
1107             && parser->indent == (ptrdiff_t)parser->mark.column);
1108 
1109     /*
1110      * If the current position may start a simple key, save it.
1111      */
1112 
1113     if (parser->simple_key_allowed)
1114     {
1115         yaml_simple_key_t simple_key;
1116         simple_key.possible = 1;
1117         simple_key.required = required;
1118         simple_key.token_number =
1119             parser->tokens_parsed + (parser->tokens.tail - parser->tokens.head);
1120         simple_key.mark = parser->mark;
1121 
1122         if (!yaml_parser_remove_simple_key(parser)) return 0;
1123 
1124         *(parser->simple_keys.top-1) = simple_key;
1125     }
1126 
1127     return 1;
1128 }
1129 
1130 /*
1131  * Remove a potential simple key at the current flow level.
1132  */
1133 
1134 static int
yaml_parser_remove_simple_key(yaml_parser_t * parser)1135 yaml_parser_remove_simple_key(yaml_parser_t *parser)
1136 {
1137     yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1138 
1139     if (simple_key->possible)
1140     {
1141         /* If the key is required, it is an error. */
1142 
1143         if (simple_key->required) {
1144             return yaml_parser_set_scanner_error(parser,
1145                     "while scanning a simple key", simple_key->mark,
1146                     "could not find expected ':'");
1147         }
1148     }
1149 
1150     /* Remove the key from the stack. */
1151 
1152     simple_key->possible = 0;
1153 
1154     return 1;
1155 }
1156 
1157 /*
1158  * Increase the flow level and resize the simple key list if needed.
1159  */
1160 
1161 static int
yaml_parser_increase_flow_level(yaml_parser_t * parser)1162 yaml_parser_increase_flow_level(yaml_parser_t *parser)
1163 {
1164     yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } };
1165 
1166     /* Reset the simple key on the next level. */
1167 
1168     if (!PUSH(parser, parser->simple_keys, empty_simple_key))
1169         return 0;
1170 
1171     /* Increase the flow level. */
1172 
1173     if (parser->flow_level == INT_MAX) {
1174         parser->error = YAML_MEMORY_ERROR;
1175         return 0;
1176     }
1177 
1178     parser->flow_level++;
1179 
1180     return 1;
1181 }
1182 
1183 /*
1184  * Decrease the flow level.
1185  */
1186 
1187 static int
yaml_parser_decrease_flow_level(yaml_parser_t * parser)1188 yaml_parser_decrease_flow_level(yaml_parser_t *parser)
1189 {
1190     if (parser->flow_level) {
1191         parser->flow_level --;
1192         (void)POP(parser, parser->simple_keys);
1193     }
1194 
1195     return 1;
1196 }
1197 
1198 /*
1199  * Push the current indentation level to the stack and set the new level
1200  * the current column is greater than the indentation level.  In this case,
1201  * append or insert the specified token into the token queue.
1202  *
1203  */
1204 
1205 static int
yaml_parser_roll_indent(yaml_parser_t * parser,ptrdiff_t column,ptrdiff_t number,yaml_token_type_t type,yaml_mark_t mark)1206 yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
1207         ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark)
1208 {
1209     yaml_token_t token;
1210 
1211     /* In the flow context, do nothing. */
1212 
1213     if (parser->flow_level)
1214         return 1;
1215 
1216     if (parser->indent < column)
1217     {
1218         /*
1219          * Push the current indentation level to the stack and set the new
1220          * indentation level.
1221          */
1222 
1223         if (!PUSH(parser, parser->indents, parser->indent))
1224             return 0;
1225 
1226         if (column > INT_MAX) {
1227             parser->error = YAML_MEMORY_ERROR;
1228             return 0;
1229         }
1230 
1231         parser->indent = column;
1232 
1233         /* Create a token and insert it into the queue. */
1234 
1235         TOKEN_INIT(token, type, mark, mark);
1236 
1237         if (number == -1) {
1238             if (!ENQUEUE(parser, parser->tokens, token))
1239                 return 0;
1240         }
1241         else {
1242             if (!QUEUE_INSERT(parser,
1243                         parser->tokens, number - parser->tokens_parsed, token))
1244                 return 0;
1245         }
1246     }
1247 
1248     return 1;
1249 }
1250 
1251 /*
1252  * Pop indentation levels from the indents stack until the current level
1253  * becomes less or equal to the column.  For each indentation level, append
1254  * the BLOCK-END token.
1255  */
1256 
1257 
1258 static int
yaml_parser_unroll_indent(yaml_parser_t * parser,ptrdiff_t column)1259 yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column)
1260 {
1261     yaml_token_t token;
1262 
1263     /* In the flow context, do nothing. */
1264 
1265     if (parser->flow_level)
1266         return 1;
1267 
1268     /* Loop through the indentation levels in the stack. */
1269 
1270     while (parser->indent > column)
1271     {
1272         /* Create a token and append it to the queue. */
1273 
1274         TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark);
1275 
1276         if (!ENQUEUE(parser, parser->tokens, token))
1277             return 0;
1278 
1279         /* Pop the indentation level. */
1280 
1281         parser->indent = POP(parser, parser->indents);
1282     }
1283 
1284     return 1;
1285 }
1286 
1287 /*
1288  * Initialize the scanner and produce the STREAM-START token.
1289  */
1290 
1291 static int
yaml_parser_fetch_stream_start(yaml_parser_t * parser)1292 yaml_parser_fetch_stream_start(yaml_parser_t *parser)
1293 {
1294     yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } };
1295     yaml_token_t token;
1296 
1297     /* Set the initial indentation. */
1298 
1299     parser->indent = -1;
1300 
1301     /* Initialize the simple key stack. */
1302 
1303     if (!PUSH(parser, parser->simple_keys, simple_key))
1304         return 0;
1305 
1306     /* A simple key is allowed at the beginning of the stream. */
1307 
1308     parser->simple_key_allowed = 1;
1309 
1310     /* We have started. */
1311 
1312     parser->stream_start_produced = 1;
1313 
1314     /* Create the STREAM-START token and append it to the queue. */
1315 
1316     STREAM_START_TOKEN_INIT(token, parser->encoding,
1317             parser->mark, parser->mark);
1318 
1319     if (!ENQUEUE(parser, parser->tokens, token))
1320         return 0;
1321 
1322     return 1;
1323 }
1324 
1325 /*
1326  * Produce the STREAM-END token and shut down the scanner.
1327  */
1328 
1329 static int
yaml_parser_fetch_stream_end(yaml_parser_t * parser)1330 yaml_parser_fetch_stream_end(yaml_parser_t *parser)
1331 {
1332     yaml_token_t token;
1333 
1334     /* Force new line. */
1335 
1336     if (parser->mark.column != 0) {
1337         parser->mark.column = 0;
1338         parser->mark.line ++;
1339     }
1340 
1341     /* Reset the indentation level. */
1342 
1343     if (!yaml_parser_unroll_indent(parser, -1))
1344         return 0;
1345 
1346     /* Reset simple keys. */
1347 
1348     if (!yaml_parser_remove_simple_key(parser))
1349         return 0;
1350 
1351     parser->simple_key_allowed = 0;
1352 
1353     /* Create the STREAM-END token and append it to the queue. */
1354 
1355     STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark);
1356 
1357     if (!ENQUEUE(parser, parser->tokens, token))
1358         return 0;
1359 
1360     return 1;
1361 }
1362 
1363 /*
1364  * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
1365  */
1366 
1367 static int
yaml_parser_fetch_directive(yaml_parser_t * parser)1368 yaml_parser_fetch_directive(yaml_parser_t *parser)
1369 {
1370     yaml_token_t token;
1371 
1372     /* Reset the indentation level. */
1373 
1374     if (!yaml_parser_unroll_indent(parser, -1))
1375         return 0;
1376 
1377     /* Reset simple keys. */
1378 
1379     if (!yaml_parser_remove_simple_key(parser))
1380         return 0;
1381 
1382     parser->simple_key_allowed = 0;
1383 
1384     /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */
1385 
1386     if (!yaml_parser_scan_directive(parser, &token))
1387         return 0;
1388 
1389     /* Append the token to the queue. */
1390 
1391     if (!ENQUEUE(parser, parser->tokens, token)) {
1392         yaml_token_delete(&token);
1393         return 0;
1394     }
1395 
1396     return 1;
1397 }
1398 
1399 /*
1400  * Produce the DOCUMENT-START or DOCUMENT-END token.
1401  */
1402 
1403 static int
yaml_parser_fetch_document_indicator(yaml_parser_t * parser,yaml_token_type_t type)1404 yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
1405         yaml_token_type_t type)
1406 {
1407     yaml_mark_t start_mark, end_mark;
1408     yaml_token_t token;
1409 
1410     /* Reset the indentation level. */
1411 
1412     if (!yaml_parser_unroll_indent(parser, -1))
1413         return 0;
1414 
1415     /* Reset simple keys. */
1416 
1417     if (!yaml_parser_remove_simple_key(parser))
1418         return 0;
1419 
1420     parser->simple_key_allowed = 0;
1421 
1422     /* Consume the token. */
1423 
1424     start_mark = parser->mark;
1425 
1426     SKIP(parser);
1427     SKIP(parser);
1428     SKIP(parser);
1429 
1430     end_mark = parser->mark;
1431 
1432     /* Create the DOCUMENT-START or DOCUMENT-END token. */
1433 
1434     TOKEN_INIT(token, type, start_mark, end_mark);
1435 
1436     /* Append the token to the queue. */
1437 
1438     if (!ENQUEUE(parser, parser->tokens, token))
1439         return 0;
1440 
1441     return 1;
1442 }
1443 
1444 /*
1445  * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
1446  */
1447 
1448 static int
yaml_parser_fetch_flow_collection_start(yaml_parser_t * parser,yaml_token_type_t type)1449 yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
1450         yaml_token_type_t type)
1451 {
1452     yaml_mark_t start_mark, end_mark;
1453     yaml_token_t token;
1454 
1455     /* The indicators '[' and '{' may start a simple key. */
1456 
1457     if (!yaml_parser_save_simple_key(parser))
1458         return 0;
1459 
1460     /* Increase the flow level. */
1461 
1462     if (!yaml_parser_increase_flow_level(parser))
1463         return 0;
1464 
1465     /* A simple key may follow the indicators '[' and '{'. */
1466 
1467     parser->simple_key_allowed = 1;
1468 
1469     /* Consume the token. */
1470 
1471     start_mark = parser->mark;
1472     SKIP(parser);
1473     end_mark = parser->mark;
1474 
1475     /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */
1476 
1477     TOKEN_INIT(token, type, start_mark, end_mark);
1478 
1479     /* Append the token to the queue. */
1480 
1481     if (!ENQUEUE(parser, parser->tokens, token))
1482         return 0;
1483 
1484     return 1;
1485 }
1486 
1487 /*
1488  * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
1489  */
1490 
1491 static int
yaml_parser_fetch_flow_collection_end(yaml_parser_t * parser,yaml_token_type_t type)1492 yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
1493         yaml_token_type_t type)
1494 {
1495     yaml_mark_t start_mark, end_mark;
1496     yaml_token_t token;
1497 
1498     /* Reset any potential simple key on the current flow level. */
1499 
1500     if (!yaml_parser_remove_simple_key(parser))
1501         return 0;
1502 
1503     /* Decrease the flow level. */
1504 
1505     if (!yaml_parser_decrease_flow_level(parser))
1506         return 0;
1507 
1508     /* No simple keys after the indicators ']' and '}'. */
1509 
1510     parser->simple_key_allowed = 0;
1511 
1512     /* Consume the token. */
1513 
1514     start_mark = parser->mark;
1515     SKIP(parser);
1516     end_mark = parser->mark;
1517 
1518     /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */
1519 
1520     TOKEN_INIT(token, type, start_mark, end_mark);
1521 
1522     /* Append the token to the queue. */
1523 
1524     if (!ENQUEUE(parser, parser->tokens, token))
1525         return 0;
1526 
1527     return 1;
1528 }
1529 
1530 /*
1531  * Produce the FLOW-ENTRY token.
1532  */
1533 
1534 static int
yaml_parser_fetch_flow_entry(yaml_parser_t * parser)1535 yaml_parser_fetch_flow_entry(yaml_parser_t *parser)
1536 {
1537     yaml_mark_t start_mark, end_mark;
1538     yaml_token_t token;
1539 
1540     /* Reset any potential simple keys on the current flow level. */
1541 
1542     if (!yaml_parser_remove_simple_key(parser))
1543         return 0;
1544 
1545     /* Simple keys are allowed after ','. */
1546 
1547     parser->simple_key_allowed = 1;
1548 
1549     /* Consume the token. */
1550 
1551     start_mark = parser->mark;
1552     SKIP(parser);
1553     end_mark = parser->mark;
1554 
1555     /* Create the FLOW-ENTRY token and append it to the queue. */
1556 
1557     TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark);
1558 
1559     if (!ENQUEUE(parser, parser->tokens, token))
1560         return 0;
1561 
1562     return 1;
1563 }
1564 
1565 /*
1566  * Produce the BLOCK-ENTRY token.
1567  */
1568 
1569 static int
yaml_parser_fetch_block_entry(yaml_parser_t * parser)1570 yaml_parser_fetch_block_entry(yaml_parser_t *parser)
1571 {
1572     yaml_mark_t start_mark, end_mark;
1573     yaml_token_t token;
1574 
1575     /* Check if the scanner is in the block context. */
1576 
1577     if (!parser->flow_level)
1578     {
1579         /* Check if we are allowed to start a new entry. */
1580 
1581         if (!parser->simple_key_allowed) {
1582             return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1583                     "block sequence entries are not allowed in this context");
1584         }
1585 
1586         /* Add the BLOCK-SEQUENCE-START token if needed. */
1587 
1588         if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1589                     YAML_BLOCK_SEQUENCE_START_TOKEN, parser->mark))
1590             return 0;
1591     }
1592     else
1593     {
1594         /*
1595          * It is an error for the '-' indicator to occur in the flow context,
1596          * but we let the Parser detect and report about it because the Parser
1597          * is able to point to the context.
1598          */
1599     }
1600 
1601     /* Reset any potential simple keys on the current flow level. */
1602 
1603     if (!yaml_parser_remove_simple_key(parser))
1604         return 0;
1605 
1606     /* Simple keys are allowed after '-'. */
1607 
1608     parser->simple_key_allowed = 1;
1609 
1610     /* Consume the token. */
1611 
1612     start_mark = parser->mark;
1613     SKIP(parser);
1614     end_mark = parser->mark;
1615 
1616     /* Create the BLOCK-ENTRY token and append it to the queue. */
1617 
1618     TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark);
1619 
1620     if (!ENQUEUE(parser, parser->tokens, token))
1621         return 0;
1622 
1623     return 1;
1624 }
1625 
1626 /*
1627  * Produce the KEY token.
1628  */
1629 
1630 static int
yaml_parser_fetch_key(yaml_parser_t * parser)1631 yaml_parser_fetch_key(yaml_parser_t *parser)
1632 {
1633     yaml_mark_t start_mark, end_mark;
1634     yaml_token_t token;
1635 
1636     /* In the block context, additional checks are required. */
1637 
1638     if (!parser->flow_level)
1639     {
1640         /* Check if we are allowed to start a new key (not necessary simple). */
1641 
1642         if (!parser->simple_key_allowed) {
1643             return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1644                     "mapping keys are not allowed in this context");
1645         }
1646 
1647         /* Add the BLOCK-MAPPING-START token if needed. */
1648 
1649         if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1650                     YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
1651             return 0;
1652     }
1653 
1654     /* Reset any potential simple keys on the current flow level. */
1655 
1656     if (!yaml_parser_remove_simple_key(parser))
1657         return 0;
1658 
1659     /* Simple keys are allowed after '?' in the block context. */
1660 
1661     parser->simple_key_allowed = (!parser->flow_level);
1662 
1663     /* Consume the token. */
1664 
1665     start_mark = parser->mark;
1666     SKIP(parser);
1667     end_mark = parser->mark;
1668 
1669     /* Create the KEY token and append it to the queue. */
1670 
1671     TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark);
1672 
1673     if (!ENQUEUE(parser, parser->tokens, token))
1674         return 0;
1675 
1676     return 1;
1677 }
1678 
1679 /*
1680  * Produce the VALUE token.
1681  */
1682 
1683 static int
yaml_parser_fetch_value(yaml_parser_t * parser)1684 yaml_parser_fetch_value(yaml_parser_t *parser)
1685 {
1686     yaml_mark_t start_mark, end_mark;
1687     yaml_token_t token;
1688     yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1689 
1690     /* Have we found a simple key? */
1691 
1692     if (simple_key->possible)
1693     {
1694 
1695         /* Create the KEY token and insert it into the queue. */
1696 
1697         TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);
1698 
1699         if (!QUEUE_INSERT(parser, parser->tokens,
1700                     simple_key->token_number - parser->tokens_parsed, token))
1701             return 0;
1702 
1703         /* In the block context, we may need to add the BLOCK-MAPPING-START token. */
1704 
1705         if (!yaml_parser_roll_indent(parser, simple_key->mark.column,
1706                     simple_key->token_number,
1707                     YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
1708             return 0;
1709 
1710         /* Remove the simple key. */
1711 
1712         simple_key->possible = 0;
1713 
1714         /* A simple key cannot follow another simple key. */
1715 
1716         parser->simple_key_allowed = 0;
1717     }
1718     else
1719     {
1720         /* The ':' indicator follows a complex key. */
1721 
1722         /* In the block context, extra checks are required. */
1723 
1724         if (!parser->flow_level)
1725         {
1726             /* Check if we are allowed to start a complex value. */
1727 
1728             if (!parser->simple_key_allowed) {
1729                 return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1730                         "mapping values are not allowed in this context");
1731             }
1732 
1733             /* Add the BLOCK-MAPPING-START token if needed. */
1734 
1735             if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1736                         YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
1737                 return 0;
1738         }
1739 
1740         /* Simple keys after ':' are allowed in the block context. */
1741 
1742         parser->simple_key_allowed = (!parser->flow_level);
1743     }
1744 
1745     /* Consume the token. */
1746 
1747     start_mark = parser->mark;
1748     SKIP(parser);
1749     end_mark = parser->mark;
1750 
1751     /* Create the VALUE token and append it to the queue. */
1752 
1753     TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark);
1754 
1755     if (!ENQUEUE(parser, parser->tokens, token))
1756         return 0;
1757 
1758     return 1;
1759 }
1760 
1761 /*
1762  * Produce the ALIAS or ANCHOR token.
1763  */
1764 
1765 static int
yaml_parser_fetch_anchor(yaml_parser_t * parser,yaml_token_type_t type)1766 yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type)
1767 {
1768     yaml_token_t token;
1769 
1770     /* An anchor or an alias could be a simple key. */
1771 
1772     if (!yaml_parser_save_simple_key(parser))
1773         return 0;
1774 
1775     /* A simple key cannot follow an anchor or an alias. */
1776 
1777     parser->simple_key_allowed = 0;
1778 
1779     /* Create the ALIAS or ANCHOR token and append it to the queue. */
1780 
1781     if (!yaml_parser_scan_anchor(parser, &token, type))
1782         return 0;
1783 
1784     if (!ENQUEUE(parser, parser->tokens, token)) {
1785         yaml_token_delete(&token);
1786         return 0;
1787     }
1788     return 1;
1789 }
1790 
1791 /*
1792  * Produce the TAG token.
1793  */
1794 
1795 static int
yaml_parser_fetch_tag(yaml_parser_t * parser)1796 yaml_parser_fetch_tag(yaml_parser_t *parser)
1797 {
1798     yaml_token_t token;
1799 
1800     /* A tag could be a simple key. */
1801 
1802     if (!yaml_parser_save_simple_key(parser))
1803         return 0;
1804 
1805     /* A simple key cannot follow a tag. */
1806 
1807     parser->simple_key_allowed = 0;
1808 
1809     /* Create the TAG token and append it to the queue. */
1810 
1811     if (!yaml_parser_scan_tag(parser, &token))
1812         return 0;
1813 
1814     if (!ENQUEUE(parser, parser->tokens, token)) {
1815         yaml_token_delete(&token);
1816         return 0;
1817     }
1818 
1819     return 1;
1820 }
1821 
1822 /*
1823  * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
1824  */
1825 
1826 static int
yaml_parser_fetch_block_scalar(yaml_parser_t * parser,int literal)1827 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal)
1828 {
1829     yaml_token_t token;
1830 
1831     /* Remove any potential simple keys. */
1832 
1833     if (!yaml_parser_remove_simple_key(parser))
1834         return 0;
1835 
1836     /* A simple key may follow a block scalar. */
1837 
1838     parser->simple_key_allowed = 1;
1839 
1840     /* Create the SCALAR token and append it to the queue. */
1841 
1842     if (!yaml_parser_scan_block_scalar(parser, &token, literal))
1843         return 0;
1844 
1845     if (!ENQUEUE(parser, parser->tokens, token)) {
1846         yaml_token_delete(&token);
1847         return 0;
1848     }
1849 
1850     return 1;
1851 }
1852 
1853 /*
1854  * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
1855  */
1856 
1857 static int
yaml_parser_fetch_flow_scalar(yaml_parser_t * parser,int single)1858 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single)
1859 {
1860     yaml_token_t token;
1861 
1862     /* A plain scalar could be a simple key. */
1863 
1864     if (!yaml_parser_save_simple_key(parser))
1865         return 0;
1866 
1867     /* A simple key cannot follow a flow scalar. */
1868 
1869     parser->simple_key_allowed = 0;
1870 
1871     /* Create the SCALAR token and append it to the queue. */
1872 
1873     if (!yaml_parser_scan_flow_scalar(parser, &token, single))
1874         return 0;
1875 
1876     if (!ENQUEUE(parser, parser->tokens, token)) {
1877         yaml_token_delete(&token);
1878         return 0;
1879     }
1880 
1881     return 1;
1882 }
1883 
1884 /*
1885  * Produce the SCALAR(...,plain) token.
1886  */
1887 
1888 static int
yaml_parser_fetch_plain_scalar(yaml_parser_t * parser)1889 yaml_parser_fetch_plain_scalar(yaml_parser_t *parser)
1890 {
1891     yaml_token_t token;
1892 
1893     /* A plain scalar could be a simple key. */
1894 
1895     if (!yaml_parser_save_simple_key(parser))
1896         return 0;
1897 
1898     /* A simple key cannot follow a flow scalar. */
1899 
1900     parser->simple_key_allowed = 0;
1901 
1902     /* Create the SCALAR token and append it to the queue. */
1903 
1904     if (!yaml_parser_scan_plain_scalar(parser, &token))
1905         return 0;
1906 
1907     if (!ENQUEUE(parser, parser->tokens, token)) {
1908         yaml_token_delete(&token);
1909         return 0;
1910     }
1911 
1912     return 1;
1913 }
1914 
1915 /*
1916  * Eat whitespaces and comments until the next token is found.
1917  */
1918 
1919 static int
yaml_parser_scan_to_next_token(yaml_parser_t * parser)1920 yaml_parser_scan_to_next_token(yaml_parser_t *parser)
1921 {
1922     /* Until the next token is not found. */
1923 
1924     while (1)
1925     {
1926         /* Allow the BOM mark to start a line. */
1927 
1928         if (!CACHE(parser, 1)) return 0;
1929 
1930         if (parser->mark.column == 0 && IS_BOM(parser->buffer))
1931             SKIP(parser);
1932 
1933         /*
1934          * Eat whitespaces.
1935          *
1936          * Tabs are allowed:
1937          *
1938          *  - in the flow context;
1939          *  - in the block context, but not at the beginning of the line or
1940          *  after '-', '?', or ':' (complex value).
1941          */
1942 
1943         if (!CACHE(parser, 1)) return 0;
1944 
1945         while (CHECK(parser->buffer,' ') ||
1946                 ((parser->flow_level || !parser->simple_key_allowed) &&
1947                  CHECK(parser->buffer, '\t'))) {
1948             SKIP(parser);
1949             if (!CACHE(parser, 1)) return 0;
1950         }
1951 
1952         /* Eat a comment until a line break. */
1953 
1954         if (CHECK(parser->buffer, '#')) {
1955             while (!IS_BREAKZ(parser->buffer)) {
1956                 SKIP(parser);
1957                 if (!CACHE(parser, 1)) return 0;
1958             }
1959         }
1960 
1961         /* If it is a line break, eat it. */
1962 
1963         if (IS_BREAK(parser->buffer))
1964         {
1965             if (!CACHE(parser, 2)) return 0;
1966             SKIP_LINE(parser);
1967 
1968             /* In the block context, a new line may start a simple key. */
1969 
1970             if (!parser->flow_level) {
1971                 parser->simple_key_allowed = 1;
1972             }
1973         }
1974         else
1975         {
1976             /* We have found a token. */
1977 
1978             break;
1979         }
1980     }
1981 
1982     return 1;
1983 }
1984 
1985 /*
1986  * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
1987  *
1988  * Scope:
1989  *      %YAML    1.1    # a comment \n
1990  *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1991  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
1992  *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1993  */
1994 
1995 int
yaml_parser_scan_directive(yaml_parser_t * parser,yaml_token_t * token)1996 yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token)
1997 {
1998     yaml_mark_t start_mark, end_mark;
1999     yaml_char_t *name = NULL;
2000     int major, minor;
2001     yaml_char_t *handle = NULL, *prefix = NULL;
2002 
2003     /* Eat '%'. */
2004 
2005     start_mark = parser->mark;
2006 
2007     SKIP(parser);
2008 
2009     /* Scan the directive name. */
2010 
2011     if (!yaml_parser_scan_directive_name(parser, start_mark, &name))
2012         goto error;
2013 
2014     /* Is it a YAML directive? */
2015 
2016     if (strcmp((char *)name, "YAML") == 0)
2017     {
2018         /* Scan the VERSION directive value. */
2019 
2020         if (!yaml_parser_scan_version_directive_value(parser, start_mark,
2021                     &major, &minor))
2022             goto error;
2023 
2024         end_mark = parser->mark;
2025 
2026         /* Create a VERSION-DIRECTIVE token. */
2027 
2028         VERSION_DIRECTIVE_TOKEN_INIT(*token, major, minor,
2029                 start_mark, end_mark);
2030     }
2031 
2032     /* Is it a TAG directive? */
2033 
2034     else if (strcmp((char *)name, "TAG") == 0)
2035     {
2036         /* Scan the TAG directive value. */
2037 
2038         if (!yaml_parser_scan_tag_directive_value(parser, start_mark,
2039                     &handle, &prefix))
2040             goto error;
2041 
2042         end_mark = parser->mark;
2043 
2044         /* Create a TAG-DIRECTIVE token. */
2045 
2046         TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix,
2047                 start_mark, end_mark);
2048     }
2049 
2050     /* Unknown directive. */
2051 
2052     else
2053     {
2054         yaml_parser_set_scanner_error(parser, "while scanning a directive",
2055                 start_mark, "found unknown directive name");
2056         goto error;
2057     }
2058 
2059     /* Eat the rest of the line including any comments. */
2060 
2061     if (!CACHE(parser, 1)) goto error;
2062 
2063     while (IS_BLANK(parser->buffer)) {
2064         SKIP(parser);
2065         if (!CACHE(parser, 1)) goto error;
2066     }
2067 
2068     if (CHECK(parser->buffer, '#')) {
2069         while (!IS_BREAKZ(parser->buffer)) {
2070             SKIP(parser);
2071             if (!CACHE(parser, 1)) goto error;
2072         }
2073     }
2074 
2075     /* Check if we are at the end of the line. */
2076 
2077     if (!IS_BREAKZ(parser->buffer)) {
2078         yaml_parser_set_scanner_error(parser, "while scanning a directive",
2079                 start_mark, "did not find expected comment or line break");
2080         goto error;
2081     }
2082 
2083     /* Eat a line break. */
2084 
2085     if (IS_BREAK(parser->buffer)) {
2086         if (!CACHE(parser, 2)) goto error;
2087         SKIP_LINE(parser);
2088     }
2089 
2090     yaml_free(name);
2091 
2092     return 1;
2093 
2094 error:
2095     yaml_free(prefix);
2096     yaml_free(handle);
2097     yaml_free(name);
2098     return 0;
2099 }
2100 
2101 /*
2102  * Scan the directive name.
2103  *
2104  * Scope:
2105  *      %YAML   1.1     # a comment \n
2106  *       ^^^^
2107  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
2108  *       ^^^
2109  */
2110 
2111 static int
yaml_parser_scan_directive_name(yaml_parser_t * parser,yaml_mark_t start_mark,yaml_char_t ** name)2112 yaml_parser_scan_directive_name(yaml_parser_t *parser,
2113         yaml_mark_t start_mark, yaml_char_t **name)
2114 {
2115     yaml_string_t string = NULL_STRING;
2116 
2117     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2118 
2119     /* Consume the directive name. */
2120 
2121     if (!CACHE(parser, 1)) goto error;
2122 
2123     while (IS_ALPHA(parser->buffer))
2124     {
2125         if (!READ(parser, string)) goto error;
2126         if (!CACHE(parser, 1)) goto error;
2127     }
2128 
2129     /* Check if the name is empty. */
2130 
2131     if (string.start == string.pointer) {
2132         yaml_parser_set_scanner_error(parser, "while scanning a directive",
2133                 start_mark, "could not find expected directive name");
2134         goto error;
2135     }
2136 
2137     /* Check for an blank character after the name. */
2138 
2139     if (!IS_BLANKZ(parser->buffer)) {
2140         yaml_parser_set_scanner_error(parser, "while scanning a directive",
2141                 start_mark, "found unexpected non-alphabetical character");
2142         goto error;
2143     }
2144 
2145     *name = string.start;
2146 
2147     return 1;
2148 
2149 error:
2150     STRING_DEL(parser, string);
2151     return 0;
2152 }
2153 
2154 /*
2155  * Scan the value of VERSION-DIRECTIVE.
2156  *
2157  * Scope:
2158  *      %YAML   1.1     # a comment \n
2159  *           ^^^^^^
2160  */
2161 
2162 static int
yaml_parser_scan_version_directive_value(yaml_parser_t * parser,yaml_mark_t start_mark,int * major,int * minor)2163 yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
2164         yaml_mark_t start_mark, int *major, int *minor)
2165 {
2166     /* Eat whitespaces. */
2167 
2168     if (!CACHE(parser, 1)) return 0;
2169 
2170     while (IS_BLANK(parser->buffer)) {
2171         SKIP(parser);
2172         if (!CACHE(parser, 1)) return 0;
2173     }
2174 
2175     /* Consume the major version number. */
2176 
2177     if (!yaml_parser_scan_version_directive_number(parser, start_mark, major))
2178         return 0;
2179 
2180     /* Eat '.'. */
2181 
2182     if (!CHECK(parser->buffer, '.')) {
2183         return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2184                 start_mark, "did not find expected digit or '.' character");
2185     }
2186 
2187     SKIP(parser);
2188 
2189     /* Consume the minor version number. */
2190 
2191     if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor))
2192         return 0;
2193 
2194     return 1;
2195 }
2196 
2197 #define MAX_NUMBER_LENGTH   9
2198 
2199 /*
2200  * Scan the version number of VERSION-DIRECTIVE.
2201  *
2202  * Scope:
2203  *      %YAML   1.1     # a comment \n
2204  *              ^
2205  *      %YAML   1.1     # a comment \n
2206  *                ^
2207  */
2208 
2209 static int
yaml_parser_scan_version_directive_number(yaml_parser_t * parser,yaml_mark_t start_mark,int * number)2210 yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
2211         yaml_mark_t start_mark, int *number)
2212 {
2213     int value = 0;
2214     size_t length = 0;
2215 
2216     /* Repeat while the next character is digit. */
2217 
2218     if (!CACHE(parser, 1)) return 0;
2219 
2220     while (IS_DIGIT(parser->buffer))
2221     {
2222         /* Check if the number is too long. */
2223 
2224         if (++length > MAX_NUMBER_LENGTH) {
2225             return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2226                     start_mark, "found extremely long version number");
2227         }
2228 
2229         value = value*10 + AS_DIGIT(parser->buffer);
2230 
2231         SKIP(parser);
2232 
2233         if (!CACHE(parser, 1)) return 0;
2234     }
2235 
2236     /* Check if the number was present. */
2237 
2238     if (!length) {
2239         return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2240                 start_mark, "did not find expected version number");
2241     }
2242 
2243     *number = value;
2244 
2245     return 1;
2246 }
2247 
2248 /*
2249  * Scan the value of a TAG-DIRECTIVE token.
2250  *
2251  * Scope:
2252  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
2253  *          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2254  */
2255 
2256 static int
yaml_parser_scan_tag_directive_value(yaml_parser_t * parser,yaml_mark_t start_mark,yaml_char_t ** handle,yaml_char_t ** prefix)2257 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
2258         yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix)
2259 {
2260     yaml_char_t *handle_value = NULL;
2261     yaml_char_t *prefix_value = NULL;
2262 
2263     /* Eat whitespaces. */
2264 
2265     if (!CACHE(parser, 1)) goto error;
2266 
2267     while (IS_BLANK(parser->buffer)) {
2268         SKIP(parser);
2269         if (!CACHE(parser, 1)) goto error;
2270     }
2271 
2272     /* Scan a handle. */
2273 
2274     if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value))
2275         goto error;
2276 
2277     /* Expect a whitespace. */
2278 
2279     if (!CACHE(parser, 1)) goto error;
2280 
2281     if (!IS_BLANK(parser->buffer)) {
2282         yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2283                 start_mark, "did not find expected whitespace");
2284         goto error;
2285     }
2286 
2287     /* Eat whitespaces. */
2288 
2289     while (IS_BLANK(parser->buffer)) {
2290         SKIP(parser);
2291         if (!CACHE(parser, 1)) goto error;
2292     }
2293 
2294     /* Scan a prefix. */
2295 
2296     if (!yaml_parser_scan_tag_uri(parser, 1, 1, NULL, start_mark, &prefix_value))
2297         goto error;
2298 
2299     /* Expect a whitespace or line break. */
2300 
2301     if (!CACHE(parser, 1)) goto error;
2302 
2303     if (!IS_BLANKZ(parser->buffer)) {
2304         yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2305                 start_mark, "did not find expected whitespace or line break");
2306         goto error;
2307     }
2308 
2309     *handle = handle_value;
2310     *prefix = prefix_value;
2311 
2312     return 1;
2313 
2314 error:
2315     yaml_free(handle_value);
2316     yaml_free(prefix_value);
2317     return 0;
2318 }
2319 
2320 static int
yaml_parser_scan_anchor(yaml_parser_t * parser,yaml_token_t * token,yaml_token_type_t type)2321 yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
2322         yaml_token_type_t type)
2323 {
2324     int length = 0;
2325     yaml_mark_t start_mark, end_mark;
2326     yaml_string_t string = NULL_STRING;
2327 
2328     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2329 
2330     /* Eat the indicator character. */
2331 
2332     start_mark = parser->mark;
2333 
2334     SKIP(parser);
2335 
2336     /* Consume the value. */
2337 
2338     if (!CACHE(parser, 1)) goto error;
2339 
2340     while (IS_ALPHA(parser->buffer)) {
2341         if (!READ(parser, string)) goto error;
2342         if (!CACHE(parser, 1)) goto error;
2343         length ++;
2344     }
2345 
2346     end_mark = parser->mark;
2347 
2348     /*
2349      * Check if length of the anchor is greater than 0 and it is followed by
2350      * a whitespace character or one of the indicators:
2351      *
2352      *      '?', ':', ',', ']', '}', '%', '@', '`'.
2353      */
2354 
2355     if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?')
2356                 || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',')
2357                 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}')
2358                 || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@')
2359                 || CHECK(parser->buffer, '`'))) {
2360         yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ?
2361                 "while scanning an anchor" : "while scanning an alias", start_mark,
2362                 "did not find expected alphabetic or numeric character");
2363         goto error;
2364     }
2365 
2366     /* Create a token. */
2367 
2368     if (type == YAML_ANCHOR_TOKEN) {
2369         ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2370     }
2371     else {
2372         ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2373     }
2374 
2375     return 1;
2376 
2377 error:
2378     STRING_DEL(parser, string);
2379     return 0;
2380 }
2381 
2382 /*
2383  * Scan a TAG token.
2384  */
2385 
2386 static int
yaml_parser_scan_tag(yaml_parser_t * parser,yaml_token_t * token)2387 yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token)
2388 {
2389     yaml_char_t *handle = NULL;
2390     yaml_char_t *suffix = NULL;
2391     yaml_mark_t start_mark, end_mark;
2392 
2393     start_mark = parser->mark;
2394 
2395     /* Check if the tag is in the canonical form. */
2396 
2397     if (!CACHE(parser, 2)) goto error;
2398 
2399     if (CHECK_AT(parser->buffer, '<', 1))
2400     {
2401         /* Set the handle to '' */
2402 
2403         handle = YAML_MALLOC(1);
2404         if (!handle) goto error;
2405         handle[0] = '\0';
2406 
2407         /* Eat '!<' */
2408 
2409         SKIP(parser);
2410         SKIP(parser);
2411 
2412         /* Consume the tag value. */
2413 
2414         if (!yaml_parser_scan_tag_uri(parser, 1, 0, NULL, start_mark, &suffix))
2415             goto error;
2416 
2417         /* Check for '>' and eat it. */
2418 
2419         if (!CHECK(parser->buffer, '>')) {
2420             yaml_parser_set_scanner_error(parser, "while scanning a tag",
2421                     start_mark, "did not find the expected '>'");
2422             goto error;
2423         }
2424 
2425         SKIP(parser);
2426     }
2427     else
2428     {
2429         /* The tag has either the '!suffix' or the '!handle!suffix' form. */
2430 
2431         /* First, try to scan a handle. */
2432 
2433         if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle))
2434             goto error;
2435 
2436         /* Check if it is, indeed, handle. */
2437 
2438         if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!')
2439         {
2440             /* Scan the suffix now. */
2441 
2442             if (!yaml_parser_scan_tag_uri(parser, 0, 0, NULL, start_mark, &suffix))
2443                 goto error;
2444         }
2445         else
2446         {
2447             /* It wasn't a handle after all.  Scan the rest of the tag. */
2448 
2449             if (!yaml_parser_scan_tag_uri(parser, 0, 0, handle, start_mark, &suffix))
2450                 goto error;
2451 
2452             /* Set the handle to '!'. */
2453 
2454             yaml_free(handle);
2455             handle = YAML_MALLOC(2);
2456             if (!handle) goto error;
2457             handle[0] = '!';
2458             handle[1] = '\0';
2459 
2460             /*
2461              * A special case: the '!' tag.  Set the handle to '' and the
2462              * suffix to '!'.
2463              */
2464 
2465             if (suffix[0] == '\0') {
2466                 yaml_char_t *tmp = handle;
2467                 handle = suffix;
2468                 suffix = tmp;
2469             }
2470         }
2471     }
2472 
2473     /* Check the character which ends the tag. */
2474 
2475     if (!CACHE(parser, 1)) goto error;
2476 
2477     if (!IS_BLANKZ(parser->buffer)) {
2478         if (!parser->flow_level || !CHECK(parser->buffer, ',') ) {
2479             yaml_parser_set_scanner_error(parser, "while scanning a tag",
2480                     start_mark, "did not find expected whitespace or line break");
2481             goto error;
2482         }
2483     }
2484 
2485     end_mark = parser->mark;
2486 
2487     /* Create a token. */
2488 
2489     TAG_TOKEN_INIT(*token, handle, suffix, start_mark, end_mark);
2490 
2491     return 1;
2492 
2493 error:
2494     yaml_free(handle);
2495     yaml_free(suffix);
2496     return 0;
2497 }
2498 
2499 /*
2500  * Scan a tag handle.
2501  */
2502 
2503 static int
yaml_parser_scan_tag_handle(yaml_parser_t * parser,int directive,yaml_mark_t start_mark,yaml_char_t ** handle)2504 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
2505         yaml_mark_t start_mark, yaml_char_t **handle)
2506 {
2507     yaml_string_t string = NULL_STRING;
2508 
2509     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2510 
2511     /* Check the initial '!' character. */
2512 
2513     if (!CACHE(parser, 1)) goto error;
2514 
2515     if (!CHECK(parser->buffer, '!')) {
2516         yaml_parser_set_scanner_error(parser, directive ?
2517                 "while scanning a tag directive" : "while scanning a tag",
2518                 start_mark, "did not find expected '!'");
2519         goto error;
2520     }
2521 
2522     /* Copy the '!' character. */
2523 
2524     if (!READ(parser, string)) goto error;
2525 
2526     /* Copy all subsequent alphabetical and numerical characters. */
2527 
2528     if (!CACHE(parser, 1)) goto error;
2529 
2530     while (IS_ALPHA(parser->buffer))
2531     {
2532         if (!READ(parser, string)) goto error;
2533         if (!CACHE(parser, 1)) goto error;
2534     }
2535 
2536     /* Check if the trailing character is '!' and copy it. */
2537 
2538     if (CHECK(parser->buffer, '!'))
2539     {
2540         if (!READ(parser, string)) goto error;
2541     }
2542     else
2543     {
2544         /*
2545          * It's either the '!' tag or not really a tag handle.  If it's a %TAG
2546          * directive, it's an error.  If it's a tag token, it must be a part of
2547          * URI.
2548          */
2549 
2550         if (directive && !(string.start[0] == '!' && string.start[1] == '\0')) {
2551             yaml_parser_set_scanner_error(parser, "while parsing a tag directive",
2552                     start_mark, "did not find expected '!'");
2553             goto error;
2554         }
2555     }
2556 
2557     *handle = string.start;
2558 
2559     return 1;
2560 
2561 error:
2562     STRING_DEL(parser, string);
2563     return 0;
2564 }
2565 
2566 /*
2567  * Scan a tag.
2568  */
2569 
2570 static int
yaml_parser_scan_tag_uri(yaml_parser_t * parser,int uri_char,int directive,yaml_char_t * head,yaml_mark_t start_mark,yaml_char_t ** uri)2571 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int uri_char, int directive,
2572         yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
2573 {
2574     size_t length = head ? strlen((char *)head) : 0;
2575     yaml_string_t string = NULL_STRING;
2576 
2577     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2578 
2579     /* Resize the string to include the head. */
2580 
2581     while ((size_t)(string.end - string.start) <= length) {
2582         if (!yaml_string_extend(&string.start, &string.pointer, &string.end)) {
2583             parser->error = YAML_MEMORY_ERROR;
2584             goto error;
2585         }
2586     }
2587 
2588     /*
2589      * Copy the head if needed.
2590      *
2591      * Note that we don't copy the leading '!' character.
2592      */
2593 
2594     if (length > 1) {
2595         memcpy(string.start, head+1, length-1);
2596         string.pointer += length-1;
2597     }
2598 
2599     /* Scan the tag. */
2600 
2601     if (!CACHE(parser, 1)) goto error;
2602 
2603     /*
2604      * The set of characters that may appear in URI is as follows:
2605      *
2606      *      '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
2607      *      '=', '+', '$', '.', '!', '~', '*', '\'', '(', ')', '%'.
2608      *
2609      * If we are inside a verbatim tag <...> (parameter uri_char is true)
2610      * then also the following flow indicators are allowed:
2611      *      ',', '[', ']'
2612      */
2613 
2614     while (IS_ALPHA(parser->buffer) || CHECK(parser->buffer, ';')
2615             || CHECK(parser->buffer, '/') || CHECK(parser->buffer, '?')
2616             || CHECK(parser->buffer, ':') || CHECK(parser->buffer, '@')
2617             || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '=')
2618             || CHECK(parser->buffer, '+') || CHECK(parser->buffer, '$')
2619             || CHECK(parser->buffer, '.') || CHECK(parser->buffer, '%')
2620             || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '~')
2621             || CHECK(parser->buffer, '*') || CHECK(parser->buffer, '\'')
2622             || CHECK(parser->buffer, '(') || CHECK(parser->buffer, ')')
2623             || (uri_char && (
2624                 CHECK(parser->buffer, ',')
2625                 || CHECK(parser->buffer, '[') || CHECK(parser->buffer, ']')
2626                 )
2627             ))
2628     {
2629         /* Check if it is a URI-escape sequence. */
2630 
2631         if (CHECK(parser->buffer, '%')) {
2632             if (!STRING_EXTEND(parser, string))
2633                 goto error;
2634 
2635             if (!yaml_parser_scan_uri_escapes(parser,
2636                         directive, start_mark, &string)) goto error;
2637         }
2638         else {
2639             if (!READ(parser, string)) goto error;
2640         }
2641 
2642         length ++;
2643         if (!CACHE(parser, 1)) goto error;
2644     }
2645 
2646     /* Check if the tag is non-empty. */
2647 
2648     if (!length) {
2649         if (!STRING_EXTEND(parser, string))
2650             goto error;
2651 
2652         yaml_parser_set_scanner_error(parser, directive ?
2653                 "while parsing a %TAG directive" : "while parsing a tag",
2654                 start_mark, "did not find expected tag URI");
2655         goto error;
2656     }
2657 
2658     *uri = string.start;
2659 
2660     return 1;
2661 
2662 error:
2663     STRING_DEL(parser, string);
2664     return 0;
2665 }
2666 
2667 /*
2668  * Decode an URI-escape sequence corresponding to a single UTF-8 character.
2669  */
2670 
2671 static int
yaml_parser_scan_uri_escapes(yaml_parser_t * parser,int directive,yaml_mark_t start_mark,yaml_string_t * string)2672 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
2673         yaml_mark_t start_mark, yaml_string_t *string)
2674 {
2675     int width = 0;
2676 
2677     /* Decode the required number of characters. */
2678 
2679     do {
2680 
2681         unsigned char octet = 0;
2682 
2683         /* Check for a URI-escaped octet. */
2684 
2685         if (!CACHE(parser, 3)) return 0;
2686 
2687         if (!(CHECK(parser->buffer, '%')
2688                     && IS_HEX_AT(parser->buffer, 1)
2689                     && IS_HEX_AT(parser->buffer, 2))) {
2690             return yaml_parser_set_scanner_error(parser, directive ?
2691                     "while parsing a %TAG directive" : "while parsing a tag",
2692                     start_mark, "did not find URI escaped octet");
2693         }
2694 
2695         /* Get the octet. */
2696 
2697         octet = (AS_HEX_AT(parser->buffer, 1) << 4) + AS_HEX_AT(parser->buffer, 2);
2698 
2699         /* If it is the leading octet, determine the length of the UTF-8 sequence. */
2700 
2701         if (!width)
2702         {
2703             width = (octet & 0x80) == 0x00 ? 1 :
2704                     (octet & 0xE0) == 0xC0 ? 2 :
2705                     (octet & 0xF0) == 0xE0 ? 3 :
2706                     (octet & 0xF8) == 0xF0 ? 4 : 0;
2707             if (!width) {
2708                 return yaml_parser_set_scanner_error(parser, directive ?
2709                         "while parsing a %TAG directive" : "while parsing a tag",
2710                         start_mark, "found an incorrect leading UTF-8 octet");
2711             }
2712         }
2713         else
2714         {
2715             /* Check if the trailing octet is correct. */
2716 
2717             if ((octet & 0xC0) != 0x80) {
2718                 return yaml_parser_set_scanner_error(parser, directive ?
2719                         "while parsing a %TAG directive" : "while parsing a tag",
2720                         start_mark, "found an incorrect trailing UTF-8 octet");
2721             }
2722         }
2723 
2724         /* Copy the octet and move the pointers. */
2725 
2726         *(string->pointer++) = octet;
2727         SKIP(parser);
2728         SKIP(parser);
2729         SKIP(parser);
2730 
2731     } while (--width);
2732 
2733     return 1;
2734 }
2735 
2736 /*
2737  * Scan a block scalar.
2738  */
2739 
2740 static int
yaml_parser_scan_block_scalar(yaml_parser_t * parser,yaml_token_t * token,int literal)2741 yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
2742         int literal)
2743 {
2744     yaml_mark_t start_mark;
2745     yaml_mark_t end_mark;
2746     yaml_string_t string = NULL_STRING;
2747     yaml_string_t leading_break = NULL_STRING;
2748     yaml_string_t trailing_breaks = NULL_STRING;
2749     int chomping = 0;
2750     int increment = 0;
2751     int indent = 0;
2752     int leading_blank = 0;
2753     int trailing_blank = 0;
2754 
2755     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2756     if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
2757     if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
2758 
2759     /* Eat the indicator '|' or '>'. */
2760 
2761     start_mark = parser->mark;
2762 
2763     SKIP(parser);
2764 
2765     /* Scan the additional block scalar indicators. */
2766 
2767     if (!CACHE(parser, 1)) goto error;
2768 
2769     /* Check for a chomping indicator. */
2770 
2771     if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-'))
2772     {
2773         /* Set the chomping method and eat the indicator. */
2774 
2775         chomping = CHECK(parser->buffer, '+') ? +1 : -1;
2776 
2777         SKIP(parser);
2778 
2779         /* Check for an indentation indicator. */
2780 
2781         if (!CACHE(parser, 1)) goto error;
2782 
2783         if (IS_DIGIT(parser->buffer))
2784         {
2785             /* Check that the indentation is greater than 0. */
2786 
2787             if (CHECK(parser->buffer, '0')) {
2788                 yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2789                         start_mark, "found an indentation indicator equal to 0");
2790                 goto error;
2791             }
2792 
2793             /* Get the indentation level and eat the indicator. */
2794 
2795             increment = AS_DIGIT(parser->buffer);
2796 
2797             SKIP(parser);
2798         }
2799     }
2800 
2801     /* Do the same as above, but in the opposite order. */
2802 
2803     else if (IS_DIGIT(parser->buffer))
2804     {
2805         if (CHECK(parser->buffer, '0')) {
2806             yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2807                     start_mark, "found an indentation indicator equal to 0");
2808             goto error;
2809         }
2810 
2811         increment = AS_DIGIT(parser->buffer);
2812 
2813         SKIP(parser);
2814 
2815         if (!CACHE(parser, 1)) goto error;
2816 
2817         if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) {
2818             chomping = CHECK(parser->buffer, '+') ? +1 : -1;
2819 
2820             SKIP(parser);
2821         }
2822     }
2823 
2824     /* Eat whitespaces and comments to the end of the line. */
2825 
2826     if (!CACHE(parser, 1)) goto error;
2827 
2828     while (IS_BLANK(parser->buffer)) {
2829         SKIP(parser);
2830         if (!CACHE(parser, 1)) goto error;
2831     }
2832 
2833     if (CHECK(parser->buffer, '#')) {
2834         while (!IS_BREAKZ(parser->buffer)) {
2835             SKIP(parser);
2836             if (!CACHE(parser, 1)) goto error;
2837         }
2838     }
2839 
2840     /* Check if we are at the end of the line. */
2841 
2842     if (!IS_BREAKZ(parser->buffer)) {
2843         yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2844                 start_mark, "did not find expected comment or line break");
2845         goto error;
2846     }
2847 
2848     /* Eat a line break. */
2849 
2850     if (IS_BREAK(parser->buffer)) {
2851         if (!CACHE(parser, 2)) goto error;
2852         SKIP_LINE(parser);
2853     }
2854 
2855     end_mark = parser->mark;
2856 
2857     /* Set the indentation level if it was specified. */
2858 
2859     if (increment) {
2860         indent = parser->indent >= 0 ? parser->indent+increment : increment;
2861     }
2862 
2863     /* Scan the leading line breaks and determine the indentation level if needed. */
2864 
2865     if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks,
2866                 start_mark, &end_mark)) goto error;
2867 
2868     /* Scan the block scalar content. */
2869 
2870     if (!CACHE(parser, 1)) goto error;
2871 
2872     while ((int)parser->mark.column == indent && !(IS_Z(parser->buffer)))
2873     {
2874         /*
2875          * We are at the beginning of a non-empty line.
2876          */
2877 
2878         /* Is it a trailing whitespace? */
2879 
2880         trailing_blank = IS_BLANK(parser->buffer);
2881 
2882         /* Check if we need to fold the leading line break. */
2883 
2884         if (!literal && (*leading_break.start == '\n')
2885                 && !leading_blank && !trailing_blank)
2886         {
2887             /* Do we need to join the lines by space? */
2888 
2889             if (*trailing_breaks.start == '\0') {
2890                 if (!STRING_EXTEND(parser, string)) goto error;
2891                 *(string.pointer ++) = ' ';
2892             }
2893 
2894             CLEAR(parser, leading_break);
2895         }
2896         else {
2897             if (!JOIN(parser, string, leading_break)) goto error;
2898             CLEAR(parser, leading_break);
2899         }
2900 
2901         /* Append the remaining line breaks. */
2902 
2903         if (!JOIN(parser, string, trailing_breaks)) goto error;
2904         CLEAR(parser, trailing_breaks);
2905 
2906         /* Is it a leading whitespace? */
2907 
2908         leading_blank = IS_BLANK(parser->buffer);
2909 
2910         /* Consume the current line. */
2911 
2912         while (!IS_BREAKZ(parser->buffer)) {
2913             if (!READ(parser, string)) goto error;
2914             if (!CACHE(parser, 1)) goto error;
2915         }
2916 
2917         /* Consume the line break. */
2918 
2919         if (!CACHE(parser, 2)) goto error;
2920 
2921         if (!READ_LINE(parser, leading_break)) goto error;
2922 
2923         /* Eat the following indentation spaces and line breaks. */
2924 
2925         if (!yaml_parser_scan_block_scalar_breaks(parser,
2926                     &indent, &trailing_breaks, start_mark, &end_mark)) goto error;
2927     }
2928 
2929     /* Chomp the tail. */
2930 
2931     if (chomping != -1) {
2932         if (!JOIN(parser, string, leading_break)) goto error;
2933     }
2934     if (chomping == 1) {
2935         if (!JOIN(parser, string, trailing_breaks)) goto error;
2936     }
2937 
2938     /* Create a token. */
2939 
2940     SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
2941             literal ? YAML_LITERAL_SCALAR_STYLE : YAML_FOLDED_SCALAR_STYLE,
2942             start_mark, end_mark);
2943 
2944     STRING_DEL(parser, leading_break);
2945     STRING_DEL(parser, trailing_breaks);
2946 
2947     return 1;
2948 
2949 error:
2950     STRING_DEL(parser, string);
2951     STRING_DEL(parser, leading_break);
2952     STRING_DEL(parser, trailing_breaks);
2953 
2954     return 0;
2955 }
2956 
2957 /*
2958  * Scan indentation spaces and line breaks for a block scalar.  Determine the
2959  * indentation level if needed.
2960  */
2961 
2962 static int
yaml_parser_scan_block_scalar_breaks(yaml_parser_t * parser,int * indent,yaml_string_t * breaks,yaml_mark_t start_mark,yaml_mark_t * end_mark)2963 yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
2964         int *indent, yaml_string_t *breaks,
2965         yaml_mark_t start_mark, yaml_mark_t *end_mark)
2966 {
2967     int max_indent = 0;
2968 
2969     *end_mark = parser->mark;
2970 
2971     /* Eat the indentation spaces and line breaks. */
2972 
2973     while (1)
2974     {
2975         /* Eat the indentation spaces. */
2976 
2977         if (!CACHE(parser, 1)) return 0;
2978 
2979         while ((!*indent || (int)parser->mark.column < *indent)
2980                 && IS_SPACE(parser->buffer)) {
2981             SKIP(parser);
2982             if (!CACHE(parser, 1)) return 0;
2983         }
2984 
2985         if ((int)parser->mark.column > max_indent)
2986             max_indent = (int)parser->mark.column;
2987 
2988         /* Check for a tab character messing the indentation. */
2989 
2990         if ((!*indent || (int)parser->mark.column < *indent)
2991                 && IS_TAB(parser->buffer)) {
2992             return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2993                     start_mark, "found a tab character where an indentation space is expected");
2994         }
2995 
2996         /* Have we found a non-empty line? */
2997 
2998         if (!IS_BREAK(parser->buffer)) break;
2999 
3000         /* Consume the line break. */
3001 
3002         if (!CACHE(parser, 2)) return 0;
3003         if (!READ_LINE(parser, *breaks)) return 0;
3004         *end_mark = parser->mark;
3005     }
3006 
3007     /* Determine the indentation level if needed. */
3008 
3009     if (!*indent) {
3010         *indent = max_indent;
3011         if (*indent < parser->indent + 1)
3012             *indent = parser->indent + 1;
3013         if (*indent < 1)
3014             *indent = 1;
3015     }
3016 
3017    return 1;
3018 }
3019 
3020 /*
3021  * Scan a quoted scalar.
3022  */
3023 
3024 static int
yaml_parser_scan_flow_scalar(yaml_parser_t * parser,yaml_token_t * token,int single)3025 yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
3026         int single)
3027 {
3028     yaml_mark_t start_mark;
3029     yaml_mark_t end_mark;
3030     yaml_string_t string = NULL_STRING;
3031     yaml_string_t leading_break = NULL_STRING;
3032     yaml_string_t trailing_breaks = NULL_STRING;
3033     yaml_string_t whitespaces = NULL_STRING;
3034     int leading_blanks;
3035 
3036     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3037     if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3038     if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3039     if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
3040 
3041     /* Eat the left quote. */
3042 
3043     start_mark = parser->mark;
3044 
3045     SKIP(parser);
3046 
3047     /* Consume the content of the quoted scalar. */
3048 
3049     while (1)
3050     {
3051         /* Check that there are no document indicators at the beginning of the line. */
3052 
3053         if (!CACHE(parser, 4)) goto error;
3054 
3055         if (parser->mark.column == 0 &&
3056             ((CHECK_AT(parser->buffer, '-', 0) &&
3057               CHECK_AT(parser->buffer, '-', 1) &&
3058               CHECK_AT(parser->buffer, '-', 2)) ||
3059              (CHECK_AT(parser->buffer, '.', 0) &&
3060               CHECK_AT(parser->buffer, '.', 1) &&
3061               CHECK_AT(parser->buffer, '.', 2))) &&
3062             IS_BLANKZ_AT(parser->buffer, 3))
3063         {
3064             yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3065                     start_mark, "found unexpected document indicator");
3066             goto error;
3067         }
3068 
3069         /* Check for EOF. */
3070 
3071         if (IS_Z(parser->buffer)) {
3072             yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3073                     start_mark, "found unexpected end of stream");
3074             goto error;
3075         }
3076 
3077         /* Consume non-blank characters. */
3078 
3079         if (!CACHE(parser, 2)) goto error;
3080 
3081         leading_blanks = 0;
3082 
3083         while (!IS_BLANKZ(parser->buffer))
3084         {
3085             /* Check for an escaped single quote. */
3086 
3087             if (single && CHECK_AT(parser->buffer, '\'', 0)
3088                     && CHECK_AT(parser->buffer, '\'', 1))
3089             {
3090                 if (!STRING_EXTEND(parser, string)) goto error;
3091                 *(string.pointer++) = '\'';
3092                 SKIP(parser);
3093                 SKIP(parser);
3094             }
3095 
3096             /* Check for the right quote. */
3097 
3098             else if (CHECK(parser->buffer, single ? '\'' : '"'))
3099             {
3100                 break;
3101             }
3102 
3103             /* Check for an escaped line break. */
3104 
3105             else if (!single && CHECK(parser->buffer, '\\')
3106                     && IS_BREAK_AT(parser->buffer, 1))
3107             {
3108                 if (!CACHE(parser, 3)) goto error;
3109                 SKIP(parser);
3110                 SKIP_LINE(parser);
3111                 leading_blanks = 1;
3112                 break;
3113             }
3114 
3115             /* Check for an escape sequence. */
3116 
3117             else if (!single && CHECK(parser->buffer, '\\'))
3118             {
3119                 size_t code_length = 0;
3120 
3121                 if (!STRING_EXTEND(parser, string)) goto error;
3122 
3123                 /* Check the escape character. */
3124 
3125                 switch (parser->buffer.pointer[1])
3126                 {
3127                     case '0':
3128                         *(string.pointer++) = '\0';
3129                         break;
3130 
3131                     case 'a':
3132                         *(string.pointer++) = '\x07';
3133                         break;
3134 
3135                     case 'b':
3136                         *(string.pointer++) = '\x08';
3137                         break;
3138 
3139                     case 't':
3140                     case '\t':
3141                         *(string.pointer++) = '\x09';
3142                         break;
3143 
3144                     case 'n':
3145                         *(string.pointer++) = '\x0A';
3146                         break;
3147 
3148                     case 'v':
3149                         *(string.pointer++) = '\x0B';
3150                         break;
3151 
3152                     case 'f':
3153                         *(string.pointer++) = '\x0C';
3154                         break;
3155 
3156                     case 'r':
3157                         *(string.pointer++) = '\x0D';
3158                         break;
3159 
3160                     case 'e':
3161                         *(string.pointer++) = '\x1B';
3162                         break;
3163 
3164                     case ' ':
3165                         *(string.pointer++) = '\x20';
3166                         break;
3167 
3168                     case '"':
3169                         *(string.pointer++) = '"';
3170                         break;
3171 
3172                     case '/':
3173                         *(string.pointer++) = '/';
3174                         break;
3175 
3176                     case '\\':
3177                         *(string.pointer++) = '\\';
3178                         break;
3179 
3180                     case 'N':   /* NEL (#x85) */
3181                         *(string.pointer++) = '\xC2';
3182                         *(string.pointer++) = '\x85';
3183                         break;
3184 
3185                     case '_':   /* #xA0 */
3186                         *(string.pointer++) = '\xC2';
3187                         *(string.pointer++) = '\xA0';
3188                         break;
3189 
3190                     case 'L':   /* LS (#x2028) */
3191                         *(string.pointer++) = '\xE2';
3192                         *(string.pointer++) = '\x80';
3193                         *(string.pointer++) = '\xA8';
3194                         break;
3195 
3196                     case 'P':   /* PS (#x2029) */
3197                         *(string.pointer++) = '\xE2';
3198                         *(string.pointer++) = '\x80';
3199                         *(string.pointer++) = '\xA9';
3200                         break;
3201 
3202                     case 'x':
3203                         code_length = 2;
3204                         break;
3205 
3206                     case 'u':
3207                         code_length = 4;
3208                         break;
3209 
3210                     case 'U':
3211                         code_length = 8;
3212                         break;
3213 
3214                     default:
3215                         yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3216                                 start_mark, "found unknown escape character");
3217                         goto error;
3218                 }
3219 
3220                 SKIP(parser);
3221                 SKIP(parser);
3222 
3223                 /* Consume an arbitrary escape code. */
3224 
3225                 if (code_length)
3226                 {
3227                     unsigned int value = 0;
3228                     size_t k;
3229 
3230                     /* Scan the character value. */
3231 
3232                     if (!CACHE(parser, code_length)) goto error;
3233 
3234                     for (k = 0; k < code_length; k ++) {
3235                         if (!IS_HEX_AT(parser->buffer, k)) {
3236                             yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3237                                     start_mark, "did not find expected hexdecimal number");
3238                             goto error;
3239                         }
3240                         value = (value << 4) + AS_HEX_AT(parser->buffer, k);
3241                     }
3242 
3243                     /* Check the value and write the character. */
3244 
3245                     if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
3246                         yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3247                                 start_mark, "found invalid Unicode character escape code");
3248                         goto error;
3249                     }
3250 
3251                     if (value <= 0x7F) {
3252                         *(string.pointer++) = value;
3253                     }
3254                     else if (value <= 0x7FF) {
3255                         *(string.pointer++) = 0xC0 + (value >> 6);
3256                         *(string.pointer++) = 0x80 + (value & 0x3F);
3257                     }
3258                     else if (value <= 0xFFFF) {
3259                         *(string.pointer++) = 0xE0 + (value >> 12);
3260                         *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3261                         *(string.pointer++) = 0x80 + (value & 0x3F);
3262                     }
3263                     else {
3264                         *(string.pointer++) = 0xF0 + (value >> 18);
3265                         *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F);
3266                         *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3267                         *(string.pointer++) = 0x80 + (value & 0x3F);
3268                     }
3269 
3270                     /* Advance the pointer. */
3271 
3272                     for (k = 0; k < code_length; k ++) {
3273                         SKIP(parser);
3274                     }
3275                 }
3276             }
3277 
3278             else
3279             {
3280                 /* It is a non-escaped non-blank character. */
3281 
3282                 if (!READ(parser, string)) goto error;
3283             }
3284 
3285             if (!CACHE(parser, 2)) goto error;
3286         }
3287 
3288         /* Check if we are at the end of the scalar. */
3289 
3290         /* Fix for crash unitialized value crash
3291          * Credit for the bug and input is to OSS Fuzz
3292          * Credit for the fix to Alex Gaynor
3293          */
3294         if (!CACHE(parser, 1)) goto error;
3295         if (CHECK(parser->buffer, single ? '\'' : '"'))
3296             break;
3297 
3298         /* Consume blank characters. */
3299 
3300         if (!CACHE(parser, 1)) goto error;
3301 
3302         while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3303         {
3304             if (IS_BLANK(parser->buffer))
3305             {
3306                 /* Consume a space or a tab character. */
3307 
3308                 if (!leading_blanks) {
3309                     if (!READ(parser, whitespaces)) goto error;
3310                 }
3311                 else {
3312                     SKIP(parser);
3313                 }
3314             }
3315             else
3316             {
3317                 if (!CACHE(parser, 2)) goto error;
3318 
3319                 /* Check if it is a first line break. */
3320 
3321                 if (!leading_blanks)
3322                 {
3323                     CLEAR(parser, whitespaces);
3324                     if (!READ_LINE(parser, leading_break)) goto error;
3325                     leading_blanks = 1;
3326                 }
3327                 else
3328                 {
3329                     if (!READ_LINE(parser, trailing_breaks)) goto error;
3330                 }
3331             }
3332             if (!CACHE(parser, 1)) goto error;
3333         }
3334 
3335         /* Join the whitespaces or fold line breaks. */
3336 
3337         if (leading_blanks)
3338         {
3339             /* Do we need to fold line breaks? */
3340 
3341             if (leading_break.start[0] == '\n') {
3342                 if (trailing_breaks.start[0] == '\0') {
3343                     if (!STRING_EXTEND(parser, string)) goto error;
3344                     *(string.pointer++) = ' ';
3345                 }
3346                 else {
3347                     if (!JOIN(parser, string, trailing_breaks)) goto error;
3348                     CLEAR(parser, trailing_breaks);
3349                 }
3350                 CLEAR(parser, leading_break);
3351             }
3352             else {
3353                 if (!JOIN(parser, string, leading_break)) goto error;
3354                 if (!JOIN(parser, string, trailing_breaks)) goto error;
3355                 CLEAR(parser, leading_break);
3356                 CLEAR(parser, trailing_breaks);
3357             }
3358         }
3359         else
3360         {
3361             if (!JOIN(parser, string, whitespaces)) goto error;
3362             CLEAR(parser, whitespaces);
3363         }
3364     }
3365 
3366     /* Eat the right quote. */
3367 
3368     SKIP(parser);
3369 
3370     end_mark = parser->mark;
3371 
3372     /* Create a token. */
3373 
3374     SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3375             single ? YAML_SINGLE_QUOTED_SCALAR_STYLE : YAML_DOUBLE_QUOTED_SCALAR_STYLE,
3376             start_mark, end_mark);
3377 
3378     STRING_DEL(parser, leading_break);
3379     STRING_DEL(parser, trailing_breaks);
3380     STRING_DEL(parser, whitespaces);
3381 
3382     return 1;
3383 
3384 error:
3385     STRING_DEL(parser, string);
3386     STRING_DEL(parser, leading_break);
3387     STRING_DEL(parser, trailing_breaks);
3388     STRING_DEL(parser, whitespaces);
3389 
3390     return 0;
3391 }
3392 
3393 /*
3394  * Scan a plain scalar.
3395  */
3396 
3397 static int
yaml_parser_scan_plain_scalar(yaml_parser_t * parser,yaml_token_t * token)3398 yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token)
3399 {
3400     yaml_mark_t start_mark;
3401     yaml_mark_t end_mark;
3402     yaml_string_t string = NULL_STRING;
3403     yaml_string_t leading_break = NULL_STRING;
3404     yaml_string_t trailing_breaks = NULL_STRING;
3405     yaml_string_t whitespaces = NULL_STRING;
3406     int leading_blanks = 0;
3407     int indent = parser->indent+1;
3408 
3409     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3410     if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3411     if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3412     if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
3413 
3414     start_mark = end_mark = parser->mark;
3415 
3416     /* Consume the content of the plain scalar. */
3417 
3418     while (1)
3419     {
3420         /* Check for a document indicator. */
3421 
3422         if (!CACHE(parser, 4)) goto error;
3423 
3424         if (parser->mark.column == 0 &&
3425             ((CHECK_AT(parser->buffer, '-', 0) &&
3426               CHECK_AT(parser->buffer, '-', 1) &&
3427               CHECK_AT(parser->buffer, '-', 2)) ||
3428              (CHECK_AT(parser->buffer, '.', 0) &&
3429               CHECK_AT(parser->buffer, '.', 1) &&
3430               CHECK_AT(parser->buffer, '.', 2))) &&
3431             IS_BLANKZ_AT(parser->buffer, 3)) break;
3432 
3433         /* Check for a comment. */
3434 
3435         if (CHECK(parser->buffer, '#'))
3436             break;
3437 
3438         /* Consume non-blank characters. */
3439 
3440         while (!IS_BLANKZ(parser->buffer))
3441         {
3442             /* Check for "x:" + one of ',?[]{}' in the flow context. TODO: Fix the test "spec-08-13".
3443              * This is not completely according to the spec
3444              * See http://yaml.org/spec/1.1/#id907281 9.1.3. Plain
3445              */
3446 
3447             if (parser->flow_level
3448                     && CHECK(parser->buffer, ':')
3449                     && (
3450                         CHECK_AT(parser->buffer, ',', 1)
3451                         || CHECK_AT(parser->buffer, '?', 1)
3452                         || CHECK_AT(parser->buffer, '[', 1)
3453                         || CHECK_AT(parser->buffer, ']', 1)
3454                         || CHECK_AT(parser->buffer, '{', 1)
3455                         || CHECK_AT(parser->buffer, '}', 1)
3456                     )
3457                     ) {
3458                 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3459                         start_mark, "found unexpected ':'");
3460                 goto error;
3461             }
3462 
3463             /* Check for indicators that may end a plain scalar. */
3464 
3465             if ((CHECK(parser->buffer, ':') && IS_BLANKZ_AT(parser->buffer, 1))
3466                     || (parser->flow_level &&
3467                         (CHECK(parser->buffer, ',')
3468                          || CHECK(parser->buffer, '[')
3469                          || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
3470                          || CHECK(parser->buffer, '}'))))
3471                 break;
3472 
3473             /* Check if we need to join whitespaces and breaks. */
3474 
3475             if (leading_blanks || whitespaces.start != whitespaces.pointer)
3476             {
3477                 if (leading_blanks)
3478                 {
3479                     /* Do we need to fold line breaks? */
3480 
3481                     if (leading_break.start[0] == '\n') {
3482                         if (trailing_breaks.start[0] == '\0') {
3483                             if (!STRING_EXTEND(parser, string)) goto error;
3484                             *(string.pointer++) = ' ';
3485                         }
3486                         else {
3487                             if (!JOIN(parser, string, trailing_breaks)) goto error;
3488                             CLEAR(parser, trailing_breaks);
3489                         }
3490                         CLEAR(parser, leading_break);
3491                     }
3492                     else {
3493                         if (!JOIN(parser, string, leading_break)) goto error;
3494                         if (!JOIN(parser, string, trailing_breaks)) goto error;
3495                         CLEAR(parser, leading_break);
3496                         CLEAR(parser, trailing_breaks);
3497                     }
3498 
3499                     leading_blanks = 0;
3500                 }
3501                 else
3502                 {
3503                     if (!JOIN(parser, string, whitespaces)) goto error;
3504                     CLEAR(parser, whitespaces);
3505                 }
3506             }
3507 
3508             /* Copy the character. */
3509 
3510             if (!READ(parser, string)) goto error;
3511 
3512             end_mark = parser->mark;
3513 
3514             if (!CACHE(parser, 2)) goto error;
3515         }
3516 
3517         /* Is it the end? */
3518 
3519         if (!(IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)))
3520             break;
3521 
3522         /* Consume blank characters. */
3523 
3524         if (!CACHE(parser, 1)) goto error;
3525 
3526         while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3527         {
3528             if (IS_BLANK(parser->buffer))
3529             {
3530                 /* Check for tab characters that abuse indentation. */
3531 
3532                 if (leading_blanks && (int)parser->mark.column < indent
3533                         && IS_TAB(parser->buffer)) {
3534                     yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3535                             start_mark, "found a tab character that violates indentation");
3536                     goto error;
3537                 }
3538 
3539                 /* Consume a space or a tab character. */
3540 
3541                 if (!leading_blanks) {
3542                     if (!READ(parser, whitespaces)) goto error;
3543                 }
3544                 else {
3545                     SKIP(parser);
3546                 }
3547             }
3548             else
3549             {
3550                 if (!CACHE(parser, 2)) goto error;
3551 
3552                 /* Check if it is a first line break. */
3553 
3554                 if (!leading_blanks)
3555                 {
3556                     CLEAR(parser, whitespaces);
3557                     if (!READ_LINE(parser, leading_break)) goto error;
3558                     leading_blanks = 1;
3559                 }
3560                 else
3561                 {
3562                     if (!READ_LINE(parser, trailing_breaks)) goto error;
3563                 }
3564             }
3565             if (!CACHE(parser, 1)) goto error;
3566         }
3567 
3568         /* Check indentation level. */
3569 
3570         if (!parser->flow_level && (int)parser->mark.column < indent)
3571             break;
3572     }
3573 
3574     /* Create a token. */
3575 
3576     SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3577             YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark);
3578 
3579     /* Note that we change the 'simple_key_allowed' flag. */
3580 
3581     if (leading_blanks) {
3582         parser->simple_key_allowed = 1;
3583     }
3584 
3585     STRING_DEL(parser, leading_break);
3586     STRING_DEL(parser, trailing_breaks);
3587     STRING_DEL(parser, whitespaces);
3588 
3589     return 1;
3590 
3591 error:
3592     STRING_DEL(parser, string);
3593     STRING_DEL(parser, leading_break);
3594     STRING_DEL(parser, trailing_breaks);
3595     STRING_DEL(parser, whitespaces);
3596 
3597     return 0;
3598 }
3599