# xref: /freebsd/contrib/lyaml/spec/ext_yaml_scanner_spec.yaml (revision 2bc180ef045e5911cce0cea1c2a139cffd2b577a)
# LYAML binding for Lua 5.1, 5.2, 5.3 & 5.4
# Copyright (C) 2013-2022 Gary V. Vaughan

before:
  # Helper shared by the examples below: create a scanner over `str`,
  # call it `n` times to discard the leading tokens, and return the
  # scanner so the next call yields the token under test.
  function consume (n, str)
    local scanner = yaml.scanner (str)
    for _ = 1, n do scanner () end
    return scanner
  end

specify scanning:
# After STREAM_END the scanner is expected to keep returning nil.
- it scans empty streams:
    k = yaml.scanner ("")
    local head = k ()
    expect (head.type).to_be ("STREAM_START")
    local tail = k ()
    expect (tail.type).to_be ("STREAM_END")
    expect (k ()).to_be (nil)
    expect (k ()).to_be (nil)
# Single quoted so that YAML does not strip the trailing "# ..." itself.
- it ignores comments: '
    local input = "# A comment\nnon-comment # trailing comment\n"
    k = yaml.scanner (input)
    local head = k ()
    expect (head.type).to_be ("STREAM_START")
    local scalar = k ()
    expect (scalar.value).to_be ("non-comment")
    local tail = k ()
    expect (tail.type).to_be ("STREAM_END")'

- describe STREAM_START:
  # Scanner over a comment-only input that carries no byte-order mark.
  - before:
      k = yaml.scanner ("# no BOM")
  - it is the first token:
      local token = k ()
      expect (token.type).to_be ("STREAM_START")
  - it reports token start marker:
      local mark = k ().start_mark
      expect (mark).to_equal ({line = 0, column = 0, index = 0})
  - it reports token end marker:
      local mark = k ().end_mark
      expect (mark).to_equal ({line = 0, column = 0, index = 0})
  - it uses UTF-8 by default:
      local token = k ()
      expect (token.encoding).to_be ("UTF8")
  - it recognizes UTF-16 BOM:
      k = yaml.scanner (BOM .. " BOM")
      local token = k ()
      expect (token.encoding).to_match ("UTF16[BL]E")

- describe STREAM_END:
  # Drain the scanner so that k is left holding the last token produced.
  - before:
      for token in yaml.scanner ("nothing to see") do k = token end
  - it is the last token:
      expect (k.type).to_be ("STREAM_END")
  - it reports token start marker:
      local mark = k.start_mark
      expect (mark).to_equal ({line = 1, column = 0, index = 14})
  - it reports token end marker:
      local mark = k.end_mark
      expect (mark).to_equal ({line = 1, column = 0, index = 14})

- describe VERSION_DIRECTIVE:
  # One token is discarded first, so the directive is the next one.
  - before:
      k = consume (1, "%YAML 1.0")
  - it can recognize document versions:
      local token = filter (k (), "type", "major", "minor")
      expect (token).to_equal ({type = "VERSION_DIRECTIVE", major = 1, minor = 0})
  - it reports token start marker:
      local mark = k ().start_mark
      expect (mark).to_equal ({line = 0, column = 0, index = 0})
  - it reports token end marker:
      local mark = k ().end_mark
      expect (mark).to_equal ({line = 0, column = 9, index = 9})

- describe TAG_DIRECTIVE:
  # Each example builds its own scanner and inspects the token that
  # follows the first one.
  - it can recognize primary tag handles:
      local directive = "%TAG ! tag:ben-kiki.org,2000:app/"
      k = consume (1, directive)
      local token = filter (k (), "handle", "prefix")
      expect (token).to_equal ({handle = "!", prefix = "tag:ben-kiki.org,2000:app/"})
  - it can recognize secondary tag handles:
      local directive = "%TAG !! tag:yaml.org,2002:"
      k = consume (1, directive)
      local token = filter (k (), "handle", "prefix")
      expect (token).to_equal ({handle = "!!", prefix = "tag:yaml.org,2002:"})
  - it can recognize named tag handles:
      local directive = "%TAG !o! tag:ben-kiki.org,2000:\n---"
      k = consume (1, directive)
      local token = filter (k (), "handle", "prefix")
      expect (token).to_equal ({handle = "!o!", prefix = "tag:ben-kiki.org,2000:"})

- describe DOCUMENT_START:
  # One token is discarded first, so the "---" marker is the next one.
  - before:
      local doc = "---"
      k = consume (1, doc)
  - it recognizes document start marker:
      local token = k ()
      expect (token.type).to_be ("DOCUMENT_START")
  - it reports token start marker:
      local mark = k ().start_mark
      expect (mark).to_equal ({line = 0, column = 0, index = 0})
  - it reports token end marker:
      local mark = k ().end_mark
      expect (mark).to_equal ({line = 0, column = 3, index = 3})

- describe DOCUMENT_END:
  # Two tokens are discarded so the "..." marker is the next one.
  - before:
      local doc = "foo\n..."
      k = consume (2, doc)
  - it recognizes the document end marker:
      local token = k ()
      expect (token.type).to_be ("DOCUMENT_END")
  - it reports token start marker:
      local mark = k ().start_mark
      expect (mark).to_equal ({line = 1, column = 0, index = 4})
  - it reports token end marker:
      local mark = k ().end_mark
      expect (mark).to_equal ({line = 1, column = 3, index = 7})

- describe ALIAS:
  # Fifteen tokens are discarded so that the "*SS" alias is the next one.
  - before:
      local doc = "---\n" ..
                  "hr:\n" ..
                  "- Mark McGwire\n" ..
                  "- &SS Sammy Sosa\n" ..
                  "rbi:\n" ..
                  "- *SS\n" ..
                  "- Ken Griffey"
      k = consume (15, doc)
  - it recognizes an alias token:
      local token = filter (k (), "type", "value")
      expect (token).to_equal ({type = "ALIAS", value = "SS"})
  - it reports token start marker:
      local mark = k ().start_mark
      expect (mark).to_equal ({line = 5, column = 2, index = 47})
  - it reports token end marker:
      local mark = k ().end_mark
      expect (mark).to_equal ({line = 5, column = 5, index = 50})

- describe ANCHOR:
  # Nine tokens are discarded so that the "&SS" anchor is the next one.
  - before:
      local doc = "---\n" ..
                  "hr:\n" ..
                  "- Mark McGwire\n" ..
                  "- &SS Sammy Sosa\n" ..
                  "rbi:\n" ..
                  "- *SS\n" ..
                  "- Ken Griffey"
      k = consume (9, doc)
  - it recognizes an anchor token:
      local token = filter (k (), "type", "value")
      expect (token).to_equal ({type = "ANCHOR", value = "SS"})
  - it reports token start marker:
      local mark = k ().start_mark
      expect (mark).to_equal ({line = 3, column = 2, index = 25})
  - it reports token end marker:
      local mark = k ().end_mark
      expect (mark).to_equal ({line = 3, column = 5, index = 28})

- describe SCALAR:
  # Ten tokens are discarded so that the anchored "Sammy Sosa" scalar
  # is the next one.
  - before:
      local doc = "---\n" ..
                  "hr:\n" ..
                  "- Mark McGwire\n" ..
                  "- &SS Sammy Sosa\n" ..
                  "rbi:\n" ..
                  "- *SS\n" ..
                  "- Ken Griffey"
      k = consume (10, doc)
  - it recognizes a scalar token:
      local token = filter (k (), "type", "value")
      expect (token).to_equal ({type = "SCALAR",  value = "Sammy Sosa"})
  - it reports token start marker:
      local mark = k ().start_mark
      expect (mark).to_equal ({line = 3, column = 6, index = 29})
  - it reports token end marker:
      local mark = k ().end_mark
      expect (mark).to_equal ({line = 3, column = 16, index = 39})

145  - context with quoting style:
146    - context plain style:
147      - before:
148          k = consume (2, "---\n" ..
149                          "  Mark McGwire's\n" ..
150                          "  year was crippled\n" ..
151                          "  by a knee injury.\n")
152      - it ignores line-breaks and indentation:
153          expect (k ().value).
154             to_be "Mark McGwire's year was crippled by a knee injury."
155      - it recognizes PLAIN style:
156          expect (k ().style).to_be "PLAIN"
157    - context folded style:
158      - before:
159          k = consume (1, ">\n" ..
160                          "  Sammy Sosa completed another\n" ..
161                          "  fine season with great stats.\n" ..
162                          "\n" ..
163                          "    63 Home Runs\n" ..
164                          "    0.288 Batting Average\n" ..
165                          "\n" ..
166                          "  What a year!\n")
167      - it preserves blank lines and deeper indentation:
168          expect (k ().value).
169             to_be ("Sammy Sosa completed another fine season with great stats.\n" ..
170                       "\n" ..
171                       "  63 Home Runs\n" ..
172                       "  0.288 Batting Average\n" ..
173                       "\n" ..
174                       "What a year!\n")
175      - it recognizes FOLDED style:
176          expect (k ().style).to_be "FOLDED"
177    - context literal style:
178      - before:
179          k = consume (2, [[# ASCII Art]] .. "\n" ..
180                          [[--- |]] .. "\n" ..
181                          [[  \//||\/||]] .. "\n" ..
182                          [[  // ||  ||__]] .. "\n")
183      - it removes indentation but preserves all line-breaks:
184          expect (k ().value).
185             to_be ([[\//||\/||]] .. "\n" ..
186                       [[// ||  ||__]] .. "\n")
187      - it recognizes LITERAL style:
188          expect (k ().style).to_be "LITERAL"
189
190    - context single quoted style:
191      - before:
192          k = consume (1, [['This quoted scalar]] .. "\n" ..
193                          [[  spans two lines.']])
194      - it folds line breaks:
195          expect (k ().value).
196             to_be "This quoted scalar spans two lines."
197      - it does not process escape sequences:
198          # Lua [[ quoting makes sure libyaml sees all the quotes.
199          k = consume (1, [['"Howdy!"\t\u263A']])
200          expect (k ().value).to_be [["Howdy!"\t\u263A]]
201      - it recognizes LITERAL style:
202          expect (k ().style).to_be "SINGLE_QUOTED"
203
204    # Note that we have to single quote the Lua snippets to prevent
205    # libyaml from interpreting the bytes as the spec file is read, so
206    # that the raw strings get correctly passed to the Lua compiler.
207    - context double quoted style:
208      - it folds line breaks: '
209          k = consume (5, [[quoted: "This quoted scalar]] .. "\n" ..
210                          [[  spans two lines\n"]])
211          expect (k ().value).
212             to_be "This quoted scalar spans two lines\n"'
213      - it recognizes unicode escape sequences: '
214          k = consume (5, [[unicode: "Sosa did fine.\u263A"]])
215          expect (k ().value).to_be "Sosa did fine.\226\152\186"'
216      - it recognizes control escape sequences: '
217          k = consume (5, [[control: "\b1998\t1999\t2000\n"]])
218          expect (k ().value).to_be "\b1998\t1999\t2000\n"'
219      - it recognizes hexadecimal escape sequences: '
220          k = consume (5, [[hexesc: "\x41\x42\x43 is ABC"]])
221          expect (k ().value).to_be "ABC is ABC"'
222
223    - context indentation determines scope: '
224        k = consume (5, "name: Mark McGwire\n" ..
225                        "accomplishment: >\n" ..
226                        "  Mark set a major league\n" ..
227                        "  home run record in 1998.\n" ..
228                        "stats: |\n" ..
229                        "  65 Home Runs\n" ..
230                        "  0.278 Batting Average\n")
231        expect (k ().value).to_be "Mark McGwire"
232        expect (k ().type).to_be "KEY"
233        expect (k ().value).to_be "accomplishment"
234        expect (k ().type).to_be "VALUE"
235        expect (k ().value).
236          to_be "Mark set a major league home run record in 1998.\n"
237        expect (k ().type).to_be "KEY"
238        expect (k ().value).to_be "stats"
239        expect (k ().type).to_be "VALUE"
240        expect (k ().value).to_be "65 Home Runs\n0.278 Batting Average\n"'
241
- describe TAG:
  # Both documents are one-entry block mappings; five tokens are
  # discarded so that the tag in front of the value is the next one.
  - it recognizes local tags: '
      local doc = "application specific tag: !something |\n" ..
                  " The semantics of the tag\n" ..
                  " above may be different for\n" ..
                  " different documents."
      k = consume (5, doc)
      local token = filter (k (), "type", "handle", "suffix")
      expect (token).to_equal ({type = "TAG", handle = "!", suffix = "something"})'
  - it recognizes global tags: '
      local doc = "picture: !!binary |\n" ..
                  " R0lGODlhDAAMAIQAAP//9/X\n" ..
                  " 17unp5WZmZgAAAOfn515eXv\n" ..
                  " Pz7Y6OjuDg4J+fn5OTk6enp\n" ..
                  " 56enmleECcgggoBADs="
      k = consume (5, doc)
      local token = filter (k (), "type", "handle", "suffix")
      expect (token).to_equal ({type = "TAG", handle = "!!", suffix = "binary"})'

- describe BLOCK_SEQUENCE_START:
  # Five tokens are discarded so the nested sequence start is next.
  - before: '
      local doc = "fubar:\n" ..
                  "  - foo\n" ..
                  "  - bar\n"
      k = consume (5, doc)'
  - it recognizes a sequence start token:
      local token = k ()
      expect (token.type).to_be ("BLOCK_SEQUENCE_START")
  - it reports token start marker:
      local mark = k ().start_mark
      expect (mark).to_equal ({line = 1, column = 2, index = 9})
  - it reports token end marker:
      local mark = k ().end_mark
      expect (mark).to_equal ({line = 1, column = 2, index = 9})

- describe BLOCK_MAPPING_START:
  # Three tokens are discarded so the nested mapping start is next.
  - before: '
      local doc = "-\n  foo: bar\n-"
      k = consume (3, doc)'
  - it recognizes a mapping start token:
      local token = k ()
      expect (token.type).to_be ("BLOCK_MAPPING_START")
  - it reports token start marker:
      local mark = k ().start_mark
      expect (mark).to_equal ({line = 1, column = 2, index = 4})
  - it reports token end marker:
      local mark = k ().end_mark
      expect (mark).to_equal ({line = 1, column = 2, index = 4})

- describe BLOCK_ENTRY:
  # The shared scanner is positioned on the first "-" entry; the first
  # example replaces it with its own scanner.
  - before: '
      local doc = "-\n  foo: bar\n-"
      k = consume (2, doc)'
  - it recognizes a sequence block entry token: '
      local doc = "fubar:\n" ..
                  "  - foo\n" ..
                  "  - bar\n"
      k = consume (8, doc)
      local token = k ()
      expect (token.type).to_be ("BLOCK_ENTRY")'
  - it recognizes a mapping block entry token:
      local token = k ()
      expect (token.type).to_be ("BLOCK_ENTRY")
  - it reports token start marker:
      local mark = k ().start_mark
      expect (mark).to_equal ({line = 0, column = 0, index = 0})
  - it reports token end marker:
      local mark = k ().end_mark
      expect (mark).to_equal ({line = 0, column = 1, index = 1})

- describe BLOCK_END:
  # Eight tokens are discarded from the shared document; the first
  # example replaces the scanner with its own.
  - before: '
      local doc = "-\n  foo: bar\n-"
      k = consume (8, doc)'
  - it recognizes a sequence block end token: '
      local doc = "fubar:\n" ..
                  "  - foo\n" ..
                  "  - bar\n"
      k = consume (10, doc)
      local token = k ()
      expect (token.type).to_be ("BLOCK_END")'
  - it recognizes a mapping block end token:
      local token = k ()
      expect (token.type).to_be ("BLOCK_END")
  - it reports token start marker:
      local mark = k ().start_mark
      expect (mark).to_equal ({line = 2, column = 0, index = 13})
  - it reports token end marker:
      local mark = k ().end_mark
      expect (mark).to_equal ({line = 2, column = 0, index = 13})

- describe FLOW_SEQUENCE_START:
  # Five tokens are discarded so that the "[" token is the next one.
  - before: '
      local doc = "fubar: [foo, bar]\n"
      k = consume (5, doc)'
  - it recognizes a sequence start token:
      local token = k ()
      expect (token.type).to_be ("FLOW_SEQUENCE_START")
  - it reports token start marker:
      local mark = k ().start_mark
      expect (mark).to_equal ({line = 0, column = 7, index = 7})
  - it reports token end marker:
      local mark = k ().end_mark
      expect (mark).to_equal ({line = 0, column = 8, index = 8})

- describe FLOW_SEQUENCE_END:
  # Nine tokens are discarded so that the "]" token is the next one.
  - before: '
      k = consume (9, "fubar: [foo, bar]\n")'
  # The type check uses to_be, like the other token type checks in this
  # file; to_equal is kept for the table-valued marks.
  - it recognizes a sequence end token:
      expect (k ().type).to_be "FLOW_SEQUENCE_END"
  - it reports token start marker:
      expect (k ().start_mark).to_equal {line = 0, column = 16, index = 16}
  - it reports token end marker:
      expect (k ().end_mark).to_equal {line = 0, column = 17, index = 17}

- describe FLOW_ENTRY:
  # Six tokens are discarded so that the "," separator is the next one;
  # the first example repeats this with a flow sequence.
  - before: '
      local doc = "{foo: bar, baz: quux}"
      k = consume (6, doc)'
  - it recognizes a sequence flow entry: '
      local doc = "[foo: bar, baz: quux]"
      k = consume (6, doc)
      local token = k ()
      expect (token.type).to_be ("FLOW_ENTRY")'
  - it recognizes a mapping flow entry:
      local token = k ()
      expect (token.type).to_be ("FLOW_ENTRY")
  - it reports token start marker:
      local mark = k ().start_mark
      expect (mark).to_equal ({line = 0, column = 9, index = 9})
  - it reports token end marker:
      local mark = k ().end_mark
      expect (mark).to_equal ({line = 0, column = 10, index = 10})

- describe FLOW_MAPPING_START:
  # One token is discarded so that the "{" token is the next one.
  - before: '
      local doc = "{foo: bar, baz: quux}"
      k = consume (1, doc)'
  - it recognizes flow style:
      local token = k ()
      expect (token.type).to_be ("FLOW_MAPPING_START")
  - it reports token start marker:
      local mark = k ().start_mark
      expect (mark).to_equal ({line = 0, column = 0, index = 0})
  - it reports token end marker:
      local mark = k ().end_mark
      expect (mark).to_equal ({line = 0, column = 1, index = 1})

- describe FLOW_MAPPING_END:
  # Six tokens are discarded so that the "}" token is the next one.
  - before: 'k = consume (6, "{foo: bar}\n")'
  # The type check uses to_be, like the other token type checks in this
  # file; to_equal is kept for the table-valued marks.
  - it recognizes the mapping end token:
      expect (k ().type).to_be "FLOW_MAPPING_END"
  - it reports token start marker:
      expect (k ().start_mark).to_equal {line = 0, column = 9, index = 9}
  - it reports token end marker:
      expect (k ().end_mark).to_equal {line = 0, column = 10, index = 10}

- describe KEY:
  # Two tokens are discarded so that the first KEY token is the next
  # one; the block mapping example builds its own scanner.
  - before: '
      local doc = "{the key: the value, another key: meh}"
      k = consume (2, doc)'
  - it recognizes a flow mapping key token:
      local token = k ()
      expect (token.type).to_be ("KEY")
  - it recognizes a block mapping key token: '
      local doc = "the key: the value\nanother key: meh\n"
      k = consume (2, doc)
      local token = k ()
      expect (token.type).to_be ("KEY")'
  - it reports token start marker:
      local mark = k ().start_mark
      expect (mark).to_equal ({line = 0, column = 1, index = 1})
  - it reports token end marker:
      local mark = k ().end_mark
      expect (mark).to_equal ({line = 0, column = 1, index = 1})

- describe VALUE:
  # Four tokens are discarded so that the first VALUE token is the next
  # one; the block mapping example builds its own scanner.
  - before: 'k = consume (4, "{the key: the value, another key: meh}")'
  - it recognizes a flow mapping value token:
      expect (k ().type).to_be "VALUE"
  # Title reworded from "block mapping key value" to parallel the flow
  # mapping example above.
  - it recognizes a block mapping value token: '
      k = consume (4, "the key: the value\nanother key: meh\n")
      expect (k ().type).to_be "VALUE"'
  - it reports token start marker:
      expect (k ().start_mark).to_equal {line = 0, column = 8, index = 8}
  - it reports token end marker:
      expect (k ().end_mark).to_equal {line = 0, column = 9, index = 9}
