xref: /freebsd/contrib/lyaml/lib/lyaml/init.lua (revision 2bc180ef045e5911cce0cea1c2a139cffd2b577a)
1-- Transform between YAML 1.1 streams and Lua table representations.
2-- Written by Gary V. Vaughan, 2013
3--
4-- Copyright(C) 2013-2022 Gary V. Vaughan
5--
6-- Permission is hereby granted, free of charge, to any person obtaining
7-- a copy of this software and associated documentation files(the
8-- "Software"), to deal in the Software without restriction, including
9-- without limitation the rights to use, copy, modify, merge, publish,
10-- distribute, sublicense, and/or sell copies of the Software, and to
11-- permit persons to whom the Software is furnished to do so, subject to
12-- the following conditions:
13--
14-- The above copyright notice and this permission notice shall be
15-- included in all copies or substantial portions of the Software.
16--
17-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18-- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19-- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20-- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21-- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22-- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23-- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24--
25-- Portions of this software were inspired by an earlier LibYAML binding
26-- by Andrew Danforth <acd@weirdness.net>
27
28--- @module lyaml
29
30
31local explicit = require 'lyaml.explicit'
32local functional = require 'lyaml.functional'
33local implicit = require 'lyaml.implicit'
34local yaml = require 'yaml'
35
36local NULL = functional.NULL
37local anyof = functional.anyof
38local find = string.find
39local format = string.format
40local gsub = string.gsub
41local id = functional.id
42local isnull = functional.isnull
43local match = string.match
44
45
-- Namespace shared by every core YAML tag handled in this module.
local TAG_PREFIX = 'tag:yaml.org,2002:'


-- Expand a short tag NAME (for example 'str') into its fully
-- qualified form by prepending TAG_PREFIX.
local function tag(name)
   local qualified = TAG_PREFIX .. name
   return qualified
end
52
53
-- Scalar converters used by `load` and `dump` whenever the caller's
-- options table does not supply replacements.
local default = {
   -- Tag table to lookup explicit scalar conversions, keyed by fully
   -- qualified tag name.  Populated by the loop below.
   explicit_scalar = {},

   -- Implicit conversions, combined with `anyof`.  Order is important:
   -- the most likely and fastest candidates sit nearer the top to reduce
   -- the average number of comparisons and function calls.
   -- NOTE(review): `id` is listed last, presumably as the fallback that
   -- hands an unrecognised value back unchanged -- confirm against
   -- lyaml.functional.
   implicit_scalar = anyof {
      implicit.null,
      implicit.octal,   -- subset of decimal, must come earlier
      implicit.decimal,
      implicit.float,
      implicit.bool,
      implicit.inf,
      implicit.nan,
      implicit.hexadecimal,
      implicit.binary,
      implicit.sexagesimal,
      implicit.sexfloat,
      id,
   },
}

-- Register one explicit converter per core scalar tag; each converter
-- lives in `lyaml.explicit` under the tag's short name.
for _, short_name in ipairs {'bool', 'float', 'int', 'null', 'str'} do
   default.explicit_scalar[tag(short_name)] = explicit[short_name]
end
80
81
-- Metatable for Dumper objects.
--
-- Every method receives the Dumper instance as `self`, which carries:
--   anchors          value -> anchor name, for anchors not yet emitted
--   aliased          value -> anchor name, for anchors already emitted
--   emitter          the LibYAML emitter object
--   implicit_scalar  function used to spot strings that need quoting
local dumper_mt = {
   __index = {
      -- Emit EVENT to the LibYAML emitter, returning whatever the
      -- emitter returns.
      emit = function(self, event)
         return self.emitter.emit(event)
      end,

      -- Look up an anchor for a repeated document element.
      -- Returns the anchor name registered for VALUE, or nil.  A
      -- successful lookup moves the entry from `anchors` to `aliased`,
      -- so later occurrences of VALUE are dumped as aliases.
      get_anchor = function(self, value)
         local r = self.anchors[value]
         if r then
            self.aliased[value], self.anchors[value] = r, nil
         end
         return r
      end,

      -- Look up an already anchored repeated document element.
      -- Returns the anchor name if VALUE has been emitted with one,
      -- otherwise nil.
      get_alias = function(self, value)
         return self.aliased[value]
      end,

      -- Dump ALIAS (an anchor name) into the event stream.
      dump_alias = function(self, alias)
         return self:emit {
            type = 'ALIAS',
            anchor = alias,
         }
      end,

      -- Dump MAP into the event stream as a block mapping, or as an
      -- alias if MAP has already been emitted under an anchor.
      dump_mapping = function(self, map)
         local alias = self:get_alias(map)
         if alias then
            return self:dump_alias(alias)
         end

         self:emit {
            type = 'MAPPING_START',
            anchor = self:get_anchor(map),
            style = 'BLOCK',
         }
         -- `pairs` gives no ordering guarantee, so the order of the
         -- emitted keys is unspecified.
         for k, v in pairs(map) do
            self:dump_node(k)
            self:dump_node(v)
         end
         return self:emit {type='MAPPING_END'}
      end,

      -- Dump SEQUENCE into the event stream as a block sequence, or as
      -- an alias if SEQUENCE has already been emitted under an anchor.
      dump_sequence = function(self, sequence)
         local alias = self:get_alias(sequence)
         if alias then
            return self:dump_alias(alias)
         end

         self:emit {
            type   = 'SEQUENCE_START',
            anchor = self:get_anchor(sequence),
            style  = 'BLOCK',
         }
         for _, v in ipairs(sequence) do
            self:dump_node(v)
         end
         return self:emit {type='SEQUENCE_END'}
      end,

      -- Dump a null into the event stream, written as a plain `~`.
      dump_null = function(self)
         return self:emit {
            type = 'SCALAR',
            value = '~',
            plain_implicit = true,
            quoted_implicit = true,
            style = 'PLAIN',
         }
      end,

      -- Dump VALUE (a string, number or boolean) into the event stream,
      -- or an alias if VALUE has already been emitted under an anchor.
      dump_scalar = function(self, value)
         local alias = self:get_alias(value)
         if alias then
            return self:dump_alias(alias)
         end

         local anchor = self:get_anchor(value)
         local itsa = type(value)
         local style = 'PLAIN'
         if itsa == 'string' and self.implicit_scalar(value) ~= value then
            -- take care to round-trip strings that look like scalars
            style = 'SINGLE_QUOTED'
         elseif value == math.huge then
            value = '.inf'
         elseif value == -math.huge then
            value = '-.inf'
         elseif value ~= value then
            -- only NaN is unequal to itself
            value = '.nan'
         elseif itsa == 'number' or itsa == 'boolean' then
            value = tostring(value)
         elseif itsa == 'string' and find(value, '\n') then
            style = 'LITERAL'
         end
         return self:emit {
            type = 'SCALAR',
            anchor = anchor,
            value = value,
            plain_implicit = true,
            quoted_implicit = true,
            style = style,
         }
      end,

      -- Decompose NODE into a stream of events.
      -- Raises an error for Lua types that cannot be dumped.
      dump_node = function(self, node)
         local itsa = type(node)
         if isnull(node) then
            return self:dump_null()
         elseif itsa == 'string' or itsa == 'boolean' or itsa == 'number' then
            return self:dump_scalar(node)
         elseif itsa == 'table' then
            -- Something is only a sequence if its keys are exactly the
            -- integers 1..n.  `next` makes no promise about traversal
            -- order, even for numeric keys, so rather than expecting
            -- the keys to arrive in ascending order we count them and
            -- remember the largest: n distinct positive integer keys
            -- whose maximum is n can only be 1..n.
            local count, highest = 0, 0
            local is_pure_sequence = true
            for k in next, node do
               if type(k) ~= 'number' or k < 1 or k % 1 ~= 0 then
                  is_pure_sequence = false
                  break
               end
               count = count + 1
               if k > highest then
                  highest = k
               end
            end
            if is_pure_sequence and highest == count then
               -- Only sequentially numbered integer keys starting from 1
               -- (an empty table also lands here).
               return self:dump_sequence(node)
            else
               -- Table contains non sequential integer keys or mixed keys.
               return self:dump_mapping(node)
            end
         else -- unsupported Lua type
            error("cannot dump object of type '" .. itsa .. "'", 2)
         end
      end,

      -- Dump DOCUMENT into the event stream, wrapped in document
      -- start and end events.
      dump_document = function(self, document)
         self:emit {type='DOCUMENT_START'}
         self:dump_node(document)
         return self:emit {type='DOCUMENT_END'}
      end,
   },
}
235
236
-- Emitter object constructor.
--
-- OPTS.anchors maps anchor names to values; it is inverted here so the
-- dumper can find an anchor name from the value it is about to dump.
-- OPTS.implicit_scalar is stored on the new object unchanged.
local function Dumper(opts)
   local name_of = {}
   for name, value in pairs(opts.anchors) do
      name_of[value] = name
   end

   return setmetatable({
      aliased = {},
      anchors = name_of,
      emitter = yaml.emitter(),
      implicit_scalar = opts.implicit_scalar,
   }, dumper_mt)
end
251
252
253--- Dump options table.
254-- @table dumper_opts
255-- @tfield table anchors map initial anchor names to values
256-- @tfield function implicit_scalar parse implicit scalar values
257
258
--- Dump a list of Lua tables to an equivalent YAML stream.
-- Each element of *documents* becomes one document in the stream.
-- @tparam table documents a sequence of Lua tables.
-- @tparam[opt] dumper_opts opts initialisation options
-- @treturn string equivalent YAML stream
local function dump(documents, opts)
   opts = opts or {}

   -- Backwards compatibility: an options table carrying neither of the
   -- recognised fields is taken to be a bare anchors table.
   local anchors, implicit_scalar = opts.anchors, opts.implicit_scalar
   if anchors == nil and implicit_scalar == nil then
      anchors = opts
   end

   local dumper = Dumper {
      anchors = anchors or {},
      implicit_scalar = implicit_scalar or default.implicit_scalar,
   }

   dumper:emit {type='STREAM_START', encoding='UTF8'}
   for _, document in ipairs(documents) do
      dumper:dump_document(document)
   end

   -- Only the second result of the final emit is handed back.
   -- NOTE(review): the first result looks like a success flag and is
   -- ignored here -- confirm how the emitter reports failures.
   local _, stream = dumper:emit {type='STREAM_END'}
   return stream
end
283
284
-- For every event type that can carry an anchor, the event type that
-- expanding an alias to that anchor must report.  Collections report
-- their END event, matching what a freshly loaded collection reports
-- once it has been fully read.
local alias_type = {
   SCALAR = 'SCALAR',

   MAPPING_START = 'MAPPING_END',
   MAPPING_END = 'MAPPING_END',

   SEQUENCE_START = 'SEQUENCE_END',
   SEQUENCE_END = 'SEQUENCE_END',
}
294
295
-- Pattern capturing the short name of a tag in the core YAML namespace.
-- Punctuation in the prefix is escaped so that it only matches itself
-- (an unescaped `.` would match any character).
local CORE_TAG_PATTERN = '^' .. gsub(TAG_PREFIX, '%p', '%%%0') .. '(.*)$'

-- Event types that begin a node, mapped to the name of the Parser
-- method that loads it.  Built once instead of on every node.
local node_loader = {
   ALIAS = 'load_alias',
   MAPPING_START = 'load_map',
   SCALAR = 'load_scalar',
   SEQUENCE_START = 'load_sequence',
}

-- Event types that are valid where a node is expected but yield no
-- value; callers treat the missing value as "end of collection".
local node_terminator = {
   DOCUMENT_END = true,
   MAPPING_END = true,
   SEQUENCE_END = true,
}


-- Metatable for Parser objects.
--
-- Every method receives the Parser instance as `self`, which carries:
--   anchors          anchor name -> {type=..., value=...} for aliases
--   explicit_scalar  full tag name -> conversion function
--   implicit_scalar  conversion function for untagged plain scalars
--   mark             {line=..., column=...} of the current event
--   next             function returning the next LibYAML event
--   event            the current event (set by `parse`)
local parser_mt = {
   __index = {
      -- Return the type of the current event.
      type = function(self)
         return tostring(self.event.type)
      end,

      -- Raise a parse error.
      -- ERRMSG is a `string.format` template filled in from the extra
      -- arguments; the message is prefixed with the current
      -- `line:column:` mark and raised without position information.
      error = function(self, errmsg, ...)
         error(format('%d:%d: ' .. errmsg, self.mark.line,
                      self.mark.column, ...), 0)
      end,

      -- Save node in the anchor table for reference in future ALIASes.
      -- Does nothing when the current event carries no anchor.
      add_anchor = function(self, node)
         if self.event.anchor ~= nil then
            self.anchors[self.event.anchor] = {
               type = alias_type[self.event.type],
               value = node,
            }
         end
      end,

      -- Fetch the next event, make it the current one, update the
      -- mark, and return the event's type.
      parse = function(self)
         local ok, event = pcall(self.next)
         if not ok then
            -- if ok is false, then event is a parser error from libYAML.
            -- The text is passed as an argument, never as the format
            -- template, so a `%` inside it cannot break `format`; the
            -- local also discards gsub's replacement count.
            local message = gsub(tostring(event), ' at document: .*$', '')
            self:error('%s', message)
         end
         self.event = event
         -- NOTE(review): marks are offset by one, presumably turning
         -- zero-based LibYAML positions into one-based ones -- confirm.
         self.mark = {
            line = self.event.start_mark.line + 1,
            column = self.event.start_mark.column + 1,
         }
         return self:type()
      end,

      -- Construct a Lua hash table from following events.
      -- Returns the table and the type of the event that ended it.
      -- A `<<` key, or a key tagged as a core `merge`, merges the
      -- following mapping (or sequence of mappings) into the result
      -- without overwriting keys that are already present.
      load_map = function(self)
         local map = {}
         self:add_anchor(map)
         while true do
            local key = self:load_node()
            -- tag of the event the key was loaded from
            local key_tag = self.event.tag
            if key == nil then
               break
            end
            local short_tag = key_tag and match(key_tag, CORE_TAG_PATTERN)
            if key == '<<' or short_tag == 'merge' then
               -- how this merge key is named in error messages
               local label = key_tag or key
               local node, event = self:load_node()
               if event == 'MAPPING_END' then
                  for k, v in pairs(node) do
                     if map[k] == nil then
                        map[k] = v
                     end
                  end

               elseif event == 'SEQUENCE_END' then
                  for i, merge in ipairs(node) do
                     if type(merge) ~= 'table' then
                        self:error("invalid '%s' sequence element %d: %s",
                           label, i, tostring(merge))
                     end
                     for k, v in pairs(merge) do
                        if map[k] == nil then
                           map[k] = v
                        end
                     end
                  end

               else
                  if event == 'SCALAR' then
                     event = tostring(node)
                  end
                  -- tostring guards against a nil event, which `%s`
                  -- rejects on older Lua versions
                  self:error("invalid '%s' merge event: %s", label,
                     tostring(event))
               end
            else
               local value = self:load_node()
               if value == nil then
                  self:error('unexpected %s event', self:type())
               end
               map[key] = value
            end
         end
         return map, self:type()
      end,

      -- Construct a Lua array table from following events.
      -- Returns the table and the type of the event that ended it.
      load_sequence = function(self)
         local sequence = {}
         self:add_anchor(sequence)
         while true do
            local node = self:load_node()
            if node == nil then
               break
            end
            sequence[#sequence + 1] = node
         end
         return sequence, self:type()
      end,

      -- Construct a primitive type from the current event.
      -- Returns the converted value and the current event type.
      load_scalar = function(self)
         local value = self.event.value
         local tag_name = self.event.tag
         local convert = self.explicit_scalar[tag_name]

         -- Explicitly tagged values.
         if convert then
            value = convert(value)
            if value == nil then
               self:error("invalid '%s' value: '%s'", tag_name,
                  self.event.value)
            end

         -- Otherwise, implicit conversion according to value content.
         elseif self.event.style == 'PLAIN' then
            value = self.implicit_scalar(self.event.value)
         end
         self:add_anchor(value)
         return value, self:type()
      end,

      -- Resolve the current ALIAS event against the anchor table.
      -- Returns the anchored value and the event type recorded for it;
      -- raises a parse error for an unknown anchor.
      load_alias = function(self)
         local anchor = self.event.anchor
         local event = self.anchors[anchor]
         if event == nil then
            self:error('invalid reference: %s', tostring(anchor))
         end
         return event.value, event.type
      end,

      -- Fetch the next event and load the node it starts.
      -- Returns the loader's results (value, event type), or nothing
      -- at all when the event closes a collection or document.
      -- Raises a parse error for any other event.
      load_node = function(self)
         local event = self:parse()
         local loader = node_loader[event]
         if loader then
            return self[loader](self)
         elseif not node_terminator[event] then
            self:error('invalid event: %s', self:type())
         end
      end,
   },
}
451
452
-- Parser object constructor.
--
-- S is handed to `yaml.parser`, whose result is stored as the `next`
-- field; OPTS supplies the explicit and implicit scalar converters.
local function Parser(s, opts)
   local parser = {}
   parser.anchors = {}
   parser.explicit_scalar = opts.explicit_scalar
   parser.implicit_scalar = opts.implicit_scalar
   parser.mark = {line=0, column=0}
   parser.next = yaml.parser(s)
   return setmetatable(parser, parser_mt)
end
464
465
466--- Load options table.
467-- @table loader_opts
468-- @tfield boolean all load all documents from the stream
469-- @tfield table explicit_scalar map full tag-names to parser functions
470-- @tfield function implicit_scalar parse implicit scalar values
471
472
--- Load a YAML stream into a Lua table.
-- Anchors are scoped to a single document: the anchor table is reset
-- after each one.
-- @tparam string s YAML stream
-- @tparam[opt] loader_opts opts initialisation options; passing `true`
--   is accepted as shorthand for `{all=true}`
-- @treturn table Lua table equivalent of stream *s*: a list of every
--   document when `opts.all` is set, otherwise only the first document
local function load(s, opts)
   opts = opts or {}
   local documents = {}

   -- backwards compatibility
   if opts == true then
      opts = {all=true}
   end

   local parser = Parser(s, {
      explicit_scalar = opts.explicit_scalar or default.explicit_scalar,
      implicit_scalar = opts.implicit_scalar or default.implicit_scalar,
   })

   if parser:parse() ~= 'STREAM_START' then
      error('expecting STREAM_START event, but got ' .. parser:type(), 2)
   end

   -- Each pass consumes the event that opens a document, then the
   -- document's root node, then the closing DOCUMENT_END.
   while parser:parse() ~= 'STREAM_END' do
      local document = parser:load_node()
      if document == nil then
         -- reported at the caller's position, like the other errors here
         error('unexpected ' .. parser:type() .. ' event', 2)
      end

      if parser:parse() ~= 'DOCUMENT_END' then
         error('expecting DOCUMENT_END event, but got ' .. parser:type(), 2)
      end

      -- save document
      documents[#documents + 1] = document

      -- reset anchor table
      parser.anchors = {}
   end

   return opts.all and documents or documents[1]
end
515
516
517--[[ ----------------- ]]--
518--[[ Public Interface. ]]--
519--[[ ----------------- ]]--
520
521
--- @export
return {
   -- Serialise a list of Lua values to a YAML stream.
   dump = dump,
   -- Parse a YAML stream into Lua values.
   load = load,

   --- `lyaml.null` value.
   -- This is the `NULL` sentinel taken from `lyaml.functional`.
   -- @table null
   null = NULL,

   --- Version number from yaml C binding.
   -- Re-exported unchanged from `yaml.version`.
   -- @table _VERSION
   _VERSION = yaml.version,
}
535