xref: /freebsd/contrib/lyaml/ext/yaml/scanner.c (revision 4b15965daa99044daf184221b7c283bf7f2d7e66)
1 /*
2  * scanner.c, libyaml scanner binding for Lua
3  * Written by Gary V. Vaughan, 2013
4  *
5  * Copyright (C) 2013-2022 Gary V. Vaughan
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "lyaml.h"
27 
28 
29 typedef struct {
30    lua_State	 *L;
31    yaml_parser_t  parser;
32    yaml_token_t	  token;
33    char		  validtoken;
34    int		  document_count;
35 } lyaml_scanner;
36 
37 
38 static void
39 scanner_delete_token (lyaml_scanner *scanner)
40 {
41    if (scanner->validtoken)
42    {
43       yaml_token_delete (&scanner->token);
44       scanner->validtoken = 0;
45    }
46 }
47 
48 /* With the token result table on the top of the stack, insert
49    a mark entry. */
50 static void
51 scanner_set_mark (lua_State *L, const char *k, yaml_mark_t mark)
52 {
53    lua_pushstring  (L, k);
54    lua_createtable (L, 0, 3);
55 #define MENTRY(_s)	RAWSET_INTEGER (#_s, mark._s)
56          MENTRY( index	);
57          MENTRY( line	);
58          MENTRY( column	);
59 #undef MENTRY
60    lua_rawset (L, -3);
61 }
62 
63 /* Push a new token table, pre-populated with shared elements. */
64 static void
65 scanner_push_tokentable (lyaml_scanner *scanner, const char *v, int n)
66 {
67    lua_State *L = scanner->L;
68 
69    lua_createtable (L, 0, n + 3);
70    RAWSET_STRING   ("type", v);
71 
72 #define MENTRY(_s)	scanner_set_mark (L, #_s, scanner->token._s)
73          MENTRY( start_mark	);
74          MENTRY( end_mark	);
75 #undef MENTRY
76 }
77 
78 static void
79 scan_STREAM_START (lyaml_scanner *scanner)
80 {
81 #define EVENTF(_f)	(scanner->token.data.stream_start._f)
82    lua_State *L = scanner->L;
83    const char *encoding;
84 
85    switch (EVENTF (encoding))
86    {
87 #define MENTRY(_s)		\
88       case YAML_##_s##_ENCODING: encoding = #_s; break
89          MENTRY( UTF8		);
90          MENTRY( UTF16LE	);
91          MENTRY( UTF16BE	);
92 #undef MENTRY
93 
94       default:
95          lua_pushfstring (L, "invalid encoding %d", EVENTF (encoding));
96          lua_error (L);
97    }
98 
99    scanner_push_tokentable (scanner, "STREAM_START", 1);
100    RAWSET_STRING ("encoding", encoding);
101 #undef EVENTF
102 }
103 
104 static void
105 scan_VERSION_DIRECTIVE (lyaml_scanner *scanner)
106 {
107 #define EVENTF(_f)	(scanner->token.data.version_directive._f)
108    lua_State *L = scanner->L;
109 
110    scanner_push_tokentable (scanner, "VERSION_DIRECTIVE", 2);
111 
112 #define MENTRY(_s)	RAWSET_INTEGER (#_s, EVENTF (_s))
113          MENTRY( major	);
114          MENTRY( minor	);
115 #undef MENTRY
116 #undef EVENTF
117 }
118 
119 static void
120 scan_TAG_DIRECTIVE (lyaml_scanner *scanner)
121 {
122 #define EVENTF(_f)	(scanner->token.data.tag_directive._f)
123    lua_State *L = scanner->L;
124 
125    scanner_push_tokentable (scanner, "TAG_DIRECTIVE", 2);
126    RAWSET_EVENTF( handle	);
127    RAWSET_EVENTF( prefix	);
128 #undef EVENTF
129 }
130 
131 static void
132 scan_ALIAS (lyaml_scanner *scanner)
133 {
134 #define EVENTF(_f)	(scanner->token.data.alias._f)
135    lua_State *L = scanner->L;
136 
137    scanner_push_tokentable (scanner, "ALIAS", 1);
138    RAWSET_EVENTF (value);
139 #undef EVENTF
140 }
141 
142 static void
143 scan_ANCHOR (lyaml_scanner *scanner)
144 {
145 #define EVENTF(_f)	(scanner->token.data.anchor._f)
146    lua_State *L = scanner->L;
147 
148    scanner_push_tokentable (scanner, "ANCHOR", 1);
149    RAWSET_EVENTF (value);
150 #undef EVENTF
151 }
152 
153 static void
154 scan_TAG(lyaml_scanner *scanner)
155 {
156 #define EVENTF(_f)	(scanner->token.data.tag._f)
157    lua_State *L = scanner->L;
158 
159    scanner_push_tokentable (scanner, "TAG", 2);
160    RAWSET_EVENTF( handle	);
161    RAWSET_EVENTF( suffix	);
162 #undef EVENTF
163 }
164 
165 static void
166 scan_SCALAR (lyaml_scanner *scanner)
167 {
168 #define EVENTF(_f)	(scanner->token.data.scalar._f)
169    lua_State *L = scanner->L;
170    const char *style;
171 
172    switch (EVENTF (style))
173    {
174 #define MENTRY(_s)		\
175       case YAML_##_s##_SCALAR_STYLE: style = #_s; break
176 
177         MENTRY( PLAIN		);
178         MENTRY( SINGLE_QUOTED	);
179         MENTRY( DOUBLE_QUOTED	);
180 	MENTRY( LITERAL		);
181 	MENTRY( FOLDED		);
182 #undef MENTRY
183 
184       default:
185          lua_pushfstring (L, "invalid scalar style %d", EVENTF (style));
186          lua_error (L);
187    }
188 
189    scanner_push_tokentable (scanner, "SCALAR", 3);
190    RAWSET_EVENTF  (value);
191    RAWSET_INTEGER ("length", EVENTF (length));
192    RAWSET_STRING  ("style", style);
193 #undef EVENTF
194 }
195 
196 static void
197 scanner_generate_error_message (lyaml_scanner *scanner)
198 {
199    yaml_parser_t *P = &scanner->parser;
200    char buf[256];
201    luaL_Buffer b;
202 
203    luaL_buffinit (scanner->L, &b);
204    luaL_addstring (&b, P->problem ? P->problem : "A problem");
205    snprintf (buf, sizeof (buf), " at document: %d", scanner->document_count);
206    luaL_addstring (&b, buf);
207 
208    if (P->problem_mark.line || P->problem_mark.column)
209    {
210       snprintf (buf, sizeof (buf), ", line: %lu, column: %lu",
211          (unsigned long) P->problem_mark.line + 1,
212          (unsigned long) P->problem_mark.column + 1);
213       luaL_addstring (&b, buf);
214    }
215    luaL_addstring (&b, "\n");
216 
217    if (P->context)
218    {
219       snprintf (buf, sizeof (buf), "%s at line: %lu, column: %lu\n",
220          P->context,
221          (unsigned long) P->context_mark.line + 1,
222          (unsigned long) P->context_mark.column + 1);
223       luaL_addstring (&b, buf);
224    }
225 
226    luaL_pushresult (&b);
227 }
228 
229 static int
230 token_iter (lua_State *L)
231 {
232    lyaml_scanner *scanner = (lyaml_scanner *)lua_touserdata(L, lua_upvalueindex(1));
233    char *str;
234 
235    scanner_delete_token (scanner);
236    if (yaml_parser_scan (&scanner->parser, &scanner->token) != 1)
237    {
238       scanner_generate_error_message (scanner);
239       return lua_error (L);
240    }
241 
242    scanner->validtoken = 1;
243 
244    lua_newtable    (L);
245    lua_pushliteral (L, "type");
246 
247    switch (scanner->token.type)
248    {
249       /* First the simple tokens, generated right here... */
250 #define MENTRY(_s)			\
251       case YAML_##_s##_TOKEN: scanner_push_tokentable (scanner, #_s, 0); break
252          MENTRY( STREAM_END		);
253          MENTRY( DOCUMENT_START		);
254          MENTRY( DOCUMENT_END		);
255          MENTRY( BLOCK_SEQUENCE_START	);
256          MENTRY( BLOCK_MAPPING_START	);
257          MENTRY( BLOCK_END		);
258          MENTRY( FLOW_SEQUENCE_START	);
259          MENTRY( FLOW_SEQUENCE_END	);
260          MENTRY( FLOW_MAPPING_START	);
261          MENTRY( FLOW_MAPPING_END	);
262 	 MENTRY( BLOCK_ENTRY		);
263 	 MENTRY( FLOW_ENTRY		);
264 	 MENTRY( KEY			);
265 	 MENTRY( VALUE			);
266 #undef MENTRY
267 
268       /* ...then the complex tokens, generated by a function call. */
269 #define MENTRY(_s)		\
270       case YAML_##_s##_TOKEN: scan_##_s (scanner); break
271          MENTRY( STREAM_START		);
272 	 MENTRY( VERSION_DIRECTIVE	);
273 	 MENTRY( TAG_DIRECTIVE		);
274          MENTRY( ALIAS			);
275 	 MENTRY( ANCHOR			);
276 	 MENTRY( TAG			);
277          MENTRY( SCALAR			);
278 #undef MENTRY
279 
280       case YAML_NO_TOKEN:
281          lua_pushnil (L);
282          break;
283       default:
284          lua_pushfstring  (L, "invalid token %d", scanner->token.type);
285          return lua_error (L);
286    }
287 
288    return 1;
289 }
290 
291 static int
292 scanner_gc (lua_State *L)
293 {
294    lyaml_scanner *scanner = (lyaml_scanner *) lua_touserdata (L, 1);
295 
296    if (scanner)
297    {
298       scanner_delete_token (scanner);
299       yaml_parser_delete (&scanner->parser);
300    }
301    return 0;
302 }
303 
304 void
305 scanner_init (lua_State *L)
306 {
307    luaL_newmetatable (L, "lyaml.scanner");
308    lua_pushcfunction (L, scanner_gc);
309    lua_setfield      (L, -2, "__gc");
310 }
311 
312 int
313 Pscanner (lua_State *L)
314 {
315    lyaml_scanner *scanner;
316    const unsigned char *str;
317 
318    /* requires a single string type argument */
319    luaL_argcheck (L, lua_isstring (L, 1), 1, "must provide a string argument");
320    str = (const unsigned char *) lua_tostring (L, 1);
321 
322    /* create a user datum to store the scanner */
323    scanner = (lyaml_scanner *) lua_newuserdata (L, sizeof (*scanner));
324    memset ((void *) scanner, 0, sizeof (*scanner));
325    scanner->L = L;
326 
327    /* set its metatable */
328    luaL_getmetatable (L, "lyaml.scanner");
329    lua_setmetatable  (L, -2);
330 
331    /* try to initialize the scanner */
332    if (yaml_parser_initialize (&scanner->parser) == 0)
333       luaL_error (L, "cannot initialize parser for %s", str);
334    yaml_parser_set_input_string (&scanner->parser, str, lua_strlen (L, 1));
335 
336    /* create and return the iterator function, with the loader userdatum as
337       its sole upvalue */
338    lua_pushcclosure (L, token_iter, 1);
339    return 1;
340 }
341