xref: /freebsd/contrib/libxo/encoder/csv/enc_csv.c (revision c7a063741720ef81d4caa4613242579d12f1d605)
1 /*
2  * Copyright (c) 2015, Juniper Networks, Inc.
3  * All rights reserved.
4  * This SOFTWARE is licensed under the LICENSE provided in the
5  * ../Copyright file. By downloading, installing, copying, or otherwise
6  * using the SOFTWARE, you agree to be bound by the terms of that
7  * LICENSE.
8  * Phil Shafer, August 2015
9  */
10 
11 /*
12  * CSV encoder generates comma-separated value files for specific
13  * subsets of data.  This is not (and cannot be) a generalized
14  * facility, but for specific subsets of data, CSV data can be
15  * reasonably generated.  For example, the df XML content:
16  *     <filesystem>
17  *      <name>procfs</name>
18  *      <total-blocks>4</total-blocks>
19  *      <used-blocks>4</used-blocks>
20  *      <available-blocks>0</available-blocks>
21  *      <used-percent>100</used-percent>
22  *      <mounted-on>/proc</mounted-on>
23  *    </filesystem>
24  *
25  * could be represented as:
26  *
27  *  #+name,total-blocks,used-blocks,available-blocks,used-percent,mounted-on
28  *  procfs,4,4,0,100,/proc
29  *
30  * Data is then constrained to be sibling leaf values.  In addition,
31  * singular leafs can also be matched.  The costs include recording
32  * the specific leaf names (to ensure consistency) and some
33  * buffering.
34  *
35  * Some escaping is needed for CSV files, following the rules of RFC4180:
36  *
37  * - Fields containing a line-break, double-quote or commas should be
38  *   quoted. (If they are not, the file will likely be impossible to
39  *   process correctly).
40  * - A (double) quote character in a field must be represented by two
41  *   (double) quote characters.
 * - Leading and trailing whitespace require fields be quoted.
43  *
44  * Cheesy, but simple.  The RFC also requires MS-DOS end-of-line,
45  * which we only do with the "dos" option.  Strange that we still live
46  * in a DOS-friendly world, but then again, we make spaceships based
47  * on the horse butts (http://www.astrodigital.org/space/stshorse.html
 * though the "built by English expatriates" bit is rubbish; better to
49  * say the first engines used in America were built by Englishmen.)
50  */
51 
52 #include <string.h>
53 #include <sys/types.h>
54 #include <unistd.h>
55 #include <stdint.h>
56 #include <ctype.h>
57 #include <stdlib.h>
58 #include <limits.h>
59 
60 #include "xo.h"
61 #include "xo_encoder.h"
62 #include "xo_buf.h"
63 
64 #ifndef UNUSED
65 #define UNUSED __attribute__ ((__unused__))
66 #endif /* UNUSED */
67 
68 /*
69  * The CSV encoder has three moving parts:
70  *
71  * - The path holds the path we are matching against
72  *   - This is given as input via "options" and does not change
73  *
74  * - The stack holds the current names of the open elements
75  *   - The "open" operations push, while the "close" pop
76  *   - Turns out, at this point, the stack is unused, but I've
77  *     left "drippings" in the code because I see this as useful
78  *     for future features (under CSV_STACK_IS_NEEDED).
79  *
80  * - The leafs record the current set of leaf
81  *   - A key from the parent list counts as a leaf (unless CF_NO_KEYS)
82  *   - Once the path is matched, all other leafs at that level are leafs
83  *   - Leafs are recorded to get the header comment accurately recorded
 *   - Once the first line is emitted, the set of leafs _cannot_ change
85  *
86  * We use offsets into the buffers, since we know they can be
87  * realloc'd out from under us, as the size increases.  The 'path'
88  * is fixed, we allocate it once, so it doesn't need offsets.
89  */
/*
 * One member of the "path" option.  csv_record_path() builds an array
 * of these, one per '/'-separated element, followed by an entry whose
 * pf_name is NULL.
 */
typedef struct path_frame_s {
    char *pf_name;	       /* Path member name; points into c_path_buf */
    uint32_t pf_flags;	       /* Flags for this path element (PFF_*); only
				* ever zeroed in the code visible here */
} path_frame_t;
94 
/*
 * One open element on the (currently unused) element stack.  The type
 * is only referenced by the c_stack member of csv_private_t, which is
 * itself compiled only when CSV_STACK_IS_NEEDED is set.
 */
typedef struct stack_frame_s {
    ssize_t sf_off;		/* Element name; offset in c_stack_buf */
    uint32_t sf_flags;		/* Flags for this frame (SFF_*) */
} stack_frame_t;
99 
100 /* Flags for sf_flags */
101 
/*
 * One column of the CSV output.  Names and values are stored as
 * offsets (not pointers) because the underlying buffers can be
 * reallocated as they grow.
 */
typedef struct leaf_s {
    ssize_t f_name;		/* Name of leaf; offset in c_name_buf */
    ssize_t f_value;		/* Value of leaf; offset in c_value_buf;
				 * only meaningful when LF_HAS_VALUE is set */
    uint32_t f_flags;		/* Flags for this value (LF_*)  */
#ifdef CSV_STACK_IS_NEEDED
    ssize_t f_depth;		/* Depth of stack when leaf was recorded */
#endif /* CSV_STACK_IS_NEEDED */
} leaf_t;
110 
/* Flags for f_flags */
#define LF_KEY		(1<<0)	/* Leaf is a key (set when XFF_KEY is passed) */
#define LF_HAS_VALUE	(1<<1)	/* Value has been set for the current record;
				 * cleared after each record is emitted */
114 
115 typedef struct csv_private_s {
116     uint32_t c_flags;		/* Flags for this encoder */
117 
118     /* The path for which we select leafs */
119     char *c_path_buf;	    	/* Buffer containing path members */
120     path_frame_t *c_path;	/* Array of path members */
121     ssize_t c_path_max;		/* Depth of c_path[] */
122     ssize_t c_path_cur;		/* Current depth in c_path[] */
123 
124     /* A stack of open elements (xo_op_list, xo_op_container) */
125 #if CSV_STACK_IS_NEEDED
126     xo_buffer_t c_stack_buf;	/* Buffer used for stack content */
127     stack_frame_t *c_stack;	/* Stack of open tags */
128     ssize_t c_stack_max;	/* Maximum stack depth */
129 #endif /* CSV_STACK_IS_NEEDED */
130     ssize_t c_stack_depth;	/* Current stack depth */
131 
132     /* List of leafs we are emitting (to ensure consistency) */
133     xo_buffer_t c_name_buf;	/* String buffer for leaf names */
134     xo_buffer_t c_value_buf;	/* String buffer for leaf values */
135     leaf_t *c_leaf;		/* List of leafs */
136     ssize_t c_leaf_depth;	/* Current depth of c_leaf[] (next free) */
137     ssize_t c_leaf_max;		/* Max depth of c_leaf[] */
138 
139     xo_buffer_t c_data;		/* Buffer for creating data */
140 } csv_private_t;
141 
#define C_STACK_MAX	32	/* default c_stack_max (not referenced in the
				 * code visible here) */
#define C_LEAF_MAX	32	/* default c_leaf_max (first c_leaf[] size) */

/* Flags for this structure */
#define CF_HEADER_DONE	(1<<0)	/* Have already written the header */
#define CF_NO_HEADER	(1<<1)	/* Do not generate header ("no-header") */
#define CF_NO_KEYS	(1<<2)	/* Do not generate excess keys ("no-keys");
				 * set by the option but not tested in the
				 * code visible here */
#define CF_VALUE_ONLY	(1<<3)	/* Only generate the value ("value-only");
				 * set by the option but not tested in the
				 * code visible here */

#define CF_DOS_NEWLINE	(1<<4)	/* Generate CR-NL, just like MS-DOS ("dos") */
#define CF_LEAFS_DONE	(1<<5)	/* Leafs have already been recorded */
#define CF_NO_QUOTES	(1<<6)	/* Do not generate quotes ("no-quotes") */
#define CF_RECORD_DATA	(1<<7)	/* Record all sibling leafs */

#define CF_DEBUG	(1<<8)	/* Make debug output ("debug") */
#define CF_HAS_PATH	(1<<9)	/* A "path" option was provided */
158 
159 /*
160  * A simple debugging print function, similar to psu_dbg.  Controlled by
161  * the undocumented "debug" option.
162  */
163 static void
164 csv_dbg (xo_handle_t *xop UNUSED, csv_private_t *csv UNUSED,
165 	 const char *fmt, ...)
166 {
167     if (csv == NULL || !(csv->c_flags & CF_DEBUG))
168 	return;
169 
170     va_list vap;
171 
172     va_start(vap, fmt);
173     vfprintf(stderr, fmt, vap);
174     va_end(vap);
175 }
176 
177 /*
178  * Create the private data for this handle, initialize it, and record
179  * the pointer in the handle.
180  */
181 static int
182 csv_create (xo_handle_t *xop)
183 {
184     csv_private_t *csv = xo_realloc(NULL, sizeof(*csv));
185     if (csv == NULL)
186 	return -1;
187 
188     bzero(csv, sizeof(*csv));
189     xo_buf_init(&csv->c_data);
190     xo_buf_init(&csv->c_name_buf);
191     xo_buf_init(&csv->c_value_buf);
192 #ifdef CSV_STACK_IS_NEEDED
193     xo_buf_init(&csv->c_stack_buf);
194 #endif /* CSV_STACK_IS_NEEDED */
195 
196     xo_set_private(xop, csv);
197 
198     return 0;
199 }
200 
201 /*
202  * Clean up and release any data in use by this handle
203  */
204 static void
205 csv_destroy (xo_handle_t *xop UNUSED, csv_private_t *csv)
206 {
207     /* Clean up */
208     xo_buf_cleanup(&csv->c_data);
209     xo_buf_cleanup(&csv->c_name_buf);
210     xo_buf_cleanup(&csv->c_value_buf);
211 #ifdef CSV_STACK_IS_NEEDED
212     xo_buf_cleanup(&csv->c_stack_buf);
213 #endif /* CSV_STACK_IS_NEEDED */
214 
215     if (csv->c_leaf)
216 	xo_free(csv->c_leaf);
217     if (csv->c_path_buf)
218 	xo_free(csv->c_path_buf);
219 }
220 
221 /*
222  * Return the element name at the top of the path stack.  This is the
223  * item that we are currently trying to match on.
224  */
225 static const char *
226 csv_path_top (csv_private_t *csv, ssize_t delta)
227 {
228     if (!(csv->c_flags & CF_HAS_PATH) || csv->c_path == NULL)
229 	return NULL;
230 
231     ssize_t cur = csv->c_path_cur + delta;
232 
233     if (cur < 0)
234 	return NULL;
235 
236     return csv->c_path[cur].pf_name;
237 }
238 
239 /*
240  * Underimplemented stack functionality
241  */
242 static inline void
243 csv_stack_push (csv_private_t *csv UNUSED, const char *name UNUSED)
244 {
245 #ifdef CSV_STACK_IS_NEEDED
246     csv->c_stack_depth += 1;
247 #endif /* CSV_STACK_IS_NEEDED */
248 }
249 
250 /*
251  * Underimplemented stack functionality
252  */
253 static inline void
254 csv_stack_pop (csv_private_t *csv UNUSED, const char *name UNUSED)
255 {
256 #ifdef CSV_STACK_IS_NEEDED
257     csv->c_stack_depth -= 1;
258 #endif /* CSV_STACK_IS_NEEDED */
259 }
260 
/* Flags for csv_quote_flags (the return value of csv_quote_flags()) */
#define QF_NEEDS_QUOTES	(1<<0)		/* Needs to be quoted */
#define QF_NEEDS_ESCAPE	(1<<1)		/* Needs to be escaped (contains '"') */
264 
265 /*
266  * Determine how much quote processing is needed.  The details of the
267  * quoting rules are given at the top of this file.  We return a set
268  * of flags, indicating what's needed.
269  */
270 static uint32_t
271 csv_quote_flags (xo_handle_t *xop UNUSED, csv_private_t *csv UNUSED,
272 		  const char *value)
273 {
274     static const char quoted[] = "\n\r\",";
275     static const char escaped[] = "\"";
276 
277     if (csv->c_flags & CF_NO_QUOTES)	/* User doesn't want quotes */
278 	return 0;
279 
280     size_t len = strlen(value);
281     uint32_t rc = 0;
282 
283     if (strcspn(value, quoted) != len)
284 	rc |= QF_NEEDS_QUOTES;
285     else if (isspace((int) value[0]))	/* Leading whitespace */
286 	rc |= QF_NEEDS_QUOTES;
287     else if (isspace((int) value[len - 1])) /* Trailing whitespace */
288 	rc |= QF_NEEDS_QUOTES;
289 
290     if (strcspn(value, escaped) != len)
291 	rc |= QF_NEEDS_ESCAPE;
292 
293     csv_dbg(xop, csv, "csv: quote flags [%s] -> %x (%zu/%zu)\n",
294 	    value, rc, len, strcspn(value, quoted));
295 
296     return rc;
297 }
298 
299 /*
300  * Escape the string, following the rules in RFC4180
301  */
302 static void
303 csv_escape (xo_buffer_t *xbp, const char *value, size_t len)
304 {
305     const char *cp, *ep, *np;
306 
307     for (cp = value, ep = value + len; cp && cp < ep; cp = np) {
308 	np = strchr(cp, '"');
309 	if (np) {
310 	    np += 1;
311 	    xo_buf_append(xbp, cp, np - cp);
312 	    xo_buf_append(xbp, "\"", 1);
313 	} else
314 	    xo_buf_append(xbp, cp, ep - cp);
315     }
316 }
317 
318 /*
319  * Append a newline to the buffer, following the settings of the "dos"
320  * flag.
321  */
322 static void
323 csv_append_newline (xo_buffer_t *xbp, csv_private_t *csv)
324 {
325     if (csv->c_flags & CF_DOS_NEWLINE)
326 	xo_buf_append(xbp, "\r\n", 2);
327     else
328 	xo_buf_append(xbp, "\n", 1);
329 }
330 
331 /*
332  * Create a 'record' of 'fields' from our recorded leaf values.  If
333  * this is the first line and "no-header" isn't given, make a record
334  * containing the leaf names.
335  */
336 static void
337 csv_emit_record (xo_handle_t *xop, csv_private_t *csv)
338 {
339     csv_dbg(xop, csv, "csv: emit: ...\n");
340 
341     ssize_t fnum;
342     uint32_t quote_flags;
343     leaf_t *lp;
344 
345     /* If we have no data, then don't bother */
346     if (csv->c_leaf_depth == 0)
347 	return;
348 
349     if (!(csv->c_flags & (CF_HEADER_DONE | CF_NO_HEADER))) {
350 	csv->c_flags |= CF_HEADER_DONE;
351 
352 	for (fnum = 0; fnum < csv->c_leaf_depth; fnum++) {
353 	    lp = &csv->c_leaf[fnum];
354 	    const char *name = xo_buf_data(&csv->c_name_buf, lp->f_name);
355 
356 	    if (fnum != 0)
357 		xo_buf_append(&csv->c_data, ",", 1);
358 
359 	    xo_buf_append(&csv->c_data, name, strlen(name));
360 	}
361 
362 	csv_append_newline(&csv->c_data, csv);
363     }
364 
365     for (fnum = 0; fnum < csv->c_leaf_depth; fnum++) {
366 	lp = &csv->c_leaf[fnum];
367 	const char *value;
368 
369 	if (lp->f_flags & LF_HAS_VALUE) {
370 	    value = xo_buf_data(&csv->c_value_buf, lp->f_value);
371 	} else {
372 	    value = "";
373 	}
374 
375 	quote_flags = csv_quote_flags(xop, csv, value);
376 
377 	if (fnum != 0)
378 	    xo_buf_append(&csv->c_data, ",", 1);
379 
380 	if (quote_flags & QF_NEEDS_QUOTES)
381 	    xo_buf_append(&csv->c_data, "\"", 1);
382 
383 	if (quote_flags & QF_NEEDS_ESCAPE)
384 	    csv_escape(&csv->c_data, value, strlen(value));
385 	else
386 	    xo_buf_append(&csv->c_data, value, strlen(value));
387 
388 	if (quote_flags & QF_NEEDS_QUOTES)
389 	    xo_buf_append(&csv->c_data, "\"", 1);
390     }
391 
392     csv_append_newline(&csv->c_data, csv);
393 
394     /* We flush if either flush flag is set */
395     if (xo_get_flags(xop) & (XOF_FLUSH | XOF_FLUSH_LINE))
396 	xo_flush_h(xop);
397 
398     /* Clean out values from leafs */
399     for (fnum = 0; fnum < csv->c_leaf_depth; fnum++) {
400 	lp = &csv->c_leaf[fnum];
401 
402 	lp->f_flags &= ~LF_HAS_VALUE;
403 	lp->f_value = 0;
404     }
405 
406     xo_buf_reset(&csv->c_value_buf);
407 
408     /*
409      * Once we emit the first line, our set of leafs is locked and
410      * cannot be changed.
411      */
412     csv->c_flags |= CF_LEAFS_DONE;
413 }
414 
415 /*
416  * Open a "level" of hierarchy, either a container or an instance.  Look
417  * for a match in the path=x/y/z hierarchy, and ignore if not a match.
418  * If we're at the end of the path, start recording leaf values.
419  */
420 static int
421 csv_open_level (xo_handle_t *xop UNUSED, csv_private_t *csv,
422 		const char *name, int instance)
423 {
424     /* An new "open" event means we stop recording */
425     if (csv->c_flags & CF_RECORD_DATA) {
426 	csv->c_flags &= ~CF_RECORD_DATA;
427 	csv_emit_record(xop, csv);
428 	return 0;
429     }
430 
431     const char *path_top = csv_path_top(csv, 0);
432 
433     /* If the top of the stack does not match the name, then ignore */
434     if (path_top == NULL) {
435 	if (instance && !(csv->c_flags & CF_HAS_PATH)) {
436 	    csv_dbg(xop, csv, "csv: recording (no-path) ...\n");
437 	    csv->c_flags |= CF_RECORD_DATA;
438 	}
439 
440     } else if (xo_streq(path_top, name)) {
441 	csv->c_path_cur += 1;		/* Advance to next path member */
442 
443 	csv_dbg(xop, csv, "csv: match: [%s] (%zd/%zd)\n", name,
444 	       csv->c_path_cur, csv->c_path_max);
445 
446 	/* If we're all the way thru the path members, start recording */
447 	if (csv->c_path_cur == csv->c_path_max) {
448 	    csv_dbg(xop, csv, "csv: recording ...\n");
449 	    csv->c_flags |= CF_RECORD_DATA;
450 	}
451     }
452 
453     /* Push the name on the stack */
454     csv_stack_push(csv, name);
455 
456     return 0;
457 }
458 
459 /*
460  * Close a "level", either a container or an instance.
461  */
462 static int
463 csv_close_level (xo_handle_t *xop UNUSED, csv_private_t *csv, const char *name)
464 {
465     /* If we're recording, a close triggers an emit */
466     if (csv->c_flags & CF_RECORD_DATA) {
467 	csv->c_flags &= ~CF_RECORD_DATA;
468 	csv_emit_record(xop, csv);
469     }
470 
471     const char *path_top = csv_path_top(csv, -1);
472     csv_dbg(xop, csv, "csv: close: [%s] [%s] (%zd)\n", name,
473 	   path_top ?: "", csv->c_path_cur);
474 
475     /* If the top of the stack does not match the name, then ignore */
476     if (path_top != NULL && xo_streq(path_top, name)) {
477 	csv->c_path_cur -= 1;
478 	return 0;
479     }
480 
481     /* Pop the name off the stack */
482     csv_stack_pop(csv, name);
483 
484     return 0;
485 }
486 
487 /*
488  * Return the index of a given leaf in the c_leaf[] array, where we
489  * record leaf values.  If the leaf is new and we haven't stopped recording
490  * leafs, then make a new slot for it and record the name.
491  */
492 static int
493 csv_leaf_num (xo_handle_t *xop UNUSED, csv_private_t *csv,
494 	       const char *name, xo_xff_flags_t flags)
495 {
496     ssize_t fnum;
497     leaf_t *lp;
498     xo_buffer_t *xbp = &csv->c_name_buf;
499 
500     for (fnum = 0; fnum < csv->c_leaf_depth; fnum++) {
501 	lp = &csv->c_leaf[fnum];
502 
503 	const char *fname = xo_buf_data(xbp, lp->f_name);
504 	if (xo_streq(fname, name))
505 	    return fnum;
506     }
507 
508     /* If we're done with adding new leafs, then bail */
509     if (csv->c_flags & CF_LEAFS_DONE)
510 	return -1;
511 
512     /* This leaf does not exist yet, so we need to create it */
513     /* Start by checking if there's enough room */
514     if (csv->c_leaf_depth + 1 >= csv->c_leaf_max) {
515 	/* Out of room; realloc it */
516 	ssize_t new_max = csv->c_leaf_max * 2;
517 	if (new_max == 0)
518 	    new_max = C_LEAF_MAX;
519 
520 	lp = xo_realloc(csv->c_leaf, new_max * sizeof(*lp));
521 	if (lp == NULL)
522 	    return -1;			/* No luck; bail */
523 
524 	/* Zero out the new portion */
525 	bzero(&lp[csv->c_leaf_max], csv->c_leaf_max * sizeof(*lp));
526 
527 	/* Update csv data */
528 	csv->c_leaf = lp;
529 	csv->c_leaf_max = new_max;
530     }
531 
532     lp = &csv->c_leaf[csv->c_leaf_depth++];
533 #ifdef CSV_STACK_IS_NEEDED
534     lp->f_depth = csv->c_stack_depth;
535 #endif /* CSV_STACK_IS_NEEDED */
536 
537     lp->f_name = xo_buf_offset(xbp);
538 
539     char *cp = xo_buf_cur(xbp);
540     xo_buf_append(xbp, name, strlen(name) + 1);
541 
542     if (flags & XFF_KEY)
543 	lp->f_flags |= LF_KEY;
544 
545     csv_dbg(xop, csv, "csv: leaf: name: %zd [%s] [%s] %x\n",
546 	    fnum, name, cp, lp->f_flags);
547 
548     return fnum;
549 }
550 
551 /*
552  * Record a new value for a leaf
553  */
554 static void
555 csv_leaf_set (xo_handle_t *xop UNUSED, csv_private_t *csv, leaf_t *lp,
556 	       const char *value)
557 {
558     xo_buffer_t *xbp = &csv->c_value_buf;
559 
560     lp->f_value = xo_buf_offset(xbp);
561     lp->f_flags |= LF_HAS_VALUE;
562 
563     char *cp = xo_buf_cur(xbp);
564     xo_buf_append(xbp, value, strlen(value) + 1);
565 
566     csv_dbg(xop, csv, "csv: leaf: value: [%s] [%s] %x\n",
567 	    value, cp, lp->f_flags);
568 }
569 
570 /*
571  * Record the requested set of leaf names.  The input should be a set
572  * of leaf names, separated by periods.
573  */
574 static int
575 csv_record_leafs (xo_handle_t *xop, csv_private_t *csv, const char *leafs_raw)
576 {
577     char *cp, *ep, *np;
578     ssize_t len = strlen(leafs_raw);
579     char *leafs_buf = alloca(len + 1);
580 
581     memcpy(leafs_buf, leafs_raw, len + 1); /* Make local copy */
582 
583     for (cp = leafs_buf, ep = leafs_buf + len; cp && cp < ep; cp = np) {
584 	np = strchr(cp, '.');
585 	if (np)
586 	    *np++ = '\0';
587 
588 	if (*cp == '\0')		/* Skip empty names */
589 	    continue;
590 
591 	csv_dbg(xop, csv, "adding leaf: [%s]\n", cp);
592 	csv_leaf_num(xop, csv, cp, 0);
593     }
594 
595     /*
596      * Since we've been told explicitly what leafs matter, ignore the rest
597      */
598     csv->c_flags |= CF_LEAFS_DONE;
599 
600     return 0;
601 }
602 
603 /*
604  * Record the requested path elements.  The input should be a set of
605  * container or instances names, separated by slashes.
606  */
607 static int
608 csv_record_path (xo_handle_t *xop, csv_private_t *csv, const char *path_raw)
609 {
610     int count;
611     char *cp, *ep, *np;
612     ssize_t len = strlen(path_raw);
613     char *path_buf = xo_realloc(NULL, len + 1);
614 
615     memcpy(path_buf, path_raw, len + 1);
616 
617     for (cp = path_buf, ep = path_buf + len, count = 2;
618 	 cp && cp < ep; cp = np) {
619 	np = strchr(cp, '/');
620 	if (np) {
621 	    np += 1;
622 	    count += 1;
623 	}
624     }
625 
626     path_frame_t *path = xo_realloc(NULL, sizeof(path[0]) * count);
627     if (path == NULL) {
628 	xo_failure(xop, "allocation failure for path '%s'", path_buf);
629 	return -1;
630     }
631 
632     bzero(path, sizeof(path[0]) * count);
633 
634     for (count = 0, cp = path_buf; cp && cp < ep; cp = np) {
635 	path[count++].pf_name = cp;
636 
637 	np = strchr(cp, '/');
638 	if (np)
639 	    *np++ = '\0';
640 	csv_dbg(xop, csv, "path: [%s]\n", cp);
641     }
642 
643     path[count].pf_name = NULL;
644 
645     if (csv->c_path)		     /* In case two paths are given */
646 	xo_free(csv->c_path);
647     if (csv->c_path_buf)	     /* In case two paths are given */
648 	xo_free(csv->c_path_buf);
649 
650     csv->c_path_buf = path_buf;
651     csv->c_path = path;
652     csv->c_path_max = count;
653     csv->c_path_cur = 0;
654 
655     return 0;
656 }
657 
658 /*
659  * Extract the option values.  The format is:
660  *    -libxo encoder=csv:kw=val:kw=val:kw=val,pretty
661  *    -libxo encoder=csv+kw=val+kw=val+kw=val,pretty
662  */
663 static int
664 csv_options (xo_handle_t *xop, csv_private_t *csv,
665 	     const char *raw_opts, char opts_char)
666 {
667     ssize_t len = strlen(raw_opts);
668     char *options = alloca(len + 1);
669     memcpy(options, raw_opts, len);
670     options[len] = '\0';
671 
672     char *cp, *ep, *np, *vp;
673     for (cp = options, ep = options + len + 1; cp && cp < ep; cp = np) {
674 	np = strchr(cp, opts_char);
675 	if (np)
676 	    *np++ = '\0';
677 
678 	vp = strchr(cp, '=');
679 	if (vp)
680 	    *vp++ = '\0';
681 
682 	if (xo_streq(cp, "path")) {
683 	    /* Record the path */
684 	    if (vp != NULL && csv_record_path(xop, csv, vp))
685   		return -1;
686 
687 	    csv->c_flags |= CF_HAS_PATH; /* Yup, we have an explicit path now */
688 
689 	} else if (xo_streq(cp, "leafs")
690 		   || xo_streq(cp, "leaf")
691 		   || xo_streq(cp, "leaves")) {
692 	    /* Record the leafs */
693 	    if (vp != NULL && csv_record_leafs(xop, csv, vp))
694   		return -1;
695 
696 	} else if (xo_streq(cp, "no-keys")) {
697 	    csv->c_flags |= CF_NO_KEYS;
698 	} else if (xo_streq(cp, "no-header")) {
699 	    csv->c_flags |= CF_NO_HEADER;
700 	} else if (xo_streq(cp, "value-only")) {
701 	    csv->c_flags |= CF_VALUE_ONLY;
702 	} else if (xo_streq(cp, "dos")) {
703 	    csv->c_flags |= CF_DOS_NEWLINE;
704 	} else if (xo_streq(cp, "no-quotes")) {
705 	    csv->c_flags |= CF_NO_QUOTES;
706 	} else if (xo_streq(cp, "debug")) {
707 	    csv->c_flags |= CF_DEBUG;
708 	} else {
709 	    xo_warn_hc(xop, -1,
710 		       "unknown encoder option value: '%s'", cp);
711 	    return -1;
712 	}
713     }
714 
715     return 0;
716 }
717 
718 /*
719  * Handler for incoming data values.  We just record each leaf name and
720  * value.  The values are emittd when the instance is closed.
721  */
722 static int
723 csv_data (xo_handle_t *xop UNUSED, csv_private_t *csv UNUSED,
724 	  const char *name, const char *value,
725 	  xo_xof_flags_t flags)
726 {
727     csv_dbg(xop, csv, "data: [%s]=[%s] %llx\n", name, value, (unsigned long long) flags);
728 
729     if (!(csv->c_flags & CF_RECORD_DATA))
730 	return 0;
731 
732     /* Find the leaf number */
733     int fnum = csv_leaf_num(xop, csv, name, flags);
734     if (fnum < 0)
735 	return 0;			/* Don't bother recording */
736 
737     leaf_t *lp = &csv->c_leaf[fnum];
738     csv_leaf_set(xop, csv, lp, value);
739 
740     return 0;
741 }
742 
743 /*
744  * The callback from libxo, passing us operations/events as they
745  * happen.
746  */
747 static int
748 csv_handler (XO_ENCODER_HANDLER_ARGS)
749 {
750     int rc = 0;
751     csv_private_t *csv = private;
752     xo_buffer_t *xbp = csv ? &csv->c_data : NULL;
753 
754     csv_dbg(xop, csv, "op %s: [%s] [%s]\n",  xo_encoder_op_name(op),
755 	   name ?: "", value ?: "");
756     fflush(stdout);
757 
758     /* If we don't have private data, we're sunk */
759     if (csv == NULL && op != XO_OP_CREATE)
760 	return -1;
761 
762     switch (op) {
763     case XO_OP_CREATE:		/* Called when the handle is init'd */
764 	rc = csv_create(xop);
765 	break;
766 
767     case XO_OP_OPTIONS:
768 	rc = csv_options(xop, csv, value, ':');
769 	break;
770 
771     case XO_OP_OPTIONS_PLUS:
772 	rc = csv_options(xop, csv, value, '+');
773 	break;
774 
775     case XO_OP_OPEN_LIST:
776     case XO_OP_CLOSE_LIST:
777 	break;				/* Ignore these ops */
778 
779     case XO_OP_OPEN_CONTAINER:
780     case XO_OP_OPEN_LEAF_LIST:
781 	rc = csv_open_level(xop, csv, name, 0);
782 	break;
783 
784     case XO_OP_OPEN_INSTANCE:
785 	rc = csv_open_level(xop, csv, name, 1);
786 	break;
787 
788     case XO_OP_CLOSE_CONTAINER:
789     case XO_OP_CLOSE_LEAF_LIST:
790     case XO_OP_CLOSE_INSTANCE:
791 	rc = csv_close_level(xop, csv, name);
792 	break;
793 
794     case XO_OP_STRING:		   /* Quoted UTF-8 string */
795     case XO_OP_CONTENT:		   /* Other content */
796 	rc = csv_data(xop, csv, name, value, flags);
797 	break;
798 
799     case XO_OP_FINISH:		   /* Clean up function */
800 	break;
801 
802     case XO_OP_FLUSH:		   /* Clean up function */
803 	rc = write(1, xbp->xb_bufp, xbp->xb_curp - xbp->xb_bufp);
804 	if (rc > 0)
805 	    rc = 0;
806 
807 	xo_buf_reset(xbp);
808 	break;
809 
810     case XO_OP_DESTROY:		   /* Clean up function */
811 	csv_destroy(xop, csv);
812 	break;
813 
814     case XO_OP_ATTRIBUTE:	   /* Attribute name/value */
815 	break;
816 
817     case XO_OP_VERSION:		/* Version string */
818 	break;
819     }
820 
821     return rc;
822 }
823 
824 /*
825  * Callback when our encoder is loaded.
826  */
827 int
828 xo_encoder_library_init (XO_ENCODER_INIT_ARGS)
829 {
830     arg->xei_handler = csv_handler;
831     arg->xei_version = XO_ENCODER_VERSION;
832 
833     return 0;
834 }
835