xref: /freebsd/lib/libfigpar/figpar.c (revision 53120fbb68952b7d620c2c0e1cf05c5017fc1b27)
1 /*-
2  * Copyright (c) 2002-2015 Devin Teske <dteske@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/param.h>
28 
29 #include <ctype.h>
30 #include <errno.h>
31 #include <fcntl.h>
32 #include <fnmatch.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 
37 #include "figpar.h"
38 #include "string_m.h"
39 
/*
 * Shared placeholder entry. get_config_option() returns a pointer to this
 * object instead of NULL when it has nothing else to return. All members
 * start out zero/NULL.
 */
struct figpar_config figpar_dummy_config = {0, NULL, {0}, NULL};
41 
42 /*
43  * Search for config option (struct figpar_config) in the array of config
44  * options, returning the struct whose directive matches the given parameter.
45  * If no match is found, a pointer to the static dummy array (above) is
46  * returned.
47  *
48  * This is to eliminate dependency on the index position of an item in the
49  * array, since the index position is more apt to be changed as code grows.
50  */
51 struct figpar_config *
52 get_config_option(struct figpar_config options[], const char *directive)
53 {
54 	uint32_t n;
55 
56 	/* Check arguments */
57 	if (options == NULL || directive == NULL)
58 		return (&figpar_dummy_config);
59 
60 	/* Loop through the array, return the index of the first match */
61 	for (n = 0; options[n].directive != NULL; n++)
62 		if (strcmp(options[n].directive, directive) == 0)
63 			return (&(options[n]));
64 
65 	/* Re-initialize the dummy variable in case it was written to */
66 	figpar_dummy_config.directive	= NULL;
67 	figpar_dummy_config.type	= 0;
68 	figpar_dummy_config.action	= NULL;
69 	figpar_dummy_config.value.u_num	= 0;
70 
71 	return (&figpar_dummy_config);
72 }
73 
74 /*
75  * Parse the configuration file at `path' and execute the `action' call-back
76  * functions for any directives defined by the array of config options (first
77  * argument).
78  *
79  * For unknown directives that are encountered, you can optionally pass a
80  * call-back function for the third argument to be called for unknowns.
81  *
82  * Returns zero on success; otherwise returns -1 and errno should be consulted.
83 */
84 int
85 parse_config(struct figpar_config options[], const char *path,
86     int (*unknown)(struct figpar_config *option, uint32_t line,
87     char *directive, char *value), uint16_t processing_options)
88 {
89 	uint8_t bequals;
90 	uint8_t bsemicolon;
91 	uint8_t case_sensitive;
92 	uint8_t comment = 0;
93 	uint8_t end;
94 	uint8_t found;
95 	uint8_t have_equals = 0;
96 	uint8_t quote;
97 	uint8_t require_equals;
98 	uint8_t strict_equals;
99 	char p[2];
100 	char *directive;
101 	char *t;
102 	char *value;
103 	int error;
104 	int fd;
105 	ssize_t r = 1;
106 	uint32_t dsize;
107 	uint32_t line = 1;
108 	uint32_t n;
109 	uint32_t vsize;
110 	uint32_t x;
111 	off_t charpos;
112 	off_t curpos;
113 	char rpath[PATH_MAX];
114 
115 	/* Sanity check: if no options and no unknown function, return */
116 	if (options == NULL && unknown == NULL)
117 		return (-1);
118 
119 	/* Processing options */
120 	bequals = (processing_options & FIGPAR_BREAK_ON_EQUALS) == 0 ? 0 : 1;
121 	bsemicolon =
122 		(processing_options & FIGPAR_BREAK_ON_SEMICOLON) == 0 ? 0 : 1;
123 	case_sensitive =
124 		(processing_options & FIGPAR_CASE_SENSITIVE) == 0 ? 0 : 1;
125 	require_equals =
126 		(processing_options & FIGPAR_REQUIRE_EQUALS) == 0 ? 0 : 1;
127 	strict_equals =
128 		(processing_options & FIGPAR_STRICT_EQUALS) == 0 ? 0 : 1;
129 
130 	/* Initialize strings */
131 	directive = value = 0;
132 	vsize = dsize = 0;
133 
134 	/* Resolve the file path */
135 	if (realpath(path, rpath) == 0)
136 		return (-1);
137 
138 	/* Open the file */
139 	if ((fd = open(rpath, O_RDONLY)) < 0)
140 		return (-1);
141 
142 	/* Read the file until EOF */
143 	while (r != 0) {
144 		r = read(fd, p, 1);
145 
146 		/* skip to the beginning of a directive */
147 		while (r != 0 && (isspace(*p) || *p == '#' || comment ||
148 		    (bsemicolon && *p == ';'))) {
149 			if (*p == '#')
150 				comment = 1;
151 			else if (*p == '\n') {
152 				comment = 0;
153 				line++;
154 			}
155 			r = read(fd, p, 1);
156 		}
157 		/* Test for EOF; if EOF then no directive was found */
158 		if (r == 0) {
159 			close(fd);
160 			return (0);
161 		}
162 
163 		/* Get the current offset */
164 		curpos = lseek(fd, 0, SEEK_CUR) - 1;
165 		if (curpos == -1) {
166 			close(fd);
167 			return (-1);
168 		}
169 
170 		/* Find the length of the directive */
171 		for (n = 0; r != 0; n++) {
172 			if (isspace(*p))
173 				break;
174 			if (bequals && *p == '=') {
175 				have_equals = 1;
176 				break;
177 			}
178 			if (bsemicolon && *p == ';')
179 				break;
180 			r = read(fd, p, 1);
181 		}
182 
183 		/* Test for EOF, if EOF then no directive was found */
184 		if (n == 0 && r == 0) {
185 			close(fd);
186 			return (0);
187 		}
188 
189 		/* Go back to the beginning of the directive */
190 		error = (int)lseek(fd, curpos, SEEK_SET);
191 		if (error == (curpos - 1)) {
192 			close(fd);
193 			return (-1);
194 		}
195 
196 		/* Allocate and read the directive into memory */
197 		if (n > dsize) {
198 			if ((directive = realloc(directive, n + 1)) == NULL) {
199 				close(fd);
200 				return (-1);
201 			}
202 			dsize = n;
203 		}
204 		r = read(fd, directive, n);
205 
206 		/* Advance beyond the equals sign if appropriate/desired */
207 		if (bequals && *p == '=') {
208 			if (lseek(fd, 1, SEEK_CUR) != -1)
209 				r = read(fd, p, 1);
210 			if (strict_equals && isspace(*p))
211 				*p = '\n';
212 		}
213 
214 		/* Terminate the string */
215 		directive[n] = '\0';
216 
217 		/* Convert directive to lower case before comparison */
218 		if (!case_sensitive)
219 			strtolower(directive);
220 
221 		/* Move to what may be the start of the value */
222 		if (!(bsemicolon && *p == ';') &&
223 		    !(strict_equals && *p == '=')) {
224 			while (r != 0 && isspace(*p) && *p != '\n')
225 				r = read(fd, p, 1);
226 		}
227 
228 		/* An equals sign may have stopped us, should we eat it? */
229 		if (r != 0 && bequals && *p == '=' && !strict_equals) {
230 			have_equals = 1;
231 			r = read(fd, p, 1);
232 			while (r != 0 && isspace(*p) && *p != '\n')
233 				r = read(fd, p, 1);
234 		}
235 
236 		/* If no value, allocate a dummy value and jump to action */
237 		if (r == 0 || *p == '\n' || *p == '#' ||
238 		    (bsemicolon && *p == ';')) {
239 			/* Initialize the value if not already done */
240 			if (value == NULL && (value = malloc(1)) == NULL) {
241 				close(fd);
242 				return (-1);
243 			}
244 			value[0] = '\0';
245 			goto call_function;
246 		}
247 
248 		/* Get the current offset */
249 		curpos = lseek(fd, 0, SEEK_CUR) - 1;
250 		if (curpos == -1) {
251 			close(fd);
252 			return (-1);
253 		}
254 
255 		/* Find the end of the value */
256 		quote = 0;
257 		end = 0;
258 		while (r != 0 && end == 0) {
259 			/* Advance to the next character if we know we can */
260 			if (*p != '\"' && *p != '#' && *p != '\n' &&
261 			    (!bsemicolon || *p != ';')) {
262 				r = read(fd, p, 1);
263 				continue;
264 			}
265 
266 			/*
267 			 * If we get this far, we've hit an end-key
268 			 */
269 
270 			/* Get the current offset */
271 			charpos = lseek(fd, 0, SEEK_CUR) - 1;
272 			if (charpos == -1) {
273 				close(fd);
274 				return (-1);
275 			}
276 
277 			/*
278 			 * Go back so we can read the character before the key
279 			 * to check if the character is escaped (which means we
280 			 * should continue).
281 			 */
282 			error = (int)lseek(fd, -2, SEEK_CUR);
283 			if (error == -3) {
284 				close(fd);
285 				return (-1);
286 			}
287 			r = read(fd, p, 1);
288 
289 			/*
290 			 * Count how many backslashes there are (an odd number
291 			 * means the key is escaped, even means otherwise).
292 			 */
293 			for (n = 1; *p == '\\'; n++) {
294 				/* Move back another offset to read */
295 				error = (int)lseek(fd, -2, SEEK_CUR);
296 				if (error == -3) {
297 					close(fd);
298 					return (-1);
299 				}
300 				r = read(fd, p, 1);
301 			}
302 
303 			/* Move offset back to the key and read it */
304 			error = (int)lseek(fd, charpos, SEEK_SET);
305 			if (error == (charpos - 1)) {
306 				close(fd);
307 				return (-1);
308 			}
309 			r = read(fd, p, 1);
310 
311 			/*
312 			 * If an even number of backslashes was counted meaning
313 			 * key is not escaped, we should evaluate what to do.
314 			 */
315 			if ((n & 1) == 1) {
316 				switch (*p) {
317 				case '\"':
318 					/*
319 				 	 * Flag current sequence of characters
320 					 * to follow as being quoted (hashes
321 					 * are not considered comments).
322 					 */
323 					quote = !quote;
324 					break;
325 				case '#':
326 					/*
327 					 * If we aren't in a quoted series, we
328 					 * just hit an inline comment and have
329 					 * found the end of the value.
330 					 */
331 					if (!quote)
332 						end = 1;
333 					break;
334 				case '\n':
335 					/*
336 					 * Newline characters must always be
337 					 * escaped, whether inside a quoted
338 					 * series or not, otherwise they
339 					 * terminate the value.
340 					 */
341 					end = 1;
342 				case ';':
343 					if (!quote && bsemicolon)
344 						end = 1;
345 					break;
346 				}
347 			} else if (*p == '\n')
348 				/* Escaped newline character. increment */
349 				line++;
350 
351 			/* Advance to the next character */
352 			r = read(fd, p, 1);
353 		}
354 
355 		/* Get the current offset */
356 		charpos = lseek(fd, 0, SEEK_CUR) - 1;
357 		if (charpos == -1) {
358 			close(fd);
359 			return (-1);
360 		}
361 
362 		/* Get the length of the value */
363 		n = (uint32_t)(charpos - curpos);
364 		if (r != 0) /* more to read, but don't read ending key */
365 			n--;
366 
367 		/* Move offset back to the beginning of the value */
368 		error = (int)lseek(fd, curpos, SEEK_SET);
369 		if (error == (curpos - 1)) {
370 			close(fd);
371 			return (-1);
372 		}
373 
374 		/* Allocate and read the value into memory */
375 		if (n > vsize) {
376 			if ((value = realloc(value, n + 1)) == NULL) {
377 				close(fd);
378 				return (-1);
379 			}
380 			vsize = n;
381 		}
382 		r = read(fd, value, n);
383 
384 		/* Terminate the string */
385 		value[n] = '\0';
386 
387 		/* Cut trailing whitespace off by termination */
388 		t = value + n;
389 		while (isspace(*--t))
390 			*t = '\0';
391 
392 		/* Escape the escaped quotes (replaceall is in string_m.c) */
393 		x = strcount(value, "\\\""); /* in string_m.c */
394 		if (x != 0 && (n + x) > vsize) {
395 			if ((value = realloc(value, n + x + 1)) == NULL) {
396 				close(fd);
397 				return (-1);
398 			}
399 			vsize = n + x;
400 		}
401 		if (replaceall(value, "\\\"", "\\\\\"") < 0) {
402 			/* Replace operation failed for some unknown reason */
403 			close(fd);
404 			return (-1);
405 		}
406 
407 		/* Remove all new line characters */
408 		if (replaceall(value, "\\\n", "") < 0) {
409 			/* Replace operation failed for some unknown reason */
410 			close(fd);
411 			return (-1);
412 		}
413 
414 		/* Resolve escape sequences */
415 		strexpand(value); /* in string_m.c */
416 
417 call_function:
418 		/* Abort if we're seeking only assignments */
419 		if (require_equals && !have_equals)
420 			return (-1);
421 
422 		found = have_equals = 0; /* reset */
423 
424 		/* If there are no options defined, call unknown and loop */
425 		if (options == NULL && unknown != NULL) {
426 			error = unknown(NULL, line, directive, value);
427 			if (error != 0) {
428 				close(fd);
429 				return (error);
430 			}
431 			continue;
432 		}
433 
434 		/* Loop through the array looking for a match for the value */
435 		for (n = 0; options[n].directive != NULL; n++) {
436 			error = fnmatch(options[n].directive, directive,
437 			    FNM_NOESCAPE);
438 			if (error == 0) {
439 				found = 1;
440 				/* Call function for array index item */
441 				if (options[n].action != NULL) {
442 					error = options[n].action(
443 					    &options[n],
444 					    line, directive, value);
445 					if (error != 0) {
446 						close(fd);
447 						return (error);
448 					}
449 				}
450 			} else if (error != FNM_NOMATCH) {
451 				/* An error has occurred */
452 				close(fd);
453 				return (-1);
454 			}
455 		}
456 		if (!found && unknown != NULL) {
457 			/*
458 			 * No match was found for the value we read from the
459 			 * file; call function designated for unknown values.
460 			 */
461 			error = unknown(NULL, line, directive, value);
462 			if (error != 0) {
463 				close(fd);
464 				return (error);
465 			}
466 		}
467 	}
468 
469 	close(fd);
470 	return (0);
471 }
472