xref: /freebsd/lib/libfigpar/figpar.c (revision 22cf89c938886d14f5796fc49f9f020c23ea8eaf)
1 /*-
2  * Copyright (c) 2002-2015 Devin Teske <dteske@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 #include <sys/param.h>
29 
30 #include <ctype.h>
31 #include <errno.h>
32 #include <fcntl.h>
33 #include <fnmatch.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37 
38 #include "figpar.h"
39 #include "string_m.h"
40 
41 struct figpar_config figpar_dummy_config = {0, NULL, {0}, NULL};
42 
43 /*
44  * Search for config option (struct figpar_config) in the array of config
45  * options, returning the struct whose directive matches the given parameter.
46  * If no match is found, a pointer to the static dummy array (above) is
47  * returned.
48  *
49  * This is to eliminate dependency on the index position of an item in the
50  * array, since the index position is more apt to be changed as code grows.
51  */
52 struct figpar_config *
53 get_config_option(struct figpar_config options[], const char *directive)
54 {
55 	uint32_t n;
56 
57 	/* Check arguments */
58 	if (options == NULL || directive == NULL)
59 		return (&figpar_dummy_config);
60 
61 	/* Loop through the array, return the index of the first match */
62 	for (n = 0; options[n].directive != NULL; n++)
63 		if (strcmp(options[n].directive, directive) == 0)
64 			return (&(options[n]));
65 
66 	/* Re-initialize the dummy variable in case it was written to */
67 	figpar_dummy_config.directive	= NULL;
68 	figpar_dummy_config.type	= 0;
69 	figpar_dummy_config.action	= NULL;
70 	figpar_dummy_config.value.u_num	= 0;
71 
72 	return (&figpar_dummy_config);
73 }
74 
75 /*
76  * Parse the configuration file at `path' and execute the `action' call-back
77  * functions for any directives defined by the array of config options (first
78  * argument).
79  *
80  * For unknown directives that are encountered, you can optionally pass a
81  * call-back function for the third argument to be called for unknowns.
82  *
83  * Returns zero on success; otherwise returns -1 and errno should be consulted.
84 */
85 int
86 parse_config(struct figpar_config options[], const char *path,
87     int (*unknown)(struct figpar_config *option, uint32_t line,
88     char *directive, char *value), uint16_t processing_options)
89 {
90 	uint8_t bequals;
91 	uint8_t bsemicolon;
92 	uint8_t case_sensitive;
93 	uint8_t comment = 0;
94 	uint8_t end;
95 	uint8_t found;
96 	uint8_t have_equals = 0;
97 	uint8_t quote;
98 	uint8_t require_equals;
99 	uint8_t strict_equals;
100 	char p[2];
101 	char *directive;
102 	char *t;
103 	char *value;
104 	int error;
105 	int fd;
106 	ssize_t r = 1;
107 	uint32_t dsize;
108 	uint32_t line = 1;
109 	uint32_t n;
110 	uint32_t vsize;
111 	uint32_t x;
112 	off_t charpos;
113 	off_t curpos;
114 	char rpath[PATH_MAX];
115 
116 	/* Sanity check: if no options and no unknown function, return */
117 	if (options == NULL && unknown == NULL)
118 		return (-1);
119 
120 	/* Processing options */
121 	bequals = (processing_options & FIGPAR_BREAK_ON_EQUALS) == 0 ? 0 : 1;
122 	bsemicolon =
123 		(processing_options & FIGPAR_BREAK_ON_SEMICOLON) == 0 ? 0 : 1;
124 	case_sensitive =
125 		(processing_options & FIGPAR_CASE_SENSITIVE) == 0 ? 0 : 1;
126 	require_equals =
127 		(processing_options & FIGPAR_REQUIRE_EQUALS) == 0 ? 0 : 1;
128 	strict_equals =
129 		(processing_options & FIGPAR_STRICT_EQUALS) == 0 ? 0 : 1;
130 
131 	/* Initialize strings */
132 	directive = value = 0;
133 	vsize = dsize = 0;
134 
135 	/* Resolve the file path */
136 	if (realpath(path, rpath) == 0)
137 		return (-1);
138 
139 	/* Open the file */
140 	if ((fd = open(rpath, O_RDONLY)) < 0)
141 		return (-1);
142 
143 	/* Read the file until EOF */
144 	while (r != 0) {
145 		r = read(fd, p, 1);
146 
147 		/* skip to the beginning of a directive */
148 		while (r != 0 && (isspace(*p) || *p == '#' || comment ||
149 		    (bsemicolon && *p == ';'))) {
150 			if (*p == '#')
151 				comment = 1;
152 			else if (*p == '\n') {
153 				comment = 0;
154 				line++;
155 			}
156 			r = read(fd, p, 1);
157 		}
158 		/* Test for EOF; if EOF then no directive was found */
159 		if (r == 0) {
160 			close(fd);
161 			return (0);
162 		}
163 
164 		/* Get the current offset */
165 		curpos = lseek(fd, 0, SEEK_CUR) - 1;
166 		if (curpos == -1) {
167 			close(fd);
168 			return (-1);
169 		}
170 
171 		/* Find the length of the directive */
172 		for (n = 0; r != 0; n++) {
173 			if (isspace(*p))
174 				break;
175 			if (bequals && *p == '=') {
176 				have_equals = 1;
177 				break;
178 			}
179 			if (bsemicolon && *p == ';')
180 				break;
181 			r = read(fd, p, 1);
182 		}
183 
184 		/* Test for EOF, if EOF then no directive was found */
185 		if (n == 0 && r == 0) {
186 			close(fd);
187 			return (0);
188 		}
189 
190 		/* Go back to the beginning of the directive */
191 		error = (int)lseek(fd, curpos, SEEK_SET);
192 		if (error == (curpos - 1)) {
193 			close(fd);
194 			return (-1);
195 		}
196 
197 		/* Allocate and read the directive into memory */
198 		if (n > dsize) {
199 			if ((directive = realloc(directive, n + 1)) == NULL) {
200 				close(fd);
201 				return (-1);
202 			}
203 			dsize = n;
204 		}
205 		r = read(fd, directive, n);
206 
207 		/* Advance beyond the equals sign if appropriate/desired */
208 		if (bequals && *p == '=') {
209 			if (lseek(fd, 1, SEEK_CUR) != -1)
210 				r = read(fd, p, 1);
211 			if (strict_equals && isspace(*p))
212 				*p = '\n';
213 		}
214 
215 		/* Terminate the string */
216 		directive[n] = '\0';
217 
218 		/* Convert directive to lower case before comparison */
219 		if (!case_sensitive)
220 			strtolower(directive);
221 
222 		/* Move to what may be the start of the value */
223 		if (!(bsemicolon && *p == ';') &&
224 		    !(strict_equals && *p == '=')) {
225 			while (r != 0 && isspace(*p) && *p != '\n')
226 				r = read(fd, p, 1);
227 		}
228 
229 		/* An equals sign may have stopped us, should we eat it? */
230 		if (r != 0 && bequals && *p == '=' && !strict_equals) {
231 			have_equals = 1;
232 			r = read(fd, p, 1);
233 			while (r != 0 && isspace(*p) && *p != '\n')
234 				r = read(fd, p, 1);
235 		}
236 
237 		/* If no value, allocate a dummy value and jump to action */
238 		if (r == 0 || *p == '\n' || *p == '#' ||
239 		    (bsemicolon && *p == ';')) {
240 			/* Initialize the value if not already done */
241 			if (value == NULL && (value = malloc(1)) == NULL) {
242 				close(fd);
243 				return (-1);
244 			}
245 			value[0] = '\0';
246 			goto call_function;
247 		}
248 
249 		/* Get the current offset */
250 		curpos = lseek(fd, 0, SEEK_CUR) - 1;
251 		if (curpos == -1) {
252 			close(fd);
253 			return (-1);
254 		}
255 
256 		/* Find the end of the value */
257 		quote = 0;
258 		end = 0;
259 		while (r != 0 && end == 0) {
260 			/* Advance to the next character if we know we can */
261 			if (*p != '\"' && *p != '#' && *p != '\n' &&
262 			    (!bsemicolon || *p != ';')) {
263 				r = read(fd, p, 1);
264 				continue;
265 			}
266 
267 			/*
268 			 * If we get this far, we've hit an end-key
269 			 */
270 
271 			/* Get the current offset */
272 			charpos = lseek(fd, 0, SEEK_CUR) - 1;
273 			if (charpos == -1) {
274 				close(fd);
275 				return (-1);
276 			}
277 
278 			/*
279 			 * Go back so we can read the character before the key
280 			 * to check if the character is escaped (which means we
281 			 * should continue).
282 			 */
283 			error = (int)lseek(fd, -2, SEEK_CUR);
284 			if (error == -3) {
285 				close(fd);
286 				return (-1);
287 			}
288 			r = read(fd, p, 1);
289 
290 			/*
291 			 * Count how many backslashes there are (an odd number
292 			 * means the key is escaped, even means otherwise).
293 			 */
294 			for (n = 1; *p == '\\'; n++) {
295 				/* Move back another offset to read */
296 				error = (int)lseek(fd, -2, SEEK_CUR);
297 				if (error == -3) {
298 					close(fd);
299 					return (-1);
300 				}
301 				r = read(fd, p, 1);
302 			}
303 
304 			/* Move offset back to the key and read it */
305 			error = (int)lseek(fd, charpos, SEEK_SET);
306 			if (error == (charpos - 1)) {
307 				close(fd);
308 				return (-1);
309 			}
310 			r = read(fd, p, 1);
311 
312 			/*
313 			 * If an even number of backslashes was counted meaning
314 			 * key is not escaped, we should evaluate what to do.
315 			 */
316 			if ((n & 1) == 1) {
317 				switch (*p) {
318 				case '\"':
319 					/*
320 				 	 * Flag current sequence of characters
321 					 * to follow as being quoted (hashes
322 					 * are not considered comments).
323 					 */
324 					quote = !quote;
325 					break;
326 				case '#':
327 					/*
328 					 * If we aren't in a quoted series, we
329 					 * just hit an inline comment and have
330 					 * found the end of the value.
331 					 */
332 					if (!quote)
333 						end = 1;
334 					break;
335 				case '\n':
336 					/*
337 					 * Newline characters must always be
338 					 * escaped, whether inside a quoted
339 					 * series or not, otherwise they
340 					 * terminate the value.
341 					 */
342 					end = 1;
343 				case ';':
344 					if (!quote && bsemicolon)
345 						end = 1;
346 					break;
347 				}
348 			} else if (*p == '\n')
349 				/* Escaped newline character. increment */
350 				line++;
351 
352 			/* Advance to the next character */
353 			r = read(fd, p, 1);
354 		}
355 
356 		/* Get the current offset */
357 		charpos = lseek(fd, 0, SEEK_CUR) - 1;
358 		if (charpos == -1) {
359 			close(fd);
360 			return (-1);
361 		}
362 
363 		/* Get the length of the value */
364 		n = (uint32_t)(charpos - curpos);
365 		if (r != 0) /* more to read, but don't read ending key */
366 			n--;
367 
368 		/* Move offset back to the beginning of the value */
369 		error = (int)lseek(fd, curpos, SEEK_SET);
370 		if (error == (curpos - 1)) {
371 			close(fd);
372 			return (-1);
373 		}
374 
375 		/* Allocate and read the value into memory */
376 		if (n > vsize) {
377 			if ((value = realloc(value, n + 1)) == NULL) {
378 				close(fd);
379 				return (-1);
380 			}
381 			vsize = n;
382 		}
383 		r = read(fd, value, n);
384 
385 		/* Terminate the string */
386 		value[n] = '\0';
387 
388 		/* Cut trailing whitespace off by termination */
389 		t = value + n;
390 		while (isspace(*--t))
391 			*t = '\0';
392 
393 		/* Escape the escaped quotes (replaceall is in string_m.c) */
394 		x = strcount(value, "\\\""); /* in string_m.c */
395 		if (x != 0 && (n + x) > vsize) {
396 			if ((value = realloc(value, n + x + 1)) == NULL) {
397 				close(fd);
398 				return (-1);
399 			}
400 			vsize = n + x;
401 		}
402 		if (replaceall(value, "\\\"", "\\\\\"") < 0) {
403 			/* Replace operation failed for some unknown reason */
404 			close(fd);
405 			return (-1);
406 		}
407 
408 		/* Remove all new line characters */
409 		if (replaceall(value, "\\\n", "") < 0) {
410 			/* Replace operation failed for some unknown reason */
411 			close(fd);
412 			return (-1);
413 		}
414 
415 		/* Resolve escape sequences */
416 		strexpand(value); /* in string_m.c */
417 
418 call_function:
419 		/* Abort if we're seeking only assignments */
420 		if (require_equals && !have_equals)
421 			return (-1);
422 
423 		found = have_equals = 0; /* reset */
424 
425 		/* If there are no options defined, call unknown and loop */
426 		if (options == NULL && unknown != NULL) {
427 			error = unknown(NULL, line, directive, value);
428 			if (error != 0) {
429 				close(fd);
430 				return (error);
431 			}
432 			continue;
433 		}
434 
435 		/* Loop through the array looking for a match for the value */
436 		for (n = 0; options[n].directive != NULL; n++) {
437 			error = fnmatch(options[n].directive, directive,
438 			    FNM_NOESCAPE);
439 			if (error == 0) {
440 				found = 1;
441 				/* Call function for array index item */
442 				if (options[n].action != NULL) {
443 					error = options[n].action(
444 					    &options[n],
445 					    line, directive, value);
446 					if (error != 0) {
447 						close(fd);
448 						return (error);
449 					}
450 				}
451 			} else if (error != FNM_NOMATCH) {
452 				/* An error has occurred */
453 				close(fd);
454 				return (-1);
455 			}
456 		}
457 		if (!found && unknown != NULL) {
458 			/*
459 			 * No match was found for the value we read from the
460 			 * file; call function designated for unknown values.
461 			 */
462 			error = unknown(NULL, line, directive, value);
463 			if (error != 0) {
464 				close(fd);
465 				return (error);
466 			}
467 		}
468 	}
469 
470 	close(fd);
471 	return (0);
472 }
473