xref: /freebsd/contrib/unbound/sldns/parse.c (revision 02e9120893770924227138ba49df1edb3896112a)
1 /*
2  * a generic (simple) parser. Use to parse rr's, private key
3  * information and /etc/resolv.conf files
4  *
5  * a Net::DNS like library for C
6  * LibDNS Team @ NLnet Labs
7  * (c) NLnet Labs, 2005-2006
8  * See the file LICENSE for the license
9  */
10 #include "config.h"
11 #include "sldns/parse.h"
12 #include "sldns/parseutil.h"
13 #include "sldns/sbuffer.h"
14 
15 #include <limits.h>
16 #include <strings.h>
17 
/*
 * Lookup table pairing each LDNS_DIR_* directive identifier with the
 * keyword string it is written as ("$TTL", "$ORIGIN", "$INCLUDE").
 * The list ends with a { 0, NULL } sentinel entry.
 */
sldns_lookup_table sldns_directive_types[] = {
        { LDNS_DIR_TTL, "$TTL" },
        { LDNS_DIR_ORIGIN, "$ORIGIN" },
        { LDNS_DIR_INCLUDE, "$INCLUDE" },
        { 0, NULL }
};
24 
/*
 * Read a single token from f into token, without keeping track of line
 * numbers.  Identical to sldns_fget_token_l() called with a NULL
 * line counter; see that function for the meaning of delim and limit
 * and for the return values.
 */
ssize_t
sldns_fget_token(FILE *f, char *token, const char *delim, size_t limit)
{
	int *no_line_counter = NULL;

	return sldns_fget_token_l(f, token, delim, limit, no_line_counter);
}
31 
/*
 * Read one token from the stream f into token.
 *
 * Characters are copied into token until one of the characters in delim
 * is read (LDNS_PARSE_NORMAL is used when delim is NULL).  A delimiter
 * only ends the token when at least one character has been counted, the
 * delimiter is not preceded by a backslash and no parenthesis is open.
 *
 * While reading:
 *  - '\r' is treated as ' ';
 *  - unescaped, unquoted '(' and ')' are not copied; they only change
 *    the parenthesis depth (and only outside comments);
 *  - an unescaped, unquoted ';' starts a comment that runs up to the
 *    next newline; comment characters are not copied;
 *  - an unescaped '"' outside a comment toggles the quoted state; if
 *    delim starts with '"' the function starts in quoted state;
 *  - a newline read while a parenthesis is open and the token is
 *    non-empty is stored as a single ' ';
 *  - '\0' and '\n' are neither counted nor stored otherwise;
 *  - a token that consists of blanks (' ' and '\t') only is thrown
 *    away when its delimiter or a comment-ending newline is reached,
 *    and reading starts over.
 *
 * f:       stream to read from.
 * token:   destination buffer; it is NUL terminated on every return path.
 * delim:   set of delimiter characters, or NULL for LDNS_PARSE_NORMAL.
 * limit:   when > 0, reading stops with -1 as soon as the next character
 *          plus the terminating NUL would no longer fit in limit bytes;
 *          when 0 no bound is checked.
 * line_nr: when not NULL, *line_nr is incremented for newlines read.
 *
 * Returns the number of characters counted for the token, 0 when more
 * ')' than '(' were seen, or -1 when limit was exceeded.  When the end
 * of the stream is reached the count collected so far is returned
 * (which may be 0).
 */
ssize_t
sldns_fget_token_l(FILE *f, char *token, const char *delim, size_t limit, int *line_nr)
{
	int c, prev_c;
	int p; /* 0 -> no parentheses seen, >0 nr of ( seen */
	int com, quoted, only_blank;
	char *t;
	size_t i;
	const char *d;
	const char *del;

	/* standard delimiters */
	if (!delim) {
		/* from isspace(3) */
		del = LDNS_PARSE_NORMAL;
	} else {
		del = delim;
	}

	p = 0;
	i = 0;		/* number of characters counted for the token */
	com = 0;	/* 1 while inside a ';' comment */
	quoted = 0;	/* 1 while inside a "..." string */
	prev_c = 0;	/* previous character, used to detect '\\' escapes */
	only_blank = 1;	/* Assume we got only <blank> until now */
	t = token;	/* write position in token */
	if (del[0] == '"') {
		/* the caller is reading the inside of a quoted string */
		quoted = 1;
	}
	while ((c = getc(f)) != EOF) {
		if (c == '\r') /* carriage return */
			c = ' ';
		if (c == '(' && prev_c != '\\' && !quoted) {
			/* this only counts for non-comments */
			if (com == 0) {
				p++;
			}
			/* the parenthesis itself is never stored */
			prev_c = c;
			continue;
		}

		if (c == ')' && prev_c != '\\' && !quoted) {
			/* this only counts for non-comments */
			if (com == 0) {
				p--;
			}
			/* the parenthesis itself is never stored */
			prev_c = c;
			continue;
		}

		/* because the ')' branch above continues, a negative depth
		 * is noticed on the first character read after it */
		if (p < 0) {
			/* more ) than ( - close off the string */
			*t = '\0';
			return 0;
		}

		/* do something with comments ; */
		if (c == ';' && quoted == 0) {
			if (prev_c != '\\') {
				com = 1;
			}
		}
		if (c == '\"' && com == 0 && prev_c != '\\') {
			/* toggle between 0 and 1 */
			quoted = 1 - quoted;
		}

		if (c == '\n' && com != 0) {
			/* comments */
			com = 0;
			/* written at the current position, t is not advanced */
			*t = ' ';
			if (line_nr) {
				*line_nr = *line_nr + 1;
			}
			if (only_blank && i > 0) {
				/* Got only <blank> so far. Reset and try
				 * again with the next line.
				 */
				i = 0;
				t = token;
			}
			if (p == 0) {
				/* If p != 0 then the next line is a continuation. So
				 * we assume that the next line starts with a blank only
				 * if it is actually a new line.
				 */
				only_blank = 1;	/* Assume next line starts with
						 * <blank>.
						 */
			}
			if (p == 0 && i > 0) {
				/* the comment ended a non-empty token */
				goto tokenread;
			} else {
				prev_c = c;
				continue;
			}
		}

		if (com == 1) {
			/* inside a comment: drop the character; the space is
			 * written at the current position, t is not advanced */
			*t = ' ';
			prev_c = c;
			continue;
		}

		if (c == '\n' && p != 0 && t > token) {
			/* in parentheses */
			if (line_nr) {
				*line_nr = *line_nr + 1;
			}
			/* room for the space and the terminating zero? */
			if (limit > 0 && (i+1 >= limit || (size_t)(t-token)+1 >= limit)) {
				*t = '\0';
				return -1;
			}
			/* the newline becomes a space; note that i is not
			 * incremented for it */
			*t++ = ' ';
			prev_c = c;
			continue;
		}

		/* check if we hit the delim */
		for (d = del; *d; d++) {
			if (c == *d)
				break;
		}

		/* d points at the matching delimiter, or at the terminating
		 * zero of del when c is not a delimiter */
		if (c == *d && i > 0 && prev_c != '\\' && p == 0) {
			if (c == '\n' && line_nr) {
				*line_nr = *line_nr + 1;
			}
			if (only_blank) {
				/* Got only <blank> so far. Reset and
				 * try again with the next line.
				 */
				i = 0;
				t = token;
				only_blank = 1;
				prev_c = c;
				continue;
			}
			goto tokenread;
		}
		if (c != ' ' && c != '\t') {
			/* Found something that is not <blank> */
			only_blank= 0;
		}
		/* zero bytes and newlines are neither counted nor stored */
		if (c != '\0' && c != '\n') {
			i++;
		}
		/* is there space for the character and the zero after it */
		if (limit > 0 && (i+1 >= limit || (size_t)(t-token)+1 >= limit)) {
			*t = '\0';
			return -1;
		}
		if (c != '\0' && c != '\n') {
			*t++ = c;
		}
		if (c == '\n') {
			if (line_nr) {
				*line_nr = *line_nr + 1;
			}
			only_blank = 1;	/* Assume next line starts with
					 * <blank>.
					 */
		}
		/* an escaped backslash must not escape the next character */
		if (c == '\\' && prev_c == '\\')
			prev_c = 0;
		else	prev_c = c;
	}
	/* end of stream */
	*t = '\0';
	if (c == EOF) {
		return (ssize_t)i;
	}

	/* NOTE(review): the loop above is only left with c == EOF (all other
	 * exits are a return or a goto), so the checks below appear to be
	 * unreachable. */
	if (i == 0) {
		/* nothing read */
		return -1;
	}
	if (p != 0) {
		return -1;
	}
	return (ssize_t)i;

tokenread:
	/* consume the delimiters that follow the token */
	if(*del == '"')
		/* do not skip over quotes after the string, they are part
		 * of the next string.  But skip over whitespace (if needed)*/
		sldns_fskipcs_l(f, del+1, line_nr);
	else	sldns_fskipcs_l(f, del, line_nr);
	*t = '\0';
	if (p != 0) {
		return -1;
	}

	return (ssize_t)i;
}
225 
/*
 * Read a "keyword <separator> data" pair from f without keeping track
 * of line numbers.  Identical to sldns_fget_keyword_data_l() called
 * with a NULL line counter.
 */
ssize_t
sldns_fget_keyword_data(FILE *f, const char *keyword, const char *k_del, char *data,
               const char *d_del, size_t data_limit)
{
	int *no_line_counter = NULL;

	return sldns_fget_keyword_data_l(f, keyword, k_del, data, d_del,
		data_limit, no_line_counter);
}
233 
234 ssize_t
235 sldns_fget_keyword_data_l(FILE *f, const char *keyword, const char *k_del, char *data,
236                const char *d_del, size_t data_limit, int *line_nr)
237 {
238        /* we assume: keyword|sep|data */
239        char *fkeyword;
240        ssize_t i;
241 
242        if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN)
243                return -1;
244        fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN);
245        if(!fkeyword)
246                return -1;
247 
248        i = sldns_fget_token(f, fkeyword, k_del, LDNS_MAX_KEYWORDLEN);
249        if(i==0 || i==-1) {
250                free(fkeyword);
251                return -1;
252        }
253 
254        /* case??? i instead of strlen? */
255        if (strncmp(fkeyword, keyword, LDNS_MAX_KEYWORDLEN - 1) == 0) {
256                /* whee! */
257                /* printf("%s\n%s\n", "Matching keyword", fkeyword); */
258                i = sldns_fget_token_l(f, data, d_del, data_limit, line_nr);
259                free(fkeyword);
260                return i;
261        } else {
262                /*printf("no match for %s (read: %s)\n", keyword, fkeyword);*/
263                free(fkeyword);
264                return -1;
265        }
266 }
267 
268 int
269 sldns_bgetc(sldns_buffer *buffer)
270 {
271 	if (!sldns_buffer_available_at(buffer, buffer->_position, sizeof(uint8_t))) {
272 		sldns_buffer_set_position(buffer, sldns_buffer_limit(buffer));
273 		/* sldns_buffer_rewind(buffer);*/
274 		return EOF;
275 	}
276 	return (int)sldns_buffer_read_u8(buffer);
277 }
278 
279 ssize_t
280 sldns_bget_token(sldns_buffer *b, char *token, const char *delim, size_t limit)
281 {
282 	return sldns_bget_token_par(b, token, delim, limit, NULL, NULL);
283 }
284 
/*
 * Read one token from the buffer b into token.
 *
 * Characters are copied into token until an unescaped character from
 * delim is read (LDNS_PARSE_NORMAL is used when delim is NULL).
 *
 * While reading:
 *  - '\r' is treated as ' ';
 *  - unescaped, unquoted '(' and ')' are not copied; they only change
 *    the parenthesis depth (and only outside comments);
 *  - an unescaped, unquoted ';' starts a comment that runs up to the
 *    next newline; comment characters are not copied;
 *  - an unescaped '"' outside a comment toggles the quoted state; if
 *    delim starts with '"' the function starts in quoted state;
 *  - a newline read while a parenthesis is open is stored as a single
 *    ' ', unless skipw contains '\n' or ' '.
 *
 * b:     buffer to read from; its position is advanced.
 * token: destination buffer; it is NUL terminated on every return path.
 * delim: set of delimiter characters, or NULL for LDNS_PARSE_NORMAL.
 * limit: when > 0, reading stops with -1 as soon as the next character
 *        plus the terminating NUL would no longer fit in limit bytes;
 *        when 0 no bound is checked.
 * par:   when not NULL, *par is the parenthesis depth to start with and
 *        it is updated for every parenthesis seen.  In that case a
 *        delimiter also ends the token while parentheses are open, and
 *        open parentheses at the end do not cause -1.
 * skipw: when not NULL, characters from this set are skipped as long as
 *        nothing has been counted for the token yet (outside quotes and
 *        not after a backslash).
 *
 * Returns the number of characters counted for the token (0 for an
 * empty token that is ended by a delimiter), 0 when more ')' than '('
 * were seen, or -1 when limit was exceeded, when the end of the buffer
 * is reached without having read anything, or when par is NULL and the
 * parentheses are not balanced at the end of the token.
 */
ssize_t
sldns_bget_token_par(sldns_buffer *b, char *token, const char *delim,
	size_t limit, int* par, const char* skipw)
{
	int c, lc;
	int p; /* 0 -> no parentheses seen, >0 nr of ( seen */
	int com, quoted;
	char *t;
	size_t i;
	const char *d;
	const char *del;

	/* standard delimiters */
	if (!delim) {
		/* from isspace(3) */
		del = LDNS_PARSE_NORMAL;
	} else {
		del = delim;
	}

	/* continue with the depth the caller kept, if any */
	p = (par?*par:0);
	i = 0;		/* number of characters counted for the token */
	com = 0;	/* 1 while inside a ';' comment */
	quoted = 0;	/* 1 while inside a "..." string */
	t = token;	/* write position in token */
	lc = 0;		/* last character, used to detect '\\' escapes */
	if (del[0] == '"') {
		/* the caller is reading the inside of a quoted string */
		quoted = 1;
	}

	while ((c = sldns_bgetc(b)) != EOF) {
		if (c == '\r') /* carriage return */
			c = ' ';
		if (c == '(' && lc != '\\' && !quoted) {
			/* this only counts for non-comments */
			if (com == 0) {
				if(par) (*par)++;
				p++;
			}
			/* the parenthesis itself is never stored */
			lc = c;
			continue;
		}

		if (c == ')' && lc != '\\' && !quoted) {
			/* this only counts for non-comments */
			if (com == 0) {
				if(par) (*par)--;
				p--;
			}
			/* the parenthesis itself is never stored */
			lc = c;
			continue;
		}

		/* because the ')' branch above continues, a negative depth
		 * is noticed on the first character read after it */
		if (p < 0) {
			/* more ) than ( */
			*t = '\0';
			return 0;
		}

		/* do something with comments ; */
		if (c == ';' && quoted == 0) {
			if (lc != '\\') {
				com = 1;
			}
		}
		if (c == '"' && com == 0 && lc != '\\') {
			/* toggle between 0 and 1 */
			quoted = 1 - quoted;
		}

		if (c == '\n' && com != 0) {
			/* comments */
			com = 0;
			/* written at the current position, t is not advanced */
			*t = ' ';
			lc = c;
			continue;
		}

		if (com == 1) {
			/* inside a comment: drop the character; the space is
			 * written at the current position, t is not advanced */
			*t = ' ';
			lc = c;
			continue;
		}

		if (c == '\n' && p != 0) {
			/* in parentheses */
			/* do not write ' ' if we want to skip spaces */
			if(!(skipw && (strchr(skipw, c)||strchr(skipw, ' ')))) {
				/* check for space for the space character and a zero delimiter after that. */
				if (limit > 0 && (i+1 >= limit || (size_t)(t-token)+1 >= limit)) {
					*t = '\0';
					return -1;
				}
				/* note that i is not incremented for it */
				*t++ = ' ';
			}
			lc = c;
			continue;
		}

		/* check to skip whitespace at start, but also after ( */
		if(skipw && i==0 && !com && !quoted && lc != '\\') {
			if(strchr(skipw, c)) {
				lc = c;
				continue;
			}
		}

		/* check if we hit the delim */
		for (d = del; *d; d++) {
			/* we can only exit if no parens or user tracks them */
                        if (c == *d && lc != '\\' && (p == 0 || par)) {
				goto tokenread;
                        }
		}

		/* an ordinary character: count it and store it, provided
		 * that it fits together with the terminating zero */
		i++;
		if (limit > 0 && (i+1 >= limit || (size_t)(t-token)+1 >= limit)) {
			*t = '\0';
			return -1;
		}
		*t++ = c;

		/* an escaped backslash must not escape the next character */
		if (c == '\\' && lc == '\\') {
			lc = 0;
		} else {
			lc = c;
		}
	}
	/* end of buffer */
	*t = '\0';
	if (i == 0) {
		/* nothing read */
		return -1;
	}
	if (!par && p != 0) {
		/* unbalanced parentheses and nobody tracks them */
		return -1;
	}
	return (ssize_t)i;

tokenread:
	/* consume the delimiters that follow the token */
	if(*del == '"')
		/* do not skip over quotes after the string, they are part
		 * of the next string.  But skip over whitespace (if needed)*/
		sldns_bskipcs(b, del+1);
	else 	sldns_bskipcs(b, del);
	*t = '\0';

	if (!par && p != 0) {
		/* unbalanced parentheses and nobody tracks them */
		return -1;
	}
	return (ssize_t)i;
}
435 
436 
437 void
438 sldns_bskipcs(sldns_buffer *buffer, const char *s)
439 {
440         int found;
441         char c;
442         const char *d;
443 
444         while(sldns_buffer_available_at(buffer, buffer->_position, sizeof(char))) {
445                 c = (char) sldns_buffer_read_u8_at(buffer, buffer->_position);
446                 found = 0;
447                 for (d = s; *d; d++) {
448                         if (*d == c) {
449                                 found = 1;
450                         }
451                 }
452                 if (found && buffer->_limit > buffer->_position) {
453                         buffer->_position += sizeof(char);
454                 } else {
455                         return;
456                 }
457         }
458 }
459 
/*
 * Skip the leading characters of fp that occur in the set s, without
 * keeping track of line numbers.  Identical to sldns_fskipcs_l() called
 * with a NULL line counter.
 */
void
sldns_fskipcs(FILE *fp, const char *s)
{
	int *no_line_counter = NULL;

	sldns_fskipcs_l(fp, s, no_line_counter);
}
465 
/*
 * Skip the leading characters of fp that occur in the character set s.
 *
 * Reading stops at the end of the stream, or at the first character
 * that is not in s; that character is pushed back with ungetc() so that
 * the next read returns it.
 *
 * fp:      stream to read from.
 * s:       NUL terminated set of characters to skip.
 * line_nr: when not NULL, *line_nr is incremented for every newline
 *          that is skipped.  A newline that is pushed back is not
 *          counted here: it is still in the stream, and counting it
 *          would make whoever reads it next count it a second time.
 */
void
sldns_fskipcs_l(FILE *fp, const char *s, int *line_nr)
{
	int c;
	const char *d;

	while ((c = fgetc(fp)) != EOF) {
		/* d stops at the match, or at the terminating zero of s */
		for (d = s; *d; d++) {
			if (*d == c)
				break;
		}
		if (!*d) {
			/* with getc, we've read too far */
			ungetc(c, fp);
			return;
		}
		/* only count the newline once it has really been consumed */
		if (line_nr && c == '\n') {
			*line_nr = *line_nr + 1;
		}
	}
}
490 
491 ssize_t
492 sldns_bget_keyword_data(sldns_buffer *b, const char *keyword, const char *k_del, char
493 *data, const char *d_del, size_t data_limit)
494 {
495        /* we assume: keyword|sep|data */
496        char *fkeyword;
497        ssize_t i;
498 
499        if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN)
500                return -1;
501        fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN);
502        if(!fkeyword)
503                return -1; /* out of memory */
504 
505        i = sldns_bget_token(b, fkeyword, k_del, data_limit);
506        if(i==0 || i==-1) {
507                free(fkeyword);
508                return -1; /* nothing read */
509        }
510 
511        /* case??? */
512        if (strncmp(fkeyword, keyword, strlen(keyword)) == 0) {
513                free(fkeyword);
514                /* whee, the match! */
515                /* retrieve it's data */
516                i = sldns_bget_token(b, data, d_del, 0);
517                return i;
518        } else {
519                free(fkeyword);
520                return -1;
521        }
522 }
523 
524