xref: /freebsd/contrib/unbound/sldns/parse.c (revision e92ffd9b626833ebdbf2742c8ffddc6cd94b963e)
1 /*
2  * a generic (simple) parser. Use to parse rr's, private key
3  * information and /etc/resolv.conf files
4  *
5  * a Net::DNS like library for C
6  * LibDNS Team @ NLnet Labs
7  * (c) NLnet Labs, 2005-2006
8  * See the file LICENSE for the license
9  */
10 #include "config.h"
11 #include "sldns/parse.h"
12 #include "sldns/parseutil.h"
13 #include "sldns/sbuffer.h"
14 
15 #include <limits.h>
16 #include <strings.h>
17 
18 sldns_lookup_table sldns_directive_types[] = {
19         { LDNS_DIR_TTL, "$TTL" },
20         { LDNS_DIR_ORIGIN, "$ORIGIN" },
21         { LDNS_DIR_INCLUDE, "$INCLUDE" },
22         { 0, NULL }
23 };
24 
25 /* add max_limit here? */
/*
 * Read one token from a file without keeping a line count.
 * Forwards all arguments to sldns_fget_token_l() with a NULL line
 * counter and returns that call's result unchanged.
 */
ssize_t
sldns_fget_token(FILE *f, char *token, const char *delim, size_t limit)
{
	int *no_line_counter = NULL;

	return sldns_fget_token_l(f, token, delim, limit, no_line_counter);
}
31 
32 ssize_t
33 sldns_fget_token_l(FILE *f, char *token, const char *delim, size_t limit, int *line_nr)
34 {
35 	int c, prev_c;
36 	int p; /* 0 -> no parentheses seen, >0 nr of ( seen */
37 	int com, quoted;
38 	char *t;
39 	size_t i;
40 	const char *d;
41 	const char *del;
42 
43 	/* standard delimiters */
44 	if (!delim) {
45 		/* from isspace(3) */
46 		del = LDNS_PARSE_NORMAL;
47 	} else {
48 		del = delim;
49 	}
50 
51 	p = 0;
52 	i = 0;
53 	com = 0;
54 	quoted = 0;
55 	prev_c = 0;
56 	t = token;
57 	if (del[0] == '"') {
58 		quoted = 1;
59 	}
60 	while ((c = getc(f)) != EOF) {
61 		if (c == '\r') /* carriage return */
62 			c = ' ';
63 		if (c == '(' && prev_c != '\\' && !quoted) {
64 			/* this only counts for non-comments */
65 			if (com == 0) {
66 				p++;
67 			}
68 			prev_c = c;
69 			continue;
70 		}
71 
72 		if (c == ')' && prev_c != '\\' && !quoted) {
73 			/* this only counts for non-comments */
74 			if (com == 0) {
75 				p--;
76 			}
77 			prev_c = c;
78 			continue;
79 		}
80 
81 		if (p < 0) {
82 			/* more ) then ( - close off the string */
83 			*t = '\0';
84 			return 0;
85 		}
86 
87 		/* do something with comments ; */
88 		if (c == ';' && quoted == 0) {
89 			if (prev_c != '\\') {
90 				com = 1;
91 			}
92 		}
93 		if (c == '\"' && com == 0 && prev_c != '\\') {
94 			quoted = 1 - quoted;
95 		}
96 
97 		if (c == '\n' && com != 0) {
98 			/* comments */
99 			com = 0;
100 			*t = ' ';
101 			if (line_nr) {
102 				*line_nr = *line_nr + 1;
103 			}
104 			if (p == 0 && i > 0) {
105 				goto tokenread;
106 			} else {
107 				prev_c = c;
108 				continue;
109 			}
110 		}
111 
112 		if (com == 1) {
113 			*t = ' ';
114 			prev_c = c;
115 			continue;
116 		}
117 
118 		if (c == '\n' && p != 0 && t > token) {
119 			/* in parentheses */
120 			if (line_nr) {
121 				*line_nr = *line_nr + 1;
122 			}
123 			if (limit > 0 && (i+1 >= limit || (size_t)(t-token)+1 >= limit)) {
124 				*t = '\0';
125 				return -1;
126 			}
127 			*t++ = ' ';
128 			prev_c = c;
129 			continue;
130 		}
131 
132 		/* check if we hit the delim */
133 		for (d = del; *d; d++) {
134 			if (c == *d && i > 0 && prev_c != '\\' && p == 0) {
135 				if (c == '\n' && line_nr) {
136 					*line_nr = *line_nr + 1;
137 				}
138 				goto tokenread;
139 			}
140 		}
141 		if (c != '\0' && c != '\n') {
142 			i++;
143 		}
144 		/* is there space for the character and the zero after it */
145 		if (limit > 0 && (i+1 >= limit || (size_t)(t-token)+1 >= limit)) {
146 			*t = '\0';
147 			return -1;
148 		}
149 		if (c != '\0' && c != '\n') {
150 			*t++ = c;
151 		}
152 		if (c == '\n' && line_nr) {
153 			*line_nr = *line_nr + 1;
154 		}
155 		if (c == '\\' && prev_c == '\\')
156 			prev_c = 0;
157 		else	prev_c = c;
158 	}
159 	*t = '\0';
160 	if (c == EOF) {
161 		return (ssize_t)i;
162 	}
163 
164 	if (i == 0) {
165 		/* nothing read */
166 		return -1;
167 	}
168 	if (p != 0) {
169 		return -1;
170 	}
171 	return (ssize_t)i;
172 
173 tokenread:
174 	if(*del == '"')
175 		/* do not skip over quotes after the string, they are part
176 		 * of the next string.  But skip over whitespace (if needed)*/
177 		sldns_fskipcs_l(f, del+1, line_nr);
178 	else	sldns_fskipcs_l(f, del, line_nr);
179 	*t = '\0';
180 	if (p != 0) {
181 		return -1;
182 	}
183 
184 	return (ssize_t)i;
185 }
186 
/*
 * Read "keyword<sep>data" from a file without keeping a line count.
 * Forwards all arguments to sldns_fget_keyword_data_l() with a NULL
 * line counter and returns that call's result unchanged.
 */
ssize_t
sldns_fget_keyword_data(FILE *f, const char *keyword, const char *k_del, char *data,
               const char *d_del, size_t data_limit)
{
	ssize_t nread;

	nread = sldns_fget_keyword_data_l(f, keyword, k_del, data, d_del,
		data_limit, NULL);
	return nread;
}
194 
195 ssize_t
196 sldns_fget_keyword_data_l(FILE *f, const char *keyword, const char *k_del, char *data,
197                const char *d_del, size_t data_limit, int *line_nr)
198 {
199        /* we assume: keyword|sep|data */
200        char *fkeyword;
201        ssize_t i;
202 
203        if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN)
204                return -1;
205        fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN);
206        if(!fkeyword)
207                return -1;
208 
209        i = sldns_fget_token(f, fkeyword, k_del, LDNS_MAX_KEYWORDLEN);
210        if(i==0 || i==-1) {
211                free(fkeyword);
212                return -1;
213        }
214 
215        /* case??? i instead of strlen? */
216        if (strncmp(fkeyword, keyword, LDNS_MAX_KEYWORDLEN - 1) == 0) {
217                /* whee! */
218                /* printf("%s\n%s\n", "Matching keyword", fkeyword); */
219                i = sldns_fget_token_l(f, data, d_del, data_limit, line_nr);
220                free(fkeyword);
221                return i;
222        } else {
223                /*printf("no match for %s (read: %s)\n", keyword, fkeyword);*/
224                free(fkeyword);
225                return -1;
226        }
227 }
228 
229 int
230 sldns_bgetc(sldns_buffer *buffer)
231 {
232 	if (!sldns_buffer_available_at(buffer, buffer->_position, sizeof(uint8_t))) {
233 		sldns_buffer_set_position(buffer, sldns_buffer_limit(buffer));
234 		/* sldns_buffer_rewind(buffer);*/
235 		return EOF;
236 	}
237 	return (int)sldns_buffer_read_u8(buffer);
238 }
239 
240 ssize_t
241 sldns_bget_token(sldns_buffer *b, char *token, const char *delim, size_t limit)
242 {
243 	return sldns_bget_token_par(b, token, delim, limit, NULL, NULL);
244 }
245 
246 ssize_t
247 sldns_bget_token_par(sldns_buffer *b, char *token, const char *delim,
248 	size_t limit, int* par, const char* skipw)
249 {
250 	int c, lc;
251 	int p; /* 0 -> no parentheses seen, >0 nr of ( seen */
252 	int com, quoted;
253 	char *t;
254 	size_t i;
255 	const char *d;
256 	const char *del;
257 
258 	/* standard delimiters */
259 	if (!delim) {
260 		/* from isspace(3) */
261 		del = LDNS_PARSE_NORMAL;
262 	} else {
263 		del = delim;
264 	}
265 
266 	p = (par?*par:0);
267 	i = 0;
268 	com = 0;
269 	quoted = 0;
270 	t = token;
271 	lc = 0;
272 	if (del[0] == '"') {
273 		quoted = 1;
274 	}
275 
276 	while ((c = sldns_bgetc(b)) != EOF) {
277 		if (c == '\r') /* carriage return */
278 			c = ' ';
279 		if (c == '(' && lc != '\\' && !quoted) {
280 			/* this only counts for non-comments */
281 			if (com == 0) {
282 				if(par) (*par)++;
283 				p++;
284 			}
285 			lc = c;
286 			continue;
287 		}
288 
289 		if (c == ')' && lc != '\\' && !quoted) {
290 			/* this only counts for non-comments */
291 			if (com == 0) {
292 				if(par) (*par)--;
293 				p--;
294 			}
295 			lc = c;
296 			continue;
297 		}
298 
299 		if (p < 0) {
300 			/* more ) then ( */
301 			*t = '\0';
302 			return 0;
303 		}
304 
305 		/* do something with comments ; */
306 		if (c == ';' && quoted == 0) {
307 			if (lc != '\\') {
308 				com = 1;
309 			}
310 		}
311 		if (c == '"' && com == 0 && lc != '\\') {
312 			quoted = 1 - quoted;
313 		}
314 
315 		if (c == '\n' && com != 0) {
316 			/* comments */
317 			com = 0;
318 			*t = ' ';
319 			lc = c;
320 			continue;
321 		}
322 
323 		if (com == 1) {
324 			*t = ' ';
325 			lc = c;
326 			continue;
327 		}
328 
329 		if (c == '\n' && p != 0) {
330 			/* in parentheses */
331 			/* do not write ' ' if we want to skip spaces */
332 			if(!(skipw && (strchr(skipw, c)||strchr(skipw, ' ')))) {
333 				/* check for space for the space character and a zero delimiter after that. */
334 				if (limit > 0 && (i+1 >= limit || (size_t)(t-token)+1 >= limit)) {
335 					*t = '\0';
336 					return -1;
337 				}
338 				*t++ = ' ';
339 			}
340 			lc = c;
341 			continue;
342 		}
343 
344 		/* check to skip whitespace at start, but also after ( */
345 		if(skipw && i==0 && !com && !quoted && lc != '\\') {
346 			if(strchr(skipw, c)) {
347 				lc = c;
348 				continue;
349 			}
350 		}
351 
352 		/* check if we hit the delim */
353 		for (d = del; *d; d++) {
354 			/* we can only exit if no parens or user tracks them */
355                         if (c == *d && lc != '\\' && (p == 0 || par)) {
356 				goto tokenread;
357                         }
358 		}
359 
360 		i++;
361 		if (limit > 0 && (i+1 >= limit || (size_t)(t-token)+1 >= limit)) {
362 			*t = '\0';
363 			return -1;
364 		}
365 		*t++ = c;
366 
367 		if (c == '\\' && lc == '\\') {
368 			lc = 0;
369 		} else {
370 			lc = c;
371 		}
372 	}
373 	*t = '\0';
374 	if (i == 0) {
375 		/* nothing read */
376 		return -1;
377 	}
378 	if (!par && p != 0) {
379 		return -1;
380 	}
381 	return (ssize_t)i;
382 
383 tokenread:
384 	if(*del == '"')
385 		/* do not skip over quotes after the string, they are part
386 		 * of the next string.  But skip over whitespace (if needed)*/
387 		sldns_bskipcs(b, del+1);
388 	else 	sldns_bskipcs(b, del);
389 	*t = '\0';
390 
391 	if (!par && p != 0) {
392 		return -1;
393 	}
394 	return (ssize_t)i;
395 }
396 
397 
398 void
399 sldns_bskipcs(sldns_buffer *buffer, const char *s)
400 {
401         int found;
402         char c;
403         const char *d;
404 
405         while(sldns_buffer_available_at(buffer, buffer->_position, sizeof(char))) {
406                 c = (char) sldns_buffer_read_u8_at(buffer, buffer->_position);
407                 found = 0;
408                 for (d = s; *d; d++) {
409                         if (*d == c) {
410                                 found = 1;
411                         }
412                 }
413                 if (found && buffer->_limit > buffer->_position) {
414                         buffer->_position += sizeof(char);
415                 } else {
416                         return;
417                 }
418         }
419 }
420 
/*
 * Skip over characters from s in file fp without keeping a line count.
 * Forwards to sldns_fskipcs_l() with a NULL line counter.
 */
void
sldns_fskipcs(FILE *fp, const char *s)
{
	int *no_line_counter = NULL;

	sldns_fskipcs_l(fp, s, no_line_counter);
}
426 
/*
 * Skip over any run of characters from s at the current position of fp.
 *
 * fp:      stream to read from.
 * s:       NUL-terminated set of characters to skip.
 * line_nr: if not NULL, incremented once for every newline that is
 *          actually skipped.
 *
 * Reading stops at end of file or at the first character that is not in
 * s. That character is pushed back with ungetc() so the next read sees
 * it. It is not counted in *line_nr even when it is a newline: it has not
 * been consumed, and whoever reads it next will count it.
 */
void
sldns_fskipcs_l(FILE *fp, const char *s, int *line_nr)
{
	int found;
	int c;
	const char *d;

	while ((c = fgetc(fp)) != EOF) {
		found = 0;
		for (d = s; *d; d++) {
			if (*d == c) {
				found = 1;
				break;
			}
		}
		if (!found) {
			/* with getc, we've read too far */
			ungetc(c, fp);
			return;
		}
		/* only count newlines that were really consumed */
		if (line_nr && c == '\n') {
			*line_nr = *line_nr + 1;
		}
	}
}
451 
452 ssize_t
453 sldns_bget_keyword_data(sldns_buffer *b, const char *keyword, const char *k_del, char
454 *data, const char *d_del, size_t data_limit)
455 {
456        /* we assume: keyword|sep|data */
457        char *fkeyword;
458        ssize_t i;
459 
460        if(strlen(keyword) >= LDNS_MAX_KEYWORDLEN)
461                return -1;
462        fkeyword = (char*)malloc(LDNS_MAX_KEYWORDLEN);
463        if(!fkeyword)
464                return -1; /* out of memory */
465 
466        i = sldns_bget_token(b, fkeyword, k_del, data_limit);
467        if(i==0 || i==-1) {
468                free(fkeyword);
469                return -1; /* nothing read */
470        }
471 
472        /* case??? */
473        if (strncmp(fkeyword, keyword, strlen(keyword)) == 0) {
474                free(fkeyword);
475                /* whee, the match! */
476                /* retrieve it's data */
477                i = sldns_bget_token(b, data, d_del, 0);
478                return i;
479        } else {
480                free(fkeyword);
481                return -1;
482        }
483 }
484 
485