xref: /freebsd/contrib/unbound/iterator/iter_scrub.c (revision 3a56015a2f5d630910177fa79a522bb95511ccf7)
1 /*
2  * iterator/iter_scrub.c - scrubbing, normalization, sanitization of DNS msgs.
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /**
37  * \file
38  *
39  * This file has routine(s) for cleaning up incoming DNS messages from
40  * possible useless or malicious junk in it.
41  */
42 #include "config.h"
43 #include "iterator/iter_scrub.h"
44 #include "iterator/iterator.h"
45 #include "iterator/iter_priv.h"
46 #include "services/cache/rrset.h"
47 #include "util/log.h"
48 #include "util/net_help.h"
49 #include "util/regional.h"
50 #include "util/config_file.h"
51 #include "util/module.h"
52 #include "util/data/msgparse.h"
53 #include "util/data/dname.h"
54 #include "util/data/msgreply.h"
55 #include "util/alloc.h"
56 #include "sldns/sbuffer.h"
57 
/**
 * RRset flag used during scrubbing. The RRset is OK.
 * It is set by mark_additional_rrset() on A and AAAA rrsets in the
 * additional section that are referenced from an accepted rrset, and it
 * is cleared again by scrub_normalize() when it walks the additional
 * section, so the bit does not stay behind in the flags variable.
 */
#define RRSET_SCRUB_OK	0x80
60 
61 /** remove rrset, update loop variables */
62 static void
63 remove_rrset(const char* str, sldns_buffer* pkt, struct msg_parse* msg,
64 	struct rrset_parse* prev, struct rrset_parse** rrset)
65 {
66 	if(verbosity >= VERB_QUERY && str
67 		&& (*rrset)->dname_len <= LDNS_MAX_DOMAINLEN) {
68 		uint8_t buf[LDNS_MAX_DOMAINLEN+1];
69 		dname_pkt_copy(pkt, buf, (*rrset)->dname);
70 		log_nametypeclass(VERB_QUERY, str, buf,
71 			(*rrset)->type, ntohs((*rrset)->rrset_class));
72 	}
73 	if(prev)
74 		prev->rrset_all_next = (*rrset)->rrset_all_next;
75 	else	msg->rrset_first = (*rrset)->rrset_all_next;
76 	if(msg->rrset_last == *rrset)
77 		msg->rrset_last = prev;
78 	msg->rrset_count --;
79 	switch((*rrset)->section) {
80 		case LDNS_SECTION_ANSWER: msg->an_rrsets--; break;
81 		case LDNS_SECTION_AUTHORITY: msg->ns_rrsets--; break;
82 		case LDNS_SECTION_ADDITIONAL: msg->ar_rrsets--; break;
83 		default: log_assert(0);
84 	}
85 	msgparse_bucket_remove(msg, *rrset);
86 	*rrset = (*rrset)->rrset_all_next;
87 }
88 
89 /** return true if rr type has additional names in it */
90 static int
91 has_additional(uint16_t t)
92 {
93 	switch(t) {
94 		case LDNS_RR_TYPE_MB:
95 		case LDNS_RR_TYPE_MD:
96 		case LDNS_RR_TYPE_MF:
97 		case LDNS_RR_TYPE_NS:
98 		case LDNS_RR_TYPE_MX:
99 		case LDNS_RR_TYPE_KX:
100 		case LDNS_RR_TYPE_SRV:
101 			return 1;
102 		case LDNS_RR_TYPE_NAPTR:
103 			/* TODO: NAPTR not supported, glue stripped off */
104 			return 0;
105 	}
106 	return 0;
107 }
108 
109 /** get additional name from rrset RR, return false if no name present */
110 static int
111 get_additional_name(struct rrset_parse* rrset, struct rr_parse* rr,
112 	uint8_t** nm, size_t* nmlen, sldns_buffer* pkt)
113 {
114 	size_t offset = 0;
115 	size_t len, oldpos;
116 	switch(rrset->type) {
117 		case LDNS_RR_TYPE_MB:
118 		case LDNS_RR_TYPE_MD:
119 		case LDNS_RR_TYPE_MF:
120 		case LDNS_RR_TYPE_NS:
121 			offset = 0;
122 			break;
123 		case LDNS_RR_TYPE_MX:
124 		case LDNS_RR_TYPE_KX:
125 			offset = 2;
126 			break;
127 		case LDNS_RR_TYPE_SRV:
128 			offset = 6;
129 			break;
130 		case LDNS_RR_TYPE_NAPTR:
131 			/* TODO: NAPTR not supported, glue stripped off */
132 			return 0;
133 		default:
134 			return 0;
135 	}
136 	len = sldns_read_uint16(rr->ttl_data+sizeof(uint32_t));
137 	if(len < offset+1)
138 		return 0; /* rdata field too small */
139 	*nm = rr->ttl_data+sizeof(uint32_t)+sizeof(uint16_t)+offset;
140 	oldpos = sldns_buffer_position(pkt);
141 	sldns_buffer_set_position(pkt, (size_t)(*nm - sldns_buffer_begin(pkt)));
142 	*nmlen = pkt_dname_len(pkt);
143 	sldns_buffer_set_position(pkt, oldpos);
144 	if(*nmlen == 0)
145 		return 0;
146 	return 1;
147 }
148 
149 /** Place mark on rrsets in additional section they are OK */
150 static void
151 mark_additional_rrset(sldns_buffer* pkt, struct msg_parse* msg,
152 	struct rrset_parse* rrset)
153 {
154 	/* Mark A and AAAA for NS as appropriate additional section info. */
155 	uint8_t* nm = NULL;
156 	size_t nmlen = 0;
157 	struct rr_parse* rr;
158 
159 	if(!has_additional(rrset->type))
160 		return;
161 	for(rr = rrset->rr_first; rr; rr = rr->next) {
162 		if(get_additional_name(rrset, rr, &nm, &nmlen, pkt)) {
163 			/* mark A */
164 			hashvalue_type h = pkt_hash_rrset(pkt, nm,
165 				LDNS_RR_TYPE_A, rrset->rrset_class, 0);
166 			struct rrset_parse* r = msgparse_hashtable_lookup(
167 				msg, pkt, h, 0, nm, nmlen,
168 				LDNS_RR_TYPE_A, rrset->rrset_class);
169 			if(r && r->section == LDNS_SECTION_ADDITIONAL) {
170 				r->flags |= RRSET_SCRUB_OK;
171 			}
172 
173 			/* mark AAAA */
174 			h = pkt_hash_rrset(pkt, nm, LDNS_RR_TYPE_AAAA,
175 				rrset->rrset_class, 0);
176 			r = msgparse_hashtable_lookup(msg, pkt, h, 0, nm,
177 				nmlen, LDNS_RR_TYPE_AAAA, rrset->rrset_class);
178 			if(r && r->section == LDNS_SECTION_ADDITIONAL) {
179 				r->flags |= RRSET_SCRUB_OK;
180 			}
181 		}
182 	}
183 }
184 
185 /** Get target name of a CNAME */
186 static int
187 parse_get_cname_target(struct rrset_parse* rrset, uint8_t** sname,
188 	size_t* snamelen, sldns_buffer* pkt)
189 {
190 	size_t oldpos, dlen;
191 	if(rrset->rr_count != 1) {
192 		struct rr_parse* sig;
193 		verbose(VERB_ALGO, "Found CNAME rrset with "
194 			"size > 1: %u", (unsigned)rrset->rr_count);
195 		/* use the first CNAME! */
196 		rrset->rr_count = 1;
197 		rrset->size = rrset->rr_first->size;
198 		for(sig=rrset->rrsig_first; sig; sig=sig->next)
199 			rrset->size += sig->size;
200 		rrset->rr_last = rrset->rr_first;
201 		rrset->rr_first->next = NULL;
202 	}
203 	if(rrset->rr_first->size < sizeof(uint16_t)+1)
204 		return 0; /* CNAME rdata too small */
205 	*sname = rrset->rr_first->ttl_data + sizeof(uint32_t)
206 		+ sizeof(uint16_t); /* skip ttl, rdatalen */
207 	*snamelen = rrset->rr_first->size - sizeof(uint16_t);
208 
209 	if(rrset->rr_first->outside_packet) {
210 		if(!dname_valid(*sname, *snamelen))
211 			return 0;
212 		return 1;
213 	}
214 	oldpos = sldns_buffer_position(pkt);
215 	sldns_buffer_set_position(pkt, (size_t)(*sname - sldns_buffer_begin(pkt)));
216 	dlen = pkt_dname_len(pkt);
217 	sldns_buffer_set_position(pkt, oldpos);
218 	if(dlen == 0)
219 		return 0; /* parse fail on the rdata name */
220 	*snamelen = dlen;
221 	return 1;
222 }
223 
224 /** Synthesize CNAME from DNAME, false if too long */
225 static int
226 synth_cname(uint8_t* qname, size_t qnamelen, struct rrset_parse* dname_rrset,
227 	uint8_t* alias, size_t* aliaslen, sldns_buffer* pkt)
228 {
229 	/* we already know that sname is a strict subdomain of DNAME owner */
230 	uint8_t* dtarg = NULL;
231 	size_t dtarglen;
232 	if(!parse_get_cname_target(dname_rrset, &dtarg, &dtarglen, pkt))
233 		return 0;
234 	if(qnamelen <= dname_rrset->dname_len)
235 		return 0;
236 	if(qnamelen == 0)
237 		return 0;
238 	log_assert(qnamelen > dname_rrset->dname_len);
239 	/* DNAME from com. to net. with qname example.com. -> example.net. */
240 	/* so: \3com\0 to \3net\0 and qname \7example\3com\0 */
241 	*aliaslen = qnamelen + dtarglen - dname_rrset->dname_len;
242 	if(*aliaslen > LDNS_MAX_DOMAINLEN)
243 		return 0; /* should have been RCODE YXDOMAIN */
244 	/* decompress dnames into buffer, we know it fits */
245 	dname_pkt_copy(pkt, alias, qname);
246 	dname_pkt_copy(pkt, alias+(qnamelen-dname_rrset->dname_len), dtarg);
247 	return 1;
248 }
249 
250 /** synthesize a CNAME rrset */
251 static struct rrset_parse*
252 synth_cname_rrset(uint8_t** sname, size_t* snamelen, uint8_t* alias,
253 	size_t aliaslen, struct regional* region, struct msg_parse* msg,
254 	struct rrset_parse* rrset, struct rrset_parse* prev,
255 	struct rrset_parse* nx, sldns_buffer* pkt)
256 {
257 	struct rrset_parse* cn = (struct rrset_parse*)regional_alloc(region,
258 		sizeof(struct rrset_parse));
259 	if(!cn)
260 		return NULL;
261 	memset(cn, 0, sizeof(*cn));
262 	cn->rr_first = (struct rr_parse*)regional_alloc(region,
263 		sizeof(struct rr_parse));
264 	if(!cn->rr_first)
265 		return NULL;
266 	cn->rr_last = cn->rr_first;
267 	/* CNAME from sname to alias */
268 	cn->dname = (uint8_t*)regional_alloc(region, *snamelen);
269 	if(!cn->dname)
270 		return NULL;
271 	dname_pkt_copy(pkt, cn->dname, *sname);
272 	cn->dname_len = *snamelen;
273 	cn->type = LDNS_RR_TYPE_CNAME;
274 	cn->section = rrset->section;
275 	cn->rrset_class = rrset->rrset_class;
276 	cn->rr_count = 1;
277 	cn->size = sizeof(uint16_t) + aliaslen;
278 	cn->hash=pkt_hash_rrset(pkt, cn->dname, cn->type, cn->rrset_class, 0);
279 	/* allocate TTL + rdatalen + uncompressed dname */
280 	memset(cn->rr_first, 0, sizeof(struct rr_parse));
281 	cn->rr_first->outside_packet = 1;
282 	cn->rr_first->ttl_data = (uint8_t*)regional_alloc(region,
283 		sizeof(uint32_t)+sizeof(uint16_t)+aliaslen);
284 	if(!cn->rr_first->ttl_data)
285 		return NULL;
286 	memmove(cn->rr_first->ttl_data, rrset->rr_first->ttl_data,
287 		sizeof(uint32_t)); /* RFC6672: synth CNAME TTL == DNAME TTL */
288 	sldns_write_uint16(cn->rr_first->ttl_data+4, aliaslen);
289 	memmove(cn->rr_first->ttl_data+6, alias, aliaslen);
290 	cn->rr_first->size = sizeof(uint16_t)+aliaslen;
291 
292 	/* link it in */
293 	cn->rrset_all_next = nx;
294 	if(prev)
295 		prev->rrset_all_next = cn;
296 	else	msg->rrset_first = cn;
297 	if(nx == NULL)
298 		msg->rrset_last = cn;
299 	msg->rrset_count ++;
300 	msg->an_rrsets++;
301 	/* it is not inserted in the msg hashtable. */
302 
303 	*sname = cn->rr_first->ttl_data + sizeof(uint32_t)+sizeof(uint16_t);
304 	*snamelen = aliaslen;
305 	return cn;
306 }
307 
308 /** check if DNAME applies to a name */
309 static int
310 pkt_strict_sub(sldns_buffer* pkt, uint8_t* sname, uint8_t* dr)
311 {
312 	uint8_t buf1[LDNS_MAX_DOMAINLEN+1];
313 	uint8_t buf2[LDNS_MAX_DOMAINLEN+1];
314 	/* decompress names */
315 	dname_pkt_copy(pkt, buf1, sname);
316 	dname_pkt_copy(pkt, buf2, dr);
317 	return dname_strict_subdomain_c(buf1, buf2);
318 }
319 
320 /** check subdomain with decompression */
321 static int
322 pkt_sub(sldns_buffer* pkt, uint8_t* comprname, uint8_t* zone)
323 {
324 	uint8_t buf[LDNS_MAX_DOMAINLEN+1];
325 	dname_pkt_copy(pkt, buf, comprname);
326 	return dname_subdomain_c(buf, zone);
327 }
328 
329 /** check subdomain with decompression, compressed is parent */
330 static int
331 sub_of_pkt(sldns_buffer* pkt, uint8_t* zone, uint8_t* comprname)
332 {
333 	uint8_t buf[LDNS_MAX_DOMAINLEN+1];
334 	dname_pkt_copy(pkt, buf, comprname);
335 	return dname_subdomain_c(zone, buf);
336 }
337 
338 /** Check if there are SOA records in the authority section (negative) */
339 static int
340 soa_in_auth(struct msg_parse* msg)
341 {
342 	struct rrset_parse* rrset;
343 	for(rrset = msg->rrset_first; rrset; rrset = rrset->rrset_all_next)
344 		if(rrset->type == LDNS_RR_TYPE_SOA &&
345 			rrset->section == LDNS_SECTION_AUTHORITY)
346 			return 1;
347 	return 0;
348 }
349 
350 /** Check if type is allowed in the authority section */
351 static int
352 type_allowed_in_authority_section(uint16_t tp)
353 {
354 	if(tp == LDNS_RR_TYPE_SOA || tp == LDNS_RR_TYPE_NS ||
355 		tp == LDNS_RR_TYPE_DS || tp == LDNS_RR_TYPE_NSEC ||
356 		tp == LDNS_RR_TYPE_NSEC3)
357 		return 1;
358 	return 0;
359 }
360 
361 /** Check if type is allowed in the additional section */
362 static int
363 type_allowed_in_additional_section(uint16_t tp)
364 {
365 	if(tp == LDNS_RR_TYPE_A || tp == LDNS_RR_TYPE_AAAA)
366 		return 1;
367 	return 0;
368 }
369 
370 /** Shorten RRset */
371 static void
372 shorten_rrset(sldns_buffer* pkt, struct rrset_parse* rrset, int count)
373 {
374 	/* The too large NS RRset is shortened. This is so that too large
375 	 * content does not overwhelm the cache. It may make the rrset
376 	 * bogus if it was signed, and then the domain is not resolved any
377 	 * more, that is okay, the NS RRset was too large. During a referral
378 	 * it can be shortened and then the first part of the list could
379 	 * be used to resolve. The scrub continues to disallow glue for the
380 	 * removed nameserver RRs and removes that too. Because the glue
381 	 * is not marked as okay, since the RRs have been removed here. */
382 	int i;
383 	struct rr_parse* rr = rrset->rr_first, *prev = NULL;
384 	if(!rr)
385 		return;
386 	for(i=0; i<count; i++) {
387 		prev = rr;
388 		rr = rr->next;
389 		if(!rr)
390 			return; /* The RRset is already short. */
391 	}
392 	if(verbosity >= VERB_QUERY
393 		&& rrset->dname_len <= LDNS_MAX_DOMAINLEN) {
394 		uint8_t buf[LDNS_MAX_DOMAINLEN+1];
395 		dname_pkt_copy(pkt, buf, rrset->dname);
396 		log_nametypeclass(VERB_QUERY, "normalize: shorten RRset:", buf,
397 			rrset->type, ntohs(rrset->rrset_class));
398 	}
399 	/* remove further rrs */
400 	rrset->rr_last = prev;
401 	rrset->rr_count = count;
402 	while(rr) {
403 		rrset->size -= rr->size;
404 		rr = rr->next;
405 	}
406 	if(rrset->rr_last)
407 		rrset->rr_last->next = NULL;
408 	else	rrset->rr_first = NULL;
409 }
410 
/**
 * This routine normalizes a response. This includes removing "irrelevant"
 * records from the answer and additional sections and (re)synthesizing
 * CNAMEs from DNAMEs, if present.
 *
 * The rrset list of the message is walked once, section by section:
 * ANSWER (follow the CNAME/DNAME chain starting at the query name),
 * then AUTHORITY (type filtering, at most one NS rrset, DS fixup),
 * then ADDITIONAL (keep only address rrsets that were marked OK).
 * Responses with an rcode other than NOERROR or NXDOMAIN are left as is.
 *
 * @param pkt: packet.
 * @param msg: msg to normalize.
 * @param qinfo: original query.
 * @param region: where to allocate synthesized CNAMEs.
 * @param env: module env with config options.
 * @return 0 on error: a DNAME rrset with more than one RR, a CNAME that
 *	cannot be synthesized from a DNAME, a CNAME target that fails to
 *	parse, or out of memory. 1 otherwise.
 */
static int
scrub_normalize(sldns_buffer* pkt, struct msg_parse* msg,
	struct query_info* qinfo, struct regional* region,
	struct module_env* env)
{
	/* sname is the current name in the CNAME chain; it starts at the
	 * query name and moves to the target of every CNAME followed */
	uint8_t* sname = qinfo->qname;
	size_t snamelen = qinfo->qname_len;
	struct rrset_parse* rrset, *prev, *nsset=NULL;
	int cname_length = 0; /* number of CNAMEs, or DNAMEs */

	/* only NOERROR and NXDOMAIN responses are normalized */
	if(FLAGS_GET_RCODE(msg->flags) != LDNS_RCODE_NOERROR &&
		FLAGS_GET_RCODE(msg->flags) != LDNS_RCODE_NXDOMAIN)
		return 1;

	/* For the ANSWER section, remove all "irrelevant" records and add
	 * synthesized CNAMEs from DNAMEs
	 * This will strip out-of-order CNAMEs as well. */

	/* walk through the parse packet rrset list, keep track of previous
	 * for insert and delete ease, and examine every RRset */
	prev = NULL;
	rrset = msg->rrset_first;
	while(rrset && rrset->section == LDNS_SECTION_ANSWER) {
		if(cname_length > env->cfg->iter_scrub_cname) {
			/* Too many CNAMEs, or DNAMEs, from the authority
			 * server, scrub down the length to something
			 * shorter. This deletes everything after the limit
			 * is reached. The iterator is going to look up
			 * the content one by one anyway. */
			remove_rrset("normalize: removing because too many cnames:",
				pkt, msg, prev, &rrset);
			continue;
		}
		if(rrset->type == LDNS_RR_TYPE_DNAME &&
			pkt_strict_sub(pkt, sname, rrset->dname)) {
			/* check if next rrset is correct CNAME. else,
			 * synthesize a CNAME */
			struct rrset_parse* nx = rrset->rrset_all_next;
			uint8_t alias[LDNS_MAX_DOMAINLEN+1];
			size_t aliaslen = 0;
			if(rrset->rr_count != 1) {
				verbose(VERB_ALGO, "Found DNAME rrset with "
					"size > 1: %u",
					(unsigned)rrset->rr_count);
				return 0;
			}
			/* alias becomes the name that sname is rewritten to
			 * by this DNAME */
			if(!synth_cname(sname, snamelen, rrset, alias,
				&aliaslen, pkt)) {
				verbose(VERB_ALGO, "synthesized CNAME "
					"too long");
				return 0;
			}
			cname_length++;
			if(nx && nx->type == LDNS_RR_TYPE_CNAME &&
			   dname_pkt_compare(pkt, sname, nx->dname) == 0) {
				/* check next cname */
				uint8_t* t = NULL;
				size_t tlen = 0;
				if(!parse_get_cname_target(nx, &t, &tlen, pkt))
					return 0;
				if(dname_pkt_compare(pkt, alias, t) == 0) {
					/* it's OK and better capitalized */
					/* sname is not moved here; the CNAME
					 * is handled in the next iteration */
					prev = rrset;
					rrset = nx;
					continue;
				}
				/* synth ourselves */
			}
			/* synth a CNAME rrset */
			/* it is linked in after the DNAME, and sname and
			 * snamelen are moved to the alias */
			prev = synth_cname_rrset(&sname, &snamelen, alias,
				aliaslen, region, msg, rrset, rrset, nx, pkt);
			if(!prev) {
				log_err("out of memory synthesizing CNAME");
				return 0;
			}
			/* FIXME: resolve the conflict between synthesized
			 * CNAME ttls and the cache. */
			rrset = nx;
			continue;

		}

		/* Apart from an applicable DNAME (handled above), every
		 * RRset in the ANSWER section must be owned by the current
		 * name in the chain; others are removed. */
		if(dname_pkt_compare(pkt, sname, rrset->dname) != 0) {
			remove_rrset("normalize: removing irrelevant RRset:",
				pkt, msg, prev, &rrset);
			continue;
		}

		/* Follow the CNAME chain. */
		if(rrset->type == LDNS_RR_TYPE_CNAME) {
			struct rrset_parse* nx = rrset->rrset_all_next;
			uint8_t* oldsname = sname;
			cname_length++;
			/* see if the next one is a DNAME, if so, swap them */
			if(nx && nx->section == LDNS_SECTION_ANSWER &&
				nx->type == LDNS_RR_TYPE_DNAME &&
				nx->rr_count == 1 &&
				pkt_strict_sub(pkt, sname, nx->dname)) {
				/* there is a DNAME after this CNAME, it
				 * is in the ANSWER section, and the DNAME
				 * applies to the name we cover */
				/* check if the alias of the DNAME equals
				 * this CNAME */
				uint8_t alias[LDNS_MAX_DOMAINLEN+1];
				size_t aliaslen = 0;
				uint8_t* t = NULL;
				size_t tlen = 0;
				if(synth_cname(sname, snamelen, nx, alias,
					&aliaslen, pkt) &&
					parse_get_cname_target(rrset, &t, &tlen, pkt) &&
			   		dname_pkt_compare(pkt, alias, t) == 0) {
					/* the synthesized CNAME equals the
					 * current CNAME.  This CNAME is the
					 * one that the DNAME creates, and this
					 * CNAME is better capitalised */
					verbose(VERB_ALGO, "normalize: re-order of DNAME and its CNAME");
					/* relink so that the list reads
					 * prev, DNAME (nx), CNAME (rrset) */
					if(prev) prev->rrset_all_next = nx;
					else msg->rrset_first = nx;
					if(nx->rrset_all_next == NULL)
						msg->rrset_last = rrset;
					rrset->rrset_all_next =
						nx->rrset_all_next;
					nx->rrset_all_next = rrset;
					/* prev = nx; unused, enable if there
					 * is other rrset removal code after
					 * this */
				}
			}

			/* move to next name in CNAME chain */
			if(!parse_get_cname_target(rrset, &sname, &snamelen, pkt))
				return 0;
			prev = rrset;
			rrset = rrset->rrset_all_next;
			/* in CNAME ANY response, can have data after CNAME */
			if(qinfo->qtype == LDNS_RR_TYPE_ANY) {
				/* skip over (and keep) the answer rrsets that
				 * have the owner name of the CNAME; too large
				 * NS rrsets among them are shortened */
				while(rrset && rrset->section ==
					LDNS_SECTION_ANSWER &&
					dname_pkt_compare(pkt, oldsname,
					rrset->dname) == 0) {
					if(rrset->type == LDNS_RR_TYPE_NS &&
						rrset->rr_count > env->cfg->iter_scrub_ns) {
						shorten_rrset(pkt, rrset, env->cfg->iter_scrub_ns);
					}
					prev = rrset;
					rrset = rrset->rrset_all_next;
				}
			}
			continue;
		}

		/* Otherwise, make sure that the RRset matches the qtype. */
		if(qinfo->qtype != LDNS_RR_TYPE_ANY &&
			qinfo->qtype != rrset->type) {
			remove_rrset("normalize: removing irrelevant RRset:",
				pkt, msg, prev, &rrset);
			continue;
		}

		/* limit the number of NS records in an answer NS rrset */
		if(rrset->type == LDNS_RR_TYPE_NS &&
			rrset->rr_count > env->cfg->iter_scrub_ns) {
			shorten_rrset(pkt, rrset, env->cfg->iter_scrub_ns);
		}

		/* Mark the additional names from relevant rrset as OK. */
		/* only for RRsets that match the query name, other ones
		 * will be removed by sanitize, so no additional for them */
		if(dname_pkt_compare(pkt, qinfo->qname, rrset->dname) == 0)
			mark_additional_rrset(pkt, msg, rrset);

		prev = rrset;
		rrset = rrset->rrset_all_next;
	}

	/* Mark additional names from AUTHORITY */
	while(rrset && rrset->section == LDNS_SECTION_AUTHORITY) {
		/* protect internals of recursor by making sure to del these */
		if(rrset->type==LDNS_RR_TYPE_DNAME ||
			rrset->type==LDNS_RR_TYPE_CNAME ||
			rrset->type==LDNS_RR_TYPE_A ||
			rrset->type==LDNS_RR_TYPE_AAAA) {
			remove_rrset("normalize: removing irrelevant "
				"RRset:", pkt, msg, prev, &rrset);
			continue;
		}
		/* Allowed list of types in the authority section */
		if(env->cfg->harden_unknown_additional &&
			!type_allowed_in_authority_section(rrset->type)) {
			remove_rrset("normalize: removing irrelevant "
				"RRset:", pkt, msg, prev, &rrset);
			continue;
		}
		/* only one NS set allowed in authority section */
		if(rrset->type==LDNS_RR_TYPE_NS) {
			/* NS set must be pertinent to the query */
			/* that is, the query name has to be a subdomain of
			 * (or equal to) the owner name of the NS set */
			if(!sub_of_pkt(pkt, qinfo->qname, rrset->dname)) {
				remove_rrset("normalize: removing irrelevant "
					"RRset:", pkt, msg, prev, &rrset);
				continue;
			}
			/* we don't want NS sets for NXDOMAIN answers,
			 * because they could contain poisonous contents,
			 * from. eg. fragmentation attacks, inserted after
			 * long RRSIGs in the packet get to the packet
			 * border and such */
			/* also for NODATA answers */
			if(FLAGS_GET_RCODE(msg->flags) == LDNS_RCODE_NXDOMAIN ||
			   (FLAGS_GET_RCODE(msg->flags) == LDNS_RCODE_NOERROR
			    && soa_in_auth(msg) && msg->an_rrsets == 0)) {
				remove_rrset("normalize: removing irrelevant "
					"RRset:", pkt, msg, prev, &rrset);
				continue;
			}
			/* the first NS set is remembered, later ones go */
			if(nsset == NULL) {
				nsset = rrset;
			} else {
				remove_rrset("normalize: removing irrelevant "
					"RRset:", pkt, msg, prev, &rrset);
				continue;
			}
			if(rrset->rr_count > env->cfg->iter_scrub_ns) {
				/* If this is not a referral, and the NS RRset
				 * is signed, then remove it entirely, so
				 * that when it becomes bogus it does not
				 * make the message that is otherwise fine
				 * into a bogus message. */
				/* referral here means: no answer rrsets,
				 * NOERROR, no SOA in authority, AA not set */
				if(!(msg->an_rrsets == 0 &&
					FLAGS_GET_RCODE(msg->flags) ==
					LDNS_RCODE_NOERROR &&
					!soa_in_auth(msg) &&
					!(msg->flags & BIT_AA)) &&
					rrset->rrsig_count != 0) {
					remove_rrset("normalize: removing too large NS "
						"RRset:", pkt, msg, prev, &rrset);
					continue;
				} else {
					shorten_rrset(pkt, rrset, env->cfg->iter_scrub_ns);
				}
			}
		}
		/* if this is type DS and we query for type DS we just got
		 * a referral answer for our type DS query, fix packet */
		/* the message is reduced to this one rrset, which is moved
		 * to the answer section; all other rrsets are dropped from
		 * the list and the counts */
		if(rrset->type==LDNS_RR_TYPE_DS &&
			qinfo->qtype == LDNS_RR_TYPE_DS &&
			dname_pkt_compare(pkt, qinfo->qname, rrset->dname) == 0) {
			rrset->section = LDNS_SECTION_ANSWER;
			msg->ancount = rrset->rr_count + rrset->rrsig_count;
			msg->nscount = 0;
			msg->arcount = 0;
			msg->an_rrsets = 1;
			msg->ns_rrsets = 0;
			msg->ar_rrsets = 0;
			msg->rrset_count = 1;
			msg->rrset_first = rrset;
			msg->rrset_last = rrset;
			rrset->rrset_all_next = NULL;
			return 1;
		}
		mark_additional_rrset(pkt, msg, rrset);
		prev = rrset;
		rrset = rrset->rrset_all_next;
	}

	/* For each record in the additional section, remove it if it is an
	 * address record and not in the collection of additional names
	 * found in ANSWER and AUTHORITY. */
	/* These records have not been marked OK previously */
	while(rrset && rrset->section == LDNS_SECTION_ADDITIONAL) {
		if(rrset->type==LDNS_RR_TYPE_A ||
			rrset->type==LDNS_RR_TYPE_AAAA)
		{
			if((rrset->flags & RRSET_SCRUB_OK)) {
				/* remove flag to clean up flags variable */
				rrset->flags &= ~RRSET_SCRUB_OK;
			} else {
				remove_rrset("normalize: removing irrelevant "
					"RRset:", pkt, msg, prev, &rrset);
				continue;
			}
		}
		/* protect internals of recursor by making sure to del these */
		if(rrset->type==LDNS_RR_TYPE_DNAME ||
			rrset->type==LDNS_RR_TYPE_CNAME ||
			rrset->type==LDNS_RR_TYPE_NS) {
			remove_rrset("normalize: removing irrelevant "
				"RRset:", pkt, msg, prev, &rrset);
			continue;
		}
		/* Allowed list of types in the additional section */
		if(env->cfg->harden_unknown_additional &&
			!type_allowed_in_additional_section(rrset->type)) {
			remove_rrset("normalize: removing irrelevant "
				"RRset:", pkt, msg, prev, &rrset);
			continue;
		}
		prev = rrset;
		rrset = rrset->rrset_all_next;
	}

	return 1;
}
725 
726 /**
727  * Store potential poison in the cache (only if hardening disabled).
728  * The rrset is stored in the cache but removed from the message.
729  * So that it will be used for infrastructure purposes, but not be
730  * returned to the client.
731  * @param pkt: packet
732  * @param msg: message parsed
733  * @param env: environment with cache
734  * @param rrset: to store.
735  */
736 static void
737 store_rrset(sldns_buffer* pkt, struct msg_parse* msg, struct module_env* env,
738 	struct rrset_parse* rrset)
739 {
740 	struct ub_packed_rrset_key* k;
741 	struct packed_rrset_data* d;
742 	struct rrset_ref ref;
743 	time_t now = *env->now;
744 
745 	k = alloc_special_obtain(env->alloc);
746 	if(!k)
747 		return;
748 	k->entry.data = NULL;
749 	if(!parse_copy_decompress_rrset(pkt, msg, rrset, NULL, k)) {
750 		alloc_special_release(env->alloc, k);
751 		return;
752 	}
753 	d = (struct packed_rrset_data*)k->entry.data;
754 	packed_rrset_ttl_add(d, now);
755 	ref.key = k;
756 	ref.id = k->id;
757 	/*ignore ret: it was in the cache, ref updated */
758 	(void)rrset_cache_update(env->rrset_cache, &ref, env->alloc, now);
759 }
760 
761 /**
762  * Check if right hand name in NSEC is within zone
763  * @param pkt: the packet buffer for decompression.
764  * @param rrset: the NSEC rrset
765  * @param zonename: the zone name.
766  * @return true if BAD.
767  */
768 static int sanitize_nsec_is_overreach(sldns_buffer* pkt,
769 	struct rrset_parse* rrset, uint8_t* zonename)
770 {
771 	struct rr_parse* rr;
772 	uint8_t* rhs;
773 	size_t len;
774 	log_assert(rrset->type == LDNS_RR_TYPE_NSEC);
775 	for(rr = rrset->rr_first; rr; rr = rr->next) {
776 		size_t pos = sldns_buffer_position(pkt);
777 		size_t rhspos;
778 		rhs = rr->ttl_data+4+2;
779 		len = sldns_read_uint16(rr->ttl_data+4);
780 		rhspos = rhs-sldns_buffer_begin(pkt);
781 		sldns_buffer_set_position(pkt, rhspos);
782 		if(pkt_dname_len(pkt) == 0) {
783 			/* malformed */
784 			sldns_buffer_set_position(pkt, pos);
785 			return 1;
786 		}
787 		if(sldns_buffer_position(pkt)-rhspos > len) {
788 			/* outside of rdata boundaries */
789 			sldns_buffer_set_position(pkt, pos);
790 			return 1;
791 		}
792 		sldns_buffer_set_position(pkt, pos);
793 		if(!pkt_sub(pkt, rhs, zonename)) {
794 			/* overreaching */
795 			return 1;
796 		}
797 	}
798 	/* all NSEC RRs OK */
799 	return 0;
800 }
801 
802 /** Remove individual RRs, if the length is wrong. Returns true if the RRset
803  * has been removed. */
804 static int
805 scrub_sanitize_rr_length(sldns_buffer* pkt, struct msg_parse* msg,
806 	struct rrset_parse* prev, struct rrset_parse** rrset, int* added_ede,
807 	struct module_qstate* qstate)
808 {
809 	struct rr_parse* rr, *rr_prev = NULL;
810 	for(rr = (*rrset)->rr_first; rr; rr = rr->next) {
811 
812 		/* Sanity check for length of records
813 		 * An A record should be 6 bytes only
814 		 * (2 bytes for length and 4 for IPv4 addr)*/
815 		if((*rrset)->type == LDNS_RR_TYPE_A && rr->size != 6 ) {
816 			if(!*added_ede) {
817 				*added_ede = 1;
818 				errinf_ede(qstate, "sanitize: records of inappropriate length have been removed.",
819 					LDNS_EDE_OTHER);
820 			}
821 			if(msgparse_rrset_remove_rr("sanitize: removing type A RR of inappropriate length:",
822 				pkt, *rrset, rr_prev, rr, NULL, 0)) {
823 				remove_rrset("sanitize: removing type A RRset of inappropriate length:",
824 					pkt, msg, prev, rrset);
825 				return 1;
826 			}
827 			continue;
828 		}
829 
830 		/* Sanity check for length of records
831 		 * An AAAA record should be 18 bytes only
832 		 * (2 bytes for length and 16 for IPv6 addr)*/
833 		if((*rrset)->type == LDNS_RR_TYPE_AAAA && rr->size != 18 ) {
834 			if(!*added_ede) {
835 				*added_ede = 1;
836 				errinf_ede(qstate, "sanitize: records of inappropriate length have been removed.",
837 					LDNS_EDE_OTHER);
838 			}
839 			if(msgparse_rrset_remove_rr("sanitize: removing type AAAA RR of inappropriate length:",
840 				pkt, *rrset, rr_prev, rr, NULL, 0)) {
841 				remove_rrset("sanitize: removing type AAAA RRset of inappropriate length:",
842 					pkt, msg, prev, rrset);
843 				return 1;
844 			}
845 			continue;
846 		}
847 		rr_prev = rr;
848 	}
849 	return 0;
850 }
851 
852 /**
853  * Given a response event, remove suspect RRsets from the response.
854  * "Suspect" rrsets are potentially poison. Note that this routine expects
855  * the response to be in a "normalized" state -- that is, all "irrelevant"
856  * RRsets have already been removed, CNAMEs are in order, etc.
857  *
858  * @param pkt: packet.
859  * @param msg: msg to normalize.
860  * @param qinfo: the question originally asked.
861  * @param zonename: name of server zone.
862  * @param env: module environment with config and cache.
863  * @param ie: iterator environment with private address data.
864  * @param qstate: for setting errinf for EDE error messages.
865  * @return 0 on error.
866  */
867 static int
868 scrub_sanitize(sldns_buffer* pkt, struct msg_parse* msg,
869 	struct query_info* qinfo, uint8_t* zonename, struct module_env* env,
870 	struct iter_env* ie, struct module_qstate* qstate)
871 {
872 	int del_addi = 0; /* if additional-holding rrsets are deleted, we
873 		do not trust the normalized additional-A-AAAA any more */
874 	uint8_t* ns_rrset_dname = NULL;
875 	int added_rrlen_ede = 0;
876 	struct rrset_parse* rrset, *prev;
877 	prev = NULL;
878 	rrset = msg->rrset_first;
879 
880 	/* the first DNAME is allowed to stay. It needs checking before
881 	 * it can be used from the cache. After normalization, an initial
882 	 * DNAME will have a correctly synthesized CNAME after it. */
883 	if(rrset && rrset->type == LDNS_RR_TYPE_DNAME &&
884 		rrset->section == LDNS_SECTION_ANSWER &&
885 		pkt_strict_sub(pkt, qinfo->qname, rrset->dname) &&
886 		pkt_sub(pkt, rrset->dname, zonename)) {
887 		prev = rrset; /* DNAME allowed to stay in answer section */
888 		rrset = rrset->rrset_all_next;
889 	}
890 
891 	/* remove all records from the answer section that are
892 	 * not the same domain name as the query domain name.
893 	 * The answer section should contain rrsets with the same name
894 	 * as the question. For DNAMEs a CNAME has been synthesized.
895 	 * Wildcards have the query name in answer section.
896 	 * ANY queries get query name in answer section.
897 	 * Remainders of CNAME chains are cut off and resolved by iterator. */
898 	while(rrset && rrset->section == LDNS_SECTION_ANSWER) {
899 		if(dname_pkt_compare(pkt, qinfo->qname, rrset->dname) != 0) {
900 			if(has_additional(rrset->type)) del_addi = 1;
901 			remove_rrset("sanitize: removing extraneous answer "
902 				"RRset:", pkt, msg, prev, &rrset);
903 			continue;
904 		}
905 		prev = rrset;
906 		rrset = rrset->rrset_all_next;
907 	}
908 
909 	/* At this point, we brutally remove ALL rrsets that aren't
910 	 * children of the originating zone. The idea here is that,
911 	 * as far as we know, the server that we contacted is ONLY
912 	 * authoritative for the originating zone. It, of course, MAY
913 	 * be authoritative for any other zones, and of course, MAY
914 	 * NOT be authoritative for some subdomains of the originating
915 	 * zone. */
916 	prev = NULL;
917 	rrset = msg->rrset_first;
918 	while(rrset) {
919 
920 		/* Sanity check for length of records */
921 		if(rrset->type == LDNS_RR_TYPE_A ||
922 			rrset->type == LDNS_RR_TYPE_AAAA) {
923 			if(scrub_sanitize_rr_length(pkt, msg, prev, &rrset,
924 				&added_rrlen_ede, qstate))
925 				continue;
926 		}
927 
928 		/* remove private addresses */
929 		if( (rrset->type == LDNS_RR_TYPE_A ||
930 			rrset->type == LDNS_RR_TYPE_AAAA)) {
931 
932 			/* do not set servfail since this leads to too
933 			 * many drops of other people using rfc1918 space */
934 			/* also do not remove entire rrset, unless all records
935 			 * in it are bad */
936 			if(priv_rrset_bad(ie->priv, pkt, rrset)) {
937 				remove_rrset(NULL, pkt, msg, prev, &rrset);
938 				continue;
939 			}
940 		}
941 
942 		/* skip DNAME records -- they will always be followed by a
943 		 * synthesized CNAME, which will be relevant.
944 		 * FIXME: should this do something differently with DNAME
945 		 * rrsets NOT in Section.ANSWER? */
946 		/* But since DNAME records are also subdomains of the zone,
947 		 * same check can be used */
948 
949 		if(!pkt_sub(pkt, rrset->dname, zonename)) {
950 			if(msg->an_rrsets == 0 &&
951 				rrset->type == LDNS_RR_TYPE_NS &&
952 				rrset->section == LDNS_SECTION_AUTHORITY &&
953 				FLAGS_GET_RCODE(msg->flags) ==
954 				LDNS_RCODE_NOERROR && !soa_in_auth(msg) &&
955 				sub_of_pkt(pkt, zonename, rrset->dname)) {
956 				/* noerror, nodata and this NS rrset is above
957 				 * the zone. This is LAME!
958 				 * Leave in the NS for lame classification. */
959 				/* remove everything from the additional
960 				 * (we dont want its glue that was approved
961 				 * during the normalize action) */
962 				del_addi = 1;
963 			} else if(!env->cfg->harden_glue && (
964 				rrset->type == LDNS_RR_TYPE_A ||
965 				rrset->type == LDNS_RR_TYPE_AAAA)) {
966 				/* store in cache! Since it is relevant
967 				 * (from normalize) it will be picked up
968 				 * from the cache to be used later */
969 				store_rrset(pkt, msg, env, rrset);
970 				remove_rrset("sanitize: storing potential "
971 				"poison RRset:", pkt, msg, prev, &rrset);
972 				continue;
973 			} else {
974 				if(has_additional(rrset->type)) del_addi = 1;
975 				remove_rrset("sanitize: removing potential "
976 				"poison RRset:", pkt, msg, prev, &rrset);
977 				continue;
978 			}
979 		}
980 		if(rrset->type == LDNS_RR_TYPE_NS &&
981 			(rrset->section == LDNS_SECTION_AUTHORITY ||
982 			rrset->section == LDNS_SECTION_ANSWER)) {
983 			/* If the type is NS, and we're in the
984 			 * answer or authority section, then
985 			 * store the dname so we can check
986 			 * against the glue records
987 			 * further down	*/
988 			ns_rrset_dname = rrset->dname;
989 		}
990 		if(del_addi && rrset->section == LDNS_SECTION_ADDITIONAL) {
991 			remove_rrset("sanitize: removing potential "
992 			"poison reference RRset:", pkt, msg, prev, &rrset);
993 			continue;
994 		}
995 		/* check if right hand side of NSEC is within zone */
996 		if(rrset->type == LDNS_RR_TYPE_NSEC &&
997 			sanitize_nsec_is_overreach(pkt, rrset, zonename)) {
998 			remove_rrset("sanitize: removing overreaching NSEC "
999 				"RRset:", pkt, msg, prev, &rrset);
1000 			continue;
1001 		}
1002 		if(env->cfg->harden_unverified_glue && ns_rrset_dname &&
1003 			rrset->section == LDNS_SECTION_ADDITIONAL &&
1004 			(rrset->type == LDNS_RR_TYPE_A || rrset->type == LDNS_RR_TYPE_AAAA) &&
1005 			!pkt_strict_sub(pkt, rrset->dname, ns_rrset_dname)) {
1006 			/* We're in the additional section, looking
1007 			 * at an A/AAAA rrset, have a previous
1008 			 * delegation point and we notice that
1009 			 * the glue records are NOT for strict
1010 			 * subdomains of the delegation. So set a
1011 			 * flag, recompute the hash for the rrset
1012 			 * and write the A/AAAA record to cache.
1013 			 * It'll be retrieved if we can't separately
1014 			 * resolve the glue	*/
1015 			rrset->flags = PACKED_RRSET_UNVERIFIED_GLUE;
1016 			rrset->hash = pkt_hash_rrset(pkt, rrset->dname, rrset->type, rrset->rrset_class, rrset->flags);
1017 			store_rrset(pkt, msg, env, rrset);
1018 			remove_rrset("sanitize: storing potential "
1019 			"unverified glue reference RRset:", pkt, msg, prev, &rrset);
1020 			continue;
1021 		}
1022 		prev = rrset;
1023 		rrset = rrset->rrset_all_next;
1024 	}
1025 	return 1;
1026 }
1027 
1028 int
1029 scrub_message(sldns_buffer* pkt, struct msg_parse* msg,
1030 	struct query_info* qinfo, uint8_t* zonename, struct regional* region,
1031 	struct module_env* env, struct module_qstate* qstate,
1032 	struct iter_env* ie)
1033 {
1034 	/* basic sanity checks */
1035 	log_nametypeclass(VERB_ALGO, "scrub for", zonename, LDNS_RR_TYPE_NS,
1036 		qinfo->qclass);
1037 	if(msg->qdcount > 1)
1038 		return 0;
1039 	if( !(msg->flags&BIT_QR) )
1040 		return 0;
1041 	msg->flags &= ~(BIT_AD|BIT_Z); /* force off bit AD and Z */
1042 
1043 	/* make sure that a query is echoed back when NOERROR or NXDOMAIN */
1044 	/* this is not required for basic operation but is a forgery
1045 	 * resistance (security) feature */
1046 	if((FLAGS_GET_RCODE(msg->flags) == LDNS_RCODE_NOERROR ||
1047 		FLAGS_GET_RCODE(msg->flags) == LDNS_RCODE_NXDOMAIN) &&
1048 		msg->qdcount == 0)
1049 		return 0;
1050 
1051 	/* if a query is echoed back, make sure it is correct. Otherwise,
1052 	 * this may be not a reply to our query. */
1053 	if(msg->qdcount == 1) {
1054 		if(dname_pkt_compare(pkt, msg->qname, qinfo->qname) != 0)
1055 			return 0;
1056 		if(msg->qtype != qinfo->qtype || msg->qclass != qinfo->qclass)
1057 			return 0;
1058 	}
1059 
1060 	/* normalize the response, this cleans up the additional.  */
1061 	if(!scrub_normalize(pkt, msg, qinfo, region, env))
1062 		return 0;
1063 	/* delete all out-of-zone information */
1064 	if(!scrub_sanitize(pkt, msg, qinfo, zonename, env, ie, qstate))
1065 		return 0;
1066 	return 1;
1067 }
1068