xref: /freebsd/contrib/unbound/services/cache/infra.c (revision 369c692350f53cc5abd8e3056ad3d80c2155820e)
1 /*
2  * services/cache/infra.c - infrastructure cache, server rtt and capabilities
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /**
37  * \file
38  *
39  * This file contains the infrastructure cache.
40  */
41 #include "config.h"
42 #include "sldns/rrdef.h"
43 #include "sldns/str2wire.h"
44 #include "sldns/sbuffer.h"
45 #include "sldns/wire2str.h"
46 #include "services/cache/infra.h"
47 #include "util/storage/slabhash.h"
48 #include "util/storage/lookup3.h"
49 #include "util/data/dname.h"
50 #include "util/log.h"
51 #include "util/net_help.h"
52 #include "util/config_file.h"
53 #include "iterator/iterator.h"
54 
/** Timeout value, in msec, at and above which only a single probe query
 * per IP is allowed. */
#define PROBE_MAXRTO 12000 /* in msec */

/** Number of timeouts for a query type after which the domain can be
 * blocked for that type; even if another type has completely maxed out
 * the rtt, a different type can still send this number of packets (until
 * those all time out too). */
#define TIMEOUT_COUNT_MAX 3

/** ratelimit value for delegation point; 0 means the ratelimit is not
 * enabled. */
int infra_dp_ratelimit = 0;

/** ratelimit value for client ip addresses,
 *  in queries per second; 0 means the ratelimit is not enabled. */
int infra_ip_ratelimit = 0;
69 
70 size_t
71 infra_sizefunc(void* k, void* ATTR_UNUSED(d))
72 {
73 	struct infra_key* key = (struct infra_key*)k;
74 	return sizeof(*key) + sizeof(struct infra_data) + key->namelen
75 		+ lock_get_mem(&key->entry.lock);
76 }
77 
78 int
79 infra_compfunc(void* key1, void* key2)
80 {
81 	struct infra_key* k1 = (struct infra_key*)key1;
82 	struct infra_key* k2 = (struct infra_key*)key2;
83 	int r = sockaddr_cmp(&k1->addr, k1->addrlen, &k2->addr, k2->addrlen);
84 	if(r != 0)
85 		return r;
86 	if(k1->namelen != k2->namelen) {
87 		if(k1->namelen < k2->namelen)
88 			return -1;
89 		return 1;
90 	}
91 	return query_dname_compare(k1->zonename, k2->zonename);
92 }
93 
94 void
95 infra_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
96 {
97 	struct infra_key* key = (struct infra_key*)k;
98 	if(!key)
99 		return;
100 	lock_rw_destroy(&key->entry.lock);
101 	free(key->zonename);
102 	free(key);
103 }
104 
/** Delete callback for host cache data: frees the data structure. */
void
infra_deldatafunc(void* d, void* ATTR_UNUSED(arg))
{
	struct infra_data* hostdata = (struct infra_data*)d;
	free(hostdata);
}
111 
112 size_t
113 rate_sizefunc(void* k, void* ATTR_UNUSED(d))
114 {
115 	struct rate_key* key = (struct rate_key*)k;
116 	return sizeof(*key) + sizeof(struct rate_data) + key->namelen
117 		+ lock_get_mem(&key->entry.lock);
118 }
119 
120 int
121 rate_compfunc(void* key1, void* key2)
122 {
123 	struct rate_key* k1 = (struct rate_key*)key1;
124 	struct rate_key* k2 = (struct rate_key*)key2;
125 	if(k1->namelen != k2->namelen) {
126 		if(k1->namelen < k2->namelen)
127 			return -1;
128 		return 1;
129 	}
130 	return query_dname_compare(k1->name, k2->name);
131 }
132 
133 void
134 rate_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
135 {
136 	struct rate_key* key = (struct rate_key*)k;
137 	if(!key)
138 		return;
139 	lock_rw_destroy(&key->entry.lock);
140 	free(key->name);
141 	free(key);
142 }
143 
/** Delete callback for domain rate data: frees the data structure. */
void
rate_deldatafunc(void* d, void* ATTR_UNUSED(arg))
{
	struct rate_data* ratedata = (struct rate_data*)d;
	free(ratedata);
}
150 
151 /** find or create element in domainlimit tree */
152 static struct domain_limit_data* domain_limit_findcreate(
153 	struct infra_cache* infra, char* name)
154 {
155 	uint8_t* nm;
156 	int labs;
157 	size_t nmlen;
158 	struct domain_limit_data* d;
159 
160 	/* parse name */
161 	nm = sldns_str2wire_dname(name, &nmlen);
162 	if(!nm) {
163 		log_err("could not parse %s", name);
164 		return NULL;
165 	}
166 	labs = dname_count_labels(nm);
167 
168 	/* can we find it? */
169 	d = (struct domain_limit_data*)name_tree_find(&infra->domain_limits,
170 		nm, nmlen, labs, LDNS_RR_CLASS_IN);
171 	if(d) {
172 		free(nm);
173 		return d;
174 	}
175 
176 	/* create it */
177 	d = (struct domain_limit_data*)calloc(1, sizeof(*d));
178 	if(!d) {
179 		free(nm);
180 		return NULL;
181 	}
182 	d->node.node.key = &d->node;
183 	d->node.name = nm;
184 	d->node.len = nmlen;
185 	d->node.labs = labs;
186 	d->node.dclass = LDNS_RR_CLASS_IN;
187 	d->lim = -1;
188 	d->below = -1;
189 	if(!name_tree_insert(&infra->domain_limits, &d->node, nm, nmlen,
190 		labs, LDNS_RR_CLASS_IN)) {
191 		log_err("duplicate element in domainlimit tree");
192 		free(nm);
193 		free(d);
194 		return NULL;
195 	}
196 	return d;
197 }
198 
199 /** insert rate limit configuration into lookup tree */
200 static int infra_ratelimit_cfg_insert(struct infra_cache* infra,
201 	struct config_file* cfg)
202 {
203 	struct config_str2list* p;
204 	struct domain_limit_data* d;
205 	for(p = cfg->ratelimit_for_domain; p; p = p->next) {
206 		d = domain_limit_findcreate(infra, p->str);
207 		if(!d)
208 			return 0;
209 		d->lim = atoi(p->str2);
210 	}
211 	for(p = cfg->ratelimit_below_domain; p; p = p->next) {
212 		d = domain_limit_findcreate(infra, p->str);
213 		if(!d)
214 			return 0;
215 		d->below = atoi(p->str2);
216 	}
217 	return 1;
218 }
219 
220 /** setup domain limits tree (0 on failure) */
221 static int
222 setup_domain_limits(struct infra_cache* infra, struct config_file* cfg)
223 {
224 	name_tree_init(&infra->domain_limits);
225 	if(!infra_ratelimit_cfg_insert(infra, cfg)) {
226 		return 0;
227 	}
228 	name_tree_init_parents(&infra->domain_limits);
229 	return 1;
230 }
231 
232 struct infra_cache*
233 infra_create(struct config_file* cfg)
234 {
235 	struct infra_cache* infra = (struct infra_cache*)calloc(1,
236 		sizeof(struct infra_cache));
237 	size_t maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
238 		sizeof(struct infra_data)+INFRA_BYTES_NAME);
239 	infra->hosts = slabhash_create(cfg->infra_cache_slabs,
240 		INFRA_HOST_STARTSIZE, maxmem, &infra_sizefunc, &infra_compfunc,
241 		&infra_delkeyfunc, &infra_deldatafunc, NULL);
242 	if(!infra->hosts) {
243 		free(infra);
244 		return NULL;
245 	}
246 	infra->host_ttl = cfg->host_ttl;
247 	infra->infra_keep_probing = cfg->infra_keep_probing;
248 	infra_dp_ratelimit = cfg->ratelimit;
249 	infra->domain_rates = slabhash_create(cfg->ratelimit_slabs,
250 		INFRA_HOST_STARTSIZE, cfg->ratelimit_size,
251 		&rate_sizefunc, &rate_compfunc, &rate_delkeyfunc,
252 		&rate_deldatafunc, NULL);
253 	if(!infra->domain_rates) {
254 		infra_delete(infra);
255 		return NULL;
256 	}
257 	/* insert config data into ratelimits */
258 	if(!setup_domain_limits(infra, cfg)) {
259 		infra_delete(infra);
260 		return NULL;
261 	}
262 	infra_ip_ratelimit = cfg->ip_ratelimit;
263 	infra->client_ip_rates = slabhash_create(cfg->ip_ratelimit_slabs,
264 	    INFRA_HOST_STARTSIZE, cfg->ip_ratelimit_size, &ip_rate_sizefunc,
265 	    &ip_rate_compfunc, &ip_rate_delkeyfunc, &ip_rate_deldatafunc, NULL);
266 	if(!infra->client_ip_rates) {
267 		infra_delete(infra);
268 		return NULL;
269 	}
270 	return infra;
271 }
272 
273 /** delete domain_limit entries */
274 static void domain_limit_free(rbnode_type* n, void* ATTR_UNUSED(arg))
275 {
276 	if(n) {
277 		free(((struct domain_limit_data*)n)->node.name);
278 		free(n);
279 	}
280 }
281 
282 void
283 infra_delete(struct infra_cache* infra)
284 {
285 	if(!infra)
286 		return;
287 	slabhash_delete(infra->hosts);
288 	slabhash_delete(infra->domain_rates);
289 	traverse_postorder(&infra->domain_limits, domain_limit_free, NULL);
290 	slabhash_delete(infra->client_ip_rates);
291 	free(infra);
292 }
293 
294 struct infra_cache*
295 infra_adjust(struct infra_cache* infra, struct config_file* cfg)
296 {
297 	size_t maxmem;
298 	if(!infra)
299 		return infra_create(cfg);
300 	infra->host_ttl = cfg->host_ttl;
301 	infra->infra_keep_probing = cfg->infra_keep_probing;
302 	infra_dp_ratelimit = cfg->ratelimit;
303 	infra_ip_ratelimit = cfg->ip_ratelimit;
304 	maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
305 		sizeof(struct infra_data)+INFRA_BYTES_NAME);
306 	/* divide cachesize by slabs and multiply by slabs, because if the
307 	 * cachesize is not an even multiple of slabs, that is the resulting
308 	 * size of the slabhash */
309 	if(!slabhash_is_size(infra->hosts, maxmem, cfg->infra_cache_slabs) ||
310 	   !slabhash_is_size(infra->domain_rates, cfg->ratelimit_size,
311 	   	cfg->ratelimit_slabs) ||
312 	   !slabhash_is_size(infra->client_ip_rates, cfg->ip_ratelimit_size,
313 	   	cfg->ip_ratelimit_slabs)) {
314 		infra_delete(infra);
315 		infra = infra_create(cfg);
316 	} else {
317 		/* reapply domain limits */
318 		traverse_postorder(&infra->domain_limits, domain_limit_free,
319 			NULL);
320 		if(!setup_domain_limits(infra, cfg)) {
321 			infra_delete(infra);
322 			return NULL;
323 		}
324 	}
325 	return infra;
326 }
327 
328 /** calculate the hash value for a host key
329  *  set use_port to a non-0 number to use the port in
330  *  the hash calculation; 0 to ignore the port.*/
331 static hashvalue_type
332 hash_addr(struct sockaddr_storage* addr, socklen_t addrlen,
333   int use_port)
334 {
335 	hashvalue_type h = 0xab;
336 	/* select the pieces to hash, some OS have changing data inside */
337 	if(addr_is_ip6(addr, addrlen)) {
338 		struct sockaddr_in6* in6 = (struct sockaddr_in6*)addr;
339 		h = hashlittle(&in6->sin6_family, sizeof(in6->sin6_family), h);
340 		if(use_port){
341 			h = hashlittle(&in6->sin6_port, sizeof(in6->sin6_port), h);
342 		}
343 		h = hashlittle(&in6->sin6_addr, INET6_SIZE, h);
344 	} else {
345 		struct sockaddr_in* in = (struct sockaddr_in*)addr;
346 		h = hashlittle(&in->sin_family, sizeof(in->sin_family), h);
347 		if(use_port){
348 			h = hashlittle(&in->sin_port, sizeof(in->sin_port), h);
349 		}
350 		h = hashlittle(&in->sin_addr, INET_SIZE, h);
351 	}
352 	return h;
353 }
354 
355 /** calculate infra hash for a key */
356 static hashvalue_type
357 hash_infra(struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name)
358 {
359 	return dname_query_hash(name, hash_addr(addr, addrlen, 1));
360 }
361 
362 /** lookup version that does not check host ttl (you check it) */
363 struct lruhash_entry*
364 infra_lookup_nottl(struct infra_cache* infra, struct sockaddr_storage* addr,
365 	socklen_t addrlen, uint8_t* name, size_t namelen, int wr)
366 {
367 	struct infra_key k;
368 	k.addrlen = addrlen;
369 	memcpy(&k.addr, addr, addrlen);
370 	k.namelen = namelen;
371 	k.zonename = name;
372 	k.entry.hash = hash_infra(addr, addrlen, name);
373 	k.entry.key = (void*)&k;
374 	k.entry.data = NULL;
375 	return slabhash_lookup(infra->hosts, k.entry.hash, &k, wr);
376 }
377 
378 /** init the data elements */
379 static void
380 data_entry_init(struct infra_cache* infra, struct lruhash_entry* e,
381 	time_t timenow)
382 {
383 	struct infra_data* data = (struct infra_data*)e->data;
384 	data->ttl = timenow + infra->host_ttl;
385 	rtt_init(&data->rtt);
386 	data->edns_version = 0;
387 	data->edns_lame_known = 0;
388 	data->probedelay = 0;
389 	data->isdnsseclame = 0;
390 	data->rec_lame = 0;
391 	data->lame_type_A = 0;
392 	data->lame_other = 0;
393 	data->timeout_A = 0;
394 	data->timeout_AAAA = 0;
395 	data->timeout_other = 0;
396 }
397 
398 /**
399  * Create and init a new entry for a host
400  * @param infra: infra structure with config parameters.
401  * @param addr: host address.
402  * @param addrlen: length of addr.
403  * @param name: name of zone
404  * @param namelen: length of name.
405  * @param tm: time now.
406  * @return: the new entry or NULL on malloc failure.
407  */
408 static struct lruhash_entry*
409 new_entry(struct infra_cache* infra, struct sockaddr_storage* addr,
410 	socklen_t addrlen, uint8_t* name, size_t namelen, time_t tm)
411 {
412 	struct infra_data* data;
413 	struct infra_key* key = (struct infra_key*)malloc(sizeof(*key));
414 	if(!key)
415 		return NULL;
416 	data = (struct infra_data*)malloc(sizeof(struct infra_data));
417 	if(!data) {
418 		free(key);
419 		return NULL;
420 	}
421 	key->zonename = memdup(name, namelen);
422 	if(!key->zonename) {
423 		free(key);
424 		free(data);
425 		return NULL;
426 	}
427 	key->namelen = namelen;
428 	lock_rw_init(&key->entry.lock);
429 	key->entry.hash = hash_infra(addr, addrlen, name);
430 	key->entry.key = (void*)key;
431 	key->entry.data = (void*)data;
432 	key->addrlen = addrlen;
433 	memcpy(&key->addr, addr, addrlen);
434 	data_entry_init(infra, &key->entry, tm);
435 	return &key->entry;
436 }
437 
438 int
439 infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
440         socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow,
441 	int* edns_vs, uint8_t* edns_lame_known, int* to)
442 {
443 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
444 		nm, nmlen, 0);
445 	struct infra_data* data;
446 	int wr = 0;
447 	if(e && ((struct infra_data*)e->data)->ttl < timenow) {
448 		/* it expired, try to reuse existing entry */
449 		int old = ((struct infra_data*)e->data)->rtt.rto;
450 		time_t tprobe = ((struct infra_data*)e->data)->probedelay;
451 		uint8_t tA = ((struct infra_data*)e->data)->timeout_A;
452 		uint8_t tAAAA = ((struct infra_data*)e->data)->timeout_AAAA;
453 		uint8_t tother = ((struct infra_data*)e->data)->timeout_other;
454 		lock_rw_unlock(&e->lock);
455 		e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
456 		if(e) {
457 			/* if its still there we have a writelock, init */
458 			/* re-initialise */
459 			/* do not touch lameness, it may be valid still */
460 			data_entry_init(infra, e, timenow);
461 			wr = 1;
462 			/* TOP_TIMEOUT remains on reuse */
463 			if(old >= USEFUL_SERVER_TOP_TIMEOUT) {
464 				((struct infra_data*)e->data)->rtt.rto
465 					= USEFUL_SERVER_TOP_TIMEOUT;
466 				((struct infra_data*)e->data)->probedelay = tprobe;
467 				((struct infra_data*)e->data)->timeout_A = tA;
468 				((struct infra_data*)e->data)->timeout_AAAA = tAAAA;
469 				((struct infra_data*)e->data)->timeout_other = tother;
470 			}
471 		}
472 	}
473 	if(!e) {
474 		/* insert new entry */
475 		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
476 			return 0;
477 		data = (struct infra_data*)e->data;
478 		*edns_vs = data->edns_version;
479 		*edns_lame_known = data->edns_lame_known;
480 		*to = rtt_timeout(&data->rtt);
481 		slabhash_insert(infra->hosts, e->hash, e, data, NULL);
482 		return 1;
483 	}
484 	/* use existing entry */
485 	data = (struct infra_data*)e->data;
486 	*edns_vs = data->edns_version;
487 	*edns_lame_known = data->edns_lame_known;
488 	*to = rtt_timeout(&data->rtt);
489 	if(*to >= PROBE_MAXRTO && (infra->infra_keep_probing ||
490 		rtt_notimeout(&data->rtt)*4 <= *to)) {
491 		/* delay other queries, this is the probe query */
492 		if(!wr) {
493 			lock_rw_unlock(&e->lock);
494 			e = infra_lookup_nottl(infra, addr,addrlen,nm,nmlen, 1);
495 			if(!e) { /* flushed from cache real fast, no use to
496 				allocate just for the probedelay */
497 				return 1;
498 			}
499 			data = (struct infra_data*)e->data;
500 		}
501 		/* add 999 to round up the timeout value from msec to sec,
502 		 * then add a whole second so it is certain that this probe
503 		 * has timed out before the next is allowed */
504 		data->probedelay = timenow + ((*to)+1999)/1000;
505 	}
506 	lock_rw_unlock(&e->lock);
507 	return 1;
508 }
509 
510 int
511 infra_set_lame(struct infra_cache* infra, struct sockaddr_storage* addr,
512 	socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow,
513 	int dnsseclame, int reclame, uint16_t qtype)
514 {
515 	struct infra_data* data;
516 	struct lruhash_entry* e;
517 	int needtoinsert = 0;
518 	e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
519 	if(!e) {
520 		/* insert it */
521 		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) {
522 			log_err("set_lame: malloc failure");
523 			return 0;
524 		}
525 		needtoinsert = 1;
526 	} else if( ((struct infra_data*)e->data)->ttl < timenow) {
527 		/* expired, reuse existing entry */
528 		data_entry_init(infra, e, timenow);
529 	}
530 	/* got an entry, now set the zone lame */
531 	data = (struct infra_data*)e->data;
532 	/* merge data (if any) */
533 	if(dnsseclame)
534 		data->isdnsseclame = 1;
535 	if(reclame)
536 		data->rec_lame = 1;
537 	if(!dnsseclame && !reclame && qtype == LDNS_RR_TYPE_A)
538 		data->lame_type_A = 1;
539 	if(!dnsseclame  && !reclame && qtype != LDNS_RR_TYPE_A)
540 		data->lame_other = 1;
541 	/* done */
542 	if(needtoinsert)
543 		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
544 	else 	{ lock_rw_unlock(&e->lock); }
545 	return 1;
546 }
547 
548 void
549 infra_update_tcp_works(struct infra_cache* infra,
550         struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
551 	size_t nmlen)
552 {
553 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
554 		nm, nmlen, 1);
555 	struct infra_data* data;
556 	if(!e)
557 		return; /* doesn't exist */
558 	data = (struct infra_data*)e->data;
559 	if(data->rtt.rto >= RTT_MAX_TIMEOUT)
560 		/* do not disqualify this server altogether, it is better
561 		 * than nothing */
562 		data->rtt.rto = RTT_MAX_TIMEOUT-1000;
563 	lock_rw_unlock(&e->lock);
564 }
565 
566 int
567 infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr,
568 	socklen_t addrlen, uint8_t* nm, size_t nmlen, int qtype,
569 	int roundtrip, int orig_rtt, time_t timenow)
570 {
571 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
572 		nm, nmlen, 1);
573 	struct infra_data* data;
574 	int needtoinsert = 0, expired = 0;
575 	int rto = 1;
576 	time_t oldprobedelay = 0;
577 	if(!e) {
578 		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
579 			return 0;
580 		needtoinsert = 1;
581 	} else if(((struct infra_data*)e->data)->ttl < timenow) {
582 		oldprobedelay = ((struct infra_data*)e->data)->probedelay;
583 		data_entry_init(infra, e, timenow);
584 		expired = 1;
585 	}
586 	/* have an entry, update the rtt */
587 	data = (struct infra_data*)e->data;
588 	if(roundtrip == -1) {
589 		if(needtoinsert || expired) {
590 			/* timeout on entry that has expired before the timer
591 			 * keep old timeout from the function caller */
592 			data->rtt.rto = orig_rtt;
593 			data->probedelay = oldprobedelay;
594 		}
595 		rtt_lost(&data->rtt, orig_rtt);
596 		if(qtype == LDNS_RR_TYPE_A) {
597 			if(data->timeout_A < TIMEOUT_COUNT_MAX)
598 				data->timeout_A++;
599 		} else if(qtype == LDNS_RR_TYPE_AAAA) {
600 			if(data->timeout_AAAA < TIMEOUT_COUNT_MAX)
601 				data->timeout_AAAA++;
602 		} else {
603 			if(data->timeout_other < TIMEOUT_COUNT_MAX)
604 				data->timeout_other++;
605 		}
606 	} else {
607 		/* if we got a reply, but the old timeout was above server
608 		 * selection height, delete the timeout so the server is
609 		 * fully available again */
610 		if(rtt_unclamped(&data->rtt) >= USEFUL_SERVER_TOP_TIMEOUT)
611 			rtt_init(&data->rtt);
612 		rtt_update(&data->rtt, roundtrip);
613 		data->probedelay = 0;
614 		if(qtype == LDNS_RR_TYPE_A)
615 			data->timeout_A = 0;
616 		else if(qtype == LDNS_RR_TYPE_AAAA)
617 			data->timeout_AAAA = 0;
618 		else	data->timeout_other = 0;
619 	}
620 	if(data->rtt.rto > 0)
621 		rto = data->rtt.rto;
622 
623 	if(needtoinsert)
624 		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
625 	else 	{ lock_rw_unlock(&e->lock); }
626 	return rto;
627 }
628 
629 long long infra_get_host_rto(struct infra_cache* infra,
630         struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
631 	size_t nmlen, struct rtt_info* rtt, int* delay, time_t timenow,
632 	int* tA, int* tAAAA, int* tother)
633 {
634 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
635 		nm, nmlen, 0);
636 	struct infra_data* data;
637 	long long ttl = -2;
638 	if(!e) return -1;
639 	data = (struct infra_data*)e->data;
640 	if(data->ttl >= timenow) {
641 		ttl = (long long)(data->ttl - timenow);
642 		memmove(rtt, &data->rtt, sizeof(*rtt));
643 		if(timenow < data->probedelay)
644 			*delay = (int)(data->probedelay - timenow);
645 		else	*delay = 0;
646 	}
647 	*tA = (int)data->timeout_A;
648 	*tAAAA = (int)data->timeout_AAAA;
649 	*tother = (int)data->timeout_other;
650 	lock_rw_unlock(&e->lock);
651 	return ttl;
652 }
653 
654 int
655 infra_edns_update(struct infra_cache* infra, struct sockaddr_storage* addr,
656 	socklen_t addrlen, uint8_t* nm, size_t nmlen, int edns_version,
657 	time_t timenow)
658 {
659 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
660 		nm, nmlen, 1);
661 	struct infra_data* data;
662 	int needtoinsert = 0;
663 	if(!e) {
664 		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
665 			return 0;
666 		needtoinsert = 1;
667 	} else if(((struct infra_data*)e->data)->ttl < timenow) {
668 		data_entry_init(infra, e, timenow);
669 	}
670 	/* have an entry, update the rtt, and the ttl */
671 	data = (struct infra_data*)e->data;
672 	/* do not update if noEDNS and stored is yesEDNS */
673 	if(!(edns_version == -1 && (data->edns_version != -1 &&
674 		data->edns_lame_known))) {
675 		data->edns_version = edns_version;
676 		data->edns_lame_known = 1;
677 	}
678 
679 	if(needtoinsert)
680 		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
681 	else 	{ lock_rw_unlock(&e->lock); }
682 	return 1;
683 }
684 
685 int
686 infra_get_lame_rtt(struct infra_cache* infra,
687         struct sockaddr_storage* addr, socklen_t addrlen,
688         uint8_t* name, size_t namelen, uint16_t qtype,
689 	int* lame, int* dnsseclame, int* reclame, int* rtt, time_t timenow)
690 {
691 	struct infra_data* host;
692 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
693 		name, namelen, 0);
694 	if(!e)
695 		return 0;
696 	host = (struct infra_data*)e->data;
697 	*rtt = rtt_unclamped(&host->rtt);
698 	if(host->rtt.rto >= PROBE_MAXRTO && timenow >= host->probedelay
699 		&& infra->infra_keep_probing) {
700 		/* single probe, keep probing */
701 		if(*rtt >= USEFUL_SERVER_TOP_TIMEOUT)
702 			*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
703 	} else if(host->rtt.rto >= PROBE_MAXRTO && timenow < host->probedelay
704 		&& rtt_notimeout(&host->rtt)*4 <= host->rtt.rto) {
705 		/* single probe for this domain, and we are not probing */
706 		/* unless the query type allows a probe to happen */
707 		if(qtype == LDNS_RR_TYPE_A) {
708 			if(host->timeout_A >= TIMEOUT_COUNT_MAX)
709 				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
710 			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
711 		} else if(qtype == LDNS_RR_TYPE_AAAA) {
712 			if(host->timeout_AAAA >= TIMEOUT_COUNT_MAX)
713 				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
714 			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
715 		} else {
716 			if(host->timeout_other >= TIMEOUT_COUNT_MAX)
717 				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
718 			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
719 		}
720 	}
721 	if(timenow > host->ttl) {
722 		/* expired entry */
723 		/* see if this can be a re-probe of an unresponsive server */
724 		/* minus 1000 because that is outside of the RTTBAND, so
725 		 * blacklisted servers stay blacklisted if this is chosen */
726 		if(host->rtt.rto >= USEFUL_SERVER_TOP_TIMEOUT ||
727 			infra->infra_keep_probing) {
728 			lock_rw_unlock(&e->lock);
729 			*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
730 			*lame = 0;
731 			*dnsseclame = 0;
732 			*reclame = 0;
733 			return 1;
734 		}
735 		lock_rw_unlock(&e->lock);
736 		return 0;
737 	}
738 	/* check lameness first */
739 	if(host->lame_type_A && qtype == LDNS_RR_TYPE_A) {
740 		lock_rw_unlock(&e->lock);
741 		*lame = 1;
742 		*dnsseclame = 0;
743 		*reclame = 0;
744 		return 1;
745 	} else if(host->lame_other && qtype != LDNS_RR_TYPE_A) {
746 		lock_rw_unlock(&e->lock);
747 		*lame = 1;
748 		*dnsseclame = 0;
749 		*reclame = 0;
750 		return 1;
751 	} else if(host->isdnsseclame) {
752 		lock_rw_unlock(&e->lock);
753 		*lame = 0;
754 		*dnsseclame = 1;
755 		*reclame = 0;
756 		return 1;
757 	} else if(host->rec_lame) {
758 		lock_rw_unlock(&e->lock);
759 		*lame = 0;
760 		*dnsseclame = 0;
761 		*reclame = 1;
762 		return 1;
763 	}
764 	/* no lameness for this type of query */
765 	lock_rw_unlock(&e->lock);
766 	*lame = 0;
767 	*dnsseclame = 0;
768 	*reclame = 0;
769 	return 1;
770 }
771 
772 int infra_find_ratelimit(struct infra_cache* infra, uint8_t* name,
773 	size_t namelen)
774 {
775 	int labs = dname_count_labels(name);
776 	struct domain_limit_data* d = (struct domain_limit_data*)
777 		name_tree_lookup(&infra->domain_limits, name, namelen, labs,
778 		LDNS_RR_CLASS_IN);
779 	if(!d) return infra_dp_ratelimit;
780 
781 	if(d->node.labs == labs && d->lim != -1)
782 		return d->lim; /* exact match */
783 
784 	/* find 'below match' */
785 	if(d->node.labs == labs)
786 		d = (struct domain_limit_data*)d->node.parent;
787 	while(d) {
788 		if(d->below != -1)
789 			return d->below;
790 		d = (struct domain_limit_data*)d->node.parent;
791 	}
792 	return infra_dp_ratelimit;
793 }
794 
795 size_t ip_rate_sizefunc(void* k, void* ATTR_UNUSED(d))
796 {
797 	struct ip_rate_key* key = (struct ip_rate_key*)k;
798 	return sizeof(*key) + sizeof(struct ip_rate_data)
799 		+ lock_get_mem(&key->entry.lock);
800 }
801 
802 int ip_rate_compfunc(void* key1, void* key2)
803 {
804 	struct ip_rate_key* k1 = (struct ip_rate_key*)key1;
805 	struct ip_rate_key* k2 = (struct ip_rate_key*)key2;
806 	return sockaddr_cmp_addr(&k1->addr, k1->addrlen,
807 		&k2->addr, k2->addrlen);
808 }
809 
810 void ip_rate_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
811 {
812 	struct ip_rate_key* key = (struct ip_rate_key*)k;
813 	if(!key)
814 		return;
815 	lock_rw_destroy(&key->entry.lock);
816 	free(key);
817 }
818 
819 /** find data item in array, for write access, caller unlocks */
820 static struct lruhash_entry* infra_find_ratedata(struct infra_cache* infra,
821 	uint8_t* name, size_t namelen, int wr)
822 {
823 	struct rate_key key;
824 	hashvalue_type h = dname_query_hash(name, 0xab);
825 	memset(&key, 0, sizeof(key));
826 	key.name = name;
827 	key.namelen = namelen;
828 	key.entry.hash = h;
829 	return slabhash_lookup(infra->domain_rates, h, &key, wr);
830 }
831 
832 /** find data item in array for ip addresses */
833 static struct lruhash_entry* infra_find_ip_ratedata(struct infra_cache* infra,
834 	struct comm_reply* repinfo, int wr)
835 {
836 	struct ip_rate_key key;
837 	hashvalue_type h = hash_addr(&(repinfo->addr),
838 		repinfo->addrlen, 0);
839 	memset(&key, 0, sizeof(key));
840 	key.addr = repinfo->addr;
841 	key.addrlen = repinfo->addrlen;
842 	key.entry.hash = h;
843 	return slabhash_lookup(infra->client_ip_rates, h, &key, wr);
844 }
845 
846 /** create rate data item for name, number 1 in now */
847 static void infra_create_ratedata(struct infra_cache* infra,
848 	uint8_t* name, size_t namelen, time_t timenow)
849 {
850 	hashvalue_type h = dname_query_hash(name, 0xab);
851 	struct rate_key* k = (struct rate_key*)calloc(1, sizeof(*k));
852 	struct rate_data* d = (struct rate_data*)calloc(1, sizeof(*d));
853 	if(!k || !d) {
854 		free(k);
855 		free(d);
856 		return; /* alloc failure */
857 	}
858 	k->namelen = namelen;
859 	k->name = memdup(name, namelen);
860 	if(!k->name) {
861 		free(k);
862 		free(d);
863 		return; /* alloc failure */
864 	}
865 	lock_rw_init(&k->entry.lock);
866 	k->entry.hash = h;
867 	k->entry.key = k;
868 	k->entry.data = d;
869 	d->qps[0] = 1;
870 	d->timestamp[0] = timenow;
871 	slabhash_insert(infra->domain_rates, h, &k->entry, d, NULL);
872 }
873 
874 /** create rate data item for ip address */
875 static void infra_ip_create_ratedata(struct infra_cache* infra,
876 	struct comm_reply* repinfo, time_t timenow)
877 {
878 	hashvalue_type h = hash_addr(&(repinfo->addr),
879 	repinfo->addrlen, 0);
880 	struct ip_rate_key* k = (struct ip_rate_key*)calloc(1, sizeof(*k));
881 	struct ip_rate_data* d = (struct ip_rate_data*)calloc(1, sizeof(*d));
882 	if(!k || !d) {
883 		free(k);
884 		free(d);
885 		return; /* alloc failure */
886 	}
887 	k->addr = repinfo->addr;
888 	k->addrlen = repinfo->addrlen;
889 	lock_rw_init(&k->entry.lock);
890 	k->entry.hash = h;
891 	k->entry.key = k;
892 	k->entry.data = d;
893 	d->qps[0] = 1;
894 	d->timestamp[0] = timenow;
895 	slabhash_insert(infra->client_ip_rates, h, &k->entry, d, NULL);
896 }
897 
898 /** find the second and return its rate counter, if none, remove oldest */
899 static int* infra_rate_find_second(void* data, time_t t)
900 {
901 	struct rate_data* d = (struct rate_data*)data;
902 	int i, oldest;
903 	for(i=0; i<RATE_WINDOW; i++) {
904 		if(d->timestamp[i] == t)
905 			return &(d->qps[i]);
906 	}
907 	/* remove oldest timestamp, and insert it at t with 0 qps */
908 	oldest = 0;
909 	for(i=0; i<RATE_WINDOW; i++) {
910 		if(d->timestamp[i] < d->timestamp[oldest])
911 			oldest = i;
912 	}
913 	d->timestamp[oldest] = t;
914 	d->qps[oldest] = 0;
915 	return &(d->qps[oldest]);
916 }
917 
918 int infra_rate_max(void* data, time_t now)
919 {
920 	struct rate_data* d = (struct rate_data*)data;
921 	int i, max = 0;
922 	for(i=0; i<RATE_WINDOW; i++) {
923 		if(now-d->timestamp[i] <= RATE_WINDOW) {
924 			if(d->qps[i] > max)
925 				max = d->qps[i];
926 		}
927 	}
928 	return max;
929 }
930 
931 int infra_ratelimit_inc(struct infra_cache* infra, uint8_t* name,
932 	size_t namelen, time_t timenow, struct query_info* qinfo,
933 	struct comm_reply* replylist)
934 {
935 	int lim, max;
936 	struct lruhash_entry* entry;
937 
938 	if(!infra_dp_ratelimit)
939 		return 1; /* not enabled */
940 
941 	/* find ratelimit */
942 	lim = infra_find_ratelimit(infra, name, namelen);
943 	if(!lim)
944 		return 1; /* disabled for this domain */
945 
946 	/* find or insert ratedata */
947 	entry = infra_find_ratedata(infra, name, namelen, 1);
948 	if(entry) {
949 		int premax = infra_rate_max(entry->data, timenow);
950 		int* cur = infra_rate_find_second(entry->data, timenow);
951 		(*cur)++;
952 		max = infra_rate_max(entry->data, timenow);
953 		lock_rw_unlock(&entry->lock);
954 
955 		if(premax < lim && max >= lim) {
956 			char buf[257], qnm[257], ts[12], cs[12], ip[128];
957 			dname_str(name, buf);
958 			dname_str(qinfo->qname, qnm);
959 			sldns_wire2str_type_buf(qinfo->qtype, ts, sizeof(ts));
960 			sldns_wire2str_class_buf(qinfo->qclass, cs, sizeof(cs));
961 			ip[0]=0;
962 			if(replylist) {
963 				addr_to_str((struct sockaddr_storage *)&replylist->addr,
964 					replylist->addrlen, ip, sizeof(ip));
965 				verbose(VERB_OPS, "ratelimit exceeded %s %d query %s %s %s from %s", buf, lim, qnm, cs, ts, ip);
966 			} else {
967 				verbose(VERB_OPS, "ratelimit exceeded %s %d query %s %s %s", buf, lim, qnm, cs, ts);
968 			}
969 		}
970 		return (max < lim);
971 	}
972 
973 	/* create */
974 	infra_create_ratedata(infra, name, namelen, timenow);
975 	return (1 < lim);
976 }
977 
978 void infra_ratelimit_dec(struct infra_cache* infra, uint8_t* name,
979 	size_t namelen, time_t timenow)
980 {
981 	struct lruhash_entry* entry;
982 	int* cur;
983 	if(!infra_dp_ratelimit)
984 		return; /* not enabled */
985 	entry = infra_find_ratedata(infra, name, namelen, 1);
986 	if(!entry) return; /* not cached */
987 	cur = infra_rate_find_second(entry->data, timenow);
988 	if((*cur) > 0)
989 		(*cur)--;
990 	lock_rw_unlock(&entry->lock);
991 }
992 
993 int infra_ratelimit_exceeded(struct infra_cache* infra, uint8_t* name,
994 	size_t namelen, time_t timenow)
995 {
996 	struct lruhash_entry* entry;
997 	int lim, max;
998 	if(!infra_dp_ratelimit)
999 		return 0; /* not enabled */
1000 
1001 	/* find ratelimit */
1002 	lim = infra_find_ratelimit(infra, name, namelen);
1003 	if(!lim)
1004 		return 0; /* disabled for this domain */
1005 
1006 	/* find current rate */
1007 	entry = infra_find_ratedata(infra, name, namelen, 0);
1008 	if(!entry)
1009 		return 0; /* not cached */
1010 	max = infra_rate_max(entry->data, timenow);
1011 	lock_rw_unlock(&entry->lock);
1012 
1013 	return (max >= lim);
1014 }
1015 
1016 size_t
1017 infra_get_mem(struct infra_cache* infra)
1018 {
1019 	size_t s = sizeof(*infra) + slabhash_get_mem(infra->hosts);
1020 	if(infra->domain_rates) s += slabhash_get_mem(infra->domain_rates);
1021 	if(infra->client_ip_rates) s += slabhash_get_mem(infra->client_ip_rates);
1022 	/* ignore domain_limits because walk through tree is big */
1023 	return s;
1024 }
1025 
1026 int infra_ip_ratelimit_inc(struct infra_cache* infra,
1027   struct comm_reply* repinfo, time_t timenow, struct sldns_buffer* buffer)
1028 {
1029 	int max;
1030 	struct lruhash_entry* entry;
1031 
1032 	/* not enabled */
1033 	if(!infra_ip_ratelimit) {
1034 		return 1;
1035 	}
1036 	/* find or insert ratedata */
1037 	entry = infra_find_ip_ratedata(infra, repinfo, 1);
1038 	if(entry) {
1039 		int premax = infra_rate_max(entry->data, timenow);
1040 		int* cur = infra_rate_find_second(entry->data, timenow);
1041 		(*cur)++;
1042 		max = infra_rate_max(entry->data, timenow);
1043 		lock_rw_unlock(&entry->lock);
1044 
1045 		if(premax < infra_ip_ratelimit && max >= infra_ip_ratelimit) {
1046 			char client_ip[128], qnm[LDNS_MAX_DOMAINLEN+1+12+12];
1047 			addr_to_str((struct sockaddr_storage *)&repinfo->addr,
1048 				repinfo->addrlen, client_ip, sizeof(client_ip));
1049 			qnm[0]=0;
1050 			if(sldns_buffer_limit(buffer)>LDNS_HEADER_SIZE &&
1051 				LDNS_QDCOUNT(sldns_buffer_begin(buffer))!=0) {
1052 				(void)sldns_wire2str_rrquestion_buf(
1053 					sldns_buffer_at(buffer, LDNS_HEADER_SIZE),
1054 					sldns_buffer_limit(buffer)-LDNS_HEADER_SIZE,
1055 					qnm, sizeof(qnm));
1056 				if(strlen(qnm)>0 && qnm[strlen(qnm)-1]=='\n')
1057 					qnm[strlen(qnm)-1] = 0; /*remove newline*/
1058 				if(strchr(qnm, '\t'))
1059 					*strchr(qnm, '\t') = ' ';
1060 				if(strchr(qnm, '\t'))
1061 					*strchr(qnm, '\t') = ' ';
1062 				verbose(VERB_OPS, "ip_ratelimit exceeded %s %d %s",
1063 					client_ip, infra_ip_ratelimit, qnm);
1064 			} else {
1065 				verbose(VERB_OPS, "ip_ratelimit exceeded %s %d (no query name)",
1066 					client_ip, infra_ip_ratelimit);
1067 			}
1068 		}
1069 		return (max <= infra_ip_ratelimit);
1070 	}
1071 
1072 	/* create */
1073 	infra_ip_create_ratedata(infra, repinfo, timenow);
1074 	return 1;
1075 }
1076