xref: /freebsd/contrib/unbound/services/cache/infra.c (revision a8089ea5aee578e08acab2438e82fc9a9ae50ed8)
1 /*
2  * services/cache/infra.c - infrastructure cache, server rtt and capabilities
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /**
37  * \file
38  *
39  * This file contains the infrastructure cache.
40  */
41 #include "config.h"
42 #include "sldns/rrdef.h"
43 #include "sldns/str2wire.h"
44 #include "sldns/sbuffer.h"
45 #include "sldns/wire2str.h"
46 #include "services/cache/infra.h"
47 #include "util/storage/slabhash.h"
48 #include "util/storage/lookup3.h"
49 #include "util/data/dname.h"
50 #include "util/log.h"
51 #include "util/net_help.h"
52 #include "util/config_file.h"
53 #include "iterator/iterator.h"
54 
55 /** Timeout when only a single probe query per IP is allowed. */
56 #define PROBE_MAXRTO 12000 /* in msec */
57 
/** Number of timeouts allowed per query type before the domain can be
 * blocked for that type; even if another type has driven the rtt to its
 * maximum, a different type may still send this many packets (until those
 * all time out too). */
61 #define TIMEOUT_COUNT_MAX 3
62 
63 /** ratelimit value for delegation point */
64 int infra_dp_ratelimit = 0;
65 
66 /** ratelimit value for client ip addresses,
67  *  in queries per second. */
68 int infra_ip_ratelimit = 0;
69 
70 /** ratelimit value for client ip addresses,
71  *  in queries per second.
72  *  For clients with a valid DNS Cookie. */
73 int infra_ip_ratelimit_cookie = 0;
74 
75 size_t
76 infra_sizefunc(void* k, void* ATTR_UNUSED(d))
77 {
78 	struct infra_key* key = (struct infra_key*)k;
79 	return sizeof(*key) + sizeof(struct infra_data) + key->namelen
80 		+ lock_get_mem(&key->entry.lock);
81 }
82 
83 int
84 infra_compfunc(void* key1, void* key2)
85 {
86 	struct infra_key* k1 = (struct infra_key*)key1;
87 	struct infra_key* k2 = (struct infra_key*)key2;
88 	int r = sockaddr_cmp(&k1->addr, k1->addrlen, &k2->addr, k2->addrlen);
89 	if(r != 0)
90 		return r;
91 	if(k1->namelen != k2->namelen) {
92 		if(k1->namelen < k2->namelen)
93 			return -1;
94 		return 1;
95 	}
96 	return query_dname_compare(k1->zonename, k2->zonename);
97 }
98 
99 void
100 infra_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
101 {
102 	struct infra_key* key = (struct infra_key*)k;
103 	if(!key)
104 		return;
105 	lock_rw_destroy(&key->entry.lock);
106 	free(key->zonename);
107 	free(key);
108 }
109 
/** Data delete callback for the host cache: the data is one malloc block. */
void
infra_deldatafunc(void* d, void* ATTR_UNUSED(arg))
{
	free(d);
}
116 
117 size_t
118 rate_sizefunc(void* k, void* ATTR_UNUSED(d))
119 {
120 	struct rate_key* key = (struct rate_key*)k;
121 	return sizeof(*key) + sizeof(struct rate_data) + key->namelen
122 		+ lock_get_mem(&key->entry.lock);
123 }
124 
125 int
126 rate_compfunc(void* key1, void* key2)
127 {
128 	struct rate_key* k1 = (struct rate_key*)key1;
129 	struct rate_key* k2 = (struct rate_key*)key2;
130 	if(k1->namelen != k2->namelen) {
131 		if(k1->namelen < k2->namelen)
132 			return -1;
133 		return 1;
134 	}
135 	return query_dname_compare(k1->name, k2->name);
136 }
137 
138 void
139 rate_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
140 {
141 	struct rate_key* key = (struct rate_key*)k;
142 	if(!key)
143 		return;
144 	lock_rw_destroy(&key->entry.lock);
145 	free(key->name);
146 	free(key);
147 }
148 
/** Data delete callback for the domain rate cache: the data is one
 * allocated block. */
void
rate_deldatafunc(void* d, void* ATTR_UNUSED(arg))
{
	free(d);
}
155 
156 /** find or create element in domainlimit tree */
157 static struct domain_limit_data* domain_limit_findcreate(
158 	struct infra_cache* infra, char* name)
159 {
160 	uint8_t* nm;
161 	int labs;
162 	size_t nmlen;
163 	struct domain_limit_data* d;
164 
165 	/* parse name */
166 	nm = sldns_str2wire_dname(name, &nmlen);
167 	if(!nm) {
168 		log_err("could not parse %s", name);
169 		return NULL;
170 	}
171 	labs = dname_count_labels(nm);
172 
173 	/* can we find it? */
174 	d = (struct domain_limit_data*)name_tree_find(&infra->domain_limits,
175 		nm, nmlen, labs, LDNS_RR_CLASS_IN);
176 	if(d) {
177 		free(nm);
178 		return d;
179 	}
180 
181 	/* create it */
182 	d = (struct domain_limit_data*)calloc(1, sizeof(*d));
183 	if(!d) {
184 		free(nm);
185 		return NULL;
186 	}
187 	d->node.node.key = &d->node;
188 	d->node.name = nm;
189 	d->node.len = nmlen;
190 	d->node.labs = labs;
191 	d->node.dclass = LDNS_RR_CLASS_IN;
192 	d->lim = -1;
193 	d->below = -1;
194 	if(!name_tree_insert(&infra->domain_limits, &d->node, nm, nmlen,
195 		labs, LDNS_RR_CLASS_IN)) {
196 		log_err("duplicate element in domainlimit tree");
197 		free(nm);
198 		free(d);
199 		return NULL;
200 	}
201 	return d;
202 }
203 
204 /** insert rate limit configuration into lookup tree */
205 static int infra_ratelimit_cfg_insert(struct infra_cache* infra,
206 	struct config_file* cfg)
207 {
208 	struct config_str2list* p;
209 	struct domain_limit_data* d;
210 	for(p = cfg->ratelimit_for_domain; p; p = p->next) {
211 		d = domain_limit_findcreate(infra, p->str);
212 		if(!d)
213 			return 0;
214 		d->lim = atoi(p->str2);
215 	}
216 	for(p = cfg->ratelimit_below_domain; p; p = p->next) {
217 		d = domain_limit_findcreate(infra, p->str);
218 		if(!d)
219 			return 0;
220 		d->below = atoi(p->str2);
221 	}
222 	return 1;
223 }
224 
225 /** setup domain limits tree (0 on failure) */
226 static int
227 setup_domain_limits(struct infra_cache* infra, struct config_file* cfg)
228 {
229 	name_tree_init(&infra->domain_limits);
230 	if(!infra_ratelimit_cfg_insert(infra, cfg)) {
231 		return 0;
232 	}
233 	name_tree_init_parents(&infra->domain_limits);
234 	return 1;
235 }
236 
237 struct infra_cache*
238 infra_create(struct config_file* cfg)
239 {
240 	struct infra_cache* infra = (struct infra_cache*)calloc(1,
241 		sizeof(struct infra_cache));
242 	size_t maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
243 		sizeof(struct infra_data)+INFRA_BYTES_NAME);
244 	if(!infra) {
245 		return NULL;
246 	}
247 	infra->hosts = slabhash_create(cfg->infra_cache_slabs,
248 		INFRA_HOST_STARTSIZE, maxmem, &infra_sizefunc, &infra_compfunc,
249 		&infra_delkeyfunc, &infra_deldatafunc, NULL);
250 	if(!infra->hosts) {
251 		free(infra);
252 		return NULL;
253 	}
254 	infra->host_ttl = cfg->host_ttl;
255 	infra->infra_keep_probing = cfg->infra_keep_probing;
256 	infra_dp_ratelimit = cfg->ratelimit;
257 	infra->domain_rates = slabhash_create(cfg->ratelimit_slabs,
258 		INFRA_HOST_STARTSIZE, cfg->ratelimit_size,
259 		&rate_sizefunc, &rate_compfunc, &rate_delkeyfunc,
260 		&rate_deldatafunc, NULL);
261 	if(!infra->domain_rates) {
262 		infra_delete(infra);
263 		return NULL;
264 	}
265 	/* insert config data into ratelimits */
266 	if(!setup_domain_limits(infra, cfg)) {
267 		infra_delete(infra);
268 		return NULL;
269 	}
270 	infra_ip_ratelimit = cfg->ip_ratelimit;
271 	infra->client_ip_rates = slabhash_create(cfg->ip_ratelimit_slabs,
272 	    INFRA_HOST_STARTSIZE, cfg->ip_ratelimit_size, &ip_rate_sizefunc,
273 	    &ip_rate_compfunc, &ip_rate_delkeyfunc, &ip_rate_deldatafunc, NULL);
274 	if(!infra->client_ip_rates) {
275 		infra_delete(infra);
276 		return NULL;
277 	}
278 	return infra;
279 }
280 
281 /** delete domain_limit entries */
282 static void domain_limit_free(rbnode_type* n, void* ATTR_UNUSED(arg))
283 {
284 	if(n) {
285 		free(((struct domain_limit_data*)n)->node.name);
286 		free(n);
287 	}
288 }
289 
290 void
291 infra_delete(struct infra_cache* infra)
292 {
293 	if(!infra)
294 		return;
295 	slabhash_delete(infra->hosts);
296 	slabhash_delete(infra->domain_rates);
297 	traverse_postorder(&infra->domain_limits, domain_limit_free, NULL);
298 	slabhash_delete(infra->client_ip_rates);
299 	free(infra);
300 }
301 
302 struct infra_cache*
303 infra_adjust(struct infra_cache* infra, struct config_file* cfg)
304 {
305 	size_t maxmem;
306 	if(!infra)
307 		return infra_create(cfg);
308 	infra->host_ttl = cfg->host_ttl;
309 	infra->infra_keep_probing = cfg->infra_keep_probing;
310 	infra_dp_ratelimit = cfg->ratelimit;
311 	infra_ip_ratelimit = cfg->ip_ratelimit;
312 	maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
313 		sizeof(struct infra_data)+INFRA_BYTES_NAME);
314 	/* divide cachesize by slabs and multiply by slabs, because if the
315 	 * cachesize is not an even multiple of slabs, that is the resulting
316 	 * size of the slabhash */
317 	if(!slabhash_is_size(infra->hosts, maxmem, cfg->infra_cache_slabs) ||
318 	   !slabhash_is_size(infra->domain_rates, cfg->ratelimit_size,
319 	   	cfg->ratelimit_slabs) ||
320 	   !slabhash_is_size(infra->client_ip_rates, cfg->ip_ratelimit_size,
321 	   	cfg->ip_ratelimit_slabs)) {
322 		infra_delete(infra);
323 		infra = infra_create(cfg);
324 	} else {
325 		/* reapply domain limits */
326 		traverse_postorder(&infra->domain_limits, domain_limit_free,
327 			NULL);
328 		if(!setup_domain_limits(infra, cfg)) {
329 			infra_delete(infra);
330 			return NULL;
331 		}
332 	}
333 	return infra;
334 }
335 
336 /** calculate the hash value for a host key
337  *  set use_port to a non-0 number to use the port in
338  *  the hash calculation; 0 to ignore the port.*/
339 static hashvalue_type
340 hash_addr(struct sockaddr_storage* addr, socklen_t addrlen,
341   int use_port)
342 {
343 	hashvalue_type h = 0xab;
344 	/* select the pieces to hash, some OS have changing data inside */
345 	if(addr_is_ip6(addr, addrlen)) {
346 		struct sockaddr_in6* in6 = (struct sockaddr_in6*)addr;
347 		h = hashlittle(&in6->sin6_family, sizeof(in6->sin6_family), h);
348 		if(use_port){
349 			h = hashlittle(&in6->sin6_port, sizeof(in6->sin6_port), h);
350 		}
351 		h = hashlittle(&in6->sin6_addr, INET6_SIZE, h);
352 	} else {
353 		struct sockaddr_in* in = (struct sockaddr_in*)addr;
354 		h = hashlittle(&in->sin_family, sizeof(in->sin_family), h);
355 		if(use_port){
356 			h = hashlittle(&in->sin_port, sizeof(in->sin_port), h);
357 		}
358 		h = hashlittle(&in->sin_addr, INET_SIZE, h);
359 	}
360 	return h;
361 }
362 
363 /** calculate infra hash for a key */
364 static hashvalue_type
365 hash_infra(struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name)
366 {
367 	return dname_query_hash(name, hash_addr(addr, addrlen, 1));
368 }
369 
370 /** lookup version that does not check host ttl (you check it) */
371 struct lruhash_entry*
372 infra_lookup_nottl(struct infra_cache* infra, struct sockaddr_storage* addr,
373 	socklen_t addrlen, uint8_t* name, size_t namelen, int wr)
374 {
375 	struct infra_key k;
376 	k.addrlen = addrlen;
377 	memcpy(&k.addr, addr, addrlen);
378 	k.namelen = namelen;
379 	k.zonename = name;
380 	k.entry.hash = hash_infra(addr, addrlen, name);
381 	k.entry.key = (void*)&k;
382 	k.entry.data = NULL;
383 	return slabhash_lookup(infra->hosts, k.entry.hash, &k, wr);
384 }
385 
386 /** init the data elements */
387 static void
388 data_entry_init(struct infra_cache* infra, struct lruhash_entry* e,
389 	time_t timenow)
390 {
391 	struct infra_data* data = (struct infra_data*)e->data;
392 	data->ttl = timenow + infra->host_ttl;
393 	rtt_init(&data->rtt);
394 	data->edns_version = 0;
395 	data->edns_lame_known = 0;
396 	data->probedelay = 0;
397 	data->isdnsseclame = 0;
398 	data->rec_lame = 0;
399 	data->lame_type_A = 0;
400 	data->lame_other = 0;
401 	data->timeout_A = 0;
402 	data->timeout_AAAA = 0;
403 	data->timeout_other = 0;
404 }
405 
406 /**
407  * Create and init a new entry for a host
408  * @param infra: infra structure with config parameters.
409  * @param addr: host address.
410  * @param addrlen: length of addr.
411  * @param name: name of zone
412  * @param namelen: length of name.
413  * @param tm: time now.
414  * @return: the new entry or NULL on malloc failure.
415  */
416 static struct lruhash_entry*
417 new_entry(struct infra_cache* infra, struct sockaddr_storage* addr,
418 	socklen_t addrlen, uint8_t* name, size_t namelen, time_t tm)
419 {
420 	struct infra_data* data;
421 	struct infra_key* key = (struct infra_key*)malloc(sizeof(*key));
422 	if(!key)
423 		return NULL;
424 	data = (struct infra_data*)malloc(sizeof(struct infra_data));
425 	if(!data) {
426 		free(key);
427 		return NULL;
428 	}
429 	key->zonename = memdup(name, namelen);
430 	if(!key->zonename) {
431 		free(key);
432 		free(data);
433 		return NULL;
434 	}
435 	key->namelen = namelen;
436 	lock_rw_init(&key->entry.lock);
437 	key->entry.hash = hash_infra(addr, addrlen, name);
438 	key->entry.key = (void*)key;
439 	key->entry.data = (void*)data;
440 	key->addrlen = addrlen;
441 	memcpy(&key->addr, addr, addrlen);
442 	data_entry_init(infra, &key->entry, tm);
443 	return &key->entry;
444 }
445 
446 int
447 infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
448         socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow,
449 	int* edns_vs, uint8_t* edns_lame_known, int* to)
450 {
451 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
452 		nm, nmlen, 0);
453 	struct infra_data* data;
454 	int wr = 0;
455 	if(e && ((struct infra_data*)e->data)->ttl < timenow) {
456 		/* it expired, try to reuse existing entry */
457 		int old = ((struct infra_data*)e->data)->rtt.rto;
458 		time_t tprobe = ((struct infra_data*)e->data)->probedelay;
459 		uint8_t tA = ((struct infra_data*)e->data)->timeout_A;
460 		uint8_t tAAAA = ((struct infra_data*)e->data)->timeout_AAAA;
461 		uint8_t tother = ((struct infra_data*)e->data)->timeout_other;
462 		lock_rw_unlock(&e->lock);
463 		e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
464 		if(e) {
465 			/* if its still there we have a writelock, init */
466 			/* re-initialise */
467 			/* do not touch lameness, it may be valid still */
468 			data_entry_init(infra, e, timenow);
469 			wr = 1;
470 			/* TOP_TIMEOUT remains on reuse */
471 			if(old >= USEFUL_SERVER_TOP_TIMEOUT) {
472 				((struct infra_data*)e->data)->rtt.rto
473 					= USEFUL_SERVER_TOP_TIMEOUT;
474 				((struct infra_data*)e->data)->probedelay = tprobe;
475 				((struct infra_data*)e->data)->timeout_A = tA;
476 				((struct infra_data*)e->data)->timeout_AAAA = tAAAA;
477 				((struct infra_data*)e->data)->timeout_other = tother;
478 			}
479 		}
480 	}
481 	if(!e) {
482 		/* insert new entry */
483 		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
484 			return 0;
485 		data = (struct infra_data*)e->data;
486 		*edns_vs = data->edns_version;
487 		*edns_lame_known = data->edns_lame_known;
488 		*to = rtt_timeout(&data->rtt);
489 		slabhash_insert(infra->hosts, e->hash, e, data, NULL);
490 		return 1;
491 	}
492 	/* use existing entry */
493 	data = (struct infra_data*)e->data;
494 	*edns_vs = data->edns_version;
495 	*edns_lame_known = data->edns_lame_known;
496 	*to = rtt_timeout(&data->rtt);
497 	if(*to >= PROBE_MAXRTO && (infra->infra_keep_probing ||
498 		rtt_notimeout(&data->rtt)*4 <= *to)) {
499 		/* delay other queries, this is the probe query */
500 		if(!wr) {
501 			lock_rw_unlock(&e->lock);
502 			e = infra_lookup_nottl(infra, addr,addrlen,nm,nmlen, 1);
503 			if(!e) { /* flushed from cache real fast, no use to
504 				allocate just for the probedelay */
505 				return 1;
506 			}
507 			data = (struct infra_data*)e->data;
508 		}
509 		/* add 999 to round up the timeout value from msec to sec,
510 		 * then add a whole second so it is certain that this probe
511 		 * has timed out before the next is allowed */
512 		data->probedelay = timenow + ((*to)+1999)/1000;
513 	}
514 	lock_rw_unlock(&e->lock);
515 	return 1;
516 }
517 
518 int
519 infra_set_lame(struct infra_cache* infra, struct sockaddr_storage* addr,
520 	socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow,
521 	int dnsseclame, int reclame, uint16_t qtype)
522 {
523 	struct infra_data* data;
524 	struct lruhash_entry* e;
525 	int needtoinsert = 0;
526 	e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
527 	if(!e) {
528 		/* insert it */
529 		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) {
530 			log_err("set_lame: malloc failure");
531 			return 0;
532 		}
533 		needtoinsert = 1;
534 	} else if( ((struct infra_data*)e->data)->ttl < timenow) {
535 		/* expired, reuse existing entry */
536 		data_entry_init(infra, e, timenow);
537 	}
538 	/* got an entry, now set the zone lame */
539 	data = (struct infra_data*)e->data;
540 	/* merge data (if any) */
541 	if(dnsseclame)
542 		data->isdnsseclame = 1;
543 	if(reclame)
544 		data->rec_lame = 1;
545 	if(!dnsseclame && !reclame && qtype == LDNS_RR_TYPE_A)
546 		data->lame_type_A = 1;
547 	if(!dnsseclame  && !reclame && qtype != LDNS_RR_TYPE_A)
548 		data->lame_other = 1;
549 	/* done */
550 	if(needtoinsert)
551 		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
552 	else 	{ lock_rw_unlock(&e->lock); }
553 	return 1;
554 }
555 
556 void
557 infra_update_tcp_works(struct infra_cache* infra,
558         struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
559 	size_t nmlen)
560 {
561 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
562 		nm, nmlen, 1);
563 	struct infra_data* data;
564 	if(!e)
565 		return; /* doesn't exist */
566 	data = (struct infra_data*)e->data;
567 	if(data->rtt.rto >= RTT_MAX_TIMEOUT)
568 		/* do not disqualify this server altogether, it is better
569 		 * than nothing */
570 		data->rtt.rto = RTT_MAX_TIMEOUT-1000;
571 	lock_rw_unlock(&e->lock);
572 }
573 
574 int
575 infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr,
576 	socklen_t addrlen, uint8_t* nm, size_t nmlen, int qtype,
577 	int roundtrip, int orig_rtt, time_t timenow)
578 {
579 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
580 		nm, nmlen, 1);
581 	struct infra_data* data;
582 	int needtoinsert = 0, expired = 0;
583 	int rto = 1;
584 	time_t oldprobedelay = 0;
585 	if(!e) {
586 		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
587 			return 0;
588 		needtoinsert = 1;
589 	} else if(((struct infra_data*)e->data)->ttl < timenow) {
590 		oldprobedelay = ((struct infra_data*)e->data)->probedelay;
591 		data_entry_init(infra, e, timenow);
592 		expired = 1;
593 	}
594 	/* have an entry, update the rtt */
595 	data = (struct infra_data*)e->data;
596 	if(roundtrip == -1) {
597 		if(needtoinsert || expired) {
598 			/* timeout on entry that has expired before the timer
599 			 * keep old timeout from the function caller */
600 			data->rtt.rto = orig_rtt;
601 			data->probedelay = oldprobedelay;
602 		}
603 		rtt_lost(&data->rtt, orig_rtt);
604 		if(qtype == LDNS_RR_TYPE_A) {
605 			if(data->timeout_A < TIMEOUT_COUNT_MAX)
606 				data->timeout_A++;
607 		} else if(qtype == LDNS_RR_TYPE_AAAA) {
608 			if(data->timeout_AAAA < TIMEOUT_COUNT_MAX)
609 				data->timeout_AAAA++;
610 		} else {
611 			if(data->timeout_other < TIMEOUT_COUNT_MAX)
612 				data->timeout_other++;
613 		}
614 	} else {
615 		/* if we got a reply, but the old timeout was above server
616 		 * selection height, delete the timeout so the server is
617 		 * fully available again */
618 		if(rtt_unclamped(&data->rtt) >= USEFUL_SERVER_TOP_TIMEOUT)
619 			rtt_init(&data->rtt);
620 		rtt_update(&data->rtt, roundtrip);
621 		data->probedelay = 0;
622 		if(qtype == LDNS_RR_TYPE_A)
623 			data->timeout_A = 0;
624 		else if(qtype == LDNS_RR_TYPE_AAAA)
625 			data->timeout_AAAA = 0;
626 		else	data->timeout_other = 0;
627 	}
628 	if(data->rtt.rto > 0)
629 		rto = data->rtt.rto;
630 
631 	if(needtoinsert)
632 		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
633 	else 	{ lock_rw_unlock(&e->lock); }
634 	return rto;
635 }
636 
637 long long infra_get_host_rto(struct infra_cache* infra,
638         struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
639 	size_t nmlen, struct rtt_info* rtt, int* delay, time_t timenow,
640 	int* tA, int* tAAAA, int* tother)
641 {
642 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
643 		nm, nmlen, 0);
644 	struct infra_data* data;
645 	long long ttl = -2;
646 	if(!e) return -1;
647 	data = (struct infra_data*)e->data;
648 	if(data->ttl >= timenow) {
649 		ttl = (long long)(data->ttl - timenow);
650 		memmove(rtt, &data->rtt, sizeof(*rtt));
651 		if(timenow < data->probedelay)
652 			*delay = (int)(data->probedelay - timenow);
653 		else	*delay = 0;
654 	}
655 	*tA = (int)data->timeout_A;
656 	*tAAAA = (int)data->timeout_AAAA;
657 	*tother = (int)data->timeout_other;
658 	lock_rw_unlock(&e->lock);
659 	return ttl;
660 }
661 
662 int
663 infra_edns_update(struct infra_cache* infra, struct sockaddr_storage* addr,
664 	socklen_t addrlen, uint8_t* nm, size_t nmlen, int edns_version,
665 	time_t timenow)
666 {
667 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
668 		nm, nmlen, 1);
669 	struct infra_data* data;
670 	int needtoinsert = 0;
671 	if(!e) {
672 		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
673 			return 0;
674 		needtoinsert = 1;
675 	} else if(((struct infra_data*)e->data)->ttl < timenow) {
676 		data_entry_init(infra, e, timenow);
677 	}
678 	/* have an entry, update the rtt, and the ttl */
679 	data = (struct infra_data*)e->data;
680 	/* do not update if noEDNS and stored is yesEDNS */
681 	if(!(edns_version == -1 && (data->edns_version != -1 &&
682 		data->edns_lame_known))) {
683 		data->edns_version = edns_version;
684 		data->edns_lame_known = 1;
685 	}
686 
687 	if(needtoinsert)
688 		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
689 	else 	{ lock_rw_unlock(&e->lock); }
690 	return 1;
691 }
692 
693 int
694 infra_get_lame_rtt(struct infra_cache* infra,
695         struct sockaddr_storage* addr, socklen_t addrlen,
696         uint8_t* name, size_t namelen, uint16_t qtype,
697 	int* lame, int* dnsseclame, int* reclame, int* rtt, time_t timenow)
698 {
699 	struct infra_data* host;
700 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
701 		name, namelen, 0);
702 	if(!e)
703 		return 0;
704 	host = (struct infra_data*)e->data;
705 	*rtt = rtt_unclamped(&host->rtt);
706 	if(host->rtt.rto >= PROBE_MAXRTO && timenow >= host->probedelay
707 		&& infra->infra_keep_probing) {
708 		/* single probe, keep probing */
709 		if(*rtt >= USEFUL_SERVER_TOP_TIMEOUT)
710 			*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
711 	} else if(host->rtt.rto >= PROBE_MAXRTO && timenow < host->probedelay
712 		&& rtt_notimeout(&host->rtt)*4 <= host->rtt.rto) {
713 		/* single probe for this domain, and we are not probing */
714 		/* unless the query type allows a probe to happen */
715 		if(qtype == LDNS_RR_TYPE_A) {
716 			if(host->timeout_A >= TIMEOUT_COUNT_MAX)
717 				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
718 			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
719 		} else if(qtype == LDNS_RR_TYPE_AAAA) {
720 			if(host->timeout_AAAA >= TIMEOUT_COUNT_MAX)
721 				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
722 			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
723 		} else {
724 			if(host->timeout_other >= TIMEOUT_COUNT_MAX)
725 				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
726 			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
727 		}
728 	}
729 	/* expired entry */
730 	if(timenow > host->ttl) {
731 
732 		/* see if this can be a re-probe of an unresponsive server */
733 		/* minus 1000 because that is outside of the RTTBAND, so
734 		 * blacklisted servers stay blacklisted if this is chosen */
735 		if(host->rtt.rto >= USEFUL_SERVER_TOP_TIMEOUT) {
736 			lock_rw_unlock(&e->lock);
737 			*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
738 			*lame = 0;
739 			*dnsseclame = 0;
740 			*reclame = 0;
741 			return 1;
742 		}
743 		lock_rw_unlock(&e->lock);
744 		return 0;
745 	}
746 	/* check lameness first */
747 	if(host->lame_type_A && qtype == LDNS_RR_TYPE_A) {
748 		lock_rw_unlock(&e->lock);
749 		*lame = 1;
750 		*dnsseclame = 0;
751 		*reclame = 0;
752 		return 1;
753 	} else if(host->lame_other && qtype != LDNS_RR_TYPE_A) {
754 		lock_rw_unlock(&e->lock);
755 		*lame = 1;
756 		*dnsseclame = 0;
757 		*reclame = 0;
758 		return 1;
759 	} else if(host->isdnsseclame) {
760 		lock_rw_unlock(&e->lock);
761 		*lame = 0;
762 		*dnsseclame = 1;
763 		*reclame = 0;
764 		return 1;
765 	} else if(host->rec_lame) {
766 		lock_rw_unlock(&e->lock);
767 		*lame = 0;
768 		*dnsseclame = 0;
769 		*reclame = 1;
770 		return 1;
771 	}
772 	/* no lameness for this type of query */
773 	lock_rw_unlock(&e->lock);
774 	*lame = 0;
775 	*dnsseclame = 0;
776 	*reclame = 0;
777 	return 1;
778 }
779 
780 int infra_find_ratelimit(struct infra_cache* infra, uint8_t* name,
781 	size_t namelen)
782 {
783 	int labs = dname_count_labels(name);
784 	struct domain_limit_data* d = (struct domain_limit_data*)
785 		name_tree_lookup(&infra->domain_limits, name, namelen, labs,
786 		LDNS_RR_CLASS_IN);
787 	if(!d) return infra_dp_ratelimit;
788 
789 	if(d->node.labs == labs && d->lim != -1)
790 		return d->lim; /* exact match */
791 
792 	/* find 'below match' */
793 	if(d->node.labs == labs)
794 		d = (struct domain_limit_data*)d->node.parent;
795 	while(d) {
796 		if(d->below != -1)
797 			return d->below;
798 		d = (struct domain_limit_data*)d->node.parent;
799 	}
800 	return infra_dp_ratelimit;
801 }
802 
803 size_t ip_rate_sizefunc(void* k, void* ATTR_UNUSED(d))
804 {
805 	struct ip_rate_key* key = (struct ip_rate_key*)k;
806 	return sizeof(*key) + sizeof(struct ip_rate_data)
807 		+ lock_get_mem(&key->entry.lock);
808 }
809 
810 int ip_rate_compfunc(void* key1, void* key2)
811 {
812 	struct ip_rate_key* k1 = (struct ip_rate_key*)key1;
813 	struct ip_rate_key* k2 = (struct ip_rate_key*)key2;
814 	return sockaddr_cmp_addr(&k1->addr, k1->addrlen,
815 		&k2->addr, k2->addrlen);
816 }
817 
818 void ip_rate_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
819 {
820 	struct ip_rate_key* key = (struct ip_rate_key*)k;
821 	if(!key)
822 		return;
823 	lock_rw_destroy(&key->entry.lock);
824 	free(key);
825 }
826 
827 /** find data item in array, for write access, caller unlocks */
828 static struct lruhash_entry* infra_find_ratedata(struct infra_cache* infra,
829 	uint8_t* name, size_t namelen, int wr)
830 {
831 	struct rate_key key;
832 	hashvalue_type h = dname_query_hash(name, 0xab);
833 	memset(&key, 0, sizeof(key));
834 	key.name = name;
835 	key.namelen = namelen;
836 	key.entry.hash = h;
837 	return slabhash_lookup(infra->domain_rates, h, &key, wr);
838 }
839 
840 /** find data item in array for ip addresses */
841 static struct lruhash_entry* infra_find_ip_ratedata(struct infra_cache* infra,
842 	struct sockaddr_storage* addr, socklen_t addrlen, int wr)
843 {
844 	struct ip_rate_key key;
845 	hashvalue_type h = hash_addr(addr, addrlen, 0);
846 	memset(&key, 0, sizeof(key));
847 	key.addr = *addr;
848 	key.addrlen = addrlen;
849 	key.entry.hash = h;
850 	return slabhash_lookup(infra->client_ip_rates, h, &key, wr);
851 }
852 
853 /** create rate data item for name, number 1 in now */
854 static void infra_create_ratedata(struct infra_cache* infra,
855 	uint8_t* name, size_t namelen, time_t timenow)
856 {
857 	hashvalue_type h = dname_query_hash(name, 0xab);
858 	struct rate_key* k = (struct rate_key*)calloc(1, sizeof(*k));
859 	struct rate_data* d = (struct rate_data*)calloc(1, sizeof(*d));
860 	if(!k || !d) {
861 		free(k);
862 		free(d);
863 		return; /* alloc failure */
864 	}
865 	k->namelen = namelen;
866 	k->name = memdup(name, namelen);
867 	if(!k->name) {
868 		free(k);
869 		free(d);
870 		return; /* alloc failure */
871 	}
872 	lock_rw_init(&k->entry.lock);
873 	k->entry.hash = h;
874 	k->entry.key = k;
875 	k->entry.data = d;
876 	d->qps[0] = 1;
877 	d->timestamp[0] = timenow;
878 	slabhash_insert(infra->domain_rates, h, &k->entry, d, NULL);
879 }
880 
881 /** create rate data item for ip address */
882 static void infra_ip_create_ratedata(struct infra_cache* infra,
883 	struct sockaddr_storage* addr, socklen_t addrlen, time_t timenow)
884 {
885 	hashvalue_type h = hash_addr(addr, addrlen, 0);
886 	struct ip_rate_key* k = (struct ip_rate_key*)calloc(1, sizeof(*k));
887 	struct ip_rate_data* d = (struct ip_rate_data*)calloc(1, sizeof(*d));
888 	if(!k || !d) {
889 		free(k);
890 		free(d);
891 		return; /* alloc failure */
892 	}
893 	k->addr = *addr;
894 	k->addrlen = addrlen;
895 	lock_rw_init(&k->entry.lock);
896 	k->entry.hash = h;
897 	k->entry.key = k;
898 	k->entry.data = d;
899 	d->qps[0] = 1;
900 	d->timestamp[0] = timenow;
901 	slabhash_insert(infra->client_ip_rates, h, &k->entry, d, NULL);
902 }
903 
904 /** Find the second and return its rate counter. If none and should_add, remove
905  *  oldest to accommodate. Else return none. */
906 static int* infra_rate_find_second_or_none(void* data, time_t t, int should_add)
907 {
908 	struct rate_data* d = (struct rate_data*)data;
909 	int i, oldest;
910 	for(i=0; i<RATE_WINDOW; i++) {
911 		if(d->timestamp[i] == t)
912 			return &(d->qps[i]);
913 	}
914 	if(!should_add) return NULL;
915 	/* remove oldest timestamp, and insert it at t with 0 qps */
916 	oldest = 0;
917 	for(i=0; i<RATE_WINDOW; i++) {
918 		if(d->timestamp[i] < d->timestamp[oldest])
919 			oldest = i;
920 	}
921 	d->timestamp[oldest] = t;
922 	d->qps[oldest] = 0;
923 	return &(d->qps[oldest]);
924 }
925 
/** Return the rate counter for second t; when there is none yet the oldest
 *  slot is recycled for it. */
static int* infra_rate_give_second(void* data, time_t t)
{
	int* counter = infra_rate_find_second_or_none(data, t, 1);
	return counter;
}
932 
/** Return the rate counter for second t only when a slot for it exists.
 *  The caller must check for a NULL return value. */
static int* infra_rate_get_second(void* data, time_t t)
{
	int* counter = infra_rate_find_second_or_none(data, t, 0);
	return counter;
}
939 
940 int infra_rate_max(void* data, time_t now, int backoff)
941 {
942 	struct rate_data* d = (struct rate_data*)data;
943 	int i, max = 0;
944 	for(i=0; i<RATE_WINDOW; i++) {
945 		if(backoff) {
946 			if(now-d->timestamp[i] <= RATE_WINDOW &&
947 				d->qps[i] > max) {
948 				max = d->qps[i];
949 			}
950 		} else {
951 			if(now == d->timestamp[i]) {
952 				return d->qps[i];
953 			}
954 		}
955 	}
956 	return max;
957 }
958 
959 int infra_ratelimit_inc(struct infra_cache* infra, uint8_t* name,
960 	size_t namelen, time_t timenow, int backoff, struct query_info* qinfo,
961 	struct comm_reply* replylist)
962 {
963 	int lim, max;
964 	struct lruhash_entry* entry;
965 
966 	if(!infra_dp_ratelimit)
967 		return 1; /* not enabled */
968 
969 	/* find ratelimit */
970 	lim = infra_find_ratelimit(infra, name, namelen);
971 	if(!lim)
972 		return 1; /* disabled for this domain */
973 
974 	/* find or insert ratedata */
975 	entry = infra_find_ratedata(infra, name, namelen, 1);
976 	if(entry) {
977 		int premax = infra_rate_max(entry->data, timenow, backoff);
978 		int* cur = infra_rate_give_second(entry->data, timenow);
979 		(*cur)++;
980 		max = infra_rate_max(entry->data, timenow, backoff);
981 		lock_rw_unlock(&entry->lock);
982 
983 		if(premax <= lim && max > lim) {
984 			char buf[257], qnm[257], ts[12], cs[12], ip[128];
985 			dname_str(name, buf);
986 			dname_str(qinfo->qname, qnm);
987 			sldns_wire2str_type_buf(qinfo->qtype, ts, sizeof(ts));
988 			sldns_wire2str_class_buf(qinfo->qclass, cs, sizeof(cs));
989 			ip[0]=0;
990 			if(replylist) {
991 				addr_to_str((struct sockaddr_storage *)&replylist->remote_addr,
992 					replylist->remote_addrlen, ip, sizeof(ip));
993 				verbose(VERB_OPS, "ratelimit exceeded %s %d query %s %s %s from %s", buf, lim, qnm, cs, ts, ip);
994 			} else {
995 				verbose(VERB_OPS, "ratelimit exceeded %s %d query %s %s %s", buf, lim, qnm, cs, ts);
996 			}
997 		}
998 		return (max <= lim);
999 	}
1000 
1001 	/* create */
1002 	infra_create_ratedata(infra, name, namelen, timenow);
1003 	return (1 <= lim);
1004 }
1005 
1006 void infra_ratelimit_dec(struct infra_cache* infra, uint8_t* name,
1007 	size_t namelen, time_t timenow)
1008 {
1009 	struct lruhash_entry* entry;
1010 	int* cur;
1011 	if(!infra_dp_ratelimit)
1012 		return; /* not enabled */
1013 	entry = infra_find_ratedata(infra, name, namelen, 1);
1014 	if(!entry) return; /* not cached */
1015 	cur = infra_rate_get_second(entry->data, timenow);
1016 	if(cur == NULL) {
1017 		/* our timenow is not available anymore; nothing to decrease */
1018 		lock_rw_unlock(&entry->lock);
1019 		return;
1020 	}
1021 	if((*cur) > 0)
1022 		(*cur)--;
1023 	lock_rw_unlock(&entry->lock);
1024 }
1025 
1026 int infra_ratelimit_exceeded(struct infra_cache* infra, uint8_t* name,
1027 	size_t namelen, time_t timenow, int backoff)
1028 {
1029 	struct lruhash_entry* entry;
1030 	int lim, max;
1031 	if(!infra_dp_ratelimit)
1032 		return 0; /* not enabled */
1033 
1034 	/* find ratelimit */
1035 	lim = infra_find_ratelimit(infra, name, namelen);
1036 	if(!lim)
1037 		return 0; /* disabled for this domain */
1038 
1039 	/* find current rate */
1040 	entry = infra_find_ratedata(infra, name, namelen, 0);
1041 	if(!entry)
1042 		return 0; /* not cached */
1043 	max = infra_rate_max(entry->data, timenow, backoff);
1044 	lock_rw_unlock(&entry->lock);
1045 
1046 	return (max > lim);
1047 }
1048 
1049 size_t
1050 infra_get_mem(struct infra_cache* infra)
1051 {
1052 	size_t s = sizeof(*infra) + slabhash_get_mem(infra->hosts);
1053 	if(infra->domain_rates) s += slabhash_get_mem(infra->domain_rates);
1054 	if(infra->client_ip_rates) s += slabhash_get_mem(infra->client_ip_rates);
1055 	/* ignore domain_limits because walk through tree is big */
1056 	return s;
1057 }
1058 
1059 /* Returns 1 if the limit has not been exceeded, 0 otherwise. */
1060 static int
1061 check_ip_ratelimit(struct sockaddr_storage* addr, socklen_t addrlen,
1062 	struct sldns_buffer* buffer, int premax, int max, int has_cookie)
1063 {
1064 	int limit;
1065 
1066 	if(has_cookie) limit = infra_ip_ratelimit_cookie;
1067 	else           limit = infra_ip_ratelimit;
1068 
1069 	/* Disabled */
1070 	if(limit == 0) return 1;
1071 
1072 	if(premax <= limit && max > limit) {
1073 		char client_ip[128], qnm[LDNS_MAX_DOMAINLEN+1+12+12];
1074 		addr_to_str(addr, addrlen, client_ip, sizeof(client_ip));
1075 		qnm[0]=0;
1076 		if(sldns_buffer_limit(buffer)>LDNS_HEADER_SIZE &&
1077 			LDNS_QDCOUNT(sldns_buffer_begin(buffer))!=0) {
1078 			(void)sldns_wire2str_rrquestion_buf(
1079 				sldns_buffer_at(buffer, LDNS_HEADER_SIZE),
1080 				sldns_buffer_limit(buffer)-LDNS_HEADER_SIZE,
1081 				qnm, sizeof(qnm));
1082 			if(strlen(qnm)>0 && qnm[strlen(qnm)-1]=='\n')
1083 				qnm[strlen(qnm)-1] = 0; /*remove newline*/
1084 			if(strchr(qnm, '\t'))
1085 				*strchr(qnm, '\t') = ' ';
1086 			if(strchr(qnm, '\t'))
1087 				*strchr(qnm, '\t') = ' ';
1088 			verbose(VERB_OPS, "ip_ratelimit exceeded %s %d%s %s",
1089 				client_ip, limit,
1090 				has_cookie?"(cookie)":"", qnm);
1091 		} else {
1092 			verbose(VERB_OPS, "ip_ratelimit exceeded %s %d%s (no query name)",
1093 				client_ip, limit,
1094 				has_cookie?"(cookie)":"");
1095 		}
1096 	}
1097 	return (max <= limit);
1098 }
1099 
1100 int infra_ip_ratelimit_inc(struct infra_cache* infra,
1101 	struct sockaddr_storage* addr, socklen_t addrlen, time_t timenow,
1102 	int has_cookie, int backoff, struct sldns_buffer* buffer)
1103 {
1104 	int max;
1105 	struct lruhash_entry* entry;
1106 
1107 	/* not enabled */
1108 	if(!infra_ip_ratelimit) {
1109 		return 1;
1110 	}
1111 	/* find or insert ratedata */
1112 	entry = infra_find_ip_ratedata(infra, addr, addrlen, 1);
1113 	if(entry) {
1114 		int premax = infra_rate_max(entry->data, timenow, backoff);
1115 		int* cur = infra_rate_give_second(entry->data, timenow);
1116 		(*cur)++;
1117 		max = infra_rate_max(entry->data, timenow, backoff);
1118 		lock_rw_unlock(&entry->lock);
1119 		return check_ip_ratelimit(addr, addrlen, buffer, premax, max,
1120 			has_cookie);
1121 	}
1122 
1123 	/* create */
1124 	infra_ip_create_ratedata(infra, addr, addrlen, timenow);
1125 	return 1;
1126 }
1127