xref: /freebsd/contrib/unbound/services/cache/infra.c (revision fa50a3552d1e759e1bb65e54cb0b7e863bcf54d5)
1 /*
2  * services/cache/infra.c - infrastructure cache, server rtt and capabilities
3  *
4  * Copyright (c) 2007, NLnet Labs. All rights reserved.
5  *
6  * This software is open source.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * Redistributions of source code must retain the above copyright notice,
13  * this list of conditions and the following disclaimer.
14  *
15  * Redistributions in binary form must reproduce the above copyright notice,
16  * this list of conditions and the following disclaimer in the documentation
17  * and/or other materials provided with the distribution.
18  *
19  * Neither the name of the NLNET LABS nor the names of its contributors may
20  * be used to endorse or promote products derived from this software without
21  * specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
26  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
27  * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
28  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
29  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
30  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
31  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
32  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  */
35 
36 /**
37  * \file
38  *
39  * This file contains the infrastructure cache.
40  */
41 #include "config.h"
42 #include "sldns/rrdef.h"
43 #include "sldns/str2wire.h"
44 #include "services/cache/infra.h"
45 #include "util/storage/slabhash.h"
46 #include "util/storage/lookup3.h"
47 #include "util/data/dname.h"
48 #include "util/log.h"
49 #include "util/net_help.h"
50 #include "util/config_file.h"
51 #include "iterator/iterator.h"
52 
53 /** Timeout when only a single probe query per IP is allowed. */
54 #define PROBE_MAXRTO 12000 /* in msec */
55 
/** Number of timeouts after which a query type counts as blocked for the
 * domain; even if another type has driven the rtt to its maximum, a
 * different type may still send this many packets (until those all
 * time out too) */
59 #define TIMEOUT_COUNT_MAX 3
60 
61 /** ratelimit value for delegation point */
62 int infra_dp_ratelimit = 0;
63 
64 /** ratelimit value for client ip addresses,
65  *  in queries per second. */
66 int infra_ip_ratelimit = 0;
67 
68 size_t
69 infra_sizefunc(void* k, void* ATTR_UNUSED(d))
70 {
71 	struct infra_key* key = (struct infra_key*)k;
72 	return sizeof(*key) + sizeof(struct infra_data) + key->namelen
73 		+ lock_get_mem(&key->entry.lock);
74 }
75 
76 int
77 infra_compfunc(void* key1, void* key2)
78 {
79 	struct infra_key* k1 = (struct infra_key*)key1;
80 	struct infra_key* k2 = (struct infra_key*)key2;
81 	int r = sockaddr_cmp(&k1->addr, k1->addrlen, &k2->addr, k2->addrlen);
82 	if(r != 0)
83 		return r;
84 	if(k1->namelen != k2->namelen) {
85 		if(k1->namelen < k2->namelen)
86 			return -1;
87 		return 1;
88 	}
89 	return query_dname_compare(k1->zonename, k2->zonename);
90 }
91 
92 void
93 infra_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
94 {
95 	struct infra_key* key = (struct infra_key*)k;
96 	if(!key)
97 		return;
98 	lock_rw_destroy(&key->entry.lock);
99 	free(key->zonename);
100 	free(key);
101 }
102 
/** Data delete callback for the host cache: frees the infra_data. */
void
infra_deldatafunc(void* d, void* ATTR_UNUSED(arg))
{
	struct infra_data* hostdata = (struct infra_data*)d;
	free(hostdata);
}
109 
110 size_t
111 rate_sizefunc(void* k, void* ATTR_UNUSED(d))
112 {
113 	struct rate_key* key = (struct rate_key*)k;
114 	return sizeof(*key) + sizeof(struct rate_data) + key->namelen
115 		+ lock_get_mem(&key->entry.lock);
116 }
117 
118 int
119 rate_compfunc(void* key1, void* key2)
120 {
121 	struct rate_key* k1 = (struct rate_key*)key1;
122 	struct rate_key* k2 = (struct rate_key*)key2;
123 	if(k1->namelen != k2->namelen) {
124 		if(k1->namelen < k2->namelen)
125 			return -1;
126 		return 1;
127 	}
128 	return query_dname_compare(k1->name, k2->name);
129 }
130 
131 void
132 rate_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
133 {
134 	struct rate_key* key = (struct rate_key*)k;
135 	if(!key)
136 		return;
137 	lock_rw_destroy(&key->entry.lock);
138 	free(key->name);
139 	free(key);
140 }
141 
/** Data delete callback for the domain rate cache: frees the rate_data. */
void
rate_deldatafunc(void* d, void* ATTR_UNUSED(arg))
{
	struct rate_data* ratedata = (struct rate_data*)d;
	free(ratedata);
}
148 
149 /** find or create element in domainlimit tree */
150 static struct domain_limit_data* domain_limit_findcreate(
151 	struct infra_cache* infra, char* name)
152 {
153 	uint8_t* nm;
154 	int labs;
155 	size_t nmlen;
156 	struct domain_limit_data* d;
157 
158 	/* parse name */
159 	nm = sldns_str2wire_dname(name, &nmlen);
160 	if(!nm) {
161 		log_err("could not parse %s", name);
162 		return NULL;
163 	}
164 	labs = dname_count_labels(nm);
165 
166 	/* can we find it? */
167 	d = (struct domain_limit_data*)name_tree_find(&infra->domain_limits,
168 		nm, nmlen, labs, LDNS_RR_CLASS_IN);
169 	if(d) {
170 		free(nm);
171 		return d;
172 	}
173 
174 	/* create it */
175 	d = (struct domain_limit_data*)calloc(1, sizeof(*d));
176 	if(!d) {
177 		free(nm);
178 		return NULL;
179 	}
180 	d->node.node.key = &d->node;
181 	d->node.name = nm;
182 	d->node.len = nmlen;
183 	d->node.labs = labs;
184 	d->node.dclass = LDNS_RR_CLASS_IN;
185 	d->lim = -1;
186 	d->below = -1;
187 	if(!name_tree_insert(&infra->domain_limits, &d->node, nm, nmlen,
188 		labs, LDNS_RR_CLASS_IN)) {
189 		log_err("duplicate element in domainlimit tree");
190 		free(nm);
191 		free(d);
192 		return NULL;
193 	}
194 	return d;
195 }
196 
197 /** insert rate limit configuration into lookup tree */
198 static int infra_ratelimit_cfg_insert(struct infra_cache* infra,
199 	struct config_file* cfg)
200 {
201 	struct config_str2list* p;
202 	struct domain_limit_data* d;
203 	for(p = cfg->ratelimit_for_domain; p; p = p->next) {
204 		d = domain_limit_findcreate(infra, p->str);
205 		if(!d)
206 			return 0;
207 		d->lim = atoi(p->str2);
208 	}
209 	for(p = cfg->ratelimit_below_domain; p; p = p->next) {
210 		d = domain_limit_findcreate(infra, p->str);
211 		if(!d)
212 			return 0;
213 		d->below = atoi(p->str2);
214 	}
215 	return 1;
216 }
217 
218 /** setup domain limits tree (0 on failure) */
219 static int
220 setup_domain_limits(struct infra_cache* infra, struct config_file* cfg)
221 {
222 	name_tree_init(&infra->domain_limits);
223 	if(!infra_ratelimit_cfg_insert(infra, cfg)) {
224 		return 0;
225 	}
226 	name_tree_init_parents(&infra->domain_limits);
227 	return 1;
228 }
229 
230 struct infra_cache*
231 infra_create(struct config_file* cfg)
232 {
233 	struct infra_cache* infra = (struct infra_cache*)calloc(1,
234 		sizeof(struct infra_cache));
235 	size_t maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
236 		sizeof(struct infra_data)+INFRA_BYTES_NAME);
237 	infra->hosts = slabhash_create(cfg->infra_cache_slabs,
238 		INFRA_HOST_STARTSIZE, maxmem, &infra_sizefunc, &infra_compfunc,
239 		&infra_delkeyfunc, &infra_deldatafunc, NULL);
240 	if(!infra->hosts) {
241 		free(infra);
242 		return NULL;
243 	}
244 	infra->host_ttl = cfg->host_ttl;
245 	infra_dp_ratelimit = cfg->ratelimit;
246 	infra->domain_rates = slabhash_create(cfg->ratelimit_slabs,
247 		INFRA_HOST_STARTSIZE, cfg->ratelimit_size,
248 		&rate_sizefunc, &rate_compfunc, &rate_delkeyfunc,
249 		&rate_deldatafunc, NULL);
250 	if(!infra->domain_rates) {
251 		infra_delete(infra);
252 		return NULL;
253 	}
254 	/* insert config data into ratelimits */
255 	if(!setup_domain_limits(infra, cfg)) {
256 		infra_delete(infra);
257 		return NULL;
258 	}
259 	infra_ip_ratelimit = cfg->ip_ratelimit;
260 	infra->client_ip_rates = slabhash_create(cfg->ip_ratelimit_slabs,
261 	    INFRA_HOST_STARTSIZE, cfg->ip_ratelimit_size, &ip_rate_sizefunc,
262 	    &ip_rate_compfunc, &ip_rate_delkeyfunc, &ip_rate_deldatafunc, NULL);
263 	if(!infra->client_ip_rates) {
264 		infra_delete(infra);
265 		return NULL;
266 	}
267 	return infra;
268 }
269 
270 /** delete domain_limit entries */
271 static void domain_limit_free(rbnode_type* n, void* ATTR_UNUSED(arg))
272 {
273 	if(n) {
274 		free(((struct domain_limit_data*)n)->node.name);
275 		free(n);
276 	}
277 }
278 
279 void
280 infra_delete(struct infra_cache* infra)
281 {
282 	if(!infra)
283 		return;
284 	slabhash_delete(infra->hosts);
285 	slabhash_delete(infra->domain_rates);
286 	traverse_postorder(&infra->domain_limits, domain_limit_free, NULL);
287 	slabhash_delete(infra->client_ip_rates);
288 	free(infra);
289 }
290 
291 struct infra_cache*
292 infra_adjust(struct infra_cache* infra, struct config_file* cfg)
293 {
294 	size_t maxmem;
295 	if(!infra)
296 		return infra_create(cfg);
297 	infra->host_ttl = cfg->host_ttl;
298 	infra_dp_ratelimit = cfg->ratelimit;
299 	infra_ip_ratelimit = cfg->ip_ratelimit;
300 	maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+
301 		sizeof(struct infra_data)+INFRA_BYTES_NAME);
302 	/* divide cachesize by slabs and multiply by slabs, because if the
303 	 * cachesize is not an even multiple of slabs, that is the resulting
304 	 * size of the slabhash */
305 	if(!slabhash_is_size(infra->hosts, maxmem, cfg->infra_cache_slabs) ||
306 	   !slabhash_is_size(infra->domain_rates, cfg->ratelimit_size,
307 	   	cfg->ratelimit_slabs) ||
308 	   !slabhash_is_size(infra->client_ip_rates, cfg->ip_ratelimit_size,
309 	   	cfg->ip_ratelimit_slabs)) {
310 		infra_delete(infra);
311 		infra = infra_create(cfg);
312 	} else {
313 		/* reapply domain limits */
314 		traverse_postorder(&infra->domain_limits, domain_limit_free,
315 			NULL);
316 		if(!setup_domain_limits(infra, cfg)) {
317 			infra_delete(infra);
318 			return NULL;
319 		}
320 	}
321 	return infra;
322 }
323 
324 /** calculate the hash value for a host key
325  *  set use_port to a non-0 number to use the port in
326  *  the hash calculation; 0 to ignore the port.*/
327 static hashvalue_type
328 hash_addr(struct sockaddr_storage* addr, socklen_t addrlen,
329   int use_port)
330 {
331 	hashvalue_type h = 0xab;
332 	/* select the pieces to hash, some OS have changing data inside */
333 	if(addr_is_ip6(addr, addrlen)) {
334 		struct sockaddr_in6* in6 = (struct sockaddr_in6*)addr;
335 		h = hashlittle(&in6->sin6_family, sizeof(in6->sin6_family), h);
336 		if(use_port){
337 			h = hashlittle(&in6->sin6_port, sizeof(in6->sin6_port), h);
338 		}
339 		h = hashlittle(&in6->sin6_addr, INET6_SIZE, h);
340 	} else {
341 		struct sockaddr_in* in = (struct sockaddr_in*)addr;
342 		h = hashlittle(&in->sin_family, sizeof(in->sin_family), h);
343 		if(use_port){
344 			h = hashlittle(&in->sin_port, sizeof(in->sin_port), h);
345 		}
346 		h = hashlittle(&in->sin_addr, INET_SIZE, h);
347 	}
348 	return h;
349 }
350 
351 /** calculate infra hash for a key */
352 static hashvalue_type
353 hash_infra(struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name)
354 {
355 	return dname_query_hash(name, hash_addr(addr, addrlen, 1));
356 }
357 
358 /** lookup version that does not check host ttl (you check it) */
359 struct lruhash_entry*
360 infra_lookup_nottl(struct infra_cache* infra, struct sockaddr_storage* addr,
361 	socklen_t addrlen, uint8_t* name, size_t namelen, int wr)
362 {
363 	struct infra_key k;
364 	k.addrlen = addrlen;
365 	memcpy(&k.addr, addr, addrlen);
366 	k.namelen = namelen;
367 	k.zonename = name;
368 	k.entry.hash = hash_infra(addr, addrlen, name);
369 	k.entry.key = (void*)&k;
370 	k.entry.data = NULL;
371 	return slabhash_lookup(infra->hosts, k.entry.hash, &k, wr);
372 }
373 
374 /** init the data elements */
375 static void
376 data_entry_init(struct infra_cache* infra, struct lruhash_entry* e,
377 	time_t timenow)
378 {
379 	struct infra_data* data = (struct infra_data*)e->data;
380 	data->ttl = timenow + infra->host_ttl;
381 	rtt_init(&data->rtt);
382 	data->edns_version = 0;
383 	data->edns_lame_known = 0;
384 	data->probedelay = 0;
385 	data->isdnsseclame = 0;
386 	data->rec_lame = 0;
387 	data->lame_type_A = 0;
388 	data->lame_other = 0;
389 	data->timeout_A = 0;
390 	data->timeout_AAAA = 0;
391 	data->timeout_other = 0;
392 }
393 
394 /**
395  * Create and init a new entry for a host
396  * @param infra: infra structure with config parameters.
397  * @param addr: host address.
398  * @param addrlen: length of addr.
399  * @param name: name of zone
400  * @param namelen: length of name.
401  * @param tm: time now.
402  * @return: the new entry or NULL on malloc failure.
403  */
404 static struct lruhash_entry*
405 new_entry(struct infra_cache* infra, struct sockaddr_storage* addr,
406 	socklen_t addrlen, uint8_t* name, size_t namelen, time_t tm)
407 {
408 	struct infra_data* data;
409 	struct infra_key* key = (struct infra_key*)malloc(sizeof(*key));
410 	if(!key)
411 		return NULL;
412 	data = (struct infra_data*)malloc(sizeof(struct infra_data));
413 	if(!data) {
414 		free(key);
415 		return NULL;
416 	}
417 	key->zonename = memdup(name, namelen);
418 	if(!key->zonename) {
419 		free(key);
420 		free(data);
421 		return NULL;
422 	}
423 	key->namelen = namelen;
424 	lock_rw_init(&key->entry.lock);
425 	key->entry.hash = hash_infra(addr, addrlen, name);
426 	key->entry.key = (void*)key;
427 	key->entry.data = (void*)data;
428 	key->addrlen = addrlen;
429 	memcpy(&key->addr, addr, addrlen);
430 	data_entry_init(infra, &key->entry, tm);
431 	return &key->entry;
432 }
433 
434 int
435 infra_host(struct infra_cache* infra, struct sockaddr_storage* addr,
436         socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow,
437 	int* edns_vs, uint8_t* edns_lame_known, int* to)
438 {
439 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
440 		nm, nmlen, 0);
441 	struct infra_data* data;
442 	int wr = 0;
443 	if(e && ((struct infra_data*)e->data)->ttl < timenow) {
444 		/* it expired, try to reuse existing entry */
445 		int old = ((struct infra_data*)e->data)->rtt.rto;
446 		uint8_t tA = ((struct infra_data*)e->data)->timeout_A;
447 		uint8_t tAAAA = ((struct infra_data*)e->data)->timeout_AAAA;
448 		uint8_t tother = ((struct infra_data*)e->data)->timeout_other;
449 		lock_rw_unlock(&e->lock);
450 		e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
451 		if(e) {
452 			/* if its still there we have a writelock, init */
453 			/* re-initialise */
454 			/* do not touch lameness, it may be valid still */
455 			data_entry_init(infra, e, timenow);
456 			wr = 1;
457 			/* TOP_TIMEOUT remains on reuse */
458 			if(old >= USEFUL_SERVER_TOP_TIMEOUT) {
459 				((struct infra_data*)e->data)->rtt.rto
460 					= USEFUL_SERVER_TOP_TIMEOUT;
461 				((struct infra_data*)e->data)->timeout_A = tA;
462 				((struct infra_data*)e->data)->timeout_AAAA = tAAAA;
463 				((struct infra_data*)e->data)->timeout_other = tother;
464 			}
465 		}
466 	}
467 	if(!e) {
468 		/* insert new entry */
469 		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
470 			return 0;
471 		data = (struct infra_data*)e->data;
472 		*edns_vs = data->edns_version;
473 		*edns_lame_known = data->edns_lame_known;
474 		*to = rtt_timeout(&data->rtt);
475 		slabhash_insert(infra->hosts, e->hash, e, data, NULL);
476 		return 1;
477 	}
478 	/* use existing entry */
479 	data = (struct infra_data*)e->data;
480 	*edns_vs = data->edns_version;
481 	*edns_lame_known = data->edns_lame_known;
482 	*to = rtt_timeout(&data->rtt);
483 	if(*to >= PROBE_MAXRTO && rtt_notimeout(&data->rtt)*4 <= *to) {
484 		/* delay other queries, this is the probe query */
485 		if(!wr) {
486 			lock_rw_unlock(&e->lock);
487 			e = infra_lookup_nottl(infra, addr,addrlen,nm,nmlen, 1);
488 			if(!e) { /* flushed from cache real fast, no use to
489 				allocate just for the probedelay */
490 				return 1;
491 			}
492 			data = (struct infra_data*)e->data;
493 		}
494 		/* add 999 to round up the timeout value from msec to sec,
495 		 * then add a whole second so it is certain that this probe
496 		 * has timed out before the next is allowed */
497 		data->probedelay = timenow + ((*to)+1999)/1000;
498 	}
499 	lock_rw_unlock(&e->lock);
500 	return 1;
501 }
502 
503 int
504 infra_set_lame(struct infra_cache* infra, struct sockaddr_storage* addr,
505 	socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow,
506 	int dnsseclame, int reclame, uint16_t qtype)
507 {
508 	struct infra_data* data;
509 	struct lruhash_entry* e;
510 	int needtoinsert = 0;
511 	e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1);
512 	if(!e) {
513 		/* insert it */
514 		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) {
515 			log_err("set_lame: malloc failure");
516 			return 0;
517 		}
518 		needtoinsert = 1;
519 	} else if( ((struct infra_data*)e->data)->ttl < timenow) {
520 		/* expired, reuse existing entry */
521 		data_entry_init(infra, e, timenow);
522 	}
523 	/* got an entry, now set the zone lame */
524 	data = (struct infra_data*)e->data;
525 	/* merge data (if any) */
526 	if(dnsseclame)
527 		data->isdnsseclame = 1;
528 	if(reclame)
529 		data->rec_lame = 1;
530 	if(!dnsseclame && !reclame && qtype == LDNS_RR_TYPE_A)
531 		data->lame_type_A = 1;
532 	if(!dnsseclame  && !reclame && qtype != LDNS_RR_TYPE_A)
533 		data->lame_other = 1;
534 	/* done */
535 	if(needtoinsert)
536 		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
537 	else 	{ lock_rw_unlock(&e->lock); }
538 	return 1;
539 }
540 
541 void
542 infra_update_tcp_works(struct infra_cache* infra,
543         struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
544 	size_t nmlen)
545 {
546 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
547 		nm, nmlen, 1);
548 	struct infra_data* data;
549 	if(!e)
550 		return; /* doesn't exist */
551 	data = (struct infra_data*)e->data;
552 	if(data->rtt.rto >= RTT_MAX_TIMEOUT)
553 		/* do not disqualify this server altogether, it is better
554 		 * than nothing */
555 		data->rtt.rto = RTT_MAX_TIMEOUT-1000;
556 	lock_rw_unlock(&e->lock);
557 }
558 
559 int
560 infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr,
561 	socklen_t addrlen, uint8_t* nm, size_t nmlen, int qtype,
562 	int roundtrip, int orig_rtt, time_t timenow)
563 {
564 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
565 		nm, nmlen, 1);
566 	struct infra_data* data;
567 	int needtoinsert = 0;
568 	int rto = 1;
569 	if(!e) {
570 		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
571 			return 0;
572 		needtoinsert = 1;
573 	} else if(((struct infra_data*)e->data)->ttl < timenow) {
574 		data_entry_init(infra, e, timenow);
575 	}
576 	/* have an entry, update the rtt */
577 	data = (struct infra_data*)e->data;
578 	if(roundtrip == -1) {
579 		rtt_lost(&data->rtt, orig_rtt);
580 		if(qtype == LDNS_RR_TYPE_A) {
581 			if(data->timeout_A < TIMEOUT_COUNT_MAX)
582 				data->timeout_A++;
583 		} else if(qtype == LDNS_RR_TYPE_AAAA) {
584 			if(data->timeout_AAAA < TIMEOUT_COUNT_MAX)
585 				data->timeout_AAAA++;
586 		} else {
587 			if(data->timeout_other < TIMEOUT_COUNT_MAX)
588 				data->timeout_other++;
589 		}
590 	} else {
591 		/* if we got a reply, but the old timeout was above server
592 		 * selection height, delete the timeout so the server is
593 		 * fully available again */
594 		if(rtt_unclamped(&data->rtt) >= USEFUL_SERVER_TOP_TIMEOUT)
595 			rtt_init(&data->rtt);
596 		rtt_update(&data->rtt, roundtrip);
597 		data->probedelay = 0;
598 		if(qtype == LDNS_RR_TYPE_A)
599 			data->timeout_A = 0;
600 		else if(qtype == LDNS_RR_TYPE_AAAA)
601 			data->timeout_AAAA = 0;
602 		else	data->timeout_other = 0;
603 	}
604 	if(data->rtt.rto > 0)
605 		rto = data->rtt.rto;
606 
607 	if(needtoinsert)
608 		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
609 	else 	{ lock_rw_unlock(&e->lock); }
610 	return rto;
611 }
612 
613 long long infra_get_host_rto(struct infra_cache* infra,
614         struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm,
615 	size_t nmlen, struct rtt_info* rtt, int* delay, time_t timenow,
616 	int* tA, int* tAAAA, int* tother)
617 {
618 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
619 		nm, nmlen, 0);
620 	struct infra_data* data;
621 	long long ttl = -2;
622 	if(!e) return -1;
623 	data = (struct infra_data*)e->data;
624 	if(data->ttl >= timenow) {
625 		ttl = (long long)(data->ttl - timenow);
626 		memmove(rtt, &data->rtt, sizeof(*rtt));
627 		if(timenow < data->probedelay)
628 			*delay = (int)(data->probedelay - timenow);
629 		else	*delay = 0;
630 	}
631 	*tA = (int)data->timeout_A;
632 	*tAAAA = (int)data->timeout_AAAA;
633 	*tother = (int)data->timeout_other;
634 	lock_rw_unlock(&e->lock);
635 	return ttl;
636 }
637 
638 int
639 infra_edns_update(struct infra_cache* infra, struct sockaddr_storage* addr,
640 	socklen_t addrlen, uint8_t* nm, size_t nmlen, int edns_version,
641 	time_t timenow)
642 {
643 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
644 		nm, nmlen, 1);
645 	struct infra_data* data;
646 	int needtoinsert = 0;
647 	if(!e) {
648 		if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow)))
649 			return 0;
650 		needtoinsert = 1;
651 	} else if(((struct infra_data*)e->data)->ttl < timenow) {
652 		data_entry_init(infra, e, timenow);
653 	}
654 	/* have an entry, update the rtt, and the ttl */
655 	data = (struct infra_data*)e->data;
656 	/* do not update if noEDNS and stored is yesEDNS */
657 	if(!(edns_version == -1 && (data->edns_version != -1 &&
658 		data->edns_lame_known))) {
659 		data->edns_version = edns_version;
660 		data->edns_lame_known = 1;
661 	}
662 
663 	if(needtoinsert)
664 		slabhash_insert(infra->hosts, e->hash, e, e->data, NULL);
665 	else 	{ lock_rw_unlock(&e->lock); }
666 	return 1;
667 }
668 
669 int
670 infra_get_lame_rtt(struct infra_cache* infra,
671         struct sockaddr_storage* addr, socklen_t addrlen,
672         uint8_t* name, size_t namelen, uint16_t qtype,
673 	int* lame, int* dnsseclame, int* reclame, int* rtt, time_t timenow)
674 {
675 	struct infra_data* host;
676 	struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen,
677 		name, namelen, 0);
678 	if(!e)
679 		return 0;
680 	host = (struct infra_data*)e->data;
681 	*rtt = rtt_unclamped(&host->rtt);
682 	if(host->rtt.rto >= PROBE_MAXRTO && timenow < host->probedelay
683 		&& rtt_notimeout(&host->rtt)*4 <= host->rtt.rto) {
684 		/* single probe for this domain, and we are not probing */
685 		/* unless the query type allows a probe to happen */
686 		if(qtype == LDNS_RR_TYPE_A) {
687 			if(host->timeout_A >= TIMEOUT_COUNT_MAX)
688 				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
689 			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
690 		} else if(qtype == LDNS_RR_TYPE_AAAA) {
691 			if(host->timeout_AAAA >= TIMEOUT_COUNT_MAX)
692 				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
693 			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
694 		} else {
695 			if(host->timeout_other >= TIMEOUT_COUNT_MAX)
696 				*rtt = USEFUL_SERVER_TOP_TIMEOUT;
697 			else	*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
698 		}
699 	}
700 	if(timenow > host->ttl) {
701 		/* expired entry */
702 		/* see if this can be a re-probe of an unresponsive server */
703 		/* minus 1000 because that is outside of the RTTBAND, so
704 		 * blacklisted servers stay blacklisted if this is chosen */
705 		if(host->rtt.rto >= USEFUL_SERVER_TOP_TIMEOUT) {
706 			lock_rw_unlock(&e->lock);
707 			*rtt = USEFUL_SERVER_TOP_TIMEOUT-1000;
708 			*lame = 0;
709 			*dnsseclame = 0;
710 			*reclame = 0;
711 			return 1;
712 		}
713 		lock_rw_unlock(&e->lock);
714 		return 0;
715 	}
716 	/* check lameness first */
717 	if(host->lame_type_A && qtype == LDNS_RR_TYPE_A) {
718 		lock_rw_unlock(&e->lock);
719 		*lame = 1;
720 		*dnsseclame = 0;
721 		*reclame = 0;
722 		return 1;
723 	} else if(host->lame_other && qtype != LDNS_RR_TYPE_A) {
724 		lock_rw_unlock(&e->lock);
725 		*lame = 1;
726 		*dnsseclame = 0;
727 		*reclame = 0;
728 		return 1;
729 	} else if(host->isdnsseclame) {
730 		lock_rw_unlock(&e->lock);
731 		*lame = 0;
732 		*dnsseclame = 1;
733 		*reclame = 0;
734 		return 1;
735 	} else if(host->rec_lame) {
736 		lock_rw_unlock(&e->lock);
737 		*lame = 0;
738 		*dnsseclame = 0;
739 		*reclame = 1;
740 		return 1;
741 	}
742 	/* no lameness for this type of query */
743 	lock_rw_unlock(&e->lock);
744 	*lame = 0;
745 	*dnsseclame = 0;
746 	*reclame = 0;
747 	return 1;
748 }
749 
750 int infra_find_ratelimit(struct infra_cache* infra, uint8_t* name,
751 	size_t namelen)
752 {
753 	int labs = dname_count_labels(name);
754 	struct domain_limit_data* d = (struct domain_limit_data*)
755 		name_tree_lookup(&infra->domain_limits, name, namelen, labs,
756 		LDNS_RR_CLASS_IN);
757 	if(!d) return infra_dp_ratelimit;
758 
759 	if(d->node.labs == labs && d->lim != -1)
760 		return d->lim; /* exact match */
761 
762 	/* find 'below match' */
763 	if(d->node.labs == labs)
764 		d = (struct domain_limit_data*)d->node.parent;
765 	while(d) {
766 		if(d->below != -1)
767 			return d->below;
768 		d = (struct domain_limit_data*)d->node.parent;
769 	}
770 	return infra_dp_ratelimit;
771 }
772 
773 size_t ip_rate_sizefunc(void* k, void* ATTR_UNUSED(d))
774 {
775 	struct ip_rate_key* key = (struct ip_rate_key*)k;
776 	return sizeof(*key) + sizeof(struct ip_rate_data)
777 		+ lock_get_mem(&key->entry.lock);
778 }
779 
780 int ip_rate_compfunc(void* key1, void* key2)
781 {
782 	struct ip_rate_key* k1 = (struct ip_rate_key*)key1;
783 	struct ip_rate_key* k2 = (struct ip_rate_key*)key2;
784 	return sockaddr_cmp_addr(&k1->addr, k1->addrlen,
785 		&k2->addr, k2->addrlen);
786 }
787 
788 void ip_rate_delkeyfunc(void* k, void* ATTR_UNUSED(arg))
789 {
790 	struct ip_rate_key* key = (struct ip_rate_key*)k;
791 	if(!key)
792 		return;
793 	lock_rw_destroy(&key->entry.lock);
794 	free(key);
795 }
796 
797 /** find data item in array, for write access, caller unlocks */
798 static struct lruhash_entry* infra_find_ratedata(struct infra_cache* infra,
799 	uint8_t* name, size_t namelen, int wr)
800 {
801 	struct rate_key key;
802 	hashvalue_type h = dname_query_hash(name, 0xab);
803 	memset(&key, 0, sizeof(key));
804 	key.name = name;
805 	key.namelen = namelen;
806 	key.entry.hash = h;
807 	return slabhash_lookup(infra->domain_rates, h, &key, wr);
808 }
809 
810 /** find data item in array for ip addresses */
811 static struct lruhash_entry* infra_find_ip_ratedata(struct infra_cache* infra,
812 	struct comm_reply* repinfo, int wr)
813 {
814 	struct ip_rate_key key;
815 	hashvalue_type h = hash_addr(&(repinfo->addr),
816 		repinfo->addrlen, 0);
817 	memset(&key, 0, sizeof(key));
818 	key.addr = repinfo->addr;
819 	key.addrlen = repinfo->addrlen;
820 	key.entry.hash = h;
821 	return slabhash_lookup(infra->client_ip_rates, h, &key, wr);
822 }
823 
824 /** create rate data item for name, number 1 in now */
825 static void infra_create_ratedata(struct infra_cache* infra,
826 	uint8_t* name, size_t namelen, time_t timenow)
827 {
828 	hashvalue_type h = dname_query_hash(name, 0xab);
829 	struct rate_key* k = (struct rate_key*)calloc(1, sizeof(*k));
830 	struct rate_data* d = (struct rate_data*)calloc(1, sizeof(*d));
831 	if(!k || !d) {
832 		free(k);
833 		free(d);
834 		return; /* alloc failure */
835 	}
836 	k->namelen = namelen;
837 	k->name = memdup(name, namelen);
838 	if(!k->name) {
839 		free(k);
840 		free(d);
841 		return; /* alloc failure */
842 	}
843 	lock_rw_init(&k->entry.lock);
844 	k->entry.hash = h;
845 	k->entry.key = k;
846 	k->entry.data = d;
847 	d->qps[0] = 1;
848 	d->timestamp[0] = timenow;
849 	slabhash_insert(infra->domain_rates, h, &k->entry, d, NULL);
850 }
851 
852 /** create rate data item for ip address */
853 static void infra_ip_create_ratedata(struct infra_cache* infra,
854 	struct comm_reply* repinfo, time_t timenow)
855 {
856 	hashvalue_type h = hash_addr(&(repinfo->addr),
857 	repinfo->addrlen, 0);
858 	struct ip_rate_key* k = (struct ip_rate_key*)calloc(1, sizeof(*k));
859 	struct ip_rate_data* d = (struct ip_rate_data*)calloc(1, sizeof(*d));
860 	if(!k || !d) {
861 		free(k);
862 		free(d);
863 		return; /* alloc failure */
864 	}
865 	k->addr = repinfo->addr;
866 	k->addrlen = repinfo->addrlen;
867 	lock_rw_init(&k->entry.lock);
868 	k->entry.hash = h;
869 	k->entry.key = k;
870 	k->entry.data = d;
871 	d->qps[0] = 1;
872 	d->timestamp[0] = timenow;
873 	slabhash_insert(infra->client_ip_rates, h, &k->entry, d, NULL);
874 }
875 
876 /** find the second and return its rate counter, if none, remove oldest */
877 static int* infra_rate_find_second(void* data, time_t t)
878 {
879 	struct rate_data* d = (struct rate_data*)data;
880 	int i, oldest;
881 	for(i=0; i<RATE_WINDOW; i++) {
882 		if(d->timestamp[i] == t)
883 			return &(d->qps[i]);
884 	}
885 	/* remove oldest timestamp, and insert it at t with 0 qps */
886 	oldest = 0;
887 	for(i=0; i<RATE_WINDOW; i++) {
888 		if(d->timestamp[i] < d->timestamp[oldest])
889 			oldest = i;
890 	}
891 	d->timestamp[oldest] = t;
892 	d->qps[oldest] = 0;
893 	return &(d->qps[oldest]);
894 }
895 
896 int infra_rate_max(void* data, time_t now)
897 {
898 	struct rate_data* d = (struct rate_data*)data;
899 	int i, max = 0;
900 	for(i=0; i<RATE_WINDOW; i++) {
901 		if(now-d->timestamp[i] <= RATE_WINDOW) {
902 			if(d->qps[i] > max)
903 				max = d->qps[i];
904 		}
905 	}
906 	return max;
907 }
908 
909 int infra_ratelimit_inc(struct infra_cache* infra, uint8_t* name,
910 	size_t namelen, time_t timenow)
911 {
912 	int lim, max;
913 	struct lruhash_entry* entry;
914 
915 	if(!infra_dp_ratelimit)
916 		return 1; /* not enabled */
917 
918 	/* find ratelimit */
919 	lim = infra_find_ratelimit(infra, name, namelen);
920 	if(!lim)
921 		return 1; /* disabled for this domain */
922 
923 	/* find or insert ratedata */
924 	entry = infra_find_ratedata(infra, name, namelen, 1);
925 	if(entry) {
926 		int premax = infra_rate_max(entry->data, timenow);
927 		int* cur = infra_rate_find_second(entry->data, timenow);
928 		(*cur)++;
929 		max = infra_rate_max(entry->data, timenow);
930 		lock_rw_unlock(&entry->lock);
931 
932 		if(premax < lim && max >= lim) {
933 			char buf[257];
934 			dname_str(name, buf);
935 			verbose(VERB_OPS, "ratelimit exceeded %s %d", buf, lim);
936 		}
937 		return (max < lim);
938 	}
939 
940 	/* create */
941 	infra_create_ratedata(infra, name, namelen, timenow);
942 	return (1 < lim);
943 }
944 
945 void infra_ratelimit_dec(struct infra_cache* infra, uint8_t* name,
946 	size_t namelen, time_t timenow)
947 {
948 	struct lruhash_entry* entry;
949 	int* cur;
950 	if(!infra_dp_ratelimit)
951 		return; /* not enabled */
952 	entry = infra_find_ratedata(infra, name, namelen, 1);
953 	if(!entry) return; /* not cached */
954 	cur = infra_rate_find_second(entry->data, timenow);
955 	if((*cur) > 0)
956 		(*cur)--;
957 	lock_rw_unlock(&entry->lock);
958 }
959 
960 int infra_ratelimit_exceeded(struct infra_cache* infra, uint8_t* name,
961 	size_t namelen, time_t timenow)
962 {
963 	struct lruhash_entry* entry;
964 	int lim, max;
965 	if(!infra_dp_ratelimit)
966 		return 0; /* not enabled */
967 
968 	/* find ratelimit */
969 	lim = infra_find_ratelimit(infra, name, namelen);
970 	if(!lim)
971 		return 0; /* disabled for this domain */
972 
973 	/* find current rate */
974 	entry = infra_find_ratedata(infra, name, namelen, 0);
975 	if(!entry)
976 		return 0; /* not cached */
977 	max = infra_rate_max(entry->data, timenow);
978 	lock_rw_unlock(&entry->lock);
979 
980 	return (max >= lim);
981 }
982 
983 size_t
984 infra_get_mem(struct infra_cache* infra)
985 {
986 	size_t s = sizeof(*infra) + slabhash_get_mem(infra->hosts);
987 	if(infra->domain_rates) s += slabhash_get_mem(infra->domain_rates);
988 	if(infra->client_ip_rates) s += slabhash_get_mem(infra->client_ip_rates);
989 	/* ignore domain_limits because walk through tree is big */
990 	return s;
991 }
992 
993 int infra_ip_ratelimit_inc(struct infra_cache* infra,
994   struct comm_reply* repinfo, time_t timenow)
995 {
996 	int max;
997 	struct lruhash_entry* entry;
998 
999 	/* not enabled */
1000 	if(!infra_ip_ratelimit) {
1001 		return 1;
1002 	}
1003 	/* find or insert ratedata */
1004 	entry = infra_find_ip_ratedata(infra, repinfo, 1);
1005 	if(entry) {
1006 		int premax = infra_rate_max(entry->data, timenow);
1007 		int* cur = infra_rate_find_second(entry->data, timenow);
1008 		(*cur)++;
1009 		max = infra_rate_max(entry->data, timenow);
1010 		lock_rw_unlock(&entry->lock);
1011 
1012 		if(premax < infra_ip_ratelimit && max >= infra_ip_ratelimit) {
1013 			char client_ip[128];
1014 			addr_to_str((struct sockaddr_storage *)&repinfo->addr,
1015 				repinfo->addrlen, client_ip, sizeof(client_ip));
1016 			verbose(VERB_OPS, "ip_ratelimit exceeded %s %d",
1017 				client_ip, infra_ip_ratelimit);
1018 		}
1019 		return (max <= infra_ip_ratelimit);
1020 	}
1021 
1022 	/* create */
1023 	infra_ip_create_ratedata(infra, repinfo, timenow);
1024 	return 1;
1025 }
1026