1 /* 2 * services/cache/infra.c - infrastructure cache, server rtt and capabilities 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file contains the infrastructure cache. 40 */ 41 #include "config.h" 42 #include "sldns/rrdef.h" 43 #include "sldns/str2wire.h" 44 #include "sldns/sbuffer.h" 45 #include "sldns/wire2str.h" 46 #include "services/cache/infra.h" 47 #include "util/storage/slabhash.h" 48 #include "util/storage/lookup3.h" 49 #include "util/data/dname.h" 50 #include "util/log.h" 51 #include "util/net_help.h" 52 #include "util/config_file.h" 53 #include "iterator/iterator.h" 54 55 /** Timeout when only a single probe query per IP is allowed. */ 56 #define PROBE_MAXRTO 12000 /* in msec */ 57 58 /** number of timeouts for a type when the domain can be blocked ; 59 * even if another type has completely rtt maxed it, the different type 60 * can do this number of packets (until those all timeout too) */ 61 #define TIMEOUT_COUNT_MAX 3 62 63 /** ratelimit value for delegation point */ 64 int infra_dp_ratelimit = 0; 65 66 /** ratelimit value for client ip addresses, 67 * in queries per second. */ 68 int infra_ip_ratelimit = 0; 69 70 size_t 71 infra_sizefunc(void* k, void* ATTR_UNUSED(d)) 72 { 73 struct infra_key* key = (struct infra_key*)k; 74 return sizeof(*key) + sizeof(struct infra_data) + key->namelen 75 + lock_get_mem(&key->entry.lock); 76 } 77 78 int 79 infra_compfunc(void* key1, void* key2) 80 { 81 struct infra_key* k1 = (struct infra_key*)key1; 82 struct infra_key* k2 = (struct infra_key*)key2; 83 int r = sockaddr_cmp(&k1->addr, k1->addrlen, &k2->addr, k2->addrlen); 84 if(r != 0) 85 return r; 86 if(k1->namelen != k2->namelen) { 87 if(k1->namelen < k2->namelen) 88 return -1; 89 return 1; 90 } 91 return query_dname_compare(k1->zonename, k2->zonename); 92 } 93 94 void 95 infra_delkeyfunc(void* k, void* ATTR_UNUSED(arg)) 96 { 97 struct infra_key* key = (struct infra_key*)k; 98 if(!key) 99 return; 100 lock_rw_destroy(&key->entry.lock); 101 free(key->zonename); 102 free(key); 103 } 104 105 void 106 infra_deldatafunc(void* d, void* ATTR_UNUSED(arg)) 107 { 108 struct infra_data* data = (struct infra_data*)d; 109 free(data); 110 } 111 112 size_t 113 rate_sizefunc(void* k, void* ATTR_UNUSED(d)) 114 { 115 struct rate_key* key = (struct rate_key*)k; 116 return sizeof(*key) + sizeof(struct rate_data) + key->namelen 117 + lock_get_mem(&key->entry.lock); 118 } 119 120 int 121 rate_compfunc(void* key1, void* key2) 122 { 123 struct rate_key* k1 = (struct rate_key*)key1; 124 struct rate_key* k2 = (struct rate_key*)key2; 125 if(k1->namelen != k2->namelen) { 126 if(k1->namelen < k2->namelen) 127 return -1; 128 return 1; 129 } 130 return query_dname_compare(k1->name, k2->name); 131 } 132 133 void 134 rate_delkeyfunc(void* k, void* ATTR_UNUSED(arg)) 135 { 136 struct rate_key* key = (struct rate_key*)k; 137 if(!key) 138 return; 139 lock_rw_destroy(&key->entry.lock); 140 free(key->name); 141 free(key); 142 } 143 144 void 145 rate_deldatafunc(void* d, void* ATTR_UNUSED(arg)) 146 { 147 struct rate_data* data = (struct rate_data*)d; 148 free(data); 149 } 150 151 /** find or create element in domainlimit tree */ 152 static struct domain_limit_data* domain_limit_findcreate( 153 struct infra_cache* infra, char* name) 154 { 155 uint8_t* nm; 156 int labs; 157 size_t nmlen; 158 struct domain_limit_data* d; 159 160 /* parse name */ 161 nm = sldns_str2wire_dname(name, &nmlen); 162 if(!nm) { 163 log_err("could not parse %s", name); 164 return NULL; 165 } 166 labs = dname_count_labels(nm); 167 168 /* can we find it? */ 169 d = (struct domain_limit_data*)name_tree_find(&infra->domain_limits, 170 nm, nmlen, labs, LDNS_RR_CLASS_IN); 171 if(d) { 172 free(nm); 173 return d; 174 } 175 176 /* create it */ 177 d = (struct domain_limit_data*)calloc(1, sizeof(*d)); 178 if(!d) { 179 free(nm); 180 return NULL; 181 } 182 d->node.node.key = &d->node; 183 d->node.name = nm; 184 d->node.len = nmlen; 185 d->node.labs = labs; 186 d->node.dclass = LDNS_RR_CLASS_IN; 187 d->lim = -1; 188 d->below = -1; 189 if(!name_tree_insert(&infra->domain_limits, &d->node, nm, nmlen, 190 labs, LDNS_RR_CLASS_IN)) { 191 log_err("duplicate element in domainlimit tree"); 192 free(nm); 193 free(d); 194 return NULL; 195 } 196 return d; 197 } 198 199 /** insert rate limit configuration into lookup tree */ 200 static int infra_ratelimit_cfg_insert(struct infra_cache* infra, 201 struct config_file* cfg) 202 { 203 struct config_str2list* p; 204 struct domain_limit_data* d; 205 for(p = cfg->ratelimit_for_domain; p; p = p->next) { 206 d = domain_limit_findcreate(infra, p->str); 207 if(!d) 208 return 0; 209 d->lim = atoi(p->str2); 210 } 211 for(p = cfg->ratelimit_below_domain; p; p = p->next) { 212 d = domain_limit_findcreate(infra, p->str); 213 if(!d) 214 return 0; 215 d->below = atoi(p->str2); 216 } 217 return 1; 218 } 219 220 /** setup domain limits tree (0 on failure) */ 221 static int 222 setup_domain_limits(struct infra_cache* infra, struct config_file* cfg) 223 { 224 name_tree_init(&infra->domain_limits); 225 if(!infra_ratelimit_cfg_insert(infra, cfg)) { 226 return 0; 227 } 228 name_tree_init_parents(&infra->domain_limits); 229 return 1; 230 } 231 232 struct infra_cache* 233 infra_create(struct config_file* cfg) 234 { 235 struct infra_cache* infra = (struct infra_cache*)calloc(1, 236 sizeof(struct infra_cache)); 237 size_t maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+ 238 sizeof(struct infra_data)+INFRA_BYTES_NAME); 239 if(!infra) { 240 return NULL; 241 } 242 infra->hosts = slabhash_create(cfg->infra_cache_slabs, 243 INFRA_HOST_STARTSIZE, maxmem, &infra_sizefunc, &infra_compfunc, 244 &infra_delkeyfunc, &infra_deldatafunc, NULL); 245 if(!infra->hosts) { 246 free(infra); 247 return NULL; 248 } 249 infra->host_ttl = cfg->host_ttl; 250 infra->infra_keep_probing = cfg->infra_keep_probing; 251 infra_dp_ratelimit = cfg->ratelimit; 252 infra->domain_rates = slabhash_create(cfg->ratelimit_slabs, 253 INFRA_HOST_STARTSIZE, cfg->ratelimit_size, 254 &rate_sizefunc, &rate_compfunc, &rate_delkeyfunc, 255 &rate_deldatafunc, NULL); 256 if(!infra->domain_rates) { 257 infra_delete(infra); 258 return NULL; 259 } 260 /* insert config data into ratelimits */ 261 if(!setup_domain_limits(infra, cfg)) { 262 infra_delete(infra); 263 return NULL; 264 } 265 infra_ip_ratelimit = cfg->ip_ratelimit; 266 infra->client_ip_rates = slabhash_create(cfg->ip_ratelimit_slabs, 267 INFRA_HOST_STARTSIZE, cfg->ip_ratelimit_size, &ip_rate_sizefunc, 268 &ip_rate_compfunc, &ip_rate_delkeyfunc, &ip_rate_deldatafunc, NULL); 269 if(!infra->client_ip_rates) { 270 infra_delete(infra); 271 return NULL; 272 } 273 return infra; 274 } 275 276 /** delete domain_limit entries */ 277 static void domain_limit_free(rbnode_type* n, void* ATTR_UNUSED(arg)) 278 { 279 if(n) { 280 free(((struct domain_limit_data*)n)->node.name); 281 free(n); 282 } 283 } 284 285 void 286 infra_delete(struct infra_cache* infra) 287 { 288 if(!infra) 289 return; 290 slabhash_delete(infra->hosts); 291 slabhash_delete(infra->domain_rates); 292 traverse_postorder(&infra->domain_limits, domain_limit_free, NULL); 293 slabhash_delete(infra->client_ip_rates); 294 free(infra); 295 } 296 297 struct infra_cache* 298 infra_adjust(struct infra_cache* infra, struct config_file* cfg) 299 { 300 size_t maxmem; 301 if(!infra) 302 return infra_create(cfg); 303 infra->host_ttl = cfg->host_ttl; 304 infra->infra_keep_probing = cfg->infra_keep_probing; 305 infra_dp_ratelimit = cfg->ratelimit; 306 infra_ip_ratelimit = cfg->ip_ratelimit; 307 maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+ 308 sizeof(struct infra_data)+INFRA_BYTES_NAME); 309 /* divide cachesize by slabs and multiply by slabs, because if the 310 * cachesize is not an even multiple of slabs, that is the resulting 311 * size of the slabhash */ 312 if(!slabhash_is_size(infra->hosts, maxmem, cfg->infra_cache_slabs) || 313 !slabhash_is_size(infra->domain_rates, cfg->ratelimit_size, 314 cfg->ratelimit_slabs) || 315 !slabhash_is_size(infra->client_ip_rates, cfg->ip_ratelimit_size, 316 cfg->ip_ratelimit_slabs)) { 317 infra_delete(infra); 318 infra = infra_create(cfg); 319 } else { 320 /* reapply domain limits */ 321 traverse_postorder(&infra->domain_limits, domain_limit_free, 322 NULL); 323 if(!setup_domain_limits(infra, cfg)) { 324 infra_delete(infra); 325 return NULL; 326 } 327 } 328 return infra; 329 } 330 331 /** calculate the hash value for a host key 332 * set use_port to a non-0 number to use the port in 333 * the hash calculation; 0 to ignore the port.*/ 334 static hashvalue_type 335 hash_addr(struct sockaddr_storage* addr, socklen_t addrlen, 336 int use_port) 337 { 338 hashvalue_type h = 0xab; 339 /* select the pieces to hash, some OS have changing data inside */ 340 if(addr_is_ip6(addr, addrlen)) { 341 struct sockaddr_in6* in6 = (struct sockaddr_in6*)addr; 342 h = hashlittle(&in6->sin6_family, sizeof(in6->sin6_family), h); 343 if(use_port){ 344 h = hashlittle(&in6->sin6_port, sizeof(in6->sin6_port), h); 345 } 346 h = hashlittle(&in6->sin6_addr, INET6_SIZE, h); 347 } else { 348 struct sockaddr_in* in = (struct sockaddr_in*)addr; 349 h = hashlittle(&in->sin_family, sizeof(in->sin_family), h); 350 if(use_port){ 351 h = hashlittle(&in->sin_port, sizeof(in->sin_port), h); 352 } 353 h = hashlittle(&in->sin_addr, INET_SIZE, h); 354 } 355 return h; 356 } 357 358 /** calculate infra hash for a key */ 359 static hashvalue_type 360 hash_infra(struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name) 361 { 362 return dname_query_hash(name, hash_addr(addr, addrlen, 1)); 363 } 364 365 /** lookup version that does not check host ttl (you check it) */ 366 struct lruhash_entry* 367 infra_lookup_nottl(struct infra_cache* infra, struct sockaddr_storage* addr, 368 socklen_t addrlen, uint8_t* name, size_t namelen, int wr) 369 { 370 struct infra_key k; 371 k.addrlen = addrlen; 372 memcpy(&k.addr, addr, addrlen); 373 k.namelen = namelen; 374 k.zonename = name; 375 k.entry.hash = hash_infra(addr, addrlen, name); 376 k.entry.key = (void*)&k; 377 k.entry.data = NULL; 378 return slabhash_lookup(infra->hosts, k.entry.hash, &k, wr); 379 } 380 381 /** init the data elements */ 382 static void 383 data_entry_init(struct infra_cache* infra, struct lruhash_entry* e, 384 time_t timenow) 385 { 386 struct infra_data* data = (struct infra_data*)e->data; 387 data->ttl = timenow + infra->host_ttl; 388 rtt_init(&data->rtt); 389 data->edns_version = 0; 390 data->edns_lame_known = 0; 391 data->probedelay = 0; 392 data->isdnsseclame = 0; 393 data->rec_lame = 0; 394 data->lame_type_A = 0; 395 data->lame_other = 0; 396 data->timeout_A = 0; 397 data->timeout_AAAA = 0; 398 data->timeout_other = 0; 399 } 400 401 /** 402 * Create and init a new entry for a host 403 * @param infra: infra structure with config parameters. 404 * @param addr: host address. 405 * @param addrlen: length of addr. 406 * @param name: name of zone 407 * @param namelen: length of name. 408 * @param tm: time now. 409 * @return: the new entry or NULL on malloc failure. 410 */ 411 static struct lruhash_entry* 412 new_entry(struct infra_cache* infra, struct sockaddr_storage* addr, 413 socklen_t addrlen, uint8_t* name, size_t namelen, time_t tm) 414 { 415 struct infra_data* data; 416 struct infra_key* key = (struct infra_key*)malloc(sizeof(*key)); 417 if(!key) 418 return NULL; 419 data = (struct infra_data*)malloc(sizeof(struct infra_data)); 420 if(!data) { 421 free(key); 422 return NULL; 423 } 424 key->zonename = memdup(name, namelen); 425 if(!key->zonename) { 426 free(key); 427 free(data); 428 return NULL; 429 } 430 key->namelen = namelen; 431 lock_rw_init(&key->entry.lock); 432 key->entry.hash = hash_infra(addr, addrlen, name); 433 key->entry.key = (void*)key; 434 key->entry.data = (void*)data; 435 key->addrlen = addrlen; 436 memcpy(&key->addr, addr, addrlen); 437 data_entry_init(infra, &key->entry, tm); 438 return &key->entry; 439 } 440 441 int 442 infra_host(struct infra_cache* infra, struct sockaddr_storage* addr, 443 socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow, 444 int* edns_vs, uint8_t* edns_lame_known, int* to) 445 { 446 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 447 nm, nmlen, 0); 448 struct infra_data* data; 449 int wr = 0; 450 if(e && ((struct infra_data*)e->data)->ttl < timenow) { 451 /* it expired, try to reuse existing entry */ 452 int old = ((struct infra_data*)e->data)->rtt.rto; 453 time_t tprobe = ((struct infra_data*)e->data)->probedelay; 454 uint8_t tA = ((struct infra_data*)e->data)->timeout_A; 455 uint8_t tAAAA = ((struct infra_data*)e->data)->timeout_AAAA; 456 uint8_t tother = ((struct infra_data*)e->data)->timeout_other; 457 lock_rw_unlock(&e->lock); 458 e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1); 459 if(e) { 460 /* if its still there we have a writelock, init */ 461 /* re-initialise */ 462 /* do not touch lameness, it may be valid still */ 463 data_entry_init(infra, e, timenow); 464 wr = 1; 465 /* TOP_TIMEOUT remains on reuse */ 466 if(old >= USEFUL_SERVER_TOP_TIMEOUT) { 467 ((struct infra_data*)e->data)->rtt.rto 468 = USEFUL_SERVER_TOP_TIMEOUT; 469 ((struct infra_data*)e->data)->probedelay = tprobe; 470 ((struct infra_data*)e->data)->timeout_A = tA; 471 ((struct infra_data*)e->data)->timeout_AAAA = tAAAA; 472 ((struct infra_data*)e->data)->timeout_other = tother; 473 } 474 } 475 } 476 if(!e) { 477 /* insert new entry */ 478 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) 479 return 0; 480 data = (struct infra_data*)e->data; 481 *edns_vs = data->edns_version; 482 *edns_lame_known = data->edns_lame_known; 483 *to = rtt_timeout(&data->rtt); 484 slabhash_insert(infra->hosts, e->hash, e, data, NULL); 485 return 1; 486 } 487 /* use existing entry */ 488 data = (struct infra_data*)e->data; 489 *edns_vs = data->edns_version; 490 *edns_lame_known = data->edns_lame_known; 491 *to = rtt_timeout(&data->rtt); 492 if(*to >= PROBE_MAXRTO && (infra->infra_keep_probing || 493 rtt_notimeout(&data->rtt)*4 <= *to)) { 494 /* delay other queries, this is the probe query */ 495 if(!wr) { 496 lock_rw_unlock(&e->lock); 497 e = infra_lookup_nottl(infra, addr,addrlen,nm,nmlen, 1); 498 if(!e) { /* flushed from cache real fast, no use to 499 allocate just for the probedelay */ 500 return 1; 501 } 502 data = (struct infra_data*)e->data; 503 } 504 /* add 999 to round up the timeout value from msec to sec, 505 * then add a whole second so it is certain that this probe 506 * has timed out before the next is allowed */ 507 data->probedelay = timenow + ((*to)+1999)/1000; 508 } 509 lock_rw_unlock(&e->lock); 510 return 1; 511 } 512 513 int 514 infra_set_lame(struct infra_cache* infra, struct sockaddr_storage* addr, 515 socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow, 516 int dnsseclame, int reclame, uint16_t qtype) 517 { 518 struct infra_data* data; 519 struct lruhash_entry* e; 520 int needtoinsert = 0; 521 e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1); 522 if(!e) { 523 /* insert it */ 524 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) { 525 log_err("set_lame: malloc failure"); 526 return 0; 527 } 528 needtoinsert = 1; 529 } else if( ((struct infra_data*)e->data)->ttl < timenow) { 530 /* expired, reuse existing entry */ 531 data_entry_init(infra, e, timenow); 532 } 533 /* got an entry, now set the zone lame */ 534 data = (struct infra_data*)e->data; 535 /* merge data (if any) */ 536 if(dnsseclame) 537 data->isdnsseclame = 1; 538 if(reclame) 539 data->rec_lame = 1; 540 if(!dnsseclame && !reclame && qtype == LDNS_RR_TYPE_A) 541 data->lame_type_A = 1; 542 if(!dnsseclame && !reclame && qtype != LDNS_RR_TYPE_A) 543 data->lame_other = 1; 544 /* done */ 545 if(needtoinsert) 546 slabhash_insert(infra->hosts, e->hash, e, e->data, NULL); 547 else { lock_rw_unlock(&e->lock); } 548 return 1; 549 } 550 551 void 552 infra_update_tcp_works(struct infra_cache* infra, 553 struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm, 554 size_t nmlen) 555 { 556 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 557 nm, nmlen, 1); 558 struct infra_data* data; 559 if(!e) 560 return; /* doesn't exist */ 561 data = (struct infra_data*)e->data; 562 if(data->rtt.rto >= RTT_MAX_TIMEOUT) 563 /* do not disqualify this server altogether, it is better 564 * than nothing */ 565 data->rtt.rto = RTT_MAX_TIMEOUT-1000; 566 lock_rw_unlock(&e->lock); 567 } 568 569 int 570 infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr, 571 socklen_t addrlen, uint8_t* nm, size_t nmlen, int qtype, 572 int roundtrip, int orig_rtt, time_t timenow) 573 { 574 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 575 nm, nmlen, 1); 576 struct infra_data* data; 577 int needtoinsert = 0, expired = 0; 578 int rto = 1; 579 time_t oldprobedelay = 0; 580 if(!e) { 581 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) 582 return 0; 583 needtoinsert = 1; 584 } else if(((struct infra_data*)e->data)->ttl < timenow) { 585 oldprobedelay = ((struct infra_data*)e->data)->probedelay; 586 data_entry_init(infra, e, timenow); 587 expired = 1; 588 } 589 /* have an entry, update the rtt */ 590 data = (struct infra_data*)e->data; 591 if(roundtrip == -1) { 592 if(needtoinsert || expired) { 593 /* timeout on entry that has expired before the timer 594 * keep old timeout from the function caller */ 595 data->rtt.rto = orig_rtt; 596 data->probedelay = oldprobedelay; 597 } 598 rtt_lost(&data->rtt, orig_rtt); 599 if(qtype == LDNS_RR_TYPE_A) { 600 if(data->timeout_A < TIMEOUT_COUNT_MAX) 601 data->timeout_A++; 602 } else if(qtype == LDNS_RR_TYPE_AAAA) { 603 if(data->timeout_AAAA < TIMEOUT_COUNT_MAX) 604 data->timeout_AAAA++; 605 } else { 606 if(data->timeout_other < TIMEOUT_COUNT_MAX) 607 data->timeout_other++; 608 } 609 } else { 610 /* if we got a reply, but the old timeout was above server 611 * selection height, delete the timeout so the server is 612 * fully available again */ 613 if(rtt_unclamped(&data->rtt) >= USEFUL_SERVER_TOP_TIMEOUT) 614 rtt_init(&data->rtt); 615 rtt_update(&data->rtt, roundtrip); 616 data->probedelay = 0; 617 if(qtype == LDNS_RR_TYPE_A) 618 data->timeout_A = 0; 619 else if(qtype == LDNS_RR_TYPE_AAAA) 620 data->timeout_AAAA = 0; 621 else data->timeout_other = 0; 622 } 623 if(data->rtt.rto > 0) 624 rto = data->rtt.rto; 625 626 if(needtoinsert) 627 slabhash_insert(infra->hosts, e->hash, e, e->data, NULL); 628 else { lock_rw_unlock(&e->lock); } 629 return rto; 630 } 631 632 long long infra_get_host_rto(struct infra_cache* infra, 633 struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm, 634 size_t nmlen, struct rtt_info* rtt, int* delay, time_t timenow, 635 int* tA, int* tAAAA, int* tother) 636 { 637 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 638 nm, nmlen, 0); 639 struct infra_data* data; 640 long long ttl = -2; 641 if(!e) return -1; 642 data = (struct infra_data*)e->data; 643 if(data->ttl >= timenow) { 644 ttl = (long long)(data->ttl - timenow); 645 memmove(rtt, &data->rtt, sizeof(*rtt)); 646 if(timenow < data->probedelay) 647 *delay = (int)(data->probedelay - timenow); 648 else *delay = 0; 649 } 650 *tA = (int)data->timeout_A; 651 *tAAAA = (int)data->timeout_AAAA; 652 *tother = (int)data->timeout_other; 653 lock_rw_unlock(&e->lock); 654 return ttl; 655 } 656 657 int 658 infra_edns_update(struct infra_cache* infra, struct sockaddr_storage* addr, 659 socklen_t addrlen, uint8_t* nm, size_t nmlen, int edns_version, 660 time_t timenow) 661 { 662 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 663 nm, nmlen, 1); 664 struct infra_data* data; 665 int needtoinsert = 0; 666 if(!e) { 667 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) 668 return 0; 669 needtoinsert = 1; 670 } else if(((struct infra_data*)e->data)->ttl < timenow) { 671 data_entry_init(infra, e, timenow); 672 } 673 /* have an entry, update the rtt, and the ttl */ 674 data = (struct infra_data*)e->data; 675 /* do not update if noEDNS and stored is yesEDNS */ 676 if(!(edns_version == -1 && (data->edns_version != -1 && 677 data->edns_lame_known))) { 678 data->edns_version = edns_version; 679 data->edns_lame_known = 1; 680 } 681 682 if(needtoinsert) 683 slabhash_insert(infra->hosts, e->hash, e, e->data, NULL); 684 else { lock_rw_unlock(&e->lock); } 685 return 1; 686 } 687 688 int 689 infra_get_lame_rtt(struct infra_cache* infra, 690 struct sockaddr_storage* addr, socklen_t addrlen, 691 uint8_t* name, size_t namelen, uint16_t qtype, 692 int* lame, int* dnsseclame, int* reclame, int* rtt, time_t timenow) 693 { 694 struct infra_data* host; 695 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 696 name, namelen, 0); 697 if(!e) 698 return 0; 699 host = (struct infra_data*)e->data; 700 *rtt = rtt_unclamped(&host->rtt); 701 if(host->rtt.rto >= PROBE_MAXRTO && timenow >= host->probedelay 702 && infra->infra_keep_probing) { 703 /* single probe, keep probing */ 704 if(*rtt >= USEFUL_SERVER_TOP_TIMEOUT) 705 *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; 706 } else if(host->rtt.rto >= PROBE_MAXRTO && timenow < host->probedelay 707 && rtt_notimeout(&host->rtt)*4 <= host->rtt.rto) { 708 /* single probe for this domain, and we are not probing */ 709 /* unless the query type allows a probe to happen */ 710 if(qtype == LDNS_RR_TYPE_A) { 711 if(host->timeout_A >= TIMEOUT_COUNT_MAX) 712 *rtt = USEFUL_SERVER_TOP_TIMEOUT; 713 else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; 714 } else if(qtype == LDNS_RR_TYPE_AAAA) { 715 if(host->timeout_AAAA >= TIMEOUT_COUNT_MAX) 716 *rtt = USEFUL_SERVER_TOP_TIMEOUT; 717 else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; 718 } else { 719 if(host->timeout_other >= TIMEOUT_COUNT_MAX) 720 *rtt = USEFUL_SERVER_TOP_TIMEOUT; 721 else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; 722 } 723 } 724 if(timenow > host->ttl) { 725 /* expired entry */ 726 /* see if this can be a re-probe of an unresponsive server */ 727 /* minus 1000 because that is outside of the RTTBAND, so 728 * blacklisted servers stay blacklisted if this is chosen */ 729 if(host->rtt.rto >= USEFUL_SERVER_TOP_TIMEOUT || 730 infra->infra_keep_probing) { 731 lock_rw_unlock(&e->lock); 732 *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; 733 *lame = 0; 734 *dnsseclame = 0; 735 *reclame = 0; 736 return 1; 737 } 738 lock_rw_unlock(&e->lock); 739 return 0; 740 } 741 /* check lameness first */ 742 if(host->lame_type_A && qtype == LDNS_RR_TYPE_A) { 743 lock_rw_unlock(&e->lock); 744 *lame = 1; 745 *dnsseclame = 0; 746 *reclame = 0; 747 return 1; 748 } else if(host->lame_other && qtype != LDNS_RR_TYPE_A) { 749 lock_rw_unlock(&e->lock); 750 *lame = 1; 751 *dnsseclame = 0; 752 *reclame = 0; 753 return 1; 754 } else if(host->isdnsseclame) { 755 lock_rw_unlock(&e->lock); 756 *lame = 0; 757 *dnsseclame = 1; 758 *reclame = 0; 759 return 1; 760 } else if(host->rec_lame) { 761 lock_rw_unlock(&e->lock); 762 *lame = 0; 763 *dnsseclame = 0; 764 *reclame = 1; 765 return 1; 766 } 767 /* no lameness for this type of query */ 768 lock_rw_unlock(&e->lock); 769 *lame = 0; 770 *dnsseclame = 0; 771 *reclame = 0; 772 return 1; 773 } 774 775 int infra_find_ratelimit(struct infra_cache* infra, uint8_t* name, 776 size_t namelen) 777 { 778 int labs = dname_count_labels(name); 779 struct domain_limit_data* d = (struct domain_limit_data*) 780 name_tree_lookup(&infra->domain_limits, name, namelen, labs, 781 LDNS_RR_CLASS_IN); 782 if(!d) return infra_dp_ratelimit; 783 784 if(d->node.labs == labs && d->lim != -1) 785 return d->lim; /* exact match */ 786 787 /* find 'below match' */ 788 if(d->node.labs == labs) 789 d = (struct domain_limit_data*)d->node.parent; 790 while(d) { 791 if(d->below != -1) 792 return d->below; 793 d = (struct domain_limit_data*)d->node.parent; 794 } 795 return infra_dp_ratelimit; 796 } 797 798 size_t ip_rate_sizefunc(void* k, void* ATTR_UNUSED(d)) 799 { 800 struct ip_rate_key* key = (struct ip_rate_key*)k; 801 return sizeof(*key) + sizeof(struct ip_rate_data) 802 + lock_get_mem(&key->entry.lock); 803 } 804 805 int ip_rate_compfunc(void* key1, void* key2) 806 { 807 struct ip_rate_key* k1 = (struct ip_rate_key*)key1; 808 struct ip_rate_key* k2 = (struct ip_rate_key*)key2; 809 return sockaddr_cmp_addr(&k1->addr, k1->addrlen, 810 &k2->addr, k2->addrlen); 811 } 812 813 void ip_rate_delkeyfunc(void* k, void* ATTR_UNUSED(arg)) 814 { 815 struct ip_rate_key* key = (struct ip_rate_key*)k; 816 if(!key) 817 return; 818 lock_rw_destroy(&key->entry.lock); 819 free(key); 820 } 821 822 /** find data item in array, for write access, caller unlocks */ 823 static struct lruhash_entry* infra_find_ratedata(struct infra_cache* infra, 824 uint8_t* name, size_t namelen, int wr) 825 { 826 struct rate_key key; 827 hashvalue_type h = dname_query_hash(name, 0xab); 828 memset(&key, 0, sizeof(key)); 829 key.name = name; 830 key.namelen = namelen; 831 key.entry.hash = h; 832 return slabhash_lookup(infra->domain_rates, h, &key, wr); 833 } 834 835 /** find data item in array for ip addresses */ 836 static struct lruhash_entry* infra_find_ip_ratedata(struct infra_cache* infra, 837 struct comm_reply* repinfo, int wr) 838 { 839 struct ip_rate_key key; 840 hashvalue_type h = hash_addr(&(repinfo->addr), 841 repinfo->addrlen, 0); 842 memset(&key, 0, sizeof(key)); 843 key.addr = repinfo->addr; 844 key.addrlen = repinfo->addrlen; 845 key.entry.hash = h; 846 return slabhash_lookup(infra->client_ip_rates, h, &key, wr); 847 } 848 849 /** create rate data item for name, number 1 in now */ 850 static void infra_create_ratedata(struct infra_cache* infra, 851 uint8_t* name, size_t namelen, time_t timenow) 852 { 853 hashvalue_type h = dname_query_hash(name, 0xab); 854 struct rate_key* k = (struct rate_key*)calloc(1, sizeof(*k)); 855 struct rate_data* d = (struct rate_data*)calloc(1, sizeof(*d)); 856 if(!k || !d) { 857 free(k); 858 free(d); 859 return; /* alloc failure */ 860 } 861 k->namelen = namelen; 862 k->name = memdup(name, namelen); 863 if(!k->name) { 864 free(k); 865 free(d); 866 return; /* alloc failure */ 867 } 868 lock_rw_init(&k->entry.lock); 869 k->entry.hash = h; 870 k->entry.key = k; 871 k->entry.data = d; 872 d->qps[0] = 1; 873 d->timestamp[0] = timenow; 874 slabhash_insert(infra->domain_rates, h, &k->entry, d, NULL); 875 } 876 877 /** create rate data item for ip address */ 878 static void infra_ip_create_ratedata(struct infra_cache* infra, 879 struct comm_reply* repinfo, time_t timenow) 880 { 881 hashvalue_type h = hash_addr(&(repinfo->addr), 882 repinfo->addrlen, 0); 883 struct ip_rate_key* k = (struct ip_rate_key*)calloc(1, sizeof(*k)); 884 struct ip_rate_data* d = (struct ip_rate_data*)calloc(1, sizeof(*d)); 885 if(!k || !d) { 886 free(k); 887 free(d); 888 return; /* alloc failure */ 889 } 890 k->addr = repinfo->addr; 891 k->addrlen = repinfo->addrlen; 892 lock_rw_init(&k->entry.lock); 893 k->entry.hash = h; 894 k->entry.key = k; 895 k->entry.data = d; 896 d->qps[0] = 1; 897 d->timestamp[0] = timenow; 898 slabhash_insert(infra->client_ip_rates, h, &k->entry, d, NULL); 899 } 900 901 /** Find the second and return its rate counter. If none and should_add, remove 902 * oldest to accommodate. Else return none. */ 903 static int* infra_rate_find_second_or_none(void* data, time_t t, int should_add) 904 { 905 struct rate_data* d = (struct rate_data*)data; 906 int i, oldest; 907 for(i=0; i<RATE_WINDOW; i++) { 908 if(d->timestamp[i] == t) 909 return &(d->qps[i]); 910 } 911 if(!should_add) return NULL; 912 /* remove oldest timestamp, and insert it at t with 0 qps */ 913 oldest = 0; 914 for(i=0; i<RATE_WINDOW; i++) { 915 if(d->timestamp[i] < d->timestamp[oldest]) 916 oldest = i; 917 } 918 d->timestamp[oldest] = t; 919 d->qps[oldest] = 0; 920 return &(d->qps[oldest]); 921 } 922 923 /** find the second and return its rate counter, if none, remove oldest to 924 * accommodate */ 925 static int* infra_rate_give_second(void* data, time_t t) 926 { 927 return infra_rate_find_second_or_none(data, t, 1); 928 } 929 930 /** find the second and return its rate counter only if it exists. Caller 931 * should check for NULL return value */ 932 static int* infra_rate_get_second(void* data, time_t t) 933 { 934 return infra_rate_find_second_or_none(data, t, 0); 935 } 936 937 int infra_rate_max(void* data, time_t now, int backoff) 938 { 939 struct rate_data* d = (struct rate_data*)data; 940 int i, max = 0; 941 for(i=0; i<RATE_WINDOW; i++) { 942 if(backoff) { 943 if(now-d->timestamp[i] <= RATE_WINDOW && 944 d->qps[i] > max) { 945 max = d->qps[i]; 946 } 947 } else { 948 if(now == d->timestamp[i]) { 949 return d->qps[i]; 950 } 951 } 952 } 953 return max; 954 } 955 956 int infra_ratelimit_inc(struct infra_cache* infra, uint8_t* name, 957 size_t namelen, time_t timenow, int backoff, struct query_info* qinfo, 958 struct comm_reply* replylist) 959 { 960 int lim, max; 961 struct lruhash_entry* entry; 962 963 if(!infra_dp_ratelimit) 964 return 1; /* not enabled */ 965 966 /* find ratelimit */ 967 lim = infra_find_ratelimit(infra, name, namelen); 968 if(!lim) 969 return 1; /* disabled for this domain */ 970 971 /* find or insert ratedata */ 972 entry = infra_find_ratedata(infra, name, namelen, 1); 973 if(entry) { 974 int premax = infra_rate_max(entry->data, timenow, backoff); 975 int* cur = infra_rate_give_second(entry->data, timenow); 976 (*cur)++; 977 max = infra_rate_max(entry->data, timenow, backoff); 978 lock_rw_unlock(&entry->lock); 979 980 if(premax <= lim && max > lim) { 981 char buf[257], qnm[257], ts[12], cs[12], ip[128]; 982 dname_str(name, buf); 983 dname_str(qinfo->qname, qnm); 984 sldns_wire2str_type_buf(qinfo->qtype, ts, sizeof(ts)); 985 sldns_wire2str_class_buf(qinfo->qclass, cs, sizeof(cs)); 986 ip[0]=0; 987 if(replylist) { 988 addr_to_str((struct sockaddr_storage *)&replylist->addr, 989 replylist->addrlen, ip, sizeof(ip)); 990 verbose(VERB_OPS, "ratelimit exceeded %s %d query %s %s %s from %s", buf, lim, qnm, cs, ts, ip); 991 } else { 992 verbose(VERB_OPS, "ratelimit exceeded %s %d query %s %s %s", buf, lim, qnm, cs, ts); 993 } 994 } 995 return (max <= lim); 996 } 997 998 /* create */ 999 infra_create_ratedata(infra, name, namelen, timenow); 1000 return (1 <= lim); 1001 } 1002 1003 void infra_ratelimit_dec(struct infra_cache* infra, uint8_t* name, 1004 size_t namelen, time_t timenow) 1005 { 1006 struct lruhash_entry* entry; 1007 int* cur; 1008 if(!infra_dp_ratelimit) 1009 return; /* not enabled */ 1010 entry = infra_find_ratedata(infra, name, namelen, 1); 1011 if(!entry) return; /* not cached */ 1012 cur = infra_rate_get_second(entry->data, timenow); 1013 if(cur == NULL) { 1014 /* our timenow is not available anymore; nothing to decrease */ 1015 lock_rw_unlock(&entry->lock); 1016 return; 1017 } 1018 if((*cur) > 0) 1019 (*cur)--; 1020 lock_rw_unlock(&entry->lock); 1021 } 1022 1023 int infra_ratelimit_exceeded(struct infra_cache* infra, uint8_t* name, 1024 size_t namelen, time_t timenow, int backoff) 1025 { 1026 struct lruhash_entry* entry; 1027 int lim, max; 1028 if(!infra_dp_ratelimit) 1029 return 0; /* not enabled */ 1030 1031 /* find ratelimit */ 1032 lim = infra_find_ratelimit(infra, name, namelen); 1033 if(!lim) 1034 return 0; /* disabled for this domain */ 1035 1036 /* find current rate */ 1037 entry = infra_find_ratedata(infra, name, namelen, 0); 1038 if(!entry) 1039 return 0; /* not cached */ 1040 max = infra_rate_max(entry->data, timenow, backoff); 1041 lock_rw_unlock(&entry->lock); 1042 1043 return (max >= lim); 1044 } 1045 1046 size_t 1047 infra_get_mem(struct infra_cache* infra) 1048 { 1049 size_t s = sizeof(*infra) + slabhash_get_mem(infra->hosts); 1050 if(infra->domain_rates) s += slabhash_get_mem(infra->domain_rates); 1051 if(infra->client_ip_rates) s += slabhash_get_mem(infra->client_ip_rates); 1052 /* ignore domain_limits because walk through tree is big */ 1053 return s; 1054 } 1055 1056 int infra_ip_ratelimit_inc(struct infra_cache* infra, 1057 struct comm_reply* repinfo, time_t timenow, int backoff, 1058 struct sldns_buffer* buffer) 1059 { 1060 int max; 1061 struct lruhash_entry* entry; 1062 1063 /* not enabled */ 1064 if(!infra_ip_ratelimit) { 1065 return 1; 1066 } 1067 /* find or insert ratedata */ 1068 entry = infra_find_ip_ratedata(infra, repinfo, 1); 1069 if(entry) { 1070 int premax = infra_rate_max(entry->data, timenow, backoff); 1071 int* cur = infra_rate_give_second(entry->data, timenow); 1072 (*cur)++; 1073 max = infra_rate_max(entry->data, timenow, backoff); 1074 lock_rw_unlock(&entry->lock); 1075 1076 if(premax < infra_ip_ratelimit && max >= infra_ip_ratelimit) { 1077 char client_ip[128], qnm[LDNS_MAX_DOMAINLEN+1+12+12]; 1078 addr_to_str((struct sockaddr_storage *)&repinfo->addr, 1079 repinfo->addrlen, client_ip, sizeof(client_ip)); 1080 qnm[0]=0; 1081 if(sldns_buffer_limit(buffer)>LDNS_HEADER_SIZE && 1082 LDNS_QDCOUNT(sldns_buffer_begin(buffer))!=0) { 1083 (void)sldns_wire2str_rrquestion_buf( 1084 sldns_buffer_at(buffer, LDNS_HEADER_SIZE), 1085 sldns_buffer_limit(buffer)-LDNS_HEADER_SIZE, 1086 qnm, sizeof(qnm)); 1087 if(strlen(qnm)>0 && qnm[strlen(qnm)-1]=='\n') 1088 qnm[strlen(qnm)-1] = 0; /*remove newline*/ 1089 if(strchr(qnm, '\t')) 1090 *strchr(qnm, '\t') = ' '; 1091 if(strchr(qnm, '\t')) 1092 *strchr(qnm, '\t') = ' '; 1093 verbose(VERB_OPS, "ip_ratelimit exceeded %s %d %s", 1094 client_ip, infra_ip_ratelimit, qnm); 1095 } else { 1096 verbose(VERB_OPS, "ip_ratelimit exceeded %s %d (no query name)", 1097 client_ip, infra_ip_ratelimit); 1098 } 1099 } 1100 return (max <= infra_ip_ratelimit); 1101 } 1102 1103 /* create */ 1104 infra_ip_create_ratedata(infra, repinfo, timenow); 1105 return 1; 1106 } 1107