1 /* 2 * services/cache/infra.c - infrastructure cache, server rtt and capabilities 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file contains the infrastructure cache. 40 */ 41 #include "config.h" 42 #include "sldns/rrdef.h" 43 #include "sldns/str2wire.h" 44 #include "sldns/sbuffer.h" 45 #include "sldns/wire2str.h" 46 #include "services/cache/infra.h" 47 #include "util/storage/slabhash.h" 48 #include "util/storage/lookup3.h" 49 #include "util/data/dname.h" 50 #include "util/log.h" 51 #include "util/net_help.h" 52 #include "util/config_file.h" 53 #include "iterator/iterator.h" 54 55 /** Timeout when only a single probe query per IP is allowed. */ 56 #define PROBE_MAXRTO 12000 /* in msec */ 57 58 /** number of timeouts for a type when the domain can be blocked ; 59 * even if another type has completely rtt maxed it, the different type 60 * can do this number of packets (until those all timeout too) */ 61 #define TIMEOUT_COUNT_MAX 3 62 63 /** ratelimit value for delegation point */ 64 int infra_dp_ratelimit = 0; 65 66 /** ratelimit value for client ip addresses, 67 * in queries per second. */ 68 int infra_ip_ratelimit = 0; 69 70 size_t 71 infra_sizefunc(void* k, void* ATTR_UNUSED(d)) 72 { 73 struct infra_key* key = (struct infra_key*)k; 74 return sizeof(*key) + sizeof(struct infra_data) + key->namelen 75 + lock_get_mem(&key->entry.lock); 76 } 77 78 int 79 infra_compfunc(void* key1, void* key2) 80 { 81 struct infra_key* k1 = (struct infra_key*)key1; 82 struct infra_key* k2 = (struct infra_key*)key2; 83 int r = sockaddr_cmp(&k1->addr, k1->addrlen, &k2->addr, k2->addrlen); 84 if(r != 0) 85 return r; 86 if(k1->namelen != k2->namelen) { 87 if(k1->namelen < k2->namelen) 88 return -1; 89 return 1; 90 } 91 return query_dname_compare(k1->zonename, k2->zonename); 92 } 93 94 void 95 infra_delkeyfunc(void* k, void* ATTR_UNUSED(arg)) 96 { 97 struct infra_key* key = (struct infra_key*)k; 98 if(!key) 99 return; 100 lock_rw_destroy(&key->entry.lock); 101 free(key->zonename); 102 free(key); 103 } 104 105 void 106 infra_deldatafunc(void* d, void* ATTR_UNUSED(arg)) 107 { 108 struct infra_data* data = (struct infra_data*)d; 109 free(data); 110 } 111 112 size_t 113 rate_sizefunc(void* k, void* ATTR_UNUSED(d)) 114 { 115 struct rate_key* key = (struct rate_key*)k; 116 return sizeof(*key) + sizeof(struct rate_data) + key->namelen 117 + lock_get_mem(&key->entry.lock); 118 } 119 120 int 121 rate_compfunc(void* key1, void* key2) 122 { 123 struct rate_key* k1 = (struct rate_key*)key1; 124 struct rate_key* k2 = (struct rate_key*)key2; 125 if(k1->namelen != k2->namelen) { 126 if(k1->namelen < k2->namelen) 127 return -1; 128 return 1; 129 } 130 return query_dname_compare(k1->name, k2->name); 131 } 132 133 void 134 rate_delkeyfunc(void* k, void* ATTR_UNUSED(arg)) 135 { 136 struct rate_key* key = (struct rate_key*)k; 137 if(!key) 138 return; 139 lock_rw_destroy(&key->entry.lock); 140 free(key->name); 141 free(key); 142 } 143 144 void 145 rate_deldatafunc(void* d, void* ATTR_UNUSED(arg)) 146 { 147 struct rate_data* data = (struct rate_data*)d; 148 free(data); 149 } 150 151 /** find or create element in domainlimit tree */ 152 static struct domain_limit_data* domain_limit_findcreate( 153 struct infra_cache* infra, char* name) 154 { 155 uint8_t* nm; 156 int labs; 157 size_t nmlen; 158 struct domain_limit_data* d; 159 160 /* parse name */ 161 nm = sldns_str2wire_dname(name, &nmlen); 162 if(!nm) { 163 log_err("could not parse %s", name); 164 return NULL; 165 } 166 labs = dname_count_labels(nm); 167 168 /* can we find it? */ 169 d = (struct domain_limit_data*)name_tree_find(&infra->domain_limits, 170 nm, nmlen, labs, LDNS_RR_CLASS_IN); 171 if(d) { 172 free(nm); 173 return d; 174 } 175 176 /* create it */ 177 d = (struct domain_limit_data*)calloc(1, sizeof(*d)); 178 if(!d) { 179 free(nm); 180 return NULL; 181 } 182 d->node.node.key = &d->node; 183 d->node.name = nm; 184 d->node.len = nmlen; 185 d->node.labs = labs; 186 d->node.dclass = LDNS_RR_CLASS_IN; 187 d->lim = -1; 188 d->below = -1; 189 if(!name_tree_insert(&infra->domain_limits, &d->node, nm, nmlen, 190 labs, LDNS_RR_CLASS_IN)) { 191 log_err("duplicate element in domainlimit tree"); 192 free(nm); 193 free(d); 194 return NULL; 195 } 196 return d; 197 } 198 199 /** insert rate limit configuration into lookup tree */ 200 static int infra_ratelimit_cfg_insert(struct infra_cache* infra, 201 struct config_file* cfg) 202 { 203 struct config_str2list* p; 204 struct domain_limit_data* d; 205 for(p = cfg->ratelimit_for_domain; p; p = p->next) { 206 d = domain_limit_findcreate(infra, p->str); 207 if(!d) 208 return 0; 209 d->lim = atoi(p->str2); 210 } 211 for(p = cfg->ratelimit_below_domain; p; p = p->next) { 212 d = domain_limit_findcreate(infra, p->str); 213 if(!d) 214 return 0; 215 d->below = atoi(p->str2); 216 } 217 return 1; 218 } 219 220 /** setup domain limits tree (0 on failure) */ 221 static int 222 setup_domain_limits(struct infra_cache* infra, struct config_file* cfg) 223 { 224 name_tree_init(&infra->domain_limits); 225 if(!infra_ratelimit_cfg_insert(infra, cfg)) { 226 return 0; 227 } 228 name_tree_init_parents(&infra->domain_limits); 229 return 1; 230 } 231 232 struct infra_cache* 233 infra_create(struct config_file* cfg) 234 { 235 struct infra_cache* infra = (struct infra_cache*)calloc(1, 236 sizeof(struct infra_cache)); 237 size_t maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+ 238 sizeof(struct infra_data)+INFRA_BYTES_NAME); 239 if(!infra) { 240 return NULL; 241 } 242 infra->hosts = slabhash_create(cfg->infra_cache_slabs, 243 INFRA_HOST_STARTSIZE, maxmem, &infra_sizefunc, &infra_compfunc, 244 &infra_delkeyfunc, &infra_deldatafunc, NULL); 245 if(!infra->hosts) { 246 free(infra); 247 return NULL; 248 } 249 infra->host_ttl = cfg->host_ttl; 250 infra->infra_keep_probing = cfg->infra_keep_probing; 251 infra_dp_ratelimit = cfg->ratelimit; 252 infra->domain_rates = slabhash_create(cfg->ratelimit_slabs, 253 INFRA_HOST_STARTSIZE, cfg->ratelimit_size, 254 &rate_sizefunc, &rate_compfunc, &rate_delkeyfunc, 255 &rate_deldatafunc, NULL); 256 if(!infra->domain_rates) { 257 infra_delete(infra); 258 return NULL; 259 } 260 /* insert config data into ratelimits */ 261 if(!setup_domain_limits(infra, cfg)) { 262 infra_delete(infra); 263 return NULL; 264 } 265 infra_ip_ratelimit = cfg->ip_ratelimit; 266 infra->client_ip_rates = slabhash_create(cfg->ip_ratelimit_slabs, 267 INFRA_HOST_STARTSIZE, cfg->ip_ratelimit_size, &ip_rate_sizefunc, 268 &ip_rate_compfunc, &ip_rate_delkeyfunc, &ip_rate_deldatafunc, NULL); 269 if(!infra->client_ip_rates) { 270 infra_delete(infra); 271 return NULL; 272 } 273 return infra; 274 } 275 276 /** delete domain_limit entries */ 277 static void domain_limit_free(rbnode_type* n, void* ATTR_UNUSED(arg)) 278 { 279 if(n) { 280 free(((struct domain_limit_data*)n)->node.name); 281 free(n); 282 } 283 } 284 285 void 286 infra_delete(struct infra_cache* infra) 287 { 288 if(!infra) 289 return; 290 slabhash_delete(infra->hosts); 291 slabhash_delete(infra->domain_rates); 292 traverse_postorder(&infra->domain_limits, domain_limit_free, NULL); 293 slabhash_delete(infra->client_ip_rates); 294 free(infra); 295 } 296 297 struct infra_cache* 298 infra_adjust(struct infra_cache* infra, struct config_file* cfg) 299 { 300 size_t maxmem; 301 if(!infra) 302 return infra_create(cfg); 303 infra->host_ttl = cfg->host_ttl; 304 infra->infra_keep_probing = cfg->infra_keep_probing; 305 infra_dp_ratelimit = cfg->ratelimit; 306 infra_ip_ratelimit = cfg->ip_ratelimit; 307 maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+ 308 sizeof(struct infra_data)+INFRA_BYTES_NAME); 309 /* divide cachesize by slabs and multiply by slabs, because if the 310 * cachesize is not an even multiple of slabs, that is the resulting 311 * size of the slabhash */ 312 if(!slabhash_is_size(infra->hosts, maxmem, cfg->infra_cache_slabs) || 313 !slabhash_is_size(infra->domain_rates, cfg->ratelimit_size, 314 cfg->ratelimit_slabs) || 315 !slabhash_is_size(infra->client_ip_rates, cfg->ip_ratelimit_size, 316 cfg->ip_ratelimit_slabs)) { 317 infra_delete(infra); 318 infra = infra_create(cfg); 319 } else { 320 /* reapply domain limits */ 321 traverse_postorder(&infra->domain_limits, domain_limit_free, 322 NULL); 323 if(!setup_domain_limits(infra, cfg)) { 324 infra_delete(infra); 325 return NULL; 326 } 327 } 328 return infra; 329 } 330 331 /** calculate the hash value for a host key 332 * set use_port to a non-0 number to use the port in 333 * the hash calculation; 0 to ignore the port.*/ 334 static hashvalue_type 335 hash_addr(struct sockaddr_storage* addr, socklen_t addrlen, 336 int use_port) 337 { 338 hashvalue_type h = 0xab; 339 /* select the pieces to hash, some OS have changing data inside */ 340 if(addr_is_ip6(addr, addrlen)) { 341 struct sockaddr_in6* in6 = (struct sockaddr_in6*)addr; 342 h = hashlittle(&in6->sin6_family, sizeof(in6->sin6_family), h); 343 if(use_port){ 344 h = hashlittle(&in6->sin6_port, sizeof(in6->sin6_port), h); 345 } 346 h = hashlittle(&in6->sin6_addr, INET6_SIZE, h); 347 } else { 348 struct sockaddr_in* in = (struct sockaddr_in*)addr; 349 h = hashlittle(&in->sin_family, sizeof(in->sin_family), h); 350 if(use_port){ 351 h = hashlittle(&in->sin_port, sizeof(in->sin_port), h); 352 } 353 h = hashlittle(&in->sin_addr, INET_SIZE, h); 354 } 355 return h; 356 } 357 358 /** calculate infra hash for a key */ 359 static hashvalue_type 360 hash_infra(struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name) 361 { 362 return dname_query_hash(name, hash_addr(addr, addrlen, 1)); 363 } 364 365 /** lookup version that does not check host ttl (you check it) */ 366 struct lruhash_entry* 367 infra_lookup_nottl(struct infra_cache* infra, struct sockaddr_storage* addr, 368 socklen_t addrlen, uint8_t* name, size_t namelen, int wr) 369 { 370 struct infra_key k; 371 k.addrlen = addrlen; 372 memcpy(&k.addr, addr, addrlen); 373 k.namelen = namelen; 374 k.zonename = name; 375 k.entry.hash = hash_infra(addr, addrlen, name); 376 k.entry.key = (void*)&k; 377 k.entry.data = NULL; 378 return slabhash_lookup(infra->hosts, k.entry.hash, &k, wr); 379 } 380 381 /** init the data elements */ 382 static void 383 data_entry_init(struct infra_cache* infra, struct lruhash_entry* e, 384 time_t timenow) 385 { 386 struct infra_data* data = (struct infra_data*)e->data; 387 data->ttl = timenow + infra->host_ttl; 388 rtt_init(&data->rtt); 389 data->edns_version = 0; 390 data->edns_lame_known = 0; 391 data->probedelay = 0; 392 data->isdnsseclame = 0; 393 data->rec_lame = 0; 394 data->lame_type_A = 0; 395 data->lame_other = 0; 396 data->timeout_A = 0; 397 data->timeout_AAAA = 0; 398 data->timeout_other = 0; 399 } 400 401 /** 402 * Create and init a new entry for a host 403 * @param infra: infra structure with config parameters. 404 * @param addr: host address. 405 * @param addrlen: length of addr. 406 * @param name: name of zone 407 * @param namelen: length of name. 408 * @param tm: time now. 409 * @return: the new entry or NULL on malloc failure. 410 */ 411 static struct lruhash_entry* 412 new_entry(struct infra_cache* infra, struct sockaddr_storage* addr, 413 socklen_t addrlen, uint8_t* name, size_t namelen, time_t tm) 414 { 415 struct infra_data* data; 416 struct infra_key* key = (struct infra_key*)malloc(sizeof(*key)); 417 if(!key) 418 return NULL; 419 data = (struct infra_data*)malloc(sizeof(struct infra_data)); 420 if(!data) { 421 free(key); 422 return NULL; 423 } 424 key->zonename = memdup(name, namelen); 425 if(!key->zonename) { 426 free(key); 427 free(data); 428 return NULL; 429 } 430 key->namelen = namelen; 431 lock_rw_init(&key->entry.lock); 432 key->entry.hash = hash_infra(addr, addrlen, name); 433 key->entry.key = (void*)key; 434 key->entry.data = (void*)data; 435 key->addrlen = addrlen; 436 memcpy(&key->addr, addr, addrlen); 437 data_entry_init(infra, &key->entry, tm); 438 return &key->entry; 439 } 440 441 int 442 infra_host(struct infra_cache* infra, struct sockaddr_storage* addr, 443 socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow, 444 int* edns_vs, uint8_t* edns_lame_known, int* to) 445 { 446 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 447 nm, nmlen, 0); 448 struct infra_data* data; 449 int wr = 0; 450 if(e && ((struct infra_data*)e->data)->ttl < timenow) { 451 /* it expired, try to reuse existing entry */ 452 int old = ((struct infra_data*)e->data)->rtt.rto; 453 time_t tprobe = ((struct infra_data*)e->data)->probedelay; 454 uint8_t tA = ((struct infra_data*)e->data)->timeout_A; 455 uint8_t tAAAA = ((struct infra_data*)e->data)->timeout_AAAA; 456 uint8_t tother = ((struct infra_data*)e->data)->timeout_other; 457 lock_rw_unlock(&e->lock); 458 e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1); 459 if(e) { 460 /* if its still there we have a writelock, init */ 461 /* re-initialise */ 462 /* do not touch lameness, it may be valid still */ 463 data_entry_init(infra, e, timenow); 464 wr = 1; 465 /* TOP_TIMEOUT remains on reuse */ 466 if(old >= USEFUL_SERVER_TOP_TIMEOUT) { 467 ((struct infra_data*)e->data)->rtt.rto 468 = USEFUL_SERVER_TOP_TIMEOUT; 469 ((struct infra_data*)e->data)->probedelay = tprobe; 470 ((struct infra_data*)e->data)->timeout_A = tA; 471 ((struct infra_data*)e->data)->timeout_AAAA = tAAAA; 472 ((struct infra_data*)e->data)->timeout_other = tother; 473 } 474 } 475 } 476 if(!e) { 477 /* insert new entry */ 478 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) 479 return 0; 480 data = (struct infra_data*)e->data; 481 *edns_vs = data->edns_version; 482 *edns_lame_known = data->edns_lame_known; 483 *to = rtt_timeout(&data->rtt); 484 slabhash_insert(infra->hosts, e->hash, e, data, NULL); 485 return 1; 486 } 487 /* use existing entry */ 488 data = (struct infra_data*)e->data; 489 *edns_vs = data->edns_version; 490 *edns_lame_known = data->edns_lame_known; 491 *to = rtt_timeout(&data->rtt); 492 if(*to >= PROBE_MAXRTO && (infra->infra_keep_probing || 493 rtt_notimeout(&data->rtt)*4 <= *to)) { 494 /* delay other queries, this is the probe query */ 495 if(!wr) { 496 lock_rw_unlock(&e->lock); 497 e = infra_lookup_nottl(infra, addr,addrlen,nm,nmlen, 1); 498 if(!e) { /* flushed from cache real fast, no use to 499 allocate just for the probedelay */ 500 return 1; 501 } 502 data = (struct infra_data*)e->data; 503 } 504 /* add 999 to round up the timeout value from msec to sec, 505 * then add a whole second so it is certain that this probe 506 * has timed out before the next is allowed */ 507 data->probedelay = timenow + ((*to)+1999)/1000; 508 } 509 lock_rw_unlock(&e->lock); 510 return 1; 511 } 512 513 int 514 infra_set_lame(struct infra_cache* infra, struct sockaddr_storage* addr, 515 socklen_t addrlen, uint8_t* nm, size_t nmlen, time_t timenow, 516 int dnsseclame, int reclame, uint16_t qtype) 517 { 518 struct infra_data* data; 519 struct lruhash_entry* e; 520 int needtoinsert = 0; 521 e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1); 522 if(!e) { 523 /* insert it */ 524 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) { 525 log_err("set_lame: malloc failure"); 526 return 0; 527 } 528 needtoinsert = 1; 529 } else if( ((struct infra_data*)e->data)->ttl < timenow) { 530 /* expired, reuse existing entry */ 531 data_entry_init(infra, e, timenow); 532 } 533 /* got an entry, now set the zone lame */ 534 data = (struct infra_data*)e->data; 535 /* merge data (if any) */ 536 if(dnsseclame) 537 data->isdnsseclame = 1; 538 if(reclame) 539 data->rec_lame = 1; 540 if(!dnsseclame && !reclame && qtype == LDNS_RR_TYPE_A) 541 data->lame_type_A = 1; 542 if(!dnsseclame && !reclame && qtype != LDNS_RR_TYPE_A) 543 data->lame_other = 1; 544 /* done */ 545 if(needtoinsert) 546 slabhash_insert(infra->hosts, e->hash, e, e->data, NULL); 547 else { lock_rw_unlock(&e->lock); } 548 return 1; 549 } 550 551 void 552 infra_update_tcp_works(struct infra_cache* infra, 553 struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm, 554 size_t nmlen) 555 { 556 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 557 nm, nmlen, 1); 558 struct infra_data* data; 559 if(!e) 560 return; /* doesn't exist */ 561 data = (struct infra_data*)e->data; 562 if(data->rtt.rto >= RTT_MAX_TIMEOUT) 563 /* do not disqualify this server altogether, it is better 564 * than nothing */ 565 data->rtt.rto = RTT_MAX_TIMEOUT-1000; 566 lock_rw_unlock(&e->lock); 567 } 568 569 int 570 infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr, 571 socklen_t addrlen, uint8_t* nm, size_t nmlen, int qtype, 572 int roundtrip, int orig_rtt, time_t timenow) 573 { 574 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 575 nm, nmlen, 1); 576 struct infra_data* data; 577 int needtoinsert = 0, expired = 0; 578 int rto = 1; 579 time_t oldprobedelay = 0; 580 if(!e) { 581 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) 582 return 0; 583 needtoinsert = 1; 584 } else if(((struct infra_data*)e->data)->ttl < timenow) { 585 oldprobedelay = ((struct infra_data*)e->data)->probedelay; 586 data_entry_init(infra, e, timenow); 587 expired = 1; 588 } 589 /* have an entry, update the rtt */ 590 data = (struct infra_data*)e->data; 591 if(roundtrip == -1) { 592 if(needtoinsert || expired) { 593 /* timeout on entry that has expired before the timer 594 * keep old timeout from the function caller */ 595 data->rtt.rto = orig_rtt; 596 data->probedelay = oldprobedelay; 597 } 598 rtt_lost(&data->rtt, orig_rtt); 599 if(qtype == LDNS_RR_TYPE_A) { 600 if(data->timeout_A < TIMEOUT_COUNT_MAX) 601 data->timeout_A++; 602 } else if(qtype == LDNS_RR_TYPE_AAAA) { 603 if(data->timeout_AAAA < TIMEOUT_COUNT_MAX) 604 data->timeout_AAAA++; 605 } else { 606 if(data->timeout_other < TIMEOUT_COUNT_MAX) 607 data->timeout_other++; 608 } 609 } else { 610 /* if we got a reply, but the old timeout was above server 611 * selection height, delete the timeout so the server is 612 * fully available again */ 613 if(rtt_unclamped(&data->rtt) >= USEFUL_SERVER_TOP_TIMEOUT) 614 rtt_init(&data->rtt); 615 rtt_update(&data->rtt, roundtrip); 616 data->probedelay = 0; 617 if(qtype == LDNS_RR_TYPE_A) 618 data->timeout_A = 0; 619 else if(qtype == LDNS_RR_TYPE_AAAA) 620 data->timeout_AAAA = 0; 621 else data->timeout_other = 0; 622 } 623 if(data->rtt.rto > 0) 624 rto = data->rtt.rto; 625 626 if(needtoinsert) 627 slabhash_insert(infra->hosts, e->hash, e, e->data, NULL); 628 else { lock_rw_unlock(&e->lock); } 629 return rto; 630 } 631 632 long long infra_get_host_rto(struct infra_cache* infra, 633 struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm, 634 size_t nmlen, struct rtt_info* rtt, int* delay, time_t timenow, 635 int* tA, int* tAAAA, int* tother) 636 { 637 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 638 nm, nmlen, 0); 639 struct infra_data* data; 640 long long ttl = -2; 641 if(!e) return -1; 642 data = (struct infra_data*)e->data; 643 if(data->ttl >= timenow) { 644 ttl = (long long)(data->ttl - timenow); 645 memmove(rtt, &data->rtt, sizeof(*rtt)); 646 if(timenow < data->probedelay) 647 *delay = (int)(data->probedelay - timenow); 648 else *delay = 0; 649 } 650 *tA = (int)data->timeout_A; 651 *tAAAA = (int)data->timeout_AAAA; 652 *tother = (int)data->timeout_other; 653 lock_rw_unlock(&e->lock); 654 return ttl; 655 } 656 657 int 658 infra_edns_update(struct infra_cache* infra, struct sockaddr_storage* addr, 659 socklen_t addrlen, uint8_t* nm, size_t nmlen, int edns_version, 660 time_t timenow) 661 { 662 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 663 nm, nmlen, 1); 664 struct infra_data* data; 665 int needtoinsert = 0; 666 if(!e) { 667 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) 668 return 0; 669 needtoinsert = 1; 670 } else if(((struct infra_data*)e->data)->ttl < timenow) { 671 data_entry_init(infra, e, timenow); 672 } 673 /* have an entry, update the rtt, and the ttl */ 674 data = (struct infra_data*)e->data; 675 /* do not update if noEDNS and stored is yesEDNS */ 676 if(!(edns_version == -1 && (data->edns_version != -1 && 677 data->edns_lame_known))) { 678 data->edns_version = edns_version; 679 data->edns_lame_known = 1; 680 } 681 682 if(needtoinsert) 683 slabhash_insert(infra->hosts, e->hash, e, e->data, NULL); 684 else { lock_rw_unlock(&e->lock); } 685 return 1; 686 } 687 688 int 689 infra_get_lame_rtt(struct infra_cache* infra, 690 struct sockaddr_storage* addr, socklen_t addrlen, 691 uint8_t* name, size_t namelen, uint16_t qtype, 692 int* lame, int* dnsseclame, int* reclame, int* rtt, time_t timenow) 693 { 694 struct infra_data* host; 695 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 696 name, namelen, 0); 697 if(!e) 698 return 0; 699 host = (struct infra_data*)e->data; 700 *rtt = rtt_unclamped(&host->rtt); 701 if(host->rtt.rto >= PROBE_MAXRTO && timenow >= host->probedelay 702 && infra->infra_keep_probing) { 703 /* single probe, keep probing */ 704 if(*rtt >= USEFUL_SERVER_TOP_TIMEOUT) 705 *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; 706 } else if(host->rtt.rto >= PROBE_MAXRTO && timenow < host->probedelay 707 && rtt_notimeout(&host->rtt)*4 <= host->rtt.rto) { 708 /* single probe for this domain, and we are not probing */ 709 /* unless the query type allows a probe to happen */ 710 if(qtype == LDNS_RR_TYPE_A) { 711 if(host->timeout_A >= TIMEOUT_COUNT_MAX) 712 *rtt = USEFUL_SERVER_TOP_TIMEOUT; 713 else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; 714 } else if(qtype == LDNS_RR_TYPE_AAAA) { 715 if(host->timeout_AAAA >= TIMEOUT_COUNT_MAX) 716 *rtt = USEFUL_SERVER_TOP_TIMEOUT; 717 else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; 718 } else { 719 if(host->timeout_other >= TIMEOUT_COUNT_MAX) 720 *rtt = USEFUL_SERVER_TOP_TIMEOUT; 721 else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; 722 } 723 } 724 /* expired entry */ 725 if(timenow > host->ttl) { 726 727 /* see if this can be a re-probe of an unresponsive server */ 728 /* minus 1000 because that is outside of the RTTBAND, so 729 * blacklisted servers stay blacklisted if this is chosen */ 730 if(host->rtt.rto >= USEFUL_SERVER_TOP_TIMEOUT) { 731 lock_rw_unlock(&e->lock); 732 *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; 733 *lame = 0; 734 *dnsseclame = 0; 735 *reclame = 0; 736 return 1; 737 } 738 lock_rw_unlock(&e->lock); 739 return 0; 740 } 741 /* check lameness first */ 742 if(host->lame_type_A && qtype == LDNS_RR_TYPE_A) { 743 lock_rw_unlock(&e->lock); 744 *lame = 1; 745 *dnsseclame = 0; 746 *reclame = 0; 747 return 1; 748 } else if(host->lame_other && qtype != LDNS_RR_TYPE_A) { 749 lock_rw_unlock(&e->lock); 750 *lame = 1; 751 *dnsseclame = 0; 752 *reclame = 0; 753 return 1; 754 } else if(host->isdnsseclame) { 755 lock_rw_unlock(&e->lock); 756 *lame = 0; 757 *dnsseclame = 1; 758 *reclame = 0; 759 return 1; 760 } else if(host->rec_lame) { 761 lock_rw_unlock(&e->lock); 762 *lame = 0; 763 *dnsseclame = 0; 764 *reclame = 1; 765 return 1; 766 } 767 /* no lameness for this type of query */ 768 lock_rw_unlock(&e->lock); 769 *lame = 0; 770 *dnsseclame = 0; 771 *reclame = 0; 772 return 1; 773 } 774 775 int infra_find_ratelimit(struct infra_cache* infra, uint8_t* name, 776 size_t namelen) 777 { 778 int labs = dname_count_labels(name); 779 struct domain_limit_data* d = (struct domain_limit_data*) 780 name_tree_lookup(&infra->domain_limits, name, namelen, labs, 781 LDNS_RR_CLASS_IN); 782 if(!d) return infra_dp_ratelimit; 783 784 if(d->node.labs == labs && d->lim != -1) 785 return d->lim; /* exact match */ 786 787 /* find 'below match' */ 788 if(d->node.labs == labs) 789 d = (struct domain_limit_data*)d->node.parent; 790 while(d) { 791 if(d->below != -1) 792 return d->below; 793 d = (struct domain_limit_data*)d->node.parent; 794 } 795 return infra_dp_ratelimit; 796 } 797 798 size_t ip_rate_sizefunc(void* k, void* ATTR_UNUSED(d)) 799 { 800 struct ip_rate_key* key = (struct ip_rate_key*)k; 801 return sizeof(*key) + sizeof(struct ip_rate_data) 802 + lock_get_mem(&key->entry.lock); 803 } 804 805 int ip_rate_compfunc(void* key1, void* key2) 806 { 807 struct ip_rate_key* k1 = (struct ip_rate_key*)key1; 808 struct ip_rate_key* k2 = (struct ip_rate_key*)key2; 809 return sockaddr_cmp_addr(&k1->addr, k1->addrlen, 810 &k2->addr, k2->addrlen); 811 } 812 813 void ip_rate_delkeyfunc(void* k, void* ATTR_UNUSED(arg)) 814 { 815 struct ip_rate_key* key = (struct ip_rate_key*)k; 816 if(!key) 817 return; 818 lock_rw_destroy(&key->entry.lock); 819 free(key); 820 } 821 822 /** find data item in array, for write access, caller unlocks */ 823 static struct lruhash_entry* infra_find_ratedata(struct infra_cache* infra, 824 uint8_t* name, size_t namelen, int wr) 825 { 826 struct rate_key key; 827 hashvalue_type h = dname_query_hash(name, 0xab); 828 memset(&key, 0, sizeof(key)); 829 key.name = name; 830 key.namelen = namelen; 831 key.entry.hash = h; 832 return slabhash_lookup(infra->domain_rates, h, &key, wr); 833 } 834 835 /** find data item in array for ip addresses */ 836 static struct lruhash_entry* infra_find_ip_ratedata(struct infra_cache* infra, 837 struct sockaddr_storage* addr, socklen_t addrlen, int wr) 838 { 839 struct ip_rate_key key; 840 hashvalue_type h = hash_addr(addr, addrlen, 0); 841 memset(&key, 0, sizeof(key)); 842 key.addr = *addr; 843 key.addrlen = addrlen; 844 key.entry.hash = h; 845 return slabhash_lookup(infra->client_ip_rates, h, &key, wr); 846 } 847 848 /** create rate data item for name, number 1 in now */ 849 static void infra_create_ratedata(struct infra_cache* infra, 850 uint8_t* name, size_t namelen, time_t timenow) 851 { 852 hashvalue_type h = dname_query_hash(name, 0xab); 853 struct rate_key* k = (struct rate_key*)calloc(1, sizeof(*k)); 854 struct rate_data* d = (struct rate_data*)calloc(1, sizeof(*d)); 855 if(!k || !d) { 856 free(k); 857 free(d); 858 return; /* alloc failure */ 859 } 860 k->namelen = namelen; 861 k->name = memdup(name, namelen); 862 if(!k->name) { 863 free(k); 864 free(d); 865 return; /* alloc failure */ 866 } 867 lock_rw_init(&k->entry.lock); 868 k->entry.hash = h; 869 k->entry.key = k; 870 k->entry.data = d; 871 d->qps[0] = 1; 872 d->timestamp[0] = timenow; 873 slabhash_insert(infra->domain_rates, h, &k->entry, d, NULL); 874 } 875 876 /** create rate data item for ip address */ 877 static void infra_ip_create_ratedata(struct infra_cache* infra, 878 struct sockaddr_storage* addr, socklen_t addrlen, time_t timenow) 879 { 880 hashvalue_type h = hash_addr(addr, addrlen, 0); 881 struct ip_rate_key* k = (struct ip_rate_key*)calloc(1, sizeof(*k)); 882 struct ip_rate_data* d = (struct ip_rate_data*)calloc(1, sizeof(*d)); 883 if(!k || !d) { 884 free(k); 885 free(d); 886 return; /* alloc failure */ 887 } 888 k->addr = *addr; 889 k->addrlen = addrlen; 890 lock_rw_init(&k->entry.lock); 891 k->entry.hash = h; 892 k->entry.key = k; 893 k->entry.data = d; 894 d->qps[0] = 1; 895 d->timestamp[0] = timenow; 896 slabhash_insert(infra->client_ip_rates, h, &k->entry, d, NULL); 897 } 898 899 /** Find the second and return its rate counter. If none and should_add, remove 900 * oldest to accommodate. Else return none. */ 901 static int* infra_rate_find_second_or_none(void* data, time_t t, int should_add) 902 { 903 struct rate_data* d = (struct rate_data*)data; 904 int i, oldest; 905 for(i=0; i<RATE_WINDOW; i++) { 906 if(d->timestamp[i] == t) 907 return &(d->qps[i]); 908 } 909 if(!should_add) return NULL; 910 /* remove oldest timestamp, and insert it at t with 0 qps */ 911 oldest = 0; 912 for(i=0; i<RATE_WINDOW; i++) { 913 if(d->timestamp[i] < d->timestamp[oldest]) 914 oldest = i; 915 } 916 d->timestamp[oldest] = t; 917 d->qps[oldest] = 0; 918 return &(d->qps[oldest]); 919 } 920 921 /** find the second and return its rate counter, if none, remove oldest to 922 * accommodate */ 923 static int* infra_rate_give_second(void* data, time_t t) 924 { 925 return infra_rate_find_second_or_none(data, t, 1); 926 } 927 928 /** find the second and return its rate counter only if it exists. Caller 929 * should check for NULL return value */ 930 static int* infra_rate_get_second(void* data, time_t t) 931 { 932 return infra_rate_find_second_or_none(data, t, 0); 933 } 934 935 int infra_rate_max(void* data, time_t now, int backoff) 936 { 937 struct rate_data* d = (struct rate_data*)data; 938 int i, max = 0; 939 for(i=0; i<RATE_WINDOW; i++) { 940 if(backoff) { 941 if(now-d->timestamp[i] <= RATE_WINDOW && 942 d->qps[i] > max) { 943 max = d->qps[i]; 944 } 945 } else { 946 if(now == d->timestamp[i]) { 947 return d->qps[i]; 948 } 949 } 950 } 951 return max; 952 } 953 954 int infra_ratelimit_inc(struct infra_cache* infra, uint8_t* name, 955 size_t namelen, time_t timenow, int backoff, struct query_info* qinfo, 956 struct comm_reply* replylist) 957 { 958 int lim, max; 959 struct lruhash_entry* entry; 960 961 if(!infra_dp_ratelimit) 962 return 1; /* not enabled */ 963 964 /* find ratelimit */ 965 lim = infra_find_ratelimit(infra, name, namelen); 966 if(!lim) 967 return 1; /* disabled for this domain */ 968 969 /* find or insert ratedata */ 970 entry = infra_find_ratedata(infra, name, namelen, 1); 971 if(entry) { 972 int premax = infra_rate_max(entry->data, timenow, backoff); 973 int* cur = infra_rate_give_second(entry->data, timenow); 974 (*cur)++; 975 max = infra_rate_max(entry->data, timenow, backoff); 976 lock_rw_unlock(&entry->lock); 977 978 if(premax <= lim && max > lim) { 979 char buf[257], qnm[257], ts[12], cs[12], ip[128]; 980 dname_str(name, buf); 981 dname_str(qinfo->qname, qnm); 982 sldns_wire2str_type_buf(qinfo->qtype, ts, sizeof(ts)); 983 sldns_wire2str_class_buf(qinfo->qclass, cs, sizeof(cs)); 984 ip[0]=0; 985 if(replylist) { 986 addr_to_str((struct sockaddr_storage *)&replylist->remote_addr, 987 replylist->remote_addrlen, ip, sizeof(ip)); 988 verbose(VERB_OPS, "ratelimit exceeded %s %d query %s %s %s from %s", buf, lim, qnm, cs, ts, ip); 989 } else { 990 verbose(VERB_OPS, "ratelimit exceeded %s %d query %s %s %s", buf, lim, qnm, cs, ts); 991 } 992 } 993 return (max <= lim); 994 } 995 996 /* create */ 997 infra_create_ratedata(infra, name, namelen, timenow); 998 return (1 <= lim); 999 } 1000 1001 void infra_ratelimit_dec(struct infra_cache* infra, uint8_t* name, 1002 size_t namelen, time_t timenow) 1003 { 1004 struct lruhash_entry* entry; 1005 int* cur; 1006 if(!infra_dp_ratelimit) 1007 return; /* not enabled */ 1008 entry = infra_find_ratedata(infra, name, namelen, 1); 1009 if(!entry) return; /* not cached */ 1010 cur = infra_rate_get_second(entry->data, timenow); 1011 if(cur == NULL) { 1012 /* our timenow is not available anymore; nothing to decrease */ 1013 lock_rw_unlock(&entry->lock); 1014 return; 1015 } 1016 if((*cur) > 0) 1017 (*cur)--; 1018 lock_rw_unlock(&entry->lock); 1019 } 1020 1021 int infra_ratelimit_exceeded(struct infra_cache* infra, uint8_t* name, 1022 size_t namelen, time_t timenow, int backoff) 1023 { 1024 struct lruhash_entry* entry; 1025 int lim, max; 1026 if(!infra_dp_ratelimit) 1027 return 0; /* not enabled */ 1028 1029 /* find ratelimit */ 1030 lim = infra_find_ratelimit(infra, name, namelen); 1031 if(!lim) 1032 return 0; /* disabled for this domain */ 1033 1034 /* find current rate */ 1035 entry = infra_find_ratedata(infra, name, namelen, 0); 1036 if(!entry) 1037 return 0; /* not cached */ 1038 max = infra_rate_max(entry->data, timenow, backoff); 1039 lock_rw_unlock(&entry->lock); 1040 1041 return (max > lim); 1042 } 1043 1044 size_t 1045 infra_get_mem(struct infra_cache* infra) 1046 { 1047 size_t s = sizeof(*infra) + slabhash_get_mem(infra->hosts); 1048 if(infra->domain_rates) s += slabhash_get_mem(infra->domain_rates); 1049 if(infra->client_ip_rates) s += slabhash_get_mem(infra->client_ip_rates); 1050 /* ignore domain_limits because walk through tree is big */ 1051 return s; 1052 } 1053 1054 int infra_ip_ratelimit_inc(struct infra_cache* infra, 1055 struct sockaddr_storage* addr, socklen_t addrlen, time_t timenow, 1056 int backoff, struct sldns_buffer* buffer) 1057 { 1058 int max; 1059 struct lruhash_entry* entry; 1060 1061 /* not enabled */ 1062 if(!infra_ip_ratelimit) { 1063 return 1; 1064 } 1065 /* find or insert ratedata */ 1066 entry = infra_find_ip_ratedata(infra, addr, addrlen, 1); 1067 if(entry) { 1068 int premax = infra_rate_max(entry->data, timenow, backoff); 1069 int* cur = infra_rate_give_second(entry->data, timenow); 1070 (*cur)++; 1071 max = infra_rate_max(entry->data, timenow, backoff); 1072 lock_rw_unlock(&entry->lock); 1073 1074 if(premax <= infra_ip_ratelimit && max > infra_ip_ratelimit) { 1075 char client_ip[128], qnm[LDNS_MAX_DOMAINLEN+1+12+12]; 1076 addr_to_str(addr, addrlen, client_ip, sizeof(client_ip)); 1077 qnm[0]=0; 1078 if(sldns_buffer_limit(buffer)>LDNS_HEADER_SIZE && 1079 LDNS_QDCOUNT(sldns_buffer_begin(buffer))!=0) { 1080 (void)sldns_wire2str_rrquestion_buf( 1081 sldns_buffer_at(buffer, LDNS_HEADER_SIZE), 1082 sldns_buffer_limit(buffer)-LDNS_HEADER_SIZE, 1083 qnm, sizeof(qnm)); 1084 if(strlen(qnm)>0 && qnm[strlen(qnm)-1]=='\n') 1085 qnm[strlen(qnm)-1] = 0; /*remove newline*/ 1086 if(strchr(qnm, '\t')) 1087 *strchr(qnm, '\t') = ' '; 1088 if(strchr(qnm, '\t')) 1089 *strchr(qnm, '\t') = ' '; 1090 verbose(VERB_OPS, "ip_ratelimit exceeded %s %d %s", 1091 client_ip, infra_ip_ratelimit, qnm); 1092 } else { 1093 verbose(VERB_OPS, "ip_ratelimit exceeded %s %d (no query name)", 1094 client_ip, infra_ip_ratelimit); 1095 } 1096 } 1097 return (max <= infra_ip_ratelimit); 1098 } 1099 1100 /* create */ 1101 infra_ip_create_ratedata(infra, addr, addrlen, timenow); 1102 return 1; 1103 } 1104