1 /* 2 * services/cache/infra.c - infrastructure cache, server rtt and capabilities 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE 27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file contains the infrastructure cache. 40 */ 41 #include "config.h" 42 #include <ldns/rr.h> 43 #include "services/cache/infra.h" 44 #include "util/storage/slabhash.h" 45 #include "util/storage/lookup3.h" 46 #include "util/data/dname.h" 47 #include "util/log.h" 48 #include "util/net_help.h" 49 #include "util/config_file.h" 50 #include "iterator/iterator.h" 51 52 /** Timeout when only a single probe query per IP is allowed. */ 53 #define PROBE_MAXRTO 12000 /* in msec */ 54 55 /** number of timeouts for a type when the domain can be blocked ; 56 * even if another type has completely rtt maxed it, the different type 57 * can do this number of packets (until those all timeout too) */ 58 #define TIMEOUT_COUNT_MAX 3 59 60 size_t 61 infra_sizefunc(void* k, void* ATTR_UNUSED(d)) 62 { 63 struct infra_key* key = (struct infra_key*)k; 64 return sizeof(*key) + sizeof(struct infra_data) + key->namelen 65 + lock_get_mem(&key->entry.lock); 66 } 67 68 int 69 infra_compfunc(void* key1, void* key2) 70 { 71 struct infra_key* k1 = (struct infra_key*)key1; 72 struct infra_key* k2 = (struct infra_key*)key2; 73 int r = sockaddr_cmp(&k1->addr, k1->addrlen, &k2->addr, k2->addrlen); 74 if(r != 0) 75 return r; 76 if(k1->namelen != k2->namelen) { 77 if(k1->namelen < k2->namelen) 78 return -1; 79 return 1; 80 } 81 return query_dname_compare(k1->zonename, k2->zonename); 82 } 83 84 void 85 infra_delkeyfunc(void* k, void* ATTR_UNUSED(arg)) 86 { 87 struct infra_key* key = (struct infra_key*)k; 88 if(!key) 89 return; 90 lock_rw_destroy(&key->entry.lock); 91 free(key->zonename); 92 free(key); 93 } 94 95 void 96 infra_deldatafunc(void* d, void* ATTR_UNUSED(arg)) 97 { 98 struct infra_data* data = (struct infra_data*)d; 99 free(data); 100 } 101 102 struct infra_cache* 103 infra_create(struct config_file* cfg) 104 { 105 struct infra_cache* infra = (struct infra_cache*)calloc(1, 106 sizeof(struct infra_cache)); 107 size_t maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+ 108 sizeof(struct infra_data)+INFRA_BYTES_NAME); 109 infra->hosts = slabhash_create(cfg->infra_cache_slabs, 110 INFRA_HOST_STARTSIZE, maxmem, &infra_sizefunc, &infra_compfunc, 111 &infra_delkeyfunc, &infra_deldatafunc, NULL); 112 if(!infra->hosts) { 113 free(infra); 114 return NULL; 115 } 116 infra->host_ttl = cfg->host_ttl; 117 return infra; 118 } 119 120 void 121 infra_delete(struct infra_cache* infra) 122 { 123 if(!infra) 124 return; 125 slabhash_delete(infra->hosts); 126 free(infra); 127 } 128 129 struct infra_cache* 130 infra_adjust(struct infra_cache* infra, struct config_file* cfg) 131 { 132 size_t maxmem; 133 if(!infra) 134 return infra_create(cfg); 135 infra->host_ttl = cfg->host_ttl; 136 maxmem = cfg->infra_cache_numhosts * (sizeof(struct infra_key)+ 137 sizeof(struct infra_data)+INFRA_BYTES_NAME); 138 if(maxmem != slabhash_get_size(infra->hosts) || 139 cfg->infra_cache_slabs != infra->hosts->size) { 140 infra_delete(infra); 141 infra = infra_create(cfg); 142 } 143 return infra; 144 } 145 146 /** calculate the hash value for a host key */ 147 static hashvalue_t 148 hash_addr(struct sockaddr_storage* addr, socklen_t addrlen) 149 { 150 hashvalue_t h = 0xab; 151 /* select the pieces to hash, some OS have changing data inside */ 152 if(addr_is_ip6(addr, addrlen)) { 153 struct sockaddr_in6* in6 = (struct sockaddr_in6*)addr; 154 h = hashlittle(&in6->sin6_family, sizeof(in6->sin6_family), h); 155 h = hashlittle(&in6->sin6_port, sizeof(in6->sin6_port), h); 156 h = hashlittle(&in6->sin6_addr, INET6_SIZE, h); 157 } else { 158 struct sockaddr_in* in = (struct sockaddr_in*)addr; 159 h = hashlittle(&in->sin_family, sizeof(in->sin_family), h); 160 h = hashlittle(&in->sin_port, sizeof(in->sin_port), h); 161 h = hashlittle(&in->sin_addr, INET_SIZE, h); 162 } 163 return h; 164 } 165 166 /** calculate infra hash for a key */ 167 static hashvalue_t 168 hash_infra(struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* name) 169 { 170 return dname_query_hash(name, hash_addr(addr, addrlen)); 171 } 172 173 /** lookup version that does not check host ttl (you check it) */ 174 struct lruhash_entry* 175 infra_lookup_nottl(struct infra_cache* infra, struct sockaddr_storage* addr, 176 socklen_t addrlen, uint8_t* name, size_t namelen, int wr) 177 { 178 struct infra_key k; 179 k.addrlen = addrlen; 180 memcpy(&k.addr, addr, addrlen); 181 k.namelen = namelen; 182 k.zonename = name; 183 k.entry.hash = hash_infra(addr, addrlen, name); 184 k.entry.key = (void*)&k; 185 k.entry.data = NULL; 186 return slabhash_lookup(infra->hosts, k.entry.hash, &k, wr); 187 } 188 189 /** init the data elements */ 190 static void 191 data_entry_init(struct infra_cache* infra, struct lruhash_entry* e, 192 uint32_t timenow) 193 { 194 struct infra_data* data = (struct infra_data*)e->data; 195 data->ttl = timenow + infra->host_ttl; 196 rtt_init(&data->rtt); 197 data->edns_version = 0; 198 data->edns_lame_known = 0; 199 data->probedelay = 0; 200 data->isdnsseclame = 0; 201 data->rec_lame = 0; 202 data->lame_type_A = 0; 203 data->lame_other = 0; 204 data->timeout_A = 0; 205 data->timeout_AAAA = 0; 206 data->timeout_other = 0; 207 } 208 209 /** 210 * Create and init a new entry for a host 211 * @param infra: infra structure with config parameters. 212 * @param addr: host address. 213 * @param addrlen: length of addr. 214 * @param name: name of zone 215 * @param namelen: length of name. 216 * @param tm: time now. 217 * @return: the new entry or NULL on malloc failure. 218 */ 219 static struct lruhash_entry* 220 new_entry(struct infra_cache* infra, struct sockaddr_storage* addr, 221 socklen_t addrlen, uint8_t* name, size_t namelen, uint32_t tm) 222 { 223 struct infra_data* data; 224 struct infra_key* key = (struct infra_key*)malloc(sizeof(*key)); 225 if(!key) 226 return NULL; 227 data = (struct infra_data*)malloc(sizeof(struct infra_data)); 228 if(!data) { 229 free(key); 230 return NULL; 231 } 232 key->zonename = memdup(name, namelen); 233 if(!key->zonename) { 234 free(key); 235 free(data); 236 return NULL; 237 } 238 key->namelen = namelen; 239 lock_rw_init(&key->entry.lock); 240 key->entry.hash = hash_infra(addr, addrlen, name); 241 key->entry.key = (void*)key; 242 key->entry.data = (void*)data; 243 key->addrlen = addrlen; 244 memcpy(&key->addr, addr, addrlen); 245 data_entry_init(infra, &key->entry, tm); 246 return &key->entry; 247 } 248 249 int 250 infra_host(struct infra_cache* infra, struct sockaddr_storage* addr, 251 socklen_t addrlen, uint8_t* nm, size_t nmlen, uint32_t timenow, 252 int* edns_vs, uint8_t* edns_lame_known, int* to) 253 { 254 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 255 nm, nmlen, 0); 256 struct infra_data* data; 257 int wr = 0; 258 if(e && ((struct infra_data*)e->data)->ttl < timenow) { 259 /* it expired, try to reuse existing entry */ 260 int old = ((struct infra_data*)e->data)->rtt.rto; 261 uint8_t tA = ((struct infra_data*)e->data)->timeout_A; 262 uint8_t tAAAA = ((struct infra_data*)e->data)->timeout_AAAA; 263 uint8_t tother = ((struct infra_data*)e->data)->timeout_other; 264 lock_rw_unlock(&e->lock); 265 e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1); 266 if(e) { 267 /* if its still there we have a writelock, init */ 268 /* re-initialise */ 269 /* do not touch lameness, it may be valid still */ 270 data_entry_init(infra, e, timenow); 271 wr = 1; 272 /* TOP_TIMEOUT remains on reuse */ 273 if(old >= USEFUL_SERVER_TOP_TIMEOUT) { 274 ((struct infra_data*)e->data)->rtt.rto 275 = USEFUL_SERVER_TOP_TIMEOUT; 276 ((struct infra_data*)e->data)->timeout_A = tA; 277 ((struct infra_data*)e->data)->timeout_AAAA = tAAAA; 278 ((struct infra_data*)e->data)->timeout_other = tother; 279 } 280 } 281 } 282 if(!e) { 283 /* insert new entry */ 284 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) 285 return 0; 286 data = (struct infra_data*)e->data; 287 *edns_vs = data->edns_version; 288 *edns_lame_known = data->edns_lame_known; 289 *to = rtt_timeout(&data->rtt); 290 slabhash_insert(infra->hosts, e->hash, e, data, NULL); 291 return 1; 292 } 293 /* use existing entry */ 294 data = (struct infra_data*)e->data; 295 *edns_vs = data->edns_version; 296 *edns_lame_known = data->edns_lame_known; 297 *to = rtt_timeout(&data->rtt); 298 if(*to >= PROBE_MAXRTO && rtt_notimeout(&data->rtt)*4 <= *to) { 299 /* delay other queries, this is the probe query */ 300 if(!wr) { 301 lock_rw_unlock(&e->lock); 302 e = infra_lookup_nottl(infra, addr,addrlen,nm,nmlen, 1); 303 if(!e) { /* flushed from cache real fast, no use to 304 allocate just for the probedelay */ 305 return 1; 306 } 307 data = (struct infra_data*)e->data; 308 } 309 /* add 999 to round up the timeout value from msec to sec, 310 * then add a whole second so it is certain that this probe 311 * has timed out before the next is allowed */ 312 data->probedelay = timenow + ((*to)+1999)/1000; 313 } 314 lock_rw_unlock(&e->lock); 315 return 1; 316 } 317 318 int 319 infra_set_lame(struct infra_cache* infra, struct sockaddr_storage* addr, 320 socklen_t addrlen, uint8_t* nm, size_t nmlen, uint32_t timenow, 321 int dnsseclame, int reclame, uint16_t qtype) 322 { 323 struct infra_data* data; 324 struct lruhash_entry* e; 325 int needtoinsert = 0; 326 e = infra_lookup_nottl(infra, addr, addrlen, nm, nmlen, 1); 327 if(!e) { 328 /* insert it */ 329 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) { 330 log_err("set_lame: malloc failure"); 331 return 0; 332 } 333 needtoinsert = 1; 334 } else if( ((struct infra_data*)e->data)->ttl < timenow) { 335 /* expired, reuse existing entry */ 336 data_entry_init(infra, e, timenow); 337 } 338 /* got an entry, now set the zone lame */ 339 data = (struct infra_data*)e->data; 340 /* merge data (if any) */ 341 if(dnsseclame) 342 data->isdnsseclame = 1; 343 if(reclame) 344 data->rec_lame = 1; 345 if(!dnsseclame && !reclame && qtype == LDNS_RR_TYPE_A) 346 data->lame_type_A = 1; 347 if(!dnsseclame && !reclame && qtype != LDNS_RR_TYPE_A) 348 data->lame_other = 1; 349 /* done */ 350 if(needtoinsert) 351 slabhash_insert(infra->hosts, e->hash, e, e->data, NULL); 352 else { lock_rw_unlock(&e->lock); } 353 return 1; 354 } 355 356 void 357 infra_update_tcp_works(struct infra_cache* infra, 358 struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm, 359 size_t nmlen) 360 { 361 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 362 nm, nmlen, 1); 363 struct infra_data* data; 364 if(!e) 365 return; /* doesn't exist */ 366 data = (struct infra_data*)e->data; 367 if(data->rtt.rto >= RTT_MAX_TIMEOUT) 368 /* do not disqualify this server altogether, it is better 369 * than nothing */ 370 data->rtt.rto = RTT_MAX_TIMEOUT-1000; 371 lock_rw_unlock(&e->lock); 372 } 373 374 int 375 infra_rtt_update(struct infra_cache* infra, struct sockaddr_storage* addr, 376 socklen_t addrlen, uint8_t* nm, size_t nmlen, int qtype, 377 int roundtrip, int orig_rtt, uint32_t timenow) 378 { 379 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 380 nm, nmlen, 1); 381 struct infra_data* data; 382 int needtoinsert = 0; 383 int rto = 1; 384 if(!e) { 385 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) 386 return 0; 387 needtoinsert = 1; 388 } else if(((struct infra_data*)e->data)->ttl < timenow) { 389 data_entry_init(infra, e, timenow); 390 } 391 /* have an entry, update the rtt */ 392 data = (struct infra_data*)e->data; 393 if(roundtrip == -1) { 394 rtt_lost(&data->rtt, orig_rtt); 395 if(qtype == LDNS_RR_TYPE_A) { 396 if(data->timeout_A < TIMEOUT_COUNT_MAX) 397 data->timeout_A++; 398 } else if(qtype == LDNS_RR_TYPE_AAAA) { 399 if(data->timeout_AAAA < TIMEOUT_COUNT_MAX) 400 data->timeout_AAAA++; 401 } else { 402 if(data->timeout_other < TIMEOUT_COUNT_MAX) 403 data->timeout_other++; 404 } 405 } else { 406 /* if we got a reply, but the old timeout was above server 407 * selection height, delete the timeout so the server is 408 * fully available again */ 409 if(rtt_unclamped(&data->rtt) >= USEFUL_SERVER_TOP_TIMEOUT) 410 rtt_init(&data->rtt); 411 rtt_update(&data->rtt, roundtrip); 412 data->probedelay = 0; 413 if(qtype == LDNS_RR_TYPE_A) 414 data->timeout_A = 0; 415 else if(qtype == LDNS_RR_TYPE_AAAA) 416 data->timeout_AAAA = 0; 417 else data->timeout_other = 0; 418 } 419 if(data->rtt.rto > 0) 420 rto = data->rtt.rto; 421 422 if(needtoinsert) 423 slabhash_insert(infra->hosts, e->hash, e, e->data, NULL); 424 else { lock_rw_unlock(&e->lock); } 425 return rto; 426 } 427 428 int infra_get_host_rto(struct infra_cache* infra, 429 struct sockaddr_storage* addr, socklen_t addrlen, uint8_t* nm, 430 size_t nmlen, struct rtt_info* rtt, int* delay, uint32_t timenow, 431 int* tA, int* tAAAA, int* tother) 432 { 433 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 434 nm, nmlen, 0); 435 struct infra_data* data; 436 int ttl = -2; 437 if(!e) return -1; 438 data = (struct infra_data*)e->data; 439 if(data->ttl >= timenow) { 440 ttl = (int)(data->ttl - timenow); 441 memmove(rtt, &data->rtt, sizeof(*rtt)); 442 if(timenow < data->probedelay) 443 *delay = (int)(data->probedelay - timenow); 444 else *delay = 0; 445 } 446 *tA = (int)data->timeout_A; 447 *tAAAA = (int)data->timeout_AAAA; 448 *tother = (int)data->timeout_other; 449 lock_rw_unlock(&e->lock); 450 return ttl; 451 } 452 453 int 454 infra_edns_update(struct infra_cache* infra, struct sockaddr_storage* addr, 455 socklen_t addrlen, uint8_t* nm, size_t nmlen, int edns_version, 456 uint32_t timenow) 457 { 458 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 459 nm, nmlen, 1); 460 struct infra_data* data; 461 int needtoinsert = 0; 462 if(!e) { 463 if(!(e = new_entry(infra, addr, addrlen, nm, nmlen, timenow))) 464 return 0; 465 needtoinsert = 1; 466 } else if(((struct infra_data*)e->data)->ttl < timenow) { 467 data_entry_init(infra, e, timenow); 468 } 469 /* have an entry, update the rtt, and the ttl */ 470 data = (struct infra_data*)e->data; 471 /* do not update if noEDNS and stored is yesEDNS */ 472 if(!(edns_version == -1 && (data->edns_version != -1 && 473 data->edns_lame_known))) { 474 data->edns_version = edns_version; 475 data->edns_lame_known = 1; 476 } 477 478 if(needtoinsert) 479 slabhash_insert(infra->hosts, e->hash, e, e->data, NULL); 480 else { lock_rw_unlock(&e->lock); } 481 return 1; 482 } 483 484 int 485 infra_get_lame_rtt(struct infra_cache* infra, 486 struct sockaddr_storage* addr, socklen_t addrlen, 487 uint8_t* name, size_t namelen, uint16_t qtype, 488 int* lame, int* dnsseclame, int* reclame, int* rtt, uint32_t timenow) 489 { 490 struct infra_data* host; 491 struct lruhash_entry* e = infra_lookup_nottl(infra, addr, addrlen, 492 name, namelen, 0); 493 if(!e) 494 return 0; 495 host = (struct infra_data*)e->data; 496 *rtt = rtt_unclamped(&host->rtt); 497 if(host->rtt.rto >= PROBE_MAXRTO && timenow < host->probedelay 498 && rtt_notimeout(&host->rtt)*4 <= host->rtt.rto) { 499 /* single probe for this domain, and we are not probing */ 500 /* unless the query type allows a probe to happen */ 501 if(qtype == LDNS_RR_TYPE_A) { 502 if(host->timeout_A >= TIMEOUT_COUNT_MAX) 503 *rtt = USEFUL_SERVER_TOP_TIMEOUT; 504 else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; 505 } else if(qtype == LDNS_RR_TYPE_AAAA) { 506 if(host->timeout_AAAA >= TIMEOUT_COUNT_MAX) 507 *rtt = USEFUL_SERVER_TOP_TIMEOUT; 508 else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; 509 } else { 510 if(host->timeout_other >= TIMEOUT_COUNT_MAX) 511 *rtt = USEFUL_SERVER_TOP_TIMEOUT; 512 else *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; 513 } 514 } 515 if(timenow > host->ttl) { 516 /* expired entry */ 517 /* see if this can be a re-probe of an unresponsive server */ 518 /* minus 1000 because that is outside of the RTTBAND, so 519 * blacklisted servers stay blacklisted if this is chosen */ 520 if(host->rtt.rto >= USEFUL_SERVER_TOP_TIMEOUT) { 521 lock_rw_unlock(&e->lock); 522 *rtt = USEFUL_SERVER_TOP_TIMEOUT-1000; 523 *lame = 0; 524 *dnsseclame = 0; 525 *reclame = 0; 526 return 1; 527 } 528 lock_rw_unlock(&e->lock); 529 return 0; 530 } 531 /* check lameness first */ 532 if(host->lame_type_A && qtype == LDNS_RR_TYPE_A) { 533 lock_rw_unlock(&e->lock); 534 *lame = 1; 535 *dnsseclame = 0; 536 *reclame = 0; 537 return 1; 538 } else if(host->lame_other && qtype != LDNS_RR_TYPE_A) { 539 lock_rw_unlock(&e->lock); 540 *lame = 1; 541 *dnsseclame = 0; 542 *reclame = 0; 543 return 1; 544 } else if(host->isdnsseclame) { 545 lock_rw_unlock(&e->lock); 546 *lame = 0; 547 *dnsseclame = 1; 548 *reclame = 0; 549 return 1; 550 } else if(host->rec_lame) { 551 lock_rw_unlock(&e->lock); 552 *lame = 0; 553 *dnsseclame = 0; 554 *reclame = 1; 555 return 1; 556 } 557 /* no lameness for this type of query */ 558 lock_rw_unlock(&e->lock); 559 *lame = 0; 560 *dnsseclame = 0; 561 *reclame = 0; 562 return 1; 563 } 564 565 size_t 566 infra_get_mem(struct infra_cache* infra) 567 { 568 return sizeof(*infra) + slabhash_get_mem(infra->hosts); 569 } 570