/*
 * services/cache/rrset.c - Resource record set cache.
 *
 * Copyright (c) 2007, NLnet Labs. All rights reserved.
 *
 * This software is open source.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of the NLNET LABS nor the names of its contributors may
 * be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * \file
 *
 * This file contains the rrset cache.
 */
#include "config.h"
#include "services/cache/rrset.h"
#include "sldns/rrdef.h"
#include "util/storage/slabhash.h"
#include "util/config_file.h"
#include "util/data/packed_rrset.h"
#include "util/data/msgreply.h"
#include "util/data/msgparse.h"
#include "util/data/dname.h"
#include "util/regional.h"
#include "util/alloc.h"
#include "util/net_help.h"

void
rrset_markdel(void* key)
{
	struct ub_packed_rrset_key* r = (struct ub_packed_rrset_key*)key;
	r->id = 0;
}

struct rrset_cache* rrset_cache_create(struct config_file* cfg,
	struct alloc_cache* alloc)
{
	size_t slabs = (cfg?cfg->rrset_cache_slabs:HASH_DEFAULT_SLABS);
	size_t startarray = HASH_DEFAULT_STARTARRAY;
	size_t maxmem = (cfg?cfg->rrset_cache_size:HASH_DEFAULT_MAXMEM);

	struct rrset_cache *r = (struct rrset_cache*)slabhash_create(slabs,
		startarray, maxmem, ub_rrset_sizefunc, ub_rrset_compare,
		ub_rrset_key_delete, rrset_data_delete, alloc);
	slabhash_setmarkdel(&r->table, &rrset_markdel);
	return r;
}

void rrset_cache_delete(struct rrset_cache* r)
{
	if(!r)
		return;
	slabhash_delete(&r->table);
	/* slabhash delete also does free(r), since table is first in struct*/
}

struct rrset_cache* rrset_cache_adjust(struct rrset_cache *r,
	struct config_file* cfg, struct alloc_cache* alloc)
{
	if(!r || !cfg || !slabhash_is_size(&r->table, cfg->rrset_cache_size,
		cfg->rrset_cache_slabs))
	{
		rrset_cache_delete(r);
		r = rrset_cache_create(cfg, alloc);
	}
	return r;
}

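/*
 * Usage sketch for the three calls above (illustrative only; cfg and alloc
 * stand for the caller's config_file and alloc_cache): the cache is created
 * from config values, kept or recreated when the configured size or slab
 * count changes, and deleted on shutdown.
 *
 *	struct rrset_cache* rc = rrset_cache_create(cfg, alloc);
 *	...
 *	rc = rrset_cache_adjust(rc, cfg, alloc);
 *	rrset_cache_delete(rc);
 */
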
void
rrset_cache_touch(struct rrset_cache* r, struct ub_packed_rrset_key* key,
	hashvalue_type hash, rrset_id_type id)
{
	struct lruhash* table = slabhash_gettable(&r->table, hash);
	/*
	 * This leads to locking problems, deadlocks, if the caller is
	 * holding any other rrset lock.
	 * Because a lookup through the hashtable does:
	 *	tablelock -> entrylock (for that entry caller holds)
	 * And this would do
	 *	entrylock(already held) -> tablelock
	 * And if two threads do this, it results in deadlock.
	 * So, the caller must not hold entrylock.
	 */
	lock_quick_lock(&table->lock);
	/* we have locked the hash table, the item can still be deleted.
	 * because it could already have been reclaimed, but not yet set id=0.
	 * This is because some lruhash routines have lazy deletion.
	 * so, we must acquire a lock on the item to verify the id != 0.
	 * also, with hash not changed, we are using the right slab.
	 */
	lock_rw_rdlock(&key->entry.lock);
	if(key->id == id && key->entry.hash == hash) {
		lru_touch(table, &key->entry);
	}
	lock_rw_unlock(&key->entry.lock);
	lock_quick_unlock(&table->lock);
}

/** see if rrset needs to be updated in the cache */
static int
need_to_update_rrset(void* nd, void* cd, time_t timenow, int equal, int ns)
{
	struct packed_rrset_data* newd = (struct packed_rrset_data*)nd;
	struct packed_rrset_data* cached = (struct packed_rrset_data*)cd;
	/* o if new data is expired, current data is better */
	if( newd->ttl < timenow && cached->ttl >= timenow)
		return 0;
	/* o store if rrset has been validated
	 *	everything better than bogus data
	 *	secure is preferred */
	if( newd->security == sec_status_secure &&
		cached->security != sec_status_secure)
		return 1;
	if( cached->security == sec_status_bogus &&
		newd->security != sec_status_bogus && !equal)
		return 1;
	/* o if the new RRset is more trustworthy - insert it */
	if( newd->trust > cached->trust ) {
		/* if the cached rrset is bogus, and this one equal,
		 * do not update the TTL - let it expire. */
		if(equal && cached->ttl >= timenow &&
			cached->security == sec_status_bogus)
			return 0;
		return 1;
	}
	/* o item in cache has expired */
	if( cached->ttl < timenow )
		return 1;
	/* o same trust, but different in data - insert it */
	if( newd->trust == cached->trust && !equal ) {
		/* if this is type NS, do not 'stick' to owner that changes
		 * the NS RRset, but use the old TTL for the new data, and
		 * update to fetch the latest data. ttl is not expired,
		 * because that check was before this one. */
		if(ns) {
			size_t i;
			newd->ttl = cached->ttl;
			for(i=0; i<(newd->count+newd->rrsig_count); i++)
				if(newd->rr_ttl[i] > newd->ttl)
					newd->rr_ttl[i] = newd->ttl;
		}
		return 1;
	}
	return 0;
}

/** Update RRSet special key ID */
static void
rrset_update_id(struct rrset_ref* ref, struct alloc_cache* alloc)
{
	/* this may clear the cache and invalidate lock below */
	uint64_t newid = alloc_get_id(alloc);
	/* obtain writelock */
	lock_rw_wrlock(&ref->key->entry.lock);
	/* check if it was deleted in the meantime, if so, skip update */
	if(ref->key->id == ref->id) {
		ref->key->id = newid;
		ref->id = newid;
	}
	lock_rw_unlock(&ref->key->entry.lock);
}

int
rrset_cache_update(struct rrset_cache* r, struct rrset_ref* ref,
	struct alloc_cache* alloc, time_t timenow)
{
	struct lruhash_entry* e;
	struct ub_packed_rrset_key* k = ref->key;
	hashvalue_type h = k->entry.hash;
	uint16_t rrset_type = ntohs(k->rk.type);
	int equal = 0;
	log_assert(ref->id != 0 && k->id != 0);
	log_assert(k->rk.dname != NULL);
	/* looks up item with a readlock - no editing! */
	if((e=slabhash_lookup(&r->table, h, k, 0)) != 0) {
		/* return id and key as they will be used in the cache
		 * since the lruhash_insert, if item already exists, deallocs
		 * the passed key in favor of the already stored key.
		 * because of the small gap (see below) this key ptr and id
		 * may prove later to be already deleted, which is no problem
		 * as it only makes a cache miss.
		 */
		ref->key = (struct ub_packed_rrset_key*)e->key;
		ref->id = ref->key->id;
		equal = rrsetdata_equal((struct packed_rrset_data*)k->entry.
			data, (struct packed_rrset_data*)e->data);
		if(!need_to_update_rrset(k->entry.data, e->data, timenow,
			equal, (rrset_type==LDNS_RR_TYPE_NS))) {
			/* cache is superior, return that value */
			lock_rw_unlock(&e->lock);
			ub_packed_rrset_parsedelete(k, alloc);
			if(equal) return 2;
			return 1;
		}
		lock_rw_unlock(&e->lock);
		/* Go on and insert the passed item.
		 * small gap here, where entry is not locked.
		 * possibly entry is updated with something else.
		 * we then overwrite that with our data.
		 * this is just too bad, it's a cache anyway. */
		/* use insert to update entry to manage lruhash
		 * cache size values nicely. */
	}
	log_assert(ref->key->id != 0);
	slabhash_insert(&r->table, h, &k->entry, k->entry.data, alloc);
	if(e) {
		/* For NSEC, NSEC3, DNAME, when rdata is updated, update
		 * the ID number so that proofs in message cache are
		 * invalidated */
		if((rrset_type == LDNS_RR_TYPE_NSEC
			|| rrset_type == LDNS_RR_TYPE_NSEC3
			|| rrset_type == LDNS_RR_TYPE_DNAME) && !equal) {
			rrset_update_id(ref, alloc);
		}
		return 1;
	}
	return 0;
}

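/*
 * Usage sketch for rrset_cache_update (illustrative; k stands for a
 * malloced ub_packed_rrset_key with its entry.hash and id already set,
 * timenow for the current time).  After the call ref.key may point at the
 * key already stored in the cache instead of k, so later accesses must go
 * through the ref, not the old pointer.
 *
 *	struct rrset_ref ref;
 *	ref.key = k;
 *	ref.id = k->id;
 *	int ret = rrset_cache_update(r, &ref, alloc, timenow);
 *
 * ret is 0 if the rrset was not in the cache before and k itself is now
 * stored; nonzero if an entry with this key already existed and ref now
 * points into the cache; 2 additionally means the cached rdata was equal
 * to the new rdata and the cached entry was kept.
 */
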
void rrset_cache_update_wildcard(struct rrset_cache* rrset_cache,
	struct ub_packed_rrset_key* rrset, uint8_t* ce, size_t ce_len,
	struct alloc_cache* alloc, time_t timenow)
{
	struct rrset_ref ref;
	uint8_t wc_dname[LDNS_MAX_DOMAINLEN+3];
	rrset = packed_rrset_copy_alloc(rrset, alloc, timenow);
	if(!rrset) {
		log_err("malloc failure in rrset_cache_update_wildcard");
		return;
	}
	/* ce has at least one label less than qname, we can therefore safely
	 * add the wildcard label. */
	wc_dname[0] = 1;
	wc_dname[1] = (uint8_t)'*';
	memmove(wc_dname+2, ce, ce_len);

	free(rrset->rk.dname);
	rrset->rk.dname_len = ce_len + 2;
	rrset->rk.dname = (uint8_t*)memdup(wc_dname, rrset->rk.dname_len);
	if(!rrset->rk.dname) {
		alloc_special_release(alloc, rrset);
		log_err("memdup failure in rrset_cache_update_wildcard");
		return;
	}

	rrset->entry.hash = rrset_key_hash(&rrset->rk);
	ref.key = rrset;
	ref.id = rrset->id;
	/* ignore ret: if it was in the cache, ref updated */
	(void)rrset_cache_update(rrset_cache, &ref, alloc, timenow);
}

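/*
 * Worked example for the wildcard name built above (illustrative): with
 * ce = \007example\003com\000 (example.com in wire format, ce_len 13), the
 * stored owner name becomes \001*\007example\003com\000, i.e. the name
 * *.example.com, with dname_len 15 (ce_len + 2).
 */
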
struct ub_packed_rrset_key*
rrset_cache_lookup(struct rrset_cache* r, uint8_t* qname, size_t qnamelen,
	uint16_t qtype, uint16_t qclass, uint32_t flags, time_t timenow,
	int wr)
{
	struct lruhash_entry* e;
	struct ub_packed_rrset_key key;

	key.entry.key = &key;
	key.entry.data = NULL;
	key.rk.dname = qname;
	key.rk.dname_len = qnamelen;
	key.rk.type = htons(qtype);
	key.rk.rrset_class = htons(qclass);
	key.rk.flags = flags;

	key.entry.hash = rrset_key_hash(&key.rk);

	if((e = slabhash_lookup(&r->table, key.entry.hash, &key, wr))) {
		/* check TTL */
		struct packed_rrset_data* data =
			(struct packed_rrset_data*)e->data;
		if(timenow > data->ttl) {
			lock_rw_unlock(&e->lock);
			return NULL;
		}
		/* we're done */
		return (struct ub_packed_rrset_key*)e->key;
	}
	return NULL;
}

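/*
 * Usage sketch for rrset_cache_lookup (illustrative): the returned key is
 * still locked (read or write, depending on wr), so the caller must unlock
 * the entry lock when done with the data.
 *
 *	struct ub_packed_rrset_key* rr = rrset_cache_lookup(r, qname,
 *		qnamelen, LDNS_RR_TYPE_A, LDNS_RR_CLASS_IN, 0, timenow, 0);
 *	if(rr) {
 *		... inspect rr->entry.data ...
 *		lock_rw_unlock(&rr->entry.lock);
 *	}
 */
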
int
rrset_array_lock(struct rrset_ref* ref, size_t count, time_t timenow)
{
	size_t i;
	for(i=0; i<count; i++) {
		if(i>0 && ref[i].key == ref[i-1].key)
			continue; /* only lock items once */
		lock_rw_rdlock(&ref[i].key->entry.lock);
		if(ref[i].id != ref[i].key->id || timenow >
			((struct packed_rrset_data*)(ref[i].key->entry.data))
			->ttl) {
			/* failure! rollback our readlocks */
			rrset_array_unlock(ref, i+1);
			return 0;
		}
	}
	return 1;
}

void
rrset_array_unlock(struct rrset_ref* ref, size_t count)
{
	size_t i;
	for(i=0; i<count; i++) {
		if(i>0 && ref[i].key == ref[i-1].key)
			continue; /* only unlock items once */
		lock_rw_unlock(&ref[i].key->entry.lock);
	}
}

void
rrset_array_unlock_touch(struct rrset_cache* r, struct regional* scratch,
	struct rrset_ref* ref, size_t count)
{
	hashvalue_type* h;
	size_t i;
	if(count > RR_COUNT_MAX || !(h = (hashvalue_type*)regional_alloc(
		scratch, sizeof(hashvalue_type)*count))) {
		log_warn("rrset LRU: memory allocation failed");
		h = NULL;
	} else	/* store hash values */
		for(i=0; i<count; i++)
			h[i] = ref[i].key->entry.hash;
	/* unlock */
	for(i=0; i<count; i++) {
		if(i>0 && ref[i].key == ref[i-1].key)
			continue; /* only unlock items once */
		lock_rw_unlock(&ref[i].key->entry.lock);
	}
	if(h) {
		/* LRU touch, with no rrset locks held */
		for(i=0; i<count; i++) {
			if(i>0 && ref[i].key == ref[i-1].key)
				continue; /* only touch items once */
			rrset_cache_touch(r, ref[i].key, h[i], ref[i].id);
		}
	}
}

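/*
 * Typical pattern for the three array functions above (illustrative; refs
 * and count stand for a caller's array of rrset_ref entries, as kept by a
 * message cache reply): lock all rrsets and verify they are still present
 * and unexpired, read the data, then unlock and touch the LRU without
 * holding any rrset lock.
 *
 *	if(rrset_array_lock(refs, count, timenow)) {
 *		... read the rrset data ...
 *		rrset_array_unlock_touch(r, scratch, refs, count);
 *	}
 */
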
void
rrset_update_sec_status(struct rrset_cache* r,
	struct ub_packed_rrset_key* rrset, time_t now)
{
	struct packed_rrset_data* updata =
		(struct packed_rrset_data*)rrset->entry.data;
	struct lruhash_entry* e;
	struct packed_rrset_data* cachedata;

	/* hash it again to make sure it has a hash */
	rrset->entry.hash = rrset_key_hash(&rrset->rk);

	e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 1);
	if(!e)
		return; /* not in the cache anymore */
	cachedata = (struct packed_rrset_data*)e->data;
	if(!rrsetdata_equal(updata, cachedata)) {
		lock_rw_unlock(&e->lock);
		return; /* rrset has changed in the meantime */
	}
	/* update the cached rrset */
	if(updata->security > cachedata->security) {
		size_t i;
		if(updata->trust > cachedata->trust)
			cachedata->trust = updata->trust;
		cachedata->security = updata->security;
		/* for NS records only shorter TTLs, other types: update it */
		if(ntohs(rrset->rk.type) != LDNS_RR_TYPE_NS ||
			updata->ttl+now < cachedata->ttl ||
			cachedata->ttl < now ||
			updata->security == sec_status_bogus) {
			cachedata->ttl = updata->ttl + now;
			for(i=0; i<cachedata->count+cachedata->rrsig_count; i++)
				cachedata->rr_ttl[i] = updata->rr_ttl[i]+now;
			cachedata->ttl_add = now;
		}
	}
	lock_rw_unlock(&e->lock);
}

void
rrset_check_sec_status(struct rrset_cache* r,
	struct ub_packed_rrset_key* rrset, time_t now)
{
	struct packed_rrset_data* updata =
		(struct packed_rrset_data*)rrset->entry.data;
	struct lruhash_entry* e;
	struct packed_rrset_data* cachedata;

	/* hash it again to make sure it has a hash */
	rrset->entry.hash = rrset_key_hash(&rrset->rk);

	e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 0);
	if(!e)
		return; /* not in the cache anymore */
	cachedata = (struct packed_rrset_data*)e->data;
	if(now > cachedata->ttl || !rrsetdata_equal(updata, cachedata)) {
		lock_rw_unlock(&e->lock);
		return; /* expired, or rrset has changed in the meantime */
	}
	if(cachedata->security > updata->security) {
		updata->security = cachedata->security;
		if(cachedata->security == sec_status_bogus) {
			size_t i;
			updata->ttl = cachedata->ttl - now;
			for(i=0; i<cachedata->count+cachedata->rrsig_count; i++)
				if(cachedata->rr_ttl[i] < now)
					updata->rr_ttl[i] = 0;
				else	updata->rr_ttl[i] =
					cachedata->rr_ttl[i]-now;
		}
		if(cachedata->trust > updata->trust)
			updata->trust = cachedata->trust;
	}
	lock_rw_unlock(&e->lock);
}

void
rrset_cache_remove_above(struct rrset_cache* r, uint8_t** qname, size_t*
	qnamelen, uint16_t searchtype, uint16_t qclass, time_t now, uint8_t*
	qnametop, size_t qnametoplen)
{
	struct ub_packed_rrset_key *rrset;
	uint8_t lablen;

	while(*qnamelen > 0) {
		/* look one label higher */
		lablen = **qname;
		*qname += lablen + 1;
		*qnamelen -= lablen + 1;
		if(*qnamelen <= 0)
			return;

		/* stop at qnametop */
		if(qnametop && *qnamelen == qnametoplen &&
			query_dname_compare(*qname, qnametop)==0)
			return;

		if(verbosity >= VERB_ALGO) {
			/* looks up with a time of 0, to see expired entries */
			if((rrset = rrset_cache_lookup(r, *qname,
				*qnamelen, searchtype, qclass, 0, 0, 0))) {
				struct packed_rrset_data* data =
					(struct packed_rrset_data*)rrset->entry.data;
				int expired = (now > data->ttl);
				lock_rw_unlock(&rrset->entry.lock);
				if(expired)
					log_nametypeclass(verbosity, "this "
						"(grand)parent rrset will be "
						"removed (expired)",
						*qname, searchtype, qclass);
				else	log_nametypeclass(verbosity, "this "
						"(grand)parent rrset will be "
						"removed",
						*qname, searchtype, qclass);
			}
		}
		rrset_cache_remove(r, *qname, *qnamelen, searchtype, qclass, 0);
	}
}

int
rrset_cache_expired_above(struct rrset_cache* r, uint8_t** qname, size_t*
	qnamelen, uint16_t searchtype, uint16_t qclass, time_t now, uint8_t*
	qnametop, size_t qnametoplen)
{
	struct ub_packed_rrset_key *rrset;
	uint8_t lablen;

	while(*qnamelen > 0) {
		/* look one label higher */
		lablen = **qname;
		*qname += lablen + 1;
		*qnamelen -= lablen + 1;
		if(*qnamelen <= 0)
			break;

		/* looks up with a time of 0, to see expired entries */
		if((rrset = rrset_cache_lookup(r, *qname,
			*qnamelen, searchtype, qclass, 0, 0, 0))) {
			struct packed_rrset_data* data =
				(struct packed_rrset_data*)rrset->entry.data;
			if(now > data->ttl) {
				/* it is expired, this is not wanted */
				lock_rw_unlock(&rrset->entry.lock);
				log_nametypeclass(VERB_ALGO, "this rrset is expired", *qname, searchtype, qclass);
				return 1;
			}
			/* it is not expired, continue looking */
			lock_rw_unlock(&rrset->entry.lock);
		}

		/* do not look above the qnametop. */
		if(qnametop && *qnamelen == qnametoplen &&
			query_dname_compare(*qname, qnametop)==0)
			break;
	}
	return 0;
}

void rrset_cache_remove(struct rrset_cache* r, uint8_t* nm, size_t nmlen,
	uint16_t type, uint16_t dclass, uint32_t flags)
{
	struct ub_packed_rrset_key key;
	key.entry.key = &key;
	key.rk.dname = nm;
	key.rk.dname_len = nmlen;
	key.rk.rrset_class = htons(dclass);
	key.rk.type = htons(type);
	key.rk.flags = flags;
	key.entry.hash = rrset_key_hash(&key.rk);
	slabhash_remove(&r->table, key.entry.hash, &key);
}