/*
 * services/cache/rrset.c - Resource record set cache.
 *
 * Copyright (c) 2007, NLnet Labs. All rights reserved.
 *
 * This software is open source.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * Neither the name of the NLNET LABS nor the names of its contributors may
 * be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/**
 * \file
 *
 * This file contains the rrset cache.
 */
#include "config.h"
#include "services/cache/rrset.h"
#include "sldns/rrdef.h"
#include "util/storage/slabhash.h"
#include "util/config_file.h"
#include "util/data/packed_rrset.h"
#include "util/data/msgreply.h"
#include "util/data/msgparse.h"
#include "util/data/dname.h"
#include "util/regional.h"
#include "util/alloc.h"
#include "util/net_help.h"

void
rrset_markdel(void* key)
{
	struct ub_packed_rrset_key* r = (struct ub_packed_rrset_key*)key;
	r->id = 0;
}

struct rrset_cache* rrset_cache_create(struct config_file* cfg,
	struct alloc_cache* alloc)
{
	size_t slabs = (cfg?cfg->rrset_cache_slabs:HASH_DEFAULT_SLABS);
	size_t startarray = HASH_DEFAULT_STARTARRAY;
	size_t maxmem = (cfg?cfg->rrset_cache_size:HASH_DEFAULT_MAXMEM);

	struct rrset_cache *r = (struct rrset_cache*)slabhash_create(slabs,
		startarray, maxmem, ub_rrset_sizefunc, ub_rrset_compare,
		ub_rrset_key_delete, rrset_data_delete, alloc);
	if(!r)
		return NULL;
	slabhash_setmarkdel(&r->table, &rrset_markdel);
	return r;
}

void rrset_cache_delete(struct rrset_cache* r)
{
	if(!r)
		return;
	slabhash_delete(&r->table);
	/* slabhash delete also does free(r), since table is first in struct */
}

struct rrset_cache* rrset_cache_adjust(struct rrset_cache *r,
	struct config_file* cfg, struct alloc_cache* alloc)
{
	if(!r || !cfg || !slabhash_is_size(&r->table, cfg->rrset_cache_size,
		cfg->rrset_cache_slabs))
	{
		rrset_cache_delete(r);
		r = rrset_cache_create(cfg, alloc);
	}
	return r;
}

void
rrset_cache_touch(struct rrset_cache* r, struct ub_packed_rrset_key* key,
	hashvalue_type hash, rrset_id_type id)
{
	struct lruhash* table =
		slabhash_gettable(&r->table, hash);
	/*
	 * This leads to locking problems, deadlocks, if the caller is
	 * holding any other rrset lock.
	 * Because a lookup through the hashtable does:
	 *	tablelock -> entrylock (for the entry the caller holds)
	 * And this would do
	 *	entrylock(already held) -> tablelock
	 * And if two threads do this, it results in deadlock.
	 * So, the caller must not hold entrylock.
	 */
	lock_quick_lock(&table->lock);
	/* We have locked the hash table, but the item can still be deleted,
	 * because it could already have been reclaimed, but not yet set id=0.
	 * This is because some lruhash routines have lazy deletion.
	 * So, we must acquire a lock on the item to verify that id != 0.
	 * Also, since the hash has not changed, we are using the right slab.
	 */
	lock_rw_rdlock(&key->entry.lock);
	if(key->id == id && key->entry.hash == hash) {
		lru_touch(table, &key->entry);
	}
	lock_rw_unlock(&key->entry.lock);
	lock_quick_unlock(&table->lock);
}

/** see if rrset needs to be updated in the cache */
static int
need_to_update_rrset(void* nd, void* cd, time_t timenow, int equal, int ns)
{
	struct packed_rrset_data* newd = (struct packed_rrset_data*)nd;
	struct packed_rrset_data* cached = (struct packed_rrset_data*)cd;
	/* o if new data is expired, cached data is better */
	if( newd->ttl < timenow && timenow <= cached->ttl)
		return 0;
	/* o store if rrset has been validated
	 *   everything better than bogus data
	 *   secure is preferred */
	if( newd->security == sec_status_secure &&
		cached->security != sec_status_secure)
		return 1;
	if( cached->security == sec_status_bogus &&
		newd->security != sec_status_bogus && !equal)
		return 1;
	/* o if new RRset is more trustworthy - insert it */
	if( newd->trust > cached->trust ) {
		/* if the cached rrset is bogus, and new is equal,
		 * do not update the TTL - let it expire. */
		if(equal && cached->ttl >= timenow &&
			cached->security == sec_status_bogus)
			return 0;
		return 1;
	}
	/* o item in cache has expired */
	if( cached->ttl < timenow )
		return 1;
	/* o same trust, but different in data - insert it */
	if( newd->trust == cached->trust && !equal ) {
		/* if this is type NS, do not 'stick' to an owner that changes
		 * the NS RRset, but use the cached TTL for the new data, and
		 * update to fetch the latest data. The TTL is not expired,
		 * because that check was done above.
		 */
		if(ns) {
			size_t i;
			newd->ttl = cached->ttl;
			for(i=0; i<(newd->count+newd->rrsig_count); i++)
				if(newd->rr_ttl[i] > newd->ttl)
					newd->rr_ttl[i] = newd->ttl;
		}
		return 1;
	}
	return 0;
}

/** Update RRSet special key ID */
static void
rrset_update_id(struct rrset_ref* ref, struct alloc_cache* alloc)
{
	/* this may clear the cache and invalidate lock below */
	uint64_t newid = alloc_get_id(alloc);
	/* obtain writelock */
	lock_rw_wrlock(&ref->key->entry.lock);
	/* check if it was deleted in the meantime, if so, skip update */
	if(ref->key->id == ref->id) {
		ref->key->id = newid;
		ref->id = newid;
	}
	lock_rw_unlock(&ref->key->entry.lock);
}

int
rrset_cache_update(struct rrset_cache* r, struct rrset_ref* ref,
	struct alloc_cache* alloc, time_t timenow)
{
	struct lruhash_entry* e;
	struct ub_packed_rrset_key* k = ref->key;
	hashvalue_type h = k->entry.hash;
	uint16_t rrset_type = ntohs(k->rk.type);
	int equal = 0;
	log_assert(ref->id != 0 && k->id != 0);
	log_assert(k->rk.dname != NULL);
	/* looks up item with a readlock - no editing! */
	if((e=slabhash_lookup(&r->table, h, k, 0)) != 0) {
		/* return id and key as they will be used in the cache,
		 * since lruhash_insert, if the item already exists,
		 * deallocates the passed key in favor of the stored key.
		 * Because of the small gap (see below) this key ptr and id
		 * may prove later to be already deleted, which is no problem
		 * as it only makes a cache miss.
		 */
		ref->key = (struct ub_packed_rrset_key*)e->key;
		ref->id = ref->key->id;
		equal = rrsetdata_equal((struct packed_rrset_data*)k->entry.
			data, (struct packed_rrset_data*)e->data);
		if(!need_to_update_rrset(k->entry.data, e->data, timenow,
			equal, (rrset_type==LDNS_RR_TYPE_NS))) {
			/* cache is superior, return that value */
			lock_rw_unlock(&e->lock);
			ub_packed_rrset_parsedelete(k, alloc);
			if(equal) return 2;
			return 1;
		}
		lock_rw_unlock(&e->lock);
		/* Go on and insert the passed item.
		 * There is a small gap here, where the entry is not locked.
		 * Possibly the entry is updated with something else;
		 * we then overwrite that with our data.
		 * This is just too bad, it is a cache anyway. */
		/* use insert to update entry to manage lruhash
		 * cache size values nicely. */
	}
	log_assert(ref->key->id != 0);
	slabhash_insert(&r->table, h, &k->entry, k->entry.data, alloc);
	if(e) {
		/* For NSEC, NSEC3, DNAME, when rdata is updated, update
		 * the ID number so that proofs in the message cache are
		 * invalidated */
		if((rrset_type == LDNS_RR_TYPE_NSEC
			|| rrset_type == LDNS_RR_TYPE_NSEC3
			|| rrset_type == LDNS_RR_TYPE_DNAME) && !equal) {
			rrset_update_id(ref, alloc);
		}
		return 1;
	}
	return 0;
}

void rrset_cache_update_wildcard(struct rrset_cache* rrset_cache,
	struct ub_packed_rrset_key* rrset, uint8_t* ce, size_t ce_len,
	struct alloc_cache* alloc, time_t timenow)
{
	struct rrset_ref ref;
	uint8_t wc_dname[LDNS_MAX_DOMAINLEN+3];
	rrset = packed_rrset_copy_alloc(rrset, alloc, timenow);
	if(!rrset) {
		log_err("malloc failure in rrset_cache_update_wildcard");
		return;
	}
	/* ce has at least one label less than qname, so we can safely
	 * prepend the wildcard label.
	 */
	wc_dname[0] = 1;
	wc_dname[1] = (uint8_t)'*';
	memmove(wc_dname+2, ce, ce_len);

	free(rrset->rk.dname);
	rrset->rk.dname_len = ce_len + 2;
	rrset->rk.dname = (uint8_t*)memdup(wc_dname, rrset->rk.dname_len);
	if(!rrset->rk.dname) {
		alloc_special_release(alloc, rrset);
		log_err("memdup failure in rrset_cache_update_wildcard");
		return;
	}

	rrset->entry.hash = rrset_key_hash(&rrset->rk);
	ref.key = rrset;
	ref.id = rrset->id;
	/* ignore ret: if it was in the cache, ref updated */
	(void)rrset_cache_update(rrset_cache, &ref, alloc, timenow);
}

struct ub_packed_rrset_key*
rrset_cache_lookup(struct rrset_cache* r, uint8_t* qname, size_t qnamelen,
	uint16_t qtype, uint16_t qclass, uint32_t flags, time_t timenow,
	int wr)
{
	struct lruhash_entry* e;
	struct ub_packed_rrset_key key;

	key.entry.key = &key;
	key.entry.data = NULL;
	key.rk.dname = qname;
	key.rk.dname_len = qnamelen;
	key.rk.type = htons(qtype);
	key.rk.rrset_class = htons(qclass);
	key.rk.flags = flags;

	key.entry.hash = rrset_key_hash(&key.rk);

	if((e = slabhash_lookup(&r->table, key.entry.hash, &key, wr))) {
		/* check TTL */
		struct packed_rrset_data* data =
			(struct packed_rrset_data*)e->data;
		if(timenow > data->ttl) {
			lock_rw_unlock(&e->lock);
			return NULL;
		}
		/* we're done */
		return (struct ub_packed_rrset_key*)e->key;
	}
	return NULL;
}

int
rrset_array_lock(struct rrset_ref* ref, size_t count, time_t timenow)
{
	size_t i;
	for(i=0; i<count; i++) {
		if(i>0 && ref[i].key == ref[i-1].key)
			continue; /* only lock items once */
		lock_rw_rdlock(&ref[i].key->entry.lock);
		if(ref[i].id != ref[i].key->id || timenow >
			((struct packed_rrset_data*)(ref[i].key->entry.data))
			->ttl) {
			/* failure!
			 * rollback our readlocks */
			rrset_array_unlock(ref, i+1);
			return 0;
		}
	}
	return 1;
}

void
rrset_array_unlock(struct rrset_ref* ref, size_t count)
{
	size_t i;
	for(i=0; i<count; i++) {
		if(i>0 && ref[i].key == ref[i-1].key)
			continue; /* only unlock items once */
		lock_rw_unlock(&ref[i].key->entry.lock);
	}
}

void
rrset_array_unlock_touch(struct rrset_cache* r, struct regional* scratch,
	struct rrset_ref* ref, size_t count)
{
	hashvalue_type* h;
	size_t i;
	if(count > RR_COUNT_MAX || !(h = (hashvalue_type*)regional_alloc(
		scratch, sizeof(hashvalue_type)*count))) {
		log_warn("rrset LRU: memory allocation failed");
		h = NULL;
	} else	/* store hash values */
		for(i=0; i<count; i++)
			h[i] = ref[i].key->entry.hash;
	/* unlock */
	for(i=0; i<count; i++) {
		if(i>0 && ref[i].key == ref[i-1].key)
			continue; /* only unlock items once */
		lock_rw_unlock(&ref[i].key->entry.lock);
	}
	if(h) {
		/* LRU touch, with no rrset locks held */
		for(i=0; i<count; i++) {
			if(i>0 && ref[i].key == ref[i-1].key)
				continue; /* only touch items once */
			rrset_cache_touch(r, ref[i].key, h[i], ref[i].id);
		}
	}
}

void
rrset_update_sec_status(struct rrset_cache* r,
	struct ub_packed_rrset_key* rrset, time_t now)
{
	struct packed_rrset_data* updata =
		(struct packed_rrset_data*)rrset->entry.data;
	struct lruhash_entry* e;
	struct packed_rrset_data* cachedata;

	/* hash it again to make sure it has a hash */
	rrset->entry.hash = rrset_key_hash(&rrset->rk);

	e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 1);
	if(!e)
		return; /* not in the cache anymore */
	cachedata = (struct packed_rrset_data*)e->data;
	if(!rrsetdata_equal(updata, cachedata)) {
		lock_rw_unlock(&e->lock);
		return; /* rrset has changed in the meantime */
	}
	/* update the cached rrset */
	if(updata->security > cachedata->security) {
		size_t i;
		if(updata->trust > cachedata->trust)
			cachedata->trust = updata->trust;
		cachedata->security = updata->security;
		/* for NS records only shorter TTLs, other types: update it */
		if(ntohs(rrset->rk.type) != LDNS_RR_TYPE_NS ||
			updata->ttl+now < cachedata->ttl ||
			cachedata->ttl < now ||
			updata->security == sec_status_bogus) {
			cachedata->ttl = updata->ttl + now;
			for(i=0; i<cachedata->count+cachedata->rrsig_count; i++)
				cachedata->rr_ttl[i] = updata->rr_ttl[i]+now;
			cachedata->ttl_add = now;
		}
	}
	lock_rw_unlock(&e->lock);
}

void
rrset_check_sec_status(struct rrset_cache* r,
	struct ub_packed_rrset_key* rrset, time_t now)
{
	struct packed_rrset_data* updata =
		(struct packed_rrset_data*)rrset->entry.data;
	struct lruhash_entry* e;
	struct packed_rrset_data* cachedata;

	/* hash it again to make sure it has a hash */
	rrset->entry.hash = rrset_key_hash(&rrset->rk);

	e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 0);
	if(!e)
		return; /* not in the cache anymore */
	cachedata = (struct packed_rrset_data*)e->data;
	if(now > cachedata->ttl || !rrsetdata_equal(updata, cachedata)) {
		lock_rw_unlock(&e->lock);
		return; /* expired, or rrset has changed in the meantime */
	}
	if(cachedata->security > updata->security) {
		updata->security = cachedata->security;
		if(cachedata->security == sec_status_bogus) {
			size_t i;
			updata->ttl = cachedata->ttl - now;
			for(i=0; i<cachedata->count+cachedata->rrsig_count; i++)
				if(cachedata->rr_ttl[i] < now)
					updata->rr_ttl[i] = 0;
				else	updata->rr_ttl[i] =
					cachedata->rr_ttl[i]-now;
		}
		if(cachedata->trust > updata->trust)
			updata->trust = cachedata->trust;
	}
	lock_rw_unlock(&e->lock);
}

void
rrset_cache_remove_above(struct rrset_cache* r, uint8_t** qname, size_t*
	qnamelen, uint16_t searchtype, uint16_t qclass, time_t now, uint8_t*
	qnametop, size_t qnametoplen)
{
	struct ub_packed_rrset_key *rrset;
	uint8_t lablen;

	while(*qnamelen > 0) {
		/* look one label higher */
		lablen = **qname;
		*qname += lablen + 1;
		*qnamelen -= lablen + 1;
		if(*qnamelen <= 0)
			return;

		/* stop at qnametop */
		if(qnametop && *qnamelen == qnametoplen &&
			query_dname_compare(*qname, qnametop)==0)
			return;

		if(verbosity >= VERB_ALGO) {
			/* looks up with a time of 0, to see expired entries */
			if((rrset = rrset_cache_lookup(r, *qname,
				*qnamelen, searchtype, qclass, 0, 0, 0))) {
				struct packed_rrset_data* data =
					(struct packed_rrset_data*)rrset->entry.data;
				int expired = (now > data->ttl);
				lock_rw_unlock(&rrset->entry.lock);
				if(expired)
					log_nametypeclass(verbosity, "this "
						"(grand)parent rrset will be "
						"removed (expired)",
						*qname, searchtype, qclass);
				else	log_nametypeclass(verbosity, "this "
						"(grand)parent rrset will be "
						"removed",
						*qname, searchtype, qclass);
			}
		}
		rrset_cache_remove(r, *qname, *qnamelen, searchtype, qclass, 0);
	}
}

int
rrset_cache_expired_above(struct rrset_cache* r, uint8_t** qname, size_t*
	qnamelen, uint16_t searchtype, uint16_t qclass, time_t now, uint8_t*
	qnametop, size_t qnametoplen)
{
	struct ub_packed_rrset_key *rrset;
	uint8_t lablen;

	while(*qnamelen > 0) {
		/* look one label higher */
		lablen = **qname;
		*qname += lablen + 1;
		*qnamelen -= lablen + 1;
		if(*qnamelen <= 0)
			break;

		/* looks up with a time of 0, to see expired entries */
		if((rrset = rrset_cache_lookup(r, *qname,
			*qnamelen, searchtype, qclass, 0, 0, 0))) {
			struct packed_rrset_data* data =
				(struct packed_rrset_data*)rrset->entry.data;
			if(now > data->ttl) {
				/* it is expired, this is not wanted */
				lock_rw_unlock(&rrset->entry.lock);
				log_nametypeclass(VERB_ALGO, "this rrset is "
					"expired", *qname, searchtype, qclass);
				return 1;
			}
			/* it is not expired, continue looking */
			lock_rw_unlock(&rrset->entry.lock);
		}

		/* do not look above the qnametop. */
		if(qnametop && *qnamelen == qnametoplen &&
			query_dname_compare(*qname, qnametop)==0)
			break;
	}
	return 0;
}

void rrset_cache_remove(struct rrset_cache* r, uint8_t* nm, size_t nmlen,
	uint16_t type, uint16_t dclass, uint32_t flags)
{
	struct ub_packed_rrset_key key;
	key.entry.key = &key;
	key.rk.dname = nm;
	key.rk.dname_len = nmlen;
	key.rk.rrset_class = htons(dclass);
	key.rk.type = htons(type);
	key.rk.flags = flags;
	key.entry.hash = rrset_key_hash(&key.rk);
	slabhash_remove(&r->table, key.entry.hash, &key);
}
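
/*
 * Illustrative usage sketch, not part of the cache implementation above and
 * excluded from compilation: a minimal caller-side pattern for this cache.
 * The variables 'cache', 'qname', 'qname_len' and 'now' are hypothetical
 * caller-supplied values. rrset_cache_lookup() returns the entry with its
 * lock held (read or write, per the 'wr' argument), so the entry lock must
 * be released before any other cache operation, as the locking comments in
 * rrset_cache_touch() explain.
 */
#if 0
static void
example_rrset_cache_use(struct rrset_cache* cache, uint8_t* qname,
	size_t qname_len, time_t now)
{
	/* lookup with a read lock (wr=0); NULL if absent or TTL expired */
	struct ub_packed_rrset_key* k = rrset_cache_lookup(cache, qname,
		qname_len, LDNS_RR_TYPE_A, LDNS_RR_CLASS_IN, 0, now, 0);
	if(!k)
		return;
	/* ... inspect (struct packed_rrset_data*)k->entry.data here ... */
	/* release the entry lock before touching the cache again */
	lock_rw_unlock(&k->entry.lock);
}
#endif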