1 /* 2 * services/cache/rrset.c - Resource record set cache. 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file contains the rrset cache. 40 */ 41 #include "config.h" 42 #include "services/cache/rrset.h" 43 #include "sldns/rrdef.h" 44 #include "util/storage/slabhash.h" 45 #include "util/config_file.h" 46 #include "util/data/packed_rrset.h" 47 #include "util/data/msgreply.h" 48 #include "util/data/msgparse.h" 49 #include "util/regional.h" 50 #include "util/alloc.h" 51 #include "util/net_help.h" 52 53 void 54 rrset_markdel(void* key) 55 { 56 struct ub_packed_rrset_key* r = (struct ub_packed_rrset_key*)key; 57 r->id = 0; 58 } 59 60 struct rrset_cache* rrset_cache_create(struct config_file* cfg, 61 struct alloc_cache* alloc) 62 { 63 size_t slabs = (cfg?cfg->rrset_cache_slabs:HASH_DEFAULT_SLABS); 64 size_t startarray = HASH_DEFAULT_STARTARRAY; 65 size_t maxmem = (cfg?cfg->rrset_cache_size:HASH_DEFAULT_MAXMEM); 66 67 struct rrset_cache *r = (struct rrset_cache*)slabhash_create(slabs, 68 startarray, maxmem, ub_rrset_sizefunc, ub_rrset_compare, 69 ub_rrset_key_delete, rrset_data_delete, alloc); 70 slabhash_setmarkdel(&r->table, &rrset_markdel); 71 return r; 72 } 73 74 void rrset_cache_delete(struct rrset_cache* r) 75 { 76 if(!r) 77 return; 78 slabhash_delete(&r->table); 79 /* slabhash delete also does free(r), since table is first in struct*/ 80 } 81 82 struct rrset_cache* rrset_cache_adjust(struct rrset_cache *r, 83 struct config_file* cfg, struct alloc_cache* alloc) 84 { 85 if(!r || !cfg || !slabhash_is_size(&r->table, cfg->rrset_cache_size, 86 cfg->rrset_cache_slabs)) 87 { 88 rrset_cache_delete(r); 89 r = rrset_cache_create(cfg, alloc); 90 } 91 return r; 92 } 93 94 void 95 rrset_cache_touch(struct rrset_cache* r, struct ub_packed_rrset_key* key, 96 hashvalue_type hash, rrset_id_type id) 97 { 98 struct lruhash* table = slabhash_gettable(&r->table, hash); 99 /* 100 * This leads to locking problems, deadlocks, if the caller is 101 * holding any other rrset lock. 102 * Because a lookup through the hashtable does: 103 * tablelock -> entrylock (for that entry caller holds) 104 * And this would do 105 * entrylock(already held) -> tablelock 106 * And if two threads do this, it results in deadlock. 107 * So, the caller must not hold entrylock. 108 */ 109 lock_quick_lock(&table->lock); 110 /* we have locked the hash table, the item can still be deleted. 111 * because it could already have been reclaimed, but not yet set id=0. 112 * This is because some lruhash routines have lazy deletion. 113 * so, we must acquire a lock on the item to verify the id != 0. 114 * also, with hash not changed, we are using the right slab. 115 */ 116 lock_rw_rdlock(&key->entry.lock); 117 if(key->id == id && key->entry.hash == hash) { 118 lru_touch(table, &key->entry); 119 } 120 lock_rw_unlock(&key->entry.lock); 121 lock_quick_unlock(&table->lock); 122 } 123 124 /** see if rrset needs to be updated in the cache */ 125 static int 126 need_to_update_rrset(void* nd, void* cd, time_t timenow, int equal, int ns) 127 { 128 struct packed_rrset_data* newd = (struct packed_rrset_data*)nd; 129 struct packed_rrset_data* cached = (struct packed_rrset_data*)cd; 130 /* o store if rrset has been validated 131 * everything better than bogus data 132 * secure is preferred */ 133 if( newd->security == sec_status_secure && 134 cached->security != sec_status_secure) 135 return 1; 136 if( cached->security == sec_status_bogus && 137 newd->security != sec_status_bogus && !equal) 138 return 1; 139 /* o if current RRset is more trustworthy - insert it */ 140 if( newd->trust > cached->trust ) { 141 /* if the cached rrset is bogus, and this one equal, 142 * do not update the TTL - let it expire. */ 143 if(equal && cached->ttl >= timenow && 144 cached->security == sec_status_bogus) 145 return 0; 146 return 1; 147 } 148 /* o item in cache has expired */ 149 if( cached->ttl < timenow ) 150 return 1; 151 /* o same trust, but different in data - insert it */ 152 if( newd->trust == cached->trust && !equal ) { 153 /* if this is type NS, do not 'stick' to owner that changes 154 * the NS RRset, but use the old TTL for the new data, and 155 * update to fetch the latest data. ttl is not expired, because 156 * that check was before this one. */ 157 if(ns) { 158 size_t i; 159 newd->ttl = cached->ttl; 160 for(i=0; i<(newd->count+newd->rrsig_count); i++) 161 if(newd->rr_ttl[i] > newd->ttl) 162 newd->rr_ttl[i] = newd->ttl; 163 } 164 return 1; 165 } 166 return 0; 167 } 168 169 /** Update RRSet special key ID */ 170 static void 171 rrset_update_id(struct rrset_ref* ref, struct alloc_cache* alloc) 172 { 173 /* this may clear the cache and invalidate lock below */ 174 uint64_t newid = alloc_get_id(alloc); 175 /* obtain writelock */ 176 lock_rw_wrlock(&ref->key->entry.lock); 177 /* check if it was deleted in the meantime, if so, skip update */ 178 if(ref->key->id == ref->id) { 179 ref->key->id = newid; 180 ref->id = newid; 181 } 182 lock_rw_unlock(&ref->key->entry.lock); 183 } 184 185 int 186 rrset_cache_update(struct rrset_cache* r, struct rrset_ref* ref, 187 struct alloc_cache* alloc, time_t timenow) 188 { 189 struct lruhash_entry* e; 190 struct ub_packed_rrset_key* k = ref->key; 191 hashvalue_type h = k->entry.hash; 192 uint16_t rrset_type = ntohs(k->rk.type); 193 int equal = 0; 194 log_assert(ref->id != 0 && k->id != 0); 195 log_assert(k->rk.dname != NULL); 196 /* looks up item with a readlock - no editing! */ 197 if((e=slabhash_lookup(&r->table, h, k, 0)) != 0) { 198 /* return id and key as they will be used in the cache 199 * since the lruhash_insert, if item already exists, deallocs 200 * the passed key in favor of the already stored key. 201 * because of the small gap (see below) this key ptr and id 202 * may prove later to be already deleted, which is no problem 203 * as it only makes a cache miss. 204 */ 205 ref->key = (struct ub_packed_rrset_key*)e->key; 206 ref->id = ref->key->id; 207 equal = rrsetdata_equal((struct packed_rrset_data*)k->entry. 208 data, (struct packed_rrset_data*)e->data); 209 if(!need_to_update_rrset(k->entry.data, e->data, timenow, 210 equal, (rrset_type==LDNS_RR_TYPE_NS))) { 211 /* cache is superior, return that value */ 212 lock_rw_unlock(&e->lock); 213 ub_packed_rrset_parsedelete(k, alloc); 214 if(equal) return 2; 215 return 1; 216 } 217 lock_rw_unlock(&e->lock); 218 /* Go on and insert the passed item. 219 * small gap here, where entry is not locked. 220 * possibly entry is updated with something else. 221 * we then overwrite that with our data. 222 * this is just too bad, its cache anyway. */ 223 /* use insert to update entry to manage lruhash 224 * cache size values nicely. */ 225 } 226 log_assert(ref->key->id != 0); 227 slabhash_insert(&r->table, h, &k->entry, k->entry.data, alloc); 228 if(e) { 229 /* For NSEC, NSEC3, DNAME, when rdata is updated, update 230 * the ID number so that proofs in message cache are 231 * invalidated */ 232 if((rrset_type == LDNS_RR_TYPE_NSEC 233 || rrset_type == LDNS_RR_TYPE_NSEC3 234 || rrset_type == LDNS_RR_TYPE_DNAME) && !equal) { 235 rrset_update_id(ref, alloc); 236 } 237 return 1; 238 } 239 return 0; 240 } 241 242 void rrset_cache_update_wildcard(struct rrset_cache* rrset_cache, 243 struct ub_packed_rrset_key* rrset, uint8_t* ce, size_t ce_len, 244 struct alloc_cache* alloc, time_t timenow) 245 { 246 struct rrset_ref ref; 247 uint8_t wc_dname[LDNS_MAX_DOMAINLEN+3]; 248 rrset = packed_rrset_copy_alloc(rrset, alloc, timenow); 249 if(!rrset) { 250 log_err("malloc failure in rrset_cache_update_wildcard"); 251 return; 252 } 253 /* ce has at least one label less then qname, we can therefore safely 254 * add the wildcard label. */ 255 wc_dname[0] = 1; 256 wc_dname[1] = (uint8_t)'*'; 257 memmove(wc_dname+2, ce, ce_len); 258 259 free(rrset->rk.dname); 260 rrset->rk.dname_len = ce_len + 2; 261 rrset->rk.dname = (uint8_t*)memdup(wc_dname, rrset->rk.dname_len); 262 if(!rrset->rk.dname) { 263 alloc_special_release(alloc, rrset); 264 log_err("memdup failure in rrset_cache_update_wildcard"); 265 return; 266 } 267 268 rrset->entry.hash = rrset_key_hash(&rrset->rk); 269 ref.key = rrset; 270 ref.id = rrset->id; 271 /* ignore ret: if it was in the cache, ref updated */ 272 (void)rrset_cache_update(rrset_cache, &ref, alloc, timenow); 273 } 274 275 struct ub_packed_rrset_key* 276 rrset_cache_lookup(struct rrset_cache* r, uint8_t* qname, size_t qnamelen, 277 uint16_t qtype, uint16_t qclass, uint32_t flags, time_t timenow, 278 int wr) 279 { 280 struct lruhash_entry* e; 281 struct ub_packed_rrset_key key; 282 283 key.entry.key = &key; 284 key.entry.data = NULL; 285 key.rk.dname = qname; 286 key.rk.dname_len = qnamelen; 287 key.rk.type = htons(qtype); 288 key.rk.rrset_class = htons(qclass); 289 key.rk.flags = flags; 290 291 key.entry.hash = rrset_key_hash(&key.rk); 292 293 if((e = slabhash_lookup(&r->table, key.entry.hash, &key, wr))) { 294 /* check TTL */ 295 struct packed_rrset_data* data = 296 (struct packed_rrset_data*)e->data; 297 if(timenow > data->ttl) { 298 lock_rw_unlock(&e->lock); 299 return NULL; 300 } 301 /* we're done */ 302 return (struct ub_packed_rrset_key*)e->key; 303 } 304 return NULL; 305 } 306 307 int 308 rrset_array_lock(struct rrset_ref* ref, size_t count, time_t timenow) 309 { 310 size_t i; 311 for(i=0; i<count; i++) { 312 if(i>0 && ref[i].key == ref[i-1].key) 313 continue; /* only lock items once */ 314 lock_rw_rdlock(&ref[i].key->entry.lock); 315 if(ref[i].id != ref[i].key->id || timenow > 316 ((struct packed_rrset_data*)(ref[i].key->entry.data)) 317 ->ttl) { 318 /* failure! rollback our readlocks */ 319 rrset_array_unlock(ref, i+1); 320 return 0; 321 } 322 } 323 return 1; 324 } 325 326 void 327 rrset_array_unlock(struct rrset_ref* ref, size_t count) 328 { 329 size_t i; 330 for(i=0; i<count; i++) { 331 if(i>0 && ref[i].key == ref[i-1].key) 332 continue; /* only unlock items once */ 333 lock_rw_unlock(&ref[i].key->entry.lock); 334 } 335 } 336 337 void 338 rrset_array_unlock_touch(struct rrset_cache* r, struct regional* scratch, 339 struct rrset_ref* ref, size_t count) 340 { 341 hashvalue_type* h; 342 size_t i; 343 if(count > RR_COUNT_MAX || !(h = (hashvalue_type*)regional_alloc( 344 scratch, sizeof(hashvalue_type)*count))) { 345 log_warn("rrset LRU: memory allocation failed"); 346 h = NULL; 347 } else /* store hash values */ 348 for(i=0; i<count; i++) 349 h[i] = ref[i].key->entry.hash; 350 /* unlock */ 351 for(i=0; i<count; i++) { 352 if(i>0 && ref[i].key == ref[i-1].key) 353 continue; /* only unlock items once */ 354 lock_rw_unlock(&ref[i].key->entry.lock); 355 } 356 if(h) { 357 /* LRU touch, with no rrset locks held */ 358 for(i=0; i<count; i++) { 359 if(i>0 && ref[i].key == ref[i-1].key) 360 continue; /* only touch items once */ 361 rrset_cache_touch(r, ref[i].key, h[i], ref[i].id); 362 } 363 } 364 } 365 366 void 367 rrset_update_sec_status(struct rrset_cache* r, 368 struct ub_packed_rrset_key* rrset, time_t now) 369 { 370 struct packed_rrset_data* updata = 371 (struct packed_rrset_data*)rrset->entry.data; 372 struct lruhash_entry* e; 373 struct packed_rrset_data* cachedata; 374 375 /* hash it again to make sure it has a hash */ 376 rrset->entry.hash = rrset_key_hash(&rrset->rk); 377 378 e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 1); 379 if(!e) 380 return; /* not in the cache anymore */ 381 cachedata = (struct packed_rrset_data*)e->data; 382 if(!rrsetdata_equal(updata, cachedata)) { 383 lock_rw_unlock(&e->lock); 384 return; /* rrset has changed in the meantime */ 385 } 386 /* update the cached rrset */ 387 if(updata->security > cachedata->security) { 388 size_t i; 389 if(updata->trust > cachedata->trust) 390 cachedata->trust = updata->trust; 391 cachedata->security = updata->security; 392 /* for NS records only shorter TTLs, other types: update it */ 393 if(ntohs(rrset->rk.type) != LDNS_RR_TYPE_NS || 394 updata->ttl+now < cachedata->ttl || 395 cachedata->ttl < now || 396 updata->security == sec_status_bogus) { 397 cachedata->ttl = updata->ttl + now; 398 for(i=0; i<cachedata->count+cachedata->rrsig_count; i++) 399 cachedata->rr_ttl[i] = updata->rr_ttl[i]+now; 400 cachedata->ttl_add = now; 401 } 402 } 403 lock_rw_unlock(&e->lock); 404 } 405 406 void 407 rrset_check_sec_status(struct rrset_cache* r, 408 struct ub_packed_rrset_key* rrset, time_t now) 409 { 410 struct packed_rrset_data* updata = 411 (struct packed_rrset_data*)rrset->entry.data; 412 struct lruhash_entry* e; 413 struct packed_rrset_data* cachedata; 414 415 /* hash it again to make sure it has a hash */ 416 rrset->entry.hash = rrset_key_hash(&rrset->rk); 417 418 e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 0); 419 if(!e) 420 return; /* not in the cache anymore */ 421 cachedata = (struct packed_rrset_data*)e->data; 422 if(now > cachedata->ttl || !rrsetdata_equal(updata, cachedata)) { 423 lock_rw_unlock(&e->lock); 424 return; /* expired, or rrset has changed in the meantime */ 425 } 426 if(cachedata->security > updata->security) { 427 updata->security = cachedata->security; 428 if(cachedata->security == sec_status_bogus) { 429 size_t i; 430 updata->ttl = cachedata->ttl - now; 431 for(i=0; i<cachedata->count+cachedata->rrsig_count; i++) 432 if(cachedata->rr_ttl[i] < now) 433 updata->rr_ttl[i] = 0; 434 else updata->rr_ttl[i] = 435 cachedata->rr_ttl[i]-now; 436 } 437 if(cachedata->trust > updata->trust) 438 updata->trust = cachedata->trust; 439 } 440 lock_rw_unlock(&e->lock); 441 } 442 443 void rrset_cache_remove(struct rrset_cache* r, uint8_t* nm, size_t nmlen, 444 uint16_t type, uint16_t dclass, uint32_t flags) 445 { 446 struct ub_packed_rrset_key key; 447 key.entry.key = &key; 448 key.rk.dname = nm; 449 key.rk.dname_len = nmlen; 450 key.rk.rrset_class = htons(dclass); 451 key.rk.type = htons(type); 452 key.rk.flags = flags; 453 key.entry.hash = rrset_key_hash(&key.rk); 454 slabhash_remove(&r->table, key.entry.hash, &key); 455 } 456