1 /* 2 * services/cache/rrset.c - Resource record set cache. 3 * 4 * Copyright (c) 2007, NLnet Labs. All rights reserved. 5 * 6 * This software is open source. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * Redistributions of source code must retain the above copyright notice, 13 * this list of conditions and the following disclaimer. 14 * 15 * Redistributions in binary form must reproduce the above copyright notice, 16 * this list of conditions and the following disclaimer in the documentation 17 * and/or other materials provided with the distribution. 18 * 19 * Neither the name of the NLNET LABS nor the names of its contributors may 20 * be used to endorse or promote products derived from this software without 21 * specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 27 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 28 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED 29 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 31 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 32 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /** 37 * \file 38 * 39 * This file contains the rrset cache. 40 */ 41 #include "config.h" 42 #include "services/cache/rrset.h" 43 #include "sldns/rrdef.h" 44 #include "util/storage/slabhash.h" 45 #include "util/config_file.h" 46 #include "util/data/packed_rrset.h" 47 #include "util/data/msgreply.h" 48 #include "util/data/msgparse.h" 49 #include "util/data/dname.h" 50 #include "util/regional.h" 51 #include "util/alloc.h" 52 #include "util/net_help.h" 53 54 void 55 rrset_markdel(void* key) 56 { 57 struct ub_packed_rrset_key* r = (struct ub_packed_rrset_key*)key; 58 r->id = 0; 59 } 60 61 struct rrset_cache* rrset_cache_create(struct config_file* cfg, 62 struct alloc_cache* alloc) 63 { 64 size_t slabs = (cfg?cfg->rrset_cache_slabs:HASH_DEFAULT_SLABS); 65 size_t startarray = HASH_DEFAULT_STARTARRAY; 66 size_t maxmem = (cfg?cfg->rrset_cache_size:HASH_DEFAULT_MAXMEM); 67 68 struct rrset_cache *r = (struct rrset_cache*)slabhash_create(slabs, 69 startarray, maxmem, ub_rrset_sizefunc, ub_rrset_compare, 70 ub_rrset_key_delete, rrset_data_delete, alloc); 71 if(!r) 72 return NULL; 73 slabhash_setmarkdel(&r->table, &rrset_markdel); 74 return r; 75 } 76 77 void rrset_cache_delete(struct rrset_cache* r) 78 { 79 if(!r) 80 return; 81 slabhash_delete(&r->table); 82 /* slabhash delete also does free(r), since table is first in struct*/ 83 } 84 85 struct rrset_cache* rrset_cache_adjust(struct rrset_cache *r, 86 struct config_file* cfg, struct alloc_cache* alloc) 87 { 88 if(!r || !cfg || !slabhash_is_size(&r->table, cfg->rrset_cache_size, 89 cfg->rrset_cache_slabs)) 90 { 91 rrset_cache_delete(r); 92 r = rrset_cache_create(cfg, alloc); 93 } 94 return r; 95 } 96 97 void 98 rrset_cache_touch(struct rrset_cache* r, struct ub_packed_rrset_key* key, 99 hashvalue_type hash, rrset_id_type id) 100 { 101 struct lruhash* table = slabhash_gettable(&r->table, hash); 102 /* 103 * This leads to locking problems, deadlocks, if the caller is 104 * holding any other rrset lock. 105 * Because a lookup through the hashtable does: 106 * tablelock -> entrylock (for that entry caller holds) 107 * And this would do 108 * entrylock(already held) -> tablelock 109 * And if two threads do this, it results in deadlock. 110 * So, the caller must not hold entrylock. 111 */ 112 lock_quick_lock(&table->lock); 113 /* we have locked the hash table, the item can still be deleted. 114 * because it could already have been reclaimed, but not yet set id=0. 115 * This is because some lruhash routines have lazy deletion. 116 * so, we must acquire a lock on the item to verify the id != 0. 117 * also, with hash not changed, we are using the right slab. 118 */ 119 lock_rw_rdlock(&key->entry.lock); 120 if(key->id == id && key->entry.hash == hash) { 121 lru_touch(table, &key->entry); 122 } 123 lock_rw_unlock(&key->entry.lock); 124 lock_quick_unlock(&table->lock); 125 } 126 127 /** see if rrset needs to be updated in the cache */ 128 static int 129 need_to_update_rrset(void* nd, void* cd, time_t timenow, int equal, int ns) 130 { 131 struct packed_rrset_data* newd = (struct packed_rrset_data*)nd; 132 struct packed_rrset_data* cached = (struct packed_rrset_data*)cd; 133 /* o if new data is expired, cached data is better */ 134 if( TTL_IS_EXPIRED(newd->ttl, timenow) && !TTL_IS_EXPIRED(cached->ttl, timenow)) 135 return 0; 136 /* o store if rrset has been validated 137 * everything better than bogus data 138 * secure is preferred */ 139 if( newd->security == sec_status_secure && 140 cached->security != sec_status_secure) 141 return 1; 142 if( cached->security == sec_status_bogus && 143 newd->security != sec_status_bogus && !equal) 144 return 1; 145 /* o if new RRset is more trustworthy - insert it */ 146 if( newd->trust > cached->trust ) { 147 /* if the cached rrset is bogus, and new is equal, 148 * do not update the TTL - let it expire. */ 149 if(equal && !TTL_IS_EXPIRED(cached->ttl, timenow) && 150 cached->security == sec_status_bogus) 151 return 0; 152 /* ghost-domain: never let an NS overwrite extend lifetime 153 * past the entry it replaces, regardless of trust. */ 154 if(ns && !TTL_IS_EXPIRED(cached->ttl, timenow) && 155 newd->ttl > cached->ttl) { 156 size_t i; 157 newd->ttl = cached->ttl; 158 for(i=0; i<(newd->count+newd->rrsig_count); i++) 159 if(newd->rr_ttl[i] > newd->ttl) 160 newd->rr_ttl[i] = newd->ttl; 161 } 162 return 1; 163 } 164 /* o item in cache has expired */ 165 if( TTL_IS_EXPIRED(cached->ttl, timenow) ) 166 return 1; 167 /* o same trust, but different in data - insert it */ 168 if( newd->trust == cached->trust && !equal ) { 169 /* if this is type NS, do not 'stick' to owner that changes 170 * the NS RRset, but use the cached TTL for the new data, and 171 * update to fetch the latest data. ttl is not expired, because 172 * that check was before this one. */ 173 if(ns) { 174 size_t i; 175 newd->ttl = cached->ttl; 176 for(i=0; i<(newd->count+newd->rrsig_count); i++) 177 if(newd->rr_ttl[i] > newd->ttl) 178 newd->rr_ttl[i] = newd->ttl; 179 } 180 return 1; 181 } 182 return 0; 183 } 184 185 /** Update RRSet special key ID */ 186 static void 187 rrset_update_id(struct rrset_ref* ref, struct alloc_cache* alloc) 188 { 189 /* this may clear the cache and invalidate lock below */ 190 uint64_t newid = alloc_get_id(alloc); 191 /* obtain writelock */ 192 lock_rw_wrlock(&ref->key->entry.lock); 193 /* check if it was deleted in the meantime, if so, skip update */ 194 if(ref->key->id == ref->id) { 195 ref->key->id = newid; 196 ref->id = newid; 197 } 198 lock_rw_unlock(&ref->key->entry.lock); 199 } 200 201 int 202 rrset_cache_update(struct rrset_cache* r, struct rrset_ref* ref, 203 struct alloc_cache* alloc, time_t timenow) 204 { 205 struct lruhash_entry* e; 206 struct ub_packed_rrset_key* k = ref->key; 207 hashvalue_type h = k->entry.hash; 208 uint16_t rrset_type = ntohs(k->rk.type); 209 int equal = 0; 210 log_assert(ref->id != 0 && k->id != 0); 211 log_assert(k->rk.dname != NULL); 212 /* looks up item with a readlock - no editing! */ 213 if((e=slabhash_lookup(&r->table, h, k, 0)) != 0) { 214 /* return id and key as they will be used in the cache 215 * since the lruhash_insert, if item already exists, deallocs 216 * the passed key in favor of the already stored key. 217 * because of the small gap (see below) this key ptr and id 218 * may prove later to be already deleted, which is no problem 219 * as it only makes a cache miss. 220 */ 221 ref->key = (struct ub_packed_rrset_key*)e->key; 222 ref->id = ref->key->id; 223 equal = rrsetdata_equal((struct packed_rrset_data*)k->entry. 224 data, (struct packed_rrset_data*)e->data); 225 if(!need_to_update_rrset(k->entry.data, e->data, timenow, 226 equal, (rrset_type==LDNS_RR_TYPE_NS))) { 227 /* cache is superior, return that value */ 228 lock_rw_unlock(&e->lock); 229 ub_packed_rrset_parsedelete(k, alloc); 230 if(equal) return 2; 231 return 1; 232 } 233 lock_rw_unlock(&e->lock); 234 /* Go on and insert the passed item. 235 * small gap here, where entry is not locked. 236 * possibly entry is updated with something else. 237 * we then overwrite that with our data. 238 * this is just too bad, its cache anyway. */ 239 /* use insert to update entry to manage lruhash 240 * cache size values nicely. */ 241 } 242 log_assert(ref->key->id != 0); 243 slabhash_insert(&r->table, h, &k->entry, k->entry.data, alloc); 244 if(e) { 245 /* For NSEC, NSEC3, DNAME, when rdata is updated, update 246 * the ID number so that proofs in message cache are 247 * invalidated */ 248 if((rrset_type == LDNS_RR_TYPE_NSEC 249 || rrset_type == LDNS_RR_TYPE_NSEC3 250 || rrset_type == LDNS_RR_TYPE_DNAME) && !equal) { 251 rrset_update_id(ref, alloc); 252 } 253 return 1; 254 } 255 return 0; 256 } 257 258 void rrset_cache_update_wildcard(struct rrset_cache* rrset_cache, 259 struct ub_packed_rrset_key* rrset, uint8_t* ce, size_t ce_len, 260 struct alloc_cache* alloc, time_t timenow) 261 { 262 struct rrset_ref ref; 263 uint8_t wc_dname[LDNS_MAX_DOMAINLEN+3]; 264 rrset = packed_rrset_copy_alloc(rrset, alloc, timenow); 265 if(!rrset) { 266 log_err("malloc failure in rrset_cache_update_wildcard"); 267 return; 268 } 269 /* ce has at least one label less then qname, we can therefore safely 270 * add the wildcard label. */ 271 wc_dname[0] = 1; 272 wc_dname[1] = (uint8_t)'*'; 273 memmove(wc_dname+2, ce, ce_len); 274 275 free(rrset->rk.dname); 276 rrset->rk.dname_len = ce_len + 2; 277 rrset->rk.dname = (uint8_t*)memdup(wc_dname, rrset->rk.dname_len); 278 if(!rrset->rk.dname) { 279 alloc_special_release(alloc, rrset); 280 log_err("memdup failure in rrset_cache_update_wildcard"); 281 return; 282 } 283 284 rrset->entry.hash = rrset_key_hash(&rrset->rk); 285 ref.key = rrset; 286 ref.id = rrset->id; 287 /* ignore ret: if it was in the cache, ref updated */ 288 (void)rrset_cache_update(rrset_cache, &ref, alloc, timenow); 289 } 290 291 /** Grace period in seconds for TTL=0 DNAME rrsets (RFC 2308: do not cache). 292 * Allows synthesis from cache within this window to reduce recursion load. */ 293 #define DNAME_TTL0_GRACE_SECONDS 1 294 295 struct ub_packed_rrset_key* 296 rrset_cache_lookup(struct rrset_cache* r, uint8_t* qname, size_t qnamelen, 297 uint16_t qtype, uint16_t qclass, uint32_t flags, time_t timenow, 298 int wr) 299 { 300 struct lruhash_entry* e; 301 struct ub_packed_rrset_key key; 302 303 key.entry.key = &key; 304 key.entry.data = NULL; 305 key.rk.dname = qname; 306 key.rk.dname_len = qnamelen; 307 key.rk.type = htons(qtype); 308 key.rk.rrset_class = htons(qclass); 309 key.rk.flags = flags; 310 311 key.entry.hash = rrset_key_hash(&key.rk); 312 313 if((e = slabhash_lookup(&r->table, key.entry.hash, &key, wr))) { 314 /* check TTL */ 315 struct packed_rrset_data* data = 316 (struct packed_rrset_data*)e->data; 317 struct ub_packed_rrset_key* k = (struct ub_packed_rrset_key*)e->key; 318 if(TTL_IS_EXPIRED(data->ttl, timenow)) { 319 /* Allow TTL=0 DNAME within grace period for synthesis */ 320 if(qtype == LDNS_RR_TYPE_DNAME && 321 (k->rk.flags & PACKED_RRSET_UPSTREAM_0TTL) && 322 (timenow - data->ttl_add) <= DNAME_TTL0_GRACE_SECONDS) { 323 /* within grace: allow for synthesis */ 324 } else { 325 lock_rw_unlock(&e->lock); 326 return NULL; 327 } 328 } 329 /* we're done */ 330 return k; 331 } 332 return NULL; 333 } 334 335 int 336 rrset_array_lock(struct rrset_ref* ref, size_t count, time_t timenow) 337 { 338 size_t i; 339 struct packed_rrset_data* d; 340 for(i=0; i<count; i++) { 341 if(i>0 && ref[i].key == ref[i-1].key) 342 continue; /* only lock items once */ 343 lock_rw_rdlock(&ref[i].key->entry.lock); 344 d = ref[i].key->entry.data; 345 if(ref[i].id != ref[i].key->id || 346 TTL_IS_EXPIRED(d->ttl, timenow)) { 347 /* failure! rollback our readlocks */ 348 rrset_array_unlock(ref, i+1); 349 return 0; 350 } 351 } 352 return 1; 353 } 354 355 void 356 rrset_array_unlock(struct rrset_ref* ref, size_t count) 357 { 358 size_t i; 359 for(i=0; i<count; i++) { 360 if(i>0 && ref[i].key == ref[i-1].key) 361 continue; /* only unlock items once */ 362 lock_rw_unlock(&ref[i].key->entry.lock); 363 } 364 } 365 366 void 367 rrset_array_unlock_touch(struct rrset_cache* r, struct regional* scratch, 368 struct rrset_ref* ref, size_t count) 369 { 370 hashvalue_type* h; 371 size_t i; 372 if(count > RR_COUNT_MAX || !(h = (hashvalue_type*)regional_alloc( 373 scratch, sizeof(hashvalue_type)*count))) { 374 log_warn("rrset LRU: memory allocation failed"); 375 h = NULL; 376 } else /* store hash values */ 377 for(i=0; i<count; i++) 378 h[i] = ref[i].key->entry.hash; 379 /* unlock */ 380 for(i=0; i<count; i++) { 381 if(i>0 && ref[i].key == ref[i-1].key) 382 continue; /* only unlock items once */ 383 lock_rw_unlock(&ref[i].key->entry.lock); 384 } 385 if(h) { 386 /* LRU touch, with no rrset locks held */ 387 for(i=0; i<count; i++) { 388 if(i>0 && ref[i].key == ref[i-1].key) 389 continue; /* only touch items once */ 390 rrset_cache_touch(r, ref[i].key, h[i], ref[i].id); 391 } 392 } 393 } 394 395 void 396 rrset_update_sec_status(struct rrset_cache* r, 397 struct ub_packed_rrset_key* rrset, time_t now) 398 { 399 struct packed_rrset_data* updata = 400 (struct packed_rrset_data*)rrset->entry.data; 401 struct lruhash_entry* e; 402 struct packed_rrset_data* cachedata; 403 404 /* hash it again to make sure it has a hash */ 405 rrset->entry.hash = rrset_key_hash(&rrset->rk); 406 407 e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 1); 408 if(!e) 409 return; /* not in the cache anymore */ 410 cachedata = (struct packed_rrset_data*)e->data; 411 if(!rrsetdata_equal(updata, cachedata)) { 412 lock_rw_unlock(&e->lock); 413 return; /* rrset has changed in the meantime */ 414 } 415 /* update the cached rrset */ 416 if(updata->security > cachedata->security) { 417 size_t i; 418 if(updata->trust > cachedata->trust) 419 cachedata->trust = updata->trust; 420 cachedata->security = updata->security; 421 /* for NS records only shorter TTLs, other types: update it */ 422 if(ntohs(rrset->rk.type) != LDNS_RR_TYPE_NS || 423 updata->ttl+now < cachedata->ttl || 424 cachedata->ttl < now || 425 updata->security == sec_status_bogus) { 426 cachedata->ttl = updata->ttl + now; 427 for(i=0; i<cachedata->count+cachedata->rrsig_count; i++) 428 cachedata->rr_ttl[i] = updata->rr_ttl[i]+now; 429 cachedata->ttl_add = now; 430 } 431 } 432 lock_rw_unlock(&e->lock); 433 } 434 435 void 436 rrset_check_sec_status(struct rrset_cache* r, 437 struct ub_packed_rrset_key* rrset, time_t now) 438 { 439 struct packed_rrset_data* updata = 440 (struct packed_rrset_data*)rrset->entry.data; 441 struct lruhash_entry* e; 442 struct packed_rrset_data* cachedata; 443 444 /* hash it again to make sure it has a hash */ 445 rrset->entry.hash = rrset_key_hash(&rrset->rk); 446 447 e = slabhash_lookup(&r->table, rrset->entry.hash, rrset, 0); 448 if(!e) 449 return; /* not in the cache anymore */ 450 cachedata = (struct packed_rrset_data*)e->data; 451 if(now > cachedata->ttl || !rrsetdata_equal(updata, cachedata)) { 452 lock_rw_unlock(&e->lock); 453 return; /* expired, or rrset has changed in the meantime */ 454 } 455 if(cachedata->security > updata->security) { 456 updata->security = cachedata->security; 457 if(cachedata->security == sec_status_bogus) { 458 size_t i; 459 updata->ttl = cachedata->ttl - now; 460 for(i=0; i<cachedata->count+cachedata->rrsig_count; i++) 461 if(cachedata->rr_ttl[i] < now) 462 updata->rr_ttl[i] = 0; 463 else updata->rr_ttl[i] = 464 cachedata->rr_ttl[i]-now; 465 } 466 if(cachedata->trust > updata->trust) 467 updata->trust = cachedata->trust; 468 } 469 lock_rw_unlock(&e->lock); 470 } 471 472 void 473 rrset_cache_remove_above(struct rrset_cache* r, uint8_t** qname, size_t* 474 qnamelen, uint16_t searchtype, uint16_t qclass, time_t now, uint8_t* 475 qnametop, size_t qnametoplen) 476 { 477 struct ub_packed_rrset_key *rrset; 478 uint8_t lablen; 479 480 while(*qnamelen > 0) { 481 /* look one label higher */ 482 lablen = **qname; 483 *qname += lablen + 1; 484 *qnamelen -= lablen + 1; 485 if(*qnamelen <= 0) 486 return; 487 488 /* stop at qnametop */ 489 if(qnametop && *qnamelen == qnametoplen && 490 query_dname_compare(*qname, qnametop)==0) 491 return; 492 493 if(verbosity >= VERB_ALGO) { 494 /* looks up with a time of 0, to see expired entries */ 495 if((rrset = rrset_cache_lookup(r, *qname, 496 *qnamelen, searchtype, qclass, 0, 0, 0))) { 497 struct packed_rrset_data* data = 498 (struct packed_rrset_data*)rrset->entry.data; 499 int expired = (now > data->ttl); 500 lock_rw_unlock(&rrset->entry.lock); 501 if(expired) 502 log_nametypeclass(verbosity, "this " 503 "(grand)parent rrset will be " 504 "removed (expired)", 505 *qname, searchtype, qclass); 506 else log_nametypeclass(verbosity, "this " 507 "(grand)parent rrset will be " 508 "removed", 509 *qname, searchtype, qclass); 510 } 511 } 512 rrset_cache_remove(r, *qname, *qnamelen, searchtype, qclass, 0); 513 } 514 } 515 516 int 517 rrset_cache_expired_above(struct rrset_cache* r, uint8_t** qname, size_t* 518 qnamelen, uint16_t searchtype, uint16_t qclass, time_t now, uint8_t* 519 qnametop, size_t qnametoplen) 520 { 521 struct ub_packed_rrset_key *rrset; 522 uint8_t lablen; 523 524 while(*qnamelen > 0) { 525 /* look one label higher */ 526 lablen = **qname; 527 *qname += lablen + 1; 528 *qnamelen -= lablen + 1; 529 if(*qnamelen <= 0) 530 break; 531 532 /* looks up with a time of 0, to see expired entries */ 533 if((rrset = rrset_cache_lookup(r, *qname, 534 *qnamelen, searchtype, qclass, 0, 0, 0))) { 535 struct packed_rrset_data* data = 536 (struct packed_rrset_data*)rrset->entry.data; 537 if(TTL_IS_EXPIRED(data->ttl, now)) { 538 /* it is expired, this is not wanted */ 539 lock_rw_unlock(&rrset->entry.lock); 540 log_nametypeclass(VERB_ALGO, "this rrset is expired", *qname, searchtype, qclass); 541 return 1; 542 } 543 /* it is not expired, continue looking */ 544 lock_rw_unlock(&rrset->entry.lock); 545 } 546 547 /* do not look above the qnametop. */ 548 if(qnametop && *qnamelen == qnametoplen && 549 query_dname_compare(*qname, qnametop)==0) 550 break; 551 } 552 return 0; 553 } 554 555 void rrset_cache_remove(struct rrset_cache* r, uint8_t* nm, size_t nmlen, 556 uint16_t type, uint16_t dclass, uint32_t flags) 557 { 558 struct ub_packed_rrset_key key; 559 key.entry.key = &key; 560 key.rk.dname = nm; 561 key.rk.dname_len = nmlen; 562 key.rk.rrset_class = htons(dclass); 563 key.rk.type = htons(type); 564 key.rk.flags = flags; 565 key.entry.hash = rrset_key_hash(&key.rk); 566 slabhash_remove(&r->table, key.entry.hash, &key); 567 } 568