1 /* $NetBSD: rbt-cachedb.c,v 1.5 2026/01/29 18:37:49 christos Exp $ */ 2 3 /* 4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 5 * 6 * SPDX-License-Identifier: MPL-2.0 7 * 8 * This Source Code Form is subject to the terms of the Mozilla Public 9 * License, v. 2.0. If a copy of the MPL was not distributed with this 10 * file, you can obtain one at https://mozilla.org/MPL/2.0/. 11 * 12 * See the COPYRIGHT file distributed with this work for additional 13 * information regarding copyright ownership. 14 */ 15 16 /*! \file */ 17 18 #include <inttypes.h> 19 #include <stdbool.h> 20 #include <sys/mman.h> 21 22 #include <isc/ascii.h> 23 #include <isc/async.h> 24 #include <isc/atomic.h> 25 #include <isc/crc64.h> 26 #include <isc/file.h> 27 #include <isc/hash.h> 28 #include <isc/hashmap.h> 29 #include <isc/heap.h> 30 #include <isc/hex.h> 31 #include <isc/loop.h> 32 #include <isc/mem.h> 33 #include <isc/mutex.h> 34 #include <isc/once.h> 35 #include <isc/random.h> 36 #include <isc/refcount.h> 37 #include <isc/result.h> 38 #include <isc/rwlock.h> 39 #include <isc/serial.h> 40 #include <isc/stdio.h> 41 #include <isc/string.h> 42 #include <isc/time.h> 43 #include <isc/urcu.h> 44 #include <isc/util.h> 45 46 #include <dns/callbacks.h> 47 #include <dns/db.h> 48 #include <dns/dbiterator.h> 49 #include <dns/fixedname.h> 50 #include <dns/log.h> 51 #include <dns/masterdump.h> 52 #include <dns/nsec.h> 53 #include <dns/nsec3.h> 54 #include <dns/rbt.h> 55 #include <dns/rdata.h> 56 #include <dns/rdataset.h> 57 #include <dns/rdatasetiter.h> 58 #include <dns/rdataslab.h> 59 #include <dns/rdatastruct.h> 60 #include <dns/stats.h> 61 #include <dns/time.h> 62 #include <dns/view.h> 63 #include <dns/zone.h> 64 #include <dns/zonekey.h> 65 66 #include "db_p.h" 67 #include "rbtdb_p.h" 68 69 /*% 70 * Whether to rate-limit updating the LRU to avoid possible thread contention. 71 * Updating LRU requires write locking, so we don't do it every time the 72 * record is touched - only after some time passes. 73 */ 74 #ifndef DNS_RBTDB_LIMITLRUUPDATE 75 #define DNS_RBTDB_LIMITLRUUPDATE 1 76 #endif 77 78 /*% Time after which we update LRU for glue records, 5 minutes */ 79 #define DNS_RBTDB_LRUUPDATE_GLUE 300 80 /*% Time after which we update LRU for all other records, 10 minutes */ 81 #define DNS_RBTDB_LRUUPDATE_REGULAR 600 82 83 #define EXISTS(header) \ 84 ((atomic_load_acquire(&(header)->attributes) & \ 85 DNS_SLABHEADERATTR_NONEXISTENT) == 0) 86 #define NONEXISTENT(header) \ 87 ((atomic_load_acquire(&(header)->attributes) & \ 88 DNS_SLABHEADERATTR_NONEXISTENT) != 0) 89 #define NXDOMAIN(header) \ 90 ((atomic_load_acquire(&(header)->attributes) & \ 91 DNS_SLABHEADERATTR_NXDOMAIN) != 0) 92 #define STALE(header) \ 93 ((atomic_load_acquire(&(header)->attributes) & \ 94 DNS_SLABHEADERATTR_STALE) != 0) 95 #define NEGATIVE(header) \ 96 ((atomic_load_acquire(&(header)->attributes) & \ 97 DNS_SLABHEADERATTR_NEGATIVE) != 0) 98 #define ZEROTTL(header) \ 99 ((atomic_load_acquire(&(header)->attributes) & \ 100 DNS_SLABHEADERATTR_ZEROTTL) != 0) 101 #define ANCIENT(header) \ 102 ((atomic_load_acquire(&(header)->attributes) & \ 103 DNS_SLABHEADERATTR_ANCIENT) != 0) 104 #define STATCOUNT(header) \ 105 ((atomic_load_acquire(&(header)->attributes) & \ 106 DNS_SLABHEADERATTR_STATCOUNT) != 0) 107 108 #define STALE_TTL(header, rbtdb) \ 109 (NXDOMAIN(header) ? 0 : rbtdb->common.serve_stale_ttl) 110 111 #define ACTIVE(header, now) \ 112 (((header)->ttl > (now)) || ((header)->ttl == (now) && ZEROTTL(header))) 113 114 #define KEEPSTALE(rbtdb) ((rbtdb)->common.serve_stale_ttl > 0) 115 116 /*% 117 * Routines for LRU-based cache management. 118 */ 119 120 /*% 121 * See if a given cache entry that is being reused needs to be updated 122 * in the LRU-list. From the LRU management point of view, this function is 123 * expected to return true for almost all cases. When used with threads, 124 * however, this may cause a non-negligible performance penalty because a 125 * writer lock will have to be acquired before updating the list. 126 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this 127 * function returns true if the entry has not been updated for some period of 128 * time. We differentiate the NS or glue address case and the others since 129 * experiments have shown that the former tends to be accessed relatively 130 * infrequently and the cost of cache miss is higher (e.g., a missing NS records 131 * may cause external queries at a higher level zone, involving more 132 * transactions). 133 * 134 * Caller must hold the node (read or write) lock. 135 */ 136 static bool 137 need_headerupdate(dns_slabheader_t *header, isc_stdtime_t now) { 138 if (DNS_SLABHEADER_GETATTR(header, 139 DNS_SLABHEADERATTR_NONEXISTENT | 140 DNS_SLABHEADERATTR_ANCIENT | 141 DNS_SLABHEADERATTR_ZEROTTL) != 0) 142 { 143 return false; 144 } 145 146 #if DNS_RBTDB_LIMITLRUUPDATE 147 if (header->type == dns_rdatatype_ns || 148 (header->trust == dns_trust_glue && 149 (header->type == dns_rdatatype_a || 150 header->type == dns_rdatatype_aaaa))) 151 { 152 /* 153 * Glue records are updated if at least DNS_RBTDB_LRUUPDATE_GLUE 154 * seconds have passed since the previous update time. 155 */ 156 return header->last_used + DNS_RBTDB_LRUUPDATE_GLUE <= now; 157 } 158 159 /* 160 * Other records are updated if DNS_RBTDB_LRUUPDATE_REGULAR seconds 161 * have passed. 162 */ 163 return header->last_used + DNS_RBTDB_LRUUPDATE_REGULAR <= now; 164 #else 165 UNUSED(now); 166 167 return true; 168 #endif /* if DNS_RBTDB_LIMITLRUUPDATE */ 169 } 170 171 /*% 172 * Update the timestamp of a given cache entry and move it to the head 173 * of the corresponding LRU list. 174 * 175 * Caller must hold the node (write) lock. 176 * 177 * Note that the we do NOT touch the heap here, as the TTL has not changed. 178 */ 179 static void 180 update_header(dns_rbtdb_t *rbtdb, dns_slabheader_t *header, isc_stdtime_t now) { 181 INSIST(IS_CACHE(rbtdb)); 182 183 /* To be checked: can we really assume this? XXXMLG */ 184 INSIST(ISC_LINK_LINKED(header, link)); 185 186 ISC_LIST_UNLINK(rbtdb->lru[RBTDB_HEADERNODE(header)->locknum], header, 187 link); 188 header->last_used = now; 189 ISC_LIST_PREPEND(rbtdb->lru[RBTDB_HEADERNODE(header)->locknum], header, 190 link); 191 } 192 193 /* 194 * Locking 195 * 196 * If a routine is going to lock more than one lock in this module, then 197 * the locking must be done in the following order: 198 * 199 * Tree Lock 200 * 201 * Node Lock (Only one from the set may be locked at one time by 202 * any caller) 203 * 204 * Database Lock 205 * 206 * Failure to follow this hierarchy can result in deadlock. 207 */ 208 209 /* 210 * Deleting Nodes 211 * 212 * For zone databases the node for the origin of the zone MUST NOT be deleted. 213 */ 214 215 /* 216 * DB Routines 217 */ 218 219 static void 220 update_cachestats(dns_rbtdb_t *rbtdb, isc_result_t result) { 221 INSIST(IS_CACHE(rbtdb)); 222 223 if (rbtdb->cachestats == NULL) { 224 return; 225 } 226 227 switch (result) { 228 case DNS_R_COVERINGNSEC: 229 isc_stats_increment(rbtdb->cachestats, 230 dns_cachestatscounter_coveringnsec); 231 FALLTHROUGH; 232 case ISC_R_SUCCESS: 233 case DNS_R_CNAME: 234 case DNS_R_DNAME: 235 case DNS_R_DELEGATION: 236 case DNS_R_NCACHENXDOMAIN: 237 case DNS_R_NCACHENXRRSET: 238 isc_stats_increment(rbtdb->cachestats, 239 dns_cachestatscounter_hits); 240 break; 241 default: 242 isc_stats_increment(rbtdb->cachestats, 243 dns_cachestatscounter_misses); 244 } 245 } 246 247 static void 248 clean_stale_headers(dns_slabheader_t *top) { 249 dns_slabheader_t *d = NULL, *down_next = NULL; 250 251 for (d = top->down; d != NULL; d = down_next) { 252 down_next = d->down; 253 dns_slabheader_destroy(&d); 254 } 255 top->down = NULL; 256 } 257 258 static isc_result_t 259 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep, 260 dns_name_t *foundname, dns_rdataset_t *rdataset, 261 dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 262 dns_name_t *zcname = NULL; 263 dns_typepair_t type; 264 dns_rbtnode_t *node = NULL; 265 266 REQUIRE(search != NULL); 267 REQUIRE(search->zonecut != NULL); 268 REQUIRE(search->zonecut_header != NULL); 269 270 /* 271 * The caller MUST NOT be holding any node locks. 272 */ 273 274 node = search->zonecut; 275 type = search->zonecut_header->type; 276 277 /* 278 * If we have to set foundname, we do it before anything else. 279 * If we were to set foundname after we had set nodep or bound the 280 * rdataset, then we'd have to undo that work if dns_name_copy() 281 * failed. By setting foundname first, there's nothing to undo if 282 * we have trouble. 283 */ 284 if (foundname != NULL && search->copy_name) { 285 zcname = dns_fixedname_name(&search->zonecut_name); 286 dns_name_copy(zcname, foundname); 287 } 288 if (nodep != NULL) { 289 /* 290 * Note that we don't have to increment the node's reference 291 * count here because we're going to use the reference we 292 * already have in the search block. 293 */ 294 *nodep = node; 295 search->need_cleanup = false; 296 } 297 if (rdataset != NULL) { 298 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 299 NODE_RDLOCK(&(search->rbtdb->node_locks[node->locknum].lock), 300 &nlocktype); 301 dns__rbtdb_bindrdataset(search->rbtdb, node, 302 search->zonecut_header, search->now, 303 isc_rwlocktype_read, 304 rdataset DNS__DB_FLARG_PASS); 305 if (sigrdataset != NULL && search->zonecut_sigheader != NULL) { 306 dns__rbtdb_bindrdataset( 307 search->rbtdb, node, search->zonecut_sigheader, 308 search->now, isc_rwlocktype_read, 309 sigrdataset DNS__DB_FLARG_PASS); 310 } 311 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock), 312 &nlocktype); 313 } 314 315 if (type == dns_rdatatype_dname) { 316 return DNS_R_DNAME; 317 } 318 return DNS_R_DELEGATION; 319 } 320 321 static bool 322 check_stale_header(dns_rbtnode_t *node, dns_slabheader_t *header, 323 isc_rwlocktype_t *nlocktypep, isc_rwlock_t *lock, 324 rbtdb_search_t *search, dns_slabheader_t **header_prev) { 325 if (!ACTIVE(header, search->now)) { 326 dns_ttl_t stale = header->ttl + 327 STALE_TTL(header, search->rbtdb); 328 /* 329 * If this data is in the stale window keep it and if 330 * DNS_DBFIND_STALEOK is not set we tell the caller to 331 * skip this record. We skip the records with ZEROTTL 332 * (these records should not be cached anyway). 333 */ 334 335 DNS_SLABHEADER_CLRATTR(header, DNS_SLABHEADERATTR_STALE_WINDOW); 336 if (!ZEROTTL(header) && KEEPSTALE(search->rbtdb) && 337 stale > search->now) 338 { 339 dns__rbtdb_mark(header, DNS_SLABHEADERATTR_STALE); 340 *header_prev = header; 341 /* 342 * If DNS_DBFIND_STALESTART is set then it means we 343 * failed to resolve the name during recursion, in 344 * this case we mark the time in which the refresh 345 * failed. 346 */ 347 if ((search->options & DNS_DBFIND_STALESTART) != 0) { 348 atomic_store_release( 349 &header->last_refresh_fail_ts, 350 search->now); 351 } else if ((search->options & 352 DNS_DBFIND_STALEENABLED) != 0 && 353 search->now < 354 (atomic_load_acquire( 355 &header->last_refresh_fail_ts) + 356 search->rbtdb->serve_stale_refresh)) 357 { 358 /* 359 * If we are within interval between last 360 * refresh failure time + 'stale-refresh-time', 361 * then don't skip this stale entry but use it 362 * instead. 363 */ 364 DNS_SLABHEADER_SETATTR( 365 header, 366 DNS_SLABHEADERATTR_STALE_WINDOW); 367 return false; 368 } else if ((search->options & 369 DNS_DBFIND_STALETIMEOUT) != 0) 370 { 371 /* 372 * We want stale RRset due to timeout, so we 373 * don't skip it. 374 */ 375 return false; 376 } 377 return (search->options & DNS_DBFIND_STALEOK) == 0; 378 } 379 380 /* 381 * This rdataset is stale. If no one else is using the 382 * node, we can clean it up right now, otherwise we mark 383 * it as ancient, and the node as dirty, so it will get 384 * cleaned up later. 385 */ 386 if ((header->ttl < search->now - RBTDB_VIRTUAL) && 387 (*nlocktypep == isc_rwlocktype_write || 388 NODE_TRYUPGRADE(lock, nlocktypep) == ISC_R_SUCCESS)) 389 { 390 /* 391 * We update the node's status only when we can 392 * get write access; otherwise, we leave others 393 * to this work. Periodical cleaning will 394 * eventually take the job as the last resort. 395 * We won't downgrade the lock, since other 396 * rdatasets are probably stale, too. 397 */ 398 399 if (isc_refcount_current(&node->references) == 0) { 400 /* 401 * header->down can be non-NULL if the 402 * refcount has just decremented to 0 403 * but dns__rbtnode_release() has not 404 * performed clean_cache_node(), in 405 * which case we need to purge the stale 406 * headers first. 407 */ 408 clean_stale_headers(header); 409 if (*header_prev != NULL) { 410 (*header_prev)->next = header->next; 411 } else { 412 node->data = header->next; 413 } 414 dns_slabheader_destroy(&header); 415 } else { 416 dns__rbtdb_mark_ancient(header); 417 *header_prev = header; 418 } 419 } else { 420 *header_prev = header; 421 } 422 return true; 423 } 424 return false; 425 } 426 427 static isc_result_t 428 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, 429 void *arg DNS__DB_FLARG) { 430 rbtdb_search_t *search = arg; 431 dns_slabheader_t *header = NULL; 432 dns_slabheader_t *header_prev = NULL, *header_next = NULL; 433 dns_slabheader_t *dname_header = NULL, *sigdname_header = NULL; 434 isc_result_t result; 435 isc_rwlock_t *lock = NULL; 436 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 437 438 REQUIRE(search->zonecut == NULL); 439 440 /* 441 * Keep compiler silent. 442 */ 443 UNUSED(name); 444 445 lock = &(search->rbtdb->node_locks[node->locknum].lock); 446 NODE_RDLOCK(lock, &nlocktype); 447 448 /* 449 * Look for a DNAME or RRSIG DNAME rdataset. 450 */ 451 for (header = node->data; header != NULL; header = header_next) { 452 header_next = header->next; 453 if (check_stale_header(node, header, &nlocktype, lock, search, 454 &header_prev)) 455 { 456 /* Do nothing. */ 457 } else if (header->type == dns_rdatatype_dname && 458 EXISTS(header) && !ANCIENT(header)) 459 { 460 dname_header = header; 461 header_prev = header; 462 } else if (header->type == DNS_SIGTYPE(dns_rdatatype_dname) && 463 EXISTS(header) && !ANCIENT(header)) 464 { 465 sigdname_header = header; 466 header_prev = header; 467 } else { 468 header_prev = header; 469 } 470 } 471 472 if (dname_header != NULL && 473 (!DNS_TRUST_PENDING(dname_header->trust) || 474 (search->options & DNS_DBFIND_PENDINGOK) != 0)) 475 { 476 /* 477 * We increment the reference count on node to ensure that 478 * search->zonecut_header will still be valid later. 479 */ 480 dns__rbtnode_acquire(search->rbtdb, node, 481 nlocktype DNS__DB_FLARG_PASS); 482 search->zonecut = node; 483 search->zonecut_header = dname_header; 484 search->zonecut_sigheader = sigdname_header; 485 search->need_cleanup = true; 486 result = DNS_R_PARTIALMATCH; 487 } else { 488 result = DNS_R_CONTINUE; 489 } 490 491 NODE_UNLOCK(lock, &nlocktype); 492 493 return result; 494 } 495 496 static isc_result_t 497 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node, 498 dns_dbnode_t **nodep, dns_name_t *foundname, 499 dns_rdataset_t *rdataset, 500 dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 501 unsigned int i; 502 isc_result_t result = ISC_R_NOTFOUND; 503 dns_name_t name; 504 dns_rbtdb_t *rbtdb = NULL; 505 bool done; 506 507 /* 508 * Caller must be holding the tree lock. 509 */ 510 511 rbtdb = search->rbtdb; 512 i = search->chain.level_matches; 513 done = false; 514 do { 515 dns_slabheader_t *header = NULL; 516 dns_slabheader_t *header_prev = NULL, *header_next = NULL; 517 dns_slabheader_t *found = NULL, *foundsig = NULL; 518 isc_rwlock_t *lock = &rbtdb->node_locks[node->locknum].lock; 519 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 520 521 NODE_RDLOCK(lock, &nlocktype); 522 523 /* 524 * Look for NS and RRSIG NS rdatasets. 525 */ 526 for (header = node->data; header != NULL; header = header_next) 527 { 528 header_next = header->next; 529 if (check_stale_header(node, header, &nlocktype, lock, 530 search, &header_prev)) 531 { 532 /* Do nothing. */ 533 } else if (EXISTS(header) && !ANCIENT(header)) { 534 /* 535 * We've found an extant rdataset. See if 536 * we're interested in it. 537 */ 538 if (header->type == dns_rdatatype_ns) { 539 found = header; 540 if (foundsig != NULL) { 541 break; 542 } 543 } else if (header->type == 544 DNS_SIGTYPE(dns_rdatatype_ns)) 545 { 546 foundsig = header; 547 if (found != NULL) { 548 break; 549 } 550 } 551 header_prev = header; 552 } else { 553 header_prev = header; 554 } 555 } 556 557 if (found != NULL) { 558 /* 559 * If we have to set foundname, we do it before 560 * anything else. If we were to set foundname after 561 * we had set nodep or bound the rdataset, then we'd 562 * have to undo that work if dns_name_concatenate() 563 * failed. By setting foundname first, there's 564 * nothing to undo if we have trouble. 565 */ 566 if (foundname != NULL) { 567 dns_name_init(&name, NULL); 568 dns_rbt_namefromnode(node, &name); 569 dns_name_copy(&name, foundname); 570 while (i > 0) { 571 dns_rbtnode_t *level_node = 572 search->chain.levels[--i]; 573 dns_name_init(&name, NULL); 574 dns_rbt_namefromnode(level_node, &name); 575 result = dns_name_concatenate( 576 foundname, &name, foundname, 577 NULL); 578 if (result != ISC_R_SUCCESS) { 579 if (nodep != NULL) { 580 *nodep = NULL; 581 } 582 goto node_exit; 583 } 584 } 585 } 586 result = DNS_R_DELEGATION; 587 if (nodep != NULL) { 588 dns__rbtnode_acquire( 589 search->rbtdb, node, 590 nlocktype DNS__DB_FLARG_PASS); 591 *nodep = node; 592 } 593 dns__rbtdb_bindrdataset(search->rbtdb, node, found, 594 search->now, nlocktype, 595 rdataset DNS__DB_FLARG_PASS); 596 if (foundsig != NULL) { 597 dns__rbtdb_bindrdataset( 598 search->rbtdb, node, foundsig, 599 search->now, nlocktype, 600 sigrdataset DNS__DB_FLARG_PASS); 601 } 602 if (need_headerupdate(found, search->now) || 603 (foundsig != NULL && 604 need_headerupdate(foundsig, search->now))) 605 { 606 if (nlocktype != isc_rwlocktype_write) { 607 NODE_FORCEUPGRADE(lock, &nlocktype); 608 POST(nlocktype); 609 } 610 if (need_headerupdate(found, search->now)) { 611 update_header(search->rbtdb, found, 612 search->now); 613 } 614 if (foundsig != NULL && 615 need_headerupdate(foundsig, search->now)) 616 { 617 update_header(search->rbtdb, foundsig, 618 search->now); 619 } 620 } 621 } 622 623 node_exit: 624 NODE_UNLOCK(lock, &nlocktype); 625 626 if (found == NULL && i > 0) { 627 i--; 628 node = search->chain.levels[i]; 629 } else { 630 done = true; 631 } 632 } while (!done); 633 634 return result; 635 } 636 637 /* 638 * Look for a potentially covering NSEC in the cache where `name` 639 * is known not to exist. This uses the auxiliary NSEC tree to find 640 * the potential NSEC owner. If found, we update 'foundname', 'nodep', 641 * 'rdataset' and 'sigrdataset', and return DNS_R_COVERINGNSEC. 642 * Otherwise, return ISC_R_NOTFOUND. 643 */ 644 static isc_result_t 645 find_coveringnsec(rbtdb_search_t *search, const dns_name_t *name, 646 dns_dbnode_t **nodep, isc_stdtime_t now, 647 dns_name_t *foundname, dns_rdataset_t *rdataset, 648 dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 649 dns_fixedname_t fprefix, forigin, ftarget, fixed; 650 dns_name_t *prefix = NULL, *origin = NULL; 651 dns_name_t *target = NULL, *fname = NULL; 652 dns_rbtnode_t *node = NULL; 653 dns_rbtnodechain_t chain; 654 isc_result_t result; 655 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 656 isc_rwlock_t *lock = NULL; 657 dns_typepair_t matchtype, sigmatchtype; 658 dns_slabheader_t *found = NULL, *foundsig = NULL; 659 dns_slabheader_t *header = NULL; 660 dns_slabheader_t *header_next = NULL, *header_prev = NULL; 661 662 /* 663 * Look for the node in the auxilary tree. 664 */ 665 dns_rbtnodechain_init(&chain); 666 target = dns_fixedname_initname(&ftarget); 667 result = dns_rbt_findnode(search->rbtdb->nsec, name, target, &node, 668 &chain, DNS_RBTFIND_EMPTYDATA, NULL, NULL); 669 if (result != DNS_R_PARTIALMATCH) { 670 dns_rbtnodechain_reset(&chain); 671 return ISC_R_NOTFOUND; 672 } 673 674 prefix = dns_fixedname_initname(&fprefix); 675 origin = dns_fixedname_initname(&forigin); 676 target = dns_fixedname_initname(&ftarget); 677 fname = dns_fixedname_initname(&fixed); 678 679 matchtype = DNS_TYPEPAIR_VALUE(dns_rdatatype_nsec, 0); 680 sigmatchtype = DNS_SIGTYPE(dns_rdatatype_nsec); 681 682 /* 683 * Extract predecessor from chain. 684 */ 685 result = dns_rbtnodechain_current(&chain, prefix, origin, NULL); 686 dns_rbtnodechain_reset(&chain); 687 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN) { 688 return ISC_R_NOTFOUND; 689 } 690 691 result = dns_name_concatenate(prefix, origin, target, NULL); 692 if (result != ISC_R_SUCCESS) { 693 return ISC_R_NOTFOUND; 694 } 695 696 /* 697 * Lookup the predecessor in the main tree. 698 */ 699 node = NULL; 700 result = dns_rbt_findnode(search->rbtdb->tree, target, fname, &node, 701 NULL, DNS_RBTFIND_EMPTYDATA, NULL, NULL); 702 if (result != ISC_R_SUCCESS) { 703 return ISC_R_NOTFOUND; 704 } 705 706 lock = &(search->rbtdb->node_locks[node->locknum].lock); 707 NODE_RDLOCK(lock, &nlocktype); 708 for (header = node->data; header != NULL; header = header_next) { 709 header_next = header->next; 710 if (check_stale_header(node, header, &nlocktype, lock, search, 711 &header_prev)) 712 { 713 continue; 714 } 715 if (NONEXISTENT(header) || DNS_TYPEPAIR_TYPE(header->type) == 0) 716 { 717 header_prev = header; 718 continue; 719 } 720 if (header->type == matchtype) { 721 found = header; 722 if (foundsig != NULL) { 723 break; 724 } 725 } else if (header->type == sigmatchtype) { 726 foundsig = header; 727 if (found != NULL) { 728 break; 729 } 730 } 731 header_prev = header; 732 } 733 if (found != NULL) { 734 dns__rbtdb_bindrdataset(search->rbtdb, node, found, now, 735 nlocktype, rdataset DNS__DB_FLARG_PASS); 736 if (foundsig != NULL) { 737 dns__rbtdb_bindrdataset(search->rbtdb, node, foundsig, 738 now, nlocktype, 739 sigrdataset DNS__DB_FLARG_PASS); 740 } 741 dns__rbtnode_acquire(search->rbtdb, node, 742 nlocktype DNS__DB_FLARG_PASS); 743 744 dns_name_copy(fname, foundname); 745 746 *nodep = node; 747 result = DNS_R_COVERINGNSEC; 748 } else { 749 result = ISC_R_NOTFOUND; 750 } 751 NODE_UNLOCK(lock, &nlocktype); 752 return result; 753 } 754 755 static isc_result_t 756 cache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version, 757 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now, 758 dns_dbnode_t **nodep, dns_name_t *foundname, 759 dns_rdataset_t *rdataset, 760 dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 761 dns_rbtnode_t *node = NULL; 762 isc_result_t result; 763 rbtdb_search_t search; 764 bool cname_ok = true; 765 bool found_noqname = false; 766 bool all_negative = true; 767 bool empty_node; 768 isc_rwlock_t *lock = NULL; 769 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 770 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 771 dns_slabheader_t *header = NULL; 772 dns_slabheader_t *header_prev = NULL, *header_next = NULL; 773 dns_slabheader_t *found = NULL, *nsheader = NULL; 774 dns_slabheader_t *foundsig = NULL, *nssig = NULL, *cnamesig = NULL; 775 dns_slabheader_t *update = NULL, *updatesig = NULL; 776 dns_slabheader_t *nsecheader = NULL, *nsecsig = NULL; 777 dns_typepair_t sigtype, negtype; 778 779 UNUSED(version); 780 781 REQUIRE(VALID_RBTDB((dns_rbtdb_t *)db)); 782 REQUIRE(version == NULL); 783 784 if (now == 0) { 785 now = isc_stdtime_now(); 786 } 787 788 search = (rbtdb_search_t){ 789 .rbtdb = (dns_rbtdb_t *)db, 790 .serial = 1, 791 .options = options, 792 .now = now, 793 }; 794 dns_fixedname_init(&search.zonecut_name); 795 dns_rbtnodechain_init(&search.chain); 796 797 TREE_RDLOCK(&search.rbtdb->tree_lock, &tlocktype); 798 799 /* 800 * Search down from the root of the tree. If, while going down, we 801 * encounter a callback node, cache_zonecut_callback() will search the 802 * rdatasets at the zone cut for a DNAME rdataset. 803 */ 804 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node, 805 &search.chain, DNS_RBTFIND_EMPTYDATA, 806 cache_zonecut_callback, &search); 807 808 if (result == DNS_R_PARTIALMATCH) { 809 /* 810 * If dns_rbt_findnode discovered a covering DNAME skip 811 * looking for a covering NSEC. 812 */ 813 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 && 814 (search.zonecut_header == NULL || 815 search.zonecut_header->type != dns_rdatatype_dname)) 816 { 817 result = find_coveringnsec( 818 &search, name, nodep, now, foundname, rdataset, 819 sigrdataset DNS__DB_FLARG_PASS); 820 if (result == DNS_R_COVERINGNSEC) { 821 goto tree_exit; 822 } 823 } 824 if (search.zonecut != NULL) { 825 result = setup_delegation( 826 &search, nodep, foundname, rdataset, 827 sigrdataset DNS__DB_FLARG_PASS); 828 goto tree_exit; 829 } else { 830 find_ns: 831 result = find_deepest_zonecut( 832 &search, node, nodep, foundname, rdataset, 833 sigrdataset DNS__DB_FLARG_PASS); 834 goto tree_exit; 835 } 836 } else if (result != ISC_R_SUCCESS) { 837 goto tree_exit; 838 } 839 840 /* 841 * Certain DNSSEC types are not subject to CNAME matching 842 * (RFC4035, section 2.5 and RFC3007). 843 * 844 * We don't check for RRSIG, because we don't store RRSIG records 845 * directly. 846 */ 847 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) { 848 cname_ok = false; 849 } 850 851 /* 852 * We now go looking for rdata... 853 */ 854 855 lock = &(search.rbtdb->node_locks[node->locknum].lock); 856 NODE_RDLOCK(lock, &nlocktype); 857 858 /* 859 * These pointers need to be reset here in case we did 860 * 'goto find_ns' from somewhere below. 861 */ 862 found = NULL; 863 foundsig = NULL; 864 sigtype = DNS_SIGTYPE(type); 865 negtype = DNS_TYPEPAIR_VALUE(0, type); 866 nsheader = NULL; 867 nsecheader = NULL; 868 nssig = NULL; 869 nsecsig = NULL; 870 cnamesig = NULL; 871 empty_node = true; 872 header_prev = NULL; 873 for (header = node->data; header != NULL; header = header_next) { 874 header_next = header->next; 875 if (check_stale_header(node, header, &nlocktype, lock, &search, 876 &header_prev)) 877 { 878 /* Do nothing. */ 879 } else if (EXISTS(header) && !ANCIENT(header)) { 880 /* 881 * We now know that there is at least one active 882 * non-stale rdataset at this node. 883 */ 884 empty_node = false; 885 if (header->noqname != NULL && 886 header->trust == dns_trust_secure) 887 { 888 found_noqname = true; 889 } 890 if (!NEGATIVE(header)) { 891 all_negative = false; 892 } 893 894 /* 895 * If we found a type we were looking for, remember 896 * it. 897 */ 898 if (header->type == type || 899 (type == dns_rdatatype_any && 900 DNS_TYPEPAIR_TYPE(header->type) != 0) || 901 (cname_ok && header->type == dns_rdatatype_cname)) 902 { 903 /* 904 * We've found the answer. 905 */ 906 found = header; 907 if (header->type == dns_rdatatype_cname && 908 cname_ok) 909 { 910 /* 911 * If we've already got the 912 * CNAME RRSIG, use it. 913 */ 914 if (cnamesig != NULL) { 915 foundsig = cnamesig; 916 } else { 917 sigtype = DNS_SIGTYPE( 918 dns_rdatatype_cname); 919 } 920 } 921 } else if (header->type == sigtype) { 922 /* 923 * We've found the RRSIG rdataset for our 924 * target type. Remember it. 925 */ 926 foundsig = header; 927 } else if (header->type == RDATATYPE_NCACHEANY || 928 header->type == negtype) 929 { 930 /* 931 * We've found a negative cache entry. 932 */ 933 found = header; 934 } else if (header->type == dns_rdatatype_ns) { 935 /* 936 * Remember a NS rdataset even if we're 937 * not specifically looking for it, because 938 * we might need it later. 939 */ 940 nsheader = header; 941 } else if (header->type == 942 DNS_SIGTYPE(dns_rdatatype_ns)) 943 { 944 /* 945 * If we need the NS rdataset, we'll also 946 * need its signature. 947 */ 948 nssig = header; 949 } else if (header->type == dns_rdatatype_nsec) { 950 nsecheader = header; 951 } else if (header->type == 952 DNS_SIGTYPE(dns_rdatatype_nsec)) 953 { 954 nsecsig = header; 955 } else if (cname_ok && 956 header->type == 957 DNS_SIGTYPE(dns_rdatatype_cname)) 958 { 959 /* 960 * If we get a CNAME match, we'll also need 961 * its signature. 962 */ 963 cnamesig = header; 964 } 965 header_prev = header; 966 } else { 967 header_prev = header; 968 } 969 } 970 971 if (empty_node) { 972 /* 973 * We have an exact match for the name, but there are no 974 * extant rdatasets. That means that this node doesn't 975 * meaningfully exist, and that we really have a partial match. 976 */ 977 NODE_UNLOCK(lock, &nlocktype); 978 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) { 979 result = find_coveringnsec( 980 &search, name, nodep, now, foundname, rdataset, 981 sigrdataset DNS__DB_FLARG_PASS); 982 if (result == DNS_R_COVERINGNSEC) { 983 goto tree_exit; 984 } 985 } 986 goto find_ns; 987 } 988 989 /* 990 * If we didn't find what we were looking for... 991 */ 992 if (found == NULL || 993 (DNS_TRUST_ADDITIONAL(found->trust) && 994 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) || 995 (found->trust == dns_trust_glue && 996 ((options & DNS_DBFIND_GLUEOK) == 0)) || 997 (DNS_TRUST_PENDING(found->trust) && 998 ((options & DNS_DBFIND_PENDINGOK) == 0))) 999 { 1000 /* 1001 * Return covering NODATA NSEC record. 1002 */ 1003 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 && 1004 nsecheader != NULL) 1005 { 1006 if (nodep != NULL) { 1007 dns__rbtnode_acquire( 1008 search.rbtdb, node, 1009 nlocktype DNS__DB_FLARG_PASS); 1010 *nodep = node; 1011 } 1012 dns__rbtdb_bindrdataset(search.rbtdb, node, nsecheader, 1013 search.now, nlocktype, 1014 rdataset DNS__DB_FLARG_PASS); 1015 if (need_headerupdate(nsecheader, search.now)) { 1016 update = nsecheader; 1017 } 1018 if (nsecsig != NULL) { 1019 dns__rbtdb_bindrdataset( 1020 search.rbtdb, node, nsecsig, search.now, 1021 nlocktype, 1022 sigrdataset DNS__DB_FLARG_PASS); 1023 if (need_headerupdate(nsecsig, search.now)) { 1024 updatesig = nsecsig; 1025 } 1026 } 1027 result = DNS_R_COVERINGNSEC; 1028 goto node_exit; 1029 } 1030 1031 /* 1032 * This name was from a wild card. Look for a covering NSEC. 1033 */ 1034 if (found == NULL && (found_noqname || all_negative) && 1035 (search.options & DNS_DBFIND_COVERINGNSEC) != 0) 1036 { 1037 NODE_UNLOCK(lock, &nlocktype); 1038 result = find_coveringnsec( 1039 &search, name, nodep, now, foundname, rdataset, 1040 sigrdataset DNS__DB_FLARG_PASS); 1041 if (result == DNS_R_COVERINGNSEC) { 1042 goto tree_exit; 1043 } 1044 goto find_ns; 1045 } 1046 1047 /* 1048 * If there is an NS rdataset at this node, then this is the 1049 * deepest zone cut. 1050 */ 1051 if (nsheader != NULL) { 1052 if (nodep != NULL) { 1053 dns__rbtnode_acquire( 1054 search.rbtdb, node, 1055 nlocktype DNS__DB_FLARG_PASS); 1056 *nodep = node; 1057 } 1058 dns__rbtdb_bindrdataset(search.rbtdb, node, nsheader, 1059 search.now, nlocktype, 1060 rdataset DNS__DB_FLARG_PASS); 1061 if (need_headerupdate(nsheader, search.now)) { 1062 update = nsheader; 1063 } 1064 if (nssig != NULL) { 1065 dns__rbtdb_bindrdataset( 1066 search.rbtdb, node, nssig, search.now, 1067 nlocktype, 1068 sigrdataset DNS__DB_FLARG_PASS); 1069 if (need_headerupdate(nssig, search.now)) { 1070 updatesig = nssig; 1071 } 1072 } 1073 result = DNS_R_DELEGATION; 1074 goto node_exit; 1075 } 1076 1077 /* 1078 * Go find the deepest zone cut. 1079 */ 1080 NODE_UNLOCK(lock, &nlocktype); 1081 goto find_ns; 1082 } 1083 1084 /* 1085 * We found what we were looking for, or we found a CNAME. 1086 */ 1087 1088 if (nodep != NULL) { 1089 dns__rbtnode_acquire(search.rbtdb, node, 1090 nlocktype DNS__DB_FLARG_PASS); 1091 *nodep = node; 1092 } 1093 1094 if (NEGATIVE(found)) { 1095 /* 1096 * We found a negative cache entry. 1097 */ 1098 if (NXDOMAIN(found)) { 1099 result = DNS_R_NCACHENXDOMAIN; 1100 } else { 1101 result = DNS_R_NCACHENXRRSET; 1102 } 1103 } else if (type != found->type && type != dns_rdatatype_any && 1104 found->type == dns_rdatatype_cname) 1105 { 1106 /* 1107 * We weren't doing an ANY query and we found a CNAME instead 1108 * of the type we were looking for, so we need to indicate 1109 * that result to the caller. 1110 */ 1111 result = DNS_R_CNAME; 1112 } else { 1113 /* 1114 * An ordinary successful query! 1115 */ 1116 result = ISC_R_SUCCESS; 1117 } 1118 1119 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN || 1120 result == DNS_R_NCACHENXRRSET) 1121 { 1122 dns__rbtdb_bindrdataset(search.rbtdb, node, found, search.now, 1123 nlocktype, rdataset DNS__DB_FLARG_PASS); 1124 if (need_headerupdate(found, search.now)) { 1125 update = found; 1126 } 1127 if (!NEGATIVE(found) && foundsig != NULL) { 1128 dns__rbtdb_bindrdataset(search.rbtdb, node, foundsig, 1129 search.now, nlocktype, 1130 sigrdataset DNS__DB_FLARG_PASS); 1131 if (need_headerupdate(foundsig, search.now)) { 1132 updatesig = foundsig; 1133 } 1134 } 1135 } 1136 1137 node_exit: 1138 if ((update != NULL || updatesig != NULL) && 1139 nlocktype != isc_rwlocktype_write) 1140 { 1141 NODE_FORCEUPGRADE(lock, &nlocktype); 1142 POST(nlocktype); 1143 } 1144 if (update != NULL && need_headerupdate(update, search.now)) { 1145 update_header(search.rbtdb, update, search.now); 1146 } 1147 if (updatesig != NULL && need_headerupdate(updatesig, search.now)) { 1148 update_header(search.rbtdb, updatesig, search.now); 1149 } 1150 1151 NODE_UNLOCK(lock, &nlocktype); 1152 1153 tree_exit: 1154 TREE_UNLOCK(&search.rbtdb->tree_lock, &tlocktype); 1155 1156 /* 1157 * If we found a zonecut but aren't going to use it, we have to 1158 * let go of it. 1159 */ 1160 if (search.need_cleanup) { 1161 node = search.zonecut; 1162 INSIST(node != NULL); 1163 lock = &(search.rbtdb->node_locks[node->locknum].lock); 1164 1165 NODE_RDLOCK(lock, &nlocktype); 1166 dns__rbtnode_release(search.rbtdb, node, 0, &nlocktype, 1167 &tlocktype, true, 1168 false DNS__DB_FLARG_PASS); 1169 NODE_UNLOCK(lock, &nlocktype); 1170 INSIST(tlocktype == isc_rwlocktype_none); 1171 } 1172 1173 dns_rbtnodechain_reset(&search.chain); 1174 1175 update_cachestats(search.rbtdb, result); 1176 return result; 1177 } 1178 1179 static isc_result_t 1180 cache_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options, 1181 isc_stdtime_t now, dns_dbnode_t **nodep, 1182 dns_name_t *foundname, dns_name_t *dcname, 1183 dns_rdataset_t *rdataset, 1184 dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 1185 dns_rbtnode_t *node = NULL; 1186 isc_rwlock_t *lock = NULL; 1187 isc_result_t result; 1188 rbtdb_search_t search; 1189 dns_slabheader_t *header = NULL; 1190 dns_slabheader_t *header_prev = NULL, *header_next = NULL; 1191 dns_slabheader_t *found = NULL, *foundsig = NULL; 1192 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA; 1193 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 1194 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1195 bool dcnull = (dcname == NULL); 1196 1197 REQUIRE(VALID_RBTDB((dns_rbtdb_t *)db)); 1198 1199 if (now == 0) { 1200 now = isc_stdtime_now(); 1201 } 1202 1203 search = (rbtdb_search_t){ 1204 .rbtdb = (dns_rbtdb_t *)db, 1205 .serial = 1, 1206 .options = options, 1207 .now = now, 1208 }; 1209 dns_fixedname_init(&search.zonecut_name); 1210 dns_rbtnodechain_init(&search.chain); 1211 1212 if (dcnull) { 1213 dcname = foundname; 1214 } 1215 1216 if ((options & DNS_DBFIND_NOEXACT) != 0) { 1217 rbtoptions |= DNS_RBTFIND_NOEXACT; 1218 } 1219 1220 TREE_RDLOCK(&search.rbtdb->tree_lock, &tlocktype); 1221 1222 /* 1223 * Search down from the root of the tree. 1224 */ 1225 result = dns_rbt_findnode(search.rbtdb->tree, name, dcname, &node, 1226 &search.chain, rbtoptions, NULL, &search); 1227 1228 if (result == DNS_R_PARTIALMATCH) { 1229 result = find_deepest_zonecut(&search, node, nodep, foundname, 1230 rdataset, 1231 sigrdataset DNS__DB_FLARG_PASS); 1232 goto tree_exit; 1233 } else if (result != ISC_R_SUCCESS) { 1234 goto tree_exit; 1235 } else if (!dcnull) { 1236 dns_name_copy(dcname, foundname); 1237 } 1238 1239 /* 1240 * We now go looking for an NS rdataset at the node. 1241 */ 1242 1243 lock = &(search.rbtdb->node_locks[node->locknum].lock); 1244 NODE_RDLOCK(lock, &nlocktype); 1245 1246 for (header = node->data; header != NULL; header = header_next) { 1247 header_next = header->next; 1248 bool ns = (header->type == dns_rdatatype_ns || 1249 header->type == DNS_SIGTYPE(dns_rdatatype_ns)); 1250 if (check_stale_header(node, header, &nlocktype, lock, &search, 1251 &header_prev)) 1252 { 1253 if (ns) { 1254 /* 1255 * We found a cached NS, but was either 1256 * ancient or it was stale and serve-stale 1257 * is disabled, so this node can't be used 1258 * as a zone cut we know about. Instead we 1259 * bail out and call find_deepest_zonecut() 1260 * below. 1261 */ 1262 break; 1263 } 1264 } else if (EXISTS(header) && !ANCIENT(header)) { 1265 if (header->type == dns_rdatatype_ns) { 1266 found = header; 1267 if (foundsig != NULL) { 1268 break; 1269 } 1270 } else if (header->type == 1271 DNS_SIGTYPE(dns_rdatatype_ns)) 1272 { 1273 foundsig = header; 1274 if (found != NULL) { 1275 break; 1276 } 1277 } 1278 header_prev = header; 1279 } else { 1280 header_prev = header; 1281 } 1282 } 1283 1284 if (found == NULL) { 1285 /* 1286 * No active NS records found. Call find_deepest_zonecut() 1287 * to look for them in nodes above this one. 1288 */ 1289 NODE_UNLOCK(lock, &nlocktype); 1290 result = find_deepest_zonecut(&search, node, nodep, foundname, 1291 rdataset, 1292 sigrdataset DNS__DB_FLARG_PASS); 1293 dns_name_copy(foundname, dcname); 1294 goto tree_exit; 1295 } 1296 1297 if (nodep != NULL) { 1298 dns__rbtnode_acquire(search.rbtdb, node, 1299 nlocktype DNS__DB_FLARG_PASS); 1300 *nodep = node; 1301 } 1302 1303 dns__rbtdb_bindrdataset(search.rbtdb, node, found, search.now, 1304 nlocktype, rdataset DNS__DB_FLARG_PASS); 1305 if (foundsig != NULL) { 1306 dns__rbtdb_bindrdataset(search.rbtdb, node, foundsig, 1307 search.now, nlocktype, 1308 sigrdataset DNS__DB_FLARG_PASS); 1309 } 1310 1311 if (need_headerupdate(found, search.now) || 1312 (foundsig != NULL && need_headerupdate(foundsig, search.now))) 1313 { 1314 if (nlocktype != isc_rwlocktype_write) { 1315 NODE_FORCEUPGRADE(lock, &nlocktype); 1316 POST(nlocktype); 1317 } 1318 if (need_headerupdate(found, search.now)) { 1319 update_header(search.rbtdb, found, search.now); 1320 } 1321 if (foundsig != NULL && need_headerupdate(foundsig, search.now)) 1322 { 1323 update_header(search.rbtdb, foundsig, search.now); 1324 } 1325 } 1326 1327 NODE_UNLOCK(lock, &nlocktype); 1328 1329 tree_exit: 1330 TREE_UNLOCK(&search.rbtdb->tree_lock, &tlocktype); 1331 1332 INSIST(!search.need_cleanup); 1333 1334 dns_rbtnodechain_reset(&search.chain); 1335 1336 if (result == DNS_R_DELEGATION) { 1337 result = ISC_R_SUCCESS; 1338 } 1339 1340 return result; 1341 } 1342 1343 static isc_result_t 1344 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version, 1345 dns_rdatatype_t type, dns_rdatatype_t covers, 1346 isc_stdtime_t now, dns_rdataset_t *rdataset, 1347 dns_rdataset_t *sigrdataset DNS__DB_FLARG) { 1348 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1349 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 1350 dns_slabheader_t *header = NULL, *header_next = NULL; 1351 dns_slabheader_t *found = NULL, *foundsig = NULL; 1352 dns_typepair_t matchtype, sigmatchtype, negtype; 1353 isc_result_t result; 1354 isc_rwlock_t *lock = NULL; 1355 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1356 1357 REQUIRE(VALID_RBTDB(rbtdb)); 1358 REQUIRE(type != dns_rdatatype_any); 1359 1360 UNUSED(version); 1361 1362 result = ISC_R_SUCCESS; 1363 1364 if (now == 0) { 1365 now = isc_stdtime_now(); 1366 } 1367 1368 lock = &rbtdb->node_locks[rbtnode->locknum].lock; 1369 NODE_RDLOCK(lock, &nlocktype); 1370 1371 matchtype = DNS_TYPEPAIR_VALUE(type, covers); 1372 negtype = DNS_TYPEPAIR_VALUE(0, type); 1373 if (covers == 0) { 1374 sigmatchtype = DNS_SIGTYPE(type); 1375 } else { 1376 sigmatchtype = 0; 1377 } 1378 1379 for (header = rbtnode->data; header != NULL; header = header_next) { 1380 header_next = header->next; 1381 if (!ACTIVE(header, now)) { 1382 if ((header->ttl + STALE_TTL(header, rbtdb) < 1383 now - RBTDB_VIRTUAL) && 1384 (nlocktype == isc_rwlocktype_write || 1385 NODE_TRYUPGRADE(lock, &nlocktype) == 1386 ISC_R_SUCCESS)) 1387 { 1388 /* 1389 * We update the node's status only when we 1390 * can get write access. 1391 * 1392 * We don't check if refcurrent(rbtnode) == 0 1393 * and try to free like we do in cache_find(), 1394 * because refcurrent(rbtnode) must be 1395 * non-zero. This is so because 'node' is an 1396 * argument to the function. 1397 */ 1398 dns__rbtdb_mark_ancient(header); 1399 } 1400 } else if (EXISTS(header) && !ANCIENT(header)) { 1401 if (header->type == matchtype) { 1402 found = header; 1403 } else if (header->type == RDATATYPE_NCACHEANY || 1404 header->type == negtype) 1405 { 1406 found = header; 1407 } else if (header->type == sigmatchtype) { 1408 foundsig = header; 1409 } 1410 } 1411 } 1412 if (found != NULL) { 1413 dns__rbtdb_bindrdataset(rbtdb, rbtnode, found, now, nlocktype, 1414 rdataset DNS__DB_FLARG_PASS); 1415 if (!NEGATIVE(found) && foundsig != NULL) { 1416 dns__rbtdb_bindrdataset(rbtdb, rbtnode, foundsig, now, 1417 nlocktype, 1418 sigrdataset DNS__DB_FLARG_PASS); 1419 } 1420 } 1421 1422 NODE_UNLOCK(lock, &nlocktype); 1423 1424 if (found == NULL) { 1425 return ISC_R_NOTFOUND; 1426 } 1427 1428 if (NEGATIVE(found)) { 1429 /* 1430 * We found a negative cache entry. 1431 */ 1432 if (NXDOMAIN(found)) { 1433 result = DNS_R_NCACHENXDOMAIN; 1434 } else { 1435 result = DNS_R_NCACHENXRRSET; 1436 } 1437 } 1438 1439 update_cachestats(rbtdb, result); 1440 1441 return result; 1442 } 1443 1444 static size_t 1445 hashsize(dns_db_t *db) { 1446 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1447 size_t size; 1448 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 1449 1450 REQUIRE(VALID_RBTDB(rbtdb)); 1451 1452 TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype); 1453 size = dns_rbt_hashsize(rbtdb->tree); 1454 TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype); 1455 1456 return size; 1457 } 1458 1459 static isc_result_t 1460 setcachestats(dns_db_t *db, isc_stats_t *stats) { 1461 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1462 1463 REQUIRE(VALID_RBTDB(rbtdb)); 1464 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */ 1465 REQUIRE(stats != NULL); 1466 1467 isc_stats_attach(stats, &rbtdb->cachestats); 1468 return ISC_R_SUCCESS; 1469 } 1470 1471 static dns_stats_t * 1472 getrrsetstats(dns_db_t *db) { 1473 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1474 1475 REQUIRE(VALID_RBTDB(rbtdb)); 1476 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */ 1477 1478 return rbtdb->rrsetstats; 1479 } 1480 1481 static isc_result_t 1482 setservestalettl(dns_db_t *db, dns_ttl_t ttl) { 1483 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1484 1485 REQUIRE(VALID_RBTDB(rbtdb)); 1486 REQUIRE(IS_CACHE(rbtdb)); 1487 1488 /* currently no bounds checking. 0 means disable. */ 1489 rbtdb->common.serve_stale_ttl = ttl; 1490 return ISC_R_SUCCESS; 1491 } 1492 1493 static isc_result_t 1494 getservestalettl(dns_db_t *db, dns_ttl_t *ttl) { 1495 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1496 1497 REQUIRE(VALID_RBTDB(rbtdb)); 1498 REQUIRE(IS_CACHE(rbtdb)); 1499 1500 *ttl = rbtdb->common.serve_stale_ttl; 1501 return ISC_R_SUCCESS; 1502 } 1503 1504 static isc_result_t 1505 setservestalerefresh(dns_db_t *db, uint32_t interval) { 1506 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1507 1508 REQUIRE(VALID_RBTDB(rbtdb)); 1509 REQUIRE(IS_CACHE(rbtdb)); 1510 1511 /* currently no bounds checking. 0 means disable. */ 1512 rbtdb->serve_stale_refresh = interval; 1513 return ISC_R_SUCCESS; 1514 } 1515 1516 static isc_result_t 1517 getservestalerefresh(dns_db_t *db, uint32_t *interval) { 1518 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1519 1520 REQUIRE(VALID_RBTDB(rbtdb)); 1521 REQUIRE(IS_CACHE(rbtdb)); 1522 1523 *interval = rbtdb->serve_stale_refresh; 1524 return ISC_R_SUCCESS; 1525 } 1526 1527 static void 1528 expiredata(dns_db_t *db, dns_dbnode_t *node, void *data) { 1529 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db; 1530 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node; 1531 dns_slabheader_t *header = data; 1532 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1533 isc_rwlocktype_t tlocktype = isc_rwlocktype_none; 1534 1535 NODE_WRLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); 1536 dns__cacherbt_expireheader(header, &tlocktype, 1537 dns_expire_flush DNS__DB_FILELINE); 1538 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype); 1539 INSIST(tlocktype == isc_rwlocktype_none); 1540 } 1541 1542 dns_dbmethods_t dns__rbtdb_cachemethods = { 1543 .destroy = dns__rbtdb_destroy, 1544 .currentversion = dns__rbtdb_currentversion, 1545 .newversion = dns__rbtdb_newversion, 1546 .attachversion = dns__rbtdb_attachversion, 1547 .closeversion = dns__rbtdb_closeversion, 1548 .findnode = dns__rbtdb_findnode, 1549 .find = cache_find, 1550 .findzonecut = cache_findzonecut, 1551 .attachnode = dns__rbtdb_attachnode, 1552 .detachnode = dns__rbtdb_detachnode, 1553 .createiterator = dns__rbtdb_createiterator, 1554 .findrdataset = cache_findrdataset, 1555 .allrdatasets = dns__rbtdb_allrdatasets, 1556 .addrdataset = dns__rbtdb_addrdataset, 1557 .subtractrdataset = dns__rbtdb_subtractrdataset, 1558 .deleterdataset = dns__rbtdb_deleterdataset, 1559 .nodecount = dns__rbtdb_nodecount, 1560 .setloop = dns__rbtdb_setloop, 1561 .getoriginnode = dns__rbtdb_getoriginnode, 1562 .getrrsetstats = getrrsetstats, 1563 .setcachestats = setcachestats, 1564 .hashsize = hashsize, 1565 .setservestalettl = setservestalettl, 1566 .getservestalettl = getservestalettl, 1567 .setservestalerefresh = setservestalerefresh, 1568 .getservestalerefresh = getservestalerefresh, 1569 .locknode = dns__rbtdb_locknode, 1570 .unlocknode = dns__rbtdb_unlocknode, 1571 .expiredata = expiredata, 1572 .deletedata = dns__rbtdb_deletedata, 1573 .setmaxrrperset = dns__rbtdb_setmaxrrperset, 1574 .setmaxtypepername = dns__rbtdb_setmaxtypepername, 1575 }; 1576 1577 /* 1578 * Caller must hold the node (write) lock. 1579 */ 1580 void 1581 dns__cacherbt_expireheader(dns_slabheader_t *header, 1582 isc_rwlocktype_t *tlocktypep, 1583 dns_expire_t reason DNS__DB_FLARG) { 1584 dns__rbtdb_mark_ancient(header); 1585 1586 if (isc_refcount_current(&RBTDB_HEADERNODE(header)->references) == 0) { 1587 isc_rwlocktype_t nlocktype = isc_rwlocktype_write; 1588 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)header->db; 1589 1590 /* 1591 * If no one else is using the node, we can clean it up now. 1592 * We first need to gain a new reference to the node to meet a 1593 * requirement of dns__rbtnode_release(). 1594 */ 1595 dns__rbtnode_acquire(rbtdb, RBTDB_HEADERNODE(header), 1596 nlocktype DNS__DB_FLARG_PASS); 1597 dns__rbtnode_release(rbtdb, RBTDB_HEADERNODE(header), 0, 1598 &nlocktype, tlocktypep, true, 1599 false DNS__DB_FLARG_PASS); 1600 1601 if (rbtdb->cachestats == NULL) { 1602 return; 1603 } 1604 1605 switch (reason) { 1606 case dns_expire_ttl: 1607 isc_stats_increment(rbtdb->cachestats, 1608 dns_cachestatscounter_deletettl); 1609 break; 1610 case dns_expire_lru: 1611 isc_stats_increment(rbtdb->cachestats, 1612 dns_cachestatscounter_deletelru); 1613 break; 1614 default: 1615 break; 1616 } 1617 } 1618 } 1619 1620 static size_t 1621 rdataset_size(dns_slabheader_t *header) { 1622 if (!NONEXISTENT(header)) { 1623 return dns_rdataslab_size((unsigned char *)header, 1624 sizeof(*header)); 1625 } 1626 1627 return sizeof(*header); 1628 } 1629 1630 static size_t 1631 expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, 1632 isc_rwlocktype_t *tlocktypep, 1633 size_t purgesize DNS__DB_FLARG) { 1634 dns_slabheader_t *header = NULL; 1635 size_t purged = 0; 1636 1637 for (header = ISC_LIST_TAIL(rbtdb->lru[locknum]); 1638 header != NULL && header->last_used <= rbtdb->last_used && 1639 purged <= purgesize; 1640 header = ISC_LIST_TAIL(rbtdb->lru[locknum])) 1641 { 1642 size_t header_size = rdataset_size(header); 1643 1644 /* 1645 * Unlink the entry at this point to avoid checking it 1646 * again even if it's currently used someone else and 1647 * cannot be purged at this moment. This entry won't be 1648 * referenced any more (so unlinking is safe) since the 1649 * TTL will be reset to 0. 1650 */ 1651 ISC_LIST_UNLINK(rbtdb->lru[locknum], header, link); 1652 dns__cacherbt_expireheader(header, tlocktypep, 1653 dns_expire_lru DNS__DB_FLARG_PASS); 1654 purged += header_size; 1655 } 1656 1657 return purged; 1658 } 1659 1660 /*% 1661 * Purge some expired and/or stale (i.e. unused for some period) cache entries 1662 * due to an overmem condition. To recover from this condition quickly, 1663 * we clean up entries up to the size of newly added rdata that triggered 1664 * the overmem; this is accessible via newheader. 1665 * 1666 * The LRU lists tails are processed in LRU order to the nearest second. 1667 * 1668 * A write lock on the tree must be held. 1669 */ 1670 void 1671 dns__cacherbt_overmem(dns_rbtdb_t *rbtdb, dns_slabheader_t *newheader, 1672 isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) { 1673 uint32_t locknum_start = rbtdb->lru_sweep++ % rbtdb->node_lock_count; 1674 uint32_t locknum = locknum_start; 1675 /* Size of added data, possible node and possible ENT node. */ 1676 size_t purgesize = 1677 rdataset_size(newheader) + 1678 2 * dns__rbtnode_getsize(RBTDB_HEADERNODE(newheader)); 1679 size_t purged = 0; 1680 isc_stdtime_t min_last_used = 0; 1681 size_t max_passes = 8; 1682 1683 again: 1684 do { 1685 isc_rwlocktype_t nlocktype = isc_rwlocktype_none; 1686 NODE_WRLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); 1687 1688 purged += expire_lru_headers(rbtdb, locknum, tlocktypep, 1689 purgesize - 1690 purged DNS__DB_FLARG_PASS); 1691 1692 /* 1693 * Work out the oldest remaining last_used values of the list 1694 * tails as we walk across the array of lru lists. 1695 */ 1696 dns_slabheader_t *header = ISC_LIST_TAIL(rbtdb->lru[locknum]); 1697 if (header != NULL && 1698 (min_last_used == 0 || header->last_used < min_last_used)) 1699 { 1700 min_last_used = header->last_used; 1701 } 1702 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype); 1703 locknum = (locknum + 1) % rbtdb->node_lock_count; 1704 } while (locknum != locknum_start && purged <= purgesize); 1705 1706 /* 1707 * Update rbtdb->last_used if we have walked all the list tails and have 1708 * not freed the required amount of memory. 1709 */ 1710 if (purged < purgesize) { 1711 if (min_last_used != 0) { 1712 rbtdb->last_used = min_last_used; 1713 if (max_passes-- > 0) { 1714 goto again; 1715 } 1716 } 1717 } 1718 } 1719