Home | History | Annotate | Line # | Download | only in dns
      1 /*	$NetBSD: rbt-cachedb.c,v 1.5 2026/01/29 18:37:49 christos Exp $	*/
      2 
      3 /*
      4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      5  *
      6  * SPDX-License-Identifier: MPL-2.0
      7  *
      8  * This Source Code Form is subject to the terms of the Mozilla Public
      9  * License, v. 2.0. If a copy of the MPL was not distributed with this
     10  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
     11  *
     12  * See the COPYRIGHT file distributed with this work for additional
     13  * information regarding copyright ownership.
     14  */
     15 
     16 /*! \file */
     17 
     18 #include <inttypes.h>
     19 #include <stdbool.h>
     20 #include <sys/mman.h>
     21 
     22 #include <isc/ascii.h>
     23 #include <isc/async.h>
     24 #include <isc/atomic.h>
     25 #include <isc/crc64.h>
     26 #include <isc/file.h>
     27 #include <isc/hash.h>
     28 #include <isc/hashmap.h>
     29 #include <isc/heap.h>
     30 #include <isc/hex.h>
     31 #include <isc/loop.h>
     32 #include <isc/mem.h>
     33 #include <isc/mutex.h>
     34 #include <isc/once.h>
     35 #include <isc/random.h>
     36 #include <isc/refcount.h>
     37 #include <isc/result.h>
     38 #include <isc/rwlock.h>
     39 #include <isc/serial.h>
     40 #include <isc/stdio.h>
     41 #include <isc/string.h>
     42 #include <isc/time.h>
     43 #include <isc/urcu.h>
     44 #include <isc/util.h>
     45 
     46 #include <dns/callbacks.h>
     47 #include <dns/db.h>
     48 #include <dns/dbiterator.h>
     49 #include <dns/fixedname.h>
     50 #include <dns/log.h>
     51 #include <dns/masterdump.h>
     52 #include <dns/nsec.h>
     53 #include <dns/nsec3.h>
     54 #include <dns/rbt.h>
     55 #include <dns/rdata.h>
     56 #include <dns/rdataset.h>
     57 #include <dns/rdatasetiter.h>
     58 #include <dns/rdataslab.h>
     59 #include <dns/rdatastruct.h>
     60 #include <dns/stats.h>
     61 #include <dns/time.h>
     62 #include <dns/view.h>
     63 #include <dns/zone.h>
     64 #include <dns/zonekey.h>
     65 
     66 #include "db_p.h"
     67 #include "rbtdb_p.h"
     68 
     69 /*%
     70  * Whether to rate-limit updating the LRU to avoid possible thread contention.
     71  * Updating LRU requires write locking, so we don't do it every time the
     72  * record is touched - only after some time passes.
     73  */
     74 #ifndef DNS_RBTDB_LIMITLRUUPDATE
     75 #define DNS_RBTDB_LIMITLRUUPDATE 1
     76 #endif
     77 
     78 /*% Time after which we update LRU for glue records, 5 minutes */
     79 #define DNS_RBTDB_LRUUPDATE_GLUE 300
     80 /*% Time after which we update LRU for all other records, 10 minutes */
     81 #define DNS_RBTDB_LRUUPDATE_REGULAR 600
     82 
     83 #define EXISTS(header)                                 \
     84 	((atomic_load_acquire(&(header)->attributes) & \
     85 	  DNS_SLABHEADERATTR_NONEXISTENT) == 0)
     86 #define NONEXISTENT(header)                            \
     87 	((atomic_load_acquire(&(header)->attributes) & \
     88 	  DNS_SLABHEADERATTR_NONEXISTENT) != 0)
     89 #define NXDOMAIN(header)                               \
     90 	((atomic_load_acquire(&(header)->attributes) & \
     91 	  DNS_SLABHEADERATTR_NXDOMAIN) != 0)
     92 #define STALE(header)                                  \
     93 	((atomic_load_acquire(&(header)->attributes) & \
     94 	  DNS_SLABHEADERATTR_STALE) != 0)
     95 #define NEGATIVE(header)                               \
     96 	((atomic_load_acquire(&(header)->attributes) & \
     97 	  DNS_SLABHEADERATTR_NEGATIVE) != 0)
     98 #define ZEROTTL(header)                                \
     99 	((atomic_load_acquire(&(header)->attributes) & \
    100 	  DNS_SLABHEADERATTR_ZEROTTL) != 0)
    101 #define ANCIENT(header)                                \
    102 	((atomic_load_acquire(&(header)->attributes) & \
    103 	  DNS_SLABHEADERATTR_ANCIENT) != 0)
    104 #define STATCOUNT(header)                              \
    105 	((atomic_load_acquire(&(header)->attributes) & \
    106 	  DNS_SLABHEADERATTR_STATCOUNT) != 0)
    107 
    108 #define STALE_TTL(header, rbtdb) \
    109 	(NXDOMAIN(header) ? 0 : rbtdb->common.serve_stale_ttl)
    110 
    111 #define ACTIVE(header, now) \
    112 	(((header)->ttl > (now)) || ((header)->ttl == (now) && ZEROTTL(header)))
    113 
    114 #define KEEPSTALE(rbtdb) ((rbtdb)->common.serve_stale_ttl > 0)
    115 
    116 /*%
    117  * Routines for LRU-based cache management.
    118  */
    119 
    120 /*%
    121  * See if a given cache entry that is being reused needs to be updated
    122  * in the LRU-list.  From the LRU management point of view, this function is
    123  * expected to return true for almost all cases.  When used with threads,
    124  * however, this may cause a non-negligible performance penalty because a
    125  * writer lock will have to be acquired before updating the list.
    126  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
    127  * function returns true if the entry has not been updated for some period of
    128  * time.  We differentiate the NS or glue address case and the others since
    129  * experiments have shown that the former tends to be accessed relatively
    130  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
    131  * may cause external queries at a higher level zone, involving more
    132  * transactions).
    133  *
    134  * Caller must hold the node (read or write) lock.
    135  */
    136 static bool
    137 need_headerupdate(dns_slabheader_t *header, isc_stdtime_t now) {
    138 	if (DNS_SLABHEADER_GETATTR(header,
    139 				   DNS_SLABHEADERATTR_NONEXISTENT |
    140 					   DNS_SLABHEADERATTR_ANCIENT |
    141 					   DNS_SLABHEADERATTR_ZEROTTL) != 0)
    142 	{
    143 		return false;
    144 	}
    145 
    146 #if DNS_RBTDB_LIMITLRUUPDATE
    147 	if (header->type == dns_rdatatype_ns ||
    148 	    (header->trust == dns_trust_glue &&
    149 	     (header->type == dns_rdatatype_a ||
    150 	      header->type == dns_rdatatype_aaaa)))
    151 	{
    152 		/*
    153 		 * Glue records are updated if at least DNS_RBTDB_LRUUPDATE_GLUE
    154 		 * seconds have passed since the previous update time.
    155 		 */
    156 		return header->last_used + DNS_RBTDB_LRUUPDATE_GLUE <= now;
    157 	}
    158 
    159 	/*
    160 	 * Other records are updated if DNS_RBTDB_LRUUPDATE_REGULAR seconds
    161 	 * have passed.
    162 	 */
    163 	return header->last_used + DNS_RBTDB_LRUUPDATE_REGULAR <= now;
    164 #else
    165 	UNUSED(now);
    166 
    167 	return true;
    168 #endif /* if DNS_RBTDB_LIMITLRUUPDATE */
    169 }
    170 
    171 /*%
    172  * Update the timestamp of a given cache entry and move it to the head
    173  * of the corresponding LRU list.
    174  *
    175  * Caller must hold the node (write) lock.
    176  *
    177  * Note that the we do NOT touch the heap here, as the TTL has not changed.
    178  */
    179 static void
    180 update_header(dns_rbtdb_t *rbtdb, dns_slabheader_t *header, isc_stdtime_t now) {
    181 	INSIST(IS_CACHE(rbtdb));
    182 
    183 	/* To be checked: can we really assume this? XXXMLG */
    184 	INSIST(ISC_LINK_LINKED(header, link));
    185 
    186 	ISC_LIST_UNLINK(rbtdb->lru[RBTDB_HEADERNODE(header)->locknum], header,
    187 			link);
    188 	header->last_used = now;
    189 	ISC_LIST_PREPEND(rbtdb->lru[RBTDB_HEADERNODE(header)->locknum], header,
    190 			 link);
    191 }
    192 
    193 /*
    194  * Locking
    195  *
    196  * If a routine is going to lock more than one lock in this module, then
    197  * the locking must be done in the following order:
    198  *
    199  *      Tree Lock
    200  *
    201  *      Node Lock       (Only one from the set may be locked at one time by
    202  *                       any caller)
    203  *
    204  *      Database Lock
    205  *
    206  * Failure to follow this hierarchy can result in deadlock.
    207  */
    208 
    209 /*
    210  * Deleting Nodes
    211  *
    212  * For zone databases the node for the origin of the zone MUST NOT be deleted.
    213  */
    214 
    215 /*
    216  * DB Routines
    217  */
    218 
    219 static void
    220 update_cachestats(dns_rbtdb_t *rbtdb, isc_result_t result) {
    221 	INSIST(IS_CACHE(rbtdb));
    222 
    223 	if (rbtdb->cachestats == NULL) {
    224 		return;
    225 	}
    226 
    227 	switch (result) {
    228 	case DNS_R_COVERINGNSEC:
    229 		isc_stats_increment(rbtdb->cachestats,
    230 				    dns_cachestatscounter_coveringnsec);
    231 		FALLTHROUGH;
    232 	case ISC_R_SUCCESS:
    233 	case DNS_R_CNAME:
    234 	case DNS_R_DNAME:
    235 	case DNS_R_DELEGATION:
    236 	case DNS_R_NCACHENXDOMAIN:
    237 	case DNS_R_NCACHENXRRSET:
    238 		isc_stats_increment(rbtdb->cachestats,
    239 				    dns_cachestatscounter_hits);
    240 		break;
    241 	default:
    242 		isc_stats_increment(rbtdb->cachestats,
    243 				    dns_cachestatscounter_misses);
    244 	}
    245 }
    246 
    247 static void
    248 clean_stale_headers(dns_slabheader_t *top) {
    249 	dns_slabheader_t *d = NULL, *down_next = NULL;
    250 
    251 	for (d = top->down; d != NULL; d = down_next) {
    252 		down_next = d->down;
    253 		dns_slabheader_destroy(&d);
    254 	}
    255 	top->down = NULL;
    256 }
    257 
    258 static isc_result_t
    259 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
    260 		 dns_name_t *foundname, dns_rdataset_t *rdataset,
    261 		 dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
    262 	dns_name_t *zcname = NULL;
    263 	dns_typepair_t type;
    264 	dns_rbtnode_t *node = NULL;
    265 
    266 	REQUIRE(search != NULL);
    267 	REQUIRE(search->zonecut != NULL);
    268 	REQUIRE(search->zonecut_header != NULL);
    269 
    270 	/*
    271 	 * The caller MUST NOT be holding any node locks.
    272 	 */
    273 
    274 	node = search->zonecut;
    275 	type = search->zonecut_header->type;
    276 
    277 	/*
    278 	 * If we have to set foundname, we do it before anything else.
    279 	 * If we were to set foundname after we had set nodep or bound the
    280 	 * rdataset, then we'd have to undo that work if dns_name_copy()
    281 	 * failed.  By setting foundname first, there's nothing to undo if
    282 	 * we have trouble.
    283 	 */
    284 	if (foundname != NULL && search->copy_name) {
    285 		zcname = dns_fixedname_name(&search->zonecut_name);
    286 		dns_name_copy(zcname, foundname);
    287 	}
    288 	if (nodep != NULL) {
    289 		/*
    290 		 * Note that we don't have to increment the node's reference
    291 		 * count here because we're going to use the reference we
    292 		 * already have in the search block.
    293 		 */
    294 		*nodep = node;
    295 		search->need_cleanup = false;
    296 	}
    297 	if (rdataset != NULL) {
    298 		isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
    299 		NODE_RDLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
    300 			    &nlocktype);
    301 		dns__rbtdb_bindrdataset(search->rbtdb, node,
    302 					search->zonecut_header, search->now,
    303 					isc_rwlocktype_read,
    304 					rdataset DNS__DB_FLARG_PASS);
    305 		if (sigrdataset != NULL && search->zonecut_sigheader != NULL) {
    306 			dns__rbtdb_bindrdataset(
    307 				search->rbtdb, node, search->zonecut_sigheader,
    308 				search->now, isc_rwlocktype_read,
    309 				sigrdataset DNS__DB_FLARG_PASS);
    310 		}
    311 		NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
    312 			    &nlocktype);
    313 	}
    314 
    315 	if (type == dns_rdatatype_dname) {
    316 		return DNS_R_DNAME;
    317 	}
    318 	return DNS_R_DELEGATION;
    319 }
    320 
    321 static bool
    322 check_stale_header(dns_rbtnode_t *node, dns_slabheader_t *header,
    323 		   isc_rwlocktype_t *nlocktypep, isc_rwlock_t *lock,
    324 		   rbtdb_search_t *search, dns_slabheader_t **header_prev) {
    325 	if (!ACTIVE(header, search->now)) {
    326 		dns_ttl_t stale = header->ttl +
    327 				  STALE_TTL(header, search->rbtdb);
    328 		/*
    329 		 * If this data is in the stale window keep it and if
    330 		 * DNS_DBFIND_STALEOK is not set we tell the caller to
    331 		 * skip this record.  We skip the records with ZEROTTL
    332 		 * (these records should not be cached anyway).
    333 		 */
    334 
    335 		DNS_SLABHEADER_CLRATTR(header, DNS_SLABHEADERATTR_STALE_WINDOW);
    336 		if (!ZEROTTL(header) && KEEPSTALE(search->rbtdb) &&
    337 		    stale > search->now)
    338 		{
    339 			dns__rbtdb_mark(header, DNS_SLABHEADERATTR_STALE);
    340 			*header_prev = header;
    341 			/*
    342 			 * If DNS_DBFIND_STALESTART is set then it means we
    343 			 * failed to resolve the name during recursion, in
    344 			 * this case we mark the time in which the refresh
    345 			 * failed.
    346 			 */
    347 			if ((search->options & DNS_DBFIND_STALESTART) != 0) {
    348 				atomic_store_release(
    349 					&header->last_refresh_fail_ts,
    350 					search->now);
    351 			} else if ((search->options &
    352 				    DNS_DBFIND_STALEENABLED) != 0 &&
    353 				   search->now <
    354 					   (atomic_load_acquire(
    355 						    &header->last_refresh_fail_ts) +
    356 					    search->rbtdb->serve_stale_refresh))
    357 			{
    358 				/*
    359 				 * If we are within interval between last
    360 				 * refresh failure time + 'stale-refresh-time',
    361 				 * then don't skip this stale entry but use it
    362 				 * instead.
    363 				 */
    364 				DNS_SLABHEADER_SETATTR(
    365 					header,
    366 					DNS_SLABHEADERATTR_STALE_WINDOW);
    367 				return false;
    368 			} else if ((search->options &
    369 				    DNS_DBFIND_STALETIMEOUT) != 0)
    370 			{
    371 				/*
    372 				 * We want stale RRset due to timeout, so we
    373 				 * don't skip it.
    374 				 */
    375 				return false;
    376 			}
    377 			return (search->options & DNS_DBFIND_STALEOK) == 0;
    378 		}
    379 
    380 		/*
    381 		 * This rdataset is stale.  If no one else is using the
    382 		 * node, we can clean it up right now, otherwise we mark
    383 		 * it as ancient, and the node as dirty, so it will get
    384 		 * cleaned up later.
    385 		 */
    386 		if ((header->ttl < search->now - RBTDB_VIRTUAL) &&
    387 		    (*nlocktypep == isc_rwlocktype_write ||
    388 		     NODE_TRYUPGRADE(lock, nlocktypep) == ISC_R_SUCCESS))
    389 		{
    390 			/*
    391 			 * We update the node's status only when we can
    392 			 * get write access; otherwise, we leave others
    393 			 * to this work.  Periodical cleaning will
    394 			 * eventually take the job as the last resort.
    395 			 * We won't downgrade the lock, since other
    396 			 * rdatasets are probably stale, too.
    397 			 */
    398 
    399 			if (isc_refcount_current(&node->references) == 0) {
    400 				/*
    401 				 * header->down can be non-NULL if the
    402 				 * refcount has just decremented to 0
    403 				 * but dns__rbtnode_release() has not
    404 				 * performed clean_cache_node(), in
    405 				 * which case we need to purge the stale
    406 				 * headers first.
    407 				 */
    408 				clean_stale_headers(header);
    409 				if (*header_prev != NULL) {
    410 					(*header_prev)->next = header->next;
    411 				} else {
    412 					node->data = header->next;
    413 				}
    414 				dns_slabheader_destroy(&header);
    415 			} else {
    416 				dns__rbtdb_mark_ancient(header);
    417 				*header_prev = header;
    418 			}
    419 		} else {
    420 			*header_prev = header;
    421 		}
    422 		return true;
    423 	}
    424 	return false;
    425 }
    426 
    427 static isc_result_t
    428 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name,
    429 		       void *arg DNS__DB_FLARG) {
    430 	rbtdb_search_t *search = arg;
    431 	dns_slabheader_t *header = NULL;
    432 	dns_slabheader_t *header_prev = NULL, *header_next = NULL;
    433 	dns_slabheader_t *dname_header = NULL, *sigdname_header = NULL;
    434 	isc_result_t result;
    435 	isc_rwlock_t *lock = NULL;
    436 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
    437 
    438 	REQUIRE(search->zonecut == NULL);
    439 
    440 	/*
    441 	 * Keep compiler silent.
    442 	 */
    443 	UNUSED(name);
    444 
    445 	lock = &(search->rbtdb->node_locks[node->locknum].lock);
    446 	NODE_RDLOCK(lock, &nlocktype);
    447 
    448 	/*
    449 	 * Look for a DNAME or RRSIG DNAME rdataset.
    450 	 */
    451 	for (header = node->data; header != NULL; header = header_next) {
    452 		header_next = header->next;
    453 		if (check_stale_header(node, header, &nlocktype, lock, search,
    454 				       &header_prev))
    455 		{
    456 			/* Do nothing. */
    457 		} else if (header->type == dns_rdatatype_dname &&
    458 			   EXISTS(header) && !ANCIENT(header))
    459 		{
    460 			dname_header = header;
    461 			header_prev = header;
    462 		} else if (header->type == DNS_SIGTYPE(dns_rdatatype_dname) &&
    463 			   EXISTS(header) && !ANCIENT(header))
    464 		{
    465 			sigdname_header = header;
    466 			header_prev = header;
    467 		} else {
    468 			header_prev = header;
    469 		}
    470 	}
    471 
    472 	if (dname_header != NULL &&
    473 	    (!DNS_TRUST_PENDING(dname_header->trust) ||
    474 	     (search->options & DNS_DBFIND_PENDINGOK) != 0))
    475 	{
    476 		/*
    477 		 * We increment the reference count on node to ensure that
    478 		 * search->zonecut_header will still be valid later.
    479 		 */
    480 		dns__rbtnode_acquire(search->rbtdb, node,
    481 				     nlocktype DNS__DB_FLARG_PASS);
    482 		search->zonecut = node;
    483 		search->zonecut_header = dname_header;
    484 		search->zonecut_sigheader = sigdname_header;
    485 		search->need_cleanup = true;
    486 		result = DNS_R_PARTIALMATCH;
    487 	} else {
    488 		result = DNS_R_CONTINUE;
    489 	}
    490 
    491 	NODE_UNLOCK(lock, &nlocktype);
    492 
    493 	return result;
    494 }
    495 
    496 static isc_result_t
    497 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
    498 		     dns_dbnode_t **nodep, dns_name_t *foundname,
    499 		     dns_rdataset_t *rdataset,
    500 		     dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
    501 	unsigned int i;
    502 	isc_result_t result = ISC_R_NOTFOUND;
    503 	dns_name_t name;
    504 	dns_rbtdb_t *rbtdb = NULL;
    505 	bool done;
    506 
    507 	/*
    508 	 * Caller must be holding the tree lock.
    509 	 */
    510 
    511 	rbtdb = search->rbtdb;
    512 	i = search->chain.level_matches;
    513 	done = false;
    514 	do {
    515 		dns_slabheader_t *header = NULL;
    516 		dns_slabheader_t *header_prev = NULL, *header_next = NULL;
    517 		dns_slabheader_t *found = NULL, *foundsig = NULL;
    518 		isc_rwlock_t *lock = &rbtdb->node_locks[node->locknum].lock;
    519 		isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
    520 
    521 		NODE_RDLOCK(lock, &nlocktype);
    522 
    523 		/*
    524 		 * Look for NS and RRSIG NS rdatasets.
    525 		 */
    526 		for (header = node->data; header != NULL; header = header_next)
    527 		{
    528 			header_next = header->next;
    529 			if (check_stale_header(node, header, &nlocktype, lock,
    530 					       search, &header_prev))
    531 			{
    532 				/* Do nothing. */
    533 			} else if (EXISTS(header) && !ANCIENT(header)) {
    534 				/*
    535 				 * We've found an extant rdataset.  See if
    536 				 * we're interested in it.
    537 				 */
    538 				if (header->type == dns_rdatatype_ns) {
    539 					found = header;
    540 					if (foundsig != NULL) {
    541 						break;
    542 					}
    543 				} else if (header->type ==
    544 					   DNS_SIGTYPE(dns_rdatatype_ns))
    545 				{
    546 					foundsig = header;
    547 					if (found != NULL) {
    548 						break;
    549 					}
    550 				}
    551 				header_prev = header;
    552 			} else {
    553 				header_prev = header;
    554 			}
    555 		}
    556 
    557 		if (found != NULL) {
    558 			/*
    559 			 * If we have to set foundname, we do it before
    560 			 * anything else.  If we were to set foundname after
    561 			 * we had set nodep or bound the rdataset, then we'd
    562 			 * have to undo that work if dns_name_concatenate()
    563 			 * failed.  By setting foundname first, there's
    564 			 * nothing to undo if we have trouble.
    565 			 */
    566 			if (foundname != NULL) {
    567 				dns_name_init(&name, NULL);
    568 				dns_rbt_namefromnode(node, &name);
    569 				dns_name_copy(&name, foundname);
    570 				while (i > 0) {
    571 					dns_rbtnode_t *level_node =
    572 						search->chain.levels[--i];
    573 					dns_name_init(&name, NULL);
    574 					dns_rbt_namefromnode(level_node, &name);
    575 					result = dns_name_concatenate(
    576 						foundname, &name, foundname,
    577 						NULL);
    578 					if (result != ISC_R_SUCCESS) {
    579 						if (nodep != NULL) {
    580 							*nodep = NULL;
    581 						}
    582 						goto node_exit;
    583 					}
    584 				}
    585 			}
    586 			result = DNS_R_DELEGATION;
    587 			if (nodep != NULL) {
    588 				dns__rbtnode_acquire(
    589 					search->rbtdb, node,
    590 					nlocktype DNS__DB_FLARG_PASS);
    591 				*nodep = node;
    592 			}
    593 			dns__rbtdb_bindrdataset(search->rbtdb, node, found,
    594 						search->now, nlocktype,
    595 						rdataset DNS__DB_FLARG_PASS);
    596 			if (foundsig != NULL) {
    597 				dns__rbtdb_bindrdataset(
    598 					search->rbtdb, node, foundsig,
    599 					search->now, nlocktype,
    600 					sigrdataset DNS__DB_FLARG_PASS);
    601 			}
    602 			if (need_headerupdate(found, search->now) ||
    603 			    (foundsig != NULL &&
    604 			     need_headerupdate(foundsig, search->now)))
    605 			{
    606 				if (nlocktype != isc_rwlocktype_write) {
    607 					NODE_FORCEUPGRADE(lock, &nlocktype);
    608 					POST(nlocktype);
    609 				}
    610 				if (need_headerupdate(found, search->now)) {
    611 					update_header(search->rbtdb, found,
    612 						      search->now);
    613 				}
    614 				if (foundsig != NULL &&
    615 				    need_headerupdate(foundsig, search->now))
    616 				{
    617 					update_header(search->rbtdb, foundsig,
    618 						      search->now);
    619 				}
    620 			}
    621 		}
    622 
    623 	node_exit:
    624 		NODE_UNLOCK(lock, &nlocktype);
    625 
    626 		if (found == NULL && i > 0) {
    627 			i--;
    628 			node = search->chain.levels[i];
    629 		} else {
    630 			done = true;
    631 		}
    632 	} while (!done);
    633 
    634 	return result;
    635 }
    636 
    637 /*
    638  * Look for a potentially covering NSEC in the cache where `name`
    639  * is known not to exist.  This uses the auxiliary NSEC tree to find
    640  * the potential NSEC owner. If found, we update 'foundname', 'nodep',
    641  * 'rdataset' and 'sigrdataset', and return DNS_R_COVERINGNSEC.
    642  * Otherwise, return ISC_R_NOTFOUND.
    643  */
    644 static isc_result_t
    645 find_coveringnsec(rbtdb_search_t *search, const dns_name_t *name,
    646 		  dns_dbnode_t **nodep, isc_stdtime_t now,
    647 		  dns_name_t *foundname, dns_rdataset_t *rdataset,
    648 		  dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
    649 	dns_fixedname_t fprefix, forigin, ftarget, fixed;
    650 	dns_name_t *prefix = NULL, *origin = NULL;
    651 	dns_name_t *target = NULL, *fname = NULL;
    652 	dns_rbtnode_t *node = NULL;
    653 	dns_rbtnodechain_t chain;
    654 	isc_result_t result;
    655 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
    656 	isc_rwlock_t *lock = NULL;
    657 	dns_typepair_t matchtype, sigmatchtype;
    658 	dns_slabheader_t *found = NULL, *foundsig = NULL;
    659 	dns_slabheader_t *header = NULL;
    660 	dns_slabheader_t *header_next = NULL, *header_prev = NULL;
    661 
    662 	/*
    663 	 * Look for the node in the auxilary tree.
    664 	 */
    665 	dns_rbtnodechain_init(&chain);
    666 	target = dns_fixedname_initname(&ftarget);
    667 	result = dns_rbt_findnode(search->rbtdb->nsec, name, target, &node,
    668 				  &chain, DNS_RBTFIND_EMPTYDATA, NULL, NULL);
    669 	if (result != DNS_R_PARTIALMATCH) {
    670 		dns_rbtnodechain_reset(&chain);
    671 		return ISC_R_NOTFOUND;
    672 	}
    673 
    674 	prefix = dns_fixedname_initname(&fprefix);
    675 	origin = dns_fixedname_initname(&forigin);
    676 	target = dns_fixedname_initname(&ftarget);
    677 	fname = dns_fixedname_initname(&fixed);
    678 
    679 	matchtype = DNS_TYPEPAIR_VALUE(dns_rdatatype_nsec, 0);
    680 	sigmatchtype = DNS_SIGTYPE(dns_rdatatype_nsec);
    681 
    682 	/*
    683 	 * Extract predecessor from chain.
    684 	 */
    685 	result = dns_rbtnodechain_current(&chain, prefix, origin, NULL);
    686 	dns_rbtnodechain_reset(&chain);
    687 	if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN) {
    688 		return ISC_R_NOTFOUND;
    689 	}
    690 
    691 	result = dns_name_concatenate(prefix, origin, target, NULL);
    692 	if (result != ISC_R_SUCCESS) {
    693 		return ISC_R_NOTFOUND;
    694 	}
    695 
    696 	/*
    697 	 * Lookup the predecessor in the main tree.
    698 	 */
    699 	node = NULL;
    700 	result = dns_rbt_findnode(search->rbtdb->tree, target, fname, &node,
    701 				  NULL, DNS_RBTFIND_EMPTYDATA, NULL, NULL);
    702 	if (result != ISC_R_SUCCESS) {
    703 		return ISC_R_NOTFOUND;
    704 	}
    705 
    706 	lock = &(search->rbtdb->node_locks[node->locknum].lock);
    707 	NODE_RDLOCK(lock, &nlocktype);
    708 	for (header = node->data; header != NULL; header = header_next) {
    709 		header_next = header->next;
    710 		if (check_stale_header(node, header, &nlocktype, lock, search,
    711 				       &header_prev))
    712 		{
    713 			continue;
    714 		}
    715 		if (NONEXISTENT(header) || DNS_TYPEPAIR_TYPE(header->type) == 0)
    716 		{
    717 			header_prev = header;
    718 			continue;
    719 		}
    720 		if (header->type == matchtype) {
    721 			found = header;
    722 			if (foundsig != NULL) {
    723 				break;
    724 			}
    725 		} else if (header->type == sigmatchtype) {
    726 			foundsig = header;
    727 			if (found != NULL) {
    728 				break;
    729 			}
    730 		}
    731 		header_prev = header;
    732 	}
    733 	if (found != NULL) {
    734 		dns__rbtdb_bindrdataset(search->rbtdb, node, found, now,
    735 					nlocktype, rdataset DNS__DB_FLARG_PASS);
    736 		if (foundsig != NULL) {
    737 			dns__rbtdb_bindrdataset(search->rbtdb, node, foundsig,
    738 						now, nlocktype,
    739 						sigrdataset DNS__DB_FLARG_PASS);
    740 		}
    741 		dns__rbtnode_acquire(search->rbtdb, node,
    742 				     nlocktype DNS__DB_FLARG_PASS);
    743 
    744 		dns_name_copy(fname, foundname);
    745 
    746 		*nodep = node;
    747 		result = DNS_R_COVERINGNSEC;
    748 	} else {
    749 		result = ISC_R_NOTFOUND;
    750 	}
    751 	NODE_UNLOCK(lock, &nlocktype);
    752 	return result;
    753 }
    754 
    755 static isc_result_t
    756 cache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
    757 	   dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
    758 	   dns_dbnode_t **nodep, dns_name_t *foundname,
    759 	   dns_rdataset_t *rdataset,
    760 	   dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
    761 	dns_rbtnode_t *node = NULL;
    762 	isc_result_t result;
    763 	rbtdb_search_t search;
    764 	bool cname_ok = true;
    765 	bool found_noqname = false;
    766 	bool all_negative = true;
    767 	bool empty_node;
    768 	isc_rwlock_t *lock = NULL;
    769 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
    770 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
    771 	dns_slabheader_t *header = NULL;
    772 	dns_slabheader_t *header_prev = NULL, *header_next = NULL;
    773 	dns_slabheader_t *found = NULL, *nsheader = NULL;
    774 	dns_slabheader_t *foundsig = NULL, *nssig = NULL, *cnamesig = NULL;
    775 	dns_slabheader_t *update = NULL, *updatesig = NULL;
    776 	dns_slabheader_t *nsecheader = NULL, *nsecsig = NULL;
    777 	dns_typepair_t sigtype, negtype;
    778 
    779 	UNUSED(version);
    780 
    781 	REQUIRE(VALID_RBTDB((dns_rbtdb_t *)db));
    782 	REQUIRE(version == NULL);
    783 
    784 	if (now == 0) {
    785 		now = isc_stdtime_now();
    786 	}
    787 
    788 	search = (rbtdb_search_t){
    789 		.rbtdb = (dns_rbtdb_t *)db,
    790 		.serial = 1,
    791 		.options = options,
    792 		.now = now,
    793 	};
    794 	dns_fixedname_init(&search.zonecut_name);
    795 	dns_rbtnodechain_init(&search.chain);
    796 
    797 	TREE_RDLOCK(&search.rbtdb->tree_lock, &tlocktype);
    798 
    799 	/*
    800 	 * Search down from the root of the tree.  If, while going down, we
    801 	 * encounter a callback node, cache_zonecut_callback() will search the
    802 	 * rdatasets at the zone cut for a DNAME rdataset.
    803 	 */
    804 	result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
    805 				  &search.chain, DNS_RBTFIND_EMPTYDATA,
    806 				  cache_zonecut_callback, &search);
    807 
    808 	if (result == DNS_R_PARTIALMATCH) {
    809 		/*
    810 		 * If dns_rbt_findnode discovered a covering DNAME skip
    811 		 * looking for a covering NSEC.
    812 		 */
    813 		if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 &&
    814 		    (search.zonecut_header == NULL ||
    815 		     search.zonecut_header->type != dns_rdatatype_dname))
    816 		{
    817 			result = find_coveringnsec(
    818 				&search, name, nodep, now, foundname, rdataset,
    819 				sigrdataset DNS__DB_FLARG_PASS);
    820 			if (result == DNS_R_COVERINGNSEC) {
    821 				goto tree_exit;
    822 			}
    823 		}
    824 		if (search.zonecut != NULL) {
    825 			result = setup_delegation(
    826 				&search, nodep, foundname, rdataset,
    827 				sigrdataset DNS__DB_FLARG_PASS);
    828 			goto tree_exit;
    829 		} else {
    830 		find_ns:
    831 			result = find_deepest_zonecut(
    832 				&search, node, nodep, foundname, rdataset,
    833 				sigrdataset DNS__DB_FLARG_PASS);
    834 			goto tree_exit;
    835 		}
    836 	} else if (result != ISC_R_SUCCESS) {
    837 		goto tree_exit;
    838 	}
    839 
    840 	/*
    841 	 * Certain DNSSEC types are not subject to CNAME matching
    842 	 * (RFC4035, section 2.5 and RFC3007).
    843 	 *
    844 	 * We don't check for RRSIG, because we don't store RRSIG records
    845 	 * directly.
    846 	 */
    847 	if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) {
    848 		cname_ok = false;
    849 	}
    850 
    851 	/*
    852 	 * We now go looking for rdata...
    853 	 */
    854 
    855 	lock = &(search.rbtdb->node_locks[node->locknum].lock);
    856 	NODE_RDLOCK(lock, &nlocktype);
    857 
    858 	/*
    859 	 * These pointers need to be reset here in case we did
    860 	 * 'goto find_ns' from somewhere below.
    861 	 */
    862 	found = NULL;
    863 	foundsig = NULL;
    864 	sigtype = DNS_SIGTYPE(type);
    865 	negtype = DNS_TYPEPAIR_VALUE(0, type);
    866 	nsheader = NULL;
    867 	nsecheader = NULL;
    868 	nssig = NULL;
    869 	nsecsig = NULL;
    870 	cnamesig = NULL;
    871 	empty_node = true;
    872 	header_prev = NULL;
    873 	for (header = node->data; header != NULL; header = header_next) {
    874 		header_next = header->next;
    875 		if (check_stale_header(node, header, &nlocktype, lock, &search,
    876 				       &header_prev))
    877 		{
    878 			/* Do nothing. */
    879 		} else if (EXISTS(header) && !ANCIENT(header)) {
    880 			/*
    881 			 * We now know that there is at least one active
    882 			 * non-stale rdataset at this node.
    883 			 */
    884 			empty_node = false;
    885 			if (header->noqname != NULL &&
    886 			    header->trust == dns_trust_secure)
    887 			{
    888 				found_noqname = true;
    889 			}
    890 			if (!NEGATIVE(header)) {
    891 				all_negative = false;
    892 			}
    893 
    894 			/*
    895 			 * If we found a type we were looking for, remember
    896 			 * it.
    897 			 */
    898 			if (header->type == type ||
    899 			    (type == dns_rdatatype_any &&
    900 			     DNS_TYPEPAIR_TYPE(header->type) != 0) ||
    901 			    (cname_ok && header->type == dns_rdatatype_cname))
    902 			{
    903 				/*
    904 				 * We've found the answer.
    905 				 */
    906 				found = header;
    907 				if (header->type == dns_rdatatype_cname &&
    908 				    cname_ok)
    909 				{
    910 					/*
    911 					 * If we've already got the
    912 					 * CNAME RRSIG, use it.
    913 					 */
    914 					if (cnamesig != NULL) {
    915 						foundsig = cnamesig;
    916 					} else {
    917 						sigtype = DNS_SIGTYPE(
    918 							dns_rdatatype_cname);
    919 					}
    920 				}
    921 			} else if (header->type == sigtype) {
    922 				/*
    923 				 * We've found the RRSIG rdataset for our
    924 				 * target type.  Remember it.
    925 				 */
    926 				foundsig = header;
    927 			} else if (header->type == RDATATYPE_NCACHEANY ||
    928 				   header->type == negtype)
    929 			{
    930 				/*
    931 				 * We've found a negative cache entry.
    932 				 */
    933 				found = header;
    934 			} else if (header->type == dns_rdatatype_ns) {
    935 				/*
    936 				 * Remember a NS rdataset even if we're
    937 				 * not specifically looking for it, because
    938 				 * we might need it later.
    939 				 */
    940 				nsheader = header;
    941 			} else if (header->type ==
    942 				   DNS_SIGTYPE(dns_rdatatype_ns))
    943 			{
    944 				/*
    945 				 * If we need the NS rdataset, we'll also
    946 				 * need its signature.
    947 				 */
    948 				nssig = header;
    949 			} else if (header->type == dns_rdatatype_nsec) {
    950 				nsecheader = header;
    951 			} else if (header->type ==
    952 				   DNS_SIGTYPE(dns_rdatatype_nsec))
    953 			{
    954 				nsecsig = header;
    955 			} else if (cname_ok &&
    956 				   header->type ==
    957 					   DNS_SIGTYPE(dns_rdatatype_cname))
    958 			{
    959 				/*
    960 				 * If we get a CNAME match, we'll also need
    961 				 * its signature.
    962 				 */
    963 				cnamesig = header;
    964 			}
    965 			header_prev = header;
    966 		} else {
    967 			header_prev = header;
    968 		}
    969 	}
    970 
    971 	if (empty_node) {
    972 		/*
    973 		 * We have an exact match for the name, but there are no
    974 		 * extant rdatasets.  That means that this node doesn't
    975 		 * meaningfully exist, and that we really have a partial match.
    976 		 */
    977 		NODE_UNLOCK(lock, &nlocktype);
    978 		if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
    979 			result = find_coveringnsec(
    980 				&search, name, nodep, now, foundname, rdataset,
    981 				sigrdataset DNS__DB_FLARG_PASS);
    982 			if (result == DNS_R_COVERINGNSEC) {
    983 				goto tree_exit;
    984 			}
    985 		}
    986 		goto find_ns;
    987 	}
    988 
    989 	/*
    990 	 * If we didn't find what we were looking for...
    991 	 */
    992 	if (found == NULL ||
    993 	    (DNS_TRUST_ADDITIONAL(found->trust) &&
    994 	     ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
    995 	    (found->trust == dns_trust_glue &&
    996 	     ((options & DNS_DBFIND_GLUEOK) == 0)) ||
    997 	    (DNS_TRUST_PENDING(found->trust) &&
    998 	     ((options & DNS_DBFIND_PENDINGOK) == 0)))
    999 	{
   1000 		/*
   1001 		 * Return covering NODATA NSEC record.
   1002 		 */
   1003 		if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 &&
   1004 		    nsecheader != NULL)
   1005 		{
   1006 			if (nodep != NULL) {
   1007 				dns__rbtnode_acquire(
   1008 					search.rbtdb, node,
   1009 					nlocktype DNS__DB_FLARG_PASS);
   1010 				*nodep = node;
   1011 			}
   1012 			dns__rbtdb_bindrdataset(search.rbtdb, node, nsecheader,
   1013 						search.now, nlocktype,
   1014 						rdataset DNS__DB_FLARG_PASS);
   1015 			if (need_headerupdate(nsecheader, search.now)) {
   1016 				update = nsecheader;
   1017 			}
   1018 			if (nsecsig != NULL) {
   1019 				dns__rbtdb_bindrdataset(
   1020 					search.rbtdb, node, nsecsig, search.now,
   1021 					nlocktype,
   1022 					sigrdataset DNS__DB_FLARG_PASS);
   1023 				if (need_headerupdate(nsecsig, search.now)) {
   1024 					updatesig = nsecsig;
   1025 				}
   1026 			}
   1027 			result = DNS_R_COVERINGNSEC;
   1028 			goto node_exit;
   1029 		}
   1030 
   1031 		/*
   1032 		 * This name was from a wild card.  Look for a covering NSEC.
   1033 		 */
   1034 		if (found == NULL && (found_noqname || all_negative) &&
   1035 		    (search.options & DNS_DBFIND_COVERINGNSEC) != 0)
   1036 		{
   1037 			NODE_UNLOCK(lock, &nlocktype);
   1038 			result = find_coveringnsec(
   1039 				&search, name, nodep, now, foundname, rdataset,
   1040 				sigrdataset DNS__DB_FLARG_PASS);
   1041 			if (result == DNS_R_COVERINGNSEC) {
   1042 				goto tree_exit;
   1043 			}
   1044 			goto find_ns;
   1045 		}
   1046 
   1047 		/*
   1048 		 * If there is an NS rdataset at this node, then this is the
   1049 		 * deepest zone cut.
   1050 		 */
   1051 		if (nsheader != NULL) {
   1052 			if (nodep != NULL) {
   1053 				dns__rbtnode_acquire(
   1054 					search.rbtdb, node,
   1055 					nlocktype DNS__DB_FLARG_PASS);
   1056 				*nodep = node;
   1057 			}
   1058 			dns__rbtdb_bindrdataset(search.rbtdb, node, nsheader,
   1059 						search.now, nlocktype,
   1060 						rdataset DNS__DB_FLARG_PASS);
   1061 			if (need_headerupdate(nsheader, search.now)) {
   1062 				update = nsheader;
   1063 			}
   1064 			if (nssig != NULL) {
   1065 				dns__rbtdb_bindrdataset(
   1066 					search.rbtdb, node, nssig, search.now,
   1067 					nlocktype,
   1068 					sigrdataset DNS__DB_FLARG_PASS);
   1069 				if (need_headerupdate(nssig, search.now)) {
   1070 					updatesig = nssig;
   1071 				}
   1072 			}
   1073 			result = DNS_R_DELEGATION;
   1074 			goto node_exit;
   1075 		}
   1076 
   1077 		/*
   1078 		 * Go find the deepest zone cut.
   1079 		 */
   1080 		NODE_UNLOCK(lock, &nlocktype);
   1081 		goto find_ns;
   1082 	}
   1083 
   1084 	/*
   1085 	 * We found what we were looking for, or we found a CNAME.
   1086 	 */
   1087 
   1088 	if (nodep != NULL) {
   1089 		dns__rbtnode_acquire(search.rbtdb, node,
   1090 				     nlocktype DNS__DB_FLARG_PASS);
   1091 		*nodep = node;
   1092 	}
   1093 
   1094 	if (NEGATIVE(found)) {
   1095 		/*
   1096 		 * We found a negative cache entry.
   1097 		 */
   1098 		if (NXDOMAIN(found)) {
   1099 			result = DNS_R_NCACHENXDOMAIN;
   1100 		} else {
   1101 			result = DNS_R_NCACHENXRRSET;
   1102 		}
   1103 	} else if (type != found->type && type != dns_rdatatype_any &&
   1104 		   found->type == dns_rdatatype_cname)
   1105 	{
   1106 		/*
   1107 		 * We weren't doing an ANY query and we found a CNAME instead
   1108 		 * of the type we were looking for, so we need to indicate
   1109 		 * that result to the caller.
   1110 		 */
   1111 		result = DNS_R_CNAME;
   1112 	} else {
   1113 		/*
   1114 		 * An ordinary successful query!
   1115 		 */
   1116 		result = ISC_R_SUCCESS;
   1117 	}
   1118 
   1119 	if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
   1120 	    result == DNS_R_NCACHENXRRSET)
   1121 	{
   1122 		dns__rbtdb_bindrdataset(search.rbtdb, node, found, search.now,
   1123 					nlocktype, rdataset DNS__DB_FLARG_PASS);
   1124 		if (need_headerupdate(found, search.now)) {
   1125 			update = found;
   1126 		}
   1127 		if (!NEGATIVE(found) && foundsig != NULL) {
   1128 			dns__rbtdb_bindrdataset(search.rbtdb, node, foundsig,
   1129 						search.now, nlocktype,
   1130 						sigrdataset DNS__DB_FLARG_PASS);
   1131 			if (need_headerupdate(foundsig, search.now)) {
   1132 				updatesig = foundsig;
   1133 			}
   1134 		}
   1135 	}
   1136 
   1137 node_exit:
   1138 	if ((update != NULL || updatesig != NULL) &&
   1139 	    nlocktype != isc_rwlocktype_write)
   1140 	{
   1141 		NODE_FORCEUPGRADE(lock, &nlocktype);
   1142 		POST(nlocktype);
   1143 	}
   1144 	if (update != NULL && need_headerupdate(update, search.now)) {
   1145 		update_header(search.rbtdb, update, search.now);
   1146 	}
   1147 	if (updatesig != NULL && need_headerupdate(updatesig, search.now)) {
   1148 		update_header(search.rbtdb, updatesig, search.now);
   1149 	}
   1150 
   1151 	NODE_UNLOCK(lock, &nlocktype);
   1152 
   1153 tree_exit:
   1154 	TREE_UNLOCK(&search.rbtdb->tree_lock, &tlocktype);
   1155 
   1156 	/*
   1157 	 * If we found a zonecut but aren't going to use it, we have to
   1158 	 * let go of it.
   1159 	 */
   1160 	if (search.need_cleanup) {
   1161 		node = search.zonecut;
   1162 		INSIST(node != NULL);
   1163 		lock = &(search.rbtdb->node_locks[node->locknum].lock);
   1164 
   1165 		NODE_RDLOCK(lock, &nlocktype);
   1166 		dns__rbtnode_release(search.rbtdb, node, 0, &nlocktype,
   1167 				     &tlocktype, true,
   1168 				     false DNS__DB_FLARG_PASS);
   1169 		NODE_UNLOCK(lock, &nlocktype);
   1170 		INSIST(tlocktype == isc_rwlocktype_none);
   1171 	}
   1172 
   1173 	dns_rbtnodechain_reset(&search.chain);
   1174 
   1175 	update_cachestats(search.rbtdb, result);
   1176 	return result;
   1177 }
   1178 
   1179 static isc_result_t
   1180 cache_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options,
   1181 		  isc_stdtime_t now, dns_dbnode_t **nodep,
   1182 		  dns_name_t *foundname, dns_name_t *dcname,
   1183 		  dns_rdataset_t *rdataset,
   1184 		  dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
   1185 	dns_rbtnode_t *node = NULL;
   1186 	isc_rwlock_t *lock = NULL;
   1187 	isc_result_t result;
   1188 	rbtdb_search_t search;
   1189 	dns_slabheader_t *header = NULL;
   1190 	dns_slabheader_t *header_prev = NULL, *header_next = NULL;
   1191 	dns_slabheader_t *found = NULL, *foundsig = NULL;
   1192 	unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
   1193 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
   1194 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   1195 	bool dcnull = (dcname == NULL);
   1196 
   1197 	REQUIRE(VALID_RBTDB((dns_rbtdb_t *)db));
   1198 
   1199 	if (now == 0) {
   1200 		now = isc_stdtime_now();
   1201 	}
   1202 
   1203 	search = (rbtdb_search_t){
   1204 		.rbtdb = (dns_rbtdb_t *)db,
   1205 		.serial = 1,
   1206 		.options = options,
   1207 		.now = now,
   1208 	};
   1209 	dns_fixedname_init(&search.zonecut_name);
   1210 	dns_rbtnodechain_init(&search.chain);
   1211 
   1212 	if (dcnull) {
   1213 		dcname = foundname;
   1214 	}
   1215 
   1216 	if ((options & DNS_DBFIND_NOEXACT) != 0) {
   1217 		rbtoptions |= DNS_RBTFIND_NOEXACT;
   1218 	}
   1219 
   1220 	TREE_RDLOCK(&search.rbtdb->tree_lock, &tlocktype);
   1221 
   1222 	/*
   1223 	 * Search down from the root of the tree.
   1224 	 */
   1225 	result = dns_rbt_findnode(search.rbtdb->tree, name, dcname, &node,
   1226 				  &search.chain, rbtoptions, NULL, &search);
   1227 
   1228 	if (result == DNS_R_PARTIALMATCH) {
   1229 		result = find_deepest_zonecut(&search, node, nodep, foundname,
   1230 					      rdataset,
   1231 					      sigrdataset DNS__DB_FLARG_PASS);
   1232 		goto tree_exit;
   1233 	} else if (result != ISC_R_SUCCESS) {
   1234 		goto tree_exit;
   1235 	} else if (!dcnull) {
   1236 		dns_name_copy(dcname, foundname);
   1237 	}
   1238 
   1239 	/*
   1240 	 * We now go looking for an NS rdataset at the node.
   1241 	 */
   1242 
   1243 	lock = &(search.rbtdb->node_locks[node->locknum].lock);
   1244 	NODE_RDLOCK(lock, &nlocktype);
   1245 
   1246 	for (header = node->data; header != NULL; header = header_next) {
   1247 		header_next = header->next;
   1248 		bool ns = (header->type == dns_rdatatype_ns ||
   1249 			   header->type == DNS_SIGTYPE(dns_rdatatype_ns));
   1250 		if (check_stale_header(node, header, &nlocktype, lock, &search,
   1251 				       &header_prev))
   1252 		{
   1253 			if (ns) {
   1254 				/*
   1255 				 * We found a cached NS, but was either
   1256 				 * ancient or it was stale and serve-stale
   1257 				 * is disabled, so this node can't be used
   1258 				 * as a zone cut we know about. Instead we
   1259 				 * bail out and call find_deepest_zonecut()
   1260 				 * below.
   1261 				 */
   1262 				break;
   1263 			}
   1264 		} else if (EXISTS(header) && !ANCIENT(header)) {
   1265 			if (header->type == dns_rdatatype_ns) {
   1266 				found = header;
   1267 				if (foundsig != NULL) {
   1268 					break;
   1269 				}
   1270 			} else if (header->type ==
   1271 				   DNS_SIGTYPE(dns_rdatatype_ns))
   1272 			{
   1273 				foundsig = header;
   1274 				if (found != NULL) {
   1275 					break;
   1276 				}
   1277 			}
   1278 			header_prev = header;
   1279 		} else {
   1280 			header_prev = header;
   1281 		}
   1282 	}
   1283 
   1284 	if (found == NULL) {
   1285 		/*
   1286 		 * No active NS records found. Call find_deepest_zonecut()
   1287 		 * to look for them in nodes above this one.
   1288 		 */
   1289 		NODE_UNLOCK(lock, &nlocktype);
   1290 		result = find_deepest_zonecut(&search, node, nodep, foundname,
   1291 					      rdataset,
   1292 					      sigrdataset DNS__DB_FLARG_PASS);
   1293 		dns_name_copy(foundname, dcname);
   1294 		goto tree_exit;
   1295 	}
   1296 
   1297 	if (nodep != NULL) {
   1298 		dns__rbtnode_acquire(search.rbtdb, node,
   1299 				     nlocktype DNS__DB_FLARG_PASS);
   1300 		*nodep = node;
   1301 	}
   1302 
   1303 	dns__rbtdb_bindrdataset(search.rbtdb, node, found, search.now,
   1304 				nlocktype, rdataset DNS__DB_FLARG_PASS);
   1305 	if (foundsig != NULL) {
   1306 		dns__rbtdb_bindrdataset(search.rbtdb, node, foundsig,
   1307 					search.now, nlocktype,
   1308 					sigrdataset DNS__DB_FLARG_PASS);
   1309 	}
   1310 
   1311 	if (need_headerupdate(found, search.now) ||
   1312 	    (foundsig != NULL && need_headerupdate(foundsig, search.now)))
   1313 	{
   1314 		if (nlocktype != isc_rwlocktype_write) {
   1315 			NODE_FORCEUPGRADE(lock, &nlocktype);
   1316 			POST(nlocktype);
   1317 		}
   1318 		if (need_headerupdate(found, search.now)) {
   1319 			update_header(search.rbtdb, found, search.now);
   1320 		}
   1321 		if (foundsig != NULL && need_headerupdate(foundsig, search.now))
   1322 		{
   1323 			update_header(search.rbtdb, foundsig, search.now);
   1324 		}
   1325 	}
   1326 
   1327 	NODE_UNLOCK(lock, &nlocktype);
   1328 
   1329 tree_exit:
   1330 	TREE_UNLOCK(&search.rbtdb->tree_lock, &tlocktype);
   1331 
   1332 	INSIST(!search.need_cleanup);
   1333 
   1334 	dns_rbtnodechain_reset(&search.chain);
   1335 
   1336 	if (result == DNS_R_DELEGATION) {
   1337 		result = ISC_R_SUCCESS;
   1338 	}
   1339 
   1340 	return result;
   1341 }
   1342 
   1343 static isc_result_t
   1344 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
   1345 		   dns_rdatatype_t type, dns_rdatatype_t covers,
   1346 		   isc_stdtime_t now, dns_rdataset_t *rdataset,
   1347 		   dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
   1348 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   1349 	dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
   1350 	dns_slabheader_t *header = NULL, *header_next = NULL;
   1351 	dns_slabheader_t *found = NULL, *foundsig = NULL;
   1352 	dns_typepair_t matchtype, sigmatchtype, negtype;
   1353 	isc_result_t result;
   1354 	isc_rwlock_t *lock = NULL;
   1355 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   1356 
   1357 	REQUIRE(VALID_RBTDB(rbtdb));
   1358 	REQUIRE(type != dns_rdatatype_any);
   1359 
   1360 	UNUSED(version);
   1361 
   1362 	result = ISC_R_SUCCESS;
   1363 
   1364 	if (now == 0) {
   1365 		now = isc_stdtime_now();
   1366 	}
   1367 
   1368 	lock = &rbtdb->node_locks[rbtnode->locknum].lock;
   1369 	NODE_RDLOCK(lock, &nlocktype);
   1370 
   1371 	matchtype = DNS_TYPEPAIR_VALUE(type, covers);
   1372 	negtype = DNS_TYPEPAIR_VALUE(0, type);
   1373 	if (covers == 0) {
   1374 		sigmatchtype = DNS_SIGTYPE(type);
   1375 	} else {
   1376 		sigmatchtype = 0;
   1377 	}
   1378 
   1379 	for (header = rbtnode->data; header != NULL; header = header_next) {
   1380 		header_next = header->next;
   1381 		if (!ACTIVE(header, now)) {
   1382 			if ((header->ttl + STALE_TTL(header, rbtdb) <
   1383 			     now - RBTDB_VIRTUAL) &&
   1384 			    (nlocktype == isc_rwlocktype_write ||
   1385 			     NODE_TRYUPGRADE(lock, &nlocktype) ==
   1386 				     ISC_R_SUCCESS))
   1387 			{
   1388 				/*
   1389 				 * We update the node's status only when we
   1390 				 * can get write access.
   1391 				 *
   1392 				 * We don't check if refcurrent(rbtnode) == 0
   1393 				 * and try to free like we do in cache_find(),
   1394 				 * because refcurrent(rbtnode) must be
   1395 				 * non-zero.  This is so because 'node' is an
   1396 				 * argument to the function.
   1397 				 */
   1398 				dns__rbtdb_mark_ancient(header);
   1399 			}
   1400 		} else if (EXISTS(header) && !ANCIENT(header)) {
   1401 			if (header->type == matchtype) {
   1402 				found = header;
   1403 			} else if (header->type == RDATATYPE_NCACHEANY ||
   1404 				   header->type == negtype)
   1405 			{
   1406 				found = header;
   1407 			} else if (header->type == sigmatchtype) {
   1408 				foundsig = header;
   1409 			}
   1410 		}
   1411 	}
   1412 	if (found != NULL) {
   1413 		dns__rbtdb_bindrdataset(rbtdb, rbtnode, found, now, nlocktype,
   1414 					rdataset DNS__DB_FLARG_PASS);
   1415 		if (!NEGATIVE(found) && foundsig != NULL) {
   1416 			dns__rbtdb_bindrdataset(rbtdb, rbtnode, foundsig, now,
   1417 						nlocktype,
   1418 						sigrdataset DNS__DB_FLARG_PASS);
   1419 		}
   1420 	}
   1421 
   1422 	NODE_UNLOCK(lock, &nlocktype);
   1423 
   1424 	if (found == NULL) {
   1425 		return ISC_R_NOTFOUND;
   1426 	}
   1427 
   1428 	if (NEGATIVE(found)) {
   1429 		/*
   1430 		 * We found a negative cache entry.
   1431 		 */
   1432 		if (NXDOMAIN(found)) {
   1433 			result = DNS_R_NCACHENXDOMAIN;
   1434 		} else {
   1435 			result = DNS_R_NCACHENXRRSET;
   1436 		}
   1437 	}
   1438 
   1439 	update_cachestats(rbtdb, result);
   1440 
   1441 	return result;
   1442 }
   1443 
   1444 static size_t
   1445 hashsize(dns_db_t *db) {
   1446 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   1447 	size_t size;
   1448 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
   1449 
   1450 	REQUIRE(VALID_RBTDB(rbtdb));
   1451 
   1452 	TREE_RDLOCK(&rbtdb->tree_lock, &tlocktype);
   1453 	size = dns_rbt_hashsize(rbtdb->tree);
   1454 	TREE_UNLOCK(&rbtdb->tree_lock, &tlocktype);
   1455 
   1456 	return size;
   1457 }
   1458 
   1459 static isc_result_t
   1460 setcachestats(dns_db_t *db, isc_stats_t *stats) {
   1461 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   1462 
   1463 	REQUIRE(VALID_RBTDB(rbtdb));
   1464 	REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
   1465 	REQUIRE(stats != NULL);
   1466 
   1467 	isc_stats_attach(stats, &rbtdb->cachestats);
   1468 	return ISC_R_SUCCESS;
   1469 }
   1470 
   1471 static dns_stats_t *
   1472 getrrsetstats(dns_db_t *db) {
   1473 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   1474 
   1475 	REQUIRE(VALID_RBTDB(rbtdb));
   1476 	REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
   1477 
   1478 	return rbtdb->rrsetstats;
   1479 }
   1480 
   1481 static isc_result_t
   1482 setservestalettl(dns_db_t *db, dns_ttl_t ttl) {
   1483 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   1484 
   1485 	REQUIRE(VALID_RBTDB(rbtdb));
   1486 	REQUIRE(IS_CACHE(rbtdb));
   1487 
   1488 	/* currently no bounds checking.  0 means disable. */
   1489 	rbtdb->common.serve_stale_ttl = ttl;
   1490 	return ISC_R_SUCCESS;
   1491 }
   1492 
   1493 static isc_result_t
   1494 getservestalettl(dns_db_t *db, dns_ttl_t *ttl) {
   1495 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   1496 
   1497 	REQUIRE(VALID_RBTDB(rbtdb));
   1498 	REQUIRE(IS_CACHE(rbtdb));
   1499 
   1500 	*ttl = rbtdb->common.serve_stale_ttl;
   1501 	return ISC_R_SUCCESS;
   1502 }
   1503 
   1504 static isc_result_t
   1505 setservestalerefresh(dns_db_t *db, uint32_t interval) {
   1506 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   1507 
   1508 	REQUIRE(VALID_RBTDB(rbtdb));
   1509 	REQUIRE(IS_CACHE(rbtdb));
   1510 
   1511 	/* currently no bounds checking.  0 means disable. */
   1512 	rbtdb->serve_stale_refresh = interval;
   1513 	return ISC_R_SUCCESS;
   1514 }
   1515 
   1516 static isc_result_t
   1517 getservestalerefresh(dns_db_t *db, uint32_t *interval) {
   1518 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   1519 
   1520 	REQUIRE(VALID_RBTDB(rbtdb));
   1521 	REQUIRE(IS_CACHE(rbtdb));
   1522 
   1523 	*interval = rbtdb->serve_stale_refresh;
   1524 	return ISC_R_SUCCESS;
   1525 }
   1526 
   1527 static void
   1528 expiredata(dns_db_t *db, dns_dbnode_t *node, void *data) {
   1529 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   1530 	dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
   1531 	dns_slabheader_t *header = data;
   1532 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   1533 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
   1534 
   1535 	NODE_WRLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype);
   1536 	dns__cacherbt_expireheader(header, &tlocktype,
   1537 				   dns_expire_flush DNS__DB_FILELINE);
   1538 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock, &nlocktype);
   1539 	INSIST(tlocktype == isc_rwlocktype_none);
   1540 }
   1541 
   1542 dns_dbmethods_t dns__rbtdb_cachemethods = {
   1543 	.destroy = dns__rbtdb_destroy,
   1544 	.currentversion = dns__rbtdb_currentversion,
   1545 	.newversion = dns__rbtdb_newversion,
   1546 	.attachversion = dns__rbtdb_attachversion,
   1547 	.closeversion = dns__rbtdb_closeversion,
   1548 	.findnode = dns__rbtdb_findnode,
   1549 	.find = cache_find,
   1550 	.findzonecut = cache_findzonecut,
   1551 	.attachnode = dns__rbtdb_attachnode,
   1552 	.detachnode = dns__rbtdb_detachnode,
   1553 	.createiterator = dns__rbtdb_createiterator,
   1554 	.findrdataset = cache_findrdataset,
   1555 	.allrdatasets = dns__rbtdb_allrdatasets,
   1556 	.addrdataset = dns__rbtdb_addrdataset,
   1557 	.subtractrdataset = dns__rbtdb_subtractrdataset,
   1558 	.deleterdataset = dns__rbtdb_deleterdataset,
   1559 	.nodecount = dns__rbtdb_nodecount,
   1560 	.setloop = dns__rbtdb_setloop,
   1561 	.getoriginnode = dns__rbtdb_getoriginnode,
   1562 	.getrrsetstats = getrrsetstats,
   1563 	.setcachestats = setcachestats,
   1564 	.hashsize = hashsize,
   1565 	.setservestalettl = setservestalettl,
   1566 	.getservestalettl = getservestalettl,
   1567 	.setservestalerefresh = setservestalerefresh,
   1568 	.getservestalerefresh = getservestalerefresh,
   1569 	.locknode = dns__rbtdb_locknode,
   1570 	.unlocknode = dns__rbtdb_unlocknode,
   1571 	.expiredata = expiredata,
   1572 	.deletedata = dns__rbtdb_deletedata,
   1573 	.setmaxrrperset = dns__rbtdb_setmaxrrperset,
   1574 	.setmaxtypepername = dns__rbtdb_setmaxtypepername,
   1575 };
   1576 
   1577 /*
   1578  * Caller must hold the node (write) lock.
   1579  */
   1580 void
   1581 dns__cacherbt_expireheader(dns_slabheader_t *header,
   1582 			   isc_rwlocktype_t *tlocktypep,
   1583 			   dns_expire_t reason DNS__DB_FLARG) {
   1584 	dns__rbtdb_mark_ancient(header);
   1585 
   1586 	if (isc_refcount_current(&RBTDB_HEADERNODE(header)->references) == 0) {
   1587 		isc_rwlocktype_t nlocktype = isc_rwlocktype_write;
   1588 		dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)header->db;
   1589 
   1590 		/*
   1591 		 * If no one else is using the node, we can clean it up now.
   1592 		 * We first need to gain a new reference to the node to meet a
   1593 		 * requirement of dns__rbtnode_release().
   1594 		 */
   1595 		dns__rbtnode_acquire(rbtdb, RBTDB_HEADERNODE(header),
   1596 				     nlocktype DNS__DB_FLARG_PASS);
   1597 		dns__rbtnode_release(rbtdb, RBTDB_HEADERNODE(header), 0,
   1598 				     &nlocktype, tlocktypep, true,
   1599 				     false DNS__DB_FLARG_PASS);
   1600 
   1601 		if (rbtdb->cachestats == NULL) {
   1602 			return;
   1603 		}
   1604 
   1605 		switch (reason) {
   1606 		case dns_expire_ttl:
   1607 			isc_stats_increment(rbtdb->cachestats,
   1608 					    dns_cachestatscounter_deletettl);
   1609 			break;
   1610 		case dns_expire_lru:
   1611 			isc_stats_increment(rbtdb->cachestats,
   1612 					    dns_cachestatscounter_deletelru);
   1613 			break;
   1614 		default:
   1615 			break;
   1616 		}
   1617 	}
   1618 }
   1619 
   1620 static size_t
   1621 rdataset_size(dns_slabheader_t *header) {
   1622 	if (!NONEXISTENT(header)) {
   1623 		return dns_rdataslab_size((unsigned char *)header,
   1624 					  sizeof(*header));
   1625 	}
   1626 
   1627 	return sizeof(*header);
   1628 }
   1629 
   1630 static size_t
   1631 expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum,
   1632 		   isc_rwlocktype_t *tlocktypep,
   1633 		   size_t purgesize DNS__DB_FLARG) {
   1634 	dns_slabheader_t *header = NULL;
   1635 	size_t purged = 0;
   1636 
   1637 	for (header = ISC_LIST_TAIL(rbtdb->lru[locknum]);
   1638 	     header != NULL && header->last_used <= rbtdb->last_used &&
   1639 	     purged <= purgesize;
   1640 	     header = ISC_LIST_TAIL(rbtdb->lru[locknum]))
   1641 	{
   1642 		size_t header_size = rdataset_size(header);
   1643 
   1644 		/*
   1645 		 * Unlink the entry at this point to avoid checking it
   1646 		 * again even if it's currently used someone else and
   1647 		 * cannot be purged at this moment.  This entry won't be
   1648 		 * referenced any more (so unlinking is safe) since the
   1649 		 * TTL will be reset to 0.
   1650 		 */
   1651 		ISC_LIST_UNLINK(rbtdb->lru[locknum], header, link);
   1652 		dns__cacherbt_expireheader(header, tlocktypep,
   1653 					   dns_expire_lru DNS__DB_FLARG_PASS);
   1654 		purged += header_size;
   1655 	}
   1656 
   1657 	return purged;
   1658 }
   1659 
   1660 /*%
   1661  * Purge some expired and/or stale (i.e. unused for some period) cache entries
   1662  * due to an overmem condition.  To recover from this condition quickly,
   1663  * we clean up entries up to the size of newly added rdata that triggered
   1664  * the overmem; this is accessible via newheader.
   1665  *
   1666  * The LRU lists tails are processed in LRU order to the nearest second.
   1667  *
   1668  * A write lock on the tree must be held.
   1669  */
   1670 void
   1671 dns__cacherbt_overmem(dns_rbtdb_t *rbtdb, dns_slabheader_t *newheader,
   1672 		      isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) {
   1673 	uint32_t locknum_start = rbtdb->lru_sweep++ % rbtdb->node_lock_count;
   1674 	uint32_t locknum = locknum_start;
   1675 	/* Size of added data, possible node and possible ENT node. */
   1676 	size_t purgesize =
   1677 		rdataset_size(newheader) +
   1678 		2 * dns__rbtnode_getsize(RBTDB_HEADERNODE(newheader));
   1679 	size_t purged = 0;
   1680 	isc_stdtime_t min_last_used = 0;
   1681 	size_t max_passes = 8;
   1682 
   1683 again:
   1684 	do {
   1685 		isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   1686 		NODE_WRLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype);
   1687 
   1688 		purged += expire_lru_headers(rbtdb, locknum, tlocktypep,
   1689 					     purgesize -
   1690 						     purged DNS__DB_FLARG_PASS);
   1691 
   1692 		/*
   1693 		 * Work out the oldest remaining last_used values of the list
   1694 		 * tails as we walk across the array of lru lists.
   1695 		 */
   1696 		dns_slabheader_t *header = ISC_LIST_TAIL(rbtdb->lru[locknum]);
   1697 		if (header != NULL &&
   1698 		    (min_last_used == 0 || header->last_used < min_last_used))
   1699 		{
   1700 			min_last_used = header->last_used;
   1701 		}
   1702 		NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, &nlocktype);
   1703 		locknum = (locknum + 1) % rbtdb->node_lock_count;
   1704 	} while (locknum != locknum_start && purged <= purgesize);
   1705 
   1706 	/*
   1707 	 * Update rbtdb->last_used if we have walked all the list tails and have
   1708 	 * not freed the required amount of memory.
   1709 	 */
   1710 	if (purged < purgesize) {
   1711 		if (min_last_used != 0) {
   1712 			rbtdb->last_used = min_last_used;
   1713 			if (max_passes-- > 0) {
   1714 				goto again;
   1715 			}
   1716 		}
   1717 	}
   1718 }
   1719