Home | History | Annotate | Line # | Download | only in dns
rbtdb.c revision 1.1
      1  1.1  christos /*	$NetBSD: rbtdb.c,v 1.1 2024/02/18 20:57:33 christos Exp $	*/
      2  1.1  christos 
      3  1.1  christos /*
      4  1.1  christos  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      5  1.1  christos  *
      6  1.1  christos  * SPDX-License-Identifier: MPL-2.0
      7  1.1  christos  *
      8  1.1  christos  * This Source Code Form is subject to the terms of the Mozilla Public
      9  1.1  christos  * License, v. 2.0. If a copy of the MPL was not distributed with this
     10  1.1  christos  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
     11  1.1  christos  *
     12  1.1  christos  * See the COPYRIGHT file distributed with this work for additional
     13  1.1  christos  * information regarding copyright ownership.
     14  1.1  christos  */
     15  1.1  christos 
     16  1.1  christos /*! \file */
     17  1.1  christos 
     18  1.1  christos #include <ctype.h>
     19  1.1  christos #include <inttypes.h>
     20  1.1  christos #include <stdbool.h>
     21  1.1  christos 
     22  1.1  christos #include <isc/atomic.h>
     23  1.1  christos #include <isc/crc64.h>
     24  1.1  christos #include <isc/event.h>
     25  1.1  christos #include <isc/file.h>
     26  1.1  christos #include <isc/hash.h>
     27  1.1  christos #include <isc/heap.h>
     28  1.1  christos #include <isc/hex.h>
     29  1.1  christos #include <isc/mem.h>
     30  1.1  christos #include <isc/mutex.h>
     31  1.1  christos #include <isc/once.h>
     32  1.1  christos #include <isc/platform.h>
     33  1.1  christos #include <isc/print.h>
     34  1.1  christos #include <isc/random.h>
     35  1.1  christos #include <isc/refcount.h>
     36  1.1  christos #include <isc/rwlock.h>
     37  1.1  christos #include <isc/serial.h>
     38  1.1  christos #include <isc/socket.h>
     39  1.1  christos #include <isc/stdio.h>
     40  1.1  christos #include <isc/string.h>
     41  1.1  christos #include <isc/task.h>
     42  1.1  christos #include <isc/time.h>
     43  1.1  christos #include <isc/util.h>
     44  1.1  christos 
     45  1.1  christos #include <dns/callbacks.h>
     46  1.1  christos #include <dns/db.h>
     47  1.1  christos #include <dns/dbiterator.h>
     48  1.1  christos #include <dns/events.h>
     49  1.1  christos #include <dns/fixedname.h>
     50  1.1  christos #include <dns/lib.h>
     51  1.1  christos #include <dns/log.h>
     52  1.1  christos #include <dns/masterdump.h>
     53  1.1  christos #include <dns/nsec.h>
     54  1.1  christos #include <dns/nsec3.h>
     55  1.1  christos #include <dns/rbt.h>
     56  1.1  christos #include <dns/rdata.h>
     57  1.1  christos #include <dns/rdataset.h>
     58  1.1  christos #include <dns/rdatasetiter.h>
     59  1.1  christos #include <dns/rdataslab.h>
     60  1.1  christos #include <dns/rdatastruct.h>
     61  1.1  christos #include <dns/result.h>
     62  1.1  christos #include <dns/stats.h>
     63  1.1  christos #include <dns/time.h>
     64  1.1  christos #include <dns/version.h>
     65  1.1  christos #include <dns/view.h>
     66  1.1  christos #include <dns/zone.h>
     67  1.1  christos #include <dns/zonekey.h>
     68  1.1  christos 
     69  1.1  christos #ifndef WIN32
     70  1.1  christos #include <sys/mman.h>
     71  1.1  christos #else /* ifndef WIN32 */
     72  1.1  christos #define PROT_READ   0x01
     73  1.1  christos #define PROT_WRITE  0x02
     74  1.1  christos #define MAP_PRIVATE 0x0002
     75  1.1  christos #define MAP_FAILED  ((void *)-1)
     76  1.1  christos #endif /* ifndef WIN32 */
     77  1.1  christos 
     78  1.1  christos #include "rbtdb.h"
     79  1.1  christos 
     80  1.1  christos #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
     81  1.1  christos 
     82  1.1  christos #define CHECK(op)                            \
     83  1.1  christos 	do {                                 \
     84  1.1  christos 		result = (op);               \
     85  1.1  christos 		if (result != ISC_R_SUCCESS) \
     86  1.1  christos 			goto failure;        \
     87  1.1  christos 	} while (0)
     88  1.1  christos 
     89  1.1  christos /*
     90  1.1  christos  * This is the map file header for RBTDB images.  It is populated, and then
     91  1.1  christos  * written, as the LAST thing done to the file.  Writing this last (with
     92  1.1  christos  * zeros in the header area initially) will ensure that the header is only
     93  1.1  christos  * valid when the RBTDB image is also valid.
     94  1.1  christos  */
     95  1.1  christos typedef struct rbtdb_file_header rbtdb_file_header_t;
     96  1.1  christos 
     97  1.1  christos /* Header length, always the same size regardless of structure size */
     98  1.1  christos #define RBTDB_HEADER_LENGTH 1024
     99  1.1  christos 
    100  1.1  christos struct rbtdb_file_header {
    101  1.1  christos 	char version1[32]; /* must match version2 below */
    /* NOTE(review): ptrsize/bigendian presumably record the writer's pointer
     * width and byte order so a mismatched image can be rejected - confirm
     * against the code that fills in and validates this header. */
    102  1.1  christos 	uint32_t ptrsize;
    103  1.1  christos 	unsigned int bigendian : 1;
    /* NOTE(review): tree/nsec/nsec3 presumably locate the three serialized
     * trees inside the image (same names as dns_rbtdb.tree/nsec/nsec3) -
     * confirm. */
    104  1.1  christos 	uint64_t tree;
    105  1.1  christos 	uint64_t nsec;
    106  1.1  christos 	uint64_t nsec3;
    107  1.1  christos 
    108  1.1  christos 	char version2[32]; /* repeated; must match version1 */
    109  1.1  christos };
    110  1.1  christos 
    111  1.1  christos /*%
    112  1.1  christos  * Note that "impmagic" is not the first four bytes of the struct, so
    113  1.1  christos  * ISC_MAGIC_VALID cannot be used.
    114  1.1  christos  */
    115  1.1  christos #define VALID_RBTDB(rbtdb) \
    116  1.1  christos 	((rbtdb) != NULL && (rbtdb)->common.impmagic == RBTDB_MAGIC)
    117  1.1  christos 
    118  1.1  christos typedef uint32_t rbtdb_serial_t;
    119  1.1  christos typedef uint32_t rbtdb_rdatatype_t;
    120  1.1  christos 
    121  1.1  christos #define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type)&0xFFFF))
    122  1.1  christos #define RBTDB_RDATATYPE_EXT(type)  ((dns_rdatatype_t)((type) >> 16))
    123  1.1  christos #define RBTDB_RDATATYPE_VALUE(base, ext)              \
    124  1.1  christos 	((rbtdb_rdatatype_t)(((uint32_t)ext) << 16) | \
    125  1.1  christos 	 (((uint32_t)base) & 0xffff))
    126  1.1  christos 
    /*
     * Pre-packed type values.  Each RBTDB_RDATATYPE_SIG* constant is
     * RBTDB_RDATATYPE_VALUE(base, ext) with base dns_rdatatype_rrsig and ext
     * set to the type named in the macro; RBTDB_RDATATYPE_NCACHEANY packs
     * base 0 with ext dns_rdatatype_any.
     */
    127  1.1  christos #define RBTDB_RDATATYPE_SIGNSEC \
    128  1.1  christos 	RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
    129  1.1  christos #define RBTDB_RDATATYPE_SIGNSEC3 \
    130  1.1  christos 	RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
    131  1.1  christos #define RBTDB_RDATATYPE_SIGNS \
    132  1.1  christos 	RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
    133  1.1  christos #define RBTDB_RDATATYPE_SIGCNAME \
    134  1.1  christos 	RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
    135  1.1  christos #define RBTDB_RDATATYPE_SIGDNAME \
    136  1.1  christos 	RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
    137  1.1  christos #define RBTDB_RDATATYPE_SIGDS \
    138  1.1  christos 	RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds)
    139  1.1  christos #define RBTDB_RDATATYPE_SIGSOA \
    140  1.1  christos 	RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_soa)
    141  1.1  christos #define RBTDB_RDATATYPE_NCACHEANY RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
    142  1.1  christos 
    /*
     * Database-level lock wrappers: thin aliases for isc_rwlock_init(),
     * isc_rwlock_destroy(), RWLOCK() and RWUNLOCK().  'l' is a pointer to the
     * rwlock and 't' the lock type passed through to RWLOCK()/RWUNLOCK().
     */
    143  1.1  christos #define RBTDB_INITLOCK(l)    isc_rwlock_init((l), 0, 0)
    144  1.1  christos #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
    145  1.1  christos #define RBTDB_LOCK(l, t)     RWLOCK((l), (t))
    146  1.1  christos #define RBTDB_UNLOCK(l, t)   RWUNLOCK((l), (t))
    147  1.1  christos 
    148  1.1  christos /*
    149  1.1  christos  * Since node locking is sensitive to both performance and memory footprint,
    150  1.1  christos  * we need some trick here.  If we have both high-performance rwlock and
    151  1.1  christos  * high performance and small-memory reference counters, we use rwlock for
    152  1.1  christos  * node lock and isc_refcount for node references.  In this case, we don't have
    153  1.1  christos  * to protect the access to the counters by locks.
    154  1.1  christos  * Otherwise, we simply use ordinary mutex lock for node locking, and use
    155  1.1  christos  * simple integers as reference counters which is protected by the lock.
    156  1.1  christos  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
    157  1.1  christos  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
    158  1.1  christos  * counters first and then protect other parts of a node as read-only data.
    159  1.1  christos  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
    160  1.1  christos  * provided for these special cases.  When we can use the efficient backend
    161  1.1  christos  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
    162  1.1  christos  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
    163  1.1  christos  * section including the access to the reference counter.
    164  1.1  christos  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
    165  1.1  christos  * section is also protected by NODE_STRONGLOCK().
     *
     * NOTE(review): the definitions that follow make nodelock_t an
     * isc_rwlock_t unconditionally, and no NODE_STRONGLOCK()/NODE_WEAKLOCK()
     * macro is defined alongside them; the mutex alternative described above
     * is not selectable here.  Confirm whether this comment is still current.
    166  1.1  christos  */
    167  1.1  christos typedef isc_rwlock_t nodelock_t;
    168  1.1  christos 
    /* Per-node-bucket lock wrappers over the isc_rwlock API. */
    169  1.1  christos #define NODE_INITLOCK(l)    isc_rwlock_init((l), 0, 0)
    170  1.1  christos #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
    171  1.1  christos #define NODE_LOCK(l, t)	    RWLOCK((l), (t))
    172  1.1  christos #define NODE_UNLOCK(l, t)   RWUNLOCK((l), (t))
    173  1.1  christos #define NODE_TRYUPGRADE(l)  isc_rwlock_tryupgrade(l)
    174  1.1  christos #define NODE_DOWNGRADE(l)   isc_rwlock_downgrade(l)
    175  1.1  christos 
    176  1.1  christos /*%
    177  1.1  christos  * Whether to rate-limit updating the LRU to avoid possible thread contention.
    178  1.1  christos  * Updating LRU requires write locking, so we don't do it every time the
    179  1.1  christos  * record is touched - only after some time passes.
     * May be overridden at compile time; the default is 1.
    180  1.1  christos  */
    181  1.1  christos #ifndef DNS_RBTDB_LIMITLRUUPDATE
    182  1.1  christos #define DNS_RBTDB_LIMITLRUUPDATE 1
    183  1.1  christos #endif
    184  1.1  christos 
    185  1.1  christos /*% Time (in seconds) after which we update LRU for glue records: 5 minutes */
    186  1.1  christos #define DNS_RBTDB_LRUUPDATE_GLUE 300
    187  1.1  christos /*% Time (in seconds) after which we update LRU for all other records: 10 minutes */
    188  1.1  christos #define DNS_RBTDB_LRUUPDATE_REGULAR 600
    189  1.1  christos 
    190  1.1  christos /*
    191  1.1  christos  * Allow clients with a virtual time of up to 5 minutes (300 seconds) in the
    192  1.1  christos  * past to see records that would otherwise have expired.
    193  1.1  christos  */
    194  1.1  christos #define RBTDB_VIRTUAL 300
    195  1.1  christos 
    /*
     * Record pointed to by rdatasetheader_t.noqname and
     * rdatasetheader_t.closest.
     * NOTE(review): 'neg' and 'negsig' are untyped here; they presumably hold
     * the negative-proof data and its signature for 'name' - confirm against
     * the code that fills them in.
     */
    196  1.1  christos struct noqname {
    197  1.1  christos 	dns_name_t name;
    198  1.1  christos 	void *neg;
    199  1.1  christos 	void *negsig;
    200  1.1  christos 	dns_rdatatype_t type;
    201  1.1  christos };
    202  1.1  christos 
    /*
     * Per-rdataset header.  Headers are linked sideways through 'next'
     * (one chain entry per type) and downwards through 'down' (older
     * versions of the same rdataset), and point back at their tree node
     * through 'node'.
     */
    203  1.1  christos typedef struct rdatasetheader {
    204  1.1  christos 	/*%
    205  1.1  christos 	 * Locked by the owning node's lock.
    206  1.1  christos 	 */
    207  1.1  christos 	rbtdb_serial_t serial;
    208  1.1  christos 	dns_ttl_t rdh_ttl; /* compared against 'now' by ACTIVE() */
    209  1.1  christos 	rbtdb_rdatatype_t type; /* packed pair; see RBTDB_RDATATYPE_VALUE() */
    210  1.1  christos 	atomic_uint_least16_t attributes; /* RDATASET_ATTR_* bits */
    211  1.1  christos 	dns_trust_t trust;
    212  1.1  christos 	atomic_uint_fast32_t last_refresh_fail_ts;
    213  1.1  christos 	struct noqname *noqname;
    214  1.1  christos 	struct noqname *closest;
    215  1.1  christos 	unsigned int is_mmapped	      : 1;
    216  1.1  christos 	unsigned int next_is_relative : 1;
    217  1.1  christos 	unsigned int node_is_relative : 1;
    218  1.1  christos 	unsigned int resign_lsb	      : 1;
    219  1.1  christos 	/*%
    220  1.1  christos 	 * For the 'next'/'down' links below we don't use the LIST macros,
    221  1.1  christos 	 * because the LIST structure has both head and tail pointers, and
    	 * is doubly linked.
    222  1.1  christos 	 */
    223  1.1  christos 
    224  1.1  christos 	struct rdatasetheader *next;
    225  1.1  christos 	/*%<
    226  1.1  christos 	 * If this is the top header for an rdataset, 'next' points
    227  1.1  christos 	 * to the top header for the next rdataset (i.e., the next type).
    228  1.1  christos 	 * Otherwise, it points up to the header whose down pointer points
    229  1.1  christos 	 * at this header.
    230  1.1  christos 	 */
    231  1.1  christos 
    232  1.1  christos 	struct rdatasetheader *down;
    233  1.1  christos 	/*%<
    234  1.1  christos 	 * Points to the header for the next older version of
    235  1.1  christos 	 * this rdataset.
    236  1.1  christos 	 */
    237  1.1  christos 
    238  1.1  christos 	atomic_uint_fast32_t count;
    239  1.1  christos 	/*%<
    240  1.1  christos 	 * Monotonically increased every time this rdataset is bound so that
    241  1.1  christos 	 * it is used as the base of the starting point in DNS responses
    242  1.1  christos 	 * when the "cyclic" rrset-order is required.
    243  1.1  christos 	 */
    244  1.1  christos 
    245  1.1  christos 	dns_rbtnode_t *node;
    246  1.1  christos 	isc_stdtime_t last_used;
    247  1.1  christos 	ISC_LINK(struct rdatasetheader) link;
    248  1.1  christos 
    249  1.1  christos 	unsigned int heap_index;
    250  1.1  christos 	/*%<
    251  1.1  christos 	 * Used for TTL-based cache cleaning.
    252  1.1  christos 	 */
    253  1.1  christos 	isc_stdtime_t resign;
    254  1.1  christos 	/*%
    255  1.1  christos 	 * Case vector ('upper', below).  If the bit is set then the
    256  1.1  christos 	 * corresponding character in the owner name is to be rendered in
    257  1.1  christos 	 * upper case.
    	 * NOTE(review): this comment previously said the character is
    	 * "AND'd with 0x20"; for ASCII, upper-casing clears bit 0x20
    	 * instead - confirm against the code that applies this vector.
    258  1.1  christos 	 */
    259  1.1  christos 	unsigned char upper[32];
    260  1.1  christos } rdatasetheader_t;
    261  1.1  christos 
    /* List types built on the 'link' members of headers and tree nodes. */
    262  1.1  christos typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
    263  1.1  christos typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
    264  1.1  christos 
    /*
     * Bit flags kept in rdatasetheader_t.attributes (an atomic 16-bit
     * field); every flag has a same-named predicate macro further down
     * (e.g. RDATASET_ATTR_STALE <-> STALE(header)).
     */
    265  1.1  christos #define RDATASET_ATTR_NONEXISTENT 0x0001
    266  1.1  christos /*% RDATASET_ATTR_STALE: may potentially be served as stale data. */
    267  1.1  christos #define RDATASET_ATTR_STALE	     0x0002
    268  1.1  christos #define RDATASET_ATTR_IGNORE	     0x0004
    269  1.1  christos #define RDATASET_ATTR_RETAIN	     0x0008
    270  1.1  christos #define RDATASET_ATTR_NXDOMAIN	     0x0010
    271  1.1  christos #define RDATASET_ATTR_RESIGN	     0x0020
    272  1.1  christos #define RDATASET_ATTR_STATCOUNT	     0x0040
    273  1.1  christos #define RDATASET_ATTR_OPTOUT	     0x0080
    274  1.1  christos #define RDATASET_ATTR_NEGATIVE	     0x0100
    275  1.1  christos #define RDATASET_ATTR_PREFETCH	     0x0200
    276  1.1  christos #define RDATASET_ATTR_CASESET	     0x0400
    277  1.1  christos #define RDATASET_ATTR_ZEROTTL	     0x0800
    278  1.1  christos #define RDATASET_ATTR_CASEFULLYLOWER 0x1000
    279  1.1  christos /*% RDATASET_ATTR_ANCIENT: ancient - awaiting cleanup. */
    280  1.1  christos #define RDATASET_ATTR_ANCIENT	   0x2000
    281  1.1  christos #define RDATASET_ATTR_STALE_WINDOW 0x4000
    282  1.1  christos 
    283  1.1  christos /*
    284  1.1  christos  * XXX
    285  1.1  christos  * When the cache will pre-expire data (due to memory low or other
    286  1.1  christos  * situations) before the rdataset's TTL has expired, it MUST
    287  1.1  christos  * respect the RETAIN bit and not expire the data until its TTL is
    288  1.1  christos  * expired.
    289  1.1  christos  */
    290  1.1  christos 
    291  1.1  christos #undef IGNORE /* WIN32 winbase.h defines this. */
    292  1.1  christos 
    293  1.1  christos #define EXISTS(header)                                 \
    294  1.1  christos 	((atomic_load_acquire(&(header)->attributes) & \
    295  1.1  christos 	  RDATASET_ATTR_NONEXISTENT) == 0)
    296  1.1  christos #define NONEXISTENT(header)                            \
    297  1.1  christos 	((atomic_load_acquire(&(header)->attributes) & \
    298  1.1  christos 	  RDATASET_ATTR_NONEXISTENT) != 0)
    299  1.1  christos #define IGNORE(header)                                 \
    300  1.1  christos 	((atomic_load_acquire(&(header)->attributes) & \
    301  1.1  christos 	  RDATASET_ATTR_IGNORE) != 0)
    302  1.1  christos #define RETAIN(header)                                 \
    303  1.1  christos 	((atomic_load_acquire(&(header)->attributes) & \
    304  1.1  christos 	  RDATASET_ATTR_RETAIN) != 0)
    305  1.1  christos #define NXDOMAIN(header)                               \
    306  1.1  christos 	((atomic_load_acquire(&(header)->attributes) & \
    307  1.1  christos 	  RDATASET_ATTR_NXDOMAIN) != 0)
    308  1.1  christos #define STALE(header)                                                          \
    309  1.1  christos 	((atomic_load_acquire(&(header)->attributes) & RDATASET_ATTR_STALE) != \
    310  1.1  christos 	 0)
    311  1.1  christos #define STALE_WINDOW(header)                           \
    312  1.1  christos 	((atomic_load_acquire(&(header)->attributes) & \
    313  1.1  christos 	  RDATASET_ATTR_STALE_WINDOW) != 0)
    314  1.1  christos #define RESIGN(header)                                 \
    315  1.1  christos 	((atomic_load_acquire(&(header)->attributes) & \
    316  1.1  christos 	  RDATASET_ATTR_RESIGN) != 0)
    317  1.1  christos #define OPTOUT(header)                                 \
    318  1.1  christos 	((atomic_load_acquire(&(header)->attributes) & \
    319  1.1  christos 	  RDATASET_ATTR_OPTOUT) != 0)
    320  1.1  christos #define NEGATIVE(header)                               \
    321  1.1  christos 	((atomic_load_acquire(&(header)->attributes) & \
    322  1.1  christos 	  RDATASET_ATTR_NEGATIVE) != 0)
    323  1.1  christos #define PREFETCH(header)                               \
    324  1.1  christos 	((atomic_load_acquire(&(header)->attributes) & \
    325  1.1  christos 	  RDATASET_ATTR_PREFETCH) != 0)
    326  1.1  christos #define CASESET(header)                                \
    327  1.1  christos 	((atomic_load_acquire(&(header)->attributes) & \
    328  1.1  christos 	  RDATASET_ATTR_CASESET) != 0)
    329  1.1  christos #define ZEROTTL(header)                                \
    330  1.1  christos 	((atomic_load_acquire(&(header)->attributes) & \
    331  1.1  christos 	  RDATASET_ATTR_ZEROTTL) != 0)
    332  1.1  christos #define CASEFULLYLOWER(header)                         \
    333  1.1  christos 	((atomic_load_acquire(&(header)->attributes) & \
    334  1.1  christos 	  RDATASET_ATTR_CASEFULLYLOWER) != 0)
    335  1.1  christos #define ANCIENT(header)                                \
    336  1.1  christos 	((atomic_load_acquire(&(header)->attributes) & \
    337  1.1  christos 	  RDATASET_ATTR_ANCIENT) != 0)
    338  1.1  christos #define STATCOUNT(header)                              \
    339  1.1  christos 	((atomic_load_acquire(&(header)->attributes) & \
    340  1.1  christos 	  RDATASET_ATTR_STATCOUNT) != 0)
    341  1.1  christos 
    342  1.1  christos #define RDATASET_ATTR_GET(header, attribute) \
    343  1.1  christos 	(atomic_load_acquire(&(header)->attributes) & attribute)
    344  1.1  christos #define RDATASET_ATTR_SET(header, attribute) \
    345  1.1  christos 	atomic_fetch_or_release(&(header)->attributes, attribute)
    346  1.1  christos #define RDATASET_ATTR_CLR(header, attribute) \
    347  1.1  christos 	atomic_fetch_and_release(&(header)->attributes, ~(attribute))
    348  1.1  christos 
    /*
     * ACTIVE(header, now): true when header->rdh_ttl is greater than 'now',
     * or equal to 'now' while the ZEROTTL attribute is set.  Both arguments
     * are evaluated more than once.
     */
    349  1.1  christos #define ACTIVE(header, now)             \
    350  1.1  christos 	(((header)->rdh_ttl > (now)) || \
    351  1.1  christos 	 ((header)->rdh_ttl == (now) && ZEROTTL(header)))
    352  1.1  christos 
    /*
     * Sizing constants.  RBTDB_GLUE_TABLE_MAX_BITS is the upper bound that
     * hash_32() REQUIREs for its 'bits' argument; HASHSIZE(bits) is 2^bits
     * computed in 64 bits.
     */
    353  1.1  christos #define DEFAULT_NODE_LOCK_COUNT	    7 /*%< Should be prime. */
    354  1.1  christos #define RBTDB_GLUE_TABLE_INIT_BITS  2U
    355  1.1  christos #define RBTDB_GLUE_TABLE_MAX_BITS   32U
    356  1.1  christos #define RBTDB_GLUE_TABLE_OVERCOMMIT 3
    357  1.1  christos 
    /* Multiplier used by hash_32(). */
    358  1.1  christos #define GOLDEN_RATIO_32 0x61C88647
    359  1.1  christos #define HASHSIZE(bits)	(UINT64_C(1) << (bits))
    360  1.1  christos 
    361  1.1  christos static uint32_t
    362  1.1  christos hash_32(uint32_t val, unsigned int bits) {
    363  1.1  christos 	REQUIRE(bits <= RBTDB_GLUE_TABLE_MAX_BITS);
    364  1.1  christos 	/* High bits are more random. */
    365  1.1  christos 	return (val * GOLDEN_RATIO_32 >> (32 - bits));
    366  1.1  christos }
    367  1.1  christos 
    368  1.1  christos #define EXPIREDOK(rbtiterator) \
    369  1.1  christos 	(((rbtiterator)->common.options & DNS_DB_EXPIREDOK) != 0)
    370  1.1  christos 
    371  1.1  christos #define STALEOK(rbtiterator) \
    372  1.1  christos 	(((rbtiterator)->common.options & DNS_DB_STALEOK) != 0)
    373  1.1  christos 
    374  1.1  christos /*%
    375  1.1  christos  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
    376  1.1  christos  * There is a tradeoff issue about configuring this value: if this is too
    377  1.1  christos  * small, it may cause heavier contention between threads; if this is too large,
    378  1.1  christos  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
    379  1.1  christos  * The default value should work well for most environments, but this can
    380  1.1  christos  * also be configurable at compilation time via the
    381  1.1  christos  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
    382  1.1  christos  * 1 due to the assumption of overmem_purge().
     *
     * A compile-time override of 1 or less is rejected with #error; without
     * an override the count is 17.
    383  1.1  christos  */
    384  1.1  christos #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
    385  1.1  christos #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
    386  1.1  christos #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
    387  1.1  christos #else /* if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1 */
    388  1.1  christos #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
    389  1.1  christos #endif /* if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1 */
    390  1.1  christos #else  /* ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
    391  1.1  christos #define DEFAULT_CACHE_NODE_LOCK_COUNT 17
    392  1.1  christos #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
    393  1.1  christos 
    /*
     * One node-lock bucket.  dns_rbtdb.node_locks points at these and
     * dns_rbtdb.node_lock_count is stored alongside that pointer.
     */
    394  1.1  christos typedef struct {
    395  1.1  christos 	nodelock_t lock;
    396  1.1  christos 	/* Protected in the refcount routines. */
    397  1.1  christos 	isc_refcount_t references;
    398  1.1  christos 	/* Locked by lock. */
    399  1.1  christos 	bool exiting;
    400  1.1  christos } rbtdb_nodelock_t;
    401  1.1  christos 
    /*
     * Entry on a version's changed_list (rbtdb_version_t.changed_list):
     * names a tree node together with a 'dirty' flag.
     */
    402  1.1  christos typedef struct rbtdb_changed {
    403  1.1  christos 	dns_rbtnode_t *node;
    404  1.1  christos 	bool dirty;
    405  1.1  christos 	ISC_LINK(struct rbtdb_changed) link;
    406  1.1  christos } rbtdb_changed_t;
    407  1.1  christos 
    408  1.1  christos typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
    409  1.1  christos 
    /* Security state recorded in rbtdb_version_t.secure. */
    410  1.1  christos typedef enum { dns_db_insecure, dns_db_partial, dns_db_secure } dns_db_secure_t;
    411  1.1  christos 
    /* Forward declaration; struct dns_rbtdb is defined further down. */
    412  1.1  christos typedef struct dns_rbtdb dns_rbtdb_t;
    413  1.1  christos 
    414  1.1  christos /* Reason for expiring a record from cache */
    415  1.1  christos typedef enum { expire_lru, expire_ttl, expire_flush } expire_t;
    416  1.1  christos 
    /* Forward declaration only; struct rbtdb_glue is not defined here. */
    417  1.1  christos typedef struct rbtdb_glue rbtdb_glue_t;
    418  1.1  christos 
    /*
     * Entry of a version's glue table (rbtdb_version_t.glue_table): singly
     * linked through 'next', associating a tree node with a glue list.
     */
    419  1.1  christos typedef struct rbtdb_glue_table_node {
    420  1.1  christos 	struct rbtdb_glue_table_node *next;
    421  1.1  christos 	dns_rbtnode_t *node;
    422  1.1  christos 	rbtdb_glue_t *glue_list;
    423  1.1  christos } rbtdb_glue_table_node_t;
    424  1.1  christos 
    /*
     * Three-way TTL classification of an rdataset.
     * NOTE(review): presumably parallels the STALE and ANCIENT header
     * attributes - confirm against the code that uses this enum.
     */
    425  1.1  christos typedef enum {
    426  1.1  christos 	rdataset_ttl_fresh,
    427  1.1  christos 	rdataset_ttl_stale,
    428  1.1  christos 	rdataset_ttl_ancient
    429  1.1  christos } rdataset_ttl_t;
    430  1.1  christos 
    /*
     * One database version.  The comments inside the struct state which
     * lock covers each group of members.
     */
    431  1.1  christos typedef struct rbtdb_version {
    432  1.1  christos 	/* Not locked */
    433  1.1  christos 	rbtdb_serial_t serial;
    434  1.1  christos 	dns_rbtdb_t *rbtdb; /* owning database */
    435  1.1  christos 	/*
    436  1.1  christos 	 * Protected in the refcount routines.
    437  1.1  christos 	 * XXXJT: should we change the lock policy based on the refcount
    438  1.1  christos 	 * performance?
    439  1.1  christos 	 */
    440  1.1  christos 	isc_refcount_t references;
    441  1.1  christos 	/* Locked by database lock. */
    442  1.1  christos 	bool writer;
    443  1.1  christos 	bool commit_ok;
    444  1.1  christos 	rbtdb_changedlist_t changed_list;
    445  1.1  christos 	rdatasetheaderlist_t resigned_list;
    446  1.1  christos 	ISC_LINK(struct rbtdb_version) link;
    447  1.1  christos 	dns_db_secure_t secure;
    448  1.1  christos 	bool havensec3;
    449  1.1  christos 	/* NSEC3 parameters */
    450  1.1  christos 	dns_hash_t hash;
    451  1.1  christos 	uint8_t flags;
    452  1.1  christos 	uint16_t iterations;
    453  1.1  christos 	uint8_t salt_length;
    454  1.1  christos 	unsigned char salt[DNS_NSEC3_SALTSIZE];
    455  1.1  christos 
    456  1.1  christos 	/*
    457  1.1  christos 	 * records and xfrsize are covered by rwlock.
    458  1.1  christos 	 */
    459  1.1  christos 	isc_rwlock_t rwlock;
    460  1.1  christos 	uint64_t records;
    461  1.1  christos 	uint64_t xfrsize;
    462  1.1  christos 
    	/*
    	 * Glue table: an array of rbtdb_glue_table_node_t chain heads.
    	 * NOTE(review): presumably HASHSIZE(glue_table_bits) buckets indexed
    	 * with hash_32() and guarded by glue_rwlock - confirm.
    	 */
    463  1.1  christos 	isc_rwlock_t glue_rwlock;
    464  1.1  christos 	size_t glue_table_bits;
    465  1.1  christos 	size_t glue_table_nodecount;
    466  1.1  christos 	rbtdb_glue_table_node_t **glue_table;
    467  1.1  christos } rbtdb_version_t;
    468  1.1  christos 
    469  1.1  christos typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
    470  1.1  christos 
    /*
     * The red-black-tree database object.  'common' is the generic dns_db_t
     * part; VALID_RBTDB() checks common.impmagic.  The comments inside the
     * struct state which lock covers each group of members.
     */
    471  1.1  christos struct dns_rbtdb {
    472  1.1  christos 	/* Unlocked. */
    473  1.1  christos 	dns_db_t common;
    474  1.1  christos 	/* Locks the data in this struct */
    475  1.1  christos 	isc_rwlock_t lock;
    476  1.1  christos 	/* Locks the tree structure (prevents nodes appearing/disappearing) */
    477  1.1  christos 	isc_rwlock_t tree_lock;
    478  1.1  christos 	/* Locks for individual tree nodes */
    479  1.1  christos 	unsigned int node_lock_count;
    480  1.1  christos 	rbtdb_nodelock_t *node_locks;
    481  1.1  christos 	dns_rbtnode_t *origin_node;
    482  1.1  christos 	dns_rbtnode_t *nsec3_origin_node;
    483  1.1  christos 	dns_stats_t *rrsetstats;     /* cache DB only */
    484  1.1  christos 	isc_stats_t *cachestats;     /* cache DB only */
    485  1.1  christos 	isc_stats_t *gluecachestats; /* zone DB only */
    486  1.1  christos 	/* Locked by lock. */
    487  1.1  christos 	unsigned int active;
    488  1.1  christos 	isc_refcount_t references;
    489  1.1  christos 	unsigned int attributes; /* RBTDB_ATTR_* bits */
    490  1.1  christos 	rbtdb_serial_t current_serial;
    491  1.1  christos 	rbtdb_serial_t least_serial;
    492  1.1  christos 	rbtdb_serial_t next_serial;
    493  1.1  christos 	rbtdb_version_t *current_version;
    494  1.1  christos 	rbtdb_version_t *future_version;
    495  1.1  christos 	rbtdb_versionlist_t open_versions;
    496  1.1  christos 	isc_task_t *task;
    497  1.1  christos 	dns_dbnode_t *soanode;
    498  1.1  christos 	dns_dbnode_t *nsnode;
    499  1.1  christos 
    500  1.1  christos 	/*
    501  1.1  christos 	 * Maximum length of time to keep using a stale answer past its
    502  1.1  christos 	 * normal TTL expiry.
    	 * A value greater than zero makes KEEPSTALE() true.
    503  1.1  christos 	 */
    504  1.1  christos 	dns_ttl_t serve_stale_ttl;
    505  1.1  christos 
    506  1.1  christos 	/*
    507  1.1  christos 	 * The time after a failed lookup, where stale answers from cache
    508  1.1  christos 	 * may be used directly in a DNS response without attempting a
    509  1.1  christos 	 * new iterative lookup.
    510  1.1  christos 	 */
    511  1.1  christos 	uint32_t serve_stale_refresh;
    512  1.1  christos 
    513  1.1  christos 	/*
    514  1.1  christos 	 * This is a linked list used to implement the LRU cache.  There will
    515  1.1  christos 	 * be node_lock_count linked lists here.  Nodes in bucket 1 will be
    516  1.1  christos 	 * placed on the linked list rdatasets[1].
    517  1.1  christos 	 */
    518  1.1  christos 	rdatasetheaderlist_t *rdatasets;
    519  1.1  christos 
    520  1.1  christos 	/*%
    521  1.1  christos 	 * Temporary storage for stale cache nodes and dynamically deleted
    522  1.1  christos 	 * nodes that await being cleaned up.
    523  1.1  christos 	 */
    524  1.1  christos 	rbtnodelist_t *deadnodes;
    525  1.1  christos 
    526  1.1  christos 	/* List of nodes from which recursive tree pruning can be started.
    527  1.1  christos 	 * Locked by tree_lock. */
    528  1.1  christos 	rbtnodelist_t prunenodes;
    529  1.1  christos 
    530  1.1  christos 	/*
    531  1.1  christos 	 * Heaps.  These are used for TTL based expiry in a cache,
    532  1.1  christos 	 * or for zone resigning in a zone DB.  hmctx is the memory
    533  1.1  christos 	 * context to use for the heap (which differs from the main
    534  1.1  christos 	 * database memory context in the case of a cache).
    535  1.1  christos 	 */
    536  1.1  christos 	isc_mem_t *hmctx;
    537  1.1  christos 	isc_heap_t **heaps;
    538  1.1  christos 
    539  1.1  christos 	/*
    540  1.1  christos 	 * Base values for the mmap() code.
    541  1.1  christos 	 */
    542  1.1  christos 	void *mmap_location;
    543  1.1  christos 	size_t mmap_size;
    544  1.1  christos 
    545  1.1  christos 	/* Locked by tree_lock. */
    546  1.1  christos 	dns_rbt_t *tree;
    547  1.1  christos 	dns_rbt_t *nsec;
    548  1.1  christos 	dns_rbt_t *nsec3;
    549  1.1  christos 
    550  1.1  christos 	/* Unlocked */
    551  1.1  christos 	unsigned int quantum;
    552  1.1  christos };
    553  1.1  christos 
    /* Bit flags for dns_rbtdb.attributes. */
    554  1.1  christos #define RBTDB_ATTR_LOADED  0x01
    555  1.1  christos #define RBTDB_ATTR_LOADING 0x02
    556  1.1  christos 
    /* True when the database has a non-zero serve_stale_ttl. */
    557  1.1  christos #define KEEPSTALE(rbtdb) ((rbtdb)->serve_stale_ttl > 0)
    558  1.1  christos 
    559  1.1  christos /*%
    560  1.1  christos  * Search Context
     *
     * Bundles the database, version, serial, options and the time ('now')
     * of one search with its working state: the node chain, bookkeeping
     * flags and the zone-cut fields.
     * NOTE(review): the exact meaning of copy_name, need_cleanup and wild is
     * set by the search routines, which are not defined alongside this
     * struct - confirm there.
    561  1.1  christos  */
    562  1.1  christos typedef struct {
    563  1.1  christos 	dns_rbtdb_t *rbtdb;
    564  1.1  christos 	rbtdb_version_t *rbtversion;
    565  1.1  christos 	rbtdb_serial_t serial;
    566  1.1  christos 	unsigned int options;
    567  1.1  christos 	dns_rbtnodechain_t chain;
    568  1.1  christos 	bool copy_name;
    569  1.1  christos 	bool need_cleanup;
    570  1.1  christos 	bool wild;
    571  1.1  christos 	dns_rbtnode_t *zonecut;
    572  1.1  christos 	rdatasetheader_t *zonecut_rdataset;
    573  1.1  christos 	rdatasetheader_t *zonecut_sigrdataset;
    574  1.1  christos 	dns_fixedname_t zonecut_name;
    575  1.1  christos 	isc_stdtime_t now;
    576  1.1  christos } rbtdb_search_t;
    577  1.1  christos 
    578  1.1  christos /*%
    579  1.1  christos  * Load Context
     *
     * Pairs the database being loaded with a timestamp ('now').
    580  1.1  christos  */
    581  1.1  christos typedef struct {
    582  1.1  christos 	dns_rbtdb_t *rbtdb;
    583  1.1  christos 	isc_stdtime_t now;
    584  1.1  christos } rbtdb_load_t;
    585  1.1  christos 
    586  1.1  christos static void
    587  1.1  christos delete_callback(void *data, void *arg);
    588  1.1  christos static void
    589  1.1  christos rdataset_disassociate(dns_rdataset_t *rdataset);
    590  1.1  christos static isc_result_t
    591  1.1  christos rdataset_first(dns_rdataset_t *rdataset);
    592  1.1  christos static isc_result_t
    593  1.1  christos rdataset_next(dns_rdataset_t *rdataset);
    594  1.1  christos static void
    595  1.1  christos rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
    596  1.1  christos static void
    597  1.1  christos rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
    598  1.1  christos static unsigned int
    599  1.1  christos rdataset_count(dns_rdataset_t *rdataset);
    600  1.1  christos static isc_result_t
    601  1.1  christos rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
    602  1.1  christos 		    dns_rdataset_t *neg, dns_rdataset_t *negsig);
    603  1.1  christos static isc_result_t
    604  1.1  christos rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
    605  1.1  christos 		    dns_rdataset_t *neg, dns_rdataset_t *negsig);
    606  1.1  christos static bool
    607  1.1  christos need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now);
    608  1.1  christos static void
    609  1.1  christos update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, isc_stdtime_t now);
    610  1.1  christos static void
    611  1.1  christos expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, bool tree_locked,
    612  1.1  christos 	      expire_t reason);
    613  1.1  christos static void
    614  1.1  christos overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, size_t purgesize,
    615  1.1  christos 	      bool tree_locked);
    616  1.1  christos static void
    617  1.1  christos resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader);
    618  1.1  christos static void
    619  1.1  christos resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
    620  1.1  christos 	      rdatasetheader_t *header);
    621  1.1  christos static void
    622  1.1  christos prune_tree(isc_task_t *task, isc_event_t *event);
    623  1.1  christos static void
    624  1.1  christos rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
    625  1.1  christos static void
    626  1.1  christos rdataset_expire(dns_rdataset_t *rdataset);
    627  1.1  christos static void
    628  1.1  christos rdataset_clearprefetch(dns_rdataset_t *rdataset);
    629  1.1  christos static void
    630  1.1  christos rdataset_setownercase(dns_rdataset_t *rdataset, const dns_name_t *name);
    631  1.1  christos static void
    632  1.1  christos rdataset_getownercase(const dns_rdataset_t *rdataset, dns_name_t *name);
    633  1.1  christos static isc_result_t
    634  1.1  christos rdataset_addglue(dns_rdataset_t *rdataset, dns_dbversion_t *version,
    635  1.1  christos 		 dns_message_t *msg);
    636  1.1  christos static void
    637  1.1  christos free_gluetable(rbtdb_version_t *version);
    638  1.1  christos static isc_result_t
    639  1.1  christos nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name);
    640  1.1  christos 
    641  1.1  christos static dns_rdatasetmethods_t rdataset_methods = { rdataset_disassociate,
    642  1.1  christos 						  rdataset_first,
    643  1.1  christos 						  rdataset_next,
    644  1.1  christos 						  rdataset_current,
    645  1.1  christos 						  rdataset_clone,
    646  1.1  christos 						  rdataset_count,
    647  1.1  christos 						  NULL, /* addnoqname */
    648  1.1  christos 						  rdataset_getnoqname,
    649  1.1  christos 						  NULL, /* addclosest */
    650  1.1  christos 						  rdataset_getclosest,
    651  1.1  christos 						  rdataset_settrust,
    652  1.1  christos 						  rdataset_expire,
    653  1.1  christos 						  rdataset_clearprefetch,
    654  1.1  christos 						  rdataset_setownercase,
    655  1.1  christos 						  rdataset_getownercase,
    656  1.1  christos 						  rdataset_addglue };
    657  1.1  christos 
    658  1.1  christos static dns_rdatasetmethods_t slab_methods = {
    659  1.1  christos 	rdataset_disassociate,
    660  1.1  christos 	rdataset_first,
    661  1.1  christos 	rdataset_next,
    662  1.1  christos 	rdataset_current,
    663  1.1  christos 	rdataset_clone,
    664  1.1  christos 	rdataset_count,
    665  1.1  christos 	NULL, /* addnoqname */
    666  1.1  christos 	NULL, /* getnoqname */
    667  1.1  christos 	NULL, /* addclosest */
    668  1.1  christos 	NULL, /* getclosest */
    669  1.1  christos 	NULL, /* settrust */
    670  1.1  christos 	NULL, /* expire */
    671  1.1  christos 	NULL, /* clearprefetch */
    672  1.1  christos 	NULL, /* setownercase */
    673  1.1  christos 	NULL, /* getownercase */
    674  1.1  christos 	NULL  /* addglue */
    675  1.1  christos };
    676  1.1  christos 
    677  1.1  christos static void
    678  1.1  christos rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
    679  1.1  christos static isc_result_t
    680  1.1  christos rdatasetiter_first(dns_rdatasetiter_t *iterator);
    681  1.1  christos static isc_result_t
    682  1.1  christos rdatasetiter_next(dns_rdatasetiter_t *iterator);
    683  1.1  christos static void
    684  1.1  christos rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset);
    685  1.1  christos 
    686  1.1  christos static dns_rdatasetitermethods_t rdatasetiter_methods = {
    687  1.1  christos 	rdatasetiter_destroy, rdatasetiter_first, rdatasetiter_next,
    688  1.1  christos 	rdatasetiter_current
    689  1.1  christos };
    690  1.1  christos 
    691  1.1  christos typedef struct rbtdb_rdatasetiter {
    692  1.1  christos 	dns_rdatasetiter_t common;
    693  1.1  christos 	rdatasetheader_t *current;
    694  1.1  christos } rbtdb_rdatasetiter_t;
    695  1.1  christos 
    696  1.1  christos /*
    697  1.1  christos  * Note that these iterators, unless created with either DNS_DB_NSEC3ONLY or
    698  1.1  christos  * DNS_DB_NONSEC3, will transparently move between the last node of the
    699  1.1  christos  * "regular" RBT ("chain" field) and the root node of the NSEC3 RBT
    700  1.1  christos  * ("nsec3chain" field) of the database in question, as if the latter was a
    701  1.1  christos  * successor to the former in lexical order.  The "current" field always holds
    702  1.1  christos  * the address of either "chain" or "nsec3chain", depending on which RBT is
    703  1.1  christos  * being traversed at given time.
    704  1.1  christos  */
    705  1.1  christos static void
    706  1.1  christos dbiterator_destroy(dns_dbiterator_t **iteratorp);
    707  1.1  christos static isc_result_t
    708  1.1  christos dbiterator_first(dns_dbiterator_t *iterator);
    709  1.1  christos static isc_result_t
    710  1.1  christos dbiterator_last(dns_dbiterator_t *iterator);
    711  1.1  christos static isc_result_t
    712  1.1  christos dbiterator_seek(dns_dbiterator_t *iterator, const dns_name_t *name);
    713  1.1  christos static isc_result_t
    714  1.1  christos dbiterator_prev(dns_dbiterator_t *iterator);
    715  1.1  christos static isc_result_t
    716  1.1  christos dbiterator_next(dns_dbiterator_t *iterator);
    717  1.1  christos static isc_result_t
    718  1.1  christos dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
    719  1.1  christos 		   dns_name_t *name);
    720  1.1  christos static isc_result_t
    721  1.1  christos dbiterator_pause(dns_dbiterator_t *iterator);
    722  1.1  christos static isc_result_t
    723  1.1  christos dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name);
    724  1.1  christos 
    725  1.1  christos static dns_dbiteratormethods_t dbiterator_methods = {
    726  1.1  christos 	dbiterator_destroy, dbiterator_first, dbiterator_last,
    727  1.1  christos 	dbiterator_seek,    dbiterator_prev,  dbiterator_next,
    728  1.1  christos 	dbiterator_current, dbiterator_pause, dbiterator_origin
    729  1.1  christos };
    730  1.1  christos 
    731  1.1  christos #define DELETION_BATCH_MAX 64
    732  1.1  christos 
    733  1.1  christos /*
    734  1.1  christos  * If 'paused' is true, then the tree lock is not being held.
    735  1.1  christos  */
    736  1.1  christos typedef struct rbtdb_dbiterator {
    737  1.1  christos 	dns_dbiterator_t common;
    738  1.1  christos 	bool paused;
    739  1.1  christos 	bool new_origin;
    740  1.1  christos 	isc_rwlocktype_t tree_locked;
    741  1.1  christos 	isc_result_t result;
    742  1.1  christos 	dns_fixedname_t name;
    743  1.1  christos 	dns_fixedname_t origin;
    744  1.1  christos 	dns_rbtnodechain_t chain;
    745  1.1  christos 	dns_rbtnodechain_t nsec3chain;
    746  1.1  christos 	dns_rbtnodechain_t *current;
    747  1.1  christos 	dns_rbtnode_t *node;
    748  1.1  christos 	dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
    749  1.1  christos 	int delcnt;
    750  1.1  christos 	bool nsec3only;
    751  1.1  christos 	bool nonsec3;
    752  1.1  christos } rbtdb_dbiterator_t;
    753  1.1  christos 
/* Test the STUB / CACHE attribute bits of a database's common header. */
#define IS_STUB(db)  (((db)->common.attributes & DNS_DBATTR_STUB) != 0)
#define IS_CACHE(db) (((db)->common.attributes & DNS_DBATTR_CACHE) != 0)
    756  1.1  christos 
    757  1.1  christos static void
    758  1.1  christos free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event);
    759  1.1  christos static void
    760  1.1  christos overmem(dns_db_t *db, bool over);
    761  1.1  christos static void
    762  1.1  christos setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
    763  1.1  christos static void
    764  1.1  christos setownercase(rdatasetheader_t *header, const dns_name_t *name);
    765  1.1  christos 
    766  1.1  christos static bool
    767  1.1  christos match_header_version(rbtdb_file_header_t *header);
    768  1.1  christos 
/* Fixed 32-byte buffer (padded to 32 bytes); starts out all zero. */
static char FILE_VERSION[32] = "";
    771  1.1  christos 
    772  1.1  christos /*%
    773  1.1  christos  * 'init_count' is used to initialize 'newheader->count' which in turn
    774  1.1  christos  * is used to determine where in the cycle rrset-order cyclic starts.
    775  1.1  christos  * We don't lock this as we don't care about simultaneous updates.
    776  1.1  christos  *
    777  1.1  christos  * Note:
    778  1.1  christos  *      Both init_count and header->count can be UINT32_MAX.
    779  1.1  christos  *      The count on the returned rdataset however can't be as
    780  1.1  christos  *      that indicates that the database does not implement cyclic
    781  1.1  christos  *      processing.
    782  1.1  christos  */
    783  1.1  christos static atomic_uint_fast32_t init_count = 0;
    784  1.1  christos 
    785  1.1  christos /*
    786  1.1  christos  * Locking
    787  1.1  christos  *
    788  1.1  christos  * If a routine is going to lock more than one lock in this module, then
    789  1.1  christos  * the locking must be done in the following order:
    790  1.1  christos  *
    791  1.1  christos  *      Tree Lock
    792  1.1  christos  *
    793  1.1  christos  *      Node Lock       (Only one from the set may be locked at one time by
    794  1.1  christos  *                       any caller)
    795  1.1  christos  *
    796  1.1  christos  *      Database Lock
    797  1.1  christos  *
    798  1.1  christos  * Failure to follow this hierarchy can result in deadlock.
    799  1.1  christos  */
    800  1.1  christos 
    801  1.1  christos /*
    802  1.1  christos  * Deleting Nodes
    803  1.1  christos  *
    804  1.1  christos  * For zone databases the node for the origin of the zone MUST NOT be deleted.
    805  1.1  christos  */
    806  1.1  christos 
    807  1.1  christos /*
    808  1.1  christos  * Debugging routines
    809  1.1  christos  */
#ifdef DEBUG
/*
 * Debug aid: print "<desc>: " followed by the hex text of 'size' bytes
 * starting at 'data', then a newline, all on stderr.  The input is
 * converted in pieces of at most BUFSIZ bytes so the hex text of one
 * piece plus its terminating NUL always fits the local buffer.
 */
static void
hexdump(const char *desc, unsigned char *data, size_t size) {
	char text[BUFSIZ * 2 + 1];
	isc_buffer_t out;
	isc_region_t in;
	isc_result_t result;
	size_t chunk;

	fprintf(stderr, "%s: ", desc);
	do {
		chunk = (size > BUFSIZ) ? BUFSIZ : size;
		in.base = data;
		in.length = chunk;

		isc_buffer_init(&out, text, sizeof(text));
		result = isc_hex_totext(&in, 0, "", &out);
		RUNTIME_CHECK(result == ISC_R_SUCCESS);
		/* NUL-terminate so the buffer can be printed with %s. */
		isc_buffer_putuint8(&out, 0);
		fprintf(stderr, "%s", text);

		data += chunk;
		size -= chunk;
	} while (size > 0);
	fprintf(stderr, "\n");
}
#endif /* ifdef DEBUG */
    834  1.1  christos 
/* Fixed RRSet helper macros */

/*
 * Deliberately defined WITHOUT a trailing semicolon: with one, the macro
 * ends the statement wherever it is expanded, so an expression such as
 * "DNS_RDATASET_LENGTH + n" would silently drop the "+ n" part.
 * NOTE(review): presumably the size in bytes of a per-record length
 * field -- confirm against the code that uses it.
 */
#define DNS_RDATASET_LENGTH 2

/*
 * DNS_RDATASET_COUNT refers to a variable named 'count' that must be in
 * scope at the point of use when DNS_RDATASET_FIXED is enabled.
 */
#if DNS_RDATASET_FIXED
#define DNS_RDATASET_ORDER 2
#define DNS_RDATASET_COUNT (count * 4)
#else /* !DNS_RDATASET_FIXED */
#define DNS_RDATASET_ORDER 0
#define DNS_RDATASET_COUNT 0
#endif /* DNS_RDATASET_FIXED */
    846  1.1  christos 
    847  1.1  christos /*
    848  1.1  christos  * DB Routines
    849  1.1  christos  */
    850  1.1  christos 
    851  1.1  christos static void
    852  1.1  christos attach(dns_db_t *source, dns_db_t **targetp) {
    853  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
    854  1.1  christos 
    855  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
    856  1.1  christos 
    857  1.1  christos 	isc_refcount_increment(&rbtdb->references);
    858  1.1  christos 
    859  1.1  christos 	*targetp = source;
    860  1.1  christos }
    861  1.1  christos 
    862  1.1  christos static void
    863  1.1  christos free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
    864  1.1  christos 	dns_rbtdb_t *rbtdb = event->ev_arg;
    865  1.1  christos 
    866  1.1  christos 	UNUSED(task);
    867  1.1  christos 
    868  1.1  christos 	free_rbtdb(rbtdb, true, event);
    869  1.1  christos }
    870  1.1  christos 
    871  1.1  christos static void
    872  1.1  christos update_cachestats(dns_rbtdb_t *rbtdb, isc_result_t result) {
    873  1.1  christos 	INSIST(IS_CACHE(rbtdb));
    874  1.1  christos 
    875  1.1  christos 	if (rbtdb->cachestats == NULL) {
    876  1.1  christos 		return;
    877  1.1  christos 	}
    878  1.1  christos 
    879  1.1  christos 	switch (result) {
    880  1.1  christos 	case ISC_R_SUCCESS:
    881  1.1  christos 	case DNS_R_CNAME:
    882  1.1  christos 	case DNS_R_DNAME:
    883  1.1  christos 	case DNS_R_DELEGATION:
    884  1.1  christos 	case DNS_R_NCACHENXDOMAIN:
    885  1.1  christos 	case DNS_R_NCACHENXRRSET:
    886  1.1  christos 		isc_stats_increment(rbtdb->cachestats,
    887  1.1  christos 				    dns_cachestatscounter_hits);
    888  1.1  christos 		break;
    889  1.1  christos 	default:
    890  1.1  christos 		isc_stats_increment(rbtdb->cachestats,
    891  1.1  christos 				    dns_cachestatscounter_misses);
    892  1.1  christos 	}
    893  1.1  christos }
    894  1.1  christos 
    895  1.1  christos static bool
    896  1.1  christos do_stats(rdatasetheader_t *header) {
    897  1.1  christos 	return (EXISTS(header) && STATCOUNT(header));
    898  1.1  christos }
    899  1.1  christos 
    900  1.1  christos static void
    901  1.1  christos update_rrsetstats(dns_rbtdb_t *rbtdb, const rbtdb_rdatatype_t htype,
    902  1.1  christos 		  const uint_least16_t hattributes, const bool increment) {
    903  1.1  christos 	dns_rdatastatstype_t statattributes = 0;
    904  1.1  christos 	dns_rdatastatstype_t base = 0;
    905  1.1  christos 	dns_rdatastatstype_t type;
    906  1.1  christos 	rdatasetheader_t *header = &(rdatasetheader_t){
    907  1.1  christos 		.type = htype,
    908  1.1  christos 		.attributes = hattributes,
    909  1.1  christos 	};
    910  1.1  christos 
    911  1.1  christos 	if (!do_stats(header)) {
    912  1.1  christos 		return;
    913  1.1  christos 	}
    914  1.1  christos 
    915  1.1  christos 	/* At the moment we count statistics only for cache DB */
    916  1.1  christos 	INSIST(IS_CACHE(rbtdb));
    917  1.1  christos 
    918  1.1  christos 	if (NEGATIVE(header)) {
    919  1.1  christos 		if (NXDOMAIN(header)) {
    920  1.1  christos 			statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
    921  1.1  christos 		} else {
    922  1.1  christos 			statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
    923  1.1  christos 			base = RBTDB_RDATATYPE_EXT(header->type);
    924  1.1  christos 		}
    925  1.1  christos 	} else {
    926  1.1  christos 		base = RBTDB_RDATATYPE_BASE(header->type);
    927  1.1  christos 	}
    928  1.1  christos 
    929  1.1  christos 	if (STALE(header)) {
    930  1.1  christos 		statattributes |= DNS_RDATASTATSTYPE_ATTR_STALE;
    931  1.1  christos 	}
    932  1.1  christos 	if (ANCIENT(header)) {
    933  1.1  christos 		statattributes |= DNS_RDATASTATSTYPE_ATTR_ANCIENT;
    934  1.1  christos 	}
    935  1.1  christos 
    936  1.1  christos 	type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
    937  1.1  christos 	if (increment) {
    938  1.1  christos 		dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
    939  1.1  christos 	} else {
    940  1.1  christos 		dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
    941  1.1  christos 	}
    942  1.1  christos }
    943  1.1  christos 
    944  1.1  christos static void
    945  1.1  christos set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
    946  1.1  christos 	int idx;
    947  1.1  christos 	isc_heap_t *heap;
    948  1.1  christos 	dns_ttl_t oldttl;
    949  1.1  christos 
    950  1.1  christos 	if (!IS_CACHE(rbtdb)) {
    951  1.1  christos 		header->rdh_ttl = newttl;
    952  1.1  christos 		return;
    953  1.1  christos 	}
    954  1.1  christos 
    955  1.1  christos 	oldttl = header->rdh_ttl;
    956  1.1  christos 	header->rdh_ttl = newttl;
    957  1.1  christos 
    958  1.1  christos 	/*
    959  1.1  christos 	 * It's possible the rbtdb is not a cache.  If this is the case,
    960  1.1  christos 	 * we will not have a heap, and we move on.  If we do, though,
    961  1.1  christos 	 * we might need to adjust things.
    962  1.1  christos 	 */
    963  1.1  christos 	if (header->heap_index == 0 || newttl == oldttl) {
    964  1.1  christos 		return;
    965  1.1  christos 	}
    966  1.1  christos 	idx = header->node->locknum;
    967  1.1  christos 	if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL) {
    968  1.1  christos 		return;
    969  1.1  christos 	}
    970  1.1  christos 	heap = rbtdb->heaps[idx];
    971  1.1  christos 
    972  1.1  christos 	if (newttl < oldttl) {
    973  1.1  christos 		isc_heap_increased(heap, header->heap_index);
    974  1.1  christos 	} else {
    975  1.1  christos 		isc_heap_decreased(heap, header->heap_index);
    976  1.1  christos 	}
    977  1.1  christos }
    978  1.1  christos 
    979  1.1  christos /*%
    980  1.1  christos  * These functions allow the heap code to rank the priority of each
    981  1.1  christos  * element.  It returns true if v1 happens "sooner" than v2.
    982  1.1  christos  */
    983  1.1  christos static bool
    984  1.1  christos ttl_sooner(void *v1, void *v2) {
    985  1.1  christos 	rdatasetheader_t *h1 = v1;
    986  1.1  christos 	rdatasetheader_t *h2 = v2;
    987  1.1  christos 
    988  1.1  christos 	return (h1->rdh_ttl < h2->rdh_ttl);
    989  1.1  christos }
    990  1.1  christos 
    991  1.1  christos /*%
    992  1.1  christos  * Return which RRset should be resigned sooner.  If the RRsets have the
    993  1.1  christos  * same signing time, prefer the other RRset over the SOA RRset.
    994  1.1  christos  */
    995  1.1  christos static bool
    996  1.1  christos resign_sooner(void *v1, void *v2) {
    997  1.1  christos 	rdatasetheader_t *h1 = v1;
    998  1.1  christos 	rdatasetheader_t *h2 = v2;
    999  1.1  christos 
   1000  1.1  christos 	return (h1->resign < h2->resign ||
   1001  1.1  christos 		(h1->resign == h2->resign && h1->resign_lsb < h2->resign_lsb) ||
   1002  1.1  christos 		(h1->resign == h2->resign && h1->resign_lsb == h2->resign_lsb &&
   1003  1.1  christos 		 h2->type == RBTDB_RDATATYPE_SIGSOA));
   1004  1.1  christos }
   1005  1.1  christos 
   1006  1.1  christos /*%
   1007  1.1  christos  * This function sets the heap index into the header.
   1008  1.1  christos  */
   1009  1.1  christos static void
   1010  1.1  christos set_index(void *what, unsigned int idx) {
   1011  1.1  christos 	rdatasetheader_t *h = what;
   1012  1.1  christos 
   1013  1.1  christos 	h->heap_index = idx;
   1014  1.1  christos }
   1015  1.1  christos 
   1016  1.1  christos /*%
   1017  1.1  christos  * Work out how many nodes can be deleted in the time between two
   1018  1.1  christos  * requests to the nameserver.  Smooth the resulting number and use it
   1019  1.1  christos  * as a estimate for the number of nodes to be deleted in the next
   1020  1.1  christos  * iteration.
   1021  1.1  christos  */
   1022  1.1  christos static unsigned int
   1023  1.1  christos adjust_quantum(unsigned int old, isc_time_t *start) {
   1024  1.1  christos 	unsigned int pps = dns_pps; /* packets per second */
   1025  1.1  christos 	unsigned int interval;
   1026  1.1  christos 	uint64_t usecs;
   1027  1.1  christos 	isc_time_t end;
   1028  1.1  christos 	unsigned int nodes;
   1029  1.1  christos 
   1030  1.1  christos 	if (pps < 100) {
   1031  1.1  christos 		pps = 100;
   1032  1.1  christos 	}
   1033  1.1  christos 	isc_time_now(&end);
   1034  1.1  christos 
   1035  1.1  christos 	interval = 1000000 / pps; /* interval in usec */
   1036  1.1  christos 	if (interval == 0) {
   1037  1.1  christos 		interval = 1;
   1038  1.1  christos 	}
   1039  1.1  christos 	usecs = isc_time_microdiff(&end, start);
   1040  1.1  christos 	if (usecs == 0) {
   1041  1.1  christos 		/*
   1042  1.1  christos 		 * We were unable to measure the amount of time taken.
   1043  1.1  christos 		 * Double the nodes deleted next time.
   1044  1.1  christos 		 */
   1045  1.1  christos 		old *= 2;
   1046  1.1  christos 		if (old > 1000) {
   1047  1.1  christos 			old = 1000;
   1048  1.1  christos 		}
   1049  1.1  christos 		return (old);
   1050  1.1  christos 	}
   1051  1.1  christos 	nodes = old * interval;
   1052  1.1  christos 	nodes /= (unsigned int)usecs;
   1053  1.1  christos 	if (nodes == 0) {
   1054  1.1  christos 		nodes = 1;
   1055  1.1  christos 	} else if (nodes > 1000) {
   1056  1.1  christos 		nodes = 1000;
   1057  1.1  christos 	}
   1058  1.1  christos 
   1059  1.1  christos 	/* Smooth */
   1060  1.1  christos 	nodes = (nodes + old * 3) / 4;
   1061  1.1  christos 
   1062  1.1  christos 	if (nodes != old) {
   1063  1.1  christos 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
   1064  1.1  christos 			      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
   1065  1.1  christos 			      "adjust_quantum: old=%d, new=%d", old, nodes);
   1066  1.1  christos 	}
   1067  1.1  christos 
   1068  1.1  christos 	return (nodes);
   1069  1.1  christos }
   1070  1.1  christos 
   1071  1.1  christos static void
   1072  1.1  christos free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) {
   1073  1.1  christos 	unsigned int i;
   1074  1.1  christos 	isc_result_t result;
   1075  1.1  christos 	char buf[DNS_NAME_FORMATSIZE];
   1076  1.1  christos 	dns_rbtnode_t *node = NULL;
   1077  1.1  christos 	dns_rbt_t **treep;
   1078  1.1  christos 	isc_time_t start;
   1079  1.1  christos 
   1080  1.1  christos 	if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in) {
   1081  1.1  christos 		overmem((dns_db_t *)rbtdb, (bool)-1);
   1082  1.1  christos 	}
   1083  1.1  christos 
   1084  1.1  christos 	REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
   1085  1.1  christos 	REQUIRE(rbtdb->future_version == NULL);
   1086  1.1  christos 
   1087  1.1  christos 	if (rbtdb->current_version != NULL) {
   1088  1.1  christos 		isc_refcount_decrementz(&rbtdb->current_version->references);
   1089  1.1  christos 		UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
   1090  1.1  christos 		isc_rwlock_destroy(&rbtdb->current_version->glue_rwlock);
   1091  1.1  christos 		isc_refcount_destroy(&rbtdb->current_version->references);
   1092  1.1  christos 		isc_rwlock_destroy(&rbtdb->current_version->rwlock);
   1093  1.1  christos 		isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
   1094  1.1  christos 			    sizeof(rbtdb_version_t));
   1095  1.1  christos 	}
   1096  1.1  christos 
   1097  1.1  christos 	/*
   1098  1.1  christos 	 * We assume the number of remaining dead nodes is reasonably small;
   1099  1.1  christos 	 * the overhead of unlinking all nodes here should be negligible.
   1100  1.1  christos 	 */
   1101  1.1  christos 	for (i = 0; i < rbtdb->node_lock_count; i++) {
   1102  1.1  christos 		node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
   1103  1.1  christos 		while (node != NULL) {
   1104  1.1  christos 			ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
   1105  1.1  christos 			node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
   1106  1.1  christos 		}
   1107  1.1  christos 	}
   1108  1.1  christos 
   1109  1.1  christos 	node = ISC_LIST_HEAD(rbtdb->prunenodes);
   1110  1.1  christos 	while (node != NULL) {
   1111  1.1  christos 		ISC_LIST_UNLINK(rbtdb->prunenodes, node, prunelink);
   1112  1.1  christos 		node = ISC_LIST_HEAD(rbtdb->prunenodes);
   1113  1.1  christos 	}
   1114  1.1  christos 
   1115  1.1  christos 	if (event == NULL) {
   1116  1.1  christos 		rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
   1117  1.1  christos 	}
   1118  1.1  christos 
   1119  1.1  christos 	for (;;) {
   1120  1.1  christos 		/*
   1121  1.1  christos 		 * pick the next tree to (start to) destroy
   1122  1.1  christos 		 */
   1123  1.1  christos 		treep = &rbtdb->tree;
   1124  1.1  christos 		if (*treep == NULL) {
   1125  1.1  christos 			treep = &rbtdb->nsec;
   1126  1.1  christos 			if (*treep == NULL) {
   1127  1.1  christos 				treep = &rbtdb->nsec3;
   1128  1.1  christos 				/*
   1129  1.1  christos 				 * we're finished after clear cutting
   1130  1.1  christos 				 */
   1131  1.1  christos 				if (*treep == NULL) {
   1132  1.1  christos 					break;
   1133  1.1  christos 				}
   1134  1.1  christos 			}
   1135  1.1  christos 		}
   1136  1.1  christos 
   1137  1.1  christos 		isc_time_now(&start);
   1138  1.1  christos 		result = dns_rbt_destroy2(treep, rbtdb->quantum);
   1139  1.1  christos 		if (result == ISC_R_QUOTA) {
   1140  1.1  christos 			INSIST(rbtdb->task != NULL);
   1141  1.1  christos 			if (rbtdb->quantum != 0) {
   1142  1.1  christos 				rbtdb->quantum = adjust_quantum(rbtdb->quantum,
   1143  1.1  christos 								&start);
   1144  1.1  christos 			}
   1145  1.1  christos 			if (event == NULL) {
   1146  1.1  christos 				event = isc_event_allocate(
   1147  1.1  christos 					rbtdb->common.mctx, NULL,
   1148  1.1  christos 					DNS_EVENT_FREESTORAGE,
   1149  1.1  christos 					free_rbtdb_callback, rbtdb,
   1150  1.1  christos 					sizeof(isc_event_t));
   1151  1.1  christos 			}
   1152  1.1  christos 			isc_task_send(rbtdb->task, &event);
   1153  1.1  christos 			return;
   1154  1.1  christos 		}
   1155  1.1  christos 		INSIST(result == ISC_R_SUCCESS && *treep == NULL);
   1156  1.1  christos 	}
   1157  1.1  christos 
   1158  1.1  christos 	if (event != NULL) {
   1159  1.1  christos 		isc_event_free(&event);
   1160  1.1  christos 	}
   1161  1.1  christos 	if (log) {
   1162  1.1  christos 		if (dns_name_dynamic(&rbtdb->common.origin)) {
   1163  1.1  christos 			dns_name_format(&rbtdb->common.origin, buf,
   1164  1.1  christos 					sizeof(buf));
   1165  1.1  christos 		} else {
   1166  1.1  christos 			strlcpy(buf, "<UNKNOWN>", sizeof(buf));
   1167  1.1  christos 		}
   1168  1.1  christos 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
   1169  1.1  christos 			      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
   1170  1.1  christos 			      "done free_rbtdb(%s)", buf);
   1171  1.1  christos 	}
   1172  1.1  christos 	if (dns_name_dynamic(&rbtdb->common.origin)) {
   1173  1.1  christos 		dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
   1174  1.1  christos 	}
   1175  1.1  christos 	for (i = 0; i < rbtdb->node_lock_count; i++) {
   1176  1.1  christos 		isc_refcount_destroy(&rbtdb->node_locks[i].references);
   1177  1.1  christos 		NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
   1178  1.1  christos 	}
   1179  1.1  christos 
   1180  1.1  christos 	/*
   1181  1.1  christos 	 * Clean up LRU / re-signing order lists.
   1182  1.1  christos 	 */
   1183  1.1  christos 	if (rbtdb->rdatasets != NULL) {
   1184  1.1  christos 		for (i = 0; i < rbtdb->node_lock_count; i++) {
   1185  1.1  christos 			INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
   1186  1.1  christos 		}
   1187  1.1  christos 		isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
   1188  1.1  christos 			    rbtdb->node_lock_count *
   1189  1.1  christos 				    sizeof(rdatasetheaderlist_t));
   1190  1.1  christos 	}
   1191  1.1  christos 	/*
   1192  1.1  christos 	 * Clean up dead node buckets.
   1193  1.1  christos 	 */
   1194  1.1  christos 	if (rbtdb->deadnodes != NULL) {
   1195  1.1  christos 		for (i = 0; i < rbtdb->node_lock_count; i++) {
   1196  1.1  christos 			INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
   1197  1.1  christos 		}
   1198  1.1  christos 		isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
   1199  1.1  christos 			    rbtdb->node_lock_count * sizeof(rbtnodelist_t));
   1200  1.1  christos 	}
   1201  1.1  christos 	/*
   1202  1.1  christos 	 * Clean up heap objects.
   1203  1.1  christos 	 */
   1204  1.1  christos 	if (rbtdb->heaps != NULL) {
   1205  1.1  christos 		for (i = 0; i < rbtdb->node_lock_count; i++) {
   1206  1.1  christos 			isc_heap_destroy(&rbtdb->heaps[i]);
   1207  1.1  christos 		}
   1208  1.1  christos 		isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
   1209  1.1  christos 			    rbtdb->node_lock_count * sizeof(isc_heap_t *));
   1210  1.1  christos 	}
   1211  1.1  christos 
   1212  1.1  christos 	if (rbtdb->rrsetstats != NULL) {
   1213  1.1  christos 		dns_stats_detach(&rbtdb->rrsetstats);
   1214  1.1  christos 	}
   1215  1.1  christos 	if (rbtdb->cachestats != NULL) {
   1216  1.1  christos 		isc_stats_detach(&rbtdb->cachestats);
   1217  1.1  christos 	}
   1218  1.1  christos 	if (rbtdb->gluecachestats != NULL) {
   1219  1.1  christos 		isc_stats_detach(&rbtdb->gluecachestats);
   1220  1.1  christos 	}
   1221  1.1  christos 
   1222  1.1  christos 	isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
   1223  1.1  christos 		    rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
   1224  1.1  christos 	isc_rwlock_destroy(&rbtdb->tree_lock);
   1225  1.1  christos 	isc_refcount_destroy(&rbtdb->references);
   1226  1.1  christos 	if (rbtdb->task != NULL) {
   1227  1.1  christos 		isc_task_detach(&rbtdb->task);
   1228  1.1  christos 	}
   1229  1.1  christos 
   1230  1.1  christos 	RBTDB_DESTROYLOCK(&rbtdb->lock);
   1231  1.1  christos 	rbtdb->common.magic = 0;
   1232  1.1  christos 	rbtdb->common.impmagic = 0;
   1233  1.1  christos 	isc_mem_detach(&rbtdb->hmctx);
   1234  1.1  christos 
   1235  1.1  christos 	if (rbtdb->mmap_location != NULL) {
   1236  1.1  christos 		isc_file_munmap(rbtdb->mmap_location, (size_t)rbtdb->mmap_size);
   1237  1.1  christos 	}
   1238  1.1  christos 
   1239  1.1  christos 	INSIST(ISC_LIST_EMPTY(rbtdb->common.update_listeners));
   1240  1.1  christos 
   1241  1.1  christos 	isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
   1242  1.1  christos }
   1243  1.1  christos 
   1244  1.1  christos static void
   1245  1.1  christos maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
   1246  1.1  christos 	bool want_free = false;
   1247  1.1  christos 	unsigned int i;
   1248  1.1  christos 	unsigned int inactive = 0;
   1249  1.1  christos 
   1250  1.1  christos 	/* XXX check for open versions here */
   1251  1.1  christos 
   1252  1.1  christos 	if (rbtdb->soanode != NULL) {
   1253  1.1  christos 		dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
   1254  1.1  christos 	}
   1255  1.1  christos 	if (rbtdb->nsnode != NULL) {
   1256  1.1  christos 		dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
   1257  1.1  christos 	}
   1258  1.1  christos 
   1259  1.1  christos 	/*
   1260  1.1  christos 	 * The current version's glue table needs to be freed early
   1261  1.1  christos 	 * so the nodes are dereferenced before we check the active
   1262  1.1  christos 	 * node count below.
   1263  1.1  christos 	 */
   1264  1.1  christos 	if (rbtdb->current_version != NULL) {
   1265  1.1  christos 		free_gluetable(rbtdb->current_version);
   1266  1.1  christos 	}
   1267  1.1  christos 
   1268  1.1  christos 	/*
   1269  1.1  christos 	 * Even though there are no external direct references, there still
   1270  1.1  christos 	 * may be nodes in use.
   1271  1.1  christos 	 */
   1272  1.1  christos 	for (i = 0; i < rbtdb->node_lock_count; i++) {
   1273  1.1  christos 		NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
   1274  1.1  christos 		rbtdb->node_locks[i].exiting = true;
   1275  1.1  christos 		if (isc_refcount_current(&rbtdb->node_locks[i].references) == 0)
   1276  1.1  christos 		{
   1277  1.1  christos 			inactive++;
   1278  1.1  christos 		}
   1279  1.1  christos 		NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
   1280  1.1  christos 	}
   1281  1.1  christos 
   1282  1.1  christos 	if (inactive != 0) {
   1283  1.1  christos 		RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
   1284  1.1  christos 		rbtdb->active -= inactive;
   1285  1.1  christos 		if (rbtdb->active == 0) {
   1286  1.1  christos 			want_free = true;
   1287  1.1  christos 		}
   1288  1.1  christos 		RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
   1289  1.1  christos 		if (want_free) {
   1290  1.1  christos 			char buf[DNS_NAME_FORMATSIZE];
   1291  1.1  christos 			if (dns_name_dynamic(&rbtdb->common.origin)) {
   1292  1.1  christos 				dns_name_format(&rbtdb->common.origin, buf,
   1293  1.1  christos 						sizeof(buf));
   1294  1.1  christos 			} else {
   1295  1.1  christos 				strlcpy(buf, "<UNKNOWN>", sizeof(buf));
   1296  1.1  christos 			}
   1297  1.1  christos 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
   1298  1.1  christos 				      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
   1299  1.1  christos 				      "calling free_rbtdb(%s)", buf);
   1300  1.1  christos 			free_rbtdb(rbtdb, true, NULL);
   1301  1.1  christos 		}
   1302  1.1  christos 	}
   1303  1.1  christos }
   1304  1.1  christos 
   1305  1.1  christos static void
   1306  1.1  christos detach(dns_db_t **dbp) {
   1307  1.1  christos 	REQUIRE(dbp != NULL && VALID_RBTDB((dns_rbtdb_t *)(*dbp)));
   1308  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
   1309  1.1  christos 	*dbp = NULL;
   1310  1.1  christos 
   1311  1.1  christos 	if (isc_refcount_decrement(&rbtdb->references) == 1) {
   1312  1.1  christos 		maybe_free_rbtdb(rbtdb);
   1313  1.1  christos 	}
   1314  1.1  christos }
   1315  1.1  christos 
   1316  1.1  christos static void
   1317  1.1  christos currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
   1318  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   1319  1.1  christos 	rbtdb_version_t *version;
   1320  1.1  christos 
   1321  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   1322  1.1  christos 
   1323  1.1  christos 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
   1324  1.1  christos 	version = rbtdb->current_version;
   1325  1.1  christos 	isc_refcount_increment(&version->references);
   1326  1.1  christos 	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
   1327  1.1  christos 
   1328  1.1  christos 	*versionp = (dns_dbversion_t *)version;
   1329  1.1  christos }
   1330  1.1  christos 
   1331  1.1  christos static rbtdb_version_t *
   1332  1.1  christos allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
   1333  1.1  christos 		 unsigned int references, bool writer) {
   1334  1.1  christos 	rbtdb_version_t *version;
   1335  1.1  christos 	size_t size;
   1336  1.1  christos 
   1337  1.1  christos 	version = isc_mem_get(mctx, sizeof(*version));
   1338  1.1  christos 	version->serial = serial;
   1339  1.1  christos 
   1340  1.1  christos 	isc_refcount_init(&version->references, references);
   1341  1.1  christos 	isc_rwlock_init(&version->glue_rwlock, 0, 0);
   1342  1.1  christos 
   1343  1.1  christos 	version->glue_table_bits = RBTDB_GLUE_TABLE_INIT_BITS;
   1344  1.1  christos 	version->glue_table_nodecount = 0U;
   1345  1.1  christos 
   1346  1.1  christos 	size = HASHSIZE(version->glue_table_bits) *
   1347  1.1  christos 	       sizeof(version->glue_table[0]);
   1348  1.1  christos 	version->glue_table = isc_mem_get(mctx, size);
   1349  1.1  christos 	memset(version->glue_table, 0, size);
   1350  1.1  christos 
   1351  1.1  christos 	version->writer = writer;
   1352  1.1  christos 	version->commit_ok = false;
   1353  1.1  christos 	ISC_LIST_INIT(version->changed_list);
   1354  1.1  christos 	ISC_LIST_INIT(version->resigned_list);
   1355  1.1  christos 	ISC_LINK_INIT(version, link);
   1356  1.1  christos 
   1357  1.1  christos 	return (version);
   1358  1.1  christos }
   1359  1.1  christos 
   1360  1.1  christos static isc_result_t
   1361  1.1  christos newversion(dns_db_t *db, dns_dbversion_t **versionp) {
   1362  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   1363  1.1  christos 	rbtdb_version_t *version;
   1364  1.1  christos 
   1365  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   1366  1.1  christos 	REQUIRE(versionp != NULL && *versionp == NULL);
   1367  1.1  christos 	REQUIRE(rbtdb->future_version == NULL);
   1368  1.1  christos 
   1369  1.1  christos 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
   1370  1.1  christos 	RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
   1371  1.1  christos 	version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
   1372  1.1  christos 				   true);
   1373  1.1  christos 	version->rbtdb = rbtdb;
   1374  1.1  christos 	version->commit_ok = true;
   1375  1.1  christos 	version->secure = rbtdb->current_version->secure;
   1376  1.1  christos 	version->havensec3 = rbtdb->current_version->havensec3;
   1377  1.1  christos 	if (version->havensec3) {
   1378  1.1  christos 		version->flags = rbtdb->current_version->flags;
   1379  1.1  christos 		version->iterations = rbtdb->current_version->iterations;
   1380  1.1  christos 		version->hash = rbtdb->current_version->hash;
   1381  1.1  christos 		version->salt_length = rbtdb->current_version->salt_length;
   1382  1.1  christos 		memmove(version->salt, rbtdb->current_version->salt,
   1383  1.1  christos 			version->salt_length);
   1384  1.1  christos 	} else {
   1385  1.1  christos 		version->flags = 0;
   1386  1.1  christos 		version->iterations = 0;
   1387  1.1  christos 		version->hash = 0;
   1388  1.1  christos 		version->salt_length = 0;
   1389  1.1  christos 		memset(version->salt, 0, sizeof(version->salt));
   1390  1.1  christos 	}
   1391  1.1  christos 	isc_rwlock_init(&version->rwlock, 0, 0);
   1392  1.1  christos 	RWLOCK(&rbtdb->current_version->rwlock, isc_rwlocktype_read);
   1393  1.1  christos 	version->records = rbtdb->current_version->records;
   1394  1.1  christos 	version->xfrsize = rbtdb->current_version->xfrsize;
   1395  1.1  christos 	RWUNLOCK(&rbtdb->current_version->rwlock, isc_rwlocktype_read);
   1396  1.1  christos 	rbtdb->next_serial++;
   1397  1.1  christos 	rbtdb->future_version = version;
   1398  1.1  christos 	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
   1399  1.1  christos 
   1400  1.1  christos 	*versionp = version;
   1401  1.1  christos 
   1402  1.1  christos 	return (ISC_R_SUCCESS);
   1403  1.1  christos }
   1404  1.1  christos 
   1405  1.1  christos static void
   1406  1.1  christos attachversion(dns_db_t *db, dns_dbversion_t *source,
   1407  1.1  christos 	      dns_dbversion_t **targetp) {
   1408  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   1409  1.1  christos 	rbtdb_version_t *rbtversion = source;
   1410  1.1  christos 
   1411  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   1412  1.1  christos 	INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
   1413  1.1  christos 
   1414  1.1  christos 	isc_refcount_increment(&rbtversion->references);
   1415  1.1  christos 
   1416  1.1  christos 	*targetp = rbtversion;
   1417  1.1  christos }
   1418  1.1  christos 
   1419  1.1  christos static rbtdb_changed_t *
   1420  1.1  christos add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version, dns_rbtnode_t *node) {
   1421  1.1  christos 	rbtdb_changed_t *changed;
   1422  1.1  christos 
   1423  1.1  christos 	/*
   1424  1.1  christos 	 * Caller must be holding the node lock if its reference must be
   1425  1.1  christos 	 * protected by the lock.
   1426  1.1  christos 	 */
   1427  1.1  christos 
   1428  1.1  christos 	changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
   1429  1.1  christos 
   1430  1.1  christos 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
   1431  1.1  christos 
   1432  1.1  christos 	REQUIRE(version->writer);
   1433  1.1  christos 
   1434  1.1  christos 	if (changed != NULL) {
   1435  1.1  christos 		isc_refcount_increment(&node->references);
   1436  1.1  christos 		changed->node = node;
   1437  1.1  christos 		changed->dirty = false;
   1438  1.1  christos 		ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
   1439  1.1  christos 	} else {
   1440  1.1  christos 		version->commit_ok = false;
   1441  1.1  christos 	}
   1442  1.1  christos 
   1443  1.1  christos 	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
   1444  1.1  christos 
   1445  1.1  christos 	return (changed);
   1446  1.1  christos }
   1447  1.1  christos 
   1448  1.1  christos static void
   1449  1.1  christos free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
   1450  1.1  christos 	if (dns_name_dynamic(&(*noqname)->name)) {
   1451  1.1  christos 		dns_name_free(&(*noqname)->name, mctx);
   1452  1.1  christos 	}
   1453  1.1  christos 	if ((*noqname)->neg != NULL) {
   1454  1.1  christos 		isc_mem_put(mctx, (*noqname)->neg,
   1455  1.1  christos 			    dns_rdataslab_size((*noqname)->neg, 0));
   1456  1.1  christos 	}
   1457  1.1  christos 	if ((*noqname)->negsig != NULL) {
   1458  1.1  christos 		isc_mem_put(mctx, (*noqname)->negsig,
   1459  1.1  christos 			    dns_rdataslab_size((*noqname)->negsig, 0));
   1460  1.1  christos 	}
   1461  1.1  christos 	isc_mem_put(mctx, *noqname, sizeof(**noqname));
   1462  1.1  christos 	*noqname = NULL;
   1463  1.1  christos }
   1464  1.1  christos 
   1465  1.1  christos static void
   1466  1.1  christos init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h) {
   1467  1.1  christos 	ISC_LINK_INIT(h, link);
   1468  1.1  christos 	h->heap_index = 0;
   1469  1.1  christos 	h->is_mmapped = 0;
   1470  1.1  christos 	h->next_is_relative = 0;
   1471  1.1  christos 	h->node_is_relative = 0;
   1472  1.1  christos 	atomic_init(&h->attributes, 0);
   1473  1.1  christos 	atomic_init(&h->last_refresh_fail_ts, 0);
   1474  1.1  christos 
   1475  1.1  christos 	STATIC_ASSERT((sizeof(h->attributes) == 2),
   1476  1.1  christos 		      "The .attributes field of rdatasetheader_t needs to be "
   1477  1.1  christos 		      "16-bit int type exactly.");
   1478  1.1  christos 
   1479  1.1  christos #if TRACE_HEADER
   1480  1.1  christos 	if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in) {
   1481  1.1  christos 		fprintf(stderr, "initialized header: %p\n", h);
   1482  1.1  christos 	}
   1483  1.1  christos #else  /* if TRACE_HEADER */
   1484  1.1  christos 	UNUSED(rbtdb);
   1485  1.1  christos #endif /* if TRACE_HEADER */
   1486  1.1  christos }
   1487  1.1  christos 
   1488  1.1  christos /*
   1489  1.1  christos  * Update the copied values of 'next' and 'node' if they are relative.
   1490  1.1  christos  */
   1491  1.1  christos static void
   1492  1.1  christos update_newheader(rdatasetheader_t *newh, rdatasetheader_t *old) {
   1493  1.1  christos 	char *p;
   1494  1.1  christos 
   1495  1.1  christos 	if (old->next_is_relative) {
   1496  1.1  christos 		p = (char *)old;
   1497  1.1  christos 		p += (uintptr_t)old->next;
   1498  1.1  christos 		newh->next = (rdatasetheader_t *)p;
   1499  1.1  christos 	}
   1500  1.1  christos 	if (old->node_is_relative) {
   1501  1.1  christos 		p = (char *)old;
   1502  1.1  christos 		p += (uintptr_t)old->node;
   1503  1.1  christos 		newh->node = (dns_rbtnode_t *)p;
   1504  1.1  christos 	}
   1505  1.1  christos 	if (CASESET(old)) {
   1506  1.1  christos 		uint_least16_t attr = RDATASET_ATTR_GET(
   1507  1.1  christos 			old,
   1508  1.1  christos 			(RDATASET_ATTR_CASESET | RDATASET_ATTR_CASEFULLYLOWER));
   1509  1.1  christos 		RDATASET_ATTR_SET(newh, attr);
   1510  1.1  christos 		memmove(newh->upper, old->upper, sizeof(old->upper));
   1511  1.1  christos 	}
   1512  1.1  christos }
   1513  1.1  christos 
   1514  1.1  christos static rdatasetheader_t *
   1515  1.1  christos new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx) {
   1516  1.1  christos 	rdatasetheader_t *h;
   1517  1.1  christos 
   1518  1.1  christos 	h = isc_mem_get(mctx, sizeof(*h));
   1519  1.1  christos 
   1520  1.1  christos #if TRACE_HEADER
   1521  1.1  christos 	if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in) {
   1522  1.1  christos 		fprintf(stderr, "allocated header: %p\n", h);
   1523  1.1  christos 	}
   1524  1.1  christos #endif /* if TRACE_HEADER */
   1525  1.1  christos 	memset(h->upper, 0xeb, sizeof(h->upper));
   1526  1.1  christos 	init_rdataset(rbtdb, h);
   1527  1.1  christos 	h->rdh_ttl = 0;
   1528  1.1  christos 	return (h);
   1529  1.1  christos }
   1530  1.1  christos 
   1531  1.1  christos static void
   1532  1.1  christos free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset) {
   1533  1.1  christos 	unsigned int size;
   1534  1.1  christos 	int idx;
   1535  1.1  christos 
   1536  1.1  christos 	update_rrsetstats(rbtdb, rdataset->type,
   1537  1.1  christos 			  atomic_load_acquire(&rdataset->attributes), false);
   1538  1.1  christos 
   1539  1.1  christos 	idx = rdataset->node->locknum;
   1540  1.1  christos 	if (ISC_LINK_LINKED(rdataset, link)) {
   1541  1.1  christos 		INSIST(IS_CACHE(rbtdb));
   1542  1.1  christos 		ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
   1543  1.1  christos 	}
   1544  1.1  christos 
   1545  1.1  christos 	if (rdataset->heap_index != 0) {
   1546  1.1  christos 		isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
   1547  1.1  christos 	}
   1548  1.1  christos 	rdataset->heap_index = 0;
   1549  1.1  christos 
   1550  1.1  christos 	if (rdataset->noqname != NULL) {
   1551  1.1  christos 		free_noqname(mctx, &rdataset->noqname);
   1552  1.1  christos 	}
   1553  1.1  christos 	if (rdataset->closest != NULL) {
   1554  1.1  christos 		free_noqname(mctx, &rdataset->closest);
   1555  1.1  christos 	}
   1556  1.1  christos 
   1557  1.1  christos 	if (NONEXISTENT(rdataset)) {
   1558  1.1  christos 		size = sizeof(*rdataset);
   1559  1.1  christos 	} else {
   1560  1.1  christos 		size = dns_rdataslab_size((unsigned char *)rdataset,
   1561  1.1  christos 					  sizeof(*rdataset));
   1562  1.1  christos 	}
   1563  1.1  christos 
   1564  1.1  christos 	if (rdataset->is_mmapped == 1) {
   1565  1.1  christos 		return;
   1566  1.1  christos 	}
   1567  1.1  christos 
   1568  1.1  christos 	isc_mem_put(mctx, rdataset, size);
   1569  1.1  christos }
   1570  1.1  christos 
   1571  1.1  christos static void
   1572  1.1  christos rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
   1573  1.1  christos 	rdatasetheader_t *header, *dcurrent;
   1574  1.1  christos 	bool make_dirty = false;
   1575  1.1  christos 
   1576  1.1  christos 	/*
   1577  1.1  christos 	 * Caller must hold the node lock.
   1578  1.1  christos 	 */
   1579  1.1  christos 
   1580  1.1  christos 	/*
   1581  1.1  christos 	 * We set the IGNORE attribute on rdatasets with serial number
   1582  1.1  christos 	 * 'serial'.  When the reference count goes to zero, these rdatasets
   1583  1.1  christos 	 * will be cleaned up; until that time, they will be ignored.
   1584  1.1  christos 	 */
   1585  1.1  christos 	for (header = node->data; header != NULL; header = header->next) {
   1586  1.1  christos 		if (header->serial == serial) {
   1587  1.1  christos 			RDATASET_ATTR_SET(header, RDATASET_ATTR_IGNORE);
   1588  1.1  christos 			make_dirty = true;
   1589  1.1  christos 		}
   1590  1.1  christos 		for (dcurrent = header->down; dcurrent != NULL;
   1591  1.1  christos 		     dcurrent = dcurrent->down)
   1592  1.1  christos 		{
   1593  1.1  christos 			if (dcurrent->serial == serial) {
   1594  1.1  christos 				RDATASET_ATTR_SET(dcurrent,
   1595  1.1  christos 						  RDATASET_ATTR_IGNORE);
   1596  1.1  christos 				make_dirty = true;
   1597  1.1  christos 			}
   1598  1.1  christos 		}
   1599  1.1  christos 	}
   1600  1.1  christos 	if (make_dirty) {
   1601  1.1  christos 		node->dirty = 1;
   1602  1.1  christos 	}
   1603  1.1  christos }
   1604  1.1  christos 
   1605  1.1  christos static void
   1606  1.1  christos mark_header_ancient(dns_rbtdb_t *rbtdb, rdatasetheader_t *header) {
   1607  1.1  christos 	uint_least16_t attributes = atomic_load_acquire(&header->attributes);
   1608  1.1  christos 	uint_least16_t newattributes = 0;
   1609  1.1  christos 
   1610  1.1  christos 	/*
   1611  1.1  christos 	 * If we are already ancient there is nothing to do.
   1612  1.1  christos 	 */
   1613  1.1  christos 	do {
   1614  1.1  christos 		if ((attributes & RDATASET_ATTR_ANCIENT) != 0) {
   1615  1.1  christos 			return;
   1616  1.1  christos 		}
   1617  1.1  christos 		newattributes = attributes | RDATASET_ATTR_ANCIENT;
   1618  1.1  christos 	} while (!atomic_compare_exchange_weak_acq_rel(
   1619  1.1  christos 		&header->attributes, &attributes, newattributes));
   1620  1.1  christos 
   1621  1.1  christos 	/*
   1622  1.1  christos 	 * Decrement the stats counter for the appropriate RRtype.
   1623  1.1  christos 	 * If the STALE attribute is set, this will decrement the
   1624  1.1  christos 	 * stale type counter, otherwise it decrements the active
   1625  1.1  christos 	 * stats type counter.
   1626  1.1  christos 	 */
   1627  1.1  christos 	update_rrsetstats(rbtdb, header->type, attributes, false);
   1628  1.1  christos 	header->node->dirty = 1;
   1629  1.1  christos 
   1630  1.1  christos 	/* Increment the stats counter for the ancient RRtype. */
   1631  1.1  christos 	update_rrsetstats(rbtdb, header->type, newattributes, true);
   1632  1.1  christos }
   1633  1.1  christos 
   1634  1.1  christos static void
   1635  1.1  christos mark_header_stale(dns_rbtdb_t *rbtdb, rdatasetheader_t *header) {
   1636  1.1  christos 	uint_least16_t attributes = atomic_load_acquire(&header->attributes);
   1637  1.1  christos 	uint_least16_t newattributes = 0;
   1638  1.1  christos 
   1639  1.1  christos 	INSIST((attributes & RDATASET_ATTR_ZEROTTL) == 0);
   1640  1.1  christos 
   1641  1.1  christos 	/*
   1642  1.1  christos 	 * If we are already stale there is nothing to do.
   1643  1.1  christos 	 */
   1644  1.1  christos 	do {
   1645  1.1  christos 		if ((attributes & RDATASET_ATTR_STALE) != 0) {
   1646  1.1  christos 			return;
   1647  1.1  christos 		}
   1648  1.1  christos 		newattributes = attributes | RDATASET_ATTR_STALE;
   1649  1.1  christos 	} while (!atomic_compare_exchange_weak_acq_rel(
   1650  1.1  christos 		&header->attributes, &attributes, newattributes));
   1651  1.1  christos 
   1652  1.1  christos 	/* Decrement the stats counter for the appropriate RRtype.
   1653  1.1  christos 	 * If the ANCIENT attribute is set (although it is very
   1654  1.1  christos 	 * unlikely that an RRset goes from ANCIENT to STALE), this
   1655  1.1  christos 	 * will decrement the ancient stale type counter, otherwise it
   1656  1.1  christos 	 * decrements the active stats type counter.
   1657  1.1  christos 	 */
   1658  1.1  christos 
   1659  1.1  christos 	update_rrsetstats(rbtdb, header->type, attributes, false);
   1660  1.1  christos 	update_rrsetstats(rbtdb, header->type, newattributes, true);
   1661  1.1  christos }
   1662  1.1  christos 
   1663  1.1  christos static void
   1664  1.1  christos clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx,
   1665  1.1  christos 		    rdatasetheader_t *top) {
   1666  1.1  christos 	rdatasetheader_t *d, *down_next;
   1667  1.1  christos 
   1668  1.1  christos 	for (d = top->down; d != NULL; d = down_next) {
   1669  1.1  christos 		down_next = d->down;
   1670  1.1  christos 		free_rdataset(rbtdb, mctx, d);
   1671  1.1  christos 	}
   1672  1.1  christos 	top->down = NULL;
   1673  1.1  christos }
   1674  1.1  christos 
   1675  1.1  christos static void
   1676  1.1  christos clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
   1677  1.1  christos 	rdatasetheader_t *current, *top_prev, *top_next;
   1678  1.1  christos 	isc_mem_t *mctx = rbtdb->common.mctx;
   1679  1.1  christos 
   1680  1.1  christos 	/*
   1681  1.1  christos 	 * Caller must be holding the node lock.
   1682  1.1  christos 	 */
   1683  1.1  christos 
   1684  1.1  christos 	top_prev = NULL;
   1685  1.1  christos 	for (current = node->data; current != NULL; current = top_next) {
   1686  1.1  christos 		top_next = current->next;
   1687  1.1  christos 		clean_stale_headers(rbtdb, mctx, current);
   1688  1.1  christos 		/*
   1689  1.1  christos 		 * If current is nonexistent, ancient, or stale and
   1690  1.1  christos 		 * we are not keeping stale, we can clean it up.
   1691  1.1  christos 		 */
   1692  1.1  christos 		if (NONEXISTENT(current) || ANCIENT(current) ||
   1693  1.1  christos 		    (STALE(current) && !KEEPSTALE(rbtdb)))
   1694  1.1  christos 		{
   1695  1.1  christos 			if (top_prev != NULL) {
   1696  1.1  christos 				top_prev->next = current->next;
   1697  1.1  christos 			} else {
   1698  1.1  christos 				node->data = current->next;
   1699  1.1  christos 			}
   1700  1.1  christos 			free_rdataset(rbtdb, mctx, current);
   1701  1.1  christos 		} else {
   1702  1.1  christos 			top_prev = current;
   1703  1.1  christos 		}
   1704  1.1  christos 	}
   1705  1.1  christos 	node->dirty = 0;
   1706  1.1  christos }
   1707  1.1  christos 
   1708  1.1  christos static void
   1709  1.1  christos clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
   1710  1.1  christos 		rbtdb_serial_t least_serial) {
   1711  1.1  christos 	rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
   1712  1.1  christos 	rdatasetheader_t *top_prev, *top_next;
   1713  1.1  christos 	isc_mem_t *mctx = rbtdb->common.mctx;
   1714  1.1  christos 	bool still_dirty = false;
   1715  1.1  christos 
   1716  1.1  christos 	/*
   1717  1.1  christos 	 * Caller must be holding the node lock.
   1718  1.1  christos 	 */
   1719  1.1  christos 	REQUIRE(least_serial != 0);
   1720  1.1  christos 
   1721  1.1  christos 	top_prev = NULL;
   1722  1.1  christos 	for (current = node->data; current != NULL; current = top_next) {
   1723  1.1  christos 		top_next = current->next;
   1724  1.1  christos 
   1725  1.1  christos 		/*
   1726  1.1  christos 		 * First, we clean up any instances of multiple rdatasets
   1727  1.1  christos 		 * with the same serial number, or that have the IGNORE
   1728  1.1  christos 		 * attribute.
   1729  1.1  christos 		 */
   1730  1.1  christos 		dparent = current;
   1731  1.1  christos 		for (dcurrent = current->down; dcurrent != NULL;
   1732  1.1  christos 		     dcurrent = down_next)
   1733  1.1  christos 		{
   1734  1.1  christos 			down_next = dcurrent->down;
   1735  1.1  christos 			INSIST(dcurrent->serial <= dparent->serial);
   1736  1.1  christos 			if (dcurrent->serial == dparent->serial ||
   1737  1.1  christos 			    IGNORE(dcurrent))
   1738  1.1  christos 			{
   1739  1.1  christos 				if (down_next != NULL) {
   1740  1.1  christos 					down_next->next = dparent;
   1741  1.1  christos 				}
   1742  1.1  christos 				dparent->down = down_next;
   1743  1.1  christos 				free_rdataset(rbtdb, mctx, dcurrent);
   1744  1.1  christos 			} else {
   1745  1.1  christos 				dparent = dcurrent;
   1746  1.1  christos 			}
   1747  1.1  christos 		}
   1748  1.1  christos 
   1749  1.1  christos 		/*
   1750  1.1  christos 		 * We've now eliminated all IGNORE datasets with the possible
   1751  1.1  christos 		 * exception of current, which we now check.
   1752  1.1  christos 		 */
   1753  1.1  christos 		if (IGNORE(current)) {
   1754  1.1  christos 			down_next = current->down;
   1755  1.1  christos 			if (down_next == NULL) {
   1756  1.1  christos 				if (top_prev != NULL) {
   1757  1.1  christos 					top_prev->next = current->next;
   1758  1.1  christos 				} else {
   1759  1.1  christos 					node->data = current->next;
   1760  1.1  christos 				}
   1761  1.1  christos 				free_rdataset(rbtdb, mctx, current);
   1762  1.1  christos 				/*
   1763  1.1  christos 				 * current no longer exists, so we can
   1764  1.1  christos 				 * just continue with the loop.
   1765  1.1  christos 				 */
   1766  1.1  christos 				continue;
   1767  1.1  christos 			} else {
   1768  1.1  christos 				/*
   1769  1.1  christos 				 * Pull up current->down, making it the new
   1770  1.1  christos 				 * current.
   1771  1.1  christos 				 */
   1772  1.1  christos 				if (top_prev != NULL) {
   1773  1.1  christos 					top_prev->next = down_next;
   1774  1.1  christos 				} else {
   1775  1.1  christos 					node->data = down_next;
   1776  1.1  christos 				}
   1777  1.1  christos 				down_next->next = top_next;
   1778  1.1  christos 				free_rdataset(rbtdb, mctx, current);
   1779  1.1  christos 				current = down_next;
   1780  1.1  christos 			}
   1781  1.1  christos 		}
   1782  1.1  christos 
   1783  1.1  christos 		/*
   1784  1.1  christos 		 * We now try to find the first down node less than the
   1785  1.1  christos 		 * least serial.
   1786  1.1  christos 		 */
   1787  1.1  christos 		dparent = current;
   1788  1.1  christos 		for (dcurrent = current->down; dcurrent != NULL;
   1789  1.1  christos 		     dcurrent = down_next)
   1790  1.1  christos 		{
   1791  1.1  christos 			down_next = dcurrent->down;
   1792  1.1  christos 			if (dcurrent->serial < least_serial) {
   1793  1.1  christos 				break;
   1794  1.1  christos 			}
   1795  1.1  christos 			dparent = dcurrent;
   1796  1.1  christos 		}
   1797  1.1  christos 
   1798  1.1  christos 		/*
   1799  1.1  christos 		 * If there is a such an rdataset, delete it and any older
   1800  1.1  christos 		 * versions.
   1801  1.1  christos 		 */
   1802  1.1  christos 		if (dcurrent != NULL) {
   1803  1.1  christos 			do {
   1804  1.1  christos 				down_next = dcurrent->down;
   1805  1.1  christos 				INSIST(dcurrent->serial <= least_serial);
   1806  1.1  christos 				free_rdataset(rbtdb, mctx, dcurrent);
   1807  1.1  christos 				dcurrent = down_next;
   1808  1.1  christos 			} while (dcurrent != NULL);
   1809  1.1  christos 			dparent->down = NULL;
   1810  1.1  christos 		}
   1811  1.1  christos 
   1812  1.1  christos 		/*
   1813  1.1  christos 		 * Note.  The serial number of 'current' might be less than
   1814  1.1  christos 		 * least_serial too, but we cannot delete it because it is
   1815  1.1  christos 		 * the most recent version, unless it is a NONEXISTENT
   1816  1.1  christos 		 * rdataset.
   1817  1.1  christos 		 */
   1818  1.1  christos 		if (current->down != NULL) {
   1819  1.1  christos 			still_dirty = true;
   1820  1.1  christos 			top_prev = current;
   1821  1.1  christos 		} else {
   1822  1.1  christos 			/*
   1823  1.1  christos 			 * If this is a NONEXISTENT rdataset, we can delete it.
   1824  1.1  christos 			 */
   1825  1.1  christos 			if (NONEXISTENT(current)) {
   1826  1.1  christos 				if (top_prev != NULL) {
   1827  1.1  christos 					top_prev->next = current->next;
   1828  1.1  christos 				} else {
   1829  1.1  christos 					node->data = current->next;
   1830  1.1  christos 				}
   1831  1.1  christos 				free_rdataset(rbtdb, mctx, current);
   1832  1.1  christos 			} else {
   1833  1.1  christos 				top_prev = current;
   1834  1.1  christos 			}
   1835  1.1  christos 		}
   1836  1.1  christos 	}
   1837  1.1  christos 	if (!still_dirty) {
   1838  1.1  christos 		node->dirty = 0;
   1839  1.1  christos 	}
   1840  1.1  christos }
   1841  1.1  christos 
   1842  1.1  christos /*
   1843  1.1  christos  * tree_lock(write) must be held.
   1844  1.1  christos  */
   1845  1.1  christos static void
   1846  1.1  christos delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
   1847  1.1  christos 	dns_rbtnode_t *nsecnode;
   1848  1.1  christos 	dns_fixedname_t fname;
   1849  1.1  christos 	dns_name_t *name;
   1850  1.1  christos 	isc_result_t result = ISC_R_UNEXPECTED;
   1851  1.1  christos 
   1852  1.1  christos 	INSIST(!ISC_LINK_LINKED(node, deadlink));
   1853  1.1  christos 
   1854  1.1  christos 	if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
   1855  1.1  christos 		char printname[DNS_NAME_FORMATSIZE];
   1856  1.1  christos 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
   1857  1.1  christos 			      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
   1858  1.1  christos 			      "delete_node(): %p %s (bucket %d)", node,
   1859  1.1  christos 			      dns_rbt_formatnodename(node, printname,
   1860  1.1  christos 						     sizeof(printname)),
   1861  1.1  christos 			      node->locknum);
   1862  1.1  christos 	}
   1863  1.1  christos 
   1864  1.1  christos 	switch (node->nsec) {
   1865  1.1  christos 	case DNS_RBT_NSEC_NORMAL:
   1866  1.1  christos 		/*
   1867  1.1  christos 		 * Though this may be wasteful, it has to be done before
   1868  1.1  christos 		 * node is deleted.
   1869  1.1  christos 		 */
   1870  1.1  christos 		name = dns_fixedname_initname(&fname);
   1871  1.1  christos 		dns_rbt_fullnamefromnode(node, name);
   1872  1.1  christos 
   1873  1.1  christos 		result = dns_rbt_deletenode(rbtdb->tree, node, false);
   1874  1.1  christos 		break;
   1875  1.1  christos 	case DNS_RBT_NSEC_HAS_NSEC:
   1876  1.1  christos 		name = dns_fixedname_initname(&fname);
   1877  1.1  christos 		dns_rbt_fullnamefromnode(node, name);
   1878  1.1  christos 		/*
   1879  1.1  christos 		 * Delete the corresponding node from the auxiliary NSEC
   1880  1.1  christos 		 * tree before deleting from the main tree.
   1881  1.1  christos 		 */
   1882  1.1  christos 		nsecnode = NULL;
   1883  1.1  christos 		result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
   1884  1.1  christos 					  NULL, DNS_RBTFIND_EMPTYDATA, NULL,
   1885  1.1  christos 					  NULL);
   1886  1.1  christos 		if (result != ISC_R_SUCCESS) {
   1887  1.1  christos 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
   1888  1.1  christos 				      DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
   1889  1.1  christos 				      "delete_node: "
   1890  1.1  christos 				      "dns_rbt_findnode(nsec): %s",
   1891  1.1  christos 				      isc_result_totext(result));
   1892  1.1  christos 		} else {
   1893  1.1  christos 			result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
   1894  1.1  christos 						    false);
   1895  1.1  christos 			if (result != ISC_R_SUCCESS) {
   1896  1.1  christos 				isc_log_write(
   1897  1.1  christos 					dns_lctx, DNS_LOGCATEGORY_DATABASE,
   1898  1.1  christos 					DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
   1899  1.1  christos 					"delete_node(): "
   1900  1.1  christos 					"dns_rbt_deletenode(nsecnode): %s",
   1901  1.1  christos 					isc_result_totext(result));
   1902  1.1  christos 			}
   1903  1.1  christos 		}
   1904  1.1  christos 		result = dns_rbt_deletenode(rbtdb->tree, node, false);
   1905  1.1  christos 		break;
   1906  1.1  christos 	case DNS_RBT_NSEC_NSEC:
   1907  1.1  christos 		result = dns_rbt_deletenode(rbtdb->nsec, node, false);
   1908  1.1  christos 		break;
   1909  1.1  christos 	case DNS_RBT_NSEC_NSEC3:
   1910  1.1  christos 		result = dns_rbt_deletenode(rbtdb->nsec3, node, false);
   1911  1.1  christos 		break;
   1912  1.1  christos 	}
   1913  1.1  christos 	if (result != ISC_R_SUCCESS) {
   1914  1.1  christos 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
   1915  1.1  christos 			      DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
   1916  1.1  christos 			      "delete_node(): "
   1917  1.1  christos 			      "dns_rbt_deletenode: %s",
   1918  1.1  christos 			      isc_result_totext(result));
   1919  1.1  christos 	}
   1920  1.1  christos }
   1921  1.1  christos 
   1922  1.1  christos /*
   1923  1.1  christos  * Caller must be holding the node lock.
   1924  1.1  christos  */
   1925  1.1  christos static void
   1926  1.1  christos new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
   1927  1.1  christos 	      isc_rwlocktype_t locktype) {
   1928  1.1  christos 	if (locktype == isc_rwlocktype_write && ISC_LINK_LINKED(node, deadlink))
   1929  1.1  christos 	{
   1930  1.1  christos 		ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum], node,
   1931  1.1  christos 				deadlink);
   1932  1.1  christos 	}
   1933  1.1  christos 	if (isc_refcount_increment0(&node->references) == 0) {
   1934  1.1  christos 		/* this is the first reference to the node */
   1935  1.1  christos 		isc_refcount_increment0(
   1936  1.1  christos 			&rbtdb->node_locks[node->locknum].references);
   1937  1.1  christos 	}
   1938  1.1  christos }
   1939  1.1  christos 
   1940  1.1  christos /*%
   1941  1.1  christos  * The tree lock must be held for the result to be valid.
   1942  1.1  christos  */
   1943  1.1  christos static bool
   1944  1.1  christos is_leaf(dns_rbtnode_t *node) {
   1945  1.1  christos 	return (node->parent != NULL && node->parent->down == node &&
   1946  1.1  christos 		node->left == NULL && node->right == NULL);
   1947  1.1  christos }
   1948  1.1  christos 
   1949  1.1  christos /*%
   1950  1.1  christos  * The tree lock must be held when this function is called as it reads and
   1951  1.1  christos  * updates rbtdb->prunenodes.
   1952  1.1  christos  */
   1953  1.1  christos static void
   1954  1.1  christos send_to_prune_tree(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
   1955  1.1  christos 		   isc_rwlocktype_t locktype) {
   1956  1.1  christos 	bool pruning_queued = (ISC_LIST_HEAD(rbtdb->prunenodes) != NULL);
   1957  1.1  christos 
   1958  1.1  christos 	INSIST(locktype == isc_rwlocktype_write);
   1959  1.1  christos 
   1960  1.1  christos 	new_reference(rbtdb, node, locktype);
   1961  1.1  christos 	INSIST(!ISC_LINK_LINKED(node, prunelink));
   1962  1.1  christos 	ISC_LIST_APPEND(rbtdb->prunenodes, node, prunelink);
   1963  1.1  christos 
   1964  1.1  christos 	if (!pruning_queued) {
   1965  1.1  christos 		isc_event_t *ev = NULL;
   1966  1.1  christos 		dns_db_t *db = NULL;
   1967  1.1  christos 
   1968  1.1  christos 		attach((dns_db_t *)rbtdb, &db);
   1969  1.1  christos 
   1970  1.1  christos 		ev = isc_event_allocate(rbtdb->common.mctx, NULL,
   1971  1.1  christos 					DNS_EVENT_RBTPRUNE, prune_tree, db,
   1972  1.1  christos 					sizeof(isc_event_t));
   1973  1.1  christos 		isc_task_send(rbtdb->task, &ev);
   1974  1.1  christos 	}
   1975  1.1  christos }
   1976  1.1  christos 
   1977  1.1  christos /*%
   1978  1.1  christos  * Clean up dead nodes.  These are nodes which have no references, and
   1979  1.1  christos  * have no data.  They are dead but we could not or chose not to delete
   1980  1.1  christos  * them when we deleted all the data at that node because we did not want
   1981  1.1  christos  * to wait for the tree write lock.
   1982  1.1  christos  *
   1983  1.1  christos  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
   1984  1.1  christos  */
   1985  1.1  christos static void
   1986  1.1  christos cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
   1987  1.1  christos 	dns_rbtnode_t *node;
   1988  1.1  christos 	int count = 10; /* XXXJT: should be adjustable */
   1989  1.1  christos 
   1990  1.1  christos 	node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
   1991  1.1  christos 	while (node != NULL && count > 0) {
   1992  1.1  christos 		ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
   1993  1.1  christos 
   1994  1.1  christos 		/*
   1995  1.1  christos 		 * We might have reactivated this node without a tree write
   1996  1.1  christos 		 * lock, so we couldn't remove this node from deadnodes then
   1997  1.1  christos 		 * and we have to do it now.
   1998  1.1  christos 		 */
   1999  1.1  christos 		if (isc_refcount_current(&node->references) != 0 ||
   2000  1.1  christos 		    node->data != NULL)
   2001  1.1  christos 		{
   2002  1.1  christos 			node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
   2003  1.1  christos 			count--;
   2004  1.1  christos 			continue;
   2005  1.1  christos 		}
   2006  1.1  christos 
   2007  1.1  christos 		if (is_leaf(node) && rbtdb->task != NULL) {
   2008  1.1  christos 			send_to_prune_tree(rbtdb, node, isc_rwlocktype_write);
   2009  1.1  christos 		} else if (node->down == NULL && node->data == NULL) {
   2010  1.1  christos 			/*
   2011  1.1  christos 			 * Not a interior node and not needing to be
   2012  1.1  christos 			 * reactivated.
   2013  1.1  christos 			 */
   2014  1.1  christos 			delete_node(rbtdb, node);
   2015  1.1  christos 		} else if (node->data == NULL) {
   2016  1.1  christos 			/*
   2017  1.1  christos 			 * A interior node without data. Leave linked to
   2018  1.1  christos 			 * to be cleaned up when node->down becomes NULL.
   2019  1.1  christos 			 */
   2020  1.1  christos 			ISC_LIST_APPEND(rbtdb->deadnodes[bucketnum], node,
   2021  1.1  christos 					deadlink);
   2022  1.1  christos 		}
   2023  1.1  christos 		node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
   2024  1.1  christos 		count--;
   2025  1.1  christos 	}
   2026  1.1  christos }
   2027  1.1  christos 
   2028  1.1  christos /*
   2029  1.1  christos  * This function is assumed to be called when a node is newly referenced
   2030  1.1  christos  * and can be in the deadnode list.  In that case the node must be retrieved
   2031  1.1  christos  * from the list because it is going to be used.  In addition, if the caller
   2032  1.1  christos  * happens to hold a write lock on the tree, it's a good chance to purge dead
   2033  1.1  christos  * nodes.
   2034  1.1  christos  * Note: while a new reference is gained in multiple places, there are only very
   2035  1.1  christos  * few cases where the node can be in the deadnode list (only empty nodes can
   2036  1.1  christos  * have been added to the list).
   2037  1.1  christos  */
   2038  1.1  christos static void
   2039  1.1  christos reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
   2040  1.1  christos 		isc_rwlocktype_t treelocktype) {
   2041  1.1  christos 	isc_rwlocktype_t locktype = isc_rwlocktype_read;
   2042  1.1  christos 	nodelock_t *nodelock = &rbtdb->node_locks[node->locknum].lock;
   2043  1.1  christos 	bool maybe_cleanup = false;
   2044  1.1  christos 
   2045  1.1  christos 	POST(locktype);
   2046  1.1  christos 
   2047  1.1  christos 	NODE_LOCK(nodelock, locktype);
   2048  1.1  christos 
   2049  1.1  christos 	/*
   2050  1.1  christos 	 * Check if we can possibly cleanup the dead node.  If so, upgrade
   2051  1.1  christos 	 * the node lock below to perform the cleanup.
   2052  1.1  christos 	 */
   2053  1.1  christos 	if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
   2054  1.1  christos 	    treelocktype == isc_rwlocktype_write)
   2055  1.1  christos 	{
   2056  1.1  christos 		maybe_cleanup = true;
   2057  1.1  christos 	}
   2058  1.1  christos 
   2059  1.1  christos 	if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) {
   2060  1.1  christos 		/*
   2061  1.1  christos 		 * Upgrade the lock and test if we still need to unlink.
   2062  1.1  christos 		 */
   2063  1.1  christos 		NODE_UNLOCK(nodelock, locktype);
   2064  1.1  christos 		locktype = isc_rwlocktype_write;
   2065  1.1  christos 		POST(locktype);
   2066  1.1  christos 		NODE_LOCK(nodelock, locktype);
   2067  1.1  christos 		if (ISC_LINK_LINKED(node, deadlink)) {
   2068  1.1  christos 			ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum], node,
   2069  1.1  christos 					deadlink);
   2070  1.1  christos 		}
   2071  1.1  christos 		if (maybe_cleanup) {
   2072  1.1  christos 			cleanup_dead_nodes(rbtdb, node->locknum);
   2073  1.1  christos 		}
   2074  1.1  christos 	}
   2075  1.1  christos 
   2076  1.1  christos 	new_reference(rbtdb, node, locktype);
   2077  1.1  christos 
   2078  1.1  christos 	NODE_UNLOCK(nodelock, locktype);
   2079  1.1  christos }
   2080  1.1  christos 
   2081  1.1  christos /*
   2082  1.1  christos  * Caller must be holding the node lock; either the "strong", read or write
   2083  1.1  christos  * lock.  Note that the lock must be held even when node references are
   2084  1.1  christos  * atomically modified; in that case the decrement operation itself does not
   2085  1.1  christos  * have to be protected, but we must avoid a race condition where multiple
   2086  1.1  christos  * threads are decreasing the reference to zero simultaneously and at least
   2087  1.1  christos  * one of them is going to free the node.
   2088  1.1  christos  *
   2089  1.1  christos  * This function returns true if and only if the node reference decreases
   2090  1.1  christos  * to zero.
   2091  1.1  christos  *
   2092  1.1  christos  * NOTE: Decrementing the reference count of a node to zero does not mean it
   2093  1.1  christos  * will be immediately freed.
   2094  1.1  christos  */
   2095  1.1  christos static bool
   2096  1.1  christos decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
   2097  1.1  christos 		    rbtdb_serial_t least_serial, isc_rwlocktype_t nlock,
   2098  1.1  christos 		    isc_rwlocktype_t tlock, bool pruning) {
   2099  1.1  christos 	isc_result_t result;
   2100  1.1  christos 	bool write_locked;
   2101  1.1  christos 	bool locked = tlock != isc_rwlocktype_none;
   2102  1.1  christos 	rbtdb_nodelock_t *nodelock;
   2103  1.1  christos 	int bucket = node->locknum;
   2104  1.1  christos 	bool no_reference = true;
   2105  1.1  christos 	uint_fast32_t refs;
   2106  1.1  christos 
   2107  1.1  christos 	nodelock = &rbtdb->node_locks[bucket];
   2108  1.1  christos 
   2109  1.1  christos #define KEEP_NODE(n, r, l)                                  \
   2110  1.1  christos 	((n)->data != NULL || ((l) && (n)->down != NULL) || \
   2111  1.1  christos 	 (n) == (r)->origin_node || (n) == (r)->nsec3_origin_node)
   2112  1.1  christos 
   2113  1.1  christos 	/* Handle easy and typical case first. */
   2114  1.1  christos 	if (!node->dirty && KEEP_NODE(node, rbtdb, locked)) {
   2115  1.1  christos 		if (isc_refcount_decrement(&node->references) == 1) {
   2116  1.1  christos 			refs = isc_refcount_decrement(&nodelock->references);
   2117  1.1  christos 			INSIST(refs > 0);
   2118  1.1  christos 			return (true);
   2119  1.1  christos 		} else {
   2120  1.1  christos 			return (false);
   2121  1.1  christos 		}
   2122  1.1  christos 	}
   2123  1.1  christos 
   2124  1.1  christos 	/* Upgrade the lock? */
   2125  1.1  christos 	if (nlock == isc_rwlocktype_read) {
   2126  1.1  christos 		NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
   2127  1.1  christos 		NODE_LOCK(&nodelock->lock, isc_rwlocktype_write);
   2128  1.1  christos 	}
   2129  1.1  christos 
   2130  1.1  christos 	if (isc_refcount_decrement(&node->references) > 1) {
   2131  1.1  christos 		/* Restore the lock? */
   2132  1.1  christos 		if (nlock == isc_rwlocktype_read) {
   2133  1.1  christos 			NODE_DOWNGRADE(&nodelock->lock);
   2134  1.1  christos 		}
   2135  1.1  christos 		return (false);
   2136  1.1  christos 	}
   2137  1.1  christos 
   2138  1.1  christos 	if (node->dirty) {
   2139  1.1  christos 		if (IS_CACHE(rbtdb)) {
   2140  1.1  christos 			clean_cache_node(rbtdb, node);
   2141  1.1  christos 		} else {
   2142  1.1  christos 			if (least_serial == 0) {
   2143  1.1  christos 				/*
   2144  1.1  christos 				 * Caller doesn't know the least serial.
   2145  1.1  christos 				 * Get it.
   2146  1.1  christos 				 */
   2147  1.1  christos 				RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
   2148  1.1  christos 				least_serial = rbtdb->least_serial;
   2149  1.1  christos 				RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
   2150  1.1  christos 			}
   2151  1.1  christos 			clean_zone_node(rbtdb, node, least_serial);
   2152  1.1  christos 		}
   2153  1.1  christos 	}
   2154  1.1  christos 
   2155  1.1  christos 	/*
   2156  1.1  christos 	 * Attempt to switch to a write lock on the tree.  If this fails,
   2157  1.1  christos 	 * we will add this node to a linked list of nodes in this locking
   2158  1.1  christos 	 * bucket which we will free later.
   2159  1.1  christos 	 */
   2160  1.1  christos 	if (tlock != isc_rwlocktype_write) {
   2161  1.1  christos 		/*
   2162  1.1  christos 		 * Locking hierarchy notwithstanding, we don't need to free
   2163  1.1  christos 		 * the node lock before acquiring the tree write lock because
   2164  1.1  christos 		 * we only do a trylock.
   2165  1.1  christos 		 */
   2166  1.1  christos 		if (tlock == isc_rwlocktype_read) {
   2167  1.1  christos 			result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
   2168  1.1  christos 		} else {
   2169  1.1  christos 			result = isc_rwlock_trylock(&rbtdb->tree_lock,
   2170  1.1  christos 						    isc_rwlocktype_write);
   2171  1.1  christos 		}
   2172  1.1  christos 		RUNTIME_CHECK(result == ISC_R_SUCCESS ||
   2173  1.1  christos 			      result == ISC_R_LOCKBUSY);
   2174  1.1  christos 
   2175  1.1  christos 		write_locked = (result == ISC_R_SUCCESS);
   2176  1.1  christos 	} else {
   2177  1.1  christos 		write_locked = true;
   2178  1.1  christos 	}
   2179  1.1  christos 
   2180  1.1  christos 	refs = isc_refcount_decrement(&nodelock->references);
   2181  1.1  christos 	INSIST(refs > 0);
   2182  1.1  christos 
   2183  1.1  christos 	if (KEEP_NODE(node, rbtdb, locked || write_locked)) {
   2184  1.1  christos 		goto restore_locks;
   2185  1.1  christos 	}
   2186  1.1  christos 
   2187  1.1  christos #undef KEEP_NODE
   2188  1.1  christos 
   2189  1.1  christos 	if (write_locked) {
   2190  1.1  christos 		/*
   2191  1.1  christos 		 * We can now delete the node.
   2192  1.1  christos 		 */
   2193  1.1  christos 
   2194  1.1  christos 		/*
   2195  1.1  christos 		 * If this node is the only one in the level it's in, deleting
   2196  1.1  christos 		 * this node may recursively make its parent the only node in
   2197  1.1  christos 		 * the parent level; if so, and if no one is currently using
   2198  1.1  christos 		 * the parent node, this is almost the only opportunity to
   2199  1.1  christos 		 * clean it up.  But the recursive cleanup is not that trivial
   2200  1.1  christos 		 * since the child and parent may be in different lock buckets,
   2201  1.1  christos 		 * which would cause a lock order reversal problem.  To avoid
   2202  1.1  christos 		 * the trouble, we'll dispatch a separate event for batch
   2203  1.1  christos 		 * cleaning.  We need to check whether we're deleting the node
   2204  1.1  christos 		 * as a result of pruning to avoid infinite dispatching.
   2205  1.1  christos 		 * Note: pruning happens only when a task has been set for the
   2206  1.1  christos 		 * rbtdb.  If the user of the rbtdb chooses not to set a task,
   2207  1.1  christos 		 * it's their responsibility to purge stale leaves (e.g. by
   2208  1.1  christos 		 * periodic walk-through).
   2209  1.1  christos 		 */
   2210  1.1  christos 		if (!pruning && is_leaf(node) && rbtdb->task != NULL) {
   2211  1.1  christos 			send_to_prune_tree(rbtdb, node, isc_rwlocktype_write);
   2212  1.1  christos 			no_reference = false;
   2213  1.1  christos 		} else {
   2214  1.1  christos 			delete_node(rbtdb, node);
   2215  1.1  christos 		}
   2216  1.1  christos 	} else {
   2217  1.1  christos 		INSIST(node->data == NULL);
   2218  1.1  christos 		if (!ISC_LINK_LINKED(node, deadlink)) {
   2219  1.1  christos 			ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
   2220  1.1  christos 					deadlink);
   2221  1.1  christos 		}
   2222  1.1  christos 	}
   2223  1.1  christos 
   2224  1.1  christos restore_locks:
   2225  1.1  christos 	/* Restore the lock? */
   2226  1.1  christos 	if (nlock == isc_rwlocktype_read) {
   2227  1.1  christos 		NODE_DOWNGRADE(&nodelock->lock);
   2228  1.1  christos 	}
   2229  1.1  christos 
   2230  1.1  christos 	/*
   2231  1.1  christos 	 * Relock a read lock, or unlock the write lock if no lock was held.
   2232  1.1  christos 	 */
   2233  1.1  christos 	if (tlock == isc_rwlocktype_none) {
   2234  1.1  christos 		if (write_locked) {
   2235  1.1  christos 			RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
   2236  1.1  christos 		}
   2237  1.1  christos 	}
   2238  1.1  christos 
   2239  1.1  christos 	if (tlock == isc_rwlocktype_read) {
   2240  1.1  christos 		if (write_locked) {
   2241  1.1  christos 			isc_rwlock_downgrade(&rbtdb->tree_lock);
   2242  1.1  christos 		}
   2243  1.1  christos 	}
   2244  1.1  christos 
   2245  1.1  christos 	return (no_reference);
   2246  1.1  christos }
   2247  1.1  christos 
   2248  1.1  christos /*
   2249  1.1  christos  * Prune the tree by recursively cleaning up single leaves.  Go through all
   2250  1.1  christos  * nodes stored in the rbtdb->prunenodes list; for each of them, in the worst
   2251  1.1  christos  * case, it will be necessary to traverse a number of tree levels equal to the
   2252  1.1  christos  * maximum legal number of domain name labels (127); in practice, the number of
   2253  1.1  christos  * tree levels to traverse will virtually always be much smaller (a few levels
   2254  1.1  christos  * at most).  While holding the tree lock throughout this entire operation is
   2255  1.1  christos  * less than ideal, so is splitting the latter up by queueing a separate
   2256  1.1  christos  * prune_tree() run for each node to start pruning from (as queueing requires
   2257  1.1  christos  * allocating memory and can therefore potentially be exploited to exhaust
   2258  1.1  christos  * available memory).  Also note that actually freeing up the memory used by
   2259  1.1  christos  * RBTDB nodes (which is what this function does) is essential to keeping cache
   2260  1.1  christos  * memory use in check, so since the tree lock needs to be acquired anyway,
   2261  1.1  christos  * freeing as many nodes as possible before the tree lock gets released is
   2262  1.1  christos  * prudent.
   2263  1.1  christos  */
   2264  1.1  christos static void
   2265  1.1  christos prune_tree(isc_task_t *task, isc_event_t *event) {
   2266  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)event->ev_arg;
   2267  1.1  christos 	dns_rbtnode_t *node = NULL;
   2268  1.1  christos 	dns_rbtnode_t *parent = NULL;
   2269  1.1  christos 	unsigned int locknum;
   2270  1.1  christos 
   2271  1.1  christos 	UNUSED(task);
   2272  1.1  christos 
   2273  1.1  christos 	isc_event_free(&event);
   2274  1.1  christos 
   2275  1.1  christos 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
   2276  1.1  christos 
   2277  1.1  christos 	while ((node = ISC_LIST_HEAD(rbtdb->prunenodes)) != NULL) {
   2278  1.1  christos 		locknum = node->locknum;
   2279  1.1  christos 		NODE_LOCK(&rbtdb->node_locks[locknum].lock,
   2280  1.1  christos 			  isc_rwlocktype_write);
   2281  1.1  christos 		do {
   2282  1.1  christos 			if (ISC_LINK_LINKED(node, prunelink)) {
   2283  1.1  christos 				ISC_LIST_UNLINK(rbtdb->prunenodes, node,
   2284  1.1  christos 						prunelink);
   2285  1.1  christos 			}
   2286  1.1  christos 
   2287  1.1  christos 			parent = node->parent;
   2288  1.1  christos 			decrement_reference(rbtdb, node, 0,
   2289  1.1  christos 					    isc_rwlocktype_write,
   2290  1.1  christos 					    isc_rwlocktype_write, true);
   2291  1.1  christos 
   2292  1.1  christos 			if (parent != NULL && parent->down == NULL) {
   2293  1.1  christos 				/*
   2294  1.1  christos 				 * node was the only down child of the parent
   2295  1.1  christos 				 * and has just been removed.  We'll then need
   2296  1.1  christos 				 * to examine the parent.  Keep the lock if
   2297  1.1  christos 				 * possible; otherwise, release the old lock and
   2298  1.1  christos 				 * acquire one for the parent.
   2299  1.1  christos 				 */
   2300  1.1  christos 				if (parent->locknum != locknum) {
   2301  1.1  christos 					NODE_UNLOCK(
   2302  1.1  christos 						&rbtdb->node_locks[locknum].lock,
   2303  1.1  christos 						isc_rwlocktype_write);
   2304  1.1  christos 					locknum = parent->locknum;
   2305  1.1  christos 					NODE_LOCK(
   2306  1.1  christos 						&rbtdb->node_locks[locknum].lock,
   2307  1.1  christos 						isc_rwlocktype_write);
   2308  1.1  christos 				}
   2309  1.1  christos 
   2310  1.1  christos 				/*
   2311  1.1  christos 				 * We need to gain a reference to the node
   2312  1.1  christos 				 * before decrementing it in the next iteration.
   2313  1.1  christos 				 */
   2314  1.1  christos 				if (ISC_LINK_LINKED(parent, deadlink)) {
   2315  1.1  christos 					ISC_LIST_UNLINK(
   2316  1.1  christos 						rbtdb->deadnodes[locknum],
   2317  1.1  christos 						parent, deadlink);
   2318  1.1  christos 				}
   2319  1.1  christos 				new_reference(rbtdb, parent,
   2320  1.1  christos 					      isc_rwlocktype_write);
   2321  1.1  christos 			} else {
   2322  1.1  christos 				parent = NULL;
   2323  1.1  christos 			}
   2324  1.1  christos 
   2325  1.1  christos 			node = parent;
   2326  1.1  christos 		} while (node != NULL);
   2327  1.1  christos 		NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
   2328  1.1  christos 			    isc_rwlocktype_write);
   2329  1.1  christos 	}
   2330  1.1  christos 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
   2331  1.1  christos 
   2332  1.1  christos 	detach((dns_db_t **)(void *)&rbtdb);
   2333  1.1  christos }
   2334  1.1  christos 
   2335  1.1  christos static void
   2336  1.1  christos make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
   2337  1.1  christos 		   rbtdb_changedlist_t *cleanup_list) {
   2338  1.1  christos 	/*
   2339  1.1  christos 	 * Caller must be holding the database lock.
   2340  1.1  christos 	 */
   2341  1.1  christos 
   2342  1.1  christos 	rbtdb->least_serial = version->serial;
   2343  1.1  christos 	*cleanup_list = version->changed_list;
   2344  1.1  christos 	ISC_LIST_INIT(version->changed_list);
   2345  1.1  christos }
   2346  1.1  christos 
   2347  1.1  christos static void
   2348  1.1  christos cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
   2349  1.1  christos 	rbtdb_changed_t *changed, *next_changed;
   2350  1.1  christos 
   2351  1.1  christos 	/*
   2352  1.1  christos 	 * If the changed record is dirty, then
   2353  1.1  christos 	 * an update created multiple versions of
   2354  1.1  christos 	 * a given rdataset.  We keep this list
   2355  1.1  christos 	 * until we're the least open version, at
   2356  1.1  christos 	 * which point it's safe to get rid of any
   2357  1.1  christos 	 * older versions.
   2358  1.1  christos 	 *
   2359  1.1  christos 	 * If the changed record isn't dirty, then
   2360  1.1  christos 	 * we don't need it anymore since we're
   2361  1.1  christos 	 * committing and not rolling back.
   2362  1.1  christos 	 *
   2363  1.1  christos 	 * The caller must be holding the database lock.
   2364  1.1  christos 	 */
   2365  1.1  christos 	for (changed = HEAD(version->changed_list); changed != NULL;
   2366  1.1  christos 	     changed = next_changed)
   2367  1.1  christos 	{
   2368  1.1  christos 		next_changed = NEXT(changed, link);
   2369  1.1  christos 		if (!changed->dirty) {
   2370  1.1  christos 			UNLINK(version->changed_list, changed, link);
   2371  1.1  christos 			APPEND(*cleanup_list, changed, link);
   2372  1.1  christos 		}
   2373  1.1  christos 	}
   2374  1.1  christos }
   2375  1.1  christos 
   2376  1.1  christos static void
   2377  1.1  christos iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
   2378  1.1  christos 	dns_rdataset_t keyset;
   2379  1.1  christos 	dns_rdataset_t nsecset, signsecset;
   2380  1.1  christos 	bool haszonekey = false;
   2381  1.1  christos 	bool hasnsec = false;
   2382  1.1  christos 	isc_result_t result;
   2383  1.1  christos 
   2384  1.1  christos 	dns_rdataset_init(&keyset);
   2385  1.1  christos 	result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
   2386  1.1  christos 				     0, 0, &keyset, NULL);
   2387  1.1  christos 	if (result == ISC_R_SUCCESS) {
   2388  1.1  christos 		result = dns_rdataset_first(&keyset);
   2389  1.1  christos 		while (result == ISC_R_SUCCESS) {
   2390  1.1  christos 			dns_rdata_t keyrdata = DNS_RDATA_INIT;
   2391  1.1  christos 			dns_rdataset_current(&keyset, &keyrdata);
   2392  1.1  christos 			if (dns_zonekey_iszonekey(&keyrdata)) {
   2393  1.1  christos 				haszonekey = true;
   2394  1.1  christos 				break;
   2395  1.1  christos 			}
   2396  1.1  christos 			result = dns_rdataset_next(&keyset);
   2397  1.1  christos 		}
   2398  1.1  christos 		dns_rdataset_disassociate(&keyset);
   2399  1.1  christos 	}
   2400  1.1  christos 	if (!haszonekey) {
   2401  1.1  christos 		version->secure = dns_db_insecure;
   2402  1.1  christos 		version->havensec3 = false;
   2403  1.1  christos 		return;
   2404  1.1  christos 	}
   2405  1.1  christos 
   2406  1.1  christos 	dns_rdataset_init(&nsecset);
   2407  1.1  christos 	dns_rdataset_init(&signsecset);
   2408  1.1  christos 	result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec, 0,
   2409  1.1  christos 				     0, &nsecset, &signsecset);
   2410  1.1  christos 	if (result == ISC_R_SUCCESS) {
   2411  1.1  christos 		if (dns_rdataset_isassociated(&signsecset)) {
   2412  1.1  christos 			hasnsec = true;
   2413  1.1  christos 			dns_rdataset_disassociate(&signsecset);
   2414  1.1  christos 		}
   2415  1.1  christos 		dns_rdataset_disassociate(&nsecset);
   2416  1.1  christos 	}
   2417  1.1  christos 
   2418  1.1  christos 	setnsec3parameters(db, version);
   2419  1.1  christos 
   2420  1.1  christos 	/*
   2421  1.1  christos 	 * Do we have a valid NSEC/NSEC3 chain?
   2422  1.1  christos 	 */
   2423  1.1  christos 	if (version->havensec3 || hasnsec) {
   2424  1.1  christos 		version->secure = dns_db_secure;
   2425  1.1  christos 	} else {
   2426  1.1  christos 		version->secure = dns_db_insecure;
   2427  1.1  christos 	}
   2428  1.1  christos }
   2429  1.1  christos 
   2430  1.1  christos /*%<
   2431  1.1  christos  * Walk the origin node looking for NSEC3PARAM records.
   2432  1.1  christos  * Cache the nsec3 parameters.
   2433  1.1  christos  */
   2434  1.1  christos static void
   2435  1.1  christos setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
   2436  1.1  christos 	dns_rbtnode_t *node;
   2437  1.1  christos 	dns_rdata_nsec3param_t nsec3param;
   2438  1.1  christos 	dns_rdata_t rdata = DNS_RDATA_INIT;
   2439  1.1  christos 	isc_region_t region;
   2440  1.1  christos 	isc_result_t result;
   2441  1.1  christos 	rdatasetheader_t *header, *header_next;
   2442  1.1  christos 	unsigned char *raw; /* RDATASLAB */
   2443  1.1  christos 	unsigned int count, length;
   2444  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   2445  1.1  christos 
   2446  1.1  christos 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   2447  1.1  christos 	version->havensec3 = false;
   2448  1.1  christos 	node = rbtdb->origin_node;
   2449  1.1  christos 	NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
   2450  1.1  christos 		  isc_rwlocktype_read);
   2451  1.1  christos 	for (header = node->data; header != NULL; header = header_next) {
   2452  1.1  christos 		header_next = header->next;
   2453  1.1  christos 		do {
   2454  1.1  christos 			if (header->serial <= version->serial &&
   2455  1.1  christos 			    !IGNORE(header))
   2456  1.1  christos 			{
   2457  1.1  christos 				if (NONEXISTENT(header)) {
   2458  1.1  christos 					header = NULL;
   2459  1.1  christos 				}
   2460  1.1  christos 				break;
   2461  1.1  christos 			} else {
   2462  1.1  christos 				header = header->down;
   2463  1.1  christos 			}
   2464  1.1  christos 		} while (header != NULL);
   2465  1.1  christos 
   2466  1.1  christos 		if (header != NULL &&
   2467  1.1  christos 		    (header->type == dns_rdatatype_nsec3param))
   2468  1.1  christos 		{
   2469  1.1  christos 			/*
   2470  1.1  christos 			 * Find A NSEC3PARAM with a supported algorithm.
   2471  1.1  christos 			 */
   2472  1.1  christos 			raw = (unsigned char *)header + sizeof(*header);
   2473  1.1  christos 			count = raw[0] * 256 + raw[1]; /* count */
   2474  1.1  christos 			raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH;
   2475  1.1  christos 			while (count-- > 0U) {
   2476  1.1  christos 				length = raw[0] * 256 + raw[1];
   2477  1.1  christos 				raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH;
   2478  1.1  christos 				region.base = raw;
   2479  1.1  christos 				region.length = length;
   2480  1.1  christos 				raw += length;
   2481  1.1  christos 				dns_rdata_fromregion(
   2482  1.1  christos 					&rdata, rbtdb->common.rdclass,
   2483  1.1  christos 					dns_rdatatype_nsec3param, &region);
   2484  1.1  christos 				result = dns_rdata_tostruct(&rdata, &nsec3param,
   2485  1.1  christos 							    NULL);
   2486  1.1  christos 				INSIST(result == ISC_R_SUCCESS);
   2487  1.1  christos 				dns_rdata_reset(&rdata);
   2488  1.1  christos 
   2489  1.1  christos 				if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
   2490  1.1  christos 				    !dns_nsec3_supportedhash(nsec3param.hash))
   2491  1.1  christos 				{
   2492  1.1  christos 					continue;
   2493  1.1  christos 				}
   2494  1.1  christos 
   2495  1.1  christos 				if (nsec3param.flags != 0) {
   2496  1.1  christos 					continue;
   2497  1.1  christos 				}
   2498  1.1  christos 
   2499  1.1  christos 				memmove(version->salt, nsec3param.salt,
   2500  1.1  christos 					nsec3param.salt_length);
   2501  1.1  christos 				version->hash = nsec3param.hash;
   2502  1.1  christos 				version->salt_length = nsec3param.salt_length;
   2503  1.1  christos 				version->iterations = nsec3param.iterations;
   2504  1.1  christos 				version->flags = nsec3param.flags;
   2505  1.1  christos 				version->havensec3 = true;
   2506  1.1  christos 				/*
   2507  1.1  christos 				 * Look for a better algorithm than the
   2508  1.1  christos 				 * unknown test algorithm.
   2509  1.1  christos 				 */
   2510  1.1  christos 				if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG) {
   2511  1.1  christos 					goto unlock;
   2512  1.1  christos 				}
   2513  1.1  christos 			}
   2514  1.1  christos 		}
   2515  1.1  christos 	}
   2516  1.1  christos unlock:
   2517  1.1  christos 	NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
   2518  1.1  christos 		    isc_rwlocktype_read);
   2519  1.1  christos 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   2520  1.1  christos }
   2521  1.1  christos 
   2522  1.1  christos static void
   2523  1.1  christos cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
   2524  1.1  christos 	dns_rbtdb_t *rbtdb = event->ev_arg;
   2525  1.1  christos 	bool again = false;
   2526  1.1  christos 	unsigned int locknum;
   2527  1.1  christos 
   2528  1.1  christos 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
   2529  1.1  christos 	for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
   2530  1.1  christos 		NODE_LOCK(&rbtdb->node_locks[locknum].lock,
   2531  1.1  christos 			  isc_rwlocktype_write);
   2532  1.1  christos 		cleanup_dead_nodes(rbtdb, locknum);
   2533  1.1  christos 		if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL) {
   2534  1.1  christos 			again = true;
   2535  1.1  christos 		}
   2536  1.1  christos 		NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
   2537  1.1  christos 			    isc_rwlocktype_write);
   2538  1.1  christos 	}
   2539  1.1  christos 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
   2540  1.1  christos 	if (again) {
   2541  1.1  christos 		isc_task_send(task, &event);
   2542  1.1  christos 	} else {
   2543  1.1  christos 		isc_event_free(&event);
   2544  1.1  christos 		if (isc_refcount_decrement(&rbtdb->references) == 1) {
   2545  1.1  christos 			(void)isc_refcount_current(&rbtdb->references);
   2546  1.1  christos 			maybe_free_rbtdb(rbtdb);
   2547  1.1  christos 		}
   2548  1.1  christos 	}
   2549  1.1  christos }
   2550  1.1  christos 
   2551  1.1  christos static void
   2552  1.1  christos closeversion(dns_db_t *db, dns_dbversion_t **versionp, bool commit) {
   2553  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   2554  1.1  christos 	rbtdb_version_t *version, *cleanup_version, *least_greater;
   2555  1.1  christos 	bool rollback = false;
   2556  1.1  christos 	rbtdb_changedlist_t cleanup_list;
   2557  1.1  christos 	rdatasetheaderlist_t resigned_list;
   2558  1.1  christos 	rbtdb_changed_t *changed, *next_changed;
   2559  1.1  christos 	rbtdb_serial_t serial, least_serial;
   2560  1.1  christos 	dns_rbtnode_t *rbtnode;
   2561  1.1  christos 	rdatasetheader_t *header;
   2562  1.1  christos 
   2563  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   2564  1.1  christos 	version = (rbtdb_version_t *)*versionp;
   2565  1.1  christos 	INSIST(version->rbtdb == rbtdb);
   2566  1.1  christos 
   2567  1.1  christos 	cleanup_version = NULL;
   2568  1.1  christos 	ISC_LIST_INIT(cleanup_list);
   2569  1.1  christos 	ISC_LIST_INIT(resigned_list);
   2570  1.1  christos 
   2571  1.1  christos 	if (isc_refcount_decrement(&version->references) > 1) {
   2572  1.1  christos 		/* typical and easy case first */
   2573  1.1  christos 		if (commit) {
   2574  1.1  christos 			RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
   2575  1.1  christos 			INSIST(!version->writer);
   2576  1.1  christos 			RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
   2577  1.1  christos 		}
   2578  1.1  christos 		goto end;
   2579  1.1  christos 	}
   2580  1.1  christos 
   2581  1.1  christos 	/*
   2582  1.1  christos 	 * Update the zone's secure status in version before making
   2583  1.1  christos 	 * it the current version.
   2584  1.1  christos 	 */
   2585  1.1  christos 	if (version->writer && commit && !IS_CACHE(rbtdb)) {
   2586  1.1  christos 		iszonesecure(db, version, rbtdb->origin_node);
   2587  1.1  christos 	}
   2588  1.1  christos 
   2589  1.1  christos 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
   2590  1.1  christos 	serial = version->serial;
   2591  1.1  christos 	if (version->writer) {
   2592  1.1  christos 		if (commit) {
   2593  1.1  christos 			unsigned cur_ref;
   2594  1.1  christos 			rbtdb_version_t *cur_version;
   2595  1.1  christos 
   2596  1.1  christos 			INSIST(version->commit_ok);
   2597  1.1  christos 			INSIST(version == rbtdb->future_version);
   2598  1.1  christos 			/*
   2599  1.1  christos 			 * The current version is going to be replaced.
   2600  1.1  christos 			 * Release the (likely last) reference to it from the
   2601  1.1  christos 			 * DB itself and unlink it from the open list.
   2602  1.1  christos 			 */
   2603  1.1  christos 			cur_version = rbtdb->current_version;
   2604  1.1  christos 			cur_ref = isc_refcount_decrement(
   2605  1.1  christos 				&cur_version->references);
   2606  1.1  christos 			if (cur_ref == 1) {
   2607  1.1  christos 				(void)isc_refcount_current(
   2608  1.1  christos 					&cur_version->references);
   2609  1.1  christos 				if (cur_version->serial == rbtdb->least_serial)
   2610  1.1  christos 				{
   2611  1.1  christos 					INSIST(EMPTY(
   2612  1.1  christos 						cur_version->changed_list));
   2613  1.1  christos 				}
   2614  1.1  christos 				UNLINK(rbtdb->open_versions, cur_version, link);
   2615  1.1  christos 			}
   2616  1.1  christos 			if (EMPTY(rbtdb->open_versions)) {
   2617  1.1  christos 				/*
   2618  1.1  christos 				 * We're going to become the least open
   2619  1.1  christos 				 * version.
   2620  1.1  christos 				 */
   2621  1.1  christos 				make_least_version(rbtdb, version,
   2622  1.1  christos 						   &cleanup_list);
   2623  1.1  christos 			} else {
   2624  1.1  christos 				/*
   2625  1.1  christos 				 * Some other open version is the
   2626  1.1  christos 				 * least version.  We can't cleanup
   2627  1.1  christos 				 * records that were changed in this
   2628  1.1  christos 				 * version because the older versions
   2629  1.1  christos 				 * may still be in use by an open
   2630  1.1  christos 				 * version.
   2631  1.1  christos 				 *
   2632  1.1  christos 				 * We can, however, discard the
   2633  1.1  christos 				 * changed records for things that
   2634  1.1  christos 				 * we've added that didn't exist in
   2635  1.1  christos 				 * prior versions.
   2636  1.1  christos 				 */
   2637  1.1  christos 				cleanup_nondirty(version, &cleanup_list);
   2638  1.1  christos 			}
   2639  1.1  christos 			/*
   2640  1.1  christos 			 * If the (soon to be former) current version
   2641  1.1  christos 			 * isn't being used by anyone, we can clean
   2642  1.1  christos 			 * it up.
   2643  1.1  christos 			 */
   2644  1.1  christos 			if (cur_ref == 1) {
   2645  1.1  christos 				cleanup_version = cur_version;
   2646  1.1  christos 				APPENDLIST(version->changed_list,
   2647  1.1  christos 					   cleanup_version->changed_list, link);
   2648  1.1  christos 			}
   2649  1.1  christos 			/*
   2650  1.1  christos 			 * Become the current version.
   2651  1.1  christos 			 */
   2652  1.1  christos 			version->writer = false;
   2653  1.1  christos 			rbtdb->current_version = version;
   2654  1.1  christos 			rbtdb->current_serial = version->serial;
   2655  1.1  christos 			rbtdb->future_version = NULL;
   2656  1.1  christos 
   2657  1.1  christos 			/*
   2658  1.1  christos 			 * Keep the current version in the open list, and
   2659  1.1  christos 			 * gain a reference for the DB itself (see the DB
   2660  1.1  christos 			 * creation function below).  This must be the only
   2661  1.1  christos 			 * case where we need to increment the counter from
   2662  1.1  christos 			 * zero and need to use isc_refcount_increment0().
   2663  1.1  christos 			 */
   2664  1.1  christos 			INSIST(isc_refcount_increment0(&version->references) ==
   2665  1.1  christos 			       0);
   2666  1.1  christos 			PREPEND(rbtdb->open_versions, rbtdb->current_version,
   2667  1.1  christos 				link);
   2668  1.1  christos 			resigned_list = version->resigned_list;
   2669  1.1  christos 			ISC_LIST_INIT(version->resigned_list);
   2670  1.1  christos 		} else {
   2671  1.1  christos 			/*
   2672  1.1  christos 			 * We're rolling back this transaction.
   2673  1.1  christos 			 */
   2674  1.1  christos 			cleanup_list = version->changed_list;
   2675  1.1  christos 			ISC_LIST_INIT(version->changed_list);
   2676  1.1  christos 			resigned_list = version->resigned_list;
   2677  1.1  christos 			ISC_LIST_INIT(version->resigned_list);
   2678  1.1  christos 			rollback = true;
   2679  1.1  christos 			cleanup_version = version;
   2680  1.1  christos 			rbtdb->future_version = NULL;
   2681  1.1  christos 		}
   2682  1.1  christos 	} else {
   2683  1.1  christos 		if (version != rbtdb->current_version) {
   2684  1.1  christos 			/*
   2685  1.1  christos 			 * There are no external or internal references
   2686  1.1  christos 			 * to this version and it can be cleaned up.
   2687  1.1  christos 			 */
   2688  1.1  christos 			cleanup_version = version;
   2689  1.1  christos 
   2690  1.1  christos 			/*
   2691  1.1  christos 			 * Find the version with the least serial
   2692  1.1  christos 			 * number greater than ours.
   2693  1.1  christos 			 */
   2694  1.1  christos 			least_greater = PREV(version, link);
   2695  1.1  christos 			if (least_greater == NULL) {
   2696  1.1  christos 				least_greater = rbtdb->current_version;
   2697  1.1  christos 			}
   2698  1.1  christos 
   2699  1.1  christos 			INSIST(version->serial < least_greater->serial);
   2700  1.1  christos 			/*
   2701  1.1  christos 			 * Is this the least open version?
   2702  1.1  christos 			 */
   2703  1.1  christos 			if (version->serial == rbtdb->least_serial) {
   2704  1.1  christos 				/*
   2705  1.1  christos 				 * Yes.  Install the new least open
   2706  1.1  christos 				 * version.
   2707  1.1  christos 				 */
   2708  1.1  christos 				make_least_version(rbtdb, least_greater,
   2709  1.1  christos 						   &cleanup_list);
   2710  1.1  christos 			} else {
   2711  1.1  christos 				/*
   2712  1.1  christos 				 * Add any unexecuted cleanups to
   2713  1.1  christos 				 * those of the least greater version.
   2714  1.1  christos 				 */
   2715  1.1  christos 				APPENDLIST(least_greater->changed_list,
   2716  1.1  christos 					   version->changed_list, link);
   2717  1.1  christos 			}
   2718  1.1  christos 		} else if (version->serial == rbtdb->least_serial) {
   2719  1.1  christos 			INSIST(EMPTY(version->changed_list));
   2720  1.1  christos 		}
   2721  1.1  christos 		UNLINK(rbtdb->open_versions, version, link);
   2722  1.1  christos 	}
   2723  1.1  christos 	least_serial = rbtdb->least_serial;
   2724  1.1  christos 	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
   2725  1.1  christos 
   2726  1.1  christos 	if (cleanup_version != NULL) {
   2727  1.1  christos 		INSIST(EMPTY(cleanup_version->changed_list));
   2728  1.1  christos 		free_gluetable(cleanup_version);
   2729  1.1  christos 		isc_rwlock_destroy(&cleanup_version->glue_rwlock);
   2730  1.1  christos 		isc_rwlock_destroy(&cleanup_version->rwlock);
   2731  1.1  christos 		isc_mem_put(rbtdb->common.mctx, cleanup_version,
   2732  1.1  christos 			    sizeof(*cleanup_version));
   2733  1.1  christos 	}
   2734  1.1  christos 
   2735  1.1  christos 	/*
   2736  1.1  christos 	 * Commit/rollback re-signed headers.
   2737  1.1  christos 	 */
   2738  1.1  christos 	for (header = HEAD(resigned_list); header != NULL;
   2739  1.1  christos 	     header = HEAD(resigned_list))
   2740  1.1  christos 	{
   2741  1.1  christos 		nodelock_t *lock;
   2742  1.1  christos 
   2743  1.1  christos 		ISC_LIST_UNLINK(resigned_list, header, link);
   2744  1.1  christos 
   2745  1.1  christos 		lock = &rbtdb->node_locks[header->node->locknum].lock;
   2746  1.1  christos 		NODE_LOCK(lock, isc_rwlocktype_write);
   2747  1.1  christos 		if (rollback && !IGNORE(header)) {
   2748  1.1  christos 			resign_insert(rbtdb, header->node->locknum, header);
   2749  1.1  christos 		}
   2750  1.1  christos 		decrement_reference(rbtdb, header->node, least_serial,
   2751  1.1  christos 				    isc_rwlocktype_write, isc_rwlocktype_none,
   2752  1.1  christos 				    false);
   2753  1.1  christos 		NODE_UNLOCK(lock, isc_rwlocktype_write);
   2754  1.1  christos 	}
   2755  1.1  christos 
   2756  1.1  christos 	if (!EMPTY(cleanup_list)) {
   2757  1.1  christos 		isc_event_t *event = NULL;
   2758  1.1  christos 		isc_rwlocktype_t tlock = isc_rwlocktype_none;
   2759  1.1  christos 
   2760  1.1  christos 		if (rbtdb->task != NULL) {
   2761  1.1  christos 			event = isc_event_allocate(rbtdb->common.mctx, NULL,
   2762  1.1  christos 						   DNS_EVENT_RBTDEADNODES,
   2763  1.1  christos 						   cleanup_dead_nodes_callback,
   2764  1.1  christos 						   rbtdb, sizeof(isc_event_t));
   2765  1.1  christos 		}
   2766  1.1  christos 		if (event == NULL) {
   2767  1.1  christos 			/*
   2768  1.1  christos 			 * We acquire a tree write lock here in order to make
   2769  1.1  christos 			 * sure that stale nodes will be removed in
   2770  1.1  christos 			 * decrement_reference().  If we didn't have the lock,
   2771  1.1  christos 			 * those nodes could miss the chance to be removed
   2772  1.1  christos 			 * until the server stops.  The write lock is
   2773  1.1  christos 			 * expensive, but this event should be rare enough
   2774  1.1  christos 			 * to justify the cost.
   2775  1.1  christos 			 */
   2776  1.1  christos 			RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
   2777  1.1  christos 			tlock = isc_rwlocktype_write;
   2778  1.1  christos 		}
   2779  1.1  christos 
   2780  1.1  christos 		for (changed = HEAD(cleanup_list); changed != NULL;
   2781  1.1  christos 		     changed = next_changed)
   2782  1.1  christos 		{
   2783  1.1  christos 			nodelock_t *lock;
   2784  1.1  christos 
   2785  1.1  christos 			next_changed = NEXT(changed, link);
   2786  1.1  christos 			rbtnode = changed->node;
   2787  1.1  christos 			lock = &rbtdb->node_locks[rbtnode->locknum].lock;
   2788  1.1  christos 
   2789  1.1  christos 			NODE_LOCK(lock, isc_rwlocktype_write);
   2790  1.1  christos 			/*
   2791  1.1  christos 			 * This is a good opportunity to purge any dead nodes,
   2792  1.1  christos 			 * so use it.
   2793  1.1  christos 			 */
   2794  1.1  christos 			if (event == NULL) {
   2795  1.1  christos 				cleanup_dead_nodes(rbtdb, rbtnode->locknum);
   2796  1.1  christos 			}
   2797  1.1  christos 
   2798  1.1  christos 			if (rollback) {
   2799  1.1  christos 				rollback_node(rbtnode, serial);
   2800  1.1  christos 			}
   2801  1.1  christos 			decrement_reference(rbtdb, rbtnode, least_serial,
   2802  1.1  christos 					    isc_rwlocktype_write, tlock, false);
   2803  1.1  christos 
   2804  1.1  christos 			NODE_UNLOCK(lock, isc_rwlocktype_write);
   2805  1.1  christos 
   2806  1.1  christos 			isc_mem_put(rbtdb->common.mctx, changed,
   2807  1.1  christos 				    sizeof(*changed));
   2808  1.1  christos 		}
   2809  1.1  christos 		if (event != NULL) {
   2810  1.1  christos 			isc_refcount_increment(&rbtdb->references);
   2811  1.1  christos 			isc_task_send(rbtdb->task, &event);
   2812  1.1  christos 		} else {
   2813  1.1  christos 			RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
   2814  1.1  christos 		}
   2815  1.1  christos 	}
   2816  1.1  christos 
   2817  1.1  christos end:
   2818  1.1  christos 	*versionp = NULL;
   2819  1.1  christos }
   2820  1.1  christos 
   2821  1.1  christos /*
   2822  1.1  christos  * Add the necessary magic for the wildcard name 'name'
   2823  1.1  christos  * to be found in 'rbtdb'.
   2824  1.1  christos  *
   2825  1.1  christos  * In order for wildcard matching to work correctly in
   2826  1.1  christos  * zone_find(), we must ensure that a node for the wildcarding
   2827  1.1  christos  * level exists in the database, and has its 'find_callback'
   2828  1.1  christos  * and 'wild' bits set.
   2829  1.1  christos  *
   2830  1.1  christos  * E.g. if the wildcard name is "*.sub.example." then we
   2831  1.1  christos  * must ensure that "sub.example." exists and is marked as
   2832  1.1  christos  * a wildcard level.
   2833  1.1  christos  *
   2834  1.1  christos  * tree_lock(write) must be held.
   2835  1.1  christos  */
   2836  1.1  christos static isc_result_t
   2837  1.1  christos add_wildcard_magic(dns_rbtdb_t *rbtdb, const dns_name_t *name, bool lock) {
   2838  1.1  christos 	isc_result_t result;
   2839  1.1  christos 	dns_name_t foundname;
   2840  1.1  christos 	dns_offsets_t offsets;
   2841  1.1  christos 	unsigned int n;
   2842  1.1  christos 	dns_rbtnode_t *node = NULL;
   2843  1.1  christos 
   2844  1.1  christos 	dns_name_init(&foundname, offsets);
   2845  1.1  christos 	n = dns_name_countlabels(name);
   2846  1.1  christos 	INSIST(n >= 2);
   2847  1.1  christos 	n--;
   2848  1.1  christos 	dns_name_getlabelsequence(name, 1, n, &foundname);
   2849  1.1  christos 	result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
   2850  1.1  christos 	if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) {
   2851  1.1  christos 		return (result);
   2852  1.1  christos 	}
   2853  1.1  christos 	if (result == ISC_R_SUCCESS) {
   2854  1.1  christos 		node->nsec = DNS_RBT_NSEC_NORMAL;
   2855  1.1  christos 	}
   2856  1.1  christos 	node->find_callback = 1;
   2857  1.1  christos 	if (lock) {
   2858  1.1  christos 		NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
   2859  1.1  christos 			  isc_rwlocktype_write);
   2860  1.1  christos 	}
   2861  1.1  christos 	node->wild = 1;
   2862  1.1  christos 	if (lock) {
   2863  1.1  christos 		NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
   2864  1.1  christos 			    isc_rwlocktype_write);
   2865  1.1  christos 	}
   2866  1.1  christos 	return (ISC_R_SUCCESS);
   2867  1.1  christos }
   2868  1.1  christos 
   2869  1.1  christos /*
   2870  1.1  christos  * tree_lock(write) must be held.
   2871  1.1  christos  */
   2872  1.1  christos static isc_result_t
   2873  1.1  christos add_empty_wildcards(dns_rbtdb_t *rbtdb, const dns_name_t *name, bool lock) {
   2874  1.1  christos 	isc_result_t result;
   2875  1.1  christos 	dns_name_t foundname;
   2876  1.1  christos 	dns_offsets_t offsets;
   2877  1.1  christos 	unsigned int n, l, i;
   2878  1.1  christos 
   2879  1.1  christos 	dns_name_init(&foundname, offsets);
   2880  1.1  christos 	n = dns_name_countlabels(name);
   2881  1.1  christos 	l = dns_name_countlabels(&rbtdb->common.origin);
   2882  1.1  christos 	i = l + 1;
   2883  1.1  christos 	while (i < n) {
   2884  1.1  christos 		dns_rbtnode_t *node = NULL; /* dummy */
   2885  1.1  christos 		dns_name_getlabelsequence(name, n - i, i, &foundname);
   2886  1.1  christos 		if (dns_name_iswildcard(&foundname)) {
   2887  1.1  christos 			result = add_wildcard_magic(rbtdb, &foundname, lock);
   2888  1.1  christos 			if (result != ISC_R_SUCCESS) {
   2889  1.1  christos 				return (result);
   2890  1.1  christos 			}
   2891  1.1  christos 			result = dns_rbt_addnode(rbtdb->tree, &foundname,
   2892  1.1  christos 						 &node);
   2893  1.1  christos 			if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) {
   2894  1.1  christos 				return (result);
   2895  1.1  christos 			}
   2896  1.1  christos 			if (result == ISC_R_SUCCESS) {
   2897  1.1  christos 				node->nsec = DNS_RBT_NSEC_NORMAL;
   2898  1.1  christos 			}
   2899  1.1  christos 		}
   2900  1.1  christos 		i++;
   2901  1.1  christos 	}
   2902  1.1  christos 	return (ISC_R_SUCCESS);
   2903  1.1  christos }
   2904  1.1  christos 
   2905  1.1  christos static isc_result_t
   2906  1.1  christos findnodeintree(dns_rbtdb_t *rbtdb, dns_rbt_t *tree, const dns_name_t *name,
   2907  1.1  christos 	       bool create, dns_dbnode_t **nodep) {
   2908  1.1  christos 	dns_rbtnode_t *node = NULL;
   2909  1.1  christos 	dns_name_t nodename;
   2910  1.1  christos 	isc_result_t result;
   2911  1.1  christos 	isc_rwlocktype_t locktype = isc_rwlocktype_read;
   2912  1.1  christos 
   2913  1.1  christos 	INSIST(tree == rbtdb->tree || tree == rbtdb->nsec3);
   2914  1.1  christos 
   2915  1.1  christos 	dns_name_init(&nodename, NULL);
   2916  1.1  christos 	RWLOCK(&rbtdb->tree_lock, locktype);
   2917  1.1  christos 	result = dns_rbt_findnode(tree, name, NULL, &node, NULL,
   2918  1.1  christos 				  DNS_RBTFIND_EMPTYDATA, NULL, NULL);
   2919  1.1  christos 	if (result != ISC_R_SUCCESS) {
   2920  1.1  christos 		RWUNLOCK(&rbtdb->tree_lock, locktype);
   2921  1.1  christos 		if (!create) {
   2922  1.1  christos 			if (result == DNS_R_PARTIALMATCH) {
   2923  1.1  christos 				result = ISC_R_NOTFOUND;
   2924  1.1  christos 			}
   2925  1.1  christos 			return (result);
   2926  1.1  christos 		}
   2927  1.1  christos 		/*
   2928  1.1  christos 		 * It would be nice to try to upgrade the lock instead of
   2929  1.1  christos 		 * unlocking then relocking.
   2930  1.1  christos 		 */
   2931  1.1  christos 		locktype = isc_rwlocktype_write;
   2932  1.1  christos 		RWLOCK(&rbtdb->tree_lock, locktype);
   2933  1.1  christos 		node = NULL;
   2934  1.1  christos 		result = dns_rbt_addnode(tree, name, &node);
   2935  1.1  christos 		if (result == ISC_R_SUCCESS) {
   2936  1.1  christos 			dns_rbt_namefromnode(node, &nodename);
   2937  1.1  christos 			node->locknum = node->hashval % rbtdb->node_lock_count;
   2938  1.1  christos 			if (tree == rbtdb->tree) {
   2939  1.1  christos 				add_empty_wildcards(rbtdb, name, true);
   2940  1.1  christos 
   2941  1.1  christos 				if (dns_name_iswildcard(name)) {
   2942  1.1  christos 					result = add_wildcard_magic(rbtdb, name,
   2943  1.1  christos 								    true);
   2944  1.1  christos 					if (result != ISC_R_SUCCESS) {
   2945  1.1  christos 						RWUNLOCK(&rbtdb->tree_lock,
   2946  1.1  christos 							 locktype);
   2947  1.1  christos 						return (result);
   2948  1.1  christos 					}
   2949  1.1  christos 				}
   2950  1.1  christos 			}
   2951  1.1  christos 			if (tree == rbtdb->nsec3) {
   2952  1.1  christos 				node->nsec = DNS_RBT_NSEC_NSEC3;
   2953  1.1  christos 			}
   2954  1.1  christos 		} else if (result != ISC_R_EXISTS) {
   2955  1.1  christos 			RWUNLOCK(&rbtdb->tree_lock, locktype);
   2956  1.1  christos 			return (result);
   2957  1.1  christos 		}
   2958  1.1  christos 	}
   2959  1.1  christos 
   2960  1.1  christos 	if (tree == rbtdb->nsec3) {
   2961  1.1  christos 		INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
   2962  1.1  christos 	}
   2963  1.1  christos 
   2964  1.1  christos 	reactivate_node(rbtdb, node, locktype);
   2965  1.1  christos 
   2966  1.1  christos 	RWUNLOCK(&rbtdb->tree_lock, locktype);
   2967  1.1  christos 
   2968  1.1  christos 	*nodep = (dns_dbnode_t *)node;
   2969  1.1  christos 
   2970  1.1  christos 	return (ISC_R_SUCCESS);
   2971  1.1  christos }
   2972  1.1  christos 
   2973  1.1  christos static isc_result_t
   2974  1.1  christos findnode(dns_db_t *db, const dns_name_t *name, bool create,
   2975  1.1  christos 	 dns_dbnode_t **nodep) {
   2976  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   2977  1.1  christos 
   2978  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   2979  1.1  christos 
   2980  1.1  christos 	return (findnodeintree(rbtdb, rbtdb->tree, name, create, nodep));
   2981  1.1  christos }
   2982  1.1  christos 
   2983  1.1  christos static isc_result_t
   2984  1.1  christos findnsec3node(dns_db_t *db, const dns_name_t *name, bool create,
   2985  1.1  christos 	      dns_dbnode_t **nodep) {
   2986  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   2987  1.1  christos 
   2988  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   2989  1.1  christos 
   2990  1.1  christos 	return (findnodeintree(rbtdb, rbtdb->nsec3, name, create, nodep));
   2991  1.1  christos }
   2992  1.1  christos 
   2993  1.1  christos static isc_result_t
   2994  1.1  christos zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
   2995  1.1  christos 	rbtdb_search_t *search = arg;
   2996  1.1  christos 	rdatasetheader_t *header, *header_next;
   2997  1.1  christos 	rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
   2998  1.1  christos 	rdatasetheader_t *found;
   2999  1.1  christos 	isc_result_t result;
   3000  1.1  christos 	dns_rbtnode_t *onode;
   3001  1.1  christos 
   3002  1.1  christos 	/*
   3003  1.1  christos 	 * We only want to remember the topmost zone cut, since it's the one
   3004  1.1  christos 	 * that counts, so we'll just continue if we've already found a
   3005  1.1  christos 	 * zonecut.
   3006  1.1  christos 	 */
   3007  1.1  christos 	if (search->zonecut != NULL) {
   3008  1.1  christos 		return (DNS_R_CONTINUE);
   3009  1.1  christos 	}
   3010  1.1  christos 
   3011  1.1  christos 	found = NULL;
   3012  1.1  christos 	result = DNS_R_CONTINUE;
   3013  1.1  christos 	onode = search->rbtdb->origin_node;
   3014  1.1  christos 
   3015  1.1  christos 	NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
   3016  1.1  christos 		  isc_rwlocktype_read);
   3017  1.1  christos 
   3018  1.1  christos 	/*
   3019  1.1  christos 	 * Look for an NS or DNAME rdataset active in our version.
   3020  1.1  christos 	 */
   3021  1.1  christos 	ns_header = NULL;
   3022  1.1  christos 	dname_header = NULL;
   3023  1.1  christos 	sigdname_header = NULL;
   3024  1.1  christos 	for (header = node->data; header != NULL; header = header_next) {
   3025  1.1  christos 		header_next = header->next;
   3026  1.1  christos 		if (header->type == dns_rdatatype_ns ||
   3027  1.1  christos 		    header->type == dns_rdatatype_dname ||
   3028  1.1  christos 		    header->type == RBTDB_RDATATYPE_SIGDNAME)
   3029  1.1  christos 		{
   3030  1.1  christos 			do {
   3031  1.1  christos 				if (header->serial <= search->serial &&
   3032  1.1  christos 				    !IGNORE(header))
   3033  1.1  christos 				{
   3034  1.1  christos 					/*
   3035  1.1  christos 					 * Is this a "this rdataset doesn't
   3036  1.1  christos 					 * exist" record?
   3037  1.1  christos 					 */
   3038  1.1  christos 					if (NONEXISTENT(header)) {
   3039  1.1  christos 						header = NULL;
   3040  1.1  christos 					}
   3041  1.1  christos 					break;
   3042  1.1  christos 				} else {
   3043  1.1  christos 					header = header->down;
   3044  1.1  christos 				}
   3045  1.1  christos 			} while (header != NULL);
   3046  1.1  christos 			if (header != NULL) {
   3047  1.1  christos 				if (header->type == dns_rdatatype_dname) {
   3048  1.1  christos 					dname_header = header;
   3049  1.1  christos 				} else if (header->type ==
   3050  1.1  christos 					   RBTDB_RDATATYPE_SIGDNAME)
   3051  1.1  christos 				{
   3052  1.1  christos 					sigdname_header = header;
   3053  1.1  christos 				} else if (node != onode ||
   3054  1.1  christos 					   IS_STUB(search->rbtdb))
   3055  1.1  christos 				{
   3056  1.1  christos 					/*
   3057  1.1  christos 					 * We've found an NS rdataset that
   3058  1.1  christos 					 * isn't at the origin node.  We check
   3059  1.1  christos 					 * that they're not at the origin node,
   3060  1.1  christos 					 * because otherwise we'd erroneously
   3061  1.1  christos 					 * treat the zone top as if it were
   3062  1.1  christos 					 * a delegation.
   3063  1.1  christos 					 */
   3064  1.1  christos 					ns_header = header;
   3065  1.1  christos 				}
   3066  1.1  christos 			}
   3067  1.1  christos 		}
   3068  1.1  christos 	}
   3069  1.1  christos 
   3070  1.1  christos 	/*
   3071  1.1  christos 	 * Did we find anything?
   3072  1.1  christos 	 */
   3073  1.1  christos 	if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) &&
   3074  1.1  christos 	    ns_header != NULL)
   3075  1.1  christos 	{
   3076  1.1  christos 		/*
   3077  1.1  christos 		 * Note that NS has precedence over DNAME if both exist
   3078  1.1  christos 		 * in a zone.  Otherwise DNAME take precedence over NS.
   3079  1.1  christos 		 */
   3080  1.1  christos 		found = ns_header;
   3081  1.1  christos 		search->zonecut_sigrdataset = NULL;
   3082  1.1  christos 	} else if (dname_header != NULL) {
   3083  1.1  christos 		found = dname_header;
   3084  1.1  christos 		search->zonecut_sigrdataset = sigdname_header;
   3085  1.1  christos 	} else if (ns_header != NULL) {
   3086  1.1  christos 		found = ns_header;
   3087  1.1  christos 		search->zonecut_sigrdataset = NULL;
   3088  1.1  christos 	}
   3089  1.1  christos 
   3090  1.1  christos 	if (found != NULL) {
   3091  1.1  christos 		/*
   3092  1.1  christos 		 * We increment the reference count on node to ensure that
   3093  1.1  christos 		 * search->zonecut_rdataset will still be valid later.
   3094  1.1  christos 		 */
   3095  1.1  christos 		new_reference(search->rbtdb, node, isc_rwlocktype_read);
   3096  1.1  christos 		search->zonecut = node;
   3097  1.1  christos 		search->zonecut_rdataset = found;
   3098  1.1  christos 		search->need_cleanup = true;
   3099  1.1  christos 		/*
   3100  1.1  christos 		 * Since we've found a zonecut, anything beneath it is
   3101  1.1  christos 		 * glue and is not subject to wildcard matching, so we
   3102  1.1  christos 		 * may clear search->wild.
   3103  1.1  christos 		 */
   3104  1.1  christos 		search->wild = false;
   3105  1.1  christos 		if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
   3106  1.1  christos 			/*
   3107  1.1  christos 			 * If the caller does not want to find glue, then
   3108  1.1  christos 			 * this is the best answer and the search should
   3109  1.1  christos 			 * stop now.
   3110  1.1  christos 			 */
   3111  1.1  christos 			result = DNS_R_PARTIALMATCH;
   3112  1.1  christos 		} else {
   3113  1.1  christos 			dns_name_t *zcname;
   3114  1.1  christos 
   3115  1.1  christos 			/*
   3116  1.1  christos 			 * The search will continue beneath the zone cut.
   3117  1.1  christos 			 * This may or may not be the best match.  In case it
   3118  1.1  christos 			 * is, we need to remember the node name.
   3119  1.1  christos 			 */
   3120  1.1  christos 			zcname = dns_fixedname_name(&search->zonecut_name);
   3121  1.1  christos 			dns_name_copynf(name, zcname);
   3122  1.1  christos 			search->copy_name = true;
   3123  1.1  christos 		}
   3124  1.1  christos 	} else {
   3125  1.1  christos 		/*
   3126  1.1  christos 		 * There is no zonecut at this node which is active in this
   3127  1.1  christos 		 * version.
   3128  1.1  christos 		 *
   3129  1.1  christos 		 * If this is a "wild" node and the caller hasn't disabled
   3130  1.1  christos 		 * wildcard matching, remember that we've seen a wild node
   3131  1.1  christos 		 * in case we need to go searching for wildcard matches
   3132  1.1  christos 		 * later on.
   3133  1.1  christos 		 */
   3134  1.1  christos 		if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0) {
   3135  1.1  christos 			search->wild = true;
   3136  1.1  christos 		}
   3137  1.1  christos 	}
   3138  1.1  christos 
   3139  1.1  christos 	NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
   3140  1.1  christos 		    isc_rwlocktype_read);
   3141  1.1  christos 
   3142  1.1  christos 	return (result);
   3143  1.1  christos }
   3144  1.1  christos 
   3145  1.1  christos static void
   3146  1.1  christos bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, rdatasetheader_t *header,
   3147  1.1  christos 	      isc_stdtime_t now, isc_rwlocktype_t locktype,
   3148  1.1  christos 	      dns_rdataset_t *rdataset) {
   3149  1.1  christos 	unsigned char *raw; /* RDATASLAB */
   3150  1.1  christos 	bool stale = STALE(header);
   3151  1.1  christos 	bool ancient = ANCIENT(header);
   3152  1.1  christos 
   3153  1.1  christos 	/*
   3154  1.1  christos 	 * Caller must be holding the node reader lock.
   3155  1.1  christos 	 * XXXJT: technically, we need a writer lock, since we'll increment
   3156  1.1  christos 	 * the header count below.  However, since the actual counter value
   3157  1.1  christos 	 * doesn't matter, we prioritize performance here.  (We may want to
   3158  1.1  christos 	 * use atomic increment when available).
   3159  1.1  christos 	 */
   3160  1.1  christos 
   3161  1.1  christos 	if (rdataset == NULL) {
   3162  1.1  christos 		return;
   3163  1.1  christos 	}
   3164  1.1  christos 
   3165  1.1  christos 	new_reference(rbtdb, node, locktype);
   3166  1.1  christos 
   3167  1.1  christos 	INSIST(rdataset->methods == NULL); /* We must be disassociated. */
   3168  1.1  christos 
   3169  1.1  christos 	/*
   3170  1.1  christos 	 * Mark header stale or ancient if the RRset is no longer active.
   3171  1.1  christos 	 */
   3172  1.1  christos 	if (!ACTIVE(header, now)) {
   3173  1.1  christos 		dns_ttl_t stale_ttl = header->rdh_ttl + rbtdb->serve_stale_ttl;
   3174  1.1  christos 		/*
   3175  1.1  christos 		 * If this data is in the stale window keep it and if
   3176  1.1  christos 		 * DNS_DBFIND_STALEOK is not set we tell the caller to
   3177  1.1  christos 		 * skip this record.  We skip the records with ZEROTTL
   3178  1.1  christos 		 * (these records should not be cached anyway).
   3179  1.1  christos 		 */
   3180  1.1  christos 
   3181  1.1  christos 		if (KEEPSTALE(rbtdb) && stale_ttl > now) {
   3182  1.1  christos 			stale = true;
   3183  1.1  christos 		} else {
   3184  1.1  christos 			/*
   3185  1.1  christos 			 * We are not keeping stale, or it is outside the
   3186  1.1  christos 			 * stale window. Mark ancient, i.e. ready for cleanup.
   3187  1.1  christos 			 */
   3188  1.1  christos 			ancient = true;
   3189  1.1  christos 		}
   3190  1.1  christos 	}
   3191  1.1  christos 
   3192  1.1  christos 	rdataset->methods = &rdataset_methods;
   3193  1.1  christos 	rdataset->rdclass = rbtdb->common.rdclass;
   3194  1.1  christos 	rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
   3195  1.1  christos 	rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
   3196  1.1  christos 	rdataset->ttl = header->rdh_ttl - now;
   3197  1.1  christos 	rdataset->trust = header->trust;
   3198  1.1  christos 
   3199  1.1  christos 	if (NEGATIVE(header)) {
   3200  1.1  christos 		rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
   3201  1.1  christos 	}
   3202  1.1  christos 	if (NXDOMAIN(header)) {
   3203  1.1  christos 		rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
   3204  1.1  christos 	}
   3205  1.1  christos 	if (OPTOUT(header)) {
   3206  1.1  christos 		rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
   3207  1.1  christos 	}
   3208  1.1  christos 	if (PREFETCH(header)) {
   3209  1.1  christos 		rdataset->attributes |= DNS_RDATASETATTR_PREFETCH;
   3210  1.1  christos 	}
   3211  1.1  christos 
   3212  1.1  christos 	if (stale && !ancient) {
   3213  1.1  christos 		dns_ttl_t stale_ttl = header->rdh_ttl + rbtdb->serve_stale_ttl;
   3214  1.1  christos 		if (stale_ttl > now) {
   3215  1.1  christos 			rdataset->ttl = stale_ttl - now;
   3216  1.1  christos 		} else {
   3217  1.1  christos 			rdataset->ttl = 0;
   3218  1.1  christos 		}
   3219  1.1  christos 		if (STALE_WINDOW(header)) {
   3220  1.1  christos 			rdataset->attributes |= DNS_RDATASETATTR_STALE_WINDOW;
   3221  1.1  christos 		}
   3222  1.1  christos 		rdataset->attributes |= DNS_RDATASETATTR_STALE;
   3223  1.1  christos 	} else if (IS_CACHE(rbtdb) && !ACTIVE(header, now)) {
   3224  1.1  christos 		rdataset->attributes |= DNS_RDATASETATTR_ANCIENT;
   3225  1.1  christos 		rdataset->ttl = header->rdh_ttl;
   3226  1.1  christos 	}
   3227  1.1  christos 
   3228  1.1  christos 	rdataset->private1 = rbtdb;
   3229  1.1  christos 	rdataset->private2 = node;
   3230  1.1  christos 	raw = (unsigned char *)header + sizeof(*header);
   3231  1.1  christos 	rdataset->private3 = raw;
   3232  1.1  christos 	rdataset->count = atomic_fetch_add_relaxed(&header->count, 1);
   3233  1.1  christos 	if (rdataset->count == UINT32_MAX) {
   3234  1.1  christos 		rdataset->count = 0;
   3235  1.1  christos 	}
   3236  1.1  christos 
   3237  1.1  christos 	/*
   3238  1.1  christos 	 * Reset iterator state.
   3239  1.1  christos 	 */
   3240  1.1  christos 	rdataset->privateuint4 = 0;
   3241  1.1  christos 	rdataset->private5 = NULL;
   3242  1.1  christos 
   3243  1.1  christos 	/*
   3244  1.1  christos 	 * Add noqname proof.
   3245  1.1  christos 	 */
   3246  1.1  christos 	rdataset->private6 = header->noqname;
   3247  1.1  christos 	if (rdataset->private6 != NULL) {
   3248  1.1  christos 		rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
   3249  1.1  christos 	}
   3250  1.1  christos 	rdataset->private7 = header->closest;
   3251  1.1  christos 	if (rdataset->private7 != NULL) {
   3252  1.1  christos 		rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
   3253  1.1  christos 	}
   3254  1.1  christos 
   3255  1.1  christos 	/*
   3256  1.1  christos 	 * Copy out re-signing information.
   3257  1.1  christos 	 */
   3258  1.1  christos 	if (RESIGN(header)) {
   3259  1.1  christos 		rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
   3260  1.1  christos 		rdataset->resign = (header->resign << 1) | header->resign_lsb;
   3261  1.1  christos 	} else {
   3262  1.1  christos 		rdataset->resign = 0;
   3263  1.1  christos 	}
   3264  1.1  christos }
   3265  1.1  christos 
   3266  1.1  christos static isc_result_t
   3267  1.1  christos setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
   3268  1.1  christos 		 dns_name_t *foundname, dns_rdataset_t *rdataset,
   3269  1.1  christos 		 dns_rdataset_t *sigrdataset) {
   3270  1.1  christos 	dns_name_t *zcname;
   3271  1.1  christos 	rbtdb_rdatatype_t type;
   3272  1.1  christos 	dns_rbtnode_t *node;
   3273  1.1  christos 
   3274  1.1  christos 	/*
   3275  1.1  christos 	 * The caller MUST NOT be holding any node locks.
   3276  1.1  christos 	 */
   3277  1.1  christos 
   3278  1.1  christos 	node = search->zonecut;
   3279  1.1  christos 	type = search->zonecut_rdataset->type;
   3280  1.1  christos 
   3281  1.1  christos 	/*
   3282  1.1  christos 	 * If we have to set foundname, we do it before anything else.
   3283  1.1  christos 	 * If we were to set foundname after we had set nodep or bound the
   3284  1.1  christos 	 * rdataset, then we'd have to undo that work if dns_name_copy()
   3285  1.1  christos 	 * failed.  By setting foundname first, there's nothing to undo if
   3286  1.1  christos 	 * we have trouble.
   3287  1.1  christos 	 */
   3288  1.1  christos 	if (foundname != NULL && search->copy_name) {
   3289  1.1  christos 		zcname = dns_fixedname_name(&search->zonecut_name);
   3290  1.1  christos 		dns_name_copynf(zcname, foundname);
   3291  1.1  christos 	}
   3292  1.1  christos 	if (nodep != NULL) {
   3293  1.1  christos 		/*
   3294  1.1  christos 		 * Note that we don't have to increment the node's reference
   3295  1.1  christos 		 * count here because we're going to use the reference we
   3296  1.1  christos 		 * already have in the search block.
   3297  1.1  christos 		 */
   3298  1.1  christos 		*nodep = node;
   3299  1.1  christos 		search->need_cleanup = false;
   3300  1.1  christos 	}
   3301  1.1  christos 	if (rdataset != NULL) {
   3302  1.1  christos 		NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
   3303  1.1  christos 			  isc_rwlocktype_read);
   3304  1.1  christos 		bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
   3305  1.1  christos 			      search->now, isc_rwlocktype_read, rdataset);
   3306  1.1  christos 		if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
   3307  1.1  christos 		{
   3308  1.1  christos 			bind_rdataset(search->rbtdb, node,
   3309  1.1  christos 				      search->zonecut_sigrdataset, search->now,
   3310  1.1  christos 				      isc_rwlocktype_read, sigrdataset);
   3311  1.1  christos 		}
   3312  1.1  christos 		NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
   3313  1.1  christos 			    isc_rwlocktype_read);
   3314  1.1  christos 	}
   3315  1.1  christos 
   3316  1.1  christos 	if (type == dns_rdatatype_dname) {
   3317  1.1  christos 		return (DNS_R_DNAME);
   3318  1.1  christos 	}
   3319  1.1  christos 	return (DNS_R_DELEGATION);
   3320  1.1  christos }
   3321  1.1  christos 
   3322  1.1  christos static bool
   3323  1.1  christos valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
   3324  1.1  christos 	   dns_rbtnode_t *node) {
   3325  1.1  christos 	unsigned char *raw; /* RDATASLAB */
   3326  1.1  christos 	unsigned int count, size;
   3327  1.1  christos 	dns_name_t ns_name;
   3328  1.1  christos 	bool valid = false;
   3329  1.1  christos 	dns_offsets_t offsets;
   3330  1.1  christos 	isc_region_t region;
   3331  1.1  christos 	rdatasetheader_t *header;
   3332  1.1  christos 
   3333  1.1  christos 	/*
   3334  1.1  christos 	 * No additional locking is required.
   3335  1.1  christos 	 */
   3336  1.1  christos 
   3337  1.1  christos 	/*
   3338  1.1  christos 	 * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
   3339  1.1  christos 	 * if it occurs at a zone cut, but is not valid below it.
   3340  1.1  christos 	 */
   3341  1.1  christos 	if (type == dns_rdatatype_ns) {
   3342  1.1  christos 		if (node != search->zonecut) {
   3343  1.1  christos 			return (false);
   3344  1.1  christos 		}
   3345  1.1  christos 	} else if (type != dns_rdatatype_a && type != dns_rdatatype_aaaa &&
   3346  1.1  christos 		   type != dns_rdatatype_a6)
   3347  1.1  christos 	{
   3348  1.1  christos 		return (false);
   3349  1.1  christos 	}
   3350  1.1  christos 
   3351  1.1  christos 	header = search->zonecut_rdataset;
   3352  1.1  christos 	raw = (unsigned char *)header + sizeof(*header);
   3353  1.1  christos 	count = raw[0] * 256 + raw[1];
   3354  1.1  christos 	raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH;
   3355  1.1  christos 
   3356  1.1  christos 	while (count > 0) {
   3357  1.1  christos 		count--;
   3358  1.1  christos 		size = raw[0] * 256 + raw[1];
   3359  1.1  christos 		raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH;
   3360  1.1  christos 		region.base = raw;
   3361  1.1  christos 		region.length = size;
   3362  1.1  christos 		raw += size;
   3363  1.1  christos 		/*
   3364  1.1  christos 		 * XXX Until we have rdata structures, we have no choice but
   3365  1.1  christos 		 * to directly access the rdata format.
   3366  1.1  christos 		 */
   3367  1.1  christos 		dns_name_init(&ns_name, offsets);
   3368  1.1  christos 		dns_name_fromregion(&ns_name, &region);
   3369  1.1  christos 		if (dns_name_compare(&ns_name, name) == 0) {
   3370  1.1  christos 			valid = true;
   3371  1.1  christos 			break;
   3372  1.1  christos 		}
   3373  1.1  christos 	}
   3374  1.1  christos 
   3375  1.1  christos 	return (valid);
   3376  1.1  christos }
   3377  1.1  christos 
   3378  1.1  christos static bool
   3379  1.1  christos activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
   3380  1.1  christos 	    const dns_name_t *name) {
   3381  1.1  christos 	dns_fixedname_t fnext;
   3382  1.1  christos 	dns_fixedname_t forigin;
   3383  1.1  christos 	dns_name_t *next;
   3384  1.1  christos 	dns_name_t *origin;
   3385  1.1  christos 	dns_name_t prefix;
   3386  1.1  christos 	dns_rbtdb_t *rbtdb;
   3387  1.1  christos 	dns_rbtnode_t *node;
   3388  1.1  christos 	isc_result_t result;
   3389  1.1  christos 	bool answer = false;
   3390  1.1  christos 	rdatasetheader_t *header;
   3391  1.1  christos 
   3392  1.1  christos 	rbtdb = search->rbtdb;
   3393  1.1  christos 
   3394  1.1  christos 	dns_name_init(&prefix, NULL);
   3395  1.1  christos 	next = dns_fixedname_initname(&fnext);
   3396  1.1  christos 	origin = dns_fixedname_initname(&forigin);
   3397  1.1  christos 
   3398  1.1  christos 	result = dns_rbtnodechain_next(chain, NULL, NULL);
   3399  1.1  christos 	while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
   3400  1.1  christos 		node = NULL;
   3401  1.1  christos 		result = dns_rbtnodechain_current(chain, &prefix, origin,
   3402  1.1  christos 						  &node);
   3403  1.1  christos 		if (result != ISC_R_SUCCESS) {
   3404  1.1  christos 			break;
   3405  1.1  christos 		}
   3406  1.1  christos 		NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
   3407  1.1  christos 			  isc_rwlocktype_read);
   3408  1.1  christos 		for (header = node->data; header != NULL; header = header->next)
   3409  1.1  christos 		{
   3410  1.1  christos 			if (header->serial <= search->serial &&
   3411  1.1  christos 			    !IGNORE(header) && EXISTS(header))
   3412  1.1  christos 			{
   3413  1.1  christos 				break;
   3414  1.1  christos 			}
   3415  1.1  christos 		}
   3416  1.1  christos 		NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
   3417  1.1  christos 			    isc_rwlocktype_read);
   3418  1.1  christos 		if (header != NULL) {
   3419  1.1  christos 			break;
   3420  1.1  christos 		}
   3421  1.1  christos 		result = dns_rbtnodechain_next(chain, NULL, NULL);
   3422  1.1  christos 	}
   3423  1.1  christos 	if (result == ISC_R_SUCCESS) {
   3424  1.1  christos 		result = dns_name_concatenate(&prefix, origin, next, NULL);
   3425  1.1  christos 	}
   3426  1.1  christos 	if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name)) {
   3427  1.1  christos 		answer = true;
   3428  1.1  christos 	}
   3429  1.1  christos 	return (answer);
   3430  1.1  christos }
   3431  1.1  christos 
   3432  1.1  christos static bool
   3433  1.1  christos activeemptynode(rbtdb_search_t *search, const dns_name_t *qname,
   3434  1.1  christos 		dns_name_t *wname) {
   3435  1.1  christos 	dns_fixedname_t fnext;
   3436  1.1  christos 	dns_fixedname_t forigin;
   3437  1.1  christos 	dns_fixedname_t fprev;
   3438  1.1  christos 	dns_name_t *next;
   3439  1.1  christos 	dns_name_t *origin;
   3440  1.1  christos 	dns_name_t *prev;
   3441  1.1  christos 	dns_name_t name;
   3442  1.1  christos 	dns_name_t rname;
   3443  1.1  christos 	dns_name_t tname;
   3444  1.1  christos 	dns_rbtdb_t *rbtdb;
   3445  1.1  christos 	dns_rbtnode_t *node;
   3446  1.1  christos 	dns_rbtnodechain_t chain;
   3447  1.1  christos 	bool check_next = true;
   3448  1.1  christos 	bool check_prev = true;
   3449  1.1  christos 	bool answer = false;
   3450  1.1  christos 	isc_result_t result;
   3451  1.1  christos 	rdatasetheader_t *header;
   3452  1.1  christos 	unsigned int n;
   3453  1.1  christos 
   3454  1.1  christos 	rbtdb = search->rbtdb;
   3455  1.1  christos 
   3456  1.1  christos 	dns_name_init(&name, NULL);
   3457  1.1  christos 	dns_name_init(&tname, NULL);
   3458  1.1  christos 	dns_name_init(&rname, NULL);
   3459  1.1  christos 	next = dns_fixedname_initname(&fnext);
   3460  1.1  christos 	prev = dns_fixedname_initname(&fprev);
   3461  1.1  christos 	origin = dns_fixedname_initname(&forigin);
   3462  1.1  christos 
   3463  1.1  christos 	/*
   3464  1.1  christos 	 * Find if qname is at or below a empty node.
   3465  1.1  christos 	 * Use our own copy of the chain.
   3466  1.1  christos 	 */
   3467  1.1  christos 
   3468  1.1  christos 	chain = search->chain;
   3469  1.1  christos 	do {
   3470  1.1  christos 		node = NULL;
   3471  1.1  christos 		result = dns_rbtnodechain_current(&chain, &name, origin, &node);
   3472  1.1  christos 		if (result != ISC_R_SUCCESS) {
   3473  1.1  christos 			break;
   3474  1.1  christos 		}
   3475  1.1  christos 		NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
   3476  1.1  christos 			  isc_rwlocktype_read);
   3477  1.1  christos 		for (header = node->data; header != NULL; header = header->next)
   3478  1.1  christos 		{
   3479  1.1  christos 			if (header->serial <= search->serial &&
   3480  1.1  christos 			    !IGNORE(header) && EXISTS(header))
   3481  1.1  christos 			{
   3482  1.1  christos 				break;
   3483  1.1  christos 			}
   3484  1.1  christos 		}
   3485  1.1  christos 		NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
   3486  1.1  christos 			    isc_rwlocktype_read);
   3487  1.1  christos 		if (header != NULL) {
   3488  1.1  christos 			break;
   3489  1.1  christos 		}
   3490  1.1  christos 		result = dns_rbtnodechain_prev(&chain, NULL, NULL);
   3491  1.1  christos 	} while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
   3492  1.1  christos 	if (result == ISC_R_SUCCESS) {
   3493  1.1  christos 		result = dns_name_concatenate(&name, origin, prev, NULL);
   3494  1.1  christos 	}
   3495  1.1  christos 	if (result != ISC_R_SUCCESS) {
   3496  1.1  christos 		check_prev = false;
   3497  1.1  christos 	}
   3498  1.1  christos 
   3499  1.1  christos 	result = dns_rbtnodechain_next(&chain, NULL, NULL);
   3500  1.1  christos 	while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
   3501  1.1  christos 		node = NULL;
   3502  1.1  christos 		result = dns_rbtnodechain_current(&chain, &name, origin, &node);
   3503  1.1  christos 		if (result != ISC_R_SUCCESS) {
   3504  1.1  christos 			break;
   3505  1.1  christos 		}
   3506  1.1  christos 		NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
   3507  1.1  christos 			  isc_rwlocktype_read);
   3508  1.1  christos 		for (header = node->data; header != NULL; header = header->next)
   3509  1.1  christos 		{
   3510  1.1  christos 			if (header->serial <= search->serial &&
   3511  1.1  christos 			    !IGNORE(header) && EXISTS(header))
   3512  1.1  christos 			{
   3513  1.1  christos 				break;
   3514  1.1  christos 			}
   3515  1.1  christos 		}
   3516  1.1  christos 		NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
   3517  1.1  christos 			    isc_rwlocktype_read);
   3518  1.1  christos 		if (header != NULL) {
   3519  1.1  christos 			break;
   3520  1.1  christos 		}
   3521  1.1  christos 		result = dns_rbtnodechain_next(&chain, NULL, NULL);
   3522  1.1  christos 	}
   3523  1.1  christos 	if (result == ISC_R_SUCCESS) {
   3524  1.1  christos 		result = dns_name_concatenate(&name, origin, next, NULL);
   3525  1.1  christos 	}
   3526  1.1  christos 	if (result != ISC_R_SUCCESS) {
   3527  1.1  christos 		check_next = false;
   3528  1.1  christos 	}
   3529  1.1  christos 
   3530  1.1  christos 	dns_name_clone(qname, &rname);
   3531  1.1  christos 
   3532  1.1  christos 	/*
   3533  1.1  christos 	 * Remove the wildcard label to find the terminal name.
   3534  1.1  christos 	 */
   3535  1.1  christos 	n = dns_name_countlabels(wname);
   3536  1.1  christos 	dns_name_getlabelsequence(wname, 1, n - 1, &tname);
   3537  1.1  christos 
   3538  1.1  christos 	do {
   3539  1.1  christos 		if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
   3540  1.1  christos 		    (check_next && dns_name_issubdomain(next, &rname)))
   3541  1.1  christos 		{
   3542  1.1  christos 			answer = true;
   3543  1.1  christos 			break;
   3544  1.1  christos 		}
   3545  1.1  christos 		/*
   3546  1.1  christos 		 * Remove the left hand label.
   3547  1.1  christos 		 */
   3548  1.1  christos 		n = dns_name_countlabels(&rname);
   3549  1.1  christos 		dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
   3550  1.1  christos 	} while (!dns_name_equal(&rname, &tname));
   3551  1.1  christos 	return (answer);
   3552  1.1  christos }
   3553  1.1  christos 
   3554  1.1  christos static isc_result_t
   3555  1.1  christos find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
   3556  1.1  christos 	      const dns_name_t *qname) {
   3557  1.1  christos 	unsigned int i, j;
   3558  1.1  christos 	dns_rbtnode_t *node, *level_node, *wnode;
   3559  1.1  christos 	rdatasetheader_t *header;
   3560  1.1  christos 	isc_result_t result = ISC_R_NOTFOUND;
   3561  1.1  christos 	dns_name_t name;
   3562  1.1  christos 	dns_name_t *wname;
   3563  1.1  christos 	dns_fixedname_t fwname;
   3564  1.1  christos 	dns_rbtdb_t *rbtdb;
   3565  1.1  christos 	bool done, wild, active;
   3566  1.1  christos 	dns_rbtnodechain_t wchain;
   3567  1.1  christos 
   3568  1.1  christos 	/*
   3569  1.1  christos 	 * Caller must be holding the tree lock and MUST NOT be holding
   3570  1.1  christos 	 * any node locks.
   3571  1.1  christos 	 */
   3572  1.1  christos 
   3573  1.1  christos 	/*
   3574  1.1  christos 	 * Examine each ancestor level.  If the level's wild bit
   3575  1.1  christos 	 * is set, then construct the corresponding wildcard name and
   3576  1.1  christos 	 * search for it.  If the wildcard node exists, and is active in
   3577  1.1  christos 	 * this version, we're done.  If not, then we next check to see
   3578  1.1  christos 	 * if the ancestor is active in this version.  If so, then there
   3579  1.1  christos 	 * can be no possible wildcard match and again we're done.  If not,
   3580  1.1  christos 	 * continue the search.
   3581  1.1  christos 	 */
   3582  1.1  christos 
   3583  1.1  christos 	rbtdb = search->rbtdb;
   3584  1.1  christos 	i = search->chain.level_matches;
   3585  1.1  christos 	done = false;
   3586  1.1  christos 	node = *nodep;
   3587  1.1  christos 	do {
   3588  1.1  christos 		NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
   3589  1.1  christos 			  isc_rwlocktype_read);
   3590  1.1  christos 
   3591  1.1  christos 		/*
   3592  1.1  christos 		 * First we try to figure out if this node is active in
   3593  1.1  christos 		 * the search's version.  We do this now, even though we
   3594  1.1  christos 		 * may not need the information, because it simplifies the
   3595  1.1  christos 		 * locking and code flow.
   3596  1.1  christos 		 */
   3597  1.1  christos 		for (header = node->data; header != NULL; header = header->next)
   3598  1.1  christos 		{
   3599  1.1  christos 			if (header->serial <= search->serial &&
   3600  1.1  christos 			    !IGNORE(header) && EXISTS(header) &&
   3601  1.1  christos 			    !ANCIENT(header))
   3602  1.1  christos 			{
   3603  1.1  christos 				break;
   3604  1.1  christos 			}
   3605  1.1  christos 		}
   3606  1.1  christos 		if (header != NULL) {
   3607  1.1  christos 			active = true;
   3608  1.1  christos 		} else {
   3609  1.1  christos 			active = false;
   3610  1.1  christos 		}
   3611  1.1  christos 
   3612  1.1  christos 		if (node->wild) {
   3613  1.1  christos 			wild = true;
   3614  1.1  christos 		} else {
   3615  1.1  christos 			wild = false;
   3616  1.1  christos 		}
   3617  1.1  christos 
   3618  1.1  christos 		NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
   3619  1.1  christos 			    isc_rwlocktype_read);
   3620  1.1  christos 
   3621  1.1  christos 		if (wild) {
   3622  1.1  christos 			/*
   3623  1.1  christos 			 * Construct the wildcard name for this level.
   3624  1.1  christos 			 */
   3625  1.1  christos 			dns_name_init(&name, NULL);
   3626  1.1  christos 			dns_rbt_namefromnode(node, &name);
   3627  1.1  christos 			wname = dns_fixedname_initname(&fwname);
   3628  1.1  christos 			result = dns_name_concatenate(dns_wildcardname, &name,
   3629  1.1  christos 						      wname, NULL);
   3630  1.1  christos 			j = i;
   3631  1.1  christos 			while (result == ISC_R_SUCCESS && j != 0) {
   3632  1.1  christos 				j--;
   3633  1.1  christos 				level_node = search->chain.levels[j];
   3634  1.1  christos 				dns_name_init(&name, NULL);
   3635  1.1  christos 				dns_rbt_namefromnode(level_node, &name);
   3636  1.1  christos 				result = dns_name_concatenate(wname, &name,
   3637  1.1  christos 							      wname, NULL);
   3638  1.1  christos 			}
   3639  1.1  christos 			if (result != ISC_R_SUCCESS) {
   3640  1.1  christos 				break;
   3641  1.1  christos 			}
   3642  1.1  christos 
   3643  1.1  christos 			wnode = NULL;
   3644  1.1  christos 			dns_rbtnodechain_init(&wchain);
   3645  1.1  christos 			result = dns_rbt_findnode(
   3646  1.1  christos 				rbtdb->tree, wname, NULL, &wnode, &wchain,
   3647  1.1  christos 				DNS_RBTFIND_EMPTYDATA, NULL, NULL);
   3648  1.1  christos 			if (result == ISC_R_SUCCESS) {
   3649  1.1  christos 				nodelock_t *lock;
   3650  1.1  christos 
   3651  1.1  christos 				/*
   3652  1.1  christos 				 * We have found the wildcard node.  If it
   3653  1.1  christos 				 * is active in the search's version, we're
   3654  1.1  christos 				 * done.
   3655  1.1  christos 				 */
   3656  1.1  christos 				lock = &rbtdb->node_locks[wnode->locknum].lock;
   3657  1.1  christos 				NODE_LOCK(lock, isc_rwlocktype_read);
   3658  1.1  christos 				for (header = wnode->data; header != NULL;
   3659  1.1  christos 				     header = header->next)
   3660  1.1  christos 				{
   3661  1.1  christos 					if (header->serial <= search->serial &&
   3662  1.1  christos 					    !IGNORE(header) && EXISTS(header) &&
   3663  1.1  christos 					    !ANCIENT(header))
   3664  1.1  christos 					{
   3665  1.1  christos 						break;
   3666  1.1  christos 					}
   3667  1.1  christos 				}
   3668  1.1  christos 				NODE_UNLOCK(lock, isc_rwlocktype_read);
   3669  1.1  christos 				if (header != NULL ||
   3670  1.1  christos 				    activeempty(search, &wchain, wname))
   3671  1.1  christos 				{
   3672  1.1  christos 					if (activeemptynode(search, qname,
   3673  1.1  christos 							    wname))
   3674  1.1  christos 					{
   3675  1.1  christos 						return (ISC_R_NOTFOUND);
   3676  1.1  christos 					}
   3677  1.1  christos 					/*
   3678  1.1  christos 					 * The wildcard node is active!
   3679  1.1  christos 					 *
   3680  1.1  christos 					 * Note: result is still ISC_R_SUCCESS
   3681  1.1  christos 					 * so we don't have to set it.
   3682  1.1  christos 					 */
   3683  1.1  christos 					*nodep = wnode;
   3684  1.1  christos 					break;
   3685  1.1  christos 				}
   3686  1.1  christos 			} else if (result != ISC_R_NOTFOUND &&
   3687  1.1  christos 				   result != DNS_R_PARTIALMATCH)
   3688  1.1  christos 			{
   3689  1.1  christos 				/*
   3690  1.1  christos 				 * An error has occurred.  Bail out.
   3691  1.1  christos 				 */
   3692  1.1  christos 				break;
   3693  1.1  christos 			}
   3694  1.1  christos 		}
   3695  1.1  christos 
   3696  1.1  christos 		if (active) {
   3697  1.1  christos 			/*
   3698  1.1  christos 			 * The level node is active.  Any wildcarding
   3699  1.1  christos 			 * present at higher levels has no
   3700  1.1  christos 			 * effect and we're done.
   3701  1.1  christos 			 */
   3702  1.1  christos 			result = ISC_R_NOTFOUND;
   3703  1.1  christos 			break;
   3704  1.1  christos 		}
   3705  1.1  christos 
   3706  1.1  christos 		if (i > 0) {
   3707  1.1  christos 			i--;
   3708  1.1  christos 			node = search->chain.levels[i];
   3709  1.1  christos 		} else {
   3710  1.1  christos 			done = true;
   3711  1.1  christos 		}
   3712  1.1  christos 	} while (!done);
   3713  1.1  christos 
   3714  1.1  christos 	return (result);
   3715  1.1  christos }
   3716  1.1  christos 
   3717  1.1  christos static bool
   3718  1.1  christos matchparams(rdatasetheader_t *header, rbtdb_search_t *search) {
   3719  1.1  christos 	dns_rdata_t rdata = DNS_RDATA_INIT;
   3720  1.1  christos 	dns_rdata_nsec3_t nsec3;
   3721  1.1  christos 	unsigned char *raw; /* RDATASLAB */
   3722  1.1  christos 	unsigned int rdlen, count;
   3723  1.1  christos 	isc_region_t region;
   3724  1.1  christos 	isc_result_t result;
   3725  1.1  christos 
   3726  1.1  christos 	REQUIRE(header->type == dns_rdatatype_nsec3);
   3727  1.1  christos 
   3728  1.1  christos 	raw = (unsigned char *)header + sizeof(*header);
   3729  1.1  christos 	count = raw[0] * 256 + raw[1]; /* count */
   3730  1.1  christos 	raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH;
   3731  1.1  christos 
   3732  1.1  christos 	while (count-- > 0) {
   3733  1.1  christos 		rdlen = raw[0] * 256 + raw[1];
   3734  1.1  christos 		raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH;
   3735  1.1  christos 		region.base = raw;
   3736  1.1  christos 		region.length = rdlen;
   3737  1.1  christos 		dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
   3738  1.1  christos 				     dns_rdatatype_nsec3, &region);
   3739  1.1  christos 		raw += rdlen;
   3740  1.1  christos 		result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
   3741  1.1  christos 		INSIST(result == ISC_R_SUCCESS);
   3742  1.1  christos 		if (nsec3.hash == search->rbtversion->hash &&
   3743  1.1  christos 		    nsec3.iterations == search->rbtversion->iterations &&
   3744  1.1  christos 		    nsec3.salt_length == search->rbtversion->salt_length &&
   3745  1.1  christos 		    memcmp(nsec3.salt, search->rbtversion->salt,
   3746  1.1  christos 			   nsec3.salt_length) == 0)
   3747  1.1  christos 		{
   3748  1.1  christos 			return (true);
   3749  1.1  christos 		}
   3750  1.1  christos 		dns_rdata_reset(&rdata);
   3751  1.1  christos 	}
   3752  1.1  christos 	return (false);
   3753  1.1  christos }
   3754  1.1  christos 
   3755  1.1  christos /*
   3756  1.1  christos  * Find node of the NSEC/NSEC3 record that is 'name'.
   3757  1.1  christos  */
   3758  1.1  christos static isc_result_t
   3759  1.1  christos previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
   3760  1.1  christos 		      dns_name_t *name, dns_name_t *origin,
   3761  1.1  christos 		      dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
   3762  1.1  christos 		      bool *firstp) {
   3763  1.1  christos 	dns_fixedname_t ftarget;
   3764  1.1  christos 	dns_name_t *target;
   3765  1.1  christos 	dns_rbtnode_t *nsecnode;
   3766  1.1  christos 	isc_result_t result;
   3767  1.1  christos 
   3768  1.1  christos 	REQUIRE(nodep != NULL && *nodep == NULL);
   3769  1.1  christos 	REQUIRE(type == dns_rdatatype_nsec3 || firstp != NULL);
   3770  1.1  christos 
   3771  1.1  christos 	if (type == dns_rdatatype_nsec3) {
   3772  1.1  christos 		result = dns_rbtnodechain_prev(&search->chain, NULL, NULL);
   3773  1.1  christos 		if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN) {
   3774  1.1  christos 			return (result);
   3775  1.1  christos 		}
   3776  1.1  christos 		result = dns_rbtnodechain_current(&search->chain, name, origin,
   3777  1.1  christos 						  nodep);
   3778  1.1  christos 		return (result);
   3779  1.1  christos 	}
   3780  1.1  christos 
   3781  1.1  christos 	target = dns_fixedname_initname(&ftarget);
   3782  1.1  christos 
   3783  1.1  christos 	for (;;) {
   3784  1.1  christos 		if (*firstp) {
   3785  1.1  christos 			/*
   3786  1.1  christos 			 * Construct the name of the second node to check.
   3787  1.1  christos 			 * It is the first node sought in the NSEC tree.
   3788  1.1  christos 			 */
   3789  1.1  christos 			*firstp = false;
   3790  1.1  christos 			dns_rbtnodechain_init(nsecchain);
   3791  1.1  christos 			result = dns_name_concatenate(name, origin, target,
   3792  1.1  christos 						      NULL);
   3793  1.1  christos 			if (result != ISC_R_SUCCESS) {
   3794  1.1  christos 				return (result);
   3795  1.1  christos 			}
   3796  1.1  christos 			nsecnode = NULL;
   3797  1.1  christos 			result = dns_rbt_findnode(
   3798  1.1  christos 				search->rbtdb->nsec, target, NULL, &nsecnode,
   3799  1.1  christos 				nsecchain, DNS_RBTFIND_EMPTYDATA, NULL, NULL);
   3800  1.1  christos 			if (result == ISC_R_SUCCESS) {
   3801  1.1  christos 				/*
   3802  1.1  christos 				 * Since this was the first loop, finding the
   3803  1.1  christos 				 * name in the NSEC tree implies that the first
   3804  1.1  christos 				 * node checked in the main tree had an
   3805  1.1  christos 				 * unacceptable NSEC record.
   3806  1.1  christos 				 * Try the previous node in the NSEC tree.
   3807  1.1  christos 				 */
   3808  1.1  christos 				result = dns_rbtnodechain_prev(nsecchain, name,
   3809  1.1  christos 							       origin);
   3810  1.1  christos 				if (result == DNS_R_NEWORIGIN) {
   3811  1.1  christos 					result = ISC_R_SUCCESS;
   3812  1.1  christos 				}
   3813  1.1  christos 			} else if (result == ISC_R_NOTFOUND ||
   3814  1.1  christos 				   result == DNS_R_PARTIALMATCH)
   3815  1.1  christos 			{
   3816  1.1  christos 				result = dns_rbtnodechain_current(
   3817  1.1  christos 					nsecchain, name, origin, NULL);
   3818  1.1  christos 				if (result == ISC_R_NOTFOUND) {
   3819  1.1  christos 					result = ISC_R_NOMORE;
   3820  1.1  christos 				}
   3821  1.1  christos 			}
   3822  1.1  christos 		} else {
   3823  1.1  christos 			/*
   3824  1.1  christos 			 * This is a second or later trip through the auxiliary
   3825  1.1  christos 			 * tree for the name of a third or earlier NSEC node in
   3826  1.1  christos 			 * the main tree.  Previous trips through the NSEC tree
   3827  1.1  christos 			 * must have found nodes in the main tree with NSEC
   3828  1.1  christos 			 * records.  Perhaps they lacked signature records.
   3829  1.1  christos 			 */
   3830  1.1  christos 			result = dns_rbtnodechain_prev(nsecchain, name, origin);
   3831  1.1  christos 			if (result == DNS_R_NEWORIGIN) {
   3832  1.1  christos 				result = ISC_R_SUCCESS;
   3833  1.1  christos 			}
   3834  1.1  christos 		}
   3835  1.1  christos 		if (result != ISC_R_SUCCESS) {
   3836  1.1  christos 			return (result);
   3837  1.1  christos 		}
   3838  1.1  christos 
   3839  1.1  christos 		/*
   3840  1.1  christos 		 * Construct the name to seek in the main tree.
   3841  1.1  christos 		 */
   3842  1.1  christos 		result = dns_name_concatenate(name, origin, target, NULL);
   3843  1.1  christos 		if (result != ISC_R_SUCCESS) {
   3844  1.1  christos 			return (result);
   3845  1.1  christos 		}
   3846  1.1  christos 
   3847  1.1  christos 		*nodep = NULL;
   3848  1.1  christos 		result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
   3849  1.1  christos 					  nodep, &search->chain,
   3850  1.1  christos 					  DNS_RBTFIND_EMPTYDATA, NULL, NULL);
   3851  1.1  christos 		if (result == ISC_R_SUCCESS) {
   3852  1.1  christos 			return (result);
   3853  1.1  christos 		}
   3854  1.1  christos 
   3855  1.1  christos 		/*
   3856  1.1  christos 		 * There should always be a node in the main tree with the
   3857  1.1  christos 		 * same name as the node in the auxiliary NSEC tree, except for
   3858  1.1  christos 		 * nodes in the auxiliary tree that are awaiting deletion.
   3859  1.1  christos 		 */
   3860  1.1  christos 		if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) {
   3861  1.1  christos 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
   3862  1.1  christos 				      DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
   3863  1.1  christos 				      "previous_closest_nsec(): %s",
   3864  1.1  christos 				      isc_result_totext(result));
   3865  1.1  christos 			return (DNS_R_BADDB);
   3866  1.1  christos 		}
   3867  1.1  christos 	}
   3868  1.1  christos }
   3869  1.1  christos 
   3870  1.1  christos /*
   3871  1.1  christos  * Find the NSEC/NSEC3 which is at or before the current point on the
   3872  1.1  christos  * search chain.  For NSEC3 records only NSEC3 records that match the
   3873  1.1  christos  * current NSEC3PARAM record are considered.
                        *
                        * 'tree' selects what is searched for: when it is search->rbtdb->nsec3
                        * the function looks for NSEC3 and RRSIG NSEC3 and, if the walk runs out
                        * of nodes, restarts once from the last node of 'tree'; otherwise it
                        * looks for NSEC and RRSIG NSEC and never wraps.
                        *
                        * The walk starts at the node search->chain currently points to.  Each
                        * node is examined while holding its node lock for reading.  A node with
                        * no rdataset active at search->serial, or with neither of the two
                        * record types of interest, is skipped and the walk continues with the
                        * node chosen by previous_closest_nsec().
                        *
                        * A signature is required in addition to the NSEC/NSEC3 itself only when
                        * 'secure' is dns_db_secure.
                        *
                        * On ISC_R_SUCCESS the owner name (name + origin) has been written to
                        * 'foundname', the record found has been passed to bind_rdataset() with
                        * 'rdataset' (and the signature, if one was found, with 'sigrdataset'),
                        * and, if 'nodep' is not NULL, '*nodep' is set to the node after
                        * new_reference() has been called on it.
                        *
                        * DNS_R_BADDB is returned when an active node has the signature without
                        * the record, or the record without a required signature, and when the
                        * walk ends with ISC_R_NOMORE.  Other non-success results from the
                        * helpers called here are returned to the caller.
   3874  1.1  christos  */
   3875  1.1  christos static isc_result_t
   3876  1.1  christos find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
   3877  1.1  christos 		  dns_name_t *foundname, dns_rdataset_t *rdataset,
   3878  1.1  christos 		  dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
   3879  1.1  christos 		  dns_db_secure_t secure) {
   3880  1.1  christos 	dns_rbtnode_t *node, *prevnode;
   3881  1.1  christos 	rdatasetheader_t *header, *header_next, *found, *foundsig;
   3882  1.1  christos 	dns_rbtnodechain_t nsecchain;
   3883  1.1  christos 	bool empty_node;
   3884  1.1  christos 	isc_result_t result;
   3885  1.1  christos 	dns_fixedname_t fname, forigin;
   3886  1.1  christos 	dns_name_t *name, *origin;
   3887  1.1  christos 	dns_rdatatype_t type;
   3888  1.1  christos 	rbtdb_rdatatype_t sigtype;
   3889  1.1  christos 	bool wraps;
                       	/*
                       	 * 'first' stays true until previous_closest_nsec() initializes
                       	 * 'nsecchain'; it therefore also records whether 'nsecchain'
                       	 * needs to be invalidated before returning.
                       	 */
   3890  1.1  christos 	bool first = true;
   3891  1.1  christos 	bool need_sig = (secure == dns_db_secure);
   3892  1.1  christos 
   3893  1.1  christos 	if (tree == search->rbtdb->nsec3) {
   3894  1.1  christos 		type = dns_rdatatype_nsec3;
   3895  1.1  christos 		sigtype = RBTDB_RDATATYPE_SIGNSEC3;
   3896  1.1  christos 		wraps = true;
   3897  1.1  christos 	} else {
   3898  1.1  christos 		type = dns_rdatatype_nsec;
   3899  1.1  christos 		sigtype = RBTDB_RDATATYPE_SIGNSEC;
   3900  1.1  christos 		wraps = false;
   3901  1.1  christos 	}
   3902  1.1  christos 
   3903  1.1  christos 	/*
   3904  1.1  christos 	 * Use the auxiliary tree only starting with the second node in the
   3905  1.1  christos 	 * hope that the original node will be right much of the time.
   3906  1.1  christos 	 */
   3907  1.1  christos 	name = dns_fixedname_initname(&fname);
   3908  1.1  christos 	origin = dns_fixedname_initname(&forigin);
   3909  1.1  christos again:
   3910  1.1  christos 	node = NULL;
   3911  1.1  christos 	prevnode = NULL;
   3912  1.1  christos 	result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
   3913  1.1  christos 	if (result != ISC_R_SUCCESS) {
   3914  1.1  christos 		return (result);
   3915  1.1  christos 	}
   3916  1.1  christos 	do {
   3917  1.1  christos 		NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
   3918  1.1  christos 			  isc_rwlocktype_read);
   3919  1.1  christos 		found = NULL;
   3920  1.1  christos 		foundsig = NULL;
   3921  1.1  christos 		empty_node = true;
   3922  1.1  christos 		for (header = node->data; header != NULL; header = header_next)
   3923  1.1  christos 		{
                       			/*
                       			 * Save the sibling link now: 'header' is
                       			 * advanced along its 'down' list, or set to
                       			 * NULL, by the inner loop below.
                       			 */
   3924  1.1  christos 			header_next = header->next;
   3925  1.1  christos 			/*
   3926  1.1  christos 			 * Look for an active, extant NSEC or RRSIG NSEC.
   3927  1.1  christos 			 */
   3928  1.1  christos 			do {
   3929  1.1  christos 				if (header->serial <= search->serial &&
   3930  1.1  christos 				    !IGNORE(header))
   3931  1.1  christos 				{
   3932  1.1  christos 					/*
   3933  1.1  christos 					 * Is this a "this rdataset doesn't
   3934  1.1  christos 					 * exist" record?
   3935  1.1  christos 					 */
   3936  1.1  christos 					if (NONEXISTENT(header)) {
   3937  1.1  christos 						header = NULL;
   3938  1.1  christos 					}
   3939  1.1  christos 					break;
   3940  1.1  christos 				} else {
   3941  1.1  christos 					header = header->down;
   3942  1.1  christos 				}
   3943  1.1  christos 			} while (header != NULL);
   3944  1.1  christos 			if (header != NULL) {
   3945  1.1  christos 				/*
   3946  1.1  christos 				 * We now know that there is at least one
   3947  1.1  christos 				 * active rdataset at this node.
   3948  1.1  christos 				 */
   3949  1.1  christos 				empty_node = false;
                       				/*
                       				 * Stop scanning the node as soon as both
                       				 * the record and its signature are known.
                       				 */
   3950  1.1  christos 				if (header->type == type) {
   3951  1.1  christos 					found = header;
   3952  1.1  christos 					if (foundsig != NULL) {
   3953  1.1  christos 						break;
   3954  1.1  christos 					}
   3955  1.1  christos 				} else if (header->type == sigtype) {
   3956  1.1  christos 					foundsig = header;
   3957  1.1  christos 					if (found != NULL) {
   3958  1.1  christos 						break;
   3959  1.1  christos 					}
   3960  1.1  christos 				}
   3961  1.1  christos 			}
   3962  1.1  christos 		}
   3963  1.1  christos 		if (!empty_node) {
   3964  1.1  christos 			if (found != NULL && search->rbtversion->havensec3 &&
   3965  1.1  christos 			    found->type == dns_rdatatype_nsec3 &&
   3966  1.1  christos 			    !matchparams(found, search))
   3967  1.1  christos 			{
                       				/*
                       				 * The NSEC3 found does not match the
                       				 * parameters in use, so treat the node as
                       				 * empty and step back.  No NSEC chain or
                       				 * 'first' flag is passed here:
                       				 * previous_closest_nsec() only requires
                       				 * them for types other than NSEC3.
                       				 */
   3968  1.1  christos 				empty_node = true;
   3969  1.1  christos 				found = NULL;
   3970  1.1  christos 				foundsig = NULL;
   3971  1.1  christos 				result = previous_closest_nsec(
   3972  1.1  christos 					type, search, name, origin, &prevnode,
   3973  1.1  christos 					NULL, NULL);
   3974  1.1  christos 			} else if (found != NULL &&
   3975  1.1  christos 				   (foundsig != NULL || !need_sig))
   3976  1.1  christos 			{
   3977  1.1  christos 				/*
   3978  1.1  christos 				 * We've found the right NSEC/NSEC3 record.
   3979  1.1  christos 				 *
   3980  1.1  christos 				 * Note: for this to really be the right
   3981  1.1  christos 				 * NSEC record, it's essential that the NSEC
   3982  1.1  christos 				 * records of any nodes obscured by a zone
   3983  1.1  christos 				 * cut have been removed; we assume this is
   3984  1.1  christos 				 * the case.
   3985  1.1  christos 				 */
   3986  1.1  christos 				result = dns_name_concatenate(name, origin,
   3987  1.1  christos 							      foundname, NULL);
   3988  1.1  christos 				if (result == ISC_R_SUCCESS) {
   3989  1.1  christos 					if (nodep != NULL) {
   3990  1.1  christos 						new_reference(
   3991  1.1  christos 							search->rbtdb, node,
   3992  1.1  christos 							isc_rwlocktype_read);
   3993  1.1  christos 						*nodep = node;
   3994  1.1  christos 					}
   3995  1.1  christos 					bind_rdataset(search->rbtdb, node,
   3996  1.1  christos 						      found, search->now,
   3997  1.1  christos 						      isc_rwlocktype_read,
   3998  1.1  christos 						      rdataset);
   3999  1.1  christos 					if (foundsig != NULL) {
   4000  1.1  christos 						bind_rdataset(
   4001  1.1  christos 							search->rbtdb, node,
   4002  1.1  christos 							foundsig, search->now,
   4003  1.1  christos 							isc_rwlocktype_read,
   4004  1.1  christos 							sigrdataset);
   4005  1.1  christos 					}
   4006  1.1  christos 				}
   4007  1.1  christos 			} else if (found == NULL && foundsig == NULL) {
   4008  1.1  christos 				/*
   4009  1.1  christos 				 * This node is active, but has no NSEC or
   4010  1.1  christos 				 * RRSIG NSEC.  That means it's glue or
   4011  1.1  christos 				 * other obscured zone data that isn't
   4012  1.1  christos 				 * relevant for our search.  Treat the
   4013  1.1  christos 				 * node as if it were empty and keep looking.
   4014  1.1  christos 				 */
   4015  1.1  christos 				empty_node = true;
   4016  1.1  christos 				result = previous_closest_nsec(
   4017  1.1  christos 					type, search, name, origin, &prevnode,
   4018  1.1  christos 					&nsecchain, &first);
   4019  1.1  christos 			} else {
   4020  1.1  christos 				/*
   4021  1.1  christos 				 * We found an active node, but either the
   4022  1.1  christos 				 * NSEC or the RRSIG NSEC is missing.  This
   4023  1.1  christos 				 * shouldn't happen.
   4024  1.1  christos 				 */
   4025  1.1  christos 				result = DNS_R_BADDB;
   4026  1.1  christos 			}
   4027  1.1  christos 		} else {
   4028  1.1  christos 			/*
   4029  1.1  christos 			 * This node isn't active.  We've got to keep
   4030  1.1  christos 			 * looking.
   4031  1.1  christos 			 */
   4032  1.1  christos 			result = previous_closest_nsec(type, search, name,
   4033  1.1  christos 						       origin, &prevnode,
   4034  1.1  christos 						       &nsecchain, &first);
   4035  1.1  christos 		}
   4036  1.1  christos 		NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
   4037  1.1  christos 			    isc_rwlocktype_read);
                       		/*
                       		 * Continue with the node previous_closest_nsec() stored in
                       		 * 'prevnode'.  It is still NULL when that function was not
                       		 * called on this pass, in which case 'empty_node' is false
                       		 * and the loop ends.
                       		 */
   4038  1.1  christos 		node = prevnode;
   4039  1.1  christos 		prevnode = NULL;
   4040  1.1  christos 	} while (empty_node && result == ISC_R_SUCCESS);
   4041  1.1  christos 
                       	/*
                       	 * 'nsecchain' was initialized only if previous_closest_nsec()
                       	 * cleared 'first'.
                       	 */
   4042  1.1  christos 	if (!first) {
   4043  1.1  christos 		dns_rbtnodechain_invalidate(&nsecchain);
   4044  1.1  christos 	}
   4045  1.1  christos 
                       	/*
                       	 * Only the NSEC3 search wraps.  Restart once from the last node
                       	 * of 'tree'; 'wraps' is cleared first so that running out of
                       	 * nodes a second time falls through to DNS_R_BADDB below.
                       	 */
   4046  1.1  christos 	if (result == ISC_R_NOMORE && wraps) {
   4047  1.1  christos 		result = dns_rbtnodechain_last(&search->chain, tree, NULL,
   4048  1.1  christos 					       NULL);
   4049  1.1  christos 		if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
   4050  1.1  christos 			wraps = false;
   4051  1.1  christos 			goto again;
   4052  1.1  christos 		}
   4053  1.1  christos 	}
   4054  1.1  christos 
   4055  1.1  christos 	/*
   4056  1.1  christos 	 * If the result is ISC_R_NOMORE, then we got to the beginning of
   4057  1.1  christos 	 * the database and didn't find a NSEC record.  This shouldn't
   4058  1.1  christos 	 * happen.
   4059  1.1  christos 	 */
   4060  1.1  christos 	if (result == ISC_R_NOMORE) {
   4061  1.1  christos 		result = DNS_R_BADDB;
   4062  1.1  christos 	}
   4063  1.1  christos 
   4064  1.1  christos 	return (result);
   4065  1.1  christos }
   4066  1.1  christos 
   4067  1.1  christos static isc_result_t
   4068  1.1  christos zone_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
   4069  1.1  christos 	  dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
   4070  1.1  christos 	  dns_dbnode_t **nodep, dns_name_t *foundname, dns_rdataset_t *rdataset,
   4071  1.1  christos 	  dns_rdataset_t *sigrdataset) {
   4072  1.1  christos 	dns_rbtnode_t *node = NULL;
   4073  1.1  christos 	isc_result_t result;
   4074  1.1  christos 	rbtdb_search_t search;
   4075  1.1  christos 	bool cname_ok = true;
   4076  1.1  christos 	bool close_version = false;
   4077  1.1  christos 	bool maybe_zonecut = false;
   4078  1.1  christos 	bool at_zonecut = false;
   4079  1.1  christos 	bool wild;
   4080  1.1  christos 	bool empty_node;
   4081  1.1  christos 	rdatasetheader_t *header, *header_next, *found, *nsecheader;
   4082  1.1  christos 	rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
   4083  1.1  christos 	rbtdb_rdatatype_t sigtype;
   4084  1.1  christos 	bool active;
   4085  1.1  christos 	nodelock_t *lock;
   4086  1.1  christos 	dns_rbt_t *tree;
   4087  1.1  christos 
   4088  1.1  christos 	search.rbtdb = (dns_rbtdb_t *)db;
   4089  1.1  christos 
   4090  1.1  christos 	REQUIRE(VALID_RBTDB(search.rbtdb));
   4091  1.1  christos 	INSIST(version == NULL ||
   4092  1.1  christos 	       ((rbtdb_version_t *)version)->rbtdb == (dns_rbtdb_t *)db);
   4093  1.1  christos 
   4094  1.1  christos 	/*
   4095  1.1  christos 	 * We don't care about 'now'.
   4096  1.1  christos 	 */
   4097  1.1  christos 	UNUSED(now);
   4098  1.1  christos 
   4099  1.1  christos 	/*
   4100  1.1  christos 	 * If the caller didn't supply a version, attach to the current
   4101  1.1  christos 	 * version.
   4102  1.1  christos 	 */
   4103  1.1  christos 	if (version == NULL) {
   4104  1.1  christos 		currentversion(db, &version);
   4105  1.1  christos 		close_version = true;
   4106  1.1  christos 	}
   4107  1.1  christos 
   4108  1.1  christos 	search.rbtversion = version;
   4109  1.1  christos 	search.serial = search.rbtversion->serial;
   4110  1.1  christos 	search.options = options;
   4111  1.1  christos 	search.copy_name = false;
   4112  1.1  christos 	search.need_cleanup = false;
   4113  1.1  christos 	search.wild = false;
   4114  1.1  christos 	search.zonecut = NULL;
   4115  1.1  christos 	dns_fixedname_init(&search.zonecut_name);
   4116  1.1  christos 	dns_rbtnodechain_init(&search.chain);
   4117  1.1  christos 	search.now = 0;
   4118  1.1  christos 
   4119  1.1  christos 	/*
   4120  1.1  christos 	 * 'wild' will be true iff. we've matched a wildcard.
   4121  1.1  christos 	 */
   4122  1.1  christos 	wild = false;
   4123  1.1  christos 
   4124  1.1  christos 	RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
   4125  1.1  christos 
   4126  1.1  christos 	/*
   4127  1.1  christos 	 * Search down from the root of the tree.  If, while going down, we
   4128  1.1  christos 	 * encounter a callback node, zone_zonecut_callback() will search the
   4129  1.1  christos 	 * rdatasets at the zone cut for active DNAME or NS rdatasets.
   4130  1.1  christos 	 */
   4131  1.1  christos 	tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3
   4132  1.1  christos 						      : search.rbtdb->tree;
   4133  1.1  christos 	result = dns_rbt_findnode(tree, name, foundname, &node, &search.chain,
   4134  1.1  christos 				  DNS_RBTFIND_EMPTYDATA, zone_zonecut_callback,
   4135  1.1  christos 				  &search);
   4136  1.1  christos 
   4137  1.1  christos 	if (result == DNS_R_PARTIALMATCH) {
   4138  1.1  christos 	partial_match:
   4139  1.1  christos 		if (search.zonecut != NULL) {
   4140  1.1  christos 			result = setup_delegation(&search, nodep, foundname,
   4141  1.1  christos 						  rdataset, sigrdataset);
   4142  1.1  christos 			goto tree_exit;
   4143  1.1  christos 		}
   4144  1.1  christos 
   4145  1.1  christos 		if (search.wild) {
   4146  1.1  christos 			/*
   4147  1.1  christos 			 * At least one of the levels in the search chain
   4148  1.1  christos 			 * potentially has a wildcard.  For each such level,
   4149  1.1  christos 			 * we must see if there's a matching wildcard active
   4150  1.1  christos 			 * in the current version.
   4151  1.1  christos 			 */
   4152  1.1  christos 			result = find_wildcard(&search, &node, name);
   4153  1.1  christos 			if (result == ISC_R_SUCCESS) {
   4154  1.1  christos 				dns_name_copynf(name, foundname);
   4155  1.1  christos 				wild = true;
   4156  1.1  christos 				goto found;
   4157  1.1  christos 			} else if (result != ISC_R_NOTFOUND) {
   4158  1.1  christos 				goto tree_exit;
   4159  1.1  christos 			}
   4160  1.1  christos 		}
   4161  1.1  christos 
   4162  1.1  christos 		active = false;
   4163  1.1  christos 		if ((options & DNS_DBFIND_FORCENSEC3) == 0) {
   4164  1.1  christos 			/*
   4165  1.1  christos 			 * The NSEC3 tree won't have empty nodes,
   4166  1.1  christos 			 * so it isn't necessary to check for them.
   4167  1.1  christos 			 */
   4168  1.1  christos 			dns_rbtnodechain_t chain = search.chain;
   4169  1.1  christos 			active = activeempty(&search, &chain, name);
   4170  1.1  christos 		}
   4171  1.1  christos 
   4172  1.1  christos 		/*
   4173  1.1  christos 		 * If we're here, then the name does not exist, is not
   4174  1.1  christos 		 * beneath a zonecut, and there's no matching wildcard.
   4175  1.1  christos 		 */
   4176  1.1  christos 		if ((search.rbtversion->secure == dns_db_secure &&
   4177  1.1  christos 		     !search.rbtversion->havensec3) ||
   4178  1.1  christos 		    (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
   4179  1.1  christos 		    (search.options & DNS_DBFIND_FORCENSEC3) != 0)
   4180  1.1  christos 		{
   4181  1.1  christos 			result = find_closest_nsec(&search, nodep, foundname,
   4182  1.1  christos 						   rdataset, sigrdataset, tree,
   4183  1.1  christos 						   search.rbtversion->secure);
   4184  1.1  christos 			if (result == ISC_R_SUCCESS) {
   4185  1.1  christos 				result = active ? DNS_R_EMPTYNAME
   4186  1.1  christos 						: DNS_R_NXDOMAIN;
   4187  1.1  christos 			}
   4188  1.1  christos 		} else {
   4189  1.1  christos 			result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
   4190  1.1  christos 		}
   4191  1.1  christos 		goto tree_exit;
   4192  1.1  christos 	} else if (result != ISC_R_SUCCESS) {
   4193  1.1  christos 		goto tree_exit;
   4194  1.1  christos 	}
   4195  1.1  christos 
   4196  1.1  christos found:
   4197  1.1  christos 	/*
   4198  1.1  christos 	 * We have found a node whose name is the desired name, or we
   4199  1.1  christos 	 * have matched a wildcard.
   4200  1.1  christos 	 */
   4201  1.1  christos 
   4202  1.1  christos 	if (search.zonecut != NULL) {
   4203  1.1  christos 		/*
   4204  1.1  christos 		 * If we're beneath a zone cut, we don't want to look for
   4205  1.1  christos 		 * CNAMEs because they're not legitimate zone glue.
   4206  1.1  christos 		 */
   4207  1.1  christos 		cname_ok = false;
   4208  1.1  christos 	} else {
   4209  1.1  christos 		/*
   4210  1.1  christos 		 * The node may be a zone cut itself.  If it might be one,
   4211  1.1  christos 		 * make sure we check for it later.
   4212  1.1  christos 		 *
   4213  1.1  christos 		 * DS records live above the zone cut in ordinary zone so
   4214  1.1  christos 		 * we want to ignore any referral.
   4215  1.1  christos 		 *
   4216  1.1  christos 		 * Stub zones don't have anything "above" the delegation so
   4217  1.1  christos 		 * we always return a referral.
   4218  1.1  christos 		 */
   4219  1.1  christos 		if (node->find_callback &&
   4220  1.1  christos 		    ((node != search.rbtdb->origin_node &&
   4221  1.1  christos 		      !dns_rdatatype_atparent(type)) ||
   4222  1.1  christos 		     IS_STUB(search.rbtdb)))
   4223  1.1  christos 		{
   4224  1.1  christos 			maybe_zonecut = true;
   4225  1.1  christos 		}
   4226  1.1  christos 	}
   4227  1.1  christos 
   4228  1.1  christos 	/*
   4229  1.1  christos 	 * Certain DNSSEC types are not subject to CNAME matching
   4230  1.1  christos 	 * (RFC4035, section 2.5 and RFC3007).
   4231  1.1  christos 	 *
   4232  1.1  christos 	 * We don't check for RRSIG, because we don't store RRSIG records
   4233  1.1  christos 	 * directly.
   4234  1.1  christos 	 */
   4235  1.1  christos 	if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) {
   4236  1.1  christos 		cname_ok = false;
   4237  1.1  christos 	}
   4238  1.1  christos 
   4239  1.1  christos 	/*
   4240  1.1  christos 	 * We now go looking for rdata...
   4241  1.1  christos 	 */
   4242  1.1  christos 
   4243  1.1  christos 	lock = &search.rbtdb->node_locks[node->locknum].lock;
   4244  1.1  christos 	NODE_LOCK(lock, isc_rwlocktype_read);
   4245  1.1  christos 
   4246  1.1  christos 	found = NULL;
   4247  1.1  christos 	foundsig = NULL;
   4248  1.1  christos 	sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
   4249  1.1  christos 	nsecheader = NULL;
   4250  1.1  christos 	nsecsig = NULL;
   4251  1.1  christos 	cnamesig = NULL;
   4252  1.1  christos 	empty_node = true;
   4253  1.1  christos 	for (header = node->data; header != NULL; header = header_next) {
   4254  1.1  christos 		header_next = header->next;
   4255  1.1  christos 		/*
   4256  1.1  christos 		 * Look for an active, extant rdataset.
   4257  1.1  christos 		 */
   4258  1.1  christos 		do {
   4259  1.1  christos 			if (header->serial <= search.serial && !IGNORE(header))
   4260  1.1  christos 			{
   4261  1.1  christos 				/*
   4262  1.1  christos 				 * Is this a "this rdataset doesn't
   4263  1.1  christos 				 * exist" record?
   4264  1.1  christos 				 */
   4265  1.1  christos 				if (NONEXISTENT(header)) {
   4266  1.1  christos 					header = NULL;
   4267  1.1  christos 				}
   4268  1.1  christos 				break;
   4269  1.1  christos 			} else {
   4270  1.1  christos 				header = header->down;
   4271  1.1  christos 			}
   4272  1.1  christos 		} while (header != NULL);
   4273  1.1  christos 		if (header != NULL) {
   4274  1.1  christos 			/*
   4275  1.1  christos 			 * We now know that there is at least one active
   4276  1.1  christos 			 * rdataset at this node.
   4277  1.1  christos 			 */
   4278  1.1  christos 			empty_node = false;
   4279  1.1  christos 
   4280  1.1  christos 			/*
   4281  1.1  christos 			 * Do special zone cut handling, if requested.
   4282  1.1  christos 			 */
   4283  1.1  christos 			if (maybe_zonecut && header->type == dns_rdatatype_ns) {
   4284  1.1  christos 				/*
   4285  1.1  christos 				 * We increment the reference count on node to
   4286  1.1  christos 				 * ensure that search->zonecut_rdataset will
   4287  1.1  christos 				 * still be valid later.
   4288  1.1  christos 				 */
   4289  1.1  christos 				new_reference(search.rbtdb, node,
   4290  1.1  christos 					      isc_rwlocktype_read);
   4291  1.1  christos 				search.zonecut = node;
   4292  1.1  christos 				search.zonecut_rdataset = header;
   4293  1.1  christos 				search.zonecut_sigrdataset = NULL;
   4294  1.1  christos 				search.need_cleanup = true;
   4295  1.1  christos 				maybe_zonecut = false;
   4296  1.1  christos 				at_zonecut = true;
   4297  1.1  christos 				/*
   4298  1.1  christos 				 * It is not clear if KEY should still be
   4299  1.1  christos 				 * allowed at the parent side of the zone
   4300  1.1  christos 				 * cut or not.  It is needed for RFC3007
   4301  1.1  christos 				 * validated updates.
   4302  1.1  christos 				 */
   4303  1.1  christos 				if ((search.options & DNS_DBFIND_GLUEOK) == 0 &&
   4304  1.1  christos 				    type != dns_rdatatype_nsec &&
   4305  1.1  christos 				    type != dns_rdatatype_key)
   4306  1.1  christos 				{
   4307  1.1  christos 					/*
   4308  1.1  christos 					 * Glue is not OK, but any answer we
   4309  1.1  christos 					 * could return would be glue.  Return
   4310  1.1  christos 					 * the delegation.
   4311  1.1  christos 					 */
   4312  1.1  christos 					found = NULL;
   4313  1.1  christos 					break;
   4314  1.1  christos 				}
   4315  1.1  christos 				if (found != NULL && foundsig != NULL) {
   4316  1.1  christos 					break;
   4317  1.1  christos 				}
   4318  1.1  christos 			}
   4319  1.1  christos 
   4320  1.1  christos 			/*
   4321  1.1  christos 			 * If the NSEC3 record doesn't match the chain
   4322  1.1  christos 			 * we are using behave as if it isn't here.
   4323  1.1  christos 			 */
   4324  1.1  christos 			if (header->type == dns_rdatatype_nsec3 &&
   4325  1.1  christos 			    !matchparams(header, &search))
   4326  1.1  christos 			{
   4327  1.1  christos 				NODE_UNLOCK(lock, isc_rwlocktype_read);
   4328  1.1  christos 				goto partial_match;
   4329  1.1  christos 			}
   4330  1.1  christos 			/*
   4331  1.1  christos 			 * If we found a type we were looking for,
   4332  1.1  christos 			 * remember it.
   4333  1.1  christos 			 */
   4334  1.1  christos 			if (header->type == type || type == dns_rdatatype_any ||
   4335  1.1  christos 			    (header->type == dns_rdatatype_cname && cname_ok))
   4336  1.1  christos 			{
   4337  1.1  christos 				/*
   4338  1.1  christos 				 * We've found the answer!
   4339  1.1  christos 				 */
   4340  1.1  christos 				found = header;
   4341  1.1  christos 				if (header->type == dns_rdatatype_cname &&
   4342  1.1  christos 				    cname_ok)
   4343  1.1  christos 				{
   4344  1.1  christos 					/*
   4345  1.1  christos 					 * We may be finding a CNAME instead
   4346  1.1  christos 					 * of the desired type.
   4347  1.1  christos 					 *
   4348  1.1  christos 					 * If we've already got the CNAME RRSIG,
   4349  1.1  christos 					 * use it, otherwise change sigtype
   4350  1.1  christos 					 * so that we find it.
   4351  1.1  christos 					 */
   4352  1.1  christos 					if (cnamesig != NULL) {
   4353  1.1  christos 						foundsig = cnamesig;
   4354  1.1  christos 					} else {
   4355  1.1  christos 						sigtype =
   4356  1.1  christos 							RBTDB_RDATATYPE_SIGCNAME;
   4357  1.1  christos 					}
   4358  1.1  christos 				}
   4359  1.1  christos 				/*
   4360  1.1  christos 				 * If we've got all we need, end the search.
   4361  1.1  christos 				 */
   4362  1.1  christos 				if (!maybe_zonecut && foundsig != NULL) {
   4363  1.1  christos 					break;
   4364  1.1  christos 				}
   4365  1.1  christos 			} else if (header->type == sigtype) {
   4366  1.1  christos 				/*
   4367  1.1  christos 				 * We've found the RRSIG rdataset for our
   4368  1.1  christos 				 * target type.  Remember it.
   4369  1.1  christos 				 */
   4370  1.1  christos 				foundsig = header;
   4371  1.1  christos 				/*
   4372  1.1  christos 				 * If we've got all we need, end the search.
   4373  1.1  christos 				 */
   4374  1.1  christos 				if (!maybe_zonecut && found != NULL) {
   4375  1.1  christos 					break;
   4376  1.1  christos 				}
   4377  1.1  christos 			} else if (header->type == dns_rdatatype_nsec &&
   4378  1.1  christos 				   !search.rbtversion->havensec3)
   4379  1.1  christos 			{
   4380  1.1  christos 				/*
   4381  1.1  christos 				 * Remember a NSEC rdataset even if we're
   4382  1.1  christos 				 * not specifically looking for it, because
   4383  1.1  christos 				 * we might need it later.
   4384  1.1  christos 				 */
   4385  1.1  christos 				nsecheader = header;
   4386  1.1  christos 			} else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
   4387  1.1  christos 				   !search.rbtversion->havensec3)
   4388  1.1  christos 			{
   4389  1.1  christos 				/*
   4390  1.1  christos 				 * If we need the NSEC rdataset, we'll also
   4391  1.1  christos 				 * need its signature.
   4392  1.1  christos 				 */
   4393  1.1  christos 				nsecsig = header;
   4394  1.1  christos 			} else if (cname_ok &&
   4395  1.1  christos 				   header->type == RBTDB_RDATATYPE_SIGCNAME)
   4396  1.1  christos 			{
   4397  1.1  christos 				/*
   4398  1.1  christos 				 * If we get a CNAME match, we'll also need
   4399  1.1  christos 				 * its signature.
   4400  1.1  christos 				 */
   4401  1.1  christos 				cnamesig = header;
   4402  1.1  christos 			}
   4403  1.1  christos 		}
   4404  1.1  christos 	}
   4405  1.1  christos 
   4406  1.1  christos 	if (empty_node) {
   4407  1.1  christos 		/*
   4408  1.1  christos 		 * We have an exact match for the name, but there are no
   4409  1.1  christos 		 * active rdatasets in the desired version.  That means that
   4410  1.1  christos 		 * this node doesn't exist in the desired version, and that
   4411  1.1  christos 		 * we really have a partial match.
   4412  1.1  christos 		 */
   4413  1.1  christos 		if (!wild) {
   4414  1.1  christos 			NODE_UNLOCK(lock, isc_rwlocktype_read);
   4415  1.1  christos 			goto partial_match;
   4416  1.1  christos 		}
   4417  1.1  christos 	}
   4418  1.1  christos 
   4419  1.1  christos 	/*
   4420  1.1  christos 	 * If we didn't find what we were looking for...
   4421  1.1  christos 	 */
   4422  1.1  christos 	if (found == NULL) {
   4423  1.1  christos 		if (search.zonecut != NULL) {
   4424  1.1  christos 			/*
   4425  1.1  christos 			 * We were trying to find glue at a node beneath a
   4426  1.1  christos 			 * zone cut, but didn't.
   4427  1.1  christos 			 *
   4428  1.1  christos 			 * Return the delegation.
   4429  1.1  christos 			 */
   4430  1.1  christos 			NODE_UNLOCK(lock, isc_rwlocktype_read);
   4431  1.1  christos 			result = setup_delegation(&search, nodep, foundname,
   4432  1.1  christos 						  rdataset, sigrdataset);
   4433  1.1  christos 			goto tree_exit;
   4434  1.1  christos 		}
   4435  1.1  christos 		/*
   4436  1.1  christos 		 * The desired type doesn't exist.
   4437  1.1  christos 		 */
   4438  1.1  christos 		result = DNS_R_NXRRSET;
   4439  1.1  christos 		if (search.rbtversion->secure == dns_db_secure &&
   4440  1.1  christos 		    !search.rbtversion->havensec3 &&
   4441  1.1  christos 		    (nsecheader == NULL || nsecsig == NULL))
   4442  1.1  christos 		{
   4443  1.1  christos 			/*
   4444  1.1  christos 			 * The zone is secure but there's no NSEC,
   4445  1.1  christos 			 * or the NSEC has no signature!
   4446  1.1  christos 			 */
   4447  1.1  christos 			if (!wild) {
   4448  1.1  christos 				result = DNS_R_BADDB;
   4449  1.1  christos 				goto node_exit;
   4450  1.1  christos 			}
   4451  1.1  christos 
   4452  1.1  christos 			NODE_UNLOCK(lock, isc_rwlocktype_read);
   4453  1.1  christos 			result = find_closest_nsec(&search, nodep, foundname,
   4454  1.1  christos 						   rdataset, sigrdataset,
   4455  1.1  christos 						   search.rbtdb->tree,
   4456  1.1  christos 						   search.rbtversion->secure);
   4457  1.1  christos 			if (result == ISC_R_SUCCESS) {
   4458  1.1  christos 				result = DNS_R_EMPTYWILD;
   4459  1.1  christos 			}
   4460  1.1  christos 			goto tree_exit;
   4461  1.1  christos 		}
   4462  1.1  christos 		if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
   4463  1.1  christos 		    nsecheader == NULL)
   4464  1.1  christos 		{
   4465  1.1  christos 			/*
   4466  1.1  christos 			 * There's no NSEC record, and we were told
   4467  1.1  christos 			 * to find one.
   4468  1.1  christos 			 */
   4469  1.1  christos 			result = DNS_R_BADDB;
   4470  1.1  christos 			goto node_exit;
   4471  1.1  christos 		}
   4472  1.1  christos 		if (nodep != NULL) {
   4473  1.1  christos 			new_reference(search.rbtdb, node, isc_rwlocktype_read);
   4474  1.1  christos 			*nodep = node;
   4475  1.1  christos 		}
   4476  1.1  christos 		if ((search.rbtversion->secure == dns_db_secure &&
   4477  1.1  christos 		     !search.rbtversion->havensec3) ||
   4478  1.1  christos 		    (search.options & DNS_DBFIND_FORCENSEC) != 0)
   4479  1.1  christos 		{
   4480  1.1  christos 			bind_rdataset(search.rbtdb, node, nsecheader, 0,
   4481  1.1  christos 				      isc_rwlocktype_read, rdataset);
   4482  1.1  christos 			if (nsecsig != NULL) {
   4483  1.1  christos 				bind_rdataset(search.rbtdb, node, nsecsig, 0,
   4484  1.1  christos 					      isc_rwlocktype_read, sigrdataset);
   4485  1.1  christos 			}
   4486  1.1  christos 		}
   4487  1.1  christos 		if (wild) {
   4488  1.1  christos 			foundname->attributes |= DNS_NAMEATTR_WILDCARD;
   4489  1.1  christos 		}
   4490  1.1  christos 		goto node_exit;
   4491  1.1  christos 	}
   4492  1.1  christos 
   4493  1.1  christos 	/*
   4494  1.1  christos 	 * We found what we were looking for, or we found a CNAME.
   4495  1.1  christos 	 */
   4496  1.1  christos 
   4497  1.1  christos 	if (type != found->type && type != dns_rdatatype_any &&
   4498  1.1  christos 	    found->type == dns_rdatatype_cname)
   4499  1.1  christos 	{
   4500  1.1  christos 		/*
   4501  1.1  christos 		 * We weren't doing an ANY query and we found a CNAME instead
   4502  1.1  christos 		 * of the type we were looking for, so we need to indicate
   4503  1.1  christos 		 * that result to the caller.
   4504  1.1  christos 		 */
   4505  1.1  christos 		result = DNS_R_CNAME;
   4506  1.1  christos 	} else if (search.zonecut != NULL) {
   4507  1.1  christos 		/*
   4508  1.1  christos 		 * If we're beneath a zone cut, we must indicate that the
   4509  1.1  christos 		 * result is glue, unless we're actually at the zone cut
   4510  1.1  christos 		 * and the type is NSEC or KEY.
   4511  1.1  christos 		 */
   4512  1.1  christos 		if (search.zonecut == node) {
   4513  1.1  christos 			/*
   4514  1.1  christos 			 * It is not clear if KEY should still be
   4515  1.1  christos 			 * allowed at the parent side of the zone
   4516  1.1  christos 			 * cut or not.  It is needed for RFC3007
   4517  1.1  christos 			 * validated updates.
   4518  1.1  christos 			 */
   4519  1.1  christos 			if (type == dns_rdatatype_nsec ||
   4520  1.1  christos 			    type == dns_rdatatype_nsec3 ||
   4521  1.1  christos 			    type == dns_rdatatype_key)
   4522  1.1  christos 			{
   4523  1.1  christos 				result = ISC_R_SUCCESS;
   4524  1.1  christos 			} else if (type == dns_rdatatype_any) {
   4525  1.1  christos 				result = DNS_R_ZONECUT;
   4526  1.1  christos 			} else {
   4527  1.1  christos 				result = DNS_R_GLUE;
   4528  1.1  christos 			}
   4529  1.1  christos 		} else {
   4530  1.1  christos 			result = DNS_R_GLUE;
   4531  1.1  christos 		}
   4532  1.1  christos 		/*
   4533  1.1  christos 		 * We might have found data that isn't glue, but was occluded
   4534  1.1  christos 		 * by a dynamic update.  If the caller cares about this, they
   4535  1.1  christos 		 * will have told us to validate glue.
   4536  1.1  christos 		 *
   4537  1.1  christos 		 * XXX We should cache the glue validity state!
   4538  1.1  christos 		 */
   4539  1.1  christos 		if (result == DNS_R_GLUE &&
   4540  1.1  christos 		    (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
   4541  1.1  christos 		    !valid_glue(&search, foundname, type, node))
   4542  1.1  christos 		{
   4543  1.1  christos 			NODE_UNLOCK(lock, isc_rwlocktype_read);
   4544  1.1  christos 			result = setup_delegation(&search, nodep, foundname,
   4545  1.1  christos 						  rdataset, sigrdataset);
   4546  1.1  christos 			goto tree_exit;
   4547  1.1  christos 		}
   4548  1.1  christos 	} else {
   4549  1.1  christos 		/*
   4550  1.1  christos 		 * An ordinary successful query!
   4551  1.1  christos 		 */
   4552  1.1  christos 		result = ISC_R_SUCCESS;
   4553  1.1  christos 	}
   4554  1.1  christos 
   4555  1.1  christos 	if (nodep != NULL) {
   4556  1.1  christos 		if (!at_zonecut) {
   4557  1.1  christos 			new_reference(search.rbtdb, node, isc_rwlocktype_read);
   4558  1.1  christos 		} else {
   4559  1.1  christos 			search.need_cleanup = false;
   4560  1.1  christos 		}
   4561  1.1  christos 		*nodep = node;
   4562  1.1  christos 	}
   4563  1.1  christos 
   4564  1.1  christos 	if (type != dns_rdatatype_any) {
   4565  1.1  christos 		bind_rdataset(search.rbtdb, node, found, 0, isc_rwlocktype_read,
   4566  1.1  christos 			      rdataset);
   4567  1.1  christos 		if (foundsig != NULL) {
   4568  1.1  christos 			bind_rdataset(search.rbtdb, node, foundsig, 0,
   4569  1.1  christos 				      isc_rwlocktype_read, sigrdataset);
   4570  1.1  christos 		}
   4571  1.1  christos 	}
   4572  1.1  christos 
   4573  1.1  christos 	if (wild) {
   4574  1.1  christos 		foundname->attributes |= DNS_NAMEATTR_WILDCARD;
   4575  1.1  christos 	}
   4576  1.1  christos 
   4577  1.1  christos node_exit:
   4578  1.1  christos 	NODE_UNLOCK(lock, isc_rwlocktype_read);
   4579  1.1  christos 
   4580  1.1  christos tree_exit:
   4581  1.1  christos 	RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
   4582  1.1  christos 
   4583  1.1  christos 	/*
   4584  1.1  christos 	 * If we found a zonecut but aren't going to use it, we have to
   4585  1.1  christos 	 * let go of it.
   4586  1.1  christos 	 */
   4587  1.1  christos 	if (search.need_cleanup) {
   4588  1.1  christos 		node = search.zonecut;
   4589  1.1  christos 		INSIST(node != NULL);
   4590  1.1  christos 		lock = &(search.rbtdb->node_locks[node->locknum].lock);
   4591  1.1  christos 
   4592  1.1  christos 		NODE_LOCK(lock, isc_rwlocktype_read);
   4593  1.1  christos 		decrement_reference(search.rbtdb, node, 0, isc_rwlocktype_read,
   4594  1.1  christos 				    isc_rwlocktype_none, false);
   4595  1.1  christos 		NODE_UNLOCK(lock, isc_rwlocktype_read);
   4596  1.1  christos 	}
   4597  1.1  christos 
   4598  1.1  christos 	if (close_version) {
   4599  1.1  christos 		closeversion(db, &version, false);
   4600  1.1  christos 	}
   4601  1.1  christos 
   4602  1.1  christos 	dns_rbtnodechain_reset(&search.chain);
   4603  1.1  christos 
   4604  1.1  christos 	return (result);
   4605  1.1  christos }
   4606  1.1  christos 
                       /*
                        * Placeholder for the "findzonecut" operation: every argument is ignored
                        * and the call is reported through FATAL_ERROR().
                        *
                        * The statements after FATAL_ERROR() are presumably never executed
                        * (assumes FATAL_ERROR() does not return -- confirm against its
                        * definition); the final return only supplies a value of the declared
                        * return type.
                        */
   4607  1.1  christos static isc_result_t
   4608  1.1  christos zone_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options,
   4609  1.1  christos 		 isc_stdtime_t now, dns_dbnode_t **nodep, dns_name_t *foundname,
   4610  1.1  christos 		 dns_name_t *dcname, dns_rdataset_t *rdataset,
   4611  1.1  christos 		 dns_rdataset_t *sigrdataset) {
                       	/* None of the arguments is used. */
   4612  1.1  christos 	UNUSED(db);
   4613  1.1  christos 	UNUSED(name);
   4614  1.1  christos 	UNUSED(options);
   4615  1.1  christos 	UNUSED(now);
   4616  1.1  christos 	UNUSED(nodep);
   4617  1.1  christos 	UNUSED(foundname);
   4618  1.1  christos 	UNUSED(dcname);
   4619  1.1  christos 	UNUSED(rdataset);
   4620  1.1  christos 	UNUSED(sigrdataset);
   4621  1.1  christos 
   4622  1.1  christos 	FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
   4623  1.1  christos 
   4624  1.1  christos 	UNREACHABLE();
   4625  1.1  christos 	return (ISC_R_NOTIMPLEMENTED);
   4626  1.1  christos }
   4627  1.1  christos 
                       /*
                        * Decide whether the caller should skip 'header' (one of the rdataset
                        * headers on 'node') because it is not ACTIVE at search->now.
                        *
                        * Returns false when the header is active.  For an inactive header that
                        * is kept as stale (non-zero TTL, KEEPSTALE() true for the database and
                        * rdh_ttl + serve_stale_ttl still in the future) it returns false when
                        * DNS_DBFIND_STALEENABLED is set and search->now is earlier than
                        * last_refresh_fail_ts + serve_stale_refresh, when
                        * DNS_DBFIND_STALETIMEOUT is set, or when DNS_DBFIND_STALEOK is set.
                        * Returns true in every other case.
                        *
                        * Side effects on an inactive header:
                        *  - RDATASET_ATTR_STALE_WINDOW is cleared and possibly set again;
                        *  - the header may be marked stale or ancient, and
                        *    last_refresh_fail_ts may be stored (DNS_DBFIND_STALESTART);
                        *  - if rdh_ttl is older than search->now - RBTDB_VIRTUAL and the node
                        *    lock is, or can be upgraded to, a write lock, '*locktype' becomes
                        *    isc_rwlocktype_write; the header is then unlinked and freed when
                        *    the node's reference count is 0;
                        *  - '*header_prev' is set to 'header' whenever the inactive header
                        *    stays in the list; it is left untouched if the header is active
                        *    or has been freed.
                        */
   4628  1.1  christos static bool
   4629  1.1  christos check_stale_header(dns_rbtnode_t *node, rdatasetheader_t *header,
   4630  1.1  christos 		   isc_rwlocktype_t *locktype, nodelock_t *lock,
   4631  1.1  christos 		   rbtdb_search_t *search, rdatasetheader_t **header_prev) {
   4632  1.1  christos 	if (!ACTIVE(header, search->now)) {
   4633  1.1  christos 		dns_ttl_t stale = header->rdh_ttl +
   4634  1.1  christos 				  search->rbtdb->serve_stale_ttl;
   4635  1.1  christos 		/*
   4636  1.1  christos 		 * If this data is in the stale window keep it and if
   4637  1.1  christos 		 * DNS_DBFIND_STALEOK is not set we tell the caller to
   4638  1.1  christos 		 * skip this record.  We skip the records with ZEROTTL
   4639  1.1  christos 		 * (these records should not be cached anyway).
   4640  1.1  christos 		 */
   4641  1.1  christos 
                       		/*
                       		 * Start from a cleared flag; it is only set again below
                       		 * when the stale-refresh interval still applies.
                       		 */
   4642  1.1  christos 		RDATASET_ATTR_CLR(header, RDATASET_ATTR_STALE_WINDOW);
   4643  1.1  christos 		if (!ZEROTTL(header) && KEEPSTALE(search->rbtdb) &&
   4644  1.1  christos 		    stale > search->now)
   4645  1.1  christos 		{
   4646  1.1  christos 			mark_header_stale(search->rbtdb, header);
   4647  1.1  christos 			*header_prev = header;
   4648  1.1  christos 			/*
   4649  1.1  christos 			 * If DNS_DBFIND_STALESTART is set then it means we
   4650  1.1  christos 			 * failed to resolve the name during recursion, in
   4651  1.1  christos 			 * this case we mark the time in which the refresh
   4652  1.1  christos 			 * failed.
   4653  1.1  christos 			 */
   4654  1.1  christos 			if ((search->options & DNS_DBFIND_STALESTART) != 0) {
   4655  1.1  christos 				atomic_store_release(
   4656  1.1  christos 					&header->last_refresh_fail_ts,
   4657  1.1  christos 					search->now);
   4658  1.1  christos 			} else if ((search->options &
   4659  1.1  christos 				    DNS_DBFIND_STALEENABLED) != 0 &&
   4660  1.1  christos 				   search->now <
   4661  1.1  christos 					   (atomic_load_acquire(
   4662  1.1  christos 						    &header->last_refresh_fail_ts) +
   4663  1.1  christos 					    search->rbtdb->serve_stale_refresh))
   4664  1.1  christos 			{
   4665  1.1  christos 				/*
   4666  1.1  christos 				 * If we are within interval between last
   4667  1.1  christos 				 * refresh failure time + 'stale-refresh-time',
   4668  1.1  christos 				 * then don't skip this stale entry but use it
   4669  1.1  christos 				 * instead.
   4670  1.1  christos 				 */
   4671  1.1  christos 				RDATASET_ATTR_SET(header,
   4672  1.1  christos 						  RDATASET_ATTR_STALE_WINDOW);
   4673  1.1  christos 				return (false);
   4674  1.1  christos 			} else if ((search->options &
   4675  1.1  christos 				    DNS_DBFIND_STALETIMEOUT) != 0)
   4676  1.1  christos 			{
   4677  1.1  christos 				/*
   4678  1.1  christos 				 * We want stale RRset due to timeout, so we
   4679  1.1  christos 				 * don't skip it.
   4680  1.1  christos 				 */
   4681  1.1  christos 				return (false);
   4682  1.1  christos 			}
                       			/*
                       			 * Otherwise the stale header is skipped unless the
                       			 * caller set DNS_DBFIND_STALEOK.
                       			 */
   4683  1.1  christos 			return ((search->options & DNS_DBFIND_STALEOK) == 0);
   4684  1.1  christos 		}
   4685  1.1  christos 
   4686  1.1  christos 		/*
   4687  1.1  christos 		 * This rdataset is stale.  If no one else is using the
   4688  1.1  christos 		 * node, we can clean it up right now, otherwise we mark
   4689  1.1  christos 		 * it as ancient, and the node as dirty, so it will get
   4690  1.1  christos 		 * cleaned up later.
   4691  1.1  christos 		 */
   4692  1.1  christos 		if ((header->rdh_ttl < search->now - RBTDB_VIRTUAL) &&
   4693  1.1  christos 		    (*locktype == isc_rwlocktype_write ||
   4694  1.1  christos 		     NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS))
   4695  1.1  christos 		{
   4696  1.1  christos 			/*
   4697  1.1  christos 			 * We update the node's status only when we can
   4698  1.1  christos 			 * get write access; otherwise, we leave others
   4699  1.1  christos 			 * to this work.  Periodical cleaning will
   4700  1.1  christos 			 * eventually take the job as the last resort.
   4701  1.1  christos 			 * We won't downgrade the lock, since other
   4702  1.1  christos 			 * rdatasets are probably stale, too.
   4703  1.1  christos 			 */
   4704  1.1  christos 			*locktype = isc_rwlocktype_write;
   4705  1.1  christos 
   4706  1.1  christos 			if (isc_refcount_current(&node->references) == 0) {
   4707  1.1  christos 				isc_mem_t *mctx;
   4708  1.1  christos 
   4709  1.1  christos 				/*
   4710  1.1  christos 				 * header->down can be non-NULL if the
   4711  1.1  christos 				 * refcount has just decremented to 0
   4712  1.1  christos 				 * but decrement_reference() has not
   4713  1.1  christos 				 * performed clean_cache_node(), in
   4714  1.1  christos 				 * which case we need to purge the stale
   4715  1.1  christos 				 * headers first.
   4716  1.1  christos 				 */
   4717  1.1  christos 				mctx = search->rbtdb->common.mctx;
   4718  1.1  christos 				clean_stale_headers(search->rbtdb, mctx,
   4719  1.1  christos 						    header);
                       				/*
                       				 * Unlink 'header' from the node's list
                       				 * before freeing it.  '*header_prev' is
                       				 * deliberately not advanced: 'header' is
                       				 * gone after this point.
                       				 */
   4720  1.1  christos 				if (*header_prev != NULL) {
   4721  1.1  christos 					(*header_prev)->next = header->next;
   4722  1.1  christos 				} else {
   4723  1.1  christos 					node->data = header->next;
   4724  1.1  christos 				}
   4725  1.1  christos 				free_rdataset(search->rbtdb, mctx, header);
   4726  1.1  christos 			} else {
   4727  1.1  christos 				mark_header_ancient(search->rbtdb, header);
   4728  1.1  christos 				*header_prev = header;
   4729  1.1  christos 			}
   4730  1.1  christos 		} else {
   4731  1.1  christos 			*header_prev = header;
   4732  1.1  christos 		}
   4733  1.1  christos 		return (true);
   4734  1.1  christos 	}
                       	/* The header is still active: nothing to skip, nothing modified. */
   4735  1.1  christos 	return (false);
   4736  1.1  christos }
   4737  1.1  christos 
                       /*
                        * Node callback handed to dns_rbt_findnode() by cache_find(); 'arg' is
                        * the rbtdb_search_t describing the lookup in progress.
                        *
                        * Scans the headers of 'node' for a DNAME rdataset and its RRSIG.  If a
                        * DNAME is found whose trust is not pending (or the search has
                        * DNS_DBFIND_PENDINGOK set), the node is referenced and recorded in
                        * search->zonecut together with the two headers, search->need_cleanup is
                        * set, and DNS_R_PARTIALMATCH is returned.  Otherwise DNS_R_CONTINUE is
                        * returned and 'search' is left unchanged.
                        *
                        * The node lock is taken for reading here and released before returning;
                        * check_stale_header() may upgrade it to a write lock along the way, which
                        * is why 'locktype' is tracked in a variable.
                        */
   4738  1.1  christos static isc_result_t
   4739  1.1  christos cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
   4740  1.1  christos 	rbtdb_search_t *search = arg;
   4741  1.1  christos 	rdatasetheader_t *header, *header_prev, *header_next;
   4742  1.1  christos 	rdatasetheader_t *dname_header, *sigdname_header;
   4743  1.1  christos 	isc_result_t result;
   4744  1.1  christos 	nodelock_t *lock;
   4745  1.1  christos 	isc_rwlocktype_t locktype;
   4746  1.1  christos 
   4747  1.1  christos 	/* The search must not have recorded a zone cut yet. */
   4748  1.1  christos 
   4749  1.1  christos 	REQUIRE(search->zonecut == NULL);
   4750  1.1  christos 
   4751  1.1  christos 	/*
   4752  1.1  christos 	 * Keep compiler silent.
   4753  1.1  christos 	 */
   4754  1.1  christos 	UNUSED(name);
   4755  1.1  christos 
   4756  1.1  christos 	lock = &(search->rbtdb->node_locks[node->locknum].lock);
   4757  1.1  christos 	locktype = isc_rwlocktype_read;
   4758  1.1  christos 	NODE_LOCK(lock, locktype);
   4759  1.1  christos 
   4760  1.1  christos 	/*
   4761  1.1  christos 	 * Look for a DNAME or RRSIG DNAME rdataset.
   4762  1.1  christos 	 */
   4763  1.1  christos 	dname_header = NULL;
   4764  1.1  christos 	sigdname_header = NULL;
   4765  1.1  christos 	header_prev = NULL;
   4766  1.1  christos 	for (header = node->data; header != NULL; header = header_next) {
                       		/*
                       		 * Save the successor first: check_stale_header() may
                       		 * unlink and free 'header'.
                       		 */
   4767  1.1  christos 		header_next = header->next;
   4768  1.1  christos 		if (check_stale_header(node, header, &locktype, lock, search,
   4769  1.1  christos 				       &header_prev))
   4770  1.1  christos 		{
   4771  1.1  christos 			/* Do nothing. */
   4772  1.1  christos 		} else if (header->type == dns_rdatatype_dname &&
   4773  1.1  christos 			   EXISTS(header) && !ANCIENT(header))
   4774  1.1  christos 		{
   4775  1.1  christos 			dname_header = header;
   4776  1.1  christos 			header_prev = header;
   4777  1.1  christos 		} else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
   4778  1.1  christos 			   EXISTS(header) && !ANCIENT(header))
   4779  1.1  christos 		{
   4780  1.1  christos 			sigdname_header = header;
   4781  1.1  christos 			header_prev = header;
   4782  1.1  christos 		} else {
   4783  1.1  christos 			header_prev = header;
   4784  1.1  christos 		}
   4785  1.1  christos 	}
   4786  1.1  christos 
                       	/*
                       	 * A DNAME is only used if its trust is not pending, unless the
                       	 * caller explicitly accepts pending data.
                       	 */
   4787  1.1  christos 	if (dname_header != NULL &&
   4788  1.1  christos 	    (!DNS_TRUST_PENDING(dname_header->trust) ||
   4789  1.1  christos 	     (search->options & DNS_DBFIND_PENDINGOK) != 0))
   4790  1.1  christos 	{
   4791  1.1  christos 		/*
   4792  1.1  christos 		 * We increment the reference count on node to ensure that
   4793  1.1  christos 		 * search->zonecut_rdataset will still be valid later.
   4794  1.1  christos 		 */
   4795  1.1  christos 		new_reference(search->rbtdb, node, locktype);
   4796  1.1  christos 		search->zonecut = node;
   4797  1.1  christos 		search->zonecut_rdataset = dname_header;
   4798  1.1  christos 		search->zonecut_sigrdataset = sigdname_header;
   4799  1.1  christos 		search->need_cleanup = true;
   4800  1.1  christos 		result = DNS_R_PARTIALMATCH;
   4801  1.1  christos 	} else {
   4802  1.1  christos 		result = DNS_R_CONTINUE;
   4803  1.1  christos 	}
   4804  1.1  christos 
                       	/* Release with whatever lock type we ended up holding. */
   4805  1.1  christos 	NODE_UNLOCK(lock, locktype);
   4806  1.1  christos 
   4807  1.1  christos 	return (result);
   4808  1.1  christos }
   4809  1.1  christos 
                       /*
                        * Look for an extant NS rdataset (and its RRSIG), first at 'node' and
                        * then, while nothing is found, at search->chain.levels[i] for
                        * i = level_matches - 1 down to 0.
                        *
                        * When an NS rdataset is found:
                        *  - 'foundname' (if non-NULL) is built from the node's name followed by
                        *    the names of the chain levels not yet visited;
                        *  - '*nodep' (if 'nodep' is non-NULL) receives a new reference to the
                        *    node;
                        *  - 'rdataset' is bound to the NS header and, if an RRSIG NS header was
                        *    seen, 'sigrdataset' is bound to it;
                        *  - headers for which need_headerupdate() is true are passed to
                        *    update_header() under the node write lock;
                        *  - DNS_R_DELEGATION is returned.
                        *
                        * If dns_name_concatenate() fails, its result is returned and '*nodep'
                        * (if 'nodep' is non-NULL) is set to NULL.  If no NS rdataset is found at
                        * any level, ISC_R_NOTFOUND is returned.
                        */
   4810  1.1  christos static isc_result_t
   4811  1.1  christos find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
   4812  1.1  christos 		     dns_dbnode_t **nodep, dns_name_t *foundname,
   4813  1.1  christos 		     dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) {
   4814  1.1  christos 	unsigned int i;
   4815  1.1  christos 	dns_rbtnode_t *level_node;
   4816  1.1  christos 	rdatasetheader_t *header, *header_prev, *header_next;
   4817  1.1  christos 	rdatasetheader_t *found, *foundsig;
   4818  1.1  christos 	isc_result_t result = ISC_R_NOTFOUND;
   4819  1.1  christos 	dns_name_t name;
   4820  1.1  christos 	dns_rbtdb_t *rbtdb;
   4821  1.1  christos 	bool done;
   4822  1.1  christos 	nodelock_t *lock;
   4823  1.1  christos 	isc_rwlocktype_t locktype;
   4824  1.1  christos 
   4825  1.1  christos 	/*
   4826  1.1  christos 	 * Caller must be holding the tree lock.
   4827  1.1  christos 	 */
   4828  1.1  christos 
   4829  1.1  christos 	rbtdb = search->rbtdb;
                       	/*
                       	 * 'i' counts the chain levels that have not been examined yet; it
                       	 * is shared by the outer loop and the foundname loop below.
                       	 */
   4830  1.1  christos 	i = search->chain.level_matches;
   4831  1.1  christos 	done = false;
   4832  1.1  christos 	do {
   4833  1.1  christos 		locktype = isc_rwlocktype_read;
   4834  1.1  christos 		lock = &rbtdb->node_locks[node->locknum].lock;
   4835  1.1  christos 		NODE_LOCK(lock, locktype);
   4836  1.1  christos 
   4837  1.1  christos 		/*
   4838  1.1  christos 		 * Look for NS and RRSIG NS rdatasets.
   4839  1.1  christos 		 */
   4840  1.1  christos 		found = NULL;
   4841  1.1  christos 		foundsig = NULL;
   4842  1.1  christos 		header_prev = NULL;
   4843  1.1  christos 		for (header = node->data; header != NULL; header = header_next)
   4844  1.1  christos 		{
                       			/*
                       			 * Save the successor first: check_stale_header()
                       			 * may unlink and free 'header'.
                       			 */
   4845  1.1  christos 			header_next = header->next;
   4846  1.1  christos 			if (check_stale_header(node, header, &locktype, lock,
   4847  1.1  christos 					       search, &header_prev))
   4848  1.1  christos 			{
   4849  1.1  christos 				/* Do nothing. */
   4850  1.1  christos 			} else if (EXISTS(header) && !ANCIENT(header)) {
   4851  1.1  christos 				/*
   4852  1.1  christos 				 * We've found an extant rdataset.  See if
   4853  1.1  christos 				 * we're interested in it.
   4854  1.1  christos 				 */
   4855  1.1  christos 				if (header->type == dns_rdatatype_ns) {
   4856  1.1  christos 					found = header;
   4857  1.1  christos 					if (foundsig != NULL) {
   4858  1.1  christos 						break;
   4859  1.1  christos 					}
   4860  1.1  christos 				} else if (header->type ==
   4861  1.1  christos 					   RBTDB_RDATATYPE_SIGNS)
   4862  1.1  christos 				{
   4863  1.1  christos 					foundsig = header;
   4864  1.1  christos 					if (found != NULL) {
   4865  1.1  christos 						break;
   4866  1.1  christos 					}
   4867  1.1  christos 				}
   4868  1.1  christos 				header_prev = header;
   4869  1.1  christos 			} else {
   4870  1.1  christos 				header_prev = header;
   4871  1.1  christos 			}
   4872  1.1  christos 		}
   4873  1.1  christos 
   4874  1.1  christos 		if (found != NULL) {
   4875  1.1  christos 			/*
   4876  1.1  christos 			 * If we have to set foundname, we do it before
   4877  1.1  christos 			 * anything else.  If we were to set foundname after
   4878  1.1  christos 			 * we had set nodep or bound the rdataset, then we'd
   4879  1.1  christos 			 * have to undo that work if dns_name_concatenate()
   4880  1.1  christos 			 * failed.  By setting foundname first, there's
   4881  1.1  christos 			 * nothing to undo if we have trouble.
   4882  1.1  christos 			 */
   4883  1.1  christos 			if (foundname != NULL) {
   4884  1.1  christos 				dns_name_init(&name, NULL);
   4885  1.1  christos 				dns_rbt_namefromnode(node, &name);
   4886  1.1  christos 				dns_name_copynf(&name, foundname);
                       				/*
                       				 * Append the name of every remaining chain
                       				 * level.  This consumes 'i', which is fine
                       				 * because 'found' is set and the outer loop
                       				 * will finish after this iteration.
                       				 */
   4887  1.1  christos 				while (i > 0) {
   4888  1.1  christos 					i--;
   4889  1.1  christos 					level_node = search->chain.levels[i];
   4890  1.1  christos 					dns_name_init(&name, NULL);
   4891  1.1  christos 					dns_rbt_namefromnode(level_node, &name);
   4892  1.1  christos 					result = dns_name_concatenate(
   4893  1.1  christos 						foundname, &name, foundname,
   4894  1.1  christos 						NULL);
   4895  1.1  christos 					if (result != ISC_R_SUCCESS) {
   4896  1.1  christos 						if (nodep != NULL) {
   4897  1.1  christos 							*nodep = NULL;
   4898  1.1  christos 						}
   4899  1.1  christos 						goto node_exit;
   4900  1.1  christos 					}
   4901  1.1  christos 				}
   4902  1.1  christos 			}
   4903  1.1  christos 			result = DNS_R_DELEGATION;
   4904  1.1  christos 			if (nodep != NULL) {
   4905  1.1  christos 				new_reference(search->rbtdb, node, locktype);
   4906  1.1  christos 				*nodep = node;
   4907  1.1  christos 			}
   4908  1.1  christos 			bind_rdataset(search->rbtdb, node, found, search->now,
   4909  1.1  christos 				      locktype, rdataset);
   4910  1.1  christos 			if (foundsig != NULL) {
   4911  1.1  christos 				bind_rdataset(search->rbtdb, node, foundsig,
   4912  1.1  christos 					      search->now, locktype,
   4913  1.1  christos 					      sigrdataset);
   4914  1.1  christos 			}
   4915  1.1  christos 			if (need_headerupdate(found, search->now) ||
   4916  1.1  christos 			    (foundsig != NULL &&
   4917  1.1  christos 			     need_headerupdate(foundsig, search->now)))
   4918  1.1  christos 			{
                       				/*
                       				 * update_header() is only called with the
                       				 * write lock held.  The read lock is dropped
                       				 * and the write lock taken (no in-place
                       				 * upgrade), so need_headerupdate() is
                       				 * evaluated again once the write lock is
                       				 * held.
                       				 */
   4919  1.1  christos 				if (locktype != isc_rwlocktype_write) {
   4920  1.1  christos 					NODE_UNLOCK(lock, locktype);
   4921  1.1  christos 					NODE_LOCK(lock, isc_rwlocktype_write);
   4922  1.1  christos 					locktype = isc_rwlocktype_write;
   4923  1.1  christos 					POST(locktype);
   4924  1.1  christos 				}
   4925  1.1  christos 				if (need_headerupdate(found, search->now)) {
   4926  1.1  christos 					update_header(search->rbtdb, found,
   4927  1.1  christos 						      search->now);
   4928  1.1  christos 				}
   4929  1.1  christos 				if (foundsig != NULL &&
   4930  1.1  christos 				    need_headerupdate(foundsig, search->now))
   4931  1.1  christos 				{
   4932  1.1  christos 					update_header(search->rbtdb, foundsig,
   4933  1.1  christos 						      search->now);
   4934  1.1  christos 				}
   4935  1.1  christos 			}
   4936  1.1  christos 		}
   4937  1.1  christos 
   4938  1.1  christos 	node_exit:
   4939  1.1  christos 		NODE_UNLOCK(lock, locktype);
   4940  1.1  christos 
                       		/*
                       		 * Nothing found here: move on to the next chain level if
                       		 * there is one; otherwise (or once an NS was found, even
                       		 * if building foundname failed) stop.
                       		 */
   4941  1.1  christos 		if (found == NULL && i > 0) {
   4942  1.1  christos 			i--;
   4943  1.1  christos 			node = search->chain.levels[i];
   4944  1.1  christos 		} else {
   4945  1.1  christos 			done = true;
   4946  1.1  christos 		}
   4947  1.1  christos 	} while (!done);
   4948  1.1  christos 
   4949  1.1  christos 	return (result);
   4950  1.1  christos }
   4951  1.1  christos 
                       /*
                        * Search for an NSEC rdataset, starting at the node the search chain
                        * currently points to and stepping to the previous node
                        * (dns_rbtnodechain_prev()) for as long as the node just examined counts
                        * as empty.
                        *
                        * A node stops counting as empty once it holds an existing header with a
                        * non-zero base type that is not an RRSIG and has no 'noqname' attached.
                        *
                        * Returns:
                        *  - DNS_R_COVERINGNSEC when an NSEC header is found: 'foundname' is set
                        *    to the node's full name, 'rdataset' (and 'sigrdataset' when an
                        *    RRSIG NSEC header exists) is bound, and '*nodep' receives a new
                        *    reference to the node;
                        *  - ISC_R_NOTFOUND when a non-empty node without an NSEC is reached;
                        *  - otherwise the failure result of dns_rbtnodechain_current(),
                        *    dns_rbtnodechain_prev() or dns_name_concatenate().
                        *
                        * NOTE(review): '*nodep' is written without a NULL check, unlike
                        * find_deepest_zonecut() -- confirm that callers requesting
                        * DNS_DBFIND_COVERINGNSEC always pass a non-NULL 'nodep'.
                        */
   4952  1.1  christos static isc_result_t
   4953  1.1  christos find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
   4954  1.1  christos 		  isc_stdtime_t now, dns_name_t *foundname,
   4955  1.1  christos 		  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) {
   4956  1.1  christos 	dns_rbtnode_t *node;
   4957  1.1  christos 	rdatasetheader_t *header, *header_next, *header_prev;
   4958  1.1  christos 	rdatasetheader_t *found, *foundsig;
   4959  1.1  christos 	bool empty_node;
   4960  1.1  christos 	isc_result_t result;
   4961  1.1  christos 	dns_fixedname_t fname, forigin;
   4962  1.1  christos 	dns_name_t *name, *origin;
   4963  1.1  christos 	rbtdb_rdatatype_t matchtype, sigmatchtype;
   4964  1.1  christos 	nodelock_t *lock;
   4965  1.1  christos 	isc_rwlocktype_t locktype;
   4966  1.1  christos 	dns_rbtnodechain_t chain;
   4967  1.1  christos 
                       	/*
                       	 * Walk a local copy of the chain; search->chain itself is not
                       	 * passed to dns_rbtnodechain_prev().
                       	 */
   4968  1.1  christos 	chain = search->chain;
   4969  1.1  christos 
   4970  1.1  christos 	matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
   4971  1.1  christos 	sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
   4972  1.1  christos 					     dns_rdatatype_nsec);
   4973  1.1  christos 
   4974  1.1  christos 	do {
   4975  1.1  christos 		node = NULL;
   4976  1.1  christos 		name = dns_fixedname_initname(&fname);
   4977  1.1  christos 		origin = dns_fixedname_initname(&forigin);
   4978  1.1  christos 		result = dns_rbtnodechain_current(&chain, name, origin, &node);
   4979  1.1  christos 		if (result != ISC_R_SUCCESS) {
   4980  1.1  christos 			return (result);
   4981  1.1  christos 		}
   4982  1.1  christos 		locktype = isc_rwlocktype_read;
   4983  1.1  christos 		lock = &(search->rbtdb->node_locks[node->locknum].lock);
   4984  1.1  christos 		NODE_LOCK(lock, locktype);
   4985  1.1  christos 		found = NULL;
   4986  1.1  christos 		foundsig = NULL;
   4987  1.1  christos 		empty_node = true;
   4988  1.1  christos 		header_prev = NULL;
   4989  1.1  christos 		for (header = node->data; header != NULL; header = header_next)
   4990  1.1  christos 		{
                       			/*
                       			 * Save the successor first: check_stale_header()
                       			 * may unlink and free 'header'.
                       			 */
   4991  1.1  christos 			header_next = header->next;
   4992  1.1  christos 			if (check_stale_header(node, header, &locktype, lock,
   4993  1.1  christos 					       search, &header_prev))
   4994  1.1  christos 			{
   4995  1.1  christos 				continue;
   4996  1.1  christos 			}
                       			/*
                       			 * Headers that do not exist, or whose base type is
                       			 * 0, neither match nor make the node non-empty.
                       			 */
   4997  1.1  christos 			if (NONEXISTENT(header) ||
   4998  1.1  christos 			    RBTDB_RDATATYPE_BASE(header->type) == 0)
   4999  1.1  christos 			{
   5000  1.1  christos 				header_prev = header;
   5001  1.1  christos 				continue;
   5002  1.1  christos 			}
   5003  1.1  christos 			/*
   5004  1.1  christos 			 * Don't stop on provable noqname / RRSIG.
   5005  1.1  christos 			 */
   5006  1.1  christos 			if (header->noqname == NULL &&
   5007  1.1  christos 			    RBTDB_RDATATYPE_BASE(header->type) !=
   5008  1.1  christos 				    dns_rdatatype_rrsig)
   5009  1.1  christos 			{
   5010  1.1  christos 				empty_node = false;
   5011  1.1  christos 			}
   5012  1.1  christos 			if (header->type == matchtype) {
   5013  1.1  christos 				found = header;
   5014  1.1  christos 			} else if (header->type == sigmatchtype) {
   5015  1.1  christos 				foundsig = header;
   5016  1.1  christos 			}
   5017  1.1  christos 			header_prev = header;
   5018  1.1  christos 		}
   5019  1.1  christos 		if (found != NULL) {
                       			/*
                       			 * Build foundname before binding anything so that a
                       			 * failure here leaves nothing to undo.
                       			 */
   5020  1.1  christos 			result = dns_name_concatenate(name, origin, foundname,
   5021  1.1  christos 						      NULL);
   5022  1.1  christos 			if (result != ISC_R_SUCCESS) {
   5023  1.1  christos 				goto unlock_node;
   5024  1.1  christos 			}
   5025  1.1  christos 			bind_rdataset(search->rbtdb, node, found, now, locktype,
   5026  1.1  christos 				      rdataset);
   5027  1.1  christos 			if (foundsig != NULL) {
   5028  1.1  christos 				bind_rdataset(search->rbtdb, node, foundsig,
   5029  1.1  christos 					      now, locktype, sigrdataset);
   5030  1.1  christos 			}
   5031  1.1  christos 			new_reference(search->rbtdb, node, locktype);
   5032  1.1  christos 			*nodep = node;
   5033  1.1  christos 			result = DNS_R_COVERINGNSEC;
   5034  1.1  christos 		} else if (!empty_node) {
   5035  1.1  christos 			result = ISC_R_NOTFOUND;
   5036  1.1  christos 		} else {
   5037  1.1  christos 			result = dns_rbtnodechain_prev(&chain, NULL, NULL);
   5038  1.1  christos 		}
   5039  1.1  christos 	unlock_node:
   5040  1.1  christos 		NODE_UNLOCK(lock, locktype);
                       		/*
                       		 * Only the "empty node, moved to previous node" case leaves
                       		 * 'result' at ISC_R_SUCCESS with 'empty_node' still true,
                       		 * so that is the only case that loops again.
                       		 */
   5041  1.1  christos 	} while (empty_node && result == ISC_R_SUCCESS);
   5042  1.1  christos 	return (result);
   5043  1.1  christos }
   5044  1.1  christos 
   5045  1.1  christos static isc_result_t
   5046  1.1  christos cache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
   5047  1.1  christos 	   dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
   5048  1.1  christos 	   dns_dbnode_t **nodep, dns_name_t *foundname,
   5049  1.1  christos 	   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) {
   5050  1.1  christos 	dns_rbtnode_t *node = NULL;
   5051  1.1  christos 	isc_result_t result;
   5052  1.1  christos 	rbtdb_search_t search;
   5053  1.1  christos 	bool cname_ok = true;
   5054  1.1  christos 	bool empty_node;
   5055  1.1  christos 	nodelock_t *lock;
   5056  1.1  christos 	isc_rwlocktype_t locktype;
   5057  1.1  christos 	rdatasetheader_t *header, *header_prev, *header_next;
   5058  1.1  christos 	rdatasetheader_t *found, *nsheader;
   5059  1.1  christos 	rdatasetheader_t *foundsig, *nssig, *cnamesig;
   5060  1.1  christos 	rdatasetheader_t *update, *updatesig;
   5061  1.1  christos 	rdatasetheader_t *nsecheader, *nsecsig;
   5062  1.1  christos 	rbtdb_rdatatype_t sigtype, negtype;
   5063  1.1  christos 
   5064  1.1  christos 	UNUSED(version);
   5065  1.1  christos 
   5066  1.1  christos 	search.rbtdb = (dns_rbtdb_t *)db;
   5067  1.1  christos 
   5068  1.1  christos 	REQUIRE(VALID_RBTDB(search.rbtdb));
   5069  1.1  christos 	REQUIRE(version == NULL);
   5070  1.1  christos 
   5071  1.1  christos 	if (now == 0) {
   5072  1.1  christos 		isc_stdtime_get(&now);
   5073  1.1  christos 	}
   5074  1.1  christos 
   5075  1.1  christos 	search.rbtversion = NULL;
   5076  1.1  christos 	search.serial = 1;
   5077  1.1  christos 	search.options = options;
   5078  1.1  christos 	search.copy_name = false;
   5079  1.1  christos 	search.need_cleanup = false;
   5080  1.1  christos 	search.wild = false;
   5081  1.1  christos 	search.zonecut = NULL;
   5082  1.1  christos 	dns_fixedname_init(&search.zonecut_name);
   5083  1.1  christos 	dns_rbtnodechain_init(&search.chain);
   5084  1.1  christos 	search.now = now;
   5085  1.1  christos 	update = NULL;
   5086  1.1  christos 	updatesig = NULL;
   5087  1.1  christos 
   5088  1.1  christos 	RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
   5089  1.1  christos 
   5090  1.1  christos 	/*
   5091  1.1  christos 	 * Search down from the root of the tree.  If, while going down, we
   5092  1.1  christos 	 * encounter a callback node, cache_zonecut_callback() will search the
   5093  1.1  christos 	 * rdatasets at the zone cut for a DNAME rdataset.
   5094  1.1  christos 	 */
   5095  1.1  christos 	result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
   5096  1.1  christos 				  &search.chain, DNS_RBTFIND_EMPTYDATA,
   5097  1.1  christos 				  cache_zonecut_callback, &search);
   5098  1.1  christos 
   5099  1.1  christos 	if (result == DNS_R_PARTIALMATCH) {
   5100  1.1  christos 		if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
   5101  1.1  christos 			result = find_coveringnsec(&search, nodep, now,
   5102  1.1  christos 						   foundname, rdataset,
   5103  1.1  christos 						   sigrdataset);
   5104  1.1  christos 			if (result == DNS_R_COVERINGNSEC) {
   5105  1.1  christos 				goto tree_exit;
   5106  1.1  christos 			}
   5107  1.1  christos 		}
   5108  1.1  christos 		if (search.zonecut != NULL) {
   5109  1.1  christos 			result = setup_delegation(&search, nodep, foundname,
   5110  1.1  christos 						  rdataset, sigrdataset);
   5111  1.1  christos 			goto tree_exit;
   5112  1.1  christos 		} else {
   5113  1.1  christos 		find_ns:
   5114  1.1  christos 			result = find_deepest_zonecut(&search, node, nodep,
   5115  1.1  christos 						      foundname, rdataset,
   5116  1.1  christos 						      sigrdataset);
   5117  1.1  christos 			goto tree_exit;
   5118  1.1  christos 		}
   5119  1.1  christos 	} else if (result != ISC_R_SUCCESS) {
   5120  1.1  christos 		goto tree_exit;
   5121  1.1  christos 	}
   5122  1.1  christos 
   5123  1.1  christos 	/*
   5124  1.1  christos 	 * Certain DNSSEC types are not subject to CNAME matching
   5125  1.1  christos 	 * (RFC4035, section 2.5 and RFC3007).
   5126  1.1  christos 	 *
   5127  1.1  christos 	 * We don't check for RRSIG, because we don't store RRSIG records
   5128  1.1  christos 	 * directly.
   5129  1.1  christos 	 */
   5130  1.1  christos 	if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) {
   5131  1.1  christos 		cname_ok = false;
   5132  1.1  christos 	}
   5133  1.1  christos 
   5134  1.1  christos 	/*
   5135  1.1  christos 	 * We now go looking for rdata...
   5136  1.1  christos 	 */
   5137  1.1  christos 
   5138  1.1  christos 	lock = &(search.rbtdb->node_locks[node->locknum].lock);
   5139  1.1  christos 	locktype = isc_rwlocktype_read;
   5140  1.1  christos 	NODE_LOCK(lock, locktype);
   5141  1.1  christos 
   5142  1.1  christos 	found = NULL;
   5143  1.1  christos 	foundsig = NULL;
   5144  1.1  christos 	sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
   5145  1.1  christos 	negtype = RBTDB_RDATATYPE_VALUE(0, type);
   5146  1.1  christos 	nsheader = NULL;
   5147  1.1  christos 	nsecheader = NULL;
   5148  1.1  christos 	nssig = NULL;
   5149  1.1  christos 	nsecsig = NULL;
   5150  1.1  christos 	cnamesig = NULL;
   5151  1.1  christos 	empty_node = true;
   5152  1.1  christos 	header_prev = NULL;
   5153  1.1  christos 	for (header = node->data; header != NULL; header = header_next) {
   5154  1.1  christos 		header_next = header->next;
   5155  1.1  christos 		if (check_stale_header(node, header, &locktype, lock, &search,
   5156  1.1  christos 				       &header_prev))
   5157  1.1  christos 		{
   5158  1.1  christos 			/* Do nothing. */
   5159  1.1  christos 		} else if (EXISTS(header) && !ANCIENT(header)) {
   5160  1.1  christos 			/*
   5161  1.1  christos 			 * We now know that there is at least one active
   5162  1.1  christos 			 * non-stale rdataset at this node.
   5163  1.1  christos 			 */
   5164  1.1  christos 			empty_node = false;
   5165  1.1  christos 
   5166  1.1  christos 			/*
   5167  1.1  christos 			 * If we found a type we were looking for, remember
   5168  1.1  christos 			 * it.
   5169  1.1  christos 			 */
   5170  1.1  christos 			if (header->type == type ||
   5171  1.1  christos 			    (type == dns_rdatatype_any &&
   5172  1.1  christos 			     RBTDB_RDATATYPE_BASE(header->type) != 0) ||
   5173  1.1  christos 			    (cname_ok && header->type == dns_rdatatype_cname))
   5174  1.1  christos 			{
   5175  1.1  christos 				/*
   5176  1.1  christos 				 * We've found the answer.
   5177  1.1  christos 				 */
   5178  1.1  christos 				found = header;
   5179  1.1  christos 				if (header->type == dns_rdatatype_cname &&
   5180  1.1  christos 				    cname_ok && cnamesig != NULL)
   5181  1.1  christos 				{
   5182  1.1  christos 					/*
   5183  1.1  christos 					 * If we've already got the
   5184  1.1  christos 					 * CNAME RRSIG, use it.
   5185  1.1  christos 					 */
   5186  1.1  christos 					foundsig = cnamesig;
   5187  1.1  christos 				}
   5188  1.1  christos 			} else if (header->type == sigtype) {
   5189  1.1  christos 				/*
   5190  1.1  christos 				 * We've found the RRSIG rdataset for our
   5191  1.1  christos 				 * target type.  Remember it.
   5192  1.1  christos 				 */
   5193  1.1  christos 				foundsig = header;
   5194  1.1  christos 			} else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
   5195  1.1  christos 				   header->type == negtype)
   5196  1.1  christos 			{
   5197  1.1  christos 				/*
   5198  1.1  christos 				 * We've found a negative cache entry.
   5199  1.1  christos 				 */
   5200  1.1  christos 				found = header;
   5201  1.1  christos 			} else if (header->type == dns_rdatatype_ns) {
   5202  1.1  christos 				/*
   5203  1.1  christos 				 * Remember a NS rdataset even if we're
   5204  1.1  christos 				 * not specifically looking for it, because
   5205  1.1  christos 				 * we might need it later.
   5206  1.1  christos 				 */
   5207  1.1  christos 				nsheader = header;
   5208  1.1  christos 			} else if (header->type == RBTDB_RDATATYPE_SIGNS) {
   5209  1.1  christos 				/*
   5210  1.1  christos 				 * If we need the NS rdataset, we'll also
   5211  1.1  christos 				 * need its signature.
   5212  1.1  christos 				 */
   5213  1.1  christos 				nssig = header;
   5214  1.1  christos 			} else if (header->type == dns_rdatatype_nsec) {
   5215  1.1  christos 				nsecheader = header;
   5216  1.1  christos 			} else if (header->type == RBTDB_RDATATYPE_SIGNSEC) {
   5217  1.1  christos 				nsecsig = header;
   5218  1.1  christos 			} else if (cname_ok &&
   5219  1.1  christos 				   header->type == RBTDB_RDATATYPE_SIGCNAME)
   5220  1.1  christos 			{
   5221  1.1  christos 				/*
   5222  1.1  christos 				 * If we get a CNAME match, we'll also need
   5223  1.1  christos 				 * its signature.
   5224  1.1  christos 				 */
   5225  1.1  christos 				cnamesig = header;
   5226  1.1  christos 			}
   5227  1.1  christos 			header_prev = header;
   5228  1.1  christos 		} else {
   5229  1.1  christos 			header_prev = header;
   5230  1.1  christos 		}
   5231  1.1  christos 	}
   5232  1.1  christos 
   5233  1.1  christos 	if (empty_node) {
   5234  1.1  christos 		/*
   5235  1.1  christos 		 * We have an exact match for the name, but there are no
   5236  1.1  christos 		 * extant rdatasets.  That means that this node doesn't
   5237  1.1  christos 		 * meaningfully exist, and that we really have a partial match.
   5238  1.1  christos 		 */
   5239  1.1  christos 		NODE_UNLOCK(lock, locktype);
   5240  1.1  christos 		goto find_ns;
   5241  1.1  christos 	}
   5242  1.1  christos 
   5243  1.1  christos 	/*
   5244  1.1  christos 	 * If we didn't find what we were looking for...
   5245  1.1  christos 	 */
   5246  1.1  christos 	if (found == NULL ||
   5247  1.1  christos 	    (DNS_TRUST_ADDITIONAL(found->trust) &&
   5248  1.1  christos 	     ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
   5249  1.1  christos 	    (found->trust == dns_trust_glue &&
   5250  1.1  christos 	     ((options & DNS_DBFIND_GLUEOK) == 0)) ||
   5251  1.1  christos 	    (DNS_TRUST_PENDING(found->trust) &&
   5252  1.1  christos 	     ((options & DNS_DBFIND_PENDINGOK) == 0)))
   5253  1.1  christos 	{
   5254  1.1  christos 		/*
   5255  1.1  christos 		 * Return covering NODATA NSEC record.
   5256  1.1  christos 		 */
   5257  1.1  christos 		if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 &&
   5258  1.1  christos 		    nsecheader != NULL)
   5259  1.1  christos 		{
   5260  1.1  christos 			if (nodep != NULL) {
   5261  1.1  christos 				new_reference(search.rbtdb, node, locktype);
   5262  1.1  christos 				*nodep = node;
   5263  1.1  christos 			}
   5264  1.1  christos 			bind_rdataset(search.rbtdb, node, nsecheader,
   5265  1.1  christos 				      search.now, locktype, rdataset);
   5266  1.1  christos 			if (need_headerupdate(nsecheader, search.now)) {
   5267  1.1  christos 				update = nsecheader;
   5268  1.1  christos 			}
   5269  1.1  christos 			if (nsecsig != NULL) {
   5270  1.1  christos 				bind_rdataset(search.rbtdb, node, nsecsig,
   5271  1.1  christos 					      search.now, locktype,
   5272  1.1  christos 					      sigrdataset);
   5273  1.1  christos 				if (need_headerupdate(nsecsig, search.now)) {
   5274  1.1  christos 					updatesig = nsecsig;
   5275  1.1  christos 				}
   5276  1.1  christos 			}
   5277  1.1  christos 			result = DNS_R_COVERINGNSEC;
   5278  1.1  christos 			goto node_exit;
   5279  1.1  christos 		}
   5280  1.1  christos 
   5281  1.1  christos 		/*
   5282  1.1  christos 		 * If there is an NS rdataset at this node, then this is the
   5283  1.1  christos 		 * deepest zone cut.
   5284  1.1  christos 		 */
   5285  1.1  christos 		if (nsheader != NULL) {
   5286  1.1  christos 			if (nodep != NULL) {
   5287  1.1  christos 				new_reference(search.rbtdb, node, locktype);
   5288  1.1  christos 				*nodep = node;
   5289  1.1  christos 			}
   5290  1.1  christos 			bind_rdataset(search.rbtdb, node, nsheader, search.now,
   5291  1.1  christos 				      locktype, rdataset);
   5292  1.1  christos 			if (need_headerupdate(nsheader, search.now)) {
   5293  1.1  christos 				update = nsheader;
   5294  1.1  christos 			}
   5295  1.1  christos 			if (nssig != NULL) {
   5296  1.1  christos 				bind_rdataset(search.rbtdb, node, nssig,
   5297  1.1  christos 					      search.now, locktype,
   5298  1.1  christos 					      sigrdataset);
   5299  1.1  christos 				if (need_headerupdate(nssig, search.now)) {
   5300  1.1  christos 					updatesig = nssig;
   5301  1.1  christos 				}
   5302  1.1  christos 			}
   5303  1.1  christos 			result = DNS_R_DELEGATION;
   5304  1.1  christos 			goto node_exit;
   5305  1.1  christos 		}
   5306  1.1  christos 
   5307  1.1  christos 		/*
   5308  1.1  christos 		 * Go find the deepest zone cut.
   5309  1.1  christos 		 */
   5310  1.1  christos 		NODE_UNLOCK(lock, locktype);
   5311  1.1  christos 		goto find_ns;
   5312  1.1  christos 	}
   5313  1.1  christos 
   5314  1.1  christos 	/*
   5315  1.1  christos 	 * We found what we were looking for, or we found a CNAME.
   5316  1.1  christos 	 */
   5317  1.1  christos 
   5318  1.1  christos 	if (nodep != NULL) {
   5319  1.1  christos 		new_reference(search.rbtdb, node, locktype);
   5320  1.1  christos 		*nodep = node;
   5321  1.1  christos 	}
   5322  1.1  christos 
   5323  1.1  christos 	if (NEGATIVE(found)) {
   5324  1.1  christos 		/*
   5325  1.1  christos 		 * We found a negative cache entry.
   5326  1.1  christos 		 */
   5327  1.1  christos 		if (NXDOMAIN(found)) {
   5328  1.1  christos 			result = DNS_R_NCACHENXDOMAIN;
   5329  1.1  christos 		} else {
   5330  1.1  christos 			result = DNS_R_NCACHENXRRSET;
   5331  1.1  christos 		}
   5332  1.1  christos 	} else if (type != found->type && type != dns_rdatatype_any &&
   5333  1.1  christos 		   found->type == dns_rdatatype_cname)
   5334  1.1  christos 	{
   5335  1.1  christos 		/*
   5336  1.1  christos 		 * We weren't doing an ANY query and we found a CNAME instead
   5337  1.1  christos 		 * of the type we were looking for, so we need to indicate
   5338  1.1  christos 		 * that result to the caller.
   5339  1.1  christos 		 */
   5340  1.1  christos 		result = DNS_R_CNAME;
   5341  1.1  christos 	} else {
   5342  1.1  christos 		/*
   5343  1.1  christos 		 * An ordinary successful query!
   5344  1.1  christos 		 */
   5345  1.1  christos 		result = ISC_R_SUCCESS;
   5346  1.1  christos 	}
   5347  1.1  christos 
   5348  1.1  christos 	if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
   5349  1.1  christos 	    result == DNS_R_NCACHENXRRSET)
   5350  1.1  christos 	{
   5351  1.1  christos 		bind_rdataset(search.rbtdb, node, found, search.now, locktype,
   5352  1.1  christos 			      rdataset);
   5353  1.1  christos 		if (need_headerupdate(found, search.now)) {
   5354  1.1  christos 			update = found;
   5355  1.1  christos 		}
   5356  1.1  christos 		if (!NEGATIVE(found) && foundsig != NULL) {
   5357  1.1  christos 			bind_rdataset(search.rbtdb, node, foundsig, search.now,
   5358  1.1  christos 				      locktype, sigrdataset);
   5359  1.1  christos 			if (need_headerupdate(foundsig, search.now)) {
   5360  1.1  christos 				updatesig = foundsig;
   5361  1.1  christos 			}
   5362  1.1  christos 		}
   5363  1.1  christos 	}
   5364  1.1  christos 
   5365  1.1  christos node_exit:
   5366  1.1  christos 	if ((update != NULL || updatesig != NULL) &&
   5367  1.1  christos 	    locktype != isc_rwlocktype_write)
   5368  1.1  christos 	{
   5369  1.1  christos 		NODE_UNLOCK(lock, locktype);
   5370  1.1  christos 		NODE_LOCK(lock, isc_rwlocktype_write);
   5371  1.1  christos 		locktype = isc_rwlocktype_write;
   5372  1.1  christos 		POST(locktype);
   5373  1.1  christos 	}
   5374  1.1  christos 	if (update != NULL && need_headerupdate(update, search.now)) {
   5375  1.1  christos 		update_header(search.rbtdb, update, search.now);
   5376  1.1  christos 	}
   5377  1.1  christos 	if (updatesig != NULL && need_headerupdate(updatesig, search.now)) {
   5378  1.1  christos 		update_header(search.rbtdb, updatesig, search.now);
   5379  1.1  christos 	}
   5380  1.1  christos 
   5381  1.1  christos 	NODE_UNLOCK(lock, locktype);
   5382  1.1  christos 
   5383  1.1  christos tree_exit:
   5384  1.1  christos 	RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
   5385  1.1  christos 
   5386  1.1  christos 	/*
   5387  1.1  christos 	 * If we found a zonecut but aren't going to use it, we have to
   5388  1.1  christos 	 * let go of it.
   5389  1.1  christos 	 */
   5390  1.1  christos 	if (search.need_cleanup) {
   5391  1.1  christos 		node = search.zonecut;
   5392  1.1  christos 		INSIST(node != NULL);
   5393  1.1  christos 		lock = &(search.rbtdb->node_locks[node->locknum].lock);
   5394  1.1  christos 
   5395  1.1  christos 		NODE_LOCK(lock, isc_rwlocktype_read);
   5396  1.1  christos 		decrement_reference(search.rbtdb, node, 0, isc_rwlocktype_read,
   5397  1.1  christos 				    isc_rwlocktype_none, false);
   5398  1.1  christos 		NODE_UNLOCK(lock, isc_rwlocktype_read);
   5399  1.1  christos 	}
   5400  1.1  christos 
   5401  1.1  christos 	dns_rbtnodechain_reset(&search.chain);
   5402  1.1  christos 
   5403  1.1  christos 	update_cachestats(search.rbtdb, result);
   5404  1.1  christos 	return (result);
   5405  1.1  christos }
   5406  1.1  christos 
   5407  1.1  christos static isc_result_t
   5408  1.1  christos cache_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options,
   5409  1.1  christos 		  isc_stdtime_t now, dns_dbnode_t **nodep,
   5410  1.1  christos 		  dns_name_t *foundname, dns_name_t *dcname,
   5411  1.1  christos 		  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) {
                       	/*
                       	 * Look up 'name' in the database tree and bind the NS rdataset
                       	 * (and the RBTDB_RDATATYPE_SIGNS rdataset, when one is present)
                       	 * found at the matching node into 'rdataset' / 'sigrdataset'.
                       	 *
                       	 * The work is handed to find_deepest_zonecut() instead when the
                       	 * tree lookup is only a partial match, when check_stale_header()
                       	 * returns true for a header at the node, or when the node holds
                       	 * no NS header.
                       	 *
                       	 * 'now' == 0 means "use the current time".  If 'nodep' is not
                       	 * NULL and the NS rdataset is taken from the matching node, a new
                       	 * reference to that node is stored in '*nodep'.  A result of
                       	 * DNS_R_DELEGATION is returned to the caller as ISC_R_SUCCESS;
                       	 * any other result is returned unchanged.
                       	 */
   5412  1.1  christos 	dns_rbtnode_t *node = NULL;
   5413  1.1  christos 	nodelock_t *lock;
   5414  1.1  christos 	isc_result_t result;
   5415  1.1  christos 	rbtdb_search_t search;
   5416  1.1  christos 	rdatasetheader_t *header, *header_prev, *header_next;
   5417  1.1  christos 	rdatasetheader_t *found, *foundsig;
   5418  1.1  christos 	unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
   5419  1.1  christos 	isc_rwlocktype_t locktype;
   5420  1.1  christos 	bool dcnull = (dcname == NULL);
   5421  1.1  christos 
   5422  1.1  christos 	search.rbtdb = (dns_rbtdb_t *)db;
   5423  1.1  christos 
   5424  1.1  christos 	REQUIRE(VALID_RBTDB(search.rbtdb));
   5425  1.1  christos 
   5426  1.1  christos 	if (now == 0) {
   5427  1.1  christos 		isc_stdtime_get(&now);
   5428  1.1  christos 	}
   5429  1.1  christos 
   5430  1.1  christos 	search.rbtversion = NULL;
   5431  1.1  christos 	search.serial = 1;
   5432  1.1  christos 	search.options = options;
   5433  1.1  christos 	search.copy_name = false;
   5434  1.1  christos 	search.need_cleanup = false;
   5435  1.1  christos 	search.wild = false;
   5436  1.1  christos 	search.zonecut = NULL;
   5437  1.1  christos 	dns_fixedname_init(&search.zonecut_name);
   5438  1.1  christos 	dns_rbtnodechain_init(&search.chain);
   5439  1.1  christos 	search.now = now;
   5440  1.1  christos 
                       	/*
                       	 * With no separate 'dcname' supplied, let the tree lookup write
                       	 * the matched name directly into 'foundname'.
                       	 */
   5441  1.1  christos 	if (dcnull) {
   5442  1.1  christos 		dcname = foundname;
   5443  1.1  christos 	}
   5444  1.1  christos 
   5445  1.1  christos 	if ((options & DNS_DBFIND_NOEXACT) != 0) {
   5446  1.1  christos 		rbtoptions |= DNS_RBTFIND_NOEXACT;
   5447  1.1  christos 	}
   5448  1.1  christos 
   5449  1.1  christos 	RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
   5450  1.1  christos 
   5451  1.1  christos 	/*
   5452  1.1  christos 	 * Search down from the root of the tree.
   5453  1.1  christos 	 */
   5454  1.1  christos 	result = dns_rbt_findnode(search.rbtdb->tree, name, dcname, &node,
   5455  1.1  christos 				  &search.chain, rbtoptions, NULL, &search);
   5456  1.1  christos 
   5457  1.1  christos 	if (result == DNS_R_PARTIALMATCH) {
   5458  1.1  christos 		result = find_deepest_zonecut(&search, node, nodep, foundname,
   5459  1.1  christos 					      rdataset, sigrdataset);
   5460  1.1  christos 		goto tree_exit;
   5461  1.1  christos 	} else if (result != ISC_R_SUCCESS) {
   5462  1.1  christos 		goto tree_exit;
   5463  1.1  christos 	} else if (!dcnull) {
                       		/*
                       		 * Exact match with a caller-supplied 'dcname': mirror the
                       		 * matched name into 'foundname' as well.
                       		 */
   5464  1.1  christos 		dns_name_copynf(dcname, foundname);
   5465  1.1  christos 	}
   5466  1.1  christos 
   5467  1.1  christos 	/*
   5468  1.1  christos 	 * We now go looking for an NS rdataset at the node.
   5469  1.1  christos 	 */
   5470  1.1  christos 
   5471  1.1  christos 	lock = &(search.rbtdb->node_locks[node->locknum].lock);
   5472  1.1  christos 	locktype = isc_rwlocktype_read;
   5473  1.1  christos 	NODE_LOCK(lock, locktype);
   5474  1.1  christos 
   5475  1.1  christos 	found = NULL;
   5476  1.1  christos 	foundsig = NULL;
   5477  1.1  christos 	header_prev = NULL;
                       	/*
                       	 * 'header_next' is saved before each header is examined, and
                       	 * 'locktype' is passed by address to check_stale_header(), so the
                       	 * lock mode held after the loop is whatever 'locktype' says.
                       	 */
   5478  1.1  christos 	for (header = node->data; header != NULL; header = header_next) {
   5479  1.1  christos 		header_next = header->next;
   5480  1.1  christos 		if (check_stale_header(node, header, &locktype, lock, &search,
   5481  1.1  christos 				       &header_prev))
   5482  1.1  christos 		{
   5483  1.1  christos 			/*
   5484  1.1  christos 			 * The function dns_rbt_findnode found us a matching
   5485  1.1  christos 			 * node for 'name' and stored the result in 'dcname'.
   5486  1.1  christos 			 * This is the deepest known zonecut in our database.
   5487  1.1  christos 			 * However, this node may be stale and if serve-stale
   5488  1.1  christos 			 * is not enabled (in other words 'stale-answer-enable'
   5489  1.1  christos 			 * is set to no), this node may not be used as a
   5490  1.1  christos 			 * zonecut we know about. If so, find the deepest
   5491  1.1  christos 			 * zonecut from this node up and return that instead.
   5492  1.1  christos 			 */
   5493  1.1  christos 			NODE_UNLOCK(lock, locktype);
   5494  1.1  christos 			result = find_deepest_zonecut(&search, node, nodep,
   5495  1.1  christos 						      foundname, rdataset,
   5496  1.1  christos 						      sigrdataset);
                       			/* Keep 'dcname' in step with the new 'foundname'. */
   5497  1.1  christos 			dns_name_copynf(foundname, dcname);
   5498  1.1  christos 			goto tree_exit;
   5499  1.1  christos 		} else if (EXISTS(header) && !ANCIENT(header)) {
   5500  1.1  christos 			/*
   5501  1.1  christos 			 * If we found a type we were looking for, remember
   5502  1.1  christos 			 * it.
   5503  1.1  christos 			 */
   5504  1.1  christos 			if (header->type == dns_rdatatype_ns) {
   5505  1.1  christos 				/*
   5506  1.1  christos 				 * This is the NS rdataset this function is
   5507  1.1  christos 				 * looking for; remember it so it can be bound
   5508  1.1  christos 				 * once the whole header list has been scanned.
   5509  1.1  christos 				 */
   5510  1.1  christos 				found = header;
   5511  1.1  christos 			} else if (header->type == RBTDB_RDATATYPE_SIGNS) {
   5512  1.1  christos 				/*
   5513  1.1  christos 				 * If we need the NS rdataset, we'll also
   5514  1.1  christos 				 * need its signature.
   5515  1.1  christos 				 */
   5516  1.1  christos 				foundsig = header;
   5517  1.1  christos 			}
   5518  1.1  christos 			header_prev = header;
   5519  1.1  christos 		} else {
   5520  1.1  christos 			header_prev = header;
   5521  1.1  christos 		}
   5522  1.1  christos 	}
   5523  1.1  christos 
   5524  1.1  christos 	if (found == NULL) {
   5525  1.1  christos 		/*
   5526  1.1  christos 		 * No NS records here.
   5527  1.1  christos 		 */
   5528  1.1  christos 		NODE_UNLOCK(lock, locktype);
   5529  1.1  christos 		result = find_deepest_zonecut(&search, node, nodep, foundname,
   5530  1.1  christos 					      rdataset, sigrdataset);
   5531  1.1  christos 		goto tree_exit;
   5532  1.1  christos 	}
   5533  1.1  christos 
   5534  1.1  christos 	if (nodep != NULL) {
   5535  1.1  christos 		new_reference(search.rbtdb, node, locktype);
   5536  1.1  christos 		*nodep = node;
   5537  1.1  christos 	}
   5538  1.1  christos 
   5539  1.1  christos 	bind_rdataset(search.rbtdb, node, found, search.now, locktype,
   5540  1.1  christos 		      rdataset);
   5541  1.1  christos 	if (foundsig != NULL) {
   5542  1.1  christos 		bind_rdataset(search.rbtdb, node, foundsig, search.now,
   5543  1.1  christos 			      locktype, sigrdataset);
   5544  1.1  christos 	}
   5545  1.1  christos 
                       	/*
                       	 * update_header() is only called with the node lock held for
                       	 * writing.  If the lock is currently held for reading it is
                       	 * released and re-acquired as a writer, and need_headerupdate()
                       	 * is evaluated again for each header after that.
                       	 */
   5546  1.1  christos 	if (need_headerupdate(found, search.now) ||
   5547  1.1  christos 	    (foundsig != NULL && need_headerupdate(foundsig, search.now)))
   5548  1.1  christos 	{
   5549  1.1  christos 		if (locktype != isc_rwlocktype_write) {
   5550  1.1  christos 			NODE_UNLOCK(lock, locktype);
   5551  1.1  christos 			NODE_LOCK(lock, isc_rwlocktype_write);
   5552  1.1  christos 			locktype = isc_rwlocktype_write;
   5553  1.1  christos 			POST(locktype);
   5554  1.1  christos 		}
   5555  1.1  christos 		if (need_headerupdate(found, search.now)) {
   5556  1.1  christos 			update_header(search.rbtdb, found, search.now);
   5557  1.1  christos 		}
   5558  1.1  christos 		if (foundsig != NULL && need_headerupdate(foundsig, search.now))
   5559  1.1  christos 		{
   5560  1.1  christos 			update_header(search.rbtdb, foundsig, search.now);
   5561  1.1  christos 		}
   5562  1.1  christos 	}
   5563  1.1  christos 
   5564  1.1  christos 	NODE_UNLOCK(lock, locktype);
   5565  1.1  christos 
                       	/*
                       	 * Every path that jumps here has already released the node lock
                       	 * (or never took it); only the tree lock is still held.
                       	 */
   5566  1.1  christos tree_exit:
   5567  1.1  christos 	RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
   5568  1.1  christos 
                       	/* need_cleanup was set to false above and must still be false. */
   5569  1.1  christos 	INSIST(!search.need_cleanup);
   5570  1.1  christos 
   5571  1.1  christos 	dns_rbtnodechain_reset(&search.chain);
   5572  1.1  christos 
                       	/* A delegation result is reported to the caller as success. */
   5573  1.1  christos 	if (result == DNS_R_DELEGATION) {
   5574  1.1  christos 		result = ISC_R_SUCCESS;
   5575  1.1  christos 	}
   5576  1.1  christos 
   5577  1.1  christos 	return (result);
   5578  1.1  christos }
   5579  1.1  christos 
   5580  1.1  christos static void
   5581  1.1  christos attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
                       	/*
                       	 * Add one reference to the node 'source' and hand the same
                       	 * pointer back through '*targetp'.  'targetp' must be non-NULL
                       	 * and '*targetp' must be NULL on entry.  'db' is only validated;
                       	 * no lock is taken in this function.
                       	 */
   5582  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   5583  1.1  christos 	dns_rbtnode_t *node = (dns_rbtnode_t *)source;
   5584  1.1  christos 
   5585  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   5586  1.1  christos 	REQUIRE(targetp != NULL && *targetp == NULL);
   5587  1.1  christos 
   5588  1.1  christos 	isc_refcount_increment(&node->references);
   5589  1.1  christos 
   5590  1.1  christos 	*targetp = source;
   5591  1.1  christos }
   5592  1.1  christos 
   5593  1.1  christos static void
   5594  1.1  christos detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
                       	/*
                       	 * Drop the reference held through '*targetp' and set '*targetp'
                       	 * to NULL.  'targetp' and '*targetp' must both be non-NULL.
                       	 *
                       	 * If decrement_reference() returns true while the node's lock
                       	 * bucket has no references left and is flagged 'exiting',
                       	 * rbtdb->active is decremented; the decrement that brings it to
                       	 * zero logs a debug message and calls free_rbtdb().
                       	 */
   5595  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   5596  1.1  christos 	dns_rbtnode_t *node;
   5597  1.1  christos 	bool want_free = false;
   5598  1.1  christos 	bool inactive = false;
   5599  1.1  christos 	rbtdb_nodelock_t *nodelock;
   5600  1.1  christos 
   5601  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   5602  1.1  christos 	REQUIRE(targetp != NULL && *targetp != NULL);
   5603  1.1  christos 
   5604  1.1  christos 	node = (dns_rbtnode_t *)(*targetp);
   5605  1.1  christos 	nodelock = &rbtdb->node_locks[node->locknum];
   5606  1.1  christos 
   5607  1.1  christos 	NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
   5608  1.1  christos 
                       	/*
                       	 * Only record the outcome here; the rbtdb->active bookkeeping is
                       	 * done below, after the node lock has been released.
                       	 */
   5609  1.1  christos 	if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
   5610  1.1  christos 				isc_rwlocktype_none, false))
   5611  1.1  christos 	{
   5612  1.1  christos 		if (isc_refcount_current(&nodelock->references) == 0 &&
   5613  1.1  christos 		    nodelock->exiting)
   5614  1.1  christos 		{
   5615  1.1  christos 			inactive = true;
   5616  1.1  christos 		}
   5617  1.1  christos 	}
   5618  1.1  christos 
   5619  1.1  christos 	NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
   5620  1.1  christos 
   5621  1.1  christos 	*targetp = NULL;
   5622  1.1  christos 
   5623  1.1  christos 	if (inactive) {
                       		/* rbtdb->active is only modified under the database write lock. */
   5624  1.1  christos 		RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
   5625  1.1  christos 		rbtdb->active--;
   5626  1.1  christos 		if (rbtdb->active == 0) {
   5627  1.1  christos 			want_free = true;
   5628  1.1  christos 		}
   5629  1.1  christos 		RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
   5630  1.1  christos 		if (want_free) {
   5631  1.1  christos 			char buf[DNS_NAME_FORMATSIZE];
                       			/*
                       			 * The origin is formatted for the log only when
                       			 * dns_name_dynamic() is true for it; otherwise a
                       			 * placeholder string is logged.
                       			 */
   5632  1.1  christos 			if (dns_name_dynamic(&rbtdb->common.origin)) {
   5633  1.1  christos 				dns_name_format(&rbtdb->common.origin, buf,
   5634  1.1  christos 						sizeof(buf));
   5635  1.1  christos 			} else {
   5636  1.1  christos 				strlcpy(buf, "<UNKNOWN>", sizeof(buf));
   5637  1.1  christos 			}
   5638  1.1  christos 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
   5639  1.1  christos 				      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
   5640  1.1  christos 				      "calling free_rbtdb(%s)", buf);
   5641  1.1  christos 			free_rbtdb(rbtdb, true, NULL);
   5642  1.1  christos 		}
   5643  1.1  christos 	}
   5644  1.1  christos }
   5645  1.1  christos 
   5646  1.1  christos static isc_result_t
   5647  1.1  christos expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
                       	/*
                       	 * Walk the headers linked through ->next at 'node' and mark as
                       	 * ancient every header whose rdh_ttl + serve_stale_ttl is at or
                       	 * before 'now' - RBTDB_VIRTUAL.
                       	 *
                       	 * When the memory context reports overmem, the node may also be
                       	 * force-expired: if it has no 'down' pointer and a random draw
                       	 * (one in four) succeeds, every remaining header for which
                       	 * RETAIN() is false has its TTL set to 0 and is marked ancient.
                       	 *
                       	 * 'now' == 0 means "use the current time".  Always returns
                       	 * ISC_R_SUCCESS.
                       	 */
   5648  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   5649  1.1  christos 	dns_rbtnode_t *rbtnode = node;
   5650  1.1  christos 	rdatasetheader_t *header;
   5651  1.1  christos 	bool force_expire = false;
   5652  1.1  christos 	/*
   5653  1.1  christos 	 * These are the category and module used by the cache cleaner.
   5654  1.1  christos 	 */
   5655  1.1  christos 	bool log = false;
   5656  1.1  christos 	isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
   5657  1.1  christos 	isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
   5658  1.1  christos 	int level = ISC_LOG_DEBUG(2);
                       	/* Filled in only when 'log' is true, and only read when it is. */
   5659  1.1  christos 	char printname[DNS_NAME_FORMATSIZE];
   5660  1.1  christos 
   5661  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   5662  1.1  christos 
   5663  1.1  christos 	/*
   5664  1.1  christos 	 * Caller must hold a tree lock.
   5665  1.1  christos 	 */
   5666  1.1  christos 
   5667  1.1  christos 	if (now == 0) {
   5668  1.1  christos 		isc_stdtime_get(&now);
   5669  1.1  christos 	}
   5670  1.1  christos 
   5671  1.1  christos 	if (isc_mem_isovermem(rbtdb->common.mctx)) {
   5672  1.1  christos 		/*
   5673  1.1  christos 		 * Force expire with 25% probability.
   5674  1.1  christos 		 * XXXDCL Could stand to have a better policy, like LRU.
   5675  1.1  christos 		 */
   5676  1.1  christos 		force_expire = (rbtnode->down == NULL &&
   5677  1.1  christos 				(isc_random32() % 4) == 0);
   5678  1.1  christos 
   5679  1.1  christos 		/*
   5680  1.1  christos 		 * Note that 'log' can be true IFF overmem is also true.
   5681  1.1  christos 		 * overmem can currently only be true for cache
   5682  1.1  christos 		 * databases -- hence all of the "overmem cache" log strings.
   5683  1.1  christos 		 */
   5684  1.1  christos 		log = isc_log_wouldlog(dns_lctx, level);
   5685  1.1  christos 		if (log) {
   5686  1.1  christos 			isc_log_write(
   5687  1.1  christos 				dns_lctx, category, module, level,
   5688  1.1  christos 				"overmem cache: %s %s",
   5689  1.1  christos 				force_expire ? "FORCE" : "check",
   5690  1.1  christos 				dns_rbt_formatnodename(rbtnode, printname,
   5691  1.1  christos 						       sizeof(printname)));
   5692  1.1  christos 		}
   5693  1.1  christos 	}
   5694  1.1  christos 
   5695  1.1  christos 	/*
   5696  1.1  christos 	 * We may not need write access, but this code path is not performance
   5697  1.1  christos 	 * sensitive, so it should be okay to always lock as a writer.
   5698  1.1  christos 	 */
   5699  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   5700  1.1  christos 		  isc_rwlocktype_write);
   5701  1.1  christos 
                       	/*
                       	 * Per header, in priority order: expired by TTL, then
                       	 * force-expired, otherwise left alone (and logged as "saved" when
                       	 * still overmem and logging is on).
                       	 */
   5702  1.1  christos 	for (header = rbtnode->data; header != NULL; header = header->next) {
   5703  1.1  christos 		if (header->rdh_ttl + rbtdb->serve_stale_ttl <=
   5704  1.1  christos 		    now - RBTDB_VIRTUAL)
   5705  1.1  christos 		{
   5706  1.1  christos 			/*
   5707  1.1  christos 			 * We don't check if refcurrent(rbtnode) == 0 and try
   5708  1.1  christos 			 * to free like we do in cache_find(), because
   5709  1.1  christos 			 * refcurrent(rbtnode) must be non-zero.  This is so
   5710  1.1  christos 			 * because 'node' is an argument to the function.
   5711  1.1  christos 			 */
   5712  1.1  christos 			mark_header_ancient(rbtdb, header);
   5713  1.1  christos 			if (log) {
   5714  1.1  christos 				isc_log_write(dns_lctx, category, module, level,
   5715  1.1  christos 					      "overmem cache: ancient %s",
   5716  1.1  christos 					      printname);
   5717  1.1  christos 			}
   5718  1.1  christos 		} else if (force_expire) {
                       			/* Headers for which RETAIN() is true are spared. */
   5719  1.1  christos 			if (!RETAIN(header)) {
   5720  1.1  christos 				set_ttl(rbtdb, header, 0);
   5721  1.1  christos 				mark_header_ancient(rbtdb, header);
   5722  1.1  christos 			} else if (log) {
   5723  1.1  christos 				isc_log_write(dns_lctx, category, module, level,
   5724  1.1  christos 					      "overmem cache: "
   5725  1.1  christos 					      "reprieve by RETAIN() %s",
   5726  1.1  christos 					      printname);
   5727  1.1  christos 			}
   5728  1.1  christos 		} else if (isc_mem_isovermem(rbtdb->common.mctx) && log) {
   5729  1.1  christos 			isc_log_write(dns_lctx, category, module, level,
   5730  1.1  christos 				      "overmem cache: saved %s", printname);
   5731  1.1  christos 		}
   5732  1.1  christos 	}
   5733  1.1  christos 
   5734  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   5735  1.1  christos 		    isc_rwlocktype_write);
   5736  1.1  christos 
   5737  1.1  christos 	return (ISC_R_SUCCESS);
   5738  1.1  christos }
   5739  1.1  christos 
   5740  1.1  christos static void
   5741  1.1  christos overmem(dns_db_t *db, bool over) {
   5742  1.1  christos 	/* This is an empty callback.  See adb.c:water() */
                       	/* Neither 'db' nor 'over' is used; the function does nothing. */
   5743  1.1  christos 
   5744  1.1  christos 	UNUSED(db);
   5745  1.1  christos 	UNUSED(over);
   5746  1.1  christos 
   5747  1.1  christos 	return;
   5748  1.1  christos }
   5749  1.1  christos 
   5750  1.1  christos static void
   5751  1.1  christos printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
   5752  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   5753  1.1  christos 	dns_rbtnode_t *rbtnode = node;
   5754  1.1  christos 	bool first;
   5755  1.1  christos 	uint32_t refs;
   5756  1.1  christos 
   5757  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   5758  1.1  christos 
   5759  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   5760  1.1  christos 		  isc_rwlocktype_read);
   5761  1.1  christos 
   5762  1.1  christos 	refs = isc_refcount_current(&rbtnode->references);
   5763  1.1  christos 	fprintf(out, "node %p, %" PRIu32 " references, locknum = %u\n", rbtnode,
   5764  1.1  christos 		refs, rbtnode->locknum);
   5765  1.1  christos 	if (rbtnode->data != NULL) {
   5766  1.1  christos 		rdatasetheader_t *current, *top_next;
   5767  1.1  christos 
   5768  1.1  christos 		for (current = rbtnode->data; current != NULL;
   5769  1.1  christos 		     current = top_next)
   5770  1.1  christos 		{
   5771  1.1  christos 			top_next = current->next;
   5772  1.1  christos 			first = true;
   5773  1.1  christos 			fprintf(out, "\ttype %u", current->type);
   5774  1.1  christos 			do {
   5775  1.1  christos 				uint_least16_t attributes = atomic_load_acquire(
   5776  1.1  christos 					&current->attributes);
   5777  1.1  christos 				if (!first) {
   5778  1.1  christos 					fprintf(out, "\t");
   5779  1.1  christos 				}
   5780  1.1  christos 				first = false;
   5781  1.1  christos 				fprintf(out,
   5782  1.1  christos 					"\tserial = %lu, ttl = %u, "
   5783  1.1  christos 					"trust = %u, attributes = %" PRIuLEAST16
   5784  1.1  christos 					", "
   5785  1.1  christos 					"resign = %u\n",
   5786  1.1  christos 					(unsigned long)current->serial,
   5787  1.1  christos 					current->rdh_ttl, current->trust,
   5788  1.1  christos 					attributes,
   5789  1.1  christos 					(current->resign << 1) |
   5790  1.1  christos 						current->resign_lsb);
   5791  1.1  christos 				current = current->down;
   5792  1.1  christos 			} while (current != NULL);
   5793  1.1  christos 		}
   5794  1.1  christos 	} else {
   5795  1.1  christos 		fprintf(out, "(empty)\n");
   5796  1.1  christos 	}
   5797  1.1  christos 
   5798  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   5799  1.1  christos 		    isc_rwlocktype_read);
   5800  1.1  christos }
   5801  1.1  christos 
   5802  1.1  christos static isc_result_t
   5803  1.1  christos createiterator(dns_db_t *db, unsigned int options,
   5804  1.1  christos 	       dns_dbiterator_t **iteratorp) {
   5805  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   5806  1.1  christos 	rbtdb_dbiterator_t *rbtdbiter;
   5807  1.1  christos 
   5808  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   5809  1.1  christos 
   5810  1.1  christos 	rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
   5811  1.1  christos 
   5812  1.1  christos 	rbtdbiter->common.methods = &dbiterator_methods;
   5813  1.1  christos 	rbtdbiter->common.db = NULL;
   5814  1.1  christos 	dns_db_attach(db, &rbtdbiter->common.db);
   5815  1.1  christos 	rbtdbiter->common.relative_names = ((options & DNS_DB_RELATIVENAMES) !=
   5816  1.1  christos 					    0);
   5817  1.1  christos 	rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
   5818  1.1  christos 	rbtdbiter->common.cleaning = false;
   5819  1.1  christos 	rbtdbiter->paused = true;
   5820  1.1  christos 	rbtdbiter->tree_locked = isc_rwlocktype_none;
   5821  1.1  christos 	rbtdbiter->result = ISC_R_SUCCESS;
   5822  1.1  christos 	dns_fixedname_init(&rbtdbiter->name);
   5823  1.1  christos 	dns_fixedname_init(&rbtdbiter->origin);
   5824  1.1  christos 	rbtdbiter->node = NULL;
   5825  1.1  christos 	rbtdbiter->delcnt = 0;
   5826  1.1  christos 	rbtdbiter->nsec3only = ((options & DNS_DB_NSEC3ONLY) != 0);
   5827  1.1  christos 	rbtdbiter->nonsec3 = ((options & DNS_DB_NONSEC3) != 0);
   5828  1.1  christos 	memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
   5829  1.1  christos 	dns_rbtnodechain_init(&rbtdbiter->chain);
   5830  1.1  christos 	dns_rbtnodechain_init(&rbtdbiter->nsec3chain);
   5831  1.1  christos 	if (rbtdbiter->nsec3only) {
   5832  1.1  christos 		rbtdbiter->current = &rbtdbiter->nsec3chain;
   5833  1.1  christos 	} else {
   5834  1.1  christos 		rbtdbiter->current = &rbtdbiter->chain;
   5835  1.1  christos 	}
   5836  1.1  christos 
   5837  1.1  christos 	*iteratorp = (dns_dbiterator_t *)rbtdbiter;
   5838  1.1  christos 
   5839  1.1  christos 	return (ISC_R_SUCCESS);
   5840  1.1  christos }
   5841  1.1  christos 
   5842  1.1  christos static isc_result_t
   5843  1.1  christos zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
   5844  1.1  christos 		  dns_rdatatype_t type, dns_rdatatype_t covers,
   5845  1.1  christos 		  isc_stdtime_t now, dns_rdataset_t *rdataset,
   5846  1.1  christos 		  dns_rdataset_t *sigrdataset) {
   5847  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   5848  1.1  christos 	dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
   5849  1.1  christos 	rdatasetheader_t *header, *header_next, *found, *foundsig;
   5850  1.1  christos 	rbtdb_serial_t serial;
   5851  1.1  christos 	rbtdb_version_t *rbtversion = version;
   5852  1.1  christos 	bool close_version = false;
   5853  1.1  christos 	rbtdb_rdatatype_t matchtype, sigmatchtype;
   5854  1.1  christos 
   5855  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   5856  1.1  christos 	REQUIRE(type != dns_rdatatype_any);
   5857  1.1  christos 	INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
   5858  1.1  christos 
   5859  1.1  christos 	if (rbtversion == NULL) {
   5860  1.1  christos 		currentversion(db, (dns_dbversion_t **)(void *)(&rbtversion));
   5861  1.1  christos 		close_version = true;
   5862  1.1  christos 	}
   5863  1.1  christos 	serial = rbtversion->serial;
   5864  1.1  christos 	now = 0;
   5865  1.1  christos 
   5866  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   5867  1.1  christos 		  isc_rwlocktype_read);
   5868  1.1  christos 
   5869  1.1  christos 	found = NULL;
   5870  1.1  christos 	foundsig = NULL;
   5871  1.1  christos 	matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
   5872  1.1  christos 	if (covers == 0) {
   5873  1.1  christos 		sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
   5874  1.1  christos 	} else {
   5875  1.1  christos 		sigmatchtype = 0;
   5876  1.1  christos 	}
   5877  1.1  christos 
   5878  1.1  christos 	for (header = rbtnode->data; header != NULL; header = header_next) {
   5879  1.1  christos 		header_next = header->next;
   5880  1.1  christos 		do {
   5881  1.1  christos 			if (header->serial <= serial && !IGNORE(header)) {
   5882  1.1  christos 				/*
   5883  1.1  christos 				 * Is this a "this rdataset doesn't
   5884  1.1  christos 				 * exist" record?
   5885  1.1  christos 				 */
   5886  1.1  christos 				if (NONEXISTENT(header)) {
   5887  1.1  christos 					header = NULL;
   5888  1.1  christos 				}
   5889  1.1  christos 				break;
   5890  1.1  christos 			} else {
   5891  1.1  christos 				header = header->down;
   5892  1.1  christos 			}
   5893  1.1  christos 		} while (header != NULL);
   5894  1.1  christos 		if (header != NULL) {
   5895  1.1  christos 			/*
   5896  1.1  christos 			 * We have an active, extant rdataset.  If it's a
   5897  1.1  christos 			 * type we're looking for, remember it.
   5898  1.1  christos 			 */
   5899  1.1  christos 			if (header->type == matchtype) {
   5900  1.1  christos 				found = header;
   5901  1.1  christos 				if (foundsig != NULL) {
   5902  1.1  christos 					break;
   5903  1.1  christos 				}
   5904  1.1  christos 			} else if (header->type == sigmatchtype) {
   5905  1.1  christos 				foundsig = header;
   5906  1.1  christos 				if (found != NULL) {
   5907  1.1  christos 					break;
   5908  1.1  christos 				}
   5909  1.1  christos 			}
   5910  1.1  christos 		}
   5911  1.1  christos 	}
   5912  1.1  christos 	if (found != NULL) {
   5913  1.1  christos 		bind_rdataset(rbtdb, rbtnode, found, now, isc_rwlocktype_read,
   5914  1.1  christos 			      rdataset);
   5915  1.1  christos 		if (foundsig != NULL) {
   5916  1.1  christos 			bind_rdataset(rbtdb, rbtnode, foundsig, now,
   5917  1.1  christos 				      isc_rwlocktype_read, sigrdataset);
   5918  1.1  christos 		}
   5919  1.1  christos 	}
   5920  1.1  christos 
   5921  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   5922  1.1  christos 		    isc_rwlocktype_read);
   5923  1.1  christos 
   5924  1.1  christos 	if (close_version) {
   5925  1.1  christos 		closeversion(db, (dns_dbversion_t **)(void *)(&rbtversion),
   5926  1.1  christos 			     false);
   5927  1.1  christos 	}
   5928  1.1  christos 
   5929  1.1  christos 	if (found == NULL) {
   5930  1.1  christos 		return (ISC_R_NOTFOUND);
   5931  1.1  christos 	}
   5932  1.1  christos 
   5933  1.1  christos 	return (ISC_R_SUCCESS);
   5934  1.1  christos }
   5935  1.1  christos 
   5936  1.1  christos static isc_result_t
   5937  1.1  christos cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
   5938  1.1  christos 		   dns_rdatatype_t type, dns_rdatatype_t covers,
   5939  1.1  christos 		   isc_stdtime_t now, dns_rdataset_t *rdataset,
   5940  1.1  christos 		   dns_rdataset_t *sigrdataset) {
   5941  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   5942  1.1  christos 	dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
   5943  1.1  christos 	rdatasetheader_t *header, *header_next, *found, *foundsig;
   5944  1.1  christos 	rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
   5945  1.1  christos 	isc_result_t result;
   5946  1.1  christos 	nodelock_t *lock;
   5947  1.1  christos 	isc_rwlocktype_t locktype;
   5948  1.1  christos 
   5949  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   5950  1.1  christos 	REQUIRE(type != dns_rdatatype_any);
   5951  1.1  christos 
   5952  1.1  christos 	UNUSED(version);
   5953  1.1  christos 
   5954  1.1  christos 	result = ISC_R_SUCCESS;
   5955  1.1  christos 
   5956  1.1  christos 	if (now == 0) {
   5957  1.1  christos 		isc_stdtime_get(&now);
   5958  1.1  christos 	}
   5959  1.1  christos 
   5960  1.1  christos 	lock = &rbtdb->node_locks[rbtnode->locknum].lock;
   5961  1.1  christos 	locktype = isc_rwlocktype_read;
   5962  1.1  christos 	NODE_LOCK(lock, locktype);
   5963  1.1  christos 
   5964  1.1  christos 	found = NULL;
   5965  1.1  christos 	foundsig = NULL;
   5966  1.1  christos 	matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
   5967  1.1  christos 	negtype = RBTDB_RDATATYPE_VALUE(0, type);
   5968  1.1  christos 	if (covers == 0) {
   5969  1.1  christos 		sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
   5970  1.1  christos 	} else {
   5971  1.1  christos 		sigmatchtype = 0;
   5972  1.1  christos 	}
   5973  1.1  christos 
   5974  1.1  christos 	for (header = rbtnode->data; header != NULL; header = header_next) {
   5975  1.1  christos 		header_next = header->next;
   5976  1.1  christos 		if (!ACTIVE(header, now)) {
   5977  1.1  christos 			if ((header->rdh_ttl + rbtdb->serve_stale_ttl <
   5978  1.1  christos 			     now - RBTDB_VIRTUAL) &&
   5979  1.1  christos 			    (locktype == isc_rwlocktype_write ||
   5980  1.1  christos 			     NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS))
   5981  1.1  christos 			{
   5982  1.1  christos 				/*
   5983  1.1  christos 				 * We update the node's status only when we
   5984  1.1  christos 				 * can get write access.
   5985  1.1  christos 				 */
   5986  1.1  christos 				locktype = isc_rwlocktype_write;
   5987  1.1  christos 
   5988  1.1  christos 				/*
   5989  1.1  christos 				 * We don't check if refcurrent(rbtnode) == 0
   5990  1.1  christos 				 * and try to free like we do in cache_find(),
   5991  1.1  christos 				 * because refcurrent(rbtnode) must be
   5992  1.1  christos 				 * non-zero.  This is so because 'node' is an
   5993  1.1  christos 				 * argument to the function.
   5994  1.1  christos 				 */
   5995  1.1  christos 				mark_header_ancient(rbtdb, header);
   5996  1.1  christos 			}
   5997  1.1  christos 		} else if (EXISTS(header) && !ANCIENT(header)) {
   5998  1.1  christos 			if (header->type == matchtype) {
   5999  1.1  christos 				found = header;
   6000  1.1  christos 			} else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
   6001  1.1  christos 				   header->type == negtype)
   6002  1.1  christos 			{
   6003  1.1  christos 				found = header;
   6004  1.1  christos 			} else if (header->type == sigmatchtype) {
   6005  1.1  christos 				foundsig = header;
   6006  1.1  christos 			}
   6007  1.1  christos 		}
   6008  1.1  christos 	}
   6009  1.1  christos 	if (found != NULL) {
   6010  1.1  christos 		bind_rdataset(rbtdb, rbtnode, found, now, locktype, rdataset);
   6011  1.1  christos 		if (!NEGATIVE(found) && foundsig != NULL) {
   6012  1.1  christos 			bind_rdataset(rbtdb, rbtnode, foundsig, now, locktype,
   6013  1.1  christos 				      sigrdataset);
   6014  1.1  christos 		}
   6015  1.1  christos 	}
   6016  1.1  christos 
   6017  1.1  christos 	NODE_UNLOCK(lock, locktype);
   6018  1.1  christos 
   6019  1.1  christos 	if (found == NULL) {
   6020  1.1  christos 		return (ISC_R_NOTFOUND);
   6021  1.1  christos 	}
   6022  1.1  christos 
   6023  1.1  christos 	if (NEGATIVE(found)) {
   6024  1.1  christos 		/*
   6025  1.1  christos 		 * We found a negative cache entry.
   6026  1.1  christos 		 */
   6027  1.1  christos 		if (NXDOMAIN(found)) {
   6028  1.1  christos 			result = DNS_R_NCACHENXDOMAIN;
   6029  1.1  christos 		} else {
   6030  1.1  christos 			result = DNS_R_NCACHENXRRSET;
   6031  1.1  christos 		}
   6032  1.1  christos 	}
   6033  1.1  christos 
   6034  1.1  christos 	update_cachestats(rbtdb, result);
   6035  1.1  christos 
   6036  1.1  christos 	return (result);
   6037  1.1  christos }
   6038  1.1  christos 
   6039  1.1  christos static isc_result_t
   6040  1.1  christos allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
   6041  1.1  christos 	     unsigned int options, isc_stdtime_t now,
   6042  1.1  christos 	     dns_rdatasetiter_t **iteratorp) {
   6043  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   6044  1.1  christos 	dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
   6045  1.1  christos 	rbtdb_version_t *rbtversion = version;
   6046  1.1  christos 	rbtdb_rdatasetiter_t *iterator;
   6047  1.1  christos 
   6048  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   6049  1.1  christos 
   6050  1.1  christos 	iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
   6051  1.1  christos 
   6052  1.1  christos 	if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
   6053  1.1  christos 		now = 0;
   6054  1.1  christos 		if (rbtversion == NULL) {
   6055  1.1  christos 			currentversion(
   6056  1.1  christos 				db, (dns_dbversion_t **)(void *)(&rbtversion));
   6057  1.1  christos 		} else {
   6058  1.1  christos 			INSIST(rbtversion->rbtdb == rbtdb);
   6059  1.1  christos 
   6060  1.1  christos 			(void)isc_refcount_increment(&rbtversion->references);
   6061  1.1  christos 		}
   6062  1.1  christos 	} else {
   6063  1.1  christos 		if (now == 0) {
   6064  1.1  christos 			isc_stdtime_get(&now);
   6065  1.1  christos 		}
   6066  1.1  christos 		rbtversion = NULL;
   6067  1.1  christos 	}
   6068  1.1  christos 
   6069  1.1  christos 	iterator->common.magic = DNS_RDATASETITER_MAGIC;
   6070  1.1  christos 	iterator->common.methods = &rdatasetiter_methods;
   6071  1.1  christos 	iterator->common.db = db;
   6072  1.1  christos 	iterator->common.node = node;
   6073  1.1  christos 	iterator->common.version = (dns_dbversion_t *)rbtversion;
   6074  1.1  christos 	iterator->common.options = options;
   6075  1.1  christos 	iterator->common.now = now;
   6076  1.1  christos 
   6077  1.1  christos 	isc_refcount_increment(&rbtnode->references);
   6078  1.1  christos 
   6079  1.1  christos 	iterator->current = NULL;
   6080  1.1  christos 
   6081  1.1  christos 	*iteratorp = (dns_rdatasetiter_t *)iterator;
   6082  1.1  christos 
   6083  1.1  christos 	return (ISC_R_SUCCESS);
   6084  1.1  christos }
   6085  1.1  christos 
   6086  1.1  christos static bool
   6087  1.1  christos cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
   6088  1.1  christos 	rdatasetheader_t *header, *header_next;
   6089  1.1  christos 	bool cname, other_data;
   6090  1.1  christos 	dns_rdatatype_t rdtype;
   6091  1.1  christos 
   6092  1.1  christos 	/*
   6093  1.1  christos 	 * The caller must hold the node lock.
   6094  1.1  christos 	 */
   6095  1.1  christos 
   6096  1.1  christos 	/*
   6097  1.1  christos 	 * Look for CNAME and "other data" rdatasets active in our version.
   6098  1.1  christos 	 */
   6099  1.1  christos 	cname = false;
   6100  1.1  christos 	other_data = false;
   6101  1.1  christos 	for (header = node->data; header != NULL; header = header_next) {
   6102  1.1  christos 		header_next = header->next;
   6103  1.1  christos 		if (header->type == dns_rdatatype_cname) {
   6104  1.1  christos 			/*
   6105  1.1  christos 			 * Look for an active extant CNAME.
   6106  1.1  christos 			 */
   6107  1.1  christos 			do {
   6108  1.1  christos 				if (header->serial <= serial && !IGNORE(header))
   6109  1.1  christos 				{
   6110  1.1  christos 					/*
   6111  1.1  christos 					 * Is this a "this rdataset doesn't
   6112  1.1  christos 					 * exist" record?
   6113  1.1  christos 					 */
   6114  1.1  christos 					if (NONEXISTENT(header)) {
   6115  1.1  christos 						header = NULL;
   6116  1.1  christos 					}
   6117  1.1  christos 					break;
   6118  1.1  christos 				} else {
   6119  1.1  christos 					header = header->down;
   6120  1.1  christos 				}
   6121  1.1  christos 			} while (header != NULL);
   6122  1.1  christos 			if (header != NULL) {
   6123  1.1  christos 				cname = true;
   6124  1.1  christos 			}
   6125  1.1  christos 		} else {
   6126  1.1  christos 			/*
   6127  1.1  christos 			 * Look for active extant "other data".
   6128  1.1  christos 			 *
   6129  1.1  christos 			 * "Other data" is any rdataset whose type is not
   6130  1.1  christos 			 * KEY, NSEC, SIG or RRSIG.
   6131  1.1  christos 			 */
   6132  1.1  christos 			rdtype = RBTDB_RDATATYPE_BASE(header->type);
   6133  1.1  christos 			if (rdtype != dns_rdatatype_key &&
   6134  1.1  christos 			    rdtype != dns_rdatatype_sig &&
   6135  1.1  christos 			    rdtype != dns_rdatatype_nsec &&
   6136  1.1  christos 			    rdtype != dns_rdatatype_rrsig)
   6137  1.1  christos 			{
   6138  1.1  christos 				/*
   6139  1.1  christos 				 * Is it active and extant?
   6140  1.1  christos 				 */
   6141  1.1  christos 				do {
   6142  1.1  christos 					if (header->serial <= serial &&
   6143  1.1  christos 					    !IGNORE(header))
   6144  1.1  christos 					{
   6145  1.1  christos 						/*
   6146  1.1  christos 						 * Is this a "this rdataset
   6147  1.1  christos 						 * doesn't exist" record?
   6148  1.1  christos 						 */
   6149  1.1  christos 						if (NONEXISTENT(header)) {
   6150  1.1  christos 							header = NULL;
   6151  1.1  christos 						}
   6152  1.1  christos 						break;
   6153  1.1  christos 					} else {
   6154  1.1  christos 						header = header->down;
   6155  1.1  christos 					}
   6156  1.1  christos 				} while (header != NULL);
   6157  1.1  christos 				if (header != NULL) {
   6158  1.1  christos 					other_data = true;
   6159  1.1  christos 				}
   6160  1.1  christos 			}
   6161  1.1  christos 		}
   6162  1.1  christos 	}
   6163  1.1  christos 
   6164  1.1  christos 	if (cname && other_data) {
   6165  1.1  christos 		return (true);
   6166  1.1  christos 	}
   6167  1.1  christos 
   6168  1.1  christos 	return (false);
   6169  1.1  christos }
   6170  1.1  christos 
   6171  1.1  christos static void
   6172  1.1  christos resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
   6173  1.1  christos 	INSIST(!IS_CACHE(rbtdb));
   6174  1.1  christos 	INSIST(newheader->heap_index == 0);
   6175  1.1  christos 	INSIST(!ISC_LINK_LINKED(newheader, link));
   6176  1.1  christos 
   6177  1.1  christos 	isc_heap_insert(rbtdb->heaps[idx], newheader);
   6178  1.1  christos }
   6179  1.1  christos 
   6180  1.1  christos /*
   6181  1.1  christos  * node write lock must be held.
   6182  1.1  christos  */
   6183  1.1  christos static void
   6184  1.1  christos resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
   6185  1.1  christos 	      rdatasetheader_t *header) {
   6186  1.1  christos 	/*
   6187  1.1  christos 	 * Remove the old header from the heap
   6188  1.1  christos 	 */
   6189  1.1  christos 	if (header != NULL && header->heap_index != 0) {
   6190  1.1  christos 		isc_heap_delete(rbtdb->heaps[header->node->locknum],
   6191  1.1  christos 				header->heap_index);
   6192  1.1  christos 		header->heap_index = 0;
   6193  1.1  christos 		if (version != NULL) {
   6194  1.1  christos 			new_reference(rbtdb, header->node,
   6195  1.1  christos 				      isc_rwlocktype_write);
   6196  1.1  christos 			ISC_LIST_APPEND(version->resigned_list, header, link);
   6197  1.1  christos 		}
   6198  1.1  christos 	}
   6199  1.1  christos }
   6200  1.1  christos 
   6201  1.1  christos static uint64_t
   6202  1.1  christos recordsize(rdatasetheader_t *header, unsigned int namelen) {
   6203  1.1  christos 	return (dns_rdataslab_rdatasize((unsigned char *)header,
   6204  1.1  christos 					sizeof(*header)) +
   6205  1.1  christos 		sizeof(dns_ttl_t) + sizeof(dns_rdatatype_t) +
   6206  1.1  christos 		sizeof(dns_rdataclass_t) + namelen);
   6207  1.1  christos }
   6208  1.1  christos 
   6209  1.1  christos static void
   6210  1.1  christos update_recordsandxfrsize(bool add, rbtdb_version_t *rbtversion,
   6211  1.1  christos 			 rdatasetheader_t *header, unsigned int namelen) {
   6212  1.1  christos 	unsigned char *hdr = (unsigned char *)header;
   6213  1.1  christos 	size_t hdrsize = sizeof(*header);
   6214  1.1  christos 
   6215  1.1  christos 	RWLOCK(&rbtversion->rwlock, isc_rwlocktype_write);
   6216  1.1  christos 	if (add) {
   6217  1.1  christos 		rbtversion->records += dns_rdataslab_count(hdr, hdrsize);
   6218  1.1  christos 		rbtversion->xfrsize += recordsize(header, namelen);
   6219  1.1  christos 	} else {
   6220  1.1  christos 		rbtversion->records -= dns_rdataslab_count(hdr, hdrsize);
   6221  1.1  christos 		rbtversion->xfrsize -= recordsize(header, namelen);
   6222  1.1  christos 	}
   6223  1.1  christos 	RWUNLOCK(&rbtversion->rwlock, isc_rwlocktype_write);
   6224  1.1  christos }
   6225  1.1  christos 
   6226  1.1  christos /*
   6227  1.1  christos  * write lock on rbtnode must be held.
   6228  1.1  christos  */
   6229  1.1  christos static isc_result_t
   6230  1.1  christos add32(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, const dns_name_t *nodename,
   6231  1.1  christos       rbtdb_version_t *rbtversion, rdatasetheader_t *newheader,
   6232  1.1  christos       unsigned int options, bool loading, dns_rdataset_t *addedrdataset,
   6233  1.1  christos       isc_stdtime_t now) {
   6234  1.1  christos 	rbtdb_changed_t *changed = NULL;
   6235  1.1  christos 	rdatasetheader_t *topheader = NULL, *topheader_prev = NULL;
   6236  1.1  christos 	rdatasetheader_t *header = NULL, *sigheader = NULL;
   6237  1.1  christos 	unsigned char *merged = NULL;
   6238  1.1  christos 	isc_result_t result;
   6239  1.1  christos 	bool header_nx;
   6240  1.1  christos 	bool newheader_nx;
   6241  1.1  christos 	bool merge;
   6242  1.1  christos 	dns_rdatatype_t rdtype, covers;
   6243  1.1  christos 	rbtdb_rdatatype_t negtype, sigtype;
   6244  1.1  christos 	dns_trust_t trust;
   6245  1.1  christos 	int idx;
   6246  1.1  christos 
   6247  1.1  christos 	/*
   6248  1.1  christos 	 * Add an rdatasetheader_t to a node.
   6249  1.1  christos 	 */
   6250  1.1  christos 
   6251  1.1  christos 	/*
   6252  1.1  christos 	 * Caller must be holding the node lock.
   6253  1.1  christos 	 */
   6254  1.1  christos 
   6255  1.1  christos 	if ((options & DNS_DBADD_MERGE) != 0) {
   6256  1.1  christos 		REQUIRE(rbtversion != NULL);
   6257  1.1  christos 		merge = true;
   6258  1.1  christos 	} else {
   6259  1.1  christos 		merge = false;
   6260  1.1  christos 	}
   6261  1.1  christos 
   6262  1.1  christos 	if ((options & DNS_DBADD_FORCE) != 0) {
   6263  1.1  christos 		trust = dns_trust_ultimate;
   6264  1.1  christos 	} else {
   6265  1.1  christos 		trust = newheader->trust;
   6266  1.1  christos 	}
   6267  1.1  christos 
   6268  1.1  christos 	if (rbtversion != NULL && !loading) {
   6269  1.1  christos 		/*
   6270  1.1  christos 		 * We always add a changed record, even if no changes end up
   6271  1.1  christos 		 * being made to this node, because it's harmless and
   6272  1.1  christos 		 * simplifies the code.
   6273  1.1  christos 		 */
   6274  1.1  christos 		changed = add_changed(rbtdb, rbtversion, rbtnode);
   6275  1.1  christos 		if (changed == NULL) {
   6276  1.1  christos 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
   6277  1.1  christos 			return (ISC_R_NOMEMORY);
   6278  1.1  christos 		}
   6279  1.1  christos 	}
   6280  1.1  christos 
   6281  1.1  christos 	newheader_nx = NONEXISTENT(newheader) ? true : false;
   6282  1.1  christos 	topheader_prev = NULL;
   6283  1.1  christos 	sigheader = NULL;
   6284  1.1  christos 	negtype = 0;
   6285  1.1  christos 	if (rbtversion == NULL && !newheader_nx) {
   6286  1.1  christos 		rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
   6287  1.1  christos 		covers = RBTDB_RDATATYPE_EXT(newheader->type);
   6288  1.1  christos 		sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, covers);
   6289  1.1  christos 		if (NEGATIVE(newheader)) {
   6290  1.1  christos 			/*
   6291  1.1  christos 			 * We're adding a negative cache entry.
   6292  1.1  christos 			 */
   6293  1.1  christos 			if (covers == dns_rdatatype_any) {
   6294  1.1  christos 				/*
   6295  1.1  christos 				 * If we're adding an negative cache entry
   6296  1.1  christos 				 * which covers all types (NXDOMAIN,
   6297  1.1  christos 				 * NODATA(QTYPE=ANY)),
   6298  1.1  christos 				 *
   6299  1.1  christos 				 * We make all other data ancient so that the
   6300  1.1  christos 				 * only rdataset that can be found at this
   6301  1.1  christos 				 * node is the negative cache entry.
   6302  1.1  christos 				 */
   6303  1.1  christos 				for (topheader = rbtnode->data;
   6304  1.1  christos 				     topheader != NULL;
   6305  1.1  christos 				     topheader = topheader->next)
   6306  1.1  christos 				{
   6307  1.1  christos 					set_ttl(rbtdb, topheader, 0);
   6308  1.1  christos 					mark_header_ancient(rbtdb, topheader);
   6309  1.1  christos 				}
   6310  1.1  christos 				goto find_header;
   6311  1.1  christos 			}
   6312  1.1  christos 			/*
   6313  1.1  christos 			 * Otherwise look for any RRSIGs of the given
   6314  1.1  christos 			 * type so they can be marked ancient later.
   6315  1.1  christos 			 */
   6316  1.1  christos 			for (topheader = rbtnode->data; topheader != NULL;
   6317  1.1  christos 			     topheader = topheader->next)
   6318  1.1  christos 			{
   6319  1.1  christos 				if (topheader->type == sigtype) {
   6320  1.1  christos 					sigheader = topheader;
   6321  1.1  christos 				}
   6322  1.1  christos 			}
   6323  1.1  christos 			negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
   6324  1.1  christos 		} else {
   6325  1.1  christos 			/*
   6326  1.1  christos 			 * We're adding something that isn't a
   6327  1.1  christos 			 * negative cache entry.  Look for an extant
   6328  1.1  christos 			 * non-ancient NXDOMAIN/NODATA(QTYPE=ANY) negative
   6329  1.1  christos 			 * cache entry.  If we're adding an RRSIG, also
   6330  1.1  christos 			 * check for an extant non-ancient NODATA ncache
   6331  1.1  christos 			 * entry which covers the same type as the RRSIG.
   6332  1.1  christos 			 */
   6333  1.1  christos 			for (topheader = rbtnode->data; topheader != NULL;
   6334  1.1  christos 			     topheader = topheader->next)
   6335  1.1  christos 			{
   6336  1.1  christos 				if ((topheader->type ==
   6337  1.1  christos 				     RBTDB_RDATATYPE_NCACHEANY) ||
   6338  1.1  christos 				    (newheader->type == sigtype &&
   6339  1.1  christos 				     topheader->type ==
   6340  1.1  christos 					     RBTDB_RDATATYPE_VALUE(0, covers)))
   6341  1.1  christos 				{
   6342  1.1  christos 					break;
   6343  1.1  christos 				}
   6344  1.1  christos 			}
   6345  1.1  christos 			if (topheader != NULL && EXISTS(topheader) &&
   6346  1.1  christos 			    ACTIVE(topheader, now))
   6347  1.1  christos 			{
   6348  1.1  christos 				/*
   6349  1.1  christos 				 * Found one.
   6350  1.1  christos 				 */
   6351  1.1  christos 				if (trust < topheader->trust) {
   6352  1.1  christos 					/*
   6353  1.1  christos 					 * The NXDOMAIN/NODATA(QTYPE=ANY)
   6354  1.1  christos 					 * is more trusted.
   6355  1.1  christos 					 */
   6356  1.1  christos 					free_rdataset(rbtdb, rbtdb->common.mctx,
   6357  1.1  christos 						      newheader);
   6358  1.1  christos 					if (addedrdataset != NULL) {
   6359  1.1  christos 						bind_rdataset(
   6360  1.1  christos 							rbtdb, rbtnode,
   6361  1.1  christos 							topheader, now,
   6362  1.1  christos 							isc_rwlocktype_write,
   6363  1.1  christos 							addedrdataset);
   6364  1.1  christos 					}
   6365  1.1  christos 					return (DNS_R_UNCHANGED);
   6366  1.1  christos 				}
   6367  1.1  christos 				/*
   6368  1.1  christos 				 * The new rdataset is better.  Expire the
   6369  1.1  christos 				 * ncache entry.
   6370  1.1  christos 				 */
   6371  1.1  christos 				set_ttl(rbtdb, topheader, 0);
   6372  1.1  christos 				mark_header_ancient(rbtdb, topheader);
   6373  1.1  christos 				topheader = NULL;
   6374  1.1  christos 				goto find_header;
   6375  1.1  christos 			}
   6376  1.1  christos 			negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
   6377  1.1  christos 		}
   6378  1.1  christos 	}
   6379  1.1  christos 
   6380  1.1  christos 	for (topheader = rbtnode->data; topheader != NULL;
   6381  1.1  christos 	     topheader = topheader->next)
   6382  1.1  christos 	{
   6383  1.1  christos 		if (topheader->type == newheader->type ||
   6384  1.1  christos 		    topheader->type == negtype)
   6385  1.1  christos 		{
   6386  1.1  christos 			break;
   6387  1.1  christos 		}
   6388  1.1  christos 		topheader_prev = topheader;
   6389  1.1  christos 	}
   6390  1.1  christos 
   6391  1.1  christos find_header:
   6392  1.1  christos 	/*
   6393  1.1  christos 	 * If header isn't NULL, we've found the right type.  There may be
   6394  1.1  christos 	 * IGNORE rdatasets between the top of the chain and the first real
   6395  1.1  christos 	 * data.  We skip over them.
   6396  1.1  christos 	 */
   6397  1.1  christos 	header = topheader;
   6398  1.1  christos 	while (header != NULL && IGNORE(header)) {
   6399  1.1  christos 		header = header->down;
   6400  1.1  christos 	}
   6401  1.1  christos 	if (header != NULL) {
   6402  1.1  christos 		header_nx = NONEXISTENT(header) ? true : false;
   6403  1.1  christos 
   6404  1.1  christos 		/*
   6405  1.1  christos 		 * Deleting an already non-existent rdataset has no effect.
   6406  1.1  christos 		 */
   6407  1.1  christos 		if (header_nx && newheader_nx) {
   6408  1.1  christos 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
   6409  1.1  christos 			return (DNS_R_UNCHANGED);
   6410  1.1  christos 		}
   6411  1.1  christos 
   6412  1.1  christos 		/*
   6413  1.1  christos 		 * Trying to add an rdataset with lower trust to a cache
   6414  1.1  christos 		 * DB has no effect, provided that the cache data isn't
   6415  1.1  christos 		 * stale. If the cache data is stale, new lower trust
   6416  1.1  christos 		 * data will supersede it below. Unclear what the best
   6417  1.1  christos 		 * policy is here.
   6418  1.1  christos 		 */
   6419  1.1  christos 		if (rbtversion == NULL && trust < header->trust &&
   6420  1.1  christos 		    (ACTIVE(header, now) || header_nx))
   6421  1.1  christos 		{
   6422  1.1  christos 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
   6423  1.1  christos 			if (addedrdataset != NULL) {
   6424  1.1  christos 				bind_rdataset(rbtdb, rbtnode, header, now,
   6425  1.1  christos 					      isc_rwlocktype_write,
   6426  1.1  christos 					      addedrdataset);
   6427  1.1  christos 			}
   6428  1.1  christos 			return (DNS_R_UNCHANGED);
   6429  1.1  christos 		}
   6430  1.1  christos 
   6431  1.1  christos 		/*
   6432  1.1  christos 		 * Don't merge if a nonexistent rdataset is involved.
   6433  1.1  christos 		 */
   6434  1.1  christos 		if (merge && (header_nx || newheader_nx)) {
   6435  1.1  christos 			merge = false;
   6436  1.1  christos 		}
   6437  1.1  christos 
   6438  1.1  christos 		/*
   6439  1.1  christos 		 * If 'merge' is true, we'll try to create a new rdataset
   6440  1.1  christos 		 * that is the union of 'newheader' and 'header'.
   6441  1.1  christos 		 */
   6442  1.1  christos 		if (merge) {
   6443  1.1  christos 			unsigned int flags = 0;
   6444  1.1  christos 			INSIST(rbtversion->serial >= header->serial);
   6445  1.1  christos 			merged = NULL;
   6446  1.1  christos 			result = ISC_R_SUCCESS;
   6447  1.1  christos 
   6448  1.1  christos 			if ((options & DNS_DBADD_EXACT) != 0) {
   6449  1.1  christos 				flags |= DNS_RDATASLAB_EXACT;
   6450  1.1  christos 			}
   6451  1.1  christos 			/*
   6452  1.1  christos 			 * TTL use here is irrelevant to the cache;
   6453  1.1  christos 			 * merge is only done with zonedbs.
   6454  1.1  christos 			 */
   6455  1.1  christos 			if ((options & DNS_DBADD_EXACTTTL) != 0 &&
   6456  1.1  christos 			    newheader->rdh_ttl != header->rdh_ttl)
   6457  1.1  christos 			{
   6458  1.1  christos 				result = DNS_R_NOTEXACT;
   6459  1.1  christos 			} else if (newheader->rdh_ttl != header->rdh_ttl) {
   6460  1.1  christos 				flags |= DNS_RDATASLAB_FORCE;
   6461  1.1  christos 			}
   6462  1.1  christos 			if (result == ISC_R_SUCCESS) {
   6463  1.1  christos 				result = dns_rdataslab_merge(
   6464  1.1  christos 					(unsigned char *)header,
   6465  1.1  christos 					(unsigned char *)newheader,
   6466  1.1  christos 					(unsigned int)(sizeof(*newheader)),
   6467  1.1  christos 					rbtdb->common.mctx,
   6468  1.1  christos 					rbtdb->common.rdclass,
   6469  1.1  christos 					(dns_rdatatype_t)header->type, flags,
   6470  1.1  christos 					&merged);
   6471  1.1  christos 			}
   6472  1.1  christos 			if (result == ISC_R_SUCCESS) {
   6473  1.1  christos 				/*
   6474  1.1  christos 				 * If 'header' has the same serial number as
   6475  1.1  christos 				 * we do, we could clean it up now if we knew
   6476  1.1  christos 				 * that our caller had no references to it.
   6477  1.1  christos 				 * We don't know this, however, so we leave it
   6478  1.1  christos 				 * alone.  It will get cleaned up when
   6479  1.1  christos 				 * clean_zone_node() runs.
   6480  1.1  christos 				 */
   6481  1.1  christos 				free_rdataset(rbtdb, rbtdb->common.mctx,
   6482  1.1  christos 					      newheader);
   6483  1.1  christos 				newheader = (rdatasetheader_t *)merged;
   6484  1.1  christos 				init_rdataset(rbtdb, newheader);
   6485  1.1  christos 				update_newheader(newheader, header);
   6486  1.1  christos 				if (loading && RESIGN(newheader) &&
   6487  1.1  christos 				    RESIGN(header) &&
   6488  1.1  christos 				    resign_sooner(header, newheader))
   6489  1.1  christos 				{
   6490  1.1  christos 					newheader->resign = header->resign;
   6491  1.1  christos 					newheader->resign_lsb =
   6492  1.1  christos 						header->resign_lsb;
   6493  1.1  christos 				}
   6494  1.1  christos 			} else {
   6495  1.1  christos 				free_rdataset(rbtdb, rbtdb->common.mctx,
   6496  1.1  christos 					      newheader);
   6497  1.1  christos 				return (result);
   6498  1.1  christos 			}
   6499  1.1  christos 		}
   6500  1.1  christos 		/*
   6501  1.1  christos 		 * Don't replace existing NS, A and AAAA RRsets in the
   6502  1.1  christos 		 * cache if they already exist. This prevents named
   6503  1.1  christos 		 * being locked to old servers. Don't lower trust of
   6504  1.1  christos 		 * existing record if the update is forced. Nothing
   6505  1.1  christos 		 * special to be done w.r.t stale data; it gets replaced
   6506  1.1  christos 		 * normally further down.
   6507  1.1  christos 		 */
   6508  1.1  christos 		if (IS_CACHE(rbtdb) && ACTIVE(header, now) &&
   6509  1.1  christos 		    header->type == dns_rdatatype_ns && !header_nx &&
   6510  1.1  christos 		    !newheader_nx && header->trust >= newheader->trust &&
   6511  1.1  christos 		    dns_rdataslab_equalx((unsigned char *)header,
   6512  1.1  christos 					 (unsigned char *)newheader,
   6513  1.1  christos 					 (unsigned int)(sizeof(*newheader)),
   6514  1.1  christos 					 rbtdb->common.rdclass,
   6515  1.1  christos 					 (dns_rdatatype_t)header->type))
   6516  1.1  christos 		{
   6517  1.1  christos 			/*
   6518  1.1  christos 			 * Honour the new ttl if it is less than the
   6519  1.1  christos 			 * older one.
   6520  1.1  christos 			 */
   6521  1.1  christos 			if (header->rdh_ttl > newheader->rdh_ttl) {
   6522  1.1  christos 				set_ttl(rbtdb, header, newheader->rdh_ttl);
   6523  1.1  christos 			}
   6524  1.1  christos 			if (header->noqname == NULL &&
   6525  1.1  christos 			    newheader->noqname != NULL)
   6526  1.1  christos 			{
   6527  1.1  christos 				header->noqname = newheader->noqname;
   6528  1.1  christos 				newheader->noqname = NULL;
   6529  1.1  christos 			}
   6530  1.1  christos 			if (header->closest == NULL &&
   6531  1.1  christos 			    newheader->closest != NULL)
   6532  1.1  christos 			{
   6533  1.1  christos 				header->closest = newheader->closest;
   6534  1.1  christos 				newheader->closest = NULL;
   6535  1.1  christos 			}
   6536  1.1  christos 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
   6537  1.1  christos 			if (addedrdataset != NULL) {
   6538  1.1  christos 				bind_rdataset(rbtdb, rbtnode, header, now,
   6539  1.1  christos 					      isc_rwlocktype_write,
   6540  1.1  christos 					      addedrdataset);
   6541  1.1  christos 			}
   6542  1.1  christos 			return (ISC_R_SUCCESS);
   6543  1.1  christos 		}
   6544  1.1  christos 		/*
   6545  1.1  christos 		 * If we will be replacing an NS RRset, force its TTL
   6546  1.1  christos 		 * to be no more than the current NS RRset's TTL.  This
   6547  1.1  christos 		 * ensures the delegations that are withdrawn are honoured.
   6548  1.1  christos 		 */
   6549  1.1  christos 		if (IS_CACHE(rbtdb) && ACTIVE(header, now) &&
   6550  1.1  christos 		    header->type == dns_rdatatype_ns && !header_nx &&
   6551  1.1  christos 		    !newheader_nx && header->trust <= newheader->trust)
   6552  1.1  christos 		{
   6553  1.1  christos 			if (newheader->rdh_ttl > header->rdh_ttl) {
   6554  1.1  christos 				newheader->rdh_ttl = header->rdh_ttl;
   6555  1.1  christos 			}
   6556  1.1  christos 		}
   6557  1.1  christos 		if (IS_CACHE(rbtdb) && ACTIVE(header, now) &&
   6558  1.1  christos 		    (options & DNS_DBADD_PREFETCH) == 0 &&
   6559  1.1  christos 		    (header->type == dns_rdatatype_a ||
   6560  1.1  christos 		     header->type == dns_rdatatype_aaaa ||
   6561  1.1  christos 		     header->type == dns_rdatatype_ds ||
   6562  1.1  christos 		     header->type == RBTDB_RDATATYPE_SIGDS) &&
   6563  1.1  christos 		    !header_nx && !newheader_nx &&
   6564  1.1  christos 		    header->trust >= newheader->trust &&
   6565  1.1  christos 		    dns_rdataslab_equal((unsigned char *)header,
   6566  1.1  christos 					(unsigned char *)newheader,
   6567  1.1  christos 					(unsigned int)(sizeof(*newheader))))
   6568  1.1  christos 		{
   6569  1.1  christos 			/*
   6570  1.1  christos 			 * Honour the new ttl if it is less than the
   6571  1.1  christos 			 * older one.
   6572  1.1  christos 			 */
   6573  1.1  christos 			if (header->rdh_ttl > newheader->rdh_ttl) {
   6574  1.1  christos 				set_ttl(rbtdb, header, newheader->rdh_ttl);
   6575  1.1  christos 			}
   6576  1.1  christos 			if (header->noqname == NULL &&
   6577  1.1  christos 			    newheader->noqname != NULL)
   6578  1.1  christos 			{
   6579  1.1  christos 				header->noqname = newheader->noqname;
   6580  1.1  christos 				newheader->noqname = NULL;
   6581  1.1  christos 			}
   6582  1.1  christos 			if (header->closest == NULL &&
   6583  1.1  christos 			    newheader->closest != NULL)
   6584  1.1  christos 			{
   6585  1.1  christos 				header->closest = newheader->closest;
   6586  1.1  christos 				newheader->closest = NULL;
   6587  1.1  christos 			}
   6588  1.1  christos 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
   6589  1.1  christos 			if (addedrdataset != NULL) {
   6590  1.1  christos 				bind_rdataset(rbtdb, rbtnode, header, now,
   6591  1.1  christos 					      isc_rwlocktype_write,
   6592  1.1  christos 					      addedrdataset);
   6593  1.1  christos 			}
   6594  1.1  christos 			return (ISC_R_SUCCESS);
   6595  1.1  christos 		}
   6596  1.1  christos 		INSIST(rbtversion == NULL ||
   6597  1.1  christos 		       rbtversion->serial >= topheader->serial);
   6598  1.1  christos 		if (loading) {
   6599  1.1  christos 			newheader->down = NULL;
   6600  1.1  christos 			idx = newheader->node->locknum;
   6601  1.1  christos 			if (IS_CACHE(rbtdb)) {
   6602  1.1  christos 				if (ZEROTTL(newheader)) {
   6603  1.1  christos 					ISC_LIST_APPEND(rbtdb->rdatasets[idx],
   6604  1.1  christos 							newheader, link);
   6605  1.1  christos 				} else {
   6606  1.1  christos 					ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
   6607  1.1  christos 							 newheader, link);
   6608  1.1  christos 				}
   6609  1.1  christos 				INSIST(rbtdb->heaps != NULL);
   6610  1.1  christos 				isc_heap_insert(rbtdb->heaps[idx], newheader);
   6611  1.1  christos 			} else if (RESIGN(newheader)) {
   6612  1.1  christos 				resign_insert(rbtdb, idx, newheader);
   6613  1.1  christos 				/*
   6614  1.1  christos 				 * Don't call resign_delete as we don't need
   6615  1.1  christos 				 * to reverse the delete.  The free_rdataset
   6616  1.1  christos 				 * call below will clean up the heap entry.
   6617  1.1  christos 				 */
   6618  1.1  christos 			}
   6619  1.1  christos 
   6620  1.1  christos 			/*
   6621  1.1  christos 			 * There are no other references to 'header' when
   6622  1.1  christos 			 * loading, so we MAY clean up 'header' now.
   6623  1.1  christos 			 * Since we don't generate changed records when
   6624  1.1  christos 			 * loading, we MUST clean up 'header' now.
   6625  1.1  christos 			 */
   6626  1.1  christos 			if (topheader_prev != NULL) {
   6627  1.1  christos 				topheader_prev->next = newheader;
   6628  1.1  christos 			} else {
   6629  1.1  christos 				rbtnode->data = newheader;
   6630  1.1  christos 			}
   6631  1.1  christos 			newheader->next = topheader->next;
   6632  1.1  christos 			if (rbtversion != NULL && !header_nx) {
   6633  1.1  christos 				update_recordsandxfrsize(false, rbtversion,
   6634  1.1  christos 							 header,
   6635  1.1  christos 							 nodename->length);
   6636  1.1  christos 			}
   6637  1.1  christos 			free_rdataset(rbtdb, rbtdb->common.mctx, header);
   6638  1.1  christos 		} else {
   6639  1.1  christos 			idx = newheader->node->locknum;
   6640  1.1  christos 			if (IS_CACHE(rbtdb)) {
   6641  1.1  christos 				INSIST(rbtdb->heaps != NULL);
   6642  1.1  christos 				isc_heap_insert(rbtdb->heaps[idx], newheader);
   6643  1.1  christos 				if (ZEROTTL(newheader)) {
   6644  1.1  christos 					ISC_LIST_APPEND(rbtdb->rdatasets[idx],
   6645  1.1  christos 							newheader, link);
   6646  1.1  christos 				} else {
   6647  1.1  christos 					ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
   6648  1.1  christos 							 newheader, link);
   6649  1.1  christos 				}
   6650  1.1  christos 			} else if (RESIGN(newheader)) {
   6651  1.1  christos 				resign_insert(rbtdb, idx, newheader);
   6652  1.1  christos 				resign_delete(rbtdb, rbtversion, header);
   6653  1.1  christos 			}
   6654  1.1  christos 			if (topheader_prev != NULL) {
   6655  1.1  christos 				topheader_prev->next = newheader;
   6656  1.1  christos 			} else {
   6657  1.1  christos 				rbtnode->data = newheader;
   6658  1.1  christos 			}
   6659  1.1  christos 			newheader->next = topheader->next;
   6660  1.1  christos 			newheader->down = topheader;
   6661  1.1  christos 			topheader->next = newheader;
   6662  1.1  christos 			rbtnode->dirty = 1;
   6663  1.1  christos 			if (changed != NULL) {
   6664  1.1  christos 				changed->dirty = true;
   6665  1.1  christos 			}
   6666  1.1  christos 			if (rbtversion == NULL) {
   6667  1.1  christos 				set_ttl(rbtdb, header, 0);
   6668  1.1  christos 				mark_header_ancient(rbtdb, header);
   6669  1.1  christos 				if (sigheader != NULL) {
   6670  1.1  christos 					set_ttl(rbtdb, sigheader, 0);
   6671  1.1  christos 					mark_header_ancient(rbtdb, sigheader);
   6672  1.1  christos 				}
   6673  1.1  christos 			}
   6674  1.1  christos 			if (rbtversion != NULL && !header_nx) {
   6675  1.1  christos 				update_recordsandxfrsize(false, rbtversion,
   6676  1.1  christos 							 header,
   6677  1.1  christos 							 nodename->length);
   6678  1.1  christos 			}
   6679  1.1  christos 		}
   6680  1.1  christos 	} else {
   6681  1.1  christos 		/*
   6682  1.1  christos 		 * No non-IGNORED rdatasets of the given type exist at
   6683  1.1  christos 		 * this node.
   6684  1.1  christos 		 */
   6685  1.1  christos 
   6686  1.1  christos 		/*
   6687  1.1  christos 		 * If we're trying to delete the type, don't bother.
   6688  1.1  christos 		 */
   6689  1.1  christos 		if (newheader_nx) {
   6690  1.1  christos 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
   6691  1.1  christos 			return (DNS_R_UNCHANGED);
   6692  1.1  christos 		}
   6693  1.1  christos 
   6694  1.1  christos 		idx = newheader->node->locknum;
   6695  1.1  christos 		if (IS_CACHE(rbtdb)) {
   6696  1.1  christos 			isc_heap_insert(rbtdb->heaps[idx], newheader);
   6697  1.1  christos 			if (ZEROTTL(newheader)) {
   6698  1.1  christos 				ISC_LIST_APPEND(rbtdb->rdatasets[idx],
   6699  1.1  christos 						newheader, link);
   6700  1.1  christos 			} else {
   6701  1.1  christos 				ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
   6702  1.1  christos 						 newheader, link);
   6703  1.1  christos 			}
   6704  1.1  christos 		} else if (RESIGN(newheader)) {
   6705  1.1  christos 			resign_insert(rbtdb, idx, newheader);
   6706  1.1  christos 			resign_delete(rbtdb, rbtversion, header);
   6707  1.1  christos 		}
   6708  1.1  christos 
   6709  1.1  christos 		if (topheader != NULL) {
   6710  1.1  christos 			/*
   6711  1.1  christos 			 * We have a list of rdatasets of the given type,
   6712  1.1  christos 			 * but they're all marked IGNORE.  We simply insert
   6713  1.1  christos 			 * the new rdataset at the head of the list.
   6714  1.1  christos 			 *
   6715  1.1  christos 			 * Ignored rdatasets cannot occur during loading, so
   6716  1.1  christos 			 * we INSIST on it.
   6717  1.1  christos 			 */
   6718  1.1  christos 			INSIST(!loading);
   6719  1.1  christos 			INSIST(rbtversion == NULL ||
   6720  1.1  christos 			       rbtversion->serial >= topheader->serial);
   6721  1.1  christos 			if (topheader_prev != NULL) {
   6722  1.1  christos 				topheader_prev->next = newheader;
   6723  1.1  christos 			} else {
   6724  1.1  christos 				rbtnode->data = newheader;
   6725  1.1  christos 			}
   6726  1.1  christos 			newheader->next = topheader->next;
   6727  1.1  christos 			newheader->down = topheader;
   6728  1.1  christos 			topheader->next = newheader;
   6729  1.1  christos 			rbtnode->dirty = 1;
   6730  1.1  christos 			if (changed != NULL) {
   6731  1.1  christos 				changed->dirty = true;
   6732  1.1  christos 			}
   6733  1.1  christos 		} else {
   6734  1.1  christos 			/*
   6735  1.1  christos 			 * No rdatasets of the given type exist at the node.
   6736  1.1  christos 			 */
   6737  1.1  christos 			newheader->next = rbtnode->data;
   6738  1.1  christos 			newheader->down = NULL;
   6739  1.1  christos 			rbtnode->data = newheader;
   6740  1.1  christos 		}
   6741  1.1  christos 	}
   6742  1.1  christos 
   6743  1.1  christos 	if (rbtversion != NULL && !newheader_nx) {
   6744  1.1  christos 		update_recordsandxfrsize(true, rbtversion, newheader,
   6745  1.1  christos 					 nodename->length);
   6746  1.1  christos 	}
   6747  1.1  christos 
   6748  1.1  christos 	/*
   6749  1.1  christos 	 * Check if the node now contains CNAME and other data.
   6750  1.1  christos 	 */
   6751  1.1  christos 	if (rbtversion != NULL &&
   6752  1.1  christos 	    cname_and_other_data(rbtnode, rbtversion->serial))
   6753  1.1  christos 	{
   6754  1.1  christos 		return (DNS_R_CNAMEANDOTHER);
   6755  1.1  christos 	}
   6756  1.1  christos 
   6757  1.1  christos 	if (addedrdataset != NULL) {
   6758  1.1  christos 		bind_rdataset(rbtdb, rbtnode, newheader, now,
   6759  1.1  christos 			      isc_rwlocktype_write, addedrdataset);
   6760  1.1  christos 	}
   6761  1.1  christos 
   6762  1.1  christos 	return (ISC_R_SUCCESS);
   6763  1.1  christos }
   6764  1.1  christos 
   6765  1.1  christos static bool
   6766  1.1  christos delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
   6767  1.1  christos 		rbtdb_rdatatype_t type) {
                       	/*
                       	 * Tell whether 'type' is a delegating type for 'node' in 'rbtdb'.
                       	 *
                       	 * Cache database: only DNAME qualifies.
                       	 * Any other database: DNAME always qualifies; NS qualifies
                       	 * unless 'node' is the origin node of a non-stub database.
                       	 *
                       	 * 'node' is consulted only for the NS/origin-node test.
                       	 */
   6768  1.1  christos 	if (IS_CACHE(rbtdb)) {
   6769  1.1  christos 		if (type == dns_rdatatype_dname) {
   6770  1.1  christos 			return (true);
   6771  1.1  christos 		} else {
   6772  1.1  christos 			return (false);
   6773  1.1  christos 		}
   6774  1.1  christos 	} else if (type == dns_rdatatype_dname ||
   6775  1.1  christos 		   (type == dns_rdatatype_ns &&
   6776  1.1  christos 		    (node != rbtdb->origin_node || IS_STUB(rbtdb))))
   6777  1.1  christos 	{
   6778  1.1  christos 		return (true);
   6779  1.1  christos 	}
                       	/* Non-cache database and neither DNAME nor a delegating NS. */
   6780  1.1  christos 	return (false);
   6781  1.1  christos }
   6782  1.1  christos 
   6783  1.1  christos static isc_result_t
   6784  1.1  christos addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
   6785  1.1  christos 	   dns_rdataset_t *rdataset) {
                       	/*
                       	 * Copy the "noqname" data carried by 'rdataset' into a newly
                       	 * allocated struct noqname and attach it to newheader->noqname.
                       	 *
                       	 * dns_rdataset_getnoqname() supplies a name plus the 'neg' and
                       	 * 'negsig' rdatasets.  The name is duplicated and both rdatasets
                       	 * are converted to slabs, all using rbtdb->common.mctx.
                       	 *
                       	 * Returns ISC_R_SUCCESS, or the failing result of
                       	 * dns_rdataslab_fromrdataset().  On failure newheader->noqname
                       	 * is not modified and the partially built structure is passed
                       	 * to free_noqname().  A failure of dns_rdataset_getnoqname()
                       	 * itself trips RUNTIME_CHECK rather than being returned.
                       	 */
   6786  1.1  christos 	struct noqname *noqname;
   6787  1.1  christos 	isc_mem_t *mctx = rbtdb->common.mctx;
   6788  1.1  christos 	dns_name_t name;
   6789  1.1  christos 	dns_rdataset_t neg, negsig;
   6790  1.1  christos 	isc_result_t result;
   6791  1.1  christos 	isc_region_t r;
   6792  1.1  christos 
   6793  1.1  christos 	dns_name_init(&name, NULL);
   6794  1.1  christos 	dns_rdataset_init(&neg);
   6795  1.1  christos 	dns_rdataset_init(&negsig);
   6796  1.1  christos 
   6797  1.1  christos 	result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
   6798  1.1  christos 	RUNTIME_CHECK(result == ISC_R_SUCCESS);
   6799  1.1  christos 
   6800  1.1  christos 	noqname = isc_mem_get(mctx, sizeof(*noqname));
   6801  1.1  christos 	dns_name_init(&noqname->name, NULL);
                       	/*
                       	 * Clear both slab pointers before anything can fail, so the
                       	 * cleanup path never hands uninitialized pointers to
                       	 * free_noqname().
                       	 */
   6802  1.1  christos 	noqname->neg = NULL;
   6803  1.1  christos 	noqname->negsig = NULL;
   6804  1.1  christos 	noqname->type = neg.type;
   6805  1.1  christos 	dns_name_dup(&name, mctx, &noqname->name);
                       	/*
                       	 * NOTE(review): the final 0 presumably means "reserve no space
                       	 * in front of the slab" (addrdataset() passes
                       	 * sizeof(rdatasetheader_t) in that position) -- confirm against
                       	 * dns_rdataslab_fromrdataset().
                       	 */
   6806  1.1  christos 	result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
   6807  1.1  christos 	if (result != ISC_R_SUCCESS) {
   6808  1.1  christos 		goto cleanup;
   6809  1.1  christos 	}
   6810  1.1  christos 	noqname->neg = r.base;
   6811  1.1  christos 	result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
   6812  1.1  christos 	if (result != ISC_R_SUCCESS) {
   6813  1.1  christos 		goto cleanup;
   6814  1.1  christos 	}
   6815  1.1  christos 	noqname->negsig = r.base;
                       	/* Both slabs are stored; the source rdatasets can be released. */
   6816  1.1  christos 	dns_rdataset_disassociate(&neg);
   6817  1.1  christos 	dns_rdataset_disassociate(&negsig);
   6818  1.1  christos 	newheader->noqname = noqname;
   6819  1.1  christos 	return (ISC_R_SUCCESS);
   6820  1.1  christos 
   6821  1.1  christos cleanup:
                       	/*
                       	 * Reached only before the success-path disassociate calls, so
                       	 * 'neg' and 'negsig' are still associated here.
                       	 */
   6822  1.1  christos 	dns_rdataset_disassociate(&neg);
   6823  1.1  christos 	dns_rdataset_disassociate(&negsig);
   6824  1.1  christos 	free_noqname(mctx, &noqname);
   6825  1.1  christos 	return (result);
   6826  1.1  christos }
   6827  1.1  christos 
   6828  1.1  christos static isc_result_t
   6829  1.1  christos addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
   6830  1.1  christos 	   dns_rdataset_t *rdataset) {
                       	/*
                       	 * Copy the "closest" data carried by 'rdataset' into a newly
                       	 * allocated struct noqname and attach it to newheader->closest.
                       	 *
                       	 * Same procedure as addnoqname(), except that the data comes
                       	 * from dns_rdataset_getclosest() and the result is stored in
                       	 * the 'closest' member of the header.
                       	 *
                       	 * Returns ISC_R_SUCCESS, or the failing result of
                       	 * dns_rdataslab_fromrdataset().  On failure newheader->closest
                       	 * is not modified and the partially built structure is passed
                       	 * to free_noqname().  A failure of dns_rdataset_getclosest()
                       	 * itself trips RUNTIME_CHECK rather than being returned.
                       	 */
   6831  1.1  christos 	struct noqname *closest;
   6832  1.1  christos 	isc_mem_t *mctx = rbtdb->common.mctx;
   6833  1.1  christos 	dns_name_t name;
   6834  1.1  christos 	dns_rdataset_t neg, negsig;
   6835  1.1  christos 	isc_result_t result;
   6836  1.1  christos 	isc_region_t r;
   6837  1.1  christos 
   6838  1.1  christos 	dns_name_init(&name, NULL);
   6839  1.1  christos 	dns_rdataset_init(&neg);
   6840  1.1  christos 	dns_rdataset_init(&negsig);
   6841  1.1  christos 
   6842  1.1  christos 	result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
   6843  1.1  christos 	RUNTIME_CHECK(result == ISC_R_SUCCESS);
   6844  1.1  christos 
   6845  1.1  christos 	closest = isc_mem_get(mctx, sizeof(*closest));
   6846  1.1  christos 	dns_name_init(&closest->name, NULL);
                       	/*
                       	 * Clear both slab pointers before anything can fail, so the
                       	 * cleanup path never hands uninitialized pointers to
                       	 * free_noqname().
                       	 */
   6847  1.1  christos 	closest->neg = NULL;
   6848  1.1  christos 	closest->negsig = NULL;
   6849  1.1  christos 	closest->type = neg.type;
   6850  1.1  christos 	dns_name_dup(&name, mctx, &closest->name);
   6851  1.1  christos 	result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
   6852  1.1  christos 	if (result != ISC_R_SUCCESS) {
   6853  1.1  christos 		goto cleanup;
   6854  1.1  christos 	}
   6855  1.1  christos 	closest->neg = r.base;
   6856  1.1  christos 	result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
   6857  1.1  christos 	if (result != ISC_R_SUCCESS) {
   6858  1.1  christos 		goto cleanup;
   6859  1.1  christos 	}
   6860  1.1  christos 	closest->negsig = r.base;
                       	/* Both slabs are stored; the source rdatasets can be released. */
   6861  1.1  christos 	dns_rdataset_disassociate(&neg);
   6862  1.1  christos 	dns_rdataset_disassociate(&negsig);
   6863  1.1  christos 	newheader->closest = closest;
   6864  1.1  christos 	return (ISC_R_SUCCESS);
   6865  1.1  christos 
   6866  1.1  christos cleanup:
                       	/*
                       	 * Reached only before the success-path disassociate calls, so
                       	 * 'neg' and 'negsig' are still associated here.
                       	 */
   6867  1.1  christos 	dns_rdataset_disassociate(&neg);
   6868  1.1  christos 	dns_rdataset_disassociate(&negsig);
   6869  1.1  christos 	free_noqname(mctx, &closest);
   6870  1.1  christos 	return (result);
   6871  1.1  christos }
   6872  1.1  christos 
   6873  1.1  christos static dns_dbmethods_t zone_methods;
                       /*
                        * Declared ahead of addrdataset(), which compares
                        * rbtdb->common.methods against &zone_methods to recognise a zone
                        * database.  NOTE(review): the initialized definition is presumably
                        * further down the file -- confirm.
                        */
   6874  1.1  christos 
   6875  1.1  christos static size_t
   6876  1.1  christos rdataset_size(rdatasetheader_t *header) {
                       	/*
                       	 * Size attributed to 'header'.
                       	 *
                       	 * For a header that is not flagged NONEXISTENT this is whatever
                       	 * dns_rdataslab_size() reports when given the header pointer and
                       	 * sizeof(*header).  For a NONEXISTENT header only the header
                       	 * structure itself is counted.
                       	 *
                       	 * addrdataset() passes the result to overmem_purge().
                       	 */
   6877  1.1  christos 	if (!NONEXISTENT(header)) {
   6878  1.1  christos 		return (dns_rdataslab_size((unsigned char *)header,
   6879  1.1  christos 					   sizeof(*header)));
   6880  1.1  christos 	}
   6881  1.1  christos 
   6882  1.1  christos 	return (sizeof(*header));
   6883  1.1  christos }
   6884  1.1  christos 
   6885  1.1  christos static isc_result_t
   6886  1.1  christos addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
   6887  1.1  christos 	    isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
   6888  1.1  christos 	    dns_rdataset_t *addedrdataset) {
   6889  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   6890  1.1  christos 	dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
   6891  1.1  christos 	rbtdb_version_t *rbtversion = version;
   6892  1.1  christos 	isc_region_t region;
   6893  1.1  christos 	rdatasetheader_t *newheader;
   6894  1.1  christos 	rdatasetheader_t *header;
   6895  1.1  christos 	isc_result_t result;
   6896  1.1  christos 	bool delegating;
   6897  1.1  christos 	bool newnsec;
   6898  1.1  christos 	bool tree_locked = false;
   6899  1.1  christos 	bool cache_is_overmem = false;
   6900  1.1  christos 	dns_fixedname_t fixed;
   6901  1.1  christos 	dns_name_t *name;
   6902  1.1  christos 
   6903  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   6904  1.1  christos 	INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
   6905  1.1  christos 
   6906  1.1  christos 	if (rbtdb->common.methods == &zone_methods) {
   6907  1.1  christos 		/*
   6908  1.1  christos 		 * SOA records are only allowed at top of zone.
   6909  1.1  christos 		 */
   6910  1.1  christos 		if (rdataset->type == dns_rdatatype_soa &&
   6911  1.1  christos 		    node != rbtdb->origin_node)
   6912  1.1  christos 		{
   6913  1.1  christos 			return (DNS_R_NOTZONETOP);
   6914  1.1  christos 		}
   6915  1.1  christos 		RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   6916  1.1  christos 		REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
   6917  1.1  christos 			  (rdataset->type == dns_rdatatype_nsec3 ||
   6918  1.1  christos 			   rdataset->covers == dns_rdatatype_nsec3)) ||
   6919  1.1  christos 			 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
   6920  1.1  christos 			  rdataset->type != dns_rdatatype_nsec3 &&
   6921  1.1  christos 			  rdataset->covers != dns_rdatatype_nsec3)));
   6922  1.1  christos 		RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   6923  1.1  christos 	}
   6924  1.1  christos 
   6925  1.1  christos 	if (rbtversion == NULL) {
   6926  1.1  christos 		if (now == 0) {
   6927  1.1  christos 			isc_stdtime_get(&now);
   6928  1.1  christos 		}
   6929  1.1  christos 	} else {
   6930  1.1  christos 		now = 0;
   6931  1.1  christos 	}
   6932  1.1  christos 
   6933  1.1  christos 	result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
   6934  1.1  christos 					    &region, sizeof(rdatasetheader_t));
   6935  1.1  christos 	if (result != ISC_R_SUCCESS) {
   6936  1.1  christos 		return (result);
   6937  1.1  christos 	}
   6938  1.1  christos 
   6939  1.1  christos 	name = dns_fixedname_initname(&fixed);
   6940  1.1  christos 	nodefullname(db, node, name);
   6941  1.1  christos 	dns_rdataset_getownercase(rdataset, name);
   6942  1.1  christos 
   6943  1.1  christos 	newheader = (rdatasetheader_t *)region.base;
   6944  1.1  christos 	init_rdataset(rbtdb, newheader);
   6945  1.1  christos 	setownercase(newheader, name);
   6946  1.1  christos 	set_ttl(rbtdb, newheader, rdataset->ttl + now);
   6947  1.1  christos 	newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
   6948  1.1  christos 						rdataset->covers);
   6949  1.1  christos 	atomic_init(&newheader->attributes, 0);
   6950  1.1  christos 	if (rdataset->ttl == 0U) {
   6951  1.1  christos 		RDATASET_ATTR_SET(newheader, RDATASET_ATTR_ZEROTTL);
   6952  1.1  christos 	}
   6953  1.1  christos 	newheader->noqname = NULL;
   6954  1.1  christos 	newheader->closest = NULL;
   6955  1.1  christos 	atomic_init(&newheader->count,
   6956  1.1  christos 		    atomic_fetch_add_relaxed(&init_count, 1));
   6957  1.1  christos 	newheader->trust = rdataset->trust;
   6958  1.1  christos 	newheader->last_used = now;
   6959  1.1  christos 	newheader->node = rbtnode;
   6960  1.1  christos 	if (rbtversion != NULL) {
   6961  1.1  christos 		newheader->serial = rbtversion->serial;
   6962  1.1  christos 		now = 0;
   6963  1.1  christos 
   6964  1.1  christos 		if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
   6965  1.1  christos 			RDATASET_ATTR_SET(newheader, RDATASET_ATTR_RESIGN);
   6966  1.1  christos 			newheader->resign =
   6967  1.1  christos 				(isc_stdtime_t)(dns_time64_from32(
   6968  1.1  christos 							rdataset->resign) >>
   6969  1.1  christos 						1);
   6970  1.1  christos 			newheader->resign_lsb = rdataset->resign & 0x1;
   6971  1.1  christos 		} else {
   6972  1.1  christos 			newheader->resign = 0;
   6973  1.1  christos 			newheader->resign_lsb = 0;
   6974  1.1  christos 		}
   6975  1.1  christos 	} else {
   6976  1.1  christos 		newheader->serial = 1;
   6977  1.1  christos 		newheader->resign = 0;
   6978  1.1  christos 		newheader->resign_lsb = 0;
   6979  1.1  christos 		if ((rdataset->attributes & DNS_RDATASETATTR_PREFETCH) != 0) {
   6980  1.1  christos 			RDATASET_ATTR_SET(newheader, RDATASET_ATTR_PREFETCH);
   6981  1.1  christos 		}
   6982  1.1  christos 		if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0) {
   6983  1.1  christos 			RDATASET_ATTR_SET(newheader, RDATASET_ATTR_NEGATIVE);
   6984  1.1  christos 		}
   6985  1.1  christos 		if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0) {
   6986  1.1  christos 			RDATASET_ATTR_SET(newheader, RDATASET_ATTR_NXDOMAIN);
   6987  1.1  christos 		}
   6988  1.1  christos 		if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0) {
   6989  1.1  christos 			RDATASET_ATTR_SET(newheader, RDATASET_ATTR_OPTOUT);
   6990  1.1  christos 		}
   6991  1.1  christos 		if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
   6992  1.1  christos 			result = addnoqname(rbtdb, newheader, rdataset);
   6993  1.1  christos 			if (result != ISC_R_SUCCESS) {
   6994  1.1  christos 				free_rdataset(rbtdb, rbtdb->common.mctx,
   6995  1.1  christos 					      newheader);
   6996  1.1  christos 				return (result);
   6997  1.1  christos 			}
   6998  1.1  christos 		}
   6999  1.1  christos 		if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
   7000  1.1  christos 			result = addclosest(rbtdb, newheader, rdataset);
   7001  1.1  christos 			if (result != ISC_R_SUCCESS) {
   7002  1.1  christos 				free_rdataset(rbtdb, rbtdb->common.mctx,
   7003  1.1  christos 					      newheader);
   7004  1.1  christos 				return (result);
   7005  1.1  christos 			}
   7006  1.1  christos 		}
   7007  1.1  christos 	}
   7008  1.1  christos 
   7009  1.1  christos 	/*
   7010  1.1  christos 	 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
   7011  1.1  christos 	 * just DNAME for the cache), then we need to set the callback bit
   7012  1.1  christos 	 * on the node.
   7013  1.1  christos 	 */
   7014  1.1  christos 	if (delegating_type(rbtdb, rbtnode, rdataset->type)) {
   7015  1.1  christos 		delegating = true;
   7016  1.1  christos 	} else {
   7017  1.1  christos 		delegating = false;
   7018  1.1  christos 	}
   7019  1.1  christos 
   7020  1.1  christos 	/*
   7021  1.1  christos 	 * Add to the auxiliary NSEC tree if we're adding an NSEC record.
   7022  1.1  christos 	 */
   7023  1.1  christos 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   7024  1.1  christos 	if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC &&
   7025  1.1  christos 	    rdataset->type == dns_rdatatype_nsec)
   7026  1.1  christos 	{
   7027  1.1  christos 		newnsec = true;
   7028  1.1  christos 	} else {
   7029  1.1  christos 		newnsec = false;
   7030  1.1  christos 	}
   7031  1.1  christos 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   7032  1.1  christos 
   7033  1.1  christos 	/*
   7034  1.1  christos 	 * If we're adding a delegation type, adding to the auxiliary NSEC
   7035  1.1  christos 	 * tree, or the DB is a cache in an overmem state, hold an
   7036  1.1  christos 	 * exclusive lock on the tree.  In the latter case the lock does
   7037  1.1  christos 	 * not necessarily have to be acquired but it will help purge
   7038  1.1  christos 	 * ancient entries more effectively.
   7039  1.1  christos 	 */
   7040  1.1  christos 	if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx)) {
   7041  1.1  christos 		cache_is_overmem = true;
   7042  1.1  christos 	}
   7043  1.1  christos 	if (delegating || newnsec || cache_is_overmem) {
   7044  1.1  christos 		tree_locked = true;
   7045  1.1  christos 		RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
   7046  1.1  christos 	}
   7047  1.1  christos 
   7048  1.1  christos 	if (cache_is_overmem) {
   7049  1.1  christos 		overmem_purge(rbtdb, rbtnode->locknum, rdataset_size(newheader),
   7050  1.1  christos 			      tree_locked);
   7051  1.1  christos 	}
   7052  1.1  christos 
   7053  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   7054  1.1  christos 		  isc_rwlocktype_write);
   7055  1.1  christos 
   7056  1.1  christos 	if (rbtdb->rrsetstats != NULL) {
   7057  1.1  christos 		RDATASET_ATTR_SET(newheader, RDATASET_ATTR_STATCOUNT);
   7058  1.1  christos 		update_rrsetstats(rbtdb, newheader->type,
   7059  1.1  christos 				  atomic_load_acquire(&newheader->attributes),
   7060  1.1  christos 				  true);
   7061  1.1  christos 	}
   7062  1.1  christos 
   7063  1.1  christos 	if (IS_CACHE(rbtdb)) {
   7064  1.1  christos 		if (tree_locked) {
   7065  1.1  christos 			cleanup_dead_nodes(rbtdb, rbtnode->locknum);
   7066  1.1  christos 		}
   7067  1.1  christos 
   7068  1.1  christos 		header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
   7069  1.1  christos 		if (header != NULL) {
   7070  1.1  christos 			dns_ttl_t rdh_ttl = header->rdh_ttl;
   7071  1.1  christos 
   7072  1.1  christos 			/* Only account for stale TTL if cache is not overmem */
   7073  1.1  christos 			if (!cache_is_overmem) {
   7074  1.1  christos 				rdh_ttl += rbtdb->serve_stale_ttl;
   7075  1.1  christos 			}
   7076  1.1  christos 
   7077  1.1  christos 			if (rdh_ttl < now - RBTDB_VIRTUAL) {
   7078  1.1  christos 				expire_header(rbtdb, header, tree_locked,
   7079  1.1  christos 					      expire_ttl);
   7080  1.1  christos 			}
   7081  1.1  christos 		}
   7082  1.1  christos 
   7083  1.1  christos 		/*
   7084  1.1  christos 		 * If we've been holding a write lock on the tree just for
   7085  1.1  christos 		 * cleaning, we can release it now.  However, we still need the
   7086  1.1  christos 		 * node lock.
   7087  1.1  christos 		 */
   7088  1.1  christos 		if (tree_locked && !delegating && !newnsec) {
   7089  1.1  christos 			RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
   7090  1.1  christos 			tree_locked = false;
   7091  1.1  christos 		}
   7092  1.1  christos 	}
   7093  1.1  christos 
   7094  1.1  christos 	result = ISC_R_SUCCESS;
   7095  1.1  christos 	if (newnsec) {
   7096  1.1  christos 		dns_rbtnode_t *nsecnode;
   7097  1.1  christos 
   7098  1.1  christos 		nsecnode = NULL;
   7099  1.1  christos 		result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
   7100  1.1  christos 		if (result == ISC_R_SUCCESS) {
   7101  1.1  christos 			nsecnode->nsec = DNS_RBT_NSEC_NSEC;
   7102  1.1  christos 			rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
   7103  1.1  christos 		} else if (result == ISC_R_EXISTS) {
   7104  1.1  christos 			rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
   7105  1.1  christos 			result = ISC_R_SUCCESS;
   7106  1.1  christos 		}
   7107  1.1  christos 	}
   7108  1.1  christos 
   7109  1.1  christos 	if (result == ISC_R_SUCCESS) {
   7110  1.1  christos 		result = add32(rbtdb, rbtnode, name, rbtversion, newheader,
   7111  1.1  christos 			       options, false, addedrdataset, now);
   7112  1.1  christos 	}
   7113  1.1  christos 	if (result == ISC_R_SUCCESS && delegating) {
   7114  1.1  christos 		rbtnode->find_callback = 1;
   7115  1.1  christos 	}
   7116  1.1  christos 
   7117  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   7118  1.1  christos 		    isc_rwlocktype_write);
   7119  1.1  christos 
   7120  1.1  christos 	if (tree_locked) {
   7121  1.1  christos 		RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
   7122  1.1  christos 	}
   7123  1.1  christos 
   7124  1.1  christos 	/*
   7125  1.1  christos 	 * Update the zone's secure status.  If version is non-NULL
   7126  1.1  christos 	 * this is deferred until closeversion() is called.
   7127  1.1  christos 	 */
   7128  1.1  christos 	if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb)) {
   7129  1.1  christos 		iszonesecure(db, version, rbtdb->origin_node);
   7130  1.1  christos 	}
   7131  1.1  christos 
   7132  1.1  christos 	return (result);
   7133  1.1  christos }
   7134  1.1  christos 
   7135  1.1  christos static isc_result_t
   7136  1.1  christos subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
   7137  1.1  christos 		 dns_rdataset_t *rdataset, unsigned int options,
   7138  1.1  christos 		 dns_rdataset_t *newrdataset) {
   7139  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   7140  1.1  christos 	dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
   7141  1.1  christos 	rbtdb_version_t *rbtversion = version;
   7142  1.1  christos 	dns_fixedname_t fname;
   7143  1.1  christos 	dns_name_t *nodename = dns_fixedname_initname(&fname);
   7144  1.1  christos 	rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
   7145  1.1  christos 	unsigned char *subresult;
   7146  1.1  christos 	isc_region_t region;
   7147  1.1  christos 	isc_result_t result;
   7148  1.1  christos 	rbtdb_changed_t *changed;
   7149  1.1  christos 
   7150  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   7151  1.1  christos 	REQUIRE(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
   7152  1.1  christos 
   7153  1.1  christos 	if (rbtdb->common.methods == &zone_methods) {
   7154  1.1  christos 		RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   7155  1.1  christos 		REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
   7156  1.1  christos 			  (rdataset->type == dns_rdatatype_nsec3 ||
   7157  1.1  christos 			   rdataset->covers == dns_rdatatype_nsec3)) ||
   7158  1.1  christos 			 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
   7159  1.1  christos 			  rdataset->type != dns_rdatatype_nsec3 &&
   7160  1.1  christos 			  rdataset->covers != dns_rdatatype_nsec3)));
   7161  1.1  christos 		RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   7162  1.1  christos 	}
   7163  1.1  christos 
   7164  1.1  christos 	nodefullname(db, node, nodename);
   7165  1.1  christos 
   7166  1.1  christos 	result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
   7167  1.1  christos 					    &region, sizeof(rdatasetheader_t));
   7168  1.1  christos 	if (result != ISC_R_SUCCESS) {
   7169  1.1  christos 		return (result);
   7170  1.1  christos 	}
   7171  1.1  christos 	newheader = (rdatasetheader_t *)region.base;
   7172  1.1  christos 	init_rdataset(rbtdb, newheader);
   7173  1.1  christos 	set_ttl(rbtdb, newheader, rdataset->ttl);
   7174  1.1  christos 	newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
   7175  1.1  christos 						rdataset->covers);
   7176  1.1  christos 	atomic_init(&newheader->attributes, 0);
   7177  1.1  christos 	newheader->serial = rbtversion->serial;
   7178  1.1  christos 	newheader->trust = 0;
   7179  1.1  christos 	newheader->noqname = NULL;
   7180  1.1  christos 	newheader->closest = NULL;
   7181  1.1  christos 	atomic_init(&newheader->count,
   7182  1.1  christos 		    atomic_fetch_add_relaxed(&init_count, 1));
   7183  1.1  christos 	newheader->last_used = 0;
   7184  1.1  christos 	newheader->node = rbtnode;
   7185  1.1  christos 	if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
   7186  1.1  christos 		RDATASET_ATTR_SET(newheader, RDATASET_ATTR_RESIGN);
   7187  1.1  christos 		newheader->resign =
   7188  1.1  christos 			(isc_stdtime_t)(dns_time64_from32(rdataset->resign) >>
   7189  1.1  christos 					1);
   7190  1.1  christos 		newheader->resign_lsb = rdataset->resign & 0x1;
   7191  1.1  christos 	} else {
   7192  1.1  christos 		newheader->resign = 0;
   7193  1.1  christos 		newheader->resign_lsb = 0;
   7194  1.1  christos 	}
   7195  1.1  christos 
   7196  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   7197  1.1  christos 		  isc_rwlocktype_write);
   7198  1.1  christos 
   7199  1.1  christos 	changed = add_changed(rbtdb, rbtversion, rbtnode);
   7200  1.1  christos 	if (changed == NULL) {
   7201  1.1  christos 		free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
   7202  1.1  christos 		NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   7203  1.1  christos 			    isc_rwlocktype_write);
   7204  1.1  christos 		return (ISC_R_NOMEMORY);
   7205  1.1  christos 	}
   7206  1.1  christos 
   7207  1.1  christos 	topheader_prev = NULL;
   7208  1.1  christos 	for (topheader = rbtnode->data; topheader != NULL;
   7209  1.1  christos 	     topheader = topheader->next)
   7210  1.1  christos 	{
   7211  1.1  christos 		if (topheader->type == newheader->type) {
   7212  1.1  christos 			break;
   7213  1.1  christos 		}
   7214  1.1  christos 		topheader_prev = topheader;
   7215  1.1  christos 	}
   7216  1.1  christos 	/*
   7217  1.1  christos 	 * If header isn't NULL, we've found the right type.  There may be
   7218  1.1  christos 	 * IGNORE rdatasets between the top of the chain and the first real
   7219  1.1  christos 	 * data.  We skip over them.
   7220  1.1  christos 	 */
   7221  1.1  christos 	header = topheader;
   7222  1.1  christos 	while (header != NULL && IGNORE(header)) {
   7223  1.1  christos 		header = header->down;
   7224  1.1  christos 	}
   7225  1.1  christos 	if (header != NULL && EXISTS(header)) {
   7226  1.1  christos 		unsigned int flags = 0;
   7227  1.1  christos 		subresult = NULL;
   7228  1.1  christos 		result = ISC_R_SUCCESS;
   7229  1.1  christos 		if ((options & DNS_DBSUB_EXACT) != 0) {
   7230  1.1  christos 			flags |= DNS_RDATASLAB_EXACT;
   7231  1.1  christos 			if (newheader->rdh_ttl != header->rdh_ttl) {
   7232  1.1  christos 				result = DNS_R_NOTEXACT;
   7233  1.1  christos 			}
   7234  1.1  christos 		}
   7235  1.1  christos 		if (result == ISC_R_SUCCESS) {
   7236  1.1  christos 			result = dns_rdataslab_subtract(
   7237  1.1  christos 				(unsigned char *)header,
   7238  1.1  christos 				(unsigned char *)newheader,
   7239  1.1  christos 				(unsigned int)(sizeof(*newheader)),
   7240  1.1  christos 				rbtdb->common.mctx, rbtdb->common.rdclass,
   7241  1.1  christos 				(dns_rdatatype_t)header->type, flags,
   7242  1.1  christos 				&subresult);
   7243  1.1  christos 		}
   7244  1.1  christos 		if (result == ISC_R_SUCCESS) {
   7245  1.1  christos 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
   7246  1.1  christos 			newheader = (rdatasetheader_t *)subresult;
   7247  1.1  christos 			init_rdataset(rbtdb, newheader);
   7248  1.1  christos 			update_newheader(newheader, header);
   7249  1.1  christos 			if (RESIGN(header)) {
   7250  1.1  christos 				RDATASET_ATTR_SET(newheader,
   7251  1.1  christos 						  RDATASET_ATTR_RESIGN);
   7252  1.1  christos 				newheader->resign = header->resign;
   7253  1.1  christos 				newheader->resign_lsb = header->resign_lsb;
   7254  1.1  christos 				resign_insert(rbtdb, rbtnode->locknum,
   7255  1.1  christos 					      newheader);
   7256  1.1  christos 			}
   7257  1.1  christos 			/*
   7258  1.1  christos 			 * We have to set the serial since the rdataslab
   7259  1.1  christos 			 * subtraction routine copies the reserved portion of
   7260  1.1  christos 			 * header, not newheader.
   7261  1.1  christos 			 */
   7262  1.1  christos 			newheader->serial = rbtversion->serial;
   7263  1.1  christos 			/*
   7264  1.1  christos 			 * XXXJT: dns_rdataslab_subtract() copied the pointers
   7265  1.1  christos 			 * to additional info.  We need to clear these fields
   7266  1.1  christos 			 * to avoid having duplicated references.
   7267  1.1  christos 			 */
   7268  1.1  christos 			update_recordsandxfrsize(true, rbtversion, newheader,
   7269  1.1  christos 						 nodename->length);
   7270  1.1  christos 		} else if (result == DNS_R_NXRRSET) {
   7271  1.1  christos 			/*
   7272  1.1  christos 			 * This subtraction would remove all of the rdata;
   7273  1.1  christos 			 * add a nonexistent header instead.
   7274  1.1  christos 			 */
   7275  1.1  christos 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
   7276  1.1  christos 			newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
   7277  1.1  christos 			if (newheader == NULL) {
   7278  1.1  christos 				result = ISC_R_NOMEMORY;
   7279  1.1  christos 				goto unlock;
   7280  1.1  christos 			}
   7281  1.1  christos 			init_rdataset(rbtdb, newheader);
   7282  1.1  christos 			set_ttl(rbtdb, newheader, 0);
   7283  1.1  christos 			newheader->type = topheader->type;
   7284  1.1  christos 			atomic_init(&newheader->attributes,
   7285  1.1  christos 				    RDATASET_ATTR_NONEXISTENT);
   7286  1.1  christos 			newheader->trust = 0;
   7287  1.1  christos 			newheader->serial = rbtversion->serial;
   7288  1.1  christos 			newheader->noqname = NULL;
   7289  1.1  christos 			newheader->closest = NULL;
   7290  1.1  christos 			atomic_init(&newheader->count, 0);
   7291  1.1  christos 			newheader->node = rbtnode;
   7292  1.1  christos 			newheader->resign = 0;
   7293  1.1  christos 			newheader->resign_lsb = 0;
   7294  1.1  christos 			newheader->last_used = 0;
   7295  1.1  christos 		} else {
   7296  1.1  christos 			free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
   7297  1.1  christos 			goto unlock;
   7298  1.1  christos 		}
   7299  1.1  christos 
   7300  1.1  christos 		/*
   7301  1.1  christos 		 * If we're here, we want to link newheader in front of
   7302  1.1  christos 		 * topheader.
   7303  1.1  christos 		 */
   7304  1.1  christos 		INSIST(rbtversion->serial >= topheader->serial);
   7305  1.1  christos 		update_recordsandxfrsize(false, rbtversion, header,
   7306  1.1  christos 					 nodename->length);
   7307  1.1  christos 		if (topheader_prev != NULL) {
   7308  1.1  christos 			topheader_prev->next = newheader;
   7309  1.1  christos 		} else {
   7310  1.1  christos 			rbtnode->data = newheader;
   7311  1.1  christos 		}
   7312  1.1  christos 		newheader->next = topheader->next;
   7313  1.1  christos 		newheader->down = topheader;
   7314  1.1  christos 		topheader->next = newheader;
   7315  1.1  christos 		rbtnode->dirty = 1;
   7316  1.1  christos 		changed->dirty = true;
   7317  1.1  christos 		resign_delete(rbtdb, rbtversion, header);
   7318  1.1  christos 	} else {
   7319  1.1  christos 		/*
   7320  1.1  christos 		 * The rdataset doesn't exist, so we don't need to do anything
   7321  1.1  christos 		 * to satisfy the deletion request.
   7322  1.1  christos 		 */
   7323  1.1  christos 		free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
   7324  1.1  christos 		if ((options & DNS_DBSUB_EXACT) != 0) {
   7325  1.1  christos 			result = DNS_R_NOTEXACT;
   7326  1.1  christos 		} else {
   7327  1.1  christos 			result = DNS_R_UNCHANGED;
   7328  1.1  christos 		}
   7329  1.1  christos 	}
   7330  1.1  christos 
   7331  1.1  christos 	if (result == ISC_R_SUCCESS && newrdataset != NULL) {
   7332  1.1  christos 		bind_rdataset(rbtdb, rbtnode, newheader, 0,
   7333  1.1  christos 			      isc_rwlocktype_write, newrdataset);
   7334  1.1  christos 	}
   7335  1.1  christos 
   7336  1.1  christos 	if (result == DNS_R_NXRRSET && newrdataset != NULL &&
   7337  1.1  christos 	    (options & DNS_DBSUB_WANTOLD) != 0)
   7338  1.1  christos 	{
   7339  1.1  christos 		bind_rdataset(rbtdb, rbtnode, header, 0, isc_rwlocktype_write,
   7340  1.1  christos 			      newrdataset);
   7341  1.1  christos 	}
   7342  1.1  christos 
   7343  1.1  christos unlock:
   7344  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   7345  1.1  christos 		    isc_rwlocktype_write);
   7346  1.1  christos 
   7347  1.1  christos 	/*
   7348  1.1  christos 	 * Update the zone's secure status.  If version is non-NULL
   7349  1.1  christos 	 * this is deferred until closeversion() is called.
   7350  1.1  christos 	 */
   7351  1.1  christos 	if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb)) {
   7352  1.1  christos 		RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
   7353  1.1  christos 		version = rbtdb->current_version;
   7354  1.1  christos 		RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
   7355  1.1  christos 		iszonesecure(db, version, rbtdb->origin_node);
   7356  1.1  christos 	}
   7357  1.1  christos 
   7358  1.1  christos 	return (result);
   7359  1.1  christos }
   7360  1.1  christos 
   7361  1.1  christos static isc_result_t
   7362  1.1  christos deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
   7363  1.1  christos 	       dns_rdatatype_t type, dns_rdatatype_t covers) {
   7364  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   7365  1.1  christos 	dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
   7366  1.1  christos 	rbtdb_version_t *rbtversion = version;
   7367  1.1  christos 	dns_fixedname_t fname;
   7368  1.1  christos 	dns_name_t *nodename = dns_fixedname_initname(&fname);
   7369  1.1  christos 	isc_result_t result;
   7370  1.1  christos 	rdatasetheader_t *newheader;
   7371  1.1  christos 
   7372  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   7373  1.1  christos 	INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
   7374  1.1  christos 
   7375  1.1  christos 	if (type == dns_rdatatype_any) {
   7376  1.1  christos 		return (ISC_R_NOTIMPLEMENTED);
   7377  1.1  christos 	}
   7378  1.1  christos 	if (type == dns_rdatatype_rrsig && covers == 0) {
   7379  1.1  christos 		return (ISC_R_NOTIMPLEMENTED);
   7380  1.1  christos 	}
   7381  1.1  christos 
   7382  1.1  christos 	newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
   7383  1.1  christos 	if (newheader == NULL) {
   7384  1.1  christos 		return (ISC_R_NOMEMORY);
   7385  1.1  christos 	}
   7386  1.1  christos 	init_rdataset(rbtdb, newheader);
   7387  1.1  christos 	set_ttl(rbtdb, newheader, 0);
   7388  1.1  christos 	newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
   7389  1.1  christos 	atomic_init(&newheader->attributes, RDATASET_ATTR_NONEXISTENT);
   7390  1.1  christos 	newheader->trust = 0;
   7391  1.1  christos 	newheader->noqname = NULL;
   7392  1.1  christos 	newheader->closest = NULL;
   7393  1.1  christos 	if (rbtversion != NULL) {
   7394  1.1  christos 		newheader->serial = rbtversion->serial;
   7395  1.1  christos 	} else {
   7396  1.1  christos 		newheader->serial = 0;
   7397  1.1  christos 	}
   7398  1.1  christos 	atomic_init(&newheader->count, 0);
   7399  1.1  christos 	newheader->last_used = 0;
   7400  1.1  christos 	newheader->node = rbtnode;
   7401  1.1  christos 
   7402  1.1  christos 	nodefullname(db, node, nodename);
   7403  1.1  christos 
   7404  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   7405  1.1  christos 		  isc_rwlocktype_write);
   7406  1.1  christos 	result = add32(rbtdb, rbtnode, nodename, rbtversion, newheader,
   7407  1.1  christos 		       DNS_DBADD_FORCE, false, NULL, 0);
   7408  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   7409  1.1  christos 		    isc_rwlocktype_write);
   7410  1.1  christos 
   7411  1.1  christos 	/*
   7412  1.1  christos 	 * Update the zone's secure status.  If version is non-NULL
   7413  1.1  christos 	 * this is deferred until closeversion() is called.
   7414  1.1  christos 	 */
   7415  1.1  christos 	if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb)) {
   7416  1.1  christos 		RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
   7417  1.1  christos 		version = rbtdb->current_version;
   7418  1.1  christos 		RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
   7419  1.1  christos 		iszonesecure(db, version, rbtdb->origin_node);
   7420  1.1  christos 	}
   7421  1.1  christos 
   7422  1.1  christos 	return (result);
   7423  1.1  christos }
   7424  1.1  christos 
   7425  1.1  christos /*
   7426  1.1  christos  * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC
   7427  1.1  christos  */
   7428  1.1  christos static isc_result_t
   7429  1.1  christos loadnode(dns_rbtdb_t *rbtdb, const dns_name_t *name, dns_rbtnode_t **nodep,
   7430  1.1  christos 	 bool hasnsec) {
   7431  1.1  christos 	isc_result_t noderesult, nsecresult, tmpresult;
   7432  1.1  christos 	dns_rbtnode_t *nsecnode = NULL, *node = NULL;
   7433  1.1  christos 
   7434  1.1  christos 	noderesult = dns_rbt_addnode(rbtdb->tree, name, &node);
   7435  1.1  christos 	if (!hasnsec) {
   7436  1.1  christos 		goto done;
   7437  1.1  christos 	}
   7438  1.1  christos 	if (noderesult == ISC_R_EXISTS) {
   7439  1.1  christos 		/*
   7440  1.1  christos 		 * Add a node to the auxiliary NSEC tree for an old node
   7441  1.1  christos 		 * just now getting an NSEC record.
   7442  1.1  christos 		 */
   7443  1.1  christos 		if (node->nsec == DNS_RBT_NSEC_HAS_NSEC) {
   7444  1.1  christos 			goto done;
   7445  1.1  christos 		}
   7446  1.1  christos 	} else if (noderesult != ISC_R_SUCCESS) {
   7447  1.1  christos 		goto done;
   7448  1.1  christos 	}
   7449  1.1  christos 
   7450  1.1  christos 	/*
   7451  1.1  christos 	 * Build the auxiliary tree for NSECs as we go.
   7452  1.1  christos 	 * This tree speeds searches for closest NSECs that would otherwise
   7453  1.1  christos 	 * need to examine many irrelevant nodes in large TLDs.
   7454  1.1  christos 	 *
   7455  1.1  christos 	 * Add nodes to the auxiliary tree after corresponding nodes have
   7456  1.1  christos 	 * been added to the main tree.
   7457  1.1  christos 	 */
   7458  1.1  christos 	nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
   7459  1.1  christos 	if (nsecresult == ISC_R_SUCCESS) {
   7460  1.1  christos 		nsecnode->nsec = DNS_RBT_NSEC_NSEC;
   7461  1.1  christos 		node->nsec = DNS_RBT_NSEC_HAS_NSEC;
   7462  1.1  christos 		goto done;
   7463  1.1  christos 	}
   7464  1.1  christos 
   7465  1.1  christos 	if (nsecresult == ISC_R_EXISTS) {
   7466  1.1  christos #if 1 /* 0 */
   7467  1.1  christos 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
   7468  1.1  christos 			      DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
   7469  1.1  christos 			      "addnode: NSEC node already exists");
   7470  1.1  christos #endif /* if 1 */
   7471  1.1  christos 		node->nsec = DNS_RBT_NSEC_HAS_NSEC;
   7472  1.1  christos 		goto done;
   7473  1.1  christos 	}
   7474  1.1  christos 
   7475  1.1  christos 	if (noderesult == ISC_R_SUCCESS) {
   7476  1.1  christos 		/*
   7477  1.1  christos 		 * Remove the node we just added above.
   7478  1.1  christos 		 */
   7479  1.1  christos 		tmpresult = dns_rbt_deletenode(rbtdb->tree, node, false);
   7480  1.1  christos 		if (tmpresult != ISC_R_SUCCESS) {
   7481  1.1  christos 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
   7482  1.1  christos 				      DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
   7483  1.1  christos 				      "loading_addrdataset: "
   7484  1.1  christos 				      "dns_rbt_deletenode: %s after "
   7485  1.1  christos 				      "dns_rbt_addnode(NSEC): %s",
   7486  1.1  christos 				      isc_result_totext(tmpresult),
   7487  1.1  christos 				      isc_result_totext(noderesult));
   7488  1.1  christos 		}
   7489  1.1  christos 	}
   7490  1.1  christos 
   7491  1.1  christos 	/*
   7492  1.1  christos 	 * Set the error condition to be returned.
   7493  1.1  christos 	 */
   7494  1.1  christos 	noderesult = nsecresult;
   7495  1.1  christos 
   7496  1.1  christos done:
   7497  1.1  christos 	if (noderesult == ISC_R_SUCCESS || noderesult == ISC_R_EXISTS) {
   7498  1.1  christos 		*nodep = node;
   7499  1.1  christos 	}
   7500  1.1  christos 
   7501  1.1  christos 	return (noderesult);
   7502  1.1  christos }
   7503  1.1  christos 
   7504  1.1  christos static isc_result_t
   7505  1.1  christos loading_addrdataset(void *arg, const dns_name_t *name,
   7506  1.1  christos 		    dns_rdataset_t *rdataset) {
   7507  1.1  christos 	rbtdb_load_t *loadctx = arg;
   7508  1.1  christos 	dns_rbtdb_t *rbtdb = loadctx->rbtdb;
   7509  1.1  christos 	dns_rbtnode_t *node;
   7510  1.1  christos 	isc_result_t result;
   7511  1.1  christos 	isc_region_t region;
   7512  1.1  christos 	rdatasetheader_t *newheader;
   7513  1.1  christos 
   7514  1.1  christos 	REQUIRE(rdataset->rdclass == rbtdb->common.rdclass);
   7515  1.1  christos 
   7516  1.1  christos 	/*
   7517  1.1  christos 	 * SOA records are only allowed at top of zone.
   7518  1.1  christos 	 */
   7519  1.1  christos 	if (rdataset->type == dns_rdatatype_soa && !IS_CACHE(rbtdb) &&
   7520  1.1  christos 	    !dns_name_equal(name, &rbtdb->common.origin))
   7521  1.1  christos 	{
   7522  1.1  christos 		return (DNS_R_NOTZONETOP);
   7523  1.1  christos 	}
   7524  1.1  christos 
   7525  1.1  christos 	if (rdataset->type != dns_rdatatype_nsec3 &&
   7526  1.1  christos 	    rdataset->covers != dns_rdatatype_nsec3)
   7527  1.1  christos 	{
   7528  1.1  christos 		add_empty_wildcards(rbtdb, name, false);
   7529  1.1  christos 	}
   7530  1.1  christos 
   7531  1.1  christos 	if (dns_name_iswildcard(name)) {
   7532  1.1  christos 		/*
   7533  1.1  christos 		 * NS record owners cannot legally be wild cards.
   7534  1.1  christos 		 */
   7535  1.1  christos 		if (rdataset->type == dns_rdatatype_ns) {
   7536  1.1  christos 			return (DNS_R_INVALIDNS);
   7537  1.1  christos 		}
   7538  1.1  christos 		/*
   7539  1.1  christos 		 * NSEC3 record owners cannot legally be wild cards.
   7540  1.1  christos 		 */
   7541  1.1  christos 		if (rdataset->type == dns_rdatatype_nsec3) {
   7542  1.1  christos 			return (DNS_R_INVALIDNSEC3);
   7543  1.1  christos 		}
   7544  1.1  christos 		result = add_wildcard_magic(rbtdb, name, false);
   7545  1.1  christos 		if (result != ISC_R_SUCCESS) {
   7546  1.1  christos 			return (result);
   7547  1.1  christos 		}
   7548  1.1  christos 	}
   7549  1.1  christos 
   7550  1.1  christos 	node = NULL;
   7551  1.1  christos 	if (rdataset->type == dns_rdatatype_nsec3 ||
   7552  1.1  christos 	    rdataset->covers == dns_rdatatype_nsec3)
   7553  1.1  christos 	{
   7554  1.1  christos 		result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
   7555  1.1  christos 		if (result == ISC_R_SUCCESS) {
   7556  1.1  christos 			node->nsec = DNS_RBT_NSEC_NSEC3;
   7557  1.1  christos 		}
   7558  1.1  christos 	} else if (rdataset->type == dns_rdatatype_nsec) {
   7559  1.1  christos 		result = loadnode(rbtdb, name, &node, true);
   7560  1.1  christos 	} else {
   7561  1.1  christos 		result = loadnode(rbtdb, name, &node, false);
   7562  1.1  christos 	}
   7563  1.1  christos 	if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) {
   7564  1.1  christos 		return (result);
   7565  1.1  christos 	}
   7566  1.1  christos 	if (result == ISC_R_SUCCESS) {
   7567  1.1  christos 		node->locknum = node->hashval % rbtdb->node_lock_count;
   7568  1.1  christos 	}
   7569  1.1  christos 
   7570  1.1  christos 	result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
   7571  1.1  christos 					    &region, sizeof(rdatasetheader_t));
   7572  1.1  christos 	if (result != ISC_R_SUCCESS) {
   7573  1.1  christos 		return (result);
   7574  1.1  christos 	}
   7575  1.1  christos 	newheader = (rdatasetheader_t *)region.base;
   7576  1.1  christos 	init_rdataset(rbtdb, newheader);
   7577  1.1  christos 	set_ttl(rbtdb, newheader, rdataset->ttl + loadctx->now); /* XXX overflow
   7578  1.1  christos 								  * check */
   7579  1.1  christos 	newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
   7580  1.1  christos 						rdataset->covers);
   7581  1.1  christos 	atomic_init(&newheader->attributes, 0);
   7582  1.1  christos 	newheader->trust = rdataset->trust;
   7583  1.1  christos 	newheader->serial = 1;
   7584  1.1  christos 	newheader->noqname = NULL;
   7585  1.1  christos 	newheader->closest = NULL;
   7586  1.1  christos 	atomic_init(&newheader->count,
   7587  1.1  christos 		    atomic_fetch_add_relaxed(&init_count, 1));
   7588  1.1  christos 	newheader->last_used = 0;
   7589  1.1  christos 	newheader->node = node;
   7590  1.1  christos 	setownercase(newheader, name);
   7591  1.1  christos 
   7592  1.1  christos 	if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
   7593  1.1  christos 		RDATASET_ATTR_SET(newheader, RDATASET_ATTR_RESIGN);
   7594  1.1  christos 		newheader->resign =
   7595  1.1  christos 			(isc_stdtime_t)(dns_time64_from32(rdataset->resign) >>
   7596  1.1  christos 					1);
   7597  1.1  christos 		newheader->resign_lsb = rdataset->resign & 0x1;
   7598  1.1  christos 	} else {
   7599  1.1  christos 		newheader->resign = 0;
   7600  1.1  christos 		newheader->resign_lsb = 0;
   7601  1.1  christos 	}
   7602  1.1  christos 
   7603  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[node->locknum].lock, isc_rwlocktype_write);
   7604  1.1  christos 	result = add32(rbtdb, node, name, rbtdb->current_version, newheader,
   7605  1.1  christos 		       DNS_DBADD_MERGE, true, NULL, 0);
   7606  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
   7607  1.1  christos 		    isc_rwlocktype_write);
   7608  1.1  christos 
   7609  1.1  christos 	if (result == ISC_R_SUCCESS &&
   7610  1.1  christos 	    delegating_type(rbtdb, node, rdataset->type))
   7611  1.1  christos 	{
   7612  1.1  christos 		node->find_callback = 1;
   7613  1.1  christos 	} else if (result == DNS_R_UNCHANGED) {
   7614  1.1  christos 		result = ISC_R_SUCCESS;
   7615  1.1  christos 	}
   7616  1.1  christos 
   7617  1.1  christos 	return (result);
   7618  1.1  christos }
   7619  1.1  christos 
   7620  1.1  christos static isc_result_t
   7621  1.1  christos rbt_datafixer(dns_rbtnode_t *rbtnode, void *base, size_t filesize, void *arg,
   7622  1.1  christos 	      uint64_t *crc) {
   7623  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)arg;
   7624  1.1  christos 	rdatasetheader_t *header;
   7625  1.1  christos 	unsigned char *limit = ((unsigned char *)base) + filesize;
   7626  1.1  christos 
   7627  1.1  christos 	REQUIRE(rbtnode != NULL);
   7628  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   7629  1.1  christos 
   7630  1.1  christos 	for (header = rbtnode->data; header != NULL; header = header->next) {
   7631  1.1  christos 		unsigned char *p = (unsigned char *)header;
   7632  1.1  christos 		size_t size = dns_rdataslab_size(p, sizeof(*header));
   7633  1.1  christos 		isc_crc64_update(crc, p, size);
   7634  1.1  christos #ifdef DEBUG
   7635  1.1  christos 		hexdump("hashing header", p, sizeof(rdatasetheader_t));
   7636  1.1  christos 		hexdump("hashing slab", p + sizeof(rdatasetheader_t),
   7637  1.1  christos 			size - sizeof(rdatasetheader_t));
   7638  1.1  christos #endif /* ifdef DEBUG */
   7639  1.1  christos 		header->serial = 1;
   7640  1.1  christos 		header->is_mmapped = 1;
   7641  1.1  christos 		header->node = rbtnode;
   7642  1.1  christos 		header->node_is_relative = 0;
   7643  1.1  christos 
   7644  1.1  christos 		if (RESIGN(header) &&
   7645  1.1  christos 		    (header->resign != 0 || header->resign_lsb != 0))
   7646  1.1  christos 		{
   7647  1.1  christos 			int idx = header->node->locknum;
   7648  1.1  christos 			isc_heap_insert(rbtdb->heaps[idx], header);
   7649  1.1  christos 		}
   7650  1.1  christos 
   7651  1.1  christos 		if (header->next != NULL) {
   7652  1.1  christos 			size_t cooked = dns_rbt_serialize_align(size);
   7653  1.1  christos 			if ((uintptr_t)header->next !=
   7654  1.1  christos 			    (p - (unsigned char *)base) + cooked)
   7655  1.1  christos 			{
   7656  1.1  christos 				return (ISC_R_INVALIDFILE);
   7657  1.1  christos 			}
   7658  1.1  christos 			header->next = (rdatasetheader_t *)(p + cooked);
   7659  1.1  christos 			header->next_is_relative = 0;
   7660  1.1  christos 			if ((header->next < (rdatasetheader_t *)base) ||
   7661  1.1  christos 			    (header->next > (rdatasetheader_t *)limit))
   7662  1.1  christos 			{
   7663  1.1  christos 				return (ISC_R_INVALIDFILE);
   7664  1.1  christos 			}
   7665  1.1  christos 		}
   7666  1.1  christos 
   7667  1.1  christos 		update_recordsandxfrsize(true, rbtdb->current_version, header,
   7668  1.1  christos 					 rbtnode->fullnamelen);
   7669  1.1  christos 	}
   7670  1.1  christos 
   7671  1.1  christos 	/* We're done deserializing; clear fullnamelen */
   7672  1.1  christos 	rbtnode->fullnamelen = 0;
   7673  1.1  christos 
   7674  1.1  christos 	return (ISC_R_SUCCESS);
   7675  1.1  christos }
   7676  1.1  christos 
   7677  1.1  christos /*
   7678  1.1  christos  * Load the RBT database from the image in 'f'
   7679  1.1  christos  */
   7680  1.1  christos static isc_result_t
   7681  1.1  christos deserialize(void *arg, FILE *f, off_t offset) {
   7682  1.1  christos 	isc_result_t result;
   7683  1.1  christos 	rbtdb_load_t *loadctx = arg;
   7684  1.1  christos 	dns_rbtdb_t *rbtdb = loadctx->rbtdb;
   7685  1.1  christos 	rbtdb_file_header_t *header;
   7686  1.1  christos 	int fd;
   7687  1.1  christos 	off_t filesize = 0;
   7688  1.1  christos 	char *base;
   7689  1.1  christos 	dns_rbt_t *tree = NULL, *nsec = NULL, *nsec3 = NULL;
   7690  1.1  christos 	int protect, flags;
   7691  1.1  christos 	dns_rbtnode_t *origin_node = NULL;
   7692  1.1  christos 
   7693  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   7694  1.1  christos 
   7695  1.1  christos 	/*
   7696  1.1  christos 	 * TODO CKB: since this is read-write (had to be to add nodes later)
   7697  1.1  christos 	 * we will need to lock the file or the nodes in it before modifying
   7698  1.1  christos 	 * the nodes in the file.
   7699  1.1  christos 	 */
   7700  1.1  christos 
   7701  1.1  christos 	/* Map in the whole file in one go */
   7702  1.1  christos 	fd = fileno(f);
   7703  1.1  christos 	isc_file_getsizefd(fd, &filesize);
   7704  1.1  christos 	protect = PROT_READ | PROT_WRITE;
   7705  1.1  christos 	flags = MAP_PRIVATE;
   7706  1.1  christos #ifdef MAP_FILE
   7707  1.1  christos 	flags |= MAP_FILE;
   7708  1.1  christos #endif /* ifdef MAP_FILE */
   7709  1.1  christos 
   7710  1.1  christos 	base = isc_file_mmap(NULL, filesize, protect, flags, fd, 0);
   7711  1.1  christos 	if (base == NULL || base == MAP_FAILED) {
   7712  1.1  christos 		return (ISC_R_FAILURE);
   7713  1.1  christos 	}
   7714  1.1  christos 
   7715  1.1  christos 	header = (rbtdb_file_header_t *)(base + offset);
   7716  1.1  christos 	if (!match_header_version(header)) {
   7717  1.1  christos 		result = ISC_R_INVALIDFILE;
   7718  1.1  christos 		goto cleanup;
   7719  1.1  christos 	}
   7720  1.1  christos 
   7721  1.1  christos 	if (header->tree != 0) {
   7722  1.1  christos 		result = dns_rbt_deserialize_tree(
   7723  1.1  christos 			base, filesize, (off_t)header->tree, rbtdb->common.mctx,
   7724  1.1  christos 			delete_callback, rbtdb, rbt_datafixer, rbtdb, NULL,
   7725  1.1  christos 			&tree);
   7726  1.1  christos 		if (result != ISC_R_SUCCESS) {
   7727  1.1  christos 			goto cleanup;
   7728  1.1  christos 		}
   7729  1.1  christos 
   7730  1.1  christos 		result = dns_rbt_findnode(tree, &rbtdb->common.origin, NULL,
   7731  1.1  christos 					  &origin_node, NULL,
   7732  1.1  christos 					  DNS_RBTFIND_EMPTYDATA, NULL, NULL);
   7733  1.1  christos 		if (result != ISC_R_SUCCESS) {
   7734  1.1  christos 			goto cleanup;
   7735  1.1  christos 		}
   7736  1.1  christos 	}
   7737  1.1  christos 
   7738  1.1  christos 	if (header->nsec != 0) {
   7739  1.1  christos 		result = dns_rbt_deserialize_tree(
   7740  1.1  christos 			base, filesize, (off_t)header->nsec, rbtdb->common.mctx,
   7741  1.1  christos 			delete_callback, rbtdb, rbt_datafixer, rbtdb, NULL,
   7742  1.1  christos 			&nsec);
   7743  1.1  christos 		if (result != ISC_R_SUCCESS) {
   7744  1.1  christos 			goto cleanup;
   7745  1.1  christos 		}
   7746  1.1  christos 	}
   7747  1.1  christos 
   7748  1.1  christos 	if (header->nsec3 != 0) {
   7749  1.1  christos 		result = dns_rbt_deserialize_tree(
   7750  1.1  christos 			base, filesize, (off_t)header->nsec3,
   7751  1.1  christos 			rbtdb->common.mctx, delete_callback, rbtdb,
   7752  1.1  christos 			rbt_datafixer, rbtdb, NULL, &nsec3);
   7753  1.1  christos 		if (result != ISC_R_SUCCESS) {
   7754  1.1  christos 			goto cleanup;
   7755  1.1  christos 		}
   7756  1.1  christos 	}
   7757  1.1  christos 
   7758  1.1  christos 	/*
   7759  1.1  christos 	 * We have a successfully loaded all the rbt trees now update
   7760  1.1  christos 	 * rbtdb to use them.
   7761  1.1  christos 	 */
   7762  1.1  christos 
   7763  1.1  christos 	rbtdb->mmap_location = base;
   7764  1.1  christos 	rbtdb->mmap_size = (size_t)filesize;
   7765  1.1  christos 
   7766  1.1  christos 	if (tree != NULL) {
   7767  1.1  christos 		dns_rbt_destroy(&rbtdb->tree);
   7768  1.1  christos 		rbtdb->tree = tree;
   7769  1.1  christos 		rbtdb->origin_node = origin_node;
   7770  1.1  christos 	}
   7771  1.1  christos 
   7772  1.1  christos 	if (nsec != NULL) {
   7773  1.1  christos 		dns_rbt_destroy(&rbtdb->nsec);
   7774  1.1  christos 		rbtdb->nsec = nsec;
   7775  1.1  christos 	}
   7776  1.1  christos 
   7777  1.1  christos 	if (nsec3 != NULL) {
   7778  1.1  christos 		dns_rbt_destroy(&rbtdb->nsec3);
   7779  1.1  christos 		rbtdb->nsec3 = nsec3;
   7780  1.1  christos 	}
   7781  1.1  christos 
   7782  1.1  christos 	return (ISC_R_SUCCESS);
   7783  1.1  christos 
   7784  1.1  christos cleanup:
   7785  1.1  christos 	if (tree != NULL) {
   7786  1.1  christos 		dns_rbt_destroy(&tree);
   7787  1.1  christos 	}
   7788  1.1  christos 	if (nsec != NULL) {
   7789  1.1  christos 		dns_rbt_destroy(&nsec);
   7790  1.1  christos 	}
   7791  1.1  christos 	if (nsec3 != NULL) {
   7792  1.1  christos 		dns_rbt_destroy(&nsec3);
   7793  1.1  christos 	}
   7794  1.1  christos 	isc_file_munmap(base, (size_t)filesize);
   7795  1.1  christos 	return (result);
   7796  1.1  christos }
   7797  1.1  christos 
   7798  1.1  christos static isc_result_t
   7799  1.1  christos beginload(dns_db_t *db, dns_rdatacallbacks_t *callbacks) {
   7800  1.1  christos 	rbtdb_load_t *loadctx;
   7801  1.1  christos 	dns_rbtdb_t *rbtdb;
   7802  1.1  christos 	rbtdb = (dns_rbtdb_t *)db;
   7803  1.1  christos 
   7804  1.1  christos 	REQUIRE(DNS_CALLBACK_VALID(callbacks));
   7805  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   7806  1.1  christos 
   7807  1.1  christos 	loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
   7808  1.1  christos 
   7809  1.1  christos 	loadctx->rbtdb = rbtdb;
   7810  1.1  christos 	if (IS_CACHE(rbtdb)) {
   7811  1.1  christos 		isc_stdtime_get(&loadctx->now);
   7812  1.1  christos 	} else {
   7813  1.1  christos 		loadctx->now = 0;
   7814  1.1  christos 	}
   7815  1.1  christos 
   7816  1.1  christos 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
   7817  1.1  christos 
   7818  1.1  christos 	REQUIRE((rbtdb->attributes &
   7819  1.1  christos 		 (RBTDB_ATTR_LOADED | RBTDB_ATTR_LOADING)) == 0);
   7820  1.1  christos 	rbtdb->attributes |= RBTDB_ATTR_LOADING;
   7821  1.1  christos 
   7822  1.1  christos 	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
   7823  1.1  christos 
   7824  1.1  christos 	callbacks->add = loading_addrdataset;
   7825  1.1  christos 	callbacks->add_private = loadctx;
   7826  1.1  christos 	callbacks->deserialize = deserialize;
   7827  1.1  christos 	callbacks->deserialize_private = loadctx;
   7828  1.1  christos 
   7829  1.1  christos 	return (ISC_R_SUCCESS);
   7830  1.1  christos }
   7831  1.1  christos 
   7832  1.1  christos static isc_result_t
   7833  1.1  christos endload(dns_db_t *db, dns_rdatacallbacks_t *callbacks) {
   7834  1.1  christos 	rbtdb_load_t *loadctx;
   7835  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   7836  1.1  christos 
   7837  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   7838  1.1  christos 	REQUIRE(DNS_CALLBACK_VALID(callbacks));
   7839  1.1  christos 	loadctx = callbacks->add_private;
   7840  1.1  christos 	REQUIRE(loadctx != NULL);
   7841  1.1  christos 	REQUIRE(loadctx->rbtdb == rbtdb);
   7842  1.1  christos 
   7843  1.1  christos 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
   7844  1.1  christos 
   7845  1.1  christos 	REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
   7846  1.1  christos 	REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
   7847  1.1  christos 
   7848  1.1  christos 	rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
   7849  1.1  christos 	rbtdb->attributes |= RBTDB_ATTR_LOADED;
   7850  1.1  christos 
   7851  1.1  christos 	/*
   7852  1.1  christos 	 * If there's a KEY rdataset at the zone origin containing a
   7853  1.1  christos 	 * zone key, we consider the zone secure.
   7854  1.1  christos 	 */
   7855  1.1  christos 	if (!IS_CACHE(rbtdb) && rbtdb->origin_node != NULL) {
   7856  1.1  christos 		dns_dbversion_t *version = rbtdb->current_version;
   7857  1.1  christos 		RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
   7858  1.1  christos 		iszonesecure(db, version, rbtdb->origin_node);
   7859  1.1  christos 	} else {
   7860  1.1  christos 		RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
   7861  1.1  christos 	}
   7862  1.1  christos 
   7863  1.1  christos 	callbacks->add = NULL;
   7864  1.1  christos 	callbacks->add_private = NULL;
   7865  1.1  christos 	callbacks->deserialize = NULL;
   7866  1.1  christos 	callbacks->deserialize_private = NULL;
   7867  1.1  christos 
   7868  1.1  christos 	isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
   7869  1.1  christos 
   7870  1.1  christos 	return (ISC_R_SUCCESS);
   7871  1.1  christos }
   7872  1.1  christos 
   7873  1.1  christos /*
   7874  1.1  christos  * helper function to handle writing out the rdataset data pointed to
   7875  1.1  christos  * by the void *data pointer in the dns_rbtnode
   7876  1.1  christos  */
   7877  1.1  christos static isc_result_t
   7878  1.1  christos rbt_datawriter(FILE *rbtfile, unsigned char *data, void *arg, uint64_t *crc) {
   7879  1.1  christos 	rbtdb_version_t *version = (rbtdb_version_t *)arg;
   7880  1.1  christos 	rbtdb_serial_t serial;
   7881  1.1  christos 	rdatasetheader_t newheader;
   7882  1.1  christos 	rdatasetheader_t *header = (rdatasetheader_t *)data, *next;
   7883  1.1  christos 	off_t where;
   7884  1.1  christos 	size_t cooked, size;
   7885  1.1  christos 	unsigned char *p;
   7886  1.1  christos 	isc_result_t result = ISC_R_SUCCESS;
   7887  1.1  christos 	char pad[sizeof(char *)];
   7888  1.1  christos 	uintptr_t off;
   7889  1.1  christos 
   7890  1.1  christos 	REQUIRE(rbtfile != NULL);
   7891  1.1  christos 	REQUIRE(data != NULL);
   7892  1.1  christos 	REQUIRE(version != NULL);
   7893  1.1  christos 
   7894  1.1  christos 	serial = version->serial;
   7895  1.1  christos 
   7896  1.1  christos 	for (; header != NULL; header = next) {
   7897  1.1  christos 		next = header->next;
   7898  1.1  christos 		do {
   7899  1.1  christos 			if (header->serial <= serial && !IGNORE(header)) {
   7900  1.1  christos 				if (NONEXISTENT(header)) {
   7901  1.1  christos 					header = NULL;
   7902  1.1  christos 				}
   7903  1.1  christos 				break;
   7904  1.1  christos 			} else {
   7905  1.1  christos 				header = header->down;
   7906  1.1  christos 			}
   7907  1.1  christos 		} while (header != NULL);
   7908  1.1  christos 
   7909  1.1  christos 		if (header == NULL) {
   7910  1.1  christos 			continue;
   7911  1.1  christos 		}
   7912  1.1  christos 
   7913  1.1  christos 		CHECK(isc_stdio_tell(rbtfile, &where));
   7914  1.1  christos 		size = dns_rdataslab_size((unsigned char *)header,
   7915  1.1  christos 					  sizeof(rdatasetheader_t));
   7916  1.1  christos 
   7917  1.1  christos 		p = (unsigned char *)header;
   7918  1.1  christos 		memmove(&newheader, p, sizeof(rdatasetheader_t));
   7919  1.1  christos 		newheader.down = NULL;
   7920  1.1  christos 		newheader.next = NULL;
   7921  1.1  christos 		off = where;
   7922  1.1  christos 		if ((off_t)off != where) {
   7923  1.1  christos 			return (ISC_R_RANGE);
   7924  1.1  christos 		}
   7925  1.1  christos 		newheader.node = (dns_rbtnode_t *)off;
   7926  1.1  christos 		newheader.node_is_relative = 1;
   7927  1.1  christos 		newheader.serial = 1;
   7928  1.1  christos 
   7929  1.1  christos 		/*
   7930  1.1  christos 		 * Round size up to the next pointer sized offset so it
   7931  1.1  christos 		 * will be properly aligned when read back in.
   7932  1.1  christos 		 */
   7933  1.1  christos 		cooked = dns_rbt_serialize_align(size);
   7934  1.1  christos 		if (next != NULL) {
   7935  1.1  christos 			newheader.next = (rdatasetheader_t *)(off + cooked);
   7936  1.1  christos 			newheader.next_is_relative = 1;
   7937  1.1  christos 		}
   7938  1.1  christos 
   7939  1.1  christos #ifdef DEBUG
   7940  1.1  christos 		hexdump("writing header", (unsigned char *)&newheader,
   7941  1.1  christos 			sizeof(rdatasetheader_t));
   7942  1.1  christos 		hexdump("writing slab", p + sizeof(rdatasetheader_t),
   7943  1.1  christos 			size - sizeof(rdatasetheader_t));
   7944  1.1  christos #endif /* ifdef DEBUG */
   7945  1.1  christos 		isc_crc64_update(crc, (unsigned char *)&newheader,
   7946  1.1  christos 				 sizeof(rdatasetheader_t));
   7947  1.1  christos 		CHECK(isc_stdio_write(&newheader, sizeof(rdatasetheader_t), 1,
   7948  1.1  christos 				      rbtfile, NULL));
   7949  1.1  christos 
   7950  1.1  christos 		isc_crc64_update(crc, p + sizeof(rdatasetheader_t),
   7951  1.1  christos 				 size - sizeof(rdatasetheader_t));
   7952  1.1  christos 		CHECK(isc_stdio_write(p + sizeof(rdatasetheader_t),
   7953  1.1  christos 				      size - sizeof(rdatasetheader_t), 1,
   7954  1.1  christos 				      rbtfile, NULL));
   7955  1.1  christos 		/*
   7956  1.1  christos 		 * Pad to force alignment.
   7957  1.1  christos 		 */
   7958  1.1  christos 		if (size != (size_t)cooked) {
   7959  1.1  christos 			memset(pad, 0, sizeof(pad));
   7960  1.1  christos 			CHECK(isc_stdio_write(pad, cooked - size, 1, rbtfile,
   7961  1.1  christos 					      NULL));
   7962  1.1  christos 		}
   7963  1.1  christos 	}
   7964  1.1  christos 
   7965  1.1  christos failure:
   7966  1.1  christos 	return (result);
   7967  1.1  christos }
   7968  1.1  christos 
   7969  1.1  christos /*
   7970  1.1  christos  * Write out a zeroed header as a placeholder.  Doing this ensures
   7971  1.1  christos  * that the file will not read while it is partially written, should
   7972  1.1  christos  * writing fail or be interrupted.
   7973  1.1  christos  */
   7974  1.1  christos static isc_result_t
   7975  1.1  christos rbtdb_zero_header(FILE *rbtfile) {
   7976  1.1  christos 	char buffer[RBTDB_HEADER_LENGTH];
   7977  1.1  christos 	isc_result_t result;
   7978  1.1  christos 
   7979  1.1  christos 	memset(buffer, 0, RBTDB_HEADER_LENGTH);
   7980  1.1  christos 	result = isc_stdio_write(buffer, 1, RBTDB_HEADER_LENGTH, rbtfile, NULL);
   7981  1.1  christos 	fflush(rbtfile);
   7982  1.1  christos 
   7983  1.1  christos 	return (result);
   7984  1.1  christos }
   7985  1.1  christos 
   7986  1.1  christos static isc_once_t once = ISC_ONCE_INIT;
   7987  1.1  christos 
   7988  1.1  christos static void
   7989  1.1  christos init_file_version(void) {
   7990  1.1  christos 	int n;
   7991  1.1  christos 
   7992  1.1  christos 	memset(FILE_VERSION, 0, sizeof(FILE_VERSION));
   7993  1.1  christos 	n = snprintf(FILE_VERSION, sizeof(FILE_VERSION), "RBTDB Image %s %s",
   7994  1.1  christos 		     dns_major, dns_mapapi);
   7995  1.1  christos 	INSIST(n > 0 && (unsigned int)n < sizeof(FILE_VERSION));
   7996  1.1  christos }
   7997  1.1  christos 
   7998  1.1  christos /*
   7999  1.1  christos  * Write the file header out, recording the locations of the three
   8000  1.1  christos  * RBT's used in the rbtdb: tree, nsec, and nsec3, and including NodeDump
   8001  1.1  christos  * version information and any information stored in the rbtdb object
   8002  1.1  christos  * itself that should be stored here.
   8003  1.1  christos  */
   8004  1.1  christos static isc_result_t
   8005  1.1  christos rbtdb_write_header(FILE *rbtfile, off_t tree_location, off_t nsec_location,
   8006  1.1  christos 		   off_t nsec3_location) {
   8007  1.1  christos 	rbtdb_file_header_t header;
   8008  1.1  christos 	isc_result_t result;
   8009  1.1  christos 
   8010  1.1  christos 	RUNTIME_CHECK(isc_once_do(&once, init_file_version) == ISC_R_SUCCESS);
   8011  1.1  christos 
   8012  1.1  christos 	memset(&header, 0, sizeof(rbtdb_file_header_t));
   8013  1.1  christos 	memmove(header.version1, FILE_VERSION, sizeof(header.version1));
   8014  1.1  christos 	memmove(header.version2, FILE_VERSION, sizeof(header.version2));
   8015  1.1  christos 	header.ptrsize = (uint32_t)sizeof(void *);
   8016  1.1  christos 	header.bigendian = (1 == htonl(1)) ? 1 : 0;
   8017  1.1  christos 	header.tree = (uint64_t)tree_location;
   8018  1.1  christos 	header.nsec = (uint64_t)nsec_location;
   8019  1.1  christos 	header.nsec3 = (uint64_t)nsec3_location;
   8020  1.1  christos 	result = isc_stdio_write(&header, 1, sizeof(rbtdb_file_header_t),
   8021  1.1  christos 				 rbtfile, NULL);
   8022  1.1  christos 	fflush(rbtfile);
   8023  1.1  christos 
   8024  1.1  christos 	return (result);
   8025  1.1  christos }
   8026  1.1  christos 
   8027  1.1  christos static bool
   8028  1.1  christos match_header_version(rbtdb_file_header_t *header) {
   8029  1.1  christos 	RUNTIME_CHECK(isc_once_do(&once, init_file_version) == ISC_R_SUCCESS);
   8030  1.1  christos 
   8031  1.1  christos 	if (memcmp(header->version1, FILE_VERSION, sizeof(header->version1)) !=
   8032  1.1  christos 		    0 ||
   8033  1.1  christos 	    memcmp(header->version2, FILE_VERSION, sizeof(header->version1)) !=
   8034  1.1  christos 		    0)
   8035  1.1  christos 	{
   8036  1.1  christos 		return (false);
   8037  1.1  christos 	}
   8038  1.1  christos 
   8039  1.1  christos 	return (true);
   8040  1.1  christos }
   8041  1.1  christos 
   8042  1.1  christos static isc_result_t
   8043  1.1  christos serialize(dns_db_t *db, dns_dbversion_t *ver, FILE *rbtfile) {
   8044  1.1  christos 	rbtdb_version_t *version = (rbtdb_version_t *)ver;
   8045  1.1  christos 	dns_rbtdb_t *rbtdb;
   8046  1.1  christos 	isc_result_t result;
   8047  1.1  christos 	off_t tree_location, nsec_location, nsec3_location, header_location;
   8048  1.1  christos 
   8049  1.1  christos 	rbtdb = (dns_rbtdb_t *)db;
   8050  1.1  christos 
   8051  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8052  1.1  christos 	REQUIRE(rbtfile != NULL);
   8053  1.1  christos 
   8054  1.1  christos 	/* Ensure we're writing to a plain file */
   8055  1.1  christos 	CHECK(isc_file_isplainfilefd(fileno(rbtfile)));
   8056  1.1  christos 
   8057  1.1  christos 	/*
   8058  1.1  christos 	 * first, write out a zeroed header to store rbtdb information
   8059  1.1  christos 	 *
   8060  1.1  christos 	 * then for each of the three trees, store the current position
   8061  1.1  christos 	 * in the file and call dns_rbt_serialize_tree
   8062  1.1  christos 	 *
   8063  1.1  christos 	 * finally, write out the rbtdb header, storing the locations of the
   8064  1.1  christos 	 * rbtheaders
   8065  1.1  christos 	 *
   8066  1.1  christos 	 * NOTE: need to do something better with the return codes, &= will
   8067  1.1  christos 	 * not work.
   8068  1.1  christos 	 */
   8069  1.1  christos 	CHECK(isc_stdio_tell(rbtfile, &header_location));
   8070  1.1  christos 	CHECK(rbtdb_zero_header(rbtfile));
   8071  1.1  christos 	CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->tree, rbt_datawriter,
   8072  1.1  christos 				     version, &tree_location));
   8073  1.1  christos 	CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->nsec, rbt_datawriter,
   8074  1.1  christos 				     version, &nsec_location));
   8075  1.1  christos 	CHECK(dns_rbt_serialize_tree(rbtfile, rbtdb->nsec3, rbt_datawriter,
   8076  1.1  christos 				     version, &nsec3_location));
   8077  1.1  christos 
   8078  1.1  christos 	CHECK(isc_stdio_seek(rbtfile, header_location, SEEK_SET));
   8079  1.1  christos 	CHECK(rbtdb_write_header(rbtfile, tree_location, nsec_location,
   8080  1.1  christos 				 nsec3_location));
   8081  1.1  christos failure:
   8082  1.1  christos 	return (result);
   8083  1.1  christos }
   8084  1.1  christos 
   8085  1.1  christos static isc_result_t
   8086  1.1  christos dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
   8087  1.1  christos      dns_masterformat_t masterformat) {
   8088  1.1  christos 	dns_rbtdb_t *rbtdb;
   8089  1.1  christos 	rbtdb_version_t *rbtversion = version;
   8090  1.1  christos 
   8091  1.1  christos 	rbtdb = (dns_rbtdb_t *)db;
   8092  1.1  christos 
   8093  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8094  1.1  christos 	INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
   8095  1.1  christos 
   8096  1.1  christos 	return (dns_master_dump(rbtdb->common.mctx, db, version,
   8097  1.1  christos 				&dns_master_style_default, filename,
   8098  1.1  christos 				masterformat, NULL));
   8099  1.1  christos }
   8100  1.1  christos 
   8101  1.1  christos static void
   8102  1.1  christos delete_callback(void *data, void *arg) {
   8103  1.1  christos 	dns_rbtdb_t *rbtdb = arg;
   8104  1.1  christos 	rdatasetheader_t *current, *next;
   8105  1.1  christos 	unsigned int locknum;
   8106  1.1  christos 
   8107  1.1  christos 	current = data;
   8108  1.1  christos 	locknum = current->node->locknum;
   8109  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
   8110  1.1  christos 	while (current != NULL) {
   8111  1.1  christos 		next = current->next;
   8112  1.1  christos 		free_rdataset(rbtdb, rbtdb->common.mctx, current);
   8113  1.1  christos 		current = next;
   8114  1.1  christos 	}
   8115  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
   8116  1.1  christos }
   8117  1.1  christos 
   8118  1.1  christos static bool
   8119  1.1  christos issecure(dns_db_t *db) {
   8120  1.1  christos 	dns_rbtdb_t *rbtdb;
   8121  1.1  christos 	bool secure;
   8122  1.1  christos 
   8123  1.1  christos 	rbtdb = (dns_rbtdb_t *)db;
   8124  1.1  christos 
   8125  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8126  1.1  christos 
   8127  1.1  christos 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
   8128  1.1  christos 	secure = (rbtdb->current_version->secure == dns_db_secure);
   8129  1.1  christos 	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
   8130  1.1  christos 
   8131  1.1  christos 	return (secure);
   8132  1.1  christos }
   8133  1.1  christos 
   8134  1.1  christos static bool
   8135  1.1  christos isdnssec(dns_db_t *db) {
   8136  1.1  christos 	dns_rbtdb_t *rbtdb;
   8137  1.1  christos 	bool dnssec;
   8138  1.1  christos 
   8139  1.1  christos 	rbtdb = (dns_rbtdb_t *)db;
   8140  1.1  christos 
   8141  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8142  1.1  christos 
   8143  1.1  christos 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
   8144  1.1  christos 	dnssec = (rbtdb->current_version->secure != dns_db_insecure);
   8145  1.1  christos 	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
   8146  1.1  christos 
   8147  1.1  christos 	return (dnssec);
   8148  1.1  christos }
   8149  1.1  christos 
   8150  1.1  christos static unsigned int
   8151  1.1  christos nodecount(dns_db_t *db) {
   8152  1.1  christos 	dns_rbtdb_t *rbtdb;
   8153  1.1  christos 	unsigned int count;
   8154  1.1  christos 
   8155  1.1  christos 	rbtdb = (dns_rbtdb_t *)db;
   8156  1.1  christos 
   8157  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8158  1.1  christos 
   8159  1.1  christos 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   8160  1.1  christos 	count = dns_rbt_nodecount(rbtdb->tree);
   8161  1.1  christos 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   8162  1.1  christos 
   8163  1.1  christos 	return (count);
   8164  1.1  christos }
   8165  1.1  christos 
   8166  1.1  christos static size_t
   8167  1.1  christos hashsize(dns_db_t *db) {
   8168  1.1  christos 	dns_rbtdb_t *rbtdb;
   8169  1.1  christos 	size_t size;
   8170  1.1  christos 
   8171  1.1  christos 	rbtdb = (dns_rbtdb_t *)db;
   8172  1.1  christos 
   8173  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8174  1.1  christos 
   8175  1.1  christos 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   8176  1.1  christos 	size = dns_rbt_hashsize(rbtdb->tree);
   8177  1.1  christos 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   8178  1.1  christos 
   8179  1.1  christos 	return (size);
   8180  1.1  christos }
   8181  1.1  christos 
   8182  1.1  christos static isc_result_t
   8183  1.1  christos adjusthashsize(dns_db_t *db, size_t size) {
   8184  1.1  christos 	isc_result_t result;
   8185  1.1  christos 	dns_rbtdb_t *rbtdb;
   8186  1.1  christos 
   8187  1.1  christos 	rbtdb = (dns_rbtdb_t *)db;
   8188  1.1  christos 
   8189  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8190  1.1  christos 
   8191  1.1  christos 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
   8192  1.1  christos 	result = dns_rbt_adjusthashsize(rbtdb->tree, size);
   8193  1.1  christos 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
   8194  1.1  christos 
   8195  1.1  christos 	return (result);
   8196  1.1  christos }
   8197  1.1  christos 
   8198  1.1  christos static void
   8199  1.1  christos settask(dns_db_t *db, isc_task_t *task) {
   8200  1.1  christos 	dns_rbtdb_t *rbtdb;
   8201  1.1  christos 
   8202  1.1  christos 	rbtdb = (dns_rbtdb_t *)db;
   8203  1.1  christos 
   8204  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8205  1.1  christos 
   8206  1.1  christos 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
   8207  1.1  christos 	if (rbtdb->task != NULL) {
   8208  1.1  christos 		isc_task_detach(&rbtdb->task);
   8209  1.1  christos 	}
   8210  1.1  christos 	if (task != NULL) {
   8211  1.1  christos 		isc_task_attach(task, &rbtdb->task);
   8212  1.1  christos 	}
   8213  1.1  christos 	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
   8214  1.1  christos }
   8215  1.1  christos 
   8216  1.1  christos static bool
   8217  1.1  christos ispersistent(dns_db_t *db) {
   8218  1.1  christos 	UNUSED(db);
   8219  1.1  christos 	return (false);
   8220  1.1  christos }
   8221  1.1  christos 
   8222  1.1  christos static isc_result_t
   8223  1.1  christos getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
   8224  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   8225  1.1  christos 	dns_rbtnode_t *onode;
   8226  1.1  christos 	isc_result_t result = ISC_R_SUCCESS;
   8227  1.1  christos 
   8228  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8229  1.1  christos 	REQUIRE(nodep != NULL && *nodep == NULL);
   8230  1.1  christos 
   8231  1.1  christos 	/* Note that the access to origin_node doesn't require a DB lock */
   8232  1.1  christos 	onode = (dns_rbtnode_t *)rbtdb->origin_node;
   8233  1.1  christos 	if (onode != NULL) {
   8234  1.1  christos 		new_reference(rbtdb, onode, isc_rwlocktype_none);
   8235  1.1  christos 		*nodep = rbtdb->origin_node;
   8236  1.1  christos 	} else {
   8237  1.1  christos 		INSIST(IS_CACHE(rbtdb));
   8238  1.1  christos 		result = ISC_R_NOTFOUND;
   8239  1.1  christos 	}
   8240  1.1  christos 
   8241  1.1  christos 	return (result);
   8242  1.1  christos }
   8243  1.1  christos 
   8244  1.1  christos static isc_result_t
   8245  1.1  christos getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
   8246  1.1  christos 		   uint8_t *flags, uint16_t *iterations, unsigned char *salt,
   8247  1.1  christos 		   size_t *salt_length) {
   8248  1.1  christos 	dns_rbtdb_t *rbtdb;
   8249  1.1  christos 	isc_result_t result = ISC_R_NOTFOUND;
   8250  1.1  christos 	rbtdb_version_t *rbtversion = version;
   8251  1.1  christos 
   8252  1.1  christos 	rbtdb = (dns_rbtdb_t *)db;
   8253  1.1  christos 
   8254  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8255  1.1  christos 	INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
   8256  1.1  christos 
   8257  1.1  christos 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
   8258  1.1  christos 	if (rbtversion == NULL) {
   8259  1.1  christos 		rbtversion = rbtdb->current_version;
   8260  1.1  christos 	}
   8261  1.1  christos 
   8262  1.1  christos 	if (rbtversion->havensec3) {
   8263  1.1  christos 		if (hash != NULL) {
   8264  1.1  christos 			*hash = rbtversion->hash;
   8265  1.1  christos 		}
   8266  1.1  christos 		if (salt != NULL && salt_length != NULL) {
   8267  1.1  christos 			REQUIRE(*salt_length >= rbtversion->salt_length);
   8268  1.1  christos 			memmove(salt, rbtversion->salt,
   8269  1.1  christos 				rbtversion->salt_length);
   8270  1.1  christos 		}
   8271  1.1  christos 		if (salt_length != NULL) {
   8272  1.1  christos 			*salt_length = rbtversion->salt_length;
   8273  1.1  christos 		}
   8274  1.1  christos 		if (iterations != NULL) {
   8275  1.1  christos 			*iterations = rbtversion->iterations;
   8276  1.1  christos 		}
   8277  1.1  christos 		if (flags != NULL) {
   8278  1.1  christos 			*flags = rbtversion->flags;
   8279  1.1  christos 		}
   8280  1.1  christos 		result = ISC_R_SUCCESS;
   8281  1.1  christos 	}
   8282  1.1  christos 	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
   8283  1.1  christos 
   8284  1.1  christos 	return (result);
   8285  1.1  christos }
   8286  1.1  christos 
   8287  1.1  christos static isc_result_t
   8288  1.1  christos getsize(dns_db_t *db, dns_dbversion_t *version, uint64_t *records,
   8289  1.1  christos 	uint64_t *xfrsize) {
   8290  1.1  christos 	dns_rbtdb_t *rbtdb;
   8291  1.1  christos 	isc_result_t result = ISC_R_SUCCESS;
   8292  1.1  christos 	rbtdb_version_t *rbtversion = version;
   8293  1.1  christos 
   8294  1.1  christos 	rbtdb = (dns_rbtdb_t *)db;
   8295  1.1  christos 
   8296  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8297  1.1  christos 	INSIST(rbtversion == NULL || rbtversion->rbtdb == rbtdb);
   8298  1.1  christos 
   8299  1.1  christos 	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
   8300  1.1  christos 	if (rbtversion == NULL) {
   8301  1.1  christos 		rbtversion = rbtdb->current_version;
   8302  1.1  christos 	}
   8303  1.1  christos 
   8304  1.1  christos 	RWLOCK(&rbtversion->rwlock, isc_rwlocktype_read);
   8305  1.1  christos 	if (records != NULL) {
   8306  1.1  christos 		*records = rbtversion->records;
   8307  1.1  christos 	}
   8308  1.1  christos 
   8309  1.1  christos 	if (xfrsize != NULL) {
   8310  1.1  christos 		*xfrsize = rbtversion->xfrsize;
   8311  1.1  christos 	}
   8312  1.1  christos 	RWUNLOCK(&rbtversion->rwlock, isc_rwlocktype_read);
   8313  1.1  christos 	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
   8314  1.1  christos 
   8315  1.1  christos 	return (result);
   8316  1.1  christos }
   8317  1.1  christos 
   8318  1.1  christos static isc_result_t
   8319  1.1  christos setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
   8320  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   8321  1.1  christos 	rdatasetheader_t *header, oldheader;
   8322  1.1  christos 
   8323  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8324  1.1  christos 	REQUIRE(!IS_CACHE(rbtdb));
   8325  1.1  christos 	REQUIRE(rdataset != NULL);
   8326  1.1  christos 
   8327  1.1  christos 	header = rdataset->private3;
   8328  1.1  christos 	header--;
   8329  1.1  christos 
   8330  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
   8331  1.1  christos 		  isc_rwlocktype_write);
   8332  1.1  christos 
   8333  1.1  christos 	oldheader = *header;
   8334  1.1  christos 	/*
   8335  1.1  christos 	 * Only break the heap invariant (by adjusting resign and resign_lsb)
   8336  1.1  christos 	 * if we are going to be restoring it by calling isc_heap_increased
   8337  1.1  christos 	 * or isc_heap_decreased.
   8338  1.1  christos 	 */
   8339  1.1  christos 	if (resign != 0) {
   8340  1.1  christos 		header->resign = (isc_stdtime_t)(dns_time64_from32(resign) >>
   8341  1.1  christos 						 1);
   8342  1.1  christos 		header->resign_lsb = resign & 0x1;
   8343  1.1  christos 	}
   8344  1.1  christos 	if (header->heap_index != 0) {
   8345  1.1  christos 		INSIST(RESIGN(header));
   8346  1.1  christos 		if (resign == 0) {
   8347  1.1  christos 			isc_heap_delete(rbtdb->heaps[header->node->locknum],
   8348  1.1  christos 					header->heap_index);
   8349  1.1  christos 			header->heap_index = 0;
   8350  1.1  christos 		} else if (resign_sooner(header, &oldheader)) {
   8351  1.1  christos 			isc_heap_increased(rbtdb->heaps[header->node->locknum],
   8352  1.1  christos 					   header->heap_index);
   8353  1.1  christos 		} else if (resign_sooner(&oldheader, header)) {
   8354  1.1  christos 			isc_heap_decreased(rbtdb->heaps[header->node->locknum],
   8355  1.1  christos 					   header->heap_index);
   8356  1.1  christos 		}
   8357  1.1  christos 	} else if (resign != 0) {
   8358  1.1  christos 		RDATASET_ATTR_SET(header, RDATASET_ATTR_RESIGN);
   8359  1.1  christos 		resign_insert(rbtdb, header->node->locknum, header);
   8360  1.1  christos 	}
   8361  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
   8362  1.1  christos 		    isc_rwlocktype_write);
   8363  1.1  christos 	return (ISC_R_SUCCESS);
   8364  1.1  christos }
   8365  1.1  christos 
   8366  1.1  christos static isc_result_t
   8367  1.1  christos getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, dns_name_t *foundname) {
   8368  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   8369  1.1  christos 	rdatasetheader_t *header = NULL, *this;
   8370  1.1  christos 	unsigned int i;
   8371  1.1  christos 	isc_result_t result = ISC_R_NOTFOUND;
   8372  1.1  christos 	unsigned int locknum = 0;
   8373  1.1  christos 
   8374  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8375  1.1  christos 
   8376  1.1  christos 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   8377  1.1  christos 
   8378  1.1  christos 	for (i = 0; i < rbtdb->node_lock_count; i++) {
   8379  1.1  christos 		NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
   8380  1.1  christos 
   8381  1.1  christos 		/*
   8382  1.1  christos 		 * Find for the earliest signing time among all of the
   8383  1.1  christos 		 * heaps, each of which is covered by a different bucket
   8384  1.1  christos 		 * lock.
   8385  1.1  christos 		 */
   8386  1.1  christos 		this = isc_heap_element(rbtdb->heaps[i], 1);
   8387  1.1  christos 		if (this == NULL) {
   8388  1.1  christos 			/* Nothing found; unlock and try the next heap. */
   8389  1.1  christos 			NODE_UNLOCK(&rbtdb->node_locks[i].lock,
   8390  1.1  christos 				    isc_rwlocktype_read);
   8391  1.1  christos 			continue;
   8392  1.1  christos 		}
   8393  1.1  christos 
   8394  1.1  christos 		if (header == NULL) {
   8395  1.1  christos 			/*
   8396  1.1  christos 			 * Found a signing time: retain the bucket lock and
   8397  1.1  christos 			 * preserve the lock number so we can unlock it
   8398  1.1  christos 			 * later.
   8399  1.1  christos 			 */
   8400  1.1  christos 			header = this;
   8401  1.1  christos 			locknum = i;
   8402  1.1  christos 		} else if (resign_sooner(this, header)) {
   8403  1.1  christos 			/*
   8404  1.1  christos 			 * Found an earlier signing time; release the
   8405  1.1  christos 			 * previous bucket lock and retain this one instead.
   8406  1.1  christos 			 */
   8407  1.1  christos 			NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
   8408  1.1  christos 				    isc_rwlocktype_read);
   8409  1.1  christos 			header = this;
   8410  1.1  christos 			locknum = i;
   8411  1.1  christos 		} else {
   8412  1.1  christos 			/*
   8413  1.1  christos 			 * Earliest signing time in this heap isn't
   8414  1.1  christos 			 * an improvement; unlock and try the next heap.
   8415  1.1  christos 			 */
   8416  1.1  christos 			NODE_UNLOCK(&rbtdb->node_locks[i].lock,
   8417  1.1  christos 				    isc_rwlocktype_read);
   8418  1.1  christos 		}
   8419  1.1  christos 	}
   8420  1.1  christos 
   8421  1.1  christos 	if (header != NULL) {
   8422  1.1  christos 		/*
   8423  1.1  christos 		 * Found something; pass back the answer and unlock
   8424  1.1  christos 		 * the bucket.
   8425  1.1  christos 		 */
   8426  1.1  christos 		bind_rdataset(rbtdb, header->node, header, 0,
   8427  1.1  christos 			      isc_rwlocktype_read, rdataset);
   8428  1.1  christos 
   8429  1.1  christos 		if (foundname != NULL) {
   8430  1.1  christos 			dns_rbt_fullnamefromnode(header->node, foundname);
   8431  1.1  christos 		}
   8432  1.1  christos 
   8433  1.1  christos 		NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
   8434  1.1  christos 			    isc_rwlocktype_read);
   8435  1.1  christos 
   8436  1.1  christos 		result = ISC_R_SUCCESS;
   8437  1.1  christos 	}
   8438  1.1  christos 
   8439  1.1  christos 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   8440  1.1  christos 
   8441  1.1  christos 	return (result);
   8442  1.1  christos }
   8443  1.1  christos 
   8444  1.1  christos static void
   8445  1.1  christos resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version) {
   8446  1.1  christos 	rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
   8447  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   8448  1.1  christos 	dns_rbtnode_t *node;
   8449  1.1  christos 	rdatasetheader_t *header;
   8450  1.1  christos 
   8451  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8452  1.1  christos 	REQUIRE(rdataset != NULL);
   8453  1.1  christos 	REQUIRE(rdataset->methods == &rdataset_methods);
   8454  1.1  christos 	REQUIRE(rbtdb->future_version == rbtversion);
   8455  1.1  christos 	REQUIRE(rbtversion != NULL);
   8456  1.1  christos 	REQUIRE(rbtversion->writer);
   8457  1.1  christos 	REQUIRE(rbtversion->rbtdb == rbtdb);
   8458  1.1  christos 
   8459  1.1  christos 	node = rdataset->private2;
   8460  1.1  christos 	INSIST(node != NULL);
   8461  1.1  christos 	header = rdataset->private3;
   8462  1.1  christos 	INSIST(header != NULL);
   8463  1.1  christos 	header--;
   8464  1.1  christos 
   8465  1.1  christos 	if (header->heap_index == 0) {
   8466  1.1  christos 		return;
   8467  1.1  christos 	}
   8468  1.1  christos 
   8469  1.1  christos 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
   8470  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[node->locknum].lock, isc_rwlocktype_write);
   8471  1.1  christos 	/*
   8472  1.1  christos 	 * Delete from heap and save to re-signed list so that it can
   8473  1.1  christos 	 * be restored if we backout of this change.
   8474  1.1  christos 	 */
   8475  1.1  christos 	resign_delete(rbtdb, rbtversion, header);
   8476  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
   8477  1.1  christos 		    isc_rwlocktype_write);
   8478  1.1  christos 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
   8479  1.1  christos }
   8480  1.1  christos 
   8481  1.1  christos static isc_result_t
   8482  1.1  christos setcachestats(dns_db_t *db, isc_stats_t *stats) {
   8483  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   8484  1.1  christos 
   8485  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8486  1.1  christos 	REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
   8487  1.1  christos 	REQUIRE(stats != NULL);
   8488  1.1  christos 
   8489  1.1  christos 	isc_stats_attach(stats, &rbtdb->cachestats);
   8490  1.1  christos 	return (ISC_R_SUCCESS);
   8491  1.1  christos }
   8492  1.1  christos 
   8493  1.1  christos static isc_result_t
   8494  1.1  christos setgluecachestats(dns_db_t *db, isc_stats_t *stats) {
   8495  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   8496  1.1  christos 
   8497  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8498  1.1  christos 	REQUIRE(!IS_CACHE(rbtdb) && !IS_STUB(rbtdb));
   8499  1.1  christos 	REQUIRE(stats != NULL);
   8500  1.1  christos 
   8501  1.1  christos 	isc_stats_attach(stats, &rbtdb->gluecachestats);
   8502  1.1  christos 	return (ISC_R_SUCCESS);
   8503  1.1  christos }
   8504  1.1  christos 
   8505  1.1  christos static dns_stats_t *
   8506  1.1  christos getrrsetstats(dns_db_t *db) {
   8507  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   8508  1.1  christos 
   8509  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8510  1.1  christos 	REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
   8511  1.1  christos 
   8512  1.1  christos 	return (rbtdb->rrsetstats);
   8513  1.1  christos }
   8514  1.1  christos 
   8515  1.1  christos static isc_result_t
   8516  1.1  christos nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name) {
   8517  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   8518  1.1  christos 	dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
   8519  1.1  christos 	isc_result_t result;
   8520  1.1  christos 
   8521  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8522  1.1  christos 	REQUIRE(node != NULL);
   8523  1.1  christos 	REQUIRE(name != NULL);
   8524  1.1  christos 
   8525  1.1  christos 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   8526  1.1  christos 	result = dns_rbt_fullnamefromnode(rbtnode, name);
   8527  1.1  christos 	RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   8528  1.1  christos 
   8529  1.1  christos 	return (result);
   8530  1.1  christos }
   8531  1.1  christos 
   8532  1.1  christos static isc_result_t
   8533  1.1  christos setservestalettl(dns_db_t *db, dns_ttl_t ttl) {
   8534  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   8535  1.1  christos 
   8536  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8537  1.1  christos 	REQUIRE(IS_CACHE(rbtdb));
   8538  1.1  christos 
   8539  1.1  christos 	/* currently no bounds checking.  0 means disable. */
   8540  1.1  christos 	rbtdb->serve_stale_ttl = ttl;
   8541  1.1  christos 	return (ISC_R_SUCCESS);
   8542  1.1  christos }
   8543  1.1  christos 
   8544  1.1  christos static isc_result_t
   8545  1.1  christos getservestalettl(dns_db_t *db, dns_ttl_t *ttl) {
   8546  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   8547  1.1  christos 
   8548  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8549  1.1  christos 	REQUIRE(IS_CACHE(rbtdb));
   8550  1.1  christos 
   8551  1.1  christos 	*ttl = rbtdb->serve_stale_ttl;
   8552  1.1  christos 	return (ISC_R_SUCCESS);
   8553  1.1  christos }
   8554  1.1  christos 
   8555  1.1  christos static isc_result_t
   8556  1.1  christos setservestalerefresh(dns_db_t *db, uint32_t interval) {
   8557  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   8558  1.1  christos 
   8559  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8560  1.1  christos 	REQUIRE(IS_CACHE(rbtdb));
   8561  1.1  christos 
   8562  1.1  christos 	/* currently no bounds checking.  0 means disable. */
   8563  1.1  christos 	rbtdb->serve_stale_refresh = interval;
   8564  1.1  christos 	return (ISC_R_SUCCESS);
   8565  1.1  christos }
   8566  1.1  christos 
   8567  1.1  christos static isc_result_t
   8568  1.1  christos getservestalerefresh(dns_db_t *db, uint32_t *interval) {
   8569  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
   8570  1.1  christos 
   8571  1.1  christos 	REQUIRE(VALID_RBTDB(rbtdb));
   8572  1.1  christos 	REQUIRE(IS_CACHE(rbtdb));
   8573  1.1  christos 
   8574  1.1  christos 	*interval = rbtdb->serve_stale_refresh;
   8575  1.1  christos 	return (ISC_R_SUCCESS);
   8576  1.1  christos }
   8577  1.1  christos 
   8578  1.1  christos static dns_dbmethods_t zone_methods = { attach,
   8579  1.1  christos 					detach,
   8580  1.1  christos 					beginload,
   8581  1.1  christos 					endload,
   8582  1.1  christos 					serialize,
   8583  1.1  christos 					dump,
   8584  1.1  christos 					currentversion,
   8585  1.1  christos 					newversion,
   8586  1.1  christos 					attachversion,
   8587  1.1  christos 					closeversion,
   8588  1.1  christos 					findnode,
   8589  1.1  christos 					zone_find,
   8590  1.1  christos 					zone_findzonecut,
   8591  1.1  christos 					attachnode,
   8592  1.1  christos 					detachnode,
   8593  1.1  christos 					expirenode,
   8594  1.1  christos 					printnode,
   8595  1.1  christos 					createiterator,
   8596  1.1  christos 					zone_findrdataset,
   8597  1.1  christos 					allrdatasets,
   8598  1.1  christos 					addrdataset,
   8599  1.1  christos 					subtractrdataset,
   8600  1.1  christos 					deleterdataset,
   8601  1.1  christos 					issecure,
   8602  1.1  christos 					nodecount,
   8603  1.1  christos 					ispersistent,
   8604  1.1  christos 					overmem,
   8605  1.1  christos 					settask,
   8606  1.1  christos 					getoriginnode,
   8607  1.1  christos 					NULL, /* transfernode */
   8608  1.1  christos 					getnsec3parameters,
   8609  1.1  christos 					findnsec3node,
   8610  1.1  christos 					setsigningtime,
   8611  1.1  christos 					getsigningtime,
   8612  1.1  christos 					resigned,
   8613  1.1  christos 					isdnssec,
   8614  1.1  christos 					NULL, /* getrrsetstats */
   8615  1.1  christos 					NULL, /* rpz_attach */
   8616  1.1  christos 					NULL, /* rpz_ready */
   8617  1.1  christos 					NULL, /* findnodeext */
   8618  1.1  christos 					NULL, /* findext */
   8619  1.1  christos 					NULL, /* setcachestats */
   8620  1.1  christos 					hashsize,
   8621  1.1  christos 					nodefullname,
   8622  1.1  christos 					getsize,
   8623  1.1  christos 					NULL, /* setservestalettl */
   8624  1.1  christos 					NULL, /* getservestalettl */
   8625  1.1  christos 					NULL, /* setservestalerefresh */
   8626  1.1  christos 					NULL, /* getservestalerefresh */
   8627  1.1  christos 					setgluecachestats,
   8628  1.1  christos 					adjusthashsize };
   8629  1.1  christos 
   8630  1.1  christos static dns_dbmethods_t cache_methods = { attach,
   8631  1.1  christos 					 detach,
   8632  1.1  christos 					 beginload,
   8633  1.1  christos 					 endload,
   8634  1.1  christos 					 NULL, /* serialize */
   8635  1.1  christos 					 dump,
   8636  1.1  christos 					 currentversion,
   8637  1.1  christos 					 newversion,
   8638  1.1  christos 					 attachversion,
   8639  1.1  christos 					 closeversion,
   8640  1.1  christos 					 findnode,
   8641  1.1  christos 					 cache_find,
   8642  1.1  christos 					 cache_findzonecut,
   8643  1.1  christos 					 attachnode,
   8644  1.1  christos 					 detachnode,
   8645  1.1  christos 					 expirenode,
   8646  1.1  christos 					 printnode,
   8647  1.1  christos 					 createiterator,
   8648  1.1  christos 					 cache_findrdataset,
   8649  1.1  christos 					 allrdatasets,
   8650  1.1  christos 					 addrdataset,
   8651  1.1  christos 					 subtractrdataset,
   8652  1.1  christos 					 deleterdataset,
   8653  1.1  christos 					 issecure,
   8654  1.1  christos 					 nodecount,
   8655  1.1  christos 					 ispersistent,
   8656  1.1  christos 					 overmem,
   8657  1.1  christos 					 settask,
   8658  1.1  christos 					 getoriginnode,
   8659  1.1  christos 					 NULL, /* transfernode */
   8660  1.1  christos 					 NULL, /* getnsec3parameters */
   8661  1.1  christos 					 NULL, /* findnsec3node */
   8662  1.1  christos 					 NULL, /* setsigningtime */
   8663  1.1  christos 					 NULL, /* getsigningtime */
   8664  1.1  christos 					 NULL, /* resigned */
   8665  1.1  christos 					 isdnssec,
   8666  1.1  christos 					 getrrsetstats,
   8667  1.1  christos 					 NULL, /* rpz_attach */
   8668  1.1  christos 					 NULL, /* rpz_ready */
   8669  1.1  christos 					 NULL, /* findnodeext */
   8670  1.1  christos 					 NULL, /* findext */
   8671  1.1  christos 					 setcachestats,
   8672  1.1  christos 					 hashsize,
   8673  1.1  christos 					 nodefullname,
   8674  1.1  christos 					 NULL, /* getsize */
   8675  1.1  christos 					 setservestalettl,
   8676  1.1  christos 					 getservestalettl,
   8677  1.1  christos 					 setservestalerefresh,
   8678  1.1  christos 					 getservestalerefresh,
   8679  1.1  christos 					 NULL,
   8680  1.1  christos 					 adjusthashsize };
   8681  1.1  christos 
/*
 * Allocate and initialise a red-black-tree database and return it
 * through '*dbp'.
 *
 *	mctx	  memory context for the database itself and most of its
 *		  per-bucket arrays.
 *	origin	  name copied into the database as its origin.
 *	type	  dns_dbtype_cache selects cache_methods and sets
 *		  DNS_DBATTR_CACHE; dns_dbtype_stub selects zone_methods
 *		  and sets DNS_DBATTR_STUB; anything else selects
 *		  zone_methods with no extra attribute.
 *	rdclass	  stored as the database's class.
 *	argc/argv if argc is non-zero, argv[0] is reinterpreted as an
 *		  isc_mem_t * and used for the heaps instead of 'mctx'.
 *	driverarg unused.
 *	dbp	  receives the new database on success.
 *
 * Returns ISC_R_SUCCESS, or the failing result of
 * dns_rdatasetstats_create(), dns_name_dupwithoffsets(),
 * dns_rbt_create() or dns_rbt_addnode().  ISC_R_RANGE is also coded
 * for, but see the note at the node_lock_count check below.
 *
 * Two cleanup styles are used: before the memory contexts are attached,
 * failures unwind by hand through the labels at the bottom; afterwards
 * they call free_rbtdb().
 */
isc_result_t
dns_rbtdb_create(isc_mem_t *mctx, const dns_name_t *origin, dns_dbtype_t type,
		 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
		 void *driverarg, dns_db_t **dbp) {
	dns_rbtdb_t *rbtdb;
	isc_result_t result;
	int i;
	dns_name_t name;
	bool (*sooner)(void *, void *);
	isc_mem_t *hmctx = mctx; /* heap memory context; defaults to mctx */

	/* Keep the compiler happy. */
	UNUSED(driverarg);

	rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));

	/*
	 * If argv[0] exists, it points to a memory context to use for heap
	 */
	if (argc != 0) {
		hmctx = (isc_mem_t *)argv[0];
	}

	/*
	 * Start from an all-zero structure; fields that are not assigned
	 * explicitly below keep that zero value.
	 */
	memset(rbtdb, '\0', sizeof(*rbtdb));
	dns_name_init(&rbtdb->common.origin, NULL);
	rbtdb->common.attributes = 0;
	if (type == dns_dbtype_cache) {
		rbtdb->common.methods = &cache_methods;
		rbtdb->common.attributes |= DNS_DBATTR_CACHE;
	} else if (type == dns_dbtype_stub) {
		rbtdb->common.methods = &zone_methods;
		rbtdb->common.attributes |= DNS_DBATTR_STUB;
	} else {
		rbtdb->common.methods = &zone_methods;
	}
	rbtdb->common.rdclass = rdclass;
	rbtdb->common.mctx = NULL;

	ISC_LIST_INIT(rbtdb->common.update_listeners);

	RBTDB_INITLOCK(&rbtdb->lock);

	isc_rwlock_init(&rbtdb->tree_lock, 0, 0);

	/*
	 * Initialize node_lock_count in a generic way to support future
	 * extension which allows the user to specify this value on creation.
	 * Note that when specified for a cache DB it must be larger than 1
	 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
	 *
	 * NOTE(review): nothing between the memset() above and this point
	 * assigns node_lock_count, so as written the "== 0" branch is the
	 * one taken and the ISC_R_RANGE branch is not reached.
	 */
	if (rbtdb->node_lock_count == 0) {
		if (IS_CACHE(rbtdb)) {
			rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
		} else {
			rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
		}
	} else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
		result = ISC_R_RANGE;
		goto cleanup_tree_lock;
	}
	INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
	rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
						      sizeof(rbtdb_nodelock_t));

	rbtdb->cachestats = NULL;
	rbtdb->gluecachestats = NULL;

	/*
	 * Cache databases get an rrset statistics object and one rdataset
	 * header list per bucket; other databases get neither.
	 */
	rbtdb->rrsetstats = NULL;
	if (IS_CACHE(rbtdb)) {
		result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
		if (result != ISC_R_SUCCESS) {
			goto cleanup_node_locks;
		}
		rbtdb->rdatasets = isc_mem_get(
			mctx,
			rbtdb->node_lock_count * sizeof(rdatasetheaderlist_t));
		for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
			ISC_LIST_INIT(rbtdb->rdatasets[i]);
		}
	} else {
		rbtdb->rdatasets = NULL;
	}

	/*
	 * Create the heaps: one per bucket, allocated from hmctx.  Cache
	 * databases order them with ttl_sooner, all others with
	 * resign_sooner.
	 */
	rbtdb->heaps = isc_mem_get(hmctx, rbtdb->node_lock_count *
						  sizeof(isc_heap_t *));
	for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
		rbtdb->heaps[i] = NULL;
	}
	sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
	for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
		isc_heap_create(hmctx, sooner, set_index, 0, &rbtdb->heaps[i]);
	}

	/*
	 * Create deadnode lists.
	 */
	rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
						     sizeof(rbtnodelist_t));
	for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
		ISC_LIST_INIT(rbtdb->deadnodes[i]);
	}

	ISC_LIST_INIT(rbtdb->prunenodes);

	rbtdb->active = rbtdb->node_lock_count;

	/* Initialise each bucket: its lock, reference count and flag. */
	for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
		NODE_INITLOCK(&rbtdb->node_locks[i].lock);
		isc_refcount_init(&rbtdb->node_locks[i].references, 0);
		rbtdb->node_locks[i].exiting = false;
	}

	/*
	 * Attach to the mctx.  The database will persist so long as there
	 * are references to it, and attaching to the mctx ensures that our
	 * mctx won't disappear out from under us.
	 *
	 * From here on, failure paths call free_rbtdb() instead of jumping
	 * to the cleanup labels.
	 */
	isc_mem_attach(mctx, &rbtdb->common.mctx);
	isc_mem_attach(hmctx, &rbtdb->hmctx);

	/*
	 * Make a copy of the origin name.
	 */
	result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
	if (result != ISC_R_SUCCESS) {
		free_rbtdb(rbtdb, false, NULL);
		return (result);
	}

	/*
	 * Make the Red-Black Trees: the main tree, then the nsec and
	 * nsec3 trees, all sharing delete_callback.
	 */
	result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
	if (result != ISC_R_SUCCESS) {
		free_rbtdb(rbtdb, false, NULL);
		return (result);
	}

	result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec);
	if (result != ISC_R_SUCCESS) {
		free_rbtdb(rbtdb, false, NULL);
		return (result);
	}

	result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
	if (result != ISC_R_SUCCESS) {
		free_rbtdb(rbtdb, false, NULL);
		return (result);
	}

	/*
	 * In order to set the node callback bit correctly in zone databases,
	 * we need to know if the node has the origin name of the zone.
	 * In loading_addrdataset() we could simply compare the new name
	 * to the origin name, but this is expensive.  Also, we don't know the
	 * node name in addrdataset(), so we need another way of knowing the
	 * zone's top.
	 *
	 * We now explicitly create a node for the zone's origin, and then
	 * we simply remember the node's address.  This is safe, because
	 * the top-of-zone node can never be deleted, nor can its address
	 * change.
	 */
	if (!IS_CACHE(rbtdb)) {
		rbtdb->origin_node = NULL;
		result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
					 &rbtdb->origin_node);
		if (result != ISC_R_SUCCESS) {
			/* The tree was just created, so it cannot exist yet. */
			INSIST(result != ISC_R_EXISTS);
			free_rbtdb(rbtdb, false, NULL);
			return (result);
		}
		INSIST(rbtdb->origin_node != NULL);
		rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL;
		/*
		 * We need to give the origin node the right locknum.
		 *
		 * NOTE(review): 'name' is filled in here but not read
		 * again in this function; the locknum comes from the
		 * node's hashval.
		 */
		dns_name_init(&name, NULL);
		dns_rbt_namefromnode(rbtdb->origin_node, &name);
		rbtdb->origin_node->locknum = rbtdb->origin_node->hashval %
					      rbtdb->node_lock_count;
		/*
		 * Add an apex node to the NSEC3 tree so that NSEC3 searches
		 * return partial matches when there is only a single NSEC3
		 * record in the tree.
		 */
		rbtdb->nsec3_origin_node = NULL;
		result = dns_rbt_addnode(rbtdb->nsec3, &rbtdb->common.origin,
					 &rbtdb->nsec3_origin_node);
		if (result != ISC_R_SUCCESS) {
			INSIST(result != ISC_R_EXISTS);
			free_rbtdb(rbtdb, false, NULL);
			return (result);
		}
		rbtdb->nsec3_origin_node->nsec = DNS_RBT_NSEC_NSEC3;
		/*
		 * We need to give the nsec3 origin node the right locknum.
		 */
		dns_name_init(&name, NULL);
		dns_rbt_namefromnode(rbtdb->nsec3_origin_node, &name);
		rbtdb->nsec3_origin_node->locknum =
			rbtdb->nsec3_origin_node->hashval %
			rbtdb->node_lock_count;
	}

	/*
	 * Misc. Initialization.
	 */
	isc_refcount_init(&rbtdb->references, 1);
	rbtdb->attributes = 0;
	rbtdb->task = NULL;
	rbtdb->serve_stale_ttl = 0;

	/*
	 * Version Initialization.  The database starts with a single
	 * version, serial 1, and no future (writer) version.
	 */
	rbtdb->current_serial = 1;
	rbtdb->least_serial = 1;
	rbtdb->next_serial = 2;
	rbtdb->current_version = allocate_version(mctx, 1, 1, false);
	rbtdb->current_version->rbtdb = rbtdb;
	rbtdb->current_version->secure = dns_db_insecure;
	rbtdb->current_version->havensec3 = false;
	rbtdb->current_version->flags = 0;
	rbtdb->current_version->iterations = 0;
	rbtdb->current_version->hash = 0;
	rbtdb->current_version->salt_length = 0;
	memset(rbtdb->current_version->salt, 0,
	       sizeof(rbtdb->current_version->salt));
	isc_rwlock_init(&rbtdb->current_version->rwlock, 0, 0);
	rbtdb->current_version->records = 0;
	rbtdb->current_version->xfrsize = 0;
	rbtdb->future_version = NULL;
	ISC_LIST_INIT(rbtdb->open_versions);
	/*
	 * Keep the current version in the open list so that list operation
	 * won't happen in normal lookup operations.
	 */
	PREPEND(rbtdb->open_versions, rbtdb->current_version, link);

	/* The magic numbers are set last, once everything else is in place. */
	rbtdb->common.magic = DNS_DB_MAGIC;
	rbtdb->common.impmagic = RBTDB_MAGIC;

	*dbp = (dns_db_t *)rbtdb;

	return (ISC_R_SUCCESS);

	/*
	 * Manual unwinding for failures that happen before the memory
	 * contexts are attached.
	 */
cleanup_node_locks:
	isc_mem_put(mctx, rbtdb->node_locks,
		    rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));

cleanup_tree_lock:
	isc_rwlock_destroy(&rbtdb->tree_lock);
	RBTDB_DESTROYLOCK(&rbtdb->lock);
	isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
	return (result);
}
   8942  1.1  christos 
   8943  1.1  christos /*
   8944  1.1  christos  * Slabbed Rdataset Methods
   8945  1.1  christos  */
   8946  1.1  christos 
   8947  1.1  christos static void
   8948  1.1  christos rdataset_disassociate(dns_rdataset_t *rdataset) {
   8949  1.1  christos 	dns_db_t *db = rdataset->private1;
   8950  1.1  christos 	dns_dbnode_t *node = rdataset->private2;
   8951  1.1  christos 
   8952  1.1  christos 	detachnode(db, &node);
   8953  1.1  christos }
   8954  1.1  christos 
   8955  1.1  christos static isc_result_t
   8956  1.1  christos rdataset_first(dns_rdataset_t *rdataset) {
   8957  1.1  christos 	unsigned char *raw = rdataset->private3; /* RDATASLAB */
   8958  1.1  christos 	unsigned int count;
   8959  1.1  christos 
   8960  1.1  christos 	count = raw[0] * 256 + raw[1];
   8961  1.1  christos 	if (count == 0) {
   8962  1.1  christos 		rdataset->private5 = NULL;
   8963  1.1  christos 		return (ISC_R_NOMORE);
   8964  1.1  christos 	}
   8965  1.1  christos 
   8966  1.1  christos 	if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
   8967  1.1  christos 		raw += DNS_RDATASET_COUNT;
   8968  1.1  christos 	}
   8969  1.1  christos 
   8970  1.1  christos 	raw += DNS_RDATASET_LENGTH;
   8971  1.1  christos 
   8972  1.1  christos 	/*
   8973  1.1  christos 	 * The privateuint4 field is the number of rdata beyond the
   8974  1.1  christos 	 * cursor position, so we decrement the total count by one
   8975  1.1  christos 	 * before storing it.
   8976  1.1  christos 	 *
   8977  1.1  christos 	 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
   8978  1.1  christos 	 * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
   8979  1.1  christos 	 * to the first entry in the offset table.
   8980  1.1  christos 	 */
   8981  1.1  christos 	count--;
   8982  1.1  christos 	rdataset->privateuint4 = count;
   8983  1.1  christos 	rdataset->private5 = raw;
   8984  1.1  christos 
   8985  1.1  christos 	return (ISC_R_SUCCESS);
   8986  1.1  christos }
   8987  1.1  christos 
   8988  1.1  christos static isc_result_t
   8989  1.1  christos rdataset_next(dns_rdataset_t *rdataset) {
   8990  1.1  christos 	unsigned int count;
   8991  1.1  christos 	unsigned int length;
   8992  1.1  christos 	unsigned char *raw; /* RDATASLAB */
   8993  1.1  christos 
   8994  1.1  christos 	count = rdataset->privateuint4;
   8995  1.1  christos 	if (count == 0) {
   8996  1.1  christos 		return (ISC_R_NOMORE);
   8997  1.1  christos 	}
   8998  1.1  christos 	count--;
   8999  1.1  christos 	rdataset->privateuint4 = count;
   9000  1.1  christos 
   9001  1.1  christos 	/*
   9002  1.1  christos 	 * Skip forward one record (length + 4) or one offset (4).
   9003  1.1  christos 	 */
   9004  1.1  christos 	raw = rdataset->private5;
   9005  1.1  christos #if DNS_RDATASET_FIXED
   9006  1.1  christos 	if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
   9007  1.1  christos #endif /* DNS_RDATASET_FIXED */
   9008  1.1  christos 	{
   9009  1.1  christos 		length = raw[0] * 256 + raw[1];
   9010  1.1  christos 		raw += length;
   9011  1.1  christos 	}
   9012  1.1  christos 
   9013  1.1  christos 	rdataset->private5 = raw + DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH;
   9014  1.1  christos 
   9015  1.1  christos 	return (ISC_R_SUCCESS);
   9016  1.1  christos }
   9017  1.1  christos 
   9018  1.1  christos static void
   9019  1.1  christos rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
   9020  1.1  christos 	unsigned char *raw = rdataset->private5; /* RDATASLAB */
   9021  1.1  christos 	unsigned int length;
   9022  1.1  christos 	isc_region_t r;
   9023  1.1  christos 	unsigned int flags = 0;
   9024  1.1  christos 
   9025  1.1  christos 	REQUIRE(raw != NULL);
   9026  1.1  christos 
   9027  1.1  christos 	/*
   9028  1.1  christos 	 * Find the start of the record if not already in private5
   9029  1.1  christos 	 * then skip the length and order fields.
   9030  1.1  christos 	 */
   9031  1.1  christos #if DNS_RDATASET_FIXED
   9032  1.1  christos 	if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
   9033  1.1  christos 		unsigned int offset;
   9034  1.1  christos 		offset = ((unsigned int)raw[0] << 24) +
   9035  1.1  christos 			 ((unsigned int)raw[1] << 16) +
   9036  1.1  christos 			 ((unsigned int)raw[2] << 8) + (unsigned int)raw[3];
   9037  1.1  christos 		raw = rdataset->private3;
   9038  1.1  christos 		raw += offset;
   9039  1.1  christos 	}
   9040  1.1  christos #endif /* if DNS_RDATASET_FIXED */
   9041  1.1  christos 
   9042  1.1  christos 	length = raw[0] * 256 + raw[1];
   9043  1.1  christos 
   9044  1.1  christos 	raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH;
   9045  1.1  christos 
   9046  1.1  christos 	if (rdataset->type == dns_rdatatype_rrsig) {
   9047  1.1  christos 		if (*raw & DNS_RDATASLAB_OFFLINE) {
   9048  1.1  christos 			flags |= DNS_RDATA_OFFLINE;
   9049  1.1  christos 		}
   9050  1.1  christos 		length--;
   9051  1.1  christos 		raw++;
   9052  1.1  christos 	}
   9053  1.1  christos 	r.length = length;
   9054  1.1  christos 	r.base = raw;
   9055  1.1  christos 	dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
   9056  1.1  christos 	rdata->flags |= flags;
   9057  1.1  christos }
   9058  1.1  christos 
   9059  1.1  christos static void
   9060  1.1  christos rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
   9061  1.1  christos 	dns_db_t *db = source->private1;
   9062  1.1  christos 	dns_dbnode_t *node = source->private2;
   9063  1.1  christos 	dns_dbnode_t *cloned_node = NULL;
   9064  1.1  christos 
   9065  1.1  christos 	attachnode(db, node, &cloned_node);
   9066  1.1  christos 	INSIST(!ISC_LINK_LINKED(target, link));
   9067  1.1  christos 	*target = *source;
   9068  1.1  christos 	ISC_LINK_INIT(target, link);
   9069  1.1  christos 
   9070  1.1  christos 	/*
   9071  1.1  christos 	 * Reset iterator state.
   9072  1.1  christos 	 */
   9073  1.1  christos 	target->privateuint4 = 0;
   9074  1.1  christos 	target->private5 = NULL;
   9075  1.1  christos }
   9076  1.1  christos 
   9077  1.1  christos static unsigned int
   9078  1.1  christos rdataset_count(dns_rdataset_t *rdataset) {
   9079  1.1  christos 	unsigned char *raw = rdataset->private3; /* RDATASLAB */
   9080  1.1  christos 	unsigned int count;
   9081  1.1  christos 
   9082  1.1  christos 	count = raw[0] * 256 + raw[1];
   9083  1.1  christos 
   9084  1.1  christos 	return (count);
   9085  1.1  christos }
   9086  1.1  christos 
   9087  1.1  christos static isc_result_t
   9088  1.1  christos rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
   9089  1.1  christos 		    dns_rdataset_t *nsec, dns_rdataset_t *nsecsig) {
   9090  1.1  christos 	dns_db_t *db = rdataset->private1;
   9091  1.1  christos 	dns_dbnode_t *node = rdataset->private2;
   9092  1.1  christos 	dns_dbnode_t *cloned_node;
   9093  1.1  christos 	const struct noqname *noqname = rdataset->private6;
   9094  1.1  christos 
   9095  1.1  christos 	cloned_node = NULL;
   9096  1.1  christos 	attachnode(db, node, &cloned_node);
   9097  1.1  christos 	nsec->methods = &slab_methods;
   9098  1.1  christos 	nsec->rdclass = db->rdclass;
   9099  1.1  christos 	nsec->type = noqname->type;
   9100  1.1  christos 	nsec->covers = 0;
   9101  1.1  christos 	nsec->ttl = rdataset->ttl;
   9102  1.1  christos 	nsec->trust = rdataset->trust;
   9103  1.1  christos 	nsec->private1 = rdataset->private1;
   9104  1.1  christos 	nsec->private2 = rdataset->private2;
   9105  1.1  christos 	nsec->private3 = noqname->neg;
   9106  1.1  christos 	nsec->privateuint4 = 0;
   9107  1.1  christos 	nsec->private5 = NULL;
   9108  1.1  christos 	nsec->private6 = NULL;
   9109  1.1  christos 	nsec->private7 = NULL;
   9110  1.1  christos 
   9111  1.1  christos 	cloned_node = NULL;
   9112  1.1  christos 	attachnode(db, node, &cloned_node);
   9113  1.1  christos 	nsecsig->methods = &slab_methods;
   9114  1.1  christos 	nsecsig->rdclass = db->rdclass;
   9115  1.1  christos 	nsecsig->type = dns_rdatatype_rrsig;
   9116  1.1  christos 	nsecsig->covers = noqname->type;
   9117  1.1  christos 	nsecsig->ttl = rdataset->ttl;
   9118  1.1  christos 	nsecsig->trust = rdataset->trust;
   9119  1.1  christos 	nsecsig->private1 = rdataset->private1;
   9120  1.1  christos 	nsecsig->private2 = rdataset->private2;
   9121  1.1  christos 	nsecsig->private3 = noqname->negsig;
   9122  1.1  christos 	nsecsig->privateuint4 = 0;
   9123  1.1  christos 	nsecsig->private5 = NULL;
   9124  1.1  christos 	nsec->private6 = NULL;
   9125  1.1  christos 	nsec->private7 = NULL;
   9126  1.1  christos 
   9127  1.1  christos 	dns_name_clone(&noqname->name, name);
   9128  1.1  christos 
   9129  1.1  christos 	return (ISC_R_SUCCESS);
   9130  1.1  christos }
   9131  1.1  christos 
   9132  1.1  christos static isc_result_t
   9133  1.1  christos rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
   9134  1.1  christos 		    dns_rdataset_t *nsec, dns_rdataset_t *nsecsig) {
   9135  1.1  christos 	dns_db_t *db = rdataset->private1;
   9136  1.1  christos 	dns_dbnode_t *node = rdataset->private2;
   9137  1.1  christos 	dns_dbnode_t *cloned_node;
   9138  1.1  christos 	const struct noqname *closest = rdataset->private7;
   9139  1.1  christos 
   9140  1.1  christos 	cloned_node = NULL;
   9141  1.1  christos 	attachnode(db, node, &cloned_node);
   9142  1.1  christos 	nsec->methods = &slab_methods;
   9143  1.1  christos 	nsec->rdclass = db->rdclass;
   9144  1.1  christos 	nsec->type = closest->type;
   9145  1.1  christos 	nsec->covers = 0;
   9146  1.1  christos 	nsec->ttl = rdataset->ttl;
   9147  1.1  christos 	nsec->trust = rdataset->trust;
   9148  1.1  christos 	nsec->private1 = rdataset->private1;
   9149  1.1  christos 	nsec->private2 = rdataset->private2;
   9150  1.1  christos 	nsec->private3 = closest->neg;
   9151  1.1  christos 	nsec->privateuint4 = 0;
   9152  1.1  christos 	nsec->private5 = NULL;
   9153  1.1  christos 	nsec->private6 = NULL;
   9154  1.1  christos 	nsec->private7 = NULL;
   9155  1.1  christos 
   9156  1.1  christos 	cloned_node = NULL;
   9157  1.1  christos 	attachnode(db, node, &cloned_node);
   9158  1.1  christos 	nsecsig->methods = &slab_methods;
   9159  1.1  christos 	nsecsig->rdclass = db->rdclass;
   9160  1.1  christos 	nsecsig->type = dns_rdatatype_rrsig;
   9161  1.1  christos 	nsecsig->covers = closest->type;
   9162  1.1  christos 	nsecsig->ttl = rdataset->ttl;
   9163  1.1  christos 	nsecsig->trust = rdataset->trust;
   9164  1.1  christos 	nsecsig->private1 = rdataset->private1;
   9165  1.1  christos 	nsecsig->private2 = rdataset->private2;
   9166  1.1  christos 	nsecsig->private3 = closest->negsig;
   9167  1.1  christos 	nsecsig->privateuint4 = 0;
   9168  1.1  christos 	nsecsig->private5 = NULL;
   9169  1.1  christos 	nsec->private6 = NULL;
   9170  1.1  christos 	nsec->private7 = NULL;
   9171  1.1  christos 
   9172  1.1  christos 	dns_name_clone(&closest->name, name);
   9173  1.1  christos 
   9174  1.1  christos 	return (ISC_R_SUCCESS);
   9175  1.1  christos }
   9176  1.1  christos 
   9177  1.1  christos static void
   9178  1.1  christos rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
   9179  1.1  christos 	dns_rbtdb_t *rbtdb = rdataset->private1;
   9180  1.1  christos 	dns_rbtnode_t *rbtnode = rdataset->private2;
   9181  1.1  christos 	rdatasetheader_t *header = rdataset->private3;
   9182  1.1  christos 
   9183  1.1  christos 	header--;
   9184  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   9185  1.1  christos 		  isc_rwlocktype_write);
   9186  1.1  christos 	header->trust = rdataset->trust = trust;
   9187  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   9188  1.1  christos 		    isc_rwlocktype_write);
   9189  1.1  christos }
   9190  1.1  christos 
   9191  1.1  christos static void
   9192  1.1  christos rdataset_expire(dns_rdataset_t *rdataset) {
   9193  1.1  christos 	dns_rbtdb_t *rbtdb = rdataset->private1;
   9194  1.1  christos 	dns_rbtnode_t *rbtnode = rdataset->private2;
   9195  1.1  christos 	rdatasetheader_t *header = rdataset->private3;
   9196  1.1  christos 
   9197  1.1  christos 	header--;
   9198  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   9199  1.1  christos 		  isc_rwlocktype_write);
   9200  1.1  christos 	expire_header(rbtdb, header, false, expire_flush);
   9201  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   9202  1.1  christos 		    isc_rwlocktype_write);
   9203  1.1  christos }
   9204  1.1  christos 
   9205  1.1  christos static void
   9206  1.1  christos rdataset_clearprefetch(dns_rdataset_t *rdataset) {
   9207  1.1  christos 	dns_rbtdb_t *rbtdb = rdataset->private1;
   9208  1.1  christos 	dns_rbtnode_t *rbtnode = rdataset->private2;
   9209  1.1  christos 	rdatasetheader_t *header = rdataset->private3;
   9210  1.1  christos 
   9211  1.1  christos 	header--;
   9212  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   9213  1.1  christos 		  isc_rwlocktype_write);
   9214  1.1  christos 	RDATASET_ATTR_CLR(header, RDATASET_ATTR_PREFETCH);
   9215  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   9216  1.1  christos 		    isc_rwlocktype_write);
   9217  1.1  christos }
   9218  1.1  christos 
   9219  1.1  christos /*
   9220  1.1  christos  * Rdataset Iterator Methods
   9221  1.1  christos  */
   9222  1.1  christos 
   9223  1.1  christos static void
   9224  1.1  christos rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
   9225  1.1  christos 	rbtdb_rdatasetiter_t *rbtiterator;
   9226  1.1  christos 
   9227  1.1  christos 	rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
   9228  1.1  christos 
   9229  1.1  christos 	if (rbtiterator->common.version != NULL) {
   9230  1.1  christos 		closeversion(rbtiterator->common.db,
   9231  1.1  christos 			     &rbtiterator->common.version, false);
   9232  1.1  christos 	}
   9233  1.1  christos 	detachnode(rbtiterator->common.db, &rbtiterator->common.node);
   9234  1.1  christos 	isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
   9235  1.1  christos 		    sizeof(*rbtiterator));
   9236  1.1  christos 
   9237  1.1  christos 	*iteratorp = NULL;
   9238  1.1  christos }
   9239  1.1  christos 
   9240  1.1  christos static bool
   9241  1.1  christos iterator_active(dns_rbtdb_t *rbtdb, rbtdb_rdatasetiter_t *rbtiterator,
   9242  1.1  christos 		rdatasetheader_t *header) {
   9243  1.1  christos 	dns_ttl_t stale_ttl = header->rdh_ttl + rbtdb->serve_stale_ttl;
   9244  1.1  christos 
   9245  1.1  christos 	/*
   9246  1.1  christos 	 * Is this a "this rdataset doesn't exist" record?
   9247  1.1  christos 	 */
   9248  1.1  christos 	if (NONEXISTENT(header)) {
   9249  1.1  christos 		return (false);
   9250  1.1  christos 	}
   9251  1.1  christos 
   9252  1.1  christos 	/*
   9253  1.1  christos 	 * If this is a zone or this header still active then return it.
   9254  1.1  christos 	 */
   9255  1.1  christos 	if (!IS_CACHE(rbtdb) || ACTIVE(header, rbtiterator->common.now)) {
   9256  1.1  christos 		return (true);
   9257  1.1  christos 	}
   9258  1.1  christos 
   9259  1.1  christos 	/*
   9260  1.1  christos 	 * If we are not returning stale records or the rdataset is
   9261  1.1  christos 	 * too old don't return it.
   9262  1.1  christos 	 */
   9263  1.1  christos 	if (!STALEOK(rbtiterator) || (rbtiterator->common.now > stale_ttl)) {
   9264  1.1  christos 		return (false);
   9265  1.1  christos 	}
   9266  1.1  christos 	return (true);
   9267  1.1  christos }
   9268  1.1  christos 
   9269  1.1  christos static isc_result_t
   9270  1.1  christos rdatasetiter_first(dns_rdatasetiter_t *iterator) {
   9271  1.1  christos 	rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
   9272  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
   9273  1.1  christos 	dns_rbtnode_t *rbtnode = rbtiterator->common.node;
   9274  1.1  christos 	rbtdb_version_t *rbtversion = rbtiterator->common.version;
   9275  1.1  christos 	rdatasetheader_t *header, *top_next;
   9276  1.1  christos 	rbtdb_serial_t serial = IS_CACHE(rbtdb) ? 1 : rbtversion->serial;
   9277  1.1  christos 
   9278  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   9279  1.1  christos 		  isc_rwlocktype_read);
   9280  1.1  christos 
   9281  1.1  christos 	for (header = rbtnode->data; header != NULL; header = top_next) {
   9282  1.1  christos 		top_next = header->next;
   9283  1.1  christos 		do {
   9284  1.1  christos 			if (EXPIREDOK(rbtiterator)) {
   9285  1.1  christos 				if (!NONEXISTENT(header)) {
   9286  1.1  christos 					break;
   9287  1.1  christos 				}
   9288  1.1  christos 				header = header->down;
   9289  1.1  christos 			} else if (header->serial <= serial && !IGNORE(header))
   9290  1.1  christos 			{
   9291  1.1  christos 				if (!iterator_active(rbtdb, rbtiterator,
   9292  1.1  christos 						     header))
   9293  1.1  christos 				{
   9294  1.1  christos 					header = NULL;
   9295  1.1  christos 				}
   9296  1.1  christos 				break;
   9297  1.1  christos 			} else {
   9298  1.1  christos 				header = header->down;
   9299  1.1  christos 			}
   9300  1.1  christos 		} while (header != NULL);
   9301  1.1  christos 		if (header != NULL) {
   9302  1.1  christos 			break;
   9303  1.1  christos 		}
   9304  1.1  christos 	}
   9305  1.1  christos 
   9306  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   9307  1.1  christos 		    isc_rwlocktype_read);
   9308  1.1  christos 
   9309  1.1  christos 	rbtiterator->current = header;
   9310  1.1  christos 
   9311  1.1  christos 	if (header == NULL) {
   9312  1.1  christos 		return (ISC_R_NOMORE);
   9313  1.1  christos 	}
   9314  1.1  christos 
   9315  1.1  christos 	return (ISC_R_SUCCESS);
   9316  1.1  christos }
   9317  1.1  christos 
   9318  1.1  christos static isc_result_t
   9319  1.1  christos rdatasetiter_next(dns_rdatasetiter_t *iterator) {
   9320  1.1  christos 	rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
   9321  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
   9322  1.1  christos 	dns_rbtnode_t *rbtnode = rbtiterator->common.node;
   9323  1.1  christos 	rbtdb_version_t *rbtversion = rbtiterator->common.version;
   9324  1.1  christos 	rdatasetheader_t *header, *top_next;
   9325  1.1  christos 	rbtdb_serial_t serial = IS_CACHE(rbtdb) ? 1 : rbtversion->serial;
   9326  1.1  christos 	rbtdb_rdatatype_t type, negtype;
   9327  1.1  christos 	dns_rdatatype_t rdtype, covers;
   9328  1.1  christos 	bool expiredok = EXPIREDOK(rbtiterator);
   9329  1.1  christos 
   9330  1.1  christos 	header = rbtiterator->current;
   9331  1.1  christos 	if (header == NULL) {
   9332  1.1  christos 		return (ISC_R_NOMORE);
   9333  1.1  christos 	}
   9334  1.1  christos 
   9335  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   9336  1.1  christos 		  isc_rwlocktype_read);
   9337  1.1  christos 
   9338  1.1  christos 	type = header->type;
   9339  1.1  christos 	rdtype = RBTDB_RDATATYPE_BASE(header->type);
   9340  1.1  christos 	if (NEGATIVE(header)) {
   9341  1.1  christos 		covers = RBTDB_RDATATYPE_EXT(header->type);
   9342  1.1  christos 		negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
   9343  1.1  christos 	} else {
   9344  1.1  christos 		negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
   9345  1.1  christos 	}
   9346  1.1  christos 
   9347  1.1  christos 	/*
   9348  1.1  christos 	 * Find the start of the header chain for the next type
   9349  1.1  christos 	 * by walking back up the list.
   9350  1.1  christos 	 */
   9351  1.1  christos 	top_next = header->next;
   9352  1.1  christos 	while (top_next != NULL &&
   9353  1.1  christos 	       (top_next->type == type || top_next->type == negtype))
   9354  1.1  christos 	{
   9355  1.1  christos 		top_next = top_next->next;
   9356  1.1  christos 	}
   9357  1.1  christos 	if (expiredok) {
   9358  1.1  christos 		/*
   9359  1.1  christos 		 * Keep walking down the list if possible or
   9360  1.1  christos 		 * start the next type.
   9361  1.1  christos 		 */
   9362  1.1  christos 		header = header->down != NULL ? header->down : top_next;
   9363  1.1  christos 	} else {
   9364  1.1  christos 		header = top_next;
   9365  1.1  christos 	}
   9366  1.1  christos 	for (; header != NULL; header = top_next) {
   9367  1.1  christos 		top_next = header->next;
   9368  1.1  christos 		do {
   9369  1.1  christos 			if (expiredok) {
   9370  1.1  christos 				if (!NONEXISTENT(header)) {
   9371  1.1  christos 					break;
   9372  1.1  christos 				}
   9373  1.1  christos 				header = header->down;
   9374  1.1  christos 			} else if (header->serial <= serial && !IGNORE(header))
   9375  1.1  christos 			{
   9376  1.1  christos 				if (!iterator_active(rbtdb, rbtiterator,
   9377  1.1  christos 						     header))
   9378  1.1  christos 				{
   9379  1.1  christos 					header = NULL;
   9380  1.1  christos 				}
   9381  1.1  christos 				break;
   9382  1.1  christos 			} else {
   9383  1.1  christos 				header = header->down;
   9384  1.1  christos 			}
   9385  1.1  christos 		} while (header != NULL);
   9386  1.1  christos 		if (header != NULL) {
   9387  1.1  christos 			break;
   9388  1.1  christos 		}
   9389  1.1  christos 		/*
   9390  1.1  christos 		 * Find the start of the header chain for the next type
   9391  1.1  christos 		 * by walking back up the list.
   9392  1.1  christos 		 */
   9393  1.1  christos 		while (top_next != NULL &&
   9394  1.1  christos 		       (top_next->type == type || top_next->type == negtype))
   9395  1.1  christos 		{
   9396  1.1  christos 			top_next = top_next->next;
   9397  1.1  christos 		}
   9398  1.1  christos 	}
   9399  1.1  christos 
   9400  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   9401  1.1  christos 		    isc_rwlocktype_read);
   9402  1.1  christos 
   9403  1.1  christos 	rbtiterator->current = header;
   9404  1.1  christos 
   9405  1.1  christos 	if (header == NULL) {
   9406  1.1  christos 		return (ISC_R_NOMORE);
   9407  1.1  christos 	}
   9408  1.1  christos 
   9409  1.1  christos 	return (ISC_R_SUCCESS);
   9410  1.1  christos }
   9411  1.1  christos 
   9412  1.1  christos static void
   9413  1.1  christos rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
   9414  1.1  christos 	rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
   9415  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
   9416  1.1  christos 	dns_rbtnode_t *rbtnode = rbtiterator->common.node;
   9417  1.1  christos 	rdatasetheader_t *header;
   9418  1.1  christos 
   9419  1.1  christos 	header = rbtiterator->current;
   9420  1.1  christos 	REQUIRE(header != NULL);
   9421  1.1  christos 
   9422  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   9423  1.1  christos 		  isc_rwlocktype_read);
   9424  1.1  christos 
   9425  1.1  christos 	bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
   9426  1.1  christos 		      isc_rwlocktype_read, rdataset);
   9427  1.1  christos 
   9428  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   9429  1.1  christos 		    isc_rwlocktype_read);
   9430  1.1  christos }
   9431  1.1  christos 
   9432  1.1  christos /*
   9433  1.1  christos  * Database Iterator Methods
   9434  1.1  christos  */
   9435  1.1  christos 
   9436  1.1  christos static void
   9437  1.1  christos reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
   9438  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
   9439  1.1  christos 	dns_rbtnode_t *node = rbtdbiter->node;
   9440  1.1  christos 
   9441  1.1  christos 	if (node == NULL) {
   9442  1.1  christos 		return;
   9443  1.1  christos 	}
   9444  1.1  christos 
   9445  1.1  christos 	INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
   9446  1.1  christos 	reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
   9447  1.1  christos }
   9448  1.1  christos 
   9449  1.1  christos static void
   9450  1.1  christos dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
   9451  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
   9452  1.1  christos 	dns_rbtnode_t *node = rbtdbiter->node;
   9453  1.1  christos 	nodelock_t *lock;
   9454  1.1  christos 
   9455  1.1  christos 	if (node == NULL) {
   9456  1.1  christos 		return;
   9457  1.1  christos 	}
   9458  1.1  christos 
   9459  1.1  christos 	lock = &rbtdb->node_locks[node->locknum].lock;
   9460  1.1  christos 	NODE_LOCK(lock, isc_rwlocktype_read);
   9461  1.1  christos 	decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
   9462  1.1  christos 			    rbtdbiter->tree_locked, false);
   9463  1.1  christos 	NODE_UNLOCK(lock, isc_rwlocktype_read);
   9464  1.1  christos 
   9465  1.1  christos 	rbtdbiter->node = NULL;
   9466  1.1  christos }
   9467  1.1  christos 
   9468  1.1  christos static void
   9469  1.1  christos flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
   9470  1.1  christos 	dns_rbtnode_t *node;
   9471  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
   9472  1.1  christos 	bool was_read_locked = false;
   9473  1.1  christos 	nodelock_t *lock;
   9474  1.1  christos 	int i;
   9475  1.1  christos 
   9476  1.1  christos 	if (rbtdbiter->delcnt != 0) {
   9477  1.1  christos 		/*
   9478  1.1  christos 		 * Note that "%d node of %d in tree" can report things like
   9479  1.1  christos 		 * "flush_deletions: 59 nodes of 41 in tree".  This means
   9480  1.1  christos 		 * That some nodes appear on the deletions list more than
   9481  1.1  christos 		 * once.  Only the last occurrence will actually be deleted.
   9482  1.1  christos 		 */
   9483  1.1  christos 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
   9484  1.1  christos 			      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
   9485  1.1  christos 			      "flush_deletions: %d nodes of %d in tree",
   9486  1.1  christos 			      rbtdbiter->delcnt,
   9487  1.1  christos 			      dns_rbt_nodecount(rbtdb->tree));
   9488  1.1  christos 
   9489  1.1  christos 		if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
   9490  1.1  christos 			RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   9491  1.1  christos 			was_read_locked = true;
   9492  1.1  christos 		}
   9493  1.1  christos 		RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
   9494  1.1  christos 		rbtdbiter->tree_locked = isc_rwlocktype_write;
   9495  1.1  christos 
   9496  1.1  christos 		for (i = 0; i < rbtdbiter->delcnt; i++) {
   9497  1.1  christos 			node = rbtdbiter->deletions[i];
   9498  1.1  christos 			lock = &rbtdb->node_locks[node->locknum].lock;
   9499  1.1  christos 
   9500  1.1  christos 			NODE_LOCK(lock, isc_rwlocktype_read);
   9501  1.1  christos 			decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
   9502  1.1  christos 					    rbtdbiter->tree_locked, false);
   9503  1.1  christos 			NODE_UNLOCK(lock, isc_rwlocktype_read);
   9504  1.1  christos 		}
   9505  1.1  christos 
   9506  1.1  christos 		rbtdbiter->delcnt = 0;
   9507  1.1  christos 
   9508  1.1  christos 		RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
   9509  1.1  christos 		if (was_read_locked) {
   9510  1.1  christos 			RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   9511  1.1  christos 			rbtdbiter->tree_locked = isc_rwlocktype_read;
   9512  1.1  christos 		} else {
   9513  1.1  christos 			rbtdbiter->tree_locked = isc_rwlocktype_none;
   9514  1.1  christos 		}
   9515  1.1  christos 	}
   9516  1.1  christos }
   9517  1.1  christos 
   9518  1.1  christos static void
   9519  1.1  christos resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
   9520  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
   9521  1.1  christos 
   9522  1.1  christos 	REQUIRE(rbtdbiter->paused);
   9523  1.1  christos 	REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
   9524  1.1  christos 
   9525  1.1  christos 	RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   9526  1.1  christos 	rbtdbiter->tree_locked = isc_rwlocktype_read;
   9527  1.1  christos 
   9528  1.1  christos 	rbtdbiter->paused = false;
   9529  1.1  christos }
   9530  1.1  christos 
   9531  1.1  christos static void
   9532  1.1  christos dbiterator_destroy(dns_dbiterator_t **iteratorp) {
   9533  1.1  christos 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
   9534  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
   9535  1.1  christos 	dns_db_t *db = NULL;
   9536  1.1  christos 
   9537  1.1  christos 	if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
   9538  1.1  christos 		RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   9539  1.1  christos 		rbtdbiter->tree_locked = isc_rwlocktype_none;
   9540  1.1  christos 	} else {
   9541  1.1  christos 		INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
   9542  1.1  christos 	}
   9543  1.1  christos 
   9544  1.1  christos 	dereference_iter_node(rbtdbiter);
   9545  1.1  christos 
   9546  1.1  christos 	flush_deletions(rbtdbiter);
   9547  1.1  christos 
   9548  1.1  christos 	dns_db_attach(rbtdbiter->common.db, &db);
   9549  1.1  christos 	dns_db_detach(&rbtdbiter->common.db);
   9550  1.1  christos 
   9551  1.1  christos 	dns_rbtnodechain_reset(&rbtdbiter->chain);
   9552  1.1  christos 	dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
   9553  1.1  christos 	isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
   9554  1.1  christos 	dns_db_detach(&db);
   9555  1.1  christos 
   9556  1.1  christos 	*iteratorp = NULL;
   9557  1.1  christos }
   9558  1.1  christos 
   9559  1.1  christos static isc_result_t
   9560  1.1  christos dbiterator_first(dns_dbiterator_t *iterator) {
   9561  1.1  christos 	isc_result_t result;
   9562  1.1  christos 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
   9563  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
   9564  1.1  christos 	dns_name_t *name, *origin;
   9565  1.1  christos 
   9566  1.1  christos 	if (rbtdbiter->result != ISC_R_SUCCESS &&
   9567  1.1  christos 	    rbtdbiter->result != ISC_R_NOTFOUND &&
   9568  1.1  christos 	    rbtdbiter->result != DNS_R_PARTIALMATCH &&
   9569  1.1  christos 	    rbtdbiter->result != ISC_R_NOMORE)
   9570  1.1  christos 	{
   9571  1.1  christos 		return (rbtdbiter->result);
   9572  1.1  christos 	}
   9573  1.1  christos 
   9574  1.1  christos 	if (rbtdbiter->paused) {
   9575  1.1  christos 		resume_iteration(rbtdbiter);
   9576  1.1  christos 	}
   9577  1.1  christos 
   9578  1.1  christos 	dereference_iter_node(rbtdbiter);
   9579  1.1  christos 
   9580  1.1  christos 	name = dns_fixedname_name(&rbtdbiter->name);
   9581  1.1  christos 	origin = dns_fixedname_name(&rbtdbiter->origin);
   9582  1.1  christos 	dns_rbtnodechain_reset(&rbtdbiter->chain);
   9583  1.1  christos 	dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
   9584  1.1  christos 
   9585  1.1  christos 	if (rbtdbiter->nsec3only) {
   9586  1.1  christos 		rbtdbiter->current = &rbtdbiter->nsec3chain;
   9587  1.1  christos 		result = dns_rbtnodechain_first(rbtdbiter->current,
   9588  1.1  christos 						rbtdb->nsec3, name, origin);
   9589  1.1  christos 	} else {
   9590  1.1  christos 		rbtdbiter->current = &rbtdbiter->chain;
   9591  1.1  christos 		result = dns_rbtnodechain_first(rbtdbiter->current, rbtdb->tree,
   9592  1.1  christos 						name, origin);
   9593  1.1  christos 		if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
   9594  1.1  christos 			rbtdbiter->current = &rbtdbiter->nsec3chain;
   9595  1.1  christos 			result = dns_rbtnodechain_first(
   9596  1.1  christos 				rbtdbiter->current, rbtdb->nsec3, name, origin);
   9597  1.1  christos 		}
   9598  1.1  christos 	}
   9599  1.1  christos 	if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
   9600  1.1  christos 		result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
   9601  1.1  christos 						  NULL, &rbtdbiter->node);
   9602  1.1  christos 		if (result == ISC_R_SUCCESS) {
   9603  1.1  christos 			rbtdbiter->new_origin = true;
   9604  1.1  christos 			reference_iter_node(rbtdbiter);
   9605  1.1  christos 		}
   9606  1.1  christos 	} else {
   9607  1.1  christos 		INSIST(result == ISC_R_NOTFOUND);
   9608  1.1  christos 		result = ISC_R_NOMORE; /* The tree is empty. */
   9609  1.1  christos 	}
   9610  1.1  christos 
   9611  1.1  christos 	rbtdbiter->result = result;
   9612  1.1  christos 
   9613  1.1  christos 	if (result != ISC_R_SUCCESS) {
   9614  1.1  christos 		ENSURE(!rbtdbiter->paused);
   9615  1.1  christos 	}
   9616  1.1  christos 
   9617  1.1  christos 	return (result);
   9618  1.1  christos }
   9619  1.1  christos 
   9620  1.1  christos static isc_result_t
   9621  1.1  christos dbiterator_last(dns_dbiterator_t *iterator) {
   9622  1.1  christos 	isc_result_t result;
   9623  1.1  christos 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
   9624  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
   9625  1.1  christos 	dns_name_t *name, *origin;
   9626  1.1  christos 
   9627  1.1  christos 	if (rbtdbiter->result != ISC_R_SUCCESS &&
   9628  1.1  christos 	    rbtdbiter->result != ISC_R_NOTFOUND &&
   9629  1.1  christos 	    rbtdbiter->result != DNS_R_PARTIALMATCH &&
   9630  1.1  christos 	    rbtdbiter->result != ISC_R_NOMORE)
   9631  1.1  christos 	{
   9632  1.1  christos 		return (rbtdbiter->result);
   9633  1.1  christos 	}
   9634  1.1  christos 
   9635  1.1  christos 	if (rbtdbiter->paused) {
   9636  1.1  christos 		resume_iteration(rbtdbiter);
   9637  1.1  christos 	}
   9638  1.1  christos 
   9639  1.1  christos 	dereference_iter_node(rbtdbiter);
   9640  1.1  christos 
   9641  1.1  christos 	name = dns_fixedname_name(&rbtdbiter->name);
   9642  1.1  christos 	origin = dns_fixedname_name(&rbtdbiter->origin);
   9643  1.1  christos 	dns_rbtnodechain_reset(&rbtdbiter->chain);
   9644  1.1  christos 	dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
   9645  1.1  christos 
   9646  1.1  christos 	result = ISC_R_NOTFOUND;
   9647  1.1  christos 	if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
   9648  1.1  christos 		rbtdbiter->current = &rbtdbiter->nsec3chain;
   9649  1.1  christos 		result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->nsec3,
   9650  1.1  christos 					       name, origin);
   9651  1.1  christos 	}
   9652  1.1  christos 	if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
   9653  1.1  christos 		rbtdbiter->current = &rbtdbiter->chain;
   9654  1.1  christos 		result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
   9655  1.1  christos 					       name, origin);
   9656  1.1  christos 	}
   9657  1.1  christos 	if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
   9658  1.1  christos 		result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
   9659  1.1  christos 						  NULL, &rbtdbiter->node);
   9660  1.1  christos 		if (result == ISC_R_SUCCESS) {
   9661  1.1  christos 			rbtdbiter->new_origin = true;
   9662  1.1  christos 			reference_iter_node(rbtdbiter);
   9663  1.1  christos 		}
   9664  1.1  christos 	} else {
   9665  1.1  christos 		INSIST(result == ISC_R_NOTFOUND);
   9666  1.1  christos 		result = ISC_R_NOMORE; /* The tree is empty. */
   9667  1.1  christos 	}
   9668  1.1  christos 
   9669  1.1  christos 	rbtdbiter->result = result;
   9670  1.1  christos 
   9671  1.1  christos 	return (result);
   9672  1.1  christos }
   9673  1.1  christos 
   9674  1.1  christos static isc_result_t
   9675  1.1  christos dbiterator_seek(dns_dbiterator_t *iterator, const dns_name_t *name) {
   9676  1.1  christos 	isc_result_t result, tresult;
   9677  1.1  christos 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
   9678  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
   9679  1.1  christos 	dns_name_t *iname, *origin;
   9680  1.1  christos 
   9681  1.1  christos 	if (rbtdbiter->result != ISC_R_SUCCESS &&
   9682  1.1  christos 	    rbtdbiter->result != ISC_R_NOTFOUND &&
   9683  1.1  christos 	    rbtdbiter->result != DNS_R_PARTIALMATCH &&
   9684  1.1  christos 	    rbtdbiter->result != ISC_R_NOMORE)
   9685  1.1  christos 	{
   9686  1.1  christos 		return (rbtdbiter->result);
   9687  1.1  christos 	}
   9688  1.1  christos 
   9689  1.1  christos 	if (rbtdbiter->paused) {
   9690  1.1  christos 		resume_iteration(rbtdbiter);
   9691  1.1  christos 	}
   9692  1.1  christos 
   9693  1.1  christos 	dereference_iter_node(rbtdbiter);
   9694  1.1  christos 
   9695  1.1  christos 	iname = dns_fixedname_name(&rbtdbiter->name);
   9696  1.1  christos 	origin = dns_fixedname_name(&rbtdbiter->origin);
   9697  1.1  christos 	dns_rbtnodechain_reset(&rbtdbiter->chain);
   9698  1.1  christos 	dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
   9699  1.1  christos 
   9700  1.1  christos 	if (rbtdbiter->nsec3only) {
   9701  1.1  christos 		rbtdbiter->current = &rbtdbiter->nsec3chain;
   9702  1.1  christos 		result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
   9703  1.1  christos 					  &rbtdbiter->node, rbtdbiter->current,
   9704  1.1  christos 					  DNS_RBTFIND_EMPTYDATA, NULL, NULL);
   9705  1.1  christos 	} else if (rbtdbiter->nonsec3) {
   9706  1.1  christos 		rbtdbiter->current = &rbtdbiter->chain;
   9707  1.1  christos 		result = dns_rbt_findnode(rbtdb->tree, name, NULL,
   9708  1.1  christos 					  &rbtdbiter->node, rbtdbiter->current,
   9709  1.1  christos 					  DNS_RBTFIND_EMPTYDATA, NULL, NULL);
   9710  1.1  christos 	} else {
   9711  1.1  christos 		/*
   9712  1.1  christos 		 * Stay on main chain if not found on either chain.
   9713  1.1  christos 		 */
   9714  1.1  christos 		rbtdbiter->current = &rbtdbiter->chain;
   9715  1.1  christos 		result = dns_rbt_findnode(rbtdb->tree, name, NULL,
   9716  1.1  christos 					  &rbtdbiter->node, rbtdbiter->current,
   9717  1.1  christos 					  DNS_RBTFIND_EMPTYDATA, NULL, NULL);
   9718  1.1  christos 		if (result == DNS_R_PARTIALMATCH) {
   9719  1.1  christos 			dns_rbtnode_t *node = NULL;
   9720  1.1  christos 			tresult = dns_rbt_findnode(
   9721  1.1  christos 				rbtdb->nsec3, name, NULL, &node,
   9722  1.1  christos 				&rbtdbiter->nsec3chain, DNS_RBTFIND_EMPTYDATA,
   9723  1.1  christos 				NULL, NULL);
   9724  1.1  christos 			if (tresult == ISC_R_SUCCESS) {
   9725  1.1  christos 				rbtdbiter->node = node;
   9726  1.1  christos 				rbtdbiter->current = &rbtdbiter->nsec3chain;
   9727  1.1  christos 				result = tresult;
   9728  1.1  christos 			}
   9729  1.1  christos 		}
   9730  1.1  christos 	}
   9731  1.1  christos 
   9732  1.1  christos 	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
   9733  1.1  christos 		tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
   9734  1.1  christos 						   origin, NULL);
   9735  1.1  christos 		if (tresult == ISC_R_SUCCESS) {
   9736  1.1  christos 			rbtdbiter->new_origin = true;
   9737  1.1  christos 			reference_iter_node(rbtdbiter);
   9738  1.1  christos 		} else {
   9739  1.1  christos 			result = tresult;
   9740  1.1  christos 			rbtdbiter->node = NULL;
   9741  1.1  christos 		}
   9742  1.1  christos 	} else {
   9743  1.1  christos 		rbtdbiter->node = NULL;
   9744  1.1  christos 	}
   9745  1.1  christos 
   9746  1.1  christos 	rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ? ISC_R_SUCCESS
   9747  1.1  christos 							   : result;
   9748  1.1  christos 
   9749  1.1  christos 	return (result);
   9750  1.1  christos }
   9751  1.1  christos 
   9752  1.1  christos static isc_result_t
   9753  1.1  christos dbiterator_prev(dns_dbiterator_t *iterator) {
   9754  1.1  christos 	isc_result_t result;
   9755  1.1  christos 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
   9756  1.1  christos 	dns_name_t *name, *origin;
   9757  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
   9758  1.1  christos 
   9759  1.1  christos 	REQUIRE(rbtdbiter->node != NULL);
   9760  1.1  christos 
   9761  1.1  christos 	if (rbtdbiter->result != ISC_R_SUCCESS) {
   9762  1.1  christos 		return (rbtdbiter->result);
   9763  1.1  christos 	}
   9764  1.1  christos 
   9765  1.1  christos 	if (rbtdbiter->paused) {
   9766  1.1  christos 		resume_iteration(rbtdbiter);
   9767  1.1  christos 	}
   9768  1.1  christos 
   9769  1.1  christos 	name = dns_fixedname_name(&rbtdbiter->name);
   9770  1.1  christos 	origin = dns_fixedname_name(&rbtdbiter->origin);
   9771  1.1  christos 	result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
   9772  1.1  christos 	if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
   9773  1.1  christos 	    !rbtdbiter->nonsec3 && &rbtdbiter->nsec3chain == rbtdbiter->current)
   9774  1.1  christos 	{
   9775  1.1  christos 		rbtdbiter->current = &rbtdbiter->chain;
   9776  1.1  christos 		dns_rbtnodechain_reset(rbtdbiter->current);
   9777  1.1  christos 		result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
   9778  1.1  christos 					       name, origin);
   9779  1.1  christos 		if (result == ISC_R_NOTFOUND) {
   9780  1.1  christos 			result = ISC_R_NOMORE;
   9781  1.1  christos 		}
   9782  1.1  christos 	}
   9783  1.1  christos 
   9784  1.1  christos 	dereference_iter_node(rbtdbiter);
   9785  1.1  christos 
   9786  1.1  christos 	if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
   9787  1.1  christos 		rbtdbiter->new_origin = (result == DNS_R_NEWORIGIN);
   9788  1.1  christos 		result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
   9789  1.1  christos 						  NULL, &rbtdbiter->node);
   9790  1.1  christos 	}
   9791  1.1  christos 
   9792  1.1  christos 	if (result == ISC_R_SUCCESS) {
   9793  1.1  christos 		reference_iter_node(rbtdbiter);
   9794  1.1  christos 	}
   9795  1.1  christos 
   9796  1.1  christos 	rbtdbiter->result = result;
   9797  1.1  christos 
   9798  1.1  christos 	return (result);
   9799  1.1  christos }
   9800  1.1  christos 
   9801  1.1  christos static isc_result_t
   9802  1.1  christos dbiterator_next(dns_dbiterator_t *iterator) {
   9803  1.1  christos 	isc_result_t result;
   9804  1.1  christos 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
   9805  1.1  christos 	dns_name_t *name, *origin;
   9806  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
   9807  1.1  christos 
   9808  1.1  christos 	REQUIRE(rbtdbiter->node != NULL);
   9809  1.1  christos 
   9810  1.1  christos 	if (rbtdbiter->result != ISC_R_SUCCESS) {
   9811  1.1  christos 		return (rbtdbiter->result);
   9812  1.1  christos 	}
   9813  1.1  christos 
   9814  1.1  christos 	if (rbtdbiter->paused) {
   9815  1.1  christos 		resume_iteration(rbtdbiter);
   9816  1.1  christos 	}
   9817  1.1  christos 
   9818  1.1  christos 	name = dns_fixedname_name(&rbtdbiter->name);
   9819  1.1  christos 	origin = dns_fixedname_name(&rbtdbiter->origin);
   9820  1.1  christos 	result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
   9821  1.1  christos 	if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
   9822  1.1  christos 	    !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current)
   9823  1.1  christos 	{
   9824  1.1  christos 		rbtdbiter->current = &rbtdbiter->nsec3chain;
   9825  1.1  christos 		dns_rbtnodechain_reset(rbtdbiter->current);
   9826  1.1  christos 		result = dns_rbtnodechain_first(rbtdbiter->current,
   9827  1.1  christos 						rbtdb->nsec3, name, origin);
   9828  1.1  christos 		if (result == ISC_R_NOTFOUND) {
   9829  1.1  christos 			result = ISC_R_NOMORE;
   9830  1.1  christos 		}
   9831  1.1  christos 	}
   9832  1.1  christos 
   9833  1.1  christos 	dereference_iter_node(rbtdbiter);
   9834  1.1  christos 
   9835  1.1  christos 	if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
   9836  1.1  christos 		rbtdbiter->new_origin = (result == DNS_R_NEWORIGIN);
   9837  1.1  christos 		result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
   9838  1.1  christos 						  NULL, &rbtdbiter->node);
   9839  1.1  christos 	}
   9840  1.1  christos 	if (result == ISC_R_SUCCESS) {
   9841  1.1  christos 		reference_iter_node(rbtdbiter);
   9842  1.1  christos 	}
   9843  1.1  christos 
   9844  1.1  christos 	rbtdbiter->result = result;
   9845  1.1  christos 
   9846  1.1  christos 	return (result);
   9847  1.1  christos }
   9848  1.1  christos 
   9849  1.1  christos static isc_result_t
   9850  1.1  christos dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
   9851  1.1  christos 		   dns_name_t *name) {
   9852  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
   9853  1.1  christos 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
   9854  1.1  christos 	dns_rbtnode_t *node = rbtdbiter->node;
   9855  1.1  christos 	isc_result_t result;
   9856  1.1  christos 	dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
   9857  1.1  christos 	dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
   9858  1.1  christos 
   9859  1.1  christos 	REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
   9860  1.1  christos 	REQUIRE(rbtdbiter->node != NULL);
   9861  1.1  christos 
   9862  1.1  christos 	if (rbtdbiter->paused) {
   9863  1.1  christos 		resume_iteration(rbtdbiter);
   9864  1.1  christos 	}
   9865  1.1  christos 
   9866  1.1  christos 	if (name != NULL) {
   9867  1.1  christos 		if (rbtdbiter->common.relative_names) {
   9868  1.1  christos 			origin = NULL;
   9869  1.1  christos 		}
   9870  1.1  christos 		result = dns_name_concatenate(nodename, origin, name, NULL);
   9871  1.1  christos 		if (result != ISC_R_SUCCESS) {
   9872  1.1  christos 			return (result);
   9873  1.1  christos 		}
   9874  1.1  christos 		if (rbtdbiter->common.relative_names && rbtdbiter->new_origin) {
   9875  1.1  christos 			result = DNS_R_NEWORIGIN;
   9876  1.1  christos 		}
   9877  1.1  christos 	} else {
   9878  1.1  christos 		result = ISC_R_SUCCESS;
   9879  1.1  christos 	}
   9880  1.1  christos 
   9881  1.1  christos 	new_reference(rbtdb, node, isc_rwlocktype_none);
   9882  1.1  christos 
   9883  1.1  christos 	*nodep = rbtdbiter->node;
   9884  1.1  christos 
   9885  1.1  christos 	if (iterator->cleaning && result == ISC_R_SUCCESS) {
   9886  1.1  christos 		isc_result_t expire_result;
   9887  1.1  christos 
   9888  1.1  christos 		/*
   9889  1.1  christos 		 * If the deletion array is full, flush it before trying
   9890  1.1  christos 		 * to expire the current node.  The current node can't
   9891  1.1  christos 		 * fully deleted while the iteration cursor is still on it.
   9892  1.1  christos 		 */
   9893  1.1  christos 		if (rbtdbiter->delcnt == DELETION_BATCH_MAX) {
   9894  1.1  christos 			flush_deletions(rbtdbiter);
   9895  1.1  christos 		}
   9896  1.1  christos 
   9897  1.1  christos 		expire_result = expirenode(iterator->db, *nodep, 0);
   9898  1.1  christos 
   9899  1.1  christos 		/*
   9900  1.1  christos 		 * expirenode() currently always returns success.
   9901  1.1  christos 		 */
   9902  1.1  christos 		if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
   9903  1.1  christos 			rbtdbiter->deletions[rbtdbiter->delcnt++] = node;
   9904  1.1  christos 			isc_refcount_increment(&node->references);
   9905  1.1  christos 		}
   9906  1.1  christos 	}
   9907  1.1  christos 
   9908  1.1  christos 	return (result);
   9909  1.1  christos }
   9910  1.1  christos 
   9911  1.1  christos static isc_result_t
   9912  1.1  christos dbiterator_pause(dns_dbiterator_t *iterator) {
   9913  1.1  christos 	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
   9914  1.1  christos 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
   9915  1.1  christos 
   9916  1.1  christos 	if (rbtdbiter->result != ISC_R_SUCCESS &&
   9917  1.1  christos 	    rbtdbiter->result != ISC_R_NOTFOUND &&
   9918  1.1  christos 	    rbtdbiter->result != DNS_R_PARTIALMATCH &&
   9919  1.1  christos 	    rbtdbiter->result != ISC_R_NOMORE)
   9920  1.1  christos 	{
   9921  1.1  christos 		return (rbtdbiter->result);
   9922  1.1  christos 	}
   9923  1.1  christos 
   9924  1.1  christos 	if (rbtdbiter->paused) {
   9925  1.1  christos 		return (ISC_R_SUCCESS);
   9926  1.1  christos 	}
   9927  1.1  christos 
   9928  1.1  christos 	rbtdbiter->paused = true;
   9929  1.1  christos 
   9930  1.1  christos 	if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
   9931  1.1  christos 		INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
   9932  1.1  christos 		RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
   9933  1.1  christos 		rbtdbiter->tree_locked = isc_rwlocktype_none;
   9934  1.1  christos 	}
   9935  1.1  christos 
   9936  1.1  christos 	flush_deletions(rbtdbiter);
   9937  1.1  christos 
   9938  1.1  christos 	return (ISC_R_SUCCESS);
   9939  1.1  christos }
   9940  1.1  christos 
   9941  1.1  christos static isc_result_t
   9942  1.1  christos dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
   9943  1.1  christos 	rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
   9944  1.1  christos 	dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
   9945  1.1  christos 
   9946  1.1  christos 	if (rbtdbiter->result != ISC_R_SUCCESS) {
   9947  1.1  christos 		return (rbtdbiter->result);
   9948  1.1  christos 	}
   9949  1.1  christos 
   9950  1.1  christos 	dns_name_copynf(origin, name);
   9951  1.1  christos 	return (ISC_R_SUCCESS);
   9952  1.1  christos }
   9953  1.1  christos 
   9954  1.1  christos static void
   9955  1.1  christos setownercase(rdatasetheader_t *header, const dns_name_t *name) {
   9956  1.1  christos 	unsigned int i;
   9957  1.1  christos 	bool fully_lower;
   9958  1.1  christos 
   9959  1.1  christos 	/*
   9960  1.1  christos 	 * We do not need to worry about label lengths as they are all
   9961  1.1  christos 	 * less than or equal to 63.
   9962  1.1  christos 	 */
   9963  1.1  christos 	memset(header->upper, 0, sizeof(header->upper));
   9964  1.1  christos 	fully_lower = true;
   9965  1.1  christos 	for (i = 0; i < name->length; i++) {
   9966  1.1  christos 		if (isupper(name->ndata[i])) {
   9967  1.1  christos 			header->upper[i / 8] |= 1 << (i % 8);
   9968  1.1  christos 			fully_lower = false;
   9969  1.1  christos 		}
   9970  1.1  christos 	}
   9971  1.1  christos 	RDATASET_ATTR_SET(header, RDATASET_ATTR_CASESET);
   9972  1.1  christos 	if (ISC_LIKELY(fully_lower)) {
   9973  1.1  christos 		RDATASET_ATTR_SET(header, RDATASET_ATTR_CASEFULLYLOWER);
   9974  1.1  christos 	}
   9975  1.1  christos }
   9976  1.1  christos 
   9977  1.1  christos static void
   9978  1.1  christos rdataset_setownercase(dns_rdataset_t *rdataset, const dns_name_t *name) {
   9979  1.1  christos 	dns_rbtdb_t *rbtdb = rdataset->private1;
   9980  1.1  christos 	dns_rbtnode_t *rbtnode = rdataset->private2;
   9981  1.1  christos 	unsigned char *raw = rdataset->private3; /* RDATASLAB */
   9982  1.1  christos 	rdatasetheader_t *header;
   9983  1.1  christos 
   9984  1.1  christos 	header = (struct rdatasetheader *)(raw - sizeof(*header));
   9985  1.1  christos 
   9986  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   9987  1.1  christos 		  isc_rwlocktype_write);
   9988  1.1  christos 	setownercase(header, name);
   9989  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   9990  1.1  christos 		    isc_rwlocktype_write);
   9991  1.1  christos }
   9992  1.1  christos 
   9993  1.1  christos static void
   9994  1.1  christos rdataset_getownercase(const dns_rdataset_t *rdataset, dns_name_t *name) {
   9995  1.1  christos 	dns_rbtdb_t *rbtdb = rdataset->private1;
   9996  1.1  christos 	dns_rbtnode_t *rbtnode = rdataset->private2;
   9997  1.1  christos 	unsigned char *raw = rdataset->private3; /* RDATASLAB */
   9998  1.1  christos 	rdatasetheader_t *header = NULL;
   9999  1.1  christos 	uint8_t mask = (1 << 7);
   10000  1.1  christos 	uint8_t bits = 0;
   10001  1.1  christos 
   10002  1.1  christos 	header = (struct rdatasetheader *)(raw - sizeof(*header));
   10003  1.1  christos 
   10004  1.1  christos 	NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   10005  1.1  christos 		  isc_rwlocktype_read);
   10006  1.1  christos 
   10007  1.1  christos 	if (!CASESET(header)) {
   10008  1.1  christos 		goto unlock;
   10009  1.1  christos 	}
   10010  1.1  christos 
   10011  1.1  christos 	if (ISC_LIKELY(CASEFULLYLOWER(header))) {
   10012  1.1  christos 		for (size_t i = 0; i < name->length; i++) {
   10013  1.1  christos 			name->ndata[i] = tolower(name->ndata[i]);
   10014  1.1  christos 		}
   10015  1.1  christos 	} else {
   10016  1.1  christos 		for (size_t i = 0; i < name->length; i++) {
   10017  1.1  christos 			if (mask == (1 << 7)) {
   10018  1.1  christos 				bits = header->upper[i / 8];
   10019  1.1  christos 				mask = 1;
   10020  1.1  christos 			} else {
   10021  1.1  christos 				mask <<= 1;
   10022  1.1  christos 			}
   10023  1.1  christos 
   10024  1.1  christos 			name->ndata[i] = ((bits & mask) != 0)
   10025  1.1  christos 						 ? toupper(name->ndata[i])
   10026  1.1  christos 						 : tolower(name->ndata[i]);
   10027  1.1  christos 		}
   10028  1.1  christos 	}
   10029  1.1  christos 
   10030  1.1  christos unlock:
   10031  1.1  christos 	NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
   10032  1.1  christos 		    isc_rwlocktype_read);
   10033  1.1  christos }
   10034  1.1  christos 
/*%
 * One entry in a singly linked list of glue: an owner name together
 * with its A and AAAA rdatasets and their signature rdatasets.
 */
struct rbtdb_glue {
	/* Next entry in the list, or NULL at the end. */
	struct rbtdb_glue *next;
	/* Storage for the owner name of the glue records. */
	dns_fixedname_t fixedname;
	/* A records and their signatures; may be left disassociated. */
	dns_rdataset_t rdataset_a;
	dns_rdataset_t sigrdataset_a;
	/* AAAA records and their signatures; may be left disassociated. */
	dns_rdataset_t rdataset_aaaa;
	dns_rdataset_t sigrdataset_aaaa;
};
   10043  1.1  christos 
/*%
 * Context handed to glue_nsdname_cb() through its 'arg' parameter.
 */
typedef struct {
	/* Head of the glue list; the callback prepends new entries here. */
	rbtdb_glue_t *glue_list;
	/* Database and version in which the glue lookups are performed. */
	dns_rbtdb_t *rbtdb;
	rbtdb_version_t *rbtversion;
} rbtdb_glue_additionaldata_ctx_t;
   10049  1.1  christos 
   10050  1.1  christos static void
   10051  1.1  christos free_gluelist(rbtdb_glue_t *glue_list, dns_rbtdb_t *rbtdb) {
   10052  1.1  christos 	rbtdb_glue_t *cur, *cur_next;
   10053  1.1  christos 
   10054  1.1  christos 	if (glue_list == (void *)-1) {
   10055  1.1  christos 		return;
   10056  1.1  christos 	}
   10057  1.1  christos 
   10058  1.1  christos 	cur = glue_list;
   10059  1.1  christos 	while (cur != NULL) {
   10060  1.1  christos 		cur_next = cur->next;
   10061  1.1  christos 
   10062  1.1  christos 		if (dns_rdataset_isassociated(&cur->rdataset_a)) {
   10063  1.1  christos 			dns_rdataset_disassociate(&cur->rdataset_a);
   10064  1.1  christos 		}
   10065  1.1  christos 		if (dns_rdataset_isassociated(&cur->sigrdataset_a)) {
   10066  1.1  christos 			dns_rdataset_disassociate(&cur->sigrdataset_a);
   10067  1.1  christos 		}
   10068  1.1  christos 
   10069  1.1  christos 		if (dns_rdataset_isassociated(&cur->rdataset_aaaa)) {
   10070  1.1  christos 			dns_rdataset_disassociate(&cur->rdataset_aaaa);
   10071  1.1  christos 		}
   10072  1.1  christos 		if (dns_rdataset_isassociated(&cur->sigrdataset_aaaa)) {
   10073  1.1  christos 			dns_rdataset_disassociate(&cur->sigrdataset_aaaa);
   10074  1.1  christos 		}
   10075  1.1  christos 
   10076  1.1  christos 		dns_rdataset_invalidate(&cur->rdataset_a);
   10077  1.1  christos 		dns_rdataset_invalidate(&cur->sigrdataset_a);
   10078  1.1  christos 		dns_rdataset_invalidate(&cur->rdataset_aaaa);
   10079  1.1  christos 		dns_rdataset_invalidate(&cur->sigrdataset_aaaa);
   10080  1.1  christos 
   10081  1.1  christos 		isc_mem_put(rbtdb->common.mctx, cur, sizeof(*cur));
   10082  1.1  christos 		cur = cur_next;
   10083  1.1  christos 	}
   10084  1.1  christos }
   10085  1.1  christos 
   10086  1.1  christos static void
   10087  1.1  christos free_gluetable(rbtdb_version_t *version) {
   10088  1.1  christos 	dns_rbtdb_t *rbtdb;
   10089  1.1  christos 	size_t size, i;
   10090  1.1  christos 
   10091  1.1  christos 	RWLOCK(&version->glue_rwlock, isc_rwlocktype_write);
   10092  1.1  christos 
   10093  1.1  christos 	rbtdb = version->rbtdb;
   10094  1.1  christos 
   10095  1.1  christos 	for (i = 0; i < HASHSIZE(version->glue_table_bits); i++) {
   10096  1.1  christos 		rbtdb_glue_table_node_t *cur, *cur_next;
   10097  1.1  christos 
   10098  1.1  christos 		cur = version->glue_table[i];
   10099  1.1  christos 		while (cur != NULL) {
   10100  1.1  christos 			cur_next = cur->next;
   10101  1.1  christos 			/* isc_refcount_decrement(&cur->node->references); */
   10102  1.1  christos 			cur->node = NULL;
   10103  1.1  christos 			free_gluelist(cur->glue_list, rbtdb);
   10104  1.1  christos 			cur->glue_list = NULL;
   10105  1.1  christos 			isc_mem_put(rbtdb->common.mctx, cur, sizeof(*cur));
   10106  1.1  christos 			cur = cur_next;
   10107  1.1  christos 		}
   10108  1.1  christos 		version->glue_table[i] = NULL;
   10109  1.1  christos 	}
   10110  1.1  christos 
   10111  1.1  christos 	size = HASHSIZE(version->glue_table_bits) *
   10112  1.1  christos 	       sizeof(*version->glue_table);
   10113  1.1  christos 	isc_mem_put(rbtdb->common.mctx, version->glue_table, size);
   10114  1.1  christos 
   10115  1.1  christos 	RWUNLOCK(&version->glue_rwlock, isc_rwlocktype_write);
   10116  1.1  christos }
   10117  1.1  christos 
   10118  1.1  christos static uint32_t
   10119  1.1  christos rehash_bits(rbtdb_version_t *version, size_t newcount) {
   10120  1.1  christos 	uint32_t oldbits = version->glue_table_bits;
   10121  1.1  christos 	uint32_t newbits = oldbits;
   10122  1.1  christos 
   10123  1.1  christos 	while (newcount >= HASHSIZE(newbits) &&
   10124  1.1  christos 	       newbits <= RBTDB_GLUE_TABLE_MAX_BITS)
   10125  1.1  christos 	{
   10126  1.1  christos 		newbits += 1;
   10127  1.1  christos 	}
   10128  1.1  christos 
   10129  1.1  christos 	return (newbits);
   10130  1.1  christos }
   10131  1.1  christos 
   10132  1.1  christos /*%
   10133  1.1  christos  * Write lock (version->glue_rwlock) must be held.
   10134  1.1  christos  */
   10135  1.1  christos static void
   10136  1.1  christos rehash_gluetable(rbtdb_version_t *version) {
   10137  1.1  christos 	uint32_t oldbits, newbits;
   10138  1.1  christos 	size_t newsize, oldcount, i;
   10139  1.1  christos 	rbtdb_glue_table_node_t **oldtable;
   10140  1.1  christos 
   10141  1.1  christos 	oldbits = version->glue_table_bits;
   10142  1.1  christos 	oldcount = HASHSIZE(oldbits);
   10143  1.1  christos 	oldtable = version->glue_table;
   10144  1.1  christos 
   10145  1.1  christos 	newbits = rehash_bits(version, version->glue_table_nodecount);
   10146  1.1  christos 	newsize = HASHSIZE(newbits) * sizeof(version->glue_table[0]);
   10147  1.1  christos 
   10148  1.1  christos 	version->glue_table = isc_mem_get(version->rbtdb->common.mctx, newsize);
   10149  1.1  christos 	version->glue_table_bits = newbits;
   10150  1.1  christos 	memset(version->glue_table, 0, newsize);
   10151  1.1  christos 
   10152  1.1  christos 	for (i = 0; i < oldcount; i++) {
   10153  1.1  christos 		rbtdb_glue_table_node_t *gluenode;
   10154  1.1  christos 		rbtdb_glue_table_node_t *nextgluenode;
   10155  1.1  christos 		for (gluenode = oldtable[i]; gluenode != NULL;
   10156  1.1  christos 		     gluenode = nextgluenode)
   10157  1.1  christos 		{
   10158  1.1  christos 			uint32_t hash = isc_hash32(
   10159  1.1  christos 				&gluenode->node, sizeof(gluenode->node), true);
   10160  1.1  christos 			uint32_t idx = hash_32(hash, newbits);
   10161  1.1  christos 			nextgluenode = gluenode->next;
   10162  1.1  christos 			gluenode->next = version->glue_table[idx];
   10163  1.1  christos 			version->glue_table[idx] = gluenode;
   10164  1.1  christos 		}
   10165  1.1  christos 	}
   10166  1.1  christos 
   10167  1.1  christos 	isc_mem_put(version->rbtdb->common.mctx, oldtable,
   10168  1.1  christos 		    oldcount * sizeof(*version->glue_table));
   10169  1.1  christos 
   10170  1.1  christos 	isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_ZONE,
   10171  1.1  christos 		      ISC_LOG_DEBUG(3),
   10172  1.1  christos 		      "rehash_gluetable(): "
   10173  1.1  christos 		      "resized glue table from %zu to "
   10174  1.1  christos 		      "%zu",
   10175  1.1  christos 		      oldcount, newsize / sizeof(version->glue_table[0]));
   10176  1.1  christos }
   10177  1.1  christos 
   10178  1.1  christos static void
   10179  1.1  christos maybe_rehash_gluetable(rbtdb_version_t *version) {
   10180  1.1  christos 	size_t overcommit = HASHSIZE(version->glue_table_bits) *
   10181  1.1  christos 			    RBTDB_GLUE_TABLE_OVERCOMMIT;
   10182  1.1  christos 	if (ISC_LIKELY(version->glue_table_nodecount < overcommit)) {
   10183  1.1  christos 		return;
   10184  1.1  christos 	}
   10185  1.1  christos 
   10186  1.1  christos 	rehash_gluetable(version);
   10187  1.1  christos }
   10188  1.1  christos 
   10189  1.1  christos static isc_result_t
   10190  1.1  christos glue_nsdname_cb(void *arg, const dns_name_t *name, dns_rdatatype_t qtype) {
   10191  1.1  christos 	rbtdb_glue_additionaldata_ctx_t *ctx;
   10192  1.1  christos 	isc_result_t result;
   10193  1.1  christos 	dns_fixedname_t fixedname_a;
   10194  1.1  christos 	dns_name_t *name_a = NULL;
   10195  1.1  christos 	dns_rdataset_t rdataset_a, sigrdataset_a;
   10196  1.1  christos 	dns_rbtnode_t *node_a = NULL;
   10197  1.1  christos 	dns_fixedname_t fixedname_aaaa;
   10198  1.1  christos 	dns_name_t *name_aaaa = NULL;
   10199  1.1  christos 	dns_rdataset_t rdataset_aaaa, sigrdataset_aaaa;
   10200  1.1  christos 	dns_rbtnode_t *node_aaaa = NULL;
   10201  1.1  christos 	rbtdb_glue_t *glue = NULL;
   10202  1.1  christos 	dns_name_t *gluename = NULL;
   10203  1.1  christos 
   10204  1.1  christos 	/*
   10205  1.1  christos 	 * NS records want addresses in additional records.
   10206  1.1  christos 	 */
   10207  1.1  christos 	INSIST(qtype == dns_rdatatype_a);
   10208  1.1  christos 
   10209  1.1  christos 	ctx = (rbtdb_glue_additionaldata_ctx_t *)arg;
   10210  1.1  christos 
   10211  1.1  christos 	name_a = dns_fixedname_initname(&fixedname_a);
   10212  1.1  christos 	dns_rdataset_init(&rdataset_a);
   10213  1.1  christos 	dns_rdataset_init(&sigrdataset_a);
   10214  1.1  christos 
   10215  1.1  christos 	name_aaaa = dns_fixedname_initname(&fixedname_aaaa);
   10216  1.1  christos 	dns_rdataset_init(&rdataset_aaaa);
   10217  1.1  christos 	dns_rdataset_init(&sigrdataset_aaaa);
   10218  1.1  christos 
   10219  1.1  christos 	result = zone_find((dns_db_t *)ctx->rbtdb, name, ctx->rbtversion,
   10220  1.1  christos 			   dns_rdatatype_a, DNS_DBFIND_GLUEOK, 0,
   10221  1.1  christos 			   (dns_dbnode_t **)&node_a, name_a, &rdataset_a,
   10222  1.1  christos 			   &sigrdataset_a);
   10223  1.1  christos 	if (result == DNS_R_GLUE) {
   10224  1.1  christos 		glue = isc_mem_get(ctx->rbtdb->common.mctx, sizeof(*glue));
   10225  1.1  christos 
   10226  1.1  christos 		gluename = dns_fixedname_initname(&glue->fixedname);
   10227  1.1  christos 		dns_name_copynf(name_a, gluename);
   10228  1.1  christos 
   10229  1.1  christos 		dns_rdataset_init(&glue->rdataset_a);
   10230  1.1  christos 		dns_rdataset_init(&glue->sigrdataset_a);
   10231  1.1  christos 		dns_rdataset_init(&glue->rdataset_aaaa);
   10232  1.1  christos 		dns_rdataset_init(&glue->sigrdataset_aaaa);
   10233  1.1  christos 
   10234  1.1  christos 		dns_rdataset_clone(&rdataset_a, &glue->rdataset_a);
   10235  1.1  christos 		if (dns_rdataset_isassociated(&sigrdataset_a)) {
   10236  1.1  christos 			dns_rdataset_clone(&sigrdataset_a,
   10237  1.1  christos 					   &glue->sigrdataset_a);
   10238  1.1  christos 		}
   10239  1.1  christos 	}
   10240  1.1  christos 
   10241  1.1  christos 	result = zone_find((dns_db_t *)ctx->rbtdb, name, ctx->rbtversion,
   10242  1.1  christos 			   dns_rdatatype_aaaa, DNS_DBFIND_GLUEOK, 0,
   10243  1.1  christos 			   (dns_dbnode_t **)&node_aaaa, name_aaaa,
   10244  1.1  christos 			   &rdataset_aaaa, &sigrdataset_aaaa);
   10245  1.1  christos 	if (result == DNS_R_GLUE) {
   10246  1.1  christos 		if (glue == NULL) {
   10247  1.1  christos 			glue = isc_mem_get(ctx->rbtdb->common.mctx,
   10248  1.1  christos 					   sizeof(*glue));
   10249  1.1  christos 
   10250  1.1  christos 			gluename = dns_fixedname_initname(&glue->fixedname);
   10251  1.1  christos 			dns_name_copynf(name_aaaa, gluename);
   10252  1.1  christos 
   10253  1.1  christos 			dns_rdataset_init(&glue->rdataset_a);
   10254  1.1  christos 			dns_rdataset_init(&glue->sigrdataset_a);
   10255  1.1  christos 			dns_rdataset_init(&glue->rdataset_aaaa);
   10256  1.1  christos 			dns_rdataset_init(&glue->sigrdataset_aaaa);
   10257  1.1  christos 		} else {
   10258  1.1  christos 			INSIST(node_a == node_aaaa);
   10259  1.1  christos 			INSIST(dns_name_equal(name_a, name_aaaa));
   10260  1.1  christos 		}
   10261  1.1  christos 
   10262  1.1  christos 		dns_rdataset_clone(&rdataset_aaaa, &glue->rdataset_aaaa);
   10263  1.1  christos 		if (dns_rdataset_isassociated(&sigrdataset_aaaa)) {
   10264  1.1  christos 			dns_rdataset_clone(&sigrdataset_aaaa,
   10265  1.1  christos 					   &glue->sigrdataset_aaaa);
   10266  1.1  christos 		}
   10267  1.1  christos 	}
   10268  1.1  christos 
   10269  1.1  christos 	if (glue != NULL) {
   10270  1.1  christos 		glue->next = ctx->glue_list;
   10271  1.1  christos 		ctx->glue_list = glue;
   10272  1.1  christos 	}
   10273  1.1  christos 
   10274  1.1  christos 	result = ISC_R_SUCCESS;
   10275  1.1  christos 
   10276  1.1  christos 	if (dns_rdataset_isassociated(&rdataset_a)) {
   10277  1.1  christos 		rdataset_disassociate(&rdataset_a);
   10278  1.1  christos 	}
   10279  1.1  christos 	if (dns_rdataset_isassociated(&sigrdataset_a)) {
   10280  1.1  christos 		rdataset_disassociate(&sigrdataset_a);
   10281  1.1  christos 	}
   10282  1.1  christos 
   10283  1.1  christos 	if (dns_rdataset_isassociated(&rdataset_aaaa)) {
   10284  1.1  christos 		rdataset_disassociate(&rdataset_aaaa);
   10285  1.1  christos 	}
   10286  1.1  christos 	if (dns_rdataset_isassociated(&sigrdataset_aaaa)) {
   10287  1.1  christos 		rdataset_disassociate(&sigrdataset_aaaa);
   10288  1.1  christos 	}
   10289  1.1  christos 
   10290  1.1  christos 	if (node_a != NULL) {
   10291  1.1  christos 		detachnode((dns_db_t *)ctx->rbtdb, (dns_dbnode_t *)&node_a);
   10292  1.1  christos 	}
   10293  1.1  christos 	if (node_aaaa != NULL) {
   10294  1.1  christos 		detachnode((dns_db_t *)ctx->rbtdb, (dns_dbnode_t *)&node_aaaa);
   10295  1.1  christos 	}
   10296  1.1  christos 
   10297  1.1  christos 	return (result);
   10298  1.1  christos }
   10299  1.1  christos 
   10300  1.1  christos static isc_result_t
   10301  1.1  christos rdataset_addglue(dns_rdataset_t *rdataset, dns_dbversion_t *version,
   10302  1.1  christos 		 dns_message_t *msg) {
   10303  1.1  christos 	dns_rbtdb_t *rbtdb = rdataset->private1;
   10304  1.1  christos 	dns_rbtnode_t *node = rdataset->private2;
   10305  1.1  christos 	rbtdb_version_t *rbtversion = version;
   10306  1.1  christos 	uint32_t idx;
   10307  1.1  christos 	rbtdb_glue_table_node_t *cur;
   10308  1.1  christos 	bool found = false;
   10309  1.1  christos 	bool restarted = false;
   10310  1.1  christos 	rbtdb_glue_t *ge;
   10311  1.1  christos 	rbtdb_glue_additionaldata_ctx_t ctx;
   10312  1.1  christos 	isc_result_t result;
   10313  1.1  christos 	uint64_t hash;
   10314  1.1  christos 
   10315  1.1  christos 	REQUIRE(rdataset->type == dns_rdatatype_ns);
   10316  1.1  christos 	REQUIRE(rbtdb == rbtversion->rbtdb);
   10317  1.1  christos 	REQUIRE(!IS_CACHE(rbtdb) && !IS_STUB(rbtdb));
   10318  1.1  christos 
   10319  1.1  christos 	/*
   10320  1.1  christos 	 * The glue table cache that forms a part of the DB version
   10321  1.1  christos 	 * structure is not explicitly bounded and there's no cache
   10322  1.1  christos 	 * cleaning. The zone data size itself is an implicit bound.
   10323  1.1  christos 	 *
   10324  1.1  christos 	 * The key into the glue hashtable is the node pointer. This is
   10325  1.1  christos 	 * because the glue hashtable is a property of the DB version,
   10326  1.1  christos 	 * and the glue is keyed for the ownername/NS tuple. We don't
   10327  1.1  christos 	 * bother with using an expensive dns_name_t comparison here as
   10328  1.1  christos 	 * the node pointer is a fixed value that won't change for a DB
   10329  1.1  christos 	 * version and can be compared directly.
   10330  1.1  christos 	 */
   10331  1.1  christos 	hash = isc_hash_function(&node, sizeof(node), true);
   10332  1.1  christos 
   10333  1.1  christos restart:
   10334  1.1  christos 	/*
   10335  1.1  christos 	 * First, check if we have the additional entries already cached
   10336  1.1  christos 	 * in the glue table.
   10337  1.1  christos 	 */
   10338  1.1  christos 	RWLOCK(&rbtversion->glue_rwlock, isc_rwlocktype_read);
   10339  1.1  christos 
   10340  1.1  christos 	idx = hash_32(hash, rbtversion->glue_table_bits);
   10341  1.1  christos 
   10342  1.1  christos 	for (cur = rbtversion->glue_table[idx]; cur != NULL; cur = cur->next) {
   10343  1.1  christos 		if (cur->node == node) {
   10344  1.1  christos 			break;
   10345  1.1  christos 		}
   10346  1.1  christos 	}
   10347  1.1  christos 
   10348  1.1  christos 	if (cur == NULL) {
   10349  1.1  christos 		goto no_glue;
   10350  1.1  christos 	}
   10351  1.1  christos 	/*
   10352  1.1  christos 	 * We found a cached result. Add it to the message and
   10353  1.1  christos 	 * return.
   10354  1.1  christos 	 */
   10355  1.1  christos 	found = true;
   10356  1.1  christos 	ge = cur->glue_list;
   10357  1.1  christos 
   10358  1.1  christos 	/*
   10359  1.1  christos 	 * (void *) -1 is a special value that means no glue is
   10360  1.1  christos 	 * present in the zone.
   10361  1.1  christos 	 */
   10362  1.1  christos 	if (ge == (void *)-1) {
   10363  1.1  christos 		if (!restarted && (rbtdb->gluecachestats != NULL)) {
   10364  1.1  christos 			isc_stats_increment(
   10365  1.1  christos 				rbtdb->gluecachestats,
   10366  1.1  christos 				dns_gluecachestatscounter_hits_absent);
   10367  1.1  christos 		}
   10368  1.1  christos 		goto no_glue;
   10369  1.1  christos 	} else {
   10370  1.1  christos 		if (!restarted && (rbtdb->gluecachestats != NULL)) {
   10371  1.1  christos 			isc_stats_increment(
   10372  1.1  christos 				rbtdb->gluecachestats,
   10373  1.1  christos 				dns_gluecachestatscounter_hits_present);
   10374  1.1  christos 		}
   10375  1.1  christos 	}
   10376  1.1  christos 
   10377  1.1  christos 	for (; ge != NULL; ge = ge->next) {
   10378  1.1  christos 		dns_name_t *name = NULL;
   10379  1.1  christos 		dns_rdataset_t *rdataset_a = NULL;
   10380  1.1  christos 		dns_rdataset_t *sigrdataset_a = NULL;
   10381  1.1  christos 		dns_rdataset_t *rdataset_aaaa = NULL;
   10382  1.1  christos 		dns_rdataset_t *sigrdataset_aaaa = NULL;
   10383  1.1  christos 		dns_name_t *gluename = dns_fixedname_name(&ge->fixedname);
   10384  1.1  christos 
   10385  1.1  christos 		result = dns_message_gettempname(msg, &name);
   10386  1.1  christos 		if (ISC_UNLIKELY(result != ISC_R_SUCCESS)) {
   10387  1.1  christos 			goto no_glue;
   10388  1.1  christos 		}
   10389  1.1  christos 
   10390  1.1  christos 		dns_name_copynf(gluename, name);
   10391  1.1  christos 
   10392  1.1  christos 		if (dns_rdataset_isassociated(&ge->rdataset_a)) {
   10393  1.1  christos 			result = dns_message_gettemprdataset(msg, &rdataset_a);
   10394  1.1  christos 			if (ISC_UNLIKELY(result != ISC_R_SUCCESS)) {
   10395  1.1  christos 				dns_message_puttempname(msg, &name);
   10396  1.1  christos 				goto no_glue;
   10397  1.1  christos 			}
   10398  1.1  christos 		}
   10399  1.1  christos 
   10400  1.1  christos 		if (dns_rdataset_isassociated(&ge->sigrdataset_a)) {
   10401  1.1  christos 			result = dns_message_gettemprdataset(msg,
   10402  1.1  christos 							     &sigrdataset_a);
   10403  1.1  christos 			if (ISC_UNLIKELY(result != ISC_R_SUCCESS)) {
   10404  1.1  christos 				if (rdataset_a != NULL) {
   10405  1.1  christos 					dns_message_puttemprdataset(
   10406  1.1  christos 						msg, &rdataset_a);
   10407  1.1  christos 				}
   10408  1.1  christos 				dns_message_puttempname(msg, &name);
   10409  1.1  christos 				goto no_glue;
   10410  1.1  christos 			}
   10411  1.1  christos 		}
   10412  1.1  christos 
   10413  1.1  christos 		if (dns_rdataset_isassociated(&ge->rdataset_aaaa)) {
   10414  1.1  christos 			result = dns_message_gettemprdataset(msg,
   10415  1.1  christos 							     &rdataset_aaaa);
   10416  1.1  christos 			if (ISC_UNLIKELY(result != ISC_R_SUCCESS)) {
   10417  1.1  christos 				dns_message_puttempname(msg, &name);
   10418  1.1  christos 				if (rdataset_a != NULL) {
   10419  1.1  christos 					dns_message_puttemprdataset(
   10420  1.1  christos 						msg, &rdataset_a);
   10421  1.1  christos 				}
   10422  1.1  christos 				if (sigrdataset_a != NULL) {
   10423  1.1  christos 					dns_message_puttemprdataset(
   10424  1.1  christos 						msg, &sigrdataset_a);
   10425  1.1  christos 				}
   10426  1.1  christos 				goto no_glue;
   10427  1.1  christos 			}
   10428  1.1  christos 		}
   10429  1.1  christos 
   10430  1.1  christos 		if (dns_rdataset_isassociated(&ge->sigrdataset_aaaa)) {
   10431  1.1  christos 			result = dns_message_gettemprdataset(msg,
   10432  1.1  christos 							     &sigrdataset_aaaa);
   10433  1.1  christos 			if (ISC_UNLIKELY(result != ISC_R_SUCCESS)) {
   10434  1.1  christos 				dns_message_puttempname(msg, &name);
   10435  1.1  christos 				if (rdataset_a != NULL) {
   10436  1.1  christos 					dns_message_puttemprdataset(
   10437  1.1  christos 						msg, &rdataset_a);
   10438  1.1  christos 				}
   10439  1.1  christos 				if (sigrdataset_a != NULL) {
   10440  1.1  christos 					dns_message_puttemprdataset(
   10441  1.1  christos 						msg, &sigrdataset_a);
   10442  1.1  christos 				}
   10443  1.1  christos 				if (rdataset_aaaa != NULL) {
   10444  1.1  christos 					dns_message_puttemprdataset(
   10445  1.1  christos 						msg, &rdataset_aaaa);
   10446  1.1  christos 				}
   10447  1.1  christos 				goto no_glue;
   10448  1.1  christos 			}
   10449  1.1  christos 		}
   10450  1.1  christos 
   10451  1.1  christos 		if (ISC_LIKELY(rdataset_a != NULL)) {
   10452  1.1  christos 			dns_rdataset_clone(&ge->rdataset_a, rdataset_a);
   10453  1.1  christos 			ISC_LIST_APPEND(name->list, rdataset_a, link);
   10454  1.1  christos 		}
   10455  1.1  christos 
   10456  1.1  christos 		if (sigrdataset_a != NULL) {
   10457  1.1  christos 			dns_rdataset_clone(&ge->sigrdataset_a, sigrdataset_a);
   10458  1.1  christos 			ISC_LIST_APPEND(name->list, sigrdataset_a, link);
   10459  1.1  christos 		}
   10460  1.1  christos 
   10461  1.1  christos 		if (rdataset_aaaa != NULL) {
   10462  1.1  christos 			dns_rdataset_clone(&ge->rdataset_aaaa, rdataset_aaaa);
   10463  1.1  christos 			ISC_LIST_APPEND(name->list, rdataset_aaaa, link);
   10464  1.1  christos 		}
   10465  1.1  christos 		if (sigrdataset_aaaa != NULL) {
   10466  1.1  christos 			dns_rdataset_clone(&ge->sigrdataset_aaaa,
   10467  1.1  christos 					   sigrdataset_aaaa);
   10468  1.1  christos 			ISC_LIST_APPEND(name->list, sigrdataset_aaaa, link);
   10469  1.1  christos 		}
   10470  1.1  christos 
   10471  1.1  christos 		dns_message_addname(msg, name, DNS_SECTION_ADDITIONAL);
   10472  1.1  christos 	}
   10473  1.1  christos 
   10474  1.1  christos no_glue:
   10475  1.1  christos 	RWUNLOCK(&rbtversion->glue_rwlock, isc_rwlocktype_read);
   10476  1.1  christos 
   10477  1.1  christos 	if (found) {
   10478  1.1  christos 		return (ISC_R_SUCCESS);
   10479  1.1  christos 	}
   10480  1.1  christos 
   10481  1.1  christos 	if (restarted) {
   10482  1.1  christos 		return (ISC_R_FAILURE);
   10483  1.1  christos 	}
   10484  1.1  christos 
   10485  1.1  christos 	/*
   10486  1.1  christos 	 * No cached glue was found in the table. Cache it and restart
   10487  1.1  christos 	 * this function.
   10488  1.1  christos 	 *
   10489  1.1  christos 	 * Due to the gap between the read lock and the write lock, it's
   10490  1.1  christos 	 * possible that we may cache a duplicate glue table entry, but
   10491  1.1  christos 	 * we don't care.
   10492  1.1  christos 	 */
   10493  1.1  christos 
   10494  1.1  christos 	ctx.glue_list = NULL;
   10495  1.1  christos 	ctx.rbtdb = rbtdb;
   10496  1.1  christos 	ctx.rbtversion = rbtversion;
   10497  1.1  christos 
   10498  1.1  christos 	RWLOCK(&rbtversion->glue_rwlock, isc_rwlocktype_write);
   10499  1.1  christos 
   10500  1.1  christos 	maybe_rehash_gluetable(rbtversion);
   10501  1.1  christos 	idx = hash_32(hash, rbtversion->glue_table_bits);
   10502  1.1  christos 
   10503  1.1  christos 	(void)dns_rdataset_additionaldata(rdataset, glue_nsdname_cb, &ctx);
   10504  1.1  christos 
   10505  1.1  christos 	cur = isc_mem_get(rbtdb->common.mctx, sizeof(*cur));
   10506  1.1  christos 
   10507  1.1  christos 	/*
   10508  1.1  christos 	 * XXXMUKS: it looks like the dns_dbversion is not destroyed
   10509  1.1  christos 	 * when named is terminated by a keyboard break. This doesn't
   10510  1.1  christos 	 * cleanup the node reference and keeps the process dangling.
   10511  1.1  christos 	 */
   10512  1.1  christos 	/* isc_refcount_increment0(&node->references); */
   10513  1.1  christos 	cur->node = node;
   10514  1.1  christos 
   10515  1.1  christos 	if (ctx.glue_list == NULL) {
   10516  1.1  christos 		/*
   10517  1.1  christos 		 * No glue was found. Cache it so.
   10518  1.1  christos 		 */
   10519  1.1  christos 		cur->glue_list = (void *)-1;
   10520  1.1  christos 		if (rbtdb->gluecachestats != NULL) {
   10521  1.1  christos 			isc_stats_increment(
   10522  1.1  christos 				rbtdb->gluecachestats,
   10523  1.1  christos 				dns_gluecachestatscounter_inserts_absent);
   10524  1.1  christos 		}
   10525  1.1  christos 	} else {
   10526  1.1  christos 		cur->glue_list = ctx.glue_list;
   10527  1.1  christos 		if (rbtdb->gluecachestats != NULL) {
   10528  1.1  christos 			isc_stats_increment(
   10529  1.1  christos 				rbtdb->gluecachestats,
   10530  1.1  christos 				dns_gluecachestatscounter_inserts_present);
   10531  1.1  christos 		}
   10532  1.1  christos 	}
   10533  1.1  christos 
   10534  1.1  christos 	cur->next = rbtversion->glue_table[idx];
   10535  1.1  christos 	rbtversion->glue_table[idx] = cur;
   10536  1.1  christos 	rbtversion->glue_table_nodecount++;
   10537  1.1  christos 
   10538  1.1  christos 	RWUNLOCK(&rbtversion->glue_rwlock, isc_rwlocktype_write);
   10539  1.1  christos 
   10540  1.1  christos 	restarted = true;
   10541  1.1  christos 	goto restart;
   10542  1.1  christos 
   10543  1.1  christos 	/* UNREACHABLE */
   10544  1.1  christos }
   10545  1.1  christos 
   10546  1.1  christos /*%
   10547  1.1  christos  * Routines for LRU-based cache management.
   10548  1.1  christos  */
   10549  1.1  christos 
   10550  1.1  christos /*%
   10551  1.1  christos  * See if a given cache entry that is being reused needs to be updated
   10552  1.1  christos  * in the LRU-list.  From the LRU management point of view, this function is
   10553  1.1  christos  * expected to return true for almost all cases.  When used with threads,
   10554  1.1  christos  * however, this may cause a non-negligible performance penalty because a
   10555  1.1  christos  * writer lock will have to be acquired before updating the list.
   10556  1.1  christos  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
   10557  1.1  christos  * function returns true if the entry has not been updated for some period of
   10558  1.1  christos  * time.  We differentiate the NS or glue address case and the others since
   10559  1.1  christos  * experiments have shown that the former tends to be accessed relatively
   10560  1.1  christos  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
   10561  1.1  christos  * may cause external queries at a higher level zone, involving more
   10562  1.1  christos  * transactions).
   10563  1.1  christos  *
   10564  1.1  christos  * Caller must hold the node (read or write) lock.
   10565  1.1  christos  */
   10566  1.1  christos static bool
   10567  1.1  christos need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
   10568  1.1  christos 	if (RDATASET_ATTR_GET(header, (RDATASET_ATTR_NONEXISTENT |
   10569  1.1  christos 				       RDATASET_ATTR_ANCIENT |
   10570  1.1  christos 				       RDATASET_ATTR_ZEROTTL)) != 0)
   10571  1.1  christos 	{
   10572  1.1  christos 		return (false);
   10573  1.1  christos 	}
   10574  1.1  christos 
   10575  1.1  christos #if DNS_RBTDB_LIMITLRUUPDATE
   10576  1.1  christos 	if (header->type == dns_rdatatype_ns ||
   10577  1.1  christos 	    (header->trust == dns_trust_glue &&
   10578  1.1  christos 	     (header->type == dns_rdatatype_a ||
   10579  1.1  christos 	      header->type == dns_rdatatype_aaaa)))
   10580  1.1  christos 	{
   10581  1.1  christos 		/*
   10582  1.1  christos 		 * Glue records are updated if at least DNS_RBTDB_LRUUPDATE_GLUE
   10583  1.1  christos 		 * seconds have passed since the previous update time.
   10584  1.1  christos 		 */
   10585  1.1  christos 		return (header->last_used + DNS_RBTDB_LRUUPDATE_GLUE <= now);
   10586  1.1  christos 	}
   10587  1.1  christos 
   10588  1.1  christos 	/*
   10589  1.1  christos 	 * Other records are updated if DNS_RBTDB_LRUUPDATE_REGULAR seconds
   10590  1.1  christos 	 * have passed.
   10591  1.1  christos 	 */
   10592  1.1  christos 	return (header->last_used + DNS_RBTDB_LRUUPDATE_REGULAR <= now);
   10593  1.1  christos #else
   10594  1.1  christos 	UNUSED(now);
   10595  1.1  christos 
   10596  1.1  christos 	return (true);
   10597  1.1  christos #endif /* if DNS_RBTDB_LIMITLRUUPDATE */
   10598  1.1  christos }
   10599  1.1  christos 
   10600  1.1  christos /*%
   10601  1.1  christos  * Update the timestamp of a given cache entry and move it to the head
   10602  1.1  christos  * of the corresponding LRU list.
   10603  1.1  christos  *
   10604  1.1  christos  * Caller must hold the node (write) lock.
   10605  1.1  christos  *
   10606  1.1  christos  * Note that the we do NOT touch the heap here, as the TTL has not changed.
   10607  1.1  christos  */
   10608  1.1  christos static void
   10609  1.1  christos update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, isc_stdtime_t now) {
   10610  1.1  christos 	INSIST(IS_CACHE(rbtdb));
   10611  1.1  christos 
   10612  1.1  christos 	/* To be checked: can we really assume this? XXXMLG */
   10613  1.1  christos 	INSIST(ISC_LINK_LINKED(header, link));
   10614  1.1  christos 
   10615  1.1  christos 	ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
   10616  1.1  christos 	header->last_used = now;
   10617  1.1  christos 	ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
   10618  1.1  christos }
   10619  1.1  christos 
   10620  1.1  christos static size_t
   10621  1.1  christos expire_lru_headers(dns_rbtdb_t *rbtdb, unsigned int locknum, size_t purgesize,
   10622  1.1  christos 		   bool tree_locked) {
   10623  1.1  christos 	rdatasetheader_t *header, *header_prev;
   10624  1.1  christos 	size_t purged = 0;
   10625  1.1  christos 
   10626  1.1  christos 	for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
   10627  1.1  christos 	     header != NULL && purged <= purgesize; header = header_prev)
   10628  1.1  christos 	{
   10629  1.1  christos 		header_prev = ISC_LIST_PREV(header, link);
   10630  1.1  christos 		/*
   10631  1.1  christos 		 * Unlink the entry at this point to avoid checking it
   10632  1.1  christos 		 * again even if it's currently used someone else and
   10633  1.1  christos 		 * cannot be purged at this moment.  This entry won't be
   10634  1.1  christos 		 * referenced any more (so unlinking is safe) since the
   10635  1.1  christos 		 * TTL was reset to 0.
   10636  1.1  christos 		 */
   10637  1.1  christos 		ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header, link);
   10638  1.1  christos 		size_t header_size = rdataset_size(header);
   10639  1.1  christos 		expire_header(rbtdb, header, tree_locked, expire_lru);
   10640  1.1  christos 		purged += header_size;
   10641  1.1  christos 	}
   10642  1.1  christos 
   10643  1.1  christos 	return (purged);
   10644  1.1  christos }
   10645  1.1  christos 
   10646  1.1  christos /*%
   10647  1.1  christos  * Purge some stale (i.e. unused for some period - LRU based cleaning) cache
   10648  1.1  christos  * entries under the overmem condition.  To recover from this condition quickly,
   10649  1.1  christos  * we cleanup entries up to the size of newly added rdata (passed as purgesize).
   10650  1.1  christos  *
   10651  1.1  christos  * This process is triggered while adding a new entry, and we specifically avoid
   10652  1.1  christos  * purging entries in the same LRU bucket as the one to which the new entry will
   10653  1.1  christos  * belong.  Otherwise, we might purge entries of the same name of different RR
   10654  1.1  christos  * types while adding RRsets from a single response (consider the case where
   10655  1.1  christos  * we're adding A and AAAA glue records of the same NS name).
   10656  1.1  christos  */
   10657  1.1  christos static void
   10658  1.1  christos overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, size_t purgesize,
   10659  1.1  christos 	      bool tree_locked) {
   10660  1.1  christos 	unsigned int locknum;
   10661  1.1  christos 	size_t purged = 0;
   10662  1.1  christos 
   10663  1.1  christos 	for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
   10664  1.1  christos 	     locknum != locknum_start && purged <= purgesize;
   10665  1.1  christos 	     locknum = (locknum + 1) % rbtdb->node_lock_count)
   10666  1.1  christos 	{
   10667  1.1  christos 		NODE_LOCK(&rbtdb->node_locks[locknum].lock,
   10668  1.1  christos 			  isc_rwlocktype_write);
   10669  1.1  christos 
   10670  1.1  christos 		purged += expire_lru_headers(rbtdb, locknum, purgesize - purged,
   10671  1.1  christos 					     tree_locked);
   10672  1.1  christos 
   10673  1.1  christos 		NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
   10674  1.1  christos 			    isc_rwlocktype_write);
   10675  1.1  christos 	}
   10676  1.1  christos }
   10677  1.1  christos 
   10678  1.1  christos static void
   10679  1.1  christos expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, bool tree_locked,
   10680  1.1  christos 	      expire_t reason) {
   10681  1.1  christos 	set_ttl(rbtdb, header, 0);
   10682  1.1  christos 	mark_header_ancient(rbtdb, header);
   10683  1.1  christos 
   10684  1.1  christos 	/*
   10685  1.1  christos 	 * Caller must hold the node (write) lock.
   10686  1.1  christos 	 */
   10687  1.1  christos 
   10688  1.1  christos 	if (isc_refcount_current(&header->node->references) == 0) {
   10689  1.1  christos 		/*
   10690  1.1  christos 		 * If no one else is using the node, we can clean it up now.
   10691  1.1  christos 		 * We first need to gain a new reference to the node to meet a
   10692  1.1  christos 		 * requirement of decrement_reference().
   10693  1.1  christos 		 */
   10694  1.1  christos 		new_reference(rbtdb, header->node, isc_rwlocktype_write);
   10695  1.1  christos 		decrement_reference(rbtdb, header->node, 0,
   10696  1.1  christos 				    isc_rwlocktype_write,
   10697  1.1  christos 				    tree_locked ? isc_rwlocktype_write
   10698  1.1  christos 						: isc_rwlocktype_none,
   10699  1.1  christos 				    false);
   10700  1.1  christos 
   10701  1.1  christos 		if (rbtdb->cachestats == NULL) {
   10702  1.1  christos 			return;
   10703  1.1  christos 		}
   10704  1.1  christos 
   10705  1.1  christos 		switch (reason) {
   10706  1.1  christos 		case expire_ttl:
   10707  1.1  christos 			isc_stats_increment(rbtdb->cachestats,
   10708  1.1  christos 					    dns_cachestatscounter_deletettl);
   10709  1.1  christos 			break;
   10710  1.1  christos 		case expire_lru:
   10711  1.1  christos 			isc_stats_increment(rbtdb->cachestats,
   10712  1.1  christos 					    dns_cachestatscounter_deletelru);
   10713  1.1  christos 			break;
   10714  1.1  christos 		default:
   10715  1.1  christos 			break;
   10716  1.1  christos 		}
   10717  1.1  christos 	}
   10718  1.1  christos }
   10719