Home | History | Annotate | Line # | Download | only in dns
      1  1.4  christos /*	$NetBSD: qpcache.c,v 1.5 2026/01/29 18:37:49 christos Exp $	*/
      2  1.1  christos 
      3  1.1  christos /*
      4  1.1  christos  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      5  1.1  christos  *
      6  1.1  christos  * SPDX-License-Identifier: MPL-2.0
      7  1.1  christos  *
      8  1.1  christos  * This Source Code Form is subject to the terms of the Mozilla Public
      9  1.1  christos  * License, v. 2.0. If a copy of the MPL was not distributed with this
     10  1.1  christos  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
     11  1.1  christos  *
     12  1.1  christos  * See the COPYRIGHT file distributed with this work for additional
     13  1.1  christos  * information regarding copyright ownership.
     14  1.1  christos  */
     15  1.1  christos 
     16  1.1  christos /*! \file */
     17  1.1  christos 
     18  1.1  christos #include <inttypes.h>
     19  1.3  christos #include <stdalign.h>
     20  1.1  christos #include <stdbool.h>
     21  1.1  christos #include <sys/mman.h>
     22  1.1  christos 
     23  1.1  christos #include <isc/ascii.h>
     24  1.1  christos #include <isc/async.h>
     25  1.1  christos #include <isc/atomic.h>
     26  1.1  christos #include <isc/crc64.h>
     27  1.1  christos #include <isc/file.h>
     28  1.1  christos #include <isc/heap.h>
     29  1.1  christos #include <isc/hex.h>
     30  1.1  christos #include <isc/loop.h>
     31  1.1  christos #include <isc/mem.h>
     32  1.1  christos #include <isc/mutex.h>
     33  1.3  christos #include <isc/os.h>
     34  1.1  christos #include <isc/queue.h>
     35  1.1  christos #include <isc/random.h>
     36  1.1  christos #include <isc/refcount.h>
     37  1.1  christos #include <isc/result.h>
     38  1.1  christos #include <isc/rwlock.h>
     39  1.1  christos #include <isc/stdio.h>
     40  1.1  christos #include <isc/string.h>
     41  1.1  christos #include <isc/time.h>
     42  1.1  christos #include <isc/urcu.h>
     43  1.1  christos #include <isc/util.h>
     44  1.1  christos 
     45  1.1  christos #include <dns/callbacks.h>
     46  1.1  christos #include <dns/db.h>
     47  1.1  christos #include <dns/dbiterator.h>
     48  1.1  christos #include <dns/fixedname.h>
     49  1.1  christos #include <dns/log.h>
     50  1.1  christos #include <dns/masterdump.h>
     51  1.1  christos #include <dns/nsec.h>
     52  1.1  christos #include <dns/qp.h>
     53  1.1  christos #include <dns/rdata.h>
     54  1.1  christos #include <dns/rdataset.h>
     55  1.1  christos #include <dns/rdatasetiter.h>
     56  1.1  christos #include <dns/rdataslab.h>
     57  1.1  christos #include <dns/rdatastruct.h>
     58  1.1  christos #include <dns/stats.h>
     59  1.1  christos #include <dns/time.h>
     60  1.1  christos #include <dns/view.h>
     61  1.1  christos #include <dns/zonekey.h>
     62  1.1  christos 
     63  1.1  christos #include "db_p.h"
     64  1.1  christos #include "qpcache_p.h"
     65  1.1  christos 
/*
 * Attribute predicates for a slab header.  Every predicate performs one
 * acquire load of the header's 'attributes' word and tests a single
 * DNS_SLABHEADERATTR_* bit.
 */
#define QPC_HEADERATTRS(header) atomic_load_acquire(&(header)->attributes)
#define QPC_HEADERATTR_ISSET(header, attr) \
	((QPC_HEADERATTRS(header) & (attr)) != 0)

/* EXISTS() is the only inverted test: true when NONEXISTENT is clear. */
#define EXISTS(header) \
	((QPC_HEADERATTRS(header) & DNS_SLABHEADERATTR_NONEXISTENT) == 0)
#define NONEXISTENT(header) \
	QPC_HEADERATTR_ISSET(header, DNS_SLABHEADERATTR_NONEXISTENT)
#define IGNORE(header) QPC_HEADERATTR_ISSET(header, DNS_SLABHEADERATTR_IGNORE)
#define NXDOMAIN(header) \
	QPC_HEADERATTR_ISSET(header, DNS_SLABHEADERATTR_NXDOMAIN)
#define STALE(header) QPC_HEADERATTR_ISSET(header, DNS_SLABHEADERATTR_STALE)
#define STALE_WINDOW(header) \
	QPC_HEADERATTR_ISSET(header, DNS_SLABHEADERATTR_STALE_WINDOW)
#define OPTOUT(header) QPC_HEADERATTR_ISSET(header, DNS_SLABHEADERATTR_OPTOUT)
#define NEGATIVE(header) \
	QPC_HEADERATTR_ISSET(header, DNS_SLABHEADERATTR_NEGATIVE)
#define PREFETCH(header) \
	QPC_HEADERATTR_ISSET(header, DNS_SLABHEADERATTR_PREFETCH)
#define ZEROTTL(header) \
	QPC_HEADERATTR_ISSET(header, DNS_SLABHEADERATTR_ZEROTTL)
#define ANCIENT(header) \
	QPC_HEADERATTR_ISSET(header, DNS_SLABHEADERATTR_ANCIENT)
#define STATCOUNT(header) \
	QPC_HEADERATTR_ISSET(header, DNS_SLABHEADERATTR_STATCOUNT)
    102  1.1  christos 
/*
 * Stale TTL to apply to 'header': 0 for headers flagged NXDOMAIN,
 * otherwise the serve-stale TTL stored in the database's common part.
 * Both parameters are parenthesized so that any pointer-valued
 * expression (e.g. 'dbs + 1') can be passed safely.
 */
#define STALE_TTL(header, qpdb) \
	(NXDOMAIN(header) ? 0 : (qpdb)->common.serve_stale_ttl)

/*
 * True while the header's 'ttl' field is greater than 'now', or equal to
 * 'now' for a header flagged ZEROTTL.  Note that both arguments are
 * evaluated more than once.
 */
#define ACTIVE(header, now) \
	(((header)->ttl > (now)) || ((header)->ttl == (now) && ZEROTTL(header)))

/* True when the iterator was created with the DNS_DB_EXPIREDOK option. */
#define EXPIREDOK(iterator) \
	(((iterator)->common.options & DNS_DB_EXPIREDOK) != 0)

/* True when the iterator was created with the DNS_DB_STALEOK option. */
#define STALEOK(iterator) (((iterator)->common.options & DNS_DB_STALEOK) != 0)

/* True when the database has a non-zero serve-stale TTL. */
#define KEEPSTALE(qpdb) ((qpdb)->common.serve_stale_ttl > 0)
    115  1.1  christos 
    116  1.1  christos /*%
    117  1.1  christos  * Note that "impmagic" is not the first four bytes of the struct, so
    118  1.1  christos  * ISC_MAGIC_VALID cannot be used.
    119  1.1  christos  */
    120  1.1  christos #define QPDB_MAGIC ISC_MAGIC('Q', 'P', 'D', '4')
                       /*% True when 'qpdb' is non-NULL and its common.impmagic is QPDB_MAGIC. */
    121  1.1  christos #define VALID_QPDB(qpdb) \
    122  1.1  christos 	((qpdb) != NULL && (qpdb)->common.impmagic == QPDB_MAGIC)
    123  1.1  christos 
                       /*% Cast a slab header's 'node' pointer to the cache node type. */
    124  1.1  christos #define HEADERNODE(h) ((qpcnode_t *)((h)->node))
    125  1.1  christos 
    126  1.1  christos /*
    127  1.1  christos  * Allow clients with a virtual time of up to 5 minutes in the past to see
    128  1.1  christos  * records that would otherwise have expired.
    129  1.1  christos  */
    130  1.1  christos #define QPDB_VIRTUAL 300
    131  1.1  christos 
    132  1.1  christos /*%
    133  1.1  christos  * Whether to rate-limit updating the LRU to avoid possible thread contention.
    134  1.1  christos  * Updating LRU requires write locking, so we don't do it every time the
    135  1.1  christos  * record is touched - only after some time passes.
    136  1.1  christos  */
    137  1.1  christos #ifndef DNS_QPDB_LIMITLRUUPDATE
    138  1.1  christos #define DNS_QPDB_LIMITLRUUPDATE 1
    139  1.1  christos #endif
    140  1.1  christos 
    141  1.1  christos /*% Time after which we update LRU for glue records, 5 minutes */
    142  1.1  christos #define DNS_QPDB_LRUUPDATE_GLUE 300
    143  1.1  christos /*% Time after which we update LRU for all other records, 10 minutes */
    144  1.1  christos #define DNS_QPDB_LRUUPDATE_REGULAR 600
    145  1.1  christos 
    146  1.1  christos /*
    147  1.1  christos  * This defines the number of headers that we try to expire each time
    148  1.1  christos  * expire_ttl_headers() is run.  The number should be small enough, so the
    149  1.1  christos  * TTL-based header expiration doesn't take too long, but it should be large
    150  1.1  christos  * enough, so we expire enough headers if their TTL is clustered.
    151  1.1  christos  */
    152  1.1  christos #define DNS_QPDB_EXPIRE_TTL_COUNT 10
    153  1.1  christos 
    154  1.1  christos /*%
    155  1.1  christos  * This is the structure that is used for each node in the qp trie of trees.
    156  1.1  christos  */
    157  1.1  christos typedef struct qpcnode qpcnode_t;
    158  1.1  christos struct qpcnode {
                       	/* Owner name; qp_makekey() derives the trie key from it. */
    159  1.1  christos 	dns_name_t name;
    160  1.1  christos 	isc_mem_t *mctx;
    161  1.1  christos 
    162  1.1  christos 	uint8_t			: 0;
    163  1.1  christos 	unsigned int delegating : 1;
    164  1.1  christos 	unsigned int nsec	: 2; /*%< range is 0..3 */
    165  1.1  christos 	uint8_t			: 0;
    166  1.1  christos 
                       	/* Index into qpcache->buckets[] (see update_header()). */
    167  1.3  christos 	uint16_t locknum;
    168  1.3  christos 
    169  1.3  christos 	/*
    170  1.3  christos 	 * 'erefs' counts external references held by a caller: for
    171  1.3  christos 	 * example, it could be incremented by dns_db_findnode(),
    172  1.3  christos 	 * and decremented by dns_db_detachnode().
    173  1.3  christos 	 *
    174  1.3  christos 	 * 'references' counts internal references to the node object,
    175  1.3  christos 	 * including the one held by the QP trie so the node won't be
    176  1.3  christos 	 * deleted while it's quiescently stored in the database - even
    177  1.3  christos 	 * though 'erefs' may be zero because no external caller is
    178  1.3  christos 	 * using it at the time.
    179  1.3  christos 	 *
    180  1.3  christos 	 * Generally when 'erefs' is incremented or decremented,
    181  1.3  christos 	 * 'references' is too. When both go to zero (meaning callers
    182  1.3  christos 	 * and the database have both released the object) the object
    183  1.3  christos 	 * is freed.
    184  1.3  christos 	 *
    185  1.3  christos 	 * Whenever 'erefs' is incremented from zero, we also acquire a
    186  1.3  christos 	 * node use reference (see 'qpcache->references' below), and
    187  1.3  christos 	 * release it when 'erefs' goes back to zero. This prevents the
    188  1.3  christos 	 * database from being shut down until every caller has released
    189  1.3  christos 	 * all nodes.
    190  1.3  christos 	 */
    191  1.1  christos 	isc_refcount_t references;
    192  1.1  christos 	isc_refcount_t erefs;
                       	/*
                       	 * Head of the node's list of slab headers, chained through
                       	 * 'next' (walked by clean_cache_node()).
                       	 */
    193  1.1  christos 	void *data;
    194  1.1  christos 
    195  1.1  christos 	/*%
    196  1.1  christos 	 * NOTE: The 'dirty' flag is protected by the node lock, so
    197  1.1  christos 	 * this bitfield has to be separated from the one above.
    198  1.1  christos 	 * We don't want it to share the same qword with bits
    199  1.1  christos 	 * that can be accessed without the node lock.
    200  1.1  christos 	 */
    201  1.1  christos 	uint8_t	      : 0;
    202  1.1  christos 	uint8_t dirty : 1;
    203  1.1  christos 	uint8_t	      : 0;
    204  1.1  christos 
    205  1.1  christos 	/*%
    206  1.1  christos 	 * Used for dead nodes cleaning.  This linked list is used to mark nodes
    207  1.1  christos 	 * which have no data any longer, but we cannot unlink at that exact
    208  1.1  christos 	 * moment because we did not or could not obtain a write lock on the
    209  1.1  christos 	 * tree.
    210  1.1  christos 	 */
    211  1.1  christos 	isc_queue_node_t deadlink;
    212  1.1  christos };
    213  1.1  christos 
    214  1.3  christos /*%
    215  1.3  christos  * One bucket structure will be created for each loop, and
    216  1.3  christos  * nodes in the database will be evenly distributed among buckets
    217  1.3  christos  * to reduce contention between threads.
    218  1.3  christos  */
    219  1.3  christos typedef struct qpcache_bucket {
    220  1.3  christos 	/*%
    221  1.3  christos 	 * Temporary storage for stale cache nodes and dynamically
    222  1.3  christos 	 * deleted nodes that await being cleaned up.
    223  1.3  christos 	 */
    224  1.3  christos 	isc_queue_t deadnodes;
    225  1.3  christos 
    226  1.3  christos 	/* Per-bucket lock. */
    227  1.3  christos 	isc_rwlock_t lock;
    228  1.3  christos 
    229  1.3  christos 	/*
    230  1.3  christos 	 * Linked list used to implement LRU cache cleaning.
    231  1.3  christos 	 */
    232  1.3  christos 	dns_slabheaderlist_t lru;
    233  1.3  christos 
    234  1.3  christos 	/*
    235  1.3  christos 	 * The heap is used for TTL based expiry.  Note that qpcache->hmctx
    236  1.3  christos 	 * is the memory context to use for heap memory; this differs from
    237  1.3  christos 	 * the main database memory context, which is qpcache->common.mctx.
    238  1.3  christos 	 */
    239  1.3  christos 	isc_heap_t *heap;
    240  1.3  christos 
    241  1.3  christos 	/* Padding to prevent false sharing between locks. */
                       	/*
                       	 * The size is CACHELINE - (sum of member sizes % CACHELINE), so
                       	 * when the sum is an exact multiple of the cache line size this
                       	 * is a full cache line of padding, never zero.
                       	 *
                       	 * NOTE(review): the sum of sizeof()s does not include any
                       	 * alignment padding the compiler inserts between members;
                       	 * confirm the resulting struct size is the intended one.
                       	 * NOTE(review): identifiers containing '__' are reserved to
                       	 * the implementation in C; consider renaming this member.
                       	 */
    242  1.3  christos 	uint8_t __padding[ISC_OS_CACHELINE_SIZE -
    243  1.3  christos 			  (sizeof(isc_queue_t) + sizeof(isc_rwlock_t) +
    244  1.3  christos 			   sizeof(dns_slabheaderlist_t) + sizeof(isc_heap_t *)) %
    245  1.3  christos 				  ISC_OS_CACHELINE_SIZE];
    246  1.3  christos 
    247  1.3  christos } qpcache_bucket_t;
    248  1.3  christos 
    249  1.1  christos typedef struct qpcache qpcache_t;
    250  1.1  christos struct qpcache {
    251  1.1  christos 	/* Unlocked. */
    252  1.1  christos 	dns_db_t common;
    253  1.1  christos 	/* Loopmgr */
    254  1.1  christos 	isc_loopmgr_t *loopmgr;
    255  1.1  christos 	/* Locks the data in this struct */
    256  1.1  christos 	isc_rwlock_t lock;
    257  1.1  christos 	/* Locks the tree structure (prevents nodes appearing/disappearing) */
    258  1.1  christos 	isc_rwlock_t tree_lock;
    259  1.3  christos 
    260  1.3  christos 	/*
    261  1.3  christos 	 * NOTE: 'references' is NOT the global reference counter for
    262  1.3  christos 	 * the database object handled by dns_db_attach() and _detach();
    263  1.3  christos 	 * that one is 'common.references'.
    264  1.3  christos 	 *
    265  1.3  christos 	 * Instead, 'references' counts the number of nodes being used by
    266  1.3  christos 	 * at least one external caller. (It's called 'references' to
    267  1.3  christos 	 * leverage the ISC_REFCOUNT_STATIC macros, but 'nodes_in_use'
    268  1.3  christos 	 * might be a clearer name.)
    269  1.3  christos 	 *
    270  1.3  christos 	 * One additional reference to this counter is held by the database
    271  1.3  christos 	 * object itself. When 'common.references' goes to zero, that
    272  1.3  christos 	 * reference is released. When in turn 'references' goes to zero,
    273  1.3  christos 	 * the database is shut down and freed.
    274  1.3  christos 	 */
    275  1.3  christos 	isc_refcount_t references;
    276  1.3  christos 
    277  1.3  christos 	dns_stats_t *rrsetstats;
    278  1.3  christos 	isc_stats_t *cachestats;
    279  1.1  christos 
    280  1.1  christos 	uint32_t maxrrperset;	 /* Maximum RRs per RRset */
    281  1.1  christos 	uint32_t maxtypepername; /* Maximum number of RR types per owner */
    282  1.1  christos 
    283  1.1  christos 	/*
    284  1.1  christos 	 * The time after a failed lookup, where stale answers from cache
    285  1.1  christos 	 * may be used directly in a DNS response without attempting a
    286  1.1  christos 	 * new iterative lookup.
    287  1.1  christos 	 */
    288  1.1  christos 	uint32_t serve_stale_refresh;
    289  1.1  christos 
    290  1.1  christos 	/*
    291  1.1  christos 	 * Start point % node_lock_count for next LRU cleanup.
                       	 *
                       	 * NOTE(review): no 'node_lock_count' is declared in this
                       	 * struct; presumably the modulus is now 'buckets_count' -
                       	 * confirm against the LRU sweep code.
    292  1.1  christos 	 */
    293  1.1  christos 	atomic_uint lru_sweep;
    294  1.1  christos 
    295  1.1  christos 	/*
    296  1.1  christos 	 * When performing LRU cleaning limit cleaning to headers that were
    297  1.1  christos 	 * last used at or before this.
    298  1.1  christos 	 */
    299  1.1  christos 	_Atomic(isc_stdtime_t) last_used;
    300  1.1  christos 
    301  1.1  christos 	/* Locked by tree_lock. */
    302  1.1  christos 	dns_qp_t *tree;
    303  1.1  christos 	dns_qp_t *nsec;
    304  1.3  christos 
    305  1.3  christos 	isc_mem_t *hmctx; /* Memory context for the heaps */
    306  1.3  christos 
                       	/*
                       	 * Flexible array member: 'buckets_count' bucket structures are
                       	 * stored inline after the fixed part of the struct, and are
                       	 * indexed by a node's 'locknum'.
                       	 */
    307  1.3  christos 	size_t buckets_count;
    308  1.3  christos 	qpcache_bucket_t buckets[]; /* attribute((counted_by(buckets_count))) */
    309  1.1  christos };
    310  1.1  christos 
    311  1.3  christos #ifdef DNS_DB_NODETRACE
                       /*
                        * Tracing build: route the qpcache reference helpers through the
                        * double-underscore variants, passing the caller's function, file
                        * and line.
                        */
    312  1.3  christos #define qpcache_ref(ptr)   qpcache__ref(ptr, __func__, __FILE__, __LINE__)
    313  1.3  christos #define qpcache_unref(ptr) qpcache__unref(ptr, __func__, __FILE__, __LINE__)
    314  1.3  christos #define qpcache_attach(ptr, ptrp) \
    315  1.3  christos 	qpcache__attach(ptr, ptrp, __func__, __FILE__, __LINE__)
    316  1.3  christos #define qpcache_detach(ptrp) qpcache__detach(ptrp, __func__, __FILE__, __LINE__)
    317  1.3  christos ISC_REFCOUNT_STATIC_TRACE_DECL(qpcache);
    318  1.3  christos #else
                       /* Non-tracing build; presumably declares the same helpers without tracing. */
    319  1.3  christos ISC_REFCOUNT_STATIC_DECL(qpcache);
    320  1.3  christos #endif
    321  1.3  christos 
    322  1.1  christos /*%
    323  1.1  christos  * Search Context
                        *
                        * NOTE(review): the code that fills in and consumes this structure
                        * is outside this excerpt; the field notes below are inferred from
                        * the field names and types only - confirm against the find code.
    324  1.1  christos  */
    325  1.1  christos typedef struct {
    326  1.1  christos 	qpcache_t *qpdb;
    327  1.1  christos 	unsigned int options;
    328  1.1  christos 	dns_qpchain_t chain;
    329  1.1  christos 	dns_qpiter_t iter;
    330  1.1  christos 	bool need_cleanup;
                       	/* Presumably the zone cut node and its header/signature header. */
    331  1.1  christos 	qpcnode_t *zonecut;
    332  1.1  christos 	dns_slabheader_t *zonecut_header;
    333  1.1  christos 	dns_slabheader_t *zonecut_sigheader;
    334  1.1  christos 	isc_stdtime_t now;
    335  1.1  christos } qpc_search_t;
    336  1.1  christos 
    337  1.1  christos #ifdef DNS_DB_NODETRACE
                       /*
                        * Tracing build: route the qpcnode reference helpers through the
                        * double-underscore variants, passing the caller's function, file
                        * and line.
                        */
    338  1.1  christos #define qpcnode_ref(ptr)   qpcnode__ref(ptr, __func__, __FILE__, __LINE__)
    339  1.1  christos #define qpcnode_unref(ptr) qpcnode__unref(ptr, __func__, __FILE__, __LINE__)
    340  1.1  christos #define qpcnode_attach(ptr, ptrp) \
    341  1.1  christos 	qpcnode__attach(ptr, ptrp, __func__, __FILE__, __LINE__)
    342  1.1  christos #define qpcnode_detach(ptrp) qpcnode__detach(ptrp, __func__, __FILE__, __LINE__)
    343  1.1  christos ISC_REFCOUNT_STATIC_TRACE_DECL(qpcnode);
    344  1.1  christos #else
                       /* Non-tracing build; presumably declares the same helpers without tracing. */
    345  1.1  christos ISC_REFCOUNT_STATIC_DECL(qpcnode);
    346  1.1  christos #endif
    347  1.1  christos 
    348  1.1  christos /* QP methods */
    349  1.1  christos static void
    350  1.1  christos qp_attach(void *uctx, void *pval, uint32_t ival);
    351  1.1  christos static void
    352  1.1  christos qp_detach(void *uctx, void *pval, uint32_t ival);
    353  1.1  christos static size_t
    354  1.1  christos qp_makekey(dns_qpkey_t key, void *uctx, void *pval, uint32_t ival);
    355  1.1  christos static void
    356  1.1  christos qp_triename(void *uctx, char *buf, size_t size);
    357  1.1  christos 
                       /*
                        * Callback table for the QP trie.  This is a positional initializer,
                        * so the entries must stay in the member order of dns_qpmethods_t.
                        */
    358  1.1  christos static dns_qpmethods_t qpmethods = {
    359  1.1  christos 	qp_attach,
    360  1.1  christos 	qp_detach,
    361  1.1  christos 	qp_makekey,
    362  1.1  christos 	qp_triename,
    363  1.1  christos };
    364  1.1  christos 
    365  1.1  christos static void
    366  1.1  christos qp_attach(void *uctx ISC_ATTR_UNUSED, void *pval,
    367  1.1  christos 	  uint32_t ival ISC_ATTR_UNUSED) {
    368  1.1  christos 	qpcnode_t *data = pval;
    369  1.1  christos 	qpcnode_ref(data);
    370  1.1  christos }
    371  1.1  christos 
    372  1.1  christos static void
    373  1.1  christos qp_detach(void *uctx ISC_ATTR_UNUSED, void *pval,
    374  1.1  christos 	  uint32_t ival ISC_ATTR_UNUSED) {
    375  1.1  christos 	qpcnode_t *data = pval;
    376  1.1  christos 	qpcnode_detach(&data);
    377  1.1  christos }
    378  1.1  christos 
    379  1.1  christos static size_t
    380  1.1  christos qp_makekey(dns_qpkey_t key, void *uctx ISC_ATTR_UNUSED, void *pval,
    381  1.1  christos 	   uint32_t ival ISC_ATTR_UNUSED) {
    382  1.1  christos 	qpcnode_t *data = pval;
    383  1.1  christos 	return dns_qpkey_fromname(key, &data->name);
    384  1.1  christos }
    385  1.1  christos 
    386  1.1  christos static void
    387  1.1  christos qp_triename(void *uctx, char *buf, size_t size) {
    388  1.1  christos 	UNUSED(uctx);
    389  1.1  christos 	snprintf(buf, size, "qpdb-lite");
    390  1.1  christos }
    391  1.1  christos 
    392  1.1  christos static void
    393  1.1  christos rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp DNS__DB_FLARG);
    394  1.1  christos static isc_result_t
    395  1.1  christos rdatasetiter_first(dns_rdatasetiter_t *iterator DNS__DB_FLARG);
    396  1.1  christos static isc_result_t
    397  1.1  christos rdatasetiter_next(dns_rdatasetiter_t *iterator DNS__DB_FLARG);
    398  1.1  christos static void
    399  1.1  christos rdatasetiter_current(dns_rdatasetiter_t *iterator,
    400  1.1  christos 		     dns_rdataset_t *rdataset DNS__DB_FLARG);
    401  1.1  christos 
                       /*
                        * Method table for the rdataset iterator.  This is a positional
                        * initializer, so the entries must stay in the member order of
                        * dns_rdatasetitermethods_t.
                        */
    402  1.1  christos static dns_rdatasetitermethods_t rdatasetiter_methods = {
    403  1.1  christos 	rdatasetiter_destroy, rdatasetiter_first, rdatasetiter_next,
    404  1.1  christos 	rdatasetiter_current
    405  1.1  christos };
    406  1.1  christos 
    407  1.1  christos typedef struct qpc_rditer {
                       	/* Generic iterator part; kept first in the struct. */
    408  1.1  christos 	dns_rdatasetiter_t common;
                       	/* Presumably the header the iterator is positioned on - confirm. */
    409  1.1  christos 	dns_slabheader_t *current;
    410  1.1  christos } qpc_rditer_t;
    411  1.1  christos 
    412  1.1  christos static void
    413  1.1  christos dbiterator_destroy(dns_dbiterator_t **iteratorp DNS__DB_FLARG);
    414  1.1  christos static isc_result_t
    415  1.1  christos dbiterator_first(dns_dbiterator_t *iterator DNS__DB_FLARG);
    416  1.1  christos static isc_result_t
    417  1.1  christos dbiterator_last(dns_dbiterator_t *iterator DNS__DB_FLARG);
    418  1.1  christos static isc_result_t
    419  1.1  christos dbiterator_seek(dns_dbiterator_t *iterator,
    420  1.1  christos 		const dns_name_t *name DNS__DB_FLARG);
    421  1.1  christos static isc_result_t
    422  1.5  christos dbiterator_seek3(dns_dbiterator_t *iterator,
    423  1.5  christos 		 const dns_name_t *name DNS__DB_FLARG);
    424  1.5  christos static isc_result_t
    425  1.1  christos dbiterator_prev(dns_dbiterator_t *iterator DNS__DB_FLARG);
    426  1.1  christos static isc_result_t
    427  1.1  christos dbiterator_next(dns_dbiterator_t *iterator DNS__DB_FLARG);
    428  1.1  christos static isc_result_t
    429  1.1  christos dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
    430  1.1  christos 		   dns_name_t *name DNS__DB_FLARG);
    431  1.1  christos static isc_result_t
    432  1.1  christos dbiterator_pause(dns_dbiterator_t *iterator);
    433  1.1  christos static isc_result_t
    434  1.1  christos dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name);
    435  1.1  christos 
                       /*
                        * Method table for the database iterator.  This is a positional
                        * initializer, so the entries must stay in the member order of
                        * dns_dbiteratormethods_t (note that seek3 sits between seek and
                        * prev).
                        */
    436  1.1  christos static dns_dbiteratormethods_t dbiterator_methods = {
    437  1.5  christos 	dbiterator_destroy, dbiterator_first,	dbiterator_last,
    438  1.5  christos 	dbiterator_seek,    dbiterator_seek3,	dbiterator_prev,
    439  1.5  christos 	dbiterator_next,    dbiterator_current, dbiterator_pause,
    440  1.5  christos 	dbiterator_origin
    441  1.1  christos };
    442  1.1  christos 
    443  1.1  christos /*
    444  1.1  christos  * Note that the QP cache database only needs a single QP iterator, because
    445  1.1  christos  * unlike the QP zone database, NSEC3 records are cached in the main tree.
    446  1.1  christos  *
    447  1.1  christos  * If we ever implement synth-from-dnssec using NSEC3 records, we'll need
    448  1.1  christos  * to have a separate tree for NSEC3 records, and to copy in the more complex
    449  1.1  christos  * iterator implementation from qpzone.c.
    450  1.1  christos  */
    451  1.1  christos typedef struct qpc_dbit {
                       	/* Generic iterator part; kept first in the struct. */
    452  1.1  christos 	dns_dbiterator_t common;
    453  1.1  christos 	bool paused;
                       	/* Presumably the mode in which tree_lock is currently held - confirm. */
    454  1.1  christos 	isc_rwlocktype_t tree_locked;
    455  1.1  christos 	isc_result_t result;
                       	/* NOTE(review): 'name' presumably points into 'fixed' - confirm. */
    456  1.1  christos 	dns_fixedname_t fixed;
    457  1.1  christos 	dns_name_t *name;
                       	/* The single QP iterator described in the comment above. */
    458  1.1  christos 	dns_qpiter_t iter;
    459  1.1  christos 	qpcnode_t *node;
    460  1.1  christos } qpc_dbit_t;
    461  1.1  christos 
    462  1.1  christos static void
    463  1.3  christos qpcache__destroy(qpcache_t *qpdb);
    464  1.1  christos 
                       /*
                        * Tentative definition of the database method table; the initialized
                        * definition is presumably further down the file (not visible here).
                        */
    465  1.1  christos static dns_dbmethods_t qpdb_cachemethods;
    466  1.1  christos 
    467  1.1  christos /*%
    468  1.1  christos  * 'init_count' is used to initialize 'newheader->count' which in turn
    469  1.1  christos  * is used to determine where in the cycle rrset-order cyclic starts.
    470  1.1  christos  * We don't lock this as we don't care about simultaneous updates.
                        * (The counter is an atomic type, so unlocked access is well defined.)
    471  1.1  christos  */
    472  1.1  christos static atomic_uint_fast16_t init_count = 0;
    473  1.1  christos 
    474  1.1  christos /*
    475  1.1  christos  * Locking
    476  1.1  christos  *
    477  1.1  christos  * If a routine is going to lock more than one lock in this module, then
    478  1.1  christos  * the locking must be done in the following order:
    479  1.1  christos  *
    480  1.1  christos  *      Tree Lock
    481  1.1  christos  *
    482  1.1  christos  *      Node Lock       (Only one from the set may be locked at one time by
    483  1.1  christos  *                       any caller)
    484  1.1  christos  *
    485  1.1  christos  *      Database Lock
    486  1.1  christos  *
    487  1.1  christos  * Failure to follow this hierarchy can result in deadlock.
    488  1.1  christos  */
    489  1.1  christos 
    490  1.1  christos /*%
    491  1.1  christos  * Routines for LRU-based cache management.
    492  1.1  christos  */
    493  1.1  christos 
    494  1.1  christos /*%
    495  1.1  christos  * See if a given cache entry that is being reused needs to be updated
    496  1.1  christos  * in the LRU-list.  From the LRU management point of view, this function is
    497  1.1  christos  * expected to return true for almost all cases.  When used with threads,
    498  1.1  christos  * however, this may cause a non-negligible performance penalty because a
    499  1.1  christos  * writer lock will have to be acquired before updating the list.
    500  1.1  christos  * If DNS_QPDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
    501  1.1  christos  * function returns true if the entry has not been updated for some period of
    502  1.1  christos  * time.  We differentiate the NS or glue address case and the others since
    503  1.1  christos  * experiments have shown that the former tends to be accessed relatively
    504  1.1  christos  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
    505  1.1  christos  * may cause external queries at a higher level zone, involving more
    506  1.1  christos  * transactions).
    507  1.1  christos  *
    508  1.1  christos  * Caller must hold the node (read or write) lock.
    509  1.1  christos  */
    510  1.1  christos static bool
    511  1.1  christos need_headerupdate(dns_slabheader_t *header, isc_stdtime_t now) {
    512  1.4  christos 	if (DNS_SLABHEADER_GETATTR(header,
    513  1.4  christos 				   DNS_SLABHEADERATTR_NONEXISTENT |
    514  1.4  christos 					   DNS_SLABHEADERATTR_ANCIENT |
    515  1.4  christos 					   DNS_SLABHEADERATTR_ZEROTTL) != 0)
    516  1.1  christos 	{
    517  1.1  christos 		return false;
    518  1.1  christos 	}
    519  1.1  christos 
    520  1.1  christos #if DNS_QPDB_LIMITLRUUPDATE
    521  1.1  christos 	if (header->type == dns_rdatatype_ns ||
    522  1.1  christos 	    (header->trust == dns_trust_glue &&
    523  1.1  christos 	     (header->type == dns_rdatatype_a ||
    524  1.1  christos 	      header->type == dns_rdatatype_aaaa)))
    525  1.1  christos 	{
    526  1.1  christos 		/*
    527  1.1  christos 		 * Glue records are updated if at least DNS_QPDB_LRUUPDATE_GLUE
    528  1.1  christos 		 * seconds have passed since the previous update time.
    529  1.1  christos 		 */
    530  1.1  christos 		return header->last_used + DNS_QPDB_LRUUPDATE_GLUE <= now;
    531  1.1  christos 	}
    532  1.1  christos 
    533  1.1  christos 	/*
    534  1.1  christos 	 * Other records are updated if DNS_QPDB_LRUUPDATE_REGULAR seconds
    535  1.1  christos 	 * have passed.
    536  1.1  christos 	 */
    537  1.1  christos 	return header->last_used + DNS_QPDB_LRUUPDATE_REGULAR <= now;
    538  1.1  christos #else
    539  1.1  christos 	UNUSED(now);
    540  1.1  christos 
    541  1.1  christos 	return true;
    542  1.1  christos #endif /* if DNS_QPDB_LIMITLRUUPDATE */
    543  1.1  christos }
    544  1.1  christos 
    545  1.1  christos /*%
    546  1.1  christos  * Update the timestamp of a given cache entry and move it to the head
    547  1.1  christos  * of the corresponding LRU list.
    548  1.1  christos  *
    549  1.1  christos  * Caller must hold the node (write) lock.
    550  1.1  christos  *
    551  1.1  christos  * Note that the we do NOT touch the heap here, as the TTL has not changed.
    552  1.1  christos  */
    553  1.1  christos static void
    554  1.1  christos update_header(qpcache_t *qpdb, dns_slabheader_t *header, isc_stdtime_t now) {
    555  1.1  christos 	/* To be checked: can we really assume this? XXXMLG */
    556  1.1  christos 	INSIST(ISC_LINK_LINKED(header, link));
    557  1.1  christos 
    558  1.3  christos 	ISC_LIST_UNLINK(qpdb->buckets[HEADERNODE(header)->locknum].lru, header,
    559  1.3  christos 			link);
    560  1.1  christos 	header->last_used = now;
    561  1.3  christos 	ISC_LIST_PREPEND(qpdb->buckets[HEADERNODE(header)->locknum].lru, header,
    562  1.3  christos 			 link);
    563  1.1  christos }
    564  1.1  christos 
    565  1.1  christos /*
    566  1.1  christos  * Locking:
    567  1.1  christos  * If a routine is going to lock more than one lock in this module, then
    568  1.1  christos  * the locking must be done in the following order:
    569  1.1  christos  *
    570  1.1  christos  *      Tree Lock
    571  1.1  christos  *
    572  1.1  christos  *      Node Lock       (Only one from the set may be locked at one time by
    573  1.1  christos  *                       any caller)
    574  1.1  christos  *
    575  1.1  christos  *      Database Lock
    576  1.1  christos  *
    577  1.1  christos  * Failure to follow this hierarchy can result in deadlock.
    578  1.1  christos  *
    579  1.1  christos  * Deleting Nodes:
    580  1.1  christos  * For zone databases the node for the origin of the zone MUST NOT be deleted.
    581  1.1  christos  */
    582  1.1  christos 
    583  1.1  christos /*
    584  1.1  christos  * DB Routines
    585  1.1  christos  */
    586  1.1  christos 
    587  1.1  christos static void
    588  1.1  christos clean_stale_headers(dns_slabheader_t *top) {
    589  1.1  christos 	dns_slabheader_t *d = NULL, *down_next = NULL;
    590  1.1  christos 
    591  1.1  christos 	for (d = top->down; d != NULL; d = down_next) {
    592  1.1  christos 		down_next = d->down;
    593  1.1  christos 		dns_slabheader_destroy(&d);
    594  1.1  christos 	}
    595  1.1  christos 	top->down = NULL;
    596  1.1  christos }
    597  1.1  christos 
    598  1.1  christos static void
    599  1.1  christos clean_cache_node(qpcache_t *qpdb, qpcnode_t *node) {
    600  1.1  christos 	dns_slabheader_t *current = NULL, *top_prev = NULL, *top_next = NULL;
    601  1.1  christos 
    602  1.1  christos 	/*
    603  1.1  christos 	 * Caller must be holding the node lock.
    604  1.1  christos 	 */
    605  1.1  christos 
    606  1.1  christos 	for (current = node->data; current != NULL; current = top_next) {
    607  1.1  christos 		top_next = current->next;
    608  1.1  christos 		clean_stale_headers(current);
    609  1.1  christos 		/*
    610  1.1  christos 		 * If current is nonexistent, ancient, or stale and
    611  1.1  christos 		 * we are not keeping stale, we can clean it up.
    612  1.1  christos 		 */
    613  1.1  christos 		if (NONEXISTENT(current) || ANCIENT(current) ||
    614  1.1  christos 		    (STALE(current) && !KEEPSTALE(qpdb)))
    615  1.1  christos 		{
    616  1.1  christos 			if (top_prev != NULL) {
    617  1.1  christos 				top_prev->next = current->next;
    618  1.1  christos 			} else {
    619  1.1  christos 				node->data = current->next;
    620  1.1  christos 			}
    621  1.1  christos 			dns_slabheader_destroy(&current);
    622  1.1  christos 		} else {
    623  1.1  christos 			top_prev = current;
    624  1.1  christos 		}
    625  1.1  christos 	}
    626  1.1  christos 	node->dirty = 0;
    627  1.1  christos }
    628  1.1  christos 
    629  1.1  christos /*
    630  1.1  christos  * tree_lock(write) must be held.
    631  1.1  christos  */
    632  1.1  christos static void
    633  1.1  christos delete_node(qpcache_t *qpdb, qpcnode_t *node) {
    634  1.1  christos 	isc_result_t result = ISC_R_UNEXPECTED;
    635  1.1  christos 
    636  1.1  christos 	if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
    637  1.1  christos 		char printname[DNS_NAME_FORMATSIZE];
    638  1.1  christos 		dns_name_format(&node->name, printname, sizeof(printname));
    639  1.1  christos 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
    640  1.1  christos 			      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
    641  1.1  christos 			      "delete_node(): %p %s (bucket %d)", node,
    642  1.1  christos 			      printname, node->locknum);
    643  1.1  christos 	}
    644  1.1  christos 
    645  1.1  christos 	switch (node->nsec) {
    646  1.1  christos 	case DNS_DB_NSEC_HAS_NSEC:
    647  1.1  christos 		/*
    648  1.1  christos 		 * Delete the corresponding node from the auxiliary NSEC
    649  1.1  christos 		 * tree before deleting from the main tree.
    650  1.1  christos 		 */
    651  1.1  christos 		result = dns_qp_deletename(qpdb->nsec, &node->name, NULL, NULL);
    652  1.1  christos 		if (result != ISC_R_SUCCESS) {
    653  1.1  christos 			isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
    654  1.1  christos 				      DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
    655  1.1  christos 				      "delete_node(): "
    656  1.1  christos 				      "dns_qp_deletename: %s",
    657  1.1  christos 				      isc_result_totext(result));
    658  1.1  christos 		}
    659  1.1  christos 		/* FALLTHROUGH */
    660  1.1  christos 	case DNS_DB_NSEC_NORMAL:
    661  1.1  christos 		result = dns_qp_deletename(qpdb->tree, &node->name, NULL, NULL);
    662  1.1  christos 		break;
    663  1.1  christos 	case DNS_DB_NSEC_NSEC:
    664  1.1  christos 		result = dns_qp_deletename(qpdb->nsec, &node->name, NULL, NULL);
    665  1.1  christos 		break;
    666  1.1  christos 	}
    667  1.1  christos 	if (result != ISC_R_SUCCESS) {
    668  1.1  christos 		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
    669  1.1  christos 			      DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
    670  1.1  christos 			      "delete_node(): "
    671  1.1  christos 			      "dns_qp_deletename: %s",
    672  1.1  christos 			      isc_result_totext(result));
    673  1.1  christos 	}
    674  1.1  christos }
    675  1.1  christos 
    676  1.1  christos /*
    677  1.1  christos  * The caller must specify its currect node and tree lock status.
    678  1.1  christos  * It's okay for neither lock to be held if there are existing external
    679  1.1  christos  * references to the node, but if this is the first external reference,
    680  1.1  christos  * then the caller must be holding at least one lock.
    681  1.3  christos  *
    682  1.3  christos  * If incrementing erefs from zero, we also increment the node use counter
    683  1.3  christos  * in the qpcache object.
    684  1.3  christos  *
    685  1.3  christos  * This function is called from qpcnode_acquire(), so that internal
    686  1.3  christos  * and external references are acquired at the same time, and from
    687  1.3  christos  * qpcnode_release() when we only need to increase the internal references.
    688  1.1  christos  */
    689  1.1  christos static void
    690  1.3  christos qpcnode_erefs_increment(qpcache_t *qpdb, qpcnode_t *node,
    691  1.3  christos 			isc_rwlocktype_t nlocktype,
    692  1.3  christos 			isc_rwlocktype_t tlocktype DNS__DB_FLARG) {
    693  1.3  christos 	uint_fast32_t refs = isc_refcount_increment0(&node->erefs);
    694  1.1  christos 
    695  1.1  christos #if DNS_DB_NODETRACE
    696  1.1  christos 	fprintf(stderr, "incr:node:%s:%s:%u:%p->erefs = %" PRIuFAST32 "\n",
    697  1.1  christos 		func, file, line, node, refs + 1);
    698  1.1  christos #endif
    699  1.1  christos 
    700  1.3  christos 	if (refs > 0) {
    701  1.3  christos 		return;
    702  1.3  christos 	}
    703  1.3  christos 
    704  1.3  christos 	/*
    705  1.3  christos 	 * this is the first external reference to the node.
    706  1.3  christos 	 *
    707  1.3  christos 	 * we need to hold the node or tree lock to avoid
    708  1.3  christos 	 * incrementing the reference count while also deleting
    709  1.3  christos 	 * the node. delete_node() is always protected by both
    710  1.3  christos 	 * tree and node locks being write-locked.
    711  1.3  christos 	 */
    712  1.3  christos 	INSIST(nlocktype != isc_rwlocktype_none ||
    713  1.3  christos 	       tlocktype != isc_rwlocktype_none);
    714  1.3  christos 
    715  1.3  christos 	qpcache_ref(qpdb);
    716  1.3  christos }
    717  1.3  christos 
    718  1.3  christos static void
    719  1.3  christos qpcnode_acquire(qpcache_t *qpdb, qpcnode_t *node, isc_rwlocktype_t nlocktype,
    720  1.3  christos 		isc_rwlocktype_t tlocktype DNS__DB_FLARG) {
    721  1.3  christos 	qpcnode_ref(node);
    722  1.3  christos 	qpcnode_erefs_increment(qpdb, node, nlocktype,
    723  1.3  christos 				tlocktype DNS__DB_FLARG_PASS);
    724  1.3  christos }
    725  1.3  christos 
/*
 * Forward declaration: qpcnode_release() hands this to isc_async_run()
 * after queueing a node on a bucket's deadnodes queue.
 */
static void cleanup_deadnodes(void *arg);
    728  1.3  christos 
    729  1.3  christos /*
    730  1.3  christos  * Decrement the external references to a node. If the counter
    731  1.3  christos  * goes to zero, decrement the node use counter in the qpcache object
    732  1.3  christos  * as well, and return true. Otherwise return false.
    733  1.3  christos  */
    734  1.3  christos static bool
    735  1.3  christos qpcnode_erefs_decrement(qpcache_t *qpdb, qpcnode_t *node DNS__DB_FLARG) {
    736  1.3  christos 	uint_fast32_t refs = isc_refcount_decrement(&node->erefs);
    737  1.1  christos 
    738  1.1  christos #if DNS_DB_NODETRACE
    739  1.3  christos 	fprintf(stderr, "decr:node:%s:%s:%u:%p->erefs = %" PRIuFAST32 "\n",
    740  1.3  christos 		func, file, line, node, refs - 1);
    741  1.1  christos #endif
    742  1.3  christos 	if (refs > 1) {
    743  1.3  christos 		return false;
    744  1.1  christos 	}
    745  1.3  christos 
    746  1.3  christos 	qpcache_unref(qpdb);
    747  1.3  christos 	return true;
    748  1.1  christos }
    749  1.1  christos 
    750  1.1  christos /*
    751  1.3  christos  * Caller must be holding a node lock, either read or write.
    752  1.3  christos  *
    753  1.1  christos  * Note that the lock must be held even when node references are
    754  1.1  christos  * atomically modified; in that case the decrement operation itself does not
    755  1.1  christos  * have to be protected, but we must avoid a race condition where multiple
    756  1.1  christos  * threads are decreasing the reference to zero simultaneously and at least
    757  1.1  christos  * one of them is going to free the node.
    758  1.1  christos  *
    759  1.3  christos  * This calls dec_erefs() to decrement the external node reference counter,
    760  1.3  christos  * (and possibly the node use counter), cleans up and deletes the node
    761  1.3  christos  * if necessary, then decrements the internal reference counter as well.
    762  1.1  christos  */
    763  1.3  christos static void
    764  1.3  christos qpcnode_release(qpcache_t *qpdb, qpcnode_t *node, isc_rwlocktype_t *nlocktypep,
    765  1.3  christos 		isc_rwlocktype_t *tlocktypep, bool tryupgrade DNS__DB_FLARG) {
    766  1.3  christos 	REQUIRE(*nlocktypep != isc_rwlocktype_none);
    767  1.3  christos 
    768  1.1  christos 	isc_result_t result;
    769  1.1  christos 	bool locked = *tlocktypep != isc_rwlocktype_none;
    770  1.1  christos 	bool write_locked = false;
    771  1.1  christos 
    772  1.3  christos 	if (!qpcnode_erefs_decrement(qpdb, node DNS__DB_FLARG_PASS)) {
    773  1.3  christos 		goto unref;
    774  1.3  christos 	}
    775  1.1  christos 
    776  1.1  christos 	/* Handle easy and typical case first. */
    777  1.3  christos 	if (!node->dirty && node->data != NULL) {
    778  1.3  christos 		goto unref;
    779  1.1  christos 	}
    780  1.1  christos 
    781  1.1  christos 	if (*nlocktypep == isc_rwlocktype_read) {
    782  1.3  christos 		/*
    783  1.3  christos 		 * The external reference count went to zero and the node
    784  1.3  christos 		 * is dirty or has no data, so we might want to delete it.
    785  1.3  christos 		 * To do that, we'll need a write lock. If we don't already
    786  1.3  christos 		 * have one, we have to make sure nobody else has
    787  1.3  christos 		 * acquired a reference in the meantime, so we increment
    788  1.3  christos 		 * erefs (but NOT references!), upgrade the node lock,
    789  1.3  christos 		 * decrement erefs again, and see if it's still zero.
    790  1.3  christos 		 *
    791  1.3  christos 		 * We can't really assume anything about the result code of
    792  1.3  christos 		 * erefs_increment.  If another thread acquires reference it
    793  1.3  christos 		 * will be larger than 0, if it doesn't it is going to be 0.
    794  1.3  christos 		 */
    795  1.3  christos 		isc_rwlock_t *nlock = &qpdb->buckets[node->locknum].lock;
    796  1.3  christos 		qpcnode_erefs_increment(qpdb, node, *nlocktypep,
    797  1.3  christos 					*tlocktypep DNS__DB_FLARG_PASS);
    798  1.3  christos 		NODE_FORCEUPGRADE(nlock, nlocktypep);
    799  1.3  christos 		if (!qpcnode_erefs_decrement(qpdb, node DNS__DB_FLARG_PASS)) {
    800  1.3  christos 			goto unref;
    801  1.3  christos 		}
    802  1.1  christos 	}
    803  1.1  christos 
    804  1.1  christos 	if (node->dirty) {
    805  1.1  christos 		clean_cache_node(qpdb, node);
    806  1.1  christos 	}
    807  1.1  christos 
    808  1.1  christos 	/*
    809  1.1  christos 	 * Attempt to switch to a write lock on the tree.  If this fails,
    810  1.1  christos 	 * we will add this node to a linked list of nodes in this locking
    811  1.1  christos 	 * bucket which we will free later.
    812  1.1  christos 	 *
    813  1.1  christos 	 * Locking hierarchy notwithstanding, we don't need to free
    814  1.1  christos 	 * the node lock before acquiring the tree write lock because
    815  1.1  christos 	 * we only do a trylock.
    816  1.1  christos 	 */
    817  1.1  christos 	/* We are allowed to upgrade the tree lock */
    818  1.1  christos 
    819  1.1  christos 	switch (*tlocktypep) {
    820  1.1  christos 	case isc_rwlocktype_write:
    821  1.1  christos 		result = ISC_R_SUCCESS;
    822  1.1  christos 		break;
    823  1.1  christos 	case isc_rwlocktype_read:
    824  1.1  christos 		if (tryupgrade) {
    825  1.1  christos 			result = TREE_TRYUPGRADE(&qpdb->tree_lock, tlocktypep);
    826  1.1  christos 		} else {
    827  1.1  christos 			result = ISC_R_LOCKBUSY;
    828  1.1  christos 		}
    829  1.1  christos 		break;
    830  1.1  christos 	case isc_rwlocktype_none:
    831  1.1  christos 		result = TREE_TRYWRLOCK(&qpdb->tree_lock, tlocktypep);
    832  1.1  christos 		break;
    833  1.1  christos 	default:
    834  1.1  christos 		UNREACHABLE();
    835  1.1  christos 	}
    836  1.1  christos 	RUNTIME_CHECK(result == ISC_R_SUCCESS || result == ISC_R_LOCKBUSY);
    837  1.1  christos 	if (result == ISC_R_SUCCESS) {
    838  1.1  christos 		write_locked = true;
    839  1.1  christos 	}
    840  1.1  christos 
    841  1.3  christos 	if (node->data != NULL) {
    842  1.1  christos 		goto restore_locks;
    843  1.1  christos 	}
    844  1.1  christos 
    845  1.1  christos 	if (write_locked) {
    846  1.1  christos 		/*
    847  1.1  christos 		 * We can now delete the node.
    848  1.1  christos 		 */
    849  1.1  christos 		delete_node(qpdb, node);
    850  1.1  christos 	} else {
    851  1.3  christos 		qpcnode_acquire(qpdb, node, *nlocktypep,
    852  1.3  christos 				*tlocktypep DNS__DB_FLARG_PASS);
    853  1.1  christos 
    854  1.1  christos 		isc_queue_node_init(&node->deadlink);
    855  1.3  christos 		if (!isc_queue_enqueue_entry(
    856  1.3  christos 			    &qpdb->buckets[node->locknum].deadnodes, node,
    857  1.3  christos 			    deadlink))
    858  1.1  christos 		{
    859  1.1  christos 			/* Queue was empty, trigger new cleaning */
    860  1.3  christos 			isc_loop_t *loop = isc_loop_get(qpdb->loopmgr,
    861  1.3  christos 							node->locknum);
    862  1.1  christos 
    863  1.1  christos 			isc_async_run(loop, cleanup_deadnodes, qpdb);
    864  1.1  christos 		}
    865  1.1  christos 	}
    866  1.1  christos 
    867  1.1  christos restore_locks:
    868  1.1  christos 	/*
    869  1.3  christos 	 * Unlock the tree lock if it wasn't held previously.
    870  1.1  christos 	 */
    871  1.1  christos 	if (!locked && write_locked) {
    872  1.1  christos 		TREE_UNLOCK(&qpdb->tree_lock, tlocktypep);
    873  1.1  christos 	}
    874  1.1  christos 
    875  1.3  christos unref:
    876  1.1  christos 	qpcnode_unref(node);
    877  1.1  christos }
    878  1.1  christos 
    879  1.1  christos static void
    880  1.1  christos update_rrsetstats(dns_stats_t *stats, const dns_typepair_t htype,
    881  1.1  christos 		  const uint_least16_t hattributes, const bool increment) {
    882  1.1  christos 	dns_rdatastatstype_t statattributes = 0;
    883  1.1  christos 	dns_rdatastatstype_t base = 0;
    884  1.1  christos 	dns_rdatastatstype_t type;
    885  1.1  christos 	dns_slabheader_t *header = &(dns_slabheader_t){
    886  1.1  christos 		.type = htype,
    887  1.1  christos 		.attributes = hattributes,
    888  1.1  christos 	};
    889  1.1  christos 
    890  1.1  christos 	if (!EXISTS(header) || !STATCOUNT(header)) {
    891  1.1  christos 		return;
    892  1.1  christos 	}
    893  1.1  christos 
    894  1.1  christos 	if (NEGATIVE(header)) {
    895  1.1  christos 		if (NXDOMAIN(header)) {
    896  1.1  christos 			statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
    897  1.1  christos 		} else {
    898  1.1  christos 			statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
    899  1.1  christos 			base = DNS_TYPEPAIR_COVERS(header->type);
    900  1.1  christos 		}
    901  1.1  christos 	} else {
    902  1.1  christos 		base = DNS_TYPEPAIR_TYPE(header->type);
    903  1.1  christos 	}
    904  1.1  christos 
    905  1.1  christos 	if (STALE(header)) {
    906  1.1  christos 		statattributes |= DNS_RDATASTATSTYPE_ATTR_STALE;
    907  1.1  christos 	}
    908  1.1  christos 	if (ANCIENT(header)) {
    909  1.1  christos 		statattributes |= DNS_RDATASTATSTYPE_ATTR_ANCIENT;
    910  1.1  christos 	}
    911  1.1  christos 
    912  1.1  christos 	type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
    913  1.1  christos 	if (increment) {
    914  1.1  christos 		dns_rdatasetstats_increment(stats, type);
    915  1.1  christos 	} else {
    916  1.1  christos 		dns_rdatasetstats_decrement(stats, type);
    917  1.1  christos 	}
    918  1.1  christos }
    919  1.1  christos 
    920  1.1  christos static void
    921  1.1  christos mark(dns_slabheader_t *header, uint_least16_t flag) {
    922  1.1  christos 	uint_least16_t attributes = atomic_load_acquire(&header->attributes);
    923  1.1  christos 	uint_least16_t newattributes = 0;
    924  1.1  christos 	dns_stats_t *stats = NULL;
    925  1.1  christos 
    926  1.1  christos 	/*
    927  1.1  christos 	 * If we are already ancient there is nothing to do.
    928  1.1  christos 	 */
    929  1.1  christos 	do {
    930  1.1  christos 		if ((attributes & flag) != 0) {
    931  1.1  christos 			return;
    932  1.1  christos 		}
    933  1.1  christos 		newattributes = attributes | flag;
    934  1.1  christos 	} while (!atomic_compare_exchange_weak_acq_rel(
    935  1.1  christos 		&header->attributes, &attributes, newattributes));
    936  1.1  christos 
    937  1.1  christos 	/*
    938  1.1  christos 	 * Decrement and increment the stats counter for the appropriate
    939  1.1  christos 	 * RRtype.
    940  1.1  christos 	 */
    941  1.1  christos 	stats = dns_db_getrrsetstats(header->db);
    942  1.1  christos 	if (stats != NULL) {
    943  1.1  christos 		update_rrsetstats(stats, header->type, attributes, false);
    944  1.1  christos 		update_rrsetstats(stats, header->type, newattributes, true);
    945  1.1  christos 	}
    946  1.1  christos }
    947  1.1  christos 
    948  1.1  christos static void
    949  1.1  christos setttl(dns_slabheader_t *header, dns_ttl_t newttl) {
    950  1.1  christos 	dns_ttl_t oldttl = header->ttl;
    951  1.1  christos 
    952  1.1  christos 	header->ttl = newttl;
    953  1.1  christos 
    954  1.1  christos 	if (header->db == NULL || !dns_db_iscache(header->db)) {
    955  1.1  christos 		return;
    956  1.1  christos 	}
    957  1.1  christos 
    958  1.1  christos 	/*
    959  1.1  christos 	 * This is a cache. Adjust the heaps if necessary.
    960  1.1  christos 	 */
    961  1.1  christos 	if (header->heap == NULL || header->heap_index == 0 || newttl == oldttl)
    962  1.1  christos 	{
    963  1.1  christos 		return;
    964  1.1  christos 	}
    965  1.1  christos 
    966  1.1  christos 	if (newttl < oldttl) {
    967  1.1  christos 		isc_heap_increased(header->heap, header->heap_index);
    968  1.1  christos 	} else {
    969  1.1  christos 		isc_heap_decreased(header->heap, header->heap_index);
    970  1.1  christos 	}
    971  1.1  christos 
    972  1.1  christos 	if (newttl == 0) {
    973  1.1  christos 		isc_heap_delete(header->heap, header->heap_index);
    974  1.1  christos 	}
    975  1.1  christos }
    976  1.1  christos 
    977  1.3  christos static void
    978  1.3  christos mark_ancient(dns_slabheader_t *header) {
    979  1.3  christos 	setttl(header, 0);
    980  1.3  christos 	mark(header, DNS_SLABHEADERATTR_ANCIENT);
    981  1.3  christos 	HEADERNODE(header)->dirty = 1;
    982  1.3  christos }
    983  1.3  christos 
    984  1.1  christos /*
    985  1.1  christos  * Caller must hold the node (write) lock.
    986  1.1  christos  */
    987  1.1  christos static void
    988  1.1  christos expireheader(dns_slabheader_t *header, isc_rwlocktype_t *nlocktypep,
    989  1.1  christos 	     isc_rwlocktype_t *tlocktypep, dns_expire_t reason DNS__DB_FLARG) {
    990  1.3  christos 	mark_ancient(header);
    991  1.1  christos 
    992  1.1  christos 	if (isc_refcount_current(&HEADERNODE(header)->erefs) == 0) {
    993  1.1  christos 		qpcache_t *qpdb = (qpcache_t *)header->db;
    994  1.1  christos 
    995  1.1  christos 		/*
    996  1.1  christos 		 * If no one else is using the node, we can clean it up now.
    997  1.1  christos 		 * We first need to gain a new reference to the node to meet a
    998  1.3  christos 		 * requirement of qpcnode_release().
    999  1.1  christos 		 */
   1000  1.3  christos 		qpcnode_acquire(qpdb, HEADERNODE(header), *nlocktypep,
   1001  1.3  christos 				*tlocktypep DNS__DB_FLARG_PASS);
   1002  1.3  christos 		qpcnode_release(qpdb, HEADERNODE(header), nlocktypep,
   1003  1.3  christos 				tlocktypep, true DNS__DB_FLARG_PASS);
   1004  1.1  christos 
   1005  1.1  christos 		if (qpdb->cachestats == NULL) {
   1006  1.1  christos 			return;
   1007  1.1  christos 		}
   1008  1.1  christos 
   1009  1.1  christos 		switch (reason) {
   1010  1.1  christos 		case dns_expire_ttl:
   1011  1.1  christos 			isc_stats_increment(qpdb->cachestats,
   1012  1.1  christos 					    dns_cachestatscounter_deletettl);
   1013  1.1  christos 			break;
   1014  1.1  christos 		case dns_expire_lru:
   1015  1.1  christos 			isc_stats_increment(qpdb->cachestats,
   1016  1.1  christos 					    dns_cachestatscounter_deletelru);
   1017  1.1  christos 			break;
   1018  1.1  christos 		default:
   1019  1.1  christos 			break;
   1020  1.1  christos 		}
   1021  1.1  christos 	}
   1022  1.1  christos }
   1023  1.1  christos 
   1024  1.1  christos static void
   1025  1.1  christos update_cachestats(qpcache_t *qpdb, isc_result_t result) {
   1026  1.1  christos 	if (qpdb->cachestats == NULL) {
   1027  1.1  christos 		return;
   1028  1.1  christos 	}
   1029  1.1  christos 
   1030  1.1  christos 	switch (result) {
   1031  1.1  christos 	case DNS_R_COVERINGNSEC:
   1032  1.1  christos 		isc_stats_increment(qpdb->cachestats,
   1033  1.1  christos 				    dns_cachestatscounter_coveringnsec);
   1034  1.1  christos 		FALLTHROUGH;
   1035  1.1  christos 	case ISC_R_SUCCESS:
   1036  1.1  christos 	case DNS_R_CNAME:
   1037  1.1  christos 	case DNS_R_DNAME:
   1038  1.1  christos 	case DNS_R_DELEGATION:
   1039  1.1  christos 	case DNS_R_NCACHENXDOMAIN:
   1040  1.1  christos 	case DNS_R_NCACHENXRRSET:
   1041  1.1  christos 		isc_stats_increment(qpdb->cachestats,
   1042  1.1  christos 				    dns_cachestatscounter_hits);
   1043  1.1  christos 		break;
   1044  1.1  christos 	default:
   1045  1.1  christos 		isc_stats_increment(qpdb->cachestats,
   1046  1.1  christos 				    dns_cachestatscounter_misses);
   1047  1.1  christos 	}
   1048  1.1  christos }
   1049  1.1  christos 
   1050  1.1  christos static void
   1051  1.1  christos bindrdataset(qpcache_t *qpdb, qpcnode_t *node, dns_slabheader_t *header,
   1052  1.1  christos 	     isc_stdtime_t now, isc_rwlocktype_t nlocktype,
   1053  1.1  christos 	     isc_rwlocktype_t tlocktype,
   1054  1.1  christos 	     dns_rdataset_t *rdataset DNS__DB_FLARG) {
   1055  1.1  christos 	bool stale = STALE(header);
   1056  1.1  christos 	bool ancient = ANCIENT(header);
   1057  1.1  christos 
   1058  1.1  christos 	/*
   1059  1.1  christos 	 * Caller must be holding the node reader lock.
   1060  1.1  christos 	 * XXXJT: technically, we need a writer lock, since we'll increment
   1061  1.1  christos 	 * the header count below.  However, since the actual counter value
   1062  1.1  christos 	 * doesn't matter, we prioritize performance here.  (We may want to
   1063  1.1  christos 	 * use atomic increment when available).
   1064  1.1  christos 	 */
   1065  1.1  christos 
   1066  1.1  christos 	if (rdataset == NULL) {
   1067  1.1  christos 		return;
   1068  1.1  christos 	}
   1069  1.1  christos 
   1070  1.3  christos 	qpcnode_acquire(qpdb, node, nlocktype, tlocktype DNS__DB_FLARG_PASS);
   1071  1.1  christos 
   1072  1.1  christos 	INSIST(rdataset->methods == NULL); /* We must be disassociated. */
   1073  1.1  christos 
   1074  1.1  christos 	/*
   1075  1.1  christos 	 * Mark header stale or ancient if the RRset is no longer active.
   1076  1.1  christos 	 */
   1077  1.1  christos 	if (!ACTIVE(header, now)) {
   1078  1.1  christos 		dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, qpdb);
   1079  1.1  christos 		/*
   1080  1.1  christos 		 * If this data is in the stale window keep it and if
   1081  1.1  christos 		 * DNS_DBFIND_STALEOK is not set we tell the caller to
   1082  1.1  christos 		 * skip this record.  We skip the records with ZEROTTL
   1083  1.1  christos 		 * (these records should not be cached anyway).
   1084  1.1  christos 		 */
   1085  1.1  christos 
   1086  1.3  christos 		if (!ZEROTTL(header) && KEEPSTALE(qpdb) && stale_ttl > now) {
   1087  1.1  christos 			stale = true;
   1088  1.1  christos 		} else {
   1089  1.1  christos 			/*
   1090  1.1  christos 			 * We are not keeping stale, or it is outside the
   1091  1.1  christos 			 * stale window. Mark ancient, i.e. ready for cleanup.
   1092  1.1  christos 			 */
   1093  1.1  christos 			ancient = true;
   1094  1.1  christos 		}
   1095  1.1  christos 	}
   1096  1.1  christos 
   1097  1.1  christos 	rdataset->methods = &dns_rdataslab_rdatasetmethods;
   1098  1.1  christos 	rdataset->rdclass = qpdb->common.rdclass;
   1099  1.1  christos 	rdataset->type = DNS_TYPEPAIR_TYPE(header->type);
   1100  1.1  christos 	rdataset->covers = DNS_TYPEPAIR_COVERS(header->type);
   1101  1.3  christos 	rdataset->ttl = !ZEROTTL(header) ? header->ttl - now : 0;
   1102  1.1  christos 	rdataset->ttl = header->ttl - now;
   1103  1.1  christos 	rdataset->trust = header->trust;
   1104  1.1  christos 	rdataset->resign = 0;
   1105  1.1  christos 
   1106  1.1  christos 	if (NEGATIVE(header)) {
   1107  1.1  christos 		rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
   1108  1.1  christos 	}
   1109  1.1  christos 	if (NXDOMAIN(header)) {
   1110  1.1  christos 		rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
   1111  1.1  christos 	}
   1112  1.1  christos 	if (OPTOUT(header)) {
   1113  1.1  christos 		rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
   1114  1.1  christos 	}
   1115  1.1  christos 	if (PREFETCH(header)) {
   1116  1.1  christos 		rdataset->attributes |= DNS_RDATASETATTR_PREFETCH;
   1117  1.1  christos 	}
   1118  1.1  christos 
   1119  1.1  christos 	if (stale && !ancient) {
   1120  1.1  christos 		dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, qpdb);
   1121  1.1  christos 		if (stale_ttl > now) {
   1122  1.1  christos 			rdataset->ttl = stale_ttl - now;
   1123  1.1  christos 		} else {
   1124  1.1  christos 			rdataset->ttl = 0;
   1125  1.1  christos 		}
   1126  1.1  christos 		if (STALE_WINDOW(header)) {
   1127  1.1  christos 			rdataset->attributes |= DNS_RDATASETATTR_STALE_WINDOW;
   1128  1.1  christos 		}
   1129  1.1  christos 		rdataset->attributes |= DNS_RDATASETATTR_STALE;
   1130  1.3  christos 		rdataset->expire = header->ttl;
   1131  1.1  christos 	} else if (!ACTIVE(header, now)) {
   1132  1.1  christos 		rdataset->attributes |= DNS_RDATASETATTR_ANCIENT;
   1133  1.3  christos 		rdataset->ttl = 0;
   1134  1.1  christos 	}
   1135  1.1  christos 
   1136  1.1  christos 	rdataset->count = atomic_fetch_add_relaxed(&header->count, 1);
   1137  1.1  christos 
   1138  1.1  christos 	rdataset->slab.db = (dns_db_t *)qpdb;
   1139  1.1  christos 	rdataset->slab.node = (dns_dbnode_t *)node;
   1140  1.1  christos 	rdataset->slab.raw = dns_slabheader_raw(header);
   1141  1.1  christos 	rdataset->slab.iter_pos = NULL;
   1142  1.1  christos 	rdataset->slab.iter_count = 0;
   1143  1.1  christos 
   1144  1.1  christos 	/*
   1145  1.1  christos 	 * Add noqname proof.
   1146  1.1  christos 	 */
   1147  1.1  christos 	rdataset->slab.noqname = header->noqname;
   1148  1.1  christos 	if (header->noqname != NULL) {
   1149  1.1  christos 		rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
   1150  1.1  christos 	}
   1151  1.1  christos 	rdataset->slab.closest = header->closest;
   1152  1.1  christos 	if (header->closest != NULL) {
   1153  1.1  christos 		rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
   1154  1.1  christos 	}
   1155  1.1  christos }
   1156  1.1  christos 
   1157  1.1  christos static isc_result_t
   1158  1.1  christos setup_delegation(qpc_search_t *search, dns_dbnode_t **nodep,
   1159  1.1  christos 		 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset,
   1160  1.1  christos 		 isc_rwlocktype_t tlocktype DNS__DB_FLARG) {
   1161  1.1  christos 	dns_typepair_t type;
   1162  1.1  christos 	qpcnode_t *node = NULL;
   1163  1.1  christos 
   1164  1.1  christos 	REQUIRE(search != NULL);
   1165  1.1  christos 	REQUIRE(search->zonecut != NULL);
   1166  1.1  christos 	REQUIRE(search->zonecut_header != NULL);
   1167  1.1  christos 
   1168  1.1  christos 	/*
   1169  1.1  christos 	 * The caller MUST NOT be holding any node locks.
   1170  1.1  christos 	 */
   1171  1.1  christos 
   1172  1.1  christos 	node = search->zonecut;
   1173  1.1  christos 	type = search->zonecut_header->type;
   1174  1.1  christos 
   1175  1.1  christos 	if (nodep != NULL) {
   1176  1.1  christos 		/*
   1177  1.1  christos 		 * Note that we don't have to increment the node's reference
   1178  1.1  christos 		 * count here because we're going to use the reference we
   1179  1.1  christos 		 * already have in the search block.
   1180  1.1  christos 		 */
   1181  1.1  christos 		*nodep = node;
   1182  1.1  christos 		search->need_cleanup = false;
   1183  1.1  christos 	}
   1184  1.1  christos 	if (rdataset != NULL) {
   1185  1.1  christos 		isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   1186  1.3  christos 		isc_rwlock_t *nlock =
   1187  1.3  christos 			&search->qpdb->buckets[node->locknum].lock;
   1188  1.3  christos 		NODE_RDLOCK(nlock, &nlocktype);
   1189  1.1  christos 		bindrdataset(search->qpdb, node, search->zonecut_header,
   1190  1.1  christos 			     search->now, nlocktype, tlocktype,
   1191  1.1  christos 			     rdataset DNS__DB_FLARG_PASS);
   1192  1.1  christos 		if (sigrdataset != NULL && search->zonecut_sigheader != NULL) {
   1193  1.1  christos 			bindrdataset(search->qpdb, node,
   1194  1.1  christos 				     search->zonecut_sigheader, search->now,
   1195  1.1  christos 				     nlocktype, tlocktype,
   1196  1.1  christos 				     sigrdataset DNS__DB_FLARG_PASS);
   1197  1.1  christos 		}
   1198  1.3  christos 		NODE_UNLOCK(nlock, &nlocktype);
   1199  1.1  christos 	}
   1200  1.1  christos 
   1201  1.1  christos 	if (type == dns_rdatatype_dname) {
   1202  1.1  christos 		return DNS_R_DNAME;
   1203  1.1  christos 	}
   1204  1.1  christos 	return DNS_R_DELEGATION;
   1205  1.1  christos }
   1206  1.1  christos 
                       /*
                        * Decide whether the caller should skip 'header' because it is no
                        * longer ACTIVE() relative to search->now.
                        *
                        * Returns false when the header may be used: it is still active, or it
                        * is inside the stale window and the search options select stale data
                        * (see the individual branches below).  Returns true when the caller
                        * must skip it.
                        *
                        * Side effects on an inactive header:
                        *  - DNS_SLABHEADERATTR_STALE_WINDOW is cleared (and possibly set again);
                        *  - inside the stale window the header is marked stale and, with
                        *    DNS_DBFIND_STALESTART, last_refresh_fail_ts is set to search->now;
                        *  - past the stale window, if ttl < now - QPDB_VIRTUAL and the node
                        *    lock is (or can be upgraded to) a write lock, the header is either
                        *    unlinked from node->data and destroyed (node unreferenced) or
                        *    marked ancient.
                        * Because 'header' may be destroyed, callers read header->next before
                        * calling.  '*header_prev' is set to 'header' whenever an inactive
                        * header is left on the list; it is not touched for an active header
                        * or for a destroyed one.
                        *
                        * '*nlocktypep' is handed to NODE_TRYUPGRADE(), so the lock type seen
                        * by the caller may differ on return; the lock is not downgraded here.
                        */
   1207  1.1  christos static bool
   1208  1.1  christos check_stale_header(qpcnode_t *node, dns_slabheader_t *header,
   1209  1.3  christos 		   isc_rwlocktype_t *nlocktypep, isc_rwlock_t *nlock,
   1210  1.1  christos 		   qpc_search_t *search, dns_slabheader_t **header_prev) {
   1211  1.1  christos 	if (!ACTIVE(header, search->now)) {
                       		/* Time until which this header may still be served stale. */
   1212  1.1  christos 		dns_ttl_t stale = header->ttl + STALE_TTL(header, search->qpdb);
   1213  1.1  christos 		/*
   1214  1.1  christos 		 * If this data is in the stale window keep it and if
   1215  1.1  christos 		 * DNS_DBFIND_STALEOK is not set we tell the caller to
   1216  1.1  christos 		 * skip this record.  We skip the records with ZEROTTL
   1217  1.1  christos 		 * (these records should not be cached anyway).
   1218  1.1  christos 		 */
   1219  1.1  christos 
   1220  1.1  christos 		DNS_SLABHEADER_CLRATTR(header, DNS_SLABHEADERATTR_STALE_WINDOW);
   1221  1.1  christos 		if (!ZEROTTL(header) && KEEPSTALE(search->qpdb) &&
   1222  1.1  christos 		    stale > search->now)
   1223  1.1  christos 		{
   1224  1.1  christos 			mark(header, DNS_SLABHEADERATTR_STALE);
   1225  1.1  christos 			*header_prev = header;
   1226  1.1  christos 			/*
   1227  1.1  christos 			 * If DNS_DBFIND_STALESTART is set then it means we
   1228  1.1  christos 			 * failed to resolve the name during recursion, in
   1229  1.1  christos 			 * this case we mark the time in which the refresh
   1230  1.1  christos 			 * failed.
   1231  1.1  christos 			 */
   1232  1.1  christos 			if ((search->options & DNS_DBFIND_STALESTART) != 0) {
   1233  1.1  christos 				atomic_store_release(
   1234  1.1  christos 					&header->last_refresh_fail_ts,
   1235  1.1  christos 					search->now);
   1236  1.1  christos 			} else if ((search->options &
   1237  1.1  christos 				    DNS_DBFIND_STALEENABLED) != 0 &&
   1238  1.1  christos 				   search->now <
   1239  1.1  christos 					   (atomic_load_acquire(
   1240  1.1  christos 						    &header->last_refresh_fail_ts) +
   1241  1.1  christos 					    search->qpdb->serve_stale_refresh))
   1242  1.1  christos 			{
   1243  1.1  christos 				/*
   1244  1.1  christos 				 * If we are within interval between last
   1245  1.1  christos 				 * refresh failure time + 'stale-refresh-time',
   1246  1.1  christos 				 * then don't skip this stale entry but use it
   1247  1.1  christos 				 * instead.
   1248  1.1  christos 				 */
   1249  1.1  christos 				DNS_SLABHEADER_SETATTR(
   1250  1.1  christos 					header,
   1251  1.1  christos 					DNS_SLABHEADERATTR_STALE_WINDOW);
   1252  1.1  christos 				return false;
   1253  1.1  christos 			} else if ((search->options &
   1254  1.1  christos 				    DNS_DBFIND_STALETIMEOUT) != 0)
   1255  1.1  christos 			{
   1256  1.1  christos 				/*
   1257  1.1  christos 				 * We want stale RRset due to timeout, so we
   1258  1.1  christos 				 * don't skip it.
   1259  1.1  christos 				 */
   1260  1.1  christos 				return false;
   1261  1.1  christos 			}
                       			/*
                       			 * No special case applied: skip the stale header
                       			 * unless the caller passed DNS_DBFIND_STALEOK.
                       			 */
   1262  1.1  christos 			return (search->options & DNS_DBFIND_STALEOK) == 0;
   1263  1.1  christos 		}
   1264  1.1  christos 
   1265  1.1  christos 		/*
   1266  1.1  christos 		 * This rdataset is stale.  If no one else is using the
   1267  1.1  christos 		 * node, we can clean it up right now, otherwise we mark
   1268  1.1  christos 		 * it as ancient, and the node as dirty, so it will get
   1269  1.1  christos 		 * cleaned up later.
   1270  1.1  christos 		 */
   1271  1.1  christos 		if ((header->ttl < search->now - QPDB_VIRTUAL) &&
   1272  1.1  christos 		    (*nlocktypep == isc_rwlocktype_write ||
   1273  1.3  christos 		     NODE_TRYUPGRADE(nlock, nlocktypep) == ISC_R_SUCCESS))
   1274  1.1  christos 		{
   1275  1.1  christos 			/*
   1276  1.1  christos 			 * We update the node's status only when we can
   1277  1.1  christos 			 * get write access; otherwise, we leave others
   1278  1.1  christos 			 * to this work.  Periodical cleaning will
   1279  1.1  christos 			 * eventually take the job as the last resort.
   1280  1.1  christos 			 * We won't downgrade the lock, since other
   1281  1.1  christos 			 * rdatasets are probably stale, too.
   1282  1.1  christos 			 */
   1283  1.1  christos 
   1284  1.1  christos 			if (isc_refcount_current(&node->references) == 0) {
   1285  1.1  christos 				/*
   1286  1.1  christos 				 * header->down can be non-NULL if the
   1287  1.1  christos 				 * refcount has just decremented to 0
   1288  1.3  christos 				 * but qpcnode_release() has not
   1289  1.1  christos 				 * performed clean_cache_node(), in
   1290  1.1  christos 				 * which case we need to purge the stale
   1291  1.1  christos 				 * headers first.
   1292  1.1  christos 				 */
   1293  1.1  christos 				clean_stale_headers(header);
                       				/*
                       				 * Unlink 'header' from the node's list;
                       				 * '*header_prev' keeps naming its predecessor.
                       				 */
   1294  1.1  christos 				if (*header_prev != NULL) {
   1295  1.1  christos 					(*header_prev)->next = header->next;
   1296  1.1  christos 				} else {
   1297  1.1  christos 					node->data = header->next;
   1298  1.1  christos 				}
   1299  1.1  christos 				dns_slabheader_destroy(&header);
   1300  1.1  christos 			} else {
   1301  1.3  christos 				mark_ancient(header);
   1302  1.1  christos 				*header_prev = header;
   1303  1.1  christos 			}
   1304  1.1  christos 		} else {
   1305  1.1  christos 			*header_prev = header;
   1306  1.1  christos 		}
   1307  1.1  christos 		return true;
   1308  1.1  christos 	}
                       	/* Still active: nothing to skip. */
   1309  1.1  christos 	return false;
   1310  1.1  christos }
   1311  1.1  christos 
                       /*
                        * Check whether 'node' holds a usable DNAME and, if so, record it as
                        * the zone cut of the search passed in 'arg' (used as a qpc_search_t).
                        *
                        * The node's headers are scanned with the node lock held (taken for
                        * reading; check_stale_header() may upgrade it) for DNAME and
                        * RRSIG(DNAME) headers that check_stale_header() does not reject and
                        * that are EXISTS() and not ANCIENT().  If a DNAME header is found and
                        * its trust is not pending (or DNS_DBFIND_PENDINGOK is set), a reference
                        * to 'node' is acquired, search->zonecut, zonecut_header,
                        * zonecut_sigheader and need_cleanup are set, and DNS_R_PARTIALMATCH is
                        * returned.  Otherwise DNS_R_CONTINUE is returned and nothing is
                        * recorded in the search.
                        *
                        * Requires search->zonecut == NULL on entry.
                        */
   1312  1.1  christos static isc_result_t
   1313  1.1  christos check_zonecut(qpcnode_t *node, void *arg DNS__DB_FLARG) {
   1314  1.1  christos 	qpc_search_t *search = arg;
   1315  1.1  christos 	dns_slabheader_t *header = NULL;
   1316  1.1  christos 	dns_slabheader_t *header_prev = NULL, *header_next = NULL;
   1317  1.1  christos 	dns_slabheader_t *dname_header = NULL, *sigdname_header = NULL;
   1318  1.1  christos 	isc_result_t result;
   1319  1.3  christos 	isc_rwlock_t *nlock = NULL;
   1320  1.1  christos 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   1321  1.1  christos 
   1322  1.1  christos 	REQUIRE(search->zonecut == NULL);
   1323  1.1  christos 
   1324  1.3  christos 	nlock = &search->qpdb->buckets[node->locknum].lock;
   1325  1.3  christos 	NODE_RDLOCK(nlock, &nlocktype);
   1326  1.1  christos 
   1327  1.1  christos 	/*
   1328  1.1  christos 	 * Look for a DNAME or RRSIG DNAME rdataset.
   1329  1.1  christos 	 */
   1330  1.1  christos 	for (header = node->data; header != NULL; header = header_next) {
                       		/* Saved first: check_stale_header() may destroy 'header'. */
   1331  1.1  christos 		header_next = header->next;
   1332  1.3  christos 		if (check_stale_header(node, header, &nlocktype, nlock, search,
   1333  1.1  christos 				       &header_prev))
   1334  1.1  christos 		{
   1335  1.1  christos 			/* Do nothing. */
   1336  1.1  christos 		} else if (header->type == dns_rdatatype_dname &&
   1337  1.1  christos 			   EXISTS(header) && !ANCIENT(header))
   1338  1.1  christos 		{
   1339  1.1  christos 			dname_header = header;
   1340  1.1  christos 			header_prev = header;
   1341  1.1  christos 		} else if (header->type == DNS_SIGTYPE(dns_rdatatype_dname) &&
   1342  1.1  christos 			   EXISTS(header) && !ANCIENT(header))
   1343  1.1  christos 		{
   1344  1.1  christos 			sigdname_header = header;
   1345  1.1  christos 			header_prev = header;
   1346  1.1  christos 		} else {
   1347  1.1  christos 			header_prev = header;
   1348  1.1  christos 		}
   1349  1.1  christos 	}
   1350  1.1  christos 
                       	/*
                       	 * A DNAME with pending trust is only accepted when the caller
                       	 * passed DNS_DBFIND_PENDINGOK.
                       	 */
   1351  1.1  christos 	if (dname_header != NULL &&
   1352  1.1  christos 	    (!DNS_TRUST_PENDING(dname_header->trust) ||
   1353  1.1  christos 	     (search->options & DNS_DBFIND_PENDINGOK) != 0))
   1354  1.1  christos 	{
   1355  1.1  christos 		/*
   1356  1.1  christos 		 * We increment the reference count on node to ensure that
   1357  1.1  christos 		 * search->zonecut_header will still be valid later.
   1358  1.1  christos 		 */
   1359  1.3  christos 		qpcnode_acquire(search->qpdb, node, nlocktype,
   1360  1.3  christos 				isc_rwlocktype_none DNS__DB_FLARG_PASS);
   1361  1.1  christos 		search->zonecut = node;
   1362  1.1  christos 		search->zonecut_header = dname_header;
   1363  1.1  christos 		search->zonecut_sigheader = sigdname_header;
   1364  1.1  christos 		search->need_cleanup = true;
   1365  1.1  christos 		result = DNS_R_PARTIALMATCH;
   1366  1.1  christos 	} else {
   1367  1.1  christos 		result = DNS_R_CONTINUE;
   1368  1.1  christos 	}
   1369  1.1  christos 
   1370  1.3  christos 	NODE_UNLOCK(nlock, &nlocktype);
   1371  1.1  christos 
   1372  1.1  christos 	return result;
   1373  1.1  christos }
   1374  1.1  christos 
                       /*
                        * Walk the nodes recorded in search->chain from the last entry back to
                        * the first, looking for one that has a usable NS rdataset.
                        *
                        * At the first such node: 'foundname' (if non-NULL) receives the node
                        * name, '*nodep' (if 'nodep' is non-NULL) receives the node with a
                        * reference acquired, 'rdataset' is bound to the NS header and, when an
                        * RRSIG(NS) header was also found, 'sigrdataset' is bound to it.
                        * Headers for which need_headerupdate() is true are then passed to
                        * update_header() with the node lock held for writing.
                        *
                        * Returns DNS_R_DELEGATION if an NS rdataset was found, ISC_R_NOTFOUND
                        * otherwise.  The incoming value of 'node' is not used; it is
                        * overwritten from the chain on every iteration.
                        */
   1375  1.1  christos static isc_result_t
   1376  1.1  christos find_deepest_zonecut(qpc_search_t *search, qpcnode_t *node,
   1377  1.1  christos 		     dns_dbnode_t **nodep, dns_name_t *foundname,
   1378  1.1  christos 		     dns_rdataset_t *rdataset,
   1379  1.1  christos 		     dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
   1380  1.1  christos 	isc_result_t result = ISC_R_NOTFOUND;
   1381  1.1  christos 	qpcache_t *qpdb = NULL;
   1382  1.1  christos 
   1383  1.1  christos 	/*
   1384  1.1  christos 	 * Caller must be holding the tree lock.
   1385  1.1  christos 	 */
   1386  1.1  christos 
   1387  1.1  christos 	qpdb = search->qpdb;
   1388  1.1  christos 
   1389  1.1  christos 	for (int i = dns_qpchain_length(&search->chain) - 1; i >= 0; i--) {
   1390  1.1  christos 		dns_slabheader_t *header = NULL;
   1391  1.1  christos 		dns_slabheader_t *header_prev = NULL, *header_next = NULL;
   1392  1.1  christos 		dns_slabheader_t *found = NULL, *foundsig = NULL;
   1393  1.3  christos 		isc_rwlock_t *nlock = NULL;
   1394  1.1  christos 		isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   1395  1.1  christos 
   1396  1.1  christos 		dns_qpchain_node(&search->chain, i, NULL, (void **)&node, NULL);
   1397  1.3  christos 		nlock = &qpdb->buckets[node->locknum].lock;
   1398  1.1  christos 
   1399  1.3  christos 		NODE_RDLOCK(nlock, &nlocktype);
   1400  1.1  christos 
   1401  1.1  christos 		/*
   1402  1.1  christos 		 * Look for NS and RRSIG NS rdatasets.
   1403  1.1  christos 		 */
   1404  1.1  christos 		for (header = node->data; header != NULL; header = header_next)
   1405  1.1  christos 		{
                       			/*
                       			 * Saved first: check_stale_header() may destroy
                       			 * 'header'.
                       			 */
   1406  1.1  christos 			header_next = header->next;
   1407  1.3  christos 			if (check_stale_header(node, header, &nlocktype, nlock,
   1408  1.1  christos 					       search, &header_prev))
   1409  1.1  christos 			{
   1410  1.1  christos 				/* Do nothing. */
   1411  1.1  christos 			} else if (EXISTS(header) && !ANCIENT(header)) {
   1412  1.1  christos 				/*
   1413  1.1  christos 				 * We've found an extant rdataset.  See if
   1414  1.1  christos 				 * we're interested in it.
                       				 *
                       				 * The scan stops as soon as both the NS header
                       				 * and its RRSIG header have been seen.
   1415  1.1  christos 				 */
   1416  1.1  christos 				if (header->type == dns_rdatatype_ns) {
   1417  1.1  christos 					found = header;
   1418  1.1  christos 					if (foundsig != NULL) {
   1419  1.1  christos 						break;
   1420  1.1  christos 					}
   1421  1.1  christos 				} else if (header->type ==
   1422  1.1  christos 					   DNS_SIGTYPE(dns_rdatatype_ns))
   1423  1.1  christos 				{
   1424  1.1  christos 					foundsig = header;
   1425  1.1  christos 					if (found != NULL) {
   1426  1.1  christos 						break;
   1427  1.1  christos 					}
   1428  1.1  christos 				}
   1429  1.1  christos 				header_prev = header;
   1430  1.1  christos 			} else {
   1431  1.1  christos 				header_prev = header;
   1432  1.1  christos 			}
   1433  1.1  christos 		}
   1434  1.1  christos 
   1435  1.1  christos 		if (found != NULL) {
   1436  1.1  christos 			/*
   1437  1.1  christos 			 * If we have to set foundname, we do it before
   1438  1.1  christos 			 * anything else.
   1439  1.1  christos 			 */
   1440  1.1  christos 			if (foundname != NULL) {
   1441  1.1  christos 				dns_name_copy(&node->name, foundname);
   1442  1.1  christos 			}
   1443  1.1  christos 			result = DNS_R_DELEGATION;
   1444  1.1  christos 			if (nodep != NULL) {
   1445  1.3  christos 				qpcnode_acquire(
   1446  1.3  christos 					search->qpdb, node, nlocktype,
   1447  1.3  christos 					isc_rwlocktype_none DNS__DB_FLARG_PASS);
   1448  1.1  christos 				*nodep = node;
   1449  1.1  christos 			}
   1450  1.1  christos 			bindrdataset(search->qpdb, node, found, search->now,
   1451  1.1  christos 				     nlocktype, isc_rwlocktype_none,
   1452  1.1  christos 				     rdataset DNS__DB_FLARG_PASS);
   1453  1.1  christos 			if (foundsig != NULL) {
   1454  1.1  christos 				bindrdataset(search->qpdb, node, foundsig,
   1455  1.1  christos 					     search->now, nlocktype,
   1456  1.1  christos 					     isc_rwlocktype_none,
   1457  1.1  christos 					     sigrdataset DNS__DB_FLARG_PASS);
   1458  1.1  christos 			}
   1459  1.1  christos 			if (need_headerupdate(found, search->now) ||
   1460  1.1  christos 			    (foundsig != NULL &&
   1461  1.1  christos 			     need_headerupdate(foundsig, search->now)))
   1462  1.1  christos 			{
   1463  1.1  christos 				if (nlocktype != isc_rwlocktype_write) {
   1464  1.3  christos 					NODE_FORCEUPGRADE(nlock, &nlocktype);
   1465  1.1  christos 					POST(nlocktype);
   1466  1.1  christos 				}
                       				/*
                       				 * Each header is re-checked here, after the
                       				 * lock upgrade, before it is updated.
                       				 */
   1467  1.1  christos 				if (need_headerupdate(found, search->now)) {
   1468  1.1  christos 					update_header(search->qpdb, found,
   1469  1.1  christos 						      search->now);
   1470  1.1  christos 				}
   1471  1.1  christos 				if (foundsig != NULL &&
   1472  1.1  christos 				    need_headerupdate(foundsig, search->now))
   1473  1.1  christos 				{
   1474  1.1  christos 					update_header(search->qpdb, foundsig,
   1475  1.1  christos 						      search->now);
   1476  1.1  christos 				}
   1477  1.1  christos 			}
   1478  1.1  christos 		}
   1479  1.1  christos 
   1480  1.3  christos 		NODE_UNLOCK(nlock, &nlocktype);
   1481  1.1  christos 
                       		/* The first node with an NS rdataset ends the walk. */
   1482  1.1  christos 		if (found != NULL) {
   1483  1.1  christos 			break;
   1484  1.1  christos 		}
   1485  1.1  christos 	}
   1486  1.1  christos 
   1487  1.1  christos 	return result;
   1488  1.1  christos }
   1489  1.1  christos 
   1490  1.1  christos /*
   1491  1.1  christos  * Look for a potentially covering NSEC in the cache where `name`
   1492  1.1  christos  * is known not to exist.  This uses the auxiliary NSEC tree to find
   1493  1.1  christos  * the potential NSEC owner. If found, we update 'foundname', 'nodep',
   1494  1.1  christos  * 'rdataset' and 'sigrdataset', and return DNS_R_COVERINGNSEC.
   1495  1.1  christos  * Otherwise, return ISC_R_NOTFOUND.
                        *
                        * Steps: look 'name' up in search->qpdb->nsec, take the name at the
                        * iterator position as the predecessor, look that name up in
                        * search->qpdb->tree, then scan the node's headers for NSEC and
                        * RRSIG(NSEC).  'sigrdataset' is bound only when an RRSIG(NSEC) header
                        * was found.  On success a reference to the node is acquired for
                        * '*nodep'.
                        *
                        * If the predecessor lookup in the main tree fails, the result of
                        * dns_qp_getname() is returned as-is.
   1496  1.1  christos  */
   1497  1.1  christos static isc_result_t
   1498  1.1  christos find_coveringnsec(qpc_search_t *search, const dns_name_t *name,
   1499  1.1  christos 		  dns_dbnode_t **nodep, isc_stdtime_t now,
   1500  1.1  christos 		  dns_name_t *foundname, dns_rdataset_t *rdataset,
   1501  1.1  christos 		  dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
   1502  1.1  christos 	dns_fixedname_t fpredecessor, fixed;
   1503  1.1  christos 	dns_name_t *predecessor = NULL, *fname = NULL;
   1504  1.1  christos 	qpcnode_t *node = NULL;
   1505  1.1  christos 	dns_qpiter_t iter;
   1506  1.1  christos 	isc_result_t result;
   1507  1.1  christos 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   1508  1.3  christos 	isc_rwlock_t *nlock = NULL;
   1509  1.1  christos 	dns_typepair_t matchtype, sigmatchtype;
   1510  1.1  christos 	dns_slabheader_t *found = NULL, *foundsig = NULL;
   1511  1.1  christos 	dns_slabheader_t *header = NULL;
   1512  1.1  christos 	dns_slabheader_t *header_next = NULL, *header_prev = NULL;
   1513  1.1  christos 
   1514  1.1  christos 	/*
   1515  1.1  christos 	 * Look for the node in the auxiliary tree.
   1516  1.1  christos 	 */
   1517  1.1  christos 	result = dns_qp_lookup(search->qpdb->nsec, name, NULL, &iter, NULL,
   1518  1.1  christos 			       (void **)&node, NULL);
   1519  1.5  christos 	/*
   1520  1.5  christos 	 * When DNS_R_PARTIALMATCH or ISC_R_NOTFOUND is returned from
   1521  1.5  christos 	 * dns_qp_lookup there is potentially a covering NSEC present
   1522  1.5  christos 	 * in the cache so we need to search for it.  Otherwise we are
   1523  1.5  christos 	 * done here.
   1524  1.5  christos 	 */
   1525  1.5  christos 	if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) {
   1526  1.1  christos 		return ISC_R_NOTFOUND;
   1527  1.1  christos 	}
   1528  1.1  christos 
   1529  1.1  christos 	fname = dns_fixedname_initname(&fixed);
   1530  1.1  christos 	predecessor = dns_fixedname_initname(&fpredecessor);
   1531  1.1  christos 	matchtype = DNS_TYPEPAIR_VALUE(dns_rdatatype_nsec, 0);
   1532  1.1  christos 	sigmatchtype = DNS_SIGTYPE(dns_rdatatype_nsec);
   1533  1.1  christos 
   1534  1.1  christos 	/*
   1535  1.1  christos 	 * Extract predecessor from iterator.
   1536  1.1  christos 	 */
   1537  1.1  christos 	result = dns_qpiter_current(&iter, predecessor, NULL, NULL);
   1538  1.1  christos 	if (result != ISC_R_SUCCESS) {
   1539  1.1  christos 		return ISC_R_NOTFOUND;
   1540  1.1  christos 	}
   1541  1.1  christos 
   1542  1.1  christos 	/*
   1543  1.1  christos 	 * Lookup the predecessor in the main tree.
   1544  1.1  christos 	 */
   1545  1.1  christos 	node = NULL;
   1546  1.1  christos 	result = dns_qp_getname(search->qpdb->tree, predecessor, (void **)&node,
   1547  1.1  christos 				NULL);
                       	/* A failed lookup is reported with its own result code. */
   1548  1.1  christos 	if (result != ISC_R_SUCCESS) {
   1549  1.1  christos 		return result;
   1550  1.1  christos 	}
   1551  1.1  christos 	dns_name_copy(&node->name, fname);
   1552  1.1  christos 
   1553  1.3  christos 	nlock = &search->qpdb->buckets[node->locknum].lock;
   1554  1.3  christos 	NODE_RDLOCK(nlock, &nlocktype);
   1555  1.1  christos 	for (header = node->data; header != NULL; header = header_next) {
                       		/* Saved first: check_stale_header() may destroy 'header'. */
   1556  1.1  christos 		header_next = header->next;
   1557  1.3  christos 		if (check_stale_header(node, header, &nlocktype, nlock, search,
   1558  1.1  christos 				       &header_prev))
   1559  1.1  christos 		{
   1560  1.1  christos 			continue;
   1561  1.1  christos 		}
                       		/*
                       		 * Ignore headers that are NONEXISTENT() or whose
                       		 * DNS_TYPEPAIR_TYPE() is 0.
                       		 */
   1562  1.1  christos 		if (NONEXISTENT(header) || DNS_TYPEPAIR_TYPE(header->type) == 0)
   1563  1.1  christos 		{
   1564  1.1  christos 			header_prev = header;
   1565  1.1  christos 			continue;
   1566  1.1  christos 		}
                       		/* Stop once both NSEC and RRSIG(NSEC) have been seen. */
   1567  1.1  christos 		if (header->type == matchtype) {
   1568  1.1  christos 			found = header;
   1569  1.1  christos 			if (foundsig != NULL) {
   1570  1.1  christos 				break;
   1571  1.1  christos 			}
   1572  1.1  christos 		} else if (header->type == sigmatchtype) {
   1573  1.1  christos 			foundsig = header;
   1574  1.1  christos 			if (found != NULL) {
   1575  1.1  christos 				break;
   1576  1.1  christos 			}
   1577  1.1  christos 		}
   1578  1.1  christos 		header_prev = header;
   1579  1.1  christos 	}
   1580  1.1  christos 	if (found != NULL) {
   1581  1.1  christos 		bindrdataset(search->qpdb, node, found, now, nlocktype,
   1582  1.1  christos 			     isc_rwlocktype_none, rdataset DNS__DB_FLARG_PASS);
   1583  1.1  christos 		if (foundsig != NULL) {
   1584  1.1  christos 			bindrdataset(search->qpdb, node, foundsig, now,
   1585  1.1  christos 				     nlocktype, isc_rwlocktype_none,
   1586  1.1  christos 				     sigrdataset DNS__DB_FLARG_PASS);
   1587  1.1  christos 		}
                       		/*
                       		 * NOTE(review): 'nodep' and 'foundname' are dereferenced
                       		 * unconditionally below, whereas find_deepest_zonecut()
                       		 * NULL-checks both -- confirm callers never pass NULL.
                       		 */
   1588  1.3  christos 		qpcnode_acquire(search->qpdb, node, nlocktype,
   1589  1.3  christos 				isc_rwlocktype_none DNS__DB_FLARG_PASS);
   1590  1.1  christos 
   1591  1.1  christos 		dns_name_copy(fname, foundname);
   1592  1.1  christos 
   1593  1.1  christos 		*nodep = node;
   1594  1.1  christos 		result = DNS_R_COVERINGNSEC;
   1595  1.1  christos 	} else {
   1596  1.1  christos 		result = ISC_R_NOTFOUND;
   1597  1.1  christos 	}
   1598  1.3  christos 	NODE_UNLOCK(nlock, &nlocktype);
   1599  1.1  christos 	return result;
   1600  1.1  christos }
   1601  1.1  christos 
   1602  1.1  christos static isc_result_t
   1603  1.1  christos find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
   1604  1.1  christos      dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
   1605  1.1  christos      dns_dbnode_t **nodep, dns_name_t *foundname, dns_rdataset_t *rdataset,
   1606  1.1  christos      dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
   1607  1.1  christos 	qpcnode_t *node = NULL;
   1608  1.1  christos 	isc_result_t result;
   1609  1.1  christos 	qpc_search_t search;
   1610  1.1  christos 	bool cname_ok = true;
   1611  1.1  christos 	bool found_noqname = false;
   1612  1.1  christos 	bool all_negative = true;
   1613  1.1  christos 	bool empty_node;
   1614  1.3  christos 	isc_rwlock_t *nlock = NULL;
   1615  1.1  christos 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
   1616  1.1  christos 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   1617  1.1  christos 	dns_slabheader_t *header = NULL;
   1618  1.1  christos 	dns_slabheader_t *header_prev = NULL, *header_next = NULL;
   1619  1.1  christos 	dns_slabheader_t *found = NULL, *nsheader = NULL;
   1620  1.1  christos 	dns_slabheader_t *foundsig = NULL, *nssig = NULL, *cnamesig = NULL;
   1621  1.1  christos 	dns_slabheader_t *update = NULL, *updatesig = NULL;
   1622  1.1  christos 	dns_slabheader_t *nsecheader = NULL, *nsecsig = NULL;
   1623  1.1  christos 	dns_typepair_t sigtype, negtype;
   1624  1.1  christos 
   1625  1.1  christos 	UNUSED(version);
   1626  1.1  christos 
   1627  1.1  christos 	REQUIRE(VALID_QPDB((qpcache_t *)db));
   1628  1.1  christos 	REQUIRE(version == NULL);
   1629  1.1  christos 
   1630  1.1  christos 	if (now == 0) {
   1631  1.1  christos 		now = isc_stdtime_now();
   1632  1.1  christos 	}
   1633  1.1  christos 
   1634  1.1  christos 	search = (qpc_search_t){
   1635  1.1  christos 		.qpdb = (qpcache_t *)db,
   1636  1.1  christos 		.options = options,
   1637  1.1  christos 		.now = now,
   1638  1.1  christos 	};
   1639  1.1  christos 
   1640  1.1  christos 	TREE_RDLOCK(&search.qpdb->tree_lock, &tlocktype);
   1641  1.1  christos 
   1642  1.1  christos 	/*
   1643  1.1  christos 	 * Search down from the root of the tree.
   1644  1.1  christos 	 */
   1645  1.1  christos 	result = dns_qp_lookup(search.qpdb->tree, name, NULL, NULL,
   1646  1.1  christos 			       &search.chain, (void **)&node, NULL);
   1647  1.1  christos 	if (result != ISC_R_NOTFOUND && foundname != NULL) {
   1648  1.1  christos 		dns_name_copy(&node->name, foundname);
   1649  1.1  christos 	}
   1650  1.1  christos 
   1651  1.1  christos 	/*
   1652  1.1  christos 	 * Check the QP chain to see if there's a node above us with an
   1653  1.1  christos 	 * active DNAME or NS rdatasets.
   1654  1.1  christos 	 *
   1655  1.1  christos 	 * We're only interested in nodes above QNAME, so if the result
   1656  1.1  christos 	 * was success, then we skip the last item in the chain.
   1657  1.1  christos 	 */
   1658  1.1  christos 	unsigned int len = dns_qpchain_length(&search.chain);
   1659  1.1  christos 	if (result == ISC_R_SUCCESS) {
   1660  1.1  christos 		len--;
   1661  1.1  christos 	}
   1662  1.1  christos 
   1663  1.1  christos 	for (unsigned int i = 0; i < len; i++) {
   1664  1.1  christos 		isc_result_t zcresult;
   1665  1.1  christos 		qpcnode_t *encloser = NULL;
   1666  1.1  christos 
   1667  1.1  christos 		dns_qpchain_node(&search.chain, i, NULL, (void **)&encloser,
   1668  1.1  christos 				 NULL);
   1669  1.1  christos 
   1670  1.1  christos 		zcresult = check_zonecut(encloser,
   1671  1.1  christos 					 (void *)&search DNS__DB_FLARG_PASS);
   1672  1.1  christos 		if (zcresult != DNS_R_CONTINUE) {
   1673  1.1  christos 			result = DNS_R_PARTIALMATCH;
   1674  1.1  christos 			search.chain.len = i - 1;
   1675  1.1  christos 			node = encloser;
   1676  1.1  christos 			if (foundname != NULL) {
   1677  1.1  christos 				dns_name_copy(&node->name, foundname);
   1678  1.1  christos 			}
   1679  1.1  christos 			break;
   1680  1.1  christos 		}
   1681  1.1  christos 	}
   1682  1.1  christos 
   1683  1.1  christos 	if (result == DNS_R_PARTIALMATCH) {
   1684  1.1  christos 		/*
   1685  1.1  christos 		 * If we discovered a covering DNAME skip looking for a covering
   1686  1.1  christos 		 * NSEC.
   1687  1.1  christos 		 */
   1688  1.1  christos 		if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 &&
   1689  1.1  christos 		    (search.zonecut_header == NULL ||
   1690  1.1  christos 		     search.zonecut_header->type != dns_rdatatype_dname))
   1691  1.1  christos 		{
   1692  1.1  christos 			result = find_coveringnsec(
   1693  1.1  christos 				&search, name, nodep, now, foundname, rdataset,
   1694  1.1  christos 				sigrdataset DNS__DB_FLARG_PASS);
   1695  1.1  christos 			if (result == DNS_R_COVERINGNSEC) {
   1696  1.1  christos 				goto tree_exit;
   1697  1.1  christos 			}
   1698  1.1  christos 		}
   1699  1.1  christos 		if (search.zonecut != NULL) {
   1700  1.1  christos 			result = setup_delegation(&search, nodep, rdataset,
   1701  1.1  christos 						  sigrdataset,
   1702  1.1  christos 						  tlocktype DNS__DB_FLARG_PASS);
   1703  1.1  christos 			goto tree_exit;
   1704  1.1  christos 		} else {
   1705  1.1  christos 		find_ns:
   1706  1.1  christos 			result = find_deepest_zonecut(
   1707  1.1  christos 				&search, node, nodep, foundname, rdataset,
   1708  1.1  christos 				sigrdataset DNS__DB_FLARG_PASS);
   1709  1.1  christos 			goto tree_exit;
   1710  1.1  christos 		}
   1711  1.1  christos 	} else if (result != ISC_R_SUCCESS) {
   1712  1.1  christos 		goto tree_exit;
   1713  1.1  christos 	}
   1714  1.1  christos 
   1715  1.1  christos 	/*
   1716  1.1  christos 	 * Certain DNSSEC types are not subject to CNAME matching
   1717  1.1  christos 	 * (RFC4035, section 2.5 and RFC3007).
   1718  1.1  christos 	 *
   1719  1.1  christos 	 * We don't check for RRSIG, because we don't store RRSIG records
   1720  1.1  christos 	 * directly.
   1721  1.1  christos 	 */
   1722  1.1  christos 	if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) {
   1723  1.1  christos 		cname_ok = false;
   1724  1.1  christos 	}
   1725  1.1  christos 
   1726  1.1  christos 	/*
   1727  1.1  christos 	 * We now go looking for rdata...
   1728  1.1  christos 	 */
   1729  1.1  christos 
   1730  1.3  christos 	nlock = &search.qpdb->buckets[node->locknum].lock;
   1731  1.3  christos 	NODE_RDLOCK(nlock, &nlocktype);
   1732  1.1  christos 
   1733  1.1  christos 	/*
   1734  1.1  christos 	 * These pointers need to be reset here in case we did
   1735  1.1  christos 	 * 'goto find_ns' from somewhere below.
   1736  1.1  christos 	 */
   1737  1.1  christos 	found = NULL;
   1738  1.1  christos 	foundsig = NULL;
   1739  1.1  christos 	sigtype = DNS_SIGTYPE(type);
   1740  1.1  christos 	negtype = DNS_TYPEPAIR_VALUE(0, type);
   1741  1.1  christos 	nsheader = NULL;
   1742  1.1  christos 	nsecheader = NULL;
   1743  1.1  christos 	nssig = NULL;
   1744  1.1  christos 	nsecsig = NULL;
   1745  1.1  christos 	cnamesig = NULL;
   1746  1.1  christos 	empty_node = true;
   1747  1.1  christos 	header_prev = NULL;
   1748  1.1  christos 	for (header = node->data; header != NULL; header = header_next) {
   1749  1.1  christos 		header_next = header->next;
   1750  1.3  christos 		if (check_stale_header(node, header, &nlocktype, nlock, &search,
   1751  1.1  christos 				       &header_prev))
   1752  1.1  christos 		{
   1753  1.1  christos 			/* Do nothing. */
   1754  1.1  christos 		} else if (EXISTS(header) && !ANCIENT(header)) {
   1755  1.1  christos 			/*
   1756  1.1  christos 			 * We now know that there is at least one active
   1757  1.1  christos 			 * non-stale rdataset at this node.
   1758  1.1  christos 			 */
   1759  1.1  christos 			empty_node = false;
   1760  1.1  christos 			if (header->noqname != NULL &&
   1761  1.1  christos 			    header->trust == dns_trust_secure)
   1762  1.1  christos 			{
   1763  1.1  christos 				found_noqname = true;
   1764  1.1  christos 			}
   1765  1.1  christos 			if (!NEGATIVE(header)) {
   1766  1.1  christos 				all_negative = false;
   1767  1.1  christos 			}
   1768  1.1  christos 
   1769  1.1  christos 			/*
   1770  1.1  christos 			 * If we found a type we were looking for, remember
   1771  1.1  christos 			 * it.
   1772  1.1  christos 			 */
   1773  1.1  christos 			if (header->type == type ||
   1774  1.1  christos 			    (type == dns_rdatatype_any &&
   1775  1.1  christos 			     DNS_TYPEPAIR_TYPE(header->type) != 0) ||
   1776  1.1  christos 			    (cname_ok && header->type == dns_rdatatype_cname))
   1777  1.1  christos 			{
   1778  1.1  christos 				/*
   1779  1.1  christos 				 * We've found the answer.
   1780  1.1  christos 				 */
   1781  1.1  christos 				found = header;
   1782  1.1  christos 				if (header->type == dns_rdatatype_cname &&
   1783  1.1  christos 				    cname_ok)
   1784  1.1  christos 				{
   1785  1.1  christos 					/*
   1786  1.1  christos 					 * If we've already got the
   1787  1.1  christos 					 * CNAME RRSIG, use it.
   1788  1.1  christos 					 */
   1789  1.1  christos 					if (cnamesig != NULL) {
   1790  1.1  christos 						foundsig = cnamesig;
   1791  1.1  christos 					} else {
   1792  1.1  christos 						sigtype = DNS_SIGTYPE(
   1793  1.1  christos 							dns_rdatatype_cname);
   1794  1.1  christos 					}
   1795  1.1  christos 				}
   1796  1.1  christos 			} else if (header->type == sigtype) {
   1797  1.1  christos 				/*
   1798  1.1  christos 				 * We've found the RRSIG rdataset for our
   1799  1.1  christos 				 * target type.  Remember it.
   1800  1.1  christos 				 */
   1801  1.1  christos 				foundsig = header;
   1802  1.1  christos 			} else if (header->type == RDATATYPE_NCACHEANY ||
   1803  1.1  christos 				   header->type == negtype)
   1804  1.1  christos 			{
   1805  1.1  christos 				/*
   1806  1.1  christos 				 * We've found a negative cache entry.
   1807  1.1  christos 				 */
   1808  1.1  christos 				found = header;
   1809  1.1  christos 			} else if (header->type == dns_rdatatype_ns) {
   1810  1.1  christos 				/*
   1811  1.1  christos 				 * Remember a NS rdataset even if we're
   1812  1.1  christos 				 * not specifically looking for it, because
   1813  1.1  christos 				 * we might need it later.
   1814  1.1  christos 				 */
   1815  1.1  christos 				nsheader = header;
   1816  1.1  christos 			} else if (header->type ==
   1817  1.1  christos 				   DNS_SIGTYPE(dns_rdatatype_ns))
   1818  1.1  christos 			{
   1819  1.1  christos 				/*
   1820  1.1  christos 				 * If we need the NS rdataset, we'll also
   1821  1.1  christos 				 * need its signature.
   1822  1.1  christos 				 */
   1823  1.1  christos 				nssig = header;
   1824  1.1  christos 			} else if (header->type == dns_rdatatype_nsec) {
   1825  1.1  christos 				nsecheader = header;
   1826  1.1  christos 			} else if (header->type ==
   1827  1.1  christos 				   DNS_SIGTYPE(dns_rdatatype_nsec))
   1828  1.1  christos 			{
   1829  1.1  christos 				nsecsig = header;
   1830  1.1  christos 			} else if (cname_ok &&
   1831  1.1  christos 				   header->type ==
   1832  1.1  christos 					   DNS_SIGTYPE(dns_rdatatype_cname))
   1833  1.1  christos 			{
   1834  1.1  christos 				/*
   1835  1.1  christos 				 * If we get a CNAME match, we'll also need
   1836  1.1  christos 				 * its signature.
   1837  1.1  christos 				 */
   1838  1.1  christos 				cnamesig = header;
   1839  1.1  christos 			}
   1840  1.1  christos 			header_prev = header;
   1841  1.1  christos 		} else {
   1842  1.1  christos 			header_prev = header;
   1843  1.1  christos 		}
   1844  1.1  christos 	}
   1845  1.1  christos 
   1846  1.1  christos 	if (empty_node) {
   1847  1.1  christos 		/*
   1848  1.1  christos 		 * We have an exact match for the name, but there are no
   1849  1.1  christos 		 * extant rdatasets.  That means that this node doesn't
   1850  1.1  christos 		 * meaningfully exist, and that we really have a partial match.
   1851  1.1  christos 		 */
   1852  1.3  christos 		NODE_UNLOCK(nlock, &nlocktype);
   1853  1.1  christos 		if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
   1854  1.1  christos 			result = find_coveringnsec(
   1855  1.1  christos 				&search, name, nodep, now, foundname, rdataset,
   1856  1.1  christos 				sigrdataset DNS__DB_FLARG_PASS);
   1857  1.1  christos 			if (result == DNS_R_COVERINGNSEC) {
   1858  1.1  christos 				goto tree_exit;
   1859  1.1  christos 			}
   1860  1.1  christos 		}
   1861  1.1  christos 		goto find_ns;
   1862  1.1  christos 	}
   1863  1.1  christos 
   1864  1.1  christos 	/*
   1865  1.1  christos 	 * If we didn't find what we were looking for...
   1866  1.1  christos 	 */
   1867  1.1  christos 	if (found == NULL ||
   1868  1.1  christos 	    (DNS_TRUST_ADDITIONAL(found->trust) &&
   1869  1.1  christos 	     ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
   1870  1.1  christos 	    (found->trust == dns_trust_glue &&
   1871  1.1  christos 	     ((options & DNS_DBFIND_GLUEOK) == 0)) ||
   1872  1.1  christos 	    (DNS_TRUST_PENDING(found->trust) &&
   1873  1.1  christos 	     ((options & DNS_DBFIND_PENDINGOK) == 0)))
   1874  1.1  christos 	{
   1875  1.1  christos 		/*
   1876  1.1  christos 		 * Return covering NODATA NSEC record.
   1877  1.1  christos 		 */
   1878  1.1  christos 		if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 &&
   1879  1.1  christos 		    nsecheader != NULL)
   1880  1.1  christos 		{
   1881  1.1  christos 			if (nodep != NULL) {
   1882  1.3  christos 				qpcnode_acquire(search.qpdb, node, nlocktype,
   1883  1.3  christos 						tlocktype DNS__DB_FLARG_PASS);
   1884  1.1  christos 				*nodep = node;
   1885  1.1  christos 			}
   1886  1.1  christos 			bindrdataset(search.qpdb, node, nsecheader, search.now,
   1887  1.1  christos 				     nlocktype, tlocktype,
   1888  1.1  christos 				     rdataset DNS__DB_FLARG_PASS);
   1889  1.1  christos 			if (need_headerupdate(nsecheader, search.now)) {
   1890  1.1  christos 				update = nsecheader;
   1891  1.1  christos 			}
   1892  1.1  christos 			if (nsecsig != NULL) {
   1893  1.1  christos 				bindrdataset(search.qpdb, node, nsecsig,
   1894  1.1  christos 					     search.now, nlocktype, tlocktype,
   1895  1.1  christos 					     sigrdataset DNS__DB_FLARG_PASS);
   1896  1.1  christos 				if (need_headerupdate(nsecsig, search.now)) {
   1897  1.1  christos 					updatesig = nsecsig;
   1898  1.1  christos 				}
   1899  1.1  christos 			}
   1900  1.1  christos 			result = DNS_R_COVERINGNSEC;
   1901  1.1  christos 			goto node_exit;
   1902  1.1  christos 		}
   1903  1.1  christos 
   1904  1.1  christos 		/*
   1905  1.1  christos 		 * This name was from a wild card.  Look for a covering NSEC.
   1906  1.1  christos 		 */
   1907  1.1  christos 		if (found == NULL && (found_noqname || all_negative) &&
   1908  1.1  christos 		    (search.options & DNS_DBFIND_COVERINGNSEC) != 0)
   1909  1.1  christos 		{
   1910  1.3  christos 			NODE_UNLOCK(nlock, &nlocktype);
   1911  1.1  christos 			result = find_coveringnsec(
   1912  1.1  christos 				&search, name, nodep, now, foundname, rdataset,
   1913  1.1  christos 				sigrdataset DNS__DB_FLARG_PASS);
   1914  1.1  christos 			if (result == DNS_R_COVERINGNSEC) {
   1915  1.1  christos 				goto tree_exit;
   1916  1.1  christos 			}
   1917  1.1  christos 			goto find_ns;
   1918  1.1  christos 		}
   1919  1.1  christos 
   1920  1.1  christos 		/*
   1921  1.1  christos 		 * If there is an NS rdataset at this node, then this is the
   1922  1.1  christos 		 * deepest zone cut.
   1923  1.1  christos 		 */
   1924  1.1  christos 		if (nsheader != NULL) {
   1925  1.1  christos 			if (nodep != NULL) {
   1926  1.3  christos 				qpcnode_acquire(search.qpdb, node, nlocktype,
   1927  1.3  christos 						tlocktype DNS__DB_FLARG_PASS);
   1928  1.1  christos 				*nodep = node;
   1929  1.1  christos 			}
   1930  1.1  christos 			bindrdataset(search.qpdb, node, nsheader, search.now,
   1931  1.1  christos 				     nlocktype, tlocktype,
   1932  1.1  christos 				     rdataset DNS__DB_FLARG_PASS);
   1933  1.1  christos 			if (need_headerupdate(nsheader, search.now)) {
   1934  1.1  christos 				update = nsheader;
   1935  1.1  christos 			}
   1936  1.1  christos 			if (nssig != NULL) {
   1937  1.1  christos 				bindrdataset(search.qpdb, node, nssig,
   1938  1.1  christos 					     search.now, nlocktype, tlocktype,
   1939  1.1  christos 					     sigrdataset DNS__DB_FLARG_PASS);
   1940  1.1  christos 				if (need_headerupdate(nssig, search.now)) {
   1941  1.1  christos 					updatesig = nssig;
   1942  1.1  christos 				}
   1943  1.1  christos 			}
   1944  1.1  christos 			result = DNS_R_DELEGATION;
   1945  1.1  christos 			goto node_exit;
   1946  1.1  christos 		}
   1947  1.1  christos 
   1948  1.1  christos 		/*
   1949  1.1  christos 		 * Go find the deepest zone cut.
   1950  1.1  christos 		 */
   1951  1.3  christos 		NODE_UNLOCK(nlock, &nlocktype);
   1952  1.1  christos 		goto find_ns;
   1953  1.1  christos 	}
   1954  1.1  christos 
   1955  1.1  christos 	/*
   1956  1.1  christos 	 * We found what we were looking for, or we found a CNAME.
   1957  1.1  christos 	 */
   1958  1.1  christos 
   1959  1.1  christos 	if (nodep != NULL) {
   1960  1.3  christos 		qpcnode_acquire(search.qpdb, node, nlocktype,
   1961  1.3  christos 				tlocktype DNS__DB_FLARG_PASS);
   1962  1.1  christos 		*nodep = node;
   1963  1.1  christos 	}
   1964  1.1  christos 
   1965  1.1  christos 	if (NEGATIVE(found)) {
   1966  1.1  christos 		/*
   1967  1.1  christos 		 * We found a negative cache entry.
   1968  1.1  christos 		 */
   1969  1.1  christos 		if (NXDOMAIN(found)) {
   1970  1.1  christos 			result = DNS_R_NCACHENXDOMAIN;
   1971  1.1  christos 		} else {
   1972  1.1  christos 			result = DNS_R_NCACHENXRRSET;
   1973  1.1  christos 		}
   1974  1.1  christos 	} else if (type != found->type && type != dns_rdatatype_any &&
   1975  1.1  christos 		   found->type == dns_rdatatype_cname)
   1976  1.1  christos 	{
   1977  1.1  christos 		/*
   1978  1.1  christos 		 * We weren't doing an ANY query and we found a CNAME instead
   1979  1.1  christos 		 * of the type we were looking for, so we need to indicate
   1980  1.1  christos 		 * that result to the caller.
   1981  1.1  christos 		 */
   1982  1.1  christos 		result = DNS_R_CNAME;
   1983  1.1  christos 	} else {
   1984  1.1  christos 		/*
   1985  1.1  christos 		 * An ordinary successful query!
   1986  1.1  christos 		 */
   1987  1.1  christos 		result = ISC_R_SUCCESS;
   1988  1.1  christos 	}
   1989  1.1  christos 
   1990  1.1  christos 	if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
   1991  1.1  christos 	    result == DNS_R_NCACHENXRRSET)
   1992  1.1  christos 	{
   1993  1.1  christos 		bindrdataset(search.qpdb, node, found, search.now, nlocktype,
   1994  1.1  christos 			     tlocktype, rdataset DNS__DB_FLARG_PASS);
   1995  1.1  christos 		if (need_headerupdate(found, search.now)) {
   1996  1.1  christos 			update = found;
   1997  1.1  christos 		}
   1998  1.1  christos 		if (!NEGATIVE(found) && foundsig != NULL) {
   1999  1.1  christos 			bindrdataset(search.qpdb, node, foundsig, search.now,
   2000  1.1  christos 				     nlocktype, tlocktype,
   2001  1.1  christos 				     sigrdataset DNS__DB_FLARG_PASS);
   2002  1.1  christos 			if (need_headerupdate(foundsig, search.now)) {
   2003  1.1  christos 				updatesig = foundsig;
   2004  1.1  christos 			}
   2005  1.1  christos 		}
   2006  1.1  christos 	}
   2007  1.1  christos 
   2008  1.1  christos node_exit:
   2009  1.1  christos 	if ((update != NULL || updatesig != NULL) &&
   2010  1.1  christos 	    nlocktype != isc_rwlocktype_write)
   2011  1.1  christos 	{
   2012  1.3  christos 		NODE_FORCEUPGRADE(nlock, &nlocktype);
   2013  1.1  christos 		POST(nlocktype);
   2014  1.1  christos 	}
   2015  1.1  christos 	if (update != NULL && need_headerupdate(update, search.now)) {
   2016  1.1  christos 		update_header(search.qpdb, update, search.now);
   2017  1.1  christos 	}
   2018  1.1  christos 	if (updatesig != NULL && need_headerupdate(updatesig, search.now)) {
   2019  1.1  christos 		update_header(search.qpdb, updatesig, search.now);
   2020  1.1  christos 	}
   2021  1.1  christos 
   2022  1.3  christos 	NODE_UNLOCK(nlock, &nlocktype);
   2023  1.1  christos 
   2024  1.1  christos tree_exit:
   2025  1.1  christos 	TREE_UNLOCK(&search.qpdb->tree_lock, &tlocktype);
   2026  1.1  christos 
   2027  1.1  christos 	/*
   2028  1.1  christos 	 * If we found a zonecut but aren't going to use it, we have to
   2029  1.1  christos 	 * let go of it.
   2030  1.1  christos 	 */
   2031  1.1  christos 	if (search.need_cleanup) {
   2032  1.1  christos 		node = search.zonecut;
   2033  1.1  christos 		INSIST(node != NULL);
   2034  1.3  christos 		nlock = &search.qpdb->buckets[node->locknum].lock;
   2035  1.1  christos 
   2036  1.3  christos 		NODE_RDLOCK(nlock, &nlocktype);
   2037  1.3  christos 		qpcnode_release(search.qpdb, node, &nlocktype, &tlocktype,
   2038  1.3  christos 				true DNS__DB_FLARG_PASS);
   2039  1.3  christos 		NODE_UNLOCK(nlock, &nlocktype);
   2040  1.1  christos 		INSIST(tlocktype == isc_rwlocktype_none);
   2041  1.1  christos 	}
   2042  1.1  christos 
   2043  1.1  christos 	update_cachestats(search.qpdb, result);
   2044  1.1  christos 	return result;
   2045  1.1  christos }
   2046  1.1  christos 
                       /*%
                        * Look up 'name' in qpdb->tree and bind the NS rdataset of the zone
                        * cut found for it to 'rdataset' (and its RRSIG, if one was found,
                        * to 'sigrdataset').
                        *
                        * If 'now' is 0 the current time is used.  If 'dcname' is NULL,
                        * 'foundname' is used in its place.  If 'nodep' is not NULL and an NS
                        * rdataset is found at the matched node, a reference to that node is
                        * acquired and stored in '*nodep'.
                        *
                        * With DNS_DBFIND_NOEXACT, an exact match is not used directly: the
                        * search falls back to the previous node on the lookup chain, or
                        * fails with ISC_R_NOTFOUND when the chain has fewer than two nodes.
                        *
                        * On a partial match, or when the matched node has no usable NS
                        * rdataset, the result is whatever find_deepest_zonecut() returns.
                        * DNS_R_DELEGATION is converted to ISC_R_SUCCESS before returning.
                        */
   2047  1.1  christos static isc_result_t
   2048  1.1  christos findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options,
   2049  1.1  christos 	    isc_stdtime_t now, dns_dbnode_t **nodep, dns_name_t *foundname,
   2050  1.1  christos 	    dns_name_t *dcname, dns_rdataset_t *rdataset,
   2051  1.1  christos 	    dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
   2052  1.1  christos 	qpcnode_t *node = NULL;
   2053  1.3  christos 	isc_rwlock_t *nlock = NULL;
   2054  1.1  christos 	isc_result_t result;
   2055  1.1  christos 	qpc_search_t search;
   2056  1.1  christos 	dns_slabheader_t *header = NULL;
   2057  1.1  christos 	dns_slabheader_t *header_prev = NULL, *header_next = NULL;
   2058  1.1  christos 	dns_slabheader_t *found = NULL, *foundsig = NULL;
   2059  1.1  christos 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
   2060  1.1  christos 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   2061  1.1  christos 	bool dcnull = (dcname == NULL);
   2062  1.1  christos 
   2063  1.1  christos 	REQUIRE(VALID_QPDB((qpcache_t *)db));
   2064  1.1  christos 
   2065  1.1  christos 	if (now == 0) {
   2066  1.1  christos 		now = isc_stdtime_now();
   2067  1.1  christos 	}
   2068  1.1  christos 
   2069  1.1  christos 	search = (qpc_search_t){
   2070  1.1  christos 		.qpdb = (qpcache_t *)db,
   2071  1.1  christos 		.options = options,
   2072  1.1  christos 		.now = now,
   2073  1.1  christos 	};
   2074  1.1  christos 
   2075  1.1  christos 	if (dcnull) {
   2076  1.1  christos 		dcname = foundname;
   2077  1.1  christos 	}
   2078  1.1  christos 
   2079  1.1  christos 	TREE_RDLOCK(&search.qpdb->tree_lock, &tlocktype);
   2080  1.1  christos 
   2081  1.1  christos 	/*
   2082  1.1  christos 	 * Search down from the root of the tree.
   2083  1.1  christos 	 */
   2084  1.1  christos 	result = dns_qp_lookup(search.qpdb->tree, name, NULL, NULL,
   2085  1.1  christos 			       &search.chain, (void **)&node, NULL);
                       	/*
                       	 * 'node' is dereferenced for every result except ISC_R_NOTFOUND;
                       	 * its name becomes the initial value of 'dcname'.
                       	 */
   2086  1.1  christos 	if (result != ISC_R_NOTFOUND) {
   2087  1.1  christos 		dns_name_copy(&node->name, dcname);
   2088  1.1  christos 	}
                       	/*
                       	 * The caller asked for no exact match: fall back to the previous
                       	 * node on the chain and handle it as a partial match.
                       	 */
   2089  1.1  christos 	if ((options & DNS_DBFIND_NOEXACT) != 0 && result == ISC_R_SUCCESS) {
   2090  1.1  christos 		int len = dns_qpchain_length(&search.chain);
   2091  1.1  christos 		if (len >= 2) {
   2092  1.1  christos 			node = NULL;
   2093  1.1  christos 			dns_qpchain_node(&search.chain, len - 2, NULL,
   2094  1.1  christos 					 (void **)&node, NULL);
   2095  1.1  christos 			search.chain.len = len - 1;
   2096  1.1  christos 			result = DNS_R_PARTIALMATCH;
   2097  1.1  christos 		} else {
   2098  1.1  christos 			result = ISC_R_NOTFOUND;
   2099  1.1  christos 		}
   2100  1.1  christos 	}
   2101  1.1  christos 
   2102  1.1  christos 	if (result == DNS_R_PARTIALMATCH) {
   2103  1.1  christos 		result = find_deepest_zonecut(&search, node, nodep, foundname,
   2104  1.1  christos 					      rdataset,
   2105  1.1  christos 					      sigrdataset DNS__DB_FLARG_PASS);
   2106  1.1  christos 		goto tree_exit;
   2107  1.1  christos 	} else if (result != ISC_R_SUCCESS) {
   2108  1.1  christos 		goto tree_exit;
   2109  1.1  christos 	} else if (!dcnull) {
                       		/*
                       		 * Exact match with a separate 'dcname': it already holds
                       		 * the node name, so copy it to 'foundname' as well.
                       		 */
   2110  1.1  christos 		dns_name_copy(dcname, foundname);
   2111  1.1  christos 	}
   2112  1.1  christos 
   2113  1.1  christos 	/*
   2114  1.1  christos 	 * We now go looking for an NS rdataset at the node.
   2115  1.1  christos 	 */
   2116  1.1  christos 
   2117  1.3  christos 	nlock = &search.qpdb->buckets[node->locknum].lock;
   2118  1.3  christos 	NODE_RDLOCK(nlock, &nlocktype);
   2119  1.1  christos 
   2120  1.1  christos 	for (header = node->data; header != NULL; header = header_next) {
   2121  1.1  christos 		header_next = header->next;
   2122  1.3  christos 		bool ns = (header->type == dns_rdatatype_ns ||
   2123  1.3  christos 			   header->type == DNS_SIGTYPE(dns_rdatatype_ns));
   2124  1.3  christos 		if (check_stale_header(node, header, &nlocktype, nlock, &search,
   2125  1.1  christos 				       &header_prev))
   2126  1.1  christos 		{
   2127  1.3  christos 			if (ns) {
   2128  1.3  christos 				/*
   2129  1.3  christos 				 * We found a cached NS, but it was either
   2130  1.3  christos 				 * ancient or it was stale and serve-stale
   2131  1.3  christos 				 * is disabled, so this node can't be used
   2132  1.3  christos 				 * as a zone cut we know about. Instead we
   2133  1.3  christos 				 * bail out and call find_deepest_zonecut()
   2134  1.3  christos 				 * below.
   2135  1.3  christos 				 */
   2136  1.3  christos 				break;
   2137  1.3  christos 			}
   2138  1.1  christos 		} else if (EXISTS(header) && !ANCIENT(header)) {
   2139  1.1  christos 			if (header->type == dns_rdatatype_ns) {
   2140  1.1  christos 				found = header;
   2141  1.3  christos 				if (foundsig != NULL) {
   2142  1.3  christos 					break;
   2143  1.3  christos 				}
   2144  1.1  christos 			} else if (header->type ==
   2145  1.1  christos 				   DNS_SIGTYPE(dns_rdatatype_ns))
   2146  1.1  christos 			{
   2147  1.1  christos 				foundsig = header;
   2148  1.3  christos 				if (found != NULL) {
   2149  1.3  christos 					break;
   2150  1.3  christos 				}
   2151  1.1  christos 			}
   2152  1.1  christos 			header_prev = header;
   2153  1.1  christos 		} else {
   2154  1.1  christos 			header_prev = header;
   2155  1.1  christos 		}
   2156  1.1  christos 	}
   2157  1.1  christos 
   2158  1.1  christos 	if (found == NULL) {
   2159  1.1  christos 		/*
   2160  1.3  christos 		 * No active NS records found. Call find_deepest_zonecut()
   2161  1.3  christos 		 * to look for them in nodes above this one.
   2162  1.1  christos 		 */
   2163  1.3  christos 		NODE_UNLOCK(nlock, &nlocktype);
   2164  1.1  christos 		result = find_deepest_zonecut(&search, node, nodep, foundname,
   2165  1.1  christos 					      rdataset,
   2166  1.1  christos 					      sigrdataset DNS__DB_FLARG_PASS);
   2167  1.3  christos 		dns_name_copy(foundname, dcname);
   2168  1.1  christos 		goto tree_exit;
   2169  1.1  christos 	}
   2170  1.1  christos 
   2171  1.1  christos 	if (nodep != NULL) {
   2172  1.3  christos 		qpcnode_acquire(search.qpdb, node, nlocktype,
   2173  1.3  christos 				tlocktype DNS__DB_FLARG_PASS);
   2174  1.1  christos 		*nodep = node;
   2175  1.1  christos 	}
   2176  1.1  christos 
   2177  1.1  christos 	bindrdataset(search.qpdb, node, found, search.now, nlocktype, tlocktype,
   2178  1.1  christos 		     rdataset DNS__DB_FLARG_PASS);
   2179  1.1  christos 	if (foundsig != NULL) {
   2180  1.1  christos 		bindrdataset(search.qpdb, node, foundsig, search.now, nlocktype,
   2181  1.1  christos 			     tlocktype, sigrdataset DNS__DB_FLARG_PASS);
   2182  1.1  christos 	}
   2183  1.1  christos 
   2184  1.1  christos 	if (need_headerupdate(found, search.now) ||
   2185  1.1  christos 	    (foundsig != NULL && need_headerupdate(foundsig, search.now)))
   2186  1.1  christos 	{
                       		/*
                       		 * update_header() is called with the node write lock held;
                       		 * need_headerupdate() is evaluated again after the upgrade.
                       		 */
   2187  1.1  christos 		if (nlocktype != isc_rwlocktype_write) {
   2188  1.3  christos 			NODE_FORCEUPGRADE(nlock, &nlocktype);
   2189  1.1  christos 			POST(nlocktype);
   2190  1.1  christos 		}
   2191  1.1  christos 		if (need_headerupdate(found, search.now)) {
   2192  1.1  christos 			update_header(search.qpdb, found, search.now);
   2193  1.1  christos 		}
   2194  1.1  christos 		if (foundsig != NULL && need_headerupdate(foundsig, search.now))
   2195  1.1  christos 		{
   2196  1.1  christos 			update_header(search.qpdb, foundsig, search.now);
   2197  1.1  christos 		}
   2198  1.1  christos 	}
   2199  1.1  christos 
   2200  1.3  christos 	NODE_UNLOCK(nlock, &nlocktype);
   2201  1.1  christos 
   2202  1.1  christos tree_exit:
   2203  1.1  christos 	TREE_UNLOCK(&search.qpdb->tree_lock, &tlocktype);
   2204  1.1  christos 
   2205  1.1  christos 	INSIST(!search.need_cleanup);
   2206  1.1  christos 
   2207  1.1  christos 	if (result == DNS_R_DELEGATION) {
   2208  1.1  christos 		result = ISC_R_SUCCESS;
   2209  1.1  christos 	}
   2210  1.1  christos 
   2211  1.1  christos 	return result;
   2212  1.1  christos }
   2213  1.1  christos 
                       /*%
                        * Search the headers of 'node' for an rdataset matching the
                        * 'type'/'covers' pair (or a negative entry for 'type') and bind it
                        * to 'rdataset'.  If the entry found is not negative and a matching
                        * signature header was also seen, that is bound to 'sigrdataset'.
                        *
                        * 'type' must not be dns_rdatatype_any.  'version' is unused.  If
                        * 'now' is 0 the current time is used.  A signature type is only
                        * computed when 'covers' is 0; otherwise sigmatchtype is set to 0.
                        *
                        * Returns ISC_R_NOTFOUND if nothing matched, DNS_R_NCACHENXDOMAIN or
                        * DNS_R_NCACHENXRRSET for a negative cache entry, and ISC_R_SUCCESS
                        * otherwise.
                        */
   2214  1.1  christos static isc_result_t
   2215  1.1  christos findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
   2216  1.1  christos 	     dns_rdatatype_t type, dns_rdatatype_t covers, isc_stdtime_t now,
   2217  1.1  christos 	     dns_rdataset_t *rdataset,
   2218  1.1  christos 	     dns_rdataset_t *sigrdataset DNS__DB_FLARG) {
   2219  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   2220  1.1  christos 	qpcnode_t *qpnode = (qpcnode_t *)node;
   2221  1.1  christos 	dns_slabheader_t *header = NULL, *header_next = NULL;
   2222  1.1  christos 	dns_slabheader_t *found = NULL, *foundsig = NULL;
   2223  1.1  christos 	dns_typepair_t matchtype, sigmatchtype, negtype;
   2224  1.1  christos 	isc_result_t result;
   2225  1.3  christos 	isc_rwlock_t *nlock = NULL;
   2226  1.1  christos 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   2227  1.1  christos 
   2228  1.1  christos 	REQUIRE(VALID_QPDB(qpdb));
   2229  1.1  christos 	REQUIRE(type != dns_rdatatype_any);
   2230  1.1  christos 
   2231  1.1  christos 	UNUSED(version);
   2232  1.1  christos 
   2233  1.1  christos 	result = ISC_R_SUCCESS;
   2234  1.1  christos 
   2235  1.1  christos 	if (now == 0) {
   2236  1.1  christos 		now = isc_stdtime_now();
   2237  1.1  christos 	}
   2238  1.1  christos 
   2239  1.3  christos 	nlock = &qpdb->buckets[qpnode->locknum].lock;
   2240  1.3  christos 	NODE_RDLOCK(nlock, &nlocktype);
   2241  1.1  christos 
   2242  1.1  christos 	matchtype = DNS_TYPEPAIR_VALUE(type, covers);
   2243  1.1  christos 	negtype = DNS_TYPEPAIR_VALUE(0, type);
   2244  1.1  christos 	if (covers == 0) {
   2245  1.1  christos 		sigmatchtype = DNS_SIGTYPE(type);
   2246  1.1  christos 	} else {
   2247  1.1  christos 		sigmatchtype = 0;
   2248  1.1  christos 	}
   2249  1.1  christos 
                       	/*
                       	 * Every header is scanned; there is no early exit, so a later
                       	 * matching header replaces an earlier one.
                       	 */
   2250  1.1  christos 	for (header = qpnode->data; header != NULL; header = header_next) {
   2251  1.1  christos 		header_next = header->next;
   2252  1.1  christos 		if (!ACTIVE(header, now)) {
   2253  1.1  christos 			if ((header->ttl + STALE_TTL(header, qpdb) <
   2254  1.1  christos 			     now - QPDB_VIRTUAL) &&
   2255  1.1  christos 			    (nlocktype == isc_rwlocktype_write ||
   2256  1.3  christos 			     NODE_TRYUPGRADE(nlock, &nlocktype) ==
   2257  1.1  christos 				     ISC_R_SUCCESS))
   2258  1.1  christos 			{
   2259  1.1  christos 				/*
   2260  1.1  christos 				 * We update the node's status only when we
   2261  1.1  christos 				 * can get write access.
   2262  1.1  christos 				 *
   2263  1.1  christos 				 * We don't check if refcurrent(qpnode) == 0
   2264  1.1  christos 				 * and try to free like we do in find(),
   2265  1.1  christos 				 * because refcurrent(qpnode) must be
   2266  1.1  christos 				 * non-zero.  This is so because 'node' is an
   2267  1.1  christos 				 * argument to the function.
   2268  1.1  christos 				 */
   2269  1.3  christos 				mark_ancient(header);
   2270  1.1  christos 			}
   2271  1.1  christos 		} else if (EXISTS(header) && !ANCIENT(header)) {
   2272  1.1  christos 			if (header->type == matchtype) {
   2273  1.1  christos 				found = header;
   2274  1.1  christos 			} else if (header->type == RDATATYPE_NCACHEANY ||
   2275  1.1  christos 				   header->type == negtype)
   2276  1.1  christos 			{
   2277  1.1  christos 				found = header;
   2278  1.1  christos 			} else if (header->type == sigmatchtype) {
   2279  1.1  christos 				foundsig = header;
   2280  1.1  christos 			}
   2281  1.1  christos 		}
   2282  1.1  christos 	}
   2283  1.1  christos 	if (found != NULL) {
   2284  1.1  christos 		bindrdataset(qpdb, qpnode, found, now, nlocktype,
   2285  1.1  christos 			     isc_rwlocktype_none, rdataset DNS__DB_FLARG_PASS);
   2286  1.1  christos 		if (!NEGATIVE(found) && foundsig != NULL) {
   2287  1.1  christos 			bindrdataset(qpdb, qpnode, foundsig, now, nlocktype,
   2288  1.1  christos 				     isc_rwlocktype_none,
   2289  1.1  christos 				     sigrdataset DNS__DB_FLARG_PASS);
   2290  1.1  christos 		}
   2291  1.1  christos 	}
   2292  1.1  christos 
   2293  1.3  christos 	NODE_UNLOCK(nlock, &nlocktype);
   2294  1.1  christos 
                       	/*
                       	 * Note that update_cachestats() is not reached on the
                       	 * not-found path.
                       	 */
   2295  1.1  christos 	if (found == NULL) {
   2296  1.1  christos 		return ISC_R_NOTFOUND;
   2297  1.1  christos 	}
   2298  1.1  christos 
   2299  1.1  christos 	if (NEGATIVE(found)) {
   2300  1.1  christos 		/*
   2301  1.1  christos 		 * We found a negative cache entry.
   2302  1.1  christos 		 */
   2303  1.1  christos 		if (NXDOMAIN(found)) {
   2304  1.1  christos 			result = DNS_R_NCACHENXDOMAIN;
   2305  1.1  christos 		} else {
   2306  1.1  christos 			result = DNS_R_NCACHENXRRSET;
   2307  1.1  christos 		}
   2308  1.1  christos 	}
   2309  1.1  christos 
   2310  1.1  christos 	update_cachestats(qpdb, result);
   2311  1.1  christos 
   2312  1.1  christos 	return result;
   2313  1.1  christos }
   2314  1.1  christos 
                       /*%
                        * Attach 'stats' to the cache as qpdb->cachestats.  'stats' must not
                        * be NULL.  Always returns ISC_R_SUCCESS.
                        */
   2315  1.1  christos static isc_result_t
   2316  1.1  christos setcachestats(dns_db_t *db, isc_stats_t *stats) {
   2317  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   2318  1.1  christos 
   2319  1.1  christos 	REQUIRE(VALID_QPDB(qpdb));
   2320  1.1  christos 	REQUIRE(stats != NULL);
   2321  1.1  christos 
   2322  1.1  christos 	isc_stats_attach(stats, &qpdb->cachestats);
   2323  1.1  christos 	return ISC_R_SUCCESS;
   2324  1.1  christos }
   2325  1.1  christos 
                       /*%
                        * Return the cache's rrsetstats pointer as stored in the database
                        * (possibly NULL).  This function does not attach to it.
                        */
   2326  1.1  christos static dns_stats_t *
   2327  1.1  christos getrrsetstats(dns_db_t *db) {
   2328  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   2329  1.1  christos 
   2330  1.1  christos 	REQUIRE(VALID_QPDB(qpdb));
   2331  1.1  christos 
   2332  1.1  christos 	return qpdb->rrsetstats;
   2333  1.1  christos }
   2334  1.1  christos 
                       /*%
                        * Store 'ttl' as the cache's serve-stale TTL
                        * (qpdb->common.serve_stale_ttl).  No range check is applied.
                        * Always returns ISC_R_SUCCESS.
                        */
   2335  1.1  christos static isc_result_t
   2336  1.1  christos setservestalettl(dns_db_t *db, dns_ttl_t ttl) {
   2337  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   2338  1.1  christos 
   2339  1.1  christos 	REQUIRE(VALID_QPDB(qpdb));
   2340  1.1  christos 
   2341  1.1  christos 	/* currently no bounds checking.  0 means disable. */
   2342  1.1  christos 	qpdb->common.serve_stale_ttl = ttl;
   2343  1.1  christos 	return ISC_R_SUCCESS;
   2344  1.1  christos }
   2345  1.1  christos 
                       /*%
                        * Copy the cache's serve-stale TTL (qpdb->common.serve_stale_ttl)
                        * into '*ttl'.  'ttl' is written unconditionally; it is not checked
                        * for NULL here.  Always returns ISC_R_SUCCESS.
                        */
   2346  1.1  christos static isc_result_t
   2347  1.1  christos getservestalettl(dns_db_t *db, dns_ttl_t *ttl) {
   2348  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   2349  1.1  christos 
   2350  1.1  christos 	REQUIRE(VALID_QPDB(qpdb));
   2351  1.1  christos 
   2352  1.1  christos 	*ttl = qpdb->common.serve_stale_ttl;
   2353  1.1  christos 	return ISC_R_SUCCESS;
   2354  1.1  christos }
   2355  1.1  christos 
                       /*%
                        * Store 'interval' as the cache's serve-stale refresh interval
                        * (qpdb->serve_stale_refresh).  No range check is applied.
                        * Always returns ISC_R_SUCCESS.
                        */
   2356  1.1  christos static isc_result_t
   2357  1.1  christos setservestalerefresh(dns_db_t *db, uint32_t interval) {
   2358  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   2359  1.1  christos 
   2360  1.1  christos 	REQUIRE(VALID_QPDB(qpdb));
   2361  1.1  christos 
   2362  1.1  christos 	/* currently no bounds checking.  0 means disable. */
   2363  1.1  christos 	qpdb->serve_stale_refresh = interval;
   2364  1.1  christos 	return ISC_R_SUCCESS;
   2365  1.1  christos }
   2366  1.1  christos 
                       /*%
                        * Copy the cache's serve-stale refresh interval
                        * (qpdb->serve_stale_refresh) into '*interval'.  'interval' is
                        * written unconditionally; it is not checked for NULL here.
                        * Always returns ISC_R_SUCCESS.
                        */
   2367  1.1  christos static isc_result_t
   2368  1.1  christos getservestalerefresh(dns_db_t *db, uint32_t *interval) {
   2369  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   2370  1.1  christos 
   2371  1.1  christos 	REQUIRE(VALID_QPDB(qpdb));
   2372  1.1  christos 
   2373  1.1  christos 	*interval = qpdb->serve_stale_refresh;
   2374  1.1  christos 	return ISC_R_SUCCESS;
   2375  1.1  christos }
   2376  1.1  christos 
                       /*%
                        * Expire the slab header passed as 'data', which belongs to 'node'.
                        *
                        * The node's bucket lock is taken for writing around a call to
                        * expireheader() with reason dns_expire_flush.  The tree lock type
                        * handed to expireheader() starts as "none" and is asserted to
                        * still be "none" afterwards.
                        */
   2377  1.1  christos static void
   2378  1.1  christos expiredata(dns_db_t *db, dns_dbnode_t *node, void *data) {
   2379  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   2380  1.1  christos 	qpcnode_t *qpnode = (qpcnode_t *)node;
   2381  1.1  christos 	dns_slabheader_t *header = data;
   2382  1.1  christos 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   2383  1.1  christos 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
   2384  1.3  christos 	isc_rwlock_t *nlock = &qpdb->buckets[qpnode->locknum].lock;
   2385  1.1  christos 
   2386  1.3  christos 	NODE_WRLOCK(nlock, &nlocktype);
   2387  1.1  christos 	expireheader(header, &nlocktype, &tlocktype,
   2388  1.1  christos 		     dns_expire_flush DNS__DB_FILELINE);
   2389  1.3  christos 	NODE_UNLOCK(nlock, &nlocktype);
   2390  1.1  christos 	INSIST(tlocktype == isc_rwlocktype_none);
   2391  1.1  christos }
   2392  1.1  christos 
                       /*%
                        * Return the size to account for 'header': the value of
                        * dns_rdataslab_size() when the header is not NONEXISTENT,
                        * otherwise just sizeof(*header).
                        */
   2393  1.1  christos static size_t
   2394  1.1  christos rdataset_size(dns_slabheader_t *header) {
   2395  1.1  christos 	if (!NONEXISTENT(header)) {
   2396  1.1  christos 		return dns_rdataslab_size((unsigned char *)header,
   2397  1.1  christos 					  sizeof(*header));
   2398  1.1  christos 	}
   2399  1.1  christos 
   2400  1.1  christos 	return sizeof(*header);
   2401  1.1  christos }
   2402  1.1  christos 
                       /*%
                        * Expire headers from the tail of the LRU list of bucket 'locknum'.
                        *
                        * The loop keeps taking the list tail while it exists, its
                        * last_used is not later than qpdb->last_used, and the running
                        * total is still <= 'purgesize'; the total may therefore end up
                        * above 'purgesize'.  Each header is unlinked from the LRU list and
                        * passed to expireheader() with reason dns_expire_lru.
                        *
                        * Returns the sum of rdataset_size() of the headers expired.
                        *
                        * overmem() calls this with the bucket's node lock held for writing.
                        */
   2403  1.1  christos static size_t
   2404  1.1  christos expire_lru_headers(qpcache_t *qpdb, unsigned int locknum,
   2405  1.1  christos 		   isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep,
   2406  1.1  christos 		   size_t purgesize DNS__DB_FLARG) {
   2407  1.1  christos 	dns_slabheader_t *header = NULL;
   2408  1.1  christos 	size_t purged = 0;
   2409  1.1  christos 
   2410  1.3  christos 	for (header = ISC_LIST_TAIL(qpdb->buckets[locknum].lru);
   2411  1.1  christos 	     header != NULL && header->last_used <= qpdb->last_used &&
   2412  1.1  christos 	     purged <= purgesize;
   2413  1.3  christos 	     header = ISC_LIST_TAIL(qpdb->buckets[locknum].lru))
   2414  1.1  christos 	{
   2415  1.1  christos 		size_t header_size = rdataset_size(header);
   2416  1.1  christos 
   2417  1.1  christos 		/*
   2418  1.1  christos 		 * Unlink the entry at this point to avoid checking it
   2419  1.1  christos 		 * again even if it's currently used by someone else and
   2420  1.1  christos 		 * cannot be purged at this moment.  This entry won't be
   2421  1.1  christos 		 * referenced any more (so unlinking is safe) since the
   2422  1.1  christos 		 * TTL will be reset to 0.
   2423  1.1  christos 		 */
   2424  1.3  christos 		ISC_LIST_UNLINK(qpdb->buckets[locknum].lru, header, link);
   2425  1.1  christos 		expireheader(header, nlocktypep, tlocktypep,
   2426  1.1  christos 			     dns_expire_lru DNS__DB_FLARG_PASS);
   2427  1.1  christos 		purged += header_size;
   2428  1.1  christos 	}
   2429  1.1  christos 
   2430  1.1  christos 	return purged;
   2431  1.1  christos }
   2432  1.1  christos 
   2433  1.1  christos /*%
   2434  1.1  christos  * Purge some expired and/or stale (i.e. unused for some period) cache entries
   2435  1.1  christos  * due to an overmem condition.  To recover from this condition quickly,
   2436  1.1  christos  * we clean up entries up to the size of newly added rdata that triggered
   2437  1.1  christos  * the overmem; this is accessible via newheader.
   2438  1.1  christos  *
   2439  1.1  christos  * The LRU list tails are processed in LRU order to the nearest second.
   2440  1.1  christos  *
                        * The sweep starts at the bucket selected by qpdb->lru_sweep (which is
                        * incremented on every call) and walks the buckets in order, taking
                        * each bucket's node lock for writing while its LRU tail is expired.
                        * If a complete pass has not purged the target amount, qpdb->last_used
                        * is set to the oldest last_used seen at the list tails and the sweep
                        * is retried, at most max_passes times.
                        *
   2441  1.1  christos  * A write lock on the tree must be held.
   2442  1.1  christos  */
   2443  1.1  christos static void
   2444  1.1  christos overmem(qpcache_t *qpdb, dns_slabheader_t *newheader,
   2445  1.1  christos 	isc_rwlocktype_t *tlocktypep DNS__DB_FLARG) {
   2446  1.3  christos 	uint32_t locknum_start = qpdb->lru_sweep++ % qpdb->buckets_count;
   2447  1.1  christos 	uint32_t locknum = locknum_start;
   2448  1.1  christos 	size_t purgesize, purged = 0;
   2449  1.1  christos 	isc_stdtime_t min_last_used = 0;
   2450  1.1  christos 	size_t max_passes = 8;
   2451  1.1  christos 
   2452  1.1  christos 	/*
   2453  1.1  christos 	 * Maximum estimated size of the data being added: The size
   2454  1.1  christos 	 * of the rdataset, plus a new QP database node and nodename,
   2455  1.1  christos 	 * and a possible additional NSEC node and nodename. Also add
   2456  1.1  christos 	 * a 12k margin for a possible QP-trie chunk allocation.
   2457  1.1  christos 	 * (It's okay to overestimate, we want to get cache memory
   2458  1.1  christos 	 * down quickly.)
   2459  1.1  christos 	 */
   2460  1.1  christos 	purgesize = 2 * (sizeof(qpcnode_t) +
   2461  1.1  christos 			 dns_name_size(&HEADERNODE(newheader)->name)) +
   2462  1.5  christos 		    rdataset_size(newheader) + QP_SAFETY_MARGIN;
   2463  1.1  christos again:
   2464  1.1  christos 	do {
   2465  1.1  christos 		isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   2466  1.3  christos 		isc_rwlock_t *nlock = &qpdb->buckets[locknum].lock;
   2467  1.3  christos 		NODE_WRLOCK(nlock, &nlocktype);
   2468  1.1  christos 
   2469  1.1  christos 		purged += expire_lru_headers(
   2470  1.1  christos 			qpdb, locknum, &nlocktype, tlocktypep,
   2471  1.1  christos 			purgesize - purged DNS__DB_FLARG_PASS);
   2472  1.1  christos 
   2473  1.1  christos 		/*
   2474  1.1  christos 		 * Work out the oldest remaining last_used values of the list
   2475  1.1  christos 		 * tails as we walk across the array of lru lists.
   2476  1.1  christos 		 */
   2477  1.3  christos 		dns_slabheader_t *header =
   2478  1.3  christos 			ISC_LIST_TAIL(qpdb->buckets[locknum].lru);
   2479  1.1  christos 		if (header != NULL &&
   2480  1.1  christos 		    (min_last_used == 0 || header->last_used < min_last_used))
   2481  1.1  christos 		{
   2482  1.1  christos 			min_last_used = header->last_used;
   2483  1.1  christos 		}
   2484  1.3  christos 		NODE_UNLOCK(nlock, &nlocktype);
   2485  1.3  christos 		locknum = (locknum + 1) % qpdb->buckets_count;
   2486  1.1  christos 	} while (locknum != locknum_start && purged <= purgesize);
   2487  1.1  christos 
   2488  1.1  christos 	/*
   2489  1.1  christos 	 * Update qpdb->last_used if we have walked all the list tails and have
   2490  1.1  christos 	 * not freed the required amount of memory.
   2491  1.1  christos 	 */
   2492  1.1  christos 	if (purged < purgesize) {
   2493  1.1  christos 		if (min_last_used != 0) {
   2494  1.1  christos 			qpdb->last_used = min_last_used;
   2495  1.1  christos 			if (max_passes-- > 0) {
   2496  1.1  christos 				goto again;
   2497  1.1  christos 			}
   2498  1.1  christos 		}
   2499  1.1  christos 	}
   2500  1.1  christos }
   2501  1.1  christos 
   2502  1.1  christos /*%
   2503  1.1  christos  * Ordering function for the heap code: 'v1' and 'v2' are treated as
   2504  1.1  christos  * dns_slabheader_t pointers.  It returns true if v1 happens "sooner"
                        * than v2, i.e. when v1's ttl is strictly less than v2's.
   2505  1.1  christos  */
   2506  1.1  christos static bool
   2507  1.1  christos ttl_sooner(void *v1, void *v2) {
   2508  1.1  christos 	dns_slabheader_t *h1 = v1;
   2509  1.1  christos 	dns_slabheader_t *h2 = v2;
   2510  1.1  christos 
   2511  1.1  christos 	return h1->ttl < h2->ttl;
   2512  1.1  christos }
   2513  1.1  christos 
   2514  1.1  christos /*%
   2515  1.1  christos  * This function sets the heap index into the header: 'what' is treated
                        * as a dns_slabheader_t pointer and 'idx' is stored in its heap_index.
   2516  1.1  christos  */
   2517  1.1  christos static void
   2518  1.1  christos set_index(void *what, unsigned int idx) {
   2519  1.1  christos 	dns_slabheader_t *h = what;
   2520  1.1  christos 
   2521  1.1  christos 	h->heap_index = idx;
   2522  1.1  christos }
   2523  1.1  christos 
   2524  1.1  christos static void
   2525  1.3  christos qpcache__destroy(qpcache_t *qpdb) {
   2526  1.1  christos 	unsigned int i;
   2527  1.1  christos 	char buf[DNS_NAME_FORMATSIZE];
   2528  1.1  christos 	dns_qp_t **treep = NULL;
   2529  1.1  christos 
   2530  1.1  christos 	for (;;) {
   2531  1.1  christos 		/*
   2532  1.1  christos 		 * pick the next tree to (start to) destroy
   2533  1.1  christos 		 */
   2534  1.1  christos 		treep = &qpdb->tree;
   2535  1.1  christos 		if (*treep == NULL) {
   2536  1.1  christos 			treep = &qpdb->nsec;
   2537  1.1  christos 			if (*treep == NULL) {
   2538  1.1  christos 				break;
   2539  1.1  christos 			}
   2540  1.1  christos 		}
   2541  1.1  christos 
   2542  1.1  christos 		dns_qp_destroy(treep);
   2543  1.1  christos 		INSIST(*treep == NULL);
   2544  1.1  christos 	}
   2545  1.1  christos 
   2546  1.3  christos 	if (dns_name_dynamic(&qpdb->common.origin)) {
   2547  1.3  christos 		dns_name_format(&qpdb->common.origin, buf, sizeof(buf));
   2548  1.3  christos 	} else {
   2549  1.3  christos 		strlcpy(buf, "<UNKNOWN>", sizeof(buf));
   2550  1.1  christos 	}
   2551  1.3  christos 	isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
   2552  1.3  christos 		      ISC_LOG_DEBUG(1), "done %s(%s)", __func__, buf);
   2553  1.3  christos 
   2554  1.1  christos 	if (dns_name_dynamic(&qpdb->common.origin)) {
   2555  1.1  christos 		dns_name_free(&qpdb->common.origin, qpdb->common.mctx);
   2556  1.1  christos 	}
   2557  1.3  christos 	for (i = 0; i < qpdb->buckets_count; i++) {
   2558  1.3  christos 		NODE_DESTROYLOCK(&qpdb->buckets[i].lock);
   2559  1.3  christos 
   2560  1.3  christos 		INSIST(ISC_LIST_EMPTY(qpdb->buckets[i].lru));
   2561  1.1  christos 
   2562  1.3  christos 		INSIST(isc_queue_empty(&qpdb->buckets[i].deadnodes));
   2563  1.3  christos 		isc_queue_destroy(&qpdb->buckets[i].deadnodes);
   2564  1.1  christos 
   2565  1.3  christos 		isc_heap_destroy(&qpdb->buckets[i].heap);
   2566  1.1  christos 	}
   2567  1.1  christos 
   2568  1.1  christos 	if (qpdb->rrsetstats != NULL) {
   2569  1.1  christos 		dns_stats_detach(&qpdb->rrsetstats);
   2570  1.1  christos 	}
   2571  1.1  christos 	if (qpdb->cachestats != NULL) {
   2572  1.1  christos 		isc_stats_detach(&qpdb->cachestats);
   2573  1.1  christos 	}
   2574  1.1  christos 
   2575  1.1  christos 	TREE_DESTROYLOCK(&qpdb->tree_lock);
   2576  1.3  christos 	isc_refcount_destroy(&qpdb->references);
   2577  1.1  christos 	isc_refcount_destroy(&qpdb->common.references);
   2578  1.1  christos 
   2579  1.1  christos 	isc_rwlock_destroy(&qpdb->lock);
   2580  1.1  christos 	qpdb->common.magic = 0;
   2581  1.1  christos 	qpdb->common.impmagic = 0;
   2582  1.1  christos 	isc_mem_detach(&qpdb->hmctx);
   2583  1.1  christos 
   2584  1.3  christos 	isc_mem_putanddetach(&qpdb->common.mctx, qpdb,
   2585  1.3  christos 			     sizeof(*qpdb) + qpdb->buckets_count *
   2586  1.3  christos 						     sizeof(qpdb->buckets[0]));
   2587  1.1  christos }
   2588  1.1  christos 
   2589  1.1  christos static void
   2590  1.1  christos qpdb_destroy(dns_db_t *arg) {
   2591  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)arg;
   2592  1.1  christos 
   2593  1.3  christos 	qpcache_detach(&qpdb);
   2594  1.1  christos }
   2595  1.1  christos 
   2596  1.1  christos /*%
   2597  1.1  christos  * Clean up dead nodes.  These are nodes which have no references, and
   2598  1.1  christos  * have no data.  They are dead but we could not or chose not to delete
   2599  1.1  christos  * them when we deleted all the data at that node because we did not want
   2600  1.1  christos  * to wait for the tree write lock.
   2601  1.1  christos  */
   2602  1.1  christos static void
   2603  1.1  christos cleanup_deadnodes(void *arg) {
   2604  1.1  christos 	qpcache_t *qpdb = arg;
   2605  1.1  christos 	uint16_t locknum = isc_tid();
   2606  1.1  christos 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
   2607  1.1  christos 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   2608  1.3  christos 	isc_rwlock_t *nlock = &qpdb->buckets[locknum].lock;
   2609  1.1  christos 	qpcnode_t *qpnode = NULL, *qpnext = NULL;
   2610  1.1  christos 	isc_queue_t deadnodes;
   2611  1.1  christos 
   2612  1.3  christos 	INSIST(locknum < qpdb->buckets_count);
   2613  1.1  christos 
   2614  1.1  christos 	isc_queue_init(&deadnodes);
   2615  1.1  christos 
   2616  1.1  christos 	TREE_WRLOCK(&qpdb->tree_lock, &tlocktype);
   2617  1.3  christos 	NODE_WRLOCK(nlock, &nlocktype);
   2618  1.1  christos 
   2619  1.3  christos 	RUNTIME_CHECK(isc_queue_splice(&deadnodes,
   2620  1.3  christos 				       &qpdb->buckets[locknum].deadnodes));
   2621  1.1  christos 	isc_queue_for_each_entry_safe(&deadnodes, qpnode, qpnext, deadlink) {
   2622  1.3  christos 		qpcnode_release(qpdb, qpnode, &nlocktype, &tlocktype,
   2623  1.3  christos 				false DNS__DB_FILELINE);
   2624  1.1  christos 	}
   2625  1.1  christos 
   2626  1.3  christos 	NODE_UNLOCK(nlock, &nlocktype);
   2627  1.1  christos 	TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
   2628  1.1  christos }
   2629  1.1  christos 
   2630  1.1  christos /*
   2631  1.1  christos  * This function is assumed to be called when a node is newly referenced
   2632  1.1  christos  * and can be in the deadnode list.  In that case the node will be references
   2633  1.1  christos  * and cleanup_deadnodes() will remove it from the list when the cleaning
   2634  1.1  christos  * happens.
   2635  1.1  christos  * Note: while a new reference is gained in multiple places, there are only very
   2636  1.1  christos  * few cases where the node can be in the deadnode list (only empty nodes can
   2637  1.1  christos  * have been added to the list).
   2638  1.1  christos  */
   2639  1.1  christos static void
   2640  1.1  christos reactivate_node(qpcache_t *qpdb, qpcnode_t *node,
   2641  1.1  christos 		isc_rwlocktype_t tlocktype ISC_ATTR_UNUSED DNS__DB_FLARG) {
   2642  1.1  christos 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   2643  1.3  christos 	isc_rwlock_t *nlock = &qpdb->buckets[node->locknum].lock;
   2644  1.1  christos 
   2645  1.3  christos 	NODE_RDLOCK(nlock, &nlocktype);
   2646  1.3  christos 	qpcnode_acquire(qpdb, node, nlocktype, tlocktype DNS__DB_FLARG_PASS);
   2647  1.3  christos 	NODE_UNLOCK(nlock, &nlocktype);
   2648  1.1  christos }
   2649  1.1  christos 
   2650  1.1  christos static qpcnode_t *
   2651  1.1  christos new_qpcnode(qpcache_t *qpdb, const dns_name_t *name) {
   2652  1.1  christos 	qpcnode_t *newdata = isc_mem_get(qpdb->common.mctx, sizeof(*newdata));
   2653  1.1  christos 	*newdata = (qpcnode_t){
   2654  1.1  christos 		.name = DNS_NAME_INITEMPTY,
   2655  1.1  christos 		.references = ISC_REFCOUNT_INITIALIZER(1),
   2656  1.3  christos 		.locknum = isc_random_uniform(qpdb->buckets_count),
   2657  1.1  christos 	};
   2658  1.1  christos 
   2659  1.1  christos 	isc_mem_attach(qpdb->common.mctx, &newdata->mctx);
   2660  1.1  christos 	dns_name_dupwithoffsets(name, newdata->mctx, &newdata->name);
   2661  1.1  christos 
   2662  1.1  christos #ifdef DNS_DB_NODETRACE
   2663  1.1  christos 	fprintf(stderr, "new_qpcnode:%s:%s:%d:%p->references = 1\n", __func__,
   2664  1.1  christos 		__FILE__, __LINE__ + 1, name);
   2665  1.1  christos #endif
   2666  1.1  christos 	return newdata;
   2667  1.1  christos }
   2668  1.1  christos 
   2669  1.1  christos static isc_result_t
   2670  1.1  christos findnode(dns_db_t *db, const dns_name_t *name, bool create,
   2671  1.1  christos 	 dns_dbnode_t **nodep DNS__DB_FLARG) {
   2672  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   2673  1.1  christos 	qpcnode_t *node = NULL;
   2674  1.1  christos 	isc_result_t result;
   2675  1.1  christos 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
   2676  1.1  christos 
   2677  1.1  christos 	TREE_RDLOCK(&qpdb->tree_lock, &tlocktype);
   2678  1.1  christos 	result = dns_qp_getname(qpdb->tree, name, (void **)&node, NULL);
   2679  1.1  christos 	if (result != ISC_R_SUCCESS) {
   2680  1.1  christos 		if (!create) {
   2681  1.1  christos 			goto unlock;
   2682  1.1  christos 		}
   2683  1.1  christos 		/*
   2684  1.1  christos 		 * Try to upgrade the lock and if that fails unlock then relock.
   2685  1.1  christos 		 */
   2686  1.1  christos 		TREE_FORCEUPGRADE(&qpdb->tree_lock, &tlocktype);
   2687  1.1  christos 		result = dns_qp_getname(qpdb->tree, name, (void **)&node, NULL);
   2688  1.1  christos 		if (result != ISC_R_SUCCESS) {
   2689  1.1  christos 			node = new_qpcnode(qpdb, name);
   2690  1.1  christos 			result = dns_qp_insert(qpdb->tree, node, 0);
   2691  1.1  christos 			INSIST(result == ISC_R_SUCCESS);
   2692  1.1  christos 			qpcnode_unref(node);
   2693  1.1  christos 		}
   2694  1.1  christos 	}
   2695  1.1  christos 
   2696  1.1  christos 	reactivate_node(qpdb, node, tlocktype DNS__DB_FLARG_PASS);
   2697  1.1  christos 
   2698  1.1  christos 	*nodep = (dns_dbnode_t *)node;
   2699  1.1  christos unlock:
   2700  1.1  christos 	TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
   2701  1.1  christos 
   2702  1.1  christos 	return result;
   2703  1.1  christos }
   2704  1.1  christos 
   2705  1.1  christos static void
   2706  1.1  christos attachnode(dns_db_t *db, dns_dbnode_t *source,
   2707  1.1  christos 	   dns_dbnode_t **targetp DNS__DB_FLARG) {
   2708  1.1  christos 	REQUIRE(VALID_QPDB((qpcache_t *)db));
   2709  1.1  christos 	REQUIRE(targetp != NULL && *targetp == NULL);
   2710  1.1  christos 
   2711  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   2712  1.1  christos 	qpcnode_t *node = (qpcnode_t *)source;
   2713  1.1  christos 
   2714  1.3  christos 	qpcnode_acquire(qpdb, node, isc_rwlocktype_none,
   2715  1.3  christos 			isc_rwlocktype_none DNS__DB_FLARG_PASS);
   2716  1.1  christos 
   2717  1.1  christos 	*targetp = source;
   2718  1.1  christos }
   2719  1.1  christos 
   2720  1.1  christos static void
   2721  1.3  christos detachnode(dns_db_t *db, dns_dbnode_t **nodep DNS__DB_FLARG) {
   2722  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   2723  1.1  christos 	qpcnode_t *node = NULL;
   2724  1.1  christos 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   2725  1.1  christos 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
   2726  1.3  christos 	isc_rwlock_t *nlock = NULL;
   2727  1.1  christos 
   2728  1.1  christos 	REQUIRE(VALID_QPDB(qpdb));
   2729  1.3  christos 	REQUIRE(nodep != NULL && *nodep != NULL);
   2730  1.1  christos 
   2731  1.3  christos 	node = (qpcnode_t *)(*nodep);
   2732  1.3  christos 	*nodep = NULL;
   2733  1.3  christos 	nlock = &qpdb->buckets[node->locknum].lock;
   2734  1.1  christos 
   2735  1.3  christos 	/*
   2736  1.3  christos 	 * We can't destroy qpcache while holding a nodelock, so we need to
   2737  1.3  christos 	 * reference it before acquiring the lock and release it afterward.
   2738  1.3  christos 	 * Additionally, we must ensure that we don't destroy the database while
   2739  1.3  christos 	 * the NODE_LOCK is locked.
   2740  1.3  christos 	 */
   2741  1.3  christos 	qpcache_ref(qpdb);
   2742  1.1  christos 
   2743  1.3  christos 	rcu_read_lock();
   2744  1.3  christos 	NODE_RDLOCK(nlock, &nlocktype);
   2745  1.3  christos 	qpcnode_release(qpdb, node, &nlocktype, &tlocktype,
   2746  1.3  christos 			true DNS__DB_FLARG_PASS);
   2747  1.3  christos 	NODE_UNLOCK(nlock, &nlocktype);
   2748  1.3  christos 	rcu_read_unlock();
   2749  1.1  christos 
   2750  1.3  christos 	qpcache_detach(&qpdb);
   2751  1.1  christos }
   2752  1.1  christos 
   2753  1.1  christos static isc_result_t
   2754  1.1  christos createiterator(dns_db_t *db, unsigned int options ISC_ATTR_UNUSED,
   2755  1.1  christos 	       dns_dbiterator_t **iteratorp) {
   2756  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   2757  1.1  christos 	qpc_dbit_t *qpdbiter = NULL;
   2758  1.1  christos 
   2759  1.1  christos 	REQUIRE(VALID_QPDB(qpdb));
   2760  1.1  christos 
   2761  1.1  christos 	qpdbiter = isc_mem_get(qpdb->common.mctx, sizeof(*qpdbiter));
   2762  1.1  christos 	*qpdbiter = (qpc_dbit_t){
   2763  1.1  christos 		.common.methods = &dbiterator_methods,
   2764  1.1  christos 		.common.magic = DNS_DBITERATOR_MAGIC,
   2765  1.1  christos 		.paused = true,
   2766  1.1  christos 	};
   2767  1.1  christos 
   2768  1.1  christos 	qpdbiter->name = dns_fixedname_initname(&qpdbiter->fixed);
   2769  1.1  christos 	dns_db_attach(db, &qpdbiter->common.db);
   2770  1.1  christos 	dns_qpiter_init(qpdb->tree, &qpdbiter->iter);
   2771  1.1  christos 
   2772  1.1  christos 	*iteratorp = (dns_dbiterator_t *)qpdbiter;
   2773  1.1  christos 	return ISC_R_SUCCESS;
   2774  1.1  christos }
   2775  1.1  christos 
   2776  1.1  christos static isc_result_t
   2777  1.1  christos allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
   2778  1.1  christos 	     unsigned int options, isc_stdtime_t now,
   2779  1.1  christos 	     dns_rdatasetiter_t **iteratorp DNS__DB_FLARG) {
   2780  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   2781  1.1  christos 	qpcnode_t *qpnode = (qpcnode_t *)node;
   2782  1.1  christos 	qpc_rditer_t *iterator = NULL;
   2783  1.1  christos 
   2784  1.1  christos 	REQUIRE(VALID_QPDB(qpdb));
   2785  1.1  christos 
   2786  1.1  christos 	UNUSED(version);
   2787  1.1  christos 
   2788  1.1  christos 	iterator = isc_mem_get(qpdb->common.mctx, sizeof(*iterator));
   2789  1.1  christos 
   2790  1.1  christos 	if (now == 0) {
   2791  1.1  christos 		now = isc_stdtime_now();
   2792  1.1  christos 	}
   2793  1.1  christos 
   2794  1.1  christos 	iterator->common.magic = DNS_RDATASETITER_MAGIC;
   2795  1.1  christos 	iterator->common.methods = &rdatasetiter_methods;
   2796  1.1  christos 	iterator->common.db = db;
   2797  1.1  christos 	iterator->common.node = node;
   2798  1.1  christos 	iterator->common.version = NULL;
   2799  1.1  christos 	iterator->common.options = options;
   2800  1.1  christos 	iterator->common.now = now;
   2801  1.1  christos 	iterator->current = NULL;
   2802  1.1  christos 
   2803  1.3  christos 	qpcnode_acquire(qpdb, qpnode, isc_rwlocktype_none,
   2804  1.3  christos 			isc_rwlocktype_none DNS__DB_FLARG_PASS);
   2805  1.1  christos 
   2806  1.1  christos 	*iteratorp = (dns_rdatasetiter_t *)iterator;
   2807  1.1  christos 
   2808  1.1  christos 	return ISC_R_SUCCESS;
   2809  1.1  christos }
   2810  1.1  christos 
   2811  1.1  christos static bool
   2812  1.1  christos overmaxtype(qpcache_t *qpdb, uint32_t ntypes) {
   2813  1.1  christos 	if (qpdb->maxtypepername == 0) {
   2814  1.1  christos 		return false;
   2815  1.1  christos 	}
   2816  1.1  christos 
   2817  1.1  christos 	return ntypes >= qpdb->maxtypepername;
   2818  1.1  christos }
   2819  1.1  christos 
   2820  1.1  christos static bool
   2821  1.1  christos prio_header(dns_slabheader_t *header) {
   2822  1.1  christos 	if (NEGATIVE(header) && prio_type(DNS_TYPEPAIR_COVERS(header->type))) {
   2823  1.1  christos 		return true;
   2824  1.1  christos 	}
   2825  1.1  christos 
   2826  1.1  christos 	return prio_type(header->type);
   2827  1.1  christos }
   2828  1.1  christos 
   2829  1.1  christos static isc_result_t
   2830  1.1  christos add(qpcache_t *qpdb, qpcnode_t *qpnode,
   2831  1.1  christos     const dns_name_t *nodename ISC_ATTR_UNUSED, dns_slabheader_t *newheader,
   2832  1.1  christos     unsigned int options, bool loading, dns_rdataset_t *addedrdataset,
   2833  1.1  christos     isc_stdtime_t now, isc_rwlocktype_t nlocktype,
   2834  1.1  christos     isc_rwlocktype_t tlocktype DNS__DB_FLARG) {
   2835  1.1  christos 	dns_slabheader_t *topheader = NULL, *topheader_prev = NULL;
   2836  1.1  christos 	dns_slabheader_t *header = NULL, *sigheader = NULL;
   2837  1.1  christos 	dns_slabheader_t *prioheader = NULL, *expireheader = NULL;
   2838  1.1  christos 	bool header_nx;
   2839  1.1  christos 	bool newheader_nx;
   2840  1.1  christos 	dns_typepair_t negtype = 0;
   2841  1.1  christos 	dns_trust_t trust;
   2842  1.1  christos 	int idx;
   2843  1.1  christos 	uint32_t ntypes = 0;
   2844  1.1  christos 
   2845  1.1  christos 	if ((options & DNS_DBADD_FORCE) != 0) {
   2846  1.1  christos 		trust = dns_trust_ultimate;
   2847  1.1  christos 	} else {
   2848  1.1  christos 		trust = newheader->trust;
   2849  1.1  christos 	}
   2850  1.1  christos 
   2851  1.1  christos 	newheader_nx = NONEXISTENT(newheader) ? true : false;
   2852  1.1  christos 
   2853  1.1  christos 	if (!newheader_nx) {
   2854  1.1  christos 		dns_rdatatype_t rdtype = DNS_TYPEPAIR_TYPE(newheader->type);
   2855  1.1  christos 		dns_rdatatype_t covers = DNS_TYPEPAIR_COVERS(newheader->type);
   2856  1.1  christos 		dns_typepair_t sigtype = DNS_SIGTYPE(covers);
   2857  1.1  christos 		if (NEGATIVE(newheader)) {
   2858  1.1  christos 			/*
   2859  1.1  christos 			 * We're adding a negative cache entry.
   2860  1.1  christos 			 */
   2861  1.1  christos 			if (covers == dns_rdatatype_any) {
   2862  1.1  christos 				/*
   2863  1.1  christos 				 * If we're adding an negative cache entry
   2864  1.1  christos 				 * which covers all types (NXDOMAIN,
   2865  1.1  christos 				 * NODATA(QTYPE=ANY)),
   2866  1.1  christos 				 *
   2867  1.1  christos 				 * We make all other data ancient so that the
   2868  1.1  christos 				 * only rdataset that can be found at this
   2869  1.1  christos 				 * node is the negative cache entry.
   2870  1.1  christos 				 */
   2871  1.1  christos 				for (topheader = qpnode->data;
   2872  1.1  christos 				     topheader != NULL;
   2873  1.1  christos 				     topheader = topheader->next)
   2874  1.1  christos 				{
   2875  1.1  christos 					mark_ancient(topheader);
   2876  1.1  christos 				}
   2877  1.1  christos 				goto find_header;
   2878  1.1  christos 			}
   2879  1.1  christos 			/*
   2880  1.1  christos 			 * Otherwise look for any RRSIGs of the given
   2881  1.1  christos 			 * type so they can be marked ancient later.
   2882  1.1  christos 			 */
   2883  1.1  christos 			for (topheader = qpnode->data; topheader != NULL;
   2884  1.1  christos 			     topheader = topheader->next)
   2885  1.1  christos 			{
   2886  1.1  christos 				if (topheader->type == sigtype) {
   2887  1.1  christos 					sigheader = topheader;
   2888  1.1  christos 					break;
   2889  1.1  christos 				}
   2890  1.1  christos 			}
   2891  1.1  christos 			negtype = DNS_TYPEPAIR_VALUE(covers, 0);
   2892  1.1  christos 		} else {
   2893  1.1  christos 			/*
   2894  1.1  christos 			 * We're adding something that isn't a
   2895  1.1  christos 			 * negative cache entry.  Look for an extant
   2896  1.1  christos 			 * non-ancient NXDOMAIN/NODATA(QTYPE=ANY) negative
   2897  1.1  christos 			 * cache entry.  If we're adding an RRSIG, also
   2898  1.1  christos 			 * check for an extant non-ancient NODATA ncache
   2899  1.1  christos 			 * entry which covers the same type as the RRSIG.
   2900  1.1  christos 			 */
   2901  1.1  christos 			for (topheader = qpnode->data; topheader != NULL;
   2902  1.1  christos 			     topheader = topheader->next)
   2903  1.1  christos 			{
   2904  1.1  christos 				if ((topheader->type == RDATATYPE_NCACHEANY) ||
   2905  1.1  christos 				    (newheader->type == sigtype &&
   2906  1.1  christos 				     topheader->type ==
   2907  1.1  christos 					     DNS_TYPEPAIR_VALUE(0, covers)))
   2908  1.1  christos 				{
   2909  1.1  christos 					break;
   2910  1.1  christos 				}
   2911  1.1  christos 			}
   2912  1.1  christos 			if (topheader != NULL && EXISTS(topheader) &&
   2913  1.1  christos 			    ACTIVE(topheader, now))
   2914  1.1  christos 			{
   2915  1.1  christos 				/*
   2916  1.1  christos 				 * Found one.
   2917  1.1  christos 				 */
   2918  1.1  christos 				if (trust < topheader->trust) {
   2919  1.1  christos 					/*
   2920  1.1  christos 					 * The NXDOMAIN/NODATA(QTYPE=ANY)
   2921  1.1  christos 					 * is more trusted.
   2922  1.1  christos 					 */
   2923  1.1  christos 					dns_slabheader_destroy(&newheader);
   2924  1.1  christos 					if (addedrdataset != NULL) {
   2925  1.1  christos 						bindrdataset(
   2926  1.1  christos 							qpdb, qpnode, topheader,
   2927  1.1  christos 							now, nlocktype,
   2928  1.1  christos 							tlocktype,
   2929  1.1  christos 							addedrdataset
   2930  1.1  christos 								DNS__DB_FLARG_PASS);
   2931  1.1  christos 					}
   2932  1.1  christos 					return DNS_R_UNCHANGED;
   2933  1.1  christos 				}
   2934  1.1  christos 				/*
   2935  1.1  christos 				 * The new rdataset is better.  Expire the
   2936  1.1  christos 				 * ncache entry.
   2937  1.1  christos 				 */
   2938  1.1  christos 				mark_ancient(topheader);
   2939  1.1  christos 				topheader = NULL;
   2940  1.1  christos 				goto find_header;
   2941  1.1  christos 			}
   2942  1.1  christos 			negtype = DNS_TYPEPAIR_VALUE(0, rdtype);
   2943  1.1  christos 		}
   2944  1.1  christos 	}
   2945  1.1  christos 
   2946  1.1  christos 	for (topheader = qpnode->data; topheader != NULL;
   2947  1.1  christos 	     topheader = topheader->next)
   2948  1.1  christos 	{
   2949  1.1  christos 		if (ACTIVE(topheader, now)) {
   2950  1.1  christos 			++ntypes;
   2951  1.1  christos 			expireheader = topheader;
   2952  1.1  christos 		}
   2953  1.1  christos 		if (prio_header(topheader)) {
   2954  1.1  christos 			prioheader = topheader;
   2955  1.1  christos 		}
   2956  1.1  christos 
   2957  1.1  christos 		if (topheader->type == newheader->type ||
   2958  1.1  christos 		    topheader->type == negtype)
   2959  1.1  christos 		{
   2960  1.1  christos 			break;
   2961  1.1  christos 		}
   2962  1.1  christos 		topheader_prev = topheader;
   2963  1.1  christos 	}
   2964  1.1  christos 
   2965  1.1  christos find_header:
   2966  1.1  christos 	/*
   2967  1.1  christos 	 * If header isn't NULL, we've found the right type.  There may be
   2968  1.1  christos 	 * IGNORE rdatasets between the top of the chain and the first real
   2969  1.1  christos 	 * data.  We skip over them.
   2970  1.1  christos 	 */
   2971  1.1  christos 	header = topheader;
   2972  1.1  christos 	while (header != NULL && IGNORE(header)) {
   2973  1.1  christos 		header = header->down;
   2974  1.1  christos 	}
   2975  1.1  christos 	if (header != NULL) {
   2976  1.1  christos 		header_nx = NONEXISTENT(header) ? true : false;
   2977  1.1  christos 
   2978  1.1  christos 		/*
   2979  1.1  christos 		 * Deleting an already non-existent rdataset has no effect.
   2980  1.1  christos 		 */
   2981  1.1  christos 		if (header_nx && newheader_nx) {
   2982  1.1  christos 			dns_slabheader_destroy(&newheader);
   2983  1.1  christos 			return DNS_R_UNCHANGED;
   2984  1.1  christos 		}
   2985  1.1  christos 
   2986  1.1  christos 		/*
   2987  1.1  christos 		 * Trying to add an rdataset with lower trust to a cache
   2988  1.1  christos 		 * DB has no effect, provided that the cache data isn't
   2989  1.1  christos 		 * stale. If the cache data is stale, new lower trust
   2990  1.1  christos 		 * data will supersede it below. Unclear what the best
   2991  1.1  christos 		 * policy is here.
   2992  1.1  christos 		 */
   2993  1.1  christos 		if (trust < header->trust && (ACTIVE(header, now) || header_nx))
   2994  1.1  christos 		{
   2995  1.5  christos 			isc_result_t result = DNS_R_UNCHANGED;
   2996  1.5  christos 			bindrdataset(qpdb, qpnode, header, now, nlocktype,
   2997  1.5  christos 				     tlocktype,
   2998  1.5  christos 				     addedrdataset DNS__DB_FLARG_PASS);
   2999  1.5  christos 			if (ACTIVE(header, now) &&
   3000  1.5  christos 			    (options & DNS_DBADD_EQUALOK) != 0 &&
   3001  1.5  christos 			    dns_rdataslab_equalx(
   3002  1.5  christos 				    (unsigned char *)header,
   3003  1.5  christos 				    (unsigned char *)newheader,
   3004  1.5  christos 				    (unsigned int)(sizeof(*newheader)),
   3005  1.5  christos 				    qpdb->common.rdclass,
   3006  1.5  christos 				    (dns_rdatatype_t)header->type))
   3007  1.5  christos 			{
   3008  1.5  christos 				result = ISC_R_SUCCESS;
   3009  1.5  christos 			}
   3010  1.1  christos 			dns_slabheader_destroy(&newheader);
   3011  1.5  christos 			return result;
   3012  1.1  christos 		}
   3013  1.1  christos 
   3014  1.1  christos 		/*
   3015  1.5  christos 		 * Don't replace existing NS in the cache if they already exist
   3016  1.5  christos 		 * and replacing the existing one would increase the TTL. This
   3017  1.5  christos 		 * prevents named being locked to old servers. Don't lower trust
   3018  1.5  christos 		 * of existing record if the update is forced. Nothing special
   3019  1.5  christos 		 * to be done w.r.t stale data; it gets replaced normally
   3020  1.5  christos 		 * further down.
   3021  1.1  christos 		 */
   3022  1.1  christos 		if (ACTIVE(header, now) && header->type == dns_rdatatype_ns &&
   3023  1.1  christos 		    !header_nx && !newheader_nx &&
   3024  1.1  christos 		    header->trust >= newheader->trust &&
   3025  1.5  christos 		    header->ttl < newheader->ttl &&
   3026  1.1  christos 		    dns_rdataslab_equalx((unsigned char *)header,
   3027  1.1  christos 					 (unsigned char *)newheader,
   3028  1.1  christos 					 (unsigned int)(sizeof(*newheader)),
   3029  1.1  christos 					 qpdb->common.rdclass,
   3030  1.1  christos 					 (dns_rdatatype_t)header->type))
   3031  1.1  christos 		{
   3032  1.1  christos 			if (header->last_used != now) {
   3033  1.1  christos 				ISC_LIST_UNLINK(
   3034  1.3  christos 					qpdb->buckets[HEADERNODE(header)->locknum]
   3035  1.3  christos 						.lru,
   3036  1.1  christos 					header, link);
   3037  1.1  christos 				header->last_used = now;
   3038  1.1  christos 				ISC_LIST_PREPEND(
   3039  1.3  christos 					qpdb->buckets[HEADERNODE(header)->locknum]
   3040  1.3  christos 						.lru,
   3041  1.1  christos 					header, link);
   3042  1.1  christos 			}
   3043  1.1  christos 			if (header->noqname == NULL &&
   3044  1.1  christos 			    newheader->noqname != NULL)
   3045  1.1  christos 			{
   3046  1.1  christos 				header->noqname = newheader->noqname;
   3047  1.1  christos 				newheader->noqname = NULL;
   3048  1.1  christos 			}
   3049  1.1  christos 			if (header->closest == NULL &&
   3050  1.1  christos 			    newheader->closest != NULL)
   3051  1.1  christos 			{
   3052  1.1  christos 				header->closest = newheader->closest;
   3053  1.1  christos 				newheader->closest = NULL;
   3054  1.1  christos 			}
   3055  1.1  christos 			dns_slabheader_destroy(&newheader);
   3056  1.1  christos 			if (addedrdataset != NULL) {
   3057  1.1  christos 				bindrdataset(qpdb, qpnode, header, now,
   3058  1.1  christos 					     nlocktype, tlocktype,
   3059  1.1  christos 					     addedrdataset DNS__DB_FLARG_PASS);
   3060  1.1  christos 			}
   3061  1.1  christos 			return ISC_R_SUCCESS;
   3062  1.1  christos 		}
   3063  1.1  christos 
   3064  1.1  christos 		/*
   3065  1.5  christos 		 * If we will be replacing a NS RRset force its TTL
   3066  1.1  christos 		 * to be no more than the current NS RRset's TTL.  This
   3067  1.1  christos 		 * ensures the delegations that are withdrawn are honoured.
   3068  1.1  christos 		 */
   3069  1.1  christos 		if (ACTIVE(header, now) && header->type == dns_rdatatype_ns &&
   3070  1.1  christos 		    !header_nx && !newheader_nx &&
   3071  1.1  christos 		    header->trust <= newheader->trust)
   3072  1.1  christos 		{
   3073  1.1  christos 			if (newheader->ttl > header->ttl) {
   3074  1.5  christos 				if (ZEROTTL(header)) {
   3075  1.5  christos 					DNS_SLABHEADER_SETATTR(
   3076  1.5  christos 						newheader,
   3077  1.5  christos 						DNS_SLABHEADERATTR_ZEROTTL);
   3078  1.5  christos 				}
   3079  1.1  christos 				newheader->ttl = header->ttl;
   3080  1.1  christos 			}
   3081  1.1  christos 		}
   3082  1.1  christos 		if (ACTIVE(header, now) &&
   3083  1.1  christos 		    (options & DNS_DBADD_PREFETCH) == 0 &&
   3084  1.1  christos 		    (header->type == dns_rdatatype_a ||
   3085  1.1  christos 		     header->type == dns_rdatatype_aaaa ||
   3086  1.1  christos 		     header->type == dns_rdatatype_ds ||
   3087  1.1  christos 		     header->type == DNS_SIGTYPE(dns_rdatatype_ds)) &&
   3088  1.1  christos 		    !header_nx && !newheader_nx &&
   3089  1.1  christos 		    header->trust >= newheader->trust &&
   3090  1.5  christos 		    header->ttl < newheader->ttl &&
   3091  1.1  christos 		    dns_rdataslab_equal((unsigned char *)header,
   3092  1.1  christos 					(unsigned char *)newheader,
   3093  1.1  christos 					(unsigned int)(sizeof(*newheader))))
   3094  1.1  christos 		{
   3095  1.1  christos 			if (header->last_used != now) {
   3096  1.1  christos 				ISC_LIST_UNLINK(
   3097  1.3  christos 					qpdb->buckets[HEADERNODE(header)->locknum]
   3098  1.3  christos 						.lru,
   3099  1.1  christos 					header, link);
   3100  1.1  christos 				header->last_used = now;
   3101  1.1  christos 				ISC_LIST_PREPEND(
   3102  1.3  christos 					qpdb->buckets[HEADERNODE(header)->locknum]
   3103  1.3  christos 						.lru,
   3104  1.1  christos 					header, link);
   3105  1.1  christos 			}
   3106  1.1  christos 			if (header->noqname == NULL &&
   3107  1.1  christos 			    newheader->noqname != NULL)
   3108  1.1  christos 			{
   3109  1.1  christos 				header->noqname = newheader->noqname;
   3110  1.1  christos 				newheader->noqname = NULL;
   3111  1.1  christos 			}
   3112  1.1  christos 			if (header->closest == NULL &&
   3113  1.1  christos 			    newheader->closest != NULL)
   3114  1.1  christos 			{
   3115  1.1  christos 				header->closest = newheader->closest;
   3116  1.1  christos 				newheader->closest = NULL;
   3117  1.1  christos 			}
   3118  1.1  christos 			dns_slabheader_destroy(&newheader);
   3119  1.1  christos 			if (addedrdataset != NULL) {
   3120  1.1  christos 				bindrdataset(qpdb, qpnode, header, now,
   3121  1.1  christos 					     nlocktype, tlocktype,
   3122  1.1  christos 					     addedrdataset DNS__DB_FLARG_PASS);
   3123  1.1  christos 			}
   3124  1.1  christos 			return ISC_R_SUCCESS;
   3125  1.1  christos 		}
   3126  1.1  christos 
   3127  1.1  christos 		if (loading) {
   3128  1.1  christos 			newheader->down = NULL;
   3129  1.1  christos 			idx = HEADERNODE(newheader)->locknum;
   3130  1.1  christos 			if (ZEROTTL(newheader)) {
   3131  1.1  christos 				newheader->last_used = qpdb->last_used + 1;
   3132  1.3  christos 				ISC_LIST_APPEND(qpdb->buckets[idx].lru,
   3133  1.3  christos 						newheader, link);
   3134  1.1  christos 			} else {
   3135  1.3  christos 				ISC_LIST_PREPEND(qpdb->buckets[idx].lru,
   3136  1.3  christos 						 newheader, link);
   3137  1.1  christos 			}
   3138  1.3  christos 			isc_heap_insert(qpdb->buckets[idx].heap, newheader);
   3139  1.3  christos 			newheader->heap = qpdb->buckets[idx].heap;
   3140  1.1  christos 
   3141  1.1  christos 			/*
   3142  1.1  christos 			 * There are no other references to 'header' when
   3143  1.1  christos 			 * loading, so we MAY clean up 'header' now.
   3144  1.1  christos 			 * Since we don't generate changed records when
   3145  1.1  christos 			 * loading, we MUST clean up 'header' now.
   3146  1.1  christos 			 */
   3147  1.1  christos 			if (topheader_prev != NULL) {
   3148  1.1  christos 				topheader_prev->next = newheader;
   3149  1.1  christos 			} else {
   3150  1.1  christos 				qpnode->data = newheader;
   3151  1.1  christos 			}
   3152  1.1  christos 			newheader->next = topheader->next;
   3153  1.1  christos 			dns_slabheader_destroy(&header);
   3154  1.1  christos 		} else {
   3155  1.1  christos 			idx = HEADERNODE(newheader)->locknum;
   3156  1.3  christos 			isc_heap_insert(qpdb->buckets[idx].heap, newheader);
   3157  1.3  christos 			newheader->heap = qpdb->buckets[idx].heap;
   3158  1.1  christos 			if (ZEROTTL(newheader)) {
   3159  1.1  christos 				newheader->last_used = qpdb->last_used + 1;
   3160  1.3  christos 				ISC_LIST_APPEND(qpdb->buckets[idx].lru,
   3161  1.3  christos 						newheader, link);
   3162  1.1  christos 			} else {
   3163  1.3  christos 				ISC_LIST_PREPEND(qpdb->buckets[idx].lru,
   3164  1.3  christos 						 newheader, link);
   3165  1.1  christos 			}
   3166  1.1  christos 			if (topheader_prev != NULL) {
   3167  1.1  christos 				topheader_prev->next = newheader;
   3168  1.1  christos 			} else {
   3169  1.1  christos 				qpnode->data = newheader;
   3170  1.1  christos 			}
   3171  1.1  christos 			newheader->next = topheader->next;
   3172  1.1  christos 			newheader->down = topheader;
   3173  1.1  christos 			topheader->next = newheader;
   3174  1.1  christos 			mark_ancient(header);
   3175  1.1  christos 			if (sigheader != NULL) {
   3176  1.1  christos 				mark_ancient(sigheader);
   3177  1.1  christos 			}
   3178  1.1  christos 		}
   3179  1.1  christos 	} else {
   3180  1.1  christos 		/*
   3181  1.1  christos 		 * No non-IGNORED rdatasets of the given type exist at
   3182  1.1  christos 		 * this node.
   3183  1.1  christos 		 */
   3184  1.1  christos 
   3185  1.1  christos 		/*
   3186  1.1  christos 		 * If we're trying to delete the type, don't bother.
   3187  1.1  christos 		 */
   3188  1.1  christos 		if (newheader_nx) {
   3189  1.1  christos 			dns_slabheader_destroy(&newheader);
   3190  1.1  christos 			return DNS_R_UNCHANGED;
   3191  1.1  christos 		}
   3192  1.1  christos 
   3193  1.1  christos 		idx = HEADERNODE(newheader)->locknum;
   3194  1.3  christos 		isc_heap_insert(qpdb->buckets[idx].heap, newheader);
   3195  1.3  christos 		newheader->heap = qpdb->buckets[idx].heap;
   3196  1.1  christos 		if (ZEROTTL(newheader)) {
   3197  1.3  christos 			ISC_LIST_APPEND(qpdb->buckets[idx].lru, newheader,
   3198  1.3  christos 					link);
   3199  1.1  christos 		} else {
   3200  1.3  christos 			ISC_LIST_PREPEND(qpdb->buckets[idx].lru, newheader,
   3201  1.3  christos 					 link);
   3202  1.1  christos 		}
   3203  1.1  christos 
   3204  1.1  christos 		if (topheader != NULL) {
   3205  1.1  christos 			/*
   3206  1.1  christos 			 * We have a list of rdatasets of the given type,
   3207  1.1  christos 			 * but they're all marked IGNORE.  We simply insert
   3208  1.1  christos 			 * the new rdataset at the head of the list.
   3209  1.1  christos 			 *
   3210  1.1  christos 			 * Ignored rdatasets cannot occur during loading, so
   3211  1.1  christos 			 * we INSIST on it.
   3212  1.1  christos 			 */
   3213  1.1  christos 			INSIST(!loading);
   3214  1.1  christos 			if (topheader_prev != NULL) {
   3215  1.1  christos 				topheader_prev->next = newheader;
   3216  1.1  christos 			} else {
   3217  1.1  christos 				qpnode->data = newheader;
   3218  1.1  christos 			}
   3219  1.1  christos 			newheader->next = topheader->next;
   3220  1.1  christos 			newheader->down = topheader;
   3221  1.1  christos 			topheader->next = newheader;
   3222  1.1  christos 			qpnode->dirty = 1;
   3223  1.1  christos 		} else {
   3224  1.1  christos 			/*
   3225  1.1  christos 			 * No rdatasets of the given type exist at the node.
   3226  1.1  christos 			 */
   3227  1.1  christos 			INSIST(newheader->down == NULL);
   3228  1.1  christos 
   3229  1.1  christos 			if (prio_header(newheader)) {
   3230  1.1  christos 				/* This is a priority type, prepend it */
   3231  1.1  christos 				newheader->next = qpnode->data;
   3232  1.1  christos 				qpnode->data = newheader;
   3233  1.1  christos 			} else if (prioheader != NULL) {
   3234  1.1  christos 				/* Append after the priority headers */
   3235  1.1  christos 				newheader->next = prioheader->next;
   3236  1.1  christos 				prioheader->next = newheader;
   3237  1.1  christos 			} else {
   3238  1.1  christos 				/* There were no priority headers */
   3239  1.1  christos 				newheader->next = qpnode->data;
   3240  1.1  christos 				qpnode->data = newheader;
   3241  1.1  christos 			}
   3242  1.1  christos 
   3243  1.1  christos 			if (overmaxtype(qpdb, ntypes)) {
   3244  1.1  christos 				if (expireheader == NULL) {
   3245  1.1  christos 					expireheader = newheader;
   3246  1.1  christos 				}
   3247  1.1  christos 				if (NEGATIVE(newheader) &&
   3248  1.1  christos 				    !prio_header(newheader))
   3249  1.1  christos 				{
   3250  1.1  christos 					/*
   3251  1.1  christos 					 * Add the new non-priority negative
   3252  1.1  christos 					 * header to the database only
   3253  1.1  christos 					 * temporarily.
   3254  1.1  christos 					 */
   3255  1.1  christos 					expireheader = newheader;
   3256  1.1  christos 				}
   3257  1.1  christos 
   3258  1.1  christos 				mark_ancient(expireheader);
   3259  1.1  christos 				/*
   3260  1.1  christos 				 * FIXME: In theory, we should mark the RRSIG
   3261  1.1  christos 				 * and the header at the same time, but there is
   3262  1.1  christos 				 * no direct link between those two header, so
   3263  1.1  christos 				 * we would have to check the whole list again.
   3264  1.1  christos 				 */
   3265  1.1  christos 			}
   3266  1.1  christos 		}
   3267  1.1  christos 	}
   3268  1.1  christos 
   3269  1.1  christos 	if (addedrdataset != NULL) {
   3270  1.1  christos 		bindrdataset(qpdb, qpnode, newheader, now, nlocktype, tlocktype,
   3271  1.1  christos 			     addedrdataset DNS__DB_FLARG_PASS);
   3272  1.1  christos 	}
   3273  1.1  christos 
   3274  1.1  christos 	return ISC_R_SUCCESS;
   3275  1.1  christos }
   3276  1.1  christos 
   3277  1.1  christos static isc_result_t
   3278  1.1  christos addnoqname(isc_mem_t *mctx, dns_slabheader_t *newheader, uint32_t maxrrperset,
   3279  1.1  christos 	   dns_rdataset_t *rdataset) {
   3280  1.1  christos 	isc_result_t result;
   3281  1.1  christos 	dns_slabheader_proof_t *noqname = NULL;
   3282  1.1  christos 	dns_name_t name = DNS_NAME_INITEMPTY;
   3283  1.1  christos 	dns_rdataset_t neg = DNS_RDATASET_INIT, negsig = DNS_RDATASET_INIT;
   3284  1.1  christos 	isc_region_t r1, r2;
   3285  1.1  christos 
   3286  1.1  christos 	result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
   3287  1.1  christos 	RUNTIME_CHECK(result == ISC_R_SUCCESS);
   3288  1.1  christos 
   3289  1.1  christos 	result = dns_rdataslab_fromrdataset(&neg, mctx, &r1, 0, maxrrperset);
   3290  1.1  christos 	if (result != ISC_R_SUCCESS) {
   3291  1.1  christos 		goto cleanup;
   3292  1.1  christos 	}
   3293  1.1  christos 
   3294  1.1  christos 	result = dns_rdataslab_fromrdataset(&negsig, mctx, &r2, 0, maxrrperset);
   3295  1.1  christos 	if (result != ISC_R_SUCCESS) {
   3296  1.1  christos 		goto cleanup;
   3297  1.1  christos 	}
   3298  1.1  christos 
   3299  1.1  christos 	noqname = isc_mem_get(mctx, sizeof(*noqname));
   3300  1.1  christos 	*noqname = (dns_slabheader_proof_t){
   3301  1.1  christos 		.neg = r1.base,
   3302  1.1  christos 		.negsig = r2.base,
   3303  1.1  christos 		.type = neg.type,
   3304  1.1  christos 		.name = DNS_NAME_INITEMPTY,
   3305  1.1  christos 	};
   3306  1.1  christos 	dns_name_dup(&name, mctx, &noqname->name);
   3307  1.1  christos 	newheader->noqname = noqname;
   3308  1.1  christos 
   3309  1.1  christos cleanup:
   3310  1.1  christos 	dns_rdataset_disassociate(&neg);
   3311  1.1  christos 	dns_rdataset_disassociate(&negsig);
   3312  1.1  christos 
   3313  1.1  christos 	return result;
   3314  1.1  christos }
   3315  1.1  christos 
   3316  1.1  christos static isc_result_t
   3317  1.1  christos addclosest(isc_mem_t *mctx, dns_slabheader_t *newheader, uint32_t maxrrperset,
   3318  1.1  christos 	   dns_rdataset_t *rdataset) {
   3319  1.1  christos 	isc_result_t result;
   3320  1.1  christos 	dns_slabheader_proof_t *closest = NULL;
   3321  1.1  christos 	dns_name_t name = DNS_NAME_INITEMPTY;
   3322  1.1  christos 	dns_rdataset_t neg = DNS_RDATASET_INIT, negsig = DNS_RDATASET_INIT;
   3323  1.1  christos 	isc_region_t r1, r2;
   3324  1.1  christos 
   3325  1.1  christos 	result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
   3326  1.1  christos 	RUNTIME_CHECK(result == ISC_R_SUCCESS);
   3327  1.1  christos 
   3328  1.1  christos 	result = dns_rdataslab_fromrdataset(&neg, mctx, &r1, 0, maxrrperset);
   3329  1.1  christos 	if (result != ISC_R_SUCCESS) {
   3330  1.1  christos 		goto cleanup;
   3331  1.1  christos 	}
   3332  1.1  christos 
   3333  1.1  christos 	result = dns_rdataslab_fromrdataset(&negsig, mctx, &r2, 0, maxrrperset);
   3334  1.1  christos 	if (result != ISC_R_SUCCESS) {
   3335  1.1  christos 		goto cleanup;
   3336  1.1  christos 	}
   3337  1.1  christos 
   3338  1.1  christos 	closest = isc_mem_get(mctx, sizeof(*closest));
   3339  1.1  christos 	*closest = (dns_slabheader_proof_t){
   3340  1.1  christos 		.neg = r1.base,
   3341  1.1  christos 		.negsig = r2.base,
   3342  1.1  christos 		.name = DNS_NAME_INITEMPTY,
   3343  1.1  christos 		.type = neg.type,
   3344  1.1  christos 	};
   3345  1.1  christos 	dns_name_dup(&name, mctx, &closest->name);
   3346  1.1  christos 	newheader->closest = closest;
   3347  1.1  christos 
   3348  1.1  christos cleanup:
   3349  1.1  christos 	dns_rdataset_disassociate(&neg);
   3350  1.1  christos 	dns_rdataset_disassociate(&negsig);
   3351  1.1  christos 	return result;
   3352  1.1  christos }
   3353  1.1  christos 
/*
 * Forward declaration.  addrdataset() calls this while holding the write
 * lock of the bucket identified by 'locknum'; the definition is not
 * visible in this part of the file.
 */
static void
expire_ttl_headers(qpcache_t *qpdb, unsigned int locknum,
		   isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep,
		   isc_stdtime_t now, bool cache_is_overmem DNS__DB_FLARG);
   3358  1.1  christos 
   3359  1.1  christos static isc_result_t
   3360  1.1  christos addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
   3361  1.1  christos 	    isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
   3362  1.1  christos 	    dns_rdataset_t *addedrdataset DNS__DB_FLARG) {
   3363  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   3364  1.1  christos 	qpcnode_t *qpnode = (qpcnode_t *)node;
   3365  1.1  christos 	isc_region_t region;
   3366  1.1  christos 	dns_slabheader_t *newheader = NULL;
   3367  1.1  christos 	isc_result_t result;
   3368  1.1  christos 	bool delegating = false;
   3369  1.1  christos 	bool newnsec;
   3370  1.1  christos 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
   3371  1.1  christos 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   3372  1.3  christos 	isc_rwlock_t *nlock = NULL;
   3373  1.1  christos 	bool cache_is_overmem = false;
   3374  1.1  christos 	dns_fixedname_t fixed;
   3375  1.1  christos 	dns_name_t *name = NULL;
   3376  1.1  christos 
   3377  1.1  christos 	REQUIRE(VALID_QPDB(qpdb));
   3378  1.1  christos 	REQUIRE(version == NULL);
   3379  1.1  christos 
   3380  1.1  christos 	if (now == 0) {
   3381  1.1  christos 		now = isc_stdtime_now();
   3382  1.1  christos 	}
   3383  1.1  christos 
   3384  1.1  christos 	result = dns_rdataslab_fromrdataset(rdataset, qpdb->common.mctx,
   3385  1.1  christos 					    &region, sizeof(dns_slabheader_t),
   3386  1.1  christos 					    qpdb->maxrrperset);
   3387  1.1  christos 	if (result != ISC_R_SUCCESS) {
   3388  1.1  christos 		if (result == DNS_R_TOOMANYRECORDS) {
   3389  1.1  christos 			dns__db_logtoomanyrecords((dns_db_t *)qpdb,
   3390  1.1  christos 						  &qpnode->name, rdataset->type,
   3391  1.1  christos 						  "adding", qpdb->maxrrperset);
   3392  1.1  christos 		}
   3393  1.1  christos 		return result;
   3394  1.1  christos 	}
   3395  1.1  christos 
   3396  1.1  christos 	name = dns_fixedname_initname(&fixed);
   3397  1.1  christos 	dns_name_copy(&qpnode->name, name);
   3398  1.1  christos 	dns_rdataset_getownercase(rdataset, name);
   3399  1.1  christos 
   3400  1.1  christos 	newheader = (dns_slabheader_t *)region.base;
   3401  1.1  christos 	*newheader = (dns_slabheader_t){
   3402  1.1  christos 		.type = DNS_TYPEPAIR_VALUE(rdataset->type, rdataset->covers),
   3403  1.1  christos 		.trust = rdataset->trust,
   3404  1.1  christos 		.last_used = now,
   3405  1.1  christos 		.node = qpnode,
   3406  1.1  christos 	};
   3407  1.1  christos 
   3408  1.1  christos 	dns_slabheader_reset(newheader, db, node);
   3409  1.1  christos 	setttl(newheader, rdataset->ttl + now);
   3410  1.1  christos 	if (rdataset->ttl == 0U) {
   3411  1.1  christos 		DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_ZEROTTL);
   3412  1.1  christos 	}
   3413  1.1  christos 	atomic_init(&newheader->count,
   3414  1.1  christos 		    atomic_fetch_add_relaxed(&init_count, 1));
   3415  1.1  christos 	if ((rdataset->attributes & DNS_RDATASETATTR_PREFETCH) != 0) {
   3416  1.1  christos 		DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_PREFETCH);
   3417  1.1  christos 	}
   3418  1.1  christos 	if ((rdataset->attributes & DNS_RDATASETATTR_NEGATIVE) != 0) {
   3419  1.1  christos 		DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_NEGATIVE);
   3420  1.1  christos 	}
   3421  1.1  christos 	if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0) {
   3422  1.1  christos 		DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_NXDOMAIN);
   3423  1.1  christos 	}
   3424  1.1  christos 	if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0) {
   3425  1.1  christos 		DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_OPTOUT);
   3426  1.1  christos 	}
   3427  1.1  christos 	if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
   3428  1.1  christos 		result = addnoqname(qpdb->common.mctx, newheader,
   3429  1.1  christos 				    qpdb->maxrrperset, rdataset);
   3430  1.1  christos 		if (result != ISC_R_SUCCESS) {
   3431  1.1  christos 			dns_slabheader_destroy(&newheader);
   3432  1.1  christos 			return result;
   3433  1.1  christos 		}
   3434  1.1  christos 	}
   3435  1.1  christos 	if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
   3436  1.1  christos 		result = addclosest(qpdb->common.mctx, newheader,
   3437  1.1  christos 				    qpdb->maxrrperset, rdataset);
   3438  1.1  christos 		if (result != ISC_R_SUCCESS) {
   3439  1.1  christos 			dns_slabheader_destroy(&newheader);
   3440  1.1  christos 			return result;
   3441  1.1  christos 		}
   3442  1.1  christos 	}
   3443  1.1  christos 
   3444  1.1  christos 	/*
   3445  1.1  christos 	 * If we're adding a delegation type (which would be an NS or DNAME
   3446  1.1  christos 	 * for a zone, but only DNAME counts for a cache), we need to set
   3447  1.1  christos 	 * the callback bit on the node.
   3448  1.1  christos 	 */
   3449  1.1  christos 	if (rdataset->type == dns_rdatatype_dname) {
   3450  1.1  christos 		delegating = true;
   3451  1.1  christos 	}
   3452  1.1  christos 
   3453  1.1  christos 	/*
   3454  1.1  christos 	 * Add to the auxiliary NSEC tree if we're adding an NSEC record.
   3455  1.1  christos 	 */
   3456  1.1  christos 	TREE_RDLOCK(&qpdb->tree_lock, &tlocktype);
   3457  1.1  christos 	if (qpnode->nsec != DNS_DB_NSEC_HAS_NSEC &&
   3458  1.1  christos 	    rdataset->type == dns_rdatatype_nsec)
   3459  1.1  christos 	{
   3460  1.1  christos 		newnsec = true;
   3461  1.1  christos 	} else {
   3462  1.1  christos 		newnsec = false;
   3463  1.1  christos 	}
   3464  1.1  christos 	TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
   3465  1.1  christos 
   3466  1.1  christos 	/*
   3467  1.1  christos 	 * If we're adding a delegation type, adding to the auxiliary NSEC
   3468  1.1  christos 	 * tree, or the DB is a cache in an overmem state, hold an
   3469  1.1  christos 	 * exclusive lock on the tree.  In the latter case the lock does
   3470  1.1  christos 	 * not necessarily have to be acquired but it will help purge
   3471  1.1  christos 	 * ancient entries more effectively.
   3472  1.1  christos 	 */
   3473  1.1  christos 	if (isc_mem_isovermem(qpdb->common.mctx)) {
   3474  1.1  christos 		cache_is_overmem = true;
   3475  1.1  christos 	}
   3476  1.1  christos 	if (delegating || newnsec || cache_is_overmem) {
   3477  1.1  christos 		TREE_WRLOCK(&qpdb->tree_lock, &tlocktype);
   3478  1.1  christos 	}
   3479  1.1  christos 
   3480  1.1  christos 	if (cache_is_overmem) {
   3481  1.1  christos 		overmem(qpdb, newheader, &tlocktype DNS__DB_FLARG_PASS);
   3482  1.1  christos 	}
   3483  1.1  christos 
   3484  1.3  christos 	nlock = &qpdb->buckets[qpnode->locknum].lock;
   3485  1.3  christos 
   3486  1.3  christos 	NODE_WRLOCK(nlock, &nlocktype);
   3487  1.1  christos 
   3488  1.1  christos 	if (qpdb->rrsetstats != NULL) {
   3489  1.1  christos 		DNS_SLABHEADER_SETATTR(newheader, DNS_SLABHEADERATTR_STATCOUNT);
   3490  1.1  christos 		update_rrsetstats(qpdb->rrsetstats, newheader->type,
   3491  1.1  christos 				  atomic_load_acquire(&newheader->attributes),
   3492  1.1  christos 				  true);
   3493  1.1  christos 	}
   3494  1.1  christos 
   3495  1.1  christos 	expire_ttl_headers(qpdb, qpnode->locknum, &nlocktype, &tlocktype, now,
   3496  1.1  christos 			   cache_is_overmem DNS__DB_FLARG_PASS);
   3497  1.1  christos 
   3498  1.1  christos 	/*
   3499  1.1  christos 	 * If we've been holding a write lock on the tree just for
   3500  1.1  christos 	 * cleaning, we can release it now.  However, we still need the
   3501  1.1  christos 	 * node lock.
   3502  1.1  christos 	 */
   3503  1.1  christos 	if (tlocktype == isc_rwlocktype_write && !delegating && !newnsec) {
   3504  1.1  christos 		TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
   3505  1.1  christos 	}
   3506  1.1  christos 
   3507  1.1  christos 	result = ISC_R_SUCCESS;
   3508  1.1  christos 	if (newnsec) {
   3509  1.1  christos 		qpcnode_t *nsecnode = NULL;
   3510  1.1  christos 
   3511  1.1  christos 		result = dns_qp_getname(qpdb->nsec, name, (void **)&nsecnode,
   3512  1.1  christos 					NULL);
   3513  1.1  christos 		if (result == ISC_R_SUCCESS) {
   3514  1.1  christos 			result = ISC_R_SUCCESS;
   3515  1.1  christos 		} else {
   3516  1.1  christos 			INSIST(nsecnode == NULL);
   3517  1.1  christos 			nsecnode = new_qpcnode(qpdb, name);
   3518  1.1  christos 			nsecnode->nsec = DNS_DB_NSEC_NSEC;
   3519  1.1  christos 			result = dns_qp_insert(qpdb->nsec, nsecnode, 0);
   3520  1.1  christos 			INSIST(result == ISC_R_SUCCESS);
   3521  1.1  christos 			qpcnode_detach(&nsecnode);
   3522  1.1  christos 		}
   3523  1.1  christos 		qpnode->nsec = DNS_DB_NSEC_HAS_NSEC;
   3524  1.1  christos 	}
   3525  1.1  christos 
   3526  1.1  christos 	if (result == ISC_R_SUCCESS) {
   3527  1.1  christos 		result = add(qpdb, qpnode, name, newheader, options, false,
   3528  1.1  christos 			     addedrdataset, now, nlocktype,
   3529  1.1  christos 			     tlocktype DNS__DB_FLARG_PASS);
   3530  1.1  christos 	}
   3531  1.1  christos 	if (result == ISC_R_SUCCESS && delegating) {
   3532  1.1  christos 		qpnode->delegating = 1;
   3533  1.1  christos 	}
   3534  1.1  christos 
   3535  1.3  christos 	NODE_UNLOCK(nlock, &nlocktype);
   3536  1.1  christos 
   3537  1.1  christos 	if (tlocktype != isc_rwlocktype_none) {
   3538  1.1  christos 		TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
   3539  1.1  christos 	}
   3540  1.1  christos 	INSIST(tlocktype == isc_rwlocktype_none);
   3541  1.1  christos 
   3542  1.1  christos 	return result;
   3543  1.1  christos }
   3544  1.1  christos 
   3545  1.1  christos static isc_result_t
   3546  1.1  christos deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
   3547  1.1  christos 	       dns_rdatatype_t type, dns_rdatatype_t covers DNS__DB_FLARG) {
   3548  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   3549  1.1  christos 	qpcnode_t *qpnode = (qpcnode_t *)node;
   3550  1.1  christos 	isc_result_t result;
   3551  1.1  christos 	dns_slabheader_t *newheader = NULL;
   3552  1.1  christos 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   3553  1.3  christos 	isc_rwlock_t *nlock = NULL;
   3554  1.1  christos 
   3555  1.1  christos 	REQUIRE(VALID_QPDB(qpdb));
   3556  1.1  christos 	REQUIRE(version == NULL);
   3557  1.1  christos 
   3558  1.1  christos 	if (type == dns_rdatatype_any) {
   3559  1.1  christos 		return ISC_R_NOTIMPLEMENTED;
   3560  1.1  christos 	}
   3561  1.1  christos 	if (type == dns_rdatatype_rrsig && covers == 0) {
   3562  1.1  christos 		return ISC_R_NOTIMPLEMENTED;
   3563  1.1  christos 	}
   3564  1.1  christos 
   3565  1.1  christos 	newheader = dns_slabheader_new(db, node);
   3566  1.1  christos 	newheader->type = DNS_TYPEPAIR_VALUE(type, covers);
   3567  1.1  christos 	setttl(newheader, 0);
   3568  1.1  christos 	atomic_init(&newheader->attributes, DNS_SLABHEADERATTR_NONEXISTENT);
   3569  1.1  christos 
   3570  1.3  christos 	nlock = &qpdb->buckets[qpnode->locknum].lock;
   3571  1.3  christos 	NODE_WRLOCK(nlock, &nlocktype);
   3572  1.1  christos 	result = add(qpdb, qpnode, NULL, newheader, DNS_DBADD_FORCE, false,
   3573  1.1  christos 		     NULL, 0, nlocktype,
   3574  1.1  christos 		     isc_rwlocktype_none DNS__DB_FLARG_PASS);
   3575  1.3  christos 	NODE_UNLOCK(nlock, &nlocktype);
   3576  1.1  christos 
   3577  1.1  christos 	return result;
   3578  1.1  christos }
   3579  1.1  christos 
   3580  1.1  christos static unsigned int
   3581  1.1  christos nodecount(dns_db_t *db, dns_dbtree_t tree) {
   3582  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   3583  1.1  christos 	dns_qp_memusage_t mu;
   3584  1.1  christos 	isc_rwlocktype_t tlocktype = isc_rwlocktype_none;
   3585  1.1  christos 
   3586  1.1  christos 	REQUIRE(VALID_QPDB(qpdb));
   3587  1.1  christos 
   3588  1.1  christos 	TREE_RDLOCK(&qpdb->tree_lock, &tlocktype);
   3589  1.1  christos 	switch (tree) {
   3590  1.1  christos 	case dns_dbtree_main:
   3591  1.1  christos 		mu = dns_qp_memusage(qpdb->tree);
   3592  1.1  christos 		break;
   3593  1.1  christos 	case dns_dbtree_nsec:
   3594  1.1  christos 		mu = dns_qp_memusage(qpdb->nsec);
   3595  1.1  christos 		break;
   3596  1.1  christos 	default:
   3597  1.1  christos 		UNREACHABLE();
   3598  1.1  christos 	}
   3599  1.1  christos 	TREE_UNLOCK(&qpdb->tree_lock, &tlocktype);
   3600  1.1  christos 
   3601  1.1  christos 	return mu.leaves;
   3602  1.1  christos }
   3603  1.1  christos 
   3604  1.1  christos static void
   3605  1.1  christos locknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type) {
   3606  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   3607  1.1  christos 	qpcnode_t *qpnode = (qpcnode_t *)node;
   3608  1.1  christos 
   3609  1.3  christos 	RWLOCK(&qpdb->buckets[qpnode->locknum].lock, type);
   3610  1.1  christos }
   3611  1.1  christos 
   3612  1.1  christos static void
   3613  1.1  christos unlocknode(dns_db_t *db, dns_dbnode_t *node, isc_rwlocktype_t type) {
   3614  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   3615  1.1  christos 	qpcnode_t *qpnode = (qpcnode_t *)node;
   3616  1.1  christos 
   3617  1.3  christos 	RWUNLOCK(&qpdb->buckets[qpnode->locknum].lock, type);
   3618  1.1  christos }
   3619  1.1  christos 
   3620  1.1  christos isc_result_t
   3621  1.1  christos dns__qpcache_create(isc_mem_t *mctx, const dns_name_t *origin,
   3622  1.1  christos 		    dns_dbtype_t type, dns_rdataclass_t rdclass,
   3623  1.1  christos 		    unsigned int argc, char *argv[],
   3624  1.1  christos 		    void *driverarg ISC_ATTR_UNUSED, dns_db_t **dbp) {
   3625  1.1  christos 	qpcache_t *qpdb = NULL;
   3626  1.1  christos 	isc_mem_t *hmctx = mctx;
   3627  1.1  christos 	isc_loop_t *loop = isc_loop();
   3628  1.1  christos 	int i;
   3629  1.3  christos 	isc_loopmgr_t *loopmgr = isc_loop_getloopmgr(loop);
   3630  1.3  christos 	size_t nloops = isc_loopmgr_nloops(loopmgr);
   3631  1.1  christos 
   3632  1.1  christos 	/* This database implementation only supports cache semantics */
   3633  1.1  christos 	REQUIRE(type == dns_dbtype_cache);
   3634  1.1  christos 	REQUIRE(loop != NULL);
   3635  1.1  christos 
   3636  1.3  christos 	qpdb = isc_mem_get(mctx,
   3637  1.3  christos 			   sizeof(*qpdb) + nloops * sizeof(qpdb->buckets[0]));
   3638  1.1  christos 	*qpdb = (qpcache_t){
   3639  1.1  christos 		.common.methods = &qpdb_cachemethods,
   3640  1.1  christos 		.common.origin = DNS_NAME_INITEMPTY,
   3641  1.1  christos 		.common.rdclass = rdclass,
   3642  1.1  christos 		.common.attributes = DNS_DBATTR_CACHE,
   3643  1.3  christos 		.common.references = 1,
   3644  1.1  christos 		.loopmgr = isc_loop_getloopmgr(loop),
   3645  1.3  christos 		.references = 1,
   3646  1.3  christos 		.buckets_count = nloops,
   3647  1.1  christos 	};
   3648  1.1  christos 
   3649  1.1  christos 	/*
   3650  1.1  christos 	 * If argv[0] exists, it points to a memory context to use for heap
   3651  1.1  christos 	 */
   3652  1.1  christos 	if (argc != 0) {
   3653  1.1  christos 		hmctx = (isc_mem_t *)argv[0];
   3654  1.1  christos 	}
   3655  1.1  christos 
   3656  1.1  christos 	isc_rwlock_init(&qpdb->lock);
   3657  1.1  christos 	TREE_INITLOCK(&qpdb->tree_lock);
   3658  1.1  christos 
   3659  1.3  christos 	qpdb->buckets_count = isc_loopmgr_nloops(qpdb->loopmgr);
   3660  1.1  christos 
   3661  1.1  christos 	dns_rdatasetstats_create(mctx, &qpdb->rrsetstats);
   3662  1.3  christos 	for (i = 0; i < (int)qpdb->buckets_count; i++) {
   3663  1.3  christos 		ISC_LIST_INIT(qpdb->buckets[i].lru);
   3664  1.1  christos 
   3665  1.3  christos 		qpdb->buckets[i].heap = NULL;
   3666  1.1  christos 		isc_heap_create(hmctx, ttl_sooner, set_index, 0,
   3667  1.3  christos 				&qpdb->buckets[i].heap);
   3668  1.1  christos 
   3669  1.3  christos 		isc_queue_init(&qpdb->buckets[i].deadnodes);
   3670  1.1  christos 
   3671  1.3  christos 		NODE_INITLOCK(&qpdb->buckets[i].lock);
   3672  1.1  christos 	}
   3673  1.1  christos 
   3674  1.1  christos 	/*
   3675  1.1  christos 	 * Attach to the mctx.  The database will persist so long as there
   3676  1.1  christos 	 * are references to it, and attaching to the mctx ensures that our
   3677  1.1  christos 	 * mctx won't disappear out from under us.
   3678  1.1  christos 	 */
   3679  1.1  christos 	isc_mem_attach(mctx, &qpdb->common.mctx);
   3680  1.1  christos 	isc_mem_attach(hmctx, &qpdb->hmctx);
   3681  1.1  christos 
   3682  1.1  christos 	/*
   3683  1.1  christos 	 * Make a copy of the origin name.
   3684  1.1  christos 	 */
   3685  1.1  christos 	dns_name_dupwithoffsets(origin, mctx, &qpdb->common.origin);
   3686  1.1  christos 
   3687  1.1  christos 	/*
   3688  1.1  christos 	 * Make the qp tries.
   3689  1.1  christos 	 */
   3690  1.1  christos 	dns_qp_create(mctx, &qpmethods, qpdb, &qpdb->tree);
   3691  1.1  christos 	dns_qp_create(mctx, &qpmethods, qpdb, &qpdb->nsec);
   3692  1.1  christos 
   3693  1.1  christos 	qpdb->common.magic = DNS_DB_MAGIC;
   3694  1.1  christos 	qpdb->common.impmagic = QPDB_MAGIC;
   3695  1.1  christos 
   3696  1.1  christos 	*dbp = (dns_db_t *)qpdb;
   3697  1.1  christos 
   3698  1.1  christos 	return ISC_R_SUCCESS;
   3699  1.1  christos }
   3700  1.1  christos 
   3701  1.1  christos /*
   3702  1.1  christos  * Rdataset Iterator Methods
   3703  1.1  christos  */
   3704  1.1  christos 
   3705  1.1  christos static void
   3706  1.1  christos rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp DNS__DB_FLARG) {
   3707  1.1  christos 	qpc_rditer_t *iterator = NULL;
   3708  1.1  christos 
   3709  1.1  christos 	iterator = (qpc_rditer_t *)(*iteratorp);
   3710  1.1  christos 
   3711  1.1  christos 	dns__db_detachnode(iterator->common.db,
   3712  1.1  christos 			   &iterator->common.node DNS__DB_FLARG_PASS);
   3713  1.1  christos 	isc_mem_put(iterator->common.db->mctx, iterator, sizeof(*iterator));
   3714  1.1  christos 
   3715  1.1  christos 	*iteratorp = NULL;
   3716  1.1  christos }
   3717  1.1  christos 
   3718  1.1  christos static bool
   3719  1.1  christos iterator_active(qpcache_t *qpdb, qpc_rditer_t *iterator,
   3720  1.1  christos 		dns_slabheader_t *header) {
   3721  1.1  christos 	dns_ttl_t stale_ttl = header->ttl + STALE_TTL(header, qpdb);
   3722  1.1  christos 
   3723  1.1  christos 	/*
   3724  1.1  christos 	 * Is this a "this rdataset doesn't exist" record?
   3725  1.1  christos 	 */
   3726  1.1  christos 	if (NONEXISTENT(header)) {
   3727  1.1  christos 		return false;
   3728  1.1  christos 	}
   3729  1.1  christos 
   3730  1.1  christos 	/*
   3731  1.1  christos 	 * If this header is still active then return it.
   3732  1.1  christos 	 */
   3733  1.1  christos 	if (ACTIVE(header, iterator->common.now)) {
   3734  1.1  christos 		return true;
   3735  1.1  christos 	}
   3736  1.1  christos 
   3737  1.1  christos 	/*
   3738  1.1  christos 	 * If we are not returning stale records or the rdataset is
   3739  1.1  christos 	 * too old don't return it.
   3740  1.1  christos 	 */
   3741  1.1  christos 	if (!STALEOK(iterator) || (iterator->common.now > stale_ttl)) {
   3742  1.1  christos 		return false;
   3743  1.1  christos 	}
   3744  1.1  christos 	return true;
   3745  1.1  christos }
   3746  1.1  christos 
   3747  1.1  christos static isc_result_t
   3748  1.1  christos rdatasetiter_first(dns_rdatasetiter_t *it DNS__DB_FLARG) {
   3749  1.1  christos 	qpc_rditer_t *iterator = (qpc_rditer_t *)it;
   3750  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)(iterator->common.db);
   3751  1.1  christos 	qpcnode_t *qpnode = iterator->common.node;
   3752  1.1  christos 	dns_slabheader_t *header = NULL, *top_next = NULL;
   3753  1.1  christos 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   3754  1.3  christos 	isc_rwlock_t *nlock = &qpdb->buckets[qpnode->locknum].lock;
   3755  1.1  christos 
   3756  1.3  christos 	NODE_RDLOCK(nlock, &nlocktype);
   3757  1.1  christos 
   3758  1.1  christos 	for (header = qpnode->data; header != NULL; header = top_next) {
   3759  1.1  christos 		top_next = header->next;
   3760  1.1  christos 		do {
   3761  1.1  christos 			if (EXPIREDOK(iterator)) {
   3762  1.1  christos 				if (!NONEXISTENT(header)) {
   3763  1.1  christos 					break;
   3764  1.1  christos 				}
   3765  1.1  christos 				header = header->down;
   3766  1.1  christos 			} else if (!IGNORE(header)) {
   3767  1.1  christos 				if (!iterator_active(qpdb, iterator, header)) {
   3768  1.1  christos 					header = NULL;
   3769  1.1  christos 				}
   3770  1.1  christos 				break;
   3771  1.1  christos 			} else {
   3772  1.1  christos 				header = header->down;
   3773  1.1  christos 			}
   3774  1.1  christos 		} while (header != NULL);
   3775  1.1  christos 		if (header != NULL) {
   3776  1.1  christos 			break;
   3777  1.1  christos 		}
   3778  1.1  christos 	}
   3779  1.1  christos 
   3780  1.3  christos 	NODE_UNLOCK(nlock, &nlocktype);
   3781  1.1  christos 
   3782  1.1  christos 	iterator->current = header;
   3783  1.1  christos 
   3784  1.1  christos 	if (header == NULL) {
   3785  1.1  christos 		return ISC_R_NOMORE;
   3786  1.1  christos 	}
   3787  1.1  christos 
   3788  1.1  christos 	return ISC_R_SUCCESS;
   3789  1.1  christos }
   3790  1.1  christos 
   3791  1.1  christos static isc_result_t
   3792  1.1  christos rdatasetiter_next(dns_rdatasetiter_t *it DNS__DB_FLARG) {
   3793  1.1  christos 	qpc_rditer_t *iterator = (qpc_rditer_t *)it;
   3794  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)(iterator->common.db);
   3795  1.1  christos 	qpcnode_t *qpnode = iterator->common.node;
   3796  1.1  christos 	dns_slabheader_t *header = NULL, *top_next = NULL;
   3797  1.1  christos 	dns_typepair_t type, negtype;
   3798  1.1  christos 	dns_rdatatype_t rdtype, covers;
   3799  1.1  christos 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   3800  1.3  christos 	isc_rwlock_t *nlock = &qpdb->buckets[qpnode->locknum].lock;
   3801  1.1  christos 	bool expiredok = EXPIREDOK(iterator);
   3802  1.1  christos 
   3803  1.1  christos 	header = iterator->current;
   3804  1.1  christos 	if (header == NULL) {
   3805  1.1  christos 		return ISC_R_NOMORE;
   3806  1.1  christos 	}
   3807  1.1  christos 
   3808  1.3  christos 	NODE_RDLOCK(nlock, &nlocktype);
   3809  1.1  christos 
   3810  1.1  christos 	type = header->type;
   3811  1.1  christos 	rdtype = DNS_TYPEPAIR_TYPE(header->type);
   3812  1.1  christos 	if (NEGATIVE(header)) {
   3813  1.1  christos 		covers = DNS_TYPEPAIR_COVERS(header->type);
   3814  1.1  christos 		negtype = DNS_TYPEPAIR_VALUE(covers, 0);
   3815  1.1  christos 	} else {
   3816  1.1  christos 		negtype = DNS_TYPEPAIR_VALUE(0, rdtype);
   3817  1.1  christos 	}
   3818  1.1  christos 
   3819  1.1  christos 	/*
   3820  1.1  christos 	 * Find the start of the header chain for the next type
   3821  1.1  christos 	 * by walking back up the list.
   3822  1.1  christos 	 */
   3823  1.1  christos 	top_next = header->next;
   3824  1.1  christos 	while (top_next != NULL &&
   3825  1.1  christos 	       (top_next->type == type || top_next->type == negtype))
   3826  1.1  christos 	{
   3827  1.1  christos 		top_next = top_next->next;
   3828  1.1  christos 	}
   3829  1.1  christos 	if (expiredok) {
   3830  1.1  christos 		/*
   3831  1.1  christos 		 * Keep walking down the list if possible or
   3832  1.1  christos 		 * start the next type.
   3833  1.1  christos 		 */
   3834  1.1  christos 		header = header->down != NULL ? header->down : top_next;
   3835  1.1  christos 	} else {
   3836  1.1  christos 		header = top_next;
   3837  1.1  christos 	}
   3838  1.1  christos 	for (; header != NULL; header = top_next) {
   3839  1.1  christos 		top_next = header->next;
   3840  1.1  christos 		do {
   3841  1.1  christos 			if (expiredok) {
   3842  1.1  christos 				if (!NONEXISTENT(header)) {
   3843  1.1  christos 					break;
   3844  1.1  christos 				}
   3845  1.1  christos 				header = header->down;
   3846  1.1  christos 			} else if (!IGNORE(header)) {
   3847  1.1  christos 				if (!iterator_active(qpdb, iterator, header)) {
   3848  1.1  christos 					header = NULL;
   3849  1.1  christos 				}
   3850  1.1  christos 				break;
   3851  1.1  christos 			} else {
   3852  1.1  christos 				header = header->down;
   3853  1.1  christos 			}
   3854  1.1  christos 		} while (header != NULL);
   3855  1.1  christos 		if (header != NULL) {
   3856  1.1  christos 			break;
   3857  1.1  christos 		}
   3858  1.1  christos 		/*
   3859  1.1  christos 		 * Find the start of the header chain for the next type
   3860  1.1  christos 		 * by walking back up the list.
   3861  1.1  christos 		 */
   3862  1.1  christos 		while (top_next != NULL &&
   3863  1.1  christos 		       (top_next->type == type || top_next->type == negtype))
   3864  1.1  christos 		{
   3865  1.1  christos 			top_next = top_next->next;
   3866  1.1  christos 		}
   3867  1.1  christos 	}
   3868  1.1  christos 
   3869  1.3  christos 	NODE_UNLOCK(nlock, &nlocktype);
   3870  1.1  christos 
   3871  1.1  christos 	iterator->current = header;
   3872  1.1  christos 
   3873  1.1  christos 	if (header == NULL) {
   3874  1.1  christos 		return ISC_R_NOMORE;
   3875  1.1  christos 	}
   3876  1.1  christos 
   3877  1.1  christos 	return ISC_R_SUCCESS;
   3878  1.1  christos }
   3879  1.1  christos 
   3880  1.1  christos static void
   3881  1.1  christos rdatasetiter_current(dns_rdatasetiter_t *it,
   3882  1.1  christos 		     dns_rdataset_t *rdataset DNS__DB_FLARG) {
   3883  1.1  christos 	qpc_rditer_t *iterator = (qpc_rditer_t *)it;
   3884  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)(iterator->common.db);
   3885  1.1  christos 	qpcnode_t *qpnode = iterator->common.node;
   3886  1.1  christos 	dns_slabheader_t *header = NULL;
   3887  1.1  christos 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   3888  1.3  christos 	isc_rwlock_t *nlock = &qpdb->buckets[qpnode->locknum].lock;
   3889  1.1  christos 
   3890  1.1  christos 	header = iterator->current;
   3891  1.1  christos 	REQUIRE(header != NULL);
   3892  1.1  christos 
   3893  1.3  christos 	NODE_RDLOCK(nlock, &nlocktype);
   3894  1.1  christos 
   3895  1.1  christos 	bindrdataset(qpdb, qpnode, header, iterator->common.now, nlocktype,
   3896  1.1  christos 		     isc_rwlocktype_none, rdataset DNS__DB_FLARG_PASS);
   3897  1.1  christos 
   3898  1.3  christos 	NODE_UNLOCK(nlock, &nlocktype);
   3899  1.1  christos }
   3900  1.1  christos 
   3901  1.1  christos /*
   3902  1.1  christos  * Database Iterator Methods
   3903  1.1  christos  */
   3904  1.1  christos 
   3905  1.1  christos static void
   3906  1.1  christos reference_iter_node(qpc_dbit_t *qpdbiter DNS__DB_FLARG) {
   3907  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)qpdbiter->common.db;
   3908  1.1  christos 	qpcnode_t *node = qpdbiter->node;
   3909  1.1  christos 
   3910  1.1  christos 	if (node == NULL) {
   3911  1.1  christos 		return;
   3912  1.1  christos 	}
   3913  1.1  christos 
   3914  1.1  christos 	INSIST(qpdbiter->tree_locked != isc_rwlocktype_none);
   3915  1.1  christos 	reactivate_node(qpdb, node, qpdbiter->tree_locked DNS__DB_FLARG_PASS);
   3916  1.1  christos }
   3917  1.1  christos 
   3918  1.1  christos static void
   3919  1.1  christos dereference_iter_node(qpc_dbit_t *qpdbiter DNS__DB_FLARG) {
   3920  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)qpdbiter->common.db;
   3921  1.1  christos 	qpcnode_t *node = qpdbiter->node;
   3922  1.3  christos 	isc_rwlock_t *nlock = NULL;
   3923  1.1  christos 	isc_rwlocktype_t nlocktype = isc_rwlocktype_none;
   3924  1.1  christos 	isc_rwlocktype_t tlocktype = qpdbiter->tree_locked;
   3925  1.1  christos 
   3926  1.1  christos 	if (node == NULL) {
   3927  1.1  christos 		return;
   3928  1.1  christos 	}
   3929  1.1  christos 
   3930  1.1  christos 	REQUIRE(tlocktype != isc_rwlocktype_write);
   3931  1.1  christos 
   3932  1.3  christos 	nlock = &qpdb->buckets[node->locknum].lock;
   3933  1.3  christos 	NODE_RDLOCK(nlock, &nlocktype);
   3934  1.3  christos 	qpcnode_release(qpdb, node, &nlocktype, &qpdbiter->tree_locked,
   3935  1.3  christos 			false DNS__DB_FLARG_PASS);
   3936  1.3  christos 	NODE_UNLOCK(nlock, &nlocktype);
   3937  1.1  christos 
   3938  1.1  christos 	INSIST(qpdbiter->tree_locked == tlocktype);
   3939  1.1  christos 
   3940  1.1  christos 	qpdbiter->node = NULL;
   3941  1.1  christos }
   3942  1.1  christos 
   3943  1.1  christos static void
   3944  1.1  christos resume_iteration(qpc_dbit_t *qpdbiter, bool continuing) {
   3945  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)qpdbiter->common.db;
   3946  1.1  christos 
   3947  1.1  christos 	REQUIRE(qpdbiter->paused);
   3948  1.1  christos 	REQUIRE(qpdbiter->tree_locked == isc_rwlocktype_none);
   3949  1.1  christos 
   3950  1.1  christos 	TREE_RDLOCK(&qpdb->tree_lock, &qpdbiter->tree_locked);
   3951  1.1  christos 
   3952  1.1  christos 	/*
   3953  1.1  christos 	 * If we're being called from dbiterator_next or _prev,
   3954  1.1  christos 	 * then we may need to reinitialize the iterator to the current
   3955  1.1  christos 	 * name. The tree could have changed while it was unlocked,
   3956  1.1  christos 	 * would make the iterator traversal inconsistent.
   3957  1.1  christos 	 *
   3958  1.1  christos 	 * As long as the iterator is holding a reference to
   3959  1.1  christos 	 * qpdbiter->node, the node won't be removed from the tree,
   3960  1.1  christos 	 * so the lookup should always succeed.
   3961  1.1  christos 	 */
   3962  1.1  christos 	if (continuing && qpdbiter->node != NULL) {
   3963  1.1  christos 		isc_result_t result;
   3964  1.1  christos 		result = dns_qp_lookup(qpdb->tree, qpdbiter->name, NULL,
   3965  1.1  christos 				       &qpdbiter->iter, NULL, NULL, NULL);
   3966  1.1  christos 		INSIST(result == ISC_R_SUCCESS);
   3967  1.1  christos 	}
   3968  1.1  christos 
   3969  1.1  christos 	qpdbiter->paused = false;
   3970  1.1  christos }
   3971  1.1  christos 
   3972  1.1  christos static void
   3973  1.1  christos dbiterator_destroy(dns_dbiterator_t **iteratorp DNS__DB_FLARG) {
   3974  1.1  christos 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)(*iteratorp);
   3975  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)qpdbiter->common.db;
   3976  1.1  christos 	dns_db_t *db = NULL;
   3977  1.1  christos 
   3978  1.1  christos 	if (qpdbiter->tree_locked == isc_rwlocktype_read) {
   3979  1.1  christos 		TREE_UNLOCK(&qpdb->tree_lock, &qpdbiter->tree_locked);
   3980  1.1  christos 	}
   3981  1.1  christos 	INSIST(qpdbiter->tree_locked == isc_rwlocktype_none);
   3982  1.1  christos 
   3983  1.1  christos 	dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
   3984  1.1  christos 
   3985  1.1  christos 	dns_db_attach(qpdbiter->common.db, &db);
   3986  1.1  christos 	dns_db_detach(&qpdbiter->common.db);
   3987  1.1  christos 
   3988  1.1  christos 	isc_mem_put(db->mctx, qpdbiter, sizeof(*qpdbiter));
   3989  1.1  christos 	dns_db_detach(&db);
   3990  1.1  christos 
   3991  1.1  christos 	*iteratorp = NULL;
   3992  1.1  christos }
   3993  1.1  christos 
   3994  1.1  christos static isc_result_t
   3995  1.1  christos dbiterator_first(dns_dbiterator_t *iterator DNS__DB_FLARG) {
   3996  1.1  christos 	isc_result_t result;
   3997  1.1  christos 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator;
   3998  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)iterator->db;
   3999  1.1  christos 
   4000  1.1  christos 	if (qpdbiter->result != ISC_R_SUCCESS &&
   4001  1.1  christos 	    qpdbiter->result != ISC_R_NOTFOUND &&
   4002  1.1  christos 	    qpdbiter->result != DNS_R_PARTIALMATCH &&
   4003  1.1  christos 	    qpdbiter->result != ISC_R_NOMORE)
   4004  1.1  christos 	{
   4005  1.1  christos 		return qpdbiter->result;
   4006  1.1  christos 	}
   4007  1.1  christos 
   4008  1.1  christos 	if (qpdbiter->paused) {
   4009  1.1  christos 		resume_iteration(qpdbiter, false);
   4010  1.1  christos 	}
   4011  1.1  christos 
   4012  1.1  christos 	dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
   4013  1.1  christos 
   4014  1.1  christos 	dns_qpiter_init(qpdb->tree, &qpdbiter->iter);
   4015  1.1  christos 	result = dns_qpiter_next(&qpdbiter->iter, NULL,
   4016  1.1  christos 				 (void **)&qpdbiter->node, NULL);
   4017  1.1  christos 
   4018  1.1  christos 	if (result == ISC_R_SUCCESS) {
   4019  1.1  christos 		dns_name_copy(&qpdbiter->node->name, qpdbiter->name);
   4020  1.1  christos 		reference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
   4021  1.1  christos 	} else {
   4022  1.1  christos 		INSIST(result == ISC_R_NOMORE); /* The tree is empty. */
   4023  1.1  christos 		qpdbiter->node = NULL;
   4024  1.1  christos 	}
   4025  1.1  christos 
   4026  1.1  christos 	qpdbiter->result = result;
   4027  1.1  christos 
   4028  1.1  christos 	if (result != ISC_R_SUCCESS) {
   4029  1.1  christos 		ENSURE(!qpdbiter->paused);
   4030  1.1  christos 	}
   4031  1.1  christos 
   4032  1.1  christos 	return result;
   4033  1.1  christos }
   4034  1.1  christos 
   4035  1.1  christos static isc_result_t
   4036  1.1  christos dbiterator_last(dns_dbiterator_t *iterator DNS__DB_FLARG) {
   4037  1.1  christos 	isc_result_t result;
   4038  1.1  christos 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator;
   4039  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)iterator->db;
   4040  1.1  christos 
   4041  1.1  christos 	if (qpdbiter->result != ISC_R_SUCCESS &&
   4042  1.1  christos 	    qpdbiter->result != ISC_R_NOTFOUND &&
   4043  1.1  christos 	    qpdbiter->result != DNS_R_PARTIALMATCH &&
   4044  1.1  christos 	    qpdbiter->result != ISC_R_NOMORE)
   4045  1.1  christos 	{
   4046  1.1  christos 		return qpdbiter->result;
   4047  1.1  christos 	}
   4048  1.1  christos 
   4049  1.1  christos 	if (qpdbiter->paused) {
   4050  1.1  christos 		resume_iteration(qpdbiter, false);
   4051  1.1  christos 	}
   4052  1.1  christos 
   4053  1.1  christos 	dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
   4054  1.1  christos 
   4055  1.1  christos 	dns_qpiter_init(qpdb->tree, &qpdbiter->iter);
   4056  1.1  christos 	result = dns_qpiter_prev(&qpdbiter->iter, NULL,
   4057  1.1  christos 				 (void **)&qpdbiter->node, NULL);
   4058  1.1  christos 
   4059  1.1  christos 	if (result == ISC_R_SUCCESS) {
   4060  1.1  christos 		dns_name_copy(&qpdbiter->node->name, qpdbiter->name);
   4061  1.1  christos 		reference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
   4062  1.1  christos 	} else {
   4063  1.1  christos 		INSIST(result == ISC_R_NOMORE); /* The tree is empty. */
   4064  1.1  christos 		qpdbiter->node = NULL;
   4065  1.1  christos 	}
   4066  1.1  christos 
   4067  1.1  christos 	qpdbiter->result = result;
   4068  1.1  christos 	return result;
   4069  1.1  christos }
   4070  1.1  christos 
   4071  1.1  christos static isc_result_t
   4072  1.1  christos dbiterator_seek(dns_dbiterator_t *iterator,
   4073  1.1  christos 		const dns_name_t *name DNS__DB_FLARG) {
   4074  1.1  christos 	isc_result_t result;
   4075  1.1  christos 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator;
   4076  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)iterator->db;
   4077  1.1  christos 
   4078  1.1  christos 	if (qpdbiter->result != ISC_R_SUCCESS &&
   4079  1.1  christos 	    qpdbiter->result != ISC_R_NOTFOUND &&
   4080  1.1  christos 	    qpdbiter->result != DNS_R_PARTIALMATCH &&
   4081  1.1  christos 	    qpdbiter->result != ISC_R_NOMORE)
   4082  1.1  christos 	{
   4083  1.1  christos 		return qpdbiter->result;
   4084  1.1  christos 	}
   4085  1.1  christos 
   4086  1.1  christos 	if (qpdbiter->paused) {
   4087  1.1  christos 		resume_iteration(qpdbiter, false);
   4088  1.1  christos 	}
   4089  1.1  christos 
   4090  1.1  christos 	dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
   4091  1.1  christos 
   4092  1.1  christos 	result = dns_qp_lookup(qpdb->tree, name, NULL, &qpdbiter->iter, NULL,
   4093  1.1  christos 			       (void **)&qpdbiter->node, NULL);
   4094  1.1  christos 
   4095  1.1  christos 	if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
   4096  1.1  christos 		dns_name_copy(&qpdbiter->node->name, qpdbiter->name);
   4097  1.1  christos 		reference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
   4098  1.1  christos 	} else {
   4099  1.1  christos 		qpdbiter->node = NULL;
   4100  1.1  christos 	}
   4101  1.1  christos 
   4102  1.1  christos 	qpdbiter->result = (result == DNS_R_PARTIALMATCH) ? ISC_R_SUCCESS
   4103  1.1  christos 							  : result;
   4104  1.1  christos 	return result;
   4105  1.1  christos }
   4106  1.1  christos 
/*
 * Stub for this database implementation: both arguments are ignored and
 * the call always returns ISC_R_NOTIMPLEMENTED.
 */
static isc_result_t
dbiterator_seek3(dns_dbiterator_t *iterator ISC_ATTR_UNUSED,
		 const dns_name_t *name ISC_ATTR_UNUSED DNS__DB_FLARG) {
	return ISC_R_NOTIMPLEMENTED;
}
   4112  1.5  christos 
   4113  1.5  christos static isc_result_t
   4114  1.1  christos dbiterator_prev(dns_dbiterator_t *iterator DNS__DB_FLARG) {
   4115  1.1  christos 	isc_result_t result;
   4116  1.1  christos 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator;
   4117  1.1  christos 
   4118  1.1  christos 	REQUIRE(qpdbiter->node != NULL);
   4119  1.1  christos 
   4120  1.1  christos 	if (qpdbiter->result != ISC_R_SUCCESS) {
   4121  1.1  christos 		return qpdbiter->result;
   4122  1.1  christos 	}
   4123  1.1  christos 
   4124  1.1  christos 	if (qpdbiter->paused) {
   4125  1.1  christos 		resume_iteration(qpdbiter, true);
   4126  1.1  christos 	}
   4127  1.1  christos 
   4128  1.1  christos 	dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
   4129  1.1  christos 
   4130  1.1  christos 	result = dns_qpiter_prev(&qpdbiter->iter, NULL,
   4131  1.1  christos 				 (void **)&qpdbiter->node, NULL);
   4132  1.1  christos 
   4133  1.1  christos 	if (result == ISC_R_SUCCESS) {
   4134  1.1  christos 		dns_name_copy(&qpdbiter->node->name, qpdbiter->name);
   4135  1.1  christos 		reference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
   4136  1.1  christos 	} else {
   4137  1.1  christos 		INSIST(result == ISC_R_NOMORE);
   4138  1.1  christos 		qpdbiter->node = NULL;
   4139  1.1  christos 	}
   4140  1.1  christos 
   4141  1.1  christos 	qpdbiter->result = result;
   4142  1.1  christos 	return result;
   4143  1.1  christos }
   4144  1.1  christos 
   4145  1.1  christos static isc_result_t
   4146  1.1  christos dbiterator_next(dns_dbiterator_t *iterator DNS__DB_FLARG) {
   4147  1.1  christos 	isc_result_t result;
   4148  1.1  christos 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator;
   4149  1.1  christos 
   4150  1.1  christos 	REQUIRE(qpdbiter->node != NULL);
   4151  1.1  christos 
   4152  1.1  christos 	if (qpdbiter->result != ISC_R_SUCCESS) {
   4153  1.1  christos 		return qpdbiter->result;
   4154  1.1  christos 	}
   4155  1.1  christos 
   4156  1.1  christos 	if (qpdbiter->paused) {
   4157  1.1  christos 		resume_iteration(qpdbiter, true);
   4158  1.1  christos 	}
   4159  1.1  christos 
   4160  1.1  christos 	dereference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
   4161  1.1  christos 
   4162  1.1  christos 	result = dns_qpiter_next(&qpdbiter->iter, NULL,
   4163  1.1  christos 				 (void **)&qpdbiter->node, NULL);
   4164  1.1  christos 
   4165  1.1  christos 	if (result == ISC_R_SUCCESS) {
   4166  1.1  christos 		dns_name_copy(&qpdbiter->node->name, qpdbiter->name);
   4167  1.1  christos 		reference_iter_node(qpdbiter DNS__DB_FLARG_PASS);
   4168  1.1  christos 	} else {
   4169  1.1  christos 		INSIST(result == ISC_R_NOMORE);
   4170  1.1  christos 		qpdbiter->node = NULL;
   4171  1.1  christos 	}
   4172  1.1  christos 
   4173  1.1  christos 	qpdbiter->result = result;
   4174  1.1  christos 	return result;
   4175  1.1  christos }
   4176  1.1  christos 
   4177  1.1  christos static isc_result_t
   4178  1.1  christos dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
   4179  1.1  christos 		   dns_name_t *name DNS__DB_FLARG) {
   4180  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)iterator->db;
   4181  1.1  christos 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator;
   4182  1.1  christos 	qpcnode_t *node = qpdbiter->node;
   4183  1.1  christos 
   4184  1.1  christos 	REQUIRE(qpdbiter->result == ISC_R_SUCCESS);
   4185  1.1  christos 	REQUIRE(node != NULL);
   4186  1.1  christos 
   4187  1.1  christos 	if (qpdbiter->paused) {
   4188  1.1  christos 		resume_iteration(qpdbiter, false);
   4189  1.1  christos 	}
   4190  1.1  christos 
   4191  1.1  christos 	if (name != NULL) {
   4192  1.1  christos 		dns_name_copy(&node->name, name);
   4193  1.1  christos 	}
   4194  1.1  christos 
   4195  1.3  christos 	qpcnode_acquire(qpdb, node, isc_rwlocktype_none,
   4196  1.3  christos 			qpdbiter->tree_locked DNS__DB_FLARG_PASS);
   4197  1.1  christos 
   4198  1.1  christos 	*nodep = qpdbiter->node;
   4199  1.1  christos 	return ISC_R_SUCCESS;
   4200  1.1  christos }
   4201  1.1  christos 
   4202  1.1  christos static isc_result_t
   4203  1.1  christos dbiterator_pause(dns_dbiterator_t *iterator) {
   4204  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)iterator->db;
   4205  1.1  christos 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator;
   4206  1.1  christos 
   4207  1.1  christos 	if (qpdbiter->result != ISC_R_SUCCESS &&
   4208  1.1  christos 	    qpdbiter->result != ISC_R_NOTFOUND &&
   4209  1.1  christos 	    qpdbiter->result != DNS_R_PARTIALMATCH &&
   4210  1.1  christos 	    qpdbiter->result != ISC_R_NOMORE)
   4211  1.1  christos 	{
   4212  1.1  christos 		return qpdbiter->result;
   4213  1.1  christos 	}
   4214  1.1  christos 
   4215  1.1  christos 	if (qpdbiter->paused) {
   4216  1.1  christos 		return ISC_R_SUCCESS;
   4217  1.1  christos 	}
   4218  1.1  christos 
   4219  1.1  christos 	qpdbiter->paused = true;
   4220  1.1  christos 
   4221  1.1  christos 	if (qpdbiter->tree_locked == isc_rwlocktype_read) {
   4222  1.1  christos 		TREE_UNLOCK(&qpdb->tree_lock, &qpdbiter->tree_locked);
   4223  1.1  christos 	}
   4224  1.1  christos 	INSIST(qpdbiter->tree_locked == isc_rwlocktype_none);
   4225  1.1  christos 
   4226  1.1  christos 	return ISC_R_SUCCESS;
   4227  1.1  christos }
   4228  1.1  christos 
   4229  1.1  christos static isc_result_t
   4230  1.1  christos dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
   4231  1.1  christos 	qpc_dbit_t *qpdbiter = (qpc_dbit_t *)iterator;
   4232  1.1  christos 
   4233  1.1  christos 	if (qpdbiter->result != ISC_R_SUCCESS) {
   4234  1.1  christos 		return qpdbiter->result;
   4235  1.1  christos 	}
   4236  1.1  christos 
   4237  1.1  christos 	dns_name_copy(dns_rootname, name);
   4238  1.1  christos 	return ISC_R_SUCCESS;
   4239  1.1  christos }
   4240  1.1  christos 
   4241  1.1  christos static void
   4242  1.1  christos deletedata(dns_db_t *db ISC_ATTR_UNUSED, dns_dbnode_t *node ISC_ATTR_UNUSED,
   4243  1.1  christos 	   void *data) {
   4244  1.1  christos 	dns_slabheader_t *header = data;
   4245  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)header->db;
   4246  1.1  christos 
   4247  1.1  christos 	if (header->heap != NULL && header->heap_index != 0) {
   4248  1.1  christos 		isc_heap_delete(header->heap, header->heap_index);
   4249  1.1  christos 	}
   4250  1.1  christos 
   4251  1.1  christos 	update_rrsetstats(qpdb->rrsetstats, header->type,
   4252  1.1  christos 			  atomic_load_acquire(&header->attributes), false);
   4253  1.1  christos 
   4254  1.1  christos 	if (ISC_LINK_LINKED(header, link)) {
   4255  1.1  christos 		int idx = HEADERNODE(header)->locknum;
   4256  1.3  christos 		ISC_LIST_UNLINK(qpdb->buckets[idx].lru, header, link);
   4257  1.1  christos 	}
   4258  1.1  christos 
   4259  1.1  christos 	if (header->noqname != NULL) {
   4260  1.1  christos 		dns_slabheader_freeproof(db->mctx, &header->noqname);
   4261  1.1  christos 	}
   4262  1.1  christos 	if (header->closest != NULL) {
   4263  1.1  christos 		dns_slabheader_freeproof(db->mctx, &header->closest);
   4264  1.1  christos 	}
   4265  1.1  christos }
   4266  1.1  christos 
   4267  1.1  christos /*
   4268  1.1  christos  * Caller must be holding the node write lock.
   4269  1.1  christos  */
   4270  1.1  christos static void
   4271  1.1  christos expire_ttl_headers(qpcache_t *qpdb, unsigned int locknum,
   4272  1.1  christos 		   isc_rwlocktype_t *nlocktypep, isc_rwlocktype_t *tlocktypep,
   4273  1.1  christos 		   isc_stdtime_t now, bool cache_is_overmem DNS__DB_FLARG) {
   4274  1.3  christos 	isc_heap_t *heap = qpdb->buckets[locknum].heap;
   4275  1.1  christos 
   4276  1.1  christos 	for (size_t i = 0; i < DNS_QPDB_EXPIRE_TTL_COUNT; i++) {
   4277  1.1  christos 		dns_slabheader_t *header = isc_heap_element(heap, 1);
   4278  1.1  christos 
   4279  1.1  christos 		if (header == NULL) {
   4280  1.1  christos 			/* No headers left on this TTL heap; exit cleaning */
   4281  1.1  christos 			return;
   4282  1.1  christos 		}
   4283  1.1  christos 
   4284  1.1  christos 		dns_ttl_t ttl = header->ttl;
   4285  1.1  christos 
   4286  1.1  christos 		if (!cache_is_overmem) {
   4287  1.1  christos 			/* Only account for stale TTL if cache is not overmem */
   4288  1.1  christos 			ttl += STALE_TTL(header, qpdb);
   4289  1.1  christos 		}
   4290  1.1  christos 
   4291  1.1  christos 		if (ttl >= now - QPDB_VIRTUAL) {
   4292  1.1  christos 			/*
   4293  1.1  christos 			 * The header at the top of this TTL heap is not yet
   4294  1.1  christos 			 * eligible for expiry, so none of the other headers on
   4295  1.1  christos 			 * the same heap can be eligible for expiry, either;
   4296  1.1  christos 			 * exit cleaning.
   4297  1.1  christos 			 */
   4298  1.1  christos 			return;
   4299  1.1  christos 		}
   4300  1.1  christos 
   4301  1.1  christos 		expireheader(header, nlocktypep, tlocktypep,
   4302  1.1  christos 			     dns_expire_ttl DNS__DB_FLARG_PASS);
   4303  1.1  christos 	}
   4304  1.1  christos }
   4305  1.1  christos 
   4306  1.1  christos static void
   4307  1.1  christos setmaxrrperset(dns_db_t *db, uint32_t value) {
   4308  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   4309  1.1  christos 
   4310  1.1  christos 	REQUIRE(VALID_QPDB(qpdb));
   4311  1.1  christos 
   4312  1.1  christos 	qpdb->maxrrperset = value;
   4313  1.1  christos }
   4314  1.1  christos 
   4315  1.1  christos static void
   4316  1.1  christos setmaxtypepername(dns_db_t *db, uint32_t value) {
   4317  1.1  christos 	qpcache_t *qpdb = (qpcache_t *)db;
   4318  1.1  christos 
   4319  1.1  christos 	REQUIRE(VALID_QPDB(qpdb));
   4320  1.1  christos 
   4321  1.1  christos 	qpdb->maxtypepername = value;
   4322  1.1  christos }
   4323  1.1  christos 
   4324  1.1  christos static dns_dbmethods_t qpdb_cachemethods = {
   4325  1.1  christos 	.destroy = qpdb_destroy,
   4326  1.1  christos 	.findnode = findnode,
   4327  1.1  christos 	.find = find,
   4328  1.1  christos 	.findzonecut = findzonecut,
   4329  1.1  christos 	.attachnode = attachnode,
   4330  1.1  christos 	.detachnode = detachnode,
   4331  1.1  christos 	.createiterator = createiterator,
   4332  1.1  christos 	.findrdataset = findrdataset,
   4333  1.1  christos 	.allrdatasets = allrdatasets,
   4334  1.1  christos 	.addrdataset = addrdataset,
   4335  1.1  christos 	.deleterdataset = deleterdataset,
   4336  1.1  christos 	.nodecount = nodecount,
   4337  1.1  christos 	.getrrsetstats = getrrsetstats,
   4338  1.1  christos 	.setcachestats = setcachestats,
   4339  1.1  christos 	.setservestalettl = setservestalettl,
   4340  1.1  christos 	.getservestalettl = getservestalettl,
   4341  1.1  christos 	.setservestalerefresh = setservestalerefresh,
   4342  1.1  christos 	.getservestalerefresh = getservestalerefresh,
   4343  1.1  christos 	.locknode = locknode,
   4344  1.1  christos 	.unlocknode = unlocknode,
   4345  1.1  christos 	.expiredata = expiredata,
   4346  1.1  christos 	.deletedata = deletedata,
   4347  1.1  christos 	.setmaxrrperset = setmaxrrperset,
   4348  1.1  christos 	.setmaxtypepername = setmaxtypepername,
   4349  1.1  christos };
   4350  1.1  christos 
   4351  1.1  christos static void
   4352  1.1  christos qpcnode_destroy(qpcnode_t *data) {
   4353  1.1  christos 	dns_slabheader_t *current = NULL, *next = NULL;
   4354  1.1  christos 
   4355  1.1  christos 	for (current = data->data; current != NULL; current = next) {
   4356  1.1  christos 		dns_slabheader_t *down = current->down, *down_next = NULL;
   4357  1.1  christos 
   4358  1.1  christos 		next = current->next;
   4359  1.1  christos 
   4360  1.1  christos 		for (down = current->down; down != NULL; down = down_next) {
   4361  1.1  christos 			down_next = down->down;
   4362  1.1  christos 			dns_slabheader_destroy(&down);
   4363  1.1  christos 		}
   4364  1.1  christos 
   4365  1.1  christos 		dns_slabheader_destroy(&current);
   4366  1.1  christos 	}
   4367  1.1  christos 
   4368  1.1  christos 	dns_name_free(&data->name, data->mctx);
   4369  1.1  christos 	isc_mem_putanddetach(&data->mctx, data, sizeof(qpcnode_t));
   4370  1.1  christos }
   4371  1.1  christos 
/*
 * Instantiate the static reference-counting helpers for qpcnode and
 * qpcache, using the tracing variants when DNS_DB_NODETRACE is defined.
 */
#ifdef DNS_DB_NODETRACE
ISC_REFCOUNT_STATIC_TRACE_IMPL(qpcnode, qpcnode_destroy);
ISC_REFCOUNT_STATIC_TRACE_IMPL(qpcache, qpcache__destroy);
#else
ISC_REFCOUNT_STATIC_IMPL(qpcnode, qpcnode_destroy);
ISC_REFCOUNT_STATIC_IMPL(qpcache, qpcache__destroy);
#endif
   4383