Home | History | Annotate | Line # | Download | only in dns
      1 /*	$NetBSD: rbt.c,v 1.1 2024/02/18 20:57:33 christos Exp $	*/
      2 
      3 /*
      4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      5  *
      6  * SPDX-License-Identifier: MPL-2.0
      7  *
      8  * This Source Code Form is subject to the terms of the Mozilla Public
      9  * License, v. 2.0. If a copy of the MPL was not distributed with this
     10  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
     11  *
     12  * See the COPYRIGHT file distributed with this work for additional
     13  * information regarding copyright ownership.
     14  */
     15 
     16 /*! \file */
     17 
     18 #include <inttypes.h>
     19 #include <stdbool.h>
     20 #include <sys/stat.h>
     21 
     22 #include <isc/crc64.h>
     23 #include <isc/file.h>
     24 #include <isc/hex.h>
     25 #include <isc/mem.h>
     26 #include <isc/once.h>
     27 #include <isc/platform.h>
     28 #include <isc/print.h>
     29 #include <isc/refcount.h>
     30 #include <isc/socket.h>
     31 #include <isc/stdio.h>
     32 #include <isc/string.h>
     33 #include <isc/util.h>
     34 
     35 /*%
     36  * This define is so dns/name.h (included by dns/fixedname.h) uses more
     37  * efficient macro calls instead of functions for a few operations.
     38  */
     39 #define DNS_NAME_USEINLINE 1
     40 #define ALIGNMENT_SIZE	   8U /* see lib/isc/mem.c */
     41 
     42 #include <unistd.h>
     43 
     44 #include <dns/fixedname.h>
     45 #include <dns/log.h>
     46 #include <dns/rbt.h>
     47 #include <dns/result.h>
     48 #include <dns/version.h>
     49 
     50 #define CHECK(x)                             \
     51 	do {                                 \
     52 		result = (x);                \
     53 		if (result != ISC_R_SUCCESS) \
     54 			goto cleanup;        \
     55 	} while (0)
     56 
     57 #define RBT_MAGIC      ISC_MAGIC('R', 'B', 'T', '+')
     58 #define VALID_RBT(rbt) ISC_MAGIC_VALID(rbt, RBT_MAGIC)
     59 
     60 /*
     61  * XXXDCL Since parent pointers were added in again, I could remove all of the
     62  * chain junk, and replace with dns_rbt_firstnode, _previousnode, _nextnode,
     63  * _lastnode.  This would involve pretty major change to the API.
     64  */
     65 #define CHAIN_MAGIC	   ISC_MAGIC('0', '-', '0', '-')
     66 #define VALID_CHAIN(chain) ISC_MAGIC_VALID(chain, CHAIN_MAGIC)
     67 
     68 #define RBT_HASH_MIN_BITS   4
     69 #define RBT_HASH_MAX_BITS   32
     70 #define RBT_HASH_OVERCOMMIT 3
     71 #define RBT_HASH_BUCKETSIZE 4096 /* FIXME: What would be a good value here? */
     72 
     73 #ifdef RBT_MEM_TEST
     74 #undef RBT_HASH_SIZE
     75 #define RBT_HASH_SIZE 2 /*%< To give the reallocation code a workout. */
     76 #endif			/* ifdef RBT_MEM_TEST */
     77 
     78 #define GOLDEN_RATIO_32 0x61C88647
     79 
     80 #define HASHSIZE(bits) (UINT64_C(1) << (bits))
     81 
     82 static uint32_t
     83 hash_32(uint32_t val, unsigned int bits) {
     84 	REQUIRE(bits <= RBT_HASH_MAX_BITS);
     85 	/* High bits are more random. */
     86 	return (val * GOLDEN_RATIO_32 >> (32 - bits));
     87 }
     88 
     89 struct dns_rbt {
     90 	unsigned int magic;
     91 	isc_mem_t *mctx;
     92 	dns_rbtnode_t *root;
     93 	void (*data_deleter)(void *, void *);
     94 	void *deleter_arg;
     95 	unsigned int nodecount;
     96 	uint16_t hashbits;
     97 	uint16_t maxhashbits;
     98 	dns_rbtnode_t **hashtable;
     99 	void *mmap_location;
    100 };
    101 
    102 #define RED   0
    103 #define BLACK 1
    104 
    105 /*
    106  * This is the header for map-format RBT images.  It is populated,
    107  * and then written, as the LAST thing done to the file before returning.
    108  * Writing this last (with zeros in the header area initially) will ensure
    109  * that the header is only valid when the RBT image is also valid.
    110  */
    111 typedef struct file_header file_header_t;
    112 
    113 /* Pad to 32 bytes */
    114 static char FILE_VERSION[32] = "\0";
    115 
    116 /* Header length, always the same size regardless of structure size */
    117 #define HEADER_LENGTH 1024
    118 
    119 struct file_header {
    120 	char version1[32];
    121 	uint64_t first_node_offset; /* usually 1024 */
    122 	/*
    123 	 * information about the system on which the map file was generated
    124 	 * will be used to tell if we can load the map file or not
    125 	 */
    126 	uint32_t ptrsize;
    127 	unsigned int bigendian	    : 1; /* big or little endian system */
    128 	unsigned int rdataset_fixed : 1; /* compiled with
    129 					  * --enable-rrset-fixed
    130 					  */
    131 	unsigned int nodecount;		 /* shadow from rbt structure */
    132 	uint64_t crc;
    133 	char version2[32]; /* repeated; must match version1 */
    134 };
    135 
    136 /*
    137  * The following declarations are for the serialization of an RBT:
    138  *
    139  * step one: write out a zeroed header of 1024 bytes
    140  * step two: walk the tree in a depth-first, left-right-down order, writing
    141  * out the nodes, reserving space as we go, correcting addresses to point
    142  * at the proper offset in the file, and setting a flag for each pointer to
    143  * indicate that it is a reference to a location in the file, rather than in
    144  * memory.
    145  * step three: write out the header, adding the information that will be
    146  * needed to re-create the tree object itself.
    147  *
    148  * The RBTDB object will do this three times, once for each of the three
    149  * RBT objects it contains.
    150  *
    151  * Note: 'file' must point an actual open file that can be mmapped
    152  * and fseeked, not to a pipe or stream
    153  */
    154 
    155 static isc_result_t
    156 dns_rbt_zero_header(FILE *file);
    157 
    158 static isc_result_t
    159 write_header(FILE *file, dns_rbt_t *rbt, uint64_t first_node_offset,
    160 	     uint64_t crc);
    161 
    162 static bool
    163 match_header_version(file_header_t *header);
    164 
    165 static isc_result_t
    166 serialize_node(FILE *file, dns_rbtnode_t *node, uintptr_t left, uintptr_t right,
    167 	       uintptr_t down, uintptr_t parent, uintptr_t data, uint64_t *crc);
    168 
    169 static isc_result_t
    170 serialize_nodes(FILE *file, dns_rbtnode_t *node, uintptr_t parent,
    171 		dns_rbtdatawriter_t datawriter, void *writer_arg,
    172 		uintptr_t *where, uint64_t *crc);
    173 
    174 #define ADJUST_ADDRESS(address, relative, header)        \
    175 	if (address != NULL && header != NULL) {         \
    176 		address += relative * (uintptr_t)header; \
    177 	}
    178 /*
    179  * The following functions allow you to get the actual address of a pointer
    180  * without having to use an if statement to check to see if that address is
    181  * relative or not
    182  */
    183 static dns_rbtnode_t *
    184 getparent(dns_rbtnode_t *node, file_header_t *header) {
    185 	char *adjusted_address = (char *)(node->parent);
    186 
    187 	ADJUST_ADDRESS(adjusted_address, node->parent_is_relative, header);
    188 
    189 	return ((dns_rbtnode_t *)adjusted_address);
    190 }
    191 
    192 static dns_rbtnode_t *
    193 getleft(dns_rbtnode_t *node, file_header_t *header) {
    194 	char *adjusted_address = (char *)(node->left);
    195 
    196 	ADJUST_ADDRESS(adjusted_address, node->left_is_relative, header);
    197 
    198 	return ((dns_rbtnode_t *)adjusted_address);
    199 }
    200 
    201 static dns_rbtnode_t *
    202 getright(dns_rbtnode_t *node, file_header_t *header) {
    203 	char *adjusted_address = (char *)(node->right);
    204 
    205 	ADJUST_ADDRESS(adjusted_address, node->right_is_relative, header);
    206 
    207 	return ((dns_rbtnode_t *)adjusted_address);
    208 }
    209 
    210 static dns_rbtnode_t *
    211 getdown(dns_rbtnode_t *node, file_header_t *header) {
    212 	char *adjusted_address = (char *)(node->down);
    213 
    214 	ADJUST_ADDRESS(adjusted_address, node->down_is_relative, header);
    215 
    216 	return ((dns_rbtnode_t *)adjusted_address);
    217 }
    218 
    219 static dns_rbtnode_t *
    220 getdata(dns_rbtnode_t *node, file_header_t *header) {
    221 	char *adjusted_address = (char *)(node->data);
    222 
    223 	ADJUST_ADDRESS(adjusted_address, node->data_is_relative, header);
    224 
    225 	return ((dns_rbtnode_t *)adjusted_address);
    226 }
    227 
    228 /*%
    229  * Elements of the rbtnode structure.
    230  */
    231 #define PARENT(node)	   ((node)->parent)
    232 #define LEFT(node)	   ((node)->left)
    233 #define RIGHT(node)	   ((node)->right)
    234 #define DOWN(node)	   ((node)->down)
    235 #define UPPERNODE(node)	   ((node)->uppernode)
    236 #define DATA(node)	   ((node)->data)
    237 #define IS_EMPTY(node)	   ((node)->data == NULL)
    238 #define HASHNEXT(node)	   ((node)->hashnext)
    239 #define HASHVAL(node)	   ((node)->hashval)
    240 #define COLOR(node)	   ((node)->color)
    241 #define NAMELEN(node)	   ((node)->namelen)
    242 #define OLDNAMELEN(node)   ((node)->oldnamelen)
    243 #define OFFSETLEN(node)	   ((node)->offsetlen)
    244 #define ATTRS(node)	   ((node)->attributes)
    245 #define IS_ROOT(node)	   ((node)->is_root)
    246 #define FINDCALLBACK(node) ((node)->find_callback)
    247 
    248 #define WANTEMPTYDATA_OR_DATA(options, node) \
    249 	((options & DNS_RBTFIND_EMPTYDATA) != 0 || DATA(node) != NULL)
    250 
    251 /*%
    252  * Structure elements from the rbtdb.c, not
    253  * used as part of the rbt.c algorithms.
    254  */
    255 #define DIRTY(node)   ((node)->dirty)
    256 #define WILD(node)    ((node)->wild)
    257 #define LOCKNUM(node) ((node)->locknum)
    258 
    259 /*%
    260  * The variable length stuff stored after the node has the following
    261  * structure.
    262  *
    263  *	&lt;name_data&gt;{1..255}&lt;oldoffsetlen&gt;{1}&lt;offsets&gt;{1..128}
    264  *
    265  * &lt;name_data&gt; contains the name of the node when it was created.
    266  * &lt;oldoffsetlen&gt; contains the length of &lt;offsets&gt; when the node
    267  * was created.
    268  * &lt;offsets&gt; contains the offsets into name for each label when the node
    269  * was created.
    270  */
    271 
    272 #define NAME(node)	   ((unsigned char *)((node) + 1))
    273 #define OFFSETS(node)	   (NAME(node) + OLDNAMELEN(node) + 1)
    274 #define OLDOFFSETLEN(node) (OFFSETS(node)[-1])
    275 
    276 #define NODE_SIZE(node) \
    277 	(sizeof(*node) + OLDNAMELEN(node) + OLDOFFSETLEN(node) + 1)
    278 
    279 /*%
    280  * Color management.
    281  */
    282 #define IS_RED(node)	 ((node) != NULL && (node)->color == RED)
    283 #define IS_BLACK(node)	 ((node) == NULL || (node)->color == BLACK)
    284 #define MAKE_RED(node)	 ((node)->color = RED)
    285 #define MAKE_BLACK(node) ((node)->color = BLACK)
    286 
    287 /*%
    288  * Chain management.
    289  *
    290  * The "ancestors" member of chains were removed, with their job now
    291  * being wholly handled by parent pointers (which didn't exist, because
    292  * of memory concerns, when chains were first implemented).
    293  */
    294 #define ADD_LEVEL(chain, node)                                     \
    295 	do {                                                       \
    296 		INSIST((chain)->level_count < DNS_RBT_LEVELBLOCK); \
    297 		(chain)->levels[(chain)->level_count++] = (node);  \
    298 	} while (0)
    299 
    300 /*%
    301  * The following macros directly access normally private name variables.
    302  * These macros are used to avoid a lot of function calls in the critical
    303  * path of the tree traversal code.
    304  */
    305 
    306 static void
    307 NODENAME(dns_rbtnode_t *node, dns_name_t *name) {
    308 	name->length = NAMELEN(node);
    309 	name->labels = OFFSETLEN(node);
    310 	name->ndata = NAME(node);
    311 	name->offsets = OFFSETS(node);
    312 	name->attributes = ATTRS(node);
    313 	name->attributes |= DNS_NAMEATTR_READONLY;
    314 }
    315 
    316 #ifdef DEBUG
    317 #define inline
    318 /*
    319  * A little something to help out in GDB.
    320  */
    321 dns_name_t
    322 Name(dns_rbtnode_t *node);
    323 dns_name_t
    324 Name(dns_rbtnode_t *node) {
    325 	dns_name_t name;
    326 
    327 	dns_name_init(&name, NULL);
    328 	if (node != NULL) {
    329 		NODENAME(node, &name);
    330 	}
    331 
    332 	return (name);
    333 }
    334 
    335 static void
    336 hexdump(const char *desc, void *blob, size_t size) {
    337 	char hexdump[BUFSIZ * 2 + 1];
    338 	isc_buffer_t b;
    339 	isc_region_t r;
    340 	isc_result_t result;
    341 	size_t bytes;
    342 	uint8_t *data = blob;
    343 
    344 	fprintf(stderr, "%s: ", desc);
    345 	do {
    346 		isc_buffer_init(&b, hexdump, sizeof(hexdump));
    347 		r.base = data;
    348 		r.length = bytes = (size > BUFSIZ) ? BUFSIZ : size;
    349 		result = isc_hex_totext(&r, 0, "", &b);
    350 		RUNTIME_CHECK(result == ISC_R_SUCCESS);
    351 		isc_buffer_putuint8(&b, 0);
    352 		fprintf(stderr, "%s", hexdump);
    353 		data += bytes;
    354 		size -= bytes;
    355 	} while (size > 0);
    356 	fprintf(stderr, "\n");
    357 }
    358 #endif /* DEBUG */
    359 
    360 /*
    361  * Upper node is the parent of the root of the passed node's
    362  * subtree. The passed node must not be NULL.
    363  */
    364 static dns_rbtnode_t *
    365 get_upper_node(dns_rbtnode_t *node) {
    366 	return (UPPERNODE(node));
    367 }
    368 
    369 static void
    370 fixup_uppernodes_helper(dns_rbtnode_t *node, dns_rbtnode_t *uppernode) {
    371 	if (node == NULL) {
    372 		return;
    373 	}
    374 
    375 	UPPERNODE(node) = uppernode;
    376 
    377 	fixup_uppernodes_helper(LEFT(node), uppernode);
    378 	fixup_uppernodes_helper(RIGHT(node), uppernode);
    379 	fixup_uppernodes_helper(DOWN(node), node);
    380 }
    381 
    382 /*
    383  * This function is used to fixup uppernode members of all dns_rbtnodes
    384  * after deserialization.
    385  */
    386 static void
    387 fixup_uppernodes(dns_rbt_t *rbt) {
    388 	fixup_uppernodes_helper(rbt->root, NULL);
    389 }
    390 
    391 size_t
    392 dns__rbtnode_getdistance(dns_rbtnode_t *node) {
    393 	size_t nodes = 1;
    394 
    395 	while (node != NULL) {
    396 		if (IS_ROOT(node)) {
    397 			break;
    398 		}
    399 		nodes++;
    400 		node = PARENT(node);
    401 	}
    402 
    403 	return (nodes);
    404 }
    405 
    406 /*
    407  * Forward declarations.
    408  */
    409 static isc_result_t
    410 create_node(isc_mem_t *mctx, const dns_name_t *name, dns_rbtnode_t **nodep);
    411 
    412 static isc_result_t
    413 inithash(dns_rbt_t *rbt);
    414 
    415 static void
    416 hash_node(dns_rbt_t *rbt, dns_rbtnode_t *node, const dns_name_t *name);
    417 
    418 static void
    419 unhash_node(dns_rbt_t *rbt, dns_rbtnode_t *node);
    420 
    421 static uint32_t
    422 rehash_bits(dns_rbt_t *rbt, size_t newcount);
    423 static void
    424 rehash(dns_rbt_t *rbt, uint32_t newbits);
    425 static void
    426 maybe_rehash(dns_rbt_t *rbt, size_t size);
    427 
    428 static void
    429 rotate_left(dns_rbtnode_t *node, dns_rbtnode_t **rootp);
    430 static void
    431 rotate_right(dns_rbtnode_t *node, dns_rbtnode_t **rootp);
    432 
    433 static void
    434 addonlevel(dns_rbtnode_t *node, dns_rbtnode_t *current, int order,
    435 	   dns_rbtnode_t **rootp);
    436 
    437 static void
    438 deletefromlevel(dns_rbtnode_t *item, dns_rbtnode_t **rootp);
    439 
    440 static isc_result_t
    441 treefix(dns_rbt_t *rbt, void *base, size_t size, dns_rbtnode_t *n,
    442 	const dns_name_t *name, dns_rbtdatafixer_t datafixer, void *fixer_arg,
    443 	uint64_t *crc);
    444 
    445 static void
    446 deletetreeflat(dns_rbt_t *rbt, unsigned int quantum, bool unhash,
    447 	       dns_rbtnode_t **nodep);
    448 
    449 static void
    450 printnodename(dns_rbtnode_t *node, bool quoted, FILE *f);
    451 
    452 static void
    453 freenode(dns_rbt_t *rbt, dns_rbtnode_t **nodep);
    454 
    455 static isc_result_t
    456 dns_rbt_zero_header(FILE *file) {
    457 	/*
    458 	 * Write out a zeroed header as a placeholder.  Doing this ensures
    459 	 * that the file will not read while it is partially written, should
    460 	 * writing fail or be interrupted.
    461 	 */
    462 	char buffer[HEADER_LENGTH];
    463 	isc_result_t result;
    464 
    465 	memset(buffer, 0, HEADER_LENGTH);
    466 	result = isc_stdio_write(buffer, 1, HEADER_LENGTH, file, NULL);
    467 	if (result != ISC_R_SUCCESS) {
    468 		return (result);
    469 	}
    470 
    471 	result = fflush(file);
    472 	if (result != ISC_R_SUCCESS) {
    473 		return (result);
    474 	}
    475 
    476 	return (ISC_R_SUCCESS);
    477 }
    478 
    479 static isc_once_t once = ISC_ONCE_INIT;
    480 
    481 static void
    482 init_file_version(void) {
    483 	int n;
    484 
    485 	memset(FILE_VERSION, 0, sizeof(FILE_VERSION));
    486 	n = snprintf(FILE_VERSION, sizeof(FILE_VERSION), "RBT Image %s %s",
    487 		     dns_major, dns_mapapi);
    488 	INSIST(n > 0 && (unsigned int)n < sizeof(FILE_VERSION));
    489 }
    490 
    491 /*
    492  * Write out the real header, including NodeDump version information
    493  * and the offset of the first node.
    494  *
    495  * Any information stored in the rbt object itself should be stored
    496  * here.
    497  */
    498 static isc_result_t
    499 write_header(FILE *file, dns_rbt_t *rbt, uint64_t first_node_offset,
    500 	     uint64_t crc) {
    501 	file_header_t header;
    502 	isc_result_t result;
    503 	off_t location;
    504 
    505 	RUNTIME_CHECK(isc_once_do(&once, init_file_version) == ISC_R_SUCCESS);
    506 
    507 	memset(&header, 0, sizeof(file_header_t));
    508 	memmove(header.version1, FILE_VERSION, sizeof(header.version1));
    509 	memmove(header.version2, FILE_VERSION, sizeof(header.version2));
    510 	header.first_node_offset = first_node_offset;
    511 	header.ptrsize = (uint32_t)sizeof(void *);
    512 	header.bigendian = (1 == htonl(1)) ? 1 : 0;
    513 
    514 #ifdef DNS_RDATASET_FIXED
    515 	header.rdataset_fixed = 1;
    516 #else  /* ifdef DNS_RDATASET_FIXED */
    517 	header.rdataset_fixed = 0;
    518 #endif /* ifdef DNS_RDATASET_FIXED */
    519 
    520 	header.nodecount = rbt->nodecount;
    521 
    522 	header.crc = crc;
    523 
    524 	CHECK(isc_stdio_tell(file, &location));
    525 	location = dns_rbt_serialize_align(location);
    526 	CHECK(isc_stdio_seek(file, location, SEEK_SET));
    527 	CHECK(isc_stdio_write(&header, 1, sizeof(file_header_t), file, NULL));
    528 	CHECK(fflush(file));
    529 
    530 	/* Ensure we are always at the end of the file. */
    531 	CHECK(isc_stdio_seek(file, 0, SEEK_END));
    532 
    533 cleanup:
    534 	return (result);
    535 }
    536 
    537 static bool
    538 match_header_version(file_header_t *header) {
    539 	RUNTIME_CHECK(isc_once_do(&once, init_file_version) == ISC_R_SUCCESS);
    540 
    541 	if (memcmp(header->version1, FILE_VERSION, sizeof(header->version1)) !=
    542 		    0 ||
    543 	    memcmp(header->version2, FILE_VERSION, sizeof(header->version1)) !=
    544 		    0)
    545 	{
    546 		return (false);
    547 	}
    548 
    549 	return (true);
    550 }
    551 
    552 unsigned int
    553 dns__rbtnode_namelen(dns_rbtnode_t *node) {
    554 	dns_name_t current;
    555 	unsigned int len = 0;
    556 
    557 	REQUIRE(DNS_RBTNODE_VALID(node));
    558 
    559 	dns_name_init(&current, NULL);
    560 
    561 	do {
    562 		if (node != NULL) {
    563 			NODENAME(node, &current);
    564 			len += current.length;
    565 		} else {
    566 			len += 1;
    567 			break;
    568 		}
    569 
    570 		node = get_upper_node(node);
    571 	} while (!dns_name_isabsolute(&current));
    572 
    573 	return (len);
    574 }
    575 
    576 static isc_result_t
    577 serialize_node(FILE *file, dns_rbtnode_t *node, uintptr_t left, uintptr_t right,
    578 	       uintptr_t down, uintptr_t parent, uintptr_t data,
    579 	       uint64_t *crc) {
    580 	isc_result_t result;
    581 	dns_rbtnode_t temp_node;
    582 	off_t file_position;
    583 	unsigned char *node_data = NULL;
    584 	size_t datasize;
    585 #ifdef DEBUG
    586 	dns_name_t nodename;
    587 #endif /* ifdef DEBUG */
    588 
    589 	INSIST(node != NULL);
    590 
    591 	CHECK(isc_stdio_tell(file, &file_position));
    592 	file_position = dns_rbt_serialize_align(file_position);
    593 	CHECK(isc_stdio_seek(file, file_position, SEEK_SET));
    594 
    595 	temp_node = *node;
    596 	temp_node.down_is_relative = 0;
    597 	temp_node.left_is_relative = 0;
    598 	temp_node.right_is_relative = 0;
    599 	temp_node.parent_is_relative = 0;
    600 	temp_node.data_is_relative = 0;
    601 	temp_node.is_mmapped = 1;
    602 
    603 	/*
    604 	 * If the next node is not NULL, calculate the next node's location
    605 	 * in the file.  Note that this will have to change when the data
    606 	 * structure changes, and it also assumes that we always write the
    607 	 * nodes out in list order (which we currently do.)
    608 	 */
    609 	if (temp_node.parent != NULL) {
    610 		temp_node.parent = (dns_rbtnode_t *)(parent);
    611 		temp_node.parent_is_relative = 1;
    612 	}
    613 	if (temp_node.left != NULL) {
    614 		temp_node.left = (dns_rbtnode_t *)(left);
    615 		temp_node.left_is_relative = 1;
    616 	}
    617 	if (temp_node.right != NULL) {
    618 		temp_node.right = (dns_rbtnode_t *)(right);
    619 		temp_node.right_is_relative = 1;
    620 	}
    621 	if (temp_node.down != NULL) {
    622 		temp_node.down = (dns_rbtnode_t *)(down);
    623 		temp_node.down_is_relative = 1;
    624 	}
    625 	if (temp_node.data != NULL) {
    626 		temp_node.data = (dns_rbtnode_t *)(data);
    627 		temp_node.data_is_relative = 1;
    628 	}
    629 
    630 	temp_node.fullnamelen = dns__rbtnode_namelen(node);
    631 
    632 	node_data = (unsigned char *)node + sizeof(dns_rbtnode_t);
    633 	datasize = NODE_SIZE(node) - sizeof(dns_rbtnode_t);
    634 
    635 	CHECK(isc_stdio_write(&temp_node, 1, sizeof(dns_rbtnode_t), file,
    636 			      NULL));
    637 	CHECK(isc_stdio_write(node_data, 1, datasize, file, NULL));
    638 
    639 #ifdef DEBUG
    640 	dns_name_init(&nodename, NULL);
    641 	NODENAME(node, &nodename);
    642 	fprintf(stderr, "serialize ");
    643 	dns_name_print(&nodename, stderr);
    644 	fprintf(stderr, "\n");
    645 	hexdump("node header", (unsigned char *)&temp_node,
    646 		sizeof(dns_rbtnode_t));
    647 	hexdump("node data", node_data, datasize);
    648 #endif /* ifdef DEBUG */
    649 
    650 	isc_crc64_update(crc, (const uint8_t *)&temp_node,
    651 			 sizeof(dns_rbtnode_t));
    652 	isc_crc64_update(crc, (const uint8_t *)node_data, datasize);
    653 
    654 cleanup:
    655 	return (result);
    656 }
    657 
    658 static isc_result_t
    659 serialize_nodes(FILE *file, dns_rbtnode_t *node, uintptr_t parent,
    660 		dns_rbtdatawriter_t datawriter, void *writer_arg,
    661 		uintptr_t *where, uint64_t *crc) {
    662 	uintptr_t left = 0, right = 0, down = 0, data = 0;
    663 	off_t location = 0, offset_adjust;
    664 	isc_result_t result;
    665 
    666 	if (node == NULL) {
    667 		if (where != NULL) {
    668 			*where = 0;
    669 		}
    670 		return (ISC_R_SUCCESS);
    671 	}
    672 
    673 	/* Reserve space for current node. */
    674 	CHECK(isc_stdio_tell(file, &location));
    675 	location = dns_rbt_serialize_align(location);
    676 	CHECK(isc_stdio_seek(file, location, SEEK_SET));
    677 
    678 	offset_adjust = dns_rbt_serialize_align(location + NODE_SIZE(node));
    679 	CHECK(isc_stdio_seek(file, offset_adjust, SEEK_SET));
    680 
    681 	/*
    682 	 * Serialize the rest of the tree.
    683 	 *
    684 	 * WARNING: A change in the order (from left, right, down)
    685 	 * will break the way the crc hash is computed.
    686 	 */
    687 	CHECK(serialize_nodes(file, getleft(node, NULL), location, datawriter,
    688 			      writer_arg, &left, crc));
    689 	CHECK(serialize_nodes(file, getright(node, NULL), location, datawriter,
    690 			      writer_arg, &right, crc));
    691 	CHECK(serialize_nodes(file, getdown(node, NULL), location, datawriter,
    692 			      writer_arg, &down, crc));
    693 
    694 	if (node->data != NULL) {
    695 		off_t ret;
    696 
    697 		CHECK(isc_stdio_tell(file, &ret));
    698 		ret = dns_rbt_serialize_align(ret);
    699 		CHECK(isc_stdio_seek(file, ret, SEEK_SET));
    700 		data = ret;
    701 
    702 		datawriter(file, node->data, writer_arg, crc);
    703 	}
    704 
    705 	/* Seek back to reserved space. */
    706 	CHECK(isc_stdio_seek(file, location, SEEK_SET));
    707 
    708 	/* Serialize the current node. */
    709 	CHECK(serialize_node(file, node, left, right, down, parent, data, crc));
    710 
    711 	/* Ensure we are always at the end of the file. */
    712 	CHECK(isc_stdio_seek(file, 0, SEEK_END));
    713 
    714 	if (where != NULL) {
    715 		*where = (uintptr_t)location;
    716 	}
    717 
    718 cleanup:
    719 	return (result);
    720 }
    721 
    722 off_t
    723 dns_rbt_serialize_align(off_t target) {
    724 	off_t offset = target % 8;
    725 
    726 	if (offset == 0) {
    727 		return (target);
    728 	} else {
    729 		return (target + 8 - offset);
    730 	}
    731 }
    732 
    733 isc_result_t
    734 dns_rbt_serialize_tree(FILE *file, dns_rbt_t *rbt,
    735 		       dns_rbtdatawriter_t datawriter, void *writer_arg,
    736 		       off_t *offset) {
    737 	isc_result_t result;
    738 	off_t header_position, node_position, end_position;
    739 	uint64_t crc;
    740 
    741 	REQUIRE(file != NULL);
    742 
    743 	CHECK(isc_file_isplainfilefd(fileno(file)));
    744 
    745 	isc_crc64_init(&crc);
    746 
    747 	CHECK(isc_stdio_tell(file, &header_position));
    748 
    749 	/* Write dummy header */
    750 	CHECK(dns_rbt_zero_header(file));
    751 
    752 	/* Serialize nodes */
    753 	CHECK(isc_stdio_tell(file, &node_position));
    754 	CHECK(serialize_nodes(file, rbt->root, 0, datawriter, writer_arg, NULL,
    755 			      &crc));
    756 
    757 	CHECK(isc_stdio_tell(file, &end_position));
    758 	if (node_position == end_position) {
    759 		CHECK(isc_stdio_seek(file, header_position, SEEK_SET));
    760 		*offset = 0;
    761 		return (ISC_R_SUCCESS);
    762 	}
    763 
    764 	isc_crc64_final(&crc);
    765 #ifdef DEBUG
    766 	hexdump("serializing CRC", (unsigned char *)&crc, sizeof(crc));
    767 #endif /* ifdef DEBUG */
    768 
    769 	/* Serialize header */
    770 	CHECK(isc_stdio_seek(file, header_position, SEEK_SET));
    771 	CHECK(write_header(file, rbt, HEADER_LENGTH, crc));
    772 
    773 	/* Ensure we are always at the end of the file. */
    774 	CHECK(isc_stdio_seek(file, 0, SEEK_END));
    775 	*offset = dns_rbt_serialize_align(header_position);
    776 
    777 cleanup:
    778 	return (result);
    779 }
    780 
    781 #define CONFIRM(a)                                  \
    782 	do {                                        \
    783 		if (!(a)) {                         \
    784 			result = ISC_R_INVALIDFILE; \
    785 			goto cleanup;               \
    786 		}                                   \
    787 	} while (0)
    788 
    789 static isc_result_t
    790 treefix(dns_rbt_t *rbt, void *base, size_t filesize, dns_rbtnode_t *n,
    791 	const dns_name_t *name, dns_rbtdatafixer_t datafixer, void *fixer_arg,
    792 	uint64_t *crc) {
    793 	isc_result_t result = ISC_R_SUCCESS;
    794 	dns_fixedname_t fixed;
    795 	dns_name_t nodename, *fullname = NULL;
    796 	unsigned char *node_data = NULL;
    797 	dns_rbtnode_t header;
    798 	size_t nodemax = filesize - sizeof(dns_rbtnode_t);
    799 	size_t datasize;
    800 
    801 	if (n == NULL) {
    802 		return (ISC_R_SUCCESS);
    803 	}
    804 
    805 #define CHECK_ALIGNMENT(n) \
    806 	(((uintptr_t)n & ~((uintptr_t)ALIGNMENT_SIZE - 1)) == (uintptr_t)n)
    807 
    808 	CONFIRM((void *)n >= base);
    809 	CONFIRM((size_t)((char *)n - (char *)base) <= nodemax);
    810 	CONFIRM(CHECK_ALIGNMENT(n));
    811 	CONFIRM(DNS_RBTNODE_VALID(n));
    812 
    813 	dns_name_init(&nodename, NULL);
    814 	NODENAME(n, &nodename);
    815 
    816 	fullname = &nodename;
    817 	CONFIRM(dns_name_isvalid(fullname));
    818 
    819 	if (!dns_name_isabsolute(&nodename)) {
    820 		fullname = dns_fixedname_initname(&fixed);
    821 		CHECK(dns_name_concatenate(&nodename, name, fullname, NULL));
    822 	}
    823 
    824 	/* memorize header contents prior to fixup */
    825 	memmove(&header, n, sizeof(header));
    826 
    827 	if (n->left_is_relative) {
    828 		CONFIRM(n->left <= (dns_rbtnode_t *)nodemax);
    829 		n->left = getleft(n, rbt->mmap_location);
    830 		n->left_is_relative = 0;
    831 		CONFIRM(CHECK_ALIGNMENT(n->left));
    832 		CONFIRM(DNS_RBTNODE_VALID(n->left));
    833 	} else {
    834 		CONFIRM(n->left == NULL);
    835 	}
    836 
    837 	if (n->right_is_relative) {
    838 		CONFIRM(n->right <= (dns_rbtnode_t *)nodemax);
    839 		n->right = getright(n, rbt->mmap_location);
    840 		n->right_is_relative = 0;
    841 		CONFIRM(CHECK_ALIGNMENT(n->right));
    842 		CONFIRM(DNS_RBTNODE_VALID(n->right));
    843 	} else {
    844 		CONFIRM(n->right == NULL);
    845 	}
    846 
    847 	if (n->down_is_relative) {
    848 		CONFIRM(n->down <= (dns_rbtnode_t *)nodemax);
    849 		n->down = getdown(n, rbt->mmap_location);
    850 		n->down_is_relative = 0;
    851 		CONFIRM(n->down > (dns_rbtnode_t *)n);
    852 		CONFIRM(CHECK_ALIGNMENT(n->down));
    853 		CONFIRM(DNS_RBTNODE_VALID(n->down));
    854 	} else {
    855 		CONFIRM(n->down == NULL);
    856 	}
    857 
    858 	if (n->parent_is_relative) {
    859 		CONFIRM(n->parent <= (dns_rbtnode_t *)nodemax);
    860 		n->parent = getparent(n, rbt->mmap_location);
    861 		n->parent_is_relative = 0;
    862 		CONFIRM(n->parent < (dns_rbtnode_t *)n);
    863 		CONFIRM(CHECK_ALIGNMENT(n->parent));
    864 		CONFIRM(DNS_RBTNODE_VALID(n->parent));
    865 	} else {
    866 		CONFIRM(n->parent == NULL);
    867 	}
    868 
    869 	if (n->data_is_relative) {
    870 		CONFIRM(n->data <= (void *)filesize);
    871 		n->data = getdata(n, rbt->mmap_location);
    872 		n->data_is_relative = 0;
    873 		CONFIRM(n->data > (void *)n);
    874 		CONFIRM(CHECK_ALIGNMENT(n->data));
    875 	} else {
    876 		CONFIRM(n->data == NULL);
    877 	}
    878 
    879 	hash_node(rbt, n, fullname);
    880 
    881 	/* a change in the order (from left, right, down) will break hashing*/
    882 	if (n->left != NULL) {
    883 		CHECK(treefix(rbt, base, filesize, n->left, name, datafixer,
    884 			      fixer_arg, crc));
    885 	}
    886 	if (n->right != NULL) {
    887 		CHECK(treefix(rbt, base, filesize, n->right, name, datafixer,
    888 			      fixer_arg, crc));
    889 	}
    890 	if (n->down != NULL) {
    891 		CHECK(treefix(rbt, base, filesize, n->down, fullname, datafixer,
    892 			      fixer_arg, crc));
    893 	}
    894 
    895 	if (datafixer != NULL && n->data != NULL) {
    896 		CHECK(datafixer(n, base, filesize, fixer_arg, crc));
    897 	}
    898 
    899 	rbt->nodecount++;
    900 	node_data = (unsigned char *)n + sizeof(dns_rbtnode_t);
    901 	datasize = NODE_SIZE(n) - sizeof(dns_rbtnode_t);
    902 
    903 #ifdef DEBUG
    904 	fprintf(stderr, "deserialize ");
    905 	dns_name_print(&nodename, stderr);
    906 	fprintf(stderr, "\n");
    907 	hexdump("node header", (unsigned char *)&header, sizeof(dns_rbtnode_t));
    908 	hexdump("node data", node_data, datasize);
    909 #endif /* ifdef DEBUG */
    910 	isc_crc64_update(crc, (const uint8_t *)&header, sizeof(dns_rbtnode_t));
    911 	isc_crc64_update(crc, (const uint8_t *)node_data, datasize);
    912 
    913 cleanup:
    914 	return (result);
    915 }
    916 
    917 isc_result_t
    918 dns_rbt_deserialize_tree(void *base_address, size_t filesize,
    919 			 off_t header_offset, isc_mem_t *mctx,
    920 			 dns_rbtdeleter_t deleter, void *deleter_arg,
    921 			 dns_rbtdatafixer_t datafixer, void *fixer_arg,
    922 			 dns_rbtnode_t **originp, dns_rbt_t **rbtp) {
    923 	isc_result_t result = ISC_R_SUCCESS;
    924 	file_header_t *header;
    925 	dns_rbt_t *rbt = NULL;
    926 	uint64_t crc;
    927 	unsigned int host_big_endian;
    928 
    929 	REQUIRE(originp == NULL || *originp == NULL);
    930 	REQUIRE(rbtp != NULL && *rbtp == NULL);
    931 
    932 	isc_crc64_init(&crc);
    933 
    934 	CHECK(dns_rbt_create(mctx, deleter, deleter_arg, &rbt));
    935 
    936 	rbt->mmap_location = base_address;
    937 
    938 	header = (file_header_t *)((char *)base_address + header_offset);
    939 	if (!match_header_version(header)) {
    940 		result = ISC_R_INVALIDFILE;
    941 		goto cleanup;
    942 	}
    943 
    944 #ifdef DNS_RDATASET_FIXED
    945 	if (header->rdataset_fixed != 1) {
    946 		result = ISC_R_INVALIDFILE;
    947 		goto cleanup;
    948 	}
    949 
    950 #else  /* ifdef DNS_RDATASET_FIXED */
    951 	if (header->rdataset_fixed != 0) {
    952 		result = ISC_R_INVALIDFILE;
    953 		goto cleanup;
    954 	}
    955 #endif /* ifdef DNS_RDATASET_FIXED */
    956 
    957 	if (header->ptrsize != (uint32_t)sizeof(void *)) {
    958 		result = ISC_R_INVALIDFILE;
    959 		goto cleanup;
    960 	}
    961 
    962 	host_big_endian = (1 == htonl(1));
    963 	if (header->bigendian != host_big_endian) {
    964 		result = ISC_R_INVALIDFILE;
    965 		goto cleanup;
    966 	}
    967 
    968 	/* Copy other data items from the header into our rbt. */
    969 	rbt->root = (dns_rbtnode_t *)((char *)base_address + header_offset +
    970 				      header->first_node_offset);
    971 
    972 	if ((header->nodecount * sizeof(dns_rbtnode_t)) > filesize) {
    973 		result = ISC_R_INVALIDFILE;
    974 		goto cleanup;
    975 	}
    976 	maybe_rehash(rbt, header->nodecount);
    977 
    978 	CHECK(treefix(rbt, base_address, filesize, rbt->root, dns_rootname,
    979 		      datafixer, fixer_arg, &crc));
    980 
    981 	isc_crc64_final(&crc);
    982 #ifdef DEBUG
    983 	hexdump("deserializing CRC", (unsigned char *)&crc, sizeof(crc));
    984 #endif /* ifdef DEBUG */
    985 
    986 	/* Check file hash */
    987 	if (header->crc != crc) {
    988 		result = ISC_R_INVALIDFILE;
    989 		goto cleanup;
    990 	}
    991 
    992 	if (header->nodecount != rbt->nodecount) {
    993 		result = ISC_R_INVALIDFILE;
    994 		goto cleanup;
    995 	}
    996 
    997 	fixup_uppernodes(rbt);
    998 
    999 	*rbtp = rbt;
   1000 	if (originp != NULL) {
   1001 		*originp = rbt->root;
   1002 	}
   1003 
   1004 cleanup:
   1005 	if (result != ISC_R_SUCCESS && rbt != NULL) {
   1006 		rbt->root = NULL;
   1007 		rbt->nodecount = 0;
   1008 		dns_rbt_destroy(&rbt);
   1009 	}
   1010 
   1011 	return (result);
   1012 }
   1013 
   1014 /*
   1015  * Initialize a red/black tree of trees.
   1016  */
   1017 isc_result_t
   1018 dns_rbt_create(isc_mem_t *mctx, dns_rbtdeleter_t deleter, void *deleter_arg,
   1019 	       dns_rbt_t **rbtp) {
   1020 	isc_result_t result;
   1021 	dns_rbt_t *rbt;
   1022 
   1023 	REQUIRE(mctx != NULL);
   1024 	REQUIRE(rbtp != NULL && *rbtp == NULL);
   1025 	REQUIRE(deleter == NULL ? deleter_arg == NULL : 1);
   1026 
   1027 	rbt = isc_mem_get(mctx, sizeof(*rbt));
   1028 
   1029 	rbt->mctx = NULL;
   1030 	isc_mem_attach(mctx, &rbt->mctx);
   1031 	rbt->data_deleter = deleter;
   1032 	rbt->deleter_arg = deleter_arg;
   1033 	rbt->root = NULL;
   1034 	rbt->nodecount = 0;
   1035 	rbt->hashtable = NULL;
   1036 	rbt->hashbits = 0;
   1037 	rbt->maxhashbits = RBT_HASH_MAX_BITS;
   1038 	rbt->mmap_location = NULL;
   1039 
   1040 	result = inithash(rbt);
   1041 	if (result != ISC_R_SUCCESS) {
   1042 		isc_mem_putanddetach(&rbt->mctx, rbt, sizeof(*rbt));
   1043 		return (result);
   1044 	}
   1045 
   1046 	rbt->magic = RBT_MAGIC;
   1047 
   1048 	*rbtp = rbt;
   1049 
   1050 	return (ISC_R_SUCCESS);
   1051 }
   1052 
   1053 /*
   1054  * Deallocate a red/black tree of trees.
   1055  */
   1056 void
   1057 dns_rbt_destroy(dns_rbt_t **rbtp) {
   1058 	RUNTIME_CHECK(dns_rbt_destroy2(rbtp, 0) == ISC_R_SUCCESS);
   1059 }
   1060 
   1061 isc_result_t
   1062 dns_rbt_destroy2(dns_rbt_t **rbtp, unsigned int quantum) {
   1063 	dns_rbt_t *rbt;
   1064 
   1065 	REQUIRE(rbtp != NULL && VALID_RBT(*rbtp));
   1066 
   1067 	rbt = *rbtp;
   1068 
   1069 	deletetreeflat(rbt, quantum, false, &rbt->root);
   1070 	if (rbt->root != NULL) {
   1071 		return (ISC_R_QUOTA);
   1072 	}
   1073 
   1074 	*rbtp = NULL;
   1075 
   1076 	INSIST(rbt->nodecount == 0);
   1077 
   1078 	rbt->mmap_location = NULL;
   1079 
   1080 	if (rbt->hashtable != NULL) {
   1081 		size_t size = HASHSIZE(rbt->hashbits) * sizeof(dns_rbtnode_t *);
   1082 		isc_mem_put(rbt->mctx, rbt->hashtable, size);
   1083 	}
   1084 
   1085 	rbt->magic = 0;
   1086 
   1087 	isc_mem_putanddetach(&rbt->mctx, rbt, sizeof(*rbt));
   1088 	return (ISC_R_SUCCESS);
   1089 }
   1090 
   1091 unsigned int
   1092 dns_rbt_nodecount(dns_rbt_t *rbt) {
   1093 	REQUIRE(VALID_RBT(rbt));
   1094 
   1095 	return (rbt->nodecount);
   1096 }
   1097 
   1098 size_t
   1099 dns_rbt_hashsize(dns_rbt_t *rbt) {
   1100 	REQUIRE(VALID_RBT(rbt));
   1101 
   1102 	return (1 << rbt->hashbits);
   1103 }
   1104 
   1105 isc_result_t
   1106 dns_rbt_adjusthashsize(dns_rbt_t *rbt, size_t size) {
   1107 	REQUIRE(VALID_RBT(rbt));
   1108 
   1109 	if (size > 0) {
   1110 		/*
   1111 		 * Setting a new, finite size limit was requested for the RBT.
   1112 		 * Estimate how many hash table slots are needed for the
   1113 		 * requested size and how many bits would be needed to index
   1114 		 * those hash table slots, then rehash the RBT if necessary.
   1115 		 * Note that the hash table can only grow, it is not shrunk if
   1116 		 * the requested size limit is lower than the current one.
   1117 		 */
   1118 		size_t newsize = size / RBT_HASH_BUCKETSIZE;
   1119 		rbt->maxhashbits = rehash_bits(rbt, newsize);
   1120 		maybe_rehash(rbt, newsize);
   1121 	} else {
   1122 		/*
   1123 		 * Setting an infinite size limit was requested for the RBT.
   1124 		 * Increase the maximum allowed number of hash table slots to
   1125 		 * 2^32, which enables the hash table to grow as nodes are
   1126 		 * added to the RBT without immediately preallocating 2^32 hash
   1127 		 * table slots.
   1128 		 */
   1129 		rbt->maxhashbits = RBT_HASH_MAX_BITS;
   1130 	}
   1131 
   1132 	return (ISC_R_SUCCESS);
   1133 }
   1134 
   1135 static isc_result_t
   1136 chain_name(dns_rbtnodechain_t *chain, dns_name_t *name,
   1137 	   bool include_chain_end) {
   1138 	dns_name_t nodename;
   1139 	isc_result_t result = ISC_R_SUCCESS;
   1140 	int i;
   1141 
   1142 	dns_name_init(&nodename, NULL);
   1143 
   1144 	if (include_chain_end && chain->end != NULL) {
   1145 		NODENAME(chain->end, &nodename);
   1146 		dns_name_copynf(&nodename, name);
   1147 	} else {
   1148 		dns_name_reset(name);
   1149 	}
   1150 
   1151 	for (i = (int)chain->level_count - 1; i >= 0; i--) {
   1152 		NODENAME(chain->levels[i], &nodename);
   1153 		result = dns_name_concatenate(name, &nodename, name, NULL);
   1154 
   1155 		if (result != ISC_R_SUCCESS) {
   1156 			return (result);
   1157 		}
   1158 	}
   1159 	return (result);
   1160 }
   1161 
   1162 static isc_result_t
   1163 move_chain_to_last(dns_rbtnodechain_t *chain, dns_rbtnode_t *node) {
   1164 	do {
   1165 		/*
   1166 		 * Go as far right and then down as much as possible,
   1167 		 * as long as the rightmost node has a down pointer.
   1168 		 */
   1169 		while (RIGHT(node) != NULL) {
   1170 			node = RIGHT(node);
   1171 		}
   1172 
   1173 		if (DOWN(node) == NULL) {
   1174 			break;
   1175 		}
   1176 
   1177 		ADD_LEVEL(chain, node);
   1178 		node = DOWN(node);
   1179 	} while (1);
   1180 
   1181 	chain->end = node;
   1182 
   1183 	return (ISC_R_SUCCESS);
   1184 }
   1185 
   1186 /*
   1187  * Add 'name' to tree, initializing its data pointer with 'data'.
   1188  */
   1189 
   1190 isc_result_t
   1191 dns_rbt_addnode(dns_rbt_t *rbt, const dns_name_t *name, dns_rbtnode_t **nodep) {
   1192 	/*
   1193 	 * Does this thing have too many variables or what?
   1194 	 */
   1195 	dns_rbtnode_t **root, *parent, *child, *current, *new_current;
   1196 	dns_name_t *add_name, *new_name, current_name, *prefix, *suffix;
   1197 	dns_fixedname_t fixedcopy, fixedprefix, fixedsuffix, fnewname;
   1198 	dns_offsets_t current_offsets;
   1199 	dns_namereln_t compared;
   1200 	isc_result_t result = ISC_R_SUCCESS;
   1201 	unsigned int level_count;
   1202 	unsigned int common_labels;
   1203 	unsigned int nlabels, hlabels;
   1204 	int order;
   1205 
   1206 	REQUIRE(VALID_RBT(rbt));
   1207 	REQUIRE(dns_name_isabsolute(name));
   1208 	REQUIRE(nodep != NULL && *nodep == NULL);
   1209 
   1210 	/*
   1211 	 * Dear future BIND developer,
   1212 	 *
   1213 	 * After you have tried attempting to optimize this routine by
   1214 	 * using the hashtable and have realized your folly, please
   1215 	 * append another cross ("X") below as a warning to the next
   1216 	 * future BIND developer:
   1217 	 *
   1218 	 * Number of victim developers: X
   1219 	 *
   1220 	 * I wish the past developer had included such a notice.
   1221 	 *
   1222 	 * Long form: Unlike dns_rbt_findnode(), this function does not
   1223 	 * lend itself to be optimized using the hashtable:
   1224 	 *
   1225 	 * 1. In the subtree where the insertion occurs, this function
   1226 	 * needs to have the insertion point and the order where the
   1227 	 * lookup terminated (i.e., at the insertion point where left or
   1228 	 * right child is NULL). This cannot be determined from the
   1229 	 * hashtable, so at least in that subtree, a BST O(log N) lookup
   1230 	 * is necessary.
   1231 	 *
   1232 	 * 2. Our RBT nodes contain not only single labels but label
   1233 	 * sequences to optimize space usage. So at every level, we have
   1234 	 * to look for a match in the hashtable for all superdomains in
   1235 	 * the rest of the name we're searching. This is an O(N)
   1236 	 * operation at least, here N being the label size of name, each
   1237 	 * of which is a hashtable lookup involving dns_name_equal()
   1238 	 * comparisons.
   1239 	 */
   1240 
   1241 	/*
   1242 	 * Create a copy of the name so the original name structure is
   1243 	 * not modified.
   1244 	 */
   1245 	add_name = dns_fixedname_initname(&fixedcopy);
   1246 	INSIST(add_name != NULL);
   1247 	dns_name_clone(name, add_name);
   1248 
   1249 	if (ISC_UNLIKELY(rbt->root == NULL)) {
   1250 		result = create_node(rbt->mctx, add_name, &new_current);
   1251 		if (result == ISC_R_SUCCESS) {
   1252 			rbt->nodecount++;
   1253 			new_current->is_root = 1;
   1254 
   1255 			UPPERNODE(new_current) = NULL;
   1256 
   1257 			rbt->root = new_current;
   1258 			*nodep = new_current;
   1259 			hash_node(rbt, new_current, name);
   1260 		}
   1261 		return (result);
   1262 	}
   1263 
   1264 	level_count = 0;
   1265 
   1266 	prefix = dns_fixedname_initname(&fixedprefix);
   1267 	suffix = dns_fixedname_initname(&fixedsuffix);
   1268 
   1269 	INSIST(prefix != NULL);
   1270 	INSIST(suffix != NULL);
   1271 
   1272 	root = &rbt->root;
   1273 	INSIST(IS_ROOT(*root));
   1274 	parent = NULL;
   1275 	current = NULL;
   1276 	child = *root;
   1277 	dns_name_init(&current_name, current_offsets);
   1278 	new_name = dns_fixedname_initname(&fnewname);
   1279 	nlabels = dns_name_countlabels(name);
   1280 	hlabels = 0;
   1281 
   1282 	do {
   1283 		current = child;
   1284 
   1285 		NODENAME(current, &current_name);
   1286 		compared = dns_name_fullcompare(add_name, &current_name, &order,
   1287 						&common_labels);
   1288 
   1289 		if (compared == dns_namereln_equal) {
   1290 			*nodep = current;
   1291 			result = ISC_R_EXISTS;
   1292 			break;
   1293 		}
   1294 
   1295 		if (compared == dns_namereln_none) {
   1296 			if (order < 0) {
   1297 				parent = current;
   1298 				child = LEFT(current);
   1299 			} else if (order > 0) {
   1300 				parent = current;
   1301 				child = RIGHT(current);
   1302 			}
   1303 		} else {
   1304 			/*
   1305 			 * This name has some suffix in common with the
   1306 			 * name at the current node.  If the name at
   1307 			 * the current node is shorter, that means the
   1308 			 * new name should be in a subtree.  If the
   1309 			 * name at the current node is longer, that means
   1310 			 * the down pointer to this tree should point
   1311 			 * to a new tree that has the common suffix, and
   1312 			 * the non-common parts of these two names should
   1313 			 * start a new tree.
   1314 			 */
   1315 			hlabels += common_labels;
   1316 			if (compared == dns_namereln_subdomain) {
   1317 				/*
   1318 				 * All of the existing labels are in common,
   1319 				 * so the new name is in a subtree.
   1320 				 * Whack off the common labels for the
   1321 				 * not-in-common part to be searched for
   1322 				 * in the next level.
   1323 				 */
   1324 				dns_name_split(add_name, common_labels,
   1325 					       add_name, NULL);
   1326 
   1327 				/*
   1328 				 * Follow the down pointer (possibly NULL).
   1329 				 */
   1330 				root = &DOWN(current);
   1331 
   1332 				INSIST(*root == NULL ||
   1333 				       (IS_ROOT(*root) &&
   1334 					PARENT(*root) == current));
   1335 
   1336 				parent = NULL;
   1337 				child = DOWN(current);
   1338 
   1339 				INSIST(level_count < DNS_RBT_LEVELBLOCK);
   1340 				level_count++;
   1341 			} else {
   1342 				/*
   1343 				 * The number of labels in common is fewer
   1344 				 * than the number of labels at the current
   1345 				 * node, so the current node must be adjusted
   1346 				 * to have just the common suffix, and a down
   1347 				 * pointer made to a new tree.
   1348 				 */
   1349 
   1350 				INSIST(compared ==
   1351 					       dns_namereln_commonancestor ||
   1352 				       compared == dns_namereln_contains);
   1353 
   1354 				/*
   1355 				 * Ensure the number of levels in the tree
   1356 				 * does not exceed the number of logical
   1357 				 * levels allowed by DNSSEC.
   1358 				 *
   1359 				 * XXXDCL need a better error result?
   1360 				 */
   1361 				if (level_count >= DNS_RBT_LEVELBLOCK) {
   1362 					result = ISC_R_NOSPACE;
   1363 					break;
   1364 				}
   1365 
   1366 				/*
   1367 				 * Split the name into two parts, a prefix
   1368 				 * which is the not-in-common parts of the
   1369 				 * two names and a suffix that is the common
   1370 				 * parts of them.
   1371 				 */
   1372 				dns_name_split(&current_name, common_labels,
   1373 					       prefix, suffix);
   1374 				result = create_node(rbt->mctx, suffix,
   1375 						     &new_current);
   1376 
   1377 				if (result != ISC_R_SUCCESS) {
   1378 					break;
   1379 				}
   1380 
   1381 				/*
   1382 				 * Reproduce the tree attributes of the
   1383 				 * current node.
   1384 				 */
   1385 				new_current->is_root = current->is_root;
   1386 				if (current->nsec == DNS_RBT_NSEC_HAS_NSEC) {
   1387 					new_current->nsec = DNS_RBT_NSEC_NORMAL;
   1388 				} else {
   1389 					new_current->nsec = current->nsec;
   1390 				}
   1391 				PARENT(new_current) = PARENT(current);
   1392 				LEFT(new_current) = LEFT(current);
   1393 				RIGHT(new_current) = RIGHT(current);
   1394 				COLOR(new_current) = COLOR(current);
   1395 
   1396 				/*
   1397 				 * Fix pointers that were to the current node.
   1398 				 */
   1399 				if (parent != NULL) {
   1400 					if (LEFT(parent) == current) {
   1401 						LEFT(parent) = new_current;
   1402 					} else {
   1403 						RIGHT(parent) = new_current;
   1404 					}
   1405 				}
   1406 				if (LEFT(new_current) != NULL) {
   1407 					PARENT(LEFT(new_current)) = new_current;
   1408 				}
   1409 				if (RIGHT(new_current) != NULL) {
   1410 					PARENT(RIGHT(new_current)) =
   1411 						new_current;
   1412 				}
   1413 				if (*root == current) {
   1414 					*root = new_current;
   1415 				}
   1416 
   1417 				NAMELEN(current) = prefix->length;
   1418 				OFFSETLEN(current) = prefix->labels;
   1419 
   1420 				/*
   1421 				 * Set up the new root of the next level.
   1422 				 * By definition it will not be the top
   1423 				 * level tree, so clear DNS_NAMEATTR_ABSOLUTE.
   1424 				 */
   1425 				current->is_root = 1;
   1426 				PARENT(current) = new_current;
   1427 				DOWN(new_current) = current;
   1428 				root = &DOWN(new_current);
   1429 
   1430 				UPPERNODE(new_current) = UPPERNODE(current);
   1431 				UPPERNODE(current) = new_current;
   1432 
   1433 				INSIST(level_count < DNS_RBT_LEVELBLOCK);
   1434 				level_count++;
   1435 
   1436 				LEFT(current) = NULL;
   1437 				RIGHT(current) = NULL;
   1438 
   1439 				MAKE_BLACK(current);
   1440 				ATTRS(current) &= ~DNS_NAMEATTR_ABSOLUTE;
   1441 
   1442 				rbt->nodecount++;
   1443 				dns_name_getlabelsequence(name,
   1444 							  nlabels - hlabels,
   1445 							  hlabels, new_name);
   1446 				hash_node(rbt, new_current, new_name);
   1447 
   1448 				if (common_labels ==
   1449 				    dns_name_countlabels(add_name))
   1450 				{
   1451 					/*
   1452 					 * The name has been added by pushing
   1453 					 * the not-in-common parts down to
   1454 					 * a new level.
   1455 					 */
   1456 					*nodep = new_current;
   1457 					return (ISC_R_SUCCESS);
   1458 				} else {
   1459 					/*
   1460 					 * The current node has no data,
   1461 					 * because it is just a placeholder.
   1462 					 * Its data pointer is already NULL
   1463 					 * from create_node()), so there's
   1464 					 * nothing more to do to it.
   1465 					 */
   1466 
   1467 					/*
   1468 					 * The not-in-common parts of the new
   1469 					 * name will be inserted into the new
   1470 					 * level following this loop (unless
   1471 					 * result != ISC_R_SUCCESS, which
   1472 					 * is tested after the loop ends).
   1473 					 */
   1474 					dns_name_split(add_name, common_labels,
   1475 						       add_name, NULL);
   1476 
   1477 					break;
   1478 				}
   1479 			}
   1480 		}
   1481 	} while (ISC_LIKELY(child != NULL));
   1482 
   1483 	if (ISC_LIKELY(result == ISC_R_SUCCESS)) {
   1484 		result = create_node(rbt->mctx, add_name, &new_current);
   1485 	}
   1486 
   1487 	if (ISC_LIKELY(result == ISC_R_SUCCESS)) {
   1488 		if (*root == NULL) {
   1489 			UPPERNODE(new_current) = current;
   1490 		} else {
   1491 			UPPERNODE(new_current) = PARENT(*root);
   1492 		}
   1493 
   1494 		addonlevel(new_current, current, order, root);
   1495 		rbt->nodecount++;
   1496 		*nodep = new_current;
   1497 		hash_node(rbt, new_current, name);
   1498 	}
   1499 
   1500 	return (result);
   1501 }
   1502 
   1503 /*
   1504  * Add a name to the tree of trees, associating it with some data.
   1505  */
   1506 isc_result_t
   1507 dns_rbt_addname(dns_rbt_t *rbt, const dns_name_t *name, void *data) {
   1508 	isc_result_t result;
   1509 	dns_rbtnode_t *node;
   1510 
   1511 	REQUIRE(VALID_RBT(rbt));
   1512 	REQUIRE(dns_name_isabsolute(name));
   1513 
   1514 	node = NULL;
   1515 
   1516 	result = dns_rbt_addnode(rbt, name, &node);
   1517 
   1518 	/*
   1519 	 * dns_rbt_addnode will report the node exists even when
   1520 	 * it does not have data associated with it, but the
   1521 	 * dns_rbt_*name functions all behave depending on whether
   1522 	 * there is data associated with a node.
   1523 	 */
   1524 	if (result == ISC_R_SUCCESS ||
   1525 	    (result == ISC_R_EXISTS && DATA(node) == NULL))
   1526 	{
   1527 		DATA(node) = data;
   1528 		result = ISC_R_SUCCESS;
   1529 	}
   1530 
   1531 	return (result);
   1532 }
   1533 
   1534 /*
   1535  * Find the node for "name" in the tree of trees.
   1536  */
   1537 isc_result_t
   1538 dns_rbt_findnode(dns_rbt_t *rbt, const dns_name_t *name, dns_name_t *foundname,
   1539 		 dns_rbtnode_t **node, dns_rbtnodechain_t *chain,
   1540 		 unsigned int options, dns_rbtfindcallback_t callback,
   1541 		 void *callback_arg) {
   1542 	dns_rbtnode_t *current, *last_compared;
   1543 	dns_rbtnodechain_t localchain;
   1544 	dns_name_t *search_name, current_name, *callback_name;
   1545 	dns_fixedname_t fixedcallbackname, fixedsearchname;
   1546 	dns_namereln_t compared;
   1547 	isc_result_t result, saved_result;
   1548 	unsigned int common_labels;
   1549 	unsigned int hlabels = 0;
   1550 	int order;
   1551 
   1552 	REQUIRE(VALID_RBT(rbt));
   1553 	REQUIRE(dns_name_isabsolute(name));
   1554 	REQUIRE(node != NULL && *node == NULL);
   1555 	REQUIRE((options & (DNS_RBTFIND_NOEXACT | DNS_RBTFIND_NOPREDECESSOR)) !=
   1556 		(DNS_RBTFIND_NOEXACT | DNS_RBTFIND_NOPREDECESSOR));
   1557 
   1558 	/*
   1559 	 * If there is a chain it needs to appear to be in a sane state,
   1560 	 * otherwise a chain is still needed to generate foundname and
   1561 	 * callback_name.
   1562 	 */
   1563 	if (chain == NULL) {
   1564 		options |= DNS_RBTFIND_NOPREDECESSOR;
   1565 		chain = &localchain;
   1566 		dns_rbtnodechain_init(chain);
   1567 	} else {
   1568 		dns_rbtnodechain_reset(chain);
   1569 	}
   1570 
   1571 	if (ISC_UNLIKELY(rbt->root == NULL)) {
   1572 		return (ISC_R_NOTFOUND);
   1573 	}
   1574 
   1575 	/*
   1576 	 * Appease GCC about variables it incorrectly thinks are
   1577 	 * possibly used uninitialized.
   1578 	 */
   1579 	compared = dns_namereln_none;
   1580 	last_compared = NULL;
   1581 	order = 0;
   1582 
   1583 	callback_name = dns_fixedname_initname(&fixedcallbackname);
   1584 
   1585 	/*
   1586 	 * search_name is the name segment being sought in each tree level.
   1587 	 * By using a fixedname, the search_name will definitely have offsets
   1588 	 * for use by any splitting.
   1589 	 * By using dns_name_clone, no name data should be copied thanks to
   1590 	 * the lack of bitstring labels.
   1591 	 */
   1592 	search_name = dns_fixedname_initname(&fixedsearchname);
   1593 	INSIST(search_name != NULL);
   1594 	dns_name_clone(name, search_name);
   1595 
   1596 	dns_name_init(&current_name, NULL);
   1597 
   1598 	saved_result = ISC_R_SUCCESS;
   1599 	current = rbt->root;
   1600 
   1601 	while (ISC_LIKELY(current != NULL)) {
   1602 		NODENAME(current, &current_name);
   1603 		compared = dns_name_fullcompare(search_name, &current_name,
   1604 						&order, &common_labels);
   1605 		/*
   1606 		 * last_compared is used as a shortcut to start (or
   1607 		 * continue rather) finding the stop-node of the search
   1608 		 * when hashing was used (see much below in this
   1609 		 * function).
   1610 		 */
   1611 		last_compared = current;
   1612 
   1613 		if (compared == dns_namereln_equal) {
   1614 			break;
   1615 		}
   1616 
   1617 		if (compared == dns_namereln_none) {
   1618 			/*
   1619 			 * Here, current is pointing at a subtree root
   1620 			 * node. We try to find a matching node using
   1621 			 * the hashtable. We can get one of 3 results
   1622 			 * here: (a) we locate the matching node, (b) we
   1623 			 * find a node to which the current node has a
   1624 			 * subdomain relation, (c) we fail to find (a)
   1625 			 * or (b).
   1626 			 */
   1627 
   1628 			dns_name_t hash_name;
   1629 			dns_rbtnode_t *hnode;
   1630 			dns_rbtnode_t *up_current;
   1631 			unsigned int nlabels;
   1632 			unsigned int tlabels = 1;
   1633 			uint32_t hash;
   1634 
   1635 			/*
   1636 			 * The case of current not being a subtree root,
   1637 			 * that means a left or right pointer was
   1638 			 * followed, only happens when the algorithm
   1639 			 * fell through to the traditional binary search
   1640 			 * because of a bitstring label.  Since we
   1641 			 * dropped the bitstring support, this should
   1642 			 * not happen.
   1643 			 */
   1644 			INSIST(IS_ROOT(current));
   1645 
   1646 			nlabels = dns_name_countlabels(search_name);
   1647 
   1648 			/*
   1649 			 * current is the root of the current level, so
   1650 			 * its parent is the same as its "up" pointer.
   1651 			 */
   1652 			up_current = PARENT(current);
   1653 			dns_name_init(&hash_name, NULL);
   1654 
   1655 		hashagain:
   1656 			/*
   1657 			 * Compute the hash over the full absolute
   1658 			 * name. Look for the smallest suffix match at
   1659 			 * this tree level (hlevel), and then at every
   1660 			 * iteration, look for the next smallest suffix
   1661 			 * match (add another subdomain label to the
   1662 			 * absolute name being hashed).
   1663 			 */
   1664 			dns_name_getlabelsequence(name, nlabels - tlabels,
   1665 						  hlabels + tlabels,
   1666 						  &hash_name);
   1667 			hash = dns_name_fullhash(&hash_name, false);
   1668 			dns_name_getlabelsequence(search_name,
   1669 						  nlabels - tlabels, tlabels,
   1670 						  &hash_name);
   1671 
   1672 			/*
   1673 			 * Walk all the nodes in the hash bucket pointed
   1674 			 * by the computed hash value.
   1675 			 */
   1676 			for (hnode = rbt->hashtable[hash_32(hash,
   1677 							    rbt->hashbits)];
   1678 			     hnode != NULL; hnode = hnode->hashnext)
   1679 			{
   1680 				dns_name_t hnode_name;
   1681 
   1682 				if (ISC_LIKELY(hash != HASHVAL(hnode))) {
   1683 					continue;
   1684 				}
   1685 				/*
   1686 				 * This checks that the hashed label
   1687 				 * sequence being looked up is at the
   1688 				 * same tree level, so that we don't
   1689 				 * match a labelsequence from some other
   1690 				 * subdomain.
   1691 				 */
   1692 				if (ISC_LIKELY(get_upper_node(hnode) !=
   1693 					       up_current))
   1694 				{
   1695 					continue;
   1696 				}
   1697 
   1698 				dns_name_init(&hnode_name, NULL);
   1699 				NODENAME(hnode, &hnode_name);
   1700 				if (ISC_LIKELY(dns_name_equal(&hnode_name,
   1701 							      &hash_name)))
   1702 				{
   1703 					break;
   1704 				}
   1705 			}
   1706 
   1707 			if (hnode != NULL) {
   1708 				current = hnode;
   1709 				/*
   1710 				 * This is an optimization.  If hashing found
   1711 				 * the right node, the next call to
   1712 				 * dns_name_fullcompare() would obviously
   1713 				 * return _equal or _subdomain.  Determine
   1714 				 * which of those would be the case by
   1715 				 * checking if the full name was hashed.  Then
   1716 				 * make it look like dns_name_fullcompare
   1717 				 * was called and jump to the right place.
   1718 				 */
   1719 				if (tlabels == nlabels) {
   1720 					compared = dns_namereln_equal;
   1721 					break;
   1722 				} else {
   1723 					common_labels = tlabels;
   1724 					compared = dns_namereln_subdomain;
   1725 					goto subdomain;
   1726 				}
   1727 			}
   1728 
   1729 			if (tlabels++ < nlabels) {
   1730 				goto hashagain;
   1731 			}
   1732 
   1733 			/*
   1734 			 * All of the labels have been tried against the hash
   1735 			 * table.  Since we dropped the support of bitstring
   1736 			 * labels, the name isn't in the table.
   1737 			 */
   1738 			current = NULL;
   1739 			continue;
   1740 		} else {
   1741 			/*
   1742 			 * The names have some common suffix labels.
   1743 			 *
   1744 			 * If the number in common are equal in length to
   1745 			 * the current node's name length, then follow the
   1746 			 * down pointer and search in the new tree.
   1747 			 */
   1748 			if (compared == dns_namereln_subdomain) {
   1749 			subdomain:
   1750 				/*
   1751 				 * Whack off the current node's common parts
   1752 				 * for the name to search in the next level.
   1753 				 */
   1754 				dns_name_split(search_name, common_labels,
   1755 					       search_name, NULL);
   1756 				hlabels += common_labels;
   1757 				/*
   1758 				 * This might be the closest enclosing name.
   1759 				 */
   1760 				if (WANTEMPTYDATA_OR_DATA(options, current)) {
   1761 					*node = current;
   1762 				}
   1763 
   1764 				/*
   1765 				 * Point the chain to the next level.   This
   1766 				 * needs to be done before 'current' is pointed
   1767 				 * there because the callback in the next
   1768 				 * block of code needs the current 'current',
   1769 				 * but in the event the callback requests that
   1770 				 * the search be stopped then the
   1771 				 * DNS_R_PARTIALMATCH code at the end of this
   1772 				 * function needs the chain pointed to the
   1773 				 * next level.
   1774 				 */
   1775 				ADD_LEVEL(chain, current);
   1776 
   1777 				/*
   1778 				 * The caller may want to interrupt the
   1779 				 * downward search when certain special nodes
   1780 				 * are traversed.  If this is a special node,
   1781 				 * the callback is used to learn what the
   1782 				 * caller wants to do.
   1783 				 */
   1784 				if (callback != NULL && FINDCALLBACK(current)) {
   1785 					result = chain_name(
   1786 						chain, callback_name, false);
   1787 					if (result != ISC_R_SUCCESS) {
   1788 						dns_rbtnodechain_reset(chain);
   1789 						return (result);
   1790 					}
   1791 
   1792 					result = (callback)(current,
   1793 							    callback_name,
   1794 							    callback_arg);
   1795 					if (result != DNS_R_CONTINUE) {
   1796 						saved_result = result;
   1797 						/*
   1798 						 * Treat this node as if it
   1799 						 * had no down pointer.
   1800 						 */
   1801 						current = NULL;
   1802 						break;
   1803 					}
   1804 				}
   1805 
   1806 				/*
   1807 				 * Finally, head to the next tree level.
   1808 				 */
   1809 				current = DOWN(current);
   1810 			} else {
   1811 				/*
   1812 				 * Though there are labels in common, the
   1813 				 * entire name at this node is not common
   1814 				 * with the search name so the search
   1815 				 * name does not exist in the tree.
   1816 				 */
   1817 				INSIST(compared ==
   1818 					       dns_namereln_commonancestor ||
   1819 				       compared == dns_namereln_contains);
   1820 
   1821 				current = NULL;
   1822 			}
   1823 		}
   1824 	}
   1825 
   1826 	/*
   1827 	 * If current is not NULL, NOEXACT is not disallowing exact matches,
   1828 	 * and either the node has data or an empty node is ok, return
   1829 	 * ISC_R_SUCCESS to indicate an exact match.
   1830 	 */
   1831 	if (current != NULL && (options & DNS_RBTFIND_NOEXACT) == 0 &&
   1832 	    WANTEMPTYDATA_OR_DATA(options, current))
   1833 	{
   1834 		/*
   1835 		 * Found an exact match.
   1836 		 */
   1837 		chain->end = current;
   1838 		chain->level_matches = chain->level_count;
   1839 
   1840 		if (foundname != NULL) {
   1841 			result = chain_name(chain, foundname, true);
   1842 		} else {
   1843 			result = ISC_R_SUCCESS;
   1844 		}
   1845 
   1846 		if (result == ISC_R_SUCCESS) {
   1847 			*node = current;
   1848 			result = saved_result;
   1849 		} else {
   1850 			*node = NULL;
   1851 		}
   1852 	} else {
   1853 		/*
   1854 		 * Did not find an exact match (or did not want one).
   1855 		 */
   1856 		if (*node != NULL) {
   1857 			/*
   1858 			 * ... but found a partially matching superdomain.
   1859 			 * Unwind the chain to the partial match node
   1860 			 * to set level_matches to the level above the node,
   1861 			 * and then to derive the name.
   1862 			 *
   1863 			 * chain->level_count is guaranteed to be at least 1
   1864 			 * here because by definition of finding a superdomain,
   1865 			 * the chain is pointed to at least the first subtree.
   1866 			 */
   1867 			chain->level_matches = chain->level_count - 1;
   1868 
   1869 			while (chain->levels[chain->level_matches] != *node) {
   1870 				INSIST(chain->level_matches > 0);
   1871 				chain->level_matches--;
   1872 			}
   1873 
   1874 			if (foundname != NULL) {
   1875 				unsigned int saved_count = chain->level_count;
   1876 
   1877 				chain->level_count = chain->level_matches + 1;
   1878 
   1879 				result = chain_name(chain, foundname, false);
   1880 
   1881 				chain->level_count = saved_count;
   1882 			} else {
   1883 				result = ISC_R_SUCCESS;
   1884 			}
   1885 
   1886 			if (result == ISC_R_SUCCESS) {
   1887 				result = DNS_R_PARTIALMATCH;
   1888 			}
   1889 		} else {
   1890 			result = ISC_R_NOTFOUND;
   1891 		}
   1892 
   1893 		if (current != NULL) {
   1894 			/*
   1895 			 * There was an exact match but either
   1896 			 * DNS_RBTFIND_NOEXACT was set, or
   1897 			 * DNS_RBTFIND_EMPTYDATA was set and the node had no
   1898 			 * data.  A policy decision was made to set the
   1899 			 * chain to the exact match, but this is subject
   1900 			 * to change if it becomes apparent that something
   1901 			 * else would be more useful.  It is important that
   1902 			 * this case is handled here, because the predecessor
   1903 			 * setting code below assumes the match was not exact.
   1904 			 */
   1905 			INSIST(((options & DNS_RBTFIND_NOEXACT) != 0) ||
   1906 			       ((options & DNS_RBTFIND_EMPTYDATA) == 0 &&
   1907 				DATA(current) == NULL));
   1908 			chain->end = current;
   1909 		} else if ((options & DNS_RBTFIND_NOPREDECESSOR) != 0) {
   1910 			/*
   1911 			 * Ensure the chain points nowhere.
   1912 			 */
   1913 			chain->end = NULL;
   1914 		} else {
   1915 			/*
   1916 			 * Since there was no exact match, the chain argument
   1917 			 * needs to be pointed at the DNSSEC predecessor of
   1918 			 * the search name.
   1919 			 */
   1920 			if (compared == dns_namereln_subdomain) {
   1921 				/*
   1922 				 * Attempted to follow a down pointer that was
   1923 				 * NULL, which means the searched for name was
   1924 				 * a subdomain of a terminal name in the tree.
   1925 				 * Since there are no existing subdomains to
   1926 				 * order against, the terminal name is the
   1927 				 * predecessor.
   1928 				 */
   1929 				INSIST(chain->level_count > 0);
   1930 				INSIST(chain->level_matches <
   1931 				       chain->level_count);
   1932 				chain->end =
   1933 					chain->levels[--chain->level_count];
   1934 			} else {
   1935 				isc_result_t result2;
   1936 
   1937 				/*
   1938 				 * Point current to the node that stopped
   1939 				 * the search.
   1940 				 *
   1941 				 * With the hashing modification that has been
   1942 				 * added to the algorithm, the stop node of a
   1943 				 * standard binary search is not known.  So it
   1944 				 * has to be found.  There is probably a more
   1945 				 * clever way of doing this.
   1946 				 *
   1947 				 * The assignment of current to NULL when
   1948 				 * the relationship is *not* dns_namereln_none,
   1949 				 * even though it later gets set to the same
   1950 				 * last_compared anyway, is simply to not push
   1951 				 * the while loop in one more level of
   1952 				 * indentation.
   1953 				 */
   1954 				if (compared == dns_namereln_none) {
   1955 					current = last_compared;
   1956 				} else {
   1957 					current = NULL;
   1958 				}
   1959 
   1960 				while (current != NULL) {
   1961 					NODENAME(current, &current_name);
   1962 					compared = dns_name_fullcompare(
   1963 						search_name, &current_name,
   1964 						&order, &common_labels);
   1965 					POST(compared);
   1966 
   1967 					last_compared = current;
   1968 
   1969 					/*
   1970 					 * Standard binary search movement.
   1971 					 */
   1972 					if (order < 0) {
   1973 						current = LEFT(current);
   1974 					} else {
   1975 						current = RIGHT(current);
   1976 					}
   1977 				}
   1978 
   1979 				current = last_compared;
   1980 
   1981 				/*
   1982 				 * Reached a point within a level tree that
   1983 				 * positively indicates the name is not
   1984 				 * present, but the stop node could be either
   1985 				 * less than the desired name (order > 0) or
   1986 				 * greater than the desired name (order < 0).
   1987 				 *
   1988 				 * If the stop node is less, it is not
   1989 				 * necessarily the predecessor.  If the stop
   1990 				 * node has a down pointer, then the real
   1991 				 * predecessor is at the end of a level below
   1992 				 * (not necessarily the next level).
   1993 				 * Move down levels until the rightmost node
   1994 				 * does not have a down pointer.
   1995 				 *
   1996 				 * When the stop node is greater, it is
   1997 				 * the successor.  All the logic for finding
   1998 				 * the predecessor is handily encapsulated
   1999 				 * in dns_rbtnodechain_prev.  In the event
   2000 				 * that the search name is less than anything
   2001 				 * else in the tree, the chain is reset.
   2002 				 * XXX DCL What is the best way for the caller
   2003 				 *         to know that the search name has
   2004 				 *         no predecessor?
   2005 				 */
   2006 
   2007 				if (order > 0) {
   2008 					if (DOWN(current) != NULL) {
   2009 						ADD_LEVEL(chain, current);
   2010 
   2011 						result2 = move_chain_to_last(
   2012 							chain, DOWN(current));
   2013 
   2014 						if (result2 != ISC_R_SUCCESS) {
   2015 							result = result2;
   2016 						}
   2017 					} else {
   2018 						/*
   2019 						 * Ah, the pure and simple
   2020 						 * case.  The stop node is the
   2021 						 * predecessor.
   2022 						 */
   2023 						chain->end = current;
   2024 					}
   2025 				} else {
   2026 					INSIST(order < 0);
   2027 
   2028 					chain->end = current;
   2029 
   2030 					result2 = dns_rbtnodechain_prev(
   2031 						chain, NULL, NULL);
   2032 					if (result2 == ISC_R_SUCCESS ||
   2033 					    result2 == DNS_R_NEWORIGIN)
   2034 					{
   2035 						/* Nothing. */
   2036 					} else if (result2 == ISC_R_NOMORE) {
   2037 						/*
   2038 						 * There is no predecessor.
   2039 						 */
   2040 						dns_rbtnodechain_reset(chain);
   2041 					} else {
   2042 						result = result2;
   2043 					}
   2044 				}
   2045 			}
   2046 		}
   2047 	}
   2048 
   2049 	ENSURE(*node == NULL || DNS_RBTNODE_VALID(*node));
   2050 
   2051 	return (result);
   2052 }
   2053 
   2054 /*
   2055  * Get the data pointer associated with 'name'.
   2056  */
   2057 isc_result_t
   2058 dns_rbt_findname(dns_rbt_t *rbt, const dns_name_t *name, unsigned int options,
   2059 		 dns_name_t *foundname, void **data) {
   2060 	dns_rbtnode_t *node = NULL;
   2061 	isc_result_t result;
   2062 
   2063 	REQUIRE(data != NULL && *data == NULL);
   2064 
   2065 	result = dns_rbt_findnode(rbt, name, foundname, &node, NULL, options,
   2066 				  NULL, NULL);
   2067 
   2068 	if (node != NULL && WANTEMPTYDATA_OR_DATA(options, node)) {
   2069 		*data = DATA(node);
   2070 	} else {
   2071 		result = ISC_R_NOTFOUND;
   2072 	}
   2073 
   2074 	return (result);
   2075 }
   2076 
   2077 /*
   2078  * Delete a name from the tree of trees.
   2079  */
   2080 isc_result_t
   2081 dns_rbt_deletename(dns_rbt_t *rbt, const dns_name_t *name, bool recurse) {
   2082 	dns_rbtnode_t *node = NULL;
   2083 	isc_result_t result;
   2084 
   2085 	REQUIRE(VALID_RBT(rbt));
   2086 	REQUIRE(dns_name_isabsolute(name));
   2087 
   2088 	/*
   2089 	 * First, find the node.
   2090 	 *
   2091 	 * When searching, the name might not have an exact match:
   2092 	 * consider a.b.a.com, b.b.a.com and c.b.a.com as the only
   2093 	 * elements of a tree, which would make layer 1 a single
   2094 	 * node tree of "b.a.com" and layer 2 a three node tree of
   2095 	 * a, b, and c.  Deleting a.com would find only a partial depth
   2096 	 * match in the first layer.  Should it be a requirement that
   2097 	 * that the name to be deleted have data?  For now, it is.
   2098 	 *
   2099 	 * ->dirty, ->locknum and ->references are ignored; they are
   2100 	 * solely the province of rbtdb.c.
   2101 	 */
   2102 	result = dns_rbt_findnode(rbt, name, NULL, &node, NULL,
   2103 				  DNS_RBTFIND_NOOPTIONS, NULL, NULL);
   2104 
   2105 	if (result == ISC_R_SUCCESS) {
   2106 		if (DATA(node) != NULL) {
   2107 			result = dns_rbt_deletenode(rbt, node, recurse);
   2108 		} else {
   2109 			result = ISC_R_NOTFOUND;
   2110 		}
   2111 	} else if (result == DNS_R_PARTIALMATCH) {
   2112 		result = ISC_R_NOTFOUND;
   2113 	}
   2114 
   2115 	return (result);
   2116 }
   2117 
   2118 /*
   2119  * Remove a node from the tree of trees.
   2120  *
   2121  * NOTE WELL: deletion is *not* symmetric with addition; that is, reversing
   2122  * a sequence of additions to be deletions will not generally get the
   2123  * tree back to the state it started in.  For example, if the addition
   2124  * of "b.c" caused the node "a.b.c" to be split, pushing "a" to its own level,
   2125  * then the subsequent deletion of "b.c" will not cause "a" to be pulled up,
   2126  * restoring "a.b.c".  The RBT *used* to do this kind of rejoining, but it
   2127  * turned out to be a bad idea because it could corrupt an active nodechain
   2128  * that had "b.c" as one of its levels -- and the RBT has no idea what
   2129  * nodechains are in use by callers, so it can't even *try* to helpfully
   2130  * fix them up (which would probably be doomed to failure anyway).
   2131  *
   2132  * Similarly, it is possible to leave the tree in a state where a supposedly
   2133  * deleted node still exists.  The first case of this is obvious; take
   2134  * the tree which has "b.c" on one level, pointing to "a".  Now deleted "b.c".
   2135  * It was just established in the previous paragraph why we can't pull "a"
   2136  * back up to its parent level.  But what happens when "a" then gets deleted?
   2137  * "b.c" is left hanging around without data or children.  This condition
   2138  * is actually pretty easy to detect, but ... should it really be removed?
   2139  * Is a chain pointing to it?  An iterator?  Who knows!  (Note that the
   2140  * references structure member cannot be looked at because it is private to
   2141  * rbtdb.)  This is ugly and makes me unhappy, but after hours of trying to
   2142  * make it more aesthetically proper and getting nowhere, this is the way it
   2143  * is going to stay until such time as it proves to be a *real* problem.
   2144  *
   2145  * Finally, for reference, note that the original routine that did node
   2146  * joining was called join_nodes().  It has been excised, living now only
   2147  * in the CVS history, but comments have been left behind that point to it just
   2148  * in case someone wants to muck with this some more.
   2149  *
   2150  * The one positive aspect of all of this is that joining used to have a
   2151  * case where it might fail.  Without trying to join, now this function always
   2152  * succeeds. It still returns isc_result_t, though, so the API wouldn't change.
   2153  */
   2154 isc_result_t
   2155 dns_rbt_deletenode(dns_rbt_t *rbt, dns_rbtnode_t *node, bool recurse) {
   2156 	dns_rbtnode_t *parent;
   2157 
   2158 	REQUIRE(VALID_RBT(rbt));
   2159 	REQUIRE(DNS_RBTNODE_VALID(node));
   2160 	INSIST(rbt->nodecount != 0);
   2161 
   2162 	if (DOWN(node) != NULL) {
   2163 		if (recurse) {
   2164 			PARENT(DOWN(node)) = NULL;
   2165 			deletetreeflat(rbt, 0, true, &DOWN(node));
   2166 		} else {
   2167 			if (DATA(node) != NULL && rbt->data_deleter != NULL) {
   2168 				rbt->data_deleter(DATA(node), rbt->deleter_arg);
   2169 			}
   2170 			DATA(node) = NULL;
   2171 
   2172 			/*
   2173 			 * Since there is at least one node below this one and
   2174 			 * no recursion was requested, the deletion is
   2175 			 * complete.  The down node from this node might be all
   2176 			 * by itself on a single level, so join_nodes() could
   2177 			 * be used to collapse the tree (with all the caveats
   2178 			 * of the comment at the start of this function).
   2179 			 * But join_nodes() function has now been removed.
   2180 			 */
   2181 			return (ISC_R_SUCCESS);
   2182 		}
   2183 	}
   2184 
   2185 	/*
   2186 	 * Note the node that points to the level of the node
   2187 	 * that is being deleted.  If the deleted node is the
   2188 	 * top level, parent will be set to NULL.
   2189 	 */
   2190 	parent = get_upper_node(node);
   2191 
   2192 	/*
   2193 	 * This node now has no down pointer, so now it needs
   2194 	 * to be removed from this level.
   2195 	 */
   2196 	deletefromlevel(node, parent == NULL ? &rbt->root : &DOWN(parent));
   2197 
   2198 	if (DATA(node) != NULL && rbt->data_deleter != NULL) {
   2199 		rbt->data_deleter(DATA(node), rbt->deleter_arg);
   2200 	}
   2201 
   2202 	unhash_node(rbt, node);
   2203 #if DNS_RBT_USEMAGIC
   2204 	node->magic = 0;
   2205 #endif /* if DNS_RBT_USEMAGIC */
   2206 	isc_refcount_destroy(&node->references);
   2207 
   2208 	freenode(rbt, &node);
   2209 
   2210 	/*
   2211 	 * This function never fails.
   2212 	 */
   2213 	return (ISC_R_SUCCESS);
   2214 }
   2215 
   2216 void
   2217 dns_rbt_namefromnode(dns_rbtnode_t *node, dns_name_t *name) {
   2218 	REQUIRE(DNS_RBTNODE_VALID(node));
   2219 	REQUIRE(name != NULL);
   2220 	REQUIRE(name->offsets == NULL);
   2221 
   2222 	NODENAME(node, name);
   2223 }
   2224 
   2225 isc_result_t
   2226 dns_rbt_fullnamefromnode(dns_rbtnode_t *node, dns_name_t *name) {
   2227 	dns_name_t current;
   2228 	isc_result_t result;
   2229 
   2230 	REQUIRE(DNS_RBTNODE_VALID(node));
   2231 	REQUIRE(name != NULL);
   2232 	REQUIRE(name->buffer != NULL);
   2233 
   2234 	dns_name_init(&current, NULL);
   2235 	dns_name_reset(name);
   2236 
   2237 	do {
   2238 		INSIST(node != NULL);
   2239 
   2240 		NODENAME(node, &current);
   2241 
   2242 		result = dns_name_concatenate(name, &current, name, NULL);
   2243 		if (result != ISC_R_SUCCESS) {
   2244 			break;
   2245 		}
   2246 
   2247 		node = get_upper_node(node);
   2248 	} while (!dns_name_isabsolute(name));
   2249 
   2250 	return (result);
   2251 }
   2252 
   2253 char *
   2254 dns_rbt_formatnodename(dns_rbtnode_t *node, char *printname,
   2255 		       unsigned int size) {
   2256 	dns_fixedname_t fixedname;
   2257 	dns_name_t *name;
   2258 	isc_result_t result;
   2259 
   2260 	REQUIRE(DNS_RBTNODE_VALID(node));
   2261 	REQUIRE(printname != NULL);
   2262 
   2263 	name = dns_fixedname_initname(&fixedname);
   2264 	result = dns_rbt_fullnamefromnode(node, name);
   2265 	if (result == ISC_R_SUCCESS) {
   2266 		dns_name_format(name, printname, size);
   2267 	} else {
   2268 		snprintf(printname, size, "<error building name: %s>",
   2269 			 dns_result_totext(result));
   2270 	}
   2271 
   2272 	return (printname);
   2273 }
   2274 
   2275 static isc_result_t
   2276 create_node(isc_mem_t *mctx, const dns_name_t *name, dns_rbtnode_t **nodep) {
   2277 	dns_rbtnode_t *node;
   2278 	isc_region_t region;
   2279 	unsigned int labels;
   2280 	size_t nodelen;
   2281 
   2282 	REQUIRE(name->offsets != NULL);
   2283 
   2284 	dns_name_toregion(name, &region);
   2285 	labels = dns_name_countlabels(name);
   2286 	ENSURE(labels > 0);
   2287 
   2288 	/*
   2289 	 * Allocate space for the node structure, the name, and the offsets.
   2290 	 */
   2291 	nodelen = sizeof(dns_rbtnode_t) + region.length + labels + 1;
   2292 	node = isc_mem_get(mctx, nodelen);
   2293 	memset(node, 0, nodelen);
   2294 
   2295 	node->is_root = 0;
   2296 	PARENT(node) = NULL;
   2297 	RIGHT(node) = NULL;
   2298 	LEFT(node) = NULL;
   2299 	DOWN(node) = NULL;
   2300 	DATA(node) = NULL;
   2301 	node->is_mmapped = 0;
   2302 	node->down_is_relative = 0;
   2303 	node->left_is_relative = 0;
   2304 	node->right_is_relative = 0;
   2305 	node->parent_is_relative = 0;
   2306 	node->data_is_relative = 0;
   2307 	node->rpz = 0;
   2308 
   2309 	HASHNEXT(node) = NULL;
   2310 	HASHVAL(node) = 0;
   2311 
   2312 	ISC_LINK_INIT(node, deadlink);
   2313 	ISC_LINK_INIT(node, prunelink);
   2314 
   2315 	LOCKNUM(node) = 0;
   2316 	WILD(node) = 0;
   2317 	DIRTY(node) = 0;
   2318 	isc_refcount_init(&node->references, 0);
   2319 	node->find_callback = 0;
   2320 	node->nsec = DNS_RBT_NSEC_NORMAL;
   2321 
   2322 	MAKE_BLACK(node);
   2323 
   2324 	/*
   2325 	 * The following is stored to make reconstructing a name from the
   2326 	 * stored value in the node easy:  the length of the name, the number
   2327 	 * of labels, whether the name is absolute or not, the name itself,
   2328 	 * and the name's offsets table.
   2329 	 *
   2330 	 * XXX RTH
   2331 	 *      The offsets table could be made smaller by eliminating the
   2332 	 *      first offset, which is always 0.  This requires changes to
   2333 	 *      lib/dns/name.c.
   2334 	 *
   2335 	 * Note: OLDOFFSETLEN *must* be assigned *after* OLDNAMELEN is assigned
   2336 	 * 	 as it uses OLDNAMELEN.
   2337 	 */
   2338 	OLDNAMELEN(node) = NAMELEN(node) = region.length;
   2339 	OLDOFFSETLEN(node) = OFFSETLEN(node) = labels;
   2340 	ATTRS(node) = name->attributes;
   2341 
   2342 	memmove(NAME(node), region.base, region.length);
   2343 	memmove(OFFSETS(node), name->offsets, labels);
   2344 
   2345 #if DNS_RBT_USEMAGIC
   2346 	node->magic = DNS_RBTNODE_MAGIC;
   2347 #endif /* if DNS_RBT_USEMAGIC */
   2348 	*nodep = node;
   2349 
   2350 	return (ISC_R_SUCCESS);
   2351 }
   2352 
   2353 /*
   2354  * Add a node to the hash table
   2355  */
   2356 static void
   2357 hash_add_node(dns_rbt_t *rbt, dns_rbtnode_t *node, const dns_name_t *name) {
   2358 	uint32_t hash;
   2359 
   2360 	REQUIRE(name != NULL);
   2361 
   2362 	HASHVAL(node) = dns_name_fullhash(name, false);
   2363 
   2364 	hash = hash_32(HASHVAL(node), rbt->hashbits);
   2365 	HASHNEXT(node) = rbt->hashtable[hash];
   2366 
   2367 	rbt->hashtable[hash] = node;
   2368 }
   2369 
   2370 /*
   2371  * Initialize hash table
   2372  */
   2373 static isc_result_t
   2374 inithash(dns_rbt_t *rbt) {
   2375 	size_t size;
   2376 
   2377 	rbt->hashbits = RBT_HASH_MIN_BITS;
   2378 	size = HASHSIZE(rbt->hashbits) * sizeof(dns_rbtnode_t *);
   2379 	rbt->hashtable = isc_mem_get(rbt->mctx, size);
   2380 	memset(rbt->hashtable, 0, size);
   2381 
   2382 	return (ISC_R_SUCCESS);
   2383 }
   2384 
   2385 static uint32_t
   2386 rehash_bits(dns_rbt_t *rbt, size_t newcount) {
   2387 	uint32_t newbits = rbt->hashbits;
   2388 
   2389 	while (newcount >= HASHSIZE(newbits) && newbits < RBT_HASH_MAX_BITS) {
   2390 		newbits += 1;
   2391 	}
   2392 
   2393 	return (newbits);
   2394 }
   2395 
   2396 /*
   2397  * Rebuild the hashtable to reduce the load factor
   2398  */
   2399 static void
   2400 rehash(dns_rbt_t *rbt, uint32_t newbits) {
   2401 	uint32_t oldbits;
   2402 	size_t oldsize;
   2403 	dns_rbtnode_t **oldtable;
   2404 	size_t newsize;
   2405 
   2406 	REQUIRE(rbt->hashbits <= rbt->maxhashbits);
   2407 	REQUIRE(newbits <= rbt->maxhashbits);
   2408 
   2409 	oldbits = rbt->hashbits;
   2410 	oldsize = HASHSIZE(oldbits);
   2411 	oldtable = rbt->hashtable;
   2412 
   2413 	rbt->hashbits = newbits;
   2414 	newsize = HASHSIZE(rbt->hashbits);
   2415 	rbt->hashtable = isc_mem_get(rbt->mctx,
   2416 				     newsize * sizeof(dns_rbtnode_t *));
   2417 	memset(rbt->hashtable, 0, newsize * sizeof(dns_rbtnode_t *));
   2418 
   2419 	for (size_t i = 0; i < oldsize; i++) {
   2420 		dns_rbtnode_t *node;
   2421 		dns_rbtnode_t *nextnode;
   2422 		for (node = oldtable[i]; node != NULL; node = nextnode) {
   2423 			uint32_t hash = hash_32(HASHVAL(node), rbt->hashbits);
   2424 			nextnode = HASHNEXT(node);
   2425 			HASHNEXT(node) = rbt->hashtable[hash];
   2426 			rbt->hashtable[hash] = node;
   2427 		}
   2428 	}
   2429 
   2430 	isc_mem_put(rbt->mctx, oldtable, oldsize * sizeof(dns_rbtnode_t *));
   2431 }
   2432 
   2433 static void
   2434 maybe_rehash(dns_rbt_t *rbt, size_t newcount) {
   2435 	uint32_t newbits = rehash_bits(rbt, newcount);
   2436 	if (rbt->hashbits < newbits && newbits <= rbt->maxhashbits) {
   2437 		rehash(rbt, newbits);
   2438 	}
   2439 }
   2440 
   2441 /*
   2442  * Add a node to the hash table. Rehash the hashtable if the node count
   2443  * rises above a critical level.
   2444  */
   2445 static void
   2446 hash_node(dns_rbt_t *rbt, dns_rbtnode_t *node, const dns_name_t *name) {
   2447 	REQUIRE(DNS_RBTNODE_VALID(node));
   2448 
   2449 	if (rbt->nodecount >= (HASHSIZE(rbt->hashbits) * RBT_HASH_OVERCOMMIT)) {
   2450 		maybe_rehash(rbt, rbt->nodecount);
   2451 	}
   2452 
   2453 	hash_add_node(rbt, node, name);
   2454 }
   2455 
   2456 /*
   2457  * Remove a node from the hash table
   2458  */
   2459 static void
   2460 unhash_node(dns_rbt_t *rbt, dns_rbtnode_t *node) {
   2461 	uint32_t bucket;
   2462 	dns_rbtnode_t *bucket_node;
   2463 
   2464 	REQUIRE(DNS_RBTNODE_VALID(node));
   2465 
   2466 	bucket = hash_32(HASHVAL(node), rbt->hashbits);
   2467 	bucket_node = rbt->hashtable[bucket];
   2468 
   2469 	if (bucket_node == node) {
   2470 		rbt->hashtable[bucket] = HASHNEXT(node);
   2471 	} else {
   2472 		while (HASHNEXT(bucket_node) != node) {
   2473 			INSIST(HASHNEXT(bucket_node) != NULL);
   2474 			bucket_node = HASHNEXT(bucket_node);
   2475 		}
   2476 		HASHNEXT(bucket_node) = HASHNEXT(node);
   2477 	}
   2478 }
   2479 
   2480 static void
   2481 rotate_left(dns_rbtnode_t *node, dns_rbtnode_t **rootp) {
   2482 	dns_rbtnode_t *child;
   2483 
   2484 	REQUIRE(DNS_RBTNODE_VALID(node));
   2485 	REQUIRE(rootp != NULL);
   2486 
   2487 	child = RIGHT(node);
   2488 	INSIST(child != NULL);
   2489 
   2490 	RIGHT(node) = LEFT(child);
   2491 	if (LEFT(child) != NULL) {
   2492 		PARENT(LEFT(child)) = node;
   2493 	}
   2494 	LEFT(child) = node;
   2495 
   2496 	PARENT(child) = PARENT(node);
   2497 
   2498 	if (IS_ROOT(node)) {
   2499 		*rootp = child;
   2500 		child->is_root = 1;
   2501 		node->is_root = 0;
   2502 	} else {
   2503 		if (LEFT(PARENT(node)) == node) {
   2504 			LEFT(PARENT(node)) = child;
   2505 		} else {
   2506 			RIGHT(PARENT(node)) = child;
   2507 		}
   2508 	}
   2509 
   2510 	PARENT(node) = child;
   2511 }
   2512 
   2513 static void
   2514 rotate_right(dns_rbtnode_t *node, dns_rbtnode_t **rootp) {
   2515 	dns_rbtnode_t *child;
   2516 
   2517 	REQUIRE(DNS_RBTNODE_VALID(node));
   2518 	REQUIRE(rootp != NULL);
   2519 
   2520 	child = LEFT(node);
   2521 	INSIST(child != NULL);
   2522 
   2523 	LEFT(node) = RIGHT(child);
   2524 	if (RIGHT(child) != NULL) {
   2525 		PARENT(RIGHT(child)) = node;
   2526 	}
   2527 	RIGHT(child) = node;
   2528 
   2529 	PARENT(child) = PARENT(node);
   2530 
   2531 	if (IS_ROOT(node)) {
   2532 		*rootp = child;
   2533 		child->is_root = 1;
   2534 		node->is_root = 0;
   2535 	} else {
   2536 		if (LEFT(PARENT(node)) == node) {
   2537 			LEFT(PARENT(node)) = child;
   2538 		} else {
   2539 			RIGHT(PARENT(node)) = child;
   2540 		}
   2541 	}
   2542 
   2543 	PARENT(node) = child;
   2544 }
   2545 
   2546 /*
   2547  * This is the real workhorse of the insertion code, because it does the
   2548  * true red/black tree on a single level.
   2549  */
   2550 static void
   2551 addonlevel(dns_rbtnode_t *node, dns_rbtnode_t *current, int order,
   2552 	   dns_rbtnode_t **rootp) {
   2553 	dns_rbtnode_t *child, *root, *parent, *grandparent;
   2554 	dns_name_t add_name, current_name;
   2555 	dns_offsets_t add_offsets, current_offsets;
   2556 
   2557 	REQUIRE(rootp != NULL);
   2558 	REQUIRE(DNS_RBTNODE_VALID(node) && LEFT(node) == NULL &&
   2559 		RIGHT(node) == NULL);
   2560 	REQUIRE(current != NULL);
   2561 
   2562 	root = *rootp;
   2563 	if (root == NULL) {
   2564 		/*
   2565 		 * First node of a level.
   2566 		 */
   2567 		MAKE_BLACK(node);
   2568 		node->is_root = 1;
   2569 		PARENT(node) = current;
   2570 		*rootp = node;
   2571 		return;
   2572 	}
   2573 
   2574 	child = root;
   2575 	POST(child);
   2576 
   2577 	dns_name_init(&add_name, add_offsets);
   2578 	NODENAME(node, &add_name);
   2579 
   2580 	dns_name_init(&current_name, current_offsets);
   2581 	NODENAME(current, &current_name);
   2582 
   2583 	if (order < 0) {
   2584 		INSIST(LEFT(current) == NULL);
   2585 		LEFT(current) = node;
   2586 	} else {
   2587 		INSIST(RIGHT(current) == NULL);
   2588 		RIGHT(current) = node;
   2589 	}
   2590 
   2591 	INSIST(PARENT(node) == NULL);
   2592 	PARENT(node) = current;
   2593 
   2594 	MAKE_RED(node);
   2595 
   2596 	while (node != root && IS_RED(PARENT(node))) {
   2597 		/*
   2598 		 * XXXDCL could do away with separate parent and grandparent
   2599 		 * variables.  They are vestiges of the days before parent
   2600 		 * pointers.  However, they make the code a little clearer.
   2601 		 */
   2602 
   2603 		parent = PARENT(node);
   2604 		grandparent = PARENT(parent);
   2605 
   2606 		if (parent == LEFT(grandparent)) {
   2607 			child = RIGHT(grandparent);
   2608 			if (child != NULL && IS_RED(child)) {
   2609 				MAKE_BLACK(parent);
   2610 				MAKE_BLACK(child);
   2611 				MAKE_RED(grandparent);
   2612 				node = grandparent;
   2613 			} else {
   2614 				if (node == RIGHT(parent)) {
   2615 					rotate_left(parent, &root);
   2616 					node = parent;
   2617 					parent = PARENT(node);
   2618 					grandparent = PARENT(parent);
   2619 				}
   2620 				MAKE_BLACK(parent);
   2621 				MAKE_RED(grandparent);
   2622 				rotate_right(grandparent, &root);
   2623 			}
   2624 		} else {
   2625 			child = LEFT(grandparent);
   2626 			if (child != NULL && IS_RED(child)) {
   2627 				MAKE_BLACK(parent);
   2628 				MAKE_BLACK(child);
   2629 				MAKE_RED(grandparent);
   2630 				node = grandparent;
   2631 			} else {
   2632 				if (node == LEFT(parent)) {
   2633 					rotate_right(parent, &root);
   2634 					node = parent;
   2635 					parent = PARENT(node);
   2636 					grandparent = PARENT(parent);
   2637 				}
   2638 				MAKE_BLACK(parent);
   2639 				MAKE_RED(grandparent);
   2640 				rotate_left(grandparent, &root);
   2641 			}
   2642 		}
   2643 	}
   2644 
   2645 	MAKE_BLACK(root);
   2646 	ENSURE(IS_ROOT(root));
   2647 	*rootp = root;
   2648 
   2649 	return;
   2650 }
   2651 
   2652 /*
   2653  * This is the real workhorse of the deletion code, because it does the
   2654  * true red/black tree on a single level.
   2655  */
   2656 static void
   2657 deletefromlevel(dns_rbtnode_t *item, dns_rbtnode_t **rootp) {
   2658 	dns_rbtnode_t *child, *sibling, *parent;
   2659 	dns_rbtnode_t *successor;
   2660 
   2661 	REQUIRE(item != NULL);
   2662 
   2663 	/*
   2664 	 * Verify that the parent history is (apparently) correct.
   2665 	 */
   2666 	INSIST((IS_ROOT(item) && *rootp == item) ||
   2667 	       (!IS_ROOT(item) &&
   2668 		(LEFT(PARENT(item)) == item || RIGHT(PARENT(item)) == item)));
   2669 
   2670 	child = NULL;
   2671 
   2672 	if (LEFT(item) == NULL) {
   2673 		if (RIGHT(item) == NULL) {
   2674 			if (IS_ROOT(item)) {
   2675 				/*
   2676 				 * This is the only item in the tree.
   2677 				 */
   2678 				*rootp = NULL;
   2679 				return;
   2680 			}
   2681 		} else {
   2682 			/*
   2683 			 * This node has one child, on the right.
   2684 			 */
   2685 			child = RIGHT(item);
   2686 		}
   2687 	} else if (RIGHT(item) == NULL) {
   2688 		/*
   2689 		 * This node has one child, on the left.
   2690 		 */
   2691 		child = LEFT(item);
   2692 	} else {
   2693 		dns_rbtnode_t *saved_parent, *saved_right;
   2694 		int saved_color;
   2695 
   2696 		/*
   2697 		 * This node has two children, so it cannot be directly
   2698 		 * deleted.  Find its immediate in-order successor and
   2699 		 * move it to this location, then do the deletion at the
   2700 		 * old site of the successor.
   2701 		 */
   2702 		successor = RIGHT(item);
   2703 		while (LEFT(successor) != NULL) {
   2704 			successor = LEFT(successor);
   2705 		}
   2706 
   2707 		/*
   2708 		 * The successor cannot possibly have a left child;
   2709 		 * if there is any child, it is on the right.
   2710 		 */
   2711 		if (RIGHT(successor) != NULL) {
   2712 			child = RIGHT(successor);
   2713 		}
   2714 
   2715 		/*
   2716 		 * Swap the two nodes; it would be simpler to just replace
   2717 		 * the value being deleted with that of the successor,
   2718 		 * but this rigamarole is done so the caller has complete
   2719 		 * control over the pointers (and memory allocation) of
   2720 		 * all of nodes.  If just the key value were removed from
   2721 		 * the tree, the pointer to the node would be unchanged.
   2722 		 */
   2723 
   2724 		/*
   2725 		 * First, put the successor in the tree location of the
   2726 		 * node to be deleted.  Save its existing tree pointer
   2727 		 * information, which will be needed when linking up
   2728 		 * delete to the successor's old location.
   2729 		 */
   2730 		saved_parent = PARENT(successor);
   2731 		saved_right = RIGHT(successor);
   2732 		saved_color = COLOR(successor);
   2733 
   2734 		if (IS_ROOT(item)) {
   2735 			*rootp = successor;
   2736 			successor->is_root = true;
   2737 			item->is_root = false;
   2738 		} else if (LEFT(PARENT(item)) == item) {
   2739 			LEFT(PARENT(item)) = successor;
   2740 		} else {
   2741 			RIGHT(PARENT(item)) = successor;
   2742 		}
   2743 
   2744 		PARENT(successor) = PARENT(item);
   2745 		LEFT(successor) = LEFT(item);
   2746 		RIGHT(successor) = RIGHT(item);
   2747 		COLOR(successor) = COLOR(item);
   2748 
   2749 		if (LEFT(successor) != NULL) {
   2750 			PARENT(LEFT(successor)) = successor;
   2751 		}
   2752 		if (RIGHT(successor) != successor) {
   2753 			PARENT(RIGHT(successor)) = successor;
   2754 		}
   2755 
   2756 		/*
   2757 		 * Now relink the node to be deleted into the
   2758 		 * successor's previous tree location.
   2759 		 */
   2760 		INSIST(!IS_ROOT(item));
   2761 
   2762 		if (saved_parent == item) {
   2763 			/*
   2764 			 * Node being deleted was successor's parent.
   2765 			 */
   2766 			RIGHT(successor) = item;
   2767 			PARENT(item) = successor;
   2768 		} else {
   2769 			LEFT(saved_parent) = item;
   2770 			PARENT(item) = saved_parent;
   2771 		}
   2772 
   2773 		/*
   2774 		 * Original location of successor node has no left.
   2775 		 */
   2776 		LEFT(item) = NULL;
   2777 		RIGHT(item) = saved_right;
   2778 		COLOR(item) = saved_color;
   2779 	}
   2780 
   2781 	/*
   2782 	 * Remove the node by removing the links from its parent.
   2783 	 */
   2784 	if (!IS_ROOT(item)) {
   2785 		if (LEFT(PARENT(item)) == item) {
   2786 			LEFT(PARENT(item)) = child;
   2787 		} else {
   2788 			RIGHT(PARENT(item)) = child;
   2789 		}
   2790 
   2791 		if (child != NULL) {
   2792 			PARENT(child) = PARENT(item);
   2793 		}
   2794 	} else {
   2795 		/*
   2796 		 * This is the root being deleted, and at this point
   2797 		 * it is known to have just one child.
   2798 		 */
   2799 		*rootp = child;
   2800 		child->is_root = 1;
   2801 		PARENT(child) = PARENT(item);
   2802 	}
   2803 
   2804 	/*
   2805 	 * Fix color violations.
   2806 	 */
   2807 	if (IS_BLACK(item)) {
   2808 		/* cppcheck-suppress nullPointerRedundantCheck symbolName=item
   2809 		 */
   2810 		parent = PARENT(item);
   2811 
   2812 		while (child != *rootp && IS_BLACK(child)) {
   2813 			INSIST(child == NULL || !IS_ROOT(child));
   2814 
   2815 			if (LEFT(parent) == child) {
   2816 				sibling = RIGHT(parent);
   2817 
   2818 				if (IS_RED(sibling)) {
   2819 					MAKE_BLACK(sibling);
   2820 					MAKE_RED(parent);
   2821 					rotate_left(parent, rootp);
   2822 					sibling = RIGHT(parent);
   2823 				}
   2824 
   2825 				INSIST(sibling != NULL);
   2826 
   2827 				/* cppcheck-suppress nullPointerRedundantCheck
   2828 				 * symbolName=sibling */
   2829 				if (IS_BLACK(LEFT(sibling)) &&
   2830 				    IS_BLACK(RIGHT(sibling)))
   2831 				{
   2832 					MAKE_RED(sibling);
   2833 					child = parent;
   2834 				} else {
   2835 					if (IS_BLACK(RIGHT(sibling))) {
   2836 						MAKE_BLACK(LEFT(sibling));
   2837 						MAKE_RED(sibling);
   2838 						rotate_right(sibling, rootp);
   2839 						sibling = RIGHT(parent);
   2840 					}
   2841 
   2842 					COLOR(sibling) = COLOR(parent);
   2843 					MAKE_BLACK(parent);
   2844 					INSIST(RIGHT(sibling) != NULL);
   2845 					MAKE_BLACK(RIGHT(sibling));
   2846 					rotate_left(parent, rootp);
   2847 					child = *rootp;
   2848 				}
   2849 			} else {
   2850 				/*
   2851 				 * Child is parent's right child.
   2852 				 * Everything is done the same as above,
   2853 				 * except mirrored.
   2854 				 */
   2855 				sibling = LEFT(parent);
   2856 
   2857 				if (IS_RED(sibling)) {
   2858 					MAKE_BLACK(sibling);
   2859 					MAKE_RED(parent);
   2860 					rotate_right(parent, rootp);
   2861 					sibling = LEFT(parent);
   2862 				}
   2863 
   2864 				INSIST(sibling != NULL);
   2865 
   2866 				/* cppcheck-suppress nullPointerRedundantCheck
   2867 				 * symbolName=sibling */
   2868 				if (IS_BLACK(LEFT(sibling)) &&
   2869 				    IS_BLACK(RIGHT(sibling)))
   2870 				{
   2871 					MAKE_RED(sibling);
   2872 					child = parent;
   2873 				} else {
   2874 					if (IS_BLACK(LEFT(sibling))) {
   2875 						MAKE_BLACK(RIGHT(sibling));
   2876 						MAKE_RED(sibling);
   2877 						rotate_left(sibling, rootp);
   2878 						sibling = LEFT(parent);
   2879 					}
   2880 
   2881 					COLOR(sibling) = COLOR(parent);
   2882 					MAKE_BLACK(parent);
   2883 					INSIST(LEFT(sibling) != NULL);
   2884 					MAKE_BLACK(LEFT(sibling));
   2885 					rotate_right(parent, rootp);
   2886 					child = *rootp;
   2887 				}
   2888 			}
   2889 
   2890 			parent = PARENT(child);
   2891 		}
   2892 
   2893 		if (IS_RED(child)) {
   2894 			MAKE_BLACK(child);
   2895 		}
   2896 	}
   2897 }
   2898 
   2899 static void
   2900 freenode(dns_rbt_t *rbt, dns_rbtnode_t **nodep) {
   2901 	dns_rbtnode_t *node = *nodep;
   2902 	*nodep = NULL;
   2903 
   2904 	if (node->is_mmapped == 0) {
   2905 		isc_mem_put(rbt->mctx, node, NODE_SIZE(node));
   2906 	}
   2907 
   2908 	rbt->nodecount--;
   2909 }
   2910 
   2911 static void
   2912 deletetreeflat(dns_rbt_t *rbt, unsigned int quantum, bool unhash,
   2913 	       dns_rbtnode_t **nodep) {
   2914 	dns_rbtnode_t *root = *nodep;
   2915 
   2916 	while (root != NULL) {
   2917 		/*
   2918 		 * If there is a left, right or down node, walk into it
   2919 		 * and iterate.
   2920 		 */
   2921 		if (LEFT(root) != NULL) {
   2922 			dns_rbtnode_t *node = root;
   2923 			root = LEFT(root);
   2924 			LEFT(node) = NULL;
   2925 		} else if (RIGHT(root) != NULL) {
   2926 			dns_rbtnode_t *node = root;
   2927 			root = RIGHT(root);
   2928 			RIGHT(node) = NULL;
   2929 		} else if (DOWN(root) != NULL) {
   2930 			dns_rbtnode_t *node = root;
   2931 			root = DOWN(root);
   2932 			DOWN(node) = NULL;
   2933 		} else {
   2934 			/*
   2935 			 * There are no left, right or down nodes, so we
   2936 			 * can free this one and go back to its parent.
   2937 			 */
   2938 			dns_rbtnode_t *node = root;
   2939 			root = PARENT(root);
   2940 
   2941 			if (rbt->data_deleter != NULL && DATA(node) != NULL) {
   2942 				rbt->data_deleter(DATA(node), rbt->deleter_arg);
   2943 			}
   2944 			if (unhash) {
   2945 				unhash_node(rbt, node);
   2946 			}
   2947 			/*
   2948 			 * Note: we don't call unhash_node() here as we
   2949 			 * are destroying the complete RBT tree.
   2950 			 */
   2951 #if DNS_RBT_USEMAGIC
   2952 			node->magic = 0;
   2953 #endif /* if DNS_RBT_USEMAGIC */
   2954 			freenode(rbt, &node);
   2955 			if (quantum != 0 && --quantum == 0) {
   2956 				break;
   2957 			}
   2958 		}
   2959 	}
   2960 
   2961 	*nodep = root;
   2962 }
   2963 
   2964 static size_t
   2965 getheight_helper(dns_rbtnode_t *node) {
   2966 	size_t dl, dr;
   2967 	size_t this_height, down_height;
   2968 
   2969 	if (node == NULL) {
   2970 		return (0);
   2971 	}
   2972 
   2973 	dl = getheight_helper(LEFT(node));
   2974 	dr = getheight_helper(RIGHT(node));
   2975 
   2976 	this_height = ISC_MAX(dl + 1, dr + 1);
   2977 	down_height = getheight_helper(DOWN(node));
   2978 
   2979 	return (ISC_MAX(this_height, down_height));
   2980 }
   2981 
   2982 size_t
   2983 dns__rbt_getheight(dns_rbt_t *rbt) {
   2984 	return (getheight_helper(rbt->root));
   2985 }
   2986 
   2987 static bool
   2988 check_properties_helper(dns_rbtnode_t *node) {
   2989 	if (node == NULL) {
   2990 		return (true);
   2991 	}
   2992 
   2993 	if (IS_RED(node)) {
   2994 		/* Root nodes must be BLACK. */
   2995 		if (IS_ROOT(node)) {
   2996 			return (false);
   2997 		}
   2998 
   2999 		/* Both children of RED nodes must be BLACK. */
   3000 		if (IS_RED(LEFT(node)) || IS_RED(RIGHT(node))) {
   3001 			return (false);
   3002 		}
   3003 	}
   3004 
   3005 	/* cppcheck-suppress nullPointerRedundantCheck symbolName=node */
   3006 	if ((DOWN(node) != NULL) && (!IS_ROOT(DOWN(node)))) {
   3007 		return (false);
   3008 	}
   3009 
   3010 	if (IS_ROOT(node)) {
   3011 		if ((PARENT(node) != NULL) && (DOWN(PARENT(node)) != node)) {
   3012 			return (false);
   3013 		}
   3014 
   3015 		if (get_upper_node(node) != PARENT(node)) {
   3016 			return (false);
   3017 		}
   3018 	}
   3019 
   3020 	/* If node is assigned to the down_ pointer of its parent, it is
   3021 	 * a subtree root and must have the flag set.
   3022 	 */
   3023 	if (((!PARENT(node)) || (DOWN(PARENT(node)) == node)) &&
   3024 	    (!IS_ROOT(node)))
   3025 	{
   3026 		return (false);
   3027 	}
   3028 
   3029 	/* Repeat tests with this node's children. */
   3030 	return (check_properties_helper(LEFT(node)) &&
   3031 		check_properties_helper(RIGHT(node)) &&
   3032 		check_properties_helper(DOWN(node)));
   3033 }
   3034 
   3035 static bool
   3036 check_black_distance_helper(dns_rbtnode_t *node, size_t *distance) {
   3037 	size_t dl, dr, dd;
   3038 
   3039 	if (node == NULL) {
   3040 		*distance = 1;
   3041 		return (true);
   3042 	}
   3043 
   3044 	/* cppcheck-suppress nullPointerRedundantCheck symbolName=node */
   3045 	if (!check_black_distance_helper(LEFT(node), &dl)) {
   3046 		return (false);
   3047 	}
   3048 
   3049 	/* cppcheck-suppress nullPointerRedundantCheck symbolName=node */
   3050 	if (!check_black_distance_helper(RIGHT(node), &dr)) {
   3051 		return (false);
   3052 	}
   3053 
   3054 	/* cppcheck-suppress nullPointerRedundantCheck symbolName=node */
   3055 	if (!check_black_distance_helper(DOWN(node), &dd)) {
   3056 		return (false);
   3057 	}
   3058 
   3059 	/* Left and right side black node counts must match. */
   3060 	if (dl != dr) {
   3061 		return (false);
   3062 	}
   3063 
   3064 	if (IS_BLACK(node)) {
   3065 		dl++;
   3066 	}
   3067 
   3068 	*distance = dl;
   3069 
   3070 	return (true);
   3071 }
   3072 
   3073 bool
   3074 dns__rbt_checkproperties(dns_rbt_t *rbt) {
   3075 	size_t dd;
   3076 
   3077 	if (!check_properties_helper(rbt->root)) {
   3078 		return (false);
   3079 	}
   3080 
   3081 	/* Path from a given node to all its leaves must contain the
   3082 	 * same number of BLACK child nodes. This is done separately
   3083 	 * here instead of inside check_properties_helper() as
   3084 	 * it would take (n log n) complexity otherwise.
   3085 	 */
   3086 	return (check_black_distance_helper(rbt->root, &dd));
   3087 }
   3088 
   3089 static void
   3090 dns_rbt_indent(FILE *f, int depth) {
   3091 	int i;
   3092 
   3093 	fprintf(f, "%4d ", depth);
   3094 
   3095 	for (i = 0; i < depth; i++) {
   3096 		fprintf(f, "- ");
   3097 	}
   3098 }
   3099 
   3100 void
   3101 dns_rbt_printnodeinfo(dns_rbtnode_t *n, FILE *f) {
   3102 	if (n == NULL) {
   3103 		fprintf(f, "Null node\n");
   3104 		return;
   3105 	}
   3106 
   3107 	fprintf(f, "Node info for nodename: ");
   3108 	printnodename(n, true, f);
   3109 	fprintf(f, "\n");
   3110 
   3111 	fprintf(f, "n = %p\n", n);
   3112 
   3113 	fprintf(f, "Relative pointers: %s%s%s%s%s\n",
   3114 		(n->parent_is_relative == 1 ? " P" : ""),
   3115 		(n->right_is_relative == 1 ? " R" : ""),
   3116 		(n->left_is_relative == 1 ? " L" : ""),
   3117 		(n->down_is_relative == 1 ? " D" : ""),
   3118 		(n->data_is_relative == 1 ? " T" : ""));
   3119 
   3120 	fprintf(f, "node lock address = %u\n", n->locknum);
   3121 
   3122 	fprintf(f, "Parent: %p\n", n->parent);
   3123 	fprintf(f, "Right: %p\n", n->right);
   3124 	fprintf(f, "Left: %p\n", n->left);
   3125 	fprintf(f, "Down: %p\n", n->down);
   3126 	fprintf(f, "Data: %p\n", n->data);
   3127 }
   3128 
   3129 static void
   3130 printnodename(dns_rbtnode_t *node, bool quoted, FILE *f) {
   3131 	isc_region_t r;
   3132 	dns_name_t name;
   3133 	char buffer[DNS_NAME_FORMATSIZE];
   3134 	dns_offsets_t offsets;
   3135 
   3136 	r.length = NAMELEN(node);
   3137 	r.base = NAME(node);
   3138 
   3139 	dns_name_init(&name, offsets);
   3140 	dns_name_fromregion(&name, &r);
   3141 
   3142 	dns_name_format(&name, buffer, sizeof(buffer));
   3143 
   3144 	if (quoted) {
   3145 		fprintf(f, "\"%s\"", buffer);
   3146 	} else {
   3147 		fprintf(f, "%s", buffer);
   3148 	}
   3149 }
   3150 
   3151 static void
   3152 print_text_helper(dns_rbtnode_t *root, dns_rbtnode_t *parent, int depth,
   3153 		  const char *direction, void (*data_printer)(FILE *, void *),
   3154 		  FILE *f) {
   3155 	dns_rbt_indent(f, depth);
   3156 
   3157 	if (root != NULL) {
   3158 		printnodename(root, true, f);
   3159 		/*
   3160 		 * Don't use IS_RED(root) as it tests for 'root != NULL'
   3161 		 * and cppcheck produces false positives.
   3162 		 */
   3163 		fprintf(f, " (%s, %s", direction,
   3164 			COLOR(root) == RED ? "RED" : "BLACK");
   3165 
   3166 		if ((!IS_ROOT(root) && PARENT(root) != parent) ||
   3167 		    (IS_ROOT(root) && depth > 0 && DOWN(PARENT(root)) != root))
   3168 		{
   3169 			fprintf(f, " (BAD parent pointer! -> ");
   3170 			if (PARENT(root) != NULL) {
   3171 				printnodename(PARENT(root), true, f);
   3172 			} else {
   3173 				fprintf(f, "NULL");
   3174 			}
   3175 			fprintf(f, ")");
   3176 		}
   3177 
   3178 		fprintf(f, ")");
   3179 
   3180 		if (root->data != NULL && data_printer != NULL) {
   3181 			fprintf(f, " data@%p: ", root->data);
   3182 			data_printer(f, root->data);
   3183 		}
   3184 		fprintf(f, "\n");
   3185 
   3186 		depth++;
   3187 
   3188 		/*
   3189 		 * Don't use IS_RED(root) as it tests for 'root != NULL'
   3190 		 * and cppcheck produces false positives.
   3191 		 */
   3192 		if (COLOR(root) == RED && IS_RED(LEFT(root))) {
   3193 			fprintf(f, "** Red/Red color violation on left\n");
   3194 		}
   3195 		print_text_helper(LEFT(root), root, depth, "left", data_printer,
   3196 				  f);
   3197 
   3198 		/*
   3199 		 * Don't use IS_RED(root) as cppcheck produces false positives.
   3200 		 */
   3201 		if (COLOR(root) == RED && IS_RED(RIGHT(root))) {
   3202 			fprintf(f, "** Red/Red color violation on right\n");
   3203 		}
   3204 		print_text_helper(RIGHT(root), root, depth, "right",
   3205 				  data_printer, f);
   3206 
   3207 		print_text_helper(DOWN(root), NULL, depth, "down", data_printer,
   3208 				  f);
   3209 	} else {
   3210 		fprintf(f, "NULL (%s)\n", direction);
   3211 	}
   3212 }
   3213 
   3214 void
   3215 dns_rbt_printtext(dns_rbt_t *rbt, void (*data_printer)(FILE *, void *),
   3216 		  FILE *f) {
   3217 	REQUIRE(VALID_RBT(rbt));
   3218 
   3219 	print_text_helper(rbt->root, NULL, 0, "root", data_printer, f);
   3220 }
   3221 
   3222 static int
   3223 print_dot_helper(dns_rbtnode_t *node, unsigned int *nodecount,
   3224 		 bool show_pointers, FILE *f) {
   3225 	unsigned int l, r, d;
   3226 
   3227 	if (node == NULL) {
   3228 		return (0);
   3229 	}
   3230 
   3231 	l = print_dot_helper(LEFT(node), nodecount, show_pointers, f);
   3232 	r = print_dot_helper(RIGHT(node), nodecount, show_pointers, f);
   3233 	d = print_dot_helper(DOWN(node), nodecount, show_pointers, f);
   3234 
   3235 	*nodecount += 1;
   3236 
   3237 	fprintf(f, "node%u[label = \"<f0> |<f1> ", *nodecount);
   3238 	printnodename(node, false, f);
   3239 	fprintf(f, "|<f2>");
   3240 
   3241 	if (show_pointers) {
   3242 		fprintf(f, "|<f3> n=%p|<f4> p=%p", node, PARENT(node));
   3243 	}
   3244 
   3245 	fprintf(f, "\"] [");
   3246 
   3247 	if (IS_RED(node)) {
   3248 		fprintf(f, "color=red");
   3249 	} else {
   3250 		fprintf(f, "color=black");
   3251 	}
   3252 
   3253 	/* XXXMUKS: verify that IS_ROOT() indicates subtree root and not
   3254 	 * forest root.
   3255 	 */
   3256 	if (IS_ROOT(node)) {
   3257 		fprintf(f, ",penwidth=3");
   3258 	}
   3259 
   3260 	if (IS_EMPTY(node)) {
   3261 		fprintf(f, ",style=filled,fillcolor=lightgrey");
   3262 	}
   3263 
   3264 	fprintf(f, "];\n");
   3265 
   3266 	if (LEFT(node) != NULL) {
   3267 		fprintf(f, "\"node%u\":f0 -> \"node%u\":f1;\n", *nodecount, l);
   3268 	}
   3269 
   3270 	if (DOWN(node) != NULL) {
   3271 		fprintf(f, "\"node%u\":f1 -> \"node%u\":f1 [penwidth=5];\n",
   3272 			*nodecount, d);
   3273 	}
   3274 
   3275 	if (RIGHT(node) != NULL) {
   3276 		fprintf(f, "\"node%u\":f2 -> \"node%u\":f1;\n", *nodecount, r);
   3277 	}
   3278 
   3279 	return (*nodecount);
   3280 }
   3281 
   3282 void
   3283 dns_rbt_printdot(dns_rbt_t *rbt, bool show_pointers, FILE *f) {
   3284 	unsigned int nodecount = 0;
   3285 
   3286 	REQUIRE(VALID_RBT(rbt));
   3287 
   3288 	fprintf(f, "digraph g {\n");
   3289 	fprintf(f, "node [shape = record,height=.1];\n");
   3290 	print_dot_helper(rbt->root, &nodecount, show_pointers, f);
   3291 	fprintf(f, "}\n");
   3292 }
   3293 
   3294 /*
   3295  * Chain Functions
   3296  */
   3297 
   3298 void
   3299 dns_rbtnodechain_init(dns_rbtnodechain_t *chain) {
   3300 	REQUIRE(chain != NULL);
   3301 
   3302 	/*
   3303 	 * Initialize 'chain'.
   3304 	 */
   3305 	chain->end = NULL;
   3306 	chain->level_count = 0;
   3307 	chain->level_matches = 0;
   3308 	memset(chain->levels, 0, sizeof(chain->levels));
   3309 
   3310 	chain->magic = CHAIN_MAGIC;
   3311 }
   3312 
   3313 isc_result_t
   3314 dns_rbtnodechain_current(dns_rbtnodechain_t *chain, dns_name_t *name,
   3315 			 dns_name_t *origin, dns_rbtnode_t **node) {
   3316 	isc_result_t result = ISC_R_SUCCESS;
   3317 
   3318 	REQUIRE(VALID_CHAIN(chain));
   3319 
   3320 	if (node != NULL) {
   3321 		*node = chain->end;
   3322 	}
   3323 
   3324 	if (chain->end == NULL) {
   3325 		return (ISC_R_NOTFOUND);
   3326 	}
   3327 
   3328 	if (name != NULL) {
   3329 		NODENAME(chain->end, name);
   3330 
   3331 		if (chain->level_count == 0) {
   3332 			/*
   3333 			 * Names in the top level tree are all absolute.
   3334 			 * Always make 'name' relative.
   3335 			 */
   3336 			INSIST(dns_name_isabsolute(name));
   3337 
   3338 			/*
   3339 			 * This is cheaper than dns_name_getlabelsequence().
   3340 			 */
   3341 			name->labels--;
   3342 			name->length--;
   3343 			name->attributes &= ~DNS_NAMEATTR_ABSOLUTE;
   3344 		}
   3345 	}
   3346 
   3347 	if (origin != NULL) {
   3348 		if (chain->level_count > 0) {
   3349 			result = chain_name(chain, origin, false);
   3350 		} else {
   3351 			dns_name_copynf(dns_rootname, origin);
   3352 		}
   3353 	}
   3354 
   3355 	return (result);
   3356 }
   3357 
   3358 isc_result_t
   3359 dns_rbtnodechain_prev(dns_rbtnodechain_t *chain, dns_name_t *name,
   3360 		      dns_name_t *origin) {
   3361 	dns_rbtnode_t *current, *previous, *predecessor;
   3362 	isc_result_t result = ISC_R_SUCCESS;
   3363 	bool new_origin = false;
   3364 
   3365 	REQUIRE(VALID_CHAIN(chain) && chain->end != NULL);
   3366 
   3367 	predecessor = NULL;
   3368 
   3369 	current = chain->end;
   3370 
   3371 	if (LEFT(current) != NULL) {
   3372 		/*
   3373 		 * Moving left one then right as far as possible is the
   3374 		 * previous node, at least for this level.
   3375 		 */
   3376 		current = LEFT(current);
   3377 
   3378 		while (RIGHT(current) != NULL) {
   3379 			current = RIGHT(current);
   3380 		}
   3381 
   3382 		predecessor = current;
   3383 	} else {
   3384 		/*
   3385 		 * No left links, so move toward the root.  If at any point on
   3386 		 * the way there the link from parent to child is a right
   3387 		 * link, then the parent is the previous node, at least
   3388 		 * for this level.
   3389 		 */
   3390 		while (!IS_ROOT(current)) {
   3391 			previous = current;
   3392 			current = PARENT(current);
   3393 
   3394 			if (RIGHT(current) == previous) {
   3395 				predecessor = current;
   3396 				break;
   3397 			}
   3398 		}
   3399 	}
   3400 
   3401 	if (predecessor != NULL) {
   3402 		/*
   3403 		 * Found a predecessor node in this level.  It might not
   3404 		 * really be the predecessor, however.
   3405 		 */
   3406 		if (DOWN(predecessor) != NULL) {
   3407 			/*
   3408 			 * The predecessor is really down at least one level.
   3409 			 * Go down and as far right as possible, and repeat
   3410 			 * as long as the rightmost node has a down pointer.
   3411 			 */
   3412 			do {
   3413 				/*
   3414 				 * XXX DCL Need to do something about origins
   3415 				 * here. See whether to go down, and if so
   3416 				 * whether it is truly what Bob calls a
   3417 				 * new origin.
   3418 				 */
   3419 				ADD_LEVEL(chain, predecessor);
   3420 				predecessor = DOWN(predecessor);
   3421 
   3422 				/* XXX DCL duplicated from above; clever
   3423 				 * way to unduplicate? */
   3424 
   3425 				while (RIGHT(predecessor) != NULL) {
   3426 					predecessor = RIGHT(predecessor);
   3427 				}
   3428 			} while (DOWN(predecessor) != NULL);
   3429 
   3430 			/* XXX DCL probably needs work on the concept */
   3431 			if (origin != NULL) {
   3432 				new_origin = true;
   3433 			}
   3434 		}
   3435 	} else if (chain->level_count > 0) {
   3436 		/*
   3437 		 * Dang, didn't find a predecessor in this level.
   3438 		 * Got to the root of this level without having traversed
   3439 		 * any right links.  Ascend the tree one level; the
   3440 		 * node that points to this tree is the predecessor.
   3441 		 */
   3442 		INSIST(chain->level_count > 0 && IS_ROOT(current));
   3443 		predecessor = chain->levels[--chain->level_count];
   3444 
   3445 		/* XXX DCL probably needs work on the concept */
   3446 		/*
   3447 		 * Don't declare an origin change when the new origin is "."
   3448 		 * at the top level tree, because "." is declared as the origin
   3449 		 * for the second level tree.
   3450 		 */
   3451 		if (origin != NULL &&
   3452 		    (chain->level_count > 0 || OFFSETLEN(predecessor) > 1))
   3453 		{
   3454 			new_origin = true;
   3455 		}
   3456 	}
   3457 
   3458 	if (predecessor != NULL) {
   3459 		chain->end = predecessor;
   3460 
   3461 		if (new_origin) {
   3462 			result = dns_rbtnodechain_current(chain, name, origin,
   3463 							  NULL);
   3464 			if (result == ISC_R_SUCCESS) {
   3465 				result = DNS_R_NEWORIGIN;
   3466 			}
   3467 		} else {
   3468 			result = dns_rbtnodechain_current(chain, name, NULL,
   3469 							  NULL);
   3470 		}
   3471 	} else {
   3472 		result = ISC_R_NOMORE;
   3473 	}
   3474 
   3475 	return (result);
   3476 }
   3477 
   3478 isc_result_t
   3479 dns_rbtnodechain_down(dns_rbtnodechain_t *chain, dns_name_t *name,
   3480 		      dns_name_t *origin) {
   3481 	dns_rbtnode_t *current, *successor;
   3482 	isc_result_t result = ISC_R_SUCCESS;
   3483 	bool new_origin = false;
   3484 
   3485 	REQUIRE(VALID_CHAIN(chain) && chain->end != NULL);
   3486 
   3487 	successor = NULL;
   3488 
   3489 	current = chain->end;
   3490 
   3491 	if (DOWN(current) != NULL) {
   3492 		/*
   3493 		 * Don't declare an origin change when the new origin is "."
   3494 		 * at the second level tree, because "." is already declared
   3495 		 * as the origin for the top level tree.
   3496 		 */
   3497 		if (chain->level_count > 0 || OFFSETLEN(current) > 1) {
   3498 			new_origin = true;
   3499 		}
   3500 
   3501 		ADD_LEVEL(chain, current);
   3502 		current = DOWN(current);
   3503 
   3504 		while (LEFT(current) != NULL) {
   3505 			current = LEFT(current);
   3506 		}
   3507 
   3508 		successor = current;
   3509 	}
   3510 
   3511 	if (successor != NULL) {
   3512 		chain->end = successor;
   3513 
   3514 		/*
   3515 		 * It is not necessary to use dns_rbtnodechain_current like
   3516 		 * the other functions because this function will never
   3517 		 * find a node in the topmost level.  This is because the
   3518 		 * root level will never be more than one name, and everything
   3519 		 * in the megatree is a successor to that node, down at
   3520 		 * the second level or below.
   3521 		 */
   3522 
   3523 		if (name != NULL) {
   3524 			NODENAME(chain->end, name);
   3525 		}
   3526 
   3527 		if (new_origin) {
   3528 			if (origin != NULL) {
   3529 				result = chain_name(chain, origin, false);
   3530 			}
   3531 
   3532 			if (result == ISC_R_SUCCESS) {
   3533 				result = DNS_R_NEWORIGIN;
   3534 			}
   3535 		} else {
   3536 			result = ISC_R_SUCCESS;
   3537 		}
   3538 	} else {
   3539 		result = ISC_R_NOMORE;
   3540 	}
   3541 
   3542 	return (result);
   3543 }
   3544 
   3545 isc_result_t
   3546 dns_rbtnodechain_nextflat(dns_rbtnodechain_t *chain, dns_name_t *name) {
   3547 	dns_rbtnode_t *current, *previous, *successor;
   3548 	isc_result_t result = ISC_R_SUCCESS;
   3549 
   3550 	REQUIRE(VALID_CHAIN(chain) && chain->end != NULL);
   3551 
   3552 	successor = NULL;
   3553 
   3554 	current = chain->end;
   3555 
   3556 	if (RIGHT(current) == NULL) {
   3557 		while (!IS_ROOT(current)) {
   3558 			previous = current;
   3559 			current = PARENT(current);
   3560 
   3561 			if (LEFT(current) == previous) {
   3562 				successor = current;
   3563 				break;
   3564 			}
   3565 		}
   3566 	} else {
   3567 		current = RIGHT(current);
   3568 
   3569 		while (LEFT(current) != NULL) {
   3570 			current = LEFT(current);
   3571 		}
   3572 
   3573 		successor = current;
   3574 	}
   3575 
   3576 	if (successor != NULL) {
   3577 		chain->end = successor;
   3578 
   3579 		if (name != NULL) {
   3580 			NODENAME(chain->end, name);
   3581 		}
   3582 
   3583 		result = ISC_R_SUCCESS;
   3584 	} else {
   3585 		result = ISC_R_NOMORE;
   3586 	}
   3587 
   3588 	return (result);
   3589 }
   3590 
   3591 isc_result_t
   3592 dns_rbtnodechain_next(dns_rbtnodechain_t *chain, dns_name_t *name,
   3593 		      dns_name_t *origin) {
   3594 	dns_rbtnode_t *current, *previous, *successor;
   3595 	isc_result_t result = ISC_R_SUCCESS;
   3596 	bool new_origin = false;
   3597 
   3598 	REQUIRE(VALID_CHAIN(chain) && chain->end != NULL);
   3599 
   3600 	successor = NULL;
   3601 
   3602 	current = chain->end;
   3603 
   3604 	/*
   3605 	 * If there is a level below this node, the next node is the leftmost
   3606 	 * node of the next level.
   3607 	 */
   3608 	if (DOWN(current) != NULL) {
   3609 		/*
   3610 		 * Don't declare an origin change when the new origin is "."
   3611 		 * at the second level tree, because "." is already declared
   3612 		 * as the origin for the top level tree.
   3613 		 */
   3614 		if (chain->level_count > 0 || OFFSETLEN(current) > 1) {
   3615 			new_origin = true;
   3616 		}
   3617 
   3618 		ADD_LEVEL(chain, current);
   3619 		current = DOWN(current);
   3620 
   3621 		while (LEFT(current) != NULL) {
   3622 			current = LEFT(current);
   3623 		}
   3624 
   3625 		successor = current;
   3626 	} else if (RIGHT(current) == NULL) {
   3627 		/*
   3628 		 * The successor is up, either in this level or a previous one.
   3629 		 * Head back toward the root of the tree, looking for any path
   3630 		 * that was via a left link; the successor is the node that has
   3631 		 * that left link.  In the event the root of the level is
   3632 		 * reached without having traversed any left links, ascend one
   3633 		 * level and look for either a right link off the point of
   3634 		 * ascent, or search for a left link upward again, repeating
   3635 		 * ascends until either case is true.
   3636 		 */
   3637 		do {
   3638 			while (!IS_ROOT(current)) {
   3639 				previous = current;
   3640 				current = PARENT(current);
   3641 
   3642 				if (LEFT(current) == previous) {
   3643 					successor = current;
   3644 					break;
   3645 				}
   3646 			}
   3647 
   3648 			if (successor == NULL) {
   3649 				/*
   3650 				 * Reached the root without having traversed
   3651 				 * any left pointers, so this level is done.
   3652 				 */
   3653 				if (chain->level_count == 0) {
   3654 					/*
   3655 					 * If the tree we are iterating over
   3656 					 * was modified since this chain was
   3657 					 * initialized in a way that caused
   3658 					 * node splits to occur, "current" may
   3659 					 * now be pointing to a root node which
   3660 					 * appears to be at level 0, but still
   3661 					 * has a parent.  If that happens,
   3662 					 * abort.  Otherwise, we are done
   3663 					 * looking for a successor as we really
   3664 					 * reached the root node on level 0.
   3665 					 */
   3666 					INSIST(PARENT(current) == NULL);
   3667 					break;
   3668 				}
   3669 
   3670 				current = chain->levels[--chain->level_count];
   3671 				new_origin = true;
   3672 
   3673 				if (RIGHT(current) != NULL) {
   3674 					break;
   3675 				}
   3676 			}
   3677 		} while (successor == NULL);
   3678 	}
   3679 
   3680 	if (successor == NULL && RIGHT(current) != NULL) {
   3681 		current = RIGHT(current);
   3682 
   3683 		while (LEFT(current) != NULL) {
   3684 			current = LEFT(current);
   3685 		}
   3686 
   3687 		successor = current;
   3688 	}
   3689 
   3690 	if (successor != NULL) {
   3691 		/*
   3692 		 * If we determine that the current node is the successor to
   3693 		 * itself, we will run into an infinite loop, so abort instead.
   3694 		 */
   3695 		INSIST(chain->end != successor);
   3696 
   3697 		chain->end = successor;
   3698 
   3699 		/*
   3700 		 * It is not necessary to use dns_rbtnodechain_current like
   3701 		 * the other functions because this function will never
   3702 		 * find a node in the topmost level.  This is because the
   3703 		 * root level will never be more than one name, and everything
   3704 		 * in the megatree is a successor to that node, down at
   3705 		 * the second level or below.
   3706 		 */
   3707 
   3708 		if (name != NULL) {
   3709 			NODENAME(chain->end, name);
   3710 		}
   3711 
   3712 		if (new_origin) {
   3713 			if (origin != NULL) {
   3714 				result = chain_name(chain, origin, false);
   3715 			}
   3716 
   3717 			if (result == ISC_R_SUCCESS) {
   3718 				result = DNS_R_NEWORIGIN;
   3719 			}
   3720 		} else {
   3721 			result = ISC_R_SUCCESS;
   3722 		}
   3723 	} else {
   3724 		result = ISC_R_NOMORE;
   3725 	}
   3726 
   3727 	return (result);
   3728 }
   3729 
   3730 isc_result_t
   3731 dns_rbtnodechain_first(dns_rbtnodechain_t *chain, dns_rbt_t *rbt,
   3732 		       dns_name_t *name, dns_name_t *origin)
   3733 
   3734 {
   3735 	isc_result_t result;
   3736 
   3737 	REQUIRE(VALID_RBT(rbt));
   3738 	REQUIRE(VALID_CHAIN(chain));
   3739 
   3740 	dns_rbtnodechain_reset(chain);
   3741 
   3742 	chain->end = rbt->root;
   3743 
   3744 	result = dns_rbtnodechain_current(chain, name, origin, NULL);
   3745 
   3746 	if (result == ISC_R_SUCCESS) {
   3747 		result = DNS_R_NEWORIGIN;
   3748 	}
   3749 
   3750 	return (result);
   3751 }
   3752 
   3753 isc_result_t
   3754 dns_rbtnodechain_last(dns_rbtnodechain_t *chain, dns_rbt_t *rbt,
   3755 		      dns_name_t *name, dns_name_t *origin)
   3756 
   3757 {
   3758 	isc_result_t result;
   3759 
   3760 	REQUIRE(VALID_RBT(rbt));
   3761 	REQUIRE(VALID_CHAIN(chain));
   3762 
   3763 	dns_rbtnodechain_reset(chain);
   3764 
   3765 	result = move_chain_to_last(chain, rbt->root);
   3766 	if (result != ISC_R_SUCCESS) {
   3767 		return (result);
   3768 	}
   3769 
   3770 	result = dns_rbtnodechain_current(chain, name, origin, NULL);
   3771 
   3772 	if (result == ISC_R_SUCCESS) {
   3773 		result = DNS_R_NEWORIGIN;
   3774 	}
   3775 
   3776 	return (result);
   3777 }
   3778 
   3779 void
   3780 dns_rbtnodechain_reset(dns_rbtnodechain_t *chain) {
   3781 	REQUIRE(VALID_CHAIN(chain));
   3782 
   3783 	/*
   3784 	 * Free any dynamic storage associated with 'chain', and then
   3785 	 * reinitialize 'chain'.
   3786 	 */
   3787 	chain->end = NULL;
   3788 	chain->level_count = 0;
   3789 	chain->level_matches = 0;
   3790 }
   3791 
   3792 void
   3793 dns_rbtnodechain_invalidate(dns_rbtnodechain_t *chain) {
   3794 	/*
   3795 	 * Free any dynamic storage associated with 'chain', and then
   3796 	 * invalidate 'chain'.
   3797 	 */
   3798 
   3799 	dns_rbtnodechain_reset(chain);
   3800 
   3801 	chain->magic = 0;
   3802 }
   3803 
   3804 /* XXXMUKS:
   3805  *
   3806  * - worth removing inline as static functions are inlined automatically
   3807  *   where suitable by modern compilers.
   3808  * - bump the size of dns_rbt.nodecount to size_t.
   3809  * - the dumpfile header also contains a nodecount that is unsigned
   3810  *   int. If large files (> 2^32 nodes) are to be supported, the
   3811  *   allocation for this field should be increased.
   3812  */
   3813