Home | History | Annotate | Line # | Download | only in avl
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * Copyright (c) 2014 by Delphix. All rights reserved.
     28  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
     29  */
     30 
     31 /*
     32  * AVL - generic AVL tree implementation for kernel use
     33  *
     34  * A complete description of AVL trees can be found in many CS textbooks.
     35  *
     36  * Here is a very brief overview. An AVL tree is a binary search tree that is
     37  * almost perfectly balanced. By "almost" perfectly balanced, we mean that at
     38  * any given node, the left and right subtrees are allowed to differ in height
     39  * by at most 1 level.
     40  *
     41  * This relaxation from a perfectly balanced binary tree allows doing
     42  * insertion and deletion relatively efficiently. Searching the tree is
     43  * still a fast operation, roughly O(log(N)).
     44  *
     45  * The key to insertion and deletion is a set of tree manipulations called
     46  * rotations, which bring unbalanced subtrees back into the semi-balanced state.
     47  *
     48  * This implementation of AVL trees has the following peculiarities:
     49  *
     50  *	- The AVL specific data structures are physically embedded as fields
     51  *	  in the "using" data structures.  To maintain generality the code
     52  *	  must constantly translate between "avl_node_t *" and containing
     53  *	  data structure "void *"s by adding/subtracting the avl_offset.
     54  *
     55  *	- Since the AVL data is always embedded in other structures, there is
     56  *	  no locking or memory allocation in the AVL routines. This must be
     57  *	  provided for by the enclosing data structure's semantics. Typically,
     58  *	  avl_insert()/_add()/_remove()/avl_insert_here() require some kind of
     59  *	  exclusive write lock. Other operations require a read lock.
     60  *
     61  *      - The implementation uses iteration instead of explicit recursion,
     62  *	  since it is intended to run on limited size kernel stacks. Since
     63  *	  there is no recursion stack present to move "up" in the tree,
     64  *	  there is an explicit "parent" link in the avl_node_t.
     65  *
     66  *      - The left/right children pointers of a node are in an array.
     67  *	  In the code, variables (instead of constants) are used to represent
     68  *	  left and right indices.  The implementation is written as if it only
     69  *	  dealt with left handed manipulations.  By changing the value assigned
     70  *	  to "left", the code also works for right handed trees.  The
     71  *	  following variables/terms are frequently used:
     72  *
     73  *		int left;	// 0 when dealing with left children,
     74  *				// 1 for dealing with right children
     75  *
     76  *		int left_heavy;	// -1 when left subtree is taller at some node,
     77  *				// +1 when right subtree is taller
     78  *
     79  *		int right;	// will be the opposite of left (0 or 1)
     80  *		int right_heavy;// will be the opposite of left_heavy (-1 or 1)
     81  *
     82  *		int direction;  // 0 for "<" (ie. left child); 1 for ">" (right)
     83  *
     84  *	  Though it is a little more confusing to read the code, the approach
     85  *	  allows using half as much code (and hence cache footprint) for tree
     86  *	  manipulations and eliminates many conditional branches.
     87  *
     88  *	- The avl_index_t is an opaque "cookie" used to find nodes at or
     89  *	  adjacent to where a new value would be inserted in the tree. The value
     90  *	  is a modified "avl_node_t *".  The bottom bit (normally 0 for a
     91  *	  pointer) is set to indicate if that the new node has a value greater
     92  *	  than the value of the indicated "avl_node_t *".
     93  *
     94  * Note - in addition to userland (e.g. libavl and libutil) and the kernel
     95  * (e.g. genunix), avl.c is compiled into ld.so and kmdb's genunix module,
     96  * which each have their own compilation environments and subsequent
     97  * requirements. Each of these environments must be considered when adding
     98  * dependencies from avl.c.
     99  */
    100 
    101 #include <sys/types.h>
    102 #include <sys/param.h>
    103 #include <sys/stdint.h>
    104 #include <sys/debug.h>
    105 #include <sys/avl.h>
    106 
    107 /*
    108  * Small arrays to translate between balance (or diff) values and child indices.
    109  *
    110  * Code that deals with binary tree data structures will randomly use
    111  * left and right children when examining a tree.  C "if()" statements
    112  * which evaluate randomly suffer from very poor hardware branch prediction.
    113  * In this code we avoid some of the branch mispredictions by using the
    114  * following translation arrays. They replace random branches with an
    115  * additional memory reference. Since the translation arrays are both very
    116  * small the data should remain efficiently in cache.
    117  */
    118 static const int  avl_child2balance[2]	= {-1, 1};
    119 static const int  avl_balance2child[]	= {0, 0, 1};
    120 
    121 
    122 /*
    123  * Walk from one node to the previous valued node (ie. an infix walk
    124  * towards the left). At any given node we do one of 2 things:
    125  *
    126  * - If there is a left child, go to it, then to it's rightmost descendant.
    127  *
    128  * - otherwise we return through parent nodes until we've come from a right
    129  *   child.
    130  *
    131  * Return Value:
    132  * NULL - if at the end of the nodes
    133  * otherwise next node
    134  */
    135 void *
    136 avl_walk(avl_tree_t *tree, void	*oldnode, int left)
    137 {
    138 	size_t off = tree->avl_offset;
    139 	avl_node_t *node = AVL_DATA2NODE(oldnode, off);
    140 	int right = 1 - left;
    141 	int was_child;
    142 
    143 
    144 	/*
    145 	 * nowhere to walk to if tree is empty
    146 	 */
    147 	if (node == NULL)
    148 		return (NULL);
    149 
    150 	/*
    151 	 * Visit the previous valued node. There are two possibilities:
    152 	 *
    153 	 * If this node has a left child, go down one left, then all
    154 	 * the way right.
    155 	 */
    156 	if (node->avl_child[left] != NULL) {
    157 		for (node = node->avl_child[left];
    158 		    node->avl_child[right] != NULL;
    159 		    node = node->avl_child[right])
    160 			;
    161 	/*
    162 	 * Otherwise, return thru left children as far as we can.
    163 	 */
    164 	} else {
    165 		for (;;) {
    166 			was_child = AVL_XCHILD(node);
    167 			node = AVL_XPARENT(node);
    168 			if (node == NULL)
    169 				return (NULL);
    170 			if (was_child == right)
    171 				break;
    172 		}
    173 	}
    174 
    175 	return (AVL_NODE2DATA(node, off));
    176 }
    177 
    178 /*
    179  * Return the lowest valued node in a tree or NULL.
    180  * (leftmost child from root of tree)
    181  */
    182 void *
    183 avl_first(avl_tree_t *tree)
    184 {
    185 	avl_node_t *node;
    186 	avl_node_t *prev = NULL;
    187 	size_t off = tree->avl_offset;
    188 
    189 	for (node = tree->avl_root; node != NULL; node = node->avl_child[0])
    190 		prev = node;
    191 
    192 	if (prev != NULL)
    193 		return (AVL_NODE2DATA(prev, off));
    194 	return (NULL);
    195 }
    196 
    197 /*
    198  * Return the highest valued node in a tree or NULL.
    199  * (rightmost child from root of tree)
    200  */
    201 void *
    202 avl_last(avl_tree_t *tree)
    203 {
    204 	avl_node_t *node;
    205 	avl_node_t *prev = NULL;
    206 	size_t off = tree->avl_offset;
    207 
    208 	for (node = tree->avl_root; node != NULL; node = node->avl_child[1])
    209 		prev = node;
    210 
    211 	if (prev != NULL)
    212 		return (AVL_NODE2DATA(prev, off));
    213 	return (NULL);
    214 }
    215 
    216 /*
    217  * Access the node immediately before or after an insertion point.
    218  *
    219  * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child
    220  *
    221  * Return value:
    222  *	NULL: no node in the given direction
    223  *	"void *"  of the found tree node
    224  */
    225 void *
    226 avl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
    227 {
    228 	int child = AVL_INDEX2CHILD(where);
    229 	avl_node_t *node = AVL_INDEX2NODE(where);
    230 	void *data;
    231 	size_t off = tree->avl_offset;
    232 
    233 	if (node == NULL) {
    234 		ASSERT(tree->avl_root == NULL);
    235 		return (NULL);
    236 	}
    237 	data = AVL_NODE2DATA(node, off);
    238 	if (child != direction)
    239 		return (data);
    240 
    241 	return (avl_walk(tree, data, direction));
    242 }
    243 
    244 
    245 /*
    246  * Search for the node which contains "value".  The algorithm is a
    247  * simple binary tree search.
    248  *
    249  * return value:
    250  *	NULL: the value is not in the AVL tree
    251  *		*where (if not NULL)  is set to indicate the insertion point
    252  *	"void *"  of the found tree node
    253  */
    254 void *
    255 avl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
    256 {
    257 	avl_node_t *node;
    258 	avl_node_t *prev = NULL;
    259 	int child = 0;
    260 	int diff;
    261 	size_t off = tree->avl_offset;
    262 
    263 	for (node = tree->avl_root; node != NULL;
    264 	    node = node->avl_child[child]) {
    265 
    266 		prev = node;
    267 
    268 		diff = tree->avl_compar(value, AVL_NODE2DATA(node, off));
    269 		ASSERT(-1 <= diff && diff <= 1);
    270 		if (diff == 0) {
    271 #ifdef DEBUG
    272 			if (where != NULL)
    273 				*where = 0;
    274 #endif
    275 			return (AVL_NODE2DATA(node, off));
    276 		}
    277 		child = avl_balance2child[1 + diff];
    278 
    279 	}
    280 
    281 	if (where != NULL)
    282 		*where = AVL_MKINDEX(prev, child);
    283 
    284 	return (NULL);
    285 }
    286 
    287 
    288 /*
    289  * Perform a rotation to restore balance at the subtree given by depth.
    290  *
    291  * This routine is used by both insertion and deletion. The return value
    292  * indicates:
    293  *	 0 : subtree did not change height
    294  *	!0 : subtree was reduced in height
    295  *
    296  * The code is written as if handling left rotations, right rotations are
    297  * symmetric and handled by swapping values of variables right/left[_heavy]
    298  *
    299  * On input balance is the "new" balance at "node". This value is either
    300  * -2 or +2.
    301  */
    302 static int
    303 avl_rotation(avl_tree_t *tree, avl_node_t *node, int balance)
    304 {
    305 	int left = !(balance < 0);	/* when balance = -2, left will be 0 */
    306 	int right = 1 - left;
    307 	int left_heavy = balance >> 1;
    308 	int right_heavy = -left_heavy;
    309 	avl_node_t *parent = AVL_XPARENT(node);
    310 	avl_node_t *child = node->avl_child[left];
    311 	avl_node_t *cright;
    312 	avl_node_t *gchild;
    313 	avl_node_t *gright;
    314 	avl_node_t *gleft;
    315 	int which_child = AVL_XCHILD(node);
    316 	int child_bal = AVL_XBALANCE(child);
    317 
    318 	/* BEGIN CSTYLED */
    319 	/*
    320 	 * case 1 : node is overly left heavy, the left child is balanced or
    321 	 * also left heavy. This requires the following rotation.
    322 	 *
    323 	 *                   (node bal:-2)
    324 	 *                    /           \
    325 	 *                   /             \
    326 	 *              (child bal:0 or -1)
    327 	 *              /    \
    328 	 *             /      \
    329 	 *                     cright
    330 	 *
    331 	 * becomes:
    332 	 *
    333 	 *              (child bal:1 or 0)
    334 	 *              /        \
    335 	 *             /          \
    336 	 *                        (node bal:-1 or 0)
    337 	 *                         /     \
    338 	 *                        /       \
    339 	 *                     cright
    340 	 *
    341 	 * we detect this situation by noting that child's balance is not
    342 	 * right_heavy.
    343 	 */
    344 	/* END CSTYLED */
    345 	if (child_bal != right_heavy) {
    346 
    347 		/*
    348 		 * compute new balance of nodes
    349 		 *
    350 		 * If child used to be left heavy (now balanced) we reduced
    351 		 * the height of this sub-tree -- used in "return...;" below
    352 		 */
    353 		child_bal += right_heavy; /* adjust towards right */
    354 
    355 		/*
    356 		 * move "cright" to be node's left child
    357 		 */
    358 		cright = child->avl_child[right];
    359 		node->avl_child[left] = cright;
    360 		if (cright != NULL) {
    361 			AVL_SETPARENT(cright, node);
    362 			AVL_SETCHILD(cright, left);
    363 		}
    364 
    365 		/*
    366 		 * move node to be child's right child
    367 		 */
    368 		child->avl_child[right] = node;
    369 		AVL_SETBALANCE(node, -child_bal);
    370 		AVL_SETCHILD(node, right);
    371 		AVL_SETPARENT(node, child);
    372 
    373 		/*
    374 		 * update the pointer into this subtree
    375 		 */
    376 		AVL_SETBALANCE(child, child_bal);
    377 		AVL_SETCHILD(child, which_child);
    378 		AVL_SETPARENT(child, parent);
    379 		if (parent != NULL)
    380 			parent->avl_child[which_child] = child;
    381 		else
    382 			tree->avl_root = child;
    383 
    384 		return (child_bal == 0);
    385 	}
    386 
    387 	/* BEGIN CSTYLED */
    388 	/*
    389 	 * case 2 : When node is left heavy, but child is right heavy we use
    390 	 * a different rotation.
    391 	 *
    392 	 *                   (node b:-2)
    393 	 *                    /   \
    394 	 *                   /     \
    395 	 *                  /       \
    396 	 *             (child b:+1)
    397 	 *              /     \
    398 	 *             /       \
    399 	 *                   (gchild b: != 0)
    400 	 *                     /  \
    401 	 *                    /    \
    402 	 *                 gleft   gright
    403 	 *
    404 	 * becomes:
    405 	 *
    406 	 *              (gchild b:0)
    407 	 *              /       \
    408 	 *             /         \
    409 	 *            /           \
    410 	 *        (child b:?)   (node b:?)
    411 	 *         /  \          /   \
    412 	 *        /    \        /     \
    413 	 *            gleft   gright
    414 	 *
    415 	 * computing the new balances is more complicated. As an example:
    416 	 *	 if gchild was right_heavy, then child is now left heavy
    417 	 *		else it is balanced
    418 	 */
    419 	/* END CSTYLED */
    420 	gchild = child->avl_child[right];
    421 	gleft = gchild->avl_child[left];
    422 	gright = gchild->avl_child[right];
    423 
    424 	/*
    425 	 * move gright to left child of node and
    426 	 *
    427 	 * move gleft to right child of node
    428 	 */
    429 	node->avl_child[left] = gright;
    430 	if (gright != NULL) {
    431 		AVL_SETPARENT(gright, node);
    432 		AVL_SETCHILD(gright, left);
    433 	}
    434 
    435 	child->avl_child[right] = gleft;
    436 	if (gleft != NULL) {
    437 		AVL_SETPARENT(gleft, child);
    438 		AVL_SETCHILD(gleft, right);
    439 	}
    440 
    441 	/*
    442 	 * move child to left child of gchild and
    443 	 *
    444 	 * move node to right child of gchild and
    445 	 *
    446 	 * fixup parent of all this to point to gchild
    447 	 */
    448 	balance = AVL_XBALANCE(gchild);
    449 	gchild->avl_child[left] = child;
    450 	AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0));
    451 	AVL_SETPARENT(child, gchild);
    452 	AVL_SETCHILD(child, left);
    453 
    454 	gchild->avl_child[right] = node;
    455 	AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0));
    456 	AVL_SETPARENT(node, gchild);
    457 	AVL_SETCHILD(node, right);
    458 
    459 	AVL_SETBALANCE(gchild, 0);
    460 	AVL_SETPARENT(gchild, parent);
    461 	AVL_SETCHILD(gchild, which_child);
    462 	if (parent != NULL)
    463 		parent->avl_child[which_child] = gchild;
    464 	else
    465 		tree->avl_root = gchild;
    466 
    467 	return (1);	/* the new tree is always shorter */
    468 }
    469 
    470 
    471 /*
    472  * Insert a new node into an AVL tree at the specified (from avl_find()) place.
    473  *
    474  * Newly inserted nodes are always leaf nodes in the tree, since avl_find()
    475  * searches out to the leaf positions.  The avl_index_t indicates the node
    476  * which will be the parent of the new node.
    477  *
    478  * After the node is inserted, a single rotation further up the tree may
    479  * be necessary to maintain an acceptable AVL balance.
    480  */
    481 void
    482 avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
    483 {
    484 	avl_node_t *node;
    485 	avl_node_t *parent = AVL_INDEX2NODE(where);
    486 	int old_balance;
    487 	int new_balance;
    488 	int which_child = AVL_INDEX2CHILD(where);
    489 	size_t off = tree->avl_offset;
    490 
    491 	ASSERT(tree);
    492 #ifdef _LP64
    493 	ASSERT(((uintptr_t)new_data & 0x7) == 0);
    494 #endif
    495 
    496 	node = AVL_DATA2NODE(new_data, off);
    497 
    498 	/*
    499 	 * First, add the node to the tree at the indicated position.
    500 	 */
    501 	++tree->avl_numnodes;
    502 
    503 	node->avl_child[0] = NULL;
    504 	node->avl_child[1] = NULL;
    505 
    506 	AVL_SETCHILD(node, which_child);
    507 	AVL_SETBALANCE(node, 0);
    508 	AVL_SETPARENT(node, parent);
    509 	if (parent != NULL) {
    510 		ASSERT(parent->avl_child[which_child] == NULL);
    511 		parent->avl_child[which_child] = node;
    512 	} else {
    513 		ASSERT(tree->avl_root == NULL);
    514 		tree->avl_root = node;
    515 	}
    516 	/*
    517 	 * Now, back up the tree modifying the balance of all nodes above the
    518 	 * insertion point. If we get to a highly unbalanced ancestor, we
    519 	 * need to do a rotation.  If we back out of the tree we are done.
    520 	 * If we brought any subtree into perfect balance (0), we are also done.
    521 	 */
    522 	for (;;) {
    523 		node = parent;
    524 		if (node == NULL)
    525 			return;
    526 
    527 		/*
    528 		 * Compute the new balance
    529 		 */
    530 		old_balance = AVL_XBALANCE(node);
    531 		new_balance = old_balance + avl_child2balance[which_child];
    532 
    533 		/*
    534 		 * If we introduced equal balance, then we are done immediately
    535 		 */
    536 		if (new_balance == 0) {
    537 			AVL_SETBALANCE(node, 0);
    538 			return;
    539 		}
    540 
    541 		/*
    542 		 * If both old and new are not zero we went
    543 		 * from -1 to -2 balance, do a rotation.
    544 		 */
    545 		if (old_balance != 0)
    546 			break;
    547 
    548 		AVL_SETBALANCE(node, new_balance);
    549 		parent = AVL_XPARENT(node);
    550 		which_child = AVL_XCHILD(node);
    551 	}
    552 
    553 	/*
    554 	 * perform a rotation to fix the tree and return
    555 	 */
    556 	(void) avl_rotation(tree, node, new_balance);
    557 }
    558 
    559 /*
    560  * Insert "new_data" in "tree" in the given "direction" either after or
    561  * before (AVL_AFTER, AVL_BEFORE) the data "here".
    562  *
    563  * Insertions can only be done at empty leaf points in the tree, therefore
    564  * if the given child of the node is already present we move to either
    565  * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since
    566  * every other node in the tree is a leaf, this always works.
    567  *
    568  * To help developers using this interface, we assert that the new node
    569  * is correctly ordered at every step of the way in DEBUG kernels.
    570  */
    571 void
    572 avl_insert_here(
    573 	avl_tree_t *tree,
    574 	void *new_data,
    575 	void *here,
    576 	int direction)
    577 {
    578 	avl_node_t *node;
    579 	int child = direction;	/* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */
    580 #ifdef DEBUG
    581 	int diff;
    582 #endif
    583 
    584 	ASSERT(tree != NULL);
    585 	ASSERT(new_data != NULL);
    586 	ASSERT(here != NULL);
    587 	ASSERT(direction == AVL_BEFORE || direction == AVL_AFTER);
    588 
    589 	/*
    590 	 * If corresponding child of node is not NULL, go to the neighboring
    591 	 * node and reverse the insertion direction.
    592 	 */
    593 	node = AVL_DATA2NODE(here, tree->avl_offset);
    594 
    595 #ifdef DEBUG
    596 	diff = tree->avl_compar(new_data, here);
    597 	ASSERT(-1 <= diff && diff <= 1);
    598 	ASSERT(diff != 0);
    599 	ASSERT(diff > 0 ? child == 1 : child == 0);
    600 #endif
    601 
    602 	if (node->avl_child[child] != NULL) {
    603 		node = node->avl_child[child];
    604 		child = 1 - child;
    605 		while (node->avl_child[child] != NULL) {
    606 #ifdef DEBUG
    607 			diff = tree->avl_compar(new_data,
    608 			    AVL_NODE2DATA(node, tree->avl_offset));
    609 			ASSERT(-1 <= diff && diff <= 1);
    610 			ASSERT(diff != 0);
    611 			ASSERT(diff > 0 ? child == 1 : child == 0);
    612 #endif
    613 			node = node->avl_child[child];
    614 		}
    615 #ifdef DEBUG
    616 		diff = tree->avl_compar(new_data,
    617 		    AVL_NODE2DATA(node, tree->avl_offset));
    618 		ASSERT(-1 <= diff && diff <= 1);
    619 		ASSERT(diff != 0);
    620 		ASSERT(diff > 0 ? child == 1 : child == 0);
    621 #endif
    622 	}
    623 	ASSERT(node->avl_child[child] == NULL);
    624 
    625 	avl_insert(tree, new_data, AVL_MKINDEX(node, child));
    626 }
    627 
    628 /*
    629  * Add a new node to an AVL tree.
    630  */
    631 void
    632 avl_add(avl_tree_t *tree, void *new_node)
    633 {
    634 	avl_index_t where;
    635 
    636 	/*
    637 	 * This is unfortunate.  We want to call panic() here, even for
    638 	 * non-DEBUG kernels.  In userland, however, we can't depend on anything
    639 	 * in libc or else the rtld build process gets confused.
    640 	 * Thankfully, rtld provides us with its own assfail() so we can use
    641 	 * that here.  We use assfail() directly to get a nice error message
    642 	 * in the core - much like what panic() does for crashdumps.
    643 	 */
    644 	if (avl_find(tree, new_node, &where) != NULL)
    645 #ifdef _KERNEL
    646 		panic("avl_find() succeeded inside avl_add()");
    647 #else
    648 		(void) assfail("avl_find() succeeded inside avl_add()",
    649 		    __FILE__, __LINE__);
    650 #endif
    651 	avl_insert(tree, new_node, where);
    652 }
    653 
    654 /*
    655  * Delete a node from the AVL tree.  Deletion is similar to insertion, but
    656  * with 2 complications.
    657  *
    658  * First, we may be deleting an interior node. Consider the following subtree:
    659  *
    660  *     d           c            c
    661  *    / \         / \          / \
    662  *   b   e       b   e        b   e
    663  *  / \	        / \          /
    664  * a   c       a            a
    665  *
    666  * When we are deleting node (d), we find and bring up an adjacent valued leaf
    667  * node, say (c), to take the interior node's place. In the code this is
    668  * handled by temporarily swapping (d) and (c) in the tree and then using
    669  * common code to delete (d) from the leaf position.
    670  *
    671  * Secondly, an interior deletion from a deep tree may require more than one
    672  * rotation to fix the balance. This is handled by moving up the tree through
    673  * parents and applying rotations as needed. The return value from
    674  * avl_rotation() is used to detect when a subtree did not change overall
    675  * height due to a rotation.
    676  */
    677 void
    678 avl_remove(avl_tree_t *tree, void *data)
    679 {
    680 	avl_node_t *delete;
    681 	avl_node_t *parent;
    682 	avl_node_t *node;
    683 	avl_node_t tmp;
    684 	int old_balance;
    685 	int new_balance;
    686 	int left;
    687 	int right;
    688 	int which_child;
    689 	size_t off = tree->avl_offset;
    690 
    691 	ASSERT(tree);
    692 
    693 	delete = AVL_DATA2NODE(data, off);
    694 
    695 	/*
    696 	 * Deletion is easiest with a node that has at most 1 child.
    697 	 * We swap a node with 2 children with a sequentially valued
    698 	 * neighbor node. That node will have at most 1 child. Note this
    699 	 * has no effect on the ordering of the remaining nodes.
    700 	 *
    701 	 * As an optimization, we choose the greater neighbor if the tree
    702 	 * is right heavy, otherwise the left neighbor. This reduces the
    703 	 * number of rotations needed.
    704 	 */
    705 	if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) {
    706 
    707 		/*
    708 		 * choose node to swap from whichever side is taller
    709 		 */
    710 		old_balance = AVL_XBALANCE(delete);
    711 		left = avl_balance2child[old_balance + 1];
    712 		right = 1 - left;
    713 
    714 		/*
    715 		 * get to the previous value'd node
    716 		 * (down 1 left, as far as possible right)
    717 		 */
    718 		for (node = delete->avl_child[left];
    719 		    node->avl_child[right] != NULL;
    720 		    node = node->avl_child[right])
    721 			;
    722 
    723 		/*
    724 		 * create a temp placeholder for 'node'
    725 		 * move 'node' to delete's spot in the tree
    726 		 */
    727 		tmp = *node;
    728 
    729 		*node = *delete;
    730 		if (node->avl_child[left] == node)
    731 			node->avl_child[left] = &tmp;
    732 
    733 		parent = AVL_XPARENT(node);
    734 		if (parent != NULL)
    735 			parent->avl_child[AVL_XCHILD(node)] = node;
    736 		else
    737 			tree->avl_root = node;
    738 		AVL_SETPARENT(node->avl_child[left], node);
    739 		AVL_SETPARENT(node->avl_child[right], node);
    740 
    741 		/*
    742 		 * Put tmp where node used to be (just temporary).
    743 		 * It always has a parent and at most 1 child.
    744 		 */
    745 		delete = &tmp;
    746 		parent = AVL_XPARENT(delete);
    747 		parent->avl_child[AVL_XCHILD(delete)] = delete;
    748 		which_child = (delete->avl_child[1] != 0);
    749 		if (delete->avl_child[which_child] != NULL)
    750 			AVL_SETPARENT(delete->avl_child[which_child], delete);
    751 	}
    752 
    753 
    754 	/*
    755 	 * Here we know "delete" is at least partially a leaf node. It can
    756 	 * be easily removed from the tree.
    757 	 */
    758 	ASSERT(tree->avl_numnodes > 0);
    759 	--tree->avl_numnodes;
    760 	parent = AVL_XPARENT(delete);
    761 	which_child = AVL_XCHILD(delete);
    762 	if (delete->avl_child[0] != NULL)
    763 		node = delete->avl_child[0];
    764 	else
    765 		node = delete->avl_child[1];
    766 
    767 	/*
    768 	 * Connect parent directly to node (leaving out delete).
    769 	 */
    770 	if (node != NULL) {
    771 		AVL_SETPARENT(node, parent);
    772 		AVL_SETCHILD(node, which_child);
    773 	}
    774 	if (parent == NULL) {
    775 		tree->avl_root = node;
    776 		return;
    777 	}
    778 	parent->avl_child[which_child] = node;
    779 
    780 
    781 	/*
    782 	 * Since the subtree is now shorter, begin adjusting parent balances
    783 	 * and performing any needed rotations.
    784 	 */
    785 	do {
    786 
    787 		/*
    788 		 * Move up the tree and adjust the balance
    789 		 *
    790 		 * Capture the parent and which_child values for the next
    791 		 * iteration before any rotations occur.
    792 		 */
    793 		node = parent;
    794 		old_balance = AVL_XBALANCE(node);
    795 		new_balance = old_balance - avl_child2balance[which_child];
    796 		parent = AVL_XPARENT(node);
    797 		which_child = AVL_XCHILD(node);
    798 
    799 		/*
    800 		 * If a node was in perfect balance but isn't anymore then
    801 		 * we can stop, since the height didn't change above this point
    802 		 * due to a deletion.
    803 		 */
    804 		if (old_balance == 0) {
    805 			AVL_SETBALANCE(node, new_balance);
    806 			break;
    807 		}
    808 
    809 		/*
    810 		 * If the new balance is zero, we don't need to rotate
    811 		 * else
    812 		 * need a rotation to fix the balance.
    813 		 * If the rotation doesn't change the height
    814 		 * of the sub-tree we have finished adjusting.
    815 		 */
    816 		if (new_balance == 0)
    817 			AVL_SETBALANCE(node, new_balance);
    818 		else if (!avl_rotation(tree, node, new_balance))
    819 			break;
    820 	} while (parent != NULL);
    821 }
    822 
    823 #define	AVL_REINSERT(tree, obj)		\
    824 	avl_remove((tree), (obj));	\
    825 	avl_add((tree), (obj))
    826 
    827 boolean_t
    828 avl_update_lt(avl_tree_t *t, void *obj)
    829 {
    830 	void *neighbor;
    831 
    832 	ASSERT(((neighbor = AVL_NEXT(t, obj)) == NULL) ||
    833 	    (t->avl_compar(obj, neighbor) <= 0));
    834 
    835 	neighbor = AVL_PREV(t, obj);
    836 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
    837 		AVL_REINSERT(t, obj);
    838 		return (B_TRUE);
    839 	}
    840 
    841 	return (B_FALSE);
    842 }
    843 
    844 boolean_t
    845 avl_update_gt(avl_tree_t *t, void *obj)
    846 {
    847 	void *neighbor;
    848 
    849 	ASSERT(((neighbor = AVL_PREV(t, obj)) == NULL) ||
    850 	    (t->avl_compar(obj, neighbor) >= 0));
    851 
    852 	neighbor = AVL_NEXT(t, obj);
    853 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
    854 		AVL_REINSERT(t, obj);
    855 		return (B_TRUE);
    856 	}
    857 
    858 	return (B_FALSE);
    859 }
    860 
    861 boolean_t
    862 avl_update(avl_tree_t *t, void *obj)
    863 {
    864 	void *neighbor;
    865 
    866 	neighbor = AVL_PREV(t, obj);
    867 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
    868 		AVL_REINSERT(t, obj);
    869 		return (B_TRUE);
    870 	}
    871 
    872 	neighbor = AVL_NEXT(t, obj);
    873 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
    874 		AVL_REINSERT(t, obj);
    875 		return (B_TRUE);
    876 	}
    877 
    878 	return (B_FALSE);
    879 }
    880 
    881 void
    882 avl_swap(avl_tree_t *tree1, avl_tree_t *tree2)
    883 {
    884 	avl_node_t *temp_node;
    885 	ulong_t temp_numnodes;
    886 
    887 	ASSERT3P(tree1->avl_compar, ==, tree2->avl_compar);
    888 	ASSERT3U(tree1->avl_offset, ==, tree2->avl_offset);
    889 	ASSERT3U(tree1->avl_size, ==, tree2->avl_size);
    890 
    891 	temp_node = tree1->avl_root;
    892 	temp_numnodes = tree1->avl_numnodes;
    893 	tree1->avl_root = tree2->avl_root;
    894 	tree1->avl_numnodes = tree2->avl_numnodes;
    895 	tree2->avl_root = temp_node;
    896 	tree2->avl_numnodes = temp_numnodes;
    897 }
    898 
    899 /*
    900  * initialize a new AVL tree
    901  */
    902 void
    903 avl_create(avl_tree_t *tree, int (*compar) (const void *, const void *),
    904     size_t size, size_t offset)
    905 {
    906 	ASSERT(tree);
    907 	ASSERT(compar);
    908 	ASSERT(size > 0);
    909 	ASSERT(size >= offset + sizeof (avl_node_t));
    910 #ifdef _LP64
    911 	ASSERT((offset & 0x7) == 0);
    912 #endif
    913 
    914 	tree->avl_compar = compar;
    915 	tree->avl_root = NULL;
    916 	tree->avl_numnodes = 0;
    917 	tree->avl_size = size;
    918 	tree->avl_offset = offset;
    919 }
    920 
    921 /*
    922  * Delete a tree.
    923  */
    924 /* ARGSUSED */
    925 void
    926 avl_destroy(avl_tree_t *tree)
    927 {
    928 	ASSERT(tree);
    929 	ASSERT(tree->avl_numnodes == 0);
    930 	ASSERT(tree->avl_root == NULL);
    931 }
    932 
    933 
    934 /*
    935  * Return the number of nodes in an AVL tree.
    936  */
    937 ulong_t
    938 avl_numnodes(avl_tree_t *tree)
    939 {
    940 	ASSERT(tree);
    941 	return (tree->avl_numnodes);
    942 }
    943 
    944 boolean_t
    945 avl_is_empty(avl_tree_t *tree)
    946 {
    947 	ASSERT(tree);
    948 	return (tree->avl_numnodes == 0);
    949 }
    950 
    951 #define	CHILDBIT	(1L)
    952 
    953 /*
    954  * Post-order tree walk used to visit all tree nodes and destroy the tree
    955  * in post order. This is used for destroying a tree without paying any cost
    956  * for rebalancing it.
    957  *
    958  * example:
    959  *
    960  *	void *cookie = NULL;
    961  *	my_data_t *node;
    962  *
    963  *	while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
    964  *		free(node);
    965  *	avl_destroy(tree);
    966  *
    967  * The cookie is really an avl_node_t to the current node's parent and
    968  * an indication of which child you looked at last.
    969  *
    970  * On input, a cookie value of CHILDBIT indicates the tree is done.
    971  */
    972 void *
    973 avl_destroy_nodes(avl_tree_t *tree, void **cookie)
    974 {
    975 	avl_node_t	*node;
    976 	avl_node_t	*parent;
    977 	int		child;
    978 	void		*first;
    979 	size_t		off = tree->avl_offset;
    980 
    981 	/*
    982 	 * Initial calls go to the first node or it's right descendant.
    983 	 */
    984 	if (*cookie == NULL) {
    985 		first = avl_first(tree);
    986 
    987 		/*
    988 		 * deal with an empty tree
    989 		 */
    990 		if (first == NULL) {
    991 			*cookie = (void *)CHILDBIT;
    992 			return (NULL);
    993 		}
    994 
    995 		node = AVL_DATA2NODE(first, off);
    996 		parent = AVL_XPARENT(node);
    997 		goto check_right_side;
    998 	}
    999 
   1000 	/*
   1001 	 * If there is no parent to return to we are done.
   1002 	 */
   1003 	parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT);
   1004 	if (parent == NULL) {
   1005 		if (tree->avl_root != NULL) {
   1006 			ASSERT(tree->avl_numnodes == 1);
   1007 			tree->avl_root = NULL;
   1008 			tree->avl_numnodes = 0;
   1009 		}
   1010 		return (NULL);
   1011 	}
   1012 
   1013 	/*
   1014 	 * Remove the child pointer we just visited from the parent and tree.
   1015 	 */
   1016 	child = (uintptr_t)(*cookie) & CHILDBIT;
   1017 	parent->avl_child[child] = NULL;
   1018 	ASSERT(tree->avl_numnodes > 1);
   1019 	--tree->avl_numnodes;
   1020 
   1021 	/*
   1022 	 * If we just did a right child or there isn't one, go up to parent.
   1023 	 */
   1024 	if (child == 1 || parent->avl_child[1] == NULL) {
   1025 		node = parent;
   1026 		parent = AVL_XPARENT(parent);
   1027 		goto done;
   1028 	}
   1029 
   1030 	/*
   1031 	 * Do parent's right child, then leftmost descendent.
   1032 	 */
   1033 	node = parent->avl_child[1];
   1034 	while (node->avl_child[0] != NULL) {
   1035 		parent = node;
   1036 		node = node->avl_child[0];
   1037 	}
   1038 
   1039 	/*
   1040 	 * If here, we moved to a left child. It may have one
   1041 	 * child on the right (when balance == +1).
   1042 	 */
   1043 check_right_side:
   1044 	if (node->avl_child[1] != NULL) {
   1045 		ASSERT(AVL_XBALANCE(node) == 1);
   1046 		parent = node;
   1047 		node = node->avl_child[1];
   1048 		ASSERT(node->avl_child[0] == NULL &&
   1049 		    node->avl_child[1] == NULL);
   1050 	} else {
   1051 		ASSERT(AVL_XBALANCE(node) <= 0);
   1052 	}
   1053 
   1054 done:
   1055 	if (parent == NULL) {
   1056 		*cookie = (void *)CHILDBIT;
   1057 		ASSERT(node == tree->avl_root);
   1058 	} else {
   1059 		*cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node));
   1060 	}
   1061 
   1062 	return (AVL_NODE2DATA(node, off));
   1063 }
   1064