xref: /titanic_44/usr/src/common/avl/avl.c (revision 6ed9368a130d7c9a82e574da808d34034da33748)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright (c) 2014 by Delphix. All rights reserved.
28  */
29 
30 /*
31  * AVL - generic AVL tree implementation for kernel use
32  *
33  * A complete description of AVL trees can be found in many CS textbooks.
34  *
35  * Here is a very brief overview. An AVL tree is a binary search tree that is
36  * almost perfectly balanced. By "almost" perfectly balanced, we mean that at
37  * any given node, the left and right subtrees are allowed to differ in height
38  * by at most 1 level.
39  *
40  * This relaxation from a perfectly balanced binary tree allows doing
41  * insertion and deletion relatively efficiently. Searching the tree is
42  * still a fast operation, roughly O(log(N)).
43  *
44  * The key to insertion and deletion is a set of tree manipulations called
45  * rotations, which bring unbalanced subtrees back into the semi-balanced state.
46  *
47  * This implementation of AVL trees has the following peculiarities:
48  *
49  *	- The AVL specific data structures are physically embedded as fields
50  *	  in the "using" data structures.  To maintain generality the code
51  *	  must constantly translate between "avl_node_t *" and containing
52  *	  data structure "void *"s by adding/subtracting the avl_offset.
53  *
54  *	- Since the AVL data is always embedded in other structures, there is
55  *	  no locking or memory allocation in the AVL routines. This must be
56  *	  provided for by the enclosing data structure's semantics. Typically,
57  *	  avl_insert()/_add()/_remove()/avl_insert_here() require some kind of
58  *	  exclusive write lock. Other operations require a read lock.
59  *
60  *      - The implementation uses iteration instead of explicit recursion,
61  *	  since it is intended to run on limited size kernel stacks. Since
62  *	  there is no recursion stack present to move "up" in the tree,
63  *	  there is an explicit "parent" link in the avl_node_t.
64  *
65  *      - The left/right children pointers of a node are in an array.
66  *	  In the code, variables (instead of constants) are used to represent
67  *	  left and right indices.  The implementation is written as if it only
68  *	  dealt with left handed manipulations.  By changing the value assigned
69  *	  to "left", the code also works for right handed trees.  The
70  *	  following variables/terms are frequently used:
71  *
72  *		int left;	// 0 when dealing with left children,
73  *				// 1 for dealing with right children
74  *
75  *		int left_heavy;	// -1 when left subtree is taller at some node,
76  *				// +1 when right subtree is taller
77  *
78  *		int right;	// will be the opposite of left (0 or 1)
79  *		int right_heavy;// will be the opposite of left_heavy (-1 or 1)
80  *
81  *		int direction;  // 0 for "<" (ie. left child); 1 for ">" (right)
82  *
83  *	  Though it is a little more confusing to read the code, the approach
84  *	  allows using half as much code (and hence cache footprint) for tree
85  *	  manipulations and eliminates many conditional branches.
86  *
87  *	- The avl_index_t is an opaque "cookie" used to find nodes at or
88  *	  adjacent to where a new value would be inserted in the tree. The value
89  *	  is a modified "avl_node_t *".  The bottom bit (normally 0 for a
90  *	  pointer) is set to indicate that the new node has a value greater
91  *	  than the value of the indicated "avl_node_t *".
92  *
93  * Note - in addition to userland (e.g. libavl and libutil) and the kernel
94  * (e.g. genunix), avl.c is compiled into ld.so and kmdb's genunix module,
95  * which each have their own compilation environments and subsequent
96  * requirements. Each of these environments must be considered when adding
97  * dependencies from avl.c.
98  */
99 
100 #include <sys/types.h>
101 #include <sys/param.h>
102 #include <sys/debug.h>
103 #include <sys/avl.h>
104 #include <sys/cmn_err.h>
105 
/*
 * Lookup tables converting between child indices and balance values.
 *
 * Tree code picks left or right children in an essentially random pattern,
 * and "if ()" tests that evaluate randomly are predicted poorly by the
 * hardware.  Indexing one of these tables replaces such a branch with one
 * extra memory reference; the tables are tiny, so they should stay cached.
 *
 *	avl_child2balance[child]	child index (0 left, 1 right) to the
 *					balance contribution (-1 or +1)
 *	avl_balance2child[1 + diff]	comparison or balance value
 *					(-1, 0, +1) to a child index
 */
static const int avl_child2balance[2] = {
	-1,	/* child 0 (left) */
	1	/* child 1 (right) */
};
static const int avl_balance2child[3] = {
	0,	/* -1 selects the left child */
	0,	/*  0 selects the left child */
	1	/* +1 selects the right child */
};
119 
120 
121 /*
122  * Walk from one node to the previous valued node (ie. an infix walk
123  * towards the left). At any given node we do one of 2 things:
124  *
125  * - If there is a left child, go to it, then to it's rightmost descendant.
126  *
127  * - otherwise we return through parent nodes until we've come from a right
128  *   child.
129  *
130  * Return Value:
131  * NULL - if at the end of the nodes
132  * otherwise next node
133  */
134 void *
135 avl_walk(avl_tree_t *tree, void	*oldnode, int left)
136 {
137 	size_t off = tree->avl_offset;
138 	avl_node_t *node = AVL_DATA2NODE(oldnode, off);
139 	int right = 1 - left;
140 	int was_child;
141 
142 
143 	/*
144 	 * nowhere to walk to if tree is empty
145 	 */
146 	if (node == NULL)
147 		return (NULL);
148 
149 	/*
150 	 * Visit the previous valued node. There are two possibilities:
151 	 *
152 	 * If this node has a left child, go down one left, then all
153 	 * the way right.
154 	 */
155 	if (node->avl_child[left] != NULL) {
156 		for (node = node->avl_child[left];
157 		    node->avl_child[right] != NULL;
158 		    node = node->avl_child[right])
159 			;
160 	/*
161 	 * Otherwise, return thru left children as far as we can.
162 	 */
163 	} else {
164 		for (;;) {
165 			was_child = AVL_XCHILD(node);
166 			node = AVL_XPARENT(node);
167 			if (node == NULL)
168 				return (NULL);
169 			if (was_child == right)
170 				break;
171 		}
172 	}
173 
174 	return (AVL_NODE2DATA(node, off));
175 }
176 
177 /*
178  * Return the lowest valued node in a tree or NULL.
179  * (leftmost child from root of tree)
180  */
181 void *
182 avl_first(avl_tree_t *tree)
183 {
184 	avl_node_t *node;
185 	avl_node_t *prev = NULL;
186 	size_t off = tree->avl_offset;
187 
188 	for (node = tree->avl_root; node != NULL; node = node->avl_child[0])
189 		prev = node;
190 
191 	if (prev != NULL)
192 		return (AVL_NODE2DATA(prev, off));
193 	return (NULL);
194 }
195 
196 /*
197  * Return the highest valued node in a tree or NULL.
198  * (rightmost child from root of tree)
199  */
200 void *
201 avl_last(avl_tree_t *tree)
202 {
203 	avl_node_t *node;
204 	avl_node_t *prev = NULL;
205 	size_t off = tree->avl_offset;
206 
207 	for (node = tree->avl_root; node != NULL; node = node->avl_child[1])
208 		prev = node;
209 
210 	if (prev != NULL)
211 		return (AVL_NODE2DATA(prev, off));
212 	return (NULL);
213 }
214 
215 /*
216  * Access the node immediately before or after an insertion point.
217  *
218  * "avl_index_t" is a (avl_node_t *) with the bottom bit indicating a child
219  *
220  * Return value:
221  *	NULL: no node in the given direction
222  *	"void *"  of the found tree node
223  */
224 void *
225 avl_nearest(avl_tree_t *tree, avl_index_t where, int direction)
226 {
227 	int child = AVL_INDEX2CHILD(where);
228 	avl_node_t *node = AVL_INDEX2NODE(where);
229 	void *data;
230 	size_t off = tree->avl_offset;
231 
232 	if (node == NULL) {
233 		ASSERT(tree->avl_root == NULL);
234 		return (NULL);
235 	}
236 	data = AVL_NODE2DATA(node, off);
237 	if (child != direction)
238 		return (data);
239 
240 	return (avl_walk(tree, data, direction));
241 }
242 
243 
244 /*
245  * Search for the node which contains "value".  The algorithm is a
246  * simple binary tree search.
247  *
248  * return value:
249  *	NULL: the value is not in the AVL tree
250  *		*where (if not NULL)  is set to indicate the insertion point
251  *	"void *"  of the found tree node
252  */
253 void *
254 avl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
255 {
256 	avl_node_t *node;
257 	avl_node_t *prev = NULL;
258 	int child = 0;
259 	int diff;
260 	size_t off = tree->avl_offset;
261 
262 	for (node = tree->avl_root; node != NULL;
263 	    node = node->avl_child[child]) {
264 
265 		prev = node;
266 
267 		diff = tree->avl_compar(value, AVL_NODE2DATA(node, off));
268 		ASSERT(-1 <= diff && diff <= 1);
269 		if (diff == 0) {
270 #ifdef DEBUG
271 			if (where != NULL)
272 				*where = 0;
273 #endif
274 			return (AVL_NODE2DATA(node, off));
275 		}
276 		child = avl_balance2child[1 + diff];
277 
278 	}
279 
280 	if (where != NULL)
281 		*where = AVL_MKINDEX(prev, child);
282 
283 	return (NULL);
284 }
285 
286 
/*
 * Perform a rotation to restore balance at the subtree rooted at "node".
 *
 * This routine is used by both insertion and deletion.
 *
 * Arguments:
 *	tree	- tree being modified; its avl_root is updated when "node"
 *		  had no parent
 *	node	- root of the subtree whose balance went out of range
 *	balance	- the "new" balance at "node"; this value is either -2 or +2
 *
 * The return value indicates:
 *	 0 : subtree did not change height
 *	!0 : subtree was reduced in height
 *
 * The code is written as if handling left rotations, right rotations are
 * symmetric and handled by swapping values of variables right/left[_heavy]
 */
static int
avl_rotation(avl_tree_t *tree, avl_node_t *node, int balance)
{
	int left = !(balance < 0);	/* when balance = -2, left will be 0 */
	int right = 1 - left;
	/*
	 * -2 becomes -1 and +2 becomes +1.  NOTE(review): for -2 this relies
	 * on an arithmetic right shift; C leaves right-shifting a negative
	 * value implementation-defined.
	 */
	int left_heavy = balance >> 1;
	int right_heavy = -left_heavy;
	avl_node_t *parent = AVL_XPARENT(node);
	/* the child on the heavy side; it moves up in either case below */
	avl_node_t *child = node->avl_child[left];
	avl_node_t *cright;
	avl_node_t *gchild;
	avl_node_t *gright;
	avl_node_t *gleft;
	/* which slot of "parent" this subtree hangs from */
	int which_child = AVL_XCHILD(node);
	int child_bal = AVL_XBALANCE(child);

	/* BEGIN CSTYLED */
	/*
	 * case 1 : node is overly left heavy, the left child is balanced or
	 * also left heavy. This requires the following rotation.
	 *
	 *                   (node bal:-2)
	 *                    /           \
	 *                   /             \
	 *              (child bal:0 or -1)
	 *              /    \
	 *             /      \
	 *                     cright
	 *
	 * becomes:
	 *
	 *              (child bal:1 or 0)
	 *              /        \
	 *             /          \
	 *                        (node bal:-1 or 0)
	 *                         /     \
	 *                        /       \
	 *                     cright
	 *
	 * we detect this situation by noting that child's balance is not
	 * right_heavy.
	 */
	/* END CSTYLED */
	if (child_bal != right_heavy) {

		/*
		 * compute new balance of nodes
		 *
		 * If child used to be left heavy (now balanced) we reduced
		 * the height of this sub-tree -- used in "return...;" below
		 */
		child_bal += right_heavy; /* adjust towards right */

		/*
		 * move "cright" to be node's left child
		 */
		cright = child->avl_child[right];
		node->avl_child[left] = cright;
		if (cright != NULL) {
			AVL_SETPARENT(cright, node);
			AVL_SETCHILD(cright, left);
		}

		/*
		 * move node to be child's right child; node's new balance
		 * is the negation of child's new balance
		 */
		child->avl_child[right] = node;
		AVL_SETBALANCE(node, -child_bal);
		AVL_SETCHILD(node, right);
		AVL_SETPARENT(node, child);

		/*
		 * update the pointer into this subtree
		 */
		AVL_SETBALANCE(child, child_bal);
		AVL_SETCHILD(child, which_child);
		AVL_SETPARENT(child, parent);
		if (parent != NULL)
			parent->avl_child[which_child] = child;
		else
			tree->avl_root = child;

		return (child_bal == 0);
	}

	/* BEGIN CSTYLED */
	/*
	 * case 2 : When node is left heavy, but child is right heavy we use
	 * a different rotation.
	 *
	 *                   (node b:-2)
	 *                    /   \
	 *                   /     \
	 *                  /       \
	 *             (child b:+1)
	 *              /     \
	 *             /       \
	 *                   (gchild b: != 0)
	 *                     /  \
	 *                    /    \
	 *                 gleft   gright
	 *
	 * becomes:
	 *
	 *              (gchild b:0)
	 *              /       \
	 *             /         \
	 *            /           \
	 *        (child b:?)   (node b:?)
	 *         /  \          /   \
	 *        /    \        /     \
	 *            gleft   gright
	 *
	 * computing the new balances is more complicated. As an example:
	 *	 if gchild was right_heavy, then child is now left heavy
	 *		else it is balanced
	 */
	/* END CSTYLED */
	gchild = child->avl_child[right];
	gleft = gchild->avl_child[left];
	gright = gchild->avl_child[right];

	/*
	 * move gright to left child of node and
	 *
	 * move gleft to right child of child
	 */
	node->avl_child[left] = gright;
	if (gright != NULL) {
		AVL_SETPARENT(gright, node);
		AVL_SETCHILD(gright, left);
	}

	child->avl_child[right] = gleft;
	if (gleft != NULL) {
		AVL_SETPARENT(gleft, child);
		AVL_SETCHILD(gleft, right);
	}

	/*
	 * move child to left child of gchild and
	 *
	 * move node to right child of gchild and
	 *
	 * fixup parent of all this to point to gchild
	 *
	 * "balance" is reused here to hold gchild's balance from before the
	 * rotation; it decides the new balances of child and node.
	 */
	balance = AVL_XBALANCE(gchild);
	gchild->avl_child[left] = child;
	AVL_SETBALANCE(child, (balance == right_heavy ? left_heavy : 0));
	AVL_SETPARENT(child, gchild);
	AVL_SETCHILD(child, left);

	gchild->avl_child[right] = node;
	AVL_SETBALANCE(node, (balance == left_heavy ? right_heavy : 0));
	AVL_SETPARENT(node, gchild);
	AVL_SETCHILD(node, right);

	AVL_SETBALANCE(gchild, 0);
	AVL_SETPARENT(gchild, parent);
	AVL_SETCHILD(gchild, which_child);
	if (parent != NULL)
		parent->avl_child[which_child] = gchild;
	else
		tree->avl_root = gchild;

	return (1);	/* the new tree is always shorter */
}
468 
469 
470 /*
471  * Insert a new node into an AVL tree at the specified (from avl_find()) place.
472  *
473  * Newly inserted nodes are always leaf nodes in the tree, since avl_find()
474  * searches out to the leaf positions.  The avl_index_t indicates the node
475  * which will be the parent of the new node.
476  *
477  * After the node is inserted, a single rotation further up the tree may
478  * be necessary to maintain an acceptable AVL balance.
479  */
480 void
481 avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
482 {
483 	avl_node_t *node;
484 	avl_node_t *parent = AVL_INDEX2NODE(where);
485 	int old_balance;
486 	int new_balance;
487 	int which_child = AVL_INDEX2CHILD(where);
488 	size_t off = tree->avl_offset;
489 
490 	ASSERT(tree);
491 #ifdef _LP64
492 	ASSERT(((uintptr_t)new_data & 0x7) == 0);
493 #endif
494 
495 	node = AVL_DATA2NODE(new_data, off);
496 
497 	/*
498 	 * First, add the node to the tree at the indicated position.
499 	 */
500 	++tree->avl_numnodes;
501 
502 	node->avl_child[0] = NULL;
503 	node->avl_child[1] = NULL;
504 
505 	AVL_SETCHILD(node, which_child);
506 	AVL_SETBALANCE(node, 0);
507 	AVL_SETPARENT(node, parent);
508 	if (parent != NULL) {
509 		ASSERT(parent->avl_child[which_child] == NULL);
510 		parent->avl_child[which_child] = node;
511 	} else {
512 		ASSERT(tree->avl_root == NULL);
513 		tree->avl_root = node;
514 	}
515 	/*
516 	 * Now, back up the tree modifying the balance of all nodes above the
517 	 * insertion point. If we get to a highly unbalanced ancestor, we
518 	 * need to do a rotation.  If we back out of the tree we are done.
519 	 * If we brought any subtree into perfect balance (0), we are also done.
520 	 */
521 	for (;;) {
522 		node = parent;
523 		if (node == NULL)
524 			return;
525 
526 		/*
527 		 * Compute the new balance
528 		 */
529 		old_balance = AVL_XBALANCE(node);
530 		new_balance = old_balance + avl_child2balance[which_child];
531 
532 		/*
533 		 * If we introduced equal balance, then we are done immediately
534 		 */
535 		if (new_balance == 0) {
536 			AVL_SETBALANCE(node, 0);
537 			return;
538 		}
539 
540 		/*
541 		 * If both old and new are not zero we went
542 		 * from -1 to -2 balance, do a rotation.
543 		 */
544 		if (old_balance != 0)
545 			break;
546 
547 		AVL_SETBALANCE(node, new_balance);
548 		parent = AVL_XPARENT(node);
549 		which_child = AVL_XCHILD(node);
550 	}
551 
552 	/*
553 	 * perform a rotation to fix the tree and return
554 	 */
555 	(void) avl_rotation(tree, node, new_balance);
556 }
557 
558 /*
559  * Insert "new_data" in "tree" in the given "direction" either after or
560  * before (AVL_AFTER, AVL_BEFORE) the data "here".
561  *
562  * Insertions can only be done at empty leaf points in the tree, therefore
563  * if the given child of the node is already present we move to either
564  * the AVL_PREV or AVL_NEXT and reverse the insertion direction. Since
565  * every other node in the tree is a leaf, this always works.
566  *
567  * To help developers using this interface, we assert that the new node
568  * is correctly ordered at every step of the way in DEBUG kernels.
569  */
570 void
571 avl_insert_here(
572 	avl_tree_t *tree,
573 	void *new_data,
574 	void *here,
575 	int direction)
576 {
577 	avl_node_t *node;
578 	int child = direction;	/* rely on AVL_BEFORE == 0, AVL_AFTER == 1 */
579 #ifdef DEBUG
580 	int diff;
581 #endif
582 
583 	ASSERT(tree != NULL);
584 	ASSERT(new_data != NULL);
585 	ASSERT(here != NULL);
586 	ASSERT(direction == AVL_BEFORE || direction == AVL_AFTER);
587 
588 	/*
589 	 * If corresponding child of node is not NULL, go to the neighboring
590 	 * node and reverse the insertion direction.
591 	 */
592 	node = AVL_DATA2NODE(here, tree->avl_offset);
593 
594 #ifdef DEBUG
595 	diff = tree->avl_compar(new_data, here);
596 	ASSERT(-1 <= diff && diff <= 1);
597 	ASSERT(diff != 0);
598 	ASSERT(diff > 0 ? child == 1 : child == 0);
599 #endif
600 
601 	if (node->avl_child[child] != NULL) {
602 		node = node->avl_child[child];
603 		child = 1 - child;
604 		while (node->avl_child[child] != NULL) {
605 #ifdef DEBUG
606 			diff = tree->avl_compar(new_data,
607 			    AVL_NODE2DATA(node, tree->avl_offset));
608 			ASSERT(-1 <= diff && diff <= 1);
609 			ASSERT(diff != 0);
610 			ASSERT(diff > 0 ? child == 1 : child == 0);
611 #endif
612 			node = node->avl_child[child];
613 		}
614 #ifdef DEBUG
615 		diff = tree->avl_compar(new_data,
616 		    AVL_NODE2DATA(node, tree->avl_offset));
617 		ASSERT(-1 <= diff && diff <= 1);
618 		ASSERT(diff != 0);
619 		ASSERT(diff > 0 ? child == 1 : child == 0);
620 #endif
621 	}
622 	ASSERT(node->avl_child[child] == NULL);
623 
624 	avl_insert(tree, new_data, AVL_MKINDEX(node, child));
625 }
626 
627 /*
628  * Add a new node to an AVL tree.
629  */
630 void
631 avl_add(avl_tree_t *tree, void *new_node)
632 {
633 	avl_index_t where;
634 
635 	/*
636 	 * This is unfortunate.  We want to call panic() here, even for
637 	 * non-DEBUG kernels.  In userland, however, we can't depend on anything
638 	 * in libc or else the rtld build process gets confused.  So, all we can
639 	 * do in userland is resort to a normal ASSERT().
640 	 */
641 	if (avl_find(tree, new_node, &where) != NULL)
642 #ifdef _KERNEL
643 		panic("avl_find() succeeded inside avl_add()");
644 #else
645 		ASSERT(0);
646 #endif
647 	avl_insert(tree, new_node, where);
648 }
649 
/*
 * Delete a node from the AVL tree.  Deletion is similar to insertion, but
 * with 2 complications.
 *
 * First, we may be deleting an interior node. Consider the following subtree:
 *
 *     d           c            c
 *    / \         / \          / \
 *   b   e       b   e        b   e
 *  / \	        / \          /
 * a   c       a            a
 *
 * When we are deleting node (d), we find and bring up an adjacent valued leaf
 * node, say (c), to take the interior node's place. In the code this is
 * handled by temporarily swapping (d) and (c) in the tree and then using
 * common code to delete (d) from the leaf position.
 *
 * Secondly, an interior deletion from a deep tree may require more than one
 * rotation to fix the balance. This is handled by moving up the tree through
 * parents and applying rotations as needed. The return value from
 * avl_rotation() is used to detect when a subtree did not change overall
 * height due to a rotation.
 *
 * Arguments:
 *	tree	- tree to remove from
 *	data	- data structure to unlink; this routine does not check that
 *		  it is actually a member of "tree"
 */
void
avl_remove(avl_tree_t *tree, void *data)
{
	avl_node_t *delete;
	avl_node_t *parent;
	avl_node_t *node;
	avl_node_t tmp;		/* on-stack stand-in used during the swap */
	int old_balance;
	int new_balance;
	int left;
	int right;
	int which_child;
	size_t off = tree->avl_offset;

	ASSERT(tree);

	delete = AVL_DATA2NODE(data, off);

	/*
	 * Deletion is easiest with a node that has at most 1 child.
	 * We swap a node with 2 children with a sequentially valued
	 * neighbor node. That node will have at most 1 child. Note this
	 * has no effect on the ordering of the remaining nodes.
	 *
	 * As an optimization, we choose the greater neighbor if the tree
	 * is right heavy, otherwise the left neighbor. This reduces the
	 * number of rotations needed.
	 */
	if (delete->avl_child[0] != NULL && delete->avl_child[1] != NULL) {

		/*
		 * choose node to swap from whichever side is taller
		 */
		old_balance = AVL_XBALANCE(delete);
		left = avl_balance2child[old_balance + 1];
		right = 1 - left;

		/*
		 * get to the previous value'd node
		 * (down 1 left, as far as possible right)
		 */
		for (node = delete->avl_child[left];
		    node->avl_child[right] != NULL;
		    node = node->avl_child[right])
			;

		/*
		 * create a temp placeholder for 'node'
		 * move 'node' to delete's spot in the tree
		 */
		tmp = *node;

		/*
		 * If 'node' was delete's direct child, the links just copied
		 * from delete make 'node' its own child; point that link at
		 * the placeholder instead.
		 */
		*node = *delete;
		if (node->avl_child[left] == node)
			node->avl_child[left] = &tmp;

		parent = AVL_XPARENT(node);
		if (parent != NULL)
			parent->avl_child[AVL_XCHILD(node)] = node;
		else
			tree->avl_root = node;
		AVL_SETPARENT(node->avl_child[left], node);
		AVL_SETPARENT(node->avl_child[right], node);

		/*
		 * Put tmp where node used to be (just temporary).
		 * It always has a parent and at most 1 child.
		 */
		delete = &tmp;
		parent = AVL_XPARENT(delete);
		parent->avl_child[AVL_XCHILD(delete)] = delete;
		/* index of the only child tmp can have: 1 if right, else 0 */
		which_child = (delete->avl_child[1] != 0);
		if (delete->avl_child[which_child] != NULL)
			AVL_SETPARENT(delete->avl_child[which_child], delete);
	}


	/*
	 * Here we know "delete" is at least partially a leaf node. It can
	 * be easily removed from the tree.
	 */
	ASSERT(tree->avl_numnodes > 0);
	--tree->avl_numnodes;
	parent = AVL_XPARENT(delete);
	which_child = AVL_XCHILD(delete);
	/* "node" becomes delete's only child, or NULL if it has none */
	if (delete->avl_child[0] != NULL)
		node = delete->avl_child[0];
	else
		node = delete->avl_child[1];

	/*
	 * Connect parent directly to node (leaving out delete).
	 */
	if (node != NULL) {
		AVL_SETPARENT(node, parent);
		AVL_SETCHILD(node, which_child);
	}
	/* delete was the root: no ancestors whose balance could change */
	if (parent == NULL) {
		tree->avl_root = node;
		return;
	}
	parent->avl_child[which_child] = node;


	/*
	 * Since the subtree is now shorter, begin adjusting parent balances
	 * and performing any needed rotations.
	 */
	do {

		/*
		 * Move up the tree and adjust the balance
		 *
		 * Capture the parent and which_child values for the next
		 * iteration before any rotations occur.
		 */
		node = parent;
		old_balance = AVL_XBALANCE(node);
		/* the side we came from got shorter, so lean away from it */
		new_balance = old_balance - avl_child2balance[which_child];
		parent = AVL_XPARENT(node);
		which_child = AVL_XCHILD(node);

		/*
		 * If a node was in perfect balance but isn't anymore then
		 * we can stop, since the height didn't change above this point
		 * due to a deletion.
		 */
		if (old_balance == 0) {
			AVL_SETBALANCE(node, new_balance);
			break;
		}

		/*
		 * If the new balance is zero, we don't need to rotate
		 * else
		 * need a rotation to fix the balance.
		 * If the rotation doesn't change the height
		 * of the sub-tree we have finished adjusting.
		 */
		if (new_balance == 0)
			AVL_SETBALANCE(node, new_balance);
		else if (!avl_rotation(tree, node, new_balance))
			break;
	} while (parent != NULL);
}
818 
/*
 * Remove "obj" from "tree" and add it back, so that it ends up at the
 * position matching its current value.
 *
 * The two calls are wrapped in do { } while (0) so that the macro always
 * expands to exactly one statement, even when used as the unbraced body of
 * an if/else or a loop.
 */
#define	AVL_REINSERT(tree, obj)			\
	do {					\
		avl_remove((tree), (obj));	\
		avl_add((tree), (obj));		\
	} while (0)
822 
823 boolean_t
824 avl_update_lt(avl_tree_t *t, void *obj)
825 {
826 	void *neighbor;
827 
828 	ASSERT(((neighbor = AVL_NEXT(t, obj)) == NULL) ||
829 	    (t->avl_compar(obj, neighbor) <= 0));
830 
831 	neighbor = AVL_PREV(t, obj);
832 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
833 		AVL_REINSERT(t, obj);
834 		return (B_TRUE);
835 	}
836 
837 	return (B_FALSE);
838 }
839 
840 boolean_t
841 avl_update_gt(avl_tree_t *t, void *obj)
842 {
843 	void *neighbor;
844 
845 	ASSERT(((neighbor = AVL_PREV(t, obj)) == NULL) ||
846 	    (t->avl_compar(obj, neighbor) >= 0));
847 
848 	neighbor = AVL_NEXT(t, obj);
849 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
850 		AVL_REINSERT(t, obj);
851 		return (B_TRUE);
852 	}
853 
854 	return (B_FALSE);
855 }
856 
857 boolean_t
858 avl_update(avl_tree_t *t, void *obj)
859 {
860 	void *neighbor;
861 
862 	neighbor = AVL_PREV(t, obj);
863 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) < 0)) {
864 		AVL_REINSERT(t, obj);
865 		return (B_TRUE);
866 	}
867 
868 	neighbor = AVL_NEXT(t, obj);
869 	if ((neighbor != NULL) && (t->avl_compar(obj, neighbor) > 0)) {
870 		AVL_REINSERT(t, obj);
871 		return (B_TRUE);
872 	}
873 
874 	return (B_FALSE);
875 }
876 
877 void
878 avl_swap(avl_tree_t *tree1, avl_tree_t *tree2)
879 {
880 	avl_node_t *temp_node;
881 	ulong_t temp_numnodes;
882 
883 	ASSERT3P(tree1->avl_compar, ==, tree2->avl_compar);
884 	ASSERT3U(tree1->avl_offset, ==, tree2->avl_offset);
885 	ASSERT3U(tree1->avl_size, ==, tree2->avl_size);
886 
887 	temp_node = tree1->avl_root;
888 	temp_numnodes = tree1->avl_numnodes;
889 	tree1->avl_root = tree2->avl_root;
890 	tree1->avl_numnodes = tree2->avl_numnodes;
891 	tree2->avl_root = temp_node;
892 	tree2->avl_numnodes = temp_numnodes;
893 }
894 
895 /*
896  * initialize a new AVL tree
897  */
898 void
899 avl_create(avl_tree_t *tree, int (*compar) (const void *, const void *),
900     size_t size, size_t offset)
901 {
902 	ASSERT(tree);
903 	ASSERT(compar);
904 	ASSERT(size > 0);
905 	ASSERT(size >= offset + sizeof (avl_node_t));
906 #ifdef _LP64
907 	ASSERT((offset & 0x7) == 0);
908 #endif
909 
910 	tree->avl_compar = compar;
911 	tree->avl_root = NULL;
912 	tree->avl_numnodes = 0;
913 	tree->avl_size = size;
914 	tree->avl_offset = offset;
915 }
916 
/*
 * Delete a tree.
 *
 * Nothing is released or modified here: the routine only asserts that the
 * tree is already empty (no nodes counted, no root).  In builds where
 * ASSERT() compiles to nothing, "tree" is unused, hence ARGSUSED.
 */
/* ARGSUSED */
void
avl_destroy(avl_tree_t *tree)
{
	ASSERT(tree);
	ASSERT(tree->avl_numnodes == 0);
	ASSERT(tree->avl_root == NULL);
}
928 
929 
930 /*
931  * Return the number of nodes in an AVL tree.
932  */
933 ulong_t
934 avl_numnodes(avl_tree_t *tree)
935 {
936 	ASSERT(tree);
937 	return (tree->avl_numnodes);
938 }
939 
940 boolean_t
941 avl_is_empty(avl_tree_t *tree)
942 {
943 	ASSERT(tree);
944 	return (tree->avl_numnodes == 0);
945 }
946 
/* low bit of the cookie: which child of the parent was visited last */
#define	CHILDBIT	(1L)

/*
 * Post-order tree walk used to visit all tree nodes and destroy the tree
 * in post order. This is used for destroying a tree without paying any cost
 * for rebalancing it.
 *
 * example:
 *
 *	void *cookie = NULL;
 *	my_data_t *node;
 *
 *	while ((node = avl_destroy_nodes(tree, &cookie)) != NULL)
 *		free(node);
 *	avl_destroy(tree);
 *
 * The cookie is really an avl_node_t to the current node's parent and
 * an indication of which child you looked at last.
 *
 * On input, a cookie value of CHILDBIT indicates the tree is done.
 *
 * Arguments:
 *	tree	- tree being torn down
 *	cookie	- walk state; must point to NULL on the first call and is
 *		  updated on every call
 *
 * Return value:
 *	NULL: no nodes are left
 *	"void *" of the next node, which the caller may now dispose of
 */
void *
avl_destroy_nodes(avl_tree_t *tree, void **cookie)
{
	avl_node_t	*node;
	avl_node_t	*parent;
	int		child;
	void		*first;
	size_t		off = tree->avl_offset;

	/*
	 * Initial calls go to the first node or its right descendant.
	 */
	if (*cookie == NULL) {
		first = avl_first(tree);

		/*
		 * deal with an empty tree
		 */
		if (first == NULL) {
			*cookie = (void *)CHILDBIT;
			return (NULL);
		}

		node = AVL_DATA2NODE(first, off);
		parent = AVL_XPARENT(node);
		goto check_right_side;
	}

	/*
	 * If there is no parent to return to we are done.
	 * The node handed out by the previous call was the root, so the
	 * tree is marked empty here.
	 */
	parent = (avl_node_t *)((uintptr_t)(*cookie) & ~CHILDBIT);
	if (parent == NULL) {
		if (tree->avl_root != NULL) {
			ASSERT(tree->avl_numnodes == 1);
			tree->avl_root = NULL;
			tree->avl_numnodes = 0;
		}
		return (NULL);
	}

	/*
	 * Remove the child pointer we just visited from the parent and tree.
	 */
	child = (uintptr_t)(*cookie) & CHILDBIT;
	parent->avl_child[child] = NULL;
	ASSERT(tree->avl_numnodes > 1);
	--tree->avl_numnodes;

	/*
	 * If we just did a right child or there isn't one, go up to parent.
	 */
	if (child == 1 || parent->avl_child[1] == NULL) {
		node = parent;
		parent = AVL_XPARENT(parent);
		goto done;
	}

	/*
	 * Do parent's right child, then leftmost descendent.
	 */
	node = parent->avl_child[1];
	while (node->avl_child[0] != NULL) {
		parent = node;
		node = node->avl_child[0];
	}

	/*
	 * If here, we moved to a left child. It may have one
	 * child on the right (when balance == +1).
	 */
check_right_side:
	if (node->avl_child[1] != NULL) {
		ASSERT(AVL_XBALANCE(node) == 1);
		parent = node;
		node = node->avl_child[1];
		ASSERT(node->avl_child[0] == NULL &&
		    node->avl_child[1] == NULL);
	} else {
		ASSERT(AVL_XBALANCE(node) <= 0);
	}

	/*
	 * Record in the cookie where to resume: the parent of the node being
	 * returned plus which of its children that node is.  A node without
	 * a parent is the root, and the cookie then says the walk is done.
	 */
done:
	if (parent == NULL) {
		*cookie = (void *)CHILDBIT;
		ASSERT(node == tree->avl_root);
	} else {
		*cookie = (void *)((uintptr_t)parent | AVL_XCHILD(node));
	}

	return (AVL_NODE2DATA(node, off));
}
1060