radixtree.c revision 1.30 1 /* $NetBSD: radixtree.c,v 1.30 2023/09/10 14:45:52 ad Exp $ */
2
3 /*-
4 * Copyright (c)2011,2012,2013 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * radixtree.c
31 *
32 * Overview:
33 *
34 * This is an implementation of radix tree, whose keys are uint64_t and leafs
35 * are user provided pointers.
36 *
37 * Leaf nodes are just void * and this implementation doesn't care about
38 * what they actually point to. However, this implementation has an assumption
39 * about their alignment. Specifically, this implementation assumes that their
40 * 2 LSBs are always zero and uses them for internal accounting.
41 *
42 * Intermediate nodes and memory allocation:
43 *
44 * Intermediate nodes are automatically allocated and freed internally and
45 * basically users don't need to care about them. The allocation is done via
46 * kmem_zalloc(9) for _KERNEL, malloc(3) for userland, and alloc() for
47 * _STANDALONE environment. Only radix_tree_insert_node function can allocate
48 * memory for intermediate nodes and thus can fail for ENOMEM.
49 *
50 * Memory Efficiency:
51 *
52 * It's designed to work efficiently with dense index distribution.
53 * The memory consumption (number of necessary intermediate nodes) heavily
54 * depends on the index distribution. Basically, more dense index distribution
55 * consumes less nodes per item. Approximately,
56 *
57 * - the best case: about RADIX_TREE_PTR_PER_NODE items per intermediate node.
58 * it would look like the following.
59 *
60 * root (t_height=1)
61 * |
62 * v
63 * [ | | | ] (intermediate node. RADIX_TREE_PTR_PER_NODE=4 in this fig)
64 * | | | |
65 * v v v v
66 * p p p p (items)
67 *
68 * - the worst case: RADIX_TREE_MAX_HEIGHT intermediate nodes per item.
69 * it would look like the following if RADIX_TREE_MAX_HEIGHT=3.
70 *
71 * root (t_height=3)
72 * |
73 * v
74 * [ | | | ]
75 * |
76 * v
77 * [ | | | ]
78 * |
79 * v
80 * [ | | | ]
81 * |
82 * v
83 * p
84 *
85 * The height of tree (t_height) is dynamic. It's smaller if only small
86 * index values are used. As an extreme case, if only index 0 is used,
87 * the corresponding value is directly stored in the root of the tree
88 * (struct radix_tree) without allocating any intermediate nodes. In that
89 * case, t_height=0.
90 *
91 * Gang lookup:
92 *
 * This implementation provides a way to scan many nodes quickly via
 * the radix_tree_gang_lookup_node function and its variants.
95 *
96 * Tags:
97 *
98 * This implementation provides tagging functionality, which allows quick
99 * scanning of a subset of leaf nodes. Leaf nodes are untagged when inserted
100 * into the tree and can be tagged by radix_tree_set_tag function.
101 * radix_tree_gang_lookup_tagged_node function and its variants returns only
102 * leaf nodes with the given tag. To reduce amount of nodes to visit for
103 * these functions, this implementation keeps tagging information in internal
104 * intermediate nodes and quickly skips uninterested parts of a tree.
105 *
 * A tree has RADIX_TREE_TAG_ID_MAX independent tag spaces, each of which is
 * identified by a zero-origin number, tagid. For the current implementation,
 * RADIX_TREE_TAG_ID_MAX is 2. A set of tags is described as a bitmask tagmask,
 * which is a bitwise OR of (1 << tagid).
110 */
111
112 #include <sys/cdefs.h>
113
114 #if defined(_KERNEL) || defined(_STANDALONE)
115 __KERNEL_RCSID(0, "$NetBSD: radixtree.c,v 1.30 2023/09/10 14:45:52 ad Exp $");
116 #include <sys/param.h>
117 #include <sys/errno.h>
118 #include <sys/kmem.h>
119 #include <sys/radixtree.h>
120 #include <lib/libkern/libkern.h>
121 #if defined(_STANDALONE)
122 #include <lib/libsa/stand.h>
123 #endif /* defined(_STANDALONE) */
124 #else /* defined(_KERNEL) || defined(_STANDALONE) */
125 __RCSID("$NetBSD: radixtree.c,v 1.30 2023/09/10 14:45:52 ad Exp $");
126 #include <assert.h>
127 #include <errno.h>
128 #include <stdbool.h>
129 #include <stdlib.h>
130 #include <string.h>
131 #if 1
132 #define KASSERT assert
133 #else
134 #define KASSERT(a) /* nothing */
135 #endif
136 #endif /* defined(_KERNEL) || defined(_STANDALONE) */
137
138 #include <sys/radixtree.h>
139
140 #define RADIX_TREE_BITS_PER_HEIGHT 4 /* XXX tune */
141 #define RADIX_TREE_PTR_PER_NODE (1 << RADIX_TREE_BITS_PER_HEIGHT)
142 #define RADIX_TREE_MAX_HEIGHT (64 / RADIX_TREE_BITS_PER_HEIGHT)
143 #define RADIX_TREE_INVALID_HEIGHT (RADIX_TREE_MAX_HEIGHT + 1)
144 __CTASSERT((64 % RADIX_TREE_BITS_PER_HEIGHT) == 0);
145
146 __CTASSERT(((1 << RADIX_TREE_TAG_ID_MAX) & (sizeof(int) - 1)) == 0);
147 #define RADIX_TREE_TAG_MASK ((1 << RADIX_TREE_TAG_ID_MAX) - 1)
148
149 static inline void *
150 entry_ptr(void *p)
151 {
152
153 return (void *)((uintptr_t)p & ~RADIX_TREE_TAG_MASK);
154 }
155
156 static inline unsigned int
157 entry_tagmask(void *p)
158 {
159
160 return (uintptr_t)p & RADIX_TREE_TAG_MASK;
161 }
162
/*
 * entry_compose: OR the given tag bits into a pointer to form an entry.
 */
static inline void *
entry_compose(void *p, unsigned int tagmask)
{
	const uintptr_t bits = (uintptr_t)p | tagmask;

	return (void *)bits;
}
169
/*
 * entry_match_p: decide whether an entry is of interest to a scan.
 *
 * A NULL entry never matches.  A non-NULL entry matches if tagmask is 0
 * (no tag filtering) or if it carries at least one of the tags in tagmask.
 */
static inline bool
entry_match_p(void *p, unsigned int tagmask)
{

	/* an entry without a pointer part must not carry tags. */
	KASSERT(entry_ptr(p) != NULL || entry_tagmask(p) == 0);
	if (p == NULL) {
		return false;
	}
	return tagmask == 0 || (entry_tagmask(p) & tagmask) != 0;
}
183
184 /*
185 * radix_tree_node: an intermediate node
186 *
187 * we don't care the type of leaf nodes. they are just void *.
188 *
189 * we used to maintain a count of non-NULL nodes in this structure, but it
190 * prevented it from being aligned to a cache line boundary; the performance
191 * benefit from being cache friendly is greater than the benefit of having
192 * a dedicated count value, especially in multi-processor situations where
193 * we need to avoid intra-pool-page false sharing.
194 */
195
196 struct radix_tree_node {
197 void *n_ptrs[RADIX_TREE_PTR_PER_NODE];
198 };
199
200 /*
201 * p_refs[0].pptr == &t->t_root
202 * :
203 * p_refs[n].pptr == &(*p_refs[n-1])->n_ptrs[x]
204 * :
205 * :
206 * p_refs[t->t_height].pptr == &leaf_pointer
207 */
208
209 struct radix_tree_path {
210 struct radix_tree_node_ref {
211 void **pptr;
212 } p_refs[RADIX_TREE_MAX_HEIGHT + 1]; /* +1 for the root ptr */
213 /*
214 * p_lastidx is either the index of the last valid element of p_refs[]
215 * or RADIX_TREE_INVALID_HEIGHT.
216 * RADIX_TREE_INVALID_HEIGHT means that radix_tree_lookup_ptr found
217 * that the height of the tree is not enough to cover the given index.
218 */
219 unsigned int p_lastidx;
220 };
221
222 static inline void **
223 path_pptr(const struct radix_tree *t, const struct radix_tree_path *p,
224 unsigned int height)
225 {
226
227 KASSERT(height <= t->t_height);
228 return p->p_refs[height].pptr;
229 }
230
231 static inline struct radix_tree_node *
232 path_node(const struct radix_tree * t, const struct radix_tree_path *p,
233 unsigned int height)
234 {
235
236 KASSERT(height <= t->t_height);
237 return entry_ptr(*path_pptr(t, p, height));
238 }
239
240 /*
241 * radix_tree_init_tree:
242 *
243 * Initialize a tree.
244 */
245
246 void
247 radix_tree_init_tree(struct radix_tree *t)
248 {
249
250 t->t_height = 0;
251 t->t_root = NULL;
252 }
253
254 /*
255 * radix_tree_fini_tree:
256 *
257 * Finish using a tree.
258 */
259
260 void
261 radix_tree_fini_tree(struct radix_tree *t)
262 {
263
264 KASSERT(t->t_root == NULL);
265 KASSERT(t->t_height == 0);
266 }
267
268 /*
269 * radix_tree_empty_tree_p:
270 *
271 * Return if the tree is empty.
272 */
273
274 bool
275 radix_tree_empty_tree_p(struct radix_tree *t)
276 {
277
278 return t->t_root == NULL;
279 }
280
281 /*
282 * radix_tree_empty_tree_p:
283 *
284 * Return true if the tree has any nodes with the given tag. Otherwise
285 * return false.
286 *
287 * It's illegal to call this function with tagmask 0.
288 */
289
290 bool
291 radix_tree_empty_tagged_tree_p(struct radix_tree *t, unsigned int tagmask)
292 {
293
294 KASSERT(tagmask != 0);
295 return (entry_tagmask(t->t_root) & tagmask) == 0;
296 }
297
298 static void
299 radix_tree_node_init(struct radix_tree_node *n)
300 {
301
302 memset(n, 0, sizeof(*n));
303 }
304
305 #if defined(_KERNEL)
/*
 * radix_tree_init:
 *
 * Initialize the subsystem.  Currently there is no global state to set up,
 * so this is a no-op.
 */

void
radix_tree_init(void)
{

	/* nothing right now */
}
318
319 /*
320 * radix_tree_await_memory:
321 *
322 * after an insert has failed with ENOMEM, wait for memory to become
323 * available, so the caller can retry. this needs to ensure that the
324 * maximum possible required number of nodes is available.
325 */
326
327 void
328 radix_tree_await_memory(void)
329 {
330 struct radix_tree_node *nodes[RADIX_TREE_MAX_HEIGHT];
331 int i;
332
333 for (i = 0; i < __arraycount(nodes); i++) {
334 nodes[i] = kmem_alloc(sizeof(struct radix_tree_node), KM_SLEEP);
335 }
336 while (--i >= 0) {
337 kmem_free(nodes[i], sizeof(struct radix_tree_node));
338 }
339 }
340
341 #endif /* defined(_KERNEL) */
342
343 /*
344 * radix_tree_sum_node:
345 *
346 * return the logical sum of all entries in the given node. used to quickly
347 * check for tag masks or empty nodes.
348 */
349
350 static uintptr_t
351 radix_tree_sum_node(const struct radix_tree_node *n)
352 {
353 #if RADIX_TREE_PTR_PER_NODE > 16
354 unsigned int i;
355 uintptr_t sum;
356
357 for (i = 0, sum = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
358 sum |= (uintptr_t)n->n_ptrs[i];
359 }
360 return sum;
361 #else /* RADIX_TREE_PTR_PER_NODE > 16 */
362 uintptr_t sum;
363
364 /*
365 * Unrolling the above is much better than a tight loop with two
366 * test+branch pairs. On x86 with gcc 5.5.0 this compiles into 19
367 * deterministic instructions including the "return" and prologue &
368 * epilogue.
369 */
370 sum = (uintptr_t)n->n_ptrs[0];
371 sum |= (uintptr_t)n->n_ptrs[1];
372 sum |= (uintptr_t)n->n_ptrs[2];
373 sum |= (uintptr_t)n->n_ptrs[3];
374 #if RADIX_TREE_PTR_PER_NODE > 4
375 sum |= (uintptr_t)n->n_ptrs[4];
376 sum |= (uintptr_t)n->n_ptrs[5];
377 sum |= (uintptr_t)n->n_ptrs[6];
378 sum |= (uintptr_t)n->n_ptrs[7];
379 #endif
380 #if RADIX_TREE_PTR_PER_NODE > 8
381 sum |= (uintptr_t)n->n_ptrs[8];
382 sum |= (uintptr_t)n->n_ptrs[9];
383 sum |= (uintptr_t)n->n_ptrs[10];
384 sum |= (uintptr_t)n->n_ptrs[11];
385 sum |= (uintptr_t)n->n_ptrs[12];
386 sum |= (uintptr_t)n->n_ptrs[13];
387 sum |= (uintptr_t)n->n_ptrs[14];
388 sum |= (uintptr_t)n->n_ptrs[15];
389 #endif
390 return sum;
391 #endif /* RADIX_TREE_PTR_PER_NODE > 16 */
392 }
393
394 static int __unused
395 radix_tree_node_count_ptrs(const struct radix_tree_node *n)
396 {
397 unsigned int i, c;
398
399 for (i = c = 0; i < RADIX_TREE_PTR_PER_NODE; i++) {
400 c += (n->n_ptrs[i] != NULL);
401 }
402 return c;
403 }
404
405 static struct radix_tree_node *
406 radix_tree_alloc_node(void)
407 {
408 struct radix_tree_node *n;
409
410 #if defined(_KERNEL)
411 /*
412 * note that kmem_alloc can block.
413 */
414 n = kmem_alloc(sizeof(struct radix_tree_node), KM_SLEEP);
415 #elif defined(_STANDALONE)
416 n = alloc(sizeof(*n));
417 #else /* defined(_STANDALONE) */
418 n = malloc(sizeof(*n));
419 #endif /* defined(_STANDALONE) */
420 if (n != NULL) {
421 radix_tree_node_init(n);
422 }
423 KASSERT(n == NULL || radix_tree_sum_node(n) == 0);
424 return n;
425 }
426
/*
 * radix_tree_free_node: release an intermediate node.
 *
 * The node must already be empty (all slots NULL).
 */
static void
radix_tree_free_node(struct radix_tree_node *n)
{

	KASSERT(radix_tree_sum_node(n) == 0);
#if defined(_KERNEL)
	kmem_free(n, sizeof(*n));
#elif defined(_STANDALONE)
	dealloc(n, sizeof(*n));
#else
	free(n);
#endif
}
440
441 /*
442 * radix_tree_grow:
443 *
444 * increase the height of the tree.
445 */
446
447 static __noinline int
448 radix_tree_grow(struct radix_tree *t, unsigned int newheight)
449 {
450 const unsigned int tagmask = entry_tagmask(t->t_root);
451 struct radix_tree_node *newnodes[RADIX_TREE_MAX_HEIGHT];
452 void *root;
453 int h;
454
455 KASSERT(newheight <= RADIX_TREE_MAX_HEIGHT);
456 if ((root = t->t_root) == NULL) {
457 t->t_height = newheight;
458 return 0;
459 }
460 for (h = t->t_height; h < newheight; h++) {
461 newnodes[h] = radix_tree_alloc_node();
462 if (__predict_false(newnodes[h] == NULL)) {
463 while (--h >= (int)t->t_height) {
464 newnodes[h]->n_ptrs[0] = NULL;
465 radix_tree_free_node(newnodes[h]);
466 }
467 return ENOMEM;
468 }
469 newnodes[h]->n_ptrs[0] = root;
470 root = entry_compose(newnodes[h], tagmask);
471 }
472 t->t_root = root;
473 t->t_height = h;
474 return 0;
475 }
476
477 /*
478 * radix_tree_lookup_ptr:
479 *
480 * an internal helper function used for various exported functions.
481 *
482 * return the pointer to store the node for the given index.
483 *
484 * if alloc is true, try to allocate the storage. (note for _KERNEL:
485 * in that case, this function can block.) if the allocation failed or
486 * alloc is false, return NULL.
487 *
488 * if path is not NULL, fill it for the caller's investigation.
489 *
490 * if tagmask is not zero, search only for nodes with the tag set.
491 * note that, however, this function doesn't check the tagmask for the leaf
492 * pointer. it's a caller's responsibility to investigate the value which
493 * is pointed by the returned pointer if necessary.
494 *
495 * while this function is a bit large, as it's called with some constant
496 * arguments, inlining might have benefits. anyway, a compiler will decide.
497 */
498
499 static inline void **
500 radix_tree_lookup_ptr(struct radix_tree *t, uint64_t idx,
501 struct radix_tree_path *path, bool alloc, const unsigned int tagmask)
502 {
503 struct radix_tree_node *n;
504 int hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
505 int shift;
506 void **vpp;
507 const uint64_t mask = (UINT64_C(1) << RADIX_TREE_BITS_PER_HEIGHT) - 1;
508 struct radix_tree_node_ref *refs = NULL;
509
510 /*
511 * check unsupported combinations
512 */
513 KASSERT(tagmask == 0 || !alloc);
514 KASSERT(path == NULL || !alloc);
515 vpp = &t->t_root;
516 if (path != NULL) {
517 refs = path->p_refs;
518 refs->pptr = vpp;
519 }
520 n = NULL;
521 for (shift = 64 - RADIX_TREE_BITS_PER_HEIGHT; shift >= 0;) {
522 struct radix_tree_node *c;
523 void *entry;
524 const uint64_t i = (idx >> shift) & mask;
525
526 if (shift >= hshift) {
527 unsigned int newheight;
528
529 KASSERT(vpp == &t->t_root);
530 if (i == 0) {
531 shift -= RADIX_TREE_BITS_PER_HEIGHT;
532 continue;
533 }
534 if (!alloc) {
535 if (path != NULL) {
536 KASSERT((refs - path->p_refs) == 0);
537 path->p_lastidx =
538 RADIX_TREE_INVALID_HEIGHT;
539 }
540 return NULL;
541 }
542 newheight = shift / RADIX_TREE_BITS_PER_HEIGHT + 1;
543 if (radix_tree_grow(t, newheight)) {
544 return NULL;
545 }
546 hshift = RADIX_TREE_BITS_PER_HEIGHT * t->t_height;
547 }
548 entry = *vpp;
549 c = entry_ptr(entry);
550 if (c == NULL ||
551 (tagmask != 0 &&
552 (entry_tagmask(entry) & tagmask) == 0)) {
553 if (!alloc) {
554 if (path != NULL) {
555 path->p_lastidx = refs - path->p_refs;
556 }
557 return NULL;
558 }
559 c = radix_tree_alloc_node();
560 if (c == NULL) {
561 return NULL;
562 }
563 *vpp = c;
564 }
565 n = c;
566 vpp = &n->n_ptrs[i];
567 if (path != NULL) {
568 refs++;
569 refs->pptr = vpp;
570 }
571 shift -= RADIX_TREE_BITS_PER_HEIGHT;
572 }
573 if (alloc) {
574 KASSERT(*vpp == NULL);
575 }
576 if (path != NULL) {
577 path->p_lastidx = refs - path->p_refs;
578 }
579 return vpp;
580 }
581
582 /*
583 * radix_tree_undo_insert_node:
584 *
585 * Undo the effects of a failed insert. The conditions that led to the
586 * insert may change and it may not be retried. If the insert is not
587 * retried, there will be no corresponding radix_tree_remove_node() for
588 * this index in the future. Therefore any adjustments made to the tree
589 * before memory was exhausted must be reverted.
590 */
591
592 static __noinline void
593 radix_tree_undo_insert_node(struct radix_tree *t, uint64_t idx)
594 {
595 struct radix_tree_path path;
596 int i;
597
598 (void)radix_tree_lookup_ptr(t, idx, &path, false, 0);
599 if (path.p_lastidx == RADIX_TREE_INVALID_HEIGHT) {
600 /*
601 * no nodes were inserted.
602 */
603 return;
604 }
605 for (i = path.p_lastidx - 1; i >= 0; i--) {
606 struct radix_tree_node ** const pptr =
607 (struct radix_tree_node **)path_pptr(t, &path, i);
608 struct radix_tree_node *n;
609
610 KASSERT(pptr != NULL);
611 n = entry_ptr(*pptr);
612 KASSERT(n != NULL);
613 if (radix_tree_sum_node(n) != 0) {
614 break;
615 }
616 radix_tree_free_node(n);
617 *pptr = NULL;
618 }
619 /*
620 * fix up height
621 */
622 if (i < 0) {
623 KASSERT(t->t_root == NULL);
624 t->t_height = 0;
625 }
626 }
627
628 /*
629 * radix_tree_insert_node:
630 *
631 * Insert the node at the given index.
632 *
633 * It's illegal to insert NULL. It's illegal to insert a non-aligned pointer.
634 *
635 * This function returns ENOMEM if necessary memory allocation failed.
636 * Otherwise, this function returns 0.
637 *
638 * Note that inserting a node can involves memory allocation for intermediate
639 * nodes. If _KERNEL, it's done with no-sleep IPL_NONE memory allocation.
640 *
641 * For the newly inserted node, all tags are cleared.
642 */
643
644 int
645 radix_tree_insert_node(struct radix_tree *t, uint64_t idx, void *p)
646 {
647 void **vpp;
648
649 KASSERT(p != NULL);
650 KASSERT(entry_tagmask(entry_compose(p, 0)) == 0);
651 vpp = radix_tree_lookup_ptr(t, idx, NULL, true, 0);
652 if (__predict_false(vpp == NULL)) {
653 radix_tree_undo_insert_node(t, idx);
654 return ENOMEM;
655 }
656 KASSERT(*vpp == NULL);
657 *vpp = p;
658 return 0;
659 }
660
661 /*
662 * radix_tree_replace_node:
663 *
664 * Replace a node at the given index with the given node and return the
665 * replaced one.
666 *
667 * It's illegal to try to replace a node which has not been inserted.
668 *
669 * This function keeps tags intact.
670 */
671
672 void *
673 radix_tree_replace_node(struct radix_tree *t, uint64_t idx, void *p)
674 {
675 void **vpp;
676 void *oldp;
677
678 KASSERT(p != NULL);
679 KASSERT(entry_tagmask(entry_compose(p, 0)) == 0);
680 vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
681 KASSERT(vpp != NULL);
682 oldp = *vpp;
683 KASSERT(oldp != NULL);
684 *vpp = entry_compose(p, entry_tagmask(*vpp));
685 return entry_ptr(oldp);
686 }
687
688 /*
689 * radix_tree_remove_node:
690 *
691 * Remove the node at the given index.
692 *
693 * It's illegal to try to remove a node which has not been inserted.
694 */
695
696 void *
697 radix_tree_remove_node(struct radix_tree *t, uint64_t idx)
698 {
699 struct radix_tree_path path;
700 void **vpp;
701 void *oldp;
702 int i;
703
704 vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
705 KASSERT(vpp != NULL);
706 oldp = *vpp;
707 KASSERT(oldp != NULL);
708 KASSERT(path.p_lastidx == t->t_height);
709 KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
710 *vpp = NULL;
711 for (i = t->t_height - 1; i >= 0; i--) {
712 void *entry;
713 struct radix_tree_node ** const pptr =
714 (struct radix_tree_node **)path_pptr(t, &path, i);
715 struct radix_tree_node *n;
716
717 KASSERT(pptr != NULL);
718 entry = *pptr;
719 n = entry_ptr(entry);
720 KASSERT(n != NULL);
721 if (radix_tree_sum_node(n) != 0) {
722 break;
723 }
724 radix_tree_free_node(n);
725 *pptr = NULL;
726 }
727 /*
728 * fix up height
729 */
730 if (i < 0) {
731 KASSERT(t->t_root == NULL);
732 t->t_height = 0;
733 }
734 /*
735 * update tags
736 */
737 for (; i >= 0; i--) {
738 void *entry;
739 struct radix_tree_node ** const pptr =
740 (struct radix_tree_node **)path_pptr(t, &path, i);
741 struct radix_tree_node *n;
742 unsigned int newmask;
743
744 KASSERT(pptr != NULL);
745 entry = *pptr;
746 n = entry_ptr(entry);
747 KASSERT(n != NULL);
748 KASSERT(radix_tree_sum_node(n) != 0);
749 newmask = radix_tree_sum_node(n) & RADIX_TREE_TAG_MASK;
750 if (newmask == entry_tagmask(entry)) {
751 break;
752 }
753 *pptr = entry_compose(n, newmask);
754 }
755 /*
756 * XXX is it worth to try to reduce height?
757 * if we do that, make radix_tree_grow rollback its change as well.
758 */
759 return entry_ptr(oldp);
760 }
761
762 /*
763 * radix_tree_lookup_node:
764 *
765 * Returns the node at the given index.
766 * Returns NULL if nothing is found at the given index.
767 */
768
769 void *
770 radix_tree_lookup_node(struct radix_tree *t, uint64_t idx)
771 {
772 void **vpp;
773
774 vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
775 if (vpp == NULL) {
776 return NULL;
777 }
778 return entry_ptr(*vpp);
779 }
780
781 static inline void
782 gang_lookup_init(struct radix_tree *t, uint64_t idx,
783 struct radix_tree_path *path, const unsigned int tagmask)
784 {
785 void **vpp __unused;
786
787 vpp = radix_tree_lookup_ptr(t, idx, path, false, tagmask);
788 KASSERT(vpp == NULL ||
789 vpp == path_pptr(t, path, path->p_lastidx));
790 KASSERT(&t->t_root == path_pptr(t, path, 0));
791 KASSERT(path->p_lastidx == RADIX_TREE_INVALID_HEIGHT ||
792 path->p_lastidx == t->t_height ||
793 !entry_match_p(*path_pptr(t, path, path->p_lastidx), tagmask));
794 }
795
796 /*
797 * gang_lookup_scan:
798 *
799 * a helper routine for radix_tree_gang_lookup_node and its variants.
800 */
801
802 static inline unsigned int
803 __attribute__((__always_inline__))
804 gang_lookup_scan(struct radix_tree *t, struct radix_tree_path *path,
805 void **results, const unsigned int maxresults, const unsigned int tagmask,
806 const bool reverse, const bool dense)
807 {
808
809 /*
810 * we keep the path updated only for lastidx-1.
811 * vpp is what path_pptr(t, path, lastidx) would be.
812 */
813 void **vpp;
814 unsigned int nfound;
815 unsigned int lastidx;
816 /*
817 * set up scan direction dependant constants so that we can iterate
818 * n_ptrs as the following.
819 *
820 * for (i = first; i != guard; i += step)
821 * visit n->n_ptrs[i];
822 */
823 const int step = reverse ? -1 : 1;
824 const unsigned int first = reverse ? RADIX_TREE_PTR_PER_NODE - 1 : 0;
825 const unsigned int last = reverse ? 0 : RADIX_TREE_PTR_PER_NODE - 1;
826 const unsigned int guard = last + step;
827
828 KASSERT(maxresults > 0);
829 KASSERT(&t->t_root == path_pptr(t, path, 0));
830 lastidx = path->p_lastidx;
831 KASSERT(lastidx == RADIX_TREE_INVALID_HEIGHT ||
832 lastidx == t->t_height ||
833 !entry_match_p(*path_pptr(t, path, lastidx), tagmask));
834 nfound = 0;
835 if (lastidx == RADIX_TREE_INVALID_HEIGHT) {
836 /*
837 * requested idx is beyond the right-most node.
838 */
839 if (reverse && !dense) {
840 lastidx = 0;
841 vpp = path_pptr(t, path, lastidx);
842 goto descend;
843 }
844 return 0;
845 }
846 vpp = path_pptr(t, path, lastidx);
847 while (/*CONSTCOND*/true) {
848 struct radix_tree_node *n;
849 unsigned int i;
850
851 if (entry_match_p(*vpp, tagmask)) {
852 KASSERT(lastidx == t->t_height);
853 /*
854 * record the matching non-NULL leaf.
855 */
856 results[nfound] = entry_ptr(*vpp);
857 nfound++;
858 if (nfound == maxresults) {
859 return nfound;
860 }
861 } else if (dense) {
862 return nfound;
863 }
864 scan_siblings:
865 /*
866 * try to find the next matching non-NULL sibling.
867 */
868 if (lastidx == 0) {
869 /*
870 * the root has no siblings.
871 * we've done.
872 */
873 KASSERT(vpp == &t->t_root);
874 break;
875 }
876 n = path_node(t, path, lastidx - 1);
877 for (i = vpp - n->n_ptrs + step; i != guard; i += step) {
878 KASSERT(i < RADIX_TREE_PTR_PER_NODE);
879 if (entry_match_p(n->n_ptrs[i], tagmask)) {
880 vpp = &n->n_ptrs[i];
881 break;
882 } else if (dense) {
883 return nfound;
884 }
885 }
886 if (i == guard) {
887 /*
888 * not found. go to parent.
889 */
890 lastidx--;
891 vpp = path_pptr(t, path, lastidx);
892 goto scan_siblings;
893 }
894 descend:
895 /*
896 * following the left-most (or right-most in the case of
897 * reverse scan) child node, descend until reaching the leaf or
898 * a non-matching entry.
899 */
900 while (entry_match_p(*vpp, tagmask) && lastidx < t->t_height) {
901 /*
902 * save vpp in the path so that we can come back to this
903 * node after finishing visiting children.
904 */
905 path->p_refs[lastidx].pptr = vpp;
906 n = entry_ptr(*vpp);
907 vpp = &n->n_ptrs[first];
908 lastidx++;
909 }
910 }
911 return nfound;
912 }
913
914 /*
915 * radix_tree_gang_lookup_node:
916 *
917 * Scan the tree starting from the given index in the ascending order and
918 * return found nodes.
919 *
920 * results should be an array large enough to hold maxresults pointers.
921 * This function returns the number of nodes found, up to maxresults.
922 * Returning less than maxresults means there are no more nodes in the tree.
923 *
924 * If dense == true, this function stops scanning when it founds a hole of
925 * indexes. I.e. an index for which radix_tree_lookup_node would returns NULL.
926 * If dense == false, this function skips holes and continue scanning until
927 * maxresults nodes are found or it reaches the limit of the index range.
928 *
929 * The result of this function is semantically equivalent to what could be
930 * obtained by repeated calls of radix_tree_lookup_node with increasing index.
931 * but this function is expected to be computationally cheaper when looking up
932 * multiple nodes at once. Especially, it's expected to be much cheaper when
933 * node indexes are distributed sparsely.
934 *
935 * Note that this function doesn't return index values of found nodes.
936 * Thus, in the case of dense == false, if index values are important for
937 * a caller, it's the caller's responsibility to check them, typically
938 * by examining the returned nodes using some caller-specific knowledge
939 * about them.
940 * In the case of dense == true, a node returned via results[N] is always for
941 * the index (idx + N).
942 */
943
944 unsigned int
945 radix_tree_gang_lookup_node(struct radix_tree *t, uint64_t idx,
946 void **results, unsigned int maxresults, bool dense)
947 {
948 struct radix_tree_path path;
949
950 gang_lookup_init(t, idx, &path, 0);
951 return gang_lookup_scan(t, &path, results, maxresults, 0, false, dense);
952 }
953
954 /*
955 * radix_tree_gang_lookup_node_reverse:
956 *
957 * Same as radix_tree_gang_lookup_node except that this one scans the
958 * tree in the reverse order. I.e. descending index values.
959 */
960
961 unsigned int
962 radix_tree_gang_lookup_node_reverse(struct radix_tree *t, uint64_t idx,
963 void **results, unsigned int maxresults, bool dense)
964 {
965 struct radix_tree_path path;
966
967 gang_lookup_init(t, idx, &path, 0);
968 return gang_lookup_scan(t, &path, results, maxresults, 0, true, dense);
969 }
970
971 /*
972 * radix_tree_gang_lookup_tagged_node:
973 *
974 * Same as radix_tree_gang_lookup_node except that this one only returns
975 * nodes tagged with tagid.
976 *
977 * It's illegal to call this function with tagmask 0.
978 */
979
980 unsigned int
981 radix_tree_gang_lookup_tagged_node(struct radix_tree *t, uint64_t idx,
982 void **results, unsigned int maxresults, bool dense, unsigned int tagmask)
983 {
984 struct radix_tree_path path;
985
986 KASSERT(tagmask != 0);
987 gang_lookup_init(t, idx, &path, tagmask);
988 return gang_lookup_scan(t, &path, results, maxresults, tagmask, false,
989 dense);
990 }
991
992 /*
993 * radix_tree_gang_lookup_tagged_node_reverse:
994 *
995 * Same as radix_tree_gang_lookup_tagged_node except that this one scans the
996 * tree in the reverse order. I.e. descending index values.
997 */
998
999 unsigned int
1000 radix_tree_gang_lookup_tagged_node_reverse(struct radix_tree *t, uint64_t idx,
1001 void **results, unsigned int maxresults, bool dense, unsigned int tagmask)
1002 {
1003 struct radix_tree_path path;
1004
1005 KASSERT(tagmask != 0);
1006 gang_lookup_init(t, idx, &path, tagmask);
1007 return gang_lookup_scan(t, &path, results, maxresults, tagmask, true,
1008 dense);
1009 }
1010
1011 /*
1012 * radix_tree_get_tag:
1013 *
1014 * Return the tagmask for the node at the given index.
1015 *
1016 * It's illegal to call this function for a node which has not been inserted.
1017 */
1018
1019 unsigned int
1020 radix_tree_get_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
1021 {
1022 /*
1023 * the following two implementations should behave same.
1024 * the former one was chosen because it seems faster.
1025 */
1026 #if 1
1027 void **vpp;
1028
1029 vpp = radix_tree_lookup_ptr(t, idx, NULL, false, tagmask);
1030 if (vpp == NULL) {
1031 return false;
1032 }
1033 KASSERT(*vpp != NULL);
1034 return (entry_tagmask(*vpp) & tagmask);
1035 #else
1036 void **vpp;
1037
1038 vpp = radix_tree_lookup_ptr(t, idx, NULL, false, 0);
1039 KASSERT(vpp != NULL);
1040 return (entry_tagmask(*vpp) & tagmask);
1041 #endif
1042 }
1043
1044 /*
1045 * radix_tree_set_tag:
1046 *
1047 * Set the tag for the node at the given index.
1048 *
1049 * It's illegal to call this function for a node which has not been inserted.
1050 * It's illegal to call this function with tagmask 0.
1051 */
1052
1053 void
1054 radix_tree_set_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
1055 {
1056 struct radix_tree_path path;
1057 void **vpp __unused;
1058 int i;
1059
1060 KASSERT(tagmask != 0);
1061 vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
1062 KASSERT(vpp != NULL);
1063 KASSERT(*vpp != NULL);
1064 KASSERT(path.p_lastidx == t->t_height);
1065 KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
1066 for (i = t->t_height; i >= 0; i--) {
1067 void ** const pptr = (void **)path_pptr(t, &path, i);
1068 void *entry;
1069
1070 KASSERT(pptr != NULL);
1071 entry = *pptr;
1072 if ((entry_tagmask(entry) & tagmask) != 0) {
1073 break;
1074 }
1075 *pptr = (void *)((uintptr_t)entry | tagmask);
1076 }
1077 }
1078
1079 /*
1080 * radix_tree_clear_tag:
1081 *
1082 * Clear the tag for the node at the given index.
1083 *
1084 * It's illegal to call this function for a node which has not been inserted.
1085 * It's illegal to call this function with tagmask 0.
1086 */
1087
1088 void
1089 radix_tree_clear_tag(struct radix_tree *t, uint64_t idx, unsigned int tagmask)
1090 {
1091 struct radix_tree_path path;
1092 void **vpp;
1093 int i;
1094
1095 KASSERT(tagmask != 0);
1096 vpp = radix_tree_lookup_ptr(t, idx, &path, false, 0);
1097 KASSERT(vpp != NULL);
1098 KASSERT(*vpp != NULL);
1099 KASSERT(path.p_lastidx == t->t_height);
1100 KASSERT(vpp == path_pptr(t, &path, path.p_lastidx));
1101 /*
1102 * if already cleared, nothing to do
1103 */
1104 if ((entry_tagmask(*vpp) & tagmask) == 0) {
1105 return;
1106 }
1107 /*
1108 * clear the tag only if no children have the tag.
1109 */
1110 for (i = t->t_height; i >= 0; i--) {
1111 void ** const pptr = (void **)path_pptr(t, &path, i);
1112 void *entry;
1113
1114 KASSERT(pptr != NULL);
1115 entry = *pptr;
1116 KASSERT((entry_tagmask(entry) & tagmask) != 0);
1117 *pptr = entry_compose(entry_ptr(entry),
1118 entry_tagmask(entry) & ~tagmask);
1119 /*
1120 * check if we should proceed to process the next level.
1121 */
1122 if (0 < i) {
1123 struct radix_tree_node *n = path_node(t, &path, i - 1);
1124
1125 if ((radix_tree_sum_node(n) & tagmask) != 0) {
1126 break;
1127 }
1128 }
1129 }
1130 }
1131
1132 #if defined(UNITTEST)
1133
1134 #include <inttypes.h>
1135 #include <stdio.h>
1136
1137 static void
1138 radix_tree_dump_node(const struct radix_tree *t, void *vp,
1139 uint64_t offset, unsigned int height)
1140 {
1141 struct radix_tree_node *n;
1142 unsigned int i;
1143
1144 for (i = 0; i < t->t_height - height; i++) {
1145 printf(" ");
1146 }
1147 if (entry_tagmask(vp) == 0) {
1148 printf("[%" PRIu64 "] %p", offset, entry_ptr(vp));
1149 } else {
1150 printf("[%" PRIu64 "] %p (tagmask=0x%x)", offset, entry_ptr(vp),
1151 entry_tagmask(vp));
1152 }
1153 if (height == 0) {
1154 printf(" (leaf)\n");
1155 return;
1156 }
1157 n = entry_ptr(vp);
1158 assert((radix_tree_sum_node(n) & RADIX_TREE_TAG_MASK) ==
1159 entry_tagmask(vp));
1160 printf(" (%u children)\n", radix_tree_node_count_ptrs(n));
1161 for (i = 0; i < __arraycount(n->n_ptrs); i++) {
1162 void *c;
1163
1164 c = n->n_ptrs[i];
1165 if (c == NULL) {
1166 continue;
1167 }
1168 radix_tree_dump_node(t, c,
1169 offset + i * (UINT64_C(1) <<
1170 (RADIX_TREE_BITS_PER_HEIGHT * (height - 1))), height - 1);
1171 }
1172 }
1173
1174 void radix_tree_dump(const struct radix_tree *);
1175
1176 void
1177 radix_tree_dump(const struct radix_tree *t)
1178 {
1179
1180 printf("tree %p height=%u\n", t, t->t_height);
1181 radix_tree_dump_node(t, t->t_root, 0, t->t_height);
1182 }
1183
1184 static void
1185 test1(void)
1186 {
1187 struct radix_tree s;
1188 struct radix_tree *t = &s;
1189 void *results[3];
1190
1191 radix_tree_init_tree(t);
1192 radix_tree_dump(t);
1193 assert(radix_tree_lookup_node(t, 0) == NULL);
1194 assert(radix_tree_lookup_node(t, 1000) == NULL);
1195 assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 0);
1196 assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 0);
1197 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 0);
1198 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 0);
1199 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false) ==
1200 0);
1201 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true) ==
1202 0);
1203 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
1204 == 0);
1205 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
1206 == 0);
1207 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
1208 == 0);
1209 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
1210 == 0);
1211 assert(radix_tree_gang_lookup_tagged_node(t, 1000, results, 3, false, 1)
1212 == 0);
1213 assert(radix_tree_gang_lookup_tagged_node(t, 1000, results, 3, true, 1)
1214 == 0);
1215 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1216 false, 1) == 0);
1217 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1218 true, 1) == 0);
1219 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 1000, results, 3,
1220 false, 1) == 0);
1221 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 1000, results, 3,
1222 true, 1) == 0);
1223 assert(radix_tree_empty_tree_p(t));
1224 assert(radix_tree_empty_tagged_tree_p(t, 1));
1225 assert(radix_tree_empty_tagged_tree_p(t, 2));
1226 assert(radix_tree_insert_node(t, 0, (void *)0xdeadbea0) == 0);
1227 assert(!radix_tree_empty_tree_p(t));
1228 assert(radix_tree_empty_tagged_tree_p(t, 1));
1229 assert(radix_tree_empty_tagged_tree_p(t, 2));
1230 assert(radix_tree_lookup_node(t, 0) == (void *)0xdeadbea0);
1231 assert(radix_tree_lookup_node(t, 1000) == NULL);
1232 memset(results, 0, sizeof(results));
1233 assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 1);
1234 assert(results[0] == (void *)0xdeadbea0);
1235 memset(results, 0, sizeof(results));
1236 assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 1);
1237 assert(results[0] == (void *)0xdeadbea0);
1238 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 0);
1239 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 0);
1240 memset(results, 0, sizeof(results));
1241 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false) ==
1242 1);
1243 assert(results[0] == (void *)0xdeadbea0);
1244 memset(results, 0, sizeof(results));
1245 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true) ==
1246 1);
1247 assert(results[0] == (void *)0xdeadbea0);
1248 memset(results, 0, sizeof(results));
1249 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
1250 == 1);
1251 assert(results[0] == (void *)0xdeadbea0);
1252 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
1253 == 0);
1254 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
1255 == 0);
1256 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
1257 == 0);
1258 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1259 false, 1) == 0);
1260 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1261 true, 1) == 0);
1262 assert(radix_tree_insert_node(t, 1000, (void *)0xdeadbea0) == 0);
1263 assert(radix_tree_remove_node(t, 0) == (void *)0xdeadbea0);
1264 assert(!radix_tree_empty_tree_p(t));
1265 radix_tree_dump(t);
1266 assert(radix_tree_lookup_node(t, 0) == NULL);
1267 assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1268 memset(results, 0, sizeof(results));
1269 assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 1);
1270 assert(results[0] == (void *)0xdeadbea0);
1271 assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 0);
1272 memset(results, 0, sizeof(results));
1273 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, false) == 1);
1274 assert(results[0] == (void *)0xdeadbea0);
1275 memset(results, 0, sizeof(results));
1276 assert(radix_tree_gang_lookup_node(t, 1000, results, 3, true) == 1);
1277 assert(results[0] == (void *)0xdeadbea0);
1278 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, false)
1279 == 0);
1280 assert(radix_tree_gang_lookup_node_reverse(t, 0, results, 3, true)
1281 == 0);
1282 memset(results, 0, sizeof(results));
1283 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, false)
1284 == 1);
1285 memset(results, 0, sizeof(results));
1286 assert(radix_tree_gang_lookup_node_reverse(t, 1000, results, 3, true)
1287 == 1);
1288 assert(results[0] == (void *)0xdeadbea0);
1289 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, false, 1)
1290 == 0);
1291 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 3, true, 1)
1292 == 0);
1293 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1294 false, 1) == 0);
1295 assert(radix_tree_gang_lookup_tagged_node_reverse(t, 0, results, 3,
1296 true, 1) == 0);
1297 assert(!radix_tree_get_tag(t, 1000, 1));
1298 assert(!radix_tree_get_tag(t, 1000, 2));
1299 assert(radix_tree_get_tag(t, 1000, 2 | 1) == 0);
1300 assert(radix_tree_empty_tagged_tree_p(t, 1));
1301 assert(radix_tree_empty_tagged_tree_p(t, 2));
1302 radix_tree_set_tag(t, 1000, 2);
1303 assert(!radix_tree_get_tag(t, 1000, 1));
1304 assert(radix_tree_get_tag(t, 1000, 2));
1305 assert(radix_tree_get_tag(t, 1000, 2 | 1) == 2);
1306 assert(radix_tree_empty_tagged_tree_p(t, 1));
1307 assert(!radix_tree_empty_tagged_tree_p(t, 2));
1308 radix_tree_dump(t);
1309 assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1310 assert(radix_tree_insert_node(t, 0, (void *)0xbea0) == 0);
1311 radix_tree_dump(t);
1312 assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
1313 assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1314 assert(radix_tree_insert_node(t, UINT64_C(10000000000), (void *)0xdea0)
1315 == 0);
1316 radix_tree_dump(t);
1317 assert(radix_tree_lookup_node(t, 0) == (void *)0xbea0);
1318 assert(radix_tree_lookup_node(t, 1000) == (void *)0xdeadbea0);
1319 assert(radix_tree_lookup_node(t, UINT64_C(10000000000)) ==
1320 (void *)0xdea0);
1321 radix_tree_dump(t);
1322 assert(!radix_tree_get_tag(t, 0, 2));
1323 assert(radix_tree_get_tag(t, 1000, 2));
1324 assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 1));
1325 radix_tree_set_tag(t, 0, 2);
1326 radix_tree_set_tag(t, UINT64_C(10000000000), 2);
1327 radix_tree_dump(t);
1328 assert(radix_tree_get_tag(t, 0, 2));
1329 assert(radix_tree_get_tag(t, 1000, 2));
1330 assert(radix_tree_get_tag(t, UINT64_C(10000000000), 2));
1331 radix_tree_clear_tag(t, 0, 2);
1332 radix_tree_clear_tag(t, UINT64_C(10000000000), 2);
1333 radix_tree_dump(t);
1334 assert(!radix_tree_get_tag(t, 0, 2));
1335 assert(radix_tree_get_tag(t, 1000, 2));
1336 assert(!radix_tree_get_tag(t, UINT64_C(10000000000), 2));
1337 radix_tree_dump(t);
1338 assert(radix_tree_replace_node(t, 1000, (void *)0x12345678) ==
1339 (void *)0xdeadbea0);
1340 assert(!radix_tree_get_tag(t, 1000, 1));
1341 assert(radix_tree_get_tag(t, 1000, 2));
1342 assert(radix_tree_get_tag(t, 1000, 2 | 1) == 2);
1343 memset(results, 0, sizeof(results));
1344 assert(radix_tree_gang_lookup_node(t, 0, results, 3, false) == 3);
1345 assert(results[0] == (void *)0xbea0);
1346 assert(results[1] == (void *)0x12345678);
1347 assert(results[2] == (void *)0xdea0);
1348 memset(results, 0, sizeof(results));
1349 assert(radix_tree_gang_lookup_node(t, 0, results, 3, true) == 1);
1350 assert(results[0] == (void *)0xbea0);
1351 memset(results, 0, sizeof(results));
1352 assert(radix_tree_gang_lookup_node(t, 1, results, 3, false) == 2);
1353 assert(results[0] == (void *)0x12345678);
1354 assert(results[1] == (void *)0xdea0);
1355 assert(radix_tree_gang_lookup_node(t, 1, results, 3, true) == 0);
1356 memset(results, 0, sizeof(results));
1357 assert(radix_tree_gang_lookup_node(t, 1001, results, 3, false) == 1);
1358 assert(results[0] == (void *)0xdea0);
1359 assert(radix_tree_gang_lookup_node(t, 1001, results, 3, true) == 0);
1360 assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000001), results, 3,
1361 false) == 0);
1362 assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000001), results, 3,
1363 true) == 0);
1364 assert(radix_tree_gang_lookup_node(t, UINT64_C(1000000000000), results,
1365 3, false) == 0);
1366 assert(radix_tree_gang_lookup_node(t, UINT64_C(1000000000000), results,
1367 3, true) == 0);
1368 memset(results, 0, sizeof(results));
1369 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 100, false, 2)
1370 == 1);
1371 assert(results[0] == (void *)0x12345678);
1372 assert(radix_tree_gang_lookup_tagged_node(t, 0, results, 100, true, 2)
1373 == 0);
1374 assert(entry_tagmask(t->t_root) != 0);
1375 assert(radix_tree_remove_node(t, 1000) == (void *)0x12345678);
1376 assert(entry_tagmask(t->t_root) == 0);
1377 radix_tree_dump(t);
1378 assert(radix_tree_insert_node(t, UINT64_C(10000000001), (void *)0xfff0)
1379 == 0);
1380 memset(results, 0, sizeof(results));
1381 assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000000), results, 3,
1382 false) == 2);
1383 assert(results[0] == (void *)0xdea0);
1384 assert(results[1] == (void *)0xfff0);
1385 memset(results, 0, sizeof(results));
1386 assert(radix_tree_gang_lookup_node(t, UINT64_C(10000000000), results, 3,
1387 true) == 2);
1388 assert(results[0] == (void *)0xdea0);
1389 assert(results[1] == (void *)0xfff0);
1390 memset(results, 0, sizeof(results));
1391 assert(radix_tree_gang_lookup_node_reverse(t, UINT64_C(10000000001),
1392 results, 3, false) == 3);
1393 assert(results[0] == (void *)0xfff0);
1394 assert(results[1] == (void *)0xdea0);
1395 assert(results[2] == (void *)0xbea0);
1396 memset(results, 0, sizeof(results));
1397 assert(radix_tree_gang_lookup_node_reverse(t, UINT64_C(10000000001),
1398 results, 3, true) == 2);
1399 assert(results[0] == (void *)0xfff0);
1400 assert(results[1] == (void *)0xdea0);
1401 assert(radix_tree_remove_node(t, UINT64_C(10000000000)) ==
1402 (void *)0xdea0);
1403 assert(radix_tree_remove_node(t, UINT64_C(10000000001)) ==
1404 (void *)0xfff0);
1405 radix_tree_dump(t);
1406 assert(radix_tree_remove_node(t, 0) == (void *)0xbea0);
1407 radix_tree_dump(t);
1408 radix_tree_fini_tree(t);
1409 }
1410
1411 #include <sys/time.h>
1412
/*
 * Bookkeeping for one node used by test2.  test2 stores the address of the
 * testnode itself in the tree.
 */
struct testnode {
	/* key under which test2 inserts this node */
	uint64_t idx;
	/* tagged[id] is true if test2 sets the tag (1 << id) on this node */
	bool tagged[RADIX_TREE_TAG_ID_MAX];
};
1417
/*
 * printops:
 *
 * Print one "RESULT" line with the rate, in operations per second, of n
 * operations performed between the timestamps stv and etv.
 */
static void
printops(const char *title, const char *name, int tag, unsigned int n,
    const struct timeval *stv, const struct timeval *etv)
{
	/*
	 * widen to 64 bits before scaling to microseconds; multiplying in
	 * time_t would overflow where time_t is a 32-bit type.
	 */
	const uint64_t s =
	    (uint64_t)stv->tv_sec * 1000000 + (uint64_t)stv->tv_usec;
	const uint64_t e =
	    (uint64_t)etv->tv_sec * 1000000 + (uint64_t)etv->tv_usec;

	printf("RESULT %s %s %d %lf op/s\n", title, name, tag,
	    (double)n / (e - s) * 1000000);
}
1428
/* size of the results[] arrays which test2 passes to the gang lookups */
#define TEST2_GANG_LOOKUP_NODES 16
1430
/*
 * test2_should_tag:
 *
 * Decide whether the i-th node of test2 gets the tag with the given id:
 * every 4th node for tag id 0, every 7th node for any other tag id.
 */
static bool
test2_should_tag(unsigned int i, unsigned int tagid)
{

	if (tagid == 0) {
		return (i % 4) == 0;	/* 25% */
	}
	return (i % 7) == 0;	/* 14% */
}
1442
1443 static void
1444 check_tag_count(const unsigned int *ntagged, unsigned int tagmask,
1445 unsigned int count)
1446 {
1447 unsigned int tag;
1448
1449 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1450 if ((tagmask & (1 << tag)) == 0) {
1451 continue;
1452 }
1453 if (((tagmask - 1) & tagmask) == 0) {
1454 assert(count == ntagged[tag]);
1455 } else {
1456 assert(count >= ntagged[tag]);
1457 }
1458 }
1459 }
1460
1461 static void
1462 test2(const char *title, bool dense)
1463 {
1464 struct radix_tree s;
1465 struct radix_tree *t = &s;
1466 struct testnode *n;
1467 unsigned int i;
1468 unsigned int nnodes = 100000;
1469 unsigned int removed;
1470 unsigned int tag;
1471 unsigned int tagmask;
1472 unsigned int ntagged[RADIX_TREE_TAG_ID_MAX];
1473 struct testnode *nodes;
1474 struct timeval stv;
1475 struct timeval etv;
1476
1477 nodes = malloc(nnodes * sizeof(*nodes));
1478 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1479 ntagged[tag] = 0;
1480 }
1481 radix_tree_init_tree(t);
1482 for (i = 0; i < nnodes; i++) {
1483 n = &nodes[i];
1484 n->idx = random();
1485 if (sizeof(long) == 4) {
1486 n->idx <<= 32;
1487 n->idx |= (uint32_t)random();
1488 }
1489 if (dense) {
1490 n->idx %= nnodes * 2;
1491 }
1492 while (radix_tree_lookup_node(t, n->idx) != NULL) {
1493 n->idx++;
1494 }
1495 radix_tree_insert_node(t, n->idx, n);
1496 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1497 tagmask = 1 << tag;
1498
1499 n->tagged[tag] = test2_should_tag(i, tag);
1500 if (n->tagged[tag]) {
1501 radix_tree_set_tag(t, n->idx, tagmask);
1502 ntagged[tag]++;
1503 }
1504 assert((n->tagged[tag] ? tagmask : 0) ==
1505 radix_tree_get_tag(t, n->idx, tagmask));
1506 }
1507 }
1508
1509 gettimeofday(&stv, NULL);
1510 for (i = 0; i < nnodes; i++) {
1511 n = &nodes[i];
1512 assert(radix_tree_lookup_node(t, n->idx) == n);
1513 }
1514 gettimeofday(&etv, NULL);
1515 printops(title, "lookup", 0, nnodes, &stv, &etv);
1516
1517 for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1518 unsigned int count = 0;
1519
1520 gettimeofday(&stv, NULL);
1521 for (i = 0; i < nnodes; i++) {
1522 unsigned int tagged;
1523
1524 n = &nodes[i];
1525 tagged = radix_tree_get_tag(t, n->idx, tagmask);
1526 assert((tagged & ~tagmask) == 0);
1527 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1528 assert((tagmask & (1 << tag)) == 0 ||
1529 n->tagged[tag] == !!(tagged & (1 << tag)));
1530 }
1531 if (tagged) {
1532 count++;
1533 }
1534 }
1535 gettimeofday(&etv, NULL);
1536 check_tag_count(ntagged, tagmask, count);
1537 printops(title, "get_tag", tagmask, nnodes, &stv, &etv);
1538 }
1539
1540 gettimeofday(&stv, NULL);
1541 for (i = 0; i < nnodes; i++) {
1542 n = &nodes[i];
1543 radix_tree_remove_node(t, n->idx);
1544 }
1545 gettimeofday(&etv, NULL);
1546 printops(title, "remove", 0, nnodes, &stv, &etv);
1547
1548 gettimeofday(&stv, NULL);
1549 for (i = 0; i < nnodes; i++) {
1550 n = &nodes[i];
1551 radix_tree_insert_node(t, n->idx, n);
1552 }
1553 gettimeofday(&etv, NULL);
1554 printops(title, "insert", 0, nnodes, &stv, &etv);
1555
1556 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1557 tagmask = 1 << tag;
1558
1559 ntagged[tag] = 0;
1560 gettimeofday(&stv, NULL);
1561 for (i = 0; i < nnodes; i++) {
1562 n = &nodes[i];
1563 if (n->tagged[tag]) {
1564 radix_tree_set_tag(t, n->idx, tagmask);
1565 ntagged[tag]++;
1566 }
1567 }
1568 gettimeofday(&etv, NULL);
1569 printops(title, "set_tag", tag, ntagged[tag], &stv, &etv);
1570 }
1571
1572 gettimeofday(&stv, NULL);
1573 {
1574 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1575 uint64_t nextidx;
1576 unsigned int nfound;
1577 unsigned int total;
1578
1579 nextidx = 0;
1580 total = 0;
1581 while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
1582 (void *)results, __arraycount(results), false)) > 0) {
1583 nextidx = results[nfound - 1]->idx + 1;
1584 total += nfound;
1585 if (nextidx == 0) {
1586 break;
1587 }
1588 }
1589 assert(total == nnodes);
1590 }
1591 gettimeofday(&etv, NULL);
1592 printops(title, "ganglookup", 0, nnodes, &stv, &etv);
1593
1594 gettimeofday(&stv, NULL);
1595 {
1596 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1597 uint64_t nextidx;
1598 unsigned int nfound;
1599 unsigned int total;
1600
1601 nextidx = UINT64_MAX;
1602 total = 0;
1603 while ((nfound = radix_tree_gang_lookup_node_reverse(t, nextidx,
1604 (void *)results, __arraycount(results), false)) > 0) {
1605 nextidx = results[nfound - 1]->idx - 1;
1606 total += nfound;
1607 if (nextidx == UINT64_MAX) {
1608 break;
1609 }
1610 }
1611 assert(total == nnodes);
1612 }
1613 gettimeofday(&etv, NULL);
1614 printops(title, "ganglookup_reverse", 0, nnodes, &stv, &etv);
1615
1616 for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1617 unsigned int total = 0;
1618
1619 gettimeofday(&stv, NULL);
1620 {
1621 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1622 uint64_t nextidx;
1623 unsigned int nfound;
1624
1625 nextidx = 0;
1626 while ((nfound = radix_tree_gang_lookup_tagged_node(t,
1627 nextidx, (void *)results, __arraycount(results),
1628 false, tagmask)) > 0) {
1629 nextidx = results[nfound - 1]->idx + 1;
1630 total += nfound;
1631 }
1632 }
1633 gettimeofday(&etv, NULL);
1634 check_tag_count(ntagged, tagmask, total);
1635 assert(tagmask != 0 || total == 0);
1636 printops(title, "ganglookup_tag", tagmask, total, &stv, &etv);
1637 }
1638
1639 for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1640 unsigned int total = 0;
1641
1642 gettimeofday(&stv, NULL);
1643 {
1644 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1645 uint64_t nextidx;
1646 unsigned int nfound;
1647
1648 nextidx = UINT64_MAX;
1649 while ((nfound =
1650 radix_tree_gang_lookup_tagged_node_reverse(t,
1651 nextidx, (void *)results, __arraycount(results),
1652 false, tagmask)) > 0) {
1653 nextidx = results[nfound - 1]->idx - 1;
1654 total += nfound;
1655 if (nextidx == UINT64_MAX) {
1656 break;
1657 }
1658 }
1659 }
1660 gettimeofday(&etv, NULL);
1661 check_tag_count(ntagged, tagmask, total);
1662 assert(tagmask != 0 || total == 0);
1663 printops(title, "ganglookup_tag_reverse", tagmask, total,
1664 &stv, &etv);
1665 }
1666
1667 removed = 0;
1668 for (tag = 0; tag < RADIX_TREE_TAG_ID_MAX; tag++) {
1669 unsigned int total;
1670
1671 total = 0;
1672 tagmask = 1 << tag;
1673 gettimeofday(&stv, NULL);
1674 {
1675 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1676 uint64_t nextidx;
1677 unsigned int nfound;
1678
1679 nextidx = 0;
1680 while ((nfound = radix_tree_gang_lookup_tagged_node(t,
1681 nextidx, (void *)results, __arraycount(results),
1682 false, tagmask)) > 0) {
1683 for (i = 0; i < nfound; i++) {
1684 radix_tree_remove_node(t,
1685 results[i]->idx);
1686 }
1687 nextidx = results[nfound - 1]->idx + 1;
1688 total += nfound;
1689 if (nextidx == 0) {
1690 break;
1691 }
1692 }
1693 }
1694 gettimeofday(&etv, NULL);
1695 if (tag == 0) {
1696 check_tag_count(ntagged, tagmask, total);
1697 } else {
1698 assert(total <= ntagged[tag]);
1699 }
1700 printops(title, "ganglookup_tag+remove", tagmask, total, &stv,
1701 &etv);
1702 removed += total;
1703 }
1704
1705 gettimeofday(&stv, NULL);
1706 {
1707 struct testnode *results[TEST2_GANG_LOOKUP_NODES];
1708 uint64_t nextidx;
1709 unsigned int nfound;
1710 unsigned int total;
1711
1712 nextidx = 0;
1713 total = 0;
1714 while ((nfound = radix_tree_gang_lookup_node(t, nextidx,
1715 (void *)results, __arraycount(results), false)) > 0) {
1716 for (i = 0; i < nfound; i++) {
1717 assert(results[i] == radix_tree_remove_node(t,
1718 results[i]->idx));
1719 }
1720 nextidx = results[nfound - 1]->idx + 1;
1721 total += nfound;
1722 if (nextidx == 0) {
1723 break;
1724 }
1725 }
1726 assert(total == nnodes - removed);
1727 }
1728 gettimeofday(&etv, NULL);
1729 printops(title, "ganglookup+remove", 0, nnodes - removed, &stv, &etv);
1730
1731 assert(radix_tree_empty_tree_p(t));
1732 for (tagmask = 1; tagmask <= RADIX_TREE_TAG_MASK; tagmask ++) {
1733 assert(radix_tree_empty_tagged_tree_p(t, tagmask));
1734 }
1735 radix_tree_fini_tree(t);
1736 free(nodes);
1737 }
1738
/*
 * Unit test driver: run test1 once, then test2 with the dense and the
 * sparse index distribution.
 */
int
main(int argc, char *argv[])
{
	static const struct {
		const char *title;
		bool dense;
	} runs[] = {
		{ "dense", true },
		{ "sparse", false },
	};
	unsigned int r;

	test1();
	for (r = 0; r < __arraycount(runs); r++) {
		test2(runs[r].title, runs[r].dense);
	}
	return 0;
}
1748
1749 #endif /* defined(UNITTEST) */
1750