rpst.c revision 1.3 1 /* $NetBSD: rpst.c,v 1.3 2009/05/22 11:38:05 yamt Exp $ */
2
3 /*-
4 * Copyright (c)2009 YAMAMOTO Takashi,
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 /*
30 * radix priority search tree
31 *
32 * described in:
33 * SIAM J. COMPUT.
34 * Vol. 14, No. 2, May 1985
35 * PRIORITY SEARCH TREES
36 * EDWARD M. McCREIGHT
37 *
38 * ideas from linux:
39 * - grow tree height on-demand.
40 * - allow duplicated X values. in that case, we act as a heap.
41 */
42
43 #include <sys/cdefs.h>
44
45 #if defined(_KERNEL)
46 __KERNEL_RCSID(0, "$NetBSD: rpst.c,v 1.3 2009/05/22 11:38:05 yamt Exp $");
47 #include <sys/param.h>
48 #else /* defined(_KERNEL) */
49 __RCSID("$NetBSD: rpst.c,v 1.3 2009/05/22 11:38:05 yamt Exp $");
50 #include <assert.h>
51 #include <stdbool.h>
52 #include <string.h>
53 #if 1
54 #define KASSERT assert
55 #else
56 #define KASSERT(a)
57 #endif
58 #endif /* defined(_KERNEL) */
59
60 #include <sys/rpst.h>
61
62 /*
63 * rpst_init_tree: initialize a tree.
64 */
65
66 void
67 rpst_init_tree(struct rpst_tree *t)
68 {
69
70 t->t_root = NULL;
71 t->t_height = 0;
72 }
73
74 /*
75 * rpst_height2max: calculate the maximum index which can be handled by
76 * a tree with the given height.
77 *
78 * 0 ... 0x0000000000000001
79 * 1 ... 0x0000000000000003
80 * 2 ... 0x0000000000000007
81 * 3 ... 0x000000000000000f
82 *
83 * 31 ... 0x00000000ffffffff
84 *
85 * 63 ... 0xffffffffffffffff
86 */
87
88 static uint64_t
89 rpst_height2max(unsigned int height)
90 {
91
92 KASSERT(height < 64);
93 if (height == 63) {
94 return UINT64_MAX;
95 }
96 return (UINT64_C(1) << (height + 1)) - 1;
97 }
98
99 /*
100 * rpst_level2mask: calculate the mask for the given level in the tree.
101 *
102 * the mask used to index root's children is level 0.
103 */
104
105 static uint64_t
106 rpst_level2mask(const struct rpst_tree *t, unsigned int level)
107 {
108 uint64_t mask;
109
110 if (t->t_height < level) {
111 mask = 0;
112 } else {
113 mask = UINT64_C(1) << (t->t_height - level);
114 }
115 return mask;
116 }
117
118 /*
119 * rpst_startmask: calculate the mask for the start of a search.
120 * (ie. the mask for the top-most bit)
121 */
122
123 static uint64_t
124 rpst_startmask(const struct rpst_tree *t)
125 {
126 const uint64_t mask = rpst_level2mask(t, 0);
127
128 KASSERT((mask | (mask - 1)) == rpst_height2max(t->t_height));
129 return mask;
130 }
131
132 /*
133 * rpst_enlarge_tree: enlarge tree so that 'index' can be stored
134 */
135
136 static void
137 rpst_enlarge_tree(struct rpst_tree *t, uint64_t idx)
138 {
139
140 while (idx > rpst_height2max(t->t_height)) {
141 struct rpst_node *n = t->t_root;
142
143 if (n != NULL) {
144 rpst_remove_node(t, n);
145 memset(&n->n_children, 0, sizeof(n->n_children));
146 n->n_children[0] = t->t_root;
147 t->t_root = n;
148 }
149 t->t_height++;
150 }
151 }
152
153 /*
154 * rpst_insert_node1: a helper for rpst_insert_node.
155 */
156
157 static struct rpst_node *
158 rpst_insert_node1(struct rpst_node **where, struct rpst_node *n, uint64_t mask)
159 {
160 struct rpst_node *cur;
161 unsigned int idx;
162
163 KASSERT((n->n_x & ((-mask) << 1)) == 0);
164 next:
165 cur = *where;
166 if (cur == NULL) {
167 memset(&n->n_children, 0, sizeof(n->n_children));
168 *where = n;
169 return NULL;
170 }
171 if (n->n_y == cur->n_y && n->n_x == cur->n_x) {
172 return cur;
173 }
174 if (n->n_y < cur->n_y) {
175 /* swap cur and n */
176 memcpy(n->n_children, cur->n_children, sizeof(n->n_children));
177 *where = n;
178 n = cur;
179 cur = *where;
180 }
181 KASSERT(*where == cur);
182 idx = (n->n_x & mask) != 0;
183 where = &cur->n_children[idx];
184 KASSERT((*where) == NULL || ((((*where)->n_x & mask) != 0) == idx));
185 KASSERT((*where) == NULL || (*where)->n_y >= cur->n_y);
186 mask >>= 1;
187 goto next;
188 }
189
190 /*
191 * rpst_insert_node: insert a node into the tree.
192 *
193 * => return NULL on success.
194 * => if a duplicated node (a node with the same X,Y pair as ours) is found,
195 * return the node. in that case, the tree is intact.
196 */
197
198 struct rpst_node *
199 rpst_insert_node(struct rpst_tree *t, struct rpst_node *n)
200 {
201
202 rpst_enlarge_tree(t, n->n_x);
203 return rpst_insert_node1(&t->t_root, n, rpst_startmask(t));
204 }
205
206 /*
207 * rpst_find_pptr: find a pointer to the given node.
208 *
209 * also, return the parent node via parentp. (NULL for the root node.)
210 *
211 * XXX is it better to simply make each nodes have a pointer to parent?
212 */
213
214 static struct rpst_node **
215 rpst_find_pptr(struct rpst_node **where, struct rpst_node *n, uint64_t mask,
216 struct rpst_node **parentp)
217 {
218 struct rpst_node *pn = NULL;
219 struct rpst_node *cur;
220 unsigned int idx;
221
222 next:
223 cur = *where;
224 KASSERT(cur != NULL);
225 if (cur == n) {
226 KASSERT(pn == NULL || pn->n_y <= n->n_y);
227 *parentp = pn;
228 return where;
229 }
230 idx = (n->n_x & mask) != 0;
231 pn = cur;
232 where = &cur->n_children[idx];
233 KASSERT((*where) == NULL || ((((*where)->n_x & mask) != 0) == idx));
234 KASSERT((*where) == NULL || (*where)->n_y >= cur->n_y);
235 mask >>= 1;
236 goto next;
237 }
238
239 /*
240 * rpst_remove_node_at: remove a node at *where.
241 */
242
243 static void
244 rpst_remove_node_at(struct rpst_node **where)
245 {
246 struct rpst_node *tmp[2];
247 struct rpst_node *cur;
248 struct rpst_node *selected;
249 unsigned int selected_idx;
250 unsigned int i;
251
252 cur = *where;
253 KASSERT(cur != NULL);
254 next:
255 selected = NULL;
256 for (i = 0; i < 2; i++) {
257 struct rpst_node *c;
258
259 c = cur->n_children[i];
260 if (selected == NULL || (c != NULL && c->n_y < selected->n_y)) {
261 selected = c;
262 selected_idx = i;
263 }
264 }
265 *where = selected;
266 if (selected == NULL) {
267 return;
268 }
269 /*
270 * swap selected->n_children and cur->n_children.
271 */
272 memcpy(tmp, selected->n_children, sizeof(tmp));
273 memcpy(selected->n_children, cur->n_children, sizeof(tmp));
274 memcpy(cur->n_children, tmp, sizeof(tmp));
275 where = &selected->n_children[selected_idx];
276 goto next;
277 }
278
279 /*
280 * rpst_remove_node: remove a node from the tree.
281 */
282
283 void
284 rpst_remove_node(struct rpst_tree *t, struct rpst_node *n)
285 {
286 struct rpst_node *parent;
287 struct rpst_node **where;
288
289 where = rpst_find_pptr(&t->t_root, n, rpst_startmask(t), &parent);
290 KASSERT(*where == n);
291 rpst_remove_node_at(where);
292 }
293
294 static bool __unused
295 rpst_iterator_match_p(const struct rpst_node *n, const struct rpst_iterator *it)
296 {
297
298 if (n->n_y > it->it_max_y) {
299 return false;
300 }
301 if (n->n_x < it->it_min_x) {
302 return false;
303 }
304 if (n->n_x > it->it_max_x) {
305 return false;
306 }
307 return true;
308 }
309
310 struct rpst_node *
311 rpst_iterate_first(struct rpst_tree *t, uint64_t max_y, uint64_t min_x,
312 uint64_t max_x, struct rpst_iterator *it)
313 {
314 struct rpst_node *n;
315
316 KASSERT(min_x <= max_x);
317 n = t->t_root;
318 if (n == NULL || n->n_y > max_y) {
319 return NULL;
320 }
321 it->it_tree = t;
322 it->it_cur = n;
323 it->it_idx = (min_x & rpst_startmask(t)) != 0;
324 it->it_level = 0;
325 it->it_max_y = max_y;
326 it->it_min_x = min_x;
327 it->it_max_x = max_x;
328 return rpst_iterate_next(it);
329 }
330
331 static unsigned int
332 rpst_node_on_edge_p(const struct rpst_node *n, uint64_t val, uint64_t mask)
333 {
334
335 return ((n->n_x ^ val) & ((-mask) << 1)) == 0;
336 }
337
338 static uint64_t
339 rpst_maxidx(const struct rpst_node *n, uint64_t max_x, uint64_t mask)
340 {
341
342 if (rpst_node_on_edge_p(n, max_x, mask)) {
343 return (max_x & mask) != 0;
344 } else {
345 return 1;
346 }
347 }
348
349 static uint64_t
350 rpst_minidx(const struct rpst_node *n, uint64_t min_x, uint64_t mask)
351 {
352
353 if (rpst_node_on_edge_p(n, min_x, mask)) {
354 return (min_x & mask) != 0;
355 } else {
356 return 0;
357 }
358 }
359
360 struct rpst_node *
361 rpst_iterate_next(struct rpst_iterator *it)
362 {
363 struct rpst_tree *t;
364 struct rpst_node *n;
365 struct rpst_node *next;
366 const uint64_t max_y = it->it_max_y;
367 const uint64_t min_x = it->it_min_x;
368 const uint64_t max_x = it->it_max_x;
369 unsigned int idx;
370 unsigned int maxidx;
371 unsigned int level;
372 uint64_t mask;
373
374 t = it->it_tree;
375 n = it->it_cur;
376 idx = it->it_idx;
377 level = it->it_level;
378 mask = rpst_level2mask(t, level);
379 maxidx = rpst_maxidx(n, max_x, mask);
380 KASSERT(n == t->t_root || rpst_iterator_match_p(n, it));
381 next:
382 KASSERT(mask == rpst_level2mask(t, level));
383 KASSERT(idx >= rpst_minidx(n, min_x, mask));
384 KASSERT(maxidx == rpst_maxidx(n, max_x, mask));
385 KASSERT(idx <= maxidx + 2);
386 KASSERT(n != NULL);
387 #if 0
388 printf("%s: cur=%p, idx=%u maxidx=%u level=%u mask=%" PRIx64 "\n",
389 __func__, (void *)n, idx, maxidx, level, mask);
390 #endif
391 if (idx == maxidx + 1) { /* visit the current node */
392 idx++;
393 if (min_x <= n->n_x && n->n_x <= max_x) {
394 it->it_tree = t;
395 it->it_cur = n;
396 it->it_idx = idx;
397 it->it_level = level;
398 KASSERT(rpst_iterator_match_p(n, it));
399 return n; /* report */
400 }
401 goto next;
402 } else if (idx == maxidx + 2) { /* back to the parent */
403 struct rpst_node **where;
404
405 where = rpst_find_pptr(&t->t_root, n, rpst_startmask(t), &next);
406 if (next == NULL) {
407 KASSERT(level == 0);
408 KASSERT(t->t_root == n);
409 KASSERT(&t->t_root == where);
410 return NULL; /* done */
411 }
412 KASSERT(level > 0);
413 level--;
414 n = next;
415 mask = rpst_level2mask(t, level);
416 maxidx = rpst_maxidx(n, max_x, mask);
417 idx = where - n->n_children + 1;
418 KASSERT(idx < 2 + 1);
419 goto next;
420 }
421 /* go to a child */
422 KASSERT(idx < 2);
423 next = n->n_children[idx];
424 if (next == NULL || next->n_y > max_y) {
425 idx++;
426 goto next;
427 }
428 KASSERT(next->n_y >= n->n_y);
429 level++;
430 mask >>= 1;
431 n = next;
432 idx = rpst_minidx(n, min_x, mask);
433 maxidx = rpst_maxidx(n, max_x, mask);
434 #if 0
435 printf("%s: visit %p idx=%u level=%u mask=%llx\n",
436 __func__, n, idx, level, mask);
437 #endif
438 goto next;
439 }
440
441 #if defined(UNITTEST)
442 #include <sys/time.h>
443
444 #include <inttypes.h>
445 #include <stdio.h>
446 #include <stdlib.h>
447
448 static void
449 rpst_dump_node(const struct rpst_node *n, unsigned int depth)
450 {
451 unsigned int i;
452
453 for (i = 0; i < depth; i++) {
454 printf(" ");
455 }
456 printf("[%u]", depth);
457 if (n == NULL) {
458 printf("NULL\n");
459 return;
460 }
461 printf("%p x=%" PRIx64 "(%" PRIu64 ") y=%" PRIx64 "(%" PRIu64 ")\n",
462 (const void *)n, n->n_x, n->n_x, n->n_y, n->n_y);
463 for (i = 0; i < 2; i++) {
464 rpst_dump_node(n->n_children[i], depth + 1);
465 }
466 }
467
468 static void
469 rpst_dump_tree(const struct rpst_tree *t)
470 {
471
472 printf("pst %p height=%u\n", (const void *)t, t->t_height);
473 rpst_dump_node(t->t_root, 0);
474 }
475
476 struct testnode {
477 struct rpst_node n;
478 struct testnode *next;
479 bool failed;
480 bool found;
481 };
482
483 struct rpst_tree t;
484 struct testnode *h = NULL;
485
486 static uintmax_t
487 tvdiff(const struct timeval *tv1, const struct timeval *tv2)
488 {
489
490 return (uintmax_t)tv1->tv_sec * 1000000 + tv1->tv_usec -
491 tv2->tv_sec * 1000000 - tv2->tv_usec;
492 }
493
494 static unsigned int
495 query(uint64_t max_y, uint64_t min_x, uint64_t max_x)
496 {
497 struct testnode *n;
498 struct rpst_node *rn;
499 struct rpst_iterator it;
500 struct timeval start;
501 struct timeval end;
502 unsigned int done;
503
504 printf("quering max_y=%" PRIu64 " min_x=%" PRIu64 " max_x=%" PRIu64
505 "\n",
506 max_y, min_x, max_x);
507 done = 0;
508 gettimeofday(&start, NULL);
509 for (rn = rpst_iterate_first(&t, max_y, min_x, max_x, &it);
510 rn != NULL;
511 rn = rpst_iterate_next(&it)) {
512 done++;
513 #if 0
514 printf("found %p x=%" PRIu64 " y=%" PRIu64 "\n",
515 (void *)rn, rn->n_x, rn->n_y);
516 #endif
517 n = (void *)rn;
518 assert(!n->found);
519 n->found = true;
520 }
521 gettimeofday(&end, NULL);
522 printf("%u nodes found in %ju usecs\n", done,
523 tvdiff(&end, &start));
524
525 gettimeofday(&start, NULL);
526 for (n = h; n != NULL; n = n->next) {
527 assert(n->failed ||
528 n->found == rpst_iterator_match_p(&n->n, &it));
529 n->found = false;
530 }
531 gettimeofday(&end, NULL);
532 printf("(linear search took %ju usecs)\n", tvdiff(&end, &start));
533 return done;
534 }
535
536 int
537 main(int argc, char *argv[])
538 {
539 struct testnode *n;
540 unsigned int i;
541 struct rpst_iterator it;
542 struct timeval start;
543 struct timeval end;
544 uint64_t min_y = UINT64_MAX;
545 uint64_t max_y = 0;
546 uint64_t min_x = UINT64_MAX;
547 uint64_t max_x = 0;
548 uint64_t w;
549 unsigned int done;
550 unsigned int fail;
551 unsigned int num = 500000;
552
553 rpst_init_tree(&t);
554 rpst_dump_tree(&t);
555 assert(NULL == rpst_iterate_first(&t, UINT64_MAX, 0, UINT64_MAX, &it));
556
557 for (i = 0; i < num; i++) {
558 n = malloc(sizeof(*n));
559 if (i > 499000) {
560 n->n.n_x = 10;
561 n->n.n_y = random();
562 } else if (i > 400000) {
563 n->n.n_x = i;
564 n->n.n_y = random();
565 } else {
566 n->n.n_x = random();
567 n->n.n_y = random();
568 }
569 if (n->n.n_y < min_y) {
570 min_y = n->n.n_y;
571 }
572 if (n->n.n_y > max_y) {
573 max_y = n->n.n_y;
574 }
575 if (n->n.n_x < min_x) {
576 min_x = n->n.n_x;
577 }
578 if (n->n.n_x > max_x) {
579 max_x = n->n.n_x;
580 }
581 n->found = false;
582 n->failed = false;
583 n->next = h;
584 h = n;
585 }
586
587 done = 0;
588 fail = 0;
589 gettimeofday(&start, NULL);
590 for (n = h; n != NULL; n = n->next) {
591 struct rpst_node *o;
592 #if 0
593 printf("insert %p x=%" PRIu64 " y=%" PRIu64 "\n",
594 n, n->n.n_x, n->n.n_y);
595 #endif
596 o = rpst_insert_node(&t, &n->n);
597 if (o == NULL) {
598 done++;
599 } else {
600 n->failed = true;
601 fail++;
602 }
603 }
604 gettimeofday(&end, NULL);
605 printf("%u nodes inserted and %u insertion failed in %ju usecs\n",
606 done, fail,
607 tvdiff(&end, &start));
608
609 assert(min_y == 0 || 0 == query(min_y - 1, 0, UINT64_MAX));
610 assert(max_x == UINT64_MAX ||
611 0 == query(UINT64_MAX, max_x + 1, UINT64_MAX));
612 assert(min_x == 0 || 0 == query(UINT64_MAX, 0, min_x - 1));
613
614 done = query(max_y, min_x, max_x);
615 assert(done == num - fail);
616
617 done = query(UINT64_MAX, 0, UINT64_MAX);
618 assert(done == num - fail);
619
620 w = max_x - min_x;
621 query(max_y / 2, min_x, max_x);
622 query(max_y, min_x + w / 2, max_x);
623 query(max_y / 2, min_x + w / 2, max_x);
624 query(max_y / 2, min_x, max_x - w / 2);
625 query(max_y / 2, min_x + w / 3, max_x - w / 3);
626 query(max_y - 1, min_x + 1, max_x - 1);
627 query(UINT64_MAX, 10, 10);
628
629 done = 0;
630 gettimeofday(&start, NULL);
631 for (n = h; n != NULL; n = n->next) {
632 if (n->failed) {
633 continue;
634 }
635 #if 0
636 printf("remove %p x=%" PRIu64 " y=%" PRIu64 "\n",
637 n, n->n.n_x, n->n.n_y);
638 #endif
639 rpst_remove_node(&t, &n->n);
640 done++;
641 }
642 gettimeofday(&end, NULL);
643 printf("%u nodes removed in %ju usecs\n", done,
644 tvdiff(&end, &start));
645
646 rpst_dump_tree(&t);
647 }
648 #endif /* defined(UNITTEST) */
649