/* Lock-free btree for manually registered unwind frames.  */
/* Copyright (C) 2022-2024 Free Software Foundation, Inc.
   Contributed by Thomas Neumann

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#ifndef GCC_UNWIND_DW2_BTREE_H
#define GCC_UNWIND_DW2_BTREE_H

#include <stdbool.h>

// Common logic for version locks.
struct version_lock
{
  // The lock itself.  The lowest bit indicates an exclusive lock,
  // the second bit indicates waiting threads.  All other bits are
  // used as a counter to recognize changes.
  // Overflows are okay here; we must only prevent wrapping back to the
  // same value within one lock_optimistic/validate range.  Even on
  // 32-bit platforms that would require 1 billion frame registrations
  // within the time span of a few assembler instructions.
  uintptr_type version_lock;
};
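
/* An illustrative sketch of the word above (not part of the API):

     bit 0      exclusive lock held
     bit 1      waiting threads present
     bits 2...  version counter, incremented by 4 on every unlock

   E.g., a value of 0 is "unlocked, version 0" and a value of 5
   (binary ...0101) is "locked exclusively at version 1".  */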

#ifdef __GTHREAD_HAS_COND
// We should never get contention within the tree as it rarely changes.
// But if we ever do get contention we use these for waiting.
static __gthread_mutex_t version_lock_mutex = __GTHREAD_MUTEX_INIT;
static __gthread_cond_t version_lock_cond = __GTHREAD_COND_INIT;
#endif

// Initialize in locked state.
static inline void
version_lock_initialize_locked_exclusive (struct version_lock *vl)
{
  vl->version_lock = 1;
}

// Try to lock the node exclusive.
static inline bool
version_lock_try_lock_exclusive (struct version_lock *vl)
{
  uintptr_type state = __atomic_load_n (&(vl->version_lock), __ATOMIC_SEQ_CST);
  if (state & 1)
    return false;
  return __atomic_compare_exchange_n (&(vl->version_lock), &state, state | 1,
				      false, __ATOMIC_SEQ_CST,
				      __ATOMIC_SEQ_CST);
}

// Lock the node exclusive, blocking as needed.
static void
version_lock_lock_exclusive (struct version_lock *vl)
{
#ifndef __GTHREAD_HAS_COND
restart:
#endif

  // We should virtually never get contention here, as frame
  // changes are rare.
  uintptr_type state = __atomic_load_n (&(vl->version_lock), __ATOMIC_SEQ_CST);
  if (!(state & 1))
    {
      if (__atomic_compare_exchange_n (&(vl->version_lock), &state, state | 1,
				       false, __ATOMIC_SEQ_CST,
				       __ATOMIC_SEQ_CST))
	return;
    }

    // We did get contention, wait properly.
#ifdef __GTHREAD_HAS_COND
  __gthread_mutex_lock (&version_lock_mutex);
  state = __atomic_load_n (&(vl->version_lock), __ATOMIC_SEQ_CST);
  while (true)
    {
      // Check if the lock is still held.
      if (!(state & 1))
	{
	  if (__atomic_compare_exchange_n (&(vl->version_lock), &state,
					   state | 1, false, __ATOMIC_SEQ_CST,
					   __ATOMIC_SEQ_CST))
	    {
	      __gthread_mutex_unlock (&version_lock_mutex);
	      return;
	    }
	  else
	    {
	      continue;
	    }
	}

      // Register waiting thread.
      if (!(state & 2))
	{
	  if (!__atomic_compare_exchange_n (&(vl->version_lock), &state,
					    state | 2, false, __ATOMIC_SEQ_CST,
					    __ATOMIC_SEQ_CST))
	    continue;
	}

      // And sleep.
      __gthread_cond_wait (&version_lock_cond, &version_lock_mutex);
      state = __atomic_load_n (&(vl->version_lock), __ATOMIC_SEQ_CST);
    }
#else
  // Spin if we do not have condition variables available.
  // We expect no contention here, so spinning should be okay.
  goto restart;
#endif
}

// Release a locked node and increase the version lock.
static void
version_lock_unlock_exclusive (struct version_lock *vl)
{
  // Increase the version and reset the lock and waiter bits.
  uintptr_type state = __atomic_load_n (&(vl->version_lock), __ATOMIC_SEQ_CST);
  uintptr_type ns = (state + 4) & (~((uintptr_type) 3));
  state = __atomic_exchange_n (&(vl->version_lock), ns, __ATOMIC_SEQ_CST);

#ifdef __GTHREAD_HAS_COND
  if (state & 2)
    {
      // Wake up waiting threads.  This should be extremely rare.
      __gthread_mutex_lock (&version_lock_mutex);
      __gthread_cond_broadcast (&version_lock_cond);
      __gthread_mutex_unlock (&version_lock_mutex);
    }
#endif
}

// Acquire an optimistic "lock".  Note that this does not lock at all, it
// only allows for validation later.
static inline bool
version_lock_lock_optimistic (const struct version_lock *vl, uintptr_type *lock)
{
  uintptr_type state = __atomic_load_n (&(vl->version_lock), __ATOMIC_SEQ_CST);
  *lock = state;

  // Acquiring the lock fails when there is currently an exclusive lock.
  return !(state & 1);
}

// Validate a previously acquired "lock".
static inline bool
version_lock_validate (const struct version_lock *vl, uintptr_type lock)
{
  // Prevent the reordering of non-atomic loads behind the atomic load.
  // Hans Boehm, Can Seqlocks Get Along with Programming Language Memory
  // Models?, Section 4.
  __atomic_thread_fence (__ATOMIC_ACQUIRE);

  // Check that the node is still in the same state.
  uintptr_type state = __atomic_load_n (&(vl->version_lock), __ATOMIC_SEQ_CST);
  return (state == lock);
}
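
/* A minimal sketch of the resulting seqlock-style read protocol; the real
   uses, with full restart handling, are in btree_lookup below:

     uintptr_type lock;
   restart:
     if (!version_lock_lock_optimistic (&vl, &lock))
       goto restart;		// a writer is active, retry
     ... copy shared data into locals ...
     if (!version_lock_validate (&vl, lock))
       goto restart;		// the version changed, discard the locals
     ... only now may the locals be trusted ...  */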

// The largest possible separator value.
static const uintptr_type max_separator = ~((uintptr_type) (0));

struct btree_node;

// Inner entry.  The child tree contains all entries <= separator.
struct inner_entry
{
  uintptr_type separator;
  struct btree_node *child;
};

// Leaf entry.  Stores an object entry.
struct leaf_entry
{
  uintptr_type base, size;
  struct object *ob;
};

// Node types.
enum node_type
{
  btree_node_inner,
  btree_node_leaf,
  btree_node_free
};

// Node sizes.  Chosen such that the result size is roughly 256 bytes.
#define max_fanout_inner 15
#define max_fanout_leaf 10

// A btree node.
struct btree_node
{
  // The version lock used for optimistic lock coupling.
  struct version_lock version_lock;
  // The number of entries.
  unsigned entry_count;
  // The type.
  enum node_type type;
  // The payload.
  union
  {
    // The inner nodes have fence keys, i.e., the right-most entry includes a
    // separator.
    struct inner_entry children[max_fanout_inner];
    struct leaf_entry entries[max_fanout_leaf];
  } content;
};
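
/* A back-of-the-envelope check of the 256-byte target on a typical LP64
   target (8-byte pointers and uintptr_type; exact sizes are ABI dependent,
   hence "roughly" above):

     header: version_lock (8) + entry_count (4) + type (4)        = 16
     inner:  16 + 15 * sizeof (struct inner_entry) = 16 + 15 * 16 = 256
     leaf:   16 + 10 * sizeof (struct leaf_entry)  = 16 + 10 * 24 = 256  */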

// Is an inner node?
static inline bool
btree_node_is_inner (const struct btree_node *n)
{
  return n->type == btree_node_inner;
}

// Is a leaf node?
static inline bool
btree_node_is_leaf (const struct btree_node *n)
{
  return n->type == btree_node_leaf;
}

// Should the node be merged?
static inline bool
btree_node_needs_merge (const struct btree_node *n)
{
  return n->entry_count < (btree_node_is_inner (n) ? (max_fanout_inner / 2)
						   : (max_fanout_leaf / 2));
}

// Get the fence key for inner nodes.
static inline uintptr_type
btree_node_get_fence_key (const struct btree_node *n)
{
  // For inner nodes we just return our right-most entry.
  return n->content.children[n->entry_count - 1].separator;
}

// Find the position for a slot in an inner node.
static unsigned
btree_node_find_inner_slot (const struct btree_node *n, uintptr_type value)
{
  for (unsigned index = 0, ec = n->entry_count; index != ec; ++index)
    if (n->content.children[index].separator >= value)
      return index;
  return n->entry_count;
}

// Find the position for a slot in a leaf node.
static unsigned
btree_node_find_leaf_slot (const struct btree_node *n, uintptr_type value)
{
  for (unsigned index = 0, ec = n->entry_count; index != ec; ++index)
    if (n->content.entries[index].base + n->content.entries[index].size > value)
      return index;
  return n->entry_count;
}
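
/* For illustration, given a leaf with the two entries {base 100, size 10}
   and {base 200, size 5}:

     btree_node_find_leaf_slot (n, 105) == 0	// 100 + 10 > 105
     btree_node_find_leaf_slot (n, 150) == 1	// first end above 150 is 205
     btree_node_find_leaf_slot (n, 300) == 2	// == entry_count, past the end

   i.e. the result is the first entry whose end lies above the value, which
   is also the insertion position for a new entry starting there.  */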

// Try to lock the node exclusive.
static inline bool
btree_node_try_lock_exclusive (struct btree_node *n)
{
  return version_lock_try_lock_exclusive (&(n->version_lock));
}

// Lock the node exclusive, blocking as needed.
static inline void
btree_node_lock_exclusive (struct btree_node *n)
{
  version_lock_lock_exclusive (&(n->version_lock));
}

// Release a locked node and increase the version lock.
static inline void
btree_node_unlock_exclusive (struct btree_node *n)
{
  version_lock_unlock_exclusive (&(n->version_lock));
}

// Acquire an optimistic "lock".  Note that this does not lock at all, it
// only allows for validation later.
static inline bool
btree_node_lock_optimistic (const struct btree_node *n, uintptr_type *lock)
{
  return version_lock_lock_optimistic (&(n->version_lock), lock);
}

// Validate a previously acquired lock.
static inline bool
btree_node_validate (const struct btree_node *n, uintptr_type lock)
{
  return version_lock_validate (&(n->version_lock), lock);
}
    313  1.1  mrg 
// Insert a new separator after splitting.  The old separator in the
// affected slot is replaced by new_separator, and new_right is inserted
// as the child to its right (keeping the old separator).
static void
btree_node_update_separator_after_split (struct btree_node *n,
					 uintptr_type old_separator,
					 uintptr_type new_separator,
					 struct btree_node *new_right)
{
  unsigned slot = btree_node_find_inner_slot (n, old_separator);
  for (unsigned index = n->entry_count; index > slot; --index)
    n->content.children[index] = n->content.children[index - 1];
  n->content.children[slot].separator = new_separator;
  n->content.children[slot + 1].child = new_right;
  n->entry_count++;
}

// A btree.  Suitable for static initialization, all members are zero at the
// beginning.
struct btree
{
  // The root of the btree.
  struct btree_node *root;
  // The free list of released nodes.
  struct btree_node *free_list;
  // The version lock used to protect the root.
  struct version_lock root_lock;
};
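
/* The intended use is a single statically allocated tree, along the lines
   of (a sketch; the actual variable lives in the file including this
   header):

     static struct btree registered_frames;	// all zero: empty, unlocked

   which is usable without ever calling btree_init.  */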

// Initialize a btree.  Not actually used, just for exposition.
static inline void
btree_init (struct btree *t)
{
  t->root = NULL;
  t->free_list = NULL;
  t->root_lock.version_lock = 0;
}

static void
btree_release_tree_recursively (struct btree *t, struct btree_node *n);

// Destroy a tree and release all nodes.
static void
btree_destroy (struct btree *t)
{
  // Disable the mechanism before cleaning up.
  struct btree_node *old_root
    = __atomic_exchange_n (&(t->root), NULL, __ATOMIC_SEQ_CST);
  if (old_root)
    btree_release_tree_recursively (t, old_root);

  // Release all free nodes.
  while (t->free_list)
    {
      struct btree_node *next = t->free_list->content.children[0].child;
      free (t->free_list);
      t->free_list = next;
    }
}

// Allocate a node.  This node will be returned in locked exclusive state.
static struct btree_node *
btree_allocate_node (struct btree *t, bool inner)
{
  while (true)
    {
      // Try the free list first.
      struct btree_node *next_free
	= __atomic_load_n (&(t->free_list), __ATOMIC_SEQ_CST);
      if (next_free)
	{
	  if (!btree_node_try_lock_exclusive (next_free))
	    continue;
	  // The node might no longer be free, check that again after acquiring
	  // the exclusive lock.
	  if (next_free->type == btree_node_free)
	    {
	      struct btree_node *ex = next_free;
	      if (__atomic_compare_exchange_n (
		    &(t->free_list), &ex, next_free->content.children[0].child,
		    false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
		{
		  next_free->entry_count = 0;
		  next_free->type = inner ? btree_node_inner : btree_node_leaf;
		  return next_free;
		}
	    }
	  btree_node_unlock_exclusive (next_free);
	  continue;
	}

      // No free node available, allocate a new one.
      struct btree_node *new_node
	= (struct btree_node *) (malloc (sizeof (struct btree_node)));
      version_lock_initialize_locked_exclusive (
	&(new_node->version_lock)); // Initialize the node in locked state.
      new_node->entry_count = 0;
      new_node->type = inner ? btree_node_inner : btree_node_leaf;
      return new_node;
    }
}

// Release a node.  This node must be currently locked exclusively and will
// be placed in the free list.
static void
btree_release_node (struct btree *t, struct btree_node *node)
{
  // We cannot release the memory immediately because there might still be
  // concurrent readers on that node.  Put it in the free list instead.
  node->type = btree_node_free;
  struct btree_node *next_free
    = __atomic_load_n (&(t->free_list), __ATOMIC_SEQ_CST);
  do
    {
      node->content.children[0].child = next_free;
    }
  while (!__atomic_compare_exchange_n (&(t->free_list), &next_free, node,
				       false, __ATOMIC_SEQ_CST,
				       __ATOMIC_SEQ_CST));
  btree_node_unlock_exclusive (node);
}
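
/* The compare-exchange loop above is a classic lock-free stack push: while
   a node sits on the free list, its first child pointer doubles as the
   "next" link, which is also why btree_destroy walks the list through
   content.children[0].child.  */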

// Recursively release a tree.  The btree is by design very shallow, thus
// we can risk recursion here.
static void
btree_release_tree_recursively (struct btree *t, struct btree_node *node)
{
  btree_node_lock_exclusive (node);
  if (btree_node_is_inner (node))
    {
      for (unsigned index = 0; index < node->entry_count; ++index)
	btree_release_tree_recursively (t, node->content.children[index].child);
    }
  btree_release_node (t, node);
}

// Check if we are splitting the root.
static void
btree_handle_root_split (struct btree *t, struct btree_node **node,
			 struct btree_node **parent)
{
  // We want to keep the root pointer stable to allow for contention-free
  // reads.  Thus, we split the root by first moving the content
  // of the root node to a new node, and then split that new node.
  if (!*parent)
    {
      // Allocate a new node, this guarantees us that we will have a parent
      // afterwards.
      struct btree_node *new_node
	= btree_allocate_node (t, btree_node_is_inner (*node));
      struct btree_node *old_node = *node;
      new_node->entry_count = old_node->entry_count;
      new_node->content = old_node->content;
      old_node->content.children[0].separator = max_separator;
      old_node->content.children[0].child = new_node;
      old_node->entry_count = 1;
      old_node->type = btree_node_inner;

      *parent = old_node;
      *node = new_node;
    }
}
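
/* Sketch of the transformation above, with R the old root and N the new
   node:

     before:  R [e1 ... ek]			(*parent == NULL)
     after:   R [max_separator -> N]		(*parent == R)
	      N [e1 ... ek]			(*node == N)

   The caller can then split N as usual while the root pointer, and with it
   the entry point of lock-free readers, stays stable.  */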

// Split an inner node.
static void
btree_split_inner (struct btree *t, struct btree_node **inner,
		   struct btree_node **parent, uintptr_type target,
		   uintptr_type size)
{
  // Check for the root.
  btree_handle_root_split (t, inner, parent);

  // Create two inner nodes.
  uintptr_type right_fence = btree_node_get_fence_key (*inner);
  struct btree_node *left_inner = *inner;
  struct btree_node *right_inner = btree_allocate_node (t, true);
  unsigned split = left_inner->entry_count / 2;
  right_inner->entry_count = left_inner->entry_count - split;
  for (unsigned index = 0; index < right_inner->entry_count; ++index)
    right_inner->content.children[index]
      = left_inner->content.children[split + index];
  left_inner->entry_count = split;
  uintptr_type left_fence = btree_node_get_fence_key (left_inner);
  if (left_fence >= target && left_fence < target + size - 1)
    // See the PR119151 comment in btree_insert.
    left_fence = target + size - 1;
  btree_node_update_separator_after_split (*parent, right_fence, left_fence,
					   right_inner);
  if (target <= left_fence)
    {
      *inner = left_inner;
      btree_node_unlock_exclusive (right_inner);
    }
  else
    {
      *inner = right_inner;
      btree_node_unlock_exclusive (left_inner);
    }
}

// Split a leaf node.
static void
btree_split_leaf (struct btree *t, struct btree_node **leaf,
		  struct btree_node **parent, uintptr_type fence,
		  uintptr_type target)
{
  // Check for the root.
  btree_handle_root_split (t, leaf, parent);

  // Create two leaf nodes.
  uintptr_type right_fence = fence;
  struct btree_node *left_leaf = *leaf;
  struct btree_node *right_leaf = btree_allocate_node (t, false);
  unsigned split = left_leaf->entry_count / 2;
  right_leaf->entry_count = left_leaf->entry_count - split;
  for (unsigned index = 0; index != right_leaf->entry_count; ++index)
    right_leaf->content.entries[index]
      = left_leaf->content.entries[split + index];
  left_leaf->entry_count = split;
  uintptr_type left_fence = right_leaf->content.entries[0].base - 1;
  btree_node_update_separator_after_split (*parent, right_fence, left_fence,
					   right_leaf);
  if (target <= left_fence)
    {
      *leaf = left_leaf;
      btree_node_unlock_exclusive (right_leaf);
    }
  else
    {
      *leaf = right_leaf;
      btree_node_unlock_exclusive (left_leaf);
    }
}

// Merge (or balance) child nodes.
static struct btree_node *
btree_merge_node (struct btree *t, unsigned child_slot,
		  struct btree_node *parent, uintptr_type target)
{
  // Choose the emptiest neighbor and lock both.  The target child is already
  // locked.
  unsigned left_slot;
  struct btree_node *left_node, *right_node;
  if ((child_slot == 0)
      || (((child_slot + 1) < parent->entry_count)
	  && (parent->content.children[child_slot + 1].child->entry_count
	      < parent->content.children[child_slot - 1].child->entry_count)))
    {
      left_slot = child_slot;
      left_node = parent->content.children[left_slot].child;
      right_node = parent->content.children[left_slot + 1].child;
      btree_node_lock_exclusive (right_node);
    }
  else
    {
      left_slot = child_slot - 1;
      left_node = parent->content.children[left_slot].child;
      right_node = parent->content.children[left_slot + 1].child;
      btree_node_lock_exclusive (left_node);
    }

  // Can we merge both nodes into one node?
  unsigned total_count = left_node->entry_count + right_node->entry_count;
  unsigned max_count
    = btree_node_is_inner (left_node) ? max_fanout_inner : max_fanout_leaf;
  if (total_count <= max_count)
    {
      // Merge into the parent?
      if (parent->entry_count == 2)
	{
	  // Merge children into parent.  This can only happen at the root.
	  if (btree_node_is_inner (left_node))
	    {
	      for (unsigned index = 0; index != left_node->entry_count; ++index)
		parent->content.children[index]
		  = left_node->content.children[index];
	      for (unsigned index = 0; index != right_node->entry_count;
		   ++index)
		parent->content.children[index + left_node->entry_count]
		  = right_node->content.children[index];
	    }
	  else
	    {
	      parent->type = btree_node_leaf;
	      for (unsigned index = 0; index != left_node->entry_count; ++index)
		parent->content.entries[index]
		  = left_node->content.entries[index];
	      for (unsigned index = 0; index != right_node->entry_count;
		   ++index)
		parent->content.entries[index + left_node->entry_count]
		  = right_node->content.entries[index];
	    }
	  parent->entry_count = total_count;
	  btree_release_node (t, left_node);
	  btree_release_node (t, right_node);
	  return parent;
	}
      else
	{
	  // Regular merge.
	  if (btree_node_is_inner (left_node))
	    {
	      for (unsigned index = 0; index != right_node->entry_count;
		   ++index)
		left_node->content.children[left_node->entry_count++]
		  = right_node->content.children[index];
	    }
	  else
	    {
	      for (unsigned index = 0; index != right_node->entry_count;
		   ++index)
		left_node->content.entries[left_node->entry_count++]
		  = right_node->content.entries[index];
	    }
	  parent->content.children[left_slot].separator
	    = parent->content.children[left_slot + 1].separator;
	  for (unsigned index = left_slot + 1; index + 1 < parent->entry_count;
	       ++index)
	    parent->content.children[index]
	      = parent->content.children[index + 1];
	  parent->entry_count--;
	  btree_release_node (t, right_node);
	  btree_node_unlock_exclusive (parent);
	  return left_node;
	}
    }

  // No merge possible, rebalance instead.
  if (left_node->entry_count > right_node->entry_count)
    {
      // Shift from left to right.
      unsigned to_shift
	= (left_node->entry_count - right_node->entry_count) / 2;
      if (btree_node_is_inner (left_node))
	{
	  for (unsigned index = 0; index != right_node->entry_count; ++index)
	    {
	      unsigned pos = right_node->entry_count - 1 - index;
	      right_node->content.children[pos + to_shift]
		= right_node->content.children[pos];
	    }
	  for (unsigned index = 0; index != to_shift; ++index)
	    right_node->content.children[index]
	      = left_node->content
		  .children[left_node->entry_count - to_shift + index];
	}
      else
	{
	  for (unsigned index = 0; index != right_node->entry_count; ++index)
	    {
	      unsigned pos = right_node->entry_count - 1 - index;
	      right_node->content.entries[pos + to_shift]
		= right_node->content.entries[pos];
	    }
	  for (unsigned index = 0; index != to_shift; ++index)
	    right_node->content.entries[index]
	      = left_node->content
		  .entries[left_node->entry_count - to_shift + index];
	}
      left_node->entry_count -= to_shift;
      right_node->entry_count += to_shift;
    }
  else
    {
      // Shift from right to left.
      unsigned to_shift
	= (right_node->entry_count - left_node->entry_count) / 2;
      if (btree_node_is_inner (left_node))
	{
	  for (unsigned index = 0; index != to_shift; ++index)
	    left_node->content.children[left_node->entry_count + index]
	      = right_node->content.children[index];
	  for (unsigned index = 0; index != right_node->entry_count - to_shift;
	       ++index)
	    right_node->content.children[index]
	      = right_node->content.children[index + to_shift];
	}
      else
	{
	  for (unsigned index = 0; index != to_shift; ++index)
	    left_node->content.entries[left_node->entry_count + index]
	      = right_node->content.entries[index];
	  for (unsigned index = 0; index != right_node->entry_count - to_shift;
	       ++index)
	    right_node->content.entries[index]
	      = right_node->content.entries[index + to_shift];
	}
      left_node->entry_count += to_shift;
      right_node->entry_count -= to_shift;
    }
  uintptr_type left_fence;
  if (btree_node_is_leaf (left_node))
    {
      left_fence = right_node->content.entries[0].base - 1;
    }
  else
    {
      left_fence = btree_node_get_fence_key (left_node);
    }
  parent->content.children[left_slot].separator = left_fence;
  btree_node_unlock_exclusive (parent);
  if (target <= left_fence)
    {
      btree_node_unlock_exclusive (right_node);
      return left_node;
    }
  else
    {
      btree_node_unlock_exclusive (left_node);
      return right_node;
    }
}

// Insert an entry.
static bool
btree_insert (struct btree *t, uintptr_type base, uintptr_type size,
	      struct object *ob)
{
  // Sanity check.
  if (!size)
    return false;

  // Access the root.
  struct btree_node *iter, *parent = NULL;
  {
    version_lock_lock_exclusive (&(t->root_lock));
    iter = t->root;
    if (iter)
      {
	btree_node_lock_exclusive (iter);
      }
    else
      {
	t->root = iter = btree_allocate_node (t, false);
      }
    version_lock_unlock_exclusive (&(t->root_lock));
  }

  // Walk down the btree with classic lock coupling and eager splits.
  // Strictly speaking this is not performance optimal, we could use
  // optimistic lock coupling until we hit a node that has to be modified.
  // But that is more difficult to implement and frame registration is
  // rare anyway, so we use simple locking for now.

  uintptr_type fence = max_separator;
  while (btree_node_is_inner (iter))
    {
      // Use eager splits to avoid lock coupling up.
      if (iter->entry_count == max_fanout_inner)
	btree_split_inner (t, &iter, &parent, base, size);

      unsigned slot = btree_node_find_inner_slot (iter, base);
      if (parent)
	btree_node_unlock_exclusive (parent);
      parent = iter;
      fence = iter->content.children[slot].separator;
      if (fence < base + size - 1)
	// The separator was set to the base - 1 of the leftmost leaf child
	// at some point, but such an entry could have been removed afterwards.
	// As both insertion and removal just walk down the tree with only a
	// few current nodes locked at a time, updating the separator on
	// removal is not possible, especially because btree_remove does not
	// know the size until it reaches a leaf node.  We must ensure that
	// the separator is not in the middle of some entry though, as
	// btree_lookup can look up any address in an entry's range and if
	// the separator were in the middle, addresses below or equal to it
	// would be found while addresses above it would result in a failed
	// lookup.  Update the separator now.  As users must ensure that
	// registered ranges do not overlap, there should be no current entry
	// for any address in the range.  See PR119151.
	fence = iter->content.children[slot].separator = base + size - 1;
      iter = iter->content.children[slot].child;
      btree_node_lock_exclusive (iter);
    }

  // Make sure we have space.
  if (iter->entry_count == max_fanout_leaf)
    btree_split_leaf (t, &iter, &parent, fence, base);
  if (parent)
    btree_node_unlock_exclusive (parent);

  // Insert in node.
  unsigned slot = btree_node_find_leaf_slot (iter, base);
  if ((slot < iter->entry_count) && (iter->content.entries[slot].base == base))
    {
      // Duplicate entry, this should never happen.
      btree_node_unlock_exclusive (iter);
      return false;
    }
  for (unsigned index = iter->entry_count; index > slot; --index)
    iter->content.entries[index] = iter->content.entries[index - 1];
  struct leaf_entry *e = &(iter->content.entries[slot]);
  e->base = base;
  e->size = size;
  e->ob = ob;
  iter->entry_count++;
  btree_node_unlock_exclusive (iter);
  return true;
}

// Remove an entry.
static struct object *
btree_remove (struct btree *t, uintptr_type base)
{
  // Access the root.
  version_lock_lock_exclusive (&(t->root_lock));
  struct btree_node *iter = t->root;
  if (iter)
    btree_node_lock_exclusive (iter);
  version_lock_unlock_exclusive (&(t->root_lock));
  if (!iter)
    return NULL;

  // Same strategy as with insert, walk down with lock coupling and
  // merge eagerly.
  while (btree_node_is_inner (iter))
    {
      unsigned slot = btree_node_find_inner_slot (iter, base);
      struct btree_node *next = iter->content.children[slot].child;
      btree_node_lock_exclusive (next);
      if (btree_node_needs_merge (next))
	{
	  // Use eager merges to avoid lock coupling up.
	  iter = btree_merge_node (t, slot, iter, base);
	}
      else
	{
	  btree_node_unlock_exclusive (iter);
	  iter = next;
	}
    }

  // Remove existing entry.
  unsigned slot = btree_node_find_leaf_slot (iter, base);
  if ((slot >= iter->entry_count) || (iter->content.entries[slot].base != base))
    {
      // Not found, this should never happen.
      btree_node_unlock_exclusive (iter);
      return NULL;
    }
  struct object *ob = iter->content.entries[slot].ob;
  for (unsigned index = slot; index + 1 < iter->entry_count; ++index)
    iter->content.entries[index] = iter->content.entries[index + 1];
  iter->entry_count--;
  btree_node_unlock_exclusive (iter);
  return ob;
}

// Find the corresponding entry for the given address.
static struct object *
btree_lookup (const struct btree *t, uintptr_type target_addr)
{
  // Within this function many loads are relaxed atomic loads.
  // Use a macro to keep the code reasonable.
#define RLOAD(x) __atomic_load_n (&(x), __ATOMIC_RELAXED)

  // For targets where unwind info is usually not registered through these
  // APIs anymore, avoid any sequentially consistent atomics.
  // Use relaxed MO here, it is up to the app to ensure that the library
  // loading/initialization happens-before using that library in other
  // threads (in particular unwinding with that library's functions
  // appearing in the backtraces).  Calling that library's functions
  // without waiting for the library to initialize would be racy.
  if (__builtin_expect (!RLOAD (t->root), 1))
    return NULL;

  // The unwinding tables are mostly static, they only change when
  // frames are added or removed.  This makes it extremely unlikely that they
  // change during a given unwinding sequence.  Thus, we optimize for the
  // contention-free case and use optimistic lock coupling.  This does not
  // require any writes to shared state, instead we validate every read.  It is
  // important that we do not trust any value that we have read until we call
  // validate again.  Data can change at arbitrary points in time, thus we
  // always copy something into a local variable and validate again before
  // acting on the read.  In the unlikely event that we encounter a concurrent
  // change we simply restart and try again.

restart:
  struct btree_node *iter;
  uintptr_type lock;
  {
    // Accessing the root node requires defending against concurrent pointer
    // changes.  Thus we couple root_lock -> lock on root node -> validate
    // root_lock.
    if (!version_lock_lock_optimistic (&(t->root_lock), &lock))
      goto restart;
    iter = RLOAD (t->root);
    if (!version_lock_validate (&(t->root_lock), lock))
      goto restart;
    if (!iter)
      return NULL;
    uintptr_type child_lock;
    if ((!btree_node_lock_optimistic (iter, &child_lock))
	|| (!version_lock_validate (&(t->root_lock), lock)))
      goto restart;
    lock = child_lock;
  }

  // Now we can walk down towards the right leaf node.
  while (true)
    {
      enum node_type type = RLOAD (iter->type);
      unsigned entry_count = RLOAD (iter->entry_count);
      if (!btree_node_validate (iter, lock))
	goto restart;
      if (!entry_count)
	return NULL;

      if (type == btree_node_inner)
	{
	  // We cannot call btree_node_find_inner_slot here because we need
	  // (relaxed) atomic reads.
	  unsigned slot = 0;
	  while (
	    ((slot + 1) < entry_count)
	    && (RLOAD (iter->content.children[slot].separator) < target_addr))
	    ++slot;
	  struct btree_node *child = RLOAD (iter->content.children[slot].child);
	  if (!btree_node_validate (iter, lock))
	    goto restart;

	  // The node content can change at any point in time, thus we must
	  // interleave parent and child checks.
	  uintptr_type child_lock;
	  if (!btree_node_lock_optimistic (child, &child_lock))
	    goto restart;
	  if (!btree_node_validate (iter, lock))
	    goto restart; // Make sure we still point to the correct node after
			  // acquiring the optimistic lock.

	  // Go down.
	  iter = child;
	  lock = child_lock;
	}
      else
	{
	  // We cannot call btree_node_find_leaf_slot here because we need
	  // (relaxed) atomic reads.
	  unsigned slot = 0;
	  while (((slot + 1) < entry_count)
		 && (RLOAD (iter->content.entries[slot].base)
		       + RLOAD (iter->content.entries[slot].size)
		     <= target_addr))
	    ++slot;
	  struct leaf_entry entry;
	  entry.base = RLOAD (iter->content.entries[slot].base);
	  entry.size = RLOAD (iter->content.entries[slot].size);
	  entry.ob = RLOAD (iter->content.entries[slot].ob);
	  if (!btree_node_validate (iter, lock))
	    goto restart;

	  // Check if we have a hit.
	  if ((entry.base <= target_addr)
	      && (target_addr < entry.base + entry.size))
	    {
	      return entry.ob;
	    }
	  return NULL;
	}
    }
#undef RLOAD
}

#endif /* unwind-dw2-btree.h */