Home | History | Annotate | Line # | Download | only in lloadd
      1 /*	$NetBSD: epoch.c,v 1.3 2025/09/05 21:16:24 christos Exp $	*/
      2 
      3 /* epoch.c - epoch based memory reclamation */
      4 /* $OpenLDAP$ */
      5 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
      6  *
      7  * Copyright 2018-2024 The OpenLDAP Foundation.
      8  * All rights reserved.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted only as authorized by the OpenLDAP
     12  * Public License.
     13  *
     14  * A copy of this license is available in the file LICENSE in the
     15  * top-level directory of the distribution or, alternatively, at
     16  * <http://www.OpenLDAP.org/license.html>.
     17  */
     18 
     19 /** @file epoch.c
     20  *
     21  * Implementation of epoch based memory reclamation, in principle
     22  * similar to the algorithm presented in
     23  * https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.pdf
     24  *
     25  * Not completely lock-free at the moment.
     26  *
     27  * Also the problems with epoch based memory reclamation are still
     28  * present - a thread actively observing an epoch getting stuck will
     29  * prevent managed objects (in our case connections and operations)
     30  * from being freed, potentially running out of memory.
     31  */
     32 
     33 #include <sys/cdefs.h>
     34 __RCSID("$NetBSD: epoch.c,v 1.3 2025/09/05 21:16:24 christos Exp $");
     35 
     36 #include "portable.h"
     37 
     38 #include "lload.h"
     39 #include <epoch.h>
     40 
/* Has to be >= 3 */
#define EPOCH_MASK ( 1 << 2 )
/* Wrapping arithmetic over the small epoch counter space [0, EPOCH_MASK) */
#define EPOCH_PREV(epoch) ( ( (epoch) + EPOCH_MASK - 1 ) % EPOCH_MASK )
#define EPOCH_NEXT(epoch) ( ( (epoch) + 1 ) % EPOCH_MASK )

/* One object queued for deferred disposal, a singly-linked list node */
struct pending_ref {
    void *object;
    dispose_cb *dispose;    /* callback invoked on object before the node is freed */
    struct pending_ref *next;
};

/* Serializes advancing current_epoch against threads joining an epoch */
ldap_pvt_thread_rdwr_t epoch_mutex;

/* The epoch that newly joining threads observe */
static epoch_t current_epoch;
/* Number of threads currently observing each epoch */
static uintptr_t epoch_threads[EPOCH_MASK];
/* Per-epoch lists of objects waiting to be disposed of and freed */
static struct pending_ref *references[EPOCH_MASK];
     57 
     58 void
     59 epoch_init( void )
     60 {
     61     epoch_t epoch;
     62 
     63     current_epoch = 0;
     64     for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
     65         assert( !epoch_threads[epoch] );
     66         assert( !references[epoch] );
     67     }
     68 
     69     ldap_pvt_thread_rdwr_init( &epoch_mutex );
     70 }
     71 
     72 void
     73 epoch_shutdown( void )
     74 {
     75     epoch_t epoch;
     76     struct pending_ref *old, *next;
     77 
     78     for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
     79         assert( !epoch_threads[epoch] );
     80     }
     81 
     82     /*
     83      * Even with the work in epoch_leave(), shutdown code doesn't currently
     84      * observe any epoch, so there might still be references left to free.
     85      */
     86     epoch = EPOCH_PREV(current_epoch);
     87     next = references[epoch];
     88     references[epoch] = NULL;
     89     for ( old = next; old; old = next ) {
     90         next = old->next;
     91 
     92         old->dispose( old->object );
     93         ch_free( old );
     94     }
     95 
     96     epoch = current_epoch;
     97     next = references[epoch];
     98     references[epoch] = NULL;
     99     for ( old = next; old; old = next ) {
    100         next = old->next;
    101 
    102         old->dispose( old->object );
    103         ch_free( old );
    104     }
    105 
    106     /* No references should exist anywhere now */
    107     for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
    108         assert( !references[epoch] );
    109     }
    110 
    111     ldap_pvt_thread_rdwr_destroy( &epoch_mutex );
    112 }
    113 
/*
 * Register the calling thread as an observer of the current epoch and
 * return that epoch; the caller must hand the returned value back to
 * epoch_leave() when done.
 *
 * If the previous epoch turns out to have no observers, this thread also
 * claims its pending references, advances the global epoch, frees the
 * claimed objects and retries the join from scratch.
 */
epoch_t
epoch_join( void )
{
    epoch_t epoch;
    struct pending_ref *old, *ref = NULL;

retry:
    /* TODO: make this completely lock-free */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    epoch = current_epoch;
    __atomic_add_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL );
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

    /* Fast path: the previous epoch is still observed, nothing to reclaim */
    if ( __atomic_load_n(
                 &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_ACQUIRE ) ) {
        return epoch;
    }

    /* Previous epoch is unobserved - claim its pending references for
     * ourselves, leaving an empty list behind */
    __atomic_exchange(
            &references[EPOCH_PREV(epoch)], &ref, &ref, __ATOMIC_ACQ_REL );

    Debug( LDAP_DEBUG_TRACE, "epoch_join: "
            "advancing epoch to %zu with %s objects to free\n",
            EPOCH_NEXT(epoch), ref ? "some" : "no" );

    ldap_pvt_thread_rdwr_wlock( &epoch_mutex );
    current_epoch = EPOCH_NEXT(epoch);
    ldap_pvt_thread_rdwr_wunlock( &epoch_mutex );

    if ( !ref ) {
        return epoch;
    }

    /*
     * The below is now safe to free outside epochs and we don't want to make
     * the current epoch last any longer than necessary.
     *
     * Looks like there might be fairness issues in massively parallel
     * environments but they haven't been observed on 32-core machines.
     */
    epoch_leave( epoch );

    /* Dispose of everything we claimed from the retired epoch */
    for ( old = ref; old; old = ref ) {
        ref = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    goto retry;
}
    165 
/*
 * Drop the calling thread's claim on `epoch` (the value returned by the
 * matching epoch_join() call).
 *
 * If no thread is left observing either the current or the previous
 * epoch, reclaim all pending references right away rather than waiting
 * for a future epoch_join() to do it.
 */
void
epoch_leave( epoch_t epoch )
{
    struct pending_ref *p, *next, *old_refs = NULL, *current_refs = NULL;

    /* Are there other threads observing our epoch? */
    if ( __atomic_sub_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL ) ) {
        return;
    }

    /*
     * Optimisation for the case when we're mostly idle. Otherwise we won't
     * release resources until another thread comes by and joins the epoch
     * (twice), and there's no idea how soon (or late) that is going to happen.
     *
     * NB. There is no limit to the number of threads executing the following
     * code in parallel.
     */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    /*
     * Anything could happen between the subtract and the lock being acquired
     * above, so check again. But once we hold this lock (and confirm no more
     * threads still observe either prospective epoch), no one will be able to
     * finish epoch_join until we've released epoch_mutex since we *first* make
     * sure it holds that:
     *
     * epoch_threads[EPOCH_PREV(current_epoch)] == 0
     *
     * and that leads epoch_join() to acquire a write lock on &epoch_mutex.
     */
    if ( epoch != current_epoch && epoch != EPOCH_PREV(current_epoch) ) {
        /* Epoch counter has run away from us, no need to do anything */
        ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
        return;
    }
    if ( __atomic_load_n(
                &epoch_threads[EPOCH_PREV(current_epoch)],
                __ATOMIC_ACQUIRE ) ) {
        /* There is another thread still running */
        ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
        return;
    }
    if ( __atomic_load_n( &epoch_threads[current_epoch], __ATOMIC_ACQUIRE ) ) {
        /* There is another thread still running */
        ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
        return;
    }

    /*
     * We're all alone (apart from anyone who reached epoch_leave() at the same
     * time), it's safe to claim all references and free them.
     */
    __atomic_exchange(
            &references[EPOCH_PREV(current_epoch)], &old_refs, &old_refs,
            __ATOMIC_ACQ_REL );
    __atomic_exchange(
            &references[current_epoch], &current_refs, &current_refs,
            __ATOMIC_ACQ_REL );
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

    /* Dispose of both claimed lists outside the lock */
    for ( p = old_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }

    for ( p = current_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }
}
    240 
    241 /*
    242  * Add the object to the "current global epoch", not the epoch our thread
    243  * entered.
    244  */
    245 void
    246 epoch_append( void *ptr, dispose_cb *cb )
    247 {
    248     struct pending_ref *new;
    249     epoch_t epoch = __atomic_load_n( &current_epoch, __ATOMIC_ACQUIRE );
    250 
    251     /*
    252      * BTW, the following is not appropriate here:
    253      * assert( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) );
    254      *
    255      * We might be a thread lagging behind in the "previous epoch" with no
    256      * other threads executing at all.
    257      */
    258 
    259     new = ch_malloc( sizeof(struct pending_ref) );
    260     new->object = ptr;
    261     new->dispose = cb;
    262     new->next = __atomic_load_n( &references[epoch], __ATOMIC_ACQUIRE );
    263 
    264     while ( !__atomic_compare_exchange( &references[epoch], &new->next, &new, 0,
    265             __ATOMIC_RELEASE, __ATOMIC_RELAXED ) )
    266         /* iterate until we succeed */;
    267 }
    268 
/*
 * Attempt to take a reference on *refp. Returns the refcount observed
 * before our increment: a zero return means the object is already dead
 * and no reference was taken.
 */
int
acquire_ref( uintptr_t *refp )
{
    uintptr_t observed, bumped;

    observed = __atomic_load_n( refp, __ATOMIC_ACQUIRE );

    /*
     * If we just incremented the refcnt and checked for zero after, another
     * thread might falsely believe the object was going to stick around.
     *
     * Checking whether the object is still dead at disposal time might not be
     * able to distinguish it from being freed in a later epoch.
     */
    do {
        if ( observed == 0 ) {
            return observed;
        }

        bumped = observed + 1;
    } while ( !__atomic_compare_exchange_n( refp, &observed, bumped, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    assert( bumped == observed + 1 );

    return observed;
}
    295 
    296 int
    297 try_release_ref(
    298         uintptr_t *refp,
    299         void *object,
    300         dispose_cb *unlink_cb,
    301         dispose_cb *destroy_cb )
    302 {
    303     uintptr_t refcnt, new_refcnt;
    304 
    305     refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE );
    306 
    307     /* We promise the caller that we won't decrease refcnt below 0 */
    308     do {
    309         if ( !refcnt ) {
    310             return refcnt;
    311         }
    312 
    313         new_refcnt = refcnt - 1;
    314     } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0,
    315             __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    316     assert( new_refcnt == refcnt - 1 );
    317 
    318     if ( !new_refcnt ) {
    319         if ( unlink_cb ) {
    320             unlink_cb( object );
    321         }
    322         epoch_append( object, destroy_cb );
    323     }
    324 
    325     return refcnt;
    326 }
    327