1 /* $NetBSD: epoch.c,v 1.3 2025/09/05 21:16:24 christos Exp $ */ 2 3 /* epoch.c - epoch based memory reclamation */ 4 /* $OpenLDAP$ */ 5 /* This work is part of OpenLDAP Software <http://www.openldap.org/>. 6 * 7 * Copyright 2018-2024 The OpenLDAP Foundation. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted only as authorized by the OpenLDAP 12 * Public License. 13 * 14 * A copy of this license is available in the file LICENSE in the 15 * top-level directory of the distribution or, alternatively, at 16 * <http://www.OpenLDAP.org/license.html>. 17 */ 18 19 /** @file epoch.c 20 * 21 * Implementation of epoch based memory reclamation, in principle 22 * similar to the algorithm presented in 23 * https://www.cl.cam.ac.uk/techreports/UCAM-CL-TR-579.pdf 24 * 25 * Not completely lock-free at the moment. 26 * 27 * Also the problems with epoch based memory reclamation are still 28 * present - a thread actively observing an epoch getting stuck will 29 * prevent managed objects (in our case connections and operations) 30 * from being freed, potentially running out of memory. 
 */

#include <sys/cdefs.h>
__RCSID("$NetBSD: epoch.c,v 1.3 2025/09/05 21:16:24 christos Exp $");

#include "portable.h"

#include "lload.h"
#include <epoch.h>

/*
 * Number of epoch slots tracked; EPOCH_PREV/EPOCH_NEXT wrap modulo this
 * value. Has to be >= 3 so that the previous, current and next epoch are
 * always distinct slots.
 */
#define EPOCH_MASK ( 1 << 2 )
#define EPOCH_PREV(epoch) ( ( (epoch) + EPOCH_MASK - 1 ) % EPOCH_MASK )
#define EPOCH_NEXT(epoch) ( ( (epoch) + 1 ) % EPOCH_MASK )

/*
 * One object queued for deferred disposal; nodes form a singly-linked
 * list hanging off references[epoch].
 */
struct pending_ref {
    void *object;             /* the managed object to dispose of */
    dispose_cb *dispose;      /* callback that actually frees it */
    struct pending_ref *next;
};

/*
 * Guards advancement of current_epoch: joining/leaving threads take the
 * read side, the thread advancing the epoch takes the write side.
 */
ldap_pvt_thread_rdwr_t epoch_mutex;

static epoch_t current_epoch;                      /* the global epoch */
static uintptr_t epoch_threads[EPOCH_MASK];        /* observers per epoch */
static struct pending_ref *references[EPOCH_MASK]; /* deferred frees per epoch */

/*
 * Initialise the epoch machinery. Must run before any other epoch_*
 * call; asserts that no stale observer/reference state is left over
 * (e.g. from a previous init/shutdown cycle).
 */
void
epoch_init( void )
{
    epoch_t epoch;

    current_epoch = 0;
    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !epoch_threads[epoch] );
        assert( !references[epoch] );
    }

    ldap_pvt_thread_rdwr_init( &epoch_mutex );
}

/*
 * Tear down the epoch machinery, disposing of any references still queued
 * on the previous and current epoch. Only safe once no thread observes
 * any epoch (asserted below) - i.e. after all worker threads have exited.
 */
void
epoch_shutdown( void )
{
    epoch_t epoch;
    struct pending_ref *old, *next;

    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !epoch_threads[epoch] );
    }

    /*
     * Even with the work in epoch_leave(), shutdown code doesn't currently
     * observe any epoch, so there might still be references left to free.
     */
    epoch = EPOCH_PREV(current_epoch);
    next = references[epoch];
    references[epoch] = NULL;
    for ( old = next; old; old = next ) {
        next = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    epoch = current_epoch;
    next = references[epoch];
    references[epoch] = NULL;
    for ( old = next; old; old = next ) {
        next = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    /* No references should exist anywhere now */
    for ( epoch = 0; epoch < EPOCH_MASK; epoch++ ) {
        assert( !references[epoch] );
    }

    ldap_pvt_thread_rdwr_destroy( &epoch_mutex );
}

/*
 * Register the calling thread as an observer of the current global epoch.
 *
 * Returns the epoch joined; the caller must hand exactly that value back
 * to epoch_leave() when done. If no thread still observes the previous
 * epoch, this thread also advances the global epoch, disposes of the
 * references queued on the now-retired epoch, and retries the join so it
 * ends up registered in a live epoch.
 */
epoch_t
epoch_join( void )
{
    epoch_t epoch;
    struct pending_ref *old, *ref = NULL;

retry:
    /* TODO: make this completely lock-free */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    epoch = current_epoch;
    __atomic_add_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL );
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

    /* Previous epoch still has observers: nothing to reclaim yet */
    if ( __atomic_load_n(
                 &epoch_threads[EPOCH_PREV(epoch)], __ATOMIC_ACQUIRE ) ) {
        return epoch;
    }

    /* Claim the previous epoch's pending references for disposal */
    __atomic_exchange(
            &references[EPOCH_PREV(epoch)], &ref, &ref, __ATOMIC_ACQ_REL );

    Debug( LDAP_DEBUG_TRACE, "epoch_join: "
            "advancing epoch to %zu with %s objects to free\n",
            EPOCH_NEXT(epoch), ref ? "some" : "no" );

    ldap_pvt_thread_rdwr_wlock( &epoch_mutex );
    current_epoch = EPOCH_NEXT(epoch);
    ldap_pvt_thread_rdwr_wunlock( &epoch_mutex );

    if ( !ref ) {
        return epoch;
    }

    /*
     * The below is now safe to free outside epochs and we don't want to make
     * the current epoch last any longer than necessary.
     *
     * Looks like there might be fairness issues in massively parallel
     * environments but they haven't been observed on 32-core machines.
     */
    epoch_leave( epoch );

    for ( old = ref; old; old = ref ) {
        ref = old->next;

        old->dispose( old->object );
        ch_free( old );
    }

    goto retry;
}

/*
 * Deregister the calling thread from the epoch it joined.
 *
 * When this was the last observer of both the current and previous epoch,
 * the thread also claims and disposes of all references queued on those two
 * epochs, so resources are released even when the system then goes idle.
 */
void
epoch_leave( epoch_t epoch )
{
    struct pending_ref *p, *next, *old_refs = NULL, *current_refs = NULL;

    /* Are there other threads observing our epoch? */
    if ( __atomic_sub_fetch( &epoch_threads[epoch], 1, __ATOMIC_ACQ_REL ) ) {
        return;
    }

    /*
     * Optimisation for the case when we're mostly idle. Otherwise we won't
     * release resources until another thread comes by and joins the epoch
     * (twice), and there's no idea how soon (or late) that is going to happen.
     *
     * NB. There is no limit to the number of threads executing the following
     * code in parallel.
     */
    ldap_pvt_thread_rdwr_rlock( &epoch_mutex );
    /*
     * Anything could happen between the subtract and the lock being acquired
     * above, so check again. But once we hold this lock (and confirm no more
     * threads still observe either prospective epoch), noone will be able to
     * finish epoch_join until we've released epoch_mutex since we *first* make
     * sure it holds that:
     *
     * epoch_threads[EPOCH_PREV(current_epoch)] == 0
     *
     * and that leads epoch_join() to acquire a write lock on &epoch_mutex.
     */
    if ( epoch != current_epoch && epoch != EPOCH_PREV(current_epoch) ) {
        /* Epoch counter has run away from us, no need to do anything */
        ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
        return;
    }
    if ( __atomic_load_n(
                 &epoch_threads[EPOCH_PREV(current_epoch)],
                 __ATOMIC_ACQUIRE ) ) {
        /* There is another thread still running */
        ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
        return;
    }
    if ( __atomic_load_n( &epoch_threads[current_epoch], __ATOMIC_ACQUIRE ) ) {
        /* There is another thread still running */
        ldap_pvt_thread_rdwr_runlock( &epoch_mutex );
        return;
    }

    /*
     * We're all alone (apart from anyone who reached epoch_leave() at the same
     * time), it's safe to claim all references and free them.
     */
    __atomic_exchange(
            &references[EPOCH_PREV(current_epoch)], &old_refs, &old_refs,
            __ATOMIC_ACQ_REL );
    __atomic_exchange(
            &references[current_epoch], &current_refs, &current_refs,
            __ATOMIC_ACQ_REL );
    ldap_pvt_thread_rdwr_runlock( &epoch_mutex );

    for ( p = old_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }

    for ( p = current_refs; p; p = next ) {
        next = p->next;

        p->dispose( p->object );
        ch_free( p );
    }
}

/*
 * Queue ptr for deferred disposal via cb once the epoch machinery decides
 * it is safe (all observers of the relevant epochs have left).
 *
 * Add the object to the "current global epoch", not the epoch our thread
 * entered.
 */
void
epoch_append( void *ptr, dispose_cb *cb )
{
    struct pending_ref *new;
    epoch_t epoch = __atomic_load_n( &current_epoch, __ATOMIC_ACQUIRE );

    /*
     * BTW, the following is not appropriate here:
     * assert( __atomic_load_n( &epoch_threads[epoch], __ATOMIC_RELAXED ) );
     *
     * We might be a thread lagging behind in the "previous epoch" with no
     * other threads executing at all.
     */

    new = ch_malloc( sizeof(struct pending_ref) );
    new->object = ptr;
    new->dispose = cb;
    new->next = __atomic_load_n( &references[epoch], __ATOMIC_ACQUIRE );

    /* Lock-free push onto the epoch's list; CAS refreshes new->next on failure */
    while ( !__atomic_compare_exchange( &references[epoch], &new->next, &new, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) )
        /* iterate until we succeed */;
}

/*
 * Atomically increment *refp unless it has already dropped to zero.
 *
 * Returns the refcount observed *before* the increment: non-zero means the
 * reference was acquired, zero means the object is already being torn down
 * and must not be used.
 *
 * NOTE(review): the int return narrows uintptr_t; harmless while refcounts
 * stay small, but callers should only test it for zero/non-zero - confirm.
 */
int
acquire_ref( uintptr_t *refp )
{
    uintptr_t refcnt, new_refcnt;

    refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE );

    /*
     * If we just incremented the refcnt and checked for zero after, another
     * thread might falsely believe the object was going to stick around.
     *
     * Checking whether the object is still dead at disposal time might not be
     * able to distinguish it from being freed in a later epoch.
     */
    do {
        if ( !refcnt ) {
            return refcnt;
        }

        new_refcnt = refcnt + 1;
    } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    /* On CAS success, __atomic_compare_exchange leaves refcnt at the old value */
    assert( new_refcnt == refcnt + 1 );

    return refcnt;
}

/*
 * Atomically decrement *refp unless it is already zero. When this releases
 * the last reference, unlink_cb (if non-NULL) runs immediately and the
 * object is queued via epoch_append() for deferred destruction by
 * destroy_cb.
 *
 * Returns the refcount observed *before* the decrement; zero means nothing
 * was released. Same int-narrows-uintptr_t caveat as acquire_ref().
 */
int
try_release_ref(
        uintptr_t *refp,
        void *object,
        dispose_cb *unlink_cb,
        dispose_cb *destroy_cb )
{
    uintptr_t refcnt, new_refcnt;

    refcnt = __atomic_load_n( refp, __ATOMIC_ACQUIRE );

    /* We promise the caller that we won't decrease refcnt below 0 */
    do {
        if ( !refcnt ) {
            return refcnt;
        }

        new_refcnt = refcnt - 1;
    } while ( !__atomic_compare_exchange( refp, &refcnt, &new_refcnt, 0,
            __ATOMIC_RELEASE, __ATOMIC_RELAXED ) );
    assert( new_refcnt == refcnt - 1 );

    if ( !new_refcnt ) {
        if ( unlink_cb ) {
            unlink_cb( object );
        }
        epoch_append( object, destroy_cb );
    }

    return refcnt;
}