/*	$NetBSD: linux_rcu.c,v 1.7 2021/12/19 12:40:11 riastradh Exp $	*/

/*-
 * Copyright (c) 2018 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Taylor R. Campbell.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: linux_rcu.c,v 1.7 2021/12/19 12:40:11 riastradh Exp $");

#include <sys/param.h>
#include <sys/types.h>

#include <sys/condvar.h>
#include <sys/cpu.h>
#include <sys/kthread.h>
#include <sys/lockdebug.h>
#include <sys/mutex.h>
#include <sys/sdt.h>
#include <sys/xcall.h>

#include <linux/rcupdate.h>
#include <linux/slab.h>

SDT_PROBE_DEFINE0(sdt, linux, rcu, synchronize__start);
SDT_PROBE_DEFINE1(sdt, linux, rcu, synchronize__cpu, "unsigned"/*cpu*/);
SDT_PROBE_DEFINE0(sdt, linux, rcu, synchronize__done);
SDT_PROBE_DEFINE0(sdt, linux, rcu, barrier__start);
SDT_PROBE_DEFINE0(sdt, linux, rcu, barrier__done);
SDT_PROBE_DEFINE2(sdt, linux, rcu, call__queue,
    "struct rcu_head *"/*head*/, "void (*)(struct rcu_head *)"/*callback*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, call__run,
    "struct rcu_head *"/*head*/, "void (*)(struct rcu_head *)"/*callback*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, call__done,
    "struct rcu_head *"/*head*/, "void (*)(struct rcu_head *)"/*callback*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, kfree__queue,
    "struct rcu_head *"/*head*/, "void *"/*obj*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, kfree__free,
    "struct rcu_head *"/*head*/, "void *"/*obj*/);
SDT_PROBE_DEFINE2(sdt, linux, rcu, kfree__done,
    "struct rcu_head *"/*head*/, "void *"/*obj*/);

static struct {
	kmutex_t	lock;
	kcondvar_t	cv;
	struct rcu_head	*first_callback;
	struct rcu_head	*first_kfree;
	struct lwp	*lwp;
	uint64_t	gen;
	bool		running;
	bool		dying;
} gc __cacheline_aligned;

static void
synchronize_rcu_xc(void *a, void *b)
{

	SDT_PROBE1(sdt, linux, rcu, synchronize__cpu,  cpu_index(curcpu()));
}

/*
 * synchronize_rcu()
 *
 *	Wait for any pending RCU read section on every CPU to complete
 *	by triggering, on every CPU, activity that is blocked by an
 *	RCU read section.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_rcu(void)
{

	SDT_PROBE0(sdt, linux, rcu, synchronize__start);
	xc_wait(xc_broadcast(0, &synchronize_rcu_xc, NULL, NULL));
	SDT_PROBE0(sdt, linux, rcu, synchronize__done);
}
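
/*
 * Example (illustrative sketch, not part of this file): the typical
 * writer-side pattern is to unpublish an object, wait for readers
 * with synchronize_rcu, and only then free it.  struct frob and
 * frob_list_remove are hypothetical names.
 *
 *	struct frob *f;
 *
 *	f = frob_list_remove(&frob_list, key);	// unpublish the object
 *	synchronize_rcu();	// wait out all current RCU read sections
 *	kfree(f);		// no reader can still hold a reference
 */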

/*
 * synchronize_rcu_expedited()
 *
 *	Wait for any pending RCU read section on every CPU to complete
 *	by triggering, on every CPU, activity that is blocked by an
 *	RCU read section.  Try to get an answer faster than
 *	synchronize_rcu, at the cost of more activity triggered on
 *	other CPUs.
 *
 *	May sleep.  (Practically guaranteed to sleep!)
 */
void
synchronize_rcu_expedited(void)
{

	synchronize_rcu();
}

/*
 * cookie = get_state_synchronize_rcu(), cond_synchronize_rcu(cookie)
 *
 *	Optimization for synchronize_rcu -- skip if it has already
 *	happened between get_state_synchronize_rcu and
 *	cond_synchronize_rcu.  get_state_synchronize_rcu implies a full
 *	SMP memory barrier (membar_sync).
 */
unsigned long
get_state_synchronize_rcu(void)
{

	membar_sync();
	return 0;
}

void
cond_synchronize_rcu(unsigned long cookie)
{

	synchronize_rcu();
}
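
/*
 * Example (illustrative sketch): the cookie pattern lets a caller
 * skip the wait when a grace period has already elapsed; in this
 * implementation the cookie is a stub and cond_synchronize_rcu always
 * waits.  obj and its unpublish step are hypothetical.
 *
 *	unsigned long cookie = get_state_synchronize_rcu();
 *
 *	... unpublish obj, possibly do other long-running work ...
 *
 *	cond_synchronize_rcu(cookie);	// grace period since the cookie
 *	kfree(obj);
 */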

/*
 * rcu_barrier()
 *
 *	Wait for all pending RCU callbacks to complete.
 *
 *	Does not imply, and is not implied by, synchronize_rcu.
 */
void
rcu_barrier(void)
{
	uint64_t gen;

	/*
	 * If the GC isn't running anything yet, then all callbacks of
	 * interest are queued, and it suffices to wait for the GC to
	 * advance one generation number.
	 *
	 * If the GC is already running, however, and there are any
	 * callbacks of interest queued but not in the GC's current
	 * batch of work, then when the GC advances the generation
	 * number it will not have completed the queued callbacks.  So
	 * we have to wait for one more generation -- or until the GC
	 * has stopped running because there's no work left.
	 */

	SDT_PROBE0(sdt, linux, rcu, barrier__start);
	mutex_enter(&gc.lock);
	gen = gc.gen;
	if (gc.running)
		gen++;
	while (gc.running || gc.first_callback || gc.first_kfree) {
		cv_wait(&gc.cv, &gc.lock);
		if (gc.gen > gen)
			break;
	}
	mutex_exit(&gc.lock);
	SDT_PROBE0(sdt, linux, rcu, barrier__done);
}
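
/*
 * Example (illustrative sketch): rcu_barrier is typically used at
 * teardown time to make sure every callback queued so far has run
 * before the code or data it relies on goes away.  obj, obj_rcu, and
 * obj_free_cb are hypothetical names.
 *
 *	call_rcu(&obj->obj_rcu, obj_free_cb);	// queued earlier
 *	...
 *	rcu_barrier();		// all previously queued callbacks done
 *	// now safe to unload the code containing obj_free_cb
 */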

/*
 * call_rcu(head, callback)
 *
 *	Arrange to call callback(head) after any pending RCU read
 *	sections on every CPU have completed.  Return immediately.
 */
void
call_rcu(struct rcu_head *head, void (*callback)(struct rcu_head *))
{

	head->rcuh_u.callback = callback;

	mutex_enter(&gc.lock);
	head->rcuh_next = gc.first_callback;
	gc.first_callback = head;
	cv_broadcast(&gc.cv);
	SDT_PROBE2(sdt, linux, rcu, call__queue,  head, callback);
	mutex_exit(&gc.lock);
}
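
/*
 * Example (illustrative sketch): the caller embeds a struct rcu_head
 * in its object and passes a callback that recovers the object with
 * container_of and frees it.  struct frob and frob_free_cb are
 * hypothetical names.
 *
 *	struct frob {
 *		...
 *		struct rcu_head	frob_rcu;
 *	};
 *
 *	static void
 *	frob_free_cb(struct rcu_head *head)
 *	{
 *		struct frob *f = container_of(head, struct frob,
 *		    frob_rcu);
 *
 *		kfree(f);
 *	}
 *
 *	call_rcu(&f->frob_rcu, frob_free_cb);	// returns immediately
 */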

/*
 * _kfree_rcu(head, obj)
 *
 *	kfree_rcu helper: schedule kfree(obj) using head for storage.
 */
void
_kfree_rcu(struct rcu_head *head, void *obj)
{

	LOCKDEBUG_MEM_CHECK(obj, ((struct linux_malloc *)obj - 1)->lm_size);

	head->rcuh_u.obj = obj;

	mutex_enter(&gc.lock);
	head->rcuh_next = gc.first_kfree;
	gc.first_kfree = head;
	cv_broadcast(&gc.cv);
	SDT_PROBE2(sdt, linux, rcu, kfree__queue,  head, obj);
	mutex_exit(&gc.lock);
}
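
/*
 * Example (illustrative sketch): callers normally go through the
 * kfree_rcu macro in <linux/rcupdate.h>, which hands the embedded
 * rcu_head and the kmalloc'd object to this helper.  struct frob and
 * its frob_rcu member are hypothetical names.
 *
 *	struct frob *f = kmalloc(sizeof(*f), GFP_KERNEL);
 *	...
 *	kfree_rcu(f, frob_rcu);	// kfree(f) after a grace period
 */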

/*
 * gc_thread(cookie)
 *
 *	Kernel thread that garbage-collects RCU work: gather the
 *	queued callbacks and kfree requests into a batch, wait for a
 *	grace period with synchronize_rcu, run the batch, and advance
 *	the generation number for rcu_barrier.
 */
static void
gc_thread(void *cookie)
{
	struct rcu_head *head_callback, *head_kfree, *head, *next;

	mutex_enter(&gc.lock);
	for (;;) {
		/* Start with no work.  */
		bool work = false;

		/* Grab the list of callbacks.  */
		if ((head_callback = gc.first_callback) != NULL) {
			gc.first_callback = NULL;
			work = true;
		}

		/* Grab the list of objects to kfree.  */
		if ((head_kfree = gc.first_kfree) != NULL) {
			gc.first_kfree = NULL;
			work = true;
		}

		/*
		 * If no work, then either stop, if we're dying, or
		 * wait for work, if not.
		 */
		if (!work) {
			if (gc.dying)
				break;
			cv_wait(&gc.cv, &gc.lock);
			continue;
		}

		/*
		 * We have work to do.  Drop the lock to do it, and
		 * notify rcu_barrier that we're still doing it.
		 */
		gc.running = true;
		mutex_exit(&gc.lock);

		/* Wait for activity on all CPUs.  */
		synchronize_rcu();

		/* Call the callbacks.  */
		for (head = head_callback; head != NULL; head = next) {
			void (*callback)(struct rcu_head *) =
			    head->rcuh_u.callback;
			next = head->rcuh_next;
			SDT_PROBE2(sdt, linux, rcu, call__run,
			    head, callback);
			(*callback)(head);
			/*
			 * Can't dereference head or invoke
			 * callback after this point.
			 */
			SDT_PROBE2(sdt, linux, rcu, call__done,
			    head, callback);
		}

		/* Free the objects to kfree.  */
		for (head = head_kfree; head != NULL; head = next) {
			void *obj = head->rcuh_u.obj;
			next = head->rcuh_next;
			SDT_PROBE2(sdt, linux, rcu, kfree__free,  head, obj);
			kfree(obj);
			/* Can't dereference head or obj after this point.  */
			SDT_PROBE2(sdt, linux, rcu, kfree__done,  head, obj);
		}

		/* Return to the lock.  */
		mutex_enter(&gc.lock);

		/* Finished a batch of work.  Notify rcu_barrier.  */
		gc.gen++;
		gc.running = false;
		cv_broadcast(&gc.cv);

		/*
		 * Limit ourselves to one batch per tick, in an attempt
		 * to make the batches larger.
		 *
		 * XXX We should maybe also limit the size of each
		 * batch.
		 */
		(void)kpause("lxrcubat", /*intr*/false, /*timo*/1, &gc.lock);
	}
	KASSERT(gc.first_callback == NULL);
	KASSERT(gc.first_kfree == NULL);
	mutex_exit(&gc.lock);

	kthread_exit(0);
}

/*
 * init_rcu_head(head), destroy_rcu_head(head)
 *
 *	No per-head setup or teardown is needed in this
 *	implementation; these exist for API compatibility.
 */
void
init_rcu_head(struct rcu_head *head)
{
}

void
destroy_rcu_head(struct rcu_head *head)
{
}

/*
 * linux_rcu_gc_init()
 *
 *	Initialize the RCU garbage collector state and create the GC
 *	thread.  Return 0 on success, error number on failure.
 */
int
linux_rcu_gc_init(void)
{
	int error;

	mutex_init(&gc.lock, MUTEX_DEFAULT, IPL_VM);
	cv_init(&gc.cv, "lnxrcugc");
	gc.first_callback = NULL;
	gc.first_kfree = NULL;
	gc.gen = 0;
	gc.dying = false;

	error = kthread_create(PRI_NONE,
	    KTHREAD_MPSAFE|KTHREAD_TS|KTHREAD_MUSTJOIN, NULL, &gc_thread, NULL,
	    &gc.lwp, "lnxrcugc");
	if (error)
		goto fail;

	/* Success!  */
	return 0;

fail:	cv_destroy(&gc.cv);
	mutex_destroy(&gc.lock);
	return error;
}

/*
 * linux_rcu_gc_fini()
 *
 *	Tell the GC thread to finish any remaining work and exit, wait
 *	for it, and tear down the GC state.
 */
void
linux_rcu_gc_fini(void)
{

	mutex_enter(&gc.lock);
	gc.dying = true;
	cv_broadcast(&gc.cv);
	mutex_exit(&gc.lock);

	kthread_join(gc.lwp);
	gc.lwp = NULL;
	KASSERT(gc.first_callback == NULL);
	KASSERT(gc.first_kfree == NULL);
	cv_destroy(&gc.cv);
	mutex_destroy(&gc.lock);
}
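
/*
 * Example (illustrative sketch): the expected lifecycle, from
 * whatever module glue uses this file, is to pair the two hooks
 * around all RCU use and to stop queueing callbacks before fini.
 *
 *	error = linux_rcu_gc_init();	// before any call_rcu/kfree_rcu
 *	if (error)
 *		return error;
 *	...
 *	// ensure no new call_rcu/kfree_rcu can be queued, then:
 *	linux_rcu_gc_fini();	// runs remaining work, joins the thread
 */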
    369