Home | History | Annotate | Line # | Download | only in static
      1 // SPDX-FileCopyrightText: 2009 Mathieu Desnoyers <mathieu.desnoyers (at) efficios.com>
      2 // SPDX-FileCopyrightText: 2009 Paul E. McKenney, IBM Corporation.
      3 //
      4 // SPDX-License-Identifier: LGPL-2.1-or-later
      5 
      6 #ifndef _URCU_BP_STATIC_H
      7 #define _URCU_BP_STATIC_H
      8 
      9 /*
     10  * Userspace RCU header.
     11  *
     12  * TO BE INCLUDED ONLY IN CODE THAT IS TO BE RECOMPILED ON EACH LIBURCU
     13  * RELEASE. See urcu.h for linking dynamically with the userspace rcu library.
     14  *
     15  * IBM's contributions to this file may be relicensed under LGPLv2 or later.
     16  */
     17 
     18 #include <stdlib.h>
     19 #include <pthread.h>
     20 #include <unistd.h>
     21 
     22 #include <urcu/annotate.h>
     23 #include <urcu/debug.h>
     24 #include <urcu/config.h>
     25 #include <urcu/compiler.h>
     26 #include <urcu/arch.h>
     27 #include <urcu/system.h>
     28 #include <urcu/uatomic.h>
     29 #include <urcu/list.h>
     30 #include <urcu/tls-compat.h>
     31 
     32 /*
     33  * This code section can only be included in LGPL 2.1 compatible source code.
     34  * See below for the function call wrappers which can be used in code meant to
     35  * be only linked with the Userspace RCU library. This comes with a small
     36  * performance degradation on the read-side due to the added function calls.
     37  * This is required to permit relinking with newer versions of the library.
     38  */
     39 
     40 #ifdef __cplusplus
     41 extern "C" {
     42 #endif
     43 
/*
 * Reader classification, as reported by urcu_bp_reader_state().
 */
enum urcu_bp_state {
	/* Nesting count non-zero, phase bit equal to the one in urcu_bp_gp.ctr. */
	URCU_BP_READER_ACTIVE_CURRENT = 0,
	/* Nesting count non-zero, phase bit different from urcu_bp_gp.ctr. */
	URCU_BP_READER_ACTIVE_OLD = 1,
	/* No counter to look at, or nesting count equal to zero. */
	URCU_BP_READER_INACTIVE = 2,
};
     49 
/*
 * Counter layout.
 *
 * The bit position of URCU_BP_GP_CTR_PHASE is a multiple of 8, so the
 * bits below it form a full 8-bit, 16-bit or 32-bit mask
 * (URCU_BP_GP_CTR_NEST_MASK).
 */

/* Increment applied to a counter for one level of read-side nesting. */
#define URCU_BP_GP_COUNT		(1UL)
/* Phase bit: placed at half the number of bits of the architecture long. */
#define URCU_BP_GP_CTR_PHASE		(1UL << (sizeof(long) * 4))
/* Every bit below the phase bit; these hold the nesting count. */
#define URCU_BP_GP_CTR_NEST_MASK	(URCU_BP_GP_CTR_PHASE - 1)
     58 
/*
 * Registers the calling thread as a reader.  Defined outside this header;
 * the inline read-side helpers here call it when the thread's
 * urcu_bp_reader TLS pointer is still NULL.
 */
extern void urcu_bp_register(void);
     63 
/*
 * Global grace-period state, aligned on a cache line.
 */
struct urcu_bp_gp {
	/*
	 * Grace-period counter.
	 *  - Carries the current URCU_BP_GP_CTR_PHASE bit.
	 *  - Its nesting part is kept at URCU_BP_GP_COUNT (i.e. 1) so that the
	 *    reader fast path can copy the whole word in one store.
	 *  - Only the writer stores to it, with the mutex held.
	 *  - Loaded by the writer and by readers.
	 */
	unsigned long ctr;
} __attribute__((aligned(CAA_CACHE_LINE_SIZE)));

/* The single instance of the global state; defined by the library. */
extern struct urcu_bp_gp urcu_bp_gp;
     76 
     77 struct urcu_bp_reader {
     78 	/* Data used by both reader and urcu_bp_synchronize_rcu() */
     79 	unsigned long ctr;
     80 	/* Data used for registry */
     81 	struct cds_list_head node __attribute__((aligned(CAA_CACHE_LINE_SIZE)));
     82 	pthread_t tid;
     83 	int alloc;	/* registry entry allocated */
     84 };
     85 
     86 /*
     87  * Bulletproof version keeps a pointer to a registry not part of the TLS.
     88  * Adds a pointer dereference on the read-side, but won't require to unregister
     89  * the reader thread.
     90  */
     91 extern DECLARE_URCU_TLS(struct urcu_bp_reader *, urcu_bp_reader);
     92 
/*
 * urcu_bp_has_sys_membarrier: an int variable defined elsewhere, unless
 * CONFIG_RCU_FORCE_SYS_MEMBARRIER is set, in which case it is the
 * compile-time constant 1.
 */
#ifndef CONFIG_RCU_FORCE_SYS_MEMBARRIER
extern int urcu_bp_has_sys_membarrier;
#else
#define urcu_bp_has_sys_membarrier	1
#endif
     98 
     99 static inline void urcu_bp_smp_mb_slave(void)
    100 {
    101 	if (caa_likely(urcu_bp_has_sys_membarrier))
    102 		cmm_barrier();
    103 	else
    104 		cmm_smp_mb();
    105 }
    106 
    107 static inline enum urcu_bp_state urcu_bp_reader_state(unsigned long *ctr,
    108 						cmm_annotate_t *group)
    109 {
    110 	unsigned long v;
    111 
    112 	if (ctr == NULL)
    113 		return URCU_BP_READER_INACTIVE;
    114 	/*
    115 	 * Make sure both tests below are done on the same version of *value
    116 	 * to insure consistency.
    117 	 */
    118 	v = uatomic_load(ctr, CMM_RELAXED);
    119 	cmm_annotate_group_mem_acquire(group, ctr);
    120 
    121 	if (!(v & URCU_BP_GP_CTR_NEST_MASK))
    122 		return URCU_BP_READER_INACTIVE;
    123 	if (!((v ^ urcu_bp_gp.ctr) & URCU_BP_GP_CTR_PHASE))
    124 		return URCU_BP_READER_ACTIVE_CURRENT;
    125 	return URCU_BP_READER_ACTIVE_OLD;
    126 }
    127 
    128 /*
    129  * Helper for _urcu_bp_read_lock().  The format of urcu_bp_gp.ctr (as well as
    130  * the per-thread rcu_reader.ctr) has the lower-order bits containing a count of
    131  * _urcu_bp_read_lock() nesting, and a single high-order URCU_BP_GP_CTR_PHASE bit
    132  * that contains either zero or one.  The smp_mb_slave() ensures that the accesses in
    133  * _urcu_bp_read_lock() happen before the subsequent read-side critical section.
    134  */
    135 static inline void _urcu_bp_read_lock_update(unsigned long tmp)
    136 {
    137 	if (caa_likely(!(tmp & URCU_BP_GP_CTR_NEST_MASK))) {
    138 		_CMM_STORE_SHARED(URCU_TLS(urcu_bp_reader)->ctr, _CMM_LOAD_SHARED(urcu_bp_gp.ctr));
    139 		urcu_bp_smp_mb_slave();
    140 	} else
    141 		_CMM_STORE_SHARED(URCU_TLS(urcu_bp_reader)->ctr, tmp + URCU_BP_GP_COUNT);
    142 }
    143 
    144 /*
    145  * Enter an RCU read-side critical section.
    146  *
    147  * The first cmm_barrier() call ensures that the compiler does not reorder
    148  * the body of _urcu_bp_read_lock() with a mutex.
    149  *
    150  * This function and its helper are both less than 10 lines long.  The
    151  * intent is that this function meets the 10-line criterion in LGPL,
    152  * allowing this function to be invoked directly from non-LGPL code.
    153  */
    154 static inline void _urcu_bp_read_lock(void)
    155 {
    156 	unsigned long tmp;
    157 
    158 	if (caa_unlikely(!URCU_TLS(urcu_bp_reader)))
    159 		urcu_bp_register(); /* If not yet registered. */
    160 	cmm_barrier();	/* Ensure the compiler does not reorder us with mutex */
    161 	tmp = URCU_TLS(urcu_bp_reader)->ctr;
    162 	urcu_assert_debug((tmp & URCU_BP_GP_CTR_NEST_MASK) != URCU_BP_GP_CTR_NEST_MASK);
    163 	_urcu_bp_read_lock_update(tmp);
    164 }
    165 
    166 /*
    167  * Exit an RCU read-side critical section.  This function is less than
    168  * 10 lines of code, and is intended to be usable by non-LGPL code, as
    169  * called out in LGPL.
    170  */
    171 static inline void _urcu_bp_read_unlock(void)
    172 {
    173 	unsigned long tmp;
    174 	unsigned long *ctr = &URCU_TLS(urcu_bp_reader)->ctr;
    175 
    176 	tmp = URCU_TLS(urcu_bp_reader)->ctr;
    177 	urcu_assert_debug(tmp & URCU_BP_GP_CTR_NEST_MASK);
    178 	/* Finish using rcu before decrementing the pointer. */
    179 	urcu_bp_smp_mb_slave();
    180 	cmm_annotate_mem_release(ctr);
    181 	uatomic_store(ctr, tmp - URCU_BP_GP_COUNT, CMM_RELAXED);
    182 	cmm_barrier();	/* Ensure the compiler does not reorder us with mutex */
    183 }
    184 
    185 /*
    186  * Returns whether within a RCU read-side critical section.
    187  *
    188  * This function is less than 10 lines long.  The intent is that this
    189  * function meets the 10-line criterion for LGPL, allowing this function
    190  * to be invoked directly from non-LGPL code.
    191  */
    192 static inline int _urcu_bp_read_ongoing(void)
    193 {
    194 	if (caa_unlikely(!URCU_TLS(urcu_bp_reader)))
    195 		urcu_bp_register(); /* If not yet registered. */
    196 	return URCU_TLS(urcu_bp_reader)->ctr & URCU_BP_GP_CTR_NEST_MASK;
    197 }
    198 
    199 #ifdef __cplusplus
    200 }
    201 #endif
    202 
    203 #endif /* _URCU_BP_STATIC_H */
    204