// SPDX-FileCopyrightText: 1991-1994 by Xerox Corporation.  All rights reserved.
// SPDX-FileCopyrightText: 1996-1999 by Silicon Graphics.  All rights reserved.
// SPDX-FileCopyrightText: 1999-2004 Hewlett-Packard Development Company, L.P.
// SPDX-FileCopyrightText: 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
//
// SPDX-License-Identifier: LicenseRef-Boehm-GC

#ifndef _URCU_ARCH_UATOMIC_X86_H
#define _URCU_ARCH_UATOMIC_X86_H

#include <stdlib.h>		/* For abort(3). */

/*
 * Code inspired by libuatomic_ops-1.2, inherited in part from the
 * Boehm-Demers-Weiser conservative garbage collector.
 */

#include <urcu/arch.h>
#include <urcu/config.h>
#include <urcu/compiler.h>
#include <urcu/system.h>

#define UATOMIC_HAS_ATOMIC_BYTE
#define UATOMIC_HAS_ATOMIC_SHORT

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Derived from AO_compare_and_swap() and AO_test_and_set_full().
 */

/*
 * The __hp() macro casts the void pointer @x to a pointer to a structure
 * containing an array of char of the specified size. This allows passing the
 * @addr arguments of the following inline functions as "m" and "+m" operands
 * to the assembly. The @size parameter should be a constant to support
 * compilers such as clang, which do not support VLA. Create typedefs because
 * C++ does not allow types to be defined in casts.
 */

typedef struct { char v[1]; } __hp_1;
typedef struct { char v[2]; } __hp_2;
typedef struct { char v[4]; } __hp_4;
typedef struct { char v[8]; } __hp_8;

#define __hp(size, x)	((__hp_##size *)(x))

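/*
 * Illustrative expansion (not part of the API): __hp(4, addr) expands to
 * ((__hp_4 *)(addr)), so an operand written as "+m"(*__hp(4, addr)) tells
 * the compiler that the asm reads and writes exactly 4 bytes at addr,
 * without requiring VLA support.
 */
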
/* cmpxchg */

static inline __attribute__((always_inline))
unsigned long __uatomic_cmpxchg(void *addr, unsigned long old,
			      unsigned long _new, int len)
{
	switch (len) {
	case 1:
	{
		unsigned char result = old;

		__asm__ __volatile__(
		"lock; cmpxchgb %2, %1"
			: "+a"(result), "+m"(*__hp(1, addr))
			: "q"((unsigned char)_new)
			: "memory");
		return result;
	}
	case 2:
	{
		unsigned short result = old;

		__asm__ __volatile__(
		"lock; cmpxchgw %2, %1"
			: "+a"(result), "+m"(*__hp(2, addr))
			: "r"((unsigned short)_new)
			: "memory");
		return result;
	}
	case 4:
	{
		unsigned int result = old;

		__asm__ __volatile__(
		"lock; cmpxchgl %2, %1"
			: "+a"(result), "+m"(*__hp(4, addr))
			: "r"((unsigned int)_new)
			: "memory");
		return result;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result = old;

		__asm__ __volatile__(
		"lock; cmpxchgq %2, %1"
			: "+a"(result), "+m"(*__hp(8, addr))
			: "r"((unsigned long)_new)
			: "memory");
		return result;
	}
#endif
	}
	/*
	 * Generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

#define _uatomic_cmpxchg(addr, old, _new)				      \
	((__typeof__(*(addr))) __uatomic_cmpxchg((addr),		      \
						caa_cast_long_keep_sign(old), \
						caa_cast_long_keep_sign(_new),\
						sizeof(*(addr))))

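/*
 * Illustrative usage sketch (hypothetical caller and names, not part of this
 * file).  The macro returns the value read from the location, so equality
 * with the expected old value means the swap took place:
 *
 *	static unsigned long state;
 *
 *	if (_uatomic_cmpxchg(&state, STATE_INIT, STATE_READY) == STATE_INIT)
 *		do_setup();
 */
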
/* xchg */

static inline __attribute__((always_inline))
unsigned long __uatomic_exchange(void *addr, unsigned long val, int len)
{
	/* Note: the "xchg" instruction does not need a "lock" prefix. */
	switch (len) {
	case 1:
	{
		unsigned char result;
		__asm__ __volatile__(
		"xchgb %0, %1"
			: "=q"(result), "+m"(*__hp(1, addr))
			: "0" ((unsigned char)val)
			: "memory");
		return result;
	}
	case 2:
	{
		unsigned short result;
		__asm__ __volatile__(
		"xchgw %0, %1"
			: "=r"(result), "+m"(*__hp(2, addr))
			: "0" ((unsigned short)val)
			: "memory");
		return result;
	}
	case 4:
	{
		unsigned int result;
		__asm__ __volatile__(
		"xchgl %0, %1"
			: "=r"(result), "+m"(*__hp(4, addr))
			: "0" ((unsigned int)val)
			: "memory");
		return result;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result;
		__asm__ __volatile__(
		"xchgq %0, %1"
			: "=r"(result), "+m"(*__hp(8, addr))
			: "0" ((unsigned long)val)
			: "memory");
		return result;
	}
#endif
	}
	/*
	 * Generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

#define _uatomic_xchg(addr, v)						      \
	((__typeof__(*(addr))) __uatomic_exchange((addr),		      \
						caa_cast_long_keep_sign(v),   \
						sizeof(*(addr))))

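/*
 * Illustrative usage sketch (hypothetical names): atomically publish a new
 * value and obtain the previous one in a single step, e.g. swapping out a
 * pointer:
 *
 *	struct node *prev = _uatomic_xchg(&head, new_node);
 *
 * On x86, "xchg" with a memory operand is implicitly locked, hence the
 * missing "lock" prefix above.
 */
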
/* uatomic_add_return */

static inline __attribute__((always_inline))
unsigned long __uatomic_add_return(void *addr, unsigned long val,
				 int len)
{
	switch (len) {
	case 1:
	{
		unsigned char result = val;

		__asm__ __volatile__(
		"lock; xaddb %1, %0"
			: "+m"(*__hp(1, addr)), "+q" (result)
			:
			: "memory");
		return result + (unsigned char)val;
	}
	case 2:
	{
		unsigned short result = val;

		__asm__ __volatile__(
		"lock; xaddw %1, %0"
			: "+m"(*__hp(2, addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned short)val;
	}
	case 4:
	{
		unsigned int result = val;

		__asm__ __volatile__(
		"lock; xaddl %1, %0"
			: "+m"(*__hp(4, addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned int)val;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result = val;

		__asm__ __volatile__(
		"lock; xaddq %1, %0"
			: "+m"(*__hp(8, addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned long)val;
	}
#endif
	}
	/*
	 * Generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

#define _uatomic_add_return(addr, v)					    \
	((__typeof__(*(addr))) __uatomic_add_return((addr),		    \
						caa_cast_long_keep_sign(v), \
						sizeof(*(addr))))

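/*
 * Illustrative usage sketch (hypothetical reference count).  "xadd" leaves
 * the value prior to the addition in its register operand, so the code above
 * adds "val" back in order to return the value after the addition:
 *
 *	if (_uatomic_add_return(&refcount, -1) == 0)
 *		free_object();
 *
 * Here refcount and free_object() are hypothetical caller-side names.
 */
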
/* uatomic_and */

static inline __attribute__((always_inline))
void __uatomic_and(void *addr, unsigned long val, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; andb %1, %0"
			: "=m"(*__hp(1, addr))
			: "iq" ((unsigned char)val)
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; andw %1, %0"
			: "=m"(*__hp(2, addr))
			: "ir" ((unsigned short)val)
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; andl %1, %0"
			: "=m"(*__hp(4, addr))
			: "ir" ((unsigned int)val)
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; andq %1, %0"
			: "=m"(*__hp(8, addr))
			: "er" ((unsigned long)val)
			: "memory");
		return;
	}
#endif
	}
	/*
	 * Generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_and(addr, v)						   \
	(__uatomic_and((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))

/* uatomic_or */

static inline __attribute__((always_inline))
void __uatomic_or(void *addr, unsigned long val, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; orb %1, %0"
			: "=m"(*__hp(1, addr))
			: "iq" ((unsigned char)val)
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; orw %1, %0"
			: "=m"(*__hp(2, addr))
			: "ir" ((unsigned short)val)
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; orl %1, %0"
			: "=m"(*__hp(4, addr))
			: "ir" ((unsigned int)val)
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; orq %1, %0"
			: "=m"(*__hp(8, addr))
			: "er" ((unsigned long)val)
			: "memory");
		return;
	}
#endif
	}
	/*
	 * Generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_or(addr, v)						   \
	(__uatomic_or((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))

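/*
 * Illustrative usage sketch (hypothetical flags word and bit names): clear
 * and set bits atomically when the previous value is not needed:
 *
 *	_uatomic_and(&flags, ~FLAG_BUSY);
 *	_uatomic_or(&flags, FLAG_READY);
 */
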
/* uatomic_add */

static inline __attribute__((always_inline))
void __uatomic_add(void *addr, unsigned long val, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; addb %1, %0"
			: "=m"(*__hp(1, addr))
			: "iq" ((unsigned char)val)
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; addw %1, %0"
			: "=m"(*__hp(2, addr))
			: "ir" ((unsigned short)val)
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; addl %1, %0"
			: "=m"(*__hp(4, addr))
			: "ir" ((unsigned int)val)
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; addq %1, %0"
			: "=m"(*__hp(8, addr))
			: "er" ((unsigned long)val)
			: "memory");
		return;
	}
#endif
	}
	/*
	 * Generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_add(addr, v)						   \
	(__uatomic_add((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))


/* uatomic_inc */

static inline __attribute__((always_inline))
void __uatomic_inc(void *addr, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; incb %0"
			: "=m"(*__hp(1, addr))
			:
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; incw %0"
			: "=m"(*__hp(2, addr))
			:
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; incl %0"
			: "=m"(*__hp(4, addr))
			:
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; incq %0"
			: "=m"(*__hp(8, addr))
			:
			: "memory");
		return;
	}
#endif
	}
	/*
	 * Generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_inc(addr)	(__uatomic_inc((addr), sizeof(*(addr))))

/* uatomic_dec */

static inline __attribute__((always_inline))
void __uatomic_dec(void *addr, int len)
{
	switch (len) {
	case 1:
	{
		__asm__ __volatile__(
		"lock; decb %0"
			: "=m"(*__hp(1, addr))
			:
			: "memory");
		return;
	}
	case 2:
	{
		__asm__ __volatile__(
		"lock; decw %0"
			: "=m"(*__hp(2, addr))
			:
			: "memory");
		return;
	}
	case 4:
	{
		__asm__ __volatile__(
		"lock; decl %0"
			: "=m"(*__hp(4, addr))
			:
			: "memory");
		return;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		__asm__ __volatile__(
		"lock; decq %0"
			: "=m"(*__hp(8, addr))
			:
			: "memory");
		return;
	}
#endif
	}
	/*
	 * Generate an illegal instruction. Cannot catch this with
	 * linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return;
}

#define _uatomic_dec(addr)	(__uatomic_dec((addr), sizeof(*(addr))))

#ifdef URCU_ARCH_X86_NO_CAS

/* For backwards compat */
#define CONFIG_RCU_COMPAT_ARCH 1

extern int __rcu_cas_avail;
extern int __rcu_cas_init(void);

#define UATOMIC_COMPAT(insn)						\
	((caa_likely(__rcu_cas_avail > 0))				\
	? (_uatomic_##insn)						\
		: ((caa_unlikely(__rcu_cas_avail < 0)			\
			? ((__rcu_cas_init() > 0)			\
				? (_uatomic_##insn)			\
				: (compat_uatomic_##insn))		\
			: (compat_uatomic_##insn))))

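/*
 * Dispatch sketch for the macro above (the initial value of __rcu_cas_avail
 * is defined elsewhere; a negative value is assumed to mean "not probed
 * yet"):
 *
 *	__rcu_cas_avail > 0              -> native _uatomic_##insn
 *	__rcu_cas_avail < 0, probe wins  -> __rcu_cas_init() > 0, native insn
 *	otherwise                        -> compat_uatomic_##insn fallback
 *
 * The fallback path only exists when URCU_ARCH_X86_NO_CAS is defined, for
 * x86 CPUs lacking the cmpxchg instruction.
 */
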
/*
 * We leave the return value so we don't break the ABI, but remove the
 * return value from the API.
 */
extern unsigned long _compat_uatomic_set(void *addr,
					 unsigned long _new, int len);
#define compat_uatomic_set(addr, _new)					       \
	((void) _compat_uatomic_set((addr),				       \
				caa_cast_long_keep_sign(_new),		       \
				sizeof(*(addr))))


extern unsigned long _compat_uatomic_xchg(void *addr,
					  unsigned long _new, int len);
#define compat_uatomic_xchg(addr, _new)					       \
	((__typeof__(*(addr))) _compat_uatomic_xchg((addr),		       \
						caa_cast_long_keep_sign(_new), \
						sizeof(*(addr))))

extern unsigned long _compat_uatomic_cmpxchg(void *addr, unsigned long old,
					     unsigned long _new, int len);
#define compat_uatomic_cmpxchg(addr, old, _new)				       \
	((__typeof__(*(addr))) _compat_uatomic_cmpxchg((addr),		       \
						caa_cast_long_keep_sign(old),  \
						caa_cast_long_keep_sign(_new), \
						sizeof(*(addr))))

extern void _compat_uatomic_and(void *addr, unsigned long _new, int len);
#define compat_uatomic_and(addr, v)				       \
	(_compat_uatomic_and((addr),				       \
			caa_cast_long_keep_sign(v),		       \
			sizeof(*(addr))))

extern void _compat_uatomic_or(void *addr, unsigned long _new, int len);
#define compat_uatomic_or(addr, v)				       \
	(_compat_uatomic_or((addr),				       \
			  caa_cast_long_keep_sign(v),		       \
			  sizeof(*(addr))))

extern unsigned long _compat_uatomic_add_return(void *addr,
						unsigned long _new, int len);
#define compat_uatomic_add_return(addr, v)				    \
	((__typeof__(*(addr))) _compat_uatomic_add_return((addr),	    \
						caa_cast_long_keep_sign(v), \
						sizeof(*(addr))))

#define compat_uatomic_add(addr, v)					       \
		((void)compat_uatomic_add_return((addr), (v)))
#define compat_uatomic_inc(addr)					       \
		(compat_uatomic_add((addr), 1))
#define compat_uatomic_dec(addr)					       \
		(compat_uatomic_add((addr), -1))

#else
#define UATOMIC_COMPAT(insn)	(_uatomic_##insn)
#endif

/*
 * All RMW operations have an implicit lock prefix, which on x86 already acts
 * as a full memory barrier.  The requested memory ordering is therefore
 * ignored for these operations: every ordering can be honored without
 * emitting any additional barrier.
 */

#define uatomic_cmpxchg_mo(addr, old, _new, mos, mof)		\
		UATOMIC_COMPAT(cmpxchg(addr, old, _new))

#define uatomic_xchg_mo(addr, v, mo)		\
		UATOMIC_COMPAT(xchg(addr, v))

#define uatomic_and_mo(addr, v, mo)		\
		UATOMIC_COMPAT(and(addr, v))
#define cmm_smp_mb__before_uatomic_and()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_and()		cmm_barrier()

#define uatomic_or_mo(addr, v, mo)		\
		UATOMIC_COMPAT(or(addr, v))
#define cmm_smp_mb__before_uatomic_or()		cmm_barrier()
#define cmm_smp_mb__after_uatomic_or()		cmm_barrier()

#define uatomic_add_return_mo(addr, v, mo)		\
		UATOMIC_COMPAT(add_return(addr, v))

#define uatomic_add_mo(addr, v, mo)	UATOMIC_COMPAT(add(addr, v))
#define cmm_smp_mb__before_uatomic_add()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_add()		cmm_barrier()

#define uatomic_inc_mo(addr, mo)	UATOMIC_COMPAT(inc(addr))
#define cmm_smp_mb__before_uatomic_inc()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_inc()		cmm_barrier()

#define uatomic_dec_mo(addr, mo)	UATOMIC_COMPAT(dec(addr))
#define cmm_smp_mb__before_uatomic_dec()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_dec()		cmm_barrier()

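/*
 * Illustrative consequence of the above (hypothetical counter): the mo
 * argument does not change the generated code for RMW operations, so
 *
 *	uatomic_add_mo(&counter, 1, CMM_RELAXED);
 *	uatomic_add_mo(&counter, 1, CMM_SEQ_CST);
 *
 * both expand to the same "lock; add" sequence, whose implicit full barrier
 * already satisfies the strongest ordering that can be requested.
 */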

static inline void _cmm_compat_c11_smp_mb__before_uatomic_load_mo(enum cmm_memorder mo)
{
	/*
	 * An SMP barrier is not necessary for CMM_SEQ_CST because only a
	 * previous store can be reordered with the load.  However, emitting
	 * the memory barrier after the store is sufficient to prevent
	 * reordering between the two.  This follows the toolchains' decision
	 * to emit the memory fence on stores instead of loads.
	 *
	 * A compiler barrier is necessary because the underlying operation
	 * does not clobber the registers.
	 */
	switch (mo) {
	case CMM_RELAXED:	/* Fall-through */
	case CMM_ACQUIRE:	/* Fall-through */
	case CMM_CONSUME:	/* Fall-through */
	case CMM_SEQ_CST:	/* Fall-through */
	case CMM_SEQ_CST_FENCE:
		cmm_barrier();
		break;
	case CMM_ACQ_REL:	/* Fall-through */
	case CMM_RELEASE:	/* Fall-through */
	default:
		abort();
		break;
	}
}

static inline void _cmm_compat_c11_smp_mb__after_uatomic_load_mo(enum cmm_memorder mo)
{
	/*
	 * An SMP barrier is not necessary for CMM_SEQ_CST because following
	 * loads and stores cannot be reordered with the load.
	 *
	 * An SMP barrier is however necessary for CMM_SEQ_CST_FENCE to
	 * respect the memory model, since the underlying operation does not
	 * have a lock prefix.
	 *
	 * A compiler barrier is necessary because the underlying operation
	 * does not clobber the registers.
	 */
	switch (mo) {
	case CMM_SEQ_CST_FENCE:
		cmm_smp_mb();
		break;
	case CMM_RELAXED:	/* Fall-through */
	case CMM_ACQUIRE:	/* Fall-through */
	case CMM_CONSUME:	/* Fall-through */
	case CMM_SEQ_CST:
		cmm_barrier();
		break;
	case CMM_ACQ_REL:	/* Fall-through */
	case CMM_RELEASE:	/* Fall-through */
	default:
		abort();
		break;
	}
}

static inline void _cmm_compat_c11_smp_mb__before_uatomic_store_mo(enum cmm_memorder mo)
{
	/*
	 * An SMP barrier is not necessary for CMM_SEQ_CST because the store
	 * can only be reordered with later loads.
	 *
	 * A compiler barrier is necessary because the underlying operation
	 * does not clobber the registers.
	 */
	switch (mo) {
	case CMM_RELAXED:	/* Fall-through */
	case CMM_RELEASE:	/* Fall-through */
	case CMM_SEQ_CST:	/* Fall-through */
	case CMM_SEQ_CST_FENCE:
		cmm_barrier();
		break;
	case CMM_ACQ_REL:	/* Fall-through */
	case CMM_ACQUIRE:	/* Fall-through */
	case CMM_CONSUME:	/* Fall-through */
	default:
		abort();
		break;
	}
}

static inline void _cmm_compat_c11_smp_mb__after_uatomic_store_mo(enum cmm_memorder mo)
{
	/*
	 * An SMP barrier is necessary for CMM_SEQ_CST because the store can
	 * be reordered with later loads.  Since no memory barrier is being
	 * emitted before loads, one has to be emitted after the store.  This
	 * follows the toolchains' decision to emit the memory fence on stores
	 * instead of loads.
	 *
	 * An SMP barrier is necessary for CMM_SEQ_CST_FENCE to respect the
	 * memory model, since the underlying store does not have a lock
	 * prefix.
	 *
	 * A compiler barrier is necessary because the underlying operation
	 * does not clobber the registers.
	 */
	switch (mo) {
	case CMM_SEQ_CST:	/* Fall-through */
	case CMM_SEQ_CST_FENCE:
		cmm_smp_mb();
		break;
	case CMM_RELAXED:	/* Fall-through */
	case CMM_RELEASE:
		cmm_barrier();
		break;
	case CMM_ACQ_REL:	/* Fall-through */
	case CMM_ACQUIRE:	/* Fall-through */
	case CMM_CONSUME:	/* Fall-through */
	default:
		abort();
		break;
	}
}

#define _cmm_compat_c11_smp_mb__before_mo(operation, mo)		\
	do {								\
		_cmm_compat_c11_smp_mb__before_ ## operation ## _mo (mo); \
	} while (0)

#define _cmm_compat_c11_smp_mb__after_mo(operation, mo)			\
	do {								\
		_cmm_compat_c11_smp_mb__after_ ## operation ## _mo (mo); \
	} while (0)

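/*
 * Composition sketch (illustrative; the real wrappers live in
 * <urcu/uatomic/generic.h>): a store with an explicit memory order is
 * expected to be bracketed by the two dispatch macros above, e.g.
 *
 *	_cmm_compat_c11_smp_mb__before_mo(uatomic_store, mo);
 *	CMM_STORE_SHARED(*addr, v);
 *	_cmm_compat_c11_smp_mb__after_mo(uatomic_store, mo);
 *
 * so that CMM_SEQ_CST stores get their trailing cmm_smp_mb() while relaxed
 * and release stores only pay a compiler barrier.
 */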

#ifdef __cplusplus
}
#endif

#include <urcu/uatomic/generic.h>

#endif /* _URCU_ARCH_UATOMIC_X86_H */