Home | History | Annotate | Line # | Download | only in uatomic
      1  1.1  christos // SPDX-FileCopyrightText: 1991-1994 by Xerox Corporation.  All rights reserved.
      2  1.1  christos // SPDX-FileCopyrightText: 1996-1999 by Silicon Graphics.  All rights reserved.
      3  1.1  christos // SPDX-FileCopyrightText: 1999-2004 Hewlett-Packard Development Company, L.P.
      4  1.1  christos // SPDX-FileCopyrightText: 2009 Mathieu Desnoyers <mathieu.desnoyers (at) efficios.com>
      5  1.1  christos //
      6  1.1  christos // SPDX-License-Identifier: LicenseRef-Boehm-GC
      7  1.1  christos 
      8  1.1  christos #ifndef _URCU_ARCH_UATOMIC_X86_H
      9  1.1  christos #define _URCU_ARCH_UATOMIC_X86_H
     10  1.1  christos 
     11  1.1  christos #include <stdlib.h>		/* For abort(3). */
     12  1.1  christos 
     13  1.1  christos /*
     14  1.1  christos  * Code inspired from libuatomic_ops-1.2, inherited in part from the
     15  1.1  christos  * Boehm-Demers-Weiser conservative garbage collector.
     16  1.1  christos  */
     17  1.1  christos 
     18  1.1  christos #include <urcu/arch.h>
     19  1.1  christos #include <urcu/config.h>
     20  1.1  christos #include <urcu/compiler.h>
     21  1.1  christos #include <urcu/system.h>
     22  1.1  christos 
     23  1.1  christos #define UATOMIC_HAS_ATOMIC_BYTE
     24  1.1  christos #define UATOMIC_HAS_ATOMIC_SHORT
     25  1.1  christos 
     26  1.1  christos #ifdef __cplusplus
     27  1.1  christos extern "C" {
     28  1.1  christos #endif
     29  1.1  christos 
     30  1.1  christos /*
     31  1.1  christos  * Derived from AO_compare_and_swap() and AO_test_and_set_full().
     32  1.1  christos  */
     33  1.1  christos 
     34  1.1  christos /*
     35  1.1  christos  * The __hp() macro casts the void pointer @x to a pointer to a structure
     36  1.1  christos  * containing an array of char of the specified size. This allows passing the
     37  1.1  christos  * @addr arguments of the following inline functions as "m" and "+m" operands
     38  1.1  christos  * to the assembly. The @size parameter should be a constant to support
     39  1.1  christos  * compilers such as clang which do not support VLA. Create typedefs because
     40  1.1  christos  * C++ does not allow types be defined in casts.
     41  1.1  christos  */
     42  1.1  christos 
/*
 * Fixed-size "handle" types, one per supported operand width.  The
 * __hp() macro below casts @x to a pointer to the struct of @size
 * bytes, so that *__hp(size, x) can be used as a correctly-sized
 * "m"/"+m" memory operand in the inline assembly.  @size must be a
 * constant (1, 2, 4 or 8); typedefs are used because C++ does not
 * allow defining types inside casts.
 */
typedef struct { char v[1]; } __hp_1;
typedef struct { char v[2]; } __hp_2;
typedef struct { char v[4]; } __hp_4;
typedef struct { char v[8]; } __hp_8;

/* Select the __hp_<size> handle type for constant @size. */
#define __hp(size, x)	((__hp_##size *)(x))
     49  1.1  christos 
     50  1.1  christos /* cmpxchg */
     51  1.1  christos 
/*
 * Atomic compare-and-swap of the @len-byte integer at @addr: if it
 * equals @old, store @_new; in all cases return the value previously
 * held at @addr (zero-extended from @len bytes).
 *
 * Implemented with the "lock; cmpxchg" instruction family.  cmpxchg
 * implicitly uses the accumulator register ("a" constraint) for the
 * expected/previous value.  The "memory" clobber makes each operation
 * a compiler barrier; the lock prefix makes it a full memory barrier
 * on x86.
 */
static inline __attribute__((always_inline))
unsigned long __uatomic_cmpxchg(void *addr, unsigned long old,
			      unsigned long _new, int len)
{
	switch (len) {
	case 1:
	{
		unsigned char result = old;

		__asm__ __volatile__(
		"lock; cmpxchgb %2, %1"
			: "+a"(result), "+m"(*__hp(1, addr))
			: "q"((unsigned char)_new)
			: "memory");
		return result;
	}
	case 2:
	{
		unsigned short result = old;

		__asm__ __volatile__(
		"lock; cmpxchgw %2, %1"
			: "+a"(result), "+m"(*__hp(2, addr))
			: "r"((unsigned short)_new)
			: "memory");
		return result;
	}
	case 4:
	{
		unsigned int result = old;

		__asm__ __volatile__(
		"lock; cmpxchgl %2, %1"
			: "+a"(result), "+m"(*__hp(4, addr))
			: "r"((unsigned int)_new)
			: "memory");
		return result;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result = old;

		__asm__ __volatile__(
		"lock; cmpxchgq %2, %1"
			: "+a"(result), "+m"(*__hp(8, addr))
			: "r"((unsigned long)_new)
			: "memory");
		return result;
	}
#endif
	}
	/*
	 * Unsupported @len: generate an illegal instruction. Cannot catch
	 * this with linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

/* Type-generic cmpxchg: dispatch on sizeof(*addr), keep value signedness. */
#define _uatomic_cmpxchg(addr, old, _new)				      \
	((__typeof__(*(addr))) __uatomic_cmpxchg((addr),		      \
						caa_cast_long_keep_sign(old), \
						caa_cast_long_keep_sign(_new),\
						sizeof(*(addr))))
    117  1.1  christos 
    118  1.1  christos /* xchg */
    119  1.1  christos 
/*
 * Atomically exchange the @len-byte integer at @addr with @val and
 * return the previous value (zero-extended from @len bytes).
 *
 * Note: the "xchg" instruction does not need a "lock" prefix: it is
 * implicitly locked when one operand is in memory.  The "0" input
 * constraint places @val in the same register as output %0.
 */
static inline __attribute__((always_inline))
unsigned long __uatomic_exchange(void *addr, unsigned long val, int len)
{
	switch (len) {
	case 1:
	{
		unsigned char result;
		__asm__ __volatile__(
		"xchgb %0, %1"
			: "=q"(result), "+m"(*__hp(1, addr))
			: "0" ((unsigned char)val)
			: "memory");
		return result;
	}
	case 2:
	{
		unsigned short result;
		__asm__ __volatile__(
		"xchgw %0, %1"
			: "=r"(result), "+m"(*__hp(2, addr))
			: "0" ((unsigned short)val)
			: "memory");
		return result;
	}
	case 4:
	{
		unsigned int result;
		__asm__ __volatile__(
		"xchgl %0, %1"
			: "=r"(result), "+m"(*__hp(4, addr))
			: "0" ((unsigned int)val)
			: "memory");
		return result;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result;
		__asm__ __volatile__(
		"xchgq %0, %1"
			: "=r"(result), "+m"(*__hp(8, addr))
			: "0" ((unsigned long)val)
			: "memory");
		return result;
	}
#endif
	}
	/*
	 * Unsupported @len: generate an illegal instruction. Cannot catch
	 * this with linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

/* Type-generic xchg: dispatch on sizeof(*addr), keep value signedness. */
#define _uatomic_xchg(addr, v)						      \
	((__typeof__(*(addr))) __uatomic_exchange((addr),		      \
						caa_cast_long_keep_sign(v),   \
						sizeof(*(addr))))
    180  1.1  christos 
    181  1.1  christos /* uatomic_add_return */
    182  1.1  christos 
/*
 * Atomically add @val to the @len-byte integer at @addr and return the
 * NEW value.  "lock; xadd" leaves the OLD value in the register
 * operand, so @val is added back before returning; the casts make the
 * addition wrap at the operand width.
 */
static inline __attribute__((always_inline))
unsigned long __uatomic_add_return(void *addr, unsigned long val,
				 int len)
{
	switch (len) {
	case 1:
	{
		unsigned char result = val;

		__asm__ __volatile__(
		"lock; xaddb %1, %0"
			: "+m"(*__hp(1, addr)), "+q" (result)
			:
			: "memory");
		/* result now holds the old value; return old + val. */
		return result + (unsigned char)val;
	}
	case 2:
	{
		unsigned short result = val;

		__asm__ __volatile__(
		"lock; xaddw %1, %0"
			: "+m"(*__hp(2, addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned short)val;
	}
	case 4:
	{
		unsigned int result = val;

		__asm__ __volatile__(
		"lock; xaddl %1, %0"
			: "+m"(*__hp(4, addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned int)val;
	}
#if (CAA_BITS_PER_LONG == 64)
	case 8:
	{
		unsigned long result = val;

		__asm__ __volatile__(
		"lock; xaddq %1, %0"
			: "+m"(*__hp(8, addr)), "+r" (result)
			:
			: "memory");
		return result + (unsigned long)val;
	}
#endif
	}
	/*
	 * Unsupported @len: generate an illegal instruction. Cannot catch
	 * this with linker tricks when optimizations are disabled.
	 */
	__asm__ __volatile__("ud2");
	return 0;
}

/* Type-generic add-and-return-new: dispatch on sizeof(*addr). */
#define _uatomic_add_return(addr, v)					    \
	((__typeof__(*(addr))) __uatomic_add_return((addr),		    \
						caa_cast_long_keep_sign(v), \
						sizeof(*(addr))))
    247  1.1  christos 
    248  1.1  christos /* uatomic_and */
    249  1.1  christos 
    250  1.1  christos static inline __attribute__((always_inline))
    251  1.1  christos void __uatomic_and(void *addr, unsigned long val, int len)
    252  1.1  christos {
    253  1.1  christos 	switch (len) {
    254  1.1  christos 	case 1:
    255  1.1  christos 	{
    256  1.1  christos 		__asm__ __volatile__(
    257  1.1  christos 		"lock; andb %1, %0"
    258  1.1  christos 			: "=m"(*__hp(1, addr))
    259  1.1  christos 			: "iq" ((unsigned char)val)
    260  1.1  christos 			: "memory");
    261  1.1  christos 		return;
    262  1.1  christos 	}
    263  1.1  christos 	case 2:
    264  1.1  christos 	{
    265  1.1  christos 		__asm__ __volatile__(
    266  1.1  christos 		"lock; andw %1, %0"
    267  1.1  christos 			: "=m"(*__hp(2, addr))
    268  1.1  christos 			: "ir" ((unsigned short)val)
    269  1.1  christos 			: "memory");
    270  1.1  christos 		return;
    271  1.1  christos 	}
    272  1.1  christos 	case 4:
    273  1.1  christos 	{
    274  1.1  christos 		__asm__ __volatile__(
    275  1.1  christos 		"lock; andl %1, %0"
    276  1.1  christos 			: "=m"(*__hp(4, addr))
    277  1.1  christos 			: "ir" ((unsigned int)val)
    278  1.1  christos 			: "memory");
    279  1.1  christos 		return;
    280  1.1  christos 	}
    281  1.1  christos #if (CAA_BITS_PER_LONG == 64)
    282  1.1  christos 	case 8:
    283  1.1  christos 	{
    284  1.1  christos 		__asm__ __volatile__(
    285  1.1  christos 		"lock; andq %1, %0"
    286  1.1  christos 			: "=m"(*__hp(8, addr))
    287  1.1  christos 			: "er" ((unsigned long)val)
    288  1.1  christos 			: "memory");
    289  1.1  christos 		return;
    290  1.1  christos 	}
    291  1.1  christos #endif
    292  1.1  christos 	}
    293  1.1  christos 	/*
    294  1.1  christos 	 * generate an illegal instruction. Cannot catch this with
    295  1.1  christos 	 * linker tricks when optimizations are disabled.
    296  1.1  christos 	 */
    297  1.1  christos 	__asm__ __volatile__("ud2");
    298  1.1  christos 	return;
    299  1.1  christos }
    300  1.1  christos 
    301  1.1  christos #define _uatomic_and(addr, v)						   \
    302  1.1  christos 	(__uatomic_and((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))
    303  1.1  christos 
    304  1.1  christos /* uatomic_or */
    305  1.1  christos 
    306  1.1  christos static inline __attribute__((always_inline))
    307  1.1  christos void __uatomic_or(void *addr, unsigned long val, int len)
    308  1.1  christos {
    309  1.1  christos 	switch (len) {
    310  1.1  christos 	case 1:
    311  1.1  christos 	{
    312  1.1  christos 		__asm__ __volatile__(
    313  1.1  christos 		"lock; orb %1, %0"
    314  1.1  christos 			: "=m"(*__hp(1, addr))
    315  1.1  christos 			: "iq" ((unsigned char)val)
    316  1.1  christos 			: "memory");
    317  1.1  christos 		return;
    318  1.1  christos 	}
    319  1.1  christos 	case 2:
    320  1.1  christos 	{
    321  1.1  christos 		__asm__ __volatile__(
    322  1.1  christos 		"lock; orw %1, %0"
    323  1.1  christos 			: "=m"(*__hp(2, addr))
    324  1.1  christos 			: "ir" ((unsigned short)val)
    325  1.1  christos 			: "memory");
    326  1.1  christos 		return;
    327  1.1  christos 	}
    328  1.1  christos 	case 4:
    329  1.1  christos 	{
    330  1.1  christos 		__asm__ __volatile__(
    331  1.1  christos 		"lock; orl %1, %0"
    332  1.1  christos 			: "=m"(*__hp(4, addr))
    333  1.1  christos 			: "ir" ((unsigned int)val)
    334  1.1  christos 			: "memory");
    335  1.1  christos 		return;
    336  1.1  christos 	}
    337  1.1  christos #if (CAA_BITS_PER_LONG == 64)
    338  1.1  christos 	case 8:
    339  1.1  christos 	{
    340  1.1  christos 		__asm__ __volatile__(
    341  1.1  christos 		"lock; orq %1, %0"
    342  1.1  christos 			: "=m"(*__hp(8, addr))
    343  1.1  christos 			: "er" ((unsigned long)val)
    344  1.1  christos 			: "memory");
    345  1.1  christos 		return;
    346  1.1  christos 	}
    347  1.1  christos #endif
    348  1.1  christos 	}
    349  1.1  christos 	/*
    350  1.1  christos 	 * generate an illegal instruction. Cannot catch this with
    351  1.1  christos 	 * linker tricks when optimizations are disabled.
    352  1.1  christos 	 */
    353  1.1  christos 	__asm__ __volatile__("ud2");
    354  1.1  christos 	return;
    355  1.1  christos }
    356  1.1  christos 
    357  1.1  christos #define _uatomic_or(addr, v)						   \
    358  1.1  christos 	(__uatomic_or((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))
    359  1.1  christos 
    360  1.1  christos /* uatomic_add */
    361  1.1  christos 
    362  1.1  christos static inline __attribute__((always_inline))
    363  1.1  christos void __uatomic_add(void *addr, unsigned long val, int len)
    364  1.1  christos {
    365  1.1  christos 	switch (len) {
    366  1.1  christos 	case 1:
    367  1.1  christos 	{
    368  1.1  christos 		__asm__ __volatile__(
    369  1.1  christos 		"lock; addb %1, %0"
    370  1.1  christos 			: "=m"(*__hp(1, addr))
    371  1.1  christos 			: "iq" ((unsigned char)val)
    372  1.1  christos 			: "memory");
    373  1.1  christos 		return;
    374  1.1  christos 	}
    375  1.1  christos 	case 2:
    376  1.1  christos 	{
    377  1.1  christos 		__asm__ __volatile__(
    378  1.1  christos 		"lock; addw %1, %0"
    379  1.1  christos 			: "=m"(*__hp(2, addr))
    380  1.1  christos 			: "ir" ((unsigned short)val)
    381  1.1  christos 			: "memory");
    382  1.1  christos 		return;
    383  1.1  christos 	}
    384  1.1  christos 	case 4:
    385  1.1  christos 	{
    386  1.1  christos 		__asm__ __volatile__(
    387  1.1  christos 		"lock; addl %1, %0"
    388  1.1  christos 			: "=m"(*__hp(4, addr))
    389  1.1  christos 			: "ir" ((unsigned int)val)
    390  1.1  christos 			: "memory");
    391  1.1  christos 		return;
    392  1.1  christos 	}
    393  1.1  christos #if (CAA_BITS_PER_LONG == 64)
    394  1.1  christos 	case 8:
    395  1.1  christos 	{
    396  1.1  christos 		__asm__ __volatile__(
    397  1.1  christos 		"lock; addq %1, %0"
    398  1.1  christos 			: "=m"(*__hp(8, addr))
    399  1.1  christos 			: "er" ((unsigned long)val)
    400  1.1  christos 			: "memory");
    401  1.1  christos 		return;
    402  1.1  christos 	}
    403  1.1  christos #endif
    404  1.1  christos 	}
    405  1.1  christos 	/*
    406  1.1  christos 	 * generate an illegal instruction. Cannot catch this with
    407  1.1  christos 	 * linker tricks when optimizations are disabled.
    408  1.1  christos 	 */
    409  1.1  christos 	__asm__ __volatile__("ud2");
    410  1.1  christos 	return;
    411  1.1  christos }
    412  1.1  christos 
    413  1.1  christos #define _uatomic_add(addr, v)						   \
    414  1.1  christos 	(__uatomic_add((addr), caa_cast_long_keep_sign(v), sizeof(*(addr))))
    415  1.1  christos 
    416  1.1  christos 
    417  1.1  christos /* uatomic_inc */
    418  1.1  christos 
    419  1.1  christos static inline __attribute__((always_inline))
    420  1.1  christos void __uatomic_inc(void *addr, int len)
    421  1.1  christos {
    422  1.1  christos 	switch (len) {
    423  1.1  christos 	case 1:
    424  1.1  christos 	{
    425  1.1  christos 		__asm__ __volatile__(
    426  1.1  christos 		"lock; incb %0"
    427  1.1  christos 			: "=m"(*__hp(1, addr))
    428  1.1  christos 			:
    429  1.1  christos 			: "memory");
    430  1.1  christos 		return;
    431  1.1  christos 	}
    432  1.1  christos 	case 2:
    433  1.1  christos 	{
    434  1.1  christos 		__asm__ __volatile__(
    435  1.1  christos 		"lock; incw %0"
    436  1.1  christos 			: "=m"(*__hp(2, addr))
    437  1.1  christos 			:
    438  1.1  christos 			: "memory");
    439  1.1  christos 		return;
    440  1.1  christos 	}
    441  1.1  christos 	case 4:
    442  1.1  christos 	{
    443  1.1  christos 		__asm__ __volatile__(
    444  1.1  christos 		"lock; incl %0"
    445  1.1  christos 			: "=m"(*__hp(4, addr))
    446  1.1  christos 			:
    447  1.1  christos 			: "memory");
    448  1.1  christos 		return;
    449  1.1  christos 	}
    450  1.1  christos #if (CAA_BITS_PER_LONG == 64)
    451  1.1  christos 	case 8:
    452  1.1  christos 	{
    453  1.1  christos 		__asm__ __volatile__(
    454  1.1  christos 		"lock; incq %0"
    455  1.1  christos 			: "=m"(*__hp(8, addr))
    456  1.1  christos 			:
    457  1.1  christos 			: "memory");
    458  1.1  christos 		return;
    459  1.1  christos 	}
    460  1.1  christos #endif
    461  1.1  christos 	}
    462  1.1  christos 	/* generate an illegal instruction. Cannot catch this with linker tricks
    463  1.1  christos 	 * when optimizations are disabled. */
    464  1.1  christos 	__asm__ __volatile__("ud2");
    465  1.1  christos 	return;
    466  1.1  christos }
    467  1.1  christos 
    468  1.1  christos #define _uatomic_inc(addr)	(__uatomic_inc((addr), sizeof(*(addr))))
    469  1.1  christos 
    470  1.1  christos /* uatomic_dec */
    471  1.1  christos 
    472  1.1  christos static inline __attribute__((always_inline))
    473  1.1  christos void __uatomic_dec(void *addr, int len)
    474  1.1  christos {
    475  1.1  christos 	switch (len) {
    476  1.1  christos 	case 1:
    477  1.1  christos 	{
    478  1.1  christos 		__asm__ __volatile__(
    479  1.1  christos 		"lock; decb %0"
    480  1.1  christos 			: "=m"(*__hp(1, addr))
    481  1.1  christos 			:
    482  1.1  christos 			: "memory");
    483  1.1  christos 		return;
    484  1.1  christos 	}
    485  1.1  christos 	case 2:
    486  1.1  christos 	{
    487  1.1  christos 		__asm__ __volatile__(
    488  1.1  christos 		"lock; decw %0"
    489  1.1  christos 			: "=m"(*__hp(2, addr))
    490  1.1  christos 			:
    491  1.1  christos 			: "memory");
    492  1.1  christos 		return;
    493  1.1  christos 	}
    494  1.1  christos 	case 4:
    495  1.1  christos 	{
    496  1.1  christos 		__asm__ __volatile__(
    497  1.1  christos 		"lock; decl %0"
    498  1.1  christos 			: "=m"(*__hp(4, addr))
    499  1.1  christos 			:
    500  1.1  christos 			: "memory");
    501  1.1  christos 		return;
    502  1.1  christos 	}
    503  1.1  christos #if (CAA_BITS_PER_LONG == 64)
    504  1.1  christos 	case 8:
    505  1.1  christos 	{
    506  1.1  christos 		__asm__ __volatile__(
    507  1.1  christos 		"lock; decq %0"
    508  1.1  christos 			: "=m"(*__hp(8, addr))
    509  1.1  christos 			:
    510  1.1  christos 			: "memory");
    511  1.1  christos 		return;
    512  1.1  christos 	}
    513  1.1  christos #endif
    514  1.1  christos 	}
    515  1.1  christos 	/*
    516  1.1  christos 	 * generate an illegal instruction. Cannot catch this with
    517  1.1  christos 	 * linker tricks when optimizations are disabled.
    518  1.1  christos 	 */
    519  1.1  christos 	__asm__ __volatile__("ud2");
    520  1.1  christos 	return;
    521  1.1  christos }
    522  1.1  christos 
    523  1.1  christos #define _uatomic_dec(addr)	(__uatomic_dec((addr), sizeof(*(addr))))
    524  1.1  christos 
#ifdef URCU_ARCH_X86_NO_CAS

/* For backwards compat */
#define CONFIG_RCU_COMPAT_ARCH 1

/*
 * Lazy runtime detection of the cmpxchg instruction (absent on very old
 * x86).  Judging by UATOMIC_COMPAT below: __rcu_cas_avail > 0 means
 * cmpxchg is usable, < 0 means not probed yet, otherwise fall back to
 * the compat (non-cmpxchg) implementations; __rcu_cas_init() performs
 * the probe and returns the updated availability.
 */
extern int __rcu_cas_avail;
extern int __rcu_cas_init(void);

/*
 * Dispatch to the native _uatomic_##insn when cmpxchg is available,
 * otherwise to compat_uatomic_##insn; probes lazily on first use.
 */
#define UATOMIC_COMPAT(insn)							\
	((caa_likely(__rcu_cas_avail > 0))						\
	? (_uatomic_##insn)							\
		: ((caa_unlikely(__rcu_cas_avail < 0)				\
			? ((__rcu_cas_init() > 0)				\
				? (_uatomic_##insn)				\
				: (compat_uatomic_##insn))			\
			: (compat_uatomic_##insn))))

/*
 * We leave the return value so we don't break the ABI, but remove the
 * return value from the API.
 */
extern unsigned long _compat_uatomic_set(void *addr,
					 unsigned long _new, int len);
#define compat_uatomic_set(addr, _new)				     	       \
	((void) _compat_uatomic_set((addr),				       \
				caa_cast_long_keep_sign(_new),		       \
				sizeof(*(addr))))


extern unsigned long _compat_uatomic_xchg(void *addr,
					  unsigned long _new, int len);
#define compat_uatomic_xchg(addr, _new)					       \
	((__typeof__(*(addr))) _compat_uatomic_xchg((addr),		       \
						caa_cast_long_keep_sign(_new), \
						sizeof(*(addr))))

extern unsigned long _compat_uatomic_cmpxchg(void *addr, unsigned long old,
					     unsigned long _new, int len);
#define compat_uatomic_cmpxchg(addr, old, _new)				       \
	((__typeof__(*(addr))) _compat_uatomic_cmpxchg((addr),		       \
						caa_cast_long_keep_sign(old),  \
						caa_cast_long_keep_sign(_new), \
						sizeof(*(addr))))

extern void _compat_uatomic_and(void *addr, unsigned long _new, int len);
#define compat_uatomic_and(addr, v)				       \
	(_compat_uatomic_and((addr),				       \
			caa_cast_long_keep_sign(v),		       \
			sizeof(*(addr))))

extern void _compat_uatomic_or(void *addr, unsigned long _new, int len);
#define compat_uatomic_or(addr, v)				       \
	(_compat_uatomic_or((addr),				       \
			  caa_cast_long_keep_sign(v),		       \
			  sizeof(*(addr))))

extern unsigned long _compat_uatomic_add_return(void *addr,
						unsigned long _new, int len);
#define compat_uatomic_add_return(addr, v)			            \
	((__typeof__(*(addr))) _compat_uatomic_add_return((addr),     	    \
						caa_cast_long_keep_sign(v), \
						sizeof(*(addr))))

/* add/inc/dec expressed via add_return, discarding the result. */
#define compat_uatomic_add(addr, v)					       \
		((void)compat_uatomic_add_return((addr), (v)))
#define compat_uatomic_inc(addr)					       \
		(compat_uatomic_add((addr), 1))
#define compat_uatomic_dec(addr)					       \
		(compat_uatomic_add((addr), -1))

#else
/* cmpxchg always available: dispatch straight to the native operations. */
#define UATOMIC_COMPAT(insn)	(_uatomic_##insn)
#endif
    598  1.1  christos 
    599  1.1  christos /*
    600  1.1  christos  * All RMW operations have an implicit lock prefix.  Thus, ignoring memory
    601  1.1  christos  * ordering for these operations, since they can all be respected by not
    602  1.1  christos  * emitting any memory barrier.
    603  1.1  christos  */
    604  1.1  christos 
/*
 * Memory-order-taking entry points.  The mo arguments are ignored: all
 * the operations below are lock-prefixed RMW instructions (full memory
 * barriers on x86 -- see the comment above), so only compiler barriers
 * are needed around them.
 */
#define uatomic_cmpxchg_mo(addr, old, _new, mos, mof)		\
		UATOMIC_COMPAT(cmpxchg(addr, old, _new))

#define uatomic_xchg_mo(addr, v, mo)		\
		UATOMIC_COMPAT(xchg(addr, v))

#define uatomic_and_mo(addr, v, mo)		\
		UATOMIC_COMPAT(and(addr, v))
#define cmm_smp_mb__before_uatomic_and()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_and()		cmm_barrier()

#define uatomic_or_mo(addr, v, mo)		\
		UATOMIC_COMPAT(or(addr, v))
#define cmm_smp_mb__before_uatomic_or()		cmm_barrier()
#define cmm_smp_mb__after_uatomic_or()		cmm_barrier()

#define uatomic_add_return_mo(addr, v, mo)		\
		UATOMIC_COMPAT(add_return(addr, v))

#define uatomic_add_mo(addr, v, mo)	UATOMIC_COMPAT(add(addr, v))
#define cmm_smp_mb__before_uatomic_add()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_add()		cmm_barrier()

#define uatomic_inc_mo(addr, mo)	UATOMIC_COMPAT(inc(addr))
#define cmm_smp_mb__before_uatomic_inc()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_inc()		cmm_barrier()

#define uatomic_dec_mo(addr, mo)	UATOMIC_COMPAT(dec(addr))
#define cmm_smp_mb__before_uatomic_dec()	cmm_barrier()
#define cmm_smp_mb__after_uatomic_dec()		cmm_barrier()
    635  1.1  christos 
    636  1.1  christos 
    637  1.1  christos static inline void _cmm_compat_c11_smp_mb__before_uatomic_load_mo(enum cmm_memorder mo)
    638  1.1  christos {
    639  1.1  christos 	/*
    640  1.1  christos 	 * A SMP barrier is not necessary for CMM_SEQ_CST because, only a
    641  1.1  christos 	 * previous store can be reordered with the load.  However, emitting the
    642  1.1  christos 	 * memory barrier after the store is sufficient to prevent reordering
    643  1.1  christos 	 * between the two.  This follows toolchains decision of emitting the
    644  1.1  christos 	 * memory fence on the stores instead of the loads.
    645  1.1  christos 	 *
    646  1.1  christos 	 * A compiler barrier is necessary because the underlying operation does
    647  1.1  christos 	 * not clobber the registers.
    648  1.1  christos 	 */
    649  1.1  christos 	switch (mo) {
    650  1.1  christos 	case CMM_RELAXED:	/* Fall-through */
    651  1.1  christos 	case CMM_ACQUIRE:	/* Fall-through */
    652  1.1  christos 	case CMM_CONSUME:	/* Fall-through */
    653  1.1  christos 	case CMM_SEQ_CST:	/* Fall-through */
    654  1.1  christos 	case CMM_SEQ_CST_FENCE:
    655  1.1  christos 		cmm_barrier();
    656  1.1  christos 		break;
    657  1.1  christos 	case CMM_ACQ_REL:	/* Fall-through */
    658  1.1  christos 	case CMM_RELEASE:	/* Fall-through */
    659  1.1  christos 	default:
    660  1.1  christos 		abort();
    661  1.1  christos 		break;
    662  1.1  christos 	}
    663  1.1  christos }
    664  1.1  christos 
    665  1.1  christos static inline void _cmm_compat_c11_smp_mb__after_uatomic_load_mo(enum cmm_memorder mo)
    666  1.1  christos {
    667  1.1  christos 	/*
    668  1.1  christos 	 * A SMP barrier is not necessary for CMM_SEQ_CST because following
    669  1.1  christos 	 * loads and stores cannot be reordered with the load.
    670  1.1  christos 	 *
    671  1.1  christos 	 * A SMP barrier is however necessary for CMM_SEQ_CST_FENCE to respect
    672  1.1  christos 	 * the memory model, since the underlying operation does not have a lock
    673  1.1  christos 	 * prefix.
    674  1.1  christos 	 *
    675  1.1  christos 	 * A compiler barrier is necessary because the underlying operation does
    676  1.1  christos 	 * not clobber the registers.
    677  1.1  christos 	 */
    678  1.1  christos 	switch (mo) {
    679  1.1  christos 	case CMM_SEQ_CST_FENCE:
    680  1.1  christos 		cmm_smp_mb();
    681  1.1  christos 		break;
    682  1.1  christos 	case CMM_RELAXED:	/* Fall-through */
    683  1.1  christos 	case CMM_ACQUIRE:	/* Fall-through */
    684  1.1  christos 	case CMM_CONSUME:	/* Fall-through */
    685  1.1  christos 	case CMM_SEQ_CST:
    686  1.1  christos 		cmm_barrier();
    687  1.1  christos 		break;
    688  1.1  christos 	case CMM_ACQ_REL:	/* Fall-through */
    689  1.1  christos 	case CMM_RELEASE:	/* Fall-through */
    690  1.1  christos 	default:
    691  1.1  christos 		abort();
    692  1.1  christos 		break;
    693  1.1  christos 	}
    694  1.1  christos }
    695  1.1  christos 
    696  1.1  christos static inline void _cmm_compat_c11_smp_mb__before_uatomic_store_mo(enum cmm_memorder mo)
    697  1.1  christos {
    698  1.1  christos 	/*
    699  1.1  christos 	 * A SMP barrier is not necessary for CMM_SEQ_CST because the store can
    700  1.1  christos 	 * only be reodered with later loads
    701  1.1  christos 	 *
    702  1.1  christos 	 * A compiler barrier is necessary because the underlying operation does
    703  1.1  christos 	 * not clobber the registers.
    704  1.1  christos 	 */
    705  1.1  christos 	switch (mo) {
    706  1.1  christos 	case CMM_RELAXED:	/* Fall-through */
    707  1.1  christos 	case CMM_RELEASE:	/* Fall-through */
    708  1.1  christos 	case CMM_SEQ_CST:	/* Fall-through */
    709  1.1  christos 	case CMM_SEQ_CST_FENCE:
    710  1.1  christos 		cmm_barrier();
    711  1.1  christos 		break;
    712  1.1  christos 	case CMM_ACQ_REL:	/* Fall-through */
    713  1.1  christos 	case CMM_ACQUIRE:	/* Fall-through */
    714  1.1  christos 	case CMM_CONSUME:	/* Fall-through */
    715  1.1  christos 	default:
    716  1.1  christos 		abort();
    717  1.1  christos 		break;
    718  1.1  christos 	}
    719  1.1  christos }
    720  1.1  christos 
    721  1.1  christos static inline void _cmm_compat_c11_smp_mb__after_uatomic_store_mo(enum cmm_memorder mo)
    722  1.1  christos {
    723  1.1  christos 	/*
    724  1.1  christos 	 * A SMP barrier is necessary for CMM_SEQ_CST because the store can be
    725  1.1  christos 	 * reorded with later loads.  Since no memory barrier is being emitted
    726  1.1  christos 	 * before loads, one has to be emitted after the store.  This follows
    727  1.1  christos 	 * toolchains decision of emitting the memory fence on the stores instead
    728  1.1  christos 	 * of the loads.
    729  1.1  christos 	 *
    730  1.1  christos 	 * A SMP barrier is necessary for CMM_SEQ_CST_FENCE to respect the
    731  1.1  christos 	 * memory model, since the underlying store does not have a lock prefix.
    732  1.1  christos 	 *
    733  1.1  christos 	 * A compiler barrier is necessary because the underlying operation does
    734  1.1  christos 	 * not clobber the registers.
    735  1.1  christos 	 */
    736  1.1  christos 	switch (mo) {
    737  1.1  christos 	case CMM_SEQ_CST:	/* Fall-through */
    738  1.1  christos 	case CMM_SEQ_CST_FENCE:
    739  1.1  christos 		cmm_smp_mb();
    740  1.1  christos 		break;
    741  1.1  christos 	case CMM_RELAXED:	/* Fall-through */
    742  1.1  christos 	case CMM_RELEASE:
    743  1.1  christos 		cmm_barrier();
    744  1.1  christos 		break;
    745  1.1  christos 	case CMM_ACQ_REL:	/* Fall-through */
    746  1.1  christos 	case CMM_ACQUIRE:	/* Fall-through */
    747  1.1  christos 	case CMM_CONSUME:	/* Fall-through */
    748  1.1  christos 	default:
    749  1.1  christos 		abort();
    750  1.1  christos 		break;
    751  1.1  christos 	}
    752  1.1  christos }
    753  1.1  christos 
/*
 * Generic dispatchers: paste @operation (e.g. uatomic_load) into the
 * matching _cmm_compat_c11_smp_mb__{before,after}_<operation>_mo()
 * helper above.  Wrapped in do/while (0) for statement-like use.
 */
#define _cmm_compat_c11_smp_mb__before_mo(operation, mo)		\
	do {								\
		_cmm_compat_c11_smp_mb__before_ ## operation ## _mo (mo); \
	} while (0)

#define _cmm_compat_c11_smp_mb__after_mo(operation, mo)			\
	do {								\
		_cmm_compat_c11_smp_mb__after_ ## operation ## _mo (mo); \
	} while (0)
    763  1.1  christos 
    764  1.1  christos 
    765  1.1  christos #ifdef __cplusplus
    766  1.1  christos }
    767  1.1  christos #endif
    768  1.1  christos 
    769  1.1  christos #include <urcu/uatomic/generic.h>
    770  1.1  christos 
    771  1.1  christos #endif /* _URCU_ARCH_UATOMIC_X86_H */
    772