1b8e80941Smrg/**
2b8e80941Smrg * Many similar implementations exist. See for example libwsbm
3b8e80941Smrg * or the linux kernel include/atomic.h
4b8e80941Smrg *
5b8e80941Smrg * No copyright claimed on this file.
6b8e80941Smrg *
7b8e80941Smrg */
8b8e80941Smrg
9b8e80941Smrg#include "no_extern_c.h"
10b8e80941Smrg
11b8e80941Smrg#ifndef U_ATOMIC_H
12b8e80941Smrg#define U_ATOMIC_H
13b8e80941Smrg
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
16b8e80941Smrg
/* Select exactly one backend.
 *
 * PIPE_ATOMIC_OS_UNLOCKED is not defined anywhere in the visible part of
 * this header, so it has to come from the build.  When it is requested it
 * takes precedence and auto-detection is skipped; otherwise the unlocked
 * macros would be defined on top of a second, auto-detected backend.
 *
 * Without an explicit request, favor OS-provided implementations and fall
 * back to compiler intrinsics.
 */
#if defined(PIPE_ATOMIC_OS_UNLOCKED)
/* Backend chosen by the build; nothing to detect. */
#elif defined(__sun)
#define PIPE_ATOMIC_OS_SOLARIS
#elif defined(_MSC_VER)
#define PIPE_ATOMIC_MSVC_INTRINSIC
#elif defined(__GNUC__) || defined(__lint__)
#define PIPE_ATOMIC_GCC_INTRINSIC
#else
#error "Unsupported platform"
#endif
32b8e80941Smrg
33b8e80941Smrg
34b8e80941Smrg/* Implementation using GCC-provided synchronization intrinsics
35b8e80941Smrg */
36b8e80941Smrg#if defined(PIPE_ATOMIC_GCC_INTRINSIC)
37b8e80941Smrg
38b8e80941Smrg#define PIPE_ATOMIC "GCC Sync Intrinsics"
39b8e80941Smrg
40b8e80941Smrg#if defined(USE_GCC_ATOMIC_BUILTINS)
41b8e80941Smrg
/* Builtins taking an explicit memory model (available since GCC 4.7).
 *
 * Stores use release ordering, loads use acquire ordering and every
 * read-modify-write operation uses acquire-release ordering.
 *
 * p_atomic_inc/dec/add discard the result.  Like their counterparts in the
 * other backends they are fully parenthesized, so each one expands to a
 * single self-contained expression.
 *
 * PIPE_NATIVE_ATOMIC_XCHG records that p_atomic_xchg is provided here, which
 * disables the generic compare-and-swap based fallback.
 */
#define p_atomic_set(_v, _i) __atomic_store_n((_v), (_i), __ATOMIC_RELEASE)
#define p_atomic_read(_v) __atomic_load_n((_v), __ATOMIC_ACQUIRE)
#define p_atomic_dec_zero(v) (__atomic_sub_fetch((v), 1, __ATOMIC_ACQ_REL) == 0)
#define p_atomic_inc(v) ((void) __atomic_add_fetch((v), 1, __ATOMIC_ACQ_REL))
#define p_atomic_dec(v) ((void) __atomic_sub_fetch((v), 1, __ATOMIC_ACQ_REL))
#define p_atomic_add(v, i) ((void) __atomic_add_fetch((v), (i), __ATOMIC_ACQ_REL))
#define p_atomic_inc_return(v) __atomic_add_fetch((v), 1, __ATOMIC_ACQ_REL)
#define p_atomic_dec_return(v) __atomic_sub_fetch((v), 1, __ATOMIC_ACQ_REL)
#define p_atomic_xchg(v, i) __atomic_exchange_n((v), (i), __ATOMIC_ACQ_REL)
#define PIPE_NATIVE_ATOMIC_XCHG
53b8e80941Smrg
54b8e80941Smrg#else
55b8e80941Smrg
/* Legacy __sync builtins, used when USE_GCC_ATOMIC_BUILTINS is not defined.
 *
 * p_atomic_set and p_atomic_read are a plain store and a plain load; no
 * builtin is involved.  The read-modify-write operations go through the
 * __sync_*_and_fetch builtins, which return the updated value.
 *
 * p_atomic_inc/dec/add discard the result.  Like their counterparts in the
 * other backends they are fully parenthesized, so each one expands to a
 * single self-contained expression.
 */
#define p_atomic_set(_v, _i) (*(_v) = (_i))
#define p_atomic_read(_v) (*(_v))
#define p_atomic_dec_zero(v) (__sync_sub_and_fetch((v), 1) == 0)
#define p_atomic_inc(v) ((void) __sync_add_and_fetch((v), 1))
#define p_atomic_dec(v) ((void) __sync_sub_and_fetch((v), 1))
#define p_atomic_add(v, i) ((void) __sync_add_and_fetch((v), (i)))
#define p_atomic_inc_return(v) __sync_add_and_fetch((v), 1)
#define p_atomic_dec_return(v) __sync_sub_and_fetch((v), 1)
64b8e80941Smrg
65b8e80941Smrg#endif
66b8e80941Smrg
/* Compare-and-swap that evaluates to the value found in the target.
 *
 * The __sync builtin serves both configurations above: the __atomic_* family
 * offers no compare-and-exchange that returns the current value, and GCC 5.4
 * seems unable to optimize a compound statement expression that pairs
 * __atomic_compare_exchange[_n] with an additional stack variable.
 */
#define p_atomic_cmpxchg(_ptr, _expected, _desired) \
   __sync_val_compare_and_swap((_ptr), (_expected), (_desired))
73b8e80941Smrg
74b8e80941Smrg#endif
75b8e80941Smrg
76b8e80941Smrg
77b8e80941Smrg
78b8e80941Smrg/* Unlocked version for single threaded environments, such as some
79b8e80941Smrg * windows kernel modules.
80b8e80941Smrg */
81b8e80941Smrg#if defined(PIPE_ATOMIC_OS_UNLOCKED)
82b8e80941Smrg
#define PIPE_ATOMIC "Unlocked"

/* Plain, non-atomic C expressions; nothing here locks or synchronizes.
 *
 * p_atomic_add updates the target with a single compound assignment, so its
 * pointer argument is evaluated once, and it discards the result like the
 * GCC and Solaris backends do.
 *
 * p_atomic_cmpxchg evaluates to the value found in the target before the
 * operation.  It evaluates _v and _old more than once, so arguments with
 * side effects must be avoided.
 */
#define p_atomic_set(_v, _i) (*(_v) = (_i))
#define p_atomic_read(_v) (*(_v))
#define p_atomic_dec_zero(_v) (p_atomic_dec_return(_v) == 0)
#define p_atomic_inc(_v) ((void) p_atomic_inc_return(_v))
#define p_atomic_dec(_v) ((void) p_atomic_dec_return(_v))
#define p_atomic_add(_v, _i) ((void) (*(_v) += (_i)))
#define p_atomic_inc_return(_v) (++(*(_v)))
#define p_atomic_dec_return(_v) (--(*(_v)))
#define p_atomic_cmpxchg(_v, _old, _new) (*(_v) == (_old) ? (*(_v) = (_new), (_old)) : *(_v))
94b8e80941Smrg
95b8e80941Smrg#endif
96b8e80941Smrg
97b8e80941Smrg
98b8e80941Smrg#if defined(PIPE_ATOMIC_MSVC_INTRINSIC)
99b8e80941Smrg
100b8e80941Smrg#define PIPE_ATOMIC "MSVC Intrinsics"
101b8e80941Smrg
102b8e80941Smrg/* We use the Windows header's Interlocked*64 functions instead of the
103b8e80941Smrg * _Interlocked*64 intrinsics wherever we can, as support for the latter varies
104b8e80941Smrg * with target CPU, whereas Windows headers take care of all portability
105b8e80941Smrg * issues: using intrinsics where available, falling back to library
106b8e80941Smrg * implementations where not.
107b8e80941Smrg */
108b8e80941Smrg#ifndef WIN32_LEAN_AND_MEAN
109b8e80941Smrg#define WIN32_LEAN_AND_MEAN 1
110b8e80941Smrg#endif
111b8e80941Smrg#include <windows.h>
112b8e80941Smrg#include <intrin.h>
113b8e80941Smrg#include <assert.h>
114b8e80941Smrg
/* MSVC supports the decltype keyword, but only in C++, and it doesn't quite
 * work here; and if a C++-only solution is worthwhile, then it would be
 * better to use templates / function overloading instead of decltype magic.
 * Therefore, we rely on implicit casting to LONGLONG for the macros that
 * return a value.
 */
120b8e80941Smrg
/* Plain store and load; no intrinsic is involved. */
#define p_atomic_set(_v, _i) (*(_v) = (_i))
#define p_atomic_read(_v) (*(_v))

#define p_atomic_dec_zero(_v) \
   (p_atomic_dec_return(_v) == 0)

#define p_atomic_inc(_v) \
   ((void) p_atomic_inc_return(_v))

/* Increment *_v and evaluate to the new value, dispatching on the size of
 * the target.  The 8-bit case is built from _InterlockedExchangeAdd8, which
 * returns the value before the addition, hence the "+ 1".  Any other size
 * trips the assertion at run time.
 */
#define p_atomic_inc_return(_v) (\
   sizeof *(_v) == sizeof(char)    ? (char) (_InterlockedExchangeAdd8((char *) (_v), 1) + 1) : \
   sizeof *(_v) == sizeof(short)   ? _InterlockedIncrement16((short *)  (_v)) : \
   sizeof *(_v) == sizeof(long)    ? _InterlockedIncrement  ((long *)   (_v)) : \
   sizeof *(_v) == sizeof(__int64) ? InterlockedIncrement64 ((__int64 *)(_v)) : \
                                     (assert(!"should not get here"), 0))

#define p_atomic_dec(_v) \
   ((void) p_atomic_dec_return(_v))

/* Decrement *_v and evaluate to the new value; mirrors p_atomic_inc_return,
 * including the 8-bit case built from _InterlockedExchangeAdd8.
 */
#define p_atomic_dec_return(_v) (\
   sizeof *(_v) == sizeof(char)    ? (char) (_InterlockedExchangeAdd8((char *) (_v), -1) - 1) : \
   sizeof *(_v) == sizeof(short)   ? _InterlockedDecrement16((short *)  (_v)) : \
   sizeof *(_v) == sizeof(long)    ? _InterlockedDecrement  ((long *)   (_v)) : \
   sizeof *(_v) == sizeof(__int64) ? InterlockedDecrement64 ((__int64 *)(_v)) : \
                                     (assert(!"should not get here"), 0))

/* Add _i to *_v.  The previous value returned by the ExchangeAdd intrinsics
 * is discarded, so the macro is void like p_atomic_add in the GCC and
 * Solaris backends.
 */
#define p_atomic_add(_v, _i) ((void) (\
   sizeof *(_v) == sizeof(char)    ? _InterlockedExchangeAdd8 ((char *)   (_v), (_i)) : \
   sizeof *(_v) == sizeof(short)   ? _InterlockedExchangeAdd16((short *)  (_v), (_i)) : \
   sizeof *(_v) == sizeof(long)    ? _InterlockedExchangeAdd  ((long *)   (_v), (_i)) : \
   sizeof *(_v) == sizeof(__int64) ? InterlockedExchangeAdd64((__int64 *)(_v), (_i)) : \
                                     (assert(!"should not get here"), 0)))

/* Compare-and-swap.  Note the argument order of the intrinsics: the new
 * value comes before the comparand.
 */
#define p_atomic_cmpxchg(_v, _old, _new) (\
   sizeof *(_v) == sizeof(char)    ? _InterlockedCompareExchange8 ((char *)   (_v), (char)   (_new), (char)   (_old)) : \
   sizeof *(_v) == sizeof(short)   ? _InterlockedCompareExchange16((short *)  (_v), (short)  (_new), (short)  (_old)) : \
   sizeof *(_v) == sizeof(long)    ? _InterlockedCompareExchange  ((long *)   (_v), (long)   (_new), (long)   (_old)) : \
   sizeof *(_v) == sizeof(__int64) ? InterlockedCompareExchange64 ((__int64 *)(_v), (__int64)(_new), (__int64)(_old)) : \
                                     (assert(!"should not get here"), 0))
158b8e80941Smrg
159b8e80941Smrg#endif
160b8e80941Smrg
161b8e80941Smrg#if defined(PIPE_ATOMIC_OS_SOLARIS)
162b8e80941Smrg
163b8e80941Smrg#define PIPE_ATOMIC "Solaris OS atomic functions"
164b8e80941Smrg
165b8e80941Smrg#include <atomic.h>
166b8e80941Smrg#include <assert.h>
167b8e80941Smrg
/* Plain store and load; no atomic_* function is involved. */
#define p_atomic_set(_v, _i) (*(_v) = (_i))
#define p_atomic_read(_v) (*(_v))

/* Every macro below dispatches on the size of *v to the matching
 * atomic_*_8/16/32/64 function; any other size trips the assertion at run
 * time.
 *
 * Casts that apply to the result enclose the whole selection.  Written
 * directly in front of the first sizeof, a cast binds to that operand alone:
 * a (void) cast then makes the comparison invalid, and a __typeof cast
 * leaves the result unconverted.
 */

/* Decrement *v; evaluates to non-zero when the new value is zero. */
#define p_atomic_dec_zero(v) (\
   sizeof(*(v)) == sizeof(uint8_t)  ? atomic_dec_8_nv ((uint8_t  *)(v)) == 0 : \
   sizeof(*(v)) == sizeof(uint16_t) ? atomic_dec_16_nv((uint16_t *)(v)) == 0 : \
   sizeof(*(v)) == sizeof(uint32_t) ? atomic_dec_32_nv((uint32_t *)(v)) == 0 : \
   sizeof(*(v)) == sizeof(uint64_t) ? atomic_dec_64_nv((uint64_t *)(v)) == 0 : \
                                      (assert(!"should not get here"), 0))

/* Increment *v, no result.  The fall-through arm is void as well so every
 * arm of the selection has the same type.
 */
#define p_atomic_inc(v) ((void) (\
   sizeof(*(v)) == sizeof(uint8_t)  ? atomic_inc_8 ((uint8_t  *)(v)) : \
   sizeof(*(v)) == sizeof(uint16_t) ? atomic_inc_16((uint16_t *)(v)) : \
   sizeof(*(v)) == sizeof(uint32_t) ? atomic_inc_32((uint32_t *)(v)) : \
   sizeof(*(v)) == sizeof(uint64_t) ? atomic_inc_64((uint64_t *)(v)) : \
                                      (assert(!"should not get here"), (void) 0)))

/* Increment *v and evaluate to the new value, converted to the type of *v. */
#define p_atomic_inc_return(v) ((__typeof(*(v))) (\
   sizeof(*(v)) == sizeof(uint8_t)  ? atomic_inc_8_nv ((uint8_t  *)(v)) : \
   sizeof(*(v)) == sizeof(uint16_t) ? atomic_inc_16_nv((uint16_t *)(v)) : \
   sizeof(*(v)) == sizeof(uint32_t) ? atomic_inc_32_nv((uint32_t *)(v)) : \
   sizeof(*(v)) == sizeof(uint64_t) ? atomic_inc_64_nv((uint64_t *)(v)) : \
                                      (assert(!"should not get here"), 0)))

/* Decrement *v, no result. */
#define p_atomic_dec(v) ((void) (\
   sizeof(*(v)) == sizeof(uint8_t)  ? atomic_dec_8 ((uint8_t  *)(v)) : \
   sizeof(*(v)) == sizeof(uint16_t) ? atomic_dec_16((uint16_t *)(v)) : \
   sizeof(*(v)) == sizeof(uint32_t) ? atomic_dec_32((uint32_t *)(v)) : \
   sizeof(*(v)) == sizeof(uint64_t) ? atomic_dec_64((uint64_t *)(v)) : \
                                      (assert(!"should not get here"), (void) 0)))

/* Decrement *v and evaluate to the new value, converted to the type of *v. */
#define p_atomic_dec_return(v) ((__typeof(*(v))) (\
   sizeof(*(v)) == sizeof(uint8_t)  ? atomic_dec_8_nv ((uint8_t  *)(v)) : \
   sizeof(*(v)) == sizeof(uint16_t) ? atomic_dec_16_nv((uint16_t *)(v)) : \
   sizeof(*(v)) == sizeof(uint32_t) ? atomic_dec_32_nv((uint32_t *)(v)) : \
   sizeof(*(v)) == sizeof(uint64_t) ? atomic_dec_64_nv((uint64_t *)(v)) : \
                                      (assert(!"should not get here"), 0)))

/* Add i to *v, no result. */
#define p_atomic_add(v, i) ((void) (\
   sizeof(*(v)) == sizeof(uint8_t)  ? atomic_add_8 ((uint8_t  *)(v), (i)) : \
   sizeof(*(v)) == sizeof(uint16_t) ? atomic_add_16((uint16_t *)(v), (i)) : \
   sizeof(*(v)) == sizeof(uint32_t) ? atomic_add_32((uint32_t *)(v), (i)) : \
   sizeof(*(v)) == sizeof(uint64_t) ? atomic_add_64((uint64_t *)(v), (i)) : \
                                      (assert(!"should not get here"), (void) 0)))

/* Compare-and-swap; evaluates to the result of atomic_cas_*, converted to
 * the type of *v.
 */
#define p_atomic_cmpxchg(v, old, _new) ((__typeof(*(v))) (\
   sizeof(*(v)) == sizeof(uint8_t)  ? atomic_cas_8 ((uint8_t  *)(v), (uint8_t )(old), (uint8_t )(_new)) : \
   sizeof(*(v)) == sizeof(uint16_t) ? atomic_cas_16((uint16_t *)(v), (uint16_t)(old), (uint16_t)(_new)) : \
   sizeof(*(v)) == sizeof(uint32_t) ? atomic_cas_32((uint32_t *)(v), (uint32_t)(old), (uint32_t)(_new)) : \
   sizeof(*(v)) == sizeof(uint64_t) ? atomic_cas_64((uint64_t *)(v), (uint64_t)(old), (uint64_t)(_new)) : \
                                      (assert(!"should not get here"), 0)))
219b8e80941Smrg
220b8e80941Smrg#endif
221b8e80941Smrg
222b8e80941Smrg#ifndef PIPE_ATOMIC
223b8e80941Smrg#error "No pipe_atomic implementation selected"
224b8e80941Smrg#endif
225b8e80941Smrg
226b8e80941Smrg#ifndef PIPE_NATIVE_ATOMIC_XCHG
/* Exchange built on compare-and-swap: store i into *v and return the value
 * it replaced.
 *
 * Starts from a plain read of *v and retries p_atomic_cmpxchg with whatever
 * value the previous attempt observed, until the observed value equals the
 * one that was expected, i.e. the swap took place.
 */
static inline uint32_t p_atomic_xchg_32(uint32_t *v, uint32_t i)
{
   uint32_t seen = p_atomic_read(v);

   for (;;) {
      const uint32_t guess = seen;

      seen = p_atomic_cmpxchg(v, guess, i);
      if (seen == guess)
         return seen;
   }
}
237b8e80941Smrg
/* Exchange built on compare-and-swap: store i into *v and return the value
 * it replaced.
 *
 * Starts from a plain read of *v and retries p_atomic_cmpxchg with whatever
 * value the previous attempt observed, until the observed value equals the
 * one that was expected, i.e. the swap took place.
 */
static inline uint64_t p_atomic_xchg_64(uint64_t *v, uint64_t i)
{
   uint64_t seen = p_atomic_read(v);

   for (;;) {
      const uint64_t guess = seen;

      seen = p_atomic_cmpxchg(v, guess, i);
      if (seen == guess)
         return seen;
   }
}
248b8e80941Smrg
/* Generic exchange for backends without a native one: dispatch on the size
 * of *v to the 32- or 64-bit compare-and-swap loop and convert the previous
 * value back to the type of *v.  Any other size trips the assertion at run
 * time.
 *
 * The cast encloses the whole selection; written directly in front of the
 * first sizeof it would bind to that operand alone and leave the result
 * unconverted.
 */
#define p_atomic_xchg(v, i) ((__typeof(*(v))) (\
   sizeof(*(v)) == sizeof(uint32_t) ? p_atomic_xchg_32((uint32_t *)(v), (uint32_t)(i)) : \
   sizeof(*(v)) == sizeof(uint64_t) ? p_atomic_xchg_64((uint64_t *)(v), (uint64_t)(i)) : \
                                      (assert(!"should not get here"), 0)))
253b8e80941Smrg#endif
254b8e80941Smrg
255b8e80941Smrg#endif /* U_ATOMIC_H */
256