/*	$NetBSD: atomic.S,v 1.30 2024/07/16 22:44:38 riastradh Exp $	*/

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <machine/asm.h>

#ifdef _KERNEL
#define	ALIAS(f, t)	STRONG_ALIAS(f,t)
#else
#define	ALIAS(f, t)	WEAK_ALIAS(f,t)
#endif

	.text

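/*
 * These routines implement the atomic_ops(3) and membar_ops(3)
 * primitives.  Arguments arrive per the SysV AMD64 calling convention:
 * the target address in %rdi, the operand (delta, mask, or new value)
 * in %esi/%rsi, the replacement value for compare-and-swap in
 * %edx/%rdx, and results are returned in %eax/%rax.
 */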
/* 32-bit */

ENTRY(_atomic_add_32)
	LOCK
	addl	%esi, (%rdi)
	ret
END(_atomic_add_32)

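/*
 * The *_nv variants return the resulting ("new") value.  XADD
 * atomically adds the register to the memory operand and leaves the
 * previous memory contents in the register, so adding the delta once
 * more yields the value that was stored.
 */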
ENTRY(_atomic_add_32_nv)
	movl	%esi, %eax
	LOCK
	xaddl	%eax, (%rdi)
	addl	%esi, %eax
	ret
END(_atomic_add_32_nv)

ENTRY(_atomic_and_32)
	LOCK
	andl	%esi, (%rdi)
	ret
END(_atomic_and_32)

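/*
 * There is no fetch-and-and/or instruction, so the and/or *_nv
 * variants use a LOCK CMPXCHG loop: recompute the result from the
 * last value observed in %eax and retry until the compare-and-exchange
 * succeeds (CMPXCHG reloads %eax with the current memory contents on
 * failure).
 */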
ENTRY(_atomic_and_32_nv)
	movl	(%rdi), %eax
1:
	movl	%eax, %ecx
	andl	%esi, %ecx
	LOCK
	cmpxchgl %ecx, (%rdi)
	jnz	1b
	movl	%ecx, %eax
	ret
END(_atomic_and_32_nv)

ENTRY(_atomic_dec_32)
	LOCK
	decl	(%rdi)
	ret
END(_atomic_dec_32)

ENTRY(_atomic_dec_32_nv)
	movl	$-1, %eax
	LOCK
	xaddl	%eax, (%rdi)
	decl	%eax
	ret
END(_atomic_dec_32_nv)

ENTRY(_atomic_inc_32)
	LOCK
	incl	(%rdi)
	ret
END(_atomic_inc_32)

ENTRY(_atomic_inc_32_nv)
	movl	$1, %eax
	LOCK
	xaddl	%eax, (%rdi)
	incl	%eax
	ret
END(_atomic_inc_32_nv)

ENTRY(_atomic_or_32)
	LOCK
	orl	%esi, (%rdi)
	ret
END(_atomic_or_32)

ENTRY(_atomic_or_32_nv)
	movl	(%rdi), %eax
1:
	movl	%eax, %ecx
	orl	%esi, %ecx
	LOCK
	cmpxchgl %ecx, (%rdi)
	jnz	1b
	movl	%ecx, %eax
	ret
END(_atomic_or_32_nv)

ENTRY(_atomic_swap_32)
	movl	%esi, %eax
	xchgl	%eax, (%rdi)
	ret
END(_atomic_swap_32)

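/*
 * Compare-and-swap: as a single atomic step,
 *
 *	old = *ptr;
 *	if (old == expected)
 *		*ptr = new;
 *	return old;
 *
 * CMPXCHG compares %eax with the memory operand, so the expected value
 * is loaded into %eax first, and the old value comes back in %eax.
 */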
ENTRY(_atomic_cas_32)
	movl	%esi, %eax
	LOCK
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32)

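/*
 * The _ni variants perform the same compare-and-swap without the LOCK
 * prefix: a single CMPXCHG is still atomic with respect to interrupts
 * on the executing CPU, but it is not interlocked against other
 * processors.
 */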
ENTRY(_atomic_cas_32_ni)
	movl	%esi, %eax
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32_ni)

/* 64-bit */

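/*
 * The 64-bit operations mirror the 32-bit ones above, using the
 * quad-word instruction forms and the full %rsi/%rdx/%rax registers.
 */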
ENTRY(_atomic_add_64)
	LOCK
	addq	%rsi, (%rdi)
	ret
END(_atomic_add_64)

ENTRY(_atomic_add_64_nv)
	movq	%rsi, %rax
	LOCK
	xaddq	%rax, (%rdi)
	addq	%rsi, %rax
	ret
END(_atomic_add_64_nv)

ENTRY(_atomic_and_64)
	LOCK
	andq	%rsi, (%rdi)
	ret
END(_atomic_and_64)

ENTRY(_atomic_and_64_nv)
	movq	(%rdi), %rax
1:
	movq	%rax, %rcx
	andq	%rsi, %rcx
	LOCK
	cmpxchgq %rcx, (%rdi)
	jnz	1b
	movq	%rcx, %rax
	ret
END(_atomic_and_64_nv)

ENTRY(_atomic_dec_64)
	LOCK
	decq	(%rdi)
	ret
END(_atomic_dec_64)

ENTRY(_atomic_dec_64_nv)
	movq	$-1, %rax
	LOCK
	xaddq	%rax, (%rdi)
	decq	%rax
	ret
END(_atomic_dec_64_nv)

ENTRY(_atomic_inc_64)
	LOCK
	incq	(%rdi)
	ret
END(_atomic_inc_64)

ENTRY(_atomic_inc_64_nv)
	movq	$1, %rax
	LOCK
	xaddq	%rax, (%rdi)
	incq	%rax
	ret
END(_atomic_inc_64_nv)

ENTRY(_atomic_or_64)
	LOCK
	orq	%rsi, (%rdi)
	ret
END(_atomic_or_64)

ENTRY(_atomic_or_64_nv)
	movq	(%rdi), %rax
1:
	movq	%rax, %rcx
	orq	%rsi, %rcx
	LOCK
	cmpxchgq %rcx, (%rdi)
	jnz	1b
	movq	%rcx, %rax
	ret
END(_atomic_or_64_nv)

ENTRY(_atomic_swap_64)
	movq	%rsi, %rax
	xchgq	%rax, (%rdi)
	ret
END(_atomic_swap_64)

ENTRY(_atomic_cas_64)
	movq	%rsi, %rax
	LOCK
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64)

ENTRY(_atomic_cas_64_ni)
	movq	%rsi, %rax
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64_ni)

/* memory barriers */

ENTRY(_membar_acquire)
	/*
	 * Every load from normal memory is a load-acquire on x86, so
	 * there is never any need for explicit barriers to order
	 * load-before-anything.
	 */
	ret
END(_membar_acquire)

ENTRY(_membar_release)
	/*
	 * Every store to normal memory is a store-release on x86, so
	 * there is never any need for explicit barriers to order
	 * anything-before-store.
	 */
	ret
END(_membar_release)

ENTRY(_membar_sync)
	/*
	 * MFENCE, or a serializing instruction like a locked ADDQ,
	 * is necessary to order store-before-load.  Every other
	 * ordering -- load-before-anything, anything-before-store --
	 * is already guaranteed without explicit barriers.
	 *
	 * Empirically it turns out locked ADDQ is cheaper than MFENCE,
	 * so we use that, with an offset below the return address on
	 * the stack to avoid a false dependency with RET.  (It might
	 * even be better to use a much lower offset, say -128, to
	 * avoid false dependencies for subsequent callees of the
	 * caller.)
	 *
	 * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
	 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
	 * https://www.agner.org/optimize/instruction_tables.pdf
	 *
	 * Sync with xen_mb in sys/arch/amd64/amd64/cpufunc.S.
	 */
	LOCK
	addq	$0, -8(%rsp)
	ret
END(_membar_sync)

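/*
 * Exported names.  In the kernel, ALIAS expands to a strong alias; in
 * userland it expands to a weak alias so the symbol may be overridden.
 * The machine-independent int/long/ptr names and the legacy
 * membar_{consumer,producer,enter,exit} names map onto the sized
 * implementations and the acquire/release/sync barriers above.
 */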
ALIAS(atomic_add_32,_atomic_add_32)
ALIAS(atomic_add_64,_atomic_add_64)
ALIAS(atomic_add_int,_atomic_add_32)
ALIAS(atomic_add_long,_atomic_add_64)
ALIAS(atomic_add_ptr,_atomic_add_64)

ALIAS(atomic_add_32_nv,_atomic_add_32_nv)
ALIAS(atomic_add_64_nv,_atomic_add_64_nv)
ALIAS(atomic_add_int_nv,_atomic_add_32_nv)
ALIAS(atomic_add_long_nv,_atomic_add_64_nv)
ALIAS(atomic_add_ptr_nv,_atomic_add_64_nv)

ALIAS(atomic_and_32,_atomic_and_32)
ALIAS(atomic_and_64,_atomic_and_64)
ALIAS(atomic_and_uint,_atomic_and_32)
ALIAS(atomic_and_ulong,_atomic_and_64)
ALIAS(atomic_and_ptr,_atomic_and_64)

ALIAS(atomic_and_32_nv,_atomic_and_32_nv)
ALIAS(atomic_and_64_nv,_atomic_and_64_nv)
ALIAS(atomic_and_uint_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ulong_nv,_atomic_and_64_nv)
ALIAS(atomic_and_ptr_nv,_atomic_and_64_nv)

ALIAS(atomic_dec_32,_atomic_dec_32)
ALIAS(atomic_dec_64,_atomic_dec_64)
ALIAS(atomic_dec_uint,_atomic_dec_32)
ALIAS(atomic_dec_ulong,_atomic_dec_64)
ALIAS(atomic_dec_ptr,_atomic_dec_64)

ALIAS(atomic_dec_32_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_64_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_uint_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ulong_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_ptr_nv,_atomic_dec_64_nv)

ALIAS(atomic_inc_32,_atomic_inc_32)
ALIAS(atomic_inc_64,_atomic_inc_64)
ALIAS(atomic_inc_uint,_atomic_inc_32)
ALIAS(atomic_inc_ulong,_atomic_inc_64)
ALIAS(atomic_inc_ptr,_atomic_inc_64)

ALIAS(atomic_inc_32_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_64_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_uint_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ulong_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_ptr_nv,_atomic_inc_64_nv)

ALIAS(atomic_or_32,_atomic_or_32)
ALIAS(atomic_or_64,_atomic_or_64)
ALIAS(atomic_or_uint,_atomic_or_32)
ALIAS(atomic_or_ulong,_atomic_or_64)
ALIAS(atomic_or_ptr,_atomic_or_64)

ALIAS(atomic_or_32_nv,_atomic_or_32_nv)
ALIAS(atomic_or_64_nv,_atomic_or_64_nv)
ALIAS(atomic_or_uint_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ulong_nv,_atomic_or_64_nv)
ALIAS(atomic_or_ptr_nv,_atomic_or_64_nv)

ALIAS(atomic_swap_32,_atomic_swap_32)
ALIAS(atomic_swap_64,_atomic_swap_64)
ALIAS(atomic_swap_uint,_atomic_swap_32)
ALIAS(atomic_swap_ulong,_atomic_swap_64)
ALIAS(atomic_swap_ptr,_atomic_swap_64)

ALIAS(atomic_cas_32,_atomic_cas_32)
ALIAS(atomic_cas_64,_atomic_cas_64)
ALIAS(atomic_cas_uint,_atomic_cas_32)
ALIAS(atomic_cas_ulong,_atomic_cas_64)
ALIAS(atomic_cas_ptr,_atomic_cas_64)

ALIAS(atomic_cas_32_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_64_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_uint_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ulong_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_ptr_ni,_atomic_cas_64_ni)

ALIAS(membar_acquire,_membar_acquire)
ALIAS(membar_release,_membar_release)
ALIAS(membar_sync,_membar_sync)

ALIAS(membar_consumer,_membar_acquire)
ALIAS(membar_producer,_membar_release)
ALIAS(membar_enter,_membar_sync)
ALIAS(membar_exit,_membar_release)
ALIAS(membar_sync,_membar_sync)

STRONG_ALIAS(_atomic_add_int,_atomic_add_32)
STRONG_ALIAS(_atomic_add_long,_atomic_add_64)
STRONG_ALIAS(_atomic_add_ptr,_atomic_add_64)

STRONG_ALIAS(_atomic_add_int_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_long_nv,_atomic_add_64_nv)
STRONG_ALIAS(_atomic_add_ptr_nv,_atomic_add_64_nv)

STRONG_ALIAS(_atomic_and_uint,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ulong,_atomic_and_64)
STRONG_ALIAS(_atomic_and_ptr,_atomic_and_64)

STRONG_ALIAS(_atomic_and_uint_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ulong_nv,_atomic_and_64_nv)
STRONG_ALIAS(_atomic_and_ptr_nv,_atomic_and_64_nv)

STRONG_ALIAS(_atomic_dec_uint,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ulong,_atomic_dec_64)
STRONG_ALIAS(_atomic_dec_ptr,_atomic_dec_64)

STRONG_ALIAS(_atomic_dec_uint_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ulong_nv,_atomic_dec_64_nv)
STRONG_ALIAS(_atomic_dec_ptr_nv,_atomic_dec_64_nv)

STRONG_ALIAS(_atomic_inc_uint,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ulong,_atomic_inc_64)
STRONG_ALIAS(_atomic_inc_ptr,_atomic_inc_64)

STRONG_ALIAS(_atomic_inc_uint_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ulong_nv,_atomic_inc_64_nv)
STRONG_ALIAS(_atomic_inc_ptr_nv,_atomic_inc_64_nv)

STRONG_ALIAS(_atomic_or_uint,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ulong,_atomic_or_64)
STRONG_ALIAS(_atomic_or_ptr,_atomic_or_64)

STRONG_ALIAS(_atomic_or_uint_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ulong_nv,_atomic_or_64_nv)
STRONG_ALIAS(_atomic_or_ptr_nv,_atomic_or_64_nv)

STRONG_ALIAS(_atomic_swap_uint,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ulong,_atomic_swap_64)
STRONG_ALIAS(_atomic_swap_ptr,_atomic_swap_64)

STRONG_ALIAS(_atomic_cas_uint,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ulong,_atomic_cas_64)
STRONG_ALIAS(_atomic_cas_ptr,_atomic_cas_64)

STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_64_ni)
STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_64_ni)

STRONG_ALIAS(_membar_consumer,_membar_acquire)
STRONG_ALIAS(_membar_producer,_membar_release)
STRONG_ALIAS(_membar_enter,_membar_sync)
STRONG_ALIAS(_membar_exit,_membar_release)