/*	$NetBSD: atomic.S,v 1.31 2024/07/16 22:45:10 riastradh Exp $	*/

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <machine/asm.h>

#ifdef _KERNEL
#define	ALIAS(f, t)	STRONG_ALIAS(f,t)
#else
#define	ALIAS(f, t)	WEAK_ALIAS(f,t)
#endif

#ifdef _HARDKERNEL
#include <machine/frameasm.h>
#define	LOCK		HOTPATCH(HP_NAME_NOLOCK, 1); lock
#else
#define	LOCK		lock
#endif

	.text

/* 32-bit */

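/*
 * void _atomic_add_32(volatile uint32_t *p, int32_t v)
 *
 *	Atomically add v to *p.
 */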
ENTRY(_atomic_add_32)
	LOCK
	addl	%esi, (%rdi)
	ret
END(_atomic_add_32)

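/*
 * uint32_t _atomic_add_32_nv(volatile uint32_t *p, int32_t v)
 *
 *	Atomically add v to *p and return the new value.
 */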
ENTRY(_atomic_add_32_nv)
	movl	%esi, %eax
	LOCK
	xaddl	%eax, (%rdi)
	addl	%esi, %eax
	ret
END(_atomic_add_32_nv)

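/*
 * void _atomic_and_32(volatile uint32_t *p, uint32_t mask)
 *
 *	Atomically AND mask into *p.
 */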
ENTRY(_atomic_and_32)
	LOCK
	andl	%esi, (%rdi)
	ret
END(_atomic_and_32)

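/*
 * uint32_t _atomic_and_32_nv(volatile uint32_t *p, uint32_t mask)
 *
 *	Atomically AND mask into *p and return the new value.  x86 has
 *	no fetch-and-AND instruction, so loop with CMPXCHG until the
 *	update succeeds without interference.
 */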
ENTRY(_atomic_and_32_nv)
	movl	(%rdi), %eax
1:
	movl	%eax, %ecx
	andl	%esi, %ecx
	LOCK
	cmpxchgl %ecx, (%rdi)
	jnz	1b
	movl	%ecx, %eax
	ret
END(_atomic_and_32_nv)

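/*
 * void _atomic_dec_32(volatile uint32_t *p)
 *
 *	Atomically decrement *p.
 */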
ENTRY(_atomic_dec_32)
	LOCK
	decl	(%rdi)
	ret
END(_atomic_dec_32)

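/*
 * uint32_t _atomic_dec_32_nv(volatile uint32_t *p)
 *
 *	Atomically decrement *p and return the new value.
 */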
ENTRY(_atomic_dec_32_nv)
	movl	$-1, %eax
	LOCK
	xaddl	%eax, (%rdi)
	decl	%eax
	ret
END(_atomic_dec_32_nv)

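/*
 * void _atomic_inc_32(volatile uint32_t *p)
 *
 *	Atomically increment *p.
 */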
ENTRY(_atomic_inc_32)
	LOCK
	incl	(%rdi)
	ret
END(_atomic_inc_32)

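/*
 * uint32_t _atomic_inc_32_nv(volatile uint32_t *p)
 *
 *	Atomically increment *p and return the new value.
 */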
ENTRY(_atomic_inc_32_nv)
	movl	$1, %eax
	LOCK
	xaddl	%eax, (%rdi)
	incl	%eax
	ret
END(_atomic_inc_32_nv)

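/*
 * void _atomic_or_32(volatile uint32_t *p, uint32_t bits)
 *
 *	Atomically OR bits into *p.
 */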
ENTRY(_atomic_or_32)
	LOCK
	orl	%esi, (%rdi)
	ret
END(_atomic_or_32)

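/*
 * uint32_t _atomic_or_32_nv(volatile uint32_t *p, uint32_t bits)
 *
 *	Atomically OR bits into *p and return the new value.  As with
 *	_atomic_and_32_nv, loop with CMPXCHG since x86 has no
 *	fetch-and-OR instruction.
 */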
ENTRY(_atomic_or_32_nv)
	movl	(%rdi), %eax
1:
	movl	%eax, %ecx
	orl	%esi, %ecx
	LOCK
	cmpxchgl %ecx, (%rdi)
	jnz	1b
	movl	%ecx, %eax
	ret
END(_atomic_or_32_nv)

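/*
 * uint32_t _atomic_swap_32(volatile uint32_t *p, uint32_t new)
 *
 *	Atomically replace *p by new and return the old value.  XCHG
 *	with a memory operand is implicitly locked, so no LOCK prefix
 *	is needed.
 */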
ENTRY(_atomic_swap_32)
	movl	%esi, %eax
	xchgl	%eax, (%rdi)
	ret
END(_atomic_swap_32)

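/*
 * uint32_t _atomic_cas_32(volatile uint32_t *p, uint32_t expected,
 *     uint32_t new)
 *
 *	If *p equals expected, atomically replace it by new.  Return
 *	the old value of *p either way.
 */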
ENTRY(_atomic_cas_32)
	movl	%esi, %eax
	LOCK
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32)

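/*
 * uint32_t _atomic_cas_32_ni(volatile uint32_t *p, uint32_t expected,
 *     uint32_t new)
 *
 *	Non-interlocked compare-and-swap: same as _atomic_cas_32 but
 *	without the LOCK prefix, so it is atomic only with respect to
 *	the current CPU, not to other processors.
 */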
ENTRY(_atomic_cas_32_ni)
	movl	%esi, %eax
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32_ni)

/* 64-bit */

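/*
 * void _atomic_add_64(volatile uint64_t *p, int64_t v)
 *
 *	Atomically add v to *p.
 */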
ENTRY(_atomic_add_64)
	LOCK
	addq	%rsi, (%rdi)
	ret
END(_atomic_add_64)

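/*
 * uint64_t _atomic_add_64_nv(volatile uint64_t *p, int64_t v)
 *
 *	Atomically add v to *p and return the new value.
 */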
ENTRY(_atomic_add_64_nv)
	movq	%rsi, %rax
	LOCK
	xaddq	%rax, (%rdi)
	addq	%rsi, %rax
	ret
END(_atomic_add_64_nv)

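/*
 * void _atomic_and_64(volatile uint64_t *p, uint64_t mask)
 *
 *	Atomically AND mask into *p.
 */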
ENTRY(_atomic_and_64)
	LOCK
	andq	%rsi, (%rdi)
	ret
END(_atomic_and_64)

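/*
 * uint64_t _atomic_and_64_nv(volatile uint64_t *p, uint64_t mask)
 *
 *	Atomically AND mask into *p and return the new value, via a
 *	CMPXCHG loop as in _atomic_and_32_nv.
 */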
ENTRY(_atomic_and_64_nv)
	movq	(%rdi), %rax
1:
	movq	%rax, %rcx
	andq	%rsi, %rcx
	LOCK
	cmpxchgq %rcx, (%rdi)
	jnz	1b
	movq	%rcx, %rax
	ret
END(_atomic_and_64_nv)

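/*
 * void _atomic_dec_64(volatile uint64_t *p)
 *
 *	Atomically decrement *p.
 */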
ENTRY(_atomic_dec_64)
	LOCK
	decq	(%rdi)
	ret
END(_atomic_dec_64)

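/*
 * uint64_t _atomic_dec_64_nv(volatile uint64_t *p)
 *
 *	Atomically decrement *p and return the new value.
 */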
ENTRY(_atomic_dec_64_nv)
	movq	$-1, %rax
	LOCK
	xaddq	%rax, (%rdi)
	decq	%rax
	ret
END(_atomic_dec_64_nv)

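/*
 * void _atomic_inc_64(volatile uint64_t *p)
 *
 *	Atomically increment *p.
 */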
ENTRY(_atomic_inc_64)
	LOCK
	incq	(%rdi)
	ret
END(_atomic_inc_64)

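/*
 * uint64_t _atomic_inc_64_nv(volatile uint64_t *p)
 *
 *	Atomically increment *p and return the new value.
 */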
ENTRY(_atomic_inc_64_nv)
	movq	$1, %rax
	LOCK
	xaddq	%rax, (%rdi)
	incq	%rax
	ret
END(_atomic_inc_64_nv)

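/*
 * void _atomic_or_64(volatile uint64_t *p, uint64_t bits)
 *
 *	Atomically OR bits into *p.
 */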
ENTRY(_atomic_or_64)
	LOCK
	orq	%rsi, (%rdi)
	ret
END(_atomic_or_64)

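/*
 * uint64_t _atomic_or_64_nv(volatile uint64_t *p, uint64_t bits)
 *
 *	Atomically OR bits into *p and return the new value, via a
 *	CMPXCHG loop as in _atomic_or_32_nv.
 */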
ENTRY(_atomic_or_64_nv)
	movq	(%rdi), %rax
1:
	movq	%rax, %rcx
	orq	%rsi, %rcx
	LOCK
	cmpxchgq %rcx, (%rdi)
	jnz	1b
	movq	%rcx, %rax
	ret
END(_atomic_or_64_nv)

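/*
 * uint64_t _atomic_swap_64(volatile uint64_t *p, uint64_t new)
 *
 *	Atomically replace *p by new and return the old value.  XCHG
 *	with a memory operand is implicitly locked.
 */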
ENTRY(_atomic_swap_64)
	movq	%rsi, %rax
	xchgq	%rax, (%rdi)
	ret
END(_atomic_swap_64)

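/*
 * uint64_t _atomic_cas_64(volatile uint64_t *p, uint64_t expected,
 *     uint64_t new)
 *
 *	If *p equals expected, atomically replace it by new.  Return
 *	the old value of *p either way.
 */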
ENTRY(_atomic_cas_64)
	movq	%rsi, %rax
	LOCK
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64)

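/*
 * uint64_t _atomic_cas_64_ni(volatile uint64_t *p, uint64_t expected,
 *     uint64_t new)
 *
 *	Non-interlocked compare-and-swap: same as _atomic_cas_64 but
 *	without the LOCK prefix, so it is atomic only with respect to
 *	the current CPU, not to other processors.
 */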
ENTRY(_atomic_cas_64_ni)
	movq	%rsi, %rax
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64_ni)

/* memory barriers */

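/*
 * void _membar_acquire(void)
 *
 *	Order load-before-load/store.
 */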
ENTRY(_membar_acquire)
	/*
	 * Every load from normal memory is a load-acquire on x86, so
	 * there is never any need for explicit barriers to order
	 * load-before-anything.
	 */
	ret
END(_membar_acquire)

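/*
 * void _membar_release(void)
 *
 *	Order load/store-before-store.
 */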
ENTRY(_membar_release)
	/*
	 * Every store to normal memory is a store-release on x86, so
	 * there is never any need for explicit barriers to order
	 * anything-before-store.
	 */
	ret
END(_membar_release)

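/*
 * void _membar_sync(void)
 *
 *	Order load/store-before-load/store: a full barrier.
 */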
ENTRY(_membar_sync)
	/*
	 * MFENCE, or a serializing instruction like a locked ADDQ,
	 * is necessary to order store-before-load.  Every other
	 * ordering -- load-before-anything, anything-before-store --
	 * is already guaranteed without explicit barriers.
	 *
	 * Empirically it turns out locked ADDQ is cheaper than MFENCE,
	 * so we use that, with an offset below the return address on
	 * the stack to avoid a false dependency with RET.  (It might
	 * even be better to use a much lower offset, say -128, to
	 * avoid false dependencies for subsequent callees of the
	 * caller.)
	 *
	 * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
	 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
	 * https://www.agner.org/optimize/instruction_tables.pdf
	 *
	 * Sync with xen_mb in sys/arch/amd64/amd64/cpufunc.S.
	 */
	LOCK
	addq	$0, -8(%rsp)
	ret
END(_membar_sync)

ALIAS(atomic_add_32,_atomic_add_32)
ALIAS(atomic_add_64,_atomic_add_64)
ALIAS(atomic_add_int,_atomic_add_32)
ALIAS(atomic_add_long,_atomic_add_64)
ALIAS(atomic_add_ptr,_atomic_add_64)

ALIAS(atomic_add_32_nv,_atomic_add_32_nv)
ALIAS(atomic_add_64_nv,_atomic_add_64_nv)
ALIAS(atomic_add_int_nv,_atomic_add_32_nv)
ALIAS(atomic_add_long_nv,_atomic_add_64_nv)
ALIAS(atomic_add_ptr_nv,_atomic_add_64_nv)

ALIAS(atomic_and_32,_atomic_and_32)
ALIAS(atomic_and_64,_atomic_and_64)
ALIAS(atomic_and_uint,_atomic_and_32)
ALIAS(atomic_and_ulong,_atomic_and_64)
ALIAS(atomic_and_ptr,_atomic_and_64)

ALIAS(atomic_and_32_nv,_atomic_and_32_nv)
ALIAS(atomic_and_64_nv,_atomic_and_64_nv)
ALIAS(atomic_and_uint_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ulong_nv,_atomic_and_64_nv)
ALIAS(atomic_and_ptr_nv,_atomic_and_64_nv)

ALIAS(atomic_dec_32,_atomic_dec_32)
ALIAS(atomic_dec_64,_atomic_dec_64)
ALIAS(atomic_dec_uint,_atomic_dec_32)
ALIAS(atomic_dec_ulong,_atomic_dec_64)
ALIAS(atomic_dec_ptr,_atomic_dec_64)

ALIAS(atomic_dec_32_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_64_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_uint_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ulong_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_ptr_nv,_atomic_dec_64_nv)

ALIAS(atomic_inc_32,_atomic_inc_32)
ALIAS(atomic_inc_64,_atomic_inc_64)
ALIAS(atomic_inc_uint,_atomic_inc_32)
ALIAS(atomic_inc_ulong,_atomic_inc_64)
ALIAS(atomic_inc_ptr,_atomic_inc_64)

ALIAS(atomic_inc_32_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_64_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_uint_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ulong_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_ptr_nv,_atomic_inc_64_nv)

ALIAS(atomic_or_32,_atomic_or_32)
ALIAS(atomic_or_64,_atomic_or_64)
ALIAS(atomic_or_uint,_atomic_or_32)
ALIAS(atomic_or_ulong,_atomic_or_64)
ALIAS(atomic_or_ptr,_atomic_or_64)

ALIAS(atomic_or_32_nv,_atomic_or_32_nv)
ALIAS(atomic_or_64_nv,_atomic_or_64_nv)
ALIAS(atomic_or_uint_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ulong_nv,_atomic_or_64_nv)
ALIAS(atomic_or_ptr_nv,_atomic_or_64_nv)

ALIAS(atomic_swap_32,_atomic_swap_32)
ALIAS(atomic_swap_64,_atomic_swap_64)
ALIAS(atomic_swap_uint,_atomic_swap_32)
ALIAS(atomic_swap_ulong,_atomic_swap_64)
ALIAS(atomic_swap_ptr,_atomic_swap_64)

ALIAS(atomic_cas_32,_atomic_cas_32)
ALIAS(atomic_cas_64,_atomic_cas_64)
ALIAS(atomic_cas_uint,_atomic_cas_32)
ALIAS(atomic_cas_ulong,_atomic_cas_64)
ALIAS(atomic_cas_ptr,_atomic_cas_64)

ALIAS(atomic_cas_32_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_64_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_uint_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ulong_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_ptr_ni,_atomic_cas_64_ni)

ALIAS(membar_acquire,_membar_acquire)
ALIAS(membar_release,_membar_release)
ALIAS(membar_sync,_membar_sync)

ALIAS(membar_consumer,_membar_acquire)
ALIAS(membar_producer,_membar_release)
ALIAS(membar_enter,_membar_sync)
ALIAS(membar_exit,_membar_release)

STRONG_ALIAS(_atomic_add_int,_atomic_add_32)
STRONG_ALIAS(_atomic_add_long,_atomic_add_64)
STRONG_ALIAS(_atomic_add_ptr,_atomic_add_64)

STRONG_ALIAS(_atomic_add_int_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_long_nv,_atomic_add_64_nv)
STRONG_ALIAS(_atomic_add_ptr_nv,_atomic_add_64_nv)

STRONG_ALIAS(_atomic_and_uint,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ulong,_atomic_and_64)
STRONG_ALIAS(_atomic_and_ptr,_atomic_and_64)

STRONG_ALIAS(_atomic_and_uint_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ulong_nv,_atomic_and_64_nv)
STRONG_ALIAS(_atomic_and_ptr_nv,_atomic_and_64_nv)

STRONG_ALIAS(_atomic_dec_uint,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ulong,_atomic_dec_64)
STRONG_ALIAS(_atomic_dec_ptr,_atomic_dec_64)

STRONG_ALIAS(_atomic_dec_uint_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ulong_nv,_atomic_dec_64_nv)
STRONG_ALIAS(_atomic_dec_ptr_nv,_atomic_dec_64_nv)

STRONG_ALIAS(_atomic_inc_uint,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ulong,_atomic_inc_64)
STRONG_ALIAS(_atomic_inc_ptr,_atomic_inc_64)

STRONG_ALIAS(_atomic_inc_uint_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ulong_nv,_atomic_inc_64_nv)
STRONG_ALIAS(_atomic_inc_ptr_nv,_atomic_inc_64_nv)

STRONG_ALIAS(_atomic_or_uint,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ulong,_atomic_or_64)
STRONG_ALIAS(_atomic_or_ptr,_atomic_or_64)

STRONG_ALIAS(_atomic_or_uint_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ulong_nv,_atomic_or_64_nv)
STRONG_ALIAS(_atomic_or_ptr_nv,_atomic_or_64_nv)

STRONG_ALIAS(_atomic_swap_uint,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ulong,_atomic_swap_64)
STRONG_ALIAS(_atomic_swap_ptr,_atomic_swap_64)

STRONG_ALIAS(_atomic_cas_uint,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ulong,_atomic_cas_64)
STRONG_ALIAS(_atomic_cas_ptr,_atomic_cas_64)

STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_64_ni)
STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_64_ni)

STRONG_ALIAS(_membar_consumer,_membar_acquire)
STRONG_ALIAS(_membar_producer,_membar_release)
STRONG_ALIAS(_membar_enter,_membar_sync)
STRONG_ALIAS(_membar_exit,_membar_release)