/*	$NetBSD: atomic.S,v 1.38 2025/09/06 02:53:21 riastradh Exp $	*/

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <machine/asm.h>
/*
 * The __HAVE_ constants really belong somewhere other than
 * <machine/types.h>, because in general we can't pull that header
 * into assembly.  OTOH the only one we need here is
 * __HAVE_ATOMIC64_OPS, and the <sys/param.h> include above already
 * makes it visible.
 */
#ifdef _KERNEL
#define	ALIAS(f, t)	STRONG_ALIAS(f,t)
#else
#define	ALIAS(f, t)	WEAK_ALIAS(f,t)
#endif

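/*
 * In the hard (non-rump) kernel, LOCK is emitted through a hotpatch
 * point so that boot-time patching can drop the lock prefix where it
 * is unnecessary (HP_NAME_NOLOCK, e.g. on a uniprocessor boot), and
 * HOTPATCH_CAS_64 marks the 49-byte body of _atomic_cas_64 so it can
 * be overwritten with _atomic_cas_cx8 on CPUs that have CMPXCHG8B.
 * (This describes the usage below; see <machine/frameasm.h> for the
 * hotpatch machinery itself.)
 */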
#ifdef _HARDKERNEL
#include "opt_xen.h"
#include <machine/frameasm.h>
#define	LOCK		HOTPATCH(HP_NAME_NOLOCK, 1); lock
#define	HOTPATCH_CAS_64	HOTPATCH(HP_NAME_CAS_64, 49);
#else
#define	LOCK		lock
#define	HOTPATCH_CAS_64	/* nothing */
#endif

	.text

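/*
 * void atomic_add_32(volatile uint32_t *p, int32_t v)
 *
 *	Atomically add v to *p (prototype per atomic_ops(3)).  With
 *	the i386 cdecl convention the arguments arrive on the stack:
 *	4(%esp) = p, 8(%esp) = v.
 */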
ENTRY(_atomic_add_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	LOCK
	addl	%eax, (%edx)
	ret
END(_atomic_add_32)

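/*
 * uint32_t atomic_add_32_nv(volatile uint32_t *p, int32_t v)
 *
 *	Atomically add v to *p and return the new value.  XADD leaves
 *	the pre-add contents of *p in %eax, so add v once more to
 *	recover the value actually stored.
 */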
ENTRY(_atomic_add_32_nv)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	movl	%eax, %ecx
	LOCK
	xaddl	%eax, (%edx)
	addl	%ecx, %eax
	ret
END(_atomic_add_32_nv)

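/*
 * void atomic_and_32(volatile uint32_t *p, uint32_t mask)
 *
 *	Atomically AND mask into *p.
 */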
ENTRY(_atomic_and_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	LOCK
	andl	%eax, (%edx)
	ret
END(_atomic_and_32)

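/*
 * uint32_t atomic_and_32_nv(volatile uint32_t *p, uint32_t mask)
 *
 *	Atomically AND mask into *p and return the new value.  x86 has
 *	no fetch-and-and instruction, so this is a CMPXCHG loop:
 *	compute a candidate in %ecx, try to install it, and retry with
 *	the fresh value (which CMPXCHG leaves in %eax) if *p changed
 *	underneath us.
 */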
ENTRY(_atomic_and_32_nv)
	movl	4(%esp), %edx
	movl	(%edx), %eax
0:
	movl	%eax, %ecx
	andl	8(%esp), %ecx
	LOCK
	cmpxchgl %ecx, (%edx)
	jnz	1f
	movl	%ecx, %eax
	ret
1:
	jmp	0b
END(_atomic_and_32_nv)

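/*
 * void atomic_dec_32(volatile uint32_t *p)
 *
 *	Atomically decrement *p.
 */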
ENTRY(_atomic_dec_32)
	movl	4(%esp), %edx
	LOCK
	decl	(%edx)
	ret
END(_atomic_dec_32)

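/*
 * uint32_t atomic_dec_32_nv(volatile uint32_t *p)
 *
 *	Atomically decrement *p and return the new value: XADD with -1
 *	returns the old value in %eax, and old - 1 is the new value.
 */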
ENTRY(_atomic_dec_32_nv)
	movl	4(%esp), %edx
	movl	$-1, %eax
	LOCK
	xaddl	%eax, (%edx)
	decl	%eax
	ret
END(_atomic_dec_32_nv)

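/*
 * void atomic_inc_32(volatile uint32_t *p)
 *
 *	Atomically increment *p.
 */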
ENTRY(_atomic_inc_32)
	movl	4(%esp), %edx
	LOCK
	incl	(%edx)
	ret
END(_atomic_inc_32)

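/*
 * uint32_t atomic_inc_32_nv(volatile uint32_t *p)
 *
 *	Atomically increment *p and return the new value, symmetric
 *	with atomic_dec_32_nv above.
 */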
ENTRY(_atomic_inc_32_nv)
	movl	4(%esp), %edx
	movl	$1, %eax
	LOCK
	xaddl	%eax, (%edx)
	incl	%eax
	ret
END(_atomic_inc_32_nv)

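/*
 * void atomic_or_32(volatile uint32_t *p, uint32_t bits)
 *
 *	Atomically OR bits into *p.
 */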
ENTRY(_atomic_or_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	LOCK
	orl	%eax, (%edx)
	ret
END(_atomic_or_32)

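/*
 * uint32_t atomic_or_32_nv(volatile uint32_t *p, uint32_t bits)
 *
 *	Atomically OR bits into *p and return the new value, using the
 *	same CMPXCHG loop as atomic_and_32_nv above.
 */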
ENTRY(_atomic_or_32_nv)
	movl	4(%esp), %edx
	movl	(%edx), %eax
0:
	movl	%eax, %ecx
	orl	8(%esp), %ecx
	LOCK
	cmpxchgl %ecx, (%edx)
	jnz	1f
	movl	%ecx, %eax
	ret
1:
	jmp	0b
END(_atomic_or_32_nv)

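/*
 * uint32_t atomic_swap_32(volatile uint32_t *p, uint32_t new)
 *
 *	Atomically store new in *p and return the previous value.
 *	XCHG with a memory operand is implicitly locked, so no LOCK
 *	prefix is needed.
 */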
ENTRY(_atomic_swap_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	xchgl	%eax, (%edx)
	ret
END(_atomic_swap_32)

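/*
 * uint32_t atomic_cas_32(volatile uint32_t *p, uint32_t old, uint32_t new)
 *
 *	Atomic compare-and-swap: if *p == old, store new; either way,
 *	return the previous contents of *p in %eax.
 */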
ENTRY(_atomic_cas_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	movl	12(%esp), %ecx
	LOCK
	cmpxchgl %ecx, (%edx)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32)

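/*
 * uint32_t atomic_cas_32_ni(volatile uint32_t *p, uint32_t old, uint32_t new)
 *
 *	Same as atomic_cas_32, but without the lock prefix: the _ni
 *	variants need only be atomic relative to the current CPU (no
 *	interprocessor interlock).
 */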
ENTRY(_atomic_cas_32_ni)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	movl	12(%esp), %ecx
	cmpxchgl %ecx, (%edx)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32_ni)

ENTRY(_membar_acquire)
	/*
	 * Every load from normal memory is a load-acquire on x86, so
	 * there is never any need for explicit barriers to order
	 * load-before-anything.
	 */
	ret
END(_membar_acquire)

ENTRY(_membar_release)
	/*
	 * Every store to normal memory is a store-release on x86, so
	 * there is never any need for explicit barriers to order
	 * anything-before-store.
	 */
	ret
END(_membar_release)

ENTRY(_membar_sync)
	/*
	 * MFENCE, or a serializing instruction like a locked ADDL,
	 * is necessary to order store-before-load.  Every other
	 * ordering -- load-before-anything, anything-before-store --
	 * is already guaranteed without explicit barriers.
	 *
	 * Empirically it turns out locked ADDL is cheaper than MFENCE,
	 * so we use that, with an offset below the return address on
	 * the stack to avoid a false dependency with RET.  (It might
	 * even be better to use a much lower offset, say -128, to
	 * avoid false dependencies for subsequent callees of the
	 * caller.)
	 *
	 * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
	 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
	 * https://www.agner.org/optimize/instruction_tables.pdf
	 *
	 * Sync with paravirt_membar_sync in
	 * sys/arch/i386/i386/cpufunc.S.
	 */
	LOCK
	addl	$0, -4(%esp)
	ret
END(_membar_sync)

#if defined(__HAVE_ATOMIC64_OPS) || defined(_KERNEL)
#ifdef XENPV
STRONG_ALIAS(_atomic_cas_64,_atomic_cas_cx8)
#else
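/*
 * uint64_t atomic_cas_64(volatile uint64_t *p, uint64_t old, uint64_t new)
 *
 *	64-bit compare-and-swap for CPUs without CMPXCHG8B.  In the
 *	hard kernel it runs with interrupts disabled, which suffices
 *	on the assumption that CPUs old enough to lack CMPXCHG8B do
 *	not run multiprocessor; at boot, HOTPATCH_CAS_64 lets the
 *	kernel overwrite these 49 bytes with _atomic_cas_cx8 below
 *	when CMPXCHG8B is available.
 */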
ENTRY(_atomic_cas_64)
	HOTPATCH_CAS_64
	/* 49 bytes of instructions */
#ifdef _HARDKERNEL
	pushf
	cli
#endif
	pushl	%edi
	pushl	%ebx
#ifdef _HARDKERNEL
	/* The saved eflags word shifts the arguments by 4 bytes. */
	movl	16(%esp), %edi
	movl	20(%esp), %eax
	movl	24(%esp), %edx
	movl	28(%esp), %ebx
	movl	32(%esp), %ecx
#else
	movl	12(%esp), %edi
	movl	16(%esp), %eax
	movl	20(%esp), %edx
	movl	24(%esp), %ebx
	movl	28(%esp), %ecx
#endif
	cmpl	0(%edi), %eax
	jne	2f
	cmpl	4(%edi), %edx
	jne	2f
	movl	%ebx, 0(%edi)
	movl	%ecx, 4(%edi)
1:
	popl	%ebx
	popl	%edi
#ifdef _HARDKERNEL
	popf
#endif
	ret
2:
	movl	0(%edi), %eax
	movl	4(%edi), %edx
	jmp	1b
END(_atomic_cas_64)
#endif /* !XENPV */

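/*
 * uint64_t atomic_cas_cx8(volatile uint64_t *p, uint64_t old, uint64_t new)
 *
 *	CMPXCHG8B compares %edx:%eax with the memory operand; if they
 *	are equal it stores %ecx:%ebx, otherwise it loads the memory
 *	operand into %edx:%eax.  Either way %edx:%eax ends up holding
 *	the old value.  The 0xCC (int3) padding in the kernel brings
 *	the routine up to the 49-byte hotpatch window used to replace
 *	_atomic_cas_64 above.
 */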
ENTRY(_atomic_cas_cx8)
	/* 49 bytes of instructions */
	pushl	%edi
	pushl	%ebx
	movl	12(%esp), %edi
	movl	16(%esp), %eax
	movl	20(%esp), %edx
	movl	24(%esp), %ebx
	movl	28(%esp), %ecx
	LOCK
	cmpxchg8b (%edi)
	popl	%ebx
	popl	%edi
	ret
#ifdef _HARDKERNEL
	.space	20, 0xCC
#endif
END(_atomic_cas_cx8)
LABEL(_atomic_cas_cx8_end)
#endif /* __HAVE_ATOMIC64_OPS || _KERNEL */

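/*
 * Publish the public atomic_ops(3) and membar_ops(3) names: weak
 * aliases in userland, strong aliases in the kernel (via ALIAS), plus
 * the _-prefixed width variants used internally.
 */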
ALIAS(atomic_add_32,_atomic_add_32)
ALIAS(atomic_add_int,_atomic_add_32)
ALIAS(atomic_add_long,_atomic_add_32)
ALIAS(atomic_add_ptr,_atomic_add_32)

ALIAS(atomic_add_32_nv,_atomic_add_32_nv)
ALIAS(atomic_add_int_nv,_atomic_add_32_nv)
ALIAS(atomic_add_long_nv,_atomic_add_32_nv)
ALIAS(atomic_add_ptr_nv,_atomic_add_32_nv)

ALIAS(atomic_and_32,_atomic_and_32)
ALIAS(atomic_and_uint,_atomic_and_32)
ALIAS(atomic_and_ulong,_atomic_and_32)
ALIAS(atomic_and_ptr,_atomic_and_32)

ALIAS(atomic_and_32_nv,_atomic_and_32_nv)
ALIAS(atomic_and_uint_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ulong_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ptr_nv,_atomic_and_32_nv)

ALIAS(atomic_dec_32,_atomic_dec_32)
ALIAS(atomic_dec_uint,_atomic_dec_32)
ALIAS(atomic_dec_ulong,_atomic_dec_32)
ALIAS(atomic_dec_ptr,_atomic_dec_32)

ALIAS(atomic_dec_32_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_uint_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ulong_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ptr_nv,_atomic_dec_32_nv)

ALIAS(atomic_inc_32,_atomic_inc_32)
ALIAS(atomic_inc_uint,_atomic_inc_32)
ALIAS(atomic_inc_ulong,_atomic_inc_32)
ALIAS(atomic_inc_ptr,_atomic_inc_32)

ALIAS(atomic_inc_32_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_uint_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ulong_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ptr_nv,_atomic_inc_32_nv)

ALIAS(atomic_or_32,_atomic_or_32)
ALIAS(atomic_or_uint,_atomic_or_32)
ALIAS(atomic_or_ulong,_atomic_or_32)
ALIAS(atomic_or_ptr,_atomic_or_32)

ALIAS(atomic_or_32_nv,_atomic_or_32_nv)
ALIAS(atomic_or_uint_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ulong_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ptr_nv,_atomic_or_32_nv)

ALIAS(atomic_swap_32,_atomic_swap_32)
ALIAS(atomic_swap_uint,_atomic_swap_32)
ALIAS(atomic_swap_ulong,_atomic_swap_32)
ALIAS(atomic_swap_ptr,_atomic_swap_32)

ALIAS(atomic_cas_32,_atomic_cas_32)
ALIAS(atomic_cas_uint,_atomic_cas_32)
ALIAS(atomic_cas_ulong,_atomic_cas_32)
ALIAS(atomic_cas_ptr,_atomic_cas_32)

ALIAS(atomic_cas_32_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_uint_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ulong_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ptr_ni,_atomic_cas_32_ni)

#if defined(__HAVE_ATOMIC64_OPS) || defined(_KERNEL)
ALIAS(atomic_cas_64,_atomic_cas_64)
ALIAS(atomic_cas_64_ni,_atomic_cas_64)
ALIAS(__sync_val_compare_and_swap_8,_atomic_cas_64)
#endif /* __HAVE_ATOMIC64_OPS || _KERNEL */

ALIAS(membar_acquire,_membar_acquire)
ALIAS(membar_release,_membar_release)
ALIAS(membar_sync,_membar_sync)

ALIAS(membar_consumer,_membar_acquire)
ALIAS(membar_producer,_membar_release)
ALIAS(membar_enter,_membar_sync)
ALIAS(membar_exit,_membar_release)

STRONG_ALIAS(_atomic_add_int,_atomic_add_32)
STRONG_ALIAS(_atomic_add_long,_atomic_add_32)
STRONG_ALIAS(_atomic_add_ptr,_atomic_add_32)

STRONG_ALIAS(_atomic_add_int_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_long_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_ptr_nv,_atomic_add_32_nv)

STRONG_ALIAS(_atomic_and_uint,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ulong,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ptr,_atomic_and_32)

STRONG_ALIAS(_atomic_and_uint_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ulong_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ptr_nv,_atomic_and_32_nv)

STRONG_ALIAS(_atomic_dec_uint,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ulong,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ptr,_atomic_dec_32)

STRONG_ALIAS(_atomic_dec_uint_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ulong_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ptr_nv,_atomic_dec_32_nv)

STRONG_ALIAS(_atomic_inc_uint,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ulong,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ptr,_atomic_inc_32)

STRONG_ALIAS(_atomic_inc_uint_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ulong_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ptr_nv,_atomic_inc_32_nv)

STRONG_ALIAS(_atomic_or_uint,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ulong,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ptr,_atomic_or_32)

STRONG_ALIAS(_atomic_or_uint_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ulong_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ptr_nv,_atomic_or_32_nv)

STRONG_ALIAS(_atomic_swap_uint,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ulong,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ptr,_atomic_swap_32)

STRONG_ALIAS(_atomic_cas_uint,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ulong,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ptr,_atomic_cas_32)

STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_32_ni)

STRONG_ALIAS(_membar_consumer,_membar_acquire)
STRONG_ALIAS(_membar_producer,_membar_release)
STRONG_ALIAS(_membar_enter,_membar_sync)
STRONG_ALIAS(_membar_exit,_membar_release)