/*	$NetBSD: atomic.S,v 1.38 2025/09/06 02:53:21 riastradh Exp $	*/

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <machine/asm.h>
/*
 * __HAVE_ constants should not be in <machine/types.h>
 * because we can't use them from assembly. OTOH we
 * only need __HAVE_ATOMIC64_OPS here, and we don't.
 */

#ifdef _KERNEL
#define	ALIAS(f, t)	STRONG_ALIAS(f,t)
#else
#define	ALIAS(f, t)	WEAK_ALIAS(f,t)
#endif

#ifdef _HARDKERNEL
#include "opt_xen.h"
#include <machine/frameasm.h>
#define	LOCK		HOTPATCH(HP_NAME_NOLOCK, 1); lock
#define	HOTPATCH_CAS_64	HOTPATCH(HP_NAME_CAS_64, 49);
#else
#define	LOCK		lock
#define	HOTPATCH_CAS_64	/* nothing */
#endif

	.text

ENTRY(_atomic_add_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	LOCK
	addl	%eax, (%edx)
	ret
END(_atomic_add_32)

ENTRY(_atomic_add_32_nv)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	movl	%eax, %ecx
	LOCK
	xaddl	%eax, (%edx)
	addl	%ecx, %eax
	ret
END(_atomic_add_32_nv)

ENTRY(_atomic_and_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	LOCK
	andl	%eax, (%edx)
	ret
END(_atomic_and_32)

ENTRY(_atomic_and_32_nv)
	movl	4(%esp), %edx
	movl	(%edx), %eax
0:
	movl	%eax, %ecx
	andl	8(%esp), %ecx
	LOCK
	cmpxchgl %ecx, (%edx)
	jnz	1f
	movl	%ecx, %eax
	ret
1:
	jmp	0b
END(_atomic_and_32_nv)

ENTRY(_atomic_dec_32)
	movl	4(%esp), %edx
	LOCK
	decl	(%edx)
	ret
END(_atomic_dec_32)

ENTRY(_atomic_dec_32_nv)
	movl	4(%esp), %edx
	movl	$-1, %eax
	LOCK
	xaddl	%eax, (%edx)
	decl	%eax
	ret
END(_atomic_dec_32_nv)

ENTRY(_atomic_inc_32)
	movl	4(%esp), %edx
	LOCK
	incl	(%edx)
	ret
END(_atomic_inc_32)

ENTRY(_atomic_inc_32_nv)
	movl	4(%esp), %edx
	movl	$1, %eax
	LOCK
	xaddl	%eax, (%edx)
	incl	%eax
	ret
END(_atomic_inc_32_nv)

ENTRY(_atomic_or_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	LOCK
	orl	%eax, (%edx)
	ret
END(_atomic_or_32)

ENTRY(_atomic_or_32_nv)
	movl	4(%esp), %edx
	movl	(%edx), %eax
0:
	movl	%eax, %ecx
	orl	8(%esp), %ecx
	LOCK
	cmpxchgl %ecx, (%edx)
	jnz	1f
	movl	%ecx, %eax
	ret
1:
	jmp	0b
END(_atomic_or_32_nv)

ENTRY(_atomic_swap_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	xchgl	%eax, (%edx)
	ret
END(_atomic_swap_32)

ENTRY(_atomic_cas_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	movl	12(%esp), %ecx
	LOCK
	cmpxchgl %ecx, (%edx)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32)

ENTRY(_atomic_cas_32_ni)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	movl	12(%esp), %ecx
	cmpxchgl %ecx, (%edx)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32_ni)

ENTRY(_membar_acquire)
	/*
	 * Every load from normal memory is a load-acquire on x86, so
	 * there is never any need for explicit barriers to order
	 * load-before-anything.
	 */
	ret
END(_membar_acquire)

ENTRY(_membar_release)
	/*
	 * Every store to normal memory is a store-release on x86, so
	 * there is never any need for explicit barriers to order
	 * anything-before-store.
	 */
	ret
END(_membar_release)

ENTRY(_membar_sync)
	/*
	 * MFENCE, or a serializing instruction like a locked ADDL,
	 * is necessary to order store-before-load.  Every other
	 * ordering -- load-before-anything, anything-before-store --
	 * is already guaranteed without explicit barriers.
	 *
	 * Empirically it turns out locked ADDL is cheaper than MFENCE,
	 * so we use that, with an offset below the return address on
	 * the stack to avoid a false dependency with RET.  (It might
	 * even be better to use a much lower offset, say -128, to
	 * avoid false dependencies for subsequent callees of the
	 * caller.)
	 *
	 * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
	 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
	 * https://www.agner.org/optimize/instruction_tables.pdf
	 *
	 * Sync with paravirt_membar_sync in
	 * sys/arch/i386/i386/cpufunc.S.
	 */
	LOCK
	addl	$0, -4(%esp)
	ret
END(_membar_sync)
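
/*
 * Illustrative sketch (not part of the library): how C callers
 * typically pair the membar routines above.  On x86 the acquire
 * and release cases need no fence instruction, which is why
 * _membar_acquire and _membar_release are bare RETs; only the
 * store-before-load case needs the locked ADDL in _membar_sync.
 * The variables and helpers shown (`datum', `ready', `request',
 * `ack', consume(), back_off()) are hypothetical.
 *
 *	#include <sys/atomic.h>
 *
 *	// producer: publish datum, then set the flag
 *	datum = 42;
 *	membar_release();
 *	ready = 1;
 *
 *	// consumer: observe the flag, then read datum
 *	while (!ready)
 *		continue;
 *	membar_acquire();
 *	consume(datum);
 *
 *	// Dekker-style handshake: the store to request must be
 *	// visible before the load of ack, so a full barrier
 *	// (store-before-load) is required
 *	request = 1;
 *	membar_sync();
 *	if (ack)
 *		back_off();
 */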

#if defined(__HAVE_ATOMIC64_OPS) || defined(_KERNEL)
#ifdef XENPV
STRONG_ALIAS(_atomic_cas_64,_atomic_cas_cx8)
#else
ENTRY(_atomic_cas_64)
	HOTPATCH_CAS_64
	/* 49 bytes of instructions */
#ifdef _HARDKERNEL
	pushf
	cli
#endif
	pushl	%edi
	pushl	%ebx
#ifdef _HARDKERNEL
	/* the saved EFLAGS word shifts the arguments by 4 bytes */
	movl	16(%esp), %edi
	movl	20(%esp), %eax
	movl	24(%esp), %edx
	movl	28(%esp), %ebx
	movl	32(%esp), %ecx
#else
	movl	12(%esp), %edi
	movl	16(%esp), %eax
	movl	20(%esp), %edx
	movl	24(%esp), %ebx
	movl	28(%esp), %ecx
#endif
	cmpl	0(%edi), %eax
	jne	2f
	cmpl	4(%edi), %edx
	jne	2f
	movl	%ebx, 0(%edi)
	movl	%ecx, 4(%edi)
1:
	popl	%ebx
	popl	%edi
#ifdef _HARDKERNEL
	popf
#endif
	ret
2:
	movl	0(%edi), %eax
	movl	4(%edi), %edx
	jmp	1b
END(_atomic_cas_64)
#endif /* !XENPV */

ENTRY(_atomic_cas_cx8)
	/* 49 bytes of instructions */
	pushl	%edi
	pushl	%ebx
	movl	12(%esp), %edi
	movl	16(%esp), %eax
	movl	20(%esp), %edx
	movl	24(%esp), %ebx
	movl	28(%esp), %ecx
	LOCK
	cmpxchg8b (%edi)
	popl	%ebx
	popl	%edi
	ret
#ifdef _HARDKERNEL
	.space	20, 0xCC
#endif
END(_atomic_cas_cx8)
LABEL(_atomic_cas_cx8_end)
#endif /* __HAVE_ATOMIC64_OPS || _KERNEL */
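
/*
 * Illustrative sketch (not part of the library): a 64-bit
 * fetch-and-add built from atomic_cas_64() above, which resolves
 * to LOCK CMPXCHG8B, or to the fallback sequence on CPUs without
 * CX8.  A torn initial read only costs a retry, because the CAS
 * validates the whole 64-bit value.  The function name
 * `counter_add_64' is hypothetical.
 *
 *	#include <sys/types.h>
 *	#include <sys/atomic.h>
 *
 *	static uint64_t
 *	counter_add_64(volatile uint64_t *p, uint64_t delta)
 *	{
 *		uint64_t o, n;
 *
 *		do {
 *			o = *p;
 *			n = o + delta;
 *		} while (atomic_cas_64(p, o, n) != o);
 *		return n;
 *	}
 */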

ALIAS(atomic_add_32,_atomic_add_32)
ALIAS(atomic_add_int,_atomic_add_32)
ALIAS(atomic_add_long,_atomic_add_32)
ALIAS(atomic_add_ptr,_atomic_add_32)

ALIAS(atomic_add_32_nv,_atomic_add_32_nv)
ALIAS(atomic_add_int_nv,_atomic_add_32_nv)
ALIAS(atomic_add_long_nv,_atomic_add_32_nv)
ALIAS(atomic_add_ptr_nv,_atomic_add_32_nv)

ALIAS(atomic_and_32,_atomic_and_32)
ALIAS(atomic_and_uint,_atomic_and_32)
ALIAS(atomic_and_ulong,_atomic_and_32)
ALIAS(atomic_and_ptr,_atomic_and_32)

ALIAS(atomic_and_32_nv,_atomic_and_32_nv)
ALIAS(atomic_and_uint_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ulong_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ptr_nv,_atomic_and_32_nv)

ALIAS(atomic_dec_32,_atomic_dec_32)
ALIAS(atomic_dec_uint,_atomic_dec_32)
ALIAS(atomic_dec_ulong,_atomic_dec_32)
ALIAS(atomic_dec_ptr,_atomic_dec_32)

ALIAS(atomic_dec_32_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_uint_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ulong_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ptr_nv,_atomic_dec_32_nv)

ALIAS(atomic_inc_32,_atomic_inc_32)
ALIAS(atomic_inc_uint,_atomic_inc_32)
ALIAS(atomic_inc_ulong,_atomic_inc_32)
ALIAS(atomic_inc_ptr,_atomic_inc_32)

ALIAS(atomic_inc_32_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_uint_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ulong_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ptr_nv,_atomic_inc_32_nv)

ALIAS(atomic_or_32,_atomic_or_32)
ALIAS(atomic_or_uint,_atomic_or_32)
ALIAS(atomic_or_ulong,_atomic_or_32)
ALIAS(atomic_or_ptr,_atomic_or_32)

ALIAS(atomic_or_32_nv,_atomic_or_32_nv)
ALIAS(atomic_or_uint_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ulong_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ptr_nv,_atomic_or_32_nv)

ALIAS(atomic_swap_32,_atomic_swap_32)
ALIAS(atomic_swap_uint,_atomic_swap_32)
ALIAS(atomic_swap_ulong,_atomic_swap_32)
ALIAS(atomic_swap_ptr,_atomic_swap_32)

ALIAS(atomic_cas_32,_atomic_cas_32)
ALIAS(atomic_cas_uint,_atomic_cas_32)
ALIAS(atomic_cas_ulong,_atomic_cas_32)
ALIAS(atomic_cas_ptr,_atomic_cas_32)

ALIAS(atomic_cas_32_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_uint_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ulong_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ptr_ni,_atomic_cas_32_ni)

#if defined(__HAVE_ATOMIC64_OPS) || defined(_KERNEL)
ALIAS(atomic_cas_64,_atomic_cas_64)
ALIAS(atomic_cas_64_ni,_atomic_cas_64)
ALIAS(__sync_val_compare_and_swap_8,_atomic_cas_64)
#endif /* __HAVE_ATOMIC64_OPS || _KERNEL */

ALIAS(membar_acquire,_membar_acquire)
ALIAS(membar_release,_membar_release)
ALIAS(membar_sync,_membar_sync)

ALIAS(membar_consumer,_membar_acquire)
ALIAS(membar_producer,_membar_release)
ALIAS(membar_enter,_membar_sync)
ALIAS(membar_exit,_membar_release)
ALIAS(membar_sync,_membar_sync)

STRONG_ALIAS(_atomic_add_int,_atomic_add_32)
STRONG_ALIAS(_atomic_add_long,_atomic_add_32)
STRONG_ALIAS(_atomic_add_ptr,_atomic_add_32)

STRONG_ALIAS(_atomic_add_int_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_long_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_ptr_nv,_atomic_add_32_nv)

STRONG_ALIAS(_atomic_and_uint,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ulong,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ptr,_atomic_and_32)

STRONG_ALIAS(_atomic_and_uint_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ulong_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ptr_nv,_atomic_and_32_nv)

STRONG_ALIAS(_atomic_dec_uint,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ulong,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ptr,_atomic_dec_32)

STRONG_ALIAS(_atomic_dec_uint_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ulong_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ptr_nv,_atomic_dec_32_nv)

STRONG_ALIAS(_atomic_inc_uint,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ulong,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ptr,_atomic_inc_32)

STRONG_ALIAS(_atomic_inc_uint_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ulong_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ptr_nv,_atomic_inc_32_nv)

STRONG_ALIAS(_atomic_or_uint,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ulong,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ptr,_atomic_or_32)

STRONG_ALIAS(_atomic_or_uint_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ulong_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ptr_nv,_atomic_or_32_nv)

STRONG_ALIAS(_atomic_swap_uint,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ulong,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ptr,_atomic_swap_32)

STRONG_ALIAS(_atomic_cas_uint,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ulong,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ptr,_atomic_cas_32)

STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_32_ni)

STRONG_ALIAS(_membar_consumer,_membar_acquire)
STRONG_ALIAS(_membar_producer,_membar_release)
STRONG_ALIAS(_membar_enter,_membar_sync)
STRONG_ALIAS(_membar_exit,_membar_release)
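
/*
 * Illustrative sketch (not part of the library): the
 * __sync_val_compare_and_swap_8 alias above exists to satisfy the
 * out-of-line call GCC may emit for an 8-byte __sync builtin when
 * it cannot assume CMPXCHG8B, e.g.
 *
 *	#include <stdint.h>
 *
 *	uint64_t
 *	set_if_zero_64(volatile uint64_t *p, uint64_t n)
 *	{
 *		// may compile to a call to __sync_val_compare_and_swap_8
 *		return __sync_val_compare_and_swap(p, 0, n);
 *	}
 *
 * The function name `set_if_zero_64' is hypothetical.
 */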