/*	$NetBSD: atomic.S,v 1.32 2025/09/06 02:53:21 riastradh Exp $	*/

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <machine/asm.h>

#ifdef _KERNEL
#define	ALIAS(f, t)	STRONG_ALIAS(f,t)
#else
#define	ALIAS(f, t)	WEAK_ALIAS(f,t)
#endif

#ifdef _HARDKERNEL
#include <machine/frameasm.h>
#define	LOCK	HOTPATCH(HP_NAME_NOLOCK, 1); lock
#else
#define	LOCK	lock
#endif

	.text

/* 32-bit */

ENTRY(_atomic_add_32)
	LOCK
	addl	%esi, (%rdi)
	ret
END(_atomic_add_32)

ENTRY(_atomic_add_32_nv)
	movl	%esi, %eax
	LOCK
	xaddl	%eax, (%rdi)
	addl	%esi, %eax
	ret
END(_atomic_add_32_nv)

ENTRY(_atomic_and_32)
	LOCK
	andl	%esi, (%rdi)
	ret
END(_atomic_and_32)

ENTRY(_atomic_and_32_nv)
	movl	(%rdi), %eax
1:
	movl	%eax, %ecx
	andl	%esi, %ecx
	LOCK
	cmpxchgl %ecx, (%rdi)
	jnz	1b
	movl	%ecx, %eax
	ret
END(_atomic_and_32_nv)

ENTRY(_atomic_dec_32)
	LOCK
	decl	(%rdi)
	ret
END(_atomic_dec_32)

ENTRY(_atomic_dec_32_nv)
	movl	$-1, %eax
	LOCK
	xaddl	%eax, (%rdi)
	decl	%eax
	ret
END(_atomic_dec_32_nv)

ENTRY(_atomic_inc_32)
	LOCK
	incl	(%rdi)
	ret
END(_atomic_inc_32)

ENTRY(_atomic_inc_32_nv)
	movl	$1, %eax
	LOCK
	xaddl	%eax, (%rdi)
	incl	%eax
	ret
END(_atomic_inc_32_nv)

ENTRY(_atomic_or_32)
	LOCK
	orl	%esi, (%rdi)
	ret
END(_atomic_or_32)

ENTRY(_atomic_or_32_nv)
	movl	(%rdi), %eax
1:
	movl	%eax, %ecx
	orl	%esi, %ecx
	LOCK
	cmpxchgl %ecx, (%rdi)
	jnz	1b
	movl	%ecx, %eax
	ret
END(_atomic_or_32_nv)

ENTRY(_atomic_swap_32)
	movl	%esi, %eax
	xchgl	%eax, (%rdi)
	ret
END(_atomic_swap_32)

ENTRY(_atomic_cas_32)
	movl	%esi, %eax
	LOCK
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32)

ENTRY(_atomic_cas_32_ni)
	movl	%esi, %eax
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32_ni)
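
/*
 * Descriptive note: _atomic_cas_32_ni above (and _atomic_cas_64_ni in
 * the 64-bit section below) differs from the plain CAS only in
 * omitting the LOCK prefix, so the compare-and-swap is atomic with
 * respect to the current CPU (e.g. against interrupts) but not with
 * respect to other processors.
 */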

/* 64-bit */

ENTRY(_atomic_add_64)
	LOCK
	addq	%rsi, (%rdi)
	ret
END(_atomic_add_64)

ENTRY(_atomic_add_64_nv)
	movq	%rsi, %rax
	LOCK
	xaddq	%rax, (%rdi)
	addq	%rsi, %rax
	ret
END(_atomic_add_64_nv)

ENTRY(_atomic_and_64)
	LOCK
	andq	%rsi, (%rdi)
	ret
END(_atomic_and_64)

ENTRY(_atomic_and_64_nv)
	movq	(%rdi), %rax
1:
	movq	%rax, %rcx
	andq	%rsi, %rcx
	LOCK
	cmpxchgq %rcx, (%rdi)
	jnz	1b
	movq	%rcx, %rax
	ret
END(_atomic_and_64_nv)

ENTRY(_atomic_dec_64)
	LOCK
	decq	(%rdi)
	ret
END(_atomic_dec_64)

ENTRY(_atomic_dec_64_nv)
	movq	$-1, %rax
	LOCK
	xaddq	%rax, (%rdi)
	decq	%rax
	ret
END(_atomic_dec_64_nv)

ENTRY(_atomic_inc_64)
	LOCK
	incq	(%rdi)
	ret
END(_atomic_inc_64)

ENTRY(_atomic_inc_64_nv)
	movq	$1, %rax
	LOCK
	xaddq	%rax, (%rdi)
	incq	%rax
	ret
END(_atomic_inc_64_nv)

ENTRY(_atomic_or_64)
	LOCK
	orq	%rsi, (%rdi)
	ret
END(_atomic_or_64)

ENTRY(_atomic_or_64_nv)
	movq	(%rdi), %rax
1:
	movq	%rax, %rcx
	orq	%rsi, %rcx
	LOCK
	cmpxchgq %rcx, (%rdi)
	jnz	1b
	movq	%rcx, %rax
	ret
END(_atomic_or_64_nv)

ENTRY(_atomic_swap_64)
	movq	%rsi, %rax
	xchgq	%rax, (%rdi)
	ret
END(_atomic_swap_64)

ENTRY(_atomic_cas_64)
	movq	%rsi, %rax
	LOCK
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64)

ENTRY(_atomic_cas_64_ni)
	movq	%rsi, %rax
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64_ni)

/* memory barriers */

ENTRY(_membar_acquire)
	/*
	 * Every load from normal memory is a load-acquire on x86, so
	 * there is never any need for explicit barriers to order
	 * load-before-anything.
	 */
	ret
END(_membar_acquire)

ENTRY(_membar_release)
	/*
	 * Every store to normal memory is a store-release on x86, so
	 * there is never any need for explicit barriers to order
	 * anything-before-store.
	 */
	ret
END(_membar_release)

ENTRY(_membar_sync)
	/*
	 * MFENCE, or a serializing instruction like a locked ADDQ,
	 * is necessary to order store-before-load.  Every other
	 * ordering -- load-before-anything, anything-before-store --
	 * is already guaranteed without explicit barriers.
	 *
	 * Empirically it turns out locked ADDQ is cheaper than MFENCE,
	 * so we use that, with an offset below the return address on
	 * the stack to avoid a false dependency with RET.  (It might
	 * even be better to use a much lower offset, say -128, to
	 * avoid false dependencies for subsequent callees of the
	 * caller.)
	 *
	 * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
	 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
	 * https://www.agner.org/optimize/instruction_tables.pdf
	 *
	 * Sync with paravirt_membar_sync in
	 * sys/arch/amd64/amd64/cpufunc.S.
	 */
	LOCK
	addq	$0, -8(%rsp)
	ret
END(_membar_sync)
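
/*
 * Illustration of the one ordering that requires membar_sync,
 * store-before-load, as in a Dekker-style handshake (flag0/flag1 are
 * hypothetical variables, not part of this file).  Without the
 * barrier, each CPU's load can be satisfied before its own store is
 * visible to the other, so both sides may observe 0:
 *
 *	CPU 0				CPU 1
 *	flag0 = 1;			flag1 = 1;
 *	membar_sync();			membar_sync();
 *	if (flag1 == 0) ...		if (flag0 == 0) ...
 */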

ALIAS(atomic_add_32,_atomic_add_32)
ALIAS(atomic_add_64,_atomic_add_64)
ALIAS(atomic_add_int,_atomic_add_32)
ALIAS(atomic_add_long,_atomic_add_64)
ALIAS(atomic_add_ptr,_atomic_add_64)

ALIAS(atomic_add_32_nv,_atomic_add_32_nv)
ALIAS(atomic_add_64_nv,_atomic_add_64_nv)
ALIAS(atomic_add_int_nv,_atomic_add_32_nv)
ALIAS(atomic_add_long_nv,_atomic_add_64_nv)
ALIAS(atomic_add_ptr_nv,_atomic_add_64_nv)

ALIAS(atomic_and_32,_atomic_and_32)
ALIAS(atomic_and_64,_atomic_and_64)
ALIAS(atomic_and_uint,_atomic_and_32)
ALIAS(atomic_and_ulong,_atomic_and_64)
ALIAS(atomic_and_ptr,_atomic_and_64)

ALIAS(atomic_and_32_nv,_atomic_and_32_nv)
ALIAS(atomic_and_64_nv,_atomic_and_64_nv)
ALIAS(atomic_and_uint_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ulong_nv,_atomic_and_64_nv)
ALIAS(atomic_and_ptr_nv,_atomic_and_64_nv)

ALIAS(atomic_dec_32,_atomic_dec_32)
ALIAS(atomic_dec_64,_atomic_dec_64)
ALIAS(atomic_dec_uint,_atomic_dec_32)
ALIAS(atomic_dec_ulong,_atomic_dec_64)
ALIAS(atomic_dec_ptr,_atomic_dec_64)

ALIAS(atomic_dec_32_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_64_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_uint_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ulong_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_ptr_nv,_atomic_dec_64_nv)

ALIAS(atomic_inc_32,_atomic_inc_32)
ALIAS(atomic_inc_64,_atomic_inc_64)
ALIAS(atomic_inc_uint,_atomic_inc_32)
ALIAS(atomic_inc_ulong,_atomic_inc_64)
ALIAS(atomic_inc_ptr,_atomic_inc_64)

ALIAS(atomic_inc_32_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_64_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_uint_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ulong_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_ptr_nv,_atomic_inc_64_nv)

ALIAS(atomic_or_32,_atomic_or_32)
ALIAS(atomic_or_64,_atomic_or_64)
ALIAS(atomic_or_uint,_atomic_or_32)
ALIAS(atomic_or_ulong,_atomic_or_64)
ALIAS(atomic_or_ptr,_atomic_or_64)

ALIAS(atomic_or_32_nv,_atomic_or_32_nv)
ALIAS(atomic_or_64_nv,_atomic_or_64_nv)
ALIAS(atomic_or_uint_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ulong_nv,_atomic_or_64_nv)
ALIAS(atomic_or_ptr_nv,_atomic_or_64_nv)

ALIAS(atomic_swap_32,_atomic_swap_32)
ALIAS(atomic_swap_64,_atomic_swap_64)
ALIAS(atomic_swap_uint,_atomic_swap_32)
ALIAS(atomic_swap_ulong,_atomic_swap_64)
ALIAS(atomic_swap_ptr,_atomic_swap_64)

ALIAS(atomic_cas_32,_atomic_cas_32)
ALIAS(atomic_cas_64,_atomic_cas_64)
ALIAS(atomic_cas_uint,_atomic_cas_32)
ALIAS(atomic_cas_ulong,_atomic_cas_64)
ALIAS(atomic_cas_ptr,_atomic_cas_64)

ALIAS(atomic_cas_32_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_64_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_uint_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ulong_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_ptr_ni,_atomic_cas_64_ni)

ALIAS(membar_acquire,_membar_acquire)
ALIAS(membar_release,_membar_release)
ALIAS(membar_sync,_membar_sync)

ALIAS(membar_consumer,_membar_acquire)
ALIAS(membar_producer,_membar_release)
ALIAS(membar_enter,_membar_sync)
ALIAS(membar_exit,_membar_release)
ALIAS(membar_sync,_membar_sync)

STRONG_ALIAS(_atomic_add_int,_atomic_add_32)
STRONG_ALIAS(_atomic_add_long,_atomic_add_64)
STRONG_ALIAS(_atomic_add_ptr,_atomic_add_64)

STRONG_ALIAS(_atomic_add_int_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_long_nv,_atomic_add_64_nv)
STRONG_ALIAS(_atomic_add_ptr_nv,_atomic_add_64_nv)

STRONG_ALIAS(_atomic_and_uint,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ulong,_atomic_and_64)
STRONG_ALIAS(_atomic_and_ptr,_atomic_and_64)

STRONG_ALIAS(_atomic_and_uint_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ulong_nv,_atomic_and_64_nv)
STRONG_ALIAS(_atomic_and_ptr_nv,_atomic_and_64_nv)

STRONG_ALIAS(_atomic_dec_uint,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ulong,_atomic_dec_64)
STRONG_ALIAS(_atomic_dec_ptr,_atomic_dec_64)

STRONG_ALIAS(_atomic_dec_uint_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ulong_nv,_atomic_dec_64_nv)
STRONG_ALIAS(_atomic_dec_ptr_nv,_atomic_dec_64_nv)

STRONG_ALIAS(_atomic_inc_uint,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ulong,_atomic_inc_64)
STRONG_ALIAS(_atomic_inc_ptr,_atomic_inc_64)

STRONG_ALIAS(_atomic_inc_uint_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ulong_nv,_atomic_inc_64_nv)
STRONG_ALIAS(_atomic_inc_ptr_nv,_atomic_inc_64_nv)

STRONG_ALIAS(_atomic_or_uint,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ulong,_atomic_or_64)
STRONG_ALIAS(_atomic_or_ptr,_atomic_or_64)

STRONG_ALIAS(_atomic_or_uint_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ulong_nv,_atomic_or_64_nv)
STRONG_ALIAS(_atomic_or_ptr_nv,_atomic_or_64_nv)

STRONG_ALIAS(_atomic_swap_uint,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ulong,_atomic_swap_64)
STRONG_ALIAS(_atomic_swap_ptr,_atomic_swap_64)

STRONG_ALIAS(_atomic_cas_uint,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ulong,_atomic_cas_64)
STRONG_ALIAS(_atomic_cas_ptr,_atomic_cas_64)

STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_64_ni)
STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_64_ni)

STRONG_ALIAS(_membar_consumer,_membar_acquire)
STRONG_ALIAS(_membar_producer,_membar_release)
STRONG_ALIAS(_membar_enter,_membar_sync)
STRONG_ALIAS(_membar_exit,_membar_release)