/*	$NetBSD: atomic.S,v 1.32 2025/09/06 02:53:21 riastradh Exp $	*/

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <machine/asm.h>

#ifdef _KERNEL
#define	ALIAS(f, t)	STRONG_ALIAS(f,t)
#else
#define	ALIAS(f, t)	WEAK_ALIAS(f,t)
#endif

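/*
 * In the kernel proper, the LOCK prefix is emitted through a hotpatch
 * point so that it can be overwritten with a no-op at boot time on
 * uniprocessor systems, where the bus lock would be pure overhead.
 */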
#ifdef _HARDKERNEL
#include <machine/frameasm.h>
#define	LOCK		HOTPATCH(HP_NAME_NOLOCK, 1); lock
#else
#define	LOCK		lock
#endif

	.text

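/*
 * All of these routines follow the SysV AMD64 calling convention:
 * the target pointer arrives in %rdi and the operand, if any, in
 * %esi (32-bit) or %rsi (64-bit).  Routines that return a value
 * (swap, cas, and the *_nv variants) return it in %eax or %rax.
 */
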
/* 32-bit */

ENTRY(_atomic_add_32)
	LOCK
	addl	%esi, (%rdi)
	ret
END(_atomic_add_32)

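/*
 * The *_nv ("new value") variants must return the updated word.
 * XADD leaves the old value in the source register, so adding the
 * operand once more reconstructs the new value without touching
 * memory again.  The inc/dec *_nv variants below use the same
 * pattern with an operand of 1 or -1.
 */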
ENTRY(_atomic_add_32_nv)
	movl	%esi, %eax
	LOCK
	xaddl	%eax, (%rdi)
	addl	%esi, %eax
	ret
END(_atomic_add_32_nv)

ENTRY(_atomic_and_32)
	LOCK
	andl	%esi, (%rdi)
	ret
END(_atomic_and_32)

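/*
 * x86 has no fetch-and-AND/OR instruction, so the and/or *_nv
 * variants loop over CMPXCHG: compute the candidate new value in a
 * scratch register and try to install it.  On failure CMPXCHG
 * reloads %eax with the current memory contents, so the loop
 * retries with a fresh snapshot.
 */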
ENTRY(_atomic_and_32_nv)
	movl	(%rdi), %eax
1:
	movl	%eax, %ecx
	andl	%esi, %ecx
	LOCK
	cmpxchgl %ecx, (%rdi)
	jnz	1b
	movl	%ecx, %eax
	ret
END(_atomic_and_32_nv)

ENTRY(_atomic_dec_32)
	LOCK
	decl	(%rdi)
	ret
END(_atomic_dec_32)

ENTRY(_atomic_dec_32_nv)
	movl	$-1, %eax
	LOCK
	xaddl	%eax, (%rdi)
	decl	%eax
	ret
END(_atomic_dec_32_nv)

ENTRY(_atomic_inc_32)
	LOCK
	incl	(%rdi)
	ret
END(_atomic_inc_32)

ENTRY(_atomic_inc_32_nv)
	movl	$1, %eax
	LOCK
	xaddl	%eax, (%rdi)
	incl	%eax
	ret
END(_atomic_inc_32_nv)

ENTRY(_atomic_or_32)
	LOCK
	orl	%esi, (%rdi)
	ret
END(_atomic_or_32)

ENTRY(_atomic_or_32_nv)
	movl	(%rdi), %eax
1:
	movl	%eax, %ecx
	orl	%esi, %ecx
	LOCK
	cmpxchgl %ecx, (%rdi)
	jnz	1b
	movl	%ecx, %eax
	ret
END(_atomic_or_32_nv)

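/*
 * XCHG with a memory operand asserts the bus lock implicitly, so
 * the swap variants need no explicit LOCK prefix.
 */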
ENTRY(_atomic_swap_32)
	movl	%esi, %eax
	xchgl	%eax, (%rdi)
	ret
END(_atomic_swap_32)

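/*
 * atomic_cas_32(p, expected, new): CMPXCHG compares (%rdi) with the
 * expected value in %eax and, if they match, stores the new value
 * from %edx.  Either way the old memory contents end up in %eax,
 * which is exactly the defined return value, so no branch is needed.
 */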
ENTRY(_atomic_cas_32)
	movl	%esi, %eax
	LOCK
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32)

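/*
 * The _ni ("non-interlocked") variants omit the LOCK prefix: they
 * are atomic with respect to preemption and interrupts on the
 * current CPU, but make no guarantee against concurrent access
 * from other processors.
 */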
ENTRY(_atomic_cas_32_ni)
	movl	%esi, %eax
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32_ni)

/* 64-bit */

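/*
 * The 64-bit variants mirror the 32-bit ones exactly, using quadword
 * instructions and the full %rsi/%rax/%rcx/%rdx registers.  Since
 * amd64 is LP64, the long and pointer aliases below all map here.
 */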
ENTRY(_atomic_add_64)
	LOCK
	addq	%rsi, (%rdi)
	ret
END(_atomic_add_64)

ENTRY(_atomic_add_64_nv)
	movq	%rsi, %rax
	LOCK
	xaddq	%rax, (%rdi)
	addq	%rsi, %rax
	ret
END(_atomic_add_64_nv)

ENTRY(_atomic_and_64)
	LOCK
	andq	%rsi, (%rdi)
	ret
END(_atomic_and_64)

ENTRY(_atomic_and_64_nv)
	movq	(%rdi), %rax
1:
	movq	%rax, %rcx
	andq	%rsi, %rcx
	LOCK
	cmpxchgq %rcx, (%rdi)
	jnz	1b
	movq	%rcx, %rax
	ret
END(_atomic_and_64_nv)

ENTRY(_atomic_dec_64)
	LOCK
	decq	(%rdi)
	ret
END(_atomic_dec_64)

ENTRY(_atomic_dec_64_nv)
	movq	$-1, %rax
	LOCK
	xaddq	%rax, (%rdi)
	decq	%rax
	ret
END(_atomic_dec_64_nv)

ENTRY(_atomic_inc_64)
	LOCK
	incq	(%rdi)
	ret
END(_atomic_inc_64)

ENTRY(_atomic_inc_64_nv)
	movq	$1, %rax
	LOCK
	xaddq	%rax, (%rdi)
	incq	%rax
	ret
END(_atomic_inc_64_nv)

ENTRY(_atomic_or_64)
	LOCK
	orq	%rsi, (%rdi)
	ret
END(_atomic_or_64)

ENTRY(_atomic_or_64_nv)
	movq	(%rdi), %rax
1:
	movq	%rax, %rcx
	orq	%rsi, %rcx
	LOCK
	cmpxchgq %rcx, (%rdi)
	jnz	1b
	movq	%rcx, %rax
	ret
END(_atomic_or_64_nv)

ENTRY(_atomic_swap_64)
	movq	%rsi, %rax
	xchgq	%rax, (%rdi)
	ret
END(_atomic_swap_64)

ENTRY(_atomic_cas_64)
	movq	%rsi, %rax
	LOCK
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64)

ENTRY(_atomic_cas_64_ni)
	movq	%rsi, %rax
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64_ni)

/* memory barriers */

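/*
 * x86 provides a TSO-like memory model: loads are not reordered with
 * other loads, stores are not reordered with other stores, and
 * stores are not reordered with earlier loads.  The only visible
 * reordering is of a store with a later load, so only membar_sync
 * needs to emit an instruction.
 */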
ENTRY(_membar_acquire)
	/*
	 * Every load from normal memory is a load-acquire on x86, so
	 * there is never any need for explicit barriers to order
	 * load-before-anything.
	 */
	ret
END(_membar_acquire)

ENTRY(_membar_release)
	/*
	 * Every store to normal memory is a store-release on x86, so
	 * there is never any need for explicit barriers to order
	 * anything-before-store.
	 */
	ret
END(_membar_release)

ENTRY(_membar_sync)
	/*
	 * MFENCE, or a serializing instruction like a locked ADDQ,
	 * is necessary to order store-before-load.  Every other
	 * ordering -- load-before-anything, anything-before-store --
	 * is already guaranteed without explicit barriers.
	 *
	 * Empirically it turns out locked ADDQ is cheaper than MFENCE,
	 * so we use that, with an offset below the return address on
	 * the stack to avoid a false dependency with RET.  (It might
	 * even be better to use a much lower offset, say -128, to
	 * avoid false dependencies for subsequent callees of the
	 * caller.)
	 *
	 * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
	 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
	 * https://www.agner.org/optimize/instruction_tables.pdf
	 *
	 * Sync with paravirt_membar_sync in
	 * sys/arch/amd64/amd64/cpufunc.S.
	 */
	LOCK
	addq	$0, -8(%rsp)
	ret
END(_membar_sync)

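/*
 * Hedged illustration: the store-before-load case that membar_sync
 * exists for is a Dekker-style handshake, sketched here in C-like
 * pseudocode with made-up flag variables:
 *
 *	CPU 0				CPU 1
 *	flag0 = 1;			flag1 = 1;
 *	membar_sync();			membar_sync();
 *	if (flag1 == 0) ...		if (flag0 == 0) ...
 *
 * Without the barriers, each CPU's store may sit in its store buffer
 * past its load, and both sides can observe the other's flag as 0.
 *
 * Exported names: ALIAS expands to a strong alias in the kernel and
 * a weak alias in userland (see the top of this file), so userland
 * consumers may override these symbols.  The int/uint/long/ulong/ptr
 * variants bind to the 32- or 64-bit implementations per the LP64
 * model.  The membar_{consumer,producer,enter,exit} names are the
 * older barrier API, mapped onto acquire/release/sync.
 */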
ALIAS(atomic_add_32,_atomic_add_32)
ALIAS(atomic_add_64,_atomic_add_64)
ALIAS(atomic_add_int,_atomic_add_32)
ALIAS(atomic_add_long,_atomic_add_64)
ALIAS(atomic_add_ptr,_atomic_add_64)

ALIAS(atomic_add_32_nv,_atomic_add_32_nv)
ALIAS(atomic_add_64_nv,_atomic_add_64_nv)
ALIAS(atomic_add_int_nv,_atomic_add_32_nv)
ALIAS(atomic_add_long_nv,_atomic_add_64_nv)
ALIAS(atomic_add_ptr_nv,_atomic_add_64_nv)

ALIAS(atomic_and_32,_atomic_and_32)
ALIAS(atomic_and_64,_atomic_and_64)
ALIAS(atomic_and_uint,_atomic_and_32)
ALIAS(atomic_and_ulong,_atomic_and_64)
ALIAS(atomic_and_ptr,_atomic_and_64)

ALIAS(atomic_and_32_nv,_atomic_and_32_nv)
ALIAS(atomic_and_64_nv,_atomic_and_64_nv)
ALIAS(atomic_and_uint_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ulong_nv,_atomic_and_64_nv)
ALIAS(atomic_and_ptr_nv,_atomic_and_64_nv)

ALIAS(atomic_dec_32,_atomic_dec_32)
ALIAS(atomic_dec_64,_atomic_dec_64)
ALIAS(atomic_dec_uint,_atomic_dec_32)
ALIAS(atomic_dec_ulong,_atomic_dec_64)
ALIAS(atomic_dec_ptr,_atomic_dec_64)

ALIAS(atomic_dec_32_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_64_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_uint_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ulong_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_ptr_nv,_atomic_dec_64_nv)

ALIAS(atomic_inc_32,_atomic_inc_32)
ALIAS(atomic_inc_64,_atomic_inc_64)
ALIAS(atomic_inc_uint,_atomic_inc_32)
ALIAS(atomic_inc_ulong,_atomic_inc_64)
ALIAS(atomic_inc_ptr,_atomic_inc_64)

ALIAS(atomic_inc_32_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_64_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_uint_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ulong_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_ptr_nv,_atomic_inc_64_nv)

ALIAS(atomic_or_32,_atomic_or_32)
ALIAS(atomic_or_64,_atomic_or_64)
ALIAS(atomic_or_uint,_atomic_or_32)
ALIAS(atomic_or_ulong,_atomic_or_64)
ALIAS(atomic_or_ptr,_atomic_or_64)

ALIAS(atomic_or_32_nv,_atomic_or_32_nv)
ALIAS(atomic_or_64_nv,_atomic_or_64_nv)
ALIAS(atomic_or_uint_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ulong_nv,_atomic_or_64_nv)
ALIAS(atomic_or_ptr_nv,_atomic_or_64_nv)

ALIAS(atomic_swap_32,_atomic_swap_32)
ALIAS(atomic_swap_64,_atomic_swap_64)
ALIAS(atomic_swap_uint,_atomic_swap_32)
ALIAS(atomic_swap_ulong,_atomic_swap_64)
ALIAS(atomic_swap_ptr,_atomic_swap_64)

ALIAS(atomic_cas_32,_atomic_cas_32)
ALIAS(atomic_cas_64,_atomic_cas_64)
ALIAS(atomic_cas_uint,_atomic_cas_32)
ALIAS(atomic_cas_ulong,_atomic_cas_64)
ALIAS(atomic_cas_ptr,_atomic_cas_64)

ALIAS(atomic_cas_32_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_64_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_uint_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ulong_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_ptr_ni,_atomic_cas_64_ni)

ALIAS(membar_acquire,_membar_acquire)
ALIAS(membar_release,_membar_release)
ALIAS(membar_sync,_membar_sync)

ALIAS(membar_consumer,_membar_acquire)
ALIAS(membar_producer,_membar_release)
ALIAS(membar_enter,_membar_sync)
ALIAS(membar_exit,_membar_release)

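/*
 * Strong aliases for the internal underscore-prefixed names, bound
 * unconditionally in both kernel and userland builds.
 */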
STRONG_ALIAS(_atomic_add_int,_atomic_add_32)
STRONG_ALIAS(_atomic_add_long,_atomic_add_64)
STRONG_ALIAS(_atomic_add_ptr,_atomic_add_64)

STRONG_ALIAS(_atomic_add_int_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_long_nv,_atomic_add_64_nv)
STRONG_ALIAS(_atomic_add_ptr_nv,_atomic_add_64_nv)

STRONG_ALIAS(_atomic_and_uint,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ulong,_atomic_and_64)
STRONG_ALIAS(_atomic_and_ptr,_atomic_and_64)

STRONG_ALIAS(_atomic_and_uint_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ulong_nv,_atomic_and_64_nv)
STRONG_ALIAS(_atomic_and_ptr_nv,_atomic_and_64_nv)

STRONG_ALIAS(_atomic_dec_uint,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ulong,_atomic_dec_64)
STRONG_ALIAS(_atomic_dec_ptr,_atomic_dec_64)

STRONG_ALIAS(_atomic_dec_uint_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ulong_nv,_atomic_dec_64_nv)
STRONG_ALIAS(_atomic_dec_ptr_nv,_atomic_dec_64_nv)

STRONG_ALIAS(_atomic_inc_uint,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ulong,_atomic_inc_64)
STRONG_ALIAS(_atomic_inc_ptr,_atomic_inc_64)

STRONG_ALIAS(_atomic_inc_uint_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ulong_nv,_atomic_inc_64_nv)
STRONG_ALIAS(_atomic_inc_ptr_nv,_atomic_inc_64_nv)

STRONG_ALIAS(_atomic_or_uint,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ulong,_atomic_or_64)
STRONG_ALIAS(_atomic_or_ptr,_atomic_or_64)

STRONG_ALIAS(_atomic_or_uint_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ulong_nv,_atomic_or_64_nv)
STRONG_ALIAS(_atomic_or_ptr_nv,_atomic_or_64_nv)

STRONG_ALIAS(_atomic_swap_uint,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ulong,_atomic_swap_64)
STRONG_ALIAS(_atomic_swap_ptr,_atomic_swap_64)

STRONG_ALIAS(_atomic_cas_uint,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ulong,_atomic_cas_64)
STRONG_ALIAS(_atomic_cas_ptr,_atomic_cas_64)

STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_64_ni)
STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_64_ni)

STRONG_ALIAS(_membar_consumer,_membar_acquire)
STRONG_ALIAS(_membar_producer,_membar_release)
STRONG_ALIAS(_membar_enter,_membar_sync)
STRONG_ALIAS(_membar_exit,_membar_release)