/*	$NetBSD: atomic.S,v 1.30 2024/07/16 22:44:38 riastradh Exp $	*/

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <machine/asm.h>

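/*
 * In the kernel the public atomic_*()/membar_*() names below are strong
 * aliases for these implementations; in userland they are weak, so
 * another definition may interpose on them.
 */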
#ifdef _KERNEL
#define	ALIAS(f, t)	STRONG_ALIAS(f,t)
#else
#define	ALIAS(f, t)	WEAK_ALIAS(f,t)
#endif

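/*
 * LOCK is assumed to expand to the x86 `lock' prefix; it is not defined
 * in this excerpt, so provide a plain fallback in case no included
 * header supplies one.  (A kernel build may define its own version,
 * e.g. one that can be patched away on uniprocessor configurations.)
 */
#ifndef LOCK
#define	LOCK	lock
#endif
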
	.text

/* 32-bit */

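/*
 * SysV AMD64 calling convention: the target address arrives in %rdi,
 * the 32-bit operand (if any) in %esi, and the return value (for the
 * *_nv, swap, and cas variants) is left in %eax.
 */
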
ENTRY(_atomic_add_32)
	LOCK
	addl	%esi, (%rdi)
	ret
END(_atomic_add_32)

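/*
 * atomic_add_32_nv(p, v): add v to *p and return the new value.
 * XADD leaves the old value of *p in %eax, so adding v once more
 * recovers the new value without touching memory again.
 */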
ENTRY(_atomic_add_32_nv)
	movl	%esi, %eax
	LOCK
	xaddl	%eax, (%rdi)
	addl	%esi, %eax
	ret
END(_atomic_add_32_nv)

ENTRY(_atomic_and_32)
	LOCK
	andl	%esi, (%rdi)
	ret
END(_atomic_and_32)

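/*
 * atomic_and_32_nv(p, mask): x86 has no fetch-and-AND instruction, so
 * compute the new value in a register and retry with LOCK CMPXCHG
 * until no other CPU has changed *p in the meantime.  On success %ecx
 * holds the value just stored, which is the return value.
 */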
ENTRY(_atomic_and_32_nv)
	movl	(%rdi), %eax
1:
	movl	%eax, %ecx
	andl	%esi, %ecx
	LOCK
	cmpxchgl %ecx, (%rdi)
	jnz	1b
	movl	%ecx, %eax
	ret
END(_atomic_and_32_nv)

ENTRY(_atomic_dec_32)
	LOCK
	decl	(%rdi)
	ret
END(_atomic_dec_32)

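/*
 * atomic_dec_32_nv(p)/atomic_inc_32_nv(p): XADD with -1 (or +1) leaves
 * the old value in %eax; adjust it once more to return the new value.
 */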
ENTRY(_atomic_dec_32_nv)
	movl	$-1, %eax
	LOCK
	xaddl	%eax, (%rdi)
	decl	%eax
	ret
END(_atomic_dec_32_nv)

ENTRY(_atomic_inc_32)
	LOCK
	incl	(%rdi)
	ret
END(_atomic_inc_32)

ENTRY(_atomic_inc_32_nv)
	movl	$1, %eax
	LOCK
	xaddl	%eax, (%rdi)
	incl	%eax
	ret
END(_atomic_inc_32_nv)

ENTRY(_atomic_or_32)
	LOCK
	orl	%esi, (%rdi)
	ret
END(_atomic_or_32)

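/* atomic_or_32_nv(p, mask): same CMPXCHG retry loop as _atomic_and_32_nv. */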
ENTRY(_atomic_or_32_nv)
	movl	(%rdi), %eax
1:
	movl	%eax, %ecx
	orl	%esi, %ecx
	LOCK
	cmpxchgl %ecx, (%rdi)
	jnz	1b
	movl	%ecx, %eax
	ret
END(_atomic_or_32_nv)

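/*
 * atomic_swap_32(p, new): XCHG with a memory operand is implicitly
 * locked, so no LOCK prefix is needed; the old value comes back in %eax.
 */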
ENTRY(_atomic_swap_32)
	movl	%esi, %eax
	xchgl	%eax, (%rdi)
	ret
END(_atomic_swap_32)

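/*
 * atomic_cas_32(p, expected, new): CMPXCHG compares *p with %eax
 * (= expected); if they match it stores %edx (= new).  Either way the
 * old value of *p ends up in %eax, which is what the caller gets back.
 */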
ENTRY(_atomic_cas_32)
	movl	%esi, %eax
	LOCK
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32)

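/*
 * The *_ni variants omit the LOCK prefix: a single CMPXCHG is still
 * atomic with respect to interrupts on the executing CPU, which
 * appears to be all that callers of these variants require.
 */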
ENTRY(_atomic_cas_32_ni)
	movl	%esi, %eax
	cmpxchgl %edx, (%rdi)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32_ni)

/* 64-bit */

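/*
 * The 64-bit routines use the same techniques as the 32-bit ones above,
 * with the operand in %rsi and the result in %rax.
 */
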
ENTRY(_atomic_add_64)
	LOCK
	addq	%rsi, (%rdi)
	ret
END(_atomic_add_64)

ENTRY(_atomic_add_64_nv)
	movq	%rsi, %rax
	LOCK
	xaddq	%rax, (%rdi)
	addq	%rsi, %rax
	ret
END(_atomic_add_64_nv)

ENTRY(_atomic_and_64)
	LOCK
	andq	%rsi, (%rdi)
	ret
END(_atomic_and_64)

ENTRY(_atomic_and_64_nv)
	movq	(%rdi), %rax
1:
	movq	%rax, %rcx
	andq	%rsi, %rcx
	LOCK
	cmpxchgq %rcx, (%rdi)
	jnz	1b
	movq	%rcx, %rax
	ret
END(_atomic_and_64_nv)

ENTRY(_atomic_dec_64)
	LOCK
	decq	(%rdi)
	ret
END(_atomic_dec_64)

ENTRY(_atomic_dec_64_nv)
	movq	$-1, %rax
	LOCK
	xaddq	%rax, (%rdi)
	decq	%rax
	ret
END(_atomic_dec_64_nv)

ENTRY(_atomic_inc_64)
	LOCK
	incq	(%rdi)
	ret
END(_atomic_inc_64)

ENTRY(_atomic_inc_64_nv)
	movq	$1, %rax
	LOCK
	xaddq	%rax, (%rdi)
	incq	%rax
	ret
END(_atomic_inc_64_nv)

ENTRY(_atomic_or_64)
	LOCK
	orq	%rsi, (%rdi)
	ret
END(_atomic_or_64)

ENTRY(_atomic_or_64_nv)
	movq	(%rdi), %rax
1:
	movq	%rax, %rcx
	orq	%rsi, %rcx
	LOCK
	cmpxchgq %rcx, (%rdi)
	jnz	1b
	movq	%rcx, %rax
	ret
END(_atomic_or_64_nv)

ENTRY(_atomic_swap_64)
	movq	%rsi, %rax
	xchgq	%rax, (%rdi)
	ret
END(_atomic_swap_64)

ENTRY(_atomic_cas_64)
	movq	%rsi, %rax
	LOCK
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64)

ENTRY(_atomic_cas_64_ni)
	movq	%rsi, %rax
	cmpxchgq %rdx, (%rdi)
	/* %rax now contains the old value */
	ret
END(_atomic_cas_64_ni)

/* memory barriers */

ENTRY(_membar_acquire)
	/*
	 * Every load from normal memory is a load-acquire on x86, so
	 * there is never any need for explicit barriers to order
	 * load-before-anything.
	 */
	ret
END(_membar_acquire)

ENTRY(_membar_release)
	/*
	 * Every store to normal memory is a store-release on x86, so
	 * there is never any need for explicit barriers to order
	 * anything-before-store.
	 */
	ret
END(_membar_release)

ENTRY(_membar_sync)
	/*
	 * MFENCE, or a serializing instruction like a locked ADDQ,
	 * is necessary to order store-before-load.  Every other
	 * ordering -- load-before-anything, anything-before-store --
	 * is already guaranteed without explicit barriers.
	 *
	 * Empirically it turns out locked ADDQ is cheaper than MFENCE,
	 * so we use that, with an offset below the return address on
	 * the stack to avoid a false dependency with RET.  (It might
	 * even be better to use a much lower offset, say -128, to
	 * avoid false dependencies for subsequent callees of the
	 * caller.)
	 *
	 * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
	 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
	 * https://www.agner.org/optimize/instruction_tables.pdf
	 *
	 * Sync with xen_mb in sys/arch/amd64/amd64/cpufunc.S.
	 */
	LOCK
	addq	$0, -8(%rsp)
	ret
END(_membar_sync)

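/*
 * Export the public atomic_ops(3)/membar_ops(3) names as aliases for
 * the implementations above.  The older membar names are retained for
 * compatibility: membar_consumer maps to acquire, membar_producer and
 * membar_exit map to release, and membar_enter (which also needs
 * store-before-load ordering) maps to the full sync.
 */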
ALIAS(atomic_add_32,_atomic_add_32)
ALIAS(atomic_add_64,_atomic_add_64)
ALIAS(atomic_add_int,_atomic_add_32)
ALIAS(atomic_add_long,_atomic_add_64)
ALIAS(atomic_add_ptr,_atomic_add_64)

ALIAS(atomic_add_32_nv,_atomic_add_32_nv)
ALIAS(atomic_add_64_nv,_atomic_add_64_nv)
ALIAS(atomic_add_int_nv,_atomic_add_32_nv)
ALIAS(atomic_add_long_nv,_atomic_add_64_nv)
ALIAS(atomic_add_ptr_nv,_atomic_add_64_nv)

ALIAS(atomic_and_32,_atomic_and_32)
ALIAS(atomic_and_64,_atomic_and_64)
ALIAS(atomic_and_uint,_atomic_and_32)
ALIAS(atomic_and_ulong,_atomic_and_64)
ALIAS(atomic_and_ptr,_atomic_and_64)

ALIAS(atomic_and_32_nv,_atomic_and_32_nv)
ALIAS(atomic_and_64_nv,_atomic_and_64_nv)
ALIAS(atomic_and_uint_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ulong_nv,_atomic_and_64_nv)
ALIAS(atomic_and_ptr_nv,_atomic_and_64_nv)

ALIAS(atomic_dec_32,_atomic_dec_32)
ALIAS(atomic_dec_64,_atomic_dec_64)
ALIAS(atomic_dec_uint,_atomic_dec_32)
ALIAS(atomic_dec_ulong,_atomic_dec_64)
ALIAS(atomic_dec_ptr,_atomic_dec_64)

ALIAS(atomic_dec_32_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_64_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_uint_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ulong_nv,_atomic_dec_64_nv)
ALIAS(atomic_dec_ptr_nv,_atomic_dec_64_nv)

ALIAS(atomic_inc_32,_atomic_inc_32)
ALIAS(atomic_inc_64,_atomic_inc_64)
ALIAS(atomic_inc_uint,_atomic_inc_32)
ALIAS(atomic_inc_ulong,_atomic_inc_64)
ALIAS(atomic_inc_ptr,_atomic_inc_64)

ALIAS(atomic_inc_32_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_64_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_uint_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ulong_nv,_atomic_inc_64_nv)
ALIAS(atomic_inc_ptr_nv,_atomic_inc_64_nv)

ALIAS(atomic_or_32,_atomic_or_32)
ALIAS(atomic_or_64,_atomic_or_64)
ALIAS(atomic_or_uint,_atomic_or_32)
ALIAS(atomic_or_ulong,_atomic_or_64)
ALIAS(atomic_or_ptr,_atomic_or_64)

ALIAS(atomic_or_32_nv,_atomic_or_32_nv)
ALIAS(atomic_or_64_nv,_atomic_or_64_nv)
ALIAS(atomic_or_uint_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ulong_nv,_atomic_or_64_nv)
ALIAS(atomic_or_ptr_nv,_atomic_or_64_nv)

ALIAS(atomic_swap_32,_atomic_swap_32)
ALIAS(atomic_swap_64,_atomic_swap_64)
ALIAS(atomic_swap_uint,_atomic_swap_32)
ALIAS(atomic_swap_ulong,_atomic_swap_64)
ALIAS(atomic_swap_ptr,_atomic_swap_64)

ALIAS(atomic_cas_32,_atomic_cas_32)
ALIAS(atomic_cas_64,_atomic_cas_64)
ALIAS(atomic_cas_uint,_atomic_cas_32)
ALIAS(atomic_cas_ulong,_atomic_cas_64)
ALIAS(atomic_cas_ptr,_atomic_cas_64)

ALIAS(atomic_cas_32_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_64_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_uint_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ulong_ni,_atomic_cas_64_ni)
ALIAS(atomic_cas_ptr_ni,_atomic_cas_64_ni)

ALIAS(membar_acquire,_membar_acquire)
ALIAS(membar_release,_membar_release)
ALIAS(membar_sync,_membar_sync)

ALIAS(membar_consumer,_membar_acquire)
ALIAS(membar_producer,_membar_release)
ALIAS(membar_enter,_membar_sync)
ALIAS(membar_exit,_membar_release)
ALIAS(membar_sync,_membar_sync)

STRONG_ALIAS(_atomic_add_int,_atomic_add_32)
STRONG_ALIAS(_atomic_add_long,_atomic_add_64)
STRONG_ALIAS(_atomic_add_ptr,_atomic_add_64)

STRONG_ALIAS(_atomic_add_int_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_long_nv,_atomic_add_64_nv)
STRONG_ALIAS(_atomic_add_ptr_nv,_atomic_add_64_nv)

STRONG_ALIAS(_atomic_and_uint,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ulong,_atomic_and_64)
STRONG_ALIAS(_atomic_and_ptr,_atomic_and_64)

STRONG_ALIAS(_atomic_and_uint_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ulong_nv,_atomic_and_64_nv)
STRONG_ALIAS(_atomic_and_ptr_nv,_atomic_and_64_nv)

STRONG_ALIAS(_atomic_dec_uint,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ulong,_atomic_dec_64)
STRONG_ALIAS(_atomic_dec_ptr,_atomic_dec_64)

STRONG_ALIAS(_atomic_dec_uint_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ulong_nv,_atomic_dec_64_nv)
STRONG_ALIAS(_atomic_dec_ptr_nv,_atomic_dec_64_nv)

STRONG_ALIAS(_atomic_inc_uint,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ulong,_atomic_inc_64)
STRONG_ALIAS(_atomic_inc_ptr,_atomic_inc_64)

STRONG_ALIAS(_atomic_inc_uint_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ulong_nv,_atomic_inc_64_nv)
STRONG_ALIAS(_atomic_inc_ptr_nv,_atomic_inc_64_nv)

STRONG_ALIAS(_atomic_or_uint,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ulong,_atomic_or_64)
STRONG_ALIAS(_atomic_or_ptr,_atomic_or_64)

STRONG_ALIAS(_atomic_or_uint_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ulong_nv,_atomic_or_64_nv)
STRONG_ALIAS(_atomic_or_ptr_nv,_atomic_or_64_nv)

STRONG_ALIAS(_atomic_swap_uint,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ulong,_atomic_swap_64)
STRONG_ALIAS(_atomic_swap_ptr,_atomic_swap_64)

STRONG_ALIAS(_atomic_cas_uint,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ulong,_atomic_cas_64)
STRONG_ALIAS(_atomic_cas_ptr,_atomic_cas_64)

STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_64_ni)
STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_64_ni)

STRONG_ALIAS(_membar_consumer,_membar_acquire)
STRONG_ALIAS(_membar_producer,_membar_release)
STRONG_ALIAS(_membar_enter,_membar_sync)
STRONG_ALIAS(_membar_exit,_membar_release)