/*	$NetBSD: atomic.S,v 1.38 2025/09/06 02:53:21 riastradh Exp $	*/

/*-
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <machine/asm.h>
/*
 * __HAVE_ constants should not be in <machine/types.h>
 * because we can't use them from assembly. OTOH we
 * only need __HAVE_ATOMIC64_OPS here, and we don't.
 */
#ifdef _KERNEL
#define	ALIAS(f, t)	STRONG_ALIAS(f,t)
#else
#define	ALIAS(f, t)	WEAK_ALIAS(f,t)
#endif

#ifdef _HARDKERNEL
#include "opt_xen.h"
#include <machine/frameasm.h>
#define LOCK			HOTPATCH(HP_NAME_NOLOCK, 1); lock
#define HOTPATCH_CAS_64		HOTPATCH(HP_NAME_CAS_64, 49);
#else
#define LOCK			lock
#define HOTPATCH_CAS_64		/* nothing */
#endif
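/*
 * A sketch of how the macros above are used (assumes the standard x86
 * hotpatch machinery from frameasm.h; the details live there, not
 * here): in the hard kernel every LOCK site is recorded with
 * HOTPATCH(HP_NAME_NOLOCK, 1) so the one-byte lock prefix can be
 * patched away when only a single CPU is present, and HP_NAME_CAS_64
 * marks the 49-byte _atomic_cas_64 body so that it can be replaced by
 * _atomic_cas_cx8 on CPUs that have CMPXCHG8B.
 */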

	.text

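/*
 * Roughly, in C (an illustrative sketch, not the authoritative
 * definition -- see atomic_ops(3)):
 *
 *	void
 *	_atomic_add_32(volatile uint32_t *p, uint32_t v)
 *	{
 *		*p += v;		// performed atomically by LOCK ADDL
 *	}
 */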
ENTRY(_atomic_add_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	LOCK
	addl	%eax, (%edx)
	ret
END(_atomic_add_32)

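/*
 * LOCK XADDL leaves the old value of *p in %eax; adding the increment
 * once more gives the new value without another memory access.
 * Roughly, in C (an illustrative sketch only):
 *
 *	uint32_t
 *	_atomic_add_32_nv(volatile uint32_t *p, uint32_t v)
 *	{
 *		uint32_t old = *p;	// fetched and added by LOCK XADDL
 *		*p = old + v;
 *		return old + v;		// the "new value"
 *	}
 */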
ENTRY(_atomic_add_32_nv)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	movl	%eax, %ecx
	LOCK
	xaddl	%eax, (%edx)
	addl	%ecx, %eax
	ret
END(_atomic_add_32_nv)

ENTRY(_atomic_and_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	LOCK
	andl	%eax, (%edx)
	ret
END(_atomic_and_32)

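/*
 * There is no fetch-and-and instruction, so the _nv variant loops on
 * compare-and-swap.  Roughly, in C (an illustrative sketch; cas()
 * below is shorthand for the LOCK CMPXCHGL step, not a real function):
 *
 *	uint32_t
 *	_atomic_and_32_nv(volatile uint32_t *p, uint32_t v)
 *	{
 *		uint32_t old, new;
 *
 *		do {
 *			old = *p;
 *			new = old & v;
 *		} while (!cas(p, old, new));
 *		return new;
 *	}
 *
 * (_atomic_or_32_nv below follows the same pattern with OR.)
 */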
ENTRY(_atomic_and_32_nv)
	movl	4(%esp), %edx
	movl	(%edx), %eax
0:
	movl	%eax, %ecx
	andl	8(%esp), %ecx
	LOCK
	cmpxchgl %ecx, (%edx)
	jnz	1f
	movl	%ecx, %eax
	ret
1:
	jmp	0b
END(_atomic_and_32_nv)

ENTRY(_atomic_dec_32)
	movl	4(%esp), %edx
	LOCK
	decl	(%edx)
	ret
END(_atomic_dec_32)

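/*
 * LOCK XADDL with -1 returns the old value in %eax; DECL then turns it
 * into the new value.  _atomic_inc_32_nv below is the same trick with
 * +1 and INCL.  Roughly, in C (an illustrative sketch only):
 *
 *	uint32_t
 *	_atomic_dec_32_nv(volatile uint32_t *p)
 *	{
 *		uint32_t old = *p;	// fetch-and-add of -1, LOCK XADDL
 *		*p = old - 1;
 *		return old - 1;
 *	}
 */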
ENTRY(_atomic_dec_32_nv)
	movl	4(%esp), %edx
	movl	$-1, %eax
	LOCK
	xaddl	%eax, (%edx)
	decl	%eax
	ret
END(_atomic_dec_32_nv)

ENTRY(_atomic_inc_32)
	movl	4(%esp), %edx
	LOCK
	incl	(%edx)
	ret
END(_atomic_inc_32)

ENTRY(_atomic_inc_32_nv)
	movl	4(%esp), %edx
	movl	$1, %eax
	LOCK
	xaddl	%eax, (%edx)
	incl	%eax
	ret
END(_atomic_inc_32_nv)

ENTRY(_atomic_or_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	LOCK
	orl	%eax, (%edx)
	ret
END(_atomic_or_32)

ENTRY(_atomic_or_32_nv)
	movl	4(%esp), %edx
	movl	(%edx), %eax
0:
	movl	%eax, %ecx
	orl	8(%esp), %ecx
	LOCK
	cmpxchgl %ecx, (%edx)
	jnz	1f
	movl	%ecx, %eax
	ret
1:
	jmp	0b
END(_atomic_or_32_nv)

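/*
 * XCHGL with a memory operand asserts the bus lock implicitly, so no
 * LOCK prefix (and no hotpatch point) is needed here.  Roughly, in C
 * (an illustrative sketch only):
 *
 *	uint32_t
 *	_atomic_swap_32(volatile uint32_t *p, uint32_t new)
 *	{
 *		uint32_t old = *p;	// both steps done by one XCHGL
 *		*p = new;
 *		return old;
 *	}
 */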
ENTRY(_atomic_swap_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	xchgl	%eax, (%edx)
	ret
END(_atomic_swap_32)

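/*
 * Compare-and-swap: CMPXCHGL compares %eax with *p and stores %ecx
 * only on a match; either way the old value ends up in %eax, which is
 * exactly the return value.  Roughly, in C (an illustrative sketch
 * only):
 *
 *	uint32_t
 *	_atomic_cas_32(volatile uint32_t *p, uint32_t expected, uint32_t new)
 *	{
 *		uint32_t old = *p;
 *		if (old == expected)
 *			*p = new;	// LOCK CMPXCHGL
 *		return old;
 *	}
 */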
ENTRY(_atomic_cas_32)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	movl	12(%esp), %ecx
	LOCK
	cmpxchgl %ecx, (%edx)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32)

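/*
 * The _ni variant is the same as _atomic_cas_32 except that it omits
 * the LOCK prefix: being a single instruction it is still atomic with
 * respect to interrupts on the current CPU, but not with respect to
 * other processors.
 */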
ENTRY(_atomic_cas_32_ni)
	movl	4(%esp), %edx
	movl	8(%esp), %eax
	movl	12(%esp), %ecx
	cmpxchgl %ecx, (%edx)
	/* %eax now contains the old value */
	ret
END(_atomic_cas_32_ni)

ENTRY(_membar_acquire)
	/*
	 * Every load from normal memory is a load-acquire on x86, so
	 * there is never any need for explicit barriers to order
	 * load-before-anything.
	 */
	ret
END(_membar_acquire)

ENTRY(_membar_release)
	/*
	 * Every store to normal memory is a store-release on x86, so
	 * there is never any need for explicit barriers to order
	 * anything-before-store.
	 */
	ret
END(_membar_release)

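/*
 * An illustrative sketch (hypothetical C, not from this file) of the
 * one ordering that does need membar_sync on x86 -- a store followed
 * by a load of a different location, as in a flag-based handshake:
 *
 *	// CPU 0			// CPU 1
 *	me_waiting = 1;			you_waiting = 1;
 *	membar_sync();			membar_sync();
 *	if (!you_waiting)		if (!me_waiting)
 *		enter();			enter();
 *
 * Without the barrier each store may sit in its CPU's store buffer
 * past the following load, so both sides can read 0 and both enter.
 */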
ENTRY(_membar_sync)
	/*
	 * MFENCE, or a serializing instruction like a locked ADDL,
	 * is necessary to order store-before-load.  Every other
	 * ordering -- load-before-anything, anything-before-store --
	 * is already guaranteed without explicit barriers.
	 *
	 * Empirically it turns out locked ADDL is cheaper than MFENCE,
	 * so we use that, with an offset below the return address on
	 * the stack to avoid a false dependency with RET.  (It might
	 * even be better to use a much lower offset, say -128, to
	 * avoid false dependencies for subsequent callees of the
	 * caller.)
	 *
	 * https://pvk.ca/Blog/2014/10/19/performance-optimisation-~-writing-an-essay/
	 * https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
	 * https://www.agner.org/optimize/instruction_tables.pdf
	 *
	 * Sync with paravirt_membar_sync in
	 * sys/arch/i386/i386/cpufunc.S.
	 */
	LOCK
	addl	$0, -4(%esp)
	ret
END(_membar_sync)

#if defined(__HAVE_ATOMIC64_OPS) || defined(_KERNEL)
#ifdef XENPV
STRONG_ALIAS(_atomic_cas_64,_atomic_cas_cx8)
#else
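/*
 * Fallback 64-bit compare-and-swap for CPUs without CMPXCHG8B (i486).
 * In the hard kernel it runs with interrupts disabled, so the
 * compare-and-store below is atomic only against code on the same
 * CPU -- which suffices there because such CPUs are uniprocessor.
 * HOTPATCH_CAS_64 marks this 49-byte body so that, on CPUs which do
 * have CMPXCHG8B, it can be patched over with _atomic_cas_cx8 below
 * (a sketch of the intent; the patching itself is done by the x86
 * hotpatch machinery, not here).
 */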
ENTRY(_atomic_cas_64)
	HOTPATCH_CAS_64
	/* 49 bytes of instructions */
#ifdef _HARDKERNEL
	pushf
	cli
#endif
	pushl	%edi
	pushl	%ebx
	movl	12(%esp), %edi
	movl	16(%esp), %eax
	movl	20(%esp), %edx
	movl	24(%esp), %ebx
	movl	28(%esp), %ecx
	cmpl	0(%edi), %eax
	jne	2f
	cmpl	4(%edi), %edx
	jne	2f
	movl	%ebx, 0(%edi)
	movl	%ecx, 4(%edi)
1:
	popl	%ebx
	popl	%edi
#ifdef _HARDKERNEL
	popf
#endif
	ret
2:
	movl	0(%edi), %eax
	movl	4(%edi), %edx
	jmp	1b
END(_atomic_cas_64)
#endif /* !XENPV */

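/*
 * CMPXCHG8B-based 64-bit compare-and-swap: %edx:%eax carries the
 * expected value, %ecx:%ebx the new one, and the old value comes back
 * in %edx:%eax.  The 0xCC padding keeps the routine at the same 49
 * bytes as the fallback above, presumably so the hotpatch can copy it
 * over in place.  Roughly, in C (an illustrative sketch only):
 *
 *	uint64_t
 *	_atomic_cas_64(volatile uint64_t *p, uint64_t expected, uint64_t new)
 *	{
 *		uint64_t old = *p;
 *		if (old == expected)
 *			*p = new;	// LOCK CMPXCHG8B
 *		return old;
 *	}
 */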
ENTRY(_atomic_cas_cx8)
	/* 49 bytes of instructions */
	pushl	%edi
	pushl	%ebx
	movl	12(%esp), %edi
	movl	16(%esp), %eax
	movl	20(%esp), %edx
	movl	24(%esp), %ebx
	movl	28(%esp), %ecx
	LOCK
	cmpxchg8b (%edi)
	popl	%ebx
	popl	%edi
	ret
#ifdef _HARDKERNEL
	.space	20, 0xCC
#endif
END(_atomic_cas_cx8)
LABEL(_atomic_cas_cx8_end)
#endif /* __HAVE_ATOMIC64_OPS || _KERNEL */

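/*
 * Exported names.  On ILP32 i386, int, long, and pointers are all 32
 * bits wide, so the int/long/ptr flavours of atomic_ops(3) all alias
 * the 32-bit routines above.  Typical use (illustrative C; obj and
 * destroy() are hypothetical):
 *
 *	volatile unsigned int refcnt;
 *
 *	atomic_inc_uint(&refcnt);
 *	...
 *	if (atomic_dec_uint_nv(&refcnt) == 0)
 *		destroy(obj);
 */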
ALIAS(atomic_add_32,_atomic_add_32)
ALIAS(atomic_add_int,_atomic_add_32)
ALIAS(atomic_add_long,_atomic_add_32)
ALIAS(atomic_add_ptr,_atomic_add_32)

ALIAS(atomic_add_32_nv,_atomic_add_32_nv)
ALIAS(atomic_add_int_nv,_atomic_add_32_nv)
ALIAS(atomic_add_long_nv,_atomic_add_32_nv)
ALIAS(atomic_add_ptr_nv,_atomic_add_32_nv)

ALIAS(atomic_and_32,_atomic_and_32)
ALIAS(atomic_and_uint,_atomic_and_32)
ALIAS(atomic_and_ulong,_atomic_and_32)
ALIAS(atomic_and_ptr,_atomic_and_32)

ALIAS(atomic_and_32_nv,_atomic_and_32_nv)
ALIAS(atomic_and_uint_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ulong_nv,_atomic_and_32_nv)
ALIAS(atomic_and_ptr_nv,_atomic_and_32_nv)

ALIAS(atomic_dec_32,_atomic_dec_32)
ALIAS(atomic_dec_uint,_atomic_dec_32)
ALIAS(atomic_dec_ulong,_atomic_dec_32)
ALIAS(atomic_dec_ptr,_atomic_dec_32)

ALIAS(atomic_dec_32_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_uint_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ulong_nv,_atomic_dec_32_nv)
ALIAS(atomic_dec_ptr_nv,_atomic_dec_32_nv)

ALIAS(atomic_inc_32,_atomic_inc_32)
ALIAS(atomic_inc_uint,_atomic_inc_32)
ALIAS(atomic_inc_ulong,_atomic_inc_32)
ALIAS(atomic_inc_ptr,_atomic_inc_32)

ALIAS(atomic_inc_32_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_uint_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ulong_nv,_atomic_inc_32_nv)
ALIAS(atomic_inc_ptr_nv,_atomic_inc_32_nv)

ALIAS(atomic_or_32,_atomic_or_32)
ALIAS(atomic_or_uint,_atomic_or_32)
ALIAS(atomic_or_ulong,_atomic_or_32)
ALIAS(atomic_or_ptr,_atomic_or_32)

ALIAS(atomic_or_32_nv,_atomic_or_32_nv)
ALIAS(atomic_or_uint_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ulong_nv,_atomic_or_32_nv)
ALIAS(atomic_or_ptr_nv,_atomic_or_32_nv)

ALIAS(atomic_swap_32,_atomic_swap_32)
ALIAS(atomic_swap_uint,_atomic_swap_32)
ALIAS(atomic_swap_ulong,_atomic_swap_32)
ALIAS(atomic_swap_ptr,_atomic_swap_32)

ALIAS(atomic_cas_32,_atomic_cas_32)
ALIAS(atomic_cas_uint,_atomic_cas_32)
ALIAS(atomic_cas_ulong,_atomic_cas_32)
ALIAS(atomic_cas_ptr,_atomic_cas_32)

ALIAS(atomic_cas_32_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_uint_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ulong_ni,_atomic_cas_32_ni)
ALIAS(atomic_cas_ptr_ni,_atomic_cas_32_ni)

#if defined(__HAVE_ATOMIC64_OPS) || defined(_KERNEL)
ALIAS(atomic_cas_64,_atomic_cas_64)
ALIAS(atomic_cas_64_ni,_atomic_cas_64)
ALIAS(__sync_val_compare_and_swap_8,_atomic_cas_64)
#endif /* __HAVE_ATOMIC64_OPS || _KERNEL */

ALIAS(membar_acquire,_membar_acquire)
ALIAS(membar_release,_membar_release)
ALIAS(membar_sync,_membar_sync)

ALIAS(membar_consumer,_membar_acquire)
ALIAS(membar_producer,_membar_release)
ALIAS(membar_enter,_membar_sync)
ALIAS(membar_exit,_membar_release)
ALIAS(membar_sync,_membar_sync)

STRONG_ALIAS(_atomic_add_int,_atomic_add_32)
STRONG_ALIAS(_atomic_add_long,_atomic_add_32)
STRONG_ALIAS(_atomic_add_ptr,_atomic_add_32)

STRONG_ALIAS(_atomic_add_int_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_long_nv,_atomic_add_32_nv)
STRONG_ALIAS(_atomic_add_ptr_nv,_atomic_add_32_nv)

STRONG_ALIAS(_atomic_and_uint,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ulong,_atomic_and_32)
STRONG_ALIAS(_atomic_and_ptr,_atomic_and_32)

STRONG_ALIAS(_atomic_and_uint_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ulong_nv,_atomic_and_32_nv)
STRONG_ALIAS(_atomic_and_ptr_nv,_atomic_and_32_nv)

STRONG_ALIAS(_atomic_dec_uint,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ulong,_atomic_dec_32)
STRONG_ALIAS(_atomic_dec_ptr,_atomic_dec_32)

STRONG_ALIAS(_atomic_dec_uint_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ulong_nv,_atomic_dec_32_nv)
STRONG_ALIAS(_atomic_dec_ptr_nv,_atomic_dec_32_nv)

STRONG_ALIAS(_atomic_inc_uint,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ulong,_atomic_inc_32)
STRONG_ALIAS(_atomic_inc_ptr,_atomic_inc_32)

STRONG_ALIAS(_atomic_inc_uint_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ulong_nv,_atomic_inc_32_nv)
STRONG_ALIAS(_atomic_inc_ptr_nv,_atomic_inc_32_nv)

STRONG_ALIAS(_atomic_or_uint,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ulong,_atomic_or_32)
STRONG_ALIAS(_atomic_or_ptr,_atomic_or_32)

STRONG_ALIAS(_atomic_or_uint_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ulong_nv,_atomic_or_32_nv)
STRONG_ALIAS(_atomic_or_ptr_nv,_atomic_or_32_nv)

STRONG_ALIAS(_atomic_swap_uint,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ulong,_atomic_swap_32)
STRONG_ALIAS(_atomic_swap_ptr,_atomic_swap_32)

STRONG_ALIAS(_atomic_cas_uint,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ulong,_atomic_cas_32)
STRONG_ALIAS(_atomic_cas_ptr,_atomic_cas_32)

STRONG_ALIAS(_atomic_cas_uint_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ulong_ni,_atomic_cas_32_ni)
STRONG_ALIAS(_atomic_cas_ptr_ni,_atomic_cas_32_ni)

STRONG_ALIAS(_membar_consumer,_membar_acquire)
STRONG_ALIAS(_membar_producer,_membar_release)
STRONG_ALIAS(_membar_enter,_membar_sync)
STRONG_ALIAS(_membar_exit,_membar_release)