/*	$NetBSD: lock_stubs.S,v 1.31 2019/02/11 14:59:32 cherry Exp $	*/

/*-
 * Copyright (c) 2006, 2007, 2008, 2009 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * AMD64 lock stubs.  Calling convention:
 *
 * %rdi		arg 1
 * %rsi		arg 2
 * %rdx		arg 3
 * %rax		return value
 */

#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"

#include <machine/asm.h>
#include <machine/frameasm.h>

#include "assym.h"

#define	ENDLABEL(name,a) .align	a; LABEL(name)
#define	LOCK(num)	\
	HOTPATCH(HP_NAME_NOLOCK, 1)	; \
	lock
#define	RET(num)	\
	HOTPATCH(HP_NAME_RETFENCE, 3)	; \
	ret; nop; nop			; \
	ret
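
/*
 * Both macros expand to hotpatchable sequences (see the HOTPATCH support
 * in x86/x86/patch.c).  LOCK() emits a lock prefix that a uniprocessor
 * kernel can overwrite with a nop at boot, and RET() leaves a three-byte
 * "ret; nop; nop" window that can be overwritten with a fence, so that
 * execution falls through to the final ret on CPUs needing one.  The num
 * argument is unused by the expansion; it only tags the call sites.
 */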

#ifndef LOCKDEBUG

/*
 * void mutex_enter(kmutex_t *mtx);
 *
 * Acquire a mutex and post a load fence.
 */
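/*
 * Illustrative C equivalent of the fast path below (a sketch; the slow
 * path, mutex_vector_enter(), lives in kern_mutex.c):
 *
 *	void
 *	mutex_enter(kmutex_t *mtx)
 *	{
 *		// lock cmpxchgq: 0 -> curlwp, full barrier on success
 *		if (atomic_cas_ptr(&mtx->mtx_owner, NULL, curlwp) != NULL)
 *			mutex_vector_enter(mtx);	// contended: hard case
 *	}
 */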
	.align	64

ENTRY(mutex_enter)
	movq	CPUVAR(CURLWP), %rcx
	xorq	%rax, %rax
	LOCK(1)
	cmpxchgq %rcx, (%rdi)
	jnz	1f
	RET(1)
1:
	jmp	_C_LABEL(mutex_vector_enter)
END(mutex_enter)

/*
 * void mutex_exit(kmutex_t *mtx);
 *
 * Release a mutex and post a store fence.
 *
 * See comments in mutex_vector_enter() about doing this operation unlocked
 * on multiprocessor systems, and comments in arch/x86/include/lock.h about
 * memory ordering on Intel x86 systems.
 */
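/*
 * Illustrative C equivalent (a sketch).  Note the stub's cmpxchg below
 * deliberately has no lock prefix, per the comment above, whereas an
 * atomic_cas_ptr() call would be a locked operation:
 *
 *	void
 *	mutex_exit(kmutex_t *mtx)
 *	{
 *		// CAS curlwp -> 0; the presence of waiters makes it fail
 *		if (atomic_cas_ptr(&mtx->mtx_owner, curlwp, NULL) != curlwp)
 *			mutex_vector_exit(mtx);		// hard case
 *	}
 */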
ENTRY(mutex_exit)
	movq	CPUVAR(CURLWP), %rax
	xorq	%rdx, %rdx
	cmpxchgq %rdx, (%rdi)
	jnz	1f
	ret
1:
	jmp	_C_LABEL(mutex_vector_exit)
END(mutex_exit)

/*
 * void mutex_spin_enter(kmutex_t *mtx);
 *
 * Acquire a spin mutex and post a load fence.
 */
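/*
 * Illustrative C outline of the stub below (a sketch, using the
 * ci_mtx_count/ci_mtx_oldspl bookkeeping from kern_mutex.c):
 *
 *	void
 *	mutex_spin_enter(kmutex_t *mtx)
 *	{
 *		struct cpu_info *ci = curcpu();
 *		int s = splraiseipl(mtx->mtx_ipl);
 *
 *		// remember the old SPL for the outermost spin mutex only
 *		if (ci->ci_mtx_count-- == 0)
 *			ci->ci_mtx_oldspl = s;
 *		// xchgb: try to take the lock byte
 *		if (!__cpu_simple_lock_try(&mtx->mtx_lock))
 *			mutex_spin_retry(mtx);		// contended: hard case
 *	}
 */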
ENTRY(mutex_spin_enter)
	movl	$1, %eax
	movl	CPUVAR(ILEVEL), %esi
	movzbl	MTX_IPL(%rdi), %ecx		/* new SPL */
	cmpl	%ecx, %esi			/* higher? */
	cmovgl	%esi, %ecx
	movl	%ecx, CPUVAR(ILEVEL)		/* splraiseipl() */
	subl	%eax, CPUVAR(MTX_COUNT)		/* decl doesn't set CF */
	cmovncl	CPUVAR(MTX_OLDSPL), %esi
	movl	%esi, CPUVAR(MTX_OLDSPL)
	xchgb	%al, MTX_LOCK(%rdi)		/* lock */
#ifdef MULTIPROCESSOR	/* XXX for xen */
	testb	%al, %al
	jnz	1f
#endif
	RET(2)
1:
	jmp	_C_LABEL(mutex_spin_retry)	/* failed; hard case */
END(mutex_spin_enter)

/*
 * void mutex_spin_exit(kmutex_t *mtx);
 *
 * Release a spin mutex and post a store fence.
 */
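/*
 * Illustrative C outline (a sketch): drop the lock byte, and when the
 * outermost spin mutex is released, restore the saved SPL, first letting
 * Xspllower() process any interrupts deferred while it was held:
 *
 *	void
 *	mutex_spin_exit(kmutex_t *mtx)
 *	{
 *		struct cpu_info *ci = curcpu();
 *
 *		__cpu_simple_unlock(&mtx->mtx_lock);
 *		if (++ci->ci_mtx_count == 0)
 *			splx(ci->ci_mtx_oldspl);
 *	}
 */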
ENTRY(mutex_spin_exit)
#ifdef DIAGNOSTIC

	movl	$0x0001, %eax			/* new + expected value */
	movq	CPUVAR(SELF), %r8
	cmpxchgb %ah, MTX_LOCK(%rdi)		/* unlock */
	jnz	_C_LABEL(mutex_vector_exit)	/* hard case if problems */
	movl	CPU_INFO_MTX_OLDSPL(%r8), %edi
	incl	CPU_INFO_MTX_COUNT(%r8)
	jnz	1f
	cmpl	CPU_INFO_ILEVEL(%r8), %edi
	jae	1f
#if !defined(XENPV)
	movl	CPU_INFO_IUNMASK(%r8,%rdi,4), %esi
	CLI(ax)
	testl	CPU_INFO_IPENDING(%r8), %esi
	jnz	_C_LABEL(Xspllower)
#endif
#if defined(XEN)
	movl	CPU_INFO_XUNMASK(%r8,%rdi,4), %esi
	CLI(ax)
	testl	CPU_INFO_XPENDING(%r8), %esi
	jnz	_C_LABEL(Xspllower)
#endif
	movl	%edi, CPU_INFO_ILEVEL(%r8)
	STI(ax)
1:	rep					/* double byte ret as branch */
	ret					/* target: see AMD docs */

#else	/* DIAGNOSTIC */

	movq	CPUVAR(SELF), %rsi
	movb	$0x00, MTX_LOCK(%rdi)
	movl	CPU_INFO_MTX_OLDSPL(%rsi), %ecx
	incl	CPU_INFO_MTX_COUNT(%rsi)
	movl	CPU_INFO_ILEVEL(%rsi),%edx
	cmovnzl	%edx,%ecx
	pushq	%rbx
	cmpl	%edx,%ecx			/* new level is lower? */
	jae	2f
1:
#if !defined(XENPV)
	movl	CPU_INFO_IPENDING(%rsi),%eax
	testl	%eax,CPU_INFO_IUNMASK(%rsi,%rcx,4)/* deferred interrupts? */
	jnz	3f
	movl	%eax,%ebx
	cmpxchg8b CPU_INFO_ISTATE(%rsi)		/* swap in new ilevel */
	jnz	4f
#endif
#if defined(XEN)
	movl	CPU_INFO_XPENDING(%rsi),%eax
	testl	%eax,CPU_INFO_XUNMASK(%rsi,%rcx,4)/* deferred interrupts? */
	jnz	3f
	movl	%edx, %eax
	cmpxchgl %ecx, CPU_INFO_ILEVEL(%rsi)
	jnz	4f
#endif
2:
	popq	%rbx
	ret
3:
	popq	%rbx
	movl	%ecx, %edi
	jmp	_C_LABEL(Xspllower)
4:
	jmp	1b

#endif	/* DIAGNOSTIC */

END(mutex_spin_exit)

/*
 * void	rw_enter(krwlock_t *rwl, krw_t op);
 *
 * Acquire one hold on a RW lock.
 */
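/*
 * Illustrative C equivalent of the read fast path (a sketch; the owner
 * word counts read holds in units of RW_READ_INCR):
 *
 *	uintptr_t own = rwl->rw_owner;
 *
 *	while ((own & (RW_WRITE_LOCKED | RW_WRITE_WANTED)) == 0) {
 *		// CAS in one more read hold and retry on races;
 *		// cmpxchg reloads %rax with the current value on failure
 *		if (atomic_cas_ulong(&rwl->rw_owner, own,
 *		    own + RW_READ_INCR) == own)
 *			return;				// got a hold
 *		own = rwl->rw_owner;
 *	}
 *	rw_vector_enter(rwl, op);			// hard case
 */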
ENTRY(rw_enter)
	cmpl	$RW_READER, %esi
	jne	2f

	/*
	 * Reader: this is the most common case.
	 */
	movq	(%rdi), %rax
0:
	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
	jnz	3f
	leaq	RW_READ_INCR(%rax), %rdx
	LOCK(2)
	cmpxchgq %rdx, (%rdi)
	jnz	1f
	RET(3)
1:
	jmp	0b

	/*
	 * Writer: if the compare-and-set fails, don't bother retrying.
	 */
2:	movq	CPUVAR(CURLWP), %rcx
	xorq	%rax, %rax
	orq	$RW_WRITE_LOCKED, %rcx
	LOCK(3)
	cmpxchgq %rcx, (%rdi)
	jnz	3f
	RET(4)
3:
	jmp	_C_LABEL(rw_vector_enter)
END(rw_enter)

/*
 * void	rw_exit(krwlock_t *rwl);
 *
 * Release one hold on a RW lock.
 */
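/*
 * Illustrative C equivalent (a sketch).  A writer's owner word must be
 * exactly curlwp | RW_WRITE_LOCKED; a reader drops one RW_READ_INCR
 * unless waiters need waking:
 *
 *	uintptr_t own = rwl->rw_owner;
 *	uintptr_t me = (uintptr_t)curlwp | RW_WRITE_LOCKED;
 *
 *	if ((own & RW_WRITE_LOCKED) != 0) {
 *		if (own == me && atomic_cas_ulong(&rwl->rw_owner, me, 0) == me)
 *			return;
 *	} else if ((own & RW_HAS_WAITERS) == 0 && own >= RW_READ_INCR) {
 *		if (atomic_cas_ulong(&rwl->rw_owner, own,
 *		    own - RW_READ_INCR) == own)	// the stub retries this CAS
 *			return;
 *	}
 *	rw_vector_exit(rwl);				// hard case
 */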
ENTRY(rw_exit)
	movq	(%rdi), %rax
	testb	$RW_WRITE_LOCKED, %al
	jnz	2f

	/*
	 * Reader
	 */
0:	testb	$RW_HAS_WAITERS, %al
	jnz	3f
	cmpq	$RW_READ_INCR, %rax
	jb	3f
	leaq	-RW_READ_INCR(%rax), %rdx
	LOCK(4)
	cmpxchgq %rdx, (%rdi)
	jnz	1f
	ret
1:
	jmp	0b

	/*
	 * Writer
	 */
2:	leaq	-RW_WRITE_LOCKED(%rax), %rdx
	subq	CPUVAR(CURLWP), %rdx
	jnz	3f
	LOCK(5)
	cmpxchgq %rdx, (%rdi)
	jnz	3f
	ret

3:	jmp	_C_LABEL(rw_vector_exit)
END(rw_exit)

/*
 * int	rw_tryenter(krwlock_t *rwl, krw_t op);
 *
 * Try to acquire one hold on a RW lock.
 */
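/*
 * Same fast paths as rw_enter(), but a failed or contended attempt
 * returns 0 instead of falling back to the hard case.  Hypothetical
 * usage example (sc_lock is an assumed softc field, not from this file):
 *
 *	if (!rw_tryenter(&sc->sc_lock, RW_WRITER))
 *		return EBUSY;		// don't block; let the caller retry
 */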
ENTRY(rw_tryenter)
	cmpl	$RW_READER, %esi
	jne	2f

	/*
	 * Reader: this is the most common case.
	 */
	movq	(%rdi), %rax
0:
	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
	jnz	4f
	leaq	RW_READ_INCR(%rax), %rdx
	LOCK(8)
	cmpxchgq %rdx, (%rdi)
	jnz	1f
	movl	%edx, %eax			/* nonzero */
	RET(5)
1:
	jmp	0b

	/*
	 * Writer: if the compare-and-set fails, don't bother retrying.
	 */
2:	movq	CPUVAR(CURLWP), %rcx
	xorq	%rax, %rax
	orq	$RW_WRITE_LOCKED, %rcx
	LOCK(9)
	cmpxchgq %rcx, (%rdi)
	movl	$0, %eax
	setz	%al
3:
	RET(6)
	ret
4:
	xorl	%eax, %eax
	jmp	3b
END(rw_tryenter)

#endif	/* LOCKDEBUG */

/*
 * Spinlocks.
 */
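/*
 * These operate on a single lock byte.  Loading $0x0100 puts 0 in %al
 * (the expected, unlocked value) and 1 in %ah (the new, locked value),
 * so one "cmpxchgb %ah, (%rdi)" attempts the 0 -> 1 transition.
 * Illustrative C equivalent of __cpu_simple_lock() (a sketch, assuming
 * a byte-wide CAS primitive named atomic_cas_8()):
 *
 *	void
 *	__cpu_simple_lock(__cpu_simple_lock_t *lockp)
 *	{
 *		while (atomic_cas_8(lockp, 0, 1) != 0) {
 *			// spin read-only; the stub issues "pause" here
 *			while (*lockp != 0)
 *				continue;
 *		}
 *	}
 */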
ENTRY(__cpu_simple_lock_init)
	movb	$0, (%rdi)
	ret
END(__cpu_simple_lock_init)

ENTRY(__cpu_simple_lock)
	movl	$0x0100, %eax
1:
	LOCK(6)
	cmpxchgb %ah, (%rdi)
	jnz	2f
	RET(7)
2:
	movl	$0x0100, %eax
	pause
	nop
	nop
	cmpb	$0, (%rdi)
	je	1b
	jmp	2b
END(__cpu_simple_lock)

ENTRY(__cpu_simple_unlock)
	movb	$0, (%rdi)
	ret
END(__cpu_simple_unlock)

ENTRY(__cpu_simple_lock_try)
	movl	$0x0100, %eax
	LOCK(7)
	cmpxchgb %ah, (%rdi)
	movl	$0, %eax
	setz	%al
	RET(8)
END(__cpu_simple_lock_try)
3581.2Sad
359