/*	$NetBSD: lock_stubs.S,v 1.8 2007/11/22 16:16:42 bouyer Exp $	*/

/*-
 * Copyright (c) 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * AMD64 lock stubs.  Calling convention:
 *
 * %rdi		arg 1
 * %rsi		arg 2
 * %rdx		arg 3
 * %rax		return value
 */

#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"

#include <machine/asm.h>
#include <machine/frameasm.h>

#include "assym.h"

#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
#define	FULL
#endif

#define	END(name,a)	.align	a; LABEL(name)

#define	LOCK(name)	LABEL(name) lock

#ifndef LOCKDEBUG

/*
 * void mutex_enter(kmutex_t *mtx);
 *
 * Acquire a mutex and post a load fence.
 */
	.align	64

NENTRY(mutex_enter)				/* 0x0000, 25 bytes */
	movq	CPUVAR(CURLWP), %rcx
	xorq	%rax, %rax
	LOCK(lockpatch1)
	cmpxchgq %rcx, MTX_OWNER(%rdi)
	jnz,pn	_C_LABEL(mutex_vector_enter)
	ret

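/*
 * Illustrative sketch (not kernel code): roughly what the fast path
 * above does, written in C with GCC/Clang __atomic builtins.  The type
 * and helper names (struct amtx, cur_owner, mutex_enter_sketch) are
 * simplified stand-ins, not the real kmutex_t layout.
 *
 *	#include <stdbool.h>
 *	#include <stdint.h>
 *
 *	struct amtx { volatile uintptr_t owner; };	// 0 == unowned
 *	extern uintptr_t cur_owner(void);		// curlwp stand-in
 *	extern void mutex_vector_enter(struct amtx *);	// slow path
 *
 *	static inline void
 *	mutex_enter_sketch(struct amtx *mtx)
 *	{
 *		uintptr_t expect = 0;
 *
 *		// LOCK CMPXCHG: 0 -> curlwp; on failure take the slow path.
 *		if (!__atomic_compare_exchange_n(&mtx->owner, &expect,
 *		    cur_owner(), false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
 *			mutex_vector_enter(mtx);
 *	}
 */
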
/*
 * void mutex_exit(kmutex_t *mtx);
 *
 * Release a mutex and post a store fence.
 *
 * See comments in mutex_vector_enter() about doing this operation unlocked
 * on multiprocessor systems, and comments in arch/x86/include/lock.h about
 * memory ordering on Intel x86 systems.
 */
NENTRY(mutex_exit)				/* 0x0020, 24 bytes */
	movq	CPUVAR(CURLWP), %rax
	xorq	%rdx, %rdx
	cmpxchgq %rdx, MTX_OWNER(%rdi)
	jnz,pn	_C_LABEL(mutex_vector_exit)
	ret

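/*
 * Illustrative sketch (not kernel code): the release fast path above in
 * C, using the same stand-ins as the mutex_enter sketch.
 *
 *	extern void mutex_vector_exit(struct amtx *);	// slow path
 *
 *	static inline void
 *	mutex_exit_sketch(struct amtx *mtx)
 *	{
 *		uintptr_t expect = cur_owner();
 *
 *		// curlwp -> 0; any mismatch (waiters, not owner) takes the
 *		// slow path.  Note the stub above deliberately uses an
 *		// unlocked cmpxchg; see the comments referenced above.
 *		if (!__atomic_compare_exchange_n(&mtx->owner, &expect, 0,
 *		    false, __ATOMIC_RELEASE, __ATOMIC_RELAXED))
 *			mutex_vector_exit(mtx);
 *	}
 */
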
/*
 * void mutex_spin_enter(kmutex_t *mtx);
 *
 * Acquire a spin mutex and post a load fence.
 */
NENTRY(mutex_spin_enter)
	movq	CPUVAR(SELF), %r8
#if defined(FULL)
	movl	$0x0100, %eax			/* new + expected value */
#endif
	movl	CPU_INFO_ILEVEL(%r8), %esi
	subl	$1, CPU_INFO_MTX_COUNT(%r8)	/* decl doesn't set CF */
	movzbl	MTX_IPL(%rdi), %ecx		/* new SPL */
	cmovncl	CPU_INFO_MTX_OLDSPL(%r8), %esi
	cmpl	%ecx, %esi			/* higher? */
	movl	%esi, CPU_INFO_MTX_OLDSPL(%r8)
	cmovgl	%esi, %ecx
	movl	%ecx, CPU_INFO_ILEVEL(%r8)	/* splraiseipl() */
#if defined(FULL)
	LOCK(lockpatch11)
	cmpxchgb %ah, MTX_LOCK(%rdi)		/* lock */
	jnz,pn	_C_LABEL(mutex_spin_retry)	/* failed; hard case */
#endif
	ret

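/*
 * Illustrative sketch (not kernel code): the logic of the stub above in
 * C.  splraiseipl_sketch(), mutex_spin_retry() and the cpu_mtx_* fields
 * are stand-ins for the real curcpu()/spl machinery (which is per-CPU,
 * not global as here).
 *
 *	struct smtx { volatile uint8_t lock; int ipl; };
 *	extern int splraiseipl_sketch(int ipl);		// returns old IPL
 *	extern int cpu_mtx_count, cpu_mtx_oldspl;	// per-CPU in reality
 *	extern void mutex_spin_retry(struct smtx *);	// spin/slow path
 *
 *	static inline void
 *	mutex_spin_enter_sketch(struct smtx *mtx)
 *	{
 *		int s = splraiseipl_sketch(mtx->ipl);
 *
 *		// Remember the SPL to restore only for the outermost spin
 *		// mutex held by this CPU (the count runs negative).
 *		if (cpu_mtx_count-- == 0)
 *			cpu_mtx_oldspl = s;
 *
 *		// Try to take the lock byte; contention spins elsewhere.
 *		if (__atomic_exchange_n(&mtx->lock, 1, __ATOMIC_ACQUIRE) != 0)
 *			mutex_spin_retry(mtx);
 *	}
 */
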
/*
 * void mutex_spin_exit(kmutex_t *mtx);
 *
 * Release a spin mutex and post a store fence.
 */
NENTRY(mutex_spin_exit)
#ifdef DIAGNOSTIC

	movl	$0x0001, %eax			/* new + expected value */
	movq	CPUVAR(SELF), %r8
	cmpxchgb %ah, MTX_LOCK(%rdi)		/* unlock */
	jnz,pn	_C_LABEL(mutex_vector_exit)	/* hard case if problems */
	movl	CPU_INFO_MTX_OLDSPL(%r8), %edi
	incl	CPU_INFO_MTX_COUNT(%r8)
	jnz	1f
	cmpl	CPU_INFO_ILEVEL(%r8), %edi
	jae	1f
	movl	CPU_INFO_IUNMASK(%r8,%rdi,4), %esi
	CLI(ax,10)
	testl	CPU_INFO_IPENDING(%r8), %esi
	jnz	_C_LABEL(Xspllower)
	movl	%edi, CPU_INFO_ILEVEL(%r8)
	STI(ax,10)
1:	rep					/* double byte ret as branch */
	ret					/* target: see AMD docs */

#else	/* DIAGNOSTIC */

	movq	CPUVAR(SELF), %rsi
#ifdef MULTIPROCESSOR
	movb	$0x00, MTX_LOCK(%rdi)
#endif
	movl	CPU_INFO_MTX_OLDSPL(%rsi), %ecx
	incl	CPU_INFO_MTX_COUNT(%rsi)
	movl	CPU_INFO_ILEVEL(%rsi),%edx
	cmovnzl	%edx,%ecx
	cmpl	%edx,%ecx			/* new level is lower? */
	pushq	%rbx
	jae,pn	2f
1:
	movl	CPU_INFO_IPENDING(%rsi),%eax
	testl	%eax,CPU_INFO_IUNMASK(%rsi,%rcx,4)/* deferred interrupts? */
	movl	%eax,%ebx
	jnz,pn	3f
	cmpxchg8b CPU_INFO_ISTATE(%rsi)		/* swap in new ilevel */
	jnz,pn	1b
2:
	popq	%rbx
	ret
3:
	popq	%rbx
	movl	%ecx, %edi
	jmp	_C_LABEL(Xspllower)

#endif	/* DIAGNOSTIC */

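/*
 * Illustrative sketch (not kernel code): the common structure of both
 * variants above, in C, with the same stand-ins as the mutex_spin_enter
 * sketch.  spllower_sketch() stands in for the Xspllower/splx machinery.
 *
 *	extern void spllower_sketch(int s);	// splx()-like stand-in
 *
 *	static inline void
 *	mutex_spin_exit_sketch(struct smtx *mtx)
 *	{
 *		// Drop the lock byte first...
 *		__atomic_store_n(&mtx->lock, 0, __ATOMIC_RELEASE);
 *
 *		// ...then, once the outermost spin mutex is released,
 *		// restore the IPL saved by mutex_spin_enter, which may run
 *		// interrupts that were deferred while it was held.
 *		if (++cpu_mtx_count == 0)
 *			spllower_sketch(cpu_mtx_oldspl);
 *	}
 */
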
/*
 * void	rw_enter(krwlock_t *rwl, krw_t op);
 *
 * Acquire one hold on a RW lock.
 */
NENTRY(rw_enter)				/* 0x00c0, 62 bytes */
	cmpl	$RW_READER, %esi
	jne	2f

	/*
	 * Reader: this is the most common case.
	 */
1:	movq	RW_OWNER(%rdi), %rax
	testb	$(RW_WRITE_LOCKED|RW_WRITE_WANTED), %al
	leaq	RW_READ_INCR(%rax), %rdx
	jnz,pn	_C_LABEL(rw_vector_enter)
	LOCK(lockpatch2)
	cmpxchgq %rdx, RW_OWNER(%rdi)
	jnz,pn	1b
	ret

	/*
	 * Writer: if the compare-and-set fails, don't bother retrying.
	 */
2:	movq	CPUVAR(CURLWP), %rcx
	xorq	%rax, %rax
	orq	$RW_WRITE_LOCKED, %rcx
	LOCK(lockpatch3)
	cmpxchgq %rcx, RW_OWNER(%rdi)
	jnz,pn	_C_LABEL(rw_vector_enter)
	ret

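/*
 * Illustrative sketch (not kernel code): the two fast paths above in C.
 * The RW_* constants are the same ones the assembly uses (via assym.h,
 * from sys/rwlock.h) and are not defined here; struct arw, cur_owner()
 * and rw_enter_sketch() are stand-ins.
 *
 *	struct arw { volatile uintptr_t owner; };
 *	extern void rw_vector_enter(struct arw *, int);	// slow path
 *
 *	static inline void
 *	rw_enter_sketch(struct arw *rw, int op)
 *	{
 *		uintptr_t old;
 *
 *		if (op == RW_READER) {
 *			// Reader: bump the hold count unless a writer holds
 *			// or wants the lock; retry if the word changed.
 *			do {
 *				old = rw->owner;
 *				if (old & (RW_WRITE_LOCKED|RW_WRITE_WANTED)) {
 *					rw_vector_enter(rw, op);
 *					return;
 *				}
 *			} while (!__atomic_compare_exchange_n(&rw->owner,
 *			    &old, old + RW_READ_INCR, false,
 *			    __ATOMIC_ACQUIRE, __ATOMIC_RELAXED));
 *		} else {
 *			// Writer: single CAS 0 -> (curlwp | RW_WRITE_LOCKED);
 *			// on failure fall straight into the slow path.
 *			old = 0;
 *			if (!__atomic_compare_exchange_n(&rw->owner, &old,
 *			    cur_owner() | RW_WRITE_LOCKED, false,
 *			    __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
 *				rw_vector_enter(rw, op);
 *		}
 *	}
 */
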
/*
 * void	rw_exit(krwlock_t *rwl);
 *
 * Release one hold on a RW lock.
 */
NENTRY(rw_exit)					/* 0x0100, 64 bytes */
	movq	RW_OWNER(%rdi), %rax
	testb	$RW_WRITE_LOCKED, %al
	jnz	2f

	/*
	 * Reader
	 */
1:	testb	$RW_HAS_WAITERS, %al
	jnz,pn	3f
	cmpq	$RW_READ_INCR, %rax
	leaq	-RW_READ_INCR(%rax), %rdx
	jb,pn	3f
	LOCK(lockpatch4)
	cmpxchgq %rdx, RW_OWNER(%rdi)
	jnz,pn	1b
	ret

	/*
	 * Writer
	 */
2:	leaq	-RW_WRITE_LOCKED(%rax), %rdx
	subq	CPUVAR(CURLWP), %rdx
	jnz,pn	3f
	LOCK(lockpatch5)
	cmpxchgq %rdx, RW_OWNER(%rdi)
	jnz	3f
	ret

3:	jmp	_C_LABEL(rw_vector_exit)

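/*
 * Illustrative sketch (not kernel code): the release paths above in C,
 * using the same stand-ins as the rw_enter sketch.
 *
 *	extern void rw_vector_exit(struct arw *);	// slow path
 *
 *	static inline void
 *	rw_exit_sketch(struct arw *rw)
 *	{
 *		uintptr_t old = rw->owner;
 *
 *		if (old & RW_WRITE_LOCKED) {
 *			// Writer: expect exactly (curlwp | RW_WRITE_LOCKED);
 *			// anything else (e.g. waiters) is the slow path.
 *			uintptr_t expect = cur_owner() | RW_WRITE_LOCKED;
 *			if (!__atomic_compare_exchange_n(&rw->owner, &expect,
 *			    0, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED))
 *				rw_vector_exit(rw);
 *			return;
 *		}
 *
 *		// Reader: drop one hold, unless there are waiters or the
 *		// count would underflow; retry if the word changed.
 *		do {
 *			if ((old & RW_HAS_WAITERS) || old < RW_READ_INCR) {
 *				rw_vector_exit(rw);
 *				return;
 *			}
 *		} while (!__atomic_compare_exchange_n(&rw->owner, &old,
 *		    old - RW_READ_INCR, false, __ATOMIC_RELEASE,
 *		    __ATOMIC_RELAXED));
 *	}
 */
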
#endif	/* LOCKDEBUG */

/*
 * int _lock_cas(uintptr_t *val, uintptr_t old, uintptr_t new);
 *
 * Perform an atomic compare-and-set operation.
 */
NENTRY(_lock_cas)				/* 0x0140, 19 bytes */
	movq	%rsi, %rax
	LOCK(lockpatch6)
	cmpxchgq %rdx, (%rdi)
	movq	$0, %rax
	setz	%al				/* = 1 if success */
	ret

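/*
 * Illustrative sketch (not kernel code): the C-level contract of the
 * stub above, expressed with a GCC/Clang builtin.
 *
 *	static inline int
 *	lock_cas_sketch(volatile uintptr_t *val, uintptr_t old, uintptr_t new)
 *	{
 *		// Atomically: if (*val == old) { *val = new; return 1; }
 *		// else return 0.
 *		return __atomic_compare_exchange_n(val, &old, new, false,
 *		    __ATOMIC_SEQ_CST, __ATOMIC_RELAXED) ? 1 : 0;
 *	}
 */
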
/*
 * Memory barrier operations, which may be patched at runtime.
 */
	.align	8
ENTRY(mb_read)
	LOCK(lockpatch7)
	addq	$0, 0(%rsp)
	ret
END(mb_read_end, 8)

ENTRY(mb_write)
	nop
	ret
END(mb_write_end, 8)

ENTRY(mb_memory)
	LOCK(lockpatch8)
	addq	$0, 0(%rsp)
	ret
END(mb_memory_end, 8)

ENTRY(sse2_mb_read)
	lfence
	ret
END(sse2_mb_read_end, 8)

ENTRY(sse2_mb_memory)
	mfence
	ret
END(sse2_mb_memory_end, 8)

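/*
 * Illustrative sketch (not kernel code): roughly the guarantees the
 * barrier entry points above provide, expressed as C11 fences.  The
 * mapping to acquire/release/seq_cst fences is an informal reading,
 * not a formal specification of the mb_* interfaces.
 *
 *	#include <stdatomic.h>
 *
 *	static inline void mb_read_sketch(void)
 *	{
 *		// Read barrier: lock-prefixed add or lfence above.
 *		atomic_thread_fence(memory_order_acquire);
 *	}
 *
 *	static inline void mb_write_sketch(void)
 *	{
 *		// Stores are not reordered with stores on x86, so the stub
 *		// above is a plain ret; the call itself still acts as a
 *		// compiler barrier.
 *		atomic_thread_fence(memory_order_release);
 *	}
 *
 *	static inline void mb_memory_sketch(void)
 *	{
 *		// Full barrier: lock-prefixed add or mfence above.
 *		atomic_thread_fence(memory_order_seq_cst);
 *	}
 */
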
/*
 * Make sure the padding after the ret is encoded as proper NOPs by gas,
 * otherwise it could stall newer processors.
 */

ENTRY(x86_mb_nop)
	nop
	ret
END(x86_mb_nop_end, 8)

/*
 * XXX These don't belong here.
 */
ENTRY(atomic_inc_uint)
	LOCK(lockpatch9)
	incl	(%rdi)
	ret

ENTRY(atomic_dec_uint_nv)
	movl	$-1, %eax
	LOCK(lockpatch10)
	xaddl	%eax, (%rdi)
	decl	%eax
	ret

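/*
 * Illustrative sketch (not kernel code): the semantics of the two
 * routines above in C.
 *
 *	static inline void
 *	atomic_inc_uint_sketch(volatile unsigned int *p)
 *	{
 *		// lock incl
 *		__atomic_fetch_add(p, 1, __ATOMIC_SEQ_CST);
 *	}
 *
 *	static inline unsigned int
 *	atomic_dec_uint_nv_sketch(volatile unsigned int *p)
 *	{
 *		// lock xaddl with -1, then adjust: returns the new value,
 *		// as the _nv suffix suggests.
 *		return __atomic_sub_fetch(p, 1, __ATOMIC_SEQ_CST);
 *	}
 */
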
/*
 * Patchpoints to replace with NOP when ncpu == 1.
 */
#ifndef LOCKDEBUG
LABEL(x86_lockpatch)
	.quad	lockpatch1, lockpatch2, lockpatch3, lockpatch4
	.quad	lockpatch5, lockpatch6, lockpatch7, lockpatch8
	.quad	lockpatch9, lockpatch10
#ifdef FULL
	.quad	lockpatch11
#endif
	.quad	0
#endif
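
/*
 * Illustrative sketch (not kernel code): conceptually, each entry in
 * x86_lockpatch is the address of a LOCK prefix (0xf0) emitted by the
 * LOCK() macro above, and on a uniprocessor system the kernel can
 * overwrite each one with a NOP (0x90).  The real patch routine lives
 * in the x86 machdep code and must also handle write protection and
 * code re-serialization, which this sketch ignores.
 *
 *	#include <stdint.h>
 *
 *	extern uint8_t *x86_lockpatch_sketch[];	// 0-terminated table
 *
 *	static void
 *	patch_locks_for_up_sketch(void)
 *	{
 *		for (int i = 0; x86_lockpatch_sketch[i] != 0; i++)
 *			*x86_lockpatch_sketch[i] = 0x90;	// nop
 *	}
 */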