1 /* $NetBSD: kern_mutex.c,v 1.110 2023/09/23 18:48:04 ad Exp $ */
2
3 /*-
4 * Copyright (c) 2002, 2006, 2007, 2008, 2019, 2023
5 * The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe and Andrew Doran.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Kernel mutex implementation, modeled after those found in Solaris,
35 * a description of which can be found in:
36 *
37 * Solaris Internals: Core Kernel Architecture, Jim Mauro and
38 * Richard McDougall.
39 */
40
41 #define __MUTEX_PRIVATE
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.110 2023/09/23 18:48:04 ad Exp $");
45
46 #include <sys/param.h>
47 #include <sys/atomic.h>
48 #include <sys/proc.h>
49 #include <sys/mutex.h>
50 #include <sys/sched.h>
51 #include <sys/sleepq.h>
52 #include <sys/systm.h>
53 #include <sys/lockdebug.h>
54 #include <sys/kernel.h>
55 #include <sys/intr.h>
56 #include <sys/lock.h>
57 #include <sys/types.h>
58 #include <sys/cpu.h>
59 #include <sys/pserialize.h>
60
61 #include <dev/lockstat.h>
62
63 #include <machine/lock.h>
64
65 /*
66 * When not running a debug kernel, spin mutexes are not much
67 * more than an splraiseipl() and splx() pair.
68 */
69
/*
 * FULL is defined for DIAGNOSTIC, MULTIPROCESSOR or LOCKDEBUG kernels.
 * The code in this file guarded by "#ifdef FULL" additionally operates
 * on the spin mutex lock word through the MUTEX_SPINBIT_* macros; without
 * FULL only the SPL raise/restore is performed for spin mutexes.
 */
70 #if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
71 #define FULL
72 #endif
73
74 /*
75 * Debugging support.
76 */
77
/*
 * Thin wrappers around the LOCKDEBUG_* hooks.  Each one passes
 * MUTEX_DEBUG_P(mtx) as its first argument and the caller's return
 * address as the location.  MUTEX_TESTLOCK differs from MUTEX_WANTLOCK
 * only in its final argument (-1 instead of 0).  MUTEX_ABORT forwards
 * the current function name and line number to mutex_abort().
 */
78 #define MUTEX_WANTLOCK(mtx) \
79 LOCKDEBUG_WANTLOCK(MUTEX_DEBUG_P(mtx), (mtx), \
80 (uintptr_t)__builtin_return_address(0), 0)
81 #define MUTEX_TESTLOCK(mtx) \
82 LOCKDEBUG_WANTLOCK(MUTEX_DEBUG_P(mtx), (mtx), \
83 (uintptr_t)__builtin_return_address(0), -1)
84 #define MUTEX_LOCKED(mtx) \
85 LOCKDEBUG_LOCKED(MUTEX_DEBUG_P(mtx), (mtx), NULL, \
86 (uintptr_t)__builtin_return_address(0), 0)
87 #define MUTEX_UNLOCKED(mtx) \
88 LOCKDEBUG_UNLOCKED(MUTEX_DEBUG_P(mtx), (mtx), \
89 (uintptr_t)__builtin_return_address(0), 0)
90 #define MUTEX_ABORT(mtx, msg) \
91 mutex_abort(__func__, __LINE__, mtx, msg)
92
/*
 * Assertion macros.  MUTEX_DASSERT is active only in LOCKDEBUG kernels and
 * MUTEX_ASSERT only in DIAGNOSTIC kernels.  When active, a false condition
 * calls MUTEX_ABORT() with the stringified condition as the message.  When
 * inactive the macro expands to nothing, so the condition expression is
 * not evaluated at all.
 */
93 #if defined(LOCKDEBUG)
94
95 #define MUTEX_DASSERT(mtx, cond) \
96 do { \
97 if (__predict_false(!(cond))) \
98 MUTEX_ABORT(mtx, "assertion failed: " #cond); \
99 } while (/* CONSTCOND */ 0)
100
101 #else /* LOCKDEBUG */
102
103 #define MUTEX_DASSERT(mtx, cond) /* nothing */
104
105 #endif /* LOCKDEBUG */
106
107 #if defined(DIAGNOSTIC)
108
109 #define MUTEX_ASSERT(mtx, cond) \
110 do { \
111 if (__predict_false(!(cond))) \
112 MUTEX_ABORT(mtx, "assertion failed: " #cond); \
113 } while (/* CONSTCOND */ 0)
114
115 #else /* DIAGNOSTIC */
116
117 #define MUTEX_ASSERT(mtx, cond) /* nothing */
118
119 #endif /* DIAGNOSTIC */
120
121 /*
122 * Some architectures can't use __cpu_simple_lock as is so allow a way
123 * for them to use an alternate definition.
124 */
/*
 * Default definitions, used only when the name has not already been
 * defined.  The MUTEX_SPINBIT_* defaults apply the __cpu_simple_lock
 * primitives to the mutex's mtx_lock member; the default
 * MUTEX_INITIALIZE_SPIN_IPL stores makeiplcookie(ipl) in mtx_ipl.
 */
125 #ifndef MUTEX_SPINBIT_LOCK_INIT
126 #define MUTEX_SPINBIT_LOCK_INIT(mtx) __cpu_simple_lock_init(&(mtx)->mtx_lock)
127 #endif
128 #ifndef MUTEX_SPINBIT_LOCKED_P
129 #define MUTEX_SPINBIT_LOCKED_P(mtx) __SIMPLELOCK_LOCKED_P(&(mtx)->mtx_lock)
130 #endif
131 #ifndef MUTEX_SPINBIT_LOCK_TRY
132 #define MUTEX_SPINBIT_LOCK_TRY(mtx) __cpu_simple_lock_try(&(mtx)->mtx_lock)
133 #endif
134 #ifndef MUTEX_SPINBIT_LOCK_UNLOCK
135 #define MUTEX_SPINBIT_LOCK_UNLOCK(mtx) __cpu_simple_unlock(&(mtx)->mtx_lock)
136 #endif
137
138 #ifndef MUTEX_INITIALIZE_SPIN_IPL
139 #define MUTEX_INITIALIZE_SPIN_IPL(mtx, ipl) \
140 ((mtx)->mtx_ipl = makeiplcookie((ipl)))
141 #endif
142
143 /*
144 * Spin mutex SPL save / restore.
145 */
146
/*
 * MUTEX_SPIN_SPLRAISE raises the SPL to the mutex's IPL and then
 * post-decrements the per-CPU ci_mtx_count.  Only when the count was zero
 * beforehand (no spin mutex accounted for on this CPU yet) is the previous
 * SPL saved in ci_mtx_oldspl.  ci_mtx_count therefore runs from zero
 * downwards as spin mutexes nest.
 *
 * MUTEX_SPIN_SPLRESTORE reads the saved SPL, pre-increments ci_mtx_count,
 * and calls splx() with the saved value only when the count has returned
 * to zero, i.e. when the outermost spin mutex is released.
 *
 * __insn_barrier() sits between the SPL/count accesses in both macros;
 * presumably it stops the compiler from reordering them (confirm against
 * its definition).
 */
147 #define MUTEX_SPIN_SPLRAISE(mtx) \
148 do { \
149 const int s = splraiseipl(MUTEX_SPIN_IPL(mtx)); \
150 struct cpu_info * const x__ci = curcpu(); \
151 const int x__cnt = x__ci->ci_mtx_count--; \
152 __insn_barrier(); \
153 if (x__cnt == 0) \
154 x__ci->ci_mtx_oldspl = s; \
155 } while (/* CONSTCOND */ 0)
156
157 #define MUTEX_SPIN_SPLRESTORE(mtx) \
158 do { \
159 struct cpu_info * const x__ci = curcpu(); \
160 const int s = x__ci->ci_mtx_oldspl; \
161 __insn_barrier(); \
162 if (++(x__ci->ci_mtx_count) == 0) \
163 splx(s); \
164 } while (/* CONSTCOND */ 0)
165
166 /*
167 * Memory barriers.
168 */
/*
 * MUTEX_MEMBAR_ENTER() expands to nothing when the platform defines
 * __HAVE_ATOMIC_AS_MEMBAR and to membar_enter() otherwise.  Presumably
 * the atomic operations already act as barriers on such platforms
 * (confirm against the platform headers).
 */
169 #ifdef __HAVE_ATOMIC_AS_MEMBAR
170 #define MUTEX_MEMBAR_ENTER()
171 #else
172 #define MUTEX_MEMBAR_ENTER() membar_enter()
173 #endif
174
175 /*
176 * For architectures that provide 'simple' mutexes: they provide a
177 * CAS function that is either MP-safe, or does not need to be MP
178 * safe. Adaptive mutexes on these architectures do not require an
179 * additional interlock.
180 */
181
182 #ifdef __HAVE_SIMPLE_MUTEXES
183
/*
 * Owner-word accessors and initializers for "simple" mutexes.
 *
 * Every macro argument is parenthesized where it is expanded, so callers
 * may pass arbitrary expressions (e.g. "a | b" or "x || y") without the
 * expansion re-associating them.
 */

/* Strip the flag bits from an owner word, leaving the owning thread. */
#define	MUTEX_OWNER(owner)						\
	((owner) & MUTEX_THREAD)
/* Non-zero when the waiters bit is set in the mutex's owner word. */
#define	MUTEX_HAS_WAITERS(mtx)						\
	(((int)(mtx)->mtx_owner & MUTEX_BIT_WAITERS) != 0)

/* Adaptive mutex: mark the owner word no-debug when dodebug is false. */
#define	MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug)				\
do {									\
	if (!(dodebug))							\
		(mtx)->mtx_owner |= MUTEX_BIT_NODEBUG;			\
} while (/* CONSTCOND */ 0)

/*
 * Spin mutex: set the spin bit (and the no-debug bit when dodebug is
 * false), record the IPL and initialize the lock word.
 */
#define	MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl)			\
do {									\
	(mtx)->mtx_owner = MUTEX_BIT_SPIN;				\
	if (!(dodebug))							\
		(mtx)->mtx_owner |= MUTEX_BIT_NODEBUG;			\
	MUTEX_INITIALIZE_SPIN_IPL((mtx), (ipl));			\
	MUTEX_SPINBIT_LOCK_INIT((mtx));					\
} while (/* CONSTCOND */ 0)

/* Overwrite the owner word with MUTEX_THREAD when the mutex is destroyed. */
#define	MUTEX_DESTROY(mtx)						\
do {									\
	(mtx)->mtx_owner = MUTEX_THREAD;				\
} while (/* CONSTCOND */ 0)

/* Classify an owner word by its spin bit. */
#define	MUTEX_SPIN_P(owner)						\
	(((owner) & MUTEX_BIT_SPIN) != 0)
#define	MUTEX_ADAPTIVE_P(owner)						\
	(((owner) & MUTEX_BIT_SPIN) == 0)

/* Default compare-and-swap on the owner word; true when the swap happened. */
#ifndef MUTEX_CAS
#define	MUTEX_CAS(p, o, n)						\
	(atomic_cas_ulong((volatile unsigned long *)(p), (o), (n)) == (o))
#endif /* MUTEX_CAS */

#define	MUTEX_DEBUG_P(mtx)	(((mtx)->mtx_owner & MUTEX_BIT_NODEBUG) == 0)
#if defined(LOCKDEBUG)
/* With LOCKDEBUG the no-debug bit alone does not count as "owned". */
#define	MUTEX_OWNED(owner)		(((owner) & ~MUTEX_BIT_NODEBUG) != 0)
/* Copy the no-debug bit of o into n. */
#define	MUTEX_INHERITDEBUG(n, o)	(n) |= (o) & MUTEX_BIT_NODEBUG
#else /* defined(LOCKDEBUG) */
#define	MUTEX_OWNED(owner)		((owner) != 0)
#define	MUTEX_INHERITDEBUG(n, o)	/* nothing */
#endif /* defined(LOCKDEBUG) */
227
228 static inline int
229 MUTEX_ACQUIRE(kmutex_t *mtx, uintptr_t curthread)
230 {
231 int rv;
232 uintptr_t oldown = 0;
233 uintptr_t newown = curthread;
234
235 MUTEX_INHERITDEBUG(oldown, mtx->mtx_owner);
236 MUTEX_INHERITDEBUG(newown, oldown);
237 rv = MUTEX_CAS(&mtx->mtx_owner, oldown, newown);
238 membar_acquire();
239 return rv;
240 }
241
242 static inline int
243 MUTEX_SET_WAITERS(kmutex_t *mtx, uintptr_t owner)
244 {
245 int rv;
246
247 rv = MUTEX_CAS(&mtx->mtx_owner, owner, owner | MUTEX_BIT_WAITERS);
248 MUTEX_MEMBAR_ENTER();
249 return rv;
250 }
251
252 static inline void
253 MUTEX_RELEASE(kmutex_t *mtx)
254 {
255 uintptr_t newown;
256
257 newown = 0;
258 MUTEX_INHERITDEBUG(newown, mtx->mtx_owner);
259 atomic_store_release(&mtx->mtx_owner, newown);
260 }
261 #endif /* __HAVE_SIMPLE_MUTEXES */
262
263 /*
264 * Patch in stubs via strong alias where they are not available.
265 */
266
/*
 * With LOCKDEBUG the stub-availability macros are undefined first, so both
 * alias groups below are always emitted.  When a stub group is not
 * available, the public entry points are strong aliases of
 * mutex_vector_enter() / mutex_vector_exit().
 */
267 #if defined(LOCKDEBUG)
268 #undef __HAVE_MUTEX_STUBS
269 #undef __HAVE_SPIN_MUTEX_STUBS
270 #endif
271
272 #ifndef __HAVE_MUTEX_STUBS
273 __strong_alias(mutex_enter,mutex_vector_enter);
274 __strong_alias(mutex_exit,mutex_vector_exit);
275 #endif
276
277 #ifndef __HAVE_SPIN_MUTEX_STUBS
278 __strong_alias(mutex_spin_enter,mutex_vector_enter);
279 __strong_alias(mutex_spin_exit,mutex_vector_exit);
280 #endif
281
/*
 * Forward declarations of file-local helpers that are referenced before
 * their definitions: mutex_abort() by the MUTEX_ABORT() macro, and
 * mutex_dump() / mutex_owner() by the ops tables that follow.
 */
282 static void mutex_abort(const char *, size_t, volatile const kmutex_t *,
283 const char *);
284 static void mutex_dump(const volatile void *, lockop_printer_t);
285 static lwp_t *mutex_owner(wchan_t);
286
/*
 * Lock operations descriptor for spin mutexes (type LOCKOPS_SPIN).
 * _mutex_init() hands it to LOCKDEBUG_ALLOC() and mutex_abort() hands it
 * to LOCKDEBUG_ABORT(); mutex_dump() is the dump hook.
 */
287 lockops_t mutex_spin_lockops = {
288 .lo_name = "Mutex",
289 .lo_type = LOCKOPS_SPIN,
290 .lo_dump = mutex_dump,
291 };
292
/*
 * Lock operations descriptor for adaptive mutexes (type LOCKOPS_SLEEP).
 * It has the same name and dump hook as mutex_spin_lockops; only lo_type
 * differs.
 */
293 lockops_t mutex_adaptive_lockops = {
294 .lo_name = "Mutex",
295 .lo_type = LOCKOPS_SLEEP,
296 .lo_dump = mutex_dump,
297 };
298
/*
 * Synchronisation object descriptor that mutex_vector_enter() passes to
 * turnstile_block() when it sleeps on an adaptive mutex.  The unsleep and
 * changepri hooks are the turnstile implementations, the lendpri hook is
 * sleepq_lendpri(), and mutex_owner() reports the mutex's owner.
 */
299 syncobj_t mutex_syncobj = {
300 .sobj_name = "mutex",
301 .sobj_flag = SOBJ_SLEEPQ_SORTED,
302 .sobj_boostpri = PRI_KERNEL,
303 .sobj_unsleep = turnstile_unsleep,
304 .sobj_changepri = turnstile_changepri,
305 .sobj_lendpri = sleepq_lendpri,
306 .sobj_owner = mutex_owner,
307 };
308
309 /*
310 * mutex_dump:
311 *
312 * Dump the contents of a mutex structure.
313 */
314 static void
315 mutex_dump(const volatile void *cookie, lockop_printer_t pr)
316 {
317 const volatile kmutex_t *mtx = cookie;
318 uintptr_t owner = mtx->mtx_owner;
319
320 pr("owner field : %#018lx wait/spin: %16d/%d\n",
321 (long)MUTEX_OWNER(owner), MUTEX_HAS_WAITERS(mtx),
322 MUTEX_SPIN_P(owner));
323 }
324
325 /*
326 * mutex_abort:
327 *
328 * Dump information about an error and panic the system. This
329 * generates a lot of machine code in the DIAGNOSTIC case, so
330 * we ask the compiler to not inline it.
331 */
332 static void __noinline
333 mutex_abort(const char *func, size_t line, volatile const kmutex_t *mtx,
334 const char *msg)
335 {
336
337 LOCKDEBUG_ABORT(func, line, mtx, (MUTEX_SPIN_P(mtx->mtx_owner) ?
338 &mutex_spin_lockops : &mutex_adaptive_lockops), msg);
339 }
340
341 /*
342 * mutex_init:
343 *
344 * Initialize a mutex for use. Note that adaptive mutexes are in
345 * essence spin mutexes that can sleep to avoid deadlock and wasting
346 * CPU time. We can't easily provide a type of mutex that always
347 * sleeps - see comments in mutex_vector_enter() about releasing
348 * mutexes unlocked.
349 */
350 void
351 _mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl,
352 uintptr_t return_address)
353 {
354 lockops_t *lockops __unused;
355 bool dodebug;
356
357 memset(mtx, 0, sizeof(*mtx));
358
359 if (ipl == IPL_NONE || ipl == IPL_SOFTCLOCK ||
360 ipl == IPL_SOFTBIO || ipl == IPL_SOFTNET ||
361 ipl == IPL_SOFTSERIAL) {
362 lockops = (type == MUTEX_NODEBUG ?
363 NULL : &mutex_adaptive_lockops);
364 dodebug = LOCKDEBUG_ALLOC(mtx, lockops, return_address);
365 MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug);
366 } else {
367 lockops = (type == MUTEX_NODEBUG ?
368 NULL : &mutex_spin_lockops);
369 dodebug = LOCKDEBUG_ALLOC(mtx, lockops, return_address);
370 MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl);
371 }
372 }
373
374 void
375 mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl)
376 {
377
378 _mutex_init(mtx, type, ipl, (uintptr_t)__builtin_return_address(0));
379 }
380
381 /*
382 * mutex_destroy:
383 *
384 * Tear down a mutex.
385 */
386 void
387 mutex_destroy(kmutex_t *mtx)
388 {
389 uintptr_t owner = mtx->mtx_owner;
390
391 if (MUTEX_ADAPTIVE_P(owner)) {
392 MUTEX_ASSERT(mtx, !MUTEX_OWNED(owner));
393 MUTEX_ASSERT(mtx, !MUTEX_HAS_WAITERS(mtx));
394 } else {
395 MUTEX_ASSERT(mtx, !MUTEX_SPINBIT_LOCKED_P(mtx));
396 }
397
398 LOCKDEBUG_FREE(MUTEX_DEBUG_P(mtx), mtx);
399 MUTEX_DESTROY(mtx);
400 }
401
402 #ifdef MULTIPROCESSOR
403 /*
404 * mutex_oncpu:
405 *
406 * Return true if an adaptive mutex owner is running on a CPU in the
407 * system. If the target is waiting on the kernel big lock, then we
408 * must release it. This is necessary to avoid deadlock.
409 */
410 static bool
411 mutex_oncpu(uintptr_t owner)
412 {
413 struct cpu_info *ci;
414 lwp_t *l;
415
416 KASSERT(kpreempt_disabled());
417
418 if (!MUTEX_OWNED(owner)) {
419 return false;
420 }
421
422 /*
423 * See lwp_dtor() why dereference of the LWP pointer is safe.
424 * We must have kernel preemption disabled for that.
425 */
426 l = (lwp_t *)MUTEX_OWNER(owner);
427 ci = l->l_cpu;
428
429 if (ci && ci->ci_curlwp == l) {
430 /* Target is running; do we need to block? */
431 return (atomic_load_relaxed(&ci->ci_biglock_wanted) != l);
432 }
433
434 /* Not running. It may be safe to block now. */
435 return false;
436 }
437 #endif /* MULTIPROCESSOR */
438
439 /*
440 * mutex_vector_enter:
441 *
442 * Support routine for mutex_enter() that must handle all cases. In
443 * the LOCKDEBUG case, mutex_enter() is always aliased here, even if
444 * fast-path stubs are available. If a mutex_spin_enter() stub is
445 * not available, then it is also aliased directly here.
446 */
/*
 * Control flow summary: the owner word is read once to choose a path.
 * Spin mutexes raise the SPL and, when FULL is defined, take the lock
 * word, spinning with backoff on MULTIPROCESSOR kernels.  Adaptive
 * mutexes loop: try MUTEX_ACQUIRE() when unowned; on MULTIPROCESSOR
 * kernels spin while mutex_oncpu() reports the owner as running; otherwise
 * set the waiters bit and sleep in turnstile_block().  The function only
 * returns normally once the mutex has been acquired.
 */
447 void
448 mutex_vector_enter(kmutex_t *mtx)
449 {
450 uintptr_t owner, curthread;
451 turnstile_t *ts;
452 #ifdef MULTIPROCESSOR
453 u_int count;
454 #endif
455 LOCKSTAT_COUNTER(spincnt);
456 LOCKSTAT_COUNTER(slpcnt);
457 LOCKSTAT_TIMER(spintime);
458 LOCKSTAT_TIMER(slptime);
459 LOCKSTAT_FLAG(lsflag);
460
461 /*
462 * Handle spin mutexes.
463 */
/*
 * Preemption is disabled before the owner word is sampled.  The spin
 * path re-enables it straight away; the adaptive path keeps it disabled
 * until after the acquire loop, apart from the backoff inside the
 * on-CPU spin.
 */
464 KPREEMPT_DISABLE(curlwp);
465 owner = mtx->mtx_owner;
466 if (MUTEX_SPIN_P(owner)) {
467 #if defined(LOCKDEBUG) && defined(MULTIPROCESSOR)
468 u_int spins = 0;
469 #endif
470 KPREEMPT_ENABLE(curlwp);
471 MUTEX_SPIN_SPLRAISE(mtx);
472 MUTEX_WANTLOCK(mtx);
473 #ifdef FULL
474 if (MUTEX_SPINBIT_LOCK_TRY(mtx)) {
475 MUTEX_LOCKED(mtx);
476 return;
477 }
478 #if !defined(MULTIPROCESSOR)
479 MUTEX_ABORT(mtx, "locking against myself");
480 #else /* !MULTIPROCESSOR */
481
482 LOCKSTAT_ENTER(lsflag);
483 LOCKSTAT_START_TIMER(lsflag, spintime);
484 count = SPINLOCK_BACKOFF_MIN;
485
486 /*
487 * Spin testing the lock word and do exponential backoff
488 * to reduce cache line ping-ponging between CPUs.
489 */
490 do {
491 while (MUTEX_SPINBIT_LOCKED_P(mtx)) {
492 SPINLOCK_SPIN_HOOK;
493 SPINLOCK_BACKOFF(count);
494 #ifdef LOCKDEBUG
495 if (SPINLOCK_SPINOUT(spins))
496 MUTEX_ABORT(mtx, "spinout");
497 #endif /* LOCKDEBUG */
498 }
499 } while (!MUTEX_SPINBIT_LOCK_TRY(mtx));
500
/*
 * A lockstat event is recorded only when count no longer equals its
 * initial value; presumably SPINLOCK_BACKOFF() advances count, so this
 * means at least one backoff happened (TODO confirm against the macro).
 */
501 if (count != SPINLOCK_BACKOFF_MIN) {
502 LOCKSTAT_STOP_TIMER(lsflag, spintime);
503 LOCKSTAT_EVENT(lsflag, mtx,
504 LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
505 }
506 LOCKSTAT_EXIT(lsflag);
507 #endif /* !MULTIPROCESSOR */
508 #endif /* FULL */
/*
 * Reached after the spin loop above has taken the lock word, or
 * directly after the SPL raise when FULL is not defined.
 */
509 MUTEX_LOCKED(mtx);
510 return;
511 }
512
513 curthread = (uintptr_t)curlwp;
514
515 MUTEX_DASSERT(mtx, MUTEX_ADAPTIVE_P(owner));
516 MUTEX_ASSERT(mtx, curthread != 0);
517 MUTEX_ASSERT(mtx, !cpu_intr_p());
518 MUTEX_WANTLOCK(mtx);
519
/* These debug checks are skipped once a panic is in progress. */
520 if (__predict_true(panicstr == NULL)) {
521 KDASSERT(pserialize_not_in_read_section());
522 LOCKDEBUG_BARRIER(&kernel_lock, 1);
523 }
524
525 LOCKSTAT_ENTER(lsflag);
526
527 /*
528 * Adaptive mutex; spin trying to acquire the mutex. If we
529 * determine that the owner is not running on a processor,
530 * then we stop spinning, and sleep instead.
531 */
532 for (;;) {
533 if (!MUTEX_OWNED(owner)) {
534 /*
535 * Mutex owner clear could mean two things:
536 *
537 * * The mutex has been released.
538 * * The owner field hasn't been set yet.
539 *
540 * Try to acquire it again. If that fails,
541 * we'll just loop again.
542 */
543 if (MUTEX_ACQUIRE(mtx, curthread))
544 break;
545 owner = mtx->mtx_owner;
546 continue;
547 }
548 if (__predict_false(MUTEX_OWNER(owner) == curthread)) {
549 MUTEX_ABORT(mtx, "locking against myself");
550 }
551 #ifdef MULTIPROCESSOR
552 /*
553 * Check to see if the owner is running on a processor.
554 * If so, then we should just spin, as the owner will
555 * likely release the lock very soon.
556 */
557 if (mutex_oncpu(owner)) {
558 LOCKSTAT_START_TIMER(lsflag, spintime);
559 count = SPINLOCK_BACKOFF_MIN;
560 do {
/* Preemption is enabled only for the duration of the backoff. */
561 KPREEMPT_ENABLE(curlwp);
562 SPINLOCK_BACKOFF(count);
563 KPREEMPT_DISABLE(curlwp);
564 owner = mtx->mtx_owner;
565 } while (mutex_oncpu(owner));
566 LOCKSTAT_STOP_TIMER(lsflag, spintime);
567 LOCKSTAT_COUNT(spincnt, 1);
568 if (!MUTEX_OWNED(owner))
569 continue;
570 }
571 #endif
572
/*
 * The owner is not running (or this is a uniprocessor kernel): prepare
 * to sleep.  Per the comment below, holding the turnstile for this
 * mutex means holding the turnstile chain interlock; every "continue"
 * from here on therefore calls turnstile_exit() first.
 */
573 ts = turnstile_lookup(mtx);
574
575 /*
576 * Once we have the turnstile chain interlock, mark the
577 * mutex as having waiters. If that fails, spin again:
578 * chances are that the mutex has been released.
579 */
580 if (!MUTEX_SET_WAITERS(mtx, owner)) {
581 turnstile_exit(mtx);
582 owner = mtx->mtx_owner;
583 continue;
584 }
585
586 #ifdef MULTIPROCESSOR
587 /*
588 * mutex_exit() is permitted to release the mutex without
589 * any interlocking instructions, and the following can
590 * occur as a result:
591 *
592 * CPU 1: MUTEX_SET_WAITERS() CPU2: mutex_exit()
593 * ---------------------------- ----------------------------
594 * .. load mtx->mtx_owner
595 * .. see has-waiters bit clear
596 * set has-waiters bit ..
597 * .. store mtx->mtx_owner := 0
598 * return success
599 *
600 * There is another race that can occur: a third CPU could
601 * acquire the mutex as soon as it is released. Since
602 * adaptive mutexes are primarily spin mutexes, this is not
603 * something that we need to worry about too much. What we
604 * do need to ensure is that the waiters bit gets set.
605 *
606 * To allow the unlocked release, we need to make some
607 * assumptions here:
608 *
609 * o Release is the only non-atomic/unlocked operation
610 * that can be performed on the mutex. (It must still
611 * be atomic on the local CPU, e.g. in case interrupted
612 * or preempted).
613 *
614 * o At any given time on each mutex, MUTEX_SET_WAITERS()
615 * can only ever be in progress on one CPU in the
616 * system - guaranteed by the turnstile chain lock.
617 *
618 * o No other operations other than MUTEX_SET_WAITERS()
619 * and release can modify a mutex with a non-zero
620 * owner field.
621 *
622 * o If the holding LWP switches away, it posts a store
623 * fence before changing curlwp, ensuring that any
624 * overwrite of the mutex waiters flag by mutex_exit()
625 * completes before the modification of curlwp becomes
626 * visible to this CPU.
627 *
628 * o cpu_switchto() posts a store fence after setting curlwp
629 * and before resuming execution of an LWP.
630 *
631 * o _kernel_lock() posts a store fence before setting
632 * curcpu()->ci_biglock_wanted, and after clearing it.
633 * This ensures that any overwrite of the mutex waiters
634 * flag by mutex_exit() completes before the modification
635 * of ci_biglock_wanted becomes visible.
636 *
637 * After MUTEX_SET_WAITERS() succeeds, simultaneously
638 * confirming that the same LWP still holds the mutex
639 * since we took the turnstile lock and notifying it that
640 * we're waiting, we check the lock holder's status again.
641 * Some of the possible outcomes (not an exhaustive list;
642 * XXX this should be made exhaustive):
643 *
644 * 1. The on-CPU check returns true: the holding LWP is
645 * running again. The lock may be released soon and
646 * we should spin. Importantly, we can't trust the
647 * value of the waiters flag.
648 *
649 * 2. The on-CPU check returns false: the holding LWP is
650 * not running. We now have the opportunity to check
651 * if mutex_exit() has blatted the modifications made
652 * by MUTEX_SET_WAITERS().
653 *
654 * 3. The on-CPU check returns false: the holding LWP may
655 * or may not be running. It has context switched at
656 * some point during our check. Again, we have the
657 * chance to see if the waiters bit is still set or
658 * has been overwritten.
659 *
660 * 4. The on-CPU check returns false: the holding LWP is
661 * running on a CPU, but wants the big lock. It's OK
662 * to check the waiters field in this case.
663 *
664 * 5. The has-waiters check fails: the mutex has been
665 * released, the waiters flag cleared and another LWP
666 * now owns the mutex.
667 *
668 * 6. The has-waiters check fails: the mutex has been
669 * released.
670 *
671 * If the waiters bit is not set it's unsafe to go asleep,
672 * as we might never be awoken.
673 */
674 if (mutex_oncpu(owner)) {
675 turnstile_exit(mtx);
676 owner = mtx->mtx_owner;
677 continue;
678 }
679 membar_consumer();
680 if (!MUTEX_HAS_WAITERS(mtx)) {
681 turnstile_exit(mtx);
682 owner = mtx->mtx_owner;
683 continue;
684 }
685 #endif /* MULTIPROCESSOR */
686
687 LOCKSTAT_START_TIMER(lsflag, slptime);
688
689 turnstile_block(ts, TS_WRITER_Q, mtx, &mutex_syncobj);
690
691 LOCKSTAT_STOP_TIMER(lsflag, slptime);
692 LOCKSTAT_COUNT(slpcnt, 1);
693
/* Back from turnstile_block(): re-read the owner word and start over. */
694 owner = mtx->mtx_owner;
695 }
696 KPREEMPT_ENABLE(curlwp);
697
698 LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SLEEP1,
699 slpcnt, slptime);
700 LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SPIN,
701 spincnt, spintime);
702 LOCKSTAT_EXIT(lsflag);
703
704 MUTEX_DASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
705 MUTEX_LOCKED(mtx);
706 }
707
708 /*
709 * mutex_vector_exit:
710 *
711 * Support routine for mutex_exit() that handles all cases.
712 */
713 void
714 mutex_vector_exit(kmutex_t *mtx)
715 {
716 turnstile_t *ts;
717 uintptr_t curthread;
718
719 if (MUTEX_SPIN_P(mtx->mtx_owner)) {
720 #ifdef FULL
721 if (__predict_false(!MUTEX_SPINBIT_LOCKED_P(mtx))) {
722 MUTEX_ABORT(mtx, "exiting unheld spin mutex");
723 }
724 MUTEX_UNLOCKED(mtx);
725 MUTEX_SPINBIT_LOCK_UNLOCK(mtx);
726 #endif
727 MUTEX_SPIN_SPLRESTORE(mtx);
728 return;
729 }
730
731 #ifndef __HAVE_MUTEX_STUBS
732 /*
733 * On some architectures without mutex stubs, we can enter here to
734 * release mutexes before interrupts and whatnot are up and running.
735 * We need this hack to keep them sweet.
736 */
737 if (__predict_false(cold)) {
738 MUTEX_UNLOCKED(mtx);
739 MUTEX_RELEASE(mtx);
740 return;
741 }
742 #endif
743
744 curthread = (uintptr_t)curlwp;
745 MUTEX_DASSERT(mtx, curthread != 0);
746 MUTEX_ASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
747 MUTEX_UNLOCKED(mtx);
748 #if !defined(LOCKDEBUG)
749 __USE(curthread);
750 #endif
751
752 #ifdef LOCKDEBUG
753 /*
754 * Avoid having to take the turnstile chain lock every time
755 * around. Raise the priority level to splhigh() in order
756 * to disable preemption and so make the following atomic.
757 * This also blocks out soft interrupts that could set the
758 * waiters bit.
759 */
760 {
761 int s = splhigh();
762 if (!MUTEX_HAS_WAITERS(mtx)) {
763 MUTEX_RELEASE(mtx);
764 splx(s);
765 return;
766 }
767 splx(s);
768 }
769 #endif
770
771 /*
772 * Get this lock's turnstile. This gets the interlock on
773 * the sleep queue. Once we have that, we can clear the
774 * lock. If there was no turnstile for the lock, there
775 * were no waiters remaining.
776 */
777 ts = turnstile_lookup(mtx);
778
779 if (ts == NULL) {
780 MUTEX_RELEASE(mtx);
781 turnstile_exit(mtx);
782 } else {
783 MUTEX_RELEASE(mtx);
784 turnstile_wakeup(ts, TS_WRITER_Q,
785 TS_WAITERS(ts, TS_WRITER_Q), NULL);
786 }
787 }
788
789 #ifndef __HAVE_SIMPLE_MUTEXES
790 /*
791 * mutex_wakeup:
792 *
793 * Support routine for mutex_exit() that wakes up all waiters.
794 * We assume that the mutex has been released, but it need not
795 * be.
796 */
797 void
798 mutex_wakeup(kmutex_t *mtx)
799 {
800 turnstile_t *ts;
801
802 ts = turnstile_lookup(mtx);
803 if (ts == NULL) {
804 turnstile_exit(mtx);
805 return;
806 }
807 MUTEX_CLEAR_WAITERS(mtx);
808 turnstile_wakeup(ts, TS_WRITER_Q, TS_WAITERS(ts, TS_WRITER_Q), NULL);
809 }
810 #endif /* !__HAVE_SIMPLE_MUTEXES */
811
812 /*
813 * mutex_owned:
814 *
815 * Return true if the current LWP (adaptive) or CPU (spin)
816 * holds the mutex.
817 */
818 int
819 mutex_owned(const kmutex_t *mtx)
820 {
821
822 if (mtx == NULL)
823 return 0;
824 if (MUTEX_ADAPTIVE_P(mtx->mtx_owner))
825 return MUTEX_OWNER(mtx->mtx_owner) == (uintptr_t)curlwp;
826 #ifdef FULL
827 return MUTEX_SPINBIT_LOCKED_P(mtx);
828 #else
829 return 1;
830 #endif
831 }
832
833 /*
834 * mutex_owner:
835 *
836 * Return the current owner of an adaptive mutex. Used for
837 * priority inheritance.
838 */
839 static lwp_t *
840 mutex_owner(wchan_t wchan)
841 {
842 volatile const kmutex_t *mtx = wchan;
843
844 MUTEX_ASSERT(mtx, MUTEX_ADAPTIVE_P(mtx->mtx_owner));
845 return (struct lwp *)MUTEX_OWNER(mtx->mtx_owner);
846 }
847
848 /*
849 * mutex_ownable:
850 *
851 * When compiled with DEBUG and LOCKDEBUG defined, ensure that
852 * the mutex is available. We cannot use !mutex_owned() since
853 * that won't work correctly for spin mutexes.
854 */
855 int
856 mutex_ownable(const kmutex_t *mtx)
857 {
858
859 #ifdef LOCKDEBUG
860 MUTEX_TESTLOCK(mtx);
861 #endif
862 return 1;
863 }
864
865 /*
866 * mutex_tryenter:
867 *
868 * Try to acquire the mutex; return non-zero if we did.
869 */
870 int
871 mutex_tryenter(kmutex_t *mtx)
872 {
873 uintptr_t curthread;
874
875 /*
876 * Handle spin mutexes.
877 */
878 if (MUTEX_SPIN_P(mtx->mtx_owner)) {
879 MUTEX_SPIN_SPLRAISE(mtx);
880 #ifdef FULL
881 if (MUTEX_SPINBIT_LOCK_TRY(mtx)) {
882 MUTEX_WANTLOCK(mtx);
883 MUTEX_LOCKED(mtx);
884 return 1;
885 }
886 MUTEX_SPIN_SPLRESTORE(mtx);
887 #else
888 MUTEX_WANTLOCK(mtx);
889 MUTEX_LOCKED(mtx);
890 return 1;
891 #endif
892 } else {
893 curthread = (uintptr_t)curlwp;
894 MUTEX_ASSERT(mtx, curthread != 0);
895 if (MUTEX_ACQUIRE(mtx, curthread)) {
896 MUTEX_WANTLOCK(mtx);
897 MUTEX_LOCKED(mtx);
898 MUTEX_DASSERT(mtx,
899 MUTEX_OWNER(mtx->mtx_owner) == curthread);
900 return 1;
901 }
902 }
903
904 return 0;
905 }
906
907 #if defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL)
908 /*
909 * mutex_spin_retry:
910 *
911 * Support routine for mutex_spin_enter(). Assumes that the caller
912 * has already raised the SPL, and adjusted counters.
913 */
914 void
915 mutex_spin_retry(kmutex_t *mtx)
916 {
917 #ifdef MULTIPROCESSOR
918 u_int count;
919 LOCKSTAT_TIMER(spintime);
920 LOCKSTAT_FLAG(lsflag);
921 #ifdef LOCKDEBUG
922 u_int spins = 0;
923 #endif /* LOCKDEBUG */
924
925 MUTEX_WANTLOCK(mtx);
926
927 LOCKSTAT_ENTER(lsflag);
928 LOCKSTAT_START_TIMER(lsflag, spintime);
929 count = SPINLOCK_BACKOFF_MIN;
930
931 /*
932 * Spin testing the lock word and do exponential backoff
933 * to reduce cache line ping-ponging between CPUs.
934 */
935 do {
936 while (MUTEX_SPINBIT_LOCKED_P(mtx)) {
937 SPINLOCK_BACKOFF(count);
938 #ifdef LOCKDEBUG
939 if (SPINLOCK_SPINOUT(spins))
940 MUTEX_ABORT(mtx, "spinout");
941 #endif /* LOCKDEBUG */
942 }
943 } while (!MUTEX_SPINBIT_LOCK_TRY(mtx));
944
945 LOCKSTAT_STOP_TIMER(lsflag, spintime);
946 LOCKSTAT_EVENT(lsflag, mtx, LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
947 LOCKSTAT_EXIT(lsflag);
948
949 MUTEX_LOCKED(mtx);
950 #else /* MULTIPROCESSOR */
951 MUTEX_ABORT(mtx, "locking against myself");
952 #endif /* MULTIPROCESSOR */
953 }
954 #endif /* defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL) */
955