/*	$NetBSD: kern_mutex.c,v 1.109 2023/09/07 20:05:42 ad Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel mutex implementation, modeled after those found in Solaris,
 * a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	    Richard McDougall.
 */

#define	__MUTEX_PRIVATE

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.109 2023/09/07 20:05:42 ad Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/proc.h>
#include <sys/mutex.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/kernel.h>
#include <sys/intr.h>
#include <sys/lock.h>
#include <sys/types.h>
#include <sys/cpu.h>
#include <sys/pserialize.h>

#include <dev/lockstat.h>

#include <machine/lock.h>

/*
 * When not running a debug kernel, spin mutexes are not much
 * more than an splraiseipl() and splx() pair.
 */

#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
#define	FULL
#endif

/*
 * Debugging support.
 */

#define	MUTEX_WANTLOCK(mtx)					\
    LOCKDEBUG_WANTLOCK(MUTEX_DEBUG_P(mtx), (mtx),		\
	(uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_TESTLOCK(mtx)					\
    LOCKDEBUG_WANTLOCK(MUTEX_DEBUG_P(mtx), (mtx),		\
	(uintptr_t)__builtin_return_address(0), -1)
#define	MUTEX_LOCKED(mtx)					\
    LOCKDEBUG_LOCKED(MUTEX_DEBUG_P(mtx), (mtx), NULL,		\
	(uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_UNLOCKED(mtx)					\
    LOCKDEBUG_UNLOCKED(MUTEX_DEBUG_P(mtx), (mtx),		\
	(uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_ABORT(mtx, msg)					\
    mutex_abort(__func__, __LINE__, mtx, msg)

#if defined(LOCKDEBUG)

#define	MUTEX_DASSERT(mtx, cond)				\
do {								\
	if (__predict_false(!(cond)))				\
		MUTEX_ABORT(mtx, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	MUTEX_DASSERT(mtx, cond)	/* nothing */

#endif	/* LOCKDEBUG */

#if defined(DIAGNOSTIC)

#define	MUTEX_ASSERT(mtx, cond)					\
do {								\
	if (__predict_false(!(cond)))				\
		MUTEX_ABORT(mtx, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else	/* DIAGNOSTIC */

#define	MUTEX_ASSERT(mtx, cond)		/* nothing */

#endif	/* DIAGNOSTIC */

/*
 * Some architectures can't use __cpu_simple_lock as-is, so allow a way
 * for them to use an alternate definition.
 */
#ifndef MUTEX_SPINBIT_LOCK_INIT
#define	MUTEX_SPINBIT_LOCK_INIT(mtx)	__cpu_simple_lock_init(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCKED_P
#define	MUTEX_SPINBIT_LOCKED_P(mtx)	__SIMPLELOCK_LOCKED_P(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCK_TRY
#define	MUTEX_SPINBIT_LOCK_TRY(mtx)	__cpu_simple_lock_try(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCK_UNLOCK
#define	MUTEX_SPINBIT_LOCK_UNLOCK(mtx)	__cpu_simple_unlock(&(mtx)->mtx_lock)
#endif

#ifndef MUTEX_INITIALIZE_SPIN_IPL
#define	MUTEX_INITIALIZE_SPIN_IPL(mtx, ipl) \
    ((mtx)->mtx_ipl = makeiplcookie((ipl)))
#endif

/*
 * Spin mutex SPL save / restore.
 */

#define	MUTEX_SPIN_SPLRAISE(mtx)					\
do {									\
	const int s = splraiseipl(MUTEX_SPIN_IPL(mtx));			\
	struct cpu_info * const x__ci = curcpu();			\
	const int x__cnt = x__ci->ci_mtx_count--;			\
	__insn_barrier();						\
	if (x__cnt == 0)						\
		x__ci->ci_mtx_oldspl = s;				\
} while (/* CONSTCOND */ 0)

#define	MUTEX_SPIN_SPLRESTORE(mtx)					\
do {									\
	struct cpu_info * const x__ci = curcpu();			\
	const int s = x__ci->ci_mtx_oldspl;				\
	__insn_barrier();						\
	if (++(x__ci->ci_mtx_count) == 0)				\
		splx(s);						\
} while (/* CONSTCOND */ 0)
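
/*
 * Illustrative example of the macros above: ci_mtx_count lets spin
 * mutexes nest by counting down from zero.  Only the outermost raise
 * records the saved SPL, and only the matching outermost restore
 * calls splx():
 *
 *	MUTEX_SPIN_SPLRAISE(a)		ci_mtx_count  0 -> -1, SPL saved
 *	MUTEX_SPIN_SPLRAISE(b)		ci_mtx_count -1 -> -2
 *	MUTEX_SPIN_SPLRESTORE(b)	ci_mtx_count -2 -> -1
 *	MUTEX_SPIN_SPLRESTORE(a)	ci_mtx_count -1 ->  0, splx(saved)
 */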

/*
 * Memory barriers.
 */
#ifdef	__HAVE_ATOMIC_AS_MEMBAR
#define	MUTEX_MEMBAR_ENTER()
#else
#define	MUTEX_MEMBAR_ENTER()		membar_enter()
#endif

/*
 * For architectures that provide 'simple' mutexes: a CAS function
 * that is either MP-safe, or does not need to be MP-safe.  Adaptive
 * mutexes on these architectures do not require an additional
 * interlock.
 */

#ifdef __HAVE_SIMPLE_MUTEXES

#define	MUTEX_OWNER(owner)						\
	(owner & MUTEX_THREAD)
#define	MUTEX_HAS_WAITERS(mtx)						\
	(((int)(mtx)->mtx_owner & MUTEX_BIT_WAITERS) != 0)

#define	MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug)				\
do {									\
	if (!dodebug)							\
		(mtx)->mtx_owner |= MUTEX_BIT_NODEBUG;			\
} while (/* CONSTCOND */ 0)

#define	MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl)			\
do {									\
	(mtx)->mtx_owner = MUTEX_BIT_SPIN;				\
	if (!dodebug)							\
		(mtx)->mtx_owner |= MUTEX_BIT_NODEBUG;			\
	MUTEX_INITIALIZE_SPIN_IPL((mtx), (ipl));			\
	MUTEX_SPINBIT_LOCK_INIT((mtx));					\
} while (/* CONSTCOND */ 0)

#define	MUTEX_DESTROY(mtx)						\
do {									\
	(mtx)->mtx_owner = MUTEX_THREAD;				\
} while (/* CONSTCOND */ 0)

#define	MUTEX_SPIN_P(owner)		\
    (((owner) & MUTEX_BIT_SPIN) != 0)
#define	MUTEX_ADAPTIVE_P(owner)		\
    (((owner) & MUTEX_BIT_SPIN) == 0)

#ifndef MUTEX_CAS
#define	MUTEX_CAS(p, o, n)		\
	(atomic_cas_ulong((volatile unsigned long *)(p), (o), (n)) == (o))
#endif /* MUTEX_CAS */

#define	MUTEX_DEBUG_P(mtx)	(((mtx)->mtx_owner & MUTEX_BIT_NODEBUG) == 0)
#if defined(LOCKDEBUG)
#define	MUTEX_OWNED(owner)		(((owner) & ~MUTEX_BIT_NODEBUG) != 0)
#define	MUTEX_INHERITDEBUG(n, o)	(n) |= (o) & MUTEX_BIT_NODEBUG
#else /* defined(LOCKDEBUG) */
#define	MUTEX_OWNED(owner)		((owner) != 0)
#define	MUTEX_INHERITDEBUG(n, o)	/* nothing */
#endif /* defined(LOCKDEBUG) */

static inline int
MUTEX_ACQUIRE(kmutex_t *mtx, uintptr_t curthread)
{
	int rv;
	uintptr_t oldown = 0;
	uintptr_t newown = curthread;

	MUTEX_INHERITDEBUG(oldown, mtx->mtx_owner);
	MUTEX_INHERITDEBUG(newown, oldown);
	rv = MUTEX_CAS(&mtx->mtx_owner, oldown, newown);
	membar_acquire();
	return rv;
}

static inline int
MUTEX_SET_WAITERS(kmutex_t *mtx, uintptr_t owner)
{
	int rv;

	rv = MUTEX_CAS(&mtx->mtx_owner, owner, owner | MUTEX_BIT_WAITERS);
	MUTEX_MEMBAR_ENTER();
	return rv;
}

static inline void
MUTEX_RELEASE(kmutex_t *mtx)
{
	uintptr_t newown;

	newown = 0;
	MUTEX_INHERITDEBUG(newown, mtx->mtx_owner);
	atomic_store_release(&mtx->mtx_owner, newown);
}
#endif	/* __HAVE_SIMPLE_MUTEXES */
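
/*
 * Sketch of the mtx_owner word on __HAVE_SIMPLE_MUTEXES platforms, as
 * implied by the macros above (exact bit assignments are machine-
 * dependent; see sys/mutex.h):
 *
 *	adaptive, unowned:	0, possibly ORed with MUTEX_BIT_NODEBUG
 *	adaptive, owned:	owning LWP address, possibly ORed with
 *				MUTEX_BIT_WAITERS and/or MUTEX_BIT_NODEBUG
 *
 * Spin mutexes keep MUTEX_BIT_SPIN in mtx_owner and hold the IPL and
 * the lock bit in separate fields (mtx_ipl, mtx_lock).
 */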

/*
 * Patch in stubs via strong alias where they are not available.
 */

#if defined(LOCKDEBUG)
#undef	__HAVE_MUTEX_STUBS
#undef	__HAVE_SPIN_MUTEX_STUBS
#endif

#ifndef __HAVE_MUTEX_STUBS
__strong_alias(mutex_enter,mutex_vector_enter);
__strong_alias(mutex_exit,mutex_vector_exit);
#endif

#ifndef __HAVE_SPIN_MUTEX_STUBS
__strong_alias(mutex_spin_enter,mutex_vector_enter);
__strong_alias(mutex_spin_exit,mutex_vector_exit);
#endif

static void	mutex_abort(const char *, size_t, volatile const kmutex_t *,
		    const char *);
static void	mutex_dump(const volatile void *, lockop_printer_t);
static lwp_t	*mutex_owner(wchan_t);

lockops_t mutex_spin_lockops = {
	.lo_name = "Mutex",
	.lo_type = LOCKOPS_SPIN,
	.lo_dump = mutex_dump,
};

lockops_t mutex_adaptive_lockops = {
	.lo_name = "Mutex",
	.lo_type = LOCKOPS_SLEEP,
	.lo_dump = mutex_dump,
};

syncobj_t mutex_syncobj = {
	.sobj_name	= "mutex",
	.sobj_flag	= SOBJ_SLEEPQ_SORTED,
	.sobj_unsleep	= turnstile_unsleep,
	.sobj_changepri	= turnstile_changepri,
	.sobj_lendpri	= sleepq_lendpri,
	.sobj_owner	= mutex_owner,
};

/*
 * mutex_dump:
 *
 *	Dump the contents of a mutex structure.
 */
static void
mutex_dump(const volatile void *cookie, lockop_printer_t pr)
{
	const volatile kmutex_t *mtx = cookie;
	uintptr_t owner = mtx->mtx_owner;

	pr("owner field : %#018lx wait/spin: %16d/%d\n",
	    (long)MUTEX_OWNER(owner), MUTEX_HAS_WAITERS(mtx),
	    MUTEX_SPIN_P(owner));
}

/*
 * mutex_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */
static void __noinline
mutex_abort(const char *func, size_t line, volatile const kmutex_t *mtx,
    const char *msg)
{

	LOCKDEBUG_ABORT(func, line, mtx, (MUTEX_SPIN_P(mtx->mtx_owner) ?
	    &mutex_spin_lockops : &mutex_adaptive_lockops), msg);
}

/*
 * mutex_init:
 *
 *	Initialize a mutex for use.  Note that adaptive mutexes are in
 *	essence spin mutexes that can sleep to avoid deadlock and wasting
 *	CPU time.  We can't easily provide a type of mutex that always
 *	sleeps - see comments in mutex_vector_enter() about releasing
 *	mutexes unlocked.
 */
void
_mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl,
    uintptr_t return_address)
{
	lockops_t *lockops __unused;
	bool dodebug;

	memset(mtx, 0, sizeof(*mtx));

	if (ipl == IPL_NONE || ipl == IPL_SOFTCLOCK ||
	    ipl == IPL_SOFTBIO || ipl == IPL_SOFTNET ||
	    ipl == IPL_SOFTSERIAL) {
		lockops = (type == MUTEX_NODEBUG ?
		    NULL : &mutex_adaptive_lockops);
		dodebug = LOCKDEBUG_ALLOC(mtx, lockops, return_address);
		MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug);
	} else {
		lockops = (type == MUTEX_NODEBUG ?
		    NULL : &mutex_spin_lockops);
		dodebug = LOCKDEBUG_ALLOC(mtx, lockops, return_address);
		MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl);
	}
}

void
mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl)
{

	_mutex_init(mtx, type, ipl, (uintptr_t)__builtin_return_address(0));
}
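
/*
 * Illustrative usage (see mutex(9) for the authoritative interface):
 *
 *	kmutex_t lock;
 *
 *	mutex_init(&lock, MUTEX_DEFAULT, IPL_NONE);	adaptive mutex
 *	mutex_enter(&lock);
 *	...critical section...
 *	mutex_exit(&lock);
 *	mutex_destroy(&lock);
 *
 * Passing an IPL above the soft interrupt levels (e.g. IPL_VM) yields
 * a spin mutex instead.
 */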

/*
 * mutex_destroy:
 *
 *	Tear down a mutex.
 */
void
mutex_destroy(kmutex_t *mtx)
{
	uintptr_t owner = mtx->mtx_owner;

	if (MUTEX_ADAPTIVE_P(owner)) {
		MUTEX_ASSERT(mtx, !MUTEX_OWNED(owner));
		MUTEX_ASSERT(mtx, !MUTEX_HAS_WAITERS(mtx));
	} else {
		MUTEX_ASSERT(mtx, !MUTEX_SPINBIT_LOCKED_P(mtx));
	}

	LOCKDEBUG_FREE(MUTEX_DEBUG_P(mtx), mtx);
	MUTEX_DESTROY(mtx);
}

#ifdef MULTIPROCESSOR
/*
 * mutex_oncpu:
 *
 *	Return true if an adaptive mutex owner is running on a CPU in the
 *	system.  If the target is waiting on the kernel big lock, then we
 *	must release it.  This is necessary to avoid deadlock.
 */
static bool
mutex_oncpu(uintptr_t owner)
{
	struct cpu_info *ci;
	lwp_t *l;

	KASSERT(kpreempt_disabled());

	if (!MUTEX_OWNED(owner)) {
		return false;
	}

	/*
	 * See lwp_dtor() for why dereferencing the LWP pointer is safe.
	 * We must have kernel preemption disabled for that.
	 */
	l = (lwp_t *)MUTEX_OWNER(owner);
	ci = l->l_cpu;

	if (ci && ci->ci_curlwp == l) {
		/* Target is running; do we need to block? */
		return (atomic_load_relaxed(&ci->ci_biglock_wanted) != l);
	}

	/* Not running.  It may be safe to block now. */
	return false;
}
#endif	/* MULTIPROCESSOR */

/*
 * mutex_vector_enter:
 *
 *	Support routine for mutex_enter() that must handle all cases.  In
 *	the LOCKDEBUG case, mutex_enter() is always aliased here, even if
 *	fast-path stubs are available.  If a mutex_spin_enter() stub is
 *	not available, then it is also aliased directly here.
 */
void
mutex_vector_enter(kmutex_t *mtx)
{
	uintptr_t owner, curthread;
	turnstile_t *ts;
#ifdef MULTIPROCESSOR
	u_int count;
#endif
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_COUNTER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_FLAG(lsflag);

	/*
	 * Handle spin mutexes.
	 */
	KPREEMPT_DISABLE(curlwp);
	owner = mtx->mtx_owner;
	if (MUTEX_SPIN_P(owner)) {
#if defined(LOCKDEBUG) && defined(MULTIPROCESSOR)
		u_int spins = 0;
#endif
		KPREEMPT_ENABLE(curlwp);
		MUTEX_SPIN_SPLRAISE(mtx);
		MUTEX_WANTLOCK(mtx);
#ifdef FULL
		if (MUTEX_SPINBIT_LOCK_TRY(mtx)) {
			MUTEX_LOCKED(mtx);
			return;
		}
#if !defined(MULTIPROCESSOR)
		MUTEX_ABORT(mtx, "locking against myself");
#else /* !MULTIPROCESSOR */

		LOCKSTAT_ENTER(lsflag);
		LOCKSTAT_START_TIMER(lsflag, spintime);
		count = SPINLOCK_BACKOFF_MIN;

		/*
		 * Spin, testing the lock word and doing exponential
		 * backoff to reduce cache line ping-ponging between
		 * CPUs.
		 */
		do {
			while (MUTEX_SPINBIT_LOCKED_P(mtx)) {
				SPINLOCK_SPIN_HOOK;
				SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
				if (SPINLOCK_SPINOUT(spins))
					MUTEX_ABORT(mtx, "spinout");
#endif	/* LOCKDEBUG */
			}
		} while (!MUTEX_SPINBIT_LOCK_TRY(mtx));

		if (count != SPINLOCK_BACKOFF_MIN) {
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_EVENT(lsflag, mtx,
			    LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
		}
		LOCKSTAT_EXIT(lsflag);
#endif	/* !MULTIPROCESSOR */
#endif	/* FULL */
		MUTEX_LOCKED(mtx);
		return;
	}

	curthread = (uintptr_t)curlwp;

	MUTEX_DASSERT(mtx, MUTEX_ADAPTIVE_P(owner));
	MUTEX_ASSERT(mtx, curthread != 0);
	MUTEX_ASSERT(mtx, !cpu_intr_p());
	MUTEX_WANTLOCK(mtx);

	if (__predict_true(panicstr == NULL)) {
		KDASSERT(pserialize_not_in_read_section());
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	LOCKSTAT_ENTER(lsflag);

	/*
	 * Adaptive mutex; spin trying to acquire the mutex.  If we
	 * determine that the owner is not running on a processor,
	 * then we stop spinning, and sleep instead.
	 */
	for (;;) {
		if (!MUTEX_OWNED(owner)) {
			/*
			 * Mutex owner clear could mean two things:
			 *
			 *	* The mutex has been released.
			 *	* The owner field hasn't been set yet.
			 *
			 * Try to acquire it again.  If that fails,
			 * we'll just loop again.
			 */
			if (MUTEX_ACQUIRE(mtx, curthread))
				break;
			owner = mtx->mtx_owner;
			continue;
		}
		if (__predict_false(MUTEX_OWNER(owner) == curthread)) {
			MUTEX_ABORT(mtx, "locking against myself");
		}
#ifdef MULTIPROCESSOR
		/*
		 * Check to see if the owner is running on a processor.
		 * If so, then we should just spin, as the owner will
		 * likely release the lock very soon.
		 */
		if (mutex_oncpu(owner)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			count = SPINLOCK_BACKOFF_MIN;
			do {
				KPREEMPT_ENABLE(curlwp);
				SPINLOCK_BACKOFF(count);
				KPREEMPT_DISABLE(curlwp);
				owner = mtx->mtx_owner;
			} while (mutex_oncpu(owner));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if (!MUTEX_OWNED(owner))
				continue;
		}
#endif

		ts = turnstile_lookup(mtx);

		/*
		 * Once we have the turnstile chain interlock, mark the
		 * mutex as having waiters.  If that fails, spin again:
		 * chances are that the mutex has been released.
		 */
		if (!MUTEX_SET_WAITERS(mtx, owner)) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
			continue;
		}

#ifdef MULTIPROCESSOR
		/*
		 * mutex_exit() is permitted to release the mutex without
		 * any interlocking instructions, and the following can
		 * occur as a result:
		 *
		 *  CPU 1: MUTEX_SET_WAITERS()		CPU 2: mutex_exit()
		 * ----------------------------	----------------------------
		 *		..			load mtx->mtx_owner
		 *		..			see has-waiters bit clear
		 *	set has-waiters bit		..
		 *		..			store mtx->mtx_owner := 0
		 *	  return success
		 *
		 * There is another race that can occur: a third CPU could
		 * acquire the mutex as soon as it is released.  Since
		 * adaptive mutexes are primarily spin mutexes, this is not
		 * something that we need to worry about too much.  What we
		 * do need to ensure is that the waiters bit gets set.
		 *
		 * To allow the unlocked release, we need to make some
		 * assumptions here:
		 *
		 * o Release is the only non-atomic/unlocked operation
		 *   that can be performed on the mutex.  (It must still
		 *   be atomic on the local CPU, e.g. in case interrupted
		 *   or preempted).
		 *
		 * o At any given time on each mutex, MUTEX_SET_WAITERS()
		 *   can only ever be in progress on one CPU in the
		 *   system - guaranteed by the turnstile chain lock.
		 *
		 * o No other operations other than MUTEX_SET_WAITERS()
		 *   and release can modify a mutex with a non-zero
		 *   owner field.
		 *
		 * o If the holding LWP switches away, it posts a store
		 *   fence before changing curlwp, ensuring that any
		 *   overwrite of the mutex waiters flag by mutex_exit()
		 *   completes before the modification of curlwp becomes
		 *   visible to this CPU.
		 *
		 * o cpu_switchto() posts a store fence after setting curlwp
		 *   and before resuming execution of an LWP.
		 *
		 * o _kernel_lock() posts a store fence before setting
		 *   curcpu()->ci_biglock_wanted, and after clearing it.
		 *   This ensures that any overwrite of the mutex waiters
		 *   flag by mutex_exit() completes before the modification
		 *   of ci_biglock_wanted becomes visible.
		 *
		 * After MUTEX_SET_WAITERS() succeeds, simultaneously
		 * confirming that the same LWP still holds the mutex
		 * since we took the turnstile lock and notifying it that
		 * we're waiting, we check the lock holder's status again.
		 * Some of the possible outcomes (not an exhaustive list;
		 * XXX this should be made exhaustive):
		 *
		 * 1. The on-CPU check returns true: the holding LWP is
		 *    running again.  The lock may be released soon and
		 *    we should spin.  Importantly, we can't trust the
		 *    value of the waiters flag.
		 *
		 * 2. The on-CPU check returns false: the holding LWP is
		 *    not running.  We now have the opportunity to check
		 *    if mutex_exit() has blatted the modifications made
		 *    by MUTEX_SET_WAITERS().
		 *
		 * 3. The on-CPU check returns false: the holding LWP may
		 *    or may not be running.  It has context switched at
		 *    some point during our check.  Again, we have the
		 *    chance to see if the waiters bit is still set or
		 *    has been overwritten.
		 *
		 * 4. The on-CPU check returns false: the holding LWP is
		 *    running on a CPU, but wants the big lock.  It's OK
		 *    to check the waiters field in this case.
		 *
		 * 5. The has-waiters check fails: the mutex has been
		 *    released, the waiters flag cleared and another LWP
		 *    now owns the mutex.
		 *
		 * 6. The has-waiters check fails: the mutex has been
		 *    released.
		 *
		 * If the waiters bit is not set it's unsafe to go to sleep,
		 * as we might never be awoken.
		 */
		if (mutex_oncpu(owner)) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
			continue;
		}
		membar_consumer();
		if (!MUTEX_HAS_WAITERS(mtx)) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
			continue;
		}
#endif	/* MULTIPROCESSOR */

		LOCKSTAT_START_TIMER(lsflag, slptime);

		turnstile_block(ts, TS_WRITER_Q, mtx, &mutex_syncobj);

		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		owner = mtx->mtx_owner;
	}
	KPREEMPT_ENABLE(curlwp);

	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SLEEP1,
	    slpcnt, slptime);
	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SPIN,
	    spincnt, spintime);
	LOCKSTAT_EXIT(lsflag);

	MUTEX_DASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
	MUTEX_LOCKED(mtx);
}

/*
 * mutex_vector_exit:
 *
 *	Support routine for mutex_exit() that handles all cases.
 */
void
mutex_vector_exit(kmutex_t *mtx)
{
	turnstile_t *ts;
	uintptr_t curthread;

	if (MUTEX_SPIN_P(mtx->mtx_owner)) {
#ifdef FULL
		if (__predict_false(!MUTEX_SPINBIT_LOCKED_P(mtx))) {
			MUTEX_ABORT(mtx, "exiting unheld spin mutex");
		}
		MUTEX_UNLOCKED(mtx);
		MUTEX_SPINBIT_LOCK_UNLOCK(mtx);
#endif
		MUTEX_SPIN_SPLRESTORE(mtx);
		return;
	}

#ifndef __HAVE_MUTEX_STUBS
	/*
	 * On some architectures without mutex stubs, we can enter here to
	 * release mutexes before interrupts and whatnot are up and running.
	 * We need this hack to keep them sweet.
	 */
	if (__predict_false(cold)) {
		MUTEX_UNLOCKED(mtx);
		MUTEX_RELEASE(mtx);
		return;
	}
#endif

	curthread = (uintptr_t)curlwp;
	MUTEX_DASSERT(mtx, curthread != 0);
	MUTEX_ASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
	MUTEX_UNLOCKED(mtx);
#if !defined(LOCKDEBUG)
	__USE(curthread);
#endif

#ifdef LOCKDEBUG
	/*
	 * Avoid having to take the turnstile chain lock every time
	 * around.  Raise the priority level to splhigh() in order
	 * to disable preemption and so make the following atomic.
	 * This also blocks out soft interrupts that could set the
	 * waiters bit.
	 */
	{
		int s = splhigh();
		if (!MUTEX_HAS_WAITERS(mtx)) {
			MUTEX_RELEASE(mtx);
			splx(s);
			return;
		}
		splx(s);
	}
#endif

	/*
	 * Get this lock's turnstile.  This gets the interlock on
	 * the sleep queue.  Once we have that, we can clear the
	 * lock.  If there was no turnstile for the lock, there
	 * were no waiters remaining.
	 */
	ts = turnstile_lookup(mtx);

	if (ts == NULL) {
		MUTEX_RELEASE(mtx);
		turnstile_exit(mtx);
	} else {
		MUTEX_RELEASE(mtx);
		turnstile_wakeup(ts, TS_WRITER_Q,
		    TS_WAITERS(ts, TS_WRITER_Q), NULL);
	}
}

#ifndef __HAVE_SIMPLE_MUTEXES
/*
 * mutex_wakeup:
 *
 *	Support routine for mutex_exit() that wakes up all waiters.
 *	We assume that the mutex has been released, but it need not
 *	be.
 */
void
mutex_wakeup(kmutex_t *mtx)
{
	turnstile_t *ts;

	ts = turnstile_lookup(mtx);
	if (ts == NULL) {
		turnstile_exit(mtx);
		return;
	}
	MUTEX_CLEAR_WAITERS(mtx);
	turnstile_wakeup(ts, TS_WRITER_Q, TS_WAITERS(ts, TS_WRITER_Q), NULL);
}
#endif	/* !__HAVE_SIMPLE_MUTEXES */

/*
 * mutex_owned:
 *
 *	Return true if the current LWP (adaptive) or CPU (spin)
 *	holds the mutex.
 */
int
mutex_owned(const kmutex_t *mtx)
{

	if (mtx == NULL)
		return 0;
	if (MUTEX_ADAPTIVE_P(mtx->mtx_owner))
		return MUTEX_OWNER(mtx->mtx_owner) == (uintptr_t)curlwp;
#ifdef FULL
	return MUTEX_SPINBIT_LOCKED_P(mtx);
#else
	return 1;
#endif
}
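
/*
 * Illustrative use of mutex_owned(): assert that the caller holds a
 * lock in a path that requires it (sc_lock is a hypothetical softc
 * member used only for this example):
 *
 *	KASSERT(mutex_owned(&sc->sc_lock));
 */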

/*
 * mutex_owner:
 *
 *	Return the current owner of an adaptive mutex.  Used for
 *	priority inheritance.
 */
static lwp_t *
mutex_owner(wchan_t wchan)
{
	volatile const kmutex_t *mtx = wchan;

	MUTEX_ASSERT(mtx, MUTEX_ADAPTIVE_P(mtx->mtx_owner));
	return (struct lwp *)MUTEX_OWNER(mtx->mtx_owner);
}

/*
 * mutex_ownable:
 *
 *	When compiled with DEBUG and LOCKDEBUG defined, ensure that
 *	the mutex is available.  We cannot use !mutex_owned() since
 *	that won't work correctly for spin mutexes.
 */
int
mutex_ownable(const kmutex_t *mtx)
{

#ifdef LOCKDEBUG
	MUTEX_TESTLOCK(mtx);
#endif
	return 1;
}

/*
 * mutex_tryenter:
 *
 *	Try to acquire the mutex; return non-zero if we did.
 */
int
mutex_tryenter(kmutex_t *mtx)
{
	uintptr_t curthread;

	/*
	 * Handle spin mutexes.
	 */
	if (MUTEX_SPIN_P(mtx->mtx_owner)) {
		MUTEX_SPIN_SPLRAISE(mtx);
#ifdef FULL
		if (MUTEX_SPINBIT_LOCK_TRY(mtx)) {
			MUTEX_WANTLOCK(mtx);
			MUTEX_LOCKED(mtx);
			return 1;
		}
		MUTEX_SPIN_SPLRESTORE(mtx);
#else
		MUTEX_WANTLOCK(mtx);
		MUTEX_LOCKED(mtx);
		return 1;
#endif
	} else {
		curthread = (uintptr_t)curlwp;
		MUTEX_ASSERT(mtx, curthread != 0);
		if (MUTEX_ACQUIRE(mtx, curthread)) {
			MUTEX_WANTLOCK(mtx);
			MUTEX_LOCKED(mtx);
			MUTEX_DASSERT(mtx,
			    MUTEX_OWNER(mtx->mtx_owner) == curthread);
			return 1;
		}
	}

	return 0;
}
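
/*
 * Illustrative use of mutex_tryenter(): take the lock only if it is
 * immediately available, otherwise back off (sc_lock is hypothetical):
 *
 *	if (!mutex_tryenter(&sc->sc_lock))
 *		return EBUSY;
 *	...critical section...
 *	mutex_exit(&sc->sc_lock);
 */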

#if defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL)
/*
 * mutex_spin_retry:
 *
 *	Support routine for mutex_spin_enter().  Assumes that the caller
 *	has already raised the SPL, and adjusted counters.
 */
void
mutex_spin_retry(kmutex_t *mtx)
{
#ifdef MULTIPROCESSOR
	u_int count;
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_FLAG(lsflag);
#ifdef LOCKDEBUG
	u_int spins = 0;
#endif	/* LOCKDEBUG */

	MUTEX_WANTLOCK(mtx);

	LOCKSTAT_ENTER(lsflag);
	LOCKSTAT_START_TIMER(lsflag, spintime);
	count = SPINLOCK_BACKOFF_MIN;

	/*
	 * Spin, testing the lock word and doing exponential backoff
	 * to reduce cache line ping-ponging between CPUs.
	 */
	do {
		while (MUTEX_SPINBIT_LOCKED_P(mtx)) {
			SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
			if (SPINLOCK_SPINOUT(spins))
				MUTEX_ABORT(mtx, "spinout");
#endif	/* LOCKDEBUG */
		}
	} while (!MUTEX_SPINBIT_LOCK_TRY(mtx));

	LOCKSTAT_STOP_TIMER(lsflag, spintime);
	LOCKSTAT_EVENT(lsflag, mtx, LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
	LOCKSTAT_EXIT(lsflag);

	MUTEX_LOCKED(mtx);
#else	/* MULTIPROCESSOR */
	MUTEX_ABORT(mtx, "locking against myself");
#endif	/* MULTIPROCESSOR */
}
#endif	/* defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL) */