/*	$NetBSD: kern_mutex.c,v 1.93 2020/12/14 19:42:51 skrll Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel mutex implementation, modeled after those found in Solaris,
 * a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	    Richard McDougall.
 */

#define	__MUTEX_PRIVATE

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.93 2020/12/14 19:42:51 skrll Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/proc.h>
#include <sys/mutex.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/kernel.h>
#include <sys/intr.h>
#include <sys/lock.h>
#include <sys/types.h>
#include <sys/cpu.h>
#include <sys/pserialize.h>

#include <dev/lockstat.h>

#include <machine/lock.h>

/*
 * When not running a debug kernel, spin mutexes are not much
 * more than an splraiseipl() and splx() pair.
 */

#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
#define	FULL
#endif
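
/*
 * Illustrative note (not part of the original source): without FULL,
 * the lock-word manipulation below compiles away, so mutex_spin_enter()
 * reduces to little more than MUTEX_SPIN_SPLRAISE() and
 * mutex_spin_exit() to MUTEX_SPIN_SPLRESTORE().
 */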

/*
 * Debugging support.
 */

#define	MUTEX_WANTLOCK(mtx)					\
	LOCKDEBUG_WANTLOCK(MUTEX_DEBUG_P(mtx), (mtx),		\
	    (uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_TESTLOCK(mtx)					\
	LOCKDEBUG_WANTLOCK(MUTEX_DEBUG_P(mtx), (mtx),		\
	    (uintptr_t)__builtin_return_address(0), -1)
#define	MUTEX_LOCKED(mtx)					\
	LOCKDEBUG_LOCKED(MUTEX_DEBUG_P(mtx), (mtx), NULL,	\
	    (uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_UNLOCKED(mtx)					\
	LOCKDEBUG_UNLOCKED(MUTEX_DEBUG_P(mtx), (mtx),		\
	    (uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_ABORT(mtx, msg)					\
	mutex_abort(__func__, __LINE__, mtx, msg)

#if defined(LOCKDEBUG)

#define	MUTEX_DASSERT(mtx, cond)				\
do {								\
	if (__predict_false(!(cond)))				\
		MUTEX_ABORT(mtx, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	MUTEX_DASSERT(mtx, cond)	/* nothing */

#endif	/* LOCKDEBUG */

#if defined(DIAGNOSTIC)

#define	MUTEX_ASSERT(mtx, cond)					\
do {								\
	if (__predict_false(!(cond)))				\
		MUTEX_ABORT(mtx, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else	/* DIAGNOSTIC */

#define	MUTEX_ASSERT(mtx, cond)	/* nothing */

#endif	/* DIAGNOSTIC */

/*
 * Some architectures can't use __cpu_simple_lock as-is, so allow a way
 * for them to use an alternate definition.
 */
#ifndef MUTEX_SPINBIT_LOCK_INIT
#define	MUTEX_SPINBIT_LOCK_INIT(mtx)	__cpu_simple_lock_init(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCKED_P
#define	MUTEX_SPINBIT_LOCKED_P(mtx)	__SIMPLELOCK_LOCKED_P(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCK_TRY
#define	MUTEX_SPINBIT_LOCK_TRY(mtx)	__cpu_simple_lock_try(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCK_UNLOCK
#define	MUTEX_SPINBIT_LOCK_UNLOCK(mtx)	__cpu_simple_unlock(&(mtx)->mtx_lock)
#endif

#ifndef MUTEX_INITIALIZE_SPIN_IPL
#define	MUTEX_INITIALIZE_SPIN_IPL(mtx, ipl) \
	((mtx)->mtx_ipl = makeiplcookie((ipl)))
#endif

/*
 * Spin mutex SPL save / restore.
 */

#define	MUTEX_SPIN_SPLRAISE(mtx)					\
do {									\
	struct cpu_info *x__ci;						\
	int x__cnt, s;							\
	s = splraiseipl(MUTEX_SPIN_IPL(mtx));				\
	x__ci = curcpu();						\
	x__cnt = x__ci->ci_mtx_count--;					\
	__insn_barrier();						\
	if (x__cnt == 0)						\
		x__ci->ci_mtx_oldspl = (s);				\
} while (/* CONSTCOND */ 0)

#define	MUTEX_SPIN_SPLRESTORE(mtx)					\
do {									\
	struct cpu_info *x__ci = curcpu();				\
	int s = x__ci->ci_mtx_oldspl;					\
	__insn_barrier();						\
	if (++(x__ci->ci_mtx_count) == 0)				\
		splx(s);						\
} while (/* CONSTCOND */ 0)
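
/*
 * Illustrative sketch (not part of the original source): ci_mtx_count
 * counts down from zero as spin mutexes nest, so the saved SPL is only
 * restored when the outermost spin mutex is released.  With two
 * hypothetical spin mutexes a and b:
 *
 *	mutex_spin_enter(&a);	ci_mtx_count  0 -> -1, old SPL saved
 *	mutex_spin_enter(&b);	ci_mtx_count -1 -> -2
 *	mutex_spin_exit(&b);	ci_mtx_count -2 -> -1, SPL unchanged
 *	mutex_spin_exit(&a);	ci_mtx_count -1 ->  0, splx(old SPL)
 */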

/*
 * Memory barriers.
 */
#ifdef	__HAVE_ATOMIC_AS_MEMBAR
#define	MUTEX_MEMBAR_ENTER()
#define	MUTEX_MEMBAR_EXIT()
#else
#define	MUTEX_MEMBAR_ENTER()		membar_enter()
#define	MUTEX_MEMBAR_EXIT()		membar_exit()
#endif

/*
 * Architectures that provide 'simple' mutexes supply a CAS function
 * that is either MP-safe, or does not need to be MP-safe.  Adaptive
 * mutexes on these architectures do not require an additional
 * interlock.
 */

#ifdef __HAVE_SIMPLE_MUTEXES

#define	MUTEX_OWNER(owner)						\
	(owner & MUTEX_THREAD)
#define	MUTEX_HAS_WAITERS(mtx)						\
	(((int)(mtx)->mtx_owner & MUTEX_BIT_WAITERS) != 0)

#define	MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug)				\
do {									\
	if (!dodebug)							\
		(mtx)->mtx_owner |= MUTEX_BIT_NODEBUG;			\
} while (/* CONSTCOND */ 0)

#define	MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl)			\
do {									\
	(mtx)->mtx_owner = MUTEX_BIT_SPIN;				\
	if (!dodebug)							\
		(mtx)->mtx_owner |= MUTEX_BIT_NODEBUG;			\
	MUTEX_INITIALIZE_SPIN_IPL((mtx), (ipl));			\
	MUTEX_SPINBIT_LOCK_INIT((mtx));					\
} while (/* CONSTCOND */ 0)

#define	MUTEX_DESTROY(mtx)						\
do {									\
	(mtx)->mtx_owner = MUTEX_THREAD;				\
} while (/* CONSTCOND */ 0)

#define	MUTEX_SPIN_P(owner)						\
	(((owner) & MUTEX_BIT_SPIN) != 0)
#define	MUTEX_ADAPTIVE_P(owner)						\
	(((owner) & MUTEX_BIT_SPIN) == 0)

#define	MUTEX_DEBUG_P(mtx)	(((mtx)->mtx_owner & MUTEX_BIT_NODEBUG) == 0)
#if defined(LOCKDEBUG)
#define	MUTEX_OWNED(owner)		(((owner) & ~MUTEX_BIT_NODEBUG) != 0)
#define	MUTEX_INHERITDEBUG(n, o)	(n) |= (o) & MUTEX_BIT_NODEBUG
#else /* defined(LOCKDEBUG) */
#define	MUTEX_OWNED(owner)		((owner) != 0)
#define	MUTEX_INHERITDEBUG(n, o)	/* nothing */
#endif /* defined(LOCKDEBUG) */

static inline int
MUTEX_ACQUIRE(kmutex_t *mtx, uintptr_t curthread)
{
	int rv;
	uintptr_t oldown = 0;
	uintptr_t newown = curthread;

	MUTEX_INHERITDEBUG(oldown, mtx->mtx_owner);
	MUTEX_INHERITDEBUG(newown, oldown);
	rv = MUTEX_CAS(&mtx->mtx_owner, oldown, newown);
	MUTEX_MEMBAR_ENTER();
	return rv;
}

static inline int
MUTEX_SET_WAITERS(kmutex_t *mtx, uintptr_t owner)
{
	int rv;
	rv = MUTEX_CAS(&mtx->mtx_owner, owner, owner | MUTEX_BIT_WAITERS);
	MUTEX_MEMBAR_ENTER();
	return rv;
}

static inline void
MUTEX_RELEASE(kmutex_t *mtx)
{
	uintptr_t newown;

	MUTEX_MEMBAR_EXIT();
	newown = 0;
	MUTEX_INHERITDEBUG(newown, mtx->mtx_owner);
	mtx->mtx_owner = newown;
}
#endif	/* __HAVE_SIMPLE_MUTEXES */
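
/*
 * Illustrative summary, inferred from the macros above (not part of the
 * original source): with __HAVE_SIMPLE_MUTEXES, an adaptive mutex keeps
 * the owning LWP pointer in mtx_owner, possibly OR'd with
 * MUTEX_BIT_WAITERS and MUTEX_BIT_NODEBUG; a spin mutex keeps
 * MUTEX_BIT_SPIN (and possibly MUTEX_BIT_NODEBUG) there, with the lock
 * state itself held in the separate spin-lock word (mtx_lock).
 */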

/*
 * Patch in stubs via strong alias where they are not available.
 */

#if defined(LOCKDEBUG)
#undef	__HAVE_MUTEX_STUBS
#undef	__HAVE_SPIN_MUTEX_STUBS
#endif

#ifndef __HAVE_MUTEX_STUBS
__strong_alias(mutex_enter,mutex_vector_enter);
__strong_alias(mutex_exit,mutex_vector_exit);
#endif

#ifndef __HAVE_SPIN_MUTEX_STUBS
__strong_alias(mutex_spin_enter,mutex_vector_enter);
__strong_alias(mutex_spin_exit,mutex_vector_exit);
#endif

static void	mutex_abort(const char *, size_t, const kmutex_t *,
    const char *);
static void	mutex_dump(const volatile void *, lockop_printer_t);

lockops_t mutex_spin_lockops = {
	.lo_name = "Mutex",
	.lo_type = LOCKOPS_SPIN,
	.lo_dump = mutex_dump,
};

lockops_t mutex_adaptive_lockops = {
	.lo_name = "Mutex",
	.lo_type = LOCKOPS_SLEEP,
	.lo_dump = mutex_dump,
};

syncobj_t mutex_syncobj = {
	.sobj_flag	= SOBJ_SLEEPQ_SORTED,
	.sobj_unsleep	= turnstile_unsleep,
	.sobj_changepri	= turnstile_changepri,
	.sobj_lendpri	= sleepq_lendpri,
	.sobj_owner	= (void *)mutex_owner,
};

/*
 * mutex_dump:
 *
 *	Dump the contents of a mutex structure.
 */
static void
mutex_dump(const volatile void *cookie, lockop_printer_t pr)
{
	const volatile kmutex_t *mtx = cookie;
	uintptr_t owner = mtx->mtx_owner;

	pr("owner field : %#018lx wait/spin: %16d/%d\n",
	    (long)MUTEX_OWNER(owner), MUTEX_HAS_WAITERS(mtx),
	    MUTEX_SPIN_P(owner));
}

/*
 * mutex_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */
static void __noinline
mutex_abort(const char *func, size_t line, const kmutex_t *mtx, const char *msg)
{

	LOCKDEBUG_ABORT(func, line, mtx, (MUTEX_SPIN_P(mtx->mtx_owner) ?
	    &mutex_spin_lockops : &mutex_adaptive_lockops), msg);
}

/*
 * mutex_init:
 *
 *	Initialize a mutex for use.  Note that adaptive mutexes are in
 *	essence spin mutexes that can sleep to avoid deadlock and wasting
 *	CPU time.  We can't easily provide a type of mutex that always
 *	sleeps - see comments in mutex_vector_enter() about releasing
 *	mutexes unlocked.
 */
void _mutex_init(kmutex_t *, kmutex_type_t, int, uintptr_t);
void
_mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl,
    uintptr_t return_address)
{
	lockops_t *lockops __unused;
	bool dodebug;

	memset(mtx, 0, sizeof(*mtx));

	if (ipl == IPL_NONE || ipl == IPL_SOFTCLOCK ||
	    ipl == IPL_SOFTBIO || ipl == IPL_SOFTNET ||
	    ipl == IPL_SOFTSERIAL) {
		lockops = (type == MUTEX_NODEBUG ?
		    NULL : &mutex_adaptive_lockops);
		dodebug = LOCKDEBUG_ALLOC(mtx, lockops, return_address);
		MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug);
	} else {
		lockops = (type == MUTEX_NODEBUG ?
		    NULL : &mutex_spin_lockops);
		dodebug = LOCKDEBUG_ALLOC(mtx, lockops, return_address);
		MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl);
	}
}

void
mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl)
{

	_mutex_init(mtx, type, ipl, (uintptr_t)__builtin_return_address(0));
}
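
/*
 * Example usage, for illustration only (sc_lock is a hypothetical lock,
 * not part of this file):
 *
 *	static kmutex_t sc_lock;
 *
 *	mutex_init(&sc_lock, MUTEX_DEFAULT, IPL_NONE);
 *	mutex_enter(&sc_lock);
 *	...
 *	mutex_exit(&sc_lock);
 *	mutex_destroy(&sc_lock);
 *
 * An IPL above the soft interrupt levels (e.g. IPL_VM) yields a spin
 * mutex rather than an adaptive one.
 */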

/*
 * mutex_destroy:
 *
 *	Tear down a mutex.
 */
void
mutex_destroy(kmutex_t *mtx)
{
	uintptr_t owner = mtx->mtx_owner;

	if (MUTEX_ADAPTIVE_P(owner)) {
		MUTEX_ASSERT(mtx, !MUTEX_OWNED(owner));
		MUTEX_ASSERT(mtx, !MUTEX_HAS_WAITERS(mtx));
	} else {
		MUTEX_ASSERT(mtx, !MUTEX_SPINBIT_LOCKED_P(mtx));
	}

	LOCKDEBUG_FREE(MUTEX_DEBUG_P(mtx), mtx);
	MUTEX_DESTROY(mtx);
}

#ifdef MULTIPROCESSOR
/*
 * mutex_oncpu:
 *
 *	Return true if an adaptive mutex owner is running on a CPU in the
 *	system.  If the target is waiting on the kernel big lock, then we
 *	must release it.  This is necessary to avoid deadlock.
 */
static bool
mutex_oncpu(uintptr_t owner)
{
	struct cpu_info *ci;
	lwp_t *l;

	KASSERT(kpreempt_disabled());

	if (!MUTEX_OWNED(owner)) {
		return false;
	}

	/*
	 * See lwp_dtor() for why dereferencing the LWP pointer is safe.
	 * We must have kernel preemption disabled for that.
	 */
	l = (lwp_t *)MUTEX_OWNER(owner);
	ci = l->l_cpu;

	if (ci && ci->ci_curlwp == l) {
		/* Target is running; do we need to block? */
		return (ci->ci_biglock_wanted != l);
	}

	/* Not running.  It may be safe to block now. */
	return false;
}
#endif	/* MULTIPROCESSOR */

/*
 * mutex_vector_enter:
 *
 *	Support routine for mutex_enter() that must handle all cases.  In
 *	the LOCKDEBUG case, mutex_enter() is always aliased here, even if
 *	fast-path stubs are available.  If a mutex_spin_enter() stub is
 *	not available, then it is also aliased directly here.
 */
void
mutex_vector_enter(kmutex_t *mtx)
{
	uintptr_t owner, curthread;
	turnstile_t *ts;
#ifdef MULTIPROCESSOR
	u_int count;
#endif
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_COUNTER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_FLAG(lsflag);

	/*
	 * Handle spin mutexes.
	 */
	KPREEMPT_DISABLE(curlwp);
	owner = mtx->mtx_owner;
	if (MUTEX_SPIN_P(owner)) {
#if defined(LOCKDEBUG) && defined(MULTIPROCESSOR)
		u_int spins = 0;
#endif
		KPREEMPT_ENABLE(curlwp);
		MUTEX_SPIN_SPLRAISE(mtx);
		MUTEX_WANTLOCK(mtx);
#ifdef FULL
		if (MUTEX_SPINBIT_LOCK_TRY(mtx)) {
			MUTEX_LOCKED(mtx);
			return;
		}
#if !defined(MULTIPROCESSOR)
		MUTEX_ABORT(mtx, "locking against myself");
#else /* !MULTIPROCESSOR */

		LOCKSTAT_ENTER(lsflag);
		LOCKSTAT_START_TIMER(lsflag, spintime);
		count = SPINLOCK_BACKOFF_MIN;

		/*
		 * Spin, testing the lock word and doing exponential
		 * backoff to reduce cache line ping-ponging between CPUs.
		 */
		do {
			while (MUTEX_SPINBIT_LOCKED_P(mtx)) {
				SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
				if (SPINLOCK_SPINOUT(spins))
					MUTEX_ABORT(mtx, "spinout");
#endif	/* LOCKDEBUG */
			}
		} while (!MUTEX_SPINBIT_LOCK_TRY(mtx));

		if (count != SPINLOCK_BACKOFF_MIN) {
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_EVENT(lsflag, mtx,
			    LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
		}
		LOCKSTAT_EXIT(lsflag);
#endif	/* !MULTIPROCESSOR */
#endif	/* FULL */
		MUTEX_LOCKED(mtx);
		return;
	}

	curthread = (uintptr_t)curlwp;

	MUTEX_DASSERT(mtx, MUTEX_ADAPTIVE_P(owner));
	MUTEX_ASSERT(mtx, curthread != 0);
	MUTEX_ASSERT(mtx, !cpu_intr_p());
	MUTEX_WANTLOCK(mtx);

	if (panicstr == NULL) {
		KDASSERT(pserialize_not_in_read_section());
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	LOCKSTAT_ENTER(lsflag);

	/*
	 * Adaptive mutex; spin trying to acquire the mutex.  If we
	 * determine that the owner is not running on a processor,
	 * then we stop spinning, and sleep instead.
	 */
	for (;;) {
		if (!MUTEX_OWNED(owner)) {
			/*
			 * A clear owner field could mean one of two
			 * things:
			 *
			 *	* The mutex has been released.
			 *	* The owner field hasn't been set yet.
			 *
			 * Try to acquire it again.  If that fails,
			 * we'll just loop again.
			 */
			if (MUTEX_ACQUIRE(mtx, curthread))
				break;
			owner = mtx->mtx_owner;
			continue;
		}
		if (__predict_false(MUTEX_OWNER(owner) == curthread)) {
			MUTEX_ABORT(mtx, "locking against myself");
		}
#ifdef MULTIPROCESSOR
		/*
		 * Check to see if the owner is running on a processor.
		 * If so, then we should just spin, as the owner will
		 * likely release the lock very soon.
		 */
		if (mutex_oncpu(owner)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			count = SPINLOCK_BACKOFF_MIN;
			do {
				KPREEMPT_ENABLE(curlwp);
				SPINLOCK_BACKOFF(count);
				KPREEMPT_DISABLE(curlwp);
				owner = mtx->mtx_owner;
			} while (mutex_oncpu(owner));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if (!MUTEX_OWNED(owner))
				continue;
		}
#endif

		ts = turnstile_lookup(mtx);

		/*
		 * Once we have the turnstile chain interlock, mark the
		 * mutex as having waiters.  If that fails, spin again:
		 * chances are that the mutex has been released.
		 */
		if (!MUTEX_SET_WAITERS(mtx, owner)) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
			continue;
		}

#ifdef MULTIPROCESSOR
		/*
		 * mutex_exit() is permitted to release the mutex without
		 * any interlocking instructions, and the following can
		 * occur as a result:
		 *
		 *  CPU 1: MUTEX_SET_WAITERS()	CPU 2: mutex_exit()
		 * ----------------------------	----------------------------
		 *	..			acquire cache line
		 *	..			test for waiters
		 *	acquire cache line  <-	lose cache line
		 *	lock cache line		..
		 *	verify mutex is held	..
		 *	set waiters		..
		 *	unlock cache line	..
		 *	lose cache line	    ->	acquire cache line
		 *	..			clear lock word, waiters
		 *	return success
		 *
		 * There is another race that can occur: a third CPU could
		 * acquire the mutex as soon as it is released.  Since
		 * adaptive mutexes are primarily spin mutexes, this is not
		 * something that we need to worry about too much.  What we
		 * do need to ensure is that the waiters bit gets set.
		 *
		 * To allow the unlocked release, we need to make some
		 * assumptions here:
		 *
		 *	o Release is the only non-atomic/unlocked operation
		 *	  that can be performed on the mutex.  (It must still
		 *	  be atomic on the local CPU, e.g. if it is
		 *	  interrupted or preempted.)
		 *
		 *	o At any given time, MUTEX_SET_WAITERS() can only ever
		 *	  be in progress on one CPU in the system - guaranteed
		 *	  by the turnstile chain lock.
		 *
		 *	o No other operations other than MUTEX_SET_WAITERS()
		 *	  and release can modify a mutex with a non-zero
		 *	  owner field.
		 *
		 *	o The result of a successful MUTEX_SET_WAITERS() call
		 *	  is an unbuffered write that is immediately visible
		 *	  to all other processors in the system.
		 *
		 *	o If the holding LWP switches away, it posts a store
		 *	  fence before changing curlwp, ensuring that any
		 *	  overwrite of the mutex waiters flag by mutex_exit()
		 *	  completes before the modification of curlwp becomes
		 *	  visible to this CPU.
		 *
		 *	o mi_switch() posts a store fence before setting curlwp
		 *	  and before resuming execution of an LWP.
		 *
		 *	o _kernel_lock() posts a store fence before setting
		 *	  curcpu()->ci_biglock_wanted, and after clearing it.
		 *	  This ensures that any overwrite of the mutex waiters
		 *	  flag by mutex_exit() completes before the modification
		 *	  of ci_biglock_wanted becomes visible.
		 *
		 * We now post a read memory barrier (after setting the
		 * waiters field) and check the lock holder's status again.
		 * Some of the possible outcomes (not an exhaustive list):
		 *
		 *	1. The on-CPU check returns true: the holding LWP is
		 *	   running again.  The lock may be released soon and
		 *	   we should spin.  Importantly, we can't trust the
		 *	   value of the waiters flag.
		 *
		 *	2. The on-CPU check returns false: the holding LWP is
		 *	   not running.  We now have the opportunity to check
		 *	   if mutex_exit() has blatted the modifications made
		 *	   by MUTEX_SET_WAITERS().
		 *
		 *	3. The on-CPU check returns false: the holding LWP may
		 *	   or may not be running.  It has context switched at
		 *	   some point during our check.  Again, we have the
		 *	   chance to see if the waiters bit is still set or
		 *	   has been overwritten.
		 *
		 *	4. The on-CPU check returns false: the holding LWP is
		 *	   running on a CPU, but wants the big lock.  It's OK
		 *	   to check the waiters field in this case.
		 *
		 *	5. The has-waiters check fails: the mutex has been
		 *	   released, the waiters flag cleared and another LWP
		 *	   now owns the mutex.
		 *
		 *	6. The has-waiters check fails: the mutex has been
		 *	   released.
		 *
		 * If the waiters bit is not set it's unsafe to go to sleep,
		 * as we might never be awoken.
		 */
		membar_consumer();
		if (mutex_oncpu(owner)) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
			continue;
		}
		membar_consumer();
		if (!MUTEX_HAS_WAITERS(mtx)) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
			continue;
		}
#endif	/* MULTIPROCESSOR */

		LOCKSTAT_START_TIMER(lsflag, slptime);

		turnstile_block(ts, TS_WRITER_Q, mtx, &mutex_syncobj);

		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		owner = mtx->mtx_owner;
	}
	KPREEMPT_ENABLE(curlwp);

	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SLEEP1,
	    slpcnt, slptime);
	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SPIN,
	    spincnt, spintime);
	LOCKSTAT_EXIT(lsflag);

	MUTEX_DASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
	MUTEX_LOCKED(mtx);
}

/*
 * mutex_vector_exit:
 *
 *	Support routine for mutex_exit() that handles all cases.
 */
void
mutex_vector_exit(kmutex_t *mtx)
{
	turnstile_t *ts;
	uintptr_t curthread;

	if (MUTEX_SPIN_P(mtx->mtx_owner)) {
#ifdef FULL
		if (__predict_false(!MUTEX_SPINBIT_LOCKED_P(mtx))) {
			MUTEX_ABORT(mtx, "exiting unheld spin mutex");
		}
		MUTEX_UNLOCKED(mtx);
		MUTEX_SPINBIT_LOCK_UNLOCK(mtx);
#endif
		MUTEX_SPIN_SPLRESTORE(mtx);
		return;
	}

#ifndef __HAVE_MUTEX_STUBS
	/*
	 * On some architectures without mutex stubs, we can enter here to
	 * release mutexes before interrupts and whatnot are up and running.
	 * We need this hack to keep them sweet.
	 */
	if (__predict_false(cold)) {
		MUTEX_UNLOCKED(mtx);
		MUTEX_RELEASE(mtx);
		return;
	}
#endif

	curthread = (uintptr_t)curlwp;
	MUTEX_DASSERT(mtx, curthread != 0);
	MUTEX_ASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
	MUTEX_UNLOCKED(mtx);
#if !defined(LOCKDEBUG)
	__USE(curthread);
#endif

#ifdef LOCKDEBUG
	/*
	 * Avoid having to take the turnstile chain lock every time
	 * around.  Raise the priority level to splhigh() in order
	 * to disable preemption and so make the following atomic.
	 */
	{
		int s = splhigh();
		if (!MUTEX_HAS_WAITERS(mtx)) {
			MUTEX_RELEASE(mtx);
			splx(s);
			return;
		}
		splx(s);
	}
#endif

	/*
	 * Get this lock's turnstile.  This gets the interlock on
	 * the sleep queue.  Once we have that, we can clear the
	 * lock.  If there was no turnstile for the lock, there
	 * were no waiters remaining.
	 */
	ts = turnstile_lookup(mtx);

	if (ts == NULL) {
		MUTEX_RELEASE(mtx);
		turnstile_exit(mtx);
	} else {
		MUTEX_RELEASE(mtx);
		turnstile_wakeup(ts, TS_WRITER_Q,
		    TS_WAITERS(ts, TS_WRITER_Q), NULL);
	}
}

#ifndef __HAVE_SIMPLE_MUTEXES
/*
 * mutex_wakeup:
 *
 *	Support routine for mutex_exit() that wakes up all waiters.
 *	We assume that the mutex has been released, but it need not
 *	be.
 */
void
mutex_wakeup(kmutex_t *mtx)
{
	turnstile_t *ts;

	ts = turnstile_lookup(mtx);
	if (ts == NULL) {
		turnstile_exit(mtx);
		return;
	}
	MUTEX_CLEAR_WAITERS(mtx);
	turnstile_wakeup(ts, TS_WRITER_Q, TS_WAITERS(ts, TS_WRITER_Q), NULL);
}
#endif	/* !__HAVE_SIMPLE_MUTEXES */

/*
 * mutex_owned:
 *
 *	Return true if the current LWP (adaptive) or CPU (spin)
 *	holds the mutex.
 */
int
mutex_owned(const kmutex_t *mtx)
{

	if (mtx == NULL)
		return 0;
	if (MUTEX_ADAPTIVE_P(mtx->mtx_owner))
		return MUTEX_OWNER(mtx->mtx_owner) == (uintptr_t)curlwp;
#ifdef FULL
	return MUTEX_SPINBIT_LOCKED_P(mtx);
#else
	return 1;
#endif
}
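
/*
 * Typical use, for illustration only: asserting that a hypothetical
 * lock is held by the caller, e.g.
 *
 *	KASSERT(mutex_owned(&sc_lock));
 */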

/*
 * mutex_owner:
 *
 *	Return the current owner of an adaptive mutex.  Used for
 *	priority inheritance.
 */
lwp_t *
mutex_owner(const kmutex_t *mtx)
{

	MUTEX_ASSERT(mtx, MUTEX_ADAPTIVE_P(mtx->mtx_owner));
	return (struct lwp *)MUTEX_OWNER(mtx->mtx_owner);
}

/*
 * mutex_owner_running:
 *
 *	Return true if an adaptive mutex is unheld, or held and the owner is
 *	running on a CPU.  For the pagedaemon only - do not document or use
 *	in other code.
 */
bool
mutex_owner_running(const kmutex_t *mtx)
{
#ifdef MULTIPROCESSOR
	uintptr_t owner;
	bool rv;

	MUTEX_ASSERT(mtx, MUTEX_ADAPTIVE_P(mtx->mtx_owner));
	kpreempt_disable();
	owner = mtx->mtx_owner;
	rv = !MUTEX_OWNED(owner) || mutex_oncpu(MUTEX_OWNER(owner));
	kpreempt_enable();
	return rv;
#else
	return mutex_owner(mtx) == curlwp;
#endif
}

/*
 * mutex_ownable:
 *
 *	When compiled with DEBUG and LOCKDEBUG defined, ensure that
 *	the mutex is available.  We cannot use !mutex_owned() since
 *	that won't work correctly for spin mutexes.
 */
int
mutex_ownable(const kmutex_t *mtx)
{

#ifdef LOCKDEBUG
	MUTEX_TESTLOCK(mtx);
#endif
	return 1;
}

/*
 * mutex_tryenter:
 *
 *	Try to acquire the mutex; return non-zero if we did.
 */
int
mutex_tryenter(kmutex_t *mtx)
{
	uintptr_t curthread;

	/*
	 * Handle spin mutexes.
	 */
	if (MUTEX_SPIN_P(mtx->mtx_owner)) {
		MUTEX_SPIN_SPLRAISE(mtx);
#ifdef FULL
		if (MUTEX_SPINBIT_LOCK_TRY(mtx)) {
			MUTEX_WANTLOCK(mtx);
			MUTEX_LOCKED(mtx);
			return 1;
		}
		MUTEX_SPIN_SPLRESTORE(mtx);
#else
		MUTEX_WANTLOCK(mtx);
		MUTEX_LOCKED(mtx);
		return 1;
#endif
	} else {
		curthread = (uintptr_t)curlwp;
		MUTEX_ASSERT(mtx, curthread != 0);
		if (MUTEX_ACQUIRE(mtx, curthread)) {
			MUTEX_WANTLOCK(mtx);
			MUTEX_LOCKED(mtx);
			MUTEX_DASSERT(mtx,
			    MUTEX_OWNER(mtx->mtx_owner) == curthread);
			return 1;
		}
	}

	return 0;
}
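
/*
 * Example usage, for illustration only (sc_lock is hypothetical):
 *
 *	if (mutex_tryenter(&sc_lock)) {
 *		... work that must not block waiting for the lock ...
 *		mutex_exit(&sc_lock);
 *	} else {
 *		... back off and retry later ...
 *	}
 */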

#if defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL)
/*
 * mutex_spin_retry:
 *
 *	Support routine for mutex_spin_enter().  Assumes that the caller
 *	has already raised the SPL, and adjusted counters.
 */
void
mutex_spin_retry(kmutex_t *mtx)
{
#ifdef MULTIPROCESSOR
	u_int count;
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_FLAG(lsflag);
#ifdef LOCKDEBUG
	u_int spins = 0;
#endif	/* LOCKDEBUG */

	MUTEX_WANTLOCK(mtx);

	LOCKSTAT_ENTER(lsflag);
	LOCKSTAT_START_TIMER(lsflag, spintime);
	count = SPINLOCK_BACKOFF_MIN;

	/*
	 * Spin, testing the lock word and doing exponential backoff
	 * to reduce cache line ping-ponging between CPUs.
	 */
	do {
		while (MUTEX_SPINBIT_LOCKED_P(mtx)) {
			SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
			if (SPINLOCK_SPINOUT(spins))
				MUTEX_ABORT(mtx, "spinout");
#endif	/* LOCKDEBUG */
		}
	} while (!MUTEX_SPINBIT_LOCK_TRY(mtx));

	LOCKSTAT_STOP_TIMER(lsflag, spintime);
	LOCKSTAT_EVENT(lsflag, mtx, LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
	LOCKSTAT_EXIT(lsflag);

	MUTEX_LOCKED(mtx);
#else	/* MULTIPROCESSOR */
	MUTEX_ABORT(mtx, "locking against myself");
#endif	/* MULTIPROCESSOR */
}
#endif	/* defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL) */