/*	$NetBSD: kern_mutex.c,v 1.71 2018/02/05 04:25:04 ozaki-r Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel mutex implementation, modeled after those found in Solaris,
 * a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	    Richard McDougall.
 */

#define	__MUTEX_PRIVATE

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.71 2018/02/05 04:25:04 ozaki-r Exp $");

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/proc.h>
#include <sys/mutex.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/kernel.h>
#include <sys/intr.h>
#include <sys/lock.h>
#include <sys/types.h>

#include <dev/lockstat.h>

#include <machine/lock.h>

/*
 * When not running a debug kernel, spin mutexes are not much
 * more than an splraiseipl() and splx() pair.
 */

#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
#define	FULL
#endif

/*
 * Debugging support.
 */

#define	MUTEX_WANTLOCK(mtx)					\
    LOCKDEBUG_WANTLOCK(MUTEX_DEBUG_P(mtx), (mtx),		\
	(uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_TESTLOCK(mtx)					\
    LOCKDEBUG_WANTLOCK(MUTEX_DEBUG_P(mtx), (mtx),		\
	(uintptr_t)__builtin_return_address(0), -1)
#define	MUTEX_LOCKED(mtx)					\
    LOCKDEBUG_LOCKED(MUTEX_DEBUG_P(mtx), (mtx), NULL,		\
	(uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_UNLOCKED(mtx)					\
    LOCKDEBUG_UNLOCKED(MUTEX_DEBUG_P(mtx), (mtx),		\
	(uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_ABORT(mtx, msg)					\
    mutex_abort(__func__, __LINE__, mtx, msg)

#if defined(LOCKDEBUG)

#define	MUTEX_DASSERT(mtx, cond)				\
do {								\
	if (!(cond))						\
		MUTEX_ABORT(mtx, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0);

#else	/* LOCKDEBUG */

#define	MUTEX_DASSERT(mtx, cond)	/* nothing */

#endif	/* LOCKDEBUG */

#if defined(DIAGNOSTIC)

#define	MUTEX_ASSERT(mtx, cond)					\
do {								\
	if (!(cond))						\
		MUTEX_ABORT(mtx, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else	/* DIAGNOSTIC */

#define	MUTEX_ASSERT(mtx, cond)	/* nothing */

#endif	/* DIAGNOSTIC */

/*
 * Some architectures can't use __cpu_simple_lock as is so allow a way
 * for them to use an alternate definition.
 */
#ifndef MUTEX_SPINBIT_LOCK_INIT
#define	MUTEX_SPINBIT_LOCK_INIT(mtx)	__cpu_simple_lock_init(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCKED_P
#define	MUTEX_SPINBIT_LOCKED_P(mtx)	__SIMPLELOCK_LOCKED_P(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCK_TRY
#define	MUTEX_SPINBIT_LOCK_TRY(mtx)	__cpu_simple_lock_try(&(mtx)->mtx_lock)
#endif
#ifndef MUTEX_SPINBIT_LOCK_UNLOCK
#define	MUTEX_SPINBIT_LOCK_UNLOCK(mtx)	__cpu_simple_unlock(&(mtx)->mtx_lock)
#endif

#ifndef	MUTEX_INITIALIZE_SPIN_IPL
#define	MUTEX_INITIALIZE_SPIN_IPL(mtx, ipl) \
					((mtx)->mtx_ipl = makeiplcookie((ipl)))
#endif

/*
 * Spin mutex SPL save / restore.
 */

#define	MUTEX_SPIN_SPLRAISE(mtx)					\
do {									\
	struct cpu_info *x__ci;						\
	int x__cnt, s;							\
	s = splraiseipl(MUTEX_SPIN_IPL(mtx));				\
	x__ci = curcpu();						\
	x__cnt = x__ci->ci_mtx_count--;					\
	__insn_barrier();						\
	if (x__cnt == 0)						\
		x__ci->ci_mtx_oldspl = (s);				\
} while (/* CONSTCOND */ 0)

#define	MUTEX_SPIN_SPLRESTORE(mtx)					\
do {									\
	struct cpu_info *x__ci = curcpu();				\
	int s = x__ci->ci_mtx_oldspl;					\
	__insn_barrier();						\
	if (++(x__ci->ci_mtx_count) == 0)				\
		splx(s);						\
} while (/* CONSTCOND */ 0)
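
/*
 * Note on nesting: ci_mtx_count counts held spin mutexes downward from
 * zero.  For example, taking two spin mutexes in sequence calls
 * splraiseipl() twice, but the pre-existing SPL is recorded only on the
 * first (outermost) acquisition, and splx() runs only when the second
 * MUTEX_SPIN_SPLRESTORE() brings the count back to zero.
 */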

/*
 * For architectures that provide 'simple' mutexes: they provide a
 * CAS function that is either MP-safe, or does not need to be MP
 * safe.  Adaptive mutexes on these architectures do not require an
 * additional interlock.
 */

#ifdef __HAVE_SIMPLE_MUTEXES

#define	MUTEX_OWNER(owner)						\
	(owner & MUTEX_THREAD)
#define	MUTEX_HAS_WAITERS(mtx)						\
	(((int)(mtx)->mtx_owner & MUTEX_BIT_WAITERS) != 0)

#define	MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug)				\
do {									\
	if (!dodebug)							\
		(mtx)->mtx_owner |= MUTEX_BIT_NODEBUG;			\
} while (/* CONSTCOND */ 0)

#define	MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl)			\
do {									\
	(mtx)->mtx_owner = MUTEX_BIT_SPIN;				\
	if (!dodebug)							\
		(mtx)->mtx_owner |= MUTEX_BIT_NODEBUG;			\
	MUTEX_INITIALIZE_SPIN_IPL((mtx), (ipl));			\
	MUTEX_SPINBIT_LOCK_INIT((mtx));					\
} while (/* CONSTCOND */ 0)

#define	MUTEX_DESTROY(mtx)						\
do {									\
	(mtx)->mtx_owner = MUTEX_THREAD;				\
} while (/* CONSTCOND */ 0);

#define	MUTEX_SPIN_P(mtx)						\
    (((mtx)->mtx_owner & MUTEX_BIT_SPIN) != 0)
#define	MUTEX_ADAPTIVE_P(mtx)						\
    (((mtx)->mtx_owner & MUTEX_BIT_SPIN) == 0)

#define	MUTEX_DEBUG_P(mtx)	(((mtx)->mtx_owner & MUTEX_BIT_NODEBUG) == 0)
#if defined(LOCKDEBUG)
#define	MUTEX_OWNED(owner)		(((owner) & ~MUTEX_BIT_NODEBUG) != 0)
#define	MUTEX_INHERITDEBUG(n, o)	(n) |= (o) & MUTEX_BIT_NODEBUG
#else /* defined(LOCKDEBUG) */
#define	MUTEX_OWNED(owner)		((owner) != 0)
#define	MUTEX_INHERITDEBUG(n, o)	/* nothing */
#endif /* defined(LOCKDEBUG) */

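/*
 * MUTEX_ACQUIRE: try to swing the owner field from unowned to curthread
 * with a compare-and-swap, preserving the MUTEX_BIT_NODEBUG bit under
 * LOCKDEBUG; MUTEX_RECEIVE() supplies the architecture's acquire-side
 * memory barrier.  Returns non-zero on success.
 */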
static inline int
MUTEX_ACQUIRE(kmutex_t *mtx, uintptr_t curthread)
{
	int rv;
	uintptr_t oldown = 0;
	uintptr_t newown = curthread;

	MUTEX_INHERITDEBUG(oldown, mtx->mtx_owner);
	MUTEX_INHERITDEBUG(newown, oldown);
	rv = MUTEX_CAS(&mtx->mtx_owner, oldown, newown);
	MUTEX_RECEIVE(mtx);
	return rv;
}

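/*
 * MUTEX_SET_WAITERS: atomically set the waiters bit, but only if the
 * owner field still holds the value the caller observed.  Returns
 * non-zero if the bit was set.
 */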
static inline int
MUTEX_SET_WAITERS(kmutex_t *mtx, uintptr_t owner)
{
	int rv;
	rv = MUTEX_CAS(&mtx->mtx_owner, owner, owner | MUTEX_BIT_WAITERS);
	MUTEX_RECEIVE(mtx);
	return rv;
}

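/*
 * MUTEX_RELEASE: release the mutex with a plain store, keeping only the
 * debug bit.  This is deliberately not an interlocked operation; see the
 * discussion of the resulting races in mutex_vector_enter().
 */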
static inline void
MUTEX_RELEASE(kmutex_t *mtx)
{
	uintptr_t newown;

	MUTEX_GIVE(mtx);
	newown = 0;
	MUTEX_INHERITDEBUG(newown, mtx->mtx_owner);
	mtx->mtx_owner = newown;
}
#endif	/* __HAVE_SIMPLE_MUTEXES */

/*
 * Patch in stubs via strong alias where they are not available.
 */

#if defined(LOCKDEBUG)
#undef	__HAVE_MUTEX_STUBS
#undef	__HAVE_SPIN_MUTEX_STUBS
#endif

#ifndef __HAVE_MUTEX_STUBS
__strong_alias(mutex_enter,mutex_vector_enter);
__strong_alias(mutex_exit,mutex_vector_exit);
#endif

#ifndef __HAVE_SPIN_MUTEX_STUBS
__strong_alias(mutex_spin_enter,mutex_vector_enter);
__strong_alias(mutex_spin_exit,mutex_vector_exit);
#endif

static void	mutex_abort(const char *, size_t, const kmutex_t *,
    const char *);
static void	mutex_dump(const volatile void *);

lockops_t mutex_spin_lockops = {
	.lo_name = "Mutex",
	.lo_type = LOCKOPS_SPIN,
	.lo_dump = mutex_dump,
};

lockops_t mutex_adaptive_lockops = {
	.lo_name = "Mutex",
	.lo_type = LOCKOPS_SLEEP,
	.lo_dump = mutex_dump,
};

syncobj_t mutex_syncobj = {
	.sobj_flag	= SOBJ_SLEEPQ_SORTED,
	.sobj_unsleep	= turnstile_unsleep,
	.sobj_changepri	= turnstile_changepri,
	.sobj_lendpri	= sleepq_lendpri,
	.sobj_owner	= (void *)mutex_owner,
};

/*
 * mutex_dump:
 *
 *	Dump the contents of a mutex structure.
 */
void
mutex_dump(const volatile void *cookie)
{
	const volatile kmutex_t *mtx = cookie;

	printf_nolog("owner field : %#018lx wait/spin: %16d/%d\n",
	    (long)MUTEX_OWNER(mtx->mtx_owner), MUTEX_HAS_WAITERS(mtx),
	    MUTEX_SPIN_P(mtx));
}

/*
 * mutex_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */
void __noinline
mutex_abort(const char *func, size_t line, const kmutex_t *mtx, const char *msg)
{

	LOCKDEBUG_ABORT(func, line, mtx, (MUTEX_SPIN_P(mtx) ?
	    &mutex_spin_lockops : &mutex_adaptive_lockops), msg);
}

/*
 * mutex_init:
 *
 *	Initialize a mutex for use.  Note that adaptive mutexes are in
 *	essence spin mutexes that can sleep to avoid deadlock and wasting
 *	CPU time.  We can't easily provide a type of mutex that always
 *	sleeps - see comments in mutex_vector_enter() about releasing
 *	mutexes unlocked.
 */
void _mutex_init(kmutex_t *, kmutex_type_t, int, uintptr_t);
void
_mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl,
    uintptr_t return_address)
{
	bool dodebug;

	memset(mtx, 0, sizeof(*mtx));

	switch (type) {
	case MUTEX_ADAPTIVE:
		KASSERT(ipl == IPL_NONE);
		break;
	case MUTEX_DEFAULT:
	case MUTEX_DRIVER:
		if (ipl == IPL_NONE || ipl == IPL_SOFTCLOCK ||
		    ipl == IPL_SOFTBIO || ipl == IPL_SOFTNET ||
		    ipl == IPL_SOFTSERIAL) {
			type = MUTEX_ADAPTIVE;
		} else {
			type = MUTEX_SPIN;
		}
		break;
	default:
		break;
	}

	switch (type) {
	case MUTEX_NODEBUG:
		dodebug = LOCKDEBUG_ALLOC(mtx, NULL, return_address);
		MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl);
		break;
	case MUTEX_ADAPTIVE:
		dodebug = LOCKDEBUG_ALLOC(mtx, &mutex_adaptive_lockops,
		    return_address);
		MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug);
		break;
	case MUTEX_SPIN:
		dodebug = LOCKDEBUG_ALLOC(mtx, &mutex_spin_lockops,
		    return_address);
		MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl);
		break;
	default:
		panic("mutex_init: impossible type");
		break;
	}
}

void
mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl)
{

	_mutex_init(mtx, type, ipl, (uintptr_t)__builtin_return_address(0));
}
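
/*
 * Illustrative usage sketch (not part of this file; "sc_lock" is just an
 * example name):
 *
 *	kmutex_t sc_lock;
 *
 *	mutex_init(&sc_lock, MUTEX_DEFAULT, IPL_NONE);
 *	mutex_enter(&sc_lock);
 *	... critical section ...
 *	mutex_exit(&sc_lock);
 *	mutex_destroy(&sc_lock);
 *
 * With IPL_NONE or a software interrupt level, MUTEX_DEFAULT yields an
 * adaptive mutex; with a hardware interrupt level it yields a spin
 * mutex (see _mutex_init() above).
 */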

/*
 * mutex_destroy:
 *
 *	Tear down a mutex.
 */
void
mutex_destroy(kmutex_t *mtx)
{

	if (MUTEX_ADAPTIVE_P(mtx)) {
		MUTEX_ASSERT(mtx, !MUTEX_OWNED(mtx->mtx_owner) &&
		    !MUTEX_HAS_WAITERS(mtx));
	} else {
		MUTEX_ASSERT(mtx, !MUTEX_SPINBIT_LOCKED_P(mtx));
	}

	LOCKDEBUG_FREE(MUTEX_DEBUG_P(mtx), mtx);
	MUTEX_DESTROY(mtx);
}

#ifdef MULTIPROCESSOR
/*
 * mutex_oncpu:
 *
 *	Return true if an adaptive mutex owner is running on a CPU in the
 *	system.  If the target is waiting on the kernel big lock, then we
 *	must release it.  This is necessary to avoid deadlock.
 */
static bool
mutex_oncpu(uintptr_t owner)
{
	struct cpu_info *ci;
	lwp_t *l;

	KASSERT(kpreempt_disabled());

	if (!MUTEX_OWNED(owner)) {
		return false;
	}

	/*
	 * See lwp_dtor() why dereference of the LWP pointer is safe.
	 * We must have kernel preemption disabled for that.
	 */
	l = (lwp_t *)MUTEX_OWNER(owner);
	ci = l->l_cpu;

	if (ci && ci->ci_curlwp == l) {
		/* Target is running; do we need to block? */
		return (ci->ci_biglock_wanted != l);
	}

	/* Not running.  It may be safe to block now. */
	return false;
}
#endif	/* MULTIPROCESSOR */

/*
 * mutex_vector_enter:
 *
 *	Support routine for mutex_enter() that must handle all cases.  In
 *	the LOCKDEBUG case, mutex_enter() is always aliased here, even if
 *	fast-path stubs are available.  If a mutex_spin_enter() stub is
 *	not available, then it is also aliased directly here.
 */
void
mutex_vector_enter(kmutex_t *mtx)
{
	uintptr_t owner, curthread;
	turnstile_t *ts;
#ifdef MULTIPROCESSOR
	u_int count;
#endif
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_COUNTER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_FLAG(lsflag);

	/*
	 * Handle spin mutexes.
	 */
	if (MUTEX_SPIN_P(mtx)) {
#if defined(LOCKDEBUG) && defined(MULTIPROCESSOR)
		u_int spins = 0;
#endif
		MUTEX_SPIN_SPLRAISE(mtx);
		MUTEX_WANTLOCK(mtx);
#ifdef FULL
		if (MUTEX_SPINBIT_LOCK_TRY(mtx)) {
			MUTEX_LOCKED(mtx);
			return;
		}
#if !defined(MULTIPROCESSOR)
		MUTEX_ABORT(mtx, "locking against myself");
#else /* !MULTIPROCESSOR */

		LOCKSTAT_ENTER(lsflag);
		LOCKSTAT_START_TIMER(lsflag, spintime);
		count = SPINLOCK_BACKOFF_MIN;

		/*
		 * Spin testing the lock word and do exponential backoff
		 * to reduce cache line ping-ponging between CPUs.
		 */
		do {
			if (panicstr != NULL)
				break;
			while (MUTEX_SPINBIT_LOCKED_P(mtx)) {
				SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
				if (SPINLOCK_SPINOUT(spins))
					MUTEX_ABORT(mtx, "spinout");
#endif	/* LOCKDEBUG */
			}
		} while (!MUTEX_SPINBIT_LOCK_TRY(mtx));

		if (count != SPINLOCK_BACKOFF_MIN) {
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_EVENT(lsflag, mtx,
			    LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
		}
		LOCKSTAT_EXIT(lsflag);
#endif	/* !MULTIPROCESSOR */
#endif	/* FULL */
		MUTEX_LOCKED(mtx);
		return;
	}

	curthread = (uintptr_t)curlwp;

	MUTEX_DASSERT(mtx, MUTEX_ADAPTIVE_P(mtx));
	MUTEX_ASSERT(mtx, curthread != 0);
	MUTEX_WANTLOCK(mtx);

	if (panicstr == NULL) {
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	LOCKSTAT_ENTER(lsflag);

	/*
	 * Adaptive mutex; spin trying to acquire the mutex.  If we
	 * determine that the owner is not running on a processor,
	 * then we stop spinning, and sleep instead.
	 */
	KPREEMPT_DISABLE(curlwp);
	for (owner = mtx->mtx_owner;;) {
		if (!MUTEX_OWNED(owner)) {
			/*
			 * Mutex owner clear could mean two things:
			 *
			 *	* The mutex has been released.
			 *	* The owner field hasn't been set yet.
			 *
			 * Try to acquire it again.  If that fails,
			 * we'll just loop again.
			 */
			if (MUTEX_ACQUIRE(mtx, curthread))
				break;
			owner = mtx->mtx_owner;
			continue;
		}
		if (__predict_false(panicstr != NULL)) {
			KPREEMPT_ENABLE(curlwp);
			return;
		}
		if (__predict_false(MUTEX_OWNER(owner) == curthread)) {
			MUTEX_ABORT(mtx, "locking against myself");
		}
#ifdef MULTIPROCESSOR
		/*
		 * Check to see if the owner is running on a processor.
		 * If so, then we should just spin, as the owner will
		 * likely release the lock very soon.
		 */
		if (mutex_oncpu(owner)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			count = SPINLOCK_BACKOFF_MIN;
			do {
				KPREEMPT_ENABLE(curlwp);
				SPINLOCK_BACKOFF(count);
				KPREEMPT_DISABLE(curlwp);
				owner = mtx->mtx_owner;
			} while (mutex_oncpu(owner));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if (!MUTEX_OWNED(owner))
				continue;
		}
#endif

		ts = turnstile_lookup(mtx);

		/*
		 * Once we have the turnstile chain interlock, mark the
		 * mutex as having waiters.  If that fails, spin again:
		 * chances are that the mutex has been released.
		 */
		if (!MUTEX_SET_WAITERS(mtx, owner)) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
			continue;
		}

#ifdef MULTIPROCESSOR
		/*
		 * mutex_exit() is permitted to release the mutex without
		 * any interlocking instructions, and the following can
		 * occur as a result:
		 *
		 *  CPU 1: MUTEX_SET_WAITERS()      CPU 2: mutex_exit()
		 * ---------------------------- ----------------------------
		 *        ..                        acquire cache line
		 *        ..                        test for waiters
		 *    acquire cache line     <-     lose cache line
		 *    lock cache line                ..
		 *    verify mutex is held           ..
		 *    set waiters                    ..
		 *    unlock cache line              ..
		 *    lose cache line        ->     acquire cache line
		 *        ..                        clear lock word, waiters
		 *    return success
		 *
		 * There is another race that can occur: a third CPU could
		 * acquire the mutex as soon as it is released.  Since
		 * adaptive mutexes are primarily spin mutexes, this is not
		 * something that we need to worry about too much.  What we
		 * do need to ensure is that the waiters bit gets set.
		 *
		 * To allow the unlocked release, we need to make some
		 * assumptions here:
		 *
		 * o Release is the only non-atomic/unlocked operation
		 *   that can be performed on the mutex.  (It must still
		 *   be atomic on the local CPU, e.g. in case interrupted
		 *   or preempted).
		 *
		 * o At any given time, MUTEX_SET_WAITERS() can only ever
		 *   be in progress on one CPU in the system - guaranteed
		 *   by the turnstile chain lock.
		 *
		 * o No other operations other than MUTEX_SET_WAITERS()
		 *   and release can modify a mutex with a non-zero
		 *   owner field.
		 *
		 * o The result of a successful MUTEX_SET_WAITERS() call
		 *   is an unbuffered write that is immediately visible
		 *   to all other processors in the system.
		 *
		 * o If the holding LWP switches away, it posts a store
		 *   fence before changing curlwp, ensuring that any
		 *   overwrite of the mutex waiters flag by mutex_exit()
		 *   completes before the modification of curlwp becomes
		 *   visible to this CPU.
		 *
		 * o mi_switch() posts a store fence before setting curlwp
		 *   and before resuming execution of an LWP.
		 *
		 * o _kernel_lock() posts a store fence before setting
		 *   curcpu()->ci_biglock_wanted, and after clearing it.
		 *   This ensures that any overwrite of the mutex waiters
		 *   flag by mutex_exit() completes before the modification
		 *   of ci_biglock_wanted becomes visible.
		 *
		 * We now post a read memory barrier (after setting the
		 * waiters field) and check the lock holder's status again.
		 * Some of the possible outcomes (not an exhaustive list):
		 *
		 * 1. The on-CPU check returns true: the holding LWP is
		 *    running again.  The lock may be released soon and
		 *    we should spin.  Importantly, we can't trust the
		 *    value of the waiters flag.
		 *
		 * 2. The on-CPU check returns false: the holding LWP is
		 *    not running.  We now have the opportunity to check
		 *    if mutex_exit() has blatted the modifications made
		 *    by MUTEX_SET_WAITERS().
		 *
		 * 3. The on-CPU check returns false: the holding LWP may
		 *    or may not be running.  It has context switched at
		 *    some point during our check.  Again, we have the
		 *    chance to see if the waiters bit is still set or
		 *    has been overwritten.
		 *
		 * 4. The on-CPU check returns false: the holding LWP is
		 *    running on a CPU, but wants the big lock.  It's OK
		 *    to check the waiters field in this case.
		 *
		 * 5. The has-waiters check fails: the mutex has been
		 *    released, the waiters flag cleared and another LWP
		 *    now owns the mutex.
		 *
		 * 6. The has-waiters check fails: the mutex has been
		 *    released.
		 *
		 * If the waiters bit is not set it's unsafe to go asleep,
		 * as we might never be awoken.
		 */
		if ((membar_consumer(), mutex_oncpu(owner)) ||
		    (membar_consumer(), !MUTEX_HAS_WAITERS(mtx))) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
			continue;
		}
#endif	/* MULTIPROCESSOR */

		LOCKSTAT_START_TIMER(lsflag, slptime);

		turnstile_block(ts, TS_WRITER_Q, mtx, &mutex_syncobj);

		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		owner = mtx->mtx_owner;
	}
	KPREEMPT_ENABLE(curlwp);

	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SLEEP1,
	    slpcnt, slptime);
	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SPIN,
	    spincnt, spintime);
	LOCKSTAT_EXIT(lsflag);

	MUTEX_DASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
	MUTEX_LOCKED(mtx);
}

/*
 * mutex_vector_exit:
 *
 *	Support routine for mutex_exit() that handles all cases.
 */
void
mutex_vector_exit(kmutex_t *mtx)
{
	turnstile_t *ts;
	uintptr_t curthread;

	if (MUTEX_SPIN_P(mtx)) {
#ifdef FULL
		if (__predict_false(!MUTEX_SPINBIT_LOCKED_P(mtx))) {
			if (panicstr != NULL)
				return;
			MUTEX_ABORT(mtx, "exiting unheld spin mutex");
		}
		MUTEX_UNLOCKED(mtx);
		MUTEX_SPINBIT_LOCK_UNLOCK(mtx);
#endif
		MUTEX_SPIN_SPLRESTORE(mtx);
		return;
	}

	if (__predict_false((uintptr_t)panicstr | cold)) {
		MUTEX_UNLOCKED(mtx);
		MUTEX_RELEASE(mtx);
		return;
	}

	curthread = (uintptr_t)curlwp;
	MUTEX_DASSERT(mtx, curthread != 0);
	MUTEX_ASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
	MUTEX_UNLOCKED(mtx);
#if !defined(LOCKDEBUG)
	__USE(curthread);
#endif

#ifdef LOCKDEBUG
	/*
	 * Avoid having to take the turnstile chain lock every time
	 * around.  Raise the priority level to splhigh() in order
	 * to disable preemption and so make the following atomic.
	 */
	{
		int s = splhigh();
		if (!MUTEX_HAS_WAITERS(mtx)) {
			MUTEX_RELEASE(mtx);
			splx(s);
			return;
		}
		splx(s);
	}
#endif

	/*
	 * Get this lock's turnstile.  This gets the interlock on
	 * the sleep queue.  Once we have that, we can clear the
	 * lock.  If there was no turnstile for the lock, there
	 * were no waiters remaining.
	 */
	ts = turnstile_lookup(mtx);

	if (ts == NULL) {
		MUTEX_RELEASE(mtx);
		turnstile_exit(mtx);
	} else {
		MUTEX_RELEASE(mtx);
		turnstile_wakeup(ts, TS_WRITER_Q,
		    TS_WAITERS(ts, TS_WRITER_Q), NULL);
	}
}

#ifndef	__HAVE_SIMPLE_MUTEXES
/*
 * mutex_wakeup:
 *
 *	Support routine for mutex_exit() that wakes up all waiters.
 *	We assume that the mutex has been released, but it need not
 *	be.
 */
void
mutex_wakeup(kmutex_t *mtx)
{
	turnstile_t *ts;

	ts = turnstile_lookup(mtx);
	if (ts == NULL) {
		turnstile_exit(mtx);
		return;
	}
	MUTEX_CLEAR_WAITERS(mtx);
	turnstile_wakeup(ts, TS_WRITER_Q, TS_WAITERS(ts, TS_WRITER_Q), NULL);
}
#endif	/* !__HAVE_SIMPLE_MUTEXES */

/*
 * mutex_owned:
 *
 *	Return true if the current LWP (adaptive) or CPU (spin)
 *	holds the mutex.
 */
int
mutex_owned(const kmutex_t *mtx)
{

	if (mtx == NULL)
		return 0;
	if (MUTEX_ADAPTIVE_P(mtx))
		return MUTEX_OWNER(mtx->mtx_owner) == (uintptr_t)curlwp;
#ifdef FULL
	return MUTEX_SPINBIT_LOCKED_P(mtx);
#else
	return 1;
#endif
}
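
/*
 * mutex_owned() is typically used in assertions, e.g.
 * KASSERT(mutex_owned(&sc_lock)) at the head of a function that must be
 * called with the lock held ("sc_lock" again being an example name).
 */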

/*
 * mutex_owner:
 *
 *	Return the current owner of an adaptive mutex.  Used for
 *	priority inheritance.
 */
lwp_t *
mutex_owner(const kmutex_t *mtx)
{

	MUTEX_ASSERT(mtx, MUTEX_ADAPTIVE_P(mtx));
	return (struct lwp *)MUTEX_OWNER(mtx->mtx_owner);
}

/*
 * mutex_ownable:
 *
 *	When compiled with DEBUG and LOCKDEBUG defined, ensure that
 *	the mutex is available.  We cannot use !mutex_owned() since
 *	that won't work correctly for spin mutexes.
 */
int
mutex_ownable(const kmutex_t *mtx)
{

#ifdef LOCKDEBUG
	MUTEX_TESTLOCK(mtx);
#endif
	return 1;
}

/*
 * mutex_tryenter:
 *
 *	Try to acquire the mutex; return non-zero if we did.
 */
int
mutex_tryenter(kmutex_t *mtx)
{
	uintptr_t curthread;

	/*
	 * Handle spin mutexes.
	 */
	if (MUTEX_SPIN_P(mtx)) {
		MUTEX_SPIN_SPLRAISE(mtx);
#ifdef FULL
		if (MUTEX_SPINBIT_LOCK_TRY(mtx)) {
			MUTEX_WANTLOCK(mtx);
			MUTEX_LOCKED(mtx);
			return 1;
		}
		MUTEX_SPIN_SPLRESTORE(mtx);
#else
		MUTEX_WANTLOCK(mtx);
		MUTEX_LOCKED(mtx);
		return 1;
#endif
	} else {
		curthread = (uintptr_t)curlwp;
		MUTEX_ASSERT(mtx, curthread != 0);
		if (MUTEX_ACQUIRE(mtx, curthread)) {
			MUTEX_WANTLOCK(mtx);
			MUTEX_LOCKED(mtx);
			MUTEX_DASSERT(mtx,
			    MUTEX_OWNER(mtx->mtx_owner) == curthread);
			return 1;
		}
	}

	return 0;
}
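
/*
 * A common mutex_tryenter() pattern (illustrative):
 *
 *	if (mutex_tryenter(&sc_lock)) {
 *		... optional work ...
 *		mutex_exit(&sc_lock);
 *	}
 *
 * On success the caller must drop the mutex itself; on failure it must
 * be prepared to skip or defer the work.
 */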

#if defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL)
/*
 * mutex_spin_retry:
 *
 *	Support routine for mutex_spin_enter().  Assumes that the caller
 *	has already raised the SPL, and adjusted counters.
 */
void
mutex_spin_retry(kmutex_t *mtx)
{
#ifdef MULTIPROCESSOR
	u_int count;
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_FLAG(lsflag);
#ifdef LOCKDEBUG
	u_int spins = 0;
#endif	/* LOCKDEBUG */

	MUTEX_WANTLOCK(mtx);

	LOCKSTAT_ENTER(lsflag);
	LOCKSTAT_START_TIMER(lsflag, spintime);
	count = SPINLOCK_BACKOFF_MIN;

	/*
	 * Spin testing the lock word and do exponential backoff
	 * to reduce cache line ping-ponging between CPUs.
	 */
	do {
		if (panicstr != NULL)
			break;
		while (MUTEX_SPINBIT_LOCKED_P(mtx)) {
			SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
			if (SPINLOCK_SPINOUT(spins))
				MUTEX_ABORT(mtx, "spinout");
#endif	/* LOCKDEBUG */
		}
	} while (!MUTEX_SPINBIT_LOCK_TRY(mtx));

	LOCKSTAT_STOP_TIMER(lsflag, spintime);
	LOCKSTAT_EVENT(lsflag, mtx, LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
	LOCKSTAT_EXIT(lsflag);

	MUTEX_LOCKED(mtx);
#else	/* MULTIPROCESSOR */
	MUTEX_ABORT(mtx, "locking against myself");
#endif	/* MULTIPROCESSOR */
}
#endif	/* defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL) */