/*	$NetBSD: kern_mutex.c,v 1.34 2008/04/11 15:28:34 ad Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel mutex implementation, modeled after those found in Solaris,
 * a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 */

#define	__MUTEX_PRIVATE

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.34 2008/04/11 15:28:34 ad Exp $");

#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/mutex.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/kernel.h>
#include <sys/atomic.h>
#include <sys/intr.h>
#include <sys/lock.h>
#include <sys/pool.h>

#include <dev/lockstat.h>

#include <machine/lock.h>

/*
 * When not running a debug kernel, spin mutexes are not much
 * more than an splraiseipl() and splx() pair.
 */

#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
#define	FULL
#endif
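
/*
 * Illustrative sketch (editorial example, not part of the original
 * file): with FULL undefined, taking and releasing a spin mutex is
 * little more than
 *
 *	s = splraiseipl(mtx->mtx_ipl);		// mutex_spin_enter()
 *	...critical section...
 *	splx(s);				// mutex_spin_exit()
 *
 * and the lock word itself is never touched in that configuration.
 */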

/*
 * Debugging support.
 */

#define	MUTEX_WANTLOCK(mtx)					\
    LOCKDEBUG_WANTLOCK(MUTEX_DEBUG_P(mtx), (mtx),		\
        (uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_LOCKED(mtx)					\
    LOCKDEBUG_LOCKED(MUTEX_DEBUG_P(mtx), (mtx),			\
        (uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_UNLOCKED(mtx)					\
    LOCKDEBUG_UNLOCKED(MUTEX_DEBUG_P(mtx), (mtx),		\
        (uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_ABORT(mtx, msg)					\
    mutex_abort(mtx, __func__, msg)

#if defined(LOCKDEBUG)

#define	MUTEX_DASSERT(mtx, cond)				\
do {								\
	if (!(cond))						\
		MUTEX_ABORT(mtx, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	MUTEX_DASSERT(mtx, cond)	/* nothing */

#endif	/* LOCKDEBUG */

#if defined(DIAGNOSTIC)

#define	MUTEX_ASSERT(mtx, cond)					\
do {								\
	if (!(cond))						\
		MUTEX_ABORT(mtx, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else	/* DIAGNOSTIC */

#define	MUTEX_ASSERT(mtx, cond)	/* nothing */

#endif	/* DIAGNOSTIC */

/*
 * Spin mutex SPL save / restore.
 */
#ifndef MUTEX_COUNT_BIAS
#define	MUTEX_COUNT_BIAS	0
#endif

#define	MUTEX_SPIN_SPLRAISE(mtx)				\
do {								\
	struct cpu_info *x__ci = curcpu();			\
	int x__cnt, s;						\
	x__cnt = x__ci->ci_mtx_count--;				\
	s = splraiseipl(mtx->mtx_ipl);				\
	if (x__cnt == MUTEX_COUNT_BIAS)				\
		x__ci->ci_mtx_oldspl = (s);			\
} while (/* CONSTCOND */ 0)

#define	MUTEX_SPIN_SPLRESTORE(mtx)				\
do {								\
	struct cpu_info *x__ci = curcpu();			\
	int s = x__ci->ci_mtx_oldspl;				\
	__insn_barrier();					\
	if (++(x__ci->ci_mtx_count) == MUTEX_COUNT_BIAS)	\
		splx(s);					\
} while (/* CONSTCOND */ 0)
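
/*
 * Example of the count/oldspl handshake above (editorial example; "a"
 * and "b" are hypothetical spin mutexes).  When a CPU whose
 * ci_mtx_count is at MUTEX_COUNT_BIAS takes two spin mutexes and
 * releases them in LIFO order, only the outermost pair touches the
 * saved SPL:
 *
 *	mutex_spin_enter(&a);	// count-- was at the bias: oldspl saved
 *	mutex_spin_enter(&b);	// nested: SPL only raised further
 *	mutex_spin_exit(&b);	// ++count != bias: no splx()
 *	mutex_spin_exit(&a);	// ++count == bias: splx(oldspl)
 */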

/*
 * Architectures that provide 'simple' mutexes supply a CAS function
 * that is either MP-safe, or does not need to be MP-safe.  Adaptive
 * mutexes on these architectures do not require an additional
 * interlock.
 */

#ifdef __HAVE_SIMPLE_MUTEXES

#define	MUTEX_OWNER(owner)					\
	(owner & MUTEX_THREAD)
#define	MUTEX_HAS_WAITERS(mtx)					\
	(((int)(mtx)->mtx_owner & MUTEX_BIT_WAITERS) != 0)

#define	MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug)			\
do {								\
	if (dodebug)						\
		(mtx)->mtx_owner |= MUTEX_BIT_DEBUG;		\
} while (/* CONSTCOND */ 0)

#define	MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl)		\
do {								\
	(mtx)->mtx_owner = MUTEX_BIT_SPIN;			\
	if (dodebug)						\
		(mtx)->mtx_owner |= MUTEX_BIT_DEBUG;		\
	(mtx)->mtx_ipl = makeiplcookie((ipl));			\
	__cpu_simple_lock_init(&(mtx)->mtx_lock);		\
} while (/* CONSTCOND */ 0)

#define	MUTEX_DESTROY(mtx)					\
do {								\
	(mtx)->mtx_owner = MUTEX_THREAD;			\
} while (/* CONSTCOND */ 0)

#define	MUTEX_SPIN_P(mtx)		\
    (((mtx)->mtx_owner & MUTEX_BIT_SPIN) != 0)
#define	MUTEX_ADAPTIVE_P(mtx)		\
    (((mtx)->mtx_owner & MUTEX_BIT_SPIN) == 0)

#define	MUTEX_DEBUG_P(mtx)	(((mtx)->mtx_owner & MUTEX_BIT_DEBUG) != 0)
#if defined(LOCKDEBUG)
#define	MUTEX_OWNED(owner)		(((owner) & ~MUTEX_BIT_DEBUG) != 0)
#define	MUTEX_INHERITDEBUG(new, old)	(new) |= (old) & MUTEX_BIT_DEBUG
#else /* defined(LOCKDEBUG) */
#define	MUTEX_OWNED(owner)		((owner) != 0)
#define	MUTEX_INHERITDEBUG(new, old)	/* nothing */
#endif /* defined(LOCKDEBUG) */

static inline int
MUTEX_ACQUIRE(kmutex_t *mtx, uintptr_t curthread)
{
	int rv;
	uintptr_t old = 0;
	uintptr_t new = curthread;

	MUTEX_INHERITDEBUG(old, mtx->mtx_owner);
	MUTEX_INHERITDEBUG(new, old);
	rv = MUTEX_CAS(&mtx->mtx_owner, old, new);
	MUTEX_RECEIVE(mtx);
	return rv;
}

static inline int
MUTEX_SET_WAITERS(kmutex_t *mtx, uintptr_t owner)
{
	int rv;
	rv = MUTEX_CAS(&mtx->mtx_owner, owner, owner | MUTEX_BIT_WAITERS);
	MUTEX_RECEIVE(mtx);
	return rv;
}

static inline void
MUTEX_RELEASE(kmutex_t *mtx)
{
	uintptr_t new;

	MUTEX_GIVE(mtx);
	new = 0;
	MUTEX_INHERITDEBUG(new, mtx->mtx_owner);
	mtx->mtx_owner = new;
}

static inline void
MUTEX_CLEAR_WAITERS(kmutex_t *mtx)
{
	/* nothing */
}
#endif	/* __HAVE_SIMPLE_MUTEXES */
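
/*
 * For illustration (editorial note; bit layout inferred from the
 * macros above, not a definitive description): on __HAVE_SIMPLE_MUTEXES
 * platforms the owner word of an adaptive mutex packs everything into
 * a single uintptr_t:
 *
 *	mtx_owner = (owning LWP address & MUTEX_THREAD)
 *		  | MUTEX_BIT_WAITERS	// set under the turnstile lock
 *		  | MUTEX_BIT_DEBUG	// set if LOCKDEBUG tracks the lock
 *
 * so one MUTEX_CAS() from "unowned" to curlwp acquires the lock, and
 * storing back just the debug bit releases it.
 */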

/*
 * Patch in stubs via strong alias where they are not available.
 */

#if defined(LOCKDEBUG)
#undef	__HAVE_MUTEX_STUBS
#undef	__HAVE_SPIN_MUTEX_STUBS
#endif

#ifndef __HAVE_MUTEX_STUBS
__strong_alias(mutex_enter,mutex_vector_enter);
__strong_alias(mutex_exit,mutex_vector_exit);
#endif

#ifndef __HAVE_SPIN_MUTEX_STUBS
__strong_alias(mutex_spin_enter,mutex_vector_enter);
__strong_alias(mutex_spin_exit,mutex_vector_exit);
#endif

void	mutex_abort(kmutex_t *, const char *, const char *);
void	mutex_dump(volatile void *);
int	mutex_onproc(uintptr_t, struct cpu_info **);

lockops_t mutex_spin_lockops = {
	"Mutex",
	0,
	mutex_dump
};

lockops_t mutex_adaptive_lockops = {
	"Mutex",
	1,
	mutex_dump
};

syncobj_t mutex_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	turnstile_unsleep,
	turnstile_changepri,
	sleepq_lendpri,
	(void *)mutex_owner,
};

/* Mutex cache */
#define	MUTEX_OBJ_MAGIC	0x5aa3c85d
struct kmutexobj {
	kmutex_t	mo_lock;
	u_int		mo_magic;
	u_int		mo_refcnt;
};

static int	mutex_obj_ctor(void *, void *, int);

static pool_cache_t	mutex_obj_cache;

/*
 * mutex_dump:
 *
 * Dump the contents of a mutex structure.
 */
void
mutex_dump(volatile void *cookie)
{
	volatile kmutex_t *mtx = cookie;

	printf_nolog("owner field : %#018lx wait/spin: %16d/%d\n",
	    (long)MUTEX_OWNER(mtx->mtx_owner), MUTEX_HAS_WAITERS(mtx),
	    MUTEX_SPIN_P(mtx));
}

/*
 * mutex_abort:
 *
 * Dump information about an error and panic the system.  This
 * generates a lot of machine code in the DIAGNOSTIC case, so
 * we ask the compiler to not inline it.
 */

#if __GNUC_PREREQ__(3, 0)
__attribute ((noinline)) __attribute ((noreturn))
#endif
void
mutex_abort(kmutex_t *mtx, const char *func, const char *msg)
{

	LOCKDEBUG_ABORT(mtx, (MUTEX_SPIN_P(mtx) ?
	    &mutex_spin_lockops : &mutex_adaptive_lockops), func, msg);
	/* NOTREACHED */
}

/*
 * mutex_init:
 *
 * Initialize a mutex for use.  Note that adaptive mutexes are in
 * essence spin mutexes that can sleep to avoid deadlock and wasting
 * CPU time.  We can't easily provide a type of mutex that always
 * sleeps - see comments in mutex_vector_enter() about releasing
 * mutexes unlocked.
 */
void
mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl)
{
	bool dodebug;

	memset(mtx, 0, sizeof(*mtx));

	switch (type) {
	case MUTEX_ADAPTIVE:
		KASSERT(ipl == IPL_NONE);
		break;
	case MUTEX_DEFAULT:
	case MUTEX_DRIVER:
		if (ipl == IPL_NONE || ipl == IPL_SOFTCLOCK ||
		    ipl == IPL_SOFTBIO || ipl == IPL_SOFTNET ||
		    ipl == IPL_SOFTSERIAL) {
			type = MUTEX_ADAPTIVE;
		} else {
			type = MUTEX_SPIN;
		}
		break;
	default:
		break;
	}

	switch (type) {
	case MUTEX_NODEBUG:
		dodebug = LOCKDEBUG_ALLOC(mtx, NULL,
		    (uintptr_t)__builtin_return_address(0));
		MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl);
		break;
	case MUTEX_ADAPTIVE:
		dodebug = LOCKDEBUG_ALLOC(mtx, &mutex_adaptive_lockops,
		    (uintptr_t)__builtin_return_address(0));
		MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug);
		break;
	case MUTEX_SPIN:
		dodebug = LOCKDEBUG_ALLOC(mtx, &mutex_spin_lockops,
		    (uintptr_t)__builtin_return_address(0));
		MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl);
		break;
	default:
		panic("mutex_init: impossible type");
		break;
	}
}
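
/*
 * Example usage (editorial example; "sc_lock" is a hypothetical
 * caller-owned mutex, not defined in this file):
 *
 *	kmutex_t sc_lock;
 *
 *	mutex_init(&sc_lock, MUTEX_DEFAULT, IPL_NONE);	// adaptive
 *	mutex_enter(&sc_lock);
 *	...critical section, may sleep...
 *	mutex_exit(&sc_lock);
 *	mutex_destroy(&sc_lock);
 *
 * Passing MUTEX_DEFAULT with an IPL above the soft interrupt levels
 * would instead yield a spin mutex, as per the first switch above.
 */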

/*
 * mutex_destroy:
 *
 * Tear down a mutex.
 */
void
mutex_destroy(kmutex_t *mtx)
{

	if (MUTEX_ADAPTIVE_P(mtx)) {
		MUTEX_ASSERT(mtx, !MUTEX_OWNED(mtx->mtx_owner) &&
		    !MUTEX_HAS_WAITERS(mtx));
	} else {
		MUTEX_ASSERT(mtx, !__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock));
	}

	LOCKDEBUG_FREE(MUTEX_DEBUG_P(mtx), mtx);
	MUTEX_DESTROY(mtx);
}

/*
 * mutex_onproc:
 *
 * Return true if an adaptive mutex owner is running on a CPU in the
 * system.  If the target is waiting on the kernel big lock, then we
 * must release it.  This is necessary to avoid deadlock.
 *
 * Note that we can't use the mutex owner field as an LWP pointer.  We
 * don't have full control over the timing of our execution, and so the
 * pointer could be completely invalid by the time we dereference it.
 */
#ifdef MULTIPROCESSOR
int
mutex_onproc(uintptr_t owner, struct cpu_info **cip)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct lwp *l;

	if (!MUTEX_OWNED(owner))
		return 0;
	l = (struct lwp *)MUTEX_OWNER(owner);

	/* See if the target is running on a CPU somewhere. */
	if ((ci = *cip) != NULL && ci->ci_curlwp == l)
		goto run;
	for (CPU_INFO_FOREACH(cii, ci))
		if (ci->ci_curlwp == l)
			goto run;

	/* No: it may be safe to block now. */
	*cip = NULL;
	return 0;

 run:
	/* Target is running; do we need to block? */
	*cip = ci;
	return ci->ci_biglock_wanted != l;
}
#endif	/* MULTIPROCESSOR */

/*
 * mutex_vector_enter:
 *
 * Support routine for mutex_enter() that must handle all cases.  In
 * the LOCKDEBUG case, mutex_enter() is always aliased here, even if
 * fast-path stubs are available.  If a mutex_spin_enter() stub is
 * not available, then it is also aliased directly here.
 */
void
mutex_vector_enter(kmutex_t *mtx)
{
	uintptr_t owner, curthread;
	turnstile_t *ts;
#ifdef MULTIPROCESSOR
	struct cpu_info *ci = NULL;
	u_int count;
#endif
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_COUNTER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_FLAG(lsflag);

	/*
	 * Handle spin mutexes.
	 */
	if (MUTEX_SPIN_P(mtx)) {
#if defined(LOCKDEBUG) && defined(MULTIPROCESSOR)
		u_int spins = 0;
#endif
		MUTEX_SPIN_SPLRAISE(mtx);
		MUTEX_WANTLOCK(mtx);
#ifdef FULL
		if (__cpu_simple_lock_try(&mtx->mtx_lock)) {
			MUTEX_LOCKED(mtx);
			return;
		}
#if !defined(MULTIPROCESSOR)
		MUTEX_ABORT(mtx, "locking against myself");
#else /* !MULTIPROCESSOR */

		LOCKSTAT_ENTER(lsflag);
		LOCKSTAT_START_TIMER(lsflag, spintime);
		count = SPINLOCK_BACKOFF_MIN;

		/*
		 * Spin, testing the lock word and doing exponential
		 * backoff to reduce cache line ping-ponging between
		 * CPUs.
		 */
		do {
			if (panicstr != NULL)
				break;
			while (__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock)) {
				SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
				if (SPINLOCK_SPINOUT(spins))
					MUTEX_ABORT(mtx, "spinout");
#endif	/* LOCKDEBUG */
			}
		} while (!__cpu_simple_lock_try(&mtx->mtx_lock));

		if (count != SPINLOCK_BACKOFF_MIN) {
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_EVENT(lsflag, mtx,
			    LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
		}
		LOCKSTAT_EXIT(lsflag);
#endif	/* !MULTIPROCESSOR */
#endif	/* FULL */
		MUTEX_LOCKED(mtx);
		return;
	}

	curthread = (uintptr_t)curlwp;

	MUTEX_DASSERT(mtx, MUTEX_ADAPTIVE_P(mtx));
	MUTEX_ASSERT(mtx, curthread != 0);
	MUTEX_WANTLOCK(mtx);

	if (panicstr == NULL) {
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	LOCKSTAT_ENTER(lsflag);

	/*
	 * Adaptive mutex; spin trying to acquire the mutex.  If we
	 * determine that the owner is not running on a processor,
	 * then we stop spinning, and sleep instead.
	 */
	for (owner = mtx->mtx_owner;;) {
		if (!MUTEX_OWNED(owner)) {
			/*
			 * Mutex owner clear could mean two things:
			 *
			 *	* The mutex has been released.
			 *	* The owner field hasn't been set yet.
			 *
			 * Try to acquire it again.  If that fails,
			 * we'll just loop again.
			 */
			if (MUTEX_ACQUIRE(mtx, curthread))
				break;
			owner = mtx->mtx_owner;
			continue;
		}

		if (panicstr != NULL)
			return;
		if (MUTEX_OWNER(owner) == curthread)
			MUTEX_ABORT(mtx, "locking against myself");

#ifdef MULTIPROCESSOR
		/*
		 * Check to see if the owner is running on a processor.
		 * If so, then we should just spin, as the owner will
		 * likely release the lock very soon.
		 */
		if (mutex_onproc(owner, &ci)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			count = SPINLOCK_BACKOFF_MIN;
			for (;;) {
				SPINLOCK_BACKOFF(count);
				owner = mtx->mtx_owner;
				if (!mutex_onproc(owner, &ci))
					break;
			}
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if (!MUTEX_OWNED(owner))
				continue;
		}
#endif

		ts = turnstile_lookup(mtx);

		/*
		 * Once we have the turnstile chain interlock, mark the
		 * mutex as having waiters.  If that fails, spin again:
		 * chances are that the mutex has been released.
		 */
		if (!MUTEX_SET_WAITERS(mtx, owner)) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
			continue;
		}

#ifdef MULTIPROCESSOR
		/*
		 * mutex_exit() is permitted to release the mutex without
		 * any interlocking instructions, and the following can
		 * occur as a result:
		 *
		 * CPU 1: MUTEX_SET_WAITERS()		CPU 2: mutex_exit()
		 * ---------------------------		---------------------------
		 *		..			acquire cache line
		 *		..			test for waiters
		 *	acquire cache line	<-	lose cache line
		 *	lock cache line			..
		 *	verify mutex is held		..
		 *	set waiters			..
		 *	unlock cache line		..
		 *	lose cache line		->	acquire cache line
		 *		..			clear lock word, waiters
		 *	return success
		 *
		 * There is another race that can occur: a third CPU could
		 * acquire the mutex as soon as it is released.  Since
		 * adaptive mutexes are primarily spin mutexes, this is not
		 * something that we need to worry about too much.  What we
		 * do need to ensure is that the waiters bit gets set.
		 *
		 * To allow the unlocked release, we need to make some
		 * assumptions here:
		 *
		 * o Release is the only non-atomic/unlocked operation
		 *   that can be performed on the mutex.  (It must still
		 *   be atomic on the local CPU, e.g. in case interrupted
		 *   or preempted).
		 *
		 * o At any given time, MUTEX_SET_WAITERS() can only ever
		 *   be in progress on one CPU in the system - guaranteed
		 *   by the turnstile chain lock.
		 *
		 * o No other operations other than MUTEX_SET_WAITERS()
		 *   and release can modify a mutex with a non-zero
		 *   owner field.
		 *
		 * o The result of a successful MUTEX_SET_WAITERS() call
		 *   is an unbuffered write that is immediately visible
		 *   to all other processors in the system.
		 *
		 * o If the holding LWP switches away, it posts a store
		 *   fence before changing curlwp, ensuring that any
		 *   overwrite of the mutex waiters flag by mutex_exit()
		 *   completes before the modification of curlwp becomes
		 *   visible to this CPU.
		 *
		 * o mi_switch() posts a store fence before setting curlwp
		 *   and before resuming execution of an LWP.
		 *
		 * o _kernel_lock() posts a store fence before setting
		 *   curcpu()->ci_biglock_wanted, and after clearing it.
		 *   This ensures that any overwrite of the mutex waiters
		 *   flag by mutex_exit() completes before the modification
		 *   of ci_biglock_wanted becomes visible.
		 *
		 * We now post a read memory barrier (after setting the
		 * waiters field) and check the lock holder's status again.
		 * Some of the possible outcomes (not an exhaustive list):
		 *
		 * 1. The onproc check returns true: the holding LWP is
		 *    running again.  The lock may be released soon and
		 *    we should spin.  Importantly, we can't trust the
		 *    value of the waiters flag.
		 *
		 * 2. The onproc check returns false: the holding LWP is
		 *    not running.  We now have the opportunity to check
		 *    if mutex_exit() has blatted the modifications made
		 *    by MUTEX_SET_WAITERS().
		 *
		 * 3. The onproc check returns false: the holding LWP may
		 *    or may not be running.  It has context switched at
		 *    some point during our check.  Again, we have the
		 *    chance to see if the waiters bit is still set or
		 *    has been overwritten.
		 *
		 * 4. The onproc check returns false: the holding LWP is
		 *    running on a CPU, but wants the big lock.  It's OK
		 *    to check the waiters field in this case.
		 *
		 * 5. The has-waiters check fails: the mutex has been
		 *    released, the waiters flag cleared and another LWP
		 *    now owns the mutex.
		 *
		 * 6. The has-waiters check fails: the mutex has been
		 *    released.
		 *
		 * If the waiters bit is not set, it's unsafe to sleep,
		 * as we might never be awoken.
		 */
		if ((membar_consumer(), mutex_onproc(owner, &ci)) ||
		    (membar_consumer(), !MUTEX_HAS_WAITERS(mtx))) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
			continue;
		}
#endif	/* MULTIPROCESSOR */

		LOCKSTAT_START_TIMER(lsflag, slptime);

		turnstile_block(ts, TS_WRITER_Q, mtx, &mutex_syncobj);

		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		owner = mtx->mtx_owner;
	}

	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SLEEP1,
	    slpcnt, slptime);
	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SPIN,
	    spincnt, spintime);
	LOCKSTAT_EXIT(lsflag);

	MUTEX_DASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
	MUTEX_LOCKED(mtx);
}

/*
 * mutex_vector_exit:
 *
 * Support routine for mutex_exit() that handles all cases.
 */
void
mutex_vector_exit(kmutex_t *mtx)
{
	turnstile_t *ts;
	uintptr_t curthread;

	if (MUTEX_SPIN_P(mtx)) {
#ifdef FULL
		if (__predict_false(!__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock))) {
			if (panicstr != NULL)
				return;
			MUTEX_ABORT(mtx, "exiting unheld spin mutex");
		}
		MUTEX_UNLOCKED(mtx);
		__cpu_simple_unlock(&mtx->mtx_lock);
#endif
		MUTEX_SPIN_SPLRESTORE(mtx);
		return;
	}

	if (__predict_false((uintptr_t)panicstr | cold)) {
		MUTEX_UNLOCKED(mtx);
		MUTEX_RELEASE(mtx);
		return;
	}

	curthread = (uintptr_t)curlwp;
	MUTEX_DASSERT(mtx, curthread != 0);
	MUTEX_ASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
	MUTEX_UNLOCKED(mtx);

#ifdef LOCKDEBUG
	/*
	 * Avoid having to take the turnstile chain lock every time
	 * around.  Raise the priority level to splhigh() in order
	 * to disable preemption and so make the following atomic.
	 */
	{
		int s = splhigh();
		if (!MUTEX_HAS_WAITERS(mtx)) {
			MUTEX_RELEASE(mtx);
			splx(s);
			return;
		}
		splx(s);
	}
#endif

	/*
	 * Get this lock's turnstile.  This gets the interlock on
	 * the sleep queue.  Once we have that, we can clear the
	 * lock.  If there was no turnstile for the lock, there
	 * were no waiters remaining.
	 */
	ts = turnstile_lookup(mtx);

	if (ts == NULL) {
		MUTEX_RELEASE(mtx);
		turnstile_exit(mtx);
	} else {
		MUTEX_RELEASE(mtx);
		turnstile_wakeup(ts, TS_WRITER_Q,
		    TS_WAITERS(ts, TS_WRITER_Q), NULL);
	}
}

#ifndef __HAVE_SIMPLE_MUTEXES
/*
 * mutex_wakeup:
 *
 * Support routine for mutex_exit() that wakes up all waiters.
 * We assume that the mutex has been released, but it need not
 * be.
 */
void
mutex_wakeup(kmutex_t *mtx)
{
	turnstile_t *ts;

	ts = turnstile_lookup(mtx);
	if (ts == NULL) {
		turnstile_exit(mtx);
		return;
	}
	MUTEX_CLEAR_WAITERS(mtx);
	turnstile_wakeup(ts, TS_WRITER_Q, TS_WAITERS(ts, TS_WRITER_Q), NULL);
}
#endif	/* !__HAVE_SIMPLE_MUTEXES */

/*
 * mutex_owned:
 *
 * Return true if the current LWP (adaptive) or CPU (spin)
 * holds the mutex.
 */
int
mutex_owned(kmutex_t *mtx)
{

	if (MUTEX_ADAPTIVE_P(mtx))
		return MUTEX_OWNER(mtx->mtx_owner) == (uintptr_t)curlwp;
#ifdef FULL
	return __SIMPLELOCK_LOCKED_P(&mtx->mtx_lock);
#else
	return 1;
#endif
}
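
/*
 * Typical use (editorial example; "sc_lock" is hypothetical):
 * mutex_owned() is intended for assertions rather than for making
 * locking decisions, e.g.:
 *
 *	KASSERT(mutex_owned(&sc_lock));
 */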

/*
 * mutex_owner:
 *
 * Return the current owner of an adaptive mutex.  Used for
 * priority inheritance.
 */
lwp_t *
mutex_owner(kmutex_t *mtx)
{

	MUTEX_ASSERT(mtx, MUTEX_ADAPTIVE_P(mtx));
	return (struct lwp *)MUTEX_OWNER(mtx->mtx_owner);
}

/*
 * mutex_tryenter:
 *
 * Try to acquire the mutex; return non-zero if we did.
 */
int
mutex_tryenter(kmutex_t *mtx)
{
	uintptr_t curthread;

	/*
	 * Handle spin mutexes.
	 */
	if (MUTEX_SPIN_P(mtx)) {
		MUTEX_SPIN_SPLRAISE(mtx);
#ifdef FULL
		if (__cpu_simple_lock_try(&mtx->mtx_lock)) {
			MUTEX_WANTLOCK(mtx);
			MUTEX_LOCKED(mtx);
			return 1;
		}
		MUTEX_SPIN_SPLRESTORE(mtx);
#else
		MUTEX_WANTLOCK(mtx);
		MUTEX_LOCKED(mtx);
		return 1;
#endif
	} else {
		curthread = (uintptr_t)curlwp;
		MUTEX_ASSERT(mtx, curthread != 0);
		if (MUTEX_ACQUIRE(mtx, curthread)) {
			MUTEX_WANTLOCK(mtx);
			MUTEX_LOCKED(mtx);
			MUTEX_DASSERT(mtx,
			    MUTEX_OWNER(mtx->mtx_owner) == curthread);
			return 1;
		}
	}

	return 0;
}
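
/*
 * Example (editorial example; "sc_lock" is hypothetical): callers that
 * must not block can fall back to another strategy when the try fails:
 *
 *	if (!mutex_tryenter(&sc_lock))
 *		return EBUSY;		// or defer the work
 *	...critical section...
 *	mutex_exit(&sc_lock);
 */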

#if defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL)
/*
 * mutex_spin_retry:
 *
 * Support routine for mutex_spin_enter().  Assumes that the caller
 * has already raised the SPL, and adjusted counters.
 */
void
mutex_spin_retry(kmutex_t *mtx)
{
#ifdef MULTIPROCESSOR
	u_int count;
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_FLAG(lsflag);
#ifdef LOCKDEBUG
	u_int spins = 0;
#endif	/* LOCKDEBUG */

	MUTEX_WANTLOCK(mtx);

	LOCKSTAT_ENTER(lsflag);
	LOCKSTAT_START_TIMER(lsflag, spintime);
	count = SPINLOCK_BACKOFF_MIN;

	/*
	 * Spin, testing the lock word and doing exponential backoff
	 * to reduce cache line ping-ponging between CPUs.
	 */
	do {
		if (panicstr != NULL)
			break;
		while (__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock)) {
			SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
			if (SPINLOCK_SPINOUT(spins))
				MUTEX_ABORT(mtx, "spinout");
#endif	/* LOCKDEBUG */
		}
	} while (!__cpu_simple_lock_try(&mtx->mtx_lock));

	LOCKSTAT_STOP_TIMER(lsflag, spintime);
	LOCKSTAT_EVENT(lsflag, mtx, LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
	LOCKSTAT_EXIT(lsflag);

	MUTEX_LOCKED(mtx);
#else	/* MULTIPROCESSOR */
	MUTEX_ABORT(mtx, "locking against myself");
#endif	/* MULTIPROCESSOR */
}
#endif	/* defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL) */

/*
 * mutex_obj_init:
 *
 * Initialize the mutex object store.
 */
void
mutex_obj_init(void)
{

	mutex_obj_cache = pool_cache_init(sizeof(struct kmutexobj),
	    coherency_unit, 0, 0, "mutex", NULL, IPL_NONE, mutex_obj_ctor,
	    NULL, NULL);
}

/*
 * mutex_obj_ctor:
 *
 * Initialize a new lock for the cache.
 */
static int
mutex_obj_ctor(void *arg, void *obj, int flags)
{
	struct kmutexobj *mo = obj;

	mo->mo_magic = MUTEX_OBJ_MAGIC;

	return 0;
}

/*
 * mutex_obj_alloc:
 *
 * Allocate a single lock object.
 */
kmutex_t *
mutex_obj_alloc(kmutex_type_t type, int ipl)
{
	struct kmutexobj *mo;

	mo = pool_cache_get(mutex_obj_cache, PR_WAITOK);
	mutex_init(&mo->mo_lock, type, ipl);
	mo->mo_refcnt = 1;

	return (kmutex_t *)mo;
}

/*
 * mutex_obj_hold:
 *
 * Add a single reference to a lock object.  A reference to the object
 * must already be held, and must be held across this call.
 */
void
mutex_obj_hold(kmutex_t *lock)
{
	struct kmutexobj *mo = (struct kmutexobj *)lock;

	KASSERT(mo->mo_magic == MUTEX_OBJ_MAGIC);
	KASSERT(mo->mo_refcnt > 0);

	atomic_inc_uint(&mo->mo_refcnt);
}

/*
 * mutex_obj_free:
 *
 * Drop a reference from a lock object.  If the last reference is being
 * dropped, free the object and return true.  Otherwise, return false.
 */
bool
mutex_obj_free(kmutex_t *lock)
{
	struct kmutexobj *mo = (struct kmutexobj *)lock;

	KASSERT(mo->mo_magic == MUTEX_OBJ_MAGIC);
	KASSERT(mo->mo_refcnt > 0);

	if (atomic_dec_uint_nv(&mo->mo_refcnt) > 0) {
		return false;
	}
	mutex_destroy(&mo->mo_lock);
	pool_cache_put(mutex_obj_cache, mo);
	return true;
}
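
/*
 * Example of the reference-counted interface above (editorial example;
 * names are hypothetical):
 *
 *	kmutex_t *lock;
 *
 *	lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);  // refcnt 1
 *	mutex_obj_hold(lock);				  // refcnt 2
 *	...
 *	(void)mutex_obj_free(lock);	// refcnt 1, returns false
 *	(void)mutex_obj_free(lock);	// last reference: destroyed, freed
 */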