/*	$NetBSD: kern_mutex.c,v 1.33 2008/03/28 22:19:39 ad Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel mutex implementation, modeled after those found in Solaris,
 * a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	    Richard McDougall.
 */

#define	__MUTEX_PRIVATE

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.33 2008/03/28 22:19:39 ad Exp $");

#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/mutex.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/kernel.h>
#include <sys/atomic.h>
#include <sys/intr.h>
#include <sys/lock.h>
#include <sys/pool.h>

#include <dev/lockstat.h>

#include <machine/lock.h>

/*
 * When not running a debug kernel, spin mutexes are not much
 * more than an splraiseipl() and splx() pair.
 */

#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
#define	FULL
#endif
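
/*
 * Illustrative sketch (not compiled): on a kernel where FULL is not
 * defined, the spin mutex entry/exit paths reduce to little more than
 * the SPL bookkeeping below, roughly equivalent to a hand-written
 * splraiseipl()/splx() pair:
 *
 *	s = splraiseipl(mtx->mtx_ipl);	// mutex_spin_enter(mtx)
 *	...critical section...
 *	splx(s);			// mutex_spin_exit(mtx)
 */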

/*
 * Debugging support.
 */

#define	MUTEX_WANTLOCK(mtx)					\
    LOCKDEBUG_WANTLOCK(MUTEX_DEBUG_P(mtx), (mtx),		\
	(uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_LOCKED(mtx)					\
    LOCKDEBUG_LOCKED(MUTEX_DEBUG_P(mtx), (mtx),			\
	(uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_UNLOCKED(mtx)					\
    LOCKDEBUG_UNLOCKED(MUTEX_DEBUG_P(mtx), (mtx),		\
	(uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_ABORT(mtx, msg)					\
    mutex_abort(mtx, __func__, msg)

#if defined(LOCKDEBUG)

#define	MUTEX_DASSERT(mtx, cond)				\
do {								\
	if (!(cond))						\
		MUTEX_ABORT(mtx, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	MUTEX_DASSERT(mtx, cond)	/* nothing */

#endif	/* LOCKDEBUG */

#if defined(DIAGNOSTIC)

#define	MUTEX_ASSERT(mtx, cond)					\
do {								\
	if (!(cond))						\
		MUTEX_ABORT(mtx, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else	/* DIAGNOSTIC */

#define	MUTEX_ASSERT(mtx, cond)	/* nothing */

#endif	/* DIAGNOSTIC */

/*
 * Spin mutex SPL save / restore.
 */
#ifndef MUTEX_COUNT_BIAS
#define	MUTEX_COUNT_BIAS	0
#endif

#define	MUTEX_SPIN_SPLRAISE(mtx)					\
do {									\
	struct cpu_info *x__ci = curcpu();				\
	int x__cnt, s;							\
	x__cnt = x__ci->ci_mtx_count--;					\
	s = splraiseipl(mtx->mtx_ipl);					\
	if (x__cnt == MUTEX_COUNT_BIAS)					\
		x__ci->ci_mtx_oldspl = (s);				\
} while (/* CONSTCOND */ 0)

#define	MUTEX_SPIN_SPLRESTORE(mtx)					\
do {									\
	struct cpu_info *x__ci = curcpu();				\
	int s = x__ci->ci_mtx_oldspl;					\
	__insn_barrier();						\
	if (++(x__ci->ci_mtx_count) == MUTEX_COUNT_BIAS)		\
		splx(s);						\
} while (/* CONSTCOND */ 0)
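
/*
 * A minimal nesting sketch (illustrative only; "lock_a" and "lock_b"
 * are hypothetical spin mutexes, not part of this file).  ci_mtx_count
 * tracks nesting relative to MUTEX_COUNT_BIAS, so the pre-acquisition
 * SPL is recorded only for the outermost lock and restored only when
 * the last spin mutex held by this CPU is released:
 *
 *	mutex_spin_enter(&lock_a);	// count 0 -> -1, oldspl saved
 *	mutex_spin_enter(&lock_b);	// count -1 -> -2, oldspl untouched
 *	...
 *	mutex_spin_exit(&lock_b);	// count -2 -> -1, SPL unchanged
 *	mutex_spin_exit(&lock_a);	// count -1 -> 0, splx(oldspl)
 */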

/*
 * For architectures that provide 'simple' mutexes: they provide a
 * CAS function that is either MP-safe, or does not need to be MP
 * safe.  Adaptive mutexes on these architectures do not require an
 * additional interlock.
 */

#ifdef __HAVE_SIMPLE_MUTEXES

#define	MUTEX_OWNER(owner)						\
	(owner & MUTEX_THREAD)
#define	MUTEX_HAS_WAITERS(mtx)						\
	(((int)(mtx)->mtx_owner & MUTEX_BIT_WAITERS) != 0)

#define	MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug)				\
do {									\
	if (dodebug)							\
		(mtx)->mtx_owner |= MUTEX_BIT_DEBUG;			\
} while (/* CONSTCOND */ 0)

#define	MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl)			\
do {									\
	(mtx)->mtx_owner = MUTEX_BIT_SPIN;				\
	if (dodebug)							\
		(mtx)->mtx_owner |= MUTEX_BIT_DEBUG;			\
	(mtx)->mtx_ipl = makeiplcookie((ipl));				\
	__cpu_simple_lock_init(&(mtx)->mtx_lock);			\
} while (/* CONSTCOND */ 0)

#define	MUTEX_DESTROY(mtx)						\
do {									\
	(mtx)->mtx_owner = MUTEX_THREAD;				\
} while (/* CONSTCOND */ 0)

#define	MUTEX_SPIN_P(mtx)		\
    (((mtx)->mtx_owner & MUTEX_BIT_SPIN) != 0)
#define	MUTEX_ADAPTIVE_P(mtx)		\
    (((mtx)->mtx_owner & MUTEX_BIT_SPIN) == 0)

#define	MUTEX_DEBUG_P(mtx)	(((mtx)->mtx_owner & MUTEX_BIT_DEBUG) != 0)
#if defined(LOCKDEBUG)
#define	MUTEX_OWNED(owner)		(((owner) & ~MUTEX_BIT_DEBUG) != 0)
#define	MUTEX_INHERITDEBUG(new, old)	(new) |= (old) & MUTEX_BIT_DEBUG
#else /* defined(LOCKDEBUG) */
#define	MUTEX_OWNED(owner)		((owner) != 0)
#define	MUTEX_INHERITDEBUG(new, old)	/* nothing */
#endif /* defined(LOCKDEBUG) */

static inline int
MUTEX_ACQUIRE(kmutex_t *mtx, uintptr_t curthread)
{
	int rv;
	uintptr_t old = 0;
	uintptr_t new = curthread;

	MUTEX_INHERITDEBUG(old, mtx->mtx_owner);
	MUTEX_INHERITDEBUG(new, old);
	rv = MUTEX_CAS(&mtx->mtx_owner, old, new);
	MUTEX_RECEIVE(mtx);
	return rv;
}

static inline int
MUTEX_SET_WAITERS(kmutex_t *mtx, uintptr_t owner)
{
	int rv;
	rv = MUTEX_CAS(&mtx->mtx_owner, owner, owner | MUTEX_BIT_WAITERS);
	MUTEX_RECEIVE(mtx);
	return rv;
}

static inline void
MUTEX_RELEASE(kmutex_t *mtx)
{
	uintptr_t new;

	MUTEX_GIVE(mtx);
	new = 0;
	MUTEX_INHERITDEBUG(new, mtx->mtx_owner);
	mtx->mtx_owner = new;
}

static inline void
MUTEX_CLEAR_WAITERS(kmutex_t *mtx)
{
	/* nothing */
}
#endif	/* __HAVE_SIMPLE_MUTEXES */
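
/*
 * Illustrative sketch (not compiled): on __HAVE_SIMPLE_MUTEXES
 * platforms the uncontended adaptive acquire is a single CAS of the
 * owner word from "unowned" to curlwp, and release is a plain store
 * back to "unowned", with the debug bit carried across both.  Roughly:
 *
 *	// enter: 0 -> curlwp (plus MUTEX_BIT_DEBUG if set)
 *	if (!MUTEX_ACQUIRE(mtx, (uintptr_t)curlwp))
 *		mutex_vector_enter(mtx);	// contended slow path
 *	...
 *	// exit: owner word -> 0 (debug bit preserved)
 *	if (MUTEX_HAS_WAITERS(mtx))
 *		mutex_vector_exit(mtx);		// must wake waiters
 *	else
 *		MUTEX_RELEASE(mtx);
 *
 * The real fast paths live in machine-dependent stubs; this is only a
 * sketch of the contract the inline helpers above provide.
 */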

/*
 * Patch in stubs via strong alias where they are not available.
 */

#if defined(LOCKDEBUG)
#undef	__HAVE_MUTEX_STUBS
#undef	__HAVE_SPIN_MUTEX_STUBS
#endif

#ifndef __HAVE_MUTEX_STUBS
__strong_alias(mutex_enter,mutex_vector_enter);
__strong_alias(mutex_exit,mutex_vector_exit);
#endif

#ifndef __HAVE_SPIN_MUTEX_STUBS
__strong_alias(mutex_spin_enter,mutex_vector_enter);
__strong_alias(mutex_spin_exit,mutex_vector_exit);
#endif

void	mutex_abort(kmutex_t *, const char *, const char *);
void	mutex_dump(volatile void *);
int	mutex_onproc(uintptr_t, struct cpu_info **);

lockops_t mutex_spin_lockops = {
	"Mutex",
	0,
	mutex_dump
};

lockops_t mutex_adaptive_lockops = {
	"Mutex",
	1,
	mutex_dump
};

syncobj_t mutex_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	turnstile_unsleep,
	turnstile_changepri,
	sleepq_lendpri,
	(void *)mutex_owner,
};

/* Mutex cache */
#define	MUTEX_OBJ_MAGIC	0x5aa3c85d
struct kmutexobj {
	kmutex_t	mo_lock;
	u_int		mo_magic;
	u_int		mo_refcnt;
};

static int	mutex_obj_ctor(void *, void *, int);

static pool_cache_t	mutex_obj_cache;

/*
 * mutex_dump:
 *
 *	Dump the contents of a mutex structure.
 */
void
mutex_dump(volatile void *cookie)
{
	volatile kmutex_t *mtx = cookie;

	printf_nolog("owner field  : %#018lx wait/spin: %16d/%d\n",
	    (long)MUTEX_OWNER(mtx->mtx_owner), MUTEX_HAS_WAITERS(mtx),
	    MUTEX_SPIN_P(mtx));
}

/*
 * mutex_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */

#if __GNUC_PREREQ__(3, 0)
__attribute ((noinline)) __attribute ((noreturn))
#endif
void
mutex_abort(kmutex_t *mtx, const char *func, const char *msg)
{

	LOCKDEBUG_ABORT(mtx, (MUTEX_SPIN_P(mtx) ?
	    &mutex_spin_lockops : &mutex_adaptive_lockops), func, msg);
	/* NOTREACHED */
}

/*
 * mutex_init:
 *
 *	Initialize a mutex for use.  Note that adaptive mutexes are in
 *	essence spin mutexes that can sleep to avoid deadlock and wasting
 *	CPU time.  We can't easily provide a type of mutex that always
 *	sleeps - see comments in mutex_vector_enter() about releasing
 *	mutexes unlocked.
 */
void
mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl)
{
	bool dodebug;

	memset(mtx, 0, sizeof(*mtx));

	switch (type) {
	case MUTEX_ADAPTIVE:
		KASSERT(ipl == IPL_NONE);
		break;
	case MUTEX_DEFAULT:
	case MUTEX_DRIVER:
		if (ipl == IPL_NONE || ipl == IPL_SOFTCLOCK ||
		    ipl == IPL_SOFTBIO || ipl == IPL_SOFTNET ||
		    ipl == IPL_SOFTSERIAL) {
			type = MUTEX_ADAPTIVE;
		} else {
			type = MUTEX_SPIN;
		}
		break;
	default:
		break;
	}

	switch (type) {
	case MUTEX_NODEBUG:
		dodebug = LOCKDEBUG_ALLOC(mtx, NULL,
		    (uintptr_t)__builtin_return_address(0));
		MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl);
		break;
	case MUTEX_ADAPTIVE:
		dodebug = LOCKDEBUG_ALLOC(mtx, &mutex_adaptive_lockops,
		    (uintptr_t)__builtin_return_address(0));
		MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug);
		break;
	case MUTEX_SPIN:
		dodebug = LOCKDEBUG_ALLOC(mtx, &mutex_spin_lockops,
		    (uintptr_t)__builtin_return_address(0));
		MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl);
		break;
	default:
		panic("mutex_init: impossible type");
		break;
	}
}
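
/*
 * Typical usage sketch (illustrative only; "sc" is a hypothetical
 * driver softc, not part of this file).  MUTEX_DEFAULT selects adaptive
 * or spin behaviour from the IPL as described above:
 *
 *	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);	// adaptive
 *	mutex_init(&sc->sc_intr_lock, MUTEX_DEFAULT, IPL_VM);	// spin
 *	...
 *	mutex_destroy(&sc->sc_intr_lock);
 *	mutex_destroy(&sc->sc_lock);
 */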

/*
 * mutex_destroy:
 *
 *	Tear down a mutex.
 */
void
mutex_destroy(kmutex_t *mtx)
{

	if (MUTEX_ADAPTIVE_P(mtx)) {
		MUTEX_ASSERT(mtx, !MUTEX_OWNED(mtx->mtx_owner) &&
		    !MUTEX_HAS_WAITERS(mtx));
	} else {
		MUTEX_ASSERT(mtx, !__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock));
	}

	LOCKDEBUG_FREE(MUTEX_DEBUG_P(mtx), mtx);
	MUTEX_DESTROY(mtx);
}

/*
 * mutex_onproc:
 *
 *	Return true if an adaptive mutex owner is running on a CPU in the
 *	system.  If the target is waiting on the kernel big lock, then we
 *	must release it.  This is necessary to avoid deadlock.
 *
 *	Note that we can't use the mutex owner field as an LWP pointer.  We
 *	don't have full control over the timing of our execution, and so the
 *	pointer could be completely invalid by the time we dereference it.
 */
#ifdef MULTIPROCESSOR
int
mutex_onproc(uintptr_t owner, struct cpu_info **cip)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct lwp *l;

	if (!MUTEX_OWNED(owner))
		return 0;
	l = (struct lwp *)MUTEX_OWNER(owner);

	/* See if the target is running on a CPU somewhere. */
	if ((ci = *cip) != NULL && ci->ci_curlwp == l)
		goto run;
	for (CPU_INFO_FOREACH(cii, ci))
		if (ci->ci_curlwp == l)
			goto run;

	/* No: it may be safe to block now. */
	*cip = NULL;
	return 0;

 run:
 	/* Target is running; do we need to block? */
 	*cip = ci;
	return ci->ci_biglock_wanted != l;
}
#endif	/* MULTIPROCESSOR */

/*
 * mutex_vector_enter:
 *
 *	Support routine for mutex_enter() that must handle all cases.  In
 *	the LOCKDEBUG case, mutex_enter() is always aliased here, even if
 *	fast-path stubs are available.  If a mutex_spin_enter() stub is
 *	not available, then it is also aliased directly here.
 */
void
mutex_vector_enter(kmutex_t *mtx)
{
	uintptr_t owner, curthread;
	turnstile_t *ts;
#ifdef MULTIPROCESSOR
	struct cpu_info *ci = NULL;
	u_int count;
#endif
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_COUNTER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_FLAG(lsflag);

	/*
	 * Handle spin mutexes.
	 */
	if (MUTEX_SPIN_P(mtx)) {
#if defined(LOCKDEBUG) && defined(MULTIPROCESSOR)
		u_int spins = 0;
#endif
		MUTEX_SPIN_SPLRAISE(mtx);
		MUTEX_WANTLOCK(mtx);
#ifdef FULL
		if (__cpu_simple_lock_try(&mtx->mtx_lock)) {
			MUTEX_LOCKED(mtx);
			return;
		}
#if !defined(MULTIPROCESSOR)
		MUTEX_ABORT(mtx, "locking against myself");
#else /* !MULTIPROCESSOR */

		LOCKSTAT_ENTER(lsflag);
		LOCKSTAT_START_TIMER(lsflag, spintime);
		count = SPINLOCK_BACKOFF_MIN;

		/*
		 * Spin on the lock word, doing exponential backoff
		 * to reduce cache line ping-ponging between CPUs.
		 */
		do {
			if (panicstr != NULL)
				break;
			while (__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock)) {
				SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
				if (SPINLOCK_SPINOUT(spins))
					MUTEX_ABORT(mtx, "spinout");
#endif	/* LOCKDEBUG */
			}
		} while (!__cpu_simple_lock_try(&mtx->mtx_lock));

		if (count != SPINLOCK_BACKOFF_MIN) {
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_EVENT(lsflag, mtx,
			    LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
		}
		LOCKSTAT_EXIT(lsflag);
#endif	/* !MULTIPROCESSOR */
#endif	/* FULL */
		MUTEX_LOCKED(mtx);
		return;
	}

	curthread = (uintptr_t)curlwp;

	MUTEX_DASSERT(mtx, MUTEX_ADAPTIVE_P(mtx));
	MUTEX_ASSERT(mtx, curthread != 0);
	MUTEX_WANTLOCK(mtx);

	if (panicstr == NULL) {
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	LOCKSTAT_ENTER(lsflag);

	/*
	 * Adaptive mutex; spin trying to acquire the mutex.  If we
	 * determine that the owner is not running on a processor,
	 * then we stop spinning, and sleep instead.
	 */
	for (;;) {
		owner = mtx->mtx_owner;
		if (!MUTEX_OWNED(owner)) {
			/*
			 * Mutex owner clear could mean two things:
			 *
			 *	* The mutex has been released.
			 *	* The owner field hasn't been set yet.
			 *
			 * Try to acquire it again.  If that fails,
			 * we'll just loop again.
			 */
			if (MUTEX_ACQUIRE(mtx, curthread))
				break;
			continue;
		}

		if (panicstr != NULL)
			return;
		if (MUTEX_OWNER(owner) == curthread)
			MUTEX_ABORT(mtx, "locking against myself");

#ifdef MULTIPROCESSOR
		/*
		 * Check to see if the owner is running on a processor.
		 * If so, then we should just spin, as the owner will
		 * likely release the lock very soon.
		 */
		if (mutex_onproc(owner, &ci)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			count = SPINLOCK_BACKOFF_MIN;
			for (;;) {
				owner = mtx->mtx_owner;
				if (!mutex_onproc(owner, &ci))
					break;
				SPINLOCK_BACKOFF(count);
			}
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if (!MUTEX_OWNED(owner))
				continue;
		}
#endif

		ts = turnstile_lookup(mtx);

		/*
		 * Once we have the turnstile chain interlock, mark the
		 * mutex as having waiters.  If that fails, spin again:
		 * chances are that the mutex has been released.
		 */
		if (!MUTEX_SET_WAITERS(mtx, owner)) {
			turnstile_exit(mtx);
			continue;
		}

#ifdef MULTIPROCESSOR
		/*
		 * mutex_exit() is permitted to release the mutex without
		 * any interlocking instructions, and the following can
		 * occur as a result:
		 *
		 *  CPU 1: MUTEX_SET_WAITERS()        CPU 2: mutex_exit()
		 * ----------------------------      ----------------------------
		 *            ..                     acquire cache line
		 *            ..                     test for waiters
		 *   acquire cache line      <-      lose cache line
		 *    lock cache line                        ..
		 *  verify mutex is held                     ..
		 *      set waiters                          ..
		 *   unlock cache line                       ..
		 *    lose cache line        ->      acquire cache line
		 *            ..                     clear lock word, waiters
		 *      return success
		 *
		 * There is another race that can occur: a third CPU could
		 * acquire the mutex as soon as it is released.  Since
		 * adaptive mutexes are primarily spin mutexes, this is not
		 * something that we need to worry about too much.  What we
		 * do need to ensure is that the waiters bit gets set.
		 *
		 * To allow the unlocked release, we need to make some
		 * assumptions here:
		 *
		 * o Release is the only non-atomic/unlocked operation
		 *   that can be performed on the mutex.  (It must still
		 *   be atomic on the local CPU, e.g. in case interrupted
		 *   or preempted).
		 *
		 * o At any given time, MUTEX_SET_WAITERS() can only ever
		 *   be in progress on one CPU in the system - guaranteed
		 *   by the turnstile chain lock.
		 *
		 * o No operations other than MUTEX_SET_WAITERS() and
		 *   release can modify a mutex with a non-zero
		 *   owner field.
		 *
		 * o The result of a successful MUTEX_SET_WAITERS() call
		 *   is an unbuffered write that is immediately visible
		 *   to all other processors in the system.
		 *
		 * o If the holding LWP switches away, it posts a store
		 *   fence before changing curlwp, ensuring that any
		 *   overwrite of the mutex waiters flag by mutex_exit()
		 *   completes before the modification of curlwp becomes
		 *   visible to this CPU.
		 *
		 * o mi_switch() posts a store fence before setting curlwp
		 *   and before resuming execution of an LWP.
		 *
		 * o _kernel_lock() posts a store fence before setting
		 *   curcpu()->ci_biglock_wanted, and after clearing it.
		 *   This ensures that any overwrite of the mutex waiters
		 *   flag by mutex_exit() completes before the modification
		 *   of ci_biglock_wanted becomes visible.
		 *
		 * We now post a read memory barrier (after setting the
		 * waiters field) and check the lock holder's status again.
		 * Some of the possible outcomes (not an exhaustive list):
		 *
		 * 1. The onproc check returns true: the holding LWP is
		 *    running again.  The lock may be released soon and
		 *    we should spin.  Importantly, we can't trust the
		 *    value of the waiters flag.
		 *
		 * 2. The onproc check returns false: the holding LWP is
		 *    not running.  We now have the opportunity to check
		 *    if mutex_exit() has blatted the modifications made
		 *    by MUTEX_SET_WAITERS().
		 *
		 * 3. The onproc check returns false: the holding LWP may
		 *    or may not be running.  It has context switched at
		 *    some point during our check.  Again, we have the
		 *    chance to see if the waiters bit is still set or
		 *    has been overwritten.
		 *
		 * 4. The onproc check returns false: the holding LWP is
		 *    running on a CPU, but wants the big lock.  It's OK
		 *    to check the waiters field in this case.
		 *
		 * 5. The has-waiters check fails: the mutex has been
		 *    released, the waiters flag cleared and another LWP
		 *    now owns the mutex.
		 *
		 * 6. The has-waiters check fails: the mutex has been
		 *    released.
		 *
		 * If the waiters bit is not set it's unsafe to go to sleep,
		 * as we might never be awoken.
		 */
		if ((membar_consumer(), mutex_onproc(owner, &ci)) ||
		    (membar_consumer(), !MUTEX_HAS_WAITERS(mtx))) {
			turnstile_exit(mtx);
			continue;
		}
#endif	/* MULTIPROCESSOR */

		LOCKSTAT_START_TIMER(lsflag, slptime);

		turnstile_block(ts, TS_WRITER_Q, mtx, &mutex_syncobj);

		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);
	}

	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SLEEP1,
	    slpcnt, slptime);
	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SPIN,
	    spincnt, spintime);
	LOCKSTAT_EXIT(lsflag);

	MUTEX_DASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
	MUTEX_LOCKED(mtx);
}

/*
 * mutex_vector_exit:
 *
 *	Support routine for mutex_exit() that handles all cases.
 */
void
mutex_vector_exit(kmutex_t *mtx)
{
	turnstile_t *ts;
	uintptr_t curthread;

	if (MUTEX_SPIN_P(mtx)) {
#ifdef FULL
		if (__predict_false(!__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock))) {
			if (panicstr != NULL)
				return;
			MUTEX_ABORT(mtx, "exiting unheld spin mutex");
		}
		MUTEX_UNLOCKED(mtx);
		__cpu_simple_unlock(&mtx->mtx_lock);
#endif
		MUTEX_SPIN_SPLRESTORE(mtx);
		return;
	}

	if (__predict_false((uintptr_t)panicstr | cold)) {
		MUTEX_UNLOCKED(mtx);
		MUTEX_RELEASE(mtx);
		return;
	}

	curthread = (uintptr_t)curlwp;
	MUTEX_DASSERT(mtx, curthread != 0);
	MUTEX_ASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
	MUTEX_UNLOCKED(mtx);

#ifdef LOCKDEBUG
	/*
	 * Avoid having to take the turnstile chain lock every time
	 * around.  Raise the priority level to splhigh() in order
	 * to disable preemption and so make the following atomic.
	 */
	{
		int s = splhigh();
		if (!MUTEX_HAS_WAITERS(mtx)) {
			MUTEX_RELEASE(mtx);
			splx(s);
			return;
		}
		splx(s);
	}
#endif

	/*
	 * Get this lock's turnstile.  This gets the interlock on
	 * the sleep queue.  Once we have that, we can clear the
	 * lock.  If there was no turnstile for the lock, there
	 * were no waiters remaining.
	 */
	ts = turnstile_lookup(mtx);

	if (ts == NULL) {
		MUTEX_RELEASE(mtx);
		turnstile_exit(mtx);
	} else {
		MUTEX_RELEASE(mtx);
		turnstile_wakeup(ts, TS_WRITER_Q,
		    TS_WAITERS(ts, TS_WRITER_Q), NULL);
	}
}

#ifndef __HAVE_SIMPLE_MUTEXES
/*
 * mutex_wakeup:
 *
 *	Support routine for mutex_exit() that wakes up all waiters.
 *	We assume that the mutex has been released, but it need not
 *	be.
 */
void
mutex_wakeup(kmutex_t *mtx)
{
	turnstile_t *ts;

	ts = turnstile_lookup(mtx);
	if (ts == NULL) {
		turnstile_exit(mtx);
		return;
	}
	MUTEX_CLEAR_WAITERS(mtx);
	turnstile_wakeup(ts, TS_WRITER_Q, TS_WAITERS(ts, TS_WRITER_Q), NULL);
}
#endif	/* !__HAVE_SIMPLE_MUTEXES */

/*
 * mutex_owned:
 *
 *	Return true if the current LWP (adaptive) or CPU (spin)
 *	holds the mutex.
 */
int
mutex_owned(kmutex_t *mtx)
{

	if (MUTEX_ADAPTIVE_P(mtx))
		return MUTEX_OWNER(mtx->mtx_owner) == (uintptr_t)curlwp;
#ifdef FULL
	return __SIMPLELOCK_LOCKED_P(&mtx->mtx_lock);
#else
	return 1;
#endif
}
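
/*
 * Typical usage sketch (illustrative only; "sc" is a hypothetical
 * softc).  mutex_owned() is intended for diagnostic assertions rather
 * than for making locking decisions:
 *
 *	KASSERT(mutex_owned(&sc->sc_lock));
 */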

/*
 * mutex_owner:
 *
 *	Return the current owner of an adaptive mutex.  Used for
 *	priority inheritance.
 */
lwp_t *
mutex_owner(kmutex_t *mtx)
{

	MUTEX_ASSERT(mtx, MUTEX_ADAPTIVE_P(mtx));
	return (struct lwp *)MUTEX_OWNER(mtx->mtx_owner);
}

/*
 * mutex_tryenter:
 *
 *	Try to acquire the mutex; return non-zero if we did.
 */
int
mutex_tryenter(kmutex_t *mtx)
{
	uintptr_t curthread;

	/*
	 * Handle spin mutexes.
	 */
	if (MUTEX_SPIN_P(mtx)) {
		MUTEX_SPIN_SPLRAISE(mtx);
#ifdef FULL
		if (__cpu_simple_lock_try(&mtx->mtx_lock)) {
			MUTEX_WANTLOCK(mtx);
			MUTEX_LOCKED(mtx);
			return 1;
		}
		MUTEX_SPIN_SPLRESTORE(mtx);
#else
		MUTEX_WANTLOCK(mtx);
		MUTEX_LOCKED(mtx);
		return 1;
#endif
	} else {
		curthread = (uintptr_t)curlwp;
		MUTEX_ASSERT(mtx, curthread != 0);
		if (MUTEX_ACQUIRE(mtx, curthread)) {
			MUTEX_WANTLOCK(mtx);
			MUTEX_LOCKED(mtx);
			MUTEX_DASSERT(mtx,
			    MUTEX_OWNER(mtx->mtx_owner) == curthread);
			return 1;
		}
	}

	return 0;
}
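
/*
 * Usage sketch (illustrative only; "sc" is a hypothetical softc).  A
 * non-zero return means the lock is now held and must be released with
 * mutex_exit() as usual:
 *
 *	if (!mutex_tryenter(&sc->sc_lock))
 *		return EBUSY;
 *	...
 *	mutex_exit(&sc->sc_lock);
 */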

#if defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL)
/*
 * mutex_spin_retry:
 *
 *	Support routine for mutex_spin_enter().  Assumes that the caller
 *	has already raised the SPL, and adjusted counters.
 */
void
mutex_spin_retry(kmutex_t *mtx)
{
#ifdef MULTIPROCESSOR
	u_int count;
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_FLAG(lsflag);
#ifdef LOCKDEBUG
	u_int spins = 0;
#endif	/* LOCKDEBUG */

	MUTEX_WANTLOCK(mtx);

	LOCKSTAT_ENTER(lsflag);
	LOCKSTAT_START_TIMER(lsflag, spintime);
	count = SPINLOCK_BACKOFF_MIN;

	/*
	 * Spin on the lock word, doing exponential backoff
	 * to reduce cache line ping-ponging between CPUs.
	 */
	do {
		if (panicstr != NULL)
			break;
		while (__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock)) {
			SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
			if (SPINLOCK_SPINOUT(spins))
				MUTEX_ABORT(mtx, "spinout");
#endif	/* LOCKDEBUG */
		}
	} while (!__cpu_simple_lock_try(&mtx->mtx_lock));

	LOCKSTAT_STOP_TIMER(lsflag, spintime);
	LOCKSTAT_EVENT(lsflag, mtx, LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
	LOCKSTAT_EXIT(lsflag);

	MUTEX_LOCKED(mtx);
#else	/* MULTIPROCESSOR */
	MUTEX_ABORT(mtx, "locking against myself");
#endif	/* MULTIPROCESSOR */
}
#endif	/* defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL) */

/*
 * mutex_obj_init:
 *
 *	Initialize the mutex object store.
 */
void
mutex_obj_init(void)
{

	mutex_obj_cache = pool_cache_init(sizeof(struct kmutexobj),
	    coherency_unit, 0, 0, "mutex", NULL, IPL_NONE, mutex_obj_ctor,
	    NULL, NULL);
}

/*
 * mutex_obj_ctor:
 *
 *	Initialize a new lock for the cache.
 */
static int
mutex_obj_ctor(void *arg, void *obj, int flags)
{
	struct kmutexobj *mo = obj;

	mo->mo_magic = MUTEX_OBJ_MAGIC;

	return 0;
}

/*
 * mutex_obj_alloc:
 *
 *	Allocate a single lock object.
 */
kmutex_t *
mutex_obj_alloc(kmutex_type_t type, int ipl)
{
	struct kmutexobj *mo;

	mo = pool_cache_get(mutex_obj_cache, PR_WAITOK);
	mutex_init(&mo->mo_lock, type, ipl);
	mo->mo_refcnt = 1;

	return (kmutex_t *)mo;
}

/*
 * mutex_obj_hold:
 *
 *	Add a single reference to a lock object.  A reference to the object
 *	must already be held, and must be held across this call.
 */
void
mutex_obj_hold(kmutex_t *lock)
{
	struct kmutexobj *mo = (struct kmutexobj *)lock;

	KASSERT(mo->mo_magic == MUTEX_OBJ_MAGIC);
	KASSERT(mo->mo_refcnt > 0);

	atomic_inc_uint(&mo->mo_refcnt);
}

/*
 * mutex_obj_free:
 *
 *	Drop a reference from a lock object.  If the last reference is being
 *	dropped, free the object and return true.  Otherwise, return false.
 */
bool
mutex_obj_free(kmutex_t *lock)
{
	struct kmutexobj *mo = (struct kmutexobj *)lock;

	KASSERT(mo->mo_magic == MUTEX_OBJ_MAGIC);
	KASSERT(mo->mo_refcnt > 0);

	if (atomic_dec_uint_nv(&mo->mo_refcnt) > 0) {
		return false;
	}
	mutex_destroy(&mo->mo_lock);
	pool_cache_put(mutex_obj_cache, mo);
	return true;
}
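
/*
 * Reference-counted lifecycle sketch (illustrative only; the two
 * consumers shown are hypothetical).  The object is destroyed and
 * returned to the pool cache only when the final reference is dropped:
 *
 *	kmutex_t *lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
 *	mutex_obj_hold(lock);		// second consumer takes a reference
 *	...
 *	mutex_obj_free(lock);		// drops to 1, returns false
 *	mutex_obj_free(lock);		// drops to 0, frees, returns true
 */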