/*	$NetBSD: kern_mutex.c,v 1.36 2008/04/27 14:29:09 ad Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel mutex implementation, modeled after those found in Solaris,
 * a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 */

#define	__MUTEX_PRIVATE

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_mutex.c,v 1.36 2008/04/27 14:29:09 ad Exp $");

#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/mutex.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/kernel.h>
#include <sys/atomic.h>
#include <sys/intr.h>
#include <sys/lock.h>
#include <sys/pool.h>

#include <dev/lockstat.h>

#include <machine/lock.h>

/*
 * When not running a debug kernel, spin mutexes are not much
 * more than an splraiseipl() and splx() pair.
 */

#if defined(DIAGNOSTIC) || defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
#define	FULL
#endif
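
/*
 * A minimal sketch of what that means in practice (illustrative only, not
 * part of the original file): on a non-FULL kernel there is no lock word
 * to manipulate, so entering and exiting a spin mutex reduce to roughly
 *
 *	s = splraiseipl(mtx->mtx_ipl);		-- mutex_spin_enter(mtx)
 *	... critical section ...
 *	splx(s);				-- mutex_spin_exit(mtx)
 *
 * modulo the per-CPU nesting count maintained by MUTEX_SPIN_SPLRAISE() /
 * MUTEX_SPIN_SPLRESTORE() below.
 */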

/*
 * Debugging support.
 */

#define	MUTEX_WANTLOCK(mtx) \
	LOCKDEBUG_WANTLOCK(MUTEX_DEBUG_P(mtx), (mtx), \
	    (uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_LOCKED(mtx) \
	LOCKDEBUG_LOCKED(MUTEX_DEBUG_P(mtx), (mtx), \
	    (uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_UNLOCKED(mtx) \
	LOCKDEBUG_UNLOCKED(MUTEX_DEBUG_P(mtx), (mtx), \
	    (uintptr_t)__builtin_return_address(0), 0)
#define	MUTEX_ABORT(mtx, msg) \
	mutex_abort(mtx, __func__, msg)

#if defined(LOCKDEBUG)

#define	MUTEX_DASSERT(mtx, cond) \
do { \
	if (!(cond)) \
		MUTEX_ABORT(mtx, "assertion failed: " #cond); \
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	MUTEX_DASSERT(mtx, cond)	/* nothing */

#endif	/* LOCKDEBUG */

#if defined(DIAGNOSTIC)

#define	MUTEX_ASSERT(mtx, cond) \
do { \
	if (!(cond)) \
		MUTEX_ABORT(mtx, "assertion failed: " #cond); \
} while (/* CONSTCOND */ 0)

#else	/* DIAGNOSTIC */

#define	MUTEX_ASSERT(mtx, cond)	/* nothing */

#endif	/* DIAGNOSTIC */

/*
 * Spin mutex SPL save / restore.
 */
#ifndef MUTEX_COUNT_BIAS
#define	MUTEX_COUNT_BIAS	0
#endif

#define	MUTEX_SPIN_SPLRAISE(mtx) \
do { \
	struct cpu_info *x__ci; \
	int x__cnt, s; \
	s = splraiseipl(mtx->mtx_ipl); \
	x__ci = curcpu(); \
	x__cnt = x__ci->ci_mtx_count--; \
	if (x__cnt == MUTEX_COUNT_BIAS) \
		x__ci->ci_mtx_oldspl = (s); \
} while (/* CONSTCOND */ 0)

#define	MUTEX_SPIN_SPLRESTORE(mtx) \
do { \
	struct cpu_info *x__ci = curcpu(); \
	int s = x__ci->ci_mtx_oldspl; \
	__insn_barrier(); \
	if (++(x__ci->ci_mtx_count) == MUTEX_COUNT_BIAS) \
		splx(s); \
} while (/* CONSTCOND */ 0)
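
/*
 * A minimal sketch of how the nesting count above behaves, assuming
 * MUTEX_COUNT_BIAS is 0 and two hypothetical spin mutexes 'a' (IPL_VM)
 * and 'b' (IPL_SCHED), for illustration only:
 *
 *	mutex_spin_enter(&a);	-- ci_mtx_count 0 -> -1, old SPL recorded
 *	mutex_spin_enter(&b);	-- ci_mtx_count -1 -> -2, SPL raised further
 *	mutex_spin_exit(&b);	-- ci_mtx_count -2 -> -1, SPL stays raised
 *	mutex_spin_exit(&a);	-- ci_mtx_count -1 -> 0, splx() to saved SPL
 *
 * Only the outermost acquisition records ci_mtx_oldspl, and only the
 * outermost release restores it.
 */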

/*
 * For architectures that provide 'simple' mutexes: the port supplies a
 * CAS function that is either MP-safe, or does not need to be MP-safe
 * (e.g. on uniprocessor-only ports).  Adaptive mutexes on these
 * architectures do not require an additional interlock.
 */

#ifdef __HAVE_SIMPLE_MUTEXES

#define	MUTEX_OWNER(owner) \
	(owner & MUTEX_THREAD)
#define	MUTEX_HAS_WAITERS(mtx) \
	(((int)(mtx)->mtx_owner & MUTEX_BIT_WAITERS) != 0)

#define	MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug) \
do { \
	if (dodebug) \
		(mtx)->mtx_owner |= MUTEX_BIT_DEBUG; \
} while (/* CONSTCOND */ 0)

#define	MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl) \
do { \
	(mtx)->mtx_owner = MUTEX_BIT_SPIN; \
	if (dodebug) \
		(mtx)->mtx_owner |= MUTEX_BIT_DEBUG; \
	(mtx)->mtx_ipl = makeiplcookie((ipl)); \
	__cpu_simple_lock_init(&(mtx)->mtx_lock); \
} while (/* CONSTCOND */ 0)

#define	MUTEX_DESTROY(mtx) \
do { \
	(mtx)->mtx_owner = MUTEX_THREAD; \
} while (/* CONSTCOND */ 0)

#define	MUTEX_SPIN_P(mtx) \
	(((mtx)->mtx_owner & MUTEX_BIT_SPIN) != 0)
#define	MUTEX_ADAPTIVE_P(mtx) \
	(((mtx)->mtx_owner & MUTEX_BIT_SPIN) == 0)

#define	MUTEX_DEBUG_P(mtx)	(((mtx)->mtx_owner & MUTEX_BIT_DEBUG) != 0)
#if defined(LOCKDEBUG)
#define	MUTEX_OWNED(owner)		(((owner) & ~MUTEX_BIT_DEBUG) != 0)
#define	MUTEX_INHERITDEBUG(new, old)	(new) |= (old) & MUTEX_BIT_DEBUG
#else /* defined(LOCKDEBUG) */
#define	MUTEX_OWNED(owner)		((owner) != 0)
#define	MUTEX_INHERITDEBUG(new, old)	/* nothing */
#endif /* defined(LOCKDEBUG) */

static inline int
MUTEX_ACQUIRE(kmutex_t *mtx, uintptr_t curthread)
{
	int rv;
	uintptr_t old = 0;
	uintptr_t new = curthread;

	MUTEX_INHERITDEBUG(old, mtx->mtx_owner);
	MUTEX_INHERITDEBUG(new, old);
	rv = MUTEX_CAS(&mtx->mtx_owner, old, new);
	MUTEX_RECEIVE(mtx);
	return rv;
}

static inline int
MUTEX_SET_WAITERS(kmutex_t *mtx, uintptr_t owner)
{
	int rv;
	rv = MUTEX_CAS(&mtx->mtx_owner, owner, owner | MUTEX_BIT_WAITERS);
	MUTEX_RECEIVE(mtx);
	return rv;
}

static inline void
MUTEX_RELEASE(kmutex_t *mtx)
{
	uintptr_t new;

	MUTEX_GIVE(mtx);
	new = 0;
	MUTEX_INHERITDEBUG(new, mtx->mtx_owner);
	mtx->mtx_owner = new;
}

static inline void
MUTEX_CLEAR_WAITERS(kmutex_t *mtx)
{
	/* nothing */
}
#endif	/* __HAVE_SIMPLE_MUTEXES */
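
/*
 * For reference, a sketch of the machine-dependent glue the inlines above
 * rely on; this is an assumption about a typical port, not code from this
 * file.  A port defining __HAVE_SIMPLE_MUTEXES supplies something roughly
 * equivalent to
 *
 *	#define	MUTEX_CAS(p, o, n) \
 *		(atomic_cas_ulong((volatile unsigned long *)(p), \
 *		    (unsigned long)(o), (unsigned long)(n)) == (o))
 *
 * i.e. a boolean compare-and-swap on the owner word, with MUTEX_RECEIVE()
 * and MUTEX_GIVE() providing acquire/release ordering (typically
 * membar_enter()/membar_exit() on MP kernels, no-ops otherwise).  The real
 * definitions live in the port's <machine/mutex.h> and in <sys/mutex.h>,
 * and may differ.
 */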

/*
 * Patch in stubs via strong alias where they are not available.
 */

#if defined(LOCKDEBUG)
#undef	__HAVE_MUTEX_STUBS
#undef	__HAVE_SPIN_MUTEX_STUBS
#endif

#ifndef __HAVE_MUTEX_STUBS
__strong_alias(mutex_enter,mutex_vector_enter);
__strong_alias(mutex_exit,mutex_vector_exit);
#endif

#ifndef __HAVE_SPIN_MUTEX_STUBS
__strong_alias(mutex_spin_enter,mutex_vector_enter);
__strong_alias(mutex_spin_exit,mutex_vector_exit);
#endif

void	mutex_abort(kmutex_t *, const char *, const char *);
void	mutex_dump(volatile void *);
int	mutex_onproc(uintptr_t, struct cpu_info **);

lockops_t mutex_spin_lockops = {
	"Mutex",
	0,
	mutex_dump
};

lockops_t mutex_adaptive_lockops = {
	"Mutex",
	1,
	mutex_dump
};

syncobj_t mutex_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	turnstile_unsleep,
	turnstile_changepri,
	sleepq_lendpri,
	(void *)mutex_owner,
};

/* Mutex cache */
#define	MUTEX_OBJ_MAGIC	0x5aa3c85d
struct kmutexobj {
	kmutex_t	mo_lock;
	u_int		mo_magic;
	u_int		mo_refcnt;
};

static int	mutex_obj_ctor(void *, void *, int);

static pool_cache_t	mutex_obj_cache;

/*
 * mutex_dump:
 *
 *	Dump the contents of a mutex structure.
 */
void
mutex_dump(volatile void *cookie)
{
	volatile kmutex_t *mtx = cookie;

	printf_nolog("owner field  : %#018lx wait/spin: %16d/%d\n",
	    (long)MUTEX_OWNER(mtx->mtx_owner), MUTEX_HAS_WAITERS(mtx),
	    MUTEX_SPIN_P(mtx));
}

/*
 * mutex_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */

#if __GNUC_PREREQ__(3, 0)
__attribute ((noinline)) __attribute ((noreturn))
#endif
void
mutex_abort(kmutex_t *mtx, const char *func, const char *msg)
{

	LOCKDEBUG_ABORT(mtx, (MUTEX_SPIN_P(mtx) ?
	    &mutex_spin_lockops : &mutex_adaptive_lockops), func, msg);
	/* NOTREACHED */
}

/*
 * mutex_init:
 *
 *	Initialize a mutex for use.  Note that adaptive mutexes are in
 *	essence spin mutexes that can sleep to avoid deadlock and wasting
 *	CPU time.  We can't easily provide a type of mutex that always
 *	sleeps - see comments in mutex_vector_enter() about releasing
 *	mutexes unlocked.
 */
void
mutex_init(kmutex_t *mtx, kmutex_type_t type, int ipl)
{
	bool dodebug;

	memset(mtx, 0, sizeof(*mtx));

	switch (type) {
	case MUTEX_ADAPTIVE:
		KASSERT(ipl == IPL_NONE);
		break;
	case MUTEX_DEFAULT:
	case MUTEX_DRIVER:
		if (ipl == IPL_NONE || ipl == IPL_SOFTCLOCK ||
		    ipl == IPL_SOFTBIO || ipl == IPL_SOFTNET ||
		    ipl == IPL_SOFTSERIAL) {
			type = MUTEX_ADAPTIVE;
		} else {
			type = MUTEX_SPIN;
		}
		break;
	default:
		break;
	}

	switch (type) {
	case MUTEX_NODEBUG:
		dodebug = LOCKDEBUG_ALLOC(mtx, NULL,
		    (uintptr_t)__builtin_return_address(0));
		MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl);
		break;
	case MUTEX_ADAPTIVE:
		dodebug = LOCKDEBUG_ALLOC(mtx, &mutex_adaptive_lockops,
		    (uintptr_t)__builtin_return_address(0));
		MUTEX_INITIALIZE_ADAPTIVE(mtx, dodebug);
		break;
	case MUTEX_SPIN:
		dodebug = LOCKDEBUG_ALLOC(mtx, &mutex_spin_lockops,
		    (uintptr_t)__builtin_return_address(0));
		MUTEX_INITIALIZE_SPIN(mtx, dodebug, ipl);
		break;
	default:
		panic("mutex_init: impossible type");
		break;
	}
}
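
/*
 * A minimal usage sketch (hypothetical driver code, for illustration only):
 *
 *	struct frob_softc {
 *		kmutex_t	sc_lock;	-- protects sc_count
 *		int		sc_count;
 *	};
 *
 *	mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_NONE);	-- adaptive
 *	mutex_enter(&sc->sc_lock);
 *	sc->sc_count++;
 *	mutex_exit(&sc->sc_lock);
 *	mutex_destroy(&sc->sc_lock);
 *
 * Passing MUTEX_DEFAULT with an IPL above the soft interrupt levels
 * (e.g. IPL_VM) yields a spin mutex instead, as per the first switch
 * statement above.
 */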

/*
 * mutex_destroy:
 *
 *	Tear down a mutex.
 */
void
mutex_destroy(kmutex_t *mtx)
{

	if (MUTEX_ADAPTIVE_P(mtx)) {
		MUTEX_ASSERT(mtx, !MUTEX_OWNED(mtx->mtx_owner) &&
		    !MUTEX_HAS_WAITERS(mtx));
	} else {
		MUTEX_ASSERT(mtx, !__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock));
	}

	LOCKDEBUG_FREE(MUTEX_DEBUG_P(mtx), mtx);
	MUTEX_DESTROY(mtx);
}

/*
 * mutex_onproc:
 *
 *	Return true if an adaptive mutex owner is running on a CPU in the
 *	system.  If the target is waiting on the kernel big lock, then we
 *	must release it.  This is necessary to avoid deadlock.
 *
 *	Note that we can't use the mutex owner field as an LWP pointer.  We
 *	don't have full control over the timing of our execution, and so the
 *	pointer could be completely invalid by the time we dereference it.
 */
#ifdef MULTIPROCESSOR
int
mutex_onproc(uintptr_t owner, struct cpu_info **cip)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct lwp *l;

	if (!MUTEX_OWNED(owner))
		return 0;
	l = (struct lwp *)MUTEX_OWNER(owner);

	/* See if the target is running on a CPU somewhere. */
	if ((ci = *cip) != NULL && ci->ci_curlwp == l)
		goto run;
	for (CPU_INFO_FOREACH(cii, ci))
		if (ci->ci_curlwp == l)
			goto run;

	/* No: it may be safe to block now. */
	*cip = NULL;
	return 0;

 run:
	/* Target is running; do we need to block? */
	*cip = ci;
	return ci->ci_biglock_wanted != l;
}
#endif	/* MULTIPROCESSOR */

/*
 * mutex_vector_enter:
 *
 *	Support routine for mutex_enter() that must handle all cases.  In
 *	the LOCKDEBUG case, mutex_enter() is always aliased here, even if
 *	fast-path stubs are available.  If a mutex_spin_enter() stub is
 *	not available, then it is also aliased directly here.
 */
void
mutex_vector_enter(kmutex_t *mtx)
{
	uintptr_t owner, curthread;
	turnstile_t *ts;
#ifdef MULTIPROCESSOR
	struct cpu_info *ci = NULL;
	u_int count;
#endif
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_COUNTER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_FLAG(lsflag);

	/*
	 * Handle spin mutexes.
	 */
	if (MUTEX_SPIN_P(mtx)) {
#if defined(LOCKDEBUG) && defined(MULTIPROCESSOR)
		u_int spins = 0;
#endif
		MUTEX_SPIN_SPLRAISE(mtx);
		MUTEX_WANTLOCK(mtx);
#ifdef FULL
		if (__cpu_simple_lock_try(&mtx->mtx_lock)) {
			MUTEX_LOCKED(mtx);
			return;
		}
#if !defined(MULTIPROCESSOR)
		MUTEX_ABORT(mtx, "locking against myself");
#else /* !MULTIPROCESSOR */

		LOCKSTAT_ENTER(lsflag);
		LOCKSTAT_START_TIMER(lsflag, spintime);
		count = SPINLOCK_BACKOFF_MIN;

		/*
		 * Spin testing the lock word, doing exponential backoff
		 * to reduce cache line ping-ponging between CPUs.
		 */
		do {
			if (panicstr != NULL)
				break;
			while (__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock)) {
				SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
				if (SPINLOCK_SPINOUT(spins))
					MUTEX_ABORT(mtx, "spinout");
#endif	/* LOCKDEBUG */
			}
		} while (!__cpu_simple_lock_try(&mtx->mtx_lock));

		if (count != SPINLOCK_BACKOFF_MIN) {
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_EVENT(lsflag, mtx,
			    LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
		}
		LOCKSTAT_EXIT(lsflag);
#endif	/* !MULTIPROCESSOR */
#endif	/* FULL */
		MUTEX_LOCKED(mtx);
		return;
	}

	curthread = (uintptr_t)curlwp;

	MUTEX_DASSERT(mtx, MUTEX_ADAPTIVE_P(mtx));
	MUTEX_ASSERT(mtx, curthread != 0);
	MUTEX_WANTLOCK(mtx);

	if (panicstr == NULL) {
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	LOCKSTAT_ENTER(lsflag);

	/*
	 * Adaptive mutex; spin trying to acquire the mutex.  If we
	 * determine that the owner is not running on a processor,
	 * then we stop spinning, and sleep instead.
	 */
	for (owner = mtx->mtx_owner;;) {
		if (!MUTEX_OWNED(owner)) {
			/*
			 * Mutex owner clear could mean two things:
			 *
			 *	* The mutex has been released.
			 *	* The owner field hasn't been set yet.
			 *
			 * Try to acquire it again.  If that fails,
			 * we'll just loop again.
			 */
			if (MUTEX_ACQUIRE(mtx, curthread))
				break;
			owner = mtx->mtx_owner;
			continue;
		}

		if (panicstr != NULL)
			return;
		if (MUTEX_OWNER(owner) == curthread)
			MUTEX_ABORT(mtx, "locking against myself");

#ifdef MULTIPROCESSOR
		/*
		 * Check to see if the owner is running on a processor.
		 * If so, then we should just spin, as the owner will
		 * likely release the lock very soon.
		 */
		if (mutex_onproc(owner, &ci)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			count = SPINLOCK_BACKOFF_MIN;
			for (;;) {
				SPINLOCK_BACKOFF(count);
				owner = mtx->mtx_owner;
				if (!mutex_onproc(owner, &ci))
					break;
			}
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if (!MUTEX_OWNED(owner))
				continue;
		}
#endif

		ts = turnstile_lookup(mtx);

		/*
		 * Once we have the turnstile chain interlock, mark the
		 * mutex as having waiters.  If that fails, spin again:
		 * chances are that the mutex has been released.
		 */
		if (!MUTEX_SET_WAITERS(mtx, owner)) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
			continue;
		}

#ifdef MULTIPROCESSOR
		/*
		 * mutex_exit() is permitted to release the mutex without
		 * any interlocking instructions, and the following can
		 * occur as a result:
		 *
		 *	  CPU 1: MUTEX_SET_WAITERS()	CPU 2: mutex_exit()
		 *	-------------------------------------------------------
		 *		..			acquire cache line
		 *		..			test for waiters
		 *	  acquire cache line	<-	lose cache line
		 *	  lock cache line		..
		 *	  verify mutex is held		..
		 *	  set waiters			..
		 *	  unlock cache line		..
		 *	  lose cache line	->	acquire cache line
		 *		..			clear lock word, waiters
		 *					return success
		 *
		 * There is another race that can occur: a third CPU could
		 * acquire the mutex as soon as it is released.  Since
		 * adaptive mutexes are primarily spin mutexes, this is not
		 * something that we need to worry about too much.  What we
		 * do need to ensure is that the waiters bit gets set.
		 *
		 * To allow the unlocked release, we need to make some
		 * assumptions here:
		 *
		 * o Release is the only non-atomic/unlocked operation
		 *   that can be performed on the mutex.  (It must still
		 *   be atomic on the local CPU, e.g. in case interrupted
		 *   or preempted).
		 *
		 * o At any given time, MUTEX_SET_WAITERS() can only ever
		 *   be in progress on one CPU in the system - guaranteed
		 *   by the turnstile chain lock.
		 *
		 * o No other operations other than MUTEX_SET_WAITERS()
		 *   and release can modify a mutex with a non-zero
		 *   owner field.
		 *
		 * o The result of a successful MUTEX_SET_WAITERS() call
		 *   is an unbuffered write that is immediately visible
		 *   to all other processors in the system.
		 *
		 * o If the holding LWP switches away, it posts a store
		 *   fence before changing curlwp, ensuring that any
		 *   overwrite of the mutex waiters flag by mutex_exit()
		 *   completes before the modification of curlwp becomes
		 *   visible to this CPU.
		 *
		 * o mi_switch() posts a store fence before setting curlwp
		 *   and before resuming execution of an LWP.
		 *
		 * o _kernel_lock() posts a store fence before setting
		 *   curcpu()->ci_biglock_wanted, and after clearing it.
		 *   This ensures that any overwrite of the mutex waiters
		 *   flag by mutex_exit() completes before the modification
		 *   of ci_biglock_wanted becomes visible.
		 *
		 * We now post a read memory barrier (after setting the
		 * waiters field) and check the lock holder's status again.
		 * Some of the possible outcomes (not an exhaustive list):
		 *
		 * 1. The onproc check returns true: the holding LWP is
		 *    running again.  The lock may be released soon and
		 *    we should spin.  Importantly, we can't trust the
		 *    value of the waiters flag.
		 *
		 * 2. The onproc check returns false: the holding LWP is
		 *    not running.  We now have the opportunity to check
		 *    if mutex_exit() has blatted the modifications made
		 *    by MUTEX_SET_WAITERS().
		 *
		 * 3. The onproc check returns false: the holding LWP may
		 *    or may not be running.  It has context switched at
		 *    some point during our check.  Again, we have the
		 *    chance to see if the waiters bit is still set or
		 *    has been overwritten.
		 *
		 * 4. The onproc check returns false: the holding LWP is
		 *    running on a CPU, but wants the big lock.  It's OK
		 *    to check the waiters field in this case.
		 *
		 * 5. The has-waiters check fails: the mutex has been
		 *    released, the waiters flag cleared and another LWP
		 *    now owns the mutex.
		 *
		 * 6. The has-waiters check fails: the mutex has been
		 *    released.
		 *
		 * If the waiters bit is not set it's unsafe to go to sleep,
		 * as we might never be awoken.
		 */
		if ((membar_consumer(), mutex_onproc(owner, &ci)) ||
		    (membar_consumer(), !MUTEX_HAS_WAITERS(mtx))) {
			turnstile_exit(mtx);
			owner = mtx->mtx_owner;
			continue;
		}
#endif	/* MULTIPROCESSOR */

		LOCKSTAT_START_TIMER(lsflag, slptime);

		turnstile_block(ts, TS_WRITER_Q, mtx, &mutex_syncobj);

		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		owner = mtx->mtx_owner;
	}

	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SLEEP1,
	    slpcnt, slptime);
	LOCKSTAT_EVENT(lsflag, mtx, LB_ADAPTIVE_MUTEX | LB_SPIN,
	    spincnt, spintime);
	LOCKSTAT_EXIT(lsflag);

	MUTEX_DASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
	MUTEX_LOCKED(mtx);
}

/*
 * mutex_vector_exit:
 *
 *	Support routine for mutex_exit() that handles all cases.
 */
void
mutex_vector_exit(kmutex_t *mtx)
{
	turnstile_t *ts;
	uintptr_t curthread;

	if (MUTEX_SPIN_P(mtx)) {
#ifdef FULL
		if (__predict_false(!__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock))) {
			if (panicstr != NULL)
				return;
			MUTEX_ABORT(mtx, "exiting unheld spin mutex");
		}
		MUTEX_UNLOCKED(mtx);
		__cpu_simple_unlock(&mtx->mtx_lock);
#endif
		MUTEX_SPIN_SPLRESTORE(mtx);
		return;
	}

	if (__predict_false((uintptr_t)panicstr | cold)) {
		MUTEX_UNLOCKED(mtx);
		MUTEX_RELEASE(mtx);
		return;
	}

	curthread = (uintptr_t)curlwp;
	MUTEX_DASSERT(mtx, curthread != 0);
	MUTEX_ASSERT(mtx, MUTEX_OWNER(mtx->mtx_owner) == curthread);
	MUTEX_UNLOCKED(mtx);

#ifdef LOCKDEBUG
	/*
	 * Avoid having to take the turnstile chain lock every time
	 * around.  Raise the priority level to splhigh() in order
	 * to disable preemption and so make the following atomic.
	 */
	{
		int s = splhigh();
		if (!MUTEX_HAS_WAITERS(mtx)) {
			MUTEX_RELEASE(mtx);
			splx(s);
			return;
		}
		splx(s);
	}
#endif

	/*
	 * Get this lock's turnstile.  This gets the interlock on
	 * the sleep queue.  Once we have that, we can clear the
	 * lock.  If there was no turnstile for the lock, there
	 * were no waiters remaining.
	 */
	ts = turnstile_lookup(mtx);

	if (ts == NULL) {
		MUTEX_RELEASE(mtx);
		turnstile_exit(mtx);
	} else {
		MUTEX_RELEASE(mtx);
		turnstile_wakeup(ts, TS_WRITER_Q,
		    TS_WAITERS(ts, TS_WRITER_Q), NULL);
	}
}

#ifndef __HAVE_SIMPLE_MUTEXES
/*
 * mutex_wakeup:
 *
 *	Support routine for mutex_exit() that wakes up all waiters.
 *	We assume that the mutex has been released, but it need not
 *	be.
 */
void
mutex_wakeup(kmutex_t *mtx)
{
	turnstile_t *ts;

	ts = turnstile_lookup(mtx);
	if (ts == NULL) {
		turnstile_exit(mtx);
		return;
	}
	MUTEX_CLEAR_WAITERS(mtx);
	turnstile_wakeup(ts, TS_WRITER_Q, TS_WAITERS(ts, TS_WRITER_Q), NULL);
}
#endif	/* !__HAVE_SIMPLE_MUTEXES */

/*
 * mutex_owned:
 *
 *	Return true if the current LWP (adaptive) or CPU (spin)
 *	holds the mutex.
 */
int
mutex_owned(kmutex_t *mtx)
{

	if (mtx == NULL)
		return 0;
	if (MUTEX_ADAPTIVE_P(mtx))
		return MUTEX_OWNER(mtx->mtx_owner) == (uintptr_t)curlwp;
#ifdef FULL
	return __SIMPLELOCK_LOCKED_P(&mtx->mtx_lock);
#else
	return 1;
#endif
}
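
/*
 * mutex_owned() is intended for assertions, for example (hypothetical
 * caller, for illustration only):
 *
 *	KASSERT(mutex_owned(&sc->sc_lock));
 *
 * Note that for a spin mutex on a kernel built without FULL it always
 * returns 1, so it must not be used to make run-time locking decisions.
 */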

/*
 * mutex_owner:
 *
 *	Return the current owner of an adaptive mutex.  Used for
 *	priority inheritance.
 */
lwp_t *
mutex_owner(kmutex_t *mtx)
{

	MUTEX_ASSERT(mtx, MUTEX_ADAPTIVE_P(mtx));
	return (struct lwp *)MUTEX_OWNER(mtx->mtx_owner);
}

/*
 * mutex_tryenter:
 *
 *	Try to acquire the mutex; return non-zero if we did.
 */
int
mutex_tryenter(kmutex_t *mtx)
{
	uintptr_t curthread;

	/*
	 * Handle spin mutexes.
	 */
	if (MUTEX_SPIN_P(mtx)) {
		MUTEX_SPIN_SPLRAISE(mtx);
#ifdef FULL
		if (__cpu_simple_lock_try(&mtx->mtx_lock)) {
			MUTEX_WANTLOCK(mtx);
			MUTEX_LOCKED(mtx);
			return 1;
		}
		MUTEX_SPIN_SPLRESTORE(mtx);
#else
		MUTEX_WANTLOCK(mtx);
		MUTEX_LOCKED(mtx);
		return 1;
#endif
	} else {
		curthread = (uintptr_t)curlwp;
		MUTEX_ASSERT(mtx, curthread != 0);
		if (MUTEX_ACQUIRE(mtx, curthread)) {
			MUTEX_WANTLOCK(mtx);
			MUTEX_LOCKED(mtx);
			MUTEX_DASSERT(mtx,
			    MUTEX_OWNER(mtx->mtx_owner) == curthread);
			return 1;
		}
	}

	return 0;
}

#if defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL)
/*
 * mutex_spin_retry:
 *
 *	Support routine for mutex_spin_enter().  Assumes that the caller
 *	has already raised the SPL, and adjusted counters.
 */
void
mutex_spin_retry(kmutex_t *mtx)
{
#ifdef MULTIPROCESSOR
	u_int count;
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_FLAG(lsflag);
#ifdef LOCKDEBUG
	u_int spins = 0;
#endif	/* LOCKDEBUG */

	MUTEX_WANTLOCK(mtx);

	LOCKSTAT_ENTER(lsflag);
	LOCKSTAT_START_TIMER(lsflag, spintime);
	count = SPINLOCK_BACKOFF_MIN;

	/*
	 * Spin testing the lock word, doing exponential backoff
	 * to reduce cache line ping-ponging between CPUs.
	 */
	do {
		if (panicstr != NULL)
			break;
		while (__SIMPLELOCK_LOCKED_P(&mtx->mtx_lock)) {
			SPINLOCK_BACKOFF(count);
#ifdef LOCKDEBUG
			if (SPINLOCK_SPINOUT(spins))
				MUTEX_ABORT(mtx, "spinout");
#endif	/* LOCKDEBUG */
		}
	} while (!__cpu_simple_lock_try(&mtx->mtx_lock));

	LOCKSTAT_STOP_TIMER(lsflag, spintime);
	LOCKSTAT_EVENT(lsflag, mtx, LB_SPIN_MUTEX | LB_SPIN, 1, spintime);
	LOCKSTAT_EXIT(lsflag);

	MUTEX_LOCKED(mtx);
#else	/* MULTIPROCESSOR */
	MUTEX_ABORT(mtx, "locking against myself");
#endif	/* MULTIPROCESSOR */
}
#endif	/* defined(__HAVE_SPIN_MUTEX_STUBS) || defined(FULL) */

/*
 * mutex_obj_init:
 *
 *	Initialize the mutex object store.
 */
void
mutex_obj_init(void)
{

	mutex_obj_cache = pool_cache_init(sizeof(struct kmutexobj),
	    coherency_unit, 0, 0, "mutex", NULL, IPL_NONE, mutex_obj_ctor,
	    NULL, NULL);
}

/*
 * mutex_obj_ctor:
 *
 *	Initialize a new lock for the cache.
 */
static int
mutex_obj_ctor(void *arg, void *obj, int flags)
{
	struct kmutexobj *mo = obj;

	mo->mo_magic = MUTEX_OBJ_MAGIC;

	return 0;
}

/*
 * mutex_obj_alloc:
 *
 *	Allocate a single lock object.
 */
kmutex_t *
mutex_obj_alloc(kmutex_type_t type, int ipl)
{
	struct kmutexobj *mo;

	mo = pool_cache_get(mutex_obj_cache, PR_WAITOK);
	mutex_init(&mo->mo_lock, type, ipl);
	mo->mo_refcnt = 1;

	return (kmutex_t *)mo;
}

/*
 * mutex_obj_hold:
 *
 *	Add a single reference to a lock object.  A reference to the object
 *	must already be held, and must be held across this call.
 */
void
mutex_obj_hold(kmutex_t *lock)
{
	struct kmutexobj *mo = (struct kmutexobj *)lock;

	KASSERT(mo->mo_magic == MUTEX_OBJ_MAGIC);
	KASSERT(mo->mo_refcnt > 0);

	atomic_inc_uint(&mo->mo_refcnt);
}

/*
 * mutex_obj_free:
 *
 *	Drop a reference from a lock object.  If the last reference is being
 *	dropped, free the object and return true.  Otherwise, return false.
 */
bool
mutex_obj_free(kmutex_t *lock)
{
	struct kmutexobj *mo = (struct kmutexobj *)lock;

	KASSERT(mo->mo_magic == MUTEX_OBJ_MAGIC);
	KASSERT(mo->mo_refcnt > 0);

	if (atomic_dec_uint_nv(&mo->mo_refcnt) > 0) {
		return false;
	}
	mutex_destroy(&mo->mo_lock);
	pool_cache_put(mutex_obj_cache, mo);
	return true;
}
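
/*
 * A minimal usage sketch of the reference-counted interface above
 * (hypothetical callers, for illustration only):
 *
 *	kmutex_t *lock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE);
 *
 *	mutex_obj_hold(lock);	-- a second holder takes a reference
 *	...
 *	mutex_obj_free(lock);	-- refcnt 2 -> 1, lock still valid
 *	mutex_obj_free(lock);	-- last reference: destroyed, returned to pool
 *
 * Each holder calls mutex_obj_free() exactly once; the object is handed
 * back to mutex_obj_cache only when the final reference is dropped.
 */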