/*	$NetBSD: kern_rwlock.c,v 1.65 2020/02/22 21:24:45 ad Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 *
 * The NetBSD implementation differs from that described in the book, in
 * that the locks are partially adaptive.  Lock waiters spin wait while a
 * lock is write held and the holder is still running on a CPU.  The method
 * of choosing which threads to awaken when a lock is released also differs,
 * mainly to take account of the partially adaptive behaviour.
 */
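
/*
 * Illustrative only (not part of the original file): a minimal sketch of
 * how a consumer typically uses this interface, with a hypothetical
 * "frobber" structure.  Readers may hold the lock concurrently while a
 * writer holds it exclusively; see rwlock(9) for the interface proper.
 */
#if 0
#include <sys/rwlock.h>

struct frobber {
	krwlock_t	f_lock;		/* protects f_count */
	int		f_count;
};

static void
frobber_init(struct frobber *f)
{

	rw_init(&f->f_lock);
	f->f_count = 0;
}

static int
frobber_get(struct frobber *f)
{
	int v;

	rw_enter(&f->f_lock, RW_READER);	/* shared hold */
	v = f->f_count;
	rw_exit(&f->f_lock);
	return v;
}

static void
frobber_inc(struct frobber *f)
{

	rw_enter(&f->f_lock, RW_WRITER);	/* exclusive hold */
	f->f_count++;
	rw_exit(&f->f_lock);
}

static void
frobber_fini(struct frobber *f)
{

	rw_destroy(&f->f_lock);
}
#endif	/* illustrative example */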

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.65 2020/02/22 21:24:45 ad Exp $");

#include "opt_lockdebug.h"

#define	__RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/lock.h>
#include <sys/pserialize.h>

#include <dev/lockstat.h>

#include <machine/rwlock.h>

/*
 * LOCKDEBUG
 */

#define	RW_DEBUG_P(rw)	(((rw)->rw_owner & RW_NODEBUG) == 0)

#define	RW_WANTLOCK(rw, op) \
	LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_LOCKED(rw, op) \
	LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL, \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_UNLOCKED(rw, op) \
	LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)
#define	RW_ASSERT(rw, cond) \
do { \
	if (__predict_false(!(cond))) \
		rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
} while (/* CONSTCOND */ 0)
#else
#define	RW_ASSERT(rw, cond)	/* nothing */
#endif	/* DIAGNOSTIC */

/*
 * Memory barriers.
 */
#ifdef __HAVE_ATOMIC_AS_MEMBAR
#define	RW_MEMBAR_ENTER()
#define	RW_MEMBAR_EXIT()
#define	RW_MEMBAR_PRODUCER()
#else
#define	RW_MEMBAR_ENTER()	membar_enter()
#define	RW_MEMBAR_EXIT()	membar_exit()
#define	RW_MEMBAR_PRODUCER()	membar_producer()
#endif

/*
 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 */
#ifdef LOCKDEBUG
#undef	__HAVE_RW_STUBS
#endif

#ifndef __HAVE_RW_STUBS
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
__strong_alias(rw_tryenter,rw_vector_tryenter);
#endif

static void	rw_abort(const char *, size_t, krwlock_t *, const char *);
static void	rw_dump(const volatile void *, lockop_printer_t);
static lwp_t	*rw_owner(wchan_t);

lockops_t rwlock_lockops = {
	.lo_name = "Reader / writer lock",
	.lo_type = LOCKOPS_SLEEP,
	.lo_dump = rw_dump,
};

syncobj_t rw_syncobj = {
	.sobj_flag = SOBJ_SLEEPQ_SORTED,
	.sobj_unsleep = turnstile_unsleep,
	.sobj_changepri = turnstile_changepri,
	.sobj_lendpri = sleepq_lendpri,
	.sobj_owner = rw_owner,
};

/*
 * rw_cas:
 *
 *	Do an atomic compare-and-swap on the lock word.
 */
static inline uintptr_t
rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
	    (void *)o, (void *)n);
}

/*
 * rw_swap:
 *
 *	Do an atomic swap of the lock word.  This is used only when it's
 *	known that the lock word is set up such that it can't be changed
 *	behind us (assert this), so there's no point considering the result.
 */
static inline void
rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
	    (void *)n);

	RW_ASSERT(rw, n == o);
	RW_ASSERT(rw, (o & RW_HAS_WAITERS) != 0);
}

/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
 */
static void
rw_dump(const volatile void *cookie, lockop_printer_t pr)
{
	const volatile krwlock_t *rw = cookie;

	pr("owner/count : %#018lx flags : %#018x\n",
	    (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */
static void __noinline
rw_abort(const char *func, size_t line, krwlock_t *rw, const char *msg)
{

	if (panicstr != NULL)
		return;

	LOCKDEBUG_ABORT(func, line, rw, &rwlock_lockops, msg);
}

/*
 * rw_init:
 *
 *	Initialize a rwlock for use.
 */
void
_rw_init(krwlock_t *rw, uintptr_t return_address)
{

#ifdef LOCKDEBUG
	/* XXX only because the assembly stubs can't handle RW_NODEBUG */
	if (LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address))
		rw->rw_owner = 0;
	else
		rw->rw_owner = RW_NODEBUG;
#else
	rw->rw_owner = 0;
#endif
}

void
rw_init(krwlock_t *rw)
{

	_rw_init(rw, (uintptr_t)__builtin_return_address(0));
}

/*
 * rw_destroy:
 *
 *	Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

	RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0);
	LOCKDEBUG_FREE((rw->rw_owner & RW_NODEBUG) == 0, rw);
}

/*
 * rw_oncpu:
 *
 *	Return true if an rwlock owner is running on a CPU in the system.
 *	If the target is waiting on the kernel big lock, then we must
 *	release it.  This is necessary to avoid deadlock.
 */
static bool
rw_oncpu(uintptr_t owner)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *ci;
	lwp_t *l;

	KASSERT(kpreempt_disabled());

	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
		return false;
	}

	/*
	 * See lwp_dtor() for why dereferencing the LWP pointer is safe.
	 * We must have kernel preemption disabled for that.
	 */
	l = (lwp_t *)(owner & RW_THREAD);
	ci = l->l_cpu;

	if (ci && ci->ci_curlwp == l) {
		/* Target is running; do we need to block? */
		return (ci->ci_biglock_wanted != l);
	}
#endif
	/* Not running.  It may be safe to block now. */
	return false;
}

/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
	turnstile_t *ts;
	int queue;
	lwp_t *l;
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_COUNTER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_FLAG(lsflag);

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, !cpu_intr_p());
	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, op);

	if (panicstr == NULL) {
		KDASSERT(pserialize_not_in_read_section());
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to set
	 * the owner field and the WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
	 */
	if (__predict_true(op == RW_READER)) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
		queue = TS_READER_Q;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
		queue = TS_WRITER_Q;
	}
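
	/*
	 * Informal illustration of the add trick (not in the original
	 * source): starting from an unowned lock word of zero, a reader's
	 * add of RW_READ_INCR leaves a read count of one, while a writer's
	 * add of (curthread | RW_WRITE_LOCKED) installs the owning LWP and
	 * the write-locked bit in a single step.
	 */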

	LOCKSTAT_ENTER(lsflag);

	KPREEMPT_DISABLE(curlwp);
	for (owner = rw->rw_owner;;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		if ((owner & need_wait) == 0) {
			next = rw_cas(rw, owner, (owner + incr) &
			    ~RW_WRITE_WANTED);
			if (__predict_true(next == owner)) {
				/* Got it! */
				RW_MEMBAR_ENTER();
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			owner = next;
			continue;
		}
		if (__predict_false(RW_OWNER(rw) == curthread)) {
			rw_abort(__func__, __LINE__, rw,
			    "locking against myself");
		}
		/*
		 * If the lock owner is running on another CPU, and
		 * there are no existing waiters, then spin.
		 */
		if (rw_oncpu(owner)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			u_int count = SPINLOCK_BACKOFF_MIN;
			do {
				KPREEMPT_ENABLE(curlwp);
				SPINLOCK_BACKOFF(count);
				KPREEMPT_DISABLE(curlwp);
				owner = rw->rw_owner;
			} while (rw_oncpu(owner));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if ((owner & need_wait) == 0)
				continue;
		}

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 * Reload rw_owner because turnstile_lookup() may have
		 * spun on the turnstile chain lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
			turnstile_exit(rw);
			continue;
		}
		next = rw_cas(rw, owner, owner | set_wait);
		if (__predict_false(next != owner)) {
			turnstile_exit(rw);
			owner = next;
			continue;
		}

		LOCKSTAT_START_TIMER(lsflag, slptime);
		turnstile_block(ts, queue, rw, &rw_syncobj);
		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		/*
		 * No need for a memory barrier because of context switch.
		 * If not handed the lock, then spin again.
		 */
		if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
			break;

		owner = rw->rw_owner;
	}
	KPREEMPT_ENABLE(curlwp);

	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK |
	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
	      (uintptr_t)__builtin_return_address(0)));
	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
	      (uintptr_t)__builtin_return_address(0)));
	LOCKSTAT_EXIT(lsflag);

	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	RW_LOCKED(rw, op);
}

/*
 * rw_vector_exit:
 *
 *	Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw)
{
	uintptr_t curthread, owner, decr, newown, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);

	/*
	 * Again, we use a trick.  Since we used an add operation to
	 * set the required lock bits, we can use a subtract to clear
	 * them, which makes the read-release and write-release path
	 * the same.
	 */
	owner = rw->rw_owner;
	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
		RW_UNLOCKED(rw, RW_WRITER);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		decr = curthread | RW_WRITE_LOCKED;
	} else {
		RW_UNLOCKED(rw, RW_READER);
		RW_ASSERT(rw, RW_COUNT(rw) != 0);
		decr = RW_READ_INCR;
	}

	/*
	 * Compute what we expect the new value of the lock to be. Only
	 * proceed to do direct handoff if there are waiters, and if the
	 * lock would become unowned.
	 */
	RW_MEMBAR_EXIT();
	for (;;) {
		newown = (owner - decr);
		if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
			break;
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner))
			return;
		owner = next;
	}

	/*
	 * Grab the turnstile chain lock.  This gets the interlock
	 * on the sleep queue.  Once we have that, we can adjust the
	 * waiter bits.
	 */
	ts = turnstile_lookup(rw);
	owner = rw->rw_owner;
	RW_ASSERT(rw, ts != NULL);
	RW_ASSERT(rw, (owner & RW_HAS_WAITERS) != 0);

	wcnt = TS_WAITERS(ts, TS_WRITER_Q);
	rcnt = TS_WAITERS(ts, TS_READER_Q);

	/*
	 * Give the lock away.
	 *
	 * If we are releasing a write lock, then prefer to wake all
	 * outstanding readers.  Otherwise, wake one writer if there
	 * are outstanding readers, or all writers if there are no
	 * pending readers.  If waking one specific writer, the writer
	 * is handed the lock here.  If waking multiple writers, we
	 * set WRITE_WANTED to block out new readers, and let them
	 * do the work of acquiring the lock in rw_vector_enter().
	 */
	if (rcnt == 0 || decr == RW_READ_INCR) {
		RW_ASSERT(rw, wcnt != 0);
		RW_ASSERT(rw, (owner & RW_WRITE_WANTED) != 0);

		if (rcnt != 0) {
			/* Give the lock to the longest waiting writer. */
			l = TS_FIRST(ts, TS_WRITER_Q);
			newown = (uintptr_t)l | (owner & RW_NODEBUG);
			newown |= RW_WRITE_LOCKED | RW_HAS_WAITERS;
			if (wcnt > 1)
				newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
		} else {
			/* Wake all writers and let them fight it out. */
			newown = owner & RW_NODEBUG;
			newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
		}
	} else {
		RW_ASSERT(rw, rcnt != 0);

		/*
		 * Give the lock to all blocked readers.  If there
		 * is a writer waiting, new readers that arrive
		 * after the release will be blocked out.
		 */
		newown = owner & RW_NODEBUG;
		newown += rcnt << RW_READ_COUNT_SHIFT;
		if (wcnt != 0)
			newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

		/* Wake up all sleeping readers. */
		rw_swap(rw, owner, newown);
		turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
	}
}

/*
 * rw_vector_tryenter:
 *
 *	Try to acquire a rwlock.
 */
int
rw_vector_tryenter(krwlock_t *rw, const krw_t op)
{
	uintptr_t curthread, owner, incr, need_wait, next;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, curthread != 0);

	if (op == RW_READER) {
		incr = RW_READ_INCR;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (owner = rw->rw_owner;; owner = next) {
		if (__predict_false((owner & need_wait) != 0))
			return 0;
		next = rw_cas(rw, owner, owner + incr);
		if (__predict_true(next == owner)) {
			/* Got it! */
			break;
		}
	}

	RW_WANTLOCK(rw, op);
	RW_LOCKED(rw, op);
	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));

	RW_MEMBAR_ENTER();
	return 1;
}
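
/*
 * Illustrative only (not part of the original file): a caller that must
 * not sleep can use rw_tryenter() and defer the work when the lock is
 * contended.  This reuses the hypothetical struct frobber from the
 * example near the top of the file.
 */
#if 0
static bool
frobber_inc_try(struct frobber *f)
{

	if (!rw_tryenter(&f->f_lock, RW_WRITER))
		return false;		/* contended; caller retries later */
	f->f_count++;
	rw_exit(&f->f_lock);
	return true;
}
#endif	/* illustrative example */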

/*
 * rw_downgrade:
 *
 *	Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, newown, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
	RW_UNLOCKED(rw, RW_WRITER);
#if !defined(DIAGNOSTIC)
	__USE(curthread);
#endif

	RW_MEMBAR_PRODUCER();

	for (owner = rw->rw_owner;; owner = next) {
		/*
		 * If there are no waiters we can do this the easy way.  Try
		 * swapping us down to one read hold.  If it fails, the lock
		 * condition has changed and we most likely now have
		 * waiters.
		 */
		if ((owner & RW_HAS_WAITERS) == 0) {
			newown = (owner & RW_NODEBUG);
			next = rw_cas(rw, owner, newown + RW_READ_INCR);
			if (__predict_true(next == owner)) {
				RW_LOCKED(rw, RW_READER);
				RW_ASSERT(rw,
				    (rw->rw_owner & RW_WRITE_LOCKED) == 0);
				RW_ASSERT(rw, RW_COUNT(rw) != 0);
				return;
			}
			continue;
		}

		/*
		 * Grab the turnstile chain lock.  This gets the interlock
		 * on the sleep queue.  Once we have that, we can adjust the
		 * waiter bits.
		 */
		ts = turnstile_lookup(rw);
		RW_ASSERT(rw, ts != NULL);

		rcnt = TS_WAITERS(ts, TS_READER_Q);
		wcnt = TS_WAITERS(ts, TS_WRITER_Q);

		if (rcnt == 0) {
			/*
			 * If there are no readers, just preserve the
			 * waiters bits, swap us down to one read hold and
			 * return.
			 */
			RW_ASSERT(rw, wcnt != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);

			newown = owner & RW_NODEBUG;
			newown |= RW_READ_INCR | RW_HAS_WAITERS |
			    RW_WRITE_WANTED;
			next = rw_cas(rw, owner, newown);
			turnstile_exit(rw);
			if (__predict_true(next == owner))
				break;
		} else {
			/*
			 * Give the lock to all blocked readers.  We may
			 * retain one read hold if downgrading.  If there is
			 * a writer waiting, new readers will be blocked
			 * out.
			 */
			newown = owner & RW_NODEBUG;
			newown += (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
			if (wcnt != 0)
				newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

			next = rw_cas(rw, owner, newown);
			if (__predict_true(next == owner)) {
				/* Wake up all sleeping readers. */
				turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
				break;
			}
			turnstile_exit(rw);
		}
	}

	RW_WANTLOCK(rw, RW_READER);
	RW_LOCKED(rw, RW_READER);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
	RW_ASSERT(rw, RW_COUNT(rw) != 0);
}
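
/*
 * Illustrative only (not part of the original file): rw_downgrade() lets
 * a writer finish its update and keep reading without dropping the lock,
 * so the state it just wrote cannot change underneath it.  Hypothetical
 * struct frobber as above.
 */
#if 0
static int
frobber_inc_and_get(struct frobber *f)
{
	int v;

	rw_enter(&f->f_lock, RW_WRITER);
	f->f_count++;
	rw_downgrade(&f->f_lock);	/* now held as a reader */
	v = f->f_count;			/* stable until rw_exit() */
	rw_exit(&f->f_lock);
	return v;
}
#endif	/* illustrative example */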

/*
 * rw_tryupgrade:
 *
 *	Try to upgrade a read lock to a write lock.  We must be the only
 *	reader.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, newown, next;
	struct lwp *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, rw_read_held(rw));

	for (owner = RW_READ_INCR;; owner = next) {
		newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner)) {
			RW_MEMBAR_PRODUCER();
			break;
		}
		RW_ASSERT(rw, (next & RW_WRITE_LOCKED) == 0);
		if (__predict_false((next & RW_THREAD) != RW_READ_INCR)) {
			RW_ASSERT(rw, (next & RW_THREAD) != 0);
			return 0;
		}
	}

	RW_UNLOCKED(rw, RW_READER);
	RW_WANTLOCK(rw, RW_WRITER);
	RW_LOCKED(rw, RW_WRITER);
	RW_ASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);

	return 1;
}
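
/*
 * Illustrative only (not part of the original file): rw_tryupgrade()
 * fails unless the caller is the only reader, so the usual pattern falls
 * back to dropping the read hold, taking the write hold, and then
 * re-validating whatever was decided while reading.  Hypothetical struct
 * frobber as above.
 */
#if 0
static void
frobber_bump_if_zero(struct frobber *f)
{

	rw_enter(&f->f_lock, RW_READER);
	if (f->f_count == 0) {
		if (!rw_tryupgrade(&f->f_lock)) {
			rw_exit(&f->f_lock);
			rw_enter(&f->f_lock, RW_WRITER);
			if (f->f_count != 0) {	/* re-validate */
				rw_exit(&f->f_lock);
				return;
			}
		}
		f->f_count++;
	}
	rw_exit(&f->f_lock);
}
#endif	/* illustrative example */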

/*
 * rw_read_held:
 *
 *	Returns true if the rwlock is held for reading.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
	uintptr_t owner;

	if (rw == NULL)
		return 0;
	owner = rw->rw_owner;
	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *	Returns true if the rwlock is held for writing.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
	    (RW_WRITE_LOCKED | (uintptr_t)curlwp);
}
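
/*
 * Illustrative only (not part of the original file): the held-checks are
 * meant for assertions like the one below, never for run-time locking
 * decisions.  Hypothetical struct frobber as above.
 */
#if 0
static void
frobber_set_locked(struct frobber *f, int v)
{

	KASSERT(rw_write_held(&f->f_lock));
	f->f_count = v;
}
#endif	/* illustrative example */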

/*
 * rw_lock_held:
 *
 *	Returns true if the rwlock is held for reading or writing.  Must
 *	only be used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & RW_THREAD) != 0;
}

/*
 * rw_lock_op:
 *
 *	For a rwlock that is known to be held by the caller, return
 *	RW_READER or RW_WRITER to describe the hold type.
 */
krw_t
rw_lock_op(krwlock_t *rw)
{

	RW_ASSERT(rw, rw_lock_held(rw));

	return (rw->rw_owner & RW_WRITE_LOCKED) != 0 ? RW_WRITER : RW_READER;
}

/*
 * rw_owner:
 *
 *	Return the current owner of an RW lock, but only if it is write
 *	held.  Used for priority inheritance.
 */
static lwp_t *
rw_owner(wchan_t obj)
{
	krwlock_t *rw = (void *)(uintptr_t)obj;	/* discard qualifiers */
	uintptr_t owner = rw->rw_owner;

	if ((owner & RW_WRITE_LOCKED) == 0)
		return NULL;

	return (void *)(owner & RW_THREAD);
}

/*
 * rw_owner_running:
 *
 *	Return true if a RW lock is unheld, or write held and the owner is
 *	running on a CPU.  For the pagedaemon.
 */
bool
rw_owner_running(const krwlock_t *rw)
{
#ifdef MULTIPROCESSOR
	uintptr_t owner;
	bool rv;

	kpreempt_disable();
	owner = rw->rw_owner;
	rv = (owner & RW_THREAD) == 0 || rw_oncpu(owner);
	kpreempt_enable();
	return rv;
#else
	return rw_owner(rw) == curlwp;
#endif
}