/*	$NetBSD: kern_rwlock.c,v 1.66.4.2 2023/07/31 14:45:59 martin Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	    Richard McDougall.
 *
 * The NetBSD implementation differs from that described in the book, in
 * that the locks are partially adaptive.  Lock waiters spin wait while a
 * lock is write held and the holder is still running on a CPU.  The method
 * of choosing which threads to awaken when a lock is released also differs,
 * mainly to take account of the partially adaptive behaviour.
 */
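
/*
 * Illustrative usage sketch (illustration only, not part of this file's
 * logic; the authoritative interface is documented in rwlock(9)).  A
 * typical consumer looks like:
 *
 *	krwlock_t lock;
 *
 *	rw_init(&lock);
 *
 *	rw_enter(&lock, RW_READER);	(shared hold, may sleep)
 *	...examine the protected data...
 *	rw_exit(&lock);
 *
 *	rw_enter(&lock, RW_WRITER);	(exclusive hold, may sleep)
 *	...modify the protected data...
 *	rw_exit(&lock);
 *
 *	rw_destroy(&lock);
 */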

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.66.4.2 2023/07/31 14:45:59 martin Exp $");

#include "opt_lockdebug.h"

#define	__RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/lock.h>
#include <sys/pserialize.h>

#include <dev/lockstat.h>

#include <machine/rwlock.h>

/*
 * LOCKDEBUG
 */

#define	RW_DEBUG_P(rw)		(((rw)->rw_owner & RW_NODEBUG) == 0)

#define	RW_WANTLOCK(rw, op) \
	LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_LOCKED(rw, op) \
	LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL, \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_UNLOCKED(rw, op) \
	LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)
#define	RW_ASSERT(rw, cond) \
do { \
	if (__predict_false(!(cond))) \
		rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
} while (/* CONSTCOND */ 0)
#else
#define	RW_ASSERT(rw, cond)	/* nothing */
#endif	/* DIAGNOSTIC */

/*
 * Memory barriers.  These are no-ops when the atomic operations already
 * imply the required ordering (__HAVE_ATOMIC_AS_MEMBAR); otherwise the
 * acquire/release barriers pair with the CAS operations in the enter
 * and exit paths below.
 */
#ifdef __HAVE_ATOMIC_AS_MEMBAR
#define	RW_MEMBAR_ACQUIRE()
#define	RW_MEMBAR_RELEASE()
#else
#define	RW_MEMBAR_ACQUIRE()		membar_acquire()
#define	RW_MEMBAR_RELEASE()		membar_release()
#endif

/*
 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 */
#ifdef LOCKDEBUG
#undef	__HAVE_RW_STUBS
#endif

#ifndef __HAVE_RW_STUBS
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
__strong_alias(rw_tryenter,rw_vector_tryenter);
#endif

static void	rw_abort(const char *, size_t, krwlock_t *, const char *);
static void	rw_dump(const volatile void *, lockop_printer_t);
static lwp_t	*rw_owner(wchan_t);

lockops_t rwlock_lockops = {
	.lo_name = "Reader / writer lock",
	.lo_type = LOCKOPS_SLEEP,
	.lo_dump = rw_dump,
};

syncobj_t rw_syncobj = {
	.sobj_flag	= SOBJ_SLEEPQ_SORTED,
	.sobj_unsleep	= turnstile_unsleep,
	.sobj_changepri	= turnstile_changepri,
	.sobj_lendpri	= sleepq_lendpri,
	.sobj_owner	= rw_owner,
};

/*
 * rw_cas:
 *
 *	Do an atomic compare-and-swap on the lock word.
 */
static inline uintptr_t
rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
	    (void *)o, (void *)n);
}

/*
 * rw_swap:
 *
 *	Do an atomic swap of the lock word.  This is used only when it's
 *	known that the lock word is set up such that it can't be changed
 *	behind us (assert this), so there's no point considering the result.
 */
static inline void
rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
	    (void *)n);

	RW_ASSERT(rw, n == o);
	RW_ASSERT(rw, (o & RW_HAS_WAITERS) != 0);
}

/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
 */
static void
rw_dump(const volatile void *cookie, lockop_printer_t pr)
{
	const volatile krwlock_t *rw = cookie;

	pr("owner/count : %#018lx flags : %#018x\n",
	    (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */
static void __noinline
rw_abort(const char *func, size_t line, krwlock_t *rw, const char *msg)
{

	if (__predict_false(panicstr != NULL))
		return;

	LOCKDEBUG_ABORT(func, line, rw, &rwlock_lockops, msg);
}

/*
 * rw_init:
 *
 *	Initialize a rwlock for use.
 */
void
_rw_init(krwlock_t *rw, uintptr_t return_address)
{

#ifdef LOCKDEBUG
	/* XXX only because the assembly stubs can't handle RW_NODEBUG */
	if (LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address))
		rw->rw_owner = 0;
	else
		rw->rw_owner = RW_NODEBUG;
#else
	rw->rw_owner = 0;
#endif
}

void
rw_init(krwlock_t *rw)
{

	_rw_init(rw, (uintptr_t)__builtin_return_address(0));
}

/*
 * rw_destroy:
 *
 *	Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

	RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0);
	LOCKDEBUG_FREE((rw->rw_owner & RW_NODEBUG) == 0, rw);
}

/*
 * rw_oncpu:
 *
 *	Return true if an rwlock owner is running on a CPU in the system.
 *	If the target is waiting on the kernel big lock, then we must
 *	release it.  This is necessary to avoid deadlock.
 */
static bool
rw_oncpu(uintptr_t owner)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *ci;
	lwp_t *l;

	KASSERT(kpreempt_disabled());

	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
		return false;
	}

	/*
	 * See lwp_dtor() for why dereferencing the LWP pointer is safe.
	 * We must have kernel preemption disabled for that.
	 */
	l = (lwp_t *)(owner & RW_THREAD);
	ci = l->l_cpu;

	if (ci && ci->ci_curlwp == l) {
		/* Target is running; do we need to block? */
		return (ci->ci_biglock_wanted != l);
	}
#endif
	/* Not running.  It may be safe to block now. */
	return false;
}

/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
	turnstile_t *ts;
	int queue;
	lwp_t *l;
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_TIMER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_FLAG(lsflag);

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, !cpu_intr_p());
	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, op);

	if (__predict_true(panicstr == NULL)) {
		KDASSERT(pserialize_not_in_read_section());
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
	 */
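	/*
	 * For example (illustration only, derived from the constants used
	 * below): taking a free lock (rw_owner == 0) as a writer adds
	 * (curthread | RW_WRITE_LOCKED), setting the owner and the write
	 * bit in one step; taking it as a reader adds RW_READ_INCR,
	 * bumping the hold count kept in the upper bits of the word.
	 * rw_vector_exit() undoes either case with the matching subtraction.
	 */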
	if (__predict_true(op == RW_READER)) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
		queue = TS_READER_Q;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
		queue = TS_WRITER_Q;
	}

	LOCKSTAT_ENTER(lsflag);

	KPREEMPT_DISABLE(curlwp);
	for (owner = rw->rw_owner;;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		if ((owner & need_wait) == 0) {
			next = rw_cas(rw, owner, (owner + incr) &
			    ~RW_WRITE_WANTED);
			if (__predict_true(next == owner)) {
				/* Got it! */
				RW_MEMBAR_ACQUIRE();
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			owner = next;
			continue;
		}
		if (__predict_false(RW_OWNER(rw) == curthread)) {
			rw_abort(__func__, __LINE__, rw,
			    "locking against myself");
		}
		/*
		 * If the lock owner is running on another CPU, and
		 * there are no existing waiters, then spin.
		 */
		if (rw_oncpu(owner)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			u_int count = SPINLOCK_BACKOFF_MIN;
			do {
				KPREEMPT_ENABLE(curlwp);
				SPINLOCK_BACKOFF(count);
				KPREEMPT_DISABLE(curlwp);
				owner = rw->rw_owner;
			} while (rw_oncpu(owner));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if ((owner & need_wait) == 0)
				continue;
		}

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 * Reload rw_owner because turnstile_lookup() may have
		 * spun on the turnstile chain lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
			turnstile_exit(rw);
			continue;
		}
		next = rw_cas(rw, owner, owner | set_wait);
		/* XXX membar? */
		if (__predict_false(next != owner)) {
			turnstile_exit(rw);
			owner = next;
			continue;
		}

		LOCKSTAT_START_TIMER(lsflag, slptime);
		turnstile_block(ts, queue, rw, &rw_syncobj);
		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		/*
		 * No need for a memory barrier because of the context
		 * switch.  If not handed the lock, then spin again.
		 */
		if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
			break;

		owner = rw->rw_owner;
	}
	KPREEMPT_ENABLE(curlwp);

	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK |
	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
	      (uintptr_t)__builtin_return_address(0)));
	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
	      (uintptr_t)__builtin_return_address(0)));
	LOCKSTAT_EXIT(lsflag);

	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	RW_LOCKED(rw, op);
}

/*
 * rw_vector_exit:
 *
 *	Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw)
{
	uintptr_t curthread, owner, decr, newown, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);

	/*
	 * Again, we use a trick.  Since we used an add operation to
	 * set the required lock bits, we can use a subtract to clear
	 * them, which makes the read-release and write-release path
	 * the same.
	 */
	owner = rw->rw_owner;
	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
		RW_UNLOCKED(rw, RW_WRITER);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		decr = curthread | RW_WRITE_LOCKED;
	} else {
		RW_UNLOCKED(rw, RW_READER);
		RW_ASSERT(rw, RW_COUNT(rw) != 0);
		decr = RW_READ_INCR;
	}

	/*
	 * Compute what we expect the new value of the lock to be.  Only
	 * proceed to do direct handoff if there are waiters, and if the
	 * lock would become unowned.
	 */
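	/*
	 * For example (illustration only): dropping the last read hold
	 * while RW_HAS_WAITERS is set leaves the thread/count field zero
	 * with the waiters bit still on, so we break out of the loop and
	 * hand the lock off below.  Dropping one of several read holds
	 * leaves RW_THREAD non-zero, so the CAS releases the hold without
	 * ever touching the turnstile.
	 */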
	RW_MEMBAR_RELEASE();
	for (;;) {
		newown = (owner - decr);
		if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
			break;
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner))
			return;
		owner = next;
	}

	/*
	 * Grab the turnstile chain lock.  This gets the interlock
	 * on the sleep queue.  Once we have that, we can adjust the
	 * waiter bits.
	 */
	ts = turnstile_lookup(rw);
	owner = rw->rw_owner;
	RW_ASSERT(rw, ts != NULL);
	RW_ASSERT(rw, (owner & RW_HAS_WAITERS) != 0);

	wcnt = TS_WAITERS(ts, TS_WRITER_Q);
	rcnt = TS_WAITERS(ts, TS_READER_Q);

	/*
	 * Give the lock away.
	 *
	 * If we are releasing a write lock, then prefer to wake all
	 * outstanding readers.  Otherwise, wake one writer if there
	 * are outstanding readers, or all writers if there are no
	 * pending readers.  If waking one specific writer, the writer
	 * is handed the lock here.  If waking multiple writers, we
	 * set WRITE_WANTED to block out new readers, and let them
	 * do the work of acquiring the lock in rw_vector_enter().
	 */
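	/*
	 * For example (illustration only): a writer releasing with three
	 * readers and two writers queued takes the final branch below;
	 * all three readers are woken, the count becomes
	 * (3 << RW_READ_COUNT_SHIFT), and RW_HAS_WAITERS | RW_WRITE_WANTED
	 * stay set on behalf of the two writers still queued.
	 */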
	if (rcnt == 0 || decr == RW_READ_INCR) {
		RW_ASSERT(rw, wcnt != 0);
		RW_ASSERT(rw, (owner & RW_WRITE_WANTED) != 0);

		if (rcnt != 0) {
			/* Give the lock to the longest waiting writer. */
			l = TS_FIRST(ts, TS_WRITER_Q);
			newown = (uintptr_t)l | (owner & RW_NODEBUG);
			newown |= RW_WRITE_LOCKED | RW_HAS_WAITERS;
			if (wcnt > 1)
				newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
		} else {
			/* Wake all writers and let them fight it out. */
			newown = owner & RW_NODEBUG;
			newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
		}
	} else {
		RW_ASSERT(rw, rcnt != 0);

		/*
		 * Give the lock to all blocked readers.  If there
		 * is a writer waiting, new readers that arrive
		 * after the release will be blocked out.
		 */
		newown = owner & RW_NODEBUG;
		newown += rcnt << RW_READ_COUNT_SHIFT;
		if (wcnt != 0)
			newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

		/* Wake up all sleeping readers. */
		rw_swap(rw, owner, newown);
		turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
	}
}

/*
 * rw_vector_tryenter:
 *
 *	Try to acquire a rwlock.
 */
int
rw_vector_tryenter(krwlock_t *rw, const krw_t op)
{
	uintptr_t curthread, owner, incr, need_wait, next;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, curthread != 0);

	if (op == RW_READER) {
		incr = RW_READ_INCR;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (owner = rw->rw_owner;; owner = next) {
		if (__predict_false((owner & need_wait) != 0))
			return 0;
		next = rw_cas(rw, owner, owner + incr);
		if (__predict_true(next == owner)) {
			/* Got it! */
			break;
		}
	}

	RW_WANTLOCK(rw, op);
	RW_LOCKED(rw, op);
	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));

	RW_MEMBAR_ACQUIRE();
	return 1;
}

/*
 * rw_downgrade:
 *
 *	Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, newown, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
	RW_UNLOCKED(rw, RW_WRITER);
#if !defined(DIAGNOSTIC)
	__USE(curthread);
#endif

	RW_MEMBAR_RELEASE();

	for (owner = rw->rw_owner;; owner = next) {
		/*
		 * If there are no waiters we can do this the easy way.  Try
		 * swapping us down to one read hold.  If it fails, the lock
		 * condition has changed and we most likely now have
		 * waiters.
		 */
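		/*
		 * For example (illustration only): with no waiters, a lock
		 * word of (curthread | RW_WRITE_LOCKED) is swapped for
		 * RW_READ_INCR, i.e. one read hold and no flag bits, with
		 * RW_NODEBUG preserved if it was set.
		 */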
		if ((owner & RW_HAS_WAITERS) == 0) {
			newown = (owner & RW_NODEBUG);
			next = rw_cas(rw, owner, newown + RW_READ_INCR);
			if (__predict_true(next == owner)) {
				RW_LOCKED(rw, RW_READER);
				RW_ASSERT(rw,
				    (rw->rw_owner & RW_WRITE_LOCKED) == 0);
				RW_ASSERT(rw, RW_COUNT(rw) != 0);
				return;
			}
			continue;
		}

		/*
		 * Grab the turnstile chain lock.  This gets the interlock
		 * on the sleep queue.  Once we have that, we can adjust the
		 * waiter bits.
		 */
		ts = turnstile_lookup(rw);
		RW_ASSERT(rw, ts != NULL);

		rcnt = TS_WAITERS(ts, TS_READER_Q);
		wcnt = TS_WAITERS(ts, TS_WRITER_Q);

		if (rcnt == 0) {
			/*
			 * If there are no readers, just preserve the
			 * waiters bits, swap us down to one read hold and
			 * return.
			 */
			RW_ASSERT(rw, wcnt != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);

			newown = owner & RW_NODEBUG;
			newown |= RW_READ_INCR | RW_HAS_WAITERS |
			    RW_WRITE_WANTED;
			next = rw_cas(rw, owner, newown);
			turnstile_exit(rw);
			if (__predict_true(next == owner))
				break;
		} else {
			/*
			 * Give the lock to all blocked readers.  We may
			 * retain one read hold if downgrading.  If there is
			 * a writer waiting, new readers will be blocked
			 * out.
			 */
			newown = owner & RW_NODEBUG;
			newown += (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
			if (wcnt != 0)
				newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

			next = rw_cas(rw, owner, newown);
			if (__predict_true(next == owner)) {
				/* Wake up all sleeping readers. */
				turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
				break;
			}
			turnstile_exit(rw);
		}
	}

	RW_WANTLOCK(rw, RW_READER);
	RW_LOCKED(rw, RW_READER);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
	RW_ASSERT(rw, RW_COUNT(rw) != 0);
}

/*
 * rw_tryupgrade:
 *
 *	Try to upgrade a read lock to a write lock.  We must be the only
 *	reader.
 */
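/*
 * For example (illustration only): with a single read hold and no waiter
 * bits set the lock word is exactly RW_READ_INCR, and the CAS in the loop
 * below replaces it with (curthread | RW_WRITE_LOCKED).  Any additional
 * read hold leaves (owner & RW_THREAD) != RW_READ_INCR and the upgrade
 * fails, returning 0.
 */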
int
rw_tryupgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, newown, next;
	struct lwp *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, rw_read_held(rw));

	for (owner = RW_READ_INCR;; owner = next) {
		newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner)) {
			RW_MEMBAR_ACQUIRE();
			break;
		}
		RW_ASSERT(rw, (next & RW_WRITE_LOCKED) == 0);
		if (__predict_false((next & RW_THREAD) != RW_READ_INCR)) {
			RW_ASSERT(rw, (next & RW_THREAD) != 0);
			return 0;
		}
	}

	RW_UNLOCKED(rw, RW_READER);
	RW_WANTLOCK(rw, RW_WRITER);
	RW_LOCKED(rw, RW_WRITER);
	RW_ASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);

	return 1;
}

/*
 * rw_read_held:
 *
 *	Returns true if the rwlock is held for reading.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
	uintptr_t owner;

	if (rw == NULL)
		return 0;
	owner = rw->rw_owner;
	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *	Returns true if the rwlock is held for writing.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
	    (RW_WRITE_LOCKED | (uintptr_t)curlwp);
}

/*
 * rw_lock_held:
 *
 *	Returns true if the rwlock is held for reading or writing.  Must
 *	only be used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & RW_THREAD) != 0;
}

/*
 * rw_lock_op:
 *
 *	For a rwlock that is known to be held by the caller, return
 *	RW_READER or RW_WRITER to describe the hold type.
 */
krw_t
rw_lock_op(krwlock_t *rw)
{

	RW_ASSERT(rw, rw_lock_held(rw));

	return (rw->rw_owner & RW_WRITE_LOCKED) != 0 ? RW_WRITER : RW_READER;
}

/*
 * rw_owner:
 *
 *	Return the current owner of an RW lock, but only if it is write
 *	held.  Used for priority inheritance.
 */
static lwp_t *
rw_owner(wchan_t obj)
{
	krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
	uintptr_t owner = rw->rw_owner;

	if ((owner & RW_WRITE_LOCKED) == 0)
		return NULL;

	return (void *)(owner & RW_THREAD);
}

/*
 * rw_owner_running:
 *
 *	Return true if a RW lock is unheld, or write held and the owner is
 *	running on a CPU.  For the pagedaemon.
 */
bool
rw_owner_running(const krwlock_t *rw)
{
#ifdef MULTIPROCESSOR
	uintptr_t owner;
	bool rv;

	kpreempt_disable();
	owner = rw->rw_owner;
	rv = (owner & RW_THREAD) == 0 || rw_oncpu(owner);
	kpreempt_enable();
	return rv;
#else
	return rw_owner(rw) == curlwp;
#endif
}