/* $NetBSD: kern_rwlock.c,v 1.74 2023/10/04 20:39:35 ad Exp $ */

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020, 2023
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *        Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *        Richard McDougall.
 *
 * The NetBSD implementation differs from that described in the book, in
 * that the locks are partially adaptive. Lock waiters spin wait while a
 * lock is write held and the holder is still running on a CPU. The method
 * of choosing which threads to awaken when a lock is released also differs,
 * mainly to take account of the partially adaptive behaviour.
 */
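
/*
 * Illustrative usage sketch of the interface implemented below; the
 * names example_lock and example_value are hypothetical:
 *
 *        static krwlock_t example_lock;
 *        static int example_value;
 *
 *        rw_init(&example_lock);
 *
 *        rw_enter(&example_lock, RW_READER);     (shared: read example_value)
 *        rw_exit(&example_lock);
 *
 *        rw_enter(&example_lock, RW_WRITER);     (exclusive: modify example_value)
 *        rw_exit(&example_lock);
 *
 *        rw_destroy(&example_lock);
 */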

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.74 2023/10/04 20:39:35 ad Exp $");

#include "opt_lockdebug.h"

#define __RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/lock.h>
#include <sys/pserialize.h>

#include <dev/lockstat.h>

#include <machine/rwlock.h>

/*
 * LOCKDEBUG
 */

#define RW_DEBUG_P(rw) (((rw)->rw_owner & RW_NODEBUG) == 0)

#define RW_WANTLOCK(rw, op) \
        LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \
            (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define RW_LOCKED(rw, op) \
        LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL, \
            (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define RW_UNLOCKED(rw, op) \
        LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \
            (uintptr_t)__builtin_return_address(0), op == RW_READER);

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)
#define RW_ASSERT(rw, cond) \
do { \
        if (__predict_false(!(cond))) \
                rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
} while (/* CONSTCOND */ 0)
#else
#define RW_ASSERT(rw, cond) /* nothing */
#endif /* DIAGNOSTIC */

/*
 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 */
#ifdef LOCKDEBUG
#undef __HAVE_RW_STUBS
#endif

#ifndef __HAVE_RW_STUBS
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
__strong_alias(rw_tryenter,rw_vector_tryenter);
#endif

static void rw_abort(const char *, size_t, krwlock_t *, const char *);
static void rw_dump(const volatile void *, lockop_printer_t);
static lwp_t *rw_owner(wchan_t);

lockops_t rwlock_lockops = {
        .lo_name = "Reader / writer lock",
        .lo_type = LOCKOPS_SLEEP,
        .lo_dump = rw_dump,
};

/*
 * Give rwlock holders an extra-high priority boost when blocking due to
 * direct handoff. XXX To be revisited.
 */
syncobj_t rw_syncobj = {
        .sobj_name = "rwlock",
        .sobj_flag = SOBJ_SLEEPQ_SORTED,
        .sobj_boostpri = PRI_KTHREAD,
        .sobj_unsleep = turnstile_unsleep,
        .sobj_changepri = turnstile_changepri,
        .sobj_lendpri = sleepq_lendpri,
        .sobj_owner = rw_owner,
};

/*
 * rw_cas:
 *
 * Do an atomic compare-and-swap on the lock word.
 */
static inline uintptr_t
rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

        return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
            (void *)o, (void *)n);
}

/*
 * rw_swap:
 *
 * Do an atomic swap of the lock word. This is used only when it's
 * known that the lock word is set up such that it can't be changed
 * behind us (assert this), so there's no point considering the result.
 */
static inline void
rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

        n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
            (void *)n);

        RW_ASSERT(rw, n == o);
        RW_ASSERT(rw, (o & RW_HAS_WAITERS) != 0);
}

/*
 * rw_dump:
 *
 * Dump the contents of a rwlock structure.
 */
static void
rw_dump(const volatile void *cookie, lockop_printer_t pr)
{
        const volatile krwlock_t *rw = cookie;

        pr("owner/count : %#018lx flags : %#018x\n",
            (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_abort:
 *
 * Dump information about an error and panic the system. This
 * generates a lot of machine code in the DIAGNOSTIC case, so
 * we ask the compiler to not inline it.
 */
static void __noinline
rw_abort(const char *func, size_t line, krwlock_t *rw, const char *msg)
{

        if (__predict_false(panicstr != NULL))
                return;

        LOCKDEBUG_ABORT(func, line, rw, &rwlock_lockops, msg);
}

/*
 * rw_init:
 *
 * Initialize a rwlock for use.
 */
void
_rw_init(krwlock_t *rw, uintptr_t return_address)
{

#ifdef LOCKDEBUG
        /* XXX only because the assembly stubs can't handle RW_NODEBUG */
        if (LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address))
                rw->rw_owner = 0;
        else
                rw->rw_owner = RW_NODEBUG;
#else
        rw->rw_owner = 0;
#endif
}

void
rw_init(krwlock_t *rw)
{

        _rw_init(rw, (uintptr_t)__builtin_return_address(0));
}

/*
 * rw_destroy:
 *
 * Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

        RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0);
        LOCKDEBUG_FREE((rw->rw_owner & RW_NODEBUG) == 0, rw);
}

/*
 * rw_oncpu:
 *
 * Return true if an rwlock owner is running on a CPU in the system.
 * If the target is waiting on the kernel big lock, then we must
 * release it. This is necessary to avoid deadlock.
 */
static bool
rw_oncpu(uintptr_t owner)
{
#ifdef MULTIPROCESSOR
        struct cpu_info *ci;
        lwp_t *l;

        KASSERT(kpreempt_disabled());

        if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
                return false;
        }

        /*
         * See lwp_dtor() for why dereferencing the LWP pointer is safe.
         * We must have kernel preemption disabled for that.
         */
        l = (lwp_t *)(owner & RW_THREAD);
        ci = l->l_cpu;

        if (ci && ci->ci_curlwp == l) {
                /* Target is running; do we need to block? */
                return (ci->ci_biglock_wanted != l);
        }
#endif
        /* Not running. It may be safe to block now. */
        return false;
}

/*
 * rw_vector_enter:
 *
 * Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
        uintptr_t owner, incr, need_wait, set_wait, curthread, next;
        turnstile_t *ts;
        int queue;
        lwp_t *l;
        LOCKSTAT_TIMER(slptime);
        LOCKSTAT_COUNTER(slpcnt);
        LOCKSTAT_TIMER(spintime);
        LOCKSTAT_COUNTER(spincnt);
        LOCKSTAT_FLAG(lsflag);

        l = curlwp;
        curthread = (uintptr_t)l;

        RW_ASSERT(rw, !cpu_intr_p());
        RW_ASSERT(rw, curthread != 0);
        RW_WANTLOCK(rw, op);

        if (__predict_true(panicstr == NULL)) {
                KDASSERT(pserialize_not_in_read_section());
                LOCKDEBUG_BARRIER(&kernel_lock, 1);
        }

        /*
         * We play a slight trick here. If we're a reader, we want to
         * increment the read count. If we're a writer, we want to
         * set the owner field and the WRITE_LOCKED bit.
         *
         * In the latter case, we expect those bits to be zero,
         * therefore we can use an add operation to set them, which
         * means an add operation for both cases.
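         *
         * For example, starting from rw_owner == 0 (unowned, no flag
         * bits set): a reader adds RW_READ_INCR, giving a hold count
         * of one, while a writer adds (curthread | RW_WRITE_LOCKED),
         * storing its LWP pointer and setting the write bit in a
         * single step.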
         */
        if (__predict_true(op == RW_READER)) {
                incr = RW_READ_INCR;
                set_wait = RW_HAS_WAITERS;
                need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
                queue = TS_READER_Q;
        } else {
                RW_ASSERT(rw, op == RW_WRITER);
                incr = curthread | RW_WRITE_LOCKED;
                set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
                need_wait = RW_WRITE_LOCKED | RW_THREAD;
                queue = TS_WRITER_Q;
        }

        LOCKSTAT_ENTER(lsflag);

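        /*
         * Disable kernel preemption while examining the owner:
         * rw_oncpu() dereferences the owning LWP, which is only safe
         * to do with preemption disabled (see the comment in
         * rw_oncpu()).
         */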
        KPREEMPT_DISABLE(curlwp);
        for (owner = rw->rw_owner;;) {
                /*
                 * Read the lock owner field. If the need-to-wait
                 * indicator is clear, then try to acquire the lock.
                 */
                if ((owner & need_wait) == 0) {
                        next = rw_cas(rw, owner, (owner + incr) &
                            ~RW_WRITE_WANTED);
                        if (__predict_true(next == owner)) {
                                /* Got it! */
                                membar_acquire();
                                break;
                        }

                        /*
                         * Didn't get it -- spin around again (we'll
                         * probably sleep on the next iteration).
                         */
                        owner = next;
                        continue;
                }
                if (__predict_false(RW_OWNER(rw) == curthread)) {
                        rw_abort(__func__, __LINE__, rw,
                            "locking against myself");
                }
                /*
                 * If the lock owner is running on another CPU, and
                 * there are no existing waiters, then spin.
                 */
                if (rw_oncpu(owner)) {
                        LOCKSTAT_START_TIMER(lsflag, spintime);
                        u_int count = SPINLOCK_BACKOFF_MIN;
                        do {
                                KPREEMPT_ENABLE(curlwp);
                                SPINLOCK_BACKOFF(count);
                                KPREEMPT_DISABLE(curlwp);
                                owner = rw->rw_owner;
                        } while (rw_oncpu(owner));
                        LOCKSTAT_STOP_TIMER(lsflag, spintime);
                        LOCKSTAT_COUNT(spincnt, 1);
                        if ((owner & need_wait) == 0)
                                continue;
                }

                /*
                 * Grab the turnstile chain lock. Once we have that, we
                 * can adjust the waiter bits and sleep queue.
                 */
                ts = turnstile_lookup(rw);

                /*
                 * Mark the rwlock as having waiters. If the set fails,
                 * then we may not need to sleep and should spin again.
                 * Reload rw_owner because turnstile_lookup() may have
                 * spun on the turnstile chain lock.
                 */
                owner = rw->rw_owner;
                if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
                        turnstile_exit(rw);
                        continue;
                }
                next = rw_cas(rw, owner, owner | set_wait);
                /* XXX membar? */
                if (__predict_false(next != owner)) {
                        turnstile_exit(rw);
                        owner = next;
                        continue;
                }

                LOCKSTAT_START_TIMER(lsflag, slptime);
                turnstile_block(ts, queue, rw, &rw_syncobj);
                LOCKSTAT_STOP_TIMER(lsflag, slptime);
                LOCKSTAT_COUNT(slpcnt, 1);

                /*
                 * No need for a memory barrier because of context switch.
                 * If not handed the lock, then spin again.
                 */
                if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
                        break;

                owner = rw->rw_owner;
        }
        KPREEMPT_ENABLE(curlwp);

        LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK |
            (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime,
            (l->l_rwcallsite != 0 ? l->l_rwcallsite :
              (uintptr_t)__builtin_return_address(0)));
        LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime,
            (l->l_rwcallsite != 0 ? l->l_rwcallsite :
              (uintptr_t)__builtin_return_address(0)));
        LOCKSTAT_EXIT(lsflag);

        RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
            (op == RW_READER && RW_COUNT(rw) != 0));
        RW_LOCKED(rw, op);
}

/*
 * rw_vector_exit:
 *
 * Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw)
{
        uintptr_t curthread, owner, decr, newown, next;
        turnstile_t *ts;
        int rcnt, wcnt;
        lwp_t *l;

        l = curlwp;
        curthread = (uintptr_t)l;
        RW_ASSERT(rw, curthread != 0);

        /*
         * Again, we use a trick. Since we used an add operation to
         * set the required lock bits, we can use a subtract to clear
         * them, which makes the read-release and write-release path
         * the same.
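         *
         * For example, a write release subtracts (curthread |
         * RW_WRITE_LOCKED), clearing both the owner field and the
         * write bit, while a read release subtracts RW_READ_INCR,
         * dropping the hold count by one.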
         */
        owner = rw->rw_owner;
        if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
                RW_UNLOCKED(rw, RW_WRITER);
                RW_ASSERT(rw, RW_OWNER(rw) == curthread);
                decr = curthread | RW_WRITE_LOCKED;
        } else {
                RW_UNLOCKED(rw, RW_READER);
                RW_ASSERT(rw, RW_COUNT(rw) != 0);
                decr = RW_READ_INCR;
        }

        /*
         * Compute what we expect the new value of the lock to be. Only
         * proceed to do direct handoff if there are waiters, and if the
         * lock would become unowned.
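         *
         * That is, the new value must have a zero thread/count field
         * with RW_HAS_WAITERS still set, which is exactly what the
         * test in the loop below checks for.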
         */
        membar_release();
        for (;;) {
                newown = (owner - decr);
                if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
                        break;
                next = rw_cas(rw, owner, newown);
                if (__predict_true(next == owner))
                        return;
                owner = next;
        }

        /*
         * Grab the turnstile chain lock. This gets the interlock
         * on the sleep queue. Once we have that, we can adjust the
         * waiter bits.
         */
        ts = turnstile_lookup(rw);
        owner = rw->rw_owner;
        RW_ASSERT(rw, ts != NULL);
        RW_ASSERT(rw, (owner & RW_HAS_WAITERS) != 0);

        wcnt = TS_WAITERS(ts, TS_WRITER_Q);
        rcnt = TS_WAITERS(ts, TS_READER_Q);

        /*
         * Give the lock away.
         *
         * If we are releasing a write lock, then prefer to wake all
         * outstanding readers. Otherwise, wake one writer if there
         * are outstanding readers, or all writers if there are no
         * pending readers. If waking one specific writer, the writer
         * is handed the lock here. If waking multiple writers, we
         * set WRITE_WANTED to block out new readers, and let them
         * do the work of acquiring the lock in rw_vector_enter().
         */
        if (rcnt == 0 || decr == RW_READ_INCR) {
                RW_ASSERT(rw, wcnt != 0);
                RW_ASSERT(rw, (owner & RW_WRITE_WANTED) != 0);

                if (rcnt != 0) {
                        /* Give the lock to the longest waiting writer. */
                        l = TS_FIRST(ts, TS_WRITER_Q);
                        newown = (uintptr_t)l | (owner & RW_NODEBUG);
                        newown |= RW_WRITE_LOCKED | RW_HAS_WAITERS;
                        if (wcnt > 1)
                                newown |= RW_WRITE_WANTED;
                        rw_swap(rw, owner, newown);
                        turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
                } else {
                        /* Wake all writers and let them fight it out. */
                        newown = owner & RW_NODEBUG;
                        newown |= RW_WRITE_WANTED;
                        rw_swap(rw, owner, newown);
                        turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
                }
        } else {
                RW_ASSERT(rw, rcnt != 0);

                /*
                 * Give the lock to all blocked readers. If there
                 * is a writer waiting, new readers that arrive
                 * after the release will be blocked out.
                 */
                newown = owner & RW_NODEBUG;
                newown += rcnt << RW_READ_COUNT_SHIFT;
                if (wcnt != 0)
                        newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

                /* Wake up all sleeping readers. */
                rw_swap(rw, owner, newown);
                turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
        }
}

/*
 * rw_vector_tryenter:
 *
 * Try to acquire a rwlock.
 */
int
rw_vector_tryenter(krwlock_t *rw, const krw_t op)
{
        uintptr_t curthread, owner, incr, need_wait, next;
        lwp_t *l;

        l = curlwp;
        curthread = (uintptr_t)l;

        RW_ASSERT(rw, curthread != 0);

        if (op == RW_READER) {
                incr = RW_READ_INCR;
                need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
        } else {
                RW_ASSERT(rw, op == RW_WRITER);
                incr = curthread | RW_WRITE_LOCKED;
                need_wait = RW_WRITE_LOCKED | RW_THREAD;
        }

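        /*
         * Fail immediately, without blocking, if the lock is already
         * held in a conflicting way; otherwise retry the CAS until
         * it takes effect.
         */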
        for (owner = rw->rw_owner;; owner = next) {
                if (__predict_false((owner & need_wait) != 0))
                        return 0;
                next = rw_cas(rw, owner, owner + incr);
                if (__predict_true(next == owner)) {
                        /* Got it! */
                        break;
                }
        }

        RW_WANTLOCK(rw, op);
        RW_LOCKED(rw, op);
        RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
            (op == RW_READER && RW_COUNT(rw) != 0));

        membar_acquire();
        return 1;
}

/*
 * rw_downgrade:
 *
 * Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
        uintptr_t owner, newown, next, curthread __diagused;
        turnstile_t *ts;
        int rcnt, wcnt;
        lwp_t *l;

        l = curlwp;
        curthread = (uintptr_t)l;
        RW_ASSERT(rw, curthread != 0);
        RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
        RW_ASSERT(rw, RW_OWNER(rw) == curthread);
        RW_UNLOCKED(rw, RW_WRITER);

        membar_release();
        for (owner = rw->rw_owner;; owner = next) {
                /*
                 * If there are no waiters we can do this the easy way. Try
                 * swapping us down to one read hold. If it fails, the lock
                 * condition has changed and we most likely now have
                 * waiters.
                 */
                if ((owner & RW_HAS_WAITERS) == 0) {
                        newown = (owner & RW_NODEBUG);
                        next = rw_cas(rw, owner, newown + RW_READ_INCR);
                        if (__predict_true(next == owner)) {
                                RW_LOCKED(rw, RW_READER);
                                RW_ASSERT(rw,
                                    (rw->rw_owner & RW_WRITE_LOCKED) == 0);
                                RW_ASSERT(rw, RW_COUNT(rw) != 0);
                                return;
                        }
                        continue;
                }

                /*
                 * Grab the turnstile chain lock. This gets the interlock
                 * on the sleep queue. Once we have that, we can adjust the
                 * waiter bits.
                 */
                ts = turnstile_lookup(rw);
                RW_ASSERT(rw, ts != NULL);

                rcnt = TS_WAITERS(ts, TS_READER_Q);
                wcnt = TS_WAITERS(ts, TS_WRITER_Q);

                if (rcnt == 0) {
                        /*
                         * If there are no readers, just preserve the
                         * waiters bits, swap us down to one read hold and
                         * return.
                         */
                        RW_ASSERT(rw, wcnt != 0);
                        RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
                        RW_ASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);

                        newown = owner & RW_NODEBUG;
                        newown |= RW_READ_INCR | RW_HAS_WAITERS |
                            RW_WRITE_WANTED;
                        next = rw_cas(rw, owner, newown);
                        turnstile_exit(rw);
                        if (__predict_true(next == owner))
                                break;
                } else {
                        /*
                         * Give the lock to all blocked readers, retaining
                         * one read hold for ourselves. If there is a
                         * writer waiting, new readers will be blocked out.
                         */
                        newown = owner & RW_NODEBUG;
                        newown += (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
                        if (wcnt != 0)
                                newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

                        next = rw_cas(rw, owner, newown);
                        if (__predict_true(next == owner)) {
                                /* Wake up all sleeping readers. */
                                turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
                                break;
                        }
                        turnstile_exit(rw);
                }
        }

        RW_WANTLOCK(rw, RW_READER);
        RW_LOCKED(rw, RW_READER);
        RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
        RW_ASSERT(rw, RW_COUNT(rw) != 0);
}

/*
 * rw_tryupgrade:
 *
 * Try to upgrade a read lock to a write lock. We must be the only
 * reader.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
        uintptr_t owner, curthread, newown, next;
        struct lwp *l;

        l = curlwp;
        curthread = (uintptr_t)l;
        RW_ASSERT(rw, curthread != 0);
        RW_ASSERT(rw, rw_read_held(rw));

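        /*
         * The upgrade can only succeed while we are the sole reader:
         * the thread/count field must be exactly RW_READ_INCR for the
         * CAS to go through. If another reader holds the lock, give
         * up.
         */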
        for (owner = RW_READ_INCR;; owner = next) {
                newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
                next = rw_cas(rw, owner, newown);
                if (__predict_true(next == owner)) {
                        membar_acquire();
                        break;
                }
                RW_ASSERT(rw, (next & RW_WRITE_LOCKED) == 0);
                if (__predict_false((next & RW_THREAD) != RW_READ_INCR)) {
                        RW_ASSERT(rw, (next & RW_THREAD) != 0);
                        return 0;
                }
        }

        RW_UNLOCKED(rw, RW_READER);
        RW_WANTLOCK(rw, RW_WRITER);
        RW_LOCKED(rw, RW_WRITER);
        RW_ASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
        RW_ASSERT(rw, RW_OWNER(rw) == curthread);

        return 1;
}

/*
 * rw_read_held:
 *
 * Returns true if the rwlock is held for reading. Must only be
 * used for diagnostic assertions, and never be used to make
 * decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
        uintptr_t owner;

        if (rw == NULL)
                return 0;
        owner = rw->rw_owner;
        return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 * Returns true if the rwlock is held for writing. Must only be
 * used for diagnostic assertions, and never be used to make
 * decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

        if (rw == NULL)
                return 0;
        return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
            (RW_WRITE_LOCKED | (uintptr_t)curlwp);
}

/*
 * rw_lock_held:
 *
 * Returns true if the rwlock is held for reading or writing. Must
 * only be used for diagnostic assertions, and never be used to make
 * decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

        if (rw == NULL)
                return 0;
        return (rw->rw_owner & RW_THREAD) != 0;
}

/*
 * rw_lock_op:
 *
 * For a rwlock that is known to be held by the caller, return
 * RW_READER or RW_WRITER to describe the hold type.
 */
krw_t
rw_lock_op(krwlock_t *rw)
{

        RW_ASSERT(rw, rw_lock_held(rw));

        return (rw->rw_owner & RW_WRITE_LOCKED) != 0 ? RW_WRITER : RW_READER;
}

/*
 * rw_owner:
 *
 * Return the current owner of an RW lock, but only if it is write
 * held. Used for priority inheritance.
 */
static lwp_t *
rw_owner(wchan_t obj)
{
        krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
        uintptr_t owner = rw->rw_owner;

        if ((owner & RW_WRITE_LOCKED) == 0)
                return NULL;

        return (void *)(owner & RW_THREAD);
}