/*	$NetBSD: kern_rwlock.c,v 1.75 2023/10/15 10:27:11 riastradh Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020, 2023
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	    Richard McDougall.
 *
 * The NetBSD implementation differs from that described in the book, in
 * that the locks are partially adaptive.  Lock waiters spin wait while a
 * lock is write held and the holder is still running on a CPU.  The method
 * of choosing which threads to awaken when a lock is released also differs,
 * mainly to take account of the partially adaptive behaviour.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.75 2023/10/15 10:27:11 riastradh Exp $");

#include "opt_lockdebug.h"

#define __RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/lock.h>
#include <sys/pserialize.h>
#include <sys/syncobj.h>

#include <dev/lockstat.h>

#include <machine/rwlock.h>

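/*
 * Illustrative sketch (not compiled): the basic enter/exit pattern for
 * the interfaces implemented in this file.  The "frobtable" structure
 * and its fields are hypothetical and exist only for the example.
 */
#if 0
struct frobtable {
        krwlock_t       ft_lock;        /* protects ft_count */
        u_int           ft_count;
};

static void
frobtable_example(struct frobtable *ft)
{

        rw_init(&ft->ft_lock);

        /* Readers may hold the lock concurrently. */
        rw_enter(&ft->ft_lock, RW_READER);
        (void)ft->ft_count;             /* read-only access */
        rw_exit(&ft->ft_lock);

        /* A writer holds the lock exclusively. */
        rw_enter(&ft->ft_lock, RW_WRITER);
        ft->ft_count++;                 /* modify protected state */
        rw_exit(&ft->ft_lock);

        rw_destroy(&ft->ft_lock);
}
#endif
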
/*
 * LOCKDEBUG
 */

#define RW_DEBUG_P(rw)		(((rw)->rw_owner & RW_NODEBUG) == 0)

#define RW_WANTLOCK(rw, op) \
        LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \
            (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define RW_LOCKED(rw, op) \
        LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL, \
            (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define RW_UNLOCKED(rw, op) \
        LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \
            (uintptr_t)__builtin_return_address(0), op == RW_READER);

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)
#define RW_ASSERT(rw, cond) \
do { \
        if (__predict_false(!(cond))) \
                rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
} while (/* CONSTCOND */ 0)
#else
#define RW_ASSERT(rw, cond)	/* nothing */
#endif	/* DIAGNOSTIC */

/*
 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 */
#ifdef LOCKDEBUG
#undef	__HAVE_RW_STUBS
#endif

#ifndef __HAVE_RW_STUBS
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
__strong_alias(rw_tryenter,rw_vector_tryenter);
#endif

static void	rw_abort(const char *, size_t, krwlock_t *, const char *);
static void	rw_dump(const volatile void *, lockop_printer_t);
static lwp_t	*rw_owner(wchan_t);

lockops_t rwlock_lockops = {
        .lo_name = "Reader / writer lock",
        .lo_type = LOCKOPS_SLEEP,
        .lo_dump = rw_dump,
};

/*
 * Give rwlock holders an extra-high priority boost when blocking due to
 * direct handoff.  XXX To be revisited.
 */
syncobj_t rw_syncobj = {
        .sobj_name	= "rwlock",
        .sobj_flag	= SOBJ_SLEEPQ_SORTED,
        .sobj_boostpri	= PRI_KTHREAD,
        .sobj_unsleep	= turnstile_unsleep,
        .sobj_changepri	= turnstile_changepri,
        .sobj_lendpri	= sleepq_lendpri,
        .sobj_owner	= rw_owner,
};

/*
 * rw_cas:
 *
 * Do an atomic compare-and-swap on the lock word.
 */
static inline uintptr_t
rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

        return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
            (void *)o, (void *)n);
}

/*
 * rw_swap:
 *
 * Do an atomic swap of the lock word.  This is used only when it's
 * known that the lock word is set up such that it can't be changed
 * behind us (assert this), so there's no point considering the result.
 */
static inline void
rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

        n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
            (void *)n);

        RW_ASSERT(rw, n == o);
        RW_ASSERT(rw, (o & RW_HAS_WAITERS) != 0);
}

/*
 * rw_dump:
 *
 * Dump the contents of a rwlock structure.
 */
static void
rw_dump(const volatile void *cookie, lockop_printer_t pr)
{
        const volatile krwlock_t *rw = cookie;

        pr("owner/count : %#018lx flags : %#018x\n",
            (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_abort:
 *
 * Dump information about an error and panic the system.  This
 * generates a lot of machine code in the DIAGNOSTIC case, so
 * we ask the compiler to not inline it.
 */
static void __noinline
rw_abort(const char *func, size_t line, krwlock_t *rw, const char *msg)
{

        if (__predict_false(panicstr != NULL))
                return;

        LOCKDEBUG_ABORT(func, line, rw, &rwlock_lockops, msg);
}

/*
 * rw_init:
 *
 * Initialize a rwlock for use.
 */
void
_rw_init(krwlock_t *rw, uintptr_t return_address)
{

#ifdef LOCKDEBUG
        /* XXX only because the assembly stubs can't handle RW_NODEBUG */
        if (LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address))
                rw->rw_owner = 0;
        else
                rw->rw_owner = RW_NODEBUG;
#else
        rw->rw_owner = 0;
#endif
}

void
rw_init(krwlock_t *rw)
{

        _rw_init(rw, (uintptr_t)__builtin_return_address(0));
}

/*
 * rw_destroy:
 *
 * Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

        RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0);
        LOCKDEBUG_FREE((rw->rw_owner & RW_NODEBUG) == 0, rw);
}

/*
 * rw_oncpu:
 *
 * Return true if an rwlock owner is running on a CPU in the system.
 * If the target is waiting on the kernel big lock, then we must
 * release it.  This is necessary to avoid deadlock.
 */
static bool
rw_oncpu(uintptr_t owner)
{
#ifdef MULTIPROCESSOR
        struct cpu_info *ci;
        lwp_t *l;

        KASSERT(kpreempt_disabled());

        if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
                return false;
        }

        /*
         * See lwp_dtor() for why dereferencing the LWP pointer is safe.
         * We must have kernel preemption disabled for that.
         */
        l = (lwp_t *)(owner & RW_THREAD);
        ci = l->l_cpu;

        if (ci && ci->ci_curlwp == l) {
                /* Target is running; do we need to block? */
                return (ci->ci_biglock_wanted != l);
        }
#endif
        /* Not running.  It may be safe to block now. */
        return false;
}

/*
 * rw_vector_enter:
 *
 * Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
        uintptr_t owner, incr, need_wait, set_wait, curthread, next;
        turnstile_t *ts;
        int queue;
        lwp_t *l;
        LOCKSTAT_TIMER(slptime);
        LOCKSTAT_COUNTER(slpcnt);
        LOCKSTAT_TIMER(spintime);
        LOCKSTAT_COUNTER(spincnt);
        LOCKSTAT_FLAG(lsflag);

        l = curlwp;
        curthread = (uintptr_t)l;

        RW_ASSERT(rw, !cpu_intr_p());
        RW_ASSERT(rw, curthread != 0);
        RW_WANTLOCK(rw, op);

        if (__predict_true(panicstr == NULL)) {
                KDASSERT(pserialize_not_in_read_section());
                LOCKDEBUG_BARRIER(&kernel_lock, 1);
        }

        /*
         * We play a slight trick here.  If we're a reader, we want to
         * increment the read count.  If we're a writer, we want to
         * set the owner field and the WRITE_LOCKED bit.
         *
         * In the latter case, we expect those bits to be zero,
         * therefore we can use an add operation to set them, which
         * means an add operation for both cases.
         */
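        /*
         * A concrete illustration of the trick (the bit values are
         * defined in sys/rwlock.h; this sketch ignores the debug flag):
         * starting from an unheld lock word of 0, a reader's add of
         * RW_READ_INCR yields a read count of one, while a writer's add
         * of (curthread | RW_WRITE_LOCKED) installs both the owner and
         * the write-locked bit in a single operation.
         */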
        if (__predict_true(op == RW_READER)) {
                incr = RW_READ_INCR;
                set_wait = RW_HAS_WAITERS;
                need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
                queue = TS_READER_Q;
        } else {
                RW_ASSERT(rw, op == RW_WRITER);
                incr = curthread | RW_WRITE_LOCKED;
                set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
                need_wait = RW_WRITE_LOCKED | RW_THREAD;
                queue = TS_WRITER_Q;
        }

        LOCKSTAT_ENTER(lsflag);

        KPREEMPT_DISABLE(curlwp);
        for (owner = rw->rw_owner;;) {
                /*
                 * Read the lock owner field.  If the need-to-wait
                 * indicator is clear, then try to acquire the lock.
                 */
                if ((owner & need_wait) == 0) {
                        next = rw_cas(rw, owner, (owner + incr) &
                            ~RW_WRITE_WANTED);
                        if (__predict_true(next == owner)) {
                                /* Got it! */
                                membar_acquire();
                                break;
                        }

                        /*
                         * Didn't get it -- spin around again (we'll
                         * probably sleep on the next iteration).
                         */
                        owner = next;
                        continue;
                }
                if (__predict_false(RW_OWNER(rw) == curthread)) {
                        rw_abort(__func__, __LINE__, rw,
                            "locking against myself");
                }
                /*
                 * If the lock owner is running on another CPU, and
                 * there are no existing waiters, then spin.
                 */
                if (rw_oncpu(owner)) {
                        LOCKSTAT_START_TIMER(lsflag, spintime);
                        u_int count = SPINLOCK_BACKOFF_MIN;
                        do {
                                KPREEMPT_ENABLE(curlwp);
                                SPINLOCK_BACKOFF(count);
                                KPREEMPT_DISABLE(curlwp);
                                owner = rw->rw_owner;
                        } while (rw_oncpu(owner));
                        LOCKSTAT_STOP_TIMER(lsflag, spintime);
                        LOCKSTAT_COUNT(spincnt, 1);
                        if ((owner & need_wait) == 0)
                                continue;
                }

                /*
                 * Grab the turnstile chain lock.  Once we have that, we
                 * can adjust the waiter bits and sleep queue.
                 */
                ts = turnstile_lookup(rw);

                /*
                 * Mark the rwlock as having waiters.  If the set fails,
                 * then we may not need to sleep and should spin again.
                 * Reload rw_owner because turnstile_lookup() may have
                 * spun on the turnstile chain lock.
                 */
                owner = rw->rw_owner;
                if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
                        turnstile_exit(rw);
                        continue;
                }
                next = rw_cas(rw, owner, owner | set_wait);
                /* XXX membar? */
                if (__predict_false(next != owner)) {
                        turnstile_exit(rw);
                        owner = next;
                        continue;
                }

                LOCKSTAT_START_TIMER(lsflag, slptime);
                turnstile_block(ts, queue, rw, &rw_syncobj);
                LOCKSTAT_STOP_TIMER(lsflag, slptime);
                LOCKSTAT_COUNT(slpcnt, 1);

                /*
                 * No need for a memory barrier because of context switch.
                 * If not handed the lock, then spin again.
                 */
                if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
                        break;

                owner = rw->rw_owner;
        }
        KPREEMPT_ENABLE(curlwp);

        LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK |
            (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime,
            (l->l_rwcallsite != 0 ? l->l_rwcallsite :
              (uintptr_t)__builtin_return_address(0)));
        LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime,
            (l->l_rwcallsite != 0 ? l->l_rwcallsite :
              (uintptr_t)__builtin_return_address(0)));
        LOCKSTAT_EXIT(lsflag);

        RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
            (op == RW_READER && RW_COUNT(rw) != 0));
        RW_LOCKED(rw, op);
}

/*
 * rw_vector_exit:
 *
 * Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw)
{
        uintptr_t curthread, owner, decr, newown, next;
        turnstile_t *ts;
        int rcnt, wcnt;
        lwp_t *l;

        l = curlwp;
        curthread = (uintptr_t)l;
        RW_ASSERT(rw, curthread != 0);

        /*
         * Again, we use a trick.  Since we used an add operation to
         * set the required lock bits, we can use a subtract to clear
         * them, which makes the read-release and write-release path
         * the same.
         */
        owner = rw->rw_owner;
        if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
                RW_UNLOCKED(rw, RW_WRITER);
                RW_ASSERT(rw, RW_OWNER(rw) == curthread);
                decr = curthread | RW_WRITE_LOCKED;
        } else {
                RW_UNLOCKED(rw, RW_READER);
                RW_ASSERT(rw, RW_COUNT(rw) != 0);
                decr = RW_READ_INCR;
        }

        /*
         * Compute what we expect the new value of the lock to be.  Only
         * proceed to do direct handoff if there are waiters, and if the
         * lock would become unowned.
         */
        membar_release();
        for (;;) {
                newown = (owner - decr);
                if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
                        break;
                next = rw_cas(rw, owner, newown);
                if (__predict_true(next == owner))
                        return;
                owner = next;
        }

        /*
         * Grab the turnstile chain lock.  This gets the interlock
         * on the sleep queue.  Once we have that, we can adjust the
         * waiter bits.
         */
        ts = turnstile_lookup(rw);
        owner = rw->rw_owner;
        RW_ASSERT(rw, ts != NULL);
        RW_ASSERT(rw, (owner & RW_HAS_WAITERS) != 0);

        wcnt = TS_WAITERS(ts, TS_WRITER_Q);
        rcnt = TS_WAITERS(ts, TS_READER_Q);

        /*
         * Give the lock away.
         *
         * If we are releasing a write lock, then prefer to wake all
         * outstanding readers.  Otherwise, wake one writer if there
         * are outstanding readers, or all writers if there are no
         * pending readers.  If waking one specific writer, the writer
         * is handed the lock here.  If waking multiple writers, we
         * set WRITE_WANTED to block out new readers, and let them
         * do the work of acquiring the lock in rw_vector_enter().
         */
        if (rcnt == 0 || decr == RW_READ_INCR) {
                RW_ASSERT(rw, wcnt != 0);
                RW_ASSERT(rw, (owner & RW_WRITE_WANTED) != 0);

                if (rcnt != 0) {
                        /* Give the lock to the longest waiting writer. */
                        l = TS_FIRST(ts, TS_WRITER_Q);
                        newown = (uintptr_t)l | (owner & RW_NODEBUG);
                        newown |= RW_WRITE_LOCKED | RW_HAS_WAITERS;
                        if (wcnt > 1)
                                newown |= RW_WRITE_WANTED;
                        rw_swap(rw, owner, newown);
                        turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
                } else {
                        /* Wake all writers and let them fight it out. */
                        newown = owner & RW_NODEBUG;
                        newown |= RW_WRITE_WANTED;
                        rw_swap(rw, owner, newown);
                        turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
                }
        } else {
                RW_ASSERT(rw, rcnt != 0);

                /*
                 * Give the lock to all blocked readers.  If there
                 * is a writer waiting, new readers that arrive
                 * after the release will be blocked out.
                 */
                newown = owner & RW_NODEBUG;
                newown += rcnt << RW_READ_COUNT_SHIFT;
                if (wcnt != 0)
                        newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

                /* Wake up all sleeping readers. */
                rw_swap(rw, owner, newown);
                turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
        }
}

/*
 * rw_vector_tryenter:
 *
 * Try to acquire a rwlock.
 */
int
rw_vector_tryenter(krwlock_t *rw, const krw_t op)
{
        uintptr_t curthread, owner, incr, need_wait, next;
        lwp_t *l;

        l = curlwp;
        curthread = (uintptr_t)l;

        RW_ASSERT(rw, curthread != 0);

        if (op == RW_READER) {
                incr = RW_READ_INCR;
                need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
        } else {
                RW_ASSERT(rw, op == RW_WRITER);
                incr = curthread | RW_WRITE_LOCKED;
                need_wait = RW_WRITE_LOCKED | RW_THREAD;
        }

        for (owner = rw->rw_owner;; owner = next) {
                if (__predict_false((owner & need_wait) != 0))
                        return 0;
                next = rw_cas(rw, owner, owner + incr);
                if (__predict_true(next == owner)) {
                        /* Got it! */
                        break;
                }
        }

        RW_WANTLOCK(rw, op);
        RW_LOCKED(rw, op);
        RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
            (op == RW_READER && RW_COUNT(rw) != 0));

        membar_acquire();
        return 1;
}
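
/*
 * Illustrative sketch (not compiled): the typical non-blocking pattern
 * built on rw_tryenter(), which fails immediately instead of sleeping.
 * The "cache_lock" and the cache being trimmed are hypothetical.
 */
#if 0
static krwlock_t cache_lock;		/* hypothetical lock */

static bool
cache_trim_try(void)
{

        /* rw_tryenter() returns nonzero on success, zero if it would block. */
        if (rw_tryenter(&cache_lock, RW_WRITER) == 0)
                return false;		/* busy; caller retries later */

        /* ... trim the (hypothetical) cache while write-held ... */

        rw_exit(&cache_lock);
        return true;
}
#endif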

/*
 * rw_downgrade:
 *
 * Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
        uintptr_t owner, newown, next, curthread __diagused;
        turnstile_t *ts;
        int rcnt, wcnt;
        lwp_t *l;

        l = curlwp;
        curthread = (uintptr_t)l;
        RW_ASSERT(rw, curthread != 0);
        RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
        RW_ASSERT(rw, RW_OWNER(rw) == curthread);
        RW_UNLOCKED(rw, RW_WRITER);

        membar_release();
        for (owner = rw->rw_owner;; owner = next) {
                /*
                 * If there are no waiters we can do this the easy way.  Try
                 * swapping us down to one read hold.  If it fails, the lock
                 * condition has changed and we most likely now have
                 * waiters.
                 */
                if ((owner & RW_HAS_WAITERS) == 0) {
                        newown = (owner & RW_NODEBUG);
                        next = rw_cas(rw, owner, newown + RW_READ_INCR);
                        if (__predict_true(next == owner)) {
                                RW_LOCKED(rw, RW_READER);
                                RW_ASSERT(rw,
                                    (rw->rw_owner & RW_WRITE_LOCKED) == 0);
                                RW_ASSERT(rw, RW_COUNT(rw) != 0);
                                return;
                        }
                        continue;
                }

                /*
                 * Grab the turnstile chain lock.  This gets the interlock
                 * on the sleep queue.  Once we have that, we can adjust the
                 * waiter bits.
                 */
                ts = turnstile_lookup(rw);
                RW_ASSERT(rw, ts != NULL);

                rcnt = TS_WAITERS(ts, TS_READER_Q);
                wcnt = TS_WAITERS(ts, TS_WRITER_Q);

                if (rcnt == 0) {
                        /*
                         * If there are no readers, just preserve the
                         * waiters bits, swap us down to one read hold and
                         * return.
                         */
                        RW_ASSERT(rw, wcnt != 0);
                        RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
                        RW_ASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);

                        newown = owner & RW_NODEBUG;
                        newown |= RW_READ_INCR | RW_HAS_WAITERS |
                            RW_WRITE_WANTED;
                        next = rw_cas(rw, owner, newown);
                        turnstile_exit(rw);
                        if (__predict_true(next == owner))
                                break;
                } else {
                        /*
                         * Give the lock to all blocked readers.  We may
                         * retain one read hold if downgrading.  If there is
                         * a writer waiting, new readers will be blocked
                         * out.
                         */
                        newown = owner & RW_NODEBUG;
                        newown += (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
                        if (wcnt != 0)
                                newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

                        next = rw_cas(rw, owner, newown);
                        if (__predict_true(next == owner)) {
                                /* Wake up all sleeping readers. */
                                turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
                                break;
                        }
                        turnstile_exit(rw);
                }
        }

        RW_WANTLOCK(rw, RW_READER);
        RW_LOCKED(rw, RW_READER);
        RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
        RW_ASSERT(rw, RW_COUNT(rw) != 0);
}
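
/*
 * Illustrative sketch (not compiled): a common rw_downgrade() pattern.
 * A writer updates shared state, then downgrades to a read hold so that
 * other readers may proceed while it continues to examine the state it
 * just wrote.  The "table" object and its fields are hypothetical.
 */
#if 0
static void
table_rebuild_then_scan(struct table *tbl)
{

        rw_enter(&tbl->t_lock, RW_WRITER);
        tbl->t_generation++;		/* exclusive update */

        /* Keep a read hold; concurrent readers may now enter. */
        rw_downgrade(&tbl->t_lock);

        /* ... read-only scan of the freshly updated table ... */

        rw_exit(&tbl->t_lock);
}
#endif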

/*
 * rw_tryupgrade:
 *
 * Try to upgrade a read lock to a write lock.  We must be the only
 * reader.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
        uintptr_t owner, curthread, newown, next;
        struct lwp *l;

        l = curlwp;
        curthread = (uintptr_t)l;
        RW_ASSERT(rw, curthread != 0);
        RW_ASSERT(rw, rw_read_held(rw));

        for (owner = RW_READ_INCR;; owner = next) {
                newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
                next = rw_cas(rw, owner, newown);
                if (__predict_true(next == owner)) {
                        membar_acquire();
                        break;
                }
                RW_ASSERT(rw, (next & RW_WRITE_LOCKED) == 0);
                if (__predict_false((next & RW_THREAD) != RW_READ_INCR)) {
                        RW_ASSERT(rw, (next & RW_THREAD) != 0);
                        return 0;
                }
        }

        RW_UNLOCKED(rw, RW_READER);
        RW_WANTLOCK(rw, RW_WRITER);
        RW_LOCKED(rw, RW_WRITER);
        RW_ASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
        RW_ASSERT(rw, RW_OWNER(rw) == curthread);

        return 1;
}
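
/*
 * Illustrative sketch (not compiled): rw_tryupgrade() succeeds only when
 * the caller is the sole reader, so callers need a fallback that drops
 * the read hold, takes the write lock, and re-validates.  The "cache"
 * object, cache_lookup() and cache_insert() are hypothetical.
 */
#if 0
static void
cache_get_or_create(struct cache *c, int key)
{

        rw_enter(&c->c_lock, RW_READER);
        if (cache_lookup(c, key) == NULL) {
                if (!rw_tryupgrade(&c->c_lock)) {
                        /* Other readers present: drop and re-take. */
                        rw_exit(&c->c_lock);
                        rw_enter(&c->c_lock, RW_WRITER);
                        /* State may have changed; re-validate. */
                        if (cache_lookup(c, key) != NULL) {
                                rw_exit(&c->c_lock);
                                return;
                        }
                }
                cache_insert(c, key);	/* now write-held */
        }
        rw_exit(&c->c_lock);
}
#endif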

/*
 * rw_read_held:
 *
 * Returns true if the rwlock is held for reading.  Must only be
 * used for diagnostic assertions, and never be used to make
 * decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
        uintptr_t owner;

        if (rw == NULL)
                return 0;
        owner = rw->rw_owner;
        return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 * Returns true if the rwlock is held for writing.  Must only be
 * used for diagnostic assertions, and never be used to make
 * decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

        if (rw == NULL)
                return 0;
        return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
            (RW_WRITE_LOCKED | (uintptr_t)curlwp);
}

/*
 * rw_lock_held:
 *
 * Returns true if the rwlock is held for reading or writing.  Must
 * only be used for diagnostic assertions, and never be used to make
 * decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

        if (rw == NULL)
                return 0;
        return (rw->rw_owner & RW_THREAD) != 0;
}
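
/*
 * Illustrative sketch (not compiled): the intended use of the *_held()
 * predicates is in KASSERT()-style diagnostics inside functions that
 * require the caller to hold the lock, never for control flow.  The
 * "foo_softc" structure and its fields are hypothetical.
 */
#if 0
static void
foo_update_state(struct foo_softc *sc, int state)
{

        /* Caller must hold sc_lock as a writer. */
        KASSERT(rw_write_held(&sc->sc_lock));

        sc->sc_state = state;
}
#endif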

/*
 * rw_lock_op:
 *
 * For a rwlock that is known to be held by the caller, return
 * RW_READER or RW_WRITER to describe the hold type.
 */
krw_t
rw_lock_op(krwlock_t *rw)
{

        RW_ASSERT(rw, rw_lock_held(rw));

        return (rw->rw_owner & RW_WRITE_LOCKED) != 0 ? RW_WRITER : RW_READER;
}

/*
 * rw_owner:
 *
 * Return the current owner of an RW lock, but only if it is write
 * held.  Used for priority inheritance.
 */
static lwp_t *
rw_owner(wchan_t obj)
{
        krwlock_t *rw = (void *)(uintptr_t)obj;	/* discard qualifiers */
        uintptr_t owner = rw->rw_owner;

        if ((owner & RW_WRITE_LOCKED) == 0)
                return NULL;

        return (void *)(owner & RW_THREAD);
}