/*	$NetBSD: kern_rwlock.c,v 1.61 2020/01/19 18:34:24 ad Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 */
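
/*
 * A sketch of the lock word, as used below (see sys/rwlock.h for the
 * authoritative definitions): the low-order bits of rw_owner carry the
 * flag bits referenced in this file (RW_WRITE_LOCKED, RW_HAS_WAITERS,
 * RW_WRITE_WANTED, RW_NODEBUG), while the remaining RW_THREAD bits hold
 * either the owning LWP pointer (when write held) or the reader count,
 * which advances in units of RW_READ_INCR.  Every state transition is
 * made with an atomic operation on this single word.
 */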

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.61 2020/01/19 18:34:24 ad Exp $");

#include "opt_lockdebug.h"

#define	__RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/lock.h>
#include <sys/pserialize.h>

#include <dev/lockstat.h>

#include <machine/rwlock.h>

/*
 * LOCKDEBUG
 */

#define	RW_DEBUG_P(rw)		(((rw)->rw_owner & RW_NODEBUG) == 0)

#define	RW_WANTLOCK(rw, op) \
	LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_LOCKED(rw, op) \
	LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL, \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_UNLOCKED(rw, op) \
	LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)
#define	RW_ASSERT(rw, cond) \
do { \
	if (__predict_false(!(cond))) \
		rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
} while (/* CONSTCOND */ 0)
#else
#define	RW_ASSERT(rw, cond)	/* nothing */
#endif	/* DIAGNOSTIC */

/*
 * Memory barriers.
 */
#ifdef __HAVE_ATOMIC_AS_MEMBAR
#define	RW_MEMBAR_ENTER()
#define	RW_MEMBAR_EXIT()
#define	RW_MEMBAR_PRODUCER()
#else
#define	RW_MEMBAR_ENTER()		membar_enter()
#define	RW_MEMBAR_EXIT()		membar_exit()
#define	RW_MEMBAR_PRODUCER()		membar_producer()
#endif
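
/*
 * How the barriers above are used in this file: RW_MEMBAR_ENTER() is
 * issued after a successful acquiring CAS (acquire semantics),
 * RW_MEMBAR_EXIT() before the lock word is released (release semantics),
 * and RW_MEMBAR_PRODUCER() when the ownership mode changes in
 * rw_downgrade() and rw_tryupgrade().  Where the atomic operations
 * already act as full barriers, they expand to nothing.
 */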

/*
 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 */
#ifdef LOCKDEBUG
#undef	__HAVE_RW_STUBS
#endif

#ifndef __HAVE_RW_STUBS
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
__strong_alias(rw_tryenter,rw_vector_tryenter);
#endif

static void	rw_abort(const char *, size_t, krwlock_t *, const char *);
static void	rw_dump(const volatile void *, lockop_printer_t);
static lwp_t	*rw_owner(wchan_t);

lockops_t rwlock_lockops = {
	.lo_name = "Reader / writer lock",
	.lo_type = LOCKOPS_SLEEP,
	.lo_dump = rw_dump,
};

syncobj_t rw_syncobj = {
	.sobj_flag	= SOBJ_SLEEPQ_SORTED,
	.sobj_unsleep	= turnstile_unsleep,
	.sobj_changepri	= turnstile_changepri,
	.sobj_lendpri	= sleepq_lendpri,
	.sobj_owner	= rw_owner,
};

/*
 * rw_cas:
 *
 *	Do an atomic compare-and-swap on the lock word.
 */
static inline uintptr_t
rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
	    (void *)o, (void *)n);
}
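
/*
 * The routines below drive rw_cas() with variations of the same retry
 * idiom, sketched here for reference:
 *
 *	for (owner = rw->rw_owner;; owner = next) {
 *		(compute newown from owner)
 *		next = rw_cas(rw, owner, newown);
 *		if (next == owner)
 *			break;		(the swap took effect)
 *		(otherwise another LWP changed the word; next holds the
 *		fresh value, so recompute and retry)
 *	}
 */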

/*
 * rw_swap:
 *
 *	Do an atomic swap of the lock word.  This is used only when it's
 *	known that the lock word is set up such that it can't be changed
 *	behind us (assert this), so there's no point considering the result.
 */
static inline void
rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
	    (void *)n);

	RW_ASSERT(rw, n == o);
	RW_ASSERT(rw, (o & RW_HAS_WAITERS) != 0);
}

/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
 */
static void
rw_dump(const volatile void *cookie, lockop_printer_t pr)
{
	const volatile krwlock_t *rw = cookie;

	pr("owner/count : %#018lx flags : %#018x\n",
	    (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */
static void __noinline
rw_abort(const char *func, size_t line, krwlock_t *rw, const char *msg)
{

	if (panicstr != NULL)
		return;

	LOCKDEBUG_ABORT(func, line, rw, &rwlock_lockops, msg);
}

/*
 * rw_init:
 *
 *	Initialize a rwlock for use.
 */
void
_rw_init(krwlock_t *rw, uintptr_t return_address)
{

	if (LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address))
		rw->rw_owner = 0;
	else
		rw->rw_owner = RW_NODEBUG;
}

void
rw_init(krwlock_t *rw)
{

	_rw_init(rw, (uintptr_t)__builtin_return_address(0));
}

/*
 * rw_destroy:
 *
 *	Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

	RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0);
	LOCKDEBUG_FREE((rw->rw_owner & RW_NODEBUG) == 0, rw);
}
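
/*
 * Typical lifecycle from a caller's perspective (illustrative only; the
 * softc and field names here are hypothetical):
 *
 *	struct foo_softc {
 *		krwlock_t	sc_lock;
 *	} *sc;
 *
 *	rw_init(&sc->sc_lock);
 *	...
 *	rw_enter(&sc->sc_lock, RW_READER);
 *	(inspect the shared state)
 *	rw_exit(&sc->sc_lock);
 *	...
 *	rw_destroy(&sc->sc_lock);
 */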

/*
 * rw_oncpu:
 *
 *	Return true if an rwlock owner is running on a CPU in the system.
 *	If the target is waiting on the kernel big lock, then we must
 *	release it.  This is necessary to avoid deadlock.
 */
static bool
rw_oncpu(uintptr_t owner)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *ci;
	lwp_t *l;

	KASSERT(kpreempt_disabled());

	if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
		return false;
	}

	/*
	 * See lwp_dtor() for why dereferencing the LWP pointer is safe.
	 * We must have kernel preemption disabled for that.
	 */
	l = (lwp_t *)(owner & RW_THREAD);
	ci = l->l_cpu;

	if (ci && ci->ci_curlwp == l) {
		/* Target is running; do we need to block? */
		return (ci->ci_biglock_wanted != l);
	}
#endif
	/* Not running.  It may be safe to block now. */
	return false;
}

/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
	turnstile_t *ts;
	int queue;
	lwp_t *l;
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_COUNTER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_FLAG(lsflag);

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, !cpu_intr_p());
	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, op);

	if (panicstr == NULL) {
		KDASSERT(pserialize_not_in_read_section());
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
	 */
	if (__predict_true(op == RW_READER)) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
		queue = TS_READER_Q;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
		queue = TS_WRITER_Q;
	}
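
	/*
	 * A concrete illustration of the trick (example values only):
	 * taking a free lock (rw_owner == 0) as a reader produces
	 * 0 + RW_READ_INCR, a read count of one, and each additional
	 * reader adds another RW_READ_INCR.  Taking the free lock as a
	 * writer produces curthread | RW_WRITE_LOCKED, which works
	 * because LWP pointers are aligned such that the flag bits below
	 * the read count are clear.
	 */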

	LOCKSTAT_ENTER(lsflag);

	KPREEMPT_DISABLE(curlwp);
	for (owner = rw->rw_owner;;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		if ((owner & need_wait) == 0) {
			next = rw_cas(rw, owner, (owner + incr) &
			    ~RW_WRITE_WANTED);
			if (__predict_true(next == owner)) {
				/* Got it! */
				RW_MEMBAR_ENTER();
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			owner = next;
			continue;
		}
		if (__predict_false(RW_OWNER(rw) == curthread)) {
			rw_abort(__func__, __LINE__, rw,
			    "locking against myself");
		}
		/*
		 * If the lock owner is running on another CPU, and
		 * there are no existing waiters, then spin.
		 */
		if (rw_oncpu(owner)) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			u_int count = SPINLOCK_BACKOFF_MIN;
			do {
				KPREEMPT_ENABLE(curlwp);
				SPINLOCK_BACKOFF(count);
				KPREEMPT_DISABLE(curlwp);
				owner = rw->rw_owner;
			} while (rw_oncpu(owner));
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if ((owner & need_wait) == 0)
				continue;
		}

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 * Reload rw_owner because turnstile_lookup() may have
		 * spun on the turnstile chain lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
			turnstile_exit(rw);
			continue;
		}
		next = rw_cas(rw, owner, owner | set_wait);
		if (__predict_false(next != owner)) {
			turnstile_exit(rw);
			owner = next;
			continue;
		}

		LOCKSTAT_START_TIMER(lsflag, slptime);
		turnstile_block(ts, queue, rw, &rw_syncobj);
		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		/*
		 * No need for a memory barrier because of context switch.
		 * If not handed the lock, then spin again.
		 */
		if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
			break;

		owner = rw->rw_owner;
	}
	KPREEMPT_ENABLE(curlwp);

	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK |
	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
	      (uintptr_t)__builtin_return_address(0)));
	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite :
	      (uintptr_t)__builtin_return_address(0)));
	LOCKSTAT_EXIT(lsflag);

	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	RW_LOCKED(rw, op);
}

/*
 * rw_vector_exit:
 *
 *	Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw)
{
	uintptr_t curthread, owner, decr, newown, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);

	/*
	 * Again, we use a trick.  Since we used an add operation to
	 * set the required lock bits, we can use a subtract to clear
	 * them, which makes the read-release and write-release path
	 * the same.
	 */
	owner = rw->rw_owner;
	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
		RW_UNLOCKED(rw, RW_WRITER);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		decr = curthread | RW_WRITE_LOCKED;
	} else {
		RW_UNLOCKED(rw, RW_READER);
		RW_ASSERT(rw, RW_COUNT(rw) != 0);
		decr = RW_READ_INCR;
	}
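
	/*
	 * Illustration of the subtract (example values only): a write
	 * release subtracts curthread | RW_WRITE_LOCKED, leaving zero if
	 * no waiter bits are set; the final read release subtracts
	 * RW_READ_INCR, leaving only the flag bits.  If what would remain
	 * is unowned but has RW_HAS_WAITERS set, the simple CAS release
	 * below is abandoned in favour of direct handoff via the
	 * turnstile.
	 */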

	/*
	 * Compute what we expect the new value of the lock to be. Only
	 * proceed to do direct handoff if there are waiters, and if the
	 * lock would become unowned.
	 */
	RW_MEMBAR_EXIT();
	for (;;) {
		newown = (owner - decr);
		if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
			break;
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner))
			return;
		owner = next;
	}

	/*
	 * Grab the turnstile chain lock.  This gets the interlock
	 * on the sleep queue.  Once we have that, we can adjust the
	 * waiter bits.
	 */
	ts = turnstile_lookup(rw);
	owner = rw->rw_owner;
	RW_ASSERT(rw, ts != NULL);
	RW_ASSERT(rw, (owner & RW_HAS_WAITERS) != 0);

	wcnt = TS_WAITERS(ts, TS_WRITER_Q);
	rcnt = TS_WAITERS(ts, TS_READER_Q);

	/*
	 * Give the lock away.
	 *
	 * If we are releasing a write lock, then prefer to wake all
	 * outstanding readers.  Otherwise, wake one writer if there
	 * are outstanding readers, or all writers if there are no
	 * pending readers.  If waking one specific writer, the writer
	 * is handed the lock here.  If waking multiple writers, we
	 * set WRITE_WANTED to block out new readers, and let them
	 * do the work of acquiring the lock in rw_vector_enter().
	 */
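
	/*
	 * Worked example (illustrative): a writer releasing while two
	 * readers and one writer are queued takes the else branch below,
	 * hands out two read holds at once and leaves RW_HAS_WAITERS |
	 * RW_WRITE_WANTED set so the queued writer keeps new readers out;
	 * once those read holds are released, the lock is passed to the
	 * waiting writer through the first branch.
	 */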
	if (rcnt == 0 || decr == RW_READ_INCR) {
		RW_ASSERT(rw, wcnt != 0);
		RW_ASSERT(rw, (owner & RW_WRITE_WANTED) != 0);

		if (rcnt != 0) {
			/* Give the lock to the longest waiting writer. */
			l = TS_FIRST(ts, TS_WRITER_Q);
			newown = (uintptr_t)l | (owner & RW_NODEBUG);
			newown |= RW_WRITE_LOCKED | RW_HAS_WAITERS;
			if (wcnt > 1)
				newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
		} else {
			/* Wake all writers and let them fight it out. */
			newown = owner & RW_NODEBUG;
			newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
		}
	} else {
		RW_ASSERT(rw, rcnt != 0);

		/*
		 * Give the lock to all blocked readers.  If there
		 * is a writer waiting, new readers that arrive
		 * after the release will be blocked out.
		 */
		newown = owner & RW_NODEBUG;
		newown += rcnt << RW_READ_COUNT_SHIFT;
		if (wcnt != 0)
			newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

		/* Wake up all sleeping readers. */
		rw_swap(rw, owner, newown);
		turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
	}
}

/*
 * rw_vector_tryenter:
 *
 *	Try to acquire a rwlock.
 */
int
rw_vector_tryenter(krwlock_t *rw, const krw_t op)
{
	uintptr_t curthread, owner, incr, need_wait, next;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, curthread != 0);

	if (op == RW_READER) {
		incr = RW_READ_INCR;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (owner = rw->rw_owner;; owner = next) {
		if (__predict_false((owner & need_wait) != 0))
			return 0;
		next = rw_cas(rw, owner, owner + incr);
		if (__predict_true(next == owner)) {
			/* Got it! */
			break;
		}
	}

	RW_WANTLOCK(rw, op);
	RW_LOCKED(rw, op);
	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));

	RW_MEMBAR_ENTER();
	return 1;
}

/*
 * rw_downgrade:
 *
 *	Downgrade a write lock to a read lock.
 */
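
/*
 * A common calling pattern (illustrative only; 'lock' is hypothetical):
 *
 *	rw_enter(&lock, RW_WRITER);
 *	(modify the protected data)
 *	rw_downgrade(&lock);
 *	(continue reading it under the retained read hold)
 *	rw_exit(&lock);
 */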
void
rw_downgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, newown, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
	RW_UNLOCKED(rw, RW_WRITER);
#if !defined(DIAGNOSTIC)
	__USE(curthread);
#endif

	RW_MEMBAR_PRODUCER();

	for (owner = rw->rw_owner;; owner = next) {
		/*
		 * If there are no waiters we can do this the easy way.  Try
		 * swapping us down to one read hold.  If it fails, the lock
		 * condition has changed and we most likely now have
		 * waiters.
		 */
		if ((owner & RW_HAS_WAITERS) == 0) {
			newown = (owner & RW_NODEBUG);
			next = rw_cas(rw, owner, newown + RW_READ_INCR);
			if (__predict_true(next == owner)) {
				RW_LOCKED(rw, RW_READER);
				RW_ASSERT(rw,
				    (rw->rw_owner & RW_WRITE_LOCKED) == 0);
				RW_ASSERT(rw, RW_COUNT(rw) != 0);
				return;
			}
			continue;
		}

		/*
		 * Grab the turnstile chain lock.  This gets the interlock
		 * on the sleep queue.  Once we have that, we can adjust the
		 * waiter bits.
		 */
		ts = turnstile_lookup(rw);
		RW_ASSERT(rw, ts != NULL);

		rcnt = TS_WAITERS(ts, TS_READER_Q);
		wcnt = TS_WAITERS(ts, TS_WRITER_Q);

		if (rcnt == 0) {
			/*
			 * If there are no readers, just preserve the
			 * waiters bits, swap us down to one read hold and
			 * return.
			 */
			RW_ASSERT(rw, wcnt != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);

			newown = owner & RW_NODEBUG;
			newown |= RW_READ_INCR | RW_HAS_WAITERS |
			    RW_WRITE_WANTED;
			next = rw_cas(rw, owner, newown);
			turnstile_exit(rw);
			if (__predict_true(next == owner))
				break;
		} else {
			/*
			 * Give the lock to all blocked readers.  We may
			 * retain one read hold if downgrading.  If there is
			 * a writer waiting, new readers will be blocked
			 * out.
			 */
			newown = owner & RW_NODEBUG;
			newown += (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
			if (wcnt != 0)
				newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

			next = rw_cas(rw, owner, newown);
			if (__predict_true(next == owner)) {
				/* Wake up all sleeping readers. */
				turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
				break;
			}
			turnstile_exit(rw);
		}
	}

	RW_WANTLOCK(rw, RW_READER);
	RW_LOCKED(rw, RW_READER);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
	RW_ASSERT(rw, RW_COUNT(rw) != 0);
}

/*
 * rw_tryupgrade:
 *
 *	Try to upgrade a read lock to a write lock.  We must be the only
 *	reader.
 */
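
/*
 * Because the upgrade can fail, callers typically fall back to dropping
 * and re-taking the lock, then revalidating whatever was examined while
 * it was only read held (a sketch; 'lock' is hypothetical):
 *
 *	if (!rw_tryupgrade(&lock)) {
 *		rw_exit(&lock);
 *		rw_enter(&lock, RW_WRITER);
 *		(re-check the condition: it may have changed)
 *	}
 */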
int
rw_tryupgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, newown, next;
	struct lwp *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, rw_read_held(rw));

	for (owner = RW_READ_INCR;; owner = next) {
		newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner)) {
			RW_MEMBAR_PRODUCER();
			break;
		}
		RW_ASSERT(rw, (next & RW_WRITE_LOCKED) == 0);
		if (__predict_false((next & RW_THREAD) != RW_READ_INCR)) {
			RW_ASSERT(rw, (next & RW_THREAD) != 0);
			return 0;
		}
	}

	RW_UNLOCKED(rw, RW_READER);
	RW_WANTLOCK(rw, RW_WRITER);
	RW_LOCKED(rw, RW_WRITER);
	RW_ASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);

	return 1;
}

/*
 * rw_read_held:
 *
 *	Returns true if the rwlock is held for reading.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
	uintptr_t owner;

	if (rw == NULL)
		return 0;
	owner = rw->rw_owner;
	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *	Returns true if the rwlock is held for writing.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
	    (RW_WRITE_LOCKED | (uintptr_t)curlwp);
}

/*
 * rw_lock_held:
 *
 *	Returns true if the rwlock is held for reading or writing.  Must
 *	only be used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & RW_THREAD) != 0;
}
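
/*
 * These predicates are meant for assertions only, e.g. (illustrative;
 * sc_lock is hypothetical):
 *
 *	KASSERT(rw_write_held(&sc->sc_lock));
 *
 * at the top of a function that requires its caller to hold the lock
 * for writing.
 */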

/*
 * rw_owner:
 *
 *	Return the current owner of an RW lock, but only if it is write
 *	held.  Used for priority inheritance.
 */
static lwp_t *
rw_owner(wchan_t obj)
{
	krwlock_t *rw = (void *)(uintptr_t)obj;	/* discard qualifiers */
	uintptr_t owner = rw->rw_owner;

	if ((owner & RW_WRITE_LOCKED) == 0)
		return NULL;

	return (void *)(owner & RW_THREAD);
}