/* $NetBSD: kern_rwlock.c,v 1.59.2.1 2020/01/17 21:47:35 ad Exp $ */

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *     Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *     Richard McDougall.
 */
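
/*
 * Illustrative usage (a minimal sketch, not part of this implementation;
 * "example_lock" and "example_count" are hypothetical names, shown only
 * to exercise the public interface implemented below):
 *
 *	static krwlock_t example_lock;
 *	static int example_count;
 *
 *	rw_init(&example_lock);
 *
 *	rw_enter(&example_lock, RW_READER);	(shared hold)
 *	... read example_count ...
 *	rw_exit(&example_lock);
 *
 *	rw_enter(&example_lock, RW_WRITER);	(exclusive hold)
 *	example_count++;
 *	rw_exit(&example_lock);
 *
 *	rw_destroy(&example_lock);
 */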

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.59.2.1 2020/01/17 21:47:35 ad Exp $");

#define __RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/lock.h>
#include <sys/pserialize.h>

#include <dev/lockstat.h>

#include <machine/rwlock.h>

/*
 * LOCKDEBUG
 */

#if defined(LOCKDEBUG)

#define RW_WANTLOCK(rw, op) \
        LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \
            (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define RW_LOCKED(rw, op) \
        LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL, \
            (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define RW_UNLOCKED(rw, op) \
        LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \
            (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define RW_DASSERT(rw, cond) \
do { \
        if (__predict_false(!(cond))) \
                rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
} while (/* CONSTCOND */ 0);

#else	/* LOCKDEBUG */

#define RW_WANTLOCK(rw, op)     /* nothing */
#define RW_LOCKED(rw, op)       /* nothing */
#define RW_UNLOCKED(rw, op)     /* nothing */
#define RW_DASSERT(rw, cond)    /* nothing */

#endif	/* LOCKDEBUG */

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)

#define RW_ASSERT(rw, cond) \
do { \
        if (__predict_false(!(cond))) \
                rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
} while (/* CONSTCOND */ 0)

#else

#define RW_ASSERT(rw, cond)     /* nothing */

#endif	/* DIAGNOSTIC */

#define RW_SETDEBUG(rw, on)     ((rw)->rw_owner |= (on) ? 0 : RW_NODEBUG)
#define RW_DEBUG_P(rw)          (((rw)->rw_owner & RW_NODEBUG) == 0)
#if defined(LOCKDEBUG)
#define RW_INHERITDEBUG(n, o)   (n) |= (o) & RW_NODEBUG
#else /* defined(LOCKDEBUG) */
#define RW_INHERITDEBUG(n, o)   /* nothing */
#endif /* defined(LOCKDEBUG) */

/*
 * Memory barriers.
 */
#ifdef __HAVE_ATOMIC_AS_MEMBAR
#define RW_MEMBAR_ENTER()
#define RW_MEMBAR_EXIT()
#define RW_MEMBAR_PRODUCER()
#else
#define RW_MEMBAR_ENTER()       membar_enter()
#define RW_MEMBAR_EXIT()        membar_exit()
#define RW_MEMBAR_PRODUCER()    membar_producer()
#endif

static void rw_abort(const char *, size_t, krwlock_t *, const char *);
static void rw_dump(const volatile void *, lockop_printer_t);
static lwp_t *rw_owner(wchan_t);

static inline uintptr_t
rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

        RW_INHERITDEBUG(n, o);
        return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
            (void *)o, (void *)n);
}

static inline void
rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

        RW_INHERITDEBUG(n, o);
        n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
            (void *)n);
        RW_DASSERT(rw, n == o);
}

/*
 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 */
#ifdef LOCKDEBUG
#undef __HAVE_RW_STUBS
#endif

#ifndef __HAVE_RW_STUBS
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
__strong_alias(rw_tryenter,rw_vector_tryenter);
#endif

lockops_t rwlock_lockops = {
        .lo_name = "Reader / writer lock",
        .lo_type = LOCKOPS_SLEEP,
        .lo_dump = rw_dump,
};

syncobj_t rw_syncobj = {
        .sobj_flag = SOBJ_SLEEPQ_SORTED,
        .sobj_unsleep = turnstile_unsleep,
        .sobj_changepri = turnstile_changepri,
        .sobj_lendpri = sleepq_lendpri,
        .sobj_owner = rw_owner,
};

/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
 */
static void
rw_dump(const volatile void *cookie, lockop_printer_t pr)
{
        const volatile krwlock_t *rw = cookie;

        pr("owner/count : %#018lx flags : %#018x\n",
            (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */
static void __noinline
rw_abort(const char *func, size_t line, krwlock_t *rw, const char *msg)
{

        if (panicstr != NULL)
                return;

        LOCKDEBUG_ABORT(func, line, rw, &rwlock_lockops, msg);
}

/*
 * rw_init:
 *
 *	Initialize a rwlock for use.
 */
void _rw_init(krwlock_t *, uintptr_t);
void
_rw_init(krwlock_t *rw, uintptr_t return_address)
{
        bool dodebug;

        memset(rw, 0, sizeof(*rw));

        dodebug = LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address);
        RW_SETDEBUG(rw, dodebug);
}

void
rw_init(krwlock_t *rw)
{

        _rw_init(rw, (uintptr_t)__builtin_return_address(0));
}

/*
 * rw_destroy:
 *
 *	Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

        RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0);
        LOCKDEBUG_FREE(RW_DEBUG_P(rw), rw);
}

/*
 * rw_oncpu:
 *
 *	Return true if an rwlock owner is running on a CPU in the system.
 *	If the target is waiting on the kernel big lock, then we must
 *	release it.  This is necessary to avoid deadlock.
 */
static bool
rw_oncpu(uintptr_t owner)
{
#ifdef MULTIPROCESSOR
        struct cpu_info *ci;
        lwp_t *l;

        KASSERT(kpreempt_disabled());

        if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
                return false;
        }

        /*
         * See lwp_dtor() for why it is safe to dereference the LWP
         * pointer.  We must have kernel preemption disabled for that.
         */
        l = (lwp_t *)(owner & RW_THREAD);
        ci = l->l_cpu;

        if (ci && ci->ci_curlwp == l) {
                /* Target is running; do we need to block? */
                return (ci->ci_biglock_wanted != l);
        }
#endif
        /* Not running.  It may be safe to block now. */
        return false;
}

/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
        uintptr_t owner, incr, need_wait, set_wait, curthread, next;
        turnstile_t *ts;
        int queue;
        lwp_t *l;
        LOCKSTAT_TIMER(slptime);
        LOCKSTAT_TIMER(slpcnt);
        LOCKSTAT_TIMER(spintime);
        LOCKSTAT_COUNTER(spincnt);
        LOCKSTAT_FLAG(lsflag);

        l = curlwp;
        curthread = (uintptr_t)l;

        RW_ASSERT(rw, !cpu_intr_p());
        RW_ASSERT(rw, curthread != 0);
        RW_WANTLOCK(rw, op);

        if (panicstr == NULL) {
                KDASSERT(pserialize_not_in_read_section());
                LOCKDEBUG_BARRIER(&kernel_lock, 1);
        }

        /*
         * We play a slight trick here.  If we're a reader, we want to
         * increment the read count.  If we're a writer, we want to
         * set the owner field and the WRITE_LOCKED bit.
         *
         * In the latter case, we expect those bits to be zero,
         * therefore we can use an add operation to set them, which
         * means a single add operation serves both cases.
         */
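        /*
         * As a concrete sketch (symbolic constants only; the bit layout
         * itself is architecture-dependent): a reader adds RW_READ_INCR,
         * taking a free lock (0) to a hold count of one, while a writer
         * adds (curthread | RW_WRITE_LOCKED), taking a free lock (0) to
         * its own LWP address with the write bit set.
         */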
        if (__predict_true(op == RW_READER)) {
                incr = RW_READ_INCR;
                set_wait = RW_HAS_WAITERS;
                need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
                queue = TS_READER_Q;
        } else {
                RW_DASSERT(rw, op == RW_WRITER);
                incr = curthread | RW_WRITE_LOCKED;
                set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
                need_wait = RW_WRITE_LOCKED | RW_THREAD;
                queue = TS_WRITER_Q;
        }

        LOCKSTAT_ENTER(lsflag);

        KPREEMPT_DISABLE(curlwp);
        for (owner = rw->rw_owner;;) {
                /*
                 * Read the lock owner field.  If the need-to-wait
                 * indicator is clear, then try to acquire the lock.
                 */
                if ((owner & need_wait) == 0) {
                        next = rw_cas(rw, owner, (owner + incr) &
                            ~RW_WRITE_WANTED);
                        if (__predict_true(next == owner)) {
                                /* Got it! */
                                RW_MEMBAR_ENTER();
                                break;
                        }

                        /*
                         * Didn't get it -- spin around again (we'll
                         * probably sleep on the next iteration).
                         */
                        owner = next;
                        continue;
                }
                if (__predict_false(RW_OWNER(rw) == curthread)) {
                        rw_abort(__func__, __LINE__, rw,
                            "locking against myself");
                }
                /*
                 * If the lock owner is running on another CPU, and
                 * there are no existing waiters, then spin.
                 */
                if (rw_oncpu(owner)) {
                        LOCKSTAT_START_TIMER(lsflag, spintime);
                        u_int count = SPINLOCK_BACKOFF_MIN;
                        do {
                                KPREEMPT_ENABLE(curlwp);
                                SPINLOCK_BACKOFF(count);
                                KPREEMPT_DISABLE(curlwp);
                                owner = rw->rw_owner;
                        } while (rw_oncpu(owner));
                        LOCKSTAT_STOP_TIMER(lsflag, spintime);
                        LOCKSTAT_COUNT(spincnt, 1);
                        if ((owner & need_wait) == 0)
                                continue;
                }

                /*
                 * Grab the turnstile chain lock.  Once we have that, we
                 * can adjust the waiter bits and sleep queue.
                 */
                ts = turnstile_lookup(rw);

                /*
                 * Mark the rwlock as having waiters.  If the set fails,
                 * then we may not need to sleep and should spin again.
                 * Reload rw_owner because turnstile_lookup() may have
                 * spun on the turnstile chain lock.
                 */
                owner = rw->rw_owner;
                if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
                        turnstile_exit(rw);
                        continue;
                }
                next = rw_cas(rw, owner, owner | set_wait);
                if (__predict_false(next != owner)) {
                        turnstile_exit(rw);
                        owner = next;
                        continue;
                }

                LOCKSTAT_START_TIMER(lsflag, slptime);
                turnstile_block(ts, queue, rw, &rw_syncobj);
                LOCKSTAT_STOP_TIMER(lsflag, slptime);
                LOCKSTAT_COUNT(slpcnt, 1);

                /*
                 * No need for a memory barrier because of context switch.
                 * If not handed the lock, then spin again.
                 */
                if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
                        break;

                owner = rw->rw_owner;
        }
        KPREEMPT_ENABLE(curlwp);

        LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK |
            (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime,
            (l->l_rwcallsite != 0 ? l->l_rwcallsite :
                (uintptr_t)__builtin_return_address(0)));
        LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime,
            (l->l_rwcallsite != 0 ? l->l_rwcallsite :
                (uintptr_t)__builtin_return_address(0)));
        LOCKSTAT_EXIT(lsflag);

        RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
            (op == RW_READER && RW_COUNT(rw) != 0));
        RW_LOCKED(rw, op);
}

/*
 * rw_vector_exit:
 *
 *	Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw)
{
        uintptr_t curthread, owner, decr, newown, next;
        turnstile_t *ts;
        int rcnt, wcnt;
        lwp_t *l;

        curthread = (uintptr_t)curlwp;
        RW_ASSERT(rw, curthread != 0);

        /*
         * Again, we use a trick.  Since we used an add operation to
         * set the required lock bits, we can use a subtract to clear
         * them, which makes the read-release and write-release paths
         * the same.
         */
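        /*
         * As a concrete sketch (symbolic constants only): a read release
         * subtracts RW_READ_INCR, dropping the hold count by one, while a
         * write release subtracts (curthread | RW_WRITE_LOCKED), clearing
         * both the owner field and the write bit in one step.
         */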
        owner = rw->rw_owner;
        if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
                RW_UNLOCKED(rw, RW_WRITER);
                RW_ASSERT(rw, RW_OWNER(rw) == curthread);
                decr = curthread | RW_WRITE_LOCKED;
        } else {
                RW_UNLOCKED(rw, RW_READER);
                RW_ASSERT(rw, RW_COUNT(rw) != 0);
                decr = RW_READ_INCR;
        }

        /*
         * Compute what we expect the new value of the lock to be. Only
         * proceed to do direct handoff if there are waiters, and if the
         * lock would become unowned.
         */
        RW_MEMBAR_EXIT();
        for (;;) {
                newown = (owner - decr);
                if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
                        break;
                next = rw_cas(rw, owner, newown);
                if (__predict_true(next == owner))
                        return;
                owner = next;
        }

        /*
         * Grab the turnstile chain lock.  This gets the interlock
         * on the sleep queue.  Once we have that, we can adjust the
         * waiter bits.
         */
        ts = turnstile_lookup(rw);
        owner = rw->rw_owner;
        RW_DASSERT(rw, ts != NULL);
        RW_DASSERT(rw, (owner & RW_HAS_WAITERS) != 0);

        wcnt = TS_WAITERS(ts, TS_WRITER_Q);
        rcnt = TS_WAITERS(ts, TS_READER_Q);

        /*
         * Give the lock away.
         *
         * If we are releasing a write lock, then prefer to wake all
         * outstanding readers.  Otherwise, wake one writer if there
         * are outstanding readers, or all writers if there are no
         * pending readers.  If waking one specific writer, the writer
         * is handed the lock here.  If waking multiple writers, we
         * set WRITE_WANTED to block out new readers, and let them
         * do the work of acquiring the lock in rw_vector_enter().
         */
        if (rcnt == 0 || decr == RW_READ_INCR) {
                RW_DASSERT(rw, wcnt != 0);
                RW_DASSERT(rw, (owner & RW_WRITE_WANTED) != 0);

                if (rcnt != 0) {
                        /* Give the lock to the longest waiting writer. */
                        l = TS_FIRST(ts, TS_WRITER_Q);
                        newown = (uintptr_t)l | RW_WRITE_LOCKED | RW_HAS_WAITERS;
                        if (wcnt > 1)
                                newown |= RW_WRITE_WANTED;
                        rw_swap(rw, owner, newown);
                        turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
                } else {
                        /* Wake all writers and let them fight it out. */
                        rw_swap(rw, owner, RW_WRITE_WANTED);
                        turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
                }
        } else {
                RW_DASSERT(rw, rcnt != 0);

                /*
                 * Give the lock to all blocked readers.  If there
                 * is a writer waiting, new readers that arrive
                 * after the release will be blocked out.
                 */
                newown = rcnt << RW_READ_COUNT_SHIFT;
                if (wcnt != 0)
                        newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

                /* Wake up all sleeping readers. */
                rw_swap(rw, owner, newown);
                turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
        }
}

/*
 * rw_vector_tryenter:
 *
 *	Try to acquire a rwlock.
 */
int
rw_vector_tryenter(krwlock_t *rw, const krw_t op)
{
        uintptr_t curthread, owner, incr, need_wait, next;

        curthread = (uintptr_t)curlwp;

        RW_ASSERT(rw, curthread != 0);

        if (op == RW_READER) {
                incr = RW_READ_INCR;
                need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
        } else {
                RW_DASSERT(rw, op == RW_WRITER);
                incr = curthread | RW_WRITE_LOCKED;
                need_wait = RW_WRITE_LOCKED | RW_THREAD;
        }

        for (owner = rw->rw_owner;; owner = next) {
                if (__predict_false((owner & need_wait) != 0))
                        return 0;
                next = rw_cas(rw, owner, owner + incr);
                if (__predict_true(next == owner)) {
                        /* Got it! */
                        RW_MEMBAR_ENTER();
                        break;
                }
        }

        RW_WANTLOCK(rw, op);
        RW_LOCKED(rw, op);
        RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
            (op == RW_READER && RW_COUNT(rw) != 0));

        return 1;
}

/*
 * rw_downgrade:
 *
 *	Downgrade a write lock to a read lock.  Optimise memory accesses for
 *	the uncontended case.
 */
void
rw_downgrade(krwlock_t *rw)
{
        uintptr_t owner, curthread, newown, next;
        turnstile_t *ts;
        int rcnt, wcnt;

        curthread = (uintptr_t)curlwp;
        RW_ASSERT(rw, curthread != 0);
        RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
        RW_ASSERT(rw, RW_OWNER(rw) == curthread);
        RW_UNLOCKED(rw, RW_WRITER);
#if !defined(DIAGNOSTIC)
        __USE(curthread);
#endif

        /*
         * If there are no waiters, we can do this the easy way.
         * Try swapping us down to one read hold.  If it fails, the
         * lock condition has changed and we most likely now have
         * waiters.
         */
        RW_MEMBAR_PRODUCER();
        owner = curthread | RW_WRITE_LOCKED;
        next = rw_cas(rw, owner, RW_READ_INCR);
        if (__predict_true(next == owner)) {
                RW_LOCKED(rw, RW_READER);
                RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
                RW_DASSERT(rw, RW_COUNT(rw) != 0);
                return;
        }

        /*
         * Grab the turnstile chain lock.  This gets the interlock
         * on the sleep queue.  Once we have that, we can adjust the
         * waiter bits.
         */
        for (;;) {
                owner = next;
                ts = turnstile_lookup(rw);
                RW_DASSERT(rw, ts != NULL);

                rcnt = TS_WAITERS(ts, TS_READER_Q);
                wcnt = TS_WAITERS(ts, TS_WRITER_Q);

                /*
                 * If there are no readers, just preserve the waiters
                 * bits, swap us down to one read hold and return.
                 */
                if (rcnt == 0) {
                        RW_DASSERT(rw, wcnt != 0);
                        RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
                        RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);

                        newown = RW_READ_INCR | RW_HAS_WAITERS | RW_WRITE_WANTED;
                        next = rw_cas(rw, owner, newown);
                        turnstile_exit(rw);
                        if (__predict_true(next == owner))
                                break;
                } else {
                        /*
                         * Give the lock to all blocked readers, retaining
                         * one read hold for ourselves.  If there is a
                         * writer waiting, new readers will be blocked
                         * out.
                         */
                        newown = (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
                        if (wcnt != 0)
                                newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

                        next = rw_cas(rw, owner, newown);
                        if (__predict_true(next == owner)) {
                                /* Wake up all sleeping readers. */
                                turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
                                break;
                        }
                        turnstile_exit(rw);
                }
        }

        RW_WANTLOCK(rw, RW_READER);
        RW_LOCKED(rw, RW_READER);
        RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
        RW_DASSERT(rw, RW_COUNT(rw) != 0);
}

/*
 * rw_tryupgrade:
 *
 *	Try to upgrade a read lock to a write lock.  We must be the only
 *	reader.  Optimise memory accesses for the uncontended case.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
        uintptr_t owner, curthread, newown, next;

        curthread = (uintptr_t)curlwp;
        RW_ASSERT(rw, curthread != 0);
        RW_ASSERT(rw, rw_read_held(rw));

        for (owner = RW_READ_INCR;; owner = next) {
                newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
                next = rw_cas(rw, owner, newown);
                if (__predict_true(next == owner)) {
                        RW_MEMBAR_PRODUCER();
                        break;
                }
                RW_ASSERT(rw, (next & RW_WRITE_LOCKED) == 0);
                if (__predict_false((next & RW_THREAD) != RW_READ_INCR)) {
                        RW_ASSERT(rw, (next & RW_THREAD) != 0);
                        return 0;
                }
        }

        RW_UNLOCKED(rw, RW_READER);
        RW_WANTLOCK(rw, RW_WRITER);
        RW_LOCKED(rw, RW_WRITER);
        RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
        RW_DASSERT(rw, RW_OWNER(rw) == curthread);

        return 1;
}
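
/*
 * Illustrative upgrade/downgrade pattern (a minimal sketch, not part of
 * this implementation; "example_lock" is a hypothetical name).  If the
 * upgrade fails because other readers hold the lock, the caller must
 * drop its read hold, re-acquire as a writer, and revalidate any state
 * it examined under the read hold:
 *
 *	rw_enter(&example_lock, RW_READER);
 *	if (!rw_tryupgrade(&example_lock)) {
 *		rw_exit(&example_lock);
 *		rw_enter(&example_lock, RW_WRITER);
 *		... revalidate state ...
 *	}
 *	... modify covered data ...
 *	rw_downgrade(&example_lock);
 *	... continue reading, then ...
 *	rw_exit(&example_lock);
 */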

/*
 * rw_read_held:
 *
 *	Returns true if the rwlock is held for reading.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
        uintptr_t owner;

        if (rw == NULL)
                return 0;
        owner = rw->rw_owner;
        return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *	Returns true if the rwlock is held for writing.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

        if (rw == NULL)
                return 0;
        return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
            (RW_WRITE_LOCKED | (uintptr_t)curlwp);
}

/*
 * rw_lock_held:
 *
 *	Returns true if the rwlock is held for reading or writing.  Must
 *	only be used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

        if (rw == NULL)
                return 0;
        return (rw->rw_owner & RW_THREAD) != 0;
}

/*
 * rw_owner:
 *
 *	Return the current owner of an RW lock, but only if it is write
 *	held.  Used for priority inheritance.
 */
static lwp_t *
rw_owner(wchan_t obj)
{
        krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
        uintptr_t owner = rw->rw_owner;

        if ((owner & RW_WRITE_LOCKED) == 0)
                return NULL;

        return (void *)(owner & RW_THREAD);
}