/*	$NetBSD: kern_rwlock.c,v 1.58 2019/11/30 14:21:16 ad Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.58 2019/11/30 14:21:16 ad Exp $");

#define __RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/lock.h>
#include <sys/pserialize.h>

#include <dev/lockstat.h>

#include <machine/rwlock.h>

/*
 * LOCKDEBUG
 */

#if defined(LOCKDEBUG)

#define RW_WANTLOCK(rw, op) \
        LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \
            (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define RW_LOCKED(rw, op) \
        LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL, \
            (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define RW_UNLOCKED(rw, op) \
        LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \
            (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define RW_DASSERT(rw, cond) \
do { \
        if (__predict_false(!(cond))) \
                rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
} while (/* CONSTCOND */ 0);

#else   /* LOCKDEBUG */

#define RW_WANTLOCK(rw, op)     /* nothing */
#define RW_LOCKED(rw, op)       /* nothing */
#define RW_UNLOCKED(rw, op)     /* nothing */
#define RW_DASSERT(rw, cond)    /* nothing */

#endif  /* LOCKDEBUG */

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)

#define RW_ASSERT(rw, cond) \
do { \
        if (__predict_false(!(cond))) \
                rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
} while (/* CONSTCOND */ 0)

#else

#define RW_ASSERT(rw, cond)     /* nothing */

#endif  /* DIAGNOSTIC */

#define RW_SETDEBUG(rw, on)     ((rw)->rw_owner |= (on) ? 0 : RW_NODEBUG)
#define RW_DEBUG_P(rw)          (((rw)->rw_owner & RW_NODEBUG) == 0)
#if defined(LOCKDEBUG)
#define RW_INHERITDEBUG(n, o)   (n) |= (o) & RW_NODEBUG
#else /* defined(LOCKDEBUG) */
#define RW_INHERITDEBUG(n, o)   /* nothing */
#endif /* defined(LOCKDEBUG) */

/*
 * Memory barriers.
 */
#ifdef __HAVE_ATOMIC_AS_MEMBAR
#define RW_MEMBAR_ENTER()
#define RW_MEMBAR_EXIT()
#define RW_MEMBAR_PRODUCER()
#else
#define RW_MEMBAR_ENTER()       membar_enter()
#define RW_MEMBAR_EXIT()        membar_exit()
#define RW_MEMBAR_PRODUCER()    membar_producer()
#endif
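
/*
 * Illustrative note (editor's addition, not in the original source): on
 * platforms where the atomic operations already imply full memory barriers
 * (__HAVE_ATOMIC_AS_MEMBAR) the macros above expand to nothing.  Elsewhere
 * the code below pairs them with the lock word updates roughly as follows:
 * a successful acquire CAS is followed by RW_MEMBAR_ENTER(), so accesses in
 * the critical section cannot be reordered ahead of taking the lock, and
 * RW_MEMBAR_EXIT() is issued before the releasing update, so stores made
 * while holding the lock become visible before the lock appears free.
 * RW_MEMBAR_PRODUCER() (a store-ordering barrier) is used on the downgrade
 * and upgrade paths.
 */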

static void     rw_abort(const char *, size_t, krwlock_t *, const char *);
static void     rw_dump(const volatile void *, lockop_printer_t);
static lwp_t    *rw_owner(wchan_t);

static inline uintptr_t
rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

        RW_INHERITDEBUG(n, o);
        return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
            (void *)o, (void *)n);
}

static inline void
rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

        RW_INHERITDEBUG(n, o);
        n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
            (void *)n);
        RW_DASSERT(rw, n == o);
}

/*
 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 */
#ifdef LOCKDEBUG
#undef  __HAVE_RW_STUBS
#endif

#ifndef __HAVE_RW_STUBS
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
__strong_alias(rw_tryenter,rw_vector_tryenter);
#endif

lockops_t rwlock_lockops = {
        .lo_name = "Reader / writer lock",
        .lo_type = LOCKOPS_SLEEP,
        .lo_dump = rw_dump,
};

syncobj_t rw_syncobj = {
        .sobj_flag      = SOBJ_SLEEPQ_SORTED,
        .sobj_unsleep   = turnstile_unsleep,
        .sobj_changepri = turnstile_changepri,
        .sobj_lendpri   = sleepq_lendpri,
        .sobj_owner     = rw_owner,
};

/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
 */
static void
rw_dump(const volatile void *cookie, lockop_printer_t pr)
{
        const volatile krwlock_t *rw = cookie;

        pr("owner/count  : %#018lx flags        : %#018x\n",
            (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */
static void __noinline
rw_abort(const char *func, size_t line, krwlock_t *rw, const char *msg)
{

        if (panicstr != NULL)
                return;

        LOCKDEBUG_ABORT(func, line, rw, &rwlock_lockops, msg);
}

/*
 * rw_init:
 *
 *	Initialize a rwlock for use.
 */
void    _rw_init(krwlock_t *, uintptr_t);
void
_rw_init(krwlock_t *rw, uintptr_t return_address)
{
        bool dodebug;

        memset(rw, 0, sizeof(*rw));

        dodebug = LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address);
        RW_SETDEBUG(rw, dodebug);
}

void
rw_init(krwlock_t *rw)
{

        _rw_init(rw, (uintptr_t)__builtin_return_address(0));
}

/*
 * rw_destroy:
 *
 *	Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

        RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0);
        LOCKDEBUG_FREE(RW_DEBUG_P(rw), rw);
}
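
/*
 * Example (editor's illustration, not part of the original file): typical
 * lifecycle of a reader/writer lock protecting a hypothetical softc.  The
 * names "foo_softc", "sc" and "sc_lock" are invented for the example.
 *
 *	struct foo_softc {
 *		krwlock_t	sc_lock;
 *		int		sc_state;
 *	} *sc;
 *
 *	rw_init(&sc->sc_lock);
 *
 *	rw_enter(&sc->sc_lock, RW_READER);	// many concurrent readers
 *	(void)sc->sc_state;
 *	rw_exit(&sc->sc_lock);
 *
 *	rw_enter(&sc->sc_lock, RW_WRITER);	// one exclusive writer
 *	sc->sc_state = 1;
 *	rw_exit(&sc->sc_lock);
 *
 *	rw_destroy(&sc->sc_lock);		// lock must be unheld here
 */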

/*
 * rw_oncpu:
 *
 *	Return true if an rwlock owner is running on a CPU in the system.
 *	If the target is waiting on the kernel big lock, then we must
 *	release it.  This is necessary to avoid deadlock.
 */
static bool
rw_oncpu(uintptr_t owner)
{
#ifdef MULTIPROCESSOR
        struct cpu_info *ci;
        lwp_t *l;

        KASSERT(kpreempt_disabled());

        if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) {
                return false;
        }

        /*
         * See lwp_dtor() for why it is safe to dereference the LWP
         * pointer here.  We must have kernel preemption disabled for
         * that.
         */
        l = (lwp_t *)(owner & RW_THREAD);
        ci = l->l_cpu;

        if (ci && ci->ci_curlwp == l) {
                /* Target is running; do we need to block? */
                return (ci->ci_biglock_wanted != l);
        }
#endif
        /* Not running.  It may be safe to block now. */
        return false;
}

/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
        uintptr_t owner, incr, need_wait, set_wait, curthread, next;
        turnstile_t *ts;
        int queue;
        lwp_t *l;
        LOCKSTAT_TIMER(slptime);
        LOCKSTAT_COUNTER(slpcnt);
        LOCKSTAT_TIMER(spintime);
        LOCKSTAT_COUNTER(spincnt);
        LOCKSTAT_FLAG(lsflag);

        l = curlwp;
        curthread = (uintptr_t)l;

        RW_ASSERT(rw, !cpu_intr_p());
        RW_ASSERT(rw, curthread != 0);
        RW_WANTLOCK(rw, op);

        if (panicstr == NULL) {
                KDASSERT(pserialize_not_in_read_section());
                LOCKDEBUG_BARRIER(&kernel_lock, 1);
        }

        /*
         * We play a slight trick here.  If we're a reader, we want to
         * increment the read count.  If we're a writer, we want to
         * set the owner field and the WRITE_LOCKED bit.
         *
         * In the latter case, we expect those bits to be zero,
         * therefore we can use an add operation to set them, which
         * means an add operation for both cases.
         */
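        /*
         * Illustration (editor's note, not from the original source): the
         * rw_owner word encodes both the read count and the owning LWP.
         * Starting from a free lock (rw_owner == 0):
         *
         *	reader acquire:	rw_owner += RW_READ_INCR
         *			-> read count of one, no owner recorded
         *	writer acquire:	rw_owner += curthread | RW_WRITE_LOCKED
         *			-> owner LWP pointer plus the write bit
         *
         * Because the owner/count bits are known to be zero whenever the
         * lock can be taken, both acquisitions reduce to "owner + incr".
         */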
        if (__predict_true(op == RW_READER)) {
                incr = RW_READ_INCR;
                set_wait = RW_HAS_WAITERS;
                need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
                queue = TS_READER_Q;
        } else {
                RW_DASSERT(rw, op == RW_WRITER);
                incr = curthread | RW_WRITE_LOCKED;
                set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
                need_wait = RW_WRITE_LOCKED | RW_THREAD;
                queue = TS_WRITER_Q;
        }

        LOCKSTAT_ENTER(lsflag);

        KPREEMPT_DISABLE(curlwp);
        for (owner = rw->rw_owner;;) {
                /*
                 * Read the lock owner field.  If the need-to-wait
                 * indicator is clear, then try to acquire the lock.
                 */
                if ((owner & need_wait) == 0) {
                        next = rw_cas(rw, owner, (owner + incr) &
                            ~RW_WRITE_WANTED);
                        if (__predict_true(next == owner)) {
                                /* Got it! */
                                RW_MEMBAR_ENTER();
                                break;
                        }

                        /*
                         * Didn't get it -- spin around again (we'll
                         * probably sleep on the next iteration).
                         */
                        owner = next;
                        continue;
                }
                if (__predict_false(panicstr != NULL)) {
                        KPREEMPT_ENABLE(curlwp);
                        return;
                }
                if (__predict_false(RW_OWNER(rw) == curthread)) {
                        rw_abort(__func__, __LINE__, rw,
                            "locking against myself");
                }
                /*
                 * If the lock owner is running on another CPU, and
                 * there are no existing waiters, then spin.
                 */
                if (rw_oncpu(owner)) {
                        LOCKSTAT_START_TIMER(lsflag, spintime);
                        u_int count = SPINLOCK_BACKOFF_MIN;
                        do {
                                KPREEMPT_ENABLE(curlwp);
                                SPINLOCK_BACKOFF(count);
                                KPREEMPT_DISABLE(curlwp);
                                owner = rw->rw_owner;
                        } while (rw_oncpu(owner));
                        LOCKSTAT_STOP_TIMER(lsflag, spintime);
                        LOCKSTAT_COUNT(spincnt, 1);
                        if ((owner & need_wait) == 0)
                                continue;
                }

                /*
                 * Grab the turnstile chain lock.  Once we have that, we
                 * can adjust the waiter bits and sleep queue.
                 */
                ts = turnstile_lookup(rw);

                /*
                 * Mark the rwlock as having waiters.  If the set fails,
                 * then we may not need to sleep and should spin again.
                 * Reload rw_owner because turnstile_lookup() may have
                 * spun on the turnstile chain lock.
                 */
                owner = rw->rw_owner;
                if ((owner & need_wait) == 0 || rw_oncpu(owner)) {
                        turnstile_exit(rw);
                        continue;
                }
                next = rw_cas(rw, owner, owner | set_wait);
                if (__predict_false(next != owner)) {
                        turnstile_exit(rw);
                        owner = next;
                        continue;
                }

                LOCKSTAT_START_TIMER(lsflag, slptime);
                turnstile_block(ts, queue, rw, &rw_syncobj);
                LOCKSTAT_STOP_TIMER(lsflag, slptime);
                LOCKSTAT_COUNT(slpcnt, 1);

                /*
                 * No need for a memory barrier because of context switch.
                 * If not handed the lock, then spin again.
                 */
                if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
                        break;

                owner = rw->rw_owner;
        }
        KPREEMPT_ENABLE(curlwp);

        LOCKSTAT_EVENT(lsflag, rw, LB_RWLOCK |
            (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime);
        LOCKSTAT_EVENT(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime);
        LOCKSTAT_EXIT(lsflag);

        RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
            (op == RW_READER && RW_COUNT(rw) != 0));
        RW_LOCKED(rw, op);
}

/*
 * rw_vector_exit:
 *
 *	Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw)
{
        uintptr_t curthread, owner, decr, newown, next;
        turnstile_t *ts;
        int rcnt, wcnt;
        lwp_t *l;

        curthread = (uintptr_t)curlwp;
        RW_ASSERT(rw, curthread != 0);

        if (__predict_false(panicstr != NULL))
                return;

        /*
         * Again, we use a trick.  Since we used an add operation to
         * set the required lock bits, we can use a subtract to clear
         * them, which makes the read-release and write-release path
         * the same.
         */
        owner = rw->rw_owner;
        if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
                RW_UNLOCKED(rw, RW_WRITER);
                RW_ASSERT(rw, RW_OWNER(rw) == curthread);
                decr = curthread | RW_WRITE_LOCKED;
        } else {
                RW_UNLOCKED(rw, RW_READER);
                RW_ASSERT(rw, RW_COUNT(rw) != 0);
                decr = RW_READ_INCR;
        }

        /*
         * Compute what we expect the new value of the lock to be.  Only
         * proceed to do direct handoff if there are waiters, and if the
         * lock would become unowned.
         */
        RW_MEMBAR_EXIT();
        for (;;) {
                newown = (owner - decr);
                if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
                        break;
                next = rw_cas(rw, owner, newown);
                if (__predict_true(next == owner))
                        return;
                owner = next;
        }

        /*
         * Grab the turnstile chain lock.  This gets the interlock
         * on the sleep queue.  Once we have that, we can adjust the
         * waiter bits.
         */
        ts = turnstile_lookup(rw);
        owner = rw->rw_owner;
        RW_DASSERT(rw, ts != NULL);
        RW_DASSERT(rw, (owner & RW_HAS_WAITERS) != 0);

        wcnt = TS_WAITERS(ts, TS_WRITER_Q);
        rcnt = TS_WAITERS(ts, TS_READER_Q);

        /*
         * Give the lock away.
         *
         * If we are releasing a write lock, then prefer to wake all
         * outstanding readers.  Otherwise, wake one writer if there
         * are outstanding readers, or all writers if there are no
         * pending readers.  If waking one specific writer, the writer
         * is handed the lock here.  If waking multiple writers, we
         * set WRITE_WANTED to block out new readers, and let them
         * do the work of acquiring the lock in rw_vector_enter().
         */
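        /*
         * Summary of the policy above (editor's note derived from the
         * comment and the code that follows; not part of the original file):
         *
         *	released lock     readers waiting?  action
         *	-------------     ----------------  ------
         *	write             yes               wake all waiting readers
         *	write             no                wake all waiting writers
         *	read (last hold)  yes               hand off to one writer
         *	read (last hold)  no                wake all waiting writers
         */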
        if (rcnt == 0 || decr == RW_READ_INCR) {
                RW_DASSERT(rw, wcnt != 0);
                RW_DASSERT(rw, (owner & RW_WRITE_WANTED) != 0);

                if (rcnt != 0) {
                        /* Give the lock to the longest waiting writer. */
                        l = TS_FIRST(ts, TS_WRITER_Q);
                        newown = (uintptr_t)l | RW_WRITE_LOCKED | RW_HAS_WAITERS;
                        if (wcnt > 1)
                                newown |= RW_WRITE_WANTED;
                        rw_swap(rw, owner, newown);
                        turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
                } else {
                        /* Wake all writers and let them fight it out. */
                        rw_swap(rw, owner, RW_WRITE_WANTED);
                        turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
                }
        } else {
                RW_DASSERT(rw, rcnt != 0);

                /*
                 * Give the lock to all blocked readers.  If there
                 * is a writer waiting, new readers that arrive
                 * after the release will be blocked out.
                 */
                newown = rcnt << RW_READ_COUNT_SHIFT;
                if (wcnt != 0)
                        newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

                /* Wake up all sleeping readers. */
                rw_swap(rw, owner, newown);
                turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
        }
}

/*
 * rw_vector_tryenter:
 *
 *	Try to acquire a rwlock.
 */
int
rw_vector_tryenter(krwlock_t *rw, const krw_t op)
{
        uintptr_t curthread, owner, incr, need_wait, next;

        curthread = (uintptr_t)curlwp;

        RW_ASSERT(rw, curthread != 0);

        if (op == RW_READER) {
                incr = RW_READ_INCR;
                need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
        } else {
                RW_DASSERT(rw, op == RW_WRITER);
                incr = curthread | RW_WRITE_LOCKED;
                need_wait = RW_WRITE_LOCKED | RW_THREAD;
        }

        for (owner = rw->rw_owner;; owner = next) {
                if (__predict_false((owner & need_wait) != 0))
                        return 0;
                next = rw_cas(rw, owner, owner + incr);
                if (__predict_true(next == owner)) {
                        /* Got it! */
                        RW_MEMBAR_ENTER();
                        break;
                }
        }

        RW_WANTLOCK(rw, op);
        RW_LOCKED(rw, op);
        RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
            (op == RW_READER && RW_COUNT(rw) != 0));

        return 1;
}
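
/*
 * Example (editor's illustration, not part of the original file): callers
 * must check the return value of rw_tryenter(), which never blocks.  The
 * "sc" softc is the same hypothetical structure as in the earlier example.
 *
 *	if (rw_tryenter(&sc->sc_lock, RW_WRITER)) {
 *		sc->sc_state = 1;
 *		rw_exit(&sc->sc_lock);
 *	} else {
 *		// lock was busy; fall back to some other strategy
 *	}
 */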

/*
 * rw_downgrade:
 *
 *	Downgrade a write lock to a read lock.  Optimise memory accesses for
 *	the uncontended case.
 */
void
rw_downgrade(krwlock_t *rw)
{
        uintptr_t owner, curthread, newown, next;
        turnstile_t *ts;
        int rcnt, wcnt;

        curthread = (uintptr_t)curlwp;
        RW_ASSERT(rw, curthread != 0);
        RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
        RW_ASSERT(rw, RW_OWNER(rw) == curthread);
        RW_UNLOCKED(rw, RW_WRITER);
#if !defined(DIAGNOSTIC)
        __USE(curthread);
#endif

        /*
         * If there are no waiters, we can do this the easy way.  Try
         * swapping us down to one read hold.  If it fails, the lock
         * condition has changed and we most likely now have waiters.
         */
        RW_MEMBAR_PRODUCER();
        owner = curthread | RW_WRITE_LOCKED;
        next = rw_cas(rw, owner, RW_READ_INCR);
        if (__predict_true(next == owner)) {
                RW_LOCKED(rw, RW_READER);
                RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
                RW_DASSERT(rw, RW_COUNT(rw) != 0);
                return;
        }

        /*
         * Grab the turnstile chain lock.  This gets the interlock
         * on the sleep queue.  Once we have that, we can adjust the
         * waiter bits.
         */
        for (;;) {
                owner = next;
                ts = turnstile_lookup(rw);
                RW_DASSERT(rw, ts != NULL);

                rcnt = TS_WAITERS(ts, TS_READER_Q);
                wcnt = TS_WAITERS(ts, TS_WRITER_Q);

                /*
                 * If there are no readers, just preserve the waiters
                 * bits, swap us down to one read hold and return.
                 */
                if (rcnt == 0) {
                        RW_DASSERT(rw, wcnt != 0);
                        RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
                        RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);

                        newown = RW_READ_INCR | RW_HAS_WAITERS | RW_WRITE_WANTED;
                        next = rw_cas(rw, owner, newown);
                        turnstile_exit(rw);
                        if (__predict_true(next == owner))
                                break;
                } else {
                        /*
                         * Give the lock to all blocked readers.  We may
                         * retain one read hold if downgrading.  If there
                         * is a writer waiting, new readers will be blocked
                         * out.
                         */
                        newown = (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
                        if (wcnt != 0)
                                newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

                        next = rw_cas(rw, owner, newown);
                        if (__predict_true(next == owner)) {
                                /* Wake up all sleeping readers. */
                                turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
                                break;
                        }
                        turnstile_exit(rw);
                }
        }

        RW_WANTLOCK(rw, RW_READER);
        RW_LOCKED(rw, RW_READER);
        RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
        RW_DASSERT(rw, RW_COUNT(rw) != 0);
}
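
/*
 * Example (editor's illustration, not part of the original file): a writer
 * that has finished updating but still needs a consistent view can downgrade
 * instead of dropping and re-taking the lock, which would open a window for
 * another writer.  The helpers named below are hypothetical.
 *
 *	rw_enter(&sc->sc_lock, RW_WRITER);
 *	sc->sc_state = compute_new_state(sc);	// hypothetical helper
 *	rw_downgrade(&sc->sc_lock);		// now held as RW_READER
 *	report_state(sc);			// hypothetical helper
 *	rw_exit(&sc->sc_lock);
 */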

/*
 * rw_tryupgrade:
 *
 *	Try to upgrade a read lock to a write lock.  We must be the only
 *	reader.  Optimise memory accesses for the uncontended case.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
        uintptr_t owner, curthread, newown, next;

        curthread = (uintptr_t)curlwp;
        RW_ASSERT(rw, curthread != 0);
        RW_ASSERT(rw, rw_read_held(rw));

        for (owner = RW_READ_INCR;; owner = next) {
                newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
                next = rw_cas(rw, owner, newown);
                if (__predict_true(next == owner)) {
                        RW_MEMBAR_PRODUCER();
                        break;
                }
                RW_ASSERT(rw, (next & RW_WRITE_LOCKED) == 0);
                if (__predict_false((next & RW_THREAD) != RW_READ_INCR)) {
                        RW_ASSERT(rw, (next & RW_THREAD) != 0);
                        return 0;
                }
        }

        RW_UNLOCKED(rw, RW_READER);
        RW_WANTLOCK(rw, RW_WRITER);
        RW_LOCKED(rw, RW_WRITER);
        RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
        RW_DASSERT(rw, RW_OWNER(rw) == curthread);

        return 1;
}
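
/*
 * Example (editor's illustration, not part of the original file): the
 * upgrade fails if any other reader holds the lock, so callers need a
 * fallback path.  A common pattern is to drop the read hold, take a fresh
 * write lock, and re-check the condition that motivated the upgrade.  The
 * predicate "needs_update" is hypothetical.
 *
 *	rw_enter(&sc->sc_lock, RW_READER);
 *	if (needs_update(sc)) {
 *		if (!rw_tryupgrade(&sc->sc_lock)) {
 *			rw_exit(&sc->sc_lock);
 *			rw_enter(&sc->sc_lock, RW_WRITER);
 *			// state may have changed while unlocked
 *		}
 *		if (needs_update(sc))
 *			sc->sc_state = 1;
 *	}
 *	rw_exit(&sc->sc_lock);
 */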

/*
 * rw_read_held:
 *
 *	Returns true if the rwlock is held for reading.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
        uintptr_t owner;

        if (panicstr != NULL)
                return 1;
        if (rw == NULL)
                return 0;
        owner = rw->rw_owner;
        return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *	Returns true if the rwlock is held for writing.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

        if (panicstr != NULL)
                return 1;
        if (rw == NULL)
                return 0;
        return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
            (RW_WRITE_LOCKED | (uintptr_t)curlwp);
}

/*
 * rw_lock_held:
 *
 *	Returns true if the rwlock is held for reading or writing.  Must
 *	only be used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

        if (panicstr != NULL)
                return 1;
        if (rw == NULL)
                return 0;
        return (rw->rw_owner & RW_THREAD) != 0;
}
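
/*
 * Example (editor's illustration, not part of the original file): the
 * *_held() predicates are meant only for assertions, e.g.
 *
 *	KASSERT(rw_write_held(&sc->sc_lock));
 *	sc->sc_state = 1;
 *
 * Note that rw_read_held() cannot tell whether the current LWP is one of
 * the readers, only that some reader holds the lock, which is another
 * reason these must never drive locking decisions.
 */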

/*
 * rw_owner:
 *
 *	Return the current owner of an RW lock, but only if it is write
 *	held.  Used for priority inheritance.
 */
static lwp_t *
rw_owner(wchan_t obj)
{
        krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
        uintptr_t owner = rw->rw_owner;

        if ((owner & RW_WRITE_LOCKED) == 0)
                return NULL;

        return (void *)(owner & RW_THREAD);
}