/*	$NetBSD: kern_rwlock.c,v 1.59.2.3 2020/01/19 21:08:29 ad Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020
 *     The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 *
 * The NetBSD implementation is different from that described in the book,
 * in that the locks are adaptive.  Lock waiters spin wait while the lock
 * holders are on CPU (if the holds can be tracked: up to N per-thread).
 *
 * While spin waiting, threads compete for the lock without the assistance
 * of turnstiles.  If a lock holder sleeps for any reason, the lock waiters
 * will also sleep in response and at that point turnstiles, priority
 * inheritance and strong efforts at ensuring fairness come into play.
 *
 * The adaptive behaviour is controlled by the RW_SPIN flag bit, which is
 * cleared by a lock owner that is going off the CPU, and set again by the
 * lock owner that releases the last hold on the lock.
 */
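
/*
 * Illustrative usage sketch (an editorial example, not part of the
 * implementation): a consumer initializes the lock once, then brackets
 * each critical section with rw_enter()/rw_exit():
 *
 *	krwlock_t lock;
 *
 *	rw_init(&lock);
 *	rw_enter(&lock, RW_READER);	(shared hold; may spin or sleep)
 *	... read shared data ...
 *	rw_exit(&lock);
 *	rw_enter(&lock, RW_WRITER);	(exclusive hold)
 *	... modify shared data ...
 *	rw_exit(&lock);
 *	rw_destroy(&lock);
 *
 * "lock" is an arbitrary example name.
 */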
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.59.2.3 2020/01/19 21:08:29 ad Exp $");

#include "opt_lockdebug.h"

#define	__RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/lock.h>
#include <sys/pserialize.h>

#include <dev/lockstat.h>

/*
 * LOCKDEBUG
 */

#define	RW_DEBUG_P(rw)		(((rw)->rw_owner & RW_NODEBUG) == 0)

#define	RW_WANTLOCK(rw, op) \
	LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_LOCKED(rw, op) \
	LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL, \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_UNLOCKED(rw, op) \
	LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)
#define	RW_ASSERT(rw, cond) \
do { \
	if (__predict_false(!(cond))) \
		rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\
} while (/* CONSTCOND */ 0)
#else
#define	RW_ASSERT(rw, cond)	/* nothing */
#endif	/* DIAGNOSTIC */

/*
 * Memory barriers.
 */
#ifdef __HAVE_ATOMIC_AS_MEMBAR
#define	RW_MEMBAR_ENTER()
#define	RW_MEMBAR_EXIT()
#define	RW_MEMBAR_PRODUCER()
#else
#define	RW_MEMBAR_ENTER()		membar_enter()
#define	RW_MEMBAR_EXIT()		membar_exit()
#define	RW_MEMBAR_PRODUCER()		membar_producer()
#endif
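
/*
 * Editorial note (a summary of how the barriers are used below, not new
 * behaviour): RW_MEMBAR_ENTER() follows a successful acquire CAS and
 * RW_MEMBAR_EXIT() precedes a release, giving acquire/release ordering
 * around the critical section, while RW_MEMBAR_PRODUCER() orders prior
 * stores in the downgrade/upgrade paths.  All three compile to nothing
 * when the atomic operations already act as full barriers
 * (__HAVE_ATOMIC_AS_MEMBAR).
 */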

static void	rw_abort(const char *, size_t, krwlock_t *, const char *);
static void	rw_dump(const volatile void *, lockop_printer_t);
static lwp_t	*rw_owner(wchan_t);

lockops_t rwlock_lockops = {
	.lo_name = "Reader / writer lock",
	.lo_type = LOCKOPS_SLEEP,
	.lo_dump = rw_dump,
};

syncobj_t rw_syncobj = {
	.sobj_flag	= SOBJ_SLEEPQ_SORTED,
	.sobj_unsleep	= turnstile_unsleep,
	.sobj_changepri	= turnstile_changepri,
	.sobj_lendpri	= sleepq_lendpri,
	.sobj_owner	= rw_owner,
};

/*
 * rw_cas:
 *
 *	Do an atomic compare-and-swap on the lock word.
 */
static inline uintptr_t
rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner,
	    (void *)o, (void *)n);
}

/*
 * rw_and:
 *
 *	Do an atomic AND on the lock word.
 */
static inline void
rw_and(krwlock_t *rw, uintptr_t m)
{

#ifdef _LP64
	atomic_and_64(&rw->rw_owner, m);
#else
	atomic_and_32(&rw->rw_owner, m);
#endif
}

/*
 * rw_swap:
 *
 *	Do an atomic swap of the lock word.  This is used only when it's
 *	known that the lock word is set up such that it can't be changed
 *	behind us (assert this), so there's no point considering the result.
 */
static inline void
rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n)
{

	n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner,
	    (void *)n);

	RW_ASSERT(rw, n == o);
	RW_ASSERT(rw, (o & RW_HAS_WAITERS) != 0);
}

/*
 * rw_hold_remember:
 *
 *	Helper - when acquiring a lock, record the new hold.
 */
static inline uintptr_t
rw_hold_remember(krwlock_t *rw, lwp_t *l)
{
	int i;

	KASSERT(kpreempt_disabled());

	for (i = 0; i < __arraycount(l->l_rwlocks); i++) {
		if (__predict_true(l->l_rwlocks[i] == NULL)) {
			l->l_rwlocks[i] = rw;
			/*
			 * Clear the write wanted flag on every acquire to
			 * give readers a chance once again.
			 */
			return ~RW_WRITE_WANTED;
		}
	}

	/*
	 * Nowhere to track the hold so we lose: temporarily disable
	 * spinning on the lock.
	 */
	return ~(RW_WRITE_WANTED | RW_SPIN);
}

/*
 * rw_hold_forget:
 *
 *	Helper - when releasing a lock, stop tracking the hold.
 */
static inline void
rw_hold_forget(krwlock_t *rw, lwp_t *l)
{
	int i;

	KASSERT(kpreempt_disabled());

	for (i = 0; i < __arraycount(l->l_rwlocks); i++) {
		if (__predict_true(l->l_rwlocks[i] == rw)) {
			l->l_rwlocks[i] = NULL;
			return;
		}
	}
}
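
/*
 * Editorial note: l_rwlocks[] is a small, fixed-size per-LWP array.  When
 * every slot is already in use, the new hold simply goes untracked and the
 * mask returned by rw_hold_remember() clears RW_SPIN from the lock word on
 * acquire, so waiters fall back to sleeping on this lock; RW_SPIN is set
 * again when the last hold is released in the exit path.
 */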

/*
 * rw_switch:
 *
 *	Called by mi_switch() to indicate that an LWP is going off the CPU.
 */
void
rw_switch(void)
{
	lwp_t *l = curlwp;
	int i;

	for (i = 0; i < __arraycount(l->l_rwlocks); i++) {
		if (l->l_rwlocks[i] != NULL) {
			rw_and(l->l_rwlocks[i], ~RW_SPIN);
			/* Leave in place for exit to clear. */
		}
	}
}

/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
 */
static void
rw_dump(const volatile void *cookie, lockop_printer_t pr)
{
	const volatile krwlock_t *rw = cookie;

	pr("owner/count : %#018lx flags : %#018x\n",
	    (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_abort:
 *
 *	Dump information about an error and panic the system.  This
 *	generates a lot of machine code in the DIAGNOSTIC case, so
 *	we ask the compiler to not inline it.
 */
static void __noinline
rw_abort(const char *func, size_t line, krwlock_t *rw, const char *msg)
{

	if (panicstr != NULL)
		return;

	LOCKDEBUG_ABORT(func, line, rw, &rwlock_lockops, msg);
}

/*
 * rw_init:
 *
 *	Initialize a rwlock for use.
 */
void
_rw_init(krwlock_t *rw, uintptr_t return_address)
{

	if (LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address))
		rw->rw_owner = RW_SPIN;
	else
		rw->rw_owner = RW_SPIN | RW_NODEBUG;
}

void
rw_init(krwlock_t *rw)
{

	_rw_init(rw, (uintptr_t)__builtin_return_address(0));
}

/*
 * rw_destroy:
 *
 *	Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

	RW_ASSERT(rw, (rw->rw_owner & ~(RW_NODEBUG | RW_SPIN)) == 0);
	LOCKDEBUG_FREE((rw->rw_owner & RW_NODEBUG) == 0, rw);
}

/*
 * rw_vector_enter:
 *
 *	The slow path for acquiring a rwlock, that considers all conditions.
 *	Marked __noinline to prevent the compiler pulling it into rw_enter().
 */
static void __noinline
rw_vector_enter(krwlock_t *rw, const krw_t op, uintptr_t mask, uintptr_t ra)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread, next;
	turnstile_t *ts;
	int queue;
	lwp_t *l;
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_TIMER(slpcnt);
	LOCKSTAT_TIMER(spintime);
	LOCKSTAT_COUNTER(spincnt);
	LOCKSTAT_FLAG(lsflag);

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, !cpu_intr_p());
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, kpreempt_disabled());
	RW_WANTLOCK(rw, op);

	if (panicstr == NULL) {
		KDASSERT(pserialize_not_in_read_section());
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
	 */
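	/*
	 * Concretely (editorial example): a reader acquiring an otherwise
	 * unheld lock adds RW_READ_INCR, so the thread field becomes a
	 * count of one; a writer adds (curthread | RW_WRITE_LOCKED), so
	 * the lock word identifies the owning LWP.
	 */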
	if (__predict_true(op == RW_READER)) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
		queue = TS_READER_Q;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
		queue = TS_WRITER_Q;
	}

	LOCKSTAT_ENTER(lsflag);

	for (owner = rw->rw_owner;;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		if ((owner & need_wait) == 0) {
			next = rw_cas(rw, owner, (owner + incr) & mask);
			if (__predict_true(next == owner)) {
				/* Got it! */
				RW_MEMBAR_ENTER();
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			owner = next;
			continue;
		}
		if (__predict_false(RW_OWNER(rw) == curthread)) {
			rw_abort(__func__, __LINE__, rw,
			    "locking against myself");
		}

		/*
		 * If the lock owner is running on another CPU, and there
		 * are no existing waiters, then spin.  Notes:
		 *
		 * 1) If an LWP on this CPU (possibly curlwp, or an LWP that
		 * curlwp has interrupted) holds kernel_lock, we can't spin
		 * without a deadlock.  The CPU that holds the rwlock may be
		 * blocked trying to acquire kernel_lock, or there may be an
		 * unseen chain of dependent locks.  To defeat the potential
		 * deadlock, this LWP needs to sleep (and thereby directly
		 * drop the kernel_lock, or permit the interrupted LWP that
		 * holds kernel_lock to complete its work).
		 *
		 * 2) If trying to acquire a write lock, and the lock is
		 * currently read held, after a brief wait set the write
		 * wanted bit to block out new readers and try to avoid
		 * starvation.  When the hold is acquired, we'll clear the
		 * WRITE_WANTED flag to give readers a chance again.  With
		 * luck this should nudge things in the direction of
		 * interleaving readers and writers when there is high
		 * contention.
		 *
		 * 3) The spin wait can't be done in soft interrupt context,
		 * because a lock holder could be pinned down underneath the
		 * soft interrupt LWP (i.e. curlwp) on the same CPU.  For
		 * the lock holder to make progress and release the lock,
		 * the soft interrupt needs to sleep.
		 */
		if ((owner & RW_SPIN) != 0 && !cpu_softintr_p()) {
			LOCKSTAT_START_TIMER(lsflag, spintime);
			u_int count = SPINLOCK_BACKOFF_MIN;
			do {
				KPREEMPT_ENABLE(curlwp);
				SPINLOCK_BACKOFF(count);
				KPREEMPT_DISABLE(curlwp);
				owner = rw->rw_owner;
				if ((owner & need_wait) == 0)
					break;
				if (count != SPINLOCK_BACKOFF_MAX)
					continue;
				if (curcpu()->ci_biglock_count != 0)
					break;
				if (op == RW_WRITER &&
				    (owner & RW_WRITE_LOCKED) == 0 &&
				    (owner & RW_WRITE_WANTED) == 0) {
					(void)rw_cas(rw, owner,
					    owner | RW_WRITE_WANTED);
				}
			} while ((owner & RW_SPIN) != 0);
			LOCKSTAT_STOP_TIMER(lsflag, spintime);
			LOCKSTAT_COUNT(spincnt, 1);
			if ((owner & need_wait) == 0)
				continue;
		}

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters, and disable spinning.
		 * If the set fails, then we may not need to sleep and
		 * should spin again.  Reload rw_owner now that we own
		 * the turnstile chain lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0 ||
		    ((owner & RW_SPIN) != 0 && !cpu_softintr_p())) {
			turnstile_exit(rw);
			continue;
		}
		next = rw_cas(rw, owner, (owner | set_wait) & ~RW_SPIN);
		if (__predict_false(next != owner)) {
			turnstile_exit(rw);
			owner = next;
			continue;
		}

		LOCKSTAT_START_TIMER(lsflag, slptime);
		turnstile_block(ts, queue, rw, &rw_syncobj);
		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_COUNT(slpcnt, 1);

		/*
		 * No need for a memory barrier because of context switch.
		 * If not handed the lock, then spin again.
		 */
		if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread)
			break;

		owner = rw->rw_owner;
	}
	KPREEMPT_ENABLE(curlwp);

	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK |
	    (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite : ra));
	LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime,
	    (l->l_rwcallsite != 0 ? l->l_rwcallsite : ra));
	LOCKSTAT_EXIT(lsflag);

	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	RW_LOCKED(rw, op);
}

/*
 * rw_enter:
 *
 *	The fast path for acquiring a lock that considers only the
 *	uncontended case.  Falls back to rw_vector_enter().
 */
void
rw_enter(krwlock_t *rw, const krw_t op)
{
	uintptr_t owner, incr, need_wait, curthread, next, mask;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, !cpu_intr_p());
	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, op);

	KPREEMPT_DISABLE(l);
	mask = rw_hold_remember(rw, l);

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
	 */
	if (__predict_true(op == RW_READER)) {
		incr = RW_READ_INCR;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	/*
	 * Read the lock owner field.  If the need-to-wait
	 * indicator is clear, then try to acquire the lock.
	 */
	owner = rw->rw_owner;
	if ((owner & need_wait) == 0) {
		next = rw_cas(rw, owner, (owner + incr) & mask);
		if (__predict_true(next == owner)) {
			/* Got it! */
			KPREEMPT_ENABLE(l);
			RW_MEMBAR_ENTER();
			return;
		}
	}

	rw_vector_enter(rw, op, mask, (uintptr_t)__builtin_return_address(0));
}

/*
 * rw_vector_exit:
 *
 *	The slow path for releasing a rwlock, that considers all conditions.
 *	Marked __noinline to prevent the compiler pulling it into rw_exit().
 */
static void __noinline
rw_vector_exit(krwlock_t *rw)
{
	uintptr_t curthread, owner, decr, newown, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, kpreempt_disabled());

	/*
	 * Again, we use a trick.  Since we used an add operation to
	 * set the required lock bits, we can use a subtract to clear
	 * them, which makes the read-release and write-release path
	 * the same.
	 */
	owner = rw->rw_owner;
	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
		RW_UNLOCKED(rw, RW_WRITER);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		decr = curthread | RW_WRITE_LOCKED;
	} else {
		RW_UNLOCKED(rw, RW_READER);
		RW_ASSERT(rw, RW_COUNT(rw) != 0);
		decr = RW_READ_INCR;
	}

	/*
	 * Compute what we expect the new value of the lock to be. Only
	 * proceed to do direct handoff if there are waiters, and if the
	 * lock would become unowned.
	 */
	RW_MEMBAR_EXIT();
	for (;;) {
		newown = (owner - decr);
		if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
			break;
		/* Want spinning enabled if lock is becoming free. */
		if ((newown & RW_THREAD) == 0)
			newown |= RW_SPIN;
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner)) {
			rw_hold_forget(rw, l);
			kpreempt_enable();
			return;
		}
		owner = next;
	}

	/*
	 * Grab the turnstile chain lock.  This gets the interlock
	 * on the sleep queue.  Once we have that, we can adjust the
	 * waiter bits.
	 */
	ts = turnstile_lookup(rw);
	owner = rw->rw_owner;
	RW_ASSERT(rw, ts != NULL);
	RW_ASSERT(rw, (owner & RW_HAS_WAITERS) != 0);

	wcnt = TS_WAITERS(ts, TS_WRITER_Q);
	rcnt = TS_WAITERS(ts, TS_READER_Q);

	/*
	 * Give the lock away.
	 *
	 * If we are releasing a write lock, then prefer to wake all
	 * outstanding readers.  Otherwise, wake one writer if there
	 * are outstanding readers, or all writers if there are no
	 * pending readers.  If waking one specific writer, the writer
	 * is handed the lock here.  If waking multiple writers, we
	 * set WRITE_WANTED to block out new readers, and let them
	 * do the work of acquiring the lock in rw_vector_enter().
	 */
	if (rcnt == 0 || decr == RW_READ_INCR) {
		RW_ASSERT(rw, wcnt != 0);
		RW_ASSERT(rw, (owner & RW_WRITE_WANTED) != 0);

		if (rcnt != 0) {
			/* Give the lock to the longest waiting writer. */
			l = TS_FIRST(ts, TS_WRITER_Q);
			newown = (uintptr_t)l | (owner & RW_NODEBUG);
			newown |= RW_WRITE_LOCKED | RW_HAS_WAITERS;
			if (wcnt > 1)
				newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			rw_hold_forget(rw, l);
			turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
		} else {
			/* Wake all writers and let them fight it out. */
			newown = owner & RW_NODEBUG;
			newown |= RW_WRITE_WANTED;
			rw_swap(rw, owner, newown);
			rw_hold_forget(rw, l);
			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL);
		}
	} else {
		RW_ASSERT(rw, rcnt != 0);

		/*
		 * Give the lock to all blocked readers.  If there
		 * is a writer waiting, new readers that arrive
		 * after the release will be blocked out.
		 */
		newown = owner & RW_NODEBUG;
		newown += rcnt << RW_READ_COUNT_SHIFT;
		if (wcnt != 0)
			newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

		/* Wake up all sleeping readers. */
		rw_swap(rw, owner, newown);
		rw_hold_forget(rw, l);
		turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
	}
	kpreempt_enable();
}

/*
 * rw_exit:
 *
 *	The fast path for releasing a lock that considers only the
 *	uncontended case.  Falls back to rw_vector_exit().
 */
void
rw_exit(krwlock_t *rw)
{
	uintptr_t curthread, owner, decr, newown, next;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);

	/*
	 * Again, we use a trick.  Since we used an add operation to
	 * set the required lock bits, we can use a subtract to clear
	 * them, which makes the read-release and write-release path
	 * the same.
	 */
	owner = rw->rw_owner;
	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
		RW_UNLOCKED(rw, RW_WRITER);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		decr = curthread | RW_WRITE_LOCKED;
	} else {
		RW_UNLOCKED(rw, RW_READER);
		RW_ASSERT(rw, RW_COUNT(rw) != 0);
		decr = RW_READ_INCR;
	}

	/* Now try to release it. */
	RW_MEMBAR_EXIT();
	KPREEMPT_DISABLE(l);
	newown = (owner - decr);
	if (__predict_true((newown & (RW_THREAD | RW_HAS_WAITERS)) !=
	    RW_HAS_WAITERS)) {
		/* Want spinning (re-)enabled if lock is becoming free. */
		if ((newown & RW_THREAD) == 0)
			newown |= RW_SPIN;
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner)) {
			rw_hold_forget(rw, l);
			KPREEMPT_ENABLE(l);
			return;
		}
	}
	rw_vector_exit(rw);
}

/*
 * rw_tryenter:
 *
 *	Try to acquire a rwlock.
 */
int
rw_tryenter(krwlock_t *rw, const krw_t op)
{
	uintptr_t curthread, owner, incr, need_wait, next, mask;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, curthread != 0);

	KPREEMPT_DISABLE(l);
	mask = rw_hold_remember(rw, l);

	if (op == RW_READER) {
		incr = RW_READ_INCR;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_ASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (owner = rw->rw_owner;; owner = next) {
		if (__predict_false((owner & need_wait) != 0)) {
			rw_hold_forget(rw, l);
			KPREEMPT_ENABLE(l);
			return 0;
		}
		next = rw_cas(rw, owner, (owner + incr) & mask);
		if (__predict_true(next == owner)) {
			/* Got it! */
			break;
		}
	}

	RW_WANTLOCK(rw, op);
	RW_LOCKED(rw, op);
	RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));

	KPREEMPT_ENABLE(l);
	RW_MEMBAR_ENTER();
	return 1;
}

/*
 * rw_downgrade:
 *
 *	Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, newown, next;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
	RW_UNLOCKED(rw, RW_WRITER);
#if !defined(DIAGNOSTIC)
	__USE(curthread);
#endif

	RW_MEMBAR_PRODUCER();

	for (owner = rw->rw_owner;; owner = next) {
		/*
		 * If there are no waiters we can do this the easy way.  Try
		 * swapping us down to one read hold.  If it fails, the lock
		 * condition has changed and we most likely now have
		 * waiters.
		 */
		if ((owner & RW_HAS_WAITERS) == 0) {
			newown = (owner & RW_NODEBUG) | RW_SPIN;
			next = rw_cas(rw, owner, newown + RW_READ_INCR);
			if (__predict_true(next == owner)) {
				RW_LOCKED(rw, RW_READER);
				RW_ASSERT(rw,
				    (rw->rw_owner & RW_WRITE_LOCKED) == 0);
				RW_ASSERT(rw, RW_COUNT(rw) != 0);
				return;
			}
			continue;
		}

		/*
		 * Grab the turnstile chain lock.  This gets the interlock
		 * on the sleep queue.  Once we have that, we can adjust the
		 * waiter bits.
		 */
		ts = turnstile_lookup(rw);
		RW_ASSERT(rw, ts != NULL);

		rcnt = TS_WAITERS(ts, TS_READER_Q);
		wcnt = TS_WAITERS(ts, TS_WRITER_Q);

		if (rcnt == 0) {
			/*
			 * If there are no readers, just preserve the
			 * waiters bits, swap us down to one read hold and
			 * return.  Don't set the spin bit as nobody's
			 * running yet.
			 */
			RW_ASSERT(rw, wcnt != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
			RW_ASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);

			newown = owner & RW_NODEBUG;
			newown |= RW_READ_INCR | RW_HAS_WAITERS |
			    RW_WRITE_WANTED;
			next = rw_cas(rw, owner, newown);
			turnstile_exit(rw);
			if (__predict_true(next == owner))
				break;
		} else {
			/*
			 * Give the lock to all blocked readers.  We may
			 * retain one read hold if downgrading.  If there is
			 * a writer waiting, new readers will be blocked
			 * out.  Don't set the spin bit as nobody's running
			 * yet.
			 */
			newown = owner & RW_NODEBUG;
			newown += (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
			if (wcnt != 0)
				newown |= RW_HAS_WAITERS | RW_WRITE_WANTED;

			next = rw_cas(rw, owner, newown);
			if (__predict_true(next == owner)) {
				/* Wake up all sleeping readers. */
				turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
				break;
			}
			turnstile_exit(rw);
		}
	}

	RW_WANTLOCK(rw, RW_READER);
	RW_LOCKED(rw, RW_READER);
	RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
	RW_ASSERT(rw, RW_COUNT(rw) != 0);
}

/*
 * rw_tryupgrade:
 *
 *	Try to upgrade a read lock to a write lock.  We must be the only
 *	reader.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, newown, next;
	struct lwp *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);
	RW_ASSERT(rw, rw_read_held(rw));

	for (owner = RW_READ_INCR;; owner = next) {
		newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
		next = rw_cas(rw, owner, newown);
		if (__predict_true(next == owner)) {
			RW_MEMBAR_PRODUCER();
			break;
		}
		RW_ASSERT(rw, (next & RW_WRITE_LOCKED) == 0);
		if (__predict_false((next & RW_THREAD) != RW_READ_INCR)) {
			RW_ASSERT(rw, (next & RW_THREAD) != 0);
			return 0;
		}
	}

	RW_UNLOCKED(rw, RW_READER);
	RW_WANTLOCK(rw, RW_WRITER);
	RW_LOCKED(rw, RW_WRITER);
	RW_ASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);

	return 1;
}

/*
 * rw_read_held:
 *
 *	Returns true if the rwlock is held for reading.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
	uintptr_t owner;

	if (rw == NULL)
		return 0;
	owner = rw->rw_owner;
	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *	Returns true if the rwlock is held for writing.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) ==
	    (RW_WRITE_LOCKED | (uintptr_t)curlwp);
}

/*
 * rw_lock_held:
 *
 *	Returns true if the rwlock is held for reading or writing.  Must
 *	only be used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

	if (rw == NULL)
		return 0;
	return (rw->rw_owner & RW_THREAD) != 0;
}

/*
 * rw_owner:
 *
 *	Return the current owner of an RW lock, but only if it is write
 *	held.  Used for priority inheritance.
 */
static lwp_t *
rw_owner(wchan_t obj)
{
	krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
	uintptr_t owner = rw->rw_owner;

	if ((owner & RW_WRITE_LOCKED) == 0)
		return NULL;

	return (void *)(owner & RW_THREAD);
}

/*
 * rw_owner_running:
 *
 *	Return true if a RW lock is unheld, or held and the owner is running
 *	on a CPU.  For the pagedaemon only - do not document or use in other
 *	code.
 */
bool
rw_owner_running(const krwlock_t *rw)
{
	uintptr_t owner = rw->rw_owner;

	return (owner & RW_THREAD) == 0 || (owner & RW_SPIN) != 0;
}