1 1.76 riastrad /* $NetBSD: kern_rwlock.c,v 1.76 2023/10/15 10:28:48 riastradh Exp $ */ 2 1.2 ad 3 1.2 ad /*- 4 1.73 ad * Copyright (c) 2002, 2006, 2007, 2008, 2009, 2019, 2020, 2023 5 1.60 ad * The NetBSD Foundation, Inc. 6 1.2 ad * All rights reserved. 7 1.2 ad * 8 1.2 ad * This code is derived from software contributed to The NetBSD Foundation 9 1.2 ad * by Jason R. Thorpe and Andrew Doran. 10 1.2 ad * 11 1.2 ad * Redistribution and use in source and binary forms, with or without 12 1.2 ad * modification, are permitted provided that the following conditions 13 1.2 ad * are met: 14 1.2 ad * 1. Redistributions of source code must retain the above copyright 15 1.2 ad * notice, this list of conditions and the following disclaimer. 16 1.2 ad * 2. Redistributions in binary form must reproduce the above copyright 17 1.2 ad * notice, this list of conditions and the following disclaimer in the 18 1.2 ad * documentation and/or other materials provided with the distribution. 19 1.2 ad * 20 1.2 ad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 1.2 ad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 1.2 ad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 1.2 ad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 1.2 ad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 1.2 ad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 1.2 ad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 1.2 ad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 1.2 ad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 1.2 ad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 1.2 ad * POSSIBILITY OF SUCH DAMAGE. 31 1.2 ad */ 32 1.2 ad 33 1.2 ad /* 34 1.2 ad * Kernel reader/writer lock implementation, modeled after those 35 1.2 ad * found in Solaris, a description of which can be found in: 36 1.2 ad * 37 1.2 ad * Solaris Internals: Core Kernel Architecture, Jim Mauro and 38 1.2 ad * Richard McDougall. 39 1.64 ad * 40 1.64 ad * The NetBSD implementation differs from that described in the book, in 41 1.64 ad * that the locks are partially adaptive. Lock waiters spin wait while a 42 1.64 ad * lock is write held and the holder is still running on a CPU. The method 43 1.64 ad * of choosing which threads to awaken when a lock is released also differs, 44 1.64 ad * mainly to take account of the partially adaptive behaviour. 45 1.2 ad */ 46 1.2 ad 47 1.10 dsl #include <sys/cdefs.h> 48 1.76 riastrad __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.76 2023/10/15 10:28:48 riastradh Exp $"); 49 1.61 ad 50 1.61 ad #include "opt_lockdebug.h" 51 1.2 ad 52 1.2 ad #define __RWLOCK_PRIVATE 53 1.2 ad 54 1.2 ad #include <sys/param.h> 55 1.76 riastrad 56 1.76 riastrad #include <sys/atomic.h> 57 1.76 riastrad #include <sys/cpu.h> 58 1.76 riastrad #include <sys/lock.h> 59 1.76 riastrad #include <sys/lockdebug.h> 60 1.2 ad #include <sys/proc.h> 61 1.76 riastrad #include <sys/pserialize.h> 62 1.2 ad #include <sys/rwlock.h> 63 1.2 ad #include <sys/sched.h> 64 1.2 ad #include <sys/sleepq.h> 65 1.76 riastrad #include <sys/syncobj.h> 66 1.2 ad #include <sys/systm.h> 67 1.2 ad 68 1.2 ad #include <dev/lockstat.h> 69 1.2 ad 70 1.56 riastrad #include <machine/rwlock.h> 71 1.56 riastrad 72 1.2 ad /* 73 1.2 ad * LOCKDEBUG 74 1.2 ad */ 75 1.2 ad 76 1.61 ad #define RW_DEBUG_P(rw) (((rw)->rw_owner & RW_NODEBUG) == 0) 77 1.2 ad 78 1.61 ad #define RW_WANTLOCK(rw, op) \ 79 1.61 ad LOCKDEBUG_WANTLOCK(RW_DEBUG_P(rw), (rw), \ 80 1.61 ad (uintptr_t)__builtin_return_address(0), op == RW_READER); 81 1.61 ad #define RW_LOCKED(rw, op) \ 82 1.61 ad LOCKDEBUG_LOCKED(RW_DEBUG_P(rw), (rw), NULL, \ 83 1.61 ad (uintptr_t)__builtin_return_address(0), op == RW_READER); 84 1.61 ad #define RW_UNLOCKED(rw, op) \ 85 1.61 ad LOCKDEBUG_UNLOCKED(RW_DEBUG_P(rw), (rw), \ 86 1.61 ad (uintptr_t)__builtin_return_address(0), op == RW_READER); 87 1.2 ad 88 1.2 ad /* 89 1.2 ad * DIAGNOSTIC 90 1.2 ad */ 91 1.2 ad 92 1.2 ad #if defined(DIAGNOSTIC) 93 1.61 ad #define RW_ASSERT(rw, cond) \ 94 1.61 ad do { \ 95 1.61 ad if (__predict_false(!(cond))) \ 96 1.46 christos rw_abort(__func__, __LINE__, rw, "assertion failed: " #cond);\ 97 1.2 ad } while (/* CONSTCOND */ 0) 98 1.2 ad #else 99 1.2 ad #define RW_ASSERT(rw, cond) /* nothing */ 100 1.2 ad #endif /* DIAGNOSTIC */ 101 1.2 ad 102 1.55 ad /* 103 1.2 ad * For platforms that do not provide stubs, or for the LOCKDEBUG case. 104 1.2 ad */ 105 1.2 ad #ifdef LOCKDEBUG 106 1.2 ad #undef __HAVE_RW_STUBS 107 1.2 ad #endif 108 1.2 ad 109 1.2 ad #ifndef __HAVE_RW_STUBS 110 1.6 itohy __strong_alias(rw_enter,rw_vector_enter); 111 1.6 itohy __strong_alias(rw_exit,rw_vector_exit); 112 1.16 ad __strong_alias(rw_tryenter,rw_vector_tryenter); 113 1.2 ad #endif 114 1.2 ad 115 1.61 ad static void rw_abort(const char *, size_t, krwlock_t *, const char *); 116 1.61 ad static void rw_dump(const volatile void *, lockop_printer_t); 117 1.61 ad static lwp_t *rw_owner(wchan_t); 118 1.61 ad 119 1.2 ad lockops_t rwlock_lockops = { 120 1.48 ozaki .lo_name = "Reader / writer lock", 121 1.48 ozaki .lo_type = LOCKOPS_SLEEP, 122 1.48 ozaki .lo_dump = rw_dump, 123 1.2 ad }; 124 1.2 ad 125 1.73 ad /* 126 1.73 ad * Give rwlock holders an extra-high priority boost on-blocking due to 127 1.73 ad * direct handoff. XXX To be revisited. 128 1.73 ad */ 129 1.4 yamt syncobj_t rw_syncobj = { 130 1.74 ad .sobj_name = "rwlock", 131 1.49 ozaki .sobj_flag = SOBJ_SLEEPQ_SORTED, 132 1.73 ad .sobj_boostpri = PRI_KTHREAD, 133 1.49 ozaki .sobj_unsleep = turnstile_unsleep, 134 1.49 ozaki .sobj_changepri = turnstile_changepri, 135 1.49 ozaki .sobj_lendpri = sleepq_lendpri, 136 1.49 ozaki .sobj_owner = rw_owner, 137 1.4 yamt }; 138 1.4 yamt 139 1.2 ad /* 140 1.61 ad * rw_cas: 141 1.61 ad * 142 1.61 ad * Do an atomic compare-and-swap on the lock word. 143 1.61 ad */ 144 1.61 ad static inline uintptr_t 145 1.61 ad rw_cas(krwlock_t *rw, uintptr_t o, uintptr_t n) 146 1.61 ad { 147 1.61 ad 148 1.61 ad return (uintptr_t)atomic_cas_ptr((volatile void *)&rw->rw_owner, 149 1.61 ad (void *)o, (void *)n); 150 1.61 ad } 151 1.61 ad 152 1.61 ad /* 153 1.61 ad * rw_swap: 154 1.61 ad * 155 1.61 ad * Do an atomic swap of the lock word. This is used only when it's 156 1.61 ad * known that the lock word is set up such that it can't be changed 157 1.61 ad * behind us (assert this), so there's no point considering the result. 158 1.61 ad */ 159 1.61 ad static inline void 160 1.61 ad rw_swap(krwlock_t *rw, uintptr_t o, uintptr_t n) 161 1.61 ad { 162 1.61 ad 163 1.61 ad n = (uintptr_t)atomic_swap_ptr((volatile void *)&rw->rw_owner, 164 1.61 ad (void *)n); 165 1.61 ad 166 1.61 ad RW_ASSERT(rw, n == o); 167 1.61 ad RW_ASSERT(rw, (o & RW_HAS_WAITERS) != 0); 168 1.61 ad } 169 1.61 ad 170 1.61 ad /* 171 1.2 ad * rw_dump: 172 1.2 ad * 173 1.2 ad * Dump the contents of a rwlock structure. 174 1.2 ad */ 175 1.11 ad static void 176 1.54 ozaki rw_dump(const volatile void *cookie, lockop_printer_t pr) 177 1.2 ad { 178 1.47 christos const volatile krwlock_t *rw = cookie; 179 1.2 ad 180 1.54 ozaki pr("owner/count : %#018lx flags : %#018x\n", 181 1.2 ad (long)RW_OWNER(rw), (int)RW_FLAGS(rw)); 182 1.2 ad } 183 1.2 ad 184 1.2 ad /* 185 1.11 ad * rw_abort: 186 1.11 ad * 187 1.11 ad * Dump information about an error and panic the system. This 188 1.11 ad * generates a lot of machine code in the DIAGNOSTIC case, so 189 1.11 ad * we ask the compiler to not inline it. 190 1.11 ad */ 191 1.26 ad static void __noinline 192 1.46 christos rw_abort(const char *func, size_t line, krwlock_t *rw, const char *msg) 193 1.11 ad { 194 1.11 ad 195 1.67 ozaki if (__predict_false(panicstr != NULL)) 196 1.11 ad return; 197 1.11 ad 198 1.46 christos LOCKDEBUG_ABORT(func, line, rw, &rwlock_lockops, msg); 199 1.11 ad } 200 1.11 ad 201 1.11 ad /* 202 1.2 ad * rw_init: 203 1.2 ad * 204 1.2 ad * Initialize a rwlock for use. 205 1.2 ad */ 206 1.2 ad void 207 1.50 ozaki _rw_init(krwlock_t *rw, uintptr_t return_address) 208 1.2 ad { 209 1.2 ad 210 1.62 ad #ifdef LOCKDEBUG 211 1.62 ad /* XXX only because the assembly stubs can't handle RW_NODEBUG */ 212 1.61 ad if (LOCKDEBUG_ALLOC(rw, &rwlock_lockops, return_address)) 213 1.61 ad rw->rw_owner = 0; 214 1.61 ad else 215 1.61 ad rw->rw_owner = RW_NODEBUG; 216 1.62 ad #else 217 1.62 ad rw->rw_owner = 0; 218 1.62 ad #endif 219 1.2 ad } 220 1.2 ad 221 1.50 ozaki void 222 1.50 ozaki rw_init(krwlock_t *rw) 223 1.50 ozaki { 224 1.50 ozaki 225 1.50 ozaki _rw_init(rw, (uintptr_t)__builtin_return_address(0)); 226 1.50 ozaki } 227 1.50 ozaki 228 1.2 ad /* 229 1.2 ad * rw_destroy: 230 1.2 ad * 231 1.2 ad * Tear down a rwlock. 232 1.2 ad */ 233 1.2 ad void 234 1.2 ad rw_destroy(krwlock_t *rw) 235 1.2 ad { 236 1.2 ad 237 1.36 skrll RW_ASSERT(rw, (rw->rw_owner & ~RW_NODEBUG) == 0); 238 1.61 ad LOCKDEBUG_FREE((rw->rw_owner & RW_NODEBUG) == 0, rw); 239 1.2 ad } 240 1.2 ad 241 1.2 ad /* 242 1.37 rmind * rw_oncpu: 243 1.20 ad * 244 1.20 ad * Return true if an rwlock owner is running on a CPU in the system. 245 1.20 ad * If the target is waiting on the kernel big lock, then we must 246 1.20 ad * release it. This is necessary to avoid deadlock. 247 1.20 ad */ 248 1.37 rmind static bool 249 1.37 rmind rw_oncpu(uintptr_t owner) 250 1.20 ad { 251 1.20 ad #ifdef MULTIPROCESSOR 252 1.20 ad struct cpu_info *ci; 253 1.20 ad lwp_t *l; 254 1.20 ad 255 1.37 rmind KASSERT(kpreempt_disabled()); 256 1.37 rmind 257 1.37 rmind if ((owner & (RW_WRITE_LOCKED|RW_HAS_WAITERS)) != RW_WRITE_LOCKED) { 258 1.37 rmind return false; 259 1.37 rmind } 260 1.37 rmind 261 1.37 rmind /* 262 1.37 rmind * See lwp_dtor() why dereference of the LWP pointer is safe. 263 1.37 rmind * We must have kernel preemption disabled for that. 264 1.37 rmind */ 265 1.20 ad l = (lwp_t *)(owner & RW_THREAD); 266 1.37 rmind ci = l->l_cpu; 267 1.20 ad 268 1.37 rmind if (ci && ci->ci_curlwp == l) { 269 1.37 rmind /* Target is running; do we need to block? */ 270 1.37 rmind return (ci->ci_biglock_wanted != l); 271 1.37 rmind } 272 1.37 rmind #endif 273 1.37 rmind /* Not running. It may be safe to block now. */ 274 1.37 rmind return false; 275 1.20 ad } 276 1.20 ad 277 1.20 ad /* 278 1.2 ad * rw_vector_enter: 279 1.2 ad * 280 1.2 ad * Acquire a rwlock. 281 1.2 ad */ 282 1.2 ad void 283 1.2 ad rw_vector_enter(krwlock_t *rw, const krw_t op) 284 1.2 ad { 285 1.20 ad uintptr_t owner, incr, need_wait, set_wait, curthread, next; 286 1.2 ad turnstile_t *ts; 287 1.2 ad int queue; 288 1.7 ad lwp_t *l; 289 1.2 ad LOCKSTAT_TIMER(slptime); 290 1.20 ad LOCKSTAT_TIMER(slpcnt); 291 1.19 ad LOCKSTAT_TIMER(spintime); 292 1.19 ad LOCKSTAT_COUNTER(spincnt); 293 1.2 ad LOCKSTAT_FLAG(lsflag); 294 1.2 ad 295 1.2 ad l = curlwp; 296 1.2 ad curthread = (uintptr_t)l; 297 1.2 ad 298 1.13 ad RW_ASSERT(rw, !cpu_intr_p()); 299 1.2 ad RW_ASSERT(rw, curthread != 0); 300 1.40 mlelstv RW_WANTLOCK(rw, op); 301 1.2 ad 302 1.67 ozaki if (__predict_true(panicstr == NULL)) { 303 1.53 ozaki KDASSERT(pserialize_not_in_read_section()); 304 1.2 ad LOCKDEBUG_BARRIER(&kernel_lock, 1); 305 1.2 ad } 306 1.2 ad 307 1.2 ad /* 308 1.2 ad * We play a slight trick here. If we're a reader, we want 309 1.2 ad * increment the read count. If we're a writer, we want to 310 1.43 ozaki * set the owner field and the WRITE_LOCKED bit. 311 1.2 ad * 312 1.2 ad * In the latter case, we expect those bits to be zero, 313 1.2 ad * therefore we can use an add operation to set them, which 314 1.2 ad * means an add operation for both cases. 315 1.2 ad */ 316 1.2 ad if (__predict_true(op == RW_READER)) { 317 1.2 ad incr = RW_READ_INCR; 318 1.2 ad set_wait = RW_HAS_WAITERS; 319 1.2 ad need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED; 320 1.2 ad queue = TS_READER_Q; 321 1.2 ad } else { 322 1.61 ad RW_ASSERT(rw, op == RW_WRITER); 323 1.2 ad incr = curthread | RW_WRITE_LOCKED; 324 1.2 ad set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED; 325 1.2 ad need_wait = RW_WRITE_LOCKED | RW_THREAD; 326 1.2 ad queue = TS_WRITER_Q; 327 1.2 ad } 328 1.2 ad 329 1.2 ad LOCKSTAT_ENTER(lsflag); 330 1.2 ad 331 1.37 rmind KPREEMPT_DISABLE(curlwp); 332 1.55 ad for (owner = rw->rw_owner;;) { 333 1.2 ad /* 334 1.2 ad * Read the lock owner field. If the need-to-wait 335 1.2 ad * indicator is clear, then try to acquire the lock. 336 1.2 ad */ 337 1.2 ad if ((owner & need_wait) == 0) { 338 1.20 ad next = rw_cas(rw, owner, (owner + incr) & 339 1.20 ad ~RW_WRITE_WANTED); 340 1.20 ad if (__predict_true(next == owner)) { 341 1.2 ad /* Got it! */ 342 1.70 riastrad membar_acquire(); 343 1.2 ad break; 344 1.2 ad } 345 1.2 ad 346 1.2 ad /* 347 1.2 ad * Didn't get it -- spin around again (we'll 348 1.2 ad * probably sleep on the next iteration). 349 1.2 ad */ 350 1.20 ad owner = next; 351 1.2 ad continue; 352 1.2 ad } 353 1.37 rmind if (__predict_false(RW_OWNER(rw) == curthread)) { 354 1.46 christos rw_abort(__func__, __LINE__, rw, 355 1.46 christos "locking against myself"); 356 1.37 rmind } 357 1.19 ad /* 358 1.19 ad * If the lock owner is running on another CPU, and 359 1.19 ad * there are no existing waiters, then spin. 360 1.19 ad */ 361 1.37 rmind if (rw_oncpu(owner)) { 362 1.19 ad LOCKSTAT_START_TIMER(lsflag, spintime); 363 1.19 ad u_int count = SPINLOCK_BACKOFF_MIN; 364 1.20 ad do { 365 1.38 rmind KPREEMPT_ENABLE(curlwp); 366 1.20 ad SPINLOCK_BACKOFF(count); 367 1.38 rmind KPREEMPT_DISABLE(curlwp); 368 1.19 ad owner = rw->rw_owner; 369 1.37 rmind } while (rw_oncpu(owner)); 370 1.19 ad LOCKSTAT_STOP_TIMER(lsflag, spintime); 371 1.19 ad LOCKSTAT_COUNT(spincnt, 1); 372 1.19 ad if ((owner & need_wait) == 0) 373 1.19 ad continue; 374 1.19 ad } 375 1.19 ad 376 1.2 ad /* 377 1.2 ad * Grab the turnstile chain lock. Once we have that, we 378 1.2 ad * can adjust the waiter bits and sleep queue. 379 1.2 ad */ 380 1.2 ad ts = turnstile_lookup(rw); 381 1.2 ad 382 1.2 ad /* 383 1.2 ad * Mark the rwlock as having waiters. If the set fails, 384 1.2 ad * then we may not need to sleep and should spin again. 385 1.20 ad * Reload rw_owner because turnstile_lookup() may have 386 1.20 ad * spun on the turnstile chain lock. 387 1.2 ad */ 388 1.20 ad owner = rw->rw_owner; 389 1.37 rmind if ((owner & need_wait) == 0 || rw_oncpu(owner)) { 390 1.20 ad turnstile_exit(rw); 391 1.20 ad continue; 392 1.20 ad } 393 1.20 ad next = rw_cas(rw, owner, owner | set_wait); 394 1.66 riastrad /* XXX membar? */ 395 1.20 ad if (__predict_false(next != owner)) { 396 1.2 ad turnstile_exit(rw); 397 1.20 ad owner = next; 398 1.2 ad continue; 399 1.2 ad } 400 1.2 ad 401 1.2 ad LOCKSTAT_START_TIMER(lsflag, slptime); 402 1.4 yamt turnstile_block(ts, queue, rw, &rw_syncobj); 403 1.2 ad LOCKSTAT_STOP_TIMER(lsflag, slptime); 404 1.20 ad LOCKSTAT_COUNT(slpcnt, 1); 405 1.2 ad 406 1.20 ad /* 407 1.20 ad * No need for a memory barrier because of context switch. 408 1.20 ad * If not handed the lock, then spin again. 409 1.20 ad */ 410 1.58 ad if (op == RW_READER || (rw->rw_owner & RW_THREAD) == curthread) 411 1.20 ad break; 412 1.58 ad 413 1.39 yamt owner = rw->rw_owner; 414 1.2 ad } 415 1.37 rmind KPREEMPT_ENABLE(curlwp); 416 1.2 ad 417 1.60 ad LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | 418 1.60 ad (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2), slpcnt, slptime, 419 1.60 ad (l->l_rwcallsite != 0 ? l->l_rwcallsite : 420 1.60 ad (uintptr_t)__builtin_return_address(0))); 421 1.60 ad LOCKSTAT_EVENT_RA(lsflag, rw, LB_RWLOCK | LB_SPIN, spincnt, spintime, 422 1.60 ad (l->l_rwcallsite != 0 ? l->l_rwcallsite : 423 1.60 ad (uintptr_t)__builtin_return_address(0))); 424 1.2 ad LOCKSTAT_EXIT(lsflag); 425 1.2 ad 426 1.61 ad RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) || 427 1.2 ad (op == RW_READER && RW_COUNT(rw) != 0)); 428 1.2 ad RW_LOCKED(rw, op); 429 1.2 ad } 430 1.2 ad 431 1.2 ad /* 432 1.2 ad * rw_vector_exit: 433 1.2 ad * 434 1.2 ad * Release a rwlock. 435 1.2 ad */ 436 1.2 ad void 437 1.2 ad rw_vector_exit(krwlock_t *rw) 438 1.2 ad { 439 1.44 matt uintptr_t curthread, owner, decr, newown, next; 440 1.2 ad turnstile_t *ts; 441 1.2 ad int rcnt, wcnt; 442 1.7 ad lwp_t *l; 443 1.2 ad 444 1.61 ad l = curlwp; 445 1.61 ad curthread = (uintptr_t)l; 446 1.2 ad RW_ASSERT(rw, curthread != 0); 447 1.2 ad 448 1.2 ad /* 449 1.2 ad * Again, we use a trick. Since we used an add operation to 450 1.2 ad * set the required lock bits, we can use a subtract to clear 451 1.2 ad * them, which makes the read-release and write-release path 452 1.2 ad * the same. 453 1.2 ad */ 454 1.2 ad owner = rw->rw_owner; 455 1.2 ad if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) { 456 1.2 ad RW_UNLOCKED(rw, RW_WRITER); 457 1.2 ad RW_ASSERT(rw, RW_OWNER(rw) == curthread); 458 1.2 ad decr = curthread | RW_WRITE_LOCKED; 459 1.2 ad } else { 460 1.2 ad RW_UNLOCKED(rw, RW_READER); 461 1.2 ad RW_ASSERT(rw, RW_COUNT(rw) != 0); 462 1.2 ad decr = RW_READ_INCR; 463 1.2 ad } 464 1.2 ad 465 1.2 ad /* 466 1.2 ad * Compute what we expect the new value of the lock to be. Only 467 1.2 ad * proceed to do direct handoff if there are waiters, and if the 468 1.2 ad * lock would become unowned. 469 1.2 ad */ 470 1.70 riastrad membar_release(); 471 1.58 ad for (;;) { 472 1.44 matt newown = (owner - decr); 473 1.44 matt if ((newown & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS) 474 1.2 ad break; 475 1.44 matt next = rw_cas(rw, owner, newown); 476 1.20 ad if (__predict_true(next == owner)) 477 1.2 ad return; 478 1.58 ad owner = next; 479 1.2 ad } 480 1.2 ad 481 1.20 ad /* 482 1.20 ad * Grab the turnstile chain lock. This gets the interlock 483 1.20 ad * on the sleep queue. Once we have that, we can adjust the 484 1.20 ad * waiter bits. 485 1.20 ad */ 486 1.20 ad ts = turnstile_lookup(rw); 487 1.20 ad owner = rw->rw_owner; 488 1.61 ad RW_ASSERT(rw, ts != NULL); 489 1.61 ad RW_ASSERT(rw, (owner & RW_HAS_WAITERS) != 0); 490 1.2 ad 491 1.20 ad wcnt = TS_WAITERS(ts, TS_WRITER_Q); 492 1.20 ad rcnt = TS_WAITERS(ts, TS_READER_Q); 493 1.2 ad 494 1.20 ad /* 495 1.20 ad * Give the lock away. 496 1.20 ad * 497 1.20 ad * If we are releasing a write lock, then prefer to wake all 498 1.20 ad * outstanding readers. Otherwise, wake one writer if there 499 1.20 ad * are outstanding readers, or all writers if there are no 500 1.20 ad * pending readers. If waking one specific writer, the writer 501 1.20 ad * is handed the lock here. If waking multiple writers, we 502 1.20 ad * set WRITE_WANTED to block out new readers, and let them 503 1.41 skrll * do the work of acquiring the lock in rw_vector_enter(). 504 1.20 ad */ 505 1.32 yamt if (rcnt == 0 || decr == RW_READ_INCR) { 506 1.61 ad RW_ASSERT(rw, wcnt != 0); 507 1.61 ad RW_ASSERT(rw, (owner & RW_WRITE_WANTED) != 0); 508 1.2 ad 509 1.20 ad if (rcnt != 0) { 510 1.20 ad /* Give the lock to the longest waiting writer. */ 511 1.2 ad l = TS_FIRST(ts, TS_WRITER_Q); 512 1.61 ad newown = (uintptr_t)l | (owner & RW_NODEBUG); 513 1.61 ad newown |= RW_WRITE_LOCKED | RW_HAS_WAITERS; 514 1.28 thorpej if (wcnt > 1) 515 1.44 matt newown |= RW_WRITE_WANTED; 516 1.44 matt rw_swap(rw, owner, newown); 517 1.7 ad turnstile_wakeup(ts, TS_WRITER_Q, 1, l); 518 1.2 ad } else { 519 1.20 ad /* Wake all writers and let them fight it out. */ 520 1.61 ad newown = owner & RW_NODEBUG; 521 1.61 ad newown |= RW_WRITE_WANTED; 522 1.61 ad rw_swap(rw, owner, newown); 523 1.20 ad turnstile_wakeup(ts, TS_WRITER_Q, wcnt, NULL); 524 1.20 ad } 525 1.20 ad } else { 526 1.61 ad RW_ASSERT(rw, rcnt != 0); 527 1.2 ad 528 1.20 ad /* 529 1.20 ad * Give the lock to all blocked readers. If there 530 1.20 ad * is a writer waiting, new readers that arrive 531 1.20 ad * after the release will be blocked out. 532 1.20 ad */ 533 1.61 ad newown = owner & RW_NODEBUG; 534 1.61 ad newown += rcnt << RW_READ_COUNT_SHIFT; 535 1.20 ad if (wcnt != 0) 536 1.44 matt newown |= RW_HAS_WAITERS | RW_WRITE_WANTED; 537 1.12 yamt 538 1.20 ad /* Wake up all sleeping readers. */ 539 1.44 matt rw_swap(rw, owner, newown); 540 1.20 ad turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL); 541 1.2 ad } 542 1.2 ad } 543 1.2 ad 544 1.2 ad /* 545 1.16 ad * rw_vector_tryenter: 546 1.2 ad * 547 1.2 ad * Try to acquire a rwlock. 548 1.2 ad */ 549 1.2 ad int 550 1.16 ad rw_vector_tryenter(krwlock_t *rw, const krw_t op) 551 1.2 ad { 552 1.20 ad uintptr_t curthread, owner, incr, need_wait, next; 553 1.61 ad lwp_t *l; 554 1.2 ad 555 1.61 ad l = curlwp; 556 1.61 ad curthread = (uintptr_t)l; 557 1.2 ad 558 1.2 ad RW_ASSERT(rw, curthread != 0); 559 1.2 ad 560 1.2 ad if (op == RW_READER) { 561 1.2 ad incr = RW_READ_INCR; 562 1.2 ad need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED; 563 1.2 ad } else { 564 1.61 ad RW_ASSERT(rw, op == RW_WRITER); 565 1.2 ad incr = curthread | RW_WRITE_LOCKED; 566 1.2 ad need_wait = RW_WRITE_LOCKED | RW_THREAD; 567 1.2 ad } 568 1.2 ad 569 1.58 ad for (owner = rw->rw_owner;; owner = next) { 570 1.58 ad if (__predict_false((owner & need_wait) != 0)) 571 1.58 ad return 0; 572 1.20 ad next = rw_cas(rw, owner, owner + incr); 573 1.20 ad if (__predict_true(next == owner)) { 574 1.20 ad /* Got it! */ 575 1.20 ad break; 576 1.2 ad } 577 1.2 ad } 578 1.2 ad 579 1.40 mlelstv RW_WANTLOCK(rw, op); 580 1.2 ad RW_LOCKED(rw, op); 581 1.61 ad RW_ASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) || 582 1.2 ad (op == RW_READER && RW_COUNT(rw) != 0)); 583 1.7 ad 584 1.70 riastrad membar_acquire(); 585 1.2 ad return 1; 586 1.2 ad } 587 1.2 ad 588 1.2 ad /* 589 1.2 ad * rw_downgrade: 590 1.2 ad * 591 1.61 ad * Downgrade a write lock to a read lock. 592 1.2 ad */ 593 1.2 ad void 594 1.2 ad rw_downgrade(krwlock_t *rw) 595 1.2 ad { 596 1.72 ad uintptr_t owner, newown, next, curthread __diagused; 597 1.2 ad turnstile_t *ts; 598 1.2 ad int rcnt, wcnt; 599 1.61 ad lwp_t *l; 600 1.2 ad 601 1.61 ad l = curlwp; 602 1.61 ad curthread = (uintptr_t)l; 603 1.2 ad RW_ASSERT(rw, curthread != 0); 604 1.61 ad RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0); 605 1.2 ad RW_ASSERT(rw, RW_OWNER(rw) == curthread); 606 1.2 ad RW_UNLOCKED(rw, RW_WRITER); 607 1.42 mrg 608 1.70 riastrad membar_release(); 609 1.61 ad for (owner = rw->rw_owner;; owner = next) { 610 1.61 ad /* 611 1.61 ad * If there are no waiters we can do this the easy way. Try 612 1.61 ad * swapping us down to one read hold. If it fails, the lock 613 1.61 ad * condition has changed and we most likely now have 614 1.61 ad * waiters. 615 1.61 ad */ 616 1.61 ad if ((owner & RW_HAS_WAITERS) == 0) { 617 1.61 ad newown = (owner & RW_NODEBUG); 618 1.61 ad next = rw_cas(rw, owner, newown + RW_READ_INCR); 619 1.61 ad if (__predict_true(next == owner)) { 620 1.61 ad RW_LOCKED(rw, RW_READER); 621 1.61 ad RW_ASSERT(rw, 622 1.61 ad (rw->rw_owner & RW_WRITE_LOCKED) == 0); 623 1.61 ad RW_ASSERT(rw, RW_COUNT(rw) != 0); 624 1.61 ad return; 625 1.61 ad } 626 1.61 ad continue; 627 1.61 ad } 628 1.61 ad 629 1.61 ad /* 630 1.61 ad * Grab the turnstile chain lock. This gets the interlock 631 1.61 ad * on the sleep queue. Once we have that, we can adjust the 632 1.61 ad * waiter bits. 633 1.61 ad */ 634 1.2 ad ts = turnstile_lookup(rw); 635 1.61 ad RW_ASSERT(rw, ts != NULL); 636 1.2 ad 637 1.2 ad rcnt = TS_WAITERS(ts, TS_READER_Q); 638 1.2 ad wcnt = TS_WAITERS(ts, TS_WRITER_Q); 639 1.2 ad 640 1.2 ad if (rcnt == 0) { 641 1.61 ad /* 642 1.61 ad * If there are no readers, just preserve the 643 1.61 ad * waiters bits, swap us down to one read hold and 644 1.61 ad * return. 645 1.61 ad */ 646 1.61 ad RW_ASSERT(rw, wcnt != 0); 647 1.61 ad RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0); 648 1.61 ad RW_ASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0); 649 1.61 ad 650 1.61 ad newown = owner & RW_NODEBUG; 651 1.62 ad newown |= RW_READ_INCR | RW_HAS_WAITERS | 652 1.61 ad RW_WRITE_WANTED; 653 1.44 matt next = rw_cas(rw, owner, newown); 654 1.27 rmind turnstile_exit(rw); 655 1.20 ad if (__predict_true(next == owner)) 656 1.20 ad break; 657 1.20 ad } else { 658 1.20 ad /* 659 1.20 ad * Give the lock to all blocked readers. We may 660 1.61 ad * retain one read hold if downgrading. If there is 661 1.61 ad * a writer waiting, new readers will be blocked 662 1.20 ad * out. 663 1.20 ad */ 664 1.61 ad newown = owner & RW_NODEBUG; 665 1.61 ad newown += (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR; 666 1.20 ad if (wcnt != 0) 667 1.44 matt newown |= RW_HAS_WAITERS | RW_WRITE_WANTED; 668 1.20 ad 669 1.44 matt next = rw_cas(rw, owner, newown); 670 1.20 ad if (__predict_true(next == owner)) { 671 1.20 ad /* Wake up all sleeping readers. */ 672 1.20 ad turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL); 673 1.20 ad break; 674 1.2 ad } 675 1.27 rmind turnstile_exit(rw); 676 1.2 ad } 677 1.2 ad } 678 1.2 ad 679 1.40 mlelstv RW_WANTLOCK(rw, RW_READER); 680 1.2 ad RW_LOCKED(rw, RW_READER); 681 1.61 ad RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0); 682 1.61 ad RW_ASSERT(rw, RW_COUNT(rw) != 0); 683 1.2 ad } 684 1.2 ad 685 1.2 ad /* 686 1.2 ad * rw_tryupgrade: 687 1.2 ad * 688 1.55 ad * Try to upgrade a read lock to a write lock. We must be the only 689 1.61 ad * reader. 690 1.2 ad */ 691 1.2 ad int 692 1.2 ad rw_tryupgrade(krwlock_t *rw) 693 1.2 ad { 694 1.44 matt uintptr_t owner, curthread, newown, next; 695 1.61 ad struct lwp *l; 696 1.2 ad 697 1.61 ad l = curlwp; 698 1.61 ad curthread = (uintptr_t)l; 699 1.2 ad RW_ASSERT(rw, curthread != 0); 700 1.31 yamt RW_ASSERT(rw, rw_read_held(rw)); 701 1.2 ad 702 1.55 ad for (owner = RW_READ_INCR;; owner = next) { 703 1.44 matt newown = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD); 704 1.44 matt next = rw_cas(rw, owner, newown); 705 1.30 ad if (__predict_true(next == owner)) { 706 1.70 riastrad membar_acquire(); 707 1.2 ad break; 708 1.30 ad } 709 1.55 ad RW_ASSERT(rw, (next & RW_WRITE_LOCKED) == 0); 710 1.55 ad if (__predict_false((next & RW_THREAD) != RW_READ_INCR)) { 711 1.55 ad RW_ASSERT(rw, (next & RW_THREAD) != 0); 712 1.55 ad return 0; 713 1.55 ad } 714 1.2 ad } 715 1.2 ad 716 1.2 ad RW_UNLOCKED(rw, RW_READER); 717 1.40 mlelstv RW_WANTLOCK(rw, RW_WRITER); 718 1.2 ad RW_LOCKED(rw, RW_WRITER); 719 1.61 ad RW_ASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED); 720 1.61 ad RW_ASSERT(rw, RW_OWNER(rw) == curthread); 721 1.2 ad 722 1.2 ad return 1; 723 1.2 ad } 724 1.2 ad 725 1.2 ad /* 726 1.2 ad * rw_read_held: 727 1.2 ad * 728 1.2 ad * Returns true if the rwlock is held for reading. Must only be 729 1.2 ad * used for diagnostic assertions, and never be used to make 730 1.2 ad * decisions about how to use a rwlock. 731 1.2 ad */ 732 1.2 ad int 733 1.2 ad rw_read_held(krwlock_t *rw) 734 1.2 ad { 735 1.2 ad uintptr_t owner; 736 1.2 ad 737 1.21 ad if (rw == NULL) 738 1.21 ad return 0; 739 1.2 ad owner = rw->rw_owner; 740 1.2 ad return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0; 741 1.2 ad } 742 1.2 ad 743 1.2 ad /* 744 1.2 ad * rw_write_held: 745 1.2 ad * 746 1.2 ad * Returns true if the rwlock is held for writing. Must only be 747 1.2 ad * used for diagnostic assertions, and never be used to make 748 1.2 ad * decisions about how to use a rwlock. 749 1.2 ad */ 750 1.2 ad int 751 1.2 ad rw_write_held(krwlock_t *rw) 752 1.2 ad { 753 1.2 ad 754 1.21 ad if (rw == NULL) 755 1.21 ad return 0; 756 1.17 ad return (rw->rw_owner & (RW_WRITE_LOCKED | RW_THREAD)) == 757 1.18 ad (RW_WRITE_LOCKED | (uintptr_t)curlwp); 758 1.2 ad } 759 1.2 ad 760 1.2 ad /* 761 1.2 ad * rw_lock_held: 762 1.2 ad * 763 1.2 ad * Returns true if the rwlock is held for reading or writing. Must 764 1.2 ad * only be used for diagnostic assertions, and never be used to make 765 1.2 ad * decisions about how to use a rwlock. 766 1.2 ad */ 767 1.2 ad int 768 1.2 ad rw_lock_held(krwlock_t *rw) 769 1.2 ad { 770 1.2 ad 771 1.21 ad if (rw == NULL) 772 1.21 ad return 0; 773 1.2 ad return (rw->rw_owner & RW_THREAD) != 0; 774 1.2 ad } 775 1.4 yamt 776 1.5 ad /* 777 1.65 ad * rw_lock_op: 778 1.65 ad * 779 1.65 ad * For a rwlock that is known to be held by the caller, return 780 1.65 ad * RW_READER or RW_WRITER to describe the hold type. 781 1.65 ad */ 782 1.65 ad krw_t 783 1.65 ad rw_lock_op(krwlock_t *rw) 784 1.65 ad { 785 1.65 ad 786 1.65 ad RW_ASSERT(rw, rw_lock_held(rw)); 787 1.65 ad 788 1.65 ad return (rw->rw_owner & RW_WRITE_LOCKED) != 0 ? RW_WRITER : RW_READER; 789 1.65 ad } 790 1.65 ad 791 1.65 ad /* 792 1.5 ad * rw_owner: 793 1.5 ad * 794 1.5 ad * Return the current owner of an RW lock, but only if it is write 795 1.5 ad * held. Used for priority inheritance. 796 1.5 ad */ 797 1.7 ad static lwp_t * 798 1.4 yamt rw_owner(wchan_t obj) 799 1.4 yamt { 800 1.4 yamt krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */ 801 1.4 yamt uintptr_t owner = rw->rw_owner; 802 1.4 yamt 803 1.4 yamt if ((owner & RW_WRITE_LOCKED) == 0) 804 1.4 yamt return NULL; 805 1.4 yamt 806 1.4 yamt return (void *)(owner & RW_THREAD); 807 1.4 yamt } 808