/*	$NetBSD: kern_rwlock.c,v 1.7.2.1 2007/04/17 06:47:31 thorpej Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 */
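
/*
 * Illustrative usage sketch (not part of this file): consumers
 * initialize the lock once with rw_init(), then bracket critical
 * sections with rw_enter()/rw_exit().  The struct and its fields
 * below are hypothetical.
 *
 *	struct frotz {
 *		krwlock_t	f_lock;
 *		int		f_count;
 *	};
 *
 *	void
 *	frotz_bump(struct frotz *f)
 *	{
 *
 *		rw_enter(&f->f_lock, RW_WRITER);
 *		f->f_count++;
 *		rw_exit(&f->f_lock);
 *	}
 */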

#include "opt_multiprocessor.h"

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.7.2.1 2007/04/17 06:47:31 thorpej Exp $");

#define	__RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>
#include <sys/atomic.h>

#include <dev/lockstat.h>

#define	RW_ABORT(rw, msg)						\
	LOCKDEBUG_ABORT(RW_GETID(rw), rw, &rwlock_lockops, __FUNCTION__, msg)

/*
 * LOCKDEBUG
 */

#if defined(LOCKDEBUG)

#define	RW_WANTLOCK(rw, op)						\
	LOCKDEBUG_WANTLOCK(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), op == RW_READER)
#define	RW_LOCKED(rw, op)						\
	LOCKDEBUG_LOCKED(RW_GETID(rw),					\
	    (uintptr_t)__builtin_return_address(0), op == RW_READER)
#define	RW_UNLOCKED(rw, op)						\
	LOCKDEBUG_UNLOCKED(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), op == RW_READER)
#define	RW_DASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	RW_WANTLOCK(rw, op)	/* nothing */
#define	RW_LOCKED(rw, op)	/* nothing */
#define	RW_UNLOCKED(rw, op)	/* nothing */
#define	RW_DASSERT(rw, cond)	/* nothing */

#endif	/* LOCKDEBUG */

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)

#define	RW_ASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else

#define	RW_ASSERT(rw, cond)	/* nothing */

#endif	/* DIAGNOSTIC */

/*
 * For platforms that use 'simple' RW locks.
 */
#ifdef __HAVE_SIMPLE_RW_LOCKS
#ifndef RW_CAS
#define	RW_CAS(p, o, n)							\
	(atomic_cas_ptr((volatile void *)(p), (void *)(o),		\
	    (void *)(n)) == (void *)(o))
#endif
#define	RW_ACQUIRE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)
#define	RW_RELEASE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)
#define	RW_SETID(rw, id)		((rw)->rw_id = id)
#define	RW_GETID(rw)			((rw)->rw_id)

static inline int
RW_SET_WAITERS(krwlock_t *rw, uintptr_t need, uintptr_t set)
{
	uintptr_t old;

	if (((old = rw->rw_owner) & need) == 0)
		return 0;
	return RW_CAS(&rw->rw_owner, old, old | set);
}
#endif	/* __HAVE_SIMPLE_RW_LOCKS */
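
/*
 * A minimal sketch of the semantics RW_CAS() provides (assuming a
 * full-barrier atomic_cas_ptr(), as above): atomically compare the
 * owner word against an expected value and install a new one,
 * reporting whether the swap happened.  If it could be written as
 * plain C, the logic would be:
 *
 *	if (rw->rw_owner == old) {	(performed as one atomic step)
 *		rw->rw_owner = new;
 *		return 1;		(swap happened)
 *	}
 *	return 0;			(lost a race; caller retries)
 */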

/*
 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 */
#ifdef LOCKDEBUG
#undef	__HAVE_RW_STUBS
#endif

#ifndef __HAVE_RW_STUBS
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
#endif

static void	rw_dump(volatile void *);
static lwp_t	*rw_owner(wchan_t);

lockops_t rwlock_lockops = {
	"Reader / writer lock",
	1,
	rw_dump
};

syncobj_t rw_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	turnstile_unsleep,
	turnstile_changepri,
	sleepq_lendpri,
	rw_owner,
};

/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
 */
void
rw_dump(volatile void *cookie)
{
	volatile krwlock_t *rw = cookie;

	printf_nolog("owner/count  : %#018lx flags        : %#018x\n",
	    (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_init:
 *
 *	Initialize a rwlock for use.
 */
void
rw_init(krwlock_t *rw)
{
	u_int id;

	memset(rw, 0, sizeof(*rw));

	id = LOCKDEBUG_ALLOC(rw, &rwlock_lockops);
	RW_SETID(rw, id);
}

/*
 * rw_destroy:
 *
 *	Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

	LOCKDEBUG_FREE(rw, RW_GETID(rw));
	RW_ASSERT(rw, rw->rw_owner == 0);
}

/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread;
	turnstile_t *ts;
	int queue;
	lwp_t *l;
	LOCKSTAT_TIMER(slptime);
	LOCKSTAT_FLAG(lsflag);

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, op);

#ifdef LOCKDEBUG
	if (panicstr == NULL) {
		simple_lock_only_held(NULL, "rw_enter");
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}
#endif
	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the RW_WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation serves both cases.
	 */
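	/*
	 * Concretely (assuming the field layout from <sys/rwlock.h>,
	 * with the read count occupying the bits above the flag bits):
	 *
	 *	reader:	owner + RW_READ_INCR	(count 0 -> 1 -> 2 ...)
	 *	writer:	0 + (curthread | RW_WRITE_LOCKED)
	 *
	 * The writer case relies on the acquire check below having
	 * verified that the thread/count bits were zero beforehand.
	 */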
	if (__predict_true(op == RW_READER)) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
		queue = TS_READER_Q;
	} else {
		RW_DASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
		queue = TS_WRITER_Q;
	}

	LOCKSTAT_ENTER(lsflag);

	for (;;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0) {
			if (RW_ACQUIRE(rw, owner, owner + incr)) {
				/* Got it! */
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			continue;
		}

		if (panicstr != NULL)
			return;
		if (RW_OWNER(rw) == curthread)
			RW_ABORT(rw, "locking against myself");

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * XXXSMP if this is a high priority LWP (interrupt handler
		 * or realtime) and acquiring a read hold, then we shouldn't
		 * wait for RW_WRITE_WANTED if our priority is >= that of
		 * the highest priority writer that is waiting.
		 */

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 */
		if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
			turnstile_exit(rw);
			continue;
		}

		LOCKSTAT_START_TIMER(lsflag, slptime);

		turnstile_block(ts, queue, rw, &rw_syncobj);

		/* If we wake up and arrive here, we've been handed the lock. */
		RW_RECEIVE(rw);

		LOCKSTAT_STOP_TIMER(lsflag, slptime);
		LOCKSTAT_EVENT(lsflag, rw,
		    LB_RWLOCK | (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2),
		    1, slptime);

		turnstile_unblock();
		break;
	}

	LOCKSTAT_EXIT(lsflag);

	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	RW_LOCKED(rw, op);
}

/*
 * rw_vector_exit:
 *
 *	Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw)
{
	uintptr_t curthread, owner, decr, new;
	turnstile_t *ts;
	int rcnt, wcnt;
	lwp_t *l;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);

	if (panicstr != NULL) {
		/*
		 * XXX What's the correct thing to do here?  We should at
		 * least release the lock.
		 */
		return;
	}

	/*
	 * Again, we use a trick.  Since we used an add operation to
	 * set the required lock bits, we can use a subtract to clear
	 * them, which makes the read-release and write-release path
	 * the same.
	 */
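	/*
	 * Concretely: a reader subtracts RW_READ_INCR, undoing its
	 * earlier add, while a writer subtracts the exact value it
	 * added on entry, (curthread | RW_WRITE_LOCKED), clearing the
	 * owner and the write bit in one operation.
	 */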
	owner = rw->rw_owner;
	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
		RW_UNLOCKED(rw, RW_WRITER);
		RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		decr = curthread | RW_WRITE_LOCKED;
	} else {
		RW_UNLOCKED(rw, RW_READER);
		RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
		RW_ASSERT(rw, RW_COUNT(rw) != 0);
		decr = RW_READ_INCR;
	}

	/*
	 * Compute what we expect the new value of the lock to be. Only
	 * proceed to do direct handoff if there are waiters, and if the
	 * lock would become unowned.
	 */
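	/*
	 * For example: if one other reader still holds the lock when a
	 * reader releases, the RW_THREAD bits of the new value remain
	 * non-zero, the handoff test below fails, and we simply release;
	 * the last reader out will perform the handoff to any waiters.
	 */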
	for (;; owner = rw->rw_owner) {
		new = (owner - decr);
		if ((new & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
			break;
		if (RW_RELEASE(rw, owner, new))
			return;
	}

	for (;;) {
		/*
		 * Grab the turnstile chain lock.  This gets the interlock
		 * on the sleep queue.  Once we have that, we can adjust the
		 * waiter bits.
		 */
		ts = turnstile_lookup(rw);
		RW_DASSERT(rw, ts != NULL);
		RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);

		owner = rw->rw_owner;
		wcnt = TS_WAITERS(ts, TS_WRITER_Q);
		rcnt = TS_WAITERS(ts, TS_READER_Q);

		/*
		 * Give the lock away.
		 *
		 * If we are releasing a write lock, then wake all
		 * outstanding readers.  If we are releasing a read
		 * lock, then wake one writer.
		 */
		if (rcnt == 0 || (decr == RW_READ_INCR && wcnt != 0)) {
			RW_DASSERT(rw, wcnt != 0);
			RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);

			/*
			 * Give the lock to the longest waiting
			 * writer.
			 */
			l = TS_FIRST(ts, TS_WRITER_Q);
			new = (uintptr_t)l | RW_WRITE_LOCKED;

			if (wcnt > 1)
				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
			else if (rcnt != 0)
				new |= RW_HAS_WAITERS;

			RW_GIVE(rw);
			if (!RW_RELEASE(rw, owner, new)) {
				/* Oops, try again. */
				turnstile_exit(rw);
				continue;
			}

			/* Wake the writer. */
			turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
		} else {
			RW_DASSERT(rw, rcnt != 0);

			/*
			 * Give the lock to all blocked readers.  If there
			 * is a writer waiting, new readers that arrive
			 * after the release will be blocked out.
			 */
			new = rcnt << RW_READ_COUNT_SHIFT;
			if (wcnt != 0)
				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;

			RW_GIVE(rw);
			if (!RW_RELEASE(rw, owner, new)) {
				/* Oops, try again. */
				turnstile_exit(rw);
				continue;
			}

			/* Wake up all sleeping readers. */
			turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
		}

		break;
	}
}

/*
 * rw_tryenter:
 *
 *	Try to acquire a rwlock.
 */
int
rw_tryenter(krwlock_t *rw, const krw_t op)
{
	uintptr_t curthread, owner, incr, need_wait;

	curthread = (uintptr_t)curlwp;

	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, op);

	if (op == RW_READER) {
		incr = RW_READ_INCR;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_DASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (;;) {
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0) {
			if (RW_ACQUIRE(rw, owner, owner + incr)) {
				/* Got it! */
				break;
			}
			continue;
		}
		return 0;
	}

	RW_LOCKED(rw, op);
	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));

	return 1;
}

/*
 * rw_downgrade:
 *
 *	Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, new;
	turnstile_t *ts;
	int rcnt, wcnt;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);
	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
	RW_UNLOCKED(rw, RW_WRITER);

	owner = rw->rw_owner;
	if ((owner & RW_HAS_WAITERS) == 0) {
		/*
		 * There are no waiters, so we can do this the easy way.
		 * Try swapping us down to one read hold.  If it fails, the
		 * lock condition has changed and we most likely now have
		 * waiters.
		 */
		if (RW_RELEASE(rw, owner, RW_READ_INCR)) {
			RW_LOCKED(rw, RW_READER);
			RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
			RW_DASSERT(rw, RW_COUNT(rw) != 0);
			return;
		}
	}

	/*
	 * Grab the turnstile chain lock.  This gets the interlock
	 * on the sleep queue.  Once we have that, we can adjust the
	 * waiter bits.
	 */
	for (;;) {
		ts = turnstile_lookup(rw);
		RW_DASSERT(rw, ts != NULL);

		owner = rw->rw_owner;
		rcnt = TS_WAITERS(ts, TS_READER_Q);
		wcnt = TS_WAITERS(ts, TS_WRITER_Q);

		/*
		 * If there are no readers, just preserve the waiters
		 * bits, swap us down to one read hold and return.
		 */
		if (rcnt == 0) {
			RW_DASSERT(rw, wcnt != 0);
			RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
			RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);

			new = RW_READ_INCR | RW_HAS_WAITERS | RW_WRITE_WANTED;
			if (!RW_RELEASE(rw, owner, new)) {
				/* Oops, try again. */
				turnstile_exit(rw);
				continue;
			}
			break;
		}

		/*
		 * Give the lock to all blocked readers.  We may
		 * retain one read hold if downgrading.  If there
		 * is a writer waiting, new readers will be blocked
		 * out.
		 */
		new = (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
		if (wcnt != 0)
			new |= RW_HAS_WAITERS | RW_WRITE_WANTED;

		RW_GIVE(rw);
		if (!RW_RELEASE(rw, owner, new)) {
			/* Oops, try again. */
			turnstile_exit(rw);
			continue;
		}

		/* Wake up all sleeping readers. */
		turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
		break;
	}

	RW_LOCKED(rw, RW_READER);
	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
	RW_DASSERT(rw, RW_COUNT(rw) != 0);
}

/*
 * rw_tryupgrade:
 *
 *	Try to upgrade a read lock to a write lock.  We must be the
 *	only reader.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, new;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, RW_WRITER);

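	/*
	 * The upgrade can succeed only while we are the sole reader:
	 * the RW_THREAD field must then hold exactly one read
	 * increment.  Any other value means other readers are present
	 * (or arrived while we looked), so we fail and return 0.
	 */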
	for (;;) {
		owner = rw->rw_owner;
		RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
		if ((owner & RW_THREAD) != RW_READ_INCR) {
			RW_ASSERT(rw, (owner & RW_THREAD) != 0);
			return 0;
		}
		new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
		if (RW_ACQUIRE(rw, owner, new))
			break;
	}

	RW_UNLOCKED(rw, RW_READER);
	RW_LOCKED(rw, RW_WRITER);
	RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
	RW_DASSERT(rw, RW_OWNER(rw) == curthread);

	return 1;
}

/*
 * rw_read_held:
 *
 *	Returns true if the rwlock is held for reading.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
	uintptr_t owner;

	if (panicstr != NULL)
		return 1;

	owner = rw->rw_owner;
	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *	Returns true if the rwlock is held for writing.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

	if (panicstr != NULL)
		return 1;

	return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
}

/*
 * rw_lock_held:
 *
 *	Returns true if the rwlock is held for reading or writing.  Must
 *	only be used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

	if (panicstr != NULL)
		return 1;

	return (rw->rw_owner & RW_THREAD) != 0;
}
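
/*
 * Illustrative only: the held predicates above are meant to back
 * diagnostic assertions, e.g. (reusing the hypothetical struct frotz
 * from the sketch at the top of this file):
 *
 *	KASSERT(rw_write_held(&f->f_lock));
 *	f->f_count++;
 */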

/*
 * rw_owner:
 *
 *	Return the current owner of an RW lock, but only if it is write
 *	held.  Used for priority inheritance.
 */
static lwp_t *
rw_owner(wchan_t obj)
{
	krwlock_t *rw = (void *)(uintptr_t)obj;	/* discard qualifiers */
	uintptr_t owner = rw->rw_owner;

	if ((owner & RW_WRITE_LOCKED) == 0)
		return NULL;

	return (void *)(owner & RW_THREAD);
}