kern_rwlock.c revision 1.6.2.6 1 /* $NetBSD: kern_rwlock.c,v 1.6.2.6 2007/08/20 18:08:55 ad Exp $ */
2
3 /*-
4 * Copyright (c) 2002, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe and Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Kernel reader/writer lock implementation, modeled after those
41 * found in Solaris, a description of which can be found in:
42 *
43 * Solaris Internals: Core Kernel Architecture, Jim Mauro and
44 * Richard McDougall.
45 */
46
47 #include <sys/cdefs.h>
48 __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.6.2.6 2007/08/20 18:08:55 ad Exp $");
49
50 #define __RWLOCK_PRIVATE
51
52 #include <sys/param.h>
53 #include <sys/proc.h>
54 #include <sys/rwlock.h>
55 #include <sys/sched.h>
56 #include <sys/sleepq.h>
57 #include <sys/systm.h>
58 #include <sys/lockdebug.h>
59 #include <sys/cpu.h>
60
61 #include <dev/lockstat.h>
62
63 /*
64 * LOCKDEBUG
65 */
66
#if defined(LOCKDEBUG)

/*
 * Hooks into the LOCKDEBUG facility.
 *
 * None of these expansions ends in a semicolon: the caller supplies it.
 * That keeps each macro a single statement, so it stays safe inside an
 * unbraced if/else body.  The `op' argument is parenthesized so that any
 * expression may be passed.
 */
#define	RW_WANTLOCK(rw, op)						\
	LOCKDEBUG_WANTLOCK(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_LOCKED(rw, op)						\
	LOCKDEBUG_LOCKED(RW_GETID(rw),					\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_UNLOCKED(rw, op)						\
	LOCKDEBUG_UNLOCKED(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)

/* Assertion that is only compiled in for LOCKDEBUG kernels. */
#define	RW_DASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		rw_abort(rw, __FUNCTION__, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	RW_WANTLOCK(rw, op)	/* nothing */
#define	RW_LOCKED(rw, op)	/* nothing */
#define	RW_UNLOCKED(rw, op)	/* nothing */
#define	RW_DASSERT(rw, cond)	/* nothing */

#endif	/* LOCKDEBUG */
92
93 /*
94 * DIAGNOSTIC
95 */
96
#ifdef DIAGNOSTIC

/*
 * Assertion compiled in for DIAGNOSTIC kernels: when `cond' is false,
 * report the failed condition through rw_abort().
 */
#define	RW_ASSERT(rw, cond)						\
do {									\
	if (cond) {							\
		/* assertion holds */					\
	} else {							\
		rw_abort(rw, __FUNCTION__, "assertion failed: " #cond);	\
	}								\
} while (/* CONSTCOND */ 0)

#else	/* DIAGNOSTIC */

#define	RW_ASSERT(rw, cond)	/* nothing */

#endif	/* DIAGNOSTIC */
110
111 /*
112 * For platforms that use 'simple' RW locks.
113 */
114 #ifdef __HAVE_SIMPLE_RW_LOCKS
/* Acquire and release both expand to RW_CAS() on the lock's owner word. */
#define	RW_ACQUIRE(rw, old, new)	RW_CAS(&(rw)->rw_owner, (old), (new))
#define	RW_RELEASE(rw, old, new)	RW_CAS(&(rw)->rw_owner, (old), (new))
/* Store / fetch the identifier kept in the lock's rw_id member. */
#define	RW_SETID(rw, id)		((rw)->rw_id = (id))
#define	RW_GETID(rw)			((rw)->rw_id)
119
120 static inline int
121 RW_SET_WAITERS(krwlock_t *rw, uintptr_t need, uintptr_t set)
122 {
123 uintptr_t old;
124
125 if (((old = rw->rw_owner) & need) == 0)
126 return 0;
127 return RW_CAS(&rw->rw_owner, old, old | set);
128 }
129 #endif /* __HAVE_SIMPLE_RW_LOCKS */
130
/*
 * When LOCKDEBUG is configured, __HAVE_RW_STUBS is undefined so that the
 * aliases below are always emitted.  Without __HAVE_RW_STUBS, rw_enter
 * and rw_exit are strong aliases for the C implementations
 * rw_vector_enter() and rw_vector_exit() in this file.
 */
#if defined(LOCKDEBUG)
#undef	__HAVE_RW_STUBS
#endif

#if !defined(__HAVE_RW_STUBS)
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
#endif
142
143 static void rw_dump(volatile void *);
144 static lwp_t *rw_owner(wchan_t);
145
146 lockops_t rwlock_lockops = {
147 "Reader / writer lock",
148 1,
149 rw_dump
150 };
151
152 syncobj_t rw_syncobj = {
153 SOBJ_SLEEPQ_SORTED,
154 turnstile_unsleep,
155 turnstile_changepri,
156 sleepq_lendpri,
157 rw_owner,
158 };
159
160 /*
161 * rw_dump:
162 *
163 * Dump the contents of a rwlock structure.
164 */
165 static void
166 rw_dump(volatile void *cookie)
167 {
168 volatile krwlock_t *rw = cookie;
169
170 printf_nolog("owner/count : %#018lx flags : %#018x\n",
171 (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
172 }
173
174 /*
175 * rw_abort:
176 *
177 * Dump information about an error and panic the system. This
178 * generates a lot of machine code in the DIAGNOSTIC case, so
179 * we ask the compiler to not inline it.
180 */
181 #if __GNUC_PREREQ__(3, 0)
182 __attribute ((noinline))
183 #endif
184 static void
185 rw_abort(krwlock_t *rw, const char *func, const char *msg)
186 {
187
188 if (panicstr != NULL)
189 return;
190
191 LOCKDEBUG_ABORT(RW_GETID(rw), rw, &rwlock_lockops, func, msg);
192 }
193
194 /*
195 * rw_init:
196 *
197 * Initialize a rwlock for use.
198 */
199 void
200 rw_init(krwlock_t *rw)
201 {
202 u_int id;
203
204 memset(rw, 0, sizeof(*rw));
205
206 id = LOCKDEBUG_ALLOC(rw, &rwlock_lockops,
207 (uintptr_t)__builtin_return_address(0));
208 RW_SETID(rw, id);
209 }
210
211 /*
212 * rw_destroy:
213 *
214 * Tear down a rwlock.
215 */
216 void
217 rw_destroy(krwlock_t *rw)
218 {
219
220 LOCKDEBUG_FREE(rw, RW_GETID(rw));
221 RW_ASSERT(rw, rw->rw_owner == 0);
222 }
223
224 /*
225 * rw_vector_enter:
226 *
227 * Acquire a rwlock.
228 */
229 void
230 rw_vector_enter(krwlock_t *rw, const krw_t op)
231 {
232 uintptr_t owner, incr, need_wait, set_wait, curthread;
233 turnstile_t *ts;
234 int queue;
235 lwp_t *l;
236 LOCKSTAT_TIMER(slptime);
237 LOCKSTAT_FLAG(lsflag);
238
239 l = curlwp;
240 curthread = (uintptr_t)l;
241
242 RW_ASSERT(rw, !cpu_intr_p());
243 RW_ASSERT(rw, curthread != 0);
244 RW_WANTLOCK(rw, op);
245
246 if (panicstr == NULL) {
247 LOCKDEBUG_BARRIER(&kernel_lock, 1);
248 }
249
250 /*
251 * We play a slight trick here. If we're a reader, we want
252 * increment the read count. If we're a writer, we want to
253 * set the owner field and whe WRITE_LOCKED bit.
254 *
255 * In the latter case, we expect those bits to be zero,
256 * therefore we can use an add operation to set them, which
257 * means an add operation for both cases.
258 */
259 if (__predict_true(op == RW_READER)) {
260 incr = RW_READ_INCR;
261 set_wait = RW_HAS_WAITERS;
262 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
263 queue = TS_READER_Q;
264 } else {
265 RW_DASSERT(rw, op == RW_WRITER);
266 incr = curthread | RW_WRITE_LOCKED;
267 set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
268 need_wait = RW_WRITE_LOCKED | RW_THREAD;
269 queue = TS_WRITER_Q;
270 }
271
272 LOCKSTAT_ENTER(lsflag);
273
274 for (;;) {
275 /*
276 * Read the lock owner field. If the need-to-wait
277 * indicator is clear, then try to acquire the lock.
278 */
279 owner = rw->rw_owner;
280 if ((owner & need_wait) == 0) {
281 if (RW_ACQUIRE(rw, owner, owner + incr)) {
282 /* Got it! */
283 break;
284 }
285
286 /*
287 * Didn't get it -- spin around again (we'll
288 * probably sleep on the next iteration).
289 */
290 continue;
291 }
292
293 if (panicstr != NULL)
294 return;
295 if (RW_OWNER(rw) == curthread)
296 rw_abort(rw, __FUNCTION__, "locking against myself");
297
298 /*
299 * Grab the turnstile chain lock. Once we have that, we
300 * can adjust the waiter bits and sleep queue.
301 */
302 ts = turnstile_lookup(rw);
303
304 /*
305 * Mark the rwlock as having waiters. If the set fails,
306 * then we may not need to sleep and should spin again.
307 */
308 if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
309 turnstile_exit(rw);
310 continue;
311 }
312
313 LOCKSTAT_START_TIMER(lsflag, slptime);
314
315 turnstile_block(ts, queue, rw, &rw_syncobj);
316
317 /* If we wake up and arrive here, we've been handed the lock. */
318 RW_RECEIVE(rw);
319
320 LOCKSTAT_STOP_TIMER(lsflag, slptime);
321 LOCKSTAT_EVENT(lsflag, rw,
322 LB_RWLOCK | (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2),
323 1, slptime);
324
325 break;
326 }
327
328 LOCKSTAT_EXIT(lsflag);
329
330 RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
331 (op == RW_READER && RW_COUNT(rw) != 0));
332 RW_LOCKED(rw, op);
333 }
334
335 /*
336 * rw_vector_exit:
337 *
338 * Release a rwlock.
339 */
340 void
341 rw_vector_exit(krwlock_t *rw)
342 {
343 uintptr_t curthread, owner, decr, new;
344 turnstile_t *ts;
345 int rcnt, wcnt;
346 lwp_t *l;
347
348 curthread = (uintptr_t)curlwp;
349 RW_ASSERT(rw, curthread != 0);
350
351 if (panicstr != NULL)
352 return;
353
354 /*
355 * Again, we use a trick. Since we used an add operation to
356 * set the required lock bits, we can use a subtract to clear
357 * them, which makes the read-release and write-release path
358 * the same.
359 */
360 owner = rw->rw_owner;
361 if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
362 RW_UNLOCKED(rw, RW_WRITER);
363 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
364 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
365 decr = curthread | RW_WRITE_LOCKED;
366 } else {
367 RW_UNLOCKED(rw, RW_READER);
368 RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
369 RW_ASSERT(rw, RW_COUNT(rw) != 0);
370 decr = RW_READ_INCR;
371 }
372
373 /*
374 * Compute what we expect the new value of the lock to be. Only
375 * proceed to do direct handoff if there are waiters, and if the
376 * lock would become unowned.
377 */
378 for (;; owner = rw->rw_owner) {
379 new = (owner - decr);
380 if ((new & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
381 break;
382 if (RW_RELEASE(rw, owner, new))
383 return;
384 }
385
386 for (;;) {
387 /*
388 * Grab the turnstile chain lock. This gets the interlock
389 * on the sleep queue. Once we have that, we can adjust the
390 * waiter bits.
391 */
392 ts = turnstile_lookup(rw);
393 RW_DASSERT(rw, ts != NULL);
394 RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);
395
396 owner = rw->rw_owner;
397 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
398 rcnt = TS_WAITERS(ts, TS_READER_Q);
399
400 /*
401 * Give the lock away.
402 *
403 * If we are releasing a write lock, then wake all
404 * outstanding readers. If we are releasing a read
405 * lock, then wake one writer.
406 */
407 if (rcnt == 0 || (decr == RW_READ_INCR && wcnt != 0)) {
408 RW_DASSERT(rw, wcnt != 0);
409 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
410
411 /*
412 * Give the lock to the longest waiting
413 * writer.
414 */
415 l = TS_FIRST(ts, TS_WRITER_Q);
416 new = (uintptr_t)l | RW_WRITE_LOCKED;
417
418 if (wcnt > 1)
419 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
420 else if (rcnt != 0)
421 new |= RW_HAS_WAITERS;
422
423 RW_GIVE(rw);
424 if (!RW_RELEASE(rw, owner, new)) {
425 /* Oops, try again. */
426 turnstile_exit(rw);
427 continue;
428 }
429
430 /* Wake the writer. */
431 turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
432 } else {
433 RW_DASSERT(rw, rcnt != 0);
434
435 /*
436 * Give the lock to all blocked readers. If there
437 * is a writer waiting, new readers that arrive
438 * after the release will be blocked out.
439 */
440 new = rcnt << RW_READ_COUNT_SHIFT;
441 if (wcnt != 0)
442 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
443
444 RW_GIVE(rw);
445 if (!RW_RELEASE(rw, owner, new)) {
446 /* Oops, try again. */
447 turnstile_exit(rw);
448 continue;
449 }
450
451 /* Wake up all sleeping readers. */
452 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
453 }
454
455 break;
456 }
457 }
458
459 /*
460 * rw_tryenter:
461 *
462 * Try to acquire a rwlock.
463 */
464 int
465 rw_tryenter(krwlock_t *rw, const krw_t op)
466 {
467 uintptr_t curthread, owner, incr, need_wait;
468
469 curthread = (uintptr_t)curlwp;
470
471 RW_ASSERT(rw, curthread != 0);
472 RW_WANTLOCK(rw, op);
473
474 if (op == RW_READER) {
475 incr = RW_READ_INCR;
476 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
477 } else {
478 RW_DASSERT(rw, op == RW_WRITER);
479 incr = curthread | RW_WRITE_LOCKED;
480 need_wait = RW_WRITE_LOCKED | RW_THREAD;
481 }
482
483 for (;;) {
484 owner = rw->rw_owner;
485 if ((owner & need_wait) == 0) {
486 if (RW_ACQUIRE(rw, owner, owner + incr)) {
487 /* Got it! */
488 break;
489 }
490 continue;
491 }
492 return 0;
493 }
494
495 RW_LOCKED(rw, op);
496 RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
497 (op == RW_READER && RW_COUNT(rw) != 0));
498
499 return 1;
500 }
501
502 /*
503 * rw_downgrade:
504 *
505 * Downgrade a write lock to a read lock.
506 */
507 void
508 rw_downgrade(krwlock_t *rw)
509 {
510 uintptr_t owner, curthread, new;
511 turnstile_t *ts;
512 int rcnt, wcnt;
513
514 curthread = (uintptr_t)curlwp;
515 RW_ASSERT(rw, curthread != 0);
516 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
517 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
518 RW_UNLOCKED(rw, RW_WRITER);
519
520 owner = rw->rw_owner;
521 if ((owner & RW_HAS_WAITERS) == 0) {
522 /*
523 * There are no waiters, so we can do this the easy way.
524 * Try swapping us down to one read hold. If it fails, the
525 * lock condition has changed and we most likely now have
526 * waiters.
527 */
528 if (RW_RELEASE(rw, owner, RW_READ_INCR)) {
529 RW_LOCKED(rw, RW_READER);
530 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
531 RW_DASSERT(rw, RW_COUNT(rw) != 0);
532 return;
533 }
534 }
535
536 /*
537 * Grab the turnstile chain lock. This gets the interlock
538 * on the sleep queue. Once we have that, we can adjust the
539 * waiter bits.
540 */
541 for (;;) {
542 ts = turnstile_lookup(rw);
543 RW_DASSERT(rw, ts != NULL);
544
545 owner = rw->rw_owner;
546 rcnt = TS_WAITERS(ts, TS_READER_Q);
547 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
548
549 /*
550 * If there are no readers, just preserve the waiters
551 * bits, swap us down to one read hold and return.
552 */
553 if (rcnt == 0) {
554 RW_DASSERT(rw, wcnt != 0);
555 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
556 RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);
557
558 new = RW_READ_INCR | RW_HAS_WAITERS | RW_WRITE_WANTED;
559 if (!RW_RELEASE(rw, owner, new)) {
560 /* Oops, try again. */
561 turnstile_exit(ts);
562 continue;
563 }
564 break;
565 }
566
567 /*
568 * Give the lock to all blocked readers. We may
569 * retain one read hold if downgrading. If there
570 * is a writer waiting, new readers will be blocked
571 * out.
572 */
573 new = (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
574 if (wcnt != 0)
575 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
576
577 RW_GIVE(rw);
578 if (!RW_RELEASE(rw, owner, new)) {
579 /* Oops, try again. */
580 turnstile_exit(rw);
581 continue;
582 }
583
584 /* Wake up all sleeping readers. */
585 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
586 break;
587 }
588
589 RW_LOCKED(rw, RW_READER);
590 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
591 RW_DASSERT(rw, RW_COUNT(rw) != 0);
592 }
593
594 /*
595 * rw_tryupgrade:
596 *
597 * Try to upgrade a read lock to a write lock. We must be the
598 * only reader.
599 */
600 int
601 rw_tryupgrade(krwlock_t *rw)
602 {
603 uintptr_t owner, curthread, new;
604
605 curthread = (uintptr_t)curlwp;
606 RW_ASSERT(rw, curthread != 0);
607 RW_WANTLOCK(rw, RW_WRITER);
608
609 for (;;) {
610 owner = rw->rw_owner;
611 RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
612 if ((owner & RW_THREAD) != RW_READ_INCR) {
613 RW_ASSERT(rw, (owner & RW_THREAD) != 0);
614 return 0;
615 }
616 new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
617 if (RW_ACQUIRE(rw, owner, new))
618 break;
619 }
620
621 RW_UNLOCKED(rw, RW_READER);
622 RW_LOCKED(rw, RW_WRITER);
623 RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
624 RW_DASSERT(rw, RW_OWNER(rw) == curthread);
625
626 return 1;
627 }
628
629 /*
630 * rw_read_held:
631 *
632 * Returns true if the rwlock is held for reading. Must only be
633 * used for diagnostic assertions, and never be used to make
634 * decisions about how to use a rwlock.
635 */
636 int
637 rw_read_held(krwlock_t *rw)
638 {
639 uintptr_t owner;
640
641 if (panicstr != NULL)
642 return 1;
643
644 owner = rw->rw_owner;
645 return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
646 }
647
648 /*
649 * rw_write_held:
650 *
651 * Returns true if the rwlock is held for writing. Must only be
652 * used for diagnostic assertions, and never be used to make
653 * decisions about how to use a rwlock.
654 */
655 int
656 rw_write_held(krwlock_t *rw)
657 {
658
659 if (panicstr != NULL)
660 return 1;
661
662 return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
663 }
664
665 /*
666 * rw_lock_held:
667 *
668 * Returns true if the rwlock is held for reading or writing. Must
669 * only be used for diagnostic assertions, and never be used to make
670 * decisions about how to use a rwlock.
671 */
672 int
673 rw_lock_held(krwlock_t *rw)
674 {
675
676 if (panicstr != NULL)
677 return 1;
678
679 return (rw->rw_owner & RW_THREAD) != 0;
680 }
681
682 /*
683 * rw_owner:
684 *
685 * Return the current owner of an RW lock, but only if it is write
686 * held. Used for priority inheritance.
687 */
688 static lwp_t *
689 rw_owner(wchan_t obj)
690 {
691 krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
692 uintptr_t owner = rw->rw_owner;
693
694 if ((owner & RW_WRITE_LOCKED) == 0)
695 return NULL;
696
697 return (void *)(owner & RW_THREAD);
698 }
699