kern_rwlock.c revision 1.8.6.2 1 /* $NetBSD: kern_rwlock.c,v 1.8.6.2 2007/10/26 15:48:34 joerg Exp $ */
2
3 /*-
4 * Copyright (c) 2002, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe and Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Kernel reader/writer lock implementation, modeled after those
41 * found in Solaris, a description of which can be found in:
42 *
43 * Solaris Internals: Core Kernel Architecture, Jim Mauro and
44 * Richard McDougall.
45 */
46
47 #include <sys/cdefs.h>
48 __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.8.6.2 2007/10/26 15:48:34 joerg Exp $");
49
50 #include "opt_multiprocessor.h"
51
52 #define __RWLOCK_PRIVATE
53
54 #include <sys/param.h>
55 #include <sys/proc.h>
56 #include <sys/rwlock.h>
57 #include <sys/sched.h>
58 #include <sys/sleepq.h>
59 #include <sys/systm.h>
60 #include <sys/lockdebug.h>
61 #include <sys/cpu.h>
62
63 #include <dev/lockstat.h>
64
/*
 * LOCKDEBUG
 *
 * Wrappers that feed rwlock activity to the lock debugging code.  The
 * third argument passed to the LOCKDEBUG_* hooks is true when the
 * operation is a read (shared) operation.  RW_DASSERT is an assertion
 * that is only compiled in for LOCKDEBUG kernels.
 *
 * None of the macros ends in a semicolon: the caller supplies it, so
 * each expansion is exactly one statement and is safe in an unbraced
 * if/else.
 */

#if defined(LOCKDEBUG)

#define	RW_WANTLOCK(rw, op)						\
	LOCKDEBUG_WANTLOCK(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_LOCKED(rw, op)						\
	LOCKDEBUG_LOCKED(RW_GETID(rw),					\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_UNLOCKED(rw, op)						\
	LOCKDEBUG_UNLOCKED(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_DASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		rw_abort(rw, __func__, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	RW_WANTLOCK(rw, op)	/* nothing */
#define	RW_LOCKED(rw, op)	/* nothing */
#define	RW_UNLOCKED(rw, op)	/* nothing */
#define	RW_DASSERT(rw, cond)	/* nothing */

#endif	/* LOCKDEBUG */

/*
 * DIAGNOSTIC
 *
 * RW_ASSERT is compiled in for DIAGNOSTIC kernels.  On failure it calls
 * rw_abort() with the name of the enclosing function and the text of
 * the failed condition.
 */

#if defined(DIAGNOSTIC)

#define	RW_ASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		rw_abort(rw, __func__, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else

#define	RW_ASSERT(rw, cond)	/* nothing */

#endif	/* DIAGNOSTIC */
112
/*
 * Lock-word primitives for platforms that use 'simple' RW locks
 * (__HAVE_SIMPLE_RW_LOCKS): acquire and release are both a single
 * compare-and-swap on the rw_owner word, and the lock debugging id is
 * kept in the rw_id member.
 */
#ifdef __HAVE_SIMPLE_RW_LOCKS
#define	RW_ACQUIRE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)
#define	RW_RELEASE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)
#define	RW_SETID(rw, id)		((rw)->rw_id = id)
#define	RW_GETID(rw)			((rw)->rw_id)

/*
 * RW_SET_WAITERS:
 *
 *	Try to OR the 'set' bits into the lock word, but only while at
 *	least one of the 'need' bits is still present in it.
 *
 *	Returns 0 without touching the lock if none of the 'need' bits
 *	is set in the sampled lock word.  Otherwise returns the result
 *	of the compare-and-swap, which callers treat as non-zero on
 *	success.
 */
static inline int
RW_SET_WAITERS(krwlock_t *rw, uintptr_t need, uintptr_t set)
{
	/* Sample the lock word once; the CAS below validates the sample. */
	const uintptr_t seen = rw->rw_owner;

	if ((seen & need) != 0)
		return RW_CAS(&rw->rw_owner, seen, seen | set);

	return 0;
}
#endif /* __HAVE_SIMPLE_RW_LOCKS */
132
133 /*
134 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 *
 * With LOCKDEBUG, __HAVE_RW_STUBS is undefined so that the aliases
 * below are always emitted.  When no stubs are in use, rw_enter and
 * rw_exit are strong aliases of rw_vector_enter and rw_vector_exit
 * defined later in this file.
135 */
136 #ifdef LOCKDEBUG
137 #undef __HAVE_RW_STUBS
138 #endif
139
140 #ifndef __HAVE_RW_STUBS
141 __strong_alias(rw_enter,rw_vector_enter);
142 __strong_alias(rw_exit,rw_vector_exit);
143 #endif
144
/* Forward declarations: both are referenced by the tables below. */
145 static void rw_dump(volatile void *);
146 static lwp_t *rw_owner(wchan_t);
147
/*
 * Lock debugging descriptor for rwlocks.  It is passed to
 * LOCKDEBUG_ALLOC() in rw_init() and to LOCKDEBUG_ABORT() in rw_abort().
 * The initializer is positional: a human-readable lock type name, the
 * constant 1, and the dump callback.
 * NOTE(review): the meaning of the second member (1) is not visible in
 * this file -- confirm against the lockops_t definition.
 */
148 lockops_t rwlock_lockops = {
149 "Reader / writer lock",
150 1,
151 rw_dump
152 };
153
/*
 * Synchronisation object descriptor handed to turnstile_block() by
 * rw_vector_enter().  The last member is rw_owner(), which reports the
 * write owner of a lock.
 * NOTE(review): the roles of the other positional members are assumed
 * from their names (flags, unsleep, priority change, priority lending)
 * -- confirm against the syncobj_t definition.
 */
154 syncobj_t rw_syncobj = {
155 SOBJ_SLEEPQ_SORTED,
156 turnstile_unsleep,
157 turnstile_changepri,
158 sleepq_lendpri,
159 rw_owner,
160 };
161
162 /*
163 * rw_dump:
164 *
165 * Dump the contents of a rwlock structure.
166 */
167 static void
168 rw_dump(volatile void *cookie)
169 {
170 volatile krwlock_t *rw = cookie;
171
172 printf_nolog("owner/count : %#018lx flags : %#018x\n",
173 (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
174 }
175
176 /*
177 * rw_abort:
178 *
179 * Dump information about an error and panic the system. This
180 * generates a lot of machine code in the DIAGNOSTIC case, so
181 * we ask the compiler to not inline it.
182 */
183 #if __GNUC_PREREQ__(3, 0)
184 __attribute ((noinline))
185 #endif
186 static void
187 rw_abort(krwlock_t *rw, const char *func, const char *msg)
188 {
189
190 if (panicstr != NULL)
191 return;
192
193 LOCKDEBUG_ABORT(RW_GETID(rw), rw, &rwlock_lockops, func, msg);
194 }
195
196 /*
197 * rw_init:
198 *
199 * Initialize a rwlock for use.
200 */
201 void
202 rw_init(krwlock_t *rw)
203 {
204 u_int id;
205
206 memset(rw, 0, sizeof(*rw));
207
208 id = LOCKDEBUG_ALLOC(rw, &rwlock_lockops,
209 (uintptr_t)__builtin_return_address(0));
210 RW_SETID(rw, id);
211 }
212
213 /*
214 * rw_destroy:
215 *
216 * Tear down a rwlock.
217 */
218 void
219 rw_destroy(krwlock_t *rw)
220 {
221
222 LOCKDEBUG_FREE(rw, RW_GETID(rw));
223 RW_ASSERT(rw, rw->rw_owner == 0);
224 }
225
226 /*
227 * rw_vector_enter:
228 *
229 * Acquire a rwlock.
 *
 * 'op' selects the mode: RW_READER takes a shared (read) hold;
 * otherwise it must be RW_WRITER and the lock is taken exclusively.
 *
 * The lock word is updated with RW_ACQUIRE().  If the lock cannot
 * be taken right away, the caller marks the lock as having waiters
 * and blocks on the lock's turnstile until a releasing thread hands
 * the lock over.  If a panic is in progress (panicstr != NULL) and
 * the lock is contended, the function returns without taking it.
230 */
231 void
232 rw_vector_enter(krwlock_t *rw, const krw_t op)
233 {
234 uintptr_t owner, incr, need_wait, set_wait, curthread;
235 turnstile_t *ts;
236 int queue;
237 lwp_t *l;
238 LOCKSTAT_TIMER(slptime);
239 LOCKSTAT_FLAG(lsflag);
240
241 l = curlwp;
242 curthread = (uintptr_t)l;
243
244 RW_ASSERT(rw, curthread != 0);
245 RW_WANTLOCK(rw, op);
246
/*
 * NOTE(review): LOCKDEBUG_BARRIER is defined outside this file;
 * presumably it checks that the caller may block here -- confirm.
 */
247 if (panicstr == NULL) {
248 LOCKDEBUG_BARRIER(&kernel_lock, 1);
249 }
250
251 /*
252 * We play a slight trick here. If we're a reader, we want to
253 * increment the read count. If we're a writer, we want to
254 * set the owner field and the WRITE_LOCKED bit.
255 *
256 * In the latter case, we expect those bits to be zero,
257 * therefore we can use an add operation to set them, which
258 * means an add operation for both cases.
 *
 * need_wait is the set of lock-word bits that force this caller
 * to wait: a reader waits while the lock is write-held or a
 * writer is waiting; a writer waits while the lock is held in
 * any mode.  set_wait is the set of bits to add to the lock word
 * before sleeping, and queue is the turnstile queue to sleep on.
259 */
260 if (__predict_true(op == RW_READER)) {
261 incr = RW_READ_INCR;
262 set_wait = RW_HAS_WAITERS;
263 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
264 queue = TS_READER_Q;
265 } else {
266 RW_DASSERT(rw, op == RW_WRITER);
267 incr = curthread | RW_WRITE_LOCKED;
268 set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
269 need_wait = RW_WRITE_LOCKED | RW_THREAD;
270 queue = TS_WRITER_Q;
271 }
272
273 LOCKSTAT_ENTER(lsflag);
274
275 for (;;) {
276 /*
277 * Read the lock owner field. If the need-to-wait
278 * indicator is clear, then try to acquire the lock.
279 */
280 owner = rw->rw_owner;
281 if ((owner & need_wait) == 0) {
282 if (RW_ACQUIRE(rw, owner, owner + incr)) {
283 /* Got it! */
284 break;
285 }
286
287 /*
288 * Didn't get it -- spin around again (we'll
289 * probably sleep on the next iteration).
290 */
291 continue;
292 }
293
/*
 * The lock is busy.  During a panic, give up rather than sleep;
 * the caller proceeds without holding the lock.  Otherwise, a
 * thread that already owns the lock is about to deadlock on
 * itself, which is reported via rw_abort().
 */
294 if (panicstr != NULL)
295 return;
296 if (RW_OWNER(rw) == curthread)
297 rw_abort(rw, __func__, "locking against myself");
298
299 /*
300 * Grab the turnstile chain lock. Once we have that, we
301 * can adjust the waiter bits and sleep queue.
302 */
303 ts = turnstile_lookup(rw);
304
305 /*
306 * Mark the rwlock as having waiters. If the set fails,
307 * then we may not need to sleep and should spin again.
308 */
309 if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
310 turnstile_exit(rw);
311 continue;
312 }
313
314 LOCKSTAT_START_TIMER(lsflag, slptime);
315
/* Sleep on the reader or writer queue chosen above. */
316 turnstile_block(ts, queue, rw, &rw_syncobj);
317
318 /* If we wake up and arrive here, we've been handed the lock. */
/*
 * NOTE(review): RW_RECEIVE is defined outside this file; presumably
 * it is the barrier that pairs with RW_GIVE in the release paths --
 * confirm.
 */
319 RW_RECEIVE(rw);
320
/* Record one sleep event, with the time spent asleep, for lockstat. */
321 LOCKSTAT_STOP_TIMER(lsflag, slptime);
322 LOCKSTAT_EVENT(lsflag, rw,
323 LB_RWLOCK | (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2),
324 1, slptime);
325
326 break;
327 }
328
329 LOCKSTAT_EXIT(lsflag);
330
/*
 * Sanity check: a writer must now be the recorded owner, and a
 * reader must see a non-zero hold count.
 */
331 RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
332 (op == RW_READER && RW_COUNT(rw) != 0));
333 RW_LOCKED(rw, op);
334 }
335
336 /*
337 * rw_vector_exit:
338 *
339 * Release a rwlock.
 *
 * The mode being released is inferred from the lock word: if
 * RW_WRITE_LOCKED is set this is a write release, otherwise one
 * read hold is dropped.
 *
 * If the release would leave the lock unowned while threads are
 * waiting, the lock is handed directly to the waiters under the
 * turnstile chain lock: either to the first waiting writer or to
 * all waiting readers.  Does nothing if a panic is in progress.
340 */
341 void
342 rw_vector_exit(krwlock_t *rw)
343 {
344 uintptr_t curthread, owner, decr, new;
345 turnstile_t *ts;
346 int rcnt, wcnt;
347 lwp_t *l;
348
349 curthread = (uintptr_t)curlwp;
350 RW_ASSERT(rw, curthread != 0);
351
/* During a panic the lock word is left untouched. */
352 if (panicstr != NULL)
353 return;
354
355 /*
356 * Again, we use a trick. Since we used an add operation to
357 * set the required lock bits, we can use a subtract to clear
358 * them, which makes the read-release and write-release path
359 * the same.
360 */
361 owner = rw->rw_owner;
362 if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
363 RW_UNLOCKED(rw, RW_WRITER);
364 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
365 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
366 decr = curthread | RW_WRITE_LOCKED;
367 } else {
368 RW_UNLOCKED(rw, RW_READER);
369 RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
370 RW_ASSERT(rw, RW_COUNT(rw) != 0);
371 decr = RW_READ_INCR;
372 }
373
374 /*
375 * Compute what we expect the new value of the lock to be. Only
376 * proceed to do direct handoff if there are waiters, and if the
377 * lock would become unowned.
 *
 * Otherwise try to install the new value; on success we are done,
 * on failure the lock word is re-read and the attempt repeated.
378 */
379 for (;; owner = rw->rw_owner) {
380 new = (owner - decr);
381 if ((new & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
382 break;
383 if (RW_RELEASE(rw, owner, new))
384 return;
385 }
386
/* Direct handoff: retried until the lock word is swapped successfully. */
387 for (;;) {
388 /*
389 * Grab the turnstile chain lock. This gets the interlock
390 * on the sleep queue. Once we have that, we can adjust the
391 * waiter bits.
392 */
393 ts = turnstile_lookup(rw);
394 RW_DASSERT(rw, ts != NULL);
395 RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);
396
397 owner = rw->rw_owner;
398 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
399 rcnt = TS_WAITERS(ts, TS_READER_Q);
400
401 /*
402 * Give the lock away.
403 *
404 * If we are releasing a write lock, then wake all
405 * outstanding readers. If we are releasing a read
406 * lock, then wake one writer.
 *
 * A writer is also chosen when no readers are waiting at all.
407 */
408 if (rcnt == 0 || (decr == RW_READ_INCR && wcnt != 0)) {
409 RW_DASSERT(rw, wcnt != 0);
410 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
411
412 /*
413 * Give the lock to the longest waiting
414 * writer.
 *
 * The new lock word names that writer as owner.  The
 * waiter bits are kept if other writers remain queued
 * (HAS_WAITERS and WRITE_WANTED) or if only readers
 * remain (HAS_WAITERS alone).
415 */
416 l = TS_FIRST(ts, TS_WRITER_Q);
417 new = (uintptr_t)l | RW_WRITE_LOCKED;
418
419 if (wcnt > 1)
420 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
421 else if (rcnt != 0)
422 new |= RW_HAS_WAITERS;
423
/*
 * NOTE(review): RW_GIVE is defined outside this file; presumably a
 * barrier that pairs with RW_RECEIVE in rw_vector_enter -- confirm.
 */
424 RW_GIVE(rw);
425 if (!RW_RELEASE(rw, owner, new)) {
426 /* Oops, try again. */
427 turnstile_exit(rw);
428 continue;
429 }
430
431 /* Wake the writer. */
/*
 * NOTE(review): no turnstile_exit() follows the wakeup, so
 * turnstile_wakeup() presumably drops the chain lock -- confirm.
 */
432 turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
433 } else {
434 RW_DASSERT(rw, rcnt != 0);
435
436 /*
437 * Give the lock to all blocked readers. If there
438 * is a writer waiting, new readers that arrive
439 * after the release will be blocked out.
 *
 * The read count is preset to the number of readers
 * being woken, so each of them wakes up already
 * holding the lock.
440 */
441 new = rcnt << RW_READ_COUNT_SHIFT;
442 if (wcnt != 0)
443 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
444
445 RW_GIVE(rw);
446 if (!RW_RELEASE(rw, owner, new)) {
447 /* Oops, try again. */
448 turnstile_exit(rw);
449 continue;
450 }
451
452 /* Wake up all sleeping readers. */
453 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
454 }
455
456 break;
457 }
458 }
459
460 /*
461 * rw_tryenter:
462 *
463 * Try to acquire a rwlock.
464 */
465 int
466 rw_tryenter(krwlock_t *rw, const krw_t op)
467 {
468 uintptr_t curthread, owner, incr, need_wait;
469
470 curthread = (uintptr_t)curlwp;
471
472 RW_ASSERT(rw, curthread != 0);
473 RW_WANTLOCK(rw, op);
474
475 if (op == RW_READER) {
476 incr = RW_READ_INCR;
477 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
478 } else {
479 RW_DASSERT(rw, op == RW_WRITER);
480 incr = curthread | RW_WRITE_LOCKED;
481 need_wait = RW_WRITE_LOCKED | RW_THREAD;
482 }
483
484 for (;;) {
485 owner = rw->rw_owner;
486 if ((owner & need_wait) == 0) {
487 if (RW_ACQUIRE(rw, owner, owner + incr)) {
488 /* Got it! */
489 break;
490 }
491 continue;
492 }
493 return 0;
494 }
495
496 RW_LOCKED(rw, op);
497 RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
498 (op == RW_READER && RW_COUNT(rw) != 0));
499
500 return 1;
501 }
502
503 /*
504 * rw_downgrade:
505 *
506 * Downgrade a write lock to a read lock.
507 */
508 void
509 rw_downgrade(krwlock_t *rw)
510 {
511 uintptr_t owner, curthread, new;
512 turnstile_t *ts;
513 int rcnt, wcnt;
514
515 curthread = (uintptr_t)curlwp;
516 RW_ASSERT(rw, curthread != 0);
517 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
518 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
519 RW_UNLOCKED(rw, RW_WRITER);
520
521 owner = rw->rw_owner;
522 if ((owner & RW_HAS_WAITERS) == 0) {
523 /*
524 * There are no waiters, so we can do this the easy way.
525 * Try swapping us down to one read hold. If it fails, the
526 * lock condition has changed and we most likely now have
527 * waiters.
528 */
529 if (RW_RELEASE(rw, owner, RW_READ_INCR)) {
530 RW_LOCKED(rw, RW_READER);
531 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
532 RW_DASSERT(rw, RW_COUNT(rw) != 0);
533 return;
534 }
535 }
536
537 /*
538 * Grab the turnstile chain lock. This gets the interlock
539 * on the sleep queue. Once we have that, we can adjust the
540 * waiter bits.
541 */
542 for (;;) {
543 ts = turnstile_lookup(rw);
544 RW_DASSERT(rw, ts != NULL);
545
546 owner = rw->rw_owner;
547 rcnt = TS_WAITERS(ts, TS_READER_Q);
548 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
549
550 /*
551 * If there are no readers, just preserve the waiters
552 * bits, swap us down to one read hold and return.
553 */
554 if (rcnt == 0) {
555 RW_DASSERT(rw, wcnt != 0);
556 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
557 RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);
558
559 new = RW_READ_INCR | RW_HAS_WAITERS | RW_WRITE_WANTED;
560 if (!RW_RELEASE(rw, owner, new)) {
561 /* Oops, try again. */
562 turnstile_exit(ts);
563 continue;
564 }
565 break;
566 }
567
568 /*
569 * Give the lock to all blocked readers. We may
570 * retain one read hold if downgrading. If there
571 * is a writer waiting, new readers will be blocked
572 * out.
573 */
574 new = (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
575 if (wcnt != 0)
576 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
577
578 RW_GIVE(rw);
579 if (!RW_RELEASE(rw, owner, new)) {
580 /* Oops, try again. */
581 turnstile_exit(rw);
582 continue;
583 }
584
585 /* Wake up all sleeping readers. */
586 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
587 break;
588 }
589
590 RW_LOCKED(rw, RW_READER);
591 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
592 RW_DASSERT(rw, RW_COUNT(rw) != 0);
593 }
594
595 /*
596 * rw_tryupgrade:
597 *
598 * Try to upgrade a read lock to a write lock. We must be the
599 * only reader.
600 */
601 int
602 rw_tryupgrade(krwlock_t *rw)
603 {
604 uintptr_t owner, curthread, new;
605
606 curthread = (uintptr_t)curlwp;
607 RW_ASSERT(rw, curthread != 0);
608 RW_WANTLOCK(rw, RW_WRITER);
609
610 for (;;) {
611 owner = rw->rw_owner;
612 RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
613 if ((owner & RW_THREAD) != RW_READ_INCR) {
614 RW_ASSERT(rw, (owner & RW_THREAD) != 0);
615 return 0;
616 }
617 new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
618 if (RW_ACQUIRE(rw, owner, new))
619 break;
620 }
621
622 RW_UNLOCKED(rw, RW_READER);
623 RW_LOCKED(rw, RW_WRITER);
624 RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
625 RW_DASSERT(rw, RW_OWNER(rw) == curthread);
626
627 return 1;
628 }
629
630 /*
631 * rw_read_held:
632 *
633 * Returns true if the rwlock is held for reading. Must only be
634 * used for diagnostic assertions, and never be used to make
635 * decisions about how to use a rwlock.
636 */
637 int
638 rw_read_held(krwlock_t *rw)
639 {
640 uintptr_t owner;
641
642 if (panicstr != NULL)
643 return 1;
644
645 owner = rw->rw_owner;
646 return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
647 }
648
649 /*
650 * rw_write_held:
651 *
652 * Returns true if the rwlock is held for writing. Must only be
653 * used for diagnostic assertions, and never be used to make
654 * decisions about how to use a rwlock.
655 */
656 int
657 rw_write_held(krwlock_t *rw)
658 {
659
660 if (panicstr != NULL)
661 return 1;
662
663 return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
664 }
665
666 /*
667 * rw_lock_held:
668 *
669 * Returns true if the rwlock is held for reading or writing. Must
670 * only be used for diagnostic assertions, and never be used to make
671 * decisions about how to use a rwlock.
672 */
673 int
674 rw_lock_held(krwlock_t *rw)
675 {
676
677 if (panicstr != NULL)
678 return 1;
679
680 return (rw->rw_owner & RW_THREAD) != 0;
681 }
682
683 /*
684 * rw_owner:
685 *
686 * Return the current owner of an RW lock, but only if it is write
687 * held. Used for priority inheritance.
688 */
689 static lwp_t *
690 rw_owner(wchan_t obj)
691 {
692 krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
693 uintptr_t owner = rw->rw_owner;
694
695 if ((owner & RW_WRITE_LOCKED) == 0)
696 return NULL;
697
698 return (void *)(owner & RW_THREAD);
699 }
700