kern_rwlock.c revision 1.6.2.5 1 /* $NetBSD: kern_rwlock.c,v 1.6.2.5 2007/07/29 11:43:23 ad Exp $ */
2
3 /*-
4 * Copyright (c) 2002, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe and Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Kernel reader/writer lock implementation, modeled after those
41 * found in Solaris, a description of which can be found in:
42 *
43 * Solaris Internals: Core Kernel Architecture, Jim Mauro and
44 * Richard McDougall.
45 */
46
47 #include <sys/cdefs.h>
48 __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.6.2.5 2007/07/29 11:43:23 ad Exp $");
49
50 #define __RWLOCK_PRIVATE
51
52 #include <sys/param.h>
53 #include <sys/proc.h>
54 #include <sys/rwlock.h>
55 #include <sys/sched.h>
56 #include <sys/sleepq.h>
57 #include <sys/systm.h>
58 #include <sys/lockdebug.h>
59 #include <sys/cpu.h>
60
61 #include <dev/lockstat.h>
62
/*
 * LOCKDEBUG
 *
 * Hooks into the lock debugging facility.  Each macro expands to a
 * single expression or statement WITHOUT a trailing semicolon, so that
 * the semicolon written at the call site completes it and the macros
 * remain safe inside unbraced if/else bodies.
 */

#if defined(LOCKDEBUG)

#define	RW_WANTLOCK(rw, op)						\
	LOCKDEBUG_WANTLOCK(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_LOCKED(rw, op)						\
	LOCKDEBUG_LOCKED(RW_GETID(rw),					\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_UNLOCKED(rw, op)						\
	LOCKDEBUG_UNLOCKED(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_DASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		rw_abort(rw, __FUNCTION__, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	RW_WANTLOCK(rw, op)	/* nothing */
#define	RW_LOCKED(rw, op)	/* nothing */
#define	RW_UNLOCKED(rw, op)	/* nothing */
#define	RW_DASSERT(rw, cond)	/* nothing */

#endif	/* LOCKDEBUG */

/*
 * DIAGNOSTIC
 *
 * RW_ASSERT() is the cheaper assertion, compiled in for DIAGNOSTIC
 * kernels.  On failure it reports the stringified condition through
 * rw_abort().
 */

#if defined(DIAGNOSTIC)

#define	RW_ASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		rw_abort(rw, __FUNCTION__, "assertion failed: " #cond);	\
} while (/* CONSTCOND */ 0)

#else

#define	RW_ASSERT(rw, cond)	/* nothing */

#endif	/* DIAGNOSTIC */
110
/*
 * Generic lock-word primitives for platforms whose rwlocks are
 * 'simple', i.e. that can be driven entirely with RW_CAS() on
 * rw_owner.
 */
#if defined(__HAVE_SIMPLE_RW_LOCKS)
#define	RW_ACQUIRE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)
#define	RW_RELEASE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)
#define	RW_SETID(rw, id)		((rw)->rw_id = id)
#define	RW_GETID(rw)			((rw)->rw_id)

/*
 * RW_SET_WAITERS:
 *
 *	Take one snapshot of the lock word.  If none of the bits in
 *	'need' are set in it, return 0 without touching the lock.
 *	Otherwise try to swap in the snapshot with the 'set' bits OR'ed
 *	in, and return the result of that RW_CAS().
 */
static inline int
RW_SET_WAITERS(krwlock_t *rw, uintptr_t need, uintptr_t set)
{
	const uintptr_t snapshot = rw->rw_owner;

	if ((snapshot & need) != 0)
		return RW_CAS(&rw->rw_owner, snapshot, snapshot | set);

	return 0;
}
#endif	/* __HAVE_SIMPLE_RW_LOCKS */
130
131 /*
132 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
133 */
134 #ifdef LOCKDEBUG
135 #undef __HAVE_RW_STUBS
136 #endif
137
138 #ifndef __HAVE_RW_STUBS
139 __strong_alias(rw_enter,rw_vector_enter);
140 __strong_alias(rw_exit,rw_vector_exit);
141 #endif
142
143 static void rw_dump(volatile void *);
144 static lwp_t *rw_owner(wchan_t);
145
146 lockops_t rwlock_lockops = {
147 "Reader / writer lock",
148 1,
149 rw_dump
150 };
151
152 syncobj_t rw_syncobj = {
153 SOBJ_SLEEPQ_SORTED,
154 turnstile_unsleep,
155 turnstile_changepri,
156 sleepq_lendpri,
157 rw_owner,
158 };
159
160 /*
161 * rw_dump:
162 *
163 * Dump the contents of a rwlock structure.
164 */
165 static void
166 rw_dump(volatile void *cookie)
167 {
168 volatile krwlock_t *rw = cookie;
169
170 printf_nolog("owner/count : %#018lx flags : %#018x\n",
171 (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
172 }
173
174 /*
175 * rw_abort:
176 *
177 * Dump information about an error and panic the system. This
178 * generates a lot of machine code in the DIAGNOSTIC case, so
179 * we ask the compiler to not inline it.
180 */
181 #if __GNUC_PREREQ__(3, 0)
182 __attribute ((noinline))
183 #endif
184 static void
185 rw_abort(krwlock_t *rw, const char *func, const char *msg)
186 {
187
188 if (panicstr != NULL)
189 return;
190
191 LOCKDEBUG_ABORT(RW_GETID(rw), rw, &rwlock_lockops, func, msg);
192 }
193
194 /*
195 * rw_init:
196 *
197 * Initialize a rwlock for use.
198 */
199 void
200 rw_init(krwlock_t *rw)
201 {
202 u_int id;
203
204 memset(rw, 0, sizeof(*rw));
205
206 id = LOCKDEBUG_ALLOC(rw, &rwlock_lockops);
207 RW_SETID(rw, id);
208 }
209
210 /*
211 * rw_destroy:
212 *
213 * Tear down a rwlock.
214 */
215 void
216 rw_destroy(krwlock_t *rw)
217 {
218
219 LOCKDEBUG_FREE(rw, RW_GETID(rw));
220 RW_ASSERT(rw, rw->rw_owner == 0);
221 }
222
223 /*
224 * rw_vector_enter:
225 *
226 * Acquire a rwlock.
227 */
228 void
229 rw_vector_enter(krwlock_t *rw, const krw_t op)
230 {
231 uintptr_t owner, incr, need_wait, set_wait, curthread;
232 turnstile_t *ts;
233 int queue;
234 lwp_t *l;
235 LOCKSTAT_TIMER(slptime);
236 LOCKSTAT_FLAG(lsflag);
237
238 l = curlwp;
239 curthread = (uintptr_t)l;
240
241 RW_ASSERT(rw, !cpu_intr_p());
242 RW_ASSERT(rw, curthread != 0);
243 RW_WANTLOCK(rw, op);
244
245 if (panicstr == NULL) {
246 LOCKDEBUG_BARRIER(&kernel_lock, 1);
247 }
248
249 /*
250 * We play a slight trick here. If we're a reader, we want
251 * increment the read count. If we're a writer, we want to
252 * set the owner field and whe WRITE_LOCKED bit.
253 *
254 * In the latter case, we expect those bits to be zero,
255 * therefore we can use an add operation to set them, which
256 * means an add operation for both cases.
257 */
258 if (__predict_true(op == RW_READER)) {
259 incr = RW_READ_INCR;
260 set_wait = RW_HAS_WAITERS;
261 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
262 queue = TS_READER_Q;
263 } else {
264 RW_DASSERT(rw, op == RW_WRITER);
265 incr = curthread | RW_WRITE_LOCKED;
266 set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
267 need_wait = RW_WRITE_LOCKED | RW_THREAD;
268 queue = TS_WRITER_Q;
269 }
270
271 LOCKSTAT_ENTER(lsflag);
272
273 for (;;) {
274 /*
275 * Read the lock owner field. If the need-to-wait
276 * indicator is clear, then try to acquire the lock.
277 */
278 owner = rw->rw_owner;
279 if ((owner & need_wait) == 0) {
280 if (RW_ACQUIRE(rw, owner, owner + incr)) {
281 /* Got it! */
282 break;
283 }
284
285 /*
286 * Didn't get it -- spin around again (we'll
287 * probably sleep on the next iteration).
288 */
289 continue;
290 }
291
292 if (panicstr != NULL)
293 return;
294 if (RW_OWNER(rw) == curthread)
295 rw_abort(rw, __FUNCTION__, "locking against myself");
296
297 /*
298 * Grab the turnstile chain lock. Once we have that, we
299 * can adjust the waiter bits and sleep queue.
300 */
301 ts = turnstile_lookup(rw);
302
303 /*
304 * Mark the rwlock as having waiters. If the set fails,
305 * then we may not need to sleep and should spin again.
306 */
307 if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
308 turnstile_exit(rw);
309 continue;
310 }
311
312 LOCKSTAT_START_TIMER(lsflag, slptime);
313
314 turnstile_block(ts, queue, rw, &rw_syncobj);
315
316 /* If we wake up and arrive here, we've been handed the lock. */
317 RW_RECEIVE(rw);
318
319 LOCKSTAT_STOP_TIMER(lsflag, slptime);
320 LOCKSTAT_EVENT(lsflag, rw,
321 LB_RWLOCK | (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2),
322 1, slptime);
323
324 break;
325 }
326
327 LOCKSTAT_EXIT(lsflag);
328
329 RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
330 (op == RW_READER && RW_COUNT(rw) != 0));
331 RW_LOCKED(rw, op);
332 }
333
334 /*
335 * rw_vector_exit:
336 *
337 * Release a rwlock.
338 */
339 void
340 rw_vector_exit(krwlock_t *rw)
341 {
342 uintptr_t curthread, owner, decr, new;
343 turnstile_t *ts;
344 int rcnt, wcnt;
345 lwp_t *l;
346
347 curthread = (uintptr_t)curlwp;
348 RW_ASSERT(rw, curthread != 0);
349
350 if (panicstr != NULL)
351 return;
352
353 /*
354 * Again, we use a trick. Since we used an add operation to
355 * set the required lock bits, we can use a subtract to clear
356 * them, which makes the read-release and write-release path
357 * the same.
358 */
359 owner = rw->rw_owner;
360 if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
361 RW_UNLOCKED(rw, RW_WRITER);
362 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
363 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
364 decr = curthread | RW_WRITE_LOCKED;
365 } else {
366 RW_UNLOCKED(rw, RW_READER);
367 RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
368 RW_ASSERT(rw, RW_COUNT(rw) != 0);
369 decr = RW_READ_INCR;
370 }
371
372 /*
373 * Compute what we expect the new value of the lock to be. Only
374 * proceed to do direct handoff if there are waiters, and if the
375 * lock would become unowned.
376 */
377 for (;; owner = rw->rw_owner) {
378 new = (owner - decr);
379 if ((new & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
380 break;
381 if (RW_RELEASE(rw, owner, new))
382 return;
383 }
384
385 for (;;) {
386 /*
387 * Grab the turnstile chain lock. This gets the interlock
388 * on the sleep queue. Once we have that, we can adjust the
389 * waiter bits.
390 */
391 ts = turnstile_lookup(rw);
392 RW_DASSERT(rw, ts != NULL);
393 RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);
394
395 owner = rw->rw_owner;
396 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
397 rcnt = TS_WAITERS(ts, TS_READER_Q);
398
399 /*
400 * Give the lock away.
401 *
402 * If we are releasing a write lock, then wake all
403 * outstanding readers. If we are releasing a read
404 * lock, then wake one writer.
405 */
406 if (rcnt == 0 || (decr == RW_READ_INCR && wcnt != 0)) {
407 RW_DASSERT(rw, wcnt != 0);
408 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
409
410 /*
411 * Give the lock to the longest waiting
412 * writer.
413 */
414 l = TS_FIRST(ts, TS_WRITER_Q);
415 new = (uintptr_t)l | RW_WRITE_LOCKED;
416
417 if (wcnt > 1)
418 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
419 else if (rcnt != 0)
420 new |= RW_HAS_WAITERS;
421
422 RW_GIVE(rw);
423 if (!RW_RELEASE(rw, owner, new)) {
424 /* Oops, try again. */
425 turnstile_exit(rw);
426 continue;
427 }
428
429 /* Wake the writer. */
430 turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
431 } else {
432 RW_DASSERT(rw, rcnt != 0);
433
434 /*
435 * Give the lock to all blocked readers. If there
436 * is a writer waiting, new readers that arrive
437 * after the release will be blocked out.
438 */
439 new = rcnt << RW_READ_COUNT_SHIFT;
440 if (wcnt != 0)
441 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
442
443 RW_GIVE(rw);
444 if (!RW_RELEASE(rw, owner, new)) {
445 /* Oops, try again. */
446 turnstile_exit(rw);
447 continue;
448 }
449
450 /* Wake up all sleeping readers. */
451 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
452 }
453
454 break;
455 }
456 }
457
458 /*
459 * rw_tryenter:
460 *
461 * Try to acquire a rwlock.
462 */
463 int
464 rw_tryenter(krwlock_t *rw, const krw_t op)
465 {
466 uintptr_t curthread, owner, incr, need_wait;
467
468 curthread = (uintptr_t)curlwp;
469
470 RW_ASSERT(rw, curthread != 0);
471 RW_WANTLOCK(rw, op);
472
473 if (op == RW_READER) {
474 incr = RW_READ_INCR;
475 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
476 } else {
477 RW_DASSERT(rw, op == RW_WRITER);
478 incr = curthread | RW_WRITE_LOCKED;
479 need_wait = RW_WRITE_LOCKED | RW_THREAD;
480 }
481
482 for (;;) {
483 owner = rw->rw_owner;
484 if ((owner & need_wait) == 0) {
485 if (RW_ACQUIRE(rw, owner, owner + incr)) {
486 /* Got it! */
487 break;
488 }
489 continue;
490 }
491 return 0;
492 }
493
494 RW_LOCKED(rw, op);
495 RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
496 (op == RW_READER && RW_COUNT(rw) != 0));
497
498 return 1;
499 }
500
501 /*
502 * rw_downgrade:
503 *
504 * Downgrade a write lock to a read lock.
505 */
506 void
507 rw_downgrade(krwlock_t *rw)
508 {
509 uintptr_t owner, curthread, new;
510 turnstile_t *ts;
511 int rcnt, wcnt;
512
513 curthread = (uintptr_t)curlwp;
514 RW_ASSERT(rw, curthread != 0);
515 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
516 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
517 RW_UNLOCKED(rw, RW_WRITER);
518
519 owner = rw->rw_owner;
520 if ((owner & RW_HAS_WAITERS) == 0) {
521 /*
522 * There are no waiters, so we can do this the easy way.
523 * Try swapping us down to one read hold. If it fails, the
524 * lock condition has changed and we most likely now have
525 * waiters.
526 */
527 if (RW_RELEASE(rw, owner, RW_READ_INCR)) {
528 RW_LOCKED(rw, RW_READER);
529 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
530 RW_DASSERT(rw, RW_COUNT(rw) != 0);
531 return;
532 }
533 }
534
535 /*
536 * Grab the turnstile chain lock. This gets the interlock
537 * on the sleep queue. Once we have that, we can adjust the
538 * waiter bits.
539 */
540 for (;;) {
541 ts = turnstile_lookup(rw);
542 RW_DASSERT(rw, ts != NULL);
543
544 owner = rw->rw_owner;
545 rcnt = TS_WAITERS(ts, TS_READER_Q);
546 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
547
548 /*
549 * If there are no readers, just preserve the waiters
550 * bits, swap us down to one read hold and return.
551 */
552 if (rcnt == 0) {
553 RW_DASSERT(rw, wcnt != 0);
554 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
555 RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);
556
557 new = RW_READ_INCR | RW_HAS_WAITERS | RW_WRITE_WANTED;
558 if (!RW_RELEASE(rw, owner, new)) {
559 /* Oops, try again. */
560 turnstile_exit(ts);
561 continue;
562 }
563 break;
564 }
565
566 /*
567 * Give the lock to all blocked readers. We may
568 * retain one read hold if downgrading. If there
569 * is a writer waiting, new readers will be blocked
570 * out.
571 */
572 new = (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
573 if (wcnt != 0)
574 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
575
576 RW_GIVE(rw);
577 if (!RW_RELEASE(rw, owner, new)) {
578 /* Oops, try again. */
579 turnstile_exit(rw);
580 continue;
581 }
582
583 /* Wake up all sleeping readers. */
584 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
585 break;
586 }
587
588 RW_LOCKED(rw, RW_READER);
589 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
590 RW_DASSERT(rw, RW_COUNT(rw) != 0);
591 }
592
593 /*
594 * rw_tryupgrade:
595 *
596 * Try to upgrade a read lock to a write lock. We must be the
597 * only reader.
598 */
599 int
600 rw_tryupgrade(krwlock_t *rw)
601 {
602 uintptr_t owner, curthread, new;
603
604 curthread = (uintptr_t)curlwp;
605 RW_ASSERT(rw, curthread != 0);
606 RW_WANTLOCK(rw, RW_WRITER);
607
608 for (;;) {
609 owner = rw->rw_owner;
610 RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
611 if ((owner & RW_THREAD) != RW_READ_INCR) {
612 RW_ASSERT(rw, (owner & RW_THREAD) != 0);
613 return 0;
614 }
615 new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
616 if (RW_ACQUIRE(rw, owner, new))
617 break;
618 }
619
620 RW_UNLOCKED(rw, RW_READER);
621 RW_LOCKED(rw, RW_WRITER);
622 RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
623 RW_DASSERT(rw, RW_OWNER(rw) == curthread);
624
625 return 1;
626 }
627
628 /*
629 * rw_read_held:
630 *
631 * Returns true if the rwlock is held for reading. Must only be
632 * used for diagnostic assertions, and never be used to make
633 * decisions about how to use a rwlock.
634 */
635 int
636 rw_read_held(krwlock_t *rw)
637 {
638 uintptr_t owner;
639
640 if (panicstr != NULL)
641 return 1;
642
643 owner = rw->rw_owner;
644 return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
645 }
646
647 /*
648 * rw_write_held:
649 *
650 * Returns true if the rwlock is held for writing. Must only be
651 * used for diagnostic assertions, and never be used to make
652 * decisions about how to use a rwlock.
653 */
654 int
655 rw_write_held(krwlock_t *rw)
656 {
657
658 if (panicstr != NULL)
659 return 1;
660
661 return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
662 }
663
664 /*
665 * rw_lock_held:
666 *
667 * Returns true if the rwlock is held for reading or writing. Must
668 * only be used for diagnostic assertions, and never be used to make
669 * decisions about how to use a rwlock.
670 */
671 int
672 rw_lock_held(krwlock_t *rw)
673 {
674
675 if (panicstr != NULL)
676 return 1;
677
678 return (rw->rw_owner & RW_THREAD) != 0;
679 }
680
681 /*
682 * rw_owner:
683 *
684 * Return the current owner of an RW lock, but only if it is write
685 * held. Used for priority inheritance.
686 */
687 static lwp_t *
688 rw_owner(wchan_t obj)
689 {
690 krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
691 uintptr_t owner = rw->rw_owner;
692
693 if ((owner & RW_WRITE_LOCKED) == 0)
694 return NULL;
695
696 return (void *)(owner & RW_THREAD);
697 }
698