kern_rwlock.c revision 1.7 1 /* $NetBSD: kern_rwlock.c,v 1.7 2007/03/30 11:06:58 ad Exp $ */
2
3 /*-
4 * Copyright (c) 2002, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe and Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Kernel reader/writer lock implementation, modeled after those
41 * found in Solaris, a description of which can be found in:
42 *
43 * Solaris Internals: Core Kernel Architecture, Jim Mauro and
44 * Richard McDougall.
45 */
46
47 #include "opt_multiprocessor.h"
48
49 #include <sys/cdefs.h>
50 __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.7 2007/03/30 11:06:58 ad Exp $");
51
52 #define __RWLOCK_PRIVATE
53
54 #include <sys/param.h>
55 #include <sys/proc.h>
56 #include <sys/rwlock.h>
57 #include <sys/sched.h>
58 #include <sys/sleepq.h>
59 #include <sys/systm.h>
60 #include <sys/lockdebug.h>
61
62 #include <dev/lockstat.h>
63
/*
 * RW_ABORT:
 *
 *	Hand an rwlock error off to LOCKDEBUG_ABORT(), passing the lock's
 *	debug ID, the lock itself, the rwlock lockops descriptor, the name
 *	of the calling function and the message.
 *
 *	__func__ is the standard C99 spelling of the GCC-specific
 *	__FUNCTION__ and yields the same string.
 */
#define	RW_ABORT(rw, msg)						\
	LOCKDEBUG_ABORT(RW_GETID(rw), rw, &rwlock_lockops, __func__, msg)
66
/*
 * LOCKDEBUG
 *
 * Hooks that forward lock activity to the LOCKDEBUG macros, plus an
 * assertion that is only compiled in for LOCKDEBUG kernels.  Without
 * LOCKDEBUG every hook expands to nothing and its arguments are not
 * evaluated.
 *
 * None of the expansions ends in a semicolon: the caller supplies it,
 * so each macro behaves as a single statement even as the unbraced
 * body of an if/else.
 */

#if defined(LOCKDEBUG)

/* The third argument is true for a read hold, false for a write hold. */
#define	RW_WANTLOCK(rw, op)						\
	LOCKDEBUG_WANTLOCK(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_LOCKED(rw, op)						\
	LOCKDEBUG_LOCKED(RW_GETID(rw),					\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_UNLOCKED(rw, op)						\
	LOCKDEBUG_UNLOCKED(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
/* Abort with the stringified condition if 'cond' does not hold. */
#define	RW_DASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	RW_WANTLOCK(rw, op)	/* nothing */
#define	RW_LOCKED(rw, op)	/* nothing */
#define	RW_UNLOCKED(rw, op)	/* nothing */
#define	RW_DASSERT(rw, cond)	/* nothing */

#endif	/* LOCKDEBUG */
96
/*
 * DIAGNOSTIC
 *
 * RW_ASSERT() checks a condition only in DIAGNOSTIC kernels.  When the
 * condition is false it calls RW_ABORT() with the stringified
 * condition; otherwise, and in non-DIAGNOSTIC kernels, it does nothing.
 */

#ifndef DIAGNOSTIC

#define	RW_ASSERT(rw, cond)	/* nothing */

#else	/* DIAGNOSTIC */

#define	RW_ASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#endif	/* !DIAGNOSTIC */
114
115 /*
116 * For platforms that use 'simple' RW locks.
117 */
118 #ifdef __HAVE_SIMPLE_RW_LOCKS
/*
 * Acquire and release are the same operation here: a compare-and-swap
 * of the owner word from 'old' to 'new' via RW_CAS().
 */
#define	RW_ACQUIRE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)
#define	RW_RELEASE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)

/* Store and fetch the ID kept in the lock's rw_id member. */
#define	RW_SETID(rw, id)		((rw)->rw_id = (id))
#define	RW_GETID(rw)			((rw)->rw_id)
123
124 static inline int
125 RW_SET_WAITERS(krwlock_t *rw, uintptr_t need, uintptr_t set)
126 {
127 uintptr_t old;
128
129 if (((old = rw->rw_owner) & need) == 0)
130 return 0;
131 return RW_CAS(&rw->rw_owner, old, old | set);
132 }
133 #endif /* __HAVE_SIMPLE_RW_LOCKS */
134
/*
 * When the platform supplies no rw_enter()/rw_exit() stubs, export the
 * vector functions under those names.  LOCKDEBUG discards any platform
 * stubs so that every acquire and release goes through the vector
 * functions in this file.
 */
#if defined(LOCKDEBUG)
#undef	__HAVE_RW_STUBS
#endif

#if !defined(__HAVE_RW_STUBS)
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
#endif
146
147 static void rw_dump(volatile void *);
148 static lwp_t *rw_owner(wchan_t);
149
/*
 * Lock-class descriptor for rwlocks.  Its address is passed to
 * LOCKDEBUG_ALLOC() by rw_init() and to LOCKDEBUG_ABORT() by RW_ABORT().
 *
 * The initializer is positional; the field meanings noted below are
 * inferred from the values and should be confirmed against the
 * declaration of lockops_t.
 */
lockops_t rwlock_lockops = {
	"Reader / writer lock",	/* presumably the lock class name */
	1,			/* NOTE(review): meaning not visible here */
	rw_dump			/* routine that prints a rwlock's state */
};
155
/*
 * Synchronisation-object descriptor that rw_vector_enter() hands to
 * turnstile_block() when it has to sleep on a rwlock.
 *
 * The initializer is positional; confirm the role of each slot against
 * the declaration of syncobj_t.
 */
syncobj_t rw_syncobj = {
	SOBJ_SLEEPQ_SORTED,	/* flags word -- TODO confirm semantics */
	turnstile_unsleep,
	turnstile_changepri,
	sleepq_lendpri,
	rw_owner,		/* reports the write owner, see rw_owner() */
};
163
164 /*
165 * rw_dump:
166 *
167 * Dump the contents of a rwlock structure.
168 */
169 void
170 rw_dump(volatile void *cookie)
171 {
172 volatile krwlock_t *rw = cookie;
173
174 printf_nolog("owner/count : %#018lx flags : %#018x\n",
175 (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
176 }
177
178 /*
179 * rw_init:
180 *
181 * Initialize a rwlock for use.
182 */
183 void
184 rw_init(krwlock_t *rw)
185 {
186 u_int id;
187
188 memset(rw, 0, sizeof(*rw));
189
190 id = LOCKDEBUG_ALLOC(rw, &rwlock_lockops);
191 RW_SETID(rw, id);
192 }
193
194 /*
195 * rw_destroy:
196 *
197 * Tear down a rwlock.
198 */
199 void
200 rw_destroy(krwlock_t *rw)
201 {
202
203 LOCKDEBUG_FREE(rw, RW_GETID(rw));
204 RW_ASSERT(rw, rw->rw_owner == 0);
205 }
206
207 /*
208 * rw_vector_enter:
209 *
210 * Acquire a rwlock.
211 */
212 void
213 rw_vector_enter(krwlock_t *rw, const krw_t op)
214 {
215 uintptr_t owner, incr, need_wait, set_wait, curthread;
216 turnstile_t *ts;
217 int queue;
218 lwp_t *l;
219 LOCKSTAT_TIMER(slptime);
220 LOCKSTAT_FLAG(lsflag);
221
222 l = curlwp;
223 curthread = (uintptr_t)l;
224
225 RW_ASSERT(rw, curthread != 0);
226 RW_WANTLOCK(rw, op);
227
228 #ifdef LOCKDEBUG
229 if (panicstr == NULL) {
230 simple_lock_only_held(NULL, "rw_enter");
231 LOCKDEBUG_BARRIER(&kernel_lock, 1);
232 }
233 #endif
234
235 /*
236 * We play a slight trick here. If we're a reader, we want
237 * increment the read count. If we're a writer, we want to
238 * set the owner field and whe WRITE_LOCKED bit.
239 *
240 * In the latter case, we expect those bits to be zero,
241 * therefore we can use an add operation to set them, which
242 * means an add operation for both cases.
243 */
244 if (__predict_true(op == RW_READER)) {
245 incr = RW_READ_INCR;
246 set_wait = RW_HAS_WAITERS;
247 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
248 queue = TS_READER_Q;
249 } else {
250 RW_DASSERT(rw, op == RW_WRITER);
251 incr = curthread | RW_WRITE_LOCKED;
252 set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
253 need_wait = RW_WRITE_LOCKED | RW_THREAD;
254 queue = TS_WRITER_Q;
255 }
256
257 LOCKSTAT_ENTER(lsflag);
258
259 for (;;) {
260 /*
261 * Read the lock owner field. If the need-to-wait
262 * indicator is clear, then try to acquire the lock.
263 */
264 owner = rw->rw_owner;
265 if ((owner & need_wait) == 0) {
266 if (RW_ACQUIRE(rw, owner, owner + incr)) {
267 /* Got it! */
268 break;
269 }
270
271 /*
272 * Didn't get it -- spin around again (we'll
273 * probably sleep on the next iteration).
274 */
275 continue;
276 }
277
278 if (panicstr != NULL)
279 return;
280 if (RW_OWNER(rw) == curthread)
281 RW_ABORT(rw, "locking against myself");
282
283 /*
284 * Grab the turnstile chain lock. Once we have that, we
285 * can adjust the waiter bits and sleep queue.
286 */
287 ts = turnstile_lookup(rw);
288
289 /*
290 * XXXSMP if this is a high priority LWP (interrupt handler
291 * or realtime) and acquiring a read hold, then we shouldn't
292 * wait for RW_WRITE_WANTED if our priority is >= that of
293 * the highest priority writer that is waiting.
294 */
295
296 /*
297 * Mark the rwlock as having waiters. If the set fails,
298 * then we may not need to sleep and should spin again.
299 */
300 if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
301 turnstile_exit(rw);
302 continue;
303 }
304
305 LOCKSTAT_START_TIMER(lsflag, slptime);
306
307 turnstile_block(ts, queue, rw, &rw_syncobj);
308
309 /* If we wake up and arrive here, we've been handed the lock. */
310 RW_RECEIVE(rw);
311
312 LOCKSTAT_STOP_TIMER(lsflag, slptime);
313 LOCKSTAT_EVENT(lsflag, rw,
314 LB_RWLOCK | (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2),
315 1, slptime);
316
317 turnstile_unblock();
318 break;
319 }
320
321 LOCKSTAT_EXIT(lsflag);
322
323 RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
324 (op == RW_READER && RW_COUNT(rw) != 0));
325 RW_LOCKED(rw, op);
326 }
327
328 /*
329 * rw_vector_exit:
330 *
331 * Release a rwlock.
332 */
333 void
334 rw_vector_exit(krwlock_t *rw)
335 {
336 uintptr_t curthread, owner, decr, new;
337 turnstile_t *ts;
338 int rcnt, wcnt;
339 lwp_t *l;
340
341 curthread = (uintptr_t)curlwp;
342 RW_ASSERT(rw, curthread != 0);
343
344 if (panicstr != NULL) {
345 /*
346 * XXX What's the correct thing to do here? We should at
347 * least release the lock.
348 */
349 return;
350 }
351
352 /*
353 * Again, we use a trick. Since we used an add operation to
354 * set the required lock bits, we can use a subtract to clear
355 * them, which makes the read-release and write-release path
356 * the same.
357 */
358 owner = rw->rw_owner;
359 if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
360 RW_UNLOCKED(rw, RW_WRITER);
361 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
362 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
363 decr = curthread | RW_WRITE_LOCKED;
364 } else {
365 RW_UNLOCKED(rw, RW_READER);
366 RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
367 RW_ASSERT(rw, RW_COUNT(rw) != 0);
368 decr = RW_READ_INCR;
369 }
370
371 /*
372 * Compute what we expect the new value of the lock to be. Only
373 * proceed to do direct handoff if there are waiters, and if the
374 * lock would become unowned.
375 */
376 for (;; owner = rw->rw_owner) {
377 new = (owner - decr);
378 if ((new & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
379 break;
380 if (RW_RELEASE(rw, owner, new))
381 return;
382 }
383
384 for (;;) {
385 /*
386 * Grab the turnstile chain lock. This gets the interlock
387 * on the sleep queue. Once we have that, we can adjust the
388 * waiter bits.
389 */
390 ts = turnstile_lookup(rw);
391 RW_DASSERT(rw, ts != NULL);
392 RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);
393
394 owner = rw->rw_owner;
395 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
396 rcnt = TS_WAITERS(ts, TS_READER_Q);
397
398 /*
399 * Give the lock away.
400 *
401 * If we are releasing a write lock, then wake all
402 * outstanding readers. If we are releasing a read
403 * lock, then wake one writer.
404 */
405 if (rcnt == 0 || (decr == RW_READ_INCR && wcnt != 0)) {
406 RW_DASSERT(rw, wcnt != 0);
407 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
408
409 /*
410 * Give the lock to the longest waiting
411 * writer.
412 */
413 l = TS_FIRST(ts, TS_WRITER_Q);
414 new = (uintptr_t)l | RW_WRITE_LOCKED;
415
416 if (wcnt > 1)
417 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
418 else if (rcnt != 0)
419 new |= RW_HAS_WAITERS;
420
421 RW_GIVE(rw);
422 if (!RW_RELEASE(rw, owner, new)) {
423 /* Oops, try again. */
424 turnstile_exit(rw);
425 continue;
426 }
427
428 /* Wake the writer. */
429 turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
430 } else {
431 RW_DASSERT(rw, rcnt != 0);
432
433 /*
434 * Give the lock to all blocked readers. If there
435 * is a writer waiting, new readers that arrive
436 * after the release will be blocked out.
437 */
438 new = rcnt << RW_READ_COUNT_SHIFT;
439 if (wcnt != 0)
440 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
441
442 RW_GIVE(rw);
443 if (!RW_RELEASE(rw, owner, new)) {
444 /* Oops, try again. */
445 turnstile_exit(rw);
446 continue;
447 }
448
449 /* Wake up all sleeping readers. */
450 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
451 }
452
453 break;
454 }
455 }
456
457 /*
458 * rw_tryenter:
459 *
460 * Try to acquire a rwlock.
461 */
462 int
463 rw_tryenter(krwlock_t *rw, const krw_t op)
464 {
465 uintptr_t curthread, owner, incr, need_wait;
466
467 curthread = (uintptr_t)curlwp;
468
469 RW_ASSERT(rw, curthread != 0);
470 RW_WANTLOCK(rw, op);
471
472 if (op == RW_READER) {
473 incr = RW_READ_INCR;
474 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
475 } else {
476 RW_DASSERT(rw, op == RW_WRITER);
477 incr = curthread | RW_WRITE_LOCKED;
478 need_wait = RW_WRITE_LOCKED | RW_THREAD;
479 }
480
481 for (;;) {
482 owner = rw->rw_owner;
483 if ((owner & need_wait) == 0) {
484 if (RW_ACQUIRE(rw, owner, owner + incr)) {
485 /* Got it! */
486 break;
487 }
488 continue;
489 }
490 return 0;
491 }
492
493 RW_LOCKED(rw, op);
494 RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
495 (op == RW_READER && RW_COUNT(rw) != 0));
496
497 return 1;
498 }
499
500 /*
501 * rw_downgrade:
502 *
503 * Downgrade a write lock to a read lock.
504 */
505 void
506 rw_downgrade(krwlock_t *rw)
507 {
508 uintptr_t owner, curthread, new;
509 turnstile_t *ts;
510 int rcnt, wcnt;
511
512 curthread = (uintptr_t)curlwp;
513 RW_ASSERT(rw, curthread != 0);
514 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
515 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
516 RW_UNLOCKED(rw, RW_WRITER);
517
518 owner = rw->rw_owner;
519 if ((owner & RW_HAS_WAITERS) == 0) {
520 /*
521 * There are no waiters, so we can do this the easy way.
522 * Try swapping us down to one read hold. If it fails, the
523 * lock condition has changed and we most likely now have
524 * waiters.
525 */
526 if (RW_RELEASE(rw, owner, RW_READ_INCR)) {
527 RW_LOCKED(rw, RW_READER);
528 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
529 RW_DASSERT(rw, RW_COUNT(rw) != 0);
530 return;
531 }
532 }
533
534 /*
535 * Grab the turnstile chain lock. This gets the interlock
536 * on the sleep queue. Once we have that, we can adjust the
537 * waiter bits.
538 */
539 for (;;) {
540 ts = turnstile_lookup(rw);
541 RW_DASSERT(rw, ts != NULL);
542
543 owner = rw->rw_owner;
544 rcnt = TS_WAITERS(ts, TS_READER_Q);
545 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
546
547 /*
548 * If there are no readers, just preserve the waiters
549 * bits, swap us down to one read hold and return.
550 */
551 if (rcnt == 0) {
552 RW_DASSERT(rw, wcnt != 0);
553 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
554 RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);
555
556 new = RW_READ_INCR | RW_HAS_WAITERS | RW_WRITE_WANTED;
557 if (!RW_RELEASE(rw, owner, new)) {
558 /* Oops, try again. */
559 turnstile_exit(ts);
560 continue;
561 }
562 break;
563 }
564
565 /*
566 * Give the lock to all blocked readers. We may
567 * retain one read hold if downgrading. If there
568 * is a writer waiting, new readers will be blocked
569 * out.
570 */
571 new = (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
572 if (wcnt != 0)
573 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
574
575 RW_GIVE(rw);
576 if (!RW_RELEASE(rw, owner, new)) {
577 /* Oops, try again. */
578 turnstile_exit(rw);
579 continue;
580 }
581
582 /* Wake up all sleeping readers. */
583 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
584 break;
585 }
586
587 RW_LOCKED(rw, RW_READER);
588 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
589 RW_DASSERT(rw, RW_COUNT(rw) != 0);
590 }
591
592 /*
593 * rw_tryupgrade:
594 *
595 * Try to upgrade a read lock to a write lock. We must be the
596 * only reader.
597 */
598 int
599 rw_tryupgrade(krwlock_t *rw)
600 {
601 uintptr_t owner, curthread, new;
602
603 curthread = (uintptr_t)curlwp;
604 RW_ASSERT(rw, curthread != 0);
605 RW_WANTLOCK(rw, RW_WRITER);
606
607 for (;;) {
608 owner = rw->rw_owner;
609 RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
610 if ((owner & RW_THREAD) != RW_READ_INCR) {
611 RW_ASSERT(rw, (owner & RW_THREAD) != 0);
612 return 0;
613 }
614 new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
615 if (RW_ACQUIRE(rw, owner, new))
616 break;
617 }
618
619 RW_UNLOCKED(rw, RW_READER);
620 RW_LOCKED(rw, RW_WRITER);
621 RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
622 RW_DASSERT(rw, RW_OWNER(rw) == curthread);
623
624 return 1;
625 }
626
627 /*
628 * rw_read_held:
629 *
630 * Returns true if the rwlock is held for reading. Must only be
631 * used for diagnostic assertions, and never be used to make
632 * decisions about how to use a rwlock.
633 */
634 int
635 rw_read_held(krwlock_t *rw)
636 {
637 uintptr_t owner;
638
639 if (panicstr != NULL)
640 return 1;
641
642 owner = rw->rw_owner;
643 return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
644 }
645
646 /*
647 * rw_write_held:
648 *
649 * Returns true if the rwlock is held for writing. Must only be
650 * used for diagnostic assertions, and never be used to make
651 * decisions about how to use a rwlock.
652 */
653 int
654 rw_write_held(krwlock_t *rw)
655 {
656
657 if (panicstr != NULL)
658 return 1;
659
660 return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
661 }
662
663 /*
664 * rw_lock_held:
665 *
666 * Returns true if the rwlock is held for reading or writing. Must
667 * only be used for diagnostic assertions, and never be used to make
668 * decisions about how to use a rwlock.
669 */
670 int
671 rw_lock_held(krwlock_t *rw)
672 {
673
674 if (panicstr != NULL)
675 return 1;
676
677 return (rw->rw_owner & RW_THREAD) != 0;
678 }
679
680 /*
681 * rw_owner:
682 *
683 * Return the current owner of an RW lock, but only if it is write
684 * held. Used for priority inheritance.
685 */
686 static lwp_t *
687 rw_owner(wchan_t obj)
688 {
689 krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
690 uintptr_t owner = rw->rw_owner;
691
692 if ((owner & RW_WRITE_LOCKED) == 0)
693 return NULL;
694
695 return (void *)(owner & RW_THREAD);
696 }
697