kern_rwlock.c revision 1.1.18.1 1 /* $NetBSD: kern_rwlock.c,v 1.1.18.1 2007/02/26 09:11:10 yamt Exp $ */
2
3 /*-
4 * Copyright (c) 2002, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe and Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Kernel reader/writer lock implementation, modeled after those
41 * found in Solaris, a description of which can be found in:
42 *
43 * Solaris Internals: Core Kernel Architecture, Jim Mauro and
44 * Richard McDougall.
45 */
46
47 #include "opt_multiprocessor.h"
48
49 #include <sys/cdefs.h>
50 __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.1.18.1 2007/02/26 09:11:10 yamt Exp $");
51
52 #define __RWLOCK_PRIVATE
53
54 #include <sys/param.h>
55 #include <sys/proc.h>
56 #include <sys/rwlock.h>
57 #include <sys/sched.h>
58 #include <sys/sleepq.h>
59 #include <sys/systm.h>
60 #include <sys/lockdebug.h>
61
62 #include <dev/lockstat.h>
63
/*
 * RW_ABORT:
 *
 *	Report a fatal rwlock problem through LOCKDEBUG_ABORT(), passing
 *	the lock's ID, the lock itself, the rwlock lockops, the name of
 *	the enclosing function and the message.  __func__ is the standard
 *	C spelling of the GCC-specific __FUNCTION__.
 */
#define	RW_ABORT(rw, msg)						\
	LOCKDEBUG_ABORT(RW_GETID(rw), rw, &rwlock_lockops, __func__, msg)
66
67 /*
68 * LOCKDEBUG
69 */
70
#if defined(LOCKDEBUG)

/*
 * LOCKDEBUG hooks.  RW_WANTLOCK/RW_LOCKED/RW_UNLOCKED forward the lock ID,
 * the caller's return address and a "shared" flag (true when `op' is
 * RW_READER) to the matching LOCKDEBUG_*() hook.  RW_DASSERT aborts through
 * RW_ABORT() when `cond' is false.
 *
 * None of these macros supplies its own trailing semicolon: the caller
 * writes it, so each one behaves as a single statement even in an
 * unbraced if/else.
 */
#define	RW_WANTLOCK(rw, op)						\
	LOCKDEBUG_WANTLOCK(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_LOCKED(rw, op)						\
	LOCKDEBUG_LOCKED(RW_GETID(rw),					\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_UNLOCKED(rw, op)						\
	LOCKDEBUG_UNLOCKED(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_DASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

/* Without LOCKDEBUG every hook compiles away to nothing. */
#define	RW_WANTLOCK(rw, op)	/* nothing */
#define	RW_LOCKED(rw, op)	/* nothing */
#define	RW_UNLOCKED(rw, op)	/* nothing */
#define	RW_DASSERT(rw, cond)	/* nothing */

#endif	/* LOCKDEBUG */
96
97 /*
98 * DIAGNOSTIC
99 */
100
#ifdef DIAGNOSTIC

/*
 * RW_ASSERT:
 *
 *	On DIAGNOSTIC kernels, abort through RW_ABORT() when `cond' does
 *	not hold.  The text of the condition becomes part of the message.
 */
#define	RW_ASSERT(rw, cond)						\
do {									\
	if (!(cond)) {							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
	}								\
} while (/* CONSTCOND */ 0)

#else	/* !DIAGNOSTIC */

/* Without DIAGNOSTIC the check (and its condition) is compiled out. */
#define	RW_ASSERT(rw, cond)	/* nothing */

#endif	/* DIAGNOSTIC */
114
115 /*
116 * For platforms that use 'simple' RW locks.
117 */
#ifdef __HAVE_SIMPLE_RW_LOCKS
/* Acquire and release are both one compare-and-swap on the owner word. */
#define	RW_ACQUIRE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)
#define	RW_RELEASE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)
/* The lock ID is kept in the rw_id member of the lock. */
#define	RW_SETID(rw, id)		((rw)->rw_id = id)
#define	RW_GETID(rw)			((rw)->rw_id)

/*
 * RW_SET_WAITERS:
 *
 *	Try to OR the `set' bits into the owner word, but only while at
 *	least one of the `need' bits is set there.  Returns 0 without
 *	touching the lock if none of the `need' bits is set; otherwise
 *	returns the result of the RW_CAS() attempt.
 */
static inline int
RW_SET_WAITERS(krwlock_t *rw, uintptr_t need, uintptr_t set)
{
	const uintptr_t seen = rw->rw_owner;

	if ((seen & need) != 0)
		return RW_CAS(&rw->rw_owner, seen, seen | set);
	return 0;
}
#endif	/* __HAVE_SIMPLE_RW_LOCKS */
134
/*
 * LOCKDEBUG kernels never use platform stubs, so forget that the platform
 * has any.
 */
#if defined(LOCKDEBUG)
#undef	__HAVE_RW_STUBS
#endif

/*
 * When there are no stubs, rw_enter() and rw_exit() are simply other names
 * for the vector functions defined in this file.
 */
#if !defined(__HAVE_RW_STUBS)
__strong_alias(rw_enter, rw_vector_enter);
__strong_alias(rw_exit, rw_vector_exit);
#endif
146
147 void rw_dump(volatile void *);
148
149 lockops_t rwlock_lockops = {
150 "Reader / writer lock",
151 1,
152 rw_dump
153 };
154
155 /*
156 * rw_dump:
157 *
158 * Dump the contents of a rwlock structure.
159 */
160 void
161 rw_dump(volatile void *cookie)
162 {
163 volatile krwlock_t *rw = cookie;
164
165 printf_nolog("owner/count : %#018lx flags : %#018x\n",
166 (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
167 }
168
169 /*
170 * rw_init:
171 *
172 * Initialize a rwlock for use.
173 */
174 void
175 rw_init(krwlock_t *rw)
176 {
177 u_int id;
178
179 memset(rw, 0, sizeof(*rw));
180
181 id = LOCKDEBUG_ALLOC(rw, &rwlock_lockops);
182 RW_SETID(rw, id);
183 }
184
185 /*
186 * rw_destroy:
187 *
188 * Tear down a rwlock.
189 */
190 void
191 rw_destroy(krwlock_t *rw)
192 {
193
194 LOCKDEBUG_FREE(rw, RW_GETID(rw));
195 RW_ASSERT(rw, rw->rw_owner == 0);
196 }
197
198 /*
199 * rw_vector_enter:
200 *
201 * Acquire a rwlock.
202 */
203 void
204 rw_vector_enter(krwlock_t *rw, const krw_t op)
205 {
206 uintptr_t owner, incr, need_wait, set_wait, curthread;
207 turnstile_t *ts;
208 int queue;
209 struct lwp *l;
210 LOCKSTAT_TIMER(slptime);
211 LOCKSTAT_FLAG(lsflag);
212
213 l = curlwp;
214 curthread = (uintptr_t)l;
215
216 RW_ASSERT(rw, curthread != 0);
217 RW_WANTLOCK(rw, op);
218
219 #ifdef LOCKDEBUG
220 if (panicstr == NULL) {
221 simple_lock_only_held(NULL, "rw_enter");
222 #ifdef MULTIPROCESSOR
223 LOCKDEBUG_BARRIER(&kernel_lock, 1);
224 #else
225 LOCKDEBUG_BARRIER(NULL, 1);
226 #endif
227 }
228 #endif
229
230 /*
231 * We play a slight trick here. If we're a reader, we want
232 * increment the read count. If we're a writer, we want to
233 * set the owner field and whe WRITE_LOCKED bit.
234 *
235 * In the latter case, we expect those bits to be zero,
236 * therefore we can use an add operation to set them, which
237 * means an add operation for both cases.
238 */
239 if (__predict_true(op == RW_READER)) {
240 incr = RW_READ_INCR;
241 set_wait = RW_HAS_WAITERS;
242 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
243 queue = TS_READER_Q;
244 } else {
245 RW_DASSERT(rw, op == RW_WRITER);
246 incr = curthread | RW_WRITE_LOCKED;
247 set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
248 need_wait = RW_WRITE_LOCKED | RW_THREAD;
249 queue = TS_WRITER_Q;
250 }
251
252 LOCKSTAT_ENTER(lsflag);
253
254 for (;;) {
255 /*
256 * Read the lock owner field. If the need-to-wait
257 * indicator is clear, then try to acquire the lock.
258 */
259 owner = rw->rw_owner;
260 if ((owner & need_wait) == 0) {
261 if (RW_ACQUIRE(rw, owner, owner + incr)) {
262 /* Got it! */
263 break;
264 }
265
266 /*
267 * Didn't get it -- spin around again (we'll
268 * probably sleep on the next iteration).
269 */
270 continue;
271 }
272
273 if (panicstr != NULL)
274 return;
275 if (RW_OWNER(rw) == curthread)
276 RW_ABORT(rw, "locking against myself");
277
278 /*
279 * Grab the turnstile chain lock. Once we have that, we
280 * can adjust the waiter bits and sleep queue.
281 */
282 ts = turnstile_lookup(rw);
283
284 /*
285 * XXXSMP if this is a high priority LWP (interrupt handler
286 * or realtime) and acquiring a read hold, then we shouldn't
287 * wait for RW_WRITE_WANTED if our priority is >= that of
288 * the highest priority writer that is waiting.
289 */
290
291 /*
292 * Mark the rwlock as having waiters. If the set fails,
293 * then we may not need to sleep and should spin again.
294 */
295 if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
296 turnstile_exit(rw);
297 continue;
298 }
299
300 LOCKSTAT_START_TIMER(lsflag, slptime);
301
302 turnstile_block(ts, queue, rw);
303
304 /* If we wake up and arrive here, we've been handed the lock. */
305 RW_RECEIVE(rw);
306
307 LOCKSTAT_STOP_TIMER(lsflag, slptime);
308 LOCKSTAT_EVENT(lsflag, rw,
309 LB_RWLOCK | (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2),
310 1, slptime);
311
312 turnstile_unblock();
313 break;
314 }
315
316 LOCKSTAT_EXIT(lsflag);
317
318 RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
319 (op == RW_READER && RW_COUNT(rw) != 0));
320 RW_LOCKED(rw, op);
321 }
322
323 /*
324 * rw_vector_exit:
325 *
326 * Release a rwlock.
327 */
328 void
329 rw_vector_exit(krwlock_t *rw)
330 {
331 uintptr_t curthread, owner, decr, new;
332 turnstile_t *ts;
333 int rcnt, wcnt;
334 struct lwp *l;
335
336 curthread = (uintptr_t)curlwp;
337 RW_ASSERT(rw, curthread != 0);
338
339 if (panicstr != NULL) {
340 /*
341 * XXX What's the correct thing to do here? We should at
342 * least release the lock.
343 */
344 return;
345 }
346
347 /*
348 * Again, we use a trick. Since we used an add operation to
349 * set the required lock bits, we can use a subtract to clear
350 * them, which makes the read-release and write-release path
351 * the same.
352 */
353 owner = rw->rw_owner;
354 if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
355 RW_UNLOCKED(rw, RW_WRITER);
356 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
357 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
358 decr = curthread | RW_WRITE_LOCKED;
359 } else {
360 RW_UNLOCKED(rw, RW_READER);
361 RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
362 RW_ASSERT(rw, RW_COUNT(rw) != 0);
363 decr = RW_READ_INCR;
364 }
365
366 /*
367 * Compute what we expect the new value of the lock to be. Only
368 * proceed to do direct handoff if there are waiters, and if the
369 * lock would become unowned.
370 */
371 for (;; owner = rw->rw_owner) {
372 new = (owner - decr);
373 if ((new & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
374 break;
375 if (RW_RELEASE(rw, owner, new))
376 return;
377 }
378
379 for (;;) {
380 /*
381 * Grab the turnstile chain lock. This gets the interlock
382 * on the sleep queue. Once we have that, we can adjust the
383 * waiter bits.
384 */
385 ts = turnstile_lookup(rw);
386 RW_DASSERT(rw, ts != NULL);
387 RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);
388
389 owner = rw->rw_owner;
390 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
391 rcnt = TS_WAITERS(ts, TS_READER_Q);
392
393 /*
394 * Give the lock away.
395 *
396 * If we are releasing a write lock, then wake all
397 * outstanding readers. If we are releasing a read
398 * lock, then wake one writer.
399 */
400 if (rcnt == 0 || (decr == RW_READ_INCR && wcnt != 0)) {
401 RW_DASSERT(rw, wcnt != 0);
402 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
403
404 /*
405 * Give the lock to the longest waiting
406 * writer.
407 */
408 l = TS_FIRST(ts, TS_WRITER_Q);
409 new = (uintptr_t)l | RW_WRITE_LOCKED;
410
411 if (wcnt > 1)
412 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
413 else if (rcnt != 0)
414 new |= RW_HAS_WAITERS;
415
416 RW_GIVE(rw);
417 if (!RW_RELEASE(rw, owner, new)) {
418 /* Oops, try again. */
419 turnstile_exit(rw);
420 continue;
421 }
422
423 /* Wake the writer. */
424 turnstile_wakeup(ts, TS_WRITER_Q, wcnt, l);
425 } else {
426 RW_DASSERT(rw, rcnt != 0);
427
428 /*
429 * Give the lock to all blocked readers. If there
430 * is a writer waiting, new readers that arrive
431 * after the release will be blocked out.
432 */
433 new = rcnt << RW_READ_COUNT_SHIFT;
434 if (wcnt != 0)
435 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
436
437 RW_GIVE(rw);
438 if (!RW_RELEASE(rw, owner, new)) {
439 /* Oops, try again. */
440 turnstile_exit(rw);
441 continue;
442 }
443
444 /* Wake up all sleeping readers. */
445 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
446 }
447
448 break;
449 }
450 }
451
452 /*
453 * rw_tryenter:
454 *
455 * Try to acquire a rwlock.
456 */
457 int
458 rw_tryenter(krwlock_t *rw, const krw_t op)
459 {
460 uintptr_t curthread, owner, incr, need_wait;
461
462 curthread = (uintptr_t)curlwp;
463
464 RW_ASSERT(rw, curthread != 0);
465 RW_WANTLOCK(rw, op);
466
467 if (op == RW_READER) {
468 incr = RW_READ_INCR;
469 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
470 } else {
471 RW_DASSERT(rw, op == RW_WRITER);
472 incr = curthread | RW_WRITE_LOCKED;
473 need_wait = RW_WRITE_LOCKED | RW_THREAD;
474 }
475
476 for (;;) {
477 owner = rw->rw_owner;
478 if ((owner & need_wait) == 0) {
479 if (RW_ACQUIRE(rw, owner, owner + incr)) {
480 /* Got it! */
481 break;
482 }
483 continue;
484 }
485 return 0;
486 }
487
488 RW_LOCKED(rw, op);
489 RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
490 (op == RW_READER && RW_COUNT(rw) != 0));
491 return 1;
492 }
493
494 /*
495 * rw_downgrade:
496 *
497 * Downgrade a write lock to a read lock.
498 */
499 void
500 rw_downgrade(krwlock_t *rw)
501 {
502 uintptr_t owner, curthread, new;
503 turnstile_t *ts;
504 int rcnt, wcnt;
505
506 curthread = (uintptr_t)curlwp;
507 RW_ASSERT(rw, curthread != 0);
508 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
509 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
510 RW_UNLOCKED(rw, RW_WRITER);
511
512 owner = rw->rw_owner;
513 if ((owner & RW_HAS_WAITERS) == 0) {
514 /*
515 * There are no waiters, so we can do this the easy way.
516 * Try swapping us down to one read hold. If it fails, the
517 * lock condition has changed and we most likely now have
518 * waiters.
519 */
520 if (RW_RELEASE(rw, owner, RW_READ_INCR)) {
521 RW_LOCKED(rw, RW_READER);
522 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
523 RW_DASSERT(rw, RW_COUNT(rw) != 0);
524 return;
525 }
526 }
527
528 /*
529 * Grab the turnstile chain lock. This gets the interlock
530 * on the sleep queue. Once we have that, we can adjust the
531 * waiter bits.
532 */
533 for (;;) {
534 ts = turnstile_lookup(rw);
535 RW_DASSERT(rw, ts != NULL);
536
537 owner = rw->rw_owner;
538 rcnt = TS_WAITERS(ts, TS_READER_Q);
539 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
540
541 /*
542 * If there are no readers, just preserve the waiters
543 * bits, swap us down to one read hold and return.
544 */
545 if (rcnt == 0) {
546 RW_DASSERT(rw, wcnt != 0);
547 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
548 RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);
549
550 new = RW_READ_INCR | RW_HAS_WAITERS | RW_WRITE_WANTED;
551 if (!RW_RELEASE(rw, owner, new)) {
552 /* Oops, try again. */
553 turnstile_exit(ts);
554 continue;
555 }
556 break;
557 }
558
559 /*
560 * Give the lock to all blocked readers. We may
561 * retain one read hold if downgrading. If there
562 * is a writer waiting, new readers will be blocked
563 * out.
564 */
565 new = (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
566 if (wcnt != 0)
567 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
568
569 RW_GIVE(rw);
570 if (!RW_RELEASE(rw, owner, new)) {
571 /* Oops, try again. */
572 turnstile_exit(rw);
573 continue;
574 }
575
576 /* Wake up all sleeping readers. */
577 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
578 break;
579 }
580
581 RW_LOCKED(rw, RW_READER);
582 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
583 RW_DASSERT(rw, RW_COUNT(rw) != 0);
584 }
585
586 /*
587 * rw_tryupgrade:
588 *
589 * Try to upgrade a read lock to a write lock. We must be the
590 * only reader.
591 */
592 int
593 rw_tryupgrade(krwlock_t *rw)
594 {
595 uintptr_t owner, curthread, new;
596
597 curthread = (uintptr_t)curlwp;
598 RW_ASSERT(rw, curthread != 0);
599 RW_WANTLOCK(rw, RW_WRITER);
600
601 for (;;) {
602 owner = rw->rw_owner;
603 RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
604 if ((owner & RW_THREAD) != RW_READ_INCR) {
605 RW_ASSERT(rw, (owner & RW_THREAD) != 0);
606 return 0;
607 }
608 new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
609 if (RW_ACQUIRE(rw, owner, new))
610 break;
611 }
612
613 RW_UNLOCKED(rw, RW_READER);
614 RW_LOCKED(rw, RW_WRITER);
615 RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
616 RW_DASSERT(rw, RW_OWNER(rw) == curthread);
617
618 return 1;
619 }
620
621 /*
622 * rw_read_held:
623 *
624 * Returns true if the rwlock is held for reading. Must only be
625 * used for diagnostic assertions, and never be used to make
626 * decisions about how to use a rwlock.
627 */
628 int
629 rw_read_held(krwlock_t *rw)
630 {
631 uintptr_t owner;
632
633 if (panicstr != NULL)
634 return 1;
635
636 owner = rw->rw_owner;
637 return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
638 }
639
640 /*
641 * rw_write_held:
642 *
643 * Returns true if the rwlock is held for writing. Must only be
644 * used for diagnostic assertions, and never be used to make
645 * decisions about how to use a rwlock.
646 */
647 int
648 rw_write_held(krwlock_t *rw)
649 {
650
651 if (panicstr != NULL)
652 return 1;
653
654 return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
655 }
656
657 /*
658 * rw_lock_held:
659 *
660 * Returns true if the rwlock is held for reading or writing. Must
661 * only be used for diagnostic assertions, and never be used to make
662 * decisions about how to use a rwlock.
663 */
664 int
665 rw_lock_held(krwlock_t *rw)
666 {
667
668 if (panicstr != NULL)
669 return 1;
670
671 return (rw->rw_owner & RW_THREAD) != 0;
672 }
673