kern_rwlock.c revision 1.6.2.4 1 /* $NetBSD: kern_rwlock.c,v 1.6.2.4 2007/07/15 22:17:08 ad Exp $ */
2
3 /*-
4 * Copyright (c) 2002, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe and Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Kernel reader/writer lock implementation, modeled after those
41 * found in Solaris, a description of which can be found in:
42 *
43 * Solaris Internals: Core Kernel Architecture, Jim Mauro and
44 * Richard McDougall.
45 */
46
47 #include <sys/cdefs.h>
48 __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.6.2.4 2007/07/15 22:17:08 ad Exp $");
49
50 #define __RWLOCK_PRIVATE
51
52 #include <sys/param.h>
53 #include <sys/proc.h>
54 #include <sys/rwlock.h>
55 #include <sys/sched.h>
56 #include <sys/sleepq.h>
57 #include <sys/systm.h>
58 #include <sys/lockdebug.h>
59 #include <sys/cpu.h>
60
61 #include <dev/lockstat.h>
62
/*
 * RW_ABORT:
 *
 *	Report a fatal rwlock problem through LOCKDEBUG_ABORT(), handing it
 *	the lock's debug ID, the lock, the rwlock lockops descriptor, the
 *	name of the enclosing function and the caller's message.
 *
 *	The function name is taken from the standard C99 identifier
 *	__func__ rather than the compiler-specific __FUNCTION__.
 */
#define	RW_ABORT(rw, msg)						\
	LOCKDEBUG_ABORT(RW_GETID(rw), (rw), &rwlock_lockops, __func__, (msg))
65
/*
 * LOCKDEBUG hooks.
 *
 * With LOCKDEBUG defined, RW_WANTLOCK/RW_LOCKED/RW_UNLOCKED forward to the
 * corresponding LOCKDEBUG_* hook with the lock's debug ID, the caller's
 * return address and a flag that is true for RW_READER.  RW_DASSERT aborts
 * through RW_ABORT() when its condition is false.  Without LOCKDEBUG all
 * four expand to nothing.
 *
 * None of the macros carries its own trailing semicolon: the caller
 * supplies it, which keeps "if (x) RW_DASSERT(...); else ..." well formed.
 */

#if defined(LOCKDEBUG)

#define	RW_WANTLOCK(rw, op)						\
	LOCKDEBUG_WANTLOCK(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_LOCKED(rw, op)						\
	LOCKDEBUG_LOCKED(RW_GETID(rw),					\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_UNLOCKED(rw, op)						\
	LOCKDEBUG_UNLOCKED(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), (op) == RW_READER)
#define	RW_DASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	RW_WANTLOCK(rw, op)	/* nothing */
#define	RW_LOCKED(rw, op)	/* nothing */
#define	RW_UNLOCKED(rw, op)	/* nothing */
#define	RW_DASSERT(rw, cond)	/* nothing */

#endif	/* LOCKDEBUG */
95
/*
 * DIAGNOSTIC assertions.
 *
 * RW_ASSERT(rw, cond) aborts through RW_ABORT() with the stringified
 * condition when 'cond' is false.  It is compiled out entirely when
 * DIAGNOSTIC is not defined.
 */

#ifdef DIAGNOSTIC

#define	RW_ASSERT(rw, cond)						\
do {									\
	if (cond) {							\
		/* condition holds: nothing to report */		\
	} else {							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
	}								\
} while (/* CONSTCOND */ 0)

#else	/* !DIAGNOSTIC */

#define	RW_ASSERT(rw, cond)	/* nothing */

#endif	/* DIAGNOSTIC */
113
/*
 * Definitions used on platforms with 'simple' RW locks, where the lock
 * state lives in the rw_owner word and is changed with RW_CAS().
 */
#if defined(__HAVE_SIMPLE_RW_LOCKS)
#define	RW_ACQUIRE(rw, seen, want)	RW_CAS(&(rw)->rw_owner, seen, want)
#define	RW_RELEASE(rw, seen, want)	RW_CAS(&(rw)->rw_owner, seen, want)
#define	RW_SETID(rw, id)		((rw)->rw_id = id)
#define	RW_GETID(rw)			((rw)->rw_id)

/*
 * RW_SET_WAITERS:
 *
 *	Take one snapshot of the owner word.  If none of the 'need' bits
 *	are set in it, return 0 and leave the lock untouched.  Otherwise
 *	try to OR the 'set' bits into the word with a compare-and-swap
 *	against that snapshot and return the result of RW_CAS().
 */
static inline int
RW_SET_WAITERS(krwlock_t *rw, uintptr_t need, uintptr_t set)
{
	const uintptr_t snapshot = rw->rw_owner;

	if ((snapshot & need) != 0)
		return RW_CAS(&rw->rw_owner, snapshot, snapshot | set);

	return 0;
}
#endif	/* __HAVE_SIMPLE_RW_LOCKS */
133
/*
 * If the platform provides no stubs -- or LOCKDEBUG is enabled, which
 * turns the stubs off -- make rw_enter/rw_exit strong aliases of the
 * rw_vector_enter/rw_vector_exit routines in this file.
 */
#if defined(LOCKDEBUG)
#undef	__HAVE_RW_STUBS
#endif

#if !defined(__HAVE_RW_STUBS)
__strong_alias(rw_enter,rw_vector_enter);
__strong_alias(rw_exit,rw_vector_exit);
#endif
145
146 static void rw_dump(volatile void *);
147 static lwp_t *rw_owner(wchan_t);
148
149 lockops_t rwlock_lockops = {
150 "Reader / writer lock",
151 1,
152 rw_dump
153 };
154
155 syncobj_t rw_syncobj = {
156 SOBJ_SLEEPQ_SORTED,
157 turnstile_unsleep,
158 turnstile_changepri,
159 sleepq_lendpri,
160 rw_owner,
161 };
162
163 /*
164 * rw_dump:
165 *
166 * Dump the contents of a rwlock structure.
167 */
168 void
169 rw_dump(volatile void *cookie)
170 {
171 volatile krwlock_t *rw = cookie;
172
173 printf_nolog("owner/count : %#018lx flags : %#018x\n",
174 (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
175 }
176
177 /*
178 * rw_init:
179 *
180 * Initialize a rwlock for use.
181 */
182 void
183 rw_init(krwlock_t *rw)
184 {
185 u_int id;
186
187 memset(rw, 0, sizeof(*rw));
188
189 id = LOCKDEBUG_ALLOC(rw, &rwlock_lockops);
190 RW_SETID(rw, id);
191 }
192
193 /*
194 * rw_destroy:
195 *
196 * Tear down a rwlock.
197 */
198 void
199 rw_destroy(krwlock_t *rw)
200 {
201
202 LOCKDEBUG_FREE(rw, RW_GETID(rw));
203 RW_ASSERT(rw, rw->rw_owner == 0);
204 }
205
206 /*
207 * rw_vector_enter:
208 *
209 * Acquire a rwlock.
210 */
211 void
212 rw_vector_enter(krwlock_t *rw, const krw_t op)
213 {
214 uintptr_t owner, incr, need_wait, set_wait, curthread;
215 turnstile_t *ts;
216 int queue;
217 lwp_t *l;
218 LOCKSTAT_TIMER(slptime);
219 LOCKSTAT_FLAG(lsflag);
220
221 l = curlwp;
222 curthread = (uintptr_t)l;
223
224 RW_ASSERT(rw, !cpu_intr_p());
225 RW_ASSERT(rw, curthread != 0);
226 RW_WANTLOCK(rw, op);
227
228 if (panicstr == NULL) {
229 LOCKDEBUG_BARRIER(&kernel_lock, 1);
230 }
231
232 /*
233 * We play a slight trick here. If we're a reader, we want
234 * increment the read count. If we're a writer, we want to
235 * set the owner field and whe WRITE_LOCKED bit.
236 *
237 * In the latter case, we expect those bits to be zero,
238 * therefore we can use an add operation to set them, which
239 * means an add operation for both cases.
240 */
241 if (__predict_true(op == RW_READER)) {
242 incr = RW_READ_INCR;
243 set_wait = RW_HAS_WAITERS;
244 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
245 queue = TS_READER_Q;
246 } else {
247 RW_DASSERT(rw, op == RW_WRITER);
248 incr = curthread | RW_WRITE_LOCKED;
249 set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
250 need_wait = RW_WRITE_LOCKED | RW_THREAD;
251 queue = TS_WRITER_Q;
252 }
253
254 LOCKSTAT_ENTER(lsflag);
255
256 for (;;) {
257 /*
258 * Read the lock owner field. If the need-to-wait
259 * indicator is clear, then try to acquire the lock.
260 */
261 owner = rw->rw_owner;
262 if ((owner & need_wait) == 0) {
263 if (RW_ACQUIRE(rw, owner, owner + incr)) {
264 /* Got it! */
265 break;
266 }
267
268 /*
269 * Didn't get it -- spin around again (we'll
270 * probably sleep on the next iteration).
271 */
272 continue;
273 }
274
275 if (panicstr != NULL)
276 return;
277 if (RW_OWNER(rw) == curthread)
278 RW_ABORT(rw, "locking against myself");
279
280 /*
281 * Grab the turnstile chain lock. Once we have that, we
282 * can adjust the waiter bits and sleep queue.
283 */
284 ts = turnstile_lookup(rw);
285
286 /*
287 * XXXSMP if this is a high priority LWP (interrupt handler
288 * or realtime) and acquiring a read hold, then we shouldn't
289 * wait for RW_WRITE_WANTED if our priority is >= that of
290 * the highest priority writer that is waiting.
291 */
292
293 /*
294 * Mark the rwlock as having waiters. If the set fails,
295 * then we may not need to sleep and should spin again.
296 */
297 if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
298 turnstile_exit(rw);
299 continue;
300 }
301
302 LOCKSTAT_START_TIMER(lsflag, slptime);
303
304 turnstile_block(ts, queue, rw, &rw_syncobj);
305
306 /* If we wake up and arrive here, we've been handed the lock. */
307 RW_RECEIVE(rw);
308
309 LOCKSTAT_STOP_TIMER(lsflag, slptime);
310 LOCKSTAT_EVENT(lsflag, rw,
311 LB_RWLOCK | (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2),
312 1, slptime);
313
314 break;
315 }
316
317 LOCKSTAT_EXIT(lsflag);
318
319 RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
320 (op == RW_READER && RW_COUNT(rw) != 0));
321 RW_LOCKED(rw, op);
322 }
323
324 /*
325 * rw_vector_exit:
326 *
327 * Release a rwlock.
328 */
329 void
330 rw_vector_exit(krwlock_t *rw)
331 {
332 uintptr_t curthread, owner, decr, new;
333 turnstile_t *ts;
334 int rcnt, wcnt;
335 lwp_t *l;
336
337 curthread = (uintptr_t)curlwp;
338 RW_ASSERT(rw, curthread != 0);
339
340 if (panicstr != NULL) {
341 /*
342 * XXX What's the correct thing to do here? We should at
343 * least release the lock.
344 */
345 return;
346 }
347
348 /*
349 * Again, we use a trick. Since we used an add operation to
350 * set the required lock bits, we can use a subtract to clear
351 * them, which makes the read-release and write-release path
352 * the same.
353 */
354 owner = rw->rw_owner;
355 if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
356 RW_UNLOCKED(rw, RW_WRITER);
357 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
358 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
359 decr = curthread | RW_WRITE_LOCKED;
360 } else {
361 RW_UNLOCKED(rw, RW_READER);
362 RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
363 RW_ASSERT(rw, RW_COUNT(rw) != 0);
364 decr = RW_READ_INCR;
365 }
366
367 /*
368 * Compute what we expect the new value of the lock to be. Only
369 * proceed to do direct handoff if there are waiters, and if the
370 * lock would become unowned.
371 */
372 for (;; owner = rw->rw_owner) {
373 new = (owner - decr);
374 if ((new & (RW_THREAD | RW_HAS_WAITERS)) == RW_HAS_WAITERS)
375 break;
376 if (RW_RELEASE(rw, owner, new))
377 return;
378 }
379
380 for (;;) {
381 /*
382 * Grab the turnstile chain lock. This gets the interlock
383 * on the sleep queue. Once we have that, we can adjust the
384 * waiter bits.
385 */
386 ts = turnstile_lookup(rw);
387 RW_DASSERT(rw, ts != NULL);
388 RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);
389
390 owner = rw->rw_owner;
391 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
392 rcnt = TS_WAITERS(ts, TS_READER_Q);
393
394 /*
395 * Give the lock away.
396 *
397 * If we are releasing a write lock, then wake all
398 * outstanding readers. If we are releasing a read
399 * lock, then wake one writer.
400 */
401 if (rcnt == 0 || (decr == RW_READ_INCR && wcnt != 0)) {
402 RW_DASSERT(rw, wcnt != 0);
403 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
404
405 /*
406 * Give the lock to the longest waiting
407 * writer.
408 */
409 l = TS_FIRST(ts, TS_WRITER_Q);
410 new = (uintptr_t)l | RW_WRITE_LOCKED;
411
412 if (wcnt > 1)
413 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
414 else if (rcnt != 0)
415 new |= RW_HAS_WAITERS;
416
417 RW_GIVE(rw);
418 if (!RW_RELEASE(rw, owner, new)) {
419 /* Oops, try again. */
420 turnstile_exit(rw);
421 continue;
422 }
423
424 /* Wake the writer. */
425 turnstile_wakeup(ts, TS_WRITER_Q, 1, l);
426 } else {
427 RW_DASSERT(rw, rcnt != 0);
428
429 /*
430 * Give the lock to all blocked readers. If there
431 * is a writer waiting, new readers that arrive
432 * after the release will be blocked out.
433 */
434 new = rcnt << RW_READ_COUNT_SHIFT;
435 if (wcnt != 0)
436 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
437
438 RW_GIVE(rw);
439 if (!RW_RELEASE(rw, owner, new)) {
440 /* Oops, try again. */
441 turnstile_exit(rw);
442 continue;
443 }
444
445 /* Wake up all sleeping readers. */
446 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
447 }
448
449 break;
450 }
451 }
452
453 /*
454 * rw_tryenter:
455 *
456 * Try to acquire a rwlock.
457 */
458 int
459 rw_tryenter(krwlock_t *rw, const krw_t op)
460 {
461 uintptr_t curthread, owner, incr, need_wait;
462
463 curthread = (uintptr_t)curlwp;
464
465 RW_ASSERT(rw, curthread != 0);
466 RW_WANTLOCK(rw, op);
467
468 if (op == RW_READER) {
469 incr = RW_READ_INCR;
470 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
471 } else {
472 RW_DASSERT(rw, op == RW_WRITER);
473 incr = curthread | RW_WRITE_LOCKED;
474 need_wait = RW_WRITE_LOCKED | RW_THREAD;
475 }
476
477 for (;;) {
478 owner = rw->rw_owner;
479 if ((owner & need_wait) == 0) {
480 if (RW_ACQUIRE(rw, owner, owner + incr)) {
481 /* Got it! */
482 break;
483 }
484 continue;
485 }
486 return 0;
487 }
488
489 RW_LOCKED(rw, op);
490 RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
491 (op == RW_READER && RW_COUNT(rw) != 0));
492
493 return 1;
494 }
495
496 /*
497 * rw_downgrade:
498 *
499 * Downgrade a write lock to a read lock.
500 */
501 void
502 rw_downgrade(krwlock_t *rw)
503 {
504 uintptr_t owner, curthread, new;
505 turnstile_t *ts;
506 int rcnt, wcnt;
507
508 curthread = (uintptr_t)curlwp;
509 RW_ASSERT(rw, curthread != 0);
510 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
511 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
512 RW_UNLOCKED(rw, RW_WRITER);
513
514 owner = rw->rw_owner;
515 if ((owner & RW_HAS_WAITERS) == 0) {
516 /*
517 * There are no waiters, so we can do this the easy way.
518 * Try swapping us down to one read hold. If it fails, the
519 * lock condition has changed and we most likely now have
520 * waiters.
521 */
522 if (RW_RELEASE(rw, owner, RW_READ_INCR)) {
523 RW_LOCKED(rw, RW_READER);
524 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
525 RW_DASSERT(rw, RW_COUNT(rw) != 0);
526 return;
527 }
528 }
529
530 /*
531 * Grab the turnstile chain lock. This gets the interlock
532 * on the sleep queue. Once we have that, we can adjust the
533 * waiter bits.
534 */
535 for (;;) {
536 ts = turnstile_lookup(rw);
537 RW_DASSERT(rw, ts != NULL);
538
539 owner = rw->rw_owner;
540 rcnt = TS_WAITERS(ts, TS_READER_Q);
541 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
542
543 /*
544 * If there are no readers, just preserve the waiters
545 * bits, swap us down to one read hold and return.
546 */
547 if (rcnt == 0) {
548 RW_DASSERT(rw, wcnt != 0);
549 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_WANTED) != 0);
550 RW_DASSERT(rw, (rw->rw_owner & RW_HAS_WAITERS) != 0);
551
552 new = RW_READ_INCR | RW_HAS_WAITERS | RW_WRITE_WANTED;
553 if (!RW_RELEASE(rw, owner, new)) {
554 /* Oops, try again. */
555 turnstile_exit(ts);
556 continue;
557 }
558 break;
559 }
560
561 /*
562 * Give the lock to all blocked readers. We may
563 * retain one read hold if downgrading. If there
564 * is a writer waiting, new readers will be blocked
565 * out.
566 */
567 new = (rcnt << RW_READ_COUNT_SHIFT) + RW_READ_INCR;
568 if (wcnt != 0)
569 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
570
571 RW_GIVE(rw);
572 if (!RW_RELEASE(rw, owner, new)) {
573 /* Oops, try again. */
574 turnstile_exit(rw);
575 continue;
576 }
577
578 /* Wake up all sleeping readers. */
579 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
580 break;
581 }
582
583 RW_LOCKED(rw, RW_READER);
584 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
585 RW_DASSERT(rw, RW_COUNT(rw) != 0);
586 }
587
588 /*
589 * rw_tryupgrade:
590 *
591 * Try to upgrade a read lock to a write lock. We must be the
592 * only reader.
593 */
594 int
595 rw_tryupgrade(krwlock_t *rw)
596 {
597 uintptr_t owner, curthread, new;
598
599 curthread = (uintptr_t)curlwp;
600 RW_ASSERT(rw, curthread != 0);
601 RW_WANTLOCK(rw, RW_WRITER);
602
603 for (;;) {
604 owner = rw->rw_owner;
605 RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
606 if ((owner & RW_THREAD) != RW_READ_INCR) {
607 RW_ASSERT(rw, (owner & RW_THREAD) != 0);
608 return 0;
609 }
610 new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
611 if (RW_ACQUIRE(rw, owner, new))
612 break;
613 }
614
615 RW_UNLOCKED(rw, RW_READER);
616 RW_LOCKED(rw, RW_WRITER);
617 RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
618 RW_DASSERT(rw, RW_OWNER(rw) == curthread);
619
620 return 1;
621 }
622
623 /*
624 * rw_read_held:
625 *
626 * Returns true if the rwlock is held for reading. Must only be
627 * used for diagnostic assertions, and never be used to make
628 * decisions about how to use a rwlock.
629 */
630 int
631 rw_read_held(krwlock_t *rw)
632 {
633 uintptr_t owner;
634
635 if (panicstr != NULL)
636 return 1;
637
638 owner = rw->rw_owner;
639 return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
640 }
641
642 /*
643 * rw_write_held:
644 *
645 * Returns true if the rwlock is held for writing. Must only be
646 * used for diagnostic assertions, and never be used to make
647 * decisions about how to use a rwlock.
648 */
649 int
650 rw_write_held(krwlock_t *rw)
651 {
652
653 if (panicstr != NULL)
654 return 1;
655
656 return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
657 }
658
659 /*
660 * rw_lock_held:
661 *
662 * Returns true if the rwlock is held for reading or writing. Must
663 * only be used for diagnostic assertions, and never be used to make
664 * decisions about how to use a rwlock.
665 */
666 int
667 rw_lock_held(krwlock_t *rw)
668 {
669
670 if (panicstr != NULL)
671 return 1;
672
673 return (rw->rw_owner & RW_THREAD) != 0;
674 }
675
676 /*
677 * rw_owner:
678 *
679 * Return the current owner of an RW lock, but only if it is write
680 * held. Used for priority inheritance.
681 */
682 static lwp_t *
683 rw_owner(wchan_t obj)
684 {
685 krwlock_t *rw = (void *)(uintptr_t)obj; /* discard qualifiers */
686 uintptr_t owner = rw->rw_owner;
687
688 if ((owner & RW_WRITE_LOCKED) == 0)
689 return NULL;
690
691 return (void *)(owner & RW_THREAD);
692 }
693