/*	$NetBSD: kern_rwlock.c,v 1.1.36.1 2006/09/10 23:42:42 ad Exp $	*/

/*-
 * Copyright (c) 2002, 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 */

#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.1.36.1 2006/09/10 23:42:42 ad Exp $");

#define	__RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/turnstile.h>
#include <sys/systm.h>

#include <dev/lockstat.h>

#define	RW_ABORT(rw, msg)						\
	rw_abort((rw), __FUNCTION__, msg)

/*
 * LOCKDEBUG
 */

#if defined(LOCKDEBUG)

#undef	__HAVE_RW_ENTER
#undef	__HAVE_RW_EXIT

struct simplelock rw_list_lock = SIMPLELOCK_INITIALIZER;
LIST_HEAD(, rwlock_debug_info) rw_list_writers;

#ifdef notyet
#define	RW_LOCKED(rw, op)						\
do {									\
	if ((op) != RW_WRITER)						\
		break;							\
	(rw)->rw_debug.rw_locked =					\
	    (vaddr_t) __builtin_return_address(0);			\
	LIST_INSERT_HEAD(&rw_list_writers, &rw->rw_debug, rw_chain);	\
} while (/* CONSTCOND */ 0)

#define	RW_UNLOCKED(rw)							\
do {									\
	(rw)->rw_debug.rw_unlocked =					\
	    (vaddr_t) __builtin_return_address(0);			\
	LIST_REMOVE(&rw->rw_debug, rw_chain);				\
} while (/* CONSTCOND */ 0)
#else	/* notyet */
#define	RW_LOCKED(rw, op)	/* nothing */
#define	RW_UNLOCKED(rw)		/* nothing */
#endif	/* notyet */

#define	RW_DASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	RW_LOCKED(rw, op)	/* nothing */
#define	RW_UNLOCKED(rw)		/* nothing */
#define	RW_DASSERT(rw, cond)	/* nothing */

#endif	/* LOCKDEBUG */

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)

#define	RW_ASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else

#define	RW_ASSERT(rw, cond)	/* nothing */

#endif	/* DIAGNOSTIC */

/*
 * rw_abort:
 *
 *	Dump the contents of a RW lock and call panic().
 */
static void
rw_abort(krwlock_t *rw, const char *func, const char *msg)
{

	printf("%s: %s\n", func, msg);
	rw_dump(rw);
	panic("rw_abort");
}

/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
 */
void
rw_dump(krwlock_t *rw)
{

#ifdef notyet
#if defined(LOCKDEBUG)
	printf("last locked: 0x%16lx unlocked: 0x%16lx\n",
	    (long)rw->rw_debug.rw_locked,
	    (long)rw->rw_debug.rw_unlocked);
#endif
#endif
	printf("curlwp     : 0x%16lx lockaddr: 0x%16lx\n"
	       "owner/count: 0x%16lx flags   : 0x%16x\n",
	    (long)curlwp, (long)rw, (long)RW_OWNER(rw),
	    (int)RW_FLAGS(rw));
}

/*
 * rw_init:
 *
 *	Initialize a rwlock for use.
 */
void
rw_init(krwlock_t *rw)
{

	memset(rw, 0, sizeof(*rw));
}

/*
 * rw_destroy:
 *
 *	Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

	RW_DASSERT(rw, rw->rw_owner == 0);
}
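
/*
 * Typical lifecycle, for illustration only (a minimal sketch; the
 * "foo_softc" structure and its field names are hypothetical and not
 * part of this file):
 *
 *	struct foo_softc {
 *		krwlock_t	sc_lock;
 *	} *sc;
 *
 *	rw_init(&sc->sc_lock);
 *	...use rw_enter()/rw_exit() on sc_lock...
 *	rw_destroy(&sc->sc_lock);	(must be unowned at this point)
 */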

/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread;
	struct turnstile *ts;
	LOCKSTAT_TIMER(slptime);

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);

#ifdef LOCKDEBUG
	simple_lock_only_held(NULL, "rw_enter");
#endif

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
	 */
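	/*
	 * Worked example (a sketch; the exact bit values are defined in
	 * sys/rwlock.h, this only assumes the read count occupies the
	 * bits above the flag bits).  Starting from an unowned lock,
	 * rw_owner == 0:
	 *
	 *	reader:	0 + RW_READ_INCR		==> one read hold
	 *	writer:	0 + (curthread | RW_WRITE_LOCKED)
	 *			==> owner LWP address plus the write bit
	 *
	 * Both cases are thus a single atomic update of the same word,
	 * performed by one RW_ACQUIRE() loop.
	 */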
	if (op == RW_READER) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_DASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (;;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0) {
			if (RW_ACQUIRE(rw, owner, owner + incr)) {
				/* Got it! */
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			continue;
		}

		if (panicstr != NULL)
			return;
		if (RW_OWNER(rw) == curthread)
			RW_ABORT(rw, "locking against myself");

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 */
		if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
			turnstile_exit(rw);
			continue;
		}
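
		/*
		 * At this point the waiters bit is set while we still
		 * hold the turnstile chain lock, so a releasing owner
		 * must see the bit and wake us through the turnstile;
		 * it is therefore safe to block below without missing
		 * the wakeup.
		 */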

		LOCKSTAT_START_TIMER(slptime);

		turnstile_block(ts,
		    (op == RW_READER ? TS_READER_Q : TS_WRITER_Q),
		    sched_kpri(curlwp), rw, "rwlock");

		LOCKSTAT_STOP_TIMER(slptime);
		LOCKSTAT_EVENT(rw, LB_ADAPTIVE_RWLOCK | LB_SLEEP, 1, slptime);

		/* If we wake up and arrive here, we've been handed the lock. */
		RW_RECEIVE(rw);
		break;
	}

	RW_LOCKED(rw, op);
	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
}

/*
 * rw_vector_exit:
 *
 *	Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw, krw_t op)
{
	uintptr_t curthread, owner, decr, new;
	struct turnstile *ts;
	int rcnt, wcnt, dcnt;
	struct lwp *l;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);

	if (panicstr != NULL) {
		/*
		 * XXX What's the correct thing to do here?  We should at
		 * least release the lock.
		 */
		return;
	}

	/*
	 * Again, we use a trick.  Since we used an add operation to
	 * set the required lock bits, we can use a subtract to clear
	 * them, which makes the read-release and write-release path
	 * the same.
	 */
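	/*
	 * Worked example (a sketch; exact values are in sys/rwlock.h):
	 *
	 *	read-release:	owner - RW_READ_INCR drops the hold
	 *			count by one
	 *	write-release:	owner - (curthread | RW_WRITE_LOCKED)
	 *			leaves only the waiter bits, since
	 *			enter added exactly that value
	 *	downgrade:	subtracting the write bits and adding
	 *			RW_READ_INCR in a single step leaves
	 *			one read hold
	 */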
	switch (op) {
	case RW_READER:
		RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
		RW_ASSERT(rw, RW_COUNT(rw) != 0);
		dcnt = 0;
		decr = RW_READ_INCR;
		break;
	case RW_WRITER:
		RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		RW_UNLOCKED(rw);
		dcnt = 0;
		decr = curthread | RW_WRITE_LOCKED;
		break;
	case __RW_DOWNGRADE:
		RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		RW_UNLOCKED(rw);
		dcnt = 1;
		decr = (curthread | RW_WRITE_LOCKED) - RW_READ_INCR;
		break;
	default:
		/* The default case exists only to quiet gcc. */
		RW_DASSERT(rw, !"blame gcc, I do");
		return;
	}

	for (;;) {
		/*
		 * We assume that the caller has already tried to release
		 * the lock and optimize for the 'has waiters' case, and so
		 * grab the turnstile chain lock.  This gets the interlock
		 * on the sleep queue.  Once we have that, we can adjust
		 * the waiter bits.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Compute what we expect the new value of the lock to be.
		 * Only proceed to do direct handoff if there are waiters,
		 * and if the lock would become unowned.
		 */
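		/*
		 * For example, if another reader still holds the lock,
		 * the RW_THREAD bits of the new value remain nonzero
		 * after the subtract, so the test below takes the plain
		 * release path and leaves the handoff to the last
		 * thread out; waking waiters while the lock is still
		 * owned would be futile.
		 */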
		owner = rw->rw_owner;
		new = (owner - decr) & ~RW_WRITE_WANTED;
		if ((new & (RW_THREAD | RW_HAS_WAITERS)) != RW_HAS_WAITERS) {
			if (RW_RELEASE(rw, owner, new)) {
				turnstile_exit(rw);
				break;
			}
			turnstile_exit(rw);
			continue;
		}

		/*
		 * Adjust the waiter bits.  If we are releasing a write
		 * lock or downgrading a write lock to read, then wake all
		 * outstanding readers.  If we are releasing a read lock,
		 * then wake one writer.
		 */
		RW_DASSERT(rw, ts != NULL);

		wcnt = TS_WAITERS(ts, TS_WRITER_Q);
		rcnt = TS_WAITERS(ts, TS_READER_Q);

		/*
		 * Give the lock away.
		 */
		if (dcnt == 0 &&
		    (rcnt == 0 || (op == RW_READER && wcnt != 0))) {
			RW_DASSERT(rw, wcnt != 0);

			/*
			 * Give the lock to the longest waiting
			 * writer.
			 */
			l = TS_FIRST(ts, TS_WRITER_Q);
			new = (uintptr_t)l | RW_WRITE_LOCKED;

			if (wcnt > 1)
				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
			else if (rcnt != 0)
				new |= RW_HAS_WAITERS;

			if (!RW_RELEASE(rw, owner, new)) {
				/* Oops, try again. */
				turnstile_exit(rw);
				continue;
			}

			/* Wake the writer. */
			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, l);
		} else {
			dcnt += rcnt;
			RW_DASSERT(rw, dcnt != 0);

			/*
			 * Give the lock to all blocked readers.  We may
			 * retain one read hold if downgrading.  If there
			 * is a writer waiting, new readers will be blocked
			 * out.
			 */
			new = dcnt << RW_READ_COUNT_SHIFT;
			if (wcnt != 0)
				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
			if (!RW_RELEASE(rw, owner, new)) {
				/* Oops, try again. */
				turnstile_exit(rw);
				continue;
			}

			/* Wake up all sleeping readers. */
			turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
		}

		break;
	}
}

/*
 * rw_tryenter:
 *
 *	Try to acquire a rwlock.
 */
int
rw_tryenter(krwlock_t *rw, krw_t op)
{
	uintptr_t curthread, owner, incr, need_wait;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);

	if (op == RW_READER) {
		incr = RW_READ_INCR;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_DASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (;;) {
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0) {
			if (RW_ACQUIRE(rw, owner, owner + incr)) {
				/* Got it! */
				break;
			}
			continue;
		}
		return 0;
	}

	RW_LOCKED(rw, op);
	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	return 1;
}
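
/*
 * Usage sketch (hypothetical caller; "sc" is as in the rw_init()
 * example above).  rw_tryenter() never sleeps, so it is suitable
 * where blocking is not permitted:
 *
 *	if (rw_tryenter(&sc->sc_lock, RW_WRITER)) {
 *		...update the protected state...
 *		rw_exit(&sc->sc_lock);
 *	} else
 *		...defer or skip the work...
 */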

/*
 * rw_downgrade:
 *
 *	Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);
	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
	RW_UNLOCKED(rw);

	for (;;) {
		owner = rw->rw_owner;

		/* If there are waiters we need to do this the hard way. */
		if ((owner & RW_HAS_WAITERS) != 0) {
			rw_vector_exit(rw, __RW_DOWNGRADE);
			return;
		}

		/*
		 * Try swapping us down to one read hold.  If it fails, the
		 * lock condition has changed and we most likely now have
		 * waiters.
		 */
		if (RW_RELEASE(rw, owner, RW_READ_INCR))
			break;
	}

	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
	RW_DASSERT(rw, RW_COUNT(rw) != 0);
}
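
/*
 * Usage sketch (hypothetical caller): modify under the write hold,
 * then downgrade and continue reading with no window in which the
 * lock is unowned:
 *
 *	rw_enter(&sc->sc_lock, RW_WRITER);
 *	...modify the protected state...
 *	rw_downgrade(&sc->sc_lock);
 *	...read the protected state...
 *	rw_exit(&sc->sc_lock);
 */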

/*
 * rw_tryupgrade:
 *
 *	Try to upgrade a read lock to a write lock.  We must be the
 *	only reader.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, new;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);

	for (;;) {
		owner = rw->rw_owner;
		RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
		if ((owner & RW_THREAD) != RW_READ_INCR) {
			RW_ASSERT(rw, (owner & RW_THREAD) != 0);
			return 0;
		}
		new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
		if (RW_ACQUIRE(rw, owner, new))
			break;
	}

	RW_LOCKED(rw, RW_WRITER);
	RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
	RW_DASSERT(rw, RW_OWNER(rw) == curthread);

	return 1;
}
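
/*
 * Usage sketch (hypothetical caller).  The upgrade fails if any other
 * reader holds the lock, so callers must be prepared to fall back to
 * dropping and re-taking it, re-validating state afterwards:
 *
 *	rw_enter(&sc->sc_lock, RW_READER);
 *	if (must_modify && !rw_tryupgrade(&sc->sc_lock)) {
 *		rw_exit(&sc->sc_lock);
 *		rw_enter(&sc->sc_lock, RW_WRITER);
 *		...re-check: state may have changed while unlocked...
 *	}
 */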

/*
 * rw_read_held:
 *
 *	Returns true if the rwlock is held for reading.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
	uintptr_t owner;

	owner = rw->rw_owner;
	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *	Returns true if the rwlock is held for writing.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

	return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
}
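
/*
 * For example (diagnostic use only, per the comments above):
 *
 *	KASSERT(rw_write_held(&sc->sc_lock));
 */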

/*
 * Slow-path stubs for platforms that do not provide fast-path
 * versions of rw_enter() and rw_exit().
 */
#ifndef	__HAVE_RW_ENTER
void
rw_enter(krwlock_t *rw, krw_t op)
{

	rw_vector_enter(rw, op);
}
#endif

#ifndef	__HAVE_RW_EXIT
void
rw_exit(krwlock_t *rw)
{
	krw_t op;

	op = ((rw->rw_owner & RW_WRITE_LOCKED) ? RW_WRITER : RW_READER);
	rw_vector_exit(rw, op);
}
#endif