/*	$NetBSD: kern_rwlock.c,v 1.1.36.2 2006/09/11 01:31:39 ad Exp $	*/

/*-
 * Copyright (c) 2002, 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *        Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *        Richard McDougall.
 */

#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.1.36.2 2006/09/11 01:31:39 ad Exp $");

#define __RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/turnstile.h>
#include <sys/systm.h>

#include <dev/lockstat.h>

#define RW_ABORT(rw, msg)                                               \
        rw_abort((rw), __FUNCTION__, msg)

/*
 * LOCKDEBUG
 */

#if defined(LOCKDEBUG)

#undef __HAVE_RW_ENTER
#undef __HAVE_RW_EXIT

struct simplelock rw_list_lock = SIMPLELOCK_INITIALIZER;
LIST_HEAD(, rwlock_debug_info) rw_list_writers;

#ifdef notyet
#define RW_LOCKED(rw, op)                                               \
do {                                                                    \
        if ((op) != RW_WRITER)                                          \
                break;                                                  \
        (rw)->rw_debug.rw_locked =                                      \
            (vaddr_t) __builtin_return_address(0);                      \
        LIST_INSERT_HEAD(&rw_list_writers, &rw->rw_debug, rw_chain);    \
} while (/* CONSTCOND */ 0)

#define RW_UNLOCKED(rw)                                                 \
do {                                                                    \
        (rw)->rw_debug.rw_unlocked =                                    \
            (vaddr_t) __builtin_return_address(0);                      \
        LIST_REMOVE(&rw->rw_debug, rw_chain);                           \
} while (/* CONSTCOND */ 0)
#else   /* notyet */
#define RW_LOCKED(rw, op)       /* nothing */
#define RW_UNLOCKED(rw)         /* nothing */
#endif  /* notyet */

#define RW_DASSERT(rw, cond)                                            \
do {                                                                    \
        if (!(cond))                                                    \
                RW_ABORT(rw, "assertion failed: " #cond);               \
} while (/* CONSTCOND */ 0)

#else   /* LOCKDEBUG */

#define RW_LOCKED(rw, op)       /* nothing */
#define RW_UNLOCKED(rw)         /* nothing */
#define RW_DASSERT(rw, cond)    /* nothing */

#endif  /* LOCKDEBUG */

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)

#define RW_ASSERT(rw, cond)                                             \
do {                                                                    \
        if (!(cond))                                                    \
                RW_ABORT(rw, "assertion failed: " #cond);               \
} while (/* CONSTCOND */ 0)

#else

#define RW_ASSERT(rw, cond)     /* nothing */

#endif  /* DIAGNOSTIC */

/*
 * rw_abort:
 *
 *      Dump the contents of a RW lock and call panic().
 */
static void
rw_abort(krwlock_t *rw, const char *func, const char *msg)
{

        printf("%s: %s\n", func, msg);
        rw_dump(rw);
        panic("rw_abort");
}

/*
 * rw_dump:
 *
 *      Dump the contents of an rwlock structure.
 */
void
rw_dump(krwlock_t *rw)
{

#ifdef notyet
#if defined(LOCKDEBUG)
        printf("last locked: 0x%16lx unlocked: 0x%16lx\n",
            (long)rw->rw_debug.rw_locked,
            (long)rw->rw_debug.rw_unlocked);
#endif
#endif
        printf("curlwp     : 0x%16lx lockaddr: 0x%16lx\n"
               "owner/count: 0x%16lx flags   : 0x%16x\n",
            (long)curlwp, (long)rw, (long)RW_OWNER(rw),
            (int)RW_FLAGS(rw));
}

/*
 * rw_init:
 *
 *      Initialize a rwlock for use.
 */
void
rw_init(krwlock_t *rw)
{

        memset(rw, 0, sizeof(*rw));
}

/*
 * rw_destroy:
 *
 *      Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

        RW_DASSERT(rw, rw->rw_owner == 0);
}
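
/*
 * Example usage (an illustrative sketch, not part of the original
 * sources; "lock" names a hypothetical caller-owned variable):
 *
 *      krwlock_t lock;
 *
 *      rw_init(&lock);
 *      rw_enter(&lock, RW_READER);
 *      ...read shared state...
 *      rw_exit(&lock);
 *      rw_destroy(&lock);
 */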

/*
 * rw_vector_enter:
 *
 *      Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, krw_t op)
{
        uintptr_t owner, incr, need_wait, set_wait, curthread;
        struct turnstile *ts;
        LOCKSTAT_TIMER(slptime);

        curthread = (uintptr_t)curlwp;
        RW_ASSERT(rw, curthread != 0);

#ifdef LOCKDEBUG
        if (panicstr == NULL)
                simple_lock_only_held(NULL, "rw_enter");
#endif

        /*
         * We play a slight trick here.  If we're a reader, we want to
         * increment the read count.  If we're a writer, we want to set
         * the owner field and the RW_WRITE_LOCKED bit.
         *
         * In the latter case, we expect those bits to be zero,
         * therefore we can use an add operation to set them, which
         * means an add operation for both cases.
         */
        if (op == RW_READER) {
                incr = RW_READ_INCR;
                set_wait = RW_HAS_WAITERS;
                need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
        } else {
                RW_DASSERT(rw, op == RW_WRITER);
                incr = curthread | RW_WRITE_LOCKED;
                set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
                need_wait = RW_WRITE_LOCKED | RW_THREAD;
        }
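
        /*
         * Illustrative sketch (an addition, not part of the original
         * sources): the same compare-and-swap of (owner + incr) below
         * covers both cases.  For a reader, owner + RW_READ_INCR bumps
         * the hold count by one; for a writer taking a free lock
         * (owner == 0), 0 + (curthread | RW_WRITE_LOCKED) installs the
         * owner and the write bit in a single step.
         */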

        for (;;) {
                /*
                 * Read the lock owner field.  If the need-to-wait
                 * indicator is clear, then try to acquire the lock.
                 */
                owner = rw->rw_owner;
                if ((owner & need_wait) == 0) {
                        if (RW_ACQUIRE(rw, owner, owner + incr)) {
                                /* Got it! */
                                break;
                        }

                        /*
                         * Didn't get it -- spin around again (we'll
                         * probably sleep on the next iteration).
                         */
                        continue;
                }

                if (panicstr != NULL)
                        return;
                if (RW_OWNER(rw) == curthread)
                        RW_ABORT(rw, "locking against myself");

                /*
                 * Grab the turnstile chain lock.  Once we have that, we
                 * can adjust the waiter bits and sleep queue.
                 */
                ts = turnstile_lookup(rw);

                /*
                 * Mark the rwlock as having waiters.  If the set fails,
                 * then we may not need to sleep and should spin again.
                 */
                if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
                        turnstile_exit(rw);
                        continue;
                }

                LOCKSTAT_START_TIMER(slptime);

                turnstile_block(ts,
                    (op == RW_READER ? TS_READER_Q : TS_WRITER_Q),
                    sched_kpri(curlwp), rw, "rwlock");

                LOCKSTAT_STOP_TIMER(slptime);
                LOCKSTAT_EVENT(rw, LB_ADAPTIVE_RWLOCK | LB_SLEEP, 1, slptime);

                /* If we wake up and arrive here, we've been handed the lock. */
                RW_RECEIVE(rw);
                break;
        }

        RW_LOCKED(rw, op);
        RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
            (op == RW_READER && RW_COUNT(rw) != 0));
}

/*
 * rw_vector_exit:
 *
 *      Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw, krw_t op)
{
        uintptr_t curthread, owner, decr, new;
        struct turnstile *ts;
        int rcnt, wcnt, dcnt;
        struct lwp *l;

        curthread = (uintptr_t)curlwp;
        RW_ASSERT(rw, curthread != 0);

        if (panicstr != NULL) {
                /*
                 * XXX What's the correct thing to do here?  We should at
                 * least release the lock.
                 */
                return;
        }

        /*
         * Again, we use a trick.  Since we used an add operation to
         * set the required lock bits, we can use a subtract to clear
         * them, which makes the read-release and write-release paths
         * the same.
         */
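
        /*
         * Illustrative sketch (an addition, not part of the original
         * sources): each decr below mirrors the incr used at acquire
         * time.  A sole writer releasing computes
         * (curthread | RW_WRITE_LOCKED) - (curthread | RW_WRITE_LOCKED)
         * == 0, returning the lock to the unowned state, while the
         * downgrade decrement deliberately leaves one RW_READ_INCR
         * behind as the surviving read hold.
         */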
        switch (op) {
        case RW_READER:
                RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
                RW_ASSERT(rw, RW_COUNT(rw) != 0);
                dcnt = 0;
                decr = RW_READ_INCR;
                break;
        case RW_WRITER:
                RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
                RW_ASSERT(rw, RW_OWNER(rw) == curthread);
                RW_UNLOCKED(rw);
                dcnt = 0;
                decr = curthread | RW_WRITE_LOCKED;
                break;
        case __RW_DOWNGRADE:
                RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
                RW_ASSERT(rw, RW_OWNER(rw) == curthread);
                RW_UNLOCKED(rw);
                dcnt = 1;
                decr = (curthread | RW_WRITE_LOCKED) - RW_READ_INCR;
                break;
        default:
                RW_DASSERT(rw, 0 && "blame gcc, I do");
                return;
        }

        for (;;) {
                /*
                 * We assume that the caller has already tried to release
                 * the lock and optimize for the 'has waiters' case, and so
                 * grab the turnstile chain lock.  This gets the interlock
                 * on the sleep queue.  Once we have that, we can adjust the
                 * waiter bits.
                 */
                ts = turnstile_lookup(rw);

                /*
                 * Compute what we expect the new value of the lock to be.
                 * Only proceed to do direct handoff if there are waiters,
                 * and if the lock would become unowned.
                 */
                owner = rw->rw_owner;
                new = (owner - decr) & ~RW_WRITE_WANTED;
                if ((new & (RW_THREAD | RW_HAS_WAITERS)) != RW_HAS_WAITERS) {
                        if (RW_RELEASE(rw, owner, new)) {
                                turnstile_exit(rw);
                                break;
                        }
                        turnstile_exit(rw);
                        continue;
                }

                /*
                 * Adjust the waiter bits.  If we are releasing a write
                 * lock or downgrading a write lock to read, then wake all
                 * outstanding readers.  If we are releasing a read lock,
                 * then wake one writer.
                 */
                RW_DASSERT(rw, ts != NULL);

                wcnt = TS_WAITERS(ts, TS_WRITER_Q);
                rcnt = TS_WAITERS(ts, TS_READER_Q);

                /*
                 * Give the lock away.
                 */
                if (dcnt == 0 &&
                    (rcnt == 0 || (op == RW_READER && wcnt != 0))) {
                        RW_DASSERT(rw, wcnt != 0);

                        /*
                         * Give the lock to the longest waiting
                         * writer.
                         */
                        l = TS_FIRST(ts, TS_WRITER_Q);
                        new = (uintptr_t)l | RW_WRITE_LOCKED;

                        if (wcnt > 1)
                                new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
                        else if (rcnt != 0)
                                new |= RW_HAS_WAITERS;

                        if (!RW_RELEASE(rw, owner, new)) {
                                /* Oops, try again. */
                                turnstile_exit(rw);
                                continue;
                        }

                        /* Wake the writer. */
                        turnstile_wakeup(ts, TS_WRITER_Q, wcnt, l);
                } else {
                        dcnt += rcnt;
                        RW_DASSERT(rw, dcnt != 0);

                        /*
                         * Give the lock to all blocked readers.  We may
                         * retain one read hold if downgrading.  If there
                         * is a writer waiting, new readers will be blocked
                         * out.
                         */
                        new = dcnt << RW_READ_COUNT_SHIFT;
                        if (wcnt != 0)
                                new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
                        if (!RW_RELEASE(rw, owner, new)) {
                                /* Oops, try again. */
                                turnstile_exit(rw);
                                continue;
                        }

                        /* Wake up all sleeping readers. */
                        turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
                }

                break;
        }
}

/*
 * rw_tryenter:
 *
 *      Try to acquire a rwlock.
 */
int
rw_tryenter(krwlock_t *rw, krw_t op)
{
        uintptr_t curthread, owner, incr, need_wait;

        curthread = (uintptr_t)curlwp;
        RW_ASSERT(rw, curthread != 0);

        if (op == RW_READER) {
                incr = RW_READ_INCR;
                need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
        } else {
                RW_DASSERT(rw, op == RW_WRITER);
                incr = curthread | RW_WRITE_LOCKED;
                need_wait = RW_WRITE_LOCKED | RW_THREAD;
        }

        for (;;) {
                owner = rw->rw_owner;
                if ((owner & need_wait) == 0) {
                        if (RW_ACQUIRE(rw, owner, owner + incr)) {
                                /* Got it! */
                                break;
                        }
                        continue;
                }
                return 0;
        }

        RW_LOCKED(rw, op);
        RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
            (op == RW_READER && RW_COUNT(rw) != 0));
        return 1;
}
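
/*
 * Example usage (an illustrative sketch, not part of the original
 * sources; "lock" names a hypothetical caller-owned variable):
 *
 *      if (rw_tryenter(&lock, RW_WRITER)) {
 *              ...modify shared state...
 *              rw_exit(&lock);
 *      } else {
 *              ...take a fallback path rather than sleeping...
 *      }
 */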

/*
 * rw_downgrade:
 *
 *      Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
        uintptr_t owner, curthread;

        curthread = (uintptr_t)curlwp;
        RW_ASSERT(rw, curthread != 0);
        RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
        RW_ASSERT(rw, RW_OWNER(rw) == curthread);
        RW_UNLOCKED(rw);

        for (;;) {
                owner = rw->rw_owner;

                /* If there are waiters we need to do this the hard way. */
                if ((owner & RW_HAS_WAITERS) != 0) {
                        rw_vector_exit(rw, __RW_DOWNGRADE);
                        return;
                }

                /*
                 * Try swapping us down to one read hold.  If it fails, the
                 * lock condition has changed and we most likely now have
                 * waiters.
                 */
                if (RW_RELEASE(rw, owner, RW_READ_INCR))
                        break;
        }

        RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
        RW_DASSERT(rw, RW_COUNT(rw) != 0);
}
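
/*
 * Example usage (an illustrative sketch, not part of the original
 * sources): a writer that has finished updating can let other readers
 * in while it keeps a consistent view of the data:
 *
 *      rw_enter(&lock, RW_WRITER);
 *      ...modify shared state...
 *      rw_downgrade(&lock);
 *      ...continue reading; other readers may now enter...
 *      rw_exit(&lock);
 */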

/*
 * rw_tryupgrade:
 *
 *      Try to upgrade a read lock to a write lock.  We must be the
 *      only reader.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
        uintptr_t owner, curthread, new;

        curthread = (uintptr_t)curlwp;
        RW_ASSERT(rw, curthread != 0);

        for (;;) {
                owner = rw->rw_owner;
                RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
                if ((owner & RW_THREAD) != RW_READ_INCR) {
                        RW_ASSERT(rw, (owner & RW_THREAD) != 0);
                        return 0;
                }
                new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
                if (RW_ACQUIRE(rw, owner, new))
                        break;
        }

        RW_LOCKED(rw, RW_WRITER);
        RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
        RW_DASSERT(rw, RW_OWNER(rw) == curthread);

        return 1;
}
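
/*
 * Example usage (an illustrative sketch, not part of the original
 * sources): another reader can make the upgrade fail, so callers must
 * be prepared to drop the read hold, reacquire as a writer, and
 * re-validate anything examined under the read hold:
 *
 *      rw_enter(&lock, RW_READER);
 *      if (!rw_tryupgrade(&lock)) {
 *              rw_exit(&lock);
 *              rw_enter(&lock, RW_WRITER);
 *              ...re-check the state read earlier...
 *      }
 */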

/*
 * rw_read_held:
 *
 *      Returns true if the rwlock is held for reading.  Must only be
 *      used for diagnostic assertions, and never be used to make
 *      decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
        uintptr_t owner;

        owner = rw->rw_owner;
        return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *      Returns true if the rwlock is held for writing.  Must only be
 *      used for diagnostic assertions, and never be used to make
 *      decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

        return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
}
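
/*
 * Example usage (an illustrative sketch, not part of the original
 * sources): these predicates belong in diagnostic assertions such as
 *
 *      KASSERT(rw_write_held(&lock));
 *
 * and never in logic that decides whether taking the lock can be
 * skipped, since the answer may change the moment it is returned.
 */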

/*
 * Stub routines for platforms that do not provide fast-path versions
 * of rw_enter() and rw_exit().
 */
#ifndef __HAVE_RW_ENTER
void
rw_enter(krwlock_t *rw, krw_t op)
{

        rw_vector_enter(rw, op);
}
#endif

#ifndef __HAVE_RW_EXIT
void
rw_exit(krwlock_t *rw)
{
        krw_t op;

        op = ((rw->rw_owner & RW_WRITE_LOCKED) ? RW_WRITER : RW_READER);
        rw_vector_exit(rw, op);
}
#endif