/*	$NetBSD: kern_rwlock.c,v 1.1.36.7 2007/01/31 13:09:11 ad Exp $	*/

/*-
 * Copyright (c) 2002, 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 */
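
/*
 * Illustrative usage sketch (not part of this file; example_lock is a
 * made-up name).  A typical caller initializes the lock once, then
 * brackets critical sections with rw_enter()/rw_exit():
 *
 *	static krwlock_t example_lock;
 *
 *	rw_init(&example_lock);
 *
 *	rw_enter(&example_lock, RW_READER);	// many readers may hold
 *	... read shared data ...
 *	rw_exit(&example_lock);
 *
 *	rw_enter(&example_lock, RW_WRITER);	// exclusive access
 *	... modify shared data ...
 *	rw_exit(&example_lock);
 *
 *	rw_destroy(&example_lock);
 */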

#include "opt_multiprocessor.h"

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.1.36.7 2007/01/31 13:09:11 ad Exp $");

#define	__RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>

#include <dev/lockstat.h>

#define	RW_ABORT(rw, msg)						\
	LOCKDEBUG_ABORT(RW_GETID(rw), rw, &rwlock_lockops, __FUNCTION__, msg)

/*
 * LOCKDEBUG
 */

#if defined(LOCKDEBUG)

#define	RW_WANTLOCK(rw, op)						\
	LOCKDEBUG_WANTLOCK(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_LOCKED(rw, op)						\
	LOCKDEBUG_LOCKED(RW_GETID(rw),					\
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_UNLOCKED(rw, op)						\
	LOCKDEBUG_UNLOCKED(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);
#define	RW_DASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	RW_WANTLOCK(rw, op)	/* nothing */
#define	RW_LOCKED(rw, op)	/* nothing */
#define	RW_UNLOCKED(rw, op)	/* nothing */
#define	RW_DASSERT(rw, cond)	/* nothing */

#endif	/* LOCKDEBUG */

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)

#define	RW_ASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else

#define	RW_ASSERT(rw, cond)	/* nothing */

#endif	/* DIAGNOSTIC */

/*
 * For platforms that use 'simple' RW locks.
 */
#ifdef __HAVE_SIMPLE_RW_LOCKS
#define	RW_ACQUIRE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)
#define	RW_RELEASE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)
#define	RW_SETID(rw, id)		((rw)->rw_id = id)
#define	RW_GETID(rw)			((rw)->rw_id)

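/*
 * RW_SET_WAITERS: if one of the 'need' bits is still set in the owner
 * field, try to compare-and-swap the 'set' (waiter) bits into place.
 * Returns non-zero on success.  Returns zero if the need-to-wait
 * condition has since cleared, or if the CAS lost a race; in either
 * case the caller should retry the acquisition rather than sleep.
 */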
static inline int
RW_SET_WAITERS(krwlock_t *rw, uintptr_t need, uintptr_t set)
{
	uintptr_t old;

	if (((old = rw->rw_owner) & need) == 0)
		return 0;
	return RW_CAS(&rw->rw_owner, old, old | set);
}
#endif	/* __HAVE_SIMPLE_RW_LOCKS */

/*
 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 */
#ifdef LOCKDEBUG
#undef	__HAVE_RW_STUBS
#endif

#ifndef __HAVE_RW_STUBS
__strong_alias(rw_enter, rw_vector_enter);
__strong_alias(rw_exit, rw_vector_exit);
#endif

void	rw_dump(volatile void *);

lockops_t rwlock_lockops = {
	"Reader / writer lock",
	1,
	rw_dump
};

/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
 */
void
rw_dump(volatile void *cookie)
{
	volatile krwlock_t *rw = cookie;

	printf_nolog("owner/count : %#018lx flags : %#018x\n",
	    (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_init:
 *
 *	Initialize a rwlock for use.
 */
void
rw_init(krwlock_t *rw)
{
	u_int id;

	memset(rw, 0, sizeof(*rw));

	id = LOCKDEBUG_ALLOC(rw, &rwlock_lockops);
	RW_SETID(rw, id);
}

/*
 * rw_destroy:
 *
 *	Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

	LOCKDEBUG_FREE(rw, RW_GETID(rw));
	RW_ASSERT(rw, rw->rw_owner == 0);
}

/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread;
	turnstile_t *ts;
	int queue;
	LOCKSTAT_TIMER(slptime);
	struct lwp *l;

	l = curlwp;
	curthread = (uintptr_t)l;

	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, op);

#ifdef LOCKDEBUG
	if (panicstr == NULL) {
		simple_lock_only_held(NULL, "rw_enter");
#ifdef MULTIPROCESSOR
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
#else
		LOCKDEBUG_BARRIER(NULL, 1);
#endif
	}
#endif

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the RW_WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation serves for both cases.
	 */
	if (op == RW_READER) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
		queue = TS_READER_Q;
	} else {
		RW_DASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
		queue = TS_WRITER_Q;
	}
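
	/*
	 * Worked example of the add trick (illustrative only; the real
	 * bit layout lives in sys/rwlock.h): an unheld lock has
	 * rw_owner == 0.  Each reader adds RW_READ_INCR, so two readers
	 * leave a count of two in the RW_THREAD field.  A writer adds
	 * curthread | RW_WRITE_LOCKED; since the add is attempted only
	 * when the owner/count bits are all zero, the addition behaves
	 * as a plain store of the new owner.
	 */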

	for (;;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0) {
			if (RW_ACQUIRE(rw, owner, owner + incr)) {
				/* Got it! */
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			continue;
		}

		if (panicstr != NULL)
			return;
		if (RW_OWNER(rw) == curthread)
			RW_ABORT(rw, "locking against myself");

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 */
		if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
			turnstile_exit(rw);
			continue;
		}

		LOCKSTAT_START_TIMER(slptime);

		turnstile_block(ts, queue, sched_kpri(l), rw);

		/* If we wake up and arrive here, we've been handed the lock. */
		RW_RECEIVE(rw);

		LOCKSTAT_STOP_TIMER(slptime);
		LOCKSTAT_EVENT(rw,
		    LB_RWLOCK | (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2),
		    1, slptime);

		turnstile_unblock();
		break;
	}

	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	RW_LOCKED(rw, op);
}

/*
 * rw_vector_exit:
 *
 *	Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw)
{
	uintptr_t curthread, owner, decr, new;
	turnstile_t *ts;
	int rcnt, wcnt, dcnt;
	struct lwp *l;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);

	if (panicstr != NULL) {
		/*
		 * XXX What's the correct thing to do here?  We should at
		 * least release the lock.
		 */
		return;
	}

	/*
	 * Again, we use a trick.  Since we used an add operation to
	 * set the required lock bits, we can use a subtract to clear
	 * them, which makes the read-release and write-release path
	 * the same.
	 */
	owner = rw->rw_owner;
	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
		RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		if (__predict_false((owner & RW_DOWNGRADING) != 0)) {
			/* RW_UNLOCKED() is already done */
			dcnt = 1;
			decr = (curthread | RW_WRITE_LOCKED) - RW_READ_INCR;
		} else {
			RW_UNLOCKED(rw, RW_WRITER);
			dcnt = 0;
			decr = curthread | RW_WRITE_LOCKED;
		}
	} else {
		RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
		RW_ASSERT(rw, RW_COUNT(rw) != 0);
		RW_UNLOCKED(rw, RW_READER);
		dcnt = 0;
		decr = RW_READ_INCR;
	}
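
	/*
	 * Note on the downgrade case above: subtracting
	 * (curthread | RW_WRITE_LOCKED) - RW_READ_INCR removes our
	 * write ownership and, in the same operation, leaves one read
	 * hold behind, so the lock remains held for reading.
	 */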

	for (;; owner = rw->rw_owner) {
		/*
		 * Compute what we expect the new value of the lock to be.
		 * Only proceed to do direct handoff if there are waiters,
		 * and if the lock would become unowned.
		 */
		new = (owner - decr) & ~RW_WRITE_WANTED;
		if ((new & (RW_THREAD | RW_HAS_WAITERS)) != RW_HAS_WAITERS) {
			if (RW_RELEASE(rw, owner, new))
				break;
			continue;
		}

		/*
		 * Grab the turnstile chain lock.  This gets the interlock
		 * on the sleep queue.  Once we have that, we can adjust the
		 * waiter bits.
		 */
		ts = turnstile_lookup(rw);
		RW_DASSERT(rw, ts != NULL);

		wcnt = TS_WAITERS(ts, TS_WRITER_Q);
		rcnt = TS_WAITERS(ts, TS_READER_Q);

		/*
		 * Give the lock away.
		 *
		 * If we are releasing a write lock or downgrading a write
		 * lock to read, then wake all outstanding readers.  If we
		 * are releasing a read lock, then wake one writer.
		 */
		if (dcnt == 0 &&
		    (rcnt == 0 || (decr == RW_READ_INCR && wcnt != 0))) {
			RW_DASSERT(rw, wcnt != 0);

			/*
			 * Give the lock to the longest waiting
			 * writer.
			 */
			l = TS_FIRST(ts, TS_WRITER_Q);
			new = (uintptr_t)l | RW_WRITE_LOCKED;

			if (wcnt > 1)
				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
			else if (rcnt != 0)
				new |= RW_HAS_WAITERS;

			RW_GIVE(rw);
			if (!RW_RELEASE(rw, owner, new)) {
				/* Oops, try again. */
				turnstile_exit(rw);
				continue;
			}

			/* Wake the writer. */
			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, l);
		} else {
			dcnt += rcnt;
			RW_DASSERT(rw, dcnt != 0);

			/*
			 * Give the lock to all blocked readers.  We may
			 * retain one read hold if downgrading.  If there
			 * is a writer waiting, new readers will be blocked
			 * out.
			 */
			new = dcnt << RW_READ_COUNT_SHIFT;
			if (wcnt != 0)
				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;

			RW_GIVE(rw);
			if (!RW_RELEASE(rw, owner, new)) {
				/* Oops, try again. */
				turnstile_exit(rw);
				continue;
			}

			/* Wake up all sleeping readers. */
			turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
		}

		break;
	}
}

/*
 * rw_tryenter:
 *
 *	Try to acquire a rwlock.
 */
int
rw_tryenter(krwlock_t *rw, const krw_t op)
{
	uintptr_t curthread, owner, incr, need_wait;

	curthread = (uintptr_t)curlwp;

	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, op);

	if (op == RW_READER) {
		incr = RW_READ_INCR;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_DASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (;;) {
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0) {
			if (RW_ACQUIRE(rw, owner, owner + incr)) {
				/* Got it! */
				break;
			}
			continue;
		}
		return 0;
	}

	RW_LOCKED(rw, op);
	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	return 1;
}
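
/*
 * Illustrative rw_tryenter() usage (hypothetical caller; example_lock
 * is a made-up name).  The point of rw_tryenter() is to avoid ever
 * sleeping, so the caller needs a fallback path:
 *
 *	if (rw_tryenter(&example_lock, RW_WRITER)) {
 *		... update shared state ...
 *		rw_exit(&example_lock);
 *	} else {
 *		... take a slow path rather than sleeping ...
 *	}
 */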

/*
 * rw_downgrade:
 *
 *	Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);
	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
	RW_UNLOCKED(rw, RW_WRITER);

	for (;;) {
		owner = rw->rw_owner;

		/* If there are waiters we need to do this the hard way. */
		if ((owner & RW_HAS_WAITERS) != 0) {
			if (!RW_RELEASE(rw, owner, owner | RW_DOWNGRADING))
				continue;
			rw_vector_exit(rw);
			break;
		}

		/*
		 * Try swapping us down to one read hold.  If it fails, the
		 * lock condition has changed and we most likely now have
		 * waiters.
		 */
		if (RW_RELEASE(rw, owner, RW_READ_INCR))
			break;
	}

	RW_LOCKED(rw, RW_READER);
	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
	RW_DASSERT(rw, RW_COUNT(rw) != 0);
}

/*
 * rw_tryupgrade:
 *
 *	Try to upgrade a read lock to a write lock.  We must be the
 *	only reader.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, new;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);
	RW_WANTLOCK(rw, RW_WRITER);

	for (;;) {
		owner = rw->rw_owner;
		RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
		if ((owner & RW_THREAD) != RW_READ_INCR) {
			RW_ASSERT(rw, (owner & RW_THREAD) != 0);
			return 0;
		}
		new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
		if (RW_ACQUIRE(rw, owner, new))
			break;
	}

	RW_LOCKED(rw, RW_WRITER);
	RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
	RW_DASSERT(rw, RW_OWNER(rw) == curthread);

	return 1;
}
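
/*
 * Illustrative upgrade pattern (hypothetical caller): try to upgrade
 * in place and fall back to dropping and re-taking the lock, since
 * rw_tryupgrade() fails whenever we are not the sole reader:
 *
 *	rw_enter(&example_lock, RW_READER);
 *	...
 *	if (!rw_tryupgrade(&example_lock)) {
 *		rw_exit(&example_lock);
 *		rw_enter(&example_lock, RW_WRITER);
 *		... re-validate state; it may have changed ...
 *	}
 */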

/*
 * rw_read_held:
 *
 *	Returns true if the rwlock is held for reading.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
	uintptr_t owner;

	if (panicstr != NULL)
		return 1;

	owner = rw->rw_owner;
	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *	Returns true if the rwlock is held for writing.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

	if (panicstr != NULL)
		return 1;

	return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
}

/*
 * rw_lock_held:
 *
 *	Returns true if the rwlock is held for reading or writing.  Must
 *	only be used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

	if (panicstr != NULL)
		return 1;

	return (rw->rw_owner & RW_THREAD) != 0;
}
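
/*
 * As the comments above say, the held predicates are for assertions
 * only, e.g. (hypothetical caller):
 *
 *	KASSERT(rw_write_held(&example_lock));
 *	... code that requires the write lock ...
 */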