/*	$NetBSD: kern_rwlock.c,v 1.1.36.6 2007/01/11 22:22:59 ad Exp $	*/

/*-
 * Copyright (c) 2002, 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 */
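
/*
 * A minimal usage sketch (illustrative only; 'frob_lock' and the
 * surrounding code are hypothetical, not part of this file):
 *
 *	krwlock_t frob_lock;
 *
 *	rw_init(&frob_lock);
 *
 *	rw_enter(&frob_lock, RW_READER);	(many readers may hold)
 *	rw_exit(&frob_lock);
 *
 *	rw_enter(&frob_lock, RW_WRITER);	(exclusive)
 *	rw_exit(&frob_lock);
 *
 *	rw_destroy(&frob_lock);
 */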

#include "opt_multiprocessor.h"

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.1.36.6 2007/01/11 22:22:59 ad Exp $");

#define	__RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>

#include <dev/lockstat.h>

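/*
 * RW_ABORT: report a fatal rwlock error through the LOCKDEBUG
 * machinery, identifying the lock, its lock operations vector and
 * the function in which the failure was detected.
 */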
#define	RW_ABORT(rw, msg)						\
	LOCKDEBUG_ABORT(RW_GETID(rw), rw, &rwlock_lockops, __FUNCTION__, msg)

/*
 * LOCKDEBUG
 */

#if defined(LOCKDEBUG)

#define	RW_LOCKED(rw, op)						\
do {									\
	LOCKDEBUG_LOCKED(RW_GETID(rw),					\
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);	\
} while (/* CONSTCOND */ 0)

#define	RW_UNLOCKED(rw, op)						\
do {									\
	LOCKDEBUG_UNLOCKED(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);	\
} while (/* CONSTCOND */ 0)

#define	RW_DASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	RW_LOCKED(rw, op)	/* nothing */
#define	RW_UNLOCKED(rw, op)	/* nothing */
#define	RW_DASSERT(rw, cond)	/* nothing */

#endif	/* LOCKDEBUG */

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)

#define	RW_ASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else

#define	RW_ASSERT(rw, cond)	/* nothing */

#endif	/* DIAGNOSTIC */

/*
 * For platforms that use 'simple' RW locks.
 */
#ifdef __HAVE_SIMPLE_RW_LOCKS
#define	RW_ACQUIRE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)
#define	RW_RELEASE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)
#define	RW_SETID(rw, id)		((rw)->rw_id = id)
#define	RW_GETID(rw)			((rw)->rw_id)

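/*
 * RW_SET_WAITERS: try to set the given wait bits with compare-and-swap,
 * but only while at least one of the 'need' bits is still set (that is,
 * while the lock is still held in a conflicting way).  A zero return
 * means the lock changed underneath us: the caller should retry its
 * fast path instead of sleeping.
 */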
static inline int
RW_SET_WAITERS(krwlock_t *rw, uintptr_t need, uintptr_t set)
{
	uintptr_t old;

	if (((old = rw->rw_owner) & need) == 0)
		return 0;
	return RW_CAS(&rw->rw_owner, old, old | set);
}
#endif	/* __HAVE_SIMPLE_RW_LOCKS */

/*
 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
 */
#ifdef LOCKDEBUG
#undef	__HAVE_RW_STUBS
#endif

#ifndef __HAVE_RW_STUBS
__strong_alias(rw_enter, rw_vector_enter);
__strong_alias(rw_exit, rw_vector_exit);
#endif

void	rw_dump(volatile void *);

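/*
 * Lock operations vector handed to LOCKDEBUG: the lock's print name,
 * a flag (see lockops_t in <sys/lockdebug.h> for its meaning), and
 * the dump routine used when an error is reported.
 */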
lockops_t rwlock_lockops = {
	"Reader / writer lock",
	1,
	rw_dump
};

/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
 */
void
rw_dump(volatile void *cookie)
{
	volatile krwlock_t *rw = cookie;

	printf_nolog("owner/count : %#018lx flags : %#018x\n",
	    (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_init:
 *
 *	Initialize a rwlock for use.
 */
void
rw_init(krwlock_t *rw)
{
	u_int id;

	memset(rw, 0, sizeof(*rw));

	id = LOCKDEBUG_ALLOC(rw, &rwlock_lockops);
	RW_SETID(rw, id);
}

/*
 * rw_destroy:
 *
 *	Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

	LOCKDEBUG_FREE(rw, RW_GETID(rw));
	RW_ASSERT(rw, rw->rw_owner == 0);
}

/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
rw_vector_enter(krwlock_t *rw, const krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread;
	turnstile_t *ts;
	int queue;
	LOCKSTAT_TIMER(slptime);
	struct lwp *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);

#ifdef LOCKDEBUG
	if (panicstr == NULL) {
		simple_lock_only_held(NULL, "rw_enter");
#ifdef MULTIPROCESSOR
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
#else
		LOCKDEBUG_BARRIER(NULL, 1);
#endif
	}
#endif

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
	 */
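	/*
	 * For example (values purely illustrative): taking an unowned
	 * lock (rw_owner == 0) as a reader computes 0 + RW_READ_INCR,
	 * and taking it as a writer computes
	 * 0 + (curthread | RW_WRITE_LOCKED); both are the same
	 * compare-and-swap of rw_owner from 'owner' to 'owner + incr'.
	 */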
	if (op == RW_READER) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
		queue = TS_READER_Q;
	} else {
		RW_DASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
		queue = TS_WRITER_Q;
	}

	for (;;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0) {
			if (RW_ACQUIRE(rw, owner, owner + incr)) {
				/* Got it! */
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			continue;
		}

		if (panicstr != NULL)
			return;
		if (RW_OWNER(rw) == curthread)
			RW_ABORT(rw, "locking against myself");

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 */
		if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
			turnstile_exit(rw);
			continue;
		}

		LOCKSTAT_START_TIMER(slptime);

		turnstile_block(ts, queue, sched_kpri(l), rw);

		/* If we wake up and arrive here, we've been handed the lock. */
		RW_RECEIVE(rw);

		LOCKSTAT_STOP_TIMER(slptime);
		LOCKSTAT_EVENT(rw,
		    LB_RWLOCK | (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2),
		    1, slptime);

		turnstile_unblock();
		break;
	}

	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	RW_LOCKED(rw, op);
}

/*
 * rw_vector_exit:
 *
 *	Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw)
{
	uintptr_t curthread, owner, decr, new;
	turnstile_t *ts;
	int rcnt, wcnt, dcnt;
	struct lwp *l;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);

	if (panicstr != NULL) {
		/*
		 * XXX What's the correct thing to do here?  We should at
		 * least release the lock.
		 */
		return;
	}

	/*
	 * Again, we use a trick.  Since we used an add operation to
	 * set the required lock bits, we can use a subtract to clear
	 * them, which makes the read-release and write-release path
	 * the same.
	 */
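	/*
	 * Concretely (a sketch of the arithmetic): a read release
	 * subtracts RW_READ_INCR, a write release subtracts
	 * (curthread | RW_WRITE_LOCKED), and a downgrade subtracts the
	 * write bits less one RW_READ_INCR, converting the write hold
	 * into a single read hold.
	 */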
	owner = rw->rw_owner;
	if (__predict_false((owner & RW_WRITE_LOCKED) != 0)) {
		RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		if (__predict_false((owner & RW_DOWNGRADING) != 0)) {
			/* RW_UNLOCKED() is already done */
			dcnt = 1;
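			/*
			 * Subtracting this clears the write hold but
			 * leaves one read hold in place.
			 */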
			decr = (curthread | RW_WRITE_LOCKED) - RW_READ_INCR;
		} else {
			RW_UNLOCKED(rw, RW_WRITER);
			dcnt = 0;
			decr = curthread | RW_WRITE_LOCKED;
		}
	} else {
		RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
		RW_ASSERT(rw, RW_COUNT(rw) != 0);
		RW_UNLOCKED(rw, RW_READER);
		dcnt = 0;
		decr = RW_READ_INCR;
	}

	for (;; owner = rw->rw_owner) {
		/*
		 * Compute what we expect the new value of the lock to be.
		 * Only proceed to do direct handoff if there are waiters,
		 * and if the lock would become unowned.
		 */
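		/*
		 * That is, hand off only when the result would be exactly
		 * RW_HAS_WAITERS: someone is waiting and no owner remains.
		 */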
		new = (owner - decr) & ~RW_WRITE_WANTED;
		if ((new & (RW_THREAD | RW_HAS_WAITERS)) != RW_HAS_WAITERS) {
			if (RW_RELEASE(rw, owner, new))
				break;
			continue;
		}

		/*
		 * Grab the turnstile chain lock.  This gets the interlock
		 * on the sleep queue.  Once we have that, we can adjust the
		 * waiter bits.
		 */
		ts = turnstile_lookup(rw);
		RW_DASSERT(rw, ts != NULL);

		wcnt = TS_WAITERS(ts, TS_WRITER_Q);
		rcnt = TS_WAITERS(ts, TS_READER_Q);

		/*
		 * Give the lock away.
		 *
		 * If we are releasing a write lock or downgrading a write
		 * lock to read, then wake all outstanding readers.  If we
		 * are releasing a read lock, then wake one writer.
		 */
		if (dcnt == 0 &&
		    (rcnt == 0 || (decr == RW_READ_INCR && wcnt != 0))) {
			RW_DASSERT(rw, wcnt != 0);

			/*
			 * Give the lock to the longest waiting
			 * writer.
			 */
			l = TS_FIRST(ts, TS_WRITER_Q);
			new = (uintptr_t)l | RW_WRITE_LOCKED;

			if (wcnt > 1)
				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
			else if (rcnt != 0)
				new |= RW_HAS_WAITERS;

			RW_GIVE(rw);
			if (!RW_RELEASE(rw, owner, new)) {
				/* Oops, try again. */
				turnstile_exit(rw);
				continue;
			}

			/* Wake the writer. */
			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, l);
		} else {
			dcnt += rcnt;
			RW_DASSERT(rw, dcnt != 0);

			/*
			 * Give the lock to all blocked readers.  We may
			 * retain one read hold if downgrading.  If there
			 * is a writer waiting, new readers will be blocked
			 * out.
			 */
			new = dcnt << RW_READ_COUNT_SHIFT;
			if (wcnt != 0)
				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;

			RW_GIVE(rw);
			if (!RW_RELEASE(rw, owner, new)) {
				/* Oops, try again. */
				turnstile_exit(rw);
				continue;
			}

			/* Wake up all sleeping readers. */
			turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
		}

		break;
	}
}

/*
 * rw_tryenter:
 *
 *	Try to acquire a rwlock without blocking.  Returns non-zero on
 *	success.
 */
int
rw_tryenter(krwlock_t *rw, const krw_t op)
{
	uintptr_t curthread, owner, incr, need_wait;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);

	if (op == RW_READER) {
		incr = RW_READ_INCR;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_DASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (;;) {
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0) {
			if (RW_ACQUIRE(rw, owner, owner + incr)) {
				/* Got it! */
				break;
			}
			continue;
		}
		return 0;
	}

	RW_LOCKED(rw, op);
	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	return 1;
}

/*
 * rw_downgrade:
 *
 *	Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);
	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
	RW_UNLOCKED(rw, RW_WRITER);

	for (;;) {
		owner = rw->rw_owner;

		/* If there are waiters we need to do this the hard way. */
		if ((owner & RW_HAS_WAITERS) != 0) {
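			/*
			 * Set RW_DOWNGRADING and let rw_vector_exit()
			 * do the handoff; it retains one read hold on
			 * our behalf.
			 */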
			if (!RW_RELEASE(rw, owner, owner | RW_DOWNGRADING))
				continue;
			rw_vector_exit(rw);
			break;
		}

		/*
		 * Try swapping us down to one read hold.  If it fails, the
		 * lock condition has changed and we most likely now have
		 * waiters.
		 */
		if (RW_RELEASE(rw, owner, RW_READ_INCR))
			break;
	}

	RW_LOCKED(rw, RW_READER);
	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
	RW_DASSERT(rw, RW_COUNT(rw) != 0);
}

/*
 * rw_tryupgrade:
 *
 *	Try to upgrade a read lock to a write lock.  We must be the
 *	only reader.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, new;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);

	for (;;) {
		owner = rw->rw_owner;
		RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
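		/*
		 * Upgrade is possible only if we are the sole reader:
		 * the thread field must be exactly one RW_READ_INCR.
		 */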
		if ((owner & RW_THREAD) != RW_READ_INCR) {
			RW_ASSERT(rw, (owner & RW_THREAD) != 0);
			return 0;
		}
		new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
		if (RW_ACQUIRE(rw, owner, new))
			break;
	}

	RW_LOCKED(rw, RW_WRITER);
	RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
	RW_DASSERT(rw, RW_OWNER(rw) == curthread);

	return 1;
}

/*
 * rw_read_held:
 *
 *	Returns true if the rwlock is held for reading.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
	uintptr_t owner;

	if (panicstr != NULL)
		return 1;

	owner = rw->rw_owner;
	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *	Returns true if the rwlock is held for writing.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

	if (panicstr != NULL)
		return 1;

	return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
}

/*
 * rw_lock_held:
 *
 *	Returns true if the rwlock is held for reading or writing.  Must
 *	only be used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

	if (panicstr != NULL)
		return 1;

	return (rw->rw_owner & RW_THREAD) != 0;
}