/*	$NetBSD: kern_rwlock.c,v 1.1.36.4 2006/11/17 16:34:36 ad Exp $	*/

/*-
 * Copyright (c) 2002, 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Kernel reader/writer lock implementation, modeled after those
 * found in Solaris, a description of which can be found in:
 *
 *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
 *	Richard McDougall.
 */

#include "opt_multiprocessor.h"

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.1.36.4 2006/11/17 16:34:36 ad Exp $");

#define	__RWLOCK_PRIVATE

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/sleepq.h>
#include <sys/systm.h>
#include <sys/lockdebug.h>

#include <dev/lockstat.h>

#define	RW_ABORT(rw, msg)						\
	LOCKDEBUG_ABORT(RW_GETID(rw), rw, &rwlock_lockops, __FUNCTION__, msg)

/*
 * LOCKDEBUG
 */

#if defined(LOCKDEBUG)

#define	RW_LOCKED(rw, op)						\
do {									\
	LOCKDEBUG_LOCKED(RW_GETID(rw),					\
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);	\
} while (/* CONSTCOND */ 0)

#define	RW_UNLOCKED(rw, op)						\
do {									\
	LOCKDEBUG_UNLOCKED(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0), op == RW_READER);	\
} while (/* CONSTCOND */ 0)

#define	RW_DASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	RW_LOCKED(rw, op)	/* nothing */
#define	RW_UNLOCKED(rw, op)	/* nothing */
#define	RW_DASSERT(rw, cond)	/* nothing */

#endif	/* LOCKDEBUG */

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)

#define	RW_ASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else

#define	RW_ASSERT(rw, cond)	/* nothing */

#endif	/* DIAGNOSTIC */

int	rw_dump(volatile void *, char *, size_t);

lockops_t rwlock_lockops = {
	"Reader / writer lock",
	1,
	rw_dump
};

/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure.
 */
int
rw_dump(volatile void *cookie, char *buf, size_t l)
{
	volatile krwlock_t *rw = cookie;

	return snprintf(buf, l, "owner/count : 0x%16lx flags : 0x%16x\n",
	    (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
}

/*
 * rw_init:
 *
 *	Initialize a rwlock for use.
 */
void
rw_init(krwlock_t *rw)
{
	u_int id;

	memset(rw, 0, sizeof(*rw));

	id = LOCKDEBUG_ALLOC(rw, &rwlock_lockops);
	RW_SETID(rw, id);
}

/*
 * rw_destroy:
 *
 *	Tear down a rwlock.
 */
void
rw_destroy(krwlock_t *rw)
{

	LOCKDEBUG_FREE(rw, RW_GETID(rw));
	RW_ASSERT(rw, rw->rw_owner == 0);
}
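
/*
 * Typical lifecycle (illustrative sketch only; "foo_lock" and the
 * surrounding subsystem are hypothetical):
 *
 *	krwlock_t foo_lock;
 *
 *	rw_init(&foo_lock);
 *	...
 *	rw_enter(&foo_lock, RW_READER);		read shared state
 *	rw_exit(&foo_lock);
 *	...
 *	rw_enter(&foo_lock, RW_WRITER);		modify shared state
 *	rw_exit(&foo_lock);
 *	...
 *	rw_destroy(&foo_lock);
 */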

/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock.
 */
void
#ifdef __NEED_RW_CALLSITE
rw_vector_enter(krwlock_t *rw, krw_t op, uintptr_t callsite)
#else
rw_vector_enter(krwlock_t *rw, krw_t op)
#endif
{
	uintptr_t owner, incr, need_wait, set_wait, curthread;
	turnstile_t *ts;
	int queue;
	LOCKSTAT_TIMER(slptime);
	struct lwp *l;

	l = curlwp;
	curthread = (uintptr_t)l;
	RW_ASSERT(rw, curthread != 0);

#ifdef LOCKDEBUG
	if (panicstr == NULL) {
		simple_lock_only_held(NULL, "rw_enter");
		LOCKDEBUG_BARRIER(&kernel_lock, 1);
	}
#endif

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the RW_WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
	 */
	if (op == RW_READER) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
		queue = TS_READER_Q;
	} else {
		RW_DASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
		queue = TS_WRITER_Q;
	}
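
	/*
	 * Worked example of the add trick (illustrative; the exact bit
	 * layout comes from sys/rwlock.h):
	 *
	 *	free lock:	rw_owner == 0
	 *	1st reader:	0 + RW_READ_INCR		(count 1)
	 *	2nd reader:	RW_READ_INCR + RW_READ_INCR	(count 2)
	 *	writer:		0 + (curthread | RW_WRITE_LOCKED)
	 *
	 * In the writer case the owner bits are known to be zero, so
	 * the add behaves like a bitwise OR and never carries.
	 */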

	for (;;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0) {
			if (RW_ACQUIRE(rw, owner, owner + incr)) {
				/* Got it! */
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			continue;
		}

		if (panicstr != NULL)
			return;
		if (RW_OWNER(rw) == curthread)
			RW_ABORT(rw, "locking against myself");

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 */
		if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
			turnstile_exit(rw);
			continue;
		}

		LOCKSTAT_START_TIMER(slptime);

		turnstile_block(ts, queue, sched_kpri(l), rw);

		/* If we wake up and arrive here, we've been handed the lock. */
		RW_RECEIVE(rw);

		LOCKSTAT_STOP_TIMER(slptime);
#ifdef __NEED_RW_CALLSITE
		LOCKSTAT_EVENT_RA(rw, LB_RWLOCK | LB_SLEEP, 1, slptime,
		    callsite);
#else
		LOCKSTAT_EVENT(rw, LB_RWLOCK | LB_SLEEP, 1, slptime);
#endif

		turnstile_unblock();
		break;
	}

	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
}

/*
 * rw_vector_exit:
 *
 *	Release a rwlock.
 */
void
rw_vector_exit(krwlock_t *rw, krw_t op)
{
	uintptr_t curthread, owner, decr, new;
	turnstile_t *ts;
	int rcnt, wcnt, dcnt;
	struct lwp *l;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);

	if (panicstr != NULL) {
		/*
		 * XXX What's the correct thing to do here?  We should at
		 * least release the lock.
		 */
		return;
	}

	/*
	 * Again, we use a trick.  Since we used an add operation to
	 * set the required lock bits, we can use a subtract to clear
	 * them, which makes the read-release and write-release path
	 * the same.
	 */
	switch (op) {
	case RW_READER:
		RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
		RW_ASSERT(rw, RW_COUNT(rw) != 0);
		dcnt = 0;
		decr = RW_READ_INCR;
		break;
	case RW_WRITER:
		RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		dcnt = 0;
		decr = curthread | RW_WRITE_LOCKED;
		break;
	case __RW_DOWNGRADE:
		RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
		dcnt = 1;
		decr = (curthread | RW_WRITE_LOCKED) - RW_READ_INCR;
		break;
	default:
		/* NOTREACHED -- the default case exists to quiet gcc. */
		RW_DASSERT(rw, !"blame gcc, I do");
		return;
	}
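
	/*
	 * For example (sketch): in the __RW_DOWNGRADE case the subtract
	 * removes the owner pointer and RW_WRITE_LOCKED but leaves one
	 * read hold behind:
	 *
	 *	owner        == curthread | RW_WRITE_LOCKED
	 *	owner - decr == RW_READ_INCR		(one read hold)
	 */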

	for (;;) {
		/*
		 * We assume that the caller has already tried to release
		 * the lock and optimize for the 'has waiters' case, and so
		 * grab the turnstile chain lock.  This gets the interlock
		 * on the sleep queue.  Once we have that, we can adjust the
		 * waiter bits.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Compute what we expect the new value of the lock to be.
		 * Only proceed to do direct handoff if there are waiters,
		 * and if the lock would become unowned.
		 */
		owner = rw->rw_owner;
		new = (owner - decr) & ~RW_WRITE_WANTED;
		if ((new & (RW_THREAD | RW_HAS_WAITERS)) != RW_HAS_WAITERS) {
			if (RW_RELEASE(rw, owner, new)) {
				turnstile_exit(rw);
				break;
			}
			turnstile_exit(rw);
			continue;
		}

		/*
		 * Adjust the waiter bits.  If we are releasing a write
		 * lock or downgrading a write lock to read, then wake all
		 * outstanding readers.  If we are releasing a read lock,
		 * then wake one writer.
		 */
		RW_DASSERT(rw, ts != NULL);

		wcnt = TS_WAITERS(ts, TS_WRITER_Q);
		rcnt = TS_WAITERS(ts, TS_READER_Q);

		/*
		 * Give the lock away.
		 */
		if (dcnt == 0 &&
		    (rcnt == 0 || (op == RW_READER && wcnt != 0))) {
			RW_DASSERT(rw, wcnt != 0);

			/*
			 * Give the lock to the longest waiting
			 * writer.
			 */
			l = TS_FIRST(ts, TS_WRITER_Q);
			new = (uintptr_t)l | RW_WRITE_LOCKED;

			if (wcnt > 1)
				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
			else if (rcnt != 0)
				new |= RW_HAS_WAITERS;

			if (!RW_RELEASE(rw, owner, new)) {
				/* Oops, try again. */
				turnstile_exit(rw);
				continue;
			}

			/* Wake the writer. */
			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, l);
		} else {
			dcnt += rcnt;
			RW_DASSERT(rw, dcnt != 0);

			/*
			 * Give the lock to all blocked readers.  We may
			 * retain one read hold if downgrading.  If there
			 * is a writer waiting, new readers will be blocked
			 * out.
			 */
			new = dcnt << RW_READ_COUNT_SHIFT;
			if (wcnt != 0)
				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
			if (!RW_RELEASE(rw, owner, new)) {
				/* Oops, try again. */
				turnstile_exit(rw);
				continue;
			}

			/* Wake up all sleeping readers. */
			turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
		}

		break;
	}
}
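
/*
 * Direct-handoff examples for rw_vector_exit (illustrative only):
 *
 *	- The last reader exits with one writer queued: the lock is
 *	  handed directly to that writer; RW_WRITE_WANTED is set again
 *	  only if more writers remain queued.
 *	- A writer exits with three readers and no writers queued: the
 *	  owner word becomes 3 << RW_READ_COUNT_SHIFT and all three
 *	  readers are woken.
 */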

/*
 * rw_tryenter:
 *
 *	Try to acquire a rwlock.
 */
int
rw_tryenter(krwlock_t *rw, krw_t op)
{
	uintptr_t curthread, owner, incr, need_wait;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);

	if (op == RW_READER) {
		incr = RW_READ_INCR;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_DASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (;;) {
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0) {
			if (RW_ACQUIRE(rw, owner, owner + incr)) {
				/* Got it! */
				break;
			}
			continue;
		}
		return 0;
	}

	RW_LOCKED(rw, op);
	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
	return 1;
}
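
/*
 * Example use of rw_tryenter (sketch; "foo_lock" is hypothetical).
 * The caller must be prepared for failure and must not spin-wait:
 *
 *	if (rw_tryenter(&foo_lock, RW_WRITER)) {
 *		... update shared state ...
 *		rw_exit(&foo_lock);
 *	} else {
 *		... defer the work or take a slow path ...
 *	}
 */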

/*
 * rw_downgrade:
 *
 *	Downgrade a write lock to a read lock.
 */
void
rw_downgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);
	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
	RW_UNLOCKED(rw, RW_WRITER);

	for (;;) {
		owner = rw->rw_owner;

		/* If there are waiters we need to do this the hard way. */
		if ((owner & RW_HAS_WAITERS) != 0) {
			rw_vector_exit(rw, __RW_DOWNGRADE);
			return;
		}

		/*
		 * Try swapping us down to one read hold.  If it fails, the
		 * lock condition has changed and we most likely now have
		 * waiters.
		 */
		if (RW_RELEASE(rw, owner, RW_READ_INCR))
			break;
	}

	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
	RW_DASSERT(rw, RW_COUNT(rw) != 0);
}
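
/*
 * Example use of rw_downgrade (sketch): a writer that has finished
 * modifying shared state but still needs a consistent view can admit
 * other readers early:
 *
 *	rw_enter(&foo_lock, RW_WRITER);
 *	... modify shared state ...
 *	rw_downgrade(&foo_lock);
 *	... continue reading; other readers may now enter ...
 *	rw_exit(&foo_lock);
 */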

/*
 * rw_tryupgrade:
 *
 *	Try to upgrade a read lock to a write lock.  We must be the
 *	only reader.
 */
int
rw_tryupgrade(krwlock_t *rw)
{
	uintptr_t owner, curthread, new;

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);

	for (;;) {
		owner = rw->rw_owner;
		RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
		if ((owner & RW_THREAD) != RW_READ_INCR) {
			RW_ASSERT(rw, (owner & RW_THREAD) != 0);
			return 0;
		}
		new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
		if (RW_ACQUIRE(rw, owner, new))
			break;
	}

	RW_LOCKED(rw, RW_WRITER);
	RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
	RW_DASSERT(rw, RW_OWNER(rw) == curthread);

	return 1;
}
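
/*
 * Example use of rw_tryupgrade (sketch): since the upgrade fails
 * whenever other readers hold the lock, the caller must be ready to
 * drop the read hold, reacquire as a writer, and revalidate:
 *
 *	rw_enter(&foo_lock, RW_READER);
 *	if (!rw_tryupgrade(&foo_lock)) {
 *		rw_exit(&foo_lock);
 *		rw_enter(&foo_lock, RW_WRITER);
 *		... revalidate: state may have changed ...
 *	}
 *	... modify shared state ...
 *	rw_exit(&foo_lock);
 */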

/*
 * rw_read_held:
 *
 *	Returns true if the rwlock is held for reading.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_read_held(krwlock_t *rw)
{
	uintptr_t owner;

	if (panicstr != NULL)
		return 1;

	owner = rw->rw_owner;
	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
}

/*
 * rw_write_held:
 *
 *	Returns true if the rwlock is held for writing.  Must only be
 *	used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_write_held(krwlock_t *rw)
{

	if (panicstr != NULL)
		return 1;

	return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
}

/*
 * rw_lock_held:
 *
 *	Returns true if the rwlock is held for reading or writing.  Must
 *	only be used for diagnostic assertions, and never be used to make
 *	decisions about how to use a rwlock.
 */
int
rw_lock_held(krwlock_t *rw)
{

	if (panicstr != NULL)
		return 1;

	return (rw->rw_owner & RW_THREAD) != 0;
}
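
/*
 * Example (sketch): the held predicates are intended for assertions
 * only, e.g. to verify a locking protocol in a hypothetical subsystem:
 *
 *	KASSERT(rw_write_held(&foo_lock));
 *	... code that requires the write lock ...
 *
 * Note that rw_read_held() cannot tell whether the current LWP is one
 * of the readers (read holds are only counted), so a true result is
 * only a weak diagnostic.
 */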

/*
 * Slow-path stubs for platforms that do not provide their own
 * fast-path implementations of rw_enter() and rw_exit().
 */
#ifndef __HAVE_RW_ENTER
void
rw_enter(krwlock_t *rw, krw_t op)
{

#ifdef __NEED_RW_CALLSITE
	rw_vector_enter(rw, op, (uintptr_t)__builtin_return_address(0));
#else
	rw_vector_enter(rw, op);
#endif
	RW_LOCKED(rw, op);
}
#endif

#ifndef __HAVE_RW_EXIT
void
rw_exit(krwlock_t *rw)
{
	krw_t op;

	op = ((rw->rw_owner & RW_WRITE_LOCKED) ? RW_WRITER : RW_READER);
	RW_UNLOCKED(rw, op);
	rw_vector_exit(rw, op);
}
#endif