kern_rwlock.c revision 1.1.36.3 1 /* $NetBSD: kern_rwlock.c,v 1.1.36.3 2006/10/20 19:45:13 ad Exp $ */
2
3 /*-
4 * Copyright (c) 2002, 2006 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe and Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Kernel reader/writer lock implementation, modeled after those
41 * found in Solaris, a description of which can be found in:
42 *
43 * Solaris Internals: Core Kernel Architecture, Jim Mauro and
44 * Richard McDougall.
45 */
46
47 #include "opt_multiprocessor.h"
48
49 #include <sys/cdefs.h>
50 __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.1.36.3 2006/10/20 19:45:13 ad Exp $");
51
52 #define __RWLOCK_PRIVATE
53
54 #include <sys/param.h>
55 #include <sys/proc.h>
56 #include <sys/rwlock.h>
57 #include <sys/sched.h>
58 #include <sys/sleepq.h>
59 #include <sys/systm.h>
60 #include <sys/lockdebug.h>
61
62 #include <dev/lockstat.h>
63
/*
 * RW_ABORT:
 *
 *	Pass the lock's debug id, the lock itself, the rwlock lockops and
 *	the name of the calling function to LOCKDEBUG_ABORT together with
 *	the message `msg'.
 */
#define	RW_ABORT(rw, msg)						\
	LOCKDEBUG_ABORT(RW_GETID(rw), rw, &rwlock_lockops, __FUNCTION__, msg)

/*
 * LOCKDEBUG
 */

#if defined(LOCKDEBUG)

/*
 * RW_LOCKED / RW_UNLOCKED:
 *
 *	Tell the lock debugging code that the caller acquired / released
 *	the lock.  The last argument is non-zero for a read hold.  `op' is
 *	parenthesized so that an expression argument cannot change the
 *	meaning of the comparison.
 */
#define	RW_LOCKED(rw, op)						\
do {									\
	LOCKDEBUG_LOCKED(RW_GETID(rw),					\
	    (uintptr_t)__builtin_return_address(0),			\
	    (op) == RW_READER);						\
} while (/* CONSTCOND */ 0)

#define	RW_UNLOCKED(rw, op)						\
do {									\
	LOCKDEBUG_UNLOCKED(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0),			\
	    (op) == RW_READER);						\
} while (/* CONSTCOND */ 0)

/*
 * RW_DASSERT:
 *
 *	Assertion that is only compiled in for LOCKDEBUG kernels.
 *
 *	The expansion deliberately does NOT end in a semicolon: the caller
 *	supplies it.  A trailing semicolon here would turn
 *	"if (x) RW_DASSERT(rw, c); else ..." into a syntax error.
 */
#define	RW_DASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	RW_LOCKED(rw, op)	/* nothing */
#define	RW_UNLOCKED(rw, op)	/* nothing */
#define	RW_DASSERT(rw, cond)	/* nothing */

#endif	/* LOCKDEBUG */

/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)

/*
 * RW_ASSERT:
 *
 *	Assertion that is only compiled in for DIAGNOSTIC kernels.
 */
#define	RW_ASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else

#define	RW_ASSERT(rw, cond)	/* nothing */

#endif	/* DIAGNOSTIC */
116
117 int rw_dump(void *, char *, size_t);
118
119 lockops_t rwlock_lockops = {
120 "Reader / writer lock",
121 rw_dump
122 };
123
124 /*
125 * rw_dump:
126 *
127 * Dump the contents of a rwlock structure.
128 */
129 int
130 rw_dump(void *cookie, char *buf, size_t l)
131 {
132 krwlock_t *rw = cookie;
133
134 return snprintf(buf, l, "owner/count: 0x%16lx flags : 0x%16x\n",
135 (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
136 }
137
138 /*
139 * rw_init:
140 *
141 * Initialize a rwlock for use.
142 */
143 void
144 rw_init(krwlock_t *rw)
145 {
146 u_int id;
147
148 memset(rw, 0, sizeof(*rw));
149
150 id = LOCKDEBUG_ALLOC(rw, &rwlock_lockops, 1);
151 RW_SETID(rw, id);
152 }
153
154 /*
155 * rw_destroy:
156 *
157 * Tear down a rwlock.
158 */
159 void
160 rw_destroy(krwlock_t *rw)
161 {
162
163 LOCKDEBUG_FREE(rw, RW_GETID(rw));
164 RW_ASSERT(rw, rw->rw_owner == 0);
165 }
166
167 /*
168 * rw_vector_enter:
169 *
170 * Acquire a rwlock.
171 */
172 void
173 rw_vector_enter(krwlock_t *rw, krw_t op)
174 {
175 uintptr_t owner, incr, need_wait, set_wait, curthread;
176 turnstile_t *ts;
177 int queue;
178 LOCKSTAT_TIMER(slptime);
179
180 curthread = (uintptr_t)curlwp;
181 RW_ASSERT(rw, curthread != 0);
182
183 #ifdef LOCKDEBUG
184 if (panicstr == NULL)
185 simple_lock_only_held(NULL, "rw_enter");
186 #endif
187
188 /*
189 * We play a slight trick here. If we're a reader, we want
190 * increment the read count. If we're a writer, we want to
191 * set the owner field and whe WRITE_LOCKED bit.
192 *
193 * In the latter case, we expect those bits to be zero,
194 * therefore we can use an add operation to set them, which
195 * means an add operation for both cases.
196 */
197 if (op == RW_READER) {
198 incr = RW_READ_INCR;
199 set_wait = RW_HAS_WAITERS;
200 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
201 queue = TS_READER_Q;
202 } else {
203 RW_DASSERT(rw, op == RW_WRITER);
204 incr = curthread | RW_WRITE_LOCKED;
205 set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
206 need_wait = RW_WRITE_LOCKED | RW_THREAD;
207 queue = TS_WRITER_Q;
208 }
209
210 for (;;) {
211 /*
212 * Read the lock owner field. If the need-to-wait
213 * indicator is clear, then try to acquire the lock.
214 */
215 owner = rw->rw_owner;
216 if ((owner & need_wait) == 0) {
217 if (RW_ACQUIRE(rw, owner, owner + incr)) {
218 /* Got it! */
219 break;
220 }
221
222 /*
223 * Didn't get it -- spin around again (we'll
224 * probably sleep on the next iteration).
225 */
226 continue;
227 }
228
229 if (panicstr != NULL)
230 return;
231 if (RW_OWNER(rw) == curthread)
232 RW_ABORT(rw, "locking against myself");
233
234 /*
235 * Grab the turnstile chain lock. Once we have that, we
236 * can adjust the waiter bits and sleep queue.
237 */
238 ts = turnstile_lookup(rw);
239
240 /*
241 * Mark the rwlock as having waiters. If the set fails,
242 * then we may not need to sleep and should spin again.
243 */
244 if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
245 turnstile_exit(rw);
246 continue;
247 }
248
249 LOCKSTAT_START_TIMER(slptime);
250
251 turnstile_block(ts, queue, sched_kpri(curlwp), rw);
252
253 LOCKSTAT_STOP_TIMER(slptime);
254 LOCKSTAT_EVENT(rw, LB_ADAPTIVE_RWLOCK | LB_SLEEP, 1, slptime);
255
256 /* If we wake up and arrive here, we've been handed the lock. */
257 RW_RECEIVE(rw);
258 break;
259 }
260
261 RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
262 (op == RW_READER && RW_COUNT(rw) != 0));
263 }
264
265 /*
266 * rw_vector_exit:
267 *
268 * Release a rwlock.
269 */
270 void
271 rw_vector_exit(krwlock_t *rw, krw_t op)
272 {
273 uintptr_t curthread, owner, decr, new;
274 turnstile_t *ts;
275 int rcnt, wcnt, dcnt;
276 struct lwp *l;
277
278 curthread = (uintptr_t)curlwp;
279 RW_ASSERT(rw, curthread != 0);
280
281 if (panicstr != NULL) {
282 /*
283 * XXX What's the correct thing to do here? We should at least
284 * release the lock.
285 */
286 return;
287 }
288
289 /*
290 * Again, we use a trick. Since we used an add operation to
291 * set the required lock bits, we can use a subtract to clear
292 * them, which makes the read-release and write-release path
293 * the same.
294 */
295 switch (op) {
296 case RW_READER:
297 RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
298 RW_ASSERT(rw, RW_COUNT(rw) != 0);
299 dcnt = 0;
300 decr = RW_READ_INCR;
301 break;
302 case RW_WRITER:
303 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
304 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
305 dcnt = 0;
306 decr = curthread | RW_WRITE_LOCKED;
307 break;
308 case __RW_DOWNGRADE:
309 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
310 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
311 dcnt = 1;
312 decr = (curthread | RW_WRITE_LOCKED) - RW_READ_INCR;
313 break;
314 default:
315 RW_DASSERT(rw, "blame gcc, I do");
316 return;
317 }
318
319 for (;;) {
320 /*
321 * We assume that the caller has already tried to release
322 * the lock and optimize for the 'has waiters' case, and so
323 * grab the turnstile chain lock. This gets the interlock
324 * on the sleep queue. Once we have that, we can adjust the
325 * waiter bits.
326 */
327 ts = turnstile_lookup(rw);
328
329 /*
330 * Compute what we expect the new value of the lock to be.
331 * Only proceed to do direct handoff if there are waiters,
332 * and if the lock would become unowned.
333 */
334 owner = rw->rw_owner;
335 new = (owner - decr) & ~RW_WRITE_WANTED;
336 if ((new & (RW_THREAD | RW_HAS_WAITERS)) != RW_HAS_WAITERS) {
337 if (RW_RELEASE(rw, owner, new)) {
338 turnstile_exit(rw);
339 break;
340 }
341 turnstile_exit(rw);
342 continue;
343 }
344
345 /*
346 * Adjust the waiter bits. If we are releasing a write
347 * lock or downgrading a write lock to read, then wake all
348 * outstanding readers. If we are releasing a read lock,
349 * then wake one writer.
350 */
351 RW_DASSERT(rw, ts != NULL);
352
353 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
354 rcnt = TS_WAITERS(ts, TS_READER_Q);
355
356 /*
357 * Give the lock away.
358 */
359 if (dcnt == 0 &&
360 (rcnt == 0 || (op == RW_READER && wcnt != 0))) {
361 RW_DASSERT(rw, wcnt != 0);
362
363 /*
364 * Give the lock to the longest waiting
365 * writer.
366 */
367 l = TS_FIRST(ts, TS_WRITER_Q);
368 new = (uintptr_t)l | RW_WRITE_LOCKED;
369
370 if (wcnt > 1)
371 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
372 else if (rcnt != 0)
373 new |= RW_HAS_WAITERS;
374
375 if (!RW_RELEASE(rw, owner, new)) {
376 /* Oops, try again. */
377 turnstile_exit(rw);
378 continue;
379 }
380
381 /* Wake the writer. */
382 turnstile_wakeup(ts, TS_WRITER_Q, wcnt, l);
383 } else {
384 dcnt += rcnt;
385 RW_DASSERT(rw, dcnt != 0);
386
387 /*
388 * Give the lock to all blocked readers. We may
389 * retain one read hold if downgrading. If there
390 * is a writer waiting, new readers will be blocked
391 * out.
392 */
393 new = dcnt << RW_READ_COUNT_SHIFT;
394 if (wcnt != 0)
395 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
396 if (!RW_RELEASE(rw, owner, new)) {
397 /* Oops, try again. */
398 turnstile_exit(rw);
399 continue;
400 }
401
402 /* Wake up all sleeping readers. */
403 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
404 }
405
406 break;
407 }
408 }
409
410 /*
411 * rw_tryenter:
412 *
413 * Try to acquire a rwlock.
414 */
415 int
416 rw_tryenter(krwlock_t *rw, krw_t op)
417 {
418 uintptr_t curthread, owner, incr, need_wait;
419
420 curthread = (uintptr_t)curlwp;
421 RW_ASSERT(rw, curthread != 0);
422
423 if (op == RW_READER) {
424 incr = RW_READ_INCR;
425 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
426 } else {
427 RW_DASSERT(rw, op == RW_WRITER);
428 incr = curthread | RW_WRITE_LOCKED;
429 need_wait = RW_WRITE_LOCKED | RW_THREAD;
430 }
431
432 for (;;) {
433 owner = rw->rw_owner;
434 if ((owner & need_wait) == 0) {
435 if (RW_ACQUIRE(rw, owner, owner + incr)) {
436 /* Got it! */
437 break;
438 }
439 continue;
440 }
441 return 0;
442 }
443
444 RW_LOCKED(rw, op);
445 RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
446 (op == RW_READER && RW_COUNT(rw) != 0));
447 return 1;
448 }
449
450 /*
451 * rw_downgrade:
452 *
453 * Downgrade a write lock to a read lock.
454 */
455 void
456 rw_downgrade(krwlock_t *rw)
457 {
458 uintptr_t owner, curthread;
459
460 curthread = (uintptr_t)curlwp;
461 RW_ASSERT(rw, curthread != 0);
462 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
463 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
464 RW_UNLOCKED(rw, RW_WRITER);
465
466 for (;;) {
467 owner = rw->rw_owner;
468
469 /* If there are waiters we need to do this the hard way. */
470 if ((owner & RW_HAS_WAITERS) != 0) {
471 rw_vector_exit(rw, __RW_DOWNGRADE);
472 return;
473 }
474
475 /*
476 * Try swapping us down to one read hold. If it fails, the
477 * lock condition has changed and we most likely now have
478 * waiters.
479 */
480 if (RW_RELEASE(rw, owner, RW_READ_INCR))
481 break;
482 }
483
484 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
485 RW_DASSERT(rw, RW_COUNT(rw) != 0);
486 }
487
488 /*
489 * rw_tryupgrade:
490 *
491 * Try to upgrade a read lock to a write lock. We must be the
492 * only reader.
493 */
494 int
495 rw_tryupgrade(krwlock_t *rw)
496 {
497 uintptr_t owner, curthread, new;
498
499 curthread = (uintptr_t)curlwp;
500 RW_ASSERT(rw, curthread != 0);
501
502 for (;;) {
503 owner = rw->rw_owner;
504 RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
505 if ((owner & RW_THREAD) != RW_READ_INCR) {
506 RW_ASSERT(rw, (owner & RW_THREAD) != 0);
507 return 0;
508 }
509 new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
510 if (RW_ACQUIRE(rw, owner, new))
511 break;
512 }
513
514 RW_LOCKED(rw, RW_WRITER);
515 RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
516 RW_DASSERT(rw, RW_OWNER(rw) == curthread);
517
518 return 1;
519 }
520
521 /*
522 * rw_read_held:
523 *
524 * Returns true if the rwlock is held for reading. Must only be
525 * used for diagnostic assertions, and never be used to make
526 * decisions about how to use a rwlock.
527 */
528 int
529 rw_read_held(krwlock_t *rw)
530 {
531 uintptr_t owner;
532
533 if (panicstr != NULL)
534 return 1;
535
536 owner = rw->rw_owner;
537 return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
538 }
539
540 /*
541 * rw_write_held:
542 *
543 * Returns true if the rwlock is held for writing. Must only be
544 * used for diagnostic assertions, and never be used to make
545 * decisions about how to use a rwlock.
546 */
547 int
548 rw_write_held(krwlock_t *rw)
549 {
550
551 if (panicstr != NULL)
552 return 1;
553
554 return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
555 }
556
557 /*
558 * Slow stubs for platforms that do not implement fast-path ones.
559 */
560 #ifndef __HAVE_RW_ENTER
561 void
562 rw_enter(krwlock_t *rw, krw_t op)
563 {
564 rw_vector_enter(rw, op);
565 RW_LOCKED(rw, op);
566 }
567 #endif
568
569 #ifndef __HAVE_RW_EXIT
570 void
571 rw_exit(krwlock_t *rw)
572 {
573 krw_t op;
574 op = ((rw->rw_owner & RW_WRITE_LOCKED) ? RW_WRITER : RW_READER);
575 RW_UNLOCKED(rw, op);
576 rw_vector_exit(rw, op);
577 }
578 #endif
579