kern_rwlock.c revision 1.1.36.5 1 /* $NetBSD: kern_rwlock.c,v 1.1.36.5 2006/12/29 20:27:44 ad Exp $ */
2
3 /*-
4 * Copyright (c) 2002, 2006 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe and Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * Kernel reader/writer lock implementation, modeled after those
41 * found in Solaris, a description of which can be found in:
42 *
43 * Solaris Internals: Core Kernel Architecture, Jim Mauro and
44 * Richard McDougall.
45 */
46
47 #include "opt_multiprocessor.h"
48
49 #include <sys/cdefs.h>
50 __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.1.36.5 2006/12/29 20:27:44 ad Exp $");
51
52 #define __RWLOCK_PRIVATE
53
54 #include <sys/param.h>
55 #include <sys/proc.h>
56 #include <sys/rwlock.h>
57 #include <sys/sched.h>
58 #include <sys/sleepq.h>
59 #include <sys/systm.h>
60 #include <sys/lockdebug.h>
61
62 #include <dev/lockstat.h>
63
/*
 * RW_ABORT:
 *
 *	Report a fatal rwlock problem via LOCKDEBUG_ABORT(), handing it the
 *	lock's debug ID, the lock itself, the rwlock lockops, the name of
 *	the function the macro is expanded in, and the caller's message.
 */
#define	RW_ABORT(rw, msg)						\
	LOCKDEBUG_ABORT(RW_GETID(rw), rw, &rwlock_lockops, __func__, msg)
66
/*
 * LOCKDEBUG
 *
 * RW_LOCKED() and RW_UNLOCKED() report an acquisition or release to the
 * lock debugging code, passing the lock's debug ID, the return address
 * of the function they are expanded in, and a flag that is true for
 * reader operations.
 *
 * RW_DASSERT() checks "cond" and calls RW_ABORT() if it does not hold.
 *
 * All three expand to nothing unless the kernel is built with LOCKDEBUG.
 * Each expands to a single statement with no trailing semicolon, so it
 * can be used safely as the body of an unbraced if/else.
 */

#if defined(LOCKDEBUG)

#define	RW_LOCKED(rw, op)						\
do {									\
	LOCKDEBUG_LOCKED(RW_GETID(rw),					\
	    (uintptr_t)__builtin_return_address(0),			\
	    (op) == RW_READER);						\
} while (/* CONSTCOND */ 0)

#define	RW_UNLOCKED(rw, op)						\
do {									\
	LOCKDEBUG_UNLOCKED(RW_GETID(rw),				\
	    (uintptr_t)__builtin_return_address(0),			\
	    (op) == RW_READER);						\
} while (/* CONSTCOND */ 0)

#define	RW_DASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	RW_LOCKED(rw, op)	/* nothing */
#define	RW_UNLOCKED(rw, op)	/* nothing */
#define	RW_DASSERT(rw, cond)	/* nothing */

#endif	/* LOCKDEBUG */
98
/*
 * DIAGNOSTIC
 *
 * RW_ASSERT() checks "cond" and calls RW_ABORT() with the stringified
 * condition if it does not hold.  It expands to nothing unless the
 * kernel is built with DIAGNOSTIC.
 */

#ifdef DIAGNOSTIC

#define	RW_ASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else	/* !DIAGNOSTIC */

#define	RW_ASSERT(rw, cond)	/* nothing */

#endif	/* DIAGNOSTIC */
116
/*
 * For platforms that use 'simple' RW locks.
 *
 * Acquire and release are both a compare-and-swap on the owner word,
 * and the lock debug ID is kept in the rw_id member.
 */
#ifdef __HAVE_SIMPLE_RW_LOCKS
#define	RW_ACQUIRE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)
#define	RW_RELEASE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)
#define	RW_SETID(rw, id)		((rw)->rw_id = (id))
#define	RW_GETID(rw)			((rw)->rw_id)

/*
 * RW_SET_WAITERS:
 *
 *	Try to OR the bits in "set" into the owner word, but only while
 *	at least one of the bits in "need" is set there.  Returns 0 if
 *	none of the "need" bits are set; otherwise returns the result of
 *	the compare-and-swap (callers treat non-zero as success).
 */
static inline int
RW_SET_WAITERS(krwlock_t *rw, uintptr_t need, uintptr_t set)
{
	uintptr_t snapshot;

	snapshot = rw->rw_owner;
	if ((snapshot & need) != 0)
		return RW_CAS(&rw->rw_owner, snapshot, snapshot | set);

	/* No reason to wait any more; tell the caller to retry. */
	return 0;
}
#endif	/* __HAVE_SIMPLE_RW_LOCKS */
136
137 /*
138 * For platforms that do not provide stubs, or for the LOCKDEBUG case.
139 */
140 #ifdef LOCKDEBUG
141 #undef __HAVE_RW_STUBS
142 #endif
143
144 #ifndef __HAVE_RW_STUBS
145 __strong_alias(rw_enter, rw_vector_enter);
146
147 void
148 rw_exit(krwlock_t *rw)
149 {
150 krw_t op;
151 op = ((rw->rw_owner & RW_WRITE_LOCKED) ? RW_WRITER : RW_READER);
152 RW_UNLOCKED(rw, op);
153 rw_vector_exit(rw, op);
154 }
155 #endif
156
157 void rw_dump(volatile void *);
158
159 lockops_t rwlock_lockops = {
160 "Reader / writer lock",
161 1,
162 rw_dump
163 };
164
165 /*
166 * rw_dump:
167 *
168 * Dump the contents of a rwlock structure.
169 */
170 void
171 rw_dump(volatile void *cookie)
172 {
173 volatile krwlock_t *rw = cookie;
174
175 printf_nolog("owner/count : %#018lx flags : %#018x\n",
176 (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
177 }
178
179 /*
180 * rw_init:
181 *
182 * Initialize a rwlock for use.
183 */
184 void
185 rw_init(krwlock_t *rw)
186 {
187 u_int id;
188
189 memset(rw, 0, sizeof(*rw));
190
191 id = LOCKDEBUG_ALLOC(rw, &rwlock_lockops);
192 RW_SETID(rw, id);
193 }
194
195 /*
196 * rw_destroy:
197 *
198 * Tear down a rwlock.
199 */
200 void
201 rw_destroy(krwlock_t *rw)
202 {
203
204 LOCKDEBUG_FREE(rw, RW_GETID(rw));
205 RW_ASSERT(rw, rw->rw_owner == 0);
206 }
207
208 /*
209 * rw_vector_enter:
210 *
211 * Acquire a rwlock.
212 */
213 void
214 rw_vector_enter(krwlock_t *rw, const krw_t op)
215 {
216 uintptr_t owner, incr, need_wait, set_wait, curthread;
217 turnstile_t *ts;
218 int queue;
219 LOCKSTAT_TIMER(slptime);
220 struct lwp *l;
221
222 l = curlwp;
223 curthread = (uintptr_t)l;
224 RW_ASSERT(rw, curthread != 0);
225
226 #ifdef LOCKDEBUG
227 if (panicstr == NULL) {
228 simple_lock_only_held(NULL, "rw_enter");
229 #ifdef MULTIPROCESSOR
230 LOCKDEBUG_BARRIER(&kernel_lock, 1);
231 #else
232 LOCKDEBUG_BARRIER(NULL, 1);
233 #endif
234 }
235 #endif
236
237 /*
238 * We play a slight trick here. If we're a reader, we want
239 * increment the read count. If we're a writer, we want to
240 * set the owner field and whe WRITE_LOCKED bit.
241 *
242 * In the latter case, we expect those bits to be zero,
243 * therefore we can use an add operation to set them, which
244 * means an add operation for both cases.
245 */
246 if (op == RW_READER) {
247 incr = RW_READ_INCR;
248 set_wait = RW_HAS_WAITERS;
249 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
250 queue = TS_READER_Q;
251 } else {
252 RW_DASSERT(rw, op == RW_WRITER);
253 incr = curthread | RW_WRITE_LOCKED;
254 set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
255 need_wait = RW_WRITE_LOCKED | RW_THREAD;
256 queue = TS_WRITER_Q;
257 }
258
259 for (;;) {
260 /*
261 * Read the lock owner field. If the need-to-wait
262 * indicator is clear, then try to acquire the lock.
263 */
264 owner = rw->rw_owner;
265 if ((owner & need_wait) == 0) {
266 if (RW_ACQUIRE(rw, owner, owner + incr)) {
267 /* Got it! */
268 break;
269 }
270
271 /*
272 * Didn't get it -- spin around again (we'll
273 * probably sleep on the next iteration).
274 */
275 continue;
276 }
277
278 if (panicstr != NULL)
279 return;
280 if (RW_OWNER(rw) == curthread)
281 RW_ABORT(rw, "locking against myself");
282
283 /*
284 * Grab the turnstile chain lock. Once we have that, we
285 * can adjust the waiter bits and sleep queue.
286 */
287 ts = turnstile_lookup(rw);
288
289 /*
290 * Mark the rwlock as having waiters. If the set fails,
291 * then we may not need to sleep and should spin again.
292 */
293 if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
294 turnstile_exit(rw);
295 continue;
296 }
297
298 LOCKSTAT_START_TIMER(slptime);
299
300 turnstile_block(ts, queue, sched_kpri(l), rw);
301
302 /* If we wake up and arrive here, we've been handed the lock. */
303 RW_RECEIVE(rw);
304
305 LOCKSTAT_STOP_TIMER(slptime);
306 LOCKSTAT_EVENT(rw,
307 LB_RWLOCK | (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2),
308 1, slptime);
309
310 turnstile_unblock();
311 break;
312 }
313
314 RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
315 (op == RW_READER && RW_COUNT(rw) != 0));
316 RW_LOCKED(rw, op);
317 }
318
319 /*
320 * rw_vector_exit:
321 *
322 * Release a rwlock.
323 */
324 void
325 rw_vector_exit(krwlock_t *rw, const krw_t op)
326 {
327 uintptr_t curthread, owner, decr, new;
328 turnstile_t *ts;
329 int rcnt, wcnt, dcnt;
330 struct lwp *l;
331
332 curthread = (uintptr_t)curlwp;
333 RW_ASSERT(rw, curthread != 0);
334
335 if (panicstr != NULL) {
336 /*
337 * XXX What's the correct thing to do here? We should at
338 * least release the lock.
339 */
340 return;
341 }
342
343 /*
344 * Again, we use a trick. Since we used an add operation to
345 * set the required lock bits, we can use a subtract to clear
346 * them, which makes the read-release and write-release path
347 * the same.
348 */
349 switch (op) {
350 case RW_READER:
351 RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
352 RW_ASSERT(rw, RW_COUNT(rw) != 0);
353 dcnt = 0;
354 decr = RW_READ_INCR;
355 break;
356 case RW_WRITER:
357 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
358 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
359 dcnt = 0;
360 decr = curthread | RW_WRITE_LOCKED;
361 break;
362 case __RW_DOWNGRADE:
363 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
364 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
365 dcnt = 1;
366 decr = (curthread | RW_WRITE_LOCKED) - RW_READ_INCR;
367 break;
368 default:
369 RW_DASSERT(rw, "XXXgcc");
370 return;
371 }
372
373 for (;;) {
374 /*
375 * Compute what we expect the new value of the lock to be.
376 * Only proceed to do direct handoff if there are waiters,
377 * and if the lock would become unowned.
378 */
379 owner = rw->rw_owner;
380 new = (owner - decr) & ~RW_WRITE_WANTED;
381 if ((new & (RW_THREAD | RW_HAS_WAITERS)) != RW_HAS_WAITERS) {
382 if (RW_RELEASE(rw, owner, new))
383 break;
384 continue;
385 }
386
387 /*
388 * Grab the turnstile chain lock. This gets the interlock
389 * on the sleep queue. Once we have that, we can adjust the
390 * waiter bits.
391 */
392 ts = turnstile_lookup(rw);
393
394 /*
395 * Adjust the waiter bits. If we are releasing a write
396 * lock or downgrading a write lock to read, then wake all
397 * outstanding readers. If we are releasing a read lock,
398 * then wake one writer.
399 */
400 RW_DASSERT(rw, ts != NULL);
401
402 wcnt = TS_WAITERS(ts, TS_WRITER_Q);
403 rcnt = TS_WAITERS(ts, TS_READER_Q);
404
405 /*
406 * Give the lock away.
407 */
408 if (dcnt == 0 &&
409 (rcnt == 0 || (op == RW_READER && wcnt != 0))) {
410 RW_DASSERT(rw, wcnt != 0);
411
412 /*
413 * Give the lock to the longest waiting
414 * writer.
415 */
416 l = TS_FIRST(ts, TS_WRITER_Q);
417 new = (uintptr_t)l | RW_WRITE_LOCKED;
418
419 if (wcnt > 1)
420 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
421 else if (rcnt != 0)
422 new |= RW_HAS_WAITERS;
423
424 RW_GIVE(rw);
425 if (!RW_RELEASE(rw, owner, new)) {
426 /* Oops, try again. */
427 turnstile_exit(rw);
428 continue;
429 }
430
431 /* Wake the writer. */
432 turnstile_wakeup(ts, TS_WRITER_Q, wcnt, l);
433 } else {
434 dcnt += rcnt;
435 RW_DASSERT(rw, dcnt != 0);
436
437 /*
438 * Give the lock to all blocked readers. We may
439 * retain one read hold if downgrading. If there
440 * is a writer waiting, new readers will be blocked
441 * out.
442 */
443 new = dcnt << RW_READ_COUNT_SHIFT;
444 if (wcnt != 0)
445 new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
446
447 RW_GIVE(rw);
448 if (!RW_RELEASE(rw, owner, new)) {
449 /* Oops, try again. */
450 turnstile_exit(rw);
451 continue;
452 }
453
454 /* Wake up all sleeping readers. */
455 turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
456 }
457
458 break;
459 }
460 }
461
462 /*
463 * rw_tryenter:
464 *
465 * Try to acquire a rwlock.
466 */
467 int
468 rw_tryenter(krwlock_t *rw, const krw_t op)
469 {
470 uintptr_t curthread, owner, incr, need_wait;
471
472 curthread = (uintptr_t)curlwp;
473 RW_ASSERT(rw, curthread != 0);
474
475 if (op == RW_READER) {
476 incr = RW_READ_INCR;
477 need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
478 } else {
479 RW_DASSERT(rw, op == RW_WRITER);
480 incr = curthread | RW_WRITE_LOCKED;
481 need_wait = RW_WRITE_LOCKED | RW_THREAD;
482 }
483
484 for (;;) {
485 owner = rw->rw_owner;
486 if ((owner & need_wait) == 0) {
487 if (RW_ACQUIRE(rw, owner, owner + incr)) {
488 /* Got it! */
489 break;
490 }
491 continue;
492 }
493 return 0;
494 }
495
496 RW_LOCKED(rw, op);
497 RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
498 (op == RW_READER && RW_COUNT(rw) != 0));
499 return 1;
500 }
501
502 /*
503 * rw_downgrade:
504 *
505 * Downgrade a write lock to a read lock.
506 */
507 void
508 rw_downgrade(krwlock_t *rw)
509 {
510 uintptr_t owner, curthread;
511
512 curthread = (uintptr_t)curlwp;
513 RW_ASSERT(rw, curthread != 0);
514 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
515 RW_ASSERT(rw, RW_OWNER(rw) == curthread);
516 RW_UNLOCKED(rw, RW_WRITER);
517
518 for (;;) {
519 owner = rw->rw_owner;
520
521 /* If there are waiters we need to do this the hard way. */
522 if ((owner & RW_HAS_WAITERS) != 0) {
523 rw_vector_exit(rw, __RW_DOWNGRADE);
524 break;
525 }
526
527 /*
528 * Try swapping us down to one read hold. If it fails, the
529 * lock condition has changed and we most likely now have
530 * waiters.
531 */
532 if (RW_RELEASE(rw, owner, RW_READ_INCR))
533 break;
534 }
535
536 RW_LOCKED(rw, RW_READER);
537 RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
538 RW_DASSERT(rw, RW_COUNT(rw) != 0);
539 }
540
541 /*
542 * rw_tryupgrade:
543 *
544 * Try to upgrade a read lock to a write lock. We must be the
545 * only reader.
546 */
547 int
548 rw_tryupgrade(krwlock_t *rw)
549 {
550 uintptr_t owner, curthread, new;
551
552 curthread = (uintptr_t)curlwp;
553 RW_ASSERT(rw, curthread != 0);
554
555 for (;;) {
556 owner = rw->rw_owner;
557 RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
558 if ((owner & RW_THREAD) != RW_READ_INCR) {
559 RW_ASSERT(rw, (owner & RW_THREAD) != 0);
560 return 0;
561 }
562 new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
563 if (RW_ACQUIRE(rw, owner, new))
564 break;
565 }
566
567 RW_LOCKED(rw, RW_WRITER);
568 RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
569 RW_DASSERT(rw, RW_OWNER(rw) == curthread);
570
571 return 1;
572 }
573
574 /*
575 * rw_read_held:
576 *
577 * Returns true if the rwlock is held for reading. Must only be
578 * used for diagnostic assertions, and never be used to make
579 * decisions about how to use a rwlock.
580 */
581 int
582 rw_read_held(krwlock_t *rw)
583 {
584 uintptr_t owner;
585
586 if (panicstr != NULL)
587 return 1;
588
589 owner = rw->rw_owner;
590 return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
591 }
592
593 /*
594 * rw_write_held:
595 *
596 * Returns true if the rwlock is held for writing. Must only be
597 * used for diagnostic assertions, and never be used to make
598 * decisions about how to use a rwlock.
599 */
600 int
601 rw_write_held(krwlock_t *rw)
602 {
603
604 if (panicstr != NULL)
605 return 1;
606
607 return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
608 }
609
610 /*
611 * rw_lock_held:
612 *
613 * Returns true if the rwlock is held for reading or writing. Must
614 * only be used for diagnostic assertions, and never be used to make
615 * decisions about how to use a rwlock.
616 */
617 int
618 rw_lock_held(krwlock_t *rw)
619 {
620
621 if (panicstr != NULL)
622 return 1;
623
624 return (rw->rw_owner & RW_THREAD) != 0;
625 }
626