Home | History | Annotate | Line # | Download | only in kern
kern_rwlock.c revision 1.1.36.2
      1 /*	$NetBSD: kern_rwlock.c,v 1.1.36.2 2006/09/11 01:31:39 ad Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2002, 2006 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe and Andrew Doran.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 /*
     40  * Kernel reader/writer lock implementation, modeled after those
     41  * found in Solaris, a description of which can be found in:
     42  *
     43  *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
     44  *	    Richard McDougall.
     45  */
     46 
     47 #include "opt_lockdebug.h"
     48 #include "opt_multiprocessor.h"
     49 
     50 #include <sys/cdefs.h>
     51 __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.1.36.2 2006/09/11 01:31:39 ad Exp $");
     52 
     53 #define	__RWLOCK_PRIVATE
     54 
     55 #include <sys/param.h>
     56 #include <sys/proc.h>
     57 #include <sys/rwlock.h>
     58 #include <sys/sched.h>
     59 #include <sys/turnstile.h>
     60 #include <sys/systm.h>
     61 
     62 #include <dev/lockstat.h>
     63 
/*
 * RW_ABORT: report a fatal lock error via rw_abort(), automatically
 * passing the name of the calling function for the diagnostic message.
 */
#define	RW_ABORT(rw, msg)						\
    rw_abort((rw), __FUNCTION__, msg)
     66 
     67 /*
     68  * LOCKDEBUG
     69  */
     70 
#if defined(LOCKDEBUG)

/*
 * With LOCKDEBUG, force use of the C vector stubs below so that every
 * acquire/release passes through code that can record debug state.
 */
#undef __HAVE_RW_ENTER
#undef __HAVE_RW_EXIT

/* Global list of write-held rwlocks, and the lock protecting it. */
struct simplelock rw_list_lock = SIMPLELOCK_INITIALIZER;
LIST_HEAD(, rwlock_debug_info) rw_list_writers;

#ifdef notyet
/*
 * RW_LOCKED: on write acquisition, record the caller's return address
 * and insert the lock onto the global write-held list.  Read
 * acquisitions are not tracked.
 */
#define	RW_LOCKED(rw, op)						\
do {									\
	if ((op) != RW_WRITER)						\
		break;							\
	(rw)->rw_debug.rw_locked =					\
	    (vaddr_t) __builtin_return_address(0);			\
	LIST_INSERT_HEAD(&rw_list_writers, &rw->rw_debug, rw_chain);	\
} while (/* CONSTCOND */ 0)

/*
 * RW_UNLOCKED: record the releasing caller's return address and remove
 * the lock from the write-held list.
 */
#define	RW_UNLOCKED(rw)							\
do {									\
	(rw)->rw_debug.rw_unlocked =					\
	    (vaddr_t) __builtin_return_address(0);			\
	LIST_REMOVE(&rw->rw_debug, rw_chain);				\
} while (/* CONSTCOND */ 0)
#else	/* notyet */
#define	RW_LOCKED(rw, op)	/* nothing */
#define	RW_UNLOCKED(rw)		/* nothing */
#endif	/* notyet */

/*
 * RW_DASSERT: assertion checked only in LOCKDEBUG kernels.
 *
 * Note: the original definition had a stray trailing semicolon after
 * the do/while, which defeats the do{}while(0) idiom (a use in an
 * unbraced if/else body would fail to compile) and was inconsistent
 * with RW_ASSERT below.  It has been removed.
 */
#define	RW_DASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else	/* LOCKDEBUG */

#define	RW_LOCKED(rw, op)	/* nothing */
#define	RW_UNLOCKED(rw)		/* nothing */
#define	RW_DASSERT(rw, cond)	/* nothing */

#endif	/* LOCKDEBUG */
    113 
/*
 * DIAGNOSTIC
 */

#if defined(DIAGNOSTIC)

/*
 * RW_ASSERT: assertion checked when the kernel is built with
 * DIAGNOSTIC; compiles away to nothing otherwise.
 */
#define	RW_ASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)

#else

#define	RW_ASSERT(rw, cond)	/* nothing */

#endif	/* DIAGNOSTIC */
    131 
/*
 * rw_abort:
 *
 *	Dump the contents of a RW lock and call panic().  'func' is the
 *	name of the function that detected the error and 'msg' describes
 *	the failure; both are printed before the lock state.  Never
 *	returns.
 */
static void
rw_abort(krwlock_t *rw, const char *func, const char *msg)
{

	printf("%s: %s\n", func, msg);
	rw_dump(rw);
	panic("rw_abort");
}
    145 
/*
 * rw_dump:
 *
 *	Dump the contents of a rwlock structure to the console: the
 *	current LWP, the lock's address, and the packed owner/count and
 *	flag fields.  Used from rw_abort() when a diagnostic fires.
 *	(The original header comment said "mutex structure" -- a
 *	copy/paste error from kern_mutex.)
 */
void
rw_dump(krwlock_t *rw)
{

#ifdef notyet
#if defined(LOCKDEBUG)
	printf("last locked: 0x%16lx unlocked: 0x%16lx\n",
	    (long)rw->rw_debug.rw_locked,
	    (long)rw->rw_debug.rw_unlocked);
#endif
#endif
	printf("curlwp     : 0x%16lx lockaddr: 0x%16lx\n"
	    "owner/count: 0x%16lx flags   : 0x%16x\n",
	    (long)curlwp, (long)rw, (long)RW_OWNER(rw),
	    (int)RW_FLAGS(rw));
}
    167 
/*
 * rw_init:
 *
 *	Initialize a rwlock for use.  Zeroing the whole structure leaves
 *	the lock unowned with no waiter/flag bits set, which is the
 *	released state.
 */
void
rw_init(krwlock_t *rw)
{

	memset(rw, 0, sizeof(*rw));
}
    179 
/*
 * rw_destroy:
 *
 *	Tear down a rwlock.  In LOCKDEBUG kernels, verify that the lock
 *	is fully released (no owner, no waiters) before it goes away.
 */
void
rw_destroy(krwlock_t *rw)
{

	RW_DASSERT(rw, rw->rw_owner == 0);
}
    191 
/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock for reading (op == RW_READER) or writing
 *	(op == RW_WRITER).  This is the slow path: if the lock cannot
 *	be taken immediately, the caller blocks on a turnstile until
 *	the lock is handed to it by a releasing thread.
 */
void
rw_vector_enter(krwlock_t *rw, krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread;
	struct turnstile *ts;
	LOCKSTAT_TIMER(slptime);

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);

#ifdef LOCKDEBUG
	/* Sleeping while holding a spin lock would be a bug. */
	if (panicstr == NULL)
		simple_lock_only_held(NULL, "rw_enter");
#endif

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
	 */
	if (op == RW_READER) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_DASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (;;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0) {
			if (RW_ACQUIRE(rw, owner, owner + incr)) {
				/* Got it! */
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			continue;
		}

		if (panicstr != NULL)
			return;
		if (RW_OWNER(rw) == curthread)
			RW_ABORT(rw, "locking against myself");

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 */
		if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
			turnstile_exit(rw);
			continue;
		}

		LOCKSTAT_START_TIMER(slptime);

		/* Sleep until a releasing thread hands us the lock. */
		turnstile_block(ts,
		    (op == RW_READER ? TS_READER_Q : TS_WRITER_Q),
		    sched_kpri(curlwp), rw, "rwlock");

		LOCKSTAT_STOP_TIMER(slptime);
		LOCKSTAT_EVENT(rw, LB_ADAPTIVE_RWLOCK | LB_SLEEP, 1, slptime);

		/* If we wake up and arrive here, we've been handed the lock. */
		RW_RECEIVE(rw);
		break;
	}

	RW_LOCKED(rw, op);
	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
}
    289 
    290 /*
    291  * rw_vector_exit:
    292  *
    293  *	Release a rwlock.
    294  */
    295 void
    296 rw_vector_exit(krwlock_t *rw, krw_t op)
    297 {
    298 	uintptr_t curthread, owner, decr, new;
    299 	struct turnstile *ts;
    300 	int rcnt, wcnt, dcnt;
    301 	struct lwp *l;
    302 
    303 	curthread = (uintptr_t)curlwp;
    304 	RW_ASSERT(rw, curthread != 0);
    305 
    306 	if (panicstr != NULL) {
    307 		/*
    308 		 * XXX What's the correct thing to do here?  We should at least
    309 		 * release the lock.
    310 		 */
    311 		return;
    312 	}
    313 
    314 	/*
    315 	 * Again, we use a trick.  Since we used an add operation to
    316 	 * set the required lock bits, we can use a subtract to clear
    317 	 * them, which makes the read-release and write-release path
    318 	 * the same.
    319 	 */
    320 	switch (op) {
    321 	case RW_READER:
    322 		RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
    323 		RW_ASSERT(rw, RW_COUNT(rw) != 0);
    324 		dcnt = 0;
    325 		decr = RW_READ_INCR;
    326 		break;
    327 	case RW_WRITER:
    328 		RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
    329 		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
    330 		RW_UNLOCKED(rw);
    331 		dcnt = 0;
    332 		decr = curthread | RW_WRITE_LOCKED;
    333 		break;
    334 	case __RW_DOWNGRADE:
    335 		RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
    336 		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
    337 		RW_UNLOCKED(rw);
    338 		dcnt = 1;
    339 		decr = (curthread | RW_WRITE_LOCKED) - RW_READ_INCR;
    340 		break;
    341 	default:
    342 		RW_DASSERT(rw, "blame gcc, I do");
    343 		return;
    344 	}
    345 
    346 	for (;;) {
    347 		/*
    348 		 * We assume that the caller has already tried to release
    349 		 * the lock and optimize for the 'has waiters' case, and so
    350 		 * grab the turnstile chain lock.  This gets the interlock
    351 		 * on the sleep queue.  Once we have that, we can adjust the
    352 		 * waiter bits.
    353 		 */
    354 		ts = turnstile_lookup(rw);
    355 
    356 		/*
    357 		 * Compute what we expect the new value of the lock to be.
    358 		 * Only proceed to do direct handoff if there are waiters,
    359 		 * and if the lock would become unowned.
    360 		 */
    361 		owner = rw->rw_owner;
    362 		new = (owner - decr) & ~RW_WRITE_WANTED;
    363 		if ((new & (RW_THREAD | RW_HAS_WAITERS)) != RW_HAS_WAITERS) {
    364 			if (RW_RELEASE(rw, owner, new)) {
    365 				turnstile_exit(rw);
    366 				break;
    367 			}
    368 			turnstile_exit(rw);
    369 			continue;
    370 		}
    371 
    372 		/*
    373 		 * Adjust the waiter bits.  If we are releasing a write
    374 		 * lock or downgrading a write lock to read, then wake all
    375 		 * outstanding readers.  If we are releasing a read lock,
    376 		 * then wake one writer.
    377 		 */
    378 		RW_DASSERT(rw, ts != NULL);
    379 
    380 		wcnt = TS_WAITERS(ts, TS_WRITER_Q);
    381 		rcnt = TS_WAITERS(ts, TS_READER_Q);
    382 
    383 		/*
    384 		 * Give the lock away.
    385 		 */
    386 		if (dcnt == 0 &&
    387 		    (rcnt == 0 || (op == RW_READER && wcnt != 0))) {
    388 			RW_DASSERT(rw, wcnt != 0);
    389 
    390 			/*
    391 			 * Give the lock to the longest waiting
    392 			 * writer.
    393 			 */
    394 			l = TS_FIRST(ts, TS_WRITER_Q);
    395 			new = (uintptr_t)l | RW_WRITE_LOCKED;
    396 
    397 			if (wcnt > 1)
    398 				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
    399 			else if (rcnt != 0)
    400 				new |= RW_HAS_WAITERS;
    401 
    402 			if (!RW_RELEASE(rw, owner, new)) {
    403 				/* Oops, try again. */
    404 				turnstile_exit(rw);
    405 				continue;
    406 			}
    407 
    408 			/* Wake the writer. */
    409 			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, l);
    410 		} else {
    411 			dcnt += rcnt;
    412 			RW_DASSERT(rw, dcnt != 0);
    413 
    414 			/*
    415 			 * Give the lock to all blocked readers.  We may
    416 			 * retain one read hold if downgrading.  If there
    417 			 * is a writer waiting, new readers will be blocked
    418 			 * out.
    419 			 */
    420 			new = dcnt << RW_READ_COUNT_SHIFT;
    421 			if (wcnt != 0)
    422 				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
    423 			if (!RW_RELEASE(rw, owner, new)) {
    424 				/* Oops, try again. */
    425 				turnstile_exit(rw);
    426 				continue;
    427 			}
    428 
    429 			/* Wake up all sleeping readers. */
    430 			turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
    431 		}
    432 
    433 		break;
    434 	}
    435 }
    436 
    437 /*
    438  * rw_tryenter:
    439  *
    440  *	Try to acquire a rwlock.
    441  */
    442 int
    443 rw_tryenter(krwlock_t *rw, krw_t op)
    444 {
    445 	uintptr_t curthread, owner, incr, need_wait;
    446 
    447 	curthread = (uintptr_t)curlwp;
    448 	RW_ASSERT(rw, curthread != 0);
    449 
    450 	if (op == RW_READER) {
    451 		incr = RW_READ_INCR;
    452 		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
    453 	} else {
    454 		RW_DASSERT(rw, op == RW_WRITER);
    455 		incr = curthread | RW_WRITE_LOCKED;
    456 		need_wait = RW_WRITE_LOCKED | RW_THREAD;
    457 	}
    458 
    459 	for (;;) {
    460 		owner = rw->rw_owner;
    461 		if ((owner & need_wait) == 0) {
    462 			if (RW_ACQUIRE(rw, owner, owner + incr)) {
    463 				/* Got it! */
    464 				break;
    465 			}
    466 			continue;
    467 		}
    468 		return 0;
    469 	}
    470 
    471 	RW_LOCKED(rw, op);
    472 	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
    473 	    (op == RW_READER && RW_COUNT(rw) != 0));
    474 	return 1;
    475 }
    476 
    477 /*
    478  * rw_downgrade:
    479  *
    480  *	Downgrade a write lock to a read lock.
    481  */
    482 void
    483 rw_downgrade(krwlock_t *rw)
    484 {
    485 	uintptr_t owner, curthread;
    486 
    487 	curthread = (uintptr_t)curlwp;
    488 	RW_ASSERT(rw, curthread != 0);
    489 	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
    490 	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
    491 	RW_UNLOCKED(rw);
    492 
    493 	for (;;) {
    494 		owner = rw->rw_owner;
    495 
    496 		/* If there are waiters we need to do this the hard way. */
    497 		if ((owner & RW_HAS_WAITERS) != 0) {
    498 			rw_vector_exit(rw, __RW_DOWNGRADE);
    499 			return;
    500 		}
    501 
    502 		/*
    503 		 * Try swapping us down to one read hold.  If it fails, the
    504 		 * lock condition has changed and we most likely now have
    505 		 * waiters.
    506 		 */
    507 		if (RW_RELEASE(rw, owner, RW_READ_INCR))
    508 			break;
    509 	}
    510 
    511 	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
    512 	RW_DASSERT(rw, RW_COUNT(rw) != 0);
    513 }
    514 
    515 /*
    516  * rw_tryupgrade:
    517  *
    518  *	Try to upgrade a read lock to a write lock.  We must be the
    519  *	only reader.
    520  */
    521 int
    522 rw_tryupgrade(krwlock_t *rw)
    523 {
    524 	uintptr_t owner, curthread, new;
    525 
    526 	curthread = (uintptr_t)curlwp;
    527 	RW_ASSERT(rw, curthread != 0);
    528 
    529 	for (;;) {
    530 		owner = rw->rw_owner;
    531 		RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
    532 		if ((owner & RW_THREAD) != RW_READ_INCR) {
    533 			RW_ASSERT(rw, (owner & RW_THREAD) != 0);
    534 			return 0;
    535 		}
    536 		new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
    537 		if (RW_ACQUIRE(rw, owner, new))
    538 			break;
    539 	}
    540 
    541 	RW_LOCKED(rw, RW_WRITER);
    542 	RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
    543 	RW_DASSERT(rw, RW_OWNER(rw) == curthread);
    544 
    545 	return 1;
    546 }
    547 
    548 /*
    549  * rw_read_held:
    550  *
    551  *	Returns true if the rwlock is held for reading.  Must only be
    552  *	used for diagnostic assertions, and never be used to make
    553  * 	decisions about how to use a rwlock.
    554  */
    555 int
    556 rw_read_held(krwlock_t *rw)
    557 {
    558 	uintptr_t owner;
    559 
    560 	owner = rw->rw_owner;
    561 	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
    562 }
    563 
    564 /*
    565  * rw_write_held:
    566  *
    567  *	Returns true if the rwlock is held for writing.  Must only be
    568  *	used for diagnostic assertions, and never be used to make
    569  *	decisions about how to use a rwlock.
    570  */
    571 int
    572 rw_write_held(krwlock_t *rw)
    573 {
    574 
    575 	return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
    576 }
    577 
/*
 * Slow stubs for platforms that do not implement fast-path ones.
 */
#ifndef __HAVE_RW_ENTER
/*
 * rw_enter: C stub that forwards directly to the slow-path acquire.
 */
void
rw_enter(krwlock_t *rw, krw_t op)
{
	rw_vector_enter(rw, op);
}
#endif
    588 
    589 #ifndef __HAVE_RW_EXIT
    590 void
    591 rw_exit(krwlock_t *rw)
    592 {
    593 	krw_t op;
    594 	op = ((rw->rw_owner & RW_WRITE_LOCKED) ? RW_WRITER : RW_READER);
    595 	rw_vector_exit(rw, op);
    596 }
    597 #endif
    598