Home | History | Annotate | Line # | Download | only in kern
kern_rwlock.c revision 1.1.36.1
      1 /*	$NetBSD: kern_rwlock.c,v 1.1.36.1 2006/09/10 23:42:42 ad Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2002, 2006 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe and Andrew Doran.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 /*
     40  * Kernel reader/writer lock implementation, modeled after those
     41  * found in Solaris, a description of which can be found in:
     42  *
     43  *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
     44  *	    Richard McDougall.
     45  */
     46 
     47 #include "opt_lockdebug.h"
     48 #include "opt_multiprocessor.h"
     49 
     50 #include <sys/cdefs.h>
     51 __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.1.36.1 2006/09/10 23:42:42 ad Exp $");
     52 
     53 #define	__RWLOCK_PRIVATE
     54 
     55 #include <sys/param.h>
     56 #include <sys/proc.h>
     57 #include <sys/rwlock.h>
     58 #include <sys/sched.h>
     59 #include <sys/turnstile.h>
     60 #include <sys/systm.h>
     61 
     62 #include <dev/lockstat.h>
     63 
     64 #define	RW_ABORT(rw, msg)						\
     65     rw_abort((rw), __FUNCTION__, msg)
     66 
     67 /*
     68  * LOCKDEBUG
     69  */
     70 
     71 #if defined(LOCKDEBUG)
     72 
     73 #undef __HAVE_RW_ENTER
     74 #undef __HAVE_RW_EXIT
     75 
     76 struct simplelock rw_list_lock = SIMPLELOCK_INITIALIZER;
     77 LIST_HEAD(, rwlock_debug_info) rw_list_writers;
     78 
     79 #ifdef notyet
     80 #define	RW_LOCKED(rw, op)						\
     81 do {									\
     82 	if ((op) != RW_WRITER)						\
     83 		break;							\
     84 	(rw)->rw_debug.rw_locked =					\
     85 	    (vaddr_t) __builtin_return_address(0);			\
     86 	LIST_INSERT_HEAD(&rw_list_writers, &rw->rw_debug, rw_chain);	\
     87 } while (/* CONSTCOND */ 0)
     88 
     89 #define	RW_UNLOCKED(rw)							\
     90 do {									\
     91 	(rw)->rw_debug.rw_unlocked =					\
     92 	    (vaddr_t) __builtin_return_address(0);			\
     93 	LIST_REMOVE(&rw->rw_debug, rw_chain);				\
     94 } while (/* CONSTCOND */ 0)
     95 #else	/* notyet */
     96 #define	RW_LOCKED(rw, op)	/* nothing */
     97 #define	RW_UNLOCKED(rw)		/* nothing */
     98 #endif	/* notyet */
     99 
/*
 * Debug-only assertion: dump the lock and panic if "cond" is false.
 *
 * Note: no trailing semicolon after the do/while(0) wrapper, so that
 * the macro expands to exactly one statement and is safe inside an
 * un-braced if/else.  This also matches RW_ASSERT below, which was
 * already written without the stray semicolon.
 */
#define	RW_DASSERT(rw, cond)						\
do {									\
	if (!(cond))							\
		RW_ABORT(rw, "assertion failed: " #cond);		\
} while (/* CONSTCOND */ 0)
    105 
    106 #else	/* LOCKDEBUG */
    107 
    108 #define	RW_LOCKED(rw, op)	/* nothing */
    109 #define	RW_UNLOCKED(rw)		/* nothing */
    110 #define	RW_DASSERT(rw, cond)	/* nothing */
    111 
    112 #endif	/* LOCKDEBUG */
    113 
    114 /*
    115  * DIAGNOSTIC
    116  */
    117 
    118 #if defined(DIAGNOSTIC)
    119 
    120 #define	RW_ASSERT(rw, cond)						\
    121 do {									\
    122 	if (!(cond))							\
    123 		RW_ABORT(rw, "assertion failed: " #cond);		\
    124 } while (/* CONSTCOND */ 0)
    125 
    126 #else
    127 
    128 #define	RW_ASSERT(rw, cond)	/* nothing */
    129 
    130 #endif	/* DIAGNOSTIC */
    131 
    132 /*
    133  * rw_abort:
    134  *
    135  *	Dump the contents of a RW lock and call panic().
    136  */
    137 static void
    138 rw_abort(krwlock_t *rw, const char *func, const char *msg)
    139 {
    140 
    141 	printf("%s: %s\n", func, msg);
    142 	rw_dump(rw);
    143 	panic("rw_abort");
    144 }
    145 
/*
 * rw_dump:
 *
 *	Dump the contents of a RW lock structure to the console:
 *	the current LWP, the lock's address, the owner/count word
 *	and the flag bits.  Used by rw_abort() just before panic.
 */
void
rw_dump(krwlock_t *rw)
{

#ifdef notyet
#if defined(LOCKDEBUG)
	/* Last recorded lock/unlock return addresses (not yet enabled). */
	printf("last locked: 0x%16lx unlocked: 0x%16lx\n",
	    (long)rw->rw_debug.rw_locked,
	    (long)rw->rw_debug.rw_unlocked);
#endif
#endif
	printf("curlwp     : 0x%16lx lockaddr: 0x%16lx\n"
	    "owner/count: 0x%16lx flags   : 0x%16x\n",
	    (long)curlwp, (long)rw, (long)RW_OWNER(rw),
	    (int)RW_FLAGS(rw));
}
    167 
    168 /*
    169  * rw_init:
    170  *
    171  *	Initialize a rwlock for use.
    172  */
    173 void
    174 rw_init(krwlock_t *rw)
    175 {
    176 
    177 	memset(rw, 0, sizeof(*rw));
    178 }
    179 
/*
 * rw_destroy:
 *
 *	Tear down a rwlock.  The lock must be completely released:
 *	no owner, no read holds and no waiter/wanted bits set.
 */
void
rw_destroy(krwlock_t *rw)
{

	/* A non-zero owner word means the lock is still held or contested. */
	RW_DASSERT(rw, rw->rw_owner == 0);
}
    191 
/*
 * rw_vector_enter:
 *
 *	Acquire a rwlock, sleeping on a turnstile if the lock is not
 *	immediately available.  "op" is RW_READER or RW_WRITER.
 */
void
rw_vector_enter(krwlock_t *rw, krw_t op)
{
	uintptr_t owner, incr, need_wait, set_wait, curthread;
	struct turnstile *ts;
	LOCKSTAT_TIMER(slptime);

	curthread = (uintptr_t)curlwp;
	RW_ASSERT(rw, curthread != 0);

#ifdef LOCKDEBUG
	/* Sleeping while holding a simple lock would be a bug. */
	simple_lock_only_held(NULL, "rw_enter");
#endif

	/*
	 * We play a slight trick here.  If we're a reader, we want to
	 * increment the read count.  If we're a writer, we want to
	 * set the owner field and the WRITE_LOCKED bit.
	 *
	 * In the latter case, we expect those bits to be zero,
	 * therefore we can use an add operation to set them, which
	 * means an add operation for both cases.
	 */
	if (op == RW_READER) {
		incr = RW_READ_INCR;
		set_wait = RW_HAS_WAITERS;
		/* Readers must wait if a writer holds or wants the lock. */
		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
	} else {
		RW_DASSERT(rw, op == RW_WRITER);
		incr = curthread | RW_WRITE_LOCKED;
		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
		/* Writers must wait if anyone at all holds the lock. */
		need_wait = RW_WRITE_LOCKED | RW_THREAD;
	}

	for (;;) {
		/*
		 * Read the lock owner field.  If the need-to-wait
		 * indicator is clear, then try to acquire the lock.
		 */
		owner = rw->rw_owner;
		if ((owner & need_wait) == 0) {
			if (RW_ACQUIRE(rw, owner, owner + incr)) {
				/* Got it! */
				break;
			}

			/*
			 * Didn't get it -- spin around again (we'll
			 * probably sleep on the next iteration).
			 */
			continue;
		}

		/* When the system is panicking, just pretend we got it. */
		if (panicstr != NULL)
			return;
		if (RW_OWNER(rw) == curthread)
			RW_ABORT(rw, "locking against myself");

		/*
		 * Grab the turnstile chain lock.  Once we have that, we
		 * can adjust the waiter bits and sleep queue.
		 */
		ts = turnstile_lookup(rw);

		/*
		 * Mark the rwlock as having waiters.  If the set fails,
		 * then we may not need to sleep and should spin again.
		 */
		if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
			turnstile_exit(rw);
			continue;
		}

		LOCKSTAT_START_TIMER(slptime);

		/* Block on the appropriate queue until woken by a release. */
		turnstile_block(ts,
		    (op == RW_READER ? TS_READER_Q : TS_WRITER_Q),
		    sched_kpri(curlwp), rw, "rwlock");

		LOCKSTAT_STOP_TIMER(slptime);
		LOCKSTAT_EVENT(rw, LB_ADAPTIVE_RWLOCK | LB_SLEEP, 1, slptime);

		/* If we wake up and arrive here, we've been handed the lock. */
		RW_RECEIVE(rw);
		break;
	}

	RW_LOCKED(rw, op);
	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
	    (op == RW_READER && RW_COUNT(rw) != 0));
}
    288 
    289 /*
    290  * rw_vector_exit:
    291  *
    292  *	Release a rwlock.
    293  */
    294 void
    295 rw_vector_exit(krwlock_t *rw, krw_t op)
    296 {
    297 	uintptr_t curthread, owner, decr, new;
    298 	struct turnstile *ts;
    299 	int rcnt, wcnt, dcnt;
    300 	struct lwp *l;
    301 
    302 	curthread = (uintptr_t)curlwp;
    303 	RW_ASSERT(rw, curthread != 0);
    304 
    305 	if (panicstr != NULL) {
    306 		/*
    307 		 * XXX What's the correct thing to do here?  We should at least
    308 		 * release the lock.
    309 		 */
    310 		return;
    311 	}
    312 
    313 	/*
    314 	 * Again, we use a trick.  Since we used an add operation to
    315 	 * set the required lock bits, we can use a subtract to clear
    316 	 * them, which makes the read-release and write-release path
    317 	 * the same.
    318 	 */
    319 	switch (op) {
    320 	case RW_READER:
    321 		RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
    322 		RW_ASSERT(rw, RW_COUNT(rw) != 0);
    323 		dcnt = 0;
    324 		decr = RW_READ_INCR;
    325 		break;
    326 	case RW_WRITER:
    327 		RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
    328 		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
    329 		RW_UNLOCKED(rw);
    330 		dcnt = 0;
    331 		decr = curthread | RW_WRITE_LOCKED;
    332 		break;
    333 	case __RW_DOWNGRADE:
    334 		RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
    335 		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
    336 		RW_UNLOCKED(rw);
    337 		dcnt = 1;
    338 		decr = (curthread | RW_WRITE_LOCKED) - RW_READ_INCR;
    339 		break;
    340 	default:
    341 		RW_DASSERT(rw, "blame gcc, I do");
    342 		return;
    343 	}
    344 
    345 	for (;;) {
    346 		/*
    347 		 * We assume that the caller has already tried to release
    348 		 * the lock and optimize for the 'has waiters' case, and so
    349 		 * grab the turnstile chain lock.  This gets the interlock
    350 		 * on the sleep queue.  Once we have that, we can adjust the
    351 		 * waiter bits.
    352 		 */
    353 		ts = turnstile_lookup(rw);
    354 
    355 		/*
    356 		 * Compute what we expect the new value of the lock to be.
    357 		 * Only proceed to do direct handoff if there are waiters,
    358 		 * and if the lock would become unowned.
    359 		 */
    360 		owner = rw->rw_owner;
    361 		new = (owner - decr) & ~RW_WRITE_WANTED;
    362 		if ((new & (RW_THREAD | RW_HAS_WAITERS)) != RW_HAS_WAITERS) {
    363 			if (RW_RELEASE(rw, owner, new)) {
    364 				turnstile_exit(rw);
    365 				break;
    366 			}
    367 			turnstile_exit(rw);
    368 			continue;
    369 		}
    370 
    371 		/*
    372 		 * Adjust the waiter bits.  If we are releasing a write
    373 		 * lock or downgrading a write lock to read, then wake all
    374 		 * outstanding readers.  If we are releasing a read lock,
    375 		 * then wake one writer.
    376 		 */
    377 		RW_DASSERT(rw, ts != NULL);
    378 
    379 		wcnt = TS_WAITERS(ts, TS_WRITER_Q);
    380 		rcnt = TS_WAITERS(ts, TS_READER_Q);
    381 
    382 		/*
    383 		 * Give the lock away.
    384 		 */
    385 		if (dcnt == 0 &&
    386 		    (rcnt == 0 || (op == RW_READER && wcnt != 0))) {
    387 			RW_DASSERT(rw, wcnt != 0);
    388 
    389 			/*
    390 			 * Give the lock to the longest waiting
    391 			 * writer.
    392 			 */
    393 			l = TS_FIRST(ts, TS_WRITER_Q);
    394 			new = (uintptr_t)l | RW_WRITE_LOCKED;
    395 
    396 			if (wcnt > 1)
    397 				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
    398 			else if (rcnt != 0)
    399 				new |= RW_HAS_WAITERS;
    400 
    401 			if (!RW_RELEASE(rw, owner, new)) {
    402 				/* Oops, try again. */
    403 				turnstile_exit(rw);
    404 				continue;
    405 			}
    406 
    407 			/* Wake the writer. */
    408 			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, l);
    409 		} else {
    410 			dcnt += rcnt;
    411 			RW_DASSERT(rw, dcnt != 0);
    412 
    413 			/*
    414 			 * Give the lock to all blocked readers.  We may
    415 			 * retain one read hold if downgrading.  If there
    416 			 * is a writer waiting, new readers will be blocked
    417 			 * out.
    418 			 */
    419 			new = dcnt << RW_READ_COUNT_SHIFT;
    420 			if (wcnt != 0)
    421 				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
    422 			if (!RW_RELEASE(rw, owner, new)) {
    423 				/* Oops, try again. */
    424 				turnstile_exit(rw);
    425 				continue;
    426 			}
    427 
    428 			/* Wake up all sleeping readers. */
    429 			turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
    430 		}
    431 
    432 		break;
    433 	}
    434 }
    435 
    436 /*
    437  * rw_tryenter:
    438  *
    439  *	Try to acquire a rwlock.
    440  */
    441 int
    442 rw_tryenter(krwlock_t *rw, krw_t op)
    443 {
    444 	uintptr_t curthread, owner, incr, need_wait;
    445 
    446 	curthread = (uintptr_t)curlwp;
    447 	RW_ASSERT(rw, curthread != 0);
    448 
    449 	if (op == RW_READER) {
    450 		incr = RW_READ_INCR;
    451 		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
    452 	} else {
    453 		RW_DASSERT(rw, op == RW_WRITER);
    454 		incr = curthread | RW_WRITE_LOCKED;
    455 		need_wait = RW_WRITE_LOCKED | RW_THREAD;
    456 	}
    457 
    458 	for (;;) {
    459 		owner = rw->rw_owner;
    460 		if ((owner & need_wait) == 0) {
    461 			if (RW_ACQUIRE(rw, owner, owner + incr)) {
    462 				/* Got it! */
    463 				break;
    464 			}
    465 			continue;
    466 		}
    467 		return 0;
    468 	}
    469 
    470 	RW_LOCKED(rw, op);
    471 	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
    472 	    (op == RW_READER && RW_COUNT(rw) != 0));
    473 	return 1;
    474 }
    475 
    476 /*
    477  * rw_downgrade:
    478  *
    479  *	Downgrade a write lock to a read lock.
    480  */
    481 void
    482 rw_downgrade(krwlock_t *rw)
    483 {
    484 	uintptr_t owner, curthread;
    485 
    486 	curthread = (uintptr_t)curlwp;
    487 	RW_ASSERT(rw, curthread != 0);
    488 	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
    489 	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
    490 	RW_UNLOCKED(rw);
    491 
    492 	for (;;) {
    493 		owner = rw->rw_owner;
    494 
    495 		/* If there are waiters we need to do this the hard way. */
    496 		if ((owner & RW_HAS_WAITERS) != 0) {
    497 			rw_vector_exit(rw, __RW_DOWNGRADE);
    498 			return;
    499 		}
    500 
    501 		/*
    502 		 * Try swapping us down to one read hold.  If it fails, the
    503 		 * lock condition has changed and we most likely now have
    504 		 * waiters.
    505 		 */
    506 		if (RW_RELEASE(rw, owner, RW_READ_INCR))
    507 			break;
    508 	}
    509 
    510 	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
    511 	RW_DASSERT(rw, RW_COUNT(rw) != 0);
    512 }
    513 
    514 /*
    515  * rw_tryupgrade:
    516  *
    517  *	Try to upgrade a read lock to a write lock.  We must be the
    518  *	only reader.
    519  */
    520 int
    521 rw_tryupgrade(krwlock_t *rw)
    522 {
    523 	uintptr_t owner, curthread, new;
    524 
    525 	curthread = (uintptr_t)curlwp;
    526 	RW_ASSERT(rw, curthread != 0);
    527 
    528 	for (;;) {
    529 		owner = rw->rw_owner;
    530 		RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
    531 		if ((owner & RW_THREAD) != RW_READ_INCR) {
    532 			RW_ASSERT(rw, (owner & RW_THREAD) != 0);
    533 			return 0;
    534 		}
    535 		new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
    536 		if (RW_ACQUIRE(rw, owner, new))
    537 			break;
    538 	}
    539 
    540 	RW_LOCKED(rw, RW_WRITER);
    541 	RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
    542 	RW_DASSERT(rw, RW_OWNER(rw) == curthread);
    543 
    544 	return 1;
    545 }
    546 
    547 /*
    548  * rw_read_held:
    549  *
    550  *	Returns true if the rwlock is held for reading.  Must only be
    551  *	used for diagnostic assertions, and never be used to make
    552  * 	decisions about how to use a rwlock.
    553  */
    554 int
    555 rw_read_held(krwlock_t *rw)
    556 {
    557 	uintptr_t owner;
    558 
    559 	owner = rw->rw_owner;
    560 	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
    561 }
    562 
    563 /*
    564  * rw_write_held:
    565  *
    566  *	Returns true if the rwlock is held for writing.  Must only be
    567  *	used for diagnostic assertions, and never be used to make
    568  *	decisions about how to use a rwlock.
    569  */
    570 int
    571 rw_write_held(krwlock_t *rw)
    572 {
    573 
    574 	return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
    575 }
    576 
    577 /*
    578  * Slow stubs for platforms that do not implement fast-path ones.
    579  */
    580 #ifndef __HAVE_RW_ENTER
/*
 * rw_enter:
 *
 *	Slow stub: acquire a rwlock via the vector path.  Used when
 *	the platform provides no fast-path rw_enter().
 */
void
rw_enter(krwlock_t *rw, krw_t op)
{
	rw_vector_enter(rw, op);
}
    586 #endif
    587 
    588 #ifndef __HAVE_RW_EXIT
    589 void
    590 rw_exit(krwlock_t *rw)
    591 {
    592 	krw_t op;
    593 	op = ((rw->rw_owner & RW_WRITE_LOCKED) ? RW_WRITER : RW_READER);
    594 	rw_vector_exit(rw, op);
    595 }
    596 #endif
    597