Home | History | Annotate | Line # | Download | only in kern
kern_rwlock.c revision 1.1.36.5
      1 /*	$NetBSD: kern_rwlock.c,v 1.1.36.5 2006/12/29 20:27:44 ad Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2002, 2006 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe and Andrew Doran.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 /*
     40  * Kernel reader/writer lock implementation, modeled after those
     41  * found in Solaris, a description of which can be found in:
     42  *
     43  *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
     44  *	    Richard McDougall.
     45  */
     46 
     47 #include "opt_multiprocessor.h"
     48 
     49 #include <sys/cdefs.h>
     50 __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.1.36.5 2006/12/29 20:27:44 ad Exp $");
     51 
     52 #define	__RWLOCK_PRIVATE
     53 
     54 #include <sys/param.h>
     55 #include <sys/proc.h>
     56 #include <sys/rwlock.h>
     57 #include <sys/sched.h>
     58 #include <sys/sleepq.h>
     59 #include <sys/systm.h>
     60 #include <sys/lockdebug.h>
     61 
     62 #include <dev/lockstat.h>
     63 
     64 #define RW_ABORT(rw, msg)						\
     65     LOCKDEBUG_ABORT(RW_GETID(rw), rw, &rwlock_lockops, __FUNCTION__, msg)
     66 
     67 /*
     68  * LOCKDEBUG
     69  */
     70 
     71 #if defined(LOCKDEBUG)
     72 
     73 #define	RW_LOCKED(rw, op)						\
     74 do {									\
     75 	LOCKDEBUG_LOCKED(RW_GETID(rw),					\
     76 	    (uintptr_t)__builtin_return_address(0), op == RW_READER);	\
     77 } while (/* CONSTCOND */ 0)
     78 
     79 #define	RW_UNLOCKED(rw, op)						\
     80 do {									\
     81 	LOCKDEBUG_UNLOCKED(RW_GETID(rw),				\
     82 	    (uintptr_t)__builtin_return_address(0), op == RW_READER);	\
     83 } while (/* CONSTCOND */ 0)
     84 
     85 #define	RW_DASSERT(rw, cond)						\
     86 do {									\
     87 	if (!(cond))							\
     88 		RW_ABORT(rw, "assertion failed: " #cond);		\
     89 } while (/* CONSTCOND */ 0);
     90 
     91 #else	/* LOCKDEBUG */
     92 
     93 #define	RW_LOCKED(rw, op)	/* nothing: LOCKDEBUG is not enabled */
     94 #define	RW_UNLOCKED(rw, op)	/* nothing: LOCKDEBUG is not enabled */
     95 #define	RW_DASSERT(rw, cond)	/* nothing: cond is not evaluated */
     96 
     97 #endif	/* LOCKDEBUG */
     98 
     99 /*
    100  * DIAGNOSTIC
    101  */
    102 
    103 #if defined(DIAGNOSTIC)
    104 
    105 #define	RW_ASSERT(rw, cond)						\
    106 do {									\
    107 	if (!(cond))							\
    108 		RW_ABORT(rw, "assertion failed: " #cond);		\
    109 } while (/* CONSTCOND */ 0)
    110 
    111 #else
    112 
    113 #define	RW_ASSERT(rw, cond)	/* nothing: DIAGNOSTIC is not enabled, cond is not evaluated */
    114 
    115 #endif	/* DIAGNOSTIC */
    116 
    117 /*
    118  * For platforms that use 'simple' RW locks.
    119  */
    120 #ifdef __HAVE_SIMPLE_RW_LOCKS
    /*
     * Acquire and release are the same operation here: both hand the old
     * and new values of the rw_owner word to RW_CAS.  The lock ID used in
     * LOCKDEBUG calls is kept in the rw_id member.
     */
    121 #define	RW_ACQUIRE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)
    122 #define	RW_RELEASE(rw, old, new)	RW_CAS(&(rw)->rw_owner, old, new)
    123 #define	RW_SETID(rw, id)		((rw)->rw_id = id)
    124 #define	RW_GETID(rw)			((rw)->rw_id)
    125 
    126 static inline int
    127 RW_SET_WAITERS(krwlock_t *rw, uintptr_t need, uintptr_t set)
    128 {
    129 	uintptr_t old;
    130 
    131 	if (((old = rw->rw_owner) & need) == 0)
    132 		return 0;
    133 	return RW_CAS(&rw->rw_owner, old, old | set);
    134 }
    135 #endif	/* __HAVE_SIMPLE_RW_LOCKS */
    136 
    137 /*
    138  * For platforms that do not provide stubs, or for the LOCKDEBUG case.
        *
        * Under LOCKDEBUG the __HAVE_RW_STUBS marker is always undefined, so
        * the C versions below are used even where the platform defines it.
        * When the marker is not defined, rw_enter becomes a strong alias of
        * rw_vector_enter and rw_exit is provided in C.
    139  */
    140 #ifdef LOCKDEBUG
    141 #undef	__HAVE_RW_STUBS
    142 #endif
    143 
    144 #ifndef __HAVE_RW_STUBS
    145 __strong_alias(rw_enter, rw_vector_enter);
    146 
    147 void
    148 rw_exit(krwlock_t *rw)
    149 {
    150 	krw_t op;
    151 	op = ((rw->rw_owner & RW_WRITE_LOCKED) ? RW_WRITER : RW_READER);
    152 	RW_UNLOCKED(rw, op);
    153 	rw_vector_exit(rw, op);
    154 }
    155 #endif
    156 
    157 void	rw_dump(volatile void *);	/* declared early: referenced by rwlock_lockops below */
    158 
        /*
         * Lock operations descriptor for rwlocks.  Its address is passed to
         * LOCKDEBUG_ALLOC by rw_init() and to LOCKDEBUG_ABORT by RW_ABORT.
         */
    159 lockops_t rwlock_lockops = {
    160 	"Reader / writer lock",		/* descriptive name of the lock type */
    161 	1,				/* NOTE(review): meaning not visible here; presumably marks a sleeping lock -- confirm against lockops_t */
    162 	rw_dump				/* routine that dumps a rwlock's contents */
    163 };
    164 
    165 /*
    166  * rw_dump:
    167  *
    168  *	Dump the contents of a rwlock structure.
    169  */
    170 void
    171 rw_dump(volatile void *cookie)
    172 {
    173 	volatile krwlock_t *rw = cookie;
    174 
    175 	printf_nolog("owner/count  : %#018lx flags    : %#018x\n",
    176 	    (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
    177 }
    178 
    179 /*
    180  * rw_init:
    181  *
    182  *	Initialize a rwlock for use.
    183  */
    184 void
    185 rw_init(krwlock_t *rw)
    186 {
    187 	u_int id;
    188 
    189 	memset(rw, 0, sizeof(*rw));
    190 
    191 	id = LOCKDEBUG_ALLOC(rw, &rwlock_lockops);
    192 	RW_SETID(rw, id);
    193 }
    194 
    195 /*
    196  * rw_destroy:
    197  *
    198  *	Tear down a rwlock.
    199  */
    200 void
    201 rw_destroy(krwlock_t *rw)
    202 {
    203 
    204 	LOCKDEBUG_FREE(rw, RW_GETID(rw));
    205 	RW_ASSERT(rw, rw->rw_owner == 0);
    206 }
    207 
    208 /*
    209  * rw_vector_enter:
    210  *
    211  *	Acquire a rwlock.
        *
        *	"op" selects the hold type: RW_READER for a read hold, otherwise
        *	a write hold (asserted to be RW_WRITER under LOCKDEBUG).  The
        *	lock word is updated with RW_ACQUIRE when no need-to-wait bit is
        *	set; otherwise the caller marks the lock as having waiters and
        *	blocks on the lock's turnstile until the lock is handed to it.
        *
        *	If the lock is busy and panicstr is set, the function returns
        *	WITHOUT having acquired the lock.  Trying to take a lock whose
        *	owner field equals the current LWP aborts with "locking against
        *	myself".
    212  */
    213 void
    214 rw_vector_enter(krwlock_t *rw, const krw_t op)
    215 {
    216 	uintptr_t owner, incr, need_wait, set_wait, curthread;
    217 	turnstile_t *ts;
    218 	int queue;
    219 	LOCKSTAT_TIMER(slptime);
    220 	struct lwp *l;
    221 
    222 	l = curlwp;
    223 	curthread = (uintptr_t)l;
    224 	RW_ASSERT(rw, curthread != 0);
    225 
    226 #ifdef LOCKDEBUG
        	/* LOCKDEBUG sanity checks; skipped once the system is panicking. */
    227 	if (panicstr == NULL) {
    228 		simple_lock_only_held(NULL, "rw_enter");
    229 #ifdef MULTIPROCESSOR
    230 		LOCKDEBUG_BARRIER(&kernel_lock, 1);
    231 #else
    232 		LOCKDEBUG_BARRIER(NULL, 1);
    233 #endif
    234 	}
    235 #endif
    236 
    237 	/*
    238 	 * We play a slight trick here.  If we're a reader, we want to
    239 	 * increment the read count.  If we're a writer, we want to
    240 	 * set the owner field and the WRITE_LOCKED bit.
    241 	 *
    242 	 * In the latter case, we expect those bits to be zero,
    243 	 * therefore we can use an add operation to set them, which
    244 	 * means an add operation for both cases.
        	 *
        	 * need_wait is the set of bits that forces us to wait, set_wait
        	 * the bits we set before sleeping, and queue the turnstile
        	 * queue we sleep on.  A reader waits while the lock is write
        	 * locked or a writer is wanted; a writer waits while the lock
        	 * is held at all.
    245 	 */
    246 	if (op == RW_READER) {
    247 		incr = RW_READ_INCR;
    248 		set_wait = RW_HAS_WAITERS;
    249 		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
    250 		queue = TS_READER_Q;
    251 	} else {
    252 		RW_DASSERT(rw, op == RW_WRITER);
    253 		incr = curthread | RW_WRITE_LOCKED;
    254 		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
    255 		need_wait = RW_WRITE_LOCKED | RW_THREAD;
    256 		queue = TS_WRITER_Q;
    257 	}
    258 
    259 	for (;;) {
    260 		/*
    261 		 * Read the lock owner field.  If the need-to-wait
    262 		 * indicator is clear, then try to acquire the lock.
    263 		 */
    264 		owner = rw->rw_owner;
    265 		if ((owner & need_wait) == 0) {
    266 			if (RW_ACQUIRE(rw, owner, owner + incr)) {
    267 				/* Got it! */
    268 				break;
    269 			}
    270 
    271 			/*
    272 			 * Didn't get it -- spin around again (we'll
    273 			 * probably sleep on the next iteration).
    274 			 */
    275 			continue;
    276 		}
    277 
        		/*
        		 * The lock is busy.  While panicking, give up: note that
        		 * we return without the lock and without RW_LOCKED().
        		 */
    278 		if (panicstr != NULL)
    279 			return;
        		/* Waiting for a lock we already own would never finish. */
    280 		if (RW_OWNER(rw) == curthread)
    281 			RW_ABORT(rw, "locking against myself");
    282 
    283 		/*
    284 		 * Grab the turnstile chain lock.  Once we have that, we
    285 		 * can adjust the waiter bits and sleep queue.
    286 		 */
    287 		ts = turnstile_lookup(rw);
    288 
    289 		/*
    290 		 * Mark the rwlock as having waiters.  If the set fails,
    291 		 * then we may not need to sleep and should spin again.
    292 		 */
    293 		if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
    294 			turnstile_exit(rw);
    295 			continue;
    296 		}
    297 
        		/* Time the sleep for the lockstat event recorded below. */
    298 		LOCKSTAT_START_TIMER(slptime);
    299 
    300 		turnstile_block(ts, queue, sched_kpri(l), rw);
    301 
    302 		/* If we wake up and arrive here, we've been handed the lock. */
    303 		RW_RECEIVE(rw);
    304 
    305 		LOCKSTAT_STOP_TIMER(slptime);
    306 		LOCKSTAT_EVENT(rw,
    307 		    LB_RWLOCK | (op == RW_WRITER ? LB_SLEEP1 : LB_SLEEP2),
    308 		    1, slptime);
    309 
    310 		turnstile_unblock();
    311 		break;
    312 	}
    313 
        	/* Writers must now be the owner; readers must see a non-zero count. */
    314 	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
    315 	    (op == RW_READER && RW_COUNT(rw) != 0));
    316 	RW_LOCKED(rw, op);
    317 }
    318 
    319 /*
    320  * rw_vector_exit:
    321  *
    322  *	Release a rwlock.
        *
        *	"op" is RW_READER to drop a read hold, RW_WRITER to drop a write
        *	hold, or __RW_DOWNGRADE (used by rw_downgrade()) to turn a write
        *	hold into a single read hold.  Any other value returns without
        *	touching the lock.
        *
        *	If the release would leave the lock unowned while waiters exist,
        *	ownership is handed directly to waiters on the lock's turnstile:
        *	either to the first waiting writer or to all waiting readers.
        *
        *	If panicstr is set the function returns without releasing.
    323  */
    324 void
    325 rw_vector_exit(krwlock_t *rw, const krw_t op)
    326 {
    327 	uintptr_t curthread, owner, decr, new;
    328 	turnstile_t *ts;
    329 	int rcnt, wcnt, dcnt;
    330 	struct lwp *l;
    331 
    332 	curthread = (uintptr_t)curlwp;
    333 	RW_ASSERT(rw, curthread != 0);
    334 
    335 	if (panicstr != NULL) {
    336 		/*
    337 		 * XXX What's the correct thing to do here?  We should at
    338 		 * least release the lock.
    339 		 */
    340 		return;
    341 	}
    342 
    343 	/*
    344 	 * Again, we use a trick.  Since we used an add operation to
    345 	 * set the required lock bits, we can use a subtract to clear
    346 	 * them, which makes the read-release and write-release path
    347 	 * the same.
        	 *
        	 * decr is the amount subtracted from the lock word.  dcnt is
        	 * the number of read holds the releasing LWP keeps afterwards:
        	 * 1 when downgrading, otherwise 0.
    348 	 */
    349 	switch (op) {
    350 	case RW_READER:
    351 		RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
    352 		RW_ASSERT(rw, RW_COUNT(rw) != 0);
    353 		dcnt = 0;
    354 		decr = RW_READ_INCR;
    355 		break;
    356 	case RW_WRITER:
    357 		RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
    358 		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
    359 		dcnt = 0;
    360 		decr = curthread | RW_WRITE_LOCKED;
    361 		break;
    362 	case __RW_DOWNGRADE:
    363 		RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
    364 		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
    365 		dcnt = 1;
        		/* Remove the write ownership but leave one read hold. */
    366 		decr = (curthread | RW_WRITE_LOCKED) - RW_READ_INCR;
    367 		break;
    368 	default:
        		/*
        		 * The condition is a string literal and therefore always
        		 * true, so this never aborts; an unknown op just returns.
        		 */
    369 		RW_DASSERT(rw, "XXXgcc");
    370 		return;
    371 	}
    372 
    373 	for (;;) {
    374 		/*
    375 		 * Compute what we expect the new value of the lock to be.
    376 		 * Only proceed to do direct handoff if there are waiters,
    377 		 * and if the lock would become unowned.
        		 *
        		 * On this simple path the WRITE_WANTED bit is cleared
        		 * from the new value.
        		 *
        		 * NOTE(review): for __RW_DOWNGRADE the new value still
        		 * carries one read hold, so it appears this test always
        		 * takes the simple path and the handoff code below is
        		 * never reached with dcnt == 1; waiting readers would
        		 * then stay asleep until the lock becomes unowned.
        		 * Confirm against the definition of RW_THREAD.
    378 		 */
    379 		owner = rw->rw_owner;
    380 		new = (owner - decr) & ~RW_WRITE_WANTED;
    381 		if ((new & (RW_THREAD | RW_HAS_WAITERS)) != RW_HAS_WAITERS) {
    382 			if (RW_RELEASE(rw, owner, new))
    383 				break;
    384 			continue;
    385 		}
    386 
    387 		/*
    388 		 * Grab the turnstile chain lock.  This gets the interlock
    389 		 * on the sleep queue.  Once we have that, we can adjust the
    390 		 * waiter bits.
    391 		 */
    392 		ts = turnstile_lookup(rw);
    393 
    394 		/*
    395 		 * Adjust the waiter bits.  If we are releasing a write
    396 		 * lock or downgrading a write lock to read, then wake all
    397 		 * outstanding readers.  If we are releasing a read lock,
    398 		 * then wake one writer.
        		 *
        		 * A write release with no readers waiting also goes to
        		 * one writer (see the condition below).
    399 		 */
    400 		RW_DASSERT(rw, ts != NULL);
    401 
        		/* Number of writers and readers asleep on the turnstile. */
    402 		wcnt = TS_WAITERS(ts, TS_WRITER_Q);
    403 		rcnt = TS_WAITERS(ts, TS_READER_Q);
    404 
    405 		/*
    406 		 * Give the lock away.
    407 		 */
    408 		if (dcnt == 0 &&
    409 		    (rcnt == 0 || (op == RW_READER && wcnt != 0))) {
    410 			RW_DASSERT(rw, wcnt != 0);
    411 
    412 			/*
    413 			 * Give the lock to the longest waiting
    414 			 * writer.
        			 *
        			 * The new lock word names that writer as the
        			 * owner.  The waiter bits stay set if other
        			 * writers, or any readers, remain queued.
    415 			 */
    416 			l = TS_FIRST(ts, TS_WRITER_Q);
    417 			new = (uintptr_t)l | RW_WRITE_LOCKED;
    418 
    419 			if (wcnt > 1)
    420 				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
    421 			else if (rcnt != 0)
    422 				new |= RW_HAS_WAITERS;
    423 
    424 			RW_GIVE(rw);
    425 			if (!RW_RELEASE(rw, owner, new)) {
    426 				/* Oops, try again. */
    427 				turnstile_exit(rw);
    428 				continue;
    429 			}
    430 
    431 			/* Wake the writer. */
    432 			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, l);
    433 		} else {
        			/* Holds to install: ours (if any) plus every waiting reader. */
    434 			dcnt += rcnt;
    435 			RW_DASSERT(rw, dcnt != 0);
    436 
    437 			/*
    438 			 * Give the lock to all blocked readers.  We may
    439 			 * retain one read hold if downgrading.  If there
    440 			 * is a writer waiting, new readers will be blocked
    441 			 * out.
    442 			 */
    443 			new = dcnt << RW_READ_COUNT_SHIFT;
    444 			if (wcnt != 0)
    445 				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
    446 
    447 			RW_GIVE(rw);
    448 			if (!RW_RELEASE(rw, owner, new)) {
    449 				/* Oops, try again. */
    450 				turnstile_exit(rw);
    451 				continue;
    452 			}
    453 
    454 			/* Wake up all sleeping readers. */
    455 			turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
    456 		}
    457 
    458 		break;
    459 	}
    460 }
    461 
    462 /*
    463  * rw_tryenter:
    464  *
    465  *	Try to acquire a rwlock.
    466  */
    467 int
    468 rw_tryenter(krwlock_t *rw, const krw_t op)
    469 {
    470 	uintptr_t curthread, owner, incr, need_wait;
    471 
    472 	curthread = (uintptr_t)curlwp;
    473 	RW_ASSERT(rw, curthread != 0);
    474 
    475 	if (op == RW_READER) {
    476 		incr = RW_READ_INCR;
    477 		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
    478 	} else {
    479 		RW_DASSERT(rw, op == RW_WRITER);
    480 		incr = curthread | RW_WRITE_LOCKED;
    481 		need_wait = RW_WRITE_LOCKED | RW_THREAD;
    482 	}
    483 
    484 	for (;;) {
    485 		owner = rw->rw_owner;
    486 		if ((owner & need_wait) == 0) {
    487 			if (RW_ACQUIRE(rw, owner, owner + incr)) {
    488 				/* Got it! */
    489 				break;
    490 			}
    491 			continue;
    492 		}
    493 		return 0;
    494 	}
    495 
    496 	RW_LOCKED(rw, op);
    497 	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
    498 	    (op == RW_READER && RW_COUNT(rw) != 0));
    499 	return 1;
    500 }
    501 
    502 /*
    503  * rw_downgrade:
    504  *
    505  *	Downgrade a write lock to a read lock.
    506  */
    507 void
    508 rw_downgrade(krwlock_t *rw)
    509 {
    510 	uintptr_t owner, curthread;
    511 
    512 	curthread = (uintptr_t)curlwp;
    513 	RW_ASSERT(rw, curthread != 0);
    514 	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
    515 	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
    516 	RW_UNLOCKED(rw, RW_WRITER);
    517 
    518 	for (;;) {
    519 		owner = rw->rw_owner;
    520 
    521 		/* If there are waiters we need to do this the hard way. */
    522 		if ((owner & RW_HAS_WAITERS) != 0) {
    523 			rw_vector_exit(rw, __RW_DOWNGRADE);
    524 			break;
    525 		}
    526 
    527 		/*
    528 		 * Try swapping us down to one read hold.  If it fails, the
    529 		 * lock condition has changed and we most likely now have
    530 		 * waiters.
    531 		 */
    532 		if (RW_RELEASE(rw, owner, RW_READ_INCR))
    533 			break;
    534 	}
    535 
    536 	RW_LOCKED(rw, RW_READER);
    537 	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
    538 	RW_DASSERT(rw, RW_COUNT(rw) != 0);
    539 }
    540 
    541 /*
    542  * rw_tryupgrade:
    543  *
    544  *	Try to upgrade a read lock to a write lock.  We must be the
    545  *	only reader.
    546  */
    547 int
    548 rw_tryupgrade(krwlock_t *rw)
    549 {
    550 	uintptr_t owner, curthread, new;
    551 
    552 	curthread = (uintptr_t)curlwp;
    553 	RW_ASSERT(rw, curthread != 0);
    554 
    555 	for (;;) {
    556 		owner = rw->rw_owner;
    557 		RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
    558 		if ((owner & RW_THREAD) != RW_READ_INCR) {
    559 			RW_ASSERT(rw, (owner & RW_THREAD) != 0);
    560 			return 0;
    561 		}
    562 		new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
    563 		if (RW_ACQUIRE(rw, owner, new))
    564 			break;
    565 	}
    566 
    567 	RW_LOCKED(rw, RW_WRITER);
    568 	RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
    569 	RW_DASSERT(rw, RW_OWNER(rw) == curthread);
    570 
    571 	return 1;
    572 }
    573 
    574 /*
    575  * rw_read_held:
    576  *
    577  *	Returns true if the rwlock is held for reading.  Must only be
    578  *	used for diagnostic assertions, and never be used to make
    579  * 	decisions about how to use a rwlock.
    580  */
    581 int
    582 rw_read_held(krwlock_t *rw)
    583 {
    584 	uintptr_t owner;
    585 
    586 	if (panicstr != NULL)
    587 		return 1;
    588 
    589 	owner = rw->rw_owner;
    590 	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
    591 }
    592 
    593 /*
    594  * rw_write_held:
    595  *
    596  *	Returns true if the rwlock is held for writing.  Must only be
    597  *	used for diagnostic assertions, and never be used to make
    598  *	decisions about how to use a rwlock.
    599  */
    600 int
    601 rw_write_held(krwlock_t *rw)
    602 {
    603 
    604 	if (panicstr != NULL)
    605 		return 1;
    606 
    607 	return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
    608 }
    609 
    610 /*
    611  * rw_lock_held:
    612  *
    613  *	Returns true if the rwlock is held for reading or writing.  Must
    614  *	only be used for diagnostic assertions, and never be used to make
    615  *	decisions about how to use a rwlock.
    616  */
    617 int
    618 rw_lock_held(krwlock_t *rw)
    619 {
    620 
    621 	if (panicstr != NULL)
    622 		return 1;
    623 
    624 	return (rw->rw_owner & RW_THREAD) != 0;
    625 }
    626