Home | History | Annotate | Line # | Download | only in kern
kern_rwlock.c revision 1.1.36.4
      1 /*	$NetBSD: kern_rwlock.c,v 1.1.36.4 2006/11/17 16:34:36 ad Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2002, 2006 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe and Andrew Doran.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 /*
     40  * Kernel reader/writer lock implementation, modeled after those
     41  * found in Solaris, a description of which can be found in:
     42  *
     43  *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
     44  *	    Richard McDougall.
     45  */
     46 
     47 #include "opt_multiprocessor.h"
     48 
     49 #include <sys/cdefs.h>
     50 __KERNEL_RCSID(0, "$NetBSD: kern_rwlock.c,v 1.1.36.4 2006/11/17 16:34:36 ad Exp $");
     51 
     52 #define	__RWLOCK_PRIVATE
     53 
     54 #include <sys/param.h>
     55 #include <sys/proc.h>
     56 #include <sys/rwlock.h>
     57 #include <sys/sched.h>
     58 #include <sys/sleepq.h>
     59 #include <sys/systm.h>
     60 #include <sys/lockdebug.h>
     61 
     62 #include <dev/lockstat.h>
     63 
     64 #define RW_ABORT(rw, msg)						\
     65     LOCKDEBUG_ABORT(RW_GETID(rw), rw, &rwlock_lockops, __FUNCTION__, msg)
     66 
     67 /*
     68  * LOCKDEBUG
     69  */
     70 
     71 #if defined(LOCKDEBUG)
     72 
     73 #define	RW_LOCKED(rw, op)						\
     74 do {									\
     75 	LOCKDEBUG_LOCKED(RW_GETID(rw),					\
     76 	    (uintptr_t)__builtin_return_address(0), op == RW_READER);	\
     77 } while (/* CONSTCOND */ 0)
     78 
     79 #define	RW_UNLOCKED(rw, op)						\
     80 do {									\
     81 	LOCKDEBUG_UNLOCKED(RW_GETID(rw),				\
     82 	    (uintptr_t)__builtin_return_address(0), op == RW_READER);	\
     83 } while (/* CONSTCOND */ 0)
     84 
     85 #define	RW_DASSERT(rw, cond)						\
     86 do {									\
     87 	if (!(cond))							\
     88 		RW_ABORT(rw, "assertion failed: " #cond);		\
     89 } while (/* CONSTCOND */ 0);
     90 
     91 #else	/* LOCKDEBUG */
     92 
     93 #define	RW_LOCKED(rw, op)	/* nothing */
     94 #define	RW_UNLOCKED(rw, op)	/* nothing */
     95 #define	RW_DASSERT(rw, cond)	/* nothing */
     96 
     97 #endif	/* LOCKDEBUG */
     98 
     99 /*
    100  * DIAGNOSTIC
    101  */
    102 
    103 #if defined(DIAGNOSTIC)
    104 
    105 #define	RW_ASSERT(rw, cond)						\
    106 do {									\
    107 	if (!(cond))							\
    108 		RW_ABORT(rw, "assertion failed: " #cond);		\
    109 } while (/* CONSTCOND */ 0)
    110 
    111 #else
    112 
    113 #define	RW_ASSERT(rw, cond)	/* nothing */
    114 
    115 #endif	/* DIAGNOSTIC */
    116 
    117 int	rw_dump(volatile void *, char *, size_t);
    118 
    119 lockops_t rwlock_lockops = {
    120 	"Reader / writer lock",
    121 	1,
    122 	rw_dump
    123 };
    124 
    125 /*
    126  * rw_dump:
    127  *
    128  *	Dump the contents of a rwlock structure.
    129  */
    130 int
    131 rw_dump(volatile void *cookie, char *buf, size_t l)
    132 {
    133 	volatile krwlock_t *rw = cookie;
    134 
    135 	return snprintf(buf, l, "owner/count  : 0x%16lx flags    : 0x%16x\n",
    136 	    (long)RW_OWNER(rw), (int)RW_FLAGS(rw));
    137 }
    138 
    139 /*
    140  * rw_init:
    141  *
    142  *	Initialize a rwlock for use.
    143  */
    144 void
    145 rw_init(krwlock_t *rw)
    146 {
    147 	u_int id;
    148 
    149 	memset(rw, 0, sizeof(*rw));
    150 
    151 	id = LOCKDEBUG_ALLOC(rw, &rwlock_lockops);
    152 	RW_SETID(rw, id);
    153 }
    154 
    155 /*
    156  * rw_destroy:
    157  *
    158  *	Tear down a rwlock.
    159  */
    160 void
    161 rw_destroy(krwlock_t *rw)
    162 {
    163 
    164 	LOCKDEBUG_FREE(rw, RW_GETID(rw));
    165 	RW_ASSERT(rw, rw->rw_owner == 0);
    166 }
    167 
    168 /*
    169  * rw_vector_enter:
    170  *
    171  *	Acquire a rwlock.
    172  */
    173 void
    174 #ifdef __NEED_RW_CALLSITE
    175 rw_vector_enter(krwlock_t *rw, krw_t op, uintptr_t callsite)
    176 #else
    177 rw_vector_enter(krwlock_t *rw, krw_t op)
    178 #endif
    179 {
    180 	uintptr_t owner, incr, need_wait, set_wait, curthread;
    181 	turnstile_t *ts;
    182 	int queue;
    183 	LOCKSTAT_TIMER(slptime);
    184 	struct lwp *l;
    185 
    186 	l = curlwp;
    187 	curthread = (uintptr_t)l;
    188 	RW_ASSERT(rw, curthread != 0);
    189 
    190 #ifdef LOCKDEBUG
    191 	if (panicstr == NULL) {
    192 		simple_lock_only_held(NULL, "rw_enter");
    193 		LOCKDEBUG_BARRIER(&kernel_lock, 1);
    194 	}
    195 #endif
    196 
    197 	/*
    198 	 * We play a slight trick here.  If we're a reader, we want
    199 	 * increment the read count.  If we're a writer, we want to
    200 	 * set the owner field and whe WRITE_LOCKED bit.
    201 	 *
    202 	 * In the latter case, we expect those bits to be zero,
    203 	 * therefore we can use an add operation to set them, which
    204 	 * means an add operation for both cases.
    205 	 */
    206 	if (op == RW_READER) {
    207 		incr = RW_READ_INCR;
    208 		set_wait = RW_HAS_WAITERS;
    209 		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
    210 		queue = TS_READER_Q;
    211 	} else {
    212 		RW_DASSERT(rw, op == RW_WRITER);
    213 		incr = curthread | RW_WRITE_LOCKED;
    214 		set_wait = RW_HAS_WAITERS | RW_WRITE_WANTED;
    215 		need_wait = RW_WRITE_LOCKED | RW_THREAD;
    216 		queue = TS_WRITER_Q;
    217 	}
    218 
    219 	for (;;) {
    220 		/*
    221 		 * Read the lock owner field.  If the need-to-wait
    222 		 * indicator is clear, then try to acquire the lock.
    223 		 */
    224 		owner = rw->rw_owner;
    225 		if ((owner & need_wait) == 0) {
    226 			if (RW_ACQUIRE(rw, owner, owner + incr)) {
    227 				/* Got it! */
    228 				break;
    229 			}
    230 
    231 			/*
    232 			 * Didn't get it -- spin around again (we'll
    233 			 * probably sleep on the next iteration).
    234 			 */
    235 			continue;
    236 		}
    237 
    238 		if (panicstr != NULL)
    239 			return;
    240 		if (RW_OWNER(rw) == curthread)
    241 			RW_ABORT(rw, "locking against myself");
    242 
    243 		/*
    244 		 * Grab the turnstile chain lock.  Once we have that, we
    245 		 * can adjust the waiter bits and sleep queue.
    246 		 */
    247 		ts = turnstile_lookup(rw);
    248 
    249 		/*
    250 		 * Mark the rwlock as having waiters.  If the set fails,
    251 		 * then we may not need to sleep and should spin again.
    252 		 */
    253 		if (!RW_SET_WAITERS(rw, need_wait, set_wait)) {
    254 			turnstile_exit(rw);
    255 			continue;
    256 		}
    257 
    258 		LOCKSTAT_START_TIMER(slptime);
    259 
    260 		turnstile_block(ts, queue, sched_kpri(l), rw);
    261 
    262 		/* If we wake up and arrive here, we've been handed the lock. */
    263 		RW_RECEIVE(rw);
    264 
    265 		LOCKSTAT_STOP_TIMER(slptime);
    266 #ifdef __NEED_RW_CALLSITE
    267 		LOCKSTAT_EVENT_RA(rw, LB_RWLOCK | LB_SLEEP, 1, slptime, callsite);
    268 #else
    269 		LOCKSTAT_EVENT(rw, LB_RWLOCK | LB_SLEEP, 1, slptime);
    270 #endif
    271 
    272 		turnstile_unblock();
    273 		break;
    274 	}
    275 
    276 	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
    277 	    (op == RW_READER && RW_COUNT(rw) != 0));
    278 }
    279 
    280 /*
    281  * rw_vector_exit:
    282  *
    283  *	Release a rwlock.
    284  */
    285 void
    286 rw_vector_exit(krwlock_t *rw, krw_t op)
    287 {
    288 	uintptr_t curthread, owner, decr, new;
    289 	turnstile_t *ts;
    290 	int rcnt, wcnt, dcnt;
    291 	struct lwp *l;
    292 
    293 	curthread = (uintptr_t)curlwp;
    294 	RW_ASSERT(rw, curthread != 0);
    295 
    296 	if (panicstr != NULL) {
    297 		/*
    298 		 * XXX What's the correct thing to do here?  We should at least
    299 		 * release the lock.
    300 		 */
    301 		return;
    302 	}
    303 
    304 	/*
    305 	 * Again, we use a trick.  Since we used an add operation to
    306 	 * set the required lock bits, we can use a subtract to clear
    307 	 * them, which makes the read-release and write-release path
    308 	 * the same.
    309 	 */
    310 	switch (op) {
    311 	case RW_READER:
    312 		RW_ASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
    313 		RW_ASSERT(rw, RW_COUNT(rw) != 0);
    314 		dcnt = 0;
    315 		decr = RW_READ_INCR;
    316 		break;
    317 	case RW_WRITER:
    318 		RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
    319 		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
    320 		dcnt = 0;
    321 		decr = curthread | RW_WRITE_LOCKED;
    322 		break;
    323 	case __RW_DOWNGRADE:
    324 		RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
    325 		RW_ASSERT(rw, RW_OWNER(rw) == curthread);
    326 		dcnt = 1;
    327 		decr = (curthread | RW_WRITE_LOCKED) - RW_READ_INCR;
    328 		break;
    329 	default:
    330 		RW_DASSERT(rw, "blame gcc, I do");
    331 		return;
    332 	}
    333 
    334 	for (;;) {
    335 		/*
    336 		 * We assume that the caller has already tried to release
    337 		 * the lock and optimize for the 'has waiters' case, and so
    338 		 * grab the turnstile chain lock.  This gets the interlock
    339 		 * on the sleep queue.  Once we have that, we can adjust the
    340 		 * waiter bits.
    341 		 */
    342 		ts = turnstile_lookup(rw);
    343 
    344 		/*
    345 		 * Compute what we expect the new value of the lock to be.
    346 		 * Only proceed to do direct handoff if there are waiters,
    347 		 * and if the lock would become unowned.
    348 		 */
    349 		owner = rw->rw_owner;
    350 		new = (owner - decr) & ~RW_WRITE_WANTED;
    351 		if ((new & (RW_THREAD | RW_HAS_WAITERS)) != RW_HAS_WAITERS) {
    352 			if (RW_RELEASE(rw, owner, new)) {
    353 				turnstile_exit(rw);
    354 				break;
    355 			}
    356 			turnstile_exit(rw);
    357 			continue;
    358 		}
    359 
    360 		/*
    361 		 * Adjust the waiter bits.  If we are releasing a write
    362 		 * lock or downgrading a write lock to read, then wake all
    363 		 * outstanding readers.  If we are releasing a read lock,
    364 		 * then wake one writer.
    365 		 */
    366 		RW_DASSERT(rw, ts != NULL);
    367 
    368 		wcnt = TS_WAITERS(ts, TS_WRITER_Q);
    369 		rcnt = TS_WAITERS(ts, TS_READER_Q);
    370 
    371 		/*
    372 		 * Give the lock away.
    373 		 */
    374 		if (dcnt == 0 &&
    375 		    (rcnt == 0 || (op == RW_READER && wcnt != 0))) {
    376 			RW_DASSERT(rw, wcnt != 0);
    377 
    378 			/*
    379 			 * Give the lock to the longest waiting
    380 			 * writer.
    381 			 */
    382 			l = TS_FIRST(ts, TS_WRITER_Q);
    383 			new = (uintptr_t)l | RW_WRITE_LOCKED;
    384 
    385 			if (wcnt > 1)
    386 				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
    387 			else if (rcnt != 0)
    388 				new |= RW_HAS_WAITERS;
    389 
    390 			if (!RW_RELEASE(rw, owner, new)) {
    391 				/* Oops, try again. */
    392 				turnstile_exit(rw);
    393 				continue;
    394 			}
    395 
    396 			/* Wake the writer. */
    397 			turnstile_wakeup(ts, TS_WRITER_Q, wcnt, l);
    398 		} else {
    399 			dcnt += rcnt;
    400 			RW_DASSERT(rw, dcnt != 0);
    401 
    402 			/*
    403 			 * Give the lock to all blocked readers.  We may
    404 			 * retain one read hold if downgrading.  If there
    405 			 * is a writer waiting, new readers will be blocked
    406 			 * out.
    407 			 */
    408 			new = dcnt << RW_READ_COUNT_SHIFT;
    409 			if (wcnt != 0)
    410 				new |= RW_HAS_WAITERS | RW_WRITE_WANTED;
    411 			if (!RW_RELEASE(rw, owner, new)) {
    412 				/* Oops, try again. */
    413 				turnstile_exit(rw);
    414 				continue;
    415 			}
    416 
    417 			/* Wake up all sleeping readers. */
    418 			turnstile_wakeup(ts, TS_READER_Q, rcnt, NULL);
    419 		}
    420 
    421 		break;
    422 	}
    423 }
    424 
    425 /*
    426  * rw_tryenter:
    427  *
    428  *	Try to acquire a rwlock.
    429  */
    430 int
    431 rw_tryenter(krwlock_t *rw, krw_t op)
    432 {
    433 	uintptr_t curthread, owner, incr, need_wait;
    434 
    435 	curthread = (uintptr_t)curlwp;
    436 	RW_ASSERT(rw, curthread != 0);
    437 
    438 	if (op == RW_READER) {
    439 		incr = RW_READ_INCR;
    440 		need_wait = RW_WRITE_LOCKED | RW_WRITE_WANTED;
    441 	} else {
    442 		RW_DASSERT(rw, op == RW_WRITER);
    443 		incr = curthread | RW_WRITE_LOCKED;
    444 		need_wait = RW_WRITE_LOCKED | RW_THREAD;
    445 	}
    446 
    447 	for (;;) {
    448 		owner = rw->rw_owner;
    449 		if ((owner & need_wait) == 0) {
    450 			if (RW_ACQUIRE(rw, owner, owner + incr)) {
    451 				/* Got it! */
    452 				break;
    453 			}
    454 			continue;
    455 		}
    456 		return 0;
    457 	}
    458 
    459 	RW_LOCKED(rw, op);
    460 	RW_DASSERT(rw, (op != RW_READER && RW_OWNER(rw) == curthread) ||
    461 	    (op == RW_READER && RW_COUNT(rw) != 0));
    462 	return 1;
    463 }
    464 
    465 /*
    466  * rw_downgrade:
    467  *
    468  *	Downgrade a write lock to a read lock.
    469  */
    470 void
    471 rw_downgrade(krwlock_t *rw)
    472 {
    473 	uintptr_t owner, curthread;
    474 
    475 	curthread = (uintptr_t)curlwp;
    476 	RW_ASSERT(rw, curthread != 0);
    477 	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) != 0);
    478 	RW_ASSERT(rw, RW_OWNER(rw) == curthread);
    479 	RW_UNLOCKED(rw, RW_WRITER);
    480 
    481 	for (;;) {
    482 		owner = rw->rw_owner;
    483 
    484 		/* If there are waiters we need to do this the hard way. */
    485 		if ((owner & RW_HAS_WAITERS) != 0) {
    486 			rw_vector_exit(rw, __RW_DOWNGRADE);
    487 			return;
    488 		}
    489 
    490 		/*
    491 		 * Try swapping us down to one read hold.  If it fails, the
    492 		 * lock condition has changed and we most likely now have
    493 		 * waiters.
    494 		 */
    495 		if (RW_RELEASE(rw, owner, RW_READ_INCR))
    496 			break;
    497 	}
    498 
    499 	RW_DASSERT(rw, (rw->rw_owner & RW_WRITE_LOCKED) == 0);
    500 	RW_DASSERT(rw, RW_COUNT(rw) != 0);
    501 }
    502 
    503 /*
    504  * rw_tryupgrade:
    505  *
    506  *	Try to upgrade a read lock to a write lock.  We must be the
    507  *	only reader.
    508  */
    509 int
    510 rw_tryupgrade(krwlock_t *rw)
    511 {
    512 	uintptr_t owner, curthread, new;
    513 
    514 	curthread = (uintptr_t)curlwp;
    515 	RW_ASSERT(rw, curthread != 0);
    516 
    517 	for (;;) {
    518 		owner = rw->rw_owner;
    519 		RW_ASSERT(rw, (owner & RW_WRITE_LOCKED) == 0);
    520 		if ((owner & RW_THREAD) != RW_READ_INCR) {
    521 			RW_ASSERT(rw, (owner & RW_THREAD) != 0);
    522 			return 0;
    523 		}
    524 		new = curthread | RW_WRITE_LOCKED | (owner & ~RW_THREAD);
    525 		if (RW_ACQUIRE(rw, owner, new))
    526 			break;
    527 	}
    528 
    529 	RW_LOCKED(rw, RW_WRITER);
    530 	RW_DASSERT(rw, rw->rw_owner & RW_WRITE_LOCKED);
    531 	RW_DASSERT(rw, RW_OWNER(rw) == curthread);
    532 
    533 	return 1;
    534 }
    535 
    536 /*
    537  * rw_read_held:
    538  *
    539  *	Returns true if the rwlock is held for reading.  Must only be
    540  *	used for diagnostic assertions, and never be used to make
    541  * 	decisions about how to use a rwlock.
    542  */
    543 int
    544 rw_read_held(krwlock_t *rw)
    545 {
    546 	uintptr_t owner;
    547 
    548 	if (panicstr != NULL)
    549 		return 1;
    550 
    551 	owner = rw->rw_owner;
    552 	return (owner & RW_WRITE_LOCKED) == 0 && (owner & RW_THREAD) != 0;
    553 }
    554 
    555 /*
    556  * rw_write_held:
    557  *
    558  *	Returns true if the rwlock is held for writing.  Must only be
    559  *	used for diagnostic assertions, and never be used to make
    560  *	decisions about how to use a rwlock.
    561  */
    562 int
    563 rw_write_held(krwlock_t *rw)
    564 {
    565 
    566 	if (panicstr != NULL)
    567 		return 1;
    568 
    569 	return (rw->rw_owner & RW_WRITE_LOCKED) != 0;
    570 }
    571 
    572 /*
    573  * rw_lock_held:
    574  *
    575  *	Returns true if the rwlock is held for reading or writing.  Must
    576  *	only be used for diagnostic assertions, and never be used to make
    577  *	decisions about how to use a rwlock.
    578  */
    579 int
    580 rw_lock_held(krwlock_t *rw)
    581 {
    582 
    583 	if (panicstr != NULL)
    584 		return 1;
    585 
    586 	return (rw->rw_owner & RW_THREAD) != 0;
    587 }
    588 
    589 /*
    590  * Slow stubs for platforms that do not implement fast-path ones.
    591  */
    592 #ifndef __HAVE_RW_ENTER
    593 void
    594 rw_enter(krwlock_t *rw, krw_t op)
    595 {
    596 	rw_vector_enter(rw, op, (uintptr_t)__builtin_return_address(0));
    597 	RW_LOCKED(rw, op);
    598 }
    599 #endif
    600 
    601 #ifndef __HAVE_RW_EXIT
    602 void
    603 rw_exit(krwlock_t *rw)
    604 {
    605 	krw_t op;
    606 	op = ((rw->rw_owner & RW_WRITE_LOCKED) ? RW_WRITER : RW_READER);
    607 	RW_UNLOCKED(rw, op);
    608 	rw_vector_exit(rw, op);
    609 }
    610 #endif
    611