Home | History | Annotate | Line # | Download | only in kern
kern_turnstile.c revision 1.1.36.1
      1 /*	$NetBSD: kern_turnstile.c,v 1.1.36.1 2006/09/10 23:42:42 ad Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2002, 2006 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe and Andrew Doran.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the NetBSD
     21  *	Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 /*
 * Turnstiles are specialized sleep queues for use by locks.  Turnstiles
     41  * are described in detail in:
     42  *
     43  *	Solaris Internals: Core Kernel Architecture, Jim Mauro and
     44  *	    Richard McDougall.
     45  *
     46  * Turnstiles are kept in a hash table.  There are likely to be many more
     47  * lock objects than there are threads.  Since a thread can block on only
     48  * one lock at a time, we only need one turnstile per thread, and so they
     49  * are allocated at thread creation time.
     50  *
     51  * When a thread decides it needs to block on a lock, it looks up the
     52  * active turnstile for that lock.  If no active turnstile exists, then
     53  * the process lends its turnstile to the lock.  If there is already
     54  * an active turnstile for the lock, the thread places its turnstile on
     55  * a list of free turnstiles, and references the active one instead.
     56  *
     57  * The act of looking up the turnstile acquires an interlock on the sleep
     58  * queue.  If a thread decides it doesn't need to block after all, then
     59  * this interlock must be released by explicitly aborting the turnstile
     60  * operation.
     61  *
     62  * When a thread is awakened, it needs to get its turnstile back.  If
     63  * there are still other threads waiting in the active turnstile, the
 * thread grabs a free turnstile off the free list.  Otherwise, it
     65  * can take back the active turnstile from the lock (thus deactivating
     66  * the turnstile).
     67  *
 * Turnstiles are the place to do priority inheritance.  However, we do
     69  * not currently implement that.
     70  *
     71  * XXX We currently have to interlock with the sched_lock.  The locking
     72  * order is:
     73  *
     74  *	turnstile chain -> sched_lock
     75  */
     76 
     77 #include "opt_lockdebug.h"
     78 #include "opt_multiprocessor.h"
     79 
     80 #include <sys/cdefs.h>
     81 __KERNEL_RCSID(0, "$NetBSD: kern_turnstile.c,v 1.1.36.1 2006/09/10 23:42:42 ad Exp $");
     82 
     83 #include <sys/param.h>
     84 #include <sys/lock.h>
     85 #include <sys/pool.h>
     86 #include <sys/proc.h>
     87 #include <sys/resourcevar.h>
     88 #include <sys/sched.h>
     89 #include <sys/turnstile.h>
     90 #include <sys/systm.h>
     91 #include <sys/sa.h>
     92 #include <sys/savar.h>
     93 
     94 /* XXX */
     95 #ifdef MULTIPROCESSOR
     96 #define	_SCHED_LOCK	simple_lock(&sched_lock);
     97 #define	_SCHED_UNLOCK	simple_unlock(&sched_lock);
     98 #else
     99 #define	_SCHED_LOCK	/* nothing */
    100 #define	_SCHED_UNLOCK	/* nothing */
    101 #endif
    102 
    103 /*
    104  * Turnstile hash -- shift the lock object to eliminate the zero bits
    105  * of the address, and mask it off with the turnstile table's size.
    106  */
    107 #if LONG_BIT == 64
    108 #define	TURNSTILE_HASH_SHIFT	3
    109 #elif LONG_BIT == 32
    110 #define	TURNSTILE_HASH_SHIFT	2
    111 #else
    112 #error "Don't know how big your pointers are."
    113 #endif
    114 
    115 #define	TURNSTILE_HASH_SIZE	64
    116 #define	TURNSTILE_HASH_MASK	(TURNSTILE_HASH_SIZE - 1)
    117 
    118 #define	TURNSTILE_HASH(obj)						\
    119 	((((u_long)(obj)) >> TURNSTILE_HASH_SHIFT) & TURNSTILE_HASH_MASK)
    120 
    121 struct turnstile_chain {
    122 	struct simplelock tc_lock;	/* lock on hash chain */
    123 	int		tc_oldspl;	/* saved spl of lock holder
    124 					   (only valid while tc_lock held) */
    125 	LIST_HEAD(, turnstile) tc_chain;/* turnstile chain */
    126 } turnstile_table[TURNSTILE_HASH_SIZE];
    127 
    128 #define	TURNSTILE_CHAIN(obj)						\
    129 	&turnstile_table[TURNSTILE_HASH(obj)]
    130 
    131 #define	TURNSTILE_CHAIN_LOCK(tc)					\
    132 do {									\
    133 	int _s_ = spllock();						\
    134 	simple_lock(&(tc)->tc_lock);					\
    135 	(tc)->tc_oldspl = _s_;						\
    136 } while (/*CONSTCOND*/0)
    137 
    138 #define	TURNSTILE_CHAIN_UNLOCK(tc)					\
    139 do {									\
    140 	int _s_ = (tc)->tc_oldspl;					\
    141 	simple_unlock(&(tc)->tc_lock);					\
    142 	splx(_s_);							\
    143 } while (/*CONSTCOND*/0)
    144 
    145 struct pool turnstile_pool;
    146 struct pool_cache turnstile_cache;
    147 
    148 int	turnstile_ctor(void *, void *, int);
    149 
    150 /*
    151  * turnstile_init:
    152  *
    153  *	Initialize the turnstile mechanism.
    154  */
    155 void
    156 turnstile_init(void)
    157 {
    158 	struct turnstile_chain *tc;
    159 	int i;
    160 
    161 	for (i = 0; i < TURNSTILE_HASH_SIZE; i++) {
    162 		tc = &turnstile_table[i];
    163 		simple_lock_init(&tc->tc_lock);
    164 		LIST_INIT(&tc->tc_chain);
    165 	}
    166 
    167 	pool_init(&turnstile_pool, sizeof(struct turnstile), 0, 0, 0,
    168 	    "tspool", &pool_allocator_nointr);
    169 	pool_cache_init(&turnstile_cache, &turnstile_pool,
    170 	    turnstile_ctor, NULL, NULL);
    171 }
    172 
    173 /*
    174  * turnstile_ctor:
    175  *
    176  *	Constructor for turnstiles.
    177  */
    178 int
    179 turnstile_ctor(void *arg, void *obj, int flags)
    180 {
    181 	struct turnstile *ts = obj;
    182 
    183 	memset(ts, 0, sizeof(*ts));
    184 	return (0);
    185 }
    186 
/*
 * turnstile_remque:
 *
 *	Remove LWP 'l' from the turnstile sleep queue 'tsq' and give
 *	it a turnstile of its own to take away: one from the active
 *	turnstile's free list if other waiters remain, otherwise the
 *	active turnstile itself, which is then removed from its hash
 *	chain (deactivated).
 *
 *	Callers hold the turnstile chain lock and sched_lock (see
 *	turnstile_wakeup()).
 */
static void
turnstile_remque(struct turnstile *ts, struct lwp *l,
		 struct turnstile_sleepq *tsq)
{
	/* Head pointer of the sleep queue, used for the unlink below. */
	struct lwp **q = &tsq->tsq_q.sq_head;
	struct turnstile *nts;

	/* 'l' must currently reference the active turnstile. */
	KASSERT(l->l_ts == ts);

	/*
	 * This process is no longer using the active turnstile.
	 * Find an inactive one on the free list to give to it.
	 */
	if ((nts = ts->ts_free) != NULL) {
		KASSERT(TS_ALL_WAITERS(ts) > 1);
		l->l_ts = nts;
		ts->ts_free = nts->ts_free;
		nts->ts_free = NULL;
	} else {
		/*
		 * If the free list is empty, this is the last
		 * waiter.
		 */
		KASSERT(TS_ALL_WAITERS(ts) == 1);
		LIST_REMOVE(ts, ts_chain);
	}

	tsq->tsq_waiters--;

	/*
	 * Unlink 'l' from the sleep queue, fixing up the tail
	 * pointer if 'l' was the last entry.
	 *
	 * NOTE(review): this unlinks through the queue head only and
	 * appears to assume 'l' is the first LWP on the queue.  The
	 * 'nextlwp' path in turnstile_wakeup() only checks that the
	 * LWP is somewhere on the queue -- confirm that callers can
	 * never pass a mid-queue LWP, or earlier entries would be
	 * dropped.
	 */
	*q = l->l_forw;
	if (tsq->tsq_q.sq_tailp == &l->l_forw)
		tsq->tsq_q.sq_tailp = q;

	/* If a queue's waiter count is zero, its list must be empty... */
	KASSERT(ts->ts_sleepq[TS_READER_Q].tsq_waiters != 0 ||
		ts->ts_sleepq[TS_READER_Q].tsq_q.sq_head == NULL);
	KASSERT(ts->ts_sleepq[TS_WRITER_Q].tsq_waiters != 0 ||
		ts->ts_sleepq[TS_WRITER_Q].tsq_q.sq_head == NULL);

	/* ...and if it is non-zero, the list must be non-empty. */
	KASSERT(ts->ts_sleepq[TS_READER_Q].tsq_waiters == 0 ||
		ts->ts_sleepq[TS_READER_Q].tsq_q.sq_head != NULL);
	KASSERT(ts->ts_sleepq[TS_WRITER_Q].tsq_waiters == 0 ||
		ts->ts_sleepq[TS_WRITER_Q].tsq_q.sq_head != NULL);
}
    230 
    231 /*
    232  * turnstile_lookup:
    233  *
    234  *	Look up the turnstile for the specified lock object.  This
    235  *	acquires and holds the turnstile chain lock (sleep queue
    236  *	interlock).
    237  */
    238 struct turnstile *
    239 turnstile_lookup(void *lp)
    240 {
    241 	struct turnstile_chain *tc = TURNSTILE_CHAIN(lp);
    242 	struct turnstile *ts;
    243 
    244 	TURNSTILE_CHAIN_LOCK(tc);
    245 
    246 	LIST_FOREACH(ts, &tc->tc_chain, ts_chain)
    247 		if (ts->ts_obj == lp)
    248 			return (ts);
    249 
    250 	/*
    251 	 * No turnstile yet for this lock.  No problem, turnstile_block()
    252 	 * handles this by fetching the turnstile from the blocking thread.
    253 	 */
    254 	return (NULL);
    255 }
    256 
/*
 * turnstile_exit:
 *
 *	Abort a turnstile operation: drop the sleep queue interlock
 *	taken by turnstile_lookup() without blocking on the lock.
 */
void
turnstile_exit(void *lp)
{
	struct turnstile_chain *tc;

	tc = TURNSTILE_CHAIN(lp);
	TURNSTILE_CHAIN_UNLOCK(tc);
}
    269 
    270 /*
    271  * turnstile_block:
    272  *
    273  *	Block a thread on a lock object.
    274  */
    275 int
    276 turnstile_block(struct turnstile *ts, int rw, int pri, void *lp,
    277 		const char *wmesg)
    278 {
    279 	struct turnstile_chain *tc = TURNSTILE_CHAIN(lp);
    280 	struct lwp *l = curlwp;
    281 	struct proc *p = l->l_proc;
    282 	struct turnstile *ots;
    283 	struct turnstile_sleepq *tsq;
    284 	struct sadata_upcall *sau;
    285 	struct slpque *qp;
    286 	int s, hold_count;
    287 
    288 	KASSERT(l->l_ts != NULL);
    289 	KASSERT(rw == TS_READER_Q || rw == TS_WRITER_Q);
    290 
    291 	if (ts == NULL) {
    292 		/*
    293 		 * We are the first thread to wait for this lock;
    294 		 * lend our turnstile to it.
    295 		 */
    296 		ts = l->l_ts;
    297 		KASSERT(TS_ALL_WAITERS(ts) == 0);
    298 		KASSERT(ts->ts_sleepq[TS_READER_Q].tsq_q.sq_head == NULL &&
    299 			ts->ts_sleepq[TS_WRITER_Q].tsq_q.sq_head == NULL);
    300 		ts->ts_obj = lp;
    301 		LIST_INSERT_HEAD(&tc->tc_chain, ts, ts_chain);
    302 	} else {
    303 		/*
    304 		 * Lock already has a turnstile.  Put our turnstile
    305 		 * onto the free list, and reference the existing
    306 		 * turnstile instead.
    307 		 */
    308 		ots = l->l_ts;
    309 		ots->ts_free = ts->ts_free;
    310 		ts->ts_free = ots;
    311 		l->l_ts = ts;
    312 	}
    313 
    314 #ifdef DIAGNOSTIC
    315 	if (l->l_stat != LSONPROC)
    316 		panic("turnstile_block: l_stat %d != LSONPROC", l->l_stat);
    317 	if (l->l_back != NULL)
    318 		panic("turnstile_block: l_back != NULL");
    319 #endif
    320 
    321 #ifdef KTRACE
    322 	if (KTRPOINT(p, KTR_CSW))
    323 		ktrcsw(l, 1, 0);
    324 #endif
    325 
    326 	/*
    327 	 * XXX We need to allocate the sadata_upcall structure here,
    328 	 * XXX since we can't sleep while waiting for memory inside
    329 	 * XXX sa_upcall().  It would be nice if we could safely
    330 	 * XXX allocate the sadata_upcall structure on the stack, here.
    331 	 */
    332 	if ((l->l_flag & L_SA) != 0)
    333 		sau = sadata_upcall_alloc(0);
    334 	else
    335 		sau = NULL;
    336 
    337 	l->l_wchan = lp;
    338 	l->l_wmesg = wmesg;
    339 	l->l_slptime = 0;
    340 	l->l_priority = pri & PRIMASK;
    341 
    342 	tsq = &ts->ts_sleepq[rw];
    343 	qp = &tsq->tsq_q;
    344 	tsq->tsq_waiters++;
    345 	if (qp->sq_head == NULL)
    346 		qp->sq_head = l;
    347 	else
    348 		*qp->sq_tailp = l;
    349 	*(qp->sq_tailp = &l->l_forw) = NULL;
    350 
    351 	/*
    352 	 * XXX We currently need to interlock with sched_lock.
    353 	 * Note we're already at spllock(), which will block
    354 	 * scheduler interrupts.
    355 	 */
    356 	_SCHED_LOCK;
    357 
    358 	/*
    359 	 * Release the kernel_lock, as we are about to yield the CPU.
    360 	 * The scheduler lock is still held until cpu_switch()
    361 	 * selects a new process and removes it from the run queue.
    362 	 */
    363 	hold_count = KERNEL_LOCK_RELEASE_ALL();
    364 
    365 	l->l_stat = LSSLEEP;
    366 	p->p_nrlwps--;
    367 	p->p_stats->p_ru.ru_nvcsw++;
    368 
    369 	/*
    370 	 * We can now release the turnstile chain interlock; the
    371 	 * scheduler lock is held, so a thread can't get in to
    372 	 * do a turnstile_wakeup() before we do the switch.
    373 	 *
    374 	 * Note: we need to remember our old spl which is currently
    375 	 * stored in the turnstile chain, because we have to stay
    376 	 * st spllock while the sched_lock is held.
    377 	 */
    378 	s = tc->tc_oldspl;
    379 	simple_unlock(&tc->tc_lock);
    380 
    381 	if ((l->l_flag & L_SA) != 0)
    382 		sa_switch(l, sau, SA_UPCALL_BLOCKED);
    383 	else
    384 		mi_switch(l, NULL);
    385 
    386 	SCHED_ASSERT_UNLOCKED();
    387 	splx(s);
    388 
    389 	/*
    390 	 * We are now back to the base spl level we were at when the
    391 	 * caller called turnstile_lookup().
    392 	 */
    393 	KDASSERT(l->l_cpu != NULL);
    394 	KDASSERT(l->l_cpu == curcpu());
    395 	l->l_cpu->ci_schedstate.spc_curpriority = l->l_usrpri;
    396 
    397 	/*
    398 	 * Reacquire the kernel_lock now.  We do this after we've
    399 	 * released the scheduler lock to avoid deadlock, and before
    400 	 * we reacquire other locks.
    401 	 */
    402 	KERNEL_LOCK_ACQUIRE_COUNT(hold_count);
    403 
    404 	KDASSERT((p->p_flag & (L_SINTR|L_TIMEOUT)) == 0);
    405 
    406 #ifdef KTRACE
    407 	if (KTRPOINT(p, KTR_CSW))
    408 		ktrcsw(l, 0, 0);
    409 #endif
    410 
    411 	return (0);
    412 }
    413 
/*
 * turnstile_wakeup:
 *
 *	Wake up the specified number of threads that are blocked
 *	in a turnstile.
 *
 *	Wakes 'count' LWPs from the 'rw' sleep queue of turnstile
 *	'ts' (asserting if fewer are queued), or, if 'nextlwp' is
 *	non-NULL, wakes exactly that LWP.  Called with the turnstile
 *	chain lock held (from turnstile_lookup()); that lock is
 *	released before returning.
 */
void
turnstile_wakeup(struct turnstile *ts, int rw, int count,
		 struct lwp *nextlwp)
{
	struct turnstile_chain *tc = TURNSTILE_CHAIN(ts->ts_obj);
	struct turnstile_sleepq *tsq;
	struct lwp *l;

	KASSERT(rw == TS_READER_Q || rw == TS_WRITER_Q);
	KASSERT(count > 0);

	tsq = &ts->ts_sleepq[rw];

	/* XXX We currently interlock with sched_lock. */
	_SCHED_LOCK;

	if (nextlwp != NULL) {
#if defined(DEBUG) || defined(LOCKDEBUG)
		/* Sanity check: 'nextlwp' must be on this sleep queue. */
		for (l = tsq->tsq_q.sq_head; l != NULL;
		     l = l->l_forw) {
			if (l == nextlwp)
				break;
		}
		if (l == NULL)
			panic("turnstile_wakeup: nextlwp not on sleepq");
#endif
		/* Give 'nextlwp' a turnstile back and make it runnable. */
		turnstile_remque(ts, nextlwp, tsq);
		nextlwp->l_wchan = NULL;
		if (nextlwp->l_stat == LSSLEEP)
			awaken(nextlwp);
	} else {
		/* Wake 'count' LWPs from the head of the queue. */
		while (count-- > 0) {
			l = tsq->tsq_q.sq_head;
			KASSERT(l != NULL);
			turnstile_remque(ts, l, tsq);
			l->l_wchan = NULL;
			if (l->l_stat == LSSLEEP)
				awaken(l);
		}
	}

	_SCHED_UNLOCK;

	/* Drop the sleep queue interlock taken by turnstile_lookup(). */
	TURNSTILE_CHAIN_UNLOCK(tc);
}
    465 
    466