      1 /*	$NetBSD: kern_synch.c,v 1.177.2.7 2007/02/23 11:55:43 yamt Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and
     10  * Daniel Sieger.
     11  *
     12  * Redistribution and use in source and binary forms, with or without
     13  * modification, are permitted provided that the following conditions
     14  * are met:
     15  * 1. Redistributions of source code must retain the above copyright
     16  *    notice, this list of conditions and the following disclaimer.
     17  * 2. Redistributions in binary form must reproduce the above copyright
     18  *    notice, this list of conditions and the following disclaimer in the
     19  *    documentation and/or other materials provided with the distribution.
     20  * 3. All advertising materials mentioning features or use of this software
     21  *    must display the following acknowledgement:
     22  *	This product includes software developed by the NetBSD
     23  *	Foundation, Inc. and its contributors.
     24  * 4. Neither the name of The NetBSD Foundation nor the names of its
     25  *    contributors may be used to endorse or promote products derived
     26  *    from this software without specific prior written permission.
     27  *
     28  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     29  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     30  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     31  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     32  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     38  * POSSIBILITY OF SUCH DAMAGE.
     39  */
     40 
     41 /*-
     42  * Copyright (c) 1982, 1986, 1990, 1991, 1993
     43  *	The Regents of the University of California.  All rights reserved.
     44  * (c) UNIX System Laboratories, Inc.
     45  * All or some portions of this file are derived from material licensed
     46  * to the University of California by American Telephone and Telegraph
     47  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     48  * the permission of UNIX System Laboratories, Inc.
     49  *
     50  * Redistribution and use in source and binary forms, with or without
     51  * modification, are permitted provided that the following conditions
     52  * are met:
     53  * 1. Redistributions of source code must retain the above copyright
     54  *    notice, this list of conditions and the following disclaimer.
     55  * 2. Redistributions in binary form must reproduce the above copyright
     56  *    notice, this list of conditions and the following disclaimer in the
     57  *    documentation and/or other materials provided with the distribution.
     58  * 3. Neither the name of the University nor the names of its contributors
     59  *    may be used to endorse or promote products derived from this software
     60  *    without specific prior written permission.
     61  *
     62  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     63  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     65  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     66  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     67  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     68  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     69  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     70  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     71  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     72  * SUCH DAMAGE.
     73  *
     74  *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
     75  */
     76 
     77 #include <sys/cdefs.h>
     78 __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.177.2.7 2007/02/23 11:55:43 yamt Exp $");
     79 
     80 #include "opt_kstack.h"
     81 #include "opt_lockdebug.h"
     82 #include "opt_multiprocessor.h"
     83 #include "opt_perfctrs.h"
     84 
     85 #define	__MUTEX_PRIVATE
     86 
     87 #include <sys/param.h>
     88 #include <sys/systm.h>
     89 #include <sys/proc.h>
     90 #include <sys/kernel.h>
     91 #if defined(PERFCTRS)
     92 #include <sys/pmc.h>
     93 #endif
     94 #include <sys/resourcevar.h>
     95 #include <sys/sched.h>
     96 #include <sys/sleepq.h>
     97 #include <sys/lockdebug.h>
     98 
     99 #include <uvm/uvm_extern.h>
    100 
    101 #include <machine/cpu.h>
    102 
    103 int	lbolt;			/* once a second sleep address */
    104 
    105 /*
    106  * The global scheduler state.
    107  */
    108 kmutex_t	sched_mutex;		/* global sched state mutex */
    109 
    110 void	sched_unsleep(struct lwp *);
    111 
    112 syncobj_t sleep_syncobj = {
    113 	SOBJ_SLEEPQ_SORTED,
    114 	sleepq_unsleep,
    115 	sleepq_changepri
    116 };
    117 
    118 syncobj_t sched_syncobj = {
    119 	SOBJ_SLEEPQ_SORTED,
    120 	sched_unsleep,
    121 	sched_changepri
    122 };
    123 
    124 /*
    125  * During autoconfiguration or after a panic, a sleep will simply lower the
    126  * priority briefly to allow interrupts, then return.  The priority to be
    127  * used (safepri) is machine-dependent, thus this value is initialized and
    128  * maintained in the machine-dependent layers.  This priority will typically
    129  * be 0, or the lowest priority that is safe for use on the interrupt stack;
    130  * it can be made higher to block network software interrupts after panics.
    131  */
    132 int	safepri;
    133 
    134 /*
    135  * OBSOLETE INTERFACE
    136  *
    137  * General sleep call.  Suspends the current process until a wakeup is
    138  * performed on the specified identifier.  The process will then be made
    139  * runnable with the specified priority.  Sleeps at most timo/hz seconds (0
     140  * means no timeout).  If the priority argument includes the PCATCH flag,
     141  * signals are checked before and after sleeping; otherwise they are not
     142  * checked.  Returns 0 if awakened and EWOULDBLOCK if the timeout expires.
     143  * If PCATCH is set and a signal needs to be delivered, ERESTART is
     144  * returned if the current system call should be restarted if possible,
     145  * and EINTR is returned if the system call should be interrupted.
    146  *
    147  * The interlock is held until we are on a sleep queue. The interlock will
     148  * be locked again before returning to the caller unless the PNORELOCK flag
    149  * is specified, in which case the interlock will always be unlocked upon
    150  * return.
    151  */
    152 int
    153 ltsleep(wchan_t ident, int priority, const char *wmesg, int timo,
    154 	volatile struct simplelock *interlock)
    155 {
    156 	struct lwp *l = curlwp;
    157 	sleepq_t *sq;
    158 	int error, catch;
    159 
    160 	if (sleepq_dontsleep(l)) {
    161 		(void)sleepq_abort(NULL, 0);
    162 		if ((priority & PNORELOCK) != 0)
    163 			simple_unlock(interlock);
    164 		return 0;
    165 	}
    166 
    167 	sq = sleeptab_lookup(&sleeptab, ident);
    168 	sleepq_enter(sq, l);
    169 
    170 	if (interlock != NULL) {
    171 		LOCK_ASSERT(simple_lock_held(interlock));
    172 		simple_unlock(interlock);
    173 	}
    174 
    175 	catch = priority & PCATCH;
    176 	sleepq_block(sq, priority & PRIMASK, ident, wmesg, timo, catch,
    177 	    &sleep_syncobj);
    178 	error = sleepq_unblock(timo, catch);
    179 
    180 	if (interlock != NULL && (priority & PNORELOCK) == 0)
    181 		simple_lock(interlock);
    182 
    183 	return error;
    184 }
    185 
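/*
 * Illustrative sketch of the classic ltsleep()/wakeup() pattern.  The
 * "sc" softc, its sc_busy field, the "xyz_slock" simplelock and the
 * "xyzbsy" wmesg are hypothetical and not part of this file:
 *
 *	simple_lock(&xyz_slock);
 *	while (sc->sc_busy != 0) {
 *		error = ltsleep(&sc->sc_busy, PRIBIO | PCATCH, "xyzbsy",
 *		    hz, &xyz_slock);
 *		if (error != 0)
 *			break;
 *	}
 *	simple_unlock(&xyz_slock);
 *
 * If PNORELOCK were added to the priority argument, ltsleep() would
 * leave the interlock unlocked on return and the final simple_unlock()
 * would be dropped.
 */
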
    186 /*
    187  * General sleep call for situations where a wake-up is not expected.
    188  */
    189 int
    190 kpause(const char *wmesg, boolean_t intr, int timo, kmutex_t *mtx)
    191 {
    192 	struct lwp *l = curlwp;
    193 	sleepq_t *sq;
    194 	int error;
    195 
    196 	if (sleepq_dontsleep(l))
    197 		return sleepq_abort(NULL, 0);
    198 
    199 	if (mtx != NULL)
    200 		mutex_exit(mtx);
    201 	sq = sleeptab_lookup(&sleeptab, l);
    202 	sleepq_enter(sq, l);
    203 	sleepq_block(sq, sched_kpri(l), l, wmesg, timo, intr, &sleep_syncobj);
    204 	error = sleepq_unblock(timo, intr);
    205 	if (mtx != NULL)
    206 		mutex_enter(mtx);
    207 
    208 	return error;
    209 }
    210 
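/*
 * Illustrative sketch of a simple timed wait with kpause(); the
 * "xyzdly" wmesg and sc->sc_mutex are hypothetical:
 *
 *	error = kpause("xyzdly", TRUE, mstohz(100), &sc->sc_mutex);
 *
 * The kmutex, if non-NULL, is released while the LWP sleeps and
 * re-acquired before kpause() returns; passing TRUE for intr allows
 * the sleep to be interrupted by a signal.
 */
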
    211 /*
    212  * OBSOLETE INTERFACE
    213  *
    214  * Make all processes sleeping on the specified identifier runnable.
    215  */
    216 void
    217 wakeup(wchan_t ident)
    218 {
    219 	sleepq_t *sq;
    220 
    221 	if (cold)
    222 		return;
    223 
    224 	sq = sleeptab_lookup(&sleeptab, ident);
    225 	sleepq_wake(sq, ident, (u_int)-1);
    226 }
    227 
    228 /*
    229  * OBSOLETE INTERFACE
    230  *
     231  * Make the highest-priority process sleeping on the specified identifier
     232  * runnable, waking at most one sleeper.
    233  */
    234 void
    235 wakeup_one(wchan_t ident)
    236 {
    237 	sleepq_t *sq;
    238 
    239 	if (cold)
    240 		return;
    241 
    242 	sq = sleeptab_lookup(&sleeptab, ident);
    243 	sleepq_wake(sq, ident, 1);
    244 }
    245 
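/*
 * Illustrative sketch of the wake-up side of the ltsleep() example
 * shown earlier; sc->sc_busy is again hypothetical.  Sleepers are
 * matched on the identifier (address) passed to wakeup()/wakeup_one():
 *
 *	sc->sc_busy = 0;
 *	wakeup(&sc->sc_busy);
 */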
    246 
    247 /*
    248  * General yield call.  Puts the current process back on its run queue and
    249  * performs a voluntary context switch.  Should only be called when the
     250  * current process explicitly requests it (e.g. sched_yield(2) in compat code).
    251  */
    252 void
    253 yield(void)
    254 {
    255 	struct lwp *l = curlwp;
    256 
    257 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
    258 	lwp_lock(l);
    259 	if (l->l_stat == LSONPROC) {
    260 		KASSERT(lwp_locked(l, &sched_mutex));
    261 		l->l_priority = l->l_usrpri;
    262 	}
    263 	l->l_nvcsw++;
    264 	mi_switch(l, NULL);
    265 	KERNEL_LOCK(l->l_biglocks, l);
    266 }
    267 
    268 /*
    269  * General preemption call.  Puts the current process back on its run queue
    270  * and performs an involuntary context switch.
    271  */
    272 void
    273 preempt(void)
    274 {
    275 	struct lwp *l = curlwp;
    276 
    277 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
    278 	lwp_lock(l);
    279 	if (l->l_stat == LSONPROC) {
    280 		KASSERT(lwp_locked(l, &sched_mutex));
    281 		l->l_priority = l->l_usrpri;
    282 	}
    283 	l->l_nivcsw++;
    284 	(void)mi_switch(l, NULL);
    285 	KERNEL_LOCK(l->l_biglocks, l);
    286 }
    287 
    288 /*
    289  * sched_switch_unlock: update 'curlwp' and release old lwp.
    290  */
    291 
    292 void
    293 sched_switch_unlock(struct lwp *old, struct lwp *new)
    294 {
    295 
    296 	KASSERT(old == NULL || old == curlwp);
    297 
    298 	if (old != NULL) {
    299 		LOCKDEBUG_BARRIER(old->l_mutex, 1);
    300 	} else {
    301 		LOCKDEBUG_BARRIER(NULL, 1);
    302 	}
    303 
    304 	curlwp = new;
    305 	if (old != NULL) {
    306 		lwp_unlock(old);
    307 	}
    308 	spl0();
    309 }
    310 
    311 /*
    312  * Compute the amount of time during which the current lwp was running.
    313  *
    314  * - update l_rtime unless it's an idle lwp.
    315  * - update spc_runtime for the next lwp.
    316  */
    317 
    318 static inline void
    319 updatertime(struct lwp *l, struct schedstate_percpu *spc)
    320 {
    321 	struct timeval tv;
    322 	long s, u;
    323 
    324 	if ((l->l_flag & L_IDLE) != 0) {
    325 		microtime(&spc->spc_runtime);
    326 		return;
    327 	}
    328 
    329 	microtime(&tv);
    330 	u = l->l_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec);
    331 	s = l->l_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
    332 	if (u < 0) {
    333 		u += 1000000;
    334 		s--;
    335 	} else if (u >= 1000000) {
    336 		u -= 1000000;
    337 		s++;
    338 	}
    339 	l->l_rtime.tv_usec = u;
    340 	l->l_rtime.tv_sec = s;
    341 
    342 	spc->spc_runtime = tv;
    343 }
    344 
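/*
 * Worked example of the carry handling above, assuming l_rtime was
 * zero: if spc_runtime was { 10, 900000 } when the LWP went on the
 * CPU and microtime() now returns { 11, 100000 }, then u = -800000
 * and s = 1; the u < 0 branch normalizes this to l_rtime =
 * { 0, 200000 }, i.e. 0.2 seconds of accumulated run time.
 */
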
    345 /*
     346  * The machine-independent part of a context switch.  Switch to "newl"
     347  * if non-NULL, otherwise let the scheduler choose the next LWP.
    348  *
    349  * Returns 1 if another process was actually run.
    350  */
    351 int
    352 mi_switch(struct lwp *l, struct lwp *newl)
    353 {
    354 	struct schedstate_percpu *spc;
    355 	int retval, oldspl;
    356 
    357 	LOCK_ASSERT(lwp_locked(l, NULL));
    358 
    359 #ifdef LOCKDEBUG
    360 	spinlock_switchcheck();
    361 	simple_lock_switchcheck();
    362 #endif
    363 #ifdef KSTACK_CHECK_MAGIC
    364 	kstack_check_magic(l);
    365 #endif
    366 
    367 	/*
     368 	 * It's safe to read the per-CPU schedstate unlocked here, as all we
     369 	 * are after is the run time, and that's guaranteed to have been last
     370 	 * updated by this CPU.
    371 	 */
    372 	KDASSERT(l->l_cpu == curcpu());
    373 	spc = &l->l_cpu->ci_schedstate;
    374 
    375 	/*
    376 	 * XXXSMP If we are using h/w performance counters, save context.
    377 	 */
    378 #if PERFCTRS
    379 	if (PMC_ENABLED(l->l_proc)) {
    380 		pmc_save_context(l->l_proc);
    381 	}
    382 #endif
    383 
    384 	/*
     385 	 * If we are on the CPU and have gotten this far, we must yield.
    386 	 */
    387 	KASSERT(l->l_stat != LSRUN);
    388 	if (l->l_stat == LSONPROC) {
    389 		KASSERT(lwp_locked(l, &sched_mutex));
    390 		l->l_stat = LSRUN;
    391 		if ((l->l_flag & L_IDLE) == 0) {
    392 			sched_enqueue(l);
    393 		}
    394 	}
    395 	uvmexp.swtch++;
    396 
    397 	/*
    398 	 * Process is about to yield the CPU; clear the appropriate
    399 	 * scheduling flags.
    400 	 */
    401 	spc->spc_flags &= ~SPCF_SWITCHCLEAR;
    402 
    403 	LOCKDEBUG_BARRIER(l->l_mutex, 1);
    404 
    405 	/*
    406 	 * Switch to the new LWP if necessary.
    407 	 * When we run again, we'll return back here.
    408 	 */
    409 	oldspl = MUTEX_SPIN_OLDSPL(l->l_cpu);
    410 
    411 	/*
    412 	 * Acquire the sched_mutex if necessary.
    413 	 */
    414 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
    415 	if (l->l_mutex != &sched_mutex) {
    416 		mutex_enter(&sched_mutex);
    417 	}
    418 #endif
    419 
    420 	if (newl == NULL) {
    421 		newl = sched_nextlwp();
    422 	}
    423 	if (newl != NULL) {
    424 		KASSERT(lwp_locked(newl, &sched_mutex));
    425 		sched_dequeue(newl);
    426 	} else {
    427 		newl = l->l_cpu->ci_data.cpu_idlelwp;
    428 		KASSERT(newl != NULL);
    429 	}
    430 	KASSERT(lwp_locked(newl, &sched_mutex));
    431 	newl->l_stat = LSONPROC;
    432 	newl->l_cpu = l->l_cpu;
    433 
    434 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
    435 	if (l->l_mutex != &sched_mutex) {
    436 		mutex_exit(&sched_mutex);
    437 	}
    438 #endif
    439 
    440 	updatertime(l, spc);
    441 	if (l != newl) {
    442 		struct lwp *prevlwp;
    443 
    444 		uvmexp.swtch++;
    445 		pmap_deactivate(l);
    446 		prevlwp = cpu_switchto(l, newl);
    447 		sched_switch_unlock(prevlwp, l);
    448 		pmap_activate(l);
    449 		retval = 1;
    450 	} else {
    451 		sched_switch_unlock(l, l);
    452 		retval = 0;
    453 	}
    454 
    455 	KASSERT(l == curlwp);
    456 	KASSERT(l->l_stat == LSONPROC);
    457 
    458 	/*
    459 	 * XXXSMP If we are using h/w performance counters, restore context.
    460 	 */
    461 #if PERFCTRS
    462 	if (PMC_ENABLED(l->l_proc)) {
    463 		pmc_restore_context(l->l_proc);
    464 	}
    465 #endif
    466 
    467 	/*
    468 	 * We're running again; record our new start time.  We might
    469 	 * be running on a new CPU now, so don't use the cached
    470 	 * schedstate_percpu pointer.
    471 	 */
    472 	KDASSERT(l->l_cpu == curcpu());
    473 
    474 	(void)splsched();
    475 	splx(oldspl);
    476 	return retval;
    477 }
    478 
    479 /*
    480  * Change process state to be runnable, placing it on the run queue if it is
    481  * in memory, and awakening the swapper if it isn't in memory.
    482  *
    483  * Call with the process and LWP locked.  Will return with the LWP unlocked.
    484  */
    485 void
    486 setrunnable(struct lwp *l)
    487 {
    488 	struct proc *p = l->l_proc;
    489 	sigset_t *ss;
    490 
    491 	KASSERT((l->l_flag & L_IDLE) == 0);
    492 	LOCK_ASSERT(mutex_owned(&p->p_smutex));
    493 	LOCK_ASSERT(lwp_locked(l, NULL));
    494 
    495 	switch (l->l_stat) {
    496 	case LSSTOP:
    497 		/*
     498 		 * If we're being traced (possibly because someone attached to us
    499 		 * while we were stopped), check for a signal from the debugger.
    500 		 */
    501 		if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xstat != 0) {
    502 			if ((sigprop[p->p_xstat] & SA_TOLWP) != 0)
    503 				ss = &l->l_sigpend.sp_set;
    504 			else
    505 				ss = &p->p_sigpend.sp_set;
    506 			sigaddset(ss, p->p_xstat);
    507 			signotify(l);
    508 		}
    509 		p->p_nrlwps++;
    510 		break;
    511 	case LSSUSPENDED:
    512 		l->l_flag &= ~L_WSUSPEND;
    513 		p->p_nrlwps++;
    514 		break;
    515 	case LSSLEEP:
    516 		KASSERT(l->l_wchan != NULL);
    517 		break;
    518 	default:
    519 		panic("setrunnable: lwp %p state was %d", l, l->l_stat);
    520 	}
    521 
    522 	/*
     523 	 * If the LWP was sleeping interruptibly, then it's OK to start it
    524 	 * again.  If not, mark it as still sleeping.
    525 	 */
    526 	if (l->l_wchan != NULL) {
    527 		l->l_stat = LSSLEEP;
    528 		if ((l->l_flag & L_SINTR) != 0)
    529 			lwp_unsleep(l);
    530 		else {
    531 			lwp_unlock(l);
    532 #ifdef DIAGNOSTIC
    533 			panic("setrunnable: !L_SINTR");
    534 #endif
    535 		}
    536 		return;
    537 	}
    538 
    539 	LOCK_ASSERT(lwp_locked(l, &sched_mutex));
    540 
    541 	/*
    542 	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
    543 	 * about to call mi_switch(), in which case it will yield.
    544 	 *
    545 	 * XXXSMP Will need to change for preemption.
    546 	 */
    547 #ifdef MULTIPROCESSOR
    548 	if (l->l_cpu->ci_curlwp == l) {
    549 #else
    550 	if (l == curlwp) {
    551 #endif
    552 		l->l_stat = LSONPROC;
    553 		l->l_slptime = 0;
    554 		lwp_unlock(l);
    555 		return;
    556 	}
    557 
    558 	/*
     559 	 * Set the LWP runnable.  If it's swapped out, we need to wake the
     560 	 * swapper to bring it back in.  Otherwise, enter it into a run queue.
    561 	 */
    562 	sched_setrunnable(l);
    563 	l->l_stat = LSRUN;
    564 	l->l_slptime = 0;
    565 
    566 	if (l->l_flag & L_INMEM) {
    567 		sched_enqueue(l);
    568 		resched_cpu(l, l->l_priority);
    569 		lwp_unlock(l);
    570 	} else {
    571 		lwp_unlock(l);
    572 		uvm_kick_scheduler();
    573 	}
    574 }
    575 
    576 /*
    577  * suspendsched:
    578  *
     579  *	Convert LSSLEEP or LSRUN LWPs in non-system processes to LSSUSPENDED.
    580  */
    581 void
    582 suspendsched(void)
    583 {
    584 #ifdef MULTIPROCESSOR
    585 	CPU_INFO_ITERATOR cii;
    586 	struct cpu_info *ci;
    587 #endif
    588 	struct lwp *l;
    589 	struct proc *p;
    590 
    591 	/*
    592 	 * We do this by process in order not to violate the locking rules.
    593 	 */
    594 	mutex_enter(&proclist_mutex);
    595 	PROCLIST_FOREACH(p, &allproc) {
    596 		mutex_enter(&p->p_smutex);
    597 
    598 		if ((p->p_flag & P_SYSTEM) != 0) {
    599 			mutex_exit(&p->p_smutex);
    600 			continue;
    601 		}
    602 
    603 		p->p_stat = SSTOP;
    604 
    605 		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    606 			if (l == curlwp)
    607 				continue;
    608 
    609 			lwp_lock(l);
    610 
    611 			/*
    612 			 * Set L_WREBOOT so that the LWP will suspend itself
    613 			 * when it tries to return to user mode.  We want to
     614 			 * try to get as many LWPs as possible to the
     615 			 * user/kernel boundary, so that they will
    616 			 * release any locks that they hold.
    617 			 */
    618 			l->l_flag |= (L_WREBOOT | L_WSUSPEND);
    619 
    620 			if (l->l_stat == LSSLEEP &&
    621 			    (l->l_flag & L_SINTR) != 0) {
    622 				/* setrunnable() will release the lock. */
    623 				setrunnable(l);
    624 				continue;
    625 			}
    626 
    627 			lwp_unlock(l);
    628 		}
    629 
    630 		mutex_exit(&p->p_smutex);
    631 	}
    632 	mutex_exit(&proclist_mutex);
    633 
    634 	/*
    635 	 * Kick all CPUs to make them preempt any LWPs running in user mode.
    636 	 * They'll trap into the kernel and suspend themselves in userret().
    637 	 */
    638 	sched_lock(0);
    639 #ifdef MULTIPROCESSOR
    640 	for (CPU_INFO_FOREACH(cii, ci))
    641 		cpu_need_resched(ci);
    642 #else
    643 	cpu_need_resched(curcpu());
    644 #endif
    645 	sched_unlock(0);
    646 }
    647 
    648 /*
    649  * sched_kpri:
    650  *
    651  *	Scale a priority level to a kernel priority level, usually
    652  *	for an LWP that is about to sleep.
    653  */
    654 int
    655 sched_kpri(struct lwp *l)
    656 {
    657 	/*
    658 	 * Scale user priorities (127 -> 50) up to kernel priorities
    659 	 * in the range (49 -> 8).  Reserve the top 8 kernel priorities
    660 	 * for high priority kthreads.  Kernel priorities passed in
    661 	 * are left "as is".  XXX This is somewhat arbitrary.
    662 	 */
    663 	static const uint8_t kpri_tab[] = {
    664 		 0,   1,   2,   3,   4,   5,   6,   7,
    665 		 8,   9,  10,  11,  12,  13,  14,  15,
    666 		16,  17,  18,  19,  20,  21,  22,  23,
    667 		24,  25,  26,  27,  28,  29,  30,  31,
    668 		32,  33,  34,  35,  36,  37,  38,  39,
    669 		40,  41,  42,  43,  44,  45,  46,  47,
    670 		48,  49,   8,   8,   9,   9,  10,  10,
    671 		11,  11,  12,  12,  13,  14,  14,  15,
    672 		15,  16,  16,  17,  17,  18,  18,  19,
    673 		20,  20,  21,  21,  22,  22,  23,  23,
    674 		24,  24,  25,  26,  26,  27,  27,  28,
    675 		28,  29,  29,  30,  30,  31,  32,  32,
    676 		33,  33,  34,  34,  35,  35,  36,  36,
    677 		37,  38,  38,  39,  39,  40,  40,  41,
    678 		41,  42,  42,  43,  44,  44,  45,  45,
    679 		46,  46,  47,  47,  48,  48,  49,  49,
    680 	};
    681 
    682 	return kpri_tab[l->l_usrpri];
    683 }
    684 
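/*
 * For example, reading from kpri_tab above: an LWP with l_usrpri == 50
 * (PUSER) is scaled to kernel priority 8, one with l_usrpri == 127 to
 * 49, and kernel priorities 0..49 are returned unchanged.
 */
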
    685 /*
    686  * sched_unsleep:
    687  *
     688  *	This is called when the LWP has not been awoken normally but instead
    689  *	interrupted: for example, if the sleep timed out.  Because of this,
    690  *	it's not a valid action for running or idle LWPs.
    691  */
    692 void
    693 sched_unsleep(struct lwp *l)
    694 {
    695 
    696 	lwp_unlock(l);
    697 	panic("sched_unsleep");
    698 }
    699 
    700 inline void
    701 resched_cpu(struct lwp *l, u_char pri)
    702 {
    703 	struct cpu_info *ci;
    704 
    705 	/*
    706 	 * XXXSMP
    707 	 * Since l->l_cpu persists across a context switch,
    708 	 * this gives us *very weak* processor affinity, in
    709 	 * that we notify the CPU on which the process last
    710 	 * ran that it should try to switch.
    711 	 *
    712 	 * This does not guarantee that the process will run on
    713 	 * that processor next, because another processor might
    714 	 * grab it the next time it performs a context switch.
    715 	 *
    716 	 * This also does not handle the case where its last
    717 	 * CPU is running a higher-priority process, but every
    718 	 * other CPU is running a lower-priority process.  There
    719 	 * are ways to handle this situation, but they're not
    720 	 * currently very pretty, and we also need to weigh the
    721 	 * cost of moving a process from one CPU to another.
    722 	 *
    723 	 * XXXSMP
    724 	 * There is also the issue of locking the other CPU's
    725 	 * sched state, which we currently do not do.
    726 	 */
    727 	ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
    728 	if (pri < ci->ci_schedstate.spc_curpriority)
    729 		cpu_need_resched(ci);
    730 }
    731