kern_synch.c revision 1.177.2.5
      1 /*	$NetBSD: kern_synch.c,v 1.177.2.5 2007/02/20 21:48:45 rmind Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and
     10  * Daniel Sieger.
     11  *
     12  * Redistribution and use in source and binary forms, with or without
     13  * modification, are permitted provided that the following conditions
     14  * are met:
     15  * 1. Redistributions of source code must retain the above copyright
     16  *    notice, this list of conditions and the following disclaimer.
     17  * 2. Redistributions in binary form must reproduce the above copyright
     18  *    notice, this list of conditions and the following disclaimer in the
     19  *    documentation and/or other materials provided with the distribution.
     20  * 3. All advertising materials mentioning features or use of this software
     21  *    must display the following acknowledgement:
     22  *	This product includes software developed by the NetBSD
     23  *	Foundation, Inc. and its contributors.
     24  * 4. Neither the name of The NetBSD Foundation nor the names of its
     25  *    contributors may be used to endorse or promote products derived
     26  *    from this software without specific prior written permission.
     27  *
     28  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     29  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     30  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     31  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     32  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     38  * POSSIBILITY OF SUCH DAMAGE.
     39  */
     40 
     41 /*-
     42  * Copyright (c) 1982, 1986, 1990, 1991, 1993
     43  *	The Regents of the University of California.  All rights reserved.
     44  * (c) UNIX System Laboratories, Inc.
     45  * All or some portions of this file are derived from material licensed
     46  * to the University of California by American Telephone and Telegraph
     47  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     48  * the permission of UNIX System Laboratories, Inc.
     49  *
     50  * Redistribution and use in source and binary forms, with or without
     51  * modification, are permitted provided that the following conditions
     52  * are met:
     53  * 1. Redistributions of source code must retain the above copyright
     54  *    notice, this list of conditions and the following disclaimer.
     55  * 2. Redistributions in binary form must reproduce the above copyright
     56  *    notice, this list of conditions and the following disclaimer in the
     57  *    documentation and/or other materials provided with the distribution.
     58  * 3. Neither the name of the University nor the names of its contributors
     59  *    may be used to endorse or promote products derived from this software
     60  *    without specific prior written permission.
     61  *
     62  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     63  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     65  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     66  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     67  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     68  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     69  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     70  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     71  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     72  * SUCH DAMAGE.
     73  *
     74  *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
     75  */
     76 
     77 #include <sys/cdefs.h>
     78 __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.177.2.5 2007/02/20 21:48:45 rmind Exp $");
     79 
     80 #include "opt_ddb.h"
     81 #include "opt_kstack.h"
     82 #include "opt_lockdebug.h"
     83 #include "opt_multiprocessor.h"
     84 #include "opt_perfctrs.h"
     85 
     86 #define	__MUTEX_PRIVATE
     87 
     88 #include <sys/param.h>
     89 #include <sys/systm.h>
     90 #include <sys/callout.h>
     91 #include <sys/proc.h>
     92 #include <sys/kernel.h>
     93 #include <sys/buf.h>
     94 #if defined(PERFCTRS)
     95 #include <sys/pmc.h>
     96 #endif
     97 #include <sys/signalvar.h>
     98 #include <sys/resourcevar.h>
     99 #include <sys/sched.h>
    100 #include <sys/kauth.h>
    101 #include <sys/sleepq.h>
    102 #include <sys/lockdebug.h>
    103 
    104 #include <uvm/uvm_extern.h>
    105 
    106 #include <machine/cpu.h>
    107 
    108 int	lbolt;			/* once a second sleep address */
    109 
    110 /*
    111  * The global scheduler state.
    112  */
    113 kmutex_t	sched_mutex;		/* global sched state mutex */
    114 
    115 void	sched_unsleep(struct lwp *);
    116 
    117 syncobj_t sleep_syncobj = {
    118 	SOBJ_SLEEPQ_SORTED,
    119 	sleepq_unsleep,
    120 	sleepq_changepri
    121 };
    122 
    123 syncobj_t sched_syncobj = {
    124 	SOBJ_SLEEPQ_SORTED,
    125 	sched_unsleep,
    126 	sched_changepri
    127 };
    128 
    129 /*
    130  * During autoconfiguration or after a panic, a sleep will simply lower the
    131  * priority briefly to allow interrupts, then return.  The priority to be
    132  * used (safepri) is machine-dependent, thus this value is initialized and
    133  * maintained in the machine-dependent layers.  This priority will typically
    134  * be 0, or the lowest priority that is safe for use on the interrupt stack;
    135  * it can be made higher to block network software interrupts after panics.
    136  */
    137 int	safepri;
    138 
    139 /*
    140  * OBSOLETE INTERFACE
    141  *
    142  * General sleep call.  Suspends the current process until a wakeup is
    143  * performed on the specified identifier.  The process will then be made
    144  * runnable with the specified priority.  Sleeps at most timo/hz seconds (0
    145  * means no timeout).  If pri includes the PCATCH flag, signals are checked
    146  * before and after sleeping; otherwise they are not checked.  Returns 0 if
    147  * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
    148  * signal needs to be delivered, ERESTART is returned if the current system
    149  * call should be restarted if possible, and EINTR is returned if the system
    150  * call should be interrupted by the signal.
    151  *
    152  * The interlock is held until we are on a sleep queue.  The interlock will
    153  * be locked before returning to the caller unless the PNORELOCK flag is
    154  * specified, in which case the interlock will always be unlocked upon
    155  * return.
    156  */
    157 int
    158 ltsleep(wchan_t ident, int priority, const char *wmesg, int timo,
    159 	volatile struct simplelock *interlock)
    160 {
    161 	struct lwp *l = curlwp;
    162 	sleepq_t *sq;
    163 	int error, catch;
    164 
    165 	if (sleepq_dontsleep(l)) {
    166 		(void)sleepq_abort(NULL, 0);
    167 		if ((priority & PNORELOCK) != 0)
    168 			simple_unlock(interlock);
    169 		return 0;
    170 	}
    171 
    172 	sq = sleeptab_lookup(&sleeptab, ident);
    173 	sleepq_enter(sq, l);
    174 
    175 	if (interlock != NULL) {
    176 		LOCK_ASSERT(simple_lock_held(interlock));
    177 		simple_unlock(interlock);
    178 	}
    179 
    180 	catch = priority & PCATCH;
    181 	sleepq_block(sq, priority & PRIMASK, ident, wmesg, timo, catch,
    182 	    &sleep_syncobj);
    183 	error = sleepq_unblock(timo, catch);
    184 
    185 	if (interlock != NULL && (priority & PNORELOCK) == 0)
    186 		simple_lock(interlock);
    187 
    188 	return error;
    189 }
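
/*
 * Illustrative sketch (not part of this file): a typical ltsleep()/wakeup()
 * pair guarded by a simplelock interlock.  The example_softc structure, its
 * sc_slock and sc_ready members, and the example_wait()/example_post()
 * functions are hypothetical and exist only to show the calling convention:
 * the interlock (assumed to have been simple_lock_init()ed) is held on entry,
 * dropped once the LWP is on the sleep queue, and reacquired before ltsleep()
 * returns, since PNORELOCK is not passed.
 *
 *	struct example_softc {
 *		struct simplelock sc_slock;
 *		int sc_ready;
 *	};
 *
 *	int
 *	example_wait(struct example_softc *sc)
 *	{
 *		int error = 0;
 *
 *		simple_lock(&sc->sc_slock);
 *		while (sc->sc_ready == 0) {
 *			error = ltsleep(&sc->sc_ready, PWAIT | PCATCH,
 *			    "exwait", 0, &sc->sc_slock);
 *			if (error != 0)
 *				break;
 *		}
 *		if (error == 0)
 *			sc->sc_ready = 0;
 *		simple_unlock(&sc->sc_slock);
 *		return error;
 *	}
 *
 *	void
 *	example_post(struct example_softc *sc)
 *	{
 *		simple_lock(&sc->sc_slock);
 *		sc->sc_ready = 1;
 *		simple_unlock(&sc->sc_slock);
 *		wakeup(&sc->sc_ready);
 *	}
 */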
    190 
    191 /*
    192  * General sleep call for situations where a wake-up is not expected.
    193  */
    194 int
    195 kpause(const char *wmesg, boolean_t intr, int timo, kmutex_t *mtx)
    196 {
    197 	struct lwp *l = curlwp;
    198 	sleepq_t *sq;
    199 	int error;
    200 
    201 	if (sleepq_dontsleep(l))
    202 		return sleepq_abort(NULL, 0);
    203 
    204 	if (mtx != NULL)
    205 		mutex_exit(mtx);
    206 	sq = sleeptab_lookup(&sleeptab, l);
    207 	sleepq_enter(sq, l);
    208 	sleepq_block(sq, sched_kpri(l), l, wmesg, timo, intr, &sleep_syncobj);
    209 	error = sleepq_unblock(timo, intr);
    210 	if (mtx != NULL)
    211 		mutex_enter(mtx);
    212 
    213 	return error;
    214 }
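
/*
 * Illustrative sketch (not part of this file), showing how kpause() is meant
 * to be called when a plain timed delay is wanted and no wakeup() is
 * expected.  The example_mutex name is hypothetical; a timeout that simply
 * lapses is normally reported as EWOULDBLOCK and is usually not treated as
 * an error by callers.
 *
 *	// Delay for roughly one second, non-interruptibly, with no lock held.
 *	(void)kpause("exdelay", FALSE, hz, NULL);
 *
 *	// Delay for roughly one second, but allow signals to cut the sleep
 *	// short; example_mutex is dropped while asleep and reacquired by
 *	// kpause() before it returns.
 *	error = kpause("exdelay", TRUE, hz, &example_mutex);
 *	if (error != 0 && error != EWOULDBLOCK)
 *		return error;		// interrupted by a signal
 */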
    215 
    216 /*
    217  * OBSOLETE INTERFACE
    218  *
    219  * Make all processes sleeping on the specified identifier runnable.
    220  */
    221 void
    222 wakeup(wchan_t ident)
    223 {
    224 	sleepq_t *sq;
    225 
    226 	if (cold)
    227 		return;
    228 
    229 	sq = sleeptab_lookup(&sleeptab, ident);
    230 	sleepq_wake(sq, ident, (u_int)-1);
    231 }
    232 
    233 /*
    234  * OBSOLETE INTERFACE
    235  *
    236  * Make the highest priority process first in line on the specified
    237  * identifier runnable.
    238  */
    239 void
    240 wakeup_one(wchan_t ident)
    241 {
    242 	sleepq_t *sq;
    243 
    244 	if (cold)
    245 		return;
    246 
    247 	sq = sleeptab_lookup(&sleeptab, ident);
    248 	sleepq_wake(sq, ident, 1);
    249 }
    250 
    251 
    252 /*
    253  * General yield call.  Puts the current process back on its run queue and
    254  * performs a voluntary context switch.  Should only be called when the
     255  * current process explicitly requests it (e.g. sched_yield(2) in compat code).
    256  */
    257 void
    258 yield(void)
    259 {
    260 	struct lwp *l = curlwp;
    261 
    262 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
    263 	lwp_lock(l);
    264 	if (l->l_stat == LSONPROC) {
    265 		KASSERT(lwp_locked(l, &sched_mutex));
    266 		l->l_priority = l->l_usrpri;
    267 	}
    268 	l->l_nvcsw++;
    269 	mi_switch(l, NULL);
    270 	KERNEL_LOCK(l->l_biglocks, l);
    271 }
    272 
    273 /*
    274  * General preemption call.  Puts the current process back on its run queue
    275  * and performs an involuntary context switch.
    276  */
    277 void
    278 preempt(void)
    279 {
    280 	struct lwp *l = curlwp;
    281 
    282 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
    283 	lwp_lock(l);
    284 	if (l->l_stat == LSONPROC) {
    285 		KASSERT(lwp_locked(l, &sched_mutex));
    286 		l->l_priority = l->l_usrpri;
    287 	}
    288 	l->l_nivcsw++;
    289 	(void)mi_switch(l, NULL);
    290 	KERNEL_LOCK(l->l_biglocks, l);
    291 }
    292 
    293 /*
    294  * sched_switch_unlock: update 'curlwp' and release old lwp.
    295  */
    296 
    297 void
    298 sched_switch_unlock(struct lwp *old, struct lwp *new)
    299 {
    300 
    301 	KASSERT(old == NULL || old == curlwp);
    302 
    303 	if (old != NULL) {
    304 		LOCKDEBUG_BARRIER(old->l_mutex, 1);
    305 	} else {
    306 		LOCKDEBUG_BARRIER(NULL, 1);
    307 	}
    308 
    309 	curlwp = new;
    310 	if (old != NULL) {
    311 		lwp_unlock(old);
    312 	}
    313 	spl0();
    314 }
    315 
    316 /*
    317  * Compute the amount of time during which the current lwp was running.
    318  *
    319  * - update l_rtime unless it's an idle lwp.
    320  * - update spc_runtime for the next lwp.
    321  */
    322 
    323 static inline void
    324 updatertime(struct lwp *l, struct schedstate_percpu *spc)
    325 {
    326 	struct timeval tv;
    327 	long s, u;
    328 
    329 	if ((l->l_flag & L_IDLE) != 0) {
    330 		microtime(&spc->spc_runtime);
    331 		return;
    332 	}
    333 
    334 	microtime(&tv);
    335 	u = l->l_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec);
    336 	s = l->l_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
    337 	if (u < 0) {
    338 		u += 1000000;
    339 		s--;
    340 	} else if (u >= 1000000) {
    341 		u -= 1000000;
    342 		s++;
    343 	}
    344 	l->l_rtime.tv_usec = u;
    345 	l->l_rtime.tv_sec = s;
    346 
    347 	spc->spc_runtime = tv;
    348 }
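
/*
 * Worked example of the normalization above (illustrative figures only):
 * if the LWP went on-CPU with spc_runtime = 5s 000900us and microtime()
 * now reads 7s 000300us, the raw deltas are +2s and -600us; the u < 0
 * branch borrows one second from the seconds delta, so l_rtime grows by
 * 1s 999400us, i.e. the 1.999400s that actually elapsed.
 */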
    349 
    350 /*
     351  * The machine-independent parts of the context switch.  Switch to "new"
     352  * if non-NULL, otherwise let the scheduler choose the next lwp.
    353  *
    354  * Returns 1 if another process was actually run.
    355  */
    356 int
    357 mi_switch(struct lwp *l, struct lwp *newl)
    358 {
    359 	struct schedstate_percpu *spc;
    360 	int retval, oldspl;
    361 
    362 	LOCK_ASSERT(lwp_locked(l, NULL));
    363 
    364 #ifdef LOCKDEBUG
    365 	spinlock_switchcheck();
    366 	simple_lock_switchcheck();
    367 #endif
    368 #ifdef KSTACK_CHECK_MAGIC
    369 	kstack_check_magic(l);
    370 #endif
    371 
    372 	/*
    373 	 * It's safe to read the per CPU schedstate unlocked here, as all we
     374 	 * are after is the run time and that's guaranteed to have been last
    375 	 * updated by this CPU.
    376 	 */
    377 	KDASSERT(l->l_cpu == curcpu());
    378 	spc = &l->l_cpu->ci_schedstate;
    379 
    380 	/*
    381 	 * XXXSMP If we are using h/w performance counters, save context.
    382 	 */
    383 #if PERFCTRS
    384 	if (PMC_ENABLED(l->l_proc)) {
    385 		pmc_save_context(l->l_proc);
    386 	}
    387 #endif
    388 
    389 	/*
     390 	 * If we are on the CPU and have gotten this far, then we must yield.
    391 	 */
    392 	KASSERT(l->l_stat != LSRUN);
    393 	if (l->l_stat == LSONPROC) {
    394 		KASSERT(lwp_locked(l, &sched_mutex));
    395 		l->l_stat = LSRUN;
    396 		if ((l->l_flag & L_IDLE) == 0) {
    397 			sched_enqueue(l);
    398 		}
    399 	}
    400 	uvmexp.swtch++;
    401 
    402 	/*
    403 	 * Process is about to yield the CPU; clear the appropriate
    404 	 * scheduling flags.
    405 	 */
    406 	spc->spc_flags &= ~SPCF_SWITCHCLEAR;
    407 
    408 	LOCKDEBUG_BARRIER(l->l_mutex, 1);
    409 
    410 	/*
    411 	 * Switch to the new LWP if necessary.
     412 	 * When we run again, we'll return here.
    413 	 */
    414 	oldspl = MUTEX_SPIN_OLDSPL(l->l_cpu);
    415 
    416 	/*
    417 	 * Acquire the sched_mutex if necessary.
    418 	 */
    419 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
    420 	if (l->l_mutex != &sched_mutex) {
    421 		mutex_enter(&sched_mutex);
    422 	}
    423 #endif
    424 
    425 	if (newl == NULL) {
    426 		newl = sched_nextlwp();
    427 	}
    428 	if (newl != NULL) {
    429 		KASSERT(lwp_locked(newl, &sched_mutex));
    430 		sched_dequeue(newl);
    431 	} else {
    432 		newl = l->l_cpu->ci_data.cpu_idlelwp;
    433 		KASSERT(newl != NULL);
    434 	}
    435 
    436 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
    437 	if (l->l_mutex != &sched_mutex) {
    438 		mutex_exit(&sched_mutex);
    439 	}
    440 #endif
    441 
    442 	newl->l_stat = LSONPROC;
    443 	updatertime(l, spc);
    444 	if (l != newl) {
    445 		struct lwp *prevlwp;
    446 
    447 		uvmexp.swtch++;
    448 		pmap_deactivate(l);
    449 		newl->l_cpu = l->l_cpu;
    450 		prevlwp = cpu_switchto(l, newl);
    451 		sched_switch_unlock(prevlwp, l);
    452 		pmap_activate(l);
    453 		retval = 1;
    454 	} else {
    455 		sched_switch_unlock(l, l);
    456 		retval = 0;
    457 	}
    458 
    459 	KASSERT(l == curlwp);
    460 	KASSERT(l->l_stat == LSONPROC);
    461 
    462 	/*
    463 	 * XXXSMP If we are using h/w performance counters, restore context.
    464 	 */
    465 #if PERFCTRS
    466 	if (PMC_ENABLED(l->l_proc)) {
    467 		pmc_restore_context(l->l_proc);
    468 	}
    469 #endif
    470 
    471 	/*
    472 	 * We're running again; record our new start time.  We might
    473 	 * be running on a new CPU now, so don't use the cached
    474 	 * schedstate_percpu pointer.
    475 	 */
    476 	KDASSERT(l->l_cpu == curcpu());
    477 
    478 	(void)splsched();
    479 	splx(oldspl);
    480 	return retval;
    481 }
    482 
    483 /*
    484  * Change process state to be runnable, placing it on the run queue if it is
    485  * in memory, and awakening the swapper if it isn't in memory.
    486  *
    487  * Call with the process and LWP locked.  Will return with the LWP unlocked.
    488  */
    489 void
    490 setrunnable(struct lwp *l)
    491 {
    492 	struct proc *p = l->l_proc;
    493 	sigset_t *ss;
    494 
    495 	KASSERT((l->l_flag & L_IDLE) == 0);
    496 	LOCK_ASSERT(mutex_owned(&p->p_smutex));
    497 	LOCK_ASSERT(lwp_locked(l, NULL));
    498 
    499 	switch (l->l_stat) {
    500 	case LSSTOP:
    501 		/*
    502 		 * If we're being traced (possibly because someone attached us
    503 		 * while we were stopped), check for a signal from the debugger.
    504 		 */
    505 		if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xstat != 0) {
    506 			if ((sigprop[p->p_xstat] & SA_TOLWP) != 0)
    507 				ss = &l->l_sigpend.sp_set;
    508 			else
    509 				ss = &p->p_sigpend.sp_set;
    510 			sigaddset(ss, p->p_xstat);
    511 			signotify(l);
    512 		}
    513 		p->p_nrlwps++;
    514 		break;
    515 	case LSSUSPENDED:
    516 		l->l_flag &= ~L_WSUSPEND;
    517 		p->p_nrlwps++;
    518 		break;
    519 	case LSSLEEP:
    520 		KASSERT(l->l_wchan != NULL);
    521 		break;
    522 	default:
    523 		panic("setrunnable: lwp %p state was %d", l, l->l_stat);
    524 	}
    525 
    526 	/*
     527 	 * If the LWP was sleeping interruptibly, then it's OK to start it
    528 	 * again.  If not, mark it as still sleeping.
    529 	 */
    530 	if (l->l_wchan != NULL) {
    531 		l->l_stat = LSSLEEP;
    532 		if ((l->l_flag & L_SINTR) != 0)
    533 			lwp_unsleep(l);
    534 		else {
    535 			lwp_unlock(l);
    536 #ifdef DIAGNOSTIC
    537 			panic("setrunnable: !L_SINTR");
    538 #endif
    539 		}
    540 		return;
    541 	}
    542 
    543 	LOCK_ASSERT(lwp_locked(l, &sched_mutex));
    544 
    545 	/*
    546 	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
    547 	 * about to call mi_switch(), in which case it will yield.
    548 	 *
    549 	 * XXXSMP Will need to change for preemption.
    550 	 */
    551 #ifdef MULTIPROCESSOR
    552 	if (l->l_cpu->ci_curlwp == l) {
    553 #else
    554 	if (l == curlwp) {
    555 #endif
    556 		l->l_stat = LSONPROC;
    557 		l->l_slptime = 0;
    558 		lwp_unlock(l);
    559 		return;
    560 	}
    561 
    562 	/*
    563 	 * Set the LWP runnable.  If it's swapped out, we need to wake the swapper
    564 	 * to bring it back in.  Otherwise, enter it into a run queue.
    565 	 */
    566 	sched_setrunnable(l);
    567 	l->l_stat = LSRUN;
    568 	l->l_slptime = 0;
    569 
    570 	if (l->l_flag & L_INMEM) {
    571 		sched_enqueue(l);
    572 		resched_cpu(l, l->l_priority);
    573 		lwp_unlock(l);
    574 	} else {
    575 		lwp_unlock(l);
    576 		uvm_kick_scheduler();
    577 	}
    578 }
    579 
    580 /*
    581  * suspendsched:
    582  *
    583  *	Convert all non-L_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED.
    584  */
    585 void
    586 suspendsched(void)
    587 {
    588 #ifdef MULTIPROCESSOR
    589 	CPU_INFO_ITERATOR cii;
    590 	struct cpu_info *ci;
    591 #endif
    592 	struct lwp *l;
    593 	struct proc *p;
    594 
    595 	/*
    596 	 * We do this by process in order not to violate the locking rules.
    597 	 */
    598 	mutex_enter(&proclist_mutex);
    599 	PROCLIST_FOREACH(p, &allproc) {
    600 		mutex_enter(&p->p_smutex);
    601 
    602 		if ((p->p_flag & P_SYSTEM) != 0) {
    603 			mutex_exit(&p->p_smutex);
    604 			continue;
    605 		}
    606 
    607 		p->p_stat = SSTOP;
    608 
    609 		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    610 			if (l == curlwp)
    611 				continue;
    612 
    613 			lwp_lock(l);
    614 
    615 			/*
    616 			 * Set L_WREBOOT so that the LWP will suspend itself
    617 			 * when it tries to return to user mode.  We want to
     618 			 * get as many LWPs as possible to the user / kernel
     619 			 * boundary, so that they will
    620 			 * release any locks that they hold.
    621 			 */
    622 			l->l_flag |= (L_WREBOOT | L_WSUSPEND);
    623 
    624 			if (l->l_stat == LSSLEEP &&
    625 			    (l->l_flag & L_SINTR) != 0) {
    626 				/* setrunnable() will release the lock. */
    627 				setrunnable(l);
    628 				continue;
    629 			}
    630 
    631 			lwp_unlock(l);
    632 		}
    633 
    634 		mutex_exit(&p->p_smutex);
    635 	}
    636 	mutex_exit(&proclist_mutex);
    637 
    638 	/*
    639 	 * Kick all CPUs to make them preempt any LWPs running in user mode.
    640 	 * They'll trap into the kernel and suspend themselves in userret().
    641 	 */
    642 	sched_lock(0);
    643 #ifdef MULTIPROCESSOR
    644 	for (CPU_INFO_FOREACH(cii, ci))
    645 		cpu_need_resched(ci);
    646 #else
    647 	cpu_need_resched(curcpu());
    648 #endif
    649 	sched_unlock(0);
    650 }
    651 
    652 /*
    653  * sched_kpri:
    654  *
    655  *	Scale a priority level to a kernel priority level, usually
    656  *	for an LWP that is about to sleep.
    657  */
    658 int
    659 sched_kpri(struct lwp *l)
    660 {
    661 	/*
    662 	 * Scale user priorities (127 -> 50) up to kernel priorities
    663 	 * in the range (49 -> 8).  Reserve the top 8 kernel priorities
    664 	 * for high priority kthreads.  Kernel priorities passed in
    665 	 * are left "as is".  XXX This is somewhat arbitrary.
    666 	 */
    667 	static const uint8_t kpri_tab[] = {
    668 		 0,   1,   2,   3,   4,   5,   6,   7,
    669 		 8,   9,  10,  11,  12,  13,  14,  15,
    670 		16,  17,  18,  19,  20,  21,  22,  23,
    671 		24,  25,  26,  27,  28,  29,  30,  31,
    672 		32,  33,  34,  35,  36,  37,  38,  39,
    673 		40,  41,  42,  43,  44,  45,  46,  47,
    674 		48,  49,   8,   8,   9,   9,  10,  10,
    675 		11,  11,  12,  12,  13,  14,  14,  15,
    676 		15,  16,  16,  17,  17,  18,  18,  19,
    677 		20,  20,  21,  21,  22,  22,  23,  23,
    678 		24,  24,  25,  26,  26,  27,  27,  28,
    679 		28,  29,  29,  30,  30,  31,  32,  32,
    680 		33,  33,  34,  34,  35,  35,  36,  36,
    681 		37,  38,  38,  39,  39,  40,  40,  41,
    682 		41,  42,  42,  43,  44,  44,  45,  45,
    683 		46,  46,  47,  47,  48,  48,  49,  49,
    684 	};
    685 
    686 	return kpri_tab[l->l_usrpri];
    687 }
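
/*
 * Illustrative examples read directly from kpri_tab[] above: an LWP with
 * l_usrpri == 127 (the weakest user priority) is scaled to kernel priority
 * 49, l_usrpri == 50 maps to 8, and a value already in the kernel range,
 * e.g. l_usrpri == 20, is returned unchanged as 20.
 */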
    688 
    689 /*
    690  * sched_unsleep:
    691  *
     692  *	This is called when the LWP has not been awoken normally but instead
    693  *	interrupted: for example, if the sleep timed out.  Because of this,
    694  *	it's not a valid action for running or idle LWPs.
    695  */
    696 void
    697 sched_unsleep(struct lwp *l)
    698 {
    699 
    700 	lwp_unlock(l);
    701 	panic("sched_unsleep");
    702 }
    703 
    704 inline void
    705 resched_cpu(struct lwp *l, u_char pri)
    706 {
    707 	struct cpu_info *ci;
    708 
    709 	/*
    710 	 * XXXSMP
    711 	 * Since l->l_cpu persists across a context switch,
    712 	 * this gives us *very weak* processor affinity, in
    713 	 * that we notify the CPU on which the process last
    714 	 * ran that it should try to switch.
    715 	 *
    716 	 * This does not guarantee that the process will run on
    717 	 * that processor next, because another processor might
    718 	 * grab it the next time it performs a context switch.
    719 	 *
    720 	 * This also does not handle the case where its last
    721 	 * CPU is running a higher-priority process, but every
    722 	 * other CPU is running a lower-priority process.  There
    723 	 * are ways to handle this situation, but they're not
    724 	 * currently very pretty, and we also need to weigh the
    725 	 * cost of moving a process from one CPU to another.
    726 	 *
    727 	 * XXXSMP
    728 	 * There is also the issue of locking the other CPU's
    729 	 * sched state, which we currently do not do.
    730 	 */
    731 	ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
    732 	if (pri < ci->ci_schedstate.spc_curpriority)
    733 		cpu_need_resched(ci);
    734 }
    735