      1 /*	$NetBSD: kern_synch.c,v 1.177.2.18 2007/03/24 00:43:08 rmind Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and
     10  * Daniel Sieger.
     11  *
     12  * Redistribution and use in source and binary forms, with or without
     13  * modification, are permitted provided that the following conditions
     14  * are met:
     15  * 1. Redistributions of source code must retain the above copyright
     16  *    notice, this list of conditions and the following disclaimer.
     17  * 2. Redistributions in binary form must reproduce the above copyright
     18  *    notice, this list of conditions and the following disclaimer in the
     19  *    documentation and/or other materials provided with the distribution.
     20  * 3. All advertising materials mentioning features or use of this software
     21  *    must display the following acknowledgement:
     22  *	This product includes software developed by the NetBSD
     23  *	Foundation, Inc. and its contributors.
     24  * 4. Neither the name of The NetBSD Foundation nor the names of its
     25  *    contributors may be used to endorse or promote products derived
     26  *    from this software without specific prior written permission.
     27  *
     28  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     29  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     30  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     31  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     32  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     38  * POSSIBILITY OF SUCH DAMAGE.
     39  */
     40 
     41 /*-
     42  * Copyright (c) 1982, 1986, 1990, 1991, 1993
     43  *	The Regents of the University of California.  All rights reserved.
     44  * (c) UNIX System Laboratories, Inc.
     45  * All or some portions of this file are derived from material licensed
     46  * to the University of California by American Telephone and Telegraph
     47  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     48  * the permission of UNIX System Laboratories, Inc.
     49  *
     50  * Redistribution and use in source and binary forms, with or without
     51  * modification, are permitted provided that the following conditions
     52  * are met:
     53  * 1. Redistributions of source code must retain the above copyright
     54  *    notice, this list of conditions and the following disclaimer.
     55  * 2. Redistributions in binary form must reproduce the above copyright
     56  *    notice, this list of conditions and the following disclaimer in the
     57  *    documentation and/or other materials provided with the distribution.
     58  * 3. Neither the name of the University nor the names of its contributors
     59  *    may be used to endorse or promote products derived from this software
     60  *    without specific prior written permission.
     61  *
     62  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     63  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     65  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     66  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     67  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     68  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     69  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     70  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     71  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     72  * SUCH DAMAGE.
     73  *
     74  *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
     75  */
     76 
     77 #include <sys/cdefs.h>
     78 __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.177.2.18 2007/03/24 00:43:08 rmind Exp $");
     79 
     80 #include "opt_kstack.h"
     81 #include "opt_lockdebug.h"
     82 #include "opt_multiprocessor.h"
     83 #include "opt_perfctrs.h"
     84 
     85 #define	__MUTEX_PRIVATE
     86 
     87 #include <sys/param.h>
     88 #include <sys/systm.h>
     89 #include <sys/proc.h>
     90 #include <sys/kernel.h>
     91 #if defined(PERFCTRS)
     92 #include <sys/pmc.h>
     93 #endif
     94 #include <sys/cpu.h>
     95 #include <sys/resourcevar.h>
     96 #include <sys/sched.h>
     97 #include <sys/syscall_stats.h>
     98 #include <sys/sleepq.h>
     99 #include <sys/lockdebug.h>
    100 
    101 #include <uvm/uvm_extern.h>
    102 
    103 int	lbolt;			/* once a second sleep address */
    104 
    105 static void	sched_unsleep(struct lwp *);
    106 static void	sched_changepri(struct lwp *, pri_t);
    107 static void	sched_lendpri(struct lwp *, pri_t);
    108 
    109 syncobj_t sleep_syncobj = {
    110 	SOBJ_SLEEPQ_SORTED,
    111 	sleepq_unsleep,
    112 	sleepq_changepri,
    113 	sleepq_lendpri,
    114 	syncobj_noowner,
    115 };
    116 
    117 syncobj_t sched_syncobj = {
    118 	SOBJ_SLEEPQ_SORTED,
    119 	sched_unsleep,
    120 	sched_changepri,
    121 	sched_lendpri,
    122 	syncobj_noowner,
    123 };
    124 
    125 /*
    126  * During autoconfiguration or after a panic, a sleep will simply lower the
    127  * priority briefly to allow interrupts, then return.  The priority to be
    128  * used (safepri) is machine-dependent, thus this value is initialized and
    129  * maintained in the machine-dependent layers.  This priority will typically
    130  * be 0, or the lowest priority that is safe for use on the interrupt stack;
    131  * it can be made higher to block network software interrupts after panics.
    132  */
    133 int	safepri;
    134 
    135 /*
    136  * OBSOLETE INTERFACE
    137  *
    138  * General sleep call.  Suspends the current process until a wakeup is
    139  * performed on the specified identifier.  The process will then be made
    140  * runnable with the specified priority.  Sleeps at most timo/hz seconds (0
    141  * means no timeout).  If pri includes the PCATCH flag, signals are checked
    142  * before and after sleeping, else signals are not checked.  Returns 0 if
    143  * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
    144  * signal needs to be delivered, ERESTART is returned if the current system
    145  * call should be restarted if possible, and EINTR is returned if the system
    146  * call should be interrupted by the signal.
    147  *
    148  * The interlock is held until we are on a sleep queue. The interlock will
    149  * be locked before returning back to the caller unless the PNORELOCK flag
    150  * is specified, in which case the interlock will always be unlocked upon
    151  * return.
    152  */
    153 int
    154 ltsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
    155 	volatile struct simplelock *interlock)
    156 {
    157 	struct lwp *l = curlwp;
    158 	sleepq_t *sq;
    159 	int error, catch;
    160 
    161 	if (sleepq_dontsleep(l)) {
    162 		(void)sleepq_abort(NULL, 0);
    163 		if ((priority & PNORELOCK) != 0)
    164 			simple_unlock(interlock);
    165 		return 0;
    166 	}
    167 
    168 	sq = sleeptab_lookup(&sleeptab, ident);
    169 	sleepq_enter(sq, l);
    170 
    171 	if (interlock != NULL) {
    172 		LOCK_ASSERT(simple_lock_held(interlock));
    173 		simple_unlock(interlock);
    174 	}
    175 
    176 	catch = priority & PCATCH;
    177 	sleepq_block(sq, priority & PRIMASK, ident, wmesg, timo, catch,
    178 	    &sleep_syncobj);
    179 	error = sleepq_unblock(timo, catch);
    180 
    181 	if (interlock != NULL && (priority & PNORELOCK) == 0)
    182 		simple_lock(interlock);
    183 
    184 	return error;
    185 }
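
/*
 * Example (a sketch only, not part of the interface): waiting for a
 * condition protected by a simplelock with ltsleep().  The pointer sc and
 * its fields sc_slock and sc_ready are hypothetical.
 *
 *	int error = 0;
 *
 *	simple_lock(&sc->sc_slock);
 *	while (!sc->sc_ready && error == 0) {
 *		error = ltsleep(&sc->sc_ready, PWAIT | PCATCH, "exwait",
 *		    hz, &sc->sc_slock);
 *	}
 *	simple_unlock(&sc->sc_slock);
 *
 * Because PNORELOCK is not passed, the interlock is dropped while asleep
 * and re-acquired before ltsleep() returns, so the condition can be
 * re-checked safely.
 */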
    186 
    187 int
    188 mtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
    189 	kmutex_t *mtx)
    190 {
    191 	struct lwp *l = curlwp;
    192 	sleepq_t *sq;
    193 	int error, catch;
    194 
    195 	if (sleepq_dontsleep(l)) {
    196 		(void)sleepq_abort(mtx, (priority & PNORELOCK) != 0);
    197 		return 0;
    198 	}
    199 
    200 	sq = sleeptab_lookup(&sleeptab, ident);
    201 	sleepq_enter(sq, l);
    202 	mutex_exit(mtx);
    203 
    204 	catch = priority & PCATCH;
    205 	sleepq_block(sq, priority & PRIMASK, ident, wmesg, timo, catch,
    206 	    &sleep_syncobj);
    207 	error = sleepq_unblock(timo, catch);
    208 
    209 	if ((priority & PNORELOCK) == 0)
    210 		mutex_enter(mtx);
    211 
    212 	return error;
    213 }
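
/*
 * Example (a sketch only): the same wait expressed with mtsleep() and a
 * kmutex, which is the preferred form.  The fields sc_lock and sc_ready
 * are hypothetical.
 *
 *	int error = 0;
 *
 *	mutex_enter(&sc->sc_lock);
 *	while (!sc->sc_ready && error == 0) {
 *		error = mtsleep(&sc->sc_ready, PWAIT | PCATCH, "exwait",
 *		    0, &sc->sc_lock);
 *	}
 *	mutex_exit(&sc->sc_lock);
 *
 * The mutex is released while asleep and re-acquired on wakeup unless
 * PNORELOCK is specified.
 */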
    214 
    215 /*
    216  * General sleep call for situations where a wake-up is not expected.
    217  */
    218 int
    219 kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
    220 {
    221 	struct lwp *l = curlwp;
    222 	sleepq_t *sq;
    223 	int error;
    224 
    225 	if (sleepq_dontsleep(l))
    226 		return sleepq_abort(NULL, 0);
    227 
    228 	if (mtx != NULL)
    229 		mutex_exit(mtx);
    230 	sq = sleeptab_lookup(&sleeptab, l);
    231 	sleepq_enter(sq, l);
    232 	sleepq_block(sq, sched_kpri(l), l, wmesg, timo, intr, &sleep_syncobj);
    233 	error = sleepq_unblock(timo, intr);
    234 	if (mtx != NULL)
    235 		mutex_enter(mtx);
    236 
    237 	return error;
    238 }
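
/*
 * Example (a sketch only): a polling delay where nothing will call
 * wakeup(), e.g. waiting out a transient hardware condition.  The
 * function hw_busy() is hypothetical.
 *
 *	while (hw_busy(sc)) {
 *		(void)kpause("hwpoll", false, hz / 10, NULL);
 *	}
 *
 * Each iteration sleeps for roughly 100ms.  The wait channel is the LWP
 * itself, so only the timeout (or a signal, if intr is true) ends the
 * sleep.
 */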
    239 
    240 /*
    241  * OBSOLETE INTERFACE
    242  *
    243  * Make all processes sleeping on the specified identifier runnable.
    244  */
    245 void
    246 wakeup(wchan_t ident)
    247 {
    248 	sleepq_t *sq;
    249 
    250 	if (cold)
    251 		return;
    252 
    253 	sq = sleeptab_lookup(&sleeptab, ident);
    254 	sleepq_wake(sq, ident, (u_int)-1);
    255 }
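
/*
 * Example (a sketch only): the producer side matching the mtsleep()
 * sketch above -- update the condition while holding the same lock the
 * sleeper uses, then wake everything sleeping on that address.  The
 * fields sc_lock and sc_ready are hypothetical.
 *
 *	mutex_enter(&sc->sc_lock);
 *	sc->sc_ready = true;
 *	mutex_exit(&sc->sc_lock);
 *	wakeup(&sc->sc_ready);
 */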
    256 
    257 /*
    258  * OBSOLETE INTERFACE
    259  *
    260  * Make the highest priority process first in line on the specified
    261  * identifier runnable.
    262  */
    263 void
    264 wakeup_one(wchan_t ident)
    265 {
    266 	sleepq_t *sq;
    267 
    268 	if (cold)
    269 		return;
    270 
    271 	sq = sleeptab_lookup(&sleeptab, ident);
    272 	sleepq_wake(sq, ident, 1);
    273 }
    274 
    275 
    276 /*
    277  * General yield call.  Puts the current process back on its run queue and
    278  * performs a voluntary context switch.  Should only be called when the
    279  * current process explicitly requests it (e.g. sched_yield(2) in compat code).
    280  */
    281 void
    282 yield(void)
    283 {
    284 	struct lwp *l = curlwp;
    285 
    286 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
    287 	lwp_lock(l);
    288 	if (l->l_stat == LSONPROC) {
    289 		KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
    290 		l->l_priority = l->l_usrpri;
    291 	}
    292 	l->l_nvcsw++;
    293 	mi_switch(l);
    294 	KERNEL_LOCK(l->l_biglocks, l);
    295 }
    296 
    297 /*
    298  * General preemption call.  Puts the current process back on its run queue
    299  * and performs an involuntary context switch.
    300  */
    301 void
    302 preempt(void)
    303 {
    304 	struct lwp *l = curlwp;
    305 
    306 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
    307 	lwp_lock(l);
    308 	if (l->l_stat == LSONPROC) {
    309 		KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
    310 		l->l_priority = l->l_usrpri;
    311 	}
    312 	l->l_nivcsw++;
    313 	(void)mi_switch(l);
    314 	KERNEL_LOCK(l->l_biglocks, l);
    315 }
    316 
    317 /*
    318  * sched_switch_unlock: update 'curlwp' and release old lwp.
    319  */
    320 
    321 void
    322 sched_switch_unlock(struct lwp *old, struct lwp *new)
    323 {
    324 
    325 	KASSERT(old == NULL || old == curlwp);
    326 	KASSERT(new != NULL);
    327 
    328 	if (old != NULL) {
    329 		LOCKDEBUG_BARRIER(old->l_mutex, 1);
    330 		lwp_unlock(old);
    331 	} else {
    332 		LOCKDEBUG_BARRIER(NULL, 1);
    333 	}
    334 	curlwp = new;
    335 	spl0();
    336 }
    337 
    338 /*
    339  * Compute the amount of time during which the current lwp was running.
    340  *
    341  * - update l_rtime unless it's an idle lwp.
    342  * - update spc_runtime for the next lwp.
    343  */
    344 
    345 static inline void
    346 updatertime(struct lwp *l, struct schedstate_percpu *spc)
    347 {
    348 	struct timeval tv;
    349 	long s, u;
    350 
    351 	if ((l->l_flag & LW_IDLE) != 0) {
    352 		microtime(&spc->spc_runtime);
    353 		return;
    354 	}
    355 
    356 	microtime(&tv);
    357 	u = l->l_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec);
    358 	s = l->l_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
    359 	if (u < 0) {
    360 		u += 1000000;
    361 		s--;
    362 	} else if (u >= 1000000) {
    363 		u -= 1000000;
    364 		s++;
    365 	}
    366 	l->l_rtime.tv_usec = u;
    367 	l->l_rtime.tv_sec = s;
    368 
    369 	spc->spc_runtime = tv;
    370 }
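
/*
 * Worked example of the carry handling above: with l_rtime = 2.100000s,
 * spc_runtime taken at 10.800000s and microtime() now returning
 * 11.300000s, the raw sums are u = 100000 + (300000 - 800000) = -400000
 * and s = 2 + (11 - 10) = 3.  Since u is negative it is normalized to
 * u = 600000, s = 2, giving the expected total of 2.1s + 0.5s = 2.6s.
 */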
    371 
    372 /*
    373  * The machine independent parts of context switch.
    374  *
    375  * Returns 1 if another process was actually run.
    376  */
    377 int
    378 mi_switch(struct lwp *l)
    379 {
    380 	struct schedstate_percpu *spc;
    381 	struct lwp *newl;
    382 	int retval, oldspl;
    383 
    384 	LOCK_ASSERT(lwp_locked(l, NULL));
    385 
    386 #ifdef LOCKDEBUG
    387 	spinlock_switchcheck();
    388 	simple_lock_switchcheck();
    389 #endif
    390 #ifdef KSTACK_CHECK_MAGIC
    391 	kstack_check_magic(l);
    392 #endif
    393 
    394 	/*
    395 	 * It's safe to read the per-CPU schedstate unlocked here, as all we
    396 	 * are after is the run time and that's guaranteed to have been last
    397 	 * updated by this CPU.
    398 	 */
    399 	KDASSERT(l->l_cpu == curcpu());
    400 	spc = &l->l_cpu->ci_schedstate;
    401 
    402 	/* Count time spent in current system call */
    403 	SYSCALL_TIME_SLEEP(l);
    404 
    405 	/*
    406 	 * XXXSMP If we are using h/w performance counters, save context.
    407 	 */
    408 #if PERFCTRS
    409 	if (PMC_ENABLED(l->l_proc)) {
    410 		pmc_save_context(l->l_proc);
    411 	}
    412 #endif
    413 
    414 	/*
    415 	 * If we are on the CPU and have gotten this far, then we must yield.
    416 	 */
    417 	KASSERT(l->l_stat != LSRUN);
    418 	if (l->l_stat == LSONPROC) {
    419 		KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
    420 		l->l_stat = LSRUN;
    421 		if ((l->l_flag & LW_IDLE) == 0) {
    422 			sched_enqueue(l, true);
    423 		}
    424 	}
    425 
    426 	/*
    427 	 * Process is about to yield the CPU; clear the appropriate
    428 	 * scheduling flags.
    429 	 */
    430 	spc->spc_flags &= ~SPCF_SWITCHCLEAR;
    431 
    432 	LOCKDEBUG_BARRIER(l->l_mutex, 1);
    433 
    434 	/*
    435 	 * Switch to the new LWP if necessary.
    436 	 * When we run again, we'll return back here.
    437 	 */
    438 	oldspl = MUTEX_SPIN_OLDSPL(l->l_cpu);
    439 
    440 	/*
    441 	 * Acquire the spc_mutex if necessary.
    442 	 */
    443 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
    444 	if (l->l_mutex != spc->spc_mutex) {
    445 		mutex_enter(spc->spc_mutex);
    446 	}
    447 #endif
    448 	/*
    449 	 * Let sched_nextlwp() select the LWP to run on the CPU next.
    450 	 * If no LWP is runnable, switch to the idle LWP.
    451 	 */
    452 	newl = sched_nextlwp(l);
    453 	if (newl) {
    454 		sched_dequeue(newl);
    455 	} else {
    456 		newl = l->l_cpu->ci_data.cpu_idlelwp;
    457 		KASSERT(newl != NULL);
    458 	}
    459 	KASSERT(lwp_locked(newl, spc->spc_mutex));
    460 	newl->l_stat = LSONPROC;
    461 	newl->l_cpu = l->l_cpu;
    462 	newl->l_flag |= LW_RUNNING;
    463 
    464 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
    465 	if (l->l_mutex != spc->spc_mutex) {
    466 		mutex_exit(spc->spc_mutex);
    467 	}
    468 #endif
    469 
    470 	updatertime(l, spc);
    471 	if (l != newl) {
    472 		struct lwp *prevlwp;
    473 
    474 		uvmexp.swtch++;
    475 		pmap_deactivate(l);
    476 		l->l_flag &= ~LW_RUNNING;
    477 		prevlwp = cpu_switchto(l, newl);
    478 		sched_switch_unlock(prevlwp, l);
    479 		pmap_activate(l);
    480 		retval = 1;
    481 	} else {
    482 		sched_switch_unlock(l, l);
    483 		retval = 0;
    484 	}
    485 
    486 	KASSERT(l == curlwp);
    487 	KASSERT(l->l_stat == LSONPROC);
    488 
    489 	/*
    490 	 * XXXSMP If we are using h/w performance counters, restore context.
    491 	 */
    492 #if PERFCTRS
    493 	if (PMC_ENABLED(l->l_proc)) {
    494 		pmc_restore_context(l->l_proc);
    495 	}
    496 #endif
    497 
    498 	/*
    499 	 * We're running again; record our new start time.  We might
    500 	 * be running on a new CPU now, so don't use the cached
    501 	 * schedstate_percpu pointer.
    502 	 */
    503 	SYSCALL_TIME_WAKEUP(l);
    504 	KDASSERT(l->l_cpu == curcpu());
    505 
    506 	(void)splsched();
    507 	splx(oldspl);
    508 	return retval;
    509 }
    510 
    511 /*
    512  * Change process state to be runnable, placing it on the run queue if it is
    513  * in memory, and awakening the swapper if it isn't in memory.
    514  *
    515  * Call with the process and LWP locked.  Will return with the LWP unlocked.
    516  */
    517 void
    518 setrunnable(struct lwp *l)
    519 {
    520 	struct proc *p = l->l_proc;
    521 	sigset_t *ss;
    522 
    523 	KASSERT((l->l_flag & LW_IDLE) == 0);
    524 	KASSERT(mutex_owned(&p->p_smutex));
    525 	KASSERT(lwp_locked(l, NULL));
    526 
    527 	switch (l->l_stat) {
    528 	case LSSTOP:
    529 		/*
    530 		 * If we're being traced (possibly because someone attached us
    531 		 * while we were stopped), check for a signal from the debugger.
    532 		 */
    533 		if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xstat != 0) {
    534 			if ((sigprop[p->p_xstat] & SA_TOLWP) != 0)
    535 				ss = &l->l_sigpend.sp_set;
    536 			else
    537 				ss = &p->p_sigpend.sp_set;
    538 			sigaddset(ss, p->p_xstat);
    539 			signotify(l);
    540 		}
    541 		p->p_nrlwps++;
    542 		break;
    543 	case LSSUSPENDED:
    544 		l->l_flag &= ~LW_WSUSPEND;
    545 		p->p_nrlwps++;
    546 		break;
    547 	case LSSLEEP:
    548 		KASSERT(l->l_wchan != NULL);
    549 		break;
    550 	default:
    551 		panic("setrunnable: lwp %p state was %d", l, l->l_stat);
    552 	}
    553 
    554 	/*
    555 	 * If the LWP was sleeping interruptibly, then it's OK to start it
    556 	 * again.  If not, mark it as still sleeping.
    557 	 */
    558 	if (l->l_wchan != NULL) {
    559 		l->l_stat = LSSLEEP;
    560 		/* lwp_unsleep() will release the lock. */
    561 		lwp_unsleep(l);
    562 		return;
    563 	}
    564 
    565 	LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
    566 
    567 	/*
    568 	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
    569 	 * about to call mi_switch(), in which case it will yield.
    570 	 */
    571 	if ((l->l_flag & LW_RUNNING) != 0) {
    572 		l->l_stat = LSONPROC;
    573 		l->l_slptime = 0;
    574 		lwp_unlock(l);
    575 		return;
    576 	}
    577 
    578 	/*
    579 	 * Set the LWP runnable.  If it's swapped out, we need to wake the swapper
    580 	 * to bring it back in.  Otherwise, enter it into a run queue.
    581 	 */
    582 	sched_setrunnable(l);
    583 	l->l_stat = LSRUN;
    584 	l->l_slptime = 0;
    585 
    586 	if (l->l_flag & LW_INMEM) {
    587 		sched_enqueue(l, false);
    588 		resched_cpu(l);
    589 		lwp_unlock(l);
    590 	} else {
    591 		lwp_unlock(l);
    592 		uvm_kick_scheduler();
    593 	}
    594 }
    595 
    596 /*
    597  * suspendsched:
    598  *
    599  *	Convert all non-PK_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED.
    600  */
    601 void
    602 suspendsched(void)
    603 {
    604 #ifdef MULTIPROCESSOR
    605 	CPU_INFO_ITERATOR cii;
    606 	struct cpu_info *ci;
    607 #endif
    608 	struct lwp *l;
    609 	struct proc *p;
    610 
    611 	/*
    612 	 * We do this by process in order not to violate the locking rules.
    613 	 */
    614 	mutex_enter(&proclist_mutex);
    615 	PROCLIST_FOREACH(p, &allproc) {
    616 		mutex_enter(&p->p_smutex);
    617 
    618 		if ((p->p_flag & PK_SYSTEM) != 0) {
    619 			mutex_exit(&p->p_smutex);
    620 			continue;
    621 		}
    622 
    623 		p->p_stat = SSTOP;
    624 
    625 		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    626 			if (l == curlwp)
    627 				continue;
    628 
    629 			lwp_lock(l);
    630 
    631 			/*
    632 			 * Set LW_WREBOOT so that the LWP will suspend itself
    633 			 * when it tries to return to user mode.  We want to
    634 			 * get as many LWPs as possible to the user/kernel
    635 			 * boundary, so that they will release any locks
    636 			 * that they hold.
    637 			 */
    638 			l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);
    639 
    640 			if (l->l_stat == LSSLEEP &&
    641 			    (l->l_flag & LW_SINTR) != 0) {
    642 				/* setrunnable() will release the lock. */
    643 				setrunnable(l);
    644 				continue;
    645 			}
    646 
    647 			lwp_unlock(l);
    648 		}
    649 
    650 		mutex_exit(&p->p_smutex);
    651 	}
    652 	mutex_exit(&proclist_mutex);
    653 
    654 	/*
    655 	 * Kick all CPUs to make them preempt any LWPs running in user mode.
    656 	 * They'll trap into the kernel and suspend themselves in userret().
    657 	 */
    658 #ifdef MULTIPROCESSOR
    659 	for (CPU_INFO_FOREACH(cii, ci))
    660 		cpu_need_resched(ci, 0);
    661 #else
    662 	cpu_need_resched(curcpu(), 0);
    663 #endif
    664 }
    665 
    666 /*
    667  * sched_kpri:
    668  *
    669  *	Scale a priority level to a kernel priority level, usually
    670  *	for an LWP that is about to sleep.
    671  */
    672 pri_t
    673 sched_kpri(struct lwp *l)
    674 {
    675 	/*
    676 	 * Scale user priorities (127 -> 50) up to kernel priorities
    677 	 * in the range (49 -> 8).  Reserve the top 8 kernel priorities
    678 	 * for high priority kthreads.  Kernel priorities passed in
    679 	 * are left "as is".  XXX This is somewhat arbitrary.
    680 	 */
    681 	static const uint8_t kpri_tab[] = {
    682 		 0,   1,   2,   3,   4,   5,   6,   7,
    683 		 8,   9,  10,  11,  12,  13,  14,  15,
    684 		16,  17,  18,  19,  20,  21,  22,  23,
    685 		24,  25,  26,  27,  28,  29,  30,  31,
    686 		32,  33,  34,  35,  36,  37,  38,  39,
    687 		40,  41,  42,  43,  44,  45,  46,  47,
    688 		48,  49,   8,   8,   9,   9,  10,  10,
    689 		11,  11,  12,  12,  13,  14,  14,  15,
    690 		15,  16,  16,  17,  17,  18,  18,  19,
    691 		20,  20,  21,  21,  22,  22,  23,  23,
    692 		24,  24,  25,  26,  26,  27,  27,  28,
    693 		28,  29,  29,  30,  30,  31,  32,  32,
    694 		33,  33,  34,  34,  35,  35,  36,  36,
    695 		37,  38,  38,  39,  39,  40,  40,  41,
    696 		41,  42,  42,  43,  44,  44,  45,  45,
    697 		46,  46,  47,  47,  48,  48,  49,  49,
    698 	};
    699 
    700 	return (pri_t)kpri_tab[l->l_usrpri];
    701 }
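
/*
 * Worked examples of the table lookup above: an existing kernel priority
 * is returned unchanged (kpri_tab[30] == 30), the lowest user priority
 * maps to the bottom of the scaled range (kpri_tab[127] == 49), and the
 * highest user priority maps to the top of the non-reserved kernel range
 * (kpri_tab[50] == 8), leaving priorities 0-7 for high priority kthreads.
 */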
    702 
    703 /*
    704  * sched_unsleep:
    705  *
    706  *	This is called when the LWP has not been awoken normally but instead
    707  *	interrupted: for example, if the sleep timed out.  Because of this,
    708  *	it's not a valid action for running or idle LWPs.
    709  */
    710 static void
    711 sched_unsleep(struct lwp *l)
    712 {
    713 
    714 	lwp_unlock(l);
    715 	panic("sched_unsleep");
    716 }
    717 
    718 inline void
    719 resched_cpu(struct lwp *l)
    720 {
    721 	struct cpu_info *ci;
    722 	const pri_t pri = lwp_eprio(l);
    723 
    724 	/*
    725 	 * XXXSMP
    726 	 * Since l->l_cpu persists across a context switch,
    727 	 * this gives us *very weak* processor affinity, in
    728 	 * that we notify the CPU on which the process last
    729 	 * ran that it should try to switch.
    730 	 *
    731 	 * This does not guarantee that the process will run on
    732 	 * that processor next, because another processor might
    733 	 * grab it the next time it performs a context switch.
    734 	 *
    735 	 * This also does not handle the case where its last
    736 	 * CPU is running a higher-priority process, but every
    737 	 * other CPU is running a lower-priority process.  There
    738 	 * are ways to handle this situation, but they're not
    739 	 * currently very pretty, and we also need to weigh the
    740 	 * cost of moving a process from one CPU to another.
    741 	 */
    742 	ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
    743 	if (pri < ci->ci_schedstate.spc_curpriority)
    744 		cpu_need_resched(ci, 0);
    745 }
    746 
    747 static void
    748 sched_changepri(struct lwp *l, pri_t pri)
    749 {
    750 
    751 	LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
    752 
    753 	l->l_usrpri = pri;
    754 	if (l->l_priority < PUSER)
    755 		return;
    756 
    757 	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
    758 		l->l_priority = pri;
    759 		return;
    760 	}
    761 
    762 	sched_dequeue(l);
    763 	l->l_priority = pri;
    764 	sched_enqueue(l, false);
    765 	resched_cpu(l);
    766 }
    767 
    768 static void
    769 sched_lendpri(struct lwp *l, pri_t pri)
    770 {
    771 
    772 	LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
    773 
    774 	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
    775 		l->l_inheritedprio = pri;
    776 		return;
    777 	}
    778 
    779 	sched_dequeue(l);
    780 	l->l_inheritedprio = pri;
    781 	sched_enqueue(l, false);
    782 	resched_cpu(l);
    783 }
    784 
    785 struct lwp *
    786 syncobj_noowner(wchan_t wchan)
    787 {
    788 
    789 	return NULL;
    790 }
    791