kern_synch.c revision 1.149.2.3
      1  1.149.2.3      yamt /*	$NetBSD: kern_synch.c,v 1.149.2.3 2007/02/26 09:11:11 yamt Exp $	*/
      2       1.63   thorpej 
      3       1.63   thorpej /*-
      4  1.149.2.3      yamt  * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
      5       1.63   thorpej  * All rights reserved.
      6       1.63   thorpej  *
      7       1.63   thorpej  * This code is derived from software contributed to The NetBSD Foundation
      8       1.63   thorpej  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  1.149.2.3      yamt  * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
     10       1.63   thorpej  *
     11       1.63   thorpej  * Redistribution and use in source and binary forms, with or without
     12       1.63   thorpej  * modification, are permitted provided that the following conditions
     13       1.63   thorpej  * are met:
     14       1.63   thorpej  * 1. Redistributions of source code must retain the above copyright
     15       1.63   thorpej  *    notice, this list of conditions and the following disclaimer.
     16       1.63   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     17       1.63   thorpej  *    notice, this list of conditions and the following disclaimer in the
     18       1.63   thorpej  *    documentation and/or other materials provided with the distribution.
     19       1.63   thorpej  * 3. All advertising materials mentioning features or use of this software
     20       1.63   thorpej  *    must display the following acknowledgement:
     21       1.63   thorpej  *	This product includes software developed by the NetBSD
     22       1.63   thorpej  *	Foundation, Inc. and its contributors.
     23       1.63   thorpej  * 4. Neither the name of The NetBSD Foundation nor the names of its
     24       1.63   thorpej  *    contributors may be used to endorse or promote products derived
     25       1.63   thorpej  *    from this software without specific prior written permission.
     26       1.63   thorpej  *
     27       1.63   thorpej  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     28       1.63   thorpej  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     29       1.63   thorpej  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     30       1.63   thorpej  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     31       1.63   thorpej  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32       1.63   thorpej  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33       1.63   thorpej  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34       1.63   thorpej  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35       1.63   thorpej  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36       1.63   thorpej  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37       1.63   thorpej  * POSSIBILITY OF SUCH DAMAGE.
     38       1.63   thorpej  */
     39       1.26       cgd 
     40       1.26       cgd /*-
     41       1.26       cgd  * Copyright (c) 1982, 1986, 1990, 1991, 1993
     42       1.26       cgd  *	The Regents of the University of California.  All rights reserved.
     43       1.26       cgd  * (c) UNIX System Laboratories, Inc.
     44       1.26       cgd  * All or some portions of this file are derived from material licensed
     45       1.26       cgd  * to the University of California by American Telephone and Telegraph
     46       1.26       cgd  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     47       1.26       cgd  * the permission of UNIX System Laboratories, Inc.
     48       1.26       cgd  *
     49       1.26       cgd  * Redistribution and use in source and binary forms, with or without
     50       1.26       cgd  * modification, are permitted provided that the following conditions
     51       1.26       cgd  * are met:
     52       1.26       cgd  * 1. Redistributions of source code must retain the above copyright
     53       1.26       cgd  *    notice, this list of conditions and the following disclaimer.
     54       1.26       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     55       1.26       cgd  *    notice, this list of conditions and the following disclaimer in the
     56       1.26       cgd  *    documentation and/or other materials provided with the distribution.
     57      1.136       agc  * 3. Neither the name of the University nor the names of its contributors
     58       1.26       cgd  *    may be used to endorse or promote products derived from this software
     59       1.26       cgd  *    without specific prior written permission.
     60       1.26       cgd  *
     61       1.26       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     62       1.26       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     63       1.26       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     64       1.26       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     65       1.26       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     66       1.26       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     67       1.26       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     68       1.26       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     69       1.26       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     70       1.26       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     71       1.26       cgd  * SUCH DAMAGE.
     72       1.26       cgd  *
     73       1.50      fvdl  *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
     74       1.26       cgd  */
     75      1.106     lukem 
     76      1.106     lukem #include <sys/cdefs.h>
     77  1.149.2.3      yamt __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.149.2.3 2007/02/26 09:11:11 yamt Exp $");
     78       1.48       mrg 
     79       1.52  jonathan #include "opt_ddb.h"
     80      1.109      yamt #include "opt_kstack.h"
     81       1.82   thorpej #include "opt_lockdebug.h"
     82       1.83   thorpej #include "opt_multiprocessor.h"
     83      1.110    briggs #include "opt_perfctrs.h"
     84       1.26       cgd 
     85  1.149.2.3      yamt #define	__MUTEX_PRIVATE
     86  1.149.2.3      yamt 
     87       1.26       cgd #include <sys/param.h>
     88       1.26       cgd #include <sys/systm.h>
     89       1.68   thorpej #include <sys/callout.h>
     90       1.26       cgd #include <sys/proc.h>
     91       1.26       cgd #include <sys/kernel.h>
     92       1.26       cgd #include <sys/buf.h>
     93      1.111    briggs #if defined(PERFCTRS)
     94      1.110    briggs #include <sys/pmc.h>
     95      1.111    briggs #endif
     96       1.26       cgd #include <sys/signalvar.h>
     97       1.26       cgd #include <sys/resourcevar.h>
     98       1.55      ross #include <sys/sched.h>
     99  1.149.2.3      yamt #include <sys/syscall_stats.h>
    100  1.149.2.1      yamt #include <sys/kauth.h>
    101  1.149.2.3      yamt #include <sys/sleepq.h>
    102  1.149.2.3      yamt #include <sys/lockdebug.h>
    103       1.47       mrg 
    104       1.47       mrg #include <uvm/uvm_extern.h>
    105       1.47       mrg 
    106       1.26       cgd #include <machine/cpu.h>
    107       1.34  christos 
    108       1.26       cgd int	lbolt;			/* once a second sleep address */
    109       1.88  sommerfe int	rrticks;		/* number of hardclock ticks per roundrobin() */
    110       1.26       cgd 
    111  1.149.2.1      yamt /*
    112       1.73   thorpej  * The global scheduler state.
    113       1.73   thorpej  */
    114  1.149.2.3      yamt kmutex_t	sched_mutex;		/* global sched state mutex */
    115  1.149.2.3      yamt struct prochd	sched_qs[RUNQUE_NQS];	/* run queues */
    116  1.149.2.1      yamt volatile uint32_t sched_whichqs;	/* bitmap of non-empty queues */
    117       1.83   thorpej 
    118  1.149.2.3      yamt void	schedcpu(void *);
    119  1.149.2.3      yamt void	updatepri(struct lwp *);
    120       1.34  christos 
    121  1.149.2.3      yamt void	sched_unsleep(struct lwp *);
    122  1.149.2.3      yamt void	sched_changepri(struct lwp *, int);
    123       1.63   thorpej 
    124      1.143      yamt struct callout schedcpu_ch = CALLOUT_INITIALIZER_SETFUNC(schedcpu, NULL);
    125  1.149.2.1      yamt static unsigned int schedcpu_ticks;
    126      1.122   thorpej 
    127  1.149.2.3      yamt syncobj_t sleep_syncobj = {
    128  1.149.2.3      yamt 	SOBJ_SLEEPQ_SORTED,
    129  1.149.2.3      yamt 	sleepq_unsleep,
    130  1.149.2.3      yamt 	sleepq_changepri
    131  1.149.2.3      yamt };
    132  1.149.2.3      yamt 
    133  1.149.2.3      yamt syncobj_t sched_syncobj = {
    134  1.149.2.3      yamt 	SOBJ_SLEEPQ_SORTED,
    135  1.149.2.3      yamt 	sched_unsleep,
    136  1.149.2.3      yamt 	sched_changepri
    137  1.149.2.3      yamt };
    138      1.122   thorpej 
    139       1.26       cgd /*
    140       1.26       cgd  * Force switch among equal priority processes every 100ms.
    141       1.88  sommerfe  * Called from hardclock every hz/10 == rrticks hardclock ticks.
    142       1.26       cgd  */
    143       1.26       cgd /* ARGSUSED */
    144       1.26       cgd void
    145       1.89  sommerfe roundrobin(struct cpu_info *ci)
    146       1.26       cgd {
    147       1.89  sommerfe 	struct schedstate_percpu *spc = &ci->ci_schedstate;
    148       1.26       cgd 
    149       1.88  sommerfe 	spc->spc_rrticks = rrticks;
    150      1.130   nathanw 
    151      1.122   thorpej 	if (curlwp != NULL) {
    152       1.73   thorpej 		if (spc->spc_flags & SPCF_SEENRR) {
    153       1.69   thorpej 			/*
    154       1.69   thorpej 			 * The process has already been through a roundrobin
    155       1.69   thorpej 			 * without switching and may be hogging the CPU.
    156       1.69   thorpej 			 * Indicate that the process should yield.
    157       1.69   thorpej 			 */
    158       1.73   thorpej 			spc->spc_flags |= SPCF_SHOULDYIELD;
    159       1.69   thorpej 		} else
    160       1.73   thorpej 			spc->spc_flags |= SPCF_SEENRR;
    161       1.69   thorpej 	}
    162  1.149.2.3      yamt 	cpu_need_resched(curcpu());
    163       1.26       cgd }
    164       1.26       cgd 
    165  1.149.2.1      yamt #define	PPQ	(128 / RUNQUE_NQS)	/* priorities per queue */
    166  1.149.2.1      yamt #define	NICE_WEIGHT 2			/* priorities per nice level */
    167  1.149.2.1      yamt 
    168  1.149.2.1      yamt #define	ESTCPU_SHIFT	11
    169  1.149.2.1      yamt #define	ESTCPU_MAX	((NICE_WEIGHT * PRIO_MAX - PPQ) << ESTCPU_SHIFT)
    170  1.149.2.1      yamt #define	ESTCPULIM(e)	min((e), ESTCPU_MAX)
    171  1.149.2.1      yamt 
    172       1.26       cgd /*
    173       1.26       cgd  * Constants for digital decay and forget:
    174       1.26       cgd  *	90% of (p_estcpu) usage in 5 * loadav time
    175       1.26       cgd  *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
    176       1.26       cgd  *          Note that, as ps(1) mentions, this can let percentages
    177       1.26       cgd  *          total over 100% (I've seen 137.9% for 3 processes).
    178       1.26       cgd  *
    179       1.26       cgd  * Note that hardclock updates p_estcpu and p_cpticks independently.
    180       1.26       cgd  *
    181       1.26       cgd  * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
    182       1.26       cgd  * That is, the system wants to compute a value of decay such
    183       1.26       cgd  * that the following for loop:
    184       1.26       cgd  * 	for (i = 0; i < (5 * loadavg); i++)
    185       1.26       cgd  * 		p_estcpu *= decay;
    186       1.26       cgd  * will compute
    187       1.26       cgd  * 	p_estcpu *= 0.1;
    188       1.26       cgd  * for all values of loadavg:
    189       1.26       cgd  *
    190       1.26       cgd  * Mathematically this loop can be expressed by saying:
    191       1.26       cgd  * 	decay ** (5 * loadavg) ~= .1
    192       1.26       cgd  *
    193       1.26       cgd  * The system computes decay as:
    194       1.26       cgd  * 	decay = (2 * loadavg) / (2 * loadavg + 1)
    195       1.26       cgd  *
    196       1.26       cgd  * We wish to prove that the system's computation of decay
    197       1.26       cgd  * will always fulfill the equation:
    198       1.26       cgd  * 	decay ** (5 * loadavg) ~= .1
    199       1.26       cgd  *
    200       1.26       cgd  * If we compute b as:
    201       1.26       cgd  * 	b = 2 * loadavg
    202       1.26       cgd  * then
    203       1.26       cgd  * 	decay = b / (b + 1)
    204       1.26       cgd  *
    205       1.26       cgd  * We now need to prove two things:
    206       1.26       cgd  *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
    207       1.26       cgd  *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
    208      1.130   nathanw  *
    209       1.26       cgd  * Facts:
    210       1.26       cgd  *         For x close to zero, exp(x) =~ 1 + x, since
    211       1.26       cgd  *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
    212       1.26       cgd  *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
    213       1.26       cgd  *         For x close to zero, ln(1+x) =~ x, since
    214       1.26       cgd  *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
    215       1.26       cgd  *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
    216       1.26       cgd  *         ln(.1) =~ -2.30
    217       1.26       cgd  *
    218       1.26       cgd  * Proof of (1):
    219       1.26       cgd  *    Solve (factor)**(power) =~ .1 given power (5*loadav):
    220       1.26       cgd  *	solving for factor,
    221       1.26       cgd  *      ln(factor) =~ (-2.30/5*loadav), or
    222       1.26       cgd  *      factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
    223       1.26       cgd  *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
    224       1.26       cgd  *
    225       1.26       cgd  * Proof of (2):
    226       1.26       cgd  *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
    227       1.26       cgd  *	solving for power,
    228       1.26       cgd  *      power*ln(b/(b+1)) =~ -2.30, or
    229       1.26       cgd  *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
    230       1.26       cgd  *
    231       1.26       cgd  * Actual power values for the implemented algorithm are as follows:
    232       1.26       cgd  *      loadav: 1       2       3       4
    233       1.26       cgd  *      power:  5.68    10.32   14.94   19.55
    234       1.26       cgd  */
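/*
 * Illustrative check (hypothetical userland sketch, not part of the
 * kernel source): reproduce the "actual power values" table above by
 * solving decay ** power == 0.1 for power, with decay = b / (b + 1)
 * and b = 2 * loadav.  Compile with -lm.
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	int loadav;

	for (loadav = 1; loadav <= 4; loadav++) {
		double b = 2.0 * loadav;
		double decay = b / (b + 1.0);
		double power = log(0.1) / log(decay);

		/* Prints approximately 5.68, 10.32, 14.94, 19.55. */
		printf("loadav %d: power %.2f\n", loadav, power);
	}
	return 0;
}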
    235       1.26       cgd 
    236       1.26       cgd /* calculations for digital decay to forget 90% of usage in 5*loadav sec */
    237       1.26       cgd #define	loadfactor(loadav)	(2 * (loadav))
    238  1.149.2.1      yamt 
    239  1.149.2.1      yamt static fixpt_t
    240  1.149.2.1      yamt decay_cpu(fixpt_t loadfac, fixpt_t estcpu)
    241  1.149.2.1      yamt {
    242  1.149.2.1      yamt 
    243  1.149.2.1      yamt 	if (estcpu == 0) {
    244  1.149.2.1      yamt 		return 0;
    245  1.149.2.1      yamt 	}
    246  1.149.2.1      yamt 
    247  1.149.2.1      yamt #if !defined(_LP64)
     248  1.149.2.1      yamt 	/* avoid 64-bit arithmetic. */
    249  1.149.2.1      yamt #define	FIXPT_MAX ((fixpt_t)((UINTMAX_C(1) << sizeof(fixpt_t) * CHAR_BIT) - 1))
    250  1.149.2.1      yamt 	if (__predict_true(loadfac <= FIXPT_MAX / ESTCPU_MAX)) {
    251  1.149.2.1      yamt 		return estcpu * loadfac / (loadfac + FSCALE);
    252  1.149.2.1      yamt 	}
    253  1.149.2.1      yamt #endif /* !defined(_LP64) */
    254  1.149.2.1      yamt 
    255  1.149.2.1      yamt 	return (uint64_t)estcpu * loadfac / (loadfac + FSCALE);
    256  1.149.2.1      yamt }
    257  1.149.2.1      yamt 
    258  1.149.2.1      yamt /*
    259  1.149.2.1      yamt  * For all load averages >= 1 and max p_estcpu of (255 << ESTCPU_SHIFT),
    260  1.149.2.1      yamt  * sleeping for at least seven times the loadfactor will decay p_estcpu to
    261  1.149.2.1      yamt  * less than (1 << ESTCPU_SHIFT).
    262  1.149.2.1      yamt  *
    263  1.149.2.1      yamt  * note that our ESTCPU_MAX is actually much smaller than (255 << ESTCPU_SHIFT).
    264  1.149.2.1      yamt  */
    265  1.149.2.1      yamt static fixpt_t
    266  1.149.2.1      yamt decay_cpu_batch(fixpt_t loadfac, fixpt_t estcpu, unsigned int n)
    267  1.149.2.1      yamt {
    268  1.149.2.1      yamt 
    269  1.149.2.1      yamt 	if ((n << FSHIFT) >= 7 * loadfac) {
    270  1.149.2.1      yamt 		return 0;
    271  1.149.2.1      yamt 	}
    272  1.149.2.1      yamt 
    273  1.149.2.1      yamt 	while (estcpu != 0 && n > 1) {
    274  1.149.2.1      yamt 		estcpu = decay_cpu(loadfac, estcpu);
    275  1.149.2.1      yamt 		n--;
    276  1.149.2.1      yamt 	}
    277  1.149.2.1      yamt 
    278  1.149.2.1      yamt 	return estcpu;
    279  1.149.2.1      yamt }
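/*
 * Illustrative check (hypothetical userland sketch): the comment above
 * decay_cpu_batch() claims that a maximal p_estcpu of (255 << ESTCPU_SHIFT)
 * decays below (1 << ESTCPU_SHIFT) once n seconds of sleep satisfy
 * (n << FSHIFT) >= 7 * loadfac, i.e. n >= 14 * loadav.  The worst case
 * is loadav == 1, where 255 * (2.0 / 3.0)**14 =~ 0.87 < 1.
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	int loadav;

	for (loadav = 1; loadav <= 8; loadav++) {
		double b = 2.0 * loadav;	/* loadfactor(loadav) */
		double decay = b / (b + 1.0);

		/* Every value printed is below 1.0. */
		printf("loadav %d: %.4f\n", loadav,
		    255.0 * pow(decay, 14.0 * loadav));
	}
	return 0;
}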
    280       1.26       cgd 
    281       1.26       cgd /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
    282       1.26       cgd fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */
    283       1.26       cgd 
    284       1.26       cgd /*
    285       1.26       cgd  * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
    286       1.26       cgd  * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
    287       1.26       cgd  * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
    288       1.26       cgd  *
    289       1.26       cgd  * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
    290       1.26       cgd  *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
    291       1.26       cgd  *
      292       1.26       cgd  * If you don't want to bother with the faster/more-accurate formula, you
    293       1.26       cgd  * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
    294       1.26       cgd  * (more general) method of calculating the %age of CPU used by a process.
    295       1.26       cgd  */
    296       1.26       cgd #define	CCPU_SHIFT	11
    297       1.26       cgd 
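/*
 * Illustrative check (hypothetical userland sketch, not part of the
 * kernel): confirm that ccpu is exp(-1/20) scaled by FSCALE, and that
 * applying it once a second leaves about 5% of p_pctcpu after 60
 * seconds, i.e. "decay 95% in 60 seconds".  FSCALE == 2048 is assumed
 * here, matching FSHIFT == 11 from <sys/param.h>.
 */
#include <math.h>
#include <stdio.h>

#define	FSCALE	2048.0

int
main(void)
{

	printf("ccpu      = %.0f\n", exp(-1.0 / 20.0) * FSCALE);	/* ~1948 */
	printf("remaining = %.4f\n", pow(exp(-1.0 / 20.0), 60));	/* ~0.0498 */
	return 0;
}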
    298       1.26       cgd /*
    299  1.149.2.3      yamt  * schedcpu:
    300  1.149.2.3      yamt  *
     301  1.149.2.3      yamt  *	Recompute process priorities every hz ticks.
    302  1.149.2.3      yamt  *
    303  1.149.2.3      yamt  *	XXXSMP This needs to be reorganised in order to reduce the locking
    304  1.149.2.3      yamt  *	burden.
    305       1.26       cgd  */
    306       1.26       cgd /* ARGSUSED */
    307       1.26       cgd void
    308       1.77   thorpej schedcpu(void *arg)
    309       1.26       cgd {
    310       1.71  augustss 	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
    311  1.149.2.3      yamt 	struct rlimit *rlim;
    312      1.122   thorpej 	struct lwp *l;
    313       1.71  augustss 	struct proc *p;
    314  1.149.2.3      yamt 	int minslp, clkhz, sig;
    315  1.149.2.3      yamt 	long runtm;
    316       1.26       cgd 
    317  1.149.2.1      yamt 	schedcpu_ticks++;
    318  1.149.2.1      yamt 
    319  1.149.2.3      yamt 	mutex_enter(&proclist_mutex);
    320      1.145      yamt 	PROCLIST_FOREACH(p, &allproc) {
    321       1.26       cgd 		/*
    322  1.149.2.3      yamt 		 * Increment time in/out of memory and sleep time (if
    323  1.149.2.3      yamt 		 * sleeping).  We ignore overflow; with 16-bit int's
    324       1.26       cgd 		 * (remember them?) overflow takes 45 days.
    325       1.26       cgd 		 */
    326      1.122   thorpej 		minslp = 2;
    327  1.149.2.3      yamt 		mutex_enter(&p->p_smutex);
    328  1.149.2.3      yamt 		runtm = p->p_rtime.tv_sec;
    329      1.122   thorpej 		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    330  1.149.2.3      yamt 			lwp_lock(l);
    331  1.149.2.3      yamt 			runtm += l->l_rtime.tv_sec;
    332      1.122   thorpej 			l->l_swtime++;
    333      1.130   nathanw 			if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
    334      1.122   thorpej 			    l->l_stat == LSSUSPENDED) {
    335      1.122   thorpej 				l->l_slptime++;
    336      1.122   thorpej 				minslp = min(minslp, l->l_slptime);
    337      1.122   thorpej 			} else
    338      1.122   thorpej 				minslp = 0;
    339  1.149.2.3      yamt 			lwp_unlock(l);
    340      1.122   thorpej 		}
    341       1.26       cgd 		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
    342  1.149.2.3      yamt 
    343       1.26       cgd 		/*
    344  1.149.2.3      yamt 		 * Check if the process exceeds its CPU resource allocation.
    345  1.149.2.3      yamt 		 * If over max, kill it.
    346       1.26       cgd 		 */
    347  1.149.2.3      yamt 		rlim = &p->p_rlimit[RLIMIT_CPU];
    348  1.149.2.3      yamt 		sig = 0;
    349  1.149.2.3      yamt 		if (runtm >= rlim->rlim_cur) {
    350  1.149.2.3      yamt 			if (runtm >= rlim->rlim_max)
    351  1.149.2.3      yamt 				sig = SIGKILL;
    352  1.149.2.3      yamt 			else {
    353  1.149.2.3      yamt 				sig = SIGXCPU;
    354  1.149.2.3      yamt 				if (rlim->rlim_cur < rlim->rlim_max)
    355  1.149.2.3      yamt 					rlim->rlim_cur += 5;
    356  1.149.2.3      yamt 			}
    357  1.149.2.3      yamt 		}
    358  1.149.2.3      yamt 
    359  1.149.2.3      yamt 		/*
    360  1.149.2.3      yamt 		 * If the process has run for more than autonicetime, reduce
    361  1.149.2.3      yamt 		 * priority to give others a chance.
    362  1.149.2.3      yamt 		 */
    363  1.149.2.3      yamt 		if (autonicetime && runtm > autonicetime && p->p_nice == NZERO
    364  1.149.2.3      yamt 		    && kauth_cred_geteuid(p->p_cred)) {
    365  1.149.2.3      yamt 			mutex_spin_enter(&p->p_stmutex);
    366  1.149.2.3      yamt 			p->p_nice = autoniceval + NZERO;
    367  1.149.2.3      yamt 			resetprocpriority(p);
    368  1.149.2.3      yamt 			mutex_spin_exit(&p->p_stmutex);
    369  1.149.2.3      yamt 		}
    370  1.149.2.3      yamt 
    371       1.26       cgd 		/*
    372  1.149.2.3      yamt 		 * If the process has slept the entire second,
    373  1.149.2.3      yamt 		 * stop recalculating its priority until it wakes up.
    374       1.26       cgd 		 */
    375  1.149.2.3      yamt 		if (minslp <= 1) {
    376  1.149.2.3      yamt 			/*
    377  1.149.2.3      yamt 			 * p_pctcpu is only for ps.
    378  1.149.2.3      yamt 			 */
    379  1.149.2.3      yamt 			mutex_spin_enter(&p->p_stmutex);
    380  1.149.2.3      yamt 			clkhz = stathz != 0 ? stathz : hz;
    381       1.26       cgd #if	(FSHIFT >= CCPU_SHIFT)
    382  1.149.2.3      yamt 			p->p_pctcpu += (clkhz == 100)?
    383  1.149.2.3      yamt 			    ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
    384  1.149.2.3      yamt 			    100 * (((fixpt_t) p->p_cpticks)
    385  1.149.2.3      yamt 			    << (FSHIFT - CCPU_SHIFT)) / clkhz;
    386       1.26       cgd #else
    387  1.149.2.3      yamt 			p->p_pctcpu += ((FSCALE - ccpu) *
    388  1.149.2.3      yamt 			    (p->p_cpticks * FSCALE / clkhz)) >> FSHIFT;
    389       1.26       cgd #endif
    390  1.149.2.3      yamt 			p->p_cpticks = 0;
    391  1.149.2.3      yamt 			p->p_estcpu = decay_cpu(loadfac, p->p_estcpu);
    392  1.149.2.3      yamt 
    393  1.149.2.3      yamt 			LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    394  1.149.2.3      yamt 				lwp_lock(l);
    395  1.149.2.3      yamt 				if (l->l_slptime <= 1 &&
    396  1.149.2.3      yamt 				    l->l_priority >= PUSER)
    397  1.149.2.3      yamt 					resetpriority(l);
    398  1.149.2.3      yamt 				lwp_unlock(l);
    399      1.122   thorpej 			}
    400  1.149.2.3      yamt 			mutex_spin_exit(&p->p_stmutex);
    401  1.149.2.3      yamt 		}
    402  1.149.2.3      yamt 
    403  1.149.2.3      yamt 		mutex_exit(&p->p_smutex);
    404  1.149.2.3      yamt 		if (sig) {
    405  1.149.2.3      yamt 			psignal(p, sig);
    406       1.26       cgd 		}
    407       1.26       cgd 	}
    408  1.149.2.3      yamt 	mutex_exit(&proclist_mutex);
    409       1.47       mrg 	uvm_meter();
    410       1.67      fvdl 	wakeup((caddr_t)&lbolt);
    411      1.143      yamt 	callout_schedule(&schedcpu_ch, hz);
    412       1.26       cgd }
    413       1.26       cgd 
    414       1.26       cgd /*
    415       1.26       cgd  * Recalculate the priority of a process after it has slept for a while.
    416       1.26       cgd  */
    417       1.26       cgd void
    418      1.122   thorpej updatepri(struct lwp *l)
    419       1.26       cgd {
    420      1.122   thorpej 	struct proc *p = l->l_proc;
    421       1.83   thorpej 	fixpt_t loadfac;
    422       1.83   thorpej 
    423  1.149.2.3      yamt 	LOCK_ASSERT(lwp_locked(l, NULL));
    424  1.149.2.1      yamt 	KASSERT(l->l_slptime > 1);
    425       1.83   thorpej 
    426       1.83   thorpej 	loadfac = loadfactor(averunnable.ldavg[0]);
    427       1.26       cgd 
    428  1.149.2.1      yamt 	l->l_slptime--; /* the first time was done in schedcpu */
    429  1.149.2.1      yamt 	/* XXX NJWLWP */
    430  1.149.2.3      yamt 	/* XXXSMP occasionally unlocked, should be per-LWP */
    431  1.149.2.1      yamt 	p->p_estcpu = decay_cpu_batch(loadfac, p->p_estcpu, l->l_slptime);
    432      1.122   thorpej 	resetpriority(l);
    433       1.26       cgd }
    434       1.26       cgd 
    435       1.26       cgd /*
    436  1.149.2.3      yamt  * During autoconfiguration or after a panic, a sleep will simply lower the
    437  1.149.2.3      yamt  * priority briefly to allow interrupts, then return.  The priority to be
    438  1.149.2.3      yamt  * used (safepri) is machine-dependent, thus this value is initialized and
    439  1.149.2.3      yamt  * maintained in the machine-dependent layers.  This priority will typically
    440  1.149.2.3      yamt  * be 0, or the lowest priority that is safe for use on the interrupt stack;
    441  1.149.2.3      yamt  * it can be made higher to block network software interrupts after panics.
    442       1.26       cgd  */
    443  1.149.2.3      yamt int	safepri;
    444       1.26       cgd 
    445       1.26       cgd /*
    446  1.149.2.3      yamt  * OBSOLETE INTERFACE
    447  1.149.2.3      yamt  *
    448       1.26       cgd  * General sleep call.  Suspends the current process until a wakeup is
    449       1.26       cgd  * performed on the specified identifier.  The process will then be made
    450  1.149.2.3      yamt  * runnable with the specified priority.  Sleeps at most timo/hz seconds (0
    451  1.149.2.3      yamt  * means no timeout).  If pri includes PCATCH flag, signals are checked
    452       1.26       cgd  * before and after sleeping, else signals are not checked.  Returns 0 if
    453       1.26       cgd  * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
    454       1.26       cgd  * signal needs to be delivered, ERESTART is returned if the current system
    455       1.26       cgd  * call should be restarted if possible, and EINTR is returned if the system
    456       1.26       cgd  * call should be interrupted by the signal (return EINTR).
    457       1.77   thorpej  *
    458  1.149.2.3      yamt  * The interlock is held until we are on a sleep queue. The interlock will
     459  1.149.2.3      yamt  * be locked before returning to the caller unless the PNORELOCK flag
    460  1.149.2.3      yamt  * is specified, in which case the interlock will always be unlocked upon
    461  1.149.2.3      yamt  * return.
    462       1.26       cgd  */
    463       1.26       cgd int
    464  1.149.2.3      yamt ltsleep(wchan_t ident, int priority, const char *wmesg, int timo,
    465  1.149.2.3      yamt 	volatile struct simplelock *interlock)
    466       1.26       cgd {
    467      1.122   thorpej 	struct lwp *l = curlwp;
    468  1.149.2.3      yamt 	sleepq_t *sq;
    469  1.149.2.3      yamt 	int error, catch;
    470       1.26       cgd 
    471  1.149.2.3      yamt 	if (sleepq_dontsleep(l)) {
    472  1.149.2.3      yamt 		(void)sleepq_abort(NULL, 0);
    473  1.149.2.3      yamt 		if ((priority & PNORELOCK) != 0)
    474       1.77   thorpej 			simple_unlock(interlock);
    475  1.149.2.3      yamt 		return 0;
    476      1.122   thorpej 	}
    477       1.77   thorpej 
    478  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    479  1.149.2.3      yamt 	sleepq_enter(sq, l);
    480       1.77   thorpej 
    481  1.149.2.3      yamt 	if (interlock != NULL) {
    482  1.149.2.3      yamt 		LOCK_ASSERT(simple_lock_held(interlock));
    483       1.77   thorpej 		simple_unlock(interlock);
    484       1.26       cgd 	}
    485      1.147     perry 
    486  1.149.2.3      yamt 	catch = priority & PCATCH;
    487  1.149.2.3      yamt 	sleepq_block(sq, priority & PRIMASK, ident, wmesg, timo, catch,
    488  1.149.2.3      yamt 	    &sleep_syncobj);
    489  1.149.2.3      yamt 	error = sleepq_unblock(timo, catch);
    490      1.139        cl 
    491  1.149.2.3      yamt 	if (interlock != NULL && (priority & PNORELOCK) == 0)
    492  1.149.2.3      yamt 		simple_lock(interlock);
    493  1.149.2.3      yamt 
    494  1.149.2.3      yamt 	return error;
    495      1.139        cl }
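/*
 * Minimal usage sketch (hypothetical driver code, not from this file):
 * wait for a condition protected by a simplelock, waking at least once
 * a second and letting signals interrupt the wait.  The producer side
 * would set foo_ready and call wakeup(&foo_ready) while holding
 * foo_slock.  Names prefixed "foo" are invented for illustration.
 */
static struct simplelock foo_slock = SIMPLELOCK_INITIALIZER;
static int foo_ready;

static int
foo_wait(void)
{
	int error = 0;

	simple_lock(&foo_slock);
	while (!foo_ready && error == 0) {
		/* The interlock is dropped while asleep and re-taken here. */
		error = ltsleep(&foo_ready, PWAIT | PCATCH, "foowait", hz,
		    &foo_slock);
		if (error == EWOULDBLOCK)	/* timeout; just poll again */
			error = 0;
	}
	simple_unlock(&foo_slock);
	return error;
}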
    496      1.139        cl 
    497       1.26       cgd /*
    498  1.149.2.3      yamt  * General sleep call for situations where a wake-up is not expected.
    499       1.63   thorpej  */
    500  1.149.2.3      yamt int
    501  1.149.2.3      yamt kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
    502       1.83   thorpej {
    503  1.149.2.3      yamt 	struct lwp *l = curlwp;
    504  1.149.2.3      yamt 	sleepq_t *sq;
    505  1.149.2.3      yamt 	int error;
    506       1.83   thorpej 
    507  1.149.2.3      yamt 	if (sleepq_dontsleep(l))
    508  1.149.2.3      yamt 		return sleepq_abort(NULL, 0);
    509       1.63   thorpej 
    510  1.149.2.3      yamt 	if (mtx != NULL)
    511  1.149.2.3      yamt 		mutex_exit(mtx);
    512  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, l);
    513  1.149.2.3      yamt 	sleepq_enter(sq, l);
    514  1.149.2.3      yamt 	sleepq_block(sq, sched_kpri(l), l, wmesg, timo, intr, &sleep_syncobj);
    515  1.149.2.3      yamt 	error = sleepq_unblock(timo, intr);
    516  1.149.2.3      yamt 	if (mtx != NULL)
    517  1.149.2.3      yamt 		mutex_enter(mtx);
    518       1.83   thorpej 
    519  1.149.2.3      yamt 	return error;
    520       1.83   thorpej }
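/*
 * Minimal usage sketch (hypothetical): delay the calling LWP for about
 * a tenth of a second with no wakeup channel, no interlock, and with
 * signals unable to cut the sleep short.
 */
static void
foo_delay(void)
{

	(void)kpause("foodly", false, hz / 10, NULL);
}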
    521       1.83   thorpej 
    522       1.63   thorpej /*
    523  1.149.2.3      yamt  * OBSOLETE INTERFACE
    524  1.149.2.3      yamt  *
    525       1.26       cgd  * Make all processes sleeping on the specified identifier runnable.
    526       1.26       cgd  */
    527       1.26       cgd void
    528  1.149.2.3      yamt wakeup(wchan_t ident)
    529       1.26       cgd {
    530  1.149.2.3      yamt 	sleepq_t *sq;
    531       1.83   thorpej 
    532  1.149.2.3      yamt 	if (cold)
    533  1.149.2.3      yamt 		return;
    534       1.83   thorpej 
    535  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    536  1.149.2.3      yamt 	sleepq_wake(sq, ident, (u_int)-1);
    537       1.63   thorpej }
    538       1.63   thorpej 
    539       1.63   thorpej /*
    540  1.149.2.3      yamt  * OBSOLETE INTERFACE
    541  1.149.2.3      yamt  *
    542       1.63   thorpej  * Make the highest priority process first in line on the specified
    543       1.63   thorpej  * identifier runnable.
    544       1.63   thorpej  */
    545  1.149.2.3      yamt void
    546  1.149.2.3      yamt wakeup_one(wchan_t ident)
    547       1.63   thorpej {
    548  1.149.2.3      yamt 	sleepq_t *sq;
    549       1.63   thorpej 
    550  1.149.2.3      yamt 	if (cold)
    551  1.149.2.3      yamt 		return;
    552  1.149.2.3      yamt 
    553  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    554  1.149.2.3      yamt 	sleepq_wake(sq, ident, 1);
    555      1.117  gmcgarry }
    556      1.117  gmcgarry 
    557  1.149.2.3      yamt 
    558      1.117  gmcgarry /*
    559      1.117  gmcgarry  * General yield call.  Puts the current process back on its run queue and
    560      1.117  gmcgarry  * performs a voluntary context switch.  Should only be called when the
    561      1.117  gmcgarry  * current process explicitly requests it (eg sched_yield(2) in compat code).
    562      1.117  gmcgarry  */
    563      1.117  gmcgarry void
    564      1.117  gmcgarry yield(void)
    565      1.117  gmcgarry {
    566      1.122   thorpej 	struct lwp *l = curlwp;
    567      1.117  gmcgarry 
    568  1.149.2.3      yamt 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
    569  1.149.2.3      yamt 	lwp_lock(l);
    570  1.149.2.3      yamt 	if (l->l_stat == LSONPROC) {
    571  1.149.2.3      yamt 		KASSERT(lwp_locked(l, &sched_mutex));
    572  1.149.2.3      yamt 		l->l_priority = l->l_usrpri;
    573  1.149.2.3      yamt 	}
    574  1.149.2.3      yamt 	l->l_nvcsw++;
    575      1.122   thorpej 	mi_switch(l, NULL);
    576  1.149.2.3      yamt 	KERNEL_LOCK(l->l_biglocks, l);
    577       1.69   thorpej }
    578       1.69   thorpej 
    579       1.69   thorpej /*
    580       1.69   thorpej  * General preemption call.  Puts the current process back on its run queue
    581  1.149.2.1      yamt  * and performs an involuntary context switch.
    582       1.69   thorpej  */
    583       1.69   thorpej void
    584  1.149.2.3      yamt preempt(void)
    585       1.69   thorpej {
    586      1.122   thorpej 	struct lwp *l = curlwp;
    587       1.69   thorpej 
    588  1.149.2.3      yamt 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
    589  1.149.2.3      yamt 	lwp_lock(l);
    590  1.149.2.3      yamt 	if (l->l_stat == LSONPROC) {
    591  1.149.2.3      yamt 		KASSERT(lwp_locked(l, &sched_mutex));
    592  1.149.2.3      yamt 		l->l_priority = l->l_usrpri;
    593  1.149.2.3      yamt 	}
    594  1.149.2.3      yamt 	l->l_nivcsw++;
    595  1.149.2.3      yamt 	(void)mi_switch(l, NULL);
    596  1.149.2.3      yamt 	KERNEL_LOCK(l->l_biglocks, l);
    597       1.69   thorpej }
    598       1.69   thorpej 
    599       1.69   thorpej /*
    600  1.149.2.3      yamt  * The machine independent parts of context switch.  Switch to "new"
    601  1.149.2.3      yamt  * if non-NULL, otherwise let cpu_switch choose the next lwp.
    602      1.130   nathanw  *
    603      1.122   thorpej  * Returns 1 if another process was actually run.
    604       1.26       cgd  */
    605      1.122   thorpej int
    606      1.122   thorpej mi_switch(struct lwp *l, struct lwp *newl)
    607       1.26       cgd {
    608       1.76   thorpej 	struct schedstate_percpu *spc;
    609       1.26       cgd 	struct timeval tv;
    610  1.149.2.3      yamt 	int retval, oldspl;
    611  1.149.2.3      yamt 	long s, u;
    612       1.85  sommerfe 
    613  1.149.2.3      yamt 	LOCK_ASSERT(lwp_locked(l, NULL));
    614       1.76   thorpej 
    615       1.54       chs #ifdef LOCKDEBUG
    616  1.149.2.1      yamt 	spinlock_switchcheck();
    617       1.81   thorpej 	simple_lock_switchcheck();
    618       1.50      fvdl #endif
    619  1.149.2.3      yamt #ifdef KSTACK_CHECK_MAGIC
    620  1.149.2.3      yamt 	kstack_check_magic(l);
    621  1.149.2.3      yamt #endif
    622  1.149.2.3      yamt 
    623  1.149.2.3      yamt 	/*
    624  1.149.2.3      yamt 	 * It's safe to read the per CPU schedstate unlocked here, as all we
    625  1.149.2.3      yamt 	 * are after is the run time and that's guarenteed to have been last
    626  1.149.2.3      yamt 	 * updated by this CPU.
    627  1.149.2.3      yamt 	 */
    628  1.149.2.3      yamt 	KDASSERT(l->l_cpu == curcpu());
    629  1.149.2.3      yamt 	spc = &l->l_cpu->ci_schedstate;
    630       1.81   thorpej 
    631       1.26       cgd 	/*
    632       1.26       cgd 	 * Compute the amount of time during which the current
    633      1.113  gmcgarry 	 * process was running.
    634       1.26       cgd 	 */
    635       1.26       cgd 	microtime(&tv);
    636  1.149.2.3      yamt 	u = l->l_rtime.tv_usec +
    637      1.122   thorpej 	    (tv.tv_usec - spc->spc_runtime.tv_usec);
    638  1.149.2.3      yamt 	s = l->l_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
    639       1.26       cgd 	if (u < 0) {
    640       1.26       cgd 		u += 1000000;
    641       1.26       cgd 		s--;
    642       1.26       cgd 	} else if (u >= 1000000) {
    643       1.26       cgd 		u -= 1000000;
    644       1.26       cgd 		s++;
    645       1.26       cgd 	}
    646  1.149.2.3      yamt 	l->l_rtime.tv_usec = u;
    647  1.149.2.3      yamt 	l->l_rtime.tv_sec = s;
    648  1.149.2.3      yamt 
    649  1.149.2.3      yamt 	/* Count time spent in current system call */
    650  1.149.2.3      yamt 	SYSCALL_TIME_SLEEP(l);
    651       1.26       cgd 
    652       1.26       cgd 	/*
    653  1.149.2.3      yamt 	 * XXXSMP If we are using h/w performance counters, save context.
    654       1.69   thorpej 	 */
    655  1.149.2.3      yamt #if PERFCTRS
    656  1.149.2.3      yamt 	if (PMC_ENABLED(l->l_proc)) {
    657  1.149.2.3      yamt 		pmc_save_context(l->l_proc);
    658  1.149.2.3      yamt 	}
    659      1.109      yamt #endif
    660       1.26       cgd 
    661      1.113  gmcgarry 	/*
    662  1.149.2.3      yamt 	 * Acquire the sched_mutex if necessary.  It will be released by
    663  1.149.2.3      yamt 	 * cpu_switch once it has decided to idle, or picked another LWP
    664  1.149.2.3      yamt 	 * to run.
    665      1.113  gmcgarry 	 */
    666  1.149.2.3      yamt #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
    667  1.149.2.3      yamt 	if (l->l_mutex != &sched_mutex) {
    668  1.149.2.3      yamt 		mutex_spin_enter(&sched_mutex);
    669  1.149.2.3      yamt 		lwp_unlock(l);
    670  1.149.2.2      yamt 	}
    671      1.110    briggs #endif
    672      1.113  gmcgarry 
    673      1.113  gmcgarry 	/*
     674  1.149.2.3      yamt 	 * If we are still on the CPU and have gotten this far, then we must yield.
    675      1.113  gmcgarry 	 */
    676  1.149.2.3      yamt 	KASSERT(l->l_stat != LSRUN);
    677  1.149.2.3      yamt 	if (l->l_stat == LSONPROC) {
    678  1.149.2.3      yamt 		KASSERT(lwp_locked(l, &sched_mutex));
    679  1.149.2.3      yamt 		l->l_stat = LSRUN;
    680  1.149.2.3      yamt 		setrunqueue(l);
    681  1.149.2.3      yamt 	}
    682      1.114  gmcgarry 	uvmexp.swtch++;
    683  1.149.2.3      yamt 
    684  1.149.2.3      yamt 	/*
    685  1.149.2.3      yamt 	 * Process is about to yield the CPU; clear the appropriate
    686  1.149.2.3      yamt 	 * scheduling flags.
    687  1.149.2.3      yamt 	 */
    688  1.149.2.3      yamt 	spc->spc_flags &= ~SPCF_SWITCHCLEAR;
    689  1.149.2.3      yamt 
    690  1.149.2.3      yamt 	LOCKDEBUG_BARRIER(&sched_mutex, 1);
    691  1.149.2.3      yamt 
    692  1.149.2.3      yamt 	/*
    693  1.149.2.3      yamt 	 * Switch to the new current LWP.  When we run again, we'll
    694  1.149.2.3      yamt 	 * return back here.
     695  1.149.2.3      yamt 	 * return here.
    696  1.149.2.3      yamt 	oldspl = MUTEX_SPIN_OLDSPL(l->l_cpu);
    697  1.149.2.3      yamt 
    698  1.149.2.3      yamt 	if (newl == NULL || newl->l_back == NULL)
    699      1.122   thorpej 		retval = cpu_switch(l, NULL);
    700  1.149.2.3      yamt 	else {
    701  1.149.2.3      yamt 		KASSERT(lwp_locked(newl, &sched_mutex));
    702      1.122   thorpej 		remrunqueue(newl);
    703      1.122   thorpej 		cpu_switchto(l, newl);
    704      1.122   thorpej 		retval = 0;
    705      1.122   thorpej 	}
    706      1.110    briggs 
    707      1.110    briggs 	/*
    708  1.149.2.3      yamt 	 * XXXSMP If we are using h/w performance counters, restore context.
    709       1.26       cgd 	 */
    710      1.114  gmcgarry #if PERFCTRS
    711  1.149.2.3      yamt 	if (PMC_ENABLED(l->l_proc)) {
    712  1.149.2.3      yamt 		pmc_restore_context(l->l_proc);
    713  1.149.2.2      yamt 	}
    714      1.114  gmcgarry #endif
    715      1.110    briggs 
    716      1.110    briggs 	/*
    717       1.76   thorpej 	 * We're running again; record our new start time.  We might
    718  1.149.2.3      yamt 	 * be running on a new CPU now, so don't use the cached
    719       1.76   thorpej 	 * schedstate_percpu pointer.
    720       1.76   thorpej 	 */
    721  1.149.2.3      yamt 	SYSCALL_TIME_WAKEUP(l);
    722      1.122   thorpej 	KDASSERT(l->l_cpu == curcpu());
    723      1.122   thorpej 	microtime(&l->l_cpu->ci_schedstate.spc_runtime);
    724  1.149.2.3      yamt 	splx(oldspl);
    725  1.149.2.2      yamt 
    726      1.122   thorpej 	return retval;
    727       1.26       cgd }
    728       1.26       cgd 
    729       1.26       cgd /*
    730       1.26       cgd  * Initialize the (doubly-linked) run queues
    731       1.26       cgd  * to be empty.
    732       1.26       cgd  */
    733       1.26       cgd void
    734       1.26       cgd rqinit()
    735       1.26       cgd {
    736       1.71  augustss 	int i;
    737       1.26       cgd 
    738       1.73   thorpej 	for (i = 0; i < RUNQUE_NQS; i++)
    739       1.73   thorpej 		sched_qs[i].ph_link = sched_qs[i].ph_rlink =
    740      1.122   thorpej 		    (struct lwp *)&sched_qs[i];
    741  1.149.2.3      yamt 
    742  1.149.2.3      yamt 	mutex_init(&sched_mutex, MUTEX_SPIN, IPL_SCHED);
    743       1.26       cgd }
    744       1.26       cgd 
    745  1.149.2.1      yamt static inline void
    746  1.149.2.3      yamt resched_lwp(struct lwp *l, u_char pri)
    747      1.119   thorpej {
    748      1.119   thorpej 	struct cpu_info *ci;
    749      1.119   thorpej 
    750      1.119   thorpej 	/*
    751      1.119   thorpej 	 * XXXSMP
    752      1.122   thorpej 	 * Since l->l_cpu persists across a context switch,
    753      1.119   thorpej 	 * this gives us *very weak* processor affinity, in
    754      1.119   thorpej 	 * that we notify the CPU on which the process last
    755      1.119   thorpej 	 * ran that it should try to switch.
    756      1.119   thorpej 	 *
    757      1.119   thorpej 	 * This does not guarantee that the process will run on
    758      1.119   thorpej 	 * that processor next, because another processor might
    759      1.119   thorpej 	 * grab it the next time it performs a context switch.
    760      1.119   thorpej 	 *
    761      1.119   thorpej 	 * This also does not handle the case where its last
    762      1.119   thorpej 	 * CPU is running a higher-priority process, but every
    763      1.119   thorpej 	 * other CPU is running a lower-priority process.  There
    764      1.119   thorpej 	 * are ways to handle this situation, but they're not
    765      1.119   thorpej 	 * currently very pretty, and we also need to weigh the
    766      1.119   thorpej 	 * cost of moving a process from one CPU to another.
    767      1.119   thorpej 	 *
    768      1.119   thorpej 	 * XXXSMP
    769      1.119   thorpej 	 * There is also the issue of locking the other CPU's
    770      1.119   thorpej 	 * sched state, which we currently do not do.
    771      1.119   thorpej 	 */
    772      1.122   thorpej 	ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
    773      1.121   thorpej 	if (pri < ci->ci_schedstate.spc_curpriority)
    774  1.149.2.3      yamt 		cpu_need_resched(ci);
    775      1.119   thorpej }
    776      1.119   thorpej 
    777       1.26       cgd /*
    778  1.149.2.3      yamt  * Change process state to be runnable, placing it on the run queue if it is
    779  1.149.2.3      yamt  * in memory, and awakening the swapper if it isn't in memory.
    780  1.149.2.3      yamt  *
    781  1.149.2.3      yamt  * Call with the process and LWP locked.  Will return with the LWP unlocked.
    782       1.26       cgd  */
    783       1.26       cgd void
    784      1.122   thorpej setrunnable(struct lwp *l)
    785       1.26       cgd {
    786      1.122   thorpej 	struct proc *p = l->l_proc;
    787  1.149.2.3      yamt 	sigset_t *ss;
    788       1.26       cgd 
    789  1.149.2.3      yamt 	KASSERT(mutex_owned(&p->p_smutex));
    790  1.149.2.3      yamt 	KASSERT(lwp_locked(l, NULL));
    791       1.83   thorpej 
    792      1.122   thorpej 	switch (l->l_stat) {
    793      1.122   thorpej 	case LSSTOP:
    794       1.33   mycroft 		/*
    795       1.33   mycroft 		 * If we're being traced (possibly because someone attached us
    796       1.33   mycroft 		 * while we were stopped), check for a signal from the debugger.
    797       1.33   mycroft 		 */
    798  1.149.2.3      yamt 		if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xstat != 0) {
    799  1.149.2.3      yamt 			if ((sigprop[p->p_xstat] & SA_TOLWP) != 0)
    800  1.149.2.3      yamt 				ss = &l->l_sigpend.sp_set;
    801  1.149.2.3      yamt 			else
    802  1.149.2.3      yamt 				ss = &p->p_sigpend.sp_set;
    803  1.149.2.3      yamt 			sigaddset(ss, p->p_xstat);
    804  1.149.2.3      yamt 			signotify(l);
    805       1.53   mycroft 		}
    806  1.149.2.3      yamt 		p->p_nrlwps++;
    807      1.122   thorpej 		break;
    808      1.122   thorpej 	case LSSUSPENDED:
    809  1.149.2.3      yamt 		l->l_flag &= ~LW_WSUSPEND;
    810  1.149.2.3      yamt 		p->p_nrlwps++;
    811  1.149.2.3      yamt 		break;
    812  1.149.2.3      yamt 	case LSSLEEP:
    813  1.149.2.3      yamt 		KASSERT(l->l_wchan != NULL);
    814       1.26       cgd 		break;
    815  1.149.2.3      yamt 	default:
    816  1.149.2.3      yamt 		panic("setrunnable: lwp %p state was %d", l, l->l_stat);
    817       1.26       cgd 	}
    818      1.139        cl 
    819  1.149.2.3      yamt 	/*
    820  1.149.2.3      yamt 	 * If the LWP was sleeping interruptably, then it's OK to start it
    821  1.149.2.3      yamt 	 * again.  If not, mark it as still sleeping.
    822  1.149.2.3      yamt 	 */
    823  1.149.2.3      yamt 	if (l->l_wchan != NULL) {
    824  1.149.2.3      yamt 		l->l_stat = LSSLEEP;
    825  1.149.2.3      yamt 		/* lwp_unsleep() will release the lock. */
    826  1.149.2.3      yamt 		lwp_unsleep(l);
    827  1.149.2.3      yamt 		return;
    828  1.149.2.3      yamt 	}
    829      1.139        cl 
    830  1.149.2.3      yamt 	LOCK_ASSERT(lwp_locked(l, &sched_mutex));
    831      1.122   thorpej 
    832  1.149.2.3      yamt 	/*
    833  1.149.2.3      yamt 	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
    834  1.149.2.3      yamt 	 * about to call mi_switch(), in which case it will yield.
    835  1.149.2.3      yamt 	 *
    836  1.149.2.3      yamt 	 * XXXSMP Will need to change for preemption.
    837  1.149.2.3      yamt 	 */
    838  1.149.2.3      yamt #ifdef MULTIPROCESSOR
    839  1.149.2.3      yamt 	if (l->l_cpu->ci_curlwp == l) {
    840  1.149.2.3      yamt #else
    841  1.149.2.3      yamt 	if (l == curlwp) {
    842  1.149.2.3      yamt #endif
    843  1.149.2.3      yamt 		l->l_stat = LSONPROC;
    844  1.149.2.3      yamt 		l->l_slptime = 0;
    845  1.149.2.3      yamt 		lwp_unlock(l);
    846  1.149.2.3      yamt 		return;
    847  1.149.2.3      yamt 	}
    848      1.122   thorpej 
    849  1.149.2.3      yamt 	/*
    850  1.149.2.3      yamt 	 * Set the LWP runnable.  If it's swapped out, we need to wake the swapper
    851  1.149.2.3      yamt 	 * to bring it back in.  Otherwise, enter it into a run queue.
    852  1.149.2.3      yamt 	 */
    853      1.122   thorpej 	if (l->l_slptime > 1)
    854      1.122   thorpej 		updatepri(l);
    855  1.149.2.3      yamt 	l->l_stat = LSRUN;
    856      1.122   thorpej 	l->l_slptime = 0;
    857  1.149.2.3      yamt 
    858  1.149.2.3      yamt 	if (l->l_flag & LW_INMEM) {
    859  1.149.2.3      yamt 		setrunqueue(l);
    860  1.149.2.3      yamt 		resched_lwp(l, l->l_priority);
    861  1.149.2.3      yamt 		lwp_unlock(l);
    862  1.149.2.3      yamt 	} else {
    863  1.149.2.3      yamt 		lwp_unlock(l);
    864  1.149.2.3      yamt 		uvm_kick_scheduler();
    865  1.149.2.3      yamt 	}
    866       1.26       cgd }
    867       1.26       cgd 
    868       1.26       cgd /*
    869       1.26       cgd  * Compute the priority of a process when running in user mode.
    870       1.26       cgd  * Arrange to reschedule if the resulting priority is better
    871       1.26       cgd  * than that of the current process.
    872       1.26       cgd  */
    873       1.26       cgd void
    874      1.122   thorpej resetpriority(struct lwp *l)
    875       1.26       cgd {
    876       1.71  augustss 	unsigned int newpriority;
    877      1.122   thorpej 	struct proc *p = l->l_proc;
    878       1.26       cgd 
    879  1.149.2.3      yamt 	/* XXXSMP LOCK_ASSERT(mutex_owned(&p->p_stmutex)); */
    880  1.149.2.3      yamt 	LOCK_ASSERT(lwp_locked(l, NULL));
    881  1.149.2.3      yamt 
    882  1.149.2.3      yamt 	if ((l->l_flag & LW_SYSTEM) != 0)
    883  1.149.2.3      yamt 		return;
    884       1.83   thorpej 
    885  1.149.2.1      yamt 	newpriority = PUSER + (p->p_estcpu >> ESTCPU_SHIFT) +
    886  1.149.2.3      yamt 	    NICE_WEIGHT * (p->p_nice - NZERO);
    887       1.26       cgd 	newpriority = min(newpriority, MAXPRI);
    888  1.149.2.3      yamt 	lwp_changepri(l, newpriority);
    889      1.122   thorpej }
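/*
 * Worked example (hypothetical userland sketch, assuming the NetBSD
 * defaults PUSER == 50, MAXPRI == 127 and NZERO == 20): reproduce the
 * user priority that resetpriority() would compute from a process's
 * p_estcpu and p_nice.
 */
#include <stdio.h>

#define	ESTCPU_SHIFT	11
#define	NICE_WEIGHT	2
#define	PUSER		50
#define	MAXPRI		127
#define	NZERO		20

static int
user_priority(unsigned int estcpu, int nice)
{
	int pri;

	pri = PUSER + (estcpu >> ESTCPU_SHIFT) + NICE_WEIGHT * (nice - NZERO);
	return pri < MAXPRI ? pri : MAXPRI;
}

int
main(void)
{

	/* estcpu of 8 decayed "ticks" and nice 24 yields priority 66. */
	printf("%d\n", user_priority(8 << ESTCPU_SHIFT, 24));
	return 0;
}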
    890      1.122   thorpej 
    891      1.130   nathanw /*
    892      1.122   thorpej  * Recompute priority for all LWPs in a process.
    893      1.122   thorpej  */
    894      1.122   thorpej void
    895      1.122   thorpej resetprocpriority(struct proc *p)
    896      1.122   thorpej {
    897      1.122   thorpej 	struct lwp *l;
    898      1.122   thorpej 
    899  1.149.2.3      yamt 	LOCK_ASSERT(mutex_owned(&p->p_stmutex));
    900  1.149.2.3      yamt 
    901  1.149.2.3      yamt 	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    902  1.149.2.3      yamt 		lwp_lock(l);
    903  1.149.2.3      yamt 		resetpriority(l);
    904  1.149.2.3      yamt 		lwp_unlock(l);
    905  1.149.2.3      yamt 	}
    906       1.55      ross }
    907       1.55      ross 
    908       1.55      ross /*
    909       1.56      ross  * We adjust the priority of the current process.  The priority of a process
    910      1.141       wiz  * gets worse as it accumulates CPU time.  The CPU usage estimator (p_estcpu)
    911       1.56      ross  * is increased here.  The formula for computing priorities (in kern_synch.c)
    912       1.56      ross  * will compute a different value each time p_estcpu increases. This can
    913       1.56      ross  * cause a switch, but unless the priority crosses a PPQ boundary the actual
    914      1.141       wiz  * queue will not change.  The CPU usage estimator ramps up quite quickly
    915       1.56      ross  * when the process is running (linearly), and decays away exponentially, at
    916       1.56      ross  * a rate which is proportionally slower when the system is busy.  The basic
    917       1.80   nathanw  * principle is that the system will 90% forget that the process used a lot
    918       1.56      ross  * of CPU time in 5 * loadav seconds.  This causes the system to favor
    919       1.56      ross  * processes which haven't run much recently, and to round-robin among other
    920       1.56      ross  * processes.
    921       1.55      ross  */
    922       1.55      ross 
    923       1.55      ross void
    924      1.122   thorpej schedclock(struct lwp *l)
    925       1.55      ross {
    926      1.122   thorpej 	struct proc *p = l->l_proc;
    927       1.77   thorpej 
    928  1.149.2.3      yamt 	mutex_spin_enter(&p->p_stmutex);
    929  1.149.2.1      yamt 	p->p_estcpu = ESTCPULIM(p->p_estcpu + (1 << ESTCPU_SHIFT));
    930  1.149.2.3      yamt 	lwp_lock(l);
    931      1.122   thorpej 	resetpriority(l);
    932  1.149.2.3      yamt 	mutex_spin_exit(&p->p_stmutex);
    933  1.149.2.3      yamt 	if ((l->l_flag & LW_SYSTEM) == 0 && l->l_priority >= PUSER)
    934      1.122   thorpej 		l->l_priority = l->l_usrpri;
    935  1.149.2.3      yamt 	lwp_unlock(l);
    936       1.26       cgd }
    937       1.94    bouyer 
    938  1.149.2.3      yamt /*
    939  1.149.2.3      yamt  * suspendsched:
    940  1.149.2.3      yamt  *
    941  1.149.2.3      yamt  *	Convert all non-L_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED.
    942  1.149.2.3      yamt  */
    943       1.94    bouyer void
    944  1.149.2.3      yamt suspendsched(void)
    945       1.94    bouyer {
    946  1.149.2.3      yamt #ifdef MULTIPROCESSOR
    947  1.149.2.3      yamt 	CPU_INFO_ITERATOR cii;
    948  1.149.2.3      yamt 	struct cpu_info *ci;
    949  1.149.2.3      yamt #endif
    950      1.122   thorpej 	struct lwp *l;
    951  1.149.2.3      yamt 	struct proc *p;
    952       1.94    bouyer 
    953       1.94    bouyer 	/*
    954  1.149.2.3      yamt 	 * We do this by process in order not to violate the locking rules.
    955       1.94    bouyer 	 */
    956  1.149.2.3      yamt 	mutex_enter(&proclist_mutex);
    957  1.149.2.3      yamt 	PROCLIST_FOREACH(p, &allproc) {
    958  1.149.2.3      yamt 		mutex_enter(&p->p_smutex);
    959  1.149.2.3      yamt 
    960  1.149.2.3      yamt 		if ((p->p_flag & PK_SYSTEM) != 0) {
    961  1.149.2.3      yamt 			mutex_exit(&p->p_smutex);
    962       1.94    bouyer 			continue;
    963  1.149.2.3      yamt 		}
    964  1.149.2.3      yamt 
    965  1.149.2.3      yamt 		p->p_stat = SSTOP;
    966  1.149.2.3      yamt 
    967  1.149.2.3      yamt 		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    968  1.149.2.3      yamt 			if (l == curlwp)
    969  1.149.2.3      yamt 				continue;
    970  1.149.2.3      yamt 
    971  1.149.2.3      yamt 			lwp_lock(l);
    972      1.122   thorpej 
    973       1.97     enami 			/*
     974  1.149.2.3      yamt 			 * Set LW_WREBOOT so that the LWP will suspend itself
    975  1.149.2.3      yamt 			 * when it tries to return to user mode.  We want to
     976  1.149.2.3      yamt 			 * try to get as many LWPs as possible to
    977  1.149.2.3      yamt 			 * the user / kernel boundary, so that they will
    978  1.149.2.3      yamt 			 * release any locks that they hold.
    979       1.97     enami 			 */
    980  1.149.2.3      yamt 			l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);
    981  1.149.2.3      yamt 
    982  1.149.2.3      yamt 			if (l->l_stat == LSSLEEP &&
    983  1.149.2.3      yamt 			    (l->l_flag & LW_SINTR) != 0) {
    984  1.149.2.3      yamt 				/* setrunnable() will release the lock. */
    985  1.149.2.3      yamt 				setrunnable(l);
    986  1.149.2.3      yamt 				continue;
    987  1.149.2.3      yamt 			}
    988  1.149.2.3      yamt 
    989  1.149.2.3      yamt 			lwp_unlock(l);
    990       1.94    bouyer 		}
    991  1.149.2.3      yamt 
    992  1.149.2.3      yamt 		mutex_exit(&p->p_smutex);
    993       1.94    bouyer 	}
    994  1.149.2.3      yamt 	mutex_exit(&proclist_mutex);
    995  1.149.2.3      yamt 
    996  1.149.2.3      yamt 	/*
    997  1.149.2.3      yamt 	 * Kick all CPUs to make them preempt any LWPs running in user mode.
    998  1.149.2.3      yamt 	 * They'll trap into the kernel and suspend themselves in userret().
    999  1.149.2.3      yamt 	 */
   1000  1.149.2.3      yamt 	sched_lock(0);
   1001  1.149.2.3      yamt #ifdef MULTIPROCESSOR
   1002  1.149.2.3      yamt 	for (CPU_INFO_FOREACH(cii, ci))
   1003  1.149.2.3      yamt 		cpu_need_resched(ci);
   1004  1.149.2.3      yamt #else
   1005  1.149.2.3      yamt 	cpu_need_resched(curcpu());
   1006  1.149.2.3      yamt #endif
   1007  1.149.2.3      yamt 	sched_unlock(0);
   1008       1.94    bouyer }
   1009      1.113  gmcgarry 
   1010      1.113  gmcgarry /*
   1011  1.149.2.1      yamt  * scheduler_fork_hook:
   1012  1.149.2.1      yamt  *
   1013  1.149.2.1      yamt  *	Inherit the parent's scheduler history.
   1014  1.149.2.1      yamt  */
   1015  1.149.2.1      yamt void
   1016  1.149.2.1      yamt scheduler_fork_hook(struct proc *parent, struct proc *child)
   1017  1.149.2.1      yamt {
   1018  1.149.2.1      yamt 
   1019  1.149.2.3      yamt 	LOCK_ASSERT(mutex_owned(&parent->p_smutex));
   1020  1.149.2.3      yamt 
   1021  1.149.2.1      yamt 	child->p_estcpu = child->p_estcpu_inherited = parent->p_estcpu;
   1022  1.149.2.1      yamt 	child->p_forktime = schedcpu_ticks;
   1023  1.149.2.1      yamt }
   1024  1.149.2.1      yamt 
   1025  1.149.2.1      yamt /*
   1026  1.149.2.1      yamt  * scheduler_wait_hook:
   1027  1.149.2.1      yamt  *
   1028  1.149.2.1      yamt  *	Chargeback parents for the sins of their children.
   1029  1.149.2.1      yamt  */
   1030  1.149.2.1      yamt void
   1031  1.149.2.1      yamt scheduler_wait_hook(struct proc *parent, struct proc *child)
   1032  1.149.2.1      yamt {
   1033  1.149.2.1      yamt 	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
   1034  1.149.2.1      yamt 	fixpt_t estcpu;
   1035  1.149.2.1      yamt 
   1036  1.149.2.1      yamt 	/* XXX Only if parent != init?? */
   1037  1.149.2.1      yamt 
   1038  1.149.2.3      yamt 	mutex_spin_enter(&parent->p_stmutex);
   1039  1.149.2.1      yamt 	estcpu = decay_cpu_batch(loadfac, child->p_estcpu_inherited,
   1040  1.149.2.1      yamt 	    schedcpu_ticks - child->p_forktime);
   1041  1.149.2.3      yamt 	if (child->p_estcpu > estcpu)
   1042  1.149.2.1      yamt 		parent->p_estcpu =
   1043  1.149.2.1      yamt 		    ESTCPULIM(parent->p_estcpu + child->p_estcpu - estcpu);
   1044  1.149.2.3      yamt 	mutex_spin_exit(&parent->p_stmutex);
   1045  1.149.2.3      yamt }
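
/*
 * Put differently: the estcpu the child inherited at fork is first decayed
 * over the (schedcpu_ticks - p_forktime) ticks that have passed since then,
 * and only the amount by which the child's current estcpu exceeds that
 * decayed baseline is charged back to the parent, clamped by ESTCPULIM().
 */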
   1046  1.149.2.3      yamt 
   1047  1.149.2.3      yamt /*
   1048  1.149.2.3      yamt  * sched_kpri:
   1049  1.149.2.3      yamt  *
   1050  1.149.2.3      yamt  *	Scale a priority level to a kernel priority level, usually
   1051  1.149.2.3      yamt  *	for an LWP that is about to sleep.
   1052  1.149.2.3      yamt  */
   1053  1.149.2.3      yamt int
   1054  1.149.2.3      yamt sched_kpri(struct lwp *l)
   1055  1.149.2.3      yamt {
   1056  1.149.2.3      yamt 	/*
   1057  1.149.2.3      yamt 	 * Scale user priorities (127 -> 50) up to kernel priorities
   1058  1.149.2.3      yamt 	 * in the range (49 -> 8).  Reserve the top 8 kernel priorities
   1059  1.149.2.3      yamt 	 * for high priority kthreads.  Kernel priorities passed in
   1060  1.149.2.3      yamt 	 * are left "as is".  XXX This is somewhat arbitrary.
   1061  1.149.2.3      yamt 	 */
   1062  1.149.2.3      yamt 	static const uint8_t kpri_tab[] = {
   1063  1.149.2.3      yamt 		 0,   1,   2,   3,   4,   5,   6,   7,
   1064  1.149.2.3      yamt 		 8,   9,  10,  11,  12,  13,  14,  15,
   1065  1.149.2.3      yamt 		16,  17,  18,  19,  20,  21,  22,  23,
   1066  1.149.2.3      yamt 		24,  25,  26,  27,  28,  29,  30,  31,
   1067  1.149.2.3      yamt 		32,  33,  34,  35,  36,  37,  38,  39,
   1068  1.149.2.3      yamt 		40,  41,  42,  43,  44,  45,  46,  47,
   1069  1.149.2.3      yamt 		48,  49,   8,   8,   9,   9,  10,  10,
   1070  1.149.2.3      yamt 		11,  11,  12,  12,  13,  14,  14,  15,
   1071  1.149.2.3      yamt 		15,  16,  16,  17,  17,  18,  18,  19,
   1072  1.149.2.3      yamt 		20,  20,  21,  21,  22,  22,  23,  23,
   1073  1.149.2.3      yamt 		24,  24,  25,  26,  26,  27,  27,  28,
   1074  1.149.2.3      yamt 		28,  29,  29,  30,  30,  31,  32,  32,
   1075  1.149.2.3      yamt 		33,  33,  34,  34,  35,  35,  36,  36,
   1076  1.149.2.3      yamt 		37,  38,  38,  39,  39,  40,  40,  41,
   1077  1.149.2.3      yamt 		41,  42,  42,  43,  44,  44,  45,  45,
   1078  1.149.2.3      yamt 		46,  46,  47,  47,  48,  48,  49,  49,
   1079  1.149.2.3      yamt 	};
   1080  1.149.2.3      yamt 
   1081  1.149.2.3      yamt 	return kpri_tab[l->l_usrpri];
   1082  1.149.2.3      yamt }
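
/*
 * For example, with the table above: priorities already in the kernel
 * range (0 -> 49) are returned unchanged, the strongest user priority
 * (50) maps to kernel priority 8, and the weakest (127) maps to 49.
 */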
   1083  1.149.2.3      yamt 
   1084  1.149.2.3      yamt /*
   1085  1.149.2.3      yamt  * sched_unsleep:
   1086  1.149.2.3      yamt  *
    1087  1.149.2.3      yamt  *	This is called when the LWP has not been awoken normally but instead
   1088  1.149.2.3      yamt  *	interrupted: for example, if the sleep timed out.  Because of this,
   1089  1.149.2.3      yamt  *	it's not a valid action for running or idle LWPs.
   1090  1.149.2.3      yamt  */
   1091  1.149.2.3      yamt void
   1092  1.149.2.3      yamt sched_unsleep(struct lwp *l)
   1093  1.149.2.3      yamt {
   1094  1.149.2.3      yamt 
   1095  1.149.2.3      yamt 	lwp_unlock(l);
   1096  1.149.2.3      yamt 	panic("sched_unsleep");
   1097  1.149.2.3      yamt }
   1098  1.149.2.3      yamt 
   1099  1.149.2.3      yamt /*
   1100  1.149.2.3      yamt  * sched_changepri:
   1101  1.149.2.3      yamt  *
   1102  1.149.2.3      yamt  *	Adjust the priority of an LWP.
   1103  1.149.2.3      yamt  */
   1104  1.149.2.3      yamt void
   1105  1.149.2.3      yamt sched_changepri(struct lwp *l, int pri)
   1106  1.149.2.3      yamt {
   1107  1.149.2.3      yamt 
   1108  1.149.2.3      yamt 	LOCK_ASSERT(lwp_locked(l, &sched_mutex));
   1109  1.149.2.3      yamt 
   1110  1.149.2.3      yamt 	l->l_usrpri = pri;
   1111  1.149.2.3      yamt 
   1112  1.149.2.3      yamt 	if (l->l_priority < PUSER)
   1113  1.149.2.3      yamt 		return;
   1114  1.149.2.3      yamt 	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0 ||
   1115  1.149.2.3      yamt 	    (l->l_priority / PPQ) == (pri / PPQ)) {
   1116  1.149.2.3      yamt 		l->l_priority = pri;
   1117  1.149.2.3      yamt 		return;
   1118  1.149.2.1      yamt 	}
   1119  1.149.2.3      yamt 
   1120  1.149.2.3      yamt 	remrunqueue(l);
   1121  1.149.2.3      yamt 	l->l_priority = pri;
   1122  1.149.2.3      yamt 	setrunqueue(l);
   1123  1.149.2.3      yamt 	resched_lwp(l, pri);
   1124  1.149.2.1      yamt }
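
/*
 * Note that the above only touches the run queues when it has to: if the
 * LWP is currently running at a kernel priority (l_priority < PUSER) only
 * l_usrpri is updated, and an LWP that is not LSRUN, not in memory, or
 * whose new priority falls into the same PPQ-sized queue simply has its
 * priority fields rewritten in place.  Otherwise the LWP is removed from
 * its old queue, re-inserted at the new priority, and the CPU is poked
 * via resched_lwp().
 */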
   1125  1.149.2.1      yamt 
   1126  1.149.2.1      yamt /*
   1127      1.113  gmcgarry  * Low-level routines to access the run queue.  Optimised assembler
   1128      1.113  gmcgarry  * routines can override these.
   1129      1.113  gmcgarry  */
   1130      1.113  gmcgarry 
   1131      1.113  gmcgarry #ifndef __HAVE_MD_RUNQUEUE
   1132      1.115  nisimura 
   1133      1.130   nathanw /*
    1134      1.134      matt  * On some architectures, it's faster to use an MSB ordering for the
    1135      1.134      matt  * priorities than the traditional LSB ordering.
   1136      1.134      matt  */
   1137      1.134      matt #ifdef __HAVE_BIGENDIAN_BITOPS
   1138      1.134      matt #define	RQMASK(n) (0x80000000 >> (n))
   1139      1.134      matt #else
   1140      1.134      matt #define	RQMASK(n) (0x00000001 << (n))
   1141      1.134      matt #endif
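
/*
 * For example, run queue 0 is represented by bit 0x00000001 under the
 * default LSB ordering, or by bit 0x80000000 when __HAVE_BIGENDIAN_BITOPS
 * is defined; either way sched_whichqs carries exactly one bit per queue.
 */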
   1142      1.134      matt 
   1143      1.134      matt /*
    1144      1.115  nisimura  * The primitives that manipulate the run queues.  sched_whichqs tells which
    1145      1.115  nisimura  * of the 32 queues in sched_qs have LWPs in them.  setrunqueue() puts LWPs
    1146      1.115  nisimura  * into queues, remrunqueue() removes them from queues.  The running LWP is
    1147      1.115  nisimura  * on no queue; other runnable LWPs are on the queue indexed by l_priority
    1148      1.115  nisimura  * divided by PPQ (4), which shrinks the 0-127 priority range into the 32
    1149      1.115  nisimura  * available queues.
   1150      1.130   nathanw  */
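
/*
 * For example, assuming PPQ is 4 as described above, an LWP with
 * l_priority 0 -> 3 lives on sched_qs[0], one with priority 50 on
 * sched_qs[12], and one with priority 124 -> 127 on sched_qs[31]; the
 * matching RQMASK() bit is set in sched_whichqs while a queue is
 * non-empty.
 */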
   1151      1.146      matt #ifdef RQDEBUG
   1152      1.146      matt static void
   1153      1.146      matt checkrunqueue(int whichq, struct lwp *l)
   1154      1.146      matt {
   1155      1.146      matt 	const struct prochd * const rq = &sched_qs[whichq];
   1156      1.146      matt 	struct lwp *l2;
   1157      1.146      matt 	int found = 0;
   1158      1.146      matt 	int die = 0;
   1159      1.146      matt 	int empty = 1;
   1160  1.149.2.2      yamt 	for (l2 = rq->ph_link; l2 != (const void*) rq; l2 = l2->l_forw) {
   1161      1.146      matt 		if (l2->l_stat != LSRUN) {
    1162      1.146      matt 			printf("checkrunqueue[%d]: lwp %p state (%d) "
    1163      1.146      matt 			    "!= LSRUN\n", whichq, l2, l2->l_stat);
   1164      1.146      matt 		}
   1165      1.146      matt 		if (l2->l_back->l_forw != l2) {
   1166      1.146      matt 			printf("checkrunqueue[%d]: lwp %p back-qptr (%p) "
   1167      1.146      matt 			    "corrupt %p\n", whichq, l2, l2->l_back,
   1168      1.146      matt 			    l2->l_back->l_forw);
   1169      1.146      matt 			die = 1;
   1170      1.146      matt 		}
   1171      1.146      matt 		if (l2->l_forw->l_back != l2) {
   1172      1.146      matt 			printf("checkrunqueue[%d]: lwp %p forw-qptr (%p) "
   1173      1.146      matt 			    "corrupt %p\n", whichq, l2, l2->l_forw,
   1174      1.146      matt 			    l2->l_forw->l_back);
   1175      1.146      matt 			die = 1;
   1176      1.146      matt 		}
   1177      1.146      matt 		if (l2 == l)
   1178      1.146      matt 			found = 1;
   1179      1.146      matt 		empty = 0;
   1180      1.146      matt 	}
   1181      1.146      matt 	if (empty && (sched_whichqs & RQMASK(whichq)) != 0) {
   1182      1.146      matt 		printf("checkrunqueue[%d]: bit set for empty run-queue %p\n",
   1183      1.146      matt 		    whichq, rq);
   1184      1.146      matt 		die = 1;
   1185      1.146      matt 	} else if (!empty && (sched_whichqs & RQMASK(whichq)) == 0) {
   1186      1.146      matt 		printf("checkrunqueue[%d]: bit clear for non-empty "
   1187      1.146      matt 		    "run-queue %p\n", whichq, rq);
   1188      1.146      matt 		die = 1;
   1189      1.146      matt 	}
   1190      1.146      matt 	if (l != NULL && (sched_whichqs & RQMASK(whichq)) == 0) {
   1191      1.146      matt 		printf("checkrunqueue[%d]: bit clear for active lwp %p\n",
   1192      1.146      matt 		    whichq, l);
   1193      1.146      matt 		die = 1;
   1194      1.146      matt 	}
   1195      1.146      matt 	if (l != NULL && empty) {
   1196      1.146      matt 		printf("checkrunqueue[%d]: empty run-queue %p with "
   1197      1.146      matt 		    "active lwp %p\n", whichq, rq, l);
   1198      1.146      matt 		die = 1;
   1199      1.146      matt 	}
   1200      1.146      matt 	if (l != NULL && !found) {
    1201      1.146      matt 		printf("checkrunqueue[%d]: lwp %p not in runqueue %p!\n",
   1202      1.146      matt 		    whichq, l, rq);
   1203      1.146      matt 		die = 1;
   1204      1.146      matt 	}
   1205      1.146      matt 	if (die)
   1206      1.146      matt 		panic("checkrunqueue: inconsistency found");
   1207      1.146      matt }
   1208      1.146      matt #endif /* RQDEBUG */
   1209      1.146      matt 
   1210      1.113  gmcgarry void
   1211      1.122   thorpej setrunqueue(struct lwp *l)
   1212      1.113  gmcgarry {
   1213      1.113  gmcgarry 	struct prochd *rq;
   1214      1.122   thorpej 	struct lwp *prev;
   1215  1.149.2.1      yamt 	const int whichq = l->l_priority / PPQ;
   1216      1.113  gmcgarry 
   1217  1.149.2.3      yamt 	LOCK_ASSERT(lwp_locked(l, &sched_mutex));
   1218  1.149.2.3      yamt 
   1219      1.146      matt #ifdef RQDEBUG
   1220      1.146      matt 	checkrunqueue(whichq, NULL);
   1221      1.146      matt #endif
   1222      1.113  gmcgarry #ifdef DIAGNOSTIC
   1223  1.149.2.3      yamt 	if (l->l_back != NULL || l->l_stat != LSRUN)
   1224      1.113  gmcgarry 		panic("setrunqueue");
   1225      1.113  gmcgarry #endif
   1226      1.134      matt 	sched_whichqs |= RQMASK(whichq);
   1227      1.113  gmcgarry 	rq = &sched_qs[whichq];
   1228      1.113  gmcgarry 	prev = rq->ph_rlink;
   1229      1.122   thorpej 	l->l_forw = (struct lwp *)rq;
   1230      1.122   thorpej 	rq->ph_rlink = l;
   1231      1.122   thorpej 	prev->l_forw = l;
   1232      1.122   thorpej 	l->l_back = prev;
   1233      1.146      matt #ifdef RQDEBUG
   1234      1.146      matt 	checkrunqueue(whichq, l);
   1235      1.146      matt #endif
   1236      1.113  gmcgarry }
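
/*
 * setrunqueue() appends the LWP at the tail of its queue: each sched_qs[]
 * entry heads a circular doubly-linked list in which the header itself
 * acts as the sentinel (hence the cast of rq to struct lwp *), and
 * rq->ph_rlink always points at the current tail.
 */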
   1237      1.113  gmcgarry 
   1238  1.149.2.3      yamt /*
   1239  1.149.2.3      yamt  * XXXSMP When LWP dispatch (cpu_switch()) is changed to use remrunqueue(),
    1240  1.149.2.3      yamt  * the drop of the effective priority level from kernel to user needs to be
   1241  1.149.2.3      yamt  * moved here from userret().  The assignment in userret() is currently
   1242  1.149.2.3      yamt  * done unlocked.
   1243  1.149.2.3      yamt  */
   1244      1.113  gmcgarry void
   1245      1.122   thorpej remrunqueue(struct lwp *l)
   1246      1.113  gmcgarry {
   1247      1.122   thorpej 	struct lwp *prev, *next;
   1248  1.149.2.1      yamt 	const int whichq = l->l_priority / PPQ;
   1249  1.149.2.3      yamt 
   1250  1.149.2.3      yamt 	LOCK_ASSERT(lwp_locked(l, &sched_mutex));
   1251  1.149.2.3      yamt 
   1252      1.146      matt #ifdef RQDEBUG
   1253      1.146      matt 	checkrunqueue(whichq, l);
   1254      1.146      matt #endif
   1255  1.149.2.3      yamt 
   1256  1.149.2.3      yamt #if defined(DIAGNOSTIC)
   1257  1.149.2.3      yamt 	if (((sched_whichqs & RQMASK(whichq)) == 0) || l->l_back == NULL) {
   1258  1.149.2.3      yamt 		/* Shouldn't happen - interrupts disabled. */
   1259      1.146      matt 		panic("remrunqueue: bit %d not set", whichq);
   1260  1.149.2.3      yamt 	}
   1261      1.113  gmcgarry #endif
   1262      1.122   thorpej 	prev = l->l_back;
   1263      1.122   thorpej 	l->l_back = NULL;
   1264      1.122   thorpej 	next = l->l_forw;
   1265      1.122   thorpej 	prev->l_forw = next;
   1266      1.122   thorpej 	next->l_back = prev;
   1267      1.113  gmcgarry 	if (prev == next)
   1268      1.134      matt 		sched_whichqs &= ~RQMASK(whichq);
   1269      1.146      matt #ifdef RQDEBUG
   1270      1.146      matt 	checkrunqueue(whichq, NULL);
   1271      1.146      matt #endif
   1272      1.113  gmcgarry }
   1273      1.113  gmcgarry 
   1274      1.134      matt #undef RQMASK
   1275      1.134      matt #endif /* !defined(__HAVE_MD_RUNQUEUE) */
   1276