kern_synch.c revision 1.149.2.3
      1  1.149.2.3      yamt /*	$NetBSD: kern_synch.c,v 1.149.2.3 2007/02/26 09:11:11 yamt Exp $	*/
      2       1.63   thorpej 
      3       1.63   thorpej /*-
      4  1.149.2.3      yamt  * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
      5       1.63   thorpej  * All rights reserved.
      6       1.63   thorpej  *
      7       1.63   thorpej  * This code is derived from software contributed to The NetBSD Foundation
      8       1.63   thorpej  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  1.149.2.3      yamt  * NASA Ames Research Center, by Charles M. Hannum, and by Andrew Doran.
     10       1.63   thorpej  *
     11       1.63   thorpej  * Redistribution and use in source and binary forms, with or without
     12       1.63   thorpej  * modification, are permitted provided that the following conditions
     13       1.63   thorpej  * are met:
     14       1.63   thorpej  * 1. Redistributions of source code must retain the above copyright
     15       1.63   thorpej  *    notice, this list of conditions and the following disclaimer.
     16       1.63   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     17       1.63   thorpej  *    notice, this list of conditions and the following disclaimer in the
     18       1.63   thorpej  *    documentation and/or other materials provided with the distribution.
     19       1.63   thorpej  * 3. All advertising materials mentioning features or use of this software
     20       1.63   thorpej  *    must display the following acknowledgement:
     21       1.63   thorpej  *	This product includes software developed by the NetBSD
     22       1.63   thorpej  *	Foundation, Inc. and its contributors.
     23       1.63   thorpej  * 4. Neither the name of The NetBSD Foundation nor the names of its
     24       1.63   thorpej  *    contributors may be used to endorse or promote products derived
     25       1.63   thorpej  *    from this software without specific prior written permission.
     26       1.63   thorpej  *
     27       1.63   thorpej  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     28       1.63   thorpej  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     29       1.63   thorpej  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     30       1.63   thorpej  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     31       1.63   thorpej  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32       1.63   thorpej  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33       1.63   thorpej  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     34       1.63   thorpej  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     35       1.63   thorpej  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     36       1.63   thorpej  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     37       1.63   thorpej  * POSSIBILITY OF SUCH DAMAGE.
     38       1.63   thorpej  */
     39       1.26       cgd 
     40       1.26       cgd /*-
     41       1.26       cgd  * Copyright (c) 1982, 1986, 1990, 1991, 1993
     42       1.26       cgd  *	The Regents of the University of California.  All rights reserved.
     43       1.26       cgd  * (c) UNIX System Laboratories, Inc.
     44       1.26       cgd  * All or some portions of this file are derived from material licensed
     45       1.26       cgd  * to the University of California by American Telephone and Telegraph
     46       1.26       cgd  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     47       1.26       cgd  * the permission of UNIX System Laboratories, Inc.
     48       1.26       cgd  *
     49       1.26       cgd  * Redistribution and use in source and binary forms, with or without
     50       1.26       cgd  * modification, are permitted provided that the following conditions
     51       1.26       cgd  * are met:
     52       1.26       cgd  * 1. Redistributions of source code must retain the above copyright
     53       1.26       cgd  *    notice, this list of conditions and the following disclaimer.
     54       1.26       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     55       1.26       cgd  *    notice, this list of conditions and the following disclaimer in the
     56       1.26       cgd  *    documentation and/or other materials provided with the distribution.
     57      1.136       agc  * 3. Neither the name of the University nor the names of its contributors
     58       1.26       cgd  *    may be used to endorse or promote products derived from this software
     59       1.26       cgd  *    without specific prior written permission.
     60       1.26       cgd  *
     61       1.26       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     62       1.26       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     63       1.26       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     64       1.26       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     65       1.26       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     66       1.26       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     67       1.26       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     68       1.26       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     69       1.26       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     70       1.26       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     71       1.26       cgd  * SUCH DAMAGE.
     72       1.26       cgd  *
     73       1.50      fvdl  *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
     74       1.26       cgd  */
     75      1.106     lukem 
     76      1.106     lukem #include <sys/cdefs.h>
     77  1.149.2.3      yamt __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.149.2.3 2007/02/26 09:11:11 yamt Exp $");
     78       1.48       mrg 
     79       1.52  jonathan #include "opt_ddb.h"
     80      1.109      yamt #include "opt_kstack.h"
     81       1.82   thorpej #include "opt_lockdebug.h"
     82       1.83   thorpej #include "opt_multiprocessor.h"
     83      1.110    briggs #include "opt_perfctrs.h"
     84       1.26       cgd 
     85  1.149.2.3      yamt #define	__MUTEX_PRIVATE
     86  1.149.2.3      yamt 
     87       1.26       cgd #include <sys/param.h>
     88       1.26       cgd #include <sys/systm.h>
     89       1.68   thorpej #include <sys/callout.h>
     90       1.26       cgd #include <sys/proc.h>
     91       1.26       cgd #include <sys/kernel.h>
     92       1.26       cgd #include <sys/buf.h>
     93      1.111    briggs #if defined(PERFCTRS)
     94      1.110    briggs #include <sys/pmc.h>
     95      1.111    briggs #endif
     96       1.26       cgd #include <sys/signalvar.h>
     97       1.26       cgd #include <sys/resourcevar.h>
     98       1.55      ross #include <sys/sched.h>
     99  1.149.2.3      yamt #include <sys/syscall_stats.h>
    100  1.149.2.1      yamt #include <sys/kauth.h>
    101  1.149.2.3      yamt #include <sys/sleepq.h>
    102  1.149.2.3      yamt #include <sys/lockdebug.h>
    103       1.47       mrg 
    104       1.47       mrg #include <uvm/uvm_extern.h>
    105       1.47       mrg 
    106       1.26       cgd #include <machine/cpu.h>
    107       1.34  christos 
    108       1.26       cgd int	lbolt;			/* once a second sleep address */
    109       1.88  sommerfe int	rrticks;		/* number of hardclock ticks per roundrobin() */
    110       1.26       cgd 
    111  1.149.2.1      yamt /*
    112       1.73   thorpej  * The global scheduler state.
    113       1.73   thorpej  */
    114  1.149.2.3      yamt kmutex_t	sched_mutex;		/* global sched state mutex */
    115  1.149.2.3      yamt struct prochd	sched_qs[RUNQUE_NQS];	/* run queues */
    116  1.149.2.1      yamt volatile uint32_t sched_whichqs;	/* bitmap of non-empty queues */
    117       1.83   thorpej 
    118  1.149.2.3      yamt void	schedcpu(void *);
    119  1.149.2.3      yamt void	updatepri(struct lwp *);
    120       1.34  christos 
    121  1.149.2.3      yamt void	sched_unsleep(struct lwp *);
    122  1.149.2.3      yamt void	sched_changepri(struct lwp *, int);
    123       1.63   thorpej 
    124      1.143      yamt struct callout schedcpu_ch = CALLOUT_INITIALIZER_SETFUNC(schedcpu, NULL);
    125  1.149.2.1      yamt static unsigned int schedcpu_ticks;
    126      1.122   thorpej 
    127  1.149.2.3      yamt syncobj_t sleep_syncobj = {
    128  1.149.2.3      yamt 	SOBJ_SLEEPQ_SORTED,
    129  1.149.2.3      yamt 	sleepq_unsleep,
    130  1.149.2.3      yamt 	sleepq_changepri
    131  1.149.2.3      yamt };
    132  1.149.2.3      yamt 
    133  1.149.2.3      yamt syncobj_t sched_syncobj = {
    134  1.149.2.3      yamt 	SOBJ_SLEEPQ_SORTED,
    135  1.149.2.3      yamt 	sched_unsleep,
    136  1.149.2.3      yamt 	sched_changepri
    137  1.149.2.3      yamt };
    138      1.122   thorpej 
    139       1.26       cgd /*
    140       1.26       cgd  * Force switch among equal priority processes every 100ms.
    141       1.88  sommerfe  * Called from hardclock every hz/10 == rrticks hardclock ticks.
    142       1.26       cgd  */
    143       1.26       cgd /* ARGSUSED */
    144       1.26       cgd void
    145       1.89  sommerfe roundrobin(struct cpu_info *ci)
    146       1.26       cgd {
    147       1.89  sommerfe 	struct schedstate_percpu *spc = &ci->ci_schedstate;
    148       1.26       cgd 
    149       1.88  sommerfe 	spc->spc_rrticks = rrticks;
    150      1.130   nathanw 
    151      1.122   thorpej 	if (curlwp != NULL) {
    152       1.73   thorpej 		if (spc->spc_flags & SPCF_SEENRR) {
    153       1.69   thorpej 			/*
    154       1.69   thorpej 			 * The process has already been through a roundrobin
    155       1.69   thorpej 			 * without switching and may be hogging the CPU.
    156       1.69   thorpej 			 * Indicate that the process should yield.
    157       1.69   thorpej 			 */
    158       1.73   thorpej 			spc->spc_flags |= SPCF_SHOULDYIELD;
    159       1.69   thorpej 		} else
    160       1.73   thorpej 			spc->spc_flags |= SPCF_SEENRR;
    161       1.69   thorpej 	}
    162  1.149.2.3      yamt 	cpu_need_resched(curcpu());
    163       1.26       cgd }
    164       1.26       cgd 
    165  1.149.2.1      yamt #define	PPQ	(128 / RUNQUE_NQS)	/* priorities per queue */
    166  1.149.2.1      yamt #define	NICE_WEIGHT 2			/* priorities per nice level */
    167  1.149.2.1      yamt 
    168  1.149.2.1      yamt #define	ESTCPU_SHIFT	11
    169  1.149.2.1      yamt #define	ESTCPU_MAX	((NICE_WEIGHT * PRIO_MAX - PPQ) << ESTCPU_SHIFT)
    170  1.149.2.1      yamt #define	ESTCPULIM(e)	min((e), ESTCPU_MAX)
    171  1.149.2.1      yamt 
    172       1.26       cgd /*
    173       1.26       cgd  * Constants for digital decay and forget:
    174       1.26       cgd  *	90% of (p_estcpu) usage in 5 * loadav time
    175       1.26       cgd  *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
    176       1.26       cgd  *          Note that, as ps(1) mentions, this can let percentages
    177       1.26       cgd  *          total over 100% (I've seen 137.9% for 3 processes).
    178       1.26       cgd  *
    179       1.26       cgd  * Note that hardclock updates p_estcpu and p_cpticks independently.
    180       1.26       cgd  *
    181       1.26       cgd  * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
    182       1.26       cgd  * That is, the system wants to compute a value of decay such
    183       1.26       cgd  * that the following for loop:
    184       1.26       cgd  * 	for (i = 0; i < (5 * loadavg); i++)
    185       1.26       cgd  * 		p_estcpu *= decay;
    186       1.26       cgd  * will compute
    187       1.26       cgd  * 	p_estcpu *= 0.1;
    188       1.26       cgd  * for all values of loadavg:
    189       1.26       cgd  *
    190       1.26       cgd  * Mathematically this loop can be expressed by saying:
    191       1.26       cgd  * 	decay ** (5 * loadavg) ~= .1
    192       1.26       cgd  *
    193       1.26       cgd  * The system computes decay as:
    194       1.26       cgd  * 	decay = (2 * loadavg) / (2 * loadavg + 1)
    195       1.26       cgd  *
    196       1.26       cgd  * We wish to prove that the system's computation of decay
    197       1.26       cgd  * will always fulfill the equation:
    198       1.26       cgd  * 	decay ** (5 * loadavg) ~= .1
    199       1.26       cgd  *
    200       1.26       cgd  * If we compute b as:
    201       1.26       cgd  * 	b = 2 * loadavg
    202       1.26       cgd  * then
    203       1.26       cgd  * 	decay = b / (b + 1)
    204       1.26       cgd  *
    205       1.26       cgd  * We now need to prove two things:
    206       1.26       cgd  *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
    207       1.26       cgd  *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
    208      1.130   nathanw  *
    209       1.26       cgd  * Facts:
    210       1.26       cgd  *         For x close to zero, exp(x) =~ 1 + x, since
    211       1.26       cgd  *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
    212       1.26       cgd  *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
    213       1.26       cgd  *         For x close to zero, ln(1+x) =~ x, since
    214       1.26       cgd  *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
    215       1.26       cgd  *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
    216       1.26       cgd  *         ln(.1) =~ -2.30
    217       1.26       cgd  *
    218       1.26       cgd  * Proof of (1):
    219       1.26       cgd  *    Solve (factor)**(power) =~ .1 given power (5*loadav):
    220       1.26       cgd  *	solving for factor,
    221       1.26       cgd  *      ln(factor) =~ (-2.30/5*loadav), or
    222       1.26       cgd  *      factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
    223       1.26       cgd  *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
    224       1.26       cgd  *
    225       1.26       cgd  * Proof of (2):
    226       1.26       cgd  *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
    227       1.26       cgd  *	solving for power,
    228       1.26       cgd  *      power*ln(b/(b+1)) =~ -2.30, or
    229       1.26       cgd  *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
    230       1.26       cgd  *
    231       1.26       cgd  * Actual power values for the implemented algorithm are as follows:
    232       1.26       cgd  *      loadav: 1       2       3       4
    233       1.26       cgd  *      power:  5.68    10.32   14.94   19.55
    234       1.26       cgd  */
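/*
 * Illustrative check (hypothetical userland sketch, not part of the
 * kernel source): reproduce the "actual power values" table above by
 * solving decay ** power == 0.1 for power, with decay = b / (b + 1)
 * and b = 2 * loadav.  Compile with -lm.
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	int loadav;

	for (loadav = 1; loadav <= 4; loadav++) {
		double b = 2.0 * loadav;
		double decay = b / (b + 1.0);
		double power = log(0.1) / log(decay);

		/* Prints approximately 5.68, 10.32, 14.94, 19.55. */
		printf("loadav %d: power %.2f\n", loadav, power);
	}
	return 0;
}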
    235       1.26       cgd 
    236       1.26       cgd /* calculations for digital decay to forget 90% of usage in 5*loadav sec */
    237       1.26       cgd #define	loadfactor(loadav)	(2 * (loadav))
    238  1.149.2.1      yamt 
    239  1.149.2.1      yamt static fixpt_t
    240  1.149.2.1      yamt decay_cpu(fixpt_t loadfac, fixpt_t estcpu)
    241  1.149.2.1      yamt {
    242  1.149.2.1      yamt 
    243  1.149.2.1      yamt 	if (estcpu == 0) {
    244  1.149.2.1      yamt 		return 0;
    245  1.149.2.1      yamt 	}
    246  1.149.2.1      yamt 
    247  1.149.2.1      yamt #if !defined(_LP64)
     248  1.149.2.1      yamt 	/* avoid 64-bit arithmetic. */
    249  1.149.2.1      yamt #define	FIXPT_MAX ((fixpt_t)((UINTMAX_C(1) << sizeof(fixpt_t) * CHAR_BIT) - 1))
    250  1.149.2.1      yamt 	if (__predict_true(loadfac <= FIXPT_MAX / ESTCPU_MAX)) {
    251  1.149.2.1      yamt 		return estcpu * loadfac / (loadfac + FSCALE);
    252  1.149.2.1      yamt 	}
    253  1.149.2.1      yamt #endif /* !defined(_LP64) */
    254  1.149.2.1      yamt 
    255  1.149.2.1      yamt 	return (uint64_t)estcpu * loadfac / (loadfac + FSCALE);
    256  1.149.2.1      yamt }
    257  1.149.2.1      yamt 
    258  1.149.2.1      yamt /*
    259  1.149.2.1      yamt  * For all load averages >= 1 and max p_estcpu of (255 << ESTCPU_SHIFT),
    260  1.149.2.1      yamt  * sleeping for at least seven times the loadfactor will decay p_estcpu to
    261  1.149.2.1      yamt  * less than (1 << ESTCPU_SHIFT).
    262  1.149.2.1      yamt  *
    263  1.149.2.1      yamt  * note that our ESTCPU_MAX is actually much smaller than (255 << ESTCPU_SHIFT).
    264  1.149.2.1      yamt  */
    265  1.149.2.1      yamt static fixpt_t
    266  1.149.2.1      yamt decay_cpu_batch(fixpt_t loadfac, fixpt_t estcpu, unsigned int n)
    267  1.149.2.1      yamt {
    268  1.149.2.1      yamt 
    269  1.149.2.1      yamt 	if ((n << FSHIFT) >= 7 * loadfac) {
    270  1.149.2.1      yamt 		return 0;
    271  1.149.2.1      yamt 	}
    272  1.149.2.1      yamt 
    273  1.149.2.1      yamt 	while (estcpu != 0 && n > 1) {
    274  1.149.2.1      yamt 		estcpu = decay_cpu(loadfac, estcpu);
    275  1.149.2.1      yamt 		n--;
    276  1.149.2.1      yamt 	}
    277  1.149.2.1      yamt 
    278  1.149.2.1      yamt 	return estcpu;
    279  1.149.2.1      yamt }
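/*
 * Illustrative check (hypothetical userland sketch): the comment above
 * decay_cpu_batch() claims that a maximal p_estcpu of (255 << ESTCPU_SHIFT)
 * decays below (1 << ESTCPU_SHIFT) once n seconds of sleep satisfy
 * (n << FSHIFT) >= 7 * loadfac, i.e. n >= 14 * loadav.  The worst case
 * is loadav == 1, where 255 * (2.0 / 3.0)**14 =~ 0.87 < 1.
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	int loadav;

	for (loadav = 1; loadav <= 8; loadav++) {
		double b = 2.0 * loadav;	/* loadfactor(loadav) */
		double decay = b / (b + 1.0);

		/* Every value printed is below 1.0. */
		printf("loadav %d: %.4f\n", loadav,
		    255.0 * pow(decay, 14.0 * loadav));
	}
	return 0;
}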
    280       1.26       cgd 
    281       1.26       cgd /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
    282       1.26       cgd fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */
    283       1.26       cgd 
    284       1.26       cgd /*
    285       1.26       cgd  * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
    286       1.26       cgd  * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
    287       1.26       cgd  * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
    288       1.26       cgd  *
    289       1.26       cgd  * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
    290       1.26       cgd  *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
    291       1.26       cgd  *
      292       1.26       cgd  * If you don't want to bother with the faster/more-accurate formula, you
    293       1.26       cgd  * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
    294       1.26       cgd  * (more general) method of calculating the %age of CPU used by a process.
    295       1.26       cgd  */
    296       1.26       cgd #define	CCPU_SHIFT	11
    297       1.26       cgd 
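/*
 * Illustrative check (hypothetical userland sketch, not part of the
 * kernel): confirm that ccpu is exp(-1/20) scaled by FSCALE, and that
 * applying it once a second leaves about 5% of p_pctcpu after 60
 * seconds, i.e. "decay 95% in 60 seconds".  FSCALE == 2048 is assumed
 * here, matching FSHIFT == 11 from <sys/param.h>.
 */
#include <math.h>
#include <stdio.h>

#define	FSCALE	2048.0

int
main(void)
{

	printf("ccpu      = %.0f\n", exp(-1.0 / 20.0) * FSCALE);	/* ~1948 */
	printf("remaining = %.4f\n", pow(exp(-1.0 / 20.0), 60));	/* ~0.0498 */
	return 0;
}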
    298       1.26       cgd /*
    299  1.149.2.3      yamt  * schedcpu:
    300  1.149.2.3      yamt  *
     301  1.149.2.3      yamt  *	Recompute process priorities every hz ticks.
    302  1.149.2.3      yamt  *
    303  1.149.2.3      yamt  *	XXXSMP This needs to be reorganised in order to reduce the locking
    304  1.149.2.3      yamt  *	burden.
    305       1.26       cgd  */
    306       1.26       cgd /* ARGSUSED */
    307       1.26       cgd void
    308       1.77   thorpej schedcpu(void *arg)
    309       1.26       cgd {
    310       1.71  augustss 	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
    311  1.149.2.3      yamt 	struct rlimit *rlim;
    312      1.122   thorpej 	struct lwp *l;
    313       1.71  augustss 	struct proc *p;
    314  1.149.2.3      yamt 	int minslp, clkhz, sig;
    315  1.149.2.3      yamt 	long runtm;
    316       1.26       cgd 
    317  1.149.2.1      yamt 	schedcpu_ticks++;
    318  1.149.2.1      yamt 
    319  1.149.2.3      yamt 	mutex_enter(&proclist_mutex);
    320      1.145      yamt 	PROCLIST_FOREACH(p, &allproc) {
    321       1.26       cgd 		/*
    322  1.149.2.3      yamt 		 * Increment time in/out of memory and sleep time (if
    323  1.149.2.3      yamt 		 * sleeping).  We ignore overflow; with 16-bit int's
    324       1.26       cgd 		 * (remember them?) overflow takes 45 days.
    325       1.26       cgd 		 */
    326      1.122   thorpej 		minslp = 2;
    327  1.149.2.3      yamt 		mutex_enter(&p->p_smutex);
    328  1.149.2.3      yamt 		runtm = p->p_rtime.tv_sec;
    329      1.122   thorpej 		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    330  1.149.2.3      yamt 			lwp_lock(l);
    331  1.149.2.3      yamt 			runtm += l->l_rtime.tv_sec;
    332      1.122   thorpej 			l->l_swtime++;
    333      1.130   nathanw 			if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
    334      1.122   thorpej 			    l->l_stat == LSSUSPENDED) {
    335      1.122   thorpej 				l->l_slptime++;
    336      1.122   thorpej 				minslp = min(minslp, l->l_slptime);
    337      1.122   thorpej 			} else
    338      1.122   thorpej 				minslp = 0;
    339  1.149.2.3      yamt 			lwp_unlock(l);
    340      1.122   thorpej 		}
    341       1.26       cgd 		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
    342  1.149.2.3      yamt 
    343       1.26       cgd 		/*
    344  1.149.2.3      yamt 		 * Check if the process exceeds its CPU resource allocation.
    345  1.149.2.3      yamt 		 * If over max, kill it.
    346       1.26       cgd 		 */
    347  1.149.2.3      yamt 		rlim = &p->p_rlimit[RLIMIT_CPU];
    348  1.149.2.3      yamt 		sig = 0;
    349  1.149.2.3      yamt 		if (runtm >= rlim->rlim_cur) {
    350  1.149.2.3      yamt 			if (runtm >= rlim->rlim_max)
    351  1.149.2.3      yamt 				sig = SIGKILL;
    352  1.149.2.3      yamt 			else {
    353  1.149.2.3      yamt 				sig = SIGXCPU;
    354  1.149.2.3      yamt 				if (rlim->rlim_cur < rlim->rlim_max)
    355  1.149.2.3      yamt 					rlim->rlim_cur += 5;
    356  1.149.2.3      yamt 			}
    357  1.149.2.3      yamt 		}
    358  1.149.2.3      yamt 
    359  1.149.2.3      yamt 		/*
    360  1.149.2.3      yamt 		 * If the process has run for more than autonicetime, reduce
    361  1.149.2.3      yamt 		 * priority to give others a chance.
    362  1.149.2.3      yamt 		 */
    363  1.149.2.3      yamt 		if (autonicetime && runtm > autonicetime && p->p_nice == NZERO
    364  1.149.2.3      yamt 		    && kauth_cred_geteuid(p->p_cred)) {
    365  1.149.2.3      yamt 			mutex_spin_enter(&p->p_stmutex);
    366  1.149.2.3      yamt 			p->p_nice = autoniceval + NZERO;
    367  1.149.2.3      yamt 			resetprocpriority(p);
    368  1.149.2.3      yamt 			mutex_spin_exit(&p->p_stmutex);
    369  1.149.2.3      yamt 		}
    370  1.149.2.3      yamt 
    371       1.26       cgd 		/*
    372  1.149.2.3      yamt 		 * If the process has slept the entire second,
    373  1.149.2.3      yamt 		 * stop recalculating its priority until it wakes up.
    374       1.26       cgd 		 */
    375  1.149.2.3      yamt 		if (minslp <= 1) {
    376  1.149.2.3      yamt 			/*
    377  1.149.2.3      yamt 			 * p_pctcpu is only for ps.
    378  1.149.2.3      yamt 			 */
    379  1.149.2.3      yamt 			mutex_spin_enter(&p->p_stmutex);
    380  1.149.2.3      yamt 			clkhz = stathz != 0 ? stathz : hz;
    381       1.26       cgd #if	(FSHIFT >= CCPU_SHIFT)
    382  1.149.2.3      yamt 			p->p_pctcpu += (clkhz == 100)?
    383  1.149.2.3      yamt 			    ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
    384  1.149.2.3      yamt 			    100 * (((fixpt_t) p->p_cpticks)
    385  1.149.2.3      yamt 			    << (FSHIFT - CCPU_SHIFT)) / clkhz;
    386       1.26       cgd #else
    387  1.149.2.3      yamt 			p->p_pctcpu += ((FSCALE - ccpu) *
    388  1.149.2.3      yamt 			    (p->p_cpticks * FSCALE / clkhz)) >> FSHIFT;
    389       1.26       cgd #endif
    390  1.149.2.3      yamt 			p->p_cpticks = 0;
    391  1.149.2.3      yamt 			p->p_estcpu = decay_cpu(loadfac, p->p_estcpu);
    392  1.149.2.3      yamt 
    393  1.149.2.3      yamt 			LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    394  1.149.2.3      yamt 				lwp_lock(l);
    395  1.149.2.3      yamt 				if (l->l_slptime <= 1 &&
    396  1.149.2.3      yamt 				    l->l_priority >= PUSER)
    397  1.149.2.3      yamt 					resetpriority(l);
    398  1.149.2.3      yamt 				lwp_unlock(l);
    399      1.122   thorpej 			}
    400  1.149.2.3      yamt 			mutex_spin_exit(&p->p_stmutex);
    401  1.149.2.3      yamt 		}
    402  1.149.2.3      yamt 
    403  1.149.2.3      yamt 		mutex_exit(&p->p_smutex);
    404  1.149.2.3      yamt 		if (sig) {
    405  1.149.2.3      yamt 			psignal(p, sig);
    406       1.26       cgd 		}
    407       1.26       cgd 	}
    408  1.149.2.3      yamt 	mutex_exit(&proclist_mutex);
    409       1.47       mrg 	uvm_meter();
    410       1.67      fvdl 	wakeup((caddr_t)&lbolt);
    411      1.143      yamt 	callout_schedule(&schedcpu_ch, hz);
    412       1.26       cgd }
    413       1.26       cgd 
    414       1.26       cgd /*
    415       1.26       cgd  * Recalculate the priority of a process after it has slept for a while.
    416       1.26       cgd  */
    417       1.26       cgd void
    418      1.122   thorpej updatepri(struct lwp *l)
    419       1.26       cgd {
    420      1.122   thorpej 	struct proc *p = l->l_proc;
    421       1.83   thorpej 	fixpt_t loadfac;
    422       1.83   thorpej 
    423  1.149.2.3      yamt 	LOCK_ASSERT(lwp_locked(l, NULL));
    424  1.149.2.1      yamt 	KASSERT(l->l_slptime > 1);
    425       1.83   thorpej 
    426       1.83   thorpej 	loadfac = loadfactor(averunnable.ldavg[0]);
    427       1.26       cgd 
    428  1.149.2.1      yamt 	l->l_slptime--; /* the first time was done in schedcpu */
    429  1.149.2.1      yamt 	/* XXX NJWLWP */
    430  1.149.2.3      yamt 	/* XXXSMP occasionally unlocked, should be per-LWP */
    431  1.149.2.1      yamt 	p->p_estcpu = decay_cpu_batch(loadfac, p->p_estcpu, l->l_slptime);
    432      1.122   thorpej 	resetpriority(l);
    433       1.26       cgd }
    434       1.26       cgd 
    435       1.26       cgd /*
    436  1.149.2.3      yamt  * During autoconfiguration or after a panic, a sleep will simply lower the
    437  1.149.2.3      yamt  * priority briefly to allow interrupts, then return.  The priority to be
    438  1.149.2.3      yamt  * used (safepri) is machine-dependent, thus this value is initialized and
    439  1.149.2.3      yamt  * maintained in the machine-dependent layers.  This priority will typically
    440  1.149.2.3      yamt  * be 0, or the lowest priority that is safe for use on the interrupt stack;
    441  1.149.2.3      yamt  * it can be made higher to block network software interrupts after panics.
    442       1.26       cgd  */
    443  1.149.2.3      yamt int	safepri;
    444       1.26       cgd 
    445       1.26       cgd /*
    446  1.149.2.3      yamt  * OBSOLETE INTERFACE
    447  1.149.2.3      yamt  *
    448       1.26       cgd  * General sleep call.  Suspends the current process until a wakeup is
    449       1.26       cgd  * performed on the specified identifier.  The process will then be made
    450  1.149.2.3      yamt  * runnable with the specified priority.  Sleeps at most timo/hz seconds (0
    451  1.149.2.3      yamt  * means no timeout).  If pri includes PCATCH flag, signals are checked
    452       1.26       cgd  * before and after sleeping, else signals are not checked.  Returns 0 if
    453       1.26       cgd  * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
    454       1.26       cgd  * signal needs to be delivered, ERESTART is returned if the current system
    455       1.26       cgd  * call should be restarted if possible, and EINTR is returned if the system
    456       1.26       cgd  * call should be interrupted by the signal (return EINTR).
    457       1.77   thorpej  *
    458  1.149.2.3      yamt  * The interlock is held until we are on a sleep queue. The interlock will
     459  1.149.2.3      yamt  * be locked before returning to the caller unless the PNORELOCK flag
    460  1.149.2.3      yamt  * is specified, in which case the interlock will always be unlocked upon
    461  1.149.2.3      yamt  * return.
    462       1.26       cgd  */
    463       1.26       cgd int
    464  1.149.2.3      yamt ltsleep(wchan_t ident, int priority, const char *wmesg, int timo,
    465  1.149.2.3      yamt 	volatile struct simplelock *interlock)
    466       1.26       cgd {
    467      1.122   thorpej 	struct lwp *l = curlwp;
    468  1.149.2.3      yamt 	sleepq_t *sq;
    469  1.149.2.3      yamt 	int error, catch;
    470       1.26       cgd 
    471  1.149.2.3      yamt 	if (sleepq_dontsleep(l)) {
    472  1.149.2.3      yamt 		(void)sleepq_abort(NULL, 0);
    473  1.149.2.3      yamt 		if ((priority & PNORELOCK) != 0)
    474       1.77   thorpej 			simple_unlock(interlock);
    475  1.149.2.3      yamt 		return 0;
    476      1.122   thorpej 	}
    477       1.77   thorpej 
    478  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    479  1.149.2.3      yamt 	sleepq_enter(sq, l);
    480       1.77   thorpej 
    481  1.149.2.3      yamt 	if (interlock != NULL) {
    482  1.149.2.3      yamt 		LOCK_ASSERT(simple_lock_held(interlock));
    483       1.77   thorpej 		simple_unlock(interlock);
    484       1.26       cgd 	}
    485      1.147     perry 
    486  1.149.2.3      yamt 	catch = priority & PCATCH;
    487  1.149.2.3      yamt 	sleepq_block(sq, priority & PRIMASK, ident, wmesg, timo, catch,
    488  1.149.2.3      yamt 	    &sleep_syncobj);
    489  1.149.2.3      yamt 	error = sleepq_unblock(timo, catch);
    490      1.139        cl 
    491  1.149.2.3      yamt 	if (interlock != NULL && (priority & PNORELOCK) == 0)
    492  1.149.2.3      yamt 		simple_lock(interlock);
    493  1.149.2.3      yamt 
    494  1.149.2.3      yamt 	return error;
    495      1.139        cl }
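/*
 * Minimal usage sketch (hypothetical driver code, not from this file):
 * wait for a condition protected by a simplelock, waking at least once
 * a second and letting signals interrupt the wait.  The producer side
 * would set foo_ready and call wakeup(&foo_ready) while holding
 * foo_slock.  Names prefixed "foo" are invented for illustration.
 */
static struct simplelock foo_slock = SIMPLELOCK_INITIALIZER;
static int foo_ready;

static int
foo_wait(void)
{
	int error = 0;

	simple_lock(&foo_slock);
	while (!foo_ready && error == 0) {
		/* The interlock is dropped while asleep and re-taken here. */
		error = ltsleep(&foo_ready, PWAIT | PCATCH, "foowait", hz,
		    &foo_slock);
		if (error == EWOULDBLOCK)	/* timeout; just poll again */
			error = 0;
	}
	simple_unlock(&foo_slock);
	return error;
}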
    496      1.139        cl 
    497       1.26       cgd /*
    498  1.149.2.3      yamt  * General sleep call for situations where a wake-up is not expected.
    499       1.63   thorpej  */
    500  1.149.2.3      yamt int
    501  1.149.2.3      yamt kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
    502       1.83   thorpej {
    503  1.149.2.3      yamt 	struct lwp *l = curlwp;
    504  1.149.2.3      yamt 	sleepq_t *sq;
    505  1.149.2.3      yamt 	int error;
    506       1.83   thorpej 
    507  1.149.2.3      yamt 	if (sleepq_dontsleep(l))
    508  1.149.2.3      yamt 		return sleepq_abort(NULL, 0);
    509       1.63   thorpej 
    510  1.149.2.3      yamt 	if (mtx != NULL)
    511  1.149.2.3      yamt 		mutex_exit(mtx);
    512  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, l);
    513  1.149.2.3      yamt 	sleepq_enter(sq, l);
    514  1.149.2.3      yamt 	sleepq_block(sq, sched_kpri(l), l, wmesg, timo, intr, &sleep_syncobj);
    515  1.149.2.3      yamt 	error = sleepq_unblock(timo, intr);
    516  1.149.2.3      yamt 	if (mtx != NULL)
    517  1.149.2.3      yamt 		mutex_enter(mtx);
    518       1.83   thorpej 
    519  1.149.2.3      yamt 	return error;
    520       1.83   thorpej }
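/*
 * Minimal usage sketch (hypothetical): delay the calling LWP for about
 * a tenth of a second with no wakeup channel, no interlock, and with
 * signals unable to cut the sleep short.
 */
static void
foo_delay(void)
{

	(void)kpause("foodly", false, hz / 10, NULL);
}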
    521       1.83   thorpej 
    522       1.63   thorpej /*
    523  1.149.2.3      yamt  * OBSOLETE INTERFACE
    524  1.149.2.3      yamt  *
    525       1.26       cgd  * Make all processes sleeping on the specified identifier runnable.
    526       1.26       cgd  */
    527       1.26       cgd void
    528  1.149.2.3      yamt wakeup(wchan_t ident)
    529       1.26       cgd {
    530  1.149.2.3      yamt 	sleepq_t *sq;
    531       1.83   thorpej 
    532  1.149.2.3      yamt 	if (cold)
    533  1.149.2.3      yamt 		return;
    534       1.83   thorpej 
    535  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    536  1.149.2.3      yamt 	sleepq_wake(sq, ident, (u_int)-1);
    537       1.63   thorpej }
    538       1.63   thorpej 
    539       1.63   thorpej /*
    540  1.149.2.3      yamt  * OBSOLETE INTERFACE
    541  1.149.2.3      yamt  *
    542       1.63   thorpej  * Make the highest priority process first in line on the specified
    543       1.63   thorpej  * identifier runnable.
    544       1.63   thorpej  */
    545  1.149.2.3      yamt void
    546  1.149.2.3      yamt wakeup_one(wchan_t ident)
    547       1.63   thorpej {
    548  1.149.2.3      yamt 	sleepq_t *sq;
    549       1.63   thorpej 
    550  1.149.2.3      yamt 	if (cold)
    551  1.149.2.3      yamt 		return;
    552  1.149.2.3      yamt 
    553  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    554  1.149.2.3      yamt 	sleepq_wake(sq, ident, 1);
    555      1.117  gmcgarry }
    556      1.117  gmcgarry 
    557  1.149.2.3      yamt 
    558      1.117  gmcgarry /*
    559      1.117  gmcgarry  * General yield call.  Puts the current process back on its run queue and
    560      1.117  gmcgarry  * performs a voluntary context switch.  Should only be called when the
    561      1.117  gmcgarry  * current process explicitly requests it (eg sched_yield(2) in compat code).
    562      1.117  gmcgarry  */
    563      1.117  gmcgarry void
    564      1.117  gmcgarry yield(void)
    565      1.117  gmcgarry {
    566      1.122   thorpej 	struct lwp *l = curlwp;
    567      1.117  gmcgarry 
    568  1.149.2.3      yamt 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
    569  1.149.2.3      yamt 	lwp_lock(l);
    570  1.149.2.3      yamt 	if (l->l_stat == LSONPROC) {
    571  1.149.2.3      yamt 		KASSERT(lwp_locked(l, &sched_mutex));
    572  1.149.2.3      yamt 		l->l_priority = l->l_usrpri;
    573  1.149.2.3      yamt 	}
    574  1.149.2.3      yamt 	l->l_nvcsw++;
    575      1.122   thorpej 	mi_switch(l, NULL);
    576  1.149.2.3      yamt 	KERNEL_LOCK(l->l_biglocks, l);
    577       1.69   thorpej }
    578       1.69   thorpej 
    579       1.69   thorpej /*
    580       1.69   thorpej  * General preemption call.  Puts the current process back on its run queue
    581  1.149.2.1      yamt  * and performs an involuntary context switch.
    582       1.69   thorpej  */
    583       1.69   thorpej void
    584  1.149.2.3      yamt preempt(void)
    585       1.69   thorpej {
    586      1.122   thorpej 	struct lwp *l = curlwp;
    587       1.69   thorpej 
    588  1.149.2.3      yamt 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
    589  1.149.2.3      yamt 	lwp_lock(l);
    590  1.149.2.3      yamt 	if (l->l_stat == LSONPROC) {
    591  1.149.2.3      yamt 		KASSERT(lwp_locked(l, &sched_mutex));
    592  1.149.2.3      yamt 		l->l_priority = l->l_usrpri;
    593  1.149.2.3      yamt 	}
    594  1.149.2.3      yamt 	l->l_nivcsw++;
    595  1.149.2.3      yamt 	(void)mi_switch(l, NULL);
    596  1.149.2.3      yamt 	KERNEL_LOCK(l->l_biglocks, l);
    597       1.69   thorpej }
    598       1.69   thorpej 
    599       1.69   thorpej /*
    600  1.149.2.3      yamt  * The machine independent parts of context switch.  Switch to "new"
    601  1.149.2.3      yamt  * if non-NULL, otherwise let cpu_switch choose the next lwp.
    602      1.130   nathanw  *
    603      1.122   thorpej  * Returns 1 if another process was actually run.
    604       1.26       cgd  */
    605      1.122   thorpej int
    606      1.122   thorpej mi_switch(struct lwp *l, struct lwp *newl)
    607       1.26       cgd {
    608       1.76   thorpej 	struct schedstate_percpu *spc;
    609       1.26       cgd 	struct timeval tv;
    610  1.149.2.3      yamt 	int retval, oldspl;
    611  1.149.2.3      yamt 	long s, u;
    612       1.85  sommerfe 
    613  1.149.2.3      yamt 	LOCK_ASSERT(lwp_locked(l, NULL));
    614       1.76   thorpej 
    615       1.54       chs #ifdef LOCKDEBUG
    616  1.149.2.1      yamt 	spinlock_switchcheck();
    617       1.81   thorpej 	simple_lock_switchcheck();
    618       1.50      fvdl #endif
    619  1.149.2.3      yamt #ifdef KSTACK_CHECK_MAGIC
    620  1.149.2.3      yamt 	kstack_check_magic(l);
    621  1.149.2.3      yamt #endif
    622  1.149.2.3      yamt 
    623  1.149.2.3      yamt 	/*
    624  1.149.2.3      yamt 	 * It's safe to read the per CPU schedstate unlocked here, as all we
    625  1.149.2.3      yamt 	 * are after is the run time and that's guarenteed to have been last
    626  1.149.2.3      yamt 	 * updated by this CPU.
    627  1.149.2.3      yamt 	 */
    628  1.149.2.3      yamt 	KDASSERT(l->l_cpu == curcpu());
    629  1.149.2.3      yamt 	spc = &l->l_cpu->ci_schedstate;
    630       1.81   thorpej 
    631       1.26       cgd 	/*
    632       1.26       cgd 	 * Compute the amount of time during which the current
    633      1.113  gmcgarry 	 * process was running.
    634       1.26       cgd 	 */
    635       1.26       cgd 	microtime(&tv);
    636  1.149.2.3      yamt 	u = l->l_rtime.tv_usec +
    637      1.122   thorpej 	    (tv.tv_usec - spc->spc_runtime.tv_usec);
    638  1.149.2.3      yamt 	s = l->l_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
    639       1.26       cgd 	if (u < 0) {
    640       1.26       cgd 		u += 1000000;
    641       1.26       cgd 		s--;
    642       1.26       cgd 	} else if (u >= 1000000) {
    643       1.26       cgd 		u -= 1000000;
    644       1.26       cgd 		s++;
    645       1.26       cgd 	}
    646  1.149.2.3      yamt 	l->l_rtime.tv_usec = u;
    647  1.149.2.3      yamt 	l->l_rtime.tv_sec = s;
    648  1.149.2.3      yamt 
    649  1.149.2.3      yamt 	/* Count time spent in current system call */
    650  1.149.2.3      yamt 	SYSCALL_TIME_SLEEP(l);
    651       1.26       cgd 
    652       1.26       cgd 	/*
    653  1.149.2.3      yamt 	 * XXXSMP If we are using h/w performance counters, save context.
    654       1.69   thorpej 	 */
    655  1.149.2.3      yamt #if PERFCTRS
    656  1.149.2.3      yamt 	if (PMC_ENABLED(l->l_proc)) {
    657  1.149.2.3      yamt 		pmc_save_context(l->l_proc);
    658  1.149.2.3      yamt 	}
    659      1.109      yamt #endif
    660       1.26       cgd 
    661      1.113  gmcgarry 	/*
    662  1.149.2.3      yamt 	 * Acquire the sched_mutex if necessary.  It will be released by
    663  1.149.2.3      yamt 	 * cpu_switch once it has decided to idle, or picked another LWP
    664  1.149.2.3      yamt 	 * to run.
    665      1.113  gmcgarry 	 */
    666  1.149.2.3      yamt #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
    667  1.149.2.3      yamt 	if (l->l_mutex != &sched_mutex) {
    668  1.149.2.3      yamt 		mutex_spin_enter(&sched_mutex);
    669  1.149.2.3      yamt 		lwp_unlock(l);
    670  1.149.2.2      yamt 	}
    671      1.110    briggs #endif
    672      1.113  gmcgarry 
    673      1.113  gmcgarry 	/*
     674  1.149.2.3      yamt 	 * If we are still on the CPU and have gotten this far, then we must yield.
    675      1.113  gmcgarry 	 */
    676  1.149.2.3      yamt 	KASSERT(l->l_stat != LSRUN);
    677  1.149.2.3      yamt 	if (l->l_stat == LSONPROC) {
    678  1.149.2.3      yamt 		KASSERT(lwp_locked(l, &sched_mutex));
    679  1.149.2.3      yamt 		l->l_stat = LSRUN;
    680  1.149.2.3      yamt 		setrunqueue(l);
    681  1.149.2.3      yamt 	}
    682      1.114  gmcgarry 	uvmexp.swtch++;
    683  1.149.2.3      yamt 
    684  1.149.2.3      yamt 	/*
    685  1.149.2.3      yamt 	 * Process is about to yield the CPU; clear the appropriate
    686  1.149.2.3      yamt 	 * scheduling flags.
    687  1.149.2.3      yamt 	 */
    688  1.149.2.3      yamt 	spc->spc_flags &= ~SPCF_SWITCHCLEAR;
    689  1.149.2.3      yamt 
    690  1.149.2.3      yamt 	LOCKDEBUG_BARRIER(&sched_mutex, 1);
    691  1.149.2.3      yamt 
    692  1.149.2.3      yamt 	/*
    693  1.149.2.3      yamt 	 * Switch to the new current LWP.  When we run again, we'll
    694  1.149.2.3      yamt 	 * return back here.
     695  1.149.2.3      yamt 	 * return here.
    696  1.149.2.3      yamt 	oldspl = MUTEX_SPIN_OLDSPL(l->l_cpu);
    697  1.149.2.3      yamt 
    698  1.149.2.3      yamt 	if (newl == NULL || newl->l_back == NULL)
    699      1.122   thorpej 		retval = cpu_switch(l, NULL);
    700  1.149.2.3      yamt 	else {
    701  1.149.2.3      yamt 		KASSERT(lwp_locked(newl, &sched_mutex));
    702      1.122   thorpej 		remrunqueue(newl);
    703      1.122   thorpej 		cpu_switchto(l, newl);
    704      1.122   thorpej 		retval = 0;
    705      1.122   thorpej 	}
    706      1.110    briggs 
    707      1.110    briggs 	/*
    708  1.149.2.3      yamt 	 * XXXSMP If we are using h/w performance counters, restore context.
    709       1.26       cgd 	 */
    710      1.114  gmcgarry #if PERFCTRS
    711  1.149.2.3      yamt 	if (PMC_ENABLED(l->l_proc)) {
    712  1.149.2.3      yamt 		pmc_restore_context(l->l_proc);
    713  1.149.2.2      yamt 	}
    714      1.114  gmcgarry #endif
    715      1.110    briggs 
    716      1.110    briggs 	/*
    717       1.76   thorpej 	 * We're running again; record our new start time.  We might
    718  1.149.2.3      yamt 	 * be running on a new CPU now, so don't use the cached
    719       1.76   thorpej 	 * schedstate_percpu pointer.
    720       1.76   thorpej 	 */
    721  1.149.2.3      yamt 	SYSCALL_TIME_WAKEUP(l);
    722      1.122   thorpej 	KDASSERT(l->l_cpu == curcpu());
    723      1.122   thorpej 	microtime(&l->l_cpu->ci_schedstate.spc_runtime);
    724  1.149.2.3      yamt 	splx(oldspl);
    725  1.149.2.2      yamt 
    726      1.122   thorpej 	return retval;
    727       1.26       cgd }
    728       1.26       cgd 
    729       1.26       cgd /*
    730       1.26       cgd  * Initialize the (doubly-linked) run queues
    731       1.26       cgd  * to be empty.
    732       1.26       cgd  */
    733       1.26       cgd void
    734       1.26       cgd rqinit()
    735       1.26       cgd {
    736       1.71  augustss 	int i;
    737       1.26       cgd 
    738       1.73   thorpej 	for (i = 0; i < RUNQUE_NQS; i++)
    739       1.73   thorpej 		sched_qs[i].ph_link = sched_qs[i].ph_rlink =
    740      1.122   thorpej 		    (struct lwp *)&sched_qs[i];
    741  1.149.2.3      yamt 
    742  1.149.2.3      yamt 	mutex_init(&sched_mutex, MUTEX_SPIN, IPL_SCHED);
    743       1.26       cgd }
    744       1.26       cgd 
    745  1.149.2.1      yamt static inline void
    746  1.149.2.3      yamt resched_lwp(struct lwp *l, u_char pri)
    747      1.119   thorpej {
    748      1.119   thorpej 	struct cpu_info *ci;
    749      1.119   thorpej 
    750      1.119   thorpej 	/*
    751      1.119   thorpej 	 * XXXSMP
    752      1.122   thorpej 	 * Since l->l_cpu persists across a context switch,
    753      1.119   thorpej 	 * this gives us *very weak* processor affinity, in
    754      1.119   thorpej 	 * that we notify the CPU on which the process last
    755      1.119   thorpej 	 * ran that it should try to switch.
    756      1.119   thorpej 	 *
    757      1.119   thorpej 	 * This does not guarantee that the process will run on
    758      1.119   thorpej 	 * that processor next, because another processor might
    759      1.119   thorpej 	 * grab it the next time it performs a context switch.
    760      1.119   thorpej 	 *
    761      1.119   thorpej 	 * This also does not handle the case where its last
    762      1.119   thorpej 	 * CPU is running a higher-priority process, but every
    763      1.119   thorpej 	 * other CPU is running a lower-priority process.  There
    764      1.119   thorpej 	 * are ways to handle this situation, but they're not
    765      1.119   thorpej 	 * currently very pretty, and we also need to weigh the
    766      1.119   thorpej 	 * cost of moving a process from one CPU to another.
    767      1.119   thorpej 	 *
    768      1.119   thorpej 	 * XXXSMP
    769      1.119   thorpej 	 * There is also the issue of locking the other CPU's
    770      1.119   thorpej 	 * sched state, which we currently do not do.
    771      1.119   thorpej 	 */
    772      1.122   thorpej 	ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
    773      1.121   thorpej 	if (pri < ci->ci_schedstate.spc_curpriority)
    774  1.149.2.3      yamt 		cpu_need_resched(ci);
    775      1.119   thorpej }
    776      1.119   thorpej 
    777       1.26       cgd /*
    778  1.149.2.3      yamt  * Change process state to be runnable, placing it on the run queue if it is
    779  1.149.2.3      yamt  * in memory, and awakening the swapper if it isn't in memory.
    780  1.149.2.3      yamt  *
    781  1.149.2.3      yamt  * Call with the process and LWP locked.  Will return with the LWP unlocked.
    782       1.26       cgd  */
    783       1.26       cgd void
    784      1.122   thorpej setrunnable(struct lwp *l)
    785       1.26       cgd {
    786      1.122   thorpej 	struct proc *p = l->l_proc;
    787  1.149.2.3      yamt 	sigset_t *ss;
    788       1.26       cgd 
    789  1.149.2.3      yamt 	KASSERT(mutex_owned(&p->p_smutex));
    790  1.149.2.3      yamt 	KASSERT(lwp_locked(l, NULL));
    791       1.83   thorpej 
    792      1.122   thorpej 	switch (l->l_stat) {
    793      1.122   thorpej 	case LSSTOP:
    794       1.33   mycroft 		/*
    795       1.33   mycroft 		 * If we're being traced (possibly because someone attached us
    796       1.33   mycroft 		 * while we were stopped), check for a signal from the debugger.
    797       1.33   mycroft 		 */
    798  1.149.2.3      yamt 		if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xstat != 0) {
    799  1.149.2.3      yamt 			if ((sigprop[p->p_xstat] & SA_TOLWP) != 0)
    800  1.149.2.3      yamt 				ss = &l->l_sigpend.sp_set;
    801  1.149.2.3      yamt 			else
    802  1.149.2.3      yamt 				ss = &p->p_sigpend.sp_set;
    803  1.149.2.3      yamt 			sigaddset(ss, p->p_xstat);
    804  1.149.2.3      yamt 			signotify(l);
    805       1.53   mycroft 		}
    806  1.149.2.3      yamt 		p->p_nrlwps++;
    807      1.122   thorpej 		break;
    808      1.122   thorpej 	case LSSUSPENDED:
    809  1.149.2.3      yamt 		l->l_flag &= ~LW_WSUSPEND;
    810  1.149.2.3      yamt 		p->p_nrlwps++;
    811  1.149.2.3      yamt 		break;
    812  1.149.2.3      yamt 	case LSSLEEP:
    813  1.149.2.3      yamt 		KASSERT(l->l_wchan != NULL);
    814       1.26       cgd 		break;
    815  1.149.2.3      yamt 	default:
    816  1.149.2.3      yamt 		panic("setrunnable: lwp %p state was %d", l, l->l_stat);
    817       1.26       cgd 	}
    818      1.139        cl 
    819  1.149.2.3      yamt 	/*
    820  1.149.2.3      yamt 	 * If the LWP was sleeping interruptably, then it's OK to start it
    821  1.149.2.3      yamt 	 * again.  If not, mark it as still sleeping.
    822  1.149.2.3      yamt 	 */
    823  1.149.2.3      yamt 	if (l->l_wchan != NULL) {
    824  1.149.2.3      yamt 		l->l_stat = LSSLEEP;
    825  1.149.2.3      yamt 		/* lwp_unsleep() will release the lock. */
    826  1.149.2.3      yamt 		lwp_unsleep(l);
    827  1.149.2.3      yamt 		return;
    828  1.149.2.3      yamt 	}
    829      1.139        cl 
    830  1.149.2.3      yamt 	LOCK_ASSERT(lwp_locked(l, &sched_mutex));
    831      1.122   thorpej 
    832  1.149.2.3      yamt 	/*
    833  1.149.2.3      yamt 	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
    834  1.149.2.3      yamt 	 * about to call mi_switch(), in which case it will yield.
    835  1.149.2.3      yamt 	 *
    836  1.149.2.3      yamt 	 * XXXSMP Will need to change for preemption.
    837  1.149.2.3      yamt 	 */
    838  1.149.2.3      yamt #ifdef MULTIPROCESSOR
    839  1.149.2.3      yamt 	if (l->l_cpu->ci_curlwp == l) {
    840  1.149.2.3      yamt #else
    841  1.149.2.3      yamt 	if (l == curlwp) {
    842  1.149.2.3      yamt #endif
    843  1.149.2.3      yamt 		l->l_stat = LSONPROC;
    844  1.149.2.3      yamt 		l->l_slptime = 0;
    845  1.149.2.3      yamt 		lwp_unlock(l);
    846  1.149.2.3      yamt 		return;
    847  1.149.2.3      yamt 	}
    848      1.122   thorpej 
    849  1.149.2.3      yamt 	/*
    850  1.149.2.3      yamt 	 * Set the LWP runnable.  If it's swapped out, we need to wake the swapper
    851  1.149.2.3      yamt 	 * to bring it back in.  Otherwise, enter it into a run queue.
    852  1.149.2.3      yamt 	 */
    853      1.122   thorpej 	if (l->l_slptime > 1)
    854      1.122   thorpej 		updatepri(l);
    855  1.149.2.3      yamt 	l->l_stat = LSRUN;
    856      1.122   thorpej 	l->l_slptime = 0;
    857  1.149.2.3      yamt 
    858  1.149.2.3      yamt 	if (l->l_flag & LW_INMEM) {
    859  1.149.2.3      yamt 		setrunqueue(l);
    860  1.149.2.3      yamt 		resched_lwp(l, l->l_priority);
    861  1.149.2.3      yamt 		lwp_unlock(l);
    862  1.149.2.3      yamt 	} else {
    863  1.149.2.3      yamt 		lwp_unlock(l);
    864  1.149.2.3      yamt 		uvm_kick_scheduler();
    865  1.149.2.3      yamt 	}
    866       1.26       cgd }
    867       1.26       cgd 
    868       1.26       cgd /*
    869       1.26       cgd  * Compute the priority of a process when running in user mode.
    870       1.26       cgd  * Arrange to reschedule if the resulting priority is better
    871       1.26       cgd  * than that of the current process.
    872       1.26       cgd  */
    873       1.26       cgd void
    874      1.122   thorpej resetpriority(struct lwp *l)
    875       1.26       cgd {
    876       1.71  augustss 	unsigned int newpriority;
    877      1.122   thorpej 	struct proc *p = l->l_proc;
    878       1.26       cgd 
    879  1.149.2.3      yamt 	/* XXXSMP LOCK_ASSERT(mutex_owned(&p->p_stmutex)); */
    880  1.149.2.3      yamt 	LOCK_ASSERT(lwp_locked(l, NULL));
    881  1.149.2.3      yamt 
    882  1.149.2.3      yamt 	if ((l->l_flag & LW_SYSTEM) != 0)
    883  1.149.2.3      yamt 		return;
    884       1.83   thorpej 
    885  1.149.2.1      yamt 	newpriority = PUSER + (p->p_estcpu >> ESTCPU_SHIFT) +
    886  1.149.2.3      yamt 	    NICE_WEIGHT * (p->p_nice - NZERO);
    887       1.26       cgd 	newpriority = min(newpriority, MAXPRI);
    888  1.149.2.3      yamt 	lwp_changepri(l, newpriority);
    889      1.122   thorpej }
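/*
 * Worked example (hypothetical userland sketch, assuming the NetBSD
 * defaults PUSER == 50, MAXPRI == 127 and NZERO == 20): reproduce the
 * user priority that resetpriority() would compute from a process's
 * p_estcpu and p_nice.
 */
#include <stdio.h>

#define	ESTCPU_SHIFT	11
#define	NICE_WEIGHT	2
#define	PUSER		50
#define	MAXPRI		127
#define	NZERO		20

static int
user_priority(unsigned int estcpu, int nice)
{
	int pri;

	pri = PUSER + (estcpu >> ESTCPU_SHIFT) + NICE_WEIGHT * (nice - NZERO);
	return pri < MAXPRI ? pri : MAXPRI;
}

int
main(void)
{

	/* estcpu of 8 decayed "ticks" and nice 24 yields priority 66. */
	printf("%d\n", user_priority(8 << ESTCPU_SHIFT, 24));
	return 0;
}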
    890      1.122   thorpej 
    891      1.130   nathanw /*
    892      1.122   thorpej  * Recompute priority for all LWPs in a process.
    893      1.122   thorpej  */
    894      1.122   thorpej void
    895      1.122   thorpej resetprocpriority(struct proc *p)
    896      1.122   thorpej {
    897      1.122   thorpej 	struct lwp *l;
    898      1.122   thorpej 
    899  1.149.2.3      yamt 	LOCK_ASSERT(mutex_owned(&p->p_stmutex));
    900  1.149.2.3      yamt 
    901  1.149.2.3      yamt 	LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    902  1.149.2.3      yamt 		lwp_lock(l);
    903  1.149.2.3      yamt 		resetpriority(l);
    904  1.149.2.3      yamt 		lwp_unlock(l);
    905  1.149.2.3      yamt 	}
    906       1.55      ross }
    907       1.55      ross 
    908       1.55      ross /*
    909       1.56      ross  * We adjust the priority of the current process.  The priority of a process
    910      1.141       wiz  * gets worse as it accumulates CPU time.  The CPU usage estimator (p_estcpu)
    911       1.56      ross  * is increased here.  The formula for computing priorities (in kern_synch.c)
    912       1.56      ross  * will compute a different value each time p_estcpu increases. This can
    913       1.56      ross  * cause a switch, but unless the priority crosses a PPQ boundary the actual
    914      1.141       wiz  * queue will not change.  The CPU usage estimator ramps up quite quickly
    915       1.56      ross  * when the process is running (linearly), and decays away exponentially, at
    916       1.56      ross  * a rate which is proportionally slower when the system is busy.  The basic
    917       1.80   nathanw  * principle is that the system will 90% forget that the process used a lot
    918       1.56      ross  * of CPU time in 5 * loadav seconds.  This causes the system to favor
    919       1.56      ross  * processes which haven't run much recently, and to round-robin among other
    920       1.56      ross  * processes.
    921       1.55      ross  */
    922       1.55      ross 
    923       1.55      ross void
    924      1.122   thorpej schedclock(struct lwp *l)
    925       1.55      ross {
    926      1.122   thorpej 	struct proc *p = l->l_proc;
    927       1.77   thorpej 
    928  1.149.2.3      yamt 	mutex_spin_enter(&p->p_stmutex);
    929  1.149.2.1      yamt 	p->p_estcpu = ESTCPULIM(p->p_estcpu + (1 << ESTCPU_SHIFT));
    930  1.149.2.3      yamt 	lwp_lock(l);
    931      1.122   thorpej 	resetpriority(l);
    932  1.149.2.3      yamt 	mutex_spin_exit(&p->p_stmutex);
    933  1.149.2.3      yamt 	if ((l->l_flag & LW_SYSTEM) == 0 && l->l_priority >= PUSER)
    934      1.122   thorpej 		l->l_priority = l->l_usrpri;
    935  1.149.2.3      yamt 	lwp_unlock(l);
    936       1.26       cgd }
    937       1.94    bouyer 
    938  1.149.2.3      yamt /*
    939  1.149.2.3      yamt  * suspendsched:
    940  1.149.2.3      yamt  *
    941  1.149.2.3      yamt  *	Convert all non-L_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED.
    942  1.149.2.3      yamt  */
    943       1.94    bouyer void
    944  1.149.2.3      yamt suspendsched(void)
    945       1.94    bouyer {
    946  1.149.2.3      yamt #ifdef MULTIPROCESSOR
    947  1.149.2.3      yamt 	CPU_INFO_ITERATOR cii;
    948  1.149.2.3      yamt 	struct cpu_info *ci;
    949  1.149.2.3      yamt #endif
    950      1.122   thorpej 	struct lwp *l;
    951  1.149.2.3      yamt 	struct proc *p;
    952       1.94    bouyer 
    953       1.94    bouyer 	/*
    954  1.149.2.3      yamt 	 * We do this by process in order not to violate the locking rules.
    955       1.94    bouyer 	 */
    956  1.149.2.3      yamt 	mutex_enter(&proclist_mutex);
    957  1.149.2.3      yamt 	PROCLIST_FOREACH(p, &allproc) {
    958  1.149.2.3      yamt 		mutex_enter(&p->p_smutex);
    959  1.149.2.3      yamt 
    960  1.149.2.3      yamt 		if ((p->p_flag & PK_SYSTEM) != 0) {
    961  1.149.2.3      yamt 			mutex_exit(&p->p_smutex);
    962       1.94    bouyer 			continue;
    963  1.149.2.3      yamt 		}
    964  1.149.2.3      yamt 
    965  1.149.2.3      yamt 		p->p_stat = SSTOP;
    966  1.149.2.3      yamt 
    967  1.149.2.3      yamt 		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    968  1.149.2.3      yamt 			if (l == curlwp)
    969  1.149.2.3      yamt 				continue;
    970  1.149.2.3      yamt 
    971  1.149.2.3      yamt 			lwp_lock(l);
    972      1.122   thorpej 
    973       1.97     enami 			/*
     974  1.149.2.3      yamt 			 * Set LW_WREBOOT so that the LWP will suspend itself
    975  1.149.2.3      yamt 			 * when it tries to return to user mode.  We want to
     976  1.149.2.3      yamt 			 * try to get as many LWPs as possible to
    977  1.149.2.3      yamt 			 * the user / kernel boundary, so that they will
    978  1.149.2.3      yamt 			 * release any locks that they hold.
    979       1.97     enami 			 */
    980  1.149.2.3      yamt 			l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);
    981  1.149.2.3      yamt 
    982  1.149.2.3      yamt 			if (l->l_stat == LSSLEEP &&
    983  1.149.2.3      yamt 			    (l->l_flag & LW_SINTR) != 0) {
    984  1.149.2.3      yamt 				/* setrunnable() will release the lock. */
    985  1.149.2.3      yamt 				setrunnable(l);
    986  1.149.2.3      yamt 				continue;
    987  1.149.2.3      yamt 			}
    988  1.149.2.3      yamt 
    989  1.149.2.3      yamt 			lwp_unlock(l);
    990       1.94    bouyer 		}
    991  1.149.2.3      yamt 
    992  1.149.2.3      yamt 		mutex_exit(&p->p_smutex);
    993       1.94    bouyer 	}
    994  1.149.2.3      yamt 	mutex_exit(&proclist_mutex);
    995  1.149.2.3      yamt 
    996  1.149.2.3      yamt 	/*
    997  1.149.2.3      yamt 	 * Kick all CPUs to make them preempt any LWPs running in user mode.
    998  1.149.2.3      yamt 	 * They'll trap into the kernel and suspend themselves in userret().
    999  1.149.2.3      yamt 	 */
   1000  1.149.2.3      yamt 	sched_lock(0);
   1001  1.149.2.3      yamt #ifdef MULTIPROCESSOR
   1002  1.149.2.3      yamt 	for (CPU_INFO_FOREACH(cii, ci))
   1003  1.149.2.3      yamt 		cpu_need_resched(ci);
   1004  1.149.2.3      yamt #else
   1005  1.149.2.3      yamt 	cpu_need_resched(curcpu());
   1006  1.149.2.3      yamt #endif
   1007  1.149.2.3      yamt 	sched_unlock(0);
   1008       1.94    bouyer }
   1009      1.113  gmcgarry 
   1010      1.113  gmcgarry /*
   1011  1.149.2.1      yamt  * scheduler_fork_hook:
   1012  1.149.2.1      yamt  *
   1013  1.149.2.1      yamt  *	Inherit the parent's scheduler history.
   1014  1.149.2.1      yamt  */
   1015  1.149.2.1      yamt void
   1016  1.149.2.1      yamt scheduler_fork_hook(struct proc *parent, struct proc *child)
   1017  1.149.2.1      yamt {
   1018  1.149.2.1      yamt 
   1019  1.149.2.3      yamt 	LOCK_ASSERT(mutex_owned(&parent->p_smutex));
   1020  1.149.2.3      yamt 
   1021  1.149.2.1      yamt 	child->p_estcpu = child->p_estcpu_inherited = parent->p_estcpu;
   1022  1.149.2.1      yamt 	child->p_forktime = schedcpu_ticks;
   1023  1.149.2.1      yamt }
   1024  1.149.2.1      yamt 
   1025  1.149.2.1      yamt /*
   1026  1.149.2.1      yamt  * scheduler_wait_hook:
   1027  1.149.2.1      yamt  *
   1028  1.149.2.1      yamt  *	Chargeback parents for the sins of their children.
   1029  1.149.2.1      yamt  */
   1030  1.149.2.1      yamt void
   1031  1.149.2.1      yamt scheduler_wait_hook(struct proc *parent, struct proc *child)
   1032  1.149.2.1      yamt {
   1033  1.149.2.1      yamt 	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
   1034  1.149.2.1      yamt 	fixpt_t estcpu;
   1035  1.149.2.1      yamt 
   1036  1.149.2.1      yamt 	/* XXX Only if parent != init?? */
   1037  1.149.2.1      yamt 
   1038  1.149.2.3      yamt 	mutex_spin_enter(&parent->p_stmutex);
   1039  1.149.2.1      yamt 	estcpu = decay_cpu_batch(loadfac, child->p_estcpu_inherited,
   1040  1.149.2.1      yamt 	    schedcpu_ticks - child->p_forktime);
   1041  1.149.2.3      yamt 	if (child->p_estcpu > estcpu)
   1042  1.149.2.1      yamt 		parent->p_estcpu =
   1043  1.149.2.1      yamt 		    ESTCPULIM(parent->p_estcpu + child->p_estcpu - estcpu);
   1044  1.149.2.3      yamt 	mutex_spin_exit(&parent->p_stmutex);
   1045  1.149.2.3      yamt }
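
/*
 * Put differently: the estcpu the child inherited at fork is first decayed
 * over the (schedcpu_ticks - p_forktime) ticks that have passed since then,
 * and only the amount by which the child's current estcpu exceeds that
 * decayed baseline is charged back to the parent, clamped by ESTCPULIM().
 */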
   1046  1.149.2.3      yamt 
   1047  1.149.2.3      yamt /*
   1048  1.149.2.3      yamt  * sched_kpri:
   1049  1.149.2.3      yamt  *
   1050  1.149.2.3      yamt  *	Scale a priority level to a kernel priority level, usually
   1051  1.149.2.3      yamt  *	for an LWP that is about to sleep.
   1052  1.149.2.3      yamt  */
   1053  1.149.2.3      yamt int
   1054  1.149.2.3      yamt sched_kpri(struct lwp *l)
   1055  1.149.2.3      yamt {
   1056  1.149.2.3      yamt 	/*
   1057  1.149.2.3      yamt 	 * Scale user priorities (127 -> 50) up to kernel priorities
   1058  1.149.2.3      yamt 	 * in the range (49 -> 8).  Reserve the top 8 kernel priorities
   1059  1.149.2.3      yamt 	 * for high priority kthreads.  Kernel priorities passed in
   1060  1.149.2.3      yamt 	 * are left "as is".  XXX This is somewhat arbitrary.
   1061  1.149.2.3      yamt 	 */
   1062  1.149.2.3      yamt 	static const uint8_t kpri_tab[] = {
   1063  1.149.2.3      yamt 		 0,   1,   2,   3,   4,   5,   6,   7,
   1064  1.149.2.3      yamt 		 8,   9,  10,  11,  12,  13,  14,  15,
   1065  1.149.2.3      yamt 		16,  17,  18,  19,  20,  21,  22,  23,
   1066  1.149.2.3      yamt 		24,  25,  26,  27,  28,  29,  30,  31,
   1067  1.149.2.3      yamt 		32,  33,  34,  35,  36,  37,  38,  39,
   1068  1.149.2.3      yamt 		40,  41,  42,  43,  44,  45,  46,  47,
   1069  1.149.2.3      yamt 		48,  49,   8,   8,   9,   9,  10,  10,
   1070  1.149.2.3      yamt 		11,  11,  12,  12,  13,  14,  14,  15,
   1071  1.149.2.3      yamt 		15,  16,  16,  17,  17,  18,  18,  19,
   1072  1.149.2.3      yamt 		20,  20,  21,  21,  22,  22,  23,  23,
   1073  1.149.2.3      yamt 		24,  24,  25,  26,  26,  27,  27,  28,
   1074  1.149.2.3      yamt 		28,  29,  29,  30,  30,  31,  32,  32,
   1075  1.149.2.3      yamt 		33,  33,  34,  34,  35,  35,  36,  36,
   1076  1.149.2.3      yamt 		37,  38,  38,  39,  39,  40,  40,  41,
   1077  1.149.2.3      yamt 		41,  42,  42,  43,  44,  44,  45,  45,
   1078  1.149.2.3      yamt 		46,  46,  47,  47,  48,  48,  49,  49,
   1079  1.149.2.3      yamt 	};
   1080  1.149.2.3      yamt 
   1081  1.149.2.3      yamt 	return kpri_tab[l->l_usrpri];
   1082  1.149.2.3      yamt }
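
/*
 * For example, with the table above: priorities already in the kernel
 * range (0 -> 49) are returned unchanged, the strongest user priority
 * (50) maps to kernel priority 8, and the weakest (127) maps to 49.
 */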
   1083  1.149.2.3      yamt 
   1084  1.149.2.3      yamt /*
   1085  1.149.2.3      yamt  * sched_unsleep:
   1086  1.149.2.3      yamt  *
    1087  1.149.2.3      yamt  *	This is called when the LWP has not been awoken normally but instead
   1088  1.149.2.3      yamt  *	interrupted: for example, if the sleep timed out.  Because of this,
   1089  1.149.2.3      yamt  *	it's not a valid action for running or idle LWPs.
   1090  1.149.2.3      yamt  */
   1091  1.149.2.3      yamt void
   1092  1.149.2.3      yamt sched_unsleep(struct lwp *l)
   1093  1.149.2.3      yamt {
   1094  1.149.2.3      yamt 
   1095  1.149.2.3      yamt 	lwp_unlock(l);
   1096  1.149.2.3      yamt 	panic("sched_unsleep");
   1097  1.149.2.3      yamt }
   1098  1.149.2.3      yamt 
   1099  1.149.2.3      yamt /*
   1100  1.149.2.3      yamt  * sched_changepri:
   1101  1.149.2.3      yamt  *
   1102  1.149.2.3      yamt  *	Adjust the priority of an LWP.
   1103  1.149.2.3      yamt  */
   1104  1.149.2.3      yamt void
   1105  1.149.2.3      yamt sched_changepri(struct lwp *l, int pri)
   1106  1.149.2.3      yamt {
   1107  1.149.2.3      yamt 
   1108  1.149.2.3      yamt 	LOCK_ASSERT(lwp_locked(l, &sched_mutex));
   1109  1.149.2.3      yamt 
   1110  1.149.2.3      yamt 	l->l_usrpri = pri;
   1111  1.149.2.3      yamt 
   1112  1.149.2.3      yamt 	if (l->l_priority < PUSER)
   1113  1.149.2.3      yamt 		return;
   1114  1.149.2.3      yamt 	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0 ||
   1115  1.149.2.3      yamt 	    (l->l_priority / PPQ) == (pri / PPQ)) {
   1116  1.149.2.3      yamt 		l->l_priority = pri;
   1117  1.149.2.3      yamt 		return;
   1118  1.149.2.1      yamt 	}
   1119  1.149.2.3      yamt 
   1120  1.149.2.3      yamt 	remrunqueue(l);
   1121  1.149.2.3      yamt 	l->l_priority = pri;
   1122  1.149.2.3      yamt 	setrunqueue(l);
   1123  1.149.2.3      yamt 	resched_lwp(l, pri);
   1124  1.149.2.1      yamt }
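
/*
 * Note that the above only touches the run queues when it has to: if the
 * LWP is currently running at a kernel priority (l_priority < PUSER) only
 * l_usrpri is updated, and an LWP that is not LSRUN, not in memory, or
 * whose new priority falls into the same PPQ-sized queue simply has its
 * priority fields rewritten in place.  Otherwise the LWP is removed from
 * its old queue, re-inserted at the new priority, and the CPU is poked
 * via resched_lwp().
 */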
   1125  1.149.2.1      yamt 
   1126  1.149.2.1      yamt /*
   1127      1.113  gmcgarry  * Low-level routines to access the run queue.  Optimised assembler
   1128      1.113  gmcgarry  * routines can override these.
   1129      1.113  gmcgarry  */
   1130      1.113  gmcgarry 
   1131      1.113  gmcgarry #ifndef __HAVE_MD_RUNQUEUE
   1132      1.115  nisimura 
   1133      1.130   nathanw /*
    1134      1.134      matt  * On some architectures, it's faster to use an MSB ordering for the
    1135      1.134      matt  * priorities than the traditional LSB ordering.
   1136      1.134      matt  */
   1137      1.134      matt #ifdef __HAVE_BIGENDIAN_BITOPS
   1138      1.134      matt #define	RQMASK(n) (0x80000000 >> (n))
   1139      1.134      matt #else
   1140      1.134      matt #define	RQMASK(n) (0x00000001 << (n))
   1141      1.134      matt #endif
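
/*
 * For example, run queue 0 is represented by bit 0x00000001 under the
 * default LSB ordering, or by bit 0x80000000 when __HAVE_BIGENDIAN_BITOPS
 * is defined; either way sched_whichqs carries exactly one bit per queue.
 */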
   1142      1.134      matt 
   1143      1.134      matt /*
    1144      1.115  nisimura  * The primitives that manipulate the run queues.  sched_whichqs tells which
    1145      1.115  nisimura  * of the 32 queues in sched_qs have LWPs in them.  setrunqueue() puts LWPs
    1146      1.115  nisimura  * into queues, remrunqueue() removes them from queues.  The running LWP is
    1147      1.115  nisimura  * on no queue; other runnable LWPs are on the queue indexed by l_priority
    1148      1.115  nisimura  * divided by PPQ (4), which shrinks the 0-127 priority range into the 32
    1149      1.115  nisimura  * available queues.
   1150      1.130   nathanw  */
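
/*
 * For example, assuming PPQ is 4 as described above, an LWP with
 * l_priority 0 -> 3 lives on sched_qs[0], one with priority 50 on
 * sched_qs[12], and one with priority 124 -> 127 on sched_qs[31]; the
 * matching RQMASK() bit is set in sched_whichqs while a queue is
 * non-empty.
 */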
   1151      1.146      matt #ifdef RQDEBUG
   1152      1.146      matt static void
   1153      1.146      matt checkrunqueue(int whichq, struct lwp *l)
   1154      1.146      matt {
   1155      1.146      matt 	const struct prochd * const rq = &sched_qs[whichq];
   1156      1.146      matt 	struct lwp *l2;
   1157      1.146      matt 	int found = 0;
   1158      1.146      matt 	int die = 0;
   1159      1.146      matt 	int empty = 1;
   1160  1.149.2.2      yamt 	for (l2 = rq->ph_link; l2 != (const void*) rq; l2 = l2->l_forw) {
   1161      1.146      matt 		if (l2->l_stat != LSRUN) {
    1162      1.146      matt 			printf("checkrunqueue[%d]: lwp %p state (%d) "
    1163      1.146      matt 			    "!= LSRUN\n", whichq, l2, l2->l_stat);
   1164      1.146      matt 		}
   1165      1.146      matt 		if (l2->l_back->l_forw != l2) {
   1166      1.146      matt 			printf("checkrunqueue[%d]: lwp %p back-qptr (%p) "
   1167      1.146      matt 			    "corrupt %p\n", whichq, l2, l2->l_back,
   1168      1.146      matt 			    l2->l_back->l_forw);
   1169      1.146      matt 			die = 1;
   1170      1.146      matt 		}
   1171      1.146      matt 		if (l2->l_forw->l_back != l2) {
   1172      1.146      matt 			printf("checkrunqueue[%d]: lwp %p forw-qptr (%p) "
   1173      1.146      matt 			    "corrupt %p\n", whichq, l2, l2->l_forw,
   1174      1.146      matt 			    l2->l_forw->l_back);
   1175      1.146      matt 			die = 1;
   1176      1.146      matt 		}
   1177      1.146      matt 		if (l2 == l)
   1178      1.146      matt 			found = 1;
   1179      1.146      matt 		empty = 0;
   1180      1.146      matt 	}
   1181      1.146      matt 	if (empty && (sched_whichqs & RQMASK(whichq)) != 0) {
   1182      1.146      matt 		printf("checkrunqueue[%d]: bit set for empty run-queue %p\n",
   1183      1.146      matt 		    whichq, rq);
   1184      1.146      matt 		die = 1;
   1185      1.146      matt 	} else if (!empty && (sched_whichqs & RQMASK(whichq)) == 0) {
   1186      1.146      matt 		printf("checkrunqueue[%d]: bit clear for non-empty "
   1187      1.146      matt 		    "run-queue %p\n", whichq, rq);
   1188      1.146      matt 		die = 1;
   1189      1.146      matt 	}
   1190      1.146      matt 	if (l != NULL && (sched_whichqs & RQMASK(whichq)) == 0) {
   1191      1.146      matt 		printf("checkrunqueue[%d]: bit clear for active lwp %p\n",
   1192      1.146      matt 		    whichq, l);
   1193      1.146      matt 		die = 1;
   1194      1.146      matt 	}
   1195      1.146      matt 	if (l != NULL && empty) {
   1196      1.146      matt 		printf("checkrunqueue[%d]: empty run-queue %p with "
   1197      1.146      matt 		    "active lwp %p\n", whichq, rq, l);
   1198      1.146      matt 		die = 1;
   1199      1.146      matt 	}
   1200      1.146      matt 	if (l != NULL && !found) {
    1201      1.146      matt 		printf("checkrunqueue[%d]: lwp %p not in runqueue %p!\n",
   1202      1.146      matt 		    whichq, l, rq);
   1203      1.146      matt 		die = 1;
   1204      1.146      matt 	}
   1205      1.146      matt 	if (die)
   1206      1.146      matt 		panic("checkrunqueue: inconsistency found");
   1207      1.146      matt }
   1208      1.146      matt #endif /* RQDEBUG */
   1209      1.146      matt 
   1210      1.113  gmcgarry void
   1211      1.122   thorpej setrunqueue(struct lwp *l)
   1212      1.113  gmcgarry {
   1213      1.113  gmcgarry 	struct prochd *rq;
   1214      1.122   thorpej 	struct lwp *prev;
   1215  1.149.2.1      yamt 	const int whichq = l->l_priority / PPQ;
   1216      1.113  gmcgarry 
   1217  1.149.2.3      yamt 	LOCK_ASSERT(lwp_locked(l, &sched_mutex));
   1218  1.149.2.3      yamt 
   1219      1.146      matt #ifdef RQDEBUG
   1220      1.146      matt 	checkrunqueue(whichq, NULL);
   1221      1.146      matt #endif
   1222      1.113  gmcgarry #ifdef DIAGNOSTIC
   1223  1.149.2.3      yamt 	if (l->l_back != NULL || l->l_stat != LSRUN)
   1224      1.113  gmcgarry 		panic("setrunqueue");
   1225      1.113  gmcgarry #endif
   1226      1.134      matt 	sched_whichqs |= RQMASK(whichq);
   1227      1.113  gmcgarry 	rq = &sched_qs[whichq];
   1228      1.113  gmcgarry 	prev = rq->ph_rlink;
   1229      1.122   thorpej 	l->l_forw = (struct lwp *)rq;
   1230      1.122   thorpej 	rq->ph_rlink = l;
   1231      1.122   thorpej 	prev->l_forw = l;
   1232      1.122   thorpej 	l->l_back = prev;
   1233      1.146      matt #ifdef RQDEBUG
   1234      1.146      matt 	checkrunqueue(whichq, l);
   1235      1.146      matt #endif
   1236      1.113  gmcgarry }
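
/*
 * setrunqueue() appends the LWP at the tail of its queue: each sched_qs[]
 * entry heads a circular doubly-linked list in which the header itself
 * acts as the sentinel (hence the cast of rq to struct lwp *), and
 * rq->ph_rlink always points at the current tail.
 */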
   1237      1.113  gmcgarry 
   1238  1.149.2.3      yamt /*
   1239  1.149.2.3      yamt  * XXXSMP When LWP dispatch (cpu_switch()) is changed to use remrunqueue(),
    1240  1.149.2.3      yamt  * the drop of the effective priority level from kernel to user needs to be
   1241  1.149.2.3      yamt  * moved here from userret().  The assignment in userret() is currently
   1242  1.149.2.3      yamt  * done unlocked.
   1243  1.149.2.3      yamt  */
   1244      1.113  gmcgarry void
   1245      1.122   thorpej remrunqueue(struct lwp *l)
   1246      1.113  gmcgarry {
   1247      1.122   thorpej 	struct lwp *prev, *next;
   1248  1.149.2.1      yamt 	const int whichq = l->l_priority / PPQ;
   1249  1.149.2.3      yamt 
   1250  1.149.2.3      yamt 	LOCK_ASSERT(lwp_locked(l, &sched_mutex));
   1251  1.149.2.3      yamt 
   1252      1.146      matt #ifdef RQDEBUG
   1253      1.146      matt 	checkrunqueue(whichq, l);
   1254      1.146      matt #endif
   1255  1.149.2.3      yamt 
   1256  1.149.2.3      yamt #if defined(DIAGNOSTIC)
   1257  1.149.2.3      yamt 	if (((sched_whichqs & RQMASK(whichq)) == 0) || l->l_back == NULL) {
   1258  1.149.2.3      yamt 		/* Shouldn't happen - interrupts disabled. */
   1259      1.146      matt 		panic("remrunqueue: bit %d not set", whichq);
   1260  1.149.2.3      yamt 	}
   1261      1.113  gmcgarry #endif
   1262      1.122   thorpej 	prev = l->l_back;
   1263      1.122   thorpej 	l->l_back = NULL;
   1264      1.122   thorpej 	next = l->l_forw;
   1265      1.122   thorpej 	prev->l_forw = next;
   1266      1.122   thorpej 	next->l_back = prev;
   1267      1.113  gmcgarry 	if (prev == next)
   1268      1.134      matt 		sched_whichqs &= ~RQMASK(whichq);
   1269      1.146      matt #ifdef RQDEBUG
   1270      1.146      matt 	checkrunqueue(whichq, NULL);
   1271      1.146      matt #endif
   1272      1.113  gmcgarry }
   1273      1.113  gmcgarry 
   1274      1.134      matt #undef RQMASK
   1275      1.134      matt #endif /* !defined(__HAVE_MD_RUNQUEUE) */
   1276