kern_synch.c revision 1.149.2.4
      1  1.149.2.4      yamt /*	$NetBSD: kern_synch.c,v 1.149.2.4 2007/09/03 14:40:56 yamt Exp $	*/
      2       1.63   thorpej 
      3       1.63   thorpej /*-
      4  1.149.2.3      yamt  * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
      5       1.63   thorpej  * All rights reserved.
      6       1.63   thorpej  *
      7       1.63   thorpej  * This code is derived from software contributed to The NetBSD Foundation
      8       1.63   thorpej  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  1.149.2.4      yamt  * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and
     10  1.149.2.4      yamt  * Daniel Sieger.
     11       1.63   thorpej  *
     12       1.63   thorpej  * Redistribution and use in source and binary forms, with or without
     13       1.63   thorpej  * modification, are permitted provided that the following conditions
     14       1.63   thorpej  * are met:
     15       1.63   thorpej  * 1. Redistributions of source code must retain the above copyright
     16       1.63   thorpej  *    notice, this list of conditions and the following disclaimer.
     17       1.63   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     18       1.63   thorpej  *    notice, this list of conditions and the following disclaimer in the
     19       1.63   thorpej  *    documentation and/or other materials provided with the distribution.
     20       1.63   thorpej  * 3. All advertising materials mentioning features or use of this software
     21       1.63   thorpej  *    must display the following acknowledgement:
     22       1.63   thorpej  *	This product includes software developed by the NetBSD
     23       1.63   thorpej  *	Foundation, Inc. and its contributors.
     24       1.63   thorpej  * 4. Neither the name of The NetBSD Foundation nor the names of its
     25       1.63   thorpej  *    contributors may be used to endorse or promote products derived
     26       1.63   thorpej  *    from this software without specific prior written permission.
     27       1.63   thorpej  *
     28       1.63   thorpej  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     29       1.63   thorpej  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     30       1.63   thorpej  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     31       1.63   thorpej  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     32       1.63   thorpej  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     33       1.63   thorpej  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     34       1.63   thorpej  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     35       1.63   thorpej  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     36       1.63   thorpej  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     37       1.63   thorpej  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     38       1.63   thorpej  * POSSIBILITY OF SUCH DAMAGE.
     39       1.63   thorpej  */
     40       1.26       cgd 
     41       1.26       cgd /*-
     42       1.26       cgd  * Copyright (c) 1982, 1986, 1990, 1991, 1993
     43       1.26       cgd  *	The Regents of the University of California.  All rights reserved.
     44       1.26       cgd  * (c) UNIX System Laboratories, Inc.
     45       1.26       cgd  * All or some portions of this file are derived from material licensed
     46       1.26       cgd  * to the University of California by American Telephone and Telegraph
     47       1.26       cgd  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     48       1.26       cgd  * the permission of UNIX System Laboratories, Inc.
     49       1.26       cgd  *
     50       1.26       cgd  * Redistribution and use in source and binary forms, with or without
     51       1.26       cgd  * modification, are permitted provided that the following conditions
     52       1.26       cgd  * are met:
     53       1.26       cgd  * 1. Redistributions of source code must retain the above copyright
     54       1.26       cgd  *    notice, this list of conditions and the following disclaimer.
     55       1.26       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     56       1.26       cgd  *    notice, this list of conditions and the following disclaimer in the
     57       1.26       cgd  *    documentation and/or other materials provided with the distribution.
     58      1.136       agc  * 3. Neither the name of the University nor the names of its contributors
     59       1.26       cgd  *    may be used to endorse or promote products derived from this software
     60       1.26       cgd  *    without specific prior written permission.
     61       1.26       cgd  *
     62       1.26       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     63       1.26       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     64       1.26       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     65       1.26       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     66       1.26       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     67       1.26       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     68       1.26       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     69       1.26       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     70       1.26       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     71       1.26       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     72       1.26       cgd  * SUCH DAMAGE.
     73       1.26       cgd  *
     74       1.50      fvdl  *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
     75       1.26       cgd  */
     76      1.106     lukem 
     77      1.106     lukem #include <sys/cdefs.h>
     78  1.149.2.4      yamt __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.149.2.4 2007/09/03 14:40:56 yamt Exp $");
     79       1.48       mrg 
     80      1.109      yamt #include "opt_kstack.h"
     81       1.82   thorpej #include "opt_lockdebug.h"
     82       1.83   thorpej #include "opt_multiprocessor.h"
     83      1.110    briggs #include "opt_perfctrs.h"
     84       1.26       cgd 
     85  1.149.2.3      yamt #define	__MUTEX_PRIVATE
     86  1.149.2.3      yamt 
     87       1.26       cgd #include <sys/param.h>
     88       1.26       cgd #include <sys/systm.h>
     89       1.26       cgd #include <sys/proc.h>
     90       1.26       cgd #include <sys/kernel.h>
     91      1.111    briggs #if defined(PERFCTRS)
     92      1.110    briggs #include <sys/pmc.h>
     93      1.111    briggs #endif
     94  1.149.2.4      yamt #include <sys/cpu.h>
     95       1.26       cgd #include <sys/resourcevar.h>
     96       1.55      ross #include <sys/sched.h>
     97  1.149.2.3      yamt #include <sys/syscall_stats.h>
     98  1.149.2.3      yamt #include <sys/sleepq.h>
     99  1.149.2.3      yamt #include <sys/lockdebug.h>
    100  1.149.2.4      yamt #include <sys/evcnt.h>
    101       1.47       mrg 
    102       1.47       mrg #include <uvm/uvm_extern.h>
    103       1.47       mrg 
    104  1.149.2.4      yamt callout_t sched_pstats_ch;
    105  1.149.2.4      yamt unsigned int sched_pstats_ticks;
    106       1.34  christos 
    107  1.149.2.4      yamt kcondvar_t	lbolt;			/* once a second sleep address */
    108       1.26       cgd 
    109  1.149.2.4      yamt static void	sched_unsleep(struct lwp *);
    110  1.149.2.4      yamt static void	sched_changepri(struct lwp *, pri_t);
    111  1.149.2.4      yamt static void	sched_lendpri(struct lwp *, pri_t);
    112      1.122   thorpej 
    113  1.149.2.3      yamt syncobj_t sleep_syncobj = {
    114  1.149.2.3      yamt 	SOBJ_SLEEPQ_SORTED,
    115  1.149.2.3      yamt 	sleepq_unsleep,
    116  1.149.2.4      yamt 	sleepq_changepri,
    117  1.149.2.4      yamt 	sleepq_lendpri,
    118  1.149.2.4      yamt 	syncobj_noowner,
    119  1.149.2.3      yamt };
    120  1.149.2.3      yamt 
    121  1.149.2.3      yamt syncobj_t sched_syncobj = {
    122  1.149.2.3      yamt 	SOBJ_SLEEPQ_SORTED,
    123  1.149.2.3      yamt 	sched_unsleep,
    124  1.149.2.4      yamt 	sched_changepri,
    125  1.149.2.4      yamt 	sched_lendpri,
    126  1.149.2.4      yamt 	syncobj_noowner,
    127  1.149.2.3      yamt };
    128      1.122   thorpej 
    129       1.26       cgd /*
    130  1.149.2.3      yamt  * During autoconfiguration or after a panic, a sleep will simply lower the
    131  1.149.2.3      yamt  * priority briefly to allow interrupts, then return.  The priority to be
    132  1.149.2.3      yamt  * used (safepri) is machine-dependent, thus this value is initialized and
    133  1.149.2.3      yamt  * maintained in the machine-dependent layers.  This priority will typically
    134  1.149.2.3      yamt  * be 0, or the lowest priority that is safe for use on the interrupt stack;
    135  1.149.2.3      yamt  * it can be made higher to block network software interrupts after panics.
    136       1.26       cgd  */
    137  1.149.2.3      yamt int	safepri;
    138       1.26       cgd 
    139       1.26       cgd /*
    140  1.149.2.3      yamt  * OBSOLETE INTERFACE
    141  1.149.2.3      yamt  *
    142       1.26       cgd  * General sleep call.  Suspends the current process until a wakeup is
    143       1.26       cgd  * performed on the specified identifier.  The process will then be made
    144  1.149.2.3      yamt  * runnable with the specified priority.  Sleeps at most timo/hz seconds (0
     145  1.149.2.3      yamt  * means no timeout).  If priority includes PCATCH, signals are checked
    146       1.26       cgd  * before and after sleeping, else signals are not checked.  Returns 0 if
    147       1.26       cgd  * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
    148       1.26       cgd  * signal needs to be delivered, ERESTART is returned if the current system
    149       1.26       cgd  * call should be restarted if possible, and EINTR is returned if the system
     150       1.26       cgd  * call should be interrupted by the signal.
    151       1.77   thorpej  *
    152  1.149.2.3      yamt  * The interlock is held until we are on a sleep queue. The interlock will
    153  1.149.2.3      yamt  * be locked before returning back to the caller unless the PNORELOCK flag
    154  1.149.2.3      yamt  * is specified, in which case the interlock will always be unlocked upon
    155  1.149.2.3      yamt  * return.
    156       1.26       cgd  */
    157       1.26       cgd int
    158  1.149.2.4      yamt ltsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
    159  1.149.2.3      yamt 	volatile struct simplelock *interlock)
    160       1.26       cgd {
    161      1.122   thorpej 	struct lwp *l = curlwp;
    162  1.149.2.3      yamt 	sleepq_t *sq;
    163  1.149.2.4      yamt 	int error;
    164       1.26       cgd 
    165  1.149.2.3      yamt 	if (sleepq_dontsleep(l)) {
    166  1.149.2.3      yamt 		(void)sleepq_abort(NULL, 0);
    167  1.149.2.3      yamt 		if ((priority & PNORELOCK) != 0)
    168       1.77   thorpej 			simple_unlock(interlock);
    169  1.149.2.3      yamt 		return 0;
    170      1.122   thorpej 	}
    171       1.77   thorpej 
    172  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    173  1.149.2.3      yamt 	sleepq_enter(sq, l);
    174  1.149.2.4      yamt 	sleepq_enqueue(sq, priority & PRIMASK, ident, wmesg, &sleep_syncobj);
    175       1.77   thorpej 
    176  1.149.2.3      yamt 	if (interlock != NULL) {
    177  1.149.2.3      yamt 		LOCK_ASSERT(simple_lock_held(interlock));
    178       1.77   thorpej 		simple_unlock(interlock);
    179       1.26       cgd 	}
    180      1.147     perry 
    181  1.149.2.4      yamt 	error = sleepq_block(timo, priority & PCATCH);
    182      1.139        cl 
    183  1.149.2.3      yamt 	if (interlock != NULL && (priority & PNORELOCK) == 0)
    184  1.149.2.3      yamt 		simple_lock(interlock);
    185  1.149.2.3      yamt 
    186  1.149.2.3      yamt 	return error;
    187      1.139        cl }
    188      1.139        cl 
    189  1.149.2.4      yamt int
    190  1.149.2.4      yamt mtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
    191  1.149.2.4      yamt 	kmutex_t *mtx)
    192  1.149.2.4      yamt {
    193  1.149.2.4      yamt 	struct lwp *l = curlwp;
    194  1.149.2.4      yamt 	sleepq_t *sq;
    195  1.149.2.4      yamt 	int error;
    196  1.149.2.4      yamt 
    197  1.149.2.4      yamt 	if (sleepq_dontsleep(l)) {
    198  1.149.2.4      yamt 		(void)sleepq_abort(mtx, (priority & PNORELOCK) != 0);
    199  1.149.2.4      yamt 		return 0;
    200  1.149.2.4      yamt 	}
    201  1.149.2.4      yamt 
    202  1.149.2.4      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    203  1.149.2.4      yamt 	sleepq_enter(sq, l);
    204  1.149.2.4      yamt 	sleepq_enqueue(sq, priority & PRIMASK, ident, wmesg, &sleep_syncobj);
    205  1.149.2.4      yamt 	mutex_exit(mtx);
    206  1.149.2.4      yamt 	error = sleepq_block(timo, priority & PCATCH);
    207  1.149.2.4      yamt 
    208  1.149.2.4      yamt 	if ((priority & PNORELOCK) == 0)
    209  1.149.2.4      yamt 		mutex_enter(mtx);
    210  1.149.2.4      yamt 
    211  1.149.2.4      yamt 	return error;
    212  1.149.2.4      yamt }
    213  1.149.2.4      yamt 
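/*
 * Illustrative sketch, not part of the original file: a typical caller of
 * mtsleep() above waits for a condition protected by a kmutex, re-checking
 * the condition in a loop because a wakeup on a shared wchan does not
 * guarantee that this particular condition became true.  The names
 * example_mtx, example_ready and example_wait are hypothetical, and
 * example_mtx is assumed to have been set up with mutex_init() elsewhere.
 */
static kmutex_t example_mtx;
static int example_ready;

static int
example_wait(int timo)
{
	int error = 0;

	mutex_enter(&example_mtx);
	while (!example_ready && error == 0) {
		/* example_mtx is dropped while asleep, reacquired on return. */
		error = mtsleep(&example_ready, PZERO | PCATCH, "exwait",
		    timo, &example_mtx);
	}
	mutex_exit(&example_mtx);
	return error;
}
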
    214       1.26       cgd /*
    215  1.149.2.3      yamt  * General sleep call for situations where a wake-up is not expected.
    216       1.63   thorpej  */
    217  1.149.2.3      yamt int
    218  1.149.2.3      yamt kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
    219       1.83   thorpej {
    220  1.149.2.3      yamt 	struct lwp *l = curlwp;
    221  1.149.2.3      yamt 	sleepq_t *sq;
    222  1.149.2.3      yamt 	int error;
    223       1.83   thorpej 
    224  1.149.2.3      yamt 	if (sleepq_dontsleep(l))
    225  1.149.2.3      yamt 		return sleepq_abort(NULL, 0);
    226       1.63   thorpej 
    227  1.149.2.3      yamt 	if (mtx != NULL)
    228  1.149.2.3      yamt 		mutex_exit(mtx);
    229  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, l);
    230  1.149.2.3      yamt 	sleepq_enter(sq, l);
    231  1.149.2.4      yamt 	sleepq_enqueue(sq, sched_kpri(l), l, wmesg, &sleep_syncobj);
    232  1.149.2.4      yamt 	error = sleepq_block(timo, intr);
    233  1.149.2.3      yamt 	if (mtx != NULL)
    234  1.149.2.3      yamt 		mutex_enter(mtx);
    235       1.83   thorpej 
    236  1.149.2.3      yamt 	return error;
    237       1.83   thorpej }
    238       1.83   thorpej 
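/*
 * Illustrative sketch, not part of the original file: kpause() above is the
 * simple way to pause when no wakeup() is expected, for example a retry
 * loop that backs off briefly.  example_poll_delay is a hypothetical name.
 */
static void
example_poll_delay(void)
{

	/* Sleep roughly a tenth of a second; not interruptible by signals. */
	(void)kpause("expoll", false, hz / 10, NULL);
}
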
    239       1.63   thorpej /*
    240  1.149.2.3      yamt  * OBSOLETE INTERFACE
    241  1.149.2.3      yamt  *
    242       1.26       cgd  * Make all processes sleeping on the specified identifier runnable.
    243       1.26       cgd  */
    244       1.26       cgd void
    245  1.149.2.3      yamt wakeup(wchan_t ident)
    246       1.26       cgd {
    247  1.149.2.3      yamt 	sleepq_t *sq;
    248       1.83   thorpej 
    249  1.149.2.3      yamt 	if (cold)
    250  1.149.2.3      yamt 		return;
    251       1.83   thorpej 
    252  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    253  1.149.2.3      yamt 	sleepq_wake(sq, ident, (u_int)-1);
    254       1.63   thorpej }
    255       1.63   thorpej 
    256       1.63   thorpej /*
    257  1.149.2.3      yamt  * OBSOLETE INTERFACE
    258  1.149.2.3      yamt  *
    259       1.63   thorpej  * Make the highest priority process first in line on the specified
    260       1.63   thorpej  * identifier runnable.
    261       1.63   thorpej  */
    262  1.149.2.3      yamt void
    263  1.149.2.3      yamt wakeup_one(wchan_t ident)
    264       1.63   thorpej {
    265  1.149.2.3      yamt 	sleepq_t *sq;
    266       1.63   thorpej 
    267  1.149.2.3      yamt 	if (cold)
    268  1.149.2.3      yamt 		return;
    269  1.149.2.4      yamt 
    270  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    271  1.149.2.3      yamt 	sleepq_wake(sq, ident, 1);
    272      1.117  gmcgarry }
    273      1.117  gmcgarry 
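/*
 * Illustrative sketch, not part of the original file: the producer side of
 * the hypothetical mtsleep() example earlier.  The condition is changed
 * under the same lock the sleeper holds, and wakeup() is issued on the
 * same wchan the sleeper passed to mtsleep().
 */
static void
example_signal(void)
{

	mutex_enter(&example_mtx);
	example_ready = 1;
	mutex_exit(&example_mtx);
	wakeup(&example_ready);
}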
    274  1.149.2.3      yamt 
    275      1.117  gmcgarry /*
    276      1.117  gmcgarry  * General yield call.  Puts the current process back on its run queue and
    277      1.117  gmcgarry  * performs a voluntary context switch.  Should only be called when the
     278      1.117  gmcgarry  * current process explicitly requests it (e.g. sched_yield(2) in compat code).
    279      1.117  gmcgarry  */
    280      1.117  gmcgarry void
    281      1.117  gmcgarry yield(void)
    282      1.117  gmcgarry {
    283      1.122   thorpej 	struct lwp *l = curlwp;
    284      1.117  gmcgarry 
    285  1.149.2.3      yamt 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
    286  1.149.2.3      yamt 	lwp_lock(l);
    287  1.149.2.4      yamt 	KASSERT(lwp_locked(l, &l->l_cpu->ci_schedstate.spc_lwplock));
    288  1.149.2.4      yamt 	KASSERT(l->l_stat == LSONPROC);
    289  1.149.2.4      yamt 	l->l_priority = l->l_usrpri;
    290  1.149.2.4      yamt 	(void)mi_switch(l);
    291  1.149.2.3      yamt 	KERNEL_LOCK(l->l_biglocks, l);
    292       1.69   thorpej }
    293       1.69   thorpej 
    294       1.69   thorpej /*
    295       1.69   thorpej  * General preemption call.  Puts the current process back on its run queue
    296  1.149.2.1      yamt  * and performs an involuntary context switch.
    297       1.69   thorpej  */
    298       1.69   thorpej void
    299  1.149.2.3      yamt preempt(void)
    300       1.69   thorpej {
    301      1.122   thorpej 	struct lwp *l = curlwp;
    302       1.69   thorpej 
    303  1.149.2.3      yamt 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
    304  1.149.2.3      yamt 	lwp_lock(l);
    305  1.149.2.4      yamt 	KASSERT(lwp_locked(l, &l->l_cpu->ci_schedstate.spc_lwplock));
    306  1.149.2.4      yamt 	KASSERT(l->l_stat == LSONPROC);
    307  1.149.2.4      yamt 	l->l_priority = l->l_usrpri;
    308  1.149.2.3      yamt 	l->l_nivcsw++;
    309  1.149.2.4      yamt 	(void)mi_switch(l);
    310  1.149.2.3      yamt 	KERNEL_LOCK(l->l_biglocks, l);
    311       1.69   thorpej }
    312       1.69   thorpej 
    313       1.69   thorpej /*
    314  1.149.2.4      yamt  * Compute the amount of time during which the current lwp was running.
    315      1.130   nathanw  *
    316  1.149.2.4      yamt  * - update l_rtime unless it's an idle lwp.
    317  1.149.2.4      yamt  * - update spc_runtime for the next lwp.
    318  1.149.2.4      yamt  */
    319  1.149.2.4      yamt 
    320  1.149.2.4      yamt static inline void
    321  1.149.2.4      yamt updatertime(struct lwp *l, struct schedstate_percpu *spc)
    322  1.149.2.4      yamt {
    323  1.149.2.4      yamt 	struct timeval tv;
    324  1.149.2.4      yamt 	long s, u;
    325  1.149.2.4      yamt 
    326  1.149.2.4      yamt 	if ((l->l_flag & LW_IDLE) != 0) {
    327  1.149.2.4      yamt 		microtime(&spc->spc_runtime);
    328  1.149.2.4      yamt 		return;
    329  1.149.2.4      yamt 	}
    330  1.149.2.4      yamt 
    331  1.149.2.4      yamt 	microtime(&tv);
    332  1.149.2.4      yamt 	u = l->l_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec);
    333  1.149.2.4      yamt 	s = l->l_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
    334  1.149.2.4      yamt 	if (u < 0) {
    335  1.149.2.4      yamt 		u += 1000000;
    336  1.149.2.4      yamt 		s--;
    337  1.149.2.4      yamt 	} else if (u >= 1000000) {
    338  1.149.2.4      yamt 		u -= 1000000;
    339  1.149.2.4      yamt 		s++;
    340  1.149.2.4      yamt 	}
    341  1.149.2.4      yamt 	l->l_rtime.tv_usec = u;
    342  1.149.2.4      yamt 	l->l_rtime.tv_sec = s;
    343  1.149.2.4      yamt 
    344  1.149.2.4      yamt 	spc->spc_runtime = tv;
    345  1.149.2.4      yamt }
    346  1.149.2.4      yamt 
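/*
 * Worked example, not part of the original file: with l_rtime still zero,
 * if spc_runtime was last set to 10.900000s and microtime() now returns
 * 12.300000s, the code above computes u = -600000, s = 2; the
 * normalization step carries that into l_rtime = 1.400000s, the time the
 * LWP just spent running.
 */
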
    347  1.149.2.4      yamt /*
    348  1.149.2.4      yamt  * The machine independent parts of context switch.
    349  1.149.2.4      yamt  *
    350  1.149.2.4      yamt  * Returns 1 if another LWP was actually run.
    351       1.26       cgd  */
    352      1.122   thorpej int
    353  1.149.2.4      yamt mi_switch(struct lwp *l)
    354       1.26       cgd {
    355       1.76   thorpej 	struct schedstate_percpu *spc;
    356  1.149.2.4      yamt 	struct lwp *newl;
    357  1.149.2.3      yamt 	int retval, oldspl;
    358       1.85  sommerfe 
    359  1.149.2.4      yamt 	KASSERT(lwp_locked(l, NULL));
    360  1.149.2.4      yamt 	LOCKDEBUG_BARRIER(l->l_mutex, 1);
    361       1.76   thorpej 
    362  1.149.2.3      yamt #ifdef KSTACK_CHECK_MAGIC
    363  1.149.2.3      yamt 	kstack_check_magic(l);
    364  1.149.2.3      yamt #endif
    365  1.149.2.3      yamt 
    366  1.149.2.3      yamt 	/*
    367  1.149.2.3      yamt 	 * It's safe to read the per CPU schedstate unlocked here, as all we
     368  1.149.2.3      yamt 	 * are after is the run time and that's guaranteed to have been last
    369  1.149.2.3      yamt 	 * updated by this CPU.
    370  1.149.2.3      yamt 	 */
    371  1.149.2.3      yamt 	KDASSERT(l->l_cpu == curcpu());
    372       1.81   thorpej 
    373       1.26       cgd 	/*
    374  1.149.2.4      yamt 	 * Process is about to yield the CPU; clear the appropriate
    375  1.149.2.4      yamt 	 * scheduling flags.
    376       1.26       cgd 	 */
    377  1.149.2.4      yamt 	spc = &l->l_cpu->ci_schedstate;
    378  1.149.2.4      yamt 	newl = NULL;
    379  1.149.2.4      yamt 
    380  1.149.2.4      yamt 	if (l->l_switchto != NULL) {
    381  1.149.2.4      yamt 		newl = l->l_switchto;
    382  1.149.2.4      yamt 		l->l_switchto = NULL;
    383       1.26       cgd 	}
    384  1.149.2.3      yamt 
    385  1.149.2.3      yamt 	/* Count time spent in current system call */
    386  1.149.2.3      yamt 	SYSCALL_TIME_SLEEP(l);
    387       1.26       cgd 
    388       1.26       cgd 	/*
    389  1.149.2.4      yamt 	 * XXXSMP If we are using h/w performance counters,
    390  1.149.2.4      yamt 	 * save context.
    391       1.69   thorpej 	 */
    392  1.149.2.3      yamt #if PERFCTRS
    393  1.149.2.3      yamt 	if (PMC_ENABLED(l->l_proc)) {
    394  1.149.2.3      yamt 		pmc_save_context(l->l_proc);
    395  1.149.2.3      yamt 	}
    396      1.109      yamt #endif
    397  1.149.2.4      yamt 	updatertime(l, spc);
    398      1.113  gmcgarry 
    399      1.113  gmcgarry 	/*
    400  1.149.2.3      yamt 	 * If on the CPU and we have gotten this far, then we must yield.
    401      1.113  gmcgarry 	 */
    402  1.149.2.4      yamt 	mutex_spin_enter(spc->spc_mutex);
    403  1.149.2.4      yamt 	spc->spc_flags &= ~SPCF_SWITCHCLEAR;
    404  1.149.2.3      yamt 	KASSERT(l->l_stat != LSRUN);
    405  1.149.2.3      yamt 	if (l->l_stat == LSONPROC) {
    406  1.149.2.4      yamt 		KASSERT(lwp_locked(l, &spc->spc_lwplock));
    407  1.149.2.4      yamt 		if ((l->l_flag & LW_IDLE) == 0) {
    408  1.149.2.4      yamt 			l->l_stat = LSRUN;
    409  1.149.2.4      yamt 			lwp_setlock(l, spc->spc_mutex);
    410  1.149.2.4      yamt 			sched_enqueue(l, true);
    411  1.149.2.4      yamt 		} else
    412  1.149.2.4      yamt 			l->l_stat = LSIDL;
    413  1.149.2.3      yamt 	}
    414  1.149.2.3      yamt 
    415  1.149.2.3      yamt 	/*
     416  1.149.2.4      yamt 	 * Let sched_nextlwp() select the LWP to run on the CPU next.
    417  1.149.2.4      yamt 	 * If no LWP is runnable, switch to the idle LWP.
    418  1.149.2.3      yamt 	 */
    419  1.149.2.4      yamt 	if (newl == NULL) {
    420  1.149.2.4      yamt 		newl = sched_nextlwp();
    421  1.149.2.4      yamt 		if (newl != NULL) {
    422  1.149.2.4      yamt 			sched_dequeue(newl);
    423  1.149.2.4      yamt 			KASSERT(lwp_locked(newl, spc->spc_mutex));
    424  1.149.2.4      yamt 			newl->l_stat = LSONPROC;
    425  1.149.2.4      yamt 			newl->l_cpu = l->l_cpu;
    426  1.149.2.4      yamt 			newl->l_flag |= LW_RUNNING;
    427  1.149.2.4      yamt 			lwp_setlock(newl, &spc->spc_lwplock);
    428  1.149.2.4      yamt 		} else {
    429  1.149.2.4      yamt 			newl = l->l_cpu->ci_data.cpu_idlelwp;
    430  1.149.2.4      yamt 			newl->l_stat = LSONPROC;
    431  1.149.2.4      yamt 			newl->l_flag |= LW_RUNNING;
    432  1.149.2.4      yamt 		}
    433  1.149.2.4      yamt 		spc->spc_curpriority = newl->l_usrpri;
    434  1.149.2.4      yamt 		newl->l_priority = newl->l_usrpri;
    435  1.149.2.4      yamt 		cpu_did_resched();
    436  1.149.2.4      yamt 	}
    437  1.149.2.3      yamt 
    438  1.149.2.4      yamt 	if (l != newl) {
    439  1.149.2.4      yamt 		struct lwp *prevlwp;
    440  1.149.2.3      yamt 
    441  1.149.2.4      yamt 		/*
    442  1.149.2.4      yamt 		 * If the old LWP has been moved to a run queue above,
    443  1.149.2.4      yamt 		 * drop the general purpose LWP lock: it's now locked
    444  1.149.2.4      yamt 		 * by the scheduler lock.
    445  1.149.2.4      yamt 		 *
    446  1.149.2.4      yamt 		 * Otherwise, drop the scheduler lock.  We're done with
    447  1.149.2.4      yamt 		 * the run queues for now.
    448  1.149.2.4      yamt 		 */
    449  1.149.2.4      yamt 		if (l->l_mutex == spc->spc_mutex) {
    450  1.149.2.4      yamt 			mutex_spin_exit(&spc->spc_lwplock);
    451  1.149.2.4      yamt 		} else {
    452  1.149.2.4      yamt 			mutex_spin_exit(spc->spc_mutex);
    453  1.149.2.4      yamt 		}
    454  1.149.2.4      yamt 
    455  1.149.2.4      yamt 		/* Unlocked, but for statistics only. */
    456  1.149.2.4      yamt 		uvmexp.swtch++;
    457  1.149.2.4      yamt 
    458  1.149.2.4      yamt 		/* Save old VM context. */
    459  1.149.2.4      yamt 		pmap_deactivate(l);
    460  1.149.2.4      yamt 
    461  1.149.2.4      yamt 		/* Switch to the new LWP.. */
    462  1.149.2.4      yamt 		l->l_ncsw++;
    463  1.149.2.4      yamt 		l->l_flag &= ~LW_RUNNING;
    464  1.149.2.4      yamt 		oldspl = MUTEX_SPIN_OLDSPL(l->l_cpu);
    465  1.149.2.4      yamt 		prevlwp = cpu_switchto(l, newl);
    466  1.149.2.4      yamt 
    467  1.149.2.4      yamt 		/*
    468  1.149.2.4      yamt 		 * .. we have switched away and are now back so we must
    469  1.149.2.4      yamt 		 * be the new curlwp.  prevlwp is who we replaced.
    470  1.149.2.4      yamt 		 */
    471  1.149.2.4      yamt 		curlwp = l;
    472  1.149.2.4      yamt 		if (prevlwp != NULL) {
    473  1.149.2.4      yamt 			curcpu()->ci_mtx_oldspl = oldspl;
    474  1.149.2.4      yamt 			lwp_unlock(prevlwp);
    475  1.149.2.4      yamt 		} else {
    476  1.149.2.4      yamt 			splx(oldspl);
    477  1.149.2.4      yamt 		}
    478  1.149.2.3      yamt 
    479  1.149.2.4      yamt 		/* Restore VM context. */
    480  1.149.2.4      yamt 		pmap_activate(l);
    481  1.149.2.4      yamt 		retval = 1;
    482  1.149.2.4      yamt 	} else {
    483  1.149.2.4      yamt 		/* Nothing to do - just unlock and return. */
    484  1.149.2.4      yamt 		mutex_spin_exit(spc->spc_mutex);
    485  1.149.2.4      yamt 		lwp_unlock(l);
    486      1.122   thorpej 		retval = 0;
    487      1.122   thorpej 	}
    488      1.110    briggs 
    489  1.149.2.4      yamt 	KASSERT(l == curlwp);
    490  1.149.2.4      yamt 	KASSERT(l->l_stat == LSONPROC);
    491  1.149.2.4      yamt 
    492      1.110    briggs 	/*
    493  1.149.2.3      yamt 	 * XXXSMP If we are using h/w performance counters, restore context.
    494       1.26       cgd 	 */
    495      1.114  gmcgarry #if PERFCTRS
    496  1.149.2.3      yamt 	if (PMC_ENABLED(l->l_proc)) {
    497  1.149.2.3      yamt 		pmc_restore_context(l->l_proc);
    498  1.149.2.2      yamt 	}
    499      1.114  gmcgarry #endif
    500      1.110    briggs 
    501      1.110    briggs 	/*
    502       1.76   thorpej 	 * We're running again; record our new start time.  We might
    503  1.149.2.3      yamt 	 * be running on a new CPU now, so don't use the cached
    504       1.76   thorpej 	 * schedstate_percpu pointer.
    505       1.76   thorpej 	 */
    506  1.149.2.3      yamt 	SYSCALL_TIME_WAKEUP(l);
    507      1.122   thorpej 	KDASSERT(l->l_cpu == curcpu());
    508  1.149.2.4      yamt 	LOCKDEBUG_BARRIER(NULL, 1);
    509  1.149.2.2      yamt 
    510      1.122   thorpej 	return retval;
    511       1.26       cgd }
    512       1.26       cgd 
    513       1.26       cgd /*
    514  1.149.2.3      yamt  * Change process state to be runnable, placing it on the run queue if it is
    515  1.149.2.3      yamt  * in memory, and awakening the swapper if it isn't in memory.
    516  1.149.2.3      yamt  *
    517  1.149.2.3      yamt  * Call with the process and LWP locked.  Will return with the LWP unlocked.
    518       1.26       cgd  */
    519       1.26       cgd void
    520      1.122   thorpej setrunnable(struct lwp *l)
    521       1.26       cgd {
    522      1.122   thorpej 	struct proc *p = l->l_proc;
    523  1.149.2.3      yamt 	sigset_t *ss;
    524       1.26       cgd 
    525  1.149.2.4      yamt 	KASSERT((l->l_flag & LW_IDLE) == 0);
    526  1.149.2.3      yamt 	KASSERT(mutex_owned(&p->p_smutex));
    527  1.149.2.3      yamt 	KASSERT(lwp_locked(l, NULL));
    528       1.83   thorpej 
    529      1.122   thorpej 	switch (l->l_stat) {
    530      1.122   thorpej 	case LSSTOP:
    531       1.33   mycroft 		/*
    532       1.33   mycroft 		 * If we're being traced (possibly because someone attached us
    533       1.33   mycroft 		 * while we were stopped), check for a signal from the debugger.
    534       1.33   mycroft 		 */
    535  1.149.2.3      yamt 		if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xstat != 0) {
    536  1.149.2.3      yamt 			if ((sigprop[p->p_xstat] & SA_TOLWP) != 0)
    537  1.149.2.3      yamt 				ss = &l->l_sigpend.sp_set;
    538  1.149.2.3      yamt 			else
    539  1.149.2.3      yamt 				ss = &p->p_sigpend.sp_set;
    540  1.149.2.3      yamt 			sigaddset(ss, p->p_xstat);
    541  1.149.2.3      yamt 			signotify(l);
    542       1.53   mycroft 		}
    543  1.149.2.3      yamt 		p->p_nrlwps++;
    544      1.122   thorpej 		break;
    545      1.122   thorpej 	case LSSUSPENDED:
    546  1.149.2.3      yamt 		l->l_flag &= ~LW_WSUSPEND;
    547  1.149.2.3      yamt 		p->p_nrlwps++;
    548  1.149.2.4      yamt 		cv_broadcast(&p->p_lwpcv);
    549  1.149.2.3      yamt 		break;
    550  1.149.2.3      yamt 	case LSSLEEP:
    551  1.149.2.3      yamt 		KASSERT(l->l_wchan != NULL);
    552       1.26       cgd 		break;
    553  1.149.2.3      yamt 	default:
    554  1.149.2.3      yamt 		panic("setrunnable: lwp %p state was %d", l, l->l_stat);
    555       1.26       cgd 	}
    556      1.139        cl 
    557  1.149.2.3      yamt 	/*
     558  1.149.2.3      yamt 	 * If the LWP was sleeping interruptibly, then it's OK to start it
    559  1.149.2.3      yamt 	 * again.  If not, mark it as still sleeping.
    560  1.149.2.3      yamt 	 */
    561  1.149.2.3      yamt 	if (l->l_wchan != NULL) {
    562  1.149.2.3      yamt 		l->l_stat = LSSLEEP;
    563  1.149.2.3      yamt 		/* lwp_unsleep() will release the lock. */
    564  1.149.2.3      yamt 		lwp_unsleep(l);
    565  1.149.2.3      yamt 		return;
    566  1.149.2.3      yamt 	}
    567      1.139        cl 
    568  1.149.2.3      yamt 	/*
    569  1.149.2.3      yamt 	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
    570  1.149.2.3      yamt 	 * about to call mi_switch(), in which case it will yield.
    571  1.149.2.3      yamt 	 */
    572  1.149.2.4      yamt 	if ((l->l_flag & LW_RUNNING) != 0) {
    573  1.149.2.3      yamt 		l->l_stat = LSONPROC;
    574  1.149.2.3      yamt 		l->l_slptime = 0;
    575  1.149.2.3      yamt 		lwp_unlock(l);
    576  1.149.2.3      yamt 		return;
    577  1.149.2.3      yamt 	}
    578      1.122   thorpej 
    579  1.149.2.3      yamt 	/*
    580  1.149.2.3      yamt 	 * Set the LWP runnable.  If it's swapped out, we need to wake the swapper
    581  1.149.2.3      yamt 	 * to bring it back in.  Otherwise, enter it into a run queue.
    582  1.149.2.3      yamt 	 */
    583  1.149.2.4      yamt 	if (l->l_mutex != l->l_cpu->ci_schedstate.spc_mutex) {
    584  1.149.2.4      yamt 		spc_lock(l->l_cpu);
    585  1.149.2.4      yamt 		lwp_unlock_to(l, l->l_cpu->ci_schedstate.spc_mutex);
    586  1.149.2.4      yamt 	}
    587  1.149.2.4      yamt 
    588  1.149.2.4      yamt 	sched_setrunnable(l);
    589  1.149.2.3      yamt 	l->l_stat = LSRUN;
    590      1.122   thorpej 	l->l_slptime = 0;
    591  1.149.2.3      yamt 
    592  1.149.2.3      yamt 	if (l->l_flag & LW_INMEM) {
    593  1.149.2.4      yamt 		sched_enqueue(l, false);
    594  1.149.2.4      yamt 		resched_cpu(l);
    595  1.149.2.3      yamt 		lwp_unlock(l);
    596  1.149.2.3      yamt 	} else {
    597  1.149.2.3      yamt 		lwp_unlock(l);
    598  1.149.2.3      yamt 		uvm_kick_scheduler();
    599  1.149.2.3      yamt 	}
    600       1.26       cgd }
    601       1.26       cgd 
    602       1.26       cgd /*
    603  1.149.2.3      yamt  * suspendsched:
    604  1.149.2.3      yamt  *
     605  1.149.2.3      yamt  *	Convert all non-PK_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED.
    606  1.149.2.3      yamt  */
    607       1.94    bouyer void
    608  1.149.2.3      yamt suspendsched(void)
    609       1.94    bouyer {
    610  1.149.2.3      yamt 	CPU_INFO_ITERATOR cii;
    611  1.149.2.3      yamt 	struct cpu_info *ci;
    612      1.122   thorpej 	struct lwp *l;
    613  1.149.2.3      yamt 	struct proc *p;
    614       1.94    bouyer 
    615       1.94    bouyer 	/*
    616  1.149.2.3      yamt 	 * We do this by process in order not to violate the locking rules.
    617       1.94    bouyer 	 */
    618  1.149.2.3      yamt 	mutex_enter(&proclist_mutex);
    619  1.149.2.3      yamt 	PROCLIST_FOREACH(p, &allproc) {
    620  1.149.2.3      yamt 		mutex_enter(&p->p_smutex);
    621  1.149.2.3      yamt 
    622  1.149.2.3      yamt 		if ((p->p_flag & PK_SYSTEM) != 0) {
    623  1.149.2.3      yamt 			mutex_exit(&p->p_smutex);
    624       1.94    bouyer 			continue;
    625  1.149.2.3      yamt 		}
    626  1.149.2.3      yamt 
    627  1.149.2.3      yamt 		p->p_stat = SSTOP;
    628  1.149.2.3      yamt 
    629  1.149.2.3      yamt 		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    630  1.149.2.3      yamt 			if (l == curlwp)
    631  1.149.2.3      yamt 				continue;
    632  1.149.2.3      yamt 
    633  1.149.2.3      yamt 			lwp_lock(l);
    634      1.122   thorpej 
    635       1.97     enami 			/*
     636  1.149.2.3      yamt 			 * Set LW_WREBOOT so that the LWP will suspend itself
     637  1.149.2.3      yamt 			 * when it tries to return to user mode.  We want to
     638  1.149.2.3      yamt 			 * try to get as many LWPs as possible to
    639  1.149.2.3      yamt 			 * the user / kernel boundary, so that they will
    640  1.149.2.3      yamt 			 * release any locks that they hold.
    641       1.97     enami 			 */
    642  1.149.2.3      yamt 			l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);
    643  1.149.2.3      yamt 
    644  1.149.2.3      yamt 			if (l->l_stat == LSSLEEP &&
    645  1.149.2.3      yamt 			    (l->l_flag & LW_SINTR) != 0) {
    646  1.149.2.3      yamt 				/* setrunnable() will release the lock. */
    647  1.149.2.3      yamt 				setrunnable(l);
    648  1.149.2.3      yamt 				continue;
    649  1.149.2.3      yamt 			}
    650  1.149.2.3      yamt 
    651  1.149.2.3      yamt 			lwp_unlock(l);
    652       1.94    bouyer 		}
    653  1.149.2.3      yamt 
    654  1.149.2.3      yamt 		mutex_exit(&p->p_smutex);
    655       1.94    bouyer 	}
    656  1.149.2.3      yamt 	mutex_exit(&proclist_mutex);
    657  1.149.2.3      yamt 
    658  1.149.2.3      yamt 	/*
    659  1.149.2.3      yamt 	 * Kick all CPUs to make them preempt any LWPs running in user mode.
    660  1.149.2.3      yamt 	 * They'll trap into the kernel and suspend themselves in userret().
    661  1.149.2.3      yamt 	 */
    662  1.149.2.3      yamt 	for (CPU_INFO_FOREACH(cii, ci))
    663  1.149.2.4      yamt 		cpu_need_resched(ci, 0);
    664  1.149.2.3      yamt }
    665  1.149.2.3      yamt 
    666  1.149.2.3      yamt /*
    667  1.149.2.3      yamt  * sched_kpri:
    668  1.149.2.3      yamt  *
    669  1.149.2.3      yamt  *	Scale a priority level to a kernel priority level, usually
    670  1.149.2.3      yamt  *	for an LWP that is about to sleep.
    671  1.149.2.3      yamt  */
    672  1.149.2.4      yamt pri_t
    673  1.149.2.3      yamt sched_kpri(struct lwp *l)
    674  1.149.2.3      yamt {
    675  1.149.2.3      yamt 	/*
    676  1.149.2.3      yamt 	 * Scale user priorities (127 -> 50) up to kernel priorities
    677  1.149.2.3      yamt 	 * in the range (49 -> 8).  Reserve the top 8 kernel priorities
    678  1.149.2.3      yamt 	 * for high priority kthreads.  Kernel priorities passed in
    679  1.149.2.3      yamt 	 * are left "as is".  XXX This is somewhat arbitrary.
    680  1.149.2.3      yamt 	 */
    681  1.149.2.3      yamt 	static const uint8_t kpri_tab[] = {
    682  1.149.2.3      yamt 		 0,   1,   2,   3,   4,   5,   6,   7,
    683  1.149.2.3      yamt 		 8,   9,  10,  11,  12,  13,  14,  15,
    684  1.149.2.3      yamt 		16,  17,  18,  19,  20,  21,  22,  23,
    685  1.149.2.3      yamt 		24,  25,  26,  27,  28,  29,  30,  31,
    686  1.149.2.3      yamt 		32,  33,  34,  35,  36,  37,  38,  39,
    687  1.149.2.3      yamt 		40,  41,  42,  43,  44,  45,  46,  47,
    688  1.149.2.3      yamt 		48,  49,   8,   8,   9,   9,  10,  10,
    689  1.149.2.3      yamt 		11,  11,  12,  12,  13,  14,  14,  15,
    690  1.149.2.3      yamt 		15,  16,  16,  17,  17,  18,  18,  19,
    691  1.149.2.3      yamt 		20,  20,  21,  21,  22,  22,  23,  23,
    692  1.149.2.3      yamt 		24,  24,  25,  26,  26,  27,  27,  28,
    693  1.149.2.3      yamt 		28,  29,  29,  30,  30,  31,  32,  32,
    694  1.149.2.3      yamt 		33,  33,  34,  34,  35,  35,  36,  36,
    695  1.149.2.3      yamt 		37,  38,  38,  39,  39,  40,  40,  41,
    696  1.149.2.3      yamt 		41,  42,  42,  43,  44,  44,  45,  45,
    697  1.149.2.3      yamt 		46,  46,  47,  47,  48,  48,  49,  49,
    698  1.149.2.3      yamt 	};
    699  1.149.2.3      yamt 
    700  1.149.2.4      yamt 	return (pri_t)kpri_tab[l->l_usrpri];
    701  1.149.2.3      yamt }
    702  1.149.2.3      yamt 
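/*
 * Spot checks of the table above, not part of the original file (recall
 * that smaller numbers are stronger priorities):
 *
 *	kpri_tab[49]  == 49	kernel priorities pass through unchanged
 *	kpri_tab[50]  ==  8	strongest user priority -> strongest slot
 *				in the reserved kernel band
 *	kpri_tab[127] == 49	weakest user priority -> weakest slot
 */
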
    703  1.149.2.3      yamt /*
    704  1.149.2.3      yamt  * sched_unsleep:
    705  1.149.2.3      yamt  *
     706  1.149.2.3      yamt  *	This is called when the LWP has not been awoken normally but instead
    707  1.149.2.3      yamt  *	interrupted: for example, if the sleep timed out.  Because of this,
    708  1.149.2.3      yamt  *	it's not a valid action for running or idle LWPs.
    709  1.149.2.3      yamt  */
    710  1.149.2.4      yamt static void
    711  1.149.2.3      yamt sched_unsleep(struct lwp *l)
    712  1.149.2.3      yamt {
    713  1.149.2.3      yamt 
    714  1.149.2.3      yamt 	lwp_unlock(l);
    715  1.149.2.3      yamt 	panic("sched_unsleep");
    716  1.149.2.3      yamt }
    717  1.149.2.3      yamt 
    718  1.149.2.4      yamt inline void
    719  1.149.2.4      yamt resched_cpu(struct lwp *l)
    720  1.149.2.3      yamt {
    721  1.149.2.4      yamt 	struct cpu_info *ci;
    722  1.149.2.4      yamt 	const pri_t pri = lwp_eprio(l);
    723  1.149.2.3      yamt 
    724  1.149.2.4      yamt 	/*
    725  1.149.2.4      yamt 	 * XXXSMP
    726  1.149.2.4      yamt 	 * Since l->l_cpu persists across a context switch,
    727  1.149.2.4      yamt 	 * this gives us *very weak* processor affinity, in
    728  1.149.2.4      yamt 	 * that we notify the CPU on which the process last
    729  1.149.2.4      yamt 	 * ran that it should try to switch.
    730  1.149.2.4      yamt 	 *
    731  1.149.2.4      yamt 	 * This does not guarantee that the process will run on
    732  1.149.2.4      yamt 	 * that processor next, because another processor might
    733  1.149.2.4      yamt 	 * grab it the next time it performs a context switch.
    734  1.149.2.4      yamt 	 *
    735  1.149.2.4      yamt 	 * This also does not handle the case where its last
    736  1.149.2.4      yamt 	 * CPU is running a higher-priority process, but every
    737  1.149.2.4      yamt 	 * other CPU is running a lower-priority process.  There
    738  1.149.2.4      yamt 	 * are ways to handle this situation, but they're not
    739  1.149.2.4      yamt 	 * currently very pretty, and we also need to weigh the
    740  1.149.2.4      yamt 	 * cost of moving a process from one CPU to another.
    741  1.149.2.4      yamt 	 */
    742  1.149.2.4      yamt 	ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
    743  1.149.2.4      yamt 	if (pri < ci->ci_schedstate.spc_curpriority)
    744  1.149.2.4      yamt 		cpu_need_resched(ci, 0);
    745  1.149.2.4      yamt }
    746  1.149.2.3      yamt 
    747  1.149.2.4      yamt static void
    748  1.149.2.4      yamt sched_changepri(struct lwp *l, pri_t pri)
    749  1.149.2.4      yamt {
    750  1.149.2.4      yamt 
    751  1.149.2.4      yamt 	KASSERT(lwp_locked(l, NULL));
    752  1.149.2.3      yamt 
    753  1.149.2.4      yamt 	l->l_usrpri = pri;
    754  1.149.2.3      yamt 	if (l->l_priority < PUSER)
    755  1.149.2.3      yamt 		return;
    756  1.149.2.4      yamt 
    757  1.149.2.4      yamt 	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
    758  1.149.2.3      yamt 		l->l_priority = pri;
    759  1.149.2.3      yamt 		return;
    760  1.149.2.1      yamt 	}
    761  1.149.2.3      yamt 
    762  1.149.2.4      yamt 	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
    763  1.149.2.4      yamt 
    764  1.149.2.4      yamt 	sched_dequeue(l);
    765  1.149.2.3      yamt 	l->l_priority = pri;
    766  1.149.2.4      yamt 	sched_enqueue(l, false);
    767  1.149.2.4      yamt 	resched_cpu(l);
    768  1.149.2.1      yamt }
    769  1.149.2.1      yamt 
    770      1.146      matt static void
    771  1.149.2.4      yamt sched_lendpri(struct lwp *l, pri_t pri)
    772      1.146      matt {
    773  1.149.2.4      yamt 
    774  1.149.2.4      yamt 	KASSERT(lwp_locked(l, NULL));
    775  1.149.2.4      yamt 
    776  1.149.2.4      yamt 	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
    777  1.149.2.4      yamt 		l->l_inheritedprio = pri;
    778  1.149.2.4      yamt 		return;
    779      1.146      matt 	}
    780  1.149.2.4      yamt 
    781  1.149.2.4      yamt 	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
    782  1.149.2.4      yamt 
    783  1.149.2.4      yamt 	sched_dequeue(l);
    784  1.149.2.4      yamt 	l->l_inheritedprio = pri;
    785  1.149.2.4      yamt 	sched_enqueue(l, false);
    786  1.149.2.4      yamt 	resched_cpu(l);
    787      1.146      matt }
    788      1.146      matt 
    789  1.149.2.4      yamt struct lwp *
    790  1.149.2.4      yamt syncobj_noowner(wchan_t wchan)
    791      1.113  gmcgarry {
    792  1.149.2.3      yamt 
    793  1.149.2.4      yamt 	return NULL;
    794      1.113  gmcgarry }
    795      1.113  gmcgarry 
    796  1.149.2.4      yamt 
    797  1.149.2.4      yamt /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
    798  1.149.2.4      yamt fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */
    799  1.149.2.4      yamt 
    800  1.149.2.3      yamt /*
    801  1.149.2.4      yamt  * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
    802  1.149.2.4      yamt  * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
    803  1.149.2.4      yamt  * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
    804  1.149.2.4      yamt  *
    805  1.149.2.4      yamt  * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
    806  1.149.2.4      yamt  *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
    807  1.149.2.4      yamt  *
     808  1.149.2.4      yamt  * If you don't want to bother with the faster/more-accurate formula, you
    809  1.149.2.4      yamt  * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
     810  1.149.2.4      yamt  * (more general) method of calculating the percentage of CPU used by a process.
    811  1.149.2.3      yamt  */
    812  1.149.2.4      yamt #define	CCPU_SHIFT	(FSHIFT + 1)
    813  1.149.2.4      yamt 
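/*
 * Worked example, not part of the original file: with ccpu = exp(-1/20),
 * each once-per-second sched_pstats() pass scales p_pctcpu by about
 * 0.95122.  After 60 passes the surviving fraction is
 *
 *	exp(-1/20)^60 = exp(-3) ~= 0.0498
 *
 * so roughly 95% of the old value has decayed away, which is where the
 * "decay 95% of p_pctcpu in 60 seconds" figure above comes from.
 */
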
    814  1.149.2.4      yamt /*
    815  1.149.2.4      yamt  * sched_pstats:
    816  1.149.2.4      yamt  *
    817  1.149.2.4      yamt  * Update process statistics and check CPU resource allocation.
    818  1.149.2.4      yamt  * Call scheduler-specific hook to eventually adjust process/LWP
    819  1.149.2.4      yamt  * priorities.
    820  1.149.2.4      yamt  *
    821  1.149.2.4      yamt  *	XXXSMP This needs to be reorganised in order to reduce the locking
    822  1.149.2.4      yamt  *	burden.
    823  1.149.2.4      yamt  */
    824  1.149.2.4      yamt /* ARGSUSED */
    825      1.113  gmcgarry void
    826  1.149.2.4      yamt sched_pstats(void *arg)
    827      1.113  gmcgarry {
    828  1.149.2.4      yamt 	struct rlimit *rlim;
    829  1.149.2.4      yamt 	struct lwp *l;
    830  1.149.2.4      yamt 	struct proc *p;
    831  1.149.2.4      yamt 	int minslp, sig, clkhz;
    832  1.149.2.4      yamt 	long runtm;
    833  1.149.2.3      yamt 
    834  1.149.2.4      yamt 	sched_pstats_ticks++;
    835  1.149.2.3      yamt 
    836  1.149.2.4      yamt 	mutex_enter(&proclist_mutex);
    837  1.149.2.4      yamt 	PROCLIST_FOREACH(p, &allproc) {
    838  1.149.2.4      yamt 		/*
    839  1.149.2.4      yamt 		 * Increment time in/out of memory and sleep time (if
    840  1.149.2.4      yamt 		 * sleeping).  We ignore overflow; with 16-bit int's
    841  1.149.2.4      yamt 		 * (remember them?) overflow takes 45 days.
    842  1.149.2.4      yamt 		 */
    843  1.149.2.4      yamt 		minslp = 2;
    844  1.149.2.4      yamt 		mutex_enter(&p->p_smutex);
    845  1.149.2.4      yamt 		mutex_spin_enter(&p->p_stmutex);
    846  1.149.2.4      yamt 		runtm = p->p_rtime.tv_sec;
    847  1.149.2.4      yamt 		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    848  1.149.2.4      yamt 			if ((l->l_flag & LW_IDLE) != 0)
    849  1.149.2.4      yamt 				continue;
    850  1.149.2.4      yamt 			lwp_lock(l);
    851  1.149.2.4      yamt 			runtm += l->l_rtime.tv_sec;
    852  1.149.2.4      yamt 			l->l_swtime++;
    853  1.149.2.4      yamt 			if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
    854  1.149.2.4      yamt 			    l->l_stat == LSSUSPENDED) {
    855  1.149.2.4      yamt 				l->l_slptime++;
    856  1.149.2.4      yamt 				minslp = min(minslp, l->l_slptime);
    857  1.149.2.4      yamt 			} else
    858  1.149.2.4      yamt 				minslp = 0;
    859  1.149.2.4      yamt 			lwp_unlock(l);
    860  1.149.2.4      yamt 
    861  1.149.2.4      yamt 			/*
     862  1.149.2.4      yamt 			 * l_pctcpu is only for ps.
    863  1.149.2.4      yamt 			 */
    864  1.149.2.4      yamt 			l->l_pctcpu = (l->l_pctcpu * ccpu) >> FSHIFT;
    865  1.149.2.4      yamt 			if (l->l_slptime < 1) {
    866  1.149.2.4      yamt 				clkhz = stathz != 0 ? stathz : hz;
    867  1.149.2.4      yamt #if	(FSHIFT >= CCPU_SHIFT)
    868  1.149.2.4      yamt 				l->l_pctcpu += (clkhz == 100) ?
    869  1.149.2.4      yamt 				    ((fixpt_t)l->l_cpticks) <<
    870  1.149.2.4      yamt 				        (FSHIFT - CCPU_SHIFT) :
     871  1.149.2.4      yamt 				    100 * (((fixpt_t)l->l_cpticks)
    872  1.149.2.4      yamt 				        << (FSHIFT - CCPU_SHIFT)) / clkhz;
    873  1.149.2.4      yamt #else
    874  1.149.2.4      yamt 				l->l_pctcpu += ((FSCALE - ccpu) *
    875  1.149.2.4      yamt 				    (l->l_cpticks * FSCALE / clkhz)) >> FSHIFT;
    876      1.146      matt #endif
    877  1.149.2.4      yamt 				l->l_cpticks = 0;
    878  1.149.2.4      yamt 			}
    879  1.149.2.4      yamt 		}
    880  1.149.2.4      yamt 		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
    881  1.149.2.4      yamt 		sched_pstats_hook(p, minslp);
    882  1.149.2.4      yamt 		mutex_spin_exit(&p->p_stmutex);
    883  1.149.2.3      yamt 
    884  1.149.2.4      yamt 		/*
    885  1.149.2.4      yamt 		 * Check if the process exceeds its CPU resource allocation.
    886  1.149.2.4      yamt 		 * If over max, kill it.
    887  1.149.2.4      yamt 		 */
    888  1.149.2.4      yamt 		rlim = &p->p_rlimit[RLIMIT_CPU];
    889  1.149.2.4      yamt 		sig = 0;
    890  1.149.2.4      yamt 		if (runtm >= rlim->rlim_cur) {
    891  1.149.2.4      yamt 			if (runtm >= rlim->rlim_max)
    892  1.149.2.4      yamt 				sig = SIGKILL;
    893  1.149.2.4      yamt 			else {
    894  1.149.2.4      yamt 				sig = SIGXCPU;
    895  1.149.2.4      yamt 				if (rlim->rlim_cur < rlim->rlim_max)
    896  1.149.2.4      yamt 					rlim->rlim_cur += 5;
    897  1.149.2.4      yamt 			}
    898  1.149.2.4      yamt 		}
    899  1.149.2.4      yamt 		mutex_exit(&p->p_smutex);
    900  1.149.2.4      yamt 		if (sig) {
    901  1.149.2.4      yamt 			psignal(p, sig);
    902  1.149.2.4      yamt 		}
    903  1.149.2.3      yamt 	}
    904  1.149.2.4      yamt 	mutex_exit(&proclist_mutex);
    905  1.149.2.4      yamt 	uvm_meter();
    906  1.149.2.4      yamt 	cv_wakeup(&lbolt);
    907  1.149.2.4      yamt 	callout_schedule(&sched_pstats_ch, hz);
    908      1.113  gmcgarry }
    909      1.113  gmcgarry 
    910  1.149.2.4      yamt void
    911  1.149.2.4      yamt sched_init(void)
    912  1.149.2.4      yamt {
    913  1.149.2.4      yamt 
    914  1.149.2.4      yamt 	cv_init(&lbolt, "lbolt");
    915  1.149.2.4      yamt 	callout_init(&sched_pstats_ch, 0);
    916  1.149.2.4      yamt 	callout_setfunc(&sched_pstats_ch, sched_pstats, NULL);
    917  1.149.2.4      yamt 	sched_setup();
    918  1.149.2.4      yamt 	sched_pstats(NULL);
    919  1.149.2.4      yamt }
    920