      1  1.149.2.5      yamt /*	$NetBSD: kern_synch.c,v 1.149.2.5 2007/10/27 11:35:29 yamt Exp $	*/
      2       1.63   thorpej 
      3       1.63   thorpej /*-
      4  1.149.2.3      yamt  * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
      5       1.63   thorpej  * All rights reserved.
      6       1.63   thorpej  *
      7       1.63   thorpej  * This code is derived from software contributed to The NetBSD Foundation
      8       1.63   thorpej  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  1.149.2.4      yamt  * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and
     10  1.149.2.4      yamt  * Daniel Sieger.
     11       1.63   thorpej  *
     12       1.63   thorpej  * Redistribution and use in source and binary forms, with or without
     13       1.63   thorpej  * modification, are permitted provided that the following conditions
     14       1.63   thorpej  * are met:
     15       1.63   thorpej  * 1. Redistributions of source code must retain the above copyright
     16       1.63   thorpej  *    notice, this list of conditions and the following disclaimer.
     17       1.63   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     18       1.63   thorpej  *    notice, this list of conditions and the following disclaimer in the
     19       1.63   thorpej  *    documentation and/or other materials provided with the distribution.
     20       1.63   thorpej  * 3. All advertising materials mentioning features or use of this software
     21       1.63   thorpej  *    must display the following acknowledgement:
     22       1.63   thorpej  *	This product includes software developed by the NetBSD
     23       1.63   thorpej  *	Foundation, Inc. and its contributors.
     24       1.63   thorpej  * 4. Neither the name of The NetBSD Foundation nor the names of its
     25       1.63   thorpej  *    contributors may be used to endorse or promote products derived
     26       1.63   thorpej  *    from this software without specific prior written permission.
     27       1.63   thorpej  *
     28       1.63   thorpej  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     29       1.63   thorpej  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     30       1.63   thorpej  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     31       1.63   thorpej  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     32       1.63   thorpej  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     33       1.63   thorpej  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     34       1.63   thorpej  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     35       1.63   thorpej  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     36       1.63   thorpej  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     37       1.63   thorpej  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     38       1.63   thorpej  * POSSIBILITY OF SUCH DAMAGE.
     39       1.63   thorpej  */
     40       1.26       cgd 
     41       1.26       cgd /*-
     42       1.26       cgd  * Copyright (c) 1982, 1986, 1990, 1991, 1993
     43       1.26       cgd  *	The Regents of the University of California.  All rights reserved.
     44       1.26       cgd  * (c) UNIX System Laboratories, Inc.
     45       1.26       cgd  * All or some portions of this file are derived from material licensed
     46       1.26       cgd  * to the University of California by American Telephone and Telegraph
     47       1.26       cgd  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     48       1.26       cgd  * the permission of UNIX System Laboratories, Inc.
     49       1.26       cgd  *
     50       1.26       cgd  * Redistribution and use in source and binary forms, with or without
     51       1.26       cgd  * modification, are permitted provided that the following conditions
     52       1.26       cgd  * are met:
     53       1.26       cgd  * 1. Redistributions of source code must retain the above copyright
     54       1.26       cgd  *    notice, this list of conditions and the following disclaimer.
     55       1.26       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     56       1.26       cgd  *    notice, this list of conditions and the following disclaimer in the
     57       1.26       cgd  *    documentation and/or other materials provided with the distribution.
     58      1.136       agc  * 3. Neither the name of the University nor the names of its contributors
     59       1.26       cgd  *    may be used to endorse or promote products derived from this software
     60       1.26       cgd  *    without specific prior written permission.
     61       1.26       cgd  *
     62       1.26       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     63       1.26       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     64       1.26       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     65       1.26       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     66       1.26       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     67       1.26       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     68       1.26       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     69       1.26       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     70       1.26       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     71       1.26       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     72       1.26       cgd  * SUCH DAMAGE.
     73       1.26       cgd  *
     74       1.50      fvdl  *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
     75       1.26       cgd  */
     76      1.106     lukem 
     77      1.106     lukem #include <sys/cdefs.h>
     78  1.149.2.5      yamt __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.149.2.5 2007/10/27 11:35:29 yamt Exp $");
     79       1.48       mrg 
     80      1.109      yamt #include "opt_kstack.h"
     81       1.82   thorpej #include "opt_lockdebug.h"
     82       1.83   thorpej #include "opt_multiprocessor.h"
     83      1.110    briggs #include "opt_perfctrs.h"
     84       1.26       cgd 
     85  1.149.2.3      yamt #define	__MUTEX_PRIVATE
     86  1.149.2.3      yamt 
     87       1.26       cgd #include <sys/param.h>
     88       1.26       cgd #include <sys/systm.h>
     89       1.26       cgd #include <sys/proc.h>
     90       1.26       cgd #include <sys/kernel.h>
     91      1.111    briggs #if defined(PERFCTRS)
     92      1.110    briggs #include <sys/pmc.h>
     93      1.111    briggs #endif
     94  1.149.2.4      yamt #include <sys/cpu.h>
     95       1.26       cgd #include <sys/resourcevar.h>
     96       1.55      ross #include <sys/sched.h>
     97  1.149.2.3      yamt #include <sys/syscall_stats.h>
     98  1.149.2.3      yamt #include <sys/sleepq.h>
     99  1.149.2.3      yamt #include <sys/lockdebug.h>
    100  1.149.2.4      yamt #include <sys/evcnt.h>
    101  1.149.2.5      yamt #include <sys/intr.h>
    102       1.47       mrg 
    103       1.47       mrg #include <uvm/uvm_extern.h>
    104       1.47       mrg 
    105  1.149.2.4      yamt callout_t sched_pstats_ch;
    106  1.149.2.4      yamt unsigned int sched_pstats_ticks;
    107       1.34  christos 
    108  1.149.2.4      yamt kcondvar_t	lbolt;			/* once a second sleep address */
    109       1.26       cgd 
    110  1.149.2.4      yamt static void	sched_unsleep(struct lwp *);
    111  1.149.2.4      yamt static void	sched_changepri(struct lwp *, pri_t);
    112  1.149.2.4      yamt static void	sched_lendpri(struct lwp *, pri_t);
    113      1.122   thorpej 
    114  1.149.2.3      yamt syncobj_t sleep_syncobj = {
    115  1.149.2.3      yamt 	SOBJ_SLEEPQ_SORTED,
    116  1.149.2.3      yamt 	sleepq_unsleep,
    117  1.149.2.4      yamt 	sleepq_changepri,
    118  1.149.2.4      yamt 	sleepq_lendpri,
    119  1.149.2.4      yamt 	syncobj_noowner,
    120  1.149.2.3      yamt };
    121  1.149.2.3      yamt 
    122  1.149.2.3      yamt syncobj_t sched_syncobj = {
    123  1.149.2.3      yamt 	SOBJ_SLEEPQ_SORTED,
    124  1.149.2.3      yamt 	sched_unsleep,
    125  1.149.2.4      yamt 	sched_changepri,
    126  1.149.2.4      yamt 	sched_lendpri,
    127  1.149.2.4      yamt 	syncobj_noowner,
    128  1.149.2.3      yamt };
    129      1.122   thorpej 
    130       1.26       cgd /*
    131  1.149.2.3      yamt  * During autoconfiguration or after a panic, a sleep will simply lower the
    132  1.149.2.3      yamt  * priority briefly to allow interrupts, then return.  The priority to be
    133  1.149.2.3      yamt  * used (safepri) is machine-dependent, thus this value is initialized and
    134  1.149.2.3      yamt  * maintained in the machine-dependent layers.  This priority will typically
    135  1.149.2.3      yamt  * be 0, or the lowest priority that is safe for use on the interrupt stack;
    136  1.149.2.3      yamt  * it can be made higher to block network software interrupts after panics.
    137       1.26       cgd  */
    138  1.149.2.3      yamt int	safepri;
    139       1.26       cgd 
    140       1.26       cgd /*
    141  1.149.2.3      yamt  * OBSOLETE INTERFACE
    142  1.149.2.3      yamt  *
    143       1.26       cgd  * General sleep call.  Suspends the current process until a wakeup is
    144       1.26       cgd  * performed on the specified identifier.  The process will then be made
    145  1.149.2.3      yamt  * runnable with the specified priority.  Sleeps at most timo/hz seconds (0
     146  1.149.2.3      yamt  * means no timeout).  If pri includes the PCATCH flag, signals are checked
    147       1.26       cgd  * before and after sleeping, else signals are not checked.  Returns 0 if
    148       1.26       cgd  * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
    149       1.26       cgd  * signal needs to be delivered, ERESTART is returned if the current system
    150       1.26       cgd  * call should be restarted if possible, and EINTR is returned if the system
     151       1.26       cgd  * call should be interrupted by the signal.
    152       1.77   thorpej  *
    153  1.149.2.3      yamt  * The interlock is held until we are on a sleep queue. The interlock will
     154  1.149.2.3      yamt  * be relocked before returning to the caller unless the PNORELOCK flag
    155  1.149.2.3      yamt  * is specified, in which case the interlock will always be unlocked upon
    156  1.149.2.3      yamt  * return.
    157       1.26       cgd  */
    158       1.26       cgd int
    159  1.149.2.4      yamt ltsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
    160  1.149.2.3      yamt 	volatile struct simplelock *interlock)
    161       1.26       cgd {
    162      1.122   thorpej 	struct lwp *l = curlwp;
    163  1.149.2.3      yamt 	sleepq_t *sq;
    164  1.149.2.4      yamt 	int error;
    165       1.26       cgd 
    166  1.149.2.3      yamt 	if (sleepq_dontsleep(l)) {
    167  1.149.2.3      yamt 		(void)sleepq_abort(NULL, 0);
    168  1.149.2.3      yamt 		if ((priority & PNORELOCK) != 0)
    169       1.77   thorpej 			simple_unlock(interlock);
    170  1.149.2.3      yamt 		return 0;
    171      1.122   thorpej 	}
    172       1.77   thorpej 
    173  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    174  1.149.2.3      yamt 	sleepq_enter(sq, l);
    175  1.149.2.4      yamt 	sleepq_enqueue(sq, priority & PRIMASK, ident, wmesg, &sleep_syncobj);
    176       1.77   thorpej 
    177  1.149.2.3      yamt 	if (interlock != NULL) {
    178  1.149.2.3      yamt 		LOCK_ASSERT(simple_lock_held(interlock));
    179       1.77   thorpej 		simple_unlock(interlock);
    180       1.26       cgd 	}
    181      1.147     perry 
    182  1.149.2.4      yamt 	error = sleepq_block(timo, priority & PCATCH);
    183      1.139        cl 
    184  1.149.2.3      yamt 	if (interlock != NULL && (priority & PNORELOCK) == 0)
    185  1.149.2.3      yamt 		simple_lock(interlock);
    186  1.149.2.3      yamt 
    187  1.149.2.3      yamt 	return error;
    188      1.139        cl }
    189      1.139        cl 
    190  1.149.2.4      yamt int
    191  1.149.2.4      yamt mtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
    192  1.149.2.4      yamt 	kmutex_t *mtx)
    193  1.149.2.4      yamt {
    194  1.149.2.4      yamt 	struct lwp *l = curlwp;
    195  1.149.2.4      yamt 	sleepq_t *sq;
    196  1.149.2.4      yamt 	int error;
    197  1.149.2.4      yamt 
    198  1.149.2.4      yamt 	if (sleepq_dontsleep(l)) {
    199  1.149.2.4      yamt 		(void)sleepq_abort(mtx, (priority & PNORELOCK) != 0);
    200  1.149.2.4      yamt 		return 0;
    201  1.149.2.4      yamt 	}
    202  1.149.2.4      yamt 
    203  1.149.2.4      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    204  1.149.2.4      yamt 	sleepq_enter(sq, l);
    205  1.149.2.4      yamt 	sleepq_enqueue(sq, priority & PRIMASK, ident, wmesg, &sleep_syncobj);
    206  1.149.2.4      yamt 	mutex_exit(mtx);
    207  1.149.2.4      yamt 	error = sleepq_block(timo, priority & PCATCH);
    208  1.149.2.4      yamt 
    209  1.149.2.4      yamt 	if ((priority & PNORELOCK) == 0)
    210  1.149.2.4      yamt 		mutex_enter(mtx);
    211  1.149.2.4      yamt 
    212  1.149.2.4      yamt 	return error;
    213  1.149.2.4      yamt }
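
/*
 * Illustrative sketch: a typical mtsleep() caller waits for a condition
 * under a kmutex and rechecks the condition after every wakeup.  The
 * softc `sc' and its sc_lock/sc_busy members are hypothetical.
 *
 *	mutex_enter(&sc->sc_lock);
 *	while (sc->sc_busy) {
 *		error = mtsleep(&sc->sc_busy, PRIBIO | PCATCH, "busywait",
 *		    0, &sc->sc_lock);
 *		if (error)
 *			break;
 *	}
 *	mutex_exit(&sc->sc_lock);
 *
 * Whoever clears sc_busy issues a matching wakeup(&sc->sc_busy).
 */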
    214  1.149.2.4      yamt 
    215       1.26       cgd /*
    216  1.149.2.3      yamt  * General sleep call for situations where a wake-up is not expected.
    217       1.63   thorpej  */
    218  1.149.2.3      yamt int
    219  1.149.2.3      yamt kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
    220       1.83   thorpej {
    221  1.149.2.3      yamt 	struct lwp *l = curlwp;
    222  1.149.2.3      yamt 	sleepq_t *sq;
    223  1.149.2.3      yamt 	int error;
    224       1.83   thorpej 
    225  1.149.2.3      yamt 	if (sleepq_dontsleep(l))
    226  1.149.2.3      yamt 		return sleepq_abort(NULL, 0);
    227       1.63   thorpej 
    228  1.149.2.3      yamt 	if (mtx != NULL)
    229  1.149.2.3      yamt 		mutex_exit(mtx);
    230  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, l);
    231  1.149.2.3      yamt 	sleepq_enter(sq, l);
    232  1.149.2.4      yamt 	sleepq_enqueue(sq, sched_kpri(l), l, wmesg, &sleep_syncobj);
    233  1.149.2.4      yamt 	error = sleepq_block(timo, intr);
    234  1.149.2.3      yamt 	if (mtx != NULL)
    235  1.149.2.3      yamt 		mutex_enter(mtx);
    236       1.83   thorpej 
    237  1.149.2.3      yamt 	return error;
    238       1.83   thorpej }
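
/*
 * Illustrative sketch: kpause() is the simple way to pause for a fixed
 * interval when no wakeup() is expected, e.g. an uninterruptible pause
 * of roughly one second with no mutex to drop:
 *
 *	(void)kpause("pause", false, hz, NULL);
 */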
    239       1.83   thorpej 
    240       1.63   thorpej /*
    241  1.149.2.3      yamt  * OBSOLETE INTERFACE
    242  1.149.2.3      yamt  *
    243       1.26       cgd  * Make all processes sleeping on the specified identifier runnable.
    244       1.26       cgd  */
    245       1.26       cgd void
    246  1.149.2.3      yamt wakeup(wchan_t ident)
    247       1.26       cgd {
    248  1.149.2.3      yamt 	sleepq_t *sq;
    249       1.83   thorpej 
    250  1.149.2.3      yamt 	if (cold)
    251  1.149.2.3      yamt 		return;
    252       1.83   thorpej 
    253  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    254  1.149.2.3      yamt 	sleepq_wake(sq, ident, (u_int)-1);
    255       1.63   thorpej }
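
/*
 * Illustrative sketch (hypothetical `flag' variable): the classic pattern
 * pairs a sleep loop with a wakeup on the same address, and the sleeper
 * always rechecks the condition after the sleep returns:
 *
 *	while (flag == 0)
 *		(void)ltsleep(&flag, PWAIT, "flag", 0, NULL);
 *	...
 *	flag = 1;
 *	wakeup(&flag);
 */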
    256       1.63   thorpej 
    257       1.63   thorpej /*
    258  1.149.2.3      yamt  * OBSOLETE INTERFACE
    259  1.149.2.3      yamt  *
    260       1.63   thorpej  * Make the highest priority process first in line on the specified
    261       1.63   thorpej  * identifier runnable.
    262       1.63   thorpej  */
    263  1.149.2.3      yamt void
    264  1.149.2.3      yamt wakeup_one(wchan_t ident)
    265       1.63   thorpej {
    266  1.149.2.3      yamt 	sleepq_t *sq;
    267       1.63   thorpej 
    268  1.149.2.3      yamt 	if (cold)
    269  1.149.2.3      yamt 		return;
    270  1.149.2.4      yamt 
    271  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    272  1.149.2.3      yamt 	sleepq_wake(sq, ident, 1);
    273      1.117  gmcgarry }
    274      1.117  gmcgarry 
    275  1.149.2.3      yamt 
    276      1.117  gmcgarry /*
    277      1.117  gmcgarry  * General yield call.  Puts the current process back on its run queue and
    278      1.117  gmcgarry  * performs a voluntary context switch.  Should only be called when the
     279  1.149.2.5      yamt  * current process explicitly requests it (e.g. sched_yield(2)).
    280      1.117  gmcgarry  */
    281      1.117  gmcgarry void
    282      1.117  gmcgarry yield(void)
    283      1.117  gmcgarry {
    284      1.122   thorpej 	struct lwp *l = curlwp;
    285      1.117  gmcgarry 
    286  1.149.2.3      yamt 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
    287  1.149.2.3      yamt 	lwp_lock(l);
    288  1.149.2.4      yamt 	KASSERT(lwp_locked(l, &l->l_cpu->ci_schedstate.spc_lwplock));
    289  1.149.2.4      yamt 	KASSERT(l->l_stat == LSONPROC);
    290  1.149.2.5      yamt 	/* XXX Only do this for timeshared threads. */
    291  1.149.2.5      yamt 	l->l_priority = MAXPRI;
    292  1.149.2.4      yamt 	(void)mi_switch(l);
    293  1.149.2.3      yamt 	KERNEL_LOCK(l->l_biglocks, l);
    294       1.69   thorpej }
    295       1.69   thorpej 
    296       1.69   thorpej /*
    297       1.69   thorpej  * General preemption call.  Puts the current process back on its run queue
    298  1.149.2.1      yamt  * and performs an involuntary context switch.
    299       1.69   thorpej  */
    300       1.69   thorpej void
    301  1.149.2.3      yamt preempt(void)
    302       1.69   thorpej {
    303      1.122   thorpej 	struct lwp *l = curlwp;
    304       1.69   thorpej 
    305  1.149.2.3      yamt 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
    306  1.149.2.3      yamt 	lwp_lock(l);
    307  1.149.2.4      yamt 	KASSERT(lwp_locked(l, &l->l_cpu->ci_schedstate.spc_lwplock));
    308  1.149.2.4      yamt 	KASSERT(l->l_stat == LSONPROC);
    309  1.149.2.4      yamt 	l->l_priority = l->l_usrpri;
    310  1.149.2.3      yamt 	l->l_nivcsw++;
    311  1.149.2.4      yamt 	(void)mi_switch(l);
    312  1.149.2.3      yamt 	KERNEL_LOCK(l->l_biglocks, l);
    313       1.69   thorpej }
    314       1.69   thorpej 
    315       1.69   thorpej /*
    316  1.149.2.4      yamt  * Compute the amount of time during which the current lwp was running.
    317      1.130   nathanw  *
    318  1.149.2.4      yamt  * - update l_rtime unless it's an idle lwp.
    319  1.149.2.4      yamt  */
    320  1.149.2.4      yamt 
    321  1.149.2.5      yamt void
    322  1.149.2.5      yamt updatertime(lwp_t *l, const struct timeval *tv)
    323  1.149.2.4      yamt {
    324  1.149.2.4      yamt 	long s, u;
    325  1.149.2.4      yamt 
    326  1.149.2.5      yamt 	if ((l->l_flag & LW_IDLE) != 0)
    327  1.149.2.4      yamt 		return;
    328  1.149.2.4      yamt 
    329  1.149.2.5      yamt 	u = l->l_rtime.tv_usec + (tv->tv_usec - l->l_stime.tv_usec);
    330  1.149.2.5      yamt 	s = l->l_rtime.tv_sec + (tv->tv_sec - l->l_stime.tv_sec);
    331  1.149.2.4      yamt 	if (u < 0) {
    332  1.149.2.4      yamt 		u += 1000000;
    333  1.149.2.4      yamt 		s--;
    334  1.149.2.4      yamt 	} else if (u >= 1000000) {
    335  1.149.2.4      yamt 		u -= 1000000;
    336  1.149.2.4      yamt 		s++;
    337  1.149.2.4      yamt 	}
    338  1.149.2.4      yamt 	l->l_rtime.tv_usec = u;
    339  1.149.2.4      yamt 	l->l_rtime.tv_sec = s;
    340  1.149.2.4      yamt }
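
/*
 * Worked example of the normalization above: if l_rtime is
 * { 2 s, 900000 us } and the interval since l_stime is 300000 us, then
 * u = 1200000 >= 1000000, so the stored total becomes { 3 s, 200000 us }.
 */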
    341  1.149.2.4      yamt 
    342  1.149.2.4      yamt /*
    343  1.149.2.4      yamt  * The machine independent parts of context switch.
    344  1.149.2.4      yamt  *
    345  1.149.2.4      yamt  * Returns 1 if another LWP was actually run.
    346       1.26       cgd  */
    347      1.122   thorpej int
    348  1.149.2.5      yamt mi_switch(lwp_t *l)
    349       1.26       cgd {
    350       1.76   thorpej 	struct schedstate_percpu *spc;
    351  1.149.2.4      yamt 	struct lwp *newl;
    352  1.149.2.3      yamt 	int retval, oldspl;
    353  1.149.2.5      yamt 	struct cpu_info *ci;
    354  1.149.2.5      yamt 	struct timeval tv;
    355  1.149.2.5      yamt 	bool returning;
    356       1.85  sommerfe 
    357  1.149.2.4      yamt 	KASSERT(lwp_locked(l, NULL));
    358  1.149.2.4      yamt 	LOCKDEBUG_BARRIER(l->l_mutex, 1);
    359       1.76   thorpej 
    360  1.149.2.3      yamt #ifdef KSTACK_CHECK_MAGIC
    361  1.149.2.3      yamt 	kstack_check_magic(l);
    362  1.149.2.3      yamt #endif
    363  1.149.2.3      yamt 
    364  1.149.2.5      yamt 	microtime(&tv);
    365  1.149.2.5      yamt 
    366  1.149.2.3      yamt 	/*
     367  1.149.2.3      yamt 	 * It's safe to read the per-CPU schedstate unlocked here, as all we
     368  1.149.2.3      yamt 	 * are after is the run time and that's guaranteed to have been last
    369  1.149.2.3      yamt 	 * updated by this CPU.
    370  1.149.2.3      yamt 	 */
    371  1.149.2.5      yamt 	ci = l->l_cpu;
    372  1.149.2.5      yamt 	KDASSERT(ci == curcpu());
    373       1.81   thorpej 
    374       1.26       cgd 	/*
    375  1.149.2.4      yamt 	 * Process is about to yield the CPU; clear the appropriate
    376  1.149.2.4      yamt 	 * scheduling flags.
    377       1.26       cgd 	 */
    378  1.149.2.5      yamt 	spc = &ci->ci_schedstate;
    379  1.149.2.5      yamt 	returning = false;
    380  1.149.2.4      yamt 	newl = NULL;
    381  1.149.2.4      yamt 
    382  1.149.2.5      yamt 	/*
    383  1.149.2.5      yamt 	 * If we have been asked to switch to a specific LWP, then there
    384  1.149.2.5      yamt 	 * is no need to inspect the run queues.  If a soft interrupt is
    385  1.149.2.5      yamt 	 * blocking, then return to the interrupted thread without adjusting
     386  1.149.2.5      yamt 	 * VM context or its start time: neither has been changed in order
    387  1.149.2.5      yamt 	 * to take the interrupt.
    388  1.149.2.5      yamt 	 */
    389  1.149.2.4      yamt 	if (l->l_switchto != NULL) {
    390  1.149.2.5      yamt 		if ((l->l_flag & LW_INTR) != 0) {
    391  1.149.2.5      yamt 			returning = true;
    392  1.149.2.5      yamt 			softint_block(l);
    393  1.149.2.5      yamt 			if ((l->l_flag & LW_TIMEINTR) != 0)
    394  1.149.2.5      yamt 				updatertime(l, &tv);
    395  1.149.2.5      yamt 		}
    396  1.149.2.4      yamt 		newl = l->l_switchto;
    397  1.149.2.4      yamt 		l->l_switchto = NULL;
    398       1.26       cgd 	}
    399  1.149.2.3      yamt 
    400  1.149.2.3      yamt 	/* Count time spent in current system call */
    401  1.149.2.5      yamt 	if (!returning) {
    402  1.149.2.5      yamt 		SYSCALL_TIME_SLEEP(l);
    403       1.26       cgd 
    404  1.149.2.5      yamt 		/*
    405  1.149.2.5      yamt 		 * XXXSMP If we are using h/w performance counters,
    406  1.149.2.5      yamt 		 * save context.
    407  1.149.2.5      yamt 		 */
    408  1.149.2.3      yamt #if PERFCTRS
    409  1.149.2.5      yamt 		if (PMC_ENABLED(l->l_proc)) {
    410  1.149.2.5      yamt 			pmc_save_context(l->l_proc);
    411  1.149.2.5      yamt 		}
    412      1.109      yamt #endif
    413  1.149.2.5      yamt 		updatertime(l, &tv);
    414  1.149.2.5      yamt 	}
    415      1.113  gmcgarry 
    416      1.113  gmcgarry 	/*
     417  1.149.2.3      yamt 	 * If we are on the CPU and have gotten this far, then we must yield.
    418      1.113  gmcgarry 	 */
    419  1.149.2.4      yamt 	mutex_spin_enter(spc->spc_mutex);
    420  1.149.2.3      yamt 	KASSERT(l->l_stat != LSRUN);
    421  1.149.2.3      yamt 	if (l->l_stat == LSONPROC) {
    422  1.149.2.4      yamt 		KASSERT(lwp_locked(l, &spc->spc_lwplock));
    423  1.149.2.4      yamt 		if ((l->l_flag & LW_IDLE) == 0) {
    424  1.149.2.4      yamt 			l->l_stat = LSRUN;
    425  1.149.2.4      yamt 			lwp_setlock(l, spc->spc_mutex);
    426  1.149.2.4      yamt 			sched_enqueue(l, true);
    427  1.149.2.4      yamt 		} else
    428  1.149.2.4      yamt 			l->l_stat = LSIDL;
    429  1.149.2.3      yamt 	}
    430  1.149.2.3      yamt 
    431  1.149.2.3      yamt 	/*
     432  1.149.2.5      yamt 	 * Let sched_nextlwp() select the LWP to run on this CPU next.
     433  1.149.2.4      yamt 	 * If no LWP is runnable, switch to the idle LWP.
     434  1.149.2.5      yamt 	 * Note that spc_lwplock might not necessarily be held.
    435  1.149.2.3      yamt 	 */
    436  1.149.2.4      yamt 	if (newl == NULL) {
    437  1.149.2.4      yamt 		newl = sched_nextlwp();
    438  1.149.2.4      yamt 		if (newl != NULL) {
    439  1.149.2.4      yamt 			sched_dequeue(newl);
    440  1.149.2.4      yamt 			KASSERT(lwp_locked(newl, spc->spc_mutex));
    441  1.149.2.4      yamt 			newl->l_stat = LSONPROC;
    442  1.149.2.5      yamt 			newl->l_cpu = ci;
    443  1.149.2.4      yamt 			newl->l_flag |= LW_RUNNING;
    444  1.149.2.4      yamt 			lwp_setlock(newl, &spc->spc_lwplock);
    445  1.149.2.4      yamt 		} else {
    446  1.149.2.5      yamt 			newl = ci->ci_data.cpu_idlelwp;
    447  1.149.2.4      yamt 			newl->l_stat = LSONPROC;
    448  1.149.2.4      yamt 			newl->l_flag |= LW_RUNNING;
    449  1.149.2.4      yamt 		}
    450  1.149.2.5      yamt 		ci->ci_want_resched = 0;
    451  1.149.2.5      yamt 		spc->spc_flags &= ~SPCF_SWITCHCLEAR;
    452  1.149.2.5      yamt 	}
    453  1.149.2.5      yamt 
    454  1.149.2.5      yamt 	/* Update the new LWP's start time while it is still locked. */
    455  1.149.2.5      yamt 	if (!returning) {
    456  1.149.2.5      yamt 		newl->l_stime = tv;
    457  1.149.2.5      yamt 		/*
    458  1.149.2.5      yamt 		 * XXX The following may be done unlocked if newl != NULL
    459  1.149.2.5      yamt 		 * above.
    460  1.149.2.5      yamt 		 */
    461  1.149.2.4      yamt 		newl->l_priority = newl->l_usrpri;
    462  1.149.2.4      yamt 	}
    463  1.149.2.3      yamt 
    464  1.149.2.5      yamt 	spc->spc_curpriority = newl->l_usrpri;
    465  1.149.2.5      yamt 
    466  1.149.2.4      yamt 	if (l != newl) {
    467  1.149.2.4      yamt 		struct lwp *prevlwp;
    468  1.149.2.3      yamt 
    469  1.149.2.4      yamt 		/*
    470  1.149.2.4      yamt 		 * If the old LWP has been moved to a run queue above,
    471  1.149.2.4      yamt 		 * drop the general purpose LWP lock: it's now locked
    472  1.149.2.4      yamt 		 * by the scheduler lock.
    473  1.149.2.4      yamt 		 *
    474  1.149.2.4      yamt 		 * Otherwise, drop the scheduler lock.  We're done with
    475  1.149.2.4      yamt 		 * the run queues for now.
    476  1.149.2.4      yamt 		 */
    477  1.149.2.4      yamt 		if (l->l_mutex == spc->spc_mutex) {
    478  1.149.2.4      yamt 			mutex_spin_exit(&spc->spc_lwplock);
    479  1.149.2.4      yamt 		} else {
    480  1.149.2.4      yamt 			mutex_spin_exit(spc->spc_mutex);
    481  1.149.2.4      yamt 		}
    482  1.149.2.4      yamt 
    483  1.149.2.4      yamt 		/* Unlocked, but for statistics only. */
    484  1.149.2.4      yamt 		uvmexp.swtch++;
    485  1.149.2.4      yamt 
    486  1.149.2.5      yamt 		/*
    487  1.149.2.5      yamt 		 * Save old VM context, unless a soft interrupt
    488  1.149.2.5      yamt 		 * handler is blocking.
    489  1.149.2.5      yamt 		 */
    490  1.149.2.5      yamt 		if (!returning)
    491  1.149.2.5      yamt 			pmap_deactivate(l);
    492  1.149.2.4      yamt 
    493  1.149.2.4      yamt 		/* Switch to the new LWP.. */
    494  1.149.2.4      yamt 		l->l_ncsw++;
    495  1.149.2.4      yamt 		l->l_flag &= ~LW_RUNNING;
    496  1.149.2.5      yamt 		oldspl = MUTEX_SPIN_OLDSPL(ci);
    497  1.149.2.4      yamt 		prevlwp = cpu_switchto(l, newl);
    498  1.149.2.4      yamt 
    499  1.149.2.4      yamt 		/*
    500  1.149.2.4      yamt 		 * .. we have switched away and are now back so we must
    501  1.149.2.4      yamt 		 * be the new curlwp.  prevlwp is who we replaced.
    502  1.149.2.4      yamt 		 */
    503  1.149.2.4      yamt 		if (prevlwp != NULL) {
    504  1.149.2.4      yamt 			curcpu()->ci_mtx_oldspl = oldspl;
    505  1.149.2.4      yamt 			lwp_unlock(prevlwp);
    506  1.149.2.4      yamt 		} else {
    507  1.149.2.4      yamt 			splx(oldspl);
    508  1.149.2.4      yamt 		}
    509  1.149.2.3      yamt 
    510  1.149.2.4      yamt 		/* Restore VM context. */
    511  1.149.2.4      yamt 		pmap_activate(l);
    512  1.149.2.4      yamt 		retval = 1;
    513  1.149.2.4      yamt 	} else {
    514  1.149.2.4      yamt 		/* Nothing to do - just unlock and return. */
    515  1.149.2.4      yamt 		mutex_spin_exit(spc->spc_mutex);
    516  1.149.2.4      yamt 		lwp_unlock(l);
    517      1.122   thorpej 		retval = 0;
    518      1.122   thorpej 	}
    519      1.110    briggs 
    520  1.149.2.4      yamt 	KASSERT(l == curlwp);
    521  1.149.2.4      yamt 	KASSERT(l->l_stat == LSONPROC);
    522  1.149.2.5      yamt 	KASSERT(l->l_cpu == curcpu());
    523  1.149.2.4      yamt 
    524      1.110    briggs 	/*
    525  1.149.2.3      yamt 	 * XXXSMP If we are using h/w performance counters, restore context.
    526       1.26       cgd 	 */
    527      1.114  gmcgarry #if PERFCTRS
    528  1.149.2.3      yamt 	if (PMC_ENABLED(l->l_proc)) {
    529  1.149.2.3      yamt 		pmc_restore_context(l->l_proc);
    530  1.149.2.2      yamt 	}
    531      1.114  gmcgarry #endif
    532      1.110    briggs 
    533      1.110    briggs 	/*
     534       1.76   thorpej 	 * We're running again.  We might
    535  1.149.2.3      yamt 	 * be running on a new CPU now, so don't use the cached
    536       1.76   thorpej 	 * schedstate_percpu pointer.
    537       1.76   thorpej 	 */
    538  1.149.2.3      yamt 	SYSCALL_TIME_WAKEUP(l);
    539  1.149.2.5      yamt 	KASSERT(curlwp == l);
    540      1.122   thorpej 	KDASSERT(l->l_cpu == curcpu());
    541  1.149.2.4      yamt 	LOCKDEBUG_BARRIER(NULL, 1);
    542  1.149.2.2      yamt 
    543      1.122   thorpej 	return retval;
    544       1.26       cgd }
    545       1.26       cgd 
    546       1.26       cgd /*
    547  1.149.2.3      yamt  * Change process state to be runnable, placing it on the run queue if it is
    548  1.149.2.3      yamt  * in memory, and awakening the swapper if it isn't in memory.
    549  1.149.2.3      yamt  *
    550  1.149.2.3      yamt  * Call with the process and LWP locked.  Will return with the LWP unlocked.
    551       1.26       cgd  */
    552       1.26       cgd void
    553      1.122   thorpej setrunnable(struct lwp *l)
    554       1.26       cgd {
    555      1.122   thorpej 	struct proc *p = l->l_proc;
    556  1.149.2.3      yamt 	sigset_t *ss;
    557       1.26       cgd 
    558  1.149.2.4      yamt 	KASSERT((l->l_flag & LW_IDLE) == 0);
    559  1.149.2.3      yamt 	KASSERT(mutex_owned(&p->p_smutex));
    560  1.149.2.3      yamt 	KASSERT(lwp_locked(l, NULL));
    561       1.83   thorpej 
    562      1.122   thorpej 	switch (l->l_stat) {
    563      1.122   thorpej 	case LSSTOP:
    564       1.33   mycroft 		/*
    565       1.33   mycroft 		 * If we're being traced (possibly because someone attached us
    566       1.33   mycroft 		 * while we were stopped), check for a signal from the debugger.
    567       1.33   mycroft 		 */
    568  1.149.2.3      yamt 		if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xstat != 0) {
    569  1.149.2.3      yamt 			if ((sigprop[p->p_xstat] & SA_TOLWP) != 0)
    570  1.149.2.3      yamt 				ss = &l->l_sigpend.sp_set;
    571  1.149.2.3      yamt 			else
    572  1.149.2.3      yamt 				ss = &p->p_sigpend.sp_set;
    573  1.149.2.3      yamt 			sigaddset(ss, p->p_xstat);
    574  1.149.2.3      yamt 			signotify(l);
    575       1.53   mycroft 		}
    576  1.149.2.3      yamt 		p->p_nrlwps++;
    577      1.122   thorpej 		break;
    578      1.122   thorpej 	case LSSUSPENDED:
    579  1.149.2.3      yamt 		l->l_flag &= ~LW_WSUSPEND;
    580  1.149.2.3      yamt 		p->p_nrlwps++;
    581  1.149.2.4      yamt 		cv_broadcast(&p->p_lwpcv);
    582  1.149.2.3      yamt 		break;
    583  1.149.2.3      yamt 	case LSSLEEP:
    584  1.149.2.3      yamt 		KASSERT(l->l_wchan != NULL);
    585       1.26       cgd 		break;
    586  1.149.2.3      yamt 	default:
    587  1.149.2.3      yamt 		panic("setrunnable: lwp %p state was %d", l, l->l_stat);
    588       1.26       cgd 	}
    589      1.139        cl 
    590  1.149.2.3      yamt 	/*
     591  1.149.2.3      yamt 	 * If the LWP was sleeping interruptibly, then it's OK to start it
    592  1.149.2.3      yamt 	 * again.  If not, mark it as still sleeping.
    593  1.149.2.3      yamt 	 */
    594  1.149.2.3      yamt 	if (l->l_wchan != NULL) {
    595  1.149.2.3      yamt 		l->l_stat = LSSLEEP;
    596  1.149.2.3      yamt 		/* lwp_unsleep() will release the lock. */
    597  1.149.2.3      yamt 		lwp_unsleep(l);
    598  1.149.2.3      yamt 		return;
    599  1.149.2.3      yamt 	}
    600      1.139        cl 
    601  1.149.2.3      yamt 	/*
    602  1.149.2.3      yamt 	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
    603  1.149.2.3      yamt 	 * about to call mi_switch(), in which case it will yield.
    604  1.149.2.3      yamt 	 */
    605  1.149.2.4      yamt 	if ((l->l_flag & LW_RUNNING) != 0) {
    606  1.149.2.3      yamt 		l->l_stat = LSONPROC;
    607  1.149.2.3      yamt 		l->l_slptime = 0;
    608  1.149.2.3      yamt 		lwp_unlock(l);
    609  1.149.2.3      yamt 		return;
    610  1.149.2.3      yamt 	}
    611      1.122   thorpej 
    612  1.149.2.3      yamt 	/*
    613  1.149.2.3      yamt 	 * Set the LWP runnable.  If it's swapped out, we need to wake the swapper
    614  1.149.2.3      yamt 	 * to bring it back in.  Otherwise, enter it into a run queue.
    615  1.149.2.3      yamt 	 */
    616  1.149.2.4      yamt 	if (l->l_mutex != l->l_cpu->ci_schedstate.spc_mutex) {
    617  1.149.2.4      yamt 		spc_lock(l->l_cpu);
    618  1.149.2.4      yamt 		lwp_unlock_to(l, l->l_cpu->ci_schedstate.spc_mutex);
    619  1.149.2.4      yamt 	}
    620  1.149.2.4      yamt 
    621  1.149.2.4      yamt 	sched_setrunnable(l);
    622  1.149.2.3      yamt 	l->l_stat = LSRUN;
    623      1.122   thorpej 	l->l_slptime = 0;
    624  1.149.2.3      yamt 
    625  1.149.2.3      yamt 	if (l->l_flag & LW_INMEM) {
    626  1.149.2.4      yamt 		sched_enqueue(l, false);
    627  1.149.2.4      yamt 		resched_cpu(l);
    628  1.149.2.3      yamt 		lwp_unlock(l);
    629  1.149.2.3      yamt 	} else {
    630  1.149.2.3      yamt 		lwp_unlock(l);
    631  1.149.2.3      yamt 		uvm_kick_scheduler();
    632  1.149.2.3      yamt 	}
    633       1.26       cgd }
    634       1.26       cgd 
    635       1.26       cgd /*
    636  1.149.2.3      yamt  * suspendsched:
    637  1.149.2.3      yamt  *
     638  1.149.2.3      yamt  *	Convert all non-PK_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED.
    639  1.149.2.3      yamt  */
    640       1.94    bouyer void
    641  1.149.2.3      yamt suspendsched(void)
    642       1.94    bouyer {
    643  1.149.2.3      yamt 	CPU_INFO_ITERATOR cii;
    644  1.149.2.3      yamt 	struct cpu_info *ci;
    645      1.122   thorpej 	struct lwp *l;
    646  1.149.2.3      yamt 	struct proc *p;
    647       1.94    bouyer 
    648       1.94    bouyer 	/*
    649  1.149.2.3      yamt 	 * We do this by process in order not to violate the locking rules.
    650       1.94    bouyer 	 */
    651  1.149.2.3      yamt 	mutex_enter(&proclist_mutex);
    652  1.149.2.3      yamt 	PROCLIST_FOREACH(p, &allproc) {
    653  1.149.2.3      yamt 		mutex_enter(&p->p_smutex);
    654  1.149.2.3      yamt 
    655  1.149.2.3      yamt 		if ((p->p_flag & PK_SYSTEM) != 0) {
    656  1.149.2.3      yamt 			mutex_exit(&p->p_smutex);
    657       1.94    bouyer 			continue;
    658  1.149.2.3      yamt 		}
    659  1.149.2.3      yamt 
    660  1.149.2.3      yamt 		p->p_stat = SSTOP;
    661  1.149.2.3      yamt 
    662  1.149.2.3      yamt 		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    663  1.149.2.3      yamt 			if (l == curlwp)
    664  1.149.2.3      yamt 				continue;
    665  1.149.2.3      yamt 
    666  1.149.2.3      yamt 			lwp_lock(l);
    667      1.122   thorpej 
    668       1.97     enami 			/*
     669  1.149.2.3      yamt 			 * Set LW_WREBOOT so that the LWP will suspend itself
     670  1.149.2.3      yamt 			 * when it tries to return to user mode.  We want to
     671  1.149.2.3      yamt 			 * try to get as many LWPs as possible to
    672  1.149.2.3      yamt 			 * the user / kernel boundary, so that they will
    673  1.149.2.3      yamt 			 * release any locks that they hold.
    674       1.97     enami 			 */
    675  1.149.2.3      yamt 			l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);
    676  1.149.2.3      yamt 
    677  1.149.2.3      yamt 			if (l->l_stat == LSSLEEP &&
    678  1.149.2.3      yamt 			    (l->l_flag & LW_SINTR) != 0) {
    679  1.149.2.3      yamt 				/* setrunnable() will release the lock. */
    680  1.149.2.3      yamt 				setrunnable(l);
    681  1.149.2.3      yamt 				continue;
    682  1.149.2.3      yamt 			}
    683  1.149.2.3      yamt 
    684  1.149.2.3      yamt 			lwp_unlock(l);
    685       1.94    bouyer 		}
    686  1.149.2.3      yamt 
    687  1.149.2.3      yamt 		mutex_exit(&p->p_smutex);
    688       1.94    bouyer 	}
    689  1.149.2.3      yamt 	mutex_exit(&proclist_mutex);
    690  1.149.2.3      yamt 
    691  1.149.2.3      yamt 	/*
    692  1.149.2.3      yamt 	 * Kick all CPUs to make them preempt any LWPs running in user mode.
    693  1.149.2.3      yamt 	 * They'll trap into the kernel and suspend themselves in userret().
    694  1.149.2.3      yamt 	 */
    695  1.149.2.3      yamt 	for (CPU_INFO_FOREACH(cii, ci))
    696  1.149.2.4      yamt 		cpu_need_resched(ci, 0);
    697  1.149.2.3      yamt }
    698  1.149.2.3      yamt 
    699  1.149.2.3      yamt /*
    700  1.149.2.3      yamt  * sched_kpri:
    701  1.149.2.3      yamt  *
    702  1.149.2.3      yamt  *	Scale a priority level to a kernel priority level, usually
    703  1.149.2.3      yamt  *	for an LWP that is about to sleep.
    704  1.149.2.3      yamt  */
    705  1.149.2.4      yamt pri_t
    706  1.149.2.3      yamt sched_kpri(struct lwp *l)
    707  1.149.2.3      yamt {
    708  1.149.2.3      yamt 	/*
    709  1.149.2.3      yamt 	 * Scale user priorities (127 -> 50) up to kernel priorities
    710  1.149.2.3      yamt 	 * in the range (49 -> 8).  Reserve the top 8 kernel priorities
    711  1.149.2.3      yamt 	 * for high priority kthreads.  Kernel priorities passed in
    712  1.149.2.3      yamt 	 * are left "as is".  XXX This is somewhat arbitrary.
    713  1.149.2.3      yamt 	 */
    714  1.149.2.3      yamt 	static const uint8_t kpri_tab[] = {
    715  1.149.2.3      yamt 		 0,   1,   2,   3,   4,   5,   6,   7,
    716  1.149.2.3      yamt 		 8,   9,  10,  11,  12,  13,  14,  15,
    717  1.149.2.3      yamt 		16,  17,  18,  19,  20,  21,  22,  23,
    718  1.149.2.3      yamt 		24,  25,  26,  27,  28,  29,  30,  31,
    719  1.149.2.3      yamt 		32,  33,  34,  35,  36,  37,  38,  39,
    720  1.149.2.3      yamt 		40,  41,  42,  43,  44,  45,  46,  47,
    721  1.149.2.3      yamt 		48,  49,   8,   8,   9,   9,  10,  10,
    722  1.149.2.3      yamt 		11,  11,  12,  12,  13,  14,  14,  15,
    723  1.149.2.3      yamt 		15,  16,  16,  17,  17,  18,  18,  19,
    724  1.149.2.3      yamt 		20,  20,  21,  21,  22,  22,  23,  23,
    725  1.149.2.3      yamt 		24,  24,  25,  26,  26,  27,  27,  28,
    726  1.149.2.3      yamt 		28,  29,  29,  30,  30,  31,  32,  32,
    727  1.149.2.3      yamt 		33,  33,  34,  34,  35,  35,  36,  36,
    728  1.149.2.3      yamt 		37,  38,  38,  39,  39,  40,  40,  41,
    729  1.149.2.3      yamt 		41,  42,  42,  43,  44,  44,  45,  45,
    730  1.149.2.3      yamt 		46,  46,  47,  47,  48,  48,  49,  49,
    731  1.149.2.3      yamt 	};
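
	/*
	 * For example, reading the table above: the lowest user priority,
	 * 127, maps to kernel priority 49; the highest user priority, 50,
	 * maps to 8; and values below 50, which are already kernel
	 * priorities, map to themselves.
	 */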
    732  1.149.2.3      yamt 
    733  1.149.2.4      yamt 	return (pri_t)kpri_tab[l->l_usrpri];
    734  1.149.2.3      yamt }
    735  1.149.2.3      yamt 
    736  1.149.2.3      yamt /*
    737  1.149.2.3      yamt  * sched_unsleep:
    738  1.149.2.3      yamt  *
     739  1.149.2.3      yamt  *	This is called when the LWP has not been awoken normally but instead
    740  1.149.2.3      yamt  *	interrupted: for example, if the sleep timed out.  Because of this,
    741  1.149.2.3      yamt  *	it's not a valid action for running or idle LWPs.
    742  1.149.2.3      yamt  */
    743  1.149.2.4      yamt static void
    744  1.149.2.3      yamt sched_unsleep(struct lwp *l)
    745  1.149.2.3      yamt {
    746  1.149.2.3      yamt 
    747  1.149.2.3      yamt 	lwp_unlock(l);
    748  1.149.2.3      yamt 	panic("sched_unsleep");
    749  1.149.2.3      yamt }
    750  1.149.2.3      yamt 
    751  1.149.2.4      yamt inline void
    752  1.149.2.4      yamt resched_cpu(struct lwp *l)
    753  1.149.2.3      yamt {
    754  1.149.2.4      yamt 	struct cpu_info *ci;
    755  1.149.2.4      yamt 	const pri_t pri = lwp_eprio(l);
    756  1.149.2.3      yamt 
    757  1.149.2.4      yamt 	/*
    758  1.149.2.4      yamt 	 * XXXSMP
    759  1.149.2.4      yamt 	 * Since l->l_cpu persists across a context switch,
    760  1.149.2.4      yamt 	 * this gives us *very weak* processor affinity, in
    761  1.149.2.4      yamt 	 * that we notify the CPU on which the process last
    762  1.149.2.4      yamt 	 * ran that it should try to switch.
    763  1.149.2.4      yamt 	 *
    764  1.149.2.4      yamt 	 * This does not guarantee that the process will run on
    765  1.149.2.4      yamt 	 * that processor next, because another processor might
    766  1.149.2.4      yamt 	 * grab it the next time it performs a context switch.
    767  1.149.2.4      yamt 	 *
    768  1.149.2.4      yamt 	 * This also does not handle the case where its last
    769  1.149.2.4      yamt 	 * CPU is running a higher-priority process, but every
    770  1.149.2.4      yamt 	 * other CPU is running a lower-priority process.  There
    771  1.149.2.4      yamt 	 * are ways to handle this situation, but they're not
    772  1.149.2.4      yamt 	 * currently very pretty, and we also need to weigh the
    773  1.149.2.4      yamt 	 * cost of moving a process from one CPU to another.
    774  1.149.2.4      yamt 	 */
    775  1.149.2.4      yamt 	ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
    776  1.149.2.4      yamt 	if (pri < ci->ci_schedstate.spc_curpriority)
    777  1.149.2.4      yamt 		cpu_need_resched(ci, 0);
    778  1.149.2.4      yamt }
    779  1.149.2.3      yamt 
    780  1.149.2.4      yamt static void
    781  1.149.2.4      yamt sched_changepri(struct lwp *l, pri_t pri)
    782  1.149.2.4      yamt {
    783  1.149.2.4      yamt 
    784  1.149.2.4      yamt 	KASSERT(lwp_locked(l, NULL));
    785  1.149.2.3      yamt 
    786  1.149.2.4      yamt 	l->l_usrpri = pri;
    787  1.149.2.3      yamt 	if (l->l_priority < PUSER)
    788  1.149.2.3      yamt 		return;
    789  1.149.2.4      yamt 
    790  1.149.2.4      yamt 	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
    791  1.149.2.3      yamt 		l->l_priority = pri;
    792  1.149.2.3      yamt 		return;
    793  1.149.2.1      yamt 	}
    794  1.149.2.3      yamt 
    795  1.149.2.4      yamt 	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
    796  1.149.2.4      yamt 
    797  1.149.2.4      yamt 	sched_dequeue(l);
    798  1.149.2.3      yamt 	l->l_priority = pri;
    799  1.149.2.4      yamt 	sched_enqueue(l, false);
    800  1.149.2.4      yamt 	resched_cpu(l);
    801  1.149.2.1      yamt }
    802  1.149.2.1      yamt 
    803      1.146      matt static void
    804  1.149.2.4      yamt sched_lendpri(struct lwp *l, pri_t pri)
    805      1.146      matt {
    806  1.149.2.4      yamt 
    807  1.149.2.4      yamt 	KASSERT(lwp_locked(l, NULL));
    808  1.149.2.4      yamt 
    809  1.149.2.4      yamt 	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
    810  1.149.2.4      yamt 		l->l_inheritedprio = pri;
    811  1.149.2.4      yamt 		return;
    812      1.146      matt 	}
    813  1.149.2.4      yamt 
    814  1.149.2.4      yamt 	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
    815  1.149.2.4      yamt 
    816  1.149.2.4      yamt 	sched_dequeue(l);
    817  1.149.2.4      yamt 	l->l_inheritedprio = pri;
    818  1.149.2.4      yamt 	sched_enqueue(l, false);
    819  1.149.2.4      yamt 	resched_cpu(l);
    820      1.146      matt }
    821      1.146      matt 
    822  1.149.2.4      yamt struct lwp *
    823  1.149.2.4      yamt syncobj_noowner(wchan_t wchan)
    824      1.113  gmcgarry {
    825  1.149.2.3      yamt 
    826  1.149.2.4      yamt 	return NULL;
    827      1.113  gmcgarry }
    828      1.113  gmcgarry 
    829  1.149.2.4      yamt 
    830  1.149.2.4      yamt /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
    831  1.149.2.4      yamt fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */
    832  1.149.2.4      yamt 
    833  1.149.2.3      yamt /*
    834  1.149.2.4      yamt  * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
    835  1.149.2.4      yamt  * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
    836  1.149.2.4      yamt  * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
    837  1.149.2.4      yamt  *
    838  1.149.2.4      yamt  * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
    839  1.149.2.4      yamt  *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
    840  1.149.2.4      yamt  *
     841  1.149.2.4      yamt  * If you don't want to bother with the faster/more-accurate formula, you
    842  1.149.2.4      yamt  * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
    843  1.149.2.4      yamt  * (more general) method of calculating the %age of CPU used by a process.
    844  1.149.2.3      yamt  */
    845  1.149.2.4      yamt #define	CCPU_SHIFT	(FSHIFT + 1)
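
/*
 * Derivation of the "95% in 60 seconds" figure above: sched_pstats() runs
 * once per second and multiplies the old value by ccpu each time, so after
 * 60 seconds the old contribution has been scaled by ccpu^60 =
 * exp(-60/20) = exp(-3) ~= 0.0498; only about 5% of it remains.
 */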
    846  1.149.2.4      yamt 
    847  1.149.2.4      yamt /*
    848  1.149.2.4      yamt  * sched_pstats:
    849  1.149.2.4      yamt  *
    850  1.149.2.4      yamt  * Update process statistics and check CPU resource allocation.
    851  1.149.2.4      yamt  * Call scheduler-specific hook to eventually adjust process/LWP
    852  1.149.2.4      yamt  * priorities.
    853  1.149.2.4      yamt  */
    854  1.149.2.4      yamt /* ARGSUSED */
    855      1.113  gmcgarry void
    856  1.149.2.4      yamt sched_pstats(void *arg)
    857      1.113  gmcgarry {
    858  1.149.2.4      yamt 	struct rlimit *rlim;
    859  1.149.2.4      yamt 	struct lwp *l;
    860  1.149.2.4      yamt 	struct proc *p;
    861  1.149.2.4      yamt 	int minslp, sig, clkhz;
    862  1.149.2.4      yamt 	long runtm;
    863  1.149.2.3      yamt 
    864  1.149.2.4      yamt 	sched_pstats_ticks++;
    865  1.149.2.3      yamt 
    866  1.149.2.4      yamt 	mutex_enter(&proclist_mutex);
    867  1.149.2.4      yamt 	PROCLIST_FOREACH(p, &allproc) {
    868  1.149.2.4      yamt 		/*
    869  1.149.2.4      yamt 		 * Increment time in/out of memory and sleep time (if
    870  1.149.2.4      yamt 		 * sleeping).  We ignore overflow; with 16-bit int's
    871  1.149.2.4      yamt 		 * (remember them?) overflow takes 45 days.
    872  1.149.2.4      yamt 		 */
    873  1.149.2.4      yamt 		minslp = 2;
    874  1.149.2.4      yamt 		mutex_enter(&p->p_smutex);
    875  1.149.2.4      yamt 		mutex_spin_enter(&p->p_stmutex);
    876  1.149.2.4      yamt 		runtm = p->p_rtime.tv_sec;
    877  1.149.2.4      yamt 		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    878  1.149.2.4      yamt 			if ((l->l_flag & LW_IDLE) != 0)
    879  1.149.2.4      yamt 				continue;
    880  1.149.2.4      yamt 			lwp_lock(l);
    881  1.149.2.4      yamt 			runtm += l->l_rtime.tv_sec;
    882  1.149.2.4      yamt 			l->l_swtime++;
    883  1.149.2.4      yamt 			if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
    884  1.149.2.4      yamt 			    l->l_stat == LSSUSPENDED) {
    885  1.149.2.4      yamt 				l->l_slptime++;
    886  1.149.2.4      yamt 				minslp = min(minslp, l->l_slptime);
    887  1.149.2.4      yamt 			} else
    888  1.149.2.4      yamt 				minslp = 0;
    889  1.149.2.5      yamt 			sched_pstats_hook(l);
    890  1.149.2.4      yamt 			lwp_unlock(l);
    891  1.149.2.4      yamt 
    892  1.149.2.4      yamt 			/*
     893  1.149.2.4      yamt 			 * l_pctcpu is only for ps.
    894  1.149.2.4      yamt 			 */
    895  1.149.2.4      yamt 			l->l_pctcpu = (l->l_pctcpu * ccpu) >> FSHIFT;
    896  1.149.2.4      yamt 			if (l->l_slptime < 1) {
    897  1.149.2.4      yamt 				clkhz = stathz != 0 ? stathz : hz;
    898  1.149.2.4      yamt #if	(FSHIFT >= CCPU_SHIFT)
    899  1.149.2.4      yamt 				l->l_pctcpu += (clkhz == 100) ?
    900  1.149.2.4      yamt 				    ((fixpt_t)l->l_cpticks) <<
    901  1.149.2.4      yamt 				        (FSHIFT - CCPU_SHIFT) :
    902  1.149.2.4      yamt 				    100 * (((fixpt_t) p->p_cpticks)
    903  1.149.2.4      yamt 				        << (FSHIFT - CCPU_SHIFT)) / clkhz;
    904  1.149.2.4      yamt #else
    905  1.149.2.4      yamt 				l->l_pctcpu += ((FSCALE - ccpu) *
    906  1.149.2.4      yamt 				    (l->l_cpticks * FSCALE / clkhz)) >> FSHIFT;
    907      1.146      matt #endif
    908  1.149.2.4      yamt 				l->l_cpticks = 0;
    909  1.149.2.4      yamt 			}
    910  1.149.2.4      yamt 		}
    911  1.149.2.5      yamt 
    912  1.149.2.4      yamt 		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
    913  1.149.2.5      yamt #ifdef SCHED_4BSD
    914  1.149.2.5      yamt 		/*
    915  1.149.2.5      yamt 		 * XXX: Workaround - belongs to sched_4bsd.c
    916  1.149.2.5      yamt 		 * If the process has slept the entire second,
    917  1.149.2.5      yamt 		 * stop recalculating its priority until it wakes up.
    918  1.149.2.5      yamt 		 */
    919  1.149.2.5      yamt 		if (minslp <= 1) {
    920  1.149.2.5      yamt 			extern fixpt_t decay_cpu(fixpt_t, fixpt_t);
    921  1.149.2.5      yamt 
    922  1.149.2.5      yamt 			fixpt_t loadfac = 2 * (averunnable.ldavg[0]);
    923  1.149.2.5      yamt 			p->p_estcpu = decay_cpu(loadfac, p->p_estcpu);
    924  1.149.2.5      yamt 		}
    925  1.149.2.5      yamt #endif
    926  1.149.2.4      yamt 		mutex_spin_exit(&p->p_stmutex);
    927  1.149.2.3      yamt 
    928  1.149.2.4      yamt 		/*
    929  1.149.2.4      yamt 		 * Check if the process exceeds its CPU resource allocation.
    930  1.149.2.4      yamt 		 * If over max, kill it.
    931  1.149.2.4      yamt 		 */
    932  1.149.2.4      yamt 		rlim = &p->p_rlimit[RLIMIT_CPU];
    933  1.149.2.4      yamt 		sig = 0;
    934  1.149.2.4      yamt 		if (runtm >= rlim->rlim_cur) {
    935  1.149.2.4      yamt 			if (runtm >= rlim->rlim_max)
    936  1.149.2.4      yamt 				sig = SIGKILL;
    937  1.149.2.4      yamt 			else {
    938  1.149.2.4      yamt 				sig = SIGXCPU;
    939  1.149.2.4      yamt 				if (rlim->rlim_cur < rlim->rlim_max)
    940  1.149.2.4      yamt 					rlim->rlim_cur += 5;
    941  1.149.2.4      yamt 			}
    942  1.149.2.4      yamt 		}
    943  1.149.2.4      yamt 		mutex_exit(&p->p_smutex);
    944  1.149.2.4      yamt 		if (sig) {
    945  1.149.2.4      yamt 			psignal(p, sig);
    946  1.149.2.4      yamt 		}
    947  1.149.2.3      yamt 	}
    948  1.149.2.4      yamt 	mutex_exit(&proclist_mutex);
    949  1.149.2.4      yamt 	uvm_meter();
    950  1.149.2.4      yamt 	cv_wakeup(&lbolt);
    951  1.149.2.4      yamt 	callout_schedule(&sched_pstats_ch, hz);
    952      1.113  gmcgarry }
    953      1.113  gmcgarry 
    954  1.149.2.4      yamt void
    955  1.149.2.4      yamt sched_init(void)
    956  1.149.2.4      yamt {
    957  1.149.2.4      yamt 
    958  1.149.2.4      yamt 	cv_init(&lbolt, "lbolt");
    959  1.149.2.4      yamt 	callout_init(&sched_pstats_ch, 0);
    960  1.149.2.4      yamt 	callout_setfunc(&sched_pstats_ch, sched_pstats, NULL);
    961  1.149.2.4      yamt 	sched_setup();
    962  1.149.2.4      yamt 	sched_pstats(NULL);
    963  1.149.2.4      yamt }
    964