      1  1.149.2.5      yamt /*	$NetBSD: kern_synch.c,v 1.149.2.5 2007/10/27 11:35:29 yamt Exp $	*/
      2       1.63   thorpej 
      3       1.63   thorpej /*-
      4  1.149.2.3      yamt  * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
      5       1.63   thorpej  * All rights reserved.
      6       1.63   thorpej  *
      7       1.63   thorpej  * This code is derived from software contributed to The NetBSD Foundation
      8       1.63   thorpej  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
      9  1.149.2.4      yamt  * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and
     10  1.149.2.4      yamt  * Daniel Sieger.
     11       1.63   thorpej  *
     12       1.63   thorpej  * Redistribution and use in source and binary forms, with or without
     13       1.63   thorpej  * modification, are permitted provided that the following conditions
     14       1.63   thorpej  * are met:
     15       1.63   thorpej  * 1. Redistributions of source code must retain the above copyright
     16       1.63   thorpej  *    notice, this list of conditions and the following disclaimer.
     17       1.63   thorpej  * 2. Redistributions in binary form must reproduce the above copyright
     18       1.63   thorpej  *    notice, this list of conditions and the following disclaimer in the
     19       1.63   thorpej  *    documentation and/or other materials provided with the distribution.
     20       1.63   thorpej  * 3. All advertising materials mentioning features or use of this software
     21       1.63   thorpej  *    must display the following acknowledgement:
     22       1.63   thorpej  *	This product includes software developed by the NetBSD
     23       1.63   thorpej  *	Foundation, Inc. and its contributors.
     24       1.63   thorpej  * 4. Neither the name of The NetBSD Foundation nor the names of its
     25       1.63   thorpej  *    contributors may be used to endorse or promote products derived
     26       1.63   thorpej  *    from this software without specific prior written permission.
     27       1.63   thorpej  *
     28       1.63   thorpej  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     29       1.63   thorpej  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     30       1.63   thorpej  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     31       1.63   thorpej  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     32       1.63   thorpej  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     33       1.63   thorpej  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     34       1.63   thorpej  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     35       1.63   thorpej  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     36       1.63   thorpej  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     37       1.63   thorpej  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     38       1.63   thorpej  * POSSIBILITY OF SUCH DAMAGE.
     39       1.63   thorpej  */
     40       1.26       cgd 
     41       1.26       cgd /*-
     42       1.26       cgd  * Copyright (c) 1982, 1986, 1990, 1991, 1993
     43       1.26       cgd  *	The Regents of the University of California.  All rights reserved.
     44       1.26       cgd  * (c) UNIX System Laboratories, Inc.
     45       1.26       cgd  * All or some portions of this file are derived from material licensed
     46       1.26       cgd  * to the University of California by American Telephone and Telegraph
     47       1.26       cgd  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
     48       1.26       cgd  * the permission of UNIX System Laboratories, Inc.
     49       1.26       cgd  *
     50       1.26       cgd  * Redistribution and use in source and binary forms, with or without
     51       1.26       cgd  * modification, are permitted provided that the following conditions
     52       1.26       cgd  * are met:
     53       1.26       cgd  * 1. Redistributions of source code must retain the above copyright
     54       1.26       cgd  *    notice, this list of conditions and the following disclaimer.
     55       1.26       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     56       1.26       cgd  *    notice, this list of conditions and the following disclaimer in the
     57       1.26       cgd  *    documentation and/or other materials provided with the distribution.
     58      1.136       agc  * 3. Neither the name of the University nor the names of its contributors
     59       1.26       cgd  *    may be used to endorse or promote products derived from this software
     60       1.26       cgd  *    without specific prior written permission.
     61       1.26       cgd  *
     62       1.26       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     63       1.26       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     64       1.26       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     65       1.26       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     66       1.26       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     67       1.26       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     68       1.26       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     69       1.26       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     70       1.26       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     71       1.26       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     72       1.26       cgd  * SUCH DAMAGE.
     73       1.26       cgd  *
     74       1.50      fvdl  *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
     75       1.26       cgd  */
     76      1.106     lukem 
     77      1.106     lukem #include <sys/cdefs.h>
     78  1.149.2.5      yamt __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.149.2.5 2007/10/27 11:35:29 yamt Exp $");
     79       1.48       mrg 
     80      1.109      yamt #include "opt_kstack.h"
     81       1.82   thorpej #include "opt_lockdebug.h"
     82       1.83   thorpej #include "opt_multiprocessor.h"
     83      1.110    briggs #include "opt_perfctrs.h"
     84       1.26       cgd 
     85  1.149.2.3      yamt #define	__MUTEX_PRIVATE
     86  1.149.2.3      yamt 
     87       1.26       cgd #include <sys/param.h>
     88       1.26       cgd #include <sys/systm.h>
     89       1.26       cgd #include <sys/proc.h>
     90       1.26       cgd #include <sys/kernel.h>
     91      1.111    briggs #if defined(PERFCTRS)
     92      1.110    briggs #include <sys/pmc.h>
     93      1.111    briggs #endif
     94  1.149.2.4      yamt #include <sys/cpu.h>
     95       1.26       cgd #include <sys/resourcevar.h>
     96       1.55      ross #include <sys/sched.h>
     97  1.149.2.3      yamt #include <sys/syscall_stats.h>
     98  1.149.2.3      yamt #include <sys/sleepq.h>
     99  1.149.2.3      yamt #include <sys/lockdebug.h>
    100  1.149.2.4      yamt #include <sys/evcnt.h>
    101  1.149.2.5      yamt #include <sys/intr.h>
    102       1.47       mrg 
    103       1.47       mrg #include <uvm/uvm_extern.h>
    104       1.47       mrg 
    105  1.149.2.4      yamt callout_t sched_pstats_ch;
    106  1.149.2.4      yamt unsigned int sched_pstats_ticks;
    107       1.34  christos 
    108  1.149.2.4      yamt kcondvar_t	lbolt;			/* once a second sleep address */
    109       1.26       cgd 
    110  1.149.2.4      yamt static void	sched_unsleep(struct lwp *);
    111  1.149.2.4      yamt static void	sched_changepri(struct lwp *, pri_t);
    112  1.149.2.4      yamt static void	sched_lendpri(struct lwp *, pri_t);
    113      1.122   thorpej 
    114  1.149.2.3      yamt syncobj_t sleep_syncobj = {
    115  1.149.2.3      yamt 	SOBJ_SLEEPQ_SORTED,
    116  1.149.2.3      yamt 	sleepq_unsleep,
    117  1.149.2.4      yamt 	sleepq_changepri,
    118  1.149.2.4      yamt 	sleepq_lendpri,
    119  1.149.2.4      yamt 	syncobj_noowner,
    120  1.149.2.3      yamt };
    121  1.149.2.3      yamt 
    122  1.149.2.3      yamt syncobj_t sched_syncobj = {
    123  1.149.2.3      yamt 	SOBJ_SLEEPQ_SORTED,
    124  1.149.2.3      yamt 	sched_unsleep,
    125  1.149.2.4      yamt 	sched_changepri,
    126  1.149.2.4      yamt 	sched_lendpri,
    127  1.149.2.4      yamt 	syncobj_noowner,
    128  1.149.2.3      yamt };
    129      1.122   thorpej 
    130       1.26       cgd /*
    131  1.149.2.3      yamt  * During autoconfiguration or after a panic, a sleep will simply lower the
    132  1.149.2.3      yamt  * priority briefly to allow interrupts, then return.  The priority to be
    133  1.149.2.3      yamt  * used (safepri) is machine-dependent, thus this value is initialized and
    134  1.149.2.3      yamt  * maintained in the machine-dependent layers.  This priority will typically
    135  1.149.2.3      yamt  * be 0, or the lowest priority that is safe for use on the interrupt stack;
    136  1.149.2.3      yamt  * it can be made higher to block network software interrupts after panics.
    137       1.26       cgd  */
    138  1.149.2.3      yamt int	safepri;
    139       1.26       cgd 
    140       1.26       cgd /*
    141  1.149.2.3      yamt  * OBSOLETE INTERFACE
    142  1.149.2.3      yamt  *
    143       1.26       cgd  * General sleep call.  Suspends the current process until a wakeup is
    144       1.26       cgd  * performed on the specified identifier.  The process will then be made
    145  1.149.2.3      yamt  * runnable with the specified priority.  Sleeps at most timo/hz seconds (0
     146  1.149.2.3      yamt  * means no timeout).  If pri includes the PCATCH flag, signals are checked
    147       1.26       cgd  * before and after sleeping, else signals are not checked.  Returns 0 if
    148       1.26       cgd  * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
    149       1.26       cgd  * signal needs to be delivered, ERESTART is returned if the current system
    150       1.26       cgd  * call should be restarted if possible, and EINTR is returned if the system
     151       1.26       cgd  * call should be interrupted by the signal.
    152       1.77   thorpej  *
    153  1.149.2.3      yamt  * The interlock is held until we are on a sleep queue. The interlock will
     154  1.149.2.3      yamt  * be relocked before returning to the caller unless the PNORELOCK flag
    155  1.149.2.3      yamt  * is specified, in which case the interlock will always be unlocked upon
    156  1.149.2.3      yamt  * return.
    157       1.26       cgd  */
    158       1.26       cgd int
    159  1.149.2.4      yamt ltsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
    160  1.149.2.3      yamt 	volatile struct simplelock *interlock)
    161       1.26       cgd {
    162      1.122   thorpej 	struct lwp *l = curlwp;
    163  1.149.2.3      yamt 	sleepq_t *sq;
    164  1.149.2.4      yamt 	int error;
    165       1.26       cgd 
    166  1.149.2.3      yamt 	if (sleepq_dontsleep(l)) {
    167  1.149.2.3      yamt 		(void)sleepq_abort(NULL, 0);
    168  1.149.2.3      yamt 		if ((priority & PNORELOCK) != 0)
    169       1.77   thorpej 			simple_unlock(interlock);
    170  1.149.2.3      yamt 		return 0;
    171      1.122   thorpej 	}
    172       1.77   thorpej 
    173  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    174  1.149.2.3      yamt 	sleepq_enter(sq, l);
    175  1.149.2.4      yamt 	sleepq_enqueue(sq, priority & PRIMASK, ident, wmesg, &sleep_syncobj);
    176       1.77   thorpej 
    177  1.149.2.3      yamt 	if (interlock != NULL) {
    178  1.149.2.3      yamt 		LOCK_ASSERT(simple_lock_held(interlock));
    179       1.77   thorpej 		simple_unlock(interlock);
    180       1.26       cgd 	}
    181      1.147     perry 
    182  1.149.2.4      yamt 	error = sleepq_block(timo, priority & PCATCH);
    183      1.139        cl 
    184  1.149.2.3      yamt 	if (interlock != NULL && (priority & PNORELOCK) == 0)
    185  1.149.2.3      yamt 		simple_lock(interlock);
    186  1.149.2.3      yamt 
    187  1.149.2.3      yamt 	return error;
    188      1.139        cl }
    189      1.139        cl 
    190  1.149.2.4      yamt int
    191  1.149.2.4      yamt mtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
    192  1.149.2.4      yamt 	kmutex_t *mtx)
    193  1.149.2.4      yamt {
    194  1.149.2.4      yamt 	struct lwp *l = curlwp;
    195  1.149.2.4      yamt 	sleepq_t *sq;
    196  1.149.2.4      yamt 	int error;
    197  1.149.2.4      yamt 
    198  1.149.2.4      yamt 	if (sleepq_dontsleep(l)) {
    199  1.149.2.4      yamt 		(void)sleepq_abort(mtx, (priority & PNORELOCK) != 0);
    200  1.149.2.4      yamt 		return 0;
    201  1.149.2.4      yamt 	}
    202  1.149.2.4      yamt 
    203  1.149.2.4      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    204  1.149.2.4      yamt 	sleepq_enter(sq, l);
    205  1.149.2.4      yamt 	sleepq_enqueue(sq, priority & PRIMASK, ident, wmesg, &sleep_syncobj);
    206  1.149.2.4      yamt 	mutex_exit(mtx);
    207  1.149.2.4      yamt 	error = sleepq_block(timo, priority & PCATCH);
    208  1.149.2.4      yamt 
    209  1.149.2.4      yamt 	if ((priority & PNORELOCK) == 0)
    210  1.149.2.4      yamt 		mutex_enter(mtx);
    211  1.149.2.4      yamt 
    212  1.149.2.4      yamt 	return error;
    213  1.149.2.4      yamt }
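
/*
 * Illustrative sketch: a typical mtsleep() caller waits for a condition
 * under a kmutex and rechecks the condition after every wakeup.  The
 * softc `sc' and its sc_lock/sc_busy members are hypothetical.
 *
 *	mutex_enter(&sc->sc_lock);
 *	while (sc->sc_busy) {
 *		error = mtsleep(&sc->sc_busy, PRIBIO | PCATCH, "busywait",
 *		    0, &sc->sc_lock);
 *		if (error)
 *			break;
 *	}
 *	mutex_exit(&sc->sc_lock);
 *
 * Whoever clears sc_busy issues a matching wakeup(&sc->sc_busy).
 */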
    214  1.149.2.4      yamt 
    215       1.26       cgd /*
    216  1.149.2.3      yamt  * General sleep call for situations where a wake-up is not expected.
    217       1.63   thorpej  */
    218  1.149.2.3      yamt int
    219  1.149.2.3      yamt kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
    220       1.83   thorpej {
    221  1.149.2.3      yamt 	struct lwp *l = curlwp;
    222  1.149.2.3      yamt 	sleepq_t *sq;
    223  1.149.2.3      yamt 	int error;
    224       1.83   thorpej 
    225  1.149.2.3      yamt 	if (sleepq_dontsleep(l))
    226  1.149.2.3      yamt 		return sleepq_abort(NULL, 0);
    227       1.63   thorpej 
    228  1.149.2.3      yamt 	if (mtx != NULL)
    229  1.149.2.3      yamt 		mutex_exit(mtx);
    230  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, l);
    231  1.149.2.3      yamt 	sleepq_enter(sq, l);
    232  1.149.2.4      yamt 	sleepq_enqueue(sq, sched_kpri(l), l, wmesg, &sleep_syncobj);
    233  1.149.2.4      yamt 	error = sleepq_block(timo, intr);
    234  1.149.2.3      yamt 	if (mtx != NULL)
    235  1.149.2.3      yamt 		mutex_enter(mtx);
    236       1.83   thorpej 
    237  1.149.2.3      yamt 	return error;
    238       1.83   thorpej }
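
/*
 * Illustrative sketch: kpause() is the simple way to pause for a fixed
 * interval when no wakeup() is expected, e.g. an uninterruptible pause
 * of roughly one second with no mutex to drop:
 *
 *	(void)kpause("pause", false, hz, NULL);
 */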
    239       1.83   thorpej 
    240       1.63   thorpej /*
    241  1.149.2.3      yamt  * OBSOLETE INTERFACE
    242  1.149.2.3      yamt  *
    243       1.26       cgd  * Make all processes sleeping on the specified identifier runnable.
    244       1.26       cgd  */
    245       1.26       cgd void
    246  1.149.2.3      yamt wakeup(wchan_t ident)
    247       1.26       cgd {
    248  1.149.2.3      yamt 	sleepq_t *sq;
    249       1.83   thorpej 
    250  1.149.2.3      yamt 	if (cold)
    251  1.149.2.3      yamt 		return;
    252       1.83   thorpej 
    253  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    254  1.149.2.3      yamt 	sleepq_wake(sq, ident, (u_int)-1);
    255       1.63   thorpej }
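
/*
 * Illustrative sketch (hypothetical `flag' variable): the classic pattern
 * pairs a sleep loop with a wakeup on the same address, and the sleeper
 * always rechecks the condition after the sleep returns:
 *
 *	while (flag == 0)
 *		(void)ltsleep(&flag, PWAIT, "flag", 0, NULL);
 *	...
 *	flag = 1;
 *	wakeup(&flag);
 */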
    256       1.63   thorpej 
    257       1.63   thorpej /*
    258  1.149.2.3      yamt  * OBSOLETE INTERFACE
    259  1.149.2.3      yamt  *
    260       1.63   thorpej  * Make the highest priority process first in line on the specified
    261       1.63   thorpej  * identifier runnable.
    262       1.63   thorpej  */
    263  1.149.2.3      yamt void
    264  1.149.2.3      yamt wakeup_one(wchan_t ident)
    265       1.63   thorpej {
    266  1.149.2.3      yamt 	sleepq_t *sq;
    267       1.63   thorpej 
    268  1.149.2.3      yamt 	if (cold)
    269  1.149.2.3      yamt 		return;
    270  1.149.2.4      yamt 
    271  1.149.2.3      yamt 	sq = sleeptab_lookup(&sleeptab, ident);
    272  1.149.2.3      yamt 	sleepq_wake(sq, ident, 1);
    273      1.117  gmcgarry }
    274      1.117  gmcgarry 
    275  1.149.2.3      yamt 
    276      1.117  gmcgarry /*
    277      1.117  gmcgarry  * General yield call.  Puts the current process back on its run queue and
    278      1.117  gmcgarry  * performs a voluntary context switch.  Should only be called when the
     279  1.149.2.5      yamt  * current process explicitly requests it (e.g. sched_yield(2)).
    280      1.117  gmcgarry  */
    281      1.117  gmcgarry void
    282      1.117  gmcgarry yield(void)
    283      1.117  gmcgarry {
    284      1.122   thorpej 	struct lwp *l = curlwp;
    285      1.117  gmcgarry 
    286  1.149.2.3      yamt 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
    287  1.149.2.3      yamt 	lwp_lock(l);
    288  1.149.2.4      yamt 	KASSERT(lwp_locked(l, &l->l_cpu->ci_schedstate.spc_lwplock));
    289  1.149.2.4      yamt 	KASSERT(l->l_stat == LSONPROC);
    290  1.149.2.5      yamt 	/* XXX Only do this for timeshared threads. */
    291  1.149.2.5      yamt 	l->l_priority = MAXPRI;
    292  1.149.2.4      yamt 	(void)mi_switch(l);
    293  1.149.2.3      yamt 	KERNEL_LOCK(l->l_biglocks, l);
    294       1.69   thorpej }
    295       1.69   thorpej 
    296       1.69   thorpej /*
    297       1.69   thorpej  * General preemption call.  Puts the current process back on its run queue
    298  1.149.2.1      yamt  * and performs an involuntary context switch.
    299       1.69   thorpej  */
    300       1.69   thorpej void
    301  1.149.2.3      yamt preempt(void)
    302       1.69   thorpej {
    303      1.122   thorpej 	struct lwp *l = curlwp;
    304       1.69   thorpej 
    305  1.149.2.3      yamt 	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
    306  1.149.2.3      yamt 	lwp_lock(l);
    307  1.149.2.4      yamt 	KASSERT(lwp_locked(l, &l->l_cpu->ci_schedstate.spc_lwplock));
    308  1.149.2.4      yamt 	KASSERT(l->l_stat == LSONPROC);
    309  1.149.2.4      yamt 	l->l_priority = l->l_usrpri;
    310  1.149.2.3      yamt 	l->l_nivcsw++;
    311  1.149.2.4      yamt 	(void)mi_switch(l);
    312  1.149.2.3      yamt 	KERNEL_LOCK(l->l_biglocks, l);
    313       1.69   thorpej }
    314       1.69   thorpej 
    315       1.69   thorpej /*
    316  1.149.2.4      yamt  * Compute the amount of time during which the current lwp was running.
    317      1.130   nathanw  *
    318  1.149.2.4      yamt  * - update l_rtime unless it's an idle lwp.
    319  1.149.2.4      yamt  */
    320  1.149.2.4      yamt 
    321  1.149.2.5      yamt void
    322  1.149.2.5      yamt updatertime(lwp_t *l, const struct timeval *tv)
    323  1.149.2.4      yamt {
    324  1.149.2.4      yamt 	long s, u;
    325  1.149.2.4      yamt 
    326  1.149.2.5      yamt 	if ((l->l_flag & LW_IDLE) != 0)
    327  1.149.2.4      yamt 		return;
    328  1.149.2.4      yamt 
    329  1.149.2.5      yamt 	u = l->l_rtime.tv_usec + (tv->tv_usec - l->l_stime.tv_usec);
    330  1.149.2.5      yamt 	s = l->l_rtime.tv_sec + (tv->tv_sec - l->l_stime.tv_sec);
    331  1.149.2.4      yamt 	if (u < 0) {
    332  1.149.2.4      yamt 		u += 1000000;
    333  1.149.2.4      yamt 		s--;
    334  1.149.2.4      yamt 	} else if (u >= 1000000) {
    335  1.149.2.4      yamt 		u -= 1000000;
    336  1.149.2.4      yamt 		s++;
    337  1.149.2.4      yamt 	}
    338  1.149.2.4      yamt 	l->l_rtime.tv_usec = u;
    339  1.149.2.4      yamt 	l->l_rtime.tv_sec = s;
    340  1.149.2.4      yamt }
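
/*
 * Worked example of the normalization above: if l_rtime is
 * { 2 s, 900000 us } and the interval since l_stime is 300000 us, then
 * u = 1200000 >= 1000000, so the stored total becomes { 3 s, 200000 us }.
 */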
    341  1.149.2.4      yamt 
    342  1.149.2.4      yamt /*
    343  1.149.2.4      yamt  * The machine independent parts of context switch.
    344  1.149.2.4      yamt  *
    345  1.149.2.4      yamt  * Returns 1 if another LWP was actually run.
    346       1.26       cgd  */
    347      1.122   thorpej int
    348  1.149.2.5      yamt mi_switch(lwp_t *l)
    349       1.26       cgd {
    350       1.76   thorpej 	struct schedstate_percpu *spc;
    351  1.149.2.4      yamt 	struct lwp *newl;
    352  1.149.2.3      yamt 	int retval, oldspl;
    353  1.149.2.5      yamt 	struct cpu_info *ci;
    354  1.149.2.5      yamt 	struct timeval tv;
    355  1.149.2.5      yamt 	bool returning;
    356       1.85  sommerfe 
    357  1.149.2.4      yamt 	KASSERT(lwp_locked(l, NULL));
    358  1.149.2.4      yamt 	LOCKDEBUG_BARRIER(l->l_mutex, 1);
    359       1.76   thorpej 
    360  1.149.2.3      yamt #ifdef KSTACK_CHECK_MAGIC
    361  1.149.2.3      yamt 	kstack_check_magic(l);
    362  1.149.2.3      yamt #endif
    363  1.149.2.3      yamt 
    364  1.149.2.5      yamt 	microtime(&tv);
    365  1.149.2.5      yamt 
    366  1.149.2.3      yamt 	/*
     367  1.149.2.3      yamt 	 * It's safe to read the per-CPU schedstate unlocked here, as all we
     368  1.149.2.3      yamt 	 * are after is the run time and that's guaranteed to have been last
    369  1.149.2.3      yamt 	 * updated by this CPU.
    370  1.149.2.3      yamt 	 */
    371  1.149.2.5      yamt 	ci = l->l_cpu;
    372  1.149.2.5      yamt 	KDASSERT(ci == curcpu());
    373       1.81   thorpej 
    374       1.26       cgd 	/*
    375  1.149.2.4      yamt 	 * Process is about to yield the CPU; clear the appropriate
    376  1.149.2.4      yamt 	 * scheduling flags.
    377       1.26       cgd 	 */
    378  1.149.2.5      yamt 	spc = &ci->ci_schedstate;
    379  1.149.2.5      yamt 	returning = false;
    380  1.149.2.4      yamt 	newl = NULL;
    381  1.149.2.4      yamt 
    382  1.149.2.5      yamt 	/*
    383  1.149.2.5      yamt 	 * If we have been asked to switch to a specific LWP, then there
    384  1.149.2.5      yamt 	 * is no need to inspect the run queues.  If a soft interrupt is
    385  1.149.2.5      yamt 	 * blocking, then return to the interrupted thread without adjusting
     386  1.149.2.5      yamt 	 * VM context or its start time: neither has been changed in order
    387  1.149.2.5      yamt 	 * to take the interrupt.
    388  1.149.2.5      yamt 	 */
    389  1.149.2.4      yamt 	if (l->l_switchto != NULL) {
    390  1.149.2.5      yamt 		if ((l->l_flag & LW_INTR) != 0) {
    391  1.149.2.5      yamt 			returning = true;
    392  1.149.2.5      yamt 			softint_block(l);
    393  1.149.2.5      yamt 			if ((l->l_flag & LW_TIMEINTR) != 0)
    394  1.149.2.5      yamt 				updatertime(l, &tv);
    395  1.149.2.5      yamt 		}
    396  1.149.2.4      yamt 		newl = l->l_switchto;
    397  1.149.2.4      yamt 		l->l_switchto = NULL;
    398       1.26       cgd 	}
    399  1.149.2.3      yamt 
    400  1.149.2.3      yamt 	/* Count time spent in current system call */
    401  1.149.2.5      yamt 	if (!returning) {
    402  1.149.2.5      yamt 		SYSCALL_TIME_SLEEP(l);
    403       1.26       cgd 
    404  1.149.2.5      yamt 		/*
    405  1.149.2.5      yamt 		 * XXXSMP If we are using h/w performance counters,
    406  1.149.2.5      yamt 		 * save context.
    407  1.149.2.5      yamt 		 */
    408  1.149.2.3      yamt #if PERFCTRS
    409  1.149.2.5      yamt 		if (PMC_ENABLED(l->l_proc)) {
    410  1.149.2.5      yamt 			pmc_save_context(l->l_proc);
    411  1.149.2.5      yamt 		}
    412      1.109      yamt #endif
    413  1.149.2.5      yamt 		updatertime(l, &tv);
    414  1.149.2.5      yamt 	}
    415      1.113  gmcgarry 
    416      1.113  gmcgarry 	/*
     417  1.149.2.3      yamt 	 * If we are on the CPU and have gotten this far, then we must yield.
    418      1.113  gmcgarry 	 */
    419  1.149.2.4      yamt 	mutex_spin_enter(spc->spc_mutex);
    420  1.149.2.3      yamt 	KASSERT(l->l_stat != LSRUN);
    421  1.149.2.3      yamt 	if (l->l_stat == LSONPROC) {
    422  1.149.2.4      yamt 		KASSERT(lwp_locked(l, &spc->spc_lwplock));
    423  1.149.2.4      yamt 		if ((l->l_flag & LW_IDLE) == 0) {
    424  1.149.2.4      yamt 			l->l_stat = LSRUN;
    425  1.149.2.4      yamt 			lwp_setlock(l, spc->spc_mutex);
    426  1.149.2.4      yamt 			sched_enqueue(l, true);
    427  1.149.2.4      yamt 		} else
    428  1.149.2.4      yamt 			l->l_stat = LSIDL;
    429  1.149.2.3      yamt 	}
    430  1.149.2.3      yamt 
    431  1.149.2.3      yamt 	/*
     432  1.149.2.5      yamt 	 * Let sched_nextlwp() select the LWP to run on this CPU next.
     433  1.149.2.4      yamt 	 * If no LWP is runnable, switch to the idle LWP.
     434  1.149.2.5      yamt 	 * Note that spc_lwplock might not necessarily be held.
    435  1.149.2.3      yamt 	 */
    436  1.149.2.4      yamt 	if (newl == NULL) {
    437  1.149.2.4      yamt 		newl = sched_nextlwp();
    438  1.149.2.4      yamt 		if (newl != NULL) {
    439  1.149.2.4      yamt 			sched_dequeue(newl);
    440  1.149.2.4      yamt 			KASSERT(lwp_locked(newl, spc->spc_mutex));
    441  1.149.2.4      yamt 			newl->l_stat = LSONPROC;
    442  1.149.2.5      yamt 			newl->l_cpu = ci;
    443  1.149.2.4      yamt 			newl->l_flag |= LW_RUNNING;
    444  1.149.2.4      yamt 			lwp_setlock(newl, &spc->spc_lwplock);
    445  1.149.2.4      yamt 		} else {
    446  1.149.2.5      yamt 			newl = ci->ci_data.cpu_idlelwp;
    447  1.149.2.4      yamt 			newl->l_stat = LSONPROC;
    448  1.149.2.4      yamt 			newl->l_flag |= LW_RUNNING;
    449  1.149.2.4      yamt 		}
    450  1.149.2.5      yamt 		ci->ci_want_resched = 0;
    451  1.149.2.5      yamt 		spc->spc_flags &= ~SPCF_SWITCHCLEAR;
    452  1.149.2.5      yamt 	}
    453  1.149.2.5      yamt 
    454  1.149.2.5      yamt 	/* Update the new LWP's start time while it is still locked. */
    455  1.149.2.5      yamt 	if (!returning) {
    456  1.149.2.5      yamt 		newl->l_stime = tv;
    457  1.149.2.5      yamt 		/*
    458  1.149.2.5      yamt 		 * XXX The following may be done unlocked if newl != NULL
    459  1.149.2.5      yamt 		 * above.
    460  1.149.2.5      yamt 		 */
    461  1.149.2.4      yamt 		newl->l_priority = newl->l_usrpri;
    462  1.149.2.4      yamt 	}
    463  1.149.2.3      yamt 
    464  1.149.2.5      yamt 	spc->spc_curpriority = newl->l_usrpri;
    465  1.149.2.5      yamt 
    466  1.149.2.4      yamt 	if (l != newl) {
    467  1.149.2.4      yamt 		struct lwp *prevlwp;
    468  1.149.2.3      yamt 
    469  1.149.2.4      yamt 		/*
    470  1.149.2.4      yamt 		 * If the old LWP has been moved to a run queue above,
    471  1.149.2.4      yamt 		 * drop the general purpose LWP lock: it's now locked
    472  1.149.2.4      yamt 		 * by the scheduler lock.
    473  1.149.2.4      yamt 		 *
    474  1.149.2.4      yamt 		 * Otherwise, drop the scheduler lock.  We're done with
    475  1.149.2.4      yamt 		 * the run queues for now.
    476  1.149.2.4      yamt 		 */
    477  1.149.2.4      yamt 		if (l->l_mutex == spc->spc_mutex) {
    478  1.149.2.4      yamt 			mutex_spin_exit(&spc->spc_lwplock);
    479  1.149.2.4      yamt 		} else {
    480  1.149.2.4      yamt 			mutex_spin_exit(spc->spc_mutex);
    481  1.149.2.4      yamt 		}
    482  1.149.2.4      yamt 
    483  1.149.2.4      yamt 		/* Unlocked, but for statistics only. */
    484  1.149.2.4      yamt 		uvmexp.swtch++;
    485  1.149.2.4      yamt 
    486  1.149.2.5      yamt 		/*
    487  1.149.2.5      yamt 		 * Save old VM context, unless a soft interrupt
    488  1.149.2.5      yamt 		 * handler is blocking.
    489  1.149.2.5      yamt 		 */
    490  1.149.2.5      yamt 		if (!returning)
    491  1.149.2.5      yamt 			pmap_deactivate(l);
    492  1.149.2.4      yamt 
    493  1.149.2.4      yamt 		/* Switch to the new LWP.. */
    494  1.149.2.4      yamt 		l->l_ncsw++;
    495  1.149.2.4      yamt 		l->l_flag &= ~LW_RUNNING;
    496  1.149.2.5      yamt 		oldspl = MUTEX_SPIN_OLDSPL(ci);
    497  1.149.2.4      yamt 		prevlwp = cpu_switchto(l, newl);
    498  1.149.2.4      yamt 
    499  1.149.2.4      yamt 		/*
    500  1.149.2.4      yamt 		 * .. we have switched away and are now back so we must
    501  1.149.2.4      yamt 		 * be the new curlwp.  prevlwp is who we replaced.
    502  1.149.2.4      yamt 		 */
    503  1.149.2.4      yamt 		if (prevlwp != NULL) {
    504  1.149.2.4      yamt 			curcpu()->ci_mtx_oldspl = oldspl;
    505  1.149.2.4      yamt 			lwp_unlock(prevlwp);
    506  1.149.2.4      yamt 		} else {
    507  1.149.2.4      yamt 			splx(oldspl);
    508  1.149.2.4      yamt 		}
    509  1.149.2.3      yamt 
    510  1.149.2.4      yamt 		/* Restore VM context. */
    511  1.149.2.4      yamt 		pmap_activate(l);
    512  1.149.2.4      yamt 		retval = 1;
    513  1.149.2.4      yamt 	} else {
    514  1.149.2.4      yamt 		/* Nothing to do - just unlock and return. */
    515  1.149.2.4      yamt 		mutex_spin_exit(spc->spc_mutex);
    516  1.149.2.4      yamt 		lwp_unlock(l);
    517      1.122   thorpej 		retval = 0;
    518      1.122   thorpej 	}
    519      1.110    briggs 
    520  1.149.2.4      yamt 	KASSERT(l == curlwp);
    521  1.149.2.4      yamt 	KASSERT(l->l_stat == LSONPROC);
    522  1.149.2.5      yamt 	KASSERT(l->l_cpu == curcpu());
    523  1.149.2.4      yamt 
    524      1.110    briggs 	/*
    525  1.149.2.3      yamt 	 * XXXSMP If we are using h/w performance counters, restore context.
    526       1.26       cgd 	 */
    527      1.114  gmcgarry #if PERFCTRS
    528  1.149.2.3      yamt 	if (PMC_ENABLED(l->l_proc)) {
    529  1.149.2.3      yamt 		pmc_restore_context(l->l_proc);
    530  1.149.2.2      yamt 	}
    531      1.114  gmcgarry #endif
    532      1.110    briggs 
    533      1.110    briggs 	/*
     534       1.76   thorpej 	 * We're running again.  We might
    535  1.149.2.3      yamt 	 * be running on a new CPU now, so don't use the cached
    536       1.76   thorpej 	 * schedstate_percpu pointer.
    537       1.76   thorpej 	 */
    538  1.149.2.3      yamt 	SYSCALL_TIME_WAKEUP(l);
    539  1.149.2.5      yamt 	KASSERT(curlwp == l);
    540      1.122   thorpej 	KDASSERT(l->l_cpu == curcpu());
    541  1.149.2.4      yamt 	LOCKDEBUG_BARRIER(NULL, 1);
    542  1.149.2.2      yamt 
    543      1.122   thorpej 	return retval;
    544       1.26       cgd }
    545       1.26       cgd 
    546       1.26       cgd /*
    547  1.149.2.3      yamt  * Change process state to be runnable, placing it on the run queue if it is
    548  1.149.2.3      yamt  * in memory, and awakening the swapper if it isn't in memory.
    549  1.149.2.3      yamt  *
    550  1.149.2.3      yamt  * Call with the process and LWP locked.  Will return with the LWP unlocked.
    551       1.26       cgd  */
    552       1.26       cgd void
    553      1.122   thorpej setrunnable(struct lwp *l)
    554       1.26       cgd {
    555      1.122   thorpej 	struct proc *p = l->l_proc;
    556  1.149.2.3      yamt 	sigset_t *ss;
    557       1.26       cgd 
    558  1.149.2.4      yamt 	KASSERT((l->l_flag & LW_IDLE) == 0);
    559  1.149.2.3      yamt 	KASSERT(mutex_owned(&p->p_smutex));
    560  1.149.2.3      yamt 	KASSERT(lwp_locked(l, NULL));
    561       1.83   thorpej 
    562      1.122   thorpej 	switch (l->l_stat) {
    563      1.122   thorpej 	case LSSTOP:
    564       1.33   mycroft 		/*
    565       1.33   mycroft 		 * If we're being traced (possibly because someone attached us
    566       1.33   mycroft 		 * while we were stopped), check for a signal from the debugger.
    567       1.33   mycroft 		 */
    568  1.149.2.3      yamt 		if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xstat != 0) {
    569  1.149.2.3      yamt 			if ((sigprop[p->p_xstat] & SA_TOLWP) != 0)
    570  1.149.2.3      yamt 				ss = &l->l_sigpend.sp_set;
    571  1.149.2.3      yamt 			else
    572  1.149.2.3      yamt 				ss = &p->p_sigpend.sp_set;
    573  1.149.2.3      yamt 			sigaddset(ss, p->p_xstat);
    574  1.149.2.3      yamt 			signotify(l);
    575       1.53   mycroft 		}
    576  1.149.2.3      yamt 		p->p_nrlwps++;
    577      1.122   thorpej 		break;
    578      1.122   thorpej 	case LSSUSPENDED:
    579  1.149.2.3      yamt 		l->l_flag &= ~LW_WSUSPEND;
    580  1.149.2.3      yamt 		p->p_nrlwps++;
    581  1.149.2.4      yamt 		cv_broadcast(&p->p_lwpcv);
    582  1.149.2.3      yamt 		break;
    583  1.149.2.3      yamt 	case LSSLEEP:
    584  1.149.2.3      yamt 		KASSERT(l->l_wchan != NULL);
    585       1.26       cgd 		break;
    586  1.149.2.3      yamt 	default:
    587  1.149.2.3      yamt 		panic("setrunnable: lwp %p state was %d", l, l->l_stat);
    588       1.26       cgd 	}
    589      1.139        cl 
    590  1.149.2.3      yamt 	/*
     591  1.149.2.3      yamt 	 * If the LWP was sleeping interruptibly, then it's OK to start it
    592  1.149.2.3      yamt 	 * again.  If not, mark it as still sleeping.
    593  1.149.2.3      yamt 	 */
    594  1.149.2.3      yamt 	if (l->l_wchan != NULL) {
    595  1.149.2.3      yamt 		l->l_stat = LSSLEEP;
    596  1.149.2.3      yamt 		/* lwp_unsleep() will release the lock. */
    597  1.149.2.3      yamt 		lwp_unsleep(l);
    598  1.149.2.3      yamt 		return;
    599  1.149.2.3      yamt 	}
    600      1.139        cl 
    601  1.149.2.3      yamt 	/*
    602  1.149.2.3      yamt 	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
    603  1.149.2.3      yamt 	 * about to call mi_switch(), in which case it will yield.
    604  1.149.2.3      yamt 	 */
    605  1.149.2.4      yamt 	if ((l->l_flag & LW_RUNNING) != 0) {
    606  1.149.2.3      yamt 		l->l_stat = LSONPROC;
    607  1.149.2.3      yamt 		l->l_slptime = 0;
    608  1.149.2.3      yamt 		lwp_unlock(l);
    609  1.149.2.3      yamt 		return;
    610  1.149.2.3      yamt 	}
    611      1.122   thorpej 
    612  1.149.2.3      yamt 	/*
    613  1.149.2.3      yamt 	 * Set the LWP runnable.  If it's swapped out, we need to wake the swapper
    614  1.149.2.3      yamt 	 * to bring it back in.  Otherwise, enter it into a run queue.
    615  1.149.2.3      yamt 	 */
    616  1.149.2.4      yamt 	if (l->l_mutex != l->l_cpu->ci_schedstate.spc_mutex) {
    617  1.149.2.4      yamt 		spc_lock(l->l_cpu);
    618  1.149.2.4      yamt 		lwp_unlock_to(l, l->l_cpu->ci_schedstate.spc_mutex);
    619  1.149.2.4      yamt 	}
    620  1.149.2.4      yamt 
    621  1.149.2.4      yamt 	sched_setrunnable(l);
    622  1.149.2.3      yamt 	l->l_stat = LSRUN;
    623      1.122   thorpej 	l->l_slptime = 0;
    624  1.149.2.3      yamt 
    625  1.149.2.3      yamt 	if (l->l_flag & LW_INMEM) {
    626  1.149.2.4      yamt 		sched_enqueue(l, false);
    627  1.149.2.4      yamt 		resched_cpu(l);
    628  1.149.2.3      yamt 		lwp_unlock(l);
    629  1.149.2.3      yamt 	} else {
    630  1.149.2.3      yamt 		lwp_unlock(l);
    631  1.149.2.3      yamt 		uvm_kick_scheduler();
    632  1.149.2.3      yamt 	}
    633       1.26       cgd }
    634       1.26       cgd 
    635       1.26       cgd /*
    636  1.149.2.3      yamt  * suspendsched:
    637  1.149.2.3      yamt  *
     638  1.149.2.3      yamt  *	Convert all non-PK_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED.
    639  1.149.2.3      yamt  */
    640       1.94    bouyer void
    641  1.149.2.3      yamt suspendsched(void)
    642       1.94    bouyer {
    643  1.149.2.3      yamt 	CPU_INFO_ITERATOR cii;
    644  1.149.2.3      yamt 	struct cpu_info *ci;
    645      1.122   thorpej 	struct lwp *l;
    646  1.149.2.3      yamt 	struct proc *p;
    647       1.94    bouyer 
    648       1.94    bouyer 	/*
    649  1.149.2.3      yamt 	 * We do this by process in order not to violate the locking rules.
    650       1.94    bouyer 	 */
    651  1.149.2.3      yamt 	mutex_enter(&proclist_mutex);
    652  1.149.2.3      yamt 	PROCLIST_FOREACH(p, &allproc) {
    653  1.149.2.3      yamt 		mutex_enter(&p->p_smutex);
    654  1.149.2.3      yamt 
    655  1.149.2.3      yamt 		if ((p->p_flag & PK_SYSTEM) != 0) {
    656  1.149.2.3      yamt 			mutex_exit(&p->p_smutex);
    657       1.94    bouyer 			continue;
    658  1.149.2.3      yamt 		}
    659  1.149.2.3      yamt 
    660  1.149.2.3      yamt 		p->p_stat = SSTOP;
    661  1.149.2.3      yamt 
    662  1.149.2.3      yamt 		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    663  1.149.2.3      yamt 			if (l == curlwp)
    664  1.149.2.3      yamt 				continue;
    665  1.149.2.3      yamt 
    666  1.149.2.3      yamt 			lwp_lock(l);
    667      1.122   thorpej 
    668       1.97     enami 			/*
     669  1.149.2.3      yamt 			 * Set LW_WREBOOT so that the LWP will suspend itself
     670  1.149.2.3      yamt 			 * when it tries to return to user mode.  We want to
     671  1.149.2.3      yamt 			 * try to get as many LWPs as possible to
    672  1.149.2.3      yamt 			 * the user / kernel boundary, so that they will
    673  1.149.2.3      yamt 			 * release any locks that they hold.
    674       1.97     enami 			 */
    675  1.149.2.3      yamt 			l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);
    676  1.149.2.3      yamt 
    677  1.149.2.3      yamt 			if (l->l_stat == LSSLEEP &&
    678  1.149.2.3      yamt 			    (l->l_flag & LW_SINTR) != 0) {
    679  1.149.2.3      yamt 				/* setrunnable() will release the lock. */
    680  1.149.2.3      yamt 				setrunnable(l);
    681  1.149.2.3      yamt 				continue;
    682  1.149.2.3      yamt 			}
    683  1.149.2.3      yamt 
    684  1.149.2.3      yamt 			lwp_unlock(l);
    685       1.94    bouyer 		}
    686  1.149.2.3      yamt 
    687  1.149.2.3      yamt 		mutex_exit(&p->p_smutex);
    688       1.94    bouyer 	}
    689  1.149.2.3      yamt 	mutex_exit(&proclist_mutex);
    690  1.149.2.3      yamt 
    691  1.149.2.3      yamt 	/*
    692  1.149.2.3      yamt 	 * Kick all CPUs to make them preempt any LWPs running in user mode.
    693  1.149.2.3      yamt 	 * They'll trap into the kernel and suspend themselves in userret().
    694  1.149.2.3      yamt 	 */
    695  1.149.2.3      yamt 	for (CPU_INFO_FOREACH(cii, ci))
    696  1.149.2.4      yamt 		cpu_need_resched(ci, 0);
    697  1.149.2.3      yamt }
    698  1.149.2.3      yamt 
    699  1.149.2.3      yamt /*
    700  1.149.2.3      yamt  * sched_kpri:
    701  1.149.2.3      yamt  *
    702  1.149.2.3      yamt  *	Scale a priority level to a kernel priority level, usually
    703  1.149.2.3      yamt  *	for an LWP that is about to sleep.
    704  1.149.2.3      yamt  */
    705  1.149.2.4      yamt pri_t
    706  1.149.2.3      yamt sched_kpri(struct lwp *l)
    707  1.149.2.3      yamt {
    708  1.149.2.3      yamt 	/*
    709  1.149.2.3      yamt 	 * Scale user priorities (127 -> 50) up to kernel priorities
    710  1.149.2.3      yamt 	 * in the range (49 -> 8).  Reserve the top 8 kernel priorities
    711  1.149.2.3      yamt 	 * for high priority kthreads.  Kernel priorities passed in
    712  1.149.2.3      yamt 	 * are left "as is".  XXX This is somewhat arbitrary.
    713  1.149.2.3      yamt 	 */
    714  1.149.2.3      yamt 	static const uint8_t kpri_tab[] = {
    715  1.149.2.3      yamt 		 0,   1,   2,   3,   4,   5,   6,   7,
    716  1.149.2.3      yamt 		 8,   9,  10,  11,  12,  13,  14,  15,
    717  1.149.2.3      yamt 		16,  17,  18,  19,  20,  21,  22,  23,
    718  1.149.2.3      yamt 		24,  25,  26,  27,  28,  29,  30,  31,
    719  1.149.2.3      yamt 		32,  33,  34,  35,  36,  37,  38,  39,
    720  1.149.2.3      yamt 		40,  41,  42,  43,  44,  45,  46,  47,
    721  1.149.2.3      yamt 		48,  49,   8,   8,   9,   9,  10,  10,
    722  1.149.2.3      yamt 		11,  11,  12,  12,  13,  14,  14,  15,
    723  1.149.2.3      yamt 		15,  16,  16,  17,  17,  18,  18,  19,
    724  1.149.2.3      yamt 		20,  20,  21,  21,  22,  22,  23,  23,
    725  1.149.2.3      yamt 		24,  24,  25,  26,  26,  27,  27,  28,
    726  1.149.2.3      yamt 		28,  29,  29,  30,  30,  31,  32,  32,
    727  1.149.2.3      yamt 		33,  33,  34,  34,  35,  35,  36,  36,
    728  1.149.2.3      yamt 		37,  38,  38,  39,  39,  40,  40,  41,
    729  1.149.2.3      yamt 		41,  42,  42,  43,  44,  44,  45,  45,
    730  1.149.2.3      yamt 		46,  46,  47,  47,  48,  48,  49,  49,
    731  1.149.2.3      yamt 	};
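
	/*
	 * For example, reading the table above: the lowest user priority,
	 * 127, maps to kernel priority 49; the highest user priority, 50,
	 * maps to 8; and values below 50, which are already kernel
	 * priorities, map to themselves.
	 */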
    732  1.149.2.3      yamt 
    733  1.149.2.4      yamt 	return (pri_t)kpri_tab[l->l_usrpri];
    734  1.149.2.3      yamt }
    735  1.149.2.3      yamt 
    736  1.149.2.3      yamt /*
    737  1.149.2.3      yamt  * sched_unsleep:
    738  1.149.2.3      yamt  *
     739  1.149.2.3      yamt  *	This is called when the LWP has not been awoken normally but instead
    740  1.149.2.3      yamt  *	interrupted: for example, if the sleep timed out.  Because of this,
    741  1.149.2.3      yamt  *	it's not a valid action for running or idle LWPs.
    742  1.149.2.3      yamt  */
    743  1.149.2.4      yamt static void
    744  1.149.2.3      yamt sched_unsleep(struct lwp *l)
    745  1.149.2.3      yamt {
    746  1.149.2.3      yamt 
    747  1.149.2.3      yamt 	lwp_unlock(l);
    748  1.149.2.3      yamt 	panic("sched_unsleep");
    749  1.149.2.3      yamt }
    750  1.149.2.3      yamt 
    751  1.149.2.4      yamt inline void
    752  1.149.2.4      yamt resched_cpu(struct lwp *l)
    753  1.149.2.3      yamt {
    754  1.149.2.4      yamt 	struct cpu_info *ci;
    755  1.149.2.4      yamt 	const pri_t pri = lwp_eprio(l);
    756  1.149.2.3      yamt 
    757  1.149.2.4      yamt 	/*
    758  1.149.2.4      yamt 	 * XXXSMP
    759  1.149.2.4      yamt 	 * Since l->l_cpu persists across a context switch,
    760  1.149.2.4      yamt 	 * this gives us *very weak* processor affinity, in
    761  1.149.2.4      yamt 	 * that we notify the CPU on which the process last
    762  1.149.2.4      yamt 	 * ran that it should try to switch.
    763  1.149.2.4      yamt 	 *
    764  1.149.2.4      yamt 	 * This does not guarantee that the process will run on
    765  1.149.2.4      yamt 	 * that processor next, because another processor might
    766  1.149.2.4      yamt 	 * grab it the next time it performs a context switch.
    767  1.149.2.4      yamt 	 *
    768  1.149.2.4      yamt 	 * This also does not handle the case where its last
    769  1.149.2.4      yamt 	 * CPU is running a higher-priority process, but every
    770  1.149.2.4      yamt 	 * other CPU is running a lower-priority process.  There
    771  1.149.2.4      yamt 	 * are ways to handle this situation, but they're not
    772  1.149.2.4      yamt 	 * currently very pretty, and we also need to weigh the
    773  1.149.2.4      yamt 	 * cost of moving a process from one CPU to another.
    774  1.149.2.4      yamt 	 */
    775  1.149.2.4      yamt 	ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
    776  1.149.2.4      yamt 	if (pri < ci->ci_schedstate.spc_curpriority)
    777  1.149.2.4      yamt 		cpu_need_resched(ci, 0);
    778  1.149.2.4      yamt }
    779  1.149.2.3      yamt 
    780  1.149.2.4      yamt static void
    781  1.149.2.4      yamt sched_changepri(struct lwp *l, pri_t pri)
    782  1.149.2.4      yamt {
    783  1.149.2.4      yamt 
    784  1.149.2.4      yamt 	KASSERT(lwp_locked(l, NULL));
    785  1.149.2.3      yamt 
    786  1.149.2.4      yamt 	l->l_usrpri = pri;
    787  1.149.2.3      yamt 	if (l->l_priority < PUSER)
    788  1.149.2.3      yamt 		return;
    789  1.149.2.4      yamt 
    790  1.149.2.4      yamt 	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
    791  1.149.2.3      yamt 		l->l_priority = pri;
    792  1.149.2.3      yamt 		return;
    793  1.149.2.1      yamt 	}
    794  1.149.2.3      yamt 
    795  1.149.2.4      yamt 	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
    796  1.149.2.4      yamt 
    797  1.149.2.4      yamt 	sched_dequeue(l);
    798  1.149.2.3      yamt 	l->l_priority = pri;
    799  1.149.2.4      yamt 	sched_enqueue(l, false);
    800  1.149.2.4      yamt 	resched_cpu(l);
    801  1.149.2.1      yamt }
    802  1.149.2.1      yamt 
    803      1.146      matt static void
    804  1.149.2.4      yamt sched_lendpri(struct lwp *l, pri_t pri)
    805      1.146      matt {
    806  1.149.2.4      yamt 
    807  1.149.2.4      yamt 	KASSERT(lwp_locked(l, NULL));
    808  1.149.2.4      yamt 
    809  1.149.2.4      yamt 	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
    810  1.149.2.4      yamt 		l->l_inheritedprio = pri;
    811  1.149.2.4      yamt 		return;
    812      1.146      matt 	}
    813  1.149.2.4      yamt 
    814  1.149.2.4      yamt 	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
    815  1.149.2.4      yamt 
    816  1.149.2.4      yamt 	sched_dequeue(l);
    817  1.149.2.4      yamt 	l->l_inheritedprio = pri;
    818  1.149.2.4      yamt 	sched_enqueue(l, false);
    819  1.149.2.4      yamt 	resched_cpu(l);
    820      1.146      matt }
    821      1.146      matt 
    822  1.149.2.4      yamt struct lwp *
    823  1.149.2.4      yamt syncobj_noowner(wchan_t wchan)
    824      1.113  gmcgarry {
    825  1.149.2.3      yamt 
    826  1.149.2.4      yamt 	return NULL;
    827      1.113  gmcgarry }
    828      1.113  gmcgarry 
    829  1.149.2.4      yamt 
    830  1.149.2.4      yamt /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
    831  1.149.2.4      yamt fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */
    832  1.149.2.4      yamt 
    833  1.149.2.3      yamt /*
    834  1.149.2.4      yamt  * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
    835  1.149.2.4      yamt  * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
    836  1.149.2.4      yamt  * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
    837  1.149.2.4      yamt  *
    838  1.149.2.4      yamt  * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
    839  1.149.2.4      yamt  *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
    840  1.149.2.4      yamt  *
     841  1.149.2.4      yamt  * If you don't want to bother with the faster/more-accurate formula, you
    842  1.149.2.4      yamt  * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
    843  1.149.2.4      yamt  * (more general) method of calculating the %age of CPU used by a process.
    844  1.149.2.3      yamt  */
    845  1.149.2.4      yamt #define	CCPU_SHIFT	(FSHIFT + 1)
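
/*
 * Derivation of the "95% in 60 seconds" figure above: sched_pstats() runs
 * once per second and multiplies the old value by ccpu each time, so after
 * 60 seconds the old contribution has been scaled by ccpu^60 =
 * exp(-60/20) = exp(-3) ~= 0.0498; only about 5% of it remains.
 */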
    846  1.149.2.4      yamt 
    847  1.149.2.4      yamt /*
    848  1.149.2.4      yamt  * sched_pstats:
    849  1.149.2.4      yamt  *
    850  1.149.2.4      yamt  * Update process statistics and check CPU resource allocation.
    851  1.149.2.4      yamt  * Call scheduler-specific hook to eventually adjust process/LWP
    852  1.149.2.4      yamt  * priorities.
    853  1.149.2.4      yamt  */
    854  1.149.2.4      yamt /* ARGSUSED */
    855      1.113  gmcgarry void
    856  1.149.2.4      yamt sched_pstats(void *arg)
    857      1.113  gmcgarry {
    858  1.149.2.4      yamt 	struct rlimit *rlim;
    859  1.149.2.4      yamt 	struct lwp *l;
    860  1.149.2.4      yamt 	struct proc *p;
    861  1.149.2.4      yamt 	int minslp, sig, clkhz;
    862  1.149.2.4      yamt 	long runtm;
    863  1.149.2.3      yamt 
    864  1.149.2.4      yamt 	sched_pstats_ticks++;
    865  1.149.2.3      yamt 
    866  1.149.2.4      yamt 	mutex_enter(&proclist_mutex);
    867  1.149.2.4      yamt 	PROCLIST_FOREACH(p, &allproc) {
    868  1.149.2.4      yamt 		/*
    869  1.149.2.4      yamt 		 * Increment time in/out of memory and sleep time (if
    870  1.149.2.4      yamt 		 * sleeping).  We ignore overflow; with 16-bit int's
    871  1.149.2.4      yamt 		 * (remember them?) overflow takes 45 days.
    872  1.149.2.4      yamt 		 */
    873  1.149.2.4      yamt 		minslp = 2;
    874  1.149.2.4      yamt 		mutex_enter(&p->p_smutex);
    875  1.149.2.4      yamt 		mutex_spin_enter(&p->p_stmutex);
    876  1.149.2.4      yamt 		runtm = p->p_rtime.tv_sec;
    877  1.149.2.4      yamt 		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
    878  1.149.2.4      yamt 			if ((l->l_flag & LW_IDLE) != 0)
    879  1.149.2.4      yamt 				continue;
    880  1.149.2.4      yamt 			lwp_lock(l);
    881  1.149.2.4      yamt 			runtm += l->l_rtime.tv_sec;
    882  1.149.2.4      yamt 			l->l_swtime++;
    883  1.149.2.4      yamt 			if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
    884  1.149.2.4      yamt 			    l->l_stat == LSSUSPENDED) {
    885  1.149.2.4      yamt 				l->l_slptime++;
    886  1.149.2.4      yamt 				minslp = min(minslp, l->l_slptime);
    887  1.149.2.4      yamt 			} else
    888  1.149.2.4      yamt 				minslp = 0;
    889  1.149.2.5      yamt 			sched_pstats_hook(l);
    890  1.149.2.4      yamt 			lwp_unlock(l);
    891  1.149.2.4      yamt 
    892  1.149.2.4      yamt 			/*
     893  1.149.2.4      yamt 			 * l_pctcpu is only for ps.
    894  1.149.2.4      yamt 			 */
    895  1.149.2.4      yamt 			l->l_pctcpu = (l->l_pctcpu * ccpu) >> FSHIFT;
    896  1.149.2.4      yamt 			if (l->l_slptime < 1) {
    897  1.149.2.4      yamt 				clkhz = stathz != 0 ? stathz : hz;
    898  1.149.2.4      yamt #if	(FSHIFT >= CCPU_SHIFT)
    899  1.149.2.4      yamt 				l->l_pctcpu += (clkhz == 100) ?
    900  1.149.2.4      yamt 				    ((fixpt_t)l->l_cpticks) <<
    901  1.149.2.4      yamt 				        (FSHIFT - CCPU_SHIFT) :
    902  1.149.2.4      yamt 				    100 * (((fixpt_t) p->p_cpticks)
    903  1.149.2.4      yamt 				        << (FSHIFT - CCPU_SHIFT)) / clkhz;
    904  1.149.2.4      yamt #else
    905  1.149.2.4      yamt 				l->l_pctcpu += ((FSCALE - ccpu) *
    906  1.149.2.4      yamt 				    (l->l_cpticks * FSCALE / clkhz)) >> FSHIFT;
    907      1.146      matt #endif
    908  1.149.2.4      yamt 				l->l_cpticks = 0;
    909  1.149.2.4      yamt 			}
    910  1.149.2.4      yamt 		}
    911  1.149.2.5      yamt 
    912  1.149.2.4      yamt 		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
    913  1.149.2.5      yamt #ifdef SCHED_4BSD
    914  1.149.2.5      yamt 		/*
    915  1.149.2.5      yamt 		 * XXX: Workaround - belongs to sched_4bsd.c
    916  1.149.2.5      yamt 		 * If the process has slept the entire second,
    917  1.149.2.5      yamt 		 * stop recalculating its priority until it wakes up.
    918  1.149.2.5      yamt 		 */
    919  1.149.2.5      yamt 		if (minslp <= 1) {
    920  1.149.2.5      yamt 			extern fixpt_t decay_cpu(fixpt_t, fixpt_t);
    921  1.149.2.5      yamt 
    922  1.149.2.5      yamt 			fixpt_t loadfac = 2 * (averunnable.ldavg[0]);
    923  1.149.2.5      yamt 			p->p_estcpu = decay_cpu(loadfac, p->p_estcpu);
    924  1.149.2.5      yamt 		}
    925  1.149.2.5      yamt #endif
    926  1.149.2.4      yamt 		mutex_spin_exit(&p->p_stmutex);
    927  1.149.2.3      yamt 
    928  1.149.2.4      yamt 		/*
    929  1.149.2.4      yamt 		 * Check if the process exceeds its CPU resource allocation.
    930  1.149.2.4      yamt 		 * If over max, kill it.
    931  1.149.2.4      yamt 		 */
    932  1.149.2.4      yamt 		rlim = &p->p_rlimit[RLIMIT_CPU];
    933  1.149.2.4      yamt 		sig = 0;
    934  1.149.2.4      yamt 		if (runtm >= rlim->rlim_cur) {
    935  1.149.2.4      yamt 			if (runtm >= rlim->rlim_max)
    936  1.149.2.4      yamt 				sig = SIGKILL;
    937  1.149.2.4      yamt 			else {
    938  1.149.2.4      yamt 				sig = SIGXCPU;
    939  1.149.2.4      yamt 				if (rlim->rlim_cur < rlim->rlim_max)
    940  1.149.2.4      yamt 					rlim->rlim_cur += 5;
    941  1.149.2.4      yamt 			}
    942  1.149.2.4      yamt 		}
    943  1.149.2.4      yamt 		mutex_exit(&p->p_smutex);
    944  1.149.2.4      yamt 		if (sig) {
    945  1.149.2.4      yamt 			psignal(p, sig);
    946  1.149.2.4      yamt 		}
    947  1.149.2.3      yamt 	}
    948  1.149.2.4      yamt 	mutex_exit(&proclist_mutex);
    949  1.149.2.4      yamt 	uvm_meter();
    950  1.149.2.4      yamt 	cv_wakeup(&lbolt);
    951  1.149.2.4      yamt 	callout_schedule(&sched_pstats_ch, hz);
    952      1.113  gmcgarry }
    953      1.113  gmcgarry 
    954  1.149.2.4      yamt void
    955  1.149.2.4      yamt sched_init(void)
    956  1.149.2.4      yamt {
    957  1.149.2.4      yamt 
    958  1.149.2.4      yamt 	cv_init(&lbolt, "lbolt");
    959  1.149.2.4      yamt 	callout_init(&sched_pstats_ch, 0);
    960  1.149.2.4      yamt 	callout_setfunc(&sched_pstats_ch, sched_pstats, NULL);
    961  1.149.2.4      yamt 	sched_setup();
    962  1.149.2.4      yamt 	sched_pstats(NULL);
    963  1.149.2.4      yamt }
    964