/*	$NetBSD: kern_synch.c,v 1.194.6.2 2007/08/06 11:48:24 yamt Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and
 * Daniel Sieger.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.194.6.2 2007/08/06 11:48:24 yamt Exp $");

#include "opt_kstack.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include "opt_perfctrs.h"

#define	__MUTEX_PRIVATE

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#if defined(PERFCTRS)
#include <sys/pmc.h>
#endif
#include <sys/cpu.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/syscall_stats.h>
#include <sys/sleepq.h>
#include <sys/lockdebug.h>
#include <sys/evcnt.h>

#include <uvm/uvm_extern.h>

callout_t sched_pstats_ch;
unsigned int sched_pstats_ticks;

kcondvar_t lbolt;			/* once a second sleep address */

static void sched_unsleep(struct lwp *);
static void sched_changepri(struct lwp *, pri_t);
static void sched_lendpri(struct lwp *, pri_t);

syncobj_t sleep_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	sleepq_unsleep,
	sleepq_changepri,
	sleepq_lendpri,
	syncobj_noowner,
};

syncobj_t sched_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	sched_unsleep,
	sched_changepri,
	sched_lendpri,
	syncobj_noowner,
};

/*
 * During autoconfiguration or after a panic, a sleep will simply lower the
 * priority briefly to allow interrupts, then return.  The priority to be
 * used (safepri) is machine-dependent, thus this value is initialized and
 * maintained in the machine-dependent layers.  This priority will typically
 * be 0, or the lowest priority that is safe for use on the interrupt stack;
 * it can be made higher to block network software interrupts after panics.
 */
int	safepri;
/*
 * OBSOLETE INTERFACE
 *
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes the PCATCH flag, signals are
 * checked before and after sleeping, otherwise signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal.
 *
 * The interlock is held until we are on a sleep queue.  The interlock will
 * be locked before returning to the caller unless the PNORELOCK flag is
 * specified, in which case the interlock will always be unlocked upon
 * return.
 */
int
ltsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
	volatile struct simplelock *interlock)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error;

	if (sleepq_dontsleep(l)) {
		(void)sleepq_abort(NULL, 0);
		if ((priority & PNORELOCK) != 0)
			simple_unlock(interlock);
		return 0;
	}

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_enter(sq, l);
	sleepq_enqueue(sq, priority & PRIMASK, ident, wmesg, &sleep_syncobj);

	if (interlock != NULL) {
		LOCK_ASSERT(simple_lock_held(interlock));
		simple_unlock(interlock);
	}

	error = sleepq_block(timo, priority & PCATCH);

	if (interlock != NULL && (priority & PNORELOCK) == 0)
		simple_lock(interlock);

	return error;
}
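
/*
 * Illustrative sketch (not part of the original file): one plausible way a
 * caller might use the obsolete ltsleep() interface to wait for a condition
 * protected by a simplelock.  The names example_lock, example_ready,
 * example_wait and example_signal are hypothetical.  The block is kept
 * under "#if 0" because it exists only as documentation.
 */
#if 0
static struct simplelock example_lock;	/* assumed: simple_lock_init()ed */
static int example_ready;

static int
example_wait(void)
{
	int error = 0;

	simple_lock(&example_lock);
	while (!example_ready && error == 0) {
		/* ltsleep() drops the interlock while asleep, retakes it. */
		error = ltsleep(&example_ready, PWAIT | PCATCH, "exwait",
		    0, &example_lock);
	}
	simple_unlock(&example_lock);
	return error;
}

static void
example_signal(void)
{

	simple_lock(&example_lock);
	example_ready = 1;
	simple_unlock(&example_lock);
	wakeup(&example_ready);	/* wake everyone sleeping on this ident */
}
#endif	/* 0 */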

int
mtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
	kmutex_t *mtx)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error;

	if (sleepq_dontsleep(l)) {
		(void)sleepq_abort(mtx, (priority & PNORELOCK) != 0);
		return 0;
	}

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_enter(sq, l);
	sleepq_enqueue(sq, priority & PRIMASK, ident, wmesg, &sleep_syncobj);
	mutex_exit(mtx);
	error = sleepq_block(timo, priority & PCATCH);

	if ((priority & PNORELOCK) == 0)
		mutex_enter(mtx);

	return error;
}

/*
 * General sleep call for situations where a wake-up is not expected.
 */
int
kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error;

	if (sleepq_dontsleep(l))
		return sleepq_abort(NULL, 0);

	if (mtx != NULL)
		mutex_exit(mtx);
	sq = sleeptab_lookup(&sleeptab, l);
	sleepq_enter(sq, l);
	sleepq_enqueue(sq, sched_kpri(l), l, wmesg, &sleep_syncobj);
	error = sleepq_block(timo, intr);
	if (mtx != NULL)
		mutex_enter(mtx);

	return error;
}
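
/*
 * Illustrative sketch (not part of the original file): kpause() is the
 * simple way to sleep for a fixed interval when no explicit wakeup() is
 * expected.  The hypothetical fragment below pauses for roughly one
 * second and lets signals interrupt the wait; on timeout the underlying
 * sleepq_block() is expected to return EWOULDBLOCK.
 */
#if 0
	int error;

	error = kpause("exdelay", true, hz, NULL);
	if (error != 0 && error != EWOULDBLOCK) {
		/* Interrupted by a signal: EINTR or ERESTART. */
	}
#endif	/* 0 */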

/*
 * OBSOLETE INTERFACE
 *
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(wchan_t ident)
{
	sleepq_t *sq;

	if (cold)
		return;

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_wake(sq, ident, (u_int)-1);
}

/*
 * OBSOLETE INTERFACE
 *
 * Make the highest priority process first in line on the specified
 * identifier runnable.
 */
void
wakeup_one(wchan_t ident)
{
	sleepq_t *sq;

	if (cold)
		return;

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_wake(sq, ident, 1);
}
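
/*
 * Illustrative sketch (not part of the original file): a hypothetical
 * producer/consumer pair built on the obsolete mtsleep()/wakeup()
 * interface with a kmutex.  example_mtx and example_count are
 * assumptions; new code would normally prefer condition variables
 * (cv_wait()/cv_broadcast()) over this pattern.
 */
#if 0
static kmutex_t example_mtx;		/* assumed: mutex_init()ed elsewhere */
static int example_count;

static int
example_consume(void)
{
	int error = 0;

	mutex_enter(&example_mtx);
	while (example_count == 0 && error == 0) {
		/* mtsleep() releases example_mtx while asleep, retakes it. */
		error = mtsleep(&example_count, PWAIT | PCATCH, "excons",
		    0, &example_mtx);
	}
	if (error == 0)
		example_count--;
	mutex_exit(&example_mtx);
	return error;
}

static void
example_produce(void)
{

	mutex_enter(&example_mtx);
	example_count++;
	mutex_exit(&example_mtx);
	wakeup(&example_count);
}
#endif	/* 0 */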


/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.  Should only be called when the
 * current process explicitly requests it (e.g. sched_yield(2) in compat
 * code).
 */
void
yield(void)
{
	struct lwp *l = curlwp;

	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
	lwp_lock(l);
	KASSERT(lwp_locked(l, &l->l_cpu->ci_schedstate.spc_lwplock));
	KASSERT(l->l_stat == LSONPROC);
	l->l_priority = l->l_usrpri;
	(void)mi_switch(l);
	KERNEL_LOCK(l->l_biglocks, l);
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.
 */
void
preempt(void)
{
	struct lwp *l = curlwp;

	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
	lwp_lock(l);
	KASSERT(lwp_locked(l, &l->l_cpu->ci_schedstate.spc_lwplock));
	KASSERT(l->l_stat == LSONPROC);
	l->l_priority = l->l_usrpri;
	l->l_nivcsw++;
	(void)mi_switch(l);
	KERNEL_LOCK(l->l_biglocks, l);
}

/*
 * Compute the amount of time during which the current lwp was running.
 *
 * - update l_rtime unless it's an idle lwp.
 * - update spc_runtime for the next lwp.
 */

static inline void
updatertime(struct lwp *l, struct schedstate_percpu *spc)
{
	struct timeval tv;
	long s, u;

	if ((l->l_flag & LW_IDLE) != 0) {
		microtime(&spc->spc_runtime);
		return;
	}

	microtime(&tv);
	u = l->l_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec);
	s = l->l_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
	if (u < 0) {
		u += 1000000;
		s--;
	} else if (u >= 1000000) {
		u -= 1000000;
		s++;
	}
	l->l_rtime.tv_usec = u;
	l->l_rtime.tv_sec = s;

	spc->spc_runtime = tv;
}

/*
 * The machine independent parts of context switch.
 *
 * Returns 1 if another LWP was actually run.
 */
int
mi_switch(struct lwp *l)
{
	struct schedstate_percpu *spc;
	struct lwp *newl;
	int retval, oldspl;

	KASSERT(lwp_locked(l, NULL));
	LOCKDEBUG_BARRIER(l->l_mutex, 1);

#ifdef KSTACK_CHECK_MAGIC
	kstack_check_magic(l);
#endif

	/*
	 * It's safe to read the per-CPU schedstate unlocked here, as all we
	 * are after is the run time and that's guaranteed to have been last
	 * updated by this CPU.
	 */
	KDASSERT(l->l_cpu == curcpu());

	/*
	 * Process is about to yield the CPU; clear the appropriate
	 * scheduling flags.
	 */
	spc = &l->l_cpu->ci_schedstate;
	newl = NULL;

	if (l->l_switchto != NULL) {
		newl = l->l_switchto;
		l->l_switchto = NULL;
	}

	/* Count time spent in current system call */
	SYSCALL_TIME_SLEEP(l);

	/*
	 * XXXSMP If we are using h/w performance counters,
	 * save context.
	 */
#if PERFCTRS
	if (PMC_ENABLED(l->l_proc)) {
		pmc_save_context(l->l_proc);
	}
#endif
	updatertime(l, spc);

	/*
	 * If on the CPU and we have gotten this far, then we must yield.
	 */
	mutex_spin_enter(spc->spc_mutex);
	spc->spc_flags &= ~SPCF_SWITCHCLEAR;
	KASSERT(l->l_stat != LSRUN);
	if (l->l_stat == LSONPROC) {
		KASSERT(lwp_locked(l, &spc->spc_lwplock));
		if ((l->l_flag & LW_IDLE) == 0) {
			l->l_stat = LSRUN;
			lwp_setlock(l, spc->spc_mutex);
			sched_enqueue(l, true);
		} else
			l->l_stat = LSIDL;
	}

	/*
	 * Let sched_nextlwp() select the LWP to run on the CPU next.
	 * If no LWP is runnable, switch to the idle LWP.
	 */
	if (newl == NULL) {
		newl = sched_nextlwp();
		if (newl != NULL) {
			sched_dequeue(newl);
			KASSERT(lwp_locked(newl, spc->spc_mutex));
			newl->l_stat = LSONPROC;
			newl->l_cpu = l->l_cpu;
			newl->l_flag |= LW_RUNNING;
			lwp_setlock(newl, &spc->spc_lwplock);
		} else {
			newl = l->l_cpu->ci_data.cpu_idlelwp;
			newl->l_stat = LSONPROC;
			newl->l_flag |= LW_RUNNING;
		}
		spc->spc_curpriority = newl->l_usrpri;
		newl->l_priority = newl->l_usrpri;
		cpu_did_resched();
	}

	if (l != newl) {
		struct lwp *prevlwp;

		/*
		 * If the old LWP has been moved to a run queue above,
		 * drop the general purpose LWP lock: it's now locked
		 * by the scheduler lock.
		 *
		 * Otherwise, drop the scheduler lock.  We're done with
		 * the run queues for now.
		 */
		if (l->l_mutex == spc->spc_mutex) {
			mutex_spin_exit(&spc->spc_lwplock);
		} else {
			mutex_spin_exit(spc->spc_mutex);
		}

		/* Unlocked, but for statistics only. */
		uvmexp.swtch++;

		/* Save old VM context. */
		pmap_deactivate(l);

		/* Switch to the new LWP.. */
		l->l_ncsw++;
		l->l_flag &= ~LW_RUNNING;
		oldspl = MUTEX_SPIN_OLDSPL(l->l_cpu);
		prevlwp = cpu_switchto(l, newl);

		/*
		 * .. we have switched away and are now back so we must
		 * be the new curlwp.  prevlwp is who we replaced.
		 */
		curlwp = l;
		if (prevlwp != NULL) {
			curcpu()->ci_mtx_oldspl = oldspl;
			lwp_unlock(prevlwp);
		} else {
			splx(oldspl);
		}

		/* Restore VM context. */
		pmap_activate(l);
		retval = 1;
	} else {
		/* Nothing to do - just unlock and return. */
		mutex_spin_exit(spc->spc_mutex);
		lwp_unlock(l);
		retval = 0;
	}

	KASSERT(l == curlwp);
	KASSERT(l->l_stat == LSONPROC);

	/*
	 * XXXSMP If we are using h/w performance counters, restore context.
	 */
#if PERFCTRS
	if (PMC_ENABLED(l->l_proc)) {
		pmc_restore_context(l->l_proc);
	}
#endif

	/*
	 * We're running again; record our new start time.  We might
	 * be running on a new CPU now, so don't use the cached
	 * schedstate_percpu pointer.
	 */
	SYSCALL_TIME_WAKEUP(l);
	KDASSERT(l->l_cpu == curcpu());
	LOCKDEBUG_BARRIER(NULL, 1);

	return retval;
}

/*
 * Change process state to be runnable, placing it on the run queue if it is
 * in memory, and awakening the swapper if it isn't in memory.
 *
 * Call with the process and LWP locked.  Will return with the LWP unlocked.
 */
void
setrunnable(struct lwp *l)
{
	struct proc *p = l->l_proc;
	sigset_t *ss;

	KASSERT((l->l_flag & LW_IDLE) == 0);
	KASSERT(mutex_owned(&p->p_smutex));
	KASSERT(lwp_locked(l, NULL));

	switch (l->l_stat) {
	case LSSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xstat != 0) {
			if ((sigprop[p->p_xstat] & SA_TOLWP) != 0)
				ss = &l->l_sigpend.sp_set;
			else
				ss = &p->p_sigpend.sp_set;
			sigaddset(ss, p->p_xstat);
			signotify(l);
		}
		p->p_nrlwps++;
		break;
	case LSSUSPENDED:
		l->l_flag &= ~LW_WSUSPEND;
		p->p_nrlwps++;
		cv_broadcast(&p->p_lwpcv);
		break;
	case LSSLEEP:
		KASSERT(l->l_wchan != NULL);
		break;
	default:
		panic("setrunnable: lwp %p state was %d", l, l->l_stat);
	}

	/*
	 * If the LWP was sleeping interruptibly, then it's OK to start it
	 * again.  If not, mark it as still sleeping.
	 */
	if (l->l_wchan != NULL) {
		l->l_stat = LSSLEEP;
		/* lwp_unsleep() will release the lock. */
		lwp_unsleep(l);
		return;
	}

	/*
	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
	 * about to call mi_switch(), in which case it will yield.
	 */
	if ((l->l_flag & LW_RUNNING) != 0) {
		l->l_stat = LSONPROC;
		l->l_slptime = 0;
		lwp_unlock(l);
		return;
	}

	/*
	 * Set the LWP runnable.  If it's swapped out, we need to wake the
	 * swapper to bring it back in.  Otherwise, enter it into a run queue.
	 */
	if (l->l_mutex != l->l_cpu->ci_schedstate.spc_mutex) {
		spc_lock(l->l_cpu);
		lwp_unlock_to(l, l->l_cpu->ci_schedstate.spc_mutex);
	}

	sched_setrunnable(l);
	l->l_stat = LSRUN;
	l->l_slptime = 0;

	if (l->l_flag & LW_INMEM) {
		sched_enqueue(l, false);
		resched_cpu(l);
		lwp_unlock(l);
	} else {
		lwp_unlock(l);
		uvm_kick_scheduler();
	}
}

/*
 * suspendsched:
 *
 *	Convert all non-PK_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED.
 */
void
suspendsched(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	struct lwp *l;
	struct proc *p;

	/*
	 * We do this by process in order not to violate the locking rules.
	 */
	mutex_enter(&proclist_mutex);
	PROCLIST_FOREACH(p, &allproc) {
		mutex_enter(&p->p_smutex);

		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(&p->p_smutex);
			continue;
		}

		p->p_stat = SSTOP;

		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if (l == curlwp)
				continue;

			lwp_lock(l);

			/*
			 * Set LW_WREBOOT so that the LWP will suspend itself
			 * when it tries to return to user mode.  We want to
			 * get as many LWPs as possible to the user / kernel
			 * boundary, so that they will release any locks
			 * that they hold.
			 */
			l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);

			if (l->l_stat == LSSLEEP &&
			    (l->l_flag & LW_SINTR) != 0) {
				/* setrunnable() will release the lock. */
				setrunnable(l);
				continue;
			}

			lwp_unlock(l);
		}

		mutex_exit(&p->p_smutex);
	}
	mutex_exit(&proclist_mutex);

	/*
	 * Kick all CPUs to make them preempt any LWPs running in user mode.
	 * They'll trap into the kernel and suspend themselves in userret().
	 */
	for (CPU_INFO_FOREACH(cii, ci))
		cpu_need_resched(ci, 0);
}

/*
 * sched_kpri:
 *
 *	Scale a priority level to a kernel priority level, usually
 *	for an LWP that is about to sleep.
 */
pri_t
sched_kpri(struct lwp *l)
{
	/*
	 * Scale user priorities (127 -> 50) up to kernel priorities
	 * in the range (49 -> 8).  Reserve the top 8 kernel priorities
	 * for high priority kthreads.  Kernel priorities passed in
	 * are left "as is".  XXX This is somewhat arbitrary.
	 */
	static const uint8_t kpri_tab[] = {
		 0,  1,  2,  3,  4,  5,  6,  7,
		 8,  9, 10, 11, 12, 13, 14, 15,
		16, 17, 18, 19, 20, 21, 22, 23,
		24, 25, 26, 27, 28, 29, 30, 31,
		32, 33, 34, 35, 36, 37, 38, 39,
		40, 41, 42, 43, 44, 45, 46, 47,
		48, 49,  8,  8,  9,  9, 10, 10,
		11, 11, 12, 12, 13, 14, 14, 15,
		15, 16, 16, 17, 17, 18, 18, 19,
		20, 20, 21, 21, 22, 22, 23, 23,
		24, 24, 25, 26, 26, 27, 27, 28,
		28, 29, 29, 30, 30, 31, 32, 32,
		33, 33, 34, 34, 35, 35, 36, 36,
		37, 38, 38, 39, 39, 40, 40, 41,
		41, 42, 42, 43, 44, 44, 45, 45,
		46, 46, 47, 47, 48, 48, 49, 49,
	};

	return (pri_t)kpri_tab[l->l_usrpri];
}
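
/*
 * Illustrative sketch (not part of the original file): the hand-laid-out
 * kpri_tab above is roughly a linear rescale of the user priority range
 * (50..127) onto the kernel range (8..49), with kernel priorities passed
 * through unchanged.  A hypothetical generator along the lines below
 * produces a table of the same shape, though it does not reproduce the
 * hand-tuned entries exactly.
 */
#if 0
static void
example_gen_kpri_tab(uint8_t tab[128])
{
	int i;

	for (i = 0; i < 50; i++)
		tab[i] = i;			/* kernel priorities: unchanged */
	for (i = 50; i < 128; i++)		/* user priorities: rescale */
		tab[i] = 8 + ((i - 50) * (49 - 8 + 1)) / (128 - 50);
}
#endif	/* 0 */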

/*
 * sched_unsleep:
 *
 *	This is called when the LWP has not been awoken normally but instead
 *	interrupted: for example, if the sleep timed out.  Because of this,
 *	it's not a valid action for running or idle LWPs.
 */
static void
sched_unsleep(struct lwp *l)
{

	lwp_unlock(l);
	panic("sched_unsleep");
}

inline void
resched_cpu(struct lwp *l)
{
	struct cpu_info *ci;
	const pri_t pri = lwp_eprio(l);

	/*
	 * XXXSMP
	 * Since l->l_cpu persists across a context switch,
	 * this gives us *very weak* processor affinity, in
	 * that we notify the CPU on which the process last
	 * ran that it should try to switch.
	 *
	 * This does not guarantee that the process will run on
	 * that processor next, because another processor might
	 * grab it the next time it performs a context switch.
	 *
	 * This also does not handle the case where its last
	 * CPU is running a higher-priority process, but every
	 * other CPU is running a lower-priority process.  There
	 * are ways to handle this situation, but they're not
	 * currently very pretty, and we also need to weigh the
	 * cost of moving a process from one CPU to another.
	 */
	ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
	if (pri < ci->ci_schedstate.spc_curpriority)
		cpu_need_resched(ci, 0);
}

static void
sched_changepri(struct lwp *l, pri_t pri)
{

	KASSERT(lwp_locked(l, NULL));

	l->l_usrpri = pri;
	if (l->l_priority < PUSER)
		return;

	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
		l->l_priority = pri;
		return;
	}

	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	sched_dequeue(l);
	l->l_priority = pri;
	sched_enqueue(l, false);
	resched_cpu(l);
}

static void
sched_lendpri(struct lwp *l, pri_t pri)
{

	KASSERT(lwp_locked(l, NULL));

	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
		l->l_inheritedprio = pri;
		return;
	}

	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	sched_dequeue(l);
	l->l_inheritedprio = pri;
	sched_enqueue(l, false);
	resched_cpu(l);
}

struct lwp *
syncobj_noowner(wchan_t wchan)
{

	return NULL;
}


/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the percentage of CPU used by a
 * process.
 */
#define	CCPU_SHIFT	(FSHIFT + 1)
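
/*
 * Worked example (informational): sched_pstats() runs roughly once per
 * second, and each run multiplies the old estimate by ccpu = exp(-1/20).
 * After 60 runs the old contribution has been scaled by exp(-60/20) =
 * exp(-3) ~= 0.0498, i.e. about 95% of the previous p_pctcpu has decayed
 * away, which is where the "decay 95% in 60 seconds" figure above comes
 * from.  In fixed point, ccpu ~= 0.95123 * FSCALE.
 */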

/*
 * sched_pstats:
 *
 *	Update process statistics and check CPU resource allocation.
 *	Call scheduler-specific hook to eventually adjust process/LWP
 *	priorities.
 *
 *	XXXSMP This needs to be reorganised in order to reduce the locking
 *	burden.
 */
/* ARGSUSED */
void
sched_pstats(void *arg)
{
	struct rlimit *rlim;
	struct lwp *l;
	struct proc *p;
	int minslp, sig, clkhz;
	long runtm;

	sched_pstats_ticks++;

	mutex_enter(&proclist_mutex);
	PROCLIST_FOREACH(p, &allproc) {
		/*
		 * Increment time in/out of memory and sleep time (if
		 * sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		minslp = 2;
		mutex_enter(&p->p_smutex);
		mutex_spin_enter(&p->p_stmutex);
		runtm = p->p_rtime.tv_sec;
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if ((l->l_flag & LW_IDLE) != 0)
				continue;
			lwp_lock(l);
			runtm += l->l_rtime.tv_sec;
			l->l_swtime++;
			if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
			    l->l_stat == LSSUSPENDED) {
				l->l_slptime++;
				minslp = min(minslp, l->l_slptime);
			} else
				minslp = 0;
			lwp_unlock(l);

			/*
			 * l_pctcpu is only for ps.
			 */
			l->l_pctcpu = (l->l_pctcpu * ccpu) >> FSHIFT;
			if (l->l_slptime < 1) {
				clkhz = stathz != 0 ? stathz : hz;
#if	(FSHIFT >= CCPU_SHIFT)
				l->l_pctcpu += (clkhz == 100) ?
				    ((fixpt_t)l->l_cpticks) <<
				    (FSHIFT - CCPU_SHIFT) :
				    100 * (((fixpt_t)l->l_cpticks)
				    << (FSHIFT - CCPU_SHIFT)) / clkhz;
#else
				l->l_pctcpu += ((FSCALE - ccpu) *
				    (l->l_cpticks * FSCALE / clkhz)) >> FSHIFT;
#endif
				l->l_cpticks = 0;
			}
		}
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		sched_pstats_hook(p, minslp);
		mutex_spin_exit(&p->p_stmutex);

		/*
		 * Check if the process exceeds its CPU resource allocation.
		 * If over max, kill it.
		 */
		rlim = &p->p_rlimit[RLIMIT_CPU];
		sig = 0;
		if (runtm >= rlim->rlim_cur) {
			if (runtm >= rlim->rlim_max)
				sig = SIGKILL;
			else {
				sig = SIGXCPU;
				if (rlim->rlim_cur < rlim->rlim_max)
					rlim->rlim_cur += 5;
			}
		}
		mutex_exit(&p->p_smutex);
		if (sig) {
			psignal(p, sig);
		}
	}
	mutex_exit(&proclist_mutex);
	uvm_meter();
	cv_wakeup(&lbolt);
	callout_schedule(&sched_pstats_ch, hz);
}

void
sched_init(void)
{

	cv_init(&lbolt, "lbolt");
	callout_init(&sched_pstats_ch, 0);
	callout_setfunc(&sched_pstats_ch, sched_pstats, NULL);
	sched_setup();
	sched_pstats(NULL);
}