/* $NetBSD: kern_synch.c,v 1.149.2.5 2007/10/27 11:35:29 yamt Exp $ */

/*-
 * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and
 * Daniel Sieger.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)kern_synch.c        8.9 (Berkeley) 5/19/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.149.2.5 2007/10/27 11:35:29 yamt Exp $");

#include "opt_kstack.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include "opt_perfctrs.h"

#define __MUTEX_PRIVATE

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#if defined(PERFCTRS)
#include <sys/pmc.h>
#endif
#include <sys/cpu.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/syscall_stats.h>
#include <sys/sleepq.h>
#include <sys/lockdebug.h>
#include <sys/evcnt.h>
#include <sys/intr.h>

#include <uvm/uvm_extern.h>

callout_t sched_pstats_ch;
unsigned int sched_pstats_ticks;

kcondvar_t lbolt;                       /* once a second sleep address */

static void sched_unsleep(struct lwp *);
static void sched_changepri(struct lwp *, pri_t);
static void sched_lendpri(struct lwp *, pri_t);

syncobj_t sleep_syncobj = {
        SOBJ_SLEEPQ_SORTED,
        sleepq_unsleep,
        sleepq_changepri,
        sleepq_lendpri,
        syncobj_noowner,
};

syncobj_t sched_syncobj = {
        SOBJ_SLEEPQ_SORTED,
        sched_unsleep,
        sched_changepri,
        sched_lendpri,
        syncobj_noowner,
};

/*
 * During autoconfiguration or after a panic, a sleep will simply lower the
 * priority briefly to allow interrupts, then return.  The priority to be
 * used (safepri) is machine-dependent, thus this value is initialized and
 * maintained in the machine-dependent layers.  This priority will typically
 * be 0, or the lowest priority that is safe for use on the interrupt stack;
 * it can be made higher to block network software interrupts after panics.
 */
int safepri;

/*
 * OBSOLETE INTERFACE
 *
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds (0
 * means no timeout).  If pri includes the PCATCH flag, signals are checked
 * before and after sleeping, else signals are not checked.  Returns 0 if
 * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
 * signal needs to be delivered, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the system
 * call should be interrupted by the signal (return EINTR).
 *
 * The interlock is held until we are on a sleep queue.  The interlock will
 * be locked before returning to the caller unless the PNORELOCK flag is
 * specified, in which case the interlock will always be unlocked upon
 * return.
 */
int
ltsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
        volatile struct simplelock *interlock)
{
        struct lwp *l = curlwp;
        sleepq_t *sq;
        int error;

        if (sleepq_dontsleep(l)) {
                (void)sleepq_abort(NULL, 0);
                if ((priority & PNORELOCK) != 0)
                        simple_unlock(interlock);
                return 0;
        }

        sq = sleeptab_lookup(&sleeptab, ident);
        sleepq_enter(sq, l);
        sleepq_enqueue(sq, priority & PRIMASK, ident, wmesg, &sleep_syncobj);

        if (interlock != NULL) {
                LOCK_ASSERT(simple_lock_held(interlock));
                simple_unlock(interlock);
        }

        error = sleepq_block(timo, priority & PCATCH);

        if (interlock != NULL && (priority & PNORELOCK) == 0)
                simple_lock(interlock);

        return error;
}

int
mtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
        kmutex_t *mtx)
{
        struct lwp *l = curlwp;
        sleepq_t *sq;
        int error;

        if (sleepq_dontsleep(l)) {
                (void)sleepq_abort(mtx, (priority & PNORELOCK) != 0);
                return 0;
        }

        sq = sleeptab_lookup(&sleeptab, ident);
        sleepq_enter(sq, l);
        sleepq_enqueue(sq, priority & PRIMASK, ident, wmesg, &sleep_syncobj);
        mutex_exit(mtx);
        error = sleepq_block(timo, priority & PCATCH);

        if ((priority & PNORELOCK) == 0)
                mutex_enter(mtx);

        return error;
}
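
/*
 * Editor's note: the sketch below is not part of the original source and
 * is kept under #if 0.  It illustrates the usual mtsleep() pattern: wait
 * for a condition protected by a kmutex, with PCATCH so signals can
 * interrupt the wait.  "struct example_softc", its sc_lock and sc_ready
 * members, and example_wait_ready() are hypothetical names used only for
 * the illustration; the producer side would call wakeup(&sc->sc_ready)
 * after setting sc_ready under sc_lock.
 */
#if 0
static int
example_wait_ready(struct example_softc *sc)
{
        int error = 0;

        mutex_enter(&sc->sc_lock);
        while (!sc->sc_ready && error == 0) {
                /*
                 * mtsleep() releases sc_lock while asleep and, since
                 * PNORELOCK is not set, re-acquires it before returning.
                 */
                error = mtsleep(&sc->sc_ready, PWAIT | PCATCH, "exready",
                    0, &sc->sc_lock);
        }
        mutex_exit(&sc->sc_lock);

        return error;
}
#endif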

/*
 * General sleep call for situations where a wake-up is not expected.
 */
int
kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
{
        struct lwp *l = curlwp;
        sleepq_t *sq;
        int error;

        if (sleepq_dontsleep(l))
                return sleepq_abort(NULL, 0);

        if (mtx != NULL)
                mutex_exit(mtx);
        sq = sleeptab_lookup(&sleeptab, l);
        sleepq_enter(sq, l);
        sleepq_enqueue(sq, sched_kpri(l), l, wmesg, &sleep_syncobj);
        error = sleepq_block(timo, intr);
        if (mtx != NULL)
                mutex_enter(mtx);

        return error;
}
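
/*
 * Editor's note: an illustrative (non-original, #if 0'd) use of kpause()
 * for a plain timed wait of roughly a tenth of a second.  The
 * example_poll_delay() name and the caller-held "lock" argument are
 * hypothetical.
 */
#if 0
static int
example_poll_delay(kmutex_t *lock)
{

        /*
         * Sleep for about hz/10 ticks without expecting a wakeup.  The
         * "false" argument makes the sleep uninterruptible by signals;
         * kpause() drops "lock" (if non-NULL) across the sleep and
         * re-acquires it before returning.
         */
        return kpause("expoll", false, hz / 10, lock);
}
#endif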
239 1.83 thorpej
240 1.63 thorpej /*
241 1.149.2.3 yamt * OBSOLETE INTERFACE
242 1.149.2.3 yamt *
243 1.26 cgd * Make all processes sleeping on the specified identifier runnable.
244 1.26 cgd */
245 1.26 cgd void
246 1.149.2.3 yamt wakeup(wchan_t ident)
247 1.26 cgd {
248 1.149.2.3 yamt sleepq_t *sq;
249 1.83 thorpej
250 1.149.2.3 yamt if (cold)
251 1.149.2.3 yamt return;
252 1.83 thorpej
253 1.149.2.3 yamt sq = sleeptab_lookup(&sleeptab, ident);
254 1.149.2.3 yamt sleepq_wake(sq, ident, (u_int)-1);
255 1.63 thorpej }
256 1.63 thorpej
257 1.63 thorpej /*
258 1.149.2.3 yamt * OBSOLETE INTERFACE
259 1.149.2.3 yamt *
260 1.63 thorpej * Make the highest priority process first in line on the specified
261 1.63 thorpej * identifier runnable.
262 1.63 thorpej */
263 1.149.2.3 yamt void
264 1.149.2.3 yamt wakeup_one(wchan_t ident)
265 1.63 thorpej {
266 1.149.2.3 yamt sleepq_t *sq;
267 1.63 thorpej
268 1.149.2.3 yamt if (cold)
269 1.149.2.3 yamt return;
270 1.149.2.4 yamt
271 1.149.2.3 yamt sq = sleeptab_lookup(&sleeptab, ident);
272 1.149.2.3 yamt sleepq_wake(sq, ident, 1);
273 1.117 gmcgarry }
274 1.117 gmcgarry
275 1.149.2.3 yamt
276 1.117 gmcgarry /*
277 1.117 gmcgarry * General yield call. Puts the current process back on its run queue and
278 1.117 gmcgarry * performs a voluntary context switch. Should only be called when the
279 1.149.2.5 yamt * current process explicitly requests it (eg sched_yield(2)).
280 1.117 gmcgarry */
281 1.117 gmcgarry void
282 1.117 gmcgarry yield(void)
283 1.117 gmcgarry {
284 1.122 thorpej struct lwp *l = curlwp;
285 1.117 gmcgarry
286 1.149.2.3 yamt KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
287 1.149.2.3 yamt lwp_lock(l);
288 1.149.2.4 yamt KASSERT(lwp_locked(l, &l->l_cpu->ci_schedstate.spc_lwplock));
289 1.149.2.4 yamt KASSERT(l->l_stat == LSONPROC);
290 1.149.2.5 yamt /* XXX Only do this for timeshared threads. */
291 1.149.2.5 yamt l->l_priority = MAXPRI;
292 1.149.2.4 yamt (void)mi_switch(l);
293 1.149.2.3 yamt KERNEL_LOCK(l->l_biglocks, l);
294 1.69 thorpej }
295 1.69 thorpej
296 1.69 thorpej /*
297 1.69 thorpej * General preemption call. Puts the current process back on its run queue
298 1.149.2.1 yamt * and performs an involuntary context switch.
299 1.69 thorpej */
300 1.69 thorpej void
301 1.149.2.3 yamt preempt(void)
302 1.69 thorpej {
303 1.122 thorpej struct lwp *l = curlwp;
304 1.69 thorpej
305 1.149.2.3 yamt KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
306 1.149.2.3 yamt lwp_lock(l);
307 1.149.2.4 yamt KASSERT(lwp_locked(l, &l->l_cpu->ci_schedstate.spc_lwplock));
308 1.149.2.4 yamt KASSERT(l->l_stat == LSONPROC);
309 1.149.2.4 yamt l->l_priority = l->l_usrpri;
310 1.149.2.3 yamt l->l_nivcsw++;
311 1.149.2.4 yamt (void)mi_switch(l);
312 1.149.2.3 yamt KERNEL_LOCK(l->l_biglocks, l);
313 1.69 thorpej }
314 1.69 thorpej
315 1.69 thorpej /*
316 1.149.2.4 yamt * Compute the amount of time during which the current lwp was running.
317 1.130 nathanw *
318 1.149.2.4 yamt * - update l_rtime unless it's an idle lwp.
319 1.149.2.4 yamt */
320 1.149.2.4 yamt
321 1.149.2.5 yamt void
322 1.149.2.5 yamt updatertime(lwp_t *l, const struct timeval *tv)
323 1.149.2.4 yamt {
324 1.149.2.4 yamt long s, u;
325 1.149.2.4 yamt
326 1.149.2.5 yamt if ((l->l_flag & LW_IDLE) != 0)
327 1.149.2.4 yamt return;
328 1.149.2.4 yamt
329 1.149.2.5 yamt u = l->l_rtime.tv_usec + (tv->tv_usec - l->l_stime.tv_usec);
330 1.149.2.5 yamt s = l->l_rtime.tv_sec + (tv->tv_sec - l->l_stime.tv_sec);
331 1.149.2.4 yamt if (u < 0) {
332 1.149.2.4 yamt u += 1000000;
333 1.149.2.4 yamt s--;
334 1.149.2.4 yamt } else if (u >= 1000000) {
335 1.149.2.4 yamt u -= 1000000;
336 1.149.2.4 yamt s++;
337 1.149.2.4 yamt }
338 1.149.2.4 yamt l->l_rtime.tv_usec = u;
339 1.149.2.4 yamt l->l_rtime.tv_sec = s;
340 1.149.2.4 yamt }
341 1.149.2.4 yamt
342 1.149.2.4 yamt /*
343 1.149.2.4 yamt * The machine independent parts of context switch.
344 1.149.2.4 yamt *
345 1.149.2.4 yamt * Returns 1 if another LWP was actually run.
346 1.26 cgd */
347 1.122 thorpej int
348 1.149.2.5 yamt mi_switch(lwp_t *l)
349 1.26 cgd {
350 1.76 thorpej struct schedstate_percpu *spc;
351 1.149.2.4 yamt struct lwp *newl;
352 1.149.2.3 yamt int retval, oldspl;
353 1.149.2.5 yamt struct cpu_info *ci;
354 1.149.2.5 yamt struct timeval tv;
355 1.149.2.5 yamt bool returning;
356 1.85 sommerfe
357 1.149.2.4 yamt KASSERT(lwp_locked(l, NULL));
358 1.149.2.4 yamt LOCKDEBUG_BARRIER(l->l_mutex, 1);
359 1.76 thorpej
360 1.149.2.3 yamt #ifdef KSTACK_CHECK_MAGIC
361 1.149.2.3 yamt kstack_check_magic(l);
362 1.149.2.3 yamt #endif
363 1.149.2.3 yamt
364 1.149.2.5 yamt microtime(&tv);
365 1.149.2.5 yamt
366 1.149.2.3 yamt /*
367 1.149.2.3 yamt * It's safe to read the per CPU schedstate unlocked here, as all we
368 1.149.2.3 yamt * are after is the run time and that's guarenteed to have been last
369 1.149.2.3 yamt * updated by this CPU.
370 1.149.2.3 yamt */
371 1.149.2.5 yamt ci = l->l_cpu;
372 1.149.2.5 yamt KDASSERT(ci == curcpu());
373 1.81 thorpej
374 1.26 cgd /*
375 1.149.2.4 yamt * Process is about to yield the CPU; clear the appropriate
376 1.149.2.4 yamt * scheduling flags.
377 1.26 cgd */
378 1.149.2.5 yamt spc = &ci->ci_schedstate;
379 1.149.2.5 yamt returning = false;
380 1.149.2.4 yamt newl = NULL;
381 1.149.2.4 yamt
382 1.149.2.5 yamt /*
383 1.149.2.5 yamt * If we have been asked to switch to a specific LWP, then there
384 1.149.2.5 yamt * is no need to inspect the run queues. If a soft interrupt is
385 1.149.2.5 yamt * blocking, then return to the interrupted thread without adjusting
386 1.149.2.5 yamt * VM context or its start time: neither have been changed in order
387 1.149.2.5 yamt * to take the interrupt.
388 1.149.2.5 yamt */
389 1.149.2.4 yamt if (l->l_switchto != NULL) {
390 1.149.2.5 yamt if ((l->l_flag & LW_INTR) != 0) {
391 1.149.2.5 yamt returning = true;
392 1.149.2.5 yamt softint_block(l);
393 1.149.2.5 yamt if ((l->l_flag & LW_TIMEINTR) != 0)
394 1.149.2.5 yamt updatertime(l, &tv);
395 1.149.2.5 yamt }
396 1.149.2.4 yamt newl = l->l_switchto;
397 1.149.2.4 yamt l->l_switchto = NULL;
398 1.26 cgd }
399 1.149.2.3 yamt
400 1.149.2.3 yamt /* Count time spent in current system call */
401 1.149.2.5 yamt if (!returning) {
402 1.149.2.5 yamt SYSCALL_TIME_SLEEP(l);
403 1.26 cgd
404 1.149.2.5 yamt /*
405 1.149.2.5 yamt * XXXSMP If we are using h/w performance counters,
406 1.149.2.5 yamt * save context.
407 1.149.2.5 yamt */
408 1.149.2.3 yamt #if PERFCTRS
409 1.149.2.5 yamt if (PMC_ENABLED(l->l_proc)) {
410 1.149.2.5 yamt pmc_save_context(l->l_proc);
411 1.149.2.5 yamt }
412 1.109 yamt #endif
413 1.149.2.5 yamt updatertime(l, &tv);
414 1.149.2.5 yamt }
415 1.113 gmcgarry
416 1.113 gmcgarry /*
417 1.149.2.3 yamt * If on the CPU and we have gotten this far, then we must yield.
418 1.113 gmcgarry */
419 1.149.2.4 yamt mutex_spin_enter(spc->spc_mutex);
420 1.149.2.3 yamt KASSERT(l->l_stat != LSRUN);
421 1.149.2.3 yamt if (l->l_stat == LSONPROC) {
422 1.149.2.4 yamt KASSERT(lwp_locked(l, &spc->spc_lwplock));
423 1.149.2.4 yamt if ((l->l_flag & LW_IDLE) == 0) {
424 1.149.2.4 yamt l->l_stat = LSRUN;
425 1.149.2.4 yamt lwp_setlock(l, spc->spc_mutex);
426 1.149.2.4 yamt sched_enqueue(l, true);
427 1.149.2.4 yamt } else
428 1.149.2.4 yamt l->l_stat = LSIDL;
429 1.149.2.3 yamt }
430 1.149.2.3 yamt
431 1.149.2.3 yamt /*
432 1.149.2.5 yamt * Let sched_nextlwp() select the LWP to run the CPU next.
433 1.149.2.4 yamt * If no LWP is runnable, switch to the idle LWP.
434 1.149.2.5 yamt * Note that spc_lwplock might not necessary be held.
435 1.149.2.3 yamt */
436 1.149.2.4 yamt if (newl == NULL) {
437 1.149.2.4 yamt newl = sched_nextlwp();
438 1.149.2.4 yamt if (newl != NULL) {
439 1.149.2.4 yamt sched_dequeue(newl);
440 1.149.2.4 yamt KASSERT(lwp_locked(newl, spc->spc_mutex));
441 1.149.2.4 yamt newl->l_stat = LSONPROC;
442 1.149.2.5 yamt newl->l_cpu = ci;
443 1.149.2.4 yamt newl->l_flag |= LW_RUNNING;
444 1.149.2.4 yamt lwp_setlock(newl, &spc->spc_lwplock);
445 1.149.2.4 yamt } else {
446 1.149.2.5 yamt newl = ci->ci_data.cpu_idlelwp;
447 1.149.2.4 yamt newl->l_stat = LSONPROC;
448 1.149.2.4 yamt newl->l_flag |= LW_RUNNING;
449 1.149.2.4 yamt }
450 1.149.2.5 yamt ci->ci_want_resched = 0;
451 1.149.2.5 yamt spc->spc_flags &= ~SPCF_SWITCHCLEAR;
452 1.149.2.5 yamt }
453 1.149.2.5 yamt
454 1.149.2.5 yamt /* Update the new LWP's start time while it is still locked. */
455 1.149.2.5 yamt if (!returning) {
456 1.149.2.5 yamt newl->l_stime = tv;
457 1.149.2.5 yamt /*
458 1.149.2.5 yamt * XXX The following may be done unlocked if newl != NULL
459 1.149.2.5 yamt * above.
460 1.149.2.5 yamt */
461 1.149.2.4 yamt newl->l_priority = newl->l_usrpri;
462 1.149.2.4 yamt }
463 1.149.2.3 yamt
464 1.149.2.5 yamt spc->spc_curpriority = newl->l_usrpri;
465 1.149.2.5 yamt
466 1.149.2.4 yamt if (l != newl) {
467 1.149.2.4 yamt struct lwp *prevlwp;
468 1.149.2.3 yamt
469 1.149.2.4 yamt /*
470 1.149.2.4 yamt * If the old LWP has been moved to a run queue above,
471 1.149.2.4 yamt * drop the general purpose LWP lock: it's now locked
472 1.149.2.4 yamt * by the scheduler lock.
473 1.149.2.4 yamt *
474 1.149.2.4 yamt * Otherwise, drop the scheduler lock. We're done with
475 1.149.2.4 yamt * the run queues for now.
476 1.149.2.4 yamt */
477 1.149.2.4 yamt if (l->l_mutex == spc->spc_mutex) {
478 1.149.2.4 yamt mutex_spin_exit(&spc->spc_lwplock);
479 1.149.2.4 yamt } else {
480 1.149.2.4 yamt mutex_spin_exit(spc->spc_mutex);
481 1.149.2.4 yamt }
482 1.149.2.4 yamt
483 1.149.2.4 yamt /* Unlocked, but for statistics only. */
484 1.149.2.4 yamt uvmexp.swtch++;
485 1.149.2.4 yamt
486 1.149.2.5 yamt /*
487 1.149.2.5 yamt * Save old VM context, unless a soft interrupt
488 1.149.2.5 yamt * handler is blocking.
489 1.149.2.5 yamt */
490 1.149.2.5 yamt if (!returning)
491 1.149.2.5 yamt pmap_deactivate(l);
492 1.149.2.4 yamt
493 1.149.2.4 yamt /* Switch to the new LWP.. */
494 1.149.2.4 yamt l->l_ncsw++;
495 1.149.2.4 yamt l->l_flag &= ~LW_RUNNING;
496 1.149.2.5 yamt oldspl = MUTEX_SPIN_OLDSPL(ci);
497 1.149.2.4 yamt prevlwp = cpu_switchto(l, newl);
498 1.149.2.4 yamt
499 1.149.2.4 yamt /*
500 1.149.2.4 yamt * .. we have switched away and are now back so we must
501 1.149.2.4 yamt * be the new curlwp. prevlwp is who we replaced.
502 1.149.2.4 yamt */
503 1.149.2.4 yamt if (prevlwp != NULL) {
504 1.149.2.4 yamt curcpu()->ci_mtx_oldspl = oldspl;
505 1.149.2.4 yamt lwp_unlock(prevlwp);
506 1.149.2.4 yamt } else {
507 1.149.2.4 yamt splx(oldspl);
508 1.149.2.4 yamt }
509 1.149.2.3 yamt
510 1.149.2.4 yamt /* Restore VM context. */
511 1.149.2.4 yamt pmap_activate(l);
512 1.149.2.4 yamt retval = 1;
513 1.149.2.4 yamt } else {
514 1.149.2.4 yamt /* Nothing to do - just unlock and return. */
515 1.149.2.4 yamt mutex_spin_exit(spc->spc_mutex);
516 1.149.2.4 yamt lwp_unlock(l);
517 1.122 thorpej retval = 0;
518 1.122 thorpej }
519 1.110 briggs
520 1.149.2.4 yamt KASSERT(l == curlwp);
521 1.149.2.4 yamt KASSERT(l->l_stat == LSONPROC);
522 1.149.2.5 yamt KASSERT(l->l_cpu == curcpu());
523 1.149.2.4 yamt
524 1.110 briggs /*
525 1.149.2.3 yamt * XXXSMP If we are using h/w performance counters, restore context.
526 1.26 cgd */
527 1.114 gmcgarry #if PERFCTRS
528 1.149.2.3 yamt if (PMC_ENABLED(l->l_proc)) {
529 1.149.2.3 yamt pmc_restore_context(l->l_proc);
530 1.149.2.2 yamt }
531 1.114 gmcgarry #endif
532 1.110 briggs
533 1.110 briggs /*
534 1.76 thorpej * We're running again; record our new start time. We might
535 1.149.2.3 yamt * be running on a new CPU now, so don't use the cached
536 1.76 thorpej * schedstate_percpu pointer.
537 1.76 thorpej */
538 1.149.2.3 yamt SYSCALL_TIME_WAKEUP(l);
539 1.149.2.5 yamt KASSERT(curlwp == l);
540 1.122 thorpej KDASSERT(l->l_cpu == curcpu());
541 1.149.2.4 yamt LOCKDEBUG_BARRIER(NULL, 1);
542 1.149.2.2 yamt
543 1.122 thorpej return retval;
544 1.26 cgd }

/*
 * Change process state to be runnable, placing it on the run queue if it is
 * in memory, and awakening the swapper if it isn't in memory.
 *
 * Call with the process and LWP locked.  Will return with the LWP unlocked.
 */
void
setrunnable(struct lwp *l)
{
        struct proc *p = l->l_proc;
        sigset_t *ss;

        KASSERT((l->l_flag & LW_IDLE) == 0);
        KASSERT(mutex_owned(&p->p_smutex));
        KASSERT(lwp_locked(l, NULL));

        switch (l->l_stat) {
        case LSSTOP:
                /*
                 * If we're being traced (possibly because someone attached us
                 * while we were stopped), check for a signal from the debugger.
                 */
                if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xstat != 0) {
                        if ((sigprop[p->p_xstat] & SA_TOLWP) != 0)
                                ss = &l->l_sigpend.sp_set;
                        else
                                ss = &p->p_sigpend.sp_set;
                        sigaddset(ss, p->p_xstat);
                        signotify(l);
                }
                p->p_nrlwps++;
                break;
        case LSSUSPENDED:
                l->l_flag &= ~LW_WSUSPEND;
                p->p_nrlwps++;
                cv_broadcast(&p->p_lwpcv);
                break;
        case LSSLEEP:
                KASSERT(l->l_wchan != NULL);
                break;
        default:
                panic("setrunnable: lwp %p state was %d", l, l->l_stat);
        }

        /*
         * If the LWP was sleeping interruptibly, then it's OK to start it
         * again.  If not, mark it as still sleeping.
         */
        if (l->l_wchan != NULL) {
                l->l_stat = LSSLEEP;
                /* lwp_unsleep() will release the lock. */
                lwp_unsleep(l);
                return;
        }

        /*
         * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
         * about to call mi_switch(), in which case it will yield.
         */
        if ((l->l_flag & LW_RUNNING) != 0) {
                l->l_stat = LSONPROC;
                l->l_slptime = 0;
                lwp_unlock(l);
                return;
        }

        /*
         * Set the LWP runnable.  If it's swapped out, we need to wake the
         * swapper to bring it back in.  Otherwise, enter it into a run queue.
         */
        if (l->l_mutex != l->l_cpu->ci_schedstate.spc_mutex) {
                spc_lock(l->l_cpu);
                lwp_unlock_to(l, l->l_cpu->ci_schedstate.spc_mutex);
        }

        sched_setrunnable(l);
        l->l_stat = LSRUN;
        l->l_slptime = 0;

        if (l->l_flag & LW_INMEM) {
                sched_enqueue(l, false);
                resched_cpu(l);
                lwp_unlock(l);
        } else {
                lwp_unlock(l);
                uvm_kick_scheduler();
        }
}

/*
 * suspendsched:
 *
 *      Convert all LSSLEEP or LSRUN LWPs of non-system (PK_SYSTEM)
 *      processes to LSSUSPENDED.
 */
void
suspendsched(void)
{
        CPU_INFO_ITERATOR cii;
        struct cpu_info *ci;
        struct lwp *l;
        struct proc *p;

        /*
         * We do this by process in order not to violate the locking rules.
         */
        mutex_enter(&proclist_mutex);
        PROCLIST_FOREACH(p, &allproc) {
                mutex_enter(&p->p_smutex);

                if ((p->p_flag & PK_SYSTEM) != 0) {
                        mutex_exit(&p->p_smutex);
                        continue;
                }

                p->p_stat = SSTOP;

                LIST_FOREACH(l, &p->p_lwps, l_sibling) {
                        if (l == curlwp)
                                continue;

                        lwp_lock(l);

                        /*
                         * Set LW_WREBOOT so that the LWP will suspend itself
                         * when it tries to return to user mode.  We want to
                         * get as many LWPs as possible to the user / kernel
                         * boundary, so that they will release any locks
                         * that they hold.
                         */
                        l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);

                        if (l->l_stat == LSSLEEP &&
                            (l->l_flag & LW_SINTR) != 0) {
                                /* setrunnable() will release the lock. */
                                setrunnable(l);
                                continue;
                        }

                        lwp_unlock(l);
                }

                mutex_exit(&p->p_smutex);
        }
        mutex_exit(&proclist_mutex);

        /*
         * Kick all CPUs to make them preempt any LWPs running in user mode.
         * They'll trap into the kernel and suspend themselves in userret().
         */
        for (CPU_INFO_FOREACH(cii, ci))
                cpu_need_resched(ci, 0);
}

/*
 * sched_kpri:
 *
 *      Scale a priority level to a kernel priority level, usually
 *      for an LWP that is about to sleep.
 */
pri_t
sched_kpri(struct lwp *l)
{
        /*
         * Scale user priorities (127 -> 50) up to kernel priorities
         * in the range (49 -> 8).  Reserve the top 8 kernel priorities
         * for high priority kthreads.  Kernel priorities passed in
         * are left "as is".  XXX This is somewhat arbitrary.
         */
        static const uint8_t kpri_tab[] = {
                0, 1, 2, 3, 4, 5, 6, 7,
                8, 9, 10, 11, 12, 13, 14, 15,
                16, 17, 18, 19, 20, 21, 22, 23,
                24, 25, 26, 27, 28, 29, 30, 31,
                32, 33, 34, 35, 36, 37, 38, 39,
                40, 41, 42, 43, 44, 45, 46, 47,
                48, 49, 8, 8, 9, 9, 10, 10,
                11, 11, 12, 12, 13, 14, 14, 15,
                15, 16, 16, 17, 17, 18, 18, 19,
                20, 20, 21, 21, 22, 22, 23, 23,
                24, 24, 25, 26, 26, 27, 27, 28,
                28, 29, 29, 30, 30, 31, 32, 32,
                33, 33, 34, 34, 35, 35, 36, 36,
                37, 38, 38, 39, 39, 40, 40, 41,
                41, 42, 42, 43, 44, 44, 45, 45,
                46, 46, 47, 47, 48, 48, 49, 49,
        };

        return (pri_t)kpri_tab[l->l_usrpri];
}
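
/*
 * Editor's note (illustration derived from kpri_tab above, not original
 * commentary): entries 0..49 map to themselves, so kernel priorities pass
 * through unchanged, while user priorities 50..127 are compressed into the
 * kernel range 8..49; for example, kpri_tab[50] == 8 and kpri_tab[127] == 49.
 */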

/*
 * sched_unsleep:
 *
 *      This is called when the LWP has not been awoken normally but instead
 *      interrupted: for example, if the sleep timed out.  Because of this,
 *      it's not a valid action for running or idle LWPs.
 */
static void
sched_unsleep(struct lwp *l)
{

        lwp_unlock(l);
        panic("sched_unsleep");
}

inline void
resched_cpu(struct lwp *l)
{
        struct cpu_info *ci;
        const pri_t pri = lwp_eprio(l);

        /*
         * XXXSMP
         * Since l->l_cpu persists across a context switch,
         * this gives us *very weak* processor affinity, in
         * that we notify the CPU on which the process last
         * ran that it should try to switch.
         *
         * This does not guarantee that the process will run on
         * that processor next, because another processor might
         * grab it the next time it performs a context switch.
         *
         * This also does not handle the case where its last
         * CPU is running a higher-priority process, but every
         * other CPU is running a lower-priority process.  There
         * are ways to handle this situation, but they're not
         * currently very pretty, and we also need to weigh the
         * cost of moving a process from one CPU to another.
         */
        ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
        if (pri < ci->ci_schedstate.spc_curpriority)
                cpu_need_resched(ci, 0);
}

static void
sched_changepri(struct lwp *l, pri_t pri)
{

        KASSERT(lwp_locked(l, NULL));

        l->l_usrpri = pri;
        if (l->l_priority < PUSER)
                return;

        if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
                l->l_priority = pri;
                return;
        }

        KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

        sched_dequeue(l);
        l->l_priority = pri;
        sched_enqueue(l, false);
        resched_cpu(l);
}

static void
sched_lendpri(struct lwp *l, pri_t pri)
{

        KASSERT(lwp_locked(l, NULL));

        if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
                l->l_inheritedprio = pri;
                return;
        }

        KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

        sched_dequeue(l);
        l->l_inheritedprio = pri;
        sched_enqueue(l, false);
        resched_cpu(l);
}

struct lwp *
syncobj_noowner(wchan_t wchan)
{

        return NULL;
}

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t ccpu = 0.95122942450071400909 * FSCALE;         /* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *      1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the percentage of CPU used by a
 * process.
 */
#define CCPU_SHIFT      (FSHIFT + 1)
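
/*
 * Editor's worked example (assuming the usual FSHIFT of 11, so
 * FSCALE == 2048): ccpu == exp(-1/20) * FSCALE ~= 0.95123 * 2048 ~= 1948.
 * sched_pstats() applies the decay once per second, so after 60 updates
 * l_pctcpu has been scaled by exp(-60/20) == exp(-3) ~= 0.0498, i.e. about
 * 95% of the old value has decayed away, matching the comment above ccpu.
 */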

/*
 * sched_pstats:
 *
 *      Update process statistics and check CPU resource allocation.
 *      Call scheduler-specific hook to eventually adjust process/LWP
 *      priorities.
 */
/* ARGSUSED */
void
sched_pstats(void *arg)
{
        struct rlimit *rlim;
        struct lwp *l;
        struct proc *p;
        int minslp, sig, clkhz;
        long runtm;

        sched_pstats_ticks++;

        mutex_enter(&proclist_mutex);
        PROCLIST_FOREACH(p, &allproc) {
                /*
                 * Increment time in/out of memory and sleep time (if
                 * sleeping).  We ignore overflow; with 16-bit int's
                 * (remember them?) overflow takes 45 days.
                 */
                minslp = 2;
                mutex_enter(&p->p_smutex);
                mutex_spin_enter(&p->p_stmutex);
                runtm = p->p_rtime.tv_sec;
                LIST_FOREACH(l, &p->p_lwps, l_sibling) {
                        if ((l->l_flag & LW_IDLE) != 0)
                                continue;
                        lwp_lock(l);
                        runtm += l->l_rtime.tv_sec;
                        l->l_swtime++;
                        if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
                            l->l_stat == LSSUSPENDED) {
                                l->l_slptime++;
                                minslp = min(minslp, l->l_slptime);
                        } else
                                minslp = 0;
                        sched_pstats_hook(l);
                        lwp_unlock(l);

                        /*
                         * l_pctcpu (and p_pctcpu below) are only for ps.
                         */
                        l->l_pctcpu = (l->l_pctcpu * ccpu) >> FSHIFT;
                        if (l->l_slptime < 1) {
                                clkhz = stathz != 0 ? stathz : hz;
#if (FSHIFT >= CCPU_SHIFT)
                                l->l_pctcpu += (clkhz == 100) ?
                                    ((fixpt_t)l->l_cpticks) <<
                                    (FSHIFT - CCPU_SHIFT) :
                                    100 * (((fixpt_t)l->l_cpticks)
                                    << (FSHIFT - CCPU_SHIFT)) / clkhz;
#else
                                l->l_pctcpu += ((FSCALE - ccpu) *
                                    (l->l_cpticks * FSCALE / clkhz)) >> FSHIFT;
#endif
                                l->l_cpticks = 0;
                        }
                }

                p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
#ifdef SCHED_4BSD
                /*
                 * XXX: Workaround - belongs to sched_4bsd.c
                 * If the process has slept the entire second,
                 * stop recalculating its priority until it wakes up.
                 */
                if (minslp <= 1) {
                        extern fixpt_t decay_cpu(fixpt_t, fixpt_t);

                        fixpt_t loadfac = 2 * (averunnable.ldavg[0]);
                        p->p_estcpu = decay_cpu(loadfac, p->p_estcpu);
                }
#endif
                mutex_spin_exit(&p->p_stmutex);

                /*
                 * Check if the process exceeds its CPU resource allocation.
                 * If over max, kill it.
                 */
                rlim = &p->p_rlimit[RLIMIT_CPU];
                sig = 0;
                if (runtm >= rlim->rlim_cur) {
                        if (runtm >= rlim->rlim_max)
                                sig = SIGKILL;
                        else {
                                sig = SIGXCPU;
                                if (rlim->rlim_cur < rlim->rlim_max)
                                        rlim->rlim_cur += 5;
                        }
                }
                mutex_exit(&p->p_smutex);
                if (sig) {
                        psignal(p, sig);
                }
        }
        mutex_exit(&proclist_mutex);
        uvm_meter();
        cv_wakeup(&lbolt);
        callout_schedule(&sched_pstats_ch, hz);
}

void
sched_init(void)
{

        cv_init(&lbolt, "lbolt");
        callout_init(&sched_pstats_ch, 0);
        callout_setfunc(&sched_pstats_ch, sched_pstats, NULL);
        sched_setup();
        sched_pstats(NULL);
}