1 /* $NetBSD: kern_synch.c,v 1.78.2.5 2000/09/26 02:07:17 enami Exp $ */
2
3 /*-
4 * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the NetBSD
22 * Foundation, Inc. and its contributors.
23 * 4. Neither the name of The NetBSD Foundation nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 * POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /*-
41 * Copyright (c) 1982, 1986, 1990, 1991, 1993
42 * The Regents of the University of California. All rights reserved.
43 * (c) UNIX System Laboratories, Inc.
44 * All or some portions of this file are derived from material licensed
45 * to the University of California by American Telephone and Telegraph
46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
47 * the permission of UNIX System Laboratories, Inc.
48 *
49 * Redistribution and use in source and binary forms, with or without
50 * modification, are permitted provided that the following conditions
51 * are met:
52 * 1. Redistributions of source code must retain the above copyright
53 * notice, this list of conditions and the following disclaimer.
54 * 2. Redistributions in binary form must reproduce the above copyright
55 * notice, this list of conditions and the following disclaimer in the
56 * documentation and/or other materials provided with the distribution.
57 * 3. All advertising materials mentioning features or use of this software
58 * must display the following acknowledgement:
59 * This product includes software developed by the University of
60 * California, Berkeley and its contributors.
61 * 4. Neither the name of the University nor the names of its contributors
62 * may be used to endorse or promote products derived from this software
63 * without specific prior written permission.
64 *
65 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
66 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
69 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
70 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
71 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
72 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
73 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
74 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
75 * SUCH DAMAGE.
76 *
77 * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95
78 */
79
80 #include "opt_ddb.h"
81 #include "opt_ktrace.h"
82 #include "opt_lockdebug.h"
83
84 #include <sys/param.h>
85 #include <sys/systm.h>
86 #include <sys/callout.h>
87 #include <sys/proc.h>
88 #include <sys/kernel.h>
89 #include <sys/buf.h>
90 #include <sys/signalvar.h>
91 #include <sys/resourcevar.h>
92 #include <vm/vm.h>
93 #include <sys/sched.h>
94
95 #include <uvm/uvm_extern.h>
96
97 #ifdef KTRACE
98 #include <sys/ktrace.h>
99 #endif
100
101 #include <machine/cpu.h>
102
103 int lbolt; /* once a second sleep address */
104
105 /*
106 * The global scheduler state.
107 */
108 struct prochd sched_qs[RUNQUE_NQS]; /* run queues */
109 __volatile u_int32_t sched_whichqs; /* bitmap of non-empty queues */
110 struct slpque sched_slpque[SLPQUE_TABLESIZE]; /* sleep queues */
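/*
 * Rough picture of how these are used (the details live in sys/sched.h and
 * the machine-dependent switch code, not in this file): setrunqueue() puts
 * a runnable process on one of the RUNQUE_NQS run queues keyed by its
 * priority, and a set bit in sched_whichqs marks that queue non-empty so
 * the switch code can pick the best queue quickly; sleeping processes are
 * hashed by wait channel into sched_slpque[] via SLPQUE().
 */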
111
112 void roundrobin(void *);
113 void schedcpu(void *);
114 void updatepri(struct proc *);
115 void endtsleep(void *);
116
117 __inline void awaken(struct proc *);
118
119 struct callout roundrobin_ch = CALLOUT_INITIALIZER;
120 struct callout schedcpu_ch = CALLOUT_INITIALIZER;
121
122 /*
123 * Force switch among equal priority processes every 100ms.
124 */
125 /* ARGSUSED */
126 void
127 roundrobin(void *arg)
128 {
129 struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
130 int s;
131
132 if (curproc != NULL) {
133 s = splstatclock();
134 if (spc->spc_flags & SPCF_SEENRR) {
135 /*
136 * The process has already been through a roundrobin
137 * without switching and may be hogging the CPU.
138 * Indicate that the process should yield.
139 */
140 spc->spc_flags |= SPCF_SHOULDYIELD;
141 } else
142 spc->spc_flags |= SPCF_SEENRR;
143 splx(s);
144 }
145 need_resched();
146 callout_reset(&roundrobin_ch, hz / 10, roundrobin, NULL);
147 }
148
149 /*
150 * Constants for digital decay and forget:
151 * 90% of (p_estcpu) usage in 5 * loadav time
152 * 95% of (p_pctcpu) usage in 60 seconds (load insensitive)
153 * Note that, as ps(1) mentions, this can let percentages
154 * total over 100% (I've seen 137.9% for 3 processes).
155 *
156 * Note that hardclock updates p_estcpu and p_cpticks independently.
157 *
158 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
159 * That is, the system wants to compute a value of decay such
160 * that the following for loop:
161 * for (i = 0; i < (5 * loadavg); i++)
162 * p_estcpu *= decay;
163 * will compute
164 * p_estcpu *= 0.1;
165 * for all values of loadavg:
166 *
167 * Mathematically this loop can be expressed by saying:
168 * decay ** (5 * loadavg) ~= .1
169 *
170 * The system computes decay as:
171 * decay = (2 * loadavg) / (2 * loadavg + 1)
172 *
173 * We wish to prove that the system's computation of decay
174 * will always fulfill the equation:
175 * decay ** (5 * loadavg) ~= .1
176 *
177 * If we compute b as:
178 * b = 2 * loadavg
179 * then
180 * decay = b / (b + 1)
181 *
182 * We now need to prove two things:
183 * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
184 * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
185 *
186 * Facts:
187 * For x close to zero, exp(x) =~ 1 + x, since
188 * exp(x) = 0! + x**1/1! + x**2/2! + ... .
189 * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
190 * For x close to zero, ln(1+x) =~ x, since
191 * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1
192 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
193 * ln(.1) =~ -2.30
194 *
195 * Proof of (1):
196 * Solve (factor)**(power) =~ .1 given power (5*loadav):
197 * solving for factor,
198 * ln(factor) =~ (-2.30/5*loadav), or
199 * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
200 * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED
201 *
202 * Proof of (2):
203 * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
204 * solving for power,
205 * power*ln(b/(b+1)) =~ -2.30, or
206 * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED
207 *
208 * Actual power values for the implemented algorithm are as follows:
209 * loadav: 1 2 3 4
210 * power: 5.68 10.32 14.94 19.55
211 */
212
213 /* calculations for digital decay to forget 90% of usage in 5*loadav sec */
214 #define loadfactor(loadav) (2 * (loadav))
215 #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))
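/*
 * Illustrative sketch of the decay described above.  The helper and its
 * guard below are hypothetical (nothing in the kernel uses them); it just
 * applies decay_cpu() once per "second" and shows that roughly
 * (5 * loadavg) applications reduce an estcpu value to about a tenth of
 * its starting point.  For example, a load average of 1.0
 * (loadav == FSCALE) gives a per-step factor of 2/3.
 */
#ifdef SCHED_DECAY_EXAMPLE
static u_int
decay_cpu_seconds(fixpt_t loadav, u_int estcpu, int seconds)
{
	fixpt_t loadfac = loadfactor(loadav);
	int i;

	for (i = 0; i < seconds; i++)
		estcpu = (u_int)decay_cpu(loadfac, estcpu);
	return (estcpu);
}
#endif /* SCHED_DECAY_EXAMPLE */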
216
217 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
218 fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
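/*
 * Quick check of the 95%/60 second figure: schedcpu() multiplies p_pctcpu
 * by ccpu == exp(-1/20) once per second, so after 60 seconds an old
 * contribution has been scaled by exp(-60/20) == exp(-3) =~ 0.05, i.e.
 * about 95% of it has been forgotten.
 */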
219
220 /*
221 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
222 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
223 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
224 *
225 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
226 * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
227 *
 228  * If you don't want to bother with the faster/more-accurate formula, you
229 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
230 * (more general) method of calculating the %age of CPU used by a process.
231 */
232 #define CCPU_SHIFT 11
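/*
 * Reading of the CCPU_SHIFT shortcut (for the stathz == 100 case): the
 * exact per-second increment is
 *	(FSCALE - ccpu) * (p_cpticks * FSCALE / stathz) >> FSHIFT
 * which is roughly p_cpticks * FSCALE * 0.0488 / 100, and since
 * 0.0488 / 100 =~ 1/2048 == 2^-CCPU_SHIFT, it can be computed as
 *	p_cpticks << (FSHIFT - CCPU_SHIFT)
 * with no multiplication or division.
 */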
233
234 /*
235 * Recompute process priorities, every hz ticks.
236 */
237 /* ARGSUSED */
238 void
239 schedcpu(void *arg)
240 {
241 fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
242 struct proc *p;
243 int s;
244 unsigned int newcpu;
245 int clkhz;
246
247 proclist_lock_read();
248 for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
249 /*
250 * Increment time in/out of memory and sleep time
251 * (if sleeping). We ignore overflow; with 16-bit int's
252 * (remember them?) overflow takes 45 days.
253 */
254 p->p_swtime++;
255 if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
256 p->p_slptime++;
257 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
258 /*
259 * If the process has slept the entire second,
260 * stop recalculating its priority until it wakes up.
261 */
262 if (p->p_slptime > 1)
263 continue;
264 s = splstatclock(); /* prevent state changes */
265 /*
266 * p_pctcpu is only for ps.
267 */
268 clkhz = stathz != 0 ? stathz : hz;
269 #if (FSHIFT >= CCPU_SHIFT)
270 p->p_pctcpu += (clkhz == 100)?
271 ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
272 100 * (((fixpt_t) p->p_cpticks)
273 << (FSHIFT - CCPU_SHIFT)) / clkhz;
274 #else
275 p->p_pctcpu += ((FSCALE - ccpu) *
276 (p->p_cpticks * FSCALE / clkhz)) >> FSHIFT;
277 #endif
278 p->p_cpticks = 0;
279 newcpu = (u_int)decay_cpu(loadfac, p->p_estcpu);
280 p->p_estcpu = newcpu;
281 resetpriority(p);
282 if (p->p_priority >= PUSER) {
283 if (p->p_stat == SRUN &&
284 (p->p_flag & P_INMEM) &&
285 (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
286 remrunqueue(p);
287 p->p_priority = p->p_usrpri;
288 setrunqueue(p);
289 } else
290 p->p_priority = p->p_usrpri;
291 }
292 splx(s);
293 }
294 proclist_unlock_read();
295 uvm_meter();
296 wakeup((caddr_t)&lbolt);
297 callout_reset(&schedcpu_ch, hz, schedcpu, NULL);
298 }
299
300 /*
301 * Recalculate the priority of a process after it has slept for a while.
302 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
303 * least six times the loadfactor will decay p_estcpu to zero.
304 */
305 void
306 updatepri(struct proc *p)
307 {
308 unsigned int newcpu = p->p_estcpu;
309 fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
310
311 if (p->p_slptime > 5 * loadfac)
312 p->p_estcpu = 0;
313 else {
314 p->p_slptime--; /* the first time was done in schedcpu */
315 while (newcpu && --p->p_slptime)
316 newcpu = (int) decay_cpu(loadfac, newcpu);
317 p->p_estcpu = newcpu;
318 }
319 resetpriority(p);
320 }
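/*
 * Worked example of the sleep decay above (rough numbers): with a load
 * average of 1, loadfac is 2 * FSCALE, so each second of sleep scales
 * p_estcpu by 2/3; starting from the maximum of 255, integer truncation
 * drives it to 0 after about 13 seconds, in line with the "six times the
 * loadfactor" figure quoted above.
 */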
321
322 /*
323 * During autoconfiguration or after a panic, a sleep will simply
324 * lower the priority briefly to allow interrupts, then return.
325 * The priority to be used (safepri) is machine-dependent, thus this
326 * value is initialized and maintained in the machine-dependent layers.
327 * This priority will typically be 0, or the lowest priority
328 * that is safe for use on the interrupt stack; it can be made
329 * higher to block network software interrupts after panics.
330 */
331 int safepri;
332
333 /*
334 * General sleep call. Suspends the current process until a wakeup is
335 * performed on the specified identifier. The process will then be made
336 * runnable with the specified priority. Sleeps at most timo/hz seconds
337 * (0 means no timeout). If pri includes PCATCH flag, signals are checked
338 * before and after sleeping, else signals are not checked. Returns 0 if
339 * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a
340 * signal needs to be delivered, ERESTART is returned if the current system
341 * call should be restarted if possible, and EINTR is returned if the system
 342  * call should be interrupted by the signal.
343 *
 344  * The interlock is held until the scheduler_slock is acquired.  The
 345  * interlock will be re-locked before returning to the caller
346 * unless the PNORELOCK flag is specified, in which case the
347 * interlock will always be unlocked upon return.
348 */
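/*
 * Typical usage sketch.  The caller, flag, guard, and wait-message names
 * below are made up for illustration; the point is that the interlock is
 * passed to ltsleep() so there is no window between dropping the lock and
 * going to sleep, and (since PNORELOCK is not set) the interlock is held
 * again by the time ltsleep() returns.
 */
#ifdef LTSLEEP_EXAMPLE
static int
example_wait(__volatile struct simplelock *slock, int *flagp)
{
	int error;

	simple_lock(slock);
	while (*flagp == 0) {
		error = ltsleep(flagp, PWAIT | PCATCH, "exwait", 0, slock);
		if (error != 0) {
			/* interlock was re-taken for us; drop it and bail */
			simple_unlock(slock);
			return (error);		/* EINTR or ERESTART */
		}
	}
	simple_unlock(slock);
	return (0);
}
#endif /* LTSLEEP_EXAMPLE */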
349 int
350 ltsleep(void *ident, int priority, const char *wmesg, int timo,
351 __volatile struct simplelock *interlock)
352 {
353 struct proc *p = curproc;
354 struct slpque *qp;
355 int sig, s;
356 int catch = priority & PCATCH;
357 int relock = (priority & PNORELOCK) == 0;
358 #if 0 /* XXXSMP */
359 int dobiglock;
360 #endif
361
362 /*
363 * XXXSMP
364 * This is probably bogus. Figure out what the right
365 * thing to do here really is.
366 * Note that not sleeping if ltsleep is called with curproc == NULL
367 * in the shutdown case is disgusting but partly necessary given
368 * how shutdown (barely) works.
369 */
370 if (cold || (doing_shutdown && (panicstr || (p == NULL)))) {
371 /*
372 * After a panic, or during autoconfiguration,
373 * just give interrupts a chance, then just return;
374 * don't run any other procs or panic below,
375 * in case this is the idle process and already asleep.
376 */
377 s = splhigh();
378 splx(safepri);
379 splx(s);
380 if (interlock != NULL && relock == 0)
381 simple_unlock(interlock);
382 return (0);
383 }
384
385 #if 0 /* XXXSMP */
 386 	dobiglock = (p->p_flag & P_BIGLOCK) != 0;
387 #endif
388
389 #ifdef KTRACE
390 if (KTRPOINT(p, KTR_CSW))
391 ktrcsw(p, 1, 0);
392 #endif
393
394 s = splhigh(); /* XXXSMP: SCHED_LOCK(s) */
395
396 #ifdef DIAGNOSTIC
397 if (ident == NULL)
398 panic("ltsleep: ident == NULL");
399 if (p->p_stat != SONPROC)
400 panic("ltsleep: p_stat %d != SONPROC", p->p_stat);
401 if (p->p_back != NULL)
402 panic("ltsleep: p_back != NULL");
403 #endif
404
405 p->p_wchan = ident;
406 p->p_wmesg = wmesg;
407 p->p_slptime = 0;
408 p->p_priority = priority & PRIMASK;
409
410 qp = SLPQUE(ident);
411 if (qp->sq_head == 0)
412 qp->sq_head = p;
413 else
414 *qp->sq_tailp = p;
415 *(qp->sq_tailp = &p->p_forw) = 0;
416
417 if (timo)
418 callout_reset(&p->p_tsleep_ch, timo, endtsleep, p);
419
420 /*
421 * We can now release the interlock; the scheduler_slock
422 * is held, so a thread can't get in to do wakeup() before
423 * we do the switch.
424 *
425 * XXX We leave the code block here, after inserting ourselves
426 * on the sleep queue, because we might want a more clever
427 * data structure for the sleep queues at some point.
428 */
429 if (interlock != NULL)
430 simple_unlock(interlock);
431
432 /*
433 * We put ourselves on the sleep queue and start our timeout
434 * before calling CURSIG, as we could stop there, and a wakeup
435 * or a SIGCONT (or both) could occur while we were stopped.
436 * A SIGCONT would cause us to be marked as SSLEEP
437 * without resuming us, thus we must be ready for sleep
438 * when CURSIG is called. If the wakeup happens while we're
439 * stopped, p->p_wchan will be 0 upon return from CURSIG.
440 */
441 if (catch) {
442 p->p_flag |= P_SINTR;
443 if ((sig = CURSIG(p)) != 0) {
444 if (p->p_wchan != NULL)
445 unsleep(p);
446 p->p_stat = SONPROC;
447 #if 0 /* XXXSMP */
448 /*
449 * We're going to skip the unlock, so
450 * we don't need to relock after resume.
451 */
452 dobiglock = 0;
453 #endif
454 goto resume;
455 }
456 if (p->p_wchan == NULL) {
457 catch = 0;
458 #if 0 /* XXXSMP */
459 /* See above. */
460 dobiglock = 0;
461 #endif
462 goto resume;
463 }
464 } else
465 sig = 0;
466 p->p_stat = SSLEEP;
467 p->p_stats->p_ru.ru_nvcsw++;
468
469 #if 0 /* XXXSMP */
470 if (dobiglock) {
471 /*
472 * Release the kernel_lock, as we are about to
473 * yield the CPU. The scheduler_slock is still
474 * held until cpu_switch() selects a new process
475 * and removes it from the run queue.
476 */
477 kernel_lock_release();
478 }
479 #endif
480
481 /* scheduler_slock held */
482 mi_switch(p);
483 /* scheduler_slock held */
484 #ifdef DDB
485 /* handy breakpoint location after process "wakes" */
486 asm(".globl bpendtsleep ; bpendtsleep:");
487 #endif
488
489 resume:
490 KDASSERT(p->p_cpu != NULL);
491 KDASSERT(p->p_cpu == curcpu());
492 p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;
493 splx(s); /* XXXSMP: SCHED_UNLOCK(s) */
494 #if 0 /* XXXSMP */
495 if (dobiglock) {
496 /*
497 * Reacquire the kernel_lock now. We do this after
498 * we've released scheduler_slock to avoid deadlock.
499 */
500 kernel_lock_acquire(LK_EXCLUSIVE);
501 }
502 #endif
503 p->p_flag &= ~P_SINTR;
504 if (p->p_flag & P_TIMEOUT) {
505 p->p_flag &= ~P_TIMEOUT;
506 if (sig == 0) {
507 #ifdef KTRACE
508 if (KTRPOINT(p, KTR_CSW))
509 ktrcsw(p, 0, 0);
510 #endif
511 if (relock && interlock != NULL)
512 simple_lock(interlock);
513 return (EWOULDBLOCK);
514 }
515 } else if (timo)
516 callout_stop(&p->p_tsleep_ch);
517 if (catch && (sig != 0 || (sig = CURSIG(p)) != 0)) {
518 #ifdef KTRACE
519 if (KTRPOINT(p, KTR_CSW))
520 ktrcsw(p, 0, 0);
521 #endif
522 if (relock && interlock != NULL)
523 simple_lock(interlock);
524 if ((p->p_sigacts->ps_sigact[sig].sa_flags & SA_RESTART) == 0)
525 return (EINTR);
526 return (ERESTART);
527 }
528 #ifdef KTRACE
529 if (KTRPOINT(p, KTR_CSW))
530 ktrcsw(p, 0, 0);
531 #endif
532 if (relock && interlock != NULL)
533 simple_lock(interlock);
534 return (0);
535 }
536
537 /*
538 * Implement timeout for tsleep.
539 * If process hasn't been awakened (wchan non-zero),
540 * set timeout flag and undo the sleep. If proc
541 * is stopped, just unsleep so it will remain stopped.
542 */
543 void
544 endtsleep(void *arg)
545 {
546 struct proc *p;
547 int s;
548
549 p = (struct proc *)arg;
550 s = splhigh();
551 if (p->p_wchan) {
552 if (p->p_stat == SSLEEP)
553 setrunnable(p);
554 else
555 unsleep(p);
556 p->p_flag |= P_TIMEOUT;
557 }
558 splx(s);
559 }
560
561 /*
562 * Remove a process from its wait queue
563 */
564 void
565 unsleep(struct proc *p)
566 {
567 struct slpque *qp;
568 struct proc **hp;
569 int s;
570
571 s = splhigh();
572 if (p->p_wchan) {
573 hp = &(qp = SLPQUE(p->p_wchan))->sq_head;
574 while (*hp != p)
575 hp = &(*hp)->p_forw;
576 *hp = p->p_forw;
577 if (qp->sq_tailp == &p->p_forw)
578 qp->sq_tailp = hp;
579 p->p_wchan = 0;
580 }
581 splx(s);
582 }
583
584 /*
585 * Optimized-for-wakeup() version of setrunnable().
586 */
587 __inline void
588 awaken(struct proc *p)
589 {
590
591 if (p->p_slptime > 1)
592 updatepri(p);
593 p->p_slptime = 0;
594 p->p_stat = SRUN;
595
596 /*
597 * Since curpriority is a user priority, p->p_priority
598 * is always better than curpriority.
599 */
600 if (p->p_flag & P_INMEM) {
601 setrunqueue(p);
602 need_resched();
603 } else
604 wakeup(&proc0);
605 }
606
607 /*
608 * Make all processes sleeping on the specified identifier runnable.
609 */
610 void
611 wakeup(void *ident)
612 {
613 struct slpque *qp;
614 struct proc *p, **q;
615 int s;
616
617 s = splhigh(); /* XXXSMP: SCHED_LOCK(s) */
618
619 qp = SLPQUE(ident);
620 restart:
621 for (q = &qp->sq_head; (p = *q) != NULL; ) {
622 #ifdef DIAGNOSTIC
623 if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
624 panic("wakeup");
625 #endif
626 if (p->p_wchan == ident) {
627 p->p_wchan = 0;
628 *q = p->p_forw;
629 if (qp->sq_tailp == &p->p_forw)
630 qp->sq_tailp = q;
631 if (p->p_stat == SSLEEP) {
632 awaken(p);
633 goto restart;
634 }
635 } else
636 q = &p->p_forw;
637 }
638 splx(s); /* XXXSMP: SCHED_UNLOCK(s) */
639 }
640
641 /*
 642  * Make the highest priority process sleeping on the specified
 643  * identifier runnable.
644 */
645 void
646 wakeup_one(void *ident)
647 {
648 struct slpque *qp;
649 struct proc *p, **q;
650 struct proc *best_sleepp, **best_sleepq;
651 struct proc *best_stopp, **best_stopq;
652 int s;
653
654 best_sleepp = best_stopp = NULL;
655 best_sleepq = best_stopq = NULL;
656
657 s = splhigh(); /* XXXSMP: SCHED_LOCK(s) */
658
659 qp = SLPQUE(ident);
660
661 for (q = &qp->sq_head; (p = *q) != NULL; q = &p->p_forw) {
662 #ifdef DIAGNOSTIC
663 if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
664 panic("wakeup_one");
665 #endif
666 if (p->p_wchan == ident) {
667 if (p->p_stat == SSLEEP) {
668 if (best_sleepp == NULL ||
669 p->p_priority < best_sleepp->p_priority) {
670 best_sleepp = p;
671 best_sleepq = q;
672 }
673 } else {
674 if (best_stopp == NULL ||
675 p->p_priority < best_stopp->p_priority) {
676 best_stopp = p;
677 best_stopq = q;
678 }
679 }
680 }
681 }
682
683 /*
 684 	 * Consider any SSLEEP process higher priority than the highest
 685 	 * priority SSTOP process.
686 */
687 if (best_sleepp != NULL) {
688 p = best_sleepp;
689 q = best_sleepq;
690 } else {
691 p = best_stopp;
692 q = best_stopq;
693 }
694
695 if (p != NULL) {
696 p->p_wchan = NULL;
697 *q = p->p_forw;
698 if (qp->sq_tailp == &p->p_forw)
699 qp->sq_tailp = q;
700 if (p->p_stat == SSLEEP)
701 awaken(p);
702 }
703 splx(s); /* XXXSMP: SCHED_UNLOCK(s) */
704 }
705
706 /*
707 * General yield call. Puts the current process back on its run queue and
708 * performs a voluntary context switch.
709 */
710 void
711 yield(void)
712 {
713 struct proc *p = curproc;
714 int s;
715
716 s = splstatclock();
717 p->p_priority = p->p_usrpri;
718 p->p_stat = SRUN;
719 setrunqueue(p);
720 p->p_stats->p_ru.ru_nvcsw++;
721 mi_switch(p);
722 splx(s);
723 }
724
725 /*
726 * General preemption call. Puts the current process back on its run queue
727 * and performs an involuntary context switch. If a process is supplied,
728 * we switch to that process. Otherwise, we use the normal process selection
729 * criteria.
730 */
731 void
732 preempt(struct proc *newp)
733 {
734 struct proc *p = curproc;
735 int s;
736
737 /*
738 * XXX Switching to a specific process is not supported yet.
739 */
740 if (newp != NULL)
741 panic("preempt: cpu_preempt not yet implemented");
742
743 s = splstatclock();
744 p->p_priority = p->p_usrpri;
745 p->p_stat = SRUN;
746 setrunqueue(p);
747 p->p_stats->p_ru.ru_nivcsw++;
748 mi_switch(p);
749 splx(s);
750 }
751
752 /*
753 * The machine independent parts of context switch.
754 * Must be called at splstatclock() or higher.
755 */
756 void
757 mi_switch(struct proc *p)
758 {
759 struct schedstate_percpu *spc;
760 struct rlimit *rlim;
761 long s, u;
762 struct timeval tv;
763
764 KDASSERT(p->p_cpu != NULL);
765 KDASSERT(p->p_cpu == curcpu());
766
767 spc = &p->p_cpu->ci_schedstate;
768
769 #if defined(LOCKDEBUG) || defined(DIAGNOSTIC)
770 spinlock_switchcheck();
771 #endif
772 #ifdef LOCKDEBUG
773 simple_lock_switchcheck();
774 #endif
775
776 /*
777 * Compute the amount of time during which the current
778 * process was running, and add that to its total so far.
779 */
780 microtime(&tv);
781 u = p->p_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec);
782 s = p->p_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
783 if (u < 0) {
784 u += 1000000;
785 s--;
786 } else if (u >= 1000000) {
787 u -= 1000000;
788 s++;
789 }
790 p->p_rtime.tv_usec = u;
791 p->p_rtime.tv_sec = s;
792
793 /*
794 * Check if the process exceeds its cpu resource allocation.
 795 	 * If over max, kill it.  In any case, if it has run for longer than
 796 	 * the autonice time, reduce its priority to give others a chance.
797 */
798 rlim = &p->p_rlimit[RLIMIT_CPU];
799 if (s >= rlim->rlim_cur) {
800 if (s >= rlim->rlim_max)
801 psignal(p, SIGKILL);
802 else {
803 psignal(p, SIGXCPU);
804 if (rlim->rlim_cur < rlim->rlim_max)
805 rlim->rlim_cur += 5;
806 }
807 }
808 if (autonicetime && s > autonicetime && p->p_ucred->cr_uid &&
809 p->p_nice == NZERO) {
810 p->p_nice = autoniceval + NZERO;
811 resetpriority(p);
812 }
813
814 /*
815 * Process is about to yield the CPU; clear the appropriate
816 * scheduling flags.
817 */
818 spc->spc_flags &= ~SPCF_SWITCHCLEAR;
819
820 /*
821 * Pick a new current process and switch to it. When we
822 * run again, we'll return back here.
823 */
824 uvmexp.swtch++;
825 cpu_switch(p);
826
827 /*
828 * We're running again; record our new start time. We might
 829 	 * be running on a new CPU now, so don't use the cached
830 * schedstate_percpu pointer.
831 */
832 KDASSERT(p->p_cpu != NULL);
833 KDASSERT(p->p_cpu == curcpu());
834 microtime(&p->p_cpu->ci_schedstate.spc_runtime);
835 }
836
837 /*
838 * Initialize the (doubly-linked) run queues
839 * to be empty.
840 */
841 void
842 rqinit()
843 {
844 int i;
845
846 for (i = 0; i < RUNQUE_NQS; i++)
847 sched_qs[i].ph_link = sched_qs[i].ph_rlink =
848 (struct proc *)&sched_qs[i];
849 }
850
851 /*
852 * Change process state to be runnable,
853 * placing it on the run queue if it is in memory,
854 * and awakening the swapper if it isn't in memory.
855 */
856 void
857 setrunnable(struct proc *p)
858 {
859 int s;
860
861 s = splhigh();
862 switch (p->p_stat) {
863 case 0:
864 case SRUN:
865 case SONPROC:
866 case SZOMB:
867 case SDEAD:
868 default:
869 panic("setrunnable");
870 case SSTOP:
871 /*
872 * If we're being traced (possibly because someone attached us
873 * while we were stopped), check for a signal from the debugger.
874 */
875 if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0) {
876 sigaddset(&p->p_siglist, p->p_xstat);
877 p->p_sigcheck = 1;
878 }
879 case SSLEEP:
880 unsleep(p); /* e.g. when sending signals */
881 break;
882
883 case SIDL:
884 break;
885 }
886 p->p_stat = SRUN;
887 if (p->p_flag & P_INMEM)
888 setrunqueue(p);
889 splx(s);
890 if (p->p_slptime > 1)
891 updatepri(p);
892 p->p_slptime = 0;
893 if ((p->p_flag & P_INMEM) == 0)
894 wakeup((caddr_t)&proc0);
895 else if (p->p_priority < curcpu()->ci_schedstate.spc_curpriority) {
896 /*
897 * XXXSMP
898 * This is wrong. It will work, but what really
899 * needs to happen is:
900 *
901 * - Need to check if p is higher priority
902 * than the process currently running on
903 * the CPU p last ran on (let p_cpu persist
904 * after a context switch?), and preempt
905 * that one (or, if there is no process
 906 		 *   there, simply need_resched() that CPU.)
907 *
908 * - Failing that, traverse a list of
909 * available CPUs and need_resched() the
910 * CPU with the lowest priority that's
911 * lower than p's.
912 */
913 need_resched();
914 }
915 }
916
917 /*
918 * Compute the priority of a process when running in user mode.
919 * Arrange to reschedule if the resulting priority is better
920 * than that of the current process.
921 */
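/*
 * For example: with p_nice == NZERO and p_estcpu == 16, the formula below
 * yields p_usrpri == PUSER + 16; raising p_nice by 4 adds another
 * NICE_WEIGHT * 4 on top of that, and the result is clamped to MAXPRI.
 * Larger values are worse priorities.
 */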
922 void
923 resetpriority(struct proc *p)
924 {
925 unsigned int newpriority;
926
927 newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO);
928 newpriority = min(newpriority, MAXPRI);
929 p->p_usrpri = newpriority;
930 if (newpriority < curcpu()->ci_schedstate.spc_curpriority) {
931 /*
932 * XXXSMP
933 * Same applies as in setrunnable() above.
934 */
935 need_resched();
936 }
937 }
938
939 /*
940 * We adjust the priority of the current process. The priority of a process
941 * gets worse as it accumulates CPU time. The cpu usage estimator (p_estcpu)
942 * is increased here. The formula for computing priorities (in kern_synch.c)
943 * will compute a different value each time p_estcpu increases. This can
944 * cause a switch, but unless the priority crosses a PPQ boundary the actual
945 * queue will not change. The cpu usage estimator ramps up quite quickly
946 * when the process is running (linearly), and decays away exponentially, at
947 * a rate which is proportionally slower when the system is busy. The basic
 948  * principle is that the system will 90% forget that the process used a lot
949 * of CPU time in 5 * loadav seconds. This causes the system to favor
950 * processes which haven't run much recently, and to round-robin among other
951 * processes.
952 */
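/*
 * Concretely: each schedclock() call bumps p_estcpu by one (up to
 * ESTCPULIM), which makes the priority computed by resetpriority() one
 * worse (until clamped at MAXPRI); but as noted above, the process only
 * moves to a different run queue once the priority crosses a PPQ boundary.
 */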
953
954 void
955 schedclock(struct proc *p)
956 {
957
958 p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);
959 resetpriority(p);
960 if (p->p_priority >= PUSER)
961 p->p_priority = p->p_usrpri;
962 }
963
964 void
965 suspendsched()
966 {
967 struct proc *p;
968 int s;
969
970 /*
971 * Convert all non-P_SYSTEM SSLEEP or SRUN processes to SSTOP.
972 */
973 proclist_lock_read();
974 s = splclock();
975 for (p = LIST_FIRST(&allproc); p != NULL; p = LIST_NEXT(p, p_list)) {
976 if ((p->p_flag & P_SYSTEM) != 0)
977 continue;
978 switch (p->p_stat) {
979 case SRUN:
980 if ((p->p_flag & P_INMEM) != 0)
981 remrunqueue(p);
982 /* FALLTHROUGH */
983 case SSLEEP:
984 p->p_stat = SSTOP;
985 break;
986 case SONPROC:
987 /*
988 * XXX SMP: we need to deal with processes on
 989 			 * other CPUs!
990 */
991 break;
992 default:
993 break;
994 }
995 }
996 splx(s);
997 proclist_unlock_read();
998 }
999