/*	$NetBSD: kern_synch.c,v 1.92 2000/09/01 17:14:04 bouyer Exp $	*/

/*-
 * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
 */

#include "opt_ddb.h"
#include "opt_ktrace.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>

#include <uvm/uvm_extern.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

int	lbolt;			/* once a second sleep address */
int	rrticks;		/* number of hardclock ticks per roundrobin() */

/*
 * The global scheduler state.
 */
struct prochd sched_qs[RUNQUE_NQS];		/* run queues */
__volatile u_int32_t sched_whichqs;		/* bitmap of non-empty queues */
struct slpque sched_slpque[SLPQUE_TABLESIZE];	/* sleep queues */

struct simplelock sched_lock = SIMPLELOCK_INITIALIZER;
#if defined(MULTIPROCESSOR)
struct lock kernel_lock;
#endif

void schedcpu(void *);
void updatepri(struct proc *);
void endtsleep(void *);

__inline void awaken(struct proc *);

struct callout schedcpu_ch = CALLOUT_INITIALIZER;

int sched_suspended = 0;

/*
 * Force switch among equal priority processes every 100ms.
 * Called from hardclock every hz/10 == rrticks hardclock ticks.
 */
/* ARGSUSED */
void
roundrobin(struct cpu_info *ci)
{
	struct schedstate_percpu *spc = &ci->ci_schedstate;

	spc->spc_rrticks = rrticks;

	if (curproc != NULL) {
		if (spc->spc_flags & SPCF_SEENRR) {
			/*
			 * The process has already been through a roundrobin
			 * without switching and may be hogging the CPU.
			 * Indicate that the process should yield.
			 */
			spc->spc_flags |= SPCF_SHOULDYIELD;
		} else
			spc->spc_flags |= SPCF_SEENRR;
	}
	need_resched(curcpu());
}
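
/*
 * Added illustration (editorial note, not from the original sources):
 * with the traditional hz == 100, rrticks is hz / 10 == 10, so
 * roundrobin() fires every 10 hardclock ticks (100ms).  A process that
 * is still on the CPU when roundrobin() fires twice in a row has run
 * through at least one full 100ms interval without a context switch;
 * the first pass sets SPCF_SEENRR and the second sets SPCF_SHOULDYIELD,
 * marking it as a candidate to yield.
 */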

/*
 * Constants for digital decay and forget:
 *	90% of (p_estcpu) usage in 5 * loadav time
 *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *          Note that, as ps(1) mentions, this can let percentages
 *          total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_estcpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 * 	for (i = 0; i < (5 * loadavg); i++)
 * 		p_estcpu *= decay;
 * will compute
 * 	p_estcpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 * 	decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 * 	decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 * 	b = 2 * loadavg
 * then
 * 	decay = b / (b + 1)
 *
 * We now need to prove two things:
 *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *	For x close to zero, exp(x) =~ 1 + x, since
 *		exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *		therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *	For x close to zero, ln(1+x) =~ x, since
 *		ln(1+x) = x - x**2/2 + x**3/3 - ...	-1 < x < 1
 *		therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *	ln(.1) =~ -2.30
 *
 * Proof of (1):
 *	Solve (factor)**(power) =~ .1 given power (5*loadav):
 *	    solving for factor,
 *	    ln(factor) =~ (-2.30/5*loadav), or
 *	    factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *		exp(-1/b) =~ (b-1)/b =~ b/(b+1).			QED
 *
 * Proof of (2):
 *	Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *	    solving for power,
 *	    power*ln(b/(b+1)) =~ -2.30, or
 *	    power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.	QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *	loadav:	1	2	3	4
 *	power:	5.68	10.32	14.94	19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))
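
/*
 * Added worked example (editorial note, not part of the original
 * derivation): with a steady load average of 1.0, loadfac == 2 * FSCALE
 * and each once-a-second schedcpu() pass computes
 *
 *	p_estcpu = (2 * p_estcpu) / 3
 *
 * i.e. decay == 2/3.  After five such passes an estcpu of 100 has dropped
 * to about 100 * (2/3)^5 =~ 13, which matches the "forget 90% in
 * 5 * loadav seconds" target (power 5.68 for loadav 1 in the table above).
 */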

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11
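
/*
 * Added arithmetic note (editorial): with stathz == 100 the fast path in
 * schedcpu() below adds p_cpticks << (FSHIFT - CCPU_SHIFT) to p_pctcpu
 * each second, i.e. roughly p_cpticks * FSCALE / 2048.  The general
 * formula adds ((FSCALE - ccpu) * (p_cpticks * FSCALE / stathz)) >> FSHIFT,
 * which for stathz == 100 is about 0.0488 * p_cpticks * FSCALE / 100 =~
 * p_cpticks * FSCALE / 2050, so the 11-bit shift is a close approximation.
 */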

/*
 * Recompute process priorities, every hz ticks.
 */
/* ARGSUSED */
void
schedcpu(void *arg)
{
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	struct proc *p;
	int s, s1;
	unsigned int newcpu;
	int clkhz;

	proclist_lock_read();
	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
		/*
		 * Increment time in/out of memory and sleep time
		 * (if sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		p->p_swtime++;
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		s = splstatclock();	/* prevent state changes */
		/*
		 * p_pctcpu is only for ps.
		 */
		clkhz = stathz != 0 ? stathz : hz;
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (clkhz == 100)?
			((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
			100 * (((fixpt_t) p->p_cpticks)
				<< (FSHIFT - CCPU_SHIFT)) / clkhz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
			(p->p_cpticks * FSCALE / clkhz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		newcpu = (u_int)decay_cpu(loadfac, p->p_estcpu);
		p->p_estcpu = newcpu;
		SCHED_LOCK(s1);
		resetpriority(p);
		if (p->p_priority >= PUSER) {
			if (p->p_stat == SRUN &&
			    (p->p_flag & P_INMEM) &&
			    (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
				remrunqueue(p);
				p->p_priority = p->p_usrpri;
				setrunqueue(p);
			} else
				p->p_priority = p->p_usrpri;
		}
		SCHED_UNLOCK(s1);
		splx(s);
	}
	proclist_unlock_read();
	uvm_meter();
	wakeup((caddr_t)&lbolt);
	callout_reset(&schedcpu_ch, hz, schedcpu, NULL);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay p_estcpu to zero.
 */
void
updatepri(struct proc *p)
{
	unsigned int newcpu;
	fixpt_t loadfac;

	SCHED_ASSERT_LOCKED();

	newcpu = p->p_estcpu;
	loadfac = loadfactor(averunnable.ldavg[0]);

	if (p->p_slptime > 5 * loadfac)
		p->p_estcpu = 0;
	else {
		p->p_slptime--;	/* the first time was done in schedcpu */
		while (newcpu && --p->p_slptime)
			newcpu = (int) decay_cpu(loadfac, newcpu);
		p->p_estcpu = newcpu;
	}
	resetpriority(p);
}

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int	safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes PCATCH flag, signals are checked
 * before and after sleeping, else signals are not checked.  Returns 0 if
 * awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
 * signal needs to be delivered, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the system
 * call should be interrupted by the signal (return EINTR).
 *
 * The interlock is held until the scheduler_slock is held.  The
 * interlock will be locked before returning back to the caller
 * unless the PNORELOCK flag is specified, in which case the
 * interlock will always be unlocked upon return.
 */
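
/*
 * Added usage sketch (editorial; the softc "sc", its sc_slock simplelock
 * and its sc_busy flag are hypothetical, not defined in this file):
 *
 *	simple_lock(&sc->sc_slock);
 *	while (sc->sc_busy) {
 *		error = ltsleep(&sc->sc_busy, PRIBIO | PCATCH, "scbusy",
 *		    0, &sc->sc_slock);
 *		if (error) {
 *			simple_unlock(&sc->sc_slock);
 *			return (error);
 *		}
 *	}
 *	sc->sc_busy = 1;
 *	simple_unlock(&sc->sc_slock);
 *
 * The interlock is dropped once the process is safely on the sleep queue
 * and, because PNORELOCK was not passed, reacquired before ltsleep()
 * returns (including in the error cases above).
 */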
int
ltsleep(void *ident, int priority, const char *wmesg, int timo,
    __volatile struct simplelock *interlock)
{
	struct proc *p = curproc;
	struct slpque *qp;
	int sig, s;
	int catch = priority & PCATCH;
	int relock = (priority & PNORELOCK) == 0;

	/*
	 * XXXSMP
	 * This is probably bogus.  Figure out what the right
	 * thing to do here really is.
	 * Note that not sleeping if ltsleep is called with curproc == NULL
	 * in the shutdown case is disgusting but partly necessary given
	 * how shutdown (barely) works.
	 */
	if (cold || (doing_shutdown && (panicstr || (p == NULL)))) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		s = splhigh();
		splx(safepri);
		splx(s);
		if (interlock != NULL && relock == 0)
			simple_unlock(interlock);
		return (0);
	}

#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 1, 0);
#endif

	SCHED_LOCK(s);

#ifdef DIAGNOSTIC
	if (ident == NULL)
		panic("ltsleep: ident == NULL");
	if (p->p_stat != SONPROC)
		panic("ltsleep: p_stat %d != SONPROC", p->p_stat);
	if (p->p_back != NULL)
		panic("ltsleep: p_back != NULL");
#endif

	p->p_wchan = ident;
	p->p_wmesg = wmesg;
	p->p_slptime = 0;
	p->p_priority = priority & PRIMASK;

	qp = SLPQUE(ident);
	if (qp->sq_head == 0)
		qp->sq_head = p;
	else
		*qp->sq_tailp = p;
	*(qp->sq_tailp = &p->p_forw) = 0;

	if (timo)
		callout_reset(&p->p_tsleep_ch, timo, endtsleep, p);

	/*
	 * We can now release the interlock; the scheduler_slock
	 * is held, so a thread can't get in to do wakeup() before
	 * we do the switch.
	 *
	 * XXX We leave the code block here, after inserting ourselves
	 * on the sleep queue, because we might want a more clever
	 * data structure for the sleep queues at some point.
	 */
	if (interlock != NULL)
		simple_unlock(interlock);

	/*
	 * We put ourselves on the sleep queue and start our timeout
	 * before calling CURSIG, as we could stop there, and a wakeup
	 * or a SIGCONT (or both) could occur while we were stopped.
	 * A SIGCONT would cause us to be marked as SSLEEP
	 * without resuming us, thus we must be ready for sleep
	 * when CURSIG is called.  If the wakeup happens while we're
	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
	 */
	if (catch) {
		p->p_flag |= P_SINTR;
		if ((sig = CURSIG(p)) != 0) {
			if (p->p_wchan != NULL)
				unsleep(p);
			p->p_stat = SONPROC;
			SCHED_UNLOCK(s);
			goto resume;
		}
		if (p->p_wchan == NULL) {
			catch = 0;
			SCHED_UNLOCK(s);
			goto resume;
		}
	} else
		sig = 0;
	p->p_stat = SSLEEP;
	p->p_stats->p_ru.ru_nvcsw++;

	SCHED_ASSERT_LOCKED();
	mi_switch(p);

#ifdef DDB
	/* handy breakpoint location after process "wakes" */
	asm(".globl bpendtsleep ; bpendtsleep:");
#endif

	SCHED_ASSERT_UNLOCKED();
	splx(s);

 resume:
	KDASSERT(p->p_cpu != NULL);
	KDASSERT(p->p_cpu == curcpu());
	p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;

	p->p_flag &= ~P_SINTR;
	if (p->p_flag & P_TIMEOUT) {
		p->p_flag &= ~P_TIMEOUT;
		if (sig == 0) {
#ifdef KTRACE
			if (KTRPOINT(p, KTR_CSW))
				ktrcsw(p, 0, 0);
#endif
			if (relock && interlock != NULL)
				simple_lock(interlock);
			return (EWOULDBLOCK);
		}
	} else if (timo)
		callout_stop(&p->p_tsleep_ch);
	if (catch && (sig != 0 || (sig = CURSIG(p)) != 0)) {
#ifdef KTRACE
		if (KTRPOINT(p, KTR_CSW))
			ktrcsw(p, 0, 0);
#endif
		if (relock && interlock != NULL)
			simple_lock(interlock);
		if ((p->p_sigacts->ps_sigact[sig].sa_flags & SA_RESTART) == 0)
			return (EINTR);
		return (ERESTART);
	}
#ifdef KTRACE
	if (KTRPOINT(p, KTR_CSW))
		ktrcsw(p, 0, 0);
#endif
	if (relock && interlock != NULL)
		simple_lock(interlock);
	return (0);
}

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(void *arg)
{
	struct proc *p;
	int s;

	p = (struct proc *)arg;

	SCHED_LOCK(s);
	if (p->p_wchan) {
		if (p->p_stat == SSLEEP)
			setrunnable(p);
		else
			unsleep(p);
		p->p_flag |= P_TIMEOUT;
	}
	SCHED_UNLOCK(s);
}

/*
 * Remove a process from its wait queue
 */
void
unsleep(struct proc *p)
{
	struct slpque *qp;
	struct proc **hp;

	SCHED_ASSERT_LOCKED();

	if (p->p_wchan) {
		hp = &(qp = SLPQUE(p->p_wchan))->sq_head;
		while (*hp != p)
			hp = &(*hp)->p_forw;
		*hp = p->p_forw;
		if (qp->sq_tailp == &p->p_forw)
			qp->sq_tailp = hp;
		p->p_wchan = 0;
	}
}

/*
 * Optimized-for-wakeup() version of setrunnable().
 */
__inline void
awaken(struct proc *p)
{

	SCHED_ASSERT_LOCKED();

	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	if ((p->p_flag & P_SUSPEND) == 0) {
		p->p_stat = SRUN;
		/*
		 * Since curpriority is a user priority, p->p_priority
		 * is always better than curpriority.
		 */
		if (p->p_flag & P_INMEM) {
			setrunqueue(p);
			KASSERT(p->p_cpu != NULL);
			need_resched(p->p_cpu);
		} else
			sched_wakeup(&proc0);
	} else {
		p->p_stat = SSUSPEND;
	}
}

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
void
sched_unlock_idle(void)
{

	simple_unlock(&sched_lock);
}

void
sched_lock_idle(void)
{

	simple_lock(&sched_lock);
}
#endif /* MULTIPROCESSOR || LOCKDEBUG */

/*
 * Make all processes sleeping on the specified identifier runnable.
 */

void
wakeup(void *ident)
{
	int s;

	SCHED_ASSERT_UNLOCKED();

	SCHED_LOCK(s);
	sched_wakeup(ident);
	SCHED_UNLOCK(s);
}
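
/*
 * Added usage note (editorial): wakeup() is the entry point for callers
 * that do not already hold the scheduler lock; sched_wakeup() below is
 * the same operation for callers that do.  Continuing the hypothetical
 * example from the ltsleep() comment above, the code that clears the
 * busy flag would do:
 *
 *	simple_lock(&sc->sc_slock);
 *	sc->sc_busy = 0;
 *	wakeup(&sc->sc_busy);
 *	simple_unlock(&sc->sc_slock);
 */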

void
sched_wakeup(void *ident)
{
	struct slpque *qp;
	struct proc *p, **q;

	SCHED_ASSERT_LOCKED();

	qp = SLPQUE(ident);
 restart:
	for (q = &qp->sq_head; (p = *q) != NULL; ) {
#ifdef DIAGNOSTIC
		if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
			panic("wakeup");
#endif
		if (p->p_wchan == ident) {
			p->p_wchan = 0;
			*q = p->p_forw;
			if (qp->sq_tailp == &p->p_forw)
				qp->sq_tailp = q;
			if (p->p_stat == SSLEEP) {
				awaken(p);
				goto restart;
			}
		} else
			q = &p->p_forw;
	}
}

/*
 * Make the highest priority process first in line on the specified
 * identifier runnable.
 */
void
wakeup_one(void *ident)
{
	struct slpque *qp;
	struct proc *p, **q;
	struct proc *best_sleepp, **best_sleepq;
	struct proc *best_stopp, **best_stopq;
	int s;

	best_sleepp = best_stopp = NULL;
	best_sleepq = best_stopq = NULL;

	SCHED_LOCK(s);

	qp = SLPQUE(ident);

	for (q = &qp->sq_head; (p = *q) != NULL; q = &p->p_forw) {
#ifdef DIAGNOSTIC
		if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
			panic("wakeup_one");
#endif
		if (p->p_wchan == ident) {
			if (p->p_stat == SSLEEP) {
				if (best_sleepp == NULL ||
				    p->p_priority < best_sleepp->p_priority) {
					best_sleepp = p;
					best_sleepq = q;
				}
			} else {
				if (best_stopp == NULL ||
				    p->p_priority < best_stopp->p_priority) {
					best_stopp = p;
					best_stopq = q;
				}
			}
		}
	}

	/*
	 * Consider any SSLEEP process higher than the highest priority SSTOP
	 * process.
	 */
	if (best_sleepp != NULL) {
		p = best_sleepp;
		q = best_sleepq;
	} else {
		p = best_stopp;
		q = best_stopq;
	}

	if (p != NULL) {
		p->p_wchan = NULL;
		*q = p->p_forw;
		if (qp->sq_tailp == &p->p_forw)
			qp->sq_tailp = q;
		if (p->p_stat == SSLEEP)
			awaken(p);
	}
	SCHED_UNLOCK(s);
}

/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.
 */
void
yield(void)
{
	struct proc *p = curproc;
	int s;

	SCHED_LOCK(s);
	p->p_priority = p->p_usrpri;
	if ((p->p_flag & P_SUSPEND) == 0) {
		p->p_stat = SRUN;
		setrunqueue(p);
	} else
		p->p_stat = SSUSPEND;
	p->p_stats->p_ru.ru_nvcsw++;
	mi_switch(p);
	SCHED_ASSERT_UNLOCKED();
	splx(s);
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.  If a process is supplied,
 * we switch to that process.  Otherwise, we use the normal process selection
 * criteria.
 */
void
preempt(struct proc *newp)
{
	struct proc *p = curproc;
	int s;

	/*
	 * XXX Switching to a specific process is not supported yet.
	 */
	if (newp != NULL)
		panic("preempt: cpu_preempt not yet implemented");

	SCHED_LOCK(s);
	p->p_priority = p->p_usrpri;
	if ((p->p_flag & P_SUSPEND) == 0) {
		p->p_stat = SRUN;
		setrunqueue(p);
	} else
		p->p_stat = SSUSPEND;
	p->p_stats->p_ru.ru_nivcsw++;
	mi_switch(p);
	SCHED_ASSERT_UNLOCKED();
	splx(s);
}

/*
 * The machine independent parts of context switch.
 * Must be called at splsched() (no higher!) and with
 * the sched_lock held.
 */
void
mi_switch(struct proc *p)
{
	struct schedstate_percpu *spc;
	struct rlimit *rlim;
	long s, u;
	struct timeval tv;
#if defined(MULTIPROCESSOR)
	int hold_count;
#endif

	SCHED_ASSERT_LOCKED();

#if defined(MULTIPROCESSOR)
	/*
	 * Release the kernel_lock, as we are about to yield the CPU.
	 * The scheduler lock is still held until cpu_switch()
	 * selects a new process and removes it from the run queue.
	 */
	if (p->p_flag & P_BIGLOCK)
		hold_count = spinlock_release_all(&kernel_lock);
#endif

	KDASSERT(p->p_cpu != NULL);
	KDASSERT(p->p_cpu == curcpu());

	spc = &p->p_cpu->ci_schedstate;

#if defined(LOCKDEBUG) || defined(DIAGNOSTIC)
	spinlock_switchcheck();
#endif
#ifdef LOCKDEBUG
	simple_lock_switchcheck();
#endif

	/*
	 * Compute the amount of time during which the current
	 * process was running, and add that to its total so far.
	 */
	microtime(&tv);
	u = p->p_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec);
	s = p->p_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
	if (u < 0) {
		u += 1000000;
		s--;
	} else if (u >= 1000000) {
		u -= 1000000;
		s++;
	}
	p->p_rtime.tv_usec = u;
	p->p_rtime.tv_sec = s;

	/*
	 * Check if the process exceeds its cpu resource allocation.
	 * If over max, kill it.  In any case, if it has run for more
	 * than 10 minutes, reduce priority to give others a chance.
	 */
	rlim = &p->p_rlimit[RLIMIT_CPU];
	if (s >= rlim->rlim_cur) {
		if (s >= rlim->rlim_max)
			psignal(p, SIGKILL);
		else {
			psignal(p, SIGXCPU);
			if (rlim->rlim_cur < rlim->rlim_max)
				rlim->rlim_cur += 5;
		}
	}
	if (autonicetime && s > autonicetime && p->p_ucred->cr_uid &&
	    p->p_nice == NZERO) {
		p->p_nice = autoniceval + NZERO;
		resetpriority(p);
	}

	/*
	 * Process is about to yield the CPU; clear the appropriate
	 * scheduling flags.
	 */
	spc->spc_flags &= ~SPCF_SWITCHCLEAR;

	/*
	 * Pick a new current process and switch to it.  When we
	 * run again, we'll return back here.
	 */
	uvmexp.swtch++;
	cpu_switch(p);

	/*
	 * Make sure that MD code released the scheduler lock before
	 * resuming us.
	 */
	SCHED_ASSERT_UNLOCKED();

	/*
	 * We're running again; record our new start time.  We might
	 * be running on a new CPU now, so don't use the cached
	 * schedstate_percpu pointer.
	 */
	KDASSERT(p->p_cpu != NULL);
	KDASSERT(p->p_cpu == curcpu());
	microtime(&p->p_cpu->ci_schedstate.spc_runtime);

#if defined(MULTIPROCESSOR)
	/*
	 * Reacquire the kernel_lock now.  We do this after we've
	 * released the scheduler lock to avoid deadlock, and before
	 * we reacquire the interlock.
	 */
	if (p->p_flag & P_BIGLOCK)
		spinlock_acquire_count(&kernel_lock, hold_count);
#endif
}

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
void
rqinit()
{
	int i;

	for (i = 0; i < RUNQUE_NQS; i++)
		sched_qs[i].ph_link = sched_qs[i].ph_rlink =
		    (struct proc *)&sched_qs[i];
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(struct proc *p)
{

	SCHED_ASSERT_LOCKED();

	switch (p->p_stat) {
	case 0:
	case SRUN:
	case SSUSPEND:
	case SONPROC:
	case SZOMB:
	case SDEAD:
	default:
		panic("setrunnable");
	case SSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0) {
			sigaddset(&p->p_siglist, p->p_xstat);
			p->p_sigcheck = 1;
		}
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;

	case SIDL:
		break;
	}
	if ((p->p_flag & P_SUSPEND) == 0) {
		p->p_stat = SRUN;
		if (p->p_flag & P_INMEM)
			setrunqueue(p);
	} else
		p->p_stat = SSUSPEND;
	if (p->p_slptime > 1)
		updatepri(p);
	p->p_slptime = 0;
	if ((p->p_flag & P_INMEM) == 0)
		sched_wakeup((caddr_t)&proc0);
	else if (p->p_priority < curcpu()->ci_schedstate.spc_curpriority) {
		/*
		 * XXXSMP
		 * This is not exactly right.  Since p->p_cpu persists
		 * across a context switch, this gives us some sort
		 * of processor affinity.  But we need to figure out
		 * at what point it's better to reschedule on a different
		 * CPU than the last one.
		 */
		need_resched((p->p_cpu != NULL) ? p->p_cpu : curcpu());
	}
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
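
/*
 * Added example (editorial; assumes the usual NetBSD values PUSER == 50,
 * NICE_WEIGHT == 2 and MAXPRI == 127): a process at the default nice
 * (p_nice == NZERO) with p_estcpu == 20 gets p_usrpri == 50 + 20 == 70,
 * while nicing it to NZERO + 20 adds a further 2 * 20 == 40, with the
 * result clamped at MAXPRI.
 */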
void
resetpriority(struct proc *p)
{
	unsigned int newpriority;

	SCHED_ASSERT_LOCKED();

	newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO);
	newpriority = min(newpriority, MAXPRI);
	p->p_usrpri = newpriority;
	if (newpriority < curcpu()->ci_schedstate.spc_curpriority) {
		/*
		 * XXXSMP
		 * Same applies as in setrunnable() above.
		 */
		need_resched((p->p_cpu != NULL) ? p->p_cpu : curcpu());
	}
}

/*
 * We adjust the priority of the current process.  The priority of a process
 * gets worse as it accumulates CPU time.  The cpu usage estimator (p_estcpu)
 * is increased here.  The formula for computing priorities (in kern_synch.c)
 * will compute a different value each time p_estcpu increases.  This can
 * cause a switch, but unless the priority crosses a PPQ boundary the actual
 * queue will not change.  The cpu usage estimator ramps up quite quickly
 * when the process is running (linearly), and decays away exponentially, at
 * a rate which is proportionally slower when the system is busy.  The basic
 * principle is that the system will 90% forget that the process used a lot
 * of CPU time in 5 * loadav seconds.  This causes the system to favor
 * processes which haven't run much recently, and to round-robin among other
 * processes.
 */

void
schedclock(struct proc *p)
{
	int s;

	p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);

	SCHED_LOCK(s);
	resetpriority(p);
	SCHED_UNLOCK(s);

	if (p->p_priority >= PUSER)
		p->p_priority = p->p_usrpri;
}

/*
 * Mark all non-system processes as non-runnable, and remove from run queue.
 */
void
suspendsched()
{
	int s, i;
	struct proc *p, *next;

	SCHED_LOCK(s);
	sched_suspended++;
	if (sched_suspended > 1) {
		/* scheduling was already suspended */
		SCHED_UNLOCK(s);
		return;
	}
	/* mark P_SUSPEND all processes that aren't P_SYSTEM or curproc */
	for (p = LIST_FIRST(&allproc); p != NULL; p = next) {
		next = LIST_NEXT(p, p_list);
		if (p == curproc || (p->p_flag & P_SYSTEM) != 0)
			continue;
		p->p_flag |= P_SUSPEND;
	}
	/* go through the run queues, remove P_SUSPEND processes */
	for (i = 0; i < RUNQUE_NQS; i++) {
		/* start at the first proc; &sched_qs[i] itself is a sentinel */
		for (p = sched_qs[i].ph_link;
		    p != (struct proc *)&sched_qs[i]; p = next) {
			next = p->p_forw;
			if ((p->p_flag & P_SUSPEND) != 0) {
				remrunqueue(p);
				p->p_stat = SSUSPEND;
			}
		}
	}
	SCHED_UNLOCK(s);
}

/* resume scheduling, replace all suspended processes on a run queue */
void
resumesched()
{
	int s;
	struct proc *p, *next;

	SCHED_LOCK(s);
	sched_suspended--;
	if (sched_suspended < 0)
		panic("resumesched");
	if (sched_suspended > 0) {
		/* someone still wants the scheduling to be suspended */
		/* XXX should we mark curproc P_SUSPEND as well ? */
		SCHED_UNLOCK(s);
		return;
	}
	for (p = LIST_FIRST(&allproc); p != NULL; p = next) {
		next = LIST_NEXT(p, p_list);
		p->p_flag &= ~P_SUSPEND;
		if (p->p_stat == SSUSPEND) {
			p->p_stat = SRUN;
			setrunqueue(p);
		}
	}
	SCHED_UNLOCK(s);
}