kern_synch.c revision 1.1.1.2 1 /*-
2 * Copyright (c) 1982, 1986, 1990, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)kern_synch.c 8.6 (Berkeley) 1/21/94
39 */
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/proc.h>
44 #include <sys/kernel.h>
45 #include <sys/buf.h>
46 #include <sys/signalvar.h>
47 #include <sys/resourcevar.h>
48 #include <sys/vmmeter.h>
49 #ifdef KTRACE
50 #include <sys/ktrace.h>
51 #endif
52
53 #include <machine/cpu.h>
54
55 u_char curpriority; /* usrpri of curproc */
56 int lbolt; /* once a second sleep address */
57
58 /*
59 * Force switch among equal priority processes every 100ms.
60 */
61 /* ARGSUSED */
62 void
63 roundrobin(arg)
64 void *arg;
65 {
66
67 need_resched();
68 timeout(roundrobin, NULL, hz / 10);
69 }
70
71 /*
72 * Constants for digital decay and forget:
73 * 90% of (p_estcpu) usage in 5 * loadav time
74 * 95% of (p_pctcpu) usage in 60 seconds (load insensitive)
75 * Note that, as ps(1) mentions, this can let percentages
76 * total over 100% (I've seen 137.9% for 3 processes).
77 *
78 * Note that hardclock updates p_estcpu and p_cpticks independently.
79 *
80 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
81 * That is, the system wants to compute a value of decay such
82 * that the following for loop:
83 * for (i = 0; i < (5 * loadavg); i++)
84 * p_estcpu *= decay;
85 * will compute
86 * p_estcpu *= 0.1;
87 * for all values of loadavg:
88 *
89 * Mathematically this loop can be expressed by saying:
90 * decay ** (5 * loadavg) ~= .1
91 *
92 * The system computes decay as:
93 * decay = (2 * loadavg) / (2 * loadavg + 1)
94 *
95 * We wish to prove that the system's computation of decay
96 * will always fulfill the equation:
97 * decay ** (5 * loadavg) ~= .1
98 *
99 * If we compute b as:
100 * b = 2 * loadavg
101 * then
102 * decay = b / (b + 1)
103 *
104 * We now need to prove two things:
105 * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
106 * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
107 *
108 * Facts:
109 * For x close to zero, exp(x) =~ 1 + x, since
110 * exp(x) = 0! + x**1/1! + x**2/2! + ... .
111 * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
112 * For x close to zero, ln(1+x) =~ x, since
113 * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1
114 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
115 * ln(.1) =~ -2.30
116 *
117 * Proof of (1):
118 * Solve (factor)**(power) =~ .1 given power (5*loadav):
119 * solving for factor,
120 * ln(factor) =~ (-2.30/5*loadav), or
121 * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
122 * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED
123 *
124 * Proof of (2):
125 * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
126 * solving for power,
127 * power*ln(b/(b+1)) =~ -2.30, or
128 * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED
129 *
130 * Actual power values for the implemented algorithm are as follows:
131 * loadav: 1 2 3 4
132 * power: 5.68 10.32 14.94 19.55
133 */
134
135 /* calculations for digital decay to forget 90% of usage in 5*loadav sec */
136 #define loadfactor(loadav) (2 * (loadav))
137 #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))
138
139 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
140 fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
141
142 /*
143 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
144 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
145 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
146 *
147 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
148 * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
149 *
150 * If you dont want to bother with the faster/more-accurate formula, you
151 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
152 * (more general) method of calculating the %age of CPU used by a process.
153 */
154 #define CCPU_SHIFT 11
155
156 /*
157 * Recompute process priorities, every hz ticks.
158 */
159 /* ARGSUSED */
160 void
161 schedcpu(arg)
162 void *arg;
163 {
164 register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
165 register struct proc *p;
166 register int s;
167 register unsigned int newcpu;
168
169 wakeup((caddr_t)&lbolt);
170 for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
171 /*
172 * Increment time in/out of memory and sleep time
173 * (if sleeping). We ignore overflow; with 16-bit int's
174 * (remember them?) overflow takes 45 days.
175 */
176 p->p_swtime++;
177 if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
178 p->p_slptime++;
179 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
180 /*
181 * If the process has slept the entire second,
182 * stop recalculating its priority until it wakes up.
183 */
184 if (p->p_slptime > 1)
185 continue;
186 s = splstatclock(); /* prevent state changes */
187 /*
188 * p_pctcpu is only for ps.
189 */
190 #if (FSHIFT >= CCPU_SHIFT)
191 p->p_pctcpu += (hz == 100)?
192 ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
193 100 * (((fixpt_t) p->p_cpticks)
194 << (FSHIFT - CCPU_SHIFT)) / hz;
195 #else
196 p->p_pctcpu += ((FSCALE - ccpu) *
197 (p->p_cpticks * FSCALE / hz)) >> FSHIFT;
198 #endif
199 p->p_cpticks = 0;
200 newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu) + p->p_nice;
201 p->p_estcpu = min(newcpu, UCHAR_MAX);
202 resetpriority(p);
203 if (p->p_priority >= PUSER) {
204 #define PPQ (128 / NQS) /* priorities per queue */
205 if ((p != curproc) &&
206 p->p_stat == SRUN &&
207 (p->p_flag & P_INMEM) &&
208 (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
209 remrq(p);
210 p->p_priority = p->p_usrpri;
211 setrunqueue(p);
212 } else
213 p->p_priority = p->p_usrpri;
214 }
215 splx(s);
216 }
217 vmmeter();
218 if (bclnlist != NULL)
219 wakeup((caddr_t)pageproc);
220 timeout(schedcpu, (void *)0, hz);
221 }
222
223 /*
224 * Recalculate the priority of a process after it has slept for a while.
225 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
226 * least six times the loadfactor will decay p_estcpu to zero.
227 */
228 void
229 updatepri(p)
230 register struct proc *p;
231 {
232 register unsigned int newcpu = p->p_estcpu;
233 register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
234
235 if (p->p_slptime > 5 * loadfac)
236 p->p_estcpu = 0;
237 else {
238 p->p_slptime--; /* the first time was done in schedcpu */
239 while (newcpu && --p->p_slptime)
240 newcpu = (int) decay_cpu(loadfac, newcpu);
241 p->p_estcpu = min(newcpu, UCHAR_MAX);
242 }
243 resetpriority(p);
244 }
245
246 /*
247 * We're only looking at 7 bits of the address; everything is
248 * aligned to 4, lots of things are aligned to greater powers
249 * of 2. Shift right by 8, i.e. drop the bottom 256 worth.
250 */
251 #define TABLESIZE 128
252 #define LOOKUP(x) (((int)(x) >> 8) & (TABLESIZE - 1))
253 struct slpque {
254 struct proc *sq_head;
255 struct proc **sq_tailp;
256 } slpque[TABLESIZE];
257
258 /*
259 * During autoconfiguration or after a panic, a sleep will simply
260 * lower the priority briefly to allow interrupts, then return.
261 * The priority to be used (safepri) is machine-dependent, thus this
262 * value is initialized and maintained in the machine-dependent layers.
263 * This priority will typically be 0, or the lowest priority
264 * that is safe for use on the interrupt stack; it can be made
265 * higher to block network software interrupts after panics.
266 */
267 int safepri;
268
269 /*
270 * General sleep call. Suspends the current process until a wakeup is
271 * performed on the specified identifier. The process will then be made
272 * runnable with the specified priority. Sleeps at most timo/hz seconds
273 * (0 means no timeout). If pri includes PCATCH flag, signals are checked
274 * before and after sleeping, else signals are not checked. Returns 0 if
275 * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a
276 * signal needs to be delivered, ERESTART is returned if the current system
277 * call should be restarted if possible, and EINTR is returned if the system
278 * call should be interrupted by the signal (return EINTR).
279 */
280 int
281 tsleep(ident, priority, wmesg, timo)
282 void *ident;
283 int priority, timo;
284 char *wmesg;
285 {
286 register struct proc *p = curproc;
287 register struct slpque *qp;
288 register s;
289 int sig, catch = priority & PCATCH;
290 extern int cold;
291 void endtsleep __P((void *));
292
293 #ifdef KTRACE
294 if (KTRPOINT(p, KTR_CSW))
295 ktrcsw(p->p_tracep, 1, 0);
296 #endif
297 s = splhigh();
298 if (cold || panicstr) {
299 /*
300 * After a panic, or during autoconfiguration,
301 * just give interrupts a chance, then just return;
302 * don't run any other procs or panic below,
303 * in case this is the idle process and already asleep.
304 */
305 splx(safepri);
306 splx(s);
307 return (0);
308 }
309 #ifdef DIAGNOSTIC
310 if (ident == NULL || p->p_stat != SRUN || p->p_back)
311 panic("tsleep");
312 #endif
313 p->p_wchan = ident;
314 p->p_wmesg = wmesg;
315 p->p_slptime = 0;
316 p->p_priority = priority & PRIMASK;
317 qp = &slpque[LOOKUP(ident)];
318 if (qp->sq_head == 0)
319 qp->sq_head = p;
320 else
321 *qp->sq_tailp = p;
322 *(qp->sq_tailp = &p->p_forw) = 0;
323 if (timo)
324 timeout(endtsleep, (void *)p, timo);
325 /*
326 * We put ourselves on the sleep queue and start our timeout
327 * before calling CURSIG, as we could stop there, and a wakeup
328 * or a SIGCONT (or both) could occur while we were stopped.
329 * A SIGCONT would cause us to be marked as SSLEEP
330 * without resuming us, thus we must be ready for sleep
331 * when CURSIG is called. If the wakeup happens while we're
332 * stopped, p->p_wchan will be 0 upon return from CURSIG.
333 */
334 if (catch) {
335 p->p_flag |= P_SINTR;
336 if (sig = CURSIG(p)) {
337 if (p->p_wchan)
338 unsleep(p);
339 p->p_stat = SRUN;
340 goto resume;
341 }
342 if (p->p_wchan == 0) {
343 catch = 0;
344 goto resume;
345 }
346 } else
347 sig = 0;
348 p->p_stat = SSLEEP;
349 p->p_stats->p_ru.ru_nvcsw++;
350 mi_switch();
351 resume:
352 curpriority = p->p_usrpri;
353 splx(s);
354 p->p_flag &= ~P_SINTR;
355 if (p->p_flag & P_TIMEOUT) {
356 p->p_flag &= ~P_TIMEOUT;
357 if (sig == 0) {
358 #ifdef KTRACE
359 if (KTRPOINT(p, KTR_CSW))
360 ktrcsw(p->p_tracep, 0, 0);
361 #endif
362 return (EWOULDBLOCK);
363 }
364 } else if (timo)
365 untimeout(endtsleep, (void *)p);
366 if (catch && (sig != 0 || (sig = CURSIG(p)))) {
367 #ifdef KTRACE
368 if (KTRPOINT(p, KTR_CSW))
369 ktrcsw(p->p_tracep, 0, 0);
370 #endif
371 if (p->p_sigacts->ps_sigintr & sigmask(sig))
372 return (EINTR);
373 return (ERESTART);
374 }
375 #ifdef KTRACE
376 if (KTRPOINT(p, KTR_CSW))
377 ktrcsw(p->p_tracep, 0, 0);
378 #endif
379 return (0);
380 }
381
382 /*
383 * Implement timeout for tsleep.
384 * If process hasn't been awakened (wchan non-zero),
385 * set timeout flag and undo the sleep. If proc
386 * is stopped, just unsleep so it will remain stopped.
387 */
388 void
389 endtsleep(arg)
390 void *arg;
391 {
392 register struct proc *p;
393 int s;
394
395 p = (struct proc *)arg;
396 s = splhigh();
397 if (p->p_wchan) {
398 if (p->p_stat == SSLEEP)
399 setrunnable(p);
400 else
401 unsleep(p);
402 p->p_flag |= P_TIMEOUT;
403 }
404 splx(s);
405 }
406
407 /*
408 * Short-term, non-interruptable sleep.
409 */
410 void
411 sleep(ident, priority)
412 void *ident;
413 int priority;
414 {
415 register struct proc *p = curproc;
416 register struct slpque *qp;
417 register s;
418 extern int cold;
419
420 #ifdef DIAGNOSTIC
421 if (priority > PZERO) {
422 printf("sleep called with priority %d > PZERO, wchan: %x\n",
423 priority, ident);
424 panic("old sleep");
425 }
426 #endif
427 s = splhigh();
428 if (cold || panicstr) {
429 /*
430 * After a panic, or during autoconfiguration,
431 * just give interrupts a chance, then just return;
432 * don't run any other procs or panic below,
433 * in case this is the idle process and already asleep.
434 */
435 splx(safepri);
436 splx(s);
437 return;
438 }
439 #ifdef DIAGNOSTIC
440 if (ident == NULL || p->p_stat != SRUN || p->p_back)
441 panic("sleep");
442 #endif
443 p->p_wchan = ident;
444 p->p_wmesg = NULL;
445 p->p_slptime = 0;
446 p->p_priority = priority;
447 qp = &slpque[LOOKUP(ident)];
448 if (qp->sq_head == 0)
449 qp->sq_head = p;
450 else
451 *qp->sq_tailp = p;
452 *(qp->sq_tailp = &p->p_forw) = 0;
453 p->p_stat = SSLEEP;
454 p->p_stats->p_ru.ru_nvcsw++;
455 #ifdef KTRACE
456 if (KTRPOINT(p, KTR_CSW))
457 ktrcsw(p->p_tracep, 1, 0);
458 #endif
459 mi_switch();
460 #ifdef KTRACE
461 if (KTRPOINT(p, KTR_CSW))
462 ktrcsw(p->p_tracep, 0, 0);
463 #endif
464 curpriority = p->p_usrpri;
465 splx(s);
466 }
467
468 /*
469 * Remove a process from its wait queue
470 */
471 void
472 unsleep(p)
473 register struct proc *p;
474 {
475 register struct slpque *qp;
476 register struct proc **hp;
477 int s;
478
479 s = splhigh();
480 if (p->p_wchan) {
481 hp = &(qp = &slpque[LOOKUP(p->p_wchan)])->sq_head;
482 while (*hp != p)
483 hp = &(*hp)->p_forw;
484 *hp = p->p_forw;
485 if (qp->sq_tailp == &p->p_forw)
486 qp->sq_tailp = hp;
487 p->p_wchan = 0;
488 }
489 splx(s);
490 }
491
492 /*
493 * Make all processes sleeping on the specified identifier runnable.
494 */
495 void
496 wakeup(ident)
497 register void *ident;
498 {
499 register struct slpque *qp;
500 register struct proc *p, **q;
501 int s;
502
503 s = splhigh();
504 qp = &slpque[LOOKUP(ident)];
505 restart:
506 for (q = &qp->sq_head; p = *q; ) {
507 #ifdef DIAGNOSTIC
508 if (p->p_back || p->p_stat != SSLEEP && p->p_stat != SSTOP)
509 panic("wakeup");
510 #endif
511 if (p->p_wchan == ident) {
512 p->p_wchan = 0;
513 *q = p->p_forw;
514 if (qp->sq_tailp == &p->p_forw)
515 qp->sq_tailp = q;
516 if (p->p_stat == SSLEEP) {
517 /* OPTIMIZED EXPANSION OF setrunnable(p); */
518 if (p->p_slptime > 1)
519 updatepri(p);
520 p->p_slptime = 0;
521 p->p_stat = SRUN;
522 if (p->p_flag & P_INMEM)
523 setrunqueue(p);
524 /*
525 * Since curpriority is a user priority,
526 * p->p_priority is always better than
527 * curpriority.
528 */
529 if ((p->p_flag & P_INMEM) == 0)
530 wakeup((caddr_t)&proc0);
531 else
532 need_resched();
533 /* END INLINE EXPANSION */
534 goto restart;
535 }
536 } else
537 q = &p->p_forw;
538 }
539 splx(s);
540 }
541
542 /*
543 * The machine independent parts of mi_switch().
544 * Must be called at splstatclock() or higher.
545 */
546 void
547 mi_switch()
548 {
549 register struct proc *p = curproc; /* XXX */
550 register struct rlimit *rlim;
551 register long s, u;
552 struct timeval tv;
553
554 /*
555 * Compute the amount of time during which the current
556 * process was running, and add that to its total so far.
557 */
558 microtime(&tv);
559 u = p->p_rtime.tv_usec + (tv.tv_usec - runtime.tv_usec);
560 s = p->p_rtime.tv_sec + (tv.tv_sec - runtime.tv_sec);
561 if (u < 0) {
562 u += 1000000;
563 s--;
564 } else if (u >= 1000000) {
565 u -= 1000000;
566 s++;
567 }
568 p->p_rtime.tv_usec = u;
569 p->p_rtime.tv_sec = s;
570
571 /*
572 * Check if the process exceeds its cpu resource allocation.
573 * If over max, kill it. In any case, if it has run for more
574 * than 10 minutes, reduce priority to give others a chance.
575 */
576 rlim = &p->p_rlimit[RLIMIT_CPU];
577 if (s >= rlim->rlim_cur) {
578 if (s >= rlim->rlim_max)
579 psignal(p, SIGKILL);
580 else {
581 psignal(p, SIGXCPU);
582 if (rlim->rlim_cur < rlim->rlim_max)
583 rlim->rlim_cur += 5;
584 }
585 }
586 if (s > 10 * 60 && p->p_ucred->cr_uid && p->p_nice == NZERO) {
587 p->p_nice = NZERO + 4;
588 resetpriority(p);
589 }
590
591 /*
592 * Pick a new current process and record its start time.
593 */
594 cnt.v_swtch++;
595 cpu_switch(p);
596 microtime(&runtime);
597 }
598
599 /*
600 * Initialize the (doubly-linked) run queues
601 * to be empty.
602 */
603 rqinit()
604 {
605 register int i;
606
607 for (i = 0; i < NQS; i++)
608 qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
609 }
610
611 /*
612 * Change process state to be runnable,
613 * placing it on the run queue if it is in memory,
614 * and awakening the swapper if it isn't in memory.
615 */
616 void
617 setrunnable(p)
618 register struct proc *p;
619 {
620 register int s;
621
622 s = splhigh();
623 switch (p->p_stat) {
624 case 0:
625 case SRUN:
626 case SZOMB:
627 default:
628 panic("setrunnable");
629 case SSTOP:
630 case SSLEEP:
631 unsleep(p); /* e.g. when sending signals */
632 break;
633
634 case SIDL:
635 break;
636 }
637 p->p_stat = SRUN;
638 if (p->p_flag & P_INMEM)
639 setrunqueue(p);
640 splx(s);
641 if (p->p_slptime > 1)
642 updatepri(p);
643 p->p_slptime = 0;
644 if ((p->p_flag & P_INMEM) == 0)
645 wakeup((caddr_t)&proc0);
646 else if (p->p_priority < curpriority)
647 need_resched();
648 }
649
650 /*
651 * Compute the priority of a process when running in user mode.
652 * Arrange to reschedule if the resulting priority is better
653 * than that of the current process.
654 */
655 void
656 resetpriority(p)
657 register struct proc *p;
658 {
659 register unsigned int newpriority;
660
661 newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice;
662 newpriority = min(newpriority, MAXPRI);
663 p->p_usrpri = newpriority;
664 if (newpriority < curpriority)
665 need_resched();
666 }
667