/*	$NetBSD: kern_synch.c,v 1.88 2000/08/26 03:34:37 sommerfeld Exp $	*/

/*-
 * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *        The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the University of
 *        California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *        @(#)kern_synch.c        8.9 (Berkeley) 5/19/95
 */

#include "opt_ddb.h"
#include "opt_ktrace.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/signalvar.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>

#include <uvm/uvm_extern.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/cpu.h>

int lbolt;                 /* once a second sleep address */
int rrticks;               /* number of hardclock ticks per roundrobin() */

/*
 * The global scheduler state.
 */
struct prochd sched_qs[RUNQUE_NQS];              /* run queues */
__volatile u_int32_t sched_whichqs;              /* bitmap of non-empty queues */
struct slpque sched_slpque[SLPQUE_TABLESIZE];    /* sleep queues */

struct simplelock sched_lock = SIMPLELOCK_INITIALIZER;
#if defined(MULTIPROCESSOR)
struct lock kernel_lock;
#endif

void schedcpu(void *);
void updatepri(struct proc *);
void endtsleep(void *);

__inline void awaken(struct proc *);

struct callout schedcpu_ch = CALLOUT_INITIALIZER;

/*
 * Force switch among equal priority processes every 100ms.
 * Called from hardclock every hz/10 == rrticks hardclock ticks.
 */
/* ARGSUSED */
void
roundrobin()
{
        struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

        spc->spc_rrticks = rrticks;

        if (curproc != NULL) {
                if (spc->spc_flags & SPCF_SEENRR) {
                        /*
                         * The process has already been through a roundrobin
                         * without switching and may be hogging the CPU.
                         * Indicate that the process should yield.
                         */
                        spc->spc_flags |= SPCF_SHOULDYIELD;
                } else
                        spc->spc_flags |= SPCF_SEENRR;
        }
        need_resched(curcpu());
}

/*
 * Constants for digital decay and forget:
 *        90% of (p_estcpu) usage in 5 * loadav time
 *        95% of (p_pctcpu) usage in 60 seconds (load insensitive)
 *            Note that, as ps(1) mentions, this can let percentages
 *            total over 100% (I've seen 137.9% for 3 processes).
 *
 * Note that hardclock updates p_estcpu and p_cpticks independently.
 *
 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
 * That is, the system wants to compute a value of decay such
 * that the following for loop:
 *        for (i = 0; i < (5 * loadavg); i++)
 *                p_estcpu *= decay;
 * will compute
 *        p_estcpu *= 0.1;
 * for all values of loadavg:
 *
 * Mathematically this loop can be expressed by saying:
 *        decay ** (5 * loadavg) ~= .1
 *
 * The system computes decay as:
 *        decay = (2 * loadavg) / (2 * loadavg + 1)
 *
 * We wish to prove that the system's computation of decay
 * will always fulfill the equation:
 *        decay ** (5 * loadavg) ~= .1
 *
 * If we compute b as:
 *        b = 2 * loadavg
 * then
 *        decay = b / (b + 1)
 *
 * We now need to prove two things:
 *        1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
 *        2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
 *
 * Facts:
 *        For x close to zero, exp(x) =~ 1 + x, since
 *                exp(x) = 0! + x**1/1! + x**2/2! + ... .
 *                therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
 *        For x close to zero, ln(1+x) =~ x, since
 *                ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
 *                therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
 *        ln(.1) =~ -2.30
 *
 * Proof of (1):
 *    Solve (factor)**(power) =~ .1 given power (5*loadav):
 *        solving for factor,
 *        ln(factor) =~ (-2.30/5*loadav), or
 *        factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
 *            exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
 *
 * Proof of (2):
 *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
 *        solving for power,
 *        power*ln(b/(b+1)) =~ -2.30, or
 *        power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
 *
 * Actual power values for the implemented algorithm are as follows:
 *        loadav: 1       2       3       4
 *        power:  5.68    10.32   14.94   19.55
 */

/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define loadfactor(loadav)          (2 * (loadav))
#define decay_cpu(loadfac, cpu)     (((loadfac) * (cpu)) / ((loadfac) + FSCALE))

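/*
 * Illustrative only (not in the original source): a quick numeric check of
 * decay_cpu(), assuming the usual FSHIFT of 11, i.e. FSCALE == 2048.  With a
 * load average of 2.0, averunnable.ldavg[0] == 2 * 2048 == 4096, so
 * loadfac == 8192 and
 *
 *        decay_cpu(8192, cpu) == (8192 * cpu) / (8192 + 2048) == 0.8 * cpu
 *
 * which matches decay == (2 * loadavg) / (2 * loadavg + 1) == 4/5.  Applied
 * once per second for 5 * loadavg == 10 seconds this scales p_estcpu by
 * 0.8**10 =~ 0.107, i.e. roughly the "forget 90%" target derived above.
 */
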
/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t ccpu = 0.95122942450071400909 * FSCALE;         /* exp(-1/20) */

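/*
 * Illustrative only: schedcpu() multiplies p_pctcpu by ccpu once per second,
 * so after 60 seconds the old value has been scaled by exp(-1/20)**60 ==
 * exp(-3) =~ 0.0498 -- which is the "decay 95% in 60 seconds" behaviour
 * noted above.
 */
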
/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *        1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define CCPU_SHIFT      11

/*
 * Recompute process priorities, every hz ticks.
 */
/* ARGSUSED */
void
schedcpu(void *arg)
{
        fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
        struct proc *p;
        int s, s1;
        unsigned int newcpu;
        int clkhz;

        proclist_lock_read();
        for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
                /*
                 * Increment time in/out of memory and sleep time
                 * (if sleeping).  We ignore overflow; with 16-bit int's
                 * (remember them?) overflow takes 45 days.
                 */
                p->p_swtime++;
                if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
                        p->p_slptime++;
                p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
                /*
                 * If the process has slept the entire second,
                 * stop recalculating its priority until it wakes up.
                 */
                if (p->p_slptime > 1)
                        continue;
                s = splstatclock();     /* prevent state changes */
                /*
                 * p_pctcpu is only for ps.
                 */
                clkhz = stathz != 0 ? stathz : hz;
#if (FSHIFT >= CCPU_SHIFT)
                p->p_pctcpu += (clkhz == 100) ?
                    ((fixpt_t)p->p_cpticks) << (FSHIFT - CCPU_SHIFT) :
                    100 * (((fixpt_t)p->p_cpticks)
                    << (FSHIFT - CCPU_SHIFT)) / clkhz;
#else
                p->p_pctcpu += ((FSCALE - ccpu) *
                    (p->p_cpticks * FSCALE / clkhz)) >> FSHIFT;
#endif
                p->p_cpticks = 0;
                newcpu = (u_int)decay_cpu(loadfac, p->p_estcpu);
                p->p_estcpu = newcpu;
                SCHED_LOCK(s1);
                resetpriority(p);
                if (p->p_priority >= PUSER) {
                        if (p->p_stat == SRUN &&
                            (p->p_flag & P_INMEM) &&
                            (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) {
                                remrunqueue(p);
                                p->p_priority = p->p_usrpri;
                                setrunqueue(p);
                        } else
                                p->p_priority = p->p_usrpri;
                }
                SCHED_UNLOCK(s1);
                splx(s);
        }
        proclist_unlock_read();
        uvm_meter();
        wakeup((caddr_t)&lbolt);
        callout_reset(&schedcpu_ch, hz, schedcpu, NULL);
}

/*
 * Recalculate the priority of a process after it has slept for a while.
 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
 * least six times the loadfactor will decay p_estcpu to zero.
 */
void
updatepri(struct proc *p)
{
        unsigned int newcpu;
        fixpt_t loadfac;

        SCHED_ASSERT_LOCKED();

        newcpu = p->p_estcpu;
        loadfac = loadfactor(averunnable.ldavg[0]);

        if (p->p_slptime > 5 * loadfac)
                p->p_estcpu = 0;
        else {
                p->p_slptime--;         /* the first time was done in schedcpu */
                while (newcpu && --p->p_slptime)
                        newcpu = (int)decay_cpu(loadfac, newcpu);
                p->p_estcpu = newcpu;
        }
        resetpriority(p);
}

#if defined(MULTIPROCESSOR)
/*
 * XXX Need to tweak lockmgr() for this!
 */
static int
kernel_lock_release_all(void)
{
        int count, s;

        s = splsched();
        count = kernel_lock.lk_exclusivecount;
        KDASSERT(count > 0);
        kernel_lock.lk_exclusivecount = 1;
        kernel_lock.lk_recurselevel = 1;
        spinlockmgr(&kernel_lock, LK_RELEASE, 0);
        splx(s);

        return (count);
}

static void
kernel_lock_acquire_count(int flags, int count)
{
        int s;

        s = splsched();
        KDASSERT(count > 0);
        spinlockmgr(&kernel_lock, flags, 0);
        KDASSERT(kernel_lock.lk_exclusivecount == 1);
        kernel_lock.lk_exclusivecount = count;
        splx(s);
}
#endif /* MULTIPROCESSOR */

/*
 * During autoconfiguration or after a panic, a sleep will simply
 * lower the priority briefly to allow interrupts, then return.
 * The priority to be used (safepri) is machine-dependent, thus this
 * value is initialized and maintained in the machine-dependent layers.
 * This priority will typically be 0, or the lowest priority
 * that is safe for use on the interrupt stack; it can be made
 * higher to block network software interrupts after panics.
 */
int safepri;

/*
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds
 * (0 means no timeout).  If pri includes the PCATCH flag, signals are
 * checked before and after sleeping, else signals are not checked.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH
 * is set and a signal needs to be delivered, ERESTART is returned if the
 * current system call should be restarted if possible, and EINTR is
 * returned if the system call should be interrupted by the signal.
 *
 * The interlock is held until the scheduler_slock is held.  The
 * interlock will be locked before returning back to the caller
 * unless the PNORELOCK flag is specified, in which case the
 * interlock will always be unlocked upon return.
 */
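/*
 * Illustrative only (not part of the original file): a typical caller
 * pattern for ltsleep().  The softc pointer `sc', its `sc_slock' simplelock
 * and the `sc_busy' condition are hypothetical names, used purely to show
 * how the interlock and PCATCH semantics fit together:
 *
 *        simple_lock(&sc->sc_slock);
 *        while (sc->sc_busy) {
 *                error = ltsleep(&sc->sc_busy, PRIBIO | PCATCH, "excwait",
 *                    0, &sc->sc_slock);
 *                if (error)
 *                        break;  (EINTR, ERESTART or EWOULDBLOCK)
 *        }
 *        ...
 *        simple_unlock(&sc->sc_slock);
 *
 * Because PNORELOCK is not passed, the interlock is held again whenever
 * ltsleep() returns, so the loop may re-test `sc_busy' safely.
 */
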
int
ltsleep(void *ident, int priority, const char *wmesg, int timo,
    __volatile struct simplelock *interlock)
{
        struct proc *p = curproc;
        struct slpque *qp;
        int sig, s;
        int catch = priority & PCATCH;
        int relock = (priority & PNORELOCK) == 0;

        /*
         * XXXSMP
         * This is probably bogus.  Figure out what the right
         * thing to do here really is.
         * Note that not sleeping if ltsleep is called with curproc == NULL
         * in the shutdown case is disgusting but partly necessary given
         * how shutdown (barely) works.
         */
        if (cold || (doing_shutdown && (panicstr || (p == NULL)))) {
                /*
                 * After a panic, or during autoconfiguration,
                 * just give interrupts a chance, then just return;
                 * don't run any other procs or panic below,
                 * in case this is the idle process and already asleep.
                 */
                s = splhigh();
                splx(safepri);
                splx(s);
                if (interlock != NULL && relock == 0)
                        simple_unlock(interlock);
                return (0);
        }

#ifdef KTRACE
        if (KTRPOINT(p, KTR_CSW))
                ktrcsw(p, 1, 0);
#endif

        SCHED_LOCK(s);

#ifdef DIAGNOSTIC
        if (ident == NULL)
                panic("ltsleep: ident == NULL");
        if (p->p_stat != SONPROC)
                panic("ltsleep: p_stat %d != SONPROC", p->p_stat);
        if (p->p_back != NULL)
                panic("ltsleep: p_back != NULL");
#endif

        p->p_wchan = ident;
        p->p_wmesg = wmesg;
        p->p_slptime = 0;
        p->p_priority = priority & PRIMASK;

        qp = SLPQUE(ident);
        if (qp->sq_head == 0)
                qp->sq_head = p;
        else
                *qp->sq_tailp = p;
        *(qp->sq_tailp = &p->p_forw) = 0;

        if (timo)
                callout_reset(&p->p_tsleep_ch, timo, endtsleep, p);

        /*
         * We can now release the interlock; the scheduler_slock
         * is held, so a thread can't get in to do wakeup() before
         * we do the switch.
         *
         * XXX We leave the code block here, after inserting ourselves
         * on the sleep queue, because we might want a more clever
         * data structure for the sleep queues at some point.
         */
        if (interlock != NULL)
                simple_unlock(interlock);

        /*
         * We put ourselves on the sleep queue and start our timeout
         * before calling CURSIG, as we could stop there, and a wakeup
         * or a SIGCONT (or both) could occur while we were stopped.
         * A SIGCONT would cause us to be marked as SSLEEP
         * without resuming us, thus we must be ready for sleep
         * when CURSIG is called.  If the wakeup happens while we're
         * stopped, p->p_wchan will be 0 upon return from CURSIG.
         */
        if (catch) {
                p->p_flag |= P_SINTR;
                if ((sig = CURSIG(p)) != 0) {
                        if (p->p_wchan != NULL)
                                unsleep(p);
                        p->p_stat = SONPROC;
                        SCHED_UNLOCK(s);
                        goto resume;
                }
                if (p->p_wchan == NULL) {
                        catch = 0;
                        SCHED_UNLOCK(s);
                        goto resume;
                }
        } else
                sig = 0;
        p->p_stat = SSLEEP;
        p->p_stats->p_ru.ru_nvcsw++;

        SCHED_ASSERT_LOCKED();
        mi_switch(p);

#ifdef DDB
        /* handy breakpoint location after process "wakes" */
        asm(".globl bpendtsleep ; bpendtsleep:");
#endif

        SCHED_ASSERT_UNLOCKED();
        splx(s);

 resume:
        KDASSERT(p->p_cpu != NULL);
        KDASSERT(p->p_cpu == curcpu());
        p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri;

        p->p_flag &= ~P_SINTR;
        if (p->p_flag & P_TIMEOUT) {
                p->p_flag &= ~P_TIMEOUT;
                if (sig == 0) {
#ifdef KTRACE
                        if (KTRPOINT(p, KTR_CSW))
                                ktrcsw(p, 0, 0);
#endif
                        if (relock && interlock != NULL)
                                simple_lock(interlock);
                        return (EWOULDBLOCK);
                }
        } else if (timo)
                callout_stop(&p->p_tsleep_ch);
        if (catch && (sig != 0 || (sig = CURSIG(p)) != 0)) {
#ifdef KTRACE
                if (KTRPOINT(p, KTR_CSW))
                        ktrcsw(p, 0, 0);
#endif
                if (relock && interlock != NULL)
                        simple_lock(interlock);
                if ((p->p_sigacts->ps_sigact[sig].sa_flags & SA_RESTART) == 0)
                        return (EINTR);
                return (ERESTART);
        }
#ifdef KTRACE
        if (KTRPOINT(p, KTR_CSW))
                ktrcsw(p, 0, 0);
#endif
        if (relock && interlock != NULL)
                simple_lock(interlock);
        return (0);
}

/*
 * Implement timeout for tsleep.
 * If process hasn't been awakened (wchan non-zero),
 * set timeout flag and undo the sleep.  If proc
 * is stopped, just unsleep so it will remain stopped.
 */
void
endtsleep(void *arg)
{
        struct proc *p;
        int s;

        p = (struct proc *)arg;

        SCHED_LOCK(s);
        if (p->p_wchan) {
                if (p->p_stat == SSLEEP)
                        setrunnable(p);
                else
                        unsleep(p);
                p->p_flag |= P_TIMEOUT;
        }
        SCHED_UNLOCK(s);
}

/*
 * Remove a process from its wait queue
 */
void
unsleep(struct proc *p)
{
        struct slpque *qp;
        struct proc **hp;

        SCHED_ASSERT_LOCKED();

        if (p->p_wchan) {
                hp = &(qp = SLPQUE(p->p_wchan))->sq_head;
                while (*hp != p)
                        hp = &(*hp)->p_forw;
                *hp = p->p_forw;
                if (qp->sq_tailp == &p->p_forw)
                        qp->sq_tailp = hp;
                p->p_wchan = 0;
        }
}

/*
 * Optimized-for-wakeup() version of setrunnable().
 */
__inline void
awaken(struct proc *p)
{

        SCHED_ASSERT_LOCKED();

        if (p->p_slptime > 1)
                updatepri(p);
        p->p_slptime = 0;
        p->p_stat = SRUN;

        /*
         * Since curpriority is a user priority, p->p_priority
         * is always better than curpriority.
         */
        if (p->p_flag & P_INMEM) {
                setrunqueue(p);
                KASSERT(p->p_cpu != NULL);
                need_resched(p->p_cpu);
        } else
                sched_wakeup(&proc0);
}

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
void
sched_unlock_idle(void)
{

        simple_unlock(&sched_lock);
}

void
sched_lock_idle(void)
{

        simple_lock(&sched_lock);
}
#endif /* MULTIPROCESSOR || LOCKDEBUG */

/*
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(void *ident)
{
        int s;

        SCHED_ASSERT_UNLOCKED();

        SCHED_LOCK(s);
        sched_wakeup(ident);
        SCHED_UNLOCK(s);
}

void
sched_wakeup(void *ident)
{
        struct slpque *qp;
        struct proc *p, **q;

        SCHED_ASSERT_LOCKED();

        qp = SLPQUE(ident);
 restart:
        for (q = &qp->sq_head; (p = *q) != NULL; ) {
#ifdef DIAGNOSTIC
                if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
                        panic("wakeup");
#endif
                if (p->p_wchan == ident) {
                        p->p_wchan = 0;
                        *q = p->p_forw;
                        if (qp->sq_tailp == &p->p_forw)
                                qp->sq_tailp = q;
                        if (p->p_stat == SSLEEP) {
                                awaken(p);
                                goto restart;
                        }
                } else
                        q = &p->p_forw;
        }
}

/*
 * Make the highest priority process first in line on the specified
 * identifier runnable.
 */
void
wakeup_one(void *ident)
{
        struct slpque *qp;
        struct proc *p, **q;
        struct proc *best_sleepp, **best_sleepq;
        struct proc *best_stopp, **best_stopq;
        int s;

        best_sleepp = best_stopp = NULL;
        best_sleepq = best_stopq = NULL;

        SCHED_LOCK(s);

        qp = SLPQUE(ident);

        for (q = &qp->sq_head; (p = *q) != NULL; q = &p->p_forw) {
#ifdef DIAGNOSTIC
                if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP))
                        panic("wakeup_one");
#endif
                if (p->p_wchan == ident) {
                        if (p->p_stat == SSLEEP) {
                                if (best_sleepp == NULL ||
                                    p->p_priority < best_sleepp->p_priority) {
                                        best_sleepp = p;
                                        best_sleepq = q;
                                }
                        } else {
                                if (best_stopp == NULL ||
                                    p->p_priority < best_stopp->p_priority) {
                                        best_stopp = p;
                                        best_stopq = q;
                                }
                        }
                }
        }

        /*
         * Consider any SSLEEP process higher than the highest priority SSTOP
         * process.
         */
        if (best_sleepp != NULL) {
                p = best_sleepp;
                q = best_sleepq;
        } else {
                p = best_stopp;
                q = best_stopq;
        }

        if (p != NULL) {
                p->p_wchan = NULL;
                *q = p->p_forw;
                if (qp->sq_tailp == &p->p_forw)
                        qp->sq_tailp = q;
                if (p->p_stat == SSLEEP)
                        awaken(p);
        }
        SCHED_UNLOCK(s);
}

/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.
 */
void
yield(void)
{
        struct proc *p = curproc;
        int s;

        SCHED_LOCK(s);
        p->p_priority = p->p_usrpri;
        p->p_stat = SRUN;
        setrunqueue(p);
        p->p_stats->p_ru.ru_nvcsw++;
        mi_switch(p);
        SCHED_ASSERT_UNLOCKED();
        splx(s);
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.  If a process is supplied,
 * we switch to that process.  Otherwise, we use the normal process selection
 * criteria.
 */
void
preempt(struct proc *newp)
{
        struct proc *p = curproc;
        int s;

        /*
         * XXX Switching to a specific process is not supported yet.
         */
        if (newp != NULL)
                panic("preempt: cpu_preempt not yet implemented");

        SCHED_LOCK(s);
        p->p_priority = p->p_usrpri;
        p->p_stat = SRUN;
        setrunqueue(p);
        p->p_stats->p_ru.ru_nivcsw++;
        mi_switch(p);
        SCHED_ASSERT_UNLOCKED();
        splx(s);
}

/*
 * The machine independent parts of context switch.
 * Must be called at splsched() (no higher!) and with
 * the sched_lock held.
 */
void
mi_switch(struct proc *p)
{
        struct schedstate_percpu *spc;
        struct rlimit *rlim;
        long s, u;
        struct timeval tv;
#if defined(MULTIPROCESSOR)
        int hold_count;
#endif

        SCHED_ASSERT_LOCKED();

#if defined(MULTIPROCESSOR)
        if (p->p_flag & P_BIGLOCK) {
                /*
                 * Release the kernel_lock, as we are about to
                 * yield the CPU.  The scheduler_slock is still
                 * held until cpu_switch() selects a new process
                 * and removes it from the run queue.
                 */
                hold_count = kernel_lock_release_all();
        }
#endif

        KDASSERT(p->p_cpu != NULL);
        KDASSERT(p->p_cpu == curcpu());

        spc = &p->p_cpu->ci_schedstate;

#if defined(LOCKDEBUG) || defined(DIAGNOSTIC)
        spinlock_switchcheck();
#endif
#ifdef LOCKDEBUG
        simple_lock_switchcheck();
#endif

        /*
         * Compute the amount of time during which the current
         * process was running, and add that to its total so far.
         */
        microtime(&tv);
        u = p->p_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec);
        s = p->p_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
        if (u < 0) {
                u += 1000000;
                s--;
        } else if (u >= 1000000) {
                u -= 1000000;
                s++;
        }
        p->p_rtime.tv_usec = u;
        p->p_rtime.tv_sec = s;

        /*
         * Check if the process exceeds its cpu resource allocation.
         * If over max, kill it.  In any case, if it has run for more
         * than 10 minutes, reduce priority to give others a chance.
         */
        rlim = &p->p_rlimit[RLIMIT_CPU];
        if (s >= rlim->rlim_cur) {
                if (s >= rlim->rlim_max)
                        psignal(p, SIGKILL);
                else {
                        psignal(p, SIGXCPU);
                        if (rlim->rlim_cur < rlim->rlim_max)
                                rlim->rlim_cur += 5;
                }
        }
        if (autonicetime && s > autonicetime && p->p_ucred->cr_uid &&
            p->p_nice == NZERO) {
                p->p_nice = autoniceval + NZERO;
                resetpriority(p);
        }

        /*
         * Process is about to yield the CPU; clear the appropriate
         * scheduling flags.
         */
        spc->spc_flags &= ~SPCF_SWITCHCLEAR;

        /*
         * Pick a new current process and switch to it.  When we
         * run again, we'll return back here.
         */
        uvmexp.swtch++;
        cpu_switch(p);

        /*
         * Make sure that MD code released the scheduler lock before
         * resuming us.
         */
        SCHED_ASSERT_UNLOCKED();

        /*
         * We're running again; record our new start time.  We might
         * be running on a new CPU now, so don't use the cached
         * schedstate_percpu pointer.
         */
        KDASSERT(p->p_cpu != NULL);
        KDASSERT(p->p_cpu == curcpu());
        microtime(&p->p_cpu->ci_schedstate.spc_runtime);

#if defined(MULTIPROCESSOR)
        if (p->p_flag & P_BIGLOCK) {
                /*
                 * Reacquire the kernel_lock now.  We do this after
                 * we've released sched_lock to avoid deadlock,
                 * and before we reacquire the interlock.
                 */
                kernel_lock_acquire_count(LK_EXCLUSIVE|LK_CANRECURSE,
                    hold_count);
        }
#endif
}

/*
 * Initialize the (doubly-linked) run queues
 * to be empty.
 */
void
rqinit()
{
        int i;

        for (i = 0; i < RUNQUE_NQS; i++)
                sched_qs[i].ph_link = sched_qs[i].ph_rlink =
                    (struct proc *)&sched_qs[i];
}

/*
 * Change process state to be runnable,
 * placing it on the run queue if it is in memory,
 * and awakening the swapper if it isn't in memory.
 */
void
setrunnable(struct proc *p)
{

        SCHED_ASSERT_LOCKED();

        switch (p->p_stat) {
        case 0:
        case SRUN:
        case SONPROC:
        case SZOMB:
        case SDEAD:
        default:
                panic("setrunnable");
        case SSTOP:
                /*
                 * If we're being traced (possibly because someone attached us
                 * while we were stopped), check for a signal from the debugger.
                 */
                if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0) {
                        sigaddset(&p->p_siglist, p->p_xstat);
                        p->p_sigcheck = 1;
                }
                /* FALLTHROUGH */
        case SSLEEP:
                unsleep(p);             /* e.g. when sending signals */
                break;

        case SIDL:
                break;
        }
        p->p_stat = SRUN;
        if (p->p_flag & P_INMEM)
                setrunqueue(p);

        if (p->p_slptime > 1)
                updatepri(p);
        p->p_slptime = 0;
        if ((p->p_flag & P_INMEM) == 0)
                sched_wakeup((caddr_t)&proc0);
        else if (p->p_priority < curcpu()->ci_schedstate.spc_curpriority) {
                /*
                 * XXXSMP
                 * This is not exactly right.  Since p->p_cpu persists
                 * across a context switch, this gives us some sort
                 * of processor affinity.  But we need to figure out
                 * at what point it's better to reschedule on a different
                 * CPU than the last one.
                 */
                need_resched((p->p_cpu != NULL) ? p->p_cpu : curcpu());
        }
}

/*
 * Compute the priority of a process when running in user mode.
 * Arrange to reschedule if the resulting priority is better
 * than that of the current process.
 */
void
resetpriority(struct proc *p)
{
        unsigned int newpriority;

        SCHED_ASSERT_LOCKED();

        newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO);
        newpriority = min(newpriority, MAXPRI);
        p->p_usrpri = newpriority;
        if (newpriority < curcpu()->ci_schedstate.spc_curpriority) {
                /*
                 * XXXSMP
                 * Same applies as in setrunnable() above.
                 */
                need_resched((p->p_cpu != NULL) ? p->p_cpu : curcpu());
        }
}

/*
 * We adjust the priority of the current process.  The priority of a process
 * gets worse as it accumulates CPU time.  The cpu usage estimator (p_estcpu)
 * is increased here.  The formula for computing priorities (in kern_synch.c)
 * will compute a different value each time p_estcpu increases.  This can
 * cause a switch, but unless the priority crosses a PPQ boundary the actual
 * queue will not change.  The cpu usage estimator ramps up quite quickly
 * when the process is running (linearly), and decays away exponentially, at
 * a rate which is proportionally slower when the system is busy.  The basic
 * principle is that the system will 90% forget that the process used a lot
 * of CPU time in 5 * loadav seconds.  This causes the system to favor
 * processes which haven't run much recently, and to round-robin among other
 * processes.
 */

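/*
 * Illustrative only: each schedclock() call below bumps p_estcpu by one, and
 * resetpriority() feeds that straight into the user priority
 * (PUSER + p_estcpu + NICE_WEIGHT * (p_nice - NZERO), clamped at MAXPRI).
 * So a running process's priority number grows by one per schedclock tick,
 * but since PPQ consecutive priority values share one run queue, it only
 * moves to a lower-priority queue once that sum crosses a PPQ boundary --
 * which is why schedcpu() above compares (p_priority / PPQ) with
 * (p_usrpri / PPQ) before requeueing.
 */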
void
schedclock(struct proc *p)
{
        int s;

        p->p_estcpu = ESTCPULIM(p->p_estcpu + 1);

        SCHED_LOCK(s);
        resetpriority(p);
        SCHED_UNLOCK(s);

        if (p->p_priority >= PUSER)
                p->p_priority = p->p_usrpri;
}