/*	$NetBSD: kern_synch.c,v 1.186.2.9 2007/07/01 21:43:40 ad Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and
 * Daniel Sieger.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.186.2.9 2007/07/01 21:43:40 ad Exp $");

#include "opt_kstack.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include "opt_perfctrs.h"

#define	__MUTEX_PRIVATE

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#if defined(PERFCTRS)
#include <sys/pmc.h>
#endif
#include <sys/cpu.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/syscall_stats.h>
#include <sys/sleepq.h>
#include <sys/lockdebug.h>
#include <sys/evcnt.h>

#include <uvm/uvm_extern.h>

callout_t sched_pstats_ch;
unsigned int sched_pstats_ticks;

kcondvar_t lbolt;			/* once a second sleep address */

static void sched_unsleep(struct lwp *);
static void sched_changepri(struct lwp *, pri_t);
static void sched_lendpri(struct lwp *, pri_t);

syncobj_t sleep_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	sleepq_unsleep,
	sleepq_changepri,
	sleepq_lendpri,
	syncobj_noowner,
};

syncobj_t sched_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	sched_unsleep,
	sched_changepri,
	sched_lendpri,
	syncobj_noowner,
};

/*
 * During autoconfiguration or after a panic, a sleep will simply lower the
 * priority briefly to allow interrupts, then return.  The priority to be
 * used (safepri) is machine-dependent, thus this value is initialized and
 * maintained in the machine-dependent layers.  This priority will typically
 * be 0, or the lowest priority that is safe for use on the interrupt stack;
 * it can be made higher to block network software interrupts after panics.
 */
int	safepri;

/*
 * OBSOLETE INTERFACE
 *
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds (0
 * means no timeout).  If pri includes the PCATCH flag, signals are checked
 * before and after sleeping, otherwise signals are not checked.  Returns 0
 * if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
 * signal needs to be delivered, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the system
 * call should be interrupted by the signal.
 *
 * The interlock is held until we are on a sleep queue.  The interlock will
 * be locked before returning to the caller unless the PNORELOCK flag is
 * specified, in which case the interlock will always be unlocked upon
 * return.
 */
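/*
 * Illustrative sketch only (not from the original sources): a typical
 * caller holds the simplelock interlock, re-tests its condition in a
 * loop and lets ltsleep() drop and re-take the interlock around the
 * sleep.  The names sc, sc_slock, sc_busy and the "scbusy" wmesg are
 * hypothetical.
 *
 *	simple_lock(&sc->sc_slock);
 *	while (sc->sc_busy) {
 *		error = ltsleep(&sc->sc_busy, PRIBIO | PCATCH, "scbusy",
 *		    0, &sc->sc_slock);
 *		if (error)
 *			break;
 *	}
 *	simple_unlock(&sc->sc_slock);
 */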
int
ltsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
	volatile struct simplelock *interlock)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error;

	if (sleepq_dontsleep(l)) {
		(void)sleepq_abort(NULL, 0);
		if ((priority & PNORELOCK) != 0)
			simple_unlock(interlock);
		return 0;
	}

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_enter(sq, l);
	sleepq_enqueue(sq, sched_kpri(l), ident, wmesg, &sleep_syncobj);

	if (interlock != NULL) {
		KASSERT(simple_lock_held(interlock));
		simple_unlock(interlock);
	}

	error = sleepq_block(timo, priority & PCATCH);

	if (interlock != NULL && (priority & PNORELOCK) == 0)
		simple_lock(interlock);

	return error;
}

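/*
 * OBSOLETE INTERFACE
 *
 * As for ltsleep() above, but the sleep is interlocked with a kmutex_t
 * instead of a simplelock.  The mutex is dropped while asleep and, unless
 * PNORELOCK is given, re-acquired before returning.
 */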
int
mtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
	kmutex_t *mtx)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error;

	if (sleepq_dontsleep(l)) {
		(void)sleepq_abort(mtx, (priority & PNORELOCK) != 0);
		return 0;
	}

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_enter(sq, l);
	sleepq_enqueue(sq, sched_kpri(l), ident, wmesg, &sleep_syncobj);
	mutex_exit(mtx);
	error = sleepq_block(timo, priority & PCATCH);

	if ((priority & PNORELOCK) == 0)
		mutex_enter(mtx);

	return error;
}

/*
 * General sleep call for situations where a wake-up is not expected.
 */
int
kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error;

	if (sleepq_dontsleep(l))
		return sleepq_abort(NULL, 0);

	if (mtx != NULL)
		mutex_exit(mtx);
	sq = sleeptab_lookup(&sleeptab, l);
	sleepq_enter(sq, l);
	sleepq_enqueue(sq, sched_kpri(l), l, wmesg, &sleep_syncobj);
	error = sleepq_block(timo, intr);
	if (mtx != NULL)
		mutex_enter(mtx);

	return error;
}
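/*
 * Illustrative sketch only: a caller that simply needs to pause for
 * roughly 100ms, with no wakeup expected and no interlock, might do
 * (mstohz() converts milliseconds to clock ticks):
 *
 *	(void)kpause("pause", false, mstohz(100), NULL);
 */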

/*
 * OBSOLETE INTERFACE
 *
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(wchan_t ident)
{
	sleepq_t *sq;

	if (cold)
		return;

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_wake(sq, ident, (u_int)-1);
}

/*
 * OBSOLETE INTERFACE
 *
 * Make the highest priority process first in line on the specified
 * identifier runnable.
 */
void
wakeup_one(wchan_t ident)
{
	sleepq_t *sq;

	if (cold)
		return;

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_wake(sq, ident, 1);
}


/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.  Should only be called when the
 * current process explicitly requests it (eg sched_yield(2) in compat code).
 */
void
yield(void)
{
	struct lwp *l = curlwp;

	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
	lwp_lock(l);
	KASSERT(lwp_locked(l, &l->l_cpu->ci_schedstate.spc_lwplock));
	KASSERT(l->l_stat == LSONPROC);
	l->l_priority = l->l_usrpri;
	(void)mi_switch(l);
	KERNEL_LOCK(l->l_biglocks, l);
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.
 */
void
preempt(void)
{
	struct lwp *l = curlwp;

	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
	lwp_lock(l);
	KASSERT(lwp_locked(l, &l->l_cpu->ci_schedstate.spc_lwplock));
	KASSERT(l->l_stat == LSONPROC);
	l->l_priority = l->l_usrpri;
	l->l_nivcsw++;
	(void)mi_switch(l);
	KERNEL_LOCK(l->l_biglocks, l);
}

/*
 * Compute the amount of time during which the current lwp was running.
 *
 * - update l_rtime unless it's an idle lwp.
 * - update spc_runtime for the next lwp.
 */

static inline void
updatertime(struct lwp *l, struct schedstate_percpu *spc)
{
	struct timeval tv;
	long s, u;

	if ((l->l_flag & LW_IDLE) != 0) {
		microtime(&spc->spc_runtime);
		return;
	}

	microtime(&tv);
	u = l->l_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec);
	s = l->l_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
	if (u < 0) {
		u += 1000000;
		s--;
	} else if (u >= 1000000) {
		u -= 1000000;
		s++;
	}
	l->l_rtime.tv_usec = u;
	l->l_rtime.tv_sec = s;

	spc->spc_runtime = tv;
}


/*
 * The machine independent parts of context switch.
 *
 * Returns 1 if another LWP was actually run.
 */
int
mi_switch(struct lwp *l)
{
	struct schedstate_percpu *spc;
	struct lwp *newl;
	int retval, oldspl;
	bool returning;

	KASSERT(lwp_locked(l, NULL));
	LOCKDEBUG_BARRIER(l->l_mutex, 1);

#ifdef KSTACK_CHECK_MAGIC
	kstack_check_magic(l);
#endif

	/*
	 * It's safe to read the per-CPU schedstate unlocked here, as all we
	 * are after is the run time and that's guaranteed to have been last
	 * updated by this CPU.
	 */
	KDASSERT(l->l_cpu == curcpu());

	/*
	 * Process is about to yield the CPU; clear the appropriate
	 * scheduling flags.
	 */
	spc = &l->l_cpu->ci_schedstate;
	if (l->l_pinned != NULL) {
		extern struct evcnt softint_block;

		returning = true;
		newl = l->l_pinned;
		l->l_pinned = NULL;
		softint_block.ev_count++;
	} else {
		returning = false;
		newl = NULL;

		/* Count time spent in current system call */
		SYSCALL_TIME_SLEEP(l);

		/*
		 * XXXSMP If we are using h/w performance counters,
		 * save context.
		 */
#if PERFCTRS
		if (PMC_ENABLED(l->l_proc)) {
			pmc_save_context(l->l_proc);
		}
#endif
		spc->spc_flags &= ~SPCF_SWITCHCLEAR;
		updatertime(l, spc);
	}

	/*
	 * If on the CPU and we have gotten this far, then we must yield.
	 */
	mutex_spin_enter(spc->spc_mutex);
	KASSERT(l->l_stat != LSRUN);
	if (l->l_stat == LSONPROC) {
		KASSERT(lwp_locked(l, &spc->spc_lwplock));
		if ((l->l_flag & LW_IDLE) == 0) {
			l->l_stat = LSRUN;
			lwp_setlock(l, spc->spc_mutex);
			sched_enqueue(l, true);
		} else
			l->l_stat = LSIDL;
	}

	/*
	 * Let sched_nextlwp() select the LWP to run on the CPU next.
	 * If no LWP is runnable, switch to the idle LWP.
	 */
	if (!returning) {
		newl = sched_nextlwp();
		if (newl) {
			sched_dequeue(newl);
			KASSERT(lwp_locked(newl, spc->spc_mutex));
			newl->l_stat = LSONPROC;
			newl->l_cpu = l->l_cpu;
			newl->l_flag |= LW_RUNNING;
			lwp_setlock(newl, &spc->spc_lwplock);
		} else {
			newl = l->l_cpu->ci_data.cpu_idlelwp;
			newl->l_stat = LSONPROC;
			newl->l_flag |= LW_RUNNING;
		}
		spc->spc_curpriority = newl->l_usrpri;
		newl->l_priority = newl->l_usrpri;
		cpu_did_resched();
	}

	if (l != newl) {
		struct lwp *prevlwp;

		/*
		 * If the old LWP has been moved to a run queue above,
		 * drop the general purpose LWP lock: it's now locked
		 * by the scheduler lock.
		 *
		 * Otherwise, drop the scheduler lock.  We're done with
		 * the run queues for now.
		 */
		if (l->l_mutex == spc->spc_mutex) {
			mutex_spin_exit(&spc->spc_lwplock);
		} else {
			mutex_spin_exit(spc->spc_mutex);
		}

		/* Unlocked, but for statistics only. */
		uvmexp.swtch++;

		/*
		 * Save old VM context, unless a soft interrupt
		 * handler is blocking.
		 */
		if (!returning)
			pmap_deactivate(l);

		/* Switch to the new LWP.. */
		l->l_ncsw++;
		l->l_flag &= ~LW_RUNNING;
		oldspl = MUTEX_SPIN_OLDSPL(l->l_cpu);
		prevlwp = cpu_switchto(l, newl, returning);

		/*
		 * .. we have switched away and are now back so we must
		 * be the new curlwp.  prevlwp is who we replaced.
		 */
		curlwp = l;
		if (prevlwp != NULL) {
			curcpu()->ci_mtx_oldspl = oldspl;
			lwp_unlock(prevlwp);
		} else {
			splx(oldspl);
		}

		/* Restore VM context. */
		pmap_activate(l);
		retval = 1;
	} else {
		/* Nothing to do - just unlock and return. */
		mutex_spin_exit(spc->spc_mutex);
		lwp_unlock(l);
		retval = 0;
	}


	KASSERT(l == curlwp);
	KASSERT(l->l_stat == LSONPROC);

	/*
	 * XXXSMP If we are using h/w performance counters, restore context.
	 */
#if PERFCTRS
	if (PMC_ENABLED(l->l_proc)) {
		pmc_restore_context(l->l_proc);
	}
#endif

	/*
	 * We're running again; record our new start time.  We might
	 * be running on a new CPU now, so don't use the cached
	 * schedstate_percpu pointer.
	 */
	SYSCALL_TIME_WAKEUP(l);
	KDASSERT(l->l_cpu == curcpu());
	LOCKDEBUG_BARRIER(NULL, 1);

	return retval;
}

/*
 * Change process state to be runnable, placing it on the run queue if it is
 * in memory, and awakening the swapper if it isn't in memory.
 *
 * Call with the process and LWP locked.  Will return with the LWP unlocked.
 */
void
setrunnable(struct lwp *l)
{
	struct proc *p = l->l_proc;
	sigset_t *ss;

	KASSERT((l->l_flag & LW_IDLE) == 0);
	KASSERT(mutex_owned(&p->p_smutex));
	KASSERT(lwp_locked(l, NULL));

	switch (l->l_stat) {
	case LSSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xstat != 0) {
			if ((sigprop[p->p_xstat] & SA_TOLWP) != 0)
				ss = &l->l_sigpend.sp_set;
			else
				ss = &p->p_sigpend.sp_set;
			sigaddset(ss, p->p_xstat);
			signotify(l);
		}
		p->p_nrlwps++;
		break;
	case LSSUSPENDED:
		l->l_flag &= ~LW_WSUSPEND;
		p->p_nrlwps++;
		break;
	case LSSLEEP:
		KASSERT(l->l_wchan != NULL);
		break;
	default:
		panic("setrunnable: lwp %p state was %d", l, l->l_stat);
	}

	/*
	 * If the LWP was sleeping interruptibly, then it's OK to start it
	 * again.  If not, mark it as still sleeping.
	 */
	if (l->l_wchan != NULL) {
		l->l_stat = LSSLEEP;
		/* lwp_unsleep() will release the lock. */
		lwp_unsleep(l);
		return;
	}

	/*
	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
	 * about to call mi_switch(), in which case it will yield.
	 */
	if ((l->l_flag & LW_RUNNING) != 0) {
		l->l_stat = LSONPROC;
		l->l_slptime = 0;
		lwp_unlock(l);
		return;
	}

	/*
	 * Set the LWP runnable.  If it's swapped out, we need to wake the
	 * swapper to bring it back in.  Otherwise, enter it into a run queue.
	 */
	if (l->l_mutex != l->l_cpu->ci_schedstate.spc_mutex) {
		spc_lock(l->l_cpu);
		lwp_unlock_to(l, l->l_cpu->ci_schedstate.spc_mutex);
	}

	sched_setrunnable(l);
	l->l_stat = LSRUN;
	l->l_slptime = 0;

	if (l->l_flag & LW_INMEM) {
		sched_enqueue(l, false);
		resched_cpu(l);
		lwp_unlock(l);
	} else {
		lwp_unlock(l);
		uvm_kick_scheduler();
	}
}

/*
 * suspendsched:
 *
 *	Convert all non-L_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED.
 */
void
suspendsched(void)
{
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
#endif
	struct lwp *l;
	struct proc *p;

	/*
	 * We do this by process in order not to violate the locking rules.
	 */
	mutex_enter(&proclist_lock);
	PROCLIST_FOREACH(p, &allproc) {
		mutex_enter(&p->p_smutex);

		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(&p->p_smutex);
			continue;
		}

		p->p_stat = SSTOP;

		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if (l == curlwp)
				continue;

			lwp_lock(l);

			/*
			 * Set LW_WREBOOT so that the LWP will suspend itself
			 * when it tries to return to user mode.  We want to
			 * try to get as many LWPs as possible to the
			 * user/kernel boundary, so that they will release
			 * any locks that they hold.
			 */
			l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);

			if (l->l_stat == LSSLEEP &&
			    (l->l_flag & LW_SINTR) != 0) {
				/* setrunnable() will release the lock. */
				setrunnable(l);
				continue;
			}

			lwp_unlock(l);
		}

		mutex_exit(&p->p_smutex);
	}
	mutex_exit(&proclist_lock);

	/*
	 * Kick all CPUs to make them preempt any LWPs running in user mode.
	 * They'll trap into the kernel and suspend themselves in userret().
	 */
#ifdef MULTIPROCESSOR
	for (CPU_INFO_FOREACH(cii, ci))
		cpu_need_resched(ci, 0);
#else
	cpu_need_resched(curcpu(), 0);
#endif
}

/*
 * sched_kpri:
 *
 *	Scale a priority level to a kernel priority level, usually
 *	for an LWP that is about to sleep.
 */
pri_t
sched_kpri(struct lwp *l)
{
	pri_t pri;

	/*
	 * Scale user priorities (0 -> 63) up to kernel priorities
	 * in the range (64 -> 95).  This makes assumptions about
	 * the priority space and so should be kept in sync with
	 * param.h.
	 */
	if ((pri = l->l_usrpri) >= PRI_KERNEL)
		return pri;

	return (pri >> 1) + PRI_KERNEL;
}

/*
 * sched_unsleep:
 *
 *	This is called when the LWP has not been awoken normally but instead
 *	interrupted: for example, if the sleep timed out.  Because of this,
 *	it's not a valid action for running or idle LWPs.
 */
static void
sched_unsleep(struct lwp *l)
{

	lwp_unlock(l);
	panic("sched_unsleep");
}

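/*
 * resched_cpu:
 *
 *	Notify the CPU on which an LWP last ran that it should consider
 *	rescheduling, if the LWP's effective priority warrants preempting
 *	whatever that CPU is currently running.
 */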
inline void
resched_cpu(struct lwp *l)
{
	struct cpu_info *ci;
	const pri_t pri = lwp_eprio(l);

	/*
	 * XXXSMP
	 * Since l->l_cpu persists across a context switch,
	 * this gives us *very weak* processor affinity, in
	 * that we notify the CPU on which the process last
	 * ran that it should try to switch.
	 *
	 * This does not guarantee that the process will run on
	 * that processor next, because another processor might
	 * grab it the next time it performs a context switch.
	 *
	 * This also does not handle the case where its last
	 * CPU is running a higher-priority process, but every
	 * other CPU is running a lower-priority process.  There
	 * are ways to handle this situation, but they're not
	 * currently very pretty, and we also need to weigh the
	 * cost of moving a process from one CPU to another.
	 */
	ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
	if (pri < ci->ci_schedstate.spc_curpriority)
		cpu_need_resched(ci, 0);
}

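/*
 * sched_changepri:
 *
 *	Change an LWP's priority.  If the LWP is on a run queue, it is
 *	dequeued, given the new priority and enqueued again so the queue
 *	stays correctly sorted.
 */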
static void
sched_changepri(struct lwp *l, pri_t pri)
{

	KASSERT(lwp_locked(l, NULL));

	l->l_usrpri = pri;
	if (l->l_priority >= PRI_KERNEL)
		return;

	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
		l->l_priority = pri;
		return;
	}

	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	sched_dequeue(l);
	l->l_priority = pri;
	sched_enqueue(l, false);
	resched_cpu(l);
}

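/*
 * sched_lendpri:
 *
 *	Lend a priority to an LWP (e.g. for priority inheritance),
 *	re-sorting it on the run queue if it is currently runnable and
 *	in memory.
 */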
static void
sched_lendpri(struct lwp *l, pri_t pri)
{

	KASSERT(lwp_locked(l, NULL));

	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
		l->l_inheritedprio = pri;
		return;
	}

	KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	sched_dequeue(l);
	l->l_inheritedprio = pri;
	sched_enqueue(l, false);
	resched_cpu(l);
}

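/*
 * syncobj_noowner:
 *
 *	Owner lookup for sync objects that have no owner: always returns
 *	NULL.
 */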
struct lwp *
syncobj_noowner(wchan_t wchan)
{

	return NULL;
}


/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the percentage of CPU used by a
 * process.
 */
#define	CCPU_SHIFT	(FSHIFT + 1)

/*
 * sched_pstats:
 *
 *	Update process statistics and check CPU resource allocation.
 *	Call scheduler-specific hook to eventually adjust process/LWP
 *	priorities.
 */
/* ARGSUSED */
void
sched_pstats(void *arg)
{
	struct rlimit *rlim;
	struct lwp *l;
	struct proc *p;
	int minslp, sig, clkhz;
	long runtm;

	sched_pstats_ticks++;

	mutex_enter(&proclist_lock);
	PROCLIST_FOREACH(p, &allproc) {
		/*
		 * Increment time in/out of memory and sleep time (if
		 * sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		minslp = 2;
		mutex_enter(&p->p_smutex);
		mutex_spin_enter(&p->p_stmutex);
		runtm = p->p_rtime.tv_sec;
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if ((l->l_flag & LW_IDLE) != 0)
				continue;
			lwp_lock(l);
			runtm += l->l_rtime.tv_sec;
			l->l_swtime++;
			if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
			    l->l_stat == LSSUSPENDED) {
				l->l_slptime++;
				minslp = min(minslp, l->l_slptime);
			} else
				minslp = 0;
			lwp_unlock(l);

			/*
			 * p_pctcpu is only for ps.
			 */
			l->l_pctcpu = (l->l_pctcpu * ccpu) >> FSHIFT;
			if (l->l_slptime < 1) {
				clkhz = stathz != 0 ? stathz : hz;
#if	(FSHIFT >= CCPU_SHIFT)
				l->l_pctcpu += (clkhz == 100) ?
				    ((fixpt_t)l->l_cpticks) <<
				    (FSHIFT - CCPU_SHIFT) :
				    100 * (((fixpt_t)l->l_cpticks)
				    << (FSHIFT - CCPU_SHIFT)) / clkhz;
#else
				l->l_pctcpu += ((FSCALE - ccpu) *
				    (l->l_cpticks * FSCALE / clkhz)) >> FSHIFT;
#endif
				l->l_cpticks = 0;
			}
		}
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		sched_pstats_hook(p, minslp);
		mutex_spin_exit(&p->p_stmutex);

		/*
		 * Check if the process exceeds its CPU resource allocation.
		 * If over max, kill it.
		 */
		rlim = &p->p_rlimit[RLIMIT_CPU];
		sig = 0;
		if (runtm >= rlim->rlim_cur) {
			if (runtm >= rlim->rlim_max)
				sig = SIGKILL;
			else {
				sig = SIGXCPU;
				if (rlim->rlim_cur < rlim->rlim_max)
					rlim->rlim_cur += 5;
			}
		}
		mutex_exit(&p->p_smutex);
		if (sig) {
			/* XXXAD */
			mutex_enter(&proclist_mutex);
			psignal(p, sig);
			mutex_exit(&proclist_mutex);
		}
	}
	mutex_exit(&proclist_lock);
	uvm_meter();
	cv_broadcast(&lbolt);
	callout_schedule(&sched_pstats_ch, hz);
}

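/*
 * sched_init:
 *
 *	Initialize the scheduler: set up the periodic sched_pstats()
 *	callout, call the scheduler-specific setup hook and prime the
 *	statistics with an initial sched_pstats() run.
 */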
void
sched_init(void)
{

	callout_init(&sched_pstats_ch, CALLOUT_MPSAFE);
	callout_setfunc(&sched_pstats_ch, sched_pstats, NULL);
	sched_setup();
	sched_pstats(NULL);
}