/*	$NetBSD: kern_synch.c,v 1.177.2.27 2007/04/19 04:19:44 ad Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and
 * Daniel Sieger.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.177.2.27 2007/04/19 04:19:44 ad Exp $");

#include "opt_kstack.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include "opt_perfctrs.h"

#define	__MUTEX_PRIVATE

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#if defined(PERFCTRS)
#include <sys/pmc.h>
#endif
#include <sys/cpu.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/syscall_stats.h>
#include <sys/sleepq.h>
#include <sys/lockdebug.h>

#include <uvm/uvm_extern.h>

struct callout sched_pstats_ch = CALLOUT_INITIALIZER_SETFUNC(sched_pstats, NULL);
unsigned int sched_pstats_ticks;

int	lbolt;			/* once a second sleep address */

static void	sched_unsleep(struct lwp *);
static void	sched_changepri(struct lwp *, pri_t);
static void	sched_lendpri(struct lwp *, pri_t);

syncobj_t sleep_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	sleepq_unsleep,
	sleepq_changepri,
	sleepq_lendpri,
	syncobj_noowner,
};

syncobj_t sched_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	sched_unsleep,
	sched_changepri,
	sched_lendpri,
	syncobj_noowner,
};

/*
 * During autoconfiguration or after a panic, a sleep will simply lower the
 * priority briefly to allow interrupts, then return.  The priority to be
 * used (safepri) is machine-dependent, thus this value is initialized and
 * maintained in the machine-dependent layers.  This priority will typically
 * be 0, or the lowest priority that is safe for use on the interrupt stack;
 * it can be made higher to block network software interrupts after panics.
 */
int	safepri;
/*
 * OBSOLETE INTERFACE
 *
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds (0
 * means no timeout).  If pri includes the PCATCH flag, signals are checked
 * before and after sleeping, otherwise signals are not checked.  Returns 0
 * if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
 * signal needs to be delivered, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the system
 * call should be interrupted by the signal.
 *
 * The interlock is held until we are on a sleep queue.  The interlock will
 * be relocked before returning to the caller unless the PNORELOCK flag is
 * specified, in which case the interlock will always be unlocked upon
 * return.
 */
int
ltsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
	volatile struct simplelock *interlock)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error;

	if (sleepq_dontsleep(l)) {
		(void)sleepq_abort(NULL, 0);
		if ((priority & PNORELOCK) != 0)
			simple_unlock(interlock);
		return 0;
	}

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_enter(sq, l);
	sleepq_enqueue(sq, priority & PRIMASK, ident, wmesg, &sleep_syncobj);

	if (interlock != NULL) {
		LOCK_ASSERT(simple_lock_held(interlock));
		simple_unlock(interlock);
	}

	error = sleepq_block(timo, priority & PCATCH);

	if (interlock != NULL && (priority & PNORELOCK) == 0)
		simple_lock(interlock);

	return error;
}
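
/*
 * Illustrative only (not part of the original file): a minimal sketch of
 * the classic wait/wakeup pattern around ltsleep(), assuming a hypothetical
 * simplelock "foo_slock" guarding a condition "foo_ready":
 *
 *	int error;
 *
 *	simple_lock(&foo_slock);
 *	while (!foo_ready) {
 *		error = ltsleep(&foo_ready, PWAIT | PCATCH, "foowait", 0,
 *		    &foo_slock);
 *		if (error != 0)
 *			break;
 *	}
 *	simple_unlock(&foo_slock);
 *
 * The waking side sets foo_ready under foo_slock and calls
 * wakeup(&foo_ready).  Since PNORELOCK is not passed, the interlock is
 * held again whenever ltsleep() returns.
 */

/*
 * As ltsleep() above, but the interlock is a kmutex: it is dropped once
 * the LWP is on the sleep queue and re-acquired before return unless
 * PNORELOCK is specified.
 */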
int
mtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
	kmutex_t *mtx)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error;

	if (sleepq_dontsleep(l)) {
		(void)sleepq_abort(mtx, (priority & PNORELOCK) != 0);
		return 0;
	}

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_enter(sq, l);
	sleepq_enqueue(sq, priority & PRIMASK, ident, wmesg, &sleep_syncobj);
	mutex_exit(mtx);
	error = sleepq_block(timo, priority & PCATCH);

	if ((priority & PNORELOCK) == 0)
		mutex_enter(mtx);

	return error;
}

/*
 * General sleep call for situations where a wake-up is not expected.
 */
int
kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error;

	if (sleepq_dontsleep(l))
		return sleepq_abort(NULL, 0);

	if (mtx != NULL)
		mutex_exit(mtx);
	sq = sleeptab_lookup(&sleeptab, l);
	sleepq_enter(sq, l);
	sleepq_enqueue(sq, sched_kpri(l), l, wmesg, &sleep_syncobj);
	error = sleepq_block(timo, intr);
	if (mtx != NULL)
		mutex_enter(mtx);

	return error;
}
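
/*
 * Illustrative only (not part of the original file): kpause() gives a
 * simple timed pause of roughly timo/hz seconds, e.g. to back off for
 * about half a second without expecting a wakeup:
 *
 *	(void)kpause("backoff", false, hz / 2, NULL);
 *
 * Passing a kmutex causes it to be dropped across the sleep and
 * re-acquired before return.
 */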

/*
 * OBSOLETE INTERFACE
 *
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(wchan_t ident)
{
	sleepq_t *sq;

	if (cold)
		return;

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_wake(sq, ident, (u_int)-1);
}

/*
 * OBSOLETE INTERFACE
 *
 * Make the highest priority process first in line on the specified
 * identifier runnable.
 */
void
wakeup_one(wchan_t ident)
{
	sleepq_t *sq;

	if (cold)
		return;

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_wake(sq, ident, 1);
}


/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.  Should only be called when the
 * current process explicitly requests it (e.g. sched_yield(2) in compat
 * code).
 */
void
yield(void)
{
	struct lwp *l = curlwp;

	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
	lwp_lock(l);
	if (l->l_stat == LSONPROC) {
		KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
		l->l_priority = l->l_usrpri;
	}
	l->l_nvcsw++;
	(void)mi_switch(l);
	KERNEL_LOCK(l->l_biglocks, l);
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.
 */
void
preempt(void)
{
	struct lwp *l = curlwp;

	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
	lwp_lock(l);
	if (l->l_stat == LSONPROC) {
		KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
		l->l_priority = l->l_usrpri;
	}
	l->l_nivcsw++;
	(void)mi_switch(l);
	KERNEL_LOCK(l->l_biglocks, l);
}

/*
 * Compute the amount of time during which the current lwp was running.
 *
 * - update l_rtime unless it's an idle lwp.
 * - update spc_runtime for the next lwp.
 */

static inline void
updatertime(struct lwp *l, struct schedstate_percpu *spc)
{
	struct timeval tv;
	long s, u;

	if ((l->l_flag & LW_IDLE) != 0) {
		/* Idle LWPs accrue no run time; just restart the clock. */
		microtime(&spc->spc_runtime);
		return;
	}

	/* Charge the interval since spc_runtime was last set to this LWP. */
	microtime(&tv);
	u = l->l_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec);
	s = l->l_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
	if (u < 0) {
		/* Normalize so that tv_usec stays within [0, 1000000). */
		u += 1000000;
		s--;
	} else if (u >= 1000000) {
		u -= 1000000;
		s++;
	}
	l->l_rtime.tv_usec = u;
	l->l_rtime.tv_sec = s;

	/* The next interval for this CPU starts now. */
	spc->spc_runtime = tv;
}

/*
 * The machine independent parts of context switch.
 *
 * Returns 1 if another process was actually run.
 */
int
mi_switch(struct lwp *l)
{
	struct schedstate_percpu *spc;
	struct lwp *newl;
	int retval, oldspl;

	LOCK_ASSERT(lwp_locked(l, NULL));

#ifdef LOCKDEBUG
	spinlock_switchcheck();
	simple_lock_switchcheck();
#endif
#ifdef KSTACK_CHECK_MAGIC
	kstack_check_magic(l);
#endif

	/*
	 * It's safe to read the per CPU schedstate unlocked here, as all we
	 * are after is the run time and that's guaranteed to have been last
	 * updated by this CPU.
	 */
	KDASSERT(l->l_cpu == curcpu());
	spc = &l->l_cpu->ci_schedstate;

	/* Count time spent in current system call */
	SYSCALL_TIME_SLEEP(l);

	/*
	 * XXXSMP If we are using h/w performance counters, save context.
	 */
#if PERFCTRS
	if (PMC_ENABLED(l->l_proc)) {
		pmc_save_context(l->l_proc);
	}
#endif

	/*
	 * If on the CPU and we have gotten this far, then we must yield.
	 */
	KASSERT(l->l_stat != LSRUN);
	if (l->l_stat == LSONPROC) {
		KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
		l->l_stat = LSRUN;
		if ((l->l_flag & LW_IDLE) == 0) {
			sched_enqueue(l, true);
		}
	}

	/*
	 * Process is about to yield the CPU; clear the appropriate
	 * scheduling flags.
	 */
	spc->spc_flags &= ~SPCF_SWITCHCLEAR;

	LOCKDEBUG_BARRIER(l->l_mutex, 1);

	/*
	 * Acquire the spc_mutex if necessary.
	 */
	if (l->l_mutex != spc->spc_mutex) {
		mutex_spin_enter(spc->spc_mutex);
	}

	/*
	 * Let sched_nextlwp() select the LWP to run on the CPU next.
	 * If no LWP is runnable, switch to the idle LWP.
	 */
	newl = sched_nextlwp();
	if (newl) {
		sched_dequeue(newl);
	} else {
		newl = l->l_cpu->ci_data.cpu_idlelwp;
		KASSERT(newl != NULL);
	}
	KASSERT(lwp_locked(newl, spc->spc_mutex));
	newl->l_stat = LSONPROC;
	newl->l_cpu = l->l_cpu;
	newl->l_flag |= LW_RUNNING;
	cpu_did_resched();

	if (l->l_mutex != spc->spc_mutex) {
		mutex_spin_exit(spc->spc_mutex);
	}

	updatertime(l, spc);
	if (l != newl) {
		struct lwp *prevlwp;

		/* Unlocked, but for statistics only. */
		uvmexp.swtch++;

		/* Save old VM context. */
		pmap_deactivate(l);

		/* Switch to the new LWP.. */
		l->l_flag &= ~LW_RUNNING;
		oldspl = MUTEX_SPIN_OLDSPL(l->l_cpu);
		prevlwp = cpu_switchto(l, newl);

		/*
		 * .. we have switched away and are now back so we must
		 * be the new curlwp.  prevlwp is who we replaced.
		 */
		curlwp = l;
		if (prevlwp != NULL) {
			curcpu()->ci_mtx_oldspl = oldspl;
			lwp_unlock(prevlwp);
		} else {
			splx(oldspl);
		}

		/* Restore VM context. */
		pmap_activate(l);
		retval = 1;
	} else {
		/* Nothing to do - just unlock and return. */
		lwp_unlock(l);
		retval = 0;
	}

	KASSERT(l == curlwp);
	KASSERT(l->l_stat == LSONPROC);

	/*
	 * XXXSMP If we are using h/w performance counters, restore context.
	 */
#if PERFCTRS
	if (PMC_ENABLED(l->l_proc)) {
		pmc_restore_context(l->l_proc);
	}
#endif

	/*
	 * We're running again; record our new start time.  We might
	 * be running on a new CPU now, so don't use the cached
	 * schedstate_percpu pointer.
	 */
	SYSCALL_TIME_WAKEUP(l);
	KDASSERT(l->l_cpu == curcpu());
	LOCKDEBUG_BARRIER(NULL, 1);

	return retval;
}

/*
 * Change process state to be runnable, placing it on the run queue if it is
 * in memory, and awakening the swapper if it isn't in memory.
 *
 * Call with the process and LWP locked.  Will return with the LWP unlocked.
 */
void
setrunnable(struct lwp *l)
{
	struct proc *p = l->l_proc;
	sigset_t *ss;

	KASSERT((l->l_flag & LW_IDLE) == 0);
	KASSERT(mutex_owned(&p->p_smutex));
	KASSERT(lwp_locked(l, NULL));

	switch (l->l_stat) {
	case LSSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xstat != 0) {
			if ((sigprop[p->p_xstat] & SA_TOLWP) != 0)
				ss = &l->l_sigpend.sp_set;
			else
				ss = &p->p_sigpend.sp_set;
			sigaddset(ss, p->p_xstat);
			signotify(l);
		}
		p->p_nrlwps++;
		break;
	case LSSUSPENDED:
		l->l_flag &= ~LW_WSUSPEND;
		p->p_nrlwps++;
		break;
	case LSSLEEP:
		KASSERT(l->l_wchan != NULL);
		break;
	default:
		panic("setrunnable: lwp %p state was %d", l, l->l_stat);
	}

	/*
	 * If the LWP was sleeping interruptibly, then it's OK to start it
	 * again.  If not, mark it as still sleeping.
	 */
	if (l->l_wchan != NULL) {
		l->l_stat = LSSLEEP;
		/* lwp_unsleep() will release the lock. */
		lwp_unsleep(l);
		return;
	}

	LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	/*
	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
	 * about to call mi_switch(), in which case it will yield.
	 */
	if ((l->l_flag & LW_RUNNING) != 0) {
		l->l_stat = LSONPROC;
		l->l_slptime = 0;
		lwp_unlock(l);
		return;
	}

	/*
	 * Set the LWP runnable.  If it's swapped out, we need to wake the
	 * swapper to bring it back in.  Otherwise, enter it into a run queue.
	 */
	sched_setrunnable(l);
	l->l_stat = LSRUN;
	l->l_slptime = 0;

	if (l->l_flag & LW_INMEM) {
		sched_enqueue(l, false);
		resched_cpu(l);
		lwp_unlock(l);
	} else {
		lwp_unlock(l);
		uvm_kick_scheduler();
	}
}

/*
 * suspendsched:
 *
 * Convert all non-L_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED.
 */
void
suspendsched(void)
{
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
#endif
	struct lwp *l;
	struct proc *p;

	/*
	 * We do this by process in order not to violate the locking rules.
	 */
	mutex_enter(&proclist_mutex);
	PROCLIST_FOREACH(p, &allproc) {
		mutex_enter(&p->p_smutex);

		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(&p->p_smutex);
			continue;
		}

		p->p_stat = SSTOP;

		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if (l == curlwp)
				continue;

			lwp_lock(l);

			/*
			 * Set L_WREBOOT so that the LWP will suspend itself
			 * when it tries to return to user mode.  We want to
			 * get as many LWPs as possible to the user/kernel
			 * boundary, so that they will release any locks
			 * that they hold.
			 */
			l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);

			if (l->l_stat == LSSLEEP &&
			    (l->l_flag & LW_SINTR) != 0) {
				/* setrunnable() will release the lock. */
				setrunnable(l);
				continue;
			}

			lwp_unlock(l);
		}

		mutex_exit(&p->p_smutex);
	}
	mutex_exit(&proclist_mutex);

	/*
	 * Kick all CPUs to make them preempt any LWPs running in user mode.
	 * They'll trap into the kernel and suspend themselves in userret().
	 */
#ifdef MULTIPROCESSOR
	for (CPU_INFO_FOREACH(cii, ci))
		cpu_need_resched(ci, 0);
#else
	cpu_need_resched(curcpu(), 0);
#endif
}

/*
 * sched_kpri:
 *
 * Scale a priority level to a kernel priority level, usually
 * for an LWP that is about to sleep.
 */
pri_t
sched_kpri(struct lwp *l)
{
	/*
	 * Scale user priorities (127 -> 50) up to kernel priorities
	 * in the range (49 -> 8).  Reserve the top 8 kernel priorities
	 * for high priority kthreads.  Kernel priorities passed in
	 * are left "as is".  XXX This is somewhat arbitrary.
	 */
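	/*
	 * Worked example from the table below: a user priority of 50 maps
	 * to kernel priority 8 and 127 maps to 49, while values 0..49
	 * (already kernel priorities) map to themselves.
	 */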
	static const uint8_t kpri_tab[] = {
		 0,  1,  2,  3,  4,  5,  6,  7,
		 8,  9, 10, 11, 12, 13, 14, 15,
		16, 17, 18, 19, 20, 21, 22, 23,
		24, 25, 26, 27, 28, 29, 30, 31,
		32, 33, 34, 35, 36, 37, 38, 39,
		40, 41, 42, 43, 44, 45, 46, 47,
		48, 49,  8,  8,  9,  9, 10, 10,
		11, 11, 12, 12, 13, 14, 14, 15,
		15, 16, 16, 17, 17, 18, 18, 19,
		20, 20, 21, 21, 22, 22, 23, 23,
		24, 24, 25, 26, 26, 27, 27, 28,
		28, 29, 29, 30, 30, 31, 32, 32,
		33, 33, 34, 34, 35, 35, 36, 36,
		37, 38, 38, 39, 39, 40, 40, 41,
		41, 42, 42, 43, 44, 44, 45, 45,
		46, 46, 47, 47, 48, 48, 49, 49,
	};

	return (pri_t)kpri_tab[l->l_usrpri];
}

/*
 * sched_unsleep:
 *
 * This is called when the LWP has not been awoken normally but instead
 * interrupted: for example, if the sleep timed out.  Because of this,
 * it's not a valid action for running or idle LWPs.
 */
static void
sched_unsleep(struct lwp *l)
{

	lwp_unlock(l);
	panic("sched_unsleep");
}

inline void
resched_cpu(struct lwp *l)
{
	struct cpu_info *ci;
	const pri_t pri = lwp_eprio(l);

	/*
	 * XXXSMP
	 * Since l->l_cpu persists across a context switch,
	 * this gives us *very weak* processor affinity, in
	 * that we notify the CPU on which the process last
	 * ran that it should try to switch.
	 *
	 * This does not guarantee that the process will run on
	 * that processor next, because another processor might
	 * grab it the next time it performs a context switch.
	 *
	 * This also does not handle the case where its last
	 * CPU is running a higher-priority process, but every
	 * other CPU is running a lower-priority process.  There
	 * are ways to handle this situation, but they're not
	 * currently very pretty, and we also need to weigh the
	 * cost of moving a process from one CPU to another.
	 */
	ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
	if (pri < ci->ci_schedstate.spc_curpriority)
		cpu_need_resched(ci, 0);
}

static void
sched_changepri(struct lwp *l, pri_t pri)
{

	LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	l->l_usrpri = pri;
	if (l->l_priority < PUSER)
		return;

	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
		l->l_priority = pri;
		return;
	}

	sched_dequeue(l);
	l->l_priority = pri;
	sched_enqueue(l, false);
	resched_cpu(l);
}

static void
sched_lendpri(struct lwp *l, pri_t pri)
{

	LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
		l->l_inheritedprio = pri;
		return;
	}

	sched_dequeue(l);
	l->l_inheritedprio = pri;
	sched_enqueue(l, false);
	resched_cpu(l);
}

struct lwp *
syncobj_noowner(wchan_t wchan)
{

	return NULL;
}


/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */
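
/*
 * Why exp(-1/20) gives a 95% decay over 60 seconds: sched_pstats() scales
 * p_pctcpu by ccpu once per second (it reschedules itself with
 * callout_schedule(&sched_pstats_ch, hz)), so after 60 seconds the
 * remaining fraction is exp(-1/20)^60 = exp(-3) ~= 0.05, i.e. roughly 95%
 * of the old value has decayed away.
 */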

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the percentage of CPU used by a
 * process.
 */
#define	CCPU_SHIFT	(FSHIFT + 1)
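
/*
 * Note that with CCPU_SHIFT defined as (FSHIFT + 1), the
 * "#if (FSHIFT >= CCPU_SHIFT)" test in sched_pstats() below is false,
 * so the slower but more general formula is the one actually compiled in.
 */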

/*
 * sched_pstats:
 *
 * Update process statistics and check CPU resource allocation.
 * Call scheduler-specific hook to eventually adjust process/LWP
 * priorities.
 *
 * XXXSMP This needs to be reorganised in order to reduce the locking
 * burden.
 */
/* ARGSUSED */
void
sched_pstats(void *arg)
{
	struct rlimit *rlim;
	struct lwp *l;
	struct proc *p;
	int minslp, sig, clkhz;
	long runtm;

	sched_pstats_ticks++;

	mutex_enter(&proclist_mutex);
	PROCLIST_FOREACH(p, &allproc) {
		/*
		 * Increment time in/out of memory and sleep time (if
		 * sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		minslp = 2;
		mutex_enter(&p->p_smutex);
		runtm = p->p_rtime.tv_sec;
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if ((l->l_flag & LW_IDLE) != 0)
				continue;
			lwp_lock(l);
			runtm += l->l_rtime.tv_sec;
			l->l_swtime++;
			if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
			    l->l_stat == LSSUSPENDED) {
				l->l_slptime++;
				minslp = min(minslp, l->l_slptime);
			} else
				minslp = 0;
			lwp_unlock(l);
		}

		/*
		 * Check if the process exceeds its CPU resource allocation.
		 * If over max, kill it.
		 */
		rlim = &p->p_rlimit[RLIMIT_CPU];
		sig = 0;
		if (runtm >= rlim->rlim_cur) {
			if (runtm >= rlim->rlim_max)
				sig = SIGKILL;
			else {
				sig = SIGXCPU;
				if (rlim->rlim_cur < rlim->rlim_max)
					rlim->rlim_cur += 5;
			}
		}

		mutex_spin_enter(&p->p_stmutex);
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		if (minslp < 1) {
			/*
			 * p_pctcpu is only for ps.
			 */
			clkhz = stathz != 0 ? stathz : hz;
#if (FSHIFT >= CCPU_SHIFT)
			p->p_pctcpu += (clkhz == 100) ?
			    ((fixpt_t)p->p_cpticks) << (FSHIFT - CCPU_SHIFT) :
			    100 * (((fixpt_t)p->p_cpticks)
			    << (FSHIFT - CCPU_SHIFT)) / clkhz;
#else
			p->p_pctcpu += ((FSCALE - ccpu) *
			    (p->p_cpticks * FSCALE / clkhz)) >> FSHIFT;
#endif
			p->p_cpticks = 0;
		}

		sched_pstats_hook(p, minslp);
		mutex_spin_exit(&p->p_stmutex);
		mutex_exit(&p->p_smutex);
		if (sig) {
			psignal(p, sig);
		}
	}
	mutex_exit(&proclist_mutex);
	uvm_meter();
	wakeup(&lbolt);
	callout_schedule(&sched_pstats_ch, hz);
}