1 /* $NetBSD: kern_synch.c,v 1.177.2.22 2007/04/03 15:21:20 matt Exp $ */
2
3 /*-
4 * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and
10 * Daniel Sieger.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. All advertising materials mentioning features or use of this software
21 * must display the following acknowledgement:
22 * This product includes software developed by the NetBSD
23 * Foundation, Inc. and its contributors.
24 * 4. Neither the name of The NetBSD Foundation nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
29 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
30 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
31 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
32 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41 /*-
42 * Copyright (c) 1982, 1986, 1990, 1991, 1993
43 * The Regents of the University of California. All rights reserved.
44 * (c) UNIX System Laboratories, Inc.
45 * All or some portions of this file are derived from material licensed
46 * to the University of California by American Telephone and Telegraph
47 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
48 * the permission of UNIX System Laboratories, Inc.
49 *
50 * Redistribution and use in source and binary forms, with or without
51 * modification, are permitted provided that the following conditions
52 * are met:
53 * 1. Redistributions of source code must retain the above copyright
54 * notice, this list of conditions and the following disclaimer.
55 * 2. Redistributions in binary form must reproduce the above copyright
56 * notice, this list of conditions and the following disclaimer in the
57 * documentation and/or other materials provided with the distribution.
58 * 3. Neither the name of the University nor the names of its contributors
59 * may be used to endorse or promote products derived from this software
60 * without specific prior written permission.
61 *
62 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
63 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
66 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
67 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
68 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
69 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
70 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
71 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
72 * SUCH DAMAGE.
73 *
74 * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95
75 */
76
77 #include <sys/cdefs.h>
78 __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.177.2.22 2007/04/03 15:21:20 matt Exp $");
79
80 #include "opt_kstack.h"
81 #include "opt_lockdebug.h"
82 #include "opt_multiprocessor.h"
83 #include "opt_perfctrs.h"
84
85 #define __MUTEX_PRIVATE
86
87 #include <sys/param.h>
88 #include <sys/systm.h>
89 #include <sys/proc.h>
90 #include <sys/kernel.h>
91 #if defined(PERFCTRS)
92 #include <sys/pmc.h>
93 #endif
94 #include <sys/cpu.h>
95 #include <sys/resourcevar.h>
96 #include <sys/sched.h>
97 #include <sys/syscall_stats.h>
98 #include <sys/sleepq.h>
99 #include <sys/lockdebug.h>
100
101 #include <uvm/uvm_extern.h>
102
103 struct callout sched_pstats_ch = CALLOUT_INITIALIZER_SETFUNC(sched_pstats, NULL);
104 unsigned int sched_pstats_ticks;
105
106 int lbolt; /* once a second sleep address */
107
108 static void sched_unsleep(struct lwp *);
109 static void sched_changepri(struct lwp *, pri_t);
110 static void sched_lendpri(struct lwp *, pri_t);
111
112 syncobj_t sleep_syncobj = {
113 SOBJ_SLEEPQ_SORTED,
114 sleepq_unsleep,
115 sleepq_changepri,
116 sleepq_lendpri,
117 syncobj_noowner,
118 };
119
120 syncobj_t sched_syncobj = {
121 SOBJ_SLEEPQ_SORTED,
122 sched_unsleep,
123 sched_changepri,
124 sched_lendpri,
125 syncobj_noowner,
126 };
127
128 /*
129 * During autoconfiguration or after a panic, a sleep will simply lower the
130 * priority briefly to allow interrupts, then return. The priority to be
131 * used (safepri) is machine-dependent, thus this value is initialized and
132 * maintained in the machine-dependent layers. This priority will typically
133 * be 0, or the lowest priority that is safe for use on the interrupt stack;
134 * it can be made higher to block network software interrupts after panics.
135 */
136 int safepri;
137
138 /*
139 * OBSOLETE INTERFACE
140 *
141 * General sleep call. Suspends the current process until a wakeup is
142 * performed on the specified identifier. The process will then be made
143 * runnable with the specified priority. Sleeps at most timo/hz seconds (0
144 * means no timeout). If pri includes PCATCH flag, signals are checked
145 * before and after sleeping, else signals are not checked. Returns 0 if
146 * awakened, or EWOULDBLOCK if the timeout expires. If PCATCH is set and a
147 * signal needs to be delivered, ERESTART is returned if the current system
148 * call should be restarted if possible, and EINTR is returned if the system
149 * call should be interrupted by the signal.
150 *
151 * The interlock is held until we are on a sleep queue. The interlock will
152 * be reacquired before returning to the caller unless the PNORELOCK flag
153 * is specified, in which case the interlock will always be unlocked upon
154 * return.
155 */
156 int
157 ltsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
158 volatile struct simplelock *interlock)
159 {
160 struct lwp *l = curlwp;
161 sleepq_t *sq;
162 int error, catch;
163
164 if (sleepq_dontsleep(l)) {
165 (void)sleepq_abort(NULL, 0);
166 if ((priority & PNORELOCK) != 0)
167 simple_unlock(interlock);
168 return 0;
169 }
170
171 sq = sleeptab_lookup(&sleeptab, ident);
172 sleepq_enter(sq, l);
173
174 if (interlock != NULL) {
175 LOCK_ASSERT(simple_lock_held(interlock));
176 simple_unlock(interlock);
177 }
178
179 catch = priority & PCATCH;
180 sleepq_block(sq, priority & PRIMASK, ident, wmesg, timo, catch,
181 &sleep_syncobj);
182 error = sleepq_unblock(timo, catch);
183
184 if (interlock != NULL && (priority & PNORELOCK) == 0)
185 simple_lock(interlock);
186
187 return error;
188 }
189
190 int
191 mtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
192 kmutex_t *mtx)
193 {
194 struct lwp *l = curlwp;
195 sleepq_t *sq;
196 int error, catch;
197
198 if (sleepq_dontsleep(l)) {
199 (void)sleepq_abort(mtx, (priority & PNORELOCK) != 0);
200 return 0;
201 }
202
203 sq = sleeptab_lookup(&sleeptab, ident);
204 sleepq_enter(sq, l);
205 mutex_exit(mtx);
206
207 catch = priority & PCATCH;
208 sleepq_block(sq, priority & PRIMASK, ident, wmesg, timo, catch,
209 &sleep_syncobj);
210 error = sleepq_unblock(timo, catch);
211
212 if ((priority & PNORELOCK) == 0)
213 mutex_enter(mtx);
214
215 return error;
216 }
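
/*
 * A minimal usage sketch of the mtsleep()/wakeup() pair (an illustration,
 * not part of the original code; "example_lock", "example_ready" and
 * "example_obj" are hypothetical). The condition is re-checked in a loop
 * because a wakeup does not guarantee that it still holds once the
 * sleeper has reacquired the mutex:
 *
 *	mutex_enter(&example_lock);
 *	while (!example_ready) {
 *		error = mtsleep(&example_obj, PUSER | PCATCH, "exwait", 0,
 *		    &example_lock);
 *		if (error != 0)
 *			break;
 *	}
 *	mutex_exit(&example_lock);
 *
 * The waking side would set example_ready while holding example_lock and
 * then call wakeup(&example_obj).
 */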
217
218 /*
219 * General sleep call for situations where a wake-up is not expected.
220 */
221 int
222 kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
223 {
224 struct lwp *l = curlwp;
225 sleepq_t *sq;
226 int error;
227
228 if (sleepq_dontsleep(l))
229 return sleepq_abort(NULL, 0);
230
231 if (mtx != NULL)
232 mutex_exit(mtx);
233 sq = sleeptab_lookup(&sleeptab, l);
234 sleepq_enter(sq, l);
235 sleepq_block(sq, sched_kpri(l), l, wmesg, timo, intr, &sleep_syncobj);
236 error = sleepq_unblock(timo, intr);
237 if (mtx != NULL)
238 mutex_enter(mtx);
239
240 return error;
241 }
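
/*
 * A minimal kpause() sketch (hypothetical caller): pause for roughly a
 * tenth of a second without expecting a wakeup, e.g. while polling
 * hardware:
 *
 *	(void)kpause("expoll", false, hz / 10, NULL);
 *
 * Passing a non-NULL kmutex_t drops and reacquires it around the sleep,
 * and intr == true allows signals to end the pause early.
 */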
242
243 /*
244 * OBSOLETE INTERFACE
245 *
246 * Make all processes sleeping on the specified identifier runnable.
247 */
248 void
249 wakeup(wchan_t ident)
250 {
251 sleepq_t *sq;
252
253 if (cold)
254 return;
255
256 sq = sleeptab_lookup(&sleeptab, ident);
257 sleepq_wake(sq, ident, (u_int)-1);
258 }
259
260 /*
261 * OBSOLETE INTERFACE
262 *
263 * Make the highest priority process (the first in line) sleeping on the
264 * specified identifier runnable.
265 */
266 void
267 wakeup_one(wchan_t ident)
268 {
269 sleepq_t *sq;
270
271 if (cold)
272 return;
273
274 sq = sleeptab_lookup(&sleeptab, ident);
275 sleepq_wake(sq, ident, 1);
276 }
277
278
279 /*
280 * General yield call. Puts the current process back on its run queue and
281 * performs a voluntary context switch. Should only be called when the
282 * current process explicitly requests it (e.g., sched_yield(2) in compat code).
283 */
284 void
285 yield(void)
286 {
287 struct lwp *l = curlwp;
288
289 KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
290 lwp_lock(l);
291 if (l->l_stat == LSONPROC) {
292 KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
293 l->l_priority = l->l_usrpri;
294 }
295 l->l_nvcsw++;
296 mi_switch(l);
297 KERNEL_LOCK(l->l_biglocks, l);
298 }
299
300 /*
301 * General preemption call. Puts the current process back on its run queue
302 * and performs an involuntary context switch.
303 */
304 void
305 preempt(void)
306 {
307 struct lwp *l = curlwp;
308
309 KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
310 lwp_lock(l);
311 if (l->l_stat == LSONPROC) {
312 KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
313 l->l_priority = l->l_usrpri;
314 }
315 l->l_nivcsw++;
316 (void)mi_switch(l);
317 KERNEL_LOCK(l->l_biglocks, l);
318 }
319
320 /*
321 * Compute the amount of time during which the current lwp was running.
322 *
323 * - update l_rtime unless it's an idle lwp.
324 * - update spc_runtime for the next lwp.
325 */
326
327 static inline void
328 updatertime(struct lwp *l, struct schedstate_percpu *spc)
329 {
330 struct timeval tv;
331 long s, u;
332
333 if ((l->l_flag & LW_IDLE) != 0) {
334 microtime(&spc->spc_runtime);
335 return;
336 }
337
338 microtime(&tv);
339 u = l->l_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec);
340 s = l->l_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
341 if (u < 0) {
342 u += 1000000;
343 s--;
344 } else if (u >= 1000000) {
345 u -= 1000000;
346 s++;
347 }
348 l->l_rtime.tv_usec = u;
349 l->l_rtime.tv_sec = s;
350
351 spc->spc_runtime = tv;
352 }
353
354 /*
355 * The machine-independent part of a context switch.
356 *
357 * Returns 1 if another LWP was actually run.
358 */
359 int
360 mi_switch(struct lwp *l)
361 {
362 struct schedstate_percpu *spc;
363 struct lwp *newl;
364 int retval, oldspl;
365
366 LOCK_ASSERT(lwp_locked(l, NULL));
367
368 #ifdef LOCKDEBUG
369 spinlock_switchcheck();
370 simple_lock_switchcheck();
371 #endif
372 #ifdef KSTACK_CHECK_MAGIC
373 kstack_check_magic(l);
374 #endif
375
376 /*
377 * It's safe to read the per-CPU schedstate unlocked here, as all we
378 * are after is the run time, and that's guaranteed to have been last
379 * updated by this CPU.
380 */
381 KDASSERT(l->l_cpu == curcpu());
382 spc = &l->l_cpu->ci_schedstate;
383
384 /* Count time spent in current system call */
385 SYSCALL_TIME_SLEEP(l);
386
387 /*
388 * XXXSMP If we are using h/w performance counters, save context.
389 */
390 #if PERFCTRS
391 if (PMC_ENABLED(l->l_proc)) {
392 pmc_save_context(l->l_proc);
393 }
394 #endif
395
396 /*
397 * If we are on the CPU and have gotten this far, then we must yield.
398 */
399 KASSERT(l->l_stat != LSRUN);
400 if (l->l_stat == LSONPROC) {
401 KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
402 l->l_stat = LSRUN;
403 if ((l->l_flag & LW_IDLE) == 0) {
404 sched_enqueue(l, true);
405 }
406 }
407
408 /*
409 * Process is about to yield the CPU; clear the appropriate
410 * scheduling flags.
411 */
412 spc->spc_flags &= ~SPCF_SWITCHCLEAR;
413
414 LOCKDEBUG_BARRIER(l->l_mutex, 1);
415
416 /*
417 * Acquire the spc_mutex if necessary.
418 */
419 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
420 if (l->l_mutex != spc->spc_mutex) {
421 mutex_enter(spc->spc_mutex);
422 }
423 #endif
424 /*
425 * Let sched_nextlwp() select the LWP to run on the CPU next.
426 * If no LWP is runnable, switch to the idle LWP.
427 */
428 newl = sched_nextlwp();
429 if (newl) {
430 sched_dequeue(newl);
431 } else {
432 newl = l->l_cpu->ci_data.cpu_idlelwp;
433 KASSERT(newl != NULL);
434 }
435 KASSERT(lwp_locked(newl, spc->spc_mutex));
436 newl->l_stat = LSONPROC;
437 newl->l_cpu = l->l_cpu;
438 newl->l_flag |= LW_RUNNING;
439
440 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
441 if (l->l_mutex != spc->spc_mutex) {
442 mutex_exit(spc->spc_mutex);
443 }
444 #endif
445
446 updatertime(l, spc);
447 if (l != newl) {
448 struct lwp *prevlwp;
449
450 /* Unlocked, but for statistics only. */
451 uvmexp.swtch++;
452
453 /* Save old VM context. */
454 pmap_deactivate(l);
455
456 /* Switch to the new LWP.. */
457 l->l_flag &= ~LW_RUNNING;
458 oldspl = MUTEX_SPIN_OLDSPL(l->l_cpu);
459 prevlwp = cpu_switchto(l, newl);
460
461 /*
462 * .. we have switched away and are now back so we must
463 * be the new curlwp. prevlwp is who we replaced.
464 */
465 curlwp = l;
466 if (prevlwp != NULL) {
467 curcpu()->ci_mtx_oldspl = oldspl;
468 lwp_unlock(prevlwp);
469 } else {
470 splx(oldspl);
471 }
472
473 /* Restore VM context. */
474 pmap_activate(l);
475 retval = 1;
476 } else {
477 /* Nothing to do - just unlock and return. */
478 lwp_unlock(l);
479 retval = 0;
480 }
481
482 KASSERT(l == curlwp);
483 KASSERT(l->l_stat == LSONPROC);
484
485 /*
486 * XXXSMP If we are using h/w performance counters, restore context.
487 */
488 #if PERFCTRS
489 if (PMC_ENABLED(l->l_proc)) {
490 pmc_restore_context(l->l_proc);
491 }
492 #endif
493
494 /*
495 * We're running again; record our new start time. We might
496 * be running on a new CPU now, so don't use the cached
497 * schedstate_percpu pointer.
498 */
499 SYSCALL_TIME_WAKEUP(l);
500 KDASSERT(l->l_cpu == curcpu());
501 LOCKDEBUG_BARRIER(NULL, 1);
502
503 return retval;
504 }
505
506 /*
507 * Change process state to be runnable, placing it on the run queue if it is
508 * in memory, and awakening the swapper if it isn't in memory.
509 *
510 * Call with the process and LWP locked. Will return with the LWP unlocked.
511 */
512 void
513 setrunnable(struct lwp *l)
514 {
515 struct proc *p = l->l_proc;
516 sigset_t *ss;
517
518 KASSERT((l->l_flag & LW_IDLE) == 0);
519 KASSERT(mutex_owned(&p->p_smutex));
520 KASSERT(lwp_locked(l, NULL));
521
522 switch (l->l_stat) {
523 case LSSTOP:
524 /*
525 * If we're being traced (possibly because someone attached us
526 * while we were stopped), check for a signal from the debugger.
527 */
528 if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xstat != 0) {
529 if ((sigprop[p->p_xstat] & SA_TOLWP) != 0)
530 ss = &l->l_sigpend.sp_set;
531 else
532 ss = &p->p_sigpend.sp_set;
533 sigaddset(ss, p->p_xstat);
534 signotify(l);
535 }
536 p->p_nrlwps++;
537 break;
538 case LSSUSPENDED:
539 l->l_flag &= ~LW_WSUSPEND;
540 p->p_nrlwps++;
541 break;
542 case LSSLEEP:
543 KASSERT(l->l_wchan != NULL);
544 break;
545 default:
546 panic("setrunnable: lwp %p state was %d", l, l->l_stat);
547 }
548
549 /*
550 * If the LWP was sleeping interruptibly, then it's OK to start it
551 * again. If not, mark it as still sleeping.
552 */
553 if (l->l_wchan != NULL) {
554 l->l_stat = LSSLEEP;
555 /* lwp_unsleep() will release the lock. */
556 lwp_unsleep(l);
557 return;
558 }
559
560 LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
561
562 /*
563 * If the LWP is still on the CPU, mark it as LSONPROC. It may be
564 * about to call mi_switch(), in which case it will yield.
565 */
566 if ((l->l_flag & LW_RUNNING) != 0) {
567 l->l_stat = LSONPROC;
568 l->l_slptime = 0;
569 lwp_unlock(l);
570 return;
571 }
572
573 /*
574 * Set the LWP runnable. If it's swapped out, we need to wake the swapper
575 * to bring it back in. Otherwise, enter it into a run queue.
576 */
577 sched_setrunnable(l);
578 l->l_stat = LSRUN;
579 l->l_slptime = 0;
580
581 if (l->l_flag & LW_INMEM) {
582 sched_enqueue(l, false);
583 resched_cpu(l);
584 lwp_unlock(l);
585 } else {
586 lwp_unlock(l);
587 uvm_kick_scheduler();
588 }
589 }
590
591 /*
592 * suspendsched:
593 *
594 * Convert all non-PK_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED.
595 */
596 void
597 suspendsched(void)
598 {
599 #ifdef MULTIPROCESSOR
600 CPU_INFO_ITERATOR cii;
601 struct cpu_info *ci;
602 #endif
603 struct lwp *l;
604 struct proc *p;
605
606 /*
607 * We do this by process in order not to violate the locking rules.
608 */
609 mutex_enter(&proclist_mutex);
610 PROCLIST_FOREACH(p, &allproc) {
611 mutex_enter(&p->p_smutex);
612
613 if ((p->p_flag & PK_SYSTEM) != 0) {
614 mutex_exit(&p->p_smutex);
615 continue;
616 }
617
618 p->p_stat = SSTOP;
619
620 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
621 if (l == curlwp)
622 continue;
623
624 lwp_lock(l);
625
626 /*
627 * Set LW_WREBOOT so that the LWP will suspend itself
628 * when it tries to return to user mode. We want to
629 * try to get as many LWPs as possible to
630 * the user/kernel boundary, so that they will
631 * release any locks that they hold.
632 */
633 l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);
634
635 if (l->l_stat == LSSLEEP &&
636 (l->l_flag & LW_SINTR) != 0) {
637 /* setrunnable() will release the lock. */
638 setrunnable(l);
639 continue;
640 }
641
642 lwp_unlock(l);
643 }
644
645 mutex_exit(&p->p_smutex);
646 }
647 mutex_exit(&proclist_mutex);
648
649 /*
650 * Kick all CPUs to make them preempt any LWPs running in user mode.
651 * They'll trap into the kernel and suspend themselves in userret().
652 */
653 #ifdef MULTIPROCESSOR
654 for (CPU_INFO_FOREACH(cii, ci))
655 cpu_need_resched(ci, 0);
656 #else
657 cpu_need_resched(curcpu(), 0);
658 #endif
659 }
660
661 /*
662 * sched_kpri:
663 *
664 * Scale a priority level to a kernel priority level, usually
665 * for an LWP that is about to sleep.
666 */
667 pri_t
668 sched_kpri(struct lwp *l)
669 {
670 /*
671 * Scale user priorities (127 -> 50) up to kernel priorities
672 * in the range (49 -> 8). Reserve the top 8 kernel priorities
673 * for high priority kthreads. Kernel priorities passed in
674 * are left "as is". XXX This is somewhat arbitrary.
675 */
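/*
 * For example (values from kpri_tab below): a kernel priority of 30
 * maps to 30 unchanged, while user priorities 50, 64 and 127 map to
 * kernel priorities 8, 15 and 49 respectively.
 */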
676 static const uint8_t kpri_tab[] = {
677 0, 1, 2, 3, 4, 5, 6, 7,
678 8, 9, 10, 11, 12, 13, 14, 15,
679 16, 17, 18, 19, 20, 21, 22, 23,
680 24, 25, 26, 27, 28, 29, 30, 31,
681 32, 33, 34, 35, 36, 37, 38, 39,
682 40, 41, 42, 43, 44, 45, 46, 47,
683 48, 49, 8, 8, 9, 9, 10, 10,
684 11, 11, 12, 12, 13, 14, 14, 15,
685 15, 16, 16, 17, 17, 18, 18, 19,
686 20, 20, 21, 21, 22, 22, 23, 23,
687 24, 24, 25, 26, 26, 27, 27, 28,
688 28, 29, 29, 30, 30, 31, 32, 32,
689 33, 33, 34, 34, 35, 35, 36, 36,
690 37, 38, 38, 39, 39, 40, 40, 41,
691 41, 42, 42, 43, 44, 44, 45, 45,
692 46, 46, 47, 47, 48, 48, 49, 49,
693 };
694
695 return (pri_t)kpri_tab[l->l_usrpri];
696 }
697
698 /*
699 * sched_unsleep:
700 *
701 * This is called when the LWP has not been awoken normally but instead
702 * interrupted: for example, if the sleep timed out. Because of this,
703 * it's not a valid action for running or idle LWPs.
704 */
705 static void
706 sched_unsleep(struct lwp *l)
707 {
708
709 lwp_unlock(l);
710 panic("sched_unsleep");
711 }
712
713 inline void
714 resched_cpu(struct lwp *l)
715 {
716 struct cpu_info *ci;
717 const pri_t pri = lwp_eprio(l);
718
719 /*
720 * XXXSMP
721 * Since l->l_cpu persists across a context switch,
722 * this gives us *very weak* processor affinity, in
723 * that we notify the CPU on which the process last
724 * ran that it should try to switch.
725 *
726 * This does not guarantee that the process will run on
727 * that processor next, because another processor might
728 * grab it the next time it performs a context switch.
729 *
730 * This also does not handle the case where its last
731 * CPU is running a higher-priority process, but every
732 * other CPU is running a lower-priority process. There
733 * are ways to handle this situation, but they're not
734 * currently very pretty, and we also need to weigh the
735 * cost of moving a process from one CPU to another.
736 */
737 ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
738 if (pri < ci->ci_schedstate.spc_curpriority)
739 cpu_need_resched(ci, 0);
740 }
741
742 static void
743 sched_changepri(struct lwp *l, pri_t pri)
744 {
745
746 LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
747
748 l->l_usrpri = pri;
749 if (l->l_priority < PUSER)
750 return;
751
752 if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
753 l->l_priority = pri;
754 return;
755 }
756
757 sched_dequeue(l);
758 l->l_priority = pri;
759 sched_enqueue(l, false);
760 resched_cpu(l);
761 }
762
763 static void
764 sched_lendpri(struct lwp *l, pri_t pri)
765 {
766
767 LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
768
769 if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
770 l->l_inheritedprio = pri;
771 return;
772 }
773
774 sched_dequeue(l);
775 l->l_inheritedprio = pri;
776 sched_enqueue(l, false);
777 resched_cpu(l);
778 }
779
780 struct lwp *
781 syncobj_noowner(wchan_t wchan)
782 {
783
784 return NULL;
785 }
786
787
788 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
789 fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
790
791 /*
792 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
793 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
794 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
795 *
796 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
797 * 1 - exp(-1/20) ~= 0.0488 == 1 (fixed pt, *11* bits).
798 *
799 * If you don't want to bother with the faster/more-accurate formula, you
800 * can set CCPU_SHIFT to (FSHIFT + 1), which will use a slower/less-accurate
801 * (more general) method of calculating the percentage of CPU used by a process.
802 */
803 #define CCPU_SHIFT (FSHIFT + 1)
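
/*
 * A worked example of the decay arithmetic above (a sketch assuming the
 * usual FSHIFT of 11, so FSCALE == 2048): ccpu == 0.95122942 * 2048 ~= 1948
 * in fixed point. A p_pctcpu of 1.0 (2048) therefore becomes
 * (2048 * 1948) >> 11 == 1948 ~= 0.951 after one sched_pstats() pass, and
 * 0.95122942^60 == exp(-3) ~= 0.0498 after 60 one-second passes, i.e. about
 * 95% of the value has decayed -- which is where the "decay 95% of
 * p_pctcpu in 60 seconds" figure above comes from.
 */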
804
805 /*
806 * sched_pstats:
807 *
808 * Update process statistics and check CPU resource allocation.
809 * Call scheduler-specific hook to eventually adjust process/LWP
810 * priorities.
811 *
812 * XXXSMP This needs to be reorganised in order to reduce the locking
813 * burden.
814 */
815 /* ARGSUSED */
816 void
817 sched_pstats(void *arg)
818 {
819 struct rlimit *rlim;
820 struct lwp *l;
821 struct proc *p;
822 int minslp, sig, clkhz;
823 long runtm;
824
825 sched_pstats_ticks++;
826
827 mutex_enter(&proclist_mutex);
828 PROCLIST_FOREACH(p, &allproc) {
829 /*
830 * Increment time in/out of memory and sleep time (if
831 * sleeping). We ignore overflow; with 16-bit int's
832 * (remember them?) overflow takes 45 days.
833 */
834 minslp = 2;
835 mutex_enter(&p->p_smutex);
836 runtm = p->p_rtime.tv_sec;
837 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
838 if ((l->l_flag & LW_IDLE) != 0)
839 continue;
840 lwp_lock(l);
841 runtm += l->l_rtime.tv_sec;
842 l->l_swtime++;
843 if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
844 l->l_stat == LSSUSPENDED) {
845 l->l_slptime++;
846 minslp = min(minslp, l->l_slptime);
847 } else
848 minslp = 0;
849 lwp_unlock(l);
850 }
851
852 /*
853 * Check if the process exceeds its CPU resource allocation.
854 * If over max, kill it.
855 */
856 rlim = &p->p_rlimit[RLIMIT_CPU];
857 sig = 0;
858 if (runtm >= rlim->rlim_cur) {
859 if (runtm >= rlim->rlim_max)
860 sig = SIGKILL;
861 else {
862 sig = SIGXCPU;
863 if (rlim->rlim_cur < rlim->rlim_max)
864 rlim->rlim_cur += 5;
865 }
866 }
867
868 mutex_spin_enter(&p->p_stmutex);
869 if (minslp < 1) {
870 /*
871 * p_pctcpu is only for ps.
872 */
873 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
874 clkhz = stathz != 0 ? stathz : hz;
875 #if (FSHIFT >= CCPU_SHIFT)
876 p->p_pctcpu += (clkhz == 100)?
877 ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
878 100 * (((fixpt_t) p->p_cpticks)
879 << (FSHIFT - CCPU_SHIFT)) / clkhz;
880 #else
881 p->p_pctcpu += ((FSCALE - ccpu) *
882 (p->p_cpticks * FSCALE / clkhz)) >> FSHIFT;
883 #endif
884 p->p_cpticks = 0;
885 }
886
887 sched_pstats_hook(p, minslp);
888 mutex_spin_exit(&p->p_stmutex);
889 mutex_exit(&p->p_smutex);
890 if (sig) {
891 psignal(p, sig);
892 }
893 }
894 mutex_exit(&proclist_mutex);
895 uvm_meter();
896 wakeup(&lbolt);
897 callout_schedule(&sched_pstats_ch, hz);
898 }
899