/*	$NetBSD: kern_synch.c,v 1.177.2.25 2007/04/18 10:14:55 yamt Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and
 * Daniel Sieger.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.177.2.25 2007/04/18 10:14:55 yamt Exp $");

#include "opt_kstack.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include "opt_perfctrs.h"

#define	__MUTEX_PRIVATE

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#if defined(PERFCTRS)
#include <sys/pmc.h>
#endif
#include <sys/cpu.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/syscall_stats.h>
#include <sys/sleepq.h>
#include <sys/lockdebug.h>

#include <uvm/uvm_extern.h>

struct callout sched_pstats_ch = CALLOUT_INITIALIZER_SETFUNC(sched_pstats, NULL);
unsigned int sched_pstats_ticks;

int	lbolt;			/* once a second sleep address */

static void	sched_unsleep(struct lwp *);
static void	sched_changepri(struct lwp *, pri_t);
static void	sched_lendpri(struct lwp *, pri_t);

syncobj_t sleep_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	sleepq_unsleep,
	sleepq_changepri,
	sleepq_lendpri,
	syncobj_noowner,
};

syncobj_t sched_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	sched_unsleep,
	sched_changepri,
	sched_lendpri,
	syncobj_noowner,
};

/*
 * During autoconfiguration or after a panic, a sleep will simply lower the
 * priority briefly to allow interrupts, then return.  The priority to be
 * used (safepri) is machine-dependent, thus this value is initialized and
 * maintained in the machine-dependent layers.  This priority will typically
 * be 0, or the lowest priority that is safe for use on the interrupt stack;
 * it can be made higher to block network software interrupts after panics.
 */
int	safepri;

/*
 * OBSOLETE INTERFACE
 *
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds (0
 * means no timeout).  If pri includes the PCATCH flag, signals are checked
 * before and after sleeping; otherwise signals are not checked.  Returns 0
 * if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
 * signal needs to be delivered, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the system
 * call should be interrupted by the signal.
 *
 * The interlock is held until we are on a sleep queue.  The interlock will
 * be locked before returning back to the caller unless the PNORELOCK flag
 * is specified, in which case the interlock will always be unlocked upon
 * return.
 */
int
ltsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
	volatile struct simplelock *interlock)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error, catch;

	if (sleepq_dontsleep(l)) {
		(void)sleepq_abort(NULL, 0);
		if ((priority & PNORELOCK) != 0)
			simple_unlock(interlock);
		return 0;
	}

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_enter(sq, l);

	if (interlock != NULL) {
		LOCK_ASSERT(simple_lock_held(interlock));
		simple_unlock(interlock);
	}

	catch = priority & PCATCH;
	sleepq_block(sq, priority & PRIMASK, ident, wmesg, timo, catch,
	    &sleep_syncobj);
	error = sleepq_unblock(timo, catch);

	if (interlock != NULL && (priority & PNORELOCK) == 0)
		simple_lock(interlock);

	return error;
}
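
/*
 * Illustrative usage sketch, not part of the original file: a typical
 * ltsleep() caller holds a simplelock protecting the condition, sleeps on
 * an address associated with it, and rechecks the condition once awoken.
 * The "sc" softc and its fields here are hypothetical:
 *
 *	simple_lock(&sc->sc_slock);
 *	while (sc->sc_busy) {
 *		error = ltsleep(&sc->sc_busy, PRIBIO | PCATCH, "xcbusy",
 *		    0, &sc->sc_slock);
 *		if (error)
 *			break;
 *	}
 *	simple_unlock(&sc->sc_slock);
 *
 * Unless PNORELOCK is included in the priority argument, the interlock is
 * held again by the time ltsleep() returns, so the recheck above is made
 * under the lock.
 */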

int
mtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
	kmutex_t *mtx)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error, catch;

	if (sleepq_dontsleep(l)) {
		(void)sleepq_abort(mtx, (priority & PNORELOCK) != 0);
		return 0;
	}

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_enter(sq, l);
	mutex_exit(mtx);

	catch = priority & PCATCH;
	sleepq_block(sq, priority & PRIMASK, ident, wmesg, timo, catch,
	    &sleep_syncobj);
	error = sleepq_unblock(timo, catch);

	if ((priority & PNORELOCK) == 0)
		mutex_enter(mtx);

	return error;
}
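
/*
 * Illustrative usage sketch, not part of the original file: mtsleep()
 * follows the same pattern with a kmutex(9) as the interlock.  The mutex
 * is dropped while the LWP sleeps and, unless PNORELOCK is set,
 * re-acquired before return.  The "sc" softc here is hypothetical:
 *
 *	mutex_enter(&sc->sc_lock);
 *	while (sc->sc_count == 0) {
 *		error = mtsleep(&sc->sc_count, PRIBIO | PCATCH, "scwait",
 *		    hz, &sc->sc_lock);
 *		if (error != 0 && error != EWOULDBLOCK)
 *			break;
 *	}
 *	mutex_exit(&sc->sc_lock);
 */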

/*
 * General sleep call for situations where a wake-up is not expected.
 */
int
kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error;

	if (sleepq_dontsleep(l))
		return sleepq_abort(NULL, 0);

	if (mtx != NULL)
		mutex_exit(mtx);
	sq = sleeptab_lookup(&sleeptab, l);
	sleepq_enter(sq, l);
	sleepq_block(sq, sched_kpri(l), l, wmesg, timo, intr, &sleep_syncobj);
	error = sleepq_unblock(timo, intr);
	if (mtx != NULL)
		mutex_enter(mtx);

	return error;
}
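
/*
 * Illustrative usage sketch, not part of the original file: kpause() is
 * for plain delays where no wakeup(9) on a shared channel is expected,
 * e.g. backing off for roughly a tenth of a second before retrying, with
 * no lock to drop:
 *
 *	(void)kpause("backoff", false, hz / 10, NULL);
 */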

/*
 * OBSOLETE INTERFACE
 *
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(wchan_t ident)
{
	sleepq_t *sq;

	if (cold)
		return;

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_wake(sq, ident, (u_int)-1);
}

/*
 * OBSOLETE INTERFACE
 *
 * Make the highest priority process first in line on the specified
 * identifier runnable.
 */
void
wakeup_one(wchan_t ident)
{
	sleepq_t *sq;

	if (cold)
		return;

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_wake(sq, ident, 1);
}

/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.  Should only be called when the
 * current process explicitly requests it (eg sched_yield(2) in compat code).
 */
void
yield(void)
{
	struct lwp *l = curlwp;

	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
	lwp_lock(l);
	if (l->l_stat == LSONPROC) {
		KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
		l->l_priority = l->l_usrpri;
	}
	l->l_nvcsw++;
	(void)mi_switch(l);
	KERNEL_LOCK(l->l_biglocks, l);
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.
 */
void
preempt(void)
{
	struct lwp *l = curlwp;

	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
	lwp_lock(l);
	if (l->l_stat == LSONPROC) {
		KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
		l->l_priority = l->l_usrpri;
	}
	l->l_nivcsw++;
	(void)mi_switch(l);
	KERNEL_LOCK(l->l_biglocks, l);
}

/*
 * Compute the amount of time during which the current lwp was running.
 *
 * - update l_rtime unless it's an idle lwp.
 * - update spc_runtime for the next lwp.
 */

static inline void
updatertime(struct lwp *l, struct schedstate_percpu *spc)
{
	struct timeval tv;
	long s, u;

	if ((l->l_flag & LW_IDLE) != 0) {
		microtime(&spc->spc_runtime);
		return;
	}

	microtime(&tv);
	u = l->l_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec);
	s = l->l_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
	if (u < 0) {
		u += 1000000;
		s--;
	} else if (u >= 1000000) {
		u -= 1000000;
		s++;
	}
	l->l_rtime.tv_usec = u;
	l->l_rtime.tv_sec = s;

	spc->spc_runtime = tv;
}

/*
 * The machine independent parts of context switch.
 *
 * Returns 1 if another process was actually run.
 */
int
mi_switch(struct lwp *l)
{
	struct schedstate_percpu *spc;
	struct lwp *newl;
	int retval, oldspl;

	LOCK_ASSERT(lwp_locked(l, NULL));

#ifdef LOCKDEBUG
	spinlock_switchcheck();
	simple_lock_switchcheck();
#endif
#ifdef KSTACK_CHECK_MAGIC
	kstack_check_magic(l);
#endif

	/*
	 * It's safe to read the per-CPU schedstate unlocked here, as all we
	 * are after is the run time and that's guaranteed to have been last
	 * updated by this CPU.
	 */
	KDASSERT(l->l_cpu == curcpu());
	spc = &l->l_cpu->ci_schedstate;

	/* Count time spent in current system call */
	SYSCALL_TIME_SLEEP(l);

	/*
	 * XXXSMP If we are using h/w performance counters, save context.
	 */
#if PERFCTRS
	if (PMC_ENABLED(l->l_proc)) {
		pmc_save_context(l->l_proc);
	}
#endif

	/*
	 * If on the CPU and we have gotten this far, then we must yield.
	 */
	KASSERT(l->l_stat != LSRUN);
	if (l->l_stat == LSONPROC) {
		KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
		l->l_stat = LSRUN;
		if ((l->l_flag & LW_IDLE) == 0) {
			sched_enqueue(l, true);
		}
	}

	/*
	 * Process is about to yield the CPU; clear the appropriate
	 * scheduling flags.
	 */
	spc->spc_flags &= ~SPCF_SWITCHCLEAR;

	LOCKDEBUG_BARRIER(l->l_mutex, 1);

	/*
	 * Acquire the spc_mutex if necessary.
	 */
	if (l->l_mutex != spc->spc_mutex) {
		mutex_spin_enter(spc->spc_mutex);
	}

	/*
	 * Let sched_nextlwp() select the LWP to run on the CPU next.
	 * If no LWP is runnable, switch to the idle LWP.
	 */
	newl = sched_nextlwp();
	if (newl) {
		sched_dequeue(newl);
	} else {
		newl = l->l_cpu->ci_data.cpu_idlelwp;
		KASSERT(newl != NULL);
	}
	KASSERT(lwp_locked(newl, spc->spc_mutex));
	newl->l_stat = LSONPROC;
	newl->l_cpu = l->l_cpu;
	newl->l_flag |= LW_RUNNING;
	cpu_did_resched();

	if (l->l_mutex != spc->spc_mutex) {
		mutex_spin_exit(spc->spc_mutex);
	}

	updatertime(l, spc);
	if (l != newl) {
		struct lwp *prevlwp;

		/* Unlocked, but for statistics only. */
		uvmexp.swtch++;

		/* Save old VM context. */
		pmap_deactivate(l);

		/* Switch to the new LWP.. */
		l->l_flag &= ~LW_RUNNING;
		oldspl = MUTEX_SPIN_OLDSPL(l->l_cpu);
		prevlwp = cpu_switchto(l, newl);

		/*
		 * .. we have switched away and are now back so we must
		 * be the new curlwp.  prevlwp is who we replaced.
		 */
		curlwp = l;
		if (prevlwp != NULL) {
			curcpu()->ci_mtx_oldspl = oldspl;
			lwp_unlock(prevlwp);
		} else {
			splx(oldspl);
		}

		/* Restore VM context. */
		pmap_activate(l);
		retval = 1;
	} else {
		/* Nothing to do - just unlock and return. */
		lwp_unlock(l);
		retval = 0;
	}

	KASSERT(l == curlwp);
	KASSERT(l->l_stat == LSONPROC);

	/*
	 * XXXSMP If we are using h/w performance counters, restore context.
	 */
#if PERFCTRS
	if (PMC_ENABLED(l->l_proc)) {
		pmc_restore_context(l->l_proc);
	}
#endif

	/*
	 * We're running again; record our new start time.  We might
	 * be running on a new CPU now, so don't use the cached
	 * schedstate_percpu pointer.
	 */
	SYSCALL_TIME_WAKEUP(l);
	KDASSERT(l->l_cpu == curcpu());
	LOCKDEBUG_BARRIER(NULL, 1);

	return retval;
}

/*
 * Change process state to be runnable, placing it on the run queue if it is
 * in memory, and awakening the swapper if it isn't in memory.
 *
 * Call with the process and LWP locked.  Will return with the LWP unlocked.
 */
void
setrunnable(struct lwp *l)
{
	struct proc *p = l->l_proc;
	sigset_t *ss;

	KASSERT((l->l_flag & LW_IDLE) == 0);
	KASSERT(mutex_owned(&p->p_smutex));
	KASSERT(lwp_locked(l, NULL));

	switch (l->l_stat) {
	case LSSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xstat != 0) {
			if ((sigprop[p->p_xstat] & SA_TOLWP) != 0)
				ss = &l->l_sigpend.sp_set;
			else
				ss = &p->p_sigpend.sp_set;
			sigaddset(ss, p->p_xstat);
			signotify(l);
		}
		p->p_nrlwps++;
		break;
	case LSSUSPENDED:
		l->l_flag &= ~LW_WSUSPEND;
		p->p_nrlwps++;
		break;
	case LSSLEEP:
		KASSERT(l->l_wchan != NULL);
		break;
	default:
		panic("setrunnable: lwp %p state was %d", l, l->l_stat);
	}

	/*
	 * If the LWP was sleeping interruptibly, then it's OK to start it
	 * again.  If not, mark it as still sleeping.
	 */
	if (l->l_wchan != NULL) {
		l->l_stat = LSSLEEP;
		/* lwp_unsleep() will release the lock. */
		lwp_unsleep(l);
		return;
	}

	LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	/*
	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
	 * about to call mi_switch(), in which case it will yield.
	 */
	if ((l->l_flag & LW_RUNNING) != 0) {
		l->l_stat = LSONPROC;
		l->l_slptime = 0;
		lwp_unlock(l);
		return;
	}

	/*
	 * Set the LWP runnable.  If it's swapped out, we need to wake the
	 * swapper to bring it back in.  Otherwise, enter it into a run queue.
	 */
	sched_setrunnable(l);
	l->l_stat = LSRUN;
	l->l_slptime = 0;

	if (l->l_flag & LW_INMEM) {
		sched_enqueue(l, false);
		resched_cpu(l);
		lwp_unlock(l);
	} else {
		lwp_unlock(l);
		uvm_kick_scheduler();
	}
}

/*
 * suspendsched:
 *
 *	Convert all non-PK_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED.
 */
void
suspendsched(void)
{
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
#endif
	struct lwp *l;
	struct proc *p;

	/*
	 * We do this by process in order not to violate the locking rules.
	 */
	mutex_enter(&proclist_mutex);
	PROCLIST_FOREACH(p, &allproc) {
		mutex_enter(&p->p_smutex);

		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(&p->p_smutex);
			continue;
		}

		p->p_stat = SSTOP;

		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if (l == curlwp)
				continue;

			lwp_lock(l);

			/*
			 * Set LW_WREBOOT so that the LWP will suspend itself
			 * when it tries to return to user mode.  We want to
			 * get as many LWPs as possible to the user/kernel
			 * boundary, so that they will release any locks that
			 * they hold.
			 */
			l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);

			if (l->l_stat == LSSLEEP &&
			    (l->l_flag & LW_SINTR) != 0) {
				/* setrunnable() will release the lock. */
				setrunnable(l);
				continue;
			}

			lwp_unlock(l);
		}

		mutex_exit(&p->p_smutex);
	}
	mutex_exit(&proclist_mutex);

	/*
	 * Kick all CPUs to make them preempt any LWPs running in user mode.
	 * They'll trap into the kernel and suspend themselves in userret().
	 */
#ifdef MULTIPROCESSOR
	for (CPU_INFO_FOREACH(cii, ci))
		cpu_need_resched(ci, 0);
#else
	cpu_need_resched(curcpu(), 0);
#endif
}

/*
 * sched_kpri:
 *
 *	Scale a priority level to a kernel priority level, usually
 *	for an LWP that is about to sleep.
 */
pri_t
sched_kpri(struct lwp *l)
{
	/*
	 * Scale user priorities (127 -> 50) up to kernel priorities
	 * in the range (49 -> 8).  Reserve the top 8 kernel priorities
	 * for high priority kthreads.  Kernel priorities passed in
	 * are left "as is".  XXX This is somewhat arbitrary.
	 */
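	/*
	 * For example (values taken from the table below): an l_usrpri of
	 * 50 maps to kernel priority 8, 127 maps to 49, and a value that is
	 * already in the kernel range, such as 25, is returned unchanged.
	 */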
	static const uint8_t kpri_tab[] = {
		 0,  1,  2,  3,  4,  5,  6,  7,
		 8,  9, 10, 11, 12, 13, 14, 15,
		16, 17, 18, 19, 20, 21, 22, 23,
		24, 25, 26, 27, 28, 29, 30, 31,
		32, 33, 34, 35, 36, 37, 38, 39,
		40, 41, 42, 43, 44, 45, 46, 47,
		48, 49,  8,  8,  9,  9, 10, 10,
		11, 11, 12, 12, 13, 14, 14, 15,
		15, 16, 16, 17, 17, 18, 18, 19,
		20, 20, 21, 21, 22, 22, 23, 23,
		24, 24, 25, 26, 26, 27, 27, 28,
		28, 29, 29, 30, 30, 31, 32, 32,
		33, 33, 34, 34, 35, 35, 36, 36,
		37, 38, 38, 39, 39, 40, 40, 41,
		41, 42, 42, 43, 44, 44, 45, 45,
		46, 46, 47, 47, 48, 48, 49, 49,
	};

	return (pri_t)kpri_tab[l->l_usrpri];
}

/*
 * sched_unsleep:
 *
 *	This is called when the LWP has not been awoken normally but instead
 *	interrupted: for example, if the sleep timed out.  Because of this,
 *	it's not a valid action for running or idle LWPs.
 */
static void
sched_unsleep(struct lwp *l)
{

	lwp_unlock(l);
	panic("sched_unsleep");
}

inline void
resched_cpu(struct lwp *l)
{
	struct cpu_info *ci;
	const pri_t pri = lwp_eprio(l);

	/*
	 * XXXSMP
	 * Since l->l_cpu persists across a context switch,
	 * this gives us *very weak* processor affinity, in
	 * that we notify the CPU on which the process last
	 * ran that it should try to switch.
	 *
	 * This does not guarantee that the process will run on
	 * that processor next, because another processor might
	 * grab it the next time it performs a context switch.
	 *
	 * This also does not handle the case where its last
	 * CPU is running a higher-priority process, but every
	 * other CPU is running a lower-priority process.  There
	 * are ways to handle this situation, but they're not
	 * currently very pretty, and we also need to weigh the
	 * cost of moving a process from one CPU to another.
	 */
	ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
	if (pri < ci->ci_schedstate.spc_curpriority)
		cpu_need_resched(ci, 0);
}

static void
sched_changepri(struct lwp *l, pri_t pri)
{

	LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	l->l_usrpri = pri;
	if (l->l_priority < PUSER)
		return;

	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
		l->l_priority = pri;
		return;
	}

	sched_dequeue(l);
	l->l_priority = pri;
	sched_enqueue(l, false);
	resched_cpu(l);
}

static void
sched_lendpri(struct lwp *l, pri_t pri)
{

	LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
		l->l_inheritedprio = pri;
		return;
	}

	sched_dequeue(l);
	l->l_inheritedprio = pri;
	sched_enqueue(l, false);
	resched_cpu(l);
}

struct lwp *
syncobj_noowner(wchan_t wchan)
{

	return NULL;
}

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */
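
/*
 * sched_pstats() reschedules itself every hz ticks (see the
 * callout_schedule() call at the end of that function), i.e. about once a
 * second, and scales p_pctcpu by ccpu on each pass.  Over 60 seconds the
 * old value is therefore multiplied by ccpu^60 = exp(-60/20) = exp(-3),
 * roughly 0.05, which is where the "decay 95% in 60 seconds" figure above
 * comes from.
 */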

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the percentage of CPU used by a
 * process.
 */
#define	CCPU_SHIFT	(FSHIFT + 1)

/*
 * sched_pstats:
 *
 *	Update process statistics and check CPU resource allocation.
 *	Call scheduler-specific hook to eventually adjust process/LWP
 *	priorities.
 *
 *	XXXSMP This needs to be reorganised in order to reduce the locking
 *	burden.
 */
/* ARGSUSED */
void
sched_pstats(void *arg)
{
	struct rlimit *rlim;
	struct lwp *l;
	struct proc *p;
	int minslp, sig, clkhz;
	long runtm;

	sched_pstats_ticks++;

	mutex_enter(&proclist_mutex);
	PROCLIST_FOREACH(p, &allproc) {
		/*
		 * Increment time in/out of memory and sleep time (if
		 * sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		minslp = 2;
		mutex_enter(&p->p_smutex);
		runtm = p->p_rtime.tv_sec;
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if ((l->l_flag & LW_IDLE) != 0)
				continue;
			lwp_lock(l);
			runtm += l->l_rtime.tv_sec;
			l->l_swtime++;
			if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
			    l->l_stat == LSSUSPENDED) {
				l->l_slptime++;
				minslp = min(minslp, l->l_slptime);
			} else
				minslp = 0;
			lwp_unlock(l);
		}

		/*
		 * Check if the process exceeds its CPU resource allocation.
		 * If over max, kill it.
		 */
		rlim = &p->p_rlimit[RLIMIT_CPU];
		sig = 0;
		if (runtm >= rlim->rlim_cur) {
			if (runtm >= rlim->rlim_max)
				sig = SIGKILL;
			else {
				sig = SIGXCPU;
				if (rlim->rlim_cur < rlim->rlim_max)
					rlim->rlim_cur += 5;
			}
		}

		mutex_spin_enter(&p->p_stmutex);
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		if (minslp < 1) {
			/*
			 * p_pctcpu is only for ps.
			 */
			clkhz = stathz != 0 ? stathz : hz;
#if (FSHIFT >= CCPU_SHIFT)
			p->p_pctcpu += (clkhz == 100) ?
			    ((fixpt_t)p->p_cpticks) << (FSHIFT - CCPU_SHIFT) :
			    100 * (((fixpt_t)p->p_cpticks)
			    << (FSHIFT - CCPU_SHIFT)) / clkhz;
#else
			p->p_pctcpu += ((FSCALE - ccpu) *
			    (p->p_cpticks * FSCALE / clkhz)) >> FSHIFT;
#endif
			p->p_cpticks = 0;
		}

		sched_pstats_hook(p, minslp);
		mutex_spin_exit(&p->p_stmutex);
		mutex_exit(&p->p_smutex);
		if (sig) {
			psignal(p, sig);
		}
	}
	mutex_exit(&proclist_mutex);
	uvm_meter();
	wakeup(&lbolt);
	callout_schedule(&sched_pstats_ch, hz);
}