/*	$NetBSD: kern_synch.c,v 1.177.2.21 2007/04/02 00:28:08 rmind Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and
 * Daniel Sieger.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *        This product includes software developed by the NetBSD
 *        Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.177.2.21 2007/04/02 00:28:08 rmind Exp $");

#include "opt_kstack.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include "opt_perfctrs.h"

#define	__MUTEX_PRIVATE

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#if defined(PERFCTRS)
#include <sys/pmc.h>
#endif
#include <sys/cpu.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/syscall_stats.h>
#include <sys/sleepq.h>
#include <sys/lockdebug.h>

#include <uvm/uvm_extern.h>

struct callout sched_pstats_ch = CALLOUT_INITIALIZER_SETFUNC(sched_pstats, NULL);
unsigned int sched_pstats_ticks;

int	lbolt;			/* once a second sleep address */

static void	sched_unsleep(struct lwp *);
static void	sched_changepri(struct lwp *, pri_t);
static void	sched_lendpri(struct lwp *, pri_t);

syncobj_t sleep_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	sleepq_unsleep,
	sleepq_changepri,
	sleepq_lendpri,
	syncobj_noowner,
};

syncobj_t sched_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	sched_unsleep,
	sched_changepri,
	sched_lendpri,
	syncobj_noowner,
};

/*
 * During autoconfiguration or after a panic, a sleep will simply lower the
 * priority briefly to allow interrupts, then return.  The priority to be
 * used (safepri) is machine-dependent, thus this value is initialized and
 * maintained in the machine-dependent layers.  This priority will typically
 * be 0, or the lowest priority that is safe for use on the interrupt stack;
 * it can be made higher to block network software interrupts after panics.
 */
int	safepri;

/*
 * OBSOLETE INTERFACE
 *
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds (0
 * means no timeout).  If pri includes the PCATCH flag, signals are checked
 * before and after sleeping, otherwise signals are not checked.  Returns 0
 * if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
 * signal needs to be delivered, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the system
 * call should be interrupted by the signal.
 *
 * The interlock is held until we are on a sleep queue.  The interlock will
 * be locked before returning to the caller unless the PNORELOCK flag is
 * specified, in which case the interlock will always be unlocked upon
 * return.
 */
int
ltsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
    volatile struct simplelock *interlock)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error, catch;

	if (sleepq_dontsleep(l)) {
		(void)sleepq_abort(NULL, 0);
		if ((priority & PNORELOCK) != 0)
			simple_unlock(interlock);
		return 0;
	}

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_enter(sq, l);

	if (interlock != NULL) {
		LOCK_ASSERT(simple_lock_held(interlock));
		simple_unlock(interlock);
	}

	catch = priority & PCATCH;
	sleepq_block(sq, priority & PRIMASK, ident, wmesg, timo, catch,
	    &sleep_syncobj);
	error = sleepq_unblock(timo, catch);

	if (interlock != NULL && (priority & PNORELOCK) == 0)
		simple_lock(interlock);

	return error;
}

int
mtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
    kmutex_t *mtx)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error, catch;

	if (sleepq_dontsleep(l)) {
		(void)sleepq_abort(mtx, (priority & PNORELOCK) != 0);
		return 0;
	}

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_enter(sq, l);
	mutex_exit(mtx);

	catch = priority & PCATCH;
	sleepq_block(sq, priority & PRIMASK, ident, wmesg, timo, catch,
	    &sleep_syncobj);
	error = sleepq_unblock(timo, catch);

	if ((priority & PNORELOCK) == 0)
		mutex_enter(mtx);

	return error;
}
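
/*
 * Illustrative sketch only (not part of the original file): the usual
 * condition-wait pattern built on mtsleep() and wakeup().  The
 * example_softc structure, its fields and the function names are
 * hypothetical; the point is that the condition is tested under the
 * mutex, the sleep releases and reacquires that mutex, and the
 * condition is re-tested after every wakeup.
 */
#if 0
struct example_softc {
	kmutex_t	sc_lock;
	int		sc_ready;
};

static int
example_wait(struct example_softc *sc)
{
	int error = 0;

	mutex_enter(&sc->sc_lock);
	while (sc->sc_ready == 0) {
		/* Drops sc_lock while asleep, reacquires it before return. */
		error = mtsleep(&sc->sc_ready, PWAIT | PCATCH, "exwait", 0,
		    &sc->sc_lock);
		if (error != 0)
			break;		/* interrupted by a signal */
	}
	mutex_exit(&sc->sc_lock);

	return error;
}

static void
example_post(struct example_softc *sc)
{

	mutex_enter(&sc->sc_lock);
	sc->sc_ready = 1;
	/* Make every LWP sleeping on &sc->sc_ready runnable again. */
	wakeup(&sc->sc_ready);
	mutex_exit(&sc->sc_lock);
}
#endif	/* 0: example only */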

/*
 * General sleep call for situations where a wake-up is not expected.
 */
int
kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error;

	if (sleepq_dontsleep(l))
		return sleepq_abort(NULL, 0);

	if (mtx != NULL)
		mutex_exit(mtx);
	sq = sleeptab_lookup(&sleeptab, l);
	sleepq_enter(sq, l);
	sleepq_block(sq, sched_kpri(l), l, wmesg, timo, intr, &sleep_syncobj);
	error = sleepq_unblock(timo, intr);
	if (mtx != NULL)
		mutex_enter(mtx);

	return error;
}
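
/*
 * Illustrative sketch only (not part of the original file): kpause() is
 * the interface to use when an LWP simply needs to back off for a short
 * time, e.g. between polls of slow hardware, and no wakeup() on a wait
 * channel is expected.  The function and wmesg names are hypothetical.
 */
#if 0
static void
example_poll_backoff(void)
{

	/* Sleep for roughly 10ms; not interruptible by signals, no mutex. */
	(void)kpause("expoll", false, max(1, hz / 100), NULL);
}
#endif	/* 0: example only */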

/*
 * OBSOLETE INTERFACE
 *
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(wchan_t ident)
{
	sleepq_t *sq;

	if (cold)
		return;

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_wake(sq, ident, (u_int)-1);
}

/*
 * OBSOLETE INTERFACE
 *
 * Make the highest priority process first in line on the specified
 * identifier runnable.
 */
void
wakeup_one(wchan_t ident)
{
	sleepq_t *sq;

	if (cold)
		return;

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_wake(sq, ident, 1);
}


/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.  Should only be called when the
 * current process explicitly requests it (e.g. sched_yield(2) in compat code).
 */
void
yield(void)
{
	struct lwp *l = curlwp;

	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
	lwp_lock(l);
	if (l->l_stat == LSONPROC) {
		KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
		l->l_priority = l->l_usrpri;
	}
	l->l_nvcsw++;
	mi_switch(l);
	KERNEL_LOCK(l->l_biglocks, l);
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.
 */
void
preempt(void)
{
	struct lwp *l = curlwp;

	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
	lwp_lock(l);
	if (l->l_stat == LSONPROC) {
		KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
		l->l_priority = l->l_usrpri;
	}
	l->l_nivcsw++;
	(void)mi_switch(l);
	KERNEL_LOCK(l->l_biglocks, l);
}

/*
 * Compute the amount of time during which the current lwp was running.
 *
 * - update l_rtime unless it's an idle lwp.
 * - update spc_runtime for the next lwp.
 */

static inline void
updatertime(struct lwp *l, struct schedstate_percpu *spc)
{
	struct timeval tv;
	long s, u;

	if ((l->l_flag & LW_IDLE) != 0) {
		microtime(&spc->spc_runtime);
		return;
	}

	microtime(&tv);
	u = l->l_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec);
	s = l->l_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
	if (u < 0) {
		u += 1000000;
		s--;
	} else if (u >= 1000000) {
		u -= 1000000;
		s++;
	}
	l->l_rtime.tv_usec = u;
	l->l_rtime.tv_sec = s;

	spc->spc_runtime = tv;
}
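
/*
 * Worked example for updatertime() above (illustrative): with
 * l_rtime = 3.800000s, spc_runtime = 10.900000s and microtime()
 * returning 11.100000s, u = 800000 + (100000 - 900000) = 0 and
 * s = 3 + (11 - 10) = 4, so the LWP is credited with the 0.2s it
 * just spent running and l_rtime becomes 4.000000s.
 */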

/*
 * The machine independent parts of context switch.
 *
 * Returns 1 if another process was actually run.
 */
int
mi_switch(struct lwp *l)
{
	struct schedstate_percpu *spc;
	struct lwp *newl;
	int retval, oldspl;

	LOCK_ASSERT(lwp_locked(l, NULL));

#ifdef LOCKDEBUG
	spinlock_switchcheck();
	simple_lock_switchcheck();
#endif
#ifdef KSTACK_CHECK_MAGIC
	kstack_check_magic(l);
#endif

	/*
	 * It's safe to read the per CPU schedstate unlocked here, as all we
	 * are after is the run time and that's guaranteed to have been last
	 * updated by this CPU.
	 */
	KDASSERT(l->l_cpu == curcpu());
	spc = &l->l_cpu->ci_schedstate;

	/* Count time spent in current system call */
	SYSCALL_TIME_SLEEP(l);

	/*
	 * XXXSMP If we are using h/w performance counters, save context.
	 */
#if defined(PERFCTRS)
	if (PMC_ENABLED(l->l_proc)) {
		pmc_save_context(l->l_proc);
	}
#endif

	/*
	 * If on the CPU and we have gotten this far, then we must yield.
	 */
	KASSERT(l->l_stat != LSRUN);
	if (l->l_stat == LSONPROC) {
		KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
		l->l_stat = LSRUN;
		if ((l->l_flag & LW_IDLE) == 0) {
			sched_enqueue(l, true);
		}
	}

	/*
	 * Process is about to yield the CPU; clear the appropriate
	 * scheduling flags.
	 */
	spc->spc_flags &= ~SPCF_SWITCHCLEAR;

	LOCKDEBUG_BARRIER(l->l_mutex, 1);

	/*
	 * Acquire the spc_mutex if necessary.
	 */
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	if (l->l_mutex != spc->spc_mutex) {
		mutex_enter(spc->spc_mutex);
	}
#endif
	/*
	 * Let sched_nextlwp() select the LWP to run on the CPU next.
	 * If no LWP is runnable, switch to the idle LWP.
	 */
	newl = sched_nextlwp();
	if (newl) {
		sched_dequeue(newl);
	} else {
		newl = l->l_cpu->ci_data.cpu_idlelwp;
		KASSERT(newl != NULL);
	}
	KASSERT(lwp_locked(newl, spc->spc_mutex));
	newl->l_stat = LSONPROC;
	newl->l_cpu = l->l_cpu;
	newl->l_flag |= LW_RUNNING;

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	if (l->l_mutex != spc->spc_mutex) {
		mutex_exit(spc->spc_mutex);
	}
#endif

	updatertime(l, spc);
	if (l != newl) {
		struct lwp *prevlwp;

		/* Unlocked, but for statistics only. */
		uvmexp.swtch++;

		/* Save old VM context. */
		pmap_deactivate(l);

		/* Switch to the new LWP.. */
		l->l_flag &= ~LW_RUNNING;
		oldspl = MUTEX_SPIN_OLDSPL(l->l_cpu);
		prevlwp = cpu_switchto(l, newl);

		/* .. we have switched. */
		curlwp = l;
		if (prevlwp != NULL) {
			curcpu()->ci_mtx_oldspl = oldspl;
			lwp_unlock(prevlwp);
		} else {
			splx(oldspl);
		}

		/* Restore VM context. */
		pmap_activate(l);
		retval = 1;
	} else {
		/* Nothing to do - just unlock and return. */
		lwp_unlock(l);
		retval = 0;
	}

	KASSERT(l == curlwp);
	KASSERT(l->l_stat == LSONPROC);

	/*
	 * XXXSMP If we are using h/w performance counters, restore context.
	 */
#if defined(PERFCTRS)
	if (PMC_ENABLED(l->l_proc)) {
		pmc_restore_context(l->l_proc);
	}
#endif

	/*
	 * We're running again; record our new start time.  We might
	 * be running on a new CPU now, so don't use the cached
	 * schedstate_percpu pointer.
	 */
	SYSCALL_TIME_WAKEUP(l);
	KDASSERT(l->l_cpu == curcpu());
	LOCKDEBUG_BARRIER(NULL, 1);

	return retval;
}

/*
 * Change process state to be runnable, placing it on the run queue if it is
 * in memory, and awakening the swapper if it isn't in memory.
 *
 * Call with the process and LWP locked.  Will return with the LWP unlocked.
 */
void
setrunnable(struct lwp *l)
{
	struct proc *p = l->l_proc;
	sigset_t *ss;

	KASSERT((l->l_flag & LW_IDLE) == 0);
	KASSERT(mutex_owned(&p->p_smutex));
	KASSERT(lwp_locked(l, NULL));

	switch (l->l_stat) {
	case LSSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xstat != 0) {
			if ((sigprop[p->p_xstat] & SA_TOLWP) != 0)
				ss = &l->l_sigpend.sp_set;
			else
				ss = &p->p_sigpend.sp_set;
			sigaddset(ss, p->p_xstat);
			signotify(l);
		}
		p->p_nrlwps++;
		break;
	case LSSUSPENDED:
		l->l_flag &= ~LW_WSUSPEND;
		p->p_nrlwps++;
		break;
	case LSSLEEP:
		KASSERT(l->l_wchan != NULL);
		break;
	default:
		panic("setrunnable: lwp %p state was %d", l, l->l_stat);
	}

	/*
	 * If the LWP was sleeping interruptibly, then it's OK to start it
	 * again.  If not, mark it as still sleeping.
	 */
	if (l->l_wchan != NULL) {
		l->l_stat = LSSLEEP;
		/* lwp_unsleep() will release the lock. */
		lwp_unsleep(l);
		return;
	}

	LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	/*
	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
	 * about to call mi_switch(), in which case it will yield.
	 */
	if ((l->l_flag & LW_RUNNING) != 0) {
		l->l_stat = LSONPROC;
		l->l_slptime = 0;
		lwp_unlock(l);
		return;
	}

	/*
	 * Set the LWP runnable.  If it's swapped out, we need to wake the swapper
	 * to bring it back in.  Otherwise, enter it into a run queue.
	 */
	sched_setrunnable(l);
	l->l_stat = LSRUN;
	l->l_slptime = 0;

	if (l->l_flag & LW_INMEM) {
		sched_enqueue(l, false);
		resched_cpu(l);
		lwp_unlock(l);
	} else {
		lwp_unlock(l);
		uvm_kick_scheduler();
	}
}

/*
 * suspendsched:
 *
 *	Convert all non-PK_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED.
 */
void
suspendsched(void)
{
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
#endif
	struct lwp *l;
	struct proc *p;

	/*
	 * We do this by process in order not to violate the locking rules.
	 */
	mutex_enter(&proclist_mutex);
	PROCLIST_FOREACH(p, &allproc) {
		mutex_enter(&p->p_smutex);

		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(&p->p_smutex);
			continue;
		}

		p->p_stat = SSTOP;

		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if (l == curlwp)
				continue;

			lwp_lock(l);

			/*
			 * Set LW_WREBOOT so that the LWP will suspend itself
			 * when it tries to return to user mode.  We want to
			 * try to get as many LWPs as possible to the
			 * user / kernel boundary, so that they will release
			 * any locks that they hold.
			 */
			l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);

			if (l->l_stat == LSSLEEP &&
			    (l->l_flag & LW_SINTR) != 0) {
				/* setrunnable() will release the lock. */
				setrunnable(l);
				continue;
			}

			lwp_unlock(l);
		}

		mutex_exit(&p->p_smutex);
	}
	mutex_exit(&proclist_mutex);

	/*
	 * Kick all CPUs to make them preempt any LWPs running in user mode.
	 * They'll trap into the kernel and suspend themselves in userret().
	 */
#ifdef MULTIPROCESSOR
	for (CPU_INFO_FOREACH(cii, ci))
		cpu_need_resched(ci, 0);
#else
	cpu_need_resched(curcpu(), 0);
#endif
}

/*
 * sched_kpri:
 *
 *	Scale a priority level to a kernel priority level, usually
 *	for an LWP that is about to sleep.
 */
pri_t
sched_kpri(struct lwp *l)
{
	/*
	 * Scale user priorities (127 -> 50) up to kernel priorities
	 * in the range (49 -> 8).  Reserve the top 8 kernel priorities
	 * for high priority kthreads.  Kernel priorities passed in
	 * are left "as is".  XXX This is somewhat arbitrary.
	 */
	static const uint8_t kpri_tab[] = {
		 0,   1,   2,   3,   4,   5,   6,   7,
		 8,   9,  10,  11,  12,  13,  14,  15,
		16,  17,  18,  19,  20,  21,  22,  23,
		24,  25,  26,  27,  28,  29,  30,  31,
		32,  33,  34,  35,  36,  37,  38,  39,
		40,  41,  42,  43,  44,  45,  46,  47,
		48,  49,   8,   8,   9,   9,  10,  10,
		11,  11,  12,  12,  13,  14,  14,  15,
		15,  16,  16,  17,  17,  18,  18,  19,
		20,  20,  21,  21,  22,  22,  23,  23,
		24,  24,  25,  26,  26,  27,  27,  28,
		28,  29,  29,  30,  30,  31,  32,  32,
		33,  33,  34,  34,  35,  35,  36,  36,
		37,  38,  38,  39,  39,  40,  40,  41,
		41,  42,  42,  43,  44,  44,  45,  45,
		46,  46,  47,  47,  48,  48,  49,  49,
	};

	return (pri_t)kpri_tab[l->l_usrpri];
}
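
/*
 * For example, kpri_tab[] above maps the lowest user priority (127) to
 * kernel priority 49 and user priority 50 to kernel priority 8, while a
 * value already in the kernel range, such as 20, is returned unchanged.
 */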

/*
 * sched_unsleep:
 *
 *	This is called when the LWP has not been awoken normally but instead
 *	interrupted: for example, if the sleep timed out.  Because of this,
 *	it's not a valid action for running or idle LWPs.
 */
static void
sched_unsleep(struct lwp *l)
{

	lwp_unlock(l);
	panic("sched_unsleep");
}

inline void
resched_cpu(struct lwp *l)
{
	struct cpu_info *ci;
	const pri_t pri = lwp_eprio(l);

	/*
	 * XXXSMP
	 * Since l->l_cpu persists across a context switch,
	 * this gives us *very weak* processor affinity, in
	 * that we notify the CPU on which the process last
	 * ran that it should try to switch.
	 *
	 * This does not guarantee that the process will run on
	 * that processor next, because another processor might
	 * grab it the next time it performs a context switch.
	 *
	 * This also does not handle the case where its last
	 * CPU is running a higher-priority process, but every
	 * other CPU is running a lower-priority process.  There
	 * are ways to handle this situation, but they're not
	 * currently very pretty, and we also need to weigh the
	 * cost of moving a process from one CPU to another.
	 */
	ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
	if (pri < ci->ci_schedstate.spc_curpriority)
		cpu_need_resched(ci, 0);
}

static void
sched_changepri(struct lwp *l, pri_t pri)
{

	LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	l->l_usrpri = pri;
	if (l->l_priority < PUSER)
		return;

	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
		l->l_priority = pri;
		return;
	}

	sched_dequeue(l);
	l->l_priority = pri;
	sched_enqueue(l, false);
	resched_cpu(l);
}

static void
sched_lendpri(struct lwp *l, pri_t pri)
{

	LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
		l->l_inheritedprio = pri;
		return;
	}

	sched_dequeue(l);
	l->l_inheritedprio = pri;
	sched_enqueue(l, false);
	resched_cpu(l);
}

struct lwp *
syncobj_noowner(wchan_t wchan)
{

	return NULL;
}


/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the percentage of CPU used by a
 * process.
 */
#define	CCPU_SHIFT	(FSHIFT + 1)
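
/*
 * Worked example (illustrative): sched_pstats() below runs once per
 * second, and for a process with a recently running LWP each pass
 * multiplies p_pctcpu by ccpu/FSCALE = exp(-1/20).  After 60 passes the
 * remaining fraction is exp(-60/20) = exp(-3) ~= 0.0498, i.e. roughly
 * 95% of the old value has decayed, matching the comment above ccpu.
 */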

/*
 * sched_pstats:
 *
 *	Update process statistics and check CPU resource allocation.
 *	Call scheduler-specific hook to eventually adjust process/LWP
 *	priorities.
 *
 *	XXXSMP This needs to be reorganised in order to reduce the locking
 *	burden.
 */
/* ARGSUSED */
void
sched_pstats(void *arg)
{
	struct rlimit *rlim;
	struct lwp *l;
	struct proc *p;
	int minslp, sig, clkhz;
	long runtm;

	sched_pstats_ticks++;

	mutex_enter(&proclist_mutex);
	PROCLIST_FOREACH(p, &allproc) {
		/*
		 * Increment time in/out of memory and sleep time (if
		 * sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		minslp = 2;
		mutex_enter(&p->p_smutex);
		runtm = p->p_rtime.tv_sec;
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if ((l->l_flag & LW_IDLE) != 0)
				continue;
			lwp_lock(l);
			runtm += l->l_rtime.tv_sec;
			l->l_swtime++;
			if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
			    l->l_stat == LSSUSPENDED) {
				l->l_slptime++;
				minslp = min(minslp, l->l_slptime);
			} else
				minslp = 0;
			lwp_unlock(l);
		}

		/*
		 * Check if the process exceeds its CPU resource allocation.
		 * If over max, kill it.
		 */
		rlim = &p->p_rlimit[RLIMIT_CPU];
		sig = 0;
		if (runtm >= rlim->rlim_cur) {
			if (runtm >= rlim->rlim_max)
				sig = SIGKILL;
			else {
				sig = SIGXCPU;
				if (rlim->rlim_cur < rlim->rlim_max)
					rlim->rlim_cur += 5;
			}
		}

		mutex_spin_enter(&p->p_stmutex);
		if (minslp < 1) {
			/*
			 * p_pctcpu is only for ps.
			 */
			p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
			clkhz = stathz != 0 ? stathz : hz;
#if	(FSHIFT >= CCPU_SHIFT)
			p->p_pctcpu += (clkhz == 100) ?
			    ((fixpt_t)p->p_cpticks) << (FSHIFT - CCPU_SHIFT) :
			    100 * (((fixpt_t)p->p_cpticks)
			    << (FSHIFT - CCPU_SHIFT)) / clkhz;
#else
			p->p_pctcpu += ((FSCALE - ccpu) *
			    (p->p_cpticks * FSCALE / clkhz)) >> FSHIFT;
#endif
			p->p_cpticks = 0;
		}

		sched_pstats_hook(p, minslp);
		mutex_spin_exit(&p->p_stmutex);
		mutex_exit(&p->p_smutex);
		if (sig) {
			psignal(p, sig);
		}
	}
	mutex_exit(&proclist_mutex);
	uvm_meter();
	wakeup(&lbolt);
	callout_schedule(&sched_pstats_ch, hz);
}