/*	$NetBSD: kern_synch.c,v 1.177.2.23 2007/04/04 22:28:17 ad Exp $	*/

/*-
 * Copyright (c) 1999, 2000, 2004, 2006, 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center, by Charles M. Hannum, Andrew Doran and
 * Daniel Sieger.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1990, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_synch.c	8.9 (Berkeley) 5/19/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.177.2.23 2007/04/04 22:28:17 ad Exp $");

#include "opt_kstack.h"
#include "opt_lockdebug.h"
#include "opt_multiprocessor.h"
#include "opt_perfctrs.h"

#define	__MUTEX_PRIVATE

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#if defined(PERFCTRS)
#include <sys/pmc.h>
#endif
#include <sys/cpu.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/syscall_stats.h>
#include <sys/sleepq.h>
#include <sys/lockdebug.h>

#include <uvm/uvm_extern.h>

struct callout sched_pstats_ch = CALLOUT_INITIALIZER_SETFUNC(sched_pstats, NULL);
unsigned int sched_pstats_ticks;

int	lbolt;			/* once a second sleep address */

static void	sched_unsleep(struct lwp *);
static void	sched_changepri(struct lwp *, pri_t);
static void	sched_lendpri(struct lwp *, pri_t);

syncobj_t sleep_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	sleepq_unsleep,
	sleepq_changepri,
	sleepq_lendpri,
	syncobj_noowner,
};

syncobj_t sched_syncobj = {
	SOBJ_SLEEPQ_SORTED,
	sched_unsleep,
	sched_changepri,
	sched_lendpri,
	syncobj_noowner,
};

/*
 * During autoconfiguration or after a panic, a sleep will simply lower the
 * priority briefly to allow interrupts, then return.  The priority to be
 * used (safepri) is machine-dependent, thus this value is initialized and
 * maintained in the machine-dependent layers.  This priority will typically
 * be 0, or the lowest priority that is safe for use on the interrupt stack;
 * it can be made higher to block network software interrupts after panics.
 */
int	safepri;

/*
 * OBSOLETE INTERFACE
 *
 * General sleep call.  Suspends the current process until a wakeup is
 * performed on the specified identifier.  The process will then be made
 * runnable with the specified priority.  Sleeps at most timo/hz seconds (0
 * means no timeout).  If pri includes the PCATCH flag, signals are checked
 * before and after sleeping; otherwise signals are not checked.  Returns 0
 * if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is set and a
 * signal needs to be delivered, ERESTART is returned if the current system
 * call should be restarted if possible, and EINTR is returned if the system
 * call should be interrupted by the signal.
 *
 * The interlock is held until we are on a sleep queue.  The interlock will
 * be locked before returning back to the caller unless the PNORELOCK flag
 * is specified, in which case the interlock will always be unlocked upon
 * return.
 */
int
ltsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
	volatile struct simplelock *interlock)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error, catch;

	if (sleepq_dontsleep(l)) {
		(void)sleepq_abort(NULL, 0);
		if ((priority & PNORELOCK) != 0)
			simple_unlock(interlock);
		return 0;
	}

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_enter(sq, l);

	if (interlock != NULL) {
		LOCK_ASSERT(simple_lock_held(interlock));
		simple_unlock(interlock);
	}

	catch = priority & PCATCH;
	sleepq_block(sq, priority & PRIMASK, ident, wmesg, timo, catch,
	    &sleep_syncobj);
	error = sleepq_unblock(timo, catch);

	if (interlock != NULL && (priority & PNORELOCK) == 0)
		simple_lock(interlock);

	return error;
}

int
mtsleep(wchan_t ident, pri_t priority, const char *wmesg, int timo,
	kmutex_t *mtx)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error, catch;

	if (sleepq_dontsleep(l)) {
		(void)sleepq_abort(mtx, (priority & PNORELOCK) != 0);
		return 0;
	}

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_enter(sq, l);
	mutex_exit(mtx);

	catch = priority & PCATCH;
	sleepq_block(sq, priority & PRIMASK, ident, wmesg, timo, catch,
	    &sleep_syncobj);
	error = sleepq_unblock(timo, catch);

	if ((priority & PNORELOCK) == 0)
		mutex_enter(mtx);

	return error;
}
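
/*
 * Illustrative sketch: a typical caller of mtsleep() waits for a condition
 * while holding a kmutex and rechecks the condition after every wakeup.
 * The "example_softc" structure, its fields and the "exwait" wmesg are
 * hypothetical names, not part of this interface.
 */
#if 0
struct example_softc {
	kmutex_t	sc_lock;
	bool		sc_ready;
};

static int
example_wait(struct example_softc *sc)
{
	int error = 0;

	mutex_enter(&sc->sc_lock);
	while (!sc->sc_ready && error == 0) {
		/* No timeout (timo = 0); PCATCH lets a signal end the wait. */
		error = mtsleep(&sc->sc_ready, PWAIT | PCATCH, "exwait", 0,
		    &sc->sc_lock);
	}
	mutex_exit(&sc->sc_lock);
	return error;
}
#endif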

/*
 * General sleep call for situations where a wake-up is not expected.
 */
int
kpause(const char *wmesg, bool intr, int timo, kmutex_t *mtx)
{
	struct lwp *l = curlwp;
	sleepq_t *sq;
	int error;

	if (sleepq_dontsleep(l))
		return sleepq_abort(NULL, 0);

	if (mtx != NULL)
		mutex_exit(mtx);
	sq = sleeptab_lookup(&sleeptab, l);
	sleepq_enter(sq, l);
	sleepq_block(sq, sched_kpri(l), l, wmesg, timo, intr, &sleep_syncobj);
	error = sleepq_unblock(timo, intr);
	if (mtx != NULL)
		mutex_enter(mtx);

	return error;
}
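
/*
 * Illustrative sketch: kpause() suits simple delays where no wakeup() is
 * expected, e.g. (hypothetical wmesg "exdelay", roughly a 100ms pause,
 * not interruptible by signals, no interlock held):
 *
 *	(void)kpause("exdelay", false, hz / 10, NULL);
 */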

/*
 * OBSOLETE INTERFACE
 *
 * Make all processes sleeping on the specified identifier runnable.
 */
void
wakeup(wchan_t ident)
{
	sleepq_t *sq;

	if (cold)
		return;

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_wake(sq, ident, (u_int)-1);
}

/*
 * OBSOLETE INTERFACE
 *
 * Make the highest priority process first in line on the specified
 * identifier runnable.
 */
void
wakeup_one(wchan_t ident)
{
	sleepq_t *sq;

	if (cold)
		return;

	sq = sleeptab_lookup(&sleeptab, ident);
	sleepq_wake(sq, ident, 1);
}


/*
 * General yield call.  Puts the current process back on its run queue and
 * performs a voluntary context switch.  Should only be called when the
 * current process explicitly requests it (e.g. sched_yield(2) in compat
 * code).
 */
void
yield(void)
{
	struct lwp *l = curlwp;

	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
	lwp_lock(l);
	if (l->l_stat == LSONPROC) {
		KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
		l->l_priority = l->l_usrpri;
	}
	l->l_nvcsw++;
	mi_switch(l);
	KERNEL_LOCK(l->l_biglocks, l);
}

/*
 * General preemption call.  Puts the current process back on its run queue
 * and performs an involuntary context switch.
 */
void
preempt(void)
{
	struct lwp *l = curlwp;

	KERNEL_UNLOCK_ALL(l, &l->l_biglocks);
	lwp_lock(l);
	if (l->l_stat == LSONPROC) {
		KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
		l->l_priority = l->l_usrpri;
	}
	l->l_nivcsw++;
	(void)mi_switch(l);
	KERNEL_LOCK(l->l_biglocks, l);
}

/*
 * Compute the amount of time during which the current lwp was running.
 *
 * - update l_rtime unless it's an idle lwp.
 * - update spc_runtime for the next lwp.
 */

static inline void
updatertime(struct lwp *l, struct schedstate_percpu *spc)
{
	struct timeval tv;
	long s, u;

	if ((l->l_flag & LW_IDLE) != 0) {
		microtime(&spc->spc_runtime);
		return;
	}

	microtime(&tv);
	u = l->l_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec);
	s = l->l_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec);
	if (u < 0) {
		u += 1000000;
		s--;
	} else if (u >= 1000000) {
		u -= 1000000;
		s++;
	}
	l->l_rtime.tv_usec = u;
	l->l_rtime.tv_sec = s;

	spc->spc_runtime = tv;
}

/*
 * The machine independent parts of context switch.
 *
 * Returns 1 if another process was actually run.
 */
int
mi_switch(struct lwp *l)
{
	struct schedstate_percpu *spc;
	struct lwp *newl;
	int retval, oldspl;

	LOCK_ASSERT(lwp_locked(l, NULL));

#ifdef LOCKDEBUG
	spinlock_switchcheck();
	simple_lock_switchcheck();
#endif
#ifdef KSTACK_CHECK_MAGIC
	kstack_check_magic(l);
#endif

	/*
	 * It's safe to read the per CPU schedstate unlocked here, as all we
	 * are after is the run time and that's guaranteed to have been last
	 * updated by this CPU.
	 */
	KDASSERT(l->l_cpu == curcpu());
	spc = &l->l_cpu->ci_schedstate;

	/* Count time spent in current system call */
	SYSCALL_TIME_SLEEP(l);

	/*
	 * XXXSMP If we are using h/w performance counters, save context.
	 */
#if PERFCTRS
	if (PMC_ENABLED(l->l_proc)) {
		pmc_save_context(l->l_proc);
	}
#endif

	/*
	 * If on the CPU and we have gotten this far, then we must yield.
	 */
	KASSERT(l->l_stat != LSRUN);
	if (l->l_stat == LSONPROC) {
		KASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));
		l->l_stat = LSRUN;
		if ((l->l_flag & LW_IDLE) == 0) {
			sched_enqueue(l, true);
		}
	}

	/*
	 * Process is about to yield the CPU; clear the appropriate
	 * scheduling flags.
	 */
	spc->spc_flags &= ~SPCF_SWITCHCLEAR;

	LOCKDEBUG_BARRIER(l->l_mutex, 1);

	/*
	 * Acquire the spc_mutex if necessary.
	 */
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	if (l->l_mutex != spc->spc_mutex) {
		mutex_spin_enter(spc->spc_mutex);
	}
#endif

	/*
	 * Let sched_nextlwp() select the LWP to run the CPU next.
	 * If no LWP is runnable, switch to the idle LWP.
	 */
	newl = sched_nextlwp();
	if (newl) {
		sched_dequeue(newl);
	} else {
		newl = l->l_cpu->ci_data.cpu_idlelwp;
		KASSERT(newl != NULL);
	}
	KASSERT(lwp_locked(newl, spc->spc_mutex));
	newl->l_stat = LSONPROC;
	newl->l_cpu = l->l_cpu;
	newl->l_flag |= LW_RUNNING;
	cpu_did_resched();

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	if (l->l_mutex != spc->spc_mutex) {
		mutex_spin_exit(spc->spc_mutex);
	}
#endif

	updatertime(l, spc);
	if (l != newl) {
		struct lwp *prevlwp;

		/* Unlocked, but for statistics only. */
		uvmexp.swtch++;

		/* Save old VM context. */
		pmap_deactivate(l);

		/* Switch to the new LWP.. */
		l->l_flag &= ~LW_RUNNING;
		oldspl = MUTEX_SPIN_OLDSPL(l->l_cpu);
		prevlwp = cpu_switchto(l, newl);

		/*
		 * .. we have switched away and are now back so we must
		 * be the new curlwp.  prevlwp is who we replaced.
		 */
		curlwp = l;
		if (prevlwp != NULL) {
			curcpu()->ci_mtx_oldspl = oldspl;
			lwp_unlock(prevlwp);
		} else {
			splx(oldspl);
		}

		/* Restore VM context. */
		pmap_activate(l);
		retval = 1;
	} else {
		/* Nothing to do - just unlock and return. */
		lwp_unlock(l);
		retval = 0;
	}

	KASSERT(l == curlwp);
	KASSERT(l->l_stat == LSONPROC);

	/*
	 * XXXSMP If we are using h/w performance counters, restore context.
	 */
#if PERFCTRS
	if (PMC_ENABLED(l->l_proc)) {
		pmc_restore_context(l->l_proc);
	}
#endif

	/*
	 * We're running again; record our new start time.  We might
	 * be running on a new CPU now, so don't use the cached
	 * schedstate_percpu pointer.
	 */
	SYSCALL_TIME_WAKEUP(l);
	KDASSERT(l->l_cpu == curcpu());
	LOCKDEBUG_BARRIER(NULL, 1);

	return retval;
}

/*
 * Change process state to be runnable, placing it on the run queue if it is
 * in memory, and awakening the swapper if it isn't in memory.
 *
 * Call with the process and LWP locked.  Will return with the LWP unlocked.
 */
void
setrunnable(struct lwp *l)
{
	struct proc *p = l->l_proc;
	sigset_t *ss;

	KASSERT((l->l_flag & LW_IDLE) == 0);
	KASSERT(mutex_owned(&p->p_smutex));
	KASSERT(lwp_locked(l, NULL));

	switch (l->l_stat) {
	case LSSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((p->p_slflag & PSL_TRACED) != 0 && p->p_xstat != 0) {
			if ((sigprop[p->p_xstat] & SA_TOLWP) != 0)
				ss = &l->l_sigpend.sp_set;
			else
				ss = &p->p_sigpend.sp_set;
			sigaddset(ss, p->p_xstat);
			signotify(l);
		}
		p->p_nrlwps++;
		break;
	case LSSUSPENDED:
		l->l_flag &= ~LW_WSUSPEND;
		p->p_nrlwps++;
		break;
	case LSSLEEP:
		KASSERT(l->l_wchan != NULL);
		break;
	default:
		panic("setrunnable: lwp %p state was %d", l, l->l_stat);
	}

	/*
	 * If the LWP was sleeping interruptibly, then it's OK to start it
	 * again.  If not, mark it as still sleeping.
	 */
	if (l->l_wchan != NULL) {
		l->l_stat = LSSLEEP;
		/* lwp_unsleep() will release the lock. */
		lwp_unsleep(l);
		return;
	}

	LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	/*
	 * If the LWP is still on the CPU, mark it as LSONPROC.  It may be
	 * about to call mi_switch(), in which case it will yield.
	 */
	if ((l->l_flag & LW_RUNNING) != 0) {
		l->l_stat = LSONPROC;
		l->l_slptime = 0;
		lwp_unlock(l);
		return;
	}

	/*
	 * Set the LWP runnable.  If it's swapped out, we need to wake the
	 * swapper to bring it back in.  Otherwise, enter it into a run queue.
	 */
	sched_setrunnable(l);
	l->l_stat = LSRUN;
	l->l_slptime = 0;

	if (l->l_flag & LW_INMEM) {
		sched_enqueue(l, false);
		resched_cpu(l);
		lwp_unlock(l);
	} else {
		lwp_unlock(l);
		uvm_kick_scheduler();
	}
}

/*
 * suspendsched:
 *
 *	Convert all non-PK_SYSTEM LSSLEEP or LSRUN LWPs to LSSUSPENDED.
 */
void
suspendsched(void)
{
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
#endif
	struct lwp *l;
	struct proc *p;

	/*
	 * We do this by process in order not to violate the locking rules.
	 */
	mutex_enter(&proclist_mutex);
	PROCLIST_FOREACH(p, &allproc) {
		mutex_enter(&p->p_smutex);

		if ((p->p_flag & PK_SYSTEM) != 0) {
			mutex_exit(&p->p_smutex);
			continue;
		}

		p->p_stat = SSTOP;

		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if (l == curlwp)
				continue;

			lwp_lock(l);

			/*
			 * Set LW_WREBOOT so that the LWP will suspend itself
			 * when it tries to return to user mode.  We want to
			 * get as many LWPs as possible to the user/kernel
			 * boundary, so that they will release any locks
			 * that they hold.
			 */
			l->l_flag |= (LW_WREBOOT | LW_WSUSPEND);

			if (l->l_stat == LSSLEEP &&
			    (l->l_flag & LW_SINTR) != 0) {
				/* setrunnable() will release the lock. */
				setrunnable(l);
				continue;
			}

			lwp_unlock(l);
		}

		mutex_exit(&p->p_smutex);
	}
	mutex_exit(&proclist_mutex);

	/*
	 * Kick all CPUs to make them preempt any LWPs running in user mode.
	 * They'll trap into the kernel and suspend themselves in userret().
	 */
#ifdef MULTIPROCESSOR
	for (CPU_INFO_FOREACH(cii, ci))
		cpu_need_resched(ci, 0);
#else
	cpu_need_resched(curcpu(), 0);
#endif
}

/*
 * sched_kpri:
 *
 *	Scale a priority level to a kernel priority level, usually
 *	for an LWP that is about to sleep.
 */
pri_t
sched_kpri(struct lwp *l)
{
	/*
	 * Scale user priorities (127 -> 50) up to kernel priorities
	 * in the range (49 -> 8).  Reserve the top 8 kernel priorities
	 * for high priority kthreads.  Kernel priorities passed in
	 * are left "as is".  XXX This is somewhat arbitrary.
	 */
	static const uint8_t kpri_tab[] = {
		0, 1, 2, 3, 4, 5, 6, 7,
		8, 9, 10, 11, 12, 13, 14, 15,
		16, 17, 18, 19, 20, 21, 22, 23,
		24, 25, 26, 27, 28, 29, 30, 31,
		32, 33, 34, 35, 36, 37, 38, 39,
		40, 41, 42, 43, 44, 45, 46, 47,
		48, 49, 8, 8, 9, 9, 10, 10,
		11, 11, 12, 12, 13, 14, 14, 15,
		15, 16, 16, 17, 17, 18, 18, 19,
		20, 20, 21, 21, 22, 22, 23, 23,
		24, 24, 25, 26, 26, 27, 27, 28,
		28, 29, 29, 30, 30, 31, 32, 32,
		33, 33, 34, 34, 35, 35, 36, 36,
		37, 38, 38, 39, 39, 40, 40, 41,
		41, 42, 42, 43, 44, 44, 45, 45,
		46, 46, 47, 47, 48, 48, 49, 49,
	};

	return (pri_t)kpri_tab[l->l_usrpri];
}

/*
 * sched_unsleep:
 *
 *	This is called when the LWP has not been awoken normally but instead
 *	interrupted: for example, if the sleep timed out.  Because of this,
 *	it's not a valid action for running or idle LWPs.
 */
static void
sched_unsleep(struct lwp *l)
{

	lwp_unlock(l);
	panic("sched_unsleep");
}

inline void
resched_cpu(struct lwp *l)
{
	struct cpu_info *ci;
	const pri_t pri = lwp_eprio(l);

	/*
	 * XXXSMP
	 * Since l->l_cpu persists across a context switch,
	 * this gives us *very weak* processor affinity, in
	 * that we notify the CPU on which the process last
	 * ran that it should try to switch.
	 *
	 * This does not guarantee that the process will run on
	 * that processor next, because another processor might
	 * grab it the next time it performs a context switch.
	 *
	 * This also does not handle the case where its last
	 * CPU is running a higher-priority process, but every
	 * other CPU is running a lower-priority process.  There
	 * are ways to handle this situation, but they're not
	 * currently very pretty, and we also need to weigh the
	 * cost of moving a process from one CPU to another.
	 */
	ci = (l->l_cpu != NULL) ? l->l_cpu : curcpu();
	if (pri < ci->ci_schedstate.spc_curpriority)
		cpu_need_resched(ci, 0);
}

static void
sched_changepri(struct lwp *l, pri_t pri)
{

	LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	l->l_usrpri = pri;
	if (l->l_priority < PUSER)
		return;

	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
		l->l_priority = pri;
		return;
	}

	sched_dequeue(l);
	l->l_priority = pri;
	sched_enqueue(l, false);
	resched_cpu(l);
}

static void
sched_lendpri(struct lwp *l, pri_t pri)
{

	LOCK_ASSERT(lwp_locked(l, l->l_cpu->ci_schedstate.spc_mutex));

	if (l->l_stat != LSRUN || (l->l_flag & LW_INMEM) == 0) {
		l->l_inheritedprio = pri;
		return;
	}

	sched_dequeue(l);
	l->l_inheritedprio = pri;
	sched_enqueue(l, false);
	resched_cpu(l);
}

struct lwp *
syncobj_noowner(wchan_t wchan)
{

	return NULL;
}


/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the percentage of CPU used by a
 * process.
 */
#define	CCPU_SHIFT	(FSHIFT + 1)
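
/*
 * Worked example of the decay (a sketch, assuming the usual FSHIFT of 11,
 * i.e. FSCALE = 2048): ccpu ~= 0.95123 * 2048 ~= 1948, so each once-per-second
 * sched_pstats() pass computes roughly
 *
 *	p_pctcpu = (p_pctcpu * 1948) >> 11	(multiply by ~0.951)
 *
 * before folding in the latest p_cpticks.  After 60 passes only
 * 0.951^60 = exp(-60/20) = exp(-3) ~= 5% of the original contribution
 * remains, which is the "decay 95% in 60 seconds" noted above.
 */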

/*
 * sched_pstats:
 *
 *	Update process statistics and check CPU resource allocation.
 *	Call scheduler-specific hook to eventually adjust process/LWP
 *	priorities.
 *
 *	XXXSMP This needs to be reorganised in order to reduce the locking
 *	burden.
 */
/* ARGSUSED */
void
sched_pstats(void *arg)
{
	struct rlimit *rlim;
	struct lwp *l;
	struct proc *p;
	int minslp, sig, clkhz;
	long runtm;

	sched_pstats_ticks++;

	mutex_enter(&proclist_mutex);
	PROCLIST_FOREACH(p, &allproc) {
		/*
		 * Increment time in/out of memory and sleep time (if
		 * sleeping).  We ignore overflow; with 16-bit int's
		 * (remember them?) overflow takes 45 days.
		 */
		minslp = 2;
		mutex_enter(&p->p_smutex);
		runtm = p->p_rtime.tv_sec;
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if ((l->l_flag & LW_IDLE) != 0)
				continue;
			lwp_lock(l);
			runtm += l->l_rtime.tv_sec;
			l->l_swtime++;
			if (l->l_stat == LSSLEEP || l->l_stat == LSSTOP ||
			    l->l_stat == LSSUSPENDED) {
				l->l_slptime++;
				minslp = min(minslp, l->l_slptime);
			} else
				minslp = 0;
			lwp_unlock(l);
		}

		/*
		 * Check if the process exceeds its CPU resource allocation.
		 * If over max, kill it.
		 */
		rlim = &p->p_rlimit[RLIMIT_CPU];
		sig = 0;
		if (runtm >= rlim->rlim_cur) {
			if (runtm >= rlim->rlim_max)
				sig = SIGKILL;
			else {
				sig = SIGXCPU;
				if (rlim->rlim_cur < rlim->rlim_max)
					rlim->rlim_cur += 5;
			}
		}

		mutex_spin_enter(&p->p_stmutex);
		if (minslp < 1) {
			/*
			 * p_pctcpu is only for ps.
			 */
			p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
			clkhz = stathz != 0 ? stathz : hz;
#if (FSHIFT >= CCPU_SHIFT)
			p->p_pctcpu += (clkhz == 100) ?
			    ((fixpt_t)p->p_cpticks) << (FSHIFT - CCPU_SHIFT) :
			    100 * (((fixpt_t)p->p_cpticks)
			    << (FSHIFT - CCPU_SHIFT)) / clkhz;
#else
			p->p_pctcpu += ((FSCALE - ccpu) *
			    (p->p_cpticks * FSCALE / clkhz)) >> FSHIFT;
#endif
			p->p_cpticks = 0;
		}

		sched_pstats_hook(p, minslp);
		mutex_spin_exit(&p->p_stmutex);
		mutex_exit(&p->p_smutex);
		if (sig) {
			psignal(p, sig);
		}
	}
	mutex_exit(&proclist_mutex);
	uvm_meter();
	wakeup(&lbolt);
	callout_schedule(&sched_pstats_ch, hz);
}