/*	$NetBSD: kern_clock.c,v 1.26 1996/02/09 18:59:24 christos Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/cpu.h>
#include <vm/vm.h>
#include <sys/sysctl.h>

#include <machine/cpu.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
	\
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}
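
/*
 * For example, bumping { tv_sec = 9, tv_usec = 999900 } by 200
 * microseconds overflows tv_usec to 1000100, so the macro leaves
 * { tv_sec = 10, tv_usec = 100 }.
 */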

int	stathz;
int	profhz;
int	profprocs;
int	ticks;
static int psdiv, pscnt;		/* prof => stat divider */
int	psratio;			/* ratio: prof / stat */
int	tickfix, tickfixinterval;	/* used if tick not really integral */
static int tickfixcnt;			/* number of ticks since last fix */

volatile struct	timeval time;
volatile struct	timeval mono_time;

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks()
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}
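
/*
 * As an illustration (the values vary by port): with stathz = 128
 * and profhz = 1024, psratio computes to 8, so while profiling only
 * every 8th statclock tick is charged to the statistics counters.
 */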

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int delta, needsoft;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
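	/*
	 * For example, events due in 3, 5, 5, and 9 ticks from now are
	 * queued with c_time deltas of 3, 2, 0, and 4; decrementing the
	 * head's 3 to 2 moves the whole set one tick closer.
	 */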
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
	}

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.  The increment is normally just
	 * ``tick''.  If the machine is one which has a clock frequency
	 * such that ``hz'' does not divide the second evenly into
	 * microseconds, a periodic adjustment must be applied.  Finally,
	 * if we are still adjusting the time (see adjtime()),
	 * ``tickdelta'' may also be added in.
	 */
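	/*
	 * For instance, with hz = 1024 the truncated tick is 976us,
	 * leaving 576us per second unaccounted for; machine-dependent
	 * code can cover that by setting tickfix = 9 and
	 * tickfixinterval = 16 (9us added 64 times a second).
	 */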
	ticks++;
	delta = tick;
	if (tickfix) {
		tickfixcnt++;
		if (tickfixcnt >= tickfixinterval) {
			delta += tickfix;
			tickfixcnt = 0;
		}
	}
	if (timedelta != 0) {
		delta = tick + tickdelta;
		timedelta -= tickdelta;
	}
	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	}
}

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	s = splhigh();
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		c->c_next = callfree;
		callfree = c;
		splx(s);
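		/*
		 * The entry has already been unlinked and freed, so it
		 * is safe to drop back to the entry priority while the
		 * handler runs; c_func may itself call timeout().
		 */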
		(*func)(arg);
		(void) splhigh();
	}
	splx(s);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 * See AT&T BCI Driver Reference Manual for specification.  This
 * implementation differs from that one in that no identification
 * value is returned from timeout, rather, the original arguments
 * to timeout are used to identify entries for untimeout.
 */
void
timeout(ftn, arg, ticks)
	void (*ftn) __P((void *));
	void *arg;
	register int ticks;
{
	register struct callout *new, *p, *t;
	register int s;

	if (ticks <= 0)
		ticks = 1;

	/* Lock out the clock. */
	s = splhigh();

	/* Fill in the next free callout structure. */
	if (callfree == NULL)
		panic("timeout table full");
	new = callfree;
	callfree = new->c_next;
	new->c_arg = arg;
	new->c_func = ftn;

	/*
	 * The time for each event is stored as a difference from the time
	 * of the previous event on the queue.  Walk the queue, correcting
	 * the ticks argument for queue entries passed.  Correct the ticks
	 * value for the queue entry immediately after the insertion point
	 * as well.  Watch out for negative c_time values; these represent
	 * overdue events.
	 */
	for (p = &calltodo;
	    (t = p->c_next) != NULL && ticks > t->c_time; p = t)
		if (t->c_time > 0)
			ticks -= t->c_time;
	new->c_time = ticks;
	if (t != NULL)
		t->c_time -= ticks;

	/* Insert the new entry into the queue. */
	p->c_next = new;
	new->c_next = t;
	splx(s);
}
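
/*
 * Usage sketch (not part of this file; xx_watchdog and sc are
 * hypothetical driver names): to run xx_watchdog(sc) roughly one
 * second from now, and later cancel it with the same pair:
 *
 *	timeout(xx_watchdog, sc, hz);
 *	...
 *	untimeout(xx_watchdog, sc);
 */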

void
untimeout(ftn, arg)
	void (*ftn) __P((void *));
	void *arg;
{
	register struct callout *p, *t;
	register int s;

	s = splhigh();
	for (p = &calltodo; (t = p->c_next) != NULL; p = t)
		if (t->c_func == ftn && t->c_arg == arg) {
			/* Increment next entry's tick count. */
			if (t->c_next && t->c_time > 0)
				t->c_next->c_time += t->c_time;

			/* Move entry from callout queue to callfree queue. */
			p->c_next = t->c_next;
			t->c_next = callfree;
			callfree = t;
			break;
		}
	splx(s);
}

/*
 * Compute number of hz until specified time.  Used to
 * compute third argument to timeout() from an absolute time.
 */
int
hzto(tv)
	struct timeval *tv;
{
	register long ticks, sec;
	int s;

	/*
	 * If number of microseconds will fit in 32 bit arithmetic,
	 * then compute number of microseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.  (We must
	 * compute in microseconds, because hz can be greater than 1000,
	 * and thus tick can be less than one millisecond).
	 *
	 * Delta times less than 0x7fffffff microseconds (about 36
	 * minutes) can be computed ``exactly''.  (Note that if hz would
	 * yield a non-integral number of us per tick, i.e. tickfix is
	 * nonzero, timeouts can be a tick longer than they should be.)
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
	s = splhigh();
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000000 - 1)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000000 +
			(tv->tv_usec - time.tv_usec)) / tick;
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}
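
/*
 * For example, with hz = 100 (tick == 10000us) a tv ten seconds
 * ahead of the current time yields 10000000 / 10000 == 1000 ticks
 * via the exact branch above.
 */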

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
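/*
 * While any process is being profiled, psdiv == psratio (e.g. 8 in
 * the stathz = 128, profhz = 1024 illustration above), so pscnt
 * lets only every psratio'th tick past the early returns below;
 * otherwise psdiv == 1 and every tick is a statistics tick.
 */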
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;

	/*
	 * XXX	Support old-style instrumentation for now.
	 *
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principle is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
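	/*
	 * E.g. at stathz = 128 a compute-bound process takes a
	 * statistics tick 128 times a second, so its priority is
	 * recomputed via resetpriority() about 32 times a second
	 * (once per four increments of p_estcpu).
	 */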
	if (p != NULL) {
		p->p_cpticks++;
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}
	}
}

/*
 * Return information about system clocks.
 */
int
sysctl_clockrate(where, sizep)
	register char *where;
	size_t *sizep;
{
	struct clockinfo clkinfo;

	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.hz = hz;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
}
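
/*
 * This backs the kern.clockrate sysctl; on a typical hz = 100
 * configuration userland would read back tick = 10000, hz = 100,
 * and (absent a separate statistics clock) stathz reported as hz.
 */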

#ifdef DDB
#include <machine/db_machdep.h>

#include <ddb/db_interface.h>
#include <ddb/db_access.h>
#include <ddb/db_sym.h>
#include <ddb/db_output.h>

void
db_show_callout(addr, haddr, count, modif)
	db_expr_t addr;
	int haddr;
	db_expr_t count;
	char *modif;
{
	register struct callout *p1;
	register int	cum;
	register int	s;
	db_expr_t	offset;
	char		*name;

	db_printf("      cum     ticks      arg  func\n");
	s = splhigh();
	for (cum = 0, p1 = calltodo.c_next; p1; p1 = p1->c_next) {
		register int t = p1->c_time;

		if (t > 0)
			cum += t;

		db_find_sym_and_offset((db_addr_t)p1->c_func, &name, &offset);
		if (name == NULL)
			name = "?";

		db_printf("%9d %9d %8x  %s (%x)\n",
		    cum, t, p1->c_arg, name, p1->c_func);
	}
	splx(s);
}
#endif