/*	$NetBSD: kern_clock.c,v 1.19 1994/06/29 06:32:19 cgd Exp $	*/

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>

#include <machine/cpu.h>

#ifdef GPROF
#include <sys/gmon.h>
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */
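
/*
 * Worked example (hypothetical rates, not required by this code): with
 * stathz = 128 and profhz = 1024, psratio = profhz / stathz = 8.  While any
 * process is being profiled, startprofclock() sets psdiv = pscnt = 8 and
 * runs the statistics clock at profhz, so only every 8th statclock() tick
 * is counted toward statistics; every tick is still used for profiling.
 */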

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}

int	stathz;
int	profhz;
int	profprocs;
int	ticks;
static int psdiv, pscnt;		/* prof => stat divider */
int	psratio;			/* ratio: prof / stat */

volatile struct	timeval time;
volatile struct	timeval mono_time;

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks()
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int delta, needsoft;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
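	/*
	 * Illustration (made-up numbers): events due in 5, 7 and 12 ticks
	 * are queued with c_time deltas 5, 2, 5.  After one hardclock() the
	 * head delta becomes 4 (queue: 4, 2, 5); only the first entry ever
	 * needs to be decremented.
	 */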
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
	}

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.  The increment is just ``tick'' unless
	 * we are still adjusting the clock; see adjtime().
	 */
	ticks++;
	if (timedelta == 0)
		delta = tick;
	else {
		delta = tick + tickdelta;
		timedelta -= tickdelta;
	}
	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	}
}

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	s = splhigh();
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		c->c_next = callfree;
		callfree = c;
		splx(s);
		(*func)(arg);
		(void) splhigh();
	}
	splx(s);
}

/*
 * timeout --
 *	Execute a function after a specified length of time.
 *
 * untimeout --
 *	Cancel previous timeout function call.
 *
 * See AT&T BCI Driver Reference Manual for specification.  This
 * implementation differs from that one in that no identification
 * value is returned from timeout, rather, the original arguments
 * to timeout are used to identify entries for untimeout.
 */
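/*
 * Hypothetical usage from a driver (xx_watchdog and sc are illustrative
 * names, not part of this file):
 *
 *	timeout(xx_watchdog, sc, hz);		schedule a call in ~1 second
 *	untimeout(xx_watchdog, sc);		cancel it, matched by (func, arg)
 */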
void
timeout(ftn, arg, ticks)
	void (*ftn) __P((void *));
	void *arg;
	register int ticks;
{
	register struct callout *new, *p, *t;
	register int s;

	if (ticks <= 0)
		ticks = 1;

	/* Lock out the clock. */
	s = splhigh();

	/* Fill in the next free callout structure. */
	if (callfree == NULL)
		panic("timeout table full");
	new = callfree;
	callfree = new->c_next;
	new->c_arg = arg;
	new->c_func = ftn;

	/*
	 * The time for each event is stored as a difference from the time
	 * of the previous event on the queue.  Walk the queue, correcting
	 * the ticks argument for queue entries passed.  Correct the ticks
	 * value for the queue entry immediately after the insertion point
	 * as well.  Watch out for negative c_time values; these represent
	 * overdue events.
	 */
	for (p = &calltodo;
	    (t = p->c_next) != NULL && ticks > t->c_time; p = t)
		if (t->c_time > 0)
			ticks -= t->c_time;
	new->c_time = ticks;
	if (t != NULL)
		t->c_time -= ticks;

	/* Insert the new entry into the queue. */
	p->c_next = new;
	new->c_next = t;
	splx(s);
}

void
untimeout(ftn, arg)
	void (*ftn) __P((void *));
	void *arg;
{
	register struct callout *p, *t;
	register int s;

	s = splhigh();
	for (p = &calltodo; (t = p->c_next) != NULL; p = t)
		if (t->c_func == ftn && t->c_arg == arg) {
			/* Increment next entry's tick count. */
			if (t->c_next && t->c_time > 0)
				t->c_next->c_time += t->c_time;

			/* Move entry from callout queue to callfree queue. */
			p->c_next = t->c_next;
			t->c_next = callfree;
			callfree = t;
			break;
		}
	splx(s);
}

/*
 * Compute number of hz until specified time.  Used to
 * compute third argument to timeout() from an absolute time.
 */
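/*
 * Sketch of the intended use (xx_alarm, sc and atv are illustrative names):
 * to fire a callout at the absolute time held in "atv",
 *
 *	timeout(xx_alarm, sc, hzto(&atv));
 */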
int
hzto(tv)
	struct timeval *tv;
{
	register long ticks, sec;
	int s;

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
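	/*
	 * Checking the bounds above (assuming 32-bit longs and hz = 100):
	 * 0x7fffffff / 1000 - 1000 is about 2,146,483 seconds, just under
	 * 25 days, and 0x7fffffff ticks of 10ms each is roughly 248 days.
	 */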
	s = splhigh();
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

int	dk_ndrive = DK_NDRIVE;

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.  The priority of
	 * a process gets worse as it accumulates CPU time.  The cpu usage
	 * estimator (p_estcpu) is increased here.  The formula for computing
	 * priorities (in kern_synch.c) will compute a different value each
	 * time p_estcpu increases by 4.  The cpu usage estimator ramps up
	 * quite quickly when the process is running (linearly), and decays
	 * away exponentially, at a rate which is proportionally slower when
	 * the system is busy.  The basic principle is that the system will
	 * 90% forget that the process used a lot of CPU time in 5 * loadav
	 * seconds.  This causes the system to favor processes which haven't
	 * run much recently, and to round-robin among other processes.
	 */
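	/*
	 * Rough numbers (assuming the per-second decay used by kern_synch.c
	 * is (2 * loadav) / (2 * loadav + 1)): with loadav = 2 the factor is
	 * 4/5, and (4/5)^10 is about 0.11, so after 5 * loadav = 10 seconds
	 * roughly 90% of p_estcpu has indeed been forgotten.
	 */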
	if (p != NULL) {
		p->p_cpticks++;
		if (++p->p_estcpu == 0)
			p->p_estcpu--;
		if ((p->p_estcpu & 3) == 0) {
			resetpriority(p);
			if (p->p_priority >= PUSER)
				p->p_priority = p->p_usrpri;
		}
	}
}

/*
 * Return information about system clocks.
 */
sysctl_clockrate(where, sizep)
	register char *where;
	size_t *sizep;
{
	struct clockinfo clkinfo;

	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
}
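
/*
 * A userland consumer would normally read this as kern.clockrate through
 * sysctl(3); a minimal sketch (error handling omitted, needs <sys/sysctl.h>):
 *
 *	int mib[2] = { CTL_KERN, KERN_CLOCKRATE };
 *	struct clockinfo ci;
 *	size_t len = sizeof(ci);
 *
 *	sysctl(mib, 2, &ci, &len, NULL, 0);
 *	printf("hz=%d tick=%d stathz=%d profhz=%d\n",
 *	    ci.hz, ci.tick, ci.stathz, ci.profhz);
 */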

#ifdef DDB
#include <ddb/db_access.h>
#include <ddb/db_sym.h>

void db_show_callout(long addr, int haddr, int count, char *modif)
{
	register struct callout *p1;
	register int cum;
	register int s;
	db_expr_t offset;
	char *name;

	db_printf(" cum ticks arg func\n");
	s = splhigh();
	for (cum = 0, p1 = calltodo.c_next; p1; p1 = p1->c_next) {
		register int t = p1->c_time;

		if (t > 0)
			cum += t;

		db_find_sym_and_offset(p1->c_func, &name, &offset);
		if (name == NULL)
			name = "?";

		db_printf("%9d %9d %8x %s (%x)\n",
		    cum, t, p1->c_arg, name, p1->c_func);
	}
	splx(s);
}
#endif