kern_clock.c revision 1.126 1 1.126 pooka /* $NetBSD: kern_clock.c,v 1.126 2008/10/05 21:57:20 pooka Exp $ */
2 1.52 thorpej
3 1.52 thorpej /*-
4 1.118 ad * Copyright (c) 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5 1.52 thorpej * All rights reserved.
6 1.52 thorpej *
7 1.52 thorpej * This code is derived from software contributed to The NetBSD Foundation
8 1.52 thorpej * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 1.52 thorpej * NASA Ames Research Center.
10 1.94 mycroft * This code is derived from software contributed to The NetBSD Foundation
11 1.94 mycroft * by Charles M. Hannum.
12 1.52 thorpej *
13 1.52 thorpej * Redistribution and use in source and binary forms, with or without
14 1.52 thorpej * modification, are permitted provided that the following conditions
15 1.52 thorpej * are met:
16 1.52 thorpej * 1. Redistributions of source code must retain the above copyright
17 1.52 thorpej * notice, this list of conditions and the following disclaimer.
18 1.52 thorpej * 2. Redistributions in binary form must reproduce the above copyright
19 1.52 thorpej * notice, this list of conditions and the following disclaimer in the
20 1.52 thorpej * documentation and/or other materials provided with the distribution.
21 1.52 thorpej *
22 1.52 thorpej * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 1.52 thorpej * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 1.52 thorpej * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 1.52 thorpej * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 1.52 thorpej * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 1.52 thorpej * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 1.52 thorpej * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 1.52 thorpej * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 1.52 thorpej * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 1.52 thorpej * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 1.52 thorpej * POSSIBILITY OF SUCH DAMAGE.
33 1.52 thorpej */
34 1.19 cgd
35 1.19 cgd /*-
36 1.19 cgd * Copyright (c) 1982, 1986, 1991, 1993
37 1.19 cgd * The Regents of the University of California. All rights reserved.
38 1.19 cgd * (c) UNIX System Laboratories, Inc.
39 1.19 cgd * All or some portions of this file are derived from material licensed
40 1.19 cgd * to the University of California by American Telephone and Telegraph
41 1.19 cgd * Co. or Unix System Laboratories, Inc. and are reproduced herein with
42 1.19 cgd * the permission of UNIX System Laboratories, Inc.
43 1.19 cgd *
44 1.19 cgd * Redistribution and use in source and binary forms, with or without
45 1.19 cgd * modification, are permitted provided that the following conditions
46 1.19 cgd * are met:
47 1.19 cgd * 1. Redistributions of source code must retain the above copyright
48 1.19 cgd * notice, this list of conditions and the following disclaimer.
49 1.19 cgd * 2. Redistributions in binary form must reproduce the above copyright
50 1.19 cgd * notice, this list of conditions and the following disclaimer in the
51 1.19 cgd * documentation and/or other materials provided with the distribution.
52 1.87 agc * 3. Neither the name of the University nor the names of its contributors
53 1.19 cgd * may be used to endorse or promote products derived from this software
54 1.19 cgd * without specific prior written permission.
55 1.19 cgd *
56 1.19 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 1.19 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 1.19 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 1.19 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 1.19 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 1.19 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 1.19 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 1.19 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 1.19 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 1.19 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 1.19 cgd * SUCH DAMAGE.
67 1.19 cgd *
68 1.19 cgd * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
69 1.19 cgd */
70 1.78 lukem
71 1.78 lukem #include <sys/cdefs.h>
72 1.126 pooka __KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.126 2008/10/05 21:57:20 pooka Exp $");
73 1.44 jonathan
74 1.44 jonathan #include "opt_ntp.h"
75 1.80 briggs #include "opt_perfctrs.h"
76 1.19 cgd
77 1.19 cgd #include <sys/param.h>
78 1.19 cgd #include <sys/systm.h>
79 1.19 cgd #include <sys/callout.h>
80 1.19 cgd #include <sys/kernel.h>
81 1.19 cgd #include <sys/proc.h>
82 1.19 cgd #include <sys/resourcevar.h>
83 1.25 christos #include <sys/signalvar.h>
84 1.26 christos #include <sys/sysctl.h>
85 1.27 jonathan #include <sys/timex.h>
86 1.45 ross #include <sys/sched.h>
87 1.82 thorpej #include <sys/time.h>
88 1.99 kardel #include <sys/timetc.h>
89 1.109 ad #include <sys/cpu.h>
90 1.118 ad #include <sys/atomic.h>
91 1.118 ad
92 1.118 ad #include <uvm/uvm_extern.h>
93 1.25 christos
94 1.19 cgd #ifdef GPROF
95 1.19 cgd #include <sys/gmon.h>
96 1.19 cgd #endif
97 1.19 cgd
98 1.19 cgd /*
99 1.19 cgd * Clock handling routines.
100 1.19 cgd *
101 1.19 cgd * This code is written to operate with two timers that run independently of
102 1.19 cgd * each other. The main clock, running hz times per second, is used to keep
103 1.19 cgd * track of real time. The second timer handles kernel and user profiling,
104 1.19 cgd * and does resource use estimation. If the second timer is programmable,
105 1.19 cgd * it is randomized to avoid aliasing between the two clocks. For example,
106 1.90 wiz * the randomization prevents an adversary from always giving up the CPU
107 1.19 cgd * just before its quantum expires. Otherwise, it would never accumulate
108 1.90 wiz * CPU ticks. The mean frequency of the second timer is stathz.
109 1.19 cgd *
110 1.19 cgd * If no second timer exists, stathz will be zero; in this case we drive
111 1.19 cgd * profiling and statistics off the main clock. This WILL NOT be accurate;
112 1.19 cgd * do not do it unless absolutely necessary.
113 1.19 cgd *
114 1.19 cgd * The statistics clock may (or may not) be run at a higher rate while
115 1.19 cgd * profiling. This profile clock runs at profhz. We require that profhz
116 1.19 cgd * be an integral multiple of stathz.
117 1.19 cgd *
118 1.19 cgd * If the statistics clock is running fast, it must be divided by the ratio
119 1.19 cgd * profhz/stathz for statistics. (For profiling, every tick counts.)
120 1.19 cgd */
121 1.19 cgd
/*
 * Clock rates and profiling state shared by the routines in this file.
 */
int	stathz;			/* statistics clock rate; 0 => run from hardclock() */
int	profhz;			/* profiling clock rate; 0 => set in initclocks() */
int	profsrc;		/* profiling source; compared with PROFSRC_CLOCK */
int	schedhz;		/* schedclock() rate; 0 => run from hardclock() */
int	profprocs;		/* count of processes with PST_PROFIL set */
int	hardclock_ticks;	/* hardclock() ticks counted on the primary CPU */
static int hardscheddiv;	/* hard => sched divider (used if schedhz == 0) */
static int psdiv;		/* prof => stat divider */
int	psratio;		/* ratio: prof / stat */
131 1.19 cgd
132 1.99 kardel static u_int get_intr_timecount(struct timecounter *);
133 1.99 kardel
134 1.99 kardel static struct timecounter intr_timecounter = {
135 1.99 kardel get_intr_timecount, /* get_timecount */
136 1.99 kardel 0, /* no poll_pps */
137 1.99 kardel ~0u, /* counter_mask */
138 1.99 kardel 0, /* frequency */
139 1.99 kardel "clockinterrupt", /* name */
140 1.102 christos 0, /* quality - minimum implementation level for a clock */
141 1.102 christos NULL, /* prev */
142 1.102 christos NULL, /* next */
143 1.99 kardel };
144 1.99 kardel
145 1.99 kardel static u_int
146 1.104 yamt get_intr_timecount(struct timecounter *tc)
147 1.99 kardel {
148 1.104 yamt
149 1.100 drochner return (u_int)hardclock_ticks;
150 1.99 kardel }
151 1.73 thorpej
152 1.66 thorpej /*
153 1.19 cgd * Initialize clock frequencies and start both clocks running.
154 1.19 cgd */
155 1.19 cgd void
156 1.63 thorpej initclocks(void)
157 1.19 cgd {
158 1.55 augustss int i;
159 1.19 cgd
160 1.19 cgd /*
161 1.19 cgd * Set divisors to 1 (normal case) and let the machine-specific
162 1.19 cgd * code do its bit.
163 1.19 cgd */
164 1.70 sommerfe psdiv = 1;
165 1.99 kardel /*
166 1.99 kardel * provide minimum default time counter
167 1.99 kardel * will only run at interrupt resolution
168 1.99 kardel */
169 1.99 kardel intr_timecounter.tc_frequency = hz;
170 1.99 kardel tc_init(&intr_timecounter);
171 1.19 cgd cpu_initclocks();
172 1.19 cgd
173 1.19 cgd /*
174 1.108 yamt * Compute profhz and stathz, fix profhz if needed.
175 1.19 cgd */
176 1.19 cgd i = stathz ? stathz : hz;
177 1.19 cgd if (profhz == 0)
178 1.19 cgd profhz = i;
179 1.19 cgd psratio = profhz / i;
180 1.91 yamt if (schedhz == 0) {
181 1.91 yamt /* 16Hz is best */
182 1.114 ad hardscheddiv = hz / 16;
183 1.114 ad if (hardscheddiv <= 0)
184 1.114 ad panic("hardscheddiv");
185 1.91 yamt }
186 1.31 mycroft
187 1.19 cgd }
188 1.19 cgd
189 1.19 cgd /*
190 1.19 cgd * The real-time timer, interrupting hz times per second.
191 1.19 cgd */
192 1.19 cgd void
193 1.63 thorpej hardclock(struct clockframe *frame)
194 1.19 cgd {
195 1.82 thorpej struct lwp *l;
196 1.120 ad struct cpu_info *ci;
197 1.19 cgd
198 1.120 ad ci = curcpu();
199 1.114 ad l = ci->ci_data.cpu_onproc;
200 1.120 ad
201 1.120 ad timer_tick(l, CLKF_USERMODE(frame));
202 1.19 cgd
203 1.19 cgd /*
204 1.19 cgd * If no separate statistics clock is available, run it from here.
205 1.19 cgd */
206 1.19 cgd if (stathz == 0)
207 1.19 cgd statclock(frame);
208 1.114 ad /*
209 1.114 ad * If no separate schedclock is provided, call it here
210 1.114 ad * at about 16 Hz.
211 1.114 ad */
212 1.114 ad if (schedhz == 0) {
213 1.114 ad if ((int)(--ci->ci_schedstate.spc_schedticks) <= 0) {
214 1.114 ad schedclock(l);
215 1.114 ad ci->ci_schedstate.spc_schedticks = hardscheddiv;
216 1.114 ad }
217 1.114 ad }
218 1.108 yamt if ((--ci->ci_schedstate.spc_ticks) <= 0)
219 1.108 yamt sched_tick(ci);
220 1.93 perry
221 1.123 ad if (CPU_IS_PRIMARY(ci)) {
222 1.121 ad hardclock_ticks++;
223 1.121 ad tc_ticktock();
224 1.121 ad }
225 1.19 cgd
226 1.19 cgd /*
227 1.126 pooka * Update real-time timeout queue.
228 1.106 ad */
229 1.109 ad callout_hardclock();
230 1.19 cgd }
231 1.19 cgd
232 1.19 cgd /*
233 1.19 cgd * Start profiling on a process.
234 1.19 cgd *
235 1.19 cgd * Kernel profiling passes proc0 which never exits and hence
236 1.19 cgd * keeps the profile clock running constantly.
237 1.19 cgd */
238 1.19 cgd void
239 1.63 thorpej startprofclock(struct proc *p)
240 1.19 cgd {
241 1.19 cgd
242 1.109 ad KASSERT(mutex_owned(&p->p_stmutex));
243 1.105 ad
244 1.105 ad if ((p->p_stflag & PST_PROFIL) == 0) {
245 1.105 ad p->p_stflag |= PST_PROFIL;
246 1.80 briggs /*
247 1.80 briggs * This is only necessary if using the clock as the
248 1.80 briggs * profiling source.
249 1.80 briggs */
250 1.70 sommerfe if (++profprocs == 1 && stathz != 0)
251 1.70 sommerfe psdiv = psratio;
252 1.19 cgd }
253 1.19 cgd }
254 1.19 cgd
255 1.19 cgd /*
256 1.19 cgd * Stop profiling on a process.
257 1.19 cgd */
258 1.19 cgd void
259 1.63 thorpej stopprofclock(struct proc *p)
260 1.19 cgd {
261 1.19 cgd
262 1.109 ad KASSERT(mutex_owned(&p->p_stmutex));
263 1.105 ad
264 1.105 ad if (p->p_stflag & PST_PROFIL) {
265 1.105 ad p->p_stflag &= ~PST_PROFIL;
266 1.80 briggs /*
267 1.80 briggs * This is only necessary if using the clock as the
268 1.80 briggs * profiling source.
269 1.80 briggs */
270 1.70 sommerfe if (--profprocs == 0 && stathz != 0)
271 1.70 sommerfe psdiv = 1;
272 1.19 cgd }
273 1.19 cgd }
274 1.19 cgd
#if defined(PERFCTRS)
/*
 * Independent profiling "tick" in case we're using a separate
 * clock or profiling event source.  Currently, that's just
 * performance counters--hence the wrapper.
 *
 * frame: the interrupted context; supplies the mode (user or kernel)
 * and the program counter to record.
 */
void
proftick(struct clockframe *frame)
{
#ifdef GPROF
	struct gmonparam *g;
	intptr_t i;
#endif
	struct lwp *l;
	struct proc *p;

	l = curcpu()->ci_data.cpu_onproc;
	p = (l ? l->l_proc : NULL);
	if (CLKF_USERMODE(frame)) {
		/*
		 * p is dereferenced unconditionally below, so a tick
		 * taken in user mode must have a process behind it.
		 */
		KASSERT(p != NULL);
		mutex_spin_enter(&p->p_stmutex);
		if (p->p_stflag & PST_PROFIL)
			addupc_intr(l, CLKF_PC(frame));
		mutex_spin_exit(&p->p_stmutex);
	} else {
#ifdef GPROF
		/* Kernel profiling: bump the histogram bucket for this PC. */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#ifdef LWP_PC
		/*
		 * Interrupted in the kernel: record the LWP's PC as
		 * given by LWP_PC() if its process is being profiled.
		 * NOTE(review): p_stflag is read here without taking
		 * p_stmutex, unlike the user-mode path -- confirm that
		 * this is intended.
		 */
		if (p != NULL && (p->p_stflag & PST_PROFIL) != 0)
			addupc_intr(l, LWP_PC(l));
#endif
	}
}
#endif
316 1.80 briggs
317 1.108 yamt void
318 1.108 yamt schedclock(struct lwp *l)
319 1.108 yamt {
320 1.118 ad struct cpu_info *ci;
321 1.118 ad
322 1.118 ad ci = l->l_cpu;
323 1.118 ad
324 1.118 ad /* Accumulate syscall and context switch counts. */
325 1.119 ad atomic_add_int((unsigned *)&uvmexp.swtch, ci->ci_data.cpu_nswtch);
326 1.118 ad ci->ci_data.cpu_nswtch = 0;
327 1.119 ad atomic_add_int((unsigned *)&uvmexp.syscalls, ci->ci_data.cpu_nsyscall);
328 1.118 ad ci->ci_data.cpu_nsyscall = 0;
329 1.124 ad atomic_add_int((unsigned *)&uvmexp.traps, ci->ci_data.cpu_ntrap);
330 1.124 ad ci->ci_data.cpu_ntrap = 0;
331 1.108 yamt
332 1.108 yamt if ((l->l_flag & LW_IDLE) != 0)
333 1.108 yamt return;
334 1.108 yamt
335 1.108 yamt sched_schedclock(l);
336 1.108 yamt }
337 1.108 yamt
338 1.19 cgd /*
339 1.19 cgd * Statistics clock. Grab profile sample, and if divider reaches 0,
340 1.19 cgd * do process and kernel statistics.
341 1.19 cgd */
342 1.19 cgd void
343 1.63 thorpej statclock(struct clockframe *frame)
344 1.19 cgd {
345 1.19 cgd #ifdef GPROF
346 1.55 augustss struct gmonparam *g;
347 1.68 eeh intptr_t i;
348 1.19 cgd #endif
349 1.60 thorpej struct cpu_info *ci = curcpu();
350 1.60 thorpej struct schedstate_percpu *spc = &ci->ci_schedstate;
351 1.55 augustss struct proc *p;
352 1.98 christos struct lwp *l;
353 1.19 cgd
354 1.70 sommerfe /*
355 1.70 sommerfe * Notice changes in divisor frequency, and adjust clock
356 1.70 sommerfe * frequency accordingly.
357 1.70 sommerfe */
358 1.70 sommerfe if (spc->spc_psdiv != psdiv) {
359 1.70 sommerfe spc->spc_psdiv = psdiv;
360 1.70 sommerfe spc->spc_pscnt = psdiv;
361 1.70 sommerfe if (psdiv == 1) {
362 1.70 sommerfe setstatclockrate(stathz);
363 1.70 sommerfe } else {
364 1.93 perry setstatclockrate(profhz);
365 1.70 sommerfe }
366 1.70 sommerfe }
367 1.114 ad l = ci->ci_data.cpu_onproc;
368 1.108 yamt if ((l->l_flag & LW_IDLE) != 0) {
369 1.108 yamt /*
370 1.108 yamt * don't account idle lwps as swapper.
371 1.108 yamt */
372 1.108 yamt p = NULL;
373 1.108 yamt } else {
374 1.108 yamt p = l->l_proc;
375 1.105 ad mutex_spin_enter(&p->p_stmutex);
376 1.108 yamt }
377 1.108 yamt
378 1.19 cgd if (CLKF_USERMODE(frame)) {
379 1.105 ad if ((p->p_stflag & PST_PROFIL) && profsrc == PROFSRC_CLOCK)
380 1.105 ad addupc_intr(l, CLKF_PC(frame));
381 1.105 ad if (--spc->spc_pscnt > 0) {
382 1.105 ad mutex_spin_exit(&p->p_stmutex);
383 1.19 cgd return;
384 1.105 ad }
385 1.105 ad
386 1.19 cgd /*
387 1.19 cgd * Came from user mode; CPU was in user state.
388 1.19 cgd * If this process is being profiled record the tick.
389 1.19 cgd */
390 1.19 cgd p->p_uticks++;
391 1.19 cgd if (p->p_nice > NZERO)
392 1.60 thorpej spc->spc_cp_time[CP_NICE]++;
393 1.19 cgd else
394 1.60 thorpej spc->spc_cp_time[CP_USER]++;
395 1.19 cgd } else {
396 1.19 cgd #ifdef GPROF
397 1.19 cgd /*
398 1.19 cgd * Kernel statistics are just like addupc_intr, only easier.
399 1.19 cgd */
400 1.19 cgd g = &_gmonparam;
401 1.80 briggs if (profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) {
402 1.19 cgd i = CLKF_PC(frame) - g->lowpc;
403 1.19 cgd if (i < g->textsize) {
404 1.19 cgd i /= HISTFRACTION * sizeof(*g->kcount);
405 1.19 cgd g->kcount[i]++;
406 1.19 cgd }
407 1.19 cgd }
408 1.19 cgd #endif
409 1.82 thorpej #ifdef LWP_PC
410 1.108 yamt if (p != NULL && profsrc == PROFSRC_CLOCK &&
411 1.108 yamt (p->p_stflag & PST_PROFIL)) {
412 1.105 ad addupc_intr(l, LWP_PC(l));
413 1.108 yamt }
414 1.72 mycroft #endif
415 1.105 ad if (--spc->spc_pscnt > 0) {
416 1.105 ad if (p != NULL)
417 1.105 ad mutex_spin_exit(&p->p_stmutex);
418 1.19 cgd return;
419 1.105 ad }
420 1.19 cgd /*
421 1.19 cgd * Came from kernel mode, so we were:
422 1.19 cgd * - handling an interrupt,
423 1.19 cgd * - doing syscall or trap work on behalf of the current
424 1.19 cgd * user process, or
425 1.19 cgd * - spinning in the idle loop.
426 1.19 cgd * Whichever it is, charge the time as appropriate.
427 1.19 cgd * Note that we charge interrupts to the current process,
428 1.19 cgd * regardless of whether they are ``for'' that process,
429 1.19 cgd * so that we know how much of its real time was spent
430 1.19 cgd * in ``non-process'' (i.e., interrupt) work.
431 1.19 cgd */
432 1.114 ad if (CLKF_INTR(frame) || (curlwp->l_pflag & LP_INTR) != 0) {
433 1.108 yamt if (p != NULL) {
434 1.19 cgd p->p_iticks++;
435 1.108 yamt }
436 1.60 thorpej spc->spc_cp_time[CP_INTR]++;
437 1.19 cgd } else if (p != NULL) {
438 1.19 cgd p->p_sticks++;
439 1.60 thorpej spc->spc_cp_time[CP_SYS]++;
440 1.108 yamt } else {
441 1.60 thorpej spc->spc_cp_time[CP_IDLE]++;
442 1.108 yamt }
443 1.19 cgd }
444 1.70 sommerfe spc->spc_pscnt = psdiv;
445 1.19 cgd
446 1.97 elad if (p != NULL) {
447 1.125 rmind atomic_inc_uint(&l->l_cpticks);
448 1.105 ad mutex_spin_exit(&p->p_stmutex);
449 1.108 yamt }
450 1.19 cgd }
451