kern_clock.c revision 1.125 1 1.125 rmind /* $NetBSD: kern_clock.c,v 1.125 2008/07/02 19:38:37 rmind Exp $ */
2 1.52 thorpej
3 1.52 thorpej /*-
4 1.118 ad * Copyright (c) 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5 1.52 thorpej * All rights reserved.
6 1.52 thorpej *
7 1.52 thorpej * This code is derived from software contributed to The NetBSD Foundation
8 1.52 thorpej * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 1.52 thorpej * NASA Ames Research Center.
10 1.94 mycroft * This code is derived from software contributed to The NetBSD Foundation
11 1.94 mycroft * by Charles M. Hannum.
12 1.52 thorpej *
13 1.52 thorpej * Redistribution and use in source and binary forms, with or without
14 1.52 thorpej * modification, are permitted provided that the following conditions
15 1.52 thorpej * are met:
16 1.52 thorpej * 1. Redistributions of source code must retain the above copyright
17 1.52 thorpej * notice, this list of conditions and the following disclaimer.
18 1.52 thorpej * 2. Redistributions in binary form must reproduce the above copyright
19 1.52 thorpej * notice, this list of conditions and the following disclaimer in the
20 1.52 thorpej * documentation and/or other materials provided with the distribution.
21 1.52 thorpej *
22 1.52 thorpej * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 1.52 thorpej * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 1.52 thorpej * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 1.52 thorpej * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 1.52 thorpej * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 1.52 thorpej * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 1.52 thorpej * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 1.52 thorpej * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 1.52 thorpej * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 1.52 thorpej * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 1.52 thorpej * POSSIBILITY OF SUCH DAMAGE.
33 1.52 thorpej */
34 1.19 cgd
35 1.19 cgd /*-
36 1.19 cgd * Copyright (c) 1982, 1986, 1991, 1993
37 1.19 cgd * The Regents of the University of California. All rights reserved.
38 1.19 cgd * (c) UNIX System Laboratories, Inc.
39 1.19 cgd * All or some portions of this file are derived from material licensed
40 1.19 cgd * to the University of California by American Telephone and Telegraph
41 1.19 cgd * Co. or Unix System Laboratories, Inc. and are reproduced herein with
42 1.19 cgd * the permission of UNIX System Laboratories, Inc.
43 1.19 cgd *
44 1.19 cgd * Redistribution and use in source and binary forms, with or without
45 1.19 cgd * modification, are permitted provided that the following conditions
46 1.19 cgd * are met:
47 1.19 cgd * 1. Redistributions of source code must retain the above copyright
48 1.19 cgd * notice, this list of conditions and the following disclaimer.
49 1.19 cgd * 2. Redistributions in binary form must reproduce the above copyright
50 1.19 cgd * notice, this list of conditions and the following disclaimer in the
51 1.19 cgd * documentation and/or other materials provided with the distribution.
52 1.87 agc * 3. Neither the name of the University nor the names of its contributors
53 1.19 cgd * may be used to endorse or promote products derived from this software
54 1.19 cgd * without specific prior written permission.
55 1.19 cgd *
56 1.19 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 1.19 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 1.19 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 1.19 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 1.19 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 1.19 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 1.19 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 1.19 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 1.19 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 1.19 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 1.19 cgd * SUCH DAMAGE.
67 1.19 cgd *
68 1.19 cgd * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
69 1.19 cgd */
70 1.78 lukem
71 1.78 lukem #include <sys/cdefs.h>
72 1.125 rmind __KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.125 2008/07/02 19:38:37 rmind Exp $");
73 1.44 jonathan
74 1.44 jonathan #include "opt_ntp.h"
75 1.80 briggs #include "opt_perfctrs.h"
76 1.19 cgd
77 1.19 cgd #include <sys/param.h>
78 1.19 cgd #include <sys/systm.h>
79 1.19 cgd #include <sys/callout.h>
80 1.19 cgd #include <sys/kernel.h>
81 1.19 cgd #include <sys/proc.h>
82 1.19 cgd #include <sys/resourcevar.h>
83 1.25 christos #include <sys/signalvar.h>
84 1.26 christos #include <sys/sysctl.h>
85 1.27 jonathan #include <sys/timex.h>
86 1.45 ross #include <sys/sched.h>
87 1.82 thorpej #include <sys/time.h>
88 1.99 kardel #include <sys/timetc.h>
89 1.109 ad #include <sys/cpu.h>
90 1.118 ad #include <sys/atomic.h>
91 1.118 ad
92 1.118 ad #include <uvm/uvm_extern.h>
93 1.25 christos
94 1.19 cgd #ifdef GPROF
95 1.19 cgd #include <sys/gmon.h>
96 1.19 cgd #endif
97 1.19 cgd
98 1.19 cgd /*
99 1.19 cgd * Clock handling routines.
100 1.19 cgd *
101 1.19 cgd * This code is written to operate with two timers that run independently of
102 1.19 cgd * each other. The main clock, running hz times per second, is used to keep
103 1.19 cgd * track of real time. The second timer handles kernel and user profiling,
104 1.19 cgd * and does resource use estimation. If the second timer is programmable,
105 1.19 cgd * it is randomized to avoid aliasing between the two clocks. For example,
106 1.90 wiz * the randomization prevents an adversary from always giving up the CPU
107 1.19 cgd * just before its quantum expires. Otherwise, it would never accumulate
108 1.90 wiz * CPU ticks. The mean frequency of the second timer is stathz.
109 1.19 cgd *
110 1.19 cgd * If no second timer exists, stathz will be zero; in this case we drive
111 1.19 cgd * profiling and statistics off the main clock. This WILL NOT be accurate;
112 1.19 cgd * do not do it unless absolutely necessary.
113 1.19 cgd *
114 1.19 cgd * The statistics clock may (or may not) be run at a higher rate while
115 1.19 cgd * profiling. This profile clock runs at profhz. We require that profhz
116 1.19 cgd * be an integral multiple of stathz.
117 1.19 cgd *
118 1.19 cgd * If the statistics clock is running fast, it must be divided by the ratio
119 1.19 cgd * profhz/stathz for statistics. (For profiling, every tick counts.)
120 1.19 cgd */
121 1.19 cgd
/*
 * Clock rates and profiling state.  The rates are plain ints; a rate of
 * zero has a special meaning for stathz and schedhz (see below).
 */
int	stathz;			/* mean statistics clock rate; 0 = no second timer */
int	profhz;			/* profiling clock rate; initclocks() fills in 0 */
int	profsrc;		/* profiling source, tested against PROFSRC_CLOCK */
int	schedhz;		/* 0 = hardclock() calls schedclock() itself */
int	profprocs;		/* processes that currently have PST_PROFIL set */
int	hardclock_ticks;	/* hardclock() calls counted on the primary CPU */
int	psratio;		/* ratio: prof / stat */

static int	hardscheddiv;	/* hard => sched divider (used if schedhz == 0) */
static int	psdiv;		/* prof => stat divider */
131 1.19 cgd
132 1.99 kardel static u_int get_intr_timecount(struct timecounter *);
133 1.99 kardel
134 1.99 kardel static struct timecounter intr_timecounter = {
135 1.99 kardel get_intr_timecount, /* get_timecount */
136 1.99 kardel 0, /* no poll_pps */
137 1.99 kardel ~0u, /* counter_mask */
138 1.99 kardel 0, /* frequency */
139 1.99 kardel "clockinterrupt", /* name */
140 1.102 christos 0, /* quality - minimum implementation level for a clock */
141 1.102 christos NULL, /* prev */
142 1.102 christos NULL, /* next */
143 1.99 kardel };
144 1.99 kardel
145 1.99 kardel static u_int
146 1.104 yamt get_intr_timecount(struct timecounter *tc)
147 1.99 kardel {
148 1.104 yamt
149 1.100 drochner return (u_int)hardclock_ticks;
150 1.99 kardel }
151 1.73 thorpej
152 1.66 thorpej /*
153 1.19 cgd * Initialize clock frequencies and start both clocks running.
154 1.19 cgd */
155 1.19 cgd void
156 1.63 thorpej initclocks(void)
157 1.19 cgd {
158 1.55 augustss int i;
159 1.19 cgd
160 1.19 cgd /*
161 1.19 cgd * Set divisors to 1 (normal case) and let the machine-specific
162 1.19 cgd * code do its bit.
163 1.19 cgd */
164 1.70 sommerfe psdiv = 1;
165 1.99 kardel /*
166 1.99 kardel * provide minimum default time counter
167 1.99 kardel * will only run at interrupt resolution
168 1.99 kardel */
169 1.99 kardel intr_timecounter.tc_frequency = hz;
170 1.99 kardel tc_init(&intr_timecounter);
171 1.19 cgd cpu_initclocks();
172 1.19 cgd
173 1.19 cgd /*
174 1.108 yamt * Compute profhz and stathz, fix profhz if needed.
175 1.19 cgd */
176 1.19 cgd i = stathz ? stathz : hz;
177 1.19 cgd if (profhz == 0)
178 1.19 cgd profhz = i;
179 1.19 cgd psratio = profhz / i;
180 1.91 yamt if (schedhz == 0) {
181 1.91 yamt /* 16Hz is best */
182 1.114 ad hardscheddiv = hz / 16;
183 1.114 ad if (hardscheddiv <= 0)
184 1.114 ad panic("hardscheddiv");
185 1.91 yamt }
186 1.31 mycroft
187 1.19 cgd }
188 1.19 cgd
189 1.19 cgd /*
190 1.19 cgd * The real-time timer, interrupting hz times per second.
191 1.19 cgd */
192 1.19 cgd void
193 1.63 thorpej hardclock(struct clockframe *frame)
194 1.19 cgd {
195 1.82 thorpej struct lwp *l;
196 1.120 ad struct cpu_info *ci;
197 1.19 cgd
198 1.120 ad ci = curcpu();
199 1.114 ad l = ci->ci_data.cpu_onproc;
200 1.120 ad
201 1.120 ad timer_tick(l, CLKF_USERMODE(frame));
202 1.19 cgd
203 1.19 cgd /*
204 1.19 cgd * If no separate statistics clock is available, run it from here.
205 1.19 cgd */
206 1.19 cgd if (stathz == 0)
207 1.19 cgd statclock(frame);
208 1.114 ad /*
209 1.114 ad * If no separate schedclock is provided, call it here
210 1.114 ad * at about 16 Hz.
211 1.114 ad */
212 1.114 ad if (schedhz == 0) {
213 1.114 ad if ((int)(--ci->ci_schedstate.spc_schedticks) <= 0) {
214 1.114 ad schedclock(l);
215 1.114 ad ci->ci_schedstate.spc_schedticks = hardscheddiv;
216 1.114 ad }
217 1.114 ad }
218 1.108 yamt if ((--ci->ci_schedstate.spc_ticks) <= 0)
219 1.108 yamt sched_tick(ci);
220 1.93 perry
221 1.123 ad if (CPU_IS_PRIMARY(ci)) {
222 1.121 ad hardclock_ticks++;
223 1.121 ad tc_ticktock();
224 1.121 ad }
225 1.19 cgd
226 1.19 cgd /*
227 1.106 ad * Update real-time timeout queue. Callouts are processed at a
228 1.106 ad * very low CPU priority, so we don't keep the relatively high
229 1.106 ad * clock interrupt priority any longer than necessary.
230 1.106 ad */
231 1.109 ad callout_hardclock();
232 1.19 cgd }
233 1.19 cgd
234 1.19 cgd /*
235 1.19 cgd * Start profiling on a process.
236 1.19 cgd *
237 1.19 cgd * Kernel profiling passes proc0 which never exits and hence
238 1.19 cgd * keeps the profile clock running constantly.
239 1.19 cgd */
240 1.19 cgd void
241 1.63 thorpej startprofclock(struct proc *p)
242 1.19 cgd {
243 1.19 cgd
244 1.109 ad KASSERT(mutex_owned(&p->p_stmutex));
245 1.105 ad
246 1.105 ad if ((p->p_stflag & PST_PROFIL) == 0) {
247 1.105 ad p->p_stflag |= PST_PROFIL;
248 1.80 briggs /*
249 1.80 briggs * This is only necessary if using the clock as the
250 1.80 briggs * profiling source.
251 1.80 briggs */
252 1.70 sommerfe if (++profprocs == 1 && stathz != 0)
253 1.70 sommerfe psdiv = psratio;
254 1.19 cgd }
255 1.19 cgd }
256 1.19 cgd
257 1.19 cgd /*
258 1.19 cgd * Stop profiling on a process.
259 1.19 cgd */
260 1.19 cgd void
261 1.63 thorpej stopprofclock(struct proc *p)
262 1.19 cgd {
263 1.19 cgd
264 1.109 ad KASSERT(mutex_owned(&p->p_stmutex));
265 1.105 ad
266 1.105 ad if (p->p_stflag & PST_PROFIL) {
267 1.105 ad p->p_stflag &= ~PST_PROFIL;
268 1.80 briggs /*
269 1.80 briggs * This is only necessary if using the clock as the
270 1.80 briggs * profiling source.
271 1.80 briggs */
272 1.70 sommerfe if (--profprocs == 0 && stathz != 0)
273 1.70 sommerfe psdiv = 1;
274 1.19 cgd }
275 1.19 cgd }
276 1.19 cgd
#if defined(PERFCTRS)
/*
 * Profiling "tick" for a clock or event source that is independent of
 * the statistics clock.  At present the only such source is the
 * performance counters, which is why this is wrapped in PERFCTRS.
 *
 * A user-mode tick is charged to the interrupted PC of the current
 * process if it is being profiled.  A kernel-mode tick feeds the GPROF
 * kernel histogram (when configured) and, where LWP_PC is available,
 * the profiled process at the LWP's saved PC.
 */
void
proftick(struct clockframe *frame)
{
#ifdef GPROF
	struct gmonparam *gp;
	intptr_t idx;
#endif
	struct lwp *l;
	struct proc *p;

	l = curcpu()->ci_data.cpu_onproc;
	p = (l != NULL) ? l->l_proc : NULL;

	if (CLKF_USERMODE(frame)) {
		/* p_stmutex is held while p_stflag is tested and the sample recorded. */
		mutex_spin_enter(&p->p_stmutex);
		if ((p->p_stflag & PST_PROFIL) != 0)
			addupc_intr(l, CLKF_PC(frame));
		mutex_spin_exit(&p->p_stmutex);
		return;
	}

#ifdef GPROF
	/* Kernel histogram: bucket the PC offset from lowpc if it is in range. */
	gp = &_gmonparam;
	if (gp->state == GMON_PROF_ON) {
		idx = CLKF_PC(frame) - gp->lowpc;
		if (idx < gp->textsize) {
			idx /= HISTFRACTION * sizeof(*gp->kcount);
			gp->kcount[idx]++;
		}
	}
#endif
#ifdef LWP_PC
	if (p != NULL && (p->p_stflag & PST_PROFIL) != 0)
		addupc_intr(l, LWP_PC(l));
#endif
}
#endif
318 1.80 briggs
319 1.108 yamt void
320 1.108 yamt schedclock(struct lwp *l)
321 1.108 yamt {
322 1.118 ad struct cpu_info *ci;
323 1.118 ad
324 1.118 ad ci = l->l_cpu;
325 1.118 ad
326 1.118 ad /* Accumulate syscall and context switch counts. */
327 1.119 ad atomic_add_int((unsigned *)&uvmexp.swtch, ci->ci_data.cpu_nswtch);
328 1.118 ad ci->ci_data.cpu_nswtch = 0;
329 1.119 ad atomic_add_int((unsigned *)&uvmexp.syscalls, ci->ci_data.cpu_nsyscall);
330 1.118 ad ci->ci_data.cpu_nsyscall = 0;
331 1.124 ad atomic_add_int((unsigned *)&uvmexp.traps, ci->ci_data.cpu_ntrap);
332 1.124 ad ci->ci_data.cpu_ntrap = 0;
333 1.108 yamt
334 1.108 yamt if ((l->l_flag & LW_IDLE) != 0)
335 1.108 yamt return;
336 1.108 yamt
337 1.108 yamt sched_schedclock(l);
338 1.108 yamt }
339 1.108 yamt
340 1.19 cgd /*
341 1.19 cgd * Statistics clock. Grab profile sample, and if divider reaches 0,
342 1.19 cgd * do process and kernel statistics.
343 1.19 cgd */
344 1.19 cgd void
345 1.63 thorpej statclock(struct clockframe *frame)
346 1.19 cgd {
347 1.19 cgd #ifdef GPROF
348 1.55 augustss struct gmonparam *g;
349 1.68 eeh intptr_t i;
350 1.19 cgd #endif
351 1.60 thorpej struct cpu_info *ci = curcpu();
352 1.60 thorpej struct schedstate_percpu *spc = &ci->ci_schedstate;
353 1.55 augustss struct proc *p;
354 1.98 christos struct lwp *l;
355 1.19 cgd
356 1.70 sommerfe /*
357 1.70 sommerfe * Notice changes in divisor frequency, and adjust clock
358 1.70 sommerfe * frequency accordingly.
359 1.70 sommerfe */
360 1.70 sommerfe if (spc->spc_psdiv != psdiv) {
361 1.70 sommerfe spc->spc_psdiv = psdiv;
362 1.70 sommerfe spc->spc_pscnt = psdiv;
363 1.70 sommerfe if (psdiv == 1) {
364 1.70 sommerfe setstatclockrate(stathz);
365 1.70 sommerfe } else {
366 1.93 perry setstatclockrate(profhz);
367 1.70 sommerfe }
368 1.70 sommerfe }
369 1.114 ad l = ci->ci_data.cpu_onproc;
370 1.108 yamt if ((l->l_flag & LW_IDLE) != 0) {
371 1.108 yamt /*
372 1.108 yamt * don't account idle lwps as swapper.
373 1.108 yamt */
374 1.108 yamt p = NULL;
375 1.108 yamt } else {
376 1.108 yamt p = l->l_proc;
377 1.105 ad mutex_spin_enter(&p->p_stmutex);
378 1.108 yamt }
379 1.108 yamt
380 1.19 cgd if (CLKF_USERMODE(frame)) {
381 1.105 ad if ((p->p_stflag & PST_PROFIL) && profsrc == PROFSRC_CLOCK)
382 1.105 ad addupc_intr(l, CLKF_PC(frame));
383 1.105 ad if (--spc->spc_pscnt > 0) {
384 1.105 ad mutex_spin_exit(&p->p_stmutex);
385 1.19 cgd return;
386 1.105 ad }
387 1.105 ad
388 1.19 cgd /*
389 1.19 cgd * Came from user mode; CPU was in user state.
390 1.19 cgd * If this process is being profiled record the tick.
391 1.19 cgd */
392 1.19 cgd p->p_uticks++;
393 1.19 cgd if (p->p_nice > NZERO)
394 1.60 thorpej spc->spc_cp_time[CP_NICE]++;
395 1.19 cgd else
396 1.60 thorpej spc->spc_cp_time[CP_USER]++;
397 1.19 cgd } else {
398 1.19 cgd #ifdef GPROF
399 1.19 cgd /*
400 1.19 cgd * Kernel statistics are just like addupc_intr, only easier.
401 1.19 cgd */
402 1.19 cgd g = &_gmonparam;
403 1.80 briggs if (profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) {
404 1.19 cgd i = CLKF_PC(frame) - g->lowpc;
405 1.19 cgd if (i < g->textsize) {
406 1.19 cgd i /= HISTFRACTION * sizeof(*g->kcount);
407 1.19 cgd g->kcount[i]++;
408 1.19 cgd }
409 1.19 cgd }
410 1.19 cgd #endif
411 1.82 thorpej #ifdef LWP_PC
412 1.108 yamt if (p != NULL && profsrc == PROFSRC_CLOCK &&
413 1.108 yamt (p->p_stflag & PST_PROFIL)) {
414 1.105 ad addupc_intr(l, LWP_PC(l));
415 1.108 yamt }
416 1.72 mycroft #endif
417 1.105 ad if (--spc->spc_pscnt > 0) {
418 1.105 ad if (p != NULL)
419 1.105 ad mutex_spin_exit(&p->p_stmutex);
420 1.19 cgd return;
421 1.105 ad }
422 1.19 cgd /*
423 1.19 cgd * Came from kernel mode, so we were:
424 1.19 cgd * - handling an interrupt,
425 1.19 cgd * - doing syscall or trap work on behalf of the current
426 1.19 cgd * user process, or
427 1.19 cgd * - spinning in the idle loop.
428 1.19 cgd * Whichever it is, charge the time as appropriate.
429 1.19 cgd * Note that we charge interrupts to the current process,
430 1.19 cgd * regardless of whether they are ``for'' that process,
431 1.19 cgd * so that we know how much of its real time was spent
432 1.19 cgd * in ``non-process'' (i.e., interrupt) work.
433 1.19 cgd */
434 1.114 ad if (CLKF_INTR(frame) || (curlwp->l_pflag & LP_INTR) != 0) {
435 1.108 yamt if (p != NULL) {
436 1.19 cgd p->p_iticks++;
437 1.108 yamt }
438 1.60 thorpej spc->spc_cp_time[CP_INTR]++;
439 1.19 cgd } else if (p != NULL) {
440 1.19 cgd p->p_sticks++;
441 1.60 thorpej spc->spc_cp_time[CP_SYS]++;
442 1.108 yamt } else {
443 1.60 thorpej spc->spc_cp_time[CP_IDLE]++;
444 1.108 yamt }
445 1.19 cgd }
446 1.70 sommerfe spc->spc_pscnt = psdiv;
447 1.19 cgd
448 1.97 elad if (p != NULL) {
449 1.125 rmind atomic_inc_uint(&l->l_cpticks);
450 1.105 ad mutex_spin_exit(&p->p_stmutex);
451 1.108 yamt }
452 1.19 cgd }
453