kern_clock.c revision 1.131 1 1.131 chs /* $NetBSD: kern_clock.c,v 1.131 2012/12/02 01:05:16 chs Exp $ */
2 1.52 thorpej
3 1.52 thorpej /*-
4 1.118 ad * Copyright (c) 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5 1.52 thorpej * All rights reserved.
6 1.52 thorpej *
7 1.52 thorpej * This code is derived from software contributed to The NetBSD Foundation
8 1.52 thorpej * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 1.52 thorpej * NASA Ames Research Center.
10 1.94 mycroft * This code is derived from software contributed to The NetBSD Foundation
11 1.94 mycroft * by Charles M. Hannum.
12 1.52 thorpej *
13 1.52 thorpej * Redistribution and use in source and binary forms, with or without
14 1.52 thorpej * modification, are permitted provided that the following conditions
15 1.52 thorpej * are met:
16 1.52 thorpej * 1. Redistributions of source code must retain the above copyright
17 1.52 thorpej * notice, this list of conditions and the following disclaimer.
18 1.52 thorpej * 2. Redistributions in binary form must reproduce the above copyright
19 1.52 thorpej * notice, this list of conditions and the following disclaimer in the
20 1.52 thorpej * documentation and/or other materials provided with the distribution.
21 1.52 thorpej *
22 1.52 thorpej * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 1.52 thorpej * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 1.52 thorpej * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 1.52 thorpej * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 1.52 thorpej * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 1.52 thorpej * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 1.52 thorpej * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 1.52 thorpej * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 1.52 thorpej * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 1.52 thorpej * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 1.52 thorpej * POSSIBILITY OF SUCH DAMAGE.
33 1.52 thorpej */
34 1.19 cgd
35 1.19 cgd /*-
36 1.19 cgd * Copyright (c) 1982, 1986, 1991, 1993
37 1.19 cgd * The Regents of the University of California. All rights reserved.
38 1.19 cgd * (c) UNIX System Laboratories, Inc.
39 1.19 cgd * All or some portions of this file are derived from material licensed
40 1.19 cgd * to the University of California by American Telephone and Telegraph
41 1.19 cgd * Co. or Unix System Laboratories, Inc. and are reproduced herein with
42 1.19 cgd * the permission of UNIX System Laboratories, Inc.
43 1.19 cgd *
44 1.19 cgd * Redistribution and use in source and binary forms, with or without
45 1.19 cgd * modification, are permitted provided that the following conditions
46 1.19 cgd * are met:
47 1.19 cgd * 1. Redistributions of source code must retain the above copyright
48 1.19 cgd * notice, this list of conditions and the following disclaimer.
49 1.19 cgd * 2. Redistributions in binary form must reproduce the above copyright
50 1.19 cgd * notice, this list of conditions and the following disclaimer in the
51 1.19 cgd * documentation and/or other materials provided with the distribution.
52 1.87 agc * 3. Neither the name of the University nor the names of its contributors
53 1.19 cgd * may be used to endorse or promote products derived from this software
54 1.19 cgd * without specific prior written permission.
55 1.19 cgd *
56 1.19 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 1.19 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 1.19 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 1.19 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 1.19 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 1.19 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 1.19 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 1.19 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 1.19 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 1.19 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 1.19 cgd * SUCH DAMAGE.
67 1.19 cgd *
68 1.19 cgd * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
69 1.19 cgd */
70 1.78 lukem
71 1.78 lukem #include <sys/cdefs.h>
72 1.131 chs __KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.131 2012/12/02 01:05:16 chs Exp $");
73 1.44 jonathan
74 1.131 chs #include "opt_dtrace.h"
75 1.44 jonathan #include "opt_ntp.h"
76 1.80 briggs #include "opt_perfctrs.h"
77 1.19 cgd
78 1.19 cgd #include <sys/param.h>
79 1.19 cgd #include <sys/systm.h>
80 1.19 cgd #include <sys/callout.h>
81 1.19 cgd #include <sys/kernel.h>
82 1.19 cgd #include <sys/proc.h>
83 1.19 cgd #include <sys/resourcevar.h>
84 1.25 christos #include <sys/signalvar.h>
85 1.26 christos #include <sys/sysctl.h>
86 1.27 jonathan #include <sys/timex.h>
87 1.45 ross #include <sys/sched.h>
88 1.82 thorpej #include <sys/time.h>
89 1.99 kardel #include <sys/timetc.h>
90 1.109 ad #include <sys/cpu.h>
91 1.118 ad #include <sys/atomic.h>
92 1.118 ad
93 1.19 cgd #ifdef GPROF
94 1.19 cgd #include <sys/gmon.h>
95 1.19 cgd #endif
96 1.19 cgd
97 1.131 chs #ifdef KDTRACE_HOOKS
98 1.131 chs #include <sys/dtrace_bsd.h>
99 1.131 chs #include <sys/cpu.h>
100 1.131 chs
101 1.131 chs cyclic_clock_func_t cyclic_clock_func[MAXCPUS];
102 1.131 chs #endif
103 1.131 chs
104 1.19 cgd /*
105 1.19 cgd * Clock handling routines.
106 1.19 cgd *
107 1.19 cgd * This code is written to operate with two timers that run independently of
108 1.19 cgd * each other. The main clock, running hz times per second, is used to keep
109 1.19 cgd * track of real time. The second timer handles kernel and user profiling,
110 1.19 cgd * and does resource use estimation. If the second timer is programmable,
111 1.19 cgd * it is randomized to avoid aliasing between the two clocks. For example,
112 1.90 wiz * the randomization prevents an adversary from always giving up the CPU
113 1.19 cgd * just before its quantum expires. Otherwise, it would never accumulate
114 1.90 wiz * CPU ticks. The mean frequency of the second timer is stathz.
115 1.19 cgd *
116 1.19 cgd * If no second timer exists, stathz will be zero; in this case we drive
117 1.19 cgd * profiling and statistics off the main clock. This WILL NOT be accurate;
118 1.19 cgd * do not do it unless absolutely necessary.
119 1.19 cgd *
120 1.19 cgd * The statistics clock may (or may not) be run at a higher rate while
121 1.19 cgd * profiling. This profile clock runs at profhz. We require that profhz
122 1.19 cgd * be an integral multiple of stathz.
123 1.19 cgd *
124 1.19 cgd * If the statistics clock is running fast, it must be divided by the ratio
125 1.19 cgd * profhz/stathz for statistics. (For profiling, every tick counts.)
126 1.19 cgd */
127 1.19 cgd
/*
 * Exported clock rates and profiling state.
 */
int	stathz;			/* statistics clock rate; 0 => run statclock() from hardclock() */
int	profhz;			/* profiling clock rate; defaulted in initclocks() if 0 */
int	profsrc;		/* profiling source; compared against PROFSRC_CLOCK */
int	schedhz;		/* 0 => hardclock() calls schedclock() itself */
int	profprocs;		/* number of processes with PST_PROFIL set */
int	hardclock_ticks;	/* bumped by hardclock() on the primary CPU */
int	psratio;		/* ratio: prof / stat */

/*
 * File-local dividers.
 */
static int	hardscheddiv;	/* hard => sched divider (used if schedhz == 0) */
static int	psdiv;		/* prof => stat divider */
137 1.19 cgd
138 1.99 kardel static u_int get_intr_timecount(struct timecounter *);
139 1.99 kardel
140 1.99 kardel static struct timecounter intr_timecounter = {
141 1.99 kardel get_intr_timecount, /* get_timecount */
142 1.99 kardel 0, /* no poll_pps */
143 1.99 kardel ~0u, /* counter_mask */
144 1.99 kardel 0, /* frequency */
145 1.99 kardel "clockinterrupt", /* name */
146 1.102 christos 0, /* quality - minimum implementation level for a clock */
147 1.102 christos NULL, /* prev */
148 1.102 christos NULL, /* next */
149 1.99 kardel };
150 1.99 kardel
151 1.99 kardel static u_int
152 1.104 yamt get_intr_timecount(struct timecounter *tc)
153 1.99 kardel {
154 1.104 yamt
155 1.100 drochner return (u_int)hardclock_ticks;
156 1.99 kardel }
157 1.73 thorpej
158 1.66 thorpej /*
159 1.19 cgd * Initialize clock frequencies and start both clocks running.
160 1.19 cgd */
161 1.19 cgd void
162 1.63 thorpej initclocks(void)
163 1.19 cgd {
164 1.55 augustss int i;
165 1.19 cgd
166 1.19 cgd /*
167 1.19 cgd * Set divisors to 1 (normal case) and let the machine-specific
168 1.19 cgd * code do its bit.
169 1.19 cgd */
170 1.70 sommerfe psdiv = 1;
171 1.99 kardel /*
172 1.99 kardel * provide minimum default time counter
173 1.99 kardel * will only run at interrupt resolution
174 1.99 kardel */
175 1.99 kardel intr_timecounter.tc_frequency = hz;
176 1.99 kardel tc_init(&intr_timecounter);
177 1.19 cgd cpu_initclocks();
178 1.19 cgd
179 1.19 cgd /*
180 1.108 yamt * Compute profhz and stathz, fix profhz if needed.
181 1.19 cgd */
182 1.19 cgd i = stathz ? stathz : hz;
183 1.19 cgd if (profhz == 0)
184 1.19 cgd profhz = i;
185 1.19 cgd psratio = profhz / i;
186 1.91 yamt if (schedhz == 0) {
187 1.91 yamt /* 16Hz is best */
188 1.114 ad hardscheddiv = hz / 16;
189 1.114 ad if (hardscheddiv <= 0)
190 1.114 ad panic("hardscheddiv");
191 1.91 yamt }
192 1.31 mycroft
193 1.19 cgd }
194 1.19 cgd
195 1.19 cgd /*
196 1.19 cgd * The real-time timer, interrupting hz times per second.
197 1.19 cgd */
198 1.19 cgd void
199 1.63 thorpej hardclock(struct clockframe *frame)
200 1.19 cgd {
201 1.82 thorpej struct lwp *l;
202 1.120 ad struct cpu_info *ci;
203 1.19 cgd
204 1.120 ad ci = curcpu();
205 1.114 ad l = ci->ci_data.cpu_onproc;
206 1.120 ad
207 1.120 ad timer_tick(l, CLKF_USERMODE(frame));
208 1.19 cgd
209 1.19 cgd /*
210 1.19 cgd * If no separate statistics clock is available, run it from here.
211 1.19 cgd */
212 1.19 cgd if (stathz == 0)
213 1.19 cgd statclock(frame);
214 1.114 ad /*
215 1.114 ad * If no separate schedclock is provided, call it here
216 1.114 ad * at about 16 Hz.
217 1.114 ad */
218 1.114 ad if (schedhz == 0) {
219 1.114 ad if ((int)(--ci->ci_schedstate.spc_schedticks) <= 0) {
220 1.114 ad schedclock(l);
221 1.114 ad ci->ci_schedstate.spc_schedticks = hardscheddiv;
222 1.114 ad }
223 1.114 ad }
224 1.108 yamt if ((--ci->ci_schedstate.spc_ticks) <= 0)
225 1.108 yamt sched_tick(ci);
226 1.93 perry
227 1.123 ad if (CPU_IS_PRIMARY(ci)) {
228 1.121 ad hardclock_ticks++;
229 1.121 ad tc_ticktock();
230 1.121 ad }
231 1.19 cgd
232 1.19 cgd /*
233 1.126 pooka * Update real-time timeout queue.
234 1.106 ad */
235 1.109 ad callout_hardclock();
236 1.131 chs
237 1.131 chs #ifdef KDTRACE_HOOKS
238 1.131 chs cyclic_clock_func_t func = cyclic_clock_func[cpu_index(ci)];
239 1.131 chs if (func) {
240 1.131 chs (*func)((struct clockframe *)frame);
241 1.131 chs }
242 1.131 chs #endif
243 1.19 cgd }
244 1.19 cgd
245 1.19 cgd /*
246 1.19 cgd * Start profiling on a process.
247 1.19 cgd *
248 1.19 cgd * Kernel profiling passes proc0 which never exits and hence
249 1.19 cgd * keeps the profile clock running constantly.
250 1.19 cgd */
251 1.19 cgd void
252 1.63 thorpej startprofclock(struct proc *p)
253 1.19 cgd {
254 1.19 cgd
255 1.109 ad KASSERT(mutex_owned(&p->p_stmutex));
256 1.105 ad
257 1.105 ad if ((p->p_stflag & PST_PROFIL) == 0) {
258 1.105 ad p->p_stflag |= PST_PROFIL;
259 1.80 briggs /*
260 1.80 briggs * This is only necessary if using the clock as the
261 1.80 briggs * profiling source.
262 1.80 briggs */
263 1.70 sommerfe if (++profprocs == 1 && stathz != 0)
264 1.70 sommerfe psdiv = psratio;
265 1.19 cgd }
266 1.19 cgd }
267 1.19 cgd
268 1.19 cgd /*
269 1.19 cgd * Stop profiling on a process.
270 1.19 cgd */
271 1.19 cgd void
272 1.63 thorpej stopprofclock(struct proc *p)
273 1.19 cgd {
274 1.19 cgd
275 1.109 ad KASSERT(mutex_owned(&p->p_stmutex));
276 1.105 ad
277 1.105 ad if (p->p_stflag & PST_PROFIL) {
278 1.105 ad p->p_stflag &= ~PST_PROFIL;
279 1.80 briggs /*
280 1.80 briggs * This is only necessary if using the clock as the
281 1.80 briggs * profiling source.
282 1.80 briggs */
283 1.70 sommerfe if (--profprocs == 0 && stathz != 0)
284 1.70 sommerfe psdiv = 1;
285 1.19 cgd }
286 1.19 cgd }
287 1.19 cgd
#if defined(PERFCTRS)
/*
 * Independent profiling "tick" in case we're using a separate
 * clock or profiling event source.  Currently, that's just
 * performance counters--hence the wrapper.
 *
 * frame: interrupt frame; supplies the interrupted mode (CLKF_USERMODE)
 *	  and program counter (CLKF_PC).
 *
 * User mode: records a sample for the running process, under its
 * p_stmutex, if it has PST_PROFIL set.
 * Kernel mode: updates the GPROF histogram (if configured and enabled)
 * and, where LWP_PC is available, records a sample against the LWP's PC.
 */
void
proftick(struct clockframe *frame)
{
#ifdef GPROF
	struct gmonparam *g;
	intptr_t i;
#endif
	struct lwp *l;
	struct proc *p;

	l = curcpu()->ci_data.cpu_onproc;
	p = (l ? l->l_proc : NULL);
	if (CLKF_USERMODE(frame)) {
		/*
		 * p is dereferenced unconditionally below, so a user-mode
		 * tick must always have a running LWP.  Assert that
		 * rather than rely on it silently.
		 */
		KASSERT(p != NULL);
		mutex_spin_enter(&p->p_stmutex);
		if (p->p_stflag & PST_PROFIL)
			addupc_intr(l, CLKF_PC(frame));
		mutex_spin_exit(&p->p_stmutex);
	} else {
#ifdef GPROF
		/* Kernel histogram: bucket the PC relative to lowpc. */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#ifdef LWP_PC
		/*
		 * NOTE(review): unlike the user-mode branch, p_stflag is
		 * read here without p_stmutex held.  Left unchanged --
		 * confirm whether that is intentional for this context.
		 */
		if (p != NULL && (p->p_stflag & PST_PROFIL) != 0)
			addupc_intr(l, LWP_PC(l));
#endif
	}
}
#endif
329 1.80 briggs
330 1.108 yamt void
331 1.108 yamt schedclock(struct lwp *l)
332 1.108 yamt {
333 1.108 yamt if ((l->l_flag & LW_IDLE) != 0)
334 1.108 yamt return;
335 1.108 yamt
336 1.108 yamt sched_schedclock(l);
337 1.108 yamt }
338 1.108 yamt
339 1.19 cgd /*
340 1.19 cgd * Statistics clock. Grab profile sample, and if divider reaches 0,
341 1.19 cgd * do process and kernel statistics.
342 1.19 cgd */
343 1.19 cgd void
344 1.63 thorpej statclock(struct clockframe *frame)
345 1.19 cgd {
346 1.19 cgd #ifdef GPROF
347 1.55 augustss struct gmonparam *g;
348 1.68 eeh intptr_t i;
349 1.19 cgd #endif
350 1.60 thorpej struct cpu_info *ci = curcpu();
351 1.60 thorpej struct schedstate_percpu *spc = &ci->ci_schedstate;
352 1.55 augustss struct proc *p;
353 1.98 christos struct lwp *l;
354 1.19 cgd
355 1.70 sommerfe /*
356 1.70 sommerfe * Notice changes in divisor frequency, and adjust clock
357 1.70 sommerfe * frequency accordingly.
358 1.70 sommerfe */
359 1.70 sommerfe if (spc->spc_psdiv != psdiv) {
360 1.70 sommerfe spc->spc_psdiv = psdiv;
361 1.70 sommerfe spc->spc_pscnt = psdiv;
362 1.70 sommerfe if (psdiv == 1) {
363 1.70 sommerfe setstatclockrate(stathz);
364 1.70 sommerfe } else {
365 1.93 perry setstatclockrate(profhz);
366 1.70 sommerfe }
367 1.70 sommerfe }
368 1.114 ad l = ci->ci_data.cpu_onproc;
369 1.108 yamt if ((l->l_flag & LW_IDLE) != 0) {
370 1.108 yamt /*
371 1.108 yamt * don't account idle lwps as swapper.
372 1.108 yamt */
373 1.108 yamt p = NULL;
374 1.108 yamt } else {
375 1.108 yamt p = l->l_proc;
376 1.105 ad mutex_spin_enter(&p->p_stmutex);
377 1.108 yamt }
378 1.108 yamt
379 1.19 cgd if (CLKF_USERMODE(frame)) {
380 1.105 ad if ((p->p_stflag & PST_PROFIL) && profsrc == PROFSRC_CLOCK)
381 1.105 ad addupc_intr(l, CLKF_PC(frame));
382 1.105 ad if (--spc->spc_pscnt > 0) {
383 1.105 ad mutex_spin_exit(&p->p_stmutex);
384 1.19 cgd return;
385 1.105 ad }
386 1.105 ad
387 1.19 cgd /*
388 1.19 cgd * Came from user mode; CPU was in user state.
389 1.19 cgd * If this process is being profiled record the tick.
390 1.19 cgd */
391 1.19 cgd p->p_uticks++;
392 1.19 cgd if (p->p_nice > NZERO)
393 1.60 thorpej spc->spc_cp_time[CP_NICE]++;
394 1.19 cgd else
395 1.60 thorpej spc->spc_cp_time[CP_USER]++;
396 1.19 cgd } else {
397 1.19 cgd #ifdef GPROF
398 1.19 cgd /*
399 1.19 cgd * Kernel statistics are just like addupc_intr, only easier.
400 1.19 cgd */
401 1.19 cgd g = &_gmonparam;
402 1.80 briggs if (profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) {
403 1.19 cgd i = CLKF_PC(frame) - g->lowpc;
404 1.19 cgd if (i < g->textsize) {
405 1.19 cgd i /= HISTFRACTION * sizeof(*g->kcount);
406 1.19 cgd g->kcount[i]++;
407 1.19 cgd }
408 1.19 cgd }
409 1.19 cgd #endif
410 1.82 thorpej #ifdef LWP_PC
411 1.108 yamt if (p != NULL && profsrc == PROFSRC_CLOCK &&
412 1.108 yamt (p->p_stflag & PST_PROFIL)) {
413 1.105 ad addupc_intr(l, LWP_PC(l));
414 1.108 yamt }
415 1.72 mycroft #endif
416 1.105 ad if (--spc->spc_pscnt > 0) {
417 1.105 ad if (p != NULL)
418 1.105 ad mutex_spin_exit(&p->p_stmutex);
419 1.19 cgd return;
420 1.105 ad }
421 1.19 cgd /*
422 1.19 cgd * Came from kernel mode, so we were:
423 1.19 cgd * - handling an interrupt,
424 1.19 cgd * - doing syscall or trap work on behalf of the current
425 1.19 cgd * user process, or
426 1.19 cgd * - spinning in the idle loop.
427 1.19 cgd * Whichever it is, charge the time as appropriate.
428 1.19 cgd * Note that we charge interrupts to the current process,
429 1.19 cgd * regardless of whether they are ``for'' that process,
430 1.19 cgd * so that we know how much of its real time was spent
431 1.19 cgd * in ``non-process'' (i.e., interrupt) work.
432 1.19 cgd */
433 1.114 ad if (CLKF_INTR(frame) || (curlwp->l_pflag & LP_INTR) != 0) {
434 1.108 yamt if (p != NULL) {
435 1.19 cgd p->p_iticks++;
436 1.108 yamt }
437 1.60 thorpej spc->spc_cp_time[CP_INTR]++;
438 1.19 cgd } else if (p != NULL) {
439 1.19 cgd p->p_sticks++;
440 1.60 thorpej spc->spc_cp_time[CP_SYS]++;
441 1.108 yamt } else {
442 1.60 thorpej spc->spc_cp_time[CP_IDLE]++;
443 1.108 yamt }
444 1.19 cgd }
445 1.70 sommerfe spc->spc_pscnt = psdiv;
446 1.19 cgd
447 1.97 elad if (p != NULL) {
448 1.125 rmind atomic_inc_uint(&l->l_cpticks);
449 1.105 ad mutex_spin_exit(&p->p_stmutex);
450 1.108 yamt }
451 1.19 cgd }
452