kern_clock.c revision 1.121.2.1 1 1.121.2.1 yamt /* $NetBSD: kern_clock.c,v 1.121.2.1 2008/05/16 02:25:24 yamt Exp $ */
2 1.52 thorpej
3 1.52 thorpej /*-
4 1.118 ad * Copyright (c) 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5 1.52 thorpej * All rights reserved.
6 1.52 thorpej *
7 1.52 thorpej * This code is derived from software contributed to The NetBSD Foundation
8 1.52 thorpej * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 1.52 thorpej * NASA Ames Research Center.
10 1.94 mycroft * This code is derived from software contributed to The NetBSD Foundation
11 1.94 mycroft * by Charles M. Hannum.
12 1.52 thorpej *
13 1.52 thorpej * Redistribution and use in source and binary forms, with or without
14 1.52 thorpej * modification, are permitted provided that the following conditions
15 1.52 thorpej * are met:
16 1.52 thorpej * 1. Redistributions of source code must retain the above copyright
17 1.52 thorpej * notice, this list of conditions and the following disclaimer.
18 1.52 thorpej * 2. Redistributions in binary form must reproduce the above copyright
19 1.52 thorpej * notice, this list of conditions and the following disclaimer in the
20 1.52 thorpej * documentation and/or other materials provided with the distribution.
21 1.52 thorpej *
22 1.52 thorpej * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 1.52 thorpej * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 1.52 thorpej * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 1.52 thorpej * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 1.52 thorpej * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 1.52 thorpej * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 1.52 thorpej * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 1.52 thorpej * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 1.52 thorpej * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 1.52 thorpej * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 1.52 thorpej * POSSIBILITY OF SUCH DAMAGE.
33 1.52 thorpej */
34 1.19 cgd
35 1.19 cgd /*-
36 1.19 cgd * Copyright (c) 1982, 1986, 1991, 1993
37 1.19 cgd * The Regents of the University of California. All rights reserved.
38 1.19 cgd * (c) UNIX System Laboratories, Inc.
39 1.19 cgd * All or some portions of this file are derived from material licensed
40 1.19 cgd * to the University of California by American Telephone and Telegraph
41 1.19 cgd * Co. or Unix System Laboratories, Inc. and are reproduced herein with
42 1.19 cgd * the permission of UNIX System Laboratories, Inc.
43 1.19 cgd *
44 1.19 cgd * Redistribution and use in source and binary forms, with or without
45 1.19 cgd * modification, are permitted provided that the following conditions
46 1.19 cgd * are met:
47 1.19 cgd * 1. Redistributions of source code must retain the above copyright
48 1.19 cgd * notice, this list of conditions and the following disclaimer.
49 1.19 cgd * 2. Redistributions in binary form must reproduce the above copyright
50 1.19 cgd * notice, this list of conditions and the following disclaimer in the
51 1.19 cgd * documentation and/or other materials provided with the distribution.
52 1.87 agc * 3. Neither the name of the University nor the names of its contributors
53 1.19 cgd * may be used to endorse or promote products derived from this software
54 1.19 cgd * without specific prior written permission.
55 1.19 cgd *
56 1.19 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 1.19 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 1.19 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 1.19 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 1.19 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 1.19 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 1.19 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 1.19 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 1.19 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 1.19 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 1.19 cgd * SUCH DAMAGE.
67 1.19 cgd *
68 1.19 cgd * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
69 1.19 cgd */
70 1.78 lukem
71 1.78 lukem #include <sys/cdefs.h>
72 1.121.2.1 yamt __KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.121.2.1 2008/05/16 02:25:24 yamt Exp $");
73 1.44 jonathan
74 1.44 jonathan #include "opt_ntp.h"
75 1.86 martin #include "opt_multiprocessor.h"
76 1.80 briggs #include "opt_perfctrs.h"
77 1.19 cgd
78 1.19 cgd #include <sys/param.h>
79 1.19 cgd #include <sys/systm.h>
80 1.19 cgd #include <sys/callout.h>
81 1.19 cgd #include <sys/kernel.h>
82 1.19 cgd #include <sys/proc.h>
83 1.19 cgd #include <sys/resourcevar.h>
84 1.25 christos #include <sys/signalvar.h>
85 1.26 christos #include <sys/sysctl.h>
86 1.27 jonathan #include <sys/timex.h>
87 1.45 ross #include <sys/sched.h>
88 1.82 thorpej #include <sys/time.h>
89 1.99 kardel #include <sys/timetc.h>
90 1.109 ad #include <sys/cpu.h>
91 1.118 ad #include <sys/atomic.h>
92 1.118 ad
93 1.118 ad #include <uvm/uvm_extern.h>
94 1.25 christos
95 1.19 cgd #ifdef GPROF
96 1.19 cgd #include <sys/gmon.h>
97 1.19 cgd #endif
98 1.19 cgd
99 1.19 cgd /*
100 1.19 cgd * Clock handling routines.
101 1.19 cgd *
102 1.19 cgd * This code is written to operate with two timers that run independently of
103 1.19 cgd * each other. The main clock, running hz times per second, is used to keep
104 1.19 cgd * track of real time. The second timer handles kernel and user profiling,
105 1.19 cgd * and does resource use estimation. If the second timer is programmable,
106 1.19 cgd * it is randomized to avoid aliasing between the two clocks. For example,
107 1.90 wiz * the randomization prevents an adversary from always giving up the CPU
108 1.19 cgd * just before its quantum expires. Otherwise, it would never accumulate
109 1.90 wiz * CPU ticks. The mean frequency of the second timer is stathz.
110 1.19 cgd *
111 1.19 cgd * If no second timer exists, stathz will be zero; in this case we drive
112 1.19 cgd * profiling and statistics off the main clock. This WILL NOT be accurate;
113 1.19 cgd * do not do it unless absolutely necessary.
114 1.19 cgd *
115 1.19 cgd * The statistics clock may (or may not) be run at a higher rate while
116 1.19 cgd * profiling. This profile clock runs at profhz. We require that profhz
117 1.19 cgd * be an integral multiple of stathz.
118 1.19 cgd *
119 1.19 cgd * If the statistics clock is running fast, it must be divided by the ratio
120 1.19 cgd * profhz/stathz for statistics. (For profiling, every tick counts.)
121 1.19 cgd */
122 1.19 cgd
int	stathz;			/* mean statistics clock rate; 0 if no second timer */
int	profhz;			/* profile clock rate; defaulted in initclocks() if 0 */
int	profsrc;		/* profiling source; compared with PROFSRC_CLOCK */
int	schedhz;		/* 0 => hardclock() drives schedclock() itself */
int	profprocs;		/* count of processes with PST_PROFIL set */
int	hardclock_ticks;	/* bumped by hardclock() on the primary CPU */
static int hardscheddiv;	/* hard => sched divider (used if schedhz == 0) */
static int psdiv;		/* prof => stat divider */
int	psratio;		/* ratio: prof / stat */
132 1.19 cgd
133 1.99 kardel static u_int get_intr_timecount(struct timecounter *);
134 1.99 kardel
135 1.99 kardel static struct timecounter intr_timecounter = {
136 1.99 kardel get_intr_timecount, /* get_timecount */
137 1.99 kardel 0, /* no poll_pps */
138 1.99 kardel ~0u, /* counter_mask */
139 1.99 kardel 0, /* frequency */
140 1.99 kardel "clockinterrupt", /* name */
141 1.102 christos 0, /* quality - minimum implementation level for a clock */
142 1.102 christos NULL, /* prev */
143 1.102 christos NULL, /* next */
144 1.99 kardel };
145 1.99 kardel
146 1.99 kardel static u_int
147 1.104 yamt get_intr_timecount(struct timecounter *tc)
148 1.99 kardel {
149 1.104 yamt
150 1.100 drochner return (u_int)hardclock_ticks;
151 1.99 kardel }
152 1.73 thorpej
153 1.66 thorpej /*
154 1.19 cgd * Initialize clock frequencies and start both clocks running.
155 1.19 cgd */
156 1.19 cgd void
157 1.63 thorpej initclocks(void)
158 1.19 cgd {
159 1.55 augustss int i;
160 1.19 cgd
161 1.19 cgd /*
162 1.19 cgd * Set divisors to 1 (normal case) and let the machine-specific
163 1.19 cgd * code do its bit.
164 1.19 cgd */
165 1.70 sommerfe psdiv = 1;
166 1.99 kardel /*
167 1.99 kardel * provide minimum default time counter
168 1.99 kardel * will only run at interrupt resolution
169 1.99 kardel */
170 1.99 kardel intr_timecounter.tc_frequency = hz;
171 1.99 kardel tc_init(&intr_timecounter);
172 1.19 cgd cpu_initclocks();
173 1.19 cgd
174 1.19 cgd /*
175 1.108 yamt * Compute profhz and stathz, fix profhz if needed.
176 1.19 cgd */
177 1.19 cgd i = stathz ? stathz : hz;
178 1.19 cgd if (profhz == 0)
179 1.19 cgd profhz = i;
180 1.19 cgd psratio = profhz / i;
181 1.91 yamt if (schedhz == 0) {
182 1.91 yamt /* 16Hz is best */
183 1.114 ad hardscheddiv = hz / 16;
184 1.114 ad if (hardscheddiv <= 0)
185 1.114 ad panic("hardscheddiv");
186 1.91 yamt }
187 1.31 mycroft
188 1.19 cgd }
189 1.19 cgd
190 1.19 cgd /*
191 1.19 cgd * The real-time timer, interrupting hz times per second.
192 1.19 cgd */
193 1.19 cgd void
194 1.63 thorpej hardclock(struct clockframe *frame)
195 1.19 cgd {
196 1.82 thorpej struct lwp *l;
197 1.120 ad struct cpu_info *ci;
198 1.19 cgd
199 1.120 ad ci = curcpu();
200 1.114 ad l = ci->ci_data.cpu_onproc;
201 1.120 ad
202 1.120 ad timer_tick(l, CLKF_USERMODE(frame));
203 1.19 cgd
204 1.19 cgd /*
205 1.19 cgd * If no separate statistics clock is available, run it from here.
206 1.19 cgd */
207 1.19 cgd if (stathz == 0)
208 1.19 cgd statclock(frame);
209 1.114 ad /*
210 1.114 ad * If no separate schedclock is provided, call it here
211 1.114 ad * at about 16 Hz.
212 1.114 ad */
213 1.114 ad if (schedhz == 0) {
214 1.114 ad if ((int)(--ci->ci_schedstate.spc_schedticks) <= 0) {
215 1.114 ad schedclock(l);
216 1.114 ad ci->ci_schedstate.spc_schedticks = hardscheddiv;
217 1.114 ad }
218 1.114 ad }
219 1.108 yamt if ((--ci->ci_schedstate.spc_ticks) <= 0)
220 1.108 yamt sched_tick(ci);
221 1.93 perry
222 1.60 thorpej #if defined(MULTIPROCESSOR)
223 1.121 ad if (CPU_IS_PRIMARY(ci))
224 1.60 thorpej #endif
225 1.121 ad {
226 1.121 ad hardclock_ticks++;
227 1.121 ad tc_ticktock();
228 1.121 ad }
229 1.19 cgd
230 1.19 cgd /*
231 1.106 ad * Update real-time timeout queue. Callouts are processed at a
232 1.106 ad * very low CPU priority, so we don't keep the relatively high
233 1.106 ad * clock interrupt priority any longer than necessary.
234 1.106 ad */
235 1.109 ad callout_hardclock();
236 1.19 cgd }
237 1.19 cgd
238 1.19 cgd /*
239 1.19 cgd * Start profiling on a process.
240 1.19 cgd *
241 1.19 cgd * Kernel profiling passes proc0 which never exits and hence
242 1.19 cgd * keeps the profile clock running constantly.
243 1.19 cgd */
244 1.19 cgd void
245 1.63 thorpej startprofclock(struct proc *p)
246 1.19 cgd {
247 1.19 cgd
248 1.109 ad KASSERT(mutex_owned(&p->p_stmutex));
249 1.105 ad
250 1.105 ad if ((p->p_stflag & PST_PROFIL) == 0) {
251 1.105 ad p->p_stflag |= PST_PROFIL;
252 1.80 briggs /*
253 1.80 briggs * This is only necessary if using the clock as the
254 1.80 briggs * profiling source.
255 1.80 briggs */
256 1.70 sommerfe if (++profprocs == 1 && stathz != 0)
257 1.70 sommerfe psdiv = psratio;
258 1.19 cgd }
259 1.19 cgd }
260 1.19 cgd
261 1.19 cgd /*
262 1.19 cgd * Stop profiling on a process.
263 1.19 cgd */
264 1.19 cgd void
265 1.63 thorpej stopprofclock(struct proc *p)
266 1.19 cgd {
267 1.19 cgd
268 1.109 ad KASSERT(mutex_owned(&p->p_stmutex));
269 1.105 ad
270 1.105 ad if (p->p_stflag & PST_PROFIL) {
271 1.105 ad p->p_stflag &= ~PST_PROFIL;
272 1.80 briggs /*
273 1.80 briggs * This is only necessary if using the clock as the
274 1.80 briggs * profiling source.
275 1.80 briggs */
276 1.70 sommerfe if (--profprocs == 0 && stathz != 0)
277 1.70 sommerfe psdiv = 1;
278 1.19 cgd }
279 1.19 cgd }
280 1.19 cgd
#if defined(PERFCTRS)
/*
 * Independent profiling "tick" in case we're using a separate
 * clock or profiling event source.  Currently, that's just
 * performance counters--hence the wrapper.
 *
 * A user-mode sample is credited to the process of the LWP on this CPU
 * if that process has PST_PROFIL set.  A kernel-mode sample goes into
 * the GPROF histogram (when built with GPROF and profiling is on) and,
 * where LWP_PC is available, to the profiled process as well.
 */
void
proftick(struct clockframe *frame)
{
#ifdef GPROF
	struct gmonparam *gp;
	intptr_t off;
#endif
	struct lwp *l;
	struct proc *p;

	l = curcpu()->ci_data.cpu_onproc;
	p = (l ? l->l_proc : NULL);

	if (CLKF_USERMODE(frame)) {
		/*
		 * NOTE(review): p is dereferenced without a NULL check;
		 * presumably an LWP is always on the CPU when the frame
		 * is from user mode -- confirm.
		 */
		mutex_spin_enter(&p->p_stmutex);
		if (p->p_stflag & PST_PROFIL)
			addupc_intr(l, CLKF_PC(frame));
		mutex_spin_exit(&p->p_stmutex);
	} else {
#ifdef GPROF
		/* Bump the histogram bucket covering the interrupted PC. */
		gp = &_gmonparam;
		if (gp->state == GMON_PROF_ON) {
			off = CLKF_PC(frame) - gp->lowpc;
			if (off < gp->textsize) {
				off /= HISTFRACTION * sizeof(*gp->kcount);
				gp->kcount[off]++;
			}
		}
#endif
#ifdef LWP_PC
		/*
		 * NOTE(review): unlike the user-mode path above, this
		 * path tests PST_PROFIL and calls addupc_intr() without
		 * taking p_stmutex -- confirm that is intentional.
		 */
		if (p != NULL && (p->p_stflag & PST_PROFIL) != 0)
			addupc_intr(l, LWP_PC(l));
#endif
	}
}
#endif
322 1.80 briggs
323 1.108 yamt void
324 1.108 yamt schedclock(struct lwp *l)
325 1.108 yamt {
326 1.118 ad struct cpu_info *ci;
327 1.118 ad
328 1.118 ad ci = l->l_cpu;
329 1.118 ad
330 1.118 ad /* Accumulate syscall and context switch counts. */
331 1.119 ad atomic_add_int((unsigned *)&uvmexp.swtch, ci->ci_data.cpu_nswtch);
332 1.118 ad ci->ci_data.cpu_nswtch = 0;
333 1.119 ad atomic_add_int((unsigned *)&uvmexp.syscalls, ci->ci_data.cpu_nsyscall);
334 1.118 ad ci->ci_data.cpu_nsyscall = 0;
335 1.108 yamt
336 1.108 yamt if ((l->l_flag & LW_IDLE) != 0)
337 1.108 yamt return;
338 1.108 yamt
339 1.108 yamt sched_schedclock(l);
340 1.108 yamt }
341 1.108 yamt
342 1.19 cgd /*
343 1.19 cgd * Statistics clock. Grab profile sample, and if divider reaches 0,
344 1.19 cgd * do process and kernel statistics.
345 1.19 cgd */
346 1.19 cgd void
347 1.63 thorpej statclock(struct clockframe *frame)
348 1.19 cgd {
349 1.19 cgd #ifdef GPROF
350 1.55 augustss struct gmonparam *g;
351 1.68 eeh intptr_t i;
352 1.19 cgd #endif
353 1.60 thorpej struct cpu_info *ci = curcpu();
354 1.60 thorpej struct schedstate_percpu *spc = &ci->ci_schedstate;
355 1.55 augustss struct proc *p;
356 1.98 christos struct lwp *l;
357 1.19 cgd
358 1.70 sommerfe /*
359 1.70 sommerfe * Notice changes in divisor frequency, and adjust clock
360 1.70 sommerfe * frequency accordingly.
361 1.70 sommerfe */
362 1.70 sommerfe if (spc->spc_psdiv != psdiv) {
363 1.70 sommerfe spc->spc_psdiv = psdiv;
364 1.70 sommerfe spc->spc_pscnt = psdiv;
365 1.70 sommerfe if (psdiv == 1) {
366 1.70 sommerfe setstatclockrate(stathz);
367 1.70 sommerfe } else {
368 1.93 perry setstatclockrate(profhz);
369 1.70 sommerfe }
370 1.70 sommerfe }
371 1.114 ad l = ci->ci_data.cpu_onproc;
372 1.108 yamt if ((l->l_flag & LW_IDLE) != 0) {
373 1.108 yamt /*
374 1.108 yamt * don't account idle lwps as swapper.
375 1.108 yamt */
376 1.108 yamt p = NULL;
377 1.108 yamt } else {
378 1.108 yamt p = l->l_proc;
379 1.105 ad mutex_spin_enter(&p->p_stmutex);
380 1.108 yamt }
381 1.108 yamt
382 1.19 cgd if (CLKF_USERMODE(frame)) {
383 1.105 ad if ((p->p_stflag & PST_PROFIL) && profsrc == PROFSRC_CLOCK)
384 1.105 ad addupc_intr(l, CLKF_PC(frame));
385 1.105 ad if (--spc->spc_pscnt > 0) {
386 1.105 ad mutex_spin_exit(&p->p_stmutex);
387 1.19 cgd return;
388 1.105 ad }
389 1.105 ad
390 1.19 cgd /*
391 1.19 cgd * Came from user mode; CPU was in user state.
392 1.19 cgd * If this process is being profiled record the tick.
393 1.19 cgd */
394 1.19 cgd p->p_uticks++;
395 1.19 cgd if (p->p_nice > NZERO)
396 1.60 thorpej spc->spc_cp_time[CP_NICE]++;
397 1.19 cgd else
398 1.60 thorpej spc->spc_cp_time[CP_USER]++;
399 1.19 cgd } else {
400 1.19 cgd #ifdef GPROF
401 1.19 cgd /*
402 1.19 cgd * Kernel statistics are just like addupc_intr, only easier.
403 1.19 cgd */
404 1.19 cgd g = &_gmonparam;
405 1.80 briggs if (profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) {
406 1.19 cgd i = CLKF_PC(frame) - g->lowpc;
407 1.19 cgd if (i < g->textsize) {
408 1.19 cgd i /= HISTFRACTION * sizeof(*g->kcount);
409 1.19 cgd g->kcount[i]++;
410 1.19 cgd }
411 1.19 cgd }
412 1.19 cgd #endif
413 1.82 thorpej #ifdef LWP_PC
414 1.108 yamt if (p != NULL && profsrc == PROFSRC_CLOCK &&
415 1.108 yamt (p->p_stflag & PST_PROFIL)) {
416 1.105 ad addupc_intr(l, LWP_PC(l));
417 1.108 yamt }
418 1.72 mycroft #endif
419 1.105 ad if (--spc->spc_pscnt > 0) {
420 1.105 ad if (p != NULL)
421 1.105 ad mutex_spin_exit(&p->p_stmutex);
422 1.19 cgd return;
423 1.105 ad }
424 1.19 cgd /*
425 1.19 cgd * Came from kernel mode, so we were:
426 1.19 cgd * - handling an interrupt,
427 1.19 cgd * - doing syscall or trap work on behalf of the current
428 1.19 cgd * user process, or
429 1.19 cgd * - spinning in the idle loop.
430 1.19 cgd * Whichever it is, charge the time as appropriate.
431 1.19 cgd * Note that we charge interrupts to the current process,
432 1.19 cgd * regardless of whether they are ``for'' that process,
433 1.19 cgd * so that we know how much of its real time was spent
434 1.19 cgd * in ``non-process'' (i.e., interrupt) work.
435 1.19 cgd */
436 1.114 ad if (CLKF_INTR(frame) || (curlwp->l_pflag & LP_INTR) != 0) {
437 1.108 yamt if (p != NULL) {
438 1.19 cgd p->p_iticks++;
439 1.108 yamt }
440 1.60 thorpej spc->spc_cp_time[CP_INTR]++;
441 1.19 cgd } else if (p != NULL) {
442 1.19 cgd p->p_sticks++;
443 1.60 thorpej spc->spc_cp_time[CP_SYS]++;
444 1.108 yamt } else {
445 1.60 thorpej spc->spc_cp_time[CP_IDLE]++;
446 1.108 yamt }
447 1.19 cgd }
448 1.70 sommerfe spc->spc_pscnt = psdiv;
449 1.19 cgd
450 1.97 elad if (p != NULL) {
451 1.108 yamt ++l->l_cpticks;
452 1.105 ad mutex_spin_exit(&p->p_stmutex);
453 1.108 yamt }
454 1.19 cgd }
455