kern_clock.c revision 1.133 1 /* $NetBSD: kern_clock.c,v 1.133 2015/04/22 16:43:11 pooka Exp $ */
2
3 /*-
4 * Copyright (c) 2000, 2004, 2006, 2007, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center.
10 * This code is derived from software contributed to The NetBSD Foundation
11 * by Charles M. Hannum.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 /*-
36 * Copyright (c) 1982, 1986, 1991, 1993
37 * The Regents of the University of California. All rights reserved.
38 * (c) UNIX System Laboratories, Inc.
39 * All or some portions of this file are derived from material licensed
40 * to the University of California by American Telephone and Telegraph
41 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
42 * the permission of UNIX System Laboratories, Inc.
43 *
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions
46 * are met:
47 * 1. Redistributions of source code must retain the above copyright
48 * notice, this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright
50 * notice, this list of conditions and the following disclaimer in the
51 * documentation and/or other materials provided with the distribution.
52 * 3. Neither the name of the University nor the names of its contributors
53 * may be used to endorse or promote products derived from this software
54 * without specific prior written permission.
55 *
56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
66 * SUCH DAMAGE.
67 *
68 * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
69 */
70
71 #include <sys/cdefs.h>
72 __KERNEL_RCSID(0, "$NetBSD: kern_clock.c,v 1.133 2015/04/22 16:43:11 pooka Exp $");
73
74 #ifdef _KERNEL_OPT
75 #include "opt_dtrace.h"
76 #include "opt_ntp.h"
77 #include "opt_perfctrs.h"
78 #endif
79
80 #include <sys/param.h>
81 #include <sys/systm.h>
82 #include <sys/callout.h>
83 #include <sys/kernel.h>
84 #include <sys/proc.h>
85 #include <sys/resourcevar.h>
86 #include <sys/signalvar.h>
87 #include <sys/sysctl.h>
88 #include <sys/timex.h>
89 #include <sys/sched.h>
90 #include <sys/time.h>
91 #include <sys/timetc.h>
92 #include <sys/cpu.h>
93 #include <sys/atomic.h>
94
95 #ifdef GPROF
96 #include <sys/gmon.h>
97 #endif
98
99 #ifdef KDTRACE_HOOKS
100 #include <sys/dtrace_bsd.h>
101 #include <sys/cpu.h>
102
103 cyclic_clock_func_t cyclic_clock_func[MAXCPUS];
104 #endif
105
106 static int sysctl_kern_clockrate(SYSCTLFN_PROTO);
107
108 /*
109 * Clock handling routines.
110 *
111 * This code is written to operate with two timers that run independently of
112 * each other. The main clock, running hz times per second, is used to keep
113 * track of real time. The second timer handles kernel and user profiling,
114 * and does resource use estimation. If the second timer is programmable,
115 * it is randomized to avoid aliasing between the two clocks. For example,
116 * the randomization prevents an adversary from always giving up the CPU
117 * just before its quantum expires. Otherwise, it would never accumulate
118 * CPU ticks. The mean frequency of the second timer is stathz.
119 *
120 * If no second timer exists, stathz will be zero; in this case we drive
121 * profiling and statistics off the main clock. This WILL NOT be accurate;
122 * do not do it unless absolutely necessary.
123 *
124 * The statistics clock may (or may not) be run at a higher rate while
125 * profiling. This profile clock runs at profhz. We require that profhz
126 * be an integral multiple of stathz.
127 *
128 * If the statistics clock is running fast, it must be divided by the ratio
129 * profhz/stathz for statistics. (For profiling, every tick counts.)
130 */
131
int	stathz;			/* statistics clock rate; 0 => driven from hardclock() */
int	profhz;			/* profiling clock rate; defaulted in initclocks() if 0 */
int	profsrc;		/* profiling source; compared against PROFSRC_CLOCK */
int	schedhz;		/* 0 => hardclock() calls schedclock() itself */
int	profprocs;		/* number of processes with PST_PROFIL set */
int	hardclock_ticks;	/* hardclock() ticks counted on the primary CPU */
static int hardscheddiv;	/* hard => sched divider (used if schedhz == 0) */
static int psdiv;		/* prof => stat divider */
int	psratio;		/* ratio: prof / stat */
141
142 static u_int get_intr_timecount(struct timecounter *);
143
144 static struct timecounter intr_timecounter = {
145 get_intr_timecount, /* get_timecount */
146 0, /* no poll_pps */
147 ~0u, /* counter_mask */
148 0, /* frequency */
149 "clockinterrupt", /* name */
150 0, /* quality - minimum implementation level for a clock */
151 NULL, /* prev */
152 NULL, /* next */
153 };
154
155 static u_int
156 get_intr_timecount(struct timecounter *tc)
157 {
158
159 return (u_int)hardclock_ticks;
160 }
161
162 /*
163 * Initialize clock frequencies and start both clocks running.
164 */
165 void
166 initclocks(void)
167 {
168 static struct sysctllog *clog;
169 int i;
170
171 /*
172 * Set divisors to 1 (normal case) and let the machine-specific
173 * code do its bit.
174 */
175 psdiv = 1;
176 /*
177 * provide minimum default time counter
178 * will only run at interrupt resolution
179 */
180 intr_timecounter.tc_frequency = hz;
181 tc_init(&intr_timecounter);
182 cpu_initclocks();
183
184 /*
185 * Compute profhz and stathz, fix profhz if needed.
186 */
187 i = stathz ? stathz : hz;
188 if (profhz == 0)
189 profhz = i;
190 psratio = profhz / i;
191 if (schedhz == 0) {
192 /* 16Hz is best */
193 hardscheddiv = hz / 16;
194 if (hardscheddiv <= 0)
195 panic("hardscheddiv");
196 }
197
198 sysctl_createv(&clog, 0, NULL, NULL,
199 CTLFLAG_PERMANENT,
200 CTLTYPE_STRUCT, "clockrate",
201 SYSCTL_DESCR("Kernel clock rates"),
202 sysctl_kern_clockrate, 0, NULL,
203 sizeof(struct clockinfo),
204 CTL_KERN, KERN_CLOCKRATE, CTL_EOL);
205 sysctl_createv(&clog, 0, NULL, NULL,
206 CTLFLAG_PERMANENT,
207 CTLTYPE_INT, "hardclock_ticks",
208 SYSCTL_DESCR("Number of hardclock ticks"),
209 NULL, 0, &hardclock_ticks, sizeof(hardclock_ticks),
210 CTL_KERN, KERN_HARDCLOCK_TICKS, CTL_EOL);
211 }
212
213 /*
214 * The real-time timer, interrupting hz times per second.
215 */
216 void
217 hardclock(struct clockframe *frame)
218 {
219 struct lwp *l;
220 struct cpu_info *ci;
221
222 ci = curcpu();
223 l = ci->ci_data.cpu_onproc;
224
225 timer_tick(l, CLKF_USERMODE(frame));
226
227 /*
228 * If no separate statistics clock is available, run it from here.
229 */
230 if (stathz == 0)
231 statclock(frame);
232 /*
233 * If no separate schedclock is provided, call it here
234 * at about 16 Hz.
235 */
236 if (schedhz == 0) {
237 if ((int)(--ci->ci_schedstate.spc_schedticks) <= 0) {
238 schedclock(l);
239 ci->ci_schedstate.spc_schedticks = hardscheddiv;
240 }
241 }
242 if ((--ci->ci_schedstate.spc_ticks) <= 0)
243 sched_tick(ci);
244
245 if (CPU_IS_PRIMARY(ci)) {
246 hardclock_ticks++;
247 tc_ticktock();
248 }
249
250 /*
251 * Update real-time timeout queue.
252 */
253 callout_hardclock();
254
255 #ifdef KDTRACE_HOOKS
256 cyclic_clock_func_t func = cyclic_clock_func[cpu_index(ci)];
257 if (func) {
258 (*func)((struct clockframe *)frame);
259 }
260 #endif
261 }
262
263 /*
264 * Start profiling on a process.
265 *
266 * Kernel profiling passes proc0 which never exits and hence
267 * keeps the profile clock running constantly.
268 */
269 void
270 startprofclock(struct proc *p)
271 {
272
273 KASSERT(mutex_owned(&p->p_stmutex));
274
275 if ((p->p_stflag & PST_PROFIL) == 0) {
276 p->p_stflag |= PST_PROFIL;
277 /*
278 * This is only necessary if using the clock as the
279 * profiling source.
280 */
281 if (++profprocs == 1 && stathz != 0)
282 psdiv = psratio;
283 }
284 }
285
286 /*
287 * Stop profiling on a process.
288 */
289 void
290 stopprofclock(struct proc *p)
291 {
292
293 KASSERT(mutex_owned(&p->p_stmutex));
294
295 if (p->p_stflag & PST_PROFIL) {
296 p->p_stflag &= ~PST_PROFIL;
297 /*
298 * This is only necessary if using the clock as the
299 * profiling source.
300 */
301 if (--profprocs == 0 && stathz != 0)
302 psdiv = 1;
303 }
304 }
305
306 #if defined(PERFCTRS)
307 /*
308 * Independent profiling "tick" in case we're using a separate
309 * clock or profiling event source. Currently, that's just
310 * performance counters--hence the wrapper.
311 */
312 void
313 proftick(struct clockframe *frame)
314 {
315 #ifdef GPROF
316 struct gmonparam *g;
317 intptr_t i;
318 #endif
319 struct lwp *l;
320 struct proc *p;
321
322 l = curcpu()->ci_data.cpu_onproc;
323 p = (l ? l->l_proc : NULL);
324 if (CLKF_USERMODE(frame)) {
325 mutex_spin_enter(&p->p_stmutex);
326 if (p->p_stflag & PST_PROFIL)
327 addupc_intr(l, CLKF_PC(frame));
328 mutex_spin_exit(&p->p_stmutex);
329 } else {
330 #ifdef GPROF
331 g = &_gmonparam;
332 if (g->state == GMON_PROF_ON) {
333 i = CLKF_PC(frame) - g->lowpc;
334 if (i < g->textsize) {
335 i /= HISTFRACTION * sizeof(*g->kcount);
336 g->kcount[i]++;
337 }
338 }
339 #endif
340 #ifdef LWP_PC
341 if (p != NULL && (p->p_stflag & PST_PROFIL) != 0)
342 addupc_intr(l, LWP_PC(l));
343 #endif
344 }
345 }
346 #endif
347
348 void
349 schedclock(struct lwp *l)
350 {
351 if ((l->l_flag & LW_IDLE) != 0)
352 return;
353
354 sched_schedclock(l);
355 }
356
357 /*
358 * Statistics clock. Grab profile sample, and if divider reaches 0,
359 * do process and kernel statistics.
360 */
361 void
362 statclock(struct clockframe *frame)
363 {
364 #ifdef GPROF
365 struct gmonparam *g;
366 intptr_t i;
367 #endif
368 struct cpu_info *ci = curcpu();
369 struct schedstate_percpu *spc = &ci->ci_schedstate;
370 struct proc *p;
371 struct lwp *l;
372
373 /*
374 * Notice changes in divisor frequency, and adjust clock
375 * frequency accordingly.
376 */
377 if (spc->spc_psdiv != psdiv) {
378 spc->spc_psdiv = psdiv;
379 spc->spc_pscnt = psdiv;
380 if (psdiv == 1) {
381 setstatclockrate(stathz);
382 } else {
383 setstatclockrate(profhz);
384 }
385 }
386 l = ci->ci_data.cpu_onproc;
387 if ((l->l_flag & LW_IDLE) != 0) {
388 /*
389 * don't account idle lwps as swapper.
390 */
391 p = NULL;
392 } else {
393 p = l->l_proc;
394 mutex_spin_enter(&p->p_stmutex);
395 }
396
397 if (CLKF_USERMODE(frame)) {
398 if ((p->p_stflag & PST_PROFIL) && profsrc == PROFSRC_CLOCK)
399 addupc_intr(l, CLKF_PC(frame));
400 if (--spc->spc_pscnt > 0) {
401 mutex_spin_exit(&p->p_stmutex);
402 return;
403 }
404
405 /*
406 * Came from user mode; CPU was in user state.
407 * If this process is being profiled record the tick.
408 */
409 p->p_uticks++;
410 if (p->p_nice > NZERO)
411 spc->spc_cp_time[CP_NICE]++;
412 else
413 spc->spc_cp_time[CP_USER]++;
414 } else {
415 #ifdef GPROF
416 /*
417 * Kernel statistics are just like addupc_intr, only easier.
418 */
419 g = &_gmonparam;
420 if (profsrc == PROFSRC_CLOCK && g->state == GMON_PROF_ON) {
421 i = CLKF_PC(frame) - g->lowpc;
422 if (i < g->textsize) {
423 i /= HISTFRACTION * sizeof(*g->kcount);
424 g->kcount[i]++;
425 }
426 }
427 #endif
428 #ifdef LWP_PC
429 if (p != NULL && profsrc == PROFSRC_CLOCK &&
430 (p->p_stflag & PST_PROFIL)) {
431 addupc_intr(l, LWP_PC(l));
432 }
433 #endif
434 if (--spc->spc_pscnt > 0) {
435 if (p != NULL)
436 mutex_spin_exit(&p->p_stmutex);
437 return;
438 }
439 /*
440 * Came from kernel mode, so we were:
441 * - handling an interrupt,
442 * - doing syscall or trap work on behalf of the current
443 * user process, or
444 * - spinning in the idle loop.
445 * Whichever it is, charge the time as appropriate.
446 * Note that we charge interrupts to the current process,
447 * regardless of whether they are ``for'' that process,
448 * so that we know how much of its real time was spent
449 * in ``non-process'' (i.e., interrupt) work.
450 */
451 if (CLKF_INTR(frame) || (curlwp->l_pflag & LP_INTR) != 0) {
452 if (p != NULL) {
453 p->p_iticks++;
454 }
455 spc->spc_cp_time[CP_INTR]++;
456 } else if (p != NULL) {
457 p->p_sticks++;
458 spc->spc_cp_time[CP_SYS]++;
459 } else {
460 spc->spc_cp_time[CP_IDLE]++;
461 }
462 }
463 spc->spc_pscnt = psdiv;
464
465 if (p != NULL) {
466 atomic_inc_uint(&l->l_cpticks);
467 mutex_spin_exit(&p->p_stmutex);
468 }
469 }
470
471 /*
472 * sysctl helper routine for kern.clockrate. Assembles a struct on
473 * the fly to be returned to the caller.
474 */
475 static int
476 sysctl_kern_clockrate(SYSCTLFN_ARGS)
477 {
478 struct clockinfo clkinfo;
479 struct sysctlnode node;
480
481 clkinfo.tick = tick;
482 clkinfo.tickadj = tickadj;
483 clkinfo.hz = hz;
484 clkinfo.profhz = profhz;
485 clkinfo.stathz = stathz ? stathz : hz;
486
487 node = *rnode;
488 node.sysctl_data = &clkinfo;
489 return (sysctl_lookup(SYSCTLFN_CALL(&node)));
490 }
491