/*	$NetBSD: kern_clock.c,v 1.74.2.1 2001/03/05 22:49:38 nathanw Exp $	*/

/*-
 * Copyright (c) 2000 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the NetBSD
 *	Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
 */

#include "opt_ntp.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/lwp.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <uvm/uvm_extern.h>
#include <sys/sysctl.h>
#include <sys/timex.h>
#include <sys/sched.h>

#include <machine/cpu.h>
#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
#include <machine/intr.h>
#endif

#ifdef GPROF
#include <sys/gmon.h>
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */
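
/*
 * Illustrative numbers only (actual rates are machine-dependent): with
 * stathz = 128 and the statistics clock run at profhz = 1024 while
 * profiling, psratio = profhz / stathz = 8, so only every 8th
 * statclock() tick is counted toward statistics while profiling is
 * active.
 */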

#ifdef NTP		/* NTP phase-locked loop in kernel */
/*
 * Phase/frequency-lock loop (PLL/FLL) definitions
 *
 * The following variables are read and set by the ntp_adjtime() system
 * call.
 *
 * time_state shows the state of the system clock, with values defined
 * in the timex.h header file.
 *
 * time_status shows the status of the system clock, with bits defined
 * in the timex.h header file.
 *
 * time_offset is used by the PLL/FLL to adjust the system time in small
 * increments.
 *
 * time_constant determines the bandwidth or "stiffness" of the PLL.
 *
 * time_tolerance determines maximum frequency error or tolerance of the
 * CPU clock oscillator and is a property of the architecture; however,
 * in principle it could change as result of the presence of external
 * discipline signals, for instance.
 *
 * time_precision is usually equal to the kernel tick variable; however,
 * in cases where a precision clock counter or external clock is
 * available, the resolution can be much less than this and depend on
 * whether the external clock is working or not.
 *
 * time_maxerror is initialized by an ntp_adjtime() call and increased by
 * the kernel once each second to reflect the maximum error bound
 * growth.
 *
 * time_esterror is set and read by the ntp_adjtime() call, but
 * otherwise not used by the kernel.
 */
int time_state = TIME_OK;	/* clock state */
int time_status = STA_UNSYNC;	/* clock status bits */
long time_offset = 0;		/* time offset (us) */
long time_constant = 0;		/* pll time constant */
long time_tolerance = MAXFREQ;	/* frequency tolerance (scaled ppm) */
long time_precision = 1;	/* clock precision (us) */
long time_maxerror = MAXPHASE;	/* maximum error (us) */
long time_esterror = MAXPHASE;	/* estimated error (us) */

/*
 * The following variables establish the state of the PLL/FLL and the
 * residual time and frequency offset of the local clock.  The scale
 * factors are defined in the timex.h header file.
 *
 * time_phase and time_freq are the phase increment and the frequency
 * increment, respectively, of the kernel time variable.
 *
 * time_freq is set via ntp_adjtime() from a value stored in a file when
 * the synchronization daemon is first started.  Its value is retrieved
 * via ntp_adjtime() and written to the file about once per hour by the
 * daemon.
 *
 * time_adj is the adjustment added to the value of tick at each timer
 * interrupt and is recomputed from time_phase and time_freq at each
 * seconds rollover.
 *
 * time_reftime is the second's portion of the system time at the last
 * call to ntp_adjtime().  It is used to adjust the time_freq variable
 * and to increase the time_maxerror as the time since last update
 * increases.
 */
long time_phase = 0;		/* phase offset (scaled us) */
long time_freq = 0;		/* frequency offset (scaled ppm) */
long time_adj = 0;		/* tick adjust (scaled 1 / hz) */
long time_reftime = 0;		/* time at last adjustment (s) */

#ifdef PPS_SYNC
/*
 * The following variables are used only if the kernel PPS discipline
 * code is configured (PPS_SYNC).  The scale factors are defined in the
 * timex.h header file.
 *
 * pps_time contains the time at each calibration interval, as read by
 * microtime().  pps_count counts the seconds of the calibration
 * interval, the duration of which is nominally pps_shift in powers of
 * two.
 *
 * pps_offset is the time offset produced by the time median filter
 * pps_tf[], while pps_jitter is the dispersion (jitter) measured by
 * this filter.
 *
 * pps_freq is the frequency offset produced by the frequency median
 * filter pps_ff[], while pps_stabil is the dispersion (wander) measured
 * by this filter.
 *
 * pps_usec is latched from a high resolution counter or external clock
 * at pps_time.  Here we want the hardware counter contents only, not the
 * contents plus the time_tv.usec as usual.
 *
 * pps_valid counts the number of seconds since the last PPS update.  It
 * is used as a watchdog timer to disable the PPS discipline should the
 * PPS signal be lost.
 *
 * pps_glitch counts the number of seconds since the beginning of an
 * offset burst more than tick/2 from current nominal offset.  It is used
 * mainly to suppress error bursts due to priority conflicts between the
 * PPS interrupt and timer interrupt.
 *
 * pps_intcnt counts the calibration intervals for use in the interval-
 * adaptation algorithm.  It's just too complicated for words.
 */
struct timeval pps_time;	/* kernel time at last interval */
long pps_tf[] = {0, 0, 0};	/* pps time offset median filter (us) */
long pps_offset = 0;		/* pps time offset (us) */
long pps_jitter = MAXTIME;	/* time dispersion (jitter) (us) */
long pps_ff[] = {0, 0, 0};	/* pps frequency offset median filter */
long pps_freq = 0;		/* frequency offset (scaled ppm) */
long pps_stabil = MAXFREQ;	/* frequency dispersion (scaled ppm) */
long pps_usec = 0;		/* microsec counter at last interval */
long pps_valid = PPS_VALID;	/* pps signal watchdog counter */
int pps_glitch = 0;		/* pps signal glitch counter */
int pps_count = 0;		/* calibration interval counter (s) */
int pps_shift = PPS_SHIFT;	/* interval duration (s) (shift) */
int pps_intcnt = 0;		/* intervals at current duration */

/*
 * PPS signal quality monitors
 *
 * pps_jitcnt counts the seconds that have been discarded because the
 * jitter measured by the time median filter exceeds the limit MAXTIME
 * (100 us).
 *
 * pps_calcnt counts the frequency calibration intervals, which are
 * variable from 4 s to 256 s.
 *
 * pps_errcnt counts the calibration intervals which have been discarded
 * because the wander exceeds the limit MAXFREQ (100 ppm) or where the
 * calibration interval jitter exceeds two ticks.
 *
 * pps_stbcnt counts the calibration intervals that have been discarded
 * because the frequency wander exceeds the limit MAXFREQ / 4 (25 us).
 */
long pps_jitcnt = 0;		/* jitter limit exceeded */
long pps_calcnt = 0;		/* calibration intervals */
long pps_errcnt = 0;		/* calibration errors */
long pps_stbcnt = 0;		/* stability limit exceeded */
#endif /* PPS_SYNC */

#ifdef EXT_CLOCK
/*
 * External clock definitions
 *
 * The following definitions and declarations are used only if an
 * external clock is configured on the system.
 */
#define CLOCK_INTERVAL 30	/* CPU clock update interval (s) */

/*
 * The clock_count variable is set to CLOCK_INTERVAL at each PPS
 * interrupt and decremented once each second.
 */
int clock_count = 0;		/* CPU clock counter */

#ifdef HIGHBALL
/*
 * The clock_offset and clock_cpu variables are used by the HIGHBALL
 * interface.  The clock_offset variable defines the offset between
 * system time and the HIGHBALL counters.  The clock_cpu variable contains
 * the offset between the system clock and the HIGHBALL clock for use in
 * disciplining the kernel time variable.
 */
extern struct timeval clock_offset;	/* Highball clock offset */
long clock_cpu = 0;			/* CPU clock adjust */
#endif /* HIGHBALL */
#endif /* EXT_CLOCK */
#endif /* NTP */


/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	volatile struct timeval *tp = (t); \
	long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}
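
/*
 * Worked example (illustrative values): if tp->tv_usec is 999990 and
 * usec is 25, us becomes 1000015, which is folded to tv_usec = 15 with
 * tv_sec incremented by one.  Note the macro assumes the bump is small
 * enough that at most one rollover can occur.
 */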

int	stathz;
int	profhz;
int	profprocs;
int	softclock_running;		/* 1 => softclock() is running */
static int psdiv;			/* prof => stat divider */
int	psratio;			/* ratio: prof / stat */
int	tickfix, tickfixinterval;	/* used if tick not really integral */
#ifndef NTP
static int tickfixcnt;			/* accumulated fractional error */
#else
int	fixtick;			/* used by NTP for same */
int	shifthz;
#endif
/*
 * We might want ldd to load both words from time at once.
 * To succeed we need to be quadword aligned.
 * The sparc already does that, and that it has worked so far is a fluke.
 */
volatile struct	timeval time  __attribute__((__aligned__(__alignof__(quad_t))));
volatile struct	timeval mono_time;

/*
 * The callout mechanism is based on the work of Adam M. Costello and
 * George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities", and Justin Gibbs's subsequent
 * integration into FreeBSD, modified for NetBSD by Jason R. Thorpe.
 *
 * The original work on the data structures used in this implementation
 * was published by G. Varghese and A. Lauck in the paper "Hashed and
 * Hierarchical Timing Wheels: Data Structures for the Efficient
 * Implementation of a Timer Facility" in the Proceedings of the 11th
 * ACM Annual Symposium on Operating System Principles, Austin, Texas,
 * November 1987.
 */
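
/*
 * A minimal sketch of the hashing scheme (illustrative only): a callout
 * due at absolute tick `t' lives in bucket
 *
 *	callwheel[t & callwheelmask]
 *
 * Since callwheelsize is a power of two, the mask is a cheap modulus;
 * softclock() then compares c_time against softclock_ticks to skip
 * entries that merely collided into the same bucket.
 */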
struct callout_queue *callwheel;
int callwheelsize, callwheelbits, callwheelmask;

static struct callout *nextsoftcheck;	/* next callout to be checked */

#ifdef CALLWHEEL_STATS
int	callwheel_collisions;		/* number of hash collisions */
int	callwheel_maxlength;		/* length of the longest hash chain */
int	*callwheel_sizes;		/* per-bucket length count */
u_int64_t callwheel_count;		/* # callouts currently */
u_int64_t callwheel_established;	/* # callouts established */
u_int64_t callwheel_fired;		/* # callouts that fired */
u_int64_t callwheel_disestablished;	/* # callouts disestablished */
u_int64_t callwheel_changed;		/* # callouts changed */
u_int64_t callwheel_softclocks;		/* # times softclock() called */
u_int64_t callwheel_softchecks;		/* # checks per softclock() */
u_int64_t callwheel_softempty;		/* # empty buckets seen */
#endif /* CALLWHEEL_STATS */

/*
 * This value indicates the number of consecutive callouts that
 * will be checked before we allow interrupts to have a chance
 * again.
 */
#ifndef MAX_SOFTCLOCK_STEPS
#define	MAX_SOFTCLOCK_STEPS	100
#endif

struct simplelock callwheel_slock;

#define	CALLWHEEL_LOCK(s)						\
do {									\
	s = splclock();							\
	simple_lock(&callwheel_slock);					\
} while (0)

#define	CALLWHEEL_UNLOCK(s)						\
do {									\
	simple_unlock(&callwheel_slock);				\
	splx(s);							\
} while (0)

static void callout_stop_locked(struct callout *);

/*
 * These are both protected by callwheel_lock.
 * XXX SHOULD BE STATIC!!
 */
u_int64_t hardclock_ticks, softclock_ticks;

#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
void softclock(void *);
void *softclock_si;
#endif

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks(void)
{
	int i;

#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
	softclock_si = softintr_establish(IPL_SOFTCLOCK, softclock, NULL);
	if (softclock_si == NULL)
		panic("initclocks: unable to register softclock intr");
#endif

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz/rrticks, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
	rrticks = hz / 10;

#ifdef NTP
	switch (hz) {
	case 1:
		shifthz = SHIFT_SCALE - 0;
		break;
	case 2:
		shifthz = SHIFT_SCALE - 1;
		break;
	case 4:
		shifthz = SHIFT_SCALE - 2;
		break;
	case 8:
		shifthz = SHIFT_SCALE - 3;
		break;
	case 16:
		shifthz = SHIFT_SCALE - 4;
		break;
	case 32:
		shifthz = SHIFT_SCALE - 5;
		break;
	case 60:
	case 64:
		shifthz = SHIFT_SCALE - 6;
		break;
	case 96:
	case 100:
	case 128:
		shifthz = SHIFT_SCALE - 7;
		break;
	case 256:
		shifthz = SHIFT_SCALE - 8;
		break;
	case 512:
		shifthz = SHIFT_SCALE - 9;
		break;
	case 1000:
	case 1024:
		shifthz = SHIFT_SCALE - 10;
		break;
	case 1200:
	case 2048:
		shifthz = SHIFT_SCALE - 11;
		break;
	case 4096:
		shifthz = SHIFT_SCALE - 12;
		break;
	case 8192:
		shifthz = SHIFT_SCALE - 13;
		break;
	case 16384:
		shifthz = SHIFT_SCALE - 14;
		break;
	case 32768:
		shifthz = SHIFT_SCALE - 15;
		break;
	case 65536:
		shifthz = SHIFT_SCALE - 16;
		break;
	default:
		panic("weird hz");
	}
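
	/*
	 * In other words, shifthz treats hz as the nearest power of two
	 * (e.g. hz = 100 is scaled as if it were 2^7 = 128); the
	 * residual error for such non-power-of-two rates is compensated
	 * in hardclock() below.
	 */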
	if (fixtick == 0) {
		/*
		 * Give MD code a chance to set this to a better
		 * value; but, if it doesn't, we should.
		 */
		fixtick = (1000000 - (hz*tick));
	}
#endif
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(struct clockframe *frame)
{
	struct lwp *l;
	struct proc *p;
	int delta;
	extern int tickdelta;
	extern long timedelta;
	struct cpu_info *ci = curcpu();
#ifdef NTP
	int time_update;
	int ltemp;
#endif

	l = curproc;
	if (l) {
		struct pstats *pstats;

		p = l->l_proc;
		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);
	if ((--ci->ci_schedstate.spc_rrticks) <= 0)
		roundrobin(ci);

#if defined(MULTIPROCESSOR)
	/*
	 * If we are not the primary CPU, we're not allowed to do
	 * any more work.
	 */
	if (CPU_IS_PRIMARY(ci) == 0)
		return;
#endif

	/*
	 * Increment the time-of-day.  The increment is normally just
	 * ``tick''.  If the machine is one which has a clock frequency
	 * such that ``hz'' would not divide the second evenly into
	 * milliseconds, a periodic adjustment must be applied.  Finally,
	 * if we are still adjusting the time (see adjtime()),
	 * ``tickdelta'' may also be added in.
	 */
	delta = tick;

#ifndef NTP
	if (tickfix) {
		tickfixcnt += tickfix;
		if (tickfixcnt >= tickfixinterval) {
			delta++;
			tickfixcnt -= tickfixinterval;
		}
	}
#endif /* !NTP */
	/* Imprecise 4bsd adjtime() handling */
	if (timedelta != 0) {
		delta += tickdelta;
		timedelta -= tickdelta;
	}

#ifdef notyet
	microset();
#endif

#ifndef NTP
	BUMPTIME(&time, delta);		/* XXX Now done using NTP code below */
#endif
	BUMPTIME(&mono_time, delta);

#ifdef NTP
	time_update = delta;

	/*
	 * Compute the phase adjustment.  If the low-order bits
	 * (time_phase) of the update overflow, bump the high-order bits
	 * (time_update).
	 */
	time_phase += time_adj;
	if (time_phase <= -FINEUSEC) {
		ltemp = -time_phase >> SHIFT_SCALE;
		time_phase += ltemp << SHIFT_SCALE;
		time_update -= ltemp;
	} else if (time_phase >= FINEUSEC) {
		ltemp = time_phase >> SHIFT_SCALE;
		time_phase -= ltemp << SHIFT_SCALE;
		time_update += ltemp;
	}
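
	/*
	 * Roughly: time_phase accumulates time_adj in units of
	 * 2^-SHIFT_SCALE microseconds.  Once at least one whole
	 * microsecond (FINEUSEC, i.e. 1 << SHIFT_SCALE) has accumulated,
	 * the whole part is carried into time_update and only the
	 * fraction is kept in time_phase.
	 */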

#ifdef HIGHBALL
	/*
	 * If the HIGHBALL board is installed, we need to adjust the
	 * external clock offset in order to close the hardware feedback
	 * loop.  This will adjust the external clock phase and frequency
	 * in small amounts.  The additional phase noise and frequency
	 * wander this causes should be minimal.  We also need to
	 * discipline the kernel time variable, since the PLL is used to
	 * discipline the external clock.  If the Highball board is not
	 * present, we discipline kernel time with the PLL as usual.  We
	 * assume that the external clock phase adjustment (time_update)
	 * and kernel phase adjustment (clock_cpu) are less than the
	 * value of tick.
	 */
	clock_offset.tv_usec += time_update;
	if (clock_offset.tv_usec >= 1000000) {
		clock_offset.tv_sec++;
		clock_offset.tv_usec -= 1000000;
	}
	if (clock_offset.tv_usec < 0) {
		clock_offset.tv_sec--;
		clock_offset.tv_usec += 1000000;
	}
	time.tv_usec += clock_cpu;
	clock_cpu = 0;
#else
	time.tv_usec += time_update;
#endif /* HIGHBALL */

	/*
	 * On rollover of the second the phase adjustment to be used for
	 * the next second is calculated.  Also, the maximum error is
	 * increased by the tolerance.  If the PPS frequency discipline
	 * code is present, the phase is increased to compensate for the
	 * CPU clock oscillator frequency error.
	 *
	 * On a 32-bit machine and given parameters in the timex.h
	 * header file, the maximum phase adjustment is +-512 ms and
	 * maximum frequency offset is a tad less than +-512 ppm.  On a
	 * 64-bit machine, you shouldn't need to ask.
	 */
	if (time.tv_usec >= 1000000) {
		time.tv_usec -= 1000000;
		time.tv_sec++;
		time_maxerror += time_tolerance >> SHIFT_USEC;

		/*
		 * Leap second processing.  If in leap-insert state at
		 * the end of the day, the system clock is set back one
		 * second; if in leap-delete state, the system clock is
		 * set ahead one second.  The microtime() routine or
		 * external clock driver will insure that reported time
		 * is always monotonic.  The ugly divides should be
		 * replaced.
		 */
		switch (time_state) {
		case TIME_OK:
			if (time_status & STA_INS)
				time_state = TIME_INS;
			else if (time_status & STA_DEL)
				time_state = TIME_DEL;
			break;

		case TIME_INS:
			if (time.tv_sec % 86400 == 0) {
				time.tv_sec--;
				time_state = TIME_OOP;
			}
			break;

		case TIME_DEL:
			if ((time.tv_sec + 1) % 86400 == 0) {
				time.tv_sec++;
				time_state = TIME_WAIT;
			}
			break;

		case TIME_OOP:
			time_state = TIME_WAIT;
			break;

		case TIME_WAIT:
			if (!(time_status & (STA_INS | STA_DEL)))
				time_state = TIME_OK;
			break;
		}

		/*
		 * Compute the phase adjustment for the next second.  In
		 * PLL mode, the offset is reduced by a fixed factor
		 * times the time constant.  In FLL mode the offset is
		 * used directly.  In either mode, the maximum phase
		 * adjustment for each second is clamped so as to spread
		 * the adjustment over not more than the number of
		 * seconds between updates.
		 */
		if (time_offset < 0) {
			ltemp = -time_offset;
			if (!(time_status & STA_FLL))
				ltemp >>= SHIFT_KG + time_constant;
			if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
				ltemp = (MAXPHASE / MINSEC) <<
				    SHIFT_UPDATE;
			time_offset += ltemp;
			time_adj = -ltemp << (shifthz - SHIFT_UPDATE);
		} else if (time_offset > 0) {
			ltemp = time_offset;
			if (!(time_status & STA_FLL))
				ltemp >>= SHIFT_KG + time_constant;
			if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
				ltemp = (MAXPHASE / MINSEC) <<
				    SHIFT_UPDATE;
			time_offset -= ltemp;
			time_adj = ltemp << (shifthz - SHIFT_UPDATE);
		} else
			time_adj = 0;

		/*
		 * Compute the frequency estimate and additional phase
		 * adjustment due to frequency error for the next
		 * second.  When the PPS signal is engaged, gnaw on the
		 * watchdog counter and update the frequency computed by
		 * the pll and the PPS signal.
		 */
#ifdef PPS_SYNC
		pps_valid++;
		if (pps_valid == PPS_VALID) {
			pps_jitter = MAXTIME;
			pps_stabil = MAXFREQ;
			time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
			    STA_PPSWANDER | STA_PPSERROR);
		}
		ltemp = time_freq + pps_freq;
#else
		ltemp = time_freq;
#endif /* PPS_SYNC */

		if (ltemp < 0)
			time_adj -= -ltemp >> (SHIFT_USEC - shifthz);
		else
			time_adj += ltemp >> (SHIFT_USEC - shifthz);
		time_adj += (long)fixtick << shifthz;

		/*
		 * When the CPU clock oscillator frequency is not a
		 * power of 2 in Hz, shifthz is only an approximate
		 * scale factor.
		 *
		 * To determine the adjustment, you can do the following:
		 *	bc -q
		 *	scale=24
		 *	obase=2
		 *	idealhz/realhz
		 * where `idealhz' is the next higher power of 2, and `realhz'
		 * is the actual value.  You may need to factor this result
		 * into a sequence of 2 multipliers to get better precision.
		 *
		 * Likewise, the error can be calculated with (e.g. for 100Hz):
		 *	bc -q
		 *	scale=24
		 *	((1+2^-2+2^-5)*(1-2^-10)*realhz-idealhz)/idealhz
		 * (and then multiply by 1000000 to get ppm).
		 */
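		/*
		 * Worked example for hz = 100 (illustrative): idealhz =
		 * 128, and 128/100 = 1.28 ~= (1 + 2^-2 + 2^-5) *
		 * (1 - 2^-10), hence the shifts by 2, 5 and 10 in the
		 * hz == 100 case below, with about 1ppm residual error
		 * as noted there.
		 */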
		switch (hz) {
		case 60:
			/* A factor of 1.000100010001 gives about 15ppm
			   error. */
			if (time_adj < 0) {
				time_adj -= (-time_adj >> 4);
				time_adj -= (-time_adj >> 8);
			} else {
				time_adj += (time_adj >> 4);
				time_adj += (time_adj >> 8);
			}
			break;

		case 96:
			/* A factor of 1.0101010101 gives about 244ppm error. */
			if (time_adj < 0) {
				time_adj -= (-time_adj >> 2);
				time_adj -= (-time_adj >> 4) + (-time_adj >> 8);
			} else {
				time_adj += (time_adj >> 2);
				time_adj += (time_adj >> 4) + (time_adj >> 8);
			}
			break;

		case 100:
			/* A factor of 1.010001111010111 gives about 1ppm
			   error. */
			if (time_adj < 0) {
				time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
				time_adj += (-time_adj >> 10);
			} else {
				time_adj += (time_adj >> 2) + (time_adj >> 5);
				time_adj -= (time_adj >> 10);
			}
			break;

		case 1000:
			/* A factor of 1.000001100010100001 gives about 50ppm
			   error. */
			if (time_adj < 0) {
				time_adj -= (-time_adj >> 6) + (-time_adj >> 11);
				time_adj -= (-time_adj >> 7);
			} else {
				time_adj += (time_adj >> 6) + (time_adj >> 11);
				time_adj += (time_adj >> 7);
			}
			break;

		case 1200:
			/* A factor of 1.1011010011100001 gives about 64ppm
			   error. */
			if (time_adj < 0) {
				time_adj -= (-time_adj >> 1) + (-time_adj >> 6);
				time_adj -= (-time_adj >> 3) + (-time_adj >> 10);
			} else {
				time_adj += (time_adj >> 1) + (time_adj >> 6);
				time_adj += (time_adj >> 3) + (time_adj >> 10);
			}
			break;
		}

#ifdef EXT_CLOCK
		/*
		 * If an external clock is present, it is necessary to
		 * discipline the kernel time variable anyway, since not
		 * all system components use the microtime() interface.
		 * Here, the time offset between the external clock and
		 * kernel time variable is computed every so often.
		 */
		clock_count++;
		if (clock_count > CLOCK_INTERVAL) {
			clock_count = 0;
			microtime(&clock_ext);
			delta.tv_sec = clock_ext.tv_sec - time.tv_sec;
			delta.tv_usec = clock_ext.tv_usec -
			    time.tv_usec;
			if (delta.tv_usec < 0)
				delta.tv_sec--;
			if (delta.tv_usec >= 500000) {
				delta.tv_usec -= 1000000;
				delta.tv_sec++;
			}
			if (delta.tv_usec < -500000) {
				delta.tv_usec += 1000000;
				delta.tv_sec--;
			}
			if (delta.tv_sec > 0 || (delta.tv_sec == 0 &&
			    delta.tv_usec > MAXPHASE) ||
			    delta.tv_sec < -1 || (delta.tv_sec == -1 &&
			    delta.tv_usec < -MAXPHASE)) {
				time = clock_ext;
				delta.tv_sec = 0;
				delta.tv_usec = 0;
			}
#ifdef HIGHBALL
			clock_cpu = delta.tv_usec;
#else /* HIGHBALL */
			hardupdate(delta.tv_usec);
#endif /* HIGHBALL */
		}
#endif /* EXT_CLOCK */
	}

#endif /* NTP */

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	simple_lock(&callwheel_slock);	/* already at splclock() */
	hardclock_ticks++;
	if (TAILQ_FIRST(&callwheel[hardclock_ticks & callwheelmask]) != NULL) {
		simple_unlock(&callwheel_slock);
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do
			 * it now.
			 *
			 * NOTE: If we're at ``base priority'', softclock()
			 * was not already running.
			 */
			spllowersoftclock();
			KERNEL_LOCK(LK_CANRECURSE|LK_EXCLUSIVE);
			softclock(NULL);
			KERNEL_UNLOCK();
		} else {
#ifdef __HAVE_GENERIC_SOFT_INTERRUPTS
			softintr_schedule(softclock_si);
#else
			setsoftclock();
#endif
		}
		return;
	} else if (softclock_running == 0 &&
	    (softclock_ticks + 1) == hardclock_ticks) {
		softclock_ticks++;
	}
	simple_unlock(&callwheel_slock);
}

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock(void *v)
{
	struct callout_queue *bucket;
	struct callout *c;
	void (*func)(void *);
	void *arg;
	int s, idx;
	int steps = 0;

	CALLWHEEL_LOCK(s);

	softclock_running = 1;

#ifdef CALLWHEEL_STATS
	callwheel_softclocks++;
#endif

	while (softclock_ticks != hardclock_ticks) {
		softclock_ticks++;
		idx = (int)(softclock_ticks & callwheelmask);
		bucket = &callwheel[idx];
		c = TAILQ_FIRST(bucket);
#ifdef CALLWHEEL_STATS
		if (c == NULL)
			callwheel_softempty++;
#endif
		while (c != NULL) {
#ifdef CALLWHEEL_STATS
			callwheel_softchecks++;
#endif
			if (c->c_time != softclock_ticks) {
				c = TAILQ_NEXT(c, c_link);
				if (++steps >= MAX_SOFTCLOCK_STEPS) {
					nextsoftcheck = c;
					/* Give interrupts a chance. */
					CALLWHEEL_UNLOCK(s);
					CALLWHEEL_LOCK(s);
					c = nextsoftcheck;
					steps = 0;
				}
			} else {
				nextsoftcheck = TAILQ_NEXT(c, c_link);
				TAILQ_REMOVE(bucket, c, c_link);
#ifdef CALLWHEEL_STATS
				callwheel_sizes[idx]--;
				callwheel_fired++;
				callwheel_count--;
#endif
				func = c->c_func;
				arg = c->c_arg;
				c->c_func = NULL;
				c->c_flags &= ~CALLOUT_PENDING;
				CALLWHEEL_UNLOCK(s);
				(*func)(arg);
				CALLWHEEL_LOCK(s);
				steps = 0;
				c = nextsoftcheck;
			}
		}
	}
	nextsoftcheck = NULL;
	softclock_running = 0;
	CALLWHEEL_UNLOCK(s);
}

/*
 * callout_setsize:
 *
 *	Determine how many callwheels are necessary and
 *	set hash mask.  Called from allocsys().
 */
void
callout_setsize(void)
{

	for (callwheelsize = 1; callwheelsize < ncallout; callwheelsize <<= 1)
		/* loop */ ;
	callwheelmask = callwheelsize - 1;
}
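
/*
 * For example (illustrative): with ncallout = 200 the loop yields
 * callwheelsize = 256 and callwheelmask = 0xff, so bucket selection
 * reduces to a single AND.
 */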

/*
 * callout_startup:
 *
 *	Initialize the callwheel buckets.
 */
void
callout_startup(void)
{
	int i;

	for (i = 0; i < callwheelsize; i++)
		TAILQ_INIT(&callwheel[i]);

	simple_lock_init(&callwheel_slock);
}

/*
 * callout_init:
 *
 *	Initialize a callout structure so that it can be used
 *	by callout_reset() and callout_stop().
 */
void
callout_init(struct callout *c)
{

	memset(c, 0, sizeof(*c));
}

/*
 * callout_reset:
 *
 *	Establish or change a timeout.
 */
void
callout_reset(struct callout *c, int ticks, void (*func)(void *), void *arg)
{
	struct callout_queue *bucket;
	int s;

	if (ticks <= 0)
		ticks = 1;

	CALLWHEEL_LOCK(s);

	/*
	 * If this callout's timer is already running, cancel it
	 * before we modify it.
	 */
	if (c->c_flags & CALLOUT_PENDING) {
		callout_stop_locked(c);	/* Already locked */
#ifdef CALLWHEEL_STATS
		callwheel_changed++;
#endif
	}

	c->c_arg = arg;
	c->c_func = func;
	c->c_flags = CALLOUT_ACTIVE | CALLOUT_PENDING;
	c->c_time = hardclock_ticks + ticks;

	bucket = &callwheel[c->c_time & callwheelmask];

#ifdef CALLWHEEL_STATS
	if (TAILQ_FIRST(bucket) != NULL)
		callwheel_collisions++;
#endif

	TAILQ_INSERT_TAIL(bucket, c, c_link);

#ifdef CALLWHEEL_STATS
	callwheel_count++;
	callwheel_established++;
	if (++callwheel_sizes[c->c_time & callwheelmask] > callwheel_maxlength)
		callwheel_maxlength =
		    callwheel_sizes[c->c_time & callwheelmask];
#endif

	CALLWHEEL_UNLOCK(s);
}

/*
 * callout_stop_locked:
 *
 *	Disestablish a timeout.  Callwheel is locked.
 */
static void
callout_stop_locked(struct callout *c)
{

	/*
	 * Don't attempt to delete a callout that's not on the queue.
	 */
	if ((c->c_flags & CALLOUT_PENDING) == 0) {
		c->c_flags &= ~CALLOUT_ACTIVE;
		return;
	}

	c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);

	if (nextsoftcheck == c)
		nextsoftcheck = TAILQ_NEXT(c, c_link);

	TAILQ_REMOVE(&callwheel[c->c_time & callwheelmask], c, c_link);
#ifdef CALLWHEEL_STATS
	callwheel_count--;
	callwheel_disestablished++;
	callwheel_sizes[c->c_time & callwheelmask]--;
#endif

	c->c_func = NULL;
}

/*
 * callout_stop:
 *
 *	Disestablish a timeout.  Callwheel is unlocked.  This is
 *	the standard entry point.
 */
void
callout_stop(struct callout *c)
{
	int s;

	CALLWHEEL_LOCK(s);
	callout_stop_locked(c);
	CALLWHEEL_UNLOCK(s);
}
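
/*
 * A minimal usage sketch of the callout interface (the names my_softc,
 * my_attach and my_tick, and the hz / 4 period, are hypothetical and
 * for illustration only):
 *
 *	struct my_softc {
 *		struct callout sc_co;
 *	};
 *
 *	void
 *	my_tick(void *arg)
 *	{
 *		struct my_softc *sc = arg;
 *
 *		// ... periodic work ...
 *		callout_reset(&sc->sc_co, hz / 4, my_tick, sc);
 *	}
 *
 *	void
 *	my_attach(struct my_softc *sc)
 *	{
 *		callout_init(&sc->sc_co);
 *		callout_reset(&sc->sc_co, hz / 4, my_tick, sc);
 *	}
 *
 * callout_stop(&sc->sc_co) disestablishes the timeout, e.g. at detach.
 */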

#ifdef CALLWHEEL_STATS
/*
 * callout_showstats:
 *
 *	Display callout statistics.  Call it from DDB.
 */
void
callout_showstats(void)
{
	u_int64_t curticks;
	int s;

	s = splclock();
	curticks = softclock_ticks;
	splx(s);

	printf("Callwheel statistics:\n");
	printf("\tCallouts currently queued: %llu\n", callwheel_count);
	printf("\tCallouts established: %llu\n", callwheel_established);
	printf("\tCallouts disestablished: %llu\n", callwheel_disestablished);
	if (callwheel_changed != 0)
		printf("\t\tOf those, %llu were changes\n", callwheel_changed);
	printf("\tCallouts that fired: %llu\n", callwheel_fired);
	printf("\tNumber of buckets: %d\n", callwheelsize);
	printf("\tNumber of hash collisions: %d\n", callwheel_collisions);
	printf("\tMaximum hash chain length: %d\n", callwheel_maxlength);
	printf("\tSoftclocks: %llu, Softchecks: %llu\n",
	    callwheel_softclocks, callwheel_softchecks);
	printf("\t\tEmpty buckets seen: %llu\n", callwheel_softempty);
}
#endif

/*
 * Compute number of hz until specified time.  Used to compute second
 * argument to callout_reset() from an absolute time.
 */
int
hzto(struct timeval *tv)
{
	unsigned long ticks;
	long sec, usec;
	int s;

	/*
	 * If the number of usecs in the whole seconds part of the time
	 * difference fits in a long, then the total number of usecs will
	 * fit in an unsigned long.  Compute the total and convert it to
	 * ticks, rounding up and adding 1 to allow for the current tick
	 * to expire.  Rounding also depends on unsigned long arithmetic
	 * to avoid overflow.
	 *
	 * Otherwise, if the number of ticks in the whole seconds part of
	 * the time difference fits in a long, then convert the parts to
	 * ticks separately and add, using similar rounding methods and
	 * overflow avoidance.  This method would work in the previous
	 * case, but it is slightly slower and assumes that hz is integral.
	 *
	 * Otherwise, round the time difference down to the maximum
	 * representable value.
	 *
	 * If ints are 32-bit, then the maximum value for any timeout in
	 * 10ms ticks is 248 days.
	 */
	s = splclock();
	sec = tv->tv_sec - time.tv_sec;
	usec = tv->tv_usec - time.tv_usec;
	splx(s);

	if (usec < 0) {
		sec--;
		usec += 1000000;
	}

	if (sec < 0 || (sec == 0 && usec <= 0)) {
		/*
		 * Would expire now or in the past.  Return 0 ticks.
		 * This is different from the legacy hzto() interface,
		 * and callers need to check for it.
		 */
		ticks = 0;
	} else if (sec <= (LONG_MAX / 1000000))
		ticks = (((sec * 1000000) + (unsigned long)usec + (tick - 1))
		    / tick) + 1;
	else if (sec <= (LONG_MAX / hz))
		ticks = (sec * hz) +
		    (((unsigned long)usec + (tick - 1)) / tick) + 1;
	else
		ticks = LONG_MAX;

	if (ticks > INT_MAX)
		ticks = INT_MAX;

	return ((int)ticks);
}
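
/*
 * A usage sketch (illustrative; my_func and my_arg are placeholders):
 * to fire a callout at an absolute time `when' rather than after a
 * relative tick count:
 *
 *	struct timeval when;
 *	...
 *	callout_reset(&c, hzto(&when), my_func, my_arg);
 *
 * Remember that hzto() returns 0 for times already past, which
 * callout_reset() then rounds up to 1 tick.
 */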

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(struct proc *p)
{

	if ((p->p_flag & P_PROFIL) == 0) {
		p->p_flag |= P_PROFIL;
		if (++profprocs == 1 && stathz != 0)
			psdiv = psratio;
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(struct proc *p)
{

	if (p->p_flag & P_PROFIL) {
		p->p_flag &= ~P_PROFIL;
		if (--profprocs == 0 && stathz != 0)
			psdiv = 1;
	}
}

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(struct clockframe *frame)
{
#ifdef GPROF
	struct gmonparam *g;
	intptr_t i;
#endif
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	struct lwp *l;
	struct proc *p;

	/*
	 * Notice changes in divisor frequency, and adjust clock
	 * frequency accordingly.
	 */
	if (spc->spc_psdiv != psdiv) {
		spc->spc_psdiv = psdiv;
		spc->spc_pscnt = psdiv;
		if (psdiv == 1) {
			setstatclockrate(stathz);
		} else {
			setstatclockrate(profhz);
		}
	}
	l = curproc;
	p = (l ? l->l_proc : NULL);
	if (CLKF_USERMODE(frame)) {
		if (p->p_flag & P_PROFIL)
			addupc_intr(p, CLKF_PC(frame));
		if (--spc->spc_pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			spc->spc_cp_time[CP_NICE]++;
		else
			spc->spc_cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize) {
				i /= HISTFRACTION * sizeof(*g->kcount);
				g->kcount[i]++;
			}
		}
#endif
#ifdef LWP_PC
		if (p && p->p_flag & P_PROFIL)
			addupc_intr(p, LWP_PC(l));
#endif
		if (--spc->spc_pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			spc->spc_cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			spc->spc_cp_time[CP_SYS]++;
		} else
			spc->spc_cp_time[CP_IDLE]++;
	}
	spc->spc_pscnt = psdiv;

	if (l != NULL) {
		++p->p_cpticks;
		/*
		 * If no separate schedclock is provided, call it here
		 * at every fourth statclock() tick, i.e. at ~12-25 Hz;
		 * ~16 Hz is best.
		 */
		if (schedhz == 0)
			if ((++ci->ci_schedstate.spc_schedticks & 3) == 0)
				schedclock(l);
	}
}


#ifdef NTP	/* NTP phase-locked loop in kernel */

/*
 * hardupdate() - local clock update
 *
 * This routine is called by ntp_adjtime() to update the local clock
 * phase and frequency.  The implementation is of an adaptive-parameter,
 * hybrid phase/frequency-lock loop (PLL/FLL).  The routine computes new
 * time and frequency offset estimates for each call.  If the kernel PPS
 * discipline code is configured (PPS_SYNC), the PPS signal itself
 * determines the new time offset, instead of the calling argument.
 * Presumably, calls to ntp_adjtime() occur only when the caller
 * believes the local clock is valid within some bound (+-128 ms with
 * NTP).  If the caller's time is far different than the PPS time, an
 * argument will ensue, and it's not clear who will lose.
 *
 * For uncompensated quartz crystal oscillators and nominal update
 * intervals less than 1024 s, operation should be in phase-lock mode
 * (STA_FLL = 0), where the loop is disciplined to phase.  For update
 * intervals greater than this, operation should be in frequency-lock
 * mode (STA_FLL = 1), where the loop is disciplined to frequency.
 *
 * Note: splclock() is in effect.
 */
void
hardupdate(long offset)
{
	long ltemp, mtemp;

	if (!(time_status & STA_PLL) && !(time_status & STA_PPSTIME))
		return;
	ltemp = offset;
#ifdef PPS_SYNC
	if (time_status & STA_PPSTIME && time_status & STA_PPSSIGNAL)
		ltemp = pps_offset;
#endif /* PPS_SYNC */

	/*
	 * Scale the phase adjustment and clamp to the operating range.
	 */
	if (ltemp > MAXPHASE)
		time_offset = MAXPHASE << SHIFT_UPDATE;
	else if (ltemp < -MAXPHASE)
		time_offset = -(MAXPHASE << SHIFT_UPDATE);
	else
		time_offset = ltemp << SHIFT_UPDATE;

	/*
	 * Select whether the frequency is to be controlled and in which
	 * mode (PLL or FLL).  Clamp to the operating range.  Ugly
	 * multiply/divide should be replaced someday.
	 */
	if (time_status & STA_FREQHOLD || time_reftime == 0)
		time_reftime = time.tv_sec;
	mtemp = time.tv_sec - time_reftime;
	time_reftime = time.tv_sec;
	if (time_status & STA_FLL) {
		if (mtemp >= MINSEC) {
			ltemp = ((time_offset / mtemp) << (SHIFT_USEC -
			    SHIFT_UPDATE));
			if (ltemp < 0)
				time_freq -= -ltemp >> SHIFT_KH;
			else
				time_freq += ltemp >> SHIFT_KH;
		}
	} else {
		if (mtemp < MAXSEC) {
			ltemp *= mtemp;
			if (ltemp < 0)
				time_freq -= -ltemp >> (time_constant +
				    time_constant + SHIFT_KF -
				    SHIFT_USEC);
			else
				time_freq += ltemp >> (time_constant +
				    time_constant + SHIFT_KF -
				    SHIFT_USEC);
		}
	}
	if (time_freq > time_tolerance)
		time_freq = time_tolerance;
	else if (time_freq < -time_tolerance)
		time_freq = -time_tolerance;
}

#ifdef PPS_SYNC
/*
 * hardpps() - discipline CPU clock oscillator to external PPS signal
 *
 * This routine is called at each PPS interrupt in order to discipline
 * the CPU clock oscillator to the PPS signal.  It measures the PPS phase
 * and leaves it in a handy spot for the hardclock() routine.  It
 * integrates successive PPS phase differences and calculates the
 * frequency offset.  This is used in hardclock() to discipline the CPU
 * clock oscillator so that intrinsic frequency error is cancelled out.
 * The code requires the caller to capture the time and hardware counter
 * value at the on-time PPS signal transition.
 *
 * Note that, on some Unix systems, this routine runs at an interrupt
 * priority level higher than the timer interrupt routine hardclock().
 * Therefore, the variables used are distinct from the hardclock()
 * variables, except for certain exceptions: The PPS frequency pps_freq
 * and phase pps_offset variables are determined by this routine and
 * updated atomically.  The time_tolerance variable can be considered a
 * constant, since it is infrequently changed, and then only when the
 * PPS signal is disabled.  The watchdog counter pps_valid is updated
 * once per second by hardclock() and is atomically cleared in this
 * routine.
 */
void
hardpps(struct timeval *tvp,		/* time at PPS */
	long usec			/* hardware counter at PPS */)
{
	long u_usec, v_usec, bigtick;
	long cal_sec, cal_usec;

	/*
	 * An occasional glitch can be produced when the PPS interrupt
	 * occurs in the hardclock() routine before the time variable is
	 * updated.  Here the offset is discarded when the difference
	 * between it and the last one is greater than tick/2, but not
	 * if the interval since the first discard exceeds 30 s.
	 */
	time_status |= STA_PPSSIGNAL;
	time_status &= ~(STA_PPSJITTER | STA_PPSWANDER | STA_PPSERROR);
	pps_valid = 0;
	u_usec = -tvp->tv_usec;
	if (u_usec < -500000)
		u_usec += 1000000;
	v_usec = pps_offset - u_usec;
	if (v_usec < 0)
		v_usec = -v_usec;
	if (v_usec > (tick >> 1)) {
		if (pps_glitch > MAXGLITCH) {
			pps_glitch = 0;
			pps_tf[2] = u_usec;
			pps_tf[1] = u_usec;
		} else {
			pps_glitch++;
			u_usec = pps_offset;
		}
	} else
		pps_glitch = 0;

	/*
	 * A three-stage median filter is used to help deglitch the pps
	 * time.  The median sample becomes the time offset estimate; the
	 * difference between the other two samples becomes the time
	 * dispersion (jitter) estimate.
	 */
	pps_tf[2] = pps_tf[1];
	pps_tf[1] = pps_tf[0];
	pps_tf[0] = u_usec;
	if (pps_tf[0] > pps_tf[1]) {
		if (pps_tf[1] > pps_tf[2]) {
			pps_offset = pps_tf[1];		/* 0 1 2 */
			v_usec = pps_tf[0] - pps_tf[2];
		} else if (pps_tf[2] > pps_tf[0]) {
			pps_offset = pps_tf[0];		/* 2 0 1 */
			v_usec = pps_tf[2] - pps_tf[1];
		} else {
			pps_offset = pps_tf[2];		/* 0 2 1 */
			v_usec = pps_tf[0] - pps_tf[1];
		}
	} else {
		if (pps_tf[1] < pps_tf[2]) {
			pps_offset = pps_tf[1];		/* 2 1 0 */
			v_usec = pps_tf[2] - pps_tf[0];
		} else if (pps_tf[2] < pps_tf[0]) {
			pps_offset = pps_tf[0];		/* 1 0 2 */
			v_usec = pps_tf[1] - pps_tf[2];
		} else {
			pps_offset = pps_tf[2];		/* 1 2 0 */
			v_usec = pps_tf[1] - pps_tf[0];
		}
	}
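
	/*
	 * Worked example (illustrative values): with samples
	 * pps_tf[] = {7, 3, 5} (newest first), pps_tf[0] > pps_tf[1]
	 * holds but neither pps_tf[1] > pps_tf[2] nor
	 * pps_tf[2] > pps_tf[0] does, so the ordering is "0 2 1":
	 * pps_offset = pps_tf[2] = 5 (the median) and
	 * v_usec = pps_tf[0] - pps_tf[1] = 4 (the spread of the other
	 * two samples).
	 */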
	if (v_usec > MAXTIME)
		pps_jitcnt++;
	v_usec = (v_usec << PPS_AVG) - pps_jitter;
	if (v_usec < 0)
		pps_jitter -= -v_usec >> PPS_AVG;
	else
		pps_jitter += v_usec >> PPS_AVG;
	if (pps_jitter > (MAXTIME >> 1))
		time_status |= STA_PPSJITTER;

	/*
	 * During the calibration interval adjust the starting time when
	 * the tick overflows.  At the end of the interval compute the
	 * duration of the interval and the difference of the hardware
	 * counters at the beginning and end of the interval.  This code
	 * is deliciously complicated by the fact that valid differences
	 * may exceed the value of tick when using long calibration
	 * intervals and small ticks.  Note that the counter can be
	 * greater than tick if caught at just the wrong instant, but
	 * the values returned and used here are correct.
	 */
	bigtick = (long)tick << SHIFT_USEC;
	pps_usec -= pps_freq;
	if (pps_usec >= bigtick)
		pps_usec -= bigtick;
	if (pps_usec < 0)
		pps_usec += bigtick;
	pps_time.tv_sec++;
	pps_count++;
	if (pps_count < (1 << pps_shift))
		return;
	pps_count = 0;
	pps_calcnt++;
	u_usec = usec << SHIFT_USEC;
	v_usec = pps_usec - u_usec;
	if (v_usec >= bigtick >> 1)
		v_usec -= bigtick;
	if (v_usec < -(bigtick >> 1))
		v_usec += bigtick;
	if (v_usec < 0)
		v_usec = -(-v_usec >> pps_shift);
	else
		v_usec = v_usec >> pps_shift;
	pps_usec = u_usec;
	cal_sec = tvp->tv_sec;
	cal_usec = tvp->tv_usec;
	cal_sec -= pps_time.tv_sec;
	cal_usec -= pps_time.tv_usec;
	if (cal_usec < 0) {
		cal_usec += 1000000;
		cal_sec--;
	}
	pps_time = *tvp;

	/*
	 * Check for lost interrupts, noise, excessive jitter and
	 * excessive frequency error.  The number of timer ticks during
	 * the interval may vary +-1 tick.  Add to this a margin of one
	 * tick for the PPS signal jitter and maximum frequency
	 * deviation.  If the limits are exceeded, the calibration
	 * interval is reset to the minimum and we start over.
	 */
	u_usec = (long)tick << 1;
	if (!((cal_sec == -1 && cal_usec > (1000000 - u_usec))
	    || (cal_sec == 0 && cal_usec < u_usec))
	    || v_usec > time_tolerance || v_usec < -time_tolerance) {
		pps_errcnt++;
		pps_shift = PPS_SHIFT;
		pps_intcnt = 0;
		time_status |= STA_PPSERROR;
		return;
	}

	/*
	 * A three-stage median filter is used to help deglitch the pps
	 * frequency.  The median sample becomes the frequency offset
	 * estimate; the difference between the other two samples
	 * becomes the frequency dispersion (stability) estimate.
	 */
	pps_ff[2] = pps_ff[1];
	pps_ff[1] = pps_ff[0];
	pps_ff[0] = v_usec;
	if (pps_ff[0] > pps_ff[1]) {
		if (pps_ff[1] > pps_ff[2]) {
			u_usec = pps_ff[1];		/* 0 1 2 */
			v_usec = pps_ff[0] - pps_ff[2];
		} else if (pps_ff[2] > pps_ff[0]) {
			u_usec = pps_ff[0];		/* 2 0 1 */
			v_usec = pps_ff[2] - pps_ff[1];
		} else {
			u_usec = pps_ff[2];		/* 0 2 1 */
			v_usec = pps_ff[0] - pps_ff[1];
		}
	} else {
		if (pps_ff[1] < pps_ff[2]) {
			u_usec = pps_ff[1];		/* 2 1 0 */
			v_usec = pps_ff[2] - pps_ff[0];
		} else if (pps_ff[2] < pps_ff[0]) {
			u_usec = pps_ff[0];		/* 1 0 2 */
			v_usec = pps_ff[1] - pps_ff[2];
		} else {
			u_usec = pps_ff[2];		/* 1 2 0 */
			v_usec = pps_ff[1] - pps_ff[0];
		}
	}

	/*
	 * Here the frequency dispersion (stability) is updated.  If it
	 * is less than one-fourth the maximum (MAXFREQ), the frequency
	 * offset is updated as well, but clamped to the tolerance.  It
	 * will be processed later by the hardclock() routine.
	 */
	v_usec = (v_usec >> 1) - pps_stabil;
	if (v_usec < 0)
		pps_stabil -= -v_usec >> PPS_AVG;
	else
		pps_stabil += v_usec >> PPS_AVG;
	if (pps_stabil > MAXFREQ >> 2) {
		pps_stbcnt++;
		time_status |= STA_PPSWANDER;
		return;
	}
	if (time_status & STA_PPSFREQ) {
		if (u_usec < 0) {
			pps_freq -= -u_usec >> PPS_AVG;
			if (pps_freq < -time_tolerance)
				pps_freq = -time_tolerance;
			u_usec = -u_usec;
		} else {
			pps_freq += u_usec >> PPS_AVG;
			if (pps_freq > time_tolerance)
				pps_freq = time_tolerance;
		}
	}

	/*
	 * Here the calibration interval is adjusted.  If the maximum
	 * time difference is greater than tick / 4, reduce the interval
	 * by half.  If this is not the case for four consecutive
	 * intervals, double the interval.
	 */
	if (u_usec << pps_shift > bigtick >> 2) {
		pps_intcnt = 0;
		if (pps_shift > PPS_SHIFT)
			pps_shift--;
	} else if (pps_intcnt >= 4) {
		pps_intcnt = 0;
		if (pps_shift < PPS_SHIFTMAX)
			pps_shift++;
	} else
		pps_intcnt++;
}
#endif /* PPS_SYNC */
#endif /* NTP */

/*
 * Return information about system clocks.
 */
int
sysctl_clockrate(void *where, size_t *sizep)
{
	struct clockinfo clkinfo;

	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.tick = tick;
	clkinfo.tickadj = tickadj;
	clkinfo.hz = hz;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
}