1 /*	$NetBSD: kern_tc.c,v 1.67 2023/07/17 13:42:23 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*-
33 * ----------------------------------------------------------------------------
34 * "THE BEER-WARE LICENSE" (Revision 42):
35 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
36 * can do whatever you want with this stuff. If we meet some day, and you think
37 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
38 * ---------------------------------------------------------------------------
39 */
40
41 #include <sys/cdefs.h>
42 /* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */
43 __KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.67 2023/07/17 13:42:23 riastradh Exp $");
44
45 #ifdef _KERNEL_OPT
46 #include "opt_ntp.h"
47 #endif
48
49 #include <sys/param.h>
50
51 #include <sys/atomic.h>
52 #include <sys/evcnt.h>
53 #include <sys/ipi.h>
54 #include <sys/kauth.h>
55 #include <sys/kernel.h>
56 #include <sys/lock.h>
57 #include <sys/mutex.h>
58 #include <sys/reboot.h> /* XXX just to get AB_VERBOSE */
59 #include <sys/sysctl.h>
60 #include <sys/syslog.h>
61 #include <sys/systm.h>
62 #include <sys/timepps.h>
63 #include <sys/timetc.h>
64 #include <sys/timex.h>
65 #include <sys/xcall.h>
66
67 /*
68 * A large step happens on boot. This constant detects such steps.
69 * It is relatively small so that ntp_update_second gets called enough
70 * in the typical 'missed a couple of seconds' case, but doesn't loop
71 * forever when the time step is large.
72 */
73 #define LARGE_STEP 200
74
75 /*
76 * Implement a dummy timecounter which we can use until we get a real one
77 * in the air. This allows the console and other early stuff to use
78 * time services.
79 */
80
81 static u_int
82 dummy_get_timecount(struct timecounter *tc)
83 {
84 static u_int now;
85
86 return ++now;
87 }
88
89 static struct timecounter dummy_timecounter = {
90 .tc_get_timecount = dummy_get_timecount,
91 .tc_counter_mask = ~0u,
92 .tc_frequency = 1000000,
93 .tc_name = "dummy",
94 .tc_quality = -1000000,
95 .tc_priv = NULL,
96 };
97
98 struct timehands {
99 /* These fields must be initialized by the driver. */
100 struct timecounter *th_counter; /* active timecounter */
101 int64_t th_adjustment; /* frequency adjustment */
102 /* (NTP/adjtime) */
103 uint64_t th_scale; /* scale factor (counter */
104 /* tick->time) */
105 uint64_t th_offset_count; /* offset at last time */
106 /* update (tc_windup()) */
107 struct bintime th_offset; /* bin (up)time at windup */
108 struct timeval th_microtime; /* cached microtime */
109 struct timespec th_nanotime; /* cached nanotime */
110 /* Fields not to be copied in tc_windup start with th_generation. */
111 	volatile u_int	th_generation;	/* current generation */
112 struct timehands *th_next; /* next timehand */
113 };
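/*
 * Example: th_scale is the per-tick time step expressed as a 64-bit
 * binary fraction of a second, roughly 2^64 / tc_frequency.  For the
 * 1 MHz dummy counter, th0 below initializes it to
 * (uint64_t)-1 / 1000000 ~= 2^64 / 10^6, so bintime_addx() advances
 * the bintime by th_scale * delta, about one microsecond per tick.
 */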
114
115 static struct timehands th0;
116 static struct timehands th9 = { .th_next = &th0, };
117 static struct timehands th8 = { .th_next = &th9, };
118 static struct timehands th7 = { .th_next = &th8, };
119 static struct timehands th6 = { .th_next = &th7, };
120 static struct timehands th5 = { .th_next = &th6, };
121 static struct timehands th4 = { .th_next = &th5, };
122 static struct timehands th3 = { .th_next = &th4, };
123 static struct timehands th2 = { .th_next = &th3, };
124 static struct timehands th1 = { .th_next = &th2, };
125 static struct timehands th0 = {
126 .th_counter = &dummy_timecounter,
127 .th_scale = (uint64_t)-1 / 1000000,
128 .th_offset = { .sec = 1, .frac = 0 },
129 .th_generation = 1,
130 .th_next = &th1,
131 };
132
133 static struct timehands *volatile timehands = &th0;
134 struct timecounter *timecounter = &dummy_timecounter;
135 static struct timecounter *timecounters = &dummy_timecounter;
136
137 volatile time_t time__second __cacheline_aligned = 1;
138 volatile time_t time__uptime __cacheline_aligned = 1;
139
140 #ifndef __HAVE_ATOMIC64_LOADSTORE
141 static volatile struct {
142 uint32_t lo, hi;
143 } time__uptime32 __cacheline_aligned = {
144 .lo = 1,
145 }, time__second32 __cacheline_aligned = {
146 .lo = 1,
147 };
148 #endif
149
150 static struct bintime timebasebin;
151
152 static int timestepwarnings;
153
154 kmutex_t timecounter_lock;
155 static u_int timecounter_mods;
156 static volatile int timecounter_removals = 1;
157 static u_int timecounter_bad;
158
159 #ifdef __HAVE_ATOMIC64_LOADSTORE
160
161 static inline void
162 setrealuptime(time_t second, time_t uptime)
163 {
164
165 atomic_store_relaxed(&time__second, second);
166 atomic_store_relaxed(&time__uptime, uptime);
167 }
168
169 #else
170
171 static void
172 nullipi(void *cookie)
173 {
174 }
175
176 /*
177 * Issue membar_release on this CPU, and force membar_acquire on all
178 * CPUs.
179 */
180 static void
181 ipi_barrier(void)
182 {
183 ipi_msg_t msg = { .func = nullipi };
184
185 ipi_broadcast(&msg, /*skip_self*/true);
186 ipi_wait(&msg);
187 }
188
189 static inline void
190 setrealuptime(time_t second, time_t uptime)
191 {
192 uint32_t seclo = second & 0xffffffff, sechi = second >> 32;
193 uint32_t uplo = uptime & 0xffffffff, uphi = uptime >> 32;
194
195 KDASSERT(mutex_owned(&timecounter_lock));
196
197 /*
198 * Fast path -- no wraparound, just updating the low bits, so
199 * no need for seqlocked access.
200 */
201 if (__predict_true(sechi == time__second32.hi) &&
202 __predict_true(uphi == time__uptime32.hi)) {
203 atomic_store_relaxed(&time__second32.lo, seclo);
204 atomic_store_relaxed(&time__uptime32.lo, uplo);
205 return;
206 }
207
208 atomic_store_relaxed(&time__second32.hi, 0xffffffff);
209 atomic_store_relaxed(&time__uptime32.hi, 0xffffffff);
210 ipi_barrier();
211 atomic_store_relaxed(&time__second32.lo, seclo);
212 atomic_store_relaxed(&time__uptime32.lo, uplo);
213 ipi_barrier();
214 atomic_store_relaxed(&time__second32.hi, sechi);
215 atomic_store_relaxed(&time__uptime32.hi, uphi);
216 }
217
218 time_t
219 getrealtime(void)
220 {
221 uint32_t lo, hi;
222
223 do {
224 for (;;) {
225 hi = atomic_load_relaxed(&time__second32.hi);
226 if (__predict_true(hi != 0xffffffff))
227 break;
228 SPINLOCK_BACKOFF_HOOK;
229 }
230 __insn_barrier();
231 lo = atomic_load_relaxed(&time__second32.lo);
232 __insn_barrier();
233 } while (hi != atomic_load_relaxed(&time__second32.hi));
234
235 return ((time_t)hi << 32) | lo;
236 }
237
238 time_t
239 getuptime(void)
240 {
241 uint32_t lo, hi;
242
243 do {
244 for (;;) {
245 hi = atomic_load_relaxed(&time__uptime32.hi);
246 if (__predict_true(hi != 0xffffffff))
247 break;
248 SPINLOCK_BACKOFF_HOOK;
249 }
250 __insn_barrier();
251 lo = atomic_load_relaxed(&time__uptime32.lo);
252 __insn_barrier();
253 } while (hi != atomic_load_relaxed(&time__uptime32.hi));
254
255 return ((time_t)hi << 32) | lo;
256 }
257
258 time_t
259 getboottime(void)
260 {
261
262 return getrealtime() - getuptime();
263 }
264
265 uint32_t
266 getuptime32(void)
267 {
268
269 return atomic_load_relaxed(&time__uptime32.lo);
270 }
271
272 #endif /* !defined(__HAVE_ATOMIC64_LOADSTORE) */
273
274 /*
275 * sysctl helper routine for kern.timercounter.hardware
276 */
277 static int
278 sysctl_kern_timecounter_hardware(SYSCTLFN_ARGS)
279 {
280 struct sysctlnode node;
281 int error;
282 char newname[MAX_TCNAMELEN];
283 struct timecounter *newtc, *tc;
284
285 tc = timecounter;
286
287 strlcpy(newname, tc->tc_name, sizeof(newname));
288
289 node = *rnode;
290 node.sysctl_data = newname;
291 node.sysctl_size = sizeof(newname);
292
293 error = sysctl_lookup(SYSCTLFN_CALL(&node));
294
295 if (error ||
296 newp == NULL ||
297 strncmp(newname, tc->tc_name, sizeof(newname)) == 0)
298 return error;
299
300 if (l != NULL && (error = kauth_authorize_system(l->l_cred,
301 KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_TIMECOUNTERS, newname,
302 NULL, NULL)) != 0)
303 return error;
304
305 if (!cold)
306 mutex_spin_enter(&timecounter_lock);
307 error = EINVAL;
308 for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
309 if (strcmp(newname, newtc->tc_name) != 0)
310 continue;
311 /* Warm up new timecounter. */
312 (void)newtc->tc_get_timecount(newtc);
313 (void)newtc->tc_get_timecount(newtc);
314 timecounter = newtc;
315 error = 0;
316 break;
317 }
318 if (!cold)
319 mutex_spin_exit(&timecounter_lock);
320 return error;
321 }
322
323 static int
324 sysctl_kern_timecounter_choice(SYSCTLFN_ARGS)
325 {
326 char buf[MAX_TCNAMELEN+48];
327 char *where;
328 const char *spc;
329 struct timecounter *tc;
330 size_t needed, left, slen;
331 int error, mods;
332
333 if (newp != NULL)
334 return EPERM;
335 if (namelen != 0)
336 return EINVAL;
337
338 mutex_spin_enter(&timecounter_lock);
339 retry:
340 spc = "";
341 error = 0;
342 needed = 0;
343 left = *oldlenp;
344 where = oldp;
345 for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) {
346 if (where == NULL) {
347 needed += sizeof(buf); /* be conservative */
348 } else {
349 slen = snprintf(buf, sizeof(buf), "%s%s(q=%d, f=%" PRId64
350 " Hz)", spc, tc->tc_name, tc->tc_quality,
351 tc->tc_frequency);
352 if (left < slen + 1)
353 break;
354 mods = timecounter_mods;
355 mutex_spin_exit(&timecounter_lock);
356 error = copyout(buf, where, slen + 1);
357 mutex_spin_enter(&timecounter_lock);
358 if (mods != timecounter_mods) {
359 goto retry;
360 }
361 spc = " ";
362 where += slen;
363 needed += slen;
364 left -= slen;
365 }
366 }
367 mutex_spin_exit(&timecounter_lock);
368
369 *oldlenp = needed;
370 return error;
371 }
372
373 SYSCTL_SETUP(sysctl_timecounter_setup, "sysctl timecounter setup")
374 {
375 const struct sysctlnode *node;
376
377 sysctl_createv(clog, 0, NULL, &node,
378 CTLFLAG_PERMANENT,
379 CTLTYPE_NODE, "timecounter",
380 SYSCTL_DESCR("time counter information"),
381 NULL, 0, NULL, 0,
382 CTL_KERN, CTL_CREATE, CTL_EOL);
383
384 if (node != NULL) {
385 sysctl_createv(clog, 0, NULL, NULL,
386 CTLFLAG_PERMANENT,
387 CTLTYPE_STRING, "choice",
388 SYSCTL_DESCR("available counters"),
389 sysctl_kern_timecounter_choice, 0, NULL, 0,
390 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
391
392 sysctl_createv(clog, 0, NULL, NULL,
393 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
394 CTLTYPE_STRING, "hardware",
395 SYSCTL_DESCR("currently active time counter"),
396 sysctl_kern_timecounter_hardware, 0, NULL, MAX_TCNAMELEN,
397 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
398
399 sysctl_createv(clog, 0, NULL, NULL,
400 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
401 CTLTYPE_INT, "timestepwarnings",
402 SYSCTL_DESCR("log time steps"),
403 		       NULL, 0, &timestepwarnings, 0,
404 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
405 }
406 }
407
408 #ifdef TC_COUNTERS
409 #define TC_STATS(name) \
410 static struct evcnt n##name = \
411 EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "timecounter", #name); \
412 EVCNT_ATTACH_STATIC(n##name)
413 TC_STATS(binuptime); TC_STATS(nanouptime); TC_STATS(microuptime);
414 TC_STATS(bintime); TC_STATS(nanotime); TC_STATS(microtime);
415 TC_STATS(getbinuptime); TC_STATS(getnanouptime); TC_STATS(getmicrouptime);
416 TC_STATS(getbintime); TC_STATS(getnanotime); TC_STATS(getmicrotime);
417 TC_STATS(setclock);
418 #define TC_COUNT(var) var.ev_count++
419 #undef TC_STATS
420 #else
421 #define TC_COUNT(var) /* nothing */
422 #endif /* TC_COUNTERS */
423
424 static void tc_windup(void);
425
426 /*
427 * Return the difference between the timehands' counter value now and what
428 * was when we copied it to the timehands' offset_count.
429 */
430 static inline u_int
431 tc_delta(struct timehands *th)
432 {
433 struct timecounter *tc;
434
435 tc = th->th_counter;
436 return (tc->tc_get_timecount(tc) -
437 th->th_offset_count) & tc->tc_counter_mask;
438 }
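/*
 * The mask makes the subtraction wrap correctly for counters narrower
 * than 32 bits.  For example, with a 24-bit counter (tc_counter_mask =
 * 0xffffff) that rolled over from th_offset_count = 0xfffff0 to a
 * current reading of 0x000010, (0x000010 - 0xfffff0) & 0xffffff =
 * 0x000020, i.e. 32 ticks elapsed.
 */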
439
440 /*
441 * Functions for reading the time. We have to loop until we are sure that
442 * the timehands that we operated on was not updated under our feet. See
443 * the comment in <sys/timevar.h> for a description of these 12 functions.
444 */
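/*
 * Briefly: {bin,nano,micro}uptime() return monotonic time since boot,
 * {bin,nano,micro}time() add timebasebin to yield UTC, and the get*()
 * variants return the values cached at the last tc_windup() instead of
 * reading the hardware counter, trading precision for speed.
 */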
445
446 void
447 binuptime(struct bintime *bt)
448 {
449 struct timehands *th;
450 lwp_t *l;
451 u_int lgen, gen;
452
453 TC_COUNT(nbinuptime);
454
455 /*
456 * Provide exclusion against tc_detach().
457 *
458 * We record the number of timecounter removals before accessing
459 * timecounter state. Note that the LWP can be using multiple
460 * "generations" at once, due to interrupts (interrupted while in
461 * this function). Hardware interrupts will borrow the interrupted
462 * LWP's l_tcgen value for this purpose, and can themselves be
463 * interrupted by higher priority interrupts. In this case we need
464 * to ensure that the oldest generation in use is recorded.
465 *
466 * splsched() is too expensive to use, so we take care to structure
467 * this code in such a way that it is not required. Likewise, we
468 * do not disable preemption.
469 *
470 * Memory barriers are also too expensive to use for such a
471 * performance critical function. The good news is that we do not
472 * need memory barriers for this type of exclusion, as the thread
473 * updating timecounter_removals will issue a broadcast cross call
474 * before inspecting our l_tcgen value (this elides memory ordering
475 * issues).
476 */
477 l = curlwp;
478 lgen = l->l_tcgen;
479 if (__predict_true(lgen == 0)) {
480 l->l_tcgen = timecounter_removals;
481 }
482 __insn_barrier();
483
484 do {
485 th = timehands;
486 gen = th->th_generation;
487 *bt = th->th_offset;
488 bintime_addx(bt, th->th_scale * tc_delta(th));
489 } while (gen == 0 || gen != th->th_generation);
490
491 __insn_barrier();
492 l->l_tcgen = lgen;
493 }
494
495 void
496 nanouptime(struct timespec *tsp)
497 {
498 struct bintime bt;
499
500 TC_COUNT(nnanouptime);
501 binuptime(&bt);
502 bintime2timespec(&bt, tsp);
503 }
504
505 void
506 microuptime(struct timeval *tvp)
507 {
508 struct bintime bt;
509
510 TC_COUNT(nmicrouptime);
511 binuptime(&bt);
512 bintime2timeval(&bt, tvp);
513 }
514
515 void
516 bintime(struct bintime *bt)
517 {
518
519 TC_COUNT(nbintime);
520 binuptime(bt);
521 bintime_add(bt, &timebasebin);
522 }
523
524 void
525 nanotime(struct timespec *tsp)
526 {
527 struct bintime bt;
528
529 TC_COUNT(nnanotime);
530 bintime(&bt);
531 bintime2timespec(&bt, tsp);
532 }
533
534 void
535 microtime(struct timeval *tvp)
536 {
537 struct bintime bt;
538
539 TC_COUNT(nmicrotime);
540 bintime(&bt);
541 bintime2timeval(&bt, tvp);
542 }
543
544 void
545 getbinuptime(struct bintime *bt)
546 {
547 struct timehands *th;
548 u_int gen;
549
550 TC_COUNT(ngetbinuptime);
551 do {
552 th = timehands;
553 gen = th->th_generation;
554 *bt = th->th_offset;
555 } while (gen == 0 || gen != th->th_generation);
556 }
557
558 void
559 getnanouptime(struct timespec *tsp)
560 {
561 struct timehands *th;
562 u_int gen;
563
564 TC_COUNT(ngetnanouptime);
565 do {
566 th = timehands;
567 gen = th->th_generation;
568 bintime2timespec(&th->th_offset, tsp);
569 } while (gen == 0 || gen != th->th_generation);
570 }
571
572 void
573 getmicrouptime(struct timeval *tvp)
574 {
575 struct timehands *th;
576 u_int gen;
577
578 TC_COUNT(ngetmicrouptime);
579 do {
580 th = timehands;
581 gen = th->th_generation;
582 bintime2timeval(&th->th_offset, tvp);
583 } while (gen == 0 || gen != th->th_generation);
584 }
585
586 void
587 getbintime(struct bintime *bt)
588 {
589 struct timehands *th;
590 u_int gen;
591
592 TC_COUNT(ngetbintime);
593 do {
594 th = timehands;
595 gen = th->th_generation;
596 *bt = th->th_offset;
597 } while (gen == 0 || gen != th->th_generation);
598 bintime_add(bt, &timebasebin);
599 }
600
601 static inline void
602 dogetnanotime(struct timespec *tsp)
603 {
604 struct timehands *th;
605 u_int gen;
606
607 TC_COUNT(ngetnanotime);
608 do {
609 th = timehands;
610 gen = th->th_generation;
611 *tsp = th->th_nanotime;
612 } while (gen == 0 || gen != th->th_generation);
613 }
614
615 void
616 getnanotime(struct timespec *tsp)
617 {
618
619 dogetnanotime(tsp);
620 }
621
622 void dtrace_getnanotime(struct timespec *tsp);
623
624 void
625 dtrace_getnanotime(struct timespec *tsp)
626 {
627
628 dogetnanotime(tsp);
629 }
630
631 void
632 getmicrotime(struct timeval *tvp)
633 {
634 struct timehands *th;
635 u_int gen;
636
637 TC_COUNT(ngetmicrotime);
638 do {
639 th = timehands;
640 gen = th->th_generation;
641 *tvp = th->th_microtime;
642 } while (gen == 0 || gen != th->th_generation);
643 }
644
645 void
646 getnanoboottime(struct timespec *tsp)
647 {
648 struct bintime bt;
649
650 getbinboottime(&bt);
651 bintime2timespec(&bt, tsp);
652 }
653
654 void
655 getmicroboottime(struct timeval *tvp)
656 {
657 struct bintime bt;
658
659 getbinboottime(&bt);
660 bintime2timeval(&bt, tvp);
661 }
662
663 void
664 getbinboottime(struct bintime *bt)
665 {
666
667 /*
668 * XXX Need lockless read synchronization around timebasebin
669 * (and not just here).
670 */
671 *bt = timebasebin;
672 }
673
674 /*
675 * Initialize a new timecounter and possibly use it.
676 */
677 void
678 tc_init(struct timecounter *tc)
679 {
680 u_int u;
681
682 KASSERTMSG(tc->tc_next == NULL, "timecounter %s already initialised",
683 tc->tc_name);
684
685 u = tc->tc_frequency / tc->tc_counter_mask;
686 /* XXX: We need some margin here, 10% is a guess */
687 u *= 11;
688 u /= 10;
689 if (u > hz && tc->tc_quality >= 0) {
690 tc->tc_quality = -2000;
691 aprint_verbose(
692 "timecounter: Timecounter \"%s\" frequency %ju Hz",
693 tc->tc_name, (uintmax_t)tc->tc_frequency);
694 aprint_verbose(" -- Insufficient hz, needs at least %u\n", u);
695 } else if (tc->tc_quality >= 0 || bootverbose) {
696 aprint_verbose(
697 "timecounter: Timecounter \"%s\" frequency %ju Hz "
698 "quality %d\n", tc->tc_name, (uintmax_t)tc->tc_frequency,
699 tc->tc_quality);
700 }
701
702 mutex_spin_enter(&timecounter_lock);
703 tc->tc_next = timecounters;
704 timecounters = tc;
705 timecounter_mods++;
706 /*
707 * Never automatically use a timecounter with negative quality.
708 * Even though we run on the dummy counter, switching here may be
709 	 * worse since this timecounter may not be monotonic.
710 */
711 if (tc->tc_quality >= 0 && (tc->tc_quality > timecounter->tc_quality ||
712 (tc->tc_quality == timecounter->tc_quality &&
713 tc->tc_frequency > timecounter->tc_frequency))) {
714 (void)tc->tc_get_timecount(tc);
715 (void)tc->tc_get_timecount(tc);
716 timecounter = tc;
717 tc_windup();
718 }
719 mutex_spin_exit(&timecounter_lock);
720 }
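/*
 * A device driver would typically register its counter along the lines
 * of the sketch below (all names are illustrative only):
 *
 *	static u_int
 *	mydev_get_timecount(struct timecounter *tc)
 *	{
 *		struct mydev_softc *sc = tc->tc_priv;
 *
 *		return bus_space_read_4(sc->sc_bst, sc->sc_bsh, MYDEV_CNT);
 *	}
 *
 *	static struct timecounter mydev_timecounter = {
 *		.tc_get_timecount = mydev_get_timecount,
 *		.tc_counter_mask = 0xffffffff,
 *		.tc_frequency = 24000000,	(24 MHz, for example)
 *		.tc_name = "mydev",
 *		.tc_quality = 100,
 *		.tc_priv = &mydev_sc,
 *	};
 *
 *	tc_init(&mydev_timecounter);
 *
 * Counters with negative tc_quality are registered but never selected
 * automatically; among non-negative ones, higher quality wins and ties
 * are broken in favour of the higher frequency.
 */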
721
722 /*
723 * Pick a new timecounter due to the existing counter going bad.
724 */
725 static void
726 tc_pick(void)
727 {
728 struct timecounter *best, *tc;
729
730 KASSERT(mutex_owned(&timecounter_lock));
731
732 for (best = tc = timecounters; tc != NULL; tc = tc->tc_next) {
733 if (tc->tc_quality > best->tc_quality)
734 best = tc;
735 else if (tc->tc_quality < best->tc_quality)
736 continue;
737 else if (tc->tc_frequency > best->tc_frequency)
738 best = tc;
739 }
740 (void)best->tc_get_timecount(best);
741 (void)best->tc_get_timecount(best);
742 timecounter = best;
743 }
744
745 /*
746 * A timecounter has gone bad, arrange to pick a new one at the next
747 * clock tick.
748 */
749 void
750 tc_gonebad(struct timecounter *tc)
751 {
752
753 tc->tc_quality = -100;
754 membar_producer();
755 atomic_inc_uint(&timecounter_bad);
756 }
757
758 /*
759 * Stop using a timecounter and remove it from the timecounters list.
760 */
761 int
762 tc_detach(struct timecounter *target)
763 {
764 struct timecounter *tc;
765 struct timecounter **tcp = NULL;
766 int removals;
767 lwp_t *l;
768
769 /* First, find the timecounter. */
770 mutex_spin_enter(&timecounter_lock);
771 for (tcp = &timecounters, tc = timecounters;
772 tc != NULL;
773 tcp = &tc->tc_next, tc = tc->tc_next) {
774 if (tc == target)
775 break;
776 }
777 if (tc == NULL) {
778 mutex_spin_exit(&timecounter_lock);
779 return ESRCH;
780 }
781
782 /* And now, remove it. */
783 *tcp = tc->tc_next;
784 if (timecounter == target) {
785 tc_pick();
786 tc_windup();
787 }
788 timecounter_mods++;
789 removals = timecounter_removals++;
790 mutex_spin_exit(&timecounter_lock);
791
792 /*
793 * We now have to determine if any threads in the system are still
794 * making use of this timecounter.
795 *
796 * We issue a broadcast cross call to elide memory ordering issues,
797 * then scan all LWPs in the system looking at each's timecounter
798 * generation number. We need to see a value of zero (not actively
799 * using a timecounter) or a value greater than our removal value.
800 *
801 	 * We may race with threads that read `timecounter_removals' and
802 	 * then get preempted before updating `l_tcgen'.  This is not
803 * a problem, since it means that these threads have not yet started
804 * accessing timecounter state. All we do need is one clean
805 * snapshot of the system where every thread appears not to be using
806 * old timecounter state.
807 */
808 for (;;) {
809 xc_barrier(0);
810
811 mutex_enter(&proc_lock);
812 LIST_FOREACH(l, &alllwp, l_list) {
813 if (l->l_tcgen == 0 || l->l_tcgen > removals) {
814 /*
815 * Not using timecounter or old timecounter
816 * state at time of our xcall or later.
817 */
818 continue;
819 }
820 break;
821 }
822 mutex_exit(&proc_lock);
823
824 /*
825 * If the timecounter is still in use, wait at least 10ms
826 * before retrying.
827 */
828 if (l == NULL) {
829 break;
830 }
831 (void)kpause("tcdetach", false, mstohz(10), NULL);
832 }
833
834 tc->tc_next = NULL;
835 return 0;
836 }
837
838 /* Report the frequency of the current timecounter. */
839 uint64_t
840 tc_getfrequency(void)
841 {
842
843 return timehands->th_counter->tc_frequency;
844 }
845
846 /*
847 * Step our concept of UTC. This is done by modifying our estimate of
848 * when we booted.
849 */
850 void
851 tc_setclock(const struct timespec *ts)
852 {
853 struct timespec ts2;
854 struct bintime bt, bt2;
855
856 mutex_spin_enter(&timecounter_lock);
857 TC_COUNT(nsetclock);
858 binuptime(&bt2);
859 timespec2bintime(ts, &bt);
860 bintime_sub(&bt, &bt2);
861 bintime_add(&bt2, &timebasebin);
862 timebasebin = bt;
863 tc_windup();
864 mutex_spin_exit(&timecounter_lock);
865
866 if (timestepwarnings) {
867 bintime2timespec(&bt2, &ts2);
868 log(LOG_INFO,
869 "Time stepped from %lld.%09ld to %lld.%09ld\n",
870 (long long)ts2.tv_sec, ts2.tv_nsec,
871 (long long)ts->tv_sec, ts->tv_nsec);
872 }
873 }
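/*
 * Worked example: if binuptime() reports 100.0 s of uptime and
 * timebasebin currently corresponds to 1000000000.0, the old UTC time
 * is 1000000100.0.  Stepping the clock to ts = 1700000000.0 simply
 * replaces timebasebin with 1700000000.0 - 100.0 = 1699999900.0; the
 * uptime clock itself is never stepped.
 */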
874
875 /*
876 * Initialize the next struct timehands in the ring and make
877 * it the active timehands. Along the way we might switch to a different
878 * timecounter and/or do seconds processing in NTP. Slightly magic.
879 */
880 static void
881 tc_windup(void)
882 {
883 struct bintime bt;
884 struct timehands *th, *tho;
885 uint64_t scale;
886 u_int delta, ncount, ogen;
887 int i, s_update;
888 time_t t;
889
890 KASSERT(mutex_owned(&timecounter_lock));
891
892 s_update = 0;
893
894 /*
895 * Make the next timehands a copy of the current one, but do not
896 * overwrite the generation or next pointer. While we update
897 * the contents, the generation must be zero. Ensure global
898 * visibility of the generation before proceeding.
899 */
900 tho = timehands;
901 th = tho->th_next;
902 ogen = th->th_generation;
903 th->th_generation = 0;
904 membar_producer();
905 bcopy(tho, th, offsetof(struct timehands, th_generation));
906
907 /*
908 * Capture a timecounter delta on the current timecounter and if
909 * changing timecounters, a counter value from the new timecounter.
910 * Update the offset fields accordingly.
911 */
912 delta = tc_delta(th);
913 if (th->th_counter != timecounter)
914 ncount = timecounter->tc_get_timecount(timecounter);
915 else
916 ncount = 0;
917 th->th_offset_count += delta;
918 bintime_addx(&th->th_offset, th->th_scale * delta);
919
920 /*
921 * Hardware latching timecounters may not generate interrupts on
922 * PPS events, so instead we poll them. There is a finite risk that
923 * the hardware might capture a count which is later than the one we
924 * got above, and therefore possibly in the next NTP second which might
925 * have a different rate than the current NTP second. It doesn't
926 * matter in practice.
927 */
928 if (tho->th_counter->tc_poll_pps)
929 tho->th_counter->tc_poll_pps(tho->th_counter);
930
931 /*
932 * Deal with NTP second processing. The for loop normally
933 * iterates at most once, but in extreme situations it might
934 * keep NTP sane if timeouts are not run for several seconds.
935 * At boot, the time step can be large when the TOD hardware
936 * has been read, so on really large steps, we call
937 * ntp_update_second only twice. We need to call it twice in
938 * case we missed a leap second.
939 	 * If NTP is not compiled in, ntp_update_second still calculates
940 * the adjustment resulting from adjtime() calls.
941 */
942 bt = th->th_offset;
943 bintime_add(&bt, &timebasebin);
944 i = bt.sec - tho->th_microtime.tv_sec;
945 if (i > LARGE_STEP)
946 i = 2;
947 for (; i > 0; i--) {
948 t = bt.sec;
949 ntp_update_second(&th->th_adjustment, &bt.sec);
950 s_update = 1;
951 if (bt.sec != t)
952 timebasebin.sec += bt.sec - t;
953 }
954
955 /* Update the UTC timestamps used by the get*() functions. */
956 /* XXX shouldn't do this here. Should force non-`get' versions. */
957 bintime2timeval(&bt, &th->th_microtime);
958 bintime2timespec(&bt, &th->th_nanotime);
959 /* Now is a good time to change timecounters. */
960 if (th->th_counter != timecounter) {
961 th->th_counter = timecounter;
962 th->th_offset_count = ncount;
963 s_update = 1;
964 }
965
966 /*-
967 * Recalculate the scaling factor. We want the number of 1/2^64
968 * fractions of a second per period of the hardware counter, taking
969 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
970 * processing provides us with.
971 *
972 * The th_adjustment is nanoseconds per second with 32 bit binary
973 * fraction and we want 64 bit binary fraction of second:
974 *
975 * x = a * 2^32 / 10^9 = a * 4.294967296
976 *
977 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
978 * we can only multiply by about 850 without overflowing, but that
979 * leaves suitably precise fractions for multiply before divide.
980 *
981 * Divide before multiply with a fraction of 2199/512 results in a
982 * systematic undercompensation of 10PPM of th_adjustment. On a
983 * 5000PPM adjustment this is a 0.05PPM error. This is acceptable.
984 *
985 * We happily sacrifice the lowest of the 64 bits of our result
986 * to the goddess of code clarity.
987 *
988 */
989 if (s_update) {
990 scale = (uint64_t)1 << 63;
991 scale += (th->th_adjustment / 1024) * 2199;
992 scale /= th->th_counter->tc_frequency;
993 th->th_scale = scale * 2;
994 }
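	/*
	 * Worked example: with th_adjustment = 0 and a 1 MHz counter,
	 * scale = 2^63 / 10^6 and th_scale = 2 * scale ~= 2^64 / 10^6,
	 * i.e. one microsecond per tick as a 64-bit binary fraction.
	 * The adjustment term (th_adjustment / 1024) * 2199, doubled by
	 * the final multiplication, scales th_adjustment by
	 * 2199/512 ~= 4.29492 rather than the exact 2^32/10^9 ~= 4.29497;
	 * that is the ~10PPM undercompensation noted above.
	 */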
995 /*
996 * Now that the struct timehands is again consistent, set the new
997 * generation number, making sure to not make it zero. Ensure
998 * changes are globally visible before changing.
999 */
1000 if (++ogen == 0)
1001 ogen = 1;
1002 membar_producer();
1003 th->th_generation = ogen;
1004
1005 /*
1006 * Go live with the new struct timehands. Ensure changes are
1007 * globally visible before changing.
1008 */
1009 setrealuptime(th->th_microtime.tv_sec, th->th_offset.sec);
1010 membar_producer();
1011 timehands = th;
1012
1013 /*
1014 * Force users of the old timehand to move on. This is
1015 * necessary for MP systems; we need to ensure that the
1016 * consumers will move away from the old timehand before
1017 * we begin updating it again when we eventually wrap
1018 * around.
1019 */
1020 if (++tho->th_generation == 0)
1021 tho->th_generation = 1;
1022 }
1023
1024 /*
1025 * RFC 2783 PPS-API implementation.
1026 */
1027
1028 int
1029 pps_ioctl(u_long cmd, void *data, struct pps_state *pps)
1030 {
1031 pps_params_t *app;
1032 pps_info_t *pipi;
1033 #ifdef PPS_SYNC
1034 int *epi;
1035 #endif
1036
1037 KASSERT(mutex_owned(&timecounter_lock));
1038
1039 KASSERT(pps != NULL);
1040
1041 switch (cmd) {
1042 case PPS_IOC_CREATE:
1043 return 0;
1044 case PPS_IOC_DESTROY:
1045 return 0;
1046 case PPS_IOC_SETPARAMS:
1047 app = (pps_params_t *)data;
1048 if (app->mode & ~pps->ppscap)
1049 return EINVAL;
1050 pps->ppsparam = *app;
1051 return 0;
1052 case PPS_IOC_GETPARAMS:
1053 app = (pps_params_t *)data;
1054 *app = pps->ppsparam;
1055 app->api_version = PPS_API_VERS_1;
1056 return 0;
1057 case PPS_IOC_GETCAP:
1058 *(int*)data = pps->ppscap;
1059 return 0;
1060 case PPS_IOC_FETCH:
1061 pipi = (pps_info_t *)data;
1062 pps->ppsinfo.current_mode = pps->ppsparam.mode;
1063 *pipi = pps->ppsinfo;
1064 return 0;
1065 case PPS_IOC_KCBIND:
1066 #ifdef PPS_SYNC
1067 epi = (int *)data;
1068 /* XXX Only root should be able to do this */
1069 if (*epi & ~pps->ppscap)
1070 return EINVAL;
1071 pps->kcmode = *epi;
1072 return 0;
1073 #else
1074 return EOPNOTSUPP;
1075 #endif
1076 default:
1077 return EPASSTHROUGH;
1078 }
1079 }
1080
1081 void
1082 pps_init(struct pps_state *pps)
1083 {
1084
1085 KASSERT(mutex_owned(&timecounter_lock));
1086
1087 pps->ppscap |= PPS_TSFMT_TSPEC;
1088 if (pps->ppscap & PPS_CAPTUREASSERT)
1089 pps->ppscap |= PPS_OFFSETASSERT;
1090 if (pps->ppscap & PPS_CAPTURECLEAR)
1091 pps->ppscap |= PPS_OFFSETCLEAR;
1092 }
1093
1094 /*
1095 * capture a timestamp in the pps structure
1096 */
1097 void
1098 pps_capture(struct pps_state *pps)
1099 {
1100 struct timehands *th;
1101
1102 KASSERT(mutex_owned(&timecounter_lock));
1103 KASSERT(pps != NULL);
1104
1105 th = timehands;
1106 pps->capgen = th->th_generation;
1107 pps->capth = th;
1108 pps->capcount = (uint64_t)tc_delta(th) + th->th_offset_count;
1109 if (pps->capgen != th->th_generation)
1110 pps->capgen = 0;
1111 }
1112
1113 #ifdef PPS_DEBUG
1114 int ppsdebug = 0;
1115 #endif
1116
1117 /*
1118 * process a pps_capture()ed event
1119 */
1120 void
1121 pps_event(struct pps_state *pps, int event)
1122 {
1123 pps_ref_event(pps, event, NULL, PPS_REFEVNT_PPS|PPS_REFEVNT_CAPTURE);
1124 }
1125
1126 /*
1127 * extended pps api / kernel pll/fll entry point
1128 *
1129 * feed reference time stamps to PPS engine
1130 *
1131 * will simulate a PPS event and feed
1132 * the NTP PLL/FLL if requested.
1133 *
1134 * the ref time stamps should arrive roughly once a second;
1135 * they do not need to be exactly in phase with the UTC second,
1136 * but should be close to it.
1137 * this relaxation of requirements allows callout
1138 * driven timestamping mechanisms to feed to pps
1139 * capture/kernel pll logic.
1140 *
1141 * calling pattern is:
1142 * pps_capture() (for PPS_REFEVNT_{CAPTURE|CAPCUR})
1143 * read timestamp from reference source
1144 * pps_ref_event()
1145 *
1146 * supported refmodes:
1147 * PPS_REFEVNT_CAPTURE
1148 * use system timestamp of pps_capture()
1149 * PPS_REFEVNT_CURRENT
1150 * use system timestamp of this call
1151 * PPS_REFEVNT_CAPCUR
1152 * use average of read capture and current system time stamp
1153 * PPS_REFEVNT_PPS
1154 * assume timestamp on second mark - ref_ts is ignored
1155 *
1156 */
1157
1158 void
1159 pps_ref_event(struct pps_state *pps,
1160 int event,
1161 struct bintime *ref_ts,
1162 int refmode
1163 )
1164 {
1165 struct bintime bt; /* current time */
1166 struct bintime btd; /* time difference */
1167 struct bintime bt_ref; /* reference time */
1168 struct timespec ts, *tsp, *osp;
1169 struct timehands *th;
1170 uint64_t tcount, acount, dcount, *pcount;
1171 int foff, gen;
1172 #ifdef PPS_SYNC
1173 int fhard;
1174 #endif
1175 pps_seq_t *pseq;
1176
1177 KASSERT(mutex_owned(&timecounter_lock));
1178
1179 KASSERT(pps != NULL);
1180
1181 /* pick up current time stamp if needed */
1182 if (refmode & (PPS_REFEVNT_CURRENT|PPS_REFEVNT_CAPCUR)) {
1183 /* pick up current time stamp */
1184 th = timehands;
1185 gen = th->th_generation;
1186 tcount = (uint64_t)tc_delta(th) + th->th_offset_count;
1187 if (gen != th->th_generation)
1188 gen = 0;
1189
1190 /* If the timecounter was wound up underneath us, bail out. */
1191 if (pps->capgen == 0 ||
1192 pps->capgen != pps->capth->th_generation ||
1193 gen == 0 ||
1194 gen != pps->capgen) {
1195 #ifdef PPS_DEBUG
1196 if (ppsdebug & 0x1) {
1197 log(LOG_DEBUG,
1198 "pps_ref_event(pps=%p, event=%d, ...): DROP (wind-up)\n",
1199 pps, event);
1200 }
1201 #endif
1202 return;
1203 }
1204 } else {
1205 tcount = 0; /* keep GCC happy */
1206 }
1207
1208 #ifdef PPS_DEBUG
1209 if (ppsdebug & 0x1) {
1210 struct timespec tmsp;
1211
1212 if (ref_ts == NULL) {
1213 tmsp.tv_sec = 0;
1214 tmsp.tv_nsec = 0;
1215 } else {
1216 bintime2timespec(ref_ts, &tmsp);
1217 }
1218
1219 log(LOG_DEBUG,
1220 "pps_ref_event(pps=%p, event=%d, ref_ts=%"PRIi64
1221 ".%09"PRIi32", refmode=0x%1x)\n",
1222 pps, event, tmsp.tv_sec, (int32_t)tmsp.tv_nsec, refmode);
1223 }
1224 #endif
1225
1226 /* setup correct event references */
1227 if (event == PPS_CAPTUREASSERT) {
1228 tsp = &pps->ppsinfo.assert_timestamp;
1229 osp = &pps->ppsparam.assert_offset;
1230 foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
1231 #ifdef PPS_SYNC
1232 fhard = pps->kcmode & PPS_CAPTUREASSERT;
1233 #endif
1234 pcount = &pps->ppscount[0];
1235 pseq = &pps->ppsinfo.assert_sequence;
1236 } else {
1237 tsp = &pps->ppsinfo.clear_timestamp;
1238 osp = &pps->ppsparam.clear_offset;
1239 foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
1240 #ifdef PPS_SYNC
1241 fhard = pps->kcmode & PPS_CAPTURECLEAR;
1242 #endif
1243 pcount = &pps->ppscount[1];
1244 pseq = &pps->ppsinfo.clear_sequence;
1245 }
1246
1247 /* determine system time stamp according to refmode */
1248 dcount = 0; /* keep GCC happy */
1249 switch (refmode & PPS_REFEVNT_RMASK) {
1250 case PPS_REFEVNT_CAPTURE:
1251 acount = pps->capcount; /* use capture timestamp */
1252 break;
1253
1254 case PPS_REFEVNT_CURRENT:
1255 acount = tcount; /* use current timestamp */
1256 break;
1257
1258 case PPS_REFEVNT_CAPCUR:
1259 /*
1260 * calculate counter value between pps_capture() and
1261 * pps_ref_event()
1262 */
1263 dcount = tcount - pps->capcount;
1264 acount = (dcount / 2) + pps->capcount;
1265 break;
1266
1267 default: /* ignore call error silently */
1268 return;
1269 }
1270
1271 /*
1272 * If the timecounter changed, we cannot compare the count values, so
1273 * we have to drop the rest of the PPS-stuff until the next event.
1274 */
1275 if (pps->ppstc != pps->capth->th_counter) {
1276 pps->ppstc = pps->capth->th_counter;
1277 pps->capcount = acount;
1278 *pcount = acount;
1279 pps->ppscount[2] = acount;
1280 #ifdef PPS_DEBUG
1281 if (ppsdebug & 0x1) {
1282 log(LOG_DEBUG,
1283 "pps_ref_event(pps=%p, event=%d, ...): DROP (time-counter change)\n",
1284 pps, event);
1285 }
1286 #endif
1287 return;
1288 }
1289
1290 pps->capcount = acount;
1291
1292 /* Convert the count to a bintime. */
1293 bt = pps->capth->th_offset;
1294 bintime_addx(&bt, pps->capth->th_scale * (acount - pps->capth->th_offset_count));
1295 bintime_add(&bt, &timebasebin);
1296
1297 if ((refmode & PPS_REFEVNT_PPS) == 0) {
1298 /* determine difference to reference time stamp */
1299 bt_ref = *ref_ts;
1300
1301 btd = bt;
1302 bintime_sub(&btd, &bt_ref);
1303
1304 /*
1305 * simulate a PPS timestamp by dropping the fraction
1306 * and applying the offset
1307 */
1308 if (bt.frac >= (uint64_t)1<<63) /* skip to nearest second */
1309 bt.sec++;
1310 bt.frac = 0;
1311 bintime_add(&bt, &btd);
1312 } else {
1313 /*
1314 * create ref_ts from current time -
1315 * we are supposed to be called on
1316 * the second mark
1317 */
1318 bt_ref = bt;
1319 if (bt_ref.frac >= (uint64_t)1<<63) /* skip to nearest second */
1320 bt_ref.sec++;
1321 bt_ref.frac = 0;
1322 }
1323
1324 /* convert bintime to timestamp */
1325 bintime2timespec(&bt, &ts);
1326
1327 /* If the timecounter was wound up underneath us, bail out. */
1328 if (pps->capgen != pps->capth->th_generation)
1329 return;
1330
1331 /* store time stamp */
1332 *pcount = pps->capcount;
1333 (*pseq)++;
1334 *tsp = ts;
1335
1336 /* add offset correction */
1337 if (foff) {
1338 timespecadd(tsp, osp, tsp);
1339 if (tsp->tv_nsec < 0) {
1340 tsp->tv_nsec += 1000000000;
1341 tsp->tv_sec -= 1;
1342 }
1343 }
1344
1345 #ifdef PPS_DEBUG
1346 if (ppsdebug & 0x2) {
1347 struct timespec ts2;
1348 struct timespec ts3;
1349
1350 bintime2timespec(&bt_ref, &ts2);
1351
1352 bt.sec = 0;
1353 bt.frac = 0;
1354
1355 if (refmode & PPS_REFEVNT_CAPCUR) {
1356 bintime_addx(&bt, pps->capth->th_scale * dcount);
1357 }
1358 bintime2timespec(&bt, &ts3);
1359
1360 log(LOG_DEBUG, "ref_ts=%"PRIi64".%09"PRIi32
1361 ", ts=%"PRIi64".%09"PRIi32", read latency=%"PRIi64" ns\n",
1362 ts2.tv_sec, (int32_t)ts2.tv_nsec,
1363 tsp->tv_sec, (int32_t)tsp->tv_nsec,
1364 timespec2ns(&ts3));
1365 }
1366 #endif
1367
1368 #ifdef PPS_SYNC
1369 if (fhard) {
1370 uint64_t scale;
1371 uint64_t div;
1372
1373 /*
1374 * Feed the NTP PLL/FLL.
1375 * The FLL wants to know how many (hardware) nanoseconds
1376 * elapsed since the previous event (mod 1 second) thus
1377 * we are actually looking at the frequency difference scaled
1378 * in nsec.
1379 * As the counter time stamps are not truly at 1Hz
1380 * we need to scale the count by the elapsed
1381 * reference time.
1382 * valid sampling interval: [0.5..2[ sec
1383 */
1384
1385 /* calculate elapsed raw count */
1386 tcount = pps->capcount - pps->ppscount[2];
1387 pps->ppscount[2] = pps->capcount;
1388 tcount &= pps->capth->th_counter->tc_counter_mask;
1389
1390 /* calculate elapsed ref time */
1391 btd = bt_ref;
1392 bintime_sub(&btd, &pps->ref_time);
1393 pps->ref_time = bt_ref;
1394
1395 /* check that we stay below 2 sec */
1396 if (btd.sec < 0 || btd.sec > 1)
1397 return;
1398
1399 /* we want at least 0.5 sec between samples */
1400 if (btd.sec == 0 && btd.frac < (uint64_t)1<<63)
1401 return;
1402
1403 /*
1404 * calculate cycles per period by multiplying
1405 * the frequency with the elapsed period
1406 * we pick a fraction of 30 bits
1407 * ~1ns resolution for elapsed time
1408 */
1409 div = (uint64_t)btd.sec << 30;
1410 div |= (btd.frac >> 34) & (((uint64_t)1 << 30) - 1);
1411 div *= pps->capth->th_counter->tc_frequency;
1412 div >>= 30;
1413
1414 if (div == 0) /* safeguard */
1415 return;
1416
1417 scale = (uint64_t)1 << 63;
1418 scale /= div;
1419 scale *= 2;
1420
1421 bt.sec = 0;
1422 bt.frac = 0;
1423 bintime_addx(&bt, scale * tcount);
1424 bintime2timespec(&bt, &ts);
1425
1426 #ifdef PPS_DEBUG
1427 if (ppsdebug & 0x4) {
1428 struct timespec ts2;
1429 int64_t df;
1430
1431 bintime2timespec(&bt_ref, &ts2);
1432 df = timespec2ns(&ts);
1433 if (df > 500000000)
1434 df -= 1000000000;
1435 log(LOG_DEBUG, "hardpps: ref_ts=%"PRIi64
1436 ".%09"PRIi32", ts=%"PRIi64".%09"PRIi32
1437 ", freqdiff=%"PRIi64" ns/s\n",
1438 ts2.tv_sec, (int32_t)ts2.tv_nsec,
1439 tsp->tv_sec, (int32_t)tsp->tv_nsec,
1440 df);
1441 }
1442 #endif
1443
1444 hardpps(tsp, timespec2ns(&ts));
1445 }
1446 #endif
1447 }
1448
1449 /*
1450 * Timecounters need to be updated every so often to prevent the hardware
1451 * counter from overflowing. Updating also recalculates the cached values
1452 * used by the get*() family of functions, so their precision depends on
1453 * the update frequency.
1454 */
1455
1456 static int tc_tick;
1457
1458 void
1459 tc_ticktock(void)
1460 {
1461 static int count;
1462
1463 if (++count < tc_tick)
1464 return;
1465 count = 0;
1466 mutex_spin_enter(&timecounter_lock);
1467 if (__predict_false(timecounter_bad != 0)) {
1468 /* An existing timecounter has gone bad, pick a new one. */
1469 (void)atomic_swap_uint(&timecounter_bad, 0);
1470 if (timecounter->tc_quality < 0) {
1471 tc_pick();
1472 }
1473 }
1474 tc_windup();
1475 mutex_spin_exit(&timecounter_lock);
1476 }
1477
1478 void
1479 inittimecounter(void)
1480 {
1481 u_int p;
1482
1483 mutex_init(&timecounter_lock, MUTEX_DEFAULT, IPL_HIGH);
1484
1485 /*
1486 * Set the initial timeout to
1487 * max(1, <approx. number of hardclock ticks in a millisecond>).
1488 * People should probably not use the sysctl to set the timeout
1489 * to smaller than its initial value, since that value is the
1490 * smallest reasonable one. If they want better timestamps they
1491 * should use the non-"get"* functions.
1492 */
1493 if (hz > 1000)
1494 tc_tick = (hz + 500) / 1000;
1495 else
1496 tc_tick = 1;
1497 p = (tc_tick * 1000000) / hz;
1498 aprint_verbose("timecounter: Timecounters tick every %d.%03u msec\n",
1499 p / 1000, p % 1000);
1500
1501 /* warm up new timecounter (again) and get rolling. */
1502 (void)timecounter->tc_get_timecount(timecounter);
1503 (void)timecounter->tc_get_timecount(timecounter);
1504 }
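/*
 * For example, with hz = 100 this yields tc_tick = 1 and a windup
 * interval of 10.000 msec; with hz = 8000 it yields tc_tick = 8 and an
 * interval of 1.000 msec, keeping tc_windup() at roughly 1 kHz.
 */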
1505