kern_tc.c revision 1.39 1 /* $NetBSD: kern_tc.c,v 1.39 2009/05/23 17:08:04 ad Exp $ */
2
3 /*-
4 * Copyright (c) 2008, 2009 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*-
33 * ----------------------------------------------------------------------------
34 * "THE BEER-WARE LICENSE" (Revision 42):
35 * <phk (at) FreeBSD.ORG> wrote this file. As long as you retain this notice you
36 * can do whatever you want with this stuff. If we meet some day, and you think
37 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
38 * ---------------------------------------------------------------------------
39 */
40
41 #include <sys/cdefs.h>
42 /* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */
43 __KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.39 2009/05/23 17:08:04 ad Exp $");
44
45 #include "opt_ntp.h"
46
47 #include <sys/param.h>
48 #include <sys/kernel.h>
49 #include <sys/reboot.h> /* XXX just to get AB_VERBOSE */
50 #include <sys/sysctl.h>
51 #include <sys/syslog.h>
52 #include <sys/systm.h>
53 #include <sys/timepps.h>
54 #include <sys/timetc.h>
55 #include <sys/timex.h>
56 #include <sys/evcnt.h>
57 #include <sys/kauth.h>
58 #include <sys/mutex.h>
59 #include <sys/atomic.h>
60 #include <sys/xcall.h>
61
62 /*
63 * A large step happens on boot. This constant detects such steps.
64 * It is relatively small so that ntp_update_second gets called enough
65 * in the typical 'missed a couple of seconds' case, but doesn't loop
66 * forever when the time step is large.
67 */
68 #define LARGE_STEP 200
69
70 /*
71 * Implement a dummy timecounter which we can use until we get a real one
72 * in the air. This allows the console and other early stuff to use
73 * time services.
74 */
75
76 static u_int
77 dummy_get_timecount(struct timecounter *tc)
78 {
79 static u_int now;
80
81 return (++now);
82 }
83
/*
 * Positional initializer; judging from field use elsewhere in this file
 * the order is: get_timecount, poll_pps, counter_mask, frequency (1 MHz),
 * name, quality (negative so it is never auto-selected), cookie, next.
 */
static struct timecounter dummy_timecounter = {
	dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000, NULL, NULL,
};
87
/*
 * A consistent snapshot of timekeeping state.  Several of these are
 * linked into a ring (below); tc_windup() fills in the next entry and
 * then publishes it, so lock-free readers never observe a partially
 * updated snapshot.
 */
struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter *th_counter;	/* hardware counter in use */
	int64_t th_adjustment;		/* rate adjustment from ntp_update_second() */
	u_int64_t th_scale;		/* 2^64 / counter frequency (see tc_windup) */
	u_int th_offset_count;		/* raw counter value at last windup */
	struct bintime th_offset;	/* uptime at last windup */
	struct timeval th_microtime;	/* cached UTC, timeval form */
	struct timespec th_nanotime;	/* cached UTC, timespec form */
	/* Fields not to be copied in tc_windup start with th_generation. */
	volatile u_int th_generation;	/* zero while entry is being updated */
	struct timehands *th_next;	/* next entry in the ring */
};
101
/*
 * Ten timehands structures forming a circular ring.  tc_windup()
 * advances around the ring, so readers interrupted mid-read can keep
 * using an older entry for a long time before its slot is reused.
 */
static struct timehands th0;
static struct timehands th9 = { .th_next = &th0, };
static struct timehands th8 = { .th_next = &th9, };
static struct timehands th7 = { .th_next = &th8, };
static struct timehands th6 = { .th_next = &th7, };
static struct timehands th5 = { .th_next = &th6, };
static struct timehands th4 = { .th_next = &th5, };
static struct timehands th3 = { .th_next = &th4, };
static struct timehands th2 = { .th_next = &th3, };
static struct timehands th1 = { .th_next = &th2, };
static struct timehands th0 = {
	.th_counter = &dummy_timecounter,
	.th_scale = (uint64_t)-1 / 1000000,	/* ~2^64 / 1 MHz */
	.th_offset = { .sec = 1, .frac = 0 },
	.th_generation = 1,
	.th_next = &th1,
};

/* Currently published timehands, active counter, and registry list head. */
static struct timehands *volatile timehands = &th0;
struct timecounter *timecounter = &dummy_timecounter;
static struct timecounter *timecounters = &dummy_timecounter;

time_t time_second = 1;		/* cached UTC seconds; set in tc_windup() */
time_t time_uptime = 1;		/* cached uptime seconds; set in tc_windup() */

/* Offset added to uptime to yield UTC; adjusted by tc_setclock(). */
static struct bintime timebasebin;

/* Nonzero: log a message whenever the clock is stepped. */
static int timestepwarnings;

kmutex_t timecounter_lock;		/* serializes windup and list changes */
static u_int timecounter_mods;		/* bumped on every registry change */
static volatile int timecounter_removals = 1;	/* bumped by tc_detach() */
static u_int timecounter_bad;		/* flag raised by tc_gonebad() */
135
#ifdef __FreeBSD__
/*
 * FreeBSD-style sysctl hook (NetBSD exposes the same knob via
 * sysctl_timecounter_setup() below).  The argument was mis-encoded as
 * a multiplication sign; it must be the address of timestepwarnings.
 */
SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW,
    &timestepwarnings, 0, "");
#endif /* __FreeBSD__ */
140
/*
 * sysctl helper routine for kern.timecounter.hardware: report the name
 * of the active timecounter and, on write, switch to the registered
 * counter with the requested name.
 */
static int
sysctl_kern_timecounter_hardware(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error;
	char newname[MAX_TCNAMELEN];
	struct timecounter *newtc, *tc;

	tc = timecounter;

	/* Present the current counter's name to the caller. */
	strlcpy(newname, tc->tc_name, sizeof(newname));

	node = *rnode;
	node.sysctl_data = newname;
	node.sysctl_size = sizeof(newname);

	error = sysctl_lookup(SYSCTLFN_CALL(&node));

	/* Done unless this is a write naming a different counter. */
	if (error ||
	    newp == NULL ||
	    strncmp(newname, tc->tc_name, sizeof(newname)) == 0)
		return error;

	/* Switching the system timecounter is a privileged operation. */
	if (l != NULL && (error = kauth_authorize_system(l->l_cred,
	    KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_TIMECOUNTERS, newname,
	    NULL, NULL)) != 0)
		return (error);

	/* The lock is not initialized until after cold boot. */
	if (!cold)
		mutex_spin_enter(&timecounter_lock);
	error = EINVAL;
	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;
		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);
		(void)newtc->tc_get_timecount(newtc);
		timecounter = newtc;
		error = 0;
		break;
	}
	if (!cold)
		mutex_spin_exit(&timecounter_lock);
	return error;
}
189
/*
 * sysctl helper routine for kern.timecounter.choice: return a space
 * separated list of all registered counters with their quality and
 * frequency.  Read-only.
 */
static int
sysctl_kern_timecounter_choice(SYSCTLFN_ARGS)
{
	char buf[MAX_TCNAMELEN+48];
	char *where;
	const char *spc;
	struct timecounter *tc;
	size_t needed, left, slen;
	int error, mods;

	if (newp != NULL)
		return (EPERM);
	if (namelen != 0)
		return (EINVAL);

	mutex_spin_enter(&timecounter_lock);
 retry:
	spc = "";
	error = 0;
	needed = 0;
	left = *oldlenp;
	where = oldp;
	for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) {
		if (where == NULL) {
			/* Size query only: report a worst-case length. */
			needed += sizeof(buf);  /* be conservative */
		} else {
			slen = snprintf(buf, sizeof(buf), "%s%s(q=%d, f=%" PRId64
			    " Hz)", spc, tc->tc_name, tc->tc_quality,
			    tc->tc_frequency);
			if (left < slen + 1)
				break;
			/*
			 * The spin lock cannot be held across copyout().
			 * Drop it, and restart the scan from scratch if
			 * the registry changed while it was released.
			 */
			mods = timecounter_mods;
			mutex_spin_exit(&timecounter_lock);
			error = copyout(buf, where, slen + 1);
			mutex_spin_enter(&timecounter_lock);
			if (mods != timecounter_mods) {
				goto retry;
			}
			spc = " ";
			where += slen;
			needed += slen;
			left -= slen;
		}
	}
	mutex_spin_exit(&timecounter_lock);

	*oldlenp = needed;
	return (error);
}
239
240 SYSCTL_SETUP(sysctl_timecounter_setup, "sysctl timecounter setup")
241 {
242 const struct sysctlnode *node;
243
244 sysctl_createv(clog, 0, NULL, &node,
245 CTLFLAG_PERMANENT,
246 CTLTYPE_NODE, "timecounter",
247 SYSCTL_DESCR("time counter information"),
248 NULL, 0, NULL, 0,
249 CTL_KERN, CTL_CREATE, CTL_EOL);
250
251 if (node != NULL) {
252 sysctl_createv(clog, 0, NULL, NULL,
253 CTLFLAG_PERMANENT,
254 CTLTYPE_STRING, "choice",
255 SYSCTL_DESCR("available counters"),
256 sysctl_kern_timecounter_choice, 0, NULL, 0,
257 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
258
259 sysctl_createv(clog, 0, NULL, NULL,
260 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
261 CTLTYPE_STRING, "hardware",
262 SYSCTL_DESCR("currently active time counter"),
263 sysctl_kern_timecounter_hardware, 0, NULL, MAX_TCNAMELEN,
264 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
265
266 sysctl_createv(clog, 0, NULL, NULL,
267 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
268 CTLTYPE_INT, "timestepwarnings",
269 SYSCTL_DESCR("log time steps"),
270 NULL, 0, ×tepwarnings, 0,
271 CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
272 }
273 }
274
/*
 * Optional event counters (visible via vmstat -e) tracking how often
 * each time-reading function is called.  TC_COUNT() collapses to a
 * no-op unless the kernel is built with TC_COUNTERS.
 */
#ifdef TC_COUNTERS
#define	TC_STATS(name)							\
static struct evcnt n##name =						\
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "timecounter", #name);	\
EVCNT_ATTACH_STATIC(n##name)
TC_STATS(binuptime);    TC_STATS(nanouptime);    TC_STATS(microuptime);
TC_STATS(bintime);      TC_STATS(nanotime);      TC_STATS(microtime);
TC_STATS(getbinuptime); TC_STATS(getnanouptime); TC_STATS(getmicrouptime);
TC_STATS(getbintime);   TC_STATS(getnanotime);   TC_STATS(getmicrotime);
TC_STATS(setclock);
#define	TC_COUNT(var)	var.ev_count++
#undef TC_STATS
#else
#define	TC_COUNT(var)	/* nothing */
#endif	/* TC_COUNTERS */
290
291 static void tc_windup(void);
292
293 /*
294 * Return the difference between the timehands' counter value now and what
295 * was when we copied it to the timehands' offset_count.
296 */
297 static __inline u_int
298 tc_delta(struct timehands *th)
299 {
300 struct timecounter *tc;
301
302 tc = th->th_counter;
303 return ((tc->tc_get_timecount(tc) -
304 th->th_offset_count) & tc->tc_counter_mask);
305 }
306
307 /*
308 * Functions for reading the time. We have to loop until we are sure that
309 * the timehands that we operated on was not updated under our feet. See
310 * the comment in <sys/timevar.h> for a description of these 12 functions.
311 */
312
/*
 * binuptime: store in *bt the time elapsed since boot, at full
 * hardware-counter resolution.
 */
void
binuptime(struct bintime *bt)
{
	struct timehands *th;
	lwp_t *l;
	u_int lgen, gen;

	TC_COUNT(nbinuptime);

	/*
	 * Provide exclusion against tc_detach().
	 *
	 * We record the number of timecounter removals before accessing
	 * timecounter state.  Note that the LWP can be using multiple
	 * "generations" at once, due to interrupts (interrupted while in
	 * this function).  Hardware interrupts will borrow the interrupted
	 * LWP's l_tcgen value for this purpose, and can themselves be
	 * interrupted by higher priority interrupts.  In this case we need
	 * to ensure that the oldest generation in use is recorded.
	 *
	 * splsched() is too expensive to use, so we take care to structure
	 * this code in such a way that it is not required.  Likewise, we
	 * do not disable preemption.
	 *
	 * Memory barriers are also too expensive to use for such a
	 * performance critical function.  The good news is that we do not
	 * need memory barriers for this type of exclusion, as the thread
	 * updating timecounter_removals will issue a broadcast cross call
	 * before inspecting our l_tcgen value (this elides memory ordering
	 * issues).
	 */
	l = curlwp;
	lgen = l->l_tcgen;
	if (__predict_true(lgen == 0)) {
		/* Outermost entry: record the removal generation in use. */
		l->l_tcgen = timecounter_removals;
	}
	__insn_barrier();

	/* Retry until a consistent timehands snapshot has been read. */
	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
		bintime_addx(bt, th->th_scale * tc_delta(th));
	} while (gen == 0 || gen != th->th_generation);

	__insn_barrier();
	l->l_tcgen = lgen;	/* restore the outer generation (0 if outermost) */
}
361
362 void
363 nanouptime(struct timespec *tsp)
364 {
365 struct bintime bt;
366
367 TC_COUNT(nnanouptime);
368 binuptime(&bt);
369 bintime2timespec(&bt, tsp);
370 }
371
372 void
373 microuptime(struct timeval *tvp)
374 {
375 struct bintime bt;
376
377 TC_COUNT(nmicrouptime);
378 binuptime(&bt);
379 bintime2timeval(&bt, tvp);
380 }
381
/*
 * bintime: like binuptime(), but yields UTC by adding the boot-time
 * offset (timebasebin).
 */
void
bintime(struct bintime *bt)
{

	TC_COUNT(nbintime);
	binuptime(bt);
	bintime_add(bt, &timebasebin);
}
390
391 void
392 nanotime(struct timespec *tsp)
393 {
394 struct bintime bt;
395
396 TC_COUNT(nnanotime);
397 bintime(&bt);
398 bintime2timespec(&bt, tsp);
399 }
400
401 void
402 microtime(struct timeval *tvp)
403 {
404 struct bintime bt;
405
406 TC_COUNT(nmicrotime);
407 bintime(&bt);
408 bintime2timeval(&bt, tvp);
409 }
410
411 void
412 getbinuptime(struct bintime *bt)
413 {
414 struct timehands *th;
415 u_int gen;
416
417 TC_COUNT(ngetbinuptime);
418 do {
419 th = timehands;
420 gen = th->th_generation;
421 *bt = th->th_offset;
422 } while (gen == 0 || gen != th->th_generation);
423 }
424
425 void
426 getnanouptime(struct timespec *tsp)
427 {
428 struct timehands *th;
429 u_int gen;
430
431 TC_COUNT(ngetnanouptime);
432 do {
433 th = timehands;
434 gen = th->th_generation;
435 bintime2timespec(&th->th_offset, tsp);
436 } while (gen == 0 || gen != th->th_generation);
437 }
438
439 void
440 getmicrouptime(struct timeval *tvp)
441 {
442 struct timehands *th;
443 u_int gen;
444
445 TC_COUNT(ngetmicrouptime);
446 do {
447 th = timehands;
448 gen = th->th_generation;
449 bintime2timeval(&th->th_offset, tvp);
450 } while (gen == 0 || gen != th->th_generation);
451 }
452
453 void
454 getbintime(struct bintime *bt)
455 {
456 struct timehands *th;
457 u_int gen;
458
459 TC_COUNT(ngetbintime);
460 do {
461 th = timehands;
462 gen = th->th_generation;
463 *bt = th->th_offset;
464 } while (gen == 0 || gen != th->th_generation);
465 bintime_add(bt, &timebasebin);
466 }
467
468 void
469 getnanotime(struct timespec *tsp)
470 {
471 struct timehands *th;
472 u_int gen;
473
474 TC_COUNT(ngetnanotime);
475 do {
476 th = timehands;
477 gen = th->th_generation;
478 *tsp = th->th_nanotime;
479 } while (gen == 0 || gen != th->th_generation);
480 }
481
482 void
483 getmicrotime(struct timeval *tvp)
484 {
485 struct timehands *th;
486 u_int gen;
487
488 TC_COUNT(ngetmicrotime);
489 do {
490 th = timehands;
491 gen = th->th_generation;
492 *tvp = th->th_microtime;
493 } while (gen == 0 || gen != th->th_generation);
494 }
495
496 /*
497 * Initialize a new timecounter and possibly use it.
498 */
void
tc_init(struct timecounter *tc)
{
	u_int u;

	/*
	 * Counter wraps per second is roughly frequency / mask; hz must
	 * exceed that (with margin) or tc_windup() cannot keep up.
	 */
	u = tc->tc_frequency / tc->tc_counter_mask;
	/* XXX: We need some margin here, 10% is a guess */
	u *= 11;
	u /= 10;
	if (u > hz && tc->tc_quality >= 0) {
		/* Would overflow between ticks: demote so it is never chosen. */
		tc->tc_quality = -2000;
		aprint_verbose(
		    "timecounter: Timecounter \"%s\" frequency %ju Hz",
		    tc->tc_name, (uintmax_t)tc->tc_frequency);
		aprint_verbose(" -- Insufficient hz, needs at least %u\n", u);
	} else if (tc->tc_quality >= 0 || bootverbose) {
		aprint_verbose(
		    "timecounter: Timecounter \"%s\" frequency %ju Hz "
		    "quality %d\n", tc->tc_name, (uintmax_t)tc->tc_frequency,
		    tc->tc_quality);
	}

	mutex_spin_enter(&timecounter_lock);
	tc->tc_next = timecounters;
	timecounters = tc;
	timecounter_mods++;
	/*
	 * Never automatically use a timecounter with negative quality.
	 * Even though we run on the dummy counter, switching here may be
	 * worse since this timecounter may not be monotonous.
	 */
	if (tc->tc_quality >= 0 && (tc->tc_quality > timecounter->tc_quality ||
	    (tc->tc_quality == timecounter->tc_quality &&
	    tc->tc_frequency > timecounter->tc_frequency))) {
		/* Warm up the counter, then switch and republish. */
		(void)tc->tc_get_timecount(tc);
		(void)tc->tc_get_timecount(tc);
		timecounter = tc;
		tc_windup();
	}
	mutex_spin_exit(&timecounter_lock);
}
540
541 /*
542 * Pick a new timecounter due to the existing counter going bad.
543 */
544 static void
545 tc_pick(void)
546 {
547 struct timecounter *best, *tc;
548
549 KASSERT(mutex_owned(&timecounter_lock));
550
551 for (best = tc = timecounters; tc != NULL; tc = tc->tc_next) {
552 if (tc->tc_quality > best->tc_quality)
553 best = tc;
554 else if (tc->tc_quality < best->tc_quality)
555 continue;
556 else if (tc->tc_frequency > best->tc_frequency)
557 best = tc;
558 }
559 (void)best->tc_get_timecount(best);
560 (void)best->tc_get_timecount(best);
561 timecounter = best;
562 }
563
564 /*
565 * A timecounter has gone bad, arrange to pick a new one at the next
566 * clock tick.
567 */
void
tc_gonebad(struct timecounter *tc)
{

	/* Demote the counter so tc_pick() will not select it again. */
	tc->tc_quality = -100;
	/* Make the quality change visible before raising the flag. */
	membar_producer();
	atomic_inc_uint(&timecounter_bad);	/* noticed by tc_ticktock() */
}
576
577 /*
578 * Stop using a timecounter and remove it from the timecounters list.
579 */
int
tc_detach(struct timecounter *target)
{
	struct timecounter *tc;
	struct timecounter **tcp = NULL;
	int removals;
	uint64_t where;
	lwp_t *l;

	/* First, find the timecounter. */
	mutex_spin_enter(&timecounter_lock);
	for (tcp = &timecounters, tc = timecounters;
	     tc != NULL;
	     tcp = &tc->tc_next, tc = tc->tc_next) {
		if (tc == target)
			break;
	}
	if (tc == NULL) {
		mutex_spin_exit(&timecounter_lock);
		return ESRCH;	/* not registered */
	}

	/* And now, remove it. */
	*tcp = tc->tc_next;
	if (timecounter == target) {
		/* The active counter is going away; switch right now. */
		tc_pick();
		tc_windup();
	}
	timecounter_mods++;
	removals = timecounter_removals++;
	mutex_spin_exit(&timecounter_lock);

	/*
	 * We now have to determine if any threads in the system are still
	 * making use of this timecounter.
	 *
	 * We issue a broadcast cross call to elide memory ordering issues,
	 * then scan all LWPs in the system looking at each's timecounter
	 * generation number.  We need to see a value of zero (not actively
	 * using a timecounter) or a value greater than our removal value.
	 *
	 * We may race with threads that read `timecounter_removals' and
	 * then get preempted before updating `l_tcgen'.  This is not
	 * a problem, since it means that these threads have not yet started
	 * accessing timecounter state.  All we do need is one clean
	 * snapshot of the system where every thread appears not to be using
	 * old timecounter state.
	 */
	for (;;) {
		where = xc_broadcast(0, (xcfunc_t)nullop, NULL, NULL);
		xc_wait(where);

		mutex_enter(proc_lock);
		LIST_FOREACH(l, &alllwp, l_list) {
			if (l->l_tcgen == 0 || l->l_tcgen > removals) {
				/*
				 * Not using timecounter or old timecounter
				 * state at time of our xcall or later.
				 */
				continue;
			}
			break;
		}
		mutex_exit(proc_lock);

		/*
		 * If the timecounter is still in use, wait at least 10ms
		 * before retrying.
		 */
		if (l == NULL) {
			return 0;
		}
		(void)kpause("tcdetach", false, mstohz(10), NULL);
	}
}
655
656 /* Report the frequency of the current timecounter. */
657 u_int64_t
658 tc_getfrequency(void)
659 {
660
661 return (timehands->th_counter->tc_frequency);
662 }
663
664 /*
665 * Step our concept of UTC. This is done by modifying our estimate of
666 * when we booted.
667 */
void
tc_setclock(const struct timespec *ts)
{
	struct timespec ts2;
	struct bintime bt, bt2;

	mutex_spin_enter(&timecounter_lock);
	TC_COUNT(nsetclock);
	binuptime(&bt2);		/* current uptime */
	timespec2bintime(ts, &bt);
	bintime_sub(&bt, &bt2);		/* new UTC offset = target - uptime */
	bintime_add(&bt2, &timebasebin);	/* old UTC, kept for the log */
	timebasebin = bt;
	tc_windup();			/* republish cached UTC immediately */
	mutex_spin_exit(&timecounter_lock);

	if (timestepwarnings) {
		bintime2timespec(&bt2, &ts2);
		log(LOG_INFO, "Time stepped from %lld.%09ld to %lld.%09ld\n",
		    (long long)ts2.tv_sec, ts2.tv_nsec,
		    (long long)ts->tv_sec, ts->tv_nsec);
	}
}
691
692 /*
693 * Initialize the next struct timehands in the ring and make
694 * it the active timehands. Along the way we might switch to a different
695 * timecounter and/or do seconds processing in NTP. Slightly magic.
696 */
static void
tc_windup(void)
{
	struct bintime bt;
	struct timehands *th, *tho;
	u_int64_t scale;
	u_int delta, ncount, ogen;
	int i, s_update;
	time_t t;

	KASSERT(mutex_owned(&timecounter_lock));

	/* Set when the scale factor must be recomputed below. */
	s_update = 0;

	/*
	 * Make the next timehands a copy of the current one, but do not
	 * overwrite the generation or next pointer.  While we update
	 * the contents, the generation must be zero.  Ensure global
	 * visibility of the generation before proceeding.
	 */
	tho = timehands;
	th = tho->th_next;
	ogen = th->th_generation;
	th->th_generation = 0;
	membar_producer();
	bcopy(tho, th, offsetof(struct timehands, th_generation));

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != timecounter)
		ncount = timecounter->tc_get_timecount(timecounter);
	else
		ncount = 0;
	th->th_offset_count += delta;
	th->th_offset_count &= th->th_counter->tc_counter_mask;
	bintime_addx(&th->th_offset, th->th_scale * delta);

	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them.  There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which might
	 * have a different rate than the current NTP second.  It doesn't
	 * matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);

	/*
	 * Deal with NTP second processing.  The for loop normally
	 * iterates at most once, but in extreme situations it might
	 * keep NTP sane if timeouts are not run for several seconds.
	 * At boot, the time step can be large when the TOD hardware
	 * has been read, so on really large steps, we call
	 * ntp_update_second only twice.  We need to call it twice in
	 * case we missed a leap second.
	 * If NTP is not compiled in ntp_update_second still calculates
	 * the adjustment resulting from adjtime() calls.
	 */
	bt = th->th_offset;
	bintime_add(&bt, &timebasebin);
	i = bt.sec - tho->th_microtime.tv_sec;
	if (i > LARGE_STEP)
		i = 2;
	for (; i > 0; i--) {
		t = bt.sec;
		ntp_update_second(&th->th_adjustment, &bt.sec);
		s_update = 1;
		/* ntp_update_second() may have absorbed a leap second. */
		if (bt.sec != t)
			timebasebin.sec += bt.sec - t;
	}

	/* Update the UTC timestamps used by the get*() functions. */
	/* XXX shouldn't do this here.  Should force non-`get' versions. */
	bintime2timeval(&bt, &th->th_microtime);
	bintime2timespec(&bt, &th->th_nanotime);
	/* Now is a good time to change timecounters. */
	if (th->th_counter != timecounter) {
		th->th_counter = timecounter;
		th->th_offset_count = ncount;
		s_update = 1;
	}

	/*-
	 * Recalculate the scaling factor.  We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and we want 64 bit binary fraction of second:
	 *
	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, but that
	 * leaves suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment.  On a
	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 *
	 */
	if (s_update) {
		scale = (u_int64_t)1 << 63;
		scale += (th->th_adjustment / 1024) * 2199;
		scale /= th->th_counter->tc_frequency;
		th->th_scale = scale * 2;
	}
	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.  Ensure
	 * changes are globally visible before changing.
	 */
	if (++ogen == 0)
		ogen = 1;
	membar_producer();
	th->th_generation = ogen;

	/*
	 * Go live with the new struct timehands.  Ensure changes are
	 * globally visible before changing.
	 */
	time_second = th->th_microtime.tv_sec;
	time_uptime = th->th_offset.sec;
	membar_producer();
	timehands = th;

	/*
	 * Force users of the old timehand to move on.  This is
	 * necessary for MP systems; we need to ensure that the
	 * consumers will move away from the old timehand before
	 * we begin updating it again when we eventually wrap
	 * around.
	 */
	if (++tho->th_generation == 0)
		tho->th_generation = 1;
}
842
843 /*
844 * RFC 2783 PPS-API implementation.
845 */
846
847 int
848 pps_ioctl(u_long cmd, void *data, struct pps_state *pps)
849 {
850 pps_params_t *app;
851 pps_info_t *pipi;
852 #ifdef PPS_SYNC
853 int *epi;
854 #endif
855
856 KASSERT(mutex_owned(&timecounter_lock));
857
858 KASSERT(pps != NULL); /* XXX ("NULL pps pointer in pps_ioctl") */
859 switch (cmd) {
860 case PPS_IOC_CREATE:
861 return (0);
862 case PPS_IOC_DESTROY:
863 return (0);
864 case PPS_IOC_SETPARAMS:
865 app = (pps_params_t *)data;
866 if (app->mode & ~pps->ppscap)
867 return (EINVAL);
868 pps->ppsparam = *app;
869 return (0);
870 case PPS_IOC_GETPARAMS:
871 app = (pps_params_t *)data;
872 *app = pps->ppsparam;
873 app->api_version = PPS_API_VERS_1;
874 return (0);
875 case PPS_IOC_GETCAP:
876 *(int*)data = pps->ppscap;
877 return (0);
878 case PPS_IOC_FETCH:
879 pipi = (pps_info_t *)data;
880 pps->ppsinfo.current_mode = pps->ppsparam.mode;
881 *pipi = pps->ppsinfo;
882 return (0);
883 case PPS_IOC_KCBIND:
884 #ifdef PPS_SYNC
885 epi = (int *)data;
886 /* XXX Only root should be able to do this */
887 if (*epi & ~pps->ppscap)
888 return (EINVAL);
889 pps->kcmode = *epi;
890 return (0);
891 #else
892 return (EOPNOTSUPP);
893 #endif
894 default:
895 return (EPASSTHROUGH);
896 }
897 }
898
899 void
900 pps_init(struct pps_state *pps)
901 {
902
903 KASSERT(mutex_owned(&timecounter_lock));
904
905 pps->ppscap |= PPS_TSFMT_TSPEC;
906 if (pps->ppscap & PPS_CAPTUREASSERT)
907 pps->ppscap |= PPS_OFFSETASSERT;
908 if (pps->ppscap & PPS_CAPTURECLEAR)
909 pps->ppscap |= PPS_OFFSETCLEAR;
910 }
911
void
pps_capture(struct pps_state *pps)
{
	struct timehands *th;

	KASSERT(mutex_owned(&timecounter_lock));
	KASSERT(pps != NULL);

	/* Snapshot the current timehands and the raw counter value. */
	th = timehands;
	pps->capgen = th->th_generation;
	pps->capth = th;
	pps->capcount = th->th_counter->tc_get_timecount(th->th_counter);
	/* If the timehands changed underneath us, invalidate the capture. */
	if (pps->capgen != th->th_generation)
		pps->capgen = 0;
}
927
void
pps_event(struct pps_state *pps, int event)
{
	struct bintime bt;
	struct timespec ts, *tsp, *osp;
	u_int tcount, *pcount;
	int foff, fhard;
	pps_seq_t *pseq;

	KASSERT(mutex_owned(&timecounter_lock));

	KASSERT(pps != NULL); /* XXX ("NULL pps pointer in pps_event") */
	/* If the timecounter was wound up underneath us, bail out. */
	if (pps->capgen == 0 || pps->capgen != pps->capth->th_generation)
		return;

	/* Things would be easier with arrays. */
	/* Select the assert- or clear-edge state to operate on. */
	if (event == PPS_CAPTUREASSERT) {
		tsp = &pps->ppsinfo.assert_timestamp;
		osp = &pps->ppsparam.assert_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
		fhard = pps->kcmode & PPS_CAPTUREASSERT;
		pcount = &pps->ppscount[0];
		pseq = &pps->ppsinfo.assert_sequence;
	} else {
		tsp = &pps->ppsinfo.clear_timestamp;
		osp = &pps->ppsparam.clear_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
		fhard = pps->kcmode & PPS_CAPTURECLEAR;
		pcount = &pps->ppscount[1];
		pseq = &pps->ppsinfo.clear_sequence;
	}

	/*
	 * If the timecounter changed, we cannot compare the count values, so
	 * we have to drop the rest of the PPS-stuff until the next event.
	 */
	if (pps->ppstc != pps->capth->th_counter) {
		pps->ppstc = pps->capth->th_counter;
		*pcount = pps->capcount;
		pps->ppscount[2] = pps->capcount;
		return;
	}

	/* Convert the count to a timespec. */
	tcount = pps->capcount - pps->capth->th_offset_count;
	tcount &= pps->capth->th_counter->tc_counter_mask;
	bt = pps->capth->th_offset;
	bintime_addx(&bt, pps->capth->th_scale * tcount);
	bintime_add(&bt, &timebasebin);
	bintime2timespec(&bt, &ts);

	/* If the timecounter was wound up underneath us, bail out. */
	if (pps->capgen != pps->capth->th_generation)
		return;

	/* Publish the captured count, sequence number and timestamp. */
	*pcount = pps->capcount;
	(*pseq)++;
	*tsp = ts;

	if (foff) {
		/* Apply the user-configured offset, renormalizing tv_nsec. */
		timespecadd(tsp, osp, tsp);
		if (tsp->tv_nsec < 0) {
			tsp->tv_nsec += 1000000000;
			tsp->tv_sec -= 1;
		}
	}
#ifdef PPS_SYNC
	if (fhard) {
		u_int64_t scale;

		/*
		 * Feed the NTP PLL/FLL.
		 * The FLL wants to know how many (hardware) nanoseconds
		 * elapsed since the previous event.
		 */
		tcount = pps->capcount - pps->ppscount[2];
		pps->ppscount[2] = pps->capcount;
		tcount &= pps->capth->th_counter->tc_counter_mask;
		scale = (u_int64_t)1 << 63;
		scale /= pps->capth->th_counter->tc_frequency;
		scale *= 2;	/* scale = 2^64 / frequency */
		bt.sec = 0;
		bt.frac = 0;
		bintime_addx(&bt, scale * tcount);
		bintime2timespec(&bt, &ts);
		hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec);
	}
#endif
}
1018
1019 /*
1020 * Timecounters need to be updated every so often to prevent the hardware
1021 * counter from overflowing. Updating also recalculates the cached values
1022 * used by the get*() family of functions, so their precision depends on
1023 * the update frequency.
1024 */
1025
1026 static int tc_tick;
1027
1028 void
1029 tc_ticktock(void)
1030 {
1031 static int count;
1032
1033 if (++count < tc_tick)
1034 return;
1035 count = 0;
1036 mutex_spin_enter(&timecounter_lock);
1037 if (timecounter_bad != 0) {
1038 /* An existing timecounter has gone bad, pick a new one. */
1039 (void)atomic_swap_uint(&timecounter_bad, 0);
1040 if (timecounter->tc_quality < 0) {
1041 tc_pick();
1042 }
1043 }
1044 tc_windup();
1045 mutex_spin_exit(&timecounter_lock);
1046 }
1047
1048 void
1049 inittimecounter(void)
1050 {
1051 u_int p;
1052
1053 mutex_init(&timecounter_lock, MUTEX_DEFAULT, IPL_HIGH);
1054
1055 /*
1056 * Set the initial timeout to
1057 * max(1, <approx. number of hardclock ticks in a millisecond>).
1058 * People should probably not use the sysctl to set the timeout
1059 * to smaller than its inital value, since that value is the
1060 * smallest reasonable one. If they want better timestamps they
1061 * should use the non-"get"* functions.
1062 */
1063 if (hz > 1000)
1064 tc_tick = (hz + 500) / 1000;
1065 else
1066 tc_tick = 1;
1067 p = (tc_tick * 1000000) / hz;
1068 aprint_verbose("timecounter: Timecounters tick every %d.%03u msec\n",
1069 p / 1000, p % 1000);
1070
1071 /* warm up new timecounter (again) and get rolling. */
1072 (void)timecounter->tc_get_timecount(timecounter);
1073 (void)timecounter->tc_get_timecount(timecounter);
1074 }
1075