/*	$NetBSD: kern_tc.c,v 1.34 2008/04/28 20:24:03 martin Exp $	*/

/*-
 * Copyright (c) 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return.  Poul-Henning Kamp
 * ---------------------------------------------------------------------------
 */

#include <sys/cdefs.h>
/* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */
__KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.34 2008/04/28 20:24:03 martin Exp $");

#include "opt_ntp.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/reboot.h>	/* XXX just to get AB_VERBOSE */
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/timepps.h>
#include <sys/timetc.h>
#include <sys/timex.h>
#include <sys/evcnt.h>
#include <sys/kauth.h>
#include <sys/mutex.h>
#include <sys/atomic.h>

/*
 * A large step happens on boot.  This constant detects such steps.
 * It is relatively small so that ntp_update_second gets called enough
 * in the typical 'missed a couple of seconds' case, but doesn't loop
 * forever when the time step is large.
 */
#define	LARGE_STEP	200

/*
 * Implement a dummy timecounter which we can use until we get a real one
 * in the air.  This allows the console and other early stuff to use
 * time services.
 */

static u_int
dummy_get_timecount(struct timecounter *tc)
{
	static u_int now;

	return (++now);
}

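/*
 * Positional initializer; the timecounter(9) fields are, in order:
 * tc_get_timecount, tc_poll_pps, tc_counter_mask, tc_frequency,
 * tc_name, tc_quality (negative: never selected automatically),
 * tc_priv and tc_next.  See <sys/timetc.h>.
 */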
static struct timecounter dummy_timecounter = {
	dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000, NULL, NULL,
};

struct timehands {
	/* These fields must be initialized by the driver. */
	struct timecounter	*th_counter;
	int64_t			th_adjustment;
	u_int64_t		th_scale;
	u_int			th_offset_count;
	struct bintime		th_offset;
	struct timeval		th_microtime;
	struct timespec		th_nanotime;
	/* Fields not to be copied in tc_windup start with th_generation. */
	volatile u_int		th_generation;
	struct timehands	*th_next;
};

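/*
 * Ten statically allocated timehands, linked into a ring.  tc_windup()
 * fills in the slot after the current one and then publishes it, so
 * lock-free readers get several generations' grace before a slot they
 * may still be reading is overwritten.
 */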
static struct timehands th0;
static struct timehands th9 = { .th_next = &th0, };
static struct timehands th8 = { .th_next = &th9, };
static struct timehands th7 = { .th_next = &th8, };
static struct timehands th6 = { .th_next = &th7, };
static struct timehands th5 = { .th_next = &th6, };
static struct timehands th4 = { .th_next = &th5, };
static struct timehands th3 = { .th_next = &th4, };
static struct timehands th2 = { .th_next = &th3, };
static struct timehands th1 = { .th_next = &th2, };
static struct timehands th0 = {
	.th_counter = &dummy_timecounter,
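	/* ~2^64 / 1000000: each dummy tick advances time by one microsecond. */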
	.th_scale = (uint64_t)-1 / 1000000,
	.th_offset = { .sec = 1, .frac = 0 },
	.th_generation = 1,
	.th_next = &th1,
};

static struct timehands *volatile timehands = &th0;
struct timecounter *timecounter = &dummy_timecounter;
static struct timecounter *timecounters = &dummy_timecounter;

time_t time_second = 1;
time_t time_uptime = 1;

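/* Our estimate of boot time (UTC): wall time = uptime + timebasebin. */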
static struct bintime timebasebin;

static int timestepwarnings;

extern kmutex_t time_lock;
kmutex_t timecounter_lock;

#ifdef __FreeBSD__
SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW,
    &timestepwarnings, 0, "");
#endif /* __FreeBSD__ */

/*
 * sysctl helper routine for kern.timecounter.hardware
 */
static int
sysctl_kern_timecounter_hardware(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error;
	char newname[MAX_TCNAMELEN];
	struct timecounter *newtc, *tc;

	tc = timecounter;

	strlcpy(newname, tc->tc_name, sizeof(newname));

	node = *rnode;
	node.sysctl_data = newname;
	node.sysctl_size = sizeof(newname);

	error = sysctl_lookup(SYSCTLFN_CALL(&node));

	if (error ||
	    newp == NULL ||
	    strncmp(newname, tc->tc_name, sizeof(newname)) == 0)
		return error;

	if (l != NULL && (error = kauth_authorize_system(l->l_cred,
	    KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_TIMECOUNTERS, newname,
	    NULL, NULL)) != 0)
		return (error);

	if (!cold)
		mutex_enter(&time_lock);
	error = EINVAL;
	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
		if (strcmp(newname, newtc->tc_name) != 0)
			continue;
		/* Warm up new timecounter. */
		(void)newtc->tc_get_timecount(newtc);
		(void)newtc->tc_get_timecount(newtc);
		timecounter = newtc;
		error = 0;
		break;
	}
	if (!cold)
		mutex_exit(&time_lock);
	return error;
}

static int
sysctl_kern_timecounter_choice(SYSCTLFN_ARGS)
{
	char buf[MAX_TCNAMELEN+48];
	char *where = oldp;
	const char *spc;
	struct timecounter *tc;
	size_t needed, left, slen;
	int error;

	if (newp != NULL)
		return (EPERM);
	if (namelen != 0)
		return (EINVAL);

	spc = "";
	error = 0;
	needed = 0;
	left = *oldlenp;

	mutex_enter(&time_lock);
	for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) {
		if (where == NULL) {
			needed += sizeof(buf);	/* be conservative */
		} else {
			slen = snprintf(buf, sizeof(buf), "%s%s(q=%d, f=%" PRId64
			    " Hz)", spc, tc->tc_name, tc->tc_quality,
			    tc->tc_frequency);
			if (left < slen + 1)
				break;
			/* XXX use sysctl_copyout? (from sysctl_hw_disknames) */
			/* XXX copyout with held lock. */
			error = copyout(buf, where, slen + 1);
			spc = " ";
			where += slen;
			needed += slen;
			left -= slen;
		}
	}
	mutex_exit(&time_lock);

	*oldlenp = needed;
	return (error);
}

SYSCTL_SETUP(sysctl_timecounter_setup, "sysctl timecounter setup")
{
	const struct sysctlnode *node;

	sysctl_createv(clog, 0, NULL, &node,
		CTLFLAG_PERMANENT,
		CTLTYPE_NODE, "timecounter",
		SYSCTL_DESCR("time counter information"),
		NULL, 0, NULL, 0,
		CTL_KERN, CTL_CREATE, CTL_EOL);

	if (node != NULL) {
		sysctl_createv(clog, 0, NULL, NULL,
			CTLFLAG_PERMANENT,
			CTLTYPE_STRING, "choice",
			SYSCTL_DESCR("available counters"),
			sysctl_kern_timecounter_choice, 0, NULL, 0,
			CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);

		sysctl_createv(clog, 0, NULL, NULL,
			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
			CTLTYPE_STRING, "hardware",
			SYSCTL_DESCR("currently active time counter"),
			sysctl_kern_timecounter_hardware, 0, NULL, MAX_TCNAMELEN,
			CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);

		sysctl_createv(clog, 0, NULL, NULL,
			CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
			CTLTYPE_INT, "timestepwarnings",
			SYSCTL_DESCR("log time steps"),
			NULL, 0, &timestepwarnings, 0,
			CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
	}
}

#ifdef TC_COUNTERS
#define TC_STATS(name)							\
static struct evcnt n##name =						\
    EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "timecounter", #name);	\
EVCNT_ATTACH_STATIC(n##name)
TC_STATS(binuptime);    TC_STATS(nanouptime);    TC_STATS(microuptime);
TC_STATS(bintime);      TC_STATS(nanotime);      TC_STATS(microtime);
TC_STATS(getbinuptime); TC_STATS(getnanouptime); TC_STATS(getmicrouptime);
TC_STATS(getbintime);   TC_STATS(getnanotime);   TC_STATS(getmicrotime);
TC_STATS(setclock);
#define	TC_COUNT(var)	var.ev_count++
#undef TC_STATS
#else
#define	TC_COUNT(var)	/* nothing */
#endif	/* TC_COUNTERS */

static void tc_windup(void);

/*
 * Return the difference between the timehands' counter value now and the
 * value that was copied into the timehands' offset_count.
 */
static __inline u_int
tc_delta(struct timehands *th)
{
	struct timecounter *tc;

	tc = th->th_counter;
	return ((tc->tc_get_timecount(tc) -
		 th->th_offset_count) & tc->tc_counter_mask);
}

/*
 * Functions for reading the time.  We have to loop until we are sure that
 * the timehands that we operated on was not updated under our feet.  See
 * the comment in <sys/timevar.h> for a description of these 12 functions.
 */

void
binuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(nbinuptime);
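	/*
	 * Lock-free read: a generation of zero means tc_windup() is
	 * updating this slot; retry until the generation is non-zero
	 * and stable across the whole read.
	 */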
	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
		bintime_addx(bt, th->th_scale * tc_delta(th));
	} while (gen == 0 || gen != th->th_generation);
}

void
nanouptime(struct timespec *tsp)
{
	struct bintime bt;

	TC_COUNT(nnanouptime);
	binuptime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microuptime(struct timeval *tvp)
{
	struct bintime bt;

	TC_COUNT(nmicrouptime);
	binuptime(&bt);
	bintime2timeval(&bt, tvp);
}

void
bintime(struct bintime *bt)
{

	TC_COUNT(nbintime);
	binuptime(bt);
	bintime_add(bt, &timebasebin);
}

void
nanotime(struct timespec *tsp)
{
	struct bintime bt;

	TC_COUNT(nnanotime);
	bintime(&bt);
	bintime2timespec(&bt, tsp);
}

void
microtime(struct timeval *tvp)
{
	struct bintime bt;

	TC_COUNT(nmicrotime);
	bintime(&bt);
	bintime2timeval(&bt, tvp);
}

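/*
 * The get*() variants below return the timestamps cached by the last
 * tc_windup() call instead of reading the hardware counter; they are
 * cheaper, but only as fresh as the last windup (see tc_ticktock()).
 */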
void
getbinuptime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetbinuptime);
	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
	} while (gen == 0 || gen != th->th_generation);
}

void
getnanouptime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetnanouptime);
	do {
		th = timehands;
		gen = th->th_generation;
		bintime2timespec(&th->th_offset, tsp);
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrouptime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetmicrouptime);
	do {
		th = timehands;
		gen = th->th_generation;
		bintime2timeval(&th->th_offset, tvp);
	} while (gen == 0 || gen != th->th_generation);
}

void
getbintime(struct bintime *bt)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetbintime);
	do {
		th = timehands;
		gen = th->th_generation;
		*bt = th->th_offset;
	} while (gen == 0 || gen != th->th_generation);
	bintime_add(bt, &timebasebin);
}

void
getnanotime(struct timespec *tsp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetnanotime);
	do {
		th = timehands;
		gen = th->th_generation;
		*tsp = th->th_nanotime;
	} while (gen == 0 || gen != th->th_generation);
}

void
getmicrotime(struct timeval *tvp)
{
	struct timehands *th;
	u_int gen;

	TC_COUNT(ngetmicrotime);
	do {
		th = timehands;
		gen = th->th_generation;
		*tvp = th->th_microtime;
	} while (gen == 0 || gen != th->th_generation);
}

/*
 * Initialize a new timecounter and possibly use it.
 */
void
tc_init(struct timecounter *tc)
{
	u_int u;

	u = tc->tc_frequency / tc->tc_counter_mask;
	/* XXX: We need some margin here, 10% is a guess */
	u *= 11;
	u /= 10;
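	/*
	 * u is the minimum windup rate needed to catch every counter
	 * wrap.  For example, the 16-bit i8254 at 1193182 Hz wraps
	 * about 18.2 times a second, so with the 10% margin it needs
	 * hz of roughly 20 or wraps could go unnoticed.
	 */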
	if (u > hz && tc->tc_quality >= 0) {
		tc->tc_quality = -2000;
		aprint_verbose(
		    "timecounter: Timecounter \"%s\" frequency %ju Hz",
		    tc->tc_name, (uintmax_t)tc->tc_frequency);
		aprint_verbose(" -- Insufficient hz, needs at least %u\n", u);
	} else if (tc->tc_quality >= 0 || bootverbose) {
		aprint_verbose(
		    "timecounter: Timecounter \"%s\" frequency %ju Hz "
		    "quality %d\n", tc->tc_name, (uintmax_t)tc->tc_frequency,
		    tc->tc_quality);
	}

	mutex_enter(&time_lock);
	mutex_spin_enter(&timecounter_lock);
	tc->tc_next = timecounters;
	timecounters = tc;
	/*
	 * Never automatically use a timecounter with negative quality.
	 * Even though we run on the dummy counter, switching here may
	 * be worse, since this timecounter may not be monotonic.
	 */
	if (tc->tc_quality >= 0 && (tc->tc_quality > timecounter->tc_quality ||
	    (tc->tc_quality == timecounter->tc_quality &&
	    tc->tc_frequency > timecounter->tc_frequency))) {
		(void)tc->tc_get_timecount(tc);
		(void)tc->tc_get_timecount(tc);
		timecounter = tc;
		tc_windup();
	}
	mutex_spin_exit(&timecounter_lock);
	mutex_exit(&time_lock);
}

/*
 * Stop using a timecounter and remove it from the timecounters list.
 */
int
tc_detach(struct timecounter *target)
{
	struct timecounter *best, *tc;
	struct timecounter **tcp = NULL;
	int rc = 0;

	mutex_enter(&time_lock);
	for (tcp = &timecounters, tc = timecounters;
	     tc != NULL;
	     tcp = &tc->tc_next, tc = tc->tc_next) {
		if (tc == target)
			break;
	}
	if (tc == NULL) {
		rc = ESRCH;
		goto out;
	}
	*tcp = tc->tc_next;

	if (timecounter != target)
		goto out;

	for (best = tc = timecounters; tc != NULL; tc = tc->tc_next) {
		if (tc->tc_quality > best->tc_quality)
			best = tc;
		else if (tc->tc_quality < best->tc_quality)
			continue;
		else if (tc->tc_frequency > best->tc_frequency)
			best = tc;
	}
	mutex_spin_enter(&timecounter_lock);
	(void)best->tc_get_timecount(best);
	(void)best->tc_get_timecount(best);
	timecounter = best;
	tc_windup();
	mutex_spin_exit(&timecounter_lock);
out:
	mutex_exit(&time_lock);
	return rc;
}

/* Report the frequency of the current timecounter. */
u_int64_t
tc_getfrequency(void)
{

	return (timehands->th_counter->tc_frequency);
}

/*
 * Step our concept of UTC.  This is done by modifying our estimate of
 * when we booted.
 */
void
tc_setclock(struct timespec *ts)
{
	struct timespec ts2;
	struct bintime bt, bt2;

	mutex_spin_enter(&timecounter_lock);
	TC_COUNT(nsetclock);
	binuptime(&bt2);
	timespec2bintime(ts, &bt);
	bintime_sub(&bt, &bt2);
	bintime_add(&bt2, &timebasebin);
	timebasebin = bt;
	tc_windup();
	mutex_spin_exit(&timecounter_lock);

	if (timestepwarnings) {
		bintime2timespec(&bt2, &ts2);
		log(LOG_INFO, "Time stepped from %jd.%09ld to %jd.%09ld\n",
		    (intmax_t)ts2.tv_sec, ts2.tv_nsec,
		    (intmax_t)ts->tv_sec, ts->tv_nsec);
	}
}

/*
 * Initialize the next struct timehands in the ring and make
 * it the active timehands.  Along the way we might switch to a different
 * timecounter and/or do seconds processing in NTP.  Slightly magic.
 */
static void
tc_windup(void)
{
	struct bintime bt;
	struct timehands *th, *tho;
	u_int64_t scale;
	u_int delta, ncount, ogen;
	int i, s_update;
	time_t t;

	KASSERT(mutex_owned(&timecounter_lock));

	s_update = 0;

	/*
	 * Make the next timehands a copy of the current one, but do not
	 * overwrite the generation or next pointer.  While we update
	 * the contents, the generation must be zero.  Ensure global
	 * visibility of the generation before proceeding.
	 */
	tho = timehands;
	th = tho->th_next;
	ogen = th->th_generation;
	th->th_generation = 0;
	membar_producer();
	bcopy(tho, th, offsetof(struct timehands, th_generation));

	/*
	 * Capture a timecounter delta on the current timecounter and if
	 * changing timecounters, a counter value from the new timecounter.
	 * Update the offset fields accordingly.
	 */
	delta = tc_delta(th);
	if (th->th_counter != timecounter)
		ncount = timecounter->tc_get_timecount(timecounter);
	else
		ncount = 0;
	th->th_offset_count += delta;
	th->th_offset_count &= th->th_counter->tc_counter_mask;
	bintime_addx(&th->th_offset, th->th_scale * delta);

	/*
	 * Hardware latching timecounters may not generate interrupts on
	 * PPS events, so instead we poll them.  There is a finite risk that
	 * the hardware might capture a count which is later than the one we
	 * got above, and therefore possibly in the next NTP second which might
	 * have a different rate than the current NTP second.  It doesn't
	 * matter in practice.
	 */
	if (tho->th_counter->tc_poll_pps)
		tho->th_counter->tc_poll_pps(tho->th_counter);

	/*
	 * Deal with NTP second processing.  The for loop normally
	 * iterates at most once, but in extreme situations it might
	 * keep NTP sane if timeouts are not run for several seconds.
	 * At boot, the time step can be large when the TOD hardware
	 * has been read, so on really large steps we call
	 * ntp_update_second only twice.  We need to call it twice in
	 * case we missed a leap second.
	 * If NTP is not compiled in, ntp_update_second still calculates
	 * the adjustment resulting from adjtime() calls.
	 */
	bt = th->th_offset;
	bintime_add(&bt, &timebasebin);
	i = bt.sec - tho->th_microtime.tv_sec;
	if (i > LARGE_STEP)
		i = 2;
	for (; i > 0; i--) {
		t = bt.sec;
		ntp_update_second(&th->th_adjustment, &bt.sec);
		s_update = 1;
		if (bt.sec != t)
			timebasebin.sec += bt.sec - t;
	}

	/* Update the UTC timestamps used by the get*() functions. */
	/* XXX shouldn't do this here.  Should force non-`get' versions. */
	bintime2timeval(&bt, &th->th_microtime);
	bintime2timespec(&bt, &th->th_nanotime);

	/* Now is a good time to change timecounters. */
	if (th->th_counter != timecounter) {
		th->th_counter = timecounter;
		th->th_offset_count = ncount;
		s_update = 1;
	}

	/*-
	 * Recalculate the scaling factor.  We want the number of 1/2^64
	 * fractions of a second per period of the hardware counter, taking
	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
	 * processing provides us with.
	 *
	 * The th_adjustment is nanoseconds per second with 32 bit binary
	 * fraction and we want 64 bit binary fraction of second:
	 *
	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
	 *
	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
	 * we can only multiply by about 850 without overflowing, but that
	 * leaves suitably precise fractions for multiply before divide.
	 *
	 * Divide before multiply with a fraction of 2199/512 results in a
	 * systematic undercompensation of 10PPM of th_adjustment.  On a
	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
	 *
	 * We happily sacrifice the lowest of the 64 bits of our result
	 * to the goddess of code clarity.
	 */
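	/*
	 * The net factor applied to th_adjustment below is
	 * (2199 / 1024) * 2 = 2199 / 512, because th_scale is doubled
	 * after the division by the frequency, recovering the bit
	 * given up by starting from 2^63 rather than 2^64.
	 */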
	if (s_update) {
		scale = (u_int64_t)1 << 63;
		scale += (th->th_adjustment / 1024) * 2199;
		scale /= th->th_counter->tc_frequency;
		th->th_scale = scale * 2;
	}
	/*
	 * Now that the struct timehands is again consistent, set the new
	 * generation number, making sure to not make it zero.  Ensure
	 * changes are globally visible before changing.
	 */
	if (++ogen == 0)
		ogen = 1;
	membar_producer();
	th->th_generation = ogen;

	/*
	 * Go live with the new struct timehands.  Ensure changes are
	 * globally visible before changing.
	 */
	time_second = th->th_microtime.tv_sec;
	time_uptime = th->th_offset.sec;
	membar_producer();
	timehands = th;

	/*
	 * Force users of the old timehand to move on.  This is
	 * necessary for MP systems; we need to ensure that the
	 * consumers will move away from the old timehand before
	 * we begin updating it again when we eventually wrap
	 * around.
	 */
	if (++tho->th_generation == 0)
		tho->th_generation = 1;
}

/*
 * RFC 2783 PPS-API implementation.
 */

int
pps_ioctl(u_long cmd, void *data, struct pps_state *pps)
{
	pps_params_t *app;
	pps_info_t *pipi;
#ifdef PPS_SYNC
	int *epi;
#endif

	KASSERT(mutex_owned(&timecounter_lock));

	KASSERT(pps != NULL);		/* XXX ("NULL pps pointer in pps_ioctl") */
	switch (cmd) {
	case PPS_IOC_CREATE:
		return (0);
	case PPS_IOC_DESTROY:
		return (0);
	case PPS_IOC_SETPARAMS:
		app = (pps_params_t *)data;
		if (app->mode & ~pps->ppscap)
			return (EINVAL);
		pps->ppsparam = *app;
		return (0);
	case PPS_IOC_GETPARAMS:
		app = (pps_params_t *)data;
		*app = pps->ppsparam;
		app->api_version = PPS_API_VERS_1;
		return (0);
	case PPS_IOC_GETCAP:
		*(int*)data = pps->ppscap;
		return (0);
	case PPS_IOC_FETCH:
		pipi = (pps_info_t *)data;
		pps->ppsinfo.current_mode = pps->ppsparam.mode;
		*pipi = pps->ppsinfo;
		return (0);
	case PPS_IOC_KCBIND:
#ifdef PPS_SYNC
		epi = (int *)data;
		/* XXX Only root should be able to do this */
		if (*epi & ~pps->ppscap)
			return (EINVAL);
		pps->kcmode = *epi;
		return (0);
#else
		return (EOPNOTSUPP);
#endif
	default:
		return (EPASSTHROUGH);
	}
}

void
pps_init(struct pps_state *pps)
{

	KASSERT(mutex_owned(&timecounter_lock));

	pps->ppscap |= PPS_TSFMT_TSPEC;
	if (pps->ppscap & PPS_CAPTUREASSERT)
		pps->ppscap |= PPS_OFFSETASSERT;
	if (pps->ppscap & PPS_CAPTURECLEAR)
		pps->ppscap |= PPS_OFFSETCLEAR;
}

void
pps_capture(struct pps_state *pps)
{
	struct timehands *th;

	KASSERT(mutex_owned(&timecounter_lock));
	KASSERT(pps != NULL);

	th = timehands;
	pps->capgen = th->th_generation;
	pps->capth = th;
	pps->capcount = th->th_counter->tc_get_timecount(th->th_counter);
	if (pps->capgen != th->th_generation)
		pps->capgen = 0;
}

void
pps_event(struct pps_state *pps, int event)
{
	struct bintime bt;
	struct timespec ts, *tsp, *osp;
	u_int tcount, *pcount;
	int foff, fhard;
	pps_seq_t *pseq;

	KASSERT(mutex_owned(&timecounter_lock));

	KASSERT(pps != NULL);		/* XXX ("NULL pps pointer in pps_event") */
	/* If the timecounter was wound up underneath us, bail out. */
	if (pps->capgen == 0 || pps->capgen != pps->capth->th_generation)
		return;

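	/*
	 * ppscount[0] holds the last assert count, ppscount[1] the last
	 * clear count, and ppscount[2] the previous capture used to
	 * measure the interval fed to hardpps().
	 */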
	/* Things would be easier with arrays. */
	if (event == PPS_CAPTUREASSERT) {
		tsp = &pps->ppsinfo.assert_timestamp;
		osp = &pps->ppsparam.assert_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
		fhard = pps->kcmode & PPS_CAPTUREASSERT;
		pcount = &pps->ppscount[0];
		pseq = &pps->ppsinfo.assert_sequence;
	} else {
		tsp = &pps->ppsinfo.clear_timestamp;
		osp = &pps->ppsparam.clear_offset;
		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
		fhard = pps->kcmode & PPS_CAPTURECLEAR;
		pcount = &pps->ppscount[1];
		pseq = &pps->ppsinfo.clear_sequence;
	}

	/*
	 * If the timecounter changed, we cannot compare the count values, so
	 * we have to drop the rest of the PPS-stuff until the next event.
	 */
	if (pps->ppstc != pps->capth->th_counter) {
		pps->ppstc = pps->capth->th_counter;
		*pcount = pps->capcount;
		pps->ppscount[2] = pps->capcount;
		return;
	}

	/* Convert the count to a timespec. */
	tcount = pps->capcount - pps->capth->th_offset_count;
	tcount &= pps->capth->th_counter->tc_counter_mask;
	bt = pps->capth->th_offset;
	bintime_addx(&bt, pps->capth->th_scale * tcount);
	bintime_add(&bt, &timebasebin);
	bintime2timespec(&bt, &ts);

	/* If the timecounter was wound up underneath us, bail out. */
	if (pps->capgen != pps->capth->th_generation)
		return;

	*pcount = pps->capcount;
	(*pseq)++;
	*tsp = ts;

	if (foff) {
		timespecadd(tsp, osp, tsp);
		if (tsp->tv_nsec < 0) {
			tsp->tv_nsec += 1000000000;
			tsp->tv_sec -= 1;
		}
	}
#ifdef PPS_SYNC
	if (fhard) {
		u_int64_t scale;

		/*
		 * Feed the NTP PLL/FLL.
		 * The FLL wants to know how many (hardware) nanoseconds
		 * elapsed since the previous event.
		 */
		tcount = pps->capcount - pps->ppscount[2];
		pps->ppscount[2] = pps->capcount;
		tcount &= pps->capth->th_counter->tc_counter_mask;
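		/*
		 * Plain 2^64 / frequency: the raw counter-to-bintime
		 * scale, deliberately without the NTP adjustment.
		 */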
		scale = (u_int64_t)1 << 63;
		scale /= pps->capth->th_counter->tc_frequency;
		scale *= 2;
		bt.sec = 0;
		bt.frac = 0;
		bintime_addx(&bt, scale * tcount);
		bintime2timespec(&bt, &ts);
		hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec);
	}
#endif
}

/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing.  Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 */

static int tc_tick;

void
tc_ticktock(void)
{
	static int count;

	if (++count < tc_tick)
		return;
	count = 0;
	mutex_spin_enter(&timecounter_lock);
	tc_windup();
	mutex_spin_exit(&timecounter_lock);
}

void
inittimecounter(void)
{
	u_int p;

	mutex_init(&timecounter_lock, MUTEX_DEFAULT, IPL_SCHED);

	/*
	 * Set the initial timeout to
	 * max(1, <approx. number of hardclock ticks in a millisecond>).
	 * People should probably not use the sysctl to set the timeout
	 * to smaller than its initial value, since that value is the
	 * smallest reasonable one.  If they want better timestamps they
	 * should use the non-"get"* functions.
	 */
	if (hz > 1000)
		tc_tick = (hz + 500) / 1000;
	else
		tc_tick = 1;
	p = (tc_tick * 1000000) / hz;
	aprint_verbose("timecounter: Timecounters tick every %d.%03u msec\n",
	    p / 1000, p % 1000);

	/* warm up new timecounter (again) and get rolling. */
	(void)timecounter->tc_get_timecount(timecounter);
	(void)timecounter->tc_get_timecount(timecounter);
}