Home | History | Annotate | Line # | Download | only in kern
kern_tc.c revision 1.21.2.4
      1 /* $NetBSD: kern_tc.c,v 1.21.2.4 2008/02/18 21:06:46 mjf Exp $ */
      2 
      3 /*-
      4  * ----------------------------------------------------------------------------
      5  * "THE BEER-WARE LICENSE" (Revision 42):
      6  * <phk (at) FreeBSD.ORG> wrote this file.  As long as you retain this notice you
      7  * can do whatever you want with this stuff. If we meet some day, and you think
      8  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
      9  * ---------------------------------------------------------------------------
     10  */
     11 
     12 #include <sys/cdefs.h>
     13 /* __FBSDID("$FreeBSD: src/sys/kern/kern_tc.c,v 1.166 2005/09/19 22:16:31 andre Exp $"); */
     14 __KERNEL_RCSID(0, "$NetBSD: kern_tc.c,v 1.21.2.4 2008/02/18 21:06:46 mjf Exp $");
     15 
     16 #include "opt_ntp.h"
     17 
     18 #include <sys/param.h>
     19 #include <sys/kernel.h>
     20 #include <sys/reboot.h>	/* XXX just to get AB_VERBOSE */
     21 #include <sys/sysctl.h>
     22 #include <sys/syslog.h>
     23 #include <sys/systm.h>
     24 #include <sys/timepps.h>
     25 #include <sys/timetc.h>
     26 #include <sys/timex.h>
     27 #include <sys/evcnt.h>
     28 #include <sys/kauth.h>
     29 #include <sys/mutex.h>
     30 #include <sys/atomic.h>
     31 
     32 /*
     33  * A large step happens on boot.  This constant detects such steps.
     34  * It is relatively small so that ntp_update_second gets called enough
     35  * in the typical 'missed a couple of seconds' case, but doesn't loop
     36  * forever when the time step is large.
     37  */
     38 #define LARGE_STEP	200
     39 
     40 /*
     41  * Implement a dummy timecounter which we can use until we get a real one
     42  * in the air.  This allows the console and other early stuff to use
     43  * time services.
     44  */
     45 
     46 static u_int
     47 dummy_get_timecount(struct timecounter *tc)
     48 {
     49 	static u_int now;
     50 
     51 	return (++now);
     52 }
     53 
     54 static struct timecounter dummy_timecounter = {
     55 	dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000, NULL, NULL,
     56 };
     57 
     58 struct timehands {
     59 	/* These fields must be initialized by the driver. */
     60 	struct timecounter	*th_counter;
     61 	int64_t			th_adjustment;
     62 	u_int64_t		th_scale;
     63 	u_int	 		th_offset_count;
     64 	struct bintime		th_offset;
     65 	struct timeval		th_microtime;
     66 	struct timespec		th_nanotime;
     67 	/* Fields not to be copied in tc_windup start with th_generation. */
     68 	volatile u_int		th_generation;
     69 	struct timehands	*th_next;
     70 };
     71 
     72 static struct timehands th0;
     73 static struct timehands th9 = { .th_next = &th0, };
     74 static struct timehands th8 = { .th_next = &th9, };
     75 static struct timehands th7 = { .th_next = &th8, };
     76 static struct timehands th6 = { .th_next = &th7, };
     77 static struct timehands th5 = { .th_next = &th6, };
     78 static struct timehands th4 = { .th_next = &th5, };
     79 static struct timehands th3 = { .th_next = &th4, };
     80 static struct timehands th2 = { .th_next = &th3, };
     81 static struct timehands th1 = { .th_next = &th2, };
     82 static struct timehands th0 = {
     83 	.th_counter = &dummy_timecounter,
     84 	.th_scale = (uint64_t)-1 / 1000000,
     85 	.th_offset = { .sec = 1, .frac = 0 },
     86 	.th_generation = 1,
     87 	.th_next = &th1,
     88 };
     89 
     90 static struct timehands *volatile timehands = &th0;
     91 struct timecounter *timecounter = &dummy_timecounter;
     92 static struct timecounter *timecounters = &dummy_timecounter;
     93 
     94 time_t time_second = 1;
     95 time_t time_uptime = 1;
     96 
     97 static struct bintime timebasebin;
     98 
     99 static int timestepwarnings;
    100 
    101 extern kmutex_t time_lock;
    102 static kmutex_t tc_windup_lock;
    103 
    104 #ifdef __FreeBSD__
    105 SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW,
    106     &timestepwarnings, 0, "");
    107 #endif /* __FreeBSD__ */
    108 
    109 /*
    110  * sysctl helper routine for kern.timercounter.hardware
    111  */
    112 static int
    113 sysctl_kern_timecounter_hardware(SYSCTLFN_ARGS)
    114 {
    115 	struct sysctlnode node;
    116 	int error;
    117 	char newname[MAX_TCNAMELEN];
    118 	struct timecounter *newtc, *tc;
    119 
    120 	tc = timecounter;
    121 
    122 	strlcpy(newname, tc->tc_name, sizeof(newname));
    123 
    124 	node = *rnode;
    125 	node.sysctl_data = newname;
    126 	node.sysctl_size = sizeof(newname);
    127 
    128 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
    129 
    130 	if (error ||
    131 	    newp == NULL ||
    132 	    strncmp(newname, tc->tc_name, sizeof(newname)) == 0)
    133 		return error;
    134 
    135 	if (l != NULL && (error = kauth_authorize_system(l->l_cred,
    136 	    KAUTH_SYSTEM_TIME, KAUTH_REQ_SYSTEM_TIME_TIMECOUNTERS, newname,
    137 	    NULL, NULL)) != 0)
    138 		return (error);
    139 
    140 	if (!cold)
    141 		mutex_enter(&time_lock);
    142 	error = EINVAL;
    143 	for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
    144 		if (strcmp(newname, newtc->tc_name) != 0)
    145 			continue;
    146 		/* Warm up new timecounter. */
    147 		(void)newtc->tc_get_timecount(newtc);
    148 		(void)newtc->tc_get_timecount(newtc);
    149 		timecounter = newtc;
    150 		error = 0;
    151 		break;
    152 	}
    153 	if (!cold)
    154 		mutex_exit(&time_lock);
    155 	return error;
    156 }
    157 
    158 static int
    159 sysctl_kern_timecounter_choice(SYSCTLFN_ARGS)
    160 {
    161 	char buf[MAX_TCNAMELEN+48];
    162 	char *where = oldp;
    163 	const char *spc;
    164 	struct timecounter *tc;
    165 	size_t needed, left, slen;
    166 	int error;
    167 
    168 	if (newp != NULL)
    169 		return (EPERM);
    170 	if (namelen != 0)
    171 		return (EINVAL);
    172 
    173 	spc = "";
    174 	error = 0;
    175 	needed = 0;
    176 	left = *oldlenp;
    177 
    178 	mutex_enter(&time_lock);
    179 	for (tc = timecounters; error == 0 && tc != NULL; tc = tc->tc_next) {
    180 		if (where == NULL) {
    181 			needed += sizeof(buf);  /* be conservative */
    182 		} else {
    183 			slen = snprintf(buf, sizeof(buf), "%s%s(q=%d, f=%" PRId64
    184 					" Hz)", spc, tc->tc_name, tc->tc_quality,
    185 					tc->tc_frequency);
    186 			if (left < slen + 1)
    187 				break;
    188 			/* XXX use sysctl_copyout? (from sysctl_hw_disknames) */
    189 			/* XXX copyout with held lock. */
    190 			error = copyout(buf, where, slen + 1);
    191 			spc = " ";
    192 			where += slen;
    193 			needed += slen;
    194 			left -= slen;
    195 		}
    196 	}
    197 	mutex_exit(&time_lock);
    198 
    199 	*oldlenp = needed;
    200 	return (error);
    201 }
    202 
    203 SYSCTL_SETUP(sysctl_timecounter_setup, "sysctl timecounter setup")
    204 {
    205 	const struct sysctlnode *node;
    206 
    207 	sysctl_createv(clog, 0, NULL, &node,
    208 		       CTLFLAG_PERMANENT,
    209 		       CTLTYPE_NODE, "timecounter",
    210 		       SYSCTL_DESCR("time counter information"),
    211 		       NULL, 0, NULL, 0,
    212 		       CTL_KERN, CTL_CREATE, CTL_EOL);
    213 
    214 	if (node != NULL) {
    215 		sysctl_createv(clog, 0, NULL, NULL,
    216 			       CTLFLAG_PERMANENT,
    217 			       CTLTYPE_STRING, "choice",
    218 			       SYSCTL_DESCR("available counters"),
    219 			       sysctl_kern_timecounter_choice, 0, NULL, 0,
    220 			       CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
    221 
    222 		sysctl_createv(clog, 0, NULL, NULL,
    223 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
    224 			       CTLTYPE_STRING, "hardware",
    225 			       SYSCTL_DESCR("currently active time counter"),
    226 			       sysctl_kern_timecounter_hardware, 0, NULL, MAX_TCNAMELEN,
    227 			       CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
    228 
    229 		sysctl_createv(clog, 0, NULL, NULL,
    230 			       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
    231 			       CTLTYPE_INT, "timestepwarnings",
    232 			       SYSCTL_DESCR("log time steps"),
    233 			       NULL, 0, &timestepwarnings, 0,
    234 			       CTL_KERN, node->sysctl_num, CTL_CREATE, CTL_EOL);
    235 	}
    236 }
    237 
    238 #ifdef TC_COUNTERS
    239 #define	TC_STATS(name)							\
    240 static struct evcnt n##name =						\
    241     EVCNT_INITIALIZER(EVCNT_TYPE_MISC, NULL, "timecounter", #name);	\
    242 EVCNT_ATTACH_STATIC(n##name)
    243 TC_STATS(binuptime);    TC_STATS(nanouptime);    TC_STATS(microuptime);
    244 TC_STATS(bintime);      TC_STATS(nanotime);      TC_STATS(microtime);
    245 TC_STATS(getbinuptime); TC_STATS(getnanouptime); TC_STATS(getmicrouptime);
    246 TC_STATS(getbintime);   TC_STATS(getnanotime);   TC_STATS(getmicrotime);
    247 TC_STATS(setclock);
    248 #define	TC_COUNT(var)	var.ev_count++
    249 #undef TC_STATS
    250 #else
    251 #define	TC_COUNT(var)	/* nothing */
    252 #endif	/* TC_COUNTERS */
    253 
    254 static void tc_windup(void);
    255 
    256 /*
    257  * Return the difference between the timehands' counter value now and what
    258  * was when we copied it to the timehands' offset_count.
    259  */
    260 static __inline u_int
    261 tc_delta(struct timehands *th)
    262 {
    263 	struct timecounter *tc;
    264 
    265 	tc = th->th_counter;
    266 	return ((tc->tc_get_timecount(tc) -
    267 		 th->th_offset_count) & tc->tc_counter_mask);
    268 }
    269 
    270 /*
    271  * Functions for reading the time.  We have to loop until we are sure that
    272  * the timehands that we operated on was not updated under our feet.  See
    273  * the comment in <sys/timevar.h> for a description of these 12 functions.
    274  */
    275 
    276 void
    277 binuptime(struct bintime *bt)
    278 {
    279 	struct timehands *th;
    280 	u_int gen;
    281 
    282 	TC_COUNT(nbinuptime);
    283 	do {
    284 		th = timehands;
    285 		gen = th->th_generation;
    286 		*bt = th->th_offset;
    287 		bintime_addx(bt, th->th_scale * tc_delta(th));
    288 	} while (gen == 0 || gen != th->th_generation);
    289 }
    290 
    291 void
    292 nanouptime(struct timespec *tsp)
    293 {
    294 	struct bintime bt;
    295 
    296 	TC_COUNT(nnanouptime);
    297 	binuptime(&bt);
    298 	bintime2timespec(&bt, tsp);
    299 }
    300 
    301 void
    302 microuptime(struct timeval *tvp)
    303 {
    304 	struct bintime bt;
    305 
    306 	TC_COUNT(nmicrouptime);
    307 	binuptime(&bt);
    308 	bintime2timeval(&bt, tvp);
    309 }
    310 
    311 void
    312 bintime(struct bintime *bt)
    313 {
    314 
    315 	TC_COUNT(nbintime);
    316 	binuptime(bt);
    317 	bintime_add(bt, &timebasebin);
    318 }
    319 
    320 void
    321 nanotime(struct timespec *tsp)
    322 {
    323 	struct bintime bt;
    324 
    325 	TC_COUNT(nnanotime);
    326 	bintime(&bt);
    327 	bintime2timespec(&bt, tsp);
    328 }
    329 
    330 void
    331 microtime(struct timeval *tvp)
    332 {
    333 	struct bintime bt;
    334 
    335 	TC_COUNT(nmicrotime);
    336 	bintime(&bt);
    337 	bintime2timeval(&bt, tvp);
    338 }
    339 
    340 void
    341 getbinuptime(struct bintime *bt)
    342 {
    343 	struct timehands *th;
    344 	u_int gen;
    345 
    346 	TC_COUNT(ngetbinuptime);
    347 	do {
    348 		th = timehands;
    349 		gen = th->th_generation;
    350 		*bt = th->th_offset;
    351 	} while (gen == 0 || gen != th->th_generation);
    352 }
    353 
    354 void
    355 getnanouptime(struct timespec *tsp)
    356 {
    357 	struct timehands *th;
    358 	u_int gen;
    359 
    360 	TC_COUNT(ngetnanouptime);
    361 	do {
    362 		th = timehands;
    363 		gen = th->th_generation;
    364 		bintime2timespec(&th->th_offset, tsp);
    365 	} while (gen == 0 || gen != th->th_generation);
    366 }
    367 
    368 void
    369 getmicrouptime(struct timeval *tvp)
    370 {
    371 	struct timehands *th;
    372 	u_int gen;
    373 
    374 	TC_COUNT(ngetmicrouptime);
    375 	do {
    376 		th = timehands;
    377 		gen = th->th_generation;
    378 		bintime2timeval(&th->th_offset, tvp);
    379 	} while (gen == 0 || gen != th->th_generation);
    380 }
    381 
    382 void
    383 getbintime(struct bintime *bt)
    384 {
    385 	struct timehands *th;
    386 	u_int gen;
    387 
    388 	TC_COUNT(ngetbintime);
    389 	do {
    390 		th = timehands;
    391 		gen = th->th_generation;
    392 		*bt = th->th_offset;
    393 	} while (gen == 0 || gen != th->th_generation);
    394 	bintime_add(bt, &timebasebin);
    395 }
    396 
    397 void
    398 getnanotime(struct timespec *tsp)
    399 {
    400 	struct timehands *th;
    401 	u_int gen;
    402 
    403 	TC_COUNT(ngetnanotime);
    404 	do {
    405 		th = timehands;
    406 		gen = th->th_generation;
    407 		*tsp = th->th_nanotime;
    408 	} while (gen == 0 || gen != th->th_generation);
    409 }
    410 
    411 void
    412 getmicrotime(struct timeval *tvp)
    413 {
    414 	struct timehands *th;
    415 	u_int gen;
    416 
    417 	TC_COUNT(ngetmicrotime);
    418 	do {
    419 		th = timehands;
    420 		gen = th->th_generation;
    421 		*tvp = th->th_microtime;
    422 	} while (gen == 0 || gen != th->th_generation);
    423 }
    424 
    425 /*
    426  * Initialize a new timecounter and possibly use it.
    427  */
    428 void
    429 tc_init(struct timecounter *tc)
    430 {
    431 	u_int u;
    432 
    433 	u = tc->tc_frequency / tc->tc_counter_mask;
    434 	/* XXX: We need some margin here, 10% is a guess */
    435 	u *= 11;
    436 	u /= 10;
    437 	if (u > hz && tc->tc_quality >= 0) {
    438 		tc->tc_quality = -2000;
    439 		aprint_verbose(
    440 		    "timecounter: Timecounter \"%s\" frequency %ju Hz",
    441 			    tc->tc_name, (uintmax_t)tc->tc_frequency);
    442 		aprint_verbose(" -- Insufficient hz, needs at least %u\n", u);
    443 	} else if (tc->tc_quality >= 0 || bootverbose) {
    444 		aprint_verbose(
    445 		    "timecounter: Timecounter \"%s\" frequency %ju Hz "
    446 		    "quality %d\n", tc->tc_name, (uintmax_t)tc->tc_frequency,
    447 		    tc->tc_quality);
    448 	}
    449 
    450 	mutex_enter(&time_lock);
    451 	mutex_spin_enter(&tc_windup_lock);
    452 	tc->tc_next = timecounters;
    453 	timecounters = tc;
    454 	/*
    455 	 * Never automatically use a timecounter with negative quality.
    456 	 * Even though we run on the dummy counter, switching here may be
    457 	 * worse since this timecounter may not be monotonous.
    458 	 */
    459 	if (tc->tc_quality >= 0 && (tc->tc_quality > timecounter->tc_quality ||
    460 	    (tc->tc_quality == timecounter->tc_quality &&
    461 	    tc->tc_frequency > timecounter->tc_frequency))) {
    462 		(void)tc->tc_get_timecount(tc);
    463 		(void)tc->tc_get_timecount(tc);
    464 		timecounter = tc;
    465 		tc_windup();
    466 	}
    467 	mutex_spin_exit(&tc_windup_lock);
    468 	mutex_exit(&time_lock);
    469 }
    470 
    471 /*
    472  * Stop using a timecounter and remove it from the timecounters list.
    473  */
    474 int
    475 tc_detach(struct timecounter *target)
    476 {
    477 	struct timecounter *best, *tc;
    478 	struct timecounter **tcp = NULL;
    479 	int rc = 0;
    480 
    481 	mutex_enter(&time_lock);
    482 	for (tcp = &timecounters, tc = timecounters;
    483 	     tc != NULL;
    484 	     tcp = &tc->tc_next, tc = tc->tc_next) {
    485 		if (tc == target)
    486 			break;
    487 	}
    488 	if (tc == NULL) {
    489 		rc = ESRCH;
    490 		goto out;
    491 	}
    492 	*tcp = tc->tc_next;
    493 
    494 	if (timecounter != target)
    495 		goto out;
    496 
    497 	for (best = tc = timecounters; tc != NULL; tc = tc->tc_next) {
    498 		if (tc->tc_quality > best->tc_quality)
    499 			best = tc;
    500 		else if (tc->tc_quality < best->tc_quality)
    501 			continue;
    502 		else if (tc->tc_frequency > best->tc_frequency)
    503 			best = tc;
    504 	}
    505 	mutex_spin_enter(&tc_windup_lock);
    506 	(void)best->tc_get_timecount(best);
    507 	(void)best->tc_get_timecount(best);
    508 	timecounter = best;
    509 	tc_windup();
    510 	mutex_spin_exit(&tc_windup_lock);
    511 out:
    512 	mutex_exit(&time_lock);
    513 	return rc;
    514 }
    515 
    516 /* Report the frequency of the current timecounter. */
    517 u_int64_t
    518 tc_getfrequency(void)
    519 {
    520 
    521 	return (timehands->th_counter->tc_frequency);
    522 }
    523 
    524 /*
    525  * Step our concept of UTC.  This is done by modifying our estimate of
    526  * when we booted.
    527  */
    528 void
    529 tc_setclock(struct timespec *ts)
    530 {
    531 	struct timespec ts2;
    532 	struct bintime bt, bt2;
    533 
    534 	mutex_spin_enter(&tc_windup_lock);
    535 	TC_COUNT(nsetclock);
    536 	binuptime(&bt2);
    537 	timespec2bintime(ts, &bt);
    538 	bintime_sub(&bt, &bt2);
    539 	bintime_add(&bt2, &timebasebin);
    540 	timebasebin = bt;
    541 	tc_windup();
    542 	mutex_spin_exit(&tc_windup_lock);
    543 
    544 	if (timestepwarnings) {
    545 		bintime2timespec(&bt2, &ts2);
    546 		log(LOG_INFO, "Time stepped from %jd.%09ld to %jd.%09ld\n",
    547 		    (intmax_t)ts2.tv_sec, ts2.tv_nsec,
    548 		    (intmax_t)ts->tv_sec, ts->tv_nsec);
    549 	}
    550 }
    551 
    552 /*
    553  * Initialize the next struct timehands in the ring and make
    554  * it the active timehands.  Along the way we might switch to a different
    555  * timecounter and/or do seconds processing in NTP.  Slightly magic.
    556  */
    557 static void
    558 tc_windup(void)
    559 {
    560 	struct bintime bt;
    561 	struct timehands *th, *tho;
    562 	u_int64_t scale;
    563 	u_int delta, ncount, ogen;
    564 	int i, s_update;
    565 	time_t t;
    566 
    567 	KASSERT(mutex_owned(&tc_windup_lock));
    568 
    569 	s_update = 0;
    570 
    571 	/*
    572 	 * Make the next timehands a copy of the current one, but do not
    573 	 * overwrite the generation or next pointer.  While we update
    574 	 * the contents, the generation must be zero.  Ensure global
    575 	 * visibility of the generation before proceeding.
    576 	 */
    577 	tho = timehands;
    578 	th = tho->th_next;
    579 	ogen = th->th_generation;
    580 	th->th_generation = 0;
    581 	membar_producer();
    582 	bcopy(tho, th, offsetof(struct timehands, th_generation));
    583 
    584 	/*
    585 	 * Capture a timecounter delta on the current timecounter and if
    586 	 * changing timecounters, a counter value from the new timecounter.
    587 	 * Update the offset fields accordingly.
    588 	 */
    589 	delta = tc_delta(th);
    590 	if (th->th_counter != timecounter)
    591 		ncount = timecounter->tc_get_timecount(timecounter);
    592 	else
    593 		ncount = 0;
    594 	th->th_offset_count += delta;
    595 	th->th_offset_count &= th->th_counter->tc_counter_mask;
    596 	bintime_addx(&th->th_offset, th->th_scale * delta);
    597 
    598 	/*
    599 	 * Hardware latching timecounters may not generate interrupts on
    600 	 * PPS events, so instead we poll them.  There is a finite risk that
    601 	 * the hardware might capture a count which is later than the one we
    602 	 * got above, and therefore possibly in the next NTP second which might
    603 	 * have a different rate than the current NTP second.  It doesn't
    604 	 * matter in practice.
    605 	 */
    606 	if (tho->th_counter->tc_poll_pps)
    607 		tho->th_counter->tc_poll_pps(tho->th_counter);
    608 
    609 	/*
    610 	 * Deal with NTP second processing.  The for loop normally
    611 	 * iterates at most once, but in extreme situations it might
    612 	 * keep NTP sane if timeouts are not run for several seconds.
    613 	 * At boot, the time step can be large when the TOD hardware
    614 	 * has been read, so on really large steps, we call
    615 	 * ntp_update_second only twice.  We need to call it twice in
    616 	 * case we missed a leap second.
    617 	 * If NTP is not compiled in ntp_update_second still calculates
    618 	 * the adjustment resulting from adjtime() calls.
    619 	 */
    620 	bt = th->th_offset;
    621 	bintime_add(&bt, &timebasebin);
    622 	i = bt.sec - tho->th_microtime.tv_sec;
    623 	if (i > LARGE_STEP)
    624 		i = 2;
    625 	for (; i > 0; i--) {
    626 		t = bt.sec;
    627 		ntp_update_second(&th->th_adjustment, &bt.sec);
    628 		s_update = 1;
    629 		if (bt.sec != t)
    630 			timebasebin.sec += bt.sec - t;
    631 	}
    632 
    633 	/* Update the UTC timestamps used by the get*() functions. */
    634 	/* XXX shouldn't do this here.  Should force non-`get' versions. */
    635 	bintime2timeval(&bt, &th->th_microtime);
    636 	bintime2timespec(&bt, &th->th_nanotime);
    637 
    638 	/* Now is a good time to change timecounters. */
    639 	if (th->th_counter != timecounter) {
    640 		th->th_counter = timecounter;
    641 		th->th_offset_count = ncount;
    642 		s_update = 1;
    643 	}
    644 
    645 	/*-
    646 	 * Recalculate the scaling factor.  We want the number of 1/2^64
    647 	 * fractions of a second per period of the hardware counter, taking
    648 	 * into account the th_adjustment factor which the NTP PLL/adjtime(2)
    649 	 * processing provides us with.
    650 	 *
    651 	 * The th_adjustment is nanoseconds per second with 32 bit binary
    652 	 * fraction and we want 64 bit binary fraction of second:
    653 	 *
    654 	 *	 x = a * 2^32 / 10^9 = a * 4.294967296
    655 	 *
    656 	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
    657 	 * we can only multiply by about 850 without overflowing, but that
    658 	 * leaves suitably precise fractions for multiply before divide.
    659 	 *
    660 	 * Divide before multiply with a fraction of 2199/512 results in a
    661 	 * systematic undercompensation of 10PPM of th_adjustment.  On a
    662 	 * 5000PPM adjustment this is a 0.05PPM error.  This is acceptable.
    663  	 *
    664 	 * We happily sacrifice the lowest of the 64 bits of our result
    665 	 * to the goddess of code clarity.
    666 	 *
    667 	 */
    668 	if (s_update) {
    669 		scale = (u_int64_t)1 << 63;
    670 		scale += (th->th_adjustment / 1024) * 2199;
    671 		scale /= th->th_counter->tc_frequency;
    672 		th->th_scale = scale * 2;
    673 	}
    674 	/*
    675 	 * Now that the struct timehands is again consistent, set the new
    676 	 * generation number, making sure to not make it zero.  Ensure
    677 	 * changes are globally visible before changing.
    678 	 */
    679 	if (++ogen == 0)
    680 		ogen = 1;
    681 	membar_producer();
    682 	th->th_generation = ogen;
    683 
    684 	/*
    685 	 * Go live with the new struct timehands.  Ensure changes are
    686 	 * globally visible before changing.
    687 	 */
    688 	time_second = th->th_microtime.tv_sec;
    689 	time_uptime = th->th_offset.sec;
    690 	membar_producer();
    691 	timehands = th;
    692 
    693 	/*
    694 	 * Force users of the old timehand to move on.  This is
    695 	 * necessary for MP systems; we need to ensure that the
    696 	 * consumers will move away from the old timehand before
    697 	 * we begin updating it again when we eventually wrap
    698 	 * around.
    699 	 */
    700 	if (++tho->th_generation == 0)
    701 		tho->th_generation = 1;
    702 }
    703 
    704 /*
    705  * RFC 2783 PPS-API implementation.
    706  */
    707 
    708 int
    709 pps_ioctl(u_long cmd, void *data, struct pps_state *pps)
    710 {
    711 	pps_params_t *app;
    712 	pps_info_t *pipi;
    713 #ifdef PPS_SYNC
    714 	int *epi;
    715 #endif
    716 
    717 	KASSERT(pps != NULL); /* XXX ("NULL pps pointer in pps_ioctl") */
    718 	switch (cmd) {
    719 	case PPS_IOC_CREATE:
    720 		return (0);
    721 	case PPS_IOC_DESTROY:
    722 		return (0);
    723 	case PPS_IOC_SETPARAMS:
    724 		app = (pps_params_t *)data;
    725 		if (app->mode & ~pps->ppscap)
    726 			return (EINVAL);
    727 		pps->ppsparam = *app;
    728 		return (0);
    729 	case PPS_IOC_GETPARAMS:
    730 		app = (pps_params_t *)data;
    731 		*app = pps->ppsparam;
    732 		app->api_version = PPS_API_VERS_1;
    733 		return (0);
    734 	case PPS_IOC_GETCAP:
    735 		*(int*)data = pps->ppscap;
    736 		return (0);
    737 	case PPS_IOC_FETCH:
    738 		pipi = (pps_info_t *)data;
    739 		pps->ppsinfo.current_mode = pps->ppsparam.mode;
    740 		*pipi = pps->ppsinfo;
    741 		return (0);
    742 	case PPS_IOC_KCBIND:
    743 #ifdef PPS_SYNC
    744 		epi = (int *)data;
    745 		/* XXX Only root should be able to do this */
    746 		if (*epi & ~pps->ppscap)
    747 			return (EINVAL);
    748 		pps->kcmode = *epi;
    749 		return (0);
    750 #else
    751 		return (EOPNOTSUPP);
    752 #endif
    753 	default:
    754 		return (EPASSTHROUGH);
    755 	}
    756 }
    757 
    758 void
    759 pps_init(struct pps_state *pps)
    760 {
    761 	pps->ppscap |= PPS_TSFMT_TSPEC;
    762 	if (pps->ppscap & PPS_CAPTUREASSERT)
    763 		pps->ppscap |= PPS_OFFSETASSERT;
    764 	if (pps->ppscap & PPS_CAPTURECLEAR)
    765 		pps->ppscap |= PPS_OFFSETCLEAR;
    766 }
    767 
    768 void
    769 pps_capture(struct pps_state *pps)
    770 {
    771 	struct timehands *th;
    772 
    773 	KASSERT(pps != NULL); /* XXX ("NULL pps pointer in pps_capture") */
    774 	th = timehands;
    775 	pps->capgen = th->th_generation;
    776 	pps->capth = th;
    777 	pps->capcount = th->th_counter->tc_get_timecount(th->th_counter);
    778 	if (pps->capgen != th->th_generation)
    779 		pps->capgen = 0;
    780 }
    781 
    782 void
    783 pps_event(struct pps_state *pps, int event)
    784 {
    785 	struct bintime bt;
    786 	struct timespec ts, *tsp, *osp;
    787 	u_int tcount, *pcount;
    788 	int foff, fhard;
    789 	pps_seq_t *pseq;
    790 
    791 	KASSERT(pps != NULL); /* XXX ("NULL pps pointer in pps_event") */
    792 	/* If the timecounter was wound up underneath us, bail out. */
    793 	if (pps->capgen == 0 || pps->capgen != pps->capth->th_generation)
    794 		return;
    795 
    796 	/* Things would be easier with arrays. */
    797 	if (event == PPS_CAPTUREASSERT) {
    798 		tsp = &pps->ppsinfo.assert_timestamp;
    799 		osp = &pps->ppsparam.assert_offset;
    800 		foff = pps->ppsparam.mode & PPS_OFFSETASSERT;
    801 		fhard = pps->kcmode & PPS_CAPTUREASSERT;
    802 		pcount = &pps->ppscount[0];
    803 		pseq = &pps->ppsinfo.assert_sequence;
    804 	} else {
    805 		tsp = &pps->ppsinfo.clear_timestamp;
    806 		osp = &pps->ppsparam.clear_offset;
    807 		foff = pps->ppsparam.mode & PPS_OFFSETCLEAR;
    808 		fhard = pps->kcmode & PPS_CAPTURECLEAR;
    809 		pcount = &pps->ppscount[1];
    810 		pseq = &pps->ppsinfo.clear_sequence;
    811 	}
    812 
    813 	/*
    814 	 * If the timecounter changed, we cannot compare the count values, so
    815 	 * we have to drop the rest of the PPS-stuff until the next event.
    816 	 */
    817 	if (pps->ppstc != pps->capth->th_counter) {
    818 		pps->ppstc = pps->capth->th_counter;
    819 		*pcount = pps->capcount;
    820 		pps->ppscount[2] = pps->capcount;
    821 		return;
    822 	}
    823 
    824 	/* Convert the count to a timespec. */
    825 	tcount = pps->capcount - pps->capth->th_offset_count;
    826 	tcount &= pps->capth->th_counter->tc_counter_mask;
    827 	bt = pps->capth->th_offset;
    828 	bintime_addx(&bt, pps->capth->th_scale * tcount);
    829 	bintime_add(&bt, &timebasebin);
    830 	bintime2timespec(&bt, &ts);
    831 
    832 	/* If the timecounter was wound up underneath us, bail out. */
    833 	if (pps->capgen != pps->capth->th_generation)
    834 		return;
    835 
    836 	*pcount = pps->capcount;
    837 	(*pseq)++;
    838 	*tsp = ts;
    839 
    840 	if (foff) {
    841 		timespecadd(tsp, osp, tsp);
    842 		if (tsp->tv_nsec < 0) {
    843 			tsp->tv_nsec += 1000000000;
    844 			tsp->tv_sec -= 1;
    845 		}
    846 	}
    847 #ifdef PPS_SYNC
    848 	if (fhard) {
    849 		u_int64_t scale;
    850 
    851 		/*
    852 		 * Feed the NTP PLL/FLL.
    853 		 * The FLL wants to know how many (hardware) nanoseconds
    854 		 * elapsed since the previous event.
    855 		 */
    856 		tcount = pps->capcount - pps->ppscount[2];
    857 		pps->ppscount[2] = pps->capcount;
    858 		tcount &= pps->capth->th_counter->tc_counter_mask;
    859 		scale = (u_int64_t)1 << 63;
    860 		scale /= pps->capth->th_counter->tc_frequency;
    861 		scale *= 2;
    862 		bt.sec = 0;
    863 		bt.frac = 0;
    864 		bintime_addx(&bt, scale * tcount);
    865 		bintime2timespec(&bt, &ts);
    866 		hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec);
    867 	}
    868 #endif
    869 }
    870 
    871 /*
    872  * Timecounters need to be updated every so often to prevent the hardware
    873  * counter from overflowing.  Updating also recalculates the cached values
    874  * used by the get*() family of functions, so their precision depends on
    875  * the update frequency.
    876  */
    877 
    878 static int tc_tick;
    879 
    880 void
    881 tc_ticktock(void)
    882 {
    883 	static int count;
    884 
    885 	if (++count < tc_tick)
    886 		return;
    887 	count = 0;
    888 	mutex_spin_enter(&tc_windup_lock);
    889 	tc_windup();
    890 	mutex_spin_exit(&tc_windup_lock);
    891 }
    892 
    893 void
    894 inittimecounter(void)
    895 {
    896 	u_int p;
    897 
    898 	mutex_init(&tc_windup_lock, MUTEX_DEFAULT, IPL_SCHED);
    899 
    900 	/*
    901 	 * Set the initial timeout to
    902 	 * max(1, <approx. number of hardclock ticks in a millisecond>).
    903 	 * People should probably not use the sysctl to set the timeout
    904 	 * to smaller than its inital value, since that value is the
    905 	 * smallest reasonable one.  If they want better timestamps they
    906 	 * should use the non-"get"* functions.
    907 	 */
    908 	if (hz > 1000)
    909 		tc_tick = (hz + 500) / 1000;
    910 	else
    911 		tc_tick = 1;
    912 	p = (tc_tick * 1000000) / hz;
    913 	aprint_verbose("timecounter: Timecounters tick every %d.%03u msec\n",
    914 	    p / 1000, p % 1000);
    915 
    916 	/* warm up new timecounter (again) and get rolling. */
    917 	(void)timecounter->tc_get_timecount(timecounter);
    918 	(void)timecounter->tc_get_timecount(timecounter);
    919 }
    920