Home | History | Annotate | Line # | Download | only in kern
kern_heartbeat.c revision 1.1
      1 /*	$NetBSD: kern_heartbeat.c,v 1.1 2023/07/07 12:34:50 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2023 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * heartbeat(9) -- periodic checks to ensure CPUs are making progress
     31  *
     32  * Manual tests to run when changing this file.  Magic numbers are for
     33  * evbarm; adjust for other platforms.  Tests involving cpuctl
     34  * online/offline assume a 2-CPU system -- for full testing on a >2-CPU
     35  * system, offline all but one CPU.
     36  *
     37  * 1.	cpuctl offline 0
     38  *	sleep 20
     39  *	cpuctl online 0
     40  *
     41  * 2.	cpuctl offline 1
     42  *	sleep 20
     43  *	cpuctl online 1
     44  *
     45  * 3.	cpuctl offline 0
     46  *	sysctl -w kern.heartbeat.max_period=5
     47  *	sleep 10
     48  *	sysctl -w kern.heartbeat.max_period=0
     49  *	sleep 10
     50  *	sysctl -w kern.heartbeat.max_period=5
     51  *	sleep 10
     52  *	cpuctl online 0
     53  *
     54  * 4.	sysctl -w debug.crashme_enable=1
     55  *	sysctl -w debug.crashme.spl_spinout=1   # IPL_SOFTCLOCK
     56  *	# verify system panics after 15sec
     57  *
     58  * 5.	sysctl -w debug.crashme_enable=1
     59  *	sysctl -w debug.crashme.spl_spinout=6   # IPL_SCHED
     60  *	# verify system panics after 15sec
     61  *
     62  * 6.	cpuctl offline 0
     63  *	sysctl -w debug.crashme_enable=1
     64  *	sysctl -w debug.crashme.spl_spinout=1   # IPL_SOFTCLOCK
     65  *	# verify system panics after 15sec
     66  *
     67  * 7.	cpuctl offline 0
     68  *	sysctl -w debug.crashme_enable=1
     69  *	sysctl -w debug.crashme.spl_spinout=5   # IPL_VM
     70  *	# verify system panics after 15sec
     71  *
     72  *	# Not this -- IPL_SCHED and IPL_HIGH spinout on a single CPU
     73  *	# require a hardware watchdog timer.
     74  *	#cpuctl offline 0
     75  *	#sysctl -w debug.crashme_enable=1
     76  *	#sysctl -w debug.crashme.spl_spinout=6   # IPL_SCHED
     77  *	# hope watchdog timer kicks in
     78  */
     79 
     80 #include <sys/cdefs.h>
     81 __KERNEL_RCSID(0, "$NetBSD: kern_heartbeat.c,v 1.1 2023/07/07 12:34:50 riastradh Exp $");
     82 
     83 #ifdef _KERNEL_OPT
     84 #include "opt_ddb.h"
     85 #include "opt_heartbeat.h"
     86 #endif
     87 
     88 #include "heartbeat.h"
     89 
     90 #include <sys/param.h>
     91 #include <sys/types.h>
     92 
     93 #include <sys/atomic.h>
     94 #include <sys/cpu.h>
     95 #include <sys/errno.h>
     96 #include <sys/heartbeat.h>
     97 #include <sys/ipi.h>
     98 #include <sys/mutex.h>
     99 #include <sys/sysctl.h>
    100 #include <sys/systm.h>
    101 #include <sys/xcall.h>
    102 
    103 #ifdef DDB
    104 #include <ddb/ddb.h>
    105 #endif
    106 
/*
 * Global state.
 *
 *	heartbeat_lock serializes updates to heartbeat_max_period_secs
 *	and heartbeat_max_period_ticks.  Two separate variables so we
 *	can avoid multiplication or division in the heartbeat routine.
 *	Writers hold the lock and publish both values with
 *	atomic_store_relaxed; heartbeat() reads them with
 *	atomic_load_relaxed without taking the lock.
 *
 *	heartbeat_sih is stable after initialization in
 *	heartbeat_start.
 */
kmutex_t heartbeat_lock			__cacheline_aligned;
/* Maximum period in seconds; zero means heartbeat checks are disabled. */
unsigned heartbeat_max_period_secs	__read_mostly;
/* The same period expressed in ticks, i.e. seconds multiplied by hz. */
unsigned heartbeat_max_period_ticks	__read_mostly;

/* Soft interrupt handle for heartbeat_intr, scheduled from heartbeat(). */
void *heartbeat_sih			__read_mostly;
    122 
/*
 * heartbeat_suspend()
 *
 *	Stop heartbeat monitoring of the CPU we are running on.
 *
 *	The caller invokes this with preemption disabled, once the
 *	current CPU has been flagged offline but while it is still
 *	executing.
 */
void
heartbeat_suspend(void)
{

	/*
	 * There is no per-CPU state to tear down: the heartbeat
	 * checks test SPCF_OFFLINE themselves and skip offline CPUs.
	 */
	KASSERT(kpreempt_disabled());
}
    141 
    142 /*
    143  * heartbeat_resume()
    144  *
    145  *	Resume heartbeat monitoring of the current CPU.
    146  *
    147  *	Called after the current CPU has started running but before it
    148  *	has been marked online.  Also used internally when starting up
    149  *	heartbeat monitoring at boot or when the maximum period is set
    150  *	from zero to nonzero.  Caller must have preemption disabled.
    151  */
    152 void
    153 heartbeat_resume(void)
    154 {
    155 	struct cpu_info *ci = curcpu();
    156 	int s;
    157 
    158 	KASSERT(kpreempt_disabled());
    159 
    160 	/*
    161 	 * Block heartbeats while we reset the state so we don't
    162 	 * spuriously think we had a heart attack in the middle of
    163 	 * resetting the count and the uptime stamp.
    164 	 */
    165 	s = splsched();
    166 	ci->ci_heartbeat_count = 0;
    167 	ci->ci_heartbeat_uptime_cache = atomic_load_relaxed(&time_uptime);
    168 	ci->ci_heartbeat_uptime_stamp = 0;
    169 	splx(s);
    170 }
    171 
/*
 * heartbeat_reset_xc(a, b)
 *
 *	Cross-call trampoline: reset the heartbeat state of the CPU it
 *	runs on, right before heartbeat checks get enabled.  Both
 *	arguments are ignored.
 */
static void
heartbeat_reset_xc(void *a, void *b)
{

	(void)a;
	(void)b;

	heartbeat_resume();
}
    184 
    185 /*
    186  * set_max_period(max_period)
    187  *
    188  *	Set the maximum period, in seconds, for heartbeat checks.
    189  *
    190  *	- If max_period is zero, disable them.
    191  *
    192  *	- If the max period was zero and max_period is nonzero, ensure
    193  *	  all CPUs' heartbeat uptime caches are up-to-date before
    194  *	  re-enabling them.
    195  *
    196  *	max_period must be below UINT_MAX/4/hz to avoid arithmetic
    197  *	overflow and give room for slop.
    198  *
    199  *	Caller must hold heartbeat_lock.
    200  */
    201 static void
    202 set_max_period(unsigned max_period)
    203 {
    204 
    205 	KASSERTMSG(max_period <= UINT_MAX/4/hz,
    206 	    "max_period=%u must not exceed UINT_MAX/4/hz=%u (hz=%u)",
    207 	    max_period, UINT_MAX/4/hz, hz);
    208 	KASSERT(mutex_owned(&heartbeat_lock));
    209 
    210 	/*
    211 	 * If we're enabling heartbeat checks, make sure we have a
    212 	 * reasonably up-to-date time_uptime cache on all CPUs so we
    213 	 * don't think we had an instant heart attack.
    214 	 */
    215 	if (heartbeat_max_period_secs == 0 && max_period != 0)
    216 		xc_wait(xc_broadcast(0, &heartbeat_reset_xc, NULL, NULL));
    217 
    218 	/*
    219 	 * Once the heartbeat state has been updated on all (online)
    220 	 * CPUs, set the period.  At this point, heartbeat checks can
    221 	 * begin.
    222 	 */
    223 	atomic_store_relaxed(&heartbeat_max_period_secs, max_period);
    224 	atomic_store_relaxed(&heartbeat_max_period_ticks, max_period*hz);
    225 }
    226 
    227 /*
    228  * heartbeat_max_period_ticks(SYSCTLFN_ARGS)
    229  *
    230  *	Sysctl handler for sysctl kern.heartbeat.max_period.  Verifies
    231  *	it lies within a reasonable interval and sets it.
    232  */
    233 static int
    234 heartbeat_max_period_sysctl(SYSCTLFN_ARGS)
    235 {
    236 	struct sysctlnode node;
    237 	unsigned max_period;
    238 	int error;
    239 
    240 	mutex_enter(&heartbeat_lock);
    241 
    242 	max_period = heartbeat_max_period_secs;
    243 	node = *rnode;
    244 	node.sysctl_data = &max_period;
    245 	error = sysctl_lookup(SYSCTLFN_CALL(&node));
    246 	if (error || newp == NULL)
    247 		goto out;
    248 
    249 	/*
    250 	 * Ensure there's plenty of slop between heartbeats.
    251 	 */
    252 	if (max_period > UINT_MAX/4/hz) {
    253 		error = EOVERFLOW;
    254 		goto out;
    255 	}
    256 
    257 	/*
    258 	 * Success!  Set the period.  This enables heartbeat checks if
    259 	 * we went from zero period to nonzero period, or disables them
    260 	 * if the other way around.
    261 	 */
    262 	set_max_period(max_period);
    263 	error = 0;
    264 
    265 out:	mutex_exit(&heartbeat_lock);
    266 	return error;
    267 }
    268 
    269 /*
    270  * sysctl_heartbeat_setup()
    271  *
    272  *	Set up the kern.heartbeat.* sysctl subtree.
    273  */
    274 SYSCTL_SETUP(sysctl_heartbeat_setup, "sysctl kern.heartbeat setup")
    275 {
    276 	const struct sysctlnode *rnode;
    277 	int error;
    278 
    279 	mutex_init(&heartbeat_lock, MUTEX_DEFAULT, IPL_NONE);
    280 
    281 	/* kern.heartbeat */
    282 	error = sysctl_createv(NULL, 0, NULL, &rnode,
    283 	    CTLFLAG_PERMANENT,
    284 	    CTLTYPE_NODE, "heartbeat",
    285 	    SYSCTL_DESCR("Kernel heartbeat parameters"),
    286 	    NULL, 0, NULL, 0,
    287 	    CTL_KERN, CTL_CREATE, CTL_EOL);
    288 	if (error) {
    289 		printf("%s: failed to create kern.heartbeat: %d\n",
    290 		    __func__, error);
    291 		return;
    292 	}
    293 
    294 	/* kern.heartbeat.max_period */
    295 	error = sysctl_createv(NULL, 0, &rnode, NULL,
    296 	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
    297 	    CTLTYPE_INT, "max_period",
    298 	    SYSCTL_DESCR("Max seconds between heartbeats before panic"),
    299 	    &heartbeat_max_period_sysctl, 0, NULL, 0,
    300 	    CTL_CREATE, CTL_EOL);
    301 	if (error) {
    302 		printf("%s: failed to create kern.heartbeat.max_period: %d\n",
    303 		    __func__, error);
    304 		return;
    305 	}
    306 }
    307 
    308 /*
    309  * heartbeat_intr(cookie)
    310  *
    311  *	Soft interrupt handler to update the local CPU's view of the
    312  *	system uptime.  This runs at the same priority level as
    313  *	callouts, so if callouts are stuck on this CPU, it won't run,
    314  *	and eventually another CPU will notice that this one is stuck.
    315  *
    316  *	Don't do spl* here -- keep it to a minimum so if anything goes
    317  *	wrong we don't end up with hard interrupts blocked and unable
    318  *	to detect a missed heartbeat.
    319  */
    320 static void
    321 heartbeat_intr(void *cookie)
    322 {
    323 	unsigned count = atomic_load_relaxed(&curcpu()->ci_heartbeat_count);
    324 	unsigned uptime = atomic_load_relaxed(&time_uptime);
    325 
    326 	atomic_store_relaxed(&curcpu()->ci_heartbeat_uptime_stamp, count);
    327 	atomic_store_relaxed(&curcpu()->ci_heartbeat_uptime_cache, uptime);
    328 }
    329 
    330 /*
    331  * heartbeat_start()
    332  *
    333  *	Start system heartbeat monitoring.
    334  */
    335 void
    336 heartbeat_start(void)
    337 {
    338 	const unsigned max_period = HEARTBEAT_MAX_PERIOD_DEFAULT;
    339 
    340 	/*
    341 	 * Establish a softint so we can schedule it once ready.  This
    342 	 * should be at the lowest softint priority level so that we
    343 	 * ensure all softint priorities are making progress.
    344 	 */
    345 	heartbeat_sih = softint_establish(SOFTINT_CLOCK|SOFTINT_MPSAFE,
    346 	    &heartbeat_intr, NULL);
    347 
    348 	/*
    349 	 * Now that the softint is established, kick off heartbeat
    350 	 * monitoring with the default period.  This will initialize
    351 	 * the per-CPU state to an up-to-date cache of time_uptime.
    352 	 */
    353 	mutex_enter(&heartbeat_lock);
    354 	set_max_period(max_period);
    355 	mutex_exit(&heartbeat_lock);
    356 }
    357 
    358 /*
    359  * defibrillator(cookie)
    360  *
    361  *	IPI handler for defibrillation.  If the CPU's heart has stopped
    362  *	beating normally, but the CPU can still execute things,
    363  *	acknowledge the IPI to the doctor and then panic so we at least
    364  *	get a stack trace from whatever the current CPU is stuck doing,
    365  *	if not a core dump.
    366  *
    367  *	(This metaphor is a little stretched, since defibrillation is
    368  *	usually administered when the heart is beating errattically but
    369  *	hasn't stopped, and causes the heart to stop temporarily, and
    370  *	one hopes it is not fatal.  But we're (software) engineers, so
    371  *	we can stretch metaphors like silly putty in a blender.)
    372  */
    373 static void
    374 defibrillator(void *cookie)
    375 {
    376 	bool *ack = cookie;
    377 
    378 	atomic_store_relaxed(ack, true);
    379 	panic("%s[%d %s]: heart stopped beating", cpu_name(curcpu()),
    380 	    curlwp->l_lid,
    381 	    curlwp->l_name ? curlwp->l_name : curproc->p_comm);
    382 }
    383 
    384 /*
    385  * defibrillate(ci, unsigned d)
    386  *
    387  *	The patient CPU ci's heart has stopped beating after d seconds.
    388  *	Force the patient CPU ci to panic, or panic on this CPU if the
    389  *	patient CPU doesn't respond within 1sec.
    390  */
    391 static void __noinline
    392 defibrillate(struct cpu_info *ci, unsigned d)
    393 {
    394 	bool ack = false;
    395 	ipi_msg_t msg = {
    396 		.func = &defibrillator,
    397 		.arg = &ack,
    398 	};
    399 	unsigned countdown = 1000; /* 1sec */
    400 
    401 	KASSERT(kpreempt_disabled());
    402 
    403 	/*
    404 	 * First notify the console that the patient CPU's heart seems
    405 	 * to have stopped beating.
    406 	 */
    407 	printf("%s: found %s heart stopped beating after %u seconds\n",
    408 	    cpu_name(curcpu()), cpu_name(ci), d);
    409 
    410 	/*
    411 	 * Next, give the patient CPU a chance to panic, so we get a
    412 	 * stack trace on that CPU even if we don't get a crash dump.
    413 	 */
    414 	ipi_unicast(&msg, ci);
    415 
    416 	/*
    417 	 * Busy-wait up to 1sec for the patient CPU to print a stack
    418 	 * trace and panic.  If the patient CPU acknowledges the IPI,
    419 	 * or if we're panicking anyway, just give up and stop here --
    420 	 * the system is coming down soon and we should avoid getting
    421 	 * in the way.
    422 	 */
    423 	while (countdown --> 0) {
    424 		if (atomic_load_relaxed(&ack) ||
    425 		    atomic_load_relaxed(&panicstr) != NULL)
    426 			return;
    427 		DELAY(1000);	/* 1ms */
    428 	}
    429 
    430 	/*
    431 	 * The patient CPU failed to acknowledge the panic request.
    432 	 * Panic now; with any luck, we'll get a crash dump.
    433 	 */
    434 	panic("%s: found %s heart stopped beating and unresponsive",
    435 	    cpu_name(curcpu()), cpu_name(ci));
    436 }
    437 
    438 /*
    439  * select_patient()
    440  *
    441  *	Select another CPU to check the heartbeat of.  Returns NULL if
    442  *	there are no other online CPUs.  Never returns curcpu().
    443  *	Caller must have kpreemption disabled.
    444  */
    445 static struct cpu_info *
    446 select_patient(void)
    447 {
    448 	CPU_INFO_ITERATOR cii;
    449 	struct cpu_info *first = NULL, *patient = NULL, *ci;
    450 	bool passedcur = false;
    451 
    452 	KASSERT(kpreempt_disabled());
    453 
    454 	/*
    455 	 * In the iteration order of all CPUs, find the next online CPU
    456 	 * after curcpu(), or the first online one if curcpu() is last
    457 	 * in the iteration order.
    458 	 */
    459 	for (CPU_INFO_FOREACH(cii, ci)) {
    460 		if (ci->ci_schedstate.spc_flags & SPCF_OFFLINE)
    461 			continue;
    462 		if (passedcur) {
    463 			/*
    464 			 * (...|curcpu()|ci|...)
    465 			 *
    466 			 * Found the patient right after curcpu().
    467 			 */
    468 			KASSERT(patient != ci);
    469 			patient = ci;
    470 			break;
    471 		}
    472 		if (ci == curcpu()) {
    473 			/*
    474 			 * (...|prev|ci=curcpu()|next|...)
    475 			 *
    476 			 * Note that we want next (or first, if there's
    477 			 * nothing after curcpu()).
    478 			 */
    479 			passedcur = true;
    480 			continue;
    481 		}
    482 		if (first == NULL) {
    483 			/*
    484 			 * (ci|...|curcpu()|...)
    485 			 *
    486 			 * Record ci as first in case there's nothing
    487 			 * after curcpu().
    488 			 */
    489 			first = ci;
    490 			continue;
    491 		}
    492 	}
    493 
    494 	/*
    495 	 * If we hit the end, wrap around to the beginning.
    496 	 */
    497 	if (patient == NULL) {
    498 		KASSERT(passedcur);
    499 		patient = first;
    500 	}
    501 
    502 	return patient;
    503 }
    504 
    505 /*
    506  * heartbeat()
    507  *
    508  *	1. Count a heartbeat on the local CPU.
    509  *
    510  *	2. Panic if the system uptime doesn't seem to have advanced in
    511  *	   a while.
    512  *
    513  *	3. Panic if the soft interrupt on this CPU hasn't advanced the
    514  *	   local view of the system uptime.
    515  *
    516  *	4. Schedule the soft interrupt to advance the local view of the
    517  *	   system uptime.
    518  *
    519  *	5. Select another CPU to check the heartbeat of.
    520  *
    521  *	6. Panic if the other CPU hasn't advanced its view of the
    522  *	   system uptime in a while.
    523  */
    524 void
    525 heartbeat(void)
    526 {
    527 	unsigned period_ticks, period_secs;
    528 	unsigned count, uptime, cache, stamp, d;
    529 	struct cpu_info *patient;
    530 
    531 	KASSERT(kpreempt_disabled());
    532 
    533 	period_ticks = atomic_load_relaxed(&heartbeat_max_period_ticks);
    534 	period_secs = atomic_load_relaxed(&heartbeat_max_period_secs);
    535 	if (__predict_false(period_ticks == 0) ||
    536 	    __predict_false(period_secs == 0) ||
    537 	    __predict_false(curcpu()->ci_schedstate.spc_flags & SPCF_OFFLINE))
    538 		return;
    539 
    540 	/*
    541 	 * Count a heartbeat on this CPU.
    542 	 */
    543 	count = curcpu()->ci_heartbeat_count++;
    544 
    545 	/*
    546 	 * If the uptime hasn't changed, make sure that we haven't
    547 	 * counted too many of our own heartbeats since the uptime last
    548 	 * changed, and stop here -- we only do the cross-CPU work once
    549 	 * per second.
    550 	 */
    551 	uptime = atomic_load_relaxed(&time_uptime);
    552 	cache = atomic_load_relaxed(&curcpu()->ci_heartbeat_uptime_cache);
    553 	if (__predict_true(cache == uptime)) {
    554 		/*
    555 		 * Timecounter hasn't advanced by more than a second.
    556 		 * Make sure the timecounter isn't stuck according to
    557 		 * our heartbeats.
    558 		 *
    559 		 * Our own heartbeat count can't roll back, and
    560 		 * time_uptime should be updated before it wraps
    561 		 * around, so d should never go negative; hence no
    562 		 * check for d < UINT_MAX/2.
    563 		 */
    564 		stamp =
    565 		    atomic_load_relaxed(&curcpu()->ci_heartbeat_uptime_stamp);
    566 		d = count - stamp;
    567 		if (__predict_false(d > period_ticks)) {
    568 			panic("%s: time has not advanced in %u heartbeats",
    569 			    cpu_name(curcpu()), d);
    570 		}
    571 		return;
    572 	}
    573 
    574 	/*
    575 	 * If the uptime has changed, make sure that it hasn't changed
    576 	 * so much that softints must be stuck on this CPU.  Since
    577 	 * time_uptime is monotonic, this can't go negative, hence no
    578 	 * check for d < UINT_MAX/2.
    579 	 *
    580 	 * This uses the hard timer interrupt handler on the current
    581 	 * CPU to ensure soft interrupts at all priority levels have
    582 	 * made progress.
    583 	 */
    584 	d = uptime - cache;
    585 	if (__predict_false(d > period_secs)) {
    586 		panic("%s: softints stuck for %u seconds",
    587 		    cpu_name(curcpu()), d);
    588 	}
    589 
    590 	/*
    591 	 * Schedule a softint to update our cache of the system uptime
    592 	 * so the next call to heartbeat, on this or another CPU, can
    593 	 * detect progress on this one.
    594 	 */
    595 	softint_schedule(heartbeat_sih);
    596 
    597 	/*
    598 	 * Select a patient to check the heartbeat of.  If there's no
    599 	 * other online CPU, nothing to do.
    600 	 */
    601 	patient = select_patient();
    602 	if (patient == NULL)
    603 		return;
    604 
    605 	/*
    606 	 * Verify that time is advancing on the patient CPU.  If the
    607 	 * delta exceeds UINT_MAX/2, that means it is already ahead by
    608 	 * a little on the other CPU, and the subtraction went
    609 	 * negative, which is OK.  If the CPU has been
    610 	 * offlined since we selected it, no worries.
    611 	 *
    612 	 * This uses the current CPU to ensure the other CPU has made
    613 	 * progress, even if the other CPU's hard timer interrupt
    614 	 * handler is stuck for some reason.
    615 	 *
    616 	 * XXX Maybe confirm it hasn't gone negative by more than
    617 	 * max_period?
    618 	 */
    619 	d = uptime - atomic_load_relaxed(&patient->ci_heartbeat_uptime_cache);
    620 	if (__predict_false(d > period_secs) &&
    621 	    __predict_false(d < UINT_MAX/2) &&
    622 	    ((patient->ci_schedstate.spc_flags & SPCF_OFFLINE) == 0))
    623 		defibrillate(patient, d);
    624 }
    625 
    626 /*
    627  * heartbeat_dump()
    628  *
    629  *	Print the heartbeat data of all CPUs.  Can be called from ddb.
    630  */
    631 #ifdef DDB
    632 static unsigned
    633 db_read_unsigned(const unsigned *p)
    634 {
    635 	unsigned x;
    636 
    637 	db_read_bytes((db_addr_t)p, sizeof(x), (char *)&x);
    638 
    639 	return x;
    640 }
    641 
    642 void
    643 heartbeat_dump(void)
    644 {
    645 	struct cpu_info *ci;
    646 
    647 	db_printf("Heartbeats:\n");
    648 	for (ci = db_cpu_first(); ci != NULL; ci = db_cpu_next(ci)) {
    649 		db_printf("cpu%u: count %u uptime %u stamp %u\n",
    650 		    db_read_unsigned(&ci->ci_index),
    651 		    db_read_unsigned(&ci->ci_heartbeat_count),
    652 		    db_read_unsigned(&ci->ci_heartbeat_uptime_cache),
    653 		    db_read_unsigned(&ci->ci_heartbeat_uptime_stamp));
    654 	}
    655 }
    656 #endif
    657