kern_heartbeat.c revision 1.7 1 /* $NetBSD: kern_heartbeat.c,v 1.7 2023/09/02 17:44:23 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2023 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * heartbeat(9) -- periodic checks to ensure CPUs are making progress
31 *
32 * Manual tests to run when changing this file. Magic numbers are for
33 * evbarm; adjust for other platforms. Tests involving cpuctl
34 * online/offline assume a 2-CPU system -- for full testing on a >2-CPU
35 * system, offline all but one CPU.
36 *
37 * 1. cpuctl offline 0
38 * sleep 20
39 * cpuctl online 0
40 *
41 * 2. cpuctl offline 1
42 * sleep 20
43 * cpuctl online 1
44 *
45 * 3. cpuctl offline 0
46 * sysctl -w kern.heartbeat.max_period=5
47 * sleep 10
48 * sysctl -w kern.heartbeat.max_period=0
49 * sleep 10
50 * sysctl -w kern.heartbeat.max_period=5
51 * sleep 10
52 * cpuctl online 0
53 *
54 * 4. sysctl -w debug.crashme_enable=1
55 * sysctl -w debug.crashme.spl_spinout=1 # IPL_SOFTCLOCK
56 * # verify system panics after 15sec
57 *
58 * 5. sysctl -w debug.crashme_enable=1
59 * sysctl -w debug.crashme.spl_spinout=6 # IPL_SCHED
60 * # verify system panics after 15sec
61 *
62 * 6. cpuctl offline 0
63 * sysctl -w debug.crashme_enable=1
64 * sysctl -w debug.crashme.spl_spinout=1 # IPL_SOFTCLOCK
65 * # verify system panics after 15sec
66 *
67 * 7. cpuctl offline 0
68 * sysctl -w debug.crashme_enable=1
69 * sysctl -w debug.crashme.spl_spinout=5 # IPL_VM
70 * # verify system panics after 15sec
71 *
72 * # Not this -- IPL_SCHED and IPL_HIGH spinout on a single CPU
73 * # require a hardware watchdog timer.
74 * #cpuctl offline 0
75 * #sysctl -w debug.crashme_enable=1
76 * #sysctl -w debug.crashme.spl_spinout=6 # IPL_SCHED
77 * # hope watchdog timer kicks in
78 */
79
80 #include <sys/cdefs.h>
81 __KERNEL_RCSID(0, "$NetBSD: kern_heartbeat.c,v 1.7 2023/09/02 17:44:23 riastradh Exp $");
82
83 #ifdef _KERNEL_OPT
84 #include "opt_ddb.h"
85 #include "opt_heartbeat.h"
86 #endif
87
88 #include "heartbeat.h"
89
90 #include <sys/param.h>
91 #include <sys/types.h>
92
93 #include <sys/atomic.h>
94 #include <sys/cpu.h>
95 #include <sys/errno.h>
96 #include <sys/heartbeat.h>
97 #include <sys/ipi.h>
98 #include <sys/kernel.h>
99 #include <sys/mutex.h>
100 #include <sys/sysctl.h>
101 #include <sys/systm.h>
102 #include <sys/xcall.h>
103
104 #ifdef DDB
105 #include <ddb/ddb.h>
106 #endif
107
108 /*
109 * Global state.
110 *
111 * heartbeat_lock serializes access to heartbeat_max_period_secs
112 * and heartbeat_max_period_ticks. Two separate variables so we
113 * can avoid multiplication or division in the heartbeat routine.
114 *
115 * heartbeat_sih is stable after initialization in
116 * heartbeat_start.
117 */
118 kmutex_t heartbeat_lock __cacheline_aligned;
119 unsigned heartbeat_max_period_secs __read_mostly;
120 unsigned heartbeat_max_period_ticks __read_mostly;
121
122 void *heartbeat_sih __read_mostly;
123
124 /*
125 * heartbeat_suspend()
126 *
127 * Suspend heartbeat monitoring of the current CPU.
128 *
129 * Called after the current CPU has been marked offline but before
130 * it has stopped running, or after IPL has been raised for
131 * polling-mode console input. Caller must have preemption
132 * disabled. Non-nestable. Reversed by heartbeat_resume.
133 */
134 void
135 heartbeat_suspend(void)
136 {
137 struct cpu_info *ci = curcpu();
138 int s;
139
140 KASSERT(curcpu_stable());
141 KASSERT((ci->ci_schedstate.spc_flags & SPCF_HEARTBEATSUSPENDED) == 0);
142
143 s = splsched();
144 ci->ci_schedstate.spc_flags |= SPCF_HEARTBEATSUSPENDED;
145 splx(s);
146 }
147
148 /*
149 * heartbeat_resume_cpu(ci)
150 *
151 * Resume heartbeat monitoring of ci.
152 *
153 * Called at startup while cold, and whenever heartbeat monitoring
154 * is re-enabled after being disabled or the period is changed.
155 * When not cold, ci must be the current CPU.
156 *
157 * Must be run at splsched.
158 */
159 static void
160 heartbeat_resume_cpu(struct cpu_info *ci)
161 {
162
163 KASSERT(__predict_false(cold) || curcpu_stable());
164 KASSERT(__predict_false(cold) || ci == curcpu());
165 /* XXX KASSERT IPL_SCHED */
166
167 ci->ci_heartbeat_count = 0;
168 ci->ci_heartbeat_uptime_cache = time_uptime;
169 ci->ci_heartbeat_uptime_stamp = 0;
170 }
171
172 /*
173 * heartbeat_resume()
174 *
175 * Resume heartbeat monitoring of the current CPU.
176 *
177 * Called after the current CPU has started running but before it
178 * has been marked online, or when ending polling-mode input
179 * before IPL is restored. Caller must have preemption disabled.
180 */
181 void
182 heartbeat_resume(void)
183 {
184 struct cpu_info *ci = curcpu();
185 int s;
186
187 KASSERT(curcpu_stable());
188 KASSERT(ci->ci_schedstate.spc_flags & SPCF_HEARTBEATSUSPENDED);
189
190 /*
191 * Block heartbeats while we reset the state so we don't
192 * spuriously think we had a heart attack in the middle of
193 * resetting the count and the uptime stamp.
194 */
195 s = splsched();
196 ci->ci_schedstate.spc_flags &= ~SPCF_HEARTBEATSUSPENDED;
197 heartbeat_resume_cpu(ci);
198 splx(s);
199 }
200
201 /*
202 * heartbeat_timecounter_suspended()
203 *
204 * True if timecounter heartbeat checks are suspended because the
205 * timecounter may not be advancing, false if heartbeat checks
206 * should check for timecounter progress.
207 */
208 static bool
209 heartbeat_timecounter_suspended(void)
210 {
211 CPU_INFO_ITERATOR cii;
212 struct cpu_info *ci;
213
214 /*
215 * The timecounter ticks only on the primary CPU. Check
216 * whether it's suspended.
217 *
218 * XXX Would be nice if we could find the primary CPU without
219 * iterating over all CPUs.
220 */
221 for (CPU_INFO_FOREACH(cii, ci)) {
222 if (CPU_IS_PRIMARY(ci)) {
223 return ci->ci_schedstate.spc_flags &
224 SPCF_HEARTBEATSUSPENDED;
225 }
226 }
227
228 /*
229 * This should be unreachable -- there had better be a primary
230 * CPU in the system! If not, the timecounter will be busted
231 * anyway.
232 */
233 panic("no primary CPU");
234 }
235
/*
 * heartbeat_reset_xc(a, b)
 *
 *	Cross-call handler, broadcast by set_max_period just before
 *	heartbeat checks are enabled: reset the heartbeat state of the
 *	CPU it runs on.  Both arguments are ignored.
 */
static void
heartbeat_reset_xc(void *a, void *b)
{
	int ipl;

	/* heartbeat_resume_cpu must be run at splsched. */
	ipl = splsched();
	heartbeat_resume_cpu(curcpu());
	splx(ipl);
}
251
252 /*
253 * set_max_period(max_period)
254 *
255 * Set the maximum period, in seconds, for heartbeat checks.
256 *
257 * - If max_period is zero, disable them.
258 *
259 * - If the max period was zero and max_period is nonzero, ensure
260 * all CPUs' heartbeat uptime caches are up-to-date before
261 * re-enabling them.
262 *
263 * max_period must be below UINT_MAX/4/hz to avoid arithmetic
264 * overflow and give room for slop.
265 *
266 * Caller must hold heartbeat_lock.
267 */
268 static void
269 set_max_period(unsigned max_period)
270 {
271
272 KASSERTMSG(max_period <= UINT_MAX/4/hz,
273 "max_period=%u must not exceed UINT_MAX/4/hz=%u (hz=%u)",
274 max_period, UINT_MAX/4/hz, hz);
275 KASSERT(mutex_owned(&heartbeat_lock));
276
277 /*
278 * If we're enabling heartbeat checks, make sure we have a
279 * reasonably up-to-date time_uptime cache on all CPUs so we
280 * don't think we had an instant heart attack.
281 */
282 if (heartbeat_max_period_secs == 0 && max_period != 0) {
283 if (cold) {
284 CPU_INFO_ITERATOR cii;
285 struct cpu_info *ci;
286
287 for (CPU_INFO_FOREACH(cii, ci))
288 heartbeat_resume_cpu(ci);
289 } else {
290 const uint64_t ticket =
291 xc_broadcast(0, &heartbeat_reset_xc, NULL, NULL);
292 xc_wait(ticket);
293 }
294 }
295
296 /*
297 * Once the heartbeat state has been updated on all (online)
298 * CPUs, set the period. At this point, heartbeat checks can
299 * begin.
300 */
301 atomic_store_relaxed(&heartbeat_max_period_secs, max_period);
302 atomic_store_relaxed(&heartbeat_max_period_ticks, max_period*hz);
303 }
304
305 /*
306 * heartbeat_max_period_ticks(SYSCTLFN_ARGS)
307 *
308 * Sysctl handler for sysctl kern.heartbeat.max_period. Verifies
309 * it lies within a reasonable interval and sets it.
310 */
311 static int
312 heartbeat_max_period_sysctl(SYSCTLFN_ARGS)
313 {
314 struct sysctlnode node;
315 unsigned max_period;
316 int error;
317
318 mutex_enter(&heartbeat_lock);
319
320 max_period = heartbeat_max_period_secs;
321 node = *rnode;
322 node.sysctl_data = &max_period;
323 error = sysctl_lookup(SYSCTLFN_CALL(&node));
324 if (error || newp == NULL)
325 goto out;
326
327 /*
328 * Ensure there's plenty of slop between heartbeats.
329 */
330 if (max_period > UINT_MAX/4/hz) {
331 error = EOVERFLOW;
332 goto out;
333 }
334
335 /*
336 * Success! Set the period. This enables heartbeat checks if
337 * we went from zero period to nonzero period, or disables them
338 * if the other way around.
339 */
340 set_max_period(max_period);
341 error = 0;
342
343 out: mutex_exit(&heartbeat_lock);
344 return error;
345 }
346
347 /*
348 * sysctl_heartbeat_setup()
349 *
350 * Set up the kern.heartbeat.* sysctl subtree.
351 */
352 SYSCTL_SETUP(sysctl_heartbeat_setup, "sysctl kern.heartbeat setup")
353 {
354 const struct sysctlnode *rnode;
355 int error;
356
357 mutex_init(&heartbeat_lock, MUTEX_DEFAULT, IPL_NONE);
358
359 /* kern.heartbeat */
360 error = sysctl_createv(NULL, 0, NULL, &rnode,
361 CTLFLAG_PERMANENT,
362 CTLTYPE_NODE, "heartbeat",
363 SYSCTL_DESCR("Kernel heartbeat parameters"),
364 NULL, 0, NULL, 0,
365 CTL_KERN, CTL_CREATE, CTL_EOL);
366 if (error) {
367 printf("%s: failed to create kern.heartbeat: %d\n",
368 __func__, error);
369 return;
370 }
371
372 /* kern.heartbeat.max_period */
373 error = sysctl_createv(NULL, 0, &rnode, NULL,
374 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
375 CTLTYPE_INT, "max_period",
376 SYSCTL_DESCR("Max seconds between heartbeats before panic"),
377 &heartbeat_max_period_sysctl, 0, NULL, 0,
378 CTL_CREATE, CTL_EOL);
379 if (error) {
380 printf("%s: failed to create kern.heartbeat.max_period: %d\n",
381 __func__, error);
382 return;
383 }
384 }
385
386 /*
387 * heartbeat_intr(cookie)
388 *
389 * Soft interrupt handler to update the local CPU's view of the
390 * system uptime. This runs at the same priority level as
391 * callouts, so if callouts are stuck on this CPU, it won't run,
392 * and eventually another CPU will notice that this one is stuck.
393 *
394 * Don't do spl* here -- keep it to a minimum so if anything goes
395 * wrong we don't end up with hard interrupts blocked and unable
396 * to detect a missed heartbeat.
397 */
398 static void
399 heartbeat_intr(void *cookie)
400 {
401 unsigned count = atomic_load_relaxed(&curcpu()->ci_heartbeat_count);
402 unsigned uptime = time_uptime;
403
404 atomic_store_relaxed(&curcpu()->ci_heartbeat_uptime_stamp, count);
405 atomic_store_relaxed(&curcpu()->ci_heartbeat_uptime_cache, uptime);
406 }
407
408 /*
409 * heartbeat_start()
410 *
411 * Start system heartbeat monitoring.
412 */
413 void
414 heartbeat_start(void)
415 {
416 const unsigned max_period = HEARTBEAT_MAX_PERIOD_DEFAULT;
417
418 /*
419 * Establish a softint so we can schedule it once ready. This
420 * should be at the lowest softint priority level so that we
421 * ensure all softint priorities are making progress.
422 */
423 heartbeat_sih = softint_establish(SOFTINT_CLOCK|SOFTINT_MPSAFE,
424 &heartbeat_intr, NULL);
425
426 /*
427 * Now that the softint is established, kick off heartbeat
428 * monitoring with the default period. This will initialize
429 * the per-CPU state to an up-to-date cache of time_uptime.
430 */
431 mutex_enter(&heartbeat_lock);
432 set_max_period(max_period);
433 mutex_exit(&heartbeat_lock);
434 }
435
436 /*
437 * defibrillator(cookie)
438 *
439 * IPI handler for defibrillation. If the CPU's heart has stopped
440 * beating normally, but the CPU can still execute things,
441 * acknowledge the IPI to the doctor and then panic so we at least
442 * get a stack trace from whatever the current CPU is stuck doing,
443 * if not a core dump.
444 *
445 * (This metaphor is a little stretched, since defibrillation is
446 * usually administered when the heart is beating errattically but
447 * hasn't stopped, and causes the heart to stop temporarily, and
448 * one hopes it is not fatal. But we're (software) engineers, so
449 * we can stretch metaphors like silly putty in a blender.)
450 */
451 static void
452 defibrillator(void *cookie)
453 {
454 bool *ack = cookie;
455
456 atomic_store_relaxed(ack, true);
457 panic("%s[%d %s]: heart stopped beating", cpu_name(curcpu()),
458 curlwp->l_lid,
459 curlwp->l_name ? curlwp->l_name : curproc->p_comm);
460 }
461
462 /*
463 * defibrillate(ci, unsigned d)
464 *
465 * The patient CPU ci's heart has stopped beating after d seconds.
466 * Force the patient CPU ci to panic, or panic on this CPU if the
467 * patient CPU doesn't respond within 1sec.
468 */
469 static void __noinline
470 defibrillate(struct cpu_info *ci, unsigned d)
471 {
472 bool ack = false;
473 ipi_msg_t msg = {
474 .func = &defibrillator,
475 .arg = &ack,
476 };
477 unsigned countdown = 1000; /* 1sec */
478
479 KASSERT(curcpu_stable());
480
481 /*
482 * First notify the console that the patient CPU's heart seems
483 * to have stopped beating.
484 */
485 printf("%s: found %s heart stopped beating after %u seconds\n",
486 cpu_name(curcpu()), cpu_name(ci), d);
487
488 /*
489 * Next, give the patient CPU a chance to panic, so we get a
490 * stack trace on that CPU even if we don't get a crash dump.
491 */
492 ipi_unicast(&msg, ci);
493
494 /*
495 * Busy-wait up to 1sec for the patient CPU to print a stack
496 * trace and panic. If the patient CPU acknowledges the IPI,
497 * or if we're panicking anyway, just give up and stop here --
498 * the system is coming down soon and we should avoid getting
499 * in the way.
500 */
501 while (countdown --> 0) {
502 if (atomic_load_relaxed(&ack) ||
503 atomic_load_relaxed(&panicstr) != NULL)
504 return;
505 DELAY(1000); /* 1ms */
506 }
507
508 /*
509 * The patient CPU failed to acknowledge the panic request.
510 * Panic now; with any luck, we'll get a crash dump.
511 */
512 panic("%s: found %s heart stopped beating and unresponsive",
513 cpu_name(curcpu()), cpu_name(ci));
514 }
515
516 /*
517 * select_patient()
518 *
519 * Select another CPU to check the heartbeat of. Returns NULL if
520 * there are no other online CPUs. Never returns curcpu().
521 * Caller must have kpreemption disabled.
522 */
523 static struct cpu_info *
524 select_patient(void)
525 {
526 CPU_INFO_ITERATOR cii;
527 struct cpu_info *first = NULL, *patient = NULL, *ci;
528 bool passedcur = false;
529
530 KASSERT(curcpu_stable());
531
532 /*
533 * In the iteration order of all CPUs, find the next online CPU
534 * after curcpu(), or the first online one if curcpu() is last
535 * in the iteration order.
536 */
537 for (CPU_INFO_FOREACH(cii, ci)) {
538 if (ci->ci_schedstate.spc_flags & SPCF_HEARTBEATSUSPENDED)
539 continue;
540 if (passedcur) {
541 /*
542 * (...|curcpu()|ci|...)
543 *
544 * Found the patient right after curcpu().
545 */
546 KASSERT(patient != ci);
547 patient = ci;
548 break;
549 }
550 if (ci == curcpu()) {
551 /*
552 * (...|prev|ci=curcpu()|next|...)
553 *
554 * Note that we want next (or first, if there's
555 * nothing after curcpu()).
556 */
557 passedcur = true;
558 continue;
559 }
560 if (first == NULL) {
561 /*
562 * (ci|...|curcpu()|...)
563 *
564 * Record ci as first in case there's nothing
565 * after curcpu().
566 */
567 first = ci;
568 continue;
569 }
570 }
571
572 /*
573 * If we hit the end, wrap around to the beginning.
574 */
575 if (patient == NULL) {
576 KASSERT(passedcur);
577 patient = first;
578 }
579
580 return patient;
581 }
582
583 /*
584 * heartbeat()
585 *
586 * 1. Count a heartbeat on the local CPU.
587 *
588 * 2. Panic if the system uptime doesn't seem to have advanced in
589 * a while.
590 *
591 * 3. Panic if the soft interrupt on this CPU hasn't advanced the
592 * local view of the system uptime.
593 *
594 * 4. Schedule the soft interrupt to advance the local view of the
595 * system uptime.
596 *
597 * 5. Select another CPU to check the heartbeat of.
598 *
599 * 6. Panic if the other CPU hasn't advanced its view of the
600 * system uptime in a while.
601 */
602 void
603 heartbeat(void)
604 {
605 unsigned period_ticks, period_secs;
606 unsigned count, uptime, cache, stamp, d;
607 struct cpu_info *patient;
608
609 KASSERT(curcpu_stable());
610
611 period_ticks = atomic_load_relaxed(&heartbeat_max_period_ticks);
612 period_secs = atomic_load_relaxed(&heartbeat_max_period_secs);
613 if (__predict_false(period_ticks == 0) ||
614 __predict_false(period_secs == 0) ||
615 __predict_false(curcpu()->ci_schedstate.spc_flags &
616 SPCF_HEARTBEATSUSPENDED))
617 return;
618
619 /*
620 * Count a heartbeat on this CPU.
621 */
622 count = curcpu()->ci_heartbeat_count++;
623
624 /*
625 * If the uptime hasn't changed, make sure that we haven't
626 * counted too many of our own heartbeats since the uptime last
627 * changed, and stop here -- we only do the cross-CPU work once
628 * per second.
629 */
630 uptime = time_uptime;
631 cache = atomic_load_relaxed(&curcpu()->ci_heartbeat_uptime_cache);
632 if (__predict_true(cache == uptime)) {
633 /*
634 * Timecounter hasn't advanced by more than a second.
635 * Make sure the timecounter isn't stuck according to
636 * our heartbeats -- unless timecounter heartbeats are
637 * suspended too.
638 *
639 * Our own heartbeat count can't roll back, and
640 * time_uptime should be updated before it wraps
641 * around, so d should never go negative; hence no
642 * check for d < UINT_MAX/2.
643 */
644 stamp =
645 atomic_load_relaxed(&curcpu()->ci_heartbeat_uptime_stamp);
646 d = count - stamp;
647 if (__predict_false(d > period_ticks) &&
648 !heartbeat_timecounter_suspended()) {
649 panic("%s: time has not advanced in %u heartbeats",
650 cpu_name(curcpu()), d);
651 }
652 return;
653 }
654
655 /*
656 * If the uptime has changed, make sure that it hasn't changed
657 * so much that softints must be stuck on this CPU. Since
658 * time_uptime is monotonic, this can't go negative, hence no
659 * check for d < UINT_MAX/2.
660 *
661 * This uses the hard timer interrupt handler on the current
662 * CPU to ensure soft interrupts at all priority levels have
663 * made progress.
664 */
665 d = uptime - cache;
666 if (__predict_false(d > period_secs)) {
667 panic("%s: softints stuck for %u seconds",
668 cpu_name(curcpu()), d);
669 }
670
671 /*
672 * Schedule a softint to update our cache of the system uptime
673 * so the next call to heartbeat, on this or another CPU, can
674 * detect progress on this one.
675 */
676 softint_schedule(heartbeat_sih);
677
678 /*
679 * Select a patient to check the heartbeat of. If there's no
680 * other online CPU, nothing to do.
681 */
682 patient = select_patient();
683 if (patient == NULL)
684 return;
685
686 /*
687 * Verify that time is advancing on the patient CPU. If the
688 * delta exceeds UINT_MAX/2, that means it is already ahead by
689 * a little on the other CPU, and the subtraction went
690 * negative, which is OK. If the CPU's heartbeats have been
691 * suspended since we selected it, no worries.
692 *
693 * This uses the current CPU to ensure the other CPU has made
694 * progress, even if the other CPU's hard timer interrupt
695 * handler is stuck for some reason.
696 *
697 * XXX Maybe confirm it hasn't gone negative by more than
698 * max_period?
699 */
700 d = uptime - atomic_load_relaxed(&patient->ci_heartbeat_uptime_cache);
701 if (__predict_false(d > period_secs) &&
702 __predict_false(d < UINT_MAX/2) &&
703 ((patient->ci_schedstate.spc_flags & SPCF_HEARTBEATSUSPENDED)
704 == 0))
705 defibrillate(patient, d);
706 }
707
708 /*
709 * heartbeat_dump()
710 *
711 * Print the heartbeat data of all CPUs. Can be called from ddb.
712 */
713 #ifdef DDB
714 static unsigned
715 db_read_unsigned(const volatile unsigned *p)
716 {
717 unsigned x;
718
719 db_read_bytes((db_addr_t)(uintptr_t)p, sizeof(x), (char *)&x);
720
721 return x;
722 }
723
724 static int
725 db_read_signed(const volatile int *p)
726 {
727 int x;
728
729 db_read_bytes((db_addr_t)(uintptr_t)p, sizeof(x), (char *)&x);
730
731 return x;
732 }
733
734 void
735 heartbeat_dump(void)
736 {
737 struct cpu_info *ci;
738
739 db_printf("Heartbeats:\n");
740 for (ci = db_cpu_first(); ci != NULL; ci = db_cpu_next(ci)) {
741 db_printf("cpu%u: count %u uptime %u stamp %u%s\n",
742 db_read_unsigned(&ci->ci_index),
743 db_read_unsigned(&ci->ci_heartbeat_count),
744 db_read_unsigned(&ci->ci_heartbeat_uptime_cache),
745 db_read_unsigned(&ci->ci_heartbeat_uptime_stamp),
746 (db_read_signed(&ci->ci_schedstate.spc_flags) &
747 SPCF_HEARTBEATSUSPENDED ? " (suspended)" : ""));
748 }
749 }
750 #endif
751