1 1.14 riastrad /* $NetBSD: kern_heartbeat.c,v 1.14 2024/08/25 01:14:01 riastradh Exp $ */ 2 1.1 riastrad 3 1.1 riastrad /*- 4 1.1 riastrad * Copyright (c) 2023 The NetBSD Foundation, Inc. 5 1.1 riastrad * All rights reserved. 6 1.1 riastrad * 7 1.1 riastrad * Redistribution and use in source and binary forms, with or without 8 1.1 riastrad * modification, are permitted provided that the following conditions 9 1.1 riastrad * are met: 10 1.1 riastrad * 1. Redistributions of source code must retain the above copyright 11 1.1 riastrad * notice, this list of conditions and the following disclaimer. 12 1.1 riastrad * 2. Redistributions in binary form must reproduce the above copyright 13 1.1 riastrad * notice, this list of conditions and the following disclaimer in the 14 1.1 riastrad * documentation and/or other materials provided with the distribution. 15 1.1 riastrad * 16 1.1 riastrad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 1.1 riastrad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 1.1 riastrad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 1.1 riastrad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 1.1 riastrad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 1.1 riastrad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 1.1 riastrad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 1.1 riastrad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 1.1 riastrad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 1.1 riastrad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 1.1 riastrad * POSSIBILITY OF SUCH DAMAGE. 27 1.1 riastrad */ 28 1.1 riastrad 29 1.1 riastrad /* 30 1.1 riastrad * heartbeat(9) -- periodic checks to ensure CPUs are making progress 31 1.1 riastrad * 32 1.1 riastrad * Manual tests to run when changing this file. Magic numbers are for 33 1.1 riastrad * evbarm; adjust for other platforms. Tests involving cpuctl 34 1.1 riastrad * online/offline assume a 2-CPU system -- for full testing on a >2-CPU 35 1.1 riastrad * system, offline all but one CPU. 36 1.1 riastrad * 37 1.1 riastrad * 1. cpuctl offline 0 38 1.1 riastrad * sleep 20 39 1.1 riastrad * cpuctl online 0 40 1.1 riastrad * 41 1.1 riastrad * 2. cpuctl offline 1 42 1.1 riastrad * sleep 20 43 1.1 riastrad * cpuctl online 1 44 1.1 riastrad * 45 1.1 riastrad * 3. cpuctl offline 0 46 1.1 riastrad * sysctl -w kern.heartbeat.max_period=5 47 1.1 riastrad * sleep 10 48 1.1 riastrad * sysctl -w kern.heartbeat.max_period=0 49 1.1 riastrad * sleep 10 50 1.1 riastrad * sysctl -w kern.heartbeat.max_period=5 51 1.1 riastrad * sleep 10 52 1.1 riastrad * cpuctl online 0 53 1.1 riastrad * 54 1.1 riastrad * 4. sysctl -w debug.crashme_enable=1 55 1.1 riastrad * sysctl -w debug.crashme.spl_spinout=1 # IPL_SOFTCLOCK 56 1.8 riastrad * # verify system panics after 15sec, with a stack trace through 57 1.8 riastrad * # crashme_spl_spinout 58 1.1 riastrad * 59 1.1 riastrad * 5. sysctl -w debug.crashme_enable=1 60 1.1 riastrad * sysctl -w debug.crashme.spl_spinout=6 # IPL_SCHED 61 1.8 riastrad * # verify system panics after 15sec, with a stack trace through 62 1.8 riastrad * # crashme_spl_spinout 63 1.1 riastrad * 64 1.1 riastrad * 6. cpuctl offline 0 65 1.1 riastrad * sysctl -w debug.crashme_enable=1 66 1.1 riastrad * sysctl -w debug.crashme.spl_spinout=1 # IPL_SOFTCLOCK 67 1.8 riastrad * # verify system panics after 15sec, with a stack trace through 68 1.8 riastrad * # crashme_spl_spinout 69 1.1 riastrad * 70 1.1 riastrad * 7. cpuctl offline 0 71 1.1 riastrad * sysctl -w debug.crashme_enable=1 72 1.1 riastrad * sysctl -w debug.crashme.spl_spinout=5 # IPL_VM 73 1.8 riastrad * # verify system panics after 15sec, with a stack trace through 74 1.8 riastrad * # crashme_spl_spinout 75 1.1 riastrad * 76 1.1 riastrad * # Not this -- IPL_SCHED and IPL_HIGH spinout on a single CPU 77 1.1 riastrad * # require a hardware watchdog timer. 78 1.1 riastrad * #cpuctl offline 0 79 1.1 riastrad * #sysctl -w debug.crashme_enable 80 1.1 riastrad * #sysctl -w debug.crashme.spl_spinout=6 # IPL_SCHED 81 1.1 riastrad * # hope watchdog timer kicks in 82 1.1 riastrad */ 83 1.1 riastrad 84 1.1 riastrad #include <sys/cdefs.h> 85 1.14 riastrad __KERNEL_RCSID(0, "$NetBSD: kern_heartbeat.c,v 1.14 2024/08/25 01:14:01 riastradh Exp $"); 86 1.1 riastrad 87 1.1 riastrad #ifdef _KERNEL_OPT 88 1.1 riastrad #include "opt_ddb.h" 89 1.1 riastrad #include "opt_heartbeat.h" 90 1.1 riastrad #endif 91 1.1 riastrad 92 1.1 riastrad #include "heartbeat.h" 93 1.1 riastrad 94 1.1 riastrad #include <sys/param.h> 95 1.1 riastrad #include <sys/types.h> 96 1.1 riastrad 97 1.1 riastrad #include <sys/atomic.h> 98 1.1 riastrad #include <sys/cpu.h> 99 1.1 riastrad #include <sys/errno.h> 100 1.1 riastrad #include <sys/heartbeat.h> 101 1.1 riastrad #include <sys/ipi.h> 102 1.4 riastrad #include <sys/kernel.h> 103 1.1 riastrad #include <sys/mutex.h> 104 1.1 riastrad #include <sys/sysctl.h> 105 1.1 riastrad #include <sys/systm.h> 106 1.1 riastrad #include <sys/xcall.h> 107 1.1 riastrad 108 1.1 riastrad #ifdef DDB 109 1.1 riastrad #include <ddb/ddb.h> 110 1.1 riastrad #endif 111 1.1 riastrad 112 1.1 riastrad /* 113 1.1 riastrad * Global state. 114 1.1 riastrad * 115 1.1 riastrad * heartbeat_lock serializes access to heartbeat_max_period_secs 116 1.1 riastrad * and heartbeat_max_period_ticks. Two separate variables so we 117 1.1 riastrad * can avoid multiplication or division in the heartbeat routine. 118 1.1 riastrad * 119 1.1 riastrad * heartbeat_sih is stable after initialization in 120 1.1 riastrad * heartbeat_start. 121 1.1 riastrad */ 122 1.1 riastrad kmutex_t heartbeat_lock __cacheline_aligned; 123 1.1 riastrad unsigned heartbeat_max_period_secs __read_mostly; 124 1.1 riastrad unsigned heartbeat_max_period_ticks __read_mostly; 125 1.1 riastrad 126 1.1 riastrad void *heartbeat_sih __read_mostly; 127 1.1 riastrad 128 1.1 riastrad /* 129 1.1 riastrad * heartbeat_suspend() 130 1.1 riastrad * 131 1.1 riastrad * Suspend heartbeat monitoring of the current CPU. 132 1.1 riastrad * 133 1.1 riastrad * Called after the current CPU has been marked offline but before 134 1.6 riastrad * it has stopped running, or after IPL has been raised for 135 1.12 riastrad * polling-mode console input. Nestable (but only 2^32 times, so 136 1.12 riastrad * don't do this in a loop). Reversed by heartbeat_resume. 137 1.11 riastrad * 138 1.11 riastrad * Caller must be bound to the CPU, i.e., curcpu_stable() must be 139 1.11 riastrad * true. This function does not assert curcpu_stable() since it 140 1.11 riastrad * is used in the ddb entry path, where any assertions risk 141 1.11 riastrad * infinite regress into undebuggable chaos, so callers must be 142 1.11 riastrad * careful. 143 1.1 riastrad */ 144 1.1 riastrad void 145 1.1 riastrad heartbeat_suspend(void) 146 1.1 riastrad { 147 1.10 riastrad unsigned *p; 148 1.1 riastrad 149 1.10 riastrad p = &curcpu()->ci_heartbeat_suspend; 150 1.10 riastrad atomic_store_relaxed(p, *p + 1); 151 1.1 riastrad } 152 1.1 riastrad 153 1.1 riastrad /* 154 1.4 riastrad * heartbeat_resume_cpu(ci) 155 1.4 riastrad * 156 1.4 riastrad * Resume heartbeat monitoring of ci. 157 1.4 riastrad * 158 1.4 riastrad * Called at startup while cold, and whenever heartbeat monitoring 159 1.4 riastrad * is re-enabled after being disabled or the period is changed. 160 1.4 riastrad * When not cold, ci must be the current CPU. 161 1.6 riastrad * 162 1.6 riastrad * Must be run at splsched. 163 1.4 riastrad */ 164 1.4 riastrad static void 165 1.4 riastrad heartbeat_resume_cpu(struct cpu_info *ci) 166 1.4 riastrad { 167 1.4 riastrad 168 1.4 riastrad KASSERT(__predict_false(cold) || curcpu_stable()); 169 1.4 riastrad KASSERT(__predict_false(cold) || ci == curcpu()); 170 1.6 riastrad /* XXX KASSERT IPL_SCHED */ 171 1.4 riastrad 172 1.4 riastrad ci->ci_heartbeat_count = 0; 173 1.14 riastrad ci->ci_heartbeat_uptime_cache = time_uptime32; 174 1.4 riastrad ci->ci_heartbeat_uptime_stamp = 0; 175 1.4 riastrad } 176 1.4 riastrad 177 1.4 riastrad /* 178 1.1 riastrad * heartbeat_resume() 179 1.1 riastrad * 180 1.1 riastrad * Resume heartbeat monitoring of the current CPU. 181 1.1 riastrad * 182 1.1 riastrad * Called after the current CPU has started running but before it 183 1.6 riastrad * has been marked online, or when ending polling-mode input 184 1.10 riastrad * before IPL is restored. Reverses heartbeat_suspend. 185 1.11 riastrad * 186 1.11 riastrad * Caller must be bound to the CPU, i.e., curcpu_stable() must be 187 1.11 riastrad * true. 188 1.1 riastrad */ 189 1.1 riastrad void 190 1.1 riastrad heartbeat_resume(void) 191 1.1 riastrad { 192 1.1 riastrad struct cpu_info *ci = curcpu(); 193 1.10 riastrad unsigned *p; 194 1.1 riastrad int s; 195 1.1 riastrad 196 1.11 riastrad KASSERT(curcpu_stable()); 197 1.11 riastrad 198 1.1 riastrad /* 199 1.10 riastrad * Reset the state so nobody spuriously thinks we had a heart 200 1.10 riastrad * attack as soon as the heartbeat checks resume. 201 1.1 riastrad */ 202 1.1 riastrad s = splsched(); 203 1.4 riastrad heartbeat_resume_cpu(ci); 204 1.1 riastrad splx(s); 205 1.10 riastrad 206 1.10 riastrad p = &ci->ci_heartbeat_suspend; 207 1.10 riastrad atomic_store_relaxed(p, *p - 1); 208 1.1 riastrad } 209 1.1 riastrad 210 1.1 riastrad /* 211 1.7 riastrad * heartbeat_timecounter_suspended() 212 1.7 riastrad * 213 1.7 riastrad * True if timecounter heartbeat checks are suspended because the 214 1.7 riastrad * timecounter may not be advancing, false if heartbeat checks 215 1.7 riastrad * should check for timecounter progress. 216 1.7 riastrad */ 217 1.7 riastrad static bool 218 1.7 riastrad heartbeat_timecounter_suspended(void) 219 1.7 riastrad { 220 1.7 riastrad CPU_INFO_ITERATOR cii; 221 1.7 riastrad struct cpu_info *ci; 222 1.7 riastrad 223 1.7 riastrad /* 224 1.7 riastrad * The timecounter ticks only on the primary CPU. Check 225 1.7 riastrad * whether it's suspended. 226 1.7 riastrad * 227 1.7 riastrad * XXX Would be nice if we could find the primary CPU without 228 1.7 riastrad * iterating over all CPUs. 229 1.7 riastrad */ 230 1.7 riastrad for (CPU_INFO_FOREACH(cii, ci)) { 231 1.10 riastrad if (CPU_IS_PRIMARY(ci)) 232 1.10 riastrad return atomic_load_relaxed(&ci->ci_heartbeat_suspend); 233 1.7 riastrad } 234 1.7 riastrad 235 1.7 riastrad /* 236 1.7 riastrad * This should be unreachable -- there had better be a primary 237 1.7 riastrad * CPU in the system! If not, the timecounter will be busted 238 1.7 riastrad * anyway. 239 1.7 riastrad */ 240 1.7 riastrad panic("no primary CPU"); 241 1.7 riastrad } 242 1.7 riastrad 243 1.7 riastrad /* 244 1.1 riastrad * heartbeat_reset_xc(a, b) 245 1.1 riastrad * 246 1.1 riastrad * Cross-call handler to reset heartbeat state just prior to 247 1.1 riastrad * enabling heartbeat checks. 248 1.1 riastrad */ 249 1.1 riastrad static void 250 1.1 riastrad heartbeat_reset_xc(void *a, void *b) 251 1.1 riastrad { 252 1.6 riastrad int s; 253 1.1 riastrad 254 1.6 riastrad s = splsched(); 255 1.6 riastrad heartbeat_resume_cpu(curcpu()); 256 1.6 riastrad splx(s); 257 1.1 riastrad } 258 1.1 riastrad 259 1.1 riastrad /* 260 1.1 riastrad * set_max_period(max_period) 261 1.1 riastrad * 262 1.1 riastrad * Set the maximum period, in seconds, for heartbeat checks. 263 1.1 riastrad * 264 1.1 riastrad * - If max_period is zero, disable them. 265 1.1 riastrad * 266 1.1 riastrad * - If the max period was zero and max_period is nonzero, ensure 267 1.1 riastrad * all CPUs' heartbeat uptime caches are up-to-date before 268 1.1 riastrad * re-enabling them. 269 1.1 riastrad * 270 1.1 riastrad * max_period must be below UINT_MAX/4/hz to avoid arithmetic 271 1.1 riastrad * overflow and give room for slop. 272 1.1 riastrad * 273 1.1 riastrad * Caller must hold heartbeat_lock. 274 1.1 riastrad */ 275 1.1 riastrad static void 276 1.1 riastrad set_max_period(unsigned max_period) 277 1.1 riastrad { 278 1.1 riastrad 279 1.1 riastrad KASSERTMSG(max_period <= UINT_MAX/4/hz, 280 1.1 riastrad "max_period=%u must not exceed UINT_MAX/4/hz=%u (hz=%u)", 281 1.1 riastrad max_period, UINT_MAX/4/hz, hz); 282 1.1 riastrad KASSERT(mutex_owned(&heartbeat_lock)); 283 1.1 riastrad 284 1.1 riastrad /* 285 1.1 riastrad * If we're enabling heartbeat checks, make sure we have a 286 1.14 riastrad * reasonably up-to-date time_uptime32 cache on all CPUs so we 287 1.1 riastrad * don't think we had an instant heart attack. 288 1.1 riastrad */ 289 1.4 riastrad if (heartbeat_max_period_secs == 0 && max_period != 0) { 290 1.4 riastrad if (cold) { 291 1.4 riastrad CPU_INFO_ITERATOR cii; 292 1.4 riastrad struct cpu_info *ci; 293 1.4 riastrad 294 1.4 riastrad for (CPU_INFO_FOREACH(cii, ci)) 295 1.4 riastrad heartbeat_resume_cpu(ci); 296 1.4 riastrad } else { 297 1.4 riastrad const uint64_t ticket = 298 1.4 riastrad xc_broadcast(0, &heartbeat_reset_xc, NULL, NULL); 299 1.4 riastrad xc_wait(ticket); 300 1.4 riastrad } 301 1.4 riastrad } 302 1.1 riastrad 303 1.1 riastrad /* 304 1.1 riastrad * Once the heartbeat state has been updated on all (online) 305 1.1 riastrad * CPUs, set the period. At this point, heartbeat checks can 306 1.1 riastrad * begin. 307 1.1 riastrad */ 308 1.1 riastrad atomic_store_relaxed(&heartbeat_max_period_secs, max_period); 309 1.1 riastrad atomic_store_relaxed(&heartbeat_max_period_ticks, max_period*hz); 310 1.1 riastrad } 311 1.1 riastrad 312 1.1 riastrad /* 313 1.1 riastrad * heartbeat_max_period_ticks(SYSCTLFN_ARGS) 314 1.1 riastrad * 315 1.1 riastrad * Sysctl handler for sysctl kern.heartbeat.max_period. Verifies 316 1.1 riastrad * it lies within a reasonable interval and sets it. 317 1.1 riastrad */ 318 1.1 riastrad static int 319 1.1 riastrad heartbeat_max_period_sysctl(SYSCTLFN_ARGS) 320 1.1 riastrad { 321 1.1 riastrad struct sysctlnode node; 322 1.1 riastrad unsigned max_period; 323 1.1 riastrad int error; 324 1.1 riastrad 325 1.1 riastrad mutex_enter(&heartbeat_lock); 326 1.1 riastrad 327 1.1 riastrad max_period = heartbeat_max_period_secs; 328 1.1 riastrad node = *rnode; 329 1.1 riastrad node.sysctl_data = &max_period; 330 1.1 riastrad error = sysctl_lookup(SYSCTLFN_CALL(&node)); 331 1.1 riastrad if (error || newp == NULL) 332 1.1 riastrad goto out; 333 1.1 riastrad 334 1.1 riastrad /* 335 1.1 riastrad * Ensure there's plenty of slop between heartbeats. 336 1.1 riastrad */ 337 1.1 riastrad if (max_period > UINT_MAX/4/hz) { 338 1.1 riastrad error = EOVERFLOW; 339 1.1 riastrad goto out; 340 1.1 riastrad } 341 1.1 riastrad 342 1.1 riastrad /* 343 1.1 riastrad * Success! Set the period. This enables heartbeat checks if 344 1.1 riastrad * we went from zero period to nonzero period, or disables them 345 1.1 riastrad * if the other way around. 346 1.1 riastrad */ 347 1.1 riastrad set_max_period(max_period); 348 1.1 riastrad error = 0; 349 1.1 riastrad 350 1.1 riastrad out: mutex_exit(&heartbeat_lock); 351 1.1 riastrad return error; 352 1.1 riastrad } 353 1.1 riastrad 354 1.1 riastrad /* 355 1.1 riastrad * sysctl_heartbeat_setup() 356 1.1 riastrad * 357 1.1 riastrad * Set up the kern.heartbeat.* sysctl subtree. 358 1.1 riastrad */ 359 1.1 riastrad SYSCTL_SETUP(sysctl_heartbeat_setup, "sysctl kern.heartbeat setup") 360 1.1 riastrad { 361 1.1 riastrad const struct sysctlnode *rnode; 362 1.1 riastrad int error; 363 1.1 riastrad 364 1.1 riastrad mutex_init(&heartbeat_lock, MUTEX_DEFAULT, IPL_NONE); 365 1.1 riastrad 366 1.1 riastrad /* kern.heartbeat */ 367 1.1 riastrad error = sysctl_createv(NULL, 0, NULL, &rnode, 368 1.1 riastrad CTLFLAG_PERMANENT, 369 1.1 riastrad CTLTYPE_NODE, "heartbeat", 370 1.1 riastrad SYSCTL_DESCR("Kernel heartbeat parameters"), 371 1.1 riastrad NULL, 0, NULL, 0, 372 1.1 riastrad CTL_KERN, CTL_CREATE, CTL_EOL); 373 1.1 riastrad if (error) { 374 1.1 riastrad printf("%s: failed to create kern.heartbeat: %d\n", 375 1.1 riastrad __func__, error); 376 1.1 riastrad return; 377 1.1 riastrad } 378 1.1 riastrad 379 1.1 riastrad /* kern.heartbeat.max_period */ 380 1.1 riastrad error = sysctl_createv(NULL, 0, &rnode, NULL, 381 1.1 riastrad CTLFLAG_PERMANENT|CTLFLAG_READWRITE, 382 1.1 riastrad CTLTYPE_INT, "max_period", 383 1.1 riastrad SYSCTL_DESCR("Max seconds between heartbeats before panic"), 384 1.1 riastrad &heartbeat_max_period_sysctl, 0, NULL, 0, 385 1.1 riastrad CTL_CREATE, CTL_EOL); 386 1.1 riastrad if (error) { 387 1.1 riastrad printf("%s: failed to create kern.heartbeat.max_period: %d\n", 388 1.1 riastrad __func__, error); 389 1.1 riastrad return; 390 1.1 riastrad } 391 1.1 riastrad } 392 1.1 riastrad 393 1.1 riastrad /* 394 1.1 riastrad * heartbeat_intr(cookie) 395 1.1 riastrad * 396 1.1 riastrad * Soft interrupt handler to update the local CPU's view of the 397 1.1 riastrad * system uptime. This runs at the same priority level as 398 1.1 riastrad * callouts, so if callouts are stuck on this CPU, it won't run, 399 1.1 riastrad * and eventually another CPU will notice that this one is stuck. 400 1.1 riastrad * 401 1.1 riastrad * Don't do spl* here -- keep it to a minimum so if anything goes 402 1.1 riastrad * wrong we don't end up with hard interrupts blocked and unable 403 1.1 riastrad * to detect a missed heartbeat. 404 1.1 riastrad */ 405 1.1 riastrad static void 406 1.1 riastrad heartbeat_intr(void *cookie) 407 1.1 riastrad { 408 1.1 riastrad unsigned count = atomic_load_relaxed(&curcpu()->ci_heartbeat_count); 409 1.14 riastrad unsigned uptime = time_uptime32; 410 1.1 riastrad 411 1.1 riastrad atomic_store_relaxed(&curcpu()->ci_heartbeat_uptime_stamp, count); 412 1.1 riastrad atomic_store_relaxed(&curcpu()->ci_heartbeat_uptime_cache, uptime); 413 1.1 riastrad } 414 1.1 riastrad 415 1.1 riastrad /* 416 1.1 riastrad * heartbeat_start() 417 1.1 riastrad * 418 1.1 riastrad * Start system heartbeat monitoring. 419 1.1 riastrad */ 420 1.1 riastrad void 421 1.1 riastrad heartbeat_start(void) 422 1.1 riastrad { 423 1.14 riastrad enum { max_period = HEARTBEAT_MAX_PERIOD_DEFAULT }; 424 1.14 riastrad 425 1.14 riastrad /* 426 1.14 riastrad * Ensure the maximum period is small enough that we never have 427 1.14 riastrad * to worry about 32-bit wraparound even if there's a lot of 428 1.14 riastrad * slop. (In fact this is required to be less than 429 1.14 riastrad * UINT_MAX/4/hz, but that's not a compile-time constant.) 430 1.14 riastrad */ 431 1.14 riastrad __CTASSERT(max_period < UINT_MAX/4); 432 1.1 riastrad 433 1.1 riastrad /* 434 1.1 riastrad * Establish a softint so we can schedule it once ready. This 435 1.1 riastrad * should be at the lowest softint priority level so that we 436 1.1 riastrad * ensure all softint priorities are making progress. 437 1.1 riastrad */ 438 1.1 riastrad heartbeat_sih = softint_establish(SOFTINT_CLOCK|SOFTINT_MPSAFE, 439 1.1 riastrad &heartbeat_intr, NULL); 440 1.1 riastrad 441 1.1 riastrad /* 442 1.1 riastrad * Now that the softint is established, kick off heartbeat 443 1.1 riastrad * monitoring with the default period. This will initialize 444 1.14 riastrad * the per-CPU state to an up-to-date cache of time_uptime32. 445 1.1 riastrad */ 446 1.1 riastrad mutex_enter(&heartbeat_lock); 447 1.1 riastrad set_max_period(max_period); 448 1.1 riastrad mutex_exit(&heartbeat_lock); 449 1.1 riastrad } 450 1.1 riastrad 451 1.1 riastrad /* 452 1.1 riastrad * defibrillator(cookie) 453 1.1 riastrad * 454 1.1 riastrad * IPI handler for defibrillation. If the CPU's heart has stopped 455 1.1 riastrad * beating normally, but the CPU can still execute things, 456 1.1 riastrad * acknowledge the IPI to the doctor and then panic so we at least 457 1.1 riastrad * get a stack trace from whatever the current CPU is stuck doing, 458 1.1 riastrad * if not a core dump. 459 1.1 riastrad * 460 1.1 riastrad * (This metaphor is a little stretched, since defibrillation is 461 1.1 riastrad * usually administered when the heart is beating errattically but 462 1.1 riastrad * hasn't stopped, and causes the heart to stop temporarily, and 463 1.1 riastrad * one hopes it is not fatal. But we're (software) engineers, so 464 1.1 riastrad * we can stretch metaphors like silly putty in a blender.) 465 1.1 riastrad */ 466 1.1 riastrad static void 467 1.1 riastrad defibrillator(void *cookie) 468 1.1 riastrad { 469 1.1 riastrad bool *ack = cookie; 470 1.1 riastrad 471 1.9 riastrad /* 472 1.9 riastrad * Acknowledge the interrupt so the doctor CPU won't trigger a 473 1.9 riastrad * new panic for defibrillation timeout. 474 1.9 riastrad */ 475 1.1 riastrad atomic_store_relaxed(ack, true); 476 1.9 riastrad 477 1.9 riastrad /* 478 1.9 riastrad * If a panic is already in progress, we may have interrupted 479 1.9 riastrad * the logic that prints a stack trace on this CPU -- so let's 480 1.9 riastrad * not make it worse by giving the misapprehension of a 481 1.9 riastrad * recursive panic. 482 1.9 riastrad */ 483 1.9 riastrad if (atomic_load_relaxed(&panicstr) != NULL) 484 1.9 riastrad return; 485 1.9 riastrad 486 1.1 riastrad panic("%s[%d %s]: heart stopped beating", cpu_name(curcpu()), 487 1.1 riastrad curlwp->l_lid, 488 1.1 riastrad curlwp->l_name ? curlwp->l_name : curproc->p_comm); 489 1.1 riastrad } 490 1.1 riastrad 491 1.1 riastrad /* 492 1.1 riastrad * defibrillate(ci, unsigned d) 493 1.1 riastrad * 494 1.1 riastrad * The patient CPU ci's heart has stopped beating after d seconds. 495 1.1 riastrad * Force the patient CPU ci to panic, or panic on this CPU if the 496 1.1 riastrad * patient CPU doesn't respond within 1sec. 497 1.1 riastrad */ 498 1.1 riastrad static void __noinline 499 1.1 riastrad defibrillate(struct cpu_info *ci, unsigned d) 500 1.1 riastrad { 501 1.1 riastrad bool ack = false; 502 1.1 riastrad ipi_msg_t msg = { 503 1.1 riastrad .func = &defibrillator, 504 1.1 riastrad .arg = &ack, 505 1.1 riastrad }; 506 1.1 riastrad unsigned countdown = 1000; /* 1sec */ 507 1.1 riastrad 508 1.2 riastrad KASSERT(curcpu_stable()); 509 1.1 riastrad 510 1.1 riastrad /* 511 1.1 riastrad * First notify the console that the patient CPU's heart seems 512 1.1 riastrad * to have stopped beating. 513 1.1 riastrad */ 514 1.1 riastrad printf("%s: found %s heart stopped beating after %u seconds\n", 515 1.1 riastrad cpu_name(curcpu()), cpu_name(ci), d); 516 1.1 riastrad 517 1.1 riastrad /* 518 1.1 riastrad * Next, give the patient CPU a chance to panic, so we get a 519 1.1 riastrad * stack trace on that CPU even if we don't get a crash dump. 520 1.1 riastrad */ 521 1.1 riastrad ipi_unicast(&msg, ci); 522 1.1 riastrad 523 1.1 riastrad /* 524 1.1 riastrad * Busy-wait up to 1sec for the patient CPU to print a stack 525 1.1 riastrad * trace and panic. If the patient CPU acknowledges the IPI, 526 1.9 riastrad * just give up and stop here -- the system is coming down soon 527 1.9 riastrad * and we should avoid getting in the way. 528 1.1 riastrad */ 529 1.1 riastrad while (countdown --> 0) { 530 1.9 riastrad if (atomic_load_relaxed(&ack)) 531 1.1 riastrad return; 532 1.1 riastrad DELAY(1000); /* 1ms */ 533 1.1 riastrad } 534 1.1 riastrad 535 1.1 riastrad /* 536 1.1 riastrad * The patient CPU failed to acknowledge the panic request. 537 1.1 riastrad * Panic now; with any luck, we'll get a crash dump. 538 1.1 riastrad */ 539 1.1 riastrad panic("%s: found %s heart stopped beating and unresponsive", 540 1.1 riastrad cpu_name(curcpu()), cpu_name(ci)); 541 1.1 riastrad } 542 1.1 riastrad 543 1.1 riastrad /* 544 1.1 riastrad * select_patient() 545 1.1 riastrad * 546 1.1 riastrad * Select another CPU to check the heartbeat of. Returns NULL if 547 1.1 riastrad * there are no other online CPUs. Never returns curcpu(). 548 1.1 riastrad * Caller must have kpreemption disabled. 549 1.1 riastrad */ 550 1.1 riastrad static struct cpu_info * 551 1.1 riastrad select_patient(void) 552 1.1 riastrad { 553 1.1 riastrad CPU_INFO_ITERATOR cii; 554 1.1 riastrad struct cpu_info *first = NULL, *patient = NULL, *ci; 555 1.1 riastrad bool passedcur = false; 556 1.1 riastrad 557 1.2 riastrad KASSERT(curcpu_stable()); 558 1.1 riastrad 559 1.1 riastrad /* 560 1.1 riastrad * In the iteration order of all CPUs, find the next online CPU 561 1.1 riastrad * after curcpu(), or the first online one if curcpu() is last 562 1.1 riastrad * in the iteration order. 563 1.1 riastrad */ 564 1.1 riastrad for (CPU_INFO_FOREACH(cii, ci)) { 565 1.10 riastrad if (atomic_load_relaxed(&ci->ci_heartbeat_suspend)) 566 1.1 riastrad continue; 567 1.1 riastrad if (passedcur) { 568 1.1 riastrad /* 569 1.1 riastrad * (...|curcpu()|ci|...) 570 1.1 riastrad * 571 1.1 riastrad * Found the patient right after curcpu(). 572 1.1 riastrad */ 573 1.1 riastrad KASSERT(patient != ci); 574 1.1 riastrad patient = ci; 575 1.1 riastrad break; 576 1.1 riastrad } 577 1.1 riastrad if (ci == curcpu()) { 578 1.1 riastrad /* 579 1.1 riastrad * (...|prev|ci=curcpu()|next|...) 580 1.1 riastrad * 581 1.1 riastrad * Note that we want next (or first, if there's 582 1.1 riastrad * nothing after curcpu()). 583 1.1 riastrad */ 584 1.1 riastrad passedcur = true; 585 1.1 riastrad continue; 586 1.1 riastrad } 587 1.1 riastrad if (first == NULL) { 588 1.1 riastrad /* 589 1.1 riastrad * (ci|...|curcpu()|...) 590 1.1 riastrad * 591 1.1 riastrad * Record ci as first in case there's nothing 592 1.1 riastrad * after curcpu(). 593 1.1 riastrad */ 594 1.1 riastrad first = ci; 595 1.1 riastrad continue; 596 1.1 riastrad } 597 1.1 riastrad } 598 1.1 riastrad 599 1.1 riastrad /* 600 1.1 riastrad * If we hit the end, wrap around to the beginning. 601 1.1 riastrad */ 602 1.1 riastrad if (patient == NULL) { 603 1.1 riastrad KASSERT(passedcur); 604 1.1 riastrad patient = first; 605 1.1 riastrad } 606 1.1 riastrad 607 1.1 riastrad return patient; 608 1.1 riastrad } 609 1.1 riastrad 610 1.1 riastrad /* 611 1.1 riastrad * heartbeat() 612 1.1 riastrad * 613 1.1 riastrad * 1. Count a heartbeat on the local CPU. 614 1.1 riastrad * 615 1.1 riastrad * 2. Panic if the system uptime doesn't seem to have advanced in 616 1.1 riastrad * a while. 617 1.1 riastrad * 618 1.1 riastrad * 3. Panic if the soft interrupt on this CPU hasn't advanced the 619 1.1 riastrad * local view of the system uptime. 620 1.1 riastrad * 621 1.1 riastrad * 4. Schedule the soft interrupt to advance the local view of the 622 1.1 riastrad * system uptime. 623 1.1 riastrad * 624 1.1 riastrad * 5. Select another CPU to check the heartbeat of. 625 1.1 riastrad * 626 1.1 riastrad * 6. Panic if the other CPU hasn't advanced its view of the 627 1.1 riastrad * system uptime in a while. 628 1.1 riastrad */ 629 1.1 riastrad void 630 1.1 riastrad heartbeat(void) 631 1.1 riastrad { 632 1.1 riastrad unsigned period_ticks, period_secs; 633 1.1 riastrad unsigned count, uptime, cache, stamp, d; 634 1.1 riastrad struct cpu_info *patient; 635 1.1 riastrad 636 1.2 riastrad KASSERT(curcpu_stable()); 637 1.1 riastrad 638 1.13 riastrad /* 639 1.13 riastrad * If heartbeat checks are disabled globally, or if they are 640 1.13 riastrad * suspended locally, or if we're already panicking so it's not 641 1.13 riastrad * helpful to trigger more panics for more reasons, do nothing. 642 1.13 riastrad */ 643 1.1 riastrad period_ticks = atomic_load_relaxed(&heartbeat_max_period_ticks); 644 1.1 riastrad period_secs = atomic_load_relaxed(&heartbeat_max_period_secs); 645 1.1 riastrad if (__predict_false(period_ticks == 0) || 646 1.1 riastrad __predict_false(period_secs == 0) || 647 1.13 riastrad __predict_false(curcpu()->ci_heartbeat_suspend) || 648 1.13 riastrad __predict_false(panicstr != NULL)) 649 1.1 riastrad return; 650 1.1 riastrad 651 1.1 riastrad /* 652 1.1 riastrad * Count a heartbeat on this CPU. 653 1.1 riastrad */ 654 1.1 riastrad count = curcpu()->ci_heartbeat_count++; 655 1.1 riastrad 656 1.1 riastrad /* 657 1.1 riastrad * If the uptime hasn't changed, make sure that we haven't 658 1.1 riastrad * counted too many of our own heartbeats since the uptime last 659 1.1 riastrad * changed, and stop here -- we only do the cross-CPU work once 660 1.1 riastrad * per second. 661 1.1 riastrad */ 662 1.14 riastrad uptime = time_uptime32; 663 1.1 riastrad cache = atomic_load_relaxed(&curcpu()->ci_heartbeat_uptime_cache); 664 1.1 riastrad if (__predict_true(cache == uptime)) { 665 1.1 riastrad /* 666 1.1 riastrad * Timecounter hasn't advanced by more than a second. 667 1.1 riastrad * Make sure the timecounter isn't stuck according to 668 1.7 riastrad * our heartbeats -- unless timecounter heartbeats are 669 1.7 riastrad * suspended too. 670 1.1 riastrad * 671 1.1 riastrad * Our own heartbeat count can't roll back, and 672 1.14 riastrad * time_uptime32 should be updated before it wraps 673 1.1 riastrad * around, so d should never go negative; hence no 674 1.1 riastrad * check for d < UINT_MAX/2. 675 1.1 riastrad */ 676 1.1 riastrad stamp = 677 1.1 riastrad atomic_load_relaxed(&curcpu()->ci_heartbeat_uptime_stamp); 678 1.1 riastrad d = count - stamp; 679 1.7 riastrad if (__predict_false(d > period_ticks) && 680 1.7 riastrad !heartbeat_timecounter_suspended()) { 681 1.1 riastrad panic("%s: time has not advanced in %u heartbeats", 682 1.1 riastrad cpu_name(curcpu()), d); 683 1.1 riastrad } 684 1.1 riastrad return; 685 1.1 riastrad } 686 1.1 riastrad 687 1.1 riastrad /* 688 1.1 riastrad * If the uptime has changed, make sure that it hasn't changed 689 1.1 riastrad * so much that softints must be stuck on this CPU. Since 690 1.14 riastrad * time_uptime32 is monotonic and our cache of it is updated at 691 1.14 riastrad * most every UINT_MAX/4/hz sec (hence no concern about 692 1.14 riastrad * wraparound even after 68 or 136 years), this can't go 693 1.14 riastrad * negative, hence no check for d < UINT_MAX/2. 694 1.1 riastrad * 695 1.1 riastrad * This uses the hard timer interrupt handler on the current 696 1.1 riastrad * CPU to ensure soft interrupts at all priority levels have 697 1.1 riastrad * made progress. 698 1.1 riastrad */ 699 1.1 riastrad d = uptime - cache; 700 1.1 riastrad if (__predict_false(d > period_secs)) { 701 1.1 riastrad panic("%s: softints stuck for %u seconds", 702 1.1 riastrad cpu_name(curcpu()), d); 703 1.1 riastrad } 704 1.1 riastrad 705 1.1 riastrad /* 706 1.1 riastrad * Schedule a softint to update our cache of the system uptime 707 1.1 riastrad * so the next call to heartbeat, on this or another CPU, can 708 1.1 riastrad * detect progress on this one. 709 1.1 riastrad */ 710 1.1 riastrad softint_schedule(heartbeat_sih); 711 1.1 riastrad 712 1.1 riastrad /* 713 1.1 riastrad * Select a patient to check the heartbeat of. If there's no 714 1.1 riastrad * other online CPU, nothing to do. 715 1.1 riastrad */ 716 1.1 riastrad patient = select_patient(); 717 1.1 riastrad if (patient == NULL) 718 1.1 riastrad return; 719 1.1 riastrad 720 1.1 riastrad /* 721 1.1 riastrad * Verify that time is advancing on the patient CPU. If the 722 1.1 riastrad * delta exceeds UINT_MAX/2, that means it is already ahead by 723 1.1 riastrad * a little on the other CPU, and the subtraction went 724 1.6 riastrad * negative, which is OK. If the CPU's heartbeats have been 725 1.6 riastrad * suspended since we selected it, no worries. 726 1.1 riastrad * 727 1.1 riastrad * This uses the current CPU to ensure the other CPU has made 728 1.1 riastrad * progress, even if the other CPU's hard timer interrupt 729 1.1 riastrad * handler is stuck for some reason. 730 1.1 riastrad * 731 1.1 riastrad * XXX Maybe confirm it hasn't gone negative by more than 732 1.1 riastrad * max_period? 733 1.1 riastrad */ 734 1.1 riastrad d = uptime - atomic_load_relaxed(&patient->ci_heartbeat_uptime_cache); 735 1.1 riastrad if (__predict_false(d > period_secs) && 736 1.1 riastrad __predict_false(d < UINT_MAX/2) && 737 1.10 riastrad atomic_load_relaxed(&patient->ci_heartbeat_suspend) == 0) 738 1.1 riastrad defibrillate(patient, d); 739 1.1 riastrad } 740 1.1 riastrad 741 1.1 riastrad /* 742 1.1 riastrad * heartbeat_dump() 743 1.1 riastrad * 744 1.1 riastrad * Print the heartbeat data of all CPUs. Can be called from ddb. 745 1.1 riastrad */ 746 1.1 riastrad #ifdef DDB 747 1.1 riastrad static unsigned 748 1.6 riastrad db_read_unsigned(const volatile unsigned *p) 749 1.1 riastrad { 750 1.1 riastrad unsigned x; 751 1.1 riastrad 752 1.6 riastrad db_read_bytes((db_addr_t)(uintptr_t)p, sizeof(x), (char *)&x); 753 1.6 riastrad 754 1.6 riastrad return x; 755 1.6 riastrad } 756 1.6 riastrad 757 1.1 riastrad void 758 1.1 riastrad heartbeat_dump(void) 759 1.1 riastrad { 760 1.1 riastrad struct cpu_info *ci; 761 1.1 riastrad 762 1.1 riastrad db_printf("Heartbeats:\n"); 763 1.1 riastrad for (ci = db_cpu_first(); ci != NULL; ci = db_cpu_next(ci)) { 764 1.10 riastrad db_printf("cpu%u: count %u uptime %u stamp %u suspend %u\n", 765 1.1 riastrad db_read_unsigned(&ci->ci_index), 766 1.1 riastrad db_read_unsigned(&ci->ci_heartbeat_count), 767 1.1 riastrad db_read_unsigned(&ci->ci_heartbeat_uptime_cache), 768 1.6 riastrad db_read_unsigned(&ci->ci_heartbeat_uptime_stamp), 769 1.10 riastrad db_read_unsigned(&ci->ci_heartbeat_suspend)); 770 1.1 riastrad } 771 1.1 riastrad } 772 1.1 riastrad #endif 773