/*	$NetBSD: kern_cpu.c,v 1.100 2026/01/03 23:58:52 riastradh Exp $	*/

/*-
 * Copyright (c) 2007, 2008, 2009, 2010, 2012, 2019 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c)2007 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * CPU related routines not shared with rump.
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: kern_cpu.c,v 1.100 2026/01/03 23:58:52 riastradh Exp $");

#ifdef _KERNEL_OPT
#include "opt_cpu_ucode.h"
#include "opt_heartbeat.h"
#endif

#include <sys/param.h>
#include <sys/types.h>

#include <sys/callout.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/cpuio.h>
#include <sys/heartbeat.h>
#include <sys/idle.h>
#include <sys/intr.h>
#include <sys/kauth.h>
#include <sys/kernel.h>
#include <sys/kmem.h>
#include <sys/namei.h>
#include <sys/pcu.h>
#include <sys/percpu.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/select.h>
#include <sys/systm.h>
#include <sys/xcall.h>

#include <uvm/uvm_extern.h>

#include "ioconf.h"

/*
 * If the port has stated that cpu_data is the first thing in cpu_info,
 * verify that the claim is true.  This will prevent them from getting
 * out of sync.
 */
#ifdef __HAVE_CPU_DATA_FIRST
CTASSERT(offsetof(struct cpu_info, ci_data) == 0);
#else
CTASSERT(offsetof(struct cpu_info, ci_data) != 0);
#endif

int (*compat_cpuctl_ioctl)(struct lwp *, u_long, void *) = (void *)enosys;

static void	cpu_xc_online(struct cpu_info *, void *);
static void	cpu_xc_offline(struct cpu_info *, void *);

dev_type_ioctl(cpuctl_ioctl);

const struct cdevsw cpuctl_cdevsw = {
	.d_open = nullopen,
	.d_close = nullclose,
	.d_read = nullread,
	.d_write = nullwrite,
	.d_ioctl = cpuctl_ioctl,
	.d_stop = nullstop,
	.d_tty = notty,
	.d_poll = nopoll,
	.d_mmap = nommap,
	.d_kqfilter = nokqfilter,
	.d_discard = nodiscard,
	.d_flag = D_OTHER | D_MPSAFE
};

int
mi_cpu_attach(struct cpu_info *ci)
{
	int error;

	KASSERT(maxcpus > 0);

	if ((ci->ci_index = ncpu) >= maxcpus)
		panic("Too many CPUs.  Increase MAXCPUS?");
	kcpuset_set(kcpuset_attached, cpu_index(ci));

	/*
	 * Create a convenience cpuset of just ourselves.
	 */
	kcpuset_create(&ci->ci_kcpuset, true);
	kcpuset_set(ci->ci_kcpuset, cpu_index(ci));

	TAILQ_INIT(&ci->ci_data.cpu_ld_locks);
	__cpu_simple_lock_init(&ci->ci_data.cpu_ld_lock);

	/* This is useful for e.g. per-cpu evcnt */
	snprintf(ci->ci_data.cpu_name, sizeof(ci->ci_data.cpu_name), "cpu%d",
	    cpu_index(ci));

	if (__predict_false(cpu_infos == NULL)) {
		size_t ci_bufsize = (maxcpus + 1) * sizeof(struct cpu_info *);
		cpu_infos = kmem_zalloc(ci_bufsize, KM_SLEEP);
	}
	cpu_infos[cpu_index(ci)] = ci;

	sched_cpuattach(ci);

	error = create_idle_lwp(ci);
	if (error != 0) {
		/* XXX revert sched_cpuattach */
		return error;
	}

	if (ci == curcpu())
		ci->ci_onproc = curlwp;
	else
		ci->ci_onproc = ci->ci_data.cpu_idlelwp;

	percpu_init_cpu(ci);
	softint_init(ci);
	callout_init_cpu(ci);
	xc_init_cpu(ci);
	pool_cache_cpu_init(ci);
	selsysinit(ci);
	cache_cpu_init(ci);
	TAILQ_INIT(&ci->ci_data.cpu_biodone);
	ncpu++;
	ncpuonline++;

	return 0;
}
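
/*
 * The cpu_name initialized in mi_cpu_attach() above is handy as an
 * evcnt(9) group name.  A minimal sketch, assuming a hypothetical
 * "exampledrv" counter (illustrative only, not part of this file):
 *
 *	static struct evcnt exampledrv_ev;
 *
 *	evcnt_attach_dynamic(&exampledrv_ev, EVCNT_TYPE_MISC, NULL,
 *	    ci->ci_data.cpu_name, "wakeups");
 */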

void
cpuctlattach(int dummy __unused)
{

	KASSERT(cpu_infos != NULL);
}

int
cpuctl_ioctl(dev_t dev, u_long cmd, void *data, int flag, lwp_t *l)
{
	CPU_INFO_ITERATOR cii;
	cpustate_t *cs;
	struct cpu_info *ci;
	int error, i;
	u_int id;

	error = 0;

	mutex_enter(&cpu_lock);
	switch (cmd) {
	case IOC_CPU_SETSTATE:
		cs = data;
		error = kauth_authorize_system(l->l_cred,
		    KAUTH_SYSTEM_CPU, KAUTH_REQ_SYSTEM_CPU_SETSTATE, cs, NULL,
		    NULL);
		if (error != 0)
			break;
		if (cs->cs_id >= maxcpus ||
		    (ci = cpu_lookup(cs->cs_id)) == NULL) {
			error = SET_ERROR(ESRCH);
			break;
		}
		cpu_setintr(ci, cs->cs_intr);	/* XXX neglect errors */
		error = cpu_setstate(ci, cs->cs_online);
		break;

	case IOC_CPU_GETSTATE:
		cs = data;
		id = cs->cs_id;
		memset(cs, 0, sizeof(*cs));
		cs->cs_id = id;
		if (cs->cs_id >= maxcpus ||
		    (ci = cpu_lookup(id)) == NULL) {
			error = SET_ERROR(ESRCH);
			break;
		}
		if ((ci->ci_schedstate.spc_flags & SPCF_OFFLINE) != 0)
			cs->cs_online = false;
		else
			cs->cs_online = true;
		if ((ci->ci_schedstate.spc_flags & SPCF_NOINTR) != 0)
			cs->cs_intr = false;
		else
			cs->cs_intr = true;
		cs->cs_lastmod = (int32_t)ci->ci_schedstate.spc_lastmod;
		cs->cs_lastmodhi = (int32_t)
		    (ci->ci_schedstate.spc_lastmod >> 32);
		cs->cs_intrcnt = cpu_intr_count(ci) + 1;
		cs->cs_hwid = ci->ci_cpuid;
		break;

	case IOC_CPU_MAPID:
		i = 0;
		for (CPU_INFO_FOREACH(cii, ci)) {
			if (i++ == *(int *)data)
				break;
		}
		if (ci == NULL)
			error = SET_ERROR(ESRCH);
		else
			*(int *)data = cpu_index(ci);
		break;

	case IOC_CPU_GETCOUNT:
		*(int *)data = ncpu;
		break;

#ifdef CPU_UCODE
	case IOC_CPU_UCODE_GET_VERSION:
		error = cpu_ucode_get_version((struct cpu_ucode_version *)data);
		break;

	case IOC_CPU_UCODE_APPLY:
		error = kauth_authorize_machdep(l->l_cred,
		    KAUTH_MACHDEP_CPU_UCODE_APPLY,
		    NULL, NULL, NULL, NULL);
		if (error != 0)
			break;
		error = cpu_ucode_apply((const struct cpu_ucode *)data);
		break;
#endif

	default:
		error = (*compat_cpuctl_ioctl)(l, cmd, data);
		break;
	}
	mutex_exit(&cpu_lock);

	return error;
}
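
/*
 * The ioctls above back cpuctl(8) via /dev/cpuctl.  A userland query of
 * one CPU's state might look roughly like this (illustrative sketch
 * only, error handling trimmed; assumes <fcntl.h>, <sys/ioctl.h> and
 * <sys/cpuio.h>):
 *
 *	cpustate_t cs = { .cs_id = 0 };
 *	int fd = open("/dev/cpuctl", O_RDONLY);
 *
 *	if (fd != -1 && ioctl(fd, IOC_CPU_GETSTATE, &cs) == 0)
 *		printf("cpu%u: %s\n", cs.cs_id,
 *		    cs.cs_online ? "online" : "offline");
 */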

struct cpu_info *
cpu_lookup(u_int idx)
{
	struct cpu_info *ci;

	/*
	 * cpu_infos is a NULL-terminated array of MAXCPUS + 1 entries,
	 * so an index of MAXCPUS here is ok.  See mi_cpu_attach.
	 */
	KASSERT(idx <= maxcpus);

	if (__predict_false(cpu_infos == NULL)) {
		KASSERT(idx == 0);
		return curcpu();
	}

	ci = cpu_infos[idx];
	KASSERT(ci == NULL || cpu_index(ci) == idx);
	KASSERTMSG(idx < maxcpus || ci == NULL, "idx %d ci %p", idx, ci);

	return ci;
}

static void
cpu_xc_offline(struct cpu_info *ci, void *unused)
{
	struct schedstate_percpu *spc, *mspc = NULL;
	struct cpu_info *target_ci;
	struct lwp *l;
	CPU_INFO_ITERATOR cii;
	int s;

	/*
	 * Thread that made the cross call (separate context) holds
	 * cpu_lock on our behalf.
	 */
	spc = &ci->ci_schedstate;
	s = splsched();
	spc->spc_flags |= SPCF_OFFLINE;
	splx(s);

	/* Take the first available CPU for the migration. */
	for (CPU_INFO_FOREACH(cii, target_ci)) {
		mspc = &target_ci->ci_schedstate;
		if ((mspc->spc_flags & SPCF_OFFLINE) == 0)
			break;
	}
	KASSERT(target_ci != NULL);

	/*
	 * Migrate all non-bound threads to the other CPU.  Note that this
	 * runs from the xcall thread, thus handling of LSONPROC is not
	 * needed.
	 */
	mutex_enter(&proc_lock);
	LIST_FOREACH(l, &alllwp, l_list) {
		struct cpu_info *mci;

		lwp_lock(l);
		if (l->l_cpu != ci || (l->l_pflag & (LP_BOUND | LP_INTR))) {
			lwp_unlock(l);
			continue;
		}
		/* Regular case - no affinity. */
		if (l->l_affinity == NULL) {
			lwp_migrate(l, target_ci);
			continue;
		}
		/* Affinity is set, find an online CPU in the set. */
		for (CPU_INFO_FOREACH(cii, mci)) {
			mspc = &mci->ci_schedstate;
			if ((mspc->spc_flags & SPCF_OFFLINE) == 0 &&
			    kcpuset_isset(l->l_affinity, cpu_index(mci)))
				break;
		}
		if (mci == NULL) {
			lwp_unlock(l);
			mutex_exit(&proc_lock);
			goto fail;
		}
		lwp_migrate(l, mci);
	}
	mutex_exit(&proc_lock);

#if PCU_UNIT_COUNT > 0
	pcu_save_all_on_cpu();
#endif

	heartbeat_suspend();

#ifdef __HAVE_MD_CPU_OFFLINE
	cpu_offline_md();
#endif
	return;
fail:
	/* Just unset the SPCF_OFFLINE flag, caller will check */
	s = splsched();
	spc->spc_flags &= ~SPCF_OFFLINE;
	splx(s);
}

static void
cpu_xc_online(struct cpu_info *ci, void *unused)
{
	struct schedstate_percpu *spc;
	int s;

	heartbeat_resume();

	spc = &ci->ci_schedstate;
	s = splsched();
	spc->spc_flags &= ~SPCF_OFFLINE;
	splx(s);
}

int
cpu_setstate(struct cpu_info *ci, bool online)
{
	struct schedstate_percpu *spc;
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci2;
	uint64_t where;
	xcfunc_t func;
	int nonline;

	spc = &ci->ci_schedstate;

	KASSERT(mutex_owned(&cpu_lock));

	if (online) {
		if ((spc->spc_flags & SPCF_OFFLINE) == 0)
			return 0;
		func = (xcfunc_t)cpu_xc_online;
	} else {
		if ((spc->spc_flags & SPCF_OFFLINE) != 0)
			return 0;
		nonline = 0;
		/*
		 * Ensure that at least one CPU within the processor set
		 * stays online.  Revisit this later.
		 */
		for (CPU_INFO_FOREACH(cii, ci2)) {
			if ((ci2->ci_schedstate.spc_flags & SPCF_OFFLINE) != 0)
				continue;
			if (ci2->ci_schedstate.spc_psid != spc->spc_psid)
				continue;
			nonline++;
		}
		if (nonline == 1)
			return SET_ERROR(EBUSY);
		func = (xcfunc_t)cpu_xc_offline;
	}

	where = xc_unicast(0, func, ci, NULL, ci);
	xc_wait(where);
	if (online) {
		KASSERT((spc->spc_flags & SPCF_OFFLINE) == 0);
		ncpuonline++;
	} else {
		if ((spc->spc_flags & SPCF_OFFLINE) == 0) {
			/* If it was not set offline, then it is busy. */
			return SET_ERROR(EBUSY);
		}
		ncpuonline--;
	}

	spc->spc_lastmod = time_second;
	return 0;
}
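
/*
 * Callers of cpu_setstate() must hold cpu_lock, as the KASSERT above
 * enforces.  Taking a secondary CPU offline from kernel code might
 * look roughly like this (illustrative sketch only):
 *
 *	struct cpu_info *ci = cpu_lookup(1);
 *	int error = SET_ERROR(ESRCH);
 *
 *	if (ci != NULL) {
 *		mutex_enter(&cpu_lock);
 *		error = cpu_setstate(ci, false);
 *		mutex_exit(&cpu_lock);
 *	}
 */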

bool
cpu_is_type(struct cpu_info *ci, int wanted)
{

	return (ci->ci_schedstate.spc_flags & wanted) == wanted;
}

bool
cpu_is_idle_1stclass(struct cpu_info *ci)
{
	const int wanted = SPCF_IDLE | SPCF_1STCLASS;

	return cpu_is_type(ci, wanted);
}

bool
cpu_is_1stclass(struct cpu_info *ci)
{
	const int wanted = SPCF_1STCLASS;

	return cpu_is_type(ci, wanted);
}

bool
cpu_is_better(struct cpu_info *ci1, struct cpu_info *ci2)
{
	const int ci1_flags = ci1->ci_schedstate.spc_flags;
	const int ci2_flags = ci2->ci_schedstate.spc_flags;

	if ((ci1_flags & SPCF_1STCLASS) != 0 &&
	    (ci2_flags & SPCF_1STCLASS) == 0)
		return true;

	return false;
}
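
/*
 * The cpu_is_*() predicates above are meant for CPU selection.  As an
 * illustrative sketch only (not an interface of this file), a caller
 * scanning for an idle first-class CPU could do:
 *
 *	CPU_INFO_ITERATOR cii;
 *	struct cpu_info *ci, *pick = NULL;
 *
 *	for (CPU_INFO_FOREACH(cii, ci)) {
 *		if (cpu_is_idle_1stclass(ci)) {
 *			pick = ci;
 *			break;
 *		}
 *	}
 */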

#if defined(__HAVE_INTR_CONTROL)
static void
cpu_xc_intr(struct cpu_info *ci, void *unused)
{
	struct schedstate_percpu *spc;
	int s;

	spc = &ci->ci_schedstate;
	s = splsched();
	spc->spc_flags &= ~SPCF_NOINTR;
	splx(s);
}

static void
cpu_xc_nointr(struct cpu_info *ci, void *unused)
{
	struct schedstate_percpu *spc;
	int s;

	spc = &ci->ci_schedstate;
	s = splsched();
	spc->spc_flags |= SPCF_NOINTR;
	splx(s);
}

int
cpu_setintr(struct cpu_info *ci, bool intr)
{
	struct schedstate_percpu *spc;
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci2;
	uint64_t where;
	xcfunc_t func;
	int nintr;

	spc = &ci->ci_schedstate;

	KASSERT(mutex_owned(&cpu_lock));

	if (intr) {
		if ((spc->spc_flags & SPCF_NOINTR) == 0)
			return 0;
		func = (xcfunc_t)cpu_xc_intr;
	} else {
		if (CPU_IS_PRIMARY(ci))	/* XXX kern/45117 */
			return SET_ERROR(EINVAL);
		if ((spc->spc_flags & SPCF_NOINTR) != 0)
			return 0;
		/*
		 * Ensure that at least one CPU within the system
		 * is handling device interrupts.
		 */
		nintr = 0;
		for (CPU_INFO_FOREACH(cii, ci2)) {
			if ((ci2->ci_schedstate.spc_flags & SPCF_NOINTR) != 0)
				continue;
			if (ci2 == ci)
				continue;
			nintr++;
		}
		if (nintr == 0)
			return SET_ERROR(EBUSY);
		func = (xcfunc_t)cpu_xc_nointr;
	}

	where = xc_unicast(0, func, ci, NULL, ci);
	xc_wait(where);
	if (intr) {
		KASSERT((spc->spc_flags & SPCF_NOINTR) == 0);
	} else if ((spc->spc_flags & SPCF_NOINTR) == 0) {
		/* If SPCF_NOINTR was not set, then the CPU is busy. */
		return SET_ERROR(EBUSY);
	}

	/* Direct interrupts away from the CPU and record the change. */
	cpu_intr_redistribute();
	spc->spc_lastmod = time_second;
	return 0;
}
#else	/* __HAVE_INTR_CONTROL */
int
cpu_setintr(struct cpu_info *ci, bool intr)
{

	return SET_ERROR(EOPNOTSUPP);
}

u_int
cpu_intr_count(struct cpu_info *ci)
{

	return 0;		/* 0 == "don't know" */
}
#endif	/* __HAVE_INTR_CONTROL */

#ifdef CPU_UCODE
int
cpu_ucode_load(struct cpu_ucode_softc *sc, const char *fwname)
{
	firmware_handle_t fwh;
	int error;

	if (sc->sc_blob != NULL) {
		firmware_free(sc->sc_blob, sc->sc_blobsize);
		sc->sc_blob = NULL;
		sc->sc_blobsize = 0;
	}

	error = cpu_ucode_md_open(&fwh, sc->loader_version, fwname);
	if (error != 0) {
#ifdef DEBUG
		printf("ucode: firmware_open(%s) failed: %i\n", fwname, error);
#endif
		goto err0;
	}

	sc->sc_blobsize = firmware_get_size(fwh);
	if (sc->sc_blobsize == 0) {
		error = SET_ERROR(EFTYPE);
		firmware_close(fwh);
		goto err0;
	}
	sc->sc_blob = firmware_malloc(sc->sc_blobsize);
	if (sc->sc_blob == NULL) {
		error = SET_ERROR(ENOMEM);
		firmware_close(fwh);
		goto err0;
	}

	error = firmware_read(fwh, 0, sc->sc_blob, sc->sc_blobsize);
	firmware_close(fwh);
	if (error != 0)
		goto err1;

	return 0;

err1:
	firmware_free(sc->sc_blob, sc->sc_blobsize);
	sc->sc_blob = NULL;
	sc->sc_blobsize = 0;
err0:
	return error;
}
#endif
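
/*
 * cpu_ucode_load() is driven by machine-dependent microcode support.  A
 * hypothetical caller might use it roughly as follows (illustrative
 * sketch only; the softc setup and firmware name are assumptions, and
 * loader_version must match what cpu_ucode_md_open() expects):
 *
 *	struct cpu_ucode_softc sc;
 *
 *	memset(&sc, 0, sizeof(sc));
 *	error = cpu_ucode_load(&sc, "cpu_microcode.bin");
 */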