1 /* $NetBSD: cpu.c,v 1.214 2025/05/02 07:08:11 imil Exp $ */ 2 3 /* 4 * Copyright (c) 2000-2020 NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Bill Sommerfeld of RedBack Networks Inc, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1999 Stefan Grefen 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgement: 45 * This product includes software developed by the NetBSD 46 * Foundation, Inc. and its contributors. 47 * 4. Neither the name of The NetBSD Foundation nor the names of its 48 * contributors may be used to endorse or promote products derived 49 * from this software without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY 52 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 
62 */ 63 64 #include <sys/cdefs.h> 65 __KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.214 2025/05/02 07:08:11 imil Exp $"); 66 67 #include "opt_ddb.h" 68 #include "opt_mpbios.h" /* for MPDEBUG */ 69 #include "opt_mtrr.h" 70 #include "opt_multiprocessor.h" 71 #include "opt_svs.h" 72 73 #include "lapic.h" 74 #include "ioapic.h" 75 #include "acpica.h" 76 #include "hpet.h" 77 78 #include <sys/param.h> 79 #include <sys/proc.h> 80 #include <sys/systm.h> 81 #include <sys/device.h> 82 #include <sys/cpu.h> 83 #include <sys/cpufreq.h> 84 #include <sys/idle.h> 85 #include <sys/atomic.h> 86 #include <sys/reboot.h> 87 #include <sys/csan.h> 88 89 #include <uvm/uvm.h> 90 91 #include "acpica.h" /* for NACPICA, for mp_verbose */ 92 93 #include <x86/machdep.h> 94 #include <machine/cpufunc.h> 95 #include <machine/cpuvar.h> 96 #include <machine/pmap.h> 97 #include <machine/vmparam.h> 98 #if defined(MULTIPROCESSOR) 99 #include <machine/mpbiosvar.h> 100 #endif 101 #include <machine/mpconfig.h> /* for mp_verbose */ 102 #include <machine/pcb.h> 103 #include <machine/specialreg.h> 104 #include <machine/segments.h> 105 #include <machine/gdt.h> 106 #include <machine/mtrr.h> 107 #include <machine/pio.h> 108 #include <machine/cpu_counter.h> 109 #include <machine/pmap_private.h> 110 111 #include <x86/fpu.h> 112 113 #if NACPICA > 0 114 #include <dev/acpi/acpi_srat.h> 115 #endif 116 117 #if NLAPIC > 0 118 #include <machine/apicvar.h> 119 #include <machine/i82489reg.h> 120 #include <machine/i82489var.h> 121 #endif 122 123 #include <dev/ic/mc146818reg.h> 124 #include <dev/ic/hpetvar.h> 125 #include <i386/isa/nvram.h> 126 #include <dev/isa/isareg.h> 127 128 #include "tsc.h" 129 130 #ifndef XENPV 131 #include "hyperv.h" 132 #if NHYPERV > 0 133 #include <x86/x86/hypervvar.h> 134 #endif 135 #endif 136 137 #ifdef XEN 138 #include <xen/hypervisor.h> 139 #endif 140 141 static int cpu_match(device_t, cfdata_t, void *); 142 static void cpu_attach(device_t, device_t, void *); 143 static void cpu_defer(device_t); 144 static int cpu_rescan(device_t, const char *, const int *); 145 static void cpu_childdetached(device_t, device_t); 146 static bool cpu_stop(device_t); 147 static bool cpu_suspend(device_t, const pmf_qual_t *); 148 static bool cpu_resume(device_t, const pmf_qual_t *); 149 static bool cpu_shutdown(device_t, int); 150 151 struct cpu_softc { 152 device_t sc_dev; /* device tree glue */ 153 struct cpu_info *sc_info; /* pointer to CPU info */ 154 bool sc_wasonline; 155 }; 156 157 #ifdef MULTIPROCESSOR 158 int mp_cpu_start(struct cpu_info *, paddr_t); 159 void mp_cpu_start_cleanup(struct cpu_info *); 160 const struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL, 161 mp_cpu_start_cleanup }; 162 #endif 163 164 165 CFATTACH_DECL2_NEW(cpu, sizeof(struct cpu_softc), 166 cpu_match, cpu_attach, NULL, NULL, cpu_rescan, cpu_childdetached); 167 168 /* 169 * Statically-allocated CPU info for the primary CPU (or the only 170 * CPU, on uniprocessors). The CPU info list is initialized to 171 * point at it. 
 */
struct cpu_info cpu_info_primary __aligned(CACHE_LINE_SIZE) = {
	.ci_dev = 0,
	.ci_self = &cpu_info_primary,
	.ci_idepth = -1,
	.ci_curlwp = &lwp0,
	.ci_curldt = -1,
	.ci_kfpu_spl = -1,
};

struct cpu_info *cpu_info_list = &cpu_info_primary;

#ifdef i386
void		cpu_set_tss_gates(struct cpu_info *);
#endif

static void	cpu_init_idle_lwp(struct cpu_info *);

uint32_t cpu_feature[7] __read_mostly; /* X86 CPUID feature bits */
			/* [0] basic features cpuid.1:%edx
			 * [1] basic features cpuid.1:%ecx (CPUID2_xxx bits)
			 * [2] extended features cpuid:80000001:%edx
			 * [3] extended features cpuid:80000001:%ecx
			 * [4] VIA padlock features
			 * [5] structured extended features cpuid.7:%ebx
			 * [6] structured extended features cpuid.7:%ecx
			 */

#ifdef MULTIPROCESSOR
bool x86_mp_online;
paddr_t mp_trampoline_paddr = MP_TRAMPOLINE;
#endif
#if NLAPIC > 0
static vaddr_t cmos_data_mapping;
#endif
struct cpu_info *cpu_starting;

#ifdef MULTIPROCESSOR
void		cpu_hatch(void *);
static void	cpu_boot_secondary(struct cpu_info *ci);
static void	cpu_start_secondary(struct cpu_info *ci);
#if NLAPIC > 0
static void	cpu_copy_trampoline(paddr_t);
#endif
#endif /* MULTIPROCESSOR */

/*
 * Runs once per boot once multiprocessor goo has been detected and
 * the local APIC on the boot processor has been mapped.
 *
 * Called from lapic_boot_init() (from mpbios_scan()).
 */
#if NLAPIC > 0
void
cpu_init_first(void)
{

	cpu_info_primary.ci_cpuid = lapic_cpu_number();

	cmos_data_mapping = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_VAONLY);
	if (cmos_data_mapping == 0)
		panic("No KVA for page 0");
	pmap_kenter_pa(cmos_data_mapping, 0, VM_PROT_READ|VM_PROT_WRITE, 0);
	pmap_update(pmap_kernel());
}
#endif

static int
cpu_match(device_t parent, cfdata_t match, void *aux)
{

	return 1;
}

#ifdef __HAVE_PCPU_AREA
void
cpu_pcpuarea_init(struct cpu_info *ci)
{
	struct vm_page *pg;
	size_t i, npages;
	vaddr_t base, va;
	paddr_t pa;

	CTASSERT(sizeof(struct pcpu_entry) % PAGE_SIZE == 0);

	npages = sizeof(struct pcpu_entry) / PAGE_SIZE;
	base = (vaddr_t)&pcpuarea->ent[cpu_index(ci)];

	for (i = 0; i < npages; i++) {
		pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO);
		if (pg == NULL) {
			panic("failed to allocate pcpu PA");
		}

		va = base + i * PAGE_SIZE;
		pa = VM_PAGE_TO_PHYS(pg);

		pmap_kenter_pa(va, pa, VM_PROT_READ|VM_PROT_WRITE, 0);
	}

	pmap_update(pmap_kernel());
}
#endif

static void
cpu_vm_init(struct cpu_info *ci)
{
	unsigned int ncolors = 2;

	/*
	 * XXX: for AP's the cache info has not been initialized yet
	 * but that does not matter because uvm only pays attention to
	 * the maximum.  We should fix it once cpus have different
	 * cache sizes.
286 */ 287 for (unsigned int i = CAI_ICACHE; i <= CAI_L2CACHE; i++) { 288 struct x86_cache_info *cai; 289 unsigned int tcolors; 290 291 cai = &ci->ci_cinfo[i]; 292 293 tcolors = atop(cai->cai_totalsize); 294 switch (cai->cai_associativity) { 295 case 0xff: 296 tcolors = 1; /* fully associative */ 297 break; 298 case 0: 299 case 1: 300 break; 301 default: 302 tcolors /= cai->cai_associativity; 303 } 304 if (tcolors <= ncolors) 305 continue; 306 ncolors = tcolors; 307 } 308 309 /* 310 * If the desired number of colors is not a power of 311 * two, it won't be good. Find the greatest power of 312 * two which is an even divisor of the number of colors, 313 * to preserve even coloring of pages. 314 */ 315 if (ncolors & (ncolors - 1) ) { 316 unsigned int try, picked = 1; 317 for (try = 1; try < ncolors; try *= 2) { 318 if (ncolors % try == 0) picked = try; 319 } 320 if (picked == 1) { 321 panic("desired number of cache colors %u is " 322 " > 1, but not even!", ncolors); 323 } 324 ncolors = picked; 325 } 326 327 /* 328 * Knowing the size of the largest cache on this CPU, potentially 329 * re-color our pages. 330 */ 331 aprint_debug_dev(ci->ci_dev, "%d page colors\n", ncolors); 332 uvm_page_recolor(ncolors); 333 334 pmap_tlb_cpu_init(ci); 335 #ifndef __HAVE_DIRECT_MAP 336 pmap_vpage_cpu_init(ci); 337 #endif 338 } 339 340 static void 341 cpu_attach(device_t parent, device_t self, void *aux) 342 { 343 struct cpu_softc *sc = device_private(self); 344 struct cpu_attach_args *caa = aux; 345 struct cpu_info *ci; 346 uintptr_t ptr; 347 #if NLAPIC > 0 348 int cpunum = caa->cpu_number; 349 #endif 350 static bool again; 351 352 sc->sc_dev = self; 353 354 if (ncpu > maxcpus) { 355 #ifndef _LP64 356 aprint_error(": too many CPUs, please use NetBSD/amd64\n"); 357 #else 358 aprint_error(": too many CPUs\n"); 359 #endif 360 return; 361 } 362 363 /* 364 * If we're an Application Processor, allocate a cpu_info 365 * structure, otherwise use the primary's. 366 */ 367 if (caa->cpu_role == CPU_ROLE_AP) { 368 if ((boothowto & RB_MD1) != 0) { 369 aprint_error(": multiprocessor boot disabled\n"); 370 if (!pmf_device_register(self, NULL, NULL)) 371 aprint_error_dev(self, 372 "couldn't establish power handler\n"); 373 return; 374 } 375 aprint_naive(": Application Processor\n"); 376 ptr = (uintptr_t)uvm_km_alloc(kernel_map, 377 sizeof(*ci) + CACHE_LINE_SIZE - 1, 0, 378 UVM_KMF_WIRED|UVM_KMF_ZERO); 379 ci = (struct cpu_info *)roundup2(ptr, CACHE_LINE_SIZE); 380 ci->ci_curldt = -1; 381 } else { 382 aprint_naive(": %s Processor\n", 383 caa->cpu_role == CPU_ROLE_SP ? "Single" : "Boot"); 384 ci = &cpu_info_primary; 385 #if NLAPIC > 0 386 if (cpunum != lapic_cpu_number()) { 387 /* XXX should be done earlier. */ 388 uint32_t reg; 389 aprint_verbose("\n"); 390 aprint_verbose_dev(self, "running CPU at apic %d" 391 " instead of at expected %d", lapic_cpu_number(), 392 cpunum); 393 reg = lapic_readreg(LAPIC_ID); 394 lapic_writereg(LAPIC_ID, (reg & ~LAPIC_ID_MASK) | 395 (cpunum << LAPIC_ID_SHIFT)); 396 } 397 if (cpunum != lapic_cpu_number()) { 398 aprint_error_dev(self, "unable to reset apic id\n"); 399 } 400 #endif 401 } 402 403 ci->ci_self = ci; 404 sc->sc_info = ci; 405 ci->ci_dev = self; 406 ci->ci_acpiid = caa->cpu_id; 407 ci->ci_cpuid = caa->cpu_number; 408 ci->ci_func = caa->cpu_func; 409 ci->ci_kfpu_spl = -1; 410 aprint_normal("\n"); 411 412 /* Must be before mi_cpu_attach(). 
*/ 413 cpu_vm_init(ci); 414 415 if (caa->cpu_role == CPU_ROLE_AP) { 416 int error; 417 418 error = mi_cpu_attach(ci); 419 if (error != 0) { 420 aprint_error_dev(self, 421 "mi_cpu_attach failed with %d\n", error); 422 return; 423 } 424 #ifdef __HAVE_PCPU_AREA 425 cpu_pcpuarea_init(ci); 426 #endif 427 cpu_init_tss(ci); 428 } else { 429 KASSERT(ci->ci_data.cpu_idlelwp != NULL); 430 #if NACPICA > 0 431 /* Parse out NUMA info for cpu_identify(). */ 432 acpisrat_init(); 433 #endif 434 } 435 436 #ifdef SVS 437 cpu_svs_init(ci); 438 #endif 439 440 pmap_reference(pmap_kernel()); 441 ci->ci_pmap = pmap_kernel(); 442 ci->ci_tlbstate = TLBSTATE_STALE; 443 444 /* 445 * Boot processor may not be attached first, but the below 446 * must be done to allow booting other processors. 447 */ 448 if (!again) { 449 /* Make sure DELAY() (likely i8254_delay()) is initialized. */ 450 DELAY(1); 451 452 /* 453 * Basic init. Compute an approximate frequency for the TSC 454 * using the i8254. If there's a HPET we'll redo it later. 455 */ 456 atomic_or_32(&ci->ci_flags, CPUF_PRESENT | CPUF_PRIMARY); 457 cpu_intr_init(ci); 458 tsc_setfunc(ci); 459 cpu_get_tsc_freq(ci); 460 cpu_init(ci); 461 #ifdef i386 462 cpu_set_tss_gates(ci); 463 #endif 464 pmap_cpu_init_late(ci); 465 #if NLAPIC > 0 466 if (caa->cpu_role != CPU_ROLE_SP) { 467 /* Enable lapic. */ 468 lapic_enable(); 469 lapic_set_lvt(); 470 if (!vm_guest_is_xenpvh_or_pvhvm()) 471 lapic_calibrate_timer(false); 472 } 473 #endif 474 kcsan_cpu_init(ci); 475 again = true; 476 } 477 478 /* further PCB init done later. */ 479 480 switch (caa->cpu_role) { 481 case CPU_ROLE_SP: 482 atomic_or_32(&ci->ci_flags, CPUF_SP); 483 cpu_identify(ci); 484 x86_errata(); 485 x86_cpu_idle_init(); 486 #ifdef XENPVHVM 487 xen_hvm_init_cpu(ci); 488 #endif 489 break; 490 491 case CPU_ROLE_BP: 492 atomic_or_32(&ci->ci_flags, CPUF_BSP); 493 cpu_identify(ci); 494 x86_errata(); 495 x86_cpu_idle_init(); 496 #ifdef XENPVHVM 497 xen_hvm_init_cpu(ci); 498 #endif 499 break; 500 501 #ifdef MULTIPROCESSOR 502 case CPU_ROLE_AP: 503 /* 504 * report on an AP 505 */ 506 cpu_intr_init(ci); 507 idt_vec_init_cpu_md(&ci->ci_idtvec, cpu_index(ci)); 508 gdt_alloc_cpu(ci); 509 #ifdef i386 510 cpu_set_tss_gates(ci); 511 #endif 512 pmap_cpu_init_late(ci); 513 cpu_start_secondary(ci); 514 if (ci->ci_flags & CPUF_PRESENT) { 515 struct cpu_info *tmp; 516 517 cpu_identify(ci); 518 tmp = cpu_info_list; 519 while (tmp->ci_next) 520 tmp = tmp->ci_next; 521 522 tmp->ci_next = ci; 523 } 524 break; 525 #endif 526 527 default: 528 panic("unknown processor type??\n"); 529 } 530 531 pat_init(ci); 532 533 if (!pmf_device_register1(self, cpu_suspend, cpu_resume, cpu_shutdown)) 534 aprint_error_dev(self, "couldn't establish power handler\n"); 535 536 #ifdef MULTIPROCESSOR 537 if (mp_verbose) { 538 struct lwp *l = ci->ci_data.cpu_idlelwp; 539 struct pcb *pcb = lwp_getpcb(l); 540 541 aprint_verbose_dev(self, 542 "idle lwp at %p, idle sp at %p\n", 543 l, 544 #ifdef i386 545 (void *)pcb->pcb_esp 546 #else 547 (void *)pcb->pcb_rsp 548 #endif 549 ); 550 } 551 #endif 552 553 /* 554 * Postpone the "cpufeaturebus" scan. 555 * It is safe to scan the pseudo-bus 556 * only after all CPUs have attached. 
557 */ 558 (void)config_defer(self, cpu_defer); 559 } 560 561 static void 562 cpu_defer(device_t self) 563 { 564 cpu_rescan(self, NULL, NULL); 565 } 566 567 static int 568 cpu_rescan(device_t self, const char *ifattr, const int *locators) 569 { 570 struct cpu_softc *sc = device_private(self); 571 struct cpufeature_attach_args cfaa; 572 struct cpu_info *ci = sc->sc_info; 573 574 /* 575 * If we booted with RB_MD1 to disable multiprocessor, the 576 * auto-configuration data still contains the additional 577 * CPUs. But their initialization was mostly bypassed 578 * during attach, so we have to make sure we don't look at 579 * their featurebus info, since it wasn't retrieved. 580 */ 581 if (ci == NULL) 582 return 0; 583 584 memset(&cfaa, 0, sizeof(cfaa)); 585 cfaa.ci = ci; 586 587 if (ifattr_match(ifattr, "cpufeaturebus")) { 588 if (ci->ci_frequency == NULL) { 589 cfaa.name = "frequency"; 590 ci->ci_frequency = 591 config_found(self, &cfaa, NULL, 592 CFARGS(.iattr = "cpufeaturebus")); 593 } 594 595 if (ci->ci_padlock == NULL) { 596 cfaa.name = "padlock"; 597 ci->ci_padlock = 598 config_found(self, &cfaa, NULL, 599 CFARGS(.iattr = "cpufeaturebus")); 600 } 601 602 if (ci->ci_temperature == NULL) { 603 cfaa.name = "temperature"; 604 ci->ci_temperature = 605 config_found(self, &cfaa, NULL, 606 CFARGS(.iattr = "cpufeaturebus")); 607 } 608 609 if (ci->ci_vm == NULL) { 610 cfaa.name = "vm"; 611 ci->ci_vm = 612 config_found(self, &cfaa, NULL, 613 CFARGS(.iattr = "cpufeaturebus")); 614 } 615 } 616 617 return 0; 618 } 619 620 static void 621 cpu_childdetached(device_t self, device_t child) 622 { 623 struct cpu_softc *sc = device_private(self); 624 struct cpu_info *ci = sc->sc_info; 625 626 if (ci->ci_frequency == child) 627 ci->ci_frequency = NULL; 628 629 if (ci->ci_padlock == child) 630 ci->ci_padlock = NULL; 631 632 if (ci->ci_temperature == child) 633 ci->ci_temperature = NULL; 634 635 if (ci->ci_vm == child) 636 ci->ci_vm = NULL; 637 } 638 639 /* 640 * Initialize the processor appropriately. 641 */ 642 643 void 644 cpu_init(struct cpu_info *ci) 645 { 646 extern int x86_fpu_save; 647 uint32_t cr4 = 0; 648 649 lcr0(rcr0() | CR0_WP); 650 651 /* If global TLB caching is supported, enable it */ 652 if (cpu_feature[0] & CPUID_PGE) 653 cr4 |= CR4_PGE; 654 655 /* 656 * If we have FXSAVE/FXRESTOR, use them. 657 */ 658 if (cpu_feature[0] & CPUID_FXSR) { 659 cr4 |= CR4_OSFXSR; 660 661 /* 662 * If we have SSE/SSE2, enable XMM exceptions. 663 */ 664 if (cpu_feature[0] & (CPUID_SSE|CPUID_SSE2)) 665 cr4 |= CR4_OSXMMEXCPT; 666 } 667 668 /* If xsave is supported, enable it */ 669 if (cpu_feature[1] & CPUID2_XSAVE) 670 cr4 |= CR4_OSXSAVE; 671 672 /* If SMEP is supported, enable it */ 673 if (cpu_feature[5] & CPUID_SEF_SMEP) 674 cr4 |= CR4_SMEP; 675 676 /* If SMAP is supported, enable it */ 677 if (cpu_feature[5] & CPUID_SEF_SMAP) 678 cr4 |= CR4_SMAP; 679 680 #ifdef SVS 681 /* If PCID is supported, enable it */ 682 if (svs_pcid) 683 cr4 |= CR4_PCIDE; 684 #endif 685 686 if (cr4) { 687 cr4 |= rcr4(); 688 lcr4(cr4); 689 } 690 691 /* 692 * Changing CR4 register may change cpuid values. For example, setting 693 * CR4_OSXSAVE sets CPUID2_OSXSAVE. The CPUID2_OSXSAVE is in 694 * ci_feat_val[1], so update it. 695 * XXX Other than ci_feat_val[1] might be changed. 
696 */ 697 if (cpuid_level >= 1) { 698 u_int descs[4]; 699 700 x86_cpuid(1, descs); 701 ci->ci_feat_val[1] = descs[2]; 702 } 703 704 if (CPU_IS_PRIMARY(ci) && 705 x86_fpu_save >= FPU_SAVE_FXSAVE) { 706 fpuinit_mxcsr_mask(); 707 } 708 709 /* If xsave is enabled, enable all fpu features */ 710 if (cr4 & CR4_OSXSAVE) 711 wrxcr(0, x86_xsave_features & XCR0_FPU); 712 713 #ifdef MTRR 714 /* 715 * On a P6 or above, initialize MTRR's if the hardware supports them. 716 */ 717 if (cpu_feature[0] & CPUID_MTRR) { 718 if ((ci->ci_flags & CPUF_AP) == 0) 719 i686_mtrr_init_first(); 720 mtrr_init_cpu(ci); 721 } 722 723 #ifdef i386 724 if (strcmp((char *)(ci->ci_vendor), "AuthenticAMD") == 0) { 725 /* 726 * Must be a K6-2 Step >= 7 or a K6-III. 727 */ 728 if (CPUID_TO_FAMILY(ci->ci_signature) == 5) { 729 if (CPUID_TO_MODEL(ci->ci_signature) > 8 || 730 (CPUID_TO_MODEL(ci->ci_signature) == 8 && 731 CPUID_TO_STEPPING(ci->ci_signature) >= 7)) { 732 mtrr_funcs = &k6_mtrr_funcs; 733 k6_mtrr_init_first(); 734 mtrr_init_cpu(ci); 735 } 736 } 737 } 738 #endif /* i386 */ 739 #endif /* MTRR */ 740 741 if (ci != &cpu_info_primary) { 742 /* Synchronize TSC */ 743 atomic_or_32(&ci->ci_flags, CPUF_RUNNING); 744 tsc_sync_ap(ci); 745 } else { 746 atomic_or_32(&ci->ci_flags, CPUF_RUNNING); 747 } 748 } 749 750 #ifdef MULTIPROCESSOR 751 void 752 cpu_boot_secondary_processors(void) 753 { 754 struct cpu_info *ci; 755 kcpuset_t *cpus; 756 u_long i; 757 758 /* Now that we know the number of CPUs, patch the text segment. */ 759 x86_patch(false); 760 761 #if NACPICA > 0 762 /* Finished with NUMA info for now. */ 763 acpisrat_exit(); 764 #endif 765 766 kcpuset_create(&cpus, true); 767 kcpuset_set(cpus, cpu_index(curcpu())); 768 for (i = 0; i < maxcpus; i++) { 769 ci = cpu_lookup(i); 770 if (ci == NULL) 771 continue; 772 if (ci->ci_data.cpu_idlelwp == NULL) 773 continue; 774 if ((ci->ci_flags & CPUF_PRESENT) == 0) 775 continue; 776 if (ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY)) 777 continue; 778 cpu_boot_secondary(ci); 779 kcpuset_set(cpus, cpu_index(ci)); 780 } 781 while (!kcpuset_match(cpus, kcpuset_running)) 782 ; 783 kcpuset_destroy(cpus); 784 785 x86_mp_online = true; 786 787 /* Now that we know about the TSC, attach the timecounter. */ 788 tsc_tc_init(); 789 } 790 #endif 791 792 static void 793 cpu_init_idle_lwp(struct cpu_info *ci) 794 { 795 struct lwp *l = ci->ci_data.cpu_idlelwp; 796 struct pcb *pcb = lwp_getpcb(l); 797 798 pcb->pcb_cr0 = rcr0(); 799 } 800 801 void 802 cpu_init_idle_lwps(void) 803 { 804 struct cpu_info *ci; 805 u_long i; 806 807 for (i = 0; i < maxcpus; i++) { 808 ci = cpu_lookup(i); 809 if (ci == NULL) 810 continue; 811 if (ci->ci_data.cpu_idlelwp == NULL) 812 continue; 813 if ((ci->ci_flags & CPUF_PRESENT) == 0) 814 continue; 815 cpu_init_idle_lwp(ci); 816 } 817 } 818 819 #ifdef MULTIPROCESSOR 820 void 821 cpu_start_secondary(struct cpu_info *ci) 822 { 823 u_long psl; 824 int i; 825 826 #if NLAPIC > 0 827 paddr_t mp_pdirpa; 828 mp_pdirpa = pmap_init_tmp_pgtbl(mp_trampoline_paddr); 829 cpu_copy_trampoline(mp_pdirpa); 830 #endif 831 832 atomic_or_32(&ci->ci_flags, CPUF_AP); 833 ci->ci_curlwp = ci->ci_data.cpu_idlelwp; 834 if (CPU_STARTUP(ci, mp_trampoline_paddr) != 0) { 835 return; 836 } 837 838 /* 839 * Wait for it to become ready. Setting cpu_starting opens the 840 * initial gate and allows the AP to start soft initialization. 
 */
	KASSERT(cpu_starting == NULL);
	cpu_starting = ci;
	for (i = 100000; (!(ci->ci_flags & CPUF_PRESENT)) && i > 0; i--) {
		delay_func(10);
	}

	if ((ci->ci_flags & CPUF_PRESENT) == 0) {
		aprint_error_dev(ci->ci_dev, "failed to become ready\n");
#if defined(MPDEBUG) && defined(DDB)
		printf("dropping into debugger; continue from here to resume boot\n");
		Debugger();
#endif
	} else {
		/*
		 * Synchronize time stamp counters. Invalidate cache and do
		 * twice (in tsc_sync_bp) to minimize possible cache effects.
		 * Disable interrupts to try and rule out any external
		 * interference.
		 */
		psl = x86_read_psl();
		x86_disable_intr();
		tsc_sync_bp(ci);
		x86_write_psl(psl);
	}

	CPU_START_CLEANUP(ci);
	cpu_starting = NULL;
}

void
cpu_boot_secondary(struct cpu_info *ci)
{
	int64_t drift;
	u_long psl;
	int i;

	atomic_or_32(&ci->ci_flags, CPUF_GO);
	for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i > 0; i--) {
		delay_func(10);
	}
	if ((ci->ci_flags & CPUF_RUNNING) == 0) {
		aprint_error_dev(ci->ci_dev, "failed to start\n");
#if defined(MPDEBUG) && defined(DDB)
		printf("dropping into debugger; continue from here to resume boot\n");
		Debugger();
#endif
	} else {
		/* Synchronize TSC again, check for drift. */
		drift = ci->ci_data.cpu_cc_skew;
		psl = x86_read_psl();
		x86_disable_intr();
		tsc_sync_bp(ci);
		x86_write_psl(psl);
		drift -= ci->ci_data.cpu_cc_skew;
		aprint_debug_dev(ci->ci_dev, "TSC skew=%lld drift=%lld\n",
		    (long long)ci->ci_data.cpu_cc_skew, (long long)drift);
		tsc_sync_drift(drift);
	}
}

/*
 * The CPU ends up here when it's ready to run.
 * This is called from code in mptramp.s; at this point, we are running
 * in the idle pcb/idle stack of the new CPU.  When this function returns,
 * this processor will enter the idle loop and start looking for work.
 */
void
cpu_hatch(void *v)
{
	struct cpu_info *ci = (struct cpu_info *)v;
	struct pcb *pcb;
	int s, i;

	/* ------------------------------------------------------------- */

	/*
	 * This section of code must be compiled with SSP disabled, to
	 * prevent a race against cpu0. See sys/conf/ssp.mk.
	 */

	/*
	 * Initialize MSRs on this CPU:
	 *
	 * - On amd64: Enables SYSCALL/SYSRET.
	 *
	 * - On amd64: Sets up %fs and %gs so that %gs points to the
	 *   current struct cpu_info as needed for CPUVAR(...),
	 *   curcpu(), and curlwp.
	 *
	 *   (On i386, CPUVAR(...), curcpu(), and curlwp are made to
	 *   work first by the configuration of segment descriptors in
	 *   the Global Descriptor Table (GDT) in initgdt.)
	 *
	 * - Enables the no-execute bit if supported.
	 *
	 * Thus, after this point, CPUVAR(...), curcpu(), and curlwp
	 * will work on this CPU.
	 *
	 * Note: The call to cpu_init_msrs for cpu0 happens in
	 * init386/init_x86_64.
	 */
	cpu_init_msrs(ci, true);

	cpu_probe(ci);
	cpu_speculation_init(ci);
#if NHYPERV > 0
	hyperv_init_cpu(ci);
#endif

	ci->ci_data.cpu_cc_freq = cpu_info_primary.ci_data.cpu_cc_freq;
	/* cpu_get_tsc_freq(ci); */

	KDASSERT((ci->ci_flags & CPUF_PRESENT) == 0);

	/*
	 * Synchronize the TSC for the first time. Note that interrupts are
	 * off at this point.
959 */ 960 atomic_or_32(&ci->ci_flags, CPUF_PRESENT); 961 tsc_sync_ap(ci); 962 963 /* ------------------------------------------------------------- */ 964 965 /* 966 * Wait to be brought online. 967 * 968 * Use MONITOR/MWAIT if available. These instructions put the CPU in 969 * a low consumption mode (C-state), and if the TSC is not invariant, 970 * this causes the TSC to drift. We want this to happen, so that we 971 * can later detect (in tsc_tc_init) any abnormal drift with invariant 972 * TSCs. That's just for safety; by definition such drifts should 973 * never occur with invariant TSCs. 974 * 975 * If not available, try PAUSE. We'd like to use HLT, but we have 976 * interrupts off. 977 */ 978 while ((ci->ci_flags & CPUF_GO) == 0) { 979 if ((cpu_feature[1] & CPUID2_MONITOR) != 0) { 980 x86_monitor(&ci->ci_flags, 0, 0); 981 if ((ci->ci_flags & CPUF_GO) != 0) { 982 continue; 983 } 984 x86_mwait(0, 0); 985 } else { 986 /* 987 * XXX The loop repetition count could be a lot higher, but 988 * XXX currently qemu emulator takes a _very_long_time_ to 989 * XXX execute the pause instruction. So for now, use a low 990 * XXX value to allow the cpu to hatch before timing out. 991 */ 992 for (i = 50; i != 0; i--) { 993 x86_pause(); 994 } 995 } 996 } 997 998 /* Because the text may have been patched in x86_patch(). */ 999 wbinvd(); 1000 x86_flush(); 1001 tlbflushg(); 1002 1003 KASSERT((ci->ci_flags & CPUF_RUNNING) == 0); 1004 1005 #ifdef PAE 1006 pd_entry_t * l3_pd = ci->ci_pae_l3_pdir; 1007 for (i = 0 ; i < PDP_SIZE; i++) { 1008 l3_pd[i] = pmap_kernel()->pm_pdirpa[i] | PTE_P; 1009 } 1010 lcr3(ci->ci_pae_l3_pdirpa); 1011 #else 1012 lcr3(pmap_pdirpa(pmap_kernel(), 0)); 1013 #endif 1014 1015 pcb = lwp_getpcb(curlwp); 1016 pcb->pcb_cr3 = rcr3(); 1017 pcb = lwp_getpcb(ci->ci_data.cpu_idlelwp); 1018 lcr0(pcb->pcb_cr0); 1019 1020 cpu_init_idt(ci); 1021 gdt_init_cpu(ci); 1022 #if NLAPIC > 0 1023 lapic_enable(); 1024 lapic_set_lvt(); 1025 #endif 1026 1027 fpuinit(ci); 1028 lldt(GSYSSEL(GLDT_SEL, SEL_KPL)); 1029 ltr(ci->ci_tss_sel); 1030 1031 /* 1032 * cpu_init will re-synchronize the TSC, and will detect any abnormal 1033 * drift that would have been caused by the use of MONITOR/MWAIT 1034 * above. 1035 */ 1036 cpu_init(ci); 1037 #ifdef XENPVHVM 1038 xen_hvm_init_cpu(ci); 1039 #endif 1040 (*x86_initclock_func)(); 1041 cpu_get_tsc_freq(ci); 1042 1043 s = splhigh(); 1044 #if NLAPIC > 0 1045 lapic_write_tpri(0); 1046 #endif 1047 x86_enable_intr(); 1048 splx(s); 1049 x86_errata(); 1050 1051 aprint_debug_dev(ci->ci_dev, "running\n"); 1052 1053 kcsan_cpu_init(ci); 1054 1055 idle_loop(NULL); 1056 KASSERT(false); 1057 } 1058 #endif 1059 1060 #if defined(DDB) 1061 1062 #include <ddb/db_output.h> 1063 #include <machine/db_machdep.h> 1064 1065 /* 1066 * Dump CPU information from ddb. 1067 */ 1068 void 1069 cpu_debug_dump(void) 1070 { 1071 struct cpu_info *ci; 1072 CPU_INFO_ITERATOR cii; 1073 const char sixtyfour64space[] = 1074 #ifdef _LP64 1075 " " 1076 #endif 1077 ""; 1078 1079 db_printf("addr %sdev id flags ipis spl curlwp " 1080 "\n", sixtyfour64space); 1081 for (CPU_INFO_FOREACH(cii, ci)) { 1082 db_printf("%p %s %ld %x %x %d %10p\n", 1083 ci, 1084 ci->ci_dev == NULL ? 
"BOOT" : device_xname(ci->ci_dev), 1085 (long)ci->ci_cpuid, 1086 ci->ci_flags, ci->ci_ipis, ci->ci_ilevel, 1087 ci->ci_curlwp); 1088 } 1089 } 1090 #endif 1091 1092 #ifdef MULTIPROCESSOR 1093 #if NLAPIC > 0 1094 static void 1095 cpu_copy_trampoline(paddr_t pdir_pa) 1096 { 1097 extern uint32_t nox_flag; 1098 extern u_char cpu_spinup_trampoline[]; 1099 extern u_char cpu_spinup_trampoline_end[]; 1100 vaddr_t mp_trampoline_vaddr; 1101 struct { 1102 uint32_t large; 1103 uint32_t nox; 1104 uint32_t pdir; 1105 } smp_data; 1106 CTASSERT(sizeof(smp_data) == 3 * 4); 1107 1108 smp_data.large = (pmap_largepages != 0); 1109 smp_data.nox = nox_flag; 1110 smp_data.pdir = (uint32_t)(pdir_pa & 0xFFFFFFFF); 1111 1112 /* Enter the physical address */ 1113 mp_trampoline_vaddr = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 1114 UVM_KMF_VAONLY); 1115 pmap_kenter_pa(mp_trampoline_vaddr, mp_trampoline_paddr, 1116 VM_PROT_READ | VM_PROT_WRITE, 0); 1117 pmap_update(pmap_kernel()); 1118 1119 /* Copy boot code */ 1120 memcpy((void *)mp_trampoline_vaddr, 1121 cpu_spinup_trampoline, 1122 cpu_spinup_trampoline_end - cpu_spinup_trampoline); 1123 1124 /* Copy smp_data at the end */ 1125 memcpy((void *)(mp_trampoline_vaddr + PAGE_SIZE - sizeof(smp_data)), 1126 &smp_data, sizeof(smp_data)); 1127 1128 pmap_kremove(mp_trampoline_vaddr, PAGE_SIZE); 1129 pmap_update(pmap_kernel()); 1130 uvm_km_free(kernel_map, mp_trampoline_vaddr, PAGE_SIZE, UVM_KMF_VAONLY); 1131 } 1132 #endif 1133 1134 int 1135 mp_cpu_start(struct cpu_info *ci, paddr_t target) 1136 { 1137 #if NLAPIC > 0 1138 int error; 1139 1140 /* 1141 * Bootstrap code must be addressable in real mode 1142 * and it must be page aligned. 1143 */ 1144 KASSERT(target < 0x10000 && target % PAGE_SIZE == 0); 1145 1146 /* 1147 * "The BSP must initialize CMOS shutdown code to 0Ah ..." 1148 */ 1149 1150 outb(IO_RTC, NVRAM_RESET); 1151 outb(IO_RTC+1, NVRAM_RESET_JUMP); 1152 1153 /* 1154 * "and the warm reset vector (DWORD based at 40:67) to point 1155 * to the AP startup code ..." 1156 */ 1157 unsigned short dwordptr[2]; 1158 dwordptr[0] = 0; 1159 dwordptr[1] = target >> 4; 1160 1161 memcpy((uint8_t *)cmos_data_mapping + 0x467, dwordptr, 4); 1162 1163 if ((cpu_feature[0] & CPUID_APIC) == 0) { 1164 aprint_error("mp_cpu_start: CPU does not have APIC\n"); 1165 return ENODEV; 1166 } 1167 1168 /* 1169 * ... prior to executing the following sequence:". We'll also add in 1170 * local cache flush, in case the BIOS has left the AP with its cache 1171 * disabled. It may not be able to cope with MP coherency. 1172 */ 1173 wbinvd(); 1174 1175 if (ci->ci_flags & CPUF_AP) { 1176 error = x86_ipi_init(ci->ci_cpuid); 1177 if (error != 0) { 1178 aprint_error_dev(ci->ci_dev, "%s: IPI not taken (1)\n", 1179 __func__); 1180 return error; 1181 } 1182 delay_func(10000); 1183 1184 error = x86_ipi_startup(ci->ci_cpuid, target / PAGE_SIZE); 1185 if (error != 0) { 1186 aprint_error_dev(ci->ci_dev, "%s: IPI not taken (2)\n", 1187 __func__); 1188 return error; 1189 } 1190 delay_func(200); 1191 1192 error = x86_ipi_startup(ci->ci_cpuid, target / PAGE_SIZE); 1193 if (error != 0) { 1194 aprint_error_dev(ci->ci_dev, "%s: IPI not taken (3)\n", 1195 __func__); 1196 return error; 1197 } 1198 delay_func(200); 1199 } 1200 1201 return 0; 1202 #else 1203 return ENODEV; 1204 #endif /* NLAPIC > 0 */ 1205 } 1206 1207 void 1208 mp_cpu_start_cleanup(struct cpu_info *ci) 1209 { 1210 /* 1211 * Ensure the NVRAM reset byte contains something vaguely sane. 
1212 */ 1213 1214 outb(IO_RTC, NVRAM_RESET); 1215 outb(IO_RTC+1, NVRAM_RESET_RST); 1216 } 1217 #endif 1218 1219 #ifdef __x86_64__ 1220 typedef void (vector)(void); 1221 extern vector Xsyscall, Xsyscall32, Xsyscall_svs; 1222 #endif 1223 1224 /* 1225 * cpu_init_msrs(ci, full) 1226 * 1227 * Initialize some Model-Specific Registers (MSRs) on the current 1228 * CPU, whose struct cpu_info pointer is ci, for: 1229 * 1230 * - SYSCALL/SYSRET. 1231 * - %fs/%gs on amd64 if `full' is true; needed to make 1232 * CPUVAR(...), curcpu(), and curlwp work. (We do this at boot, 1233 * but skip it on ACPI wakeup.) 1234 * - No-execute bit, if supported. 1235 * 1236 * References: 1237 * 1238 * - Intel 64 and IA-32 Architectures Software Developer's Manual, 1239 * Volume 3: System Programming Guide, Order Number 325384, 1240 * April 2022, Sec. 5.8.8 `Fast System Calls in 64-Bit Mode', 1241 * pp. 5-22 through 5-23. 1242 * 1243 * - Intel 64 and IA-32 Architectures Software Developer's Manual, 1244 * Volume 4: Model-Specific Registers, Order Number 335592, 1245 * April 2022, Sec. 2.1 `Architectural MSRs', Table 2-2, 1246 * pp. 2-60 through 2-61. 1247 */ 1248 void 1249 cpu_init_msrs(struct cpu_info *ci, bool full) 1250 { 1251 #ifdef __x86_64__ 1252 /* 1253 * On amd64, set up the syscall target address registers 1254 * for SYSCALL/SYSRET: 1255 * 1256 * - IA32_STAR, c000_0081h (MSR_STAR): System Call Target 1257 * Address. Code and stack segment selectors for SYSRET 1258 * (bits 48:63) and SYSCALL (bits 32:47). 1259 * 1260 * - IA32_LSTAR, c000_0082h (MSR_LSTAR): IA-32e Mode System 1261 * Call Target Address. Target rip for SYSCALL when executed 1262 * in 64-bit mode. 1263 * 1264 * - IA32_CSTAR, c000_0083h (MSR_CSTAR): IA-32e Mode System 1265 * Call Target Address. Target rip for SYSCALL when executed 1266 * in compatibility mode. (XXX Manual says this is `[n]ot 1267 * used, as the SYSCALL instruction is not recognized in 1268 * compatibility mode', so why do we set it?) 1269 * 1270 * - IA32_FMASK, c000_0084h (MSR_SFMASK): System Call Flag 1271 * Mask. Mask for the RFLAGS register on SYSCALL. 1272 */ 1273 wrmsr(MSR_STAR, 1274 ((uint64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | 1275 ((uint64_t)LSEL(LSYSRETBASE_SEL, SEL_UPL) << 48)); 1276 wrmsr(MSR_LSTAR, (uint64_t)Xsyscall); 1277 wrmsr(MSR_CSTAR, (uint64_t)Xsyscall32); 1278 wrmsr(MSR_SFMASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D|PSL_AC); 1279 1280 #ifdef SVS 1281 if (svs_enabled) 1282 wrmsr(MSR_LSTAR, (uint64_t)Xsyscall_svs); 1283 #endif 1284 1285 /* 1286 * On amd64 if `full' is true -- used at boot, but not on ACPI 1287 * wakeup -- then additionally set up %fs and %gs: 1288 * 1289 * - IA32_FS_BASE, c000_0100h (MSR_FSBASE): Base address of 1290 * %fs. Not used in NetBSD kernel, so zero it. 1291 * 1292 * - IA32_GS_BASE, c000_0101h (MSR_GSBASE): Base address of 1293 * %gs. Used in NetBSD kernel by CPUVAR(...), curcpu(), and 1294 * curlwp for access to the CPU-local area, so set it to ci. 1295 * 1296 * - IA32_KERNEL_GS_BASE, c000_0102h (MSR_KERNELGSBASE): Base 1297 * address of what swapgs will leave in %gs when switching to 1298 * userland. Zero for now; will be set to pcb->pcb_gs in 1299 * cpu_switchto for user threads. 1300 */ 1301 if (full) { 1302 wrmsr(MSR_FSBASE, 0); 1303 wrmsr(MSR_GSBASE, (uint64_t)ci); 1304 wrmsr(MSR_KERNELGSBASE, 0); 1305 } 1306 #endif /* __x86_64__ */ 1307 1308 /* 1309 * If the no-execute bit is supported, enable it in: 1310 * 1311 * - IA32_EFER, c000_0080h (MSR_EFER): Extended Feature 1312 * Enables. 
1313 */ 1314 if (cpu_feature[2] & CPUID_NOX) 1315 wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE); 1316 } 1317 1318 void 1319 cpu_offline_md(void) 1320 { 1321 return; 1322 } 1323 1324 /* XXX joerg restructure and restart CPUs individually */ 1325 static bool 1326 cpu_stop(device_t dv) 1327 { 1328 struct cpu_softc *sc = device_private(dv); 1329 struct cpu_info *ci = sc->sc_info; 1330 int err; 1331 1332 KASSERT((ci->ci_flags & CPUF_PRESENT) != 0); 1333 1334 if (CPU_IS_PRIMARY(ci)) 1335 return true; 1336 1337 if (ci->ci_data.cpu_idlelwp == NULL) 1338 return true; 1339 1340 sc->sc_wasonline = !(ci->ci_schedstate.spc_flags & SPCF_OFFLINE); 1341 1342 if (sc->sc_wasonline) { 1343 mutex_enter(&cpu_lock); 1344 err = cpu_setstate(ci, false); 1345 mutex_exit(&cpu_lock); 1346 1347 if (err != 0) 1348 return false; 1349 } 1350 1351 return true; 1352 } 1353 1354 static bool 1355 cpu_suspend(device_t dv, const pmf_qual_t *qual) 1356 { 1357 struct cpu_softc *sc = device_private(dv); 1358 struct cpu_info *ci = sc->sc_info; 1359 1360 if ((ci->ci_flags & CPUF_PRESENT) == 0) 1361 return true; 1362 else { 1363 cpufreq_suspend(ci); 1364 } 1365 1366 return cpu_stop(dv); 1367 } 1368 1369 static bool 1370 cpu_resume(device_t dv, const pmf_qual_t *qual) 1371 { 1372 struct cpu_softc *sc = device_private(dv); 1373 struct cpu_info *ci = sc->sc_info; 1374 int err = 0; 1375 1376 if ((ci->ci_flags & CPUF_PRESENT) == 0) 1377 return true; 1378 1379 if (CPU_IS_PRIMARY(ci)) 1380 goto out; 1381 1382 if (ci->ci_data.cpu_idlelwp == NULL) 1383 goto out; 1384 1385 if (sc->sc_wasonline) { 1386 mutex_enter(&cpu_lock); 1387 err = cpu_setstate(ci, true); 1388 mutex_exit(&cpu_lock); 1389 } 1390 1391 out: 1392 if (err != 0) 1393 return false; 1394 1395 cpufreq_resume(ci); 1396 1397 return true; 1398 } 1399 1400 static bool 1401 cpu_shutdown(device_t dv, int how) 1402 { 1403 struct cpu_softc *sc = device_private(dv); 1404 struct cpu_info *ci = sc->sc_info; 1405 1406 if ((ci->ci_flags & CPUF_BSP) != 0) 1407 return false; 1408 1409 if ((ci->ci_flags & CPUF_PRESENT) == 0) 1410 return true; 1411 1412 return cpu_stop(dv); 1413 } 1414 1415 /* Get the TSC frequency and set it to ci->ci_data.cpu_cc_freq. */ 1416 void 1417 cpu_get_tsc_freq(struct cpu_info *ci) 1418 { 1419 static uint64_t freq_from_cpuid = 0; 1420 uint64_t freq = 0, t0, t1; 1421 int64_t overhead; 1422 1423 if (CPU_IS_PRIMARY(ci) && cpu_hascounter()) { 1424 /* 1425 * If it's the first call of this function, try to get TSC 1426 * freq from CPUID by calling cpu_tsc_freq_cpuid(). 1427 * The function also set lapic_per_second variable if it's 1428 * known. This is required for Intel's Comet Lake and newer 1429 * processors to set LAPIC timer correctly. 1430 * 1431 * If TSC freq is already known by CPUID, don't go through 1432 * tests again. 1433 */ 1434 if (freq_from_cpuid != 0) 1435 return; 1436 1437 if (ci->ci_data.cpu_cc_freq == 0) 1438 freq = freq_from_cpuid = cpu_tsc_freq_cpuid(ci); 1439 if (freq != 0) 1440 aprint_debug_dev(ci->ci_dev, "TSC freq " 1441 "from CPUID %" PRIu64 " Hz\n", freq); 1442 #if NHPET > 0 1443 if (freq == 0) { 1444 freq = hpet_tsc_freq(); 1445 if (freq != 0) 1446 aprint_debug_dev(ci->ci_dev, "TSC freq " 1447 "from HPET %" PRIu64 " Hz\n", freq); 1448 } 1449 #endif 1450 if (freq == 0) { 1451 /* 1452 * Work out the approximate overhead involved below. 1453 * Discard the result of the first go around the 1454 * loop. 
1455 */ 1456 overhead = 0; 1457 for (int i = 0; i <= 8; i++) { 1458 const int s = splhigh(); 1459 t0 = cpu_counter(); 1460 delay_func(0); 1461 t1 = cpu_counter(); 1462 splx(s); 1463 if (i > 0) { 1464 overhead += (t1 - t0); 1465 } 1466 } 1467 overhead >>= 3; 1468 1469 /* 1470 * Now do the calibration. 1471 */ 1472 freq = 0; 1473 for (int i = 0; i < 1000; i++) { 1474 const int s = splhigh(); 1475 t0 = cpu_counter(); 1476 delay_func(100); 1477 t1 = cpu_counter(); 1478 splx(s); 1479 freq += t1 - t0 - overhead; 1480 } 1481 freq = freq * 10; 1482 1483 aprint_debug_dev(ci->ci_dev, "TSC freq " 1484 "from delay %" PRIu64 " Hz\n", freq); 1485 } 1486 if (ci->ci_data.cpu_cc_freq != 0) { 1487 freq_from_cpuid = cpu_tsc_freq_cpuid(ci); 1488 if ((freq_from_cpuid != 0) 1489 && (freq != freq_from_cpuid)) 1490 aprint_verbose_dev(ci->ci_dev, "TSC freq " 1491 "calibrated %" PRIu64 " Hz\n", freq); 1492 } 1493 } else { 1494 freq = cpu_info_primary.ci_data.cpu_cc_freq; 1495 } 1496 1497 ci->ci_data.cpu_cc_freq = freq; 1498 } 1499 1500 bool 1501 has_lapic(void) 1502 { 1503 #if NLAPIC > 0 1504 return true; 1505 #else 1506 return false; 1507 #endif 1508 } 1509 1510 void 1511 x86_cpu_idle_mwait(void) 1512 { 1513 struct cpu_info *ci = curcpu(); 1514 1515 KASSERT(ci->ci_ilevel == IPL_NONE); 1516 1517 x86_monitor(&ci->ci_want_resched, 0, 0); 1518 if (__predict_false(ci->ci_want_resched)) { 1519 return; 1520 } 1521 x86_mwait(0, 0); 1522 } 1523 1524 void 1525 x86_cpu_idle_halt(void) 1526 { 1527 struct cpu_info *ci = curcpu(); 1528 1529 KASSERT(ci->ci_ilevel == IPL_NONE); 1530 1531 x86_disable_intr(); 1532 if (!__predict_false(ci->ci_want_resched)) { 1533 x86_stihlt(); 1534 } else { 1535 x86_enable_intr(); 1536 } 1537 } 1538 1539 /* 1540 * Loads pmap for the current CPU. 1541 */ 1542 void 1543 cpu_load_pmap(struct pmap *pmap, struct pmap *oldpmap) 1544 { 1545 1546 KASSERT(kpreempt_disabled()); 1547 1548 #ifdef SVS 1549 if (svs_enabled && pmap_is_user(pmap)) { 1550 svs_pdir_switch(pmap); 1551 } 1552 #endif 1553 1554 #ifdef PAE 1555 struct cpu_info *ci = curcpu(); 1556 bool interrupts_enabled; 1557 pd_entry_t *l3_pd = ci->ci_pae_l3_pdir; 1558 int i; 1559 1560 /* 1561 * disable interrupts to block TLB shootdowns, which can reload cr3. 1562 * while this doesn't block NMIs, it's probably ok as NMIs unlikely 1563 * reload cr3. 1564 */ 1565 interrupts_enabled = (x86_read_flags() & PSL_I) != 0; 1566 if (interrupts_enabled) 1567 x86_disable_intr(); 1568 1569 for (i = 0 ; i < PDP_SIZE; i++) { 1570 l3_pd[i] = pmap->pm_pdirpa[i] | PTE_P; 1571 } 1572 1573 if (interrupts_enabled) 1574 x86_enable_intr(); 1575 tlbflush(); 1576 #else 1577 lcr3(pmap_pdirpa(pmap, 0)); 1578 #endif 1579 } 1580 1581 /* 1582 * Notify all other cpus to halt. 1583 */ 1584 1585 void 1586 cpu_broadcast_halt(void) 1587 { 1588 x86_broadcast_ipi(X86_IPI_HALT); 1589 } 1590 1591 /* 1592 * Send a dummy ipi to a cpu to force it to run splraise()/spllower(), 1593 * and trigger an AST on the running LWP. 1594 */ 1595 1596 void 1597 cpu_kick(struct cpu_info *ci) 1598 { 1599 x86_send_ipi(ci, X86_IPI_AST); 1600 } 1601
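
/*
 * A minimal stand-alone sketch of the overhead-compensated calibration
 * arithmetic used by cpu_get_tsc_freq() above, for illustration only and
 * not part of this driver.  It assumes a GCC/Clang x86 userland host:
 * __rdtsc() from <x86intrin.h> stands in for cpu_counter(), and usleep()
 * is a crude stand-in for delay_func(), so the absolute result is only a
 * rough approximation -- the point is the arithmetic (1000 samples of a
 * 100us delay cover 0.1s, hence the final multiply by 10), not precision.
 * Kept under #if 0 so it is never compiled into the kernel.
 */
#if 0
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <x86intrin.h>

int
main(void)
{
	uint64_t t0, t1, freq = 0;
	int64_t overhead = 0;
	int i;

	/*
	 * Estimate the fixed cost of a zero-length delay: nine passes,
	 * the first discarded, then average the remaining eight.
	 */
	for (i = 0; i <= 8; i++) {
		t0 = __rdtsc();
		usleep(0);
		t1 = __rdtsc();
		if (i > 0)
			overhead += t1 - t0;
	}
	overhead >>= 3;

	/*
	 * Accumulate overhead-corrected cycle counts over 1000 delays of
	 * 100us each, i.e. 0.1s in total; multiplying the sum by 10
	 * converts it to cycles per second.
	 */
	for (i = 0; i < 1000; i++) {
		t0 = __rdtsc();
		usleep(100);
		t1 = __rdtsc();
		freq += t1 - t0 - overhead;
	}
	freq *= 10;

	printf("approx. TSC frequency: %llu Hz\n",
	    (unsigned long long)freq);
	return 0;
}
#endif	/* illustrative sketch only */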