1 1.214 imil /* $NetBSD: cpu.c,v 1.214 2025/05/02 07:08:11 imil Exp $ */ 2 1.2 ad 3 1.134 maxv /* 4 1.190 ad * Copyright (c) 2000-2020 NetBSD Foundation, Inc. 5 1.2 ad * All rights reserved. 6 1.2 ad * 7 1.2 ad * This code is derived from software contributed to The NetBSD Foundation 8 1.11 ad * by Bill Sommerfeld of RedBack Networks Inc, and by Andrew Doran. 9 1.2 ad * 10 1.2 ad * Redistribution and use in source and binary forms, with or without 11 1.2 ad * modification, are permitted provided that the following conditions 12 1.2 ad * are met: 13 1.2 ad * 1. Redistributions of source code must retain the above copyright 14 1.2 ad * notice, this list of conditions and the following disclaimer. 15 1.2 ad * 2. Redistributions in binary form must reproduce the above copyright 16 1.2 ad * notice, this list of conditions and the following disclaimer in the 17 1.2 ad * documentation and/or other materials provided with the distribution. 18 1.2 ad * 19 1.2 ad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.2 ad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.2 ad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.2 ad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.2 ad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.2 ad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.2 ad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.2 ad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.2 ad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.2 ad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.2 ad * POSSIBILITY OF SUCH DAMAGE. 
30 1.2 ad */ 31 1.2 ad 32 1.2 ad /* 33 1.2 ad * Copyright (c) 1999 Stefan Grefen 34 1.2 ad * 35 1.2 ad * Redistribution and use in source and binary forms, with or without 36 1.2 ad * modification, are permitted provided that the following conditions 37 1.2 ad * are met: 38 1.2 ad * 1. Redistributions of source code must retain the above copyright 39 1.2 ad * notice, this list of conditions and the following disclaimer. 40 1.2 ad * 2. Redistributions in binary form must reproduce the above copyright 41 1.2 ad * notice, this list of conditions and the following disclaimer in the 42 1.2 ad * documentation and/or other materials provided with the distribution. 43 1.2 ad * 3. All advertising materials mentioning features or use of this software 44 1.2 ad * must display the following acknowledgement: 45 1.2 ad * This product includes software developed by the NetBSD 46 1.2 ad * Foundation, Inc. and its contributors. 47 1.2 ad * 4. Neither the name of The NetBSD Foundation nor the names of its 48 1.2 ad * contributors may be used to endorse or promote products derived 49 1.2 ad * from this software without specific prior written permission. 50 1.2 ad * 51 1.2 ad * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY 52 1.2 ad * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 1.2 ad * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 1.2 ad * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE 55 1.2 ad * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 1.2 ad * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 1.2 ad * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 1.2 ad * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 1.2 ad * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 1.2 ad * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 1.2 ad * SUCH DAMAGE. 62 1.2 ad */ 63 1.2 ad 64 1.2 ad #include <sys/cdefs.h> 65 1.214 imil __KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.214 2025/05/02 07:08:11 imil Exp $"); 66 1.2 ad 67 1.2 ad #include "opt_ddb.h" 68 1.2 ad #include "opt_mpbios.h" /* for MPDEBUG */ 69 1.2 ad #include "opt_mtrr.h" 70 1.101 kiyohara #include "opt_multiprocessor.h" 71 1.144 maxv #include "opt_svs.h" 72 1.2 ad 73 1.2 ad #include "lapic.h" 74 1.2 ad #include "ioapic.h" 75 1.179 ad #include "acpica.h" 76 1.190 ad #include "hpet.h" 77 1.2 ad 78 1.2 ad #include <sys/param.h> 79 1.2 ad #include <sys/proc.h> 80 1.2 ad #include <sys/systm.h> 81 1.2 ad #include <sys/device.h> 82 1.9 ad #include <sys/cpu.h> 83 1.93 jruoho #include <sys/cpufreq.h> 84 1.98 rmind #include <sys/idle.h> 85 1.9 ad #include <sys/atomic.h> 86 1.35 ad #include <sys/reboot.h> 87 1.174 maxv #include <sys/csan.h> 88 1.2 ad 89 1.78 uebayasi #include <uvm/uvm.h> 90 1.2 ad 91 1.102 pgoyette #include "acpica.h" /* for NACPICA, for mp_verbose */ 92 1.102 pgoyette 93 1.187 bouyer #include <x86/machdep.h> 94 1.2 ad #include <machine/cpufunc.h> 95 1.2 ad #include <machine/cpuvar.h> 96 1.2 ad #include <machine/pmap.h> 97 1.2 ad #include <machine/vmparam.h> 98 1.102 pgoyette #if defined(MULTIPROCESSOR) 99 1.2 ad #include <machine/mpbiosvar.h> 100 1.101 kiyohara #endif 101 1.102 pgoyette #include <machine/mpconfig.h> /* for mp_verbose */ 102 1.2 ad #include <machine/pcb.h> 103 1.2 
ad #include <machine/specialreg.h> 104 1.2 ad #include <machine/segments.h> 105 1.2 ad #include <machine/gdt.h> 106 1.2 ad #include <machine/mtrr.h> 107 1.2 ad #include <machine/pio.h> 108 1.38 ad #include <machine/cpu_counter.h> 109 1.205 riastrad #include <machine/pmap_private.h> 110 1.2 ad 111 1.109 dsl #include <x86/fpu.h> 112 1.109 dsl 113 1.179 ad #if NACPICA > 0 114 1.179 ad #include <dev/acpi/acpi_srat.h> 115 1.179 ad #endif 116 1.179 ad 117 1.101 kiyohara #if NLAPIC > 0 118 1.2 ad #include <machine/apicvar.h> 119 1.2 ad #include <machine/i82489reg.h> 120 1.2 ad #include <machine/i82489var.h> 121 1.101 kiyohara #endif 122 1.2 ad 123 1.2 ad #include <dev/ic/mc146818reg.h> 124 1.190 ad #include <dev/ic/hpetvar.h> 125 1.2 ad #include <i386/isa/nvram.h> 126 1.2 ad #include <dev/isa/isareg.h> 127 1.2 ad 128 1.38 ad #include "tsc.h" 129 1.38 ad 130 1.187 bouyer #ifndef XENPV 131 1.178 nonaka #include "hyperv.h" 132 1.178 nonaka #if NHYPERV > 0 133 1.178 nonaka #include <x86/x86/hypervvar.h> 134 1.178 nonaka #endif 135 1.178 nonaka #endif 136 1.178 nonaka 137 1.187 bouyer #ifdef XEN 138 1.187 bouyer #include <xen/hypervisor.h> 139 1.187 bouyer #endif 140 1.187 bouyer 141 1.87 jruoho static int cpu_match(device_t, cfdata_t, void *); 142 1.87 jruoho static void cpu_attach(device_t, device_t, void *); 143 1.87 jruoho static void cpu_defer(device_t); 144 1.87 jruoho static int cpu_rescan(device_t, const char *, const int *); 145 1.87 jruoho static void cpu_childdetached(device_t, device_t); 146 1.96 jruoho static bool cpu_stop(device_t); 147 1.69 dyoung static bool cpu_suspend(device_t, const pmf_qual_t *); 148 1.69 dyoung static bool cpu_resume(device_t, const pmf_qual_t *); 149 1.79 jruoho static bool cpu_shutdown(device_t, int); 150 1.12 jmcneill 151 1.2 ad struct cpu_softc { 152 1.23 cube device_t sc_dev; /* device tree glue */ 153 1.2 ad struct cpu_info *sc_info; /* pointer to CPU info */ 154 1.20 jmcneill bool sc_wasonline; 155 1.2 ad }; 156 1.2 ad 157 1.101 
kiyohara #ifdef MULTIPROCESSOR 158 1.120 msaitoh int mp_cpu_start(struct cpu_info *, paddr_t); 159 1.2 ad void mp_cpu_start_cleanup(struct cpu_info *); 160 1.2 ad const struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL, 161 1.2 ad mp_cpu_start_cleanup }; 162 1.101 kiyohara #endif 163 1.2 ad 164 1.2 ad 165 1.81 jmcneill CFATTACH_DECL2_NEW(cpu, sizeof(struct cpu_softc), 166 1.81 jmcneill cpu_match, cpu_attach, NULL, NULL, cpu_rescan, cpu_childdetached); 167 1.2 ad 168 1.2 ad /* 169 1.2 ad * Statically-allocated CPU info for the primary CPU (or the only 170 1.2 ad * CPU, on uniprocessors). The CPU info list is initialized to 171 1.2 ad * point at it. 172 1.2 ad */ 173 1.21 ad struct cpu_info cpu_info_primary __aligned(CACHE_LINE_SIZE) = { 174 1.2 ad .ci_dev = 0, 175 1.2 ad .ci_self = &cpu_info_primary, 176 1.2 ad .ci_idepth = -1, 177 1.2 ad .ci_curlwp = &lwp0, 178 1.43 ad .ci_curldt = -1, 179 1.203 riastrad .ci_kfpu_spl = -1, 180 1.2 ad }; 181 1.2 ad 182 1.2 ad struct cpu_info *cpu_info_list = &cpu_info_primary; 183 1.2 ad 184 1.2 ad #ifdef i386 185 1.134 maxv void cpu_set_tss_gates(struct cpu_info *); 186 1.2 ad #endif 187 1.2 ad 188 1.12 jmcneill static void cpu_init_idle_lwp(struct cpu_info *); 189 1.12 jmcneill 190 1.122 maxv uint32_t cpu_feature[7] __read_mostly; /* X86 CPUID feature bits */ 191 1.117 maxv /* [0] basic features cpuid.1:%edx 192 1.117 maxv * [1] basic features cpuid.1:%ecx (CPUID2_xxx bits) 193 1.117 maxv * [2] extended features cpuid:80000001:%edx 194 1.117 maxv * [3] extended features cpuid:80000001:%ecx 195 1.117 maxv * [4] VIA padlock features 196 1.117 maxv * [5] structured extended features cpuid.7:%ebx 197 1.117 maxv * [6] structured extended features cpuid.7:%ecx 198 1.117 maxv */ 199 1.70 jym 200 1.101 kiyohara #ifdef MULTIPROCESSOR 201 1.12 jmcneill bool x86_mp_online; 202 1.12 jmcneill paddr_t mp_trampoline_paddr = MP_TRAMPOLINE; 203 1.101 kiyohara #endif 204 1.101 kiyohara #if NLAPIC > 0 205 1.14 joerg static vaddr_t 
/*
 * One-time boot setup, run after multiprocessor configuration has been
 * detected and the boot processor's local APIC has been mapped.
 *
 * Called from lapic_boot_init() (from mpbios_scan()).
 *
 * Records the boot CPU's local APIC number as its cpuid, then reserves one
 * page of kernel virtual address space and maps physical page 0 there
 * read/write, remembering the address in cmos_data_mapping.  Panics if no
 * kernel VA can be obtained.
 */
#if NLAPIC > 0
void
cpu_init_first(void)
{
	vaddr_t page0_va;

	cpu_info_primary.ci_cpuid = lapic_cpu_number();

	/* VA only: the physical page is entered by hand just below. */
	page0_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_VAONLY);
	cmos_data_mapping = page0_va;
	if (page0_va == 0) {
		panic("No KVA for page 0");
	}

	pmap_kenter_pa(page0_va, 0, VM_PROT_READ | VM_PROT_WRITE, 0);
	pmap_update(pmap_kernel());
}
#endif
1.142 maxv pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO); 262 1.142 maxv if (pg == NULL) { 263 1.142 maxv panic("failed to allocate pcpu PA"); 264 1.142 maxv } 265 1.142 maxv 266 1.142 maxv va = base + i * PAGE_SIZE; 267 1.142 maxv pa = VM_PAGE_TO_PHYS(pg); 268 1.142 maxv 269 1.142 maxv pmap_kenter_pa(va, pa, VM_PROT_READ|VM_PROT_WRITE, 0); 270 1.142 maxv } 271 1.142 maxv 272 1.142 maxv pmap_update(pmap_kernel()); 273 1.142 maxv } 274 1.142 maxv #endif 275 1.142 maxv 276 1.2 ad static void 277 1.2 ad cpu_vm_init(struct cpu_info *ci) 278 1.2 ad { 279 1.199 christos unsigned int ncolors = 2; 280 1.2 ad 281 1.199 christos /* 282 1.199 christos * XXX: for AP's the cache info has not been initialized yet 283 1.199 christos * but that does not matter because uvm only pays attention at 284 1.199 christos * the maximum only. We should fix it once cpus have different 285 1.199 christos * cache sizes. 286 1.199 christos */ 287 1.199 christos for (unsigned int i = CAI_ICACHE; i <= CAI_L2CACHE; i++) { 288 1.2 ad struct x86_cache_info *cai; 289 1.199 christos unsigned int tcolors; 290 1.2 ad 291 1.2 ad cai = &ci->ci_cinfo[i]; 292 1.2 ad 293 1.2 ad tcolors = atop(cai->cai_totalsize); 294 1.184 msaitoh switch (cai->cai_associativity) { 295 1.2 ad case 0xff: 296 1.2 ad tcolors = 1; /* fully associative */ 297 1.2 ad break; 298 1.2 ad case 0: 299 1.2 ad case 1: 300 1.2 ad break; 301 1.2 ad default: 302 1.2 ad tcolors /= cai->cai_associativity; 303 1.2 ad } 304 1.199 christos if (tcolors <= ncolors) 305 1.199 christos continue; 306 1.199 christos ncolors = tcolors; 307 1.199 christos } 308 1.199 christos 309 1.199 christos /* 310 1.199 christos * If the desired number of colors is not a power of 311 1.199 christos * two, it won't be good. Find the greatest power of 312 1.199 christos * two which is an even divisor of the number of colors, 313 1.199 christos * to preserve even coloring of pages. 
314 1.199 christos */ 315 1.199 christos if (ncolors & (ncolors - 1) ) { 316 1.199 christos unsigned int try, picked = 1; 317 1.199 christos for (try = 1; try < ncolors; try *= 2) { 318 1.199 christos if (ncolors % try == 0) picked = try; 319 1.199 christos } 320 1.199 christos if (picked == 1) { 321 1.199 christos panic("desired number of cache colors %u is " 322 1.199 christos " > 1, but not even!", ncolors); 323 1.32 tls } 324 1.199 christos ncolors = picked; 325 1.2 ad } 326 1.2 ad 327 1.2 ad /* 328 1.94 mrg * Knowing the size of the largest cache on this CPU, potentially 329 1.94 mrg * re-color our pages. 330 1.2 ad */ 331 1.52 ad aprint_debug_dev(ci->ci_dev, "%d page colors\n", ncolors); 332 1.2 ad uvm_page_recolor(ncolors); 333 1.98 rmind 334 1.98 rmind pmap_tlb_cpu_init(ci); 335 1.123 maxv #ifndef __HAVE_DIRECT_MAP 336 1.123 maxv pmap_vpage_cpu_init(ci); 337 1.123 maxv #endif 338 1.2 ad } 339 1.2 ad 340 1.87 jruoho static void 341 1.23 cube cpu_attach(device_t parent, device_t self, void *aux) 342 1.2 ad { 343 1.23 cube struct cpu_softc *sc = device_private(self); 344 1.2 ad struct cpu_attach_args *caa = aux; 345 1.2 ad struct cpu_info *ci; 346 1.21 ad uintptr_t ptr; 347 1.101 kiyohara #if NLAPIC > 0 348 1.2 ad int cpunum = caa->cpu_number; 349 1.101 kiyohara #endif 350 1.51 ad static bool again; 351 1.2 ad 352 1.23 cube sc->sc_dev = self; 353 1.23 cube 354 1.163 cherry if (ncpu > maxcpus) { 355 1.98 rmind #ifndef _LP64 356 1.98 rmind aprint_error(": too many CPUs, please use NetBSD/amd64\n"); 357 1.98 rmind #else 358 1.98 rmind aprint_error(": too many CPUs\n"); 359 1.98 rmind #endif 360 1.48 ad return; 361 1.48 ad } 362 1.48 ad 363 1.2 ad /* 364 1.2 ad * If we're an Application Processor, allocate a cpu_info 365 1.2 ad * structure, otherwise use the primary's. 
366 1.2 ad */ 367 1.2 ad if (caa->cpu_role == CPU_ROLE_AP) { 368 1.36 ad if ((boothowto & RB_MD1) != 0) { 369 1.35 ad aprint_error(": multiprocessor boot disabled\n"); 370 1.56 jmcneill if (!pmf_device_register(self, NULL, NULL)) 371 1.56 jmcneill aprint_error_dev(self, 372 1.56 jmcneill "couldn't establish power handler\n"); 373 1.35 ad return; 374 1.35 ad } 375 1.2 ad aprint_naive(": Application Processor\n"); 376 1.143 maxv ptr = (uintptr_t)uvm_km_alloc(kernel_map, 377 1.143 maxv sizeof(*ci) + CACHE_LINE_SIZE - 1, 0, 378 1.143 maxv UVM_KMF_WIRED|UVM_KMF_ZERO); 379 1.67 jym ci = (struct cpu_info *)roundup2(ptr, CACHE_LINE_SIZE); 380 1.43 ad ci->ci_curldt = -1; 381 1.2 ad } else { 382 1.2 ad aprint_naive(": %s Processor\n", 383 1.2 ad caa->cpu_role == CPU_ROLE_SP ? "Single" : "Boot"); 384 1.2 ad ci = &cpu_info_primary; 385 1.101 kiyohara #if NLAPIC > 0 386 1.2 ad if (cpunum != lapic_cpu_number()) { 387 1.51 ad /* XXX should be done earlier. */ 388 1.39 ad uint32_t reg; 389 1.39 ad aprint_verbose("\n"); 390 1.47 ad aprint_verbose_dev(self, "running CPU at apic %d" 391 1.47 ad " instead of at expected %d", lapic_cpu_number(), 392 1.23 cube cpunum); 393 1.125 nonaka reg = lapic_readreg(LAPIC_ID); 394 1.125 nonaka lapic_writereg(LAPIC_ID, (reg & ~LAPIC_ID_MASK) | 395 1.39 ad (cpunum << LAPIC_ID_SHIFT)); 396 1.2 ad } 397 1.47 ad if (cpunum != lapic_cpu_number()) { 398 1.47 ad aprint_error_dev(self, "unable to reset apic id\n"); 399 1.47 ad } 400 1.101 kiyohara #endif 401 1.2 ad } 402 1.2 ad 403 1.2 ad ci->ci_self = ci; 404 1.2 ad sc->sc_info = ci; 405 1.2 ad ci->ci_dev = self; 406 1.74 jruoho ci->ci_acpiid = caa->cpu_id; 407 1.42 ad ci->ci_cpuid = caa->cpu_number; 408 1.2 ad ci->ci_func = caa->cpu_func; 409 1.177 maxv ci->ci_kfpu_spl = -1; 410 1.112 msaitoh aprint_normal("\n"); 411 1.2 ad 412 1.55 ad /* Must be before mi_cpu_attach(). 
*/ 413 1.55 ad cpu_vm_init(ci); 414 1.55 ad 415 1.2 ad if (caa->cpu_role == CPU_ROLE_AP) { 416 1.2 ad int error; 417 1.2 ad 418 1.2 ad error = mi_cpu_attach(ci); 419 1.2 ad if (error != 0) { 420 1.47 ad aprint_error_dev(self, 421 1.30 cegger "mi_cpu_attach failed with %d\n", error); 422 1.2 ad return; 423 1.2 ad } 424 1.142 maxv #ifdef __HAVE_PCPU_AREA 425 1.142 maxv cpu_pcpuarea_init(ci); 426 1.142 maxv #endif 427 1.15 yamt cpu_init_tss(ci); 428 1.2 ad } else { 429 1.2 ad KASSERT(ci->ci_data.cpu_idlelwp != NULL); 430 1.179 ad #if NACPICA > 0 431 1.179 ad /* Parse out NUMA info for cpu_identify(). */ 432 1.179 ad acpisrat_init(); 433 1.179 ad #endif 434 1.2 ad } 435 1.2 ad 436 1.146 maxv #ifdef SVS 437 1.146 maxv cpu_svs_init(ci); 438 1.146 maxv #endif 439 1.146 maxv 440 1.2 ad pmap_reference(pmap_kernel()); 441 1.2 ad ci->ci_pmap = pmap_kernel(); 442 1.2 ad ci->ci_tlbstate = TLBSTATE_STALE; 443 1.2 ad 444 1.51 ad /* 445 1.51 ad * Boot processor may not be attached first, but the below 446 1.51 ad * must be done to allow booting other processors. 447 1.51 ad */ 448 1.51 ad if (!again) { 449 1.190 ad /* Make sure DELAY() (likely i8254_delay()) is initialized. */ 450 1.190 ad DELAY(1); 451 1.190 ad 452 1.190 ad /* 453 1.190 ad * Basic init. Compute an approximate frequency for the TSC 454 1.190 ad * using the i8254. If there's a HPET we'll redo it later. 455 1.190 ad */ 456 1.188 ad atomic_or_32(&ci->ci_flags, CPUF_PRESENT | CPUF_PRIMARY); 457 1.2 ad cpu_intr_init(ci); 458 1.194 msaitoh tsc_setfunc(ci); 459 1.40 ad cpu_get_tsc_freq(ci); 460 1.2 ad cpu_init(ci); 461 1.134 maxv #ifdef i386 462 1.2 ad cpu_set_tss_gates(ci); 463 1.134 maxv #endif 464 1.2 ad pmap_cpu_init_late(ci); 465 1.101 kiyohara #if NLAPIC > 0 466 1.51 ad if (caa->cpu_role != CPU_ROLE_SP) { 467 1.51 ad /* Enable lapic. 
*/ 468 1.51 ad lapic_enable(); 469 1.51 ad lapic_set_lvt(); 470 1.189 bouyer if (!vm_guest_is_xenpvh_or_pvhvm()) 471 1.192 ad lapic_calibrate_timer(false); 472 1.51 ad } 473 1.101 kiyohara #endif 474 1.174 maxv kcsan_cpu_init(ci); 475 1.51 ad again = true; 476 1.51 ad } 477 1.51 ad 478 1.51 ad /* further PCB init done later. */ 479 1.51 ad 480 1.51 ad switch (caa->cpu_role) { 481 1.51 ad case CPU_ROLE_SP: 482 1.51 ad atomic_or_32(&ci->ci_flags, CPUF_SP); 483 1.51 ad cpu_identify(ci); 484 1.53 ad x86_errata(); 485 1.37 joerg x86_cpu_idle_init(); 486 1.187 bouyer #ifdef XENPVHVM 487 1.187 bouyer xen_hvm_init_cpu(ci); 488 1.187 bouyer #endif 489 1.2 ad break; 490 1.2 ad 491 1.2 ad case CPU_ROLE_BP: 492 1.51 ad atomic_or_32(&ci->ci_flags, CPUF_BSP); 493 1.40 ad cpu_identify(ci); 494 1.53 ad x86_errata(); 495 1.37 joerg x86_cpu_idle_init(); 496 1.187 bouyer #ifdef XENPVHVM 497 1.187 bouyer xen_hvm_init_cpu(ci); 498 1.187 bouyer #endif 499 1.2 ad break; 500 1.2 ad 501 1.101 kiyohara #ifdef MULTIPROCESSOR 502 1.2 ad case CPU_ROLE_AP: 503 1.2 ad /* 504 1.2 ad * report on an AP 505 1.2 ad */ 506 1.2 ad cpu_intr_init(ci); 507 1.195 yamaguch idt_vec_init_cpu_md(&ci->ci_idtvec, cpu_index(ci)); 508 1.2 ad gdt_alloc_cpu(ci); 509 1.134 maxv #ifdef i386 510 1.2 ad cpu_set_tss_gates(ci); 511 1.134 maxv #endif 512 1.2 ad pmap_cpu_init_late(ci); 513 1.2 ad cpu_start_secondary(ci); 514 1.2 ad if (ci->ci_flags & CPUF_PRESENT) { 515 1.59 cegger struct cpu_info *tmp; 516 1.59 cegger 517 1.40 ad cpu_identify(ci); 518 1.59 cegger tmp = cpu_info_list; 519 1.59 cegger while (tmp->ci_next) 520 1.59 cegger tmp = tmp->ci_next; 521 1.59 cegger 522 1.59 cegger tmp->ci_next = ci; 523 1.2 ad } 524 1.2 ad break; 525 1.101 kiyohara #endif 526 1.2 ad 527 1.2 ad default: 528 1.2 ad panic("unknown processor type??\n"); 529 1.2 ad } 530 1.51 ad 531 1.71 cegger pat_init(ci); 532 1.2 ad 533 1.79 jruoho if (!pmf_device_register1(self, cpu_suspend, cpu_resume, cpu_shutdown)) 534 1.12 jmcneill 
aprint_error_dev(self, "couldn't establish power handler\n"); 535 1.12 jmcneill 536 1.101 kiyohara #ifdef MULTIPROCESSOR 537 1.2 ad if (mp_verbose) { 538 1.2 ad struct lwp *l = ci->ci_data.cpu_idlelwp; 539 1.65 rmind struct pcb *pcb = lwp_getpcb(l); 540 1.2 ad 541 1.47 ad aprint_verbose_dev(self, 542 1.28 cegger "idle lwp at %p, idle sp at %p\n", 543 1.28 cegger l, 544 1.2 ad #ifdef i386 545 1.65 rmind (void *)pcb->pcb_esp 546 1.2 ad #else 547 1.65 rmind (void *)pcb->pcb_rsp 548 1.2 ad #endif 549 1.2 ad ); 550 1.2 ad } 551 1.101 kiyohara #endif 552 1.81 jmcneill 553 1.89 jruoho /* 554 1.89 jruoho * Postpone the "cpufeaturebus" scan. 555 1.89 jruoho * It is safe to scan the pseudo-bus 556 1.89 jruoho * only after all CPUs have attached. 557 1.89 jruoho */ 558 1.87 jruoho (void)config_defer(self, cpu_defer); 559 1.87 jruoho } 560 1.87 jruoho 561 1.87 jruoho static void 562 1.87 jruoho cpu_defer(device_t self) 563 1.87 jruoho { 564 1.81 jmcneill cpu_rescan(self, NULL, NULL); 565 1.81 jmcneill } 566 1.81 jmcneill 567 1.87 jruoho static int 568 1.81 jmcneill cpu_rescan(device_t self, const char *ifattr, const int *locators) 569 1.81 jmcneill { 570 1.83 jruoho struct cpu_softc *sc = device_private(self); 571 1.81 jmcneill struct cpufeature_attach_args cfaa; 572 1.81 jmcneill struct cpu_info *ci = sc->sc_info; 573 1.81 jmcneill 574 1.181 pgoyette /* 575 1.181 pgoyette * If we booted with RB_MD1 to disable multiprocessor, the 576 1.181 pgoyette * auto-configuration data still contains the additional 577 1.181 pgoyette * CPUs. But their initialization was mostly bypassed 578 1.181 pgoyette * during attach, so we have to make sure we don't look at 579 1.181 pgoyette * their featurebus info, since it wasn't retrieved. 
580 1.181 pgoyette */ 581 1.181 pgoyette if (ci == NULL) 582 1.181 pgoyette return 0; 583 1.181 pgoyette 584 1.81 jmcneill memset(&cfaa, 0, sizeof(cfaa)); 585 1.81 jmcneill cfaa.ci = ci; 586 1.81 jmcneill 587 1.81 jmcneill if (ifattr_match(ifattr, "cpufeaturebus")) { 588 1.83 jruoho if (ci->ci_frequency == NULL) { 589 1.86 jruoho cfaa.name = "frequency"; 590 1.200 thorpej ci->ci_frequency = 591 1.200 thorpej config_found(self, &cfaa, NULL, 592 1.201 thorpej CFARGS(.iattr = "cpufeaturebus")); 593 1.84 jruoho } 594 1.84 jruoho 595 1.81 jmcneill if (ci->ci_padlock == NULL) { 596 1.81 jmcneill cfaa.name = "padlock"; 597 1.200 thorpej ci->ci_padlock = 598 1.200 thorpej config_found(self, &cfaa, NULL, 599 1.201 thorpej CFARGS(.iattr = "cpufeaturebus")); 600 1.81 jmcneill } 601 1.82 jruoho 602 1.86 jruoho if (ci->ci_temperature == NULL) { 603 1.86 jruoho cfaa.name = "temperature"; 604 1.200 thorpej ci->ci_temperature = 605 1.200 thorpej config_found(self, &cfaa, NULL, 606 1.201 thorpej CFARGS(.iattr = "cpufeaturebus")); 607 1.85 jruoho } 608 1.95 jmcneill 609 1.95 jmcneill if (ci->ci_vm == NULL) { 610 1.95 jmcneill cfaa.name = "vm"; 611 1.200 thorpej ci->ci_vm = 612 1.200 thorpej config_found(self, &cfaa, NULL, 613 1.201 thorpej CFARGS(.iattr = "cpufeaturebus")); 614 1.95 jmcneill } 615 1.81 jmcneill } 616 1.81 jmcneill 617 1.81 jmcneill return 0; 618 1.81 jmcneill } 619 1.81 jmcneill 620 1.87 jruoho static void 621 1.81 jmcneill cpu_childdetached(device_t self, device_t child) 622 1.81 jmcneill { 623 1.81 jmcneill struct cpu_softc *sc = device_private(self); 624 1.81 jmcneill struct cpu_info *ci = sc->sc_info; 625 1.81 jmcneill 626 1.83 jruoho if (ci->ci_frequency == child) 627 1.83 jruoho ci->ci_frequency = NULL; 628 1.82 jruoho 629 1.81 jmcneill if (ci->ci_padlock == child) 630 1.81 jmcneill ci->ci_padlock = NULL; 631 1.83 jruoho 632 1.86 jruoho if (ci->ci_temperature == child) 633 1.86 jruoho ci->ci_temperature = NULL; 634 1.95 jmcneill 635 1.95 jmcneill if (ci->ci_vm 
== child) 636 1.95 jmcneill ci->ci_vm = NULL; 637 1.2 ad } 638 1.2 ad 639 1.2 ad /* 640 1.2 ad * Initialize the processor appropriately. 641 1.2 ad */ 642 1.2 ad 643 1.2 ad void 644 1.9 ad cpu_init(struct cpu_info *ci) 645 1.2 ad { 646 1.141 maxv extern int x86_fpu_save; 647 1.113 christos uint32_t cr4 = 0; 648 1.2 ad 649 1.2 ad lcr0(rcr0() | CR0_WP); 650 1.2 ad 651 1.169 maxv /* If global TLB caching is supported, enable it */ 652 1.70 jym if (cpu_feature[0] & CPUID_PGE) 653 1.169 maxv cr4 |= CR4_PGE; 654 1.2 ad 655 1.2 ad /* 656 1.2 ad * If we have FXSAVE/FXRESTOR, use them. 657 1.2 ad */ 658 1.70 jym if (cpu_feature[0] & CPUID_FXSR) { 659 1.110 dsl cr4 |= CR4_OSFXSR; 660 1.2 ad 661 1.2 ad /* 662 1.2 ad * If we have SSE/SSE2, enable XMM exceptions. 663 1.2 ad */ 664 1.70 jym if (cpu_feature[0] & (CPUID_SSE|CPUID_SSE2)) 665 1.110 dsl cr4 |= CR4_OSXMMEXCPT; 666 1.2 ad } 667 1.2 ad 668 1.110 dsl /* If xsave is supported, enable it */ 669 1.110 dsl if (cpu_feature[1] & CPUID2_XSAVE) 670 1.110 dsl cr4 |= CR4_OSXSAVE; 671 1.110 dsl 672 1.118 maxv /* If SMEP is supported, enable it */ 673 1.118 maxv if (cpu_feature[5] & CPUID_SEF_SMEP) 674 1.118 maxv cr4 |= CR4_SMEP; 675 1.118 maxv 676 1.137 maxv /* If SMAP is supported, enable it */ 677 1.137 maxv if (cpu_feature[5] & CPUID_SEF_SMAP) 678 1.137 maxv cr4 |= CR4_SMAP; 679 1.137 maxv 680 1.171 maxv #ifdef SVS 681 1.171 maxv /* If PCID is supported, enable it */ 682 1.171 maxv if (svs_pcid) 683 1.171 maxv cr4 |= CR4_PCIDE; 684 1.171 maxv #endif 685 1.171 maxv 686 1.113 christos if (cr4) { 687 1.113 christos cr4 |= rcr4(); 688 1.113 christos lcr4(cr4); 689 1.113 christos } 690 1.110 dsl 691 1.145 msaitoh /* 692 1.145 msaitoh * Changing CR4 register may change cpuid values. For example, setting 693 1.145 msaitoh * CR4_OSXSAVE sets CPUID2_OSXSAVE. The CPUID2_OSXSAVE is in 694 1.145 msaitoh * ci_feat_val[1], so update it. 695 1.145 msaitoh * XXX Other than ci_feat_val[1] might be changed. 
696 1.145 msaitoh */ 697 1.145 msaitoh if (cpuid_level >= 1) { 698 1.145 msaitoh u_int descs[4]; 699 1.145 msaitoh 700 1.145 msaitoh x86_cpuid(1, descs); 701 1.145 msaitoh ci->ci_feat_val[1] = descs[2]; 702 1.145 msaitoh } 703 1.145 msaitoh 704 1.208 riastrad if (CPU_IS_PRIMARY(ci) && 705 1.208 riastrad x86_fpu_save >= FPU_SAVE_FXSAVE) { 706 1.158 maxv fpuinit_mxcsr_mask(); 707 1.141 maxv } 708 1.141 maxv 709 1.110 dsl /* If xsave is enabled, enable all fpu features */ 710 1.110 dsl if (cr4 & CR4_OSXSAVE) 711 1.110 dsl wrxcr(0, x86_xsave_features & XCR0_FPU); 712 1.110 dsl 713 1.2 ad #ifdef MTRR 714 1.2 ad /* 715 1.2 ad * On a P6 or above, initialize MTRR's if the hardware supports them. 716 1.2 ad */ 717 1.70 jym if (cpu_feature[0] & CPUID_MTRR) { 718 1.2 ad if ((ci->ci_flags & CPUF_AP) == 0) 719 1.2 ad i686_mtrr_init_first(); 720 1.2 ad mtrr_init_cpu(ci); 721 1.2 ad } 722 1.2 ad 723 1.2 ad #ifdef i386 724 1.2 ad if (strcmp((char *)(ci->ci_vendor), "AuthenticAMD") == 0) { 725 1.2 ad /* 726 1.2 ad * Must be a K6-2 Step >= 7 or a K6-III. 
727 1.2 ad */ 728 1.106 msaitoh if (CPUID_TO_FAMILY(ci->ci_signature) == 5) { 729 1.106 msaitoh if (CPUID_TO_MODEL(ci->ci_signature) > 8 || 730 1.106 msaitoh (CPUID_TO_MODEL(ci->ci_signature) == 8 && 731 1.106 msaitoh CPUID_TO_STEPPING(ci->ci_signature) >= 7)) { 732 1.2 ad mtrr_funcs = &k6_mtrr_funcs; 733 1.2 ad k6_mtrr_init_first(); 734 1.2 ad mtrr_init_cpu(ci); 735 1.2 ad } 736 1.2 ad } 737 1.2 ad } 738 1.2 ad #endif /* i386 */ 739 1.2 ad #endif /* MTRR */ 740 1.2 ad 741 1.38 ad if (ci != &cpu_info_primary) { 742 1.150 maxv /* Synchronize TSC */ 743 1.38 ad atomic_or_32(&ci->ci_flags, CPUF_RUNNING); 744 1.38 ad tsc_sync_ap(ci); 745 1.38 ad } else { 746 1.38 ad atomic_or_32(&ci->ci_flags, CPUF_RUNNING); 747 1.38 ad } 748 1.2 ad } 749 1.2 ad 750 1.101 kiyohara #ifdef MULTIPROCESSOR 751 1.2 ad void 752 1.12 jmcneill cpu_boot_secondary_processors(void) 753 1.2 ad { 754 1.2 ad struct cpu_info *ci; 755 1.100 chs kcpuset_t *cpus; 756 1.2 ad u_long i; 757 1.2 ad 758 1.5 ad /* Now that we know the number of CPUs, patch the text segment. */ 759 1.60 ad x86_patch(false); 760 1.5 ad 761 1.179 ad #if NACPICA > 0 762 1.179 ad /* Finished with NUMA info for now. 
*/ 763 1.179 ad acpisrat_exit(); 764 1.179 ad #endif 765 1.179 ad 766 1.100 chs kcpuset_create(&cpus, true); 767 1.100 chs kcpuset_set(cpus, cpu_index(curcpu())); 768 1.100 chs for (i = 0; i < maxcpus; i++) { 769 1.57 ad ci = cpu_lookup(i); 770 1.2 ad if (ci == NULL) 771 1.2 ad continue; 772 1.2 ad if (ci->ci_data.cpu_idlelwp == NULL) 773 1.2 ad continue; 774 1.2 ad if ((ci->ci_flags & CPUF_PRESENT) == 0) 775 1.2 ad continue; 776 1.2 ad if (ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY)) 777 1.2 ad continue; 778 1.2 ad cpu_boot_secondary(ci); 779 1.100 chs kcpuset_set(cpus, cpu_index(ci)); 780 1.2 ad } 781 1.100 chs while (!kcpuset_match(cpus, kcpuset_running)) 782 1.100 chs ; 783 1.100 chs kcpuset_destroy(cpus); 784 1.2 ad 785 1.2 ad x86_mp_online = true; 786 1.38 ad 787 1.38 ad /* Now that we know about the TSC, attach the timecounter. */ 788 1.38 ad tsc_tc_init(); 789 1.2 ad } 790 1.101 kiyohara #endif 791 1.2 ad 792 1.2 ad static void 793 1.2 ad cpu_init_idle_lwp(struct cpu_info *ci) 794 1.2 ad { 795 1.2 ad struct lwp *l = ci->ci_data.cpu_idlelwp; 796 1.65 rmind struct pcb *pcb = lwp_getpcb(l); 797 1.2 ad 798 1.2 ad pcb->pcb_cr0 = rcr0(); 799 1.2 ad } 800 1.2 ad 801 1.2 ad void 802 1.12 jmcneill cpu_init_idle_lwps(void) 803 1.2 ad { 804 1.2 ad struct cpu_info *ci; 805 1.2 ad u_long i; 806 1.2 ad 807 1.54 ad for (i = 0; i < maxcpus; i++) { 808 1.57 ad ci = cpu_lookup(i); 809 1.2 ad if (ci == NULL) 810 1.2 ad continue; 811 1.2 ad if (ci->ci_data.cpu_idlelwp == NULL) 812 1.2 ad continue; 813 1.2 ad if ((ci->ci_flags & CPUF_PRESENT) == 0) 814 1.2 ad continue; 815 1.2 ad cpu_init_idle_lwp(ci); 816 1.2 ad } 817 1.2 ad } 818 1.2 ad 819 1.101 kiyohara #ifdef MULTIPROCESSOR 820 1.2 ad void 821 1.12 jmcneill cpu_start_secondary(struct cpu_info *ci) 822 1.2 ad { 823 1.38 ad u_long psl; 824 1.2 ad int i; 825 1.2 ad 826 1.165 cherry #if NLAPIC > 0 827 1.165 cherry paddr_t mp_pdirpa; 828 1.12 jmcneill mp_pdirpa = pmap_init_tmp_pgtbl(mp_trampoline_paddr); 829 1.136 maxv 
cpu_copy_trampoline(mp_pdirpa); 830 1.165 cherry #endif 831 1.136 maxv 832 1.9 ad atomic_or_32(&ci->ci_flags, CPUF_AP); 833 1.2 ad ci->ci_curlwp = ci->ci_data.cpu_idlelwp; 834 1.45 ad if (CPU_STARTUP(ci, mp_trampoline_paddr) != 0) { 835 1.25 ad return; 836 1.45 ad } 837 1.2 ad 838 1.2 ad /* 839 1.50 ad * Wait for it to become ready. Setting cpu_starting opens the 840 1.50 ad * initial gate and allows the AP to start soft initialization. 841 1.2 ad */ 842 1.50 ad KASSERT(cpu_starting == NULL); 843 1.50 ad cpu_starting = ci; 844 1.26 cegger for (i = 100000; (!(ci->ci_flags & CPUF_PRESENT)) && i > 0; i--) { 845 1.189 bouyer delay_func(10); 846 1.2 ad } 847 1.38 ad 848 1.9 ad if ((ci->ci_flags & CPUF_PRESENT) == 0) { 849 1.26 cegger aprint_error_dev(ci->ci_dev, "failed to become ready\n"); 850 1.2 ad #if defined(MPDEBUG) && defined(DDB) 851 1.2 ad printf("dropping into debugger; continue from here to resume boot\n"); 852 1.2 ad Debugger(); 853 1.2 ad #endif 854 1.38 ad } else { 855 1.38 ad /* 856 1.68 jym * Synchronize time stamp counters. Invalidate cache and do 857 1.150 maxv * twice (in tsc_sync_bp) to minimize possible cache effects. 858 1.150 maxv * Disable interrupts to try and rule out any external 859 1.150 maxv * interference. 
860 1.38 ad */ 861 1.38 ad psl = x86_read_psl(); 862 1.38 ad x86_disable_intr(); 863 1.38 ad tsc_sync_bp(ci); 864 1.38 ad x86_write_psl(psl); 865 1.2 ad } 866 1.2 ad 867 1.2 ad CPU_START_CLEANUP(ci); 868 1.45 ad cpu_starting = NULL; 869 1.2 ad } 870 1.2 ad 871 1.2 ad void 872 1.12 jmcneill cpu_boot_secondary(struct cpu_info *ci) 873 1.2 ad { 874 1.38 ad int64_t drift; 875 1.38 ad u_long psl; 876 1.2 ad int i; 877 1.2 ad 878 1.9 ad atomic_or_32(&ci->ci_flags, CPUF_GO); 879 1.26 cegger for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i > 0; i--) { 880 1.189 bouyer delay_func(10); 881 1.2 ad } 882 1.9 ad if ((ci->ci_flags & CPUF_RUNNING) == 0) { 883 1.26 cegger aprint_error_dev(ci->ci_dev, "failed to start\n"); 884 1.2 ad #if defined(MPDEBUG) && defined(DDB) 885 1.2 ad printf("dropping into debugger; continue from here to resume boot\n"); 886 1.2 ad Debugger(); 887 1.2 ad #endif 888 1.38 ad } else { 889 1.38 ad /* Synchronize TSC again, check for drift. */ 890 1.38 ad drift = ci->ci_data.cpu_cc_skew; 891 1.38 ad psl = x86_read_psl(); 892 1.38 ad x86_disable_intr(); 893 1.38 ad tsc_sync_bp(ci); 894 1.38 ad x86_write_psl(psl); 895 1.38 ad drift -= ci->ci_data.cpu_cc_skew; 896 1.38 ad aprint_debug_dev(ci->ci_dev, "TSC skew=%lld drift=%lld\n", 897 1.38 ad (long long)ci->ci_data.cpu_cc_skew, (long long)drift); 898 1.38 ad tsc_sync_drift(drift); 899 1.2 ad } 900 1.2 ad } 901 1.2 ad 902 1.2 ad /* 903 1.117 maxv * The CPU ends up here when it's ready to run. 904 1.2 ad * This is called from code in mptramp.s; at this point, we are running 905 1.2 ad * in the idle pcb/idle stack of the new CPU. When this function returns, 906 1.2 ad * this processor will enter the idle loop and start looking for work. 
907 1.2 ad */ 908 1.2 ad void 909 1.2 ad cpu_hatch(void *v) 910 1.2 ad { 911 1.2 ad struct cpu_info *ci = (struct cpu_info *)v; 912 1.65 rmind struct pcb *pcb; 913 1.130 kre int s, i; 914 1.2 ad 915 1.162 maxv /* ------------------------------------------------------------- */ 916 1.162 maxv 917 1.162 maxv /* 918 1.162 maxv * This section of code must be compiled with SSP disabled, to 919 1.162 maxv * prevent a race against cpu0. See sys/conf/ssp.mk. 920 1.162 maxv */ 921 1.162 maxv 922 1.209 riastrad /* 923 1.209 riastrad * Initialize MSRs on this CPU: 924 1.209 riastrad * 925 1.209 riastrad * - On amd64: Enables SYSCALL/SYSRET. 926 1.209 riastrad * 927 1.209 riastrad * - On amd64: Sets up %fs and %gs so that %gs points to the 928 1.209 riastrad * current struct cpu_info as needed for CPUVAR(...), 929 1.209 riastrad * curcpu(), and curlwp. 930 1.209 riastrad * 931 1.209 riastrad * (On i386, CPUVAR(...), curcpu(), and curlwp are made to 932 1.209 riastrad * work first by the conifguration of segment descriptors in 933 1.209 riastrad * the Global Descriptor Table (GDT) in initgdt.) 934 1.209 riastrad * 935 1.209 riastrad * - Enables the no-execute bit if supported. 936 1.209 riastrad * 937 1.209 riastrad * Thus, after this point, CPUVAR(...), curcpu(), and curlwp 938 1.209 riastrad * will work on this CPU. 939 1.209 riastrad * 940 1.209 riastrad * Note: The call to cpu_init_msrs for cpu0 happens in 941 1.209 riastrad * init386/init_x86_64. 942 1.209 riastrad */ 943 1.12 jmcneill cpu_init_msrs(ci, true); 944 1.209 riastrad 945 1.40 ad cpu_probe(ci); 946 1.154 maxv cpu_speculation_init(ci); 947 1.178 nonaka #if NHYPERV > 0 948 1.178 nonaka hyperv_init_cpu(ci); 949 1.178 nonaka #endif 950 1.46 ad 951 1.46 ad ci->ci_data.cpu_cc_freq = cpu_info_primary.ci_data.cpu_cc_freq; 952 1.134 maxv /* cpu_get_tsc_freq(ci); */ 953 1.38 ad 954 1.8 ad KDASSERT((ci->ci_flags & CPUF_PRESENT) == 0); 955 1.38 ad 956 1.38 ad /* 957 1.150 maxv * Synchronize the TSC for the first time. 
Note that interrupts are 958 1.150 maxv * off at this point. 959 1.38 ad */ 960 1.9 ad atomic_or_32(&ci->ci_flags, CPUF_PRESENT); 961 1.38 ad tsc_sync_ap(ci); 962 1.38 ad 963 1.162 maxv /* ------------------------------------------------------------- */ 964 1.162 maxv 965 1.38 ad /* 966 1.150 maxv * Wait to be brought online. 967 1.150 maxv * 968 1.150 maxv * Use MONITOR/MWAIT if available. These instructions put the CPU in 969 1.150 maxv * a low consumption mode (C-state), and if the TSC is not invariant, 970 1.150 maxv * this causes the TSC to drift. We want this to happen, so that we 971 1.150 maxv * can later detect (in tsc_tc_init) any abnormal drift with invariant 972 1.150 maxv * TSCs. That's just for safety; by definition such drifts should 973 1.150 maxv * never occur with invariant TSCs. 974 1.150 maxv * 975 1.150 maxv * If not available, try PAUSE. We'd like to use HLT, but we have 976 1.150 maxv * interrupts off. 977 1.38 ad */ 978 1.6 ad while ((ci->ci_flags & CPUF_GO) == 0) { 979 1.70 jym if ((cpu_feature[1] & CPUID2_MONITOR) != 0) { 980 1.38 ad x86_monitor(&ci->ci_flags, 0, 0); 981 1.38 ad if ((ci->ci_flags & CPUF_GO) != 0) { 982 1.38 ad continue; 983 1.38 ad } 984 1.38 ad x86_mwait(0, 0); 985 1.38 ad } else { 986 1.131 pgoyette /* 987 1.131 pgoyette * XXX The loop repetition count could be a lot higher, but 988 1.131 pgoyette * XXX currently qemu emulator takes a _very_long_time_ to 989 1.131 pgoyette * XXX execute the pause instruction. So for now, use a low 990 1.131 pgoyette * XXX value to allow the cpu to hatch before timing out. 991 1.131 pgoyette */ 992 1.131 pgoyette for (i = 50; i != 0; i--) { 993 1.127 pgoyette x86_pause(); 994 1.127 pgoyette } 995 1.38 ad } 996 1.6 ad } 997 1.5 ad 998 1.26 cegger /* Because the text may have been patched in x86_patch(). 
*/ 999 1.5 ad wbinvd(); 1000 1.5 ad x86_flush(); 1001 1.88 rmind tlbflushg(); 1002 1.5 ad 1003 1.8 ad KASSERT((ci->ci_flags & CPUF_RUNNING) == 0); 1004 1.2 ad 1005 1.73 jym #ifdef PAE 1006 1.73 jym pd_entry_t * l3_pd = ci->ci_pae_l3_pdir; 1007 1.73 jym for (i = 0 ; i < PDP_SIZE; i++) { 1008 1.168 maxv l3_pd[i] = pmap_kernel()->pm_pdirpa[i] | PTE_P; 1009 1.73 jym } 1010 1.73 jym lcr3(ci->ci_pae_l3_pdirpa); 1011 1.73 jym #else 1012 1.73 jym lcr3(pmap_pdirpa(pmap_kernel(), 0)); 1013 1.73 jym #endif 1014 1.73 jym 1015 1.65 rmind pcb = lwp_getpcb(curlwp); 1016 1.73 jym pcb->pcb_cr3 = rcr3(); 1017 1.65 rmind pcb = lwp_getpcb(ci->ci_data.cpu_idlelwp); 1018 1.65 rmind lcr0(pcb->pcb_cr0); 1019 1.65 rmind 1020 1.195 yamaguch cpu_init_idt(ci); 1021 1.8 ad gdt_init_cpu(ci); 1022 1.111 joerg #if NLAPIC > 0 1023 1.8 ad lapic_enable(); 1024 1.2 ad lapic_set_lvt(); 1025 1.111 joerg #endif 1026 1.2 ad 1027 1.2 ad fpuinit(ci); 1028 1.2 ad lldt(GSYSSEL(GLDT_SEL, SEL_KPL)); 1029 1.15 yamt ltr(ci->ci_tss_sel); 1030 1.2 ad 1031 1.150 maxv /* 1032 1.150 maxv * cpu_init will re-synchronize the TSC, and will detect any abnormal 1033 1.150 maxv * drift that would have been caused by the use of MONITOR/MWAIT 1034 1.150 maxv * above. 
1035 1.150 maxv */ 1036 1.2 ad cpu_init(ci); 1037 1.187 bouyer #ifdef XENPVHVM 1038 1.187 bouyer xen_hvm_init_cpu(ci); 1039 1.187 bouyer #endif 1040 1.192 ad (*x86_initclock_func)(); 1041 1.7 ad cpu_get_tsc_freq(ci); 1042 1.2 ad 1043 1.2 ad s = splhigh(); 1044 1.165 cherry #if NLAPIC > 0 1045 1.124 nonaka lapic_write_tpri(0); 1046 1.165 cherry #endif 1047 1.3 ad x86_enable_intr(); 1048 1.2 ad splx(s); 1049 1.6 ad x86_errata(); 1050 1.2 ad 1051 1.42 ad aprint_debug_dev(ci->ci_dev, "running\n"); 1052 1.98 rmind 1053 1.174 maxv kcsan_cpu_init(ci); 1054 1.174 maxv 1055 1.98 rmind idle_loop(NULL); 1056 1.98 rmind KASSERT(false); 1057 1.2 ad } 1058 1.101 kiyohara #endif 1059 1.2 ad 1060 1.2 ad #if defined(DDB) 1061 1.2 ad 1062 1.2 ad #include <ddb/db_output.h> 1063 1.2 ad #include <machine/db_machdep.h> 1064 1.2 ad 1065 1.2 ad /* 1066 1.2 ad * Dump CPU information from ddb. 1067 1.2 ad */ 1068 1.2 ad void 1069 1.2 ad cpu_debug_dump(void) 1070 1.2 ad { 1071 1.2 ad struct cpu_info *ci; 1072 1.2 ad CPU_INFO_ITERATOR cii; 1073 1.184 msaitoh const char sixtyfour64space[] = 1074 1.172 mrg #ifdef _LP64 1075 1.172 mrg " " 1076 1.172 mrg #endif 1077 1.172 mrg ""; 1078 1.2 ad 1079 1.180 ad db_printf("addr %sdev id flags ipis spl curlwp " 1080 1.173 maxv "\n", sixtyfour64space); 1081 1.2 ad for (CPU_INFO_FOREACH(cii, ci)) { 1082 1.180 ad db_printf("%p %s %ld %x %x %d %10p\n", 1083 1.2 ad ci, 1084 1.27 cegger ci->ci_dev == NULL ? 
"BOOT" : device_xname(ci->ci_dev), 1085 1.2 ad (long)ci->ci_cpuid, 1086 1.180 ad ci->ci_flags, ci->ci_ipis, ci->ci_ilevel, 1087 1.173 maxv ci->ci_curlwp); 1088 1.2 ad } 1089 1.2 ad } 1090 1.2 ad #endif 1091 1.2 ad 1092 1.164 cherry #ifdef MULTIPROCESSOR 1093 1.101 kiyohara #if NLAPIC > 0 1094 1.2 ad static void 1095 1.136 maxv cpu_copy_trampoline(paddr_t pdir_pa) 1096 1.2 ad { 1097 1.136 maxv extern uint32_t nox_flag; 1098 1.2 ad extern u_char cpu_spinup_trampoline[]; 1099 1.2 ad extern u_char cpu_spinup_trampoline_end[]; 1100 1.12 jmcneill vaddr_t mp_trampoline_vaddr; 1101 1.136 maxv struct { 1102 1.136 maxv uint32_t large; 1103 1.136 maxv uint32_t nox; 1104 1.136 maxv uint32_t pdir; 1105 1.136 maxv } smp_data; 1106 1.136 maxv CTASSERT(sizeof(smp_data) == 3 * 4); 1107 1.136 maxv 1108 1.136 maxv smp_data.large = (pmap_largepages != 0); 1109 1.136 maxv smp_data.nox = nox_flag; 1110 1.136 maxv smp_data.pdir = (uint32_t)(pdir_pa & 0xFFFFFFFF); 1111 1.12 jmcneill 1112 1.136 maxv /* Enter the physical address */ 1113 1.12 jmcneill mp_trampoline_vaddr = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 1114 1.12 jmcneill UVM_KMF_VAONLY); 1115 1.12 jmcneill pmap_kenter_pa(mp_trampoline_vaddr, mp_trampoline_paddr, 1116 1.64 cegger VM_PROT_READ | VM_PROT_WRITE, 0); 1117 1.2 ad pmap_update(pmap_kernel()); 1118 1.136 maxv 1119 1.136 maxv /* Copy boot code */ 1120 1.12 jmcneill memcpy((void *)mp_trampoline_vaddr, 1121 1.2 ad cpu_spinup_trampoline, 1122 1.26 cegger cpu_spinup_trampoline_end - cpu_spinup_trampoline); 1123 1.12 jmcneill 1124 1.136 maxv /* Copy smp_data at the end */ 1125 1.136 maxv memcpy((void *)(mp_trampoline_vaddr + PAGE_SIZE - sizeof(smp_data)), 1126 1.136 maxv &smp_data, sizeof(smp_data)); 1127 1.136 maxv 1128 1.12 jmcneill pmap_kremove(mp_trampoline_vaddr, PAGE_SIZE); 1129 1.12 jmcneill pmap_update(pmap_kernel()); 1130 1.12 jmcneill uvm_km_free(kernel_map, mp_trampoline_vaddr, PAGE_SIZE, UVM_KMF_VAONLY); 1131 1.2 ad } 1132 1.101 kiyohara #endif 1133 1.2 ad 1134 1.2 ad 
int 1135 1.14 joerg mp_cpu_start(struct cpu_info *ci, paddr_t target) 1136 1.2 ad { 1137 1.210 andvar #if NLAPIC > 0 1138 1.2 ad int error; 1139 1.14 joerg 1140 1.14 joerg /* 1141 1.14 joerg * Bootstrap code must be addressable in real mode 1142 1.14 joerg * and it must be page aligned. 1143 1.14 joerg */ 1144 1.14 joerg KASSERT(target < 0x10000 && target % PAGE_SIZE == 0); 1145 1.2 ad 1146 1.2 ad /* 1147 1.2 ad * "The BSP must initialize CMOS shutdown code to 0Ah ..." 1148 1.2 ad */ 1149 1.2 ad 1150 1.2 ad outb(IO_RTC, NVRAM_RESET); 1151 1.2 ad outb(IO_RTC+1, NVRAM_RESET_JUMP); 1152 1.2 ad 1153 1.2 ad /* 1154 1.2 ad * "and the warm reset vector (DWORD based at 40:67) to point 1155 1.2 ad * to the AP startup code ..." 1156 1.2 ad */ 1157 1.165 cherry unsigned short dwordptr[2]; 1158 1.2 ad dwordptr[0] = 0; 1159 1.14 joerg dwordptr[1] = target >> 4; 1160 1.2 ad 1161 1.25 ad memcpy((uint8_t *)cmos_data_mapping + 0x467, dwordptr, 4); 1162 1.2 ad 1163 1.70 jym if ((cpu_feature[0] & CPUID_APIC) == 0) { 1164 1.25 ad aprint_error("mp_cpu_start: CPU does not have APIC\n"); 1165 1.25 ad return ENODEV; 1166 1.25 ad } 1167 1.25 ad 1168 1.2 ad /* 1169 1.51 ad * ... prior to executing the following sequence:". We'll also add in 1170 1.51 ad * local cache flush, in case the BIOS has left the AP with its cache 1171 1.51 ad * disabled. It may not be able to cope with MP coherency. 
1172 1.2 ad */ 1173 1.51 ad wbinvd(); 1174 1.2 ad 1175 1.2 ad if (ci->ci_flags & CPUF_AP) { 1176 1.42 ad error = x86_ipi_init(ci->ci_cpuid); 1177 1.26 cegger if (error != 0) { 1178 1.26 cegger aprint_error_dev(ci->ci_dev, "%s: IPI not taken (1)\n", 1179 1.50 ad __func__); 1180 1.2 ad return error; 1181 1.25 ad } 1182 1.189 bouyer delay_func(10000); 1183 1.2 ad 1184 1.50 ad error = x86_ipi_startup(ci->ci_cpuid, target / PAGE_SIZE); 1185 1.26 cegger if (error != 0) { 1186 1.26 cegger aprint_error_dev(ci->ci_dev, "%s: IPI not taken (2)\n", 1187 1.50 ad __func__); 1188 1.25 ad return error; 1189 1.25 ad } 1190 1.189 bouyer delay_func(200); 1191 1.2 ad 1192 1.50 ad error = x86_ipi_startup(ci->ci_cpuid, target / PAGE_SIZE); 1193 1.26 cegger if (error != 0) { 1194 1.26 cegger aprint_error_dev(ci->ci_dev, "%s: IPI not taken (3)\n", 1195 1.50 ad __func__); 1196 1.25 ad return error; 1197 1.2 ad } 1198 1.189 bouyer delay_func(200); 1199 1.2 ad } 1200 1.44 ad 1201 1.2 ad return 0; 1202 1.210 andvar #else 1203 1.210 andvar return ENODEV; 1204 1.210 andvar #endif /* NLAPIC > 0 */ 1205 1.2 ad } 1206 1.2 ad 1207 1.2 ad void 1208 1.2 ad mp_cpu_start_cleanup(struct cpu_info *ci) 1209 1.2 ad { 1210 1.2 ad /* 1211 1.2 ad * Ensure the NVRAM reset byte contains something vaguely sane. 1212 1.2 ad */ 1213 1.2 ad 1214 1.2 ad outb(IO_RTC, NVRAM_RESET); 1215 1.2 ad outb(IO_RTC+1, NVRAM_RESET_RST); 1216 1.2 ad } 1217 1.101 kiyohara #endif 1218 1.2 ad 1219 1.2 ad #ifdef __x86_64__ 1220 1.2 ad typedef void (vector)(void); 1221 1.148 maxv extern vector Xsyscall, Xsyscall32, Xsyscall_svs; 1222 1.70 jym #endif 1223 1.2 ad 1224 1.209 riastrad /* 1225 1.209 riastrad * cpu_init_msrs(ci, full) 1226 1.209 riastrad * 1227 1.209 riastrad * Initialize some Model-Specific Registers (MSRs) on the current 1228 1.209 riastrad * CPU, whose struct cpu_info pointer is ci, for: 1229 1.209 riastrad * 1230 1.209 riastrad * - SYSCALL/SYSRET. 
1231 1.209 riastrad * - %fs/%gs on amd64 if `full' is true; needed to make 1232 1.209 riastrad * CPUVAR(...), curcpu(), and curlwp work. (We do this at boot, 1233 1.209 riastrad * but skip it on ACPI wakeup.) 1234 1.209 riastrad * - No-execute bit, if supported. 1235 1.209 riastrad * 1236 1.209 riastrad * References: 1237 1.209 riastrad * 1238 1.209 riastrad * - Intel 64 and IA-32 Architectures Software Developer's Manual, 1239 1.209 riastrad * Volume 3: System Programming Guide, Order Number 325384, 1240 1.209 riastrad * April 2022, Sec. 5.8.8 `Fast System Calls in 64-Bit Mode', 1241 1.209 riastrad * pp. 5-22 through 5-23. 1242 1.209 riastrad * 1243 1.209 riastrad * - Intel 64 and IA-32 Architectures Software Developer's Manual, 1244 1.209 riastrad * Volume 4: Model-Specific Registers, Order Number 335592, 1245 1.209 riastrad * April 2022, Sec. 2.1 `Architectural MSRs', Table 2-2, 1246 1.209 riastrad * pp. 2-60 through 2-61. 1247 1.209 riastrad */ 1248 1.2 ad void 1249 1.12 jmcneill cpu_init_msrs(struct cpu_info *ci, bool full) 1250 1.2 ad { 1251 1.70 jym #ifdef __x86_64__ 1252 1.209 riastrad /* 1253 1.209 riastrad * On amd64, set up the syscall target address registers 1254 1.209 riastrad * for SYSCALL/SYSRET: 1255 1.209 riastrad * 1256 1.209 riastrad * - IA32_STAR, c000_0081h (MSR_STAR): System Call Target 1257 1.209 riastrad * Address. Code and stack segment selectors for SYSRET 1258 1.209 riastrad * (bits 48:63) and SYSCALL (bits 32:47). 1259 1.209 riastrad * 1260 1.209 riastrad * - IA32_LSTAR, c000_0082h (MSR_LSTAR): IA-32e Mode System 1261 1.209 riastrad * Call Target Address. Target rip for SYSCALL when executed 1262 1.209 riastrad * in 64-bit mode. 1263 1.209 riastrad * 1264 1.209 riastrad * - IA32_CSTAR, c000_0083h (MSR_CSTAR): IA-32e Mode System 1265 1.209 riastrad * Call Target Address. Target rip for SYSCALL when executed 1266 1.209 riastrad * in compatibility mode. 
(XXX Manual says this is `[n]ot 1267 1.209 riastrad * used, as the SYSCALL instruction is not recognized in 1268 1.209 riastrad * compatibility mode', so why do we set it?) 1269 1.209 riastrad * 1270 1.209 riastrad * - IA32_FMASK, c000_0084h (MSR_SFMASK): System Call Flag 1271 1.209 riastrad * Mask. Mask for the RFLAGS register on SYSCALL. 1272 1.209 riastrad */ 1273 1.2 ad wrmsr(MSR_STAR, 1274 1.2 ad ((uint64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | 1275 1.2 ad ((uint64_t)LSEL(LSYSRETBASE_SEL, SEL_UPL) << 48)); 1276 1.2 ad wrmsr(MSR_LSTAR, (uint64_t)Xsyscall); 1277 1.2 ad wrmsr(MSR_CSTAR, (uint64_t)Xsyscall32); 1278 1.138 maxv wrmsr(MSR_SFMASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D|PSL_AC); 1279 1.2 ad 1280 1.148 maxv #ifdef SVS 1281 1.148 maxv if (svs_enabled) 1282 1.148 maxv wrmsr(MSR_LSTAR, (uint64_t)Xsyscall_svs); 1283 1.148 maxv #endif 1284 1.148 maxv 1285 1.209 riastrad /* 1286 1.209 riastrad * On amd64 if `full' is true -- used at boot, but not on ACPI 1287 1.209 riastrad * wakeup -- then additionally set up %fs and %gs: 1288 1.209 riastrad * 1289 1.209 riastrad * - IA32_FS_BASE, c000_0100h (MSR_FSBASE): Base address of 1290 1.209 riastrad * %fs. Not used in NetBSD kernel, so zero it. 1291 1.209 riastrad * 1292 1.209 riastrad * - IA32_GS_BASE, c000_0101h (MSR_GSBASE): Base address of 1293 1.209 riastrad * %gs. Used in NetBSD kernel by CPUVAR(...), curcpu(), and 1294 1.209 riastrad * curlwp for access to the CPU-local area, so set it to ci. 1295 1.209 riastrad * 1296 1.209 riastrad * - IA32_KERNEL_GS_BASE, c000_0102h (MSR_KERNELGSBASE): Base 1297 1.209 riastrad * address of what swapgs will leave in %gs when switching to 1298 1.209 riastrad * userland. Zero for now; will be set to pcb->pcb_gs in 1299 1.209 riastrad * cpu_switchto for user threads. 
1300 1.209 riastrad */ 1301 1.12 jmcneill if (full) { 1302 1.12 jmcneill wrmsr(MSR_FSBASE, 0); 1303 1.27 cegger wrmsr(MSR_GSBASE, (uint64_t)ci); 1304 1.12 jmcneill wrmsr(MSR_KERNELGSBASE, 0); 1305 1.12 jmcneill } 1306 1.70 jym #endif /* __x86_64__ */ 1307 1.2 ad 1308 1.209 riastrad /* 1309 1.209 riastrad * If the no-execute bit is supported, enable it in: 1310 1.209 riastrad * 1311 1.209 riastrad * - IA32_EFER, c000_0080h (MSR_EFER): Extended Feature 1312 1.209 riastrad * Enables. 1313 1.209 riastrad */ 1314 1.70 jym if (cpu_feature[2] & CPUID_NOX) 1315 1.2 ad wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE); 1316 1.2 ad } 1317 1.7 ad 1318 1.107 christos void 1319 1.107 christos cpu_offline_md(void) 1320 1.107 christos { 1321 1.173 maxv return; 1322 1.107 christos } 1323 1.107 christos 1324 1.12 jmcneill /* XXX joerg restructure and restart CPUs individually */ 1325 1.12 jmcneill static bool 1326 1.96 jruoho cpu_stop(device_t dv) 1327 1.12 jmcneill { 1328 1.12 jmcneill struct cpu_softc *sc = device_private(dv); 1329 1.12 jmcneill struct cpu_info *ci = sc->sc_info; 1330 1.18 joerg int err; 1331 1.12 jmcneill 1332 1.96 jruoho KASSERT((ci->ci_flags & CPUF_PRESENT) != 0); 1333 1.93 jruoho 1334 1.196 fcambus if (CPU_IS_PRIMARY(ci)) 1335 1.93 jruoho return true; 1336 1.93 jruoho 1337 1.12 jmcneill if (ci->ci_data.cpu_idlelwp == NULL) 1338 1.12 jmcneill return true; 1339 1.12 jmcneill 1340 1.20 jmcneill sc->sc_wasonline = !(ci->ci_schedstate.spc_flags & SPCF_OFFLINE); 1341 1.17 joerg 1342 1.20 jmcneill if (sc->sc_wasonline) { 1343 1.20 jmcneill mutex_enter(&cpu_lock); 1344 1.58 rmind err = cpu_setstate(ci, false); 1345 1.20 jmcneill mutex_exit(&cpu_lock); 1346 1.79 jruoho 1347 1.93 jruoho if (err != 0) 1348 1.20 jmcneill return false; 1349 1.20 jmcneill } 1350 1.17 joerg 1351 1.17 joerg return true; 1352 1.12 jmcneill } 1353 1.12 jmcneill 1354 1.12 jmcneill static bool 1355 1.96 jruoho cpu_suspend(device_t dv, const pmf_qual_t *qual) 1356 1.96 jruoho { 1357 1.96 jruoho struct 
cpu_softc *sc = device_private(dv); 1358 1.96 jruoho struct cpu_info *ci = sc->sc_info; 1359 1.96 jruoho 1360 1.96 jruoho if ((ci->ci_flags & CPUF_PRESENT) == 0) 1361 1.96 jruoho return true; 1362 1.96 jruoho else { 1363 1.96 jruoho cpufreq_suspend(ci); 1364 1.96 jruoho } 1365 1.96 jruoho 1366 1.96 jruoho return cpu_stop(dv); 1367 1.96 jruoho } 1368 1.96 jruoho 1369 1.96 jruoho static bool 1370 1.69 dyoung cpu_resume(device_t dv, const pmf_qual_t *qual) 1371 1.12 jmcneill { 1372 1.12 jmcneill struct cpu_softc *sc = device_private(dv); 1373 1.12 jmcneill struct cpu_info *ci = sc->sc_info; 1374 1.20 jmcneill int err = 0; 1375 1.12 jmcneill 1376 1.93 jruoho if ((ci->ci_flags & CPUF_PRESENT) == 0) 1377 1.12 jmcneill return true; 1378 1.93 jruoho 1379 1.196 fcambus if (CPU_IS_PRIMARY(ci)) 1380 1.93 jruoho goto out; 1381 1.93 jruoho 1382 1.12 jmcneill if (ci->ci_data.cpu_idlelwp == NULL) 1383 1.93 jruoho goto out; 1384 1.12 jmcneill 1385 1.20 jmcneill if (sc->sc_wasonline) { 1386 1.20 jmcneill mutex_enter(&cpu_lock); 1387 1.58 rmind err = cpu_setstate(ci, true); 1388 1.20 jmcneill mutex_exit(&cpu_lock); 1389 1.20 jmcneill } 1390 1.13 joerg 1391 1.93 jruoho out: 1392 1.93 jruoho if (err != 0) 1393 1.93 jruoho return false; 1394 1.93 jruoho 1395 1.93 jruoho cpufreq_resume(ci); 1396 1.93 jruoho 1397 1.93 jruoho return true; 1398 1.12 jmcneill } 1399 1.12 jmcneill 1400 1.79 jruoho static bool 1401 1.79 jruoho cpu_shutdown(device_t dv, int how) 1402 1.79 jruoho { 1403 1.90 dyoung struct cpu_softc *sc = device_private(dv); 1404 1.90 dyoung struct cpu_info *ci = sc->sc_info; 1405 1.90 dyoung 1406 1.96 jruoho if ((ci->ci_flags & CPUF_BSP) != 0) 1407 1.90 dyoung return false; 1408 1.90 dyoung 1409 1.96 jruoho if ((ci->ci_flags & CPUF_PRESENT) == 0) 1410 1.96 jruoho return true; 1411 1.96 jruoho 1412 1.96 jruoho return cpu_stop(dv); 1413 1.79 jruoho } 1414 1.79 jruoho 1415 1.185 msaitoh /* Get the TSC frequency and set it to ci->ci_data.cpu_cc_freq. 
*/ 1416 1.7 ad void 1417 1.7 ad cpu_get_tsc_freq(struct cpu_info *ci) 1418 1.7 ad { 1419 1.214 imil static uint64_t freq_from_cpuid = 0; 1420 1.214 imil uint64_t freq = 0, t0, t1; 1421 1.190 ad int64_t overhead; 1422 1.7 ad 1423 1.196 fcambus if (CPU_IS_PRIMARY(ci) && cpu_hascounter()) { 1424 1.191 msaitoh /* 1425 1.191 msaitoh * If it's the first call of this function, try to get TSC 1426 1.191 msaitoh * freq from CPUID by calling cpu_tsc_freq_cpuid(). 1427 1.191 msaitoh * The function also set lapic_per_second variable if it's 1428 1.191 msaitoh * known. This is required for Intel's Comet Lake and newer 1429 1.191 msaitoh * processors to set LAPIC timer correctly. 1430 1.214 imil * 1431 1.214 imil * If TSC freq is already known by CPUID, don't go through 1432 1.214 imil * tests again. 1433 1.191 msaitoh */ 1434 1.214 imil if (freq_from_cpuid != 0) 1435 1.214 imil return; 1436 1.214 imil 1437 1.191 msaitoh if (ci->ci_data.cpu_cc_freq == 0) 1438 1.191 msaitoh freq = freq_from_cpuid = cpu_tsc_freq_cpuid(ci); 1439 1.204 mlelstv if (freq != 0) 1440 1.204 mlelstv aprint_debug_dev(ci->ci_dev, "TSC freq " 1441 1.204 mlelstv "from CPUID %" PRIu64 " Hz\n", freq); 1442 1.190 ad #if NHPET > 0 1443 1.204 mlelstv if (freq == 0) { 1444 1.190 ad freq = hpet_tsc_freq(); 1445 1.204 mlelstv if (freq != 0) 1446 1.204 mlelstv aprint_debug_dev(ci->ci_dev, "TSC freq " 1447 1.204 mlelstv "from HPET %" PRIu64 " Hz\n", freq); 1448 1.204 mlelstv } 1449 1.190 ad #endif 1450 1.190 ad if (freq == 0) { 1451 1.190 ad /* 1452 1.202 msaitoh * Work out the approximate overhead involved below. 1453 1.190 ad * Discard the result of the first go around the 1454 1.190 ad * loop. 
1455 1.190 ad */ 1456 1.190 ad overhead = 0; 1457 1.190 ad for (int i = 0; i <= 8; i++) { 1458 1.204 mlelstv const int s = splhigh(); 1459 1.190 ad t0 = cpu_counter(); 1460 1.192 ad delay_func(0); 1461 1.190 ad t1 = cpu_counter(); 1462 1.204 mlelstv splx(s); 1463 1.190 ad if (i > 0) { 1464 1.190 ad overhead += (t1 - t0); 1465 1.190 ad } 1466 1.190 ad } 1467 1.190 ad overhead >>= 3; 1468 1.185 msaitoh 1469 1.204 mlelstv /* 1470 1.204 mlelstv * Now do the calibration. 1471 1.204 mlelstv */ 1472 1.204 mlelstv freq = 0; 1473 1.204 mlelstv for (int i = 0; i < 1000; i++) { 1474 1.204 mlelstv const int s = splhigh(); 1475 1.204 mlelstv t0 = cpu_counter(); 1476 1.204 mlelstv delay_func(100); 1477 1.204 mlelstv t1 = cpu_counter(); 1478 1.204 mlelstv splx(s); 1479 1.204 mlelstv freq += t1 - t0 - overhead; 1480 1.204 mlelstv } 1481 1.204 mlelstv freq = freq * 10; 1482 1.204 mlelstv 1483 1.204 mlelstv aprint_debug_dev(ci->ci_dev, "TSC freq " 1484 1.204 mlelstv "from delay %" PRIu64 " Hz\n", freq); 1485 1.190 ad } 1486 1.191 msaitoh if (ci->ci_data.cpu_cc_freq != 0) { 1487 1.191 msaitoh freq_from_cpuid = cpu_tsc_freq_cpuid(ci); 1488 1.191 msaitoh if ((freq_from_cpuid != 0) 1489 1.191 msaitoh && (freq != freq_from_cpuid)) 1490 1.191 msaitoh aprint_verbose_dev(ci->ci_dev, "TSC freq " 1491 1.191 msaitoh "calibrated %" PRIu64 " Hz\n", freq); 1492 1.191 msaitoh } 1493 1.185 msaitoh } else { 1494 1.190 ad freq = cpu_info_primary.ci_data.cpu_cc_freq; 1495 1.7 ad } 1496 1.190 ad 1497 1.190 ad ci->ci_data.cpu_cc_freq = freq; 1498 1.7 ad } 1499 1.37 joerg 1500 1.213 imil bool 1501 1.213 imil has_lapic(void) 1502 1.213 imil { 1503 1.213 imil #if NLAPIC > 0 1504 1.213 imil return true; 1505 1.213 imil #else 1506 1.213 imil return false; 1507 1.213 imil #endif 1508 1.213 imil } 1509 1.213 imil 1510 1.37 joerg void 1511 1.37 joerg x86_cpu_idle_mwait(void) 1512 1.37 joerg { 1513 1.37 joerg struct cpu_info *ci = curcpu(); 1514 1.37 joerg 1515 1.37 joerg KASSERT(ci->ci_ilevel == IPL_NONE); 1516 
1.37 joerg 1517 1.37 joerg x86_monitor(&ci->ci_want_resched, 0, 0); 1518 1.37 joerg if (__predict_false(ci->ci_want_resched)) { 1519 1.37 joerg return; 1520 1.37 joerg } 1521 1.37 joerg x86_mwait(0, 0); 1522 1.37 joerg } 1523 1.37 joerg 1524 1.37 joerg void 1525 1.37 joerg x86_cpu_idle_halt(void) 1526 1.37 joerg { 1527 1.37 joerg struct cpu_info *ci = curcpu(); 1528 1.37 joerg 1529 1.37 joerg KASSERT(ci->ci_ilevel == IPL_NONE); 1530 1.37 joerg 1531 1.37 joerg x86_disable_intr(); 1532 1.37 joerg if (!__predict_false(ci->ci_want_resched)) { 1533 1.37 joerg x86_stihlt(); 1534 1.37 joerg } else { 1535 1.37 joerg x86_enable_intr(); 1536 1.37 joerg } 1537 1.37 joerg } 1538 1.73 jym 1539 1.73 jym /* 1540 1.73 jym * Loads pmap for the current CPU. 1541 1.73 jym */ 1542 1.73 jym void 1543 1.97 bouyer cpu_load_pmap(struct pmap *pmap, struct pmap *oldpmap) 1544 1.73 jym { 1545 1.207 riastrad 1546 1.207 riastrad KASSERT(kpreempt_disabled()); 1547 1.207 riastrad 1548 1.144 maxv #ifdef SVS 1549 1.206 riastrad if (svs_enabled && pmap_is_user(pmap)) { 1550 1.159 maxv svs_pdir_switch(pmap); 1551 1.159 maxv } 1552 1.144 maxv #endif 1553 1.144 maxv 1554 1.73 jym #ifdef PAE 1555 1.99 yamt struct cpu_info *ci = curcpu(); 1556 1.116 nat bool interrupts_enabled; 1557 1.99 yamt pd_entry_t *l3_pd = ci->ci_pae_l3_pdir; 1558 1.99 yamt int i; 1559 1.73 jym 1560 1.99 yamt /* 1561 1.99 yamt * disable interrupts to block TLB shootdowns, which can reload cr3. 1562 1.99 yamt * while this doesn't block NMIs, it's probably ok as NMIs unlikely 1563 1.99 yamt * reload cr3. 
1564 1.99 yamt */ 1565 1.116 nat interrupts_enabled = (x86_read_flags() & PSL_I) != 0; 1566 1.116 nat if (interrupts_enabled) 1567 1.116 nat x86_disable_intr(); 1568 1.116 nat 1569 1.73 jym for (i = 0 ; i < PDP_SIZE; i++) { 1570 1.168 maxv l3_pd[i] = pmap->pm_pdirpa[i] | PTE_P; 1571 1.73 jym } 1572 1.134 maxv 1573 1.116 nat if (interrupts_enabled) 1574 1.116 nat x86_enable_intr(); 1575 1.73 jym tlbflush(); 1576 1.160 maxv #else 1577 1.73 jym lcr3(pmap_pdirpa(pmap, 0)); 1578 1.160 maxv #endif 1579 1.73 jym } 1580 1.91 cherry 1581 1.91 cherry /* 1582 1.91 cherry * Notify all other cpus to halt. 1583 1.91 cherry */ 1584 1.91 cherry 1585 1.91 cherry void 1586 1.92 cherry cpu_broadcast_halt(void) 1587 1.91 cherry { 1588 1.91 cherry x86_broadcast_ipi(X86_IPI_HALT); 1589 1.91 cherry } 1590 1.91 cherry 1591 1.91 cherry /* 1592 1.176 ad * Send a dummy ipi to a cpu to force it to run splraise()/spllower(), 1593 1.176 ad * and trigger an AST on the running LWP. 1594 1.91 cherry */ 1595 1.91 cherry 1596 1.91 cherry void 1597 1.91 cherry cpu_kick(struct cpu_info *ci) 1598 1.91 cherry { 1599 1.176 ad x86_send_ipi(ci, X86_IPI_AST); 1600 1.91 cherry } 1601