/*	$NetBSD: cpu.c,v 1.145 2023/02/25 00:35:01 riastradh Exp $	*/

/*-
 * Copyright (c) 2000 The NetBSD Foundation, Inc.
 * Copyright (c) 2002, 2006, 2007 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by RedBack Networks Inc.
 *
 * Author: Bill Sommerfeld
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1999 Stefan Grefen
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the NetBSD
 *      Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.145 2023/02/25 00:35:01 riastradh Exp $");

#include "opt_ddb.h"
#include "opt_multiprocessor.h"
#include "opt_mpbios.h"		/* for MPDEBUG */
#include "opt_mtrr.h"
#include "opt_xen.h"

#include "lapic.h"
#include "ioapic.h"

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/kmem.h>
#include <sys/cpu.h>
#include <sys/cpufreq.h>
#include <sys/atomic.h>
#include <sys/reboot.h>
#include <sys/idle.h>

#include <uvm/uvm.h>

#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/cpuvar.h>
#include <machine/pmap.h>
#include <machine/pmap_private.h>
#include <machine/vmparam.h>
#include <machine/mpbiosvar.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#include <machine/segments.h>
#include <machine/gdt.h>
#include <machine/mtrr.h>
#include <machine/pio.h>

#include <x86/fpu.h>

#include <xen/xen.h>
#include <xen/include/public/vcpu.h>
#include <xen/vcpuvar.h>

#if NLAPIC > 0
#include <machine/apicvar.h>
#include <machine/i82489reg.h>
#include <machine/i82489var.h>
#endif

#include <dev/ic/mc146818reg.h>
#include <dev/isa/isareg.h>

static int	cpu_match(device_t, cfdata_t, void *);
static void	cpu_attach(device_t, device_t, void *);
static void	cpu_defer(device_t);
static int	cpu_rescan(device_t, const char *, const int *);
static void	cpu_childdetached(device_t, device_t);
static int	vcpu_match(device_t, cfdata_t, void *);
static void	vcpu_attach(device_t, device_t, void *);
static void	cpu_attach_common(device_t, device_t, void *);
void	cpu_offline_md(void);

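/*
 * Softc shared by the "cpu" devices (physical CPUs, collected on
 * phycpu_info_list) and the "vcpu" devices (Xen virtual CPUs, collected
 * on cpu_info_list); both CFATTACH declarations below use it.
 */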
struct cpu_softc {
	device_t sc_dev;		/* device tree glue */
	struct cpu_info *sc_info;	/* pointer to CPU info */
	bool sc_wasonline;
};

int mp_cpu_start(struct cpu_info *, vaddr_t);
void mp_cpu_start_cleanup(struct cpu_info *);
const struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL,
    mp_cpu_start_cleanup };

CFATTACH_DECL2_NEW(cpu, sizeof(struct cpu_softc),
    cpu_match, cpu_attach, NULL, NULL, cpu_rescan, cpu_childdetached);

CFATTACH_DECL_NEW(vcpu, sizeof(struct cpu_softc),
    vcpu_match, vcpu_attach, NULL, NULL);

/*
 * Statically-allocated CPU info for the primary CPU (or the only
 * CPU, on uniprocessors).  The CPU info list is initialized to
 * point at it.
 */
struct cpu_info cpu_info_primary __aligned(CACHE_LINE_SIZE) = {
	.ci_dev = 0,
	.ci_self = &cpu_info_primary,
	.ci_idepth = -1,
	.ci_curlwp = &lwp0,
	.ci_curldt = -1,
};
struct cpu_info phycpu_info_primary __aligned(CACHE_LINE_SIZE) = {
	.ci_dev = 0,
	.ci_self = &phycpu_info_primary,
};

struct cpu_info *cpu_info_list = &cpu_info_primary;
struct cpu_info *phycpu_info_list = &phycpu_info_primary;

uint32_t cpu_feature[7] __read_mostly; /* X86 CPUID feature bits
			 *	[0] basic features %edx
			 *	[1] basic features %ecx
			 *	[2] extended features %edx
			 *	[3] extended features %ecx
			 *	[4] VIA padlock features
			 *	[5] structured extended features cpuid.7:%ebx
			 *	[6] structured extended features cpuid.7:%ecx
			 */

bool x86_mp_online;
paddr_t mp_trampoline_paddr = MP_TRAMPOLINE;

#if defined(MULTIPROCESSOR)
void		cpu_hatch(void *);
static void	cpu_boot_secondary(struct cpu_info *ci);
static void	cpu_start_secondary(struct cpu_info *ci);
#endif	/* MULTIPROCESSOR */

static int
cpu_match(device_t parent, cfdata_t match, void *aux)
{

	return 1;
}

static void
cpu_attach(device_t parent, device_t self, void *aux)
{
	struct cpu_softc *sc = device_private(self);
	struct cpu_attach_args *caa = aux;
	struct cpu_info *ci;
	uintptr_t ptr;
	static int nphycpu = 0;

	sc->sc_dev = self;

	/*
	 * If we're the first attached CPU use the primary cpu_info,
	 * otherwise allocate a new one.
	 */
	aprint_naive("\n");
	aprint_normal("\n");
	if (nphycpu > 0) {
		struct cpu_info *tmp;
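
		/*
		 * kmem_zalloc() guarantees alignment suitable for any
		 * object, but not necessarily CACHE_LINE_SIZE alignment;
		 * over-allocate by CACHE_LINE_SIZE - 1 bytes and round
		 * the pointer up to a cache line ourselves.
		 */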
		ptr = (uintptr_t)kmem_zalloc(sizeof(*ci) + CACHE_LINE_SIZE - 1,
		    KM_SLEEP);
		ci = (struct cpu_info *)roundup2(ptr, CACHE_LINE_SIZE);
		ci->ci_curldt = -1;

		tmp = phycpu_info_list;
		while (tmp->ci_next)
			tmp = tmp->ci_next;

		tmp->ci_next = ci;
	} else {
		ci = &phycpu_info_primary;
	}

	ci->ci_self = ci;
	sc->sc_info = ci;

	ci->ci_dev = self;
	ci->ci_acpiid = caa->cpu_id;
	ci->ci_cpuid = caa->cpu_number;
	ci->ci_vcpu = NULL;
	ci->ci_index = nphycpu++;
	ci->ci_kfpu_spl = -1;

	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");

	(void)config_defer(self, cpu_defer);
}

static void
cpu_defer(device_t self)
{
	cpu_rescan(self, NULL, NULL);
}

static int
cpu_rescan(device_t self, const char *ifattr, const int *locators)
{
	struct cpu_softc *sc = device_private(self);
	struct cpufeature_attach_args cfaa;
	struct cpu_info *ci = sc->sc_info;

	memset(&cfaa, 0, sizeof(cfaa));
	cfaa.ci = ci;

	if (ifattr_match(ifattr, "cpufeaturebus")) {
		if (ci->ci_frequency == NULL) {
			cfaa.name = "frequency";
			ci->ci_frequency =
			    config_found(self, &cfaa, NULL,
				CFARGS(.iattr = "cpufeaturebus"));
		}
	}

	return 0;
}

static void
cpu_childdetached(device_t self, device_t child)
{
	struct cpu_softc *sc = device_private(self);
	struct cpu_info *ci = sc->sc_info;

	if (ci->ci_frequency == child)
		ci->ci_frequency = NULL;
}

static int
vcpu_match(device_t parent, cfdata_t match, void *aux)
{
	struct vcpu_attach_args *vcaa = aux;
	struct vcpu_runstate_info vcr;
	int error;

	if (strcmp(vcaa->vcaa_name, match->cf_name) == 0) {
		error = HYPERVISOR_vcpu_op(VCPUOP_get_runstate_info,
		    vcaa->vcaa_caa.cpu_number, &vcr);
		switch (error) {
		case 0:
			return 1;
		case -ENOENT:
			return 0;
		default:
			panic("Unknown hypervisor error %d returned on vcpu runstate probe\n", error);
		}
	}

	return 0;
}

static void
vcpu_attach(device_t parent, device_t self, void *aux)
{
	struct vcpu_attach_args *vcaa = aux;

	KASSERT(vcaa->vcaa_caa.cpu_func == NULL);
	vcaa->vcaa_caa.cpu_func = &mp_cpu_funcs;
	cpu_attach_common(parent, self, &vcaa->vcaa_caa);

	if (!pmf_device_register(self, NULL, NULL))
		aprint_error_dev(self, "couldn't establish power handler\n");
}

static int
vcpu_is_up(struct cpu_info *ci)
{
	KASSERT(ci != NULL);
	return HYPERVISOR_vcpu_op(VCPUOP_is_up, ci->ci_vcpuid, NULL);
}

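/*
 * cpu_vm_init() sizes the page-color pool from the largest cache seen
 * on this CPU: an N-way set-associative cache of totalsize bytes spans
 * atop(totalsize) / N page-sized colors.  For example, a 512kB 8-way
 * L2 with 4kB pages gives 128 / 8 = 16 colors.
 */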
static void
cpu_vm_init(struct cpu_info *ci)
{
	int ncolors = 2, i;

	for (i = CAI_ICACHE; i <= CAI_L2CACHE; i++) {
		struct x86_cache_info *cai;
		int tcolors;

		cai = &ci->ci_cinfo[i];

		tcolors = atop(cai->cai_totalsize);
		switch (cai->cai_associativity) {
		case 0xff:
			tcolors = 1; /* fully associative */
			break;
		case 0:
		case 1:
			break;
		default:
			tcolors /= cai->cai_associativity;
		}
		ncolors = uimax(ncolors, tcolors);
	}

	/*
	 * Knowing the size of the largest cache on this CPU, potentially
	 * re-color our pages.
	 */
	aprint_debug_dev(ci->ci_dev, "%d page colors\n", ncolors);
	uvm_page_recolor(ncolors);
	pmap_tlb_cpu_init(ci);
#ifndef __HAVE_DIRECT_MAP
	pmap_vpage_cpu_init(ci);
#endif
}

static void
cpu_attach_common(device_t parent, device_t self, void *aux)
{
	struct cpu_softc *sc = device_private(self);
	struct cpu_attach_args *caa = aux;
	struct cpu_info *ci;
	uintptr_t ptr;
	int cpunum = caa->cpu_number;
	static bool again = false;

	sc->sc_dev = self;

	/*
	 * If we're an Application Processor, allocate a cpu_info
	 * structure, otherwise use the primary's.
	 */
	if (caa->cpu_role == CPU_ROLE_AP) {
		aprint_naive(": Application Processor\n");
		ptr = (uintptr_t)kmem_alloc(sizeof(*ci) + CACHE_LINE_SIZE - 1,
		    KM_SLEEP);
		ci = (struct cpu_info *)roundup2(ptr, CACHE_LINE_SIZE);
		memset(ci, 0, sizeof(*ci));
		cpu_init_tss(ci);
	} else {
		aprint_naive(": %s Processor\n",
		    caa->cpu_role == CPU_ROLE_SP ? "Single" : "Boot");
		ci = &cpu_info_primary;
	}

	ci->ci_self = ci;
	sc->sc_info = ci;
	ci->ci_dev = self;
	ci->ci_cpuid = cpunum;
	ci->ci_vcpuid = cpunum;
	ci->ci_kfpu_spl = -1;

	KASSERT(HYPERVISOR_shared_info != NULL);
	KASSERT(cpunum < XEN_LEGACY_MAX_VCPUS);
	ci->ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[cpunum];

	KASSERT(ci->ci_func == 0);
	ci->ci_func = caa->cpu_func;
	aprint_normal("\n");

	/* Must be called before mi_cpu_attach(). */
	cpu_vm_init(ci);

	if (caa->cpu_role == CPU_ROLE_AP) {
		int error;

		error = mi_cpu_attach(ci);

		KASSERT(ci->ci_data.cpu_idlelwp != NULL);
		if (error != 0) {
			aprint_error_dev(self,
			    "mi_cpu_attach failed with %d\n", error);
			return;
		}
	} else {
		KASSERT(ci->ci_data.cpu_idlelwp != NULL);
	}

	KASSERT(ci->ci_cpuid == ci->ci_index);
#ifdef __x86_64__
	/* No user PGD mapped for this CPU yet */
	ci->ci_xen_current_user_pgd = 0;
#endif
	mutex_init(&ci->ci_kpm_mtx, MUTEX_DEFAULT, IPL_VM);
	pmap_reference(pmap_kernel());
	ci->ci_pmap = pmap_kernel();
	ci->ci_tlbstate = TLBSTATE_STALE;

	/*
	 * Boot processor may not be attached first, but the below
	 * must be done to allow booting other processors.
	 */
	if (!again) {
		atomic_or_32(&ci->ci_flags, CPUF_PRESENT | CPUF_PRIMARY);
		/* Basic init. */
		cpu_intr_init(ci);
		cpu_get_tsc_freq(ci);
		cpu_init(ci);
		pmap_cpu_init_late(ci);

		/* Every processor needs to init its own ipi h/w (similar to lapic) */
		xen_ipi_init();

		/* Make sure DELAY() is initialized. */
		DELAY(1);
		again = true;
	}

	/* further PCB init done later. */

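	/*
	 * CPU_ROLE_SP is the only CPU of a uniprocessor system,
	 * CPU_ROLE_BP the CPU we are booting on, and CPU_ROLE_AP a
	 * secondary CPU that still has to be started, below.
	 */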
	switch (caa->cpu_role) {
	case CPU_ROLE_SP:
		atomic_or_32(&ci->ci_flags, CPUF_SP);
		cpu_identify(ci);
		x86_cpu_idle_init();
		break;

	case CPU_ROLE_BP:
		atomic_or_32(&ci->ci_flags, CPUF_BSP);
		cpu_identify(ci);
		x86_cpu_idle_init();
		break;

	case CPU_ROLE_AP:
		atomic_or_32(&ci->ci_flags, CPUF_AP);

		/*
		 * report on an AP
		 */
#if defined(MULTIPROCESSOR)
		/* interrupt handler stack */
		cpu_intr_init(ci);

		/* Setup per-cpu memory for idt */
		idt_vec_init_cpu_md(&ci->ci_idtvec, cpu_index(ci));

		/* Setup per-cpu memory for gdt */
		gdt_alloc_cpu(ci);

		pmap_cpu_init_late(ci);
		cpu_start_secondary(ci);

		if (ci->ci_flags & CPUF_PRESENT) {
			struct cpu_info *tmp;

			cpu_identify(ci);
			tmp = cpu_info_list;
			while (tmp->ci_next)
				tmp = tmp->ci_next;

			tmp->ci_next = ci;
		}
#else
		aprint_error_dev(ci->ci_dev, "not started\n");
#endif
		break;

	default:
		panic("unknown processor type??\n");
	}

#ifdef MPVERBOSE
	if (mp_verbose) {
		struct lwp *l = ci->ci_data.cpu_idlelwp;
		struct pcb *pcb = lwp_getpcb(l);

		aprint_verbose_dev(self,
		    "idle lwp at %p, idle sp at %p\n",
		    l,
#ifdef i386
		    (void *)pcb->pcb_esp
#else
		    (void *)pcb->pcb_rsp
#endif
		);
	}
#endif	/* MPVERBOSE */
}

/*
 * Initialize the processor appropriately.
 */

void
cpu_init(struct cpu_info *ci)
{
	uint32_t cr4 = 0;

	/*
	 * If we have FXSAVE/FXRSTOR, use them.
	 */
	if (cpu_feature[0] & CPUID_FXSR) {
		cr4 |= CR4_OSFXSR;

		/*
		 * If we have SSE/SSE2, enable XMM exceptions.
		 */
		if (cpu_feature[0] & (CPUID_SSE|CPUID_SSE2))
			cr4 |= CR4_OSXMMEXCPT;
	}

	/* If xsave is supported, enable it */
	if (cpu_feature[1] & CPUID2_XSAVE && x86_fpu_save >= FPU_SAVE_XSAVE)
		cr4 |= CR4_OSXSAVE;

	if (cr4) {
		cr4 |= rcr4();
		lcr4(cr4);
	}

	if (x86_fpu_save >= FPU_SAVE_FXSAVE) {
		fpuinit_mxcsr_mask();
	}

	/*
	 * Changing CR4 register may change cpuid values. For example,
	 * setting CR4_OSXSAVE sets CPUID2_OSXSAVE.  The CPUID2_OSXSAVE is
	 * in ci_feat_val[1], so update it.
	 * XXX Other than ci_feat_val[1] might be changed.
	 */
	if (cpuid_level >= 1) {
		u_int descs[4];

		x86_cpuid(1, descs);
		ci->ci_feat_val[1] = descs[2];
	}

	/* If xsave is enabled, enable all fpu features */
	if (cr4 & CR4_OSXSAVE) {
		wrxcr(0, x86_xsave_features & XCR0_FPU);
	}

	atomic_or_32(&ci->ci_flags, CPUF_RUNNING);
}

#ifdef MULTIPROCESSOR

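/*
 * Boot every usable AP, collecting each started CPU (plus the boot
 * CPU) in a local kcpuset, then spin until kcpuset_running - which
 * each CPU joins as it enters its idle loop - matches it.
 */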
void
cpu_boot_secondary_processors(void)
{
	struct cpu_info *ci;
	kcpuset_t *cpus;
	u_long i;

	kcpuset_create(&cpus, true);
	kcpuset_set(cpus, cpu_index(curcpu()));
	for (i = 0; i < maxcpus; i++) {
		ci = cpu_lookup(i);
		if (ci == NULL)
			continue;
		if (ci->ci_data.cpu_idlelwp == NULL)
			continue;
		if ((ci->ci_flags & CPUF_PRESENT) == 0)
			continue;
		if (ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY))
			continue;
		cpu_boot_secondary(ci);
		kcpuset_set(cpus, cpu_index(ci));
	}
	while (!kcpuset_match(cpus, kcpuset_running))
		;
	kcpuset_destroy(cpus);

	x86_mp_online = true;
}

static void
cpu_init_idle_lwp(struct cpu_info *ci)
{
	struct lwp *l = ci->ci_data.cpu_idlelwp;
	struct pcb *pcb = lwp_getpcb(l);

	pcb->pcb_cr0 = rcr0();
}

void
cpu_init_idle_lwps(void)
{
	struct cpu_info *ci;
	u_long i;

	for (i = 0; i < maxcpus; i++) {
		ci = cpu_lookup(i);
		if (ci == NULL)
			continue;
		if (ci->ci_data.cpu_idlelwp == NULL)
			continue;
		if ((ci->ci_flags & CPUF_PRESENT) == 0)
			continue;
		cpu_init_idle_lwp(ci);
	}
}

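/*
 * Boot CPU and AP handshake on ci_flags: cpu_start_secondary() spins
 * until the AP has marked itself CPUF_PRESENT from cpu_hatch();
 * cpu_boot_secondary() then sets CPUF_GO and spins until the AP marks
 * itself CPUF_RUNNING from cpu_init().
 */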
static void
cpu_start_secondary(struct cpu_info *ci)
{
	int i;

	aprint_debug_dev(ci->ci_dev, "starting\n");

	ci->ci_curlwp = ci->ci_data.cpu_idlelwp;

	if (CPU_STARTUP(ci, (vaddr_t)cpu_hatch) != 0) {
		return;
	}

	/*
	 * wait for it to become ready
	 */
	for (i = 100000; (!(ci->ci_flags & CPUF_PRESENT)) && i > 0; i--) {
		delay(10);
	}
	if ((ci->ci_flags & CPUF_PRESENT) == 0) {
		aprint_error_dev(ci->ci_dev, "failed to become ready\n");
#if defined(MPDEBUG) && defined(DDB)
		printf("dropping into debugger; continue from here to resume boot\n");
		Debugger();
#endif
	}

	CPU_START_CLEANUP(ci);
}

void
cpu_boot_secondary(struct cpu_info *ci)
{
	int i;

	atomic_or_32(&ci->ci_flags, CPUF_GO);
	for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i > 0; i--) {
		delay(10);
	}
	if ((ci->ci_flags & CPUF_RUNNING) == 0) {
		aprint_error_dev(ci->ci_dev, "CPU failed to start\n");
#if defined(MPDEBUG) && defined(DDB)
		printf("dropping into debugger; continue from here to resume boot\n");
		Debugger();
#endif
	}
}

/*
 * APs end up here immediately after initialisation and VCPUOP_up in
 * mp_cpu_start().
 * At this point, we are running in the idle pcb/idle stack of the new
 * CPU.  This function jumps to the idle loop and starts looking for
 * work.
 */
extern void x86_64_tls_switch(struct lwp *);

void
cpu_hatch(void *v)
{
	struct cpu_info *ci = (struct cpu_info *)v;
	struct pcb *pcb;
	int s, i;

	/* Setup TLS and kernel GS/FS */
	cpu_init_msrs(ci, true);
	cpu_init_idt(ci);
	gdt_init_cpu(ci);

	cpu_probe(ci);

	atomic_or_32(&ci->ci_flags, CPUF_PRESENT);

	while ((ci->ci_flags & CPUF_GO) == 0) {
		/* Don't use delay, boot CPU may be patching the text. */
		for (i = 10000; i != 0; i--)
			x86_pause();
	}

	/* Because the text may have been patched in x86_patch(). */
	x86_flush();
	tlbflushg();

	KASSERT((ci->ci_flags & CPUF_RUNNING) == 0);

	KASSERT(ci->ci_curlwp == ci->ci_data.cpu_idlelwp);
	KASSERT(curlwp == ci->ci_data.cpu_idlelwp);
	pcb = lwp_getpcb(curlwp);
	pcb->pcb_cr3 = pmap_pdirpa(pmap_kernel(), 0);

	xen_ipi_init();

	xen_initclocks();

#ifdef __x86_64__
	fpuinit(ci);
#endif

	lldt(GSEL(GLDT_SEL, SEL_KPL));

	cpu_init(ci);
	cpu_get_tsc_freq(ci);

	s = splhigh();
	x86_enable_intr();
	splx(s);

	aprint_debug_dev(ci->ci_dev, "running\n");

	KASSERT(ci->ci_curlwp == ci->ci_data.cpu_idlelwp);
	idle_loop(NULL);
	KASSERT(false);
}

#if defined(DDB)

#include <ddb/db_output.h>
#include <machine/db_machdep.h>

/*
 * Dump CPU information from ddb.
 */
void
cpu_debug_dump(void)
{
	struct cpu_info *ci;
	CPU_INFO_ITERATOR cii;

	db_printf("addr		dev	id	flags	ipis	curlwp\n");
	for (CPU_INFO_FOREACH(cii, ci)) {
		db_printf("%p	%s	%ld	%x	%x	%10p\n",
		    ci,
		    ci->ci_dev == NULL ? "BOOT" : device_xname(ci->ci_dev),
		    (long)ci->ci_vcpuid,
		    ci->ci_flags, ci->ci_ipis,
		    ci->ci_curlwp);
	}
}
#endif	/* DDB */

#endif	/* MULTIPROCESSOR */

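/*
 * Entry points Xen calls back into: hypervisor_callback is the event
 * channel upcall, and failsafe_callback the recovery entry used when
 * the hypervisor cannot restore our segment registers on return to
 * the guest.  Both are registered per-vcpu in the contexts built below.
 */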
extern void hypervisor_callback(void);
extern void failsafe_callback(void);
#ifdef __x86_64__
typedef void (vector)(void);
extern vector Xsyscall, Xsyscall32;
#endif

/*
 * Setup the "trampoline". On Xen, we setup nearly all cpu context
 * outside a trampoline, so we prototype and call targetip like so:
 * void targetip(struct cpu_info *);
 */

static void
gdt_prepframes(paddr_t *frames, vaddr_t base, uint32_t entries)
{
	int i;

	for (i = 0; i < entries; i++) {
		frames[i] = ((paddr_t)xpmap_ptetomach(
		    (pt_entry_t *)(base + (i << PAGE_SHIFT)))) >> PAGE_SHIFT;

		/* Mark Read-only */
		pmap_pte_clearbits(kvtopte(base + (i << PAGE_SHIFT)),
		    PTE_W);
	}
}

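/*
 * The contexts built below hand the new vcpu its cpu_info argument the
 * way each ABI expects it: in %rdi on amd64, and as an argument word on
 * the stack on i386.
 */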
#ifdef __x86_64__
extern char *ldtstore;

static void
xen_init_amd64_vcpuctxt(struct cpu_info *ci, struct vcpu_guest_context *initctx,
    void targetrip(struct cpu_info *))
{
	/* page frames to point at GDT */
	extern int gdt_size;
	paddr_t frames[16];
	psize_t gdt_ents;

	struct lwp *l;
	struct pcb *pcb;

	volatile struct vcpu_info *vci;

	KASSERT(ci != NULL);
	KASSERT(ci != &cpu_info_primary);
	KASSERT(initctx != NULL);
	KASSERT(targetrip != NULL);

	memset(initctx, 0, sizeof(*initctx));

	gdt_ents = roundup(gdt_size, PAGE_SIZE) >> PAGE_SHIFT;
	KASSERT(gdt_ents <= 16);

	gdt_prepframes(frames, (vaddr_t)ci->ci_gdt, gdt_ents);

	/* Initialise the vcpu context: We use idle_loop()'s pcb context. */

	l = ci->ci_data.cpu_idlelwp;

	KASSERT(l != NULL);
	pcb = lwp_getpcb(l);
	KASSERT(pcb != NULL);

	/* resume with interrupts off */
	vci = ci->ci_vcpu;
	vci->evtchn_upcall_mask = 1;
	__insn_barrier();

	/* resume in kernel-mode */
	initctx->flags = VGCF_in_kernel | VGCF_online;

	/*
	 * Stack and entry points:
	 * We arrange for the stack frame for cpu_hatch() to
	 * appear as a callee frame of lwp_trampoline(). Being a
	 * leaf frame prevents trampling on any of the MD stack setup
	 * that x86/vm_machdep.c:cpu_lwp_fork() does for idle_loop()
	 */

	initctx->user_regs.rdi = (uint64_t)ci;	/* targetrip(ci); */
	initctx->user_regs.rip = (vaddr_t)targetrip;

	initctx->user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);

	initctx->user_regs.rflags = pcb->pcb_flags;
	initctx->user_regs.rsp = pcb->pcb_rsp;

	/* Data segments */
	initctx->user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
	initctx->user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
	initctx->user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);

	/* GDT */
	memcpy(initctx->gdt_frames, frames, sizeof(frames));
	initctx->gdt_ents = gdt_ents;

	/* LDT */
	initctx->ldt_base = (unsigned long)ldtstore;
	initctx->ldt_ents = LDT_SIZE >> 3;

	/* Kernel context state */
	initctx->kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
	initctx->kernel_sp = pcb->pcb_rsp0;
	initctx->ctrlreg[0] = pcb->pcb_cr0;
	initctx->ctrlreg[1] = 0; /* "resuming" from kernel - no User cr3. */
	initctx->ctrlreg[2] = (vaddr_t)targetrip;
	/*
	 * Use pmap_kernel() L4 PD directly, until we setup the
	 * per-cpu L4 PD in pmap_cpu_init_late()
	 */
	initctx->ctrlreg[3] = xen_pfn_to_cr3(x86_btop(xpmap_ptom(ci->ci_kpm_pdirpa)));
	initctx->ctrlreg[4] = CR4_PAE | CR4_OSFXSR | CR4_OSXMMEXCPT;

	/* Xen callbacks */
	initctx->event_callback_eip = (unsigned long)hypervisor_callback;
	initctx->failsafe_callback_eip = (unsigned long)failsafe_callback;
	initctx->syscall_callback_eip = (unsigned long)Xsyscall;

	return;
}
#else /* i386 */
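/*
 * The i386 variant below is built the same way; it differs mainly in
 * passing the cpu_info argument on the stack, loading %cr3 from the
 * per-CPU PAE L3 page, and registering explicit code segments for the
 * callbacks.
 */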
extern union descriptor *ldtstore;
extern void Xsyscall(void);

static void
xen_init_i386_vcpuctxt(struct cpu_info *ci, struct vcpu_guest_context *initctx,
    void targeteip(struct cpu_info *))
{
	/* page frames to point at GDT */
	extern int gdt_size;
	paddr_t frames[16];
	psize_t gdt_ents;

	struct lwp *l;
	struct pcb *pcb;

	volatile struct vcpu_info *vci;

	KASSERT(ci != NULL);
	KASSERT(ci != &cpu_info_primary);
	KASSERT(initctx != NULL);
	KASSERT(targeteip != NULL);

	memset(initctx, 0, sizeof(*initctx));

	gdt_ents = roundup(gdt_size, PAGE_SIZE) >> PAGE_SHIFT;
	KASSERT(gdt_ents <= 16);

	gdt_prepframes(frames, (vaddr_t)ci->ci_gdt, gdt_ents);

	/*
	 * Initialise the vcpu context:
	 * We use this cpu's idle_loop() pcb context.
	 */

	l = ci->ci_data.cpu_idlelwp;

	KASSERT(l != NULL);
	pcb = lwp_getpcb(l);
	KASSERT(pcb != NULL);

	/* resume with interrupts off */
	vci = ci->ci_vcpu;
	vci->evtchn_upcall_mask = 1;
	__insn_barrier();

	/* resume in kernel-mode */
	initctx->flags = VGCF_in_kernel | VGCF_online;

	/*
	 * Stack frame setup for cpu_hatch():
	 * We arrange for the stack frame for cpu_hatch() to
	 * appear as a callee frame of lwp_trampoline(). Being a
	 * leaf frame prevents trampling on any of the MD stack setup
	 * that x86/vm_machdep.c:cpu_lwp_fork() does for idle_loop()
	 */

	initctx->user_regs.esp = pcb->pcb_esp - 4;	/* Leave word for arg1 */
	{
		/* targeteip(ci); */
		uint32_t *arg = (uint32_t *)initctx->user_regs.esp;
		arg[1] = (uint32_t)ci;	/* arg1 */
	}

	initctx->user_regs.eip = (vaddr_t)targeteip;
	initctx->user_regs.cs = GSEL(GCODE_SEL, SEL_KPL);
	initctx->user_regs.eflags |= pcb->pcb_iopl;

	/* Data segments */
	initctx->user_regs.ss = GSEL(GDATA_SEL, SEL_KPL);
	initctx->user_regs.es = GSEL(GDATA_SEL, SEL_KPL);
	initctx->user_regs.ds = GSEL(GDATA_SEL, SEL_KPL);
	initctx->user_regs.fs = GSEL(GDATA_SEL, SEL_KPL);

	/* GDT */
	memcpy(initctx->gdt_frames, frames, sizeof(frames));
	initctx->gdt_ents = gdt_ents;

	/* LDT */
	initctx->ldt_base = (unsigned long)ldtstore;
	initctx->ldt_ents = NLDT;

	/* Kernel context state */
	initctx->kernel_ss = GSEL(GDATA_SEL, SEL_KPL);
	initctx->kernel_sp = pcb->pcb_esp0;
	initctx->ctrlreg[0] = pcb->pcb_cr0;
	initctx->ctrlreg[1] = 0; /* "resuming" from kernel - no User cr3. */
	initctx->ctrlreg[2] = (vaddr_t)targeteip;
	initctx->ctrlreg[3] = xen_pfn_to_cr3(x86_btop(xpmap_ptom(ci->ci_pae_l3_pdirpa)));
	initctx->ctrlreg[4] = /* CR4_PAE | */CR4_OSFXSR | CR4_OSXMMEXCPT;

	/* Xen callbacks */
	initctx->event_callback_eip = (unsigned long)hypervisor_callback;
	initctx->event_callback_cs = GSEL(GCODE_SEL, SEL_KPL);
	initctx->failsafe_callback_eip = (unsigned long)failsafe_callback;
	initctx->failsafe_callback_cs = GSEL(GCODE_SEL, SEL_KPL);

	return;
}
#endif /* __x86_64__ */

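/*
 * Start a vcpu: hand Xen the register state built above with
 * VCPUOP_initialise, force the vcpu down so that VCPUOP_up restarts it
 * from that state, then bring it up.  The vcpu enters at cpu_hatch(ci).
 */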
int
mp_cpu_start(struct cpu_info *ci, vaddr_t target)
{
	int hyperror;
	struct vcpu_guest_context *vcpuctx;

	KASSERT(ci != NULL);
	KASSERT(ci != &cpu_info_primary);
	KASSERT(ci->ci_flags & CPUF_AP);

	vcpuctx = kmem_alloc(sizeof(*vcpuctx), KM_SLEEP);

#ifdef __x86_64__
	xen_init_amd64_vcpuctxt(ci, vcpuctx, (void (*)(struct cpu_info *))target);
#else
	xen_init_i386_vcpuctxt(ci, vcpuctx, (void (*)(struct cpu_info *))target);
#endif

	/* Initialise the given vcpu to execute cpu_hatch(ci); */
	if ((hyperror = HYPERVISOR_vcpu_op(VCPUOP_initialise, ci->ci_vcpuid, vcpuctx))) {
		aprint_error(": context initialisation failed. errno = %d\n", hyperror);
		goto out;
	}

	/* Start it up */

	/* First bring it down */
	if ((hyperror = HYPERVISOR_vcpu_op(VCPUOP_down, ci->ci_vcpuid, NULL))) {
		aprint_error(": VCPUOP_down hypervisor command failed. errno = %d\n", hyperror);
		goto out;
	}

	if ((hyperror = HYPERVISOR_vcpu_op(VCPUOP_up, ci->ci_vcpuid, NULL))) {
		aprint_error(": VCPUOP_up hypervisor command failed. errno = %d\n", hyperror);
		goto out;
	}

	if (!vcpu_is_up(ci)) {
		aprint_error(": did not come up\n");
		hyperror = -1;
		goto out;
	}

out:
	kmem_free(vcpuctx, sizeof(*vcpuctx));
	return hyperror;
}

void
mp_cpu_start_cleanup(struct cpu_info *ci)
{
	if (vcpu_is_up(ci)) {
		aprint_debug_dev(ci->ci_dev, "is started.\n");
	} else {
		aprint_error_dev(ci->ci_dev, "did not start up.\n");
	}
}

void
cpu_init_msrs(struct cpu_info *ci, bool full)
{
#ifdef __x86_64__
	if (full) {
		HYPERVISOR_set_segment_base(SEGBASE_FS, 0);
		HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL, (uint64_t)ci);
		HYPERVISOR_set_segment_base(SEGBASE_GS_USER, 0);
	}
#endif

	if (cpu_feature[2] & CPUID_NOX)
		wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE);
}

void
cpu_offline_md(void)
{
	return;
}

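/*
 * Xen publishes, per vcpu, a (tsc_to_system_mul, tsc_shift) pair such
 * that ns = ((tsc_delta << tsc_shift) * tsc_to_system_mul) >> 32, a
 * negative shift meaning a right shift.  Inverting that gives the TSC
 * frequency: freq = ((10^9 << 32) / tsc_to_system_mul), adjusted by
 * tsc_shift, as computed below.
 */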
void
cpu_get_tsc_freq(struct cpu_info *ci)
{
	uint32_t vcpu_tversion;
	const volatile vcpu_time_info_t *tinfo = &ci->ci_vcpu->time;

	vcpu_tversion = tinfo->version;
	while (tinfo->version == vcpu_tversion); /* Wait for a time update. XXX: timeout ? */

	uint64_t freq = 1000000000ULL << 32;
	freq = freq / (uint64_t)tinfo->tsc_to_system_mul;
	if (tinfo->tsc_shift < 0)
		freq = freq << -tinfo->tsc_shift;
	else
		freq = freq >> tinfo->tsc_shift;
	ci->ci_data.cpu_cc_freq = freq;
}

/*
 * Loads pmap for the current CPU.
 */
void
cpu_load_pmap(struct pmap *pmap, struct pmap *oldpmap)
{
	struct cpu_info *ci = curcpu();
	cpuid_t cid = cpu_index(ci);
	int i;

	KASSERT(kpreempt_disabled());
	KASSERT(pmap != pmap_kernel());

	mutex_enter(&ci->ci_kpm_mtx);
	/* make new pmap visible to xen_kpm_sync() */
	kcpuset_atomic_set(pmap->pm_xen_ptp_cpus, cid);

#ifdef __x86_64__
	pd_entry_t *new_pgd;
	paddr_t l4_pd_ma;

	l4_pd_ma = xpmap_ptom_masked(ci->ci_kpm_pdirpa);

	/*
	 * Map user space address in kernel space and load
	 * user cr3
	 */
	new_pgd = pmap->pm_pdir;
	KASSERT(pmap == ci->ci_pmap);

	/* Copy user pmap L4 PDEs (in user addr. range) to per-cpu L4 */
	for (i = 0; i < PDIR_SLOT_USERLIM; i++) {
		KASSERT(pmap != pmap_kernel() || new_pgd[i] == 0);
		if (ci->ci_kpm_pdir[i] != new_pgd[i]) {
			xpq_queue_pte_update(l4_pd_ma + i * sizeof(pd_entry_t),
			    new_pgd[i]);
		}
	}

	xen_set_user_pgd(pmap_pdirpa(pmap, 0));
	ci->ci_xen_current_user_pgd = pmap_pdirpa(pmap, 0);
#else
	paddr_t l3_pd = xpmap_ptom_masked(ci->ci_pae_l3_pdirpa);
	/* don't update the kernel L3 slot */
	for (i = 0; i < PDP_SIZE - 1; i++) {
		xpq_queue_pte_update(l3_pd + i * sizeof(pd_entry_t),
		    xpmap_ptom(pmap->pm_pdirpa[i]) | PTE_P);
	}
#endif

	tlbflush();

	/* old pmap no longer visible to xen_kpm_sync() */
	if (oldpmap != pmap_kernel()) {
		kcpuset_atomic_clear(oldpmap->pm_xen_ptp_cpus, cid);
	}
	mutex_exit(&ci->ci_kpm_mtx);
}

/*
 * pmap_cpu_init_late: perform late per-CPU initialization.
 *
 * Short note about percpu PDIR pages. Both the PAE and __x86_64__
 * architectures have per-cpu PDIR tables, for two different reasons:
 * - on PAE, this is to get around Xen's pagetable setup constraints
 *   (multiple L3[3]s cannot point to the same L2 - Xen will refuse to
 *   pin a table set up this way).
 * - on __x86_64__, this is for multiple CPUs to map in different user
 *   pmaps (see cpu_load_pmap()).
 *
 * What this means for us is that the PDIR of the pmap_kernel() is
 * considered to be a canonical "SHADOW" PDIR with the following
 * properties:
 * - its recursive mapping points to itself
 * - per-cpu recursive mappings point to themselves on __x86_64__
 * - per-cpu L4 pages' kernel entries are expected to be in sync with
 *   the shadow
 */

void
pmap_cpu_init_late(struct cpu_info *ci)
{
	int i;

	/*
	 * The BP already has its own PD page allocated during early
	 * MD startup.
	 */

#ifdef __x86_64__
	/* Setup per-cpu normal_pdes */
	extern pd_entry_t * const normal_pdes[];
	for (i = 0; i < PTP_LEVELS - 1; i++) {
		ci->ci_normal_pdes[i] = normal_pdes[i];
	}
#endif

	if (ci == &cpu_info_primary)
		return;

	KASSERT(ci != NULL);

#if defined(i386)
	cpu_alloc_l3_page(ci);
	KASSERT(ci->ci_pae_l3_pdirpa != 0);

	/* Initialise L3 entries 0 - 2: point them at pmap_kernel()'s L2s */
	for (i = 0; i < PDP_SIZE - 1; i++) {
		ci->ci_pae_l3_pdir[i] =
		    xpmap_ptom_masked(pmap_kernel()->pm_pdirpa[i]) | PTE_P;
	}
#endif

	ci->ci_kpm_pdir = (pd_entry_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_WIRED | UVM_KMF_ZERO | UVM_KMF_NOWAIT);

	if (ci->ci_kpm_pdir == NULL) {
		panic("%s: failed to allocate L4 per-cpu PD for CPU %d\n",
		    __func__, cpu_index(ci));
	}
	ci->ci_kpm_pdirpa = vtophys((vaddr_t)ci->ci_kpm_pdir);
	KASSERT(ci->ci_kpm_pdirpa != 0);

#ifdef __x86_64__
	extern pt_entry_t xpmap_pg_nx;

	/* Copy over the pmap_kernel() shadow L4 entries */
	memcpy(ci->ci_kpm_pdir, pmap_kernel()->pm_pdir, PAGE_SIZE);

	/* Recursive kernel mapping */
	ci->ci_kpm_pdir[PDIR_SLOT_PTE] = xpmap_ptom_masked(ci->ci_kpm_pdirpa)
	    | PTE_P | xpmap_pg_nx;
#else
	/* Copy over the pmap_kernel() shadow L2 entries */
	memcpy(ci->ci_kpm_pdir, pmap_kernel()->pm_pdir + PDIR_SLOT_KERN,
	    nkptp[PTP_LEVELS - 1] * sizeof(pd_entry_t));
#endif

	/* Xen wants a RO pdir. */
	pmap_protect(pmap_kernel(), (vaddr_t)ci->ci_kpm_pdir,
	    (vaddr_t)ci->ci_kpm_pdir + PAGE_SIZE, VM_PROT_READ);
	pmap_update(pmap_kernel());

#ifdef __x86_64__
	xpq_queue_pin_l4_table(xpmap_ptom_masked(ci->ci_kpm_pdirpa));
#else
	/*
	 * Initialize L3 entry 3. This mapping is shared across all pmaps
	 * and is static, ie: loading a new pmap will not update this entry.
	 */
	ci->ci_pae_l3_pdir[3] = xpmap_ptom_masked(ci->ci_kpm_pdirpa) | PTE_P;

	/* Xen wants a RO L3. */
	pmap_protect(pmap_kernel(), (vaddr_t)ci->ci_pae_l3_pdir,
	    (vaddr_t)ci->ci_pae_l3_pdir + PAGE_SIZE, VM_PROT_READ);
	pmap_update(pmap_kernel());

	xpq_queue_pin_l3_table(xpmap_ptom_masked(ci->ci_pae_l3_pdirpa));
#endif
}

/*
 * Notify all other cpus to halt.
 */

void
cpu_broadcast_halt(void)
{
	xen_broadcast_ipi(XEN_IPI_HALT);
}

/*
 * Send a dummy ipi to a cpu, and raise an AST on the running LWP.
 */

void
cpu_kick(struct cpu_info *ci)
{
	(void)xen_send_ipi(ci, XEN_IPI_AST);
}