/*	$NetBSD: x86_machdep.c,v 1.159 2025/07/14 21:34:48 bouyer Exp $	*/

/*-
 * Copyright (c) 2002, 2006, 2007 YAMAMOTO Takashi,
 * Copyright (c) 2005, 2008, 2009, 2019, 2023 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, and Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: x86_machdep.c,v 1.159 2025/07/14 21:34:48 bouyer Exp $");

#include "opt_modular.h"
#include "opt_physmem.h"
#include "opt_splash.h"
#include "opt_kaslr.h"
#include "opt_svs.h"
#include "opt_xen.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kcore.h>
#include <sys/errno.h>
#include <sys/kauth.h>
#include <sys/mutex.h>
#include <sys/cpu.h>
#include <sys/intr.h>
#include <sys/atomic.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <sys/extent.h>
#include <sys/rnd.h>

#include <x86/bootspace.h>
#include <x86/cpuvar.h>
#include <x86/cputypes.h>
#include <x86/efi.h>
#include <x86/machdep.h>
#include <x86/nmi.h>
#include <x86/pio.h>

#include <dev/splash/splash.h>
#include <dev/isa/isareg.h>
#include <dev/ic/i8042reg.h>
#include <dev/mm.h>

#include <machine/bootinfo.h>
#include <machine/pmap_private.h>
#include <machine/vmparam.h>

#include <uvm/uvm_extern.h>

#include "tsc.h"

#include "acpica.h"
#include "ioapic.h"
#include "lapic.h"

#if NACPICA > 0
#include <dev/acpi/acpivar.h>
#endif

#if NIOAPIC > 0 || NACPICA > 0
#include <machine/i82093var.h>
#endif

#include "opt_md.h"
#if defined(MEMORY_DISK_HOOKS) && defined(MEMORY_DISK_DYNAMIC)
#include <dev/md.h>
#endif

void (*x86_cpu_idle)(void);
static bool x86_cpu_idle_ipi;
static char x86_cpu_idle_text[16];

static bool x86_user_ldt_enabled __read_mostly = false;

#ifdef XEN

#include <xen/xen.h>
#include <xen/hypervisor.h>
#endif

#ifndef XENPV
void (*delay_func)(unsigned int) = i8254_delay;
void (*x86_initclock_func)(void) = i8254_initclocks;
#else /* XENPV */
void (*delay_func)(unsigned int) = xen_delay;
void (*x86_initclock_func)(void) = xen_initclocks;
#endif


/* --------------------------------------------------------------------- */

/*
 * Main bootinfo structure.  This is filled in by the bootstrap process
 * done in locore.S based on the information passed by the boot loader.
 */
struct bootinfo bootinfo;

/* --------------------------------------------------------------------- */

bool bootmethod_efi;

static kauth_listener_t x86_listener;

extern paddr_t lowmem_rsvd, avail_start, avail_end;

vaddr_t msgbuf_vaddr;

struct msgbuf_p_seg msgbuf_p_seg[VM_PHYSSEG_MAX];

unsigned int msgbuf_p_cnt = 0;

void init_x86_msgbuf(void);

/*
 * Given the type of a bootinfo entry, looks for a matching item inside
 * the bootinfo structure.  If found, returns a pointer to it (which must
 * then be cast to the appropriate bootinfo_* type); otherwise, returns
 * NULL.
 */
void *
lookup_bootinfo(int type)
{
	bool found;
	int i;
	struct btinfo_common *bic;

	bic = (struct btinfo_common *)(bootinfo.bi_data);
	found = FALSE;
	for (i = 0; i < bootinfo.bi_nentries && !found; i++) {
		if (bic->type == type)
			found = TRUE;
		else
			bic = (struct btinfo_common *)
			    ((uint8_t *)bic + bic->len);
	}

	return found ? bic : NULL;
}

#ifdef notyet
/*
 * List the available bootinfo entries.
 */
static const char *btinfo_str[] = {
	BTINFO_STR
};

void
aprint_bootinfo(void)
{
	int i;
	struct btinfo_common *bic;

	aprint_normal("bootinfo:");
	bic = (struct btinfo_common *)(bootinfo.bi_data);
	for (i = 0; i < bootinfo.bi_nentries; i++) {
		if (bic->type >= 0 && bic->type < __arraycount(btinfo_str))
			aprint_normal(" %s", btinfo_str[bic->type]);
		else
			aprint_normal(" %d", bic->type);
		bic = (struct btinfo_common *)
		    ((uint8_t *)bic + bic->len);
	}
	aprint_normal("\n");
}
#endif

/*
 * mm_md_physacc: check if given pa is accessible.
 */
int
mm_md_physacc(paddr_t pa, vm_prot_t prot)
{
	extern phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];
	extern int mem_cluster_cnt;
	int i;

	for (i = 0; i < mem_cluster_cnt; i++) {
		const phys_ram_seg_t *seg = &mem_clusters[i];
		paddr_t lstart = seg->start;

		if (lstart <= pa && pa - lstart <= seg->size) {
			return 0;
		}
	}
	return kauth_authorize_machdep(kauth_cred_get(),
	    KAUTH_MACHDEP_UNMANAGEDMEM, NULL, NULL, NULL, NULL);
}

#ifdef MODULAR
/*
 * Push any modules loaded by the boot loader.
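 *
 * A BTINFO_MODULELIST entry describes ELF modules, splash images, random
 * seed data and file-system images; each type is dispatched below.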
 */
void
module_init_md(void)
{
	struct btinfo_modulelist *biml;
	struct bi_modulelist_entry *bi, *bimax;

	biml = lookup_bootinfo(BTINFO_MODULELIST);
	if (biml == NULL) {
		aprint_debug("No module info at boot\n");
		return;
	}

	bi = (struct bi_modulelist_entry *)((uint8_t *)biml + sizeof(*biml));
	bimax = bi + biml->num;
	for (; bi < bimax; bi++) {
		switch (bi->type) {
		case BI_MODULE_ELF:
			aprint_debug("Prep module path=%s len=%d pa=%x\n",
			    bi->path, bi->len, bi->base);
			KASSERT(trunc_page(bi->base) == bi->base);
			module_prime(bi->path,
#ifdef KASLR
			    (void *)PMAP_DIRECT_MAP((uintptr_t)bi->base),
#else
			    (void *)((uintptr_t)bi->base + KERNBASE),
#endif
			    bi->len);
			break;
		case BI_MODULE_IMAGE:
#ifdef SPLASHSCREEN
			aprint_debug("Splash image path=%s len=%d pa=%x\n",
			    bi->path, bi->len, bi->base);
			KASSERT(trunc_page(bi->base) == bi->base);
			splash_setimage(
#ifdef KASLR
			    (void *)PMAP_DIRECT_MAP((uintptr_t)bi->base),
#else
			    (void *)((uintptr_t)bi->base + KERNBASE),
#endif
			    bi->len);
#endif
			break;
		case BI_MODULE_RND:
			/* handled in x86_rndseed */
			break;
		case BI_MODULE_FS:
			aprint_debug("File-system image path=%s len=%d pa=%x\n",
			    bi->path, bi->len, bi->base);
			KASSERT(trunc_page(bi->base) == bi->base);
#if defined(MEMORY_DISK_HOOKS) && defined(MEMORY_DISK_DYNAMIC)
			md_root_setconf(
#ifdef KASLR
			    (void *)PMAP_DIRECT_MAP((uintptr_t)bi->base),
#else
			    (void *)((uintptr_t)bi->base + KERNBASE),
#endif
			    bi->len);
#endif
			break;
		default:
			aprint_debug("Skipping non-ELF module\n");
			break;
		}
	}
}
#endif /* MODULAR */

void
x86_rndseed(void)
{
	struct btinfo_modulelist *biml;
	struct bi_modulelist_entry *bi, *bimax;

	biml = lookup_bootinfo(BTINFO_MODULELIST);
	if (biml == NULL) {
		aprint_debug("No module info at boot\n");
		return;
	}

	bi = (struct bi_modulelist_entry *)((uint8_t *)biml + sizeof(*biml));
	bimax = bi + biml->num;
	for (; bi < bimax; bi++) {
		switch (bi->type) {
		case BI_MODULE_RND:
			aprint_debug("Random seed data path=%s len=%d pa=%x\n",
			    bi->path, bi->len, bi->base);
			KASSERT(trunc_page(bi->base) == bi->base);
			rnd_seed(
#ifdef KASLR
			    (void *)PMAP_DIRECT_MAP((uintptr_t)bi->base),
#else
			    (void *)((uintptr_t)bi->base + KERNBASE),
#endif
			    bi->len);
		}
	}
}

void
cpu_need_resched(struct cpu_info *ci, struct lwp *l, int flags)
{

	KASSERT(kpreempt_disabled());

	if ((flags & RESCHED_IDLE) != 0) {
		if ((flags & RESCHED_REMOTE) != 0 &&
		    x86_cpu_idle_ipi != false) {
			cpu_kick(ci);
		}
		return;
	}

#ifdef __HAVE_PREEMPTION
	if ((flags & RESCHED_KPREEMPT) != 0) {
		if ((flags & RESCHED_REMOTE) != 0) {
#ifdef XENPV
			xen_send_ipi(ci, XEN_IPI_KPREEMPT);
#else
			x86_send_ipi(ci, X86_IPI_KPREEMPT);
#endif
		} else {
			softint_trigger(1 << SIR_PREEMPT);
		}
		return;
	}
#endif

	KASSERT((flags & RESCHED_UPREEMPT) != 0);
	if ((flags & RESCHED_REMOTE) != 0) {
		cpu_kick(ci);
	} else {
		aston(l);
	}
}

void
cpu_signotify(struct lwp *l)
{

	KASSERT(kpreempt_disabled());

	if (l->l_cpu != curcpu()) {
		cpu_kick(l->l_cpu);
	} else {
		aston(l);
	}
}

void
cpu_need_proftick(struct lwp *l)
{

	KASSERT(kpreempt_disabled());
	KASSERT(l->l_cpu == curcpu());

	l->l_pflag |= LP_OWEUPC;
	aston(l);
}

bool
cpu_intr_p(void)
{
	int idepth;
	long pctr;
	lwp_t *l;

	l = curlwp;
	if (__predict_false(l->l_cpu == NULL)) {
		KASSERT(l == &lwp0);
		return false;
	}
	do {
		pctr = lwp_pctr();
		idepth = l->l_cpu->ci_idepth;
	} while (__predict_false(pctr != lwp_pctr()));

	return idepth >= 0;
}

#ifdef __HAVE_PREEMPTION
/*
 * Called to check MD conditions that would prevent preemption, and to
 * arrange for those conditions to be rechecked later.
 */
bool
cpu_kpreempt_enter(uintptr_t where, int s)
{
	struct pcb *pcb;
	lwp_t *l;

	KASSERT(kpreempt_disabled());
	l = curlwp;

	/*
	 * If SPL raised, can't go.  Note this implies that spin
	 * mutexes at IPL_NONE are _not_ valid to use.
	 */
	if (s > IPL_PREEMPT) {
		softint_trigger(1 << SIR_PREEMPT);
		return false;
	}

	/* Must save cr2 or it could be clobbered. */
	pcb = lwp_getpcb(l);
	pcb->pcb_cr2 = rcr2();

	return true;
}

/*
 * Called after returning from a kernel preemption, and called with
 * preemption disabled.
 */
void
cpu_kpreempt_exit(uintptr_t where)
{
	extern char x86_copyfunc_start, x86_copyfunc_end;
#if defined(XENPV) && defined(i386)
	extern char i386_calltrap_start, i386_calltrap_end;
#endif
	struct pcb *pcb;

	KASSERT(kpreempt_disabled());

	/*
	 * If we interrupted any of the copy functions we must reload
	 * the pmap when resuming, as they cannot tolerate it being
	 * swapped out.
	 */
	if (where >= (uintptr_t)&x86_copyfunc_start &&
	    where < (uintptr_t)&x86_copyfunc_end) {
		pmap_load();
	}
#if defined(XENPV) && defined(i386)
	else if (where >= (uintptr_t)&i386_calltrap_start &&
	    where < (uintptr_t)&i386_calltrap_end) {
		pmap_load();
	}
#endif

	/* Restore cr2 only after the pmap, as pmap_load can block. */
	pcb = lwp_getpcb(curlwp);
	lcr2(pcb->pcb_cr2);
}

/*
 * Return true if preemption is disabled for MD reasons.  Must be called
 * with preemption disabled, and thus is only for diagnostic checks.
 */
bool
cpu_kpreempt_disabled(void)
{

	return curcpu()->ci_ilevel > IPL_NONE;
}
#endif /* __HAVE_PREEMPTION */

SYSCTL_SETUP(sysctl_machdep_cpu_idle, "sysctl machdep cpu_idle")
{
	const struct sysctlnode *mnode, *node;

	sysctl_createv(NULL, 0, NULL, &mnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0, CTL_MACHDEP, CTL_EOL);

	sysctl_createv(NULL, 0, &mnode, &node,
	    CTLFLAG_PERMANENT, CTLTYPE_STRING, "idle-mechanism",
	    SYSCTL_DESCR("Mechanism used for the idle loop."),
	    NULL, 0, x86_cpu_idle_text, 0,
	    CTL_CREATE, CTL_EOL);
}

void
x86_cpu_idle_init(void)
{

#ifndef XENPV
	if ((cpu_feature[1] & CPUID2_MONITOR) == 0)
		x86_cpu_idle_set(x86_cpu_idle_halt, "halt", true);
	else
		x86_cpu_idle_set(x86_cpu_idle_mwait, "mwait", false);
#else
	x86_cpu_idle_set(x86_cpu_idle_xen, "xen", true);
#endif
}

void
x86_cpu_idle_get(void (**func)(void), char *text, size_t len)
{

	*func = x86_cpu_idle;

	(void)strlcpy(text, x86_cpu_idle_text, len);
}

void
x86_cpu_idle_set(void (*func)(void), const char *text, bool ipi)
{

	x86_cpu_idle = func;
	x86_cpu_idle_ipi = ipi;

	(void)strlcpy(x86_cpu_idle_text, text, sizeof(x86_cpu_idle_text));
}

#ifndef XENPV

#define KBTOB(x)	((size_t)(x) * 1024UL)
#define MBTOB(x)	((size_t)(x) * 1024UL * 1024UL)

static struct {
	int freelist;
	uint64_t limit;
} x86_freelists[VM_NFREELIST] = {
	{ VM_FREELIST_DEFAULT, 0 },
#ifdef VM_FREELIST_FIRST1T
	/* 40-bit addresses needed for modern graphics. */
	{ VM_FREELIST_FIRST1T, 1ULL * 1024 * 1024 * 1024 * 1024 },
#endif
#ifdef VM_FREELIST_FIRST64G
	/* 36-bit addresses needed for oldish graphics. */
	{ VM_FREELIST_FIRST64G, 64ULL * 1024 * 1024 * 1024 },
#endif
#ifdef VM_FREELIST_FIRST4G
	/* 32-bit addresses needed for PCI 32-bit DMA and old graphics. */
	{ VM_FREELIST_FIRST4G, 4ULL * 1024 * 1024 * 1024 },
#endif
	/* 30-bit addresses needed for ancient graphics. */
	{ VM_FREELIST_FIRST1G, 1ULL * 1024 * 1024 * 1024 },
	/* 24-bit addresses needed for ISA DMA. */
	{ VM_FREELIST_FIRST16, 16 * 1024 * 1024 },
};

int
x86_select_freelist(uint64_t maxaddr)
{
	unsigned int i;

	if (avail_end <= maxaddr)
		return VM_NFREELIST;

	for (i = 0; i < __arraycount(x86_freelists); i++) {
		if ((x86_freelists[i].limit - 1) <= maxaddr)
			return x86_freelists[i].freelist;
	}

	panic("no freelist for maximum address %"PRIx64, maxaddr);
}

static int
x86_add_cluster(uint64_t seg_start, uint64_t seg_end, uint32_t type)
{
	extern struct extent *iomem_ex;
	const uint64_t endext = MAXIOMEM + 1;
	uint64_t new_physmem = 0;
	phys_ram_seg_t *cluster;
	int i;

	if (seg_end > MAXPHYSMEM) {
		aprint_verbose("WARNING: skipping large memory map entry: "
		    "0x%"PRIx64"/0x%"PRIx64"/0x%x\n",
		    seg_start, (seg_end - seg_start), type);
		return 0;
	}

	/*
	 * XXX: Chop the last page off the size so that it can fit in avail_end.
	 */
	if (seg_end == MAXPHYSMEM)
		seg_end -= PAGE_SIZE;

	if (seg_end <= seg_start)
		return 0;

	for (i = 0; i < mem_cluster_cnt; i++) {
		cluster = &mem_clusters[i];
		if ((cluster->start == round_page(seg_start)) &&
		    (cluster->size == trunc_page(seg_end) - cluster->start)) {
#ifdef DEBUG_MEMLOAD
			printf("WARNING: skipping duplicate segment entry\n");
#endif
			return 0;
		}
	}

	/*
	 * This cluster is used by RAM. If it is included in the iomem extent,
	 * allocate it from there, so that we won't unintentionally reuse it
	 * later with extent_alloc_region. This avoids collisions (with UVM,
	 * for example).
	 *
	 * This is done before the addresses are page rounded just to make
	 * sure we get them all.
	 */
	if (seg_start < endext) {
		uint64_t io_end;

		if (seg_end > endext)
			io_end = endext;
		else
			io_end = seg_end;

		if (iomem_ex != NULL && extent_alloc_region(iomem_ex, seg_start,
		    io_end - seg_start, EX_NOWAIT)) {
			/* XXX What should we do? */
			printf("WARNING: CAN'T ALLOCATE MEMORY SEGMENT "
			    "(0x%"PRIx64"/0x%"PRIx64"/0x%x) FROM "
			    "IOMEM EXTENT MAP!\n",
			    seg_start, seg_end - seg_start, type);
			return 0;
		}
	}

	/* If it's not free memory, skip it. */
	if (type != BIM_Memory)
		return 0;

	if (mem_cluster_cnt >= VM_PHYSSEG_MAX) {
		printf("WARNING: too many memory segments "
		    "(increase VM_PHYSSEG_MAX)");
		return -1;
	}

#ifdef PHYSMEM_MAX_ADDR
	if (seg_start >= MBTOB(PHYSMEM_MAX_ADDR))
		return 0;
	if (seg_end > MBTOB(PHYSMEM_MAX_ADDR))
		seg_end = MBTOB(PHYSMEM_MAX_ADDR);
#endif

	seg_start = round_page(seg_start);
	seg_end = trunc_page(seg_end);

	if (seg_start == seg_end)
		return 0;

	cluster = &mem_clusters[mem_cluster_cnt];
	cluster->start = seg_start;
	if (iomem_ex != NULL)
		new_physmem = physmem + atop(seg_end - seg_start);

#ifdef PHYSMEM_MAX_SIZE
	if (iomem_ex != NULL) {
		if (physmem >= atop(MBTOB(PHYSMEM_MAX_SIZE)))
			return 0;
		if (new_physmem > atop(MBTOB(PHYSMEM_MAX_SIZE))) {
			seg_end = seg_start + MBTOB(PHYSMEM_MAX_SIZE) - ptoa(physmem);
			new_physmem = atop(MBTOB(PHYSMEM_MAX_SIZE));
		}
	}
#endif

	cluster->size = seg_end - seg_start;

	if (iomem_ex != NULL) {
		if (avail_end < seg_end)
			avail_end = seg_end;
		physmem = new_physmem;
	}
	mem_cluster_cnt++;

	return 0;
}

static int
x86_parse_clusters(struct btinfo_memmap *bim)
{
	uint64_t seg_start, seg_end;
	uint64_t addr, size;
	uint32_t type;
	int x;

	KASSERT(bim != NULL);
	KASSERT(bim->num > 0);

#ifdef DEBUG_MEMLOAD
	printf("MEMMAP: %s MEMORY MAP (%d ENTRIES):\n",
	    lookup_bootinfo(BTINFO_EFIMEMMAP) != NULL ? "UEFI" : "BIOS",
	    bim->num);
#endif

	for (x = 0; x < bim->num; x++) {
		addr = bim->entry[x].addr;
		size = bim->entry[x].size;
		type = bim->entry[x].type;
#ifdef DEBUG_MEMLOAD
		printf("MEMMAP: 0x%016" PRIx64 "-0x%016" PRIx64
		    "\n\tsize=0x%016" PRIx64 ", type=%d(%s)\n",
		    addr, addr + size - 1, size, type,
		    (type == BIM_Memory) ? "Memory" :
		    (type == BIM_Reserved) ? "Reserved" :
		    (type == BIM_ACPI) ? "ACPI" :
		    (type == BIM_NVS) ? "NVS" :
		    (type == BIM_PMEM) ? "Persistent" :
		    (type == BIM_PRAM) ? "Persistent (Legacy)" :
		    "unknown");
#endif

		/* If the segment is not memory, skip it. */
		switch (type) {
		case BIM_Memory:
		case BIM_ACPI:
		case BIM_NVS:
			break;
		default:
			continue;
		}

		/* If the segment is smaller than a page, skip it. */
		if (size < PAGE_SIZE)
			continue;

		seg_start = addr;
		seg_end = addr + size;

		/*
		 * XXX XXX: Avoid the ISA I/O MEM.
		 *
		 * Some laptops (for example, Toshiba Satellite2550X) report
		 * this area as valid.
		 */
		if (seg_start < IOM_END && seg_end > IOM_BEGIN) {
			printf("WARNING: memory map entry overlaps "
			    "with ``Compatibility Holes'': "
			    "0x%"PRIx64"/0x%"PRIx64"/0x%x\n", seg_start,
			    seg_end - seg_start, type);

			if (x86_add_cluster(seg_start, IOM_BEGIN, type) == -1)
				break;
			if (x86_add_cluster(IOM_END, seg_end, type) == -1)
				break;
		} else {
			if (x86_add_cluster(seg_start, seg_end, type) == -1)
				break;
		}
	}

	return 0;
}

static int
x86_fake_clusters(void)
{
	extern struct extent *iomem_ex;
	phys_ram_seg_t *cluster;
	KASSERT(mem_cluster_cnt == 0);

	/*
	 * Allocate the physical addresses used by RAM from the iomem extent
	 * map.  This is done before the addresses are page rounded just to make
	 * sure we get them all.
	 */
	if (extent_alloc_region(iomem_ex, 0, KBTOB(biosbasemem), EX_NOWAIT)) {
		/* XXX What should we do? */
		printf("WARNING: CAN'T ALLOCATE BASE MEMORY FROM "
		    "IOMEM EXTENT MAP!\n");
	}

	cluster = &mem_clusters[0];
	cluster->start = 0;
	cluster->size = trunc_page(KBTOB(biosbasemem));
	physmem += atop(cluster->size);

	if (extent_alloc_region(iomem_ex, IOM_END, KBTOB(biosextmem),
	    EX_NOWAIT)) {
		/* XXX What should we do? */
		printf("WARNING: CAN'T ALLOCATE EXTENDED MEMORY FROM "
		    "IOMEM EXTENT MAP!\n");
	}

#if NISADMA > 0
	/*
	 * Some motherboards/BIOSes remap the 384K of RAM that would
	 * normally be covered by the ISA hole to the end of memory
	 * so that it can be used.  However, on a 16M system, this
	 * would cause bounce buffers to be allocated and used.
	 * This is not desirable behaviour, as more than 384K of
	 * bounce buffers might be allocated.  As a work-around,
	 * we round memory down to the nearest 1M boundary if
	 * we're using any isadma devices and the remapped memory
	 * is what puts us over 16M.
	 */
	if (biosextmem > (15*1024) && biosextmem < (16*1024)) {
		char pbuf[9];

		format_bytes(pbuf, sizeof(pbuf), biosextmem - (15*1024));
		printf("Warning: ignoring %s of remapped memory\n", pbuf);
		biosextmem = (15*1024);
	}
#endif

	cluster = &mem_clusters[1];
	cluster->start = IOM_END;
	cluster->size = trunc_page(KBTOB(biosextmem));
	physmem += atop(cluster->size);

	mem_cluster_cnt = 2;

	avail_end = IOM_END + trunc_page(KBTOB(biosextmem));

	return 0;
}

/*
 * x86_load_region: load the physical memory region from seg_start to seg_end
 * into the VM system.
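 *
 * The region is handed to uvm_page_physload() piecewise, most
 * address-constrained freelist first; anything above every freelist
 * limit ends up in VM_FREELIST_DEFAULT.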
 */
static void
x86_load_region(uint64_t seg_start, uint64_t seg_end)
{
	unsigned int i;
	uint64_t tmp;

	i = __arraycount(x86_freelists);
	while (i--) {
		if (x86_freelists[i].limit <= seg_start)
			continue;
		if (x86_freelists[i].freelist == VM_FREELIST_DEFAULT)
			continue;
		tmp = MIN(x86_freelists[i].limit, seg_end);
		if (tmp == seg_start)
			continue;

#ifdef DEBUG_MEMLOAD
		printf("loading freelist %d 0x%"PRIx64"-0x%"PRIx64
		    " (0x%"PRIx64"-0x%"PRIx64")\n", x86_freelists[i].freelist,
		    seg_start, tmp, (uint64_t)atop(seg_start),
		    (uint64_t)atop(tmp));
#endif

		uvm_page_physload(atop(seg_start), atop(tmp), atop(seg_start),
		    atop(tmp), x86_freelists[i].freelist);
		seg_start = tmp;
	}

	if (seg_start != seg_end) {
#ifdef DEBUG_MEMLOAD
		printf("loading default 0x%"PRIx64"-0x%"PRIx64
		    " (0x%"PRIx64"-0x%"PRIx64")\n", seg_start, seg_end,
		    (uint64_t)atop(seg_start), (uint64_t)atop(seg_end));
#endif
		uvm_page_physload(atop(seg_start), atop(seg_end),
		    atop(seg_start), atop(seg_end), VM_FREELIST_DEFAULT);
	}
}

#ifdef XEN
static void
x86_add_xen_clusters(void)
{
	if (hvm_start_info->memmap_entries > 0) {
		struct hvm_memmap_table_entry *map_entry;
		map_entry = (void *)((uintptr_t)hvm_start_info->memmap_paddr + KERNBASE);
		for (int i = 0; i < hvm_start_info->memmap_entries; i++) {
			if (map_entry[i].size < PAGE_SIZE)
				continue;
			switch (map_entry[i].type) {
			case XEN_HVM_MEMMAP_TYPE_RAM:
				x86_add_cluster(map_entry[i].addr,
				    map_entry[i].addr + map_entry[i].size,
				    BIM_Memory);
				break;
			case XEN_HVM_MEMMAP_TYPE_ACPI:
				x86_add_cluster(map_entry[i].addr,
				    map_entry[i].addr + map_entry[i].size,
				    BIM_ACPI);
				break;
			}
		}
	} else {
		struct xen_memory_map memmap;
		static struct _xen_mmap {
			struct btinfo_memmap bim;
			struct bi_memmap_entry map[128]; /* same as FreeBSD */
		} __packed xen_mmap;
		int err;

		memmap.nr_entries = 128;
		set_xen_guest_handle(memmap.buffer, &xen_mmap.bim.entry[0]);
		if ((err = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap))
		    < 0)
			panic("XENMEM_memory_map %d", err);
		xen_mmap.bim.num = memmap.nr_entries;
		x86_parse_clusters(&xen_mmap.bim);
	}
}
#endif /* XEN */
/*
 * init_x86_clusters: retrieve the memory clusters provided by the BIOS, and
 * initialize mem_clusters.
 */
void
init_x86_clusters(void)
{
	struct btinfo_memmap *bim;
	struct btinfo_efimemmap *biem;

	/*
	 * Check to see if we have a memory map from the BIOS (passed to us by
	 * the boot program).
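	 *
	 * A UEFI memory map, if present, is converted to E820 form by
	 * efi_get_e820memmap(); otherwise the plain BIOS map is used, and if
	 * neither yields a cluster we fall back on fake clusters built from
	 * biosbasemem/biosextmem.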
	 */
#ifdef XEN
	if (pvh_boot) {
		x86_add_xen_clusters();
	}
#endif /* XEN */

#ifdef i386
	extern int biosmem_implicit;
	biem = lookup_bootinfo(BTINFO_EFIMEMMAP);
	if (biem != NULL)
		bim = efi_get_e820memmap();
	else
		bim = lookup_bootinfo(BTINFO_MEMMAP);
	if ((biosmem_implicit || (biosbasemem == 0 && biosextmem == 0)) &&
	    bim != NULL && bim->num > 0)
		x86_parse_clusters(bim);
#else
#if !defined(REALBASEMEM) && !defined(REALEXTMEM)
	biem = lookup_bootinfo(BTINFO_EFIMEMMAP);
	if (biem != NULL)
		bim = efi_get_e820memmap();
	else
		bim = lookup_bootinfo(BTINFO_MEMMAP);
	if (bim != NULL && bim->num > 0)
		x86_parse_clusters(bim);
#else
	(void)bim, (void)biem;
#endif
#endif

	if (mem_cluster_cnt == 0) {
		/*
		 * If x86_parse_clusters didn't find any valid segment, create
		 * fake clusters.
		 */
		x86_fake_clusters();
	}
}

/*
 * init_x86_vm: initialize the VM system on x86.  We basically internalize as
 * many physical pages as we can, starting at lowmem_rsvd, but we don't
 * internalize the kernel physical pages (from pa_kstart to pa_kend).
 */
int
init_x86_vm(paddr_t pa_kend)
{
	extern struct bootspace bootspace;
	paddr_t pa_kstart = bootspace.head.pa;
	uint64_t seg_start, seg_end;
	uint64_t seg_start1, seg_end1;
	int x;
	unsigned i;

	for (i = 0; i < __arraycount(x86_freelists); i++) {
		if (avail_end < x86_freelists[i].limit)
			x86_freelists[i].freelist = VM_FREELIST_DEFAULT;
	}

	/*
	 * Now, load the memory clusters (which have already been rounded and
	 * truncated) into the VM system.
	 *
	 * NOTE: we assume that memory starts at 0.
	 */
	for (x = 0; x < mem_cluster_cnt; x++) {
		const phys_ram_seg_t *cluster = &mem_clusters[x];

		seg_start = cluster->start;
		seg_end = cluster->start + cluster->size;
		seg_start1 = 0;
		seg_end1 = 0;

#ifdef DEBUG_MEMLOAD
		printf("segment %" PRIx64 " - %" PRIx64 "\n",
		    seg_start, seg_end);
#endif

		/* Skip memory before our available starting point. */
		if (seg_end <= lowmem_rsvd) {
#ifdef DEBUG_MEMLOAD
			printf("discard segment below starting point "
			    "%" PRIx64 " - %" PRIx64 "\n", seg_start, seg_end);
#endif
			continue;
		}

		if (seg_start <= lowmem_rsvd && lowmem_rsvd < seg_end) {
			seg_start = lowmem_rsvd;
			if (seg_start == seg_end) {
#ifdef DEBUG_MEMLOAD
				printf("discard segment below starting point "
				    "%" PRIx64 " - %" PRIx64 "\n",
				    seg_start, seg_end);
#endif
				continue;
			}
		}

		/*
		 * If this segment contains the kernel, split it in two, around
		 * the kernel.
		 *	[seg_start                        seg_end]
		 *	           [pa_kstart  pa_kend]
		 */
		if (seg_start <= pa_kstart && pa_kend <= seg_end) {
#ifdef DEBUG_MEMLOAD
			printf("split kernel overlapping to "
			    "%" PRIx64 " - %" PRIxPADDR " and "
			    "%" PRIxPADDR " - %" PRIx64 "\n",
			    seg_start, pa_kstart, pa_kend, seg_end);
#endif
			seg_start1 = pa_kend;
			seg_end1 = seg_end;
			seg_end = pa_kstart;
			KASSERT(seg_end < seg_end1);
		}

		/*
		 * Discard a segment inside the kernel
		 *	[pa_kstart                        pa_kend]
		 *	           [seg_start  seg_end]
		 */
		if (pa_kstart < seg_start && seg_end < pa_kend) {
#ifdef DEBUG_MEMLOAD
			printf("discard complete kernel overlap "
			    "%" PRIx64 " - %" PRIx64 "\n", seg_start, seg_end);
#endif
			continue;
		}

		/*
		 * Discard leading hunk that overlaps the kernel
		 *	[pa_kstart             pa_kend]
		 *	           [seg_start            seg_end]
		 */
		if (pa_kstart < seg_start &&
		    seg_start < pa_kend &&
		    pa_kend < seg_end) {
#ifdef DEBUG_MEMLOAD
			printf("discard leading kernel overlap "
			    "%" PRIx64 " - %" PRIxPADDR "\n",
			    seg_start, pa_kend);
#endif
			seg_start = pa_kend;
		}

		/*
		 * Discard trailing hunk that overlaps the kernel
		 *	           [pa_kstart            pa_kend]
		 *	[seg_start             seg_end]
		 */
		if (seg_start < pa_kstart &&
		    pa_kstart < seg_end &&
		    seg_end < pa_kend) {
#ifdef DEBUG_MEMLOAD
			printf("discard trailing kernel overlap "
			    "%" PRIxPADDR " - %" PRIx64 "\n",
			    pa_kstart, seg_end);
#endif
			seg_end = pa_kstart;
		}

		/* First hunk */
		if (seg_start != seg_end) {
			x86_load_region(seg_start, seg_end);
		}

		/* Second hunk */
		if (seg_start1 != seg_end1) {
			x86_load_region(seg_start1, seg_end1);
		}
	}

	return 0;
}

#endif /* !XENPV */

void
init_x86_msgbuf(void)
{
	/* Message buffer is located at end of core. */
	psize_t sz = round_page(MSGBUFSIZE);
	psize_t reqsz = sz;
	uvm_physseg_t x;

search_again:
	for (x = uvm_physseg_get_first();
	     uvm_physseg_valid_p(x);
	     x = uvm_physseg_get_next(x)) {

		if (ctob(uvm_physseg_get_avail_end(x)) == avail_end)
			break;
	}

	if (uvm_physseg_valid_p(x) == false)
		panic("init_x86_msgbuf: can't find end of memory");

	/* Shrink so it'll fit in the last segment. */
	if (uvm_physseg_get_avail_end(x) - uvm_physseg_get_avail_start(x) < atop(sz))
		sz = ctob(uvm_physseg_get_avail_end(x) - uvm_physseg_get_avail_start(x));

	msgbuf_p_seg[msgbuf_p_cnt].sz = sz;
	msgbuf_p_seg[msgbuf_p_cnt++].paddr = ctob(uvm_physseg_get_avail_end(x)) - sz;
	uvm_physseg_unplug(uvm_physseg_get_end(x) - atop(sz), atop(sz));

	/* Now find where the new avail_end is. */
	avail_end = ctob(uvm_physseg_get_highest_frame());

	if (sz == reqsz)
		return;

	reqsz -= sz;
	if (msgbuf_p_cnt == VM_PHYSSEG_MAX) {
		/* No more segments available, bail out. */
		printf("WARNING: MSGBUFSIZE (%zu) too large, using %zu.\n",
		    (size_t)MSGBUFSIZE, (size_t)(MSGBUFSIZE - reqsz));
		return;
	}

	sz = reqsz;
	goto search_again;
}

void
x86_reset(void)
{
	uint8_t b;

#if NACPICA > 0
	/*
	 * If ACPI is active, try to reset using the reset register
	 * defined in the FADT.
	 */
	if (acpi_active) {
		if (acpi_reset() == 0) {
			delay(500000); /* wait 0.5 sec to see if that did it */
		}
	}
#endif

	/*
	 * The keyboard controller has 4 random output pins, one of which is
	 * connected to the RESET pin on the CPU in many PCs.  We tell the
	 * keyboard controller to pulse this line a couple of times.
	 */
	outb(IO_KBD + KBCMDP, KBC_PULSE0);
	delay(100000);
	outb(IO_KBD + KBCMDP, KBC_PULSE0);
	delay(100000);

	/*
	 * Attempt to force a reset via the Reset Control register at
	 * I/O port 0xcf9.  Bit 2 forces a system reset when it
	 * transitions from 0 to 1.  Bit 1 selects the type of reset
	 * to attempt: 0 selects a "soft" reset, and 1 selects a
	 * "hard" reset.  We try a "hard" reset.  The first write sets
	 * bit 1 to select a "hard" reset and clears bit 2.  The
	 * second write forces a 0 -> 1 transition in bit 2 to trigger
	 * a reset.
	 */
	outb(0xcf9, 0x2);
	outb(0xcf9, 0x6);
	DELAY(500000);	/* wait 0.5 sec to see if that did it */

	/*
	 * Attempt to force a reset via the Fast A20 and Init register
	 * at I/O port 0x92.  Bit 1 serves as an alternate A20 gate.
	 * Bit 0 asserts INIT# when set to 1.  We are careful to only
	 * preserve bit 1 while setting bit 0.  We also must clear bit
	 * 0 before setting it if it isn't already clear.
	 */
	b = inb(0x92);
	if (b != 0xff) {
		if ((b & 0x1) != 0)
			outb(0x92, b & 0xfe);
		outb(0x92, b | 0x1);
		DELAY(500000);	/* wait 0.5 sec to see if that did it */
	}
}

static int
x86_listener_cb(kauth_cred_t cred, kauth_action_t action, void *cookie,
    void *arg0, void *arg1, void *arg2, void *arg3)
{
	int result;

	result = KAUTH_RESULT_DEFER;

	switch (action) {
	case KAUTH_MACHDEP_IOPERM_GET:
		result = KAUTH_RESULT_ALLOW;
		break;

	case KAUTH_MACHDEP_LDT_GET:
	case KAUTH_MACHDEP_LDT_SET:
		if (x86_user_ldt_enabled) {
			result = KAUTH_RESULT_ALLOW;
		}
		break;

	default:
		break;
	}

	return result;
}

void
machdep_init(void)
{

	x86_listener = kauth_listen_scope(KAUTH_SCOPE_MACHDEP,
	    x86_listener_cb, NULL);
}

/*
 * x86_startup: x86 common startup routine
 *
 * called by cpu_startup.
 */

void
x86_startup(void)
{
#if !defined(XENPV)
	nmi_init();
#endif
}

const char *
get_booted_kernel(void)
{
	const struct btinfo_bootpath *bibp = lookup_bootinfo(BTINFO_BOOTPATH);
	return bibp ? bibp->bootpath : NULL;
}

/*
 * machine dependent system variables.
 */
static int
sysctl_machdep_booted_kernel(SYSCTLFN_ARGS)
{
	struct btinfo_bootpath *bibp;
	struct sysctlnode node;

	bibp = lookup_bootinfo(BTINFO_BOOTPATH);
	if (!bibp)
		return ENOENT;	/* ??? */

	node = *rnode;
	node.sysctl_data = bibp->bootpath;
	node.sysctl_size = sizeof(bibp->bootpath);
	return sysctl_lookup(SYSCTLFN_CALL(&node));
}

static int
sysctl_machdep_bootmethod(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	char buf[5];

	node = *rnode;
	node.sysctl_data = buf;
	if (bootmethod_efi)
		memcpy(node.sysctl_data, "UEFI", 5);
	else
		memcpy(node.sysctl_data, "BIOS", 5);

	return sysctl_lookup(SYSCTLFN_CALL(&node));
}


static int
sysctl_machdep_diskinfo(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	extern struct bi_devmatch *x86_alldisks;
	extern int x86_ndisks;

	if (x86_alldisks == NULL)
		return EOPNOTSUPP;

	node = *rnode;
	node.sysctl_data = x86_alldisks;
	node.sysctl_size = sizeof(struct disklist) +
	    (x86_ndisks - 1) * sizeof(struct nativedisk_info);
	return sysctl_lookup(SYSCTLFN_CALL(&node));
}

#ifndef XENPV
static int
sysctl_machdep_tsc_enable(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	int error, val;

	val = *(int *)rnode->sysctl_data;

	node = *rnode;
	node.sysctl_data = &val;

	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return error;

	if (val == 1) {
		tsc_user_enable();
	} else if (val == 0) {
		tsc_user_disable();
	} else {
		error = EINVAL;
	}
	if (error)
		return error;

	*(int *)rnode->sysctl_data = val;

	return 0;
}
#endif

static const char * const vm_guest_name[VM_LAST] = {
	[VM_GUEST_NO] = "none",
	[VM_GUEST_VM] = "generic",
	[VM_GUEST_XENPV] = "XenPV",
	[VM_GUEST_XENPVH] = "XenPVH",
	[VM_GUEST_XENHVM] = "XenHVM",
	[VM_GUEST_XENPVHVM] = "XenPVHVM",
	[VM_GUEST_GENPVH] = "GenPVH",
	[VM_GUEST_HV] = "Hyper-V",
	[VM_GUEST_VMWARE] = "VMware",
	[VM_GUEST_KVM] = "KVM",
	[VM_GUEST_VIRTUALBOX] = "VirtualBox",
	[VM_GUEST_NVMM] = "NVMM",
};

static int
sysctl_machdep_hypervisor(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	const char *t = NULL;
	char buf[64];

	node = *rnode;
	node.sysctl_data = buf;
	if (vm_guest >= VM_GUEST_NO && vm_guest < VM_LAST)
		t = vm_guest_name[vm_guest];
	if (t == NULL)
		t = "unknown";
	strlcpy(buf, t, sizeof(buf));
	return sysctl_lookup(SYSCTLFN_CALL(&node));
}

static void
const_sysctl(struct sysctllog **clog, const char *name, int type,
    u_quad_t value, int tag)
{
	(sysctl_createv)(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
	    type, name, NULL, NULL, value, NULL, 0,
	    CTL_MACHDEP, tag, CTL_EOL);
}

SYSCTL_SETUP(sysctl_machdep_setup, "sysctl machdep subtree setup")
{
	extern uint64_t tsc_freq;
#ifndef XENPV
	extern int tsc_user_enabled;
#endif
	extern int sparse_dump;

	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "machdep", NULL,
	    NULL, 0, NULL, 0,
	    CTL_MACHDEP, CTL_EOL);

	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRUCT, "console_device", NULL,
	    sysctl_consdev, 0, NULL, sizeof(dev_t),
	    CTL_MACHDEP, CPU_CONSDEV, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRING, "booted_kernel", NULL,
	    sysctl_machdep_booted_kernel, 0, NULL, 0,
	    CTL_MACHDEP, CPU_BOOTED_KERNEL, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRING, "bootmethod", NULL,
	    sysctl_machdep_bootmethod, 0, NULL, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRUCT, "diskinfo", NULL,
	    sysctl_machdep_diskinfo, 0, NULL, 0,
	    CTL_MACHDEP, CPU_DISKINFO, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRING, "cpu_brand", NULL,
	    NULL, 0, cpu_brand_string, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
	    CTLTYPE_INT, "sparse_dump", NULL,
	    NULL, 0, &sparse_dump, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_QUAD, "tsc_freq", NULL,
	    NULL, 0, &tsc_freq, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_INT, "pae",
	    SYSCTL_DESCR("Whether the kernel uses PAE"),
	    NULL, 0, &use_pae, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
#ifndef XENPV
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_READWRITE,
	    CTLTYPE_INT, "tsc_user_enable",
	    SYSCTL_DESCR("RDTSC instruction enabled in usermode"),
	    sysctl_machdep_tsc_enable, 0, &tsc_user_enabled, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
#endif
	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_STRING, "hypervisor", NULL,
	    sysctl_machdep_hypervisor, 0, NULL, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
#ifdef SVS
	const struct sysctlnode *svs_rnode = NULL;
	sysctl_createv(clog, 0, NULL, &svs_rnode,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_NODE, "svs", NULL,
	    NULL, 0, NULL, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &svs_rnode, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_BOOL, "enabled",
	    SYSCTL_DESCR("Whether the kernel uses SVS"),
	    NULL, 0, &svs_enabled, 0,
	    CTL_CREATE, CTL_EOL);
	sysctl_createv(clog, 0, &svs_rnode, NULL,
	    CTLFLAG_PERMANENT,
	    CTLTYPE_BOOL, "pcid",
	    SYSCTL_DESCR("Whether SVS uses PCID"),
	    NULL, 0, &svs_pcid, 0,
	    CTL_CREATE, CTL_EOL);
#endif

	sysctl_createv(clog, 0, NULL, NULL,
	    CTLFLAG_READWRITE,
	    CTLTYPE_BOOL, "user_ldt",
	    SYSCTL_DESCR("Whether USER_LDT is enabled"),
	    NULL, 0, &x86_user_ldt_enabled, 0,
	    CTL_MACHDEP, CTL_CREATE, CTL_EOL);

#ifndef XENPV
	void sysctl_speculation_init(struct sysctllog **);
	sysctl_speculation_init(clog);
#endif

	/* None of these can ever change once the system has booted */
	const_sysctl(clog, "fpu_present", CTLTYPE_INT, i386_fpu_present,
	    CPU_FPU_PRESENT);
	const_sysctl(clog, "osfxsr", CTLTYPE_INT, i386_use_fxsave,
	    CPU_OSFXSR);
	const_sysctl(clog, "sse", CTLTYPE_INT, i386_has_sse,
	    CPU_SSE);
	const_sysctl(clog, "sse2", CTLTYPE_INT, i386_has_sse2,
	    CPU_SSE2);

	const_sysctl(clog, "fpu_save", CTLTYPE_INT, x86_fpu_save,
	    CPU_FPU_SAVE);
	const_sysctl(clog, "fpu_save_size", CTLTYPE_INT, x86_fpu_save_size,
	    CPU_FPU_SAVE_SIZE);
	const_sysctl(clog, "xsave_features", CTLTYPE_QUAD, x86_xsave_features,
	    CPU_XSAVE_FEATURES);

#ifndef XENPV
	const_sysctl(clog, "biosbasemem", CTLTYPE_INT, biosbasemem,
	    CPU_BIOSBASEMEM);
	const_sysctl(clog, "biosextmem", CTLTYPE_INT, biosextmem,
	    CPU_BIOSEXTMEM);
#endif
}

/* Here for want of a better place */
#if defined(DOM0OPS) || !defined(XENPV)
struct pic *
intr_findpic(int num)
{
#if NIOAPIC > 0
	struct ioapic_softc *pic;

	pic = ioapic_find_bybase(num);
	if (pic != NULL)
		return &pic->sc_pic;
#endif
	if (num < NUM_LEGACY_IRQS)
		return &i8259_pic;

	return NULL;
}
#endif

void
cpu_initclocks(void)
{

	/*
	 * Re-calibrate TSC on boot CPU using most accurate time source,
	 * thus making accurate TSC available for x86_initclock_func().
	 */
	cpu_get_tsc_freq(curcpu());

	/* Now start the clocks on this CPU (the boot CPU). */
	(*x86_initclock_func)();
}

int
x86_cpu_is_lcall(const void *ip)
{
	static const uint8_t lcall[] = { 0x9a, 0, 0, 0, 0 };
	int error;
	const size_t sz = sizeof(lcall) + 2;
	uint8_t tmp[sizeof(lcall) + 2];

	if ((error = copyin(ip, tmp, sz)) != 0)
		return error;

	if (memcmp(tmp, lcall, sizeof(lcall)) != 0 || tmp[sz - 1] != 0)
		return EINVAL;

	switch (tmp[sz - 2]) {
	case (uint8_t)0x07: /* NetBSD */
	case (uint8_t)0x87: /* BSD/OS */
		return 0;
	default:
		return EINVAL;
	}
}