      1 /*	$NetBSD: machdep.c,v 1.781 2017/03/23 18:08:06 maxv Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008, 2009
      5  *     The NetBSD Foundation, Inc.
      6  * All rights reserved.
      7  *
      8  * This code is derived from software contributed to The NetBSD Foundation
      9  * by Charles M. Hannum, by Jason R. Thorpe of the Numerical Aerospace
     10  * Simulation Facility NASA Ames Research Center, by Julio M. Merino Vidal,
     11  * and by Andrew Doran.
     12  *
     13  * Redistribution and use in source and binary forms, with or without
     14  * modification, are permitted provided that the following conditions
     15  * are met:
     16  * 1. Redistributions of source code must retain the above copyright
     17  *    notice, this list of conditions and the following disclaimer.
     18  * 2. Redistributions in binary form must reproduce the above copyright
     19  *    notice, this list of conditions and the following disclaimer in the
     20  *    documentation and/or other materials provided with the distribution.
     21  *
     22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32  * POSSIBILITY OF SUCH DAMAGE.
     33  */
     34 
     35 /*-
     36  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
     37  * All rights reserved.
     38  *
     39  * This code is derived from software contributed to Berkeley by
     40  * William Jolitz.
     41  *
     42  * Redistribution and use in source and binary forms, with or without
     43  * modification, are permitted provided that the following conditions
     44  * are met:
     45  * 1. Redistributions of source code must retain the above copyright
     46  *    notice, this list of conditions and the following disclaimer.
     47  * 2. Redistributions in binary form must reproduce the above copyright
     48  *    notice, this list of conditions and the following disclaimer in the
     49  *    documentation and/or other materials provided with the distribution.
     50  * 3. Neither the name of the University nor the names of its contributors
     51  *    may be used to endorse or promote products derived from this software
     52  *    without specific prior written permission.
     53  *
     54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     64  * SUCH DAMAGE.
     65  *
     66  *	@(#)machdep.c	7.4 (Berkeley) 6/3/91
     67  */
     68 
     69 #include <sys/cdefs.h>
     70 __KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.781 2017/03/23 18:08:06 maxv Exp $");
     71 
     72 #include "opt_beep.h"
     73 #include "opt_compat_ibcs2.h"
     74 #include "opt_compat_freebsd.h"
     75 #include "opt_compat_netbsd.h"
     76 #include "opt_compat_svr4.h"
     77 #include "opt_cpureset_delay.h"
     78 #include "opt_ddb.h"
     79 #include "opt_ipkdb.h"
     80 #include "opt_kgdb.h"
     81 #include "opt_mtrr.h"
     82 #include "opt_modular.h"
     83 #include "opt_multiboot.h"
     84 #include "opt_multiprocessor.h"
     85 #include "opt_physmem.h"
     86 #include "opt_realmem.h"
     87 #include "opt_user_ldt.h"
     88 #include "opt_vm86.h"
     89 #include "opt_xen.h"
     90 #include "isa.h"
     91 #include "pci.h"
     92 
     93 #include <sys/param.h>
     94 #include <sys/systm.h>
     95 #include <sys/signal.h>
     96 #include <sys/signalvar.h>
     97 #include <sys/kernel.h>
     98 #include <sys/cpu.h>
     99 #include <sys/exec.h>
    100 #include <sys/fcntl.h>
    101 #include <sys/reboot.h>
    102 #include <sys/conf.h>
    103 #include <sys/kauth.h>
    104 #include <sys/mbuf.h>
    105 #include <sys/msgbuf.h>
    106 #include <sys/mount.h>
    107 #include <sys/syscallargs.h>
    108 #include <sys/core.h>
    109 #include <sys/kcore.h>
    110 #include <sys/ucontext.h>
    111 #include <sys/ras.h>
    112 #include <sys/ksyms.h>
    113 #include <sys/device.h>
    114 
    115 #ifdef IPKDB
    116 #include <ipkdb/ipkdb.h>
    117 #endif
    118 
    119 #ifdef KGDB
    120 #include <sys/kgdb.h>
    121 #endif
    122 
    123 #include <dev/cons.h>
    124 #include <dev/mm.h>
    125 
    126 #include <uvm/uvm.h>
    127 #include <uvm/uvm_page.h>
    128 
    129 #include <sys/sysctl.h>
    130 
    131 #include <machine/cpu.h>
    132 #include <machine/cpufunc.h>
    133 #include <machine/cpuvar.h>
    134 #include <machine/gdt.h>
    135 #include <machine/intr.h>
    136 #include <machine/kcore.h>
    137 #include <machine/pio.h>
    138 #include <machine/psl.h>
    139 #include <machine/reg.h>
    140 #include <machine/specialreg.h>
    141 #include <machine/bootinfo.h>
    142 #include <machine/mtrr.h>
    143 #include <x86/x86/tsc.h>
    144 
    145 #include <x86/fpu.h>
    146 #include <x86/dbregs.h>
    147 #include <x86/machdep.h>
    148 
    149 #include <machine/multiboot.h>
    150 #ifdef XEN
    151 #include <xen/evtchn.h>
    152 #include <xen/xen.h>
    153 #include <xen/hypervisor.h>
    154 
    155 /* #define	XENDEBUG */
    156 /* #define	XENDEBUG_LOW */
    157 
    158 #ifdef XENDEBUG
    159 #define	XENPRINTF(x) printf x
    160 #define	XENPRINTK(x) printk x
    161 #else
    162 #define	XENPRINTF(x)
    163 #define	XENPRINTK(x)
    164 #endif
    165 #define	PRINTK(x) printf x
    166 #endif /* XEN */
    167 
    168 #include <dev/isa/isareg.h>
    169 #include <machine/isa_machdep.h>
    170 #include <dev/ic/i8042reg.h>
    171 
    172 #ifdef DDB
    173 #include <machine/db_machdep.h>
    174 #include <ddb/db_extern.h>
    175 #endif
    176 
    177 #ifdef VM86
    178 #include <machine/vm86.h>
    179 #endif
    180 
    181 #include "acpica.h"
    182 #include "bioscall.h"
    183 
    184 #if NBIOSCALL > 0
    185 #include <machine/bioscall.h>
    186 #endif
    187 
    188 #if NACPICA > 0
    189 #include <dev/acpi/acpivar.h>
    190 #define ACPI_MACHDEP_PRIVATE
    191 #include <machine/acpi_machdep.h>
    192 #else
    193 #include <machine/i82489var.h>
    194 #endif
    195 
    196 #include "isa.h"
    197 #include "isadma.h"
    198 #include "ksyms.h"
    199 
    200 #include "cardbus.h"
    201 #if NCARDBUS > 0
    202 /* For rbus_min_start hint. */
    203 #include <sys/bus.h>
    204 #include <dev/cardbus/rbus.h>
    205 #include <machine/rbus_machdep.h>
    206 #endif
    207 
    208 #include "mca.h"
    209 #if NMCA > 0
    210 #include <machine/mca_machdep.h>	/* for mca_busprobe() */
    211 #endif
    212 
    213 #ifdef MULTIPROCESSOR		/* XXX */
    214 #include <machine/mpbiosvar.h>	/* XXX */
    215 #endif				/* XXX */
    216 
    217 /* the following is used externally (sysctl_hw) */
    218 char machine[] = "i386";		/* CPU "architecture" */
    219 char machine_arch[] = "i386";		/* machine == machine_arch */
    220 
    221 #ifdef CPURESET_DELAY
    222 int cpureset_delay = CPURESET_DELAY;
    223 #else
    224 int cpureset_delay = 2000; /* default to 2s */
    225 #endif
    226 
    227 #ifdef MTRR
    228 struct mtrr_funcs *mtrr_funcs;
    229 #endif
    230 
    231 int cpu_class;
    232 int use_pae;
    233 int i386_fpu_present = 1;
    234 int i386_fpu_fdivbug;
    235 
    236 int i386_use_fxsave;
    237 int i386_has_sse;
    238 int i386_has_sse2;
    239 
    240 struct pool x86_dbregspl;
    241 
    242 vaddr_t idt_vaddr;
    243 paddr_t idt_paddr;
    244 vaddr_t gdt_vaddr;
    245 paddr_t gdt_paddr;
    246 vaddr_t ldt_vaddr;
    247 paddr_t ldt_paddr;
    248 
    249 vaddr_t pentium_idt_vaddr;
    250 
    251 struct vm_map *phys_map = NULL;
    252 
    253 extern paddr_t lowmem_rsvd;
    254 extern paddr_t avail_start, avail_end;
    255 #ifdef XEN
    256 extern paddr_t pmap_pa_start, pmap_pa_end;
    257 void hypervisor_callback(void);
    258 void failsafe_callback(void);
    259 #endif
    260 
    261 #ifdef XEN
    262 void (*delay_func)(unsigned int) = xen_delay;
    263 void (*initclock_func)(void) = xen_initclocks;
    264 #else
    265 void (*delay_func)(unsigned int) = i8254_delay;
    266 void (*initclock_func)(void) = i8254_initclocks;
    267 #endif
    268 
    269 
    270 /*
    271  * Size of memory segments, before any memory is stolen.
    272  */
    273 phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];
    274 int mem_cluster_cnt = 0;
    275 
    276 void init386(paddr_t);
    277 void initgdt(union descriptor *);
    278 
    279 extern int time_adjusted;
    280 
    281 int *esym;
    282 int *eblob;
    283 extern int boothowto;
    284 
    285 #ifndef XEN
    286 
    287 /* Base memory reported by BIOS. */
    288 #ifndef REALBASEMEM
    289 int biosbasemem = 0;
    290 #else
    291 int biosbasemem = REALBASEMEM;
    292 #endif
    293 
    294 /* Extended memory reported by BIOS. */
    295 #ifndef REALEXTMEM
    296 int biosextmem = 0;
    297 #else
    298 int biosextmem = REALEXTMEM;
    299 #endif
    300 
    301 /* Set if any boot-loader set biosbasemem/biosextmem. */
    302 int biosmem_implicit;
    303 
    304 /*
    305  * Representation of the bootinfo structure constructed by a NetBSD native
     306  * boot loader.  Only used by native_loader().
    307  */
    308 struct bootinfo_source {
    309 	uint32_t bs_naddrs;
    310 	void *bs_addrs[1]; /* Actually longer. */
    311 };
    312 
    313 /* Only called by locore.S; no need to be in a header file. */
    314 void native_loader(int, int, struct bootinfo_source *, paddr_t, int, int);
    315 
    316 /*
    317  * Called as one of the very first things during system startup (just after
    318  * the boot loader gave control to the kernel image), this routine is in
    319  * charge of retrieving the parameters passed in by the boot loader and
    320  * storing them in the appropriate kernel variables.
    321  *
    322  * WARNING: Because the kernel has not yet relocated itself to KERNBASE,
    323  * special care has to be taken when accessing memory because absolute
    324  * addresses (referring to kernel symbols) do not work.  So:
    325  *
    326  *     1) Avoid jumps to absolute addresses (such as gotos and switches).
    327  *     2) To access global variables use their physical address, which
    328  *        can be obtained using the RELOC macro.
    329  */
    330 void
    331 native_loader(int bl_boothowto, int bl_bootdev,
    332     struct bootinfo_source *bl_bootinfo, paddr_t bl_esym,
    333     int bl_biosextmem, int bl_biosbasemem)
    334 {
    335 #define RELOC(type, x) ((type)((vaddr_t)(x) - KERNBASE))
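/*
 * An illustrative expansion, assuming the usual i386 KERNBASE of 0xc0000000:
 * RELOC(int *, &boothowto) becomes ((int *)((vaddr_t)(&boothowto) - 0xc0000000)),
 * i.e. the physical alias of the variable - the only address that works
 * before relocation.
 */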
    336 
    337 	*RELOC(int *, &boothowto) = bl_boothowto;
    338 
    339 #ifdef COMPAT_OLDBOOT
    340 	/*
    341 	 * Pre-1.3 boot loaders gave the boot device as a parameter
    342 	 * (instead of a bootinfo entry).
    343 	 */
    344 	*RELOC(int *, &bootdev) = bl_bootdev;
    345 #endif
    346 
    347 	/*
    348 	 * The boot loader provides a physical, non-relocated address
     349 	 * for the symbol table's end.  We need to convert it to a
    350 	 * virtual address.
    351 	 */
    352 	if (bl_esym != 0)
    353 		*RELOC(int **, &esym) = (int *)((vaddr_t)bl_esym + KERNBASE);
    354 	else
    355 		*RELOC(int **, &esym) = 0;
    356 
    357 	/*
    358 	 * Copy bootinfo entries (if any) from the boot loader's
    359 	 * representation to the kernel's bootinfo space.
    360 	 */
    361 	if (bl_bootinfo != NULL) {
    362 		size_t i;
    363 		uint8_t *data;
    364 		struct bootinfo *bidest;
    365 		struct btinfo_modulelist *bi;
    366 
    367 		bidest = RELOC(struct bootinfo *, &bootinfo);
    368 
    369 		data = &bidest->bi_data[0];
    370 
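		/*
		 * The entries are variable-length btinfo records packed
		 * back-to-back: copy each one and advance by its own
		 * length, stopping before BOOTINFO_MAXSIZE would overflow.
		 */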
    371 		for (i = 0; i < bl_bootinfo->bs_naddrs; i++) {
    372 			struct btinfo_common *bc;
    373 
    374 			bc = bl_bootinfo->bs_addrs[i];
    375 
    376 			if ((data + bc->len) >
    377 			    (&bidest->bi_data[0] + BOOTINFO_MAXSIZE))
    378 				break;
    379 
    380 			memcpy(data, bc, bc->len);
    381 			/*
    382 			 * If any modules were loaded, record where they
    383 			 * end.  We'll need to skip over them.
    384 			 */
    385 			bi = (struct btinfo_modulelist *)data;
    386 			if (bi->common.type == BTINFO_MODULELIST) {
    387 				*RELOC(int **, &eblob) =
    388 				    (int *)(bi->endpa + KERNBASE);
    389 			}
    390 			data += bc->len;
    391 		}
    392 		bidest->bi_nentries = i;
    393 	}
    394 
    395 	/*
    396 	 * Configure biosbasemem and biosextmem only if they were not
    397 	 * explicitly given during the kernel's build.
    398 	 */
    399 	if (*RELOC(int *, &biosbasemem) == 0) {
    400 		*RELOC(int *, &biosbasemem) = bl_biosbasemem;
    401 		*RELOC(int *, &biosmem_implicit) = 1;
    402 	}
    403 	if (*RELOC(int *, &biosextmem) == 0) {
    404 		*RELOC(int *, &biosextmem) = bl_biosextmem;
    405 		*RELOC(int *, &biosmem_implicit) = 1;
    406 	}
    407 #undef RELOC
    408 }
    409 
    410 #endif /* XEN */
    411 
    412 /*
    413  * Machine-dependent startup code
    414  */
    415 void
    416 cpu_startup(void)
    417 {
    418 	int x, y;
    419 	vaddr_t minaddr, maxaddr;
    420 	psize_t sz;
    421 
    422 	/*
    423 	 * For console drivers that require uvm and pmap to be initialized,
    424 	 * we'll give them one more chance here...
    425 	 */
    426 	consinit();
    427 
    428 	/*
     429 	 * Initialize error message buffer (at end of core).
    430 	 */
    431 	if (msgbuf_p_cnt == 0)
    432 		panic("msgbuf paddr map has not been set up");
    433 	for (x = 0, sz = 0; x < msgbuf_p_cnt; sz += msgbuf_p_seg[x++].sz)
    434 		continue;
    435 
    436 	msgbuf_vaddr = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_VAONLY);
    437 	if (msgbuf_vaddr == 0)
    438 		panic("failed to valloc msgbuf_vaddr");
    439 
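	/*
	 * Map each physical msgbuf segment back-to-back into the virtual
	 * range allocated above; sz accumulates the total mapped size that
	 * is handed to initmsgbuf() below.
	 */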
    440 	for (y = 0, sz = 0; y < msgbuf_p_cnt; y++) {
    441 		for (x = 0; x < btoc(msgbuf_p_seg[y].sz); x++, sz += PAGE_SIZE)
    442 			pmap_kenter_pa((vaddr_t)msgbuf_vaddr + sz,
    443 			    msgbuf_p_seg[y].paddr + x * PAGE_SIZE,
    444 			    VM_PROT_READ|VM_PROT_WRITE, 0);
    445 	}
    446 
    447 	pmap_update(pmap_kernel());
    448 
    449 	initmsgbuf((void *)msgbuf_vaddr, sz);
    450 
    451 #ifdef MULTIBOOT
    452 	multiboot_print_info();
    453 #endif
    454 
    455 #ifdef TRAPLOG
    456 	/*
    457 	 * Enable recording of branch from/to in MSR's
    458 	 */
    459 	wrmsr(MSR_DEBUGCTLMSR, 0x1);
    460 #endif
    461 
    462 #if NCARDBUS > 0
    463 	/* Tell RBUS how much RAM we have, so it can use heuristics. */
    464 	rbus_min_start_hint(ctob((psize_t)physmem));
    465 #endif
    466 
    467 	minaddr = 0;
    468 
    469 	/*
    470 	 * Allocate a submap for physio
    471 	 */
    472 	phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
    473 	    VM_PHYS_SIZE, 0, false, NULL);
    474 
    475 	/* Say hello. */
    476 	banner();
    477 
    478 	/* Safe for i/o port / memory space allocation to use malloc now. */
    479 #if NISA > 0 || NPCI > 0
    480 	x86_bus_space_mallocok();
    481 #endif
    482 
    483 	gdt_init();
    484 	i386_proc0_tss_ldt_init();
    485 
    486 #ifndef XEN
    487 	cpu_init_tss(&cpu_info_primary);
    488 	ltr(cpu_info_primary.ci_tss_sel);
    489 #endif
    490 
    491 	x86_startup();
    492 }
    493 
    494 /*
    495  * Set up proc0's TSS and LDT.
    496  */
    497 void
    498 i386_proc0_tss_ldt_init(void)
    499 {
    500 	struct lwp *l;
    501 	struct pcb *pcb __diagused;
    502 
    503 	l = &lwp0;
    504 	pcb = lwp_getpcb(l);
    505 
    506 	pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
    507 	pcb->pcb_cr0 = rcr0() & ~CR0_TS;
    508 	pcb->pcb_esp0 = uvm_lwp_getuarea(l) + USPACE - 16;
    509 	pcb->pcb_iopl = SEL_KPL;
    510 	l->l_md.md_regs = (struct trapframe *)pcb->pcb_esp0 - 1;
    511 	memcpy(&pcb->pcb_fsd, &gdtstore[GUDATA_SEL], sizeof(pcb->pcb_fsd));
    512 	memcpy(&pcb->pcb_gsd, &gdtstore[GUDATA_SEL], sizeof(pcb->pcb_gsd));
    513 	pcb->pcb_dbregs = NULL;
    514 
    515 #ifndef XEN
    516 	lldt(pmap_kernel()->pm_ldt_sel);
    517 #else
    518 	HYPERVISOR_fpu_taskswitch(1);
    519 	XENPRINTF(("lwp tss sp %p ss %04x/%04x\n",
    520 	    (void *)pcb->pcb_esp0,
    521 	    GSEL(GDATA_SEL, SEL_KPL),
    522 	    IDXSEL(GSEL(GDATA_SEL, SEL_KPL))));
    523 	HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_esp0);
    524 #endif
    525 }
    526 
    527 #ifdef XEN
    528 /* used in assembly */
    529 void i386_switch_context(lwp_t *);
    530 void i386_tls_switch(lwp_t *);
    531 
    532 /*
    533  * Switch context:
    534  * - switch stack pointer for user->kernel transition
    535  */
    536 void
    537 i386_switch_context(lwp_t *l)
    538 {
    539 	struct pcb *pcb;
    540 	struct physdev_op physop;
    541 
    542 	pcb = lwp_getpcb(l);
    543 
    544 	HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_esp0);
    545 
    546 	physop.cmd = PHYSDEVOP_SET_IOPL;
    547 	physop.u.set_iopl.iopl = pcb->pcb_iopl;
    548 	HYPERVISOR_physdev_op(&physop);
    549 }
    550 
    551 void
    552 i386_tls_switch(lwp_t *l)
    553 {
    554 	struct cpu_info *ci = curcpu();
    555 	struct pcb *pcb = lwp_getpcb(l);
    556 	/*
     557 	 * Raise the IPL to IPL_HIGH.
    558 	 * FPU IPIs can alter the LWP's saved cr0.  Dropping the priority
    559 	 * is deferred until mi_switch(), when cpu_switchto() returns.
    560 	 */
    561 	(void)splhigh();
    562 
     563 	/*
    564 	 * If our floating point registers are on a different CPU,
    565 	 * set CR0_TS so we'll trap rather than reuse bogus state.
    566 	 */
    567 
    568 	if (l != ci->ci_fpcurlwp) {
    569 		HYPERVISOR_fpu_taskswitch(1);
    570 	}
    571 
    572 	/* Update TLS segment pointers */
    573 	update_descriptor(&ci->ci_gdt[GUFS_SEL],
    574 			  (union descriptor *) &pcb->pcb_fsd);
    575 	update_descriptor(&ci->ci_gdt[GUGS_SEL],
    576 			  (union descriptor *) &pcb->pcb_gsd);
    577 
    578 }
    579 #endif /* XEN */
    580 
    581 #ifndef XEN
    582 /*
    583  * Set up TSS and I/O bitmap.
    584  */
    585 void
    586 cpu_init_tss(struct cpu_info *ci)
    587 {
    588 	struct i386tss *tss = &ci->ci_tss;
    589 
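	/*
	 * An I/O map base beyond the TSS limit means "no I/O bitmap", so
	 * user IN/OUT instructions fault unless PSL_IOPL has been granted.
	 * tss_ss0 is the stack segment the CPU loads on a user->kernel
	 * transition; the matching esp0 is kept per-LWP in the pcb.
	 */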
    590 	tss->tss_iobase = IOMAP_INVALOFF << 16;
    591 	tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
    592 	tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
    593 	tss->tss_cr3 = rcr3();
    594 	ci->ci_tss_sel = tss_alloc(tss);
    595 }
    596 #endif /* XEN */
    597 
    598 void *
    599 getframe(struct lwp *l, int sig, int *onstack)
    600 {
    601 	struct proc *p = l->l_proc;
    602 	struct trapframe *tf = l->l_md.md_regs;
    603 
    604 	/* Do we need to jump onto the signal stack? */
    605 	*onstack = (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0
    606 	    && (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    607 	if (*onstack)
    608 		return (char *)l->l_sigstk.ss_sp + l->l_sigstk.ss_size;
    609 #ifdef VM86
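	/*
	 * In VM86 mode the stack segment is a real-mode segment, so the
	 * linear stack address is (ss << 4) + sp; e.g. ss 0x9000 and
	 * sp 0xfffe give 0x9fffe.
	 */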
    610 	if (tf->tf_eflags & PSL_VM)
    611 		return (void *)(tf->tf_esp + (tf->tf_ss << 4));
    612 	else
    613 #endif
    614 		return (void *)tf->tf_esp;
    615 }
    616 
    617 /*
    618  * Build context to run handler in.  We invoke the handler
    619  * directly, only returning via the trampoline.  Note the
    620  * trampoline version numbers are coordinated with machine-
    621  * dependent code in libc.
    622  */
    623 void
    624 buildcontext(struct lwp *l, int sel, void *catcher, void *fp)
    625 {
    626 	struct trapframe *tf = l->l_md.md_regs;
    627 
    628 	tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
    629 	tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
    630 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    631 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    632 	tf->tf_eip = (int)catcher;
    633 	tf->tf_cs = GSEL(sel, SEL_UPL);
    634 	tf->tf_eflags &= ~PSL_CLEARSIG;
    635 	tf->tf_esp = (int)fp;
    636 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    637 
    638 	/* Ensure FP state is reset. */
    639 	fpu_save_area_reset(l);
    640 }
    641 
    642 void
    643 sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask)
    644 {
    645 	struct lwp *l = curlwp;
    646 	struct proc *p = l->l_proc;
    647 	struct pmap *pmap = vm_map_pmap(&p->p_vmspace->vm_map);
    648 	int sel = pmap->pm_hiexec > I386_MAX_EXE_ADDR ?
    649 	    GUCODEBIG_SEL : GUCODE_SEL;
    650 	struct sigacts *ps = p->p_sigacts;
    651 	int onstack, error;
    652 	int sig = ksi->ksi_signo;
    653 	struct sigframe_siginfo *fp = getframe(l, sig, &onstack), frame;
    654 	sig_t catcher = SIGACTION(p, sig).sa_handler;
    655 	struct trapframe *tf = l->l_md.md_regs;
    656 
    657 	KASSERT(mutex_owned(p->p_lock));
    658 
    659 	fp--;
    660 
    661 	frame.sf_ra = (int)ps->sa_sigdesc[sig].sd_tramp;
    662 	frame.sf_signum = sig;
    663 	frame.sf_sip = &fp->sf_si;
    664 	frame.sf_ucp = &fp->sf_uc;
    665 	frame.sf_si._info = ksi->ksi_info;
    666 	frame.sf_uc.uc_flags = _UC_SIGMASK|_UC_VM;
    667 	frame.sf_uc.uc_sigmask = *mask;
    668 	frame.sf_uc.uc_link = l->l_ctxlink;
    669 	frame.sf_uc.uc_flags |= (l->l_sigstk.ss_flags & SS_ONSTACK)
    670 	    ? _UC_SETSTACK : _UC_CLRSTACK;
    671 	memset(&frame.sf_uc.uc_stack, 0, sizeof(frame.sf_uc.uc_stack));
    672 
    673 	if (tf->tf_eflags & PSL_VM)
    674 		(*p->p_emul->e_syscall_intern)(p);
    675 	sendsig_reset(l, sig);
    676 
    677 	mutex_exit(p->p_lock);
    678 	cpu_getmcontext(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags);
    679 	error = copyout(&frame, fp, sizeof(frame));
    680 	mutex_enter(p->p_lock);
    681 
    682 	if (error != 0) {
    683 		/*
    684 		 * Process has trashed its stack; give it an illegal
    685 		 * instruction to halt it in its tracks.
    686 		 */
    687 		sigexit(l, SIGILL);
    688 		/* NOTREACHED */
    689 	}
    690 
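	/*
	 * fp now points at sf_ra, so on handler entry the return address,
	 * signal number, siginfo pointer and ucontext pointer sit at
	 * 0(%esp), 4(%esp), 8(%esp) and 12(%esp) respectively, assuming
	 * the usual struct sigframe_siginfo layout.
	 */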
    691 	buildcontext(l, sel, catcher, fp);
    692 
    693 	/* Remember that we're now on the signal stack. */
    694 	if (onstack)
    695 		l->l_sigstk.ss_flags |= SS_ONSTACK;
    696 }
    697 
    698 static void
    699 maybe_dump(int howto)
    700 {
    701 	int s;
    702 
    703 	/* Disable interrupts. */
    704 	s = splhigh();
    705 
    706 	/* Do a dump if requested. */
    707 	if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP)
    708 		dumpsys();
    709 
    710 	splx(s);
    711 }
    712 
    713 void
    714 cpu_reboot(int howto, char *bootstr)
    715 {
    716 	static bool syncdone = false;
    717 	int s = IPL_NONE;
    718 
    719 	if (cold) {
    720 		howto |= RB_HALT;
    721 		goto haltsys;
    722 	}
    723 
    724 	boothowto = howto;
    725 
    726 	/* XXX used to dump after vfs_shutdown() and before
    727 	 * detaching devices / shutdown hooks / pmf_system_shutdown().
    728 	 */
    729 	maybe_dump(howto);
    730 
    731 	/*
    732 	 * If we've panic'd, don't make the situation potentially
    733 	 * worse by syncing or unmounting the file systems.
    734 	 */
    735 	if ((howto & RB_NOSYNC) == 0 && panicstr == NULL) {
    736 		if (!syncdone) {
    737 			syncdone = true;
    738 			/* XXX used to force unmount as well, here */
    739 			vfs_sync_all(curlwp);
    740 			/*
    741 			 * If we've been adjusting the clock, the todr
    742 			 * will be out of synch; adjust it now.
    743 			 *
    744 			 * XXX used to do this after unmounting all
    745 			 * filesystems with vfs_shutdown().
    746 			 */
    747 			if (time_adjusted != 0)
    748 				resettodr();
    749 		}
    750 
    751 		while (vfs_unmountall1(curlwp, false, false) ||
    752 		       config_detach_all(boothowto) ||
    753 		       vfs_unmount_forceone(curlwp))
    754 			;	/* do nothing */
    755 	} else
    756 		suspendsched();
    757 
    758 	pmf_system_shutdown(boothowto);
    759 
    760 	s = splhigh();
    761 
    762 	/* amd64 maybe_dump() */
    763 
    764 haltsys:
    765 	doshutdownhooks();
    766 
    767 	if ((howto & RB_POWERDOWN) == RB_POWERDOWN) {
    768 #if NACPICA > 0
    769 		if (s != IPL_NONE)
    770 			splx(s);
    771 
    772 		acpi_enter_sleep_state(ACPI_STATE_S5);
    773 #else
    774 		__USE(s);
    775 #endif
    776 #ifdef XEN
    777 		HYPERVISOR_shutdown();
    778 		for (;;);
    779 #endif
    780 	}
    781 
    782 #ifdef MULTIPROCESSOR
    783 	cpu_broadcast_halt();
    784 #endif /* MULTIPROCESSOR */
    785 
    786 	if (howto & RB_HALT) {
    787 #if NACPICA > 0
    788 		acpi_disable();
    789 #endif
    790 
    791 		printf("\n");
    792 		printf("The operating system has halted.\n");
    793 		printf("Please press any key to reboot.\n\n");
    794 
    795 #ifdef BEEP_ONHALT
    796 		{
    797 			int c;
    798 			for (c = BEEP_ONHALT_COUNT; c > 0; c--) {
    799 				sysbeep(BEEP_ONHALT_PITCH,
    800 					BEEP_ONHALT_PERIOD * hz / 1000);
    801 				delay(BEEP_ONHALT_PERIOD * 1000);
    802 				sysbeep(0, BEEP_ONHALT_PERIOD * hz / 1000);
    803 				delay(BEEP_ONHALT_PERIOD * 1000);
    804 			}
    805 		}
    806 #endif
    807 
    808 		cnpollc(1);	/* for proper keyboard command handling */
    809 		if (cngetc() == 0) {
    810 			/* no console attached, so just hlt */
    811 			printf("No keyboard - cannot reboot after all.\n");
    812 			for(;;) {
    813 				x86_hlt();
    814 			}
    815 		}
    816 		cnpollc(0);
    817 	}
    818 
    819 	printf("rebooting...\n");
    820 	if (cpureset_delay > 0)
    821 		delay(cpureset_delay * 1000);
    822 	cpu_reset();
    823 	for(;;) ;
    824 	/*NOTREACHED*/
    825 }
    826 
    827 /*
    828  * Clear registers on exec
    829  */
    830 void
    831 setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
    832 {
    833 	struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
    834 	struct pcb *pcb = lwp_getpcb(l);
    835 	struct trapframe *tf;
    836 
    837 #ifdef USER_LDT
    838 	pmap_ldt_cleanup(l);
    839 #endif
    840 
    841 	fpu_save_area_clear(l, pack->ep_osversion >= 699002600
    842 	    ? __INITIAL_NPXCW__ : __NetBSD_COMPAT_NPXCW__);
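	/*
	 * 699002600 decodes as NetBSD 6.99.26; executables built before
	 * that version get the historic x87 control word so their FP
	 * exception behaviour is unchanged.
	 */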
    843 
    844 	memcpy(&pcb->pcb_fsd, &gdtstore[GUDATA_SEL], sizeof(pcb->pcb_fsd));
    845 	memcpy(&pcb->pcb_gsd, &gdtstore[GUDATA_SEL], sizeof(pcb->pcb_gsd));
    846 	if (pcb->pcb_dbregs != NULL) {
    847 		pool_put(&x86_dbregspl, pcb->pcb_dbregs);
    848 		pcb->pcb_dbregs = NULL;
    849 	}
    850 
    851 	tf = l->l_md.md_regs;
    852 	tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
    853 	tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
    854 	tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL);
    855 	tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL);
    856 	tf->tf_edi = 0;
    857 	tf->tf_esi = 0;
    858 	tf->tf_ebp = 0;
    859 	tf->tf_ebx = l->l_proc->p_psstrp;
    860 	tf->tf_edx = 0;
    861 	tf->tf_ecx = 0;
    862 	tf->tf_eax = 0;
    863 	tf->tf_eip = pack->ep_entry;
    864 	tf->tf_cs = pmap->pm_hiexec > I386_MAX_EXE_ADDR ?
    865 	    LSEL(LUCODEBIG_SEL, SEL_UPL) : LSEL(LUCODE_SEL, SEL_UPL);
    866 	tf->tf_eflags = PSL_USERSET;
    867 	tf->tf_esp = stack;
    868 	tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL);
    869 }
    870 
    871 /*
    872  * Initialize segments and descriptor tables
    873  */
    874 
    875 union descriptor *gdtstore, *ldtstore;
    876 union descriptor *pentium_idt;
    877 extern vaddr_t lwp0uarea;
    878 
    879 void
    880 setgate(struct gate_descriptor *gd, void *func, int args, int type, int dpl,
    881     int sel)
    882 {
    883 
    884 	gd->gd_looffset = (int)func;
    885 	gd->gd_selector = sel;
    886 	gd->gd_stkcpy = args;
    887 	gd->gd_xx = 0;
    888 	gd->gd_type = type;
    889 	gd->gd_dpl = dpl;
    890 	gd->gd_p = 1;
    891 	gd->gd_hioffset = (int)func >> 16;
    892 }
    893 
    894 void
    895 unsetgate(struct gate_descriptor *gd)
    896 {
    897 	gd->gd_p = 0;
    898 	gd->gd_hioffset = 0;
    899 	gd->gd_looffset = 0;
    900 	gd->gd_selector = 0;
    901 	gd->gd_xx = 0;
    902 	gd->gd_stkcpy = 0;
    903 	gd->gd_type = 0;
    904 	gd->gd_dpl = 0;
    905 }
    906 
    907 
    908 void
    909 setregion(struct region_descriptor *rd, void *base, size_t limit)
    910 {
    911 
    912 	rd->rd_limit = (int)limit;
    913 	rd->rd_base = (int)base;
    914 }
    915 
    916 void
    917 setsegment(struct segment_descriptor *sd, const void *base, size_t limit,
    918     int type, int dpl, int def32, int gran)
    919 {
    920 
    921 	sd->sd_lolimit = (int)limit;
    922 	sd->sd_lobase = (int)base;
    923 	sd->sd_type = type;
    924 	sd->sd_dpl = dpl;
    925 	sd->sd_p = 1;
    926 	sd->sd_hilimit = (int)limit >> 16;
    927 	sd->sd_xx = 0;
    928 	sd->sd_def32 = def32;
    929 	sd->sd_gran = gran;
    930 	sd->sd_hibase = (int)base >> 24;
    931 }
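/*
 * A worked example: setsegment(&sd, 0, 0xfffff, SDT_MEMRWA, SEL_KPL, 1, 1)
 * yields a flat ring-0 data segment - the 0xfffff limit with gran=1 is
 * scaled by 4KB pages to cover the full 4GB, and def32=1 selects 32-bit
 * operands and addresses.  initgdt() uses exactly this shape for GDATA_SEL.
 */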
    932 
    933 #define	IDTVEC(name)	__CONCAT(X, name)
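/*
 * IDTVEC(name) pastes an 'X' onto the name, so the externs below resolve
 * to assembly entry points such as Xsyscall and Xosyscall.
 */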
    934 typedef void (vector)(void);
    935 extern vector IDTVEC(syscall);
    936 extern vector IDTVEC(osyscall);
    937 extern vector *IDTVEC(exceptions)[];
    938 extern vector IDTVEC(svr4_fasttrap);
    939 void (*svr4_fasttrap_vec)(void) = (void (*)(void))nullop;
    940 krwlock_t svr4_fasttrap_lock;
    941 #ifdef XEN
    942 #define MAX_XEN_IDT 128
    943 trap_info_t xen_idt[MAX_XEN_IDT];
    944 int xen_idt_idx;
    945 extern union descriptor tmpgdt[];
    946 #endif
    947 
    948 void
    949 cpu_init_idt(void)
    950 {
    951 #ifndef XEN
    952 	struct region_descriptor region;
    953 	setregion(&region, pentium_idt, NIDT * sizeof(idt[0]) - 1);
    954 	lidt(&region);
    955 #else /* XEN */
    956 	XENPRINTF(("HYPERVISOR_set_trap_table %p\n", xen_idt));
    957 	if (HYPERVISOR_set_trap_table(xen_idt))
    958 		panic("HYPERVISOR_set_trap_table %p failed\n", xen_idt);
    959 #endif /* !XEN */
    960 }
    961 
    962 void
    963 initgdt(union descriptor *tgdt)
    964 {
    965 	KASSERT(tgdt != NULL);
    966 
    967 	gdtstore = tgdt;
    968 #ifdef XEN
    969 	u_long	frames[16];
    970 #else
    971 	struct region_descriptor region;
    972 	memset(gdtstore, 0, NGDT * sizeof(*gdtstore));
    973 #endif
    974 
    975 	/* make gdt gates and memory segments */
    976 	setsegment(&gdtstore[GCODE_SEL].sd, 0, 0xfffff,
    977 	    SDT_MEMERA, SEL_KPL, 1, 1);
    978 	setsegment(&gdtstore[GDATA_SEL].sd, 0, 0xfffff,
    979 	    SDT_MEMRWA, SEL_KPL, 1, 1);
    980 	setsegment(&gdtstore[GUCODE_SEL].sd, 0, x86_btop(I386_MAX_EXE_ADDR) - 1,
    981 	    SDT_MEMERA, SEL_UPL, 1, 1);
    982 	setsegment(&gdtstore[GUCODEBIG_SEL].sd, 0, 0xfffff,
    983 	    SDT_MEMERA, SEL_UPL, 1, 1);
    984 	setsegment(&gdtstore[GUDATA_SEL].sd, 0, 0xfffff,
    985 	    SDT_MEMRWA, SEL_UPL, 1, 1);
    986 #if NBIOSCALL > 0
    987 	/* bios trampoline GDT entries */
    988 	setsegment(&gdtstore[GBIOSCODE_SEL].sd, 0, 0xfffff,
    989 	    SDT_MEMERA, SEL_KPL, 0, 0);
    990 	setsegment(&gdtstore[GBIOSDATA_SEL].sd, 0, 0xfffff,
    991 	    SDT_MEMRWA, SEL_KPL, 0, 0);
    992 #endif
    993 	setsegment(&gdtstore[GCPU_SEL].sd, &cpu_info_primary,
    994 	    sizeof(struct cpu_info) - 1, SDT_MEMRWA, SEL_KPL, 1, 0);
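	/*
	 * GCPU_SEL makes the boot CPU's cpu_info addressable through a
	 * dedicated segment; %fs is later loaded with this selector so
	 * curcpu() and curlwp amount to cheap %fs-relative reads.
	 */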
    995 
    996 #ifndef XEN
    997 	setregion(&region, gdtstore, NGDT * sizeof(gdtstore[0]) - 1);
    998 	lgdt(&region);
    999 #else /* !XEN */
   1000 	/*
   1001 	 * We jumpstart the bootstrap process a bit so we can update
   1002 	 * page permissions. This is done redundantly later from
   1003 	 * x86_xpmap.c:xen_locore() - harmless.
   1004 	 */
   1005 	xpmap_phys_to_machine_mapping =
   1006 	    (unsigned long *)xen_start_info.mfn_list;
   1007 
   1008 	frames[0] = xpmap_ptom((uint32_t)gdtstore - KERNBASE) >> PAGE_SHIFT;
   1009 	{	/*
   1010 		 * Enter the gdt page RO into the kernel map. We can't
   1011 		 * use pmap_kenter_pa() here, because %fs is not
   1012 		 * usable until the gdt is loaded, and %fs is used as
   1013 		 * the base pointer for curcpu() and curlwp(), both of
   1014 		 * which are in the callpath of pmap_kenter_pa().
   1015 		 * So we mash up our own - this is MD code anyway.
   1016 		 */
   1017 		extern pt_entry_t xpmap_pg_nx;
   1018 		pt_entry_t pte;
   1019 
   1020 		pte = pmap_pa2pte((vaddr_t)gdtstore - KERNBASE);
   1021 		pte |= PG_RO | xpmap_pg_nx | PG_V;
   1022 
   1023 		if (HYPERVISOR_update_va_mapping((vaddr_t)gdtstore, pte,
   1024 		    UVMF_INVLPG) < 0) {
   1025 			panic("gdt page RO update failed.\n");
   1026 		}
   1027 
   1028 	}
   1029 
   1030 	XENPRINTK(("loading gdt %lx, %d entries\n", frames[0] << PAGE_SHIFT,
   1031 	    NGDT));
   1032 	if (HYPERVISOR_set_gdt(frames, NGDT /* XXX is it right ? */))
   1033 		panic("HYPERVISOR_set_gdt failed!\n");
   1034 
   1035 	lgdt_finish();
   1036 #endif /* !XEN */
   1037 }
   1038 
   1039 #ifndef XEN
   1040 static void
   1041 init386_pte0(void)
   1042 {
   1043 	paddr_t paddr;
   1044 	vaddr_t vaddr;
   1045 
   1046 	paddr = 4 * PAGE_SIZE;
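	/* Physical page 4: the temporary page table for 0MB-4MB; see the
	 * low memory reservations in init386(). */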
   1047 	vaddr = (vaddr_t)vtopte(0);
   1048 	pmap_kenter_pa(vaddr, paddr, VM_PROT_ALL, 0);
   1049 	pmap_update(pmap_kernel());
   1050 	/* make sure it is clean before using */
   1051 	memset((void *)vaddr, 0, PAGE_SIZE);
   1052 }
   1053 #endif /* !XEN */
   1054 
   1055 static void
   1056 init386_ksyms(void)
   1057 {
   1058 #if NKSYMS || defined(DDB) || defined(MODULAR)
   1059 	extern int end;
   1060 	struct btinfo_symtab *symtab;
   1061 
   1062 #ifdef DDB
   1063 	db_machine_init();
   1064 #endif
   1065 
   1066 #if defined(MULTIBOOT)
   1067 	if (multiboot_ksyms_addsyms_elf())
   1068 		return;
   1069 #endif
   1070 
   1071 	if ((symtab = lookup_bootinfo(BTINFO_SYMTAB)) == NULL) {
   1072 		ksyms_addsyms_elf(*(int *)&end, ((int *)&end) + 1, esym);
   1073 		return;
   1074 	}
   1075 
   1076 	symtab->ssym += KERNBASE;
   1077 	symtab->esym += KERNBASE;
   1078 	ksyms_addsyms_elf(symtab->nsym, (int *)symtab->ssym, (int *)symtab->esym);
   1079 #endif
   1080 }
   1081 
   1082 void
   1083 init386(paddr_t first_avail)
   1084 {
   1085 	extern void consinit(void);
   1086 	int x;
   1087 #ifndef XEN
   1088 	extern paddr_t local_apic_pa;
   1089 	union descriptor *tgdt;
   1090 	struct region_descriptor region;
   1091 #endif
   1092 #if NBIOSCALL > 0
   1093 	extern int biostramp_image_size;
   1094 	extern u_char biostramp_image[];
   1095 #endif
   1096 	struct pcb *pcb;
   1097 
   1098 	KASSERT(first_avail % PAGE_SIZE == 0);
   1099 
   1100 #ifdef XEN
   1101 	XENPRINTK(("HYPERVISOR_shared_info %p (%x)\n", HYPERVISOR_shared_info,
   1102 	    xen_start_info.shared_info));
   1103 	KASSERT(HYPERVISOR_shared_info != NULL);
   1104 	cpu_info_primary.ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[0];
   1105 #endif
   1106 
   1107 	uvm_lwp_setuarea(&lwp0, lwp0uarea);
   1108 
   1109 	cpu_probe(&cpu_info_primary);
   1110 	cpu_init_msrs(&cpu_info_primary, true);
   1111 
   1112 #ifdef PAE
   1113 	use_pae = 1;
   1114 #else
   1115 	use_pae = 0;
   1116 #endif
   1117 
   1118 	pcb = lwp_getpcb(&lwp0);
   1119 #ifdef XEN
   1120 	pcb->pcb_cr3 = PDPpaddr;
   1121 	__PRINTK(("pcb_cr3 0x%lx cr3 0x%lx\n",
   1122 	    PDPpaddr, xpmap_ptom(PDPpaddr)));
   1123 	XENPRINTK(("lwp0uarea %p first_avail %p\n",
   1124 	    lwp0uarea, (void *)(long)first_avail));
   1125 	XENPRINTK(("ptdpaddr %p atdevbase %p\n", (void *)PDPpaddr,
   1126 	    (void *)atdevbase));
   1127 #endif
   1128 
   1129 #if defined(PAE) && !defined(XEN)
   1130 	/*
   1131 	 * Save VA and PA of L3 PD of boot processor (for Xen, this is done
   1132 	 * in xen_locore())
   1133 	 */
   1134 	cpu_info_primary.ci_pae_l3_pdirpa = rcr3();
   1135 	cpu_info_primary.ci_pae_l3_pdir = (pd_entry_t *)(rcr3() + KERNBASE);
   1136 #endif /* PAE && !XEN */
   1137 
   1138 	uvm_md_init();
   1139 
   1140 	/*
   1141 	 * Start with 2 color bins -- this is just a guess to get us
   1142 	 * started.  We'll recolor when we determine the largest cache
   1143 	 * sizes on the system.
   1144 	 */
   1145 	uvmexp.ncolors = 2;
   1146 
   1147 	avail_start = first_avail;
   1148 
   1149 #ifndef XEN
   1150 	/*
   1151 	 * Low memory reservations:
   1152 	 * Page 0:	BIOS data
   1153 	 * Page 1:	BIOS callback
   1154 	 * Page 2:	MP bootstrap code (MP_TRAMPOLINE)
   1155 	 * Page 3:	ACPI wakeup code (ACPI_WAKEUP_ADDR)
   1156 	 * Page 4:	Temporary page table for 0MB-4MB
   1157 	 * Page 5:	Temporary page directory
   1158 	 */
   1159 	lowmem_rsvd = 6 * PAGE_SIZE;
   1160 #else /* !XEN */
   1161 	/* Parse Xen command line (replace bootinfo) */
   1162 	xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL);
   1163 
   1164 	/* Use the dummy page as a gdt */
   1165 	extern vaddr_t xen_dummy_page;
   1166 	gdtstore = (void *)xen_dummy_page;
   1167 
   1168 	/* Determine physical address space */
   1169 	avail_end = ctob((paddr_t)xen_start_info.nr_pages);
   1170 	pmap_pa_start = (KERNTEXTOFF - KERNBASE);
   1171 	pmap_pa_end = pmap_pa_start + ctob((paddr_t)xen_start_info.nr_pages);
   1172 	mem_clusters[0].start = avail_start;
   1173 	mem_clusters[0].size = avail_end - avail_start;
   1174 	mem_cluster_cnt++;
   1175 	physmem += xen_start_info.nr_pages;
   1176 	uvmexp.wired += atop(avail_start);
   1177 
   1178 	/*
   1179 	 * initgdt() has to be done before consinit(), so that %fs is properly
   1180 	 * initialised. initgdt() uses pmap_kenter_pa so it can't be called
   1181 	 * before the above variables are set.
   1182 	 */
   1183 	initgdt(gdtstore);
   1184 
   1185 	mutex_init(&pte_lock, MUTEX_DEFAULT, IPL_VM);
   1186 #endif /* XEN */
   1187 
   1188 #if NISA > 0 || NPCI > 0
   1189 	x86_bus_space_init();
   1190 #endif /* NISA > 0 || NPCI > 0 */
   1191 
   1192 	consinit();	/* XXX SHOULD NOT BE DONE HERE */
   1193 
   1194 #ifdef DEBUG_MEMLOAD
   1195 	printf("mem_cluster_count: %d\n", mem_cluster_cnt);
   1196 #endif
   1197 
   1198 	/*
   1199 	 * Call pmap initialization to make new kernel address space.
   1200 	 * We must do this before loading pages into the VM system.
   1201 	 */
   1202 	pmap_bootstrap((vaddr_t)atdevbase + IOM_SIZE);
   1203 
   1204 #ifndef XEN
   1205 	/* Initialize the memory clusters. */
   1206 	init_x86_clusters();
   1207 
   1208 	/* Internalize the physical pages into the VM system. */
   1209 	init_x86_vm(avail_start);
   1210 #else /* !XEN */
   1211 	XENPRINTK(("load the memory cluster 0x%" PRIx64 " (%" PRId64 ") - "
   1212 	    "0x%" PRIx64 " (%" PRId64 ")\n",
   1213 	    (uint64_t)avail_start, (uint64_t)atop(avail_start),
   1214 	    (uint64_t)avail_end, (uint64_t)atop(avail_end)));
   1215 	uvm_page_physload(atop(avail_start), atop(avail_end),
   1216 	    atop(avail_start), atop(avail_end),
   1217 	    VM_FREELIST_DEFAULT);
   1218 
   1219 	/* Reclaim the boot gdt page - see locore.s */
   1220 	{
   1221 		extern pt_entry_t xpmap_pg_nx;
   1222 		pt_entry_t pte;
   1223 
   1224 		pte = pmap_pa2pte((vaddr_t)tmpgdt - KERNBASE);
   1225 		pte |= PG_RW | xpmap_pg_nx | PG_V;
   1226 
   1227 		if (HYPERVISOR_update_va_mapping((vaddr_t)tmpgdt, pte, UVMF_INVLPG) < 0) {
    1228 			panic("tmpgdt page reclaim RW update failed.\n");
   1229 		}
   1230 	}
   1231 
   1232 #endif /* !XEN */
   1233 
   1234 	init_x86_msgbuf();
   1235 
   1236 #if !defined(XEN) && NBIOSCALL > 0
   1237 	/*
   1238 	 * XXX Remove this
   1239 	 *
   1240 	 * Setup a temporary Page Table Entry to allow identity mappings of
   1241 	 * the real mode address. This is required by bioscall.
   1242 	 */
   1243 	init386_pte0();
   1244 
   1245 	KASSERT(biostramp_image_size <= PAGE_SIZE);
   1246 	pmap_kenter_pa((vaddr_t)BIOSTRAMP_BASE, (paddr_t)BIOSTRAMP_BASE,
   1247 	    VM_PROT_ALL, 0);
   1248 	pmap_update(pmap_kernel());
   1249 	memcpy((void *)BIOSTRAMP_BASE, biostramp_image, biostramp_image_size);
   1250 
   1251 	/* Needed early, for bioscall() */
   1252 	cpu_info_primary.ci_pmap = pmap_kernel();
   1253 #endif
   1254 
   1255 #ifndef XEN
   1256 	pmap_kenter_pa(local_apic_va, local_apic_pa,
   1257 	    VM_PROT_READ|VM_PROT_WRITE, 0);
   1258 	pmap_update(pmap_kernel());
   1259 	memset((void *)local_apic_va, 0, PAGE_SIZE);
   1260 #endif
   1261 
   1262 	pmap_kenter_pa(idt_vaddr, idt_paddr, VM_PROT_READ|VM_PROT_WRITE, 0);
   1263 	pmap_kenter_pa(gdt_vaddr, gdt_paddr, VM_PROT_READ|VM_PROT_WRITE, 0);
   1264 	pmap_kenter_pa(ldt_vaddr, ldt_paddr, VM_PROT_READ|VM_PROT_WRITE, 0);
   1265 	pmap_update(pmap_kernel());
   1266 	memset((void *)idt_vaddr, 0, PAGE_SIZE);
   1267 	memset((void *)gdt_vaddr, 0, PAGE_SIZE);
   1268 	memset((void *)ldt_vaddr, 0, PAGE_SIZE);
   1269 
   1270 #ifndef XEN
   1271 	pmap_kenter_pa(pentium_idt_vaddr, idt_paddr, VM_PROT_READ, 0);
   1272 	pmap_update(pmap_kernel());
   1273 	pentium_idt = (union descriptor *)pentium_idt_vaddr;
   1274 
   1275 	tgdt = gdtstore;
   1276 	idt = (struct gate_descriptor *)idt_vaddr;
   1277 	gdtstore = (union descriptor *)gdt_vaddr;
   1278 	ldtstore = (union descriptor *)ldt_vaddr;
   1279 
   1280 	memcpy(gdtstore, tgdt, NGDT * sizeof(*gdtstore));
   1281 
   1282 	setsegment(&gdtstore[GLDT_SEL].sd, ldtstore,
   1283 	    NLDT * sizeof(ldtstore[0]) - 1, SDT_SYSLDT, SEL_KPL, 0, 0);
   1284 #else
   1285 	HYPERVISOR_set_callbacks(
   1286 	    GSEL(GCODE_SEL, SEL_KPL), (unsigned long)hypervisor_callback,
   1287 	    GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback);
   1288 
   1289 	ldtstore = (union descriptor *)idt_vaddr;
   1290 #endif /* XEN */
   1291 
   1292 	/* make ldt gates and memory segments */
   1293 	setgate(&ldtstore[LSYS5CALLS_SEL].gd, &IDTVEC(osyscall), 1,
   1294 	    SDT_SYS386CGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
   1295 
   1296 	ldtstore[LUCODE_SEL] = gdtstore[GUCODE_SEL];
   1297 	ldtstore[LUCODEBIG_SEL] = gdtstore[GUCODEBIG_SEL];
   1298 	ldtstore[LUDATA_SEL] = gdtstore[GUDATA_SEL];
   1299 	ldtstore[LSOL26CALLS_SEL] = ldtstore[LBSDICALLS_SEL] =
   1300 	    ldtstore[LSYS5CALLS_SEL];
   1301 
   1302 #ifndef XEN
   1303 	/* exceptions */
   1304 	for (x = 0; x < 32; x++) {
   1305 		idt_vec_reserve(x);
   1306 		setgate(&idt[x], IDTVEC(exceptions)[x], 0, SDT_SYS386IGT,
   1307 		    (x == 3 || x == 4) ? SEL_UPL : SEL_KPL,
   1308 		    GSEL(GCODE_SEL, SEL_KPL));
   1309 	}
   1310 
   1311 	/* new-style interrupt gate for syscalls */
   1312 	idt_vec_reserve(128);
   1313 	setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386IGT, SEL_UPL,
   1314 	    GSEL(GCODE_SEL, SEL_KPL));
   1315 	idt_vec_reserve(0xd2);
   1316 	setgate(&idt[0xd2], &IDTVEC(svr4_fasttrap), 0, SDT_SYS386IGT,
   1317 	    SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
   1318 
   1319 	setregion(&region, gdtstore, NGDT * sizeof(gdtstore[0]) - 1);
   1320 	lgdt(&region);
   1321 
   1322 	cpu_init_idt();
   1323 #else /* !XEN */
   1324 	memset(xen_idt, 0, sizeof(trap_info_t) * MAX_XEN_IDT);
   1325 	xen_idt_idx = 0;
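	/*
	 * Each trap_info_t hands Xen the vector, a flags field encoding
	 * the privilege needed to invoke the trap (SEL_UPL lets user mode
	 * reach it, as for breakpoints and int $0x80) and the handler's
	 * cs:address; TI_SET_IF asks for events to stay masked on entry.
	 */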
   1326 	for (x = 0; x < 32; x++) {
   1327 		KASSERT(xen_idt_idx < MAX_XEN_IDT);
   1328 		xen_idt[xen_idt_idx].vector = x;
   1329 
   1330 		switch (x) {
   1331 		case 2:  /* NMI */
   1332 		case 18: /* MCA */
   1333 			TI_SET_IF(&(xen_idt[xen_idt_idx]), 2);
   1334 			break;
   1335 		case 3:
   1336 		case 4:
   1337 			xen_idt[xen_idt_idx].flags = SEL_UPL;
   1338 			break;
   1339 		default:
   1340 			xen_idt[xen_idt_idx].flags = SEL_XEN;
   1341 			break;
   1342 		}
   1343 
   1344 		xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL);
   1345 		xen_idt[xen_idt_idx].address =
   1346 			(uint32_t)IDTVEC(exceptions)[x];
   1347 		xen_idt_idx++;
   1348 	}
   1349 	KASSERT(xen_idt_idx < MAX_XEN_IDT);
   1350 	xen_idt[xen_idt_idx].vector = 128;
   1351 	xen_idt[xen_idt_idx].flags = SEL_UPL;
   1352 	xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL);
   1353 	xen_idt[xen_idt_idx].address = (uint32_t)&IDTVEC(syscall);
   1354 	xen_idt_idx++;
   1355 	KASSERT(xen_idt_idx < MAX_XEN_IDT);
   1356 	xen_idt[xen_idt_idx].vector = 0xd2;
   1357 	xen_idt[xen_idt_idx].flags = SEL_UPL;
   1358 	xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL);
   1359 	xen_idt[xen_idt_idx].address = (uint32_t)&IDTVEC(svr4_fasttrap);
   1360 	xen_idt_idx++;
   1361 	lldt(GSEL(GLDT_SEL, SEL_KPL));
   1362 	cpu_init_idt();
   1363 #endif /* XEN */
   1364 
   1365 	init386_ksyms();
   1366 
   1367 #if NMCA > 0
    1368 	/* Check for MCA bus; this needs to be done before the ISA setup,
    1369 	 * since if MCA is detected, ISA must use level triggered interrupts
    1370 	 * by default. */
   1371 	mca_busprobe();
   1372 #endif
   1373 
   1374 #ifdef XEN
   1375 	XENPRINTF(("events_default_setup\n"));
   1376 	events_default_setup();
   1377 #else
   1378 	intr_default_setup();
   1379 #endif
   1380 
   1381 	splraise(IPL_HIGH);
   1382 	x86_enable_intr();
   1383 
   1384 #ifdef DDB
   1385 	if (boothowto & RB_KDB)
   1386 		Debugger();
   1387 #endif
   1388 #ifdef IPKDB
   1389 	ipkdb_init();
   1390 	if (boothowto & RB_KDB)
   1391 		ipkdb_connect(0);
   1392 #endif
   1393 #ifdef KGDB
   1394 	kgdb_port_init();
   1395 	if (boothowto & RB_KDB) {
   1396 		kgdb_debug_init = 1;
   1397 		kgdb_connect(1);
   1398 	}
   1399 #endif
   1400 
   1401 	if (physmem < btoc(2 * 1024 * 1024)) {
   1402 		printf("warning: too little memory available; "
   1403 		       "have %lu bytes, want %lu bytes\n"
   1404 		       "running in degraded mode\n"
   1405 		       "press a key to confirm\n\n",
   1406 		       (unsigned long)ptoa(physmem), 2*1024*1024UL);
   1407 		cngetc();
   1408 	}
   1409 
   1410 	rw_init(&svr4_fasttrap_lock);
   1411 
   1412 	pcb->pcb_dbregs = NULL;
   1413 
   1414 	x86_dbregs_setup_initdbstate();
   1415 
   1416 	pool_init(&x86_dbregspl, sizeof(struct dbreg), 16, 0, 0, "dbregs",
   1417 	    NULL, IPL_NONE);
   1418 }
   1419 
   1420 #include <dev/ic/mc146818reg.h>		/* for NVRAM POST */
   1421 #include <i386/isa/nvram.h>		/* for NVRAM POST */
   1422 
   1423 void
   1424 cpu_reset(void)
   1425 {
   1426 #ifdef XEN
   1427 	HYPERVISOR_reboot();
   1428 	for (;;);
   1429 #else /* XEN */
   1430 	struct region_descriptor region;
   1431 
   1432 	x86_disable_intr();
   1433 
   1434 	/*
   1435 	 * Ensure the NVRAM reset byte contains something vaguely sane.
   1436 	 */
   1437 
   1438 	outb(IO_RTC, NVRAM_RESET);
   1439 	outb(IO_RTC+1, NVRAM_RESET_RST);
   1440 
   1441 	/*
   1442 	 * Reset AMD Geode SC1100.
   1443 	 *
   1444 	 * 1) Write PCI Configuration Address Register (0xcf8) to
   1445 	 *    select Function 0, Register 0x44: Bridge Configuration,
   1446 	 *    GPIO and LPC Configuration Register Space, Reset
   1447 	 *    Control Register.
   1448 	 *
   1449 	 * 2) Write 0xf to PCI Configuration Data Register (0xcfc)
   1450 	 *    to reset IDE controller, IDE bus, and PCI bus, and
   1451 	 *    to trigger a system-wide reset.
   1452 	 *
   1453 	 * See AMD Geode SC1100 Processor Data Book, Revision 2.0,
   1454 	 * sections 6.3.1, 6.3.2, and 6.4.1.
   1455 	 */
   1456 	if (cpu_info_primary.ci_signature == 0x540) {
   1457 		outl(0xcf8, 0x80009044);
   1458 		outl(0xcfc, 0xf);
   1459 	}
   1460 
   1461 	x86_reset();
   1462 
   1463 	/*
   1464 	 * Try to cause a triple fault and watchdog reset by making the IDT
   1465 	 * invalid and causing a fault.
   1466 	 */
   1467 	memset((void *)idt, 0, NIDT * sizeof(idt[0]));
   1468 	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
   1469 	lidt(&region);
   1470 	breakpoint();
   1471 
   1472 #if 0
   1473 	/*
   1474 	 * Try to cause a triple fault and watchdog reset by unmapping the
   1475 	 * entire address space and doing a TLB flush.
   1476 	 */
   1477 	memset((void *)PTD, 0, PAGE_SIZE);
   1478 	tlbflush();
   1479 #endif
   1480 
   1481 	for (;;);
   1482 #endif /* XEN */
   1483 }
   1484 
   1485 void
   1486 cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags)
   1487 {
   1488 	const struct trapframe *tf = l->l_md.md_regs;
   1489 	__greg_t *gr = mcp->__gregs;
   1490 	__greg_t ras_eip;
   1491 
   1492 	/* Save register context. */
   1493 #ifdef VM86
   1494 	if (tf->tf_eflags & PSL_VM) {
   1495 		gr[_REG_GS]  = tf->tf_vm86_gs;
   1496 		gr[_REG_FS]  = tf->tf_vm86_fs;
   1497 		gr[_REG_ES]  = tf->tf_vm86_es;
   1498 		gr[_REG_DS]  = tf->tf_vm86_ds;
   1499 		gr[_REG_EFL] = get_vflags(l);
   1500 	} else
   1501 #endif
   1502 	{
   1503 		gr[_REG_GS]  = tf->tf_gs;
   1504 		gr[_REG_FS]  = tf->tf_fs;
   1505 		gr[_REG_ES]  = tf->tf_es;
   1506 		gr[_REG_DS]  = tf->tf_ds;
   1507 		gr[_REG_EFL] = tf->tf_eflags;
   1508 	}
   1509 	gr[_REG_EDI]    = tf->tf_edi;
   1510 	gr[_REG_ESI]    = tf->tf_esi;
   1511 	gr[_REG_EBP]    = tf->tf_ebp;
   1512 	gr[_REG_EBX]    = tf->tf_ebx;
   1513 	gr[_REG_EDX]    = tf->tf_edx;
   1514 	gr[_REG_ECX]    = tf->tf_ecx;
   1515 	gr[_REG_EAX]    = tf->tf_eax;
   1516 	gr[_REG_EIP]    = tf->tf_eip;
   1517 	gr[_REG_CS]     = tf->tf_cs;
   1518 	gr[_REG_ESP]    = tf->tf_esp;
   1519 	gr[_REG_UESP]   = tf->tf_esp;
   1520 	gr[_REG_SS]     = tf->tf_ss;
   1521 	gr[_REG_TRAPNO] = tf->tf_trapno;
   1522 	gr[_REG_ERR]    = tf->tf_err;
   1523 
   1524 	if ((ras_eip = (__greg_t)ras_lookup(l->l_proc,
   1525 	    (void *) gr[_REG_EIP])) != -1)
   1526 		gr[_REG_EIP] = ras_eip;
   1527 
   1528 	*flags |= _UC_CPU;
   1529 
   1530 	mcp->_mc_tlsbase = (uintptr_t)l->l_private;
   1531 	*flags |= _UC_TLSBASE;
   1532 
   1533 	/*
   1534 	 * Save floating point register context.
   1535 	 *
   1536 	 * If the cpu doesn't support fxsave we must still write to
   1537 	 * the entire 512 byte area - otherwise we leak kernel memory
   1538 	 * contents to userspace.
   1539 	 * It wouldn't matter if we were doing the copyout here.
   1540 	 * So we might as well convert to fxsave format.
   1541 	 */
   1542 	__CTASSERT(sizeof (struct fxsave) ==
   1543 	    sizeof mcp->__fpregs.__fp_reg_set.__fp_xmm_state);
   1544 	process_read_fpregs_xmm(l, (struct fxsave *)
   1545 	    &mcp->__fpregs.__fp_reg_set.__fp_xmm_state);
   1546 	memset(&mcp->__fpregs.__fp_pad, 0, sizeof mcp->__fpregs.__fp_pad);
   1547 	*flags |= _UC_FXSAVE | _UC_FPU;
   1548 }
   1549 
   1550 int
   1551 cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp)
   1552 {
   1553 	const __greg_t *gr = mcp->__gregs;
   1554 	struct trapframe *tf = l->l_md.md_regs;
   1555 
   1556 	/*
   1557 	 * Check for security violations.  If we're returning
   1558 	 * to protected mode, the CPU will validate the segment
   1559 	 * registers automatically and generate a trap on
   1560 	 * violations.  We handle the trap, rather than doing
   1561 	 * all of the checking here.
   1562 	 */
   1563 	if (((gr[_REG_EFL] ^ tf->tf_eflags) & PSL_USERSTATIC) ||
   1564 	    !USERMODE(gr[_REG_CS], gr[_REG_EFL]))
   1565 		return EINVAL;
   1566 
   1567 	return 0;
   1568 }
   1569 
   1570 int
   1571 cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags)
   1572 {
   1573 	struct trapframe *tf = l->l_md.md_regs;
   1574 	const __greg_t *gr = mcp->__gregs;
   1575 	struct proc *p = l->l_proc;
   1576 	int error;
   1577 
   1578 	/* Restore register context, if any. */
   1579 	if ((flags & _UC_CPU) != 0) {
   1580 #ifdef VM86
   1581 		if (gr[_REG_EFL] & PSL_VM) {
   1582 			tf->tf_vm86_gs = gr[_REG_GS];
   1583 			tf->tf_vm86_fs = gr[_REG_FS];
   1584 			tf->tf_vm86_es = gr[_REG_ES];
   1585 			tf->tf_vm86_ds = gr[_REG_DS];
   1586 			set_vflags(l, gr[_REG_EFL]);
   1587 			if (flags & _UC_VM) {
   1588 				void syscall_vm86(struct trapframe *);
   1589 				l->l_proc->p_md.md_syscall = syscall_vm86;
   1590 			}
   1591 		} else
   1592 #endif
   1593 		{
   1594 			error = cpu_mcontext_validate(l, mcp);
   1595 			if (error)
   1596 				return error;
   1597 
   1598 			tf->tf_gs = gr[_REG_GS];
   1599 			tf->tf_fs = gr[_REG_FS];
   1600 			tf->tf_es = gr[_REG_ES];
   1601 			tf->tf_ds = gr[_REG_DS];
   1602 			/* Only change the user-alterable part of eflags */
   1603 			tf->tf_eflags &= ~PSL_USER;
   1604 			tf->tf_eflags |= (gr[_REG_EFL] & PSL_USER);
   1605 		}
   1606 		tf->tf_edi    = gr[_REG_EDI];
   1607 		tf->tf_esi    = gr[_REG_ESI];
   1608 		tf->tf_ebp    = gr[_REG_EBP];
   1609 		tf->tf_ebx    = gr[_REG_EBX];
   1610 		tf->tf_edx    = gr[_REG_EDX];
   1611 		tf->tf_ecx    = gr[_REG_ECX];
   1612 		tf->tf_eax    = gr[_REG_EAX];
   1613 		tf->tf_eip    = gr[_REG_EIP];
   1614 		tf->tf_cs     = gr[_REG_CS];
   1615 		tf->tf_esp    = gr[_REG_UESP];
   1616 		tf->tf_ss     = gr[_REG_SS];
   1617 	}
   1618 
   1619 	if ((flags & _UC_TLSBASE) != 0)
   1620 		lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase);
   1621 
   1622 	/* Restore floating point register context, if given. */
   1623 	if ((flags & _UC_FPU) != 0) {
   1624 		__CTASSERT(sizeof (struct fxsave) ==
   1625 		    sizeof mcp->__fpregs.__fp_reg_set.__fp_xmm_state);
   1626 		__CTASSERT(sizeof (struct save87) ==
   1627 		    sizeof mcp->__fpregs.__fp_reg_set.__fpchip_state);
   1628 
   1629 		if (flags & _UC_FXSAVE) {
   1630 			process_write_fpregs_xmm(l, (const struct fxsave *)
   1631 				    &mcp->__fpregs.__fp_reg_set.__fp_xmm_state);
   1632 		} else {
   1633 			process_write_fpregs_s87(l, (const struct save87 *)
   1634 				    &mcp->__fpregs.__fp_reg_set.__fpchip_state);
   1635 		}
   1636 	}
   1637 
   1638 	mutex_enter(p->p_lock);
   1639 	if (flags & _UC_SETSTACK)
   1640 		l->l_sigstk.ss_flags |= SS_ONSTACK;
   1641 	if (flags & _UC_CLRSTACK)
   1642 		l->l_sigstk.ss_flags &= ~SS_ONSTACK;
   1643 	mutex_exit(p->p_lock);
   1644 	return (0);
   1645 }
   1646 
   1647 void
   1648 cpu_initclocks(void)
   1649 {
   1650 
   1651 	(*initclock_func)();
   1652 }
   1653 
   1654 #define	DEV_IO 14		/* iopl for compat_10 */
   1655 
   1656 int
   1657 mm_md_open(dev_t dev, int flag, int mode, struct lwp *l)
   1658 {
   1659 
   1660 	switch (minor(dev)) {
   1661 	case DEV_IO:
   1662 		/*
   1663 		 * This is done by i386_iopl(3) now.
   1664 		 *
   1665 		 * #if defined(COMPAT_10) || defined(COMPAT_FREEBSD)
   1666 		 */
   1667 		if (flag & FWRITE) {
   1668 			struct trapframe *fp;
   1669 			int error;
   1670 
   1671 			error = kauth_authorize_machdep(l->l_cred,
   1672 			    KAUTH_MACHDEP_IOPL, NULL, NULL, NULL, NULL);
   1673 			if (error)
   1674 				return (error);
   1675 			fp = curlwp->l_md.md_regs;
   1676 			fp->tf_eflags |= PSL_IOPL;
   1677 		}
   1678 		break;
   1679 	default:
   1680 		break;
   1681 	}
   1682 	return 0;
   1683 }
   1684 
   1685 #ifdef PAE
   1686 void
   1687 cpu_alloc_l3_page(struct cpu_info *ci)
   1688 {
   1689 	int ret;
   1690 	struct pglist pg;
   1691 	struct vm_page *vmap;
   1692 
   1693 	KASSERT(ci != NULL);
   1694 	/*
    1695 	 * Allocate a page for the per-CPU L3 PD.  Since cr3 is 32 bits, the
    1696 	 * PA must reside below the 4GB boundary.
   1697 	 */
   1698 	ret = uvm_pglistalloc(PAGE_SIZE, 0, 0x100000000ULL, 32, 0, &pg, 1, 0);
   1699 	vmap = TAILQ_FIRST(&pg);
   1700 
   1701 	if (ret != 0 || vmap == NULL)
   1702 		panic("%s: failed to allocate L3 pglist for CPU %d (ret %d)\n",
   1703 			__func__, cpu_index(ci), ret);
   1704 
   1705 	ci->ci_pae_l3_pdirpa = vmap->phys_addr;
   1706 
   1707 	ci->ci_pae_l3_pdir = (paddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
   1708 		UVM_KMF_VAONLY | UVM_KMF_NOWAIT);
   1709 	if (ci->ci_pae_l3_pdir == NULL)
   1710 		panic("%s: failed to allocate L3 PD for CPU %d\n",
   1711 			__func__, cpu_index(ci));
   1712 
   1713 	pmap_kenter_pa((vaddr_t)ci->ci_pae_l3_pdir, ci->ci_pae_l3_pdirpa,
   1714 		VM_PROT_READ | VM_PROT_WRITE, 0);
   1715 
   1716 	pmap_update(pmap_kernel());
   1717 }
   1718 #endif /* PAE */
   1719