/* Home | History | Annotate | Line # | Download | only in i386 (source-browser header; kept as a comment so the file remains valid C) */
      1 /*	$NetBSD: machdep.c,v 1.795 2017/09/30 11:43:57 maxv Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008, 2009, 2017
      5  *     The NetBSD Foundation, Inc.
      6  * All rights reserved.
      7  *
      8  * This code is derived from software contributed to The NetBSD Foundation
      9  * by Charles M. Hannum, by Jason R. Thorpe of the Numerical Aerospace
     10  * Simulation Facility NASA Ames Research Center, by Julio M. Merino Vidal,
     11  * by Andrew Doran, and by Maxime Villard.
     12  *
     13  * Redistribution and use in source and binary forms, with or without
     14  * modification, are permitted provided that the following conditions
     15  * are met:
     16  * 1. Redistributions of source code must retain the above copyright
     17  *    notice, this list of conditions and the following disclaimer.
     18  * 2. Redistributions in binary form must reproduce the above copyright
     19  *    notice, this list of conditions and the following disclaimer in the
     20  *    documentation and/or other materials provided with the distribution.
     21  *
     22  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     24  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     25  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     32  * POSSIBILITY OF SUCH DAMAGE.
     33  */
     34 
     35 /*
     36  * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
     37  * All rights reserved.
     38  *
     39  * This code is derived from software contributed to Berkeley by
     40  * William Jolitz.
     41  *
     42  * Redistribution and use in source and binary forms, with or without
     43  * modification, are permitted provided that the following conditions
     44  * are met:
     45  * 1. Redistributions of source code must retain the above copyright
     46  *    notice, this list of conditions and the following disclaimer.
     47  * 2. Redistributions in binary form must reproduce the above copyright
     48  *    notice, this list of conditions and the following disclaimer in the
     49  *    documentation and/or other materials provided with the distribution.
     50  * 3. Neither the name of the University nor the names of its contributors
     51  *    may be used to endorse or promote products derived from this software
     52  *    without specific prior written permission.
     53  *
     54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     64  * SUCH DAMAGE.
     65  *
     66  *	@(#)machdep.c	7.4 (Berkeley) 6/3/91
     67  */
     68 
     69 #include <sys/cdefs.h>
     70 __KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.795 2017/09/30 11:43:57 maxv Exp $");
     71 
     72 #include "opt_beep.h"
     73 #include "opt_compat_freebsd.h"
     74 #include "opt_compat_netbsd.h"
     75 #include "opt_cpureset_delay.h"
     76 #include "opt_ddb.h"
     77 #include "opt_ipkdb.h"
     78 #include "opt_kgdb.h"
     79 #include "opt_mtrr.h"
     80 #include "opt_modular.h"
     81 #include "opt_multiboot.h"
     82 #include "opt_multiprocessor.h"
     83 #include "opt_physmem.h"
     84 #include "opt_realmem.h"
     85 #include "opt_user_ldt.h"
     86 #include "opt_xen.h"
     87 #include "isa.h"
     88 #include "pci.h"
     89 
     90 #include <sys/param.h>
     91 #include <sys/systm.h>
     92 #include <sys/signal.h>
     93 #include <sys/signalvar.h>
     94 #include <sys/kernel.h>
     95 #include <sys/cpu.h>
     96 #include <sys/exec.h>
     97 #include <sys/fcntl.h>
     98 #include <sys/reboot.h>
     99 #include <sys/conf.h>
    100 #include <sys/kauth.h>
    101 #include <sys/mbuf.h>
    102 #include <sys/msgbuf.h>
    103 #include <sys/mount.h>
    104 #include <sys/syscallargs.h>
    105 #include <sys/core.h>
    106 #include <sys/kcore.h>
    107 #include <sys/ucontext.h>
    108 #include <sys/ras.h>
    109 #include <sys/ksyms.h>
    110 #include <sys/device.h>
    111 
    112 #ifdef IPKDB
    113 #include <ipkdb/ipkdb.h>
    114 #endif
    115 
    116 #ifdef KGDB
    117 #include <sys/kgdb.h>
    118 #endif
    119 
    120 #include <dev/cons.h>
    121 #include <dev/mm.h>
    122 
    123 #include <uvm/uvm.h>
    124 #include <uvm/uvm_page.h>
    125 
    126 #include <sys/sysctl.h>
    127 
    128 #include <machine/cpu.h>
    129 #include <machine/cpufunc.h>
    130 #include <machine/cpuvar.h>
    131 #include <machine/gdt.h>
    132 #include <machine/intr.h>
    133 #include <machine/kcore.h>
    134 #include <machine/pio.h>
    135 #include <machine/psl.h>
    136 #include <machine/reg.h>
    137 #include <machine/specialreg.h>
    138 #include <machine/bootinfo.h>
    139 #include <machine/mtrr.h>
    140 #include <x86/x86/tsc.h>
    141 
    142 #include <x86/fpu.h>
    143 #include <x86/dbregs.h>
    144 #include <x86/machdep.h>
    145 
    146 #include <machine/multiboot.h>
    147 
    148 #ifdef XEN
    149 #include <xen/evtchn.h>
    150 #include <xen/xen.h>
    151 #include <xen/hypervisor.h>
    152 #endif
    153 
    154 #include <dev/isa/isareg.h>
    155 #include <machine/isa_machdep.h>
    156 #include <dev/ic/i8042reg.h>
    157 
    158 #ifdef DDB
    159 #include <machine/db_machdep.h>
    160 #include <ddb/db_extern.h>
    161 #endif
    162 
    163 #include "acpica.h"
    164 #include "bioscall.h"
    165 
    166 #if NBIOSCALL > 0
    167 #include <machine/bioscall.h>
    168 #endif
    169 
    170 #if NACPICA > 0
    171 #include <dev/acpi/acpivar.h>
    172 #define ACPI_MACHDEP_PRIVATE
    173 #include <machine/acpi_machdep.h>
    174 #else
    175 #include <machine/i82489var.h>
    176 #endif
    177 
    178 #include "isa.h"
    179 #include "isadma.h"
    180 #include "ksyms.h"
    181 
    182 #include "cardbus.h"
    183 #if NCARDBUS > 0
    184 /* For rbus_min_start hint. */
    185 #include <sys/bus.h>
    186 #include <dev/cardbus/rbus.h>
    187 #include <machine/rbus_machdep.h>
    188 #endif
    189 
    190 #include "mca.h"
    191 #if NMCA > 0
    192 #include <machine/mca_machdep.h>	/* for mca_busprobe() */
    193 #endif
    194 
    195 #ifdef MULTIPROCESSOR		/* XXX */
    196 #include <machine/mpbiosvar.h>	/* XXX */
    197 #endif				/* XXX */
    198 
/* the following is used externally (sysctl_hw) */
char machine[] = "i386";		/* CPU "architecture" */
char machine_arch[] = "i386";		/* machine == machine_arch */

/* Delay (ms) before pressing the CPU reset button, overridable at build. */
#ifdef CPURESET_DELAY
int cpureset_delay = CPURESET_DELAY;
#else
int cpureset_delay = 2000; /* default to 2s */
#endif

#ifdef MTRR
struct mtrr_funcs *mtrr_funcs;	/* MTRR backend ops (set elsewhere) */
#endif

/* CPU capability flags; filled in elsewhere during CPU identification. */
int cpu_class;
int use_pae;
int i386_fpu_fdivbug;

int i386_use_fxsave;
int i386_has_sse;
int i386_has_sse2;

/* Pool backing per-lwp debug-register save areas (see setregs()). */
struct pool x86_dbregspl;

/* Virtual/physical addresses of the descriptor tables. */
vaddr_t idt_vaddr;
paddr_t idt_paddr;
vaddr_t gdt_vaddr;
paddr_t gdt_paddr;
vaddr_t ldt_vaddr;
paddr_t ldt_paddr;

vaddr_t pentium_idt_vaddr;	/* alias mapping of the IDT (see cpu_init_idt) */

struct vm_map *phys_map = NULL;	/* submap for physio, set in cpu_startup() */

extern struct bootspace bootspace;

extern paddr_t lowmem_rsvd;
extern paddr_t avail_start, avail_end;
#ifdef XEN
extern paddr_t pmap_pa_start, pmap_pa_end;
void hypervisor_callback(void);
void failsafe_callback(void);
#endif

/* Indirect delay/clock entry points: Xen or i8254 flavors. */
#ifdef XEN
void (*delay_func)(unsigned int) = xen_delay;
void (*initclock_func)(void) = xen_initclocks;
#else
void (*delay_func)(unsigned int) = i8254_delay;
void (*initclock_func)(void) = i8254_initclocks;
#endif

/*
 * Size of memory segments, before any memory is stolen.
 */
phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];
int mem_cluster_cnt = 0;

void init_bootspace(void);
void init386(paddr_t);
void initgdt(union descriptor *);

static void i386_proc0_pcb_ldt_init(void);

extern int time_adjusted;	/* non-zero if the clock was adjusted (NTP) */

/* End of loaded symbol table / boot-loader blob (see native_loader()). */
int *esym;
int *eblob;
extern int boothowto;
    268 extern int boothowto;
    269 
    270 #ifndef XEN
    271 
/* Base memory reported by BIOS. */
#ifndef REALBASEMEM
int biosbasemem = 0;		/* 0 = take the boot loader's value */
#else
int biosbasemem = REALBASEMEM;	/* hard-wired at build time */
#endif

/* Extended memory reported by BIOS. */
#ifndef REALEXTMEM
int biosextmem = 0;		/* 0 = take the boot loader's value */
#else
int biosextmem = REALEXTMEM;	/* hard-wired at build time */
#endif

/* Set if any boot-loader set biosbasemem/biosextmem. */
int biosmem_implicit;

/*
 * Representation of the bootinfo structure constructed by a NetBSD native
 * boot loader.  Only be used by native_loader().
 */
struct bootinfo_source {
	uint32_t bs_naddrs;	/* number of entries in bs_addrs */
	void *bs_addrs[1]; /* Actually longer. */
};

/* Only called by locore.S; no need to be in a header file. */
void native_loader(int, int, struct bootinfo_source *, paddr_t, int, int);
    300 
    301 /*
    302  * Called as one of the very first things during system startup (just after
    303  * the boot loader gave control to the kernel image), this routine is in
    304  * charge of retrieving the parameters passed in by the boot loader and
    305  * storing them in the appropriate kernel variables.
    306  *
    307  * WARNING: Because the kernel has not yet relocated itself to KERNBASE,
    308  * special care has to be taken when accessing memory because absolute
    309  * addresses (referring to kernel symbols) do not work.  So:
    310  *
    311  *     1) Avoid jumps to absolute addresses (such as gotos and switches).
    312  *     2) To access global variables use their physical address, which
    313  *        can be obtained using the RELOC macro.
    314  */
    315 void
    316 native_loader(int bl_boothowto, int bl_bootdev,
    317     struct bootinfo_source *bl_bootinfo, paddr_t bl_esym,
    318     int bl_biosextmem, int bl_biosbasemem)
    319 {
    320 #define RELOC(type, x) ((type)((vaddr_t)(x) - KERNBASE))
    321 
    322 	*RELOC(int *, &boothowto) = bl_boothowto;
    323 
    324 	/*
    325 	 * The boot loader provides a physical, non-relocated address
    326 	 * for the symbols table's end.  We need to convert it to a
    327 	 * virtual address.
    328 	 */
    329 	if (bl_esym != 0)
    330 		*RELOC(int **, &esym) = (int *)((vaddr_t)bl_esym + KERNBASE);
    331 	else
    332 		*RELOC(int **, &esym) = 0;
    333 
    334 	/*
    335 	 * Copy bootinfo entries (if any) from the boot loader's
    336 	 * representation to the kernel's bootinfo space.
    337 	 */
    338 	if (bl_bootinfo != NULL) {
    339 		size_t i;
    340 		uint8_t *data;
    341 		struct bootinfo *bidest;
    342 		struct btinfo_modulelist *bi;
    343 
    344 		bidest = RELOC(struct bootinfo *, &bootinfo);
    345 
    346 		data = &bidest->bi_data[0];
    347 
    348 		for (i = 0; i < bl_bootinfo->bs_naddrs; i++) {
    349 			struct btinfo_common *bc;
    350 
    351 			bc = bl_bootinfo->bs_addrs[i];
    352 
    353 			if ((data + bc->len) >
    354 			    (&bidest->bi_data[0] + BOOTINFO_MAXSIZE))
    355 				break;
    356 
    357 			memcpy(data, bc, bc->len);
    358 			/*
    359 			 * If any modules were loaded, record where they
    360 			 * end.  We'll need to skip over them.
    361 			 */
    362 			bi = (struct btinfo_modulelist *)data;
    363 			if (bi->common.type == BTINFO_MODULELIST) {
    364 				*RELOC(int **, &eblob) =
    365 				    (int *)(bi->endpa + KERNBASE);
    366 			}
    367 			data += bc->len;
    368 		}
    369 		bidest->bi_nentries = i;
    370 	}
    371 
    372 	/*
    373 	 * Configure biosbasemem and biosextmem only if they were not
    374 	 * explicitly given during the kernel's build.
    375 	 */
    376 	if (*RELOC(int *, &biosbasemem) == 0) {
    377 		*RELOC(int *, &biosbasemem) = bl_biosbasemem;
    378 		*RELOC(int *, &biosmem_implicit) = 1;
    379 	}
    380 	if (*RELOC(int *, &biosextmem) == 0) {
    381 		*RELOC(int *, &biosextmem) = bl_biosextmem;
    382 		*RELOC(int *, &biosmem_implicit) = 1;
    383 	}
    384 #undef RELOC
    385 }
    386 
    387 #endif /* XEN */
    388 
    389 /*
    390  * Machine-dependent startup code
    391  */
    392 void
    393 cpu_startup(void)
    394 {
    395 	int x, y;
    396 	vaddr_t minaddr, maxaddr;
    397 	psize_t sz;
    398 
    399 	/*
    400 	 * For console drivers that require uvm and pmap to be initialized,
    401 	 * we'll give them one more chance here...
    402 	 */
    403 	consinit();
    404 
    405 	/*
    406 	 * Initialize error message buffer (et end of core).
    407 	 */
    408 	if (msgbuf_p_cnt == 0)
    409 		panic("msgbuf paddr map has not been set up");
    410 	for (x = 0, sz = 0; x < msgbuf_p_cnt; sz += msgbuf_p_seg[x++].sz)
    411 		continue;
    412 
    413 	msgbuf_vaddr = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_VAONLY);
    414 	if (msgbuf_vaddr == 0)
    415 		panic("failed to valloc msgbuf_vaddr");
    416 
    417 	for (y = 0, sz = 0; y < msgbuf_p_cnt; y++) {
    418 		for (x = 0; x < btoc(msgbuf_p_seg[y].sz); x++, sz += PAGE_SIZE)
    419 			pmap_kenter_pa((vaddr_t)msgbuf_vaddr + sz,
    420 			    msgbuf_p_seg[y].paddr + x * PAGE_SIZE,
    421 			    VM_PROT_READ|VM_PROT_WRITE, 0);
    422 	}
    423 
    424 	pmap_update(pmap_kernel());
    425 
    426 	initmsgbuf((void *)msgbuf_vaddr, sz);
    427 
    428 #ifdef MULTIBOOT
    429 	multiboot_print_info();
    430 #endif
    431 
    432 #if NCARDBUS > 0
    433 	/* Tell RBUS how much RAM we have, so it can use heuristics. */
    434 	rbus_min_start_hint(ctob((psize_t)physmem));
    435 #endif
    436 
    437 	minaddr = 0;
    438 
    439 	/*
    440 	 * Allocate a submap for physio
    441 	 */
    442 	phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
    443 	    VM_PHYS_SIZE, 0, false, NULL);
    444 
    445 	/* Say hello. */
    446 	banner();
    447 
    448 	/* Safe for i/o port / memory space allocation to use malloc now. */
    449 #if NISA > 0 || NPCI > 0
    450 	x86_bus_space_mallocok();
    451 #endif
    452 
    453 	gdt_init();
    454 	i386_proc0_pcb_ldt_init();
    455 
    456 #ifndef XEN
    457 	cpu_init_tss(&cpu_info_primary);
    458 	ltr(cpu_info_primary.ci_tss_sel);
    459 #endif
    460 
    461 	x86_startup();
    462 }
    463 
    464 /*
    465  * Set up proc0's PCB and LDT.
    466  */
    467 static void
    468 i386_proc0_pcb_ldt_init(void)
    469 {
    470 	struct lwp *l = &lwp0;
    471 	struct pcb *pcb = lwp_getpcb(l);
    472 
    473 	pcb->pcb_cr0 = rcr0() & ~CR0_TS;
    474 	pcb->pcb_esp0 = uvm_lwp_getuarea(l) + USPACE - 16;
    475 	pcb->pcb_iopl = SEL_KPL;
    476 	l->l_md.md_regs = (struct trapframe *)pcb->pcb_esp0 - 1;
    477 	memcpy(&pcb->pcb_fsd, &gdtstore[GUDATA_SEL], sizeof(pcb->pcb_fsd));
    478 	memcpy(&pcb->pcb_gsd, &gdtstore[GUDATA_SEL], sizeof(pcb->pcb_gsd));
    479 	pcb->pcb_dbregs = NULL;
    480 
    481 #ifndef XEN
    482 	lldt(GSEL(GLDT_SEL, SEL_KPL));
    483 #else
    484 	HYPERVISOR_fpu_taskswitch(1);
    485 	HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_esp0);
    486 #endif
    487 }
    488 
    489 #ifdef XEN
    490 /* used in assembly */
    491 void i386_switch_context(lwp_t *);
    492 void i386_tls_switch(lwp_t *);
    493 
    494 /*
    495  * Switch context:
    496  * - switch stack pointer for user->kernel transition
    497  */
    498 void
    499 i386_switch_context(lwp_t *l)
    500 {
    501 	struct pcb *pcb;
    502 	struct physdev_op physop;
    503 
    504 	pcb = lwp_getpcb(l);
    505 
    506 	HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_esp0);
    507 
    508 	physop.cmd = PHYSDEVOP_SET_IOPL;
    509 	physop.u.set_iopl.iopl = pcb->pcb_iopl;
    510 	HYPERVISOR_physdev_op(&physop);
    511 }
    512 
/*
 * Per-switch TLS handling for the incoming lwp: possibly force an FPU
 * trap, then refresh the per-CPU %fs/%gs descriptors.
 */
void
i386_tls_switch(lwp_t *l)
{
	struct cpu_info *ci = curcpu();
	struct pcb *pcb = lwp_getpcb(l);

	/*
	 * Raise the IPL to IPL_HIGH.
	 * FPU IPIs can alter the LWP's saved cr0.  Dropping the priority
	 * is deferred until mi_switch(), when cpu_switchto() returns.
	 */
	(void)splhigh();

	/*
	 * If our floating point registers are on a different CPU,
	 * set CR0_TS so we'll trap rather than reuse bogus state.
	 */
	if (l != ci->ci_fpcurlwp) {
		HYPERVISOR_fpu_taskswitch(1);
	}

	/* Update TLS segment pointers */
	update_descriptor(&ci->ci_gdt[GUFS_SEL],
			  (union descriptor *) &pcb->pcb_fsd);
	update_descriptor(&ci->ci_gdt[GUGS_SEL],
			  (union descriptor *) &pcb->pcb_gsd);

}
    541 #endif /* XEN */
    542 
/* XXX */
/* Assembly interrupt entry points are the C name prefixed with 'X'. */
#define IDTVEC(name)	__CONCAT(X, name)
typedef void (vector)(void);	/* type of an interrupt/trap entry point */

#ifndef XEN
static void	tss_init(struct i386tss *, void *, void *);
    549 
    550 static void
    551 tss_init(struct i386tss *tss, void *stack, void *func)
    552 {
    553 	KASSERT(curcpu()->ci_pmap == pmap_kernel());
    554 
    555 	memset(tss, 0, sizeof *tss);
    556 	tss->tss_esp0 = tss->tss_esp = (int)((char *)stack + USPACE - 16);
    557 	tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
    558 	tss->__tss_cs = GSEL(GCODE_SEL, SEL_KPL);
    559 	tss->tss_fs = GSEL(GCPU_SEL, SEL_KPL);
    560 	tss->tss_gs = tss->__tss_es = tss->__tss_ds =
    561 	    tss->__tss_ss = GSEL(GDATA_SEL, SEL_KPL);
    562 	/* %cr3 contains the value associated to pmap_kernel */
    563 	tss->tss_cr3 = rcr3();
    564 	tss->tss_esp = (int)((char *)stack + USPACE - 16);
    565 	tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
    566 	tss->__tss_eflags = PSL_MBO | PSL_NT;	/* XXX not needed? */
    567 	tss->__tss_eip = (int)func;
    568 }
    569 
extern vector IDTVEC(tss_trap08);
#if defined(DDB) && defined(MULTIPROCESSOR)
extern vector Xintrddbipi, Xx2apic_intrddbipi;
extern int ddb_vec;
#endif

/*
 * Install task gates that run the double-fault handler (and, with
 * DDB+MULTIPROCESSOR, the DDB IPI handler) on a private TSS and stack.
 */
void
cpu_set_tss_gates(struct cpu_info *ci)
{
	struct segment_descriptor sd;
	void *doubleflt_stack;

	/* Private stack + TSS for the double-fault handler. */
	doubleflt_stack = (void *)uvm_km_alloc(kernel_map, USPACE, 0,
	    UVM_KMF_WIRED);
	tss_init(&ci->ci_doubleflt_tss, doubleflt_stack, IDTVEC(tss_trap08));

	/* Publish the TSS through a GDT slot... */
	setsegment(&sd, &ci->ci_doubleflt_tss, sizeof(struct i386tss) - 1,
	    SDT_SYS386TSS, SEL_KPL, 0, 0);
	ci->ci_gdt[GTRAPTSS_SEL].sd = sd;

	/* ...and point IDT vector 8 (#DF) at it via a task gate. */
	setgate(&idt[8], NULL, 0, SDT_SYSTASKGT, SEL_KPL,
	    GSEL(GTRAPTSS_SEL, SEL_KPL));

#if defined(DDB) && defined(MULTIPROCESSOR)
	/*
	 * Set up separate handler for the DDB IPI, so that it doesn't
	 * stomp on a possibly corrupted stack.
	 *
	 * XXX overwriting the gate set in db_machine_init.
	 * Should rearrange the code so that it's set only once.
	 */
	void *ddbipi_stack;

	ddbipi_stack = (void *)uvm_km_alloc(kernel_map, USPACE, 0,
	    UVM_KMF_WIRED);
	tss_init(&ci->ci_ddbipi_tss, ddbipi_stack,
	    x2apic_mode ? Xx2apic_intrddbipi : Xintrddbipi);

	setsegment(&sd, &ci->ci_ddbipi_tss, sizeof(struct i386tss) - 1,
	    SDT_SYS386TSS, SEL_KPL, 0, 0);
	ci->ci_gdt[GIPITSS_SEL].sd = sd;

	setgate(&idt[ddb_vec], NULL, 0, SDT_SYSTASKGT, SEL_KPL,
	    GSEL(GIPITSS_SEL, SEL_KPL));
#endif
}
    616 
    617 /*
    618  * Set up TSS and I/O bitmap.
    619  */
    620 void
    621 cpu_init_tss(struct cpu_info *ci)
    622 {
    623 	struct i386tss *tss = &ci->ci_tss;
    624 
    625 	tss->tss_iobase = IOMAP_INVALOFF << 16;
    626 	tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
    627 	tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
    628 	tss->tss_cr3 = rcr3();
    629 	ci->ci_tss_sel = tss_alloc(tss);
    630 }
    631 #endif /* XEN */
    632 
    633 void *
    634 getframe(struct lwp *l, int sig, int *onstack)
    635 {
    636 	struct proc *p = l->l_proc;
    637 	struct trapframe *tf = l->l_md.md_regs;
    638 
    639 	/* Do we need to jump onto the signal stack? */
    640 	*onstack = (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0
    641 	    && (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
    642 	if (*onstack)
    643 		return (char *)l->l_sigstk.ss_sp + l->l_sigstk.ss_size;
    644 	return (void *)tf->tf_esp;
    645 }
    646 
    647 /*
    648  * Build context to run handler in.  We invoke the handler
    649  * directly, only returning via the trampoline.  Note the
    650  * trampoline version numbers are coordinated with machine-
    651  * dependent code in libc.
    652  */
    653 void
    654 buildcontext(struct lwp *l, int sel, void *catcher, void *fp)
    655 {
    656 	struct trapframe *tf = l->l_md.md_regs;
    657 
    658 	tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
    659 	tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
    660 	tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
    661 	tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
    662 	tf->tf_eip = (int)catcher;
    663 	tf->tf_cs = GSEL(sel, SEL_UPL);
    664 	tf->tf_eflags &= ~PSL_CLEARSIG;
    665 	tf->tf_esp = (int)fp;
    666 	tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
    667 
    668 	/* Ensure FP state is reset. */
    669 	fpu_save_area_reset(l);
    670 }
    671 
/*
 * Deliver a signal with siginfo: build a sigframe on the chosen stack,
 * copy it out to user space, and redirect the lwp to the handler.
 */
void
sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask)
{
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;
	struct pmap *pmap = vm_map_pmap(&p->p_vmspace->vm_map);
	/* Code selector matching the process' executable address range. */
	int sel = pmap->pm_hiexec > I386_MAX_EXE_ADDR ?
	    GUCODEBIG_SEL : GUCODE_SEL;
	struct sigacts *ps = p->p_sigacts;
	int onstack, error;
	int sig = ksi->ksi_signo;
	struct sigframe_siginfo *fp = getframe(l, sig, &onstack), frame;
	sig_t catcher = SIGACTION(p, sig).sa_handler;

	KASSERT(mutex_owned(p->p_lock));

	/* Make room for the signal frame below the stack pointer. */
	fp--;

	/* Return address: the signal trampoline supplied by libc. */
	frame.sf_ra = (int)ps->sa_sigdesc[sig].sd_tramp;
	frame.sf_signum = sig;
	frame.sf_sip = &fp->sf_si;
	frame.sf_ucp = &fp->sf_uc;
	frame.sf_si._info = ksi->ksi_info;
	frame.sf_uc.uc_flags = _UC_SIGMASK|_UC_VM;
	frame.sf_uc.uc_sigmask = *mask;
	frame.sf_uc.uc_link = l->l_ctxlink;
	frame.sf_uc.uc_flags |= (l->l_sigstk.ss_flags & SS_ONSTACK)
	    ? _UC_SETSTACK : _UC_CLRSTACK;
	memset(&frame.sf_uc.uc_stack, 0, sizeof(frame.sf_uc.uc_stack));

	sendsig_reset(l, sig);

	/* copyout can fault; drop the proc lock across it. */
	mutex_exit(p->p_lock);
	cpu_getmcontext(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags);
	error = copyout(&frame, fp, sizeof(frame));
	mutex_enter(p->p_lock);

	if (error != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		sigexit(l, SIGILL);
		/* NOTREACHED */
	}

	buildcontext(l, sel, catcher, fp);

	/* Remember that we're now on the signal stack. */
	if (onstack)
		l->l_sigstk.ss_flags |= SS_ONSTACK;
}
    724 
    725 static void
    726 maybe_dump(int howto)
    727 {
    728 	int s;
    729 
    730 	/* Disable interrupts. */
    731 	s = splhigh();
    732 
    733 	/* Do a dump if requested. */
    734 	if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP)
    735 		dumpsys();
    736 
    737 	splx(s);
    738 }
    739 
/*
 * Halt, power down or reboot the machine, optionally dumping and
 * syncing/unmounting file systems first.
 */
void
cpu_reboot(int howto, char *bootstr)
{
	static bool syncdone = false;
	int s = IPL_NONE;

	/* Too early to sync or detach anything: just halt. */
	if (cold) {
		howto |= RB_HALT;
		goto haltsys;
	}

	boothowto = howto;

	/* XXX used to dump after vfs_shutdown() and before
	 * detaching devices / shutdown hooks / pmf_system_shutdown().
	 */
	maybe_dump(howto);

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by syncing or unmounting the file systems.
	 */
	if ((howto & RB_NOSYNC) == 0 && panicstr == NULL) {
		if (!syncdone) {
			syncdone = true;
			/* XXX used to force unmount as well, here */
			vfs_sync_all(curlwp);
			/*
			 * If we've been adjusting the clock, the todr
			 * will be out of synch; adjust it now.
			 *
			 * XXX used to do this after unmounting all
			 * filesystems with vfs_shutdown().
			 */
			if (time_adjusted != 0)
				resettodr();
		}

		/* Alternate unmount and detach until neither progresses. */
		while (vfs_unmountall1(curlwp, false, false) ||
		       config_detach_all(boothowto) ||
		       vfs_unmount_forceone(curlwp))
			;	/* do nothing */
	} else
		suspendsched();

	pmf_system_shutdown(boothowto);

	s = splhigh();

	/* amd64 maybe_dump() */

haltsys:
	doshutdownhooks();

	if ((howto & RB_POWERDOWN) == RB_POWERDOWN) {
#if NACPICA > 0
		if (s != IPL_NONE)
			splx(s);

		/* S5 = soft-off; does not return on success. */
		acpi_enter_sleep_state(ACPI_STATE_S5);
#else
		__USE(s);
#endif
#ifdef XEN
		HYPERVISOR_shutdown();
		for (;;);
#endif
	}

#ifdef MULTIPROCESSOR
	cpu_broadcast_halt();
#endif /* MULTIPROCESSOR */

	if (howto & RB_HALT) {
#if NACPICA > 0
		acpi_disable();
#endif

		printf("\n");
		printf("The operating system has halted.\n");
		printf("Please press any key to reboot.\n\n");

#ifdef BEEP_ONHALT
		{
			int c;
			for (c = BEEP_ONHALT_COUNT; c > 0; c--) {
				sysbeep(BEEP_ONHALT_PITCH,
					BEEP_ONHALT_PERIOD * hz / 1000);
				delay(BEEP_ONHALT_PERIOD * 1000);
				sysbeep(0, BEEP_ONHALT_PERIOD * hz / 1000);
				delay(BEEP_ONHALT_PERIOD * 1000);
			}
		}
#endif

		cnpollc(1);	/* for proper keyboard command handling */
		if (cngetc() == 0) {
			/* no console attached, so just hlt */
			printf("No keyboard - cannot reboot after all.\n");
			for(;;) {
				x86_hlt();
			}
		}
		cnpollc(0);
	}

	printf("rebooting...\n");
	if (cpureset_delay > 0)
		delay(cpureset_delay * 1000);
	cpu_reset();
	for(;;) ;
	/*NOTREACHED*/
}
    853 
    854 /*
    855  * Clear registers on exec
    856  */
    857 void
    858 setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
    859 {
    860 	struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
    861 	struct pcb *pcb = lwp_getpcb(l);
    862 	struct trapframe *tf;
    863 
    864 #ifdef USER_LDT
    865 	pmap_ldt_cleanup(l);
    866 #endif
    867 
    868 	fpu_save_area_clear(l, pack->ep_osversion >= 699002600
    869 	    ? __INITIAL_NPXCW__ : __NetBSD_COMPAT_NPXCW__);
    870 
    871 	memcpy(&pcb->pcb_fsd, &gdtstore[GUDATA_SEL], sizeof(pcb->pcb_fsd));
    872 	memcpy(&pcb->pcb_gsd, &gdtstore[GUDATA_SEL], sizeof(pcb->pcb_gsd));
    873 	if (pcb->pcb_dbregs != NULL) {
    874 		pool_put(&x86_dbregspl, pcb->pcb_dbregs);
    875 		pcb->pcb_dbregs = NULL;
    876 	}
    877 
    878 	tf = l->l_md.md_regs;
    879 	tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
    880 	tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
    881 	tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL);
    882 	tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL);
    883 	tf->tf_edi = 0;
    884 	tf->tf_esi = 0;
    885 	tf->tf_ebp = 0;
    886 	tf->tf_ebx = l->l_proc->p_psstrp;
    887 	tf->tf_edx = 0;
    888 	tf->tf_ecx = 0;
    889 	tf->tf_eax = 0;
    890 	tf->tf_eip = pack->ep_entry;
    891 	tf->tf_cs = pmap->pm_hiexec > I386_MAX_EXE_ADDR ?
    892 	    LSEL(LUCODEBIG_SEL, SEL_UPL) : LSEL(LUCODE_SEL, SEL_UPL);
    893 	tf->tf_eflags = PSL_USERSET;
    894 	tf->tf_esp = stack;
    895 	tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL);
    896 }
    897 
    898 /*
    899  * Initialize segments and descriptor tables
    900  */
    901 
    902 union descriptor *gdtstore, *ldtstore;
    903 union descriptor *pentium_idt;
    904 extern vaddr_t lwp0uarea;
    905 
    906 void
    907 setgate(struct gate_descriptor *gd, void *func, int args, int type, int dpl,
    908     int sel)
    909 {
    910 
    911 	gd->gd_looffset = (int)func;
    912 	gd->gd_selector = sel;
    913 	gd->gd_stkcpy = args;
    914 	gd->gd_xx = 0;
    915 	gd->gd_type = type;
    916 	gd->gd_dpl = dpl;
    917 	gd->gd_p = 1;
    918 	gd->gd_hioffset = (int)func >> 16;
    919 }
    920 
    921 void
    922 unsetgate(struct gate_descriptor *gd)
    923 {
    924 	gd->gd_p = 0;
    925 	gd->gd_hioffset = 0;
    926 	gd->gd_looffset = 0;
    927 	gd->gd_selector = 0;
    928 	gd->gd_xx = 0;
    929 	gd->gd_stkcpy = 0;
    930 	gd->gd_type = 0;
    931 	gd->gd_dpl = 0;
    932 }
    933 
    934 void
    935 setregion(struct region_descriptor *rd, void *base, size_t limit)
    936 {
    937 
    938 	rd->rd_limit = (int)limit;
    939 	rd->rd_base = (int)base;
    940 }
    941 
    942 void
    943 setsegment(struct segment_descriptor *sd, const void *base, size_t limit,
    944     int type, int dpl, int def32, int gran)
    945 {
    946 
    947 	sd->sd_lolimit = (int)limit;
    948 	sd->sd_lobase = (int)base;
    949 	sd->sd_type = type;
    950 	sd->sd_dpl = dpl;
    951 	sd->sd_p = 1;
    952 	sd->sd_hilimit = (int)limit >> 16;
    953 	sd->sd_xx = 0;
    954 	sd->sd_def32 = def32;
    955 	sd->sd_gran = gran;
    956 	sd->sd_hibase = (int)base >> 24;
    957 }
    958 
/* XXX */
extern vector IDTVEC(syscall);
extern vector *IDTVEC(exceptions)[];
#ifdef XEN
#define MAX_XEN_IDT 128
trap_info_t xen_idt[MAX_XEN_IDT];	/* trap table handed to Xen */
int xen_idt_idx;			/* next free slot in xen_idt */
extern union descriptor tmpgdt[];
#endif

/*
 * Activate the IDT: load it with lidt natively, or register the trap
 * table with the hypervisor under Xen.
 */
void
cpu_init_idt(void)
{
#ifndef XEN
	struct region_descriptor region;
	/* Load via the pentium_idt alias mapping. */
	setregion(&region, pentium_idt, NIDT * sizeof(idt[0]) - 1);
	lidt(&region);
#else
	if (HYPERVISOR_set_trap_table(xen_idt))
		panic("HYPERVISOR_set_trap_table %p failed\n", xen_idt);
#endif
}
    981 
/*
 * Build the bootstrap GDT in the page supplied by the caller and load it.
 * Native: zero the table, fill in the segments and lgdt it.  Xen: the gdt
 * page must instead be remapped read-only and registered with the
 * hypervisor via HYPERVISOR_set_gdt().
 */
void
initgdt(union descriptor *tgdt)
{
	KASSERT(tgdt != NULL);

	gdtstore = tgdt;
#ifdef XEN
	u_long	frames[16];
#else
	struct region_descriptor region;
	memset(gdtstore, 0, NGDT * sizeof(*gdtstore));
#endif

	/* make gdt gates and memory segments */
	setsegment(&gdtstore[GCODE_SEL].sd, 0, 0xfffff,
	    SDT_MEMERA, SEL_KPL, 1, 1);
	setsegment(&gdtstore[GDATA_SEL].sd, 0, 0xfffff,
	    SDT_MEMRWA, SEL_KPL, 1, 1);
	setsegment(&gdtstore[GUCODE_SEL].sd, 0, x86_btop(I386_MAX_EXE_ADDR) - 1,
	    SDT_MEMERA, SEL_UPL, 1, 1);
	setsegment(&gdtstore[GUCODEBIG_SEL].sd, 0, 0xfffff,
	    SDT_MEMERA, SEL_UPL, 1, 1);
	setsegment(&gdtstore[GUDATA_SEL].sd, 0, 0xfffff,
	    SDT_MEMRWA, SEL_UPL, 1, 1);
#if NBIOSCALL > 0
	/* bios trampoline GDT entries */
	setsegment(&gdtstore[GBIOSCODE_SEL].sd, 0, 0xfffff,
	    SDT_MEMERA, SEL_KPL, 0, 0);
	setsegment(&gdtstore[GBIOSDATA_SEL].sd, 0, 0xfffff,
	    SDT_MEMRWA, SEL_KPL, 0, 0);
#endif
	/* Per-CPU segment: base covers cpu_info_primary (%fs, see below). */
	setsegment(&gdtstore[GCPU_SEL].sd, &cpu_info_primary,
	    sizeof(struct cpu_info) - 1, SDT_MEMRWA, SEL_KPL, 1, 0);

#ifndef XEN
	setregion(&region, gdtstore, NGDT * sizeof(gdtstore[0]) - 1);
	lgdt(&region);
#else /* !XEN */
	/*
	 * We jumpstart the bootstrap process a bit so we can update
	 * page permissions. This is done redundantly later from
	 * x86_xpmap.c:xen_locore() - harmless.
	 */
	xpmap_phys_to_machine_mapping =
	    (unsigned long *)xen_start_info.mfn_list;

	frames[0] = xpmap_ptom((uint32_t)gdtstore - KERNBASE) >> PAGE_SHIFT;
	{	/*
		 * Enter the gdt page RO into the kernel map. We can't
		 * use pmap_kenter_pa() here, because %fs is not
		 * usable until the gdt is loaded, and %fs is used as
		 * the base pointer for curcpu() and curlwp(), both of
		 * which are in the callpath of pmap_kenter_pa().
		 * So we mash up our own - this is MD code anyway.
		 */
		extern pt_entry_t xpmap_pg_nx;
		pt_entry_t pte;

		pte = pmap_pa2pte((vaddr_t)gdtstore - KERNBASE);
		pte |= PG_RO | xpmap_pg_nx | PG_V;

		if (HYPERVISOR_update_va_mapping((vaddr_t)gdtstore, pte,
		    UVMF_INVLPG) < 0) {
			panic("gdt page RO update failed.\n");
		}
	}

	if (HYPERVISOR_set_gdt(frames, NGDT /* XXX is it right ? */))
		panic("HYPERVISOR_set_gdt failed!\n");

	lgdt_finish();
#endif /* !XEN */
}
   1055 
   1056 #ifndef XEN
   1057 static void
   1058 init386_pte0(void)
   1059 {
   1060 	paddr_t paddr;
   1061 	vaddr_t vaddr;
   1062 
   1063 	paddr = 4 * PAGE_SIZE;
   1064 	vaddr = (vaddr_t)vtopte(0);
   1065 	pmap_kenter_pa(vaddr, paddr, VM_PROT_ALL, 0);
   1066 	pmap_update(pmap_kernel());
   1067 	/* make sure it is clean before using */
   1068 	memset((void *)vaddr, 0, PAGE_SIZE);
   1069 }
   1070 #endif /* !XEN */
   1071 
/*
 * Register the kernel symbol table with ksyms/DDB.  Symbols may come
 * from the multiboot information, from the bootloader's BTINFO_SYMTAB
 * record, or from data appended directly after the kernel image (esym).
 */
static void
init386_ksyms(void)
{
#if NKSYMS || defined(DDB) || defined(MODULAR)
	extern int end;
	struct btinfo_symtab *symtab;

#ifdef DDB
	db_machine_init();
#endif

#if defined(MULTIBOOT)
	if (multiboot_ksyms_addsyms_elf())
		return;
#endif

	/* No bootinfo symbol table: symbols follow the kernel image. */
	if ((symtab = lookup_bootinfo(BTINFO_SYMTAB)) == NULL) {
		ksyms_addsyms_elf(*(int *)&end, ((int *)&end) + 1, esym);
		return;
	}

	/* Adding KERNBASE relocates the bootloader-passed addresses to KVA. */
	symtab->ssym += KERNBASE;
	symtab->esym += KERNBASE;
	ksyms_addsyms_elf(symtab->nsym, (int *)symtab->ssym, (int *)symtab->esym);
#endif
}
   1098 
   1099 void
   1100 init_bootspace(void)
   1101 {
   1102 	extern char __rodata_start;
   1103 	extern char __data_start;
   1104 	extern char __kernel_end;
   1105 
   1106 	memset(&bootspace, 0, sizeof(bootspace));
   1107 
   1108 	bootspace.text.va = KERNTEXTOFF;
   1109 	bootspace.text.pa = KERNTEXTOFF - KERNBASE;
   1110 	bootspace.text.sz = (size_t)&__rodata_start - KERNTEXTOFF;
   1111 
   1112 	bootspace.rodata.va = (vaddr_t)&__rodata_start;
   1113 	bootspace.rodata.pa = (paddr_t)(vaddr_t)&__rodata_start - KERNBASE;
   1114 	bootspace.rodata.sz = (size_t)&__data_start - (size_t)&__rodata_start;
   1115 
   1116 	bootspace.data.va = (vaddr_t)&__data_start;
   1117 	bootspace.data.pa = (paddr_t)(vaddr_t)&__data_start - KERNBASE;
   1118 	bootspace.data.sz = (size_t)&__kernel_end - (size_t)&__data_start;
   1119 
   1120 	bootspace.boot.va = (vaddr_t)&__kernel_end;
   1121 	bootspace.boot.pa = (paddr_t)(vaddr_t)&__kernel_end - KERNBASE;
   1122 	bootspace.boot.sz = (size_t)(atdevbase + IOM_SIZE) -
   1123 	    (size_t)&__kernel_end;
   1124 
   1125 	/* Virtual address of the top level page */
   1126 	bootspace.pdir = (vaddr_t)(PDPpaddr + KERNBASE);
   1127 }
   1128 
   1129 void
   1130 init386(paddr_t first_avail)
   1131 {
   1132 	extern void consinit(void);
   1133 	int x;
   1134 #ifndef XEN
   1135 	extern paddr_t local_apic_pa;
   1136 	union descriptor *tgdt;
   1137 	struct region_descriptor region;
   1138 #endif
   1139 #if NBIOSCALL > 0
   1140 	extern int biostramp_image_size;
   1141 	extern u_char biostramp_image[];
   1142 #endif
   1143 	struct pcb *pcb;
   1144 
   1145 	KASSERT(first_avail % PAGE_SIZE == 0);
   1146 
   1147 #ifdef XEN
   1148 	KASSERT(HYPERVISOR_shared_info != NULL);
   1149 	cpu_info_primary.ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[0];
   1150 #endif
   1151 
   1152 	uvm_lwp_setuarea(&lwp0, lwp0uarea);
   1153 
   1154 	cpu_probe(&cpu_info_primary);
   1155 	cpu_init_msrs(&cpu_info_primary, true);
   1156 
   1157 #ifdef PAE
   1158 	use_pae = 1;
   1159 #else
   1160 	use_pae = 0;
   1161 #endif
   1162 
   1163 	pcb = lwp_getpcb(&lwp0);
   1164 #ifdef XEN
   1165 	pcb->pcb_cr3 = PDPpaddr;
   1166 #endif
   1167 
   1168 #if defined(PAE) && !defined(XEN)
   1169 	/*
   1170 	 * Save VA and PA of L3 PD of boot processor (for Xen, this is done
   1171 	 * in xen_locore())
   1172 	 */
   1173 	cpu_info_primary.ci_pae_l3_pdirpa = rcr3();
   1174 	cpu_info_primary.ci_pae_l3_pdir = (pd_entry_t *)(rcr3() + KERNBASE);
   1175 #endif
   1176 
   1177 	uvm_md_init();
   1178 
   1179 	/*
   1180 	 * Start with 2 color bins -- this is just a guess to get us
   1181 	 * started.  We'll recolor when we determine the largest cache
   1182 	 * sizes on the system.
   1183 	 */
   1184 	uvmexp.ncolors = 2;
   1185 
   1186 	avail_start = first_avail;
   1187 
   1188 #ifndef XEN
   1189 	/*
   1190 	 * Low memory reservations:
   1191 	 * Page 0:	BIOS data
   1192 	 * Page 1:	BIOS callback
   1193 	 * Page 2:	MP bootstrap code (MP_TRAMPOLINE)
   1194 	 * Page 3:	ACPI wakeup code (ACPI_WAKEUP_ADDR)
   1195 	 * Page 4:	Temporary page table for 0MB-4MB
   1196 	 * Page 5:	Temporary page directory
   1197 	 */
   1198 	lowmem_rsvd = 6 * PAGE_SIZE;
   1199 #else /* !XEN */
   1200 	/* Parse Xen command line (replace bootinfo) */
   1201 	xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL);
   1202 
   1203 	/* Use the dummy page as a gdt */
   1204 	extern vaddr_t xen_dummy_page;
   1205 	gdtstore = (void *)xen_dummy_page;
   1206 
   1207 	/* Determine physical address space */
   1208 	avail_end = ctob((paddr_t)xen_start_info.nr_pages);
   1209 	pmap_pa_start = (KERNTEXTOFF - KERNBASE);
   1210 	pmap_pa_end = pmap_pa_start + ctob((paddr_t)xen_start_info.nr_pages);
   1211 	mem_clusters[0].start = avail_start;
   1212 	mem_clusters[0].size = avail_end - avail_start;
   1213 	mem_cluster_cnt++;
   1214 	physmem += xen_start_info.nr_pages;
   1215 	uvmexp.wired += atop(avail_start);
   1216 
   1217 	/*
   1218 	 * initgdt() has to be done before consinit(), so that %fs is properly
   1219 	 * initialised. initgdt() uses pmap_kenter_pa so it can't be called
   1220 	 * before the above variables are set.
   1221 	 */
   1222 	initgdt(gdtstore);
   1223 
   1224 	mutex_init(&pte_lock, MUTEX_DEFAULT, IPL_VM);
   1225 #endif /* XEN */
   1226 
   1227 #if NISA > 0 || NPCI > 0
   1228 	x86_bus_space_init();
   1229 #endif
   1230 
   1231 	consinit();	/* XXX SHOULD NOT BE DONE HERE */
   1232 
   1233 #ifdef DEBUG_MEMLOAD
   1234 	printf("mem_cluster_count: %d\n", mem_cluster_cnt);
   1235 #endif
   1236 
   1237 	/*
   1238 	 * Call pmap initialization to make new kernel address space.
   1239 	 * We must do this before loading pages into the VM system.
   1240 	 */
   1241 	pmap_bootstrap((vaddr_t)atdevbase + IOM_SIZE);
   1242 
   1243 #ifndef XEN
   1244 	/* Initialize the memory clusters. */
   1245 	init_x86_clusters();
   1246 
   1247 	/* Internalize the physical pages into the VM system. */
   1248 	init_x86_vm(avail_start);
   1249 #else /* !XEN */
   1250 	uvm_page_physload(atop(avail_start), atop(avail_end),
   1251 	    atop(avail_start), atop(avail_end),
   1252 	    VM_FREELIST_DEFAULT);
   1253 
   1254 	/* Reclaim the boot gdt page - see locore.s */
   1255 	{
   1256 		extern pt_entry_t xpmap_pg_nx;
   1257 		pt_entry_t pte;
   1258 
   1259 		pte = pmap_pa2pte((vaddr_t)tmpgdt - KERNBASE);
   1260 		pte |= PG_RW | xpmap_pg_nx | PG_V;
   1261 
   1262 		if (HYPERVISOR_update_va_mapping((vaddr_t)tmpgdt, pte, UVMF_INVLPG) < 0) {
   1263 			panic("tmpgdt page relaim RW update failed.\n");
   1264 		}
   1265 	}
   1266 #endif /* !XEN */
   1267 
   1268 	init_x86_msgbuf();
   1269 
   1270 #if !defined(XEN) && NBIOSCALL > 0
   1271 	/*
   1272 	 * XXX Remove this
   1273 	 *
   1274 	 * Setup a temporary Page Table Entry to allow identity mappings of
   1275 	 * the real mode address. This is required by bioscall.
   1276 	 */
   1277 	init386_pte0();
   1278 
   1279 	KASSERT(biostramp_image_size <= PAGE_SIZE);
   1280 	pmap_kenter_pa((vaddr_t)BIOSTRAMP_BASE, (paddr_t)BIOSTRAMP_BASE,
   1281 	    VM_PROT_ALL, 0);
   1282 	pmap_update(pmap_kernel());
   1283 	memcpy((void *)BIOSTRAMP_BASE, biostramp_image, biostramp_image_size);
   1284 
   1285 	/* Needed early, for bioscall() */
   1286 	cpu_info_primary.ci_pmap = pmap_kernel();
   1287 #endif
   1288 
   1289 #ifndef XEN
   1290 	pmap_kenter_pa(local_apic_va, local_apic_pa,
   1291 	    VM_PROT_READ|VM_PROT_WRITE, 0);
   1292 	pmap_update(pmap_kernel());
   1293 	memset((void *)local_apic_va, 0, PAGE_SIZE);
   1294 #endif
   1295 
   1296 	pmap_kenter_pa(idt_vaddr, idt_paddr, VM_PROT_READ|VM_PROT_WRITE, 0);
   1297 	pmap_kenter_pa(gdt_vaddr, gdt_paddr, VM_PROT_READ|VM_PROT_WRITE, 0);
   1298 	pmap_kenter_pa(ldt_vaddr, ldt_paddr, VM_PROT_READ|VM_PROT_WRITE, 0);
   1299 	pmap_update(pmap_kernel());
   1300 	memset((void *)idt_vaddr, 0, PAGE_SIZE);
   1301 	memset((void *)gdt_vaddr, 0, PAGE_SIZE);
   1302 	memset((void *)ldt_vaddr, 0, PAGE_SIZE);
   1303 
   1304 #ifndef XEN
   1305 	pmap_kenter_pa(pentium_idt_vaddr, idt_paddr, VM_PROT_READ, 0);
   1306 	pmap_update(pmap_kernel());
   1307 	pentium_idt = (union descriptor *)pentium_idt_vaddr;
   1308 
   1309 	tgdt = gdtstore;
   1310 	idt = (struct gate_descriptor *)idt_vaddr;
   1311 	gdtstore = (union descriptor *)gdt_vaddr;
   1312 	ldtstore = (union descriptor *)ldt_vaddr;
   1313 
   1314 	memcpy(gdtstore, tgdt, NGDT * sizeof(*gdtstore));
   1315 
   1316 	setsegment(&gdtstore[GLDT_SEL].sd, ldtstore,
   1317 	    NLDT * sizeof(ldtstore[0]) - 1, SDT_SYSLDT, SEL_KPL, 0, 0);
   1318 #else
   1319 	HYPERVISOR_set_callbacks(
   1320 	    GSEL(GCODE_SEL, SEL_KPL), (unsigned long)hypervisor_callback,
   1321 	    GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback);
   1322 
   1323 	ldtstore = (union descriptor *)idt_vaddr;
   1324 #endif /* XEN */
   1325 
   1326 	/* make ldt gates and memory segments */
   1327 	ldtstore[LUCODE_SEL] = gdtstore[GUCODE_SEL];
   1328 	ldtstore[LUCODEBIG_SEL] = gdtstore[GUCODEBIG_SEL];
   1329 	ldtstore[LUDATA_SEL] = gdtstore[GUDATA_SEL];
   1330 
   1331 #ifndef XEN
   1332 	/* exceptions */
   1333 	for (x = 0; x < 32; x++) {
   1334 		idt_vec_reserve(x);
   1335 		setgate(&idt[x], IDTVEC(exceptions)[x], 0, SDT_SYS386IGT,
   1336 		    (x == 3 || x == 4) ? SEL_UPL : SEL_KPL,
   1337 		    GSEL(GCODE_SEL, SEL_KPL));
   1338 	}
   1339 
   1340 	/* new-style interrupt gate for syscalls */
   1341 	idt_vec_reserve(128);
   1342 	setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386IGT, SEL_UPL,
   1343 	    GSEL(GCODE_SEL, SEL_KPL));
   1344 
   1345 	setregion(&region, gdtstore, NGDT * sizeof(gdtstore[0]) - 1);
   1346 	lgdt(&region);
   1347 
   1348 	cpu_init_idt();
   1349 #else /* !XEN */
   1350 	memset(xen_idt, 0, sizeof(trap_info_t) * MAX_XEN_IDT);
   1351 	xen_idt_idx = 0;
   1352 	for (x = 0; x < 32; x++) {
   1353 		KASSERT(xen_idt_idx < MAX_XEN_IDT);
   1354 		xen_idt[xen_idt_idx].vector = x;
   1355 
   1356 		switch (x) {
   1357 		case 2:  /* NMI */
   1358 		case 18: /* MCA */
   1359 			TI_SET_IF(&(xen_idt[xen_idt_idx]), 2);
   1360 			break;
   1361 		case 3:
   1362 		case 4:
   1363 			xen_idt[xen_idt_idx].flags = SEL_UPL;
   1364 			break;
   1365 		default:
   1366 			xen_idt[xen_idt_idx].flags = SEL_XEN;
   1367 			break;
   1368 		}
   1369 
   1370 		xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL);
   1371 		xen_idt[xen_idt_idx].address =
   1372 			(uint32_t)IDTVEC(exceptions)[x];
   1373 		xen_idt_idx++;
   1374 	}
   1375 	KASSERT(xen_idt_idx < MAX_XEN_IDT);
   1376 	xen_idt[xen_idt_idx].vector = 128;
   1377 	xen_idt[xen_idt_idx].flags = SEL_UPL;
   1378 	xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL);
   1379 	xen_idt[xen_idt_idx].address = (uint32_t)&IDTVEC(syscall);
   1380 	xen_idt_idx++;
   1381 	KASSERT(xen_idt_idx < MAX_XEN_IDT);
   1382 	lldt(GSEL(GLDT_SEL, SEL_KPL));
   1383 	cpu_init_idt();
   1384 #endif /* XEN */
   1385 
   1386 	init386_ksyms();
   1387 
   1388 #if NMCA > 0
   1389 	/* check for MCA bus, needed to be done before ISA stuff - if
   1390 	 * MCA is detected, ISA needs to use level triggered interrupts
   1391 	 * by default */
   1392 	mca_busprobe();
   1393 #endif
   1394 
   1395 #ifdef XEN
   1396 	events_default_setup();
   1397 #else
   1398 	intr_default_setup();
   1399 #endif
   1400 
   1401 	splraise(IPL_HIGH);
   1402 	x86_enable_intr();
   1403 
   1404 #ifdef DDB
   1405 	if (boothowto & RB_KDB)
   1406 		Debugger();
   1407 #endif
   1408 #ifdef IPKDB
   1409 	ipkdb_init();
   1410 	if (boothowto & RB_KDB)
   1411 		ipkdb_connect(0);
   1412 #endif
   1413 #ifdef KGDB
   1414 	kgdb_port_init();
   1415 	if (boothowto & RB_KDB) {
   1416 		kgdb_debug_init = 1;
   1417 		kgdb_connect(1);
   1418 	}
   1419 #endif
   1420 
   1421 	if (physmem < btoc(2 * 1024 * 1024)) {
   1422 		printf("warning: too little memory available; "
   1423 		       "have %lu bytes, want %lu bytes\n"
   1424 		       "running in degraded mode\n"
   1425 		       "press a key to confirm\n\n",
   1426 		       (unsigned long)ptoa(physmem), 2*1024*1024UL);
   1427 		cngetc();
   1428 	}
   1429 
   1430 	pcb->pcb_dbregs = NULL;
   1431 
   1432 	x86_dbregs_setup_initdbstate();
   1433 
   1434 	pool_init(&x86_dbregspl, sizeof(struct dbreg), 16, 0, 0, "dbregs",
   1435 	    NULL, IPL_NONE);
   1436 }
   1437 
   1438 #include <dev/ic/mc146818reg.h>		/* for NVRAM POST */
   1439 #include <i386/isa/nvram.h>		/* for NVRAM POST */
   1440 
/*
 * Reboot the machine.  Under Xen, ask the hypervisor.  Natively, try
 * progressively harsher methods: x86_reset(), then a forced triple
 * fault via an empty IDT.  Never returns.
 */
void
cpu_reset(void)
{
#ifdef XEN
	HYPERVISOR_reboot();
	for (;;);
#else /* XEN */
	struct region_descriptor region;

	x86_disable_intr();

	/*
	 * Ensure the NVRAM reset byte contains something vaguely sane.
	 */

	outb(IO_RTC, NVRAM_RESET);
	outb(IO_RTC+1, NVRAM_RESET_RST);

	/*
	 * Reset AMD Geode SC1100.
	 *
	 * 1) Write PCI Configuration Address Register (0xcf8) to
	 *    select Function 0, Register 0x44: Bridge Configuration,
	 *    GPIO and LPC Configuration Register Space, Reset
	 *    Control Register.
	 *
	 * 2) Write 0xf to PCI Configuration Data Register (0xcfc)
	 *    to reset IDE controller, IDE bus, and PCI bus, and
	 *    to trigger a system-wide reset.
	 *
	 * See AMD Geode SC1100 Processor Data Book, Revision 2.0,
	 * sections 6.3.1, 6.3.2, and 6.4.1.
	 */
	if (cpu_info_primary.ci_signature == 0x540) {
		outl(0xcf8, 0x80009044);
		outl(0xcfc, 0xf);
	}

	x86_reset();

	/*
	 * Try to cause a triple fault and watchdog reset by making the IDT
	 * invalid and causing a fault.
	 */
	memset((void *)idt, 0, NIDT * sizeof(idt[0]));
	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
	lidt(&region);
	breakpoint();

#if 0
	/*
	 * Try to cause a triple fault and watchdog reset by unmapping the
	 * entire address space and doing a TLB flush.
	 */
	memset((void *)PTD, 0, PAGE_SIZE);
	tlbflush();
#endif

	/* Nothing worked: spin until the watchdog (if any) fires. */
	for (;;);
#endif /* XEN */
}
   1502 
   1503 void
   1504 cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags)
   1505 {
   1506 	const struct trapframe *tf = l->l_md.md_regs;
   1507 	__greg_t *gr = mcp->__gregs;
   1508 	__greg_t ras_eip;
   1509 
   1510 	/* Save register context. */
   1511 	gr[_REG_GS]  = tf->tf_gs;
   1512 	gr[_REG_FS]  = tf->tf_fs;
   1513 	gr[_REG_ES]  = tf->tf_es;
   1514 	gr[_REG_DS]  = tf->tf_ds;
   1515 	gr[_REG_EFL] = tf->tf_eflags;
   1516 
   1517 	gr[_REG_EDI]    = tf->tf_edi;
   1518 	gr[_REG_ESI]    = tf->tf_esi;
   1519 	gr[_REG_EBP]    = tf->tf_ebp;
   1520 	gr[_REG_EBX]    = tf->tf_ebx;
   1521 	gr[_REG_EDX]    = tf->tf_edx;
   1522 	gr[_REG_ECX]    = tf->tf_ecx;
   1523 	gr[_REG_EAX]    = tf->tf_eax;
   1524 	gr[_REG_EIP]    = tf->tf_eip;
   1525 	gr[_REG_CS]     = tf->tf_cs;
   1526 	gr[_REG_ESP]    = tf->tf_esp;
   1527 	gr[_REG_UESP]   = tf->tf_esp;
   1528 	gr[_REG_SS]     = tf->tf_ss;
   1529 	gr[_REG_TRAPNO] = tf->tf_trapno;
   1530 	gr[_REG_ERR]    = tf->tf_err;
   1531 
   1532 	if ((ras_eip = (__greg_t)ras_lookup(l->l_proc,
   1533 	    (void *) gr[_REG_EIP])) != -1)
   1534 		gr[_REG_EIP] = ras_eip;
   1535 
   1536 	*flags |= _UC_CPU;
   1537 
   1538 	mcp->_mc_tlsbase = (uintptr_t)l->l_private;
   1539 	*flags |= _UC_TLSBASE;
   1540 
   1541 	/*
   1542 	 * Save floating point register context.
   1543 	 *
   1544 	 * If the cpu doesn't support fxsave we must still write to
   1545 	 * the entire 512 byte area - otherwise we leak kernel memory
   1546 	 * contents to userspace.
   1547 	 * It wouldn't matter if we were doing the copyout here.
   1548 	 * So we might as well convert to fxsave format.
   1549 	 */
   1550 	__CTASSERT(sizeof (struct fxsave) ==
   1551 	    sizeof mcp->__fpregs.__fp_reg_set.__fp_xmm_state);
   1552 	process_read_fpregs_xmm(l, (struct fxsave *)
   1553 	    &mcp->__fpregs.__fp_reg_set.__fp_xmm_state);
   1554 	memset(&mcp->__fpregs.__fp_pad, 0, sizeof mcp->__fpregs.__fp_pad);
   1555 	*flags |= _UC_FXSAVE | _UC_FPU;
   1556 }
   1557 
   1558 int
   1559 cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp)
   1560 {
   1561 	const __greg_t *gr = mcp->__gregs;
   1562 	struct trapframe *tf = l->l_md.md_regs;
   1563 
   1564 	/*
   1565 	 * Check for security violations.  If we're returning
   1566 	 * to protected mode, the CPU will validate the segment
   1567 	 * registers automatically and generate a trap on
   1568 	 * violations.  We handle the trap, rather than doing
   1569 	 * all of the checking here.
   1570 	 */
   1571 	if (((gr[_REG_EFL] ^ tf->tf_eflags) & PSL_USERSTATIC) ||
   1572 	    !USERMODE(gr[_REG_CS]))
   1573 		return EINVAL;
   1574 
   1575 	return 0;
   1576 }
   1577 
/*
 * Load a machine context into lwp l's trapframe and FPU state.
 * The pieces restored are selected by the _UC_* bits in flags.
 * Returns EINVAL if the register context fails validation, else 0.
 */
int
cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags)
{
	struct trapframe *tf = l->l_md.md_regs;
	const __greg_t *gr = mcp->__gregs;
	struct proc *p = l->l_proc;
	int error;

	/* Restore register context, if any. */
	if ((flags & _UC_CPU) != 0) {
		error = cpu_mcontext_validate(l, mcp);
		if (error)
			return error;

		tf->tf_gs = gr[_REG_GS];
		tf->tf_fs = gr[_REG_FS];
		tf->tf_es = gr[_REG_ES];
		tf->tf_ds = gr[_REG_DS];
		/* Only change the user-alterable part of eflags */
		tf->tf_eflags &= ~PSL_USER;
		tf->tf_eflags |= (gr[_REG_EFL] & PSL_USER);

		tf->tf_edi    = gr[_REG_EDI];
		tf->tf_esi    = gr[_REG_ESI];
		tf->tf_ebp    = gr[_REG_EBP];
		tf->tf_ebx    = gr[_REG_EBX];
		tf->tf_edx    = gr[_REG_EDX];
		tf->tf_ecx    = gr[_REG_ECX];
		tf->tf_eax    = gr[_REG_EAX];
		tf->tf_eip    = gr[_REG_EIP];
		tf->tf_cs     = gr[_REG_CS];
		tf->tf_esp    = gr[_REG_UESP];
		tf->tf_ss     = gr[_REG_SS];
	}

	if ((flags & _UC_TLSBASE) != 0)
		lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase);

	/* Restore floating point register context, if given. */
	if ((flags & _UC_FPU) != 0) {
		__CTASSERT(sizeof (struct fxsave) ==
		    sizeof mcp->__fpregs.__fp_reg_set.__fp_xmm_state);
		__CTASSERT(sizeof (struct save87) ==
		    sizeof mcp->__fpregs.__fp_reg_set.__fpchip_state);

		/* _UC_FXSAVE selects fxsave format, else old save87 format. */
		if (flags & _UC_FXSAVE) {
			process_write_fpregs_xmm(l, (const struct fxsave *)
				    &mcp->__fpregs.__fp_reg_set.__fp_xmm_state);
		} else {
			process_write_fpregs_s87(l, (const struct save87 *)
				    &mcp->__fpregs.__fp_reg_set.__fpchip_state);
		}
	}

	/* Update the signal-stack state under the proc lock. */
	mutex_enter(p->p_lock);
	if (flags & _UC_SETSTACK)
		l->l_sigstk.ss_flags |= SS_ONSTACK;
	if (flags & _UC_CLRSTACK)
		l->l_sigstk.ss_flags &= ~SS_ONSTACK;
	mutex_exit(p->p_lock);
	return (0);
}
   1640 
/*
 * Start the system clocks by dispatching to whichever interrupt-clock
 * initialization routine initclock_func currently points at.
 */
void
cpu_initclocks(void)
{

	initclock_func();
}
   1647 
#define	DEV_IO 14		/* iopl for compat_10 */

/*
 * Machine-dependent open hook for the mem device.  Opening minor
 * DEV_IO for writing grants the process I/O privilege by raising
 * IOPL in its trapframe, subject to a kauth(9) authorization check.
 */
int
mm_md_open(dev_t dev, int flag, int mode, struct lwp *l)
{

	switch (minor(dev)) {
	case DEV_IO:
		/*
		 * This is done by i386_iopl(3) now.
		 *
		 * #if defined(COMPAT_10) || defined(COMPAT_FREEBSD)
		 */
		if (flag & FWRITE) {
			struct trapframe *fp;
			int error;

			error = kauth_authorize_machdep(l->l_cred,
			    KAUTH_MACHDEP_IOPL, NULL, NULL, NULL, NULL);
			if (error)
				return (error);
			/*
			 * NOTE(review): uses curlwp rather than the 'l'
			 * passed in -- presumably the same lwp at open
			 * time; confirm.
			 */
			fp = curlwp->l_md.md_regs;
			fp->tf_eflags |= PSL_IOPL;
		}
		break;
	default:
		break;
	}
	return 0;
}
   1678 
   1679 #ifdef PAE
/*
 * Allocate and map the per-CPU PAE L3 page directory for ci.
 * Panics on allocation failure (boot-time path).
 */
void
cpu_alloc_l3_page(struct cpu_info *ci)
{
	int ret;
	struct pglist pg;
	struct vm_page *vmap;

	KASSERT(ci != NULL);
	/*
	 * Allocate a page for the per-CPU L3 PD. cr3 being 32 bits, the PA
	 * must reside below the 4GB boundary.
	 */
	ret = uvm_pglistalloc(PAGE_SIZE, 0, 0x100000000ULL, 32, 0, &pg, 1, 0);
	vmap = TAILQ_FIRST(&pg);

	if (ret != 0 || vmap == NULL)
		panic("%s: failed to allocate L3 pglist for CPU %d (ret %d)\n",
			__func__, cpu_index(ci), ret);

	ci->ci_pae_l3_pdirpa = vmap->phys_addr;

	/* Grab a VA-only chunk of KVA and map the page into it. */
	ci->ci_pae_l3_pdir = (paddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
		UVM_KMF_VAONLY | UVM_KMF_NOWAIT);
	if (ci->ci_pae_l3_pdir == NULL)
		panic("%s: failed to allocate L3 PD for CPU %d\n",
			__func__, cpu_index(ci));

	pmap_kenter_pa((vaddr_t)ci->ci_pae_l3_pdir, ci->ci_pae_l3_pdirpa,
		VM_PROT_READ | VM_PROT_WRITE, 0);

	pmap_update(pmap_kernel());
}
   1712 #endif /* PAE */
   1713