/* Home | History | Annotate | Line # | Download | only in i386 */
      1 /*	$NetBSD: locore.S,v 1.204 2025/09/09 11:34:25 bouyer Exp $	*/
      2 
      3 /*
      4  * Copyright-o-rama!
      5  */
      6 
      7 /*
      8  * Copyright (c) 1998, 2000, 2004, 2006, 2007, 2009, 2016
      9  * The NetBSD Foundation, Inc., All rights reserved.
     10  *
     11  * This code is derived from software contributed to The NetBSD Foundation
     12  * by Charles M. Hannum, by Andrew Doran and by Maxime Villard.
     13  *
     14  * Redistribution and use in source and binary forms, with or without
     15  * modification, are permitted provided that the following conditions
     16  * are met:
     17  * 1. Redistributions of source code must retain the above copyright
     18  *    notice, this list of conditions and the following disclaimer.
     19  * 2. Redistributions in binary form must reproduce the above copyright
     20  *    notice, this list of conditions and the following disclaimer in the
     21  *    documentation and/or other materials provided with the distribution.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     33  * POSSIBILITY OF SUCH DAMAGE.
     34  */
     35 
     36 /*
     37  * Copyright (c) 2006 Manuel Bouyer.
     38  *
     39  * Redistribution and use in source and binary forms, with or without
     40  * modification, are permitted provided that the following conditions
     41  * are met:
     42  * 1. Redistributions of source code must retain the above copyright
     43  *    notice, this list of conditions and the following disclaimer.
     44  * 2. Redistributions in binary form must reproduce the above copyright
     45  *    notice, this list of conditions and the following disclaimer in the
     46  *    documentation and/or other materials provided with the distribution.
     47  *
     48  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     49  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     50  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     51  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     52  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     53  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     54  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     55  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     56  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     57  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     58  *
     59  */
     60 
     61 /*
     62  * Copyright (c) 2001 Wasabi Systems, Inc.
     63  * All rights reserved.
     64  *
     65  * Written by Frank van der Linden for Wasabi Systems, Inc.
     66  *
     67  * Redistribution and use in source and binary forms, with or without
     68  * modification, are permitted provided that the following conditions
     69  * are met:
     70  * 1. Redistributions of source code must retain the above copyright
     71  *    notice, this list of conditions and the following disclaimer.
     72  * 2. Redistributions in binary form must reproduce the above copyright
     73  *    notice, this list of conditions and the following disclaimer in the
     74  *    documentation and/or other materials provided with the distribution.
     75  * 3. All advertising materials mentioning features or use of this software
     76  *    must display the following acknowledgement:
     77  *      This product includes software developed for the NetBSD Project by
     78  *      Wasabi Systems, Inc.
     79  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
     80  *    or promote products derived from this software without specific prior
     81  *    written permission.
     82  *
     83  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
     84  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     85  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     86  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
     87  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     88  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     89  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     90  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     91  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     92  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     93  * POSSIBILITY OF SUCH DAMAGE.
     94  */
     95 
     96 /*-
     97  * Copyright (c) 1990 The Regents of the University of California.
     98  * All rights reserved.
     99  *
    100  * This code is derived from software contributed to Berkeley by
    101  * William Jolitz.
    102  *
    103  * Redistribution and use in source and binary forms, with or without
    104  * modification, are permitted provided that the following conditions
    105  * are met:
    106  * 1. Redistributions of source code must retain the above copyright
    107  *    notice, this list of conditions and the following disclaimer.
    108  * 2. Redistributions in binary form must reproduce the above copyright
    109  *    notice, this list of conditions and the following disclaimer in the
    110  *    documentation and/or other materials provided with the distribution.
    111  * 3. Neither the name of the University nor the names of its contributors
    112  *    may be used to endorse or promote products derived from this software
    113  *    without specific prior written permission.
    114  *
    115  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
    116  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
    117  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
    118  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
    119  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
    120  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
    121  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    122  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    123  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    124  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    125  * SUCH DAMAGE.
    126  *
    127  *	@(#)locore.s	7.3 (Berkeley) 5/13/91
    128  */
    129 
    130 #include <machine/asm.h>
    131 __KERNEL_RCSID(0, "$NetBSD: locore.S,v 1.204 2025/09/09 11:34:25 bouyer Exp $");
    132 
    133 #include "opt_copy_symtab.h"
    134 #include "opt_ddb.h"
    135 #include "opt_modular.h"
    136 #include "opt_multiboot.h"
    137 #include "opt_realmem.h"
    138 #include "opt_xen.h"
    139 
    140 #include "assym.h"
    141 #include "lapic.h"
    142 #include "ioapic.h"
    143 #include "ksyms.h"
    144 
    145 #include <sys/errno.h>
    146 #include <sys/syscall.h>
    147 
    148 #include <machine/segments.h>
    149 #include <machine/specialreg.h>
    150 #include <machine/trap.h>
    151 #include <machine/i82489reg.h>
    152 #include <machine/frameasm.h>
    153 #include <machine/i82489reg.h>
    154 #include <machine/cputypes.h>
    155 
    156 #ifndef XENPV
    157 #include <machine/multiboot.h>
    158 #endif
    159 
    160 /* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */
    161 #include <dev/isa/isareg.h>
    162 
/*
 * RELOC(x): translate the link-time (kernel virtual) address of symbol x
 * into the physical address the kernel is loaded at, for use while paging
 * is still off.  Under XENPV the kernel already runs at its virtual
 * address, so the translation is the identity.
 */
#ifndef XENPV
#define	_RELOC(x)	((x) - KERNBASE)
#else
#define	_RELOC(x)	((x))
#endif /* XENPV */
#define	RELOC(x)	_RELOC(_C_LABEL(x))

/* 32bit version of PTE_NX */
#define PTE_NX32	0x80000000

/*
 * Byte offsets of the proc0 bootstrap areas inside the BOOTSTRAP TABLES
 * region (see the layout comment further down, before the table setup
 * code): [L3 (PAE only)] -> page directory -> proc0 stack -> first PT.
 */
#ifndef PAE
#define	PROC0_PDIR_OFF	0
#else
#define PROC0_L3_OFF	0
#define PROC0_PDIR_OFF	1 * PAGE_SIZE
#endif

#define	PROC0_STK_OFF	(PROC0_PDIR_OFF + PDP_SIZE * PAGE_SIZE)
#define	PROC0_PTP1_OFF	(PROC0_STK_OFF + UPAGES * PAGE_SIZE)
    182 
    183 /*
    184  * fillkpt - Fill in a kernel page table
    185  *	eax = pte (page frame | control | status)
    186  *	ebx = page table address
    187  *	ecx = number of pages to map
    188  *
    189  * For PAE, each entry is 8 bytes long: we must set the 4 upper bytes to 0.
    190  * This is done by the first instruction of fillkpt. In the non-PAE case, this
    191  * instruction just clears the page table entry.
    192  */
    193 #define fillkpt	\
    194 	cmpl	$0,%ecx			;	/* zero-sized? */	\
    195 	je 	2f			; \
    196 1:	movl	$0,(PDE_SIZE-4)(%ebx)	;	/* upper 32 bits: 0 */	\
    197 	movl	%eax,(%ebx)		;	/* store phys addr */	\
    198 	addl	$PDE_SIZE,%ebx		;	/* next PTE/PDE */	\
    199 	addl	$PAGE_SIZE,%eax		;	/* next phys page */	\
    200 	loop	1b			; \
    201 2:					;
    202 
    203 /*
    204  * fillkpt_nox - Same as fillkpt, but sets the NX/XD bit.
    205  */
    206 #define fillkpt_nox \
    207 	cmpl	$0,%ecx			;	/* zero-sized? */	\
    208 	je 	2f			; \
    209 	pushl	%ebp			; \
    210 	movl	RELOC(nox_flag),%ebp	; \
    211 1:	movl	%ebp,(PDE_SIZE-4)(%ebx)	;	/* upper 32 bits: NX */ \
    212 	movl	%eax,(%ebx)		;	/* store phys addr */	\
    213 	addl	$PDE_SIZE,%ebx		;	/* next PTE/PDE */	\
    214 	addl	$PAGE_SIZE,%eax		;	/* next phys page */	\
    215 	loop	1b			; \
    216 	popl	%ebp			; \
    217 2:					;
    218 
    219 /*
    220  * fillkpt_blank - Fill in a kernel page table with blank entries
    221  *	ebx = page table address
    222  *	ecx = number of pages to map
    223  */
    224 #define fillkpt_blank	\
    225 	cmpl	$0,%ecx			;	/* zero-sized? */	\
    226 	je 	2f			; \
    227 1:	movl	$0,(PDE_SIZE-4)(%ebx)	;	/* upper 32 bits: 0 */	\
    228 	movl	$0,(%ebx)		;	/* lower 32 bits: 0 */	\
    229 	addl	$PDE_SIZE,%ebx		;	/* next PTE/PDE */	\
    230 	loop	1b			; \
    231 2:					;
    232 
    233 /*
    234  * killkpt - Destroy a kernel page table
    235  *	ebx = page table address
    236  *	ecx = number of pages to destroy
    237  */
    238 #define killkpt \
    239 1:	movl	$0,(PDE_SIZE-4)(%ebx)	;	/* upper bits (for PAE) */ \
    240 	movl	$0,(%ebx)		; \
    241 	addl	$PDE_SIZE,%ebx		; \
    242 	loop	1b			;
    243 
/*
 * record boot start cycle count
 *
 * rdtsc returns the 64-bit time-stamp counter in %edx:%eax; the two
 * halves are stored into starttsc_hi/starttsc_lo (defined below).
 * Clobbers %eax and %edx.
 */
#define getstarttsc \
	rdtsc					; \
	movl	%eax, RELOC(starttsc_lo)	; \
	movl	%edx, RELOC(starttsc_hi)	;
    249 
#ifdef XEN
#define __ASSEMBLY__
#include <xen/include/public/arch-x86/cpuid.h>
#include <xen/include/public/elfnote.h>
#include <xen/include/public/xen.h>

/*
 * ELFNOTE(name, type, desctype, descdata...): emit one ELF note into
 * section ".note.<name>": namesz, descsz, type, then the name string and
 * the descriptor, each padded to 4-byte alignment as the ELF note format
 * requires.  Xen scans these notes to identify and configure the guest.
 */
#define ELFNOTE(name, type, desctype, descdata...) \
.pushsection .note.name, "a", @note	;	\
  .align 4				;	\
  .long 2f - 1f		/* namesz */	;	\
  .long 4f - 3f		/* descsz */	;	\
  .long type				;	\
1:.asciz #name				;	\
2:.align 4				;	\
3:desctype descdata			;	\
4:.align 4				;	\
.popsection

/*
 * Xen guest identifier and loader selection
 */
.section __xen_guest
	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz, "NetBSD")
	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION,  .asciz, "4.99")
	ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION,    .asciz, "xen-3.0")
	ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE,      .long,  KERNBASE)
#ifdef XENPV
	ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   .long,  KERNBASE)
	ELFNOTE(Xen, XEN_ELFNOTE_ENTRY,          .long,  start)
#else
	ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET,   .long,  0)
	ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,   .long,  RELOC(start_pvh))
#endif /* XENPV */
	ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long,  hypercall_page)
	ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW,   .long,  HYPERVISOR_VIRT_START)
	ELFNOTE(Xen, XEN_ELFNOTE_FEATURES,       .asciz, "writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel|hvm_callback_vector")
	ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE,       .asciz, "yes")
	/*
	 * Fixed the stray line-continuation backslash that used to follow
	 * this note: it spliced the LOADER invocation onto this line, so
	 * the first expansion's ".popsection" ran straight into the next
	 * ".pushsection" with no statement separator.
	 */
	ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID,   .quad,  PTE_P, PTE_P)
	ELFNOTE(Xen, XEN_ELFNOTE_LOADER,         .asciz, "generic")
	ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long,  0)
#if NKSYMS > 0 || defined(DDB) || defined(MODULAR)
	ELFNOTE(Xen, XEN_ELFNOTE_BSD_SYMTAB,     .asciz, "yes")
#endif
#endif  /* XEN */
    294 
    295 /*
    296  * Initialization
    297  */
    298 	.data
    299 
    300 	.globl	_C_LABEL(tablesize)
    301 	.globl	_C_LABEL(nox_flag)
    302 	.globl	_C_LABEL(cputype)
    303 	.globl	_C_LABEL(cpuid_level)
    304 	.globl	_C_LABEL(esym)
    305 	.globl	_C_LABEL(eblob)
    306 	.globl	_C_LABEL(atdevbase)
    307 	.globl	_C_LABEL(PDPpaddr)
    308 	.globl	_C_LABEL(lwp0uarea)
    309 	.globl	_C_LABEL(gdt)
    310 	.globl	_C_LABEL(idt)
    311 
    312 	.type	_C_LABEL(tablesize), @object
    313 _C_LABEL(tablesize):	.long	0
    314 END(tablesize)
    315 	.type	_C_LABEL(nox_flag), @object
    316 LABEL(nox_flag)		.long	0	/* 32bit NOX flag, set if supported */
    317 END(nox_flag)
    318 	.type	_C_LABEL(cputype), @object
    319 LABEL(cputype)		.long	0	/* are we 80486, Pentium, or.. */
    320 END(cputype)
    321 	.type	_C_LABEL(cpuid_level), @object
    322 LABEL(cpuid_level)	.long	-1	/* max. level accepted by cpuid instr */
    323 END(cpuid_level)
    324 	.type	_C_LABEL(atdevbase), @object
    325 LABEL(atdevbase)	.long	0	/* location of start of iomem in virt */
    326 END(atdevbase)
    327 	.type	_C_LABEL(lwp0uarea), @object
    328 LABEL(lwp0uarea)	.long	0
    329 END(lwp0uarea)
    330 	.type	_C_LABEL(PDPpaddr), @object
    331 LABEL(PDPpaddr)		.long	0	/* paddr of PDP, for libkvm */
    332 END(PDPpaddr)
    333 	.type	_C_LABEL(starttsc_lo), @object
    334 LABEL(starttsc_lo)		.long	0	/* low part of rdtsc */
    335 END(starttsc_lo)
    336 	.type	_C_LABEL(starttsc_hi), @object
    337 LABEL(starttsc_hi)		.long	0	/* high part of rdtsc */
    338 END(starttsc_hi)
    339 
    340 	/* Space for the temporary stack */
    341 	.globl	_C_LABEL(tmpstk)
    342 	.size	tmpstk, tmpstk - .
    343 	.space	512
    344 tmpstk:
    345 #ifdef XENPV
    346 	.align 		PAGE_SIZE, 0x0	/* Align on page boundary */
    347 LABEL(tmpgdt)
    348 	.space 		PAGE_SIZE	/* Xen expects a page */
    349 END(tmpgdt)
    350 #endif /* XENPV */
    351 
	.text
	/* kernel_text: absolute symbol marking the start of the kernel image. */
	.globl	_C_LABEL(kernel_text)
	.set	_C_LABEL(kernel_text),KERNTEXTOFF
    355 
    356 ENTRY(start)
    357 #ifndef XENPV
    358 
    359 #ifdef BOOT_DURATION
    360 	getstarttsc
    361 #endif
    362 
    363 	/* Warm boot */
    364 	movw	$0x1234,0x472
    365 
    366 #if defined(MULTIBOOT)
    367 	jmp	1f
    368 
    369 	.align	4
    370 	.globl	Multiboot_Header
    371 _C_LABEL(Multiboot_Header):
    372 #define MULTIBOOT_HEADER_FLAGS	(MULTIBOOT_HEADER_WANT_MEMORY)
    373 	.long	MULTIBOOT_HEADER_MAGIC
    374 	.long	MULTIBOOT_HEADER_FLAGS
    375 	.long	-(MULTIBOOT_HEADER_MAGIC + MULTIBOOT_HEADER_FLAGS)
    376 
    377 	.align	8
    378 	.globl	Multiboot2_Header
    379 _C_LABEL(Multiboot2_Header):
    380 	.long	MULTIBOOT2_HEADER_MAGIC
    381 	.long	MULTIBOOT2_ARCHITECTURE_I386
    382 	.long	Multiboot2_Header_end - Multiboot2_Header
    383 	.long	-(MULTIBOOT2_HEADER_MAGIC + MULTIBOOT2_ARCHITECTURE_I386 \
    384 		+ (Multiboot2_Header_end - Multiboot2_Header))
    385 
    386 	.long	1	/* MULTIBOOT_HEADER_TAG_INFORMATION_REQUEST */
    387 	.long	12	/* sizeof(multiboot_header_tag_information_request) */
    388 			/* + sizeof(uint32_t) * requests */
    389 	.long	4	/* MULTIBOOT_TAG_TYPE_BASIC_MEMINFO */
    390 	.long	0	/* pad for 8 bytes alignment */
    391 
    392 	.long	8	/* MULTIBOOT_HEADER_TAG_ENTRY_ADDRESS_EFI32 */
    393 	.long	12	/* sizeof(struct multiboot_tag_efi32) */
    394 	.long	efi_multiboot2_loader - KERNBASE
    395 	.long   0	/* pad for 8 bytes alignment */
    396 
    397 #if notyet
    398 	/*
    399 	 * Could be used to get an early console for debug,
    400 	 * but this is broken.
    401 	 */
    402 	.long	7	/* MULTIBOOT_HEADER_TAG_EFI_BS */
    403 	.long	8	/* sizeof(struct multiboot_tag) */
    404 #endif
    405 
    406 	.long	0	/* MULTIBOOT_HEADER_TAG_END */
    407 	.long	8	/* sizeof(struct multiboot_tag) */
    408 	.globl	Multiboot2_Header_end
    409 _C_LABEL(Multiboot2_Header_end):
    410 
    411 1:
    412 	/* Check if we are being executed by a Multiboot-compliant boot
    413 	 * loader. */
    414 	cmpl	$MULTIBOOT_INFO_MAGIC,%eax
    415 	je	multiboot1_loader
    416 
    417 	cmpl	$MULTIBOOT2_BOOTLOADER_MAGIC,%eax
    418 	je	multiboot2_loader
    419 
    420 	jmp	1f
    421 
    422 multiboot1_loader:
    423 	/*
    424 	 * Indeed, a multiboot-compliant boot loader executed us. We switch
    425 	 * to the temporary stack, and copy the received Multiboot information
    426 	 * structure into kernel's data space to process it later -- after we
    427 	 * are relocated. It will be safer to run complex C code than doing it
    428 	 * at this point.
    429 	 */
    430 	movl	$_RELOC(tmpstk),%esp
    431 	pushl	%ebx		/* Address of Multiboot information */
    432 	call	_C_LABEL(multiboot1_pre_reloc)
    433 	addl	$4,%esp
    434 	jmp	.Lstart_common
    435 
    436 efi_multiboot2_loader:
    437 	/*
    438 	 * EFI32 multiboot2 entry point. We are left here without
    439 	 * stack and with no idea of where we were loaded in memory.
    440 	 * The only inputs are
    441 	 * %eax MULTIBOOT2_BOOTLOADER_MAGIC
    442 	 * %ebx pointer to multiboot_info
    443 	 *
    444 	 * Here we will copy the kernel to 0x100000 (KERNTEXTOFF - KERNBASE)
    445 	 * as almost all the code in locore.S assume it is there. Once done,
    446 	 * we join the main start code .This is derived from
    447 	 * src/sys/arch/i386/stand/efiboot/bootia32/startprog32.S
    448 	 */
    449 
    450 	cli
    451 
    452 	/*
    453 	 * Discover our load address and store it in %edx
    454 	 */
    455 	movl	$_RELOC(tmpstk),%esp
    456 	call	next
    457 next:	popl	%edx
    458 	subl	$(next - efi_multiboot2_loader), %edx
    459 
    460 	/*
    461 	 * Save multiboot_info for later. We cannot use
    462 	 * temporary stack for that since we are going to
    463 	 * overwrite it.
    464 	 */
    465 	movl	%ebx, (multiboot2_info_ptr - efi_multiboot2_loader)(%edx)
    466 
    467 	/*
    468 	 * Get relocated multiboot2_loader entry point in %ebx
    469 	 */
    470 	movl	$(KERNTEXTOFF - KERNBASE), %ebx
    471 	addl	$(multiboot2_loader - start), %ebx
    472 
    473         /* Copy kernel */
    474         movl    $(KERNTEXTOFF - KERNBASE), %edi		/* dest */
    475         movl    %edx, %esi
    476 	subl	$(efi_multiboot2_loader - start), %esi	/* src */
    477 	movl	$(__kernel_end - kernel_text), %ecx	/* size */
    478 #if defined(NO_OVERLAP)
    479         movl    %ecx, %eax
    480 #else
    481         movl    %edi, %eax
    482         subl    %esi, %eax
    483         cmpl    %ecx, %eax      /* overlapping? */
    484         movl    %ecx, %eax
    485         jb      .Lbackwards
    486 #endif
    487         /* nope, copy forwards. */
    488         shrl    $2, %ecx        /* copy by words */
    489         rep
    490         movsl
    491         and     $3, %eax        /* any bytes left? */
    492         jnz     .Ltrailing
    493         jmp     .Lcopy_done
    494 
    495 .Ltrailing:
    496         cmp     $2, %eax
    497         jb      11f
    498         movw    (%esi), %ax
    499         movw    %ax, (%edi)
    500         je      .Lcopy_done
    501         movb    2(%esi), %al
    502         movb    %al, 2(%edi)
    503         jmp     .Lcopy_done
    504 11:     movb    (%esi), %al
    505         movb    %al, (%edi)
    506         jmp     .Lcopy_done
    507 
    508 #if !defined(NO_OVERLAP)
    509 .Lbackwards:
    510         addl    %ecx, %edi      /* copy backwards. */
    511         addl    %ecx, %esi
    512         and     $3, %eax        /* any fractional bytes? */
    513         jnz     .Lback_align
    514 .Lback_aligned:
    515         shrl    $2, %ecx
    516         subl    $4, %esi
    517         subl    $4, %edi
    518         std
    519         rep
    520         movsl
    521         cld
    522         jmp     .Lcopy_done
    523 
    524 .Lback_align:
    525         sub     %eax, %esi
    526         sub     %eax, %edi
    527         cmp     $2, %eax
    528         jb      11f
    529         je      12f
    530         movb    2(%esi), %al
    531         movb    %al, 2(%edi)
    532 12:     movw    (%esi), %ax
    533         movw    %ax, (%edi)
    534         jmp     .Lback_aligned
    535 11:     movb    (%esi), %al
    536         movb    %al, (%edi)
    537         jmp     .Lback_aligned
    538 #endif
    539         /* End of copy kernel */
    540 .Lcopy_done:
    541 	cld			/* LynxOS depends on it */
    542 
    543 	/* Prepare jump address */
    544 	lea	(efi_multiboot2_loader32a - efi_multiboot2_loader)(%edx), %eax
    545 	movl	%eax, (efi_multiboot2_loader32r - efi_multiboot2_loader)(%edx)
    546 
    547 	/* Setup GDT */
    548 	lea	(gdt - efi_multiboot2_loader)(%edx), %eax
    549 	movl	%eax, (gdtrr - efi_multiboot2_loader)(%edx)
    550 	lgdt	(gdtr - efi_multiboot2_loader)(%edx)
    551 
    552 	/* Jump to set %cs */
    553 	ljmp	*(efi_multiboot2_loader32r - efi_multiboot2_loader)(%edx)
    554 
    555 	.align	4
    556 efi_multiboot2_loader32a:
    557 	movl	$0x10, %eax	/* #define DATA_SEGMENT	0x10 */
    558 	movw	%ax, %ds
    559 	movw	%ax, %es
    560 	movw	%ax, %fs
    561 	movw	%ax, %gs
    562 	movw	%ax, %ss
    563 
    564 	/* Already set new stack pointer */
    565 	movl	%esp, %ebp
    566 
    567 	/* Disable Paging in CR0 */
    568 	movl	%cr0, %eax
    569 	andl	$(~CR0_PG), %eax
    570 	movl	%eax, %cr0
    571 
    572 	/* Disable PAE in CR4 */
    573 	movl	%cr4, %eax
    574 	andl	$(~CR4_PAE), %eax
    575 	movl	%eax, %cr4
    576 
    577 	jmp	efi_multiboot2_loader32b
    578 
    579 	.align	4
    580 efi_multiboot2_loader32b:
    581 	xor	%eax, %eax
    582 	movl	%ebx, (efi_multiboot2_loader32r - efi_multiboot2_loader)(%edx)
    583 	/*
    584 	 * Reload multiboot info from target location
    585 	 */
    586 	movl	_RELOC(multiboot2_info_ptr), %ebx
    587 	ljmp	*(efi_multiboot2_loader32r - efi_multiboot2_loader)(%edx)
    588 
    589 	.align	16
    590 efi_multiboot2_loader32r:
    591 	.long	0
    592 	.long	0x08	/* #define	CODE_SEGMENT	0x08 */
    593 	.align	16
    594 gdt:
    595 	.long	0, 0
    596 	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00
    597 	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00
    598 gdtr:
    599 	.word	gdtr - gdt
    600 gdtrr:
    601 	.quad  	0
    602 multiboot2_info_ptr:
    603 	.long	0
    604 
    605 	.align 16
    606 multiboot2_loader:
    607 	movl    $_RELOC(tmpstk),%esp
    608 	pushl	%ebx		/* Address of Multiboot information */
    609 	call	_C_LABEL(multiboot2_pre_reloc)
    610 	addl	$4,%esp
    611 	jmp	.Lstart_common
    612 #endif /* MULTIBOOT */
    613 
    614 1:
    615 	/*
    616 	 * At this point, we know that a NetBSD-specific boot loader
    617 	 * booted this kernel.
    618 	 *
    619 	 * Load parameters from the stack (32 bits):
    620 	 *     boothowto, [bootdev], bootinfo, esym, biosextmem, biosbasemem
    621 	 * We are not interested in 'bootdev'.
    622 	 */
    623 
    624 	addl	$4,%esp		/* Discard return address to boot loader */
    625 	call	_C_LABEL(native_loader)
    626 	addl	$24,%esp
    627 
    628 .Lstart_common:
    629 	/* First, reset the PSL. */
    630 	pushl	$PSL_MBO
    631 	popfl
    632 
    633 	/* Clear segment registers; always null in proc0. */
    634 	xorl	%eax,%eax
    635 	movw	%ax,%fs
    636 	movw	%ax,%gs
    637 
    638 	/* Find out our CPU type. */
    639 
    640 try386:	/* Try to toggle alignment check flag; does not exist on 386. */
    641 	pushfl
    642 	popl	%eax
    643 	movl	%eax,%ecx
    644 	orl	$PSL_AC,%eax
    645 	pushl	%eax
    646 	popfl
    647 	pushfl
    648 	popl	%eax
    649 	xorl	%ecx,%eax
    650 	andl	$PSL_AC,%eax
    651 	pushl	%ecx
    652 	popfl
    653 
    654 	testl	%eax,%eax
    655 	jnz	try486
    656 
    657 	/*
    658 	 * Try the test of a NexGen CPU -- ZF will not change on a DIV
    659 	 * instruction on a NexGen, it will on an i386.  Documented in
    660 	 * Nx586 Processor Recognition Application Note, NexGen, Inc.
    661 	 */
    662 	movl	$0x5555,%eax
    663 	xorl	%edx,%edx
    664 	movl	$2,%ecx
    665 	divl	%ecx
    666 	jnz	is386
    667 
    668 isnx586:
    669 	/*
    670 	 * Don't try cpuid, as Nx586s reportedly don't support the
    671 	 * PSL_ID bit.
    672 	 */
    673 	movl	$CPU_NX586,RELOC(cputype)
    674 	jmp	2f
    675 
    676 is386:
    677 	movl	$CPU_386,RELOC(cputype)
    678 	jmp	2f
    679 
    680 try486:	/* Try to toggle identification flag; does not exist on early 486s. */
    681 	pushfl
    682 	popl	%eax
    683 	movl	%eax,%ecx
    684 	xorl	$PSL_ID,%eax
    685 	pushl	%eax
    686 	popfl
    687 	pushfl
    688 	popl	%eax
    689 	xorl	%ecx,%eax
    690 	andl	$PSL_ID,%eax
    691 	pushl	%ecx
    692 	popfl
    693 
    694 	testl	%eax,%eax
    695 	jnz	try586
    696 is486:	movl	$CPU_486,RELOC(cputype)
    697 	/*
    698 	 * Check Cyrix CPU
    699 	 * Cyrix CPUs do not change the undefined flags following
    700 	 * execution of the divide instruction which divides 5 by 2.
    701 	 *
    702 	 * Note: CPUID is enabled on M2, so it passes another way.
    703 	 */
    704 	pushfl
    705 	movl	$0x5555, %eax
    706 	xorl	%edx, %edx
    707 	movl	$2, %ecx
    708 	clc
    709 	divl	%ecx
    710 	jnc	trycyrix486
    711 	popfl
    712 	jmp 2f
    713 trycyrix486:
    714 	movl	$CPU_6x86,RELOC(cputype)	/* set CPU type */
    715 	/*
    716 	 * Check for Cyrix 486 CPU by seeing if the flags change during a
    717 	 * divide. This is documented in the Cx486SLC/e SMM Programmer's
    718 	 * Guide.
    719 	 */
    720 	xorl	%edx,%edx
    721 	cmpl	%edx,%edx		/* set flags to known state */
    722 	pushfl
    723 	popl	%ecx			/* store flags in ecx */
    724 	movl	$-1,%eax
    725 	movl	$4,%ebx
    726 	divl	%ebx			/* do a long division */
    727 	pushfl
    728 	popl	%eax
    729 	xorl	%ecx,%eax		/* are the flags different? */
    730 	testl	$0x8d5,%eax		/* only check C|PF|AF|Z|N|V */
    731 	jne	2f			/* yes; must be Cyrix 6x86 CPU */
    732 	movl	$CPU_486DLC,RELOC(cputype) 	/* set CPU type */
    733 
    734 #ifndef CYRIX_CACHE_WORKS
    735 	/* Disable caching of the ISA hole only. */
    736 	invd
    737 	movb	$CCR0,%al	/* Configuration Register index (CCR0) */
    738 	outb	%al,$0x22
    739 	inb	$0x23,%al
    740 	orb	$(CCR0_NC1|CCR0_BARB),%al
    741 	movb	%al,%ah
    742 	movb	$CCR0,%al
    743 	outb	%al,$0x22
    744 	movb	%ah,%al
    745 	outb	%al,$0x23
    746 	invd
    747 #else /* CYRIX_CACHE_WORKS */
    748 	/* Set cache parameters */
    749 	invd			/* Start with guaranteed clean cache */
    750 	movb	$CCR0,%al	/* Configuration Register index (CCR0) */
    751 	outb	%al,$0x22
    752 	inb	$0x23,%al
    753 	andb	$~CCR0_NC0,%al
    754 #ifndef CYRIX_CACHE_REALLY_WORKS
    755 	orb	$(CCR0_NC1|CCR0_BARB),%al
    756 #else
    757 	orb	$CCR0_NC1,%al
    758 #endif
    759 	movb	%al,%ah
    760 	movb	$CCR0,%al
    761 	outb	%al,$0x22
    762 	movb	%ah,%al
    763 	outb	%al,$0x23
    764 	/* clear non-cacheable region 1	*/
    765 	movb	$(NCR1+2),%al
    766 	outb	%al,$0x22
    767 	movb	$NCR_SIZE_0K,%al
    768 	outb	%al,$0x23
    769 	/* clear non-cacheable region 2	*/
    770 	movb	$(NCR2+2),%al
    771 	outb	%al,$0x22
    772 	movb	$NCR_SIZE_0K,%al
    773 	outb	%al,$0x23
    774 	/* clear non-cacheable region 3	*/
    775 	movb	$(NCR3+2),%al
    776 	outb	%al,$0x22
    777 	movb	$NCR_SIZE_0K,%al
    778 	outb	%al,$0x23
    779 	/* clear non-cacheable region 4	*/
    780 	movb	$(NCR4+2),%al
    781 	outb	%al,$0x22
    782 	movb	$NCR_SIZE_0K,%al
    783 	outb	%al,$0x23
    784 	/* enable caching in CR0 */
    785 	movl	%cr0,%eax
    786 	andl	$~(CR0_CD|CR0_NW),%eax
    787 	movl	%eax,%cr0
    788 	invd
    789 #endif /* CYRIX_CACHE_WORKS */
    790 
    791 	jmp	2f
    792 
    793 try586:	/* Use the `cpuid' instruction. */
    794 	xorl	%eax,%eax
    795 	cpuid
    796 	movl	%eax,RELOC(cpuid_level)
    797 
    798 	/*
    799 	 * Retrieve the NX/XD flag. We use the 32bit version of PTE_NX.
    800 	 */
    801 	movl	$0x80000001,%eax
    802 	cpuid
    803 	andl	$CPUID_NOX,%edx
    804 	jz	no_NOX
    805 	movl	$PTE_NX32,RELOC(nox_flag)
    806 no_NOX:
    807 
    808 2:
    809 	/*
    810 	 * Finished with old stack; load new %esp now instead of later so we
    811 	 * can trace this code without having to worry about the trace trap
    812 	 * clobbering the memory test or the zeroing of the bss+bootstrap page
    813 	 * tables.
    814 	 *
    815 	 * The boot program should check:
    816 	 *	text+data <= &stack_variable - more_space_for_stack
    817 	 *	text+data+bss+pad+space_for_page_tables <= end_of_memory
    818 	 *
    819 	 * XXX: the gdt is in the carcass of the boot program so clearing
    820 	 * the rest of memory is still not possible.
    821 	 */
    822 	movl	$_RELOC(tmpstk),%esp
    823 
    824 /*
    825  * There are two different layouts possible, depending on whether PAE is
    826  * enabled or not.
    827  *
    828  * If PAE is not enabled, there are two levels of pages: PD -> PT. They will
    829  * be referred to as: L2 -> L1. L2 is 1 page long. The BOOTSTRAP TABLES have
    830  * the following layout:
    831  * 	+-----+------------+----+
    832  * 	| L2 -> PROC0 STK -> L1 |
    833  * 	+-----+------------+----+
    834  *
    835  * If PAE is enabled, there are three levels of pages: PDP -> PD -> PT. They
    836  * will be referred to as: L3 -> L2 -> L1. L3 is 1 page long, L2 is 4 page
    837  * long. The BOOTSTRAP TABLES have the following layout:
    838  * 	+-----+-----+------------+----+
    839  * 	| L3 -> L2 -> PROC0 STK -> L1 |
    840  * 	+-----+-----+------------+----+
    841  *
    842  * Virtual address space of the kernel in both cases:
    843  * +------+--------+------+-----+--------+---------------------+-----------
    844  * | TEXT | RODATA | DATA | BSS | [SYMS] | [PRELOADED MODULES] | BOOTSTRAP
    845  * +------+--------+------+-----+--------+---------------------+-----------
    846  *                             (1)      (2)                   (3)
    847  *
    848  * -------+-------------+
    849  * TABLES | ISA I/O MEM |
    850  * -------+-------------+
    851  *       (4)
    852  *
    853  * PROC0 STK is obviously not linked as a page level. It just happens to be
    854  * caught between L2 and L1.
    855  *
    856  * Important note: the kernel segments are properly 4k-aligned
    857  * (see kern.ldscript), so there's no need to enforce alignment.
    858  */
    859 
    860 	/* Find end of kernel image; brings us on (1). */
    861 	movl	$RELOC(__kernel_end),%edi
    862 
    863 #if (NKSYMS || defined(DDB) || defined(MODULAR)) && !defined(makeoptions_COPY_SYMTAB)
    864 	/* Save the symbols (if loaded); brings us on (2). */
    865 	movl	RELOC(esym),%eax
    866 	testl	%eax,%eax
    867 	jz	1f
    868 	subl	$KERNBASE,%eax
    869 	movl	%eax,%edi
    870 1:
    871 #endif
    872 
    873 	/* Skip over any modules/blobs; brings us on (3). */
    874 	movl	RELOC(eblob),%eax
    875 	testl	%eax,%eax
    876 	jz	1f
    877 	subl	$KERNBASE,%eax
    878 	movl	%eax,%edi
    879 1:
    880 
    881 	/* We are on (3). Align up for BOOTSTRAP TABLES. */
    882 	movl	%edi,%esi
    883 	addl	$PGOFSET,%esi
    884 	andl	$~PGOFSET,%esi
    885 
    886 	/* nkptp[1] = (esi + ~L2_FRAME) >> L2_SHIFT + 1; */
    887 	movl	%esi,%eax
    888 	addl	$~L2_FRAME,%eax
    889 	shrl	$L2_SHIFT,%eax
    890 	incl	%eax		/* one more PTP for VAs stolen by bootstrap */
    891 1:	movl	%eax,RELOC(nkptp)+1*4
    892 
    893 	/* tablesize = (PDP_SIZE + UPAGES + nkptp[1]) << PGSHIFT; */
    894 	addl	$(PDP_SIZE+UPAGES),%eax
    895 #ifdef PAE
    896 	incl	%eax 		/* one more page for L3 */
    897 	shll	$PGSHIFT+1,%eax	/* PTP tables are twice larger with PAE */
    898 #else
    899 	shll	$PGSHIFT,%eax
    900 #endif
    901 	movl	%eax,RELOC(tablesize)
    902 
    903 	/* Ensure that nkptp[1] covers BOOTSTRAP TABLES, ie:
    904 	 * (esi + tablesize) >> L2_SHIFT + 1 < nkptp[1] */
    905 	addl	%esi,%eax
    906 	addl	$~L2_FRAME,%eax
    907 	shrl	$L2_SHIFT,%eax
    908 	incl	%eax
    909 	cmpl	%eax,RELOC(nkptp)+1*4
    910 	jnz	1b
    911 
    912 	/* Now, zero out the BOOTSTRAP TABLES (before filling them in). */
    913 	movl	%esi,%edi
    914 	xorl	%eax,%eax
    915 	cld
    916 	movl	RELOC(tablesize),%ecx
    917 	shrl	$2,%ecx
    918 	rep
    919 	stosl				/* copy eax -> edi */
    920 
    921 /*
    922  * Build the page tables and levels. We go from L1 to L2/L3, and link the levels
    923  * together. Note: RELOC computes &addr - KERNBASE in 32 bits; the value can't
    924  * be > 4G, or we can't deal with it anyway, since we are in 32bit mode.
    925  */
    926 	/*
    927 	 * Build L1.
    928 	 */
    929 	leal	(PROC0_PTP1_OFF)(%esi),%ebx
    930 
    931 	/* Skip the area below the kernel text. */
    932 	movl	$(KERNTEXTOFF - KERNBASE),%ecx
    933 	shrl	$PGSHIFT,%ecx
    934 	fillkpt_blank
    935 
    936 	/* Map the kernel text RX. */
    937 	movl	$(KERNTEXTOFF - KERNBASE),%eax	/* start of TEXT */
    938 	movl	$RELOC(__rodata_start),%ecx
    939 	subl	%eax,%ecx
    940 	shrl	$PGSHIFT,%ecx
    941 	orl	$(PTE_P),%eax
    942 	fillkpt
    943 
    944 	/* Map the kernel rodata R. */
    945 	movl	$RELOC(__rodata_start),%eax
    946 	movl	$RELOC(__data_start),%ecx
    947 	subl	%eax,%ecx
    948 	shrl	$PGSHIFT,%ecx
    949 	orl	$(PTE_P),%eax
    950 	fillkpt_nox
    951 
    952 	/* Map the kernel data+bss RW. */
    953 	movl	$RELOC(__data_start),%eax
    954 	movl	$RELOC(__kernel_end),%ecx
    955 	subl	%eax,%ecx
    956 	shrl	$PGSHIFT,%ecx
    957 	orl	$(PTE_P|PTE_W),%eax
    958 	fillkpt_nox
    959 
    960 	/* Map [SYMS]+[PRELOADED MODULES] RW. */
    961 	movl	$RELOC(__kernel_end),%eax
    962 	movl	%esi,%ecx		/* start of BOOTSTRAP TABLES */
    963 	subl	%eax,%ecx
    964 	shrl	$PGSHIFT,%ecx
    965 	orl	$(PTE_P|PTE_W),%eax
    966 	fillkpt_nox
    967 
    968 	/* Map the BOOTSTRAP TABLES RW. */
    969 	movl	%esi,%eax		/* start of BOOTSTRAP TABLES */
    970 	movl	RELOC(tablesize),%ecx	/* length of BOOTSTRAP TABLES */
    971 	shrl	$PGSHIFT,%ecx
    972 	orl	$(PTE_P|PTE_W),%eax
    973 	fillkpt_nox
    974 
    975 	/* We are on (4). Map ISA I/O MEM RW. */
    976 	movl	$IOM_BEGIN,%eax
    977 	movl	$IOM_SIZE,%ecx	/* size of ISA I/O MEM */
    978 	shrl	$PGSHIFT,%ecx
    979 	orl	$(PTE_P|PTE_W/*|PTE_PCD*/),%eax
    980 	fillkpt_nox
    981 
    982 	/*
    983 	 * Build L2 for identity mapping. Linked to L1.
    984 	 */
    985 	leal	(PROC0_PDIR_OFF)(%esi),%ebx
    986 	leal	(PROC0_PTP1_OFF)(%esi),%eax
    987 	orl	$(PTE_P|PTE_W),%eax
    988 	movl	RELOC(nkptp)+1*4,%ecx
    989 	fillkpt
    990 
    991 	/* Set up L2 entries for actual kernel mapping */
    992 	leal	(PROC0_PDIR_OFF + L2_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx
    993 	leal	(PROC0_PTP1_OFF)(%esi),%eax
    994 	orl	$(PTE_P|PTE_W),%eax
    995 	movl	RELOC(nkptp)+1*4,%ecx
    996 	fillkpt
    997 
    998 	/* Install recursive top level PDE */
    999 	leal	(PROC0_PDIR_OFF + PDIR_SLOT_PTE * PDE_SIZE)(%esi),%ebx
   1000 	leal	(PROC0_PDIR_OFF)(%esi),%eax
   1001 	orl	$(PTE_P|PTE_W),%eax
   1002 	movl	$PDP_SIZE,%ecx
   1003 	fillkpt_nox
   1004 
   1005 #ifdef PAE
   1006 	/*
   1007 	 * Build L3. Linked to L2.
   1008 	 */
   1009 	leal	(PROC0_L3_OFF)(%esi),%ebx
   1010 	leal	(PROC0_PDIR_OFF)(%esi),%eax
   1011 	orl	$(PTE_P),%eax
   1012 	movl	$PDP_SIZE,%ecx
   1013 	fillkpt
   1014 
   1015 	/* Enable PAE mode */
   1016 	movl	%cr4,%eax
   1017 	orl	$CR4_PAE,%eax
   1018 	movl	%eax,%cr4
   1019 #endif
   1020 
   1021 	/* Save physical address of L2. */
   1022 	leal	(PROC0_PDIR_OFF)(%esi),%eax
   1023 	movl	%eax,RELOC(PDPpaddr)
   1024 
   1025 	/*
   1026 	 * Startup checklist:
   1027 	 * 1. Load %cr3 with pointer to L2 (or L3 for PAE).
   1028 	 */
   1029 	movl	%esi,%eax
   1030 	movl	%eax,%cr3
   1031 
   1032 	/*
   1033 	 * 2. Set NOX in EFER, if available.
   1034 	 */
   1035 	movl	RELOC(nox_flag),%ebx
   1036 	cmpl	$0,%ebx
   1037 	je 	skip_NOX
   1038 	movl	$MSR_EFER,%ecx
   1039 	rdmsr
   1040 	xorl	%eax,%eax
   1041 	orl	$(EFER_NXE),%eax
   1042 	wrmsr
   1043 skip_NOX:
   1044 
   1045 	/*
   1046 	 * 3. Enable paging and the rest of it.
   1047 	 */
   1048 	movl	%cr0,%eax
   1049 	orl	$(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_MP|CR0_WP|CR0_AM),%eax
   1050 	movl	%eax,%cr0
   1051 
   1052 	pushl	$begin			/* jump to high mem */
   1053 	ret
   1054 
   1055 begin:
   1056 	/*
   1057 	 * We have arrived. There's no need anymore for the identity mapping in
   1058 	 * low memory, remove it.
   1059 	 */
   1060 	movl	_C_LABEL(nkptp)+1*4,%ecx
   1061 	leal	(PROC0_PDIR_OFF)(%esi),%ebx	/* old, phys address of PDIR */
   1062 	addl	$(KERNBASE), %ebx		/* new, virt address of PDIR */
   1063 	killkpt
   1064 
   1065 	/* Relocate atdevbase. */
   1066 	movl	$KERNBASE,%edx
   1067 	addl	_C_LABEL(tablesize),%edx
   1068 	addl	%esi,%edx
   1069 	movl	%edx,_C_LABEL(atdevbase)
   1070 
   1071 	/* Set up bootstrap stack. */
   1072 	leal	(PROC0_STK_OFF+KERNBASE)(%esi),%eax
   1073 	movl	%eax,_C_LABEL(lwp0uarea)
   1074 	leal	(USPACE-FRAMESIZE)(%eax),%esp
   1075 	movl	%esi,PCB_CR3(%eax)	/* pcb->pcb_cr3 */
   1076 	xorl	%ebp,%ebp		/* mark end of frames */
   1077 
   1078 #if defined(MULTIBOOT)
   1079 	/* It is now safe to parse the Multiboot information structure
   1080 	 * we saved before from C code.  Note that we cannot delay its
   1081 	 * parsing any more because initgdt (called below) needs to make
   1082 	 * use of this information.
	 * We call both multiboot 1 and 2 flavors; they know whether they
	 * have something to do on their own.
   1085 	 */
   1086 	call	_C_LABEL(multiboot1_post_reloc)
   1087 	call 	_C_LABEL(multiboot2_post_reloc)
   1088 #endif
   1089 
   1090 	/*
   1091 	 * Initialize a temporary GDT (Global Descriptor Table) on the
   1092 	 * stack and make the segment registers to use it.
   1093 	 *
   1094 	 * This creates a segment descriptor for the CPU-local segment
   1095 	 * and loads %fs with its segment selector to set up addressing
   1096 	 * for %fs.  Thus, after this point, CPUVAR(...), curcpu(), and
   1097 	 * curlwp will work.
   1098 	 *
   1099 	 * Later, we will replace this temporary GDT on the stack by a
   1100 	 * permanent GDT allocated with uvm_km in gdt_init.
   1101 	 *
   1102 	 * XXX Intel recommends ensuring the GDT address is aligned on
   1103 	 * an 8-byte boundary for performance.  Perhaps not an issue
   1104 	 * early at boot, but maybe worth doing?
   1105 	 *
   1106 	 *	Intel 64 and IA-32 Architectures, Software Developer's
   1107 	 *	Manual, Volume 3: System Programming Guide, Order
   1108 	 *	Number 325383, April 2022, Sec. 3.5.1 `Segment
   1109 	 *	Descriptor Tables', p. 3-15:
   1110 	 *
   1111 	 *		The base address of the GDT should be aligned
   1112 	 *		on an eight-byte boundary to yield the best
   1113 	 *		processor performance.
   1114 	 */
   1115 	subl	$NGDT*8, %esp		/* space for temporary gdt */
   1116 	pushl	%esp
   1117 	call	_C_LABEL(initgdt)
   1118 	addl	$4,%esp
   1119 
   1120 	movl	_C_LABEL(tablesize),%eax
   1121 	addl	%esi,%eax		/* skip past stack and page tables */
   1122 
   1123 #ifdef PAE
   1124 	pushl	$0	/* init386() expects a 64 bits paddr_t with PAE */
   1125 #endif
   1126 	pushl	%eax
   1127 #if defined(XEN) && !defined(XENPV)
   1128         call    _C_LABEL(init_xen_early)
   1129 #endif
   1130 	call	_C_LABEL(init_bootspace)
   1131 	call	_C_LABEL(init386)
   1132 	addl	$PDE_SIZE,%esp		/* pop paddr_t */
   1133 	addl	$NGDT*8,%esp		/* pop temporary gdt */
   1134 
   1135 	call 	_C_LABEL(main)
   1136 #else /* XENPV */
   1137 	/* First, reset the PSL. */
   1138 	pushl	$PSL_MBO
   1139 	popfl
   1140 
   1141 	cld
   1142 
   1143 	/*
   1144 	 * Xen info:
   1145 	 * - %esp -> stack, *theoretically* the last used page by Xen bootstrap
   1146 	 */
   1147 	movl	%esp,%ebx
   1148 	movl	$_RELOC(tmpstk),%esp
   1149 
   1150 	/* Clear BSS. */
   1151 	xorl	%eax,%eax
   1152 	movl	$RELOC(__bss_start),%edi
   1153 	movl	$RELOC(_end),%ecx
   1154 	subl	%edi,%ecx
   1155 	rep
   1156 	stosb
   1157 
   1158 	/* Copy the necessary stuff from start_info structure. */
   1159 	/* We need to copy shared_info early, so that sti/cli work */
   1160 	movl	$RELOC(start_info_union),%edi
   1161 	movl	$(PAGE_SIZE / 4),%ecx
   1162 	rep
   1163 	movsl
   1164 
   1165 	/* Clear segment registers. */
   1166 	xorl	%eax,%eax
   1167 	movw	%ax,%fs
   1168 	movw	%ax,%gs
   1169 
   1170 	xorl	%eax,%eax
   1171 	cpuid
   1172 	movl	%eax,RELOC(cpuid_level)
   1173 
   1174 	movl    $VM_GUEST_XENPV, RELOC(vm_guest)
   1175 
   1176 	/*
   1177 	 * Use a temporary GDT page. We'll re-add it to uvm(9) once we're done
   1178 	 * using it.
   1179 	 */
   1180 	movl	$RELOC(tmpgdt),%eax
   1181 	pushl	%eax		/* start of temporary gdt */
   1182 	call	_C_LABEL(initgdt)
   1183 	addl	$4,%esp
   1184 
   1185 	call	xen_locore
   1186 
   1187 	/*
   1188 	 * The first VA available is returned by xen_locore in %eax. We
   1189 	 * use it as the UAREA, and set up the stack here.
   1190 	 */
   1191 	movl	%eax,%esi
   1192 	movl	%esi,_C_LABEL(lwp0uarea)
   1193 	leal	(USPACE-FRAMESIZE)(%eax),%esp
   1194 	xorl	%ebp,%ebp		/* mark end of frames */
   1195 
   1196 	/* Set first_avail after the DUMMY PAGE (see xen_locore). */
   1197 	addl	$(USPACE+PAGE_SIZE),%esi
   1198 	subl	$KERNBASE,%esi		/* init386 wants a physical address */
   1199 
   1200 	pushl	$0	/* init386() expects a 64 bits paddr_t with PAE */
   1201 	pushl	%esi
   1202 	call	_C_LABEL(init_bootspace)
   1203 	call	_C_LABEL(init386)
   1204 	addl	$PDE_SIZE,%esp		/* pop paddr_t */
   1205 	call 	_C_LABEL(main)
   1206 #endif /* XENPV */
   1207 END(start)
   1208 
   1209 #if defined(XEN)
   1210 #ifndef XENPV
   1211 /* entry point for Xen PVH */
ENTRY(start_pvh)
#ifdef BOOT_DURATION
	getstarttsc
#endif
	/*
	 * PVH boot entry.  %ebx holds the physical address of the
	 * hvm_start_info structure handed to us by the VMM.
	 * Xen doesn't start us with a valid gdt, so load a minimal
	 * one and reload the segment registers from it.
	 */
	movl    $RELOC(gdtdesc_xenpvh), %eax
	lgdt    (%eax)
	jmp     $GSEL(GCODE_SEL, SEL_KPL), $RELOC(.Lreload_cs)

.Lreload_cs:
	movw    $GSEL(GDATA_SEL, SEL_KPL), %ax
	movw    %ax, %ds
	movw    %ax, %es
	movw    %ax, %ss

	/* we need a valid stack */
	movl	$RELOC(tmpstk),%esp

	/* clear BSS */
        xorl    %eax,%eax
	movl    $RELOC(__bss_start),%edi
	movl    $RELOC(_end),%ecx
	subl    %edi,%ecx
	rep
	stosb

	/*
	 * Here, we have 2 cases :
	 *
	 *  1) We have been started by Xen
	 *  2) We have been started by another VMM (Qemu, Firecracker, ...)
	 *
	 * The main difference is that, when we are started by Xen,
	 * %ebx (addr of the hvm_start_info structure) is pointing to a
	 * location that will be mapped correctly later.
	 *
	 * In the second case, we have to copy this structure (and all
	 * the information contained in it) to a location that will be
	 * mapped later : __kernel_end
	 *
	 * To distinguish between the 2 cases, we'll use the 'cpuid' instruction
	 */

	push %ebx		/* save hvm_start_info ptr: cpuid clobbers %ebx */
	xorl %eax, %eax
	cpuid
	cmpl $0x1, %eax		/* Check if we can call CPUID with eax=1 */
	jb .start_genpvh
	xorl %eax, %eax
	inc %eax
	cpuid
	shr $31, %ecx
	testb $1, %cl		/* Check if bit 31 of ECX (hypervisor) is set */
	jz .start_genpvh
	xorl %eax, %eax
	inc %eax
	shl $30, %eax
	cpuid			/* Calling cpuid with eax=0x40000000 */
	cmp $XEN_CPUID_SIGNATURE_EBX, %ebx	/* "VneX" */
	je .start_xen

	/* We have been started by a VMM that is *not* Xen */

.start_genpvh:
	/* announce ourself */
	movl $VM_GUEST_GENPVH, RELOC(vm_guest)
	pop %ebx

	/*
	 * No symbol table here: esym is simply the virtual end of the
	 * kernel image.  Note the '$' on the second movl below: we want
	 * the link-time address of __kernel_end (a virtual address,
	 * KERNBASE-relative), NOT the word stored at that address --
	 * paging is still off, so dereferencing it would read unmapped
	 * memory.  The copy destination in %eax stays physical (RELOC).
	 */
	movl $RELOC(__kernel_end), %eax
	movl $__kernel_end, %ecx
	movl $RELOC(esym),%ebp
	movl %ecx,(%ebp)
	jmp .copy_hvm_info

.start_xen:
	movl $VM_GUEST_XENPVH, RELOC(vm_guest)
        /*
	 * read the size of the symbol table, sanity-check and compute the end
	 * we have:
	 * |   kernel   |
	 * -------------- kernel_end
	 *     alignment
	 * -------------- bsd_symtab
	 * | size (int) |
	 * | elf_header |
	 *
	 */
	movl $RELOC(__kernel_end), %ebp
	addl $3, %ebp
	andl $~3, %ebp		/* 4-byte align the symtab start */
	movl 0(%ebp), %eax /* read size */
	testl $~0x00ffffff, %eax /* more than 16MB ? */
	jnz .bad_esym
	addl %ebp, %eax /* compute esym */
	/* check if start_info is within symbol table */
	movl 0(%esp), %ebx
	cmp %ebp, %ebx
	jb .save_esym /* %ebx < __kernel_end */
	cmp %eax, %ebx
	jae .save_esym /* %ebx > esym */

.bad_esym:
	/* Bogus symtab size, or start_info overlaps it: drop the symbols. */
	movl $RELOC(__kernel_end), %eax
.save_esym:
	movl %eax, %ebx
	addl $KERNBASE,%ebx	/* esym is stored as a virtual address */
	movl %ebx,RELOC(esym)
	/* advance to next page boundary, this will be our hvm_start_info */
	addl $PGOFSET,%eax
	andl $~PGOFSET,%eax
	pop %ebx

.copy_hvm_info:
	/*
	 * save addr of the hvm_start_info structure.
	 * %ebx points to physical address provided by Xen
	 * %eax points to where we want it to be copied to
	 */
	/* check if %ebx and %eax are in the same page */
	movl %ebx, %esi
	addl $PGOFSET,%esi
	andl $~PGOFSET,%esi
        cmp %esi, %eax
	je .same_hvm_info

	/* First, copy the hvm_start_info structure to %eax */
	movl %ebx, %esi
	movl %eax, %edi
	movl $HVM_START_INFO_SIZE, %ecx
	shrl $2, %ecx
	rep movsl

	/*
	 * Copy cmdline_paddr after hvm_start_info.
	 * NOTE(review): assumes cmdline_paddr != 0 -- confirm with the
	 * PVH boot protocol / callers.
	 */
	movl CMDLINE_PADDR(%ebx), %esi
	movl %edi, CMDLINE_PADDR(%eax)	/* Set new cmdline_paddr in hvm_start_info */
.cmdline_copy:
	movb (%esi), %cl
	movsb
	cmp $0, %cl		/* stop after copying the NUL terminator */
	jne .cmdline_copy

	/* Copy memmap_paddr after cmdline (only if hvm_start_info->version != 0) */
	xorl %ecx, %ecx
	cmpl START_INFO_VERSION(%ebx), %ecx
	je .save_hvm_info
	pushl %eax
	movl MMAP_PADDR(%ebx), %esi
	movl %edi, MMAP_PADDR(%eax)	/* Set new memmap_paddr in hvm_start_info */
	movl MMAP_ENTRIES(%ebx), %eax	/* Get memmap_entries */
	movl $MMAP_ENTRY_SIZE, %ebx
	mull %ebx			/* eax * ebx => edx:eax */
	movl %eax, %ecx
	shrl $2, %ecx
	rep movsl
	popl %eax

.save_hvm_info:
	/*
	 * %eax points to the start of hvm_start_info
	 * %edi points to the end
	 */
	addl    $KERNBASE,%eax
	movl	%eax,RELOC(hvm_start_info)

	/* round end to next page boundary */
	addl    $PGOFSET,%edi
	andl    $~PGOFSET,%edi

	/* get a page for HYPERVISOR_shared_info */
	/* this is only needed if we are running on Xen */
	cmpl	$VM_GUEST_XENPVH, RELOC(vm_guest)
	jne	.save_eblob
	movl	$RELOC(HYPERVISOR_shared_info_pa),%ebp
	movl	%edi,(%ebp)
	addl	$PAGE_SIZE, %edi

.save_eblob:
	addl    $KERNBASE,%edi
	movl	$RELOC(eblob),%ebp
	movl	%edi,(%ebp)
	jmp	.Lstart_common

.same_hvm_info:
	/* just use the provided %ebx */
	/* XXX assume hvm_start_info+dependent structures fit in a single page */
	movl %ebx, %eax
	movl %ebx, %edi
	addl    $PAGE_SIZE, %edi
	jmp .save_hvm_info
END(start_pvh)
	/*
	 * Minimal boot-time GDT for the PVH entry path: null descriptor
	 * plus flat 4GB ring-0 code and data segments.
	 */
	.align 8
gdtdesc_xenpvh:
	.word	gdt_xenpvhend - gdt_xenpvh	/* limit; NOTE(review): conventionally size-1, extra byte is harmless */
	.long	RELOC(gdt_xenpvh)		/* physical base of the table */
	.word	0				/* pad */
gdt_xenpvh:
	.long   0			# null descriptor
	.long   0
	.long   0x0000ffff		# %cs: base 0, 4GB limit, 32-bit code, DPL 0
	.long   0x00cf9a00
	.long   0x0000ffff		# %ds, %es, %ss: base 0, 4GB limit, data r/w, DPL 0
	.long   0x00cf9200
gdt_xenpvhend:
	.align 4
   1416 #endif /* !XENPV */
   1417 
   1418 
/*
 * Space for the hypercall call page: a page-aligned, page-sized area of
 * 32-byte slots, one per hypercall number (presumably populated by the
 * hypervisor at attach time -- confirm against the Xen setup code).
 * Until then the HYPERVISOR_xen_version() slot returns -1 and the rest
 * is NOP (0x90) padding.
 */
#define HYPERCALL_PAGE_OFFSET 0x1000
.align HYPERCALL_PAGE_OFFSET
ENTRY(hypercall_page) /* Returns -1, on HYPERVISOR_xen_version() */
.skip	(__HYPERVISOR_xen_version*32), 0x90	/* NOP-pad up to the xen_version slot */
	movl	$-1, %eax	/* placeholder until the page is filled in */
	retl
.align HYPERCALL_PAGE_OFFSET, 0x90	/* NOP-pad out to a full page */
END(hypercall_page)
   1428 
   1429 #ifdef XENPV
   1430 /*
   1431  * void lgdt_finish(void);
   1432  * Finish load a new GDT pointer (do any necessary cleanup).
   1433  * XXX It's somewhat questionable whether reloading all the segment registers
   1434  * is necessary, since the actual descriptor data is not changed except by
   1435  * process creation and exit, both of which clean up via task switches.  OTOH,
   1436  * this only happens at run time when the GDT is resized.
   1437  */
/* LINTSTUB: Func: void lgdt_finish(void) */
ENTRY(lgdt_finish)
	/* Reload every data segment register from the new GDT. */
	movl	$GSEL(GDATA_SEL, SEL_KPL),%eax
	movw	%ax,%ds
	movw	%ax,%es
	movw	%ax,%gs
	movw	%ax,%ss
	movl	$GSEL(GCPU_SEL, SEL_KPL),%eax	/* %fs addresses the CPU-local area */
	movw	%ax,%fs
	/* Reload code selector by doing intersegment return. */
	popl	%eax				/* caller's return address */
	pushl	$GSEL(GCODE_SEL, SEL_KPL)
	pushl	%eax
	lret					/* reloads %cs and returns */
END(lgdt_finish)
   1453 
   1454 #endif /* XENPV */
   1455 #endif /* XEN */
   1456 
   1457 /*
   1458  * void lwp_trampoline(void);
   1459  *
   1460  * This is a trampoline function pushed onto the stack of a newly created
   1461  * process in order to do some additional setup.  The trampoline is entered by
   1462  * cpu_switchto()ing to the process, so we abuse the callee-saved
   1463  * registers used by cpu_switchto() to store the information about the
   1464  * stub to call.
   1465  * NOTE: This function does not have a normal calling sequence!
   1466  */
ENTRY(lwp_trampoline)
	/*
	 * On entry (per the switchframe built for the new lwp):
	 *	%eax = previous lwp (cpu_switchto's return value)
	 *	%ebp = this lwp, %esi = stub function, %ebx = its argument
	 */
	movl	%ebp,%edi	/* for .Lsyscall_checkast: %edi = curlwp */
	xorl	%ebp,%ebp	/* mark end of frames for stack tracers */
	pushl	%edi
	pushl	%eax
	call	_C_LABEL(lwp_startup)	/* lwp_startup(prev, new) */
	addl	$8,%esp
	pushl	%ebx
	call	*%esi		/* invoke the stub with its single argument */
	addl	$4,%esp
	jmp	.Lsyscall_checkast	/* return to user via the syscall exit path */
	/* NOTREACHED */
END(lwp_trampoline)
   1480 
   1481 /*
   1482  * sigcode()
   1483  *
   1484  * Signal trampoline; copied to top of user stack.  Used only for
   1485  * compatibility with old releases of NetBSD.
   1486  */
ENTRY(sigcode)
	/*
	 * Handler has returned here as if we called it.  The sigcontext
	 * is on the stack after the 3 args "we" pushed.
	 */
	leal	12(%esp),%eax		/* get pointer to sigcontext */
	movl	%eax,4(%esp)		/* put it in the argument slot */
					/* fake return address already there */
	movl	$SYS_compat_16___sigreturn14,%eax
	int	$0x80	 		/* enter kernel with args on stack */
	movl	$SYS_exit,%eax
	int	$0x80			/* exit if sigreturn fails */
	/* esigcode marks the end of the trampoline copied to the user stack */
	.globl	_C_LABEL(esigcode)
_C_LABEL(esigcode):
END(sigcode)
   1502 
   1503 /*
   1504  * int setjmp(label_t *)
   1505  *
   1506  * Used primarily by DDB.
   1507  */
ENTRY(setjmp)
	/*
	 * Capture the callee-saved register set and the return address
	 * into the label_t argument, then return 0.  A later longjmp()
	 * on the same label_t resumes after this call, returning 1.
	 */
	movl	4(%esp),%eax		/* eax = label buffer */
	movl	(%esp),%edx		/* edx = caller's return address */
	movl	%ebx,(%eax)		/* callee-saved registers... */
	movl	%esp,4(%eax)
	movl	%ebp,8(%eax)
	movl	%esi,12(%eax)
	movl	%edi,16(%eax)
	movl	%edx,20(%eax)		/* ...and the resume point */
	xorl	%eax,%eax		/* direct return yields 0 */
	ret
END(setjmp)
   1520 
   1521 /*
   1522  * int longjmp(label_t *)
   1523  *
   1524  * Used primarily by DDB.
   1525  */
ENTRY(longjmp)
	/*
	 * Resume the context captured by setjmp() in the label_t
	 * argument.  Control reappears at setjmp's return, with 1 in
	 * %eax to distinguish it from the direct 0 return.
	 */
	movl	4(%esp),%edx		/* edx = label buffer */
	movl	20(%edx),%ecx		/* ecx = saved resume eip */
	movl	(%edx),%ebx		/* restore callee-saved registers */
	movl	8(%edx),%ebp
	movl	12(%edx),%esi
	movl	16(%edx),%edi
	movl	4(%edx),%esp		/* switch back to the saved stack */
	movl	%ecx,(%esp)		/* rewrite return address: resume there */
	movl	$1,%eax			/* "returning from longjmp" */
	ret
END(longjmp)
   1538 
   1539 /*
   1540  * void dumpsys(void)
   1541  *
   1542  * Mimic cpu_switchto() for postmortem debugging.
   1543  */
ENTRY(dumpsys)
	pushl	%ebx			/* set up fake switchframe */
	pushl	%esi			/* and save context */
	pushl	%edi
	/* Record our context in dumppcb so the dump resembles a switch. */
	movl	%esp,_C_LABEL(dumppcb)+PCB_ESP
	movl	%ebp,_C_LABEL(dumppcb)+PCB_EBP
	call	_C_LABEL(dodumpsys)	/* dump! */
	addl	$(3*4), %esp		/* unwind switchframe */
	ret
END(dumpsys)
   1554 
   1555 /*
   1556  * struct lwp *cpu_switchto(struct lwp *oldlwp, struct lwp *newlwp,
   1557  *     bool returning)
   1558  *
   1559  *	1. save context of oldlwp.
   1560  *	2. restore context of newlwp.
   1561  *
   1562  * Note that the stack frame layout is known to "struct switchframe" in
   1563  * <machine/frame.h> and to the code in cpu_lwp_fork() which initializes
   1564  * it for a new lwp.
   1565  */
ENTRY(cpu_switchto)
	pushl	%ebx
	pushl	%esi
	pushl	%edi

	movl	16(%esp),%esi		/* oldlwp */
	movl	20(%esp),%edi		/* newlwp */
	movl	24(%esp),%edx		/* returning */

	/* Save old context. */
	movl	L_PCB(%esi),%eax
	movl	%esp,PCB_ESP(%eax)
	movl	%ebp,PCB_EBP(%eax)

	/* Switch to newlwp's stack. */
	movl	L_PCB(%edi),%ebx	/* %ebx = new pcb, stays live below */
	movl	PCB_EBP(%ebx),%ebp
	movl	PCB_ESP(%ebx),%esp

	/*
	 * Issue XCHG, rather than MOV, to set ci_curlwp := newlwp in
	 * order to coordinate mutex_exit on this CPU with
	 * mutex_vector_enter on another CPU.
	 *
	 * 1. Any prior mutex_exit by oldlwp must be visible to other
	 *    CPUs before we set ci_curlwp := newlwp on this one,
	 *    requiring a store-before-store barrier.
	 *
	 *    (This is always guaranteed by the x86 memory model, TSO,
	 *    but other architectures require an explicit barrier before
	 *    the store to ci->ci_curlwp.)
	 *
	 * 2. ci_curlwp := newlwp must be visible on all other CPUs
	 *    before any subsequent mutex_exit by newlwp can even test
	 *    whether there might be waiters, requiring a
	 *    store-before-load barrier.
	 *
	 *    (This is the only ordering x86 TSO ever requires any kind
	 *    of barrier for -- in this case, we take advantage of the
	 *    sequential consistency implied by XCHG to obviate the
	 *    need for MFENCE or something.)
	 *
	 * See kern_mutex.c for details -- this is necessary for
	 * adaptive mutexes to detect whether the lwp is on the CPU in
	 * order to safely block without requiring atomic r/m/w in
	 * mutex_exit.
	 */
	movl	%edi,%ecx
	xchgl	%ecx,CPUVAR(CURLWP)

#ifdef XENPV
	/* If we are here, we're obviously not in user context.
	 * Reset ci_xen_clockf_* in case the splx() at the end of mi_switch()
	 * triggers a deferred call to xen_timer_handler()
	 */
	movb	$0, CPUVAR(XEN_CLOCKF_USERMODE)
	movl	$_C_LABEL(cpu_switchto), CPUVAR(XEN_CLOCKF_PC)
#endif

	/* Skip the rest if returning to a pinned LWP. */
	testl	%edx,%edx
	jnz	switch_return

	/* Switch ring0 stack */
#ifdef XENPV
	pushl	%edi
	call	_C_LABEL(i386_switch_context)
	addl	$4,%esp
#else
	movl	PCB_ESP0(%ebx),%eax
	movl	CPUVAR(TSS),%ecx
	movl	%eax,TSS_ESP0(%ecx)
#endif

	/* Switch the dbregs. */
	pushl	%edi
	pushl	%esi
	call	_C_LABEL(x86_dbregs_switch)	/* (oldlwp, newlwp) */
	addl	$8,%esp

	/* Switch the FPU. */
	pushl	%edx			/* save %edx ('returning') around the call */
	pushl	%edi
	pushl	%esi
	call	_C_LABEL(fpu_switch)	/* (oldlwp, newlwp) */
	addl	$8,%esp			/* pop the two arguments */
	popl	%edx			/* restore saved %edx */

	/* Don't bother with the rest if switching to a system process. */
	testl	$LW_SYSTEM,L_FLAG(%edi)
	jnz	switch_return

#ifndef XENPV
	/* Restore thread-private %fs/%gs descriptors. */
	movl	CPUVAR(GDT),%ecx
	movl	PCB_FSD(%ebx),%eax
	movl	PCB_FSD+4(%ebx),%edx
	movl	%eax,(GUFS_SEL*8)(%ecx)
	movl	%edx,(GUFS_SEL*8+4)(%ecx)
	movl	PCB_GSD(%ebx),%eax
	movl	PCB_GSD+4(%ebx),%edx
	movl	%eax,(GUGS_SEL*8)(%ecx)
	movl	%edx,(GUGS_SEL*8+4)(%ecx)
#endif /* !XENPV */

	/* Switch I/O bitmap */
	movl	PCB_IOMAP(%ebx),%eax
	orl	%eax,%eax		/* NULL -> no per-lwp bitmap */
	jnz	.Lcopy_iobitmap
	movl	CPUVAR(TSS),%eax
	movl	$(IOMAP_INVALOFF << 16),TSS_IOBASE(%eax)
.Liobitmap_done:

	/* Is this process using RAS (restartable atomic sequences)? */
	movl	L_PROC(%edi),%eax
	cmpl	$0,P_RASLIST(%eax)
	je	no_RAS

	/* Handle restartable atomic sequences (RAS). */
	movl	L_MD_REGS(%edi),%ecx
	pushl	TF_EIP(%ecx)
	pushl	%eax
	call	_C_LABEL(ras_lookup)
	addl	$8,%esp
	cmpl	$-1,%eax		/* -1: EIP not inside a sequence */
	je	no_RAS
	movl	L_MD_REGS(%edi),%ecx
	movl	%eax,TF_EIP(%ecx)	/* patch trapframe EIP with ras_lookup() result */
no_RAS:

#ifdef XENPV
	pushl	%edi
	call	_C_LABEL(i386_tls_switch)
	addl	$4,%esp
#endif

switch_return:
	/* Return to the new LWP, returning 'oldlwp' in %eax. */
	movl	%esi,%eax
	popl	%edi
	popl	%esi
	popl	%ebx
	ret

.Lcopy_iobitmap:
	/* Copy I/O bitmap. */
	incl	_C_LABEL(pmap_iobmp_evcnt)+EV_COUNT
	movl	$(IOMAPSIZE/4),%ecx
	pushl	%esi
	pushl	%edi
	movl	%eax,%esi		/* pcb_iomap */
	movl	CPUVAR(TSS),%edi
	leal	TSS_IOMAP(%edi),%edi
	rep
	movsl
	popl	%edi
	popl	%esi
	movl	CPUVAR(TSS),%eax
	movl	$(IOMAP_VALIDOFF << 16),TSS_IOBASE(%eax)
	jmp	.Liobitmap_done
END(cpu_switchto)
   1727 
   1728 /*
   1729  * void savectx(struct pcb *pcb);
   1730  *
   1731  * Update pcb, saving current processor state.
   1732  */
ENTRY(savectx)
	/* Record the current stack and frame pointers in *pcb. */
	movl	4(%esp),%eax		/* eax = target pcb */
	movl	%esp,PCB_ESP(%eax)	/* save stack pointer */
	movl	%ebp,PCB_EBP(%eax)	/* save frame pointer */
	ret
END(savectx)
   1739 
   1740 /*
   1741  * syscall()
   1742  *
   1743  * Trap gate entry for syscall
   1744  */
IDTVEC(syscall)
	pushl	$2		/* size of instruction for restart */
	pushl	$T_ASTFLT	/* trap # for doing ASTs */
	INTRENTRY
	STI(%eax)

#ifdef DIAGNOSTIC
	/* SPL must be zero on entry from user mode. */
	movzbl	CPUVAR(ILEVEL),%ebx
	testl	%ebx,%ebx
	jz	1f
	pushl	$5f
	call	_C_LABEL(panic)
	addl	$4,%esp
#ifdef DDB
	int	$3
#endif
1:
#endif /* DIAGNOSTIC */

	/* 64-bit increment of the per-CPU syscall counter. */
	addl	$1,CPUVAR(NSYSCALL)	/* count it atomically */
	adcl	$0,CPUVAR(NSYSCALL)+4	/* count it atomically */
	movl	CPUVAR(CURLWP),%edi	/* %edi = curlwp, live through the exit path */
	movl	L_PROC(%edi),%edx
	movl	%esp,L_MD_REGS(%edi)	/* save pointer to frame */
	pushl	%esp
	call	*P_MD_SYSCALL(%edx)	/* get pointer to syscall() function */
	addl	$4,%esp
.Lsyscall_checkast:
	/* Check for ASTs on exit to user mode. */
	CLI(%eax)
	movl	L_MD_ASTPENDING(%edi), %eax
	orl	CPUVAR(WANT_PMAPLOAD), %eax
	jnz	9f

	HANDLE_DEFERRED_FPU

#ifdef XENPV
	STIC(%eax)
	jz	14f
	call	_C_LABEL(stipending)
	testl	%eax,%eax
	jz	14f
	/* process pending interrupts */
	CLI(%eax)
	movzbl	CPUVAR(ILEVEL), %ebx
	movl	$.Lsyscall_resume, %esi /* address to resume loop at */
.Lsyscall_resume:
	movl	%ebx,%eax		/* get cpl */
	movl	CPUVAR(IUNMASK)(,%eax,4),%eax
	andl	CPUVAR(IPENDING),%eax	/* any non-masked bits left? */
	jz	17f
	bsrl	%eax,%eax		/* slot of highest-priority pending source */
	btrl	%eax,CPUVAR(IPENDING)
	movl	CPUVAR(ISOURCES)(,%eax,4),%eax
	jmp	*IS_RESUME(%eax)
17:	movb	%bl, CPUVAR(ILEVEL)	/* restore cpl  */
	jmp	.Lsyscall_checkast
14:
#endif /* XENPV */

#ifdef DIAGNOSTIC
	cmpb	$IPL_NONE,CPUVAR(ILEVEL)
	jne	3f
#endif

	INTRFASTEXIT

#ifdef DIAGNOSTIC
3:	STI(%eax)
	pushl	$4f
	call	_C_LABEL(panic)
	addl	$4,%esp
	pushl	$IPL_NONE
	call	_C_LABEL(spllower)
	addl	$4,%esp
	jmp	.Lsyscall_checkast
4:	.asciz	"SPL NOT LOWERED ON SYSCALL EXIT\n"
5:	.asciz	"SPL NOT ZERO ON SYSCALL ENTRY\n"
#endif

9:
	/* AST or deferred pmap load pending. */
	cmpl	$0, CPUVAR(WANT_PMAPLOAD)
	jz	10f
	STI(%eax)
	call	_C_LABEL(pmap_load)
	jmp	.Lsyscall_checkast	/* re-check ASTs */
10:
	/* Always returning to user mode here. */
	movl	$0, L_MD_ASTPENDING(%edi)
	STI(%eax)
	/* Pushed T_ASTFLT into tf_trapno on entry. */
	pushl	%esp
	call	_C_LABEL(trap)
	addl	$4,%esp
	jmp	.Lsyscall_checkast	/* re-check ASTs */
IDTVEC_END(syscall)
   1841 
   1842 /*
   1843  * int npx586bug1(int a, int b)
   1844  * Used when checking for the FDIV bug on first generations pentiums.
   1845  * Anything 120MHz or above is fine.
   1846  */
ENTRY(npx586bug1)
	/*
	 * Compute x - (x/y)*y in extended precision and return it as an
	 * int: a correct FPU yields 0, a flawed FDIV yields a nonzero
	 * residue for suitably chosen x/y.
	 */
	fildl	4(%esp)		/* x */
	fildl	8(%esp)		/* y */
	fld	%st(1)		/* st0=x, st1=y, st2=x */
	fdiv	%st(1),%st	/* x/y */
	fmulp	%st,%st(1)	/* (x/y)*y */
	fsubrp	%st,%st(1)	/* x-(x/y)*y */
	pushl	$0
	fistpl	(%esp)		/* store result as int on the stack */
	popl	%eax
	ret
END(npx586bug1)
   1859 
ENTRY(intrfastexit)
	/*
	 * Restore the full register state from the trap frame at %esp
	 * and return from the trap/interrupt.
	 */
	movw	TF_GS(%esp),%gs
	movw	TF_FS(%esp),%fs
	movw	TF_ES(%esp),%es
	movw	TF_DS(%esp),%ds
	movl	TF_EDI(%esp),%edi
	movl	TF_ESI(%esp),%esi
	movl	TF_EBP(%esp),%ebp
	movl	TF_EBX(%esp),%ebx
	movl	TF_EDX(%esp),%edx
	movl	TF_ECX(%esp),%ecx
	movl	TF_EAX(%esp),%eax
	addl	$(TF_PUSHSIZE+8),%esp	/* drop frame plus trapno/error words */
	iret
END(intrfastexit)
   1875 
   1876 	.section .rodata
   1877 
   1878 	/*
   1879 	 * Hotpatch templates.
   1880 	 */
   1881 
   1882 LABEL(hp_nolock)
   1883 	nop
   1884 LABEL(hp_nolock_end)
   1885 
   1886 LABEL(hp_retfence)
   1887 	lfence
   1888 LABEL(hp_retfence_end)
   1889 
   1890 LABEL(hp_clac)
   1891 	clac
   1892 LABEL(hp_clac_end)
   1893 
   1894 LABEL(hp_stac)
   1895 	stac
   1896 LABEL(hp_stac_end)
   1897