1 /* $NetBSD: locore.S,v 1.233 2025/09/09 11:34:25 bouyer Exp $ */ 2 3 /* 4 * Copyright-o-rama! 5 */ 6 7 /* 8 * Copyright (c) 1998, 2000, 2007, 2008, 2016 The NetBSD Foundation, Inc. 9 * All rights reserved. 10 * 11 * This code is derived from software contributed to The NetBSD Foundation 12 * by Charles M. Hannum and by Maxime Villard. 13 * 14 * Redistribution and use in source and binary forms, with or without 15 * modification, are permitted provided that the following conditions 16 * are met: 17 * 1. Redistributions of source code must retain the above copyright 18 * notice, this list of conditions and the following disclaimer. 19 * 2. Redistributions in binary form must reproduce the above copyright 20 * notice, this list of conditions and the following disclaimer in the 21 * documentation and/or other materials provided with the distribution. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 /* 37 * Copyright (c) 2007 Manuel Bouyer. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 49 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 50 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 51 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 52 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 53 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 54 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 55 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 56 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 57 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 58 * 59 */ 60 61 /* 62 * Copyright (c) 2006 Mathieu Ropert <mro (at) adviseo.fr> 63 * 64 * Permission to use, copy, modify, and distribute this software for any 65 * purpose with or without fee is hereby granted, provided that the above 66 * copyright notice and this permission notice appear in all copies. 67 * 68 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 69 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 70 * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 71 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 72 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 73 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 74 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 75 */ 76 77 /* 78 * Copyright (c) 2001 Wasabi Systems, Inc. 79 * All rights reserved. 80 * 81 * Written by Frank van der Linden for Wasabi Systems, Inc. 82 * 83 * Redistribution and use in source and binary forms, with or without 84 * modification, are permitted provided that the following conditions 85 * are met: 86 * 1. Redistributions of source code must retain the above copyright 87 * notice, this list of conditions and the following disclaimer. 88 * 2. Redistributions in binary form must reproduce the above copyright 89 * notice, this list of conditions and the following disclaimer in the 90 * documentation and/or other materials provided with the distribution. 91 * 3. All advertising materials mentioning features or use of this software 92 * must display the following acknowledgement: 93 * This product includes software developed for the NetBSD Project by 94 * Wasabi Systems, Inc. 95 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 96 * or promote products derived from this software without specific prior 97 * written permission. 98 * 99 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 100 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 101 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 102 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 103 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 104 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 105 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 106 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 107 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 108 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 109 * POSSIBILITY OF SUCH DAMAGE. 110 */ 111 112 /*- 113 * Copyright (c) 1990 The Regents of the University of California. 114 * All rights reserved. 115 * 116 * This code is derived from software contributed to Berkeley by 117 * William Jolitz. 118 * 119 * Redistribution and use in source and binary forms, with or without 120 * modification, are permitted provided that the following conditions 121 * are met: 122 * 1. Redistributions of source code must retain the above copyright 123 * notice, this list of conditions and the following disclaimer. 124 * 2. Redistributions in binary form must reproduce the above copyright 125 * notice, this list of conditions and the following disclaimer in the 126 * documentation and/or other materials provided with the distribution. 127 * 3. Neither the name of the University nor the names of its contributors 128 * may be used to endorse or promote products derived from this software 129 * without specific prior written permission. 130 * 131 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 132 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 133 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 134 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 135 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 136 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 137 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 138 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 139 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 140 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 141 * SUCH DAMAGE. 142 * 143 * @(#)locore.s 7.3 (Berkeley) 5/13/91 144 */ 145 146 /* Override user-land alignment before including asm.h */ 147 #define ALIGN_DATA .align 8 148 #define ALIGN_TEXT .align 16,0x90 149 #define _ALIGN_TEXT ALIGN_TEXT 150 151 #include <machine/asm.h> 152 153 #include "opt_kasan.h" 154 #include "opt_copy_symtab.h" 155 #include "opt_ddb.h" 156 #include "opt_ddbparam.h" 157 #include "opt_modular.h" 158 #include "opt_realmem.h" 159 #include "opt_selfreloc.h" 160 161 #include "opt_compat_netbsd.h" 162 #include "opt_compat_netbsd32.h" 163 #include "opt_xen.h" 164 #include "opt_svs.h" 165 166 #include "assym.h" 167 #include "lapic.h" 168 #include "ioapic.h" 169 #include "ksyms.h" 170 171 #include <sys/errno.h> 172 #include <sys/syscall.h> 173 174 #include <machine/pte.h> 175 #include <machine/segments.h> 176 #include <machine/specialreg.h> 177 #include <machine/trap.h> 178 #include <machine/bootinfo.h> 179 #include <machine/frameasm.h> 180 #include <machine/cputypes.h> 181 182 #if NLAPIC > 0 183 #include <machine/i82489reg.h> 184 #endif 185 186 /* Get definitions for IOM_BEGIN, IOM_END, and IOM_SIZE */ 187 #include <dev/isa/isareg.h> 188 189 #define _RELOC(x) ((x) - KERNBASE) 190 #define RELOC(x) _RELOC(_C_LABEL(x)) 191 192 /* 32bit version of PTE_NX */ 193 #define PTE_NX32 0x80000000 194 195 #if L2_SLOT_KERNBASE > 0 196 #define TABLE_L2_ENTRIES (2 * (NKL2_KIMG_ENTRIES + 1)) 197 #else 198 #define TABLE_L2_ENTRIES (NKL2_KIMG_ENTRIES + 1) 199 #endif 200 201 #if L3_SLOT_KERNBASE > 0 202 #define TABLE_L3_ENTRIES (2 * NKL3_KIMG_ENTRIES) 203 #else 204 #define TABLE_L3_ENTRIES NKL3_KIMG_ENTRIES 205 #endif 206 207 #define PROC0_PML4_OFF 0 208 #define PROC0_STK_OFF (PROC0_PML4_OFF + 1 * PAGE_SIZE) 209 #define PROC0_PTP3_OFF (PROC0_STK_OFF + UPAGES * PAGE_SIZE) 210 #define PROC0_PTP2_OFF (PROC0_PTP3_OFF + NKL4_KIMG_ENTRIES * PAGE_SIZE) 211 #define PROC0_PTP1_OFF (PROC0_PTP2_OFF + TABLE_L3_ENTRIES * PAGE_SIZE) 212 #define TABLESIZE \ 213 ((NKL4_KIMG_ENTRIES + TABLE_L3_ENTRIES + TABLE_L2_ENTRIES + 1 + UPAGES) \ 214 * PAGE_SIZE) 215 216 /* Amount of VA used to map the kernel, the syms and the preloaded modules */ 217 #define BOOTMAP_VA_SIZE \ 218 (NKL2_KIMG_ENTRIES * (1 << L2_SHIFT) - TABLESIZE - IOM_SIZE) 219 220 /* 221 * fillkpt - Fill in a kernel page table 222 * eax = pte (page frame | control | status) 223 * ebx = page table address 224 * ecx = number of pages to map 225 * 226 * Each entry is 8 (PDE_SIZE) bytes long: we must set the 4 upper bytes to 0. 227 */ 228 #define fillkpt \ 229 cmpl $0,%ecx ; /* zero-sized? */ \ 230 je 2f ; \ 231 1: movl $0,(PDE_SIZE-4)(%ebx) ; /* upper 32 bits: 0 */ \ 232 movl %eax,(%ebx) ; /* store phys addr */ \ 233 addl $PDE_SIZE,%ebx ; /* next PTE/PDE */ \ 234 addl $PAGE_SIZE,%eax ; /* next phys page */ \ 235 loop 1b ; \ 236 2: ; 237 238 /* 239 * fillkpt_nox - Same as fillkpt, but sets the NX/XD bit. 240 */ 241 #define fillkpt_nox \ 242 cmpl $0,%ecx ; /* zero-sized? 
*/ \ 243 je 2f ; \ 244 pushl %ebp ; \ 245 movl RELOC(nox_flag),%ebp ; \ 246 1: movl %ebp,(PDE_SIZE-4)(%ebx) ; /* upper 32 bits: NX */ \ 247 movl %eax,(%ebx) ; /* store phys addr */ \ 248 addl $PDE_SIZE,%ebx ; /* next PTE/PDE */ \ 249 addl $PAGE_SIZE,%eax ; /* next phys page */ \ 250 loop 1b ; \ 251 popl %ebp ; \ 252 2: ; 253 254 /* 255 * fillkpt_blank - Fill in a kernel page table with blank entries 256 * ebx = page table address 257 * ecx = number of pages to map 258 */ 259 #define fillkpt_blank \ 260 cmpl $0,%ecx ; /* zero-sized? */ \ 261 je 2f ; \ 262 1: movl $0,(PDE_SIZE-4)(%ebx) ; /* upper 32 bits: 0 */ \ 263 movl $0,(%ebx) ; /* lower 32 bits: 0 */ \ 264 addl $PDE_SIZE,%ebx ; /* next PTE/PDE */ \ 265 loop 1b ; \ 266 2: ; 267 268 /* 269 * killkpt - Destroy a kernel page table (long mode) 270 * rbx = page table address 271 * rcx = number of pages to destroy 272 */ 273 #define killkpt \ 274 1: movq $0,(%rbx) ; \ 275 addq $PDE_SIZE,%rbx ; \ 276 loop 1b ; 277 278 /* record boot start cycle count */ 279 #define getstarttsc \ 280 rdtsc ; \ 281 movl %eax, RELOC(starttsc_lo) ; \ 282 movl %edx, RELOC(starttsc_hi) ; 283 284 #ifdef XEN 285 #define __ASSEMBLY__ 286 #include <xen/include/public/arch-x86/cpuid.h> 287 #include <xen/include/public/elfnote.h> 288 #include <xen/include/public/xen.h> 289 290 #define ELFNOTE(name, type, desctype, descdata...) \ 291 .pushsection .note.name, "a", @note ; \ 292 .align 4 ; \ 293 .long 2f - 1f /* namesz */ ; \ 294 .long 4f - 3f /* descsz */ ; \ 295 .long type ; \ 296 1:.asciz #name ; \ 297 2:.align 4 ; \ 298 3:desctype descdata ; \ 299 4:.align 4 ; \ 300 .popsection 301 302 /* 303 * Xen guest identifier and loader selection 304 */ 305 .section __xen_guest 306 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz, "NetBSD") 307 ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz, "4.99") 308 ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz, "xen-3.0") 309 ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .quad, KERNBASE) 310 #ifdef XENPV 311 ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, KERNBASE) 312 ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .quad, start) 313 #else 314 ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, .quad, 0) 315 ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, .long, RELOC(start_pvh)) 316 #endif /* XENPV */ 317 ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .quad, hypercall_page) 318 ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .quad, HYPERVISOR_VIRT_START) 319 ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz, "writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel|hvm_callback_vector") 320 ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz, "yes") 321 ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, .long, PTE_P, PTE_P)\ 322 ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz, "generic") 323 ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long, 0) 324 #if NKSYMS > 0 || defined(DDB) || defined(MODULAR) 325 ELFNOTE(Xen, XEN_ELFNOTE_BSD_SYMTAB, .asciz, "yes") 326 #endif 327 #endif /* XEN */ 328 329 /* 330 * Initialization 331 */ 332 .data 333 334 .globl _C_LABEL(tablesize) 335 .globl _C_LABEL(nox_flag) 336 .globl _C_LABEL(cputype) 337 .globl _C_LABEL(cpuid_level) 338 .globl _C_LABEL(esym) 339 .globl _C_LABEL(eblob) 340 .globl _C_LABEL(atdevbase) 341 .globl _C_LABEL(PDPpaddr) 342 .globl _C_LABEL(boothowto) 343 .globl _C_LABEL(bootinfo) 344 .globl _C_LABEL(biosbasemem) 345 .globl _C_LABEL(biosextmem) 346 .globl _C_LABEL(lwp0uarea) 347 .globl do_mov_es 348 .globl do_mov_ds 349 .globl do_mov_fs 350 .globl do_mov_gs 351 .globl do_iret 352 353 .type _C_LABEL(tablesize), @object 354 _C_LABEL(tablesize): .long TABLESIZE 355 END(tablesize) 356 .type 
_C_LABEL(nox_flag), @object 357 LABEL(nox_flag) .long 0 /* 32bit NOX flag, set if supported */ 358 END(nox_flag) 359 .type _C_LABEL(cputype), @object 360 LABEL(cputype) .long 0 /* are we 80486, Pentium, or.. */ 361 END(cputype) 362 .type _C_LABEL(cpuid_level), @object 363 LABEL(cpuid_level) .long -1 /* max. level accepted by cpuid instr */ 364 END(cpuid_level) 365 .type _C_LABEL(esym), @object 366 LABEL(esym) .quad 0 /* ptr to end of syms */ 367 END(esym) 368 .type _C_LABEL(eblob), @object 369 LABEL(eblob) .quad 0 /* ptr to end of modules */ 370 END(eblob) 371 .type _C_LABEL(atdevbase), @object 372 LABEL(atdevbase) .quad 0 /* location of start of iomem in virt */ 373 END(atdevbase) 374 .type _C_LABEL(PDPpaddr), @object 375 LABEL(PDPpaddr) .quad 0 /* paddr of PTD, for libkvm */ 376 END(PDPpaddr) 377 .type _C_LABEL(biosbasemem), @object 378 #ifndef REALBASEMEM 379 LABEL(biosbasemem) .long 0 /* base memory reported by BIOS */ 380 #else 381 LABEL(biosbasemem) .long REALBASEMEM 382 #endif 383 END(biosbasemem) 384 .type _C_LABEL(biosextmem), @object 385 #ifndef REALEXTMEM 386 LABEL(biosextmem) .long 0 /* extended memory reported by BIOS */ 387 #else 388 LABEL(biosextmem) .long REALEXTMEM 389 #endif 390 END(biosextmem) 391 .type _C_LABEL(lwp0uarea), @object 392 LABEL(lwp0uarea) .quad 0 393 END(lwp0uarea) 394 .type _C_LABEL(starttsc_lo), @object 395 LABEL(starttsc_lo) .long 0 /* low part of rdtsc */ 396 END(starttsc_lo) 397 .type _C_LABEL(starttsc_hi), @object 398 LABEL(starttsc_hi) .long 0 /* high part of rdtsc */ 399 END(starttsc_hi) 400 401 #ifndef XENPV 402 .globl gdt64_lo 403 .globl gdt64_hi 404 405 #define GDT64_LIMIT gdt64_end-gdt64_start-1 406 /* Temporary gdt64, with base address in low memory */ 407 .type _C_LABEL(gdt64_lo), @object 408 LABEL(gdt64_lo) 409 .word GDT64_LIMIT 410 .quad _RELOC(gdt64_start) 411 END(gdt64_lo) 412 .align 64 413 414 /* Temporary gdt64, with base address in high memory */ 415 .type _C_LABEL(gdt64_hi), @object 416 LABEL(gdt64_hi) 417 .word GDT64_LIMIT 418 .quad gdt64_start 419 END(gdt64_hi) 420 .align 64 421 #undef GDT64_LIMIT 422 423 .type _C_LABEL(gdt64_start), @object 424 _C_LABEL(gdt64_start): 425 .quad 0x0000000000000000 /* always empty */ 426 .quad 0x00af9a000000ffff /* kernel CS */ 427 .quad 0x00cf92000000ffff /* kernel DS */ 428 END(gdt64_start) 429 gdt64_end: 430 431 .type _C_LABEL(farjmp64), @object 432 _C_LABEL(farjmp64): 433 .long _RELOC(longmode) 434 .word GSEL(GCODE_SEL, SEL_KPL) 435 END(farjmp64) 436 437 #ifdef XEN 438 /* 32bit GDT */ 439 gdtdesc32: 440 .word gdt32end - gdt32 441 .long RELOC(gdt32) 442 .long 0 443 gdt32: 444 .long 0 # null descriptor 445 .long 0 446 .long 0x0000ffff # %cs 447 .long 0x00cf9a00 448 .long 0x0000ffff # %ds, %es, %ss 449 .long 0x00cf9200 450 gdt32end: 451 #endif /* XEN */ 452 #endif /* !XENPV */ 453 454 /* Space for the temporary stack */ 455 .size tmpstk, tmpstk - . 456 .space 512 457 tmpstk: 458 459 /* 460 * Some hackage to deal with 64bit symbols in 32 bit mode. 461 * This may not be needed if things are cleaned up a little. 
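 *
 * For example, while still executing 32bit code below, 64bit virtual
 * addresses such as 'esym' are assembled from two 32bit halves
 * (KERNBASE_LO/KERNBASE_HI), and RELOC(x) yields the low 32 bits of
 * &x - KERNBASE, so that kernel symbols can be used as physical
 * addresses before paging is enabled.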
462 */ 463 464 .text 465 .globl _C_LABEL(kernel_text) 466 .set _C_LABEL(kernel_text),KERNTEXTOFF 467 468 ENTRY(start) 469 #ifndef XENPV 470 .code32 471 #ifdef BOOT_DURATION 472 getstarttsc 473 #endif 474 475 #ifdef SELFRELOC 476 call next 477 next: pop %edi 478 sub $(next - kernel_text), %edi 479 480 /* If not KERNBASE, reloc ourselves to KERNBASE */ 481 cmpl $(KERNTEXTOFF_LO - KERNBASE_LO), %edi 482 jne selfreloc_start 483 #endif /* SELFRELOC */ 484 485 /* Warm boot */ 486 movw $0x1234,0x472 487 488 /* 489 * Load parameters from the stack (32 bits): 490 * boothowto, [bootdev], bootinfo, esym, biosextmem, biosbasemem 491 * We are not interested in 'bootdev'. 492 */ 493 494 /* Load 'boothowto' */ 495 movl 4(%esp),%eax 496 movl %eax,RELOC(boothowto) 497 498 /* Load 'bootinfo' */ 499 movl 12(%esp),%eax 500 testl %eax,%eax /* bootinfo = NULL? */ 501 jz .Lbootinfo_finished 502 503 movl (%eax),%ebx /* bootinfo::bi_nentries */ 504 movl $RELOC(bootinfo),%ebp 505 movl %ebp,%edx 506 addl $BOOTINFO_MAXSIZE,%ebp 507 movl %ebx,(%edx) 508 addl $4,%edx 509 510 .Lbootinfo_entryloop: 511 testl %ebx,%ebx /* no remaining entries? */ 512 jz .Lbootinfo_finished 513 514 addl $4,%eax 515 movl (%eax),%ecx /* address of entry */ 516 pushl %edi 517 pushl %esi 518 pushl %eax 519 520 movl (%ecx),%eax /* btinfo_common::len (size of entry) */ 521 movl %edx,%edi 522 addl %eax,%edx /* update dest pointer */ 523 cmpl %ebp,%edx /* beyond bootinfo+BOOTINFO_MAXSIZE? */ 524 jg .Lbootinfo_overflow 525 526 movl %ecx,%esi 527 movl %eax,%ecx 528 529 /* 530 * If any modules were loaded, record where they end. 'eblob' is used 531 * later to compute the initial bootstrap tables. 532 */ 533 cmpl $BTINFO_MODULELIST,4(%esi) /* btinfo_common::type */ 534 jne .Lbootinfo_copy 535 536 /* Skip the modules if we won't have enough VA to map them */ 537 movl 12(%esi),%eax /* btinfo_modulelist::endpa */ 538 addl $PGOFSET,%eax /* roundup to a page */ 539 andl $~PGOFSET,%eax 540 cmpl $BOOTMAP_VA_SIZE,%eax 541 jg .Lbootinfo_skip 542 movl %eax,RELOC(eblob) 543 addl $KERNBASE_LO,RELOC(eblob) 544 adcl $KERNBASE_HI,RELOC(eblob)+4 545 546 .Lbootinfo_copy: 547 rep 548 movsb /* copy esi -> edi */ 549 jmp .Lbootinfo_next 550 551 .Lbootinfo_skip: 552 subl %ecx,%edx /* revert dest pointer */ 553 554 .Lbootinfo_next: 555 popl %eax 556 popl %esi 557 popl %edi 558 subl $1,%ebx /* decrement the # of entries */ 559 jmp .Lbootinfo_entryloop 560 561 .Lbootinfo_overflow: 562 /* 563 * Cleanup for overflow case. Pop the registers, and correct the number 564 * of entries. 565 */ 566 popl %eax 567 popl %esi 568 popl %edi 569 movl $RELOC(bootinfo),%ebp 570 movl %ebp,%edx 571 subl %ebx,(%edx) /* correct the number of entries */ 572 .Lbootinfo_finished: 573 574 /* Load 'esym' */ 575 movl 16(%esp),%eax 576 testl %eax,%eax /* esym = NULL? */ 577 jz 1f 578 579 addl $KERNBASE_LO,%eax 580 581 1: 582 movl $RELOC(esym),%ebp 583 movl %eax,(%ebp) 584 movl $KERNBASE_HI,4(%ebp) 585 586 /* Load 'biosextmem' */ 587 movl $RELOC(biosextmem),%ebp 588 movl (%ebp),%eax 589 testl %eax,%eax /* already set? */ 590 jnz .Lbiosextmem_finished 591 592 movl 20(%esp),%eax 593 movl %eax,(%ebp) 594 595 .Lbiosextmem_finished: 596 /* Load 'biosbasemem' */ 597 movl $RELOC(biosbasemem),%ebp 598 movl (%ebp),%eax 599 testl %eax,%eax /* already set? */ 600 jnz .Lbiosbasemem_finished 601 602 movl 24(%esp),%eax 603 movl %eax,(%ebp) 604 605 .Lbiosbasemem_finished: 606 /* 607 * Done with the parameters! 608 */ 609 610 /* First, reset the PSL. 
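 * (PSL_MBO is the "must be one" bit in %eflags; loading %eflags with
 * just that value in effect clears the other flags, including IF and DF.)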
*/ 611 pushl $PSL_MBO 612 popfl 613 614 xorl %eax,%eax 615 cpuid 616 movl %eax,RELOC(cpuid_level) 617 618 /* 619 * Finished with old stack; load new %esp now instead of later so we 620 * can trace this code without having to worry about the trace trap 621 * clobbering the memory test or the zeroing of the bss+bootstrap page 622 * tables. 623 * 624 * The boot program should check: 625 * text+data <= &stack_variable - more_space_for_stack 626 * text+data+bss+pad+space_for_page_tables <= end_of_memory 627 * 628 * XXX: the gdt is in the carcass of the boot program so clearing 629 * the rest of memory is still not possible. 630 */ 631 movl $RELOC(tmpstk),%esp 632 633 /* 634 * Retrieve the NX/XD flag. We use the 32bit version of PTE_NX. 635 */ 636 movl $0x80000001,%eax 637 cpuid 638 andl $CPUID_NOX,%edx 639 jz .Lno_NOX 640 movl $PTE_NX32,RELOC(nox_flag) 641 .Lno_NOX: 642 643 /* 644 * There are four levels of pages in amd64: PML4 -> PDP -> PD -> PT. They will 645 * be referred to as: L4 -> L3 -> L2 -> L1. 646 * 647 * Virtual address space of the kernel: 648 * +------+--------+------+-----+--------+---------------------+---------- 649 * | TEXT | RODATA | DATA | BSS | [SYMS] | [PRELOADED MODULES] | L4 -> 650 * +------+--------+------+-----+--------+---------------------+---------- 651 * (1) (2) (3) 652 * 653 * --------------+-----+-----+----+-------------+ 654 * -> PROC0 STK -> L3 -> L2 -> L1 | ISA I/O MEM | 655 * --------------+-----+-----+----+-------------+ 656 * (4) 657 * 658 * PROC0 STK is obviously not linked as a page level. It just happens to be 659 * caught between L4 and L3. 660 * 661 * (PROC0 STK + L4 + L3 + L2 + L1) is later referred to as BOOTSTRAP TABLES. 662 * 663 * ISA I/O MEM has no physical page allocated here, just virtual addresses. 664 * 665 * Important note: the kernel segments are properly 4k-aligned 666 * (see kern.ldscript), so there's no need to enforce alignment. 667 */ 668 669 /* Find end of kernel image; brings us on (1). */ 670 movl $RELOC(__kernel_end),%edi 671 672 #if (NKSYMS || defined(DDB) || defined(MODULAR)) && !defined(makeoptions_COPY_SYMTAB) 673 /* Save the symbols (if loaded); brings us on (2). */ 674 movl RELOC(esym),%eax 675 testl %eax,%eax 676 jz 1f 677 subl $KERNBASE_LO,%eax /* XXX */ 678 movl %eax,%edi 679 1: 680 #endif 681 /* Skip over any modules/blobs; brings us on (3). */ 682 movl RELOC(eblob),%eax 683 testl %eax,%eax 684 jz 1f 685 subl $KERNBASE_LO,%eax /* XXX */ 686 movl %eax,%edi 687 1: 688 689 /* We are on (3). Align up for BOOTSTRAP TABLES. */ 690 movl %edi,%esi 691 addl $PGOFSET,%esi 692 andl $~PGOFSET,%esi 693 694 /* We are on the BOOTSTRAP TABLES. Save L4's physical address. */ 695 movl $RELOC(PDPpaddr),%ebp 696 movl %esi,(%ebp) 697 movl $0,4(%ebp) 698 699 /* Now, zero out the BOOTSTRAP TABLES (before filling them in). */ 700 movl %esi,%edi 701 xorl %eax,%eax 702 cld 703 movl $TABLESIZE,%ecx 704 shrl $2,%ecx 705 rep 706 stosl /* copy eax -> edi */ 707 708 /* 709 * Build the page tables and levels. We go from L1 to L4, and link the levels 710 * together. Note: RELOC computes &addr - KERNBASE in 32 bits; the value can't 711 * be > 4G, or we can't deal with it anyway, since we are in 32bit mode. 712 */ 713 /* 714 * Build L1. 715 */ 716 leal (PROC0_PTP1_OFF)(%esi),%ebx 717 718 /* Skip the area below the kernel text. */ 719 movl $(KERNTEXTOFF_LO - KERNBASE_LO),%ecx 720 shrl $PGSHIFT,%ecx 721 fillkpt_blank 722 723 /* Map the kernel text RX. 
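 *
 * In rough C-like pseudocode, the fillkpt invocation below amounts to
 * (a sketch only):
 *
 *	for (pa = KERNTEXTOFF - KERNBASE; pa < __rodata_start - KERNBASE;
 *	    pa += PAGE_SIZE)
 *		*pte++ = pa | PTE_P;	(no PTE_W and no NX: read+execute)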
*/ 724 movl $(KERNTEXTOFF_LO - KERNBASE_LO),%eax /* start of TEXT */ 725 movl $RELOC(__rodata_start),%ecx 726 subl %eax,%ecx 727 shrl $PGSHIFT,%ecx 728 orl $(PTE_P),%eax 729 fillkpt 730 731 /* Map the kernel rodata R. */ 732 movl $RELOC(__rodata_start),%eax 733 movl $RELOC(__data_start),%ecx 734 subl %eax,%ecx 735 shrl $PGSHIFT,%ecx 736 orl $(PTE_P),%eax 737 fillkpt_nox 738 739 /* Map the kernel data+bss RW. */ 740 movl $RELOC(__data_start),%eax 741 movl $RELOC(__kernel_end),%ecx 742 subl %eax,%ecx 743 shrl $PGSHIFT,%ecx 744 orl $(PTE_P|PTE_W),%eax 745 fillkpt_nox 746 747 /* Map [SYMS]+[PRELOADED MODULES] RW. */ 748 movl $RELOC(__kernel_end),%eax 749 movl %esi,%ecx /* start of BOOTSTRAP TABLES */ 750 subl %eax,%ecx 751 shrl $PGSHIFT,%ecx 752 orl $(PTE_P|PTE_W),%eax 753 fillkpt_nox 754 755 /* Map the BOOTSTRAP TABLES RW. */ 756 movl %esi,%eax /* start of BOOTSTRAP TABLES */ 757 movl $TABLESIZE,%ecx /* length of BOOTSTRAP TABLES */ 758 shrl $PGSHIFT,%ecx 759 orl $(PTE_P|PTE_W),%eax 760 fillkpt_nox 761 762 /* We are on (4). Map ISA I/O MEM RW. */ 763 movl $IOM_BEGIN,%eax 764 movl $IOM_SIZE,%ecx /* size of ISA I/O MEM */ 765 shrl $PGSHIFT,%ecx 766 orl $(PTE_P|PTE_W/*|PTE_PCD*/),%eax 767 fillkpt_nox 768 769 /* 770 * Build L2. Linked to L1. 771 */ 772 leal (PROC0_PTP2_OFF)(%esi),%ebx 773 leal (PROC0_PTP1_OFF)(%esi),%eax 774 orl $(PTE_P|PTE_W),%eax 775 movl $(NKL2_KIMG_ENTRIES+1),%ecx 776 fillkpt 777 778 #if L2_SLOT_KERNBASE > 0 779 /* If needed, set up level 2 entries for actual kernel mapping */ 780 leal (PROC0_PTP2_OFF + L2_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx 781 leal (PROC0_PTP1_OFF)(%esi),%eax 782 orl $(PTE_P|PTE_W),%eax 783 movl $(NKL2_KIMG_ENTRIES+1),%ecx 784 fillkpt 785 #endif 786 787 /* 788 * Build L3. Linked to L2. 789 */ 790 leal (PROC0_PTP3_OFF)(%esi),%ebx 791 leal (PROC0_PTP2_OFF)(%esi),%eax 792 orl $(PTE_P|PTE_W),%eax 793 movl $NKL3_KIMG_ENTRIES,%ecx 794 fillkpt 795 796 #if L3_SLOT_KERNBASE > 0 797 /* If needed, set up level 3 entries for actual kernel mapping */ 798 leal (PROC0_PTP3_OFF + L3_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx 799 leal (PROC0_PTP2_OFF)(%esi),%eax 800 orl $(PTE_P|PTE_W),%eax 801 movl $NKL3_KIMG_ENTRIES,%ecx 802 fillkpt 803 #endif 804 805 /* 806 * Build L4 for identity mapping. Linked to L3. 807 */ 808 leal (PROC0_PML4_OFF)(%esi),%ebx 809 leal (PROC0_PTP3_OFF)(%esi),%eax 810 orl $(PTE_P|PTE_W),%eax 811 movl $NKL4_KIMG_ENTRIES,%ecx 812 fillkpt 813 814 /* Set up L4 entries for actual kernel mapping */ 815 leal (PROC0_PML4_OFF + L4_SLOT_KERNBASE * PDE_SIZE)(%esi),%ebx 816 leal (PROC0_PTP3_OFF)(%esi),%eax 817 orl $(PTE_P|PTE_W),%eax 818 movl $NKL4_KIMG_ENTRIES,%ecx 819 fillkpt 820 821 /* 822 * Startup checklist: 823 * 1. Enable PAE (and SSE while here). 824 */ 825 movl %cr4,%eax 826 orl $(CR4_PAE|CR4_OSFXSR|CR4_OSXMMEXCPT),%eax 827 movl %eax,%cr4 828 829 /* 830 * 2. Set Long Mode Enable in EFER. Also enable the syscall extensions, 831 * and NOX if available. 832 */ 833 movl $MSR_EFER,%ecx 834 rdmsr 835 xorl %eax,%eax /* XXX */ 836 orl $(EFER_LME|EFER_SCE),%eax 837 movl RELOC(nox_flag),%ebx 838 cmpl $0,%ebx 839 je .Lskip_NOX 840 orl $(EFER_NXE),%eax 841 .Lskip_NOX: 842 wrmsr 843 844 /* 845 * 3. Load %cr3 with pointer to PML4. 846 */ 847 movl %esi,%eax 848 movl %eax,%cr3 849 850 /* 851 * 4. Enable paging and the rest of it. 852 */ 853 movl %cr0,%eax 854 orl $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_MP|CR0_WP|CR0_AM),%eax 855 movl %eax,%cr0 856 jmp compat 857 compat: 858 859 movl $RELOC(tmpstk),%esp 860 /* 861 * 5. 
Not quite done yet, we're now in a compatibility segment, in 862 * legacy mode. We must jump to a long mode segment. Need to set up 863 * a temporary GDT with a long mode segment in it to do that. 864 */ 865 movl $RELOC(gdt64_lo),%eax 866 lgdt (%eax) 867 movl $RELOC(farjmp64),%eax 868 ljmp *(%eax) 869 870 .code64 871 longmode: 872 /* 873 * 6. Finally, we're in long mode. However, we're still in the identity 874 * mapped area (could not jump out of that earlier because it would 875 * have been a > 32bit jump). We can do that now, so here we go. 876 */ 877 movabsq $longmode_hi,%rax 878 jmp *%rax 879 880 longmode_hi: 881 882 /* 883 * We left the identity mapped area. Base address of 884 * the temporary gdt64 should now be in high memory. 885 */ 886 movq $RELOC(gdt64_hi),%rax 887 lgdt (%rax) 888 889 /* 890 * We have arrived. There's no need anymore for the identity mapping in 891 * low memory, remove it. 892 */ 893 movq $KERNBASE,%r8 894 895 #if L2_SLOT_KERNBASE > 0 896 movq $(NKL2_KIMG_ENTRIES+1),%rcx 897 leaq (PROC0_PTP2_OFF)(%rsi),%rbx /* old, phys address */ 898 addq %r8,%rbx /* new, virt address */ 899 killkpt 900 #endif 901 902 #if L3_SLOT_KERNBASE > 0 903 movq $NKL3_KIMG_ENTRIES,%rcx 904 leaq (PROC0_PTP3_OFF)(%rsi),%rbx /* old, phys address */ 905 addq %r8,%rbx /* new, virt address */ 906 killkpt 907 #endif 908 909 movq $NKL4_KIMG_ENTRIES,%rcx 910 leaq (PROC0_PML4_OFF)(%rsi),%rbx /* old, phys address of PML4 */ 911 addq %r8,%rbx /* new, virt address of PML4 */ 912 killkpt 913 914 /* Relocate atdevbase. */ 915 movq $(TABLESIZE+KERNBASE),%rdx 916 addq %rsi,%rdx 917 movq %rdx,_C_LABEL(atdevbase)(%rip) 918 919 /* Set up bootstrap stack. */ 920 leaq (PROC0_STK_OFF)(%rsi),%rax 921 addq %r8,%rax 922 movq %rax,_C_LABEL(lwp0uarea)(%rip) 923 leaq (USPACE-FRAMESIZE)(%rax),%rsp 924 xorq %rbp,%rbp /* mark end of frames */ 925 926 xorw %ax,%ax 927 movw %ax,%gs 928 movw %ax,%fs 929 930 /* The first physical page available. */ 931 leaq (TABLESIZE)(%rsi),%rdi 932 933 #else /* XENPV */ 934 /* First, reset the PSL. */ 935 pushq $2 936 popfq 937 938 cld 939 940 /* 941 * Xen info: 942 * - %rsi -> start_info struct 943 * - %rsp -> stack, *theoretically* the last used page by Xen bootstrap 944 */ 945 movq %rsi,%rbx 946 947 /* Clear BSS. */ 948 xorq %rax,%rax 949 movq $_C_LABEL(__bss_start),%rdi 950 movq $_C_LABEL(_end),%rcx 951 subq %rdi,%rcx 952 rep 953 stosb 954 955 /* Copy start_info to a safe place. */ 956 movq %rbx,%rsi 957 movq $_C_LABEL(start_info_union),%rdi 958 movq $(PAGE_SIZE / 8),%rcx 959 rep 960 movsq 961 962 /* 963 * Memory layout at start of the day: 964 * - Kernel image 965 * - Page frames list 966 * - start_info struct. we copied it, so it can be recycled. 967 * - xenstore 968 * - console 969 * - Xen bootstrap page tables 970 * - kernel stack. provided by Xen 971 * - guaranteed 512kB padding 972 * 973 * As we want to rebuild our page tables and place our stack 974 * in proc0 struct, all data starting from after console can be 975 * discarded after we've done a little setup. 976 */ 977 978 /* 979 * We want our own page tables, and will rebuild them. We will reclaim 980 * the Xen space later, INCLUDING the stack. So we need to switch to a 981 * temporary one now. 
982 */ 983 movq $tmpstk,%rax 984 subq $8,%rax 985 movq %rax,%rsp 986 987 xorl %eax,%eax 988 cpuid 989 movl %eax,_C_LABEL(cpuid_level) 990 991 movl $VM_GUEST_XENPV, _C_LABEL(vm_guest) 992 993 /* 994 * Initialize cpu_info_primary.ci_self := &cpu_info_primary, 995 * and initialize some MSRs with 996 * cpu_init_msrs(&cpu_info_primary, full=true). This sets up 997 * SYSCALL/SYSRET (XXX why?) and %fs/%gs, which is needed for 998 * the %gs-relative addressing used by CPUVAR(...), curcpu(), 999 * and curlwp. 1000 * 1001 * XXX Is it necessary to set cpu_info_primary.ci_self here? 1002 * Isn't it statically initialized in x86/cpu.c? 1003 * 1004 * XXX Why do we immediately clear the segment registers just 1005 * afterward? 1006 */ 1007 movq $cpu_info_primary,%rdi 1008 movq %rdi,CPU_INFO_SELF(%rdi) /* ci->ci_self = ci */ 1009 movq $1,%rsi 1010 call cpu_init_msrs /* cpu_init_msrs(ci, true); */ 1011 1012 call xen_locore 1013 1014 /* 1015 * The first VA available is returned by xen_locore in %rax. We 1016 * use it as the UAREA, and set up the stack here. 1017 */ 1018 movq %rax,%rsi 1019 movq %rsi,_C_LABEL(lwp0uarea)(%rip) 1020 leaq (USPACE-FRAMESIZE)(%rsi),%rsp 1021 xorq %rbp,%rbp 1022 1023 /* Clear segment registers. */ 1024 xorw %ax,%ax 1025 movw %ax,%gs 1026 movw %ax,%fs 1027 1028 /* Set first_avail after the DUMMY PAGE (see xen_locore). */ 1029 movq %rsi,%rdi 1030 addq $(USPACE+PAGE_SIZE),%rdi 1031 subq $KERNBASE,%rdi /* init_x86_64 wants a physical address */ 1032 #endif /* XENPV */ 1033 1034 pushq %rdi 1035 call _C_LABEL(init_bootspace) 1036 #ifdef KASAN 1037 movq _C_LABEL(lwp0uarea)(%rip),%rdi 1038 call _C_LABEL(kasan_early_init) 1039 #endif 1040 /* <-- DO NOT INSERT C CALLS BEFORE THIS POINT --> */ 1041 #if defined(XEN) && !defined(XENPV) 1042 call _C_LABEL(init_xen_early) 1043 #endif 1044 call _C_LABEL(init_slotspace) 1045 popq %rdi 1046 call _C_LABEL(init_x86_64) 1047 call _C_LABEL(main) 1048 END(start) 1049 1050 #if defined(XEN) 1051 # if !defined(XENPV) 1052 /* entry point for Xen PVH */ 1053 .code32 1054 ENTRY(start_pvh) 1055 #ifdef BOOT_DURATION 1056 getstarttsc 1057 #endif 1058 /* Xen doesn't start us with a valid gdt */ 1059 movl $RELOC(gdtdesc32), %eax 1060 lgdt (%eax) 1061 jmp $GSEL(GCODE_SEL, SEL_KPL), $RELOC(.Lreload_cs) 1062 1063 .Lreload_cs: 1064 movw $GSEL(GDATA_SEL, SEL_KPL), %ax 1065 movw %ax, %ds 1066 movw %ax, %es 1067 movw %ax, %ss 1068 1069 /* we need a valid stack */ 1070 movl $RELOC(tmpstk),%esp 1071 1072 /* clear BSS */ 1073 xorl %eax,%eax 1074 movl $RELOC(__bss_start),%edi 1075 movl $RELOC(_end),%ecx 1076 subl %edi,%ecx 1077 rep 1078 stosb 1079 1080 /* 1081 * Here, we have 2 cases : 1082 * 1083 * 1) We have been started by Xen 1084 * 2) We have been started by another VMM (Qemu, Firecracker, ...) 1085 * 1086 * The main difference is that, when we are started by Xen, 1087 * %ebx (addr of the hvm_start_info structure) is pointing to a 1088 * location that will be mapped correctly later. 
1089 * 1090 * In the second case, we have to copy this structure (and all 1091 * the information contained in it) to a location that will be 1092 * mapped later : __kernel_end 1093 * 1094 * To distinguish between the 2 cases, we'll use the 'cpuid' instruction 1095 */ 1096 push %ebx 1097 xorl %eax, %eax 1098 cpuid 1099 cmpl $0x1, %eax /* Check if we can call CPUID with eax=1 */ 1100 jb .start_genpvh 1101 xorl %eax, %eax 1102 inc %eax 1103 cpuid 1104 shr $31, %ecx 1105 testb $1, %cl /* Check if bit 31 of ECX (hypervisor) is set */ 1106 jz .start_genpvh 1107 xorl %eax, %eax 1108 inc %eax 1109 shl $30, %eax 1110 cpuid /* Calling cpuid with eax=0x40000000 */ 1111 cmp $XEN_CPUID_SIGNATURE_EBX, %ebx /* "VneX" */ 1112 je .start_xen32 1113 1114 /* We have been started by a VMM that is *not* Xen */ 1115 1116 .start_genpvh: 1117 1118 /* announce ourself */ 1119 movl $VM_GUEST_GENPVH, RELOC(vm_guest) 1120 1121 pop %ebx 1122 movl $RELOC(__kernel_end), %eax 1123 movl %eax, %ecx 1124 addl $KERNBASE_LO,%ecx 1125 movl $RELOC(esym),%ebp 1126 movl %ecx,(%ebp) 1127 movl $KERNBASE_HI,4(%ebp) 1128 1129 jmp .copy_hvm_info 1130 1131 .start_xen32: 1132 movl $VM_GUEST_XENPVH, RELOC(vm_guest) 1133 /* 1134 * Read the size of the symbol table, sanity-check and compute the end 1135 * We have: 1136 * | kernel | 1137 * -------------- kernel_end 1138 * alignment 1139 * -------------- bsd_symtab 1140 * | size (int) | 1141 * | elf_header | 1142 * 1143 */ 1144 movl $RELOC(__kernel_end), %ebp 1145 addl $3, %ebp 1146 andl $~3, %ebp 1147 movl 0(%ebp), %eax /* read size */ 1148 testl $~0x00ffffff, %eax /* more than 16MB ? */ 1149 jnz .bad_esym 1150 addl %ebp, %eax /* compute esym */ 1151 /* check if start_info is within symbol table */ 1152 movl 0(%esp), %ebx 1153 cmp %ebp, %ebx 1154 jb .save_esym /* %ebx < __kernel_end */ 1155 cmp %eax, %ebx 1156 jae .save_esym /* %ebx >= esym */ 1157 1158 .bad_esym: 1159 movl $RELOC(__kernel_end), %eax 1160 .save_esym: 1161 movl %eax, %ebx 1162 addl $KERNBASE_LO,%ebx 1163 movl $RELOC(esym),%ebp 1164 movl %ebx,(%ebp) 1165 movl $KERNBASE_HI,4(%ebp) 1166 /* advance to next page boundary, this will be our hvm_start_info */ 1167 addl $PGOFSET,%eax 1168 andl $~PGOFSET,%eax 1169 pop %ebx 1170 1171 .copy_hvm_info: 1172 /* 1173 * %ebx points to physical address provided by Xen 1174 * %eax points to where we want it to be copied to 1175 */ 1176 /* check if %ebx and %eax are in the same page */ 1177 movl %ebx, %esi 1178 addl $PGOFSET,%esi 1179 andl $~PGOFSET,%esi 1180 cmp %esi, %eax 1181 je .same_hvm_info 1182 1183 /* First, copy the hvm_start_info structure to %eax */ 1184 movl %ebx, %esi 1185 movl %eax, %edi 1186 movl $HVM_START_INFO_SIZE, %ecx 1187 shrl $2, %ecx 1188 rep movsl 1189 1190 /* Copy cmdline_paddr after hvm_start_info */ 1191 movl CMDLINE_PADDR(%ebx), %esi 1192 movl %edi, CMDLINE_PADDR(%eax) /* Set new cmdline_paddr in hvm_start_info */ 1193 .cmdline_copy: 1194 movb (%esi), %cl 1195 movsb 1196 cmp $0, %cl 1197 jne .cmdline_copy 1198 1199 /* Copy memmap_paddr after cmdline (only if hvm_start_info->version != 0) */ 1200 xorl %ecx, %ecx 1201 cmpl START_INFO_VERSION(%ebx), %ecx 1202 je .save_hvm_info 1203 pushl %eax 1204 movl MMAP_PADDR(%ebx), %esi 1205 movl %edi, MMAP_PADDR(%eax) /* Set new memmap_paddr in hvm_start_info */ 1206 movl MMAP_ENTRIES(%ebx), %eax /* Get memmap_entries */ 1207 movl $MMAP_ENTRY_SIZE, %ebx 1208 mull %ebx /* eax * ebx => edx:eax */ 1209 movl %eax, %ecx 1210 shrl $2, %ecx 1211 rep movsl 1212 popl %eax 1213 1214 .save_hvm_info: 1215 /* 1216 * %eax points to the start of 
hvm_start_info 1217 * %edi points to the end 1218 */ 1219 addl $KERNBASE_LO,%eax 1220 movl $RELOC(hvm_start_info),%ebp 1221 movl %eax,(%ebp) 1222 movl $KERNBASE_HI,4(%ebp) 1223 1224 /* round end to next page boundary */ 1225 addl $PGOFSET,%edi 1226 andl $~PGOFSET,%edi 1227 1228 /* get a page for HYPERVISOR_shared_info */ 1229 /* this is only needed if we are running on Xen */ 1230 cmpl $VM_GUEST_XENPVH, RELOC(vm_guest) 1231 jne .save_eblob 1232 movl $RELOC(HYPERVISOR_shared_info_pa),%ebp 1233 movl %edi,(%ebp) 1234 movl $0,4(%ebp) 1235 addl $PAGE_SIZE, %edi 1236 .save_eblob: 1237 addl $KERNBASE_LO,%edi 1238 movl $RELOC(eblob),%ebp 1239 movl %edi,(%ebp) 1240 movl $KERNBASE_HI,4(%ebp) 1241 1242 jmp .Lbiosbasemem_finished 1243 1244 .same_hvm_info: 1245 /* just use the provided %ebx */ 1246 /* XXX assume hvm_start_info+dependant structure fits in a single page */ 1247 movl %ebx, %eax 1248 movl %ebx, %edi 1249 addl $PAGE_SIZE, %edi 1250 jmp .save_hvm_info 1251 END(start_pvh) 1252 .code64 1253 # endif /* !XENPV */ 1254 /* space for the hypercall call page */ 1255 #define HYPERCALL_PAGE_OFFSET 0x1000 1256 .align HYPERCALL_PAGE_OFFSET 1257 ENTRY(hypercall_page) /* Returns -1, on HYPERVISOR_xen_version() */ 1258 .skip (__HYPERVISOR_xen_version*32), 0x90 1259 movq $-1, %rax 1260 retq 1261 .align HYPERCALL_PAGE_OFFSET, 0x90 1262 END(hypercall_page) 1263 #endif /* XEN */ 1264 1265 /* 1266 * int setjmp(label_t *) 1267 * 1268 * Used primarily by DDB. 1269 */ 1270 ENTRY(setjmp) 1271 /* 1272 * Only save registers that must be preserved across function 1273 * calls according to the ABI (%rbx, %rsp, %rbp, %r12-%r15) 1274 * and %rip. 1275 */ 1276 movq %rdi,%rax 1277 movq %rbx,(%rax) 1278 movq %rsp,8(%rax) 1279 movq %rbp,16(%rax) 1280 movq %r12,24(%rax) 1281 movq %r13,32(%rax) 1282 movq %r14,40(%rax) 1283 movq %r15,48(%rax) 1284 movq (%rsp),%rdx 1285 movq %rdx,56(%rax) 1286 xorl %eax,%eax 1287 ret 1288 END(setjmp) 1289 1290 /* 1291 * int longjmp(label_t *) 1292 * 1293 * Used primarily by DDB. 1294 */ 1295 ENTRY(longjmp) 1296 movq %rdi,%rax 1297 movq (%rax),%rbx 1298 movq 8(%rax),%rsp 1299 movq 16(%rax),%rbp 1300 movq 24(%rax),%r12 1301 movq 32(%rax),%r13 1302 movq 40(%rax),%r14 1303 movq 48(%rax),%r15 1304 movq 56(%rax),%rdx 1305 movq %rdx,(%rsp) 1306 movl $1,%eax 1307 ret 1308 END(longjmp) 1309 1310 /* 1311 * void dumpsys(void) 1312 * 1313 * Mimic cpu_switchto() for postmortem debugging. 1314 */ 1315 ENTRY(dumpsys) 1316 /* Build a fake switch frame. */ 1317 pushq %rbx 1318 pushq %r12 1319 pushq %r13 1320 pushq %r14 1321 pushq %r15 1322 1323 /* Save a context. */ 1324 movq $dumppcb, %rax 1325 movq %rsp, PCB_RSP(%rax) 1326 movq %rbp, PCB_RBP(%rax) 1327 1328 call _C_LABEL(dodumpsys) 1329 1330 addq $(5*8), %rsp /* sizeof(switchframe) - sizeof(%rip) */ 1331 ret 1332 END(dumpsys) 1333 1334 /* 1335 * struct lwp *cpu_switchto(struct lwp *oldlwp, struct lwp *newlwp, 1336 * bool returning) 1337 * 1338 * 1. save context of oldlwp. 1339 * 2. restore context of newlwp. 1340 * 1341 * Note that the stack frame layout is known to "struct switchframe" in 1342 * <machine/frame.h> and to the code in cpu_lwp_fork() which initializes 1343 * it for a new lwp. 1344 */ 1345 ENTRY(cpu_switchto) 1346 pushq %rbx 1347 pushq %r12 1348 pushq %r13 1349 pushq %r14 1350 pushq %r15 1351 1352 movq %rdi,%r13 /* oldlwp */ 1353 movq %rsi,%r12 /* newlwp */ 1354 1355 /* Save old context. */ 1356 movq L_PCB(%r13),%rax 1357 movq %rsp,PCB_RSP(%rax) 1358 movq %rbp,PCB_RBP(%rax) 1359 1360 /* Switch to newlwp's stack. 
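 *
 * Roughly, the next few steps below are, in C-like pseudocode
 * (a sketch only; the C names are approximate):
 *
 *	newpcb = lwp_getpcb(newlwp);
 *	%rsp = newpcb->pcb_rsp;
 *	%rbp = newpcb->pcb_rbp;
 *	prevlwp = atomic_swap_ptr(&curcpu()->ci_curlwp, newlwp);
 *
 * See the comment below for why the swap must be a single XCHG.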
 */
	movq	L_PCB(%r12),%r14
	movq	PCB_RSP(%r14),%rsp
	movq	PCB_RBP(%r14),%rbp

	/*
	 * Issue XCHG, rather than MOV, to set ci_curlwp := newlwp in
	 * order to coordinate mutex_exit on this CPU with
	 * mutex_vector_enter on another CPU.
	 *
	 * 1. Any prior mutex_exit by oldlwp must be visible to other
	 *    CPUs before we set ci_curlwp := newlwp on this one,
	 *    requiring a store-before-store barrier.
	 *
	 *    (This is always guaranteed by the x86 memory model, TSO,
	 *    but other architectures require an explicit barrier before
	 *    the store to ci->ci_curlwp.)
	 *
	 * 2. ci_curlwp := newlwp must be visible on all other CPUs
	 *    before any subsequent mutex_exit by newlwp can even test
	 *    whether there might be waiters, requiring a
	 *    store-before-load barrier.
	 *
	 *    (This is the only ordering x86 TSO ever requires any kind
	 *    of barrier for -- in this case, we take advantage of the
	 *    sequential consistency implied by XCHG to obviate the
	 *    need for MFENCE or something.)
	 *
	 * See kern_mutex.c for details -- this is necessary for
	 * adaptive mutexes to detect whether the lwp is on the CPU in
	 * order to safely block without requiring atomic r/m/w in
	 * mutex_exit.
	 */
	movq	%r12,%rcx
	xchgq	%rcx,CPUVAR(CURLWP)

#ifdef XENPV
	/*
	 * If we are here, we're obviously not in user context.
	 * Reset ci_xen_clockf_* in case the splx() at the end of
	 * mi_switch() triggers a deferred call to xen_timer_handler().
	 */
	movb	$0, CPUVAR(XEN_CLOCKF_USERMODE)
	movq	$_C_LABEL(cpu_switchto), CPUVAR(XEN_CLOCKF_PC)
#endif

	/* Skip the rest if returning to a pinned LWP. */
	testb	%dl,%dl			/* returning = true ? */
	jnz	.Lswitch_return

#ifdef SVS
	movb	_C_LABEL(svs_enabled),%dl
	testb	%dl,%dl
	jz	.Lskip_svs
	callq	_C_LABEL(svs_lwp_switch)
.Lskip_svs:
#endif

#ifndef XENPV
	movq	%r13,%rdi
	movq	%r12,%rsi
	callq	_C_LABEL(speculation_barrier)
#endif

	/* Switch ring0 stack */
#ifdef SVS
	movb	_C_LABEL(svs_enabled),%al
	testb	%al,%al
	jz	.Lno_svs_switch

	movq	CPUVAR(RSP0),%rax
	movq	CPUVAR(TSS),%rdi
	movq	%rax,TSS_RSP0(%rdi)
	jmp	.Lring0_switched

.Lno_svs_switch:
#endif

#if !defined(XENPV)
	movq	PCB_RSP0(%r14),%rax
	movq	CPUVAR(TSS),%rdi
	movq	%rax,TSS_RSP0(%rdi)
#else
	movq	%r14,%rdi
	callq	_C_LABEL(x86_64_switch_context)
#endif
.Lring0_switched:

	/* Switch the dbregs. */
	movq	%r13,%rdi
	movq	%r12,%rsi
	callq	_C_LABEL(x86_dbregs_switch)

	/* Switch the FPU. */
	movq	%r13,%rdi
	movq	%r12,%rsi
	callq	_C_LABEL(fpu_switch)

	/* Don't bother with the rest if switching to a system process. */
	testl	$LW_SYSTEM,L_FLAG(%r12)
	jnz	.Lswitch_return

	/* Is this process using RAS (restartable atomic sequences)? */
	movq	L_PROC(%r12),%rdi
	cmpq	$0,P_RASLIST(%rdi)
	je	.Lno_RAS

	/* Handle restartable atomic sequences (RAS). */
	movq	L_MD_REGS(%r12),%rbx
	movq	TF_RIP(%rbx),%rsi
	call	_C_LABEL(ras_lookup)
	cmpq	$-1,%rax
	je	.Lno_RAS
	movq	%rax,TF_RIP(%rbx)
.Lno_RAS:

#ifndef XENPV
	/*
	 * Raise the IPL to IPL_HIGH. Dropping the priority is deferred
	 * until mi_switch(), when cpu_switchto() returns. XXX Still needed?
*/ 1479 movb $IPL_HIGH,CPUVAR(ILEVEL) 1480 1481 /* The 32bit LWPs are handled differently. */ 1482 testl $PCB_COMPAT32,PCB_FLAGS(%r14) 1483 jnz .Llwp_32bit 1484 1485 .Llwp_64bit: 1486 /* Set default 64bit values in %ds, %es, %fs and %gs. */ 1487 movq $GSEL(GUDATA_SEL, SEL_UPL),%rax 1488 movw %ax,%ds 1489 movw %ax,%es 1490 xorq %rax,%rax 1491 movw %ax,%fs 1492 CLI(cx) 1493 SWAPGS 1494 movw %ax,%gs 1495 SWAPGS 1496 STI(cx) 1497 1498 /* Zero out GDT descriptors. */ 1499 movq CPUVAR(GDT),%rcx 1500 movq %rax,(GUFS_SEL*8)(%rcx) 1501 movq %rax,(GUGS_SEL*8)(%rcx) 1502 1503 /* Reload 64-bit %fs/%gs MSRs. */ 1504 movl $MSR_FSBASE,%ecx 1505 movl PCB_FS(%r14),%eax 1506 movl 4+PCB_FS(%r14),%edx 1507 wrmsr 1508 movl $MSR_KERNELGSBASE,%ecx 1509 movl PCB_GS(%r14),%eax 1510 movl 4+PCB_GS(%r14),%edx 1511 wrmsr 1512 1513 jmp .Lswitch_return 1514 1515 .Llwp_32bit: 1516 /* Reload %fs/%gs GDT descriptors. */ 1517 movq CPUVAR(GDT),%rcx 1518 movq PCB_FS(%r14),%rax 1519 movq %rax,(GUFS_SEL*8)(%rcx) 1520 movq PCB_GS(%r14),%rax 1521 movq %rax,(GUGS_SEL*8)(%rcx) 1522 1523 /* Set default 32bit values in %ds, %es, %fs and %gs. */ 1524 movq L_MD_REGS(%r12),%rbx 1525 movq $GSEL(GUDATA32_SEL, SEL_UPL),%rax 1526 movw %ax,%ds 1527 movw %ax,%es 1528 movw %ax,%fs 1529 CLI(ax) 1530 SWAPGS 1531 movw %ax,%gs 1532 SWAPGS 1533 STI(ax) 1534 #else 1535 movq %r12,%rdi 1536 callq _C_LABEL(x86_64_tls_switch) 1537 #endif 1538 1539 .Lswitch_return: 1540 /* Return to the new LWP, returning 'oldlwp' in %rax. */ 1541 KMSAN_INIT_RET(8) 1542 movq %r13,%rax 1543 popq %r15 1544 popq %r14 1545 popq %r13 1546 popq %r12 1547 popq %rbx 1548 ret 1549 END(cpu_switchto) 1550 1551 /* 1552 * void savectx(struct pcb *pcb); 1553 * 1554 * Update pcb, saving current processor state. 1555 */ 1556 ENTRY(savectx) 1557 /* Save stack pointers. */ 1558 movq %rsp,PCB_RSP(%rdi) 1559 movq %rbp,PCB_RBP(%rdi) 1560 ret 1561 END(savectx) 1562 1563 /* 1564 * Syscall handler. 1565 */ 1566 ENTRY(handle_syscall) 1567 STI(si) 1568 1569 movq CPUVAR(CURLWP),%r14 1570 incq CPUVAR(NSYSCALL) /* count it atomically */ 1571 movq %rsp,L_MD_REGS(%r14) /* save pointer to frame */ 1572 movq L_PROC(%r14),%r15 1573 andl $~MDL_IRET,L_MD_FLAGS(%r14) /* Allow sysret return */ 1574 movq %rsp,%rdi /* Pass frame as arg0 */ 1575 call *P_MD_SYSCALL(%r15) 1576 .Lsyscall_checkast: 1577 /* 1578 * Disable interrupts to avoid new ASTs (etc) being added and 1579 * to ensure we don't take an interrupt with some of the user 1580 * registers loaded. 1581 */ 1582 CLI(si) 1583 /* Check for ASTs on exit to user mode. */ 1584 movl L_MD_ASTPENDING(%r14),%eax 1585 orl CPUVAR(WANT_PMAPLOAD),%eax 1586 jnz 9f 1587 1588 #ifdef DIAGNOSTIC 1589 cmpb $IPL_NONE,CPUVAR(ILEVEL) 1590 jne .Lspl_error 1591 #endif 1592 1593 HANDLE_DEFERRED_FPU 1594 1595 /* 1596 * Decide if we need to take a slow path. That's the case when we 1597 * want to reload %cs and %ss on a 64bit LWP (MDL_IRET set), or when 1598 * we're returning to a 32bit LWP (MDL_COMPAT32 set). 1599 * 1600 * In either case, we jump into intrfastexit and return to userland 1601 * with the iret instruction. 
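 *
 * Roughly (a sketch in C-like pseudocode, not actual kernel code):
 *
 *	if (curlwp->l_md.md_flags & (MDL_IRET | MDL_COMPAT32))
 *		goto intrfastexit;	(return with iretq)
 *	else
 *		goto syscall_sysret;	(return with sysretq)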
 */
	testl	$(MDL_IRET|MDL_COMPAT32),L_MD_FLAGS(%r14)
	jnz	intrfastexit

	jmp	syscall_sysret

#ifdef DIAGNOSTIC
.Lspl_error:
	movabsq	$4f,%rdi
	movzbl	CPUVAR(ILEVEL),%esi
	call	_C_LABEL(panic)
4:	.asciz	"spl not lowered on syscall, ilevel=%x"
#endif

	/* AST pending or pmap load needed */
9:
	cmpl	$0,CPUVAR(WANT_PMAPLOAD)
	jz	10f
	STI(si)
	call	_C_LABEL(do_pmap_load)
	jmp	.Lsyscall_checkast	/* re-check ASTs */
10:
	CLEAR_ASTPENDING(%r14)
	STI(si)
	/* Pushed T_ASTFLT into tf_trapno on entry. */
	movq	%rsp,%rdi
	KMSAN_INIT_ARG(8)
	call	_C_LABEL(trap)
	jmp	.Lsyscall_checkast	/* re-check ASTs */
END(handle_syscall)

/*
 * void lwp_trampoline(void);
 *
 * This is a trampoline function run by newly created LWPs
 * in order to do additional setup in their context.
 */
ENTRY(lwp_trampoline)
	movq	%rbp,%rsi
	movq	%rbp,%r14	/* for .Lsyscall_checkast */
	movq	%rax,%rdi
	xorq	%rbp,%rbp
	KMSAN_INIT_ARG(16)
	call	_C_LABEL(lwp_startup)
	movq	%r13,%rdi
	KMSAN_INIT_ARG(8)
	call	*%r12
	jmp	.Lsyscall_checkast
END(lwp_trampoline)

/*
 * Entry points of the 'syscall' instruction, 64bit and 32bit mode.
 */

#define SP(x)	(x)-(TF_SS+8)(%rax)

.macro	SYSCALL_ENTRY	name,is_svs
IDTVEC(\name)
#ifndef XENPV
	/*
	 * The user %rip is in %rcx and the user %rflags in %r11. The kernel
	 * %cs and %ss are loaded, but nothing else is.
	 *
	 * The 'swapgs' instruction gives us access to cpu-specific memory
	 * where we can save a user register and then read the LWP's kernel
	 * stack pointer.
	 *
	 * This code doesn't seem to set %ds; this may not matter, since %ds
	 * is ignored in 64bit mode. OTOH, the syscall instruction sets %ss,
	 * and that is ignored as well.
	 */
	swapgs

	/* Get the LWP's kernel stack pointer in %rax */
	.if	\is_svs
	movabs	%rax,SVS_UTLS+UTLS_SCRATCH
	movabs	SVS_UTLS+UTLS_RSP0,%rax
	.else
	movq	%rax,CPUVAR(SCRATCH)
	movq	CPUVAR(CURLWP),%rax
	movq	L_PCB(%rax),%rax
	movq	PCB_RSP0(%rax),%rax
	.endif

	/* Make stack look like an 'int nn' frame */
	movq	$(LSEL(LUDATA_SEL, SEL_UPL)),SP(TF_SS)	/* user %ss */
	movq	%rsp,SP(TF_RSP)				/* user %rsp */
	movq	%r11,SP(TF_RFLAGS)			/* user %rflags */
	movq	$(LSEL(LUCODE_SEL, SEL_UPL)),SP(TF_CS)	/* user %cs */
	movq	%rcx,SP(TF_RIP)				/* user %rip */
	leaq	SP(0),%rsp		/* %rsp now valid after frame */

	/* Restore %rax */
	.if	\is_svs
	movabs	SVS_UTLS+UTLS_SCRATCH,%rax
	.else
	movq	CPUVAR(SCRATCH),%rax
	.endif

	movq	$2,TF_ERR(%rsp)		/* syscall instruction size */
	movq	$T_ASTFLT,TF_TRAPNO(%rsp)
#else
	/*
	 * Xen already switched to kernel stack.
1706 * But it didn't disable events 1707 */ 1708 pushq %rsi 1709 CLI(si) 1710 popq %rsi 1711 addq $0x10,%rsp /* gap to match cs:rip */ 1712 pushq $2 /* error code */ 1713 pushq $T_ASTFLT 1714 subq $TF_REGSIZE,%rsp 1715 cld 1716 #endif 1717 INTR_SAVE_GPRS 1718 IBRS_ENTER 1719 movw $GSEL(GUDATA_SEL, SEL_UPL),TF_DS(%rsp) 1720 movw $GSEL(GUDATA_SEL, SEL_UPL),TF_ES(%rsp) 1721 movw $0,TF_FS(%rsp) 1722 movw $0,TF_GS(%rsp) 1723 .if \is_svs 1724 SVS_ENTER 1725 .endif 1726 KMSAN_ENTER 1727 jmp handle_syscall 1728 IDTVEC_END(\name) 1729 .endm 1730 1731 SYSCALL_ENTRY syscall,is_svs=0 1732 1733 TEXT_USER_BEGIN 1734 1735 #ifdef SVS 1736 SYSCALL_ENTRY syscall_svs,is_svs=1 1737 #endif 1738 1739 IDTVEC(syscall32) 1740 sysretl /* go away please */ 1741 IDTVEC_END(syscall32) 1742 1743 TEXT_USER_END 1744 1745 /* 1746 * osyscall() 1747 * 1748 * Trap gate entry for int $80 syscall, also used by sigreturn. 1749 */ 1750 TEXT_USER_BEGIN 1751 IDTVEC(osyscall) 1752 #ifdef XENPV 1753 pushq %rsi 1754 CLI(si) 1755 popq %rsi 1756 movq (%rsp),%rcx 1757 movq 8(%rsp),%r11 1758 addq $0x10,%rsp 1759 #endif 1760 pushq $2 /* size of instruction for restart */ 1761 pushq $T_ASTFLT /* trap # for doing ASTs */ 1762 INTRENTRY 1763 jmp handle_syscall 1764 IDTVEC_END(osyscall) 1765 TEXT_USER_END 1766 1767 /* 1768 * Return to userland via 'sysret'. 1769 */ 1770 TEXT_USER_BEGIN 1771 _ALIGN_TEXT 1772 LABEL(syscall_sysret) 1773 KMSAN_LEAVE 1774 MDS_LEAVE 1775 SVS_LEAVE 1776 IBRS_LEAVE 1777 INTR_RESTORE_GPRS 1778 SWAPGS 1779 #ifndef XENPV 1780 movq TF_RIP(%rsp),%rcx /* %rip for sysret */ 1781 movq TF_RFLAGS(%rsp),%r11 /* %flags for sysret */ 1782 movq TF_RSP(%rsp),%rsp 1783 sysretq 1784 #else 1785 addq $TF_RIP,%rsp 1786 pushq $256 /* VGCF_IN_SYSCALL */ 1787 jmp HYPERVISOR_iret 1788 #endif 1789 END(syscall_sysret) 1790 TEXT_USER_END 1791 1792 TEXT_USER_BEGIN 1793 1794 /* 1795 * In intrfastexit, we advance %rsp at the beginning. We then access the 1796 * segment registers in the trapframe with TF_BACKW (backwards). See the 1797 * documentation in amd64_trap.S for an explanation. 1798 */ 1799 1800 #define TF_BACKW(val, reg) (val - (TF_REGSIZE+16))(reg) 1801 1802 _ALIGN_TEXT 1803 .type intrfastexit,@function 1804 LABEL(intrfastexit) 1805 NOT_XEN(cli;) 1806 KMSAN_LEAVE 1807 1808 testb $SEL_UPL,TF_CS(%rsp) 1809 jz .Lkexit 1810 1811 MDS_LEAVE 1812 SVS_LEAVE 1813 IBRS_LEAVE 1814 INTR_RESTORE_GPRS 1815 addq $(TF_REGSIZE+16),%rsp /* iret frame */ 1816 SWAPGS 1817 1818 cmpw $LSEL(LUCODE_SEL, SEL_UPL),TF_BACKW(TF_CS, %rsp) 1819 je do_iret 1820 cmpw $GSEL(GUCODE_SEL, SEL_UPL),TF_BACKW(TF_CS, %rsp) 1821 je do_iret 1822 #ifdef XENPV 1823 cmpw $FLAT_RING3_CS64,TF_BACKW(TF_CS, %rsp) 1824 je do_iret 1825 #endif 1826 1827 do_mov_es: 1828 movw TF_BACKW(TF_ES, %rsp),%es 1829 do_mov_ds: 1830 movw TF_BACKW(TF_DS, %rsp),%ds 1831 do_mov_fs: 1832 movw TF_BACKW(TF_FS, %rsp),%fs 1833 #ifndef XENPV 1834 do_mov_gs: 1835 movw TF_BACKW(TF_GS, %rsp),%gs 1836 #endif 1837 1838 do_iret: 1839 iretq 1840 1841 .Lkexit: 1842 INTR_RESTORE_GPRS 1843 addq $(TF_REGSIZE+16),%rsp /* iret frame */ 1844 iretq 1845 END(intrfastexit) 1846 1847 TEXT_USER_END 1848 1849 .section .rodata 1850 1851 /* 1852 * Hotpatch templates. 
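 *
 * Each template below is bracketed by a pair of labels (hp_xxx /
 * hp_xxx_end, svs_xxx / svs_xxx_end, ...). At runtime the hotpatch
 * machinery copies the bytes between the two labels over previously
 * registered patch points, roughly:
 *
 *	memcpy(patchpoint, template, template_end - template);
 *
 * (A sketch only; see the x86 hotpatch code for the real mechanism.)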
1853 */ 1854 1855 LABEL(hp_nolock) 1856 nop 1857 LABEL(hp_nolock_end) 1858 1859 LABEL(hp_retfence) 1860 lfence 1861 LABEL(hp_retfence_end) 1862 1863 LABEL(hp_clac) 1864 clac 1865 LABEL(hp_clac_end) 1866 1867 LABEL(hp_stac) 1868 stac 1869 LABEL(hp_stac_end) 1870 1871 #ifdef SVS 1872 LABEL(svs_enter) 1873 movabs SVS_UTLS+UTLS_KPDIRPA,%rax 1874 movq %rax,%cr3 1875 movq CPUVAR(KRSP0),%rsp 1876 LABEL(svs_enter_end) 1877 1878 LABEL(svs_enter_altstack) 1879 testb $SEL_UPL,TF_CS(%rsp) 1880 jz 1234f 1881 movabs SVS_UTLS+UTLS_KPDIRPA,%rax 1882 movq %rax,%cr3 1883 1234: 1884 LABEL(svs_enter_altstack_end) 1885 1886 LABEL(svs_enter_nmi) 1887 movq %cr3,%rax 1888 movq %rax,(FRAMESIZE+1*8)(%rsp) /* nmistore->scratch */ 1889 movq (FRAMESIZE+0*8)(%rsp),%rax /* nmistore->cr3 */ 1890 movq %rax,%cr3 1891 LABEL(svs_enter_nmi_end) 1892 1893 LABEL(svs_leave) 1894 movq CPUVAR(URSP0),%rsp 1895 movq CPUVAR(UPDIRPA),%rax 1896 movq %rax,%cr3 1897 LABEL(svs_leave_end) 1898 1899 LABEL(svs_leave_altstack) 1900 testb $SEL_UPL,TF_CS(%rsp) 1901 jz 1234f 1902 movq CPUVAR(UPDIRPA),%rax 1903 movq %rax,%cr3 1904 1234: 1905 LABEL(svs_leave_altstack_end) 1906 1907 LABEL(svs_leave_nmi) 1908 movq (FRAMESIZE+1*8)(%rsp),%rax /* nmistore->scratch */ 1909 movq %rax,%cr3 1910 LABEL(svs_leave_nmi_end) 1911 #endif 1912 1913 /* IBRS <- 1 */ 1914 LABEL(ibrs_enter) 1915 movl $MSR_IA32_SPEC_CTRL,%ecx 1916 rdmsr 1917 orl $IA32_SPEC_CTRL_IBRS,%eax 1918 wrmsr 1919 LABEL(ibrs_enter_end) 1920 1921 /* IBRS <- 0 */ 1922 LABEL(ibrs_leave) 1923 movl $MSR_IA32_SPEC_CTRL,%ecx 1924 rdmsr 1925 andl $~IA32_SPEC_CTRL_IBRS,%eax 1926 wrmsr 1927 LABEL(ibrs_leave_end) 1928 1929 LABEL(noibrs_enter) 1930 NOIBRS_ENTER 1931 LABEL(noibrs_enter_end) 1932 1933 LABEL(noibrs_leave) 1934 NOIBRS_LEAVE 1935 LABEL(noibrs_leave_end) 1936 1937 LABEL(mds_leave) 1938 pushq $GSEL(GDATA_SEL, SEL_KPL) 1939 verw (%rsp) 1940 addq $8,%rsp 1941 LABEL(mds_leave_end) 1942 1943 LABEL(nomds_leave) 1944 NOMDS_LEAVE 1945 LABEL(nomds_leave_end) 1946 1947 #ifdef SELFRELOC 1948 /* 1949 * selfreloc(loadaddr edi) 1950 * This is adapted from sys/arch/i386/i386/locore.S 1951 */ 1952 .code32 1953 ENTRY(selfreloc_start) 1954 movl %edi, %ebx /* loadaddr saved in ebx */ 1955 movl %edi, %esi /* src */ 1956 movl $_RELOC(kernel_text), %edi /* dest */ 1957 movl 16(%esp),%ecx /* esym */ 1958 subl $_RELOC(kernel_text), %ecx /* size */ 1959 1960 #if defined(NO_OVERLAP) 1961 movl %ecx, %eax 1962 #else 1963 movl %edi, %eax 1964 subl %esi, %eax 1965 cmpl %ecx, %eax /* overlapping? */ 1966 movl %ecx, %eax 1967 jb .Lbackwards 1968 #endif 1969 /* nope, copy forwards. */ 1970 shrl $2, %ecx /* copy by words */ 1971 rep 1972 movsl 1973 and $3, %eax /* any bytes left? */ 1974 jnz .Ltrailing 1975 jmp .Lcopy_done 1976 1977 .Ltrailing: 1978 cmp $2, %eax 1979 jb 11f 1980 movw (%esi), %ax 1981 movw %ax, (%edi) 1982 je .Lcopy_done 1983 movb 2(%esi), %al 1984 movb %al, 2(%edi) 1985 jmp .Lcopy_done 1986 11: movb (%esi), %al 1987 movb %al, (%edi) 1988 jmp .Lcopy_done 1989 1990 #if !defined(NO_OVERLAP) 1991 .Lbackwards: 1992 addl %ecx, %edi /* copy backwards. */ 1993 addl %ecx, %esi 1994 and $3, %eax /* any fractional bytes? 
*/ 1995 jnz .Lback_align 1996 .Lback_aligned: 1997 shrl $2, %ecx 1998 subl $4, %esi 1999 subl $4, %edi 2000 std 2001 rep 2002 movsl 2003 cld 2004 jmp .Lcopy_done 2005 2006 .Lback_align: 2007 sub %eax, %esi 2008 sub %eax, %edi 2009 cmp $2, %eax 2010 jb 11f 2011 je 12f 2012 movb 2(%esi), %al 2013 movb %al, 2(%edi) 2014 12: movw (%esi), %ax 2015 movw %ax, (%edi) 2016 jmp .Lback_aligned 2017 11: movb (%esi), %al 2018 movb %al, (%edi) 2019 jmp .Lback_aligned 2020 #endif 2021 /* End of copy kernel */ 2022 .Lcopy_done: 2023 cld /* LynxOS depends on it */ 2024 2025 /* load current selfreloc_start addesss in $edi */ 2026 movl %ebx, %edi /* loadaddr was saved in ebx */ 2027 addl $(selfreloc_start - kernel_text), %edi 2028 2029 /* Prepare jump address */ 2030 lea (selfreloc_start32a - selfreloc_start)(%edi), %eax 2031 movl %eax, (selfreloc_start32r - selfreloc_start)(%edi) 2032 2033 /* Setup GDT */ 2034 lea (gdt - selfreloc_start)(%edi), %eax 2035 mov %eax, (gdtrr - selfreloc_start)(%edi) 2036 lgdt (gdtr - selfreloc_start)(%edi) 2037 2038 /* Jump to set %cs */ 2039 ljmp *(selfreloc_start32r - selfreloc_start)(%edi) 2040 2041 .align 4 2042 selfreloc_start32a: 2043 movl $0x10, %eax /* #define DATA_SEGMENT 0x10 */ 2044 movw %ax, %ds 2045 movw %ax, %es 2046 movw %ax, %fs 2047 movw %ax, %gs 2048 movw %ax, %ss 2049 2050 /* Disable Paging in CR0 */ 2051 movl %cr0, %eax 2052 andl $(~CR0_PG), %eax 2053 movl %eax, %cr0 2054 2055 /* Disable PAE in CR4 */ 2056 movl %cr4, %eax 2057 andl $(~CR4_PAE), %eax 2058 movl %eax, %cr4 2059 2060 jmp selfreloc_start32b 2061 2062 .align 4 2063 selfreloc_start32b: 2064 xor %eax, %eax 2065 movl $_RELOC(start), %esi 2066 jmp *%esi 2067 2068 .align 16 2069 selfreloc_start32r: 2070 .long 0 2071 .long 0x08 /* #define CODE_SEGMENT 0x08 */ 2072 .align 16 2073 gdt: 2074 .long 0, 0 2075 .byte 0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00 2076 .byte 0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00 2077 gdtr: 2078 .word gdtr - gdt 2079 gdtrr: 2080 .quad 2081 END(selfreloc_start) 2082 #endif /* SELFRELOC */ 2083