Home | History | Annotate | Line # | Download | only in bootx64
startprog64.S revision 1.3.16.2
      1  1.3.16.2  jdolecek /*	$NetBSD: startprog64.S,v 1.3.16.2 2017/12/03 11:36:19 jdolecek Exp $	*/
      2  1.3.16.2  jdolecek /*	NetBSD: startprog.S,v 1.3 2003/02/01 14:48:18 dsl Exp	*/
      3  1.3.16.2  jdolecek 
      4  1.3.16.2  jdolecek /* starts program in protected mode / flat space
      5  1.3.16.2  jdolecek  with given stackframe
      6  1.3.16.2  jdolecek  needs global variables flatcodeseg and flatdataseg
      7  1.3.16.2  jdolecek  (gdt offsets)
      8  1.3.16.2  jdolecek   derived from: NetBSD:sys/arch/i386/boot/asm.S
      9  1.3.16.2  jdolecek  */
     10  1.3.16.2  jdolecek 
     11  1.3.16.2  jdolecek /*
     12  1.3.16.2  jdolecek  * Ported to boot 386BSD by Julian Elischer (julian (at) tfs.com) Sept 1992
     13  1.3.16.2  jdolecek  *
     14  1.3.16.2  jdolecek  * Mach Operating System
     15  1.3.16.2  jdolecek  * Copyright (c) 1992, 1991 Carnegie Mellon University
     16  1.3.16.2  jdolecek  * All Rights Reserved.
     17  1.3.16.2  jdolecek  *
     18  1.3.16.2  jdolecek  * Permission to use, copy, modify and distribute this software and its
     19  1.3.16.2  jdolecek  * documentation is hereby granted, provided that both the copyright
     20  1.3.16.2  jdolecek  * notice and this permission notice appear in all copies of the
     21  1.3.16.2  jdolecek  * software, derivative works or modified versions, and any portions
     22  1.3.16.2  jdolecek  * thereof, and that both notices appear in supporting documentation.
     23  1.3.16.2  jdolecek  *
     24  1.3.16.2  jdolecek  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     25  1.3.16.2  jdolecek  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
     26  1.3.16.2  jdolecek  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     27  1.3.16.2  jdolecek  *
     28  1.3.16.2  jdolecek  * Carnegie Mellon requests users of this software to return to
     29  1.3.16.2  jdolecek  *
     30  1.3.16.2  jdolecek  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     31  1.3.16.2  jdolecek  *  School of Computer Science
     32  1.3.16.2  jdolecek  *  Carnegie Mellon University
     33  1.3.16.2  jdolecek  *  Pittsburgh PA 15213-3890
     34  1.3.16.2  jdolecek  *
     35  1.3.16.2  jdolecek  * any improvements or extensions that they make and grant Carnegie Mellon
     36  1.3.16.2  jdolecek  * the rights to redistribute these changes.
     37  1.3.16.2  jdolecek  */
     38  1.3.16.2  jdolecek 
     39  1.3.16.2  jdolecek /*
     40  1.3.16.2  jdolecek   Copyright 1988, 1989, 1990, 1991, 1992
     41  1.3.16.2  jdolecek    by Intel Corporation, Santa Clara, California.
     42  1.3.16.2  jdolecek 
     43  1.3.16.2  jdolecek                 All Rights Reserved
     44  1.3.16.2  jdolecek 
     45  1.3.16.2  jdolecek Permission to use, copy, modify, and distribute this software and
     46  1.3.16.2  jdolecek its documentation for any purpose and without fee is hereby
     47  1.3.16.2  jdolecek granted, provided that the above copyright notice appears in all
     48  1.3.16.2  jdolecek copies and that both the copyright notice and this permission notice
     49  1.3.16.2  jdolecek appear in supporting documentation, and that the name of Intel
     50  1.3.16.2  jdolecek not be used in advertising or publicity pertaining to distribution
     51  1.3.16.2  jdolecek of the software without specific, written prior permission.
     52  1.3.16.2  jdolecek 
     53  1.3.16.2  jdolecek INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
     54  1.3.16.2  jdolecek INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
     55  1.3.16.2  jdolecek IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
     56  1.3.16.2  jdolecek CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
     57  1.3.16.2  jdolecek LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
     58  1.3.16.2  jdolecek NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
     59  1.3.16.2  jdolecek WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     60  1.3.16.2  jdolecek */
     61  1.3.16.2  jdolecek 
     62  1.3.16.2  jdolecek #include <machine/asm.h>
     63  1.3.16.2  jdolecek #include <machine/specialreg.h>
     64  1.3.16.2  jdolecek 
     65  1.3.16.2  jdolecek #define	CODE_SEGMENT	0x08	/* selector: byte offset 0x08 in gdt (first .byte row) */
     66  1.3.16.2  jdolecek #define	DATA_SEGMENT	0x10	/* selector: byte offset 0x10 in gdt (second .byte row) */
     67  1.3.16.2  jdolecek /* startprog64: exported 8-byte slot, zero here; no code in this file stores to it (presumably set by the caller - confirm). */
     68  1.3.16.2  jdolecek 	.align	16
     69  1.3.16.2  jdolecek 	.globl _C_LABEL(startprog64)
     70  1.3.16.2  jdolecek _C_LABEL(startprog64):
     71  1.3.16.2  jdolecek 	.quad 0
     72  1.3.16.2  jdolecek /* startprog64_size: exported 32-bit byte count from startprog64_start up to startprog64_end. */
     73  1.3.16.2  jdolecek 	.globl _C_LABEL(startprog64_size)
     74  1.3.16.2  jdolecek _C_LABEL(startprog64_size):
     75  1.3.16.2  jdolecek 	.long startprog64_end - _C_LABEL(startprog64_start)
     76  1.3.16.2  jdolecek 
     77  1.3.16.2  jdolecek 	.text
     78  1.3.16.2  jdolecek 	.p2align 4,,15
     79  1.3.16.2  jdolecek 
     80  1.3.16.2  jdolecek /*
     81  1.3.16.2  jdolecek  * startprog64(kern_start,kern_load,stack,kern_size,loaded_start,kern_entry)
     82  1.3.16.2  jdolecek  */
     83  1.3.16.2  jdolecek ENTRY(startprog64_start)
     84  1.3.16.2  jdolecek start:
     85  1.3.16.2  jdolecek 	/*
     86  1.3.16.2  jdolecek 	 * This function is to call the loaded kernel's start() with
     87  1.3.16.2  jdolecek 	 * 32bit segment mode from x64 mode.
     88  1.3.16.2  jdolecek 	 * %rdi: kernel start address (destination of the copy below)
     89  1.3.16.2  jdolecek 	 * %rsi: loaded kernel address (source of the copy below)
     90  1.3.16.2  jdolecek 	 * %rdx: stack address (its low 32 bits are loaded into %esp)
     91  1.3.16.2  jdolecek 	 * %rcx: loaded kernel size, in bytes
     92  1.3.16.2  jdolecek 	 * %r8 : loaded start address (base for all "label - start" offsets)
     93  1.3.16.2  jdolecek 	 * %r9 : kernel entry address (target of the final indirect call)
     94  1.3.16.2  jdolecek 	 */
     95  1.3.16.2  jdolecek 	/* %r11-%r15 are used as scratch below; nothing in this routine saves or restores them. */
     96  1.3.16.2  jdolecek 	cld		/* LynxOS depends on it */
     97  1.3.16.2  jdolecek 
     98  1.3.16.2  jdolecek 	cli		/* no sti follows anywhere in this file */
     99  1.3.16.2  jdolecek 
    100  1.3.16.2  jdolecek 	/* Copy kernel */
    101  1.3.16.2  jdolecek 	mov	%rcx, %r12		/* original kernel size */
    102  1.3.16.2  jdolecek 	movq	%rdi, %r11		/* for misaligned check */
    103  1.3.16.2  jdolecek 
    104  1.3.16.2  jdolecek #if !defined(NO_OVERLAP)
    105  1.3.16.2  jdolecek 	movq	%rdi, %r13
    106  1.3.16.2  jdolecek 	subq	%rsi, %r13		/* %r13 = destination - source */
    107  1.3.16.2  jdolecek #endif
    108  1.3.16.2  jdolecek 
    109  1.3.16.2  jdolecek 	shrq	$3, %rcx		/* count for copy by 8-byte words */
    110  1.3.16.2  jdolecek 	jz	8f			/* j if less than 8 bytes */
    111  1.3.16.2  jdolecek 
    112  1.3.16.2  jdolecek 	lea	-8(%rdi, %r12), %r14	/* target address of last 8 */
    113  1.3.16.2  jdolecek 	mov	-8(%rsi, %r12), %r15	/* get last word */
    114  1.3.16.2  jdolecek #if !defined(NO_OVERLAP)
    115  1.3.16.2  jdolecek 	cmpq	%r12, %r13		/* overlapping? */
    116  1.3.16.2  jdolecek 	jb	10f			/* unsigned (dst - src) < size: copy backwards */
    117  1.3.16.2  jdolecek #endif
    118  1.3.16.2  jdolecek 
    119  1.3.16.2  jdolecek /*
    120  1.3.16.2  jdolecek  * Non-overlapping, copy forwards.
    121  1.3.16.2  jdolecek  * Newer Intel cpus (Nehalem) will do 16byte read/write transfers
    122  1.3.16.2  jdolecek  * if %ecx is more than 76.
    123  1.3.16.2  jdolecek  * AMD might do something similar some day.
    124  1.3.16.2  jdolecek  */
    125  1.3.16.2  jdolecek 	and	$7, %r11		/* destination misaligned ? */
    126  1.3.16.2  jdolecek 	jnz	2f
    127  1.3.16.2  jdolecek 	rep
    128  1.3.16.2  jdolecek 	movsq
    129  1.3.16.2  jdolecek 	mov	%r15, (%r14)		/* write last word (covers the size % 8 tail) */
    130  1.3.16.2  jdolecek 	jmp	.Lcopy_done
    131  1.3.16.2  jdolecek 
    132  1.3.16.2  jdolecek /*
    133  1.3.16.2  jdolecek  * Destination misaligned
    134  1.3.16.2  jdolecek  * AMD say it is better to align the destination (not the source).
    135  1.3.16.2  jdolecek  * This will also re-align copies if the source and dest are both
    136  1.3.16.2  jdolecek  * misaligned by the same amount)
    137  1.3.16.2  jdolecek  * (I think Nehalem will use its accelerated copy if the source
    138  1.3.16.2  jdolecek  * and destination have the same alignment.)
    139  1.3.16.2  jdolecek  */
    140  1.3.16.2  jdolecek 2:
    141  1.3.16.2  jdolecek 	lea	-9(%r11, %r12), %rcx	/* post re-alignment count */
    142  1.3.16.2  jdolecek 	neg	%r11			/* now -1 .. -7 */
    143  1.3.16.2  jdolecek 	mov	(%rsi), %r12		/* get first word (size in %r12 is no longer needed) */
    144  1.3.16.2  jdolecek 	mov	%rdi, %r13		/* target for first word */
    145  1.3.16.2  jdolecek 	lea	8(%rsi, %r11), %rsi	/* advance source by 8 - misalignment */
    146  1.3.16.2  jdolecek 	lea	8(%rdi, %r11), %rdi	/* destination is now 8-byte aligned */
    147  1.3.16.2  jdolecek 	shr	$3, %rcx		/* bytes -> 8-byte words */
    148  1.3.16.2  jdolecek 	rep
    149  1.3.16.2  jdolecek 	movsq
    150  1.3.16.2  jdolecek 	mov	%r12, (%r13)		/* write first word */
    151  1.3.16.2  jdolecek 	mov	%r15, (%r14)		/* write last word */
    152  1.3.16.2  jdolecek 	jmp	.Lcopy_done
    153  1.3.16.2  jdolecek 
    154  1.3.16.2  jdolecek #if !defined(NO_OVERLAP)
    155  1.3.16.2  jdolecek /* Must copy backwards.
    156  1.3.16.2  jdolecek  * Reverse copy is probably easy to code faster than 'rep movsq'
    157  1.3.16.2  jdolecek  * since that requires (IIRC) an extra clock every 3 iterations (AMD).
    158  1.3.16.2  jdolecek  * However I don't suppose anything cares that much!
    159  1.3.16.2  jdolecek  * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
    160  1.3.16.2  jdolecek  * The copy is aligned with the buffer start (more likely to
    161  1.3.16.2  jdolecek  * be a multiple of 8 than the end).
    162  1.3.16.2  jdolecek  */
    163  1.3.16.2  jdolecek 10:
    164  1.3.16.2  jdolecek 	lea	-8(%rsi, %rcx, 8), %rsi	/* last whole 8-byte word of the source */
    165  1.3.16.2  jdolecek 	lea	-8(%rdi, %rcx, 8), %rdi	/* matching word of the destination */
    166  1.3.16.2  jdolecek 	std
    167  1.3.16.2  jdolecek 	rep
    168  1.3.16.2  jdolecek 	movsq
    169  1.3.16.2  jdolecek 	cld
    170  1.3.16.2  jdolecek 	mov	%r15, (%r14)	/* write last bytes (word saved before the copy) */
    171  1.3.16.2  jdolecek 	jmp	.Lcopy_done
    172  1.3.16.2  jdolecek #endif
    173  1.3.16.2  jdolecek 
    174  1.3.16.2  jdolecek /* Less than 8 bytes to copy, copy by bytes */
    175  1.3.16.2  jdolecek /* Intel Nehalem optimise 'rep movsb' for <= 7 bytes (9-15 clocks).
    176  1.3.16.2  jdolecek  * For longer transfers it is 50+ !
    177  1.3.16.2  jdolecek  */
    178  1.3.16.2  jdolecek 8:	mov	%r12, %rcx	/* byte count, 0..7 on this path */
    179  1.3.16.2  jdolecek 
    180  1.3.16.2  jdolecek #if !defined(NO_OVERLAP)
    181  1.3.16.2  jdolecek 	cmpq	%r12, %r13	/* overlapping? */
    182  1.3.16.2  jdolecek 	jb	81f
    183  1.3.16.2  jdolecek #endif
    184  1.3.16.2  jdolecek 
    185  1.3.16.2  jdolecek 	/* nope, copy forwards. */
    186  1.3.16.2  jdolecek 	rep
    187  1.3.16.2  jdolecek 	movsb
    188  1.3.16.2  jdolecek 	jmp	.Lcopy_done
    189  1.3.16.2  jdolecek 
    190  1.3.16.2  jdolecek #if !defined(NO_OVERLAP)
    191  1.3.16.2  jdolecek /* Must copy backwards */
    192  1.3.16.2  jdolecek 81:
    193  1.3.16.2  jdolecek 	lea	-1(%rsi, %rcx), %rsi	/* last byte of the source */
    194  1.3.16.2  jdolecek 	lea	-1(%rdi, %rcx), %rdi	/* last byte of the destination */
    195  1.3.16.2  jdolecek 	std
    196  1.3.16.2  jdolecek 	rep
    197  1.3.16.2  jdolecek 	movsb
    198  1.3.16.2  jdolecek 	cld
    199  1.3.16.2  jdolecek #endif
    200  1.3.16.2  jdolecek 	/* End of copy kernel */
    201  1.3.16.2  jdolecek .Lcopy_done:
    202  1.3.16.2  jdolecek 
    203  1.3.16.2  jdolecek 	mov	%r8, %rdi	/* %rdi: loaded start address */
    204  1.3.16.2  jdolecek 	mov	%r9, %rsi	/* %rsi: kernel entry address */
    205  1.3.16.2  jdolecek 	/* From here every label is addressed as (label - start)(%rdi). */
    206  1.3.16.2  jdolecek 	/* Prepare jump address: store the run-time address of start32a into start32r */
    207  1.3.16.2  jdolecek 	lea	(start32a - start)(%rdi), %rax
    208  1.3.16.2  jdolecek 	movl	%eax, (start32r - start)(%rdi)	/* only the low 32 bits are stored */
    209  1.3.16.2  jdolecek 
    210  1.3.16.2  jdolecek 	/* Setup GDT: store the run-time address of gdt at gdtrr, then load from gdtr */
    211  1.3.16.2  jdolecek 	lea	(gdt - start)(%rdi), %rax
    212  1.3.16.2  jdolecek 	mov	%rax, (gdtrr - start)(%rdi)	/* 8-byte store */
    213  1.3.16.2  jdolecek 	lgdt	(gdtr - start)(%rdi)
    214  1.3.16.2  jdolecek 
    215  1.3.16.2  jdolecek 	/* Jump to set %cs: indirect far jump through the offset/selector pair at start32r */
    216  1.3.16.2  jdolecek 	ljmp	*(start32r - start)(%rdi)
    217  1.3.16.2  jdolecek 
    218  1.3.16.2  jdolecek 	.align	4
    219  1.3.16.2  jdolecek 	.code32
    220  1.3.16.2  jdolecek start32a:
    221  1.3.16.2  jdolecek 	movl	$DATA_SEGMENT, %eax	/* same selector goes into every data segment register */
    222  1.3.16.2  jdolecek 	movw	%ax, %ds
    223  1.3.16.2  jdolecek 	movw	%ax, %es
    224  1.3.16.2  jdolecek 	movw	%ax, %fs
    225  1.3.16.2  jdolecek 	movw	%ax, %gs
    226  1.3.16.2  jdolecek 	movw	%ax, %ss
    227  1.3.16.2  jdolecek 	/* %edx still holds the low 32 bits of the stack address argument. */
    228  1.3.16.2  jdolecek 	movl	%edx, %esp
    229  1.3.16.2  jdolecek 
    230  1.3.16.2  jdolecek 	/* Disable Paging in CR0 */
    231  1.3.16.2  jdolecek 	movl	%cr0, %eax
    232  1.3.16.2  jdolecek 	andl	$(~CR0_PG), %eax
    233  1.3.16.2  jdolecek 	movl	%eax, %cr0
    234  1.3.16.2  jdolecek 
    235  1.3.16.2  jdolecek 	/* Disable PAE in CR4 */
    236  1.3.16.2  jdolecek 	movl	%cr4, %eax
    237  1.3.16.2  jdolecek 	andl	$(~CR4_PAE), %eax
    238  1.3.16.2  jdolecek 	movl	%eax, %cr4
    239  1.3.16.2  jdolecek 
    240  1.3.16.2  jdolecek 	jmp	start32b
    241  1.3.16.2  jdolecek 
    242  1.3.16.2  jdolecek 	.align	4
    243  1.3.16.2  jdolecek start32b:
    244  1.3.16.2  jdolecek 	xor	%eax, %eax	/* %eax is zero when the kernel entry is reached */
    245  1.3.16.2  jdolecek 	call	*%esi		/* kernel entry address (low 32 bits of the %r9 argument) */
    246  1.3.16.2  jdolecek 	/* NOTE(review): nothing follows the call; if the callee returned, execution would continue into the padding and data below. */
    247  1.3.16.2  jdolecek 	.align	16
    248  1.3.16.2  jdolecek start32r:
    249  1.3.16.2  jdolecek 	.long	0
    250  1.3.16.2  jdolecek 	.long	CODE_SEGMENT
    251  1.3.16.2  jdolecek 	.align	16
    252  1.3.16.2  jdolecek gdt:
    253  1.3.16.2  jdolecek 	.long	0, 0
    254  1.3.16.2  jdolecek 	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00
    255  1.3.16.2  jdolecek 	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00
    256  1.3.16.2  jdolecek gdtr:
    257  1.3.16.2  jdolecek 	.word	gdtr - gdt
    258  1.3.16.2  jdolecek gdtrr:
    259  1.3.16.2  jdolecek 	.quad
    260  1.3.16.2  jdolecek start32end:
    261  1.3.16.2  jdolecek 	/* Space for the stack */
    262  1.3.16.2  jdolecek 	.align	16
    263  1.3.16.2  jdolecek 	.space	8192
    264  1.3.16.2  jdolecek startprog64_end:
    265