Home | History | Annotate | Line # | Download | only in bootx64
startprog64.S revision 1.3.2.2
      1  1.3.2.2  pgoyette /*	$NetBSD: startprog64.S,v 1.3.2.2 2017/03/20 06:57:15 pgoyette Exp $	*/
      2  1.3.2.2  pgoyette /*	NetBSD: startprog.S,v 1.3 2003/02/01 14:48:18 dsl Exp	*/
      3  1.3.2.2  pgoyette 
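      4  1.3.2.2  pgoyette /* Starts a program in 32-bit protected mode / flat address space
      5  1.3.2.2  pgoyette  with a given stack, entered from x64 mode.
      6  1.3.2.2  pgoyette  The header inherited from startprog.S said it needs the global variables flatcodeseg and flatdataseg
      7  1.3.2.2  pgoyette  (gdt offsets); this file references neither and loads its own gdt instead.
      8  1.3.2.2  pgoyette   derived from: NetBSD:sys/arch/i386/boot/asm.S
      9  1.3.2.2  pgoyette  */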
     10  1.3.2.2  pgoyette 
     11  1.3.2.2  pgoyette /*
     12  1.3.2.2  pgoyette  * Ported to boot 386BSD by Julian Elischer (julian (at) tfs.com) Sept 1992
     13  1.3.2.2  pgoyette  *
     14  1.3.2.2  pgoyette  * Mach Operating System
     15  1.3.2.2  pgoyette  * Copyright (c) 1992, 1991 Carnegie Mellon University
     16  1.3.2.2  pgoyette  * All Rights Reserved.
     17  1.3.2.2  pgoyette  *
     18  1.3.2.2  pgoyette  * Permission to use, copy, modify and distribute this software and its
     19  1.3.2.2  pgoyette  * documentation is hereby granted, provided that both the copyright
     20  1.3.2.2  pgoyette  * notice and this permission notice appear in all copies of the
     21  1.3.2.2  pgoyette  * software, derivative works or modified versions, and any portions
     22  1.3.2.2  pgoyette  * thereof, and that both notices appear in supporting documentation.
     23  1.3.2.2  pgoyette  *
     24  1.3.2.2  pgoyette  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     25  1.3.2.2  pgoyette  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
     26  1.3.2.2  pgoyette  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     27  1.3.2.2  pgoyette  *
     28  1.3.2.2  pgoyette  * Carnegie Mellon requests users of this software to return to
     29  1.3.2.2  pgoyette  *
     30  1.3.2.2  pgoyette  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     31  1.3.2.2  pgoyette  *  School of Computer Science
     32  1.3.2.2  pgoyette  *  Carnegie Mellon University
     33  1.3.2.2  pgoyette  *  Pittsburgh PA 15213-3890
     34  1.3.2.2  pgoyette  *
     35  1.3.2.2  pgoyette  * any improvements or extensions that they make and grant Carnegie Mellon
     36  1.3.2.2  pgoyette  * the rights to redistribute these changes.
     37  1.3.2.2  pgoyette  */
     38  1.3.2.2  pgoyette 
     39  1.3.2.2  pgoyette /*
     40  1.3.2.2  pgoyette   Copyright 1988, 1989, 1990, 1991, 1992
     41  1.3.2.2  pgoyette    by Intel Corporation, Santa Clara, California.
     42  1.3.2.2  pgoyette 
     43  1.3.2.2  pgoyette                 All Rights Reserved
     44  1.3.2.2  pgoyette 
     45  1.3.2.2  pgoyette Permission to use, copy, modify, and distribute this software and
     46  1.3.2.2  pgoyette its documentation for any purpose and without fee is hereby
     47  1.3.2.2  pgoyette granted, provided that the above copyright notice appears in all
     48  1.3.2.2  pgoyette copies and that both the copyright notice and this permission notice
     49  1.3.2.2  pgoyette appear in supporting documentation, and that the name of Intel
     50  1.3.2.2  pgoyette not be used in advertising or publicity pertaining to distribution
     51  1.3.2.2  pgoyette of the software without specific, written prior permission.
     52  1.3.2.2  pgoyette 
     53  1.3.2.2  pgoyette INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
     54  1.3.2.2  pgoyette INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
     55  1.3.2.2  pgoyette IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
     56  1.3.2.2  pgoyette CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
     57  1.3.2.2  pgoyette LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
     58  1.3.2.2  pgoyette NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
     59  1.3.2.2  pgoyette WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     60  1.3.2.2  pgoyette */
     61  1.3.2.2  pgoyette 
     62  1.3.2.2  pgoyette #include <machine/asm.h>
     63  1.3.2.2  pgoyette #include <machine/specialreg.h>
     64  1.3.2.2  pgoyette 
     65  1.3.2.2  pgoyette #define	CODE_SEGMENT	0x08	/* selector for the second 8-byte entry of gdt (code descriptor) */
     66  1.3.2.2  pgoyette #define	DATA_SEGMENT	0x10	/* selector for the third 8-byte entry of gdt (data descriptor) */
     67  1.3.2.2  pgoyette 
     68  1.3.2.2  pgoyette 	.align	16	/* emitted before the .text directive below, i.e. in whichever section is current here */
     69  1.3.2.2  pgoyette 	.globl _C_LABEL(startprog64)
     70  1.3.2.2  pgoyette _C_LABEL(startprog64):	/* exported 8-byte slot, zero in the image; nothing in this file writes it */
     71  1.3.2.2  pgoyette 	.quad 0	/* presumably set by the loader to the relocated startprog64_start -- TODO confirm against caller */
     72  1.3.2.2  pgoyette 
     73  1.3.2.2  pgoyette 	.globl _C_LABEL(startprog64_size)
     74  1.3.2.2  pgoyette _C_LABEL(startprog64_size):	/* exported 32-bit byte count of the blob below */
     75  1.3.2.2  pgoyette 	.long startprog64_end - _C_LABEL(startprog64_start)	/* code + tables + the 8192-byte space */
     76  1.3.2.2  pgoyette 
     77  1.3.2.2  pgoyette 	.text
     78  1.3.2.2  pgoyette 	.p2align 4,,15	/* align the entry point to 16 bytes */
     79  1.3.2.2  pgoyette 
     80  1.3.2.2  pgoyette /*
     81  1.3.2.2  pgoyette  * startprog64(kern_start, kern_load, stack, kern_size, loaded_start, kern_entry)
     82  1.3.2.2  pgoyette  * (names follow the register list in the body; TODO confirm order against the C caller) */
     83  1.3.2.2  pgoyette ENTRY(startprog64_start)
     84  1.3.2.2  pgoyette start:
     85  1.3.2.2  pgoyette 	/*
     86  1.3.2.2  pgoyette 	 * This function is to call the loaded kernel's start() with
     87  1.3.2.2  pgoyette 	 * 32bit segment mode from x64 mode.  It does not return to its caller.
     88  1.3.2.2  pgoyette 	 * %rdi: kernel start address (destination of the copy)
     89  1.3.2.2  pgoyette 	 * %rsi: loaded kernel address (source of the copy)
     90  1.3.2.2  pgoyette 	 * %rdx: stack address (becomes %esp in start32a; untouched until then)
     91  1.3.2.2  pgoyette 	 * %rcx: loaded kernel size (bytes to copy)
     92  1.3.2.2  pgoyette 	 * %r8 : loaded start address (run-time address of `start'; base for all label offsets)
     93  1.3.2.2  pgoyette 	 * %r9 : kernel entry address (called through %esi in start32b)
     94  1.3.2.2  pgoyette 	 */
     95  1.3.2.2  pgoyette 
     96  1.3.2.2  pgoyette 	cld		/* LynxOS depends on it */
     97  1.3.2.2  pgoyette 
     98  1.3.2.2  pgoyette 	cli		/* interrupts off; never re-enabled in this file */
     99  1.3.2.2  pgoyette 
    100  1.3.2.2  pgoyette 	/* Copy kernel: %rcx bytes from (%rsi) to (%rdi), overlap-safe unless NO_OVERLAP */
    101  1.3.2.2  pgoyette 	mov	%rcx, %r12		/* original kernel size */
    102  1.3.2.2  pgoyette 	movq	%rdi, %r11		/* for misaligned check */
    103  1.3.2.2  pgoyette 
    104  1.3.2.2  pgoyette #if !defined(NO_OVERLAP)
    105  1.3.2.2  pgoyette 	movq	%rdi, %r13
    106  1.3.2.2  pgoyette 	subq	%rsi, %r13		/* %r13 = dst - src, for the unsigned overlap test */
    107  1.3.2.2  pgoyette #endif
    108  1.3.2.2  pgoyette 
    109  1.3.2.2  pgoyette 	shrq	$3, %rcx		/* count for copy by words */
    110  1.3.2.2  pgoyette 	jz	8f			/* j if less than 8 bytes */
    111  1.3.2.2  pgoyette 
    112  1.3.2.2  pgoyette 	lea	-8(%rdi, %r12), %r14	/* target address of last 8 */
    113  1.3.2.2  pgoyette 	mov	-8(%rsi, %r12), %r15	/* get last word */
    114  1.3.2.2  pgoyette #if !defined(NO_OVERLAP)
    115  1.3.2.2  pgoyette 	cmpq	%r12, %r13		/* overlapping? i.e. (dst - src) < size, unsigned */
    116  1.3.2.2  pgoyette 	jb	10f			/* yes: copy backwards */
    117  1.3.2.2  pgoyette #endif
    118  1.3.2.2  pgoyette 
    119  1.3.2.2  pgoyette /*
    120  1.3.2.2  pgoyette  * Non-overlapping, copy forwards.
    121  1.3.2.2  pgoyette  * Newer Intel cpus (Nehalem) will do 16byte read/write transfers
    122  1.3.2.2  pgoyette  * if %ecx is more than 76.
    123  1.3.2.2  pgoyette  * AMD might do something similar some day.
    124  1.3.2.2  pgoyette  */
    125  1.3.2.2  pgoyette 	and	$7, %r11		/* destination misaligned ? */
    126  1.3.2.2  pgoyette 	jnz	2f
    127  1.3.2.2  pgoyette 	rep				/* aligned destination: copy %rcx whole words */
    128  1.3.2.2  pgoyette 	movsq
    129  1.3.2.2  pgoyette 	mov	%r15, (%r14)		/* write last word (covers the size % 8 tail bytes) */
    130  1.3.2.2  pgoyette 	jmp	.Lcopy_done
    131  1.3.2.2  pgoyette 
    132  1.3.2.2  pgoyette /*
    133  1.3.2.2  pgoyette  * Destination misaligned
    134  1.3.2.2  pgoyette  * AMD say it is better to align the destination (not the source).
    135  1.3.2.2  pgoyette  * (This will also re-align copies if the source and dest are both
    136  1.3.2.2  pgoyette  * misaligned by the same amount)
    137  1.3.2.2  pgoyette  * (I think Nehalem will use its accelerated copy if the source
    138  1.3.2.2  pgoyette  * and destination have the same alignment.)
    139  1.3.2.2  pgoyette  */
    140  1.3.2.2  pgoyette 2:
    141  1.3.2.2  pgoyette 	lea	-9(%r11, %r12), %rcx	/* post re-alignment count */
    142  1.3.2.2  pgoyette 	neg	%r11			/* now -1 .. -7 */
    143  1.3.2.2  pgoyette 	mov	(%rsi), %r12		/* get first word */
    144  1.3.2.2  pgoyette 	mov	%rdi, %r13		/* target for first word */
    145  1.3.2.2  pgoyette 	lea	8(%rsi, %r11), %rsi	/* advance src by 8 - (dst & 7) */
    146  1.3.2.2  pgoyette 	lea	8(%rdi, %r11), %rdi	/* advance dst to the next 8-byte boundary */
    147  1.3.2.2  pgoyette 	shr	$3, %rcx		/* bytes -> words */
    148  1.3.2.2  pgoyette 	rep
    149  1.3.2.2  pgoyette 	movsq
    150  1.3.2.2  pgoyette 	mov	%r12, (%r13)		/* write first word */
    151  1.3.2.2  pgoyette 	mov	%r15, (%r14)		/* write last word */
    152  1.3.2.2  pgoyette 	jmp	.Lcopy_done
    153  1.3.2.2  pgoyette 
    154  1.3.2.2  pgoyette #if !defined(NO_OVERLAP)
    155  1.3.2.2  pgoyette /* Must copy backwards.
    156  1.3.2.2  pgoyette  * Reverse copy is probably easy to code faster than 'rep movsq'
    157  1.3.2.2  pgoyette  * since that requires (IIRC) an extra clock every 3 iterations (AMD).
    158  1.3.2.2  pgoyette  * However I don't suppose anything cares that much!
    159  1.3.2.2  pgoyette  * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
    160  1.3.2.2  pgoyette  * The copy is aligned with the buffer start (more likely to
    161  1.3.2.2  pgoyette  * be a multiple of 8 than the end).
    162  1.3.2.2  pgoyette  */
    163  1.3.2.2  pgoyette 10:
    164  1.3.2.2  pgoyette 	lea	-8(%rsi, %rcx, 8), %rsi	/* src: last whole word counted from the buffer start */
    165  1.3.2.2  pgoyette 	lea	-8(%rdi, %rcx, 8), %rdi	/* dst: matching word */
    166  1.3.2.2  pgoyette 	std				/* make movsq walk downwards */
    167  1.3.2.2  pgoyette 	rep
    168  1.3.2.2  pgoyette 	movsq
    169  1.3.2.2  pgoyette 	cld				/* restore forward direction */
    170  1.3.2.2  pgoyette 	mov	%r15, (%r14)	/* write last bytes (word saved before the copy started) */
    171  1.3.2.2  pgoyette 	jmp	.Lcopy_done
    172  1.3.2.2  pgoyette #endif
    173  1.3.2.2  pgoyette 
    174  1.3.2.2  pgoyette /* Less than 8 bytes to copy, copy by bytes */
    175  1.3.2.2  pgoyette /* Intel Nehalem optimise 'rep movsb' for <= 7 bytes (9-15 clocks).
    176  1.3.2.2  pgoyette  * For longer transfers it is 50+ !
    177  1.3.2.2  pgoyette  */
    178  1.3.2.2  pgoyette 8:	mov	%r12, %rcx	/* byte count (the shrq above left 0 in %rcx) */
    179  1.3.2.2  pgoyette 
    180  1.3.2.2  pgoyette #if !defined(NO_OVERLAP)
    181  1.3.2.2  pgoyette 	cmpq	%r12, %r13	/* overlapping? */
    182  1.3.2.2  pgoyette 	jb	81f
    183  1.3.2.2  pgoyette #endif
    184  1.3.2.2  pgoyette 
    185  1.3.2.2  pgoyette 	/* nope, copy forwards. */
    186  1.3.2.2  pgoyette 	rep
    187  1.3.2.2  pgoyette 	movsb
    188  1.3.2.2  pgoyette 	jmp	.Lcopy_done
    189  1.3.2.2  pgoyette 
    190  1.3.2.2  pgoyette #if !defined(NO_OVERLAP)
    191  1.3.2.2  pgoyette /* Must copy backwards */
    192  1.3.2.2  pgoyette 81:
    193  1.3.2.2  pgoyette 	lea	-1(%rsi, %rcx), %rsi	/* src: last byte */
    194  1.3.2.2  pgoyette 	lea	-1(%rdi, %rcx), %rdi	/* dst: last byte */
    195  1.3.2.2  pgoyette 	std
    196  1.3.2.2  pgoyette 	rep
    197  1.3.2.2  pgoyette 	movsb
    198  1.3.2.2  pgoyette 	cld
    199  1.3.2.2  pgoyette #endif
    200  1.3.2.2  pgoyette 	/* End of copy kernel */
    201  1.3.2.2  pgoyette .Lcopy_done:
    202  1.3.2.2  pgoyette 
    203  1.3.2.2  pgoyette 	mov	%r8, %rdi	/* %rdi: loaded start address */
    204  1.3.2.2  pgoyette 	mov	%r9, %rsi	/* %rsi: kernel entry address */
    205  1.3.2.2  pgoyette 
    206  1.3.2.2  pgoyette 	/* Prepare jump address */
    207  1.3.2.2  pgoyette 	lea	(start32a - start)(%rdi), %rax	/* run-time address of start32a */
    208  1.3.2.2  pgoyette 	movl	%eax, (start32r - start)(%rdi)	/* low 32 bits become the far-pointer offset */
    209  1.3.2.2  pgoyette 
    210  1.3.2.2  pgoyette 	/* Setup GDT */
    211  1.3.2.2  pgoyette 	lea	(gdt - start)(%rdi), %rax	/* run-time address of gdt */
    212  1.3.2.2  pgoyette 	mov	%rax, (gdtrr - start)(%rdi)	/* 8-byte store into the base field after gdtr's limit word */
    213  1.3.2.2  pgoyette 	lgdt	(gdtr - start)(%rdi)
    214  1.3.2.2  pgoyette 
    215  1.3.2.2  pgoyette 	/* Jump to set %cs */
    216  1.3.2.2  pgoyette 	ljmp	*(start32r - start)(%rdi)	/* indirect far jump: offset from start32r, selector CODE_SEGMENT */
    217  1.3.2.2  pgoyette 
    218  1.3.2.2  pgoyette 	.align	4
    219  1.3.2.2  pgoyette 	.code32		/* everything from here on is assembled as 32-bit code */
    220  1.3.2.2  pgoyette start32a:		/* target of the far jump above; runs with %cs = CODE_SEGMENT */
    221  1.3.2.2  pgoyette 	movl	$DATA_SEGMENT, %eax	/* load the flat data selector into every data segment register */
    222  1.3.2.2  pgoyette 	movw	%ax, %ds
    223  1.3.2.2  pgoyette 	movw	%ax, %es
    224  1.3.2.2  pgoyette 	movw	%ax, %fs
    225  1.3.2.2  pgoyette 	movw	%ax, %gs
    226  1.3.2.2  pgoyette 	movw	%ax, %ss
    227  1.3.2.2  pgoyette 
    228  1.3.2.2  pgoyette 	movl	%edx, %esp	/* low 32 bits of the stack address passed in %rdx */
    229  1.3.2.2  pgoyette 
    230  1.3.2.2  pgoyette 	/* Disable Paging in CR0 */
    231  1.3.2.2  pgoyette 	movl	%cr0, %eax
    232  1.3.2.2  pgoyette 	andl	$(~CR0_PG), %eax
    233  1.3.2.2  pgoyette 	movl	%eax, %cr0
    234  1.3.2.2  pgoyette 
    235  1.3.2.2  pgoyette 	/* Disable PAE in CR4 */
    236  1.3.2.2  pgoyette 	movl	%cr4, %eax
    237  1.3.2.2  pgoyette 	andl	$(~CR4_PAE), %eax
    238  1.3.2.2  pgoyette 	movl	%eax, %cr4
    239  1.3.2.2  pgoyette 
    240  1.3.2.2  pgoyette 	jmp	start32b
    241  1.3.2.2  pgoyette 
    242  1.3.2.2  pgoyette 	.align	4
    243  1.3.2.2  pgoyette start32b:
    244  1.3.2.2  pgoyette 	xor	%eax, %eax	/* enter the kernel with %eax = 0 */
    245  1.3.2.2  pgoyette 	call	*%esi		/* kernel entry address (from %r9); no code follows, so a return would run into the data below */
    246  1.3.2.2  pgoyette 
    247  1.3.2.2  pgoyette 	.align	16
    248  1.3.2.2  pgoyette start32r:
    249  1.3.2.2  pgoyette 	.long	0
    250  1.3.2.2  pgoyette 	.long	CODE_SEGMENT
    251  1.3.2.2  pgoyette 	.align	16
    252  1.3.2.2  pgoyette gdt:
    253  1.3.2.2  pgoyette 	.long	0, 0
    254  1.3.2.2  pgoyette 	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00
    255  1.3.2.2  pgoyette 	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00
    256  1.3.2.2  pgoyette gdtr:
    257  1.3.2.2  pgoyette 	.word	gdtr - gdt
    258  1.3.2.2  pgoyette gdtrr:
    259  1.3.2.2  pgoyette 	.quad
    260  1.3.2.2  pgoyette start32end:
    261  1.3.2.2  pgoyette 	/* Space for the stack */
    262  1.3.2.2  pgoyette 	.align	16
    263  1.3.2.2  pgoyette 	.space	8192
    264  1.3.2.2  pgoyette startprog64_end:
    265