Home | History | Annotate | Line # | Download | only in bootx64
startprog64.S revision 1.3
      1  1.3    nonaka /*	$NetBSD: startprog64.S,v 1.3 2017/02/11 10:23:39 nonaka Exp $	*/
      2  1.1    nonaka /*	NetBSD: startprog.S,v 1.3 2003/02/01 14:48:18 dsl Exp	*/
      3  1.1    nonaka 
      4  1.1    nonaka /* starts program in protected mode / flat space
      5  1.1    nonaka  with given stackframe
      6  1.1    nonaka  needs global variables flatcodeseg and flatdataseg
      7  1.1    nonaka  (gdt offsets)
      8  1.1    nonaka   derived from: NetBSD:sys/arch/i386/boot/asm.S
      9  1.1    nonaka  */
     10  1.1    nonaka 
     11  1.1    nonaka /*
     12  1.1    nonaka  * Ported to boot 386BSD by Julian Elischer (julian (at) tfs.com) Sept 1992
     13  1.1    nonaka  *
     14  1.1    nonaka  * Mach Operating System
     15  1.1    nonaka  * Copyright (c) 1992, 1991 Carnegie Mellon University
     16  1.1    nonaka  * All Rights Reserved.
     17  1.1    nonaka  *
     18  1.1    nonaka  * Permission to use, copy, modify and distribute this software and its
     19  1.1    nonaka  * documentation is hereby granted, provided that both the copyright
     20  1.1    nonaka  * notice and this permission notice appear in all copies of the
     21  1.1    nonaka  * software, derivative works or modified versions, and any portions
     22  1.1    nonaka  * thereof, and that both notices appear in supporting documentation.
     23  1.1    nonaka  *
     24  1.1    nonaka  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     25  1.1    nonaka  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
     26  1.1    nonaka  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     27  1.1    nonaka  *
     28  1.1    nonaka  * Carnegie Mellon requests users of this software to return to
     29  1.1    nonaka  *
     30  1.1    nonaka  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     31  1.1    nonaka  *  School of Computer Science
     32  1.1    nonaka  *  Carnegie Mellon University
     33  1.1    nonaka  *  Pittsburgh PA 15213-3890
     34  1.1    nonaka  *
     35  1.1    nonaka  * any improvements or extensions that they make and grant Carnegie Mellon
     36  1.1    nonaka  * the rights to redistribute these changes.
     37  1.1    nonaka  */
     38  1.1    nonaka 
     39  1.1    nonaka /*
     40  1.1    nonaka   Copyright 1988, 1989, 1990, 1991, 1992
     41  1.1    nonaka    by Intel Corporation, Santa Clara, California.
     42  1.1    nonaka 
     43  1.1    nonaka                 All Rights Reserved
     44  1.1    nonaka 
     45  1.1    nonaka Permission to use, copy, modify, and distribute this software and
     46  1.1    nonaka its documentation for any purpose and without fee is hereby
     47  1.1    nonaka granted, provided that the above copyright notice appears in all
     48  1.1    nonaka copies and that both the copyright notice and this permission notice
     49  1.1    nonaka appear in supporting documentation, and that the name of Intel
     50  1.1    nonaka not be used in advertising or publicity pertaining to distribution
     51  1.1    nonaka of the software without specific, written prior permission.
     52  1.1    nonaka 
     53  1.1    nonaka INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
     54  1.1    nonaka INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
     55  1.1    nonaka IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
     56  1.1    nonaka CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
     57  1.1    nonaka LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
     58  1.1    nonaka NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
     59  1.1    nonaka WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     60  1.1    nonaka */
     61  1.1    nonaka 
     62  1.1    nonaka #include <machine/asm.h>
     63  1.1    nonaka #include <machine/specialreg.h>
     64  1.1    nonaka 
     65  1.1    nonaka #define	CODE_SEGMENT	0x08	/* selector of the second gdt entry (code) */
     66  1.1    nonaka #define	DATA_SEGMENT	0x10	/* selector of the third gdt entry (data) */
     67  1.1    nonaka 
     68  1.1    nonaka 	.align	16		/* two exported data words describing the code below */
     69  1.1    nonaka 	.globl _C_LABEL(startprog64)
     70  1.1    nonaka _C_LABEL(startprog64):
     71  1.1    nonaka 	.quad 0		/* 8-byte slot, zero here. NOTE(review): presumably filled in by the loader with the address of the relocated code - confirm against the caller */
     72  1.1    nonaka 	/* Byte length from startprog64_start to startprog64_end, which includes the 8192-byte stack area. */
     73  1.1    nonaka 	.globl _C_LABEL(startprog64_size)
     74  1.1    nonaka _C_LABEL(startprog64_size):
     75  1.1    nonaka 	.long startprog64_end - _C_LABEL(startprog64_start)
     76  1.1    nonaka 
     77  1.1    nonaka 	.text
     78  1.1    nonaka 	.p2align 4,,15
     79  1.1    nonaka 
     80  1.1    nonaka /*
     81  1.3    nonaka  * startprog64(kern_start,kern_load,stack,kern_size,loadaddr,entry)
     82  1.1    nonaka  */
     83  1.1    nonaka ENTRY(startprog64_start)
     84  1.1    nonaka start:
     85  1.1    nonaka 	/*
     86  1.1    nonaka 	 * Copy the kernel to its final address, then call the loaded
     87  1.1    nonaka 	 * kernel's start() from a flat 32bit segment, leaving x64 mode.
     88  1.3    nonaka 	 * %rdi: kernel start address (copy destination)
     89  1.3    nonaka 	 * %rsi: loaded kernel address (copy source)
     90  1.1    nonaka 	 * %rdx: stack address (loaded into %esp by the 32bit code)
     91  1.3    nonaka 	 * %rcx: loaded kernel size (bytes to copy)
     92  1.3    nonaka 	 * %r8 : loaded start address (run-time address of label start)
     93  1.3    nonaka 	 * %r9 : kernel entry address (called by the 32bit code)
     94  1.1    nonaka 	 */
     95  1.1    nonaka 	/* There is no ret. %r11-%r15 are used as scratch and are not saved. */
     96  1.3    nonaka 	cld		/* LynxOS depends on it */
     97  1.3    nonaka 
     98  1.3    nonaka 	cli
     99  1.3    nonaka 
    100  1.3    nonaka 	/* Copy kernel: %rcx bytes from (%rsi) to (%rdi) */
    101  1.3    nonaka 	mov	%rcx, %r12		/* original kernel size */
    102  1.3    nonaka 	movq	%rdi, %r11		/* for misaligned check */
    103  1.3    nonaka 
    104  1.3    nonaka #if !defined(NO_OVERLAP)
    105  1.3    nonaka 	movq	%rdi, %r13
    106  1.3    nonaka 	subq	%rsi, %r13		/* %r13 = destination - source */
    107  1.3    nonaka #endif
    108  1.3    nonaka 
    109  1.3    nonaka 	shrq	$3, %rcx		/* count for copy by words */
    110  1.3    nonaka 	jz	8f			/* j if less than 8 bytes */
    111  1.3    nonaka 
    112  1.3    nonaka 	lea	-8(%rdi, %r12), %r14	/* target address of last 8 */
    113  1.3    nonaka 	mov	-8(%rsi, %r12), %r15	/* get last word */
    114  1.3    nonaka #if !defined(NO_OVERLAP)
    115  1.3    nonaka 	cmpq	%r12, %r13		/* overlapping? */
    116  1.3    nonaka 	jb	10f			/* yes, destination lies inside the source range */
    117  1.3    nonaka #endif
    118  1.3    nonaka 
    119  1.3    nonaka /*
    120  1.3    nonaka  * Non-overlapping, copy forwards.
    121  1.3    nonaka  * Newer Intel cpus (Nehalem) will do 16byte read/write transfers
    122  1.3    nonaka  * if %ecx is more than 76.
    123  1.3    nonaka  * AMD might do something similar some day.
    124  1.3    nonaka  */
    125  1.3    nonaka 	and	$7, %r11		/* destination misaligned ? */
    126  1.3    nonaka 	jnz	2f
    127  1.3    nonaka 	rep
    128  1.3    nonaka 	movsq
    129  1.3    nonaka 	mov	%r15, (%r14)		/* write last word */
    130  1.3    nonaka 	jmp	.Lcopy_done
    131  1.3    nonaka 
    132  1.3    nonaka /*
    133  1.3    nonaka  * Destination misaligned
    134  1.3    nonaka  * AMD say it is better to align the destination (not the source).
    135  1.3    nonaka  * (This will also re-align copies if the source and dest are both
    136  1.3    nonaka  * misaligned by the same amount.)
    137  1.3    nonaka  * (I think Nehalem will use its accelerated copy if the source
    138  1.3    nonaka  * and destination have the same alignment.)
    139  1.3    nonaka  */
    140  1.3    nonaka 2:
    141  1.3    nonaka 	lea	-9(%r11, %r12), %rcx	/* post re-alignment count */
    142  1.3    nonaka 	neg	%r11			/* now -1 .. -7 */
    143  1.3    nonaka 	mov	(%rsi), %r12		/* get first word */
    144  1.3    nonaka 	mov	%rdi, %r13		/* target for first word */
    145  1.3    nonaka 	lea	8(%rsi, %r11), %rsi	/* advance source by 8 - misalignment */
    146  1.3    nonaka 	lea	8(%rdi, %r11), %rdi	/* destination is now 8-byte aligned */
    147  1.3    nonaka 	shr	$3, %rcx		/* bytes to quadwords */
    148  1.3    nonaka 	rep
    149  1.3    nonaka 	movsq
    150  1.3    nonaka 	mov	%r12, (%r13)		/* write first word */
    151  1.3    nonaka 	mov	%r15, (%r14)		/* write last word */
    152  1.3    nonaka 	jmp	.Lcopy_done
    153  1.3    nonaka 
    154  1.3    nonaka #if !defined(NO_OVERLAP)
    155  1.3    nonaka /* Must copy backwards.
    156  1.3    nonaka  * Reverse copy is probably easy to code faster than 'rep movsq'
    157  1.3    nonaka  * since that requires (IIRC) an extra clock every 3 iterations (AMD).
    158  1.3    nonaka  * However I don't suppose anything cares that much!
    159  1.3    nonaka  * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
    160  1.3    nonaka  * The copy is aligned with the buffer start (more likely to
    161  1.3    nonaka  * be a multiple of 8 than the end).
    162  1.3    nonaka  */
    163  1.3    nonaka 10:
    164  1.3    nonaka 	lea	-8(%rsi, %rcx, 8), %rsi	/* start at quadword %rcx - 1 of the source */
    165  1.3    nonaka 	lea	-8(%rdi, %rcx, 8), %rdi	/* and of the destination */
    166  1.3    nonaka 	std
    167  1.3    nonaka 	rep
    168  1.3    nonaka 	movsq
    169  1.3    nonaka 	cld
    170  1.3    nonaka 	mov	%r15, (%r14)	/* write last bytes */
    171  1.3    nonaka 	jmp	.Lcopy_done
    172  1.3    nonaka #endif
    173  1.3    nonaka 
    174  1.3    nonaka /* Less than 8 bytes to copy, copy by bytes */
    175  1.3    nonaka /* Intel Nehalem optimise 'rep movsb' for <= 7 bytes (9-15 clocks).
    176  1.3    nonaka  * For longer transfers it is 50+ !
    177  1.3    nonaka  */
    178  1.3    nonaka 8:	mov	%r12, %rcx	/* byte count, 0 .. 7 */
    179  1.3    nonaka 
    180  1.3    nonaka #if !defined(NO_OVERLAP)
    181  1.3    nonaka 	cmpq	%r12, %r13	/* overlapping? */
    182  1.3    nonaka 	jb	81f
    183  1.3    nonaka #endif
    184  1.3    nonaka 
    185  1.3    nonaka 	/* nope, copy forwards. */
    186  1.3    nonaka 	rep
    187  1.3    nonaka 	movsb
    188  1.3    nonaka 	jmp	.Lcopy_done
    189  1.3    nonaka 
    190  1.3    nonaka #if !defined(NO_OVERLAP)
    191  1.3    nonaka /* Must copy backwards */
    192  1.3    nonaka 81:
    193  1.3    nonaka 	lea	-1(%rsi, %rcx), %rsi	/* last source byte */
    194  1.3    nonaka 	lea	-1(%rdi, %rcx), %rdi	/* last destination byte */
    195  1.3    nonaka 	std
    196  1.3    nonaka 	rep
    197  1.3    nonaka 	movsb
    198  1.3    nonaka 	cld
    199  1.3    nonaka #endif
    200  1.3    nonaka 	/* End of copy kernel */
    201  1.3    nonaka .Lcopy_done:
    202  1.3    nonaka 
    203  1.3    nonaka 	mov	%r8, %rdi	/* %rdi: loaded start address */
    204  1.3    nonaka 	mov	%r9, %rsi	/* %rsi: kernel entry address */
    205  1.1    nonaka 
    206  1.1    nonaka 	/* Prepare jump address: run-time address of start32a goes into the offset word at start32r */
    207  1.1    nonaka 	lea	(start32a - start)(%rdi), %rax
    208  1.1    nonaka 	movl	%eax, (start32r - start)(%rdi)
    209  1.1    nonaka 
    210  1.1    nonaka 	/* Setup GDT: store the run-time address of gdt in the base field (gdtrr), then load it */
    211  1.1    nonaka 	lea	(gdt - start)(%rdi), %rax
    212  1.1    nonaka 	mov	%rax, (gdtrr - start)(%rdi)
    213  1.1    nonaka 	lgdt	(gdtr - start)(%rdi)
    214  1.1    nonaka 
    215  1.1    nonaka 	/* Jump to set %cs: far jump through start32r, i.e. to CODE_SEGMENT:start32a */
    216  1.1    nonaka 	ljmp	*(start32r - start)(%rdi)
    217  1.1    nonaka 
    218  1.1    nonaka 	.align	4
    219  1.1    nonaka 	.code32			/* 32bit code from here on */
    220  1.1    nonaka start32a:		/* target of the far jump through start32r */
    221  1.1    nonaka 	movl	$DATA_SEGMENT, %eax	/* same data selector for every data segment register */
    222  1.2  christos 	movw	%ax, %ds
    223  1.2  christos 	movw	%ax, %es
    224  1.2  christos 	movw	%ax, %fs
    225  1.2  christos 	movw	%ax, %gs
    226  1.2  christos 	movw	%ax, %ss
    227  1.1    nonaka 
    228  1.1    nonaka 	movl	%edx, %esp	/* stack address that was passed in %rdx */
    229  1.1    nonaka 
    230  1.1    nonaka 	/* Disable Paging in CR0 */
    231  1.1    nonaka 	movl	%cr0, %eax
    232  1.1    nonaka 	andl	$(~CR0_PG), %eax
    233  1.1    nonaka 	movl	%eax, %cr0
    234  1.1    nonaka 
    235  1.1    nonaka 	/* Disable PAE in CR4 */
    236  1.1    nonaka 	movl	%cr4, %eax
    237  1.1    nonaka 	andl	$(~CR4_PAE), %eax
    238  1.1    nonaka 	movl	%eax, %cr4
    239  1.1    nonaka 
    240  1.1    nonaka 	jmp	start32b	/* NOTE(review): presumably a branch to resynchronise after the control register writes - confirm */
    241  1.1    nonaka 
    242  1.1    nonaka 	.align	4
    243  1.1    nonaka start32b:
    244  1.1    nonaka 	xor	%eax, %eax	/* enter the kernel with %eax = 0 */
    245  1.1    nonaka 	call	*%esi		/* kernel entry address, copied from %r9 */
    246  1.1    nonaka 	/* NOTE(review): nothing handles a return from the call above, execution would run into the data at start32r. Confirm the entry point never returns. */
    247  1.1    nonaka 	.align	16
    248  1.1    nonaka start32r:
    249  1.1    nonaka 	.long	0
    250  1.1    nonaka 	.long	CODE_SEGMENT
    251  1.1    nonaka 	.align	16
    252  1.1    nonaka gdt:
    253  1.1    nonaka 	.long	0, 0
    254  1.1    nonaka 	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00
    255  1.1    nonaka 	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00
    256  1.1    nonaka gdtr:
    257  1.1    nonaka 	.word	gdtr - gdt
    258  1.1    nonaka gdtrr:
    259  1.1    nonaka 	.quad
    260  1.1    nonaka start32end:
    261  1.1    nonaka 	/* Space for the stack */
    262  1.1    nonaka 	.align	16
    263  1.1    nonaka 	.space	8192
    264  1.1    nonaka startprog64_end:
    265