/*	$NetBSD: startprog64.S,v 1.3.16.2 2017/12/03 11:36:19 jdolecek Exp $	*/
/*	NetBSD: startprog.S,v 1.3 2003/02/01 14:48:18 dsl Exp	*/

/*
 * Starts the loaded program in protected mode / flat space
 * with the given stack frame.
 * Needs the global variables flatcodeseg and flatdataseg
 * (GDT offsets).
 * Derived from: NetBSD:sys/arch/i386/boot/asm.S
 */

/*
 * Ported to boot 386BSD by Julian Elischer (julian@tfs.com) Sept 1992
 *
 * Mach Operating System
 * Copyright (c) 1992, 1991 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
  Copyright 1988, 1989, 1990, 1991, 1992
   by Intel Corporation, Santa Clara, California.

                All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appears in all
copies and that both the copyright notice and this permission notice
appear in supporting documentation, and that the name of Intel
not be used in advertising or publicity pertaining to distribution
of the software without specific, written prior permission.

INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#include <machine/asm.h>
#include <machine/specialreg.h>

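/*
 * Selectors into the temporary GDT at the end of this file:
 * 0x08 is the flat 32-bit code segment, 0x10 the flat data segment.
 */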
#define	CODE_SEGMENT	0x08
#define	DATA_SEGMENT	0x10

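/*
 * startprog64 is a pointer-sized slot and startprog64_size the length
 * of the relocatable code between startprog64_start and startprog64_end;
 * both are presumably used by the boot loader's C code when it copies
 * this trampoline into place.
 */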
	.align	16
	.globl _C_LABEL(startprog64)
_C_LABEL(startprog64):
	.quad 0

	.globl _C_LABEL(startprog64_size)
_C_LABEL(startprog64_size):
	.long startprog64_end - _C_LABEL(startprog64_start)

	.text
	.p2align 4,,15

/*
 * startprog64(loadaddr, entry, stack, kern_load, kern_start, kern_size)
 */
ENTRY(startprog64_start)
start:
	/*
	 * This function calls the loaded kernel's start() in 32-bit
	 * segment mode from x64 (long) mode.
	 * %rdi: kernel start address
	 * %rsi: loaded kernel address
	 * %rdx: stack address
	 * %rcx: loaded kernel size
	 * %r8 : loaded start address
	 * %r9 : kernel entry address
	 */

	cld		/* LynxOS depends on it */

	cli

	/* Copy kernel */
	mov	%rcx, %r12		/* original kernel size */
	movq	%rdi, %r11		/* for misaligned check */

#if !defined(NO_OVERLAP)
	movq	%rdi, %r13
	subq	%rsi, %r13
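	/* %r13 = dst - src; an unsigned compare against the copy size
	 * tells whether the regions overlap and the copy must run
	 * backwards */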
#endif

	shrq	$3, %rcx		/* count for copy by words */
	jz	8f			/* j if less than 8 bytes */

	lea	-8(%rdi, %r12), %r14	/* target address of last 8 */
	mov	-8(%rsi, %r12), %r15	/* get last word */
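	/* The last 8 bytes were saved above and are stored after the
	 * qword copy; that final store also covers the size % 8 tail */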
#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13		/* overlapping? */
	jb	10f
#endif

/*
 * Non-overlapping, copy forwards.
 * Newer Intel CPUs (Nehalem) will do 16-byte read/write transfers
 * if %ecx is more than 76.
 * AMD might do something similar some day.
 */
	and	$7, %r11		/* destination misaligned ? */
	jnz	2f
	rep
	movsq
	mov	%r15, (%r14)		/* write last word */
	jmp	.Lcopy_done

/*
 * Destination misaligned
 * AMD says it is better to align the destination (not the source).
 * This will also re-align copies if the source and destination are
 * both misaligned by the same amount.
 * (I think Nehalem will use its accelerated copy if the source
 * and destination have the same alignment.)
 */
2:
	lea	-9(%r11, %r12), %rcx	/* post re-alignment count */
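	/* count = misalignment + size - 9: the first word (stored
	 * unaligned below) covers the bytes up to the realigned
	 * destination and the saved last word covers the final 8, so
	 * only the middle needs aligned qword copies */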
	neg	%r11			/* now -1 .. -7 */
	mov	(%rsi), %r12		/* get first word */
	mov	%rdi, %r13		/* target for first word */
	lea	8(%rsi, %r11), %rsi
	lea	8(%rdi, %r11), %rdi
	shr	$3, %rcx
	rep
	movsq
	mov	%r12, (%r13)		/* write first word */
	mov	%r15, (%r14)		/* write last word */
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
/* Must copy backwards.
 * Reverse copy is probably easy to code faster than 'rep movsq'
 * since that requires (IIRC) an extra clock every 3 iterations (AMD).
 * However I don't suppose anything cares that much!
 * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
 * The copy is aligned with the buffer start (more likely to
 * be a multiple of 8 than the end).
 */
10:
	lea	-8(%rsi, %rcx, 8), %rsi
	lea	-8(%rdi, %rcx, 8), %rdi
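	/* %rsi/%rdi now point at the last whole qword of each buffer;
	 * the saved last word still patches up any tail bytes after
	 * the reverse copy */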
	std
	rep
	movsq
	cld
	mov	%r15, (%r14)	/* write last bytes */
	jmp	.Lcopy_done
#endif

/* Less than 8 bytes to copy, copy by bytes */
/* Intel Nehalem optimises 'rep movsb' for <= 7 bytes (9-15 clocks).
 * For longer transfers it is 50+ !
 */
8:	mov	%r12, %rcx

#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13	/* overlapping? */
	jb	81f
#endif

	/* nope, copy forwards. */
	rep
	movsb
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
/* Must copy backwards */
81:
	lea	-1(%rsi, %rcx), %rsi
	lea	-1(%rdi, %rcx), %rdi
	std
	rep
	movsb
	cld
#endif
	/* End of copy kernel */
.Lcopy_done:

	mov	%r8, %rdi	/* %rdi: loaded start address */
	mov	%r9, %rsi	/* %rsi: kernel entry address */

	/* Prepare jump address */
	lea	(start32a - start)(%rdi), %rax
	movl	%eax, (start32r - start)(%rdi)
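	/* start32r now holds the 32-bit offset of start32a followed by
	 * CODE_SEGMENT: the far pointer used by the indirect ljmp below */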

	/* Setup GDT */
	lea	(gdt - start)(%rdi), %rax
	mov	%rax, (gdtrr - start)(%rdi)
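	/* gdtr (16-bit limit) and gdtrr (base, patched just above)
	 * together form the descriptor loaded by the lgdt below; the
	 * base must be set at run time because this code runs at the
	 * relocated address in %rdi */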
	lgdt	(gdtr - start)(%rdi)

	/* Jump to set %cs */
	ljmp	*(start32r - start)(%rdi)
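	/* The far jump reloads %cs with the 32-bit code selector and
	 * continues at start32a executing 32-bit code */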

	.align	4
	.code32
start32a:
	movl	$DATA_SEGMENT, %eax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %fs
	movw	%ax, %gs
	movw	%ax, %ss

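	/* %rdx (the stack address argument) was not clobbered by the
	 * copy above, so %edx holds the new 32-bit stack pointer */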
	movl	%edx, %esp

	/* Disable Paging in CR0 */
	movl	%cr0, %eax
	andl	$(~CR0_PG), %eax
	movl	%eax, %cr0
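	/* Clearing CR0_PG while executing 32-bit code takes the CPU
	 * out of long mode (the processor clears EFER.LMA), leaving
	 * legacy protected mode */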

	/* Disable PAE in CR4 */
	movl	%cr4, %eax
	andl	$(~CR4_PAE), %eax
	movl	%eax, %cr4

	jmp	start32b

	.align	4
start32b:
	xor	%eax, %eax
	call	*%esi

	.align	16
start32r:
	.long	0
	.long	CODE_SEGMENT
	.align	16
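/*
 * Temporary GDT: a null descriptor, then a flat 4GB 32-bit code
 * segment (selector 0x08) and a flat 4GB data segment (selector 0x10).
 */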
gdt:
	.long	0, 0
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00
gdtr:
	.word	gdtr - gdt
gdtrr:
	.quad	0
start32end:
	/* Space for the stack */
	.align	16
	.space	8192
startprog64_end: