/*	$NetBSD: startprog64.S,v 1.5 2023/06/24 05:31:04 msaitoh Exp $	*/
/*	NetBSD: startprog.S,v 1.3 2003/02/01 14:48:18 dsl Exp	*/

/* starts program in protected mode / flat space
 with given stackframe
 needs global variables flatcodeseg and flatdataseg
 (gdt offsets)
  derived from: NetBSD:sys/arch/i386/boot/asm.S
 */

/*
 * Ported to boot 386BSD by Julian Elischer (julian@tfs.com) Sept 1992
 *
 * Mach Operating System
 * Copyright (c) 1992, 1991 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
  Copyright 1988, 1989, 1990, 1991, 1992
   by Intel Corporation, Santa Clara, California.

                All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appears in all
copies and that both the copyright notice and this permission notice
appear in supporting documentation, and that the name of Intel
not be used in advertising or publicity pertaining to distribution
of the software without specific, written prior permission.

INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#include <machine/asm.h>
#include <machine/specialreg.h>

#define	CODE_SEGMENT	0x08
#define	DATA_SEGMENT	0x10

	.align	16
	.globl _C_LABEL(startprog64)
_C_LABEL(startprog64):
	.quad 0

	.globl _C_LABEL(startprog64_size)
_C_LABEL(startprog64_size):
	.long startprog64_end - _C_LABEL(startprog64_start)
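
/*
 * startprog64 is a pointer slot and startprog64_size the size of the
 * code between startprog64_start and startprog64_end.  The C side of
 * the boot loader presumably uses them to copy this trampoline to a
 * safe location before jumping to it; all addresses below are computed
 * relative to "start", so the copied blob is position independent.
 */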

	.text
	.p2align 4,,15

/*
 * startprog64(loaddr,entry,stack,kern_load,kern_start,kern_size)
 */
ENTRY(startprog64_start)
start:
	/*
	 * This function calls the loaded kernel's start() in 32-bit
	 * segmented mode, switching down from x64 (long) mode.
	 * %rdi: kernel start address
	 * %rsi: loaded kernel address
	 * %rdx: stack address
	 * %rcx: loaded kernel size
	 * %r8 : loaded start address
	 * %r9 : kernel entry address
	 */

	cld		/* LynxOS depends on it */

	cli

	/* skip copy if same source and destination */
	cmpq	%rdi,%rsi
	jz	.Lcopy_done

	/* Copy kernel */
	mov	%rcx, %r12		/* original kernel size */
	movq	%rdi, %r11		/* for misaligned check */

#if !defined(NO_OVERLAP)
	movq	%rdi, %r13
	subq	%rsi, %r13
#endif
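	/*
	 * %r13 = dst - src.  Treated as unsigned, %r13 < size means the
	 * destination starts inside the source region, so the copy has to
	 * be done backwards (see the cmpq/jb below).
	 */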

	shrq	$3, %rcx		/* count for copy by words */
	jz	8f			/* j if less than 8 bytes */

	lea	-8(%rdi, %r12), %r14	/* target address of last 8 */
	mov	-8(%rsi, %r12), %r15	/* get last word */
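	/*
	 * The final 8 bytes are saved now and stored explicitly at the end
	 * of each word-copy path, so the rep movsq count can simply be
	 * truncated (the size need not be a multiple of 8) and, in the
	 * overlapping case, the tail is read before it can be clobbered.
	 */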
#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13		/* overlapping? */
	jb	10f
#endif

/*
 * Non-overlapping, copy forwards.
 * Newer Intel CPUs (Nehalem) will do 16-byte read/write transfers
 * if %ecx is more than 76.
 * AMD might do something similar some day.
 */
	and	$7, %r11		/* destination misaligned ? */
	jnz	2f
	rep
	movsq
	mov	%r15, (%r14)		/* write last word */
	jmp	.Lcopy_done

/*
 * Destination misaligned.
 * AMD says it is better to align the destination (not the source).
 * This will also re-align copies if the source and dest are both
 * misaligned by the same amount.
 * (I think Nehalem will use its accelerated copy if the source
 * and destination have the same alignment.)
 */
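/*
 * Count math for the realigned copy below: %r11 holds the destination
 * misalignment (1..7) and %r12 the byte count; (count + misalignment - 9)
 * shifted right by 3 is the number of whole words rep movsq still has to
 * move once the first (unaligned) word and the last word are handled by
 * the explicit stores through %r13 and %r14.
 */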
2:
	lea	-9(%r11, %r12), %rcx	/* post re-alignment count */
	neg	%r11			/* now -1 .. -7 */
	mov	(%rsi), %r12		/* get first word */
	mov	%rdi, %r13		/* target for first word */
	lea	8(%rsi, %r11), %rsi
	lea	8(%rdi, %r11), %rdi
	shr	$3, %rcx
	rep
	movsq
	mov	%r12, (%r13)		/* write first word */
	mov	%r15, (%r14)		/* write last word */
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
/*
 * Must copy backwards.
 * Reverse copy is probably easy to code faster than 'rep movsq'
 * since that requires (IIRC) an extra clock every 3 iterations (AMD).
 * However I don't suppose anything cares that much!
 * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
 * The copy is aligned with the buffer start (more likely to
 * be a multiple of 8 than the end).
 */
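/*
 * The word count was truncated to size/8, so the final (possibly partial)
 * word is not covered by the reverse rep movsq; it was captured in %r15
 * before anything was overwritten and is stored after the loop.
 */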
10:
	lea	-8(%rsi, %rcx, 8), %rsi
	lea	-8(%rdi, %rcx, 8), %rdi
	std
	rep
	movsq
	cld
	mov	%r15, (%r14)	/* write last word */
	jmp	.Lcopy_done
#endif

/*
 * Less than 8 bytes to copy, copy by bytes.
 * Intel Nehalem optimises 'rep movsb' for <= 7 bytes (9-15 clocks).
 * For longer transfers it is 50+!
 */
8:	mov	%r12, %rcx

#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13	/* overlapping? */
	jb	81f
#endif

	/* nope, copy forwards. */
	rep
	movsb
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
/* Must copy backwards */
81:
	lea	-1(%rsi, %rcx), %rsi
	lea	-1(%rdi, %rcx), %rdi
	std
	rep
	movsb
	cld
#endif
	/* End of copy kernel */
.Lcopy_done:

	mov	%r8, %rdi	/* %rdi: loaded start address */
	mov	%r9, %rsi	/* %rsi: kernel entry address */

	/* Prepare jump address */
	lea	(start32a - start)(%rdi), %rax
	movl	%eax, (start32r - start)(%rdi)

	/* Setup GDT */
	lea	(gdt - start)(%rdi), %rax
	mov	%rax, (gdtrr - start)(%rdi)
	lgdt	(gdtr - start)(%rdi)

	/* Jump to set %cs (drops the CPU into 32-bit compatibility mode) */
	ljmp	*(start32r - start)(%rdi)

	.align	4
	.code32
start32a:
	movl	$DATA_SEGMENT, %eax	/* reload the data segment registers */
	movw	%ax, %ds		/* with the flat 32-bit data segment */
	movw	%ax, %es
	movw	%ax, %fs
	movw	%ax, %gs
	movw	%ax, %ss

	movl	%edx, %esp	/* switch to the stack passed in by the caller */

	/* Disable Paging in CR0 */
	movl	%cr0, %eax
	andl	$(~CR0_PG), %eax
	movl	%eax, %cr0
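	/*
	 * Clearing CR0.PG while EFER.LME is still set deactivates long mode
	 * (the CPU clears EFER.LMA), so from here on we are running in plain
	 * 32-bit protected mode with paging off.
	 */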

	/* Disable PAE in CR4 */
	movl	%cr4, %eax
	andl	$(~CR4_PAE), %eax
	movl	%eax, %cr4

	jmp	start32b

	.align	4
start32b:
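	/*
	 * Call the kernel's 32-bit entry point (%esi).  %eax is cleared
	 * first, presumably so the kernel does not mistake a stale register
	 * value for a boot loader magic number.
	 */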
	xor	%eax, %eax
	call	*%esi

	.align	16
start32r:
	.long	0			/* 32-bit offset, patched at runtime */
	.long	CODE_SEGMENT		/* code segment selector for the ljmp */
	.align	16
gdt:
	.long	0, 0			/* null descriptor */
	/* flat 32-bit code segment: base 0, limit 4GB, 4K granularity */
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00
	/* flat 32-bit data segment: base 0, limit 4GB, 4K granularity */
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00
gdtr:
	.word	gdtr - gdt		/* GDT limit */
gdtrr:
	.quad	0			/* GDT base, filled in at runtime */
start32end:
	/* Space for the stack */
	.align	16
	.space	8192
startprog64_end: