startprog64.S revision 1.2.2.3 1 1.2.2.3 skrll /* $NetBSD: startprog64.S,v 1.2.2.3 2017/08/28 17:51:41 skrll Exp $ */
2 1.2.2.2 skrll /* NetBSD: startprog.S,v 1.3 2003/02/01 14:48:18 dsl Exp */
3 1.2.2.2 skrll
4 1.2.2.2 skrll /* starts program in protected mode / flat space
5 1.2.2.2 skrll with given stackframe
6 1.2.2.2 skrll needs global variables flatcodeseg and flatdataseg
7 1.2.2.2 skrll (gdt offsets)
8 1.2.2.2 skrll derived from: NetBSD:sys/arch/i386/boot/asm.S
9 1.2.2.2 skrll */
10 1.2.2.2 skrll
11 1.2.2.2 skrll /*
12 1.2.2.2 skrll * Ported to boot 386BSD by Julian Elischer (julian (at) tfs.com) Sept 1992
13 1.2.2.2 skrll *
14 1.2.2.2 skrll * Mach Operating System
15 1.2.2.2 skrll * Copyright (c) 1992, 1991 Carnegie Mellon University
16 1.2.2.2 skrll * All Rights Reserved.
17 1.2.2.2 skrll *
18 1.2.2.2 skrll * Permission to use, copy, modify and distribute this software and its
19 1.2.2.2 skrll * documentation is hereby granted, provided that both the copyright
20 1.2.2.2 skrll * notice and this permission notice appear in all copies of the
21 1.2.2.2 skrll * software, derivative works or modified versions, and any portions
22 1.2.2.2 skrll * thereof, and that both notices appear in supporting documentation.
23 1.2.2.2 skrll *
24 1.2.2.2 skrll * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
25 1.2.2.2 skrll * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
26 1.2.2.2 skrll * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
27 1.2.2.2 skrll *
28 1.2.2.2 skrll * Carnegie Mellon requests users of this software to return to
29 1.2.2.2 skrll *
30 1.2.2.2 skrll * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
31 1.2.2.2 skrll * School of Computer Science
32 1.2.2.2 skrll * Carnegie Mellon University
33 1.2.2.2 skrll * Pittsburgh PA 15213-3890
34 1.2.2.2 skrll *
35 1.2.2.2 skrll * any improvements or extensions that they make and grant Carnegie Mellon
36 1.2.2.2 skrll * the rights to redistribute these changes.
37 1.2.2.2 skrll */
38 1.2.2.2 skrll
39 1.2.2.2 skrll /*
40 1.2.2.2 skrll Copyright 1988, 1989, 1990, 1991, 1992
41 1.2.2.2 skrll by Intel Corporation, Santa Clara, California.
42 1.2.2.2 skrll
43 1.2.2.2 skrll All Rights Reserved
44 1.2.2.2 skrll
45 1.2.2.2 skrll Permission to use, copy, modify, and distribute this software and
46 1.2.2.2 skrll its documentation for any purpose and without fee is hereby
47 1.2.2.2 skrll granted, provided that the above copyright notice appears in all
48 1.2.2.2 skrll copies and that both the copyright notice and this permission notice
49 1.2.2.2 skrll appear in supporting documentation, and that the name of Intel
50 1.2.2.2 skrll not be used in advertising or publicity pertaining to distribution
51 1.2.2.2 skrll of the software without specific, written prior permission.
52 1.2.2.2 skrll
53 1.2.2.2 skrll INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
54 1.2.2.2 skrll INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
55 1.2.2.2 skrll IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
56 1.2.2.2 skrll CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
57 1.2.2.2 skrll LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
58 1.2.2.2 skrll NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
59 1.2.2.2 skrll WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
60 1.2.2.2 skrll */
61 1.2.2.2 skrll
62 1.2.2.2 skrll #include <machine/asm.h>
63 1.2.2.2 skrll #include <machine/specialreg.h>
64 1.2.2.2 skrll
65 1.2.2.2 skrll #define CODE_SEGMENT 0x08
66 1.2.2.2 skrll #define DATA_SEGMENT 0x10
67 1.2.2.2 skrll
/*
 * Exported data describing the trampoline.
 *
 * startprog64       8-byte slot, assembled as zero.  Nothing in this file
 *                   writes it; presumably the C loader stores the address
 *                   of the relocated trampoline here -- TODO confirm
 *                   against the caller.
 * startprog64_size  32-bit byte count spanning startprog64_start up to
 *                   startprog64_end (code, tables and the trailing
 *                   8192-byte area).
 */
	.globl	_C_LABEL(startprog64)
	.globl	_C_LABEL(startprog64_size)

	.balign	16
_C_LABEL(startprog64):
	.quad	0

_C_LABEL(startprog64_size):
	.long	startprog64_end - _C_LABEL(startprog64_start)
76 1.2.2.2 skrll
/* Trampoline code proper. */
	.text
	.p2align 4,,15

/*
 * startprog64(kern_start, kern_load, stack, kern_size, loadaddr, entry)
 *
 * Entered in 64-bit mode.  Arguments, in the order the code consumes them:
 *	%rdi	kernel start address   (destination of the copy)
 *	%rsi	loaded kernel address  (source of the copy)
 *	%rdx	stack address          (loaded into %esp by the 32-bit stub)
 *	%rcx	loaded kernel size     (bytes to copy)
 *	%r8	address this code is loaded at; every 'label - start'
 *		displacement below is applied to it
 *	%r9	kernel entry address   (called by the 32-bit stub via %esi)
 *
 * What it does:
 *	1. Copies %rcx bytes from %rsi to %rdi, choosing a forward or a
 *	   backward copy so that overlapping regions are handled.
 *	2. Patches the far-jump operand (start32r) and the GDT base (gdtrr)
 *	   with addresses relative to %r8, then loads the GDT.
 *	3. Far-jumps through start32r to start32a.
 *
 * Scratch registers %r11-%r15 are overwritten and never restored; control
 * leaves through the far jump and there is no 'ret'.
 */
ENTRY(startprog64_start)
start:
	cld				/* LynxOS depends on it */
	cli

	/*
	 * ---- Copy kernel ----
	 */
	movq	%rcx, %r12		/* %r12 = byte count */
	movq	%rdi, %r11		/* %r11 = destination, for the alignment test */

#if !defined(NO_OVERLAP)
	movq	%rdi, %r13
	subq	%rsi, %r13		/* %r13 = destination - source */
#endif

	shrq	$3, %rcx		/* %rcx = number of whole 8-byte words */
	jz	.Lcopy_bytes		/* fewer than 8 bytes: byte copy */

	/*
	 * Fetch the final 8 source bytes now and remember where they go.
	 * Every word-copy path stores them last, which covers whatever
	 * tail the word loop does not reach.
	 */
	leaq	-8(%rdi, %r12), %r14	/* %r14 = address of last 8 target bytes */
	movq	-8(%rsi, %r12), %r15	/* %r15 = last 8 source bytes */
#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13		/* (dest - src) < size, unsigned? */
	jb	.Lcopy_words_backward	/* yes: forward copy would clobber source */
#endif

	/*
	 * Forward copy.
	 * Original author's note: newer Intel cpus (Nehalem) will do
	 * 16byte read/write transfers if %ecx is more than 76; AMD might
	 * do something similar some day.
	 */
	andq	$7, %r11		/* %r11 = destination & 7 */
	jnz	.Lcopy_realign
	rep movsq
	movq	%r15, (%r14)		/* store the saved last word */
	jmp	.Lcopy_done

	/*
	 * Destination misaligned.
	 * Original author's note: AMD say it is better to align the
	 * destination (not the source).  This also re-aligns copies whose
	 * source and destination are misaligned by the same amount (and
	 * Nehalem is thought to use its accelerated copy in that case).
	 *
	 * The first 8 source bytes are fetched up front and stored after
	 * the word loop; both pointers are advanced by 8 - (dest & 7) so
	 * the loop runs with an 8-byte aligned destination.
	 */
.Lcopy_realign:
	leaq	-9(%r11, %r12), %rcx	/* byte count after re-alignment, less one */
	negq	%r11			/* now -1 .. -7 */
	movq	(%rsi), %r12		/* %r12 = first 8 source bytes */
	movq	%rdi, %r13		/* %r13 = where they are stored later */
	leaq	8(%rsi, %r11), %rsi
	leaq	8(%rdi, %r11), %rdi
	shrq	$3, %rcx		/* whole words left for the loop */
	rep movsq
	movq	%r12, (%r13)		/* store the saved first word */
	movq	%r15, (%r14)		/* store the saved last word */
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
	/*
	 * Backward word copy for overlapping regions.
	 * Original author's note: a hand-coded reverse copy could probably
	 * beat the rep string instruction (which reportedly costs an extra
	 * clock every 3 iterations on AMD), but nothing here cares; the
	 * big cost is the std/cld pair - reputedly 50+ cycles on Netburst
	 * P4.
	 * The word loop is aligned with the start of the buffer (more
	 * likely to be a multiple of 8 than the end); the unaligned tail
	 * is covered by the saved last word.
	 */
.Lcopy_words_backward:
	leaq	-8(%rsi, %rcx, 8), %rsi	/* last whole word of the source */
	leaq	-8(%rdi, %rcx, 8), %rdi	/* last whole word of the target */
	std
	rep movsq
	cld
	movq	%r15, (%r14)		/* store the saved last bytes */
	jmp	.Lcopy_done
#endif

	/*
	 * Fewer than 8 bytes: copy byte by byte.
	 * Original author's note: Intel Nehalem optimises 'rep movsb' for
	 * <= 7 bytes (9-15 clocks); for longer transfers it is 50+.
	 */
.Lcopy_bytes:
	movq	%r12, %rcx		/* %rcx = byte count (0 .. 7) */

#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13		/* (dest - src) < size, unsigned? */
	jb	.Lcopy_bytes_backward
#endif

	/* No harmful overlap: copy forwards. */
	rep movsb
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
	/* Overlapping: copy backwards, starting at the last byte. */
.Lcopy_bytes_backward:
	leaq	-1(%rsi, %rcx), %rsi
	leaq	-1(%rdi, %rcx), %rdi
	std
	rep movsb
	cld
#endif
	/*
	 * ---- End of copy kernel ----
	 */
.Lcopy_done:

	movq	%r8, %rdi		/* %rdi: loaded start address */
	movq	%r9, %rsi		/* %rsi: kernel entry address */

	/* Store the run-time address of start32a as the far-jump offset. */
	leaq	(start32a - start)(%rdi), %rax
	movl	%eax, (start32r - start)(%rdi)

	/* Store the run-time address of the GDT as its base, then load it. */
	leaq	(gdt - start)(%rdi), %rax
	movq	%rax, (gdtrr - start)(%rdi)
	lgdt	(gdtr - start)(%rdi)

	/* Far jump through start32r: reloads %cs and lands on start32a. */
	ljmp	*(start32r - start)(%rdi)
217 1.2.2.2 skrll
/*
 * 32-bit stub, reached by the far jump through start32r.
 *
 * On entry:
 *	%edx	stack address for the kernel
 *	%esi	kernel entry address
 *
 * Reloads every data segment register with DATA_SEGMENT, switches to the
 * supplied stack, clears CR0.PG and CR4.PAE, then calls the kernel entry
 * point with %eax = 0.
 */
	.align 4
	.code32
start32a:
	movl	$DATA_SEGMENT, %eax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %fs
	movw	%ax, %gs
	movw	%ax, %ss

	movl	%edx, %esp		/* switch to the caller-supplied stack */

	/* Disable Paging in CR0 */
	movl	%cr0, %eax
	andl	$(~CR0_PG), %eax
	movl	%eax, %cr0

	/* Disable PAE in CR4 */
	movl	%cr4, %eax
	andl	$(~CR4_PAE), %eax
	movl	%eax, %cr4

	/* NOTE(review): presumably a branch to resynchronise after the
	 * control-register writes -- confirm. */
	jmp	start32b

	.align 4
start32b:
	xor	%eax, %eax		/* enter the kernel with %eax = 0 */
	call	*%esi

	/*
	 * The kernel entry point is not expected to return.  If it does,
	 * park the CPU here instead of falling through and executing the
	 * start32r/gdt data bytes that follow.  The loop re-halts after
	 * any event that wakes the processor from hlt.
	 */
.Lstart32_hang:
	hlt
	jmp	.Lstart32_hang
246 1.2.2.2 skrll
/*
 * Data used by the mode switch.  The 64-bit entry code patches start32r
 * and gdtrr at run time with addresses relative to where the trampoline
 * was loaded.
 */

	/*
	 * Far-jump operand: 32-bit offset (patched to the run-time address
	 * of start32a) followed by the code segment selector.
	 */
	.align 16
start32r:
	.long	0
	.long	CODE_SEGMENT

	/*
	 * GDT with three 8-byte descriptors:
	 *	0x00	null descriptor
	 *	0x08	code: base 0, limit 0xfffff, access 0x9f, flags 0xc
	 *		(4 KiB granularity, 32-bit)
	 *	0x10	data: base 0, limit 0xfffff, access 0x93, flags 0xc
	 *		(4 KiB granularity, 32-bit)
	 */
	.align 16
gdt:
	.long	0, 0
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00

	/*
	 * lgdt operand: 16-bit limit immediately followed by the 64-bit
	 * base.  The limit is the offset of the last valid byte of the
	 * table, i.e. its size minus one.
	 */
gdtr:
	.word	gdtr - gdt - 1
gdtrr:
	/*
	 * Base address, patched at run time.  The explicit operand makes
	 * the assembler reserve the 8 bytes: a bare '.quad' emits nothing,
	 * which would leave the patched base overlapping whatever follows.
	 */
	.quad	0
start32end:
	/*
	 * Space for the stack (presumably the caller points its stack
	 * argument into this area -- nothing in this file does so).
	 */
	.align 16
	.space 8192
startprog64_end:
265