startprog64.S revision 1.3.2.2 1 1.3.2.2 pgoyette /* $NetBSD: startprog64.S,v 1.3.2.2 2017/03/20 06:57:15 pgoyette Exp $ */
2 1.3.2.2 pgoyette /* NetBSD: startprog.S,v 1.3 2003/02/01 14:48:18 dsl Exp */
3 1.3.2.2 pgoyette
4 1.3.2.2 pgoyette /* starts program in protected mode / flat space
5 1.3.2.2 pgoyette with given stackframe
6 1.3.2.2 pgoyette needs global variables flatcodeseg and flatdataseg
7 1.3.2.2 pgoyette (gdt offsets)
8 1.3.2.2 pgoyette derived from: NetBSD:sys/arch/i386/boot/asm.S
9 1.3.2.2 pgoyette */
10 1.3.2.2 pgoyette
11 1.3.2.2 pgoyette /*
12 1.3.2.2 pgoyette * Ported to boot 386BSD by Julian Elischer (julian (at) tfs.com) Sept 1992
13 1.3.2.2 pgoyette *
14 1.3.2.2 pgoyette * Mach Operating System
15 1.3.2.2 pgoyette * Copyright (c) 1992, 1991 Carnegie Mellon University
16 1.3.2.2 pgoyette * All Rights Reserved.
17 1.3.2.2 pgoyette *
18 1.3.2.2 pgoyette * Permission to use, copy, modify and distribute this software and its
19 1.3.2.2 pgoyette * documentation is hereby granted, provided that both the copyright
20 1.3.2.2 pgoyette * notice and this permission notice appear in all copies of the
21 1.3.2.2 pgoyette * software, derivative works or modified versions, and any portions
22 1.3.2.2 pgoyette * thereof, and that both notices appear in supporting documentation.
23 1.3.2.2 pgoyette *
24 1.3.2.2 pgoyette * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
25 1.3.2.2 pgoyette * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
26 1.3.2.2 pgoyette * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
27 1.3.2.2 pgoyette *
28 1.3.2.2 pgoyette * Carnegie Mellon requests users of this software to return to
29 1.3.2.2 pgoyette *
30 1.3.2.2 pgoyette * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
31 1.3.2.2 pgoyette * School of Computer Science
32 1.3.2.2 pgoyette * Carnegie Mellon University
33 1.3.2.2 pgoyette * Pittsburgh PA 15213-3890
34 1.3.2.2 pgoyette *
35 1.3.2.2 pgoyette * any improvements or extensions that they make and grant Carnegie Mellon
36 1.3.2.2 pgoyette * the rights to redistribute these changes.
37 1.3.2.2 pgoyette */
38 1.3.2.2 pgoyette
39 1.3.2.2 pgoyette /*
40 1.3.2.2 pgoyette Copyright 1988, 1989, 1990, 1991, 1992
41 1.3.2.2 pgoyette by Intel Corporation, Santa Clara, California.
42 1.3.2.2 pgoyette
43 1.3.2.2 pgoyette All Rights Reserved
44 1.3.2.2 pgoyette
45 1.3.2.2 pgoyette Permission to use, copy, modify, and distribute this software and
46 1.3.2.2 pgoyette its documentation for any purpose and without fee is hereby
47 1.3.2.2 pgoyette granted, provided that the above copyright notice appears in all
48 1.3.2.2 pgoyette copies and that both the copyright notice and this permission notice
49 1.3.2.2 pgoyette appear in supporting documentation, and that the name of Intel
50 1.3.2.2 pgoyette not be used in advertising or publicity pertaining to distribution
51 1.3.2.2 pgoyette of the software without specific, written prior permission.
52 1.3.2.2 pgoyette
53 1.3.2.2 pgoyette INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
54 1.3.2.2 pgoyette INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
55 1.3.2.2 pgoyette IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
56 1.3.2.2 pgoyette CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
57 1.3.2.2 pgoyette LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
58 1.3.2.2 pgoyette NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
59 1.3.2.2 pgoyette WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
60 1.3.2.2 pgoyette */
61 1.3.2.2 pgoyette
62 1.3.2.2 pgoyette #include <machine/asm.h>
63 1.3.2.2 pgoyette #include <machine/specialreg.h>
64 1.3.2.2 pgoyette
/*
 * Selectors into the GDT defined at the end of this file:
 * entry 1 is the code descriptor, entry 2 the data descriptor.
 */
#define	CODE_SEGMENT	0x08
#define	DATA_SEGMENT	0x10

/*
 * startprog64: exported 8-byte slot, initialised to zero here.
 * NOTE(review): presumably filled in by the C side with the address
 * the code below is copied to -- confirm against the caller.
 */
	.p2align 4
	.globl	_C_LABEL(startprog64)
_C_LABEL(startprog64):
	.quad	0

/*
 * startprog64_size: exported 32-bit byte count of everything between
 * startprog64_start and startprog64_end (code, tables and stack space).
 */
	.globl	_C_LABEL(startprog64_size)
_C_LABEL(startprog64_size):
	.long	startprog64_end - _C_LABEL(startprog64_start)
76 1.3.2.2 pgoyette
77 1.3.2.2 pgoyette .text
78 1.3.2.2 pgoyette .p2align 4,,15
79 1.3.2.2 pgoyette
80 1.3.2.2 pgoyette /*
81 1.3.2.2 pgoyette * startprog64(loadddr,entry,stack,kern_load,kern_start,kern_size)
82 1.3.2.2 pgoyette */
83 1.3.2.2 pgoyette ENTRY(startprog64_start)
84 1.3.2.2 pgoyette start:
85 1.3.2.2 pgoyette /*
86 1.3.2.2 pgoyette * This function is to call the loaded kernel's start() with
87 1.3.2.2 pgoyette * 32bit segment mode from x64 mode.
88 1.3.2.2 pgoyette * %rdi: kernel start address
89 1.3.2.2 pgoyette * %rsi: loaded kernel address
90 1.3.2.2 pgoyette * %rdx: stack address
91 1.3.2.2 pgoyette * %rcx: loaded kernel size
92 1.3.2.2 pgoyette * %r8 : loaded start address
93 1.3.2.2 pgoyette * %r9 : kernel entry address
94 1.3.2.2 pgoyette */
95 1.3.2.2 pgoyette
96 1.3.2.2 pgoyette cld /* LynxOS depends on it */
97 1.3.2.2 pgoyette
98 1.3.2.2 pgoyette cli
99 1.3.2.2 pgoyette
100 1.3.2.2 pgoyette /* Copy kernel */
101 1.3.2.2 pgoyette mov %rcx, %r12 /* original kernel size */
102 1.3.2.2 pgoyette movq %rdi, %r11 /* for misaligned check */
103 1.3.2.2 pgoyette
104 1.3.2.2 pgoyette #if !defined(NO_OVERLAP)
105 1.3.2.2 pgoyette movq %rdi, %r13
106 1.3.2.2 pgoyette subq %rsi, %r13
107 1.3.2.2 pgoyette #endif
108 1.3.2.2 pgoyette
109 1.3.2.2 pgoyette shrq $3, %rcx /* count for copy by words */
110 1.3.2.2 pgoyette jz 8f /* j if less than 8 bytes */
111 1.3.2.2 pgoyette
112 1.3.2.2 pgoyette lea -8(%rdi, %r12), %r14 /* target address of last 8 */
113 1.3.2.2 pgoyette mov -8(%rsi, %r12), %r15 /* get last word */
114 1.3.2.2 pgoyette #if !defined(NO_OVERLAP)
115 1.3.2.2 pgoyette cmpq %r12, %r13 /* overlapping? */
116 1.3.2.2 pgoyette jb 10f
117 1.3.2.2 pgoyette #endif
118 1.3.2.2 pgoyette
119 1.3.2.2 pgoyette /*
120 1.3.2.2 pgoyette * Non-overlaping, copy forwards.
121 1.3.2.2 pgoyette * Newer Intel cpus (Nehalem) will do 16byte read/write transfers
122 1.3.2.2 pgoyette * if %ecx is more than 76.
123 1.3.2.2 pgoyette * AMD might do something similar some day.
124 1.3.2.2 pgoyette */
125 1.3.2.2 pgoyette and $7, %r11 /* destination misaligned ? */
126 1.3.2.2 pgoyette jnz 2f
127 1.3.2.2 pgoyette rep
128 1.3.2.2 pgoyette movsq
129 1.3.2.2 pgoyette mov %r15, (%r14) /* write last word */
130 1.3.2.2 pgoyette jmp .Lcopy_done
131 1.3.2.2 pgoyette
132 1.3.2.2 pgoyette /*
133 1.3.2.2 pgoyette * Destination misaligned
134 1.3.2.2 pgoyette * AMD say it is better to align the destination (not the source).
135 1.3.2.2 pgoyette * This will also re-align copies if the source and dest are both
136 1.3.2.2 pgoyette * misaligned by the same amount)
137 1.3.2.2 pgoyette * (I think Nehalem will use its accelerated copy if the source
138 1.3.2.2 pgoyette * and destination have the same alignment.)
139 1.3.2.2 pgoyette */
140 1.3.2.2 pgoyette 2:
141 1.3.2.2 pgoyette lea -9(%r11, %r12), %rcx /* post re-alignment count */
142 1.3.2.2 pgoyette neg %r11 /* now -1 .. -7 */
143 1.3.2.2 pgoyette mov (%rsi), %r12 /* get first word */
144 1.3.2.2 pgoyette mov %rdi, %r13 /* target for first word */
145 1.3.2.2 pgoyette lea 8(%rsi, %r11), %rsi
146 1.3.2.2 pgoyette lea 8(%rdi, %r11), %rdi
147 1.3.2.2 pgoyette shr $3, %rcx
148 1.3.2.2 pgoyette rep
149 1.3.2.2 pgoyette movsq
150 1.3.2.2 pgoyette mov %r12, (%r13) /* write first word */
151 1.3.2.2 pgoyette mov %r15, (%r14) /* write last word */
152 1.3.2.2 pgoyette jmp .Lcopy_done
153 1.3.2.2 pgoyette
154 1.3.2.2 pgoyette #if !defined(NO_OVERLAP)
155 1.3.2.2 pgoyette /* Must copy backwards.
156 1.3.2.2 pgoyette * Reverse copy is probably easy to code faster than 'rep movds'
157 1.3.2.2 pgoyette * since that requires (IIRC) an extra clock every 3 iterations (AMD).
158 1.3.2.2 pgoyette * However I don't suppose anything cares that much!
159 1.3.2.2 pgoyette * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
160 1.3.2.2 pgoyette * The copy is aligned with the buffer start (more likely to
161 1.3.2.2 pgoyette * be a multiple of 8 than the end).
162 1.3.2.2 pgoyette */
163 1.3.2.2 pgoyette 10:
164 1.3.2.2 pgoyette lea -8(%rsi, %rcx, 8), %rsi
165 1.3.2.2 pgoyette lea -8(%rdi, %rcx, 8), %rdi
166 1.3.2.2 pgoyette std
167 1.3.2.2 pgoyette rep
168 1.3.2.2 pgoyette movsq
169 1.3.2.2 pgoyette cld
170 1.3.2.2 pgoyette mov %r15, (%r14) /* write last bytes */
171 1.3.2.2 pgoyette jmp .Lcopy_done
172 1.3.2.2 pgoyette #endif
173 1.3.2.2 pgoyette
174 1.3.2.2 pgoyette /* Less than 8 bytes to copy, copy by bytes */
175 1.3.2.2 pgoyette /* Intel Nehalem optimise 'rep movsb' for <= 7 bytes (9-15 clocks).
176 1.3.2.2 pgoyette * For longer transfers it is 50+ !
177 1.3.2.2 pgoyette */
178 1.3.2.2 pgoyette 8: mov %r12, %rcx
179 1.3.2.2 pgoyette
180 1.3.2.2 pgoyette #if !defined(NO_OVERLAP)
181 1.3.2.2 pgoyette cmpq %r12, %r13 /* overlapping? */
182 1.3.2.2 pgoyette jb 81f
183 1.3.2.2 pgoyette #endif
184 1.3.2.2 pgoyette
185 1.3.2.2 pgoyette /* nope, copy forwards. */
186 1.3.2.2 pgoyette rep
187 1.3.2.2 pgoyette movsb
188 1.3.2.2 pgoyette jmp .Lcopy_done
189 1.3.2.2 pgoyette
190 1.3.2.2 pgoyette #if !defined(NO_OVERLAP)
191 1.3.2.2 pgoyette /* Must copy backwards */
192 1.3.2.2 pgoyette 81:
193 1.3.2.2 pgoyette lea -1(%rsi, %rcx), %rsi
194 1.3.2.2 pgoyette lea -1(%rdi, %rcx), %rdi
195 1.3.2.2 pgoyette std
196 1.3.2.2 pgoyette rep
197 1.3.2.2 pgoyette movsb
198 1.3.2.2 pgoyette cld
199 1.3.2.2 pgoyette #endif
200 1.3.2.2 pgoyette /* End of copy kernel */
201 1.3.2.2 pgoyette .Lcopy_done:
202 1.3.2.2 pgoyette
203 1.3.2.2 pgoyette mov %r8, %rdi /* %rdi: loaded start address */
204 1.3.2.2 pgoyette mov %r9, %rsi /* %rsi: kernel entry address */
205 1.3.2.2 pgoyette
206 1.3.2.2 pgoyette /* Prepare jump address */
207 1.3.2.2 pgoyette lea (start32a - start)(%rdi), %rax
208 1.3.2.2 pgoyette movl %eax, (start32r - start)(%rdi)
209 1.3.2.2 pgoyette
210 1.3.2.2 pgoyette /* Setup GDT */
211 1.3.2.2 pgoyette lea (gdt - start)(%rdi), %rax
212 1.3.2.2 pgoyette mov %rax, (gdtrr - start)(%rdi)
213 1.3.2.2 pgoyette lgdt (gdtr - start)(%rdi)
214 1.3.2.2 pgoyette
215 1.3.2.2 pgoyette /* Jump to set %cs */
216 1.3.2.2 pgoyette ljmp *(start32r - start)(%rdi)
217 1.3.2.2 pgoyette
/*
 * 32-bit continuation, reached through the far jump via start32r
 * (selector CODE_SEGMENT).
 *
 * On entry:
 *	%edx: stack address
 *	%esi: kernel entry address
 *
 * Loads the flat data selector into every data segment register,
 * switches to the supplied stack, clears CR0.PG and CR4.PAE, and
 * calls the kernel entry point with %eax = 0.
 */
	.align	4
	.code32
start32a:
	movl	$DATA_SEGMENT, %eax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %fs
	movw	%ax, %gs
	movw	%ax, %ss

	movl	%edx, %esp		/* switch to the stack passed by the caller */

	/* Disable Paging in CR0 */
	movl	%cr0, %eax
	andl	$(~CR0_PG), %eax
	movl	%eax, %cr0

	/* Disable PAE in CR4 */
	movl	%cr4, %eax
	andl	$(~CR4_PAE), %eax
	movl	%eax, %cr4

	/*
	 * NOTE(review): presumably a branch to discard anything fetched
	 * before the mode change above -- confirm.
	 */
	jmp	start32b

	.align	4
start32b:
	xor	%eax, %eax		/* enter the kernel with %eax = 0 */
	call	*%esi

	/*
	 * The kernel entry point is not expected to return.  If it does,
	 * stop here rather than running into the data that follows.
	 */
.Lstart32_hang:
	hlt
	jmp	.Lstart32_hang
/*
 * Far pointer for the indirect ljmp in the 64-bit code: a 32-bit
 * offset followed by the code selector.  The offset is patched at
 * run time with the address of start32a.
 */
	.align	16
start32r:
	.long	0			/* offset, filled in at run time */
	.long	CODE_SEGMENT		/* selector */

/*
 * GDT used for the switch to 32-bit segments.  Both descriptors have
 * base 0 and limit 0xfffff with 4KB granularity and 32-bit default size.
 */
	.align	16
gdt:
	.long	0, 0			/* 0x00: null descriptor */
	/* 0x08 (CODE_SEGMENT): present, DPL 0, code, conforming, readable */
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00
	/* 0x10 (DATA_SEGMENT): present, DPL 0, data, writable */
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00

/*
 * Operand for lgdt: 16-bit limit immediately followed by the 64-bit
 * base.  The limit is the offset of the last valid byte, i.e. the
 * table size minus one.  The base is patched at run time, so storage
 * for it must really be reserved: a bare `.quad' with no operand
 * emits nothing and would let the store spill into whatever follows.
 */
gdtr:
	.word	gdtr - gdt - 1		/* limit */
gdtrr:
	.quad	0			/* base, filled in at run time */
start32end:
	/* Space for the stack */
	.align	16
	.space	8192
startprog64_end:
265