startprog64.S revision 1.3.16.2 1 1.3.16.2 jdolecek /* $NetBSD: startprog64.S,v 1.3.16.2 2017/12/03 11:36:19 jdolecek Exp $ */
2 1.3.16.2 jdolecek /* NetBSD: startprog.S,v 1.3 2003/02/01 14:48:18 dsl Exp */
3 1.3.16.2 jdolecek
4 1.3.16.2 jdolecek /* starts program in protected mode / flat space
5 1.3.16.2 jdolecek with given stackframe
6 1.3.16.2 jdolecek needs global variables flatcodeseg and flatdataseg
7 1.3.16.2 jdolecek (gdt offsets)
8 1.3.16.2 jdolecek derived from: NetBSD:sys/arch/i386/boot/asm.S
9 1.3.16.2 jdolecek */
10 1.3.16.2 jdolecek
11 1.3.16.2 jdolecek /*
12 1.3.16.2 jdolecek * Ported to boot 386BSD by Julian Elischer (julian (at) tfs.com) Sept 1992
13 1.3.16.2 jdolecek *
14 1.3.16.2 jdolecek * Mach Operating System
15 1.3.16.2 jdolecek * Copyright (c) 1992, 1991 Carnegie Mellon University
16 1.3.16.2 jdolecek * All Rights Reserved.
17 1.3.16.2 jdolecek *
18 1.3.16.2 jdolecek * Permission to use, copy, modify and distribute this software and its
19 1.3.16.2 jdolecek * documentation is hereby granted, provided that both the copyright
20 1.3.16.2 jdolecek * notice and this permission notice appear in all copies of the
21 1.3.16.2 jdolecek * software, derivative works or modified versions, and any portions
22 1.3.16.2 jdolecek * thereof, and that both notices appear in supporting documentation.
23 1.3.16.2 jdolecek *
24 1.3.16.2 jdolecek * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
25 1.3.16.2 jdolecek * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
26 1.3.16.2 jdolecek * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
27 1.3.16.2 jdolecek *
28 1.3.16.2 jdolecek * Carnegie Mellon requests users of this software to return to
29 1.3.16.2 jdolecek *
30 1.3.16.2 jdolecek * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
31 1.3.16.2 jdolecek * School of Computer Science
32 1.3.16.2 jdolecek * Carnegie Mellon University
33 1.3.16.2 jdolecek * Pittsburgh PA 15213-3890
34 1.3.16.2 jdolecek *
35 1.3.16.2 jdolecek * any improvements or extensions that they make and grant Carnegie Mellon
36 1.3.16.2 jdolecek * the rights to redistribute these changes.
37 1.3.16.2 jdolecek */
38 1.3.16.2 jdolecek
39 1.3.16.2 jdolecek /*
40 1.3.16.2 jdolecek Copyright 1988, 1989, 1990, 1991, 1992
41 1.3.16.2 jdolecek by Intel Corporation, Santa Clara, California.
42 1.3.16.2 jdolecek
43 1.3.16.2 jdolecek All Rights Reserved
44 1.3.16.2 jdolecek
45 1.3.16.2 jdolecek Permission to use, copy, modify, and distribute this software and
46 1.3.16.2 jdolecek its documentation for any purpose and without fee is hereby
47 1.3.16.2 jdolecek granted, provided that the above copyright notice appears in all
48 1.3.16.2 jdolecek copies and that both the copyright notice and this permission notice
49 1.3.16.2 jdolecek appear in supporting documentation, and that the name of Intel
50 1.3.16.2 jdolecek not be used in advertising or publicity pertaining to distribution
51 1.3.16.2 jdolecek of the software without specific, written prior permission.
52 1.3.16.2 jdolecek
53 1.3.16.2 jdolecek INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
54 1.3.16.2 jdolecek INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
55 1.3.16.2 jdolecek IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
56 1.3.16.2 jdolecek CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
57 1.3.16.2 jdolecek LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
58 1.3.16.2 jdolecek NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
59 1.3.16.2 jdolecek WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
60 1.3.16.2 jdolecek */
61 1.3.16.2 jdolecek
62 1.3.16.2 jdolecek #include <machine/asm.h>
63 1.3.16.2 jdolecek #include <machine/specialreg.h>
64 1.3.16.2 jdolecek
/*
 * Segment selectors used after the switch to the private GDT defined at
 * the "gdt" label in this file: 0x08 selects descriptor 1 (code) and
 * 0x10 selects descriptor 2 (data).
 */
#define	CODE_SEGMENT	0x08
#define	DATA_SEGMENT	0x10

/*
 * startprog64: exported 8-byte slot, zero in the assembled image.
 * NOTE(review): presumably the C side stores the address of the relocated
 * copy of startprog64_start here before calling through it -- confirm
 * against the caller, which is not part of this file.
 */
	.p2align 4
	.globl	_C_LABEL(startprog64)
_C_LABEL(startprog64):
	.quad	0

/*
 * startprog64_size: exported 32-bit byte count of the region that runs
 * from startprog64_start up to (not including) startprog64_end.
 */
	.globl	_C_LABEL(startprog64_size)
_C_LABEL(startprog64_size):
	.long	startprog64_end - _C_LABEL(startprog64_start)
76 1.3.16.2 jdolecek
/* The routine below goes into the text section, 16-byte aligned. */
	.text
	.p2align 4,,15

/*
 * startprog64_start(kern_start, kern_load, stack, kern_size,
 *		      loaded_start, kern_entry)
 *
 * Moves the loaded kernel image to its start address, installs the private
 * GDT found at "gdt", and far-jumps through "start32r" into the 32-bit code
 * at "start32a", which finally calls the kernel entry point.
 *
 * In (entered in 64-bit mode):
 *	%rdi	kernel start address		(copy destination)
 *	%rsi	loaded kernel address		(copy source)
 *	%rdx	stack address			(not touched here; start32a
 *						 loads it into %esp)
 *	%rcx	loaded kernel size, in bytes
 *	%r8	loaded start address		(run-time address of "start")
 *	%r9	kernel entry address
 *
 * Scratch: %rax, %rcx, %rsi, %rdi, %r11-%r15 and the flags.
 * This routine contains no return path.
 */
ENTRY(startprog64_start)
start:
	cld				/* LynxOS depends on it */
	cli

	/*
	 * ---- Kernel copy -------------------------------------------------
	 * Bulk of the data moves as 8-byte words with "rep movsq".  The last
	 * 8 bytes of the source are read up front and stored separately at
	 * the end, which takes care of a byte count that is not a multiple
	 * of 8.
	 */
	movq	%rdi, %r11		/* destination, for the alignment test */
	movq	%rcx, %r12		/* keep the byte count; %rcx is shifted */

#ifndef NO_OVERLAP
	movq	%rdi, %r13
	subq	%rsi, %r13		/* %r13 = destination - source */
#endif

	shrq	$3, %rcx		/* number of whole 8-byte words */
	jz	.Lcopy_small		/* fewer than 8 bytes in total */

	movq	-8(%rsi, %r12), %r15	/* final 8 source bytes, read early */
	leaq	-8(%rdi, %r12), %r14	/* where those final 8 bytes belong */
#ifndef NO_OVERLAP
	cmpq	%r12, %r13		/* unsigned (dst - src) < size ? */
	jb	.Lcopy_back_words	/* regions overlap: go top-down */
#endif

	/*
	 * No overlap: copy upwards.
	 * Performance notes inherited from the original author: newer Intel
	 * CPUs (Nehalem) use 16-byte transfers when the count exceeds 76,
	 * and AMD might do something similar some day.
	 */
	andq	$7, %r11		/* is the destination 8-byte aligned? */
	jnz	.Lcopy_fwd_unaligned
	rep movsq
	movq	%r15, (%r14)		/* store the final word */
	jmp	.Lkernel_in_place

	/*
	 * Upward copy with a misaligned destination.  The first 8 source
	 * bytes are saved, both pointers are advanced so that the destination
	 * becomes 8-byte aligned, the aligned middle is copied, and the saved
	 * first and final words are stored afterwards.
	 * Inherited notes: AMD recommends aligning the destination rather
	 * than the source; this also re-aligns copies whose source and
	 * destination are misaligned by the same amount.
	 */
.Lcopy_fwd_unaligned:
	leaq	-9(%r11, %r12), %rcx	/* post re-alignment count (bytes) */
	negq	%r11			/* now -1 .. -7 */
	movq	(%rsi), %r12		/* first 8 source bytes */
	movq	%rdi, %r13		/* where the first word belongs */
	leaq	8(%rsi, %r11), %rsi
	leaq	8(%rdi, %r11), %rdi	/* destination is now aligned */
	shrq	$3, %rcx		/* bytes -> words */
	rep movsq
	movq	%r12, (%r13)		/* store the first word */
	movq	%r15, (%r14)		/* store the final word */
	jmp	.Lkernel_in_place

#ifndef NO_OVERLAP
	/*
	 * Overlapping regions: copy downwards, starting from the last whole
	 * word counted from the buffer start.  The direction flag is set
	 * only for the duration of the string instruction.
	 * Inherited note: the std/cld pair is reputedly expensive (50+
	 * cycles on Netburst P4), but nothing here is that time critical.
	 */
.Lcopy_back_words:
	leaq	-8(%rsi, %rcx, 8), %rsi
	leaq	-8(%rdi, %rcx, 8), %rdi
	std
	rep movsq
	cld
	movq	%r15, (%r14)		/* store the final bytes */
	jmp	.Lkernel_in_place
#endif

	/*
	 * Fewer than 8 bytes: plain byte copy.
	 * Inherited note: Nehalem optimises "rep movsb" for <= 7 bytes.
	 */
.Lcopy_small:
	movq	%r12, %rcx		/* byte count */

#ifndef NO_OVERLAP
	cmpq	%r12, %r13		/* unsigned (dst - src) < size ? */
	jb	.Lcopy_back_bytes
#endif

	rep movsb			/* no overlap: copy upwards */
	jmp	.Lkernel_in_place

#ifndef NO_OVERLAP
.Lcopy_back_bytes:			/* overlap: copy downwards */
	leaq	-1(%rsi, %rcx), %rsi
	leaq	-1(%rdi, %rcx), %rdi
	std
	rep movsb
	cld
#endif

	/* ---- End of kernel copy ----------------------------------------- */
.Lkernel_in_place:

	/*
	 * From here on every address is formed as (label - start) + %rdi,
	 * i.e. relative to the loaded start address, not as a link-time
	 * absolute address.
	 */
	movq	%r9, %rsi		/* %rsi = kernel entry address */
	movq	%r8, %rdi		/* %rdi = loaded start address */

	/* Patch the offset half of the far pointer (low 32 bits only). */
	leaq	(start32a - start)(%rdi), %rax
	movl	%eax, (start32r - start)(%rdi)

	/* Fill in the GDT base and load the descriptor-table register. */
	leaq	(gdt - start)(%rdi), %rax
	movq	%rax, (gdtrr - start)(%rdi)
	lgdt	(gdtr - start)(%rdi)

	/* Far jump: reloads %cs with CODE_SEGMENT and lands on start32a. */
	ljmp	*(start32r - start)(%rdi)
217 1.3.16.2 jdolecek
/*
 * start32a / start32b: 32-bit continuation of startprog64_start, reached
 * through the far jump via start32r.
 *
 * In:	%edx	stack address for the kernel
 *	%esi	kernel entry address
 *
 * Loads every data segment register with DATA_SEGMENT, switches to the
 * supplied stack, clears CR0.PG and CR4.PAE, and calls the kernel entry
 * point with %eax = 0.  The kernel is not expected to return; if it does,
 * the CPU is parked in a halt loop instead of running into the data that
 * follows this code.
 */
	.align	4
	.code32
start32a:
	movl	$DATA_SEGMENT, %eax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %fs
	movw	%ax, %gs
	movw	%ax, %ss

	movl	%edx, %esp		/* stack handed in by the caller */

	/* Disable Paging in CR0 */
	movl	%cr0, %eax
	andl	$(~CR0_PG), %eax
	movl	%eax, %cr0

	/* Disable PAE in CR4 */
	movl	%cr4, %eax
	andl	$(~CR4_PAE), %eax
	movl	%eax, %cr4

	jmp	start32b

	.align	4
start32b:
	xor	%eax, %eax
	call	*%esi			/* enter the kernel */

	/*
	 * Reached only if the kernel entry point returns.  The loop re-halts
	 * after any wake-up (interrupts were disabled with cli on entry, but
	 * hlt can still be left on e.g. an NMI).
	 */
.Lstart32_halt:
	hlt
	jmp	.Lstart32_halt
246 1.3.16.2 jdolecek
/*
 * Data used by the mode switch.  All of it lives inside the region measured
 * by startprog64_size and is addressed relative to "start" at run time.
 *
 * start32r	far pointer (32-bit offset, then selector) for the ljmp;
 *		the offset is patched at run time with the address of
 *		start32a.
 * gdt		three 8-byte descriptors:
 *		  0: null
 *		  1: code, base 0, limit 0xfffff, 4 KiB granularity, 32-bit,
 *		     access byte 0x9f (present, DPL 0, conforming, readable)
 *		  2: data, base 0, limit 0xfffff, 4 KiB granularity, 32-bit,
 *		     access byte 0x93 (present, DPL 0, writable)
 * gdtr/gdtrr	operand for lgdt: 16-bit limit followed by the 64-bit base,
 *		the base being filled in at run time with the address of gdt.
 */
	.align	16
start32r:
	.long	0			/* offset, patched at run time */
	.long	CODE_SEGMENT		/* selector */

	.align	16
gdt:
	.long	0, 0
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00
gdtr:
	/* The limit is the offset of the last valid byte: table size - 1. */
	.word	gdtr - gdt - 1
gdtrr:
	/*
	 * Explicit operand: a bare ".quad" emits no bytes, which would leave
	 * the 8-byte base without storage of its own.
	 */
	.quad	0
start32end:
	/* Space for the stack */
	.align	16
	.space	8192
startprog64_end:
265