/*	$NetBSD: startprog64.S,v 1.3.24.1 2023/05/13 11:45:53 martin Exp $	*/
/*	NetBSD: startprog.S,v 1.3 2003/02/01 14:48:18 dsl Exp	*/

/*
 * Starts the program in protected mode / flat space
 * with the given stack frame.
 * Needs the global variables flatcodeseg and flatdataseg
 * (gdt offsets).
 * Derived from: NetBSD:sys/arch/i386/boot/asm.S
 */

/*
 * Ported to boot 386BSD by Julian Elischer (julian@tfs.com) Sept 1992
 *
 * Mach Operating System
 * Copyright (c) 1992, 1991 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *	Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *	School of Computer Science
 *	Carnegie Mellon University
 *	Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 * Copyright 1988, 1989, 1990, 1991, 1992
 * by Intel Corporation, Santa Clara, California.
 *
 *                All Rights Reserved
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby
 * granted, provided that the above copyright notice appears in all
 * copies and that both the copyright notice and this permission notice
 * appear in supporting documentation, and that the name of Intel
 * not be used in advertising or publicity pertaining to distribution
 * of the software without specific, written prior permission.
 *
 * INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
 * IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
 * NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
 * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <machine/asm.h>
#include <machine/specialreg.h>

/* Byte offsets of the flat code/data descriptors in the gdt below */
#define CODE_SEGMENT	0x08
#define DATA_SEGMENT	0x10

	.align	16
	.globl _C_LABEL(startprog64)
_C_LABEL(startprog64):
	.quad	0

	.globl _C_LABEL(startprog64_size)
_C_LABEL(startprog64_size):
	.long	startprog64_end - _C_LABEL(startprog64_start)

	.text
	.p2align 4,,15

/*
 * startprog64(kern_start, kern_load, stack, kern_size, loadaddr, entry)
 */
ENTRY(startprog64_start)
start:
	/*
	 * This function calls the loaded kernel's start() in 32-bit
	 * segment mode from x64 mode.
	 * %rdi: kernel start address (copy destination)
	 * %rsi: loaded kernel address (copy source)
	 * %rdx: stack address
	 * %rcx: loaded kernel size
	 * %r8 : loaded start address (where this code itself now runs)
	 * %r9 : kernel entry address
	 */
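
	/*
	 * A hedged, C-like sketch of the control flow (pseudocode only;
	 * none of these helper names exist in this tree):
	 *
	 *	memmove(kern_start, kern_load, kern_size);	// overlap-safe
	 *	load_flat_32bit_gdt();
	 *	esp = stack;
	 *	disable_paging_and_pae();
	 *	((void (*)(void))entry)();			// never returns
	 */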

	cld			/* LynxOS depends on it */

	cli

	/* skip the copy if source and destination are the same */
	cmpq	%rdi, %rsi
	jz	.Lcopy_done

	/* Copy kernel */
	mov	%rcx, %r12	/* original kernel size */
	movq	%rdi, %r11	/* for misaligned check */

#if !defined(NO_OVERLAP)
	movq	%rdi, %r13
	subq	%rsi, %r13	/* %r13 = dst - src */
#endif
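
	/*
	 * Overlap check used below (the assumption behind the unsigned
	 * compares against %r13): a forward copy clobbers its own source
	 * exactly when
	 *
	 *	(uintptr_t)(dst - src) < size
	 *
	 * i.e. dst lies inside [src, src + size); only then are the
	 * backward-copy paths taken.
	 */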

	shrq	$3, %rcx	/* count for copy by words */
	jz	8f		/* j if less than 8 bytes */

	lea	-8(%rdi, %r12), %r14	/* target address of last 8 */
	mov	-8(%rsi, %r12), %r15	/* get last word */
#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13	/* overlapping? */
	jb	10f
#endif
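
	/*
	 * Tail-handling trick (both the forward and backward word-copy
	 * paths rely on it): the final 8 source bytes were fetched into
	 * %r15 above, so after the whole-word copy the size % 8 trailing
	 * bytes are covered by a single, possibly overlapping, 8-byte
	 * store of %r15 to %r14.
	 */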

/*
 * Non-overlapping, copy forwards.
 * Newer Intel CPUs (Nehalem) will do 16-byte read/write transfers
 * if %ecx is more than 76.
 * AMD might do something similar some day.
 */
	and	$7, %r11	/* destination misaligned? */
	jnz	2f
	rep
	movsq
	mov	%r15, (%r14)	/* write last word */
	jmp	.Lcopy_done

/*
 * Destination misaligned.
 * AMD say it is better to align the destination (not the source).
 * This will also re-align copies if the source and dest are both
 * misaligned by the same amount.
 * (I think Nehalem will use its accelerated copy if the source
 * and destination have the same alignment.)
 */
2:
	lea	-9(%r11, %r12), %rcx	/* post re-alignment count */
	neg	%r11			/* now -1 .. -7 */
	mov	(%rsi), %r12		/* get first word */
	mov	%rdi, %r13		/* target for first word */
	lea	8(%rsi, %r11), %rsi
	lea	8(%rdi, %r11), %rdi
	shr	$3, %rcx
	rep
	movsq
	mov	%r12, (%r13)		/* write first word */
	mov	%r15, (%r14)		/* write last word */
	jmp	.Lcopy_done
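
	/*
	 * Worked example of the realignment arithmetic above (numbers
	 * chosen purely for illustration): with misalignment
	 * m = dst & 7 = 4 and size = 16, %rcx becomes
	 * (4 + 16 - 9) >> 3 = 1, so one aligned word is copied at
	 * dst+4.  The unaligned stores of the first word (at dst) and
	 * last word (at dst+8) overlap it and cover the rest.
	 */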

#if !defined(NO_OVERLAP)
/*
 * Must copy backwards.
 * Reverse copy is probably easy to code faster than 'rep movsq'
 * since that requires (IIRC) an extra clock every 3 iterations (AMD).
 * However I don't suppose anything cares that much!
 * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
 * The copy is aligned with the buffer start (more likely to
 * be a multiple of 8 than the end).
 */
10:
	lea	-8(%rsi, %rcx, 8), %rsi
	lea	-8(%rdi, %rcx, 8), %rdi
	std
	rep
	movsq
	cld
	mov	%r15, (%r14)	/* write last bytes */
	jmp	.Lcopy_done
#endif

/*
 * Less than 8 bytes to copy, copy by bytes.
 * Intel Nehalem optimises 'rep movsb' for <= 7 bytes (9-15 clocks).
 * For longer transfers it is 50+!
 */
8:	mov	%r12, %rcx

#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13	/* overlapping? */
	jb	81f
#endif

	/* nope, copy forwards. */
	rep
	movsb
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
/* Must copy backwards */
81:
	lea	-1(%rsi, %rcx), %rsi
	lea	-1(%rdi, %rcx), %rdi
	std
	rep
	movsb
	cld
#endif
	/* End of copy kernel */
.Lcopy_done:

	mov	%r8, %rdi	/* %rdi: loaded start address */
	mov	%r9, %rsi	/* %rsi: kernel entry address */

	/* Prepare jump address */
	lea	(start32a - start)(%rdi), %rax
	movl	%eax, (start32r - start)(%rdi)

	/* Setup GDT */
	lea	(gdt - start)(%rdi), %rax
	mov	%rax, (gdtrr - start)(%rdi)
	lgdt	(gdtr - start)(%rdi)

	/* Jump to set %cs */
	ljmp	*(start32r - start)(%rdi)
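
	/*
	 * start32r (below) is a 6-byte far pointer: the 32-bit offset
	 * patched in above (the runtime address of start32a) followed by
	 * the 16-bit CODE_SEGMENT selector.  The indirect ljmp reloads
	 * %cs with the flat 32-bit code descriptor, dropping the CPU
	 * from 64-bit code into compatibility mode; paging is then
	 * turned off below to leave long mode entirely.
	 */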

	.align	4
	.code32
start32a:
	movl	$DATA_SEGMENT, %eax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %fs
	movw	%ax, %gs
	movw	%ax, %ss

	movl	%edx, %esp

	/* Disable Paging in CR0 */
	movl	%cr0, %eax
	andl	$(~CR0_PG), %eax
	movl	%eax, %cr0

	/* Disable PAE in CR4 */
	movl	%cr4, %eax
	andl	$(~CR4_PAE), %eax
	movl	%eax, %cr4
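
	/*
	 * Ordering note (following the Intel SDM's long-mode exit
	 * sequence): CR0.PG must be cleared first, from compatibility
	 * mode; that clears EFER.LMA and leaves IA-32e mode.  Clearing
	 * CR4.PAE while long mode is still active would raise #GP
	 * instead.
	 */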

	jmp	start32b
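
	/*
	 * The near jump after the CR0/CR4 writes is the traditional
	 * belt-and-braces idiom for discarding stale prefetched
	 * instructions after a paging-mode change; mov to %cr0 is
	 * already a serializing instruction, so this is cheap insurance.
	 */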

	.align	4
start32b:
	xor	%eax, %eax
	call	*%esi

	.align	16
start32r:
	.long	0
	.long	CODE_SEGMENT
	.align	16
gdt:
	.long	0, 0
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00
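
	/*
	 * Descriptor decode (standard x86 GDT layout): entry 0 is the
	 * mandatory null descriptor; entry 1 (selector 0x08) is flat
	 * 32-bit code: base 0, limit 0xfffff with 4 KiB granularity
	 * (4 GiB), access 0x9f, flags 0xc (G=1, D/B=1); entry 2
	 * (selector 0x10) is the matching flat data segment, access 0x93.
	 */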
gdtr:
	.word	gdtr - gdt	/* limit */
gdtrr:
	.quad	0		/* base, patched with the runtime gdt address */
start32end:
	/* Space for the stack */
	.align	16
	.space	8192
startprog64_end: