/*	$NetBSD: startprog64.S,v 1.3.16.2 2017/12/03 11:36:19 jdolecek Exp $	*/
/*	NetBSD: startprog.S,v 1.3 2003/02/01 14:48:18 dsl Exp */

/* starts program in protected mode / flat space
 with given stackframe
 needs global variables flatcodeseg and flatdataseg
 (gdt offsets)
 derived from: NetBSD:sys/arch/i386/boot/asm.S
 */

/*
 * Ported to boot 386BSD by Julian Elischer (julian@tfs.com) Sept 1992
 *
 * Mach Operating System
 * Copyright (c) 1992, 1991 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
  Copyright 1988, 1989, 1990, 1991, 1992
   by Intel Corporation, Santa Clara, California.

		All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appears in all
copies and that both the copyright notice and this permission notice
appear in supporting documentation, and that the name of Intel
not be used in advertising or publicity pertaining to distribution
of the software without specific, written prior permission.

INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/

#include <machine/asm.h>
#include <machine/specialreg.h>

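/*
 * Selectors into the temporary GDT set up near the end of this file:
 * 0x08 selects the flat 32bit code segment and 0x10 the flat data
 * segment used once the CPU is switched out of long mode.
 */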
#define CODE_SEGMENT	0x08
#define DATA_SEGMENT	0x10

	.align	16
	.globl _C_LABEL(startprog64)
_C_LABEL(startprog64):
	.quad 0

	.globl _C_LABEL(startprog64_size)
_C_LABEL(startprog64_size):
	.long startprog64_end - _C_LABEL(startprog64_start)
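
/*
 * startprog64 is a 64bit data slot and startprog64_size the number of
 * bytes between startprog64_start and startprog64_end (the relocatable
 * trampoline code plus the stack space reserved at the end); the boot
 * loader presumably uses these when it copies the trampoline into place
 * before jumping to it.
 */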

	.text
	.p2align 4,,15

/*
 * startprog64(loaddr, entry, stack, kern_load, kern_start, kern_size)
 */
ENTRY(startprog64_start)
start:
	/*
	 * This function calls the loaded kernel's start() in 32bit segment
	 * mode, switching down from x64 (long) mode.
	 * %rdi: kernel start address
	 * %rsi: loaded kernel address
	 * %rdx: stack address
	 * %rcx: loaded kernel size
	 * %r8 : loaded start address
	 * %r9 : kernel entry address
	 */
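
	/*
	 * Overview: the code below first copies the kernel image from where
	 * the loader placed it (%rsi) to its destination address (%rdi),
	 * then drops from long mode to 32bit protected mode and finally
	 * calls the kernel entry point.
	 */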

	cld			/* LynxOS depends on it */

	cli

	/* Copy kernel */
	mov	%rcx, %r12	/* original kernel size */
	movq	%rdi, %r11	/* for misaligned check */

#if !defined(NO_OVERLAP)
	movq	%rdi, %r13
	subq	%rsi, %r13
#endif
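
	/*
	 * %r13 now holds the (unsigned) distance from source to destination;
	 * if it is smaller than the copy length the regions overlap and the
	 * copy has to run backwards (see 10: and 81: below).
	 */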

	shrq	$3, %rcx	/* count for copy by words */
	jz	8f		/* j if less than 8 bytes */

	lea	-8(%rdi, %r12), %r14	/* target address of last 8 */
	mov	-8(%rsi, %r12), %r15	/* get last word */
#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13	/* overlapping? */
	jb	10f
#endif
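
	/*
	 * The last source word is already in %r15 and its destination in
	 * %r14; it is stored after the bulk "rep movsq", so a length that is
	 * not a multiple of 8 gets its tail bytes without a byte loop.
	 */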

	/*
	 * Non-overlapping, copy forwards.
	 * Newer Intel CPUs (Nehalem) will do 16-byte read/write transfers
	 * if %ecx is more than 76.
	 * AMD might do something similar some day.
	 */
	and	$7, %r11	/* destination misaligned? */
	jnz	2f
	rep
	movsq
	mov	%r15, (%r14)	/* write last word */
	jmp	.Lcopy_done

	/*
	 * Destination misaligned.
	 * AMD says it is better to align the destination (not the source).
	 * This will also re-align copies if the source and destination are
	 * both misaligned by the same amount.
	 * (I think Nehalem will use its accelerated copy if the source
	 * and destination have the same alignment.)
	 */
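	/*
	 * The first source word is loaded into %r12 and the original
	 * (unaligned) destination kept in %r13; both it and the last word
	 * are stored after the aligned "rep movsq", so neither the head nor
	 * the tail needs a byte loop.
	 */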
2:
	lea	-9(%r11, %r12), %rcx	/* post re-alignment count */
	neg	%r11			/* now -1 .. -7 */
	mov	(%rsi), %r12		/* get first word */
	mov	%rdi, %r13		/* target for first word */
	lea	8(%rsi, %r11), %rsi
	lea	8(%rdi, %r11), %rdi
	shr	$3, %rcx
	rep
	movsq
	mov	%r12, (%r13)	/* write first word */
	mov	%r15, (%r14)	/* write last word */
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
	/* Must copy backwards.
	 * Reverse copy is probably easy to code faster than 'rep movsq'
	 * since that requires (IIRC) an extra clock every 3 iterations (AMD).
	 * However I don't suppose anything cares that much!
	 * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
	 * The copy is aligned with the buffer start (more likely to
	 * be a multiple of 8 than the end).
	 */
10:
	lea	-8(%rsi, %rcx, 8), %rsi
	lea	-8(%rdi, %rcx, 8), %rdi
	std
	rep
	movsq
	cld
	mov	%r15, (%r14)	/* write last bytes */
	jmp	.Lcopy_done
#endif

	/* Less than 8 bytes to copy, copy by bytes. */
	/* Intel Nehalem optimises 'rep movsb' for <= 7 bytes (9-15 clocks).
	 * For longer transfers it is 50+!
	 */
8:	mov	%r12, %rcx

#if !defined(NO_OVERLAP)
	cmpq	%r12, %r13	/* overlapping? */
	jb	81f
#endif

	/* Nope, copy forwards. */
	rep
	movsb
	jmp	.Lcopy_done

#if !defined(NO_OVERLAP)
	/* Must copy backwards */
81:
	lea	-1(%rsi, %rcx), %rsi
	lea	-1(%rdi, %rcx), %rdi
	std
	rep
	movsb
	cld
#endif
	/* End of copy kernel */
.Lcopy_done:

	mov	%r8, %rdi	/* %rdi: loaded start address */
	mov	%r9, %rsi	/* %rsi: kernel entry address */
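
	/*
	 * From here on %rdi holds the address this trampoline was copied to;
	 * every reference below is therefore written as
	 * (symbol - start)(%rdi), computing absolute addresses at run time
	 * so the code works wherever the loader placed it.
	 */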

	/* Prepare jump address */
	lea	(start32a - start)(%rdi), %rax
	movl	%eax, (start32r - start)(%rdi)

	/* Setup GDT */
	lea	(gdt - start)(%rdi), %rax
	mov	%rax, (gdtrr - start)(%rdi)
	lgdt	(gdtr - start)(%rdi)

	/* Jump to set %cs */
	ljmp	*(start32r - start)(%rdi)
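
	/*
	 * The indirect far jump loads %cs with CODE_SEGMENT, a 32bit code
	 * descriptor in the GDT just installed, and continues at start32a in
	 * 32bit compatibility mode; long mode itself is left once paging is
	 * disabled below.
	 */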

	.align	4
	.code32
start32a:
	movl	$DATA_SEGMENT, %eax
	movw	%ax, %ds
	movw	%ax, %es
	movw	%ax, %fs
	movw	%ax, %gs
	movw	%ax, %ss

	movl	%edx, %esp

	/* Disable Paging in CR0 */
	movl	%cr0, %eax
	andl	$(~CR0_PG), %eax
	movl	%eax, %cr0

	/* Disable PAE in CR4 */
	movl	%cr4, %eax
	andl	$(~CR4_PAE), %eax
	movl	%eax, %cr4
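
	/*
	 * With CR0.PG and CR4.PAE cleared the processor has dropped out of
	 * long mode and is running in plain 32bit protected mode with paging
	 * off, using the flat segments loaded above.
	 */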

	jmp	start32b

	.align	4
start32b:
	xor	%eax, %eax
	call	*%esi

	.align	16
start32r:
	.long	0
	.long	CODE_SEGMENT
	.align	16
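/*
 * Temporary GDT: a null descriptor, a flat 4GB 32bit code segment
 * (base 0, limit 0xfffff with 4KB granularity, access byte 0x9f) and a
 * matching flat data segment (access byte 0x93).  gdtr and gdtrr together
 * form the lgdt operand: a 16bit limit followed by the base, which is
 * patched in at run time before lgdt is executed above.
 */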
gdt:
	.long	0, 0
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00
	.byte	0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00
gdtr:
	.word	gdtr - gdt
gdtrr:
	.quad	0
start32end:
	/* Space for the stack */
	.align	16
	.space	8192
startprog64_end: