startprog64.S revision 1.3.24.1 1 1.3.24.1 martin /* $NetBSD: startprog64.S,v 1.3.24.1 2023/05/13 11:45:53 martin Exp $ */
2 1.1 nonaka /* NetBSD: startprog.S,v 1.3 2003/02/01 14:48:18 dsl Exp */
3 1.1 nonaka
4 1.1 nonaka /* starts program in protected mode / flat space
5 1.1 nonaka with given stackframe
6 1.1 nonaka needs global variables flatcodeseg and flatdataseg
7 1.1 nonaka (gdt offsets)
8 1.1 nonaka derived from: NetBSD:sys/arch/i386/boot/asm.S
9 1.1 nonaka */
10 1.1 nonaka
11 1.1 nonaka /*
12 1.1 nonaka * Ported to boot 386BSD by Julian Elischer (julian (at) tfs.com) Sept 1992
13 1.1 nonaka *
14 1.1 nonaka * Mach Operating System
15 1.1 nonaka * Copyright (c) 1992, 1991 Carnegie Mellon University
16 1.1 nonaka * All Rights Reserved.
17 1.1 nonaka *
18 1.1 nonaka * Permission to use, copy, modify and distribute this software and its
19 1.1 nonaka * documentation is hereby granted, provided that both the copyright
20 1.1 nonaka * notice and this permission notice appear in all copies of the
21 1.1 nonaka * software, derivative works or modified versions, and any portions
22 1.1 nonaka * thereof, and that both notices appear in supporting documentation.
23 1.1 nonaka *
24 1.1 nonaka * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
25 1.1 nonaka * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
26 1.1 nonaka * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
27 1.1 nonaka *
28 1.1 nonaka * Carnegie Mellon requests users of this software to return to
29 1.1 nonaka *
30 1.1 nonaka * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
31 1.1 nonaka * School of Computer Science
32 1.1 nonaka * Carnegie Mellon University
33 1.1 nonaka * Pittsburgh PA 15213-3890
34 1.1 nonaka *
35 1.1 nonaka * any improvements or extensions that they make and grant Carnegie Mellon
36 1.1 nonaka * the rights to redistribute these changes.
37 1.1 nonaka */
38 1.1 nonaka
39 1.1 nonaka /*
40 1.1 nonaka Copyright 1988, 1989, 1990, 1991, 1992
41 1.1 nonaka by Intel Corporation, Santa Clara, California.
42 1.1 nonaka
43 1.1 nonaka All Rights Reserved
44 1.1 nonaka
45 1.1 nonaka Permission to use, copy, modify, and distribute this software and
46 1.1 nonaka its documentation for any purpose and without fee is hereby
47 1.1 nonaka granted, provided that the above copyright notice appears in all
48 1.1 nonaka copies and that both the copyright notice and this permission notice
49 1.1 nonaka appear in supporting documentation, and that the name of Intel
50 1.1 nonaka not be used in advertising or publicity pertaining to distribution
51 1.1 nonaka of the software without specific, written prior permission.
52 1.1 nonaka
53 1.1 nonaka INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
54 1.1 nonaka INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
55 1.1 nonaka IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
56 1.1 nonaka CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
57 1.1 nonaka LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
58 1.1 nonaka NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
59 1.1 nonaka WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
60 1.1 nonaka */
61 1.1 nonaka
62 1.1 nonaka #include <machine/asm.h>
63 1.1 nonaka #include <machine/specialreg.h>
64 1.1 nonaka
65 1.1 nonaka #define CODE_SEGMENT 0x08 /* selector of the 2nd entry of "gdt" (code); stored in the start32r far pointer */
66 1.1 nonaka #define DATA_SEGMENT 0x10 /* selector of the 3rd entry of "gdt" (data); loaded into %ds/%es/%fs/%gs/%ss */
67 1.1 nonaka
68 1.1 nonaka .align 16
69 1.1 nonaka .globl _C_LABEL(startprog64)
/*
 * Exported 8-byte slot, initially zero.  Nothing in this file stores to it;
 * presumably the loader records here the address of the copy of
 * startprog64_start that it will call -- TODO confirm against the caller.
 * NOTE(review): this and startprog64_size are emitted before the .text
 * directive below, i.e. into whatever section is current at that point.
 */
70 1.1 nonaka _C_LABEL(startprog64):
71 1.1 nonaka .quad 0
72 1.1 nonaka
73 1.1 nonaka .globl _C_LABEL(startprog64_size)
/*
 * Exported 32-bit constant: number of bytes from startprog64_start up to
 * startprog64_end (code, far pointer, GDT and the stack area).
 */
74 1.1 nonaka _C_LABEL(startprog64_size):
75 1.1 nonaka .long startprog64_end - _C_LABEL(startprog64_start)
76 1.1 nonaka
77 1.1 nonaka .text
78 1.1 nonaka .p2align 4,,15 /* 16-byte align the entry point, skipping at most 15 bytes */
79 1.1 nonaka
80 1.1 nonaka /*
81 1.3 nonaka * startprog64(kern_start,kern_load,stack,kern_size,loadaddr,entry)
82 1.1 nonaka */
83 1.1 nonaka ENTRY(startprog64_start)
84 1.1 nonaka start:
85 1.1 nonaka /*
86 1.1 nonaka * This function is to call the loaded kernel's start() with
87 1.1 nonaka * 32bit segment mode from x64 mode.
 *
 * It copies the loaded kernel to the kernel start address (skipped when
 * both addresses are equal), loads the GDT defined at the end of this
 * file, far-jumps into its 32-bit code segment, clears CR0.PG and
 * CR4.PAE and finally calls the kernel entry address.
 *
 * Nothing here returns to the caller; %r11-%r15 are used as scratch
 * registers without being saved.
 *
88 1.3 nonaka * %rdi: kernel start address (copy destination)
89 1.3 nonaka * %rsi: loaded kernel address (copy source)
90 1.1 nonaka * %rdx: stack address (becomes %esp once in 32-bit code)
91 1.3 nonaka * %rcx: loaded kernel size, in bytes
92 1.3 nonaka * %r8 : loaded start address (run-time address of the "start" label;
 *       every local symbol is addressed as (symbol - start) + %r8)
93 1.3 nonaka * %r9 : kernel entry address (called from 32-bit code)
94 1.1 nonaka */
95 1.1 nonaka
96 1.3 nonaka cld /* LynxOS depends on it; the forward rep movs copies below also need DF = 0 */
97 1.3 nonaka
98 1.3 nonaka cli /* interrupts stay disabled from here on */
99 1.3 nonaka
100 1.3.24.1 martin /* skip copy if same source and destination */
101 1.3.24.1 martin cmpq %rdi,%rsi
102 1.3.24.1 martin jz .Lcopy_done
103 1.3.24.1 martin
104 1.3 nonaka /* Copy kernel: %rcx bytes from (%rsi) to (%rdi); overlap is handled unless NO_OVERLAP is defined */
105 1.3 nonaka mov %rcx, %r12 /* original kernel size */
106 1.3 nonaka movq %rdi, %r11 /* for misaligned check */
107 1.3 nonaka
108 1.3 nonaka #if !defined(NO_OVERLAP)
109 1.3 nonaka movq %rdi, %r13 /* %r13 = destination - source; an unsigned */
110 1.3 nonaka subq %rsi, %r13 /* value below the size means a forward copy would overwrite unread source */
111 1.3 nonaka #endif
112 1.3 nonaka
113 1.3 nonaka shrq $3, %rcx /* count for copy by 8-byte words */
114 1.3 nonaka jz 8f /* j if less than 8 bytes */
115 1.3 nonaka
/*
 * The last 8 source bytes are read now and stored after the bulk copy,
 * so a size that is not a multiple of 8 needs no trailing byte loop.
 */
116 1.3 nonaka lea -8(%rdi, %r12), %r14 /* target address of last 8 */
117 1.3 nonaka mov -8(%rsi, %r12), %r15 /* get last word */
118 1.3 nonaka #if !defined(NO_OVERLAP)
119 1.3 nonaka cmpq %r12, %r13 /* overlapping? */
120 1.3 nonaka jb 10f
121 1.3 nonaka #endif
122 1.3 nonaka
123 1.3 nonaka /*
124 1.3 nonaka * Non-overlapping, copy forwards.
125 1.3 nonaka * Newer Intel cpus (Nehalem) will do 16byte read/write transfers
126 1.3 nonaka * if %ecx is more than 76.
127 1.3 nonaka * AMD might do something similar some day.
128 1.3 nonaka */
129 1.3 nonaka and $7, %r11 /* destination misaligned ? */
130 1.3 nonaka jnz 2f
131 1.3 nonaka rep
132 1.3 nonaka movsq
133 1.3 nonaka mov %r15, (%r14) /* write last word */
134 1.3 nonaka jmp .Lcopy_done
135 1.3 nonaka
136 1.3 nonaka /*
137 1.3 nonaka * Destination misaligned
138 1.3 nonaka * AMD say it is better to align the destination (not the source).
139 1.3 nonaka * This will also re-align copies if the source and dest are both
140 1.3 nonaka * misaligned by the same amount)
141 1.3 nonaka * (I think Nehalem will use its accelerated copy if the source
142 1.3 nonaka * and destination have the same alignment.)
 *
 * On entry %r11 = destination & 7 (1 .. 7).  The first 8 source bytes
 * are saved and stored after the bulk copy, the pointers are advanced to
 * the next 8-byte boundary of the destination, and the saved first and
 * last words cover the unaligned head and the tail.
 * %r12 (size) and %r13 are reused as scratch from here on.
143 1.3 nonaka */
144 1.3 nonaka 2:
145 1.3 nonaka lea -9(%r11, %r12), %rcx /* post re-alignment count */
146 1.3 nonaka neg %r11 /* now -1 .. -7 */
147 1.3 nonaka mov (%rsi), %r12 /* get first word */
148 1.3 nonaka mov %rdi, %r13 /* target for first word */
149 1.3 nonaka lea 8(%rsi, %r11), %rsi /* advance source by 8 - misalignment */
150 1.3 nonaka lea 8(%rdi, %r11), %rdi /* destination is now 8-byte aligned */
151 1.3 nonaka shr $3, %rcx /* bytes -> 8-byte words */
152 1.3 nonaka rep
153 1.3 nonaka movsq
154 1.3 nonaka mov %r12, (%r13) /* write first word */
155 1.3 nonaka mov %r15, (%r14) /* write last word */
156 1.3 nonaka jmp .Lcopy_done
157 1.3 nonaka
158 1.3 nonaka #if !defined(NO_OVERLAP)
159 1.3 nonaka /* Must copy backwards.
160 1.3 nonaka * Reverse copy is probably easy to code faster than 'rep movsq'
161 1.3 nonaka * since that requires (IIRC) an extra clock every 3 iterations (AMD).
162 1.3 nonaka * However I don't suppose anything cares that much!
163 1.3 nonaka * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
164 1.3 nonaka * The copy is aligned with the buffer start (more likely to
165 1.3 nonaka * be a multiple of 8 than the end).
166 1.3 nonaka */
167 1.3 nonaka 10:
168 1.3 nonaka lea -8(%rsi, %rcx, 8), %rsi /* last whole 8-byte word of the source */
169 1.3 nonaka lea -8(%rdi, %rcx, 8), %rdi /* matching word of the destination */
170 1.3 nonaka std /* DF = 1: rep movsq walks downwards */
171 1.3 nonaka rep
172 1.3 nonaka movsq
173 1.3 nonaka cld /* restore DF = 0 */
174 1.3 nonaka mov %r15, (%r14) /* write last bytes */
175 1.3 nonaka jmp .Lcopy_done
176 1.3 nonaka #endif
177 1.3 nonaka
178 1.3 nonaka /* Less than 8 bytes to copy, copy by bytes */
179 1.3 nonaka /* Intel Nehalem optimise 'rep movsb' for <= 7 bytes (9-15 clocks).
180 1.3 nonaka * For longer transfers it is 50+ !
181 1.3 nonaka */
182 1.3 nonaka 8: mov %r12, %rcx /* byte count (0 .. 7) */
183 1.3 nonaka
184 1.3 nonaka #if !defined(NO_OVERLAP)
185 1.3 nonaka cmpq %r12, %r13 /* overlapping? */
186 1.3 nonaka jb 81f
187 1.3 nonaka #endif
188 1.3 nonaka
189 1.3 nonaka /* nope, copy forwards. */
190 1.3 nonaka rep
191 1.3 nonaka movsb
192 1.3 nonaka jmp .Lcopy_done
193 1.3 nonaka
194 1.3 nonaka #if !defined(NO_OVERLAP)
195 1.3 nonaka /* Must copy backwards */
196 1.3 nonaka 81:
197 1.3 nonaka lea -1(%rsi, %rcx), %rsi /* last source byte */
198 1.3 nonaka lea -1(%rdi, %rcx), %rdi /* last destination byte */
199 1.3 nonaka std
200 1.3 nonaka rep
201 1.3 nonaka movsb
202 1.3 nonaka cld
203 1.3 nonaka #endif
204 1.3 nonaka /* End of copy kernel */
205 1.3 nonaka .Lcopy_done:
206 1.3 nonaka
/*
 * From here on %rdi holds the run-time address of "start", and every
 * local symbol is reached as (symbol - start)(%rdi), so the code does
 * not depend on the address it was assembled for.
 */
207 1.3 nonaka mov %r8, %rdi /* %rdi: loaded start address */
208 1.3 nonaka mov %r9, %rsi /* %rsi: kernel entry address */
209 1.1 nonaka
210 1.1 nonaka /* Prepare jump address */
211 1.1 nonaka lea (start32a - start)(%rdi), %rax /* run-time address of start32a */
212 1.1 nonaka movl %eax, (start32r - start)(%rdi) /* only the low 32 bits are stored: offset part of the far pointer */
213 1.1 nonaka
214 1.1 nonaka /* Setup GDT */
215 1.1 nonaka lea (gdt - start)(%rdi), %rax /* run-time address of the descriptor table */
216 1.1 nonaka mov %rax, (gdtrr - start)(%rdi) /* 8-byte base field following the limit word at gdtr */
217 1.1 nonaka lgdt (gdtr - start)(%rdi)
218 1.1 nonaka
219 1.1 nonaka /* Jump to set %cs */
220 1.1 nonaka ljmp *(start32r - start)(%rdi) /* indirect far jump: offset patched above, selector CODE_SEGMENT */
221 1.1 nonaka
222 1.1 nonaka .align 4
223 1.1 nonaka .code32
/*
 * Target of the far jump: runs with %cs = CODE_SEGMENT.  %edx (stack
 * address) and %esi (kernel entry address) still hold the values set up
 * by the 64-bit code above.
 */
224 1.1 nonaka start32a:
225 1.1 nonaka movl $DATA_SEGMENT, %eax
226 1.2 christos movw %ax, %ds
227 1.2 christos movw %ax, %es
228 1.2 christos movw %ax, %fs
229 1.2 christos movw %ax, %gs
230 1.2 christos movw %ax, %ss
231 1.1 nonaka
232 1.1 nonaka movl %edx, %esp /* switch to the stack passed in by the caller */
233 1.1 nonaka
234 1.1 nonaka /* Disable Paging in CR0 */
235 1.1 nonaka movl %cr0, %eax
236 1.1 nonaka andl $(~CR0_PG), %eax
237 1.1 nonaka movl %eax, %cr0
238 1.1 nonaka
239 1.1 nonaka /* Disable PAE in CR4 */
240 1.1 nonaka movl %cr4, %eax
241 1.1 nonaka andl $(~CR4_PAE), %eax
242 1.1 nonaka movl %eax, %cr4
243 1.1 nonaka
244 1.1 nonaka jmp start32b /* presumably a branch to serialize after the control-register writes -- TODO confirm */
245 1.1 nonaka
246 1.1 nonaka .align 4
247 1.1 nonaka start32b:
248 1.1 nonaka xor %eax, %eax /* enter the kernel with %eax = 0 */
249 1.1 nonaka call *%esi /* NOTE(review): nothing follows this call; a return would run into the padding and data below */
250 1.1 nonaka
251 1.1 nonaka .align 16
/*
 * Far pointer used by the indirect ljmp in the 64-bit code: a 32-bit
 * offset (patched at run time with the address of start32a) followed by
 * the code selector.
 */
252 1.1 nonaka start32r:
253 1.1 nonaka .long 0
254 1.1 nonaka .long CODE_SEGMENT
255 1.1 nonaka .align 16
/*
 * Descriptor table: null entry, then one code and one data descriptor,
 * matching CODE_SEGMENT (0x08) and DATA_SEGMENT (0x10).  Both have
 * base 0, limit 0xfffff with 4 KiB granularity and a 32-bit default
 * size (flags byte 0xcf).
 */
256 1.1 nonaka gdt:
257 1.1 nonaka .long 0, 0
258 1.1 nonaka .byte 0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00 /* code: present, DPL 0, execute/read, conforming, accessed */
259 1.1 nonaka .byte 0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00 /* data: present, DPL 0, read/write, accessed */
/*
 * GDTR image for lgdt: 16-bit limit followed by the table base, which
 * the 64-bit code fills in at run time.
 */
260 1.1 nonaka gdtr:
261 1.1 nonaka .word gdtr - gdt - 1 /* limit = offset of the last valid byte, i.e. table size - 1 */
262 1.1 nonaka gdtrr:
263 1.1 nonaka .quad 0 /* reserve all 8 bytes: a bare ".quad" emits nothing, yet the 64-bit code stores 8 bytes here and lgdt reads them */
264 1.1 nonaka start32end:
265 1.1 nonaka /* Space for the stack */
266 1.1 nonaka .align 16
267 1.1 nonaka .space 8192
268 1.1 nonaka startprog64_end:
269