startprog64.S revision 1.3 1 1.3 nonaka /* $NetBSD: startprog64.S,v 1.3 2017/02/11 10:23:39 nonaka Exp $ */
2 1.1 nonaka /* NetBSD: startprog.S,v 1.3 2003/02/01 14:48:18 dsl Exp */
3 1.1 nonaka
4 1.1 nonaka /* starts program in protected mode / flat space
5 1.1 nonaka with given stackframe
6 1.1 nonaka needs global variables flatcodeseg and flatdataseg
7 1.1 nonaka (gdt offsets)
8 1.1 nonaka derived from: NetBSD:sys/arch/i386/boot/asm.S
9 1.1 nonaka */
10 1.1 nonaka
11 1.1 nonaka /*
12 1.1 nonaka * Ported to boot 386BSD by Julian Elischer (julian (at) tfs.com) Sept 1992
13 1.1 nonaka *
14 1.1 nonaka * Mach Operating System
15 1.1 nonaka * Copyright (c) 1992, 1991 Carnegie Mellon University
16 1.1 nonaka * All Rights Reserved.
17 1.1 nonaka *
18 1.1 nonaka * Permission to use, copy, modify and distribute this software and its
19 1.1 nonaka * documentation is hereby granted, provided that both the copyright
20 1.1 nonaka * notice and this permission notice appear in all copies of the
21 1.1 nonaka * software, derivative works or modified versions, and any portions
22 1.1 nonaka * thereof, and that both notices appear in supporting documentation.
23 1.1 nonaka *
24 1.1 nonaka * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
25 1.1 nonaka * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
26 1.1 nonaka * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
27 1.1 nonaka *
28 1.1 nonaka * Carnegie Mellon requests users of this software to return to
29 1.1 nonaka *
30 1.1 nonaka * Software Distribution Coordinator or Software.Distribution (at) CS.CMU.EDU
31 1.1 nonaka * School of Computer Science
32 1.1 nonaka * Carnegie Mellon University
33 1.1 nonaka * Pittsburgh PA 15213-3890
34 1.1 nonaka *
35 1.1 nonaka * any improvements or extensions that they make and grant Carnegie Mellon
36 1.1 nonaka * the rights to redistribute these changes.
37 1.1 nonaka */
38 1.1 nonaka
39 1.1 nonaka /*
40 1.1 nonaka Copyright 1988, 1989, 1990, 1991, 1992
41 1.1 nonaka by Intel Corporation, Santa Clara, California.
42 1.1 nonaka
43 1.1 nonaka All Rights Reserved
44 1.1 nonaka
45 1.1 nonaka Permission to use, copy, modify, and distribute this software and
46 1.1 nonaka its documentation for any purpose and without fee is hereby
47 1.1 nonaka granted, provided that the above copyright notice appears in all
48 1.1 nonaka copies and that both the copyright notice and this permission notice
49 1.1 nonaka appear in supporting documentation, and that the name of Intel
50 1.1 nonaka not be used in advertising or publicity pertaining to distribution
51 1.1 nonaka of the software without specific, written prior permission.
52 1.1 nonaka
53 1.1 nonaka INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE
54 1.1 nonaka INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS,
55 1.1 nonaka IN NO EVENT SHALL INTEL BE LIABLE FOR ANY SPECIAL, INDIRECT, OR
56 1.1 nonaka CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
57 1.1 nonaka LOSS OF USE, DATA OR PROFITS, WHETHER IN ACTION OF CONTRACT,
58 1.1 nonaka NEGLIGENCE, OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
59 1.1 nonaka WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
60 1.1 nonaka */
61 1.1 nonaka
62 1.1 nonaka #include <machine/asm.h>
63 1.1 nonaka #include <machine/specialreg.h>
64 1.1 nonaka
65 1.1 nonaka #define CODE_SEGMENT 0x08 /* selector for entry 1 of gdt (the code descriptor); used as the far-jump selector in start32r */
66 1.1 nonaka #define DATA_SEGMENT 0x10 /* selector for entry 2 of gdt (the data descriptor); loaded into %ds/%es/%fs/%gs/%ss in start32a */
67 1.1 nonaka
68 1.1 nonaka .align 16
69 1.1 nonaka .globl _C_LABEL(startprog64)
70 1.1 nonaka _C_LABEL(startprog64): /* exported 8-byte slot, initially zero; nothing in this file writes it */
71 1.1 nonaka .quad 0 /* NOTE(review): presumably set by the loader to the address of the relocated startprog64_start - confirm against the caller */
72 1.1 nonaka
73 1.1 nonaka .globl _C_LABEL(startprog64_size)
74 1.1 nonaka _C_LABEL(startprog64_size): /* exported 32-bit size, in bytes, of the startprog64_start blob */
75 1.1 nonaka .long startprog64_end - _C_LABEL(startprog64_start) /* covers the code, the GDT data and the 8192-byte stack area */
76 1.1 nonaka
77 1.1 nonaka .text
78 1.1 nonaka .p2align 4,,15 /* align the entry point to 16 bytes (skipping at most 15 bytes of padding) */
79 1.1 nonaka
80 1.1 nonaka /*
81 1.3 nonaka * startprog64(kern_start,kern_load,stack,kern_size,loadaddr,entry)
82 1.1 nonaka */
83 1.1 nonaka ENTRY(startprog64_start)
84 1.1 nonaka start: /* base label: the code below addresses its own data as (label - start)(%rdi) */
85 1.1 nonaka /*
86 1.1 nonaka * Copy the loaded kernel to its start address, then call the kernel's
87 1.1 nonaka * entry point with 32bit segment mode (paging and PAE disabled) from x64 mode.
88 1.3 nonaka * %rdi: kernel start address (destination of the copy)
89 1.3 nonaka * %rsi: loaded kernel address (source of the copy)
90 1.1 nonaka * %rdx: stack address (its low 32 bits are loaded into %esp)
91 1.3 nonaka * %rcx: loaded kernel size (in bytes)
92 1.3 nonaka * %r8 : loaded start address (run-time address of this code, i.e. of the label start)
93 1.3 nonaka * %r9 : kernel entry address (called through %esi after the mode switch)
94 1.1 nonaka */
95 1.1 nonaka
96 1.3 nonaka cld /* LynxOS depends on it; also makes the string copies below run upwards */
97 1.3 nonaka
98 1.3 nonaka cli /* mask interrupts: the code below replaces the GDT and turns paging off */
99 1.3 nonaka
100 1.3 nonaka /* Copy kernel: move %rcx bytes from (%rsi) to (%rdi); overlapping regions are handled unless NO_OVERLAP is defined */
101 1.3 nonaka mov %rcx, %r12 /* original kernel size */
102 1.3 nonaka movq %rdi, %r11 /* for misaligned check */
103 1.3 nonaka
104 1.3 nonaka #if !defined(NO_OVERLAP)
105 1.3 nonaka movq %rdi, %r13
106 1.3 nonaka subq %rsi, %r13 /* %r13 = destination - source, compared (unsigned) with the size below */
107 1.3 nonaka #endif
108 1.3 nonaka
109 1.3 nonaka shrq $3, %rcx /* count for copy by words (8 bytes each) */
110 1.3 nonaka jz 8f /* j if less than 8 bytes */
111 1.3 nonaka
112 1.3 nonaka lea -8(%rdi, %r12), %r14 /* target address of last 8 */
113 1.3 nonaka mov -8(%rsi, %r12), %r15 /* get last word now; it is stored to (%r14) after the bulk copy */
114 1.3 nonaka #if !defined(NO_OVERLAP)
115 1.3 nonaka cmpq %r12, %r13 /* overlapping? (destination - source) below the size means the destination starts inside the source */
116 1.3 nonaka jb 10f /* yes: copy backwards */
117 1.3 nonaka #endif
118 1.3 nonaka
119 1.3 nonaka /*
120 1.3 nonaka * Non-overlapping, copy forwards.
121 1.3 nonaka * Newer Intel cpus (Nehalem) will do 16byte read/write transfers
122 1.3 nonaka * if %ecx is more than 76.
123 1.3 nonaka * AMD might do something similar some day.
124 1.3 nonaka */
125 1.3 nonaka and $7, %r11 /* destination misaligned ? */
126 1.3 nonaka jnz 2f
127 1.3 nonaka rep
128 1.3 nonaka movsq /* copy %rcx 8-byte words, ascending addresses */
129 1.3 nonaka mov %r15, (%r14) /* write last word (covers the size % 8 tail bytes) */
130 1.3 nonaka jmp .Lcopy_done
131 1.3 nonaka
132 1.3 nonaka /*
133 1.3 nonaka * Destination misaligned
134 1.3 nonaka * AMD say it is better to align the destination (not the source).
135 1.3 nonaka * (This will also re-align copies if the source and dest are both
136 1.3 nonaka * misaligned by the same amount.)
137 1.3 nonaka * (I think Nehalem will use its accelerated copy if the source
138 1.3 nonaka * and destination have the same alignment.)
139 1.3 nonaka */
140 1.3 nonaka 2: /* here %r11 = destination & 7 (1 .. 7) and %r12 = size in bytes */
141 1.3 nonaka lea -9(%r11, %r12), %rcx /* post re-alignment count (in bytes; turned into words by the shr below) */
142 1.3 nonaka neg %r11 /* now -1 .. -7 */
143 1.3 nonaka mov (%rsi), %r12 /* get first word (%r12 no longer holds the size) */
144 1.3 nonaka mov %rdi, %r13 /* target for first word */
145 1.3 nonaka lea 8(%rsi, %r11), %rsi /* advance the source by 8 - misalignment bytes */
146 1.3 nonaka lea 8(%rdi, %r11), %rdi /* advance the destination likewise; %rdi is now 8-byte aligned */
147 1.3 nonaka shr $3, %rcx
148 1.3 nonaka rep
149 1.3 nonaka movsq
150 1.3 nonaka mov %r12, (%r13) /* write first word */
151 1.3 nonaka mov %r15, (%r14) /* write last word */
152 1.3 nonaka jmp .Lcopy_done
153 1.3 nonaka
154 1.3 nonaka #if !defined(NO_OVERLAP)
155 1.3 nonaka /* Must copy backwards.
156 1.3 nonaka * Reverse copy is probably easy to code faster than 'rep movsq'
157 1.3 nonaka * since that requires (IIRC) an extra clock every 3 iterations (AMD).
158 1.3 nonaka * However I don't suppose anything cares that much!
159 1.3 nonaka * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
160 1.3 nonaka * The copy is aligned with the buffer start (more likely to
161 1.3 nonaka * be a multiple of 8 than the end).
162 1.3 nonaka */
163 1.3 nonaka 10:
164 1.3 nonaka lea -8(%rsi, %rcx, 8), %rsi /* address of the last whole 8-byte word of the source */
165 1.3 nonaka lea -8(%rdi, %rcx, 8), %rdi /* and the matching word of the destination */
166 1.3 nonaka std /* string operations now step towards lower addresses */
167 1.3 nonaka rep
168 1.3 nonaka movsq
169 1.3 nonaka cld /* restore the ascending direction */
170 1.3 nonaka mov %r15, (%r14) /* write last bytes */
171 1.3 nonaka jmp .Lcopy_done
172 1.3 nonaka #endif
173 1.3 nonaka
174 1.3 nonaka /* Less than 8 bytes to copy, copy by bytes */
175 1.3 nonaka /* Intel Nehalem optimise 'rep movsb' for <= 7 bytes (9-15 clocks).
176 1.3 nonaka * For longer transfers it is 50+ !
177 1.3 nonaka */
178 1.3 nonaka 8: mov %r12, %rcx /* byte count (0 .. 7) */
179 1.3 nonaka
180 1.3 nonaka #if !defined(NO_OVERLAP)
181 1.3 nonaka cmpq %r12, %r13 /* overlapping? */
182 1.3 nonaka jb 81f
183 1.3 nonaka #endif
184 1.3 nonaka
185 1.3 nonaka /* nope, copy forwards. */
186 1.3 nonaka rep
187 1.3 nonaka movsb
188 1.3 nonaka jmp .Lcopy_done
189 1.3 nonaka
190 1.3 nonaka #if !defined(NO_OVERLAP)
191 1.3 nonaka /* Must copy backwards */
192 1.3 nonaka 81:
193 1.3 nonaka lea -1(%rsi, %rcx), %rsi /* address of the last source byte */
194 1.3 nonaka lea -1(%rdi, %rcx), %rdi /* address of the last destination byte */
195 1.3 nonaka std
196 1.3 nonaka rep
197 1.3 nonaka movsb
198 1.3 nonaka cld
199 1.3 nonaka #endif
200 1.3 nonaka /* End of copy kernel */
201 1.3 nonaka .Lcopy_done: /* every copy path arrives here with the direction flag clear */
202 1.3 nonaka
203 1.3 nonaka mov %r8, %rdi /* %rdi: loaded start address (base for the (label - start) offsets below) */
204 1.3 nonaka mov %r9, %rsi /* %rsi: kernel entry address (called through %esi in start32b) */
205 1.1 nonaka
206 1.1 nonaka /* Prepare jump address: store the run-time address of start32a in the offset field of the far pointer start32r */
207 1.1 nonaka lea (start32a - start)(%rdi), %rax
208 1.1 nonaka movl %eax, (start32r - start)(%rdi) /* only the low 32 bits of the address are stored */
209 1.1 nonaka
210 1.1 nonaka /* Setup GDT: store the run-time address of gdt as the base at gdtrr (right after the limit word at gdtr), then load it */
211 1.1 nonaka lea (gdt - start)(%rdi), %rax
212 1.1 nonaka mov %rax, (gdtrr - start)(%rdi) /* 8-byte store */
213 1.1 nonaka lgdt (gdtr - start)(%rdi)
214 1.1 nonaka
215 1.1 nonaka /* Jump to set %cs: indirect far jump through start32r, i.e. to CODE_SEGMENT:start32a */
216 1.1 nonaka ljmp *(start32r - start)(%rdi)
217 1.1 nonaka
218 1.1 nonaka .align 4
219 1.1 nonaka .code32 /* everything from here on is assembled as 32-bit code */
220 1.1 nonaka start32a: /* target of the far jump through start32r; %cs is now CODE_SEGMENT */
221 1.1 nonaka movl $DATA_SEGMENT, %eax
222 1.2 christos movw %ax, %ds /* all data segment registers and %ss get DATA_SEGMENT */
223 1.2 christos movw %ax, %es
224 1.2 christos movw %ax, %fs
225 1.2 christos movw %ax, %gs
226 1.2 christos movw %ax, %ss
227 1.1 nonaka
228 1.1 nonaka movl %edx, %esp /* stack pointer = low 32 bits of the stack address argument (%rdx) */
229 1.1 nonaka
230 1.1 nonaka /* Disable Paging in CR0. NOTE(review): execution continues at the same address with paging off, so this code presumably runs from identity-mapped memory - confirm with the caller */
231 1.1 nonaka movl %cr0, %eax
232 1.1 nonaka andl $(~CR0_PG), %eax
233 1.1 nonaka movl %eax, %cr0
234 1.1 nonaka
235 1.1 nonaka /* Disable PAE in CR4 */
236 1.1 nonaka movl %cr4, %eax
237 1.1 nonaka andl $(~CR4_PAE), %eax
238 1.1 nonaka movl %eax, %cr4
239 1.1 nonaka
240 1.1 nonaka jmp start32b /* NOTE(review): presumably a deliberate branch after the control-register changes - confirm */
241 1.1 nonaka
242 1.1 nonaka .align 4
243 1.1 nonaka start32b:
244 1.1 nonaka xor %eax, %eax /* %eax = 0 when the kernel is entered */
245 1.1 nonaka call *%esi /* call the kernel entry address; no code follows to handle a return */
246 1.1 nonaka
247 1.1 nonaka .align 16
248 1.1 nonaka start32r: /* far pointer (32-bit offset, then selector) read by the ljmp that enters start32a */
249 1.1 nonaka .long 0 /* offset: overwritten at run time with the address of start32a */
250 1.1 nonaka .long CODE_SEGMENT /* selector for the far jump */
251 1.1 nonaka .align 16
252 1.1 nonaka gdt: /* three 8-byte descriptors: null, code (selector 0x08), data (selector 0x10) */
253 1.1 nonaka .long 0, 0 /* entry 0: null descriptor */
254 1.1 nonaka .byte 0xff, 0xff, 0x00, 0x00, 0x00, 0x9f, 0xcf, 0x00 /* entry 1: code, base 0, limit 0xfffff in 4KB units, 32-bit, access byte 0x9f */
255 1.1 nonaka .byte 0xff, 0xff, 0x00, 0x00, 0x00, 0x93, 0xcf, 0x00 /* entry 2: data, base 0, limit 0xfffff in 4KB units, 32-bit, access byte 0x93 */
256 1.1 nonaka gdtr: /* operand for lgdt: 16-bit limit immediately followed by the 64-bit base */
257 1.1 nonaka .word gdtr - gdt - 1 /* limit = offset of the last valid byte of the table (size - 1), not the size */
258 1.1 nonaka gdtrr:
259 1.1 nonaka .quad 0 /* base: overwritten at run time with the address of gdt; the explicit 0 reserves the 8 bytes (a bare .quad emits nothing) */
260 1.1 nonaka start32end:
261 1.1 nonaka /* Space for the stack */
262 1.1 nonaka .align 16
263 1.1 nonaka .space 8192
264 1.1 nonaka startprog64_end:
265