/*	$NetBSD: cpuswitch.S,v 1.47 2007/05/17 14:51:15 yamt Exp $	*/

/*
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1994-1998 Mark Brinicombe.
 * Copyright (c) 1994 Brini.
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Brini.
 * 4. The name of the company nor the name of the author may be used to
 *    endorse or promote products derived from this software without specific
 *    prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * RiscBSD kernel project
 *
 * cpuswitch.S
 *
 * cpu switching functions
 *
 * Created      : 15/10/94
 */

#include "opt_armfpe.h"
#include "opt_arm32_pmap.h"
#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"

#include "assym.h"
#include <arm/arm32/pte.h>
#include <machine/param.h>
#include <machine/cpu.h>
#include <machine/frame.h>
#include <machine/asm.h>

/* LINTSTUB: include <sys/param.h> */

#undef IRQdisable
#undef IRQenable

/*
 * New experimental definitions of IRQdisable and IRQenable.
 * These keep FIQs enabled since FIQs are special.
 *
 * All four macros below read CPSR into r14, set or clear the mask bit(s)
 * and write back the control field only (cpsr_c).  They therefore clobber
 * r14 (lr): only expand them where lr is dead or has already been saved.
 *
 * The last line of each macro deliberately has no trailing "; \" so that
 * the definition can never swallow whatever line happens to follow it.
 */

#define IRQdisable \
	mrs	r14, cpsr ; \
	orr	r14, r14, #(I32_bit) ; \
	msr	cpsr_c, r14

#define IRQenable \
	mrs	r14, cpsr ; \
	bic	r14, r14, #(I32_bit) ; \
	msr	cpsr_c, r14

/*
 * These are used for switching the translation table/DACR.
 * Since the vector page can be invalid for a short time, we must
 * disable both regular IRQs *and* FIQs.
 *
 * XXX: This is not necessary if the vector table is relocated.
 */
#define IRQdisableALL \
	mrs	r14, cpsr ; \
	orr	r14, r14, #(I32_bit | F32_bit) ; \
	msr	cpsr_c, r14

#define IRQenableALL \
	mrs	r14, cpsr ; \
	bic	r14, r14, #(I32_bit | F32_bit) ; \
	msr	cpsr_c, r14

/*
 * Address constants for the code below.  Each .L label holds the address
 * of a kernel variable (or of a field inside one) and is fetched with a
 * pc-relative "ldr rX, .Llabel".
 */
	.text

#ifdef MULTIPROCESSOR
.Lcpu_info_store:
	.word	_C_LABEL(cpu_info_store)
.Lcurlwp:
	/* FIXME: This is bogus in the general case. */
	.word	_C_LABEL(cpu_info_store) + CI_CURLWP

.Lcurpcb:
	.word	_C_LABEL(cpu_info_store) + CI_CURPCB
#else
.Lcurlwp:
	.word	_C_LABEL(curlwp)

.Lcurpcb:
	.word	_C_LABEL(curpcb)
#endif

.Lcpufuncs:
	.word	_C_LABEL(cpufuncs)

#ifndef MULTIPROCESSOR
	/* Storage for curpcb itself lives in .data; initially NULL. */
	.data
	.global	_C_LABEL(curpcb)
_C_LABEL(curpcb):
	.word	0x00000000
	.text
#endif

.Lblock_userspace_access:
	.word	_C_LABEL(block_userspace_access)

	/* Address of the PMAP_CSTATE field inside kernel_pmap_store. */
.Lpmap_kernel_cstate:
	.word	_C_LABEL(kernel_pmap_store) + PMAP_CSTATE

.Llast_cache_state_ptr:
	.word	_C_LABEL(pmap_cache_state)

/*
 * struct lwp *
 * cpu_switchto(struct lwp *current, struct lwp *next)
 *
 * Switch to the specified next LWP.
 *
 * Arguments:
 *	r0	'struct lwp *' of the current LWP (zero if the caller
 *		is exiting; its context is then not saved)
 *	r1	'struct lwp *' of the LWP to switch to
 *
 * Returns:
 *	r0	the old LWP (the value passed in as 'current')
 *	r1	the new LWP (lwp_trampoline expects it there)
 *
 * r4-r7 and lr are pushed on entry.  The final ldmfd pops the same five
 * words from whichever stack is current at that point, i.e. the new
 * LWP's stack once its r13 has been reloaded from its PCB.
 */
ENTRY(cpu_switchto)
	stmfd	sp!, {r4-r7, lr}

	mov	r6, r1		/* save new lwp */
	mov	r4, r0		/* save old lwp, it's the return value */

	IRQdisable

#ifdef MULTIPROCESSOR
	/* XXX use curcpu() */
	ldr	r0, .Lcpu_info_store
	str	r0, [r6, #(L_CPU)]
#else
	/* l->l_cpu initialized in fork1() for single-processor */
#endif

	/* We have a new curlwp now so make a note of it */
	ldr	r7, .Lcurlwp
	str	r6, [r7]

	/* Hook in a new pcb */
	ldr	r7, .Lcurpcb
	ldr	r0, [r6, #(L_ADDR)]
	str	r0, [r7]

	/* At this point we can allow IRQ's again. */
	IRQenable

	/* rem: r4 = old lwp */
	/* rem: r6 = new lwp */
	/* rem: interrupts are enabled */

	/*
	 * If the new lwp is the same as the old lwp then we do not need to
	 * save and restore any contexts. This means we can make a quick exit.
	 */
	teq	r4, r6
	beq	.Lswitch_return

	/*
	 * If the old lwp on entry to cpu_switchto was zero then the
	 * process that called it was exiting. This means that we do
	 * not need to save the current context. Instead we can jump
	 * straight to restoring the context for the new process.
	 */
	teq	r4, #0x00000000
	beq	.Lswitch_exited

	/* rem: r4 = old lwp */
	/* rem: r6 = new lwp */
	/* rem: interrupts are enabled */

	/* Stage two : Save old context */

	/* Get the user structure for the old lwp. */
	ldr	r1, [r4, #(L_ADDR)]

	/* Save all the registers in the old lwp's pcb */
#ifndef __XSCALE__
	add	r7, r1, #(PCB_R8)
	stmia	r7, {r8-r13}
#else
	/* Each strd stores a register pair: r8/r9, r10/r11, r12/r13. */
	strd	r8, [r1, #(PCB_R8)]
	strd	r10, [r1, #(PCB_R10)]
	strd	r12, [r1, #(PCB_R12)]
#endif

	/*
	 * NOTE: We can now use r8-r13 until it is time to restore
	 * them for the new process.
	 */

	/* rem: r1 = old lwp PCB */
	/* rem: r4 = old lwp */
	/* rem: r6 = new lwp */
	/* rem: interrupts are enabled */

	/* Remember the old PCB. */
	mov	r8, r1

	/* r1 now free! */

	/* Get the user structure for the new process in r9 */
	ldr	r9, [r6, #(L_ADDR)]

	/*
	 * This can be optimised... We know we want to go from SVC32
	 * mode to UND32 mode.  IRQs are masked for the duration of the
	 * excursion into UND32 mode (I32_bit is forced on).
	 */
	mrs	r3, cpsr
	bic	r2, r3, #(PSR_MODE)
	orr	r2, r2, #(PSR_UND32_MODE | I32_bit)
	msr	cpsr_c, r2

	str	sp, [r8, #(PCB_UND_SP)]	/* sp here is the UND32-mode sp */

	msr	cpsr_c, r3		/* Restore the old mode */

	/* What else needs to be saved?  Only FPA stuff when that is supported */

	/* Third phase : restore saved context */

	/* rem: r4 = old lwp */
	/* rem: r6 = new lwp */
	/* rem: r8 = old PCB */
	/* rem: r9 = new PCB */
	/* rem: interrupts are enabled */

	/*
	 * Get the new L1 table pointer into r11. If we're switching to
	 * an LWP with the same address space as the outgoing one, we can
	 * skip the cache purge and the TTB load.
	 *
	 * To avoid data dep stalls that would happen anyway, we try
	 * and get some useful work done in the mean time.
	 */
	ldr	r10, [r8, #(PCB_PAGEDIR)]	/* r10 = old L1 */
	ldr	r11, [r9, #(PCB_PAGEDIR)]	/* r11 = new L1 */

	ldr	r0, [r8, #(PCB_DACR)]		/* r0 = old DACR */
	ldr	r1, [r9, #(PCB_DACR)]		/* r1 = new DACR */
	ldr	r8, [r9, #(PCB_CSTATE)]		/* r8 = &new_pmap->pm_cstate */
	ldr	r5, .Llast_cache_state_ptr	/* Previous thread's cstate */

	teq	r10, r11			/* Same L1? */
	ldr	r5, [r5]
	cmpeq	r0, r1				/* Same DACR? */
	beq	.Lcs_context_switched		/* yes! */

	ldr	r3, .Lblock_userspace_access
	mov	r12, #0
	cmp	r5, #0				/* No last vm? (switch_exit) */
	beq	.Lcs_cache_purge_skipped	/* None: skip the cache flush */

	mov	r2, #DOMAIN_CLIENT
	cmp	r1, r2, lsl #(PMAP_DOMAIN_KERNEL * 2) /* Sw to kernel thread? */
	beq	.Lcs_cache_purge_skipped	/* Yup. Don't flush cache */

	cmp	r5, r8				/* Same userland VM space? */
	ldrneb	r12, [r5, #(CS_CACHE_ID)]	/* Last VM space cache state */

	/*
	 * We're definitely switching to a new userland VM space,
	 * and the previous userland VM space has yet to be flushed
	 * from the cache/tlb.
	 *
	 * r12 holds the previous VM space's cs_cache_id state
	 * (it is still zero if r5 == r8, so that case skips the purge).
	 */
	tst	r12, #0xff			/* Test cs_cache_id */
	beq	.Lcs_cache_purge_skipped	/* VM space is not in cache */

	/*
	 * Definitely need to flush the cache.
	 * Mark the old VM space as NOT being resident in the cache.
	 */

	mov	r2, #0x00000000
	strb	r2, [r5, #(CS_CACHE_ID)]
	strb	r2, [r5, #(CS_CACHE_D)]

.Lcs_cache_purge:
	/*
	 * Don't allow user space access between the purge and the switch.
	 */
	mov	r2, #0x00000001
	str	r2, [r3]

	/* r0-r3 are preserved around the indirect call through cpufuncs. */
	stmfd	sp!, {r0-r3}
	ldr	r1, .Lcpufuncs
	mov	lr, pc
	ldr	pc, [r1, #CF_IDCACHE_WBINV_ALL]
	ldmfd	sp!, {r0-r3}

.Lcs_cache_purge_skipped:
	/* rem: r1 = new DACR */
	/* rem: r3 = &block_userspace_access */
	/* rem: r4 = old lwp */
	/* rem: r5 = &old_pmap->pm_cstate (or NULL) */
	/* rem: r6 = new lwp */
	/* rem: r8 = &new_pmap->pm_cstate */
	/* rem: r9 = new PCB */
	/* rem: r10 = old L1 */
	/* rem: r11 = new L1 */

	mov	r2, #0x00000000
	ldr	r7, [r9, #(PCB_PL1VEC)]

	/*
	 * At this point we need to kill IRQ's again.
	 *
	 * XXXSCW: Don't need to block FIQs if vectors have been relocated
	 */
	IRQdisableALL

	/*
	 * Interrupts are disabled so we can allow user space accesses again
	 * as none will occur until interrupts are re-enabled after the
	 * switch.
	 */
	str	r2, [r3]

	/*
	 * Ensure the vector table is accessible by fixing up the L1.
	 * The mcr sits between the two compares; the conditional
	 * instructions after it still act on the flags from the compares.
	 */
	cmp	r7, #0			/* No need to fixup vector table? */
	ldrne	r2, [r7]		/* But if yes, fetch current value */
	ldrne	r0, [r9, #(PCB_L1VEC)]	/* Fetch new vector_page value */
	mcr	p15, 0, r1, c3, c0, 0	/* Update DACR for new context */
	cmpne	r2, r0			/* Stuffing the same value? */
#ifndef PMAP_INCLUDE_PTE_SYNC
	strne	r0, [r7]		/* Nope, update it */
#else
	beq	.Lcs_same_vector
	str	r0, [r7]		/* Otherwise, update it */

	/*
	 * Need to sync the cache to make sure that last store is
	 * visible to the MMU.
	 */
	ldr	r2, .Lcpufuncs
	mov	r0, r7
	mov	r1, #4
	mov	lr, pc
	ldr	pc, [r2, #CF_DCACHE_WB_RANGE]

.Lcs_same_vector:
#endif /* PMAP_INCLUDE_PTE_SYNC */

	cmp	r10, r11		/* Switching to the same L1? */
	ldr	r10, .Lcpufuncs		/* r10 reused: now &cpufuncs */
	beq	.Lcs_same_l1		/* Yup. */

	/*
	 * Do a full context switch, including full TLB flush.
	 */
	mov	r0, r11
	mov	lr, pc
	ldr	pc, [r10, #CF_CONTEXT_SWITCH]

	/*
	 * Mark the old VM space as NOT being resident in the TLB
	 */
	mov	r2, #0x00000000
	cmp	r5, #0
	strneh	r2, [r5, #(CS_TLB_ID)]
	b	.Lcs_context_switched

	/*
	 * We're switching to a different process in the same L1.
	 * In this situation, we only need to flush the TLB for the
	 * vector_page mapping, and even then only if r7 is non-NULL.
	 */
.Lcs_same_l1:
	cmp	r7, #0
	movne	r0, #0			/* We *know* vector_page's VA is 0x0 */
	movne	lr, pc
	ldrne	pc, [r10, #CF_TLB_FLUSHID_SE]

.Lcs_context_switched:
	/* rem: r8 = &new_pmap->pm_cstate */

	/* XXXSCW: Safe to re-enable FIQs here */

	/*
	 * NOTE(review): when the "same L1 and same DACR" test above
	 * branches straight here, IRQdisableALL has not been executed on
	 * that path, so IRQs/FIQs are still enabled until IRQenableALL
	 * below.  The UND32 excursion below also does not force I32_bit
	 * on, unlike the one on the save side.  Confirm this is intended.
	 */

	/*
	 * The new VM space is live in the cache and TLB.
	 * Update its cache/tlb state, and if it's not the kernel
	 * pmap, update the 'last cache state' pointer.
	 */
	mov	r2, #-1
	ldr	r5, .Lpmap_kernel_cstate
	ldr	r0, .Llast_cache_state_ptr
	str	r2, [r8, #(CS_ALL)]
	cmp	r5, r8
	strne	r8, [r0]

	/* rem: r4 = old lwp */
	/* rem: r6 = new lwp */
	/* rem: r9 = new PCB */

	/*
	 * This can be optimised... We know we want to go from SVC32
	 * mode to UND32 mode
	 */
	mrs	r3, cpsr
	bic	r2, r3, #(PSR_MODE)
	orr	r2, r2, #(PSR_UND32_MODE)
	msr	cpsr_c, r2

	ldr	sp, [r9, #(PCB_UND_SP)]	/* reload the UND32-mode sp */

	msr	cpsr_c, r3		/* Restore the old mode */

	/* Restore all the saved registers */
#ifndef __XSCALE__
	add	r7, r9, #PCB_R8
	ldmia	r7, {r8-r13}

	sub	r7, r7, #PCB_R8		/* restore PCB pointer */
#else
	mov	r7, r9
	ldr	r8, [r7, #(PCB_R8)]
	ldr	r9, [r7, #(PCB_R9)]
	ldr	r10, [r7, #(PCB_R10)]
	ldr	r11, [r7, #(PCB_R11)]
	ldr	r12, [r7, #(PCB_R12)]
	ldr	r13, [r7, #(PCB_SP)]
#endif

	ldr	r5, [r6, #(L_PROC)]	/* fetch the proc for below */

	/* rem: r4 = old lwp */
	/* rem: r5 = new lwp's proc */
	/* rem: r6 = new lwp */
	/* rem: r7 = new pcb */

#ifdef ARMFPE
	/* r0 = r7 + USER_SIZE, added as two byte-wide immediates */
	add	r0, r7, #(USER_SIZE) & 0x00ff
	add	r0, r0, #(USER_SIZE) & 0xff00
	bl	_C_LABEL(arm_fpe_core_changecontext)
#endif

	/* We can enable interrupts again */
	IRQenableALL

	/* rem: r4 = old lwp */
	/* rem: r5 = new lwp's proc */
	/* rem: r6 = new lwp */
	/* rem: r7 = new PCB */

	/*
	 * Check for restartable atomic sequences (RAS).
	 */

	ldr	r2, [r5, #(P_RASLIST)]
	ldr	r1, [r7, #(PCB_TF)]	/* r1 = trapframe (used below) */
	teq	r2, #0			/* word at P_RASLIST zero (no RAS)? */
	bne	.Lswitch_do_ras		/* no, check for one */

.Lswitch_return:
	/* cpu_switchto returns the old lwp */
	mov	r0, r4
	/* lwp_trampoline expects new lwp as its second argument */
	mov	r1, r6

	/*
	 * Pull the registers that got pushed when either savectx() or
	 * cpu_switchto() was called and return.
	 */
	ldmfd	sp!, {r4-r7, pc}

.Lswitch_do_ras:
	ldr	r1, [r1, #(TF_PC)]	/* second ras_lookup() arg */
	mov	r0, r5			/* first ras_lookup() arg */
	bl	_C_LABEL(ras_lookup)
	cmn	r0, #1			/* -1 means "not in a RAS" */
	ldrne	r1, [r7, #(PCB_TF)]	/* r1 was clobbered: refetch tf */
	strne	r0, [r1, #(TF_PC)]	/* resume at the address returned */
	b	.Lswitch_return

.Lswitch_exited:

	/*
	 * We're about to clear both the cache and the TLB.
	 * Make sure to zap the 'last cache state' pointer since the
	 * pmap might be about to go away. Also ensure the outgoing
	 * VM space's cache state is marked as NOT resident in the
	 * cache.
	 */

	/* rem: r4 = old lwp (NULL) */
	/* rem: r6 = new lwp */
	/* rem: interrupts are enabled */

	/*
	 * Load up registers the way .Lcs_cache_purge expects.
	 */

	ldr	r3, .Lblock_userspace_access
	ldr	r9, [r6, #(L_ADDR)]		/* r9 = new PCB */
	mrc	p15, 0, r10, c2, c0, 0		/* r10 = old L1 */
	mov	r5, #0				/* No previous cache state */
	ldr	r1, [r9, #(PCB_DACR)]		/* r1 = new DACR */
	ldr	r8, [r9, #(PCB_CSTATE)]		/* r8 = new cache state */
	ldr	r11, [r9, #(PCB_PAGEDIR)]	/* r11 = new L1 */
	b	.Lcs_cache_purge

/* LINTSTUB: Func: void savectx(struct pcb *pcb) */
/*
 * savectx
 *
 * Store r8-r13 into the given pcb.
 *
 * Arguments:
 *	r0	pcb to fill in
 *
 * r4-r7 and lr are pushed before the store, so the sp value recorded in
 * the pcb is the one taken after the push; the same five words are
 * popped again before returning.  r2 is used as scratch in the
 * non-XScale variant.
 */
ENTRY(savectx)
	/*
	 * r0 = pcb
	 */

	/* Push registers.*/
	stmfd	sp!, {r4-r7, lr}

	/* Store all the registers in the process's pcb */
#ifndef __XSCALE__
	add	r2, r0, #(PCB_R8)
	stmia	r2, {r8-r13}
#else
	/* Each strd stores a register pair: r8/r9, r10/r11, r12/r13. */
	strd	r8, [r0, #(PCB_R8)]
	strd	r10, [r0, #(PCB_R10)]
	strd	r12, [r0, #(PCB_R12)]
#endif

	/* Pull the regs off the stack */
	ldmfd	sp!, {r4-r7, pc}

/*
 * lwp_trampoline
 *
 * Register contents on entry:
 *	r0, r1	passed unchanged to lwp_startup(); cpu_switchto's return
 *		path leaves the old lwp in r0 and the new lwp in r1
 *	r4	address of the function to call next
 *	r5	value handed to that function in r0
 *
 * NOTE(review): r4/r5 are presumably planted in the new LWP's saved
 * frame by the fork code, which is not in this file -- confirm there.
 *
 * The function in r4 is called with r1 = sp.  When it returns, IRQs are
 * masked, PULLFRAME is expanded (presumably unwinding the trapframe at
 * sp -- the macro is defined outside this file) and "movs pc, lr"
 * returns, copying SPSR back into CPSR.
 */
ENTRY(lwp_trampoline)
	bl	_C_LABEL(lwp_startup)

	mov	r0, r5
	mov	r1, sp
	mov	lr, pc		/* return address = instruction after mov pc */
	mov	pc, r4

	/* Kill irq's */
	mrs	r0, cpsr
	orr	r0, r0, #(I32_bit)
	msr	cpsr_c, r0

	PULLFRAME

	movs	pc, lr			/* Exit */
