/*	$NetBSD: cpuswitch.S,v 1.39 2003/11/04 10:33:16 dsl Exp $	*/

/*
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1994-1998 Mark Brinicombe.
 * Copyright (c) 1994 Brini.
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Brini.
 * 4. The name of the company nor the name of the author may be used to
 *    endorse or promote products derived from this software without specific
 *    prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * RiscBSD kernel project
 *
 * cpuswitch.S
 *
 * cpu switching functions
 *
 * Created      : 15/10/94
 */

#include "opt_armfpe.h"
#include "opt_arm32_pmap.h"
#include "opt_multiprocessor.h"
#include "opt_lockdebug.h"

#include "assym.h"
#include <machine/param.h>
#include <machine/cpu.h>
#include <machine/frame.h>
#include <machine/asm.h>

/* LINTSTUB: include <sys/param.h> */

#undef IRQdisable
#undef IRQenable

/*
 * New experimental definitions of IRQdisable and IRQenable
 * These keep FIQ's enabled since FIQ's are special.
 */

#define IRQdisable \
	mrs	r14, cpsr ; \
	orr	r14, r14, #(I32_bit) ; \
	msr	cpsr_c, r14 ; \

#define IRQenable \
	mrs	r14, cpsr ; \
	bic	r14, r14, #(I32_bit) ; \
	msr	cpsr_c, r14 ; \

/*
 * These are used for switching the translation table/DACR.
 * Since the vector page can be invalid for a short time, we must
 * disable both regular IRQs *and* FIQs.
 *
 * XXX: This is not necessary if the vector table is relocated.
 */
#define IRQdisableALL \
	mrs	r14, cpsr ; \
	orr	r14, r14, #(I32_bit | F32_bit) ; \
	msr	cpsr_c, r14

#define IRQenableALL \
	mrs	r14, cpsr ; \
	bic	r14, r14, #(I32_bit | F32_bit) ; \
	msr	cpsr_c, r14
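
/*
 * Note: all four of the macros above use r14 (lr) as a scratch
 * register, so they must only be used where r14 does not hold a
 * live value.
 */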

	.text

.Lwhichqs:
	.word	_C_LABEL(sched_whichqs)

.Lqs:
	.word	_C_LABEL(sched_qs)

/*
 * cpuswitch()
 *
 * Performs a process context switch.
 * This function has several entry points.
 */

#ifdef MULTIPROCESSOR
.Lcpu_info_store:
	.word	_C_LABEL(cpu_info_store)
.Lcurlwp:
	/* FIXME: This is bogus in the general case. */
	.word	_C_LABEL(cpu_info_store) + CI_CURLWP

.Lcurpcb:
	.word	_C_LABEL(cpu_info_store) + CI_CURPCB
#else
.Lcurlwp:
	.word	_C_LABEL(curlwp)

.Lcurpcb:
	.word	_C_LABEL(curpcb)
#endif

.Lwant_resched:
	.word	_C_LABEL(want_resched)

.Lcpufuncs:
	.word	_C_LABEL(cpufuncs)

#ifndef MULTIPROCESSOR
	.data
	.global	_C_LABEL(curpcb)
_C_LABEL(curpcb):
	.word	0x00000000
	.text
#endif

.Lblock_userspace_access:
	.word	_C_LABEL(block_userspace_access)

.Lcpu_do_powersave:
	.word	_C_LABEL(cpu_do_powersave)

.Lpmap_kernel_cstate:
	.word	(kernel_pmap_store + PMAP_CSTATE)

.Llast_cache_state_ptr:
	.word	_C_LABEL(pmap_cache_state)

/*
 * Idle loop, exercised while waiting for a process to wake up.
 *
 * NOTE: When we jump back to .Lswitch_search, we must have a
 * pointer to whichqs in r7, which is what it is when we arrive
 * here.
 */
/* LINTSTUB: Ignore */
ASENTRY_NP(idle)
#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	bl	_C_LABEL(sched_unlock_idle)
#endif

	/* Enable interrupts */
	IRQenable

	ldr	r6, .Lcpu_do_powersave

	/* Lower the spl level to spl0 and get the current spl level. */
#ifdef __NEWINTR
	mov	r0, #(IPL_NONE)
	bl	_C_LABEL(_spllower)
#else /* ! __NEWINTR */
	mov	r0, #(_SPL_0)
	bl	_C_LABEL(splx)
#endif /* __NEWINTR */

	/* Old interrupt level in r0 */

	/* If we don't want to sleep, use a simpler loop. */
	ldr	r6, [r6]		/* r6 = cpu_do_powersave */
	teq	r6, #0
	bne	2f

	/* Non-powersave idle. */
1:	/* should maybe do uvm pageidlezero stuff here */
	ldr	r3, [r7]		/* r3 = whichqs */
	teq	r3, #0x00000000
	beq	1b
	adr	lr, .Lswitch_search
	b	_C_LABEL(splx)		/* Restore ipl, return to switch_search */

2:	/* Powersave idle. */
	ldr	r4, .Lcpufuncs
	mov	r6, r0			/* Preserve old interrupt level */

3:	ldr	r3, [r7]		/* r3 = whichqs */
	teq	r3, #0x00000000
	moveq	r0, r6
	adreq	lr, .Lswitch_search
	beq	_C_LABEL(splx)		/* Restore ipl, return to switch_search */

	/* if saving power, don't want to pageidlezero */
	mov	r0, #0
	adr	lr, 3b
	ldr	pc, [r4, #(CF_SLEEP)]
	/* loops back around */
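
	/*
	 * Because lr was pre-loaded with the address of label 3 above,
	 * the CPU's sleep routine (CF_SLEEP) returns straight to that
	 * label, so whichqs is re-checked after every wakeup.
	 */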


/*
 * Find a new lwp to run, save the current context and
 * load the new context
 *
 * Arguments:
 *	r0	'struct lwp *' of the current LWP
 */

ENTRY(cpu_switch)
/*
 * Local register usage. Some of these registers are out of date.
 * r1 = oldlwp
 * r2 = spl level
 * r3 = whichqs
 * r4 = queue
 * r5 = &qs[queue]
 * r6 = newlwp
 * r7 = scratch
 */
	stmfd	sp!, {r4-r7, lr}

	/*
	 * Indicate that there is no longer a valid process (curlwp = 0).
	 * Zero the current PCB pointer while we're at it.
	 */
	ldr	r7, .Lcurlwp
	ldr	r6, .Lcurpcb
	mov	r2, #0x00000000
	str	r2, [r7]		/* curlwp = NULL */
	str	r2, [r6]		/* curpcb = NULL */

	/* stash the old lwp while we call functions */
	mov	r5, r0

	/* First phase : find a new lwp */
	ldr	r7, .Lwhichqs

	/* rem: r5 = old lwp */
	/* rem: r7 = &whichqs */

.Lswitch_search:
	IRQdisable

	/* Do we have any active queues */
	ldr	r3, [r7]

	/* If not we must idle until we do. */
	teq	r3, #0x00000000
	beq	_ASM_LABEL(idle)

	/* put old lwp back in r1 */
	mov	r1, r5

	/* rem: r1 = old lwp */
	/* rem: r3 = whichqs */
	/* rem: interrupts are disabled */

	/* used further down, saves SA stall */
	ldr	r6, .Lqs

	/*
	 * We have found an active queue. Currently we do not know which
	 * queue is active, just that one of them is.
	 */
	/* Non-Xscale version of the ffs algorithm devised by d.seal and
	 * posted to comp.sys.arm on 16 Feb 1994.
	 */
	rsb	r5, r3, #0
	ands	r0, r3, r5

#ifndef __XSCALE__
	adr	r5, .Lcpu_switch_ffs_table

	/* X = R0 */
	orr	r4, r0, r0, lsl #4	/* r4 = X * 0x11 */
	orr	r4, r4, r4, lsl #6	/* r4 = X * 0x451 */
	rsb	r4, r4, r4, lsl #16	/* r4 = X * 0x0450fbaf */

	/* now lookup in table indexed on top 6 bits of r4 */
	ldrb	r4, [ r5, r4, lsr #26 ]
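
	/*
	 * How this works: r0 is the isolated lowest set bit of whichqs
	 * (r0 = r3 & -r3), i.e. a power of two. The multiply by
	 * 0x0450fbaf, built up in the three shift-and-accumulate steps
	 * above, leaves a value whose top six bits are distinct for each
	 * of the 32 possible bit positions, and the byte table at
	 * .Lcpu_switch_ffs_table maps those six bits back to the 0-based
	 * bit number, i.e. the lowest non-empty queue index.
	 */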

#else	/* __XSCALE__ */
	clz	r4, r0
	rsb	r4, r4, #31
#endif	/* __XSCALE__ */

	/* rem: r0 = bit mask of chosen queue (1 << r4) */
	/* rem: r1 = old lwp */
	/* rem: r3 = whichqs */
	/* rem: r4 = queue number */
	/* rem: interrupts are disabled */

	/* Get the address of the queue (&qs[queue]) */
	add	r5, r6, r4, lsl #3
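
	/*
	 * Each sched_qs entry is a queue head holding two pointers
	 * (8 bytes), hence the "lsl #3" above when indexing by queue
	 * number.
	 */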

	/*
	 * Get the lwp from the queue and place the next process in
	 * the queue at the head. This basically unlinks the lwp at
	 * the head of the queue.
	 */
	ldr	r6, [r5, #(L_FORW)]

	/* rem: r6 = new lwp */
	ldr	r7, [r6, #(L_FORW)]
	str	r7, [r5, #(L_FORW)]

	/*
	 * Test to see if the queue is now empty. If the head of the queue
	 * points to the queue itself then there are no more lwps in
	 * the queue. We can therefore clear the queue not empty flag held
	 * in r3.
	 */

	teq	r5, r7
	biceq	r3, r3, r0

	/* rem: r0 = bit mask of chosen queue (1 << r4) - NOT NEEDED ANY MORE */

	/* Fix the back pointer for the lwp now at the head of the queue. */
	ldr	r0, [r6, #(L_BACK)]
	str	r0, [r7, #(L_BACK)]

	/* Update the RAM copy of the queue not empty flags word. */
	ldreq	r7, .Lwhichqs
	streq	r3, [r7]

	/* rem: r1 = old lwp */
	/* rem: r3 = whichqs - NOT NEEDED ANY MORE */
	/* rem: r4 = queue number - NOT NEEDED ANY MORE */
	/* rem: r6 = new lwp */
	/* rem: interrupts are disabled */

	/* Clear the want_resched flag */
	ldr	r7, .Lwant_resched
	mov	r0, #0x00000000
	str	r0, [r7]

	/*
	 * Clear the back pointer of the lwp we have removed from
	 * the head of the queue. The new lwp is isolated now.
	 */
	str	r0, [r6, #(L_BACK)]

#if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
	/*
	 * unlock the sched_lock, but leave interrupts off, for now.
	 */
	mov	r7, r1
	bl	_C_LABEL(sched_unlock_idle)
	mov	r1, r7
#endif


.Lswitch_resume:
	/* rem: r1 = old lwp */
	/* rem: r4 = return value [not used if came from cpu_switchto()] */
	/* rem: r6 = new process */
	/* rem: interrupts are disabled */

#ifdef MULTIPROCESSOR
	/* XXX use curcpu() */
	ldr	r0, .Lcpu_info_store
	str	r0, [r6, #(L_CPU)]
#else
	/* l->l_cpu initialized in fork1() for single-processor */
#endif

	/* Process is now on a processor. */
	mov	r0, #LSONPROC			/* l->l_stat = LSONPROC */
	str	r0, [r6, #(L_STAT)]

	/* We have a new curlwp now so make a note of it */
	ldr	r7, .Lcurlwp
	str	r6, [r7]

	/* Hook in a new pcb */
	ldr	r7, .Lcurpcb
	ldr	r0, [r6, #(L_ADDR)]
	str	r0, [r7]

	/* At this point we can allow IRQ's again. */
	IRQenable

	/* rem: r1 = old lwp */
	/* rem: r4 = return value */
	/* rem: r6 = new process */
	/* rem: interrupts are enabled */

	/*
	 * If the new process is the same as the process that called
	 * cpu_switch() then we do not need to save and restore any
	 * contexts. This means we can make a quick exit.
	 * The test is simple: if curlwp on entry (now in r1) is the
	 * same as the lwp removed from the queue, we can jump to the exit.
	 */
	teq	r1, r6
	moveq	r4, #0x00000000		/* default to "didn't switch" */
	beq	.Lswitch_return

	/*
	 * At this point, we are guaranteed to be switching to
	 * a new lwp.
	 */
	mov	r4, #0x00000001

	/* Remember the old lwp in r0 */
	mov	r0, r1

	/*
	 * If the old lwp on entry to cpu_switch was zero then the
	 * process that called it was exiting. This means that we do
	 * not need to save the current context. Instead we can jump
	 * straight to restoring the context for the new process.
	 */
	teq	r0, #0x00000000
	beq	.Lswitch_exited

	/* rem: r0 = old lwp */
	/* rem: r4 = return value */
	/* rem: r6 = new process */
	/* rem: interrupts are enabled */

	/* Stage two : Save old context */

	/* Get the user structure for the old lwp. */
	ldr	r1, [r0, #(L_ADDR)]

	/* Save all the registers in the old lwp's pcb */
#ifndef __XSCALE__
	add	r7, r1, #(PCB_R8)
	stmia	r7, {r8-r13}
#else
	strd	r8, [r1, #(PCB_R8)]
	strd	r10, [r1, #(PCB_R10)]
	strd	r12, [r1, #(PCB_R12)]
#endif

	/*
	 * NOTE: We can now use r8-r13 until it is time to restore
	 * them for the new process.
	 */

	/* Remember the old PCB. */
	mov	r8, r1

	/* r1 now free! */

	/* Get the user structure for the new process in r9 */
	ldr	r9, [r6, #(L_ADDR)]

	/*
	 * This can be optimised... We know we want to go from SVC32
	 * mode to UND32 mode
	 */
	mrs	r3, cpsr
	bic	r2, r3, #(PSR_MODE)
	orr	r2, r2, #(PSR_UND32_MODE | I32_bit)
	msr	cpsr_c, r2

	str	sp, [r8, #(PCB_UND_SP)]

	msr	cpsr_c, r3		/* Restore the old mode */
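
	/*
	 * Why the mode dance above: sp is banked per processor mode, so
	 * the undefined-mode stack pointer can only be read while the
	 * CPU is actually in UND32 mode. We switch over briefly (with
	 * IRQs masked) just to save it into the old PCB.
	 */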

	/* rem: r0 = old lwp */
	/* rem: r4 = return value */
	/* rem: r6 = new process */
	/* rem: r8 = old PCB */
	/* rem: r9 = new PCB */
	/* rem: interrupts are enabled */

	/* What else needs to be saved? Only FPA stuff when that is supported. */

	/* Third phase : restore saved context */

	/* rem: r0 = old lwp */
	/* rem: r4 = return value */
	/* rem: r6 = new lwp */
	/* rem: r8 = old PCB */
	/* rem: r9 = new PCB */
	/* rem: interrupts are enabled */

	/*
	 * Get the new L1 table pointer into r11. If we're switching to
	 * an LWP with the same address space as the outgoing one, we can
	 * skip the cache purge and the TTB load.
	 *
	 * To avoid data dep stalls that would happen anyway, we try
	 * to get some useful work done in the meantime.
	 */
	ldr	r10, [r8, #(PCB_PAGEDIR)]	/* r10 = old L1 */
	ldr	r11, [r9, #(PCB_PAGEDIR)]	/* r11 = new L1 */

	ldr	r0, [r8, #(PCB_DACR)]		/* r0 = old DACR */
	ldr	r1, [r9, #(PCB_DACR)]		/* r1 = new DACR */
	ldr	r8, [r9, #(PCB_CSTATE)]		/* r8 = &new_pmap->pm_cstate */
	ldr	r5, .Llast_cache_state_ptr	/* Previous thread's cstate */

	teq	r10, r11			/* Same L1? */
	ldr	r5, [r5]
	cmpeq	r0, r1				/* Same DACR? */
	beq	.Lcs_context_switched		/* yes! */

	ldr	r3, .Lblock_userspace_access
	mov	r12, #0
	cmp	r5, #0				/* No last vm? (switch_exit) */
	beq	.Lcs_cache_purge_skipped	/* No, we can skip cache flush */

	mov	r2, #DOMAIN_CLIENT
	cmp	r1, r2, lsl #(PMAP_DOMAIN_KERNEL * 2)	/* Sw to kernel thread? */
	beq	.Lcs_cache_purge_skipped	/* Yup. Don't flush cache */

	cmp	r5, r8				/* Same userland VM space? */
	ldrneb	r12, [r5, #(CS_CACHE_ID)]	/* Last VM space cache state */

	/*
	 * We're definitely switching to a new userland VM space,
	 * and the previous userland VM space has yet to be flushed
	 * from the cache/tlb.
	 *
	 * r12 holds the previous VM space's cs_cache_id state
	 */
	tst	r12, #0xff			/* Test cs_cache_id */
	beq	.Lcs_cache_purge_skipped	/* VM space is not in cache */

	/*
	 * Definitely need to flush the cache.
	 * Mark the old VM space as NOT being resident in the cache.
	 */
	mov	r2, #0x00000000
	strb	r2, [r5, #(CS_CACHE_ID)]
	strb	r2, [r5, #(CS_CACHE_D)]

	/*
	 * Don't allow user space access between the purge and the switch.
	 */
	mov	r2, #0x00000001
	str	r2, [r3]

	stmfd	sp!, {r0-r3}
	ldr	r1, .Lcpufuncs
	mov	lr, pc
	ldr	pc, [r1, #CF_IDCACHE_WBINV_ALL]
	ldmfd	sp!, {r0-r3}
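
	/*
	 * The cache-flush routine is called through cpufuncs and follows
	 * the C calling convention, so it may clobber r0-r3; they are
	 * still live here, which is why they are pushed and popped
	 * around the call above.
	 */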

.Lcs_cache_purge_skipped:
	/* rem: r1 = new DACR */
	/* rem: r3 = &block_userspace_access */
	/* rem: r4 = return value */
	/* rem: r5 = &old_pmap->pm_cstate (or NULL) */
	/* rem: r6 = new lwp */
	/* rem: r8 = &new_pmap->pm_cstate */
	/* rem: r9 = new PCB */
	/* rem: r10 = old L1 */
	/* rem: r11 = new L1 */

	mov	r2, #0x00000000
	ldr	r7, [r9, #(PCB_PL1VEC)]

	/*
	 * At this point we need to kill IRQ's again.
	 *
	 * XXXSCW: Don't need to block FIQs if vectors have been relocated
	 */
	IRQdisableALL

	/*
	 * Interrupts are disabled so we can allow user space accesses again
	 * as none will occur until interrupts are re-enabled after the
	 * switch.
	 */
	str	r2, [r3]

	/*
	 * Ensure the vector table is accessible by fixing up the L1
	 */
	cmp	r7, #0			/* No need to fixup vector table? */
	ldrne	r2, [r7]		/* But if yes, fetch current value */
	ldrne	r0, [r9, #(PCB_L1VEC)]	/* Fetch new vector_page value */
	mcr	p15, 0, r1, c3, c0, 0	/* Update DACR for new context */
	cmpne	r2, r0			/* Stuffing the same value? */
#ifndef PMAP_INCLUDE_PTE_SYNC
	strne	r0, [r7]		/* Nope, update it */
#else
	beq	.Lcs_same_vector
	str	r0, [r7]		/* Otherwise, update it */

	/*
	 * Need to sync the cache to make sure that last store is
	 * visible to the MMU.
	 */
	ldr	r2, .Lcpufuncs
	mov	r0, r7
	mov	r1, #4
	mov	lr, pc
	ldr	pc, [r2, #CF_DCACHE_WB_RANGE]

.Lcs_same_vector:
#endif /* PMAP_INCLUDE_PTE_SYNC */

	cmp	r10, r11		/* Switching to the same L1? */
	ldr	r10, .Lcpufuncs
	beq	.Lcs_same_l1		/* Yup. */

	/*
	 * Do a full context switch, including full TLB flush.
	 */
	mov	r0, r11
	mov	lr, pc
	ldr	pc, [r10, #CF_CONTEXT_SWITCH]

	/*
	 * Mark the old VM space as NOT being resident in the TLB
	 */
	mov	r2, #0x00000000
	cmp	r5, #0
	strneh	r2, [r5, #(CS_TLB_ID)]
	b	.Lcs_context_switched

	/*
	 * We're switching to a different process in the same L1.
	 * In this situation, we only need to flush the TLB for the
	 * vector_page mapping, and even then only if r7 is non-NULL.
	 */
.Lcs_same_l1:
	cmp	r7, #0
	movne	r0, #0			/* We *know* vector_page's VA is 0x0 */
	movne	lr, pc
	ldrne	pc, [r10, #CF_TLB_FLUSHID_SE]

.Lcs_context_switched:
	/* rem: r8 = &new_pmap->pm_cstate */

	/* XXXSCW: Safe to re-enable FIQs here */

	/*
	 * The new VM space is live in the cache and TLB.
	 * Update its cache/tlb state, and if it's not the kernel
	 * pmap, update the 'last cache state' pointer.
	 */
	mov	r2, #-1
	ldr	r5, .Lpmap_kernel_cstate
	ldr	r0, .Llast_cache_state_ptr
	str	r2, [r8, #(CS_ALL)]
	cmp	r5, r8
	strne	r8, [r0]

	/* rem: r4 = return value */
	/* rem: r6 = new lwp */
	/* rem: r9 = new PCB */

	/*
	 * This can be optimised... We know we want to go from SVC32
	 * mode to UND32 mode
	 */
	mrs	r3, cpsr
	bic	r2, r3, #(PSR_MODE)
	orr	r2, r2, #(PSR_UND32_MODE)
	msr	cpsr_c, r2

	ldr	sp, [r9, #(PCB_UND_SP)]

	msr	cpsr_c, r3		/* Restore the old mode */

	/* Restore all the saved registers */
#ifndef __XSCALE__
	add	r7, r9, #PCB_R8
	ldmia	r7, {r8-r13}

	sub	r7, r7, #PCB_R8		/* restore PCB pointer */
#else
	mov	r7, r9
	ldr	r8, [r7, #(PCB_R8)]
	ldr	r9, [r7, #(PCB_R9)]
	ldr	r10, [r7, #(PCB_R10)]
	ldr	r11, [r7, #(PCB_R11)]
	ldr	r12, [r7, #(PCB_R12)]
	ldr	r13, [r7, #(PCB_SP)]
#endif
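
	/*
	 * Note that restoring r13 above switched us onto the new lwp's
	 * kernel stack.
	 */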

	ldr	r5, [r6, #(L_PROC)]	/* fetch the proc for below */

	/* rem: r4 = return value */
	/* rem: r5 = new lwp's proc */
	/* rem: r6 = new lwp */
	/* rem: r7 = new pcb */

#ifdef ARMFPE
	add	r0, r7, #(USER_SIZE) & 0x00ff
	add	r0, r0, #(USER_SIZE) & 0xff00
	bl	_C_LABEL(arm_fpe_core_changecontext)
#endif

	/* We can enable interrupts again */
	IRQenableALL

	/* rem: r4 = return value */
	/* rem: r5 = new lwp's proc */
	/* rem: r6 = new lwp */
	/* rem: r7 = new PCB */

	/*
	 * Check for restartable atomic sequences (RAS).
	 */

	ldr	r2, [r5, #(P_RASLIST)]
	ldr	r1, [r7, #(PCB_TF)]	/* r1 = trapframe (used below) */
	teq	r2, #0			/* p->p_nras == 0? */
	bne	.Lswitch_do_ras		/* no, check for one */

.Lswitch_return:
	/* cpu_switch returns 1 == switched, 0 == didn't switch */
	mov	r0, r4

	/*
	 * Pull the registers that got pushed when either savectx() or
	 * cpu_switch() was called and return.
	 */
	ldmfd	sp!, {r4-r7, pc}

.Lswitch_do_ras:
	ldr	r1, [r1, #(TF_PC)]	/* second ras_lookup() arg */
	mov	r0, r5			/* first ras_lookup() arg */
	bl	_C_LABEL(ras_lookup)
	cmn	r0, #1			/* -1 means "not in a RAS" */
	ldrne	r1, [r7, #(PCB_TF)]
	strne	r0, [r1, #(TF_PC)]
	b	.Lswitch_return

.Lswitch_exited:
	/*
	 * We skip the cache purge because switch_exit() already did it.
	 * Load up registers the way .Lcs_cache_purge_skipped expects.
	 * Userspace access already blocked by switch_exit().
	 */
	ldr	r9, [r6, #(L_ADDR)]		/* r9 = new PCB */
	ldr	r3, .Lblock_userspace_access
	mrc	p15, 0, r10, c2, c0, 0		/* r10 = old L1 */
	mov	r5, #0				/* No previous cache state */
	ldr	r1, [r9, #(PCB_DACR)]		/* r1 = new DACR */
	ldr	r8, [r9, #(PCB_CSTATE)]		/* r8 = new cache state */
	ldr	r11, [r9, #(PCB_PAGEDIR)]	/* r11 = new L1 */
	b	.Lcs_cache_purge_skipped

/*
 * cpu_switchto(struct lwp *current, struct lwp *next)
 * Switch to the specified next LWP
 * Arguments:
 *
 *	r0	'struct lwp *' of the current LWP
 *	r1	'struct lwp *' of the LWP to switch to
 */
ENTRY(cpu_switchto)
	stmfd	sp!, {r4-r7, lr}

	mov	r6, r1		/* save new lwp */

#if defined(LOCKDEBUG)
	mov	r5, r0		/* save old lwp */
	bl	_C_LABEL(sched_unlock_idle)
	mov	r1, r5
#else
	mov	r1, r0
#endif

	IRQdisable

	/*
	 * Okay, set up registers the way cpu_switch() wants them,
	 * and jump into the middle of it (where we bring up the
	 * new process).
	 *
	 * r1 = old lwp (r6 = new lwp)
	 */
	b	.Lswitch_resume

/*
 * void switch_exit(struct lwp *l, struct lwp *l0, void (*exit)(struct lwp *));
 * Switch to lwp0's saved context and deallocate the address space and kernel
 * stack for l. Then jump into cpu_switch(), as if we were in lwp0 all along.
 */

/* LINTSTUB: Func: void switch_exit(struct lwp *l, struct lwp *l0, void (*func)(struct lwp *)) */
ENTRY(switch_exit)
	/*
	 * The process is going away, so we can use callee-saved
	 * registers here without having to save them.
	 */

	mov	r4, r0
	ldr	r0, .Lcurlwp

	mov	r5, r1
	ldr	r1, .Lblock_userspace_access

	mov	r6, r2

	/*
	 * r4 = lwp
	 * r5 = lwp0
	 * r6 = exit func
	 */

	mov	r2, #0x00000000		/* curlwp = NULL */
	str	r2, [r0]

	/*
	 * We're about to clear both the cache and the TLB.
	 * Make sure to zap the 'last cache state' pointer since the
	 * pmap might be about to go away. Also ensure the outgoing
	 * VM space's cache state is marked as NOT resident in the
	 * cache, and that lwp0's cache state IS resident.
	 */
	ldr	r7, [r4, #(L_ADDR)]		/* r7 = old lwp's PCB */
	ldr	r0, .Llast_cache_state_ptr	/* Last userland cache state */
	ldr	r9, [r7, #(PCB_CSTATE)]		/* Fetch cache state pointer */
	ldr	r3, [r5, #(L_ADDR)]		/* r3 = lwp0's PCB */
	str	r2, [r0]			/* No previous cache state */
	str	r2, [r9, #(CS_ALL)]		/* Zap old lwp's cache state */
	ldr	r3, [r3, #(PCB_CSTATE)]		/* lwp0's cache state */
	mov	r2, #-1
	str	r2, [r3, #(CS_ALL)]		/* lwp0 is in da cache! */

	/*
	 * Don't allow user space access between the purge and the switch.
	 */
	mov	r2, #0x00000001
	str	r2, [r1]

	/* Switch to lwp0 context */

	ldr	r9, .Lcpufuncs
	mov	lr, pc
	ldr	pc, [r9, #CF_IDCACHE_WBINV_ALL]

	ldr	r0, [r7, #(PCB_PL1VEC)]
	ldr	r1, [r7, #(PCB_DACR)]

	/*
	 * r0 = Pointer to L1 slot for vector_page (or NULL)
	 * r1 = lwp0's DACR
	 * r4 = lwp we're switching from
	 * r5 = lwp0
	 * r6 = exit func
	 * r7 = lwp0's PCB
	 * r9 = cpufuncs
	 */

	IRQdisableALL

	/*
	 * Ensure the vector table is accessible by fixing up lwp0's L1
	 */
	cmp	r0, #0			/* No need to fixup vector table? */
	ldrne	r3, [r0]		/* But if yes, fetch current value */
	ldrne	r2, [r7, #(PCB_L1VEC)]	/* Fetch new vector_page value */
	mcr	p15, 0, r1, c3, c0, 0	/* Update DACR for lwp0's context */
	cmpne	r3, r2			/* Stuffing the same value? */
	strne	r2, [r0]		/* Store if not. */

#ifdef PMAP_INCLUDE_PTE_SYNC
	/*
	 * Need to sync the cache to make sure that last store is
	 * visible to the MMU.
	 */
	movne	r1, #4
	movne	lr, pc
	ldrne	pc, [r9, #CF_DCACHE_WB_RANGE]
#endif /* PMAP_INCLUDE_PTE_SYNC */

	/*
	 * Note: We don't do the same optimisation as cpu_switch() with
	 * respect to avoiding flushing the TLB if we're switching to
	 * the same L1 since this process' VM space may be about to go
	 * away, so we don't want *any* turds left in the TLB.
	 */

	/* Switch the memory to the new process */
	ldr	r0, [r7, #(PCB_PAGEDIR)]
	mov	lr, pc
	ldr	pc, [r9, #CF_CONTEXT_SWITCH]

	ldr	r0, .Lcurpcb

	/* Restore all the saved registers */
#ifndef __XSCALE__
	add	r1, r7, #PCB_R8
	ldmia	r1, {r8-r13}
#else
	ldr	r8, [r7, #(PCB_R8)]
	ldr	r9, [r7, #(PCB_R9)]
	ldr	r10, [r7, #(PCB_R10)]
	ldr	r11, [r7, #(PCB_R11)]
	ldr	r12, [r7, #(PCB_R12)]
	ldr	r13, [r7, #(PCB_SP)]
#endif
	str	r7, [r0]	/* curpcb = lwp0's PCB */

	IRQenableALL

	/*
	 * Schedule the vmspace and stack to be freed.
	 */
	mov	r0, r4			/* {lwp_}exit2(l) */
	mov	lr, pc
	mov	pc, r6

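	/*
	 * The exit function returns here. Resume the scheduler with no
	 * previous lwp (r5 = NULL), as if lwp0 had called cpu_switch().
	 */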
	ldr	r7, .Lwhichqs		/* r7 = &whichqs */
	mov	r5, #0x00000000		/* r5 = old lwp = NULL */
	b	.Lswitch_search

/* LINTSTUB: Func: void savectx(struct pcb *pcb) */
ENTRY(savectx)
	/*
	 * r0 = pcb
	 */

	/* Push registers. */
	stmfd	sp!, {r4-r7, lr}

	/* Store all the registers in the process's pcb */
#ifndef __XSCALE__
	add	r2, r0, #(PCB_R8)
	stmia	r2, {r8-r13}
#else
	strd	r8, [r0, #(PCB_R8)]
	strd	r10, [r0, #(PCB_R10)]
	strd	r12, [r0, #(PCB_R12)]
#endif

	/* Pull the regs off the stack */
	ldmfd	sp!, {r4-r7, pc}

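/*
 * proc_trampoline()
 *
 * First code run in the context of a newly created lwp. It is assumed
 * (as set up by cpu_fork()) that r4 holds the function to call and r5
 * its argument; the function is called with that argument in r0 and a
 * pointer to the trapframe in r1, after which we drop back to user
 * mode via PULLFRAME.
 */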
ENTRY(proc_trampoline)
#ifdef __NEWINTR
	mov	r0, #(IPL_NONE)
	bl	_C_LABEL(_spllower)
#else /* ! __NEWINTR */
	mov	r0, #(_SPL_0)
	bl	_C_LABEL(splx)
#endif /* __NEWINTR */

#ifdef MULTIPROCESSOR
	bl	_C_LABEL(proc_trampoline_mp)
#endif
	mov	r0, r5
	mov	r1, sp
	mov	lr, pc
	mov	pc, r4

	/* Kill irq's */
	mrs	r0, cpsr
	orr	r0, r0, #(I32_bit)
	msr	cpsr_c, r0

	PULLFRAME

	movs	pc, lr			/* Exit */

#ifndef __XSCALE__
	.type .Lcpu_switch_ffs_table, _ASM_TYPE_OBJECT;
.Lcpu_switch_ffs_table:
/* same as ffs table but all nums are -1 from that */
/*               0   1   2   3   4   5   6   7           */
	.byte	 0,  0,  1, 12,  2,  6,  0, 13		/*  0- 7 */
	.byte	 3,  0,  7,  0,  0,  0,  0, 14		/*  8-15 */
	.byte	10,  4,  0,  0,  8,  0,  0, 25		/* 16-23 */
	.byte	 0,  0,  0,  0,  0, 21, 27, 15		/* 24-31 */
	.byte	31, 11,  5,  0,  0,  0,  0,  0		/* 32-39 */
	.byte	 9,  0,  0, 24,  0,  0, 20, 26		/* 40-47 */
	.byte	30,  0,  0,  0,  0, 23,  0, 19		/* 48-55 */
	.byte	29,  0, 22, 18, 28, 17, 16,  0		/* 56-63 */
#endif	/* !__XSCALE__ */