Home | History | Annotate | Line # | Download | only in arm32
cpuswitch.S revision 1.32
      1 /*	$NetBSD: cpuswitch.S,v 1.32 2003/04/26 17:50:21 chris Exp $	*/
      2 
      3 /*
      4  * Copyright 2003 Wasabi Systems, Inc.
      5  * All rights reserved.
      6  *
      7  * Written by Steve C. Woodford for Wasabi Systems, Inc.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *      This product includes software developed for the NetBSD Project by
     20  *      Wasabi Systems, Inc.
     21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
     22  *    or promote products derived from this software without specific prior
     23  *    written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
     26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 /*
     38  * Copyright (c) 1994-1998 Mark Brinicombe.
     39  * Copyright (c) 1994 Brini.
     40  * All rights reserved.
     41  *
     42  * This code is derived from software written for Brini by Mark Brinicombe
     43  *
     44  * Redistribution and use in source and binary forms, with or without
     45  * modification, are permitted provided that the following conditions
     46  * are met:
     47  * 1. Redistributions of source code must retain the above copyright
     48  *    notice, this list of conditions and the following disclaimer.
     49  * 2. Redistributions in binary form must reproduce the above copyright
     50  *    notice, this list of conditions and the following disclaimer in the
     51  *    documentation and/or other materials provided with the distribution.
     52  * 3. All advertising materials mentioning features or use of this software
     53  *    must display the following acknowledgement:
     54  *	This product includes software developed by Brini.
     55  * 4. The name of the company nor the name of the author may be used to
     56  *    endorse or promote products derived from this software without specific
     57  *    prior written permission.
     58  *
     59  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
     60  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
     61  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     62  * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
     63  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     64  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     65  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     66  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     67  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     68  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     69  * SUCH DAMAGE.
     70  *
     71  * RiscBSD kernel project
     72  *
     73  * cpuswitch.S
     74  *
     75  * cpu switching functions
     76  *
     77  * Created      : 15/10/94
     78  */
     79 
     80 #include "opt_armfpe.h"
     81 #include "opt_arm32_pmap.h"
     82 #include "opt_multiprocessor.h"
     83 
     84 #include "assym.h"
     85 #include <machine/param.h>
     86 #include <machine/cpu.h>
     87 #include <machine/frame.h>
     88 #include <machine/asm.h>
     89 
     90 #undef IRQdisable
     91 #undef IRQenable
     92 
     93 /*
     94  * New experimental definitions of IRQdisable and IRQenable
     95  * These keep FIQ's enabled since FIQ's are special.
     96  */
     97 
     98 #define IRQdisable \
     99 	mrs	r14, cpsr ; \
    100 	orr	r14, r14, #(I32_bit) ; \
    101 	msr	cpsr_c, r14 ; \
    102 
    103 #define IRQenable \
    104 	mrs	r14, cpsr ; \
    105 	bic	r14, r14, #(I32_bit) ; \
    106 	msr	cpsr_c, r14 ; \
    107 
    108 #ifdef ARM32_PMAP_NEW
    109 /*
    110  * These are used for switching the translation table/DACR.
    111  * Since the vector page can be invalid for a short time, we must
    112  * disable both regular IRQs *and* FIQs (I32_bit and F32_bit in the CPSR).
    113  * Both macros use r14 as scratch, so r14 is clobbered by every use.
    114  * XXX: This is not necessary if the vector table is relocated.
    115  */
    116 #define IRQdisableALL \
    117 	mrs	r14, cpsr ; \
    118 	orr	r14, r14, #(I32_bit | F32_bit) ; \
    119 	msr	cpsr_c, r14
    120 
    121 #define IRQenableALL \
    122 	mrs	r14, cpsr ; \
    123 	bic	r14, r14, #(I32_bit | F32_bit) ; \
    124 	msr	cpsr_c, r14
    125 #endif
    126 
    127 	.text
    128 	/* Address literals; the code below fetches them with "ldr rN, .Lname". */
    129 .Lwhichqs:
    130 	.word	_C_LABEL(sched_whichqs)		/* &sched_whichqs */
    131 
    132 .Lqs:
    133 	.word	_C_LABEL(sched_qs)		/* &sched_qs */
    134 
    135 /*
    136  * cpuswitch()
    137  *
    138  * Performs a process context switch.
    139  * This function has several entry points.
    140  */
    141 
    142 #ifdef MULTIPROCESSOR
    143 .Lcpu_info_store:
    144 	.word	_C_LABEL(cpu_info_store)
    145 .Lcurlwp:
    146 	/* FIXME: This is bogus in the general case. */
    147 	.word	_C_LABEL(cpu_info_store) + CI_CURLWP
    148 
    149 .Lcurpcb:
    150 	.word	_C_LABEL(cpu_info_store) + CI_CURPCB
    151 #else
    152 .Lcurlwp:
    153 	.word	_C_LABEL(curlwp)		/* &curlwp */
    154 
    155 .Lcurpcb:
    156 	.word	_C_LABEL(curpcb)		/* &curpcb (storage defined below) */
    157 #endif
    158 
    159 .Lwant_resched:
    160 	.word	_C_LABEL(want_resched)
    161 
    162 .Lcpufuncs:
    163 	.word	_C_LABEL(cpufuncs)		/* base for the CF_* indirect calls */
    164 
    165 #ifndef MULTIPROCESSOR
    166 	.data
    167 	.global	_C_LABEL(curpcb)
    168 _C_LABEL(curpcb):
    169 	.word	0x00000000			/* curpcb itself, initially 0 */
    170 	.text
    171 #endif
    172 
    173 .Lblock_userspace_access:
    174 	.word	_C_LABEL(block_userspace_access)
    175 
    176 .Lcpu_do_powersave:
    177 	.word	_C_LABEL(cpu_do_powersave)
    178 
    179 #ifdef ARM32_PMAP_NEW
    180 .Lpmap_kernel_cstate:
    181 	.word	(kernel_pmap_store + PMAP_CSTATE)
    182 
    183 .Llast_cache_state_ptr:
    184 	.word	_C_LABEL(pmap_cache_state)
    185 #endif
    186 
    187 /*
    188  * Idle loop, exercised while waiting for a process to wake up.
    189  * Entered from .Lswitch_search when whichqs is zero; leaves by branching
    190  * back to .Lswitch_search as soon as whichqs is seen to be non-zero.
    191  * NOTE: When we jump back to .Lswitch_search, we must have a pointer to
    192  * whichqs in r7, which is what it is when we arrive here.
    193  */
    194 /* LINTSTUB: Ignore */
    195 ASENTRY_NP(idle)
    196 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
    197 	bl	_C_LABEL(sched_unlock_idle)
    198 #endif
    199 	ldr	r3, .Lcpu_do_powersave	/* r3 = &cpu_do_powersave */
    200 
    201 	/* Enable interrupts */
    202 	IRQenable
    203 
    204 	/* If we don't want to sleep, use a simpler loop. */
    205 	ldr	r3, [r3]		/* r3 = cpu_do_powersave */
    206 	teq	r3, #0
    207 	bne	2f			/* non-zero: take the powersave loop */
    208 
    209 	/* Non-powersave idle. */
    210 1:	/* should maybe do uvm pageidlezero stuff here */
    211 	ldr	r3, [r7]		/* r3 = whichqs */
    212 	teq	r3, #0x00000000
    213 	bne	.Lswitch_search		/* a queue is active: go pick an lwp */
    214 	b	1b			/* otherwise keep polling */
    215 
    216 2:	/* Powersave idle. */
    217 	ldr	r4, .Lcpufuncs		/* r4 = &cpufuncs */
    218 3:	ldr	r3, [r7]		/* r3 = whichqs */
    219 	teq	r3, #0x00000000
    220 	bne	.Lswitch_search
    221 
    222 	/* if saving power, don't want to pageidlezero */
    223 	mov	r0, #0			/* argument for the CF_SLEEP hook */
    224 	adr	lr, 3b			/* hook returns to the re-check at 3: */
    225 	ldr	pc, [r4, #(CF_SLEEP)]	/* indirect call through cpufuncs */
    226 	/* loops back around */
    226 	/* loops back around */
    227 
    228 
    229 /*
    230  * Find a new lwp to run, save the current context and
    231  * load the new context
    232  * Returns: r0 = "did we switch" flag (see .Lswitch_return).
    233  * Arguments:
    234  *	r0	'struct lwp *' of the current LWP
    235  */
    236 
    237 ENTRY(cpu_switch)
    238 /*
    239  * Local register usage (the "rem:" notes further down are authoritative).
    240  * r1 = oldlwp
    241  * r2 = scratch (the old spl level is kept on the stack, not in r2)
    242  * r3 = whichqs
    243  * r4 = queue
    244  * r5 = oldlwp while searching, then &qs[queue]
    245  * r6 = newlwp
    246  * r7 = &whichqs while searching, then scratch
    247  */
    248 	stmfd	sp!, {r4-r7, lr}
    249 
    250 	/*
    251 	 * Indicate that there is no longer a valid process (curlwp = 0).
    252 	 * Zero the current PCB pointer while we're at it.
    253 	 */
    254 	ldr	r7, .Lcurlwp
    255 	ldr	r6, .Lcurpcb
    256 	mov	r2, #0x00000000
    257 	str	r2, [r7]		/* curlwp = NULL */
    258 	str	r2, [r6]		/* curpcb = NULL */
    259 
    260 	/* stash the old lwp in r5 while we call functions */
    261 	mov	r5, r0
    262 
    263 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
    264 	/* release the sched_lock before handling interrupts */
    265 	bl	_C_LABEL(sched_unlock_idle)
    266 #endif
    267 
    268 	/* Lower the spl level to spl0 and get the current spl level. */
    269 #ifdef __NEWINTR
    270 	mov	r0, #(IPL_NONE)
    271 	bl	_C_LABEL(_spllower)
    272 #else /* ! __NEWINTR */
    273 #ifdef spl0
    274 	mov	r0, #(_SPL_0)
    275 	bl	_C_LABEL(splx)
    276 #else
    277 	bl	_C_LABEL(spl0)
    278 #endif /* spl0 */
    279 #endif /* __NEWINTR */
    280 
    281 	/* Push the old spl level onto the stack; .Lswitch_return pops it */
    282 	str	r0, [sp, #-0x0004]!
    283 
    284 	/* First phase : find a new lwp */
    285 
    286 	ldr	r7, .Lwhichqs
    287 
    288 	/* rem: r5 = old lwp */
    289 	/* rem: r7 = &whichqs */
    290 
    291 .Lswitch_search:
    292 	IRQdisable
    293 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
    294 	bl	_C_LABEL(sched_lock_idle)
    295 #endif
    296 
    297 	/* Do we have any active queues? */
    298 	ldr	r3, [r7]
    299 
    300 	/* If not we must idle until we do. */
    301 	teq	r3, #0x00000000
    302 	beq	_ASM_LABEL(idle)
    303 
    304 	/* put old lwp back in r1 */
    305 	mov	r1, r5
    306 
    307 	/* rem: r1 = old lwp */
    308 	/* rem: r3 = whichqs */
    309 	/* rem: interrupts are disabled */
    310 
    311 	/*
    312 	 * We have found an active queue. Currently we do not know which queue
    313 	 * is active just that one of them is.
    314 	 */
    315 	/* this is the ffs algorithm devised by d.seal and posted to
    316 	 * comp.sys.arm on 16 Feb 1994.
    317 	 */
    318  	rsb	r5, r3, #0		/* r5 = -whichqs */
    319  	ands	r0, r3, r5		/* r0 = lowest set bit of whichqs */
    320 
    321 	adr	r5, .Lcpu_switch_ffs_table
    322 
    323 				    /* X = R0 */
    324 	orr	r4, r0, r0, lsl #4  /* r4 = X * 0x11 */
    325 	orr	r4, r4, r4, lsl #6  /* r4 = X * 0x451 */
    326 	rsb	r4, r4, r4, lsl #16 /* r4 = X * 0x0450fbaf */
    327 
    328 	/* used further down, saves SA stall */
    329 	ldr	r6, .Lqs
    330 
    331 	/* now lookup in table indexed on top 6 bits of r4 */
    332 	ldrb	r4, [ r5, r4, lsr #26 ]
    333 
    334 	/* rem: r0 = bit mask of chosen queue (1 << r4) */
    335 	/* rem: r1 = old lwp */
    336 	/* rem: r3 = whichqs */
    337 	/* rem: r4 = queue number */
    338 	/* rem: interrupts are disabled */
    339 
    340 	/* Get the address of the queue (&qs[queue]); each entry is 8 bytes */
    341 	add	r5, r6, r4, lsl #3
    342 
    343 	/*
    344 	 * Get the lwp from the queue and place the next process in
    345 	 * the queue at the head. This basically unlinks the lwp at
    346 	 * the head of the queue.
    347 	 */
    348 	ldr	r6, [r5, #(L_FORW)]
    349 
    350 	/* rem: r6 = new lwp */
    351 	ldr	r7, [r6, #(L_FORW)]	/* r7 = element after the new lwp */
    352 	str	r7, [r5, #(L_FORW)]	/* ...which becomes the queue head */
    353 
    354 	/*
    355 	 * Test to see if the queue is now empty. If the head of the queue
    356 	 * points to the queue itself then there are no more lwps in
    357 	 * the queue. We can therefore clear the queue not empty flag held
    358 	 * in r3.
    359 	 */
    360 
    361 	teq	r5, r7
    362 	biceq	r3, r3, r0
    363 
    364 	/* rem: r0 = bit mask of chosen queue (1 << r4) - NOT NEEDED ANY MORE */
    365 
    366 	/* Fix the back pointer for the lwp now at the head of the queue. */
    367 	ldr	r0, [r6, #(L_BACK)]
    368 	str	r0, [r7, #(L_BACK)]
    369 
    370 	/* Update the RAM copy of the queue not empty flags word. */
    371 	ldr	r7, .Lwhichqs
    372 	str	r3, [r7]
    373 
    374 	/* rem: r1 = old lwp */
    375 	/* rem: r3 = whichqs - NOT NEEDED ANY MORE */
    376 	/* rem: r4 = queue number - NOT NEEDED ANY MORE */
    377 	/* rem: r6 = new lwp */
    378 	/* rem: interrupts are disabled */
    379 
    380 	/* Clear the want_resched flag */
    381 	ldr	r7, .Lwant_resched
    382 	mov	r0, #0x00000000
    383 	str	r0, [r7]
    384 
    385 	/*
    386 	 * Clear the back pointer of the lwp we have removed from
    387 	 * the head of the queue. The new lwp is isolated now.
    388 	 */
    389 	str	r0, [r6, #(L_BACK)]
    390 
    391 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
    392 	/*
    393 	 * unlock the sched_lock, but leave interrupts off, for now.
    394 	 */
    395 	mov	r7, r1			/* keep old lwp across the call */
    396 	bl	_C_LABEL(sched_unlock_idle)
    397 	mov	r1, r7
    398 #endif
    399 
    400 .Lswitch_resume:	/* cpu_switchto also branches here: r1 = old lwp, r6 = new lwp */
    401 #ifdef MULTIPROCESSOR
    402 	/* XXX use curcpu() */
    403 	ldr	r0, .Lcpu_info_store
    404 	str	r0, [r6, #(L_CPU)]
    405 #else
    406 	/* l->l_cpu initialized in fork1() for single-processor */
    407 #endif
    408 
    409 	/* Process is now on a processor. */
    410 	mov	r0, #LSONPROC			/* l->l_stat = LSONPROC */
    411 	str	r0, [r6, #(L_STAT)]
    412 
    413 	/* We have a new curlwp now so make a note of it */
    414 	ldr	r7, .Lcurlwp
    415 	str	r6, [r7]
    416 
    417 	/* Hook in a new pcb */
    418 	ldr	r7, .Lcurpcb
    419 	ldr	r0, [r6, #(L_ADDR)]
    420 	str	r0, [r7]
    421 
    422 	/* At this point we can allow IRQ's again. */
    423 	IRQenable
    424 
    425 	/* rem: r1 = old lwp */
    426 	/* rem: r4 = return value (set just below) */
    427 	/* rem: r6 = new lwp */
    428 	/* rem: interrupts are enabled */
    429 
    430 	/*
    431 	 * If the new process is the same as the process that called
    432 	 * cpu_switch() then we do not need to save and restore any
    433 	 * contexts. This means we can make a quick exit.
    434 	 * The test is simple if curlwp on entry (now in r1) is the
    435 	 * same as the lwp removed from the queue we can jump to the exit.
    436 	 */
    437 	teq	r1, r6
    438 	moveq	r4, #0x00000000		/* default to "didn't switch" */
    439 	beq	.Lswitch_return
    440 
    441 	/*
    442 	 * At this point, we are guaranteed to be switching to
    443 	 * a new lwp.
    444 	 */
    445 	mov	r4, #0x00000001
    446 
    447 	/* Remember the old lwp in r0 */
    448 	mov	r0, r1
    449 
    450 	/*
    451 	 * If the old lwp on entry to cpu_switch was zero then the
    452 	 * process that called it was exiting. This means that we do
    453 	 * not need to save the current context. Instead we can jump
    454 	 * straight to restoring the context for the new process.
    455 	 */
    456 	teq	r0, #0x00000000
    457 	beq	.Lswitch_exited
    458 
    459 	/* rem: r0 = old lwp */
    460 	/* rem: r4 = return value */
    461 	/* rem: r6 = new lwp */
    462 	/* rem: interrupts are enabled */
    463 
    464 	/* Stage two : Save old context */
    465 
    466 	/* Get the user structure for the old lwp. */
    467 	ldr	r1, [r0, #(L_ADDR)]
    468 
    469 	/* Save all the registers in the old lwp's pcb */
    470 	add	r7, r1, #(PCB_R8)
    471 	stmia	r7, {r8-r13}
    472 
    473 	/*
    474 	 * NOTE: We can now use r8-r13 until it is time to restore
    475 	 * them for the new process.
    476 	 */
    477 
    478 	/* Remember the old PCB. */
    479 	mov	r8, r1
    480 
    481 	/* r1 now free! */
    482 
    483 	/* Get the user structure for the new process in r9 */
    484 	ldr	r9, [r6, #(L_ADDR)]
    485 
    486 	/*
    487 	 * This can be optimised... We know we want to go from SVC32
    488 	 * mode to UND32 mode (with IRQs masked while we are in UND32)
    489 	 */
    490         mrs	r3, cpsr
    491 	bic	r2, r3, #(PSR_MODE)
    492 	orr	r2, r2, #(PSR_UND32_MODE | I32_bit)
    493         msr	cpsr_c, r2
    494 
    495 	str	sp, [r8, #(PCB_UND_SP)]	/* save the UND32-banked sp in the old PCB */
    496 
    497         msr	cpsr_c, r3		/* Restore the old mode */
    498 
    499 	/* rem: r0 = old lwp */
    500 	/* rem: r4 = return value */
    501 	/* rem: r6 = new lwp */
    502 	/* rem: r8 = old PCB */
    503 	/* rem: r9 = new PCB */
    504 	/* rem: interrupts are enabled */
    505 
    506 	/* What else needs to be saved?  Only FPA stuff when that is supported */
    507 
    508 	/* Third phase : restore saved context */
    509 
    510 	/* rem: r0 = old lwp */
    511 	/* rem: r4 = return value */
    512 	/* rem: r6 = new lwp */
    513 	/* rem: r8 = old PCB */
    514 	/* rem: r9 = new PCB */
    515 	/* rem: interrupts are enabled */
    516 
    517 	/*
    518 	 * Get the new L1 table pointer into r11.  If we're switching to
    519 	 * an LWP with the same address space as the outgoing one, we can
    520 	 * skip the cache purge and the TTB load.
    521 	 *
    522 	 * To avoid data dep stalls that would happen anyway, we try
    523 	 * and get some useful work done in the mean time.
    524 	 */
    525 	ldr	r10, [r8, #(PCB_PAGEDIR)]	/* r10 = old L1 */
    526 	ldr	r11, [r9, #(PCB_PAGEDIR)]	/* r11 = new L1 */
    527 
    528 #ifndef ARM32_PMAP_NEW
    529 	ldr	r3, .Lblock_userspace_access
    530 	mov	r1, #0x00000001
    531 	mov	r2, #0x00000000
    532 	teq	r10, r11			/* r10 == r11? */
    533 	beq	.Lcs_context_switched		/* yes! */
    534 	/* NOTE(review): the three loads below repeat the ones above the branch */
    535 	/*
    536 	 * Don't allow user space access between the purge and the switch.
    537 	 */
    538 	ldr	r3, .Lblock_userspace_access
    539 	mov	r1, #0x00000001
    540 	mov	r2, #0x00000000
    541 	str	r1, [r3]			/* block_userspace_access = 1 */
    542 
    543 	stmfd	sp!, {r0-r3}			/* keep r0-r3 across the call */
    544 	ldr	r1, .Lcpufuncs
    545 	mov	lr, pc				/* return to the ldmfd below */
    546 	ldr	pc, [r1, #CF_IDCACHE_WBINV_ALL]
    547 	ldmfd	sp!, {r0-r3}
    548 
    549 .Lcs_cache_purge_skipped:
    550 	/* At this point we need to kill IRQ's again. */
    551 	IRQdisable
    552 
    553 	/* rem: r2 = 0 */
    554 	/* rem: r3 = &block_userspace_access */
    555 	/* rem: r4 = return value */
    556 	/* rem: r6 = new lwp */
    557 	/* rem: r9 = new PCB */
    558 	/* rem: r11 == new L1 */
    559 
    560 	/*
    561 	 * Interrupts are disabled so we can allow user space accesses again
    562 	 * as none will occur until interrupts are re-enabled after the
    563 	 * switch.
    564 	 */
    565 	str	r2, [r3]			/* block_userspace_access = 0 */
    566 
    567 	/* Switch the memory to the new process */
    568 	ldr	r3, .Lcpufuncs
    569 	mov	r0, r11				/* argument: new L1 */
    570 	mov	lr, pc
    571 	ldr	pc, [r3, #CF_CONTEXT_SWITCH]
    572 
    573 .Lcs_context_switched:
    574 
    575 
    576 #else	/* ARM32_PMAP_NEW */
    577 
    578 	ldr	r0, [r8, #(PCB_DACR)]		/* r0 = old DACR */
    579 	ldr	r1, [r9, #(PCB_DACR)]		/* r1 = new DACR */
    580 	ldr	r8, [r9, #(PCB_CSTATE)]		/* r8 = &new_pmap->pm_cstate */
    581 	ldr	r5, .Llast_cache_state_ptr	/* Previous thread's cstate */
    582 
    583 	teq	r10, r11			/* Same L1? */
    584 	ldr	r5, [r5]			/* r5 = last cache state (or NULL) */
    585 	cmpeq	r0, r1				/* Same DACR? */
    586 	beq	.Lcs_context_switched		/* yes! */
    587 
    588 	ldr	r3, .Lblock_userspace_access
    589 	mov	r12, #0				/* default: nothing to flush */
    590 	cmp	r5, #0				/* No last vm? (switch_exit) */
    591 	beq	.Lcs_cache_purge_skipped	/* None: we can skip the cache flush */
    592 
    593 	mov	r2, #DOMAIN_CLIENT
    594 	cmp	r1, r2, lsl #(PMAP_DOMAIN_KERNEL * 2) /* Sw to kernel thread? */
    595 	beq	.Lcs_cache_purge_skipped	/* Yup. Don't flush cache */
    596 
    597 	cmp	r5, r8				/* Same userland VM space? */
    598 	ldrneb	r12, [r5, #(CS_CACHE_ID)]	/* Last VM space cache state */
    599 
    600 	/*
    601 	 * We're definitely switching to a new userland VM space,
    602 	 * and the previous userland VM space has yet to be flushed
    603 	 * from the cache/tlb.
    604 	 *
    605 	 * r12 holds the previous VM space's cs_cache_id state
    606 	 */
    607 	tst	r12, #0xff			/* Test cs_cache_id */
    608 	beq	.Lcs_cache_purge_skipped	/* VM space is not in cache */
    609 
    610 	/*
    611 	 * Definitely need to flush the cache.
    612 	 * Mark the old VM space as NOT being resident in the cache.
    613 	 */
    614 	mov	r2, #0x00000000
    615 	strb	r2, [r5, #(CS_CACHE_ID)]
    616 	strb	r2, [r5, #(CS_CACHE_D)]
    617 
    618 	/*
    619 	 * Don't allow user space access between the purge and the switch.
    620 	 */
    621 	mov	r2, #0x00000001
    622 	str	r2, [r3]			/* block_userspace_access = 1 */
    623 
    624 	stmfd	sp!, {r0-r3}			/* keep r0-r3 across the call */
    625 	ldr	r1, .Lcpufuncs
    626 	mov	lr, pc				/* return to the ldmfd below */
    627 	ldr	pc, [r1, #CF_IDCACHE_WBINV_ALL]
    628 	ldmfd	sp!, {r0-r3}
    629 
    630 .Lcs_cache_purge_skipped:
    631 	/* rem: r1 = new DACR */
    632 	/* rem: r3 = &block_userspace_access */
    633 	/* rem: r4 = return value */
    634 	/* rem: r5 = &old_pmap->pm_cstate (or NULL) */
    635 	/* rem: r6 = new lwp */
    636 	/* rem: r8 = &new_pmap->pm_cstate */
    637 	/* rem: r9 = new PCB */
    638 	/* rem: r10 = old L1 */
    639 	/* rem: r11 = new L1 */
    640 
    641 	mov	r2, #0x00000000
    642 	ldr	r7, [r9, #(PCB_PL1VEC)]		/* r7 = L1 slot for vector page, or 0 */
    643 
    644 	/*
    645 	 * At this point we need to kill IRQ's again.
    646 	 *
    647 	 * XXXSCW: Don't need to block FIQs if vectors have been relocated
    648 	 */
    649 	IRQdisableALL
    650 
    651 	/*
    652 	 * Interrupts are disabled so we can allow user space accesses again
    653 	 * as none will occur until interrupts are re-enabled after the
    654 	 * switch.
    655 	 */
    656 	str	r2, [r3]			/* block_userspace_access = 0 */
    657 
    658 	/*
    659 	 * Ensure the vector table is accessible by fixing up the L1
    660 	 */
    661 	cmp	r7, #0			/* No need to fixup vector table? */
    662 	ldrne	r2, [r7]		/* But if yes, fetch current value */
    663 	ldrne	r0, [r9, #(PCB_L1VEC)]	/* Fetch new vector_page value */
    664 	mcr	p15, 0, r1, c3, c0, 0	/* Update DACR for new context */
    665 	cmpne	r2, r0			/* Stuffing the same value? */
    666 #ifndef PMAP_INCLUDE_PTE_SYNC
    667 	strne	r0, [r7]		/* Nope, update it */
    668 #else
    669 	beq	.Lcs_same_vector
    670 	str	r0, [r7]		/* Otherwise, update it */
    671 
    672 	/*
    673 	 * Need to sync the cache to make sure that last store is
    674 	 * visible to the MMU.
    675 	 */
    676 	ldr	r2, .Lcpufuncs
    677 	mov	r0, r7			/* start address: the L1 slot */
    678 	mov	r1, #4			/* length: one 4-byte entry */
    679 	mov	lr, pc
    680 	ldr	pc, [r2, #CF_DCACHE_WB_RANGE]
    681 
    682 .Lcs_same_vector:
    683 #endif
    684 
    685 	cmp	r10, r11		/* Switching to the same L1? */
    686 	ldr	r10, .Lcpufuncs		/* r10 = &cpufuncs (flags unaffected) */
    687 	beq	.Lcs_same_l1		/* Yup. */
    688 
    689 	/*
    690 	 * Do a full context switch, including full TLB flush.
    691 	 */
    692 	mov	r0, r11
    693 	mov	lr, pc
    694 	ldr	pc, [r10, #CF_CONTEXT_SWITCH]
    695 
    696 	/*
    697 	 * Mark the old VM space as NOT being resident in the TLB
    698 	 */
    699 	mov	r2, #0x00000000
    700 	cmp	r5, #0			/* skip when there is no old cache state */
    701 	strneh	r2, [r5, #(CS_TLB_ID)]
    702 	b	.Lcs_context_switched
    703 
    704 	/*
    705 	 * We're switching to a different process in the same L1.
    706 	 * In this situation, we only need to flush the TLB for the
    707 	 * vector_page mapping, and even then only if r7 is non-NULL.
    708 	 */
    709 .Lcs_same_l1:
    710 	cmp	r7, #0
    711 	movne	r0, #0			/* We *know* vector_page's VA is 0x0 */
    712 	movne	lr, pc
    713 	ldrne	pc, [r10, #CF_TLB_FLUSHID_SE]
    714 
    715 .Lcs_context_switched:
    716 	/* rem: r8 = &new_pmap->pm_cstate */
    717 
    718 	/* XXXSCW: Safe to re-enable FIQs here */
    719 
    720 	/*
    721 	 * The new VM space is live in the cache and TLB.
    722 	 * Update its cache/tlb state, and if it's not the kernel
    723 	 * pmap, update the 'last cache state' pointer.
    724 	 */
    725 	mov	r2, #-1
    726 	ldr	r5, .Lpmap_kernel_cstate
    727 	ldr	r0, .Llast_cache_state_ptr
    728 	str	r2, [r8, #(CS_ALL)]	/* mark every cache/TLB state bit */
    729 	cmp	r5, r8			/* is this the kernel pmap's state? */
    730 	strne	r8, [r0]		/* no: remember it as the last one */
    731 
    732 #endif	/* ARM32_PMAP_NEW */
    733 
    734 	/* rem: r4 = return value */
    735 	/* rem: r6 = new lwp */
    736 	/* rem: r9 = new PCB */
    737 
    738 	/*
    739 	 * This can be optimised... We know we want to go from SVC32
    740 	 * mode to UND32 mode
    741 	 */
    742         mrs	r3, cpsr
    743 	bic	r2, r3, #(PSR_MODE)
    744 	orr	r2, r2, #(PSR_UND32_MODE)
    745         msr	cpsr_c, r2
    746 
    747 	ldr	sp, [r9, #(PCB_UND_SP)]
    748 
    749         msr	cpsr_c, r3		/* Restore the old mode */
    750 
    751 	/* Restore all the save registers */
    752 	add	r7, r9, #PCB_R8
    753 	ldmia	r7, {r8-r13}
    754 
    755 	sub	r7, r7, #PCB_R8		/* restore PCB pointer */
    756 
    757 	ldr	r5, [r6, #(L_PROC)]	/* fetch the proc for below */
    758 
    759 	/* rem: r4 = return value */
    760 	/* rem: r5 = new lwp's proc */
    761 	/* rem: r6 = new lwp */
    762 	/* rem: r7 = new pcb */
    763 
    764 #ifdef ARMFPE
    765 	add	r0, r7, #(USER_SIZE) & 0x00ff
    766 	add	r0, r0, #(USER_SIZE) & 0xff00
    767 	bl	_C_LABEL(arm_fpe_core_changecontext)
    768 #endif
    769 
    770 	/* We can enable interrupts again */
    771 #ifndef ARM32_PMAP_NEW
    772 	IRQenable
    773 #else
    774 	IRQenableALL
    775 #endif
    776 
    777 	/* rem: r4 = return value */
    778 	/* rem: r5 = new lwp's proc */
    779 	/* rem: r6 = new lwp */
    780 	/* rem: r7 = new PCB */
    781 
    782 	/*
    783 	 * Check for restartable atomic sequences (RAS).
    784 	 */
    785 
    786 	ldr	r2, [r5, #(P_NRAS)]
    787 	ldr	r4, [r7, #(PCB_TF)]	/* r4 = trapframe (used below) */
    788 	teq	r2, #0			/* p->p_nras == 0? */
    789 	bne	.Lswitch_do_ras		/* no, check for one */
    790 
    791 .Lswitch_return:
    792 
    793 	/* Get the spl level from the stack and update the current spl level */
    794 	ldr	r0, [sp], #0x0004
    795 	bl	_C_LABEL(splx)
    796 
    797 	/* cpu_switch returns 1 == switched, 0 == didn't switch */
    798 	mov	r0, r4
    799 
    800 	/*
    801 	 * Pull the registers that got pushed when either savectx() or
    802 	 * cpu_switch() was called and return.
    803 	 */
    804 	ldmfd	sp!, {r4-r7, pc}
    805 
    806 .Lswitch_do_ras:
    807 	ldr	r1, [r4, #(TF_PC)]	/* second ras_lookup() arg */
    808 	mov	r0, r5			/* first ras_lookup() arg */
    809 	bl	_C_LABEL(ras_lookup)
    810 	cmn	r0, #1			/* -1 means "not in a RAS" */
    811 	strne	r0, [r4, #(TF_PC)]
    812 	b	.Lswitch_return
    813 
    814 .Lswitch_exited:
    815 	/*
    816 	 * We skip the cache purge because switch_exit() already did it.
    817 	 * Load up registers the way .Lcs_cache_purge_skipped expects.
    818 	 * Userspace access already blocked by switch_exit().
    819 	 */
    820 	ldr	r9, [r6, #(L_ADDR)]		/* r9 = new PCB */
    821 	ldr	r3, .Lblock_userspace_access	/* r3 = &block_userspace_access */
    822 #ifndef ARM32_PMAP_NEW
    823 	mov	r2, #0x00000000			/* r2 = 0, stored to [r3] there */
    824 #else
    825 	mrc	p15, 0, r10, c2, c0, 0		/* r10 = old L1 */
    826 	mov	r5, #0				/* No previous cache state */
    827 	ldr	r1, [r9, #(PCB_DACR)]		/* r1 = new DACR */
    828 	ldr	r8, [r9, #(PCB_CSTATE)]		/* r8 = new cache state */
    829 #endif
    830 	ldr	r11, [r9, #(PCB_PAGEDIR)]	/* r11 = new L1 */
    831 	b	.Lcs_cache_purge_skipped
    832 
    833 /*
    834  * cpu_switchto(struct lwp *current, struct lwp *next)
    835  * Switch to the specified next LWP
    836  * Arguments:
    837  *
    838  *	r0	'struct lwp *' of the current LWP
    839  *	r1	'struct lwp *' of the LWP to switch to
    840  */
    841 ENTRY(cpu_switchto)
    842 	stmfd	sp!, {r4-r7, lr}	/* same frame as cpu_switch pushes */
    843 
    844 	/* Keep both lwp pointers in r5/r6 across the calls below. */
    845 	mov	r6, r0		/* save old lwp */
    846 	mov	r5, r1		/* save new lwp */
    847 
    848 #if defined(LOCKDEBUG)
    849 	/* release the sched_lock before handling interrupts */
    850 	bl	_C_LABEL(sched_unlock_idle)
    851 #endif
    852 	/* Lower the spl level to spl0 and get the current spl level. */
    853 #ifdef __NEWINTR
    854 	mov	r0, #(IPL_NONE)
    855 	bl	_C_LABEL(_spllower)
    856 #else /* ! __NEWINTR */
    857 #ifdef spl0
    858 	mov	r0, #(_SPL_0)
    859 	bl	_C_LABEL(splx)
    860 #else
    861 	bl	_C_LABEL(spl0)
    862 #endif /* spl0 */
    863 #endif /* __NEWINTR */
    864 
    865 	/* Push the old spl level onto the stack; .Lswitch_return pops it */
    866 	str	r0, [sp, #-0x0004]!
    867 
    868 	IRQdisable
    869 #if defined(LOCKDEBUG)
    870 	bl	_C_LABEL(sched_lock_idle)
    871 #endif
    872 
    873 	mov	r0, r6		/* restore old lwp */
    874 	mov	r1, r5		/* restore new lwp */
    875 
    876 	/* rem: r0 = old lwp */
    877 	/* rem: r1 = new lwp */
    878 	/* rem: interrupts are disabled */
    879 
    880 	/*
    881 	 * Okay, set up registers the way cpu_switch() wants them,
    882 	 * and jump into the middle of it (where we bring up the
    883 	 * new process): .Lswitch_resume wants r1 = old lwp, r6 = new lwp.
    884 	 */ /* NOTE(review): r0/r1 were only just reloaded from r6/r5 above. */
    885 	mov	r6, r1			/* r6 = new lwp */
    886 #if defined(LOCKDEBUG)
    887 	mov	r5, r0			/* preserve old lwp */
    888 	bl	_C_LABEL(sched_unlock_idle)
    889 	mov	r1, r5			/* r1 = old lwp */
    890 #else
    891 	mov	r1, r0			/* r1 = old lwp */
    892 #endif
    893 	b	.Lswitch_resume
    894 
    895 /*
    896  * void switch_exit(struct lwp *l, struct lwp *l0, void (*exit)(struct lwp *));
    897  * Switch to lwp0's saved context and deallocate the address space and kernel
    898  * stack for l.  Then jump into cpu_switch(), as if we were in lwp0 all along.
    899  */
    900 
    901 /* LINTSTUB: Func: void switch_exit(struct lwp *l, struct lwp *l0, void (*)(struct lwp *)) */
    902 ENTRY(switch_exit)
    903 	/*
    904 	 * The process is going away, so we can use callee-saved
    905 	 * registers here without having to save them.
    906 	 */
    907 
    908 	mov	r4, r0
    909 	ldr	r0, .Lcurlwp
    910 
    911 	mov	r5, r1
    912 	ldr	r1, .Lblock_userspace_access
    913 
    914 	mov	r6, r2
    915 
    916 	/*
    917 	 * r4 = lwp
    918 	 * r5 = lwp0
    919 	 * r6 = exit func
    920 	 */
    921 
    922 	mov	r2, #0x00000000		/* curlwp = NULL */
    923 	str	r2, [r0]
    924 
    925 #ifdef ARM32_PMAP_NEW
    926 	/*
    927 	 * We're about to clear both the cache and the TLB.
    928 	 * Make sure to zap the 'last cache state' pointer since the
    929 	 * pmap might be about to go away. Also ensure the outgoing
    930 	 * VM space's cache state is marked as NOT resident in the
    931 	 * cache, and that lwp0's cache state IS resident.
    932 	 */
    933 	ldr	r7, [r4, #(L_ADDR)]		/* r7 = old lwp's PCB */
    934 	ldr	r0, .Llast_cache_state_ptr	/* Last userland cache state */
    935 	ldr	r9, [r7, #(PCB_CSTATE)]		/* Fetch cache state pointer */
    936 	ldr	r3, [r5, #(L_ADDR)]		/* r3 = lwp0's PCB */
    937 	str	r2, [r0]			/* No previous cache state */
    938 	str	r2, [r9, #(CS_ALL)]		/* Zap old lwp's cache state */
    939 	ldr	r3, [r3, #(PCB_CSTATE)]		/* lwp0's cache state */
    940 	mov	r2, #-1
    941 	str	r2, [r3, #(CS_ALL)]		/* lwp0 is in da cache! */
    942 #endif
    943 
    944 	/*
    945 	 * Don't allow user space access between the purge and the switch.
    946 	 */
    947 	mov	r2, #0x00000001
    948 	str	r2, [r1]
    949 
    950 #ifndef ARM32_PMAP_NEW
    951 	/* Switch to lwp0 context */
    952 
    953 	ldr	r0, .Lcpufuncs
    954 	mov	lr, pc
    955 	ldr	pc, [r0, #CF_IDCACHE_WBINV_ALL]
    956 
    957 	ldr	r2, [r5, #(L_ADDR)]
    958 
    959 	/*
    960 	 * r2 = lwp0's PCB
    961 	 */
    962 
    963 	IRQdisable
    964 
    965 	ldr	r0, [r2, #(PCB_PAGEDIR)]
    966 
    967 	/* Switch the memory to the new process */
    968 	ldr	r1, .Lcpufuncs
    969 	mov	lr, pc
    970 	ldr	pc, [r1, #CF_CONTEXT_SWITCH]
    971 
    972 	ldr	r0, .Lcurpcb
    973 
    974 	/* Restore all the save registers */
    975 	add	r7, r2, #PCB_R8
    976 	ldmia	r7, {r8-r13}
    977 
    978 	str	r2, [r0]	/* curpcb = lwp0's PCB */
    979 
    980 	IRQenable
    981 
    982 #else	/* ARM32_PMAP_NEW */
    983 	/* Switch to lwp0 context */
    984 
    985 	ldr	r9, .Lcpufuncs
    986 	mov	lr, pc
    987 	ldr	pc, [r9, #CF_IDCACHE_WBINV_ALL]
    988 
    989 	ldr	r0, [r7, #(PCB_PL1VEC)]
    990 	ldr	r1, [r7, #(PCB_DACR)]
    991 
    992 	/*
    993 	 * r0 = Pointer to L1 slot for vector_page (or NULL)
    994 	 * r1 = lwp0's DACR
    995 	 * r4 = lwp we're switching from
    996 	 * r5 = lwp0
    997 	 * r6 = exit func
    998 	 * r7 = lwp0's PCB
    999 	 * r9 = cpufuncs
   1000 	 */
   1001 
   1002 	IRQdisableALL
   1003 
   1004 	/*
   1005 	 * Ensure the vector table is accessible by fixing up lwp0's L1
   1006 	 */
   1007 	cmp	r0, #0			/* No need to fixup vector table? */
   1008 	ldrne	r3, [r0]		/* But if yes, fetch current value */
   1009 	ldrne	r2, [r7, #(PCB_L1VEC)]	/* Fetch new vector_page value */
   1010 	mcr	p15, 0, r1, c3, c0, 0	/* Update DACR for lwp0's context */
   1011 	cmpne	r3, r2			/* Stuffing the same value? */
   1012 	strne	r2, [r0]		/* Store if not. */
   1013 
   1014 #ifdef PMAP_INCLUDE_PTE_SYNC
   1015 	/*
   1016 	 * Need to sync the cache to make sure that last store is
   1017 	 * visible to the MMU.
   1018 	 */
   1019 	movne	r1, #4
   1020 	movne	lr, pc
   1021 	ldrne	pc, [r9, #CF_DCACHE_WB_RANGE]
   1022 #endif
   1023 
   1024 	/*
   1025 	 * Note: We don't do the same optimisation as cpu_switch() with
   1026 	 * respect to avoiding flushing the TLB if we're switching to
   1027 	 * the same L1 since this process' VM space may be about to go
   1028 	 * away, so we don't want *any* turds left in the TLB.
   1029 	 */
   1030 
   1031 	/* Switch the memory to the new process */
   1032 	ldr	r0, [r7, #(PCB_PAGEDIR)]
   1033 	mov	lr, pc
   1034 	ldr	pc, [r9, #CF_CONTEXT_SWITCH]
   1035 
   1036 	ldr	r0, .Lcurpcb
   1037 
   1038 	/* Restore all the save registers */
   1039 	add	r1, r7, #PCB_R8
   1040 	ldmia	r1, {r8-r13}
   1041 
   1042 	str	r7, [r0]	/* curpcb = lwp0's PCB */
   1043 
   1044 	IRQenableALL
   1045 #endif
   1046 
   1047 	/*
   1048 	 * Schedule the vmspace and stack to be freed.
   1049 	 */
   1050 	mov	r0, r4			/* {lwp_}exit2(l) */
   1051 	mov	lr, pc
   1052 	mov	pc, r6
   1053 
   1054 	ldr	r7, .Lwhichqs		/* r7 = &whichqs */
   1055 	mov	r5, #0x00000000		/* r5 = old lwp = NULL */
   1056 	b	.Lswitch_search
   1057 
   1058 /* LINTSTUB: Func: void savectx(struct pcb *pcb) */
   1059 ENTRY(savectx)
   1060 	/*
   1061 	 * r0 = pcb
   1062 	 */
   1063 
   1064 	/* Push registers.*/
   1065 	stmfd	sp!, {r4-r7, lr}
   1066 
   1067 	/* Store all the registers in the process's pcb */
   1068 	add	r2, r0, #(PCB_R8)
   1069 	stmia	r2, {r8-r13}
   1070 
   1071 	/* Pull the regs of the stack */
   1072 	ldmfd	sp!, {r4-r7, pc}
   1073 
   1074 ENTRY(proc_trampoline)
   1075 #ifdef MULTIPROCESSOR
   1076 	bl	_C_LABEL(proc_trampoline_mp)
   1077 #endif
   1078 	mov	r0, r5
   1079 	mov	r1, sp
   1080 	mov	lr, pc
   1081 	mov	pc, r4
   1082 
   1083 	/* Kill irq's */
   1084         mrs     r0, cpsr
   1085         orr     r0, r0, #(I32_bit)
   1086         msr     cpsr_c, r0
   1087 
   1088 	PULLFRAME
   1089 
   1090 	movs	pc, lr			/* Exit */
   1091 
    1092 	.type .Lcpu_switch_ffs_table, _ASM_TYPE_OBJECT;
    1093 .Lcpu_switch_ffs_table:
    1094 /* Same as the ffs lookup table, except that every entry is one less. */
         /*
          * 64 one-byte entries; the trailing comment on each row gives the
          * range of table indices that row covers.
          * NOTE(review): the code that indexes this table is outside this
          * excerpt.  Presumably the index is a 6-bit hash of an isolated
          * low set bit and the entry is that bit's zero-based position --
          * confirm against the run-queue search code.
          */
    1095 /*               0   1   2   3   4   5   6   7           */
    1096 	.byte	 0,  0,  1, 12,  2,  6,  0, 13  /*  0- 7 */
    1097 	.byte	 3,  0,  7,  0,  0,  0,  0, 14  /*  8-15 */
    1098 	.byte	10,  4,  0,  0,  8,  0,  0, 25  /* 16-23 */
    1099 	.byte	 0,  0,  0,  0,  0, 21, 27, 15  /* 24-31 */
    1100 	.byte	31, 11,  5,  0,  0,  0,  0,  0	/* 32-39 */
    1101 	.byte	 9,  0,  0, 24,  0,  0, 20, 26  /* 40-47 */
    1102 	.byte	30,  0,  0,  0,  0, 23,  0, 19  /* 48-55 */
    1103 	.byte   29,  0, 22, 18, 28, 17, 16,  0  /* 56-63 */
   1104