Home | History | Annotate | Line # | Download | only in vfp
vfp_init.c revision 1.11
      1 /*      $NetBSD: vfp_init.c,v 1.11 2012/12/10 01:35:28 matt Exp $ */
      2 
      3 /*
      4  * Copyright (c) 2008 ARM Ltd
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. The name of the company may not be used to endorse or promote
     16  *    products derived from this software without specific prior written
     17  *    permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR
     20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY
     23  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
     25  * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
     27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     28  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     29  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/param.h>
     33 #include <sys/types.h>
     34 #include <sys/systm.h>
     35 #include <sys/device.h>
     36 #include <sys/proc.h>
     37 #include <sys/cpu.h>
     38 
     39 #include <arm/pcb.h>
     40 #include <arm/undefined.h>
     41 #include <arm/vfpreg.h>
     42 #include <arm/mcontext.h>
     43 
     44 /*
     45  * Use generic co-processor instructions to avoid assembly problems.
     46  */
     47 
     48 /* FMRX <X>, fpsid */
     49 static inline uint32_t
     50 read_fpsid(void)
     51 {
     52 	uint32_t rv;
     53 	__asm __volatile("mrc p10, 7, %0, c0, c0, 0" : "=r" (rv));
     54 	return rv;
     55 }
     56 
     57 /* FMRX <X>, fpexc */
     58 static inline uint32_t
     59 read_fpscr(void)
     60 {
     61 	uint32_t rv;
     62 	__asm __volatile("mrc p10, 7, %0, c1, c0, 0" : "=r" (rv));
     63 	return rv;
     64 }
     65 
     66 /* FMRX <X>, fpexc */
     67 static inline uint32_t
     68 read_fpexc(void)
     69 {
     70 	uint32_t rv;
     71 	__asm __volatile("mrc p10, 7, %0, c8, c0, 0" : "=r" (rv));
     72 	return rv;
     73 }
     74 
     75 /* FMRX <X>, fpinst */
     76 static inline uint32_t
     77 read_fpinst(void)
     78 {
     79 	uint32_t rv;
     80 	__asm __volatile("mrc p10, 7, %0, c9, c0, 0" : "=r" (rv));
     81 	return rv;
     82 }
     83 
     84 /* FMRX <X>, fpinst2 */
     85 static inline uint32_t
     86 read_fpinst2(void)
     87 {
     88 	uint32_t rv;
     89 	__asm __volatile("mrc p10, 7, %0, c10, c0, 0" : "=r" (rv));
     90 	return rv;
     91 }
     92 
     93 /* FMXR <X>, fpscr */
     94 #define write_fpscr(X)	__asm __volatile("mcr p10, 7, %0, c1, c0, 0" : \
     95 			    : "r" (X))
     96 /* FMXR <X>, fpexc */
     97 #define write_fpexc(X)	__asm __volatile("mcr p10, 7, %0, c8, c0, 0" : \
     98 			    : "r" (X))
     99 /* FMXR <X>, fpinst */
    100 #define write_fpinst(X)	__asm __volatile("mcr p10, 7, %0, c9, c0, 0" : \
    101 			    : "r" (X))
    102 /* FMXR <X>, fpinst2 */
    103 #define write_fpinst2(X) __asm __volatile("mcr p10, 7, %0, c10, c0, 0" : \
    104 			    : "r" (X))
    105 
    106 #ifdef FPU_VFP
    107 
    108 /* FLDMD <X>, {d0-d15} */
    109 static inline void
    110 load_vfpregs_lo(uint64_t *p)
    111 {
    112 	/* vldmia rN, {d0-d15} */
    113 	__asm __volatile("ldc\tp11, c0, [%0], {32}" :: "r" (p) : "memory");
    114 }
    115 
    116 /* FSTMD <X>, {d0-d15} */
    117 static inline void
    118 save_vfpregs_lo(uint64_t *p)
    119 {
    120 	__asm __volatile("stc\tp11, c0, [%0], {32}" :: "r" (p) : "memory");
    121 }
    122 
    123 #ifdef CPU_CORTEX
    124 /* FLDMD <X>, {d16-d31} */
    125 static inline void
    126 load_vfpregs_hi(uint64_t *p)
    127 {
    128 	__asm __volatile("ldcl\tp11, c0, [%0], {32}" :: "r" (&p[16]) : "memory");
    129 }
    130 
    131 /* FLDMD <X>, {d16-d31} */
    132 static inline void
    133 save_vfpregs_hi(uint64_t *p)
    134 {
    135 	__asm __volatile("stcl\tp11, c0, [%0], {32}" :: "r" (&p[16]) : "memory");
    136 }
    137 #endif
    138 
    139 /* The real handler for VFP bounces.  */
    140 static int vfp_handler(u_int, u_int, trapframe_t *, int);
    141 static int vfp_handler(u_int, u_int, trapframe_t *, int);
    142 
    143 static void vfp_state_load(lwp_t *, bool);
    144 static void vfp_state_save(lwp_t *);
    145 static void vfp_state_release(lwp_t *);
    146 
    147 const pcu_ops_t arm_vfp_ops = {
    148 	.pcu_id = PCU_FPU,
    149 	.pcu_state_load = vfp_state_load,
    150 	.pcu_state_save = vfp_state_save,
    151 	.pcu_state_release = vfp_state_release,
    152 };
    153 
    154 struct evcnt vfpevent_use;
    155 struct evcnt vfpevent_reuse;
    156 
    157 /*
    158  * Used to test for a VFP. The following function is installed as a coproc10
    159  * handler on the undefined instruction vector and then we issue a VFP
    160  * instruction. If undefined_test is non zero then the VFP did not handle
    161  * the instruction so must be absent, or disabled.
    162  */
    163 
    164 static int undefined_test;
    165 
    166 static int
    167 vfp_test(u_int address, u_int insn, trapframe_t *frame, int fault_code)
    168 {
    169 
    170 	frame->tf_pc += INSN_SIZE;
    171 	++undefined_test;
    172 	return 0;
    173 }
    174 
    175 #endif /* FPU_VFP */
    176 
    177 struct evcnt vfp_fpscr_ev =
    178     EVCNT_INITIALIZER(EVCNT_TYPE_TRAP, NULL, "VFP", "FPSCR traps");
    179 EVCNT_ATTACH_STATIC(vfp_fpscr_ev);
    180 
    181 static int
    182 vfp_fpscr_handler(u_int address, u_int insn, trapframe_t *frame, int fault_code)
    183 {
    184 	struct lwp * const l = curlwp;
    185 	const u_int regno = (insn >> 12) & 0xf;
    186 	/*
    187 	 * Only match move to/from the FPSCR register and we
    188 	 * can't be using the SP,LR,PC as a source.
    189 	 */
    190 	if ((insn & 0xffef0fff) != 0xeee10a10 || regno > 12)
    191 		return 1;
    192 
    193 	struct pcb * const pcb = lwp_getpcb(l);
    194 
    195 #ifdef FPU_VFP
    196 	/*
    197 	 * If FPU is valid somewhere, let's just reenable VFP and
    198 	 * retry the instruction (only safe thing to do since the
    199 	 * pcb has a stale copy).
    200 	 */
    201 	if (pcb->pcb_vfp.vfp_fpexc & VFP_FPEXC_EN)
    202 		return 1;
    203 #endif
    204 
    205 	if (__predict_false((l->l_md.md_flags & MDLWP_VFPUSED) == 0)) {
    206 		l->l_md.md_flags |= MDLWP_VFPUSED;
    207 		pcb->pcb_vfp.vfp_fpscr =
    208 		    (VFP_FPSCR_DN | VFP_FPSCR_FZ);	/* Runfast */
    209 	}
    210 
    211 	/*
    212 	 * We know know the pcb has the saved copy.
    213 	 */
    214 	register_t * const regp = &frame->tf_r0 + regno;
    215 	if (insn & 0x00100000) {
    216 		*regp = pcb->pcb_vfp.vfp_fpscr;
    217 	} else {
    218 		pcb->pcb_vfp.vfp_fpscr = *regp;
    219 	}
    220 
    221 	vfp_fpscr_ev.ev_count++;
    222 
    223 	frame->tf_pc += INSN_SIZE;
    224 	return 0;
    225 }
    226 
    227 #ifndef FPU_VFP
    228 /*
    229  * If we don't want VFP support, we still need to handle emulating VFP FPSCR
    230  * instructions.
    231  */
    232 void
    233 vfp_attach(void)
    234 {
    235 	install_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
    236 }
    237 
    238 #else
    239 void
    240 vfp_attach(void)
    241 {
    242 	struct cpu_info * const ci = curcpu();
    243 	const char *model = NULL;
    244 	bool vfp_p = false;
    245 
    246 #ifdef FPU_VFP
    247 	if (CPU_ID_ARM11_P(curcpu()->ci_arm_cpuid)
    248 	    || CPU_ID_CORTEX_P(curcpu()->ci_arm_cpuid)) {
    249 		const uint32_t cpacr_vfp = CPACR_CPn(VFP_COPROC);
    250 		const uint32_t cpacr_vfp2 = CPACR_CPn(VFP_COPROC2);
    251 
    252 		/*
    253 		 * We first need to enable access to the coprocessors.
    254 		 */
    255 		uint32_t cpacr = armreg_cpacr_read();
    256 		cpacr |= __SHIFTIN(CPACR_ALL, cpacr_vfp);
    257 		cpacr |= __SHIFTIN(CPACR_ALL, cpacr_vfp2);
    258 #if 0
    259 		if (CPU_ID_CORTEX_P(curcpu()->ci_arm_cpuid)) {
    260 			/*
    261 			 * Disable access to the upper 16 FP registers and NEON.
    262 			 */
    263 			cpacr |= CPACR_V7_D32DIS;
    264 			cpacr |= CPACR_V7_ASEDIS;
    265 		}
    266 #endif
    267 		armreg_cpacr_write(cpacr);
    268 
    269 		/*
    270 		 * If we could enable them, then they exist.
    271 		 */
    272 		cpacr = armreg_cpacr_read();
    273 		vfp_p = __SHIFTOUT(cpacr, cpacr_vfp2) != CPACR_NOACCESS
    274 		    || __SHIFTOUT(cpacr, cpacr_vfp) != CPACR_NOACCESS;
    275 	}
    276 #endif
    277 
    278 	void *uh = install_coproc_handler(VFP_COPROC, vfp_test);
    279 
    280 	undefined_test = 0;
    281 
    282 	const uint32_t fpsid = read_fpsid();
    283 
    284 	remove_coproc_handler(uh);
    285 
    286 	if (undefined_test != 0) {
    287 		aprint_normal_dev(ci->ci_dev, "No VFP detected\n");
    288 		install_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
    289 		ci->ci_vfp_id = 0;
    290 		return;
    291 	}
    292 
    293 	ci->ci_vfp_id = fpsid;
    294 	switch (fpsid & ~ VFP_FPSID_REV_MSK) {
    295 	case FPU_VFP10_ARM10E:
    296 		model = "VFP10 R1";
    297 		break;
    298 	case FPU_VFP11_ARM11:
    299 		model = "VFP11";
    300 		break;
    301 	case FPU_VFP_CORTEXA5:
    302 	case FPU_VFP_CORTEXA7:
    303 	case FPU_VFP_CORTEXA8:
    304 	case FPU_VFP_CORTEXA9:
    305 		model = "NEON MPE (VFP 3.0+)";
    306 		break;
    307 	default:
    308 		aprint_normal_dev(ci->ci_dev, "unrecognized VFP version %x\n",
    309 		    fpsid);
    310 		install_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
    311 		return;
    312 	}
    313 
    314 	if (fpsid != 0) {
    315 		aprint_normal("vfp%d at %s: %s\n",
    316 		    device_unit(curcpu()->ci_dev), device_xname(curcpu()->ci_dev),
    317 		    model);
    318 	}
    319 	evcnt_attach_dynamic(&vfpevent_use, EVCNT_TYPE_MISC, NULL,
    320 	    "VFP", "proc use");
    321 	evcnt_attach_dynamic(&vfpevent_reuse, EVCNT_TYPE_MISC, NULL,
    322 	    "VFP", "proc re-use");
    323 	install_coproc_handler(VFP_COPROC, vfp_handler);
    324 	install_coproc_handler(VFP_COPROC2, vfp_handler);
    325 }
    326 
    327 /* The real handler for VFP bounces.  */
    328 static int
    329 vfp_handler(u_int address, u_int insn, trapframe_t *frame,
    330     int fault_code)
    331 {
    332 	struct cpu_info * const ci = curcpu();
    333 
    334 	/* This shouldn't ever happen.  */
    335 	if (fault_code != FAULT_USER)
    336 		panic("VFP fault in non-user mode");
    337 
    338 	if (ci->ci_vfp_id == 0)
    339 		/* No VFP detected, just fault.  */
    340 		return 1;
    341 
    342 	/*
    343 	 * If we are just changing/fetching FPSCR, don't bother loading it.
    344 	 */
    345 	if (!vfp_fpscr_handler(address, insn, frame, fault_code))
    346 		return 0;
    347 
    348 	pcu_load(&arm_vfp_ops);
    349 
    350 	/* Need to restart the faulted instruction.  */
    351 //	frame->tf_pc -= INSN_SIZE;
    352 	return 0;
    353 }
    354 
    355 static void
    356 vfp_state_load(lwp_t *l, bool used)
    357 {
    358 	struct pcb * const pcb = lwp_getpcb(l);
    359 	struct vfpreg * const fregs = &pcb->pcb_vfp;
    360 
    361 	/*
    362 	 * Instrument VFP usage -- if a process has not previously
    363 	 * used the VFP, mark it as having used VFP for the first time,
    364 	 * and count this event.
    365 	 *
    366 	 * If a process has used the VFP, count a "used VFP, and took
    367 	 * a trap to use it again" event.
    368 	 */
    369 	if (__predict_false((l->l_md.md_flags & MDLWP_VFPUSED) == 0)) {
    370 		vfpevent_use.ev_count++;
    371 		l->l_md.md_flags |= MDLWP_VFPUSED;
    372 		pcb->pcb_vfp.vfp_fpscr =
    373 		    (VFP_FPSCR_DN | VFP_FPSCR_FZ);	/* Runfast */
    374 	} else {
    375 		vfpevent_reuse.ev_count++;
    376 	}
    377 
    378 	if (fregs->vfp_fpexc & VFP_FPEXC_EN) {
    379 		/*
    380 		 * If we think the VFP is enabled, it must have be disabled by
    381 		 * vfp_state_release for another LWP so we can just restore
    382 		 * FPEXC and return since our VFP state is still loaded.
    383 		 */
    384 		write_fpexc(fregs->vfp_fpexc);
    385 		return;
    386 	}
    387 
    388 	/* Enable the VFP (so that we can write the registers).  */
    389 	uint32_t fpexc = read_fpexc();
    390 	KDASSERT((fpexc & VFP_FPEXC_EX) == 0);
    391 	write_fpexc(fpexc | VFP_FPEXC_EN);
    392 
    393 	load_vfpregs_lo(fregs->vfp_regs);
    394 #ifdef CPU_CORTEX
    395 #ifdef CPU_ARM11
    396 	switch (curcpu()->ci_vfp_id) {
    397 	case FPU_VFP_CORTEXA5:
    398 	case FPU_VFP_CORTEXA7:
    399 	case FPU_VFP_CORTEXA8:
    400 	case FPU_VFP_CORTEXA9:
    401 #endif
    402 		load_vfpregs_hi(fregs->vfp_regs);
    403 #ifdef CPU_ARM11
    404 		break;
    405 	}
    406 #endif
    407 #endif
    408 	write_fpscr(fregs->vfp_fpscr);
    409 
    410 	if (fregs->vfp_fpexc & VFP_FPEXC_EX) {
    411 		struct cpu_info * const ci = curcpu();
    412 		/* Need to restore the exception handling state.  */
    413 		switch (ci->ci_vfp_id) {
    414 		case FPU_VFP10_ARM10E:
    415 		case FPU_VFP11_ARM11:
    416 		case FPU_VFP_CORTEXA5:
    417 		case FPU_VFP_CORTEXA7:
    418 		case FPU_VFP_CORTEXA8:
    419 		case FPU_VFP_CORTEXA9:
    420 			write_fpinst2(fregs->vfp_fpinst2);
    421 			write_fpinst(fregs->vfp_fpinst);
    422 			break;
    423 		default:
    424 			panic("%s: Unsupported VFP %#x",
    425 			    __func__, ci->ci_vfp_id);
    426 		}
    427 	}
    428 
    429 	/* Finally, restore the FPEXC but don't enable the VFP. */
    430 	fregs->vfp_fpexc |= VFP_FPEXC_EN;
    431 	write_fpexc(fregs->vfp_fpexc);
    432 }
    433 
    434 void
    435 vfp_state_save(lwp_t *l)
    436 {
    437 	struct pcb * const pcb = lwp_getpcb(l);
    438 	struct vfpreg * const fregs = &pcb->pcb_vfp;
    439 
    440 	/*
    441 	 * If it's already disabled, then the state has been saved
    442 	 * (or discarded).
    443 	 */
    444 	if ((fregs->vfp_fpexc & VFP_FPEXC_EN) == 0)
    445 		return;
    446 
    447 	/*
    448 	 * Enable the VFP (so we can read the registers).
    449 	 * Make sure the exception bit is cleared so that we can
    450 	 * safely dump the registers.
    451 	 */
    452 	uint32_t fpexc = read_fpexc();
    453 	write_fpexc((fpexc | VFP_FPEXC_EN) & ~VFP_FPEXC_EX);
    454 
    455 	fregs->vfp_fpexc = fpexc;
    456 	if (fpexc & VFP_FPEXC_EX) {
    457 		struct cpu_info * const ci = curcpu();
    458 		/* Need to save the exception handling state */
    459 		switch (ci->ci_vfp_id) {
    460 		case FPU_VFP10_ARM10E:
    461 		case FPU_VFP11_ARM11:
    462 		case FPU_VFP_CORTEXA5:
    463 		case FPU_VFP_CORTEXA7:
    464 		case FPU_VFP_CORTEXA8:
    465 		case FPU_VFP_CORTEXA9:
    466 			fregs->vfp_fpinst = read_fpinst();
    467 			fregs->vfp_fpinst2 = read_fpinst2();
    468 			break;
    469 		default:
    470 			panic("%s: Unsupported VFP %#x",
    471 			    __func__, ci->ci_vfp_id);
    472 		}
    473 	}
    474 	fregs->vfp_fpscr = read_fpscr();
    475 	save_vfpregs_lo(fregs->vfp_regs);
    476 #ifdef CPU_CORTEX
    477 #ifdef CPU_ARM11
    478 	switch (curcpu()->ci_vfp_id) {
    479 	case FPU_VFP_CORTEXA5:
    480 	case FPU_VFP_CORTEXA7:
    481 	case FPU_VFP_CORTEXA8:
    482 	case FPU_VFP_CORTEXA9:
    483 #endif
    484 		save_vfpregs_hi(fregs->vfp_regs);
    485 #ifdef CPU_ARM11
    486 		break;
    487 	}
    488 #endif
    489 #endif
    490 
    491 	/* Disable the VFP.  */
    492 	write_fpexc(fpexc);
    493 }
    494 
    495 void
    496 vfp_state_release(lwp_t *l)
    497 {
    498 	struct pcb * const pcb = lwp_getpcb(l);
    499 
    500 	/*
    501 	 * Now mark the VFP as disabled (and our state has been already
    502 	 * saved or is being discarded).
    503 	 */
    504 	pcb->pcb_vfp.vfp_fpexc &= ~VFP_FPEXC_EN;
    505 
    506 	/*
    507 	 * Turn off the FPU so the next time a VFP instruction is issued
    508 	 * an exception happens.  We don't know if this LWP's state was
    509 	 * loaded but if we turned off the FPU for some other LWP, when
    510 	 * pcu_load invokes vfp_state_load it will see that VFP_FPEXC_EN
    511 	 * is still set so it just restroe fpexc and return since its
    512 	 * contents are still sitting in the VFP.
    513 	 */
    514 	write_fpexc(read_fpexc() & ~VFP_FPEXC_EN);
    515 }
    516 
    517 void
    518 vfp_savecontext(void)
    519 {
    520 	pcu_save(&arm_vfp_ops);
    521 }
    522 
    523 void
    524 vfp_discardcontext(void)
    525 {
    526 	pcu_discard(&arm_vfp_ops);
    527 }
    528 
    529 void
    530 vfp_getcontext(struct lwp *l, mcontext_t *mcp, int *flagsp)
    531 {
    532 	if (l->l_md.md_flags & MDLWP_VFPUSED) {
    533 		const struct pcb * const pcb = lwp_getpcb(l);
    534 		pcu_save(&arm_vfp_ops);
    535 		mcp->__fpu.__vfpregs.__vfp_fpscr = pcb->pcb_vfp.vfp_fpscr;
    536 		memcpy(mcp->__fpu.__vfpregs.__vfp_fstmx, pcb->pcb_vfp.vfp_regs,
    537 		    sizeof(mcp->__fpu.__vfpregs.__vfp_fstmx));
    538 		*flagsp |= _UC_FPU|_UC_ARM_VFP;
    539 	}
    540 }
    541 
    542 void
    543 vfp_setcontext(struct lwp *l, const mcontext_t *mcp)
    544 {
    545 	pcu_discard(&arm_vfp_ops);
    546 	struct pcb * const pcb = lwp_getpcb(l);
    547 	l->l_md.md_flags |= MDLWP_VFPUSED;
    548 	pcb->pcb_vfp.vfp_fpscr = mcp->__fpu.__vfpregs.__vfp_fpscr;
    549 	memcpy(pcb->pcb_vfp.vfp_regs, mcp->__fpu.__vfpregs.__vfp_fstmx,
    550 	    sizeof(mcp->__fpu.__vfpregs.__vfp_fstmx));
    551 }
    552 
    553 #endif /* FPU_VFP */
    554