/*	$NetBSD: vfp_init.c,v 1.78 2022/08/20 11:34:08 riastradh Exp $	*/

/*
 * Copyright (c) 2008 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_cputypes.h"

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: vfp_init.c,v 1.78 2022/08/20 11:34:08 riastradh Exp $");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/proc.h>
#include <sys/cpu.h>

#include <arm/locore.h>
#include <arm/pcb.h>
#include <arm/undefined.h>
#include <arm/vfpreg.h>
#include <arm/mcontext.h>
#include <arm/fpu.h>

#include <uvm/uvm_extern.h>	/* for pmap.h */

#include <crypto/aes/aes_impl.h>
#include <crypto/aes/arch/arm/aes_neon.h>
#include <crypto/chacha/arch/arm/chacha_neon.h>
#include <crypto/chacha/chacha_impl.h>

#ifdef FPU_VFP

#ifdef CPU_CORTEX
#define SETFPU __asm(".fpu\tvfpv4")
#else
#define SETFPU __asm(".fpu\tvfp")
#endif
SETFPU;

/* FLDMD <X>, {d0-d15} */
static inline void
load_vfpregs_lo(const uint64_t *p)
{
	SETFPU;
	__asm __volatile(".fpu vfp\n vldmia\t%0, {d0-d15}" :: "r" (p) : "memory");
}

/* FSTMD <X>, {d0-d15} */
static inline void
save_vfpregs_lo(uint64_t *p)
{
	SETFPU;
	__asm __volatile(".fpu vfp\n vstmia\t%0, {d0-d15}" :: "r" (p) : "memory");
}

#ifdef CPU_CORTEX
/* FLDMD <X>, {d16-d31} */
static inline void
load_vfpregs_hi(const uint64_t *p)
{
	SETFPU;
	__asm __volatile(".fpu neon-vfpv4\n vldmia\t%0, {d16-d31}" :: "r" (&p[16]) : "memory");
}

/* FSTMD <X>, {d16-d31} */
static inline void
save_vfpregs_hi(uint64_t *p)
{
	SETFPU;
	__asm __volatile(".fpu neon-vfpv4\nvstmia\t%0, {d16-d31}" :: "r" (&p[16]) : "memory");
}
#endif

static inline void
load_vfpregs(const struct vfpreg *fregs)
{
	load_vfpregs_lo(fregs->vfp_regs);
#ifdef CPU_CORTEX
#ifdef CPU_ARM11
	switch (curcpu()->ci_vfp_id) {
	case FPU_VFP_CORTEXA5:
	case FPU_VFP_CORTEXA7:
	case FPU_VFP_CORTEXA8:
	case FPU_VFP_CORTEXA9:
	case FPU_VFP_CORTEXA15:
	case FPU_VFP_CORTEXA15_QEMU:
	case FPU_VFP_CORTEXA53:
	case FPU_VFP_CORTEXA57:
#endif
		load_vfpregs_hi(fregs->vfp_regs);
#ifdef CPU_ARM11
		break;
	}
#endif
#endif
}

static inline void
save_vfpregs(struct vfpreg *fregs)
{
	save_vfpregs_lo(fregs->vfp_regs);
#ifdef CPU_CORTEX
#ifdef CPU_ARM11
	switch (curcpu()->ci_vfp_id) {
	case FPU_VFP_CORTEXA5:
	case FPU_VFP_CORTEXA7:
	case FPU_VFP_CORTEXA8:
	case FPU_VFP_CORTEXA9:
	case FPU_VFP_CORTEXA15:
	case FPU_VFP_CORTEXA15_QEMU:
	case FPU_VFP_CORTEXA53:
	case FPU_VFP_CORTEXA57:
#endif
		save_vfpregs_hi(fregs->vfp_regs);
#ifdef CPU_ARM11
		break;
	}
#endif
#endif
}
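
/*
 * Added commentary (not in the original source): load_vfpregs_hi() and
 * save_vfpregs_hi() exist because NEON/VFPv3-D32 implementations have 32
 * double registers (d0-d31), while plain VFP implementations provide only
 * d0-d15.  The upper bank is touched only under CPU_CORTEX; when CPU_ARM11
 * is configured as well, the ci_vfp_id switch above restricts the d16-d31
 * accesses to cores known to implement them.
 */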

/* The real handler for VFP bounces. */
static int vfp_handler(u_int, u_int, trapframe_t *, int);
#ifdef CPU_CORTEX
static int neon_handler(u_int, u_int, trapframe_t *, int);
#endif

static void vfp_state_load(lwp_t *, u_int);
static void vfp_state_save(lwp_t *);
static void vfp_state_release(lwp_t *);

const pcu_ops_t arm_vfp_ops = {
	.pcu_id = PCU_FPU,
	.pcu_state_save = vfp_state_save,
	.pcu_state_load = vfp_state_load,
	.pcu_state_release = vfp_state_release,
};

/* determine what bits can be changed */
uint32_t vfp_fpscr_changable = VFP_FPSCR_CSUM;
/* default to run fast */
uint32_t vfp_fpscr_default = (VFP_FPSCR_DN | VFP_FPSCR_FZ | VFP_FPSCR_RN);

#else
/* determine what bits can be changed */
uint32_t vfp_fpscr_changable = VFP_FPSCR_CSUM|VFP_FPSCR_ESUM|VFP_FPSCR_RMODE;
#endif /* FPU_VFP */

static int
vfp_fpscr_handler(u_int address, u_int insn, trapframe_t *frame, int fault_code)
{
	struct lwp * const l = curlwp;
	const u_int regno = (insn >> 12) & 0xf;
	/*
	 * Only match moves to/from the FPSCR register, and the
	 * register used may not be SP, LR, or PC.
	 */
	if ((insn & 0xffef0fff) != 0xeee10a10 || regno > 12)
		return 1;

	struct pcb * const pcb = lwp_getpcb(l);

#ifdef FPU_VFP
	/*
	 * If the FPU is valid somewhere, just reenable VFP and
	 * retry the instruction (the only safe thing to do since the
	 * pcb has a stale copy).
	 */
	if (pcb->pcb_vfp.vfp_fpexc & VFP_FPEXC_EN)
		return 1;

	if (__predict_false(!vfp_used_p(l))) {
		pcb->pcb_vfp.vfp_fpscr = vfp_fpscr_default;
	}
#endif

	/*
	 * We now know the pcb has the saved copy.
	 */
	register_t * const regp = &frame->tf_r0 + regno;
	if (insn & 0x00100000) {
		*regp = pcb->pcb_vfp.vfp_fpscr;
	} else {
		pcb->pcb_vfp.vfp_fpscr &= ~vfp_fpscr_changable;
		pcb->pcb_vfp.vfp_fpscr |= *regp & vfp_fpscr_changable;
	}

	curcpu()->ci_vfp_evs[0].ev_count++;

	frame->tf_pc += INSN_SIZE;
	return 0;
}
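
/*
 * Illustrative note (added; not in the original source): the encoding
 * matched above, (insn & 0xffef0fff) == 0xeee10a10, covers the VMSR/VMRS
 * forms that move between a core register and the FPSCR, e.g.
 *
 *	vmrs	r0, fpscr	@ bit 20 set: read FPSCR into r0
 *	vmsr	fpscr, r0	@ bit 20 clear: write r0 to FPSCR, masked
 *				@ by vfp_fpscr_changable above
 *
 * Anything else, or a transfer naming r13-r15, is left unhandled here
 * (return 1) and falls through to normal undefined-instruction handling.
 */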

#ifndef FPU_VFP
void
vfp_detect(struct cpu_info *ci)
{
	ci->ci_vfp_id = 0;
	return;
}

/*
 * If we don't want VFP support, we still need to handle emulating VFP FPSCR
 * instructions.
 */
void
vfp_attach(struct cpu_info *ci)
{
	if (CPU_IS_PRIMARY(ci)) {
		replace_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
	}
	evcnt_attach_dynamic(&ci->ci_vfp_evs[0], EVCNT_TYPE_TRAP, NULL,
	    ci->ci_cpuname, "vfp fpscr traps");
}

#else
void
vfp_detect(struct cpu_info *ci)
{

	if (CPU_ID_ARM11_P(ci->ci_arm_cpuid)
	    || CPU_ID_MV88SV58XX_P(ci->ci_arm_cpuid)
	    || CPU_ID_CORTEX_P(ci->ci_arm_cpuid)) {
#if 0
		const uint32_t nsacr = armreg_nsacr_read();
		const uint32_t nsacr_vfp = __BITS(VFP_COPROC,VFP_COPROC2);
		if ((nsacr & nsacr_vfp) != nsacr_vfp) {
			ci->ci_vfp_id = 0;
			return;
		}
#endif
		const uint32_t cpacr_vfp = CPACR_CPn(VFP_COPROC);
		const uint32_t cpacr_vfp2 = CPACR_CPn(VFP_COPROC2);

		/*
		 * We first need to enable access to the coprocessors.
		 */
		uint32_t cpacr = armreg_cpacr_read();
		cpacr |= __SHIFTIN(CPACR_ALL, cpacr_vfp);
		cpacr |= __SHIFTIN(CPACR_ALL, cpacr_vfp2);
		armreg_cpacr_write(cpacr);

		isb();

		/*
		 * If we could enable them, then they exist.
		 */
		cpacr = armreg_cpacr_read();
		bool vfp_p = __SHIFTOUT(cpacr, cpacr_vfp2) == CPACR_ALL
		    && __SHIFTOUT(cpacr, cpacr_vfp) == CPACR_ALL;
		if (!vfp_p) {
			ci->ci_vfp_id = 0;
			return;
		}
	}

	/* borrow the ci_vfp_id field for VFP detection */
	ci->ci_vfp_id = -1;

	const uint32_t fpsid = armreg_fpsid_read();
	if (ci->ci_vfp_id == 0) {
		return;
	}

	ci->ci_vfp_id = fpsid;

	ci->ci_mvfr[0] = armreg_mvfr0_read();
	ci->ci_mvfr[1] = armreg_mvfr1_read();
}

void
vfp_attach(struct cpu_info *ci)
{
	const char *model = NULL;

	switch (ci->ci_vfp_id & ~VFP_FPSID_REV_MSK) {
	case FPU_VFP10_ARM10E:
		model = "VFP10 R1";
		break;
	case FPU_VFP11_ARM11:
		model = "VFP11";
		break;
	case FPU_VFP_MV88SV58XX:
		model = "VFP3";
		break;
	case FPU_VFP_CORTEXA5:
	case FPU_VFP_CORTEXA7:
	case FPU_VFP_CORTEXA8:
	case FPU_VFP_CORTEXA9:
	case FPU_VFP_CORTEXA12:
	case FPU_VFP_CORTEXA15:
	case FPU_VFP_CORTEXA15_QEMU:
	case FPU_VFP_CORTEXA17:
	case FPU_VFP_CORTEXA53:
	case FPU_VFP_CORTEXA57:
		if (armreg_cpacr_read() & CPACR_V7_ASEDIS) {
			model = "VFP 4.0+";
		} else {
			model = "NEON MPE (VFP 3.0+)";
			cpu_neon_present = 1;
		}
		break;
	default:
		aprint_normal_dev(ci->ci_dev, "unrecognized VFP version %#x\n",
		    ci->ci_vfp_id);
		if (CPU_IS_PRIMARY(ci))
			replace_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
		vfp_fpscr_changable = VFP_FPSCR_CSUM|VFP_FPSCR_ESUM
		    |VFP_FPSCR_RMODE;
		vfp_fpscr_default = 0;
		return;
	}

	cpu_fpu_present = 1;

	const uint32_t f0 = ci->ci_mvfr[0];
	const uint32_t f1 = ci->ci_mvfr[1];
	aprint_normal("vfp%d at %s: %s%s%s%s%s\n",
	    device_unit(ci->ci_dev),
	    device_xname(ci->ci_dev),
	    model,
	    ((f0 & ARM_MVFR0_ROUNDING_MASK) ? ", rounding" : ""),
	    ((f0 & ARM_MVFR0_EXCEPT_MASK) ? ", exceptions" : ""),
	    ((f1 & ARM_MVFR1_D_NAN_MASK) ? ", NaN propagation" : ""),
	    ((f1 & ARM_MVFR1_FTZ_MASK) ? ", denormals" : ""));

	aprint_debug("vfp%d: mvfr: [0]=%#x [1]=%#x\n",
	    device_unit(ci->ci_dev), f0, f1);

	if (CPU_IS_PRIMARY(ci)) {
		cpu_media_and_vfp_features[0] = f0;
		cpu_media_and_vfp_features[1] = f1;

		if (f0 & ARM_MVFR0_ROUNDING_MASK) {
			vfp_fpscr_changable |= VFP_FPSCR_RMODE;
		}
		if (f1 & ARM_MVFR0_EXCEPT_MASK) {
			vfp_fpscr_changable |= VFP_FPSCR_ESUM;
		}
		// If hardware supports propagation of NaNs, select it.
		if (f1 & ARM_MVFR1_D_NAN_MASK) {
			vfp_fpscr_default &= ~VFP_FPSCR_DN;
			vfp_fpscr_changable |= VFP_FPSCR_DN;
		}
		// If hardware supports denormalized numbers, use it.
		if (f1 & ARM_MVFR1_FTZ_MASK) {
			vfp_fpscr_default &= ~VFP_FPSCR_FZ;
			vfp_fpscr_changable |= VFP_FPSCR_FZ;
		}

		replace_coproc_handler(VFP_COPROC, vfp_handler);
		install_coproc_handler(VFP_COPROC2, vfp_handler);
#ifdef CPU_CORTEX
		if (cpu_neon_present) {
			install_coproc_handler(CORE_UNKNOWN_HANDLER,
			    neon_handler);
			aes_md_init(&aes_neon_impl);
			chacha_md_init(&chacha_neon_impl);
		}
#endif
	}

	evcnt_attach_dynamic(&ci->ci_vfp_evs[0], EVCNT_TYPE_MISC, NULL,
	    ci->ci_cpuname, "vfp coproc use");
	evcnt_attach_dynamic(&ci->ci_vfp_evs[1], EVCNT_TYPE_MISC, NULL,
	    ci->ci_cpuname, "vfp coproc re-use");
	evcnt_attach_dynamic(&ci->ci_vfp_evs[2], EVCNT_TYPE_TRAP, NULL,
	    ci->ci_cpuname, "vfp coproc fault");
}
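
/*
 * Added example (not from the original source): on a typical Cortex-A
 * core with NEON, the banner printed by vfp_attach() above might read
 * something like
 *
 *	vfp0 at cpu0: NEON MPE (VFP 3.0+), rounding, exceptions, NaN propagation, denormals
 *
 * with the feature suffixes derived from the MVFR0/MVFR1 bits tested in
 * the aprint_normal() call.
 */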

/* The real handler for VFP bounces. */
static int
vfp_handler(u_int address, u_int insn, trapframe_t *frame, int fault_code)
{
	struct cpu_info * const ci = curcpu();
	uint32_t fpexc;

	/* This shouldn't ever happen. */
	if (fault_code != FAULT_USER &&
	    (curlwp->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) == LW_SYSTEM)
		panic("VFP fault at %#x in non-user mode", frame->tf_pc);

	if (ci->ci_vfp_id == 0) {
		/* No VFP detected, just fault. */
		return 1;
	}

	/*
	 * If we already own the FPU and it's enabled (and no exception), raise
	 * SIGILL.  If there is an exception, raise SIGFPE.
	 */
	if (curlwp->l_pcu_cpu[PCU_FPU] == ci) {
		KASSERT(ci->ci_pcu_curlwp[PCU_FPU] == curlwp);

		fpexc = armreg_fpexc_read();
		if (fpexc & VFP_FPEXC_EN) {
			if ((fpexc & VFP_FPEXC_EX) == 0) {
				return 1;	/* SIGILL */
			} else {
				goto fpe;	/* SIGFPE; skip pcu_load(9) */
			}
		}
	}

	/*
	 * Make sure we own the FP.
	 */
	pcu_load(&arm_vfp_ops);

	fpexc = armreg_fpexc_read();
	if (fpexc & VFP_FPEXC_EX) {
		ksiginfo_t ksi;
		KASSERT(fpexc & VFP_FPEXC_EN);

fpe:
		curcpu()->ci_vfp_evs[2].ev_count++;

		/*
		 * Need to clear the exception condition so any signal
		 * and future use can proceed.
		 */
		armreg_fpexc_write(fpexc & ~(VFP_FPEXC_EX|VFP_FPEXC_FSUM));

		pcu_save(&arm_vfp_ops, curlwp);

		/*
		 * XXX Need to emulate bounce instructions here to get correct
		 * XXX exception codes, etc.
		 */
		KSI_INIT_TRAP(&ksi);
		ksi.ksi_signo = SIGFPE;
		if (fpexc & VFP_FPEXC_IXF)
			ksi.ksi_code = FPE_FLTRES;
		else if (fpexc & VFP_FPEXC_UFF)
			ksi.ksi_code = FPE_FLTUND;
		else if (fpexc & VFP_FPEXC_OFF)
			ksi.ksi_code = FPE_FLTOVF;
		else if (fpexc & VFP_FPEXC_DZF)
			ksi.ksi_code = FPE_FLTDIV;
		else if (fpexc & VFP_FPEXC_IOF)
			ksi.ksi_code = FPE_FLTINV;
		ksi.ksi_addr = (uint32_t *)address;
		ksi.ksi_trap = 0;
		trapsignal(curlwp, &ksi);
		return 0;
	}

	/* Need to restart the faulted instruction. */
//	frame->tf_pc -= INSN_SIZE;
	return 0;
}

#ifdef CPU_CORTEX
/* The real handler for NEON bounces. */
static int
neon_handler(u_int address, u_int insn, trapframe_t *frame, int fault_code)
{
	struct cpu_info * const ci = curcpu();

	if (ci->ci_vfp_id == 0)
		/* No VFP detected, just fault. */
		return 1;

	if ((insn & 0xfe000000) != 0xf2000000
	    && (insn & 0xfe000000) != 0xf4000000)
		/* Not NEON instruction, just fault. */
		return 1;

	/* This shouldn't ever happen. */
	if (fault_code != FAULT_USER &&
	    (curlwp->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) == LW_SYSTEM)
		panic("NEON fault in non-user mode");

	/* if we already own the FPU and it's enabled, raise SIGILL */
	if (curcpu()->ci_pcu_curlwp[PCU_FPU] == curlwp
	    && (armreg_fpexc_read() & VFP_FPEXC_EN) != 0)
		return 1;

	pcu_load(&arm_vfp_ops);

	/* Need to restart the faulted instruction. */
//	frame->tf_pc -= INSN_SIZE;
	return 0;
}
#endif

static void
vfp_state_load(lwp_t *l, u_int flags)
{
	struct pcb * const pcb = lwp_getpcb(l);
	struct vfpreg * const fregs = &pcb->pcb_vfp;

	/*
	 * Instrument VFP usage -- if a process has not previously
	 * used the VFP, mark it as having used VFP for the first time,
	 * and count this event.
	 *
	 * If a process has used the VFP, count a "used VFP, and took
	 * a trap to use it again" event.
	 */
	if (__predict_false((flags & PCU_VALID) == 0)) {
		curcpu()->ci_vfp_evs[0].ev_count++;
		pcb->pcb_vfp.vfp_fpscr = vfp_fpscr_default;
	} else {
		curcpu()->ci_vfp_evs[1].ev_count++;
	}

	KASSERT((armreg_fpexc_read() & VFP_FPEXC_EN) == 0);
	/*
	 * If the VFP is already enabled we must be bouncing an instruction.
	 */
	if (flags & PCU_REENABLE) {
		uint32_t fpexc = armreg_fpexc_read();
		armreg_fpexc_write(fpexc | VFP_FPEXC_EN);
		fregs->vfp_fpexc |= VFP_FPEXC_EN;
		return;
	}
	KASSERT((fregs->vfp_fpexc & VFP_FPEXC_EN) == 0);

	/*
	 * Load and Enable the VFP (so that we can write the registers).
	 */
	fregs->vfp_fpexc |= VFP_FPEXC_EN;
	armreg_fpexc_write(fregs->vfp_fpexc);
	KASSERT(curcpu()->ci_pcu_curlwp[PCU_FPU] == NULL);
	KASSERT(l->l_pcu_cpu[PCU_FPU] == NULL);

	load_vfpregs(fregs);
	armreg_fpscr_write(fregs->vfp_fpscr);

	if (fregs->vfp_fpexc & VFP_FPEXC_EX) {
		/* Need to restore the exception handling state.  */
		armreg_fpinst_write(fregs->vfp_fpinst);
		if (fregs->vfp_fpexc & VFP_FPEXC_FP2V)
			armreg_fpinst2_write(fregs->vfp_fpinst2);
	}
}

void
vfp_state_save(lwp_t *l)
{
	struct pcb * const pcb = lwp_getpcb(l);
	struct vfpreg * const fregs = &pcb->pcb_vfp;
	uint32_t fpexc = armreg_fpexc_read();

	KASSERT(curcpu()->ci_pcu_curlwp[PCU_FPU] == l);
	KASSERT(curcpu() == l->l_pcu_cpu[PCU_FPU]);
	KASSERT(curlwp == l || curlwp->l_pcu_cpu[PCU_FPU] != curcpu());
	/*
	 * Enable the VFP (so we can read the registers).
	 * Make sure the exception bit is cleared so that we can
	 * safely dump the registers.
	 */
	armreg_fpexc_write((fpexc | VFP_FPEXC_EN) & ~VFP_FPEXC_EX);

	fregs->vfp_fpexc = fpexc;
	if (fpexc & VFP_FPEXC_EX) {
		/* Need to save the exception handling state */
		fregs->vfp_fpinst = armreg_fpinst_read();
		if (fpexc & VFP_FPEXC_FP2V)
			fregs->vfp_fpinst2 = armreg_fpinst2_read();
	}
	fregs->vfp_fpscr = armreg_fpscr_read();
	save_vfpregs(fregs);

	/* Disable the VFP.  */
	armreg_fpexc_write(fpexc & ~VFP_FPEXC_EN);
}

void
vfp_state_release(lwp_t *l)
{
	struct pcb * const pcb = lwp_getpcb(l);

	/*
	 * Now mark the VFP as disabled (and our state
	 * has been already saved or is being discarded).
	 */
	pcb->pcb_vfp.vfp_fpexc &= ~VFP_FPEXC_EN;

	/*
	 * Turn off the FPU so the next time a VFP instruction is issued
	 * an exception happens.  We don't know if this LWP's state was
	 * loaded but if we turned off the FPU for some other LWP, when
	 * pcu_load invokes vfp_state_load it will see that VFP_FPEXC_EN
	 * is still set, so it just restores fpexc and returns since its
	 * contents are still sitting in the VFP.
	 */
	armreg_fpexc_write(armreg_fpexc_read() & ~VFP_FPEXC_EN);
}
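
/*
 * Added commentary (not in the original source): the three callbacks above
 * are only ever invoked through the machine-independent pcu(9) layer via
 * arm_vfp_ops.  Roughly:
 *
 *	pcu_load(&arm_vfp_ops)		-> vfp_state_load()	(use FPU now)
 *	pcu_save(&arm_vfp_ops, l)	-> vfp_state_save()	(spill to pcb)
 *	pcu_discard(&arm_vfp_ops, l, ..) -> vfp_state_release()	(forget state)
 *
 * Cross-CPU saves are arranged by pcu(9) itself, so nothing in this file
 * calls the state callbacks directly for an LWP running elsewhere.
 */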

void
vfp_savecontext(lwp_t *l)
{
	pcu_save(&arm_vfp_ops, l);
}

void
vfp_discardcontext(lwp_t *l, bool used_p)
{
	pcu_discard(&arm_vfp_ops, l, used_p);
}

bool
vfp_used_p(const lwp_t *l)
{
	return pcu_valid_p(&arm_vfp_ops, l);
}

void
vfp_getcontext(struct lwp *l, mcontext_t *mcp, int *flagsp)
{
	if (vfp_used_p(l)) {
		const struct pcb * const pcb = lwp_getpcb(l);

		pcu_save(&arm_vfp_ops, l);
		mcp->__fpu.__vfpregs.__vfp_fpscr = pcb->pcb_vfp.vfp_fpscr;
		memcpy(mcp->__fpu.__vfpregs.__vfp_fstmx, pcb->pcb_vfp.vfp_regs,
		    sizeof(mcp->__fpu.__vfpregs.__vfp_fstmx));
		*flagsp |= _UC_FPU|_UC_ARM_VFP;
	}
}

void
vfp_setcontext(struct lwp *l, const mcontext_t *mcp)
{
	struct pcb * const pcb = lwp_getpcb(l);

	pcu_discard(&arm_vfp_ops, l, true);
	pcb->pcb_vfp.vfp_fpscr = mcp->__fpu.__vfpregs.__vfp_fpscr;
	memcpy(pcb->pcb_vfp.vfp_regs, mcp->__fpu.__vfpregs.__vfp_fstmx,
	    sizeof(mcp->__fpu.__vfpregs.__vfp_fstmx));
}

/*
 * True if this is a system thread with its own private FPU state.
 */
static inline bool
lwp_system_fpu_p(struct lwp *l)
{

	return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) ==
	    (LW_SYSTEM|LW_SYSTEM_FPU);
}

static const struct vfpreg zero_vfpreg;

void
fpu_kern_enter(void)
{
	struct cpu_info *ci;
	uint32_t fpexc;
	int s;

	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
		KASSERT(!cpu_softintr_p());
		return;
	}

	/*
	 * Block interrupts up to IPL_VM.  We must block preemption
	 * since -- if this is a user thread -- there is nowhere to
	 * save the kernel fpu state, and if we want this to be usable
	 * in interrupts, we can't let interrupts interfere with the
	 * fpu state in use since there's nowhere for them to save it.
	 */
	s = splvm();
	ci = curcpu();
#if 0
	/*
	 * Can't assert this because if the caller holds a spin lock at
	 * IPL_VM, and previously held and released a spin lock at
	 * higher IPL, the IPL remains raised above IPL_VM.
	 */
	KASSERTMSG(ci->ci_cpl <= IPL_VM || cold, "cpl=%d", ci->ci_cpl);
#endif
	KASSERT(ci->ci_kfpu_spl == -1);
	ci->ci_kfpu_spl = s;

	/* Save any fpu state on the current CPU. */
	pcu_save_all_on_cpu();

	/* Enable the fpu. */
	fpexc = armreg_fpexc_read();
	fpexc |= VFP_FPEXC_EN;
	fpexc &= ~VFP_FPEXC_EX;
	armreg_fpexc_write(fpexc);
}

void
fpu_kern_leave(void)
{
	struct cpu_info *ci = curcpu();
	int s;
	uint32_t fpexc;

	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
		KASSERT(!cpu_softintr_p());
		return;
	}

#if 0
	/*
	 * Can't assert this because if the caller holds a spin lock at
	 * IPL_VM, and previously held and released a spin lock at
	 * higher IPL, the IPL remains raised above IPL_VM.
	 */
	KASSERT(ci->ci_cpl == IPL_VM || cold);
#endif
	KASSERT(ci->ci_kfpu_spl != -1);

	/*
	 * Zero the fpu registers; otherwise we might leak secrets
	 * through Spectre-class attacks to userland, even if there are
	 * no bugs in fpu state management.
	 */
	load_vfpregs(&zero_vfpreg);

	/*
	 * Disable the fpu so that the kernel can't accidentally use
	 * it again.
	 */
	fpexc = armreg_fpexc_read();
	fpexc &= ~VFP_FPEXC_EN;
	armreg_fpexc_write(fpexc);

	/* Restore interrupts. */
	s = ci->ci_kfpu_spl;
	ci->ci_kfpu_spl = -1;
	splx(s);
}
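
/*
 * Added usage sketch (not part of the original file): kernel code that
 * wants to execute VFP/NEON instructions directly -- as the AES and
 * ChaCha implementations hooked up in vfp_attach() do -- must bracket
 * that use with the two functions above, e.g.:
 *
 *	fpu_kern_enter();
 *	...issue VFP/NEON instructions; no sleeping in between...
 *	fpu_kern_leave();
 *
 * Long-running kernel threads can instead use kthread_fpu_enter(9),
 * which is backed by the *_md() hooks below.
 */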

void
kthread_fpu_enter_md(void)
{

	pcu_load(&arm_vfp_ops);
}

void
kthread_fpu_exit_md(void)
{

	/* XXX Should vfp_state_release zero the registers itself? */
	load_vfpregs(&zero_vfpreg);
	vfp_discardcontext(curlwp, 0);
}

#endif /* FPU_VFP */