/* $NetBSD: fpu.c,v 1.13 2022/08/20 11:34:08 riastradh Exp $ */

/*-
 * Copyright (c) 2014 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>

__KERNEL_RCSID(1, "$NetBSD: fpu.c,v 1.13 2022/08/20 11:34:08 riastradh Exp $");

#include <sys/param.h>
#include <sys/types.h>
#include <sys/cpu.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lwp.h>
#include <sys/evcnt.h>

#include <arm/cpufunc.h>
#include <arm/fpu.h>

#include <aarch64/locore.h>
#include <aarch64/reg.h>
#include <aarch64/pcb.h>
#include <aarch64/armreg.h>
#include <aarch64/machdep.h>

static void fpu_state_load(lwp_t *, unsigned int);
static void fpu_state_save(lwp_t *);
static void fpu_state_release(lwp_t *);

const pcu_ops_t pcu_fpu_ops = {
	.pcu_id = PCU_FPU,
	.pcu_state_load = fpu_state_load,
	.pcu_state_save = fpu_state_save,
	.pcu_state_release = fpu_state_release
};

void
fpu_attach(struct cpu_info *ci)
{
	evcnt_attach_dynamic(&ci->ci_vfp_use, EVCNT_TYPE_MISC, NULL,
	    ci->ci_cpuname, "vfp use");
	evcnt_attach_dynamic(&ci->ci_vfp_reuse, EVCNT_TYPE_MISC, NULL,
	    ci->ci_cpuname, "vfp reuse");
	evcnt_attach_dynamic(&ci->ci_vfp_save, EVCNT_TYPE_MISC, NULL,
	    ci->ci_cpuname, "vfp save");
	evcnt_attach_dynamic(&ci->ci_vfp_release, EVCNT_TYPE_MISC, NULL,
	    ci->ci_cpuname, "vfp release");
}
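
/*
 * fpu_state_load: PCU callback that gives the current lwp access to the
 * FP/SIMD unit. On first use (PCU_VALID clear) the fpregs in the PCB are
 * zeroed and an initial FPCR is chosen from MVFR1: round to nearest,
 * default-NaN mode only if IEEE 754 NaN propagation is not implemented,
 * and flush-to-zero (plus FZ16 when ARMv8.2-FP16 is present) only if
 * IEEE 754 subnormal arithmetic is not implemented. When the state is
 * merely being re-enabled on the same CPU (PCU_REENABLE), the registers
 * are not reloaded.
 */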
static void
fpu_state_load(lwp_t *l, unsigned int flags)
{
	struct pcb * const pcb = lwp_getpcb(l);

	KASSERT(l == curlwp);

	if (__predict_false((flags & PCU_VALID) == 0)) {
		uint64_t mvfr1 = reg_mvfr1_el1_read();
		bool fp16 = false;
		uint32_t fpcr = 0;

		/*
		 * Determine whether ARMv8.2-FP16 binary16
		 * floating-point arithmetic is supported.
		 */
		switch (__SHIFTOUT(mvfr1, MVFR1_FPHP)) {
		case MVFR1_FPHP_HALF_ARITH:
			fp16 = true;
			break;
		}

		/* Rounding mode: round to nearest, ties to even. */
		fpcr |= __SHIFTIN(FPCR_RN, FPCR_RMODE);

		/* NaN propagation or default NaN. */
		switch (__SHIFTOUT(mvfr1, MVFR1_FPDNAN)) {
		case MVFR1_FPDNAN_NAN:
			/*
			 * IEEE 754 NaN propagation supported. Don't
			 * enable default NaN mode.
			 */
			break;
		default:
			/*
			 * IEEE 754 NaN propagation not supported, so
			 * enable default NaN mode.
			 */
			fpcr |= FPCR_DN;
		}

		/* Subnormal arithmetic or flush-to-zero. */
		switch (__SHIFTOUT(mvfr1, MVFR1_FPFTZ)) {
		case MVFR1_FPFTZ_DENORMAL:
			/*
			 * IEEE 754 subnormal arithmetic supported.
			 * Don't enable flush-to-zero mode.
			 */
			break;
		default:
			/*
			 * IEEE 754 subnormal arithmetic not supported,
			 * so enable flush-to-zero mode. If FP16 is
			 * supported, also enable flush-to-zero for
			 * binary16 arithmetic.
			 */
			fpcr |= FPCR_FZ;
			if (fp16)
				fpcr |= FPCR_FZ16;
		}

		/* Initialize the FP register state in the PCB. */
		memset(&pcb->pcb_fpregs, 0, sizeof(pcb->pcb_fpregs));
		pcb->pcb_fpregs.fpcr = fpcr;

		curcpu()->ci_vfp_use.ev_count++;
	} else {
		curcpu()->ci_vfp_reuse.ev_count++;
	}

	/* Allow the user process to use the FP/SIMD registers. */
	l->l_md.md_cpacr = CPACR_FPEN_ALL;
	reg_cpacr_el1_write(CPACR_FPEN_ALL);
	isb();

	if ((flags & PCU_REENABLE) == 0)
		load_fpregs(&pcb->pcb_fpregs);
}

static void
fpu_state_save(lwp_t *l)
{
	struct pcb * const pcb = lwp_getpcb(l);

	curcpu()->ci_vfp_save.ev_count++;

	reg_cpacr_el1_write(CPACR_FPEN_EL1);	/* enable fpreg access */
	isb();

	save_fpregs(&pcb->pcb_fpregs);

	reg_cpacr_el1_write(CPACR_FPEN_NONE);	/* disable fpreg access */
	isb();
}

static void
fpu_state_release(lwp_t *l)
{
	curcpu()->ci_vfp_release.ev_count++;

	/* Disallow the user process from using the FP/SIMD registers. */
	l->l_md.md_cpacr = CPACR_FPEN_NONE;
	reg_cpacr_el1_write(CPACR_FPEN_NONE);
	isb();
}

static const struct fpreg zero_fpreg;

/*
 * True if this is a system thread with its own private FPU state.
 */
static inline bool
lwp_system_fpu_p(struct lwp *l)
{

	return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) ==
	    (LW_SYSTEM|LW_SYSTEM_FPU);
}
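
/*
 * fpu_kern_enter/fpu_kern_leave bracket synchronous in-kernel use of the
 * FP/SIMD registers. A minimal usage sketch follows; the caller shown is
 * hypothetical and not part of this file:
 *
 *	fpu_kern_enter();
 *	foo_encrypt_neon(ctx, buf, len);	(hypothetical NEON routine)
 *	fpu_kern_leave();
 *
 * When the current lwp is not a system thread with private FPU state
 * (see lwp_system_fpu_p() above), fpu_kern_enter() raises the IPL to
 * IPL_VM, so the bracketed code must not sleep.
 */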
void
fpu_kern_enter(void)
{
	struct cpu_info *ci;
	int s;

	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
		KASSERT(!cpu_softintr_p());
		return;
	}

	/*
	 * Block interrupts up to IPL_VM. We must block preemption
	 * since -- if this is a user thread -- there is nowhere to
	 * save the kernel fpu state, and if we want this to be usable
	 * in interrupts, we can't let interrupts interfere with the
	 * fpu state in use since there's nowhere for them to save it.
	 */
	s = splvm();
	ci = curcpu();
#if 0
	/*
	 * Can't assert this because if the caller holds a spin lock at
	 * IPL_VM, and previously held and released a spin lock at
	 * higher IPL, the IPL remains raised above IPL_VM.
	 */
	KASSERTMSG(ci->ci_cpl <= IPL_VM || cold, "cpl=%d", ci->ci_cpl);
#endif
	KASSERT(ci->ci_kfpu_spl == -1);
	ci->ci_kfpu_spl = s;

	/* Save any fpu state on the current CPU. */
	pcu_save_all_on_cpu();

	/*
	 * Enable the fpu, and wait until it is enabled before
	 * executing any further instructions.
	 */
	reg_cpacr_el1_write(CPACR_FPEN_ALL);
	isb();
}

void
fpu_kern_leave(void)
{
	struct cpu_info *ci;
	int s;

	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
		KASSERT(!cpu_softintr_p());
		return;
	}

	ci = curcpu();

#if 0
	/*
	 * Can't assert this because if the caller holds a spin lock at
	 * IPL_VM, and previously held and released a spin lock at
	 * higher IPL, the IPL remains raised above IPL_VM.
	 */
	KASSERT(ci->ci_cpl == IPL_VM || cold);
#endif
	KASSERT(ci->ci_kfpu_spl != -1);

	/*
	 * Zero the fpu registers; otherwise we might leak secrets
	 * through Spectre-class attacks to userland, even if there are
	 * no bugs in fpu state management.
	 */
	load_fpregs(&zero_fpreg);

	/*
	 * Disable the fpu so that the kernel can't accidentally use
	 * it again.
	 */
	reg_cpacr_el1_write(CPACR_FPEN_NONE);
	isb();

	s = ci->ci_kfpu_spl;
	ci->ci_kfpu_spl = -1;
	splx(s);
}

void
kthread_fpu_enter_md(void)
{

	fpu_load(curlwp);
}

void
kthread_fpu_exit_md(void)
{

	/* XXX Should fpu_state_release zero the registers itself? */
	load_fpregs(&zero_fpreg);
	fpu_discard(curlwp, 0);
}