Home | History | Annotate | Line # | Download | only in aarch64
      1 /* $NetBSD: fpu.c,v 1.13 2022/08/20 11:34:08 riastradh Exp $ */
      2 
      3 /*-
      4  * Copyright (c) 2014 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Matt Thomas of 3am Software Foundry.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 
     34 __KERNEL_RCSID(1, "$NetBSD: fpu.c,v 1.13 2022/08/20 11:34:08 riastradh Exp $");
     35 
     36 #include <sys/param.h>
     37 #include <sys/types.h>
     38 #include <sys/cpu.h>
     39 #include <sys/kernel.h>
     40 #include <sys/kthread.h>
     41 #include <sys/lwp.h>
     42 #include <sys/evcnt.h>
     43 
     44 #include <arm/cpufunc.h>
     45 #include <arm/fpu.h>
     46 #include <arm/cpufunc.h>
     47 
     48 #include <aarch64/locore.h>
     49 #include <aarch64/reg.h>
     50 #include <aarch64/pcb.h>
     51 #include <aarch64/armreg.h>
     52 #include <aarch64/machdep.h>
     53 
/* pcu(9) callbacks implementing lazy FP/SIMD context switching. */
static void fpu_state_load(lwp_t *, unsigned int);
static void fpu_state_save(lwp_t *);
static void fpu_state_release(lwp_t *);

/*
 * Glue for the machine-independent pcu(9) framework, which tracks
 * whose FP state is resident on each CPU and calls back here to
 * load, save, or release that state.
 */
const pcu_ops_t pcu_fpu_ops = {
	.pcu_id = PCU_FPU,
	.pcu_state_load = fpu_state_load,
	.pcu_state_save = fpu_state_save,
	.pcu_state_release = fpu_state_release
};
     64 
     65 void
     66 fpu_attach(struct cpu_info *ci)
     67 {
     68 	evcnt_attach_dynamic(&ci->ci_vfp_use, EVCNT_TYPE_MISC, NULL,
     69 	    ci->ci_cpuname, "vfp use");
     70 	evcnt_attach_dynamic(&ci->ci_vfp_reuse, EVCNT_TYPE_MISC, NULL,
     71 	    ci->ci_cpuname, "vfp reuse");
     72 	evcnt_attach_dynamic(&ci->ci_vfp_save, EVCNT_TYPE_MISC, NULL,
     73 	    ci->ci_cpuname, "vfp save");
     74 	evcnt_attach_dynamic(&ci->ci_vfp_release, EVCNT_TYPE_MISC, NULL,
     75 	    ci->ci_cpuname, "vfp release");
     76 }
     77 
/*
 * fpu_state_load(l, flags)
 *
 *	pcu(9) callback: make the FP/SIMD unit usable by lwp l (the
 *	current lwp).  If the lwp has no valid saved state yet
 *	(PCU_VALID clear), synthesize a default state probed from
 *	MVFR1: round-to-nearest, with default-NaN and flush-to-zero
 *	enabled only where the hardware cannot do full IEEE 754
 *	arithmetic.  Then enable FP access and, unless the state is
 *	already resident on this CPU (PCU_REENABLE), load the
 *	registers from the PCB.
 */
static void
fpu_state_load(lwp_t *l, unsigned int flags)
{
	struct pcb * const pcb = lwp_getpcb(l);

	KASSERT(l == curlwp);

	if (__predict_false((flags & PCU_VALID) == 0)) {
		uint64_t mvfr1 = reg_mvfr1_el1_read();
		bool fp16 = false;
		uint32_t fpcr = 0;

		/*
		 * Determine whether ARMv8.2-FP16 binary16
		 * floating-point arithmetic is supported.
		 */
		switch (__SHIFTOUT(mvfr1, MVFR1_FPHP)) {
		case MVFR1_FPHP_HALF_ARITH:
			fp16 = true;
			break;
		}

		/* Rounding mode: round to nearest, ties to even.  */
		fpcr |= __SHIFTIN(FPCR_RN, FPCR_RMODE);

		/* NaN propagation or default NaN.   */
		switch (__SHIFTOUT(mvfr1, MVFR1_FPDNAN)) {
		case MVFR1_FPDNAN_NAN:
			/*
			 * IEEE 754 NaN propagation supported.  Don't
			 * enable default NaN mode.
			 */
			break;
		default:
			/*
			 * IEEE 754 NaN propagation not supported, so
			 * enable default NaN mode.
			 */
			fpcr |= FPCR_DN;
		}

		/* Subnormal arithmetic or flush-to-zero.  */
		switch (__SHIFTOUT(mvfr1, MVFR1_FPFTZ)) {
		case MVFR1_FPFTZ_DENORMAL:
			/*
			 * IEEE 754 subnormal arithmetic supported.
			 * Don't enable flush-to-zero mode.
			 */
			break;
		default:
			/*
			 * IEEE 754 subnormal arithmetic not supported,
			 * so enable flush-to-zero mode.  If FP16 is
			 * supported, also enable flush-to-zero for
			 * binary16 arithmetic.
			 */
			fpcr |= FPCR_FZ;
			if (fp16)
				fpcr |= FPCR_FZ16;
		}

		/* initialize fpregs: all registers zero, chosen FPCR */
		memset(&pcb->pcb_fpregs, 0, sizeof(pcb->pcb_fpregs));
		pcb->pcb_fpregs.fpcr = fpcr;

		curcpu()->ci_vfp_use.ev_count++;
	} else {
		curcpu()->ci_vfp_reuse.ev_count++;
	}

	/*
	 * Allow user process to use FP.  The cached value in
	 * l_md.md_cpacr is kept in sync with the hardware setting.
	 */
	l->l_md.md_cpacr = CPACR_FPEN_ALL;
	reg_cpacr_el1_write(CPACR_FPEN_ALL);
	isb();

	/*
	 * On a plain re-enable the registers still hold this lwp's
	 * state; otherwise reload them from the PCB.
	 */
	if ((flags & PCU_REENABLE) == 0)
		load_fpregs(&pcb->pcb_fpregs);
}
    156 
/*
 * fpu_state_save(l)
 *
 *	pcu(9) callback: save the FP/SIMD registers, currently live on
 *	this CPU, into lwp l's PCB.  EL1 access to the FP registers is
 *	enabled for the duration of the copy (the unit may have been
 *	disabled), and disabled again afterwards.
 */
static void
fpu_state_save(lwp_t *l)
{
	struct pcb * const pcb = lwp_getpcb(l);

	curcpu()->ci_vfp_save.ev_count++;

	reg_cpacr_el1_write(CPACR_FPEN_EL1);	/* fpreg access enable */
	isb();

	save_fpregs(&pcb->pcb_fpregs);

	reg_cpacr_el1_write(CPACR_FPEN_NONE);	/* fpreg access disable */
	isb();
}
    172 
/*
 * fpu_state_release(l)
 *
 *	pcu(9) callback: revoke lwp l's access to the FP unit, so the
 *	next FP use traps and re-enters the pcu(9) machinery.  The
 *	cached value in l_md.md_cpacr is kept in sync with the
 *	hardware setting.
 */
static void
fpu_state_release(lwp_t *l)
{
	curcpu()->ci_vfp_release.ev_count++;

	/* disallow user process to use FP */
	l->l_md.md_cpacr = CPACR_FPEN_NONE;
	reg_cpacr_el1_write(CPACR_FPEN_NONE);
	isb();
}
    183 
/*
 * Implicitly zero-initialized FP register state, loaded to scrub the
 * FP registers after kernel FP use so nothing can leak to userland.
 */
static const struct fpreg zero_fpreg;
    185 
    186 /*
    187  * True if this is a system thread with its own private FPU state.
    188  */
    189 static inline bool
    190 lwp_system_fpu_p(struct lwp *l)
    191 {
    192 
    193 	return (l->l_flag & (LW_SYSTEM|LW_SYSTEM_FPU)) ==
    194 	    (LW_SYSTEM|LW_SYSTEM_FPU);
    195 }
    196 
/*
 * fpu_kern_enter()
 *
 *	Begin a section in which the kernel itself may use the FP
 *	unit.  Any FP state live on this CPU is saved to its owner,
 *	preemption is blocked by raising to splvm, and FP access is
 *	enabled.  Must be paired with fpu_kern_leave().  System
 *	threads with private FP state (LW_SYSTEM_FPU) skip all of
 *	this outside interrupt context.
 */
void
fpu_kern_enter(void)
{
	struct cpu_info *ci;
	int s;

	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
		KASSERT(!cpu_softintr_p());
		return;
	}

	/*
	 * Block interrupts up to IPL_VM.  We must block preemption
	 * since -- if this is a user thread -- there is nowhere to
	 * save the kernel fpu state, and if we want this to be usable
	 * in interrupts, we can't let interrupts interfere with the
	 * fpu state in use since there's nowhere for them to save it.
	 */
	s = splvm();
	ci = curcpu();
#if 0
	/*
	 * Can't assert this because if the caller holds a spin lock at
	 * IPL_VM, and previously held and released a spin lock at
	 * higher IPL, the IPL remains raised above IPL_VM.
	 */
	KASSERTMSG(ci->ci_cpl <= IPL_VM || cold, "cpl=%d", ci->ci_cpl);
#endif
	/* -1 means "no fpu_kern section active"; record the spl to restore. */
	KASSERT(ci->ci_kfpu_spl == -1);
	ci->ci_kfpu_spl = s;

	/* Save any fpu state on the current CPU.  */
	pcu_save_all_on_cpu();

	/*
	 * Enable the fpu, and wait until it is enabled before
	 * executing any further instructions.
	 */
	reg_cpacr_el1_write(CPACR_FPEN_ALL);
	isb();
}
    238 
/*
 * fpu_kern_leave()
 *
 *	End a section begun with fpu_kern_enter(): scrub the FP
 *	registers, disable kernel FP access, and restore the spl that
 *	fpu_kern_enter() recorded in ci_kfpu_spl.
 */
void
fpu_kern_leave(void)
{
	struct cpu_info *ci;
	int s;

	if (lwp_system_fpu_p(curlwp) && !cpu_intr_p()) {
		KASSERT(!cpu_softintr_p());
		return;
	}

	ci = curcpu();

#if 0
	/*
	 * Can't assert this because if the caller holds a spin lock at
	 * IPL_VM, and previously held and released a spin lock at
	 * higher IPL, the IPL remains raised above IPL_VM.
	 */
	KASSERT(ci->ci_cpl == IPL_VM || cold);
#endif
	KASSERT(ci->ci_kfpu_spl != -1);

	/*
	 * Zero the fpu registers; otherwise we might leak secrets
	 * through Spectre-class attacks to userland, even if there are
	 * no bugs in fpu state management.
	 */
	load_fpregs(&zero_fpreg);

	/*
	 * Disable the fpu so that the kernel can't accidentally use
	 * it again.
	 */
	reg_cpacr_el1_write(CPACR_FPEN_NONE);
	isb();

	s = ci->ci_kfpu_spl;
	ci->ci_kfpu_spl = -1;
	splx(s);
}
    280 
/*
 * kthread_fpu_enter_md()
 *
 *	MD hook for kthread_fpu_enter(9): give the calling kernel
 *	thread use of the FP unit by loading its FP state.
 */
void
kthread_fpu_enter_md(void)
{

	fpu_load(curlwp);
}
    287 
/*
 * kthread_fpu_exit_md()
 *
 *	MD hook for kthread_fpu_exit(9): scrub the FP registers and
 *	discard the calling kernel thread's FP state.
 */
void
kthread_fpu_exit_md(void)
{

	/* XXX Should fpu_state_release zero the registers itself?  */
	load_fpregs(&zero_fpreg);
	fpu_discard(curlwp, 0);
}
    296