vfp_init.c revision 1.11 1 /* $NetBSD: vfp_init.c,v 1.11 2012/12/10 01:35:28 matt Exp $ */
2
3 /*
4 * Copyright (c) 2008 ARM Ltd
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. The name of the company may not be used to endorse or promote
16 * products derived from this software without specific prior written
17 * permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY
23 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
25 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
28 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
29 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/param.h>
33 #include <sys/types.h>
34 #include <sys/systm.h>
35 #include <sys/device.h>
36 #include <sys/proc.h>
37 #include <sys/cpu.h>
38
39 #include <arm/pcb.h>
40 #include <arm/undefined.h>
41 #include <arm/vfpreg.h>
42 #include <arm/mcontext.h>
43
44 /*
45 * Use generic co-processor instructions to avoid assembly problems.
46 */
47
/*
 * Read FPSID (FMRX <X>, fpsid).
 * Issued as a generic MRC on coprocessor 10, register c0.
 */
static inline uint32_t
read_fpsid(void)
{
	uint32_t fpsid;

	__asm __volatile("mrc p10, 7, %0, c0, c0, 0" : "=r" (fpsid));

	return fpsid;
}
56
/*
 * Read FPSCR (FMRX <X>, fpscr).
 * Issued as a generic MRC on coprocessor 10, register c1 -- the same
 * register that write_fpscr() targets.
 */
static inline uint32_t
read_fpscr(void)
{
	uint32_t fpscr;

	__asm __volatile("mrc p10, 7, %0, c1, c0, 0" : "=r" (fpscr));

	return fpscr;
}
65
/*
 * Read FPEXC (FMRX <X>, fpexc).
 * Issued as a generic MRC on coprocessor 10, register c8.
 */
static inline uint32_t
read_fpexc(void)
{
	uint32_t fpexc;

	__asm __volatile("mrc p10, 7, %0, c8, c0, 0" : "=r" (fpexc));

	return fpexc;
}
74
/*
 * Read FPINST (FMRX <X>, fpinst).
 * Issued as a generic MRC on coprocessor 10, register c9.
 */
static inline uint32_t
read_fpinst(void)
{
	uint32_t fpinst;

	__asm __volatile("mrc p10, 7, %0, c9, c0, 0" : "=r" (fpinst));

	return fpinst;
}
83
/*
 * Read FPINST2 (FMRX <X>, fpinst2).
 * Issued as a generic MRC on coprocessor 10, register c10.
 */
static inline uint32_t
read_fpinst2(void)
{
	uint32_t fpinst2;

	__asm __volatile("mrc p10, 7, %0, c10, c0, 0" : "=r" (fpinst2));

	return fpinst2;
}
92
/*
 * Writers for the VFP system registers.  Every one of them is the same
 * generic MCR on coprocessor 10 and differs only in the target register,
 * so the asm template is assembled from a single helper.  The resulting
 * instruction strings are identical to spelling each one out by hand.
 */
#define	VFP_SYSREG_WRITE(crn, X)					\
	__asm __volatile("mcr p10, 7, %0, " crn ", c0, 0" : : "r" (X))

/* FMXR <X>, fpscr */
#define	write_fpscr(X)		VFP_SYSREG_WRITE("c1", X)
/* FMXR <X>, fpexc */
#define	write_fpexc(X)		VFP_SYSREG_WRITE("c8", X)
/* FMXR <X>, fpinst */
#define	write_fpinst(X)		VFP_SYSREG_WRITE("c9", X)
/* FMXR <X>, fpinst2 */
#define	write_fpinst2(X)	VFP_SYSREG_WRITE("c10", X)
105
106 #ifdef FPU_VFP
107
/*
 * Load d0-d15 from the buffer at p (FLDMD <X>, {d0-d15}, i.e.
 * "vldmia rN, {d0-d15}"), written as a generic LDC on coprocessor 11.
 * The "memory" clobber keeps the compiler from caching the buffer
 * contents across the instruction.
 */
static inline void
load_vfpregs_lo(uint64_t *p)
{
	__asm __volatile("ldc\tp11, c0, [%0], {32}"
	    :
	    : "r" (p)
	    : "memory");
}
115
/*
 * Store d0-d15 into the buffer at p (FSTMD <X>, {d0-d15}), written as a
 * generic STC on coprocessor 11.  The "memory" clobber tells the
 * compiler the buffer is modified behind its back.
 */
static inline void
save_vfpregs_lo(uint64_t *p)
{
	__asm __volatile("stc\tp11, c0, [%0], {32}"
	    :
	    : "r" (p)
	    : "memory");
}
122
123 #ifdef CPU_CORTEX
/*
 * Load d16-d31 (FLDMD <X>, {d16-d31}) via the long form of the generic
 * coprocessor 11 load.  p is the base of the whole register save area;
 * the upper bank starts 16 doublewords in.
 */
static inline void
load_vfpregs_hi(uint64_t *p)
{
	uint64_t * const hi = p + 16;

	__asm __volatile("ldcl\tp11, c0, [%0], {32}"
	    :
	    : "r" (hi)
	    : "memory");
}
130
/*
 * Store d16-d31 (FSTMD <X>, {d16-d31}) via the long form of the generic
 * coprocessor 11 store.  p is the base of the whole register save area;
 * the upper bank starts 16 doublewords in.
 */
static inline void
save_vfpregs_hi(uint64_t *p)
{
	uint64_t * const hi = p + 16;

	__asm __volatile("stcl\tp11, c0, [%0], {32}"
	    :
	    : "r" (hi)
	    : "memory");
}
137 #endif
138
139 /* The real handler for VFP bounces. */
140 static int vfp_handler(u_int, u_int, trapframe_t *, int);
141 static int vfp_handler(u_int, u_int, trapframe_t *, int);
142
143 static void vfp_state_load(lwp_t *, bool);
144 static void vfp_state_save(lwp_t *);
145 static void vfp_state_release(lwp_t *);
146
147 const pcu_ops_t arm_vfp_ops = {
148 .pcu_id = PCU_FPU,
149 .pcu_state_load = vfp_state_load,
150 .pcu_state_save = vfp_state_save,
151 .pcu_state_release = vfp_state_release,
152 };
153
154 struct evcnt vfpevent_use;
155 struct evcnt vfpevent_reuse;
156
157 /*
158 * Used to test for a VFP. The following function is installed as a coproc10
159 * handler on the undefined instruction vector and then we issue a VFP
160 * instruction. If undefined_test is non zero then the VFP did not handle
161 * the instruction so must be absent, or disabled.
162 */
163
164 static int undefined_test;
165
166 static int
167 vfp_test(u_int address, u_int insn, trapframe_t *frame, int fault_code)
168 {
169
170 frame->tf_pc += INSN_SIZE;
171 ++undefined_test;
172 return 0;
173 }
174
175 #endif /* FPU_VFP */
176
177 struct evcnt vfp_fpscr_ev =
178 EVCNT_INITIALIZER(EVCNT_TYPE_TRAP, NULL, "VFP", "FPSCR traps");
179 EVCNT_ATTACH_STATIC(vfp_fpscr_ev);
180
181 static int
182 vfp_fpscr_handler(u_int address, u_int insn, trapframe_t *frame, int fault_code)
183 {
184 struct lwp * const l = curlwp;
185 const u_int regno = (insn >> 12) & 0xf;
186 /*
187 * Only match move to/from the FPSCR register and we
188 * can't be using the SP,LR,PC as a source.
189 */
190 if ((insn & 0xffef0fff) != 0xeee10a10 || regno > 12)
191 return 1;
192
193 struct pcb * const pcb = lwp_getpcb(l);
194
195 #ifdef FPU_VFP
196 /*
197 * If FPU is valid somewhere, let's just reenable VFP and
198 * retry the instruction (only safe thing to do since the
199 * pcb has a stale copy).
200 */
201 if (pcb->pcb_vfp.vfp_fpexc & VFP_FPEXC_EN)
202 return 1;
203 #endif
204
205 if (__predict_false((l->l_md.md_flags & MDLWP_VFPUSED) == 0)) {
206 l->l_md.md_flags |= MDLWP_VFPUSED;
207 pcb->pcb_vfp.vfp_fpscr =
208 (VFP_FPSCR_DN | VFP_FPSCR_FZ); /* Runfast */
209 }
210
211 /*
212 * We know know the pcb has the saved copy.
213 */
214 register_t * const regp = &frame->tf_r0 + regno;
215 if (insn & 0x00100000) {
216 *regp = pcb->pcb_vfp.vfp_fpscr;
217 } else {
218 pcb->pcb_vfp.vfp_fpscr = *regp;
219 }
220
221 vfp_fpscr_ev.ev_count++;
222
223 frame->tf_pc += INSN_SIZE;
224 return 0;
225 }
226
227 #ifndef FPU_VFP
228 /*
229 * If we don't want VFP support, we still need to handle emulating VFP FPSCR
230 * instructions.
231 */
232 void
233 vfp_attach(void)
234 {
235 install_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
236 }
237
238 #else
239 void
240 vfp_attach(void)
241 {
242 struct cpu_info * const ci = curcpu();
243 const char *model = NULL;
244 bool vfp_p = false;
245
246 #ifdef FPU_VFP
247 if (CPU_ID_ARM11_P(curcpu()->ci_arm_cpuid)
248 || CPU_ID_CORTEX_P(curcpu()->ci_arm_cpuid)) {
249 const uint32_t cpacr_vfp = CPACR_CPn(VFP_COPROC);
250 const uint32_t cpacr_vfp2 = CPACR_CPn(VFP_COPROC2);
251
252 /*
253 * We first need to enable access to the coprocessors.
254 */
255 uint32_t cpacr = armreg_cpacr_read();
256 cpacr |= __SHIFTIN(CPACR_ALL, cpacr_vfp);
257 cpacr |= __SHIFTIN(CPACR_ALL, cpacr_vfp2);
258 #if 0
259 if (CPU_ID_CORTEX_P(curcpu()->ci_arm_cpuid)) {
260 /*
261 * Disable access to the upper 16 FP registers and NEON.
262 */
263 cpacr |= CPACR_V7_D32DIS;
264 cpacr |= CPACR_V7_ASEDIS;
265 }
266 #endif
267 armreg_cpacr_write(cpacr);
268
269 /*
270 * If we could enable them, then they exist.
271 */
272 cpacr = armreg_cpacr_read();
273 vfp_p = __SHIFTOUT(cpacr, cpacr_vfp2) != CPACR_NOACCESS
274 || __SHIFTOUT(cpacr, cpacr_vfp) != CPACR_NOACCESS;
275 }
276 #endif
277
278 void *uh = install_coproc_handler(VFP_COPROC, vfp_test);
279
280 undefined_test = 0;
281
282 const uint32_t fpsid = read_fpsid();
283
284 remove_coproc_handler(uh);
285
286 if (undefined_test != 0) {
287 aprint_normal_dev(ci->ci_dev, "No VFP detected\n");
288 install_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
289 ci->ci_vfp_id = 0;
290 return;
291 }
292
293 ci->ci_vfp_id = fpsid;
294 switch (fpsid & ~ VFP_FPSID_REV_MSK) {
295 case FPU_VFP10_ARM10E:
296 model = "VFP10 R1";
297 break;
298 case FPU_VFP11_ARM11:
299 model = "VFP11";
300 break;
301 case FPU_VFP_CORTEXA5:
302 case FPU_VFP_CORTEXA7:
303 case FPU_VFP_CORTEXA8:
304 case FPU_VFP_CORTEXA9:
305 model = "NEON MPE (VFP 3.0+)";
306 break;
307 default:
308 aprint_normal_dev(ci->ci_dev, "unrecognized VFP version %x\n",
309 fpsid);
310 install_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
311 return;
312 }
313
314 if (fpsid != 0) {
315 aprint_normal("vfp%d at %s: %s\n",
316 device_unit(curcpu()->ci_dev), device_xname(curcpu()->ci_dev),
317 model);
318 }
319 evcnt_attach_dynamic(&vfpevent_use, EVCNT_TYPE_MISC, NULL,
320 "VFP", "proc use");
321 evcnt_attach_dynamic(&vfpevent_reuse, EVCNT_TYPE_MISC, NULL,
322 "VFP", "proc re-use");
323 install_coproc_handler(VFP_COPROC, vfp_handler);
324 install_coproc_handler(VFP_COPROC2, vfp_handler);
325 }
326
327 /* The real handler for VFP bounces. */
328 static int
329 vfp_handler(u_int address, u_int insn, trapframe_t *frame,
330 int fault_code)
331 {
332 struct cpu_info * const ci = curcpu();
333
334 /* This shouldn't ever happen. */
335 if (fault_code != FAULT_USER)
336 panic("VFP fault in non-user mode");
337
338 if (ci->ci_vfp_id == 0)
339 /* No VFP detected, just fault. */
340 return 1;
341
342 /*
343 * If we are just changing/fetching FPSCR, don't bother loading it.
344 */
345 if (!vfp_fpscr_handler(address, insn, frame, fault_code))
346 return 0;
347
348 pcu_load(&arm_vfp_ops);
349
350 /* Need to restart the faulted instruction. */
351 // frame->tf_pc -= INSN_SIZE;
352 return 0;
353 }
354
355 static void
356 vfp_state_load(lwp_t *l, bool used)
357 {
358 struct pcb * const pcb = lwp_getpcb(l);
359 struct vfpreg * const fregs = &pcb->pcb_vfp;
360
361 /*
362 * Instrument VFP usage -- if a process has not previously
363 * used the VFP, mark it as having used VFP for the first time,
364 * and count this event.
365 *
366 * If a process has used the VFP, count a "used VFP, and took
367 * a trap to use it again" event.
368 */
369 if (__predict_false((l->l_md.md_flags & MDLWP_VFPUSED) == 0)) {
370 vfpevent_use.ev_count++;
371 l->l_md.md_flags |= MDLWP_VFPUSED;
372 pcb->pcb_vfp.vfp_fpscr =
373 (VFP_FPSCR_DN | VFP_FPSCR_FZ); /* Runfast */
374 } else {
375 vfpevent_reuse.ev_count++;
376 }
377
378 if (fregs->vfp_fpexc & VFP_FPEXC_EN) {
379 /*
380 * If we think the VFP is enabled, it must have be disabled by
381 * vfp_state_release for another LWP so we can just restore
382 * FPEXC and return since our VFP state is still loaded.
383 */
384 write_fpexc(fregs->vfp_fpexc);
385 return;
386 }
387
388 /* Enable the VFP (so that we can write the registers). */
389 uint32_t fpexc = read_fpexc();
390 KDASSERT((fpexc & VFP_FPEXC_EX) == 0);
391 write_fpexc(fpexc | VFP_FPEXC_EN);
392
393 load_vfpregs_lo(fregs->vfp_regs);
394 #ifdef CPU_CORTEX
395 #ifdef CPU_ARM11
396 switch (curcpu()->ci_vfp_id) {
397 case FPU_VFP_CORTEXA5:
398 case FPU_VFP_CORTEXA7:
399 case FPU_VFP_CORTEXA8:
400 case FPU_VFP_CORTEXA9:
401 #endif
402 load_vfpregs_hi(fregs->vfp_regs);
403 #ifdef CPU_ARM11
404 break;
405 }
406 #endif
407 #endif
408 write_fpscr(fregs->vfp_fpscr);
409
410 if (fregs->vfp_fpexc & VFP_FPEXC_EX) {
411 struct cpu_info * const ci = curcpu();
412 /* Need to restore the exception handling state. */
413 switch (ci->ci_vfp_id) {
414 case FPU_VFP10_ARM10E:
415 case FPU_VFP11_ARM11:
416 case FPU_VFP_CORTEXA5:
417 case FPU_VFP_CORTEXA7:
418 case FPU_VFP_CORTEXA8:
419 case FPU_VFP_CORTEXA9:
420 write_fpinst2(fregs->vfp_fpinst2);
421 write_fpinst(fregs->vfp_fpinst);
422 break;
423 default:
424 panic("%s: Unsupported VFP %#x",
425 __func__, ci->ci_vfp_id);
426 }
427 }
428
429 /* Finally, restore the FPEXC but don't enable the VFP. */
430 fregs->vfp_fpexc |= VFP_FPEXC_EN;
431 write_fpexc(fregs->vfp_fpexc);
432 }
433
434 void
435 vfp_state_save(lwp_t *l)
436 {
437 struct pcb * const pcb = lwp_getpcb(l);
438 struct vfpreg * const fregs = &pcb->pcb_vfp;
439
440 /*
441 * If it's already disabled, then the state has been saved
442 * (or discarded).
443 */
444 if ((fregs->vfp_fpexc & VFP_FPEXC_EN) == 0)
445 return;
446
447 /*
448 * Enable the VFP (so we can read the registers).
449 * Make sure the exception bit is cleared so that we can
450 * safely dump the registers.
451 */
452 uint32_t fpexc = read_fpexc();
453 write_fpexc((fpexc | VFP_FPEXC_EN) & ~VFP_FPEXC_EX);
454
455 fregs->vfp_fpexc = fpexc;
456 if (fpexc & VFP_FPEXC_EX) {
457 struct cpu_info * const ci = curcpu();
458 /* Need to save the exception handling state */
459 switch (ci->ci_vfp_id) {
460 case FPU_VFP10_ARM10E:
461 case FPU_VFP11_ARM11:
462 case FPU_VFP_CORTEXA5:
463 case FPU_VFP_CORTEXA7:
464 case FPU_VFP_CORTEXA8:
465 case FPU_VFP_CORTEXA9:
466 fregs->vfp_fpinst = read_fpinst();
467 fregs->vfp_fpinst2 = read_fpinst2();
468 break;
469 default:
470 panic("%s: Unsupported VFP %#x",
471 __func__, ci->ci_vfp_id);
472 }
473 }
474 fregs->vfp_fpscr = read_fpscr();
475 save_vfpregs_lo(fregs->vfp_regs);
476 #ifdef CPU_CORTEX
477 #ifdef CPU_ARM11
478 switch (curcpu()->ci_vfp_id) {
479 case FPU_VFP_CORTEXA5:
480 case FPU_VFP_CORTEXA7:
481 case FPU_VFP_CORTEXA8:
482 case FPU_VFP_CORTEXA9:
483 #endif
484 save_vfpregs_hi(fregs->vfp_regs);
485 #ifdef CPU_ARM11
486 break;
487 }
488 #endif
489 #endif
490
491 /* Disable the VFP. */
492 write_fpexc(fpexc);
493 }
494
495 void
496 vfp_state_release(lwp_t *l)
497 {
498 struct pcb * const pcb = lwp_getpcb(l);
499
500 /*
501 * Now mark the VFP as disabled (and our state has been already
502 * saved or is being discarded).
503 */
504 pcb->pcb_vfp.vfp_fpexc &= ~VFP_FPEXC_EN;
505
506 /*
507 * Turn off the FPU so the next time a VFP instruction is issued
508 * an exception happens. We don't know if this LWP's state was
509 * loaded but if we turned off the FPU for some other LWP, when
510 * pcu_load invokes vfp_state_load it will see that VFP_FPEXC_EN
511 * is still set so it just restroe fpexc and return since its
512 * contents are still sitting in the VFP.
513 */
514 write_fpexc(read_fpexc() & ~VFP_FPEXC_EN);
515 }
516
517 void
518 vfp_savecontext(void)
519 {
520 pcu_save(&arm_vfp_ops);
521 }
522
523 void
524 vfp_discardcontext(void)
525 {
526 pcu_discard(&arm_vfp_ops);
527 }
528
529 void
530 vfp_getcontext(struct lwp *l, mcontext_t *mcp, int *flagsp)
531 {
532 if (l->l_md.md_flags & MDLWP_VFPUSED) {
533 const struct pcb * const pcb = lwp_getpcb(l);
534 pcu_save(&arm_vfp_ops);
535 mcp->__fpu.__vfpregs.__vfp_fpscr = pcb->pcb_vfp.vfp_fpscr;
536 memcpy(mcp->__fpu.__vfpregs.__vfp_fstmx, pcb->pcb_vfp.vfp_regs,
537 sizeof(mcp->__fpu.__vfpregs.__vfp_fstmx));
538 *flagsp |= _UC_FPU|_UC_ARM_VFP;
539 }
540 }
541
542 void
543 vfp_setcontext(struct lwp *l, const mcontext_t *mcp)
544 {
545 pcu_discard(&arm_vfp_ops);
546 struct pcb * const pcb = lwp_getpcb(l);
547 l->l_md.md_flags |= MDLWP_VFPUSED;
548 pcb->pcb_vfp.vfp_fpscr = mcp->__fpu.__vfpregs.__vfp_fpscr;
549 memcpy(pcb->pcb_vfp.vfp_regs, mcp->__fpu.__vfpregs.__vfp_fstmx,
550 sizeof(mcp->__fpu.__vfpregs.__vfp_fstmx));
551 }
552
553 #endif /* FPU_VFP */
554