/*	$NetBSD: vfp_init.c,v 1.12 2012/12/11 01:52:30 matt Exp $	*/

/*
 * Copyright (c) 2008 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/proc.h>
#include <sys/cpu.h>

#include <arm/pcb.h>
#include <arm/undefined.h>
#include <arm/vfpreg.h>
#include <arm/mcontext.h>

#include <uvm/uvm_extern.h>	/* for pmap.h */

/*
 * Use generic co-processor instructions to avoid assembly problems.
 */
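
/*
 * FMRX/FMXR are encoded here as MRC/MCR to coprocessor p10, and the
 * register-file transfers further down as LDC/STC to p11; CP10 and CP11
 * are the coprocessor numbers assigned to VFP, so these forms assemble
 * without having to tell the assembler about the FPU.
 */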

/* FMRX <X>, fpsid */
static inline uint32_t
read_fpsid(void)
{
	uint32_t rv;
	__asm __volatile("mrc p10, 7, %0, c0, c0, 0" : "=r" (rv));
	return rv;
}

/* FMRX <X>, fpscr */
static inline uint32_t
read_fpscr(void)
{
	uint32_t rv;
	__asm __volatile("mrc p10, 7, %0, c1, c0, 0" : "=r" (rv));
	return rv;
}

/* FMRX <X>, fpexc */
static inline uint32_t
read_fpexc(void)
{
	uint32_t rv;
	__asm __volatile("mrc p10, 7, %0, c8, c0, 0" : "=r" (rv));
	return rv;
}

/* FMRX <X>, fpinst */
static inline uint32_t
read_fpinst(void)
{
	uint32_t rv;
	__asm __volatile("mrc p10, 7, %0, c9, c0, 0" : "=r" (rv));
	return rv;
}

/* FMRX <X>, fpinst2 */
static inline uint32_t
read_fpinst2(void)
{
	uint32_t rv;
	__asm __volatile("mrc p10, 7, %0, c10, c0, 0" : "=r" (rv));
	return rv;
}

/* FMXR <X>, fpscr */
#define write_fpscr(X)	__asm __volatile("mcr p10, 7, %0, c1, c0, 0" : \
			    : "r" (X))
/* FMXR <X>, fpexc */
#define write_fpexc(X)	__asm __volatile("mcr p10, 7, %0, c8, c0, 0" : \
			    : "r" (X))
/* FMXR <X>, fpinst */
#define write_fpinst(X)	__asm __volatile("mcr p10, 7, %0, c9, c0, 0" : \
			    : "r" (X))
/* FMXR <X>, fpinst2 */
#define write_fpinst2(X) __asm __volatile("mcr p10, 7, %0, c10, c0, 0" : \
			    : "r" (X))

#ifdef FPU_VFP

/* FLDMD <X>, {d0-d15} */
static inline void
load_vfpregs_lo(uint64_t *p)
{
	/* vldmia rN, {d0-d15} */
	__asm __volatile("ldc\tp11, c0, [%0], {32}" :: "r" (p) : "memory");
}

/* FSTMD <X>, {d0-d15} */
static inline void
save_vfpregs_lo(uint64_t *p)
{
	__asm __volatile("stc\tp11, c0, [%0], {32}" :: "r" (p) : "memory");
}

#ifdef CPU_CORTEX
/* FLDMD <X>, {d16-d31} */
static inline void
load_vfpregs_hi(uint64_t *p)
{
	__asm __volatile("ldcl\tp11, c0, [%0], {32}" :: "r" (&p[16]) : "memory");
}

/* FSTMD <X>, {d16-d31} */
static inline void
save_vfpregs_hi(uint64_t *p)
{
	__asm __volatile("stcl\tp11, c0, [%0], {32}" :: "r" (&p[16]) : "memory");
}
#endif

/* The real handler for VFP bounces. */
static int vfp_handler(u_int, u_int, trapframe_t *, int);

static void vfp_state_load(lwp_t *, bool);
static void vfp_state_save(lwp_t *);
static void vfp_state_release(lwp_t *);

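/*
 * Callbacks for the machine-independent pcu(9) framework, which lazily
 * loads, saves, and releases the per-LWP VFP state on demand.
 */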
const pcu_ops_t arm_vfp_ops = {
	.pcu_id = PCU_FPU,
	.pcu_state_load = vfp_state_load,
	.pcu_state_save = vfp_state_save,
	.pcu_state_release = vfp_state_release,
};

struct evcnt vfpevent_use;
struct evcnt vfpevent_reuse;

/*
 * Used to test for a VFP.  The following function is installed as a
 * coproc10 handler on the undefined instruction vector and then we issue
 * a VFP instruction.  If undefined_test is non-zero then the VFP did not
 * handle the instruction so must be absent, or disabled.
 */

static int undefined_test;

static int
vfp_test(u_int address, u_int insn, trapframe_t *frame, int fault_code)
{

	frame->tf_pc += INSN_SIZE;
	++undefined_test;
	return 0;
}

#endif /* FPU_VFP */

struct evcnt vfp_fpscr_ev =
    EVCNT_INITIALIZER(EVCNT_TYPE_TRAP, NULL, "VFP", "FPSCR traps");
EVCNT_ATTACH_STATIC(vfp_fpscr_ev);

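/*
 * Emulate user moves to and from FPSCR against the copy in the PCB.
 * This is used as the sole coprocessor handler when no VFP is present
 * (or FPU_VFP is not configured) and as a fast path from vfp_handler()
 * when only FPSCR is being accessed.
 */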
static int
vfp_fpscr_handler(u_int address, u_int insn, trapframe_t *frame, int fault_code)
{
	struct lwp * const l = curlwp;
	const u_int regno = (insn >> 12) & 0xf;
	/*
	 * Only match moves to/from the FPSCR register; SP, LR, and PC
	 * cannot be used as the transfer register.
	 */
	if ((insn & 0xffef0fff) != 0xeee10a10 || regno > 12)
		return 1;

	struct pcb * const pcb = lwp_getpcb(l);

#ifdef FPU_VFP
	/*
	 * If FPU is valid somewhere, let's just reenable VFP and
	 * retry the instruction (only safe thing to do since the
	 * pcb has a stale copy).
	 */
	if (pcb->pcb_vfp.vfp_fpexc & VFP_FPEXC_EN)
		return 1;
#endif

	if (__predict_false((l->l_md.md_flags & MDLWP_VFPUSED) == 0)) {
		l->l_md.md_flags |= MDLWP_VFPUSED;
		pcb->pcb_vfp.vfp_fpscr =
		    (VFP_FPSCR_DN | VFP_FPSCR_FZ);	/* Runfast */
	}

	/*
	 * We now know the pcb has the saved copy.
	 */
	register_t * const regp = &frame->tf_r0 + regno;
	if (insn & 0x00100000) {
		*regp = pcb->pcb_vfp.vfp_fpscr;
	} else {
		pcb->pcb_vfp.vfp_fpscr = *regp;
	}

	vfp_fpscr_ev.ev_count++;

	frame->tf_pc += INSN_SIZE;
	return 0;
}

#ifndef FPU_VFP
/*
 * If we don't want VFP support, we still need to handle emulating VFP FPSCR
 * instructions.
 */
void
vfp_attach(void)
{
	install_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
}

#else
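/*
 * Scan the code at "code" for a branch to "func" and, if one is found
 * before the routine returns, retarget it at "newfunc".  vfp_attach()
 * uses this to redirect the pmap copy/zero page helpers to their VFP
 * variants when a usable VFP is present.
 */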
static bool
vfp_patch_branch(uintptr_t code, uintptr_t func, uintptr_t newfunc)
{
	for (;; code += sizeof(uint32_t)) {
		uint32_t insn = *(uint32_t *)code;
		if ((insn & 0xffd08000) == 0xe8908000)	/* ldm ... { pc } */
			return false;
		if ((insn & 0xfffffff0) == 0xe12fff10)	/* bx rN */
			return false;
		if ((insn & 0xf1a0f000) == 0xe1a0f000)	/* mov pc, ... */
			return false;
		if ((insn >> 25) != 0x75)		/* not b/bl insn */
			continue;
		intptr_t imm26 = ((int32_t)insn << 8) >> 6;
		if (code + imm26 + 8 == func) {
			int32_t imm24 = (newfunc - (code + 8)) >> 2;
			uint32_t new_insn = (insn & 0xff000000)
			    | (imm24 & 0xffffff);
			KASSERTMSG((uint32_t)((imm24 >> 24) + 1) <= 1, "%x",
			    ((imm24 >> 24) + 1));
			*(uint32_t *)code = new_insn;
			cpu_idcache_wbinv_range(code, sizeof(uint32_t));
			return true;
		}
	}
}

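/*
 * Probe for and attach the VFP.  On ARM11/Cortex cores, access to the
 * VFP coprocessors is first enabled via CPACR; FPSID is then read with
 * a temporary undefined-instruction handler installed so that a missing
 * or disabled VFP is detected rather than faulting the kernel.
 */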
void
vfp_attach(void)
{
	struct cpu_info * const ci = curcpu();
	const char *model = NULL;
	bool vfp_p = false;

	if (CPU_ID_ARM11_P(curcpu()->ci_arm_cpuid)
	    || CPU_ID_CORTEX_P(curcpu()->ci_arm_cpuid)) {
		const uint32_t cpacr_vfp = CPACR_CPn(VFP_COPROC);
		const uint32_t cpacr_vfp2 = CPACR_CPn(VFP_COPROC2);

		/*
		 * We first need to enable access to the coprocessors.
		 */
		uint32_t cpacr = armreg_cpacr_read();
		cpacr |= __SHIFTIN(CPACR_ALL, cpacr_vfp);
		cpacr |= __SHIFTIN(CPACR_ALL, cpacr_vfp2);
#if 0
		if (CPU_ID_CORTEX_P(curcpu()->ci_arm_cpuid)) {
			/*
			 * Disable access to the upper 16 FP registers and NEON.
			 */
			cpacr |= CPACR_V7_D32DIS;
			cpacr |= CPACR_V7_ASEDIS;
		}
#endif
		armreg_cpacr_write(cpacr);

		/*
		 * If we could enable them, then they exist.
		 */
		cpacr = armreg_cpacr_read();
		vfp_p = __SHIFTOUT(cpacr, cpacr_vfp2) != CPACR_NOACCESS
		    || __SHIFTOUT(cpacr, cpacr_vfp) != CPACR_NOACCESS;
	}

	void *uh = install_coproc_handler(VFP_COPROC, vfp_test);

	undefined_test = 0;

	const uint32_t fpsid = read_fpsid();

	remove_coproc_handler(uh);

	if (undefined_test != 0) {
		aprint_normal_dev(ci->ci_dev, "No VFP detected\n");
		install_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
		ci->ci_vfp_id = 0;
		return;
	}

	ci->ci_vfp_id = fpsid;
	switch (fpsid & ~ VFP_FPSID_REV_MSK) {
	case FPU_VFP10_ARM10E:
		model = "VFP10 R1";
		break;
	case FPU_VFP11_ARM11:
		model = "VFP11";
		break;
	case FPU_VFP_CORTEXA5:
	case FPU_VFP_CORTEXA7:
	case FPU_VFP_CORTEXA8:
	case FPU_VFP_CORTEXA9:
		model = "NEON MPE (VFP 3.0+)";
		break;
	default:
		aprint_normal_dev(ci->ci_dev, "unrecognized VFP version %x\n",
		    fpsid);
		install_coproc_handler(VFP_COPROC, vfp_fpscr_handler);
		return;
	}

	if (fpsid != 0) {
		aprint_normal("vfp%d at %s: %s\n",
		    device_unit(curcpu()->ci_dev), device_xname(curcpu()->ci_dev),
		    model);
	}
	evcnt_attach_dynamic(&vfpevent_use, EVCNT_TYPE_MISC, NULL,
	    "VFP", "coproc use");
	evcnt_attach_dynamic(&vfpevent_reuse, EVCNT_TYPE_MISC, NULL,
	    "VFP", "coproc re-use");
	install_coproc_handler(VFP_COPROC, vfp_handler);
	install_coproc_handler(VFP_COPROC2, vfp_handler);

	vfp_patch_branch((uintptr_t)pmap_copy_page_generic,
	    (uintptr_t)bcopy_page, (uintptr_t)bcopy_page_vfp);
	vfp_patch_branch((uintptr_t)pmap_zero_page_generic,
	    (uintptr_t)bzero_page, (uintptr_t)bzero_page_vfp);
}

/* The real handler for VFP bounces. */
static int
vfp_handler(u_int address, u_int insn, trapframe_t *frame,
    int fault_code)
{
	struct cpu_info * const ci = curcpu();

	/* This shouldn't ever happen. */
	if (fault_code != FAULT_USER)
		panic("VFP fault in non-user mode");

	if (ci->ci_vfp_id == 0)
		/* No VFP detected, just fault. */
		return 1;

	/*
	 * If we are just changing/fetching FPSCR, don't bother loading it.
	 */
	if (!vfp_fpscr_handler(address, insn, frame, fault_code))
		return 0;

	pcu_load(&arm_vfp_ops);

	/* Need to restart the faulted instruction. */
//	frame->tf_pc -= INSN_SIZE;
	return 0;
}

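/*
 * pcu_state_load callback: make this LWP's VFP state current.  If its
 * state is still resident in the unit we only need to re-enable it;
 * otherwise the saved registers (and, if an exception was pending, the
 * FPINST/FPINST2 state) are reloaded from the PCB.
 */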
static void
vfp_state_load(lwp_t *l, bool used)
{
	struct pcb * const pcb = lwp_getpcb(l);
	struct vfpreg * const fregs = &pcb->pcb_vfp;

	/*
	 * Instrument VFP usage -- if a process has not previously
	 * used the VFP, mark it as having used VFP for the first time,
	 * and count this event.
	 *
	 * If a process has used the VFP, count a "used VFP, and took
	 * a trap to use it again" event.
	 */
	if (__predict_false((l->l_md.md_flags & MDLWP_VFPUSED) == 0)) {
		vfpevent_use.ev_count++;
		l->l_md.md_flags |= MDLWP_VFPUSED;
		pcb->pcb_vfp.vfp_fpscr =
		    (VFP_FPSCR_DN | VFP_FPSCR_FZ);	/* Runfast */
	} else {
		vfpevent_reuse.ev_count++;
	}

	if (fregs->vfp_fpexc & VFP_FPEXC_EN) {
		/*
		 * If we think the VFP is enabled, it must have been
		 * disabled by vfp_state_release for another LWP so we
		 * can just restore FPEXC and return since our VFP state
		 * is still loaded.
		 */
		write_fpexc(fregs->vfp_fpexc);
		return;
	}

	/* Enable the VFP (so that we can write the registers). */
	uint32_t fpexc = read_fpexc();
	KDASSERT((fpexc & VFP_FPEXC_EX) == 0);
	write_fpexc(fpexc | VFP_FPEXC_EN);

	load_vfpregs_lo(fregs->vfp_regs);
#ifdef CPU_CORTEX
#ifdef CPU_ARM11
	switch (curcpu()->ci_vfp_id) {
	case FPU_VFP_CORTEXA5:
	case FPU_VFP_CORTEXA7:
	case FPU_VFP_CORTEXA8:
	case FPU_VFP_CORTEXA9:
#endif
		load_vfpregs_hi(fregs->vfp_regs);
#ifdef CPU_ARM11
		break;
	}
#endif
#endif
	write_fpscr(fregs->vfp_fpscr);

	if (fregs->vfp_fpexc & VFP_FPEXC_EX) {
		struct cpu_info * const ci = curcpu();
		/* Need to restore the exception handling state. */
		switch (ci->ci_vfp_id) {
		case FPU_VFP10_ARM10E:
		case FPU_VFP11_ARM11:
		case FPU_VFP_CORTEXA5:
		case FPU_VFP_CORTEXA7:
		case FPU_VFP_CORTEXA8:
		case FPU_VFP_CORTEXA9:
			write_fpinst2(fregs->vfp_fpinst2);
			write_fpinst(fregs->vfp_fpinst);
			break;
		default:
			panic("%s: Unsupported VFP %#x",
			    __func__, ci->ci_vfp_id);
		}
	}

	/* Finally, restore FPEXC with the enable bit set. */
	fregs->vfp_fpexc |= VFP_FPEXC_EN;
	write_fpexc(fregs->vfp_fpexc);
}

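/*
 * pcu_state_save callback: dump the live VFP registers (and any pending
 * exception state) into the PCB, then leave the unit disabled again.
 */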
void
vfp_state_save(lwp_t *l)
{
	struct pcb * const pcb = lwp_getpcb(l);
	struct vfpreg * const fregs = &pcb->pcb_vfp;

	/*
	 * If it's already disabled, then the state has been saved
	 * (or discarded).
	 */
	if ((fregs->vfp_fpexc & VFP_FPEXC_EN) == 0)
		return;

	/*
	 * Enable the VFP (so we can read the registers).
	 * Make sure the exception bit is cleared so that we can
	 * safely dump the registers.
	 */
	uint32_t fpexc = read_fpexc();
	write_fpexc((fpexc | VFP_FPEXC_EN) & ~VFP_FPEXC_EX);

	fregs->vfp_fpexc = fpexc;
	if (fpexc & VFP_FPEXC_EX) {
		struct cpu_info * const ci = curcpu();
		/* Need to save the exception handling state */
		switch (ci->ci_vfp_id) {
		case FPU_VFP10_ARM10E:
		case FPU_VFP11_ARM11:
		case FPU_VFP_CORTEXA5:
		case FPU_VFP_CORTEXA7:
		case FPU_VFP_CORTEXA8:
		case FPU_VFP_CORTEXA9:
			fregs->vfp_fpinst = read_fpinst();
			fregs->vfp_fpinst2 = read_fpinst2();
			break;
		default:
			panic("%s: Unsupported VFP %#x",
			    __func__, ci->ci_vfp_id);
		}
	}
	fregs->vfp_fpscr = read_fpscr();
	save_vfpregs_lo(fregs->vfp_regs);
#ifdef CPU_CORTEX
#ifdef CPU_ARM11
	switch (curcpu()->ci_vfp_id) {
	case FPU_VFP_CORTEXA5:
	case FPU_VFP_CORTEXA7:
	case FPU_VFP_CORTEXA8:
	case FPU_VFP_CORTEXA9:
#endif
		save_vfpregs_hi(fregs->vfp_regs);
#ifdef CPU_ARM11
		break;
	}
#endif
#endif

	/* Disable the VFP. */
	write_fpexc(fpexc);
}

void
vfp_state_release(lwp_t *l)
{
	struct pcb * const pcb = lwp_getpcb(l);

	/*
	 * Now mark the VFP as disabled (our state has already been
	 * saved or is being discarded).
	 */
	pcb->pcb_vfp.vfp_fpexc &= ~VFP_FPEXC_EN;

	/*
	 * Turn off the FPU so the next time a VFP instruction is issued
	 * an exception happens.  We don't know if this LWP's state was
	 * loaded, but if we turned off the FPU for some other LWP, when
	 * pcu_load invokes vfp_state_load it will see that VFP_FPEXC_EN
	 * is still set, so it just restores fpexc and returns since its
	 * contents are still sitting in the VFP.
	 */
	write_fpexc(read_fpexc() & ~VFP_FPEXC_EN);
}

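/*
 * Thin wrappers around pcu(9) used by the rest of the port to force the
 * current LWP's VFP state out to the PCB or to discard it entirely.
 */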
void
vfp_savecontext(void)
{
	pcu_save(&arm_vfp_ops);
}

void
vfp_discardcontext(void)
{
	pcu_discard(&arm_vfp_ops);
}

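/*
 * Copy the LWP's VFP state out to (vfp_getcontext) or in from
 * (vfp_setcontext) an mcontext, e.g. for signal delivery and
 * getcontext/setcontext.
 */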
void
vfp_getcontext(struct lwp *l, mcontext_t *mcp, int *flagsp)
{
	if (l->l_md.md_flags & MDLWP_VFPUSED) {
		const struct pcb * const pcb = lwp_getpcb(l);
		pcu_save(&arm_vfp_ops);
		mcp->__fpu.__vfpregs.__vfp_fpscr = pcb->pcb_vfp.vfp_fpscr;
		memcpy(mcp->__fpu.__vfpregs.__vfp_fstmx, pcb->pcb_vfp.vfp_regs,
		    sizeof(mcp->__fpu.__vfpregs.__vfp_fstmx));
		*flagsp |= _UC_FPU|_UC_ARM_VFP;
	}
}

void
vfp_setcontext(struct lwp *l, const mcontext_t *mcp)
{
	pcu_discard(&arm_vfp_ops);
	struct pcb * const pcb = lwp_getpcb(l);
	l->l_md.md_flags |= MDLWP_VFPUSED;
	pcb->pcb_vfp.vfp_fpscr = mcp->__fpu.__vfpregs.__vfp_fpscr;
	memcpy(pcb->pcb_vfp.vfp_regs, mcp->__fpu.__vfpregs.__vfp_fstmx,
	    sizeof(mcp->__fpu.__vfpregs.__vfp_fstmx));
}

#endif /* FPU_VFP */