fpu.c revision 1.16 1 /* $NetBSD: fpu.c,v 1.16 2008/08/28 08:45:26 skrll Exp $ */
2
3 /*
4 * Copyright (c) 2002 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matthew Fredette.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * FPU handling for NetBSD/hppa.
34 */
35
36 #include <sys/cdefs.h>
37 __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.16 2008/08/28 08:45:26 skrll Exp $");
38
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/proc.h>
42 #include <sys/signalvar.h>
43 #include <sys/user.h>
44
45 #include <machine/cpu.h>
46 #include <machine/cpufunc.h>
47 #include <machine/frame.h>
48 #include <machine/reg.h>
49
50 #include <hppa/hppa/machdep.h>
51
52 #include "../spmath/float.h"
53 #include "../spmath/fpudispatch.h"
54
/* Complete instruction words. */
#define OPCODE_NOP		0x08000240	/* patched over locore.S insns */
#define OPCODE_COPR_0_0		0x30000000	/* the copr,0,0 instruction */

/*
 * Bit fields of the coprocessor load/store opcodes, as tested by
 * hppa_fpu_ls().
 *
 * OPCODE_CMPLT is the mask that isolates the two completer bits.
 * For an indexed access the masked value is compared with _S
 * (index scaled by the operand size), _M (base register modified)
 * and _SM (both).  For a short-displacement access it is compared
 * with _MB (displacement applied before the access) and _MA
 * (displacement applied after the access).
 *
 * NOTE(review): _MB is the m-bit alone and _MA is both bits.  The
 * PA-RISC short-displacement encoding presumably has these the
 * other way around (,mb = both bits, ,ma = m-bit only) -- TODO
 * confirm against the architecture manual before relying on them.
 */
#define OPCODE_CMPLT_S		0x00002000
#define OPCODE_CMPLT_M		0x00000020
#define OPCODE_CMPLT		(OPCODE_CMPLT_S | OPCODE_CMPLT_M)
#define OPCODE_CMPLT_SM		(OPCODE_CMPLT_S | OPCODE_CMPLT_M)
#define OPCODE_CMPLT_MB		OPCODE_CMPLT_M
#define OPCODE_CMPLT_MA		(OPCODE_CMPLT_S | OPCODE_CMPLT_M)

/* Set for a doubleword access, clear for a word access. */
#define OPCODE_DOUBLE		0x08000000
/* Set for a store, clear for a load. */
#define OPCODE_STORE		0x00000200
/* Selects the indexed form over the short-displacement form. */
#define OPCODE_INDEXED		0x00001000
69
/*
 * This is nonzero iff we're using a hardware FPU.  It is set by
 * hppa_fpu_bootstrap(), which requires every HPPA_FPUS bit to be
 * set in the coprocessor enable mask it is given.
 */
int fpu_present;

/*
 * If we have any FPU, this is its version: the first word that
 * hppa_fpu_bootstrap() obtained from the copr,0,0 instruction
 * (executed on the hardware, or fed to the emulator under FPEMUL).
 */
u_int fpu_version;

/*
 * The number of times we have had to switch the FPU context.
 * Zeroed by hppa_fpu_bootstrap(); nothing else in this file
 * updates it -- presumably the swap code in locore.S does,
 * TODO confirm.
 */
u_int fpu_csw;

/*
 * The U-space physical address of the proc in the FPU, or zero.
 * hppa_fpu_flush() compares this with tf_cr30 of an LWP's
 * trapframe to decide whether that LWP's state is in the FPU.
 */
paddr_t fpu_cur_uspace;

/*
 * In locore.S, this swaps states in and out of the FPU.
 * hppa_fpu_flush() calls it with the owning LWP's PCB as the
 * first argument and NULL as the second to write the state out
 * without loading another one.
 */
void hppa_fpu_swap(struct pcb *, struct pcb *);
84
#ifdef FPEMUL
/*
 * Access to the general registers saved in a trapframe.
 *
 * The registers are not stored in numerical order, so
 * _frame_reg_positions[] maps a general register number to the
 * distance, counted in trapframe members, between tf_t1 and the
 * member that holds the register.  _FRAME_POSITION() computes
 * one such distance at compile time.
 *
 * FRAME_REG(f, reg, r0) yields a pointer to register `reg' of
 * trapframe `f'.  Register zero has no slot in the trapframe, so
 * for it the address of the caller-supplied variable `r0' is
 * returned instead and the table is not consulted.
 *
 * NB: this more or less assumes that all members of
 * struct trapframe are u_ints.
 */
#define _FRAME_POSITION(f)						\
	((&((struct trapframe *) 0)->f) - (&((struct trapframe *) 0)->tf_t1))
#define FRAME_REG(f, reg, r0)						\
	((reg) == 0 ? (&(r0)) : ((&(f)->tf_t1) + _frame_reg_positions[reg]))

const int _frame_reg_positions[32] = {
	[0]  = -1,				/* r0, not used by FRAME_REG */
	[1]  = _FRAME_POSITION(tf_r1),
	[2]  = _FRAME_POSITION(tf_rp),
	[3]  = _FRAME_POSITION(tf_r3),
	[4]  = _FRAME_POSITION(tf_r4),
	[5]  = _FRAME_POSITION(tf_r5),
	[6]  = _FRAME_POSITION(tf_r6),
	[7]  = _FRAME_POSITION(tf_r7),
	[8]  = _FRAME_POSITION(tf_r8),
	[9]  = _FRAME_POSITION(tf_r9),
	[10] = _FRAME_POSITION(tf_r10),
	[11] = _FRAME_POSITION(tf_r11),
	[12] = _FRAME_POSITION(tf_r12),
	[13] = _FRAME_POSITION(tf_r13),
	[14] = _FRAME_POSITION(tf_r14),
	[15] = _FRAME_POSITION(tf_r15),
	[16] = _FRAME_POSITION(tf_r16),
	[17] = _FRAME_POSITION(tf_r17),
	[18] = _FRAME_POSITION(tf_r18),
	[19] = _FRAME_POSITION(tf_t4),
	[20] = _FRAME_POSITION(tf_t3),
	[21] = _FRAME_POSITION(tf_t2),
	[22] = _FRAME_POSITION(tf_t1),
	[23] = _FRAME_POSITION(tf_arg3),
	[24] = _FRAME_POSITION(tf_arg2),
	[25] = _FRAME_POSITION(tf_arg1),
	[26] = _FRAME_POSITION(tf_arg0),
	[27] = _FRAME_POSITION(tf_dp),
	[28] = _FRAME_POSITION(tf_ret0),
	[29] = _FRAME_POSITION(tf_ret1),
	[30] = _FRAME_POSITION(tf_sp),
	[31] = _FRAME_POSITION(tf_r31),
};
#endif /* FPEMUL */
135
136 /*
137 * Bootstraps the FPU.
138 */
139 void
140 hppa_fpu_bootstrap(u_int ccr_enable)
141 {
142 u_int32_t junk[2];
143 u_int32_t vers[2];
144 extern u_int hppa_fpu_nop0;
145 extern u_int hppa_fpu_nop1;
146
147 /* See if we have a present and functioning hardware FPU. */
148 fpu_present = (ccr_enable & HPPA_FPUS) == HPPA_FPUS;
149
150 /* Initialize the FPU and get its version. */
151 if (fpu_present) {
152
153 /*
154 * To somewhat optimize the emulation
155 * assist trap handling and context
156 * switching (to save them from having
157 * to always load and check fpu_present),
158 * there are two instructions in locore.S
159 * that are replaced with nops when
160 * there is a hardware FPU.
161 */
162 hppa_fpu_nop0 = OPCODE_NOP;
163 hppa_fpu_nop1 = OPCODE_NOP;
164 fcacheall();
165
166 /*
167 * We track what process has the FPU,
168 * and how many times we have to swap
169 * in and out.
170 */
171
172 /*
173 * The PA-RISC 1.1 Architecture manual is
174 * pretty clear that the copr,0,0 must be
175 * wrapped in double word stores of fr0,
176 * otherwise its operation is undefined.
177 */
178 __asm volatile(
179 " ldo %0, %%r22 \n"
180 " fstds %%fr0, 0(%%r22) \n"
181 " ldo %1, %%r22 \n"
182 " copr,0,0 \n"
183 " fstds %%fr0, 0(%%r22) \n"
184 : "=m" (junk), "=m" (vers) : : "r22");
185
186 /*
187 * Now mark that no process has the FPU,
188 * and disable it, so the first time it
189 * gets used the process' state gets
190 * swapped in.
191 */
192 fpu_csw = 0;
193 fpu_cur_uspace = 0;
194 mtctl(ccr_enable & (CCR_MASK ^ HPPA_FPUS), CR_CCR);
195 }
196 #ifdef FPEMUL
197 else
198 /*
199 * XXX This is a hack - to avoid
200 * having to set up the emulator so
201 * it can work for one instruction for
202 * proc0, we dispatch the copr,0,0 opcode
203 * into the emulator directly.
204 */
205 decode_0c(OPCODE_COPR_0_0, 0, 0, vers);
206 #endif /* FPEMUL */
207 fpu_version = vers[0];
208 }
209
210 /*
211 * If the given LWP has its state in the FPU,
212 * flush that state out into the LWP's PCB.
213 */
214 void
215 hppa_fpu_flush(struct lwp *l)
216 {
217 struct trapframe *tf = l->l_md.md_regs;
218
219 /*
220 * If we have a hardware FPU, and this process'
221 * state is currently in it, swap it out.
222 */
223
224 if (!fpu_present || fpu_cur_uspace == 0 ||
225 fpu_cur_uspace != tf->tf_cr30) {
226 return;
227 }
228
229 hppa_fpu_swap(&l->l_addr->u_pcb, NULL);
230 fpu_cur_uspace = 0;
231 }
232
233 #ifdef FPEMUL
234
235 /*
236 * This emulates a coprocessor load/store instruction.
237 */
238 static int hppa_fpu_ls(struct trapframe *, struct lwp *);
239 static int
240 hppa_fpu_ls(struct trapframe *frame, struct lwp *l)
241 {
242 u_int inst, inst_b, inst_x, inst_s, inst_t;
243 int log2size;
244 u_int *base;
245 u_int offset, index, im5;
246 void *fpreg;
247 u_int r0 = 0;
248 int error;
249
250 /*
251 * Get the instruction that we're emulating,
252 * and break it down. Using HP bit notation,
253 * b is a five-bit field starting at bit 10,
254 * x is a five-bit field starting at bit 15,
255 * s is a two-bit field starting at bit 17,
256 * and t is a five-bit field starting at bit 31.
257 */
258 inst = frame->tf_iir;
259 __asm volatile(
260 " extru %4, 10, 5, %1 \n"
261 " extru %4, 15, 5, %2 \n"
262 " extru %4, 17, 2, %3 \n"
263 " extru %4, 31, 5, %4 \n"
264 : "=r" (inst_b), "=r" (inst_x), "=r" (inst_s), "=r" (inst_t)
265 : "r" (inst));
266
267 /*
268 * The space must be the user's space, else we
269 * segfault.
270 */
271 if (inst_s != l->l_addr->u_pcb.pcb_space)
272 return (EFAULT);
273
274 /* See whether or not this is a doubleword load/store. */
275 log2size = (inst & OPCODE_DOUBLE) ? 3 : 2;
276
277 /* Get the floating point register. */
278 fpreg = ((char *)l->l_addr->u_pcb.pcb_fpregs) + (inst_t << log2size);
279
280 /* Get the base register. */
281 base = FRAME_REG(frame, inst_b, r0);
282
283 /* Dispatch on whether or not this is an indexed load/store. */
284 if (inst & OPCODE_INDEXED) {
285
286 /* Get the index register value. */
287 index = *FRAME_REG(frame, inst_x, r0);
288
289 /* Dispatch on the completer. */
290 switch (inst & OPCODE_CMPLT) {
291 case OPCODE_CMPLT_S:
292 offset = *base + (index << log2size);
293 break;
294 case OPCODE_CMPLT_M:
295 offset = *base;
296 *base = *base + index;
297 break;
298 case OPCODE_CMPLT_SM:
299 offset = *base;
300 *base = *base + (index << log2size);
301 break;
302 default:
303 offset = *base + index;
304 break;
305 }
306 } else {
307
308 /* Do a low_sign_ext(x, 5). */
309 im5 = inst_x >> 1;
310 if (inst_x & 1)
311 im5 |= 0xfffffff0;
312
313 /* Dispatch on the completer. */
314 switch (inst & OPCODE_CMPLT) {
315 case OPCODE_CMPLT_MB:
316 offset = *base + im5;
317 *base = *base + im5;
318 break;
319 case OPCODE_CMPLT_MA:
320 offset = *base;
321 *base = *base + im5;
322 break;
323 default:
324 offset = *base + im5;
325 break;
326 }
327 }
328
329 /*
330 * The offset we calculated must be the same as the
331 * offset in the IOR.
332 */
333 KASSERT(offset == frame->tf_ior);
334
335 /* Perform the load or store. */
336 error = (inst & OPCODE_STORE) ?
337 copyout(fpreg, (void *) offset, 1 << log2size) :
338 copyin((const void *) offset, fpreg, 1 << log2size);
339 fdcache(HPPA_SID_KERNEL, (vaddr_t)fpreg,
340 sizeof(l->l_addr->u_pcb.pcb_fpregs));
341 return error;
342 }
343
344 /*
345 * This is called to emulate an instruction.
346 */
347 void
348 hppa_fpu_emulate(struct trapframe *frame, struct lwp *l, u_int inst)
349 {
350 u_int opcode, class, sub;
351 u_int *fpregs;
352 int exception;
353 ksiginfo_t ksi;
354
355 /*
356 * If the process' state is in any hardware FPU,
357 * flush it out - we need to operate on it.
358 */
359 hppa_fpu_flush(l);
360
361 /*
362 * Get the instruction that we're emulating,
363 * and break it down. Using HP bit notation,
364 * the class is a two-bit field starting at
365 * bit 22, the opcode is a 6-bit field starting
366 * at bit 5, and sub for a class 1 instruction
367 * is a two bit field starting at bit 16, else
368 * it is a three bit field starting at bit 18.
369 */
370 #if 0
371 __asm volatile(
372 " extru %3, 22, 2, %1 \n"
373 " extru %3, 5, 6, %0 \n"
374 " extru %3, 18, 3, %2 \n"
375 " comib,<> 1, %1, 0 \n"
376 " extru %3, 16, 2, %2 \n"
377 : "=r" (opcode), "=r" (class), "=r" (sub)
378 : "r" (inst));
379 #else
380 opcode = (inst >> (31 - 5)) & 0x3f;
381 class = (inst >> (31 - 22)) & 0x3;
382 if (class == 1) {
383 sub = (inst >> (31 - 16)) & 3;
384 } else {
385 sub = (inst >> (31 - 18)) & 7;
386 }
387 #endif
388
389 /* Get this LWP's FPU registers. */
390 fpregs = (u_int *) l->l_addr->u_pcb.pcb_fpregs;
391
392 /* Dispatch on the opcode. */
393 switch (opcode) {
394 case 0x09:
395 case 0x0b:
396 if (hppa_fpu_ls(frame, l) != 0) {
397 KSI_INIT_TRAP(&ksi);
398 ksi.ksi_signo = SIGSEGV;
399 ksi.ksi_code = SEGV_MAPERR;
400 ksi.ksi_trap = T_DTLBMISS;
401 ksi.ksi_addr = (void *)frame->tf_iioq_head;
402 trapsignal(l, &ksi);
403 }
404 return;
405 case 0x0c:
406 exception = decode_0c(inst, class, sub, fpregs);
407 break;
408 case 0x0e:
409 exception = decode_0e(inst, class, sub, fpregs);
410 break;
411 case 0x06:
412 exception = decode_06(inst, fpregs);
413 break;
414 case 0x26:
415 exception = decode_26(inst, fpregs);
416 break;
417 default:
418 exception = UNIMPLEMENTEDEXCEPTION;
419 break;
420 }
421
422 if (exception) {
423 KSI_INIT_TRAP(&ksi);
424 if (exception & UNIMPLEMENTEDEXCEPTION) {
425 ksi.ksi_signo = SIGILL;
426 ksi.ksi_code = ILL_COPROC;
427 } else {
428 ksi.ksi_signo = SIGFPE;
429 if (exception & INVALIDEXCEPTION) {
430 ksi.ksi_code = FPE_FLTINV;
431 } else if (exception & DIVISIONBYZEROEXCEPTION) {
432 ksi.ksi_code = FPE_FLTDIV;
433 } else if (exception & OVERFLOWEXCEPTION) {
434 ksi.ksi_code = FPE_FLTOVF;
435 } else if (exception & UNDERFLOWEXCEPTION) {
436 ksi.ksi_code = FPE_FLTUND;
437 } else if (exception & INEXACTEXCEPTION) {
438 ksi.ksi_code = FPE_FLTRES;
439 }
440 }
441 ksi.ksi_trap = T_EMULATION;
442 ksi.ksi_addr = (void *)frame->tf_iioq_head;
443 trapsignal(l, &ksi);
444 }
445 fdcache(HPPA_SID_KERNEL, (vaddr_t)fpregs,
446 sizeof(l->l_addr->u_pcb.pcb_fpregs));
447 }
448
449 #endif /* FPEMUL */
450