fpu.c revision 1.10 1 /* $NetBSD: fpu.c,v 1.10 2005/05/31 16:11:58 chs Exp $ */
2
3 /*
4 * Copyright (c) 2002 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matthew Fredette.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * FPU handling for NetBSD/hppa.
41 */
42
43 #include <sys/cdefs.h>
44 __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.10 2005/05/31 16:11:58 chs Exp $");
45
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/proc.h>
49 #include <sys/signalvar.h>
50 #include <sys/user.h>
51
52 #include <machine/cpu.h>
53 #include <machine/cpufunc.h>
54 #include <machine/frame.h>
55 #include <machine/reg.h>
56
57 #include <hppa/hppa/machdep.h>
58
59 #include "../spmath/float.h"
60 #include "../spmath/fpudispatch.h"
61
/*
 * Some macros representing opcodes.
 *
 * OPCODE_NOP is the instruction word stored over hppa_fpu_nop0 and
 * hppa_fpu_nop1 when a hardware FPU is found.  OPCODE_COPR_0_0 is the
 * copr,0,0 instruction word handed to the emulator to obtain the FPU
 * version when there is no hardware FPU.
 */
#define	OPCODE_NOP		0x08000240
#define	OPCODE_COPR_0_0		0x30000000

/*
 * Some macros representing fields in load/store opcodes.
 *
 * These are masks applied directly to the 32-bit instruction word by
 * the coprocessor load/store emulation:
 *
 *   OPCODE_CMPLT_S, OPCODE_CMPLT_M	the two completer bits
 *   OPCODE_CMPLT			mask covering both completer bits
 *   OPCODE_CMPLT_SM			indexed form: both bits set
 *   OPCODE_CMPLT_MB, OPCODE_CMPLT_MA	short-displacement form:
 *					modify-before / modify-after
 *   OPCODE_DOUBLE			set for a doubleword (8 byte) access,
 *					clear for a word (4 byte) access
 *   OPCODE_STORE			set for a store, clear for a load
 *   OPCODE_INDEXED			tested to select the indexed form
 *
 * NOTE(review): as far as I recall the PA-RISC encoding, ",mb" is a=1,m=1
 * and ",ma" is a=0,m=1, and bit 0x1000 is *set* for the short-displacement
 * form rather than the indexed form.  If so, OPCODE_CMPLT_MB/_MA and the
 * sense of OPCODE_INDEXED are swapped here - confirm against the
 * architecture manual before relying on them.
 */
#define	OPCODE_CMPLT_S		0x00002000
#define	OPCODE_CMPLT_M		0x00000020
#define	OPCODE_CMPLT_SM		(OPCODE_CMPLT_S | OPCODE_CMPLT_M)
#define	OPCODE_CMPLT_MB		OPCODE_CMPLT_M
#define	OPCODE_CMPLT_MA		(OPCODE_CMPLT_S | OPCODE_CMPLT_M)
#define	OPCODE_CMPLT		(OPCODE_CMPLT_S | OPCODE_CMPLT_M)
#define	OPCODE_DOUBLE		0x08000000
#define	OPCODE_STORE		0x00000200
#define	OPCODE_INDEXED		0x00001000
76
/*
 * This is nonzero iff we're using a hardware FPU.  It is set once by
 * hppa_fpu_bootstrap() from the coprocessor enable bits it is given.
 */
int fpu_present;

/*
 * If we have any FPU, this is its version.  It is the first word
 * produced by the copr,0,0 instruction (executed on the hardware, or
 * dispatched into the emulator).
 */
u_int fpu_version;

/*
 * The number of times we have had to switch the FPU context.  This
 * file only resets it to zero at bootstrap; presumably it is counted
 * by the context-switch code elsewhere.
 */
u_int fpu_csw;

/*
 * The U-space physical address of the proc in the FPU, or zero.
 * hppa_fpu_flush() compares it against the tf_cr30 value in an LWP's
 * trapframe to decide whether that LWP's state is live in the FPU.
 */
paddr_t fpu_cur_uspace;

/*
 * In locore.S, this swaps states in and out of the FPU.  This file
 * calls it with the PCB to save into as the first argument and NULL
 * as the second.
 */
void hppa_fpu_swap(struct pcb *, struct pcb *);
91
#ifdef FPEMUL
/*
 * Given a trapframe and a general register number, the
 * FRAME_REG macro returns a pointer to that general
 * register.  The _frame_reg_positions array is a lookup
 * table, since the general registers aren't in order
 * in a trapframe.
 *
 * Register 0 has no slot in the trapframe, so FRAME_REG
 * returns the address of the caller-supplied r0 lvalue
 * instead; the table entry for r0 (-1) is never used.
 *
 * _FRAME_POSITION yields the distance, in units of the
 * member type, from tf_t1 to the named trapframe member;
 * FRAME_REG adds that distance back onto &tf_t1.
 *
 * NB: this more or less assumes that all members of
 * struct trapframe are u_ints.
 */
#define	FRAME_REG(f, reg, r0)	\
	((reg) == 0 ? (&r0) : ((&(f)->tf_t1) + _frame_reg_positions[reg]))
#define	_FRAME_POSITION(f)	\
	((&((struct trapframe *) 0)->f) - (&((struct trapframe *) 0)->tf_t1))
/* Indexed by general register number, r0 through r31. */
const int _frame_reg_positions[32] = {
	-1,				/* r0 */
	_FRAME_POSITION(tf_r1),
	_FRAME_POSITION(tf_rp),		/* r2 */
	_FRAME_POSITION(tf_r3),
	_FRAME_POSITION(tf_r4),
	_FRAME_POSITION(tf_r5),
	_FRAME_POSITION(tf_r6),
	_FRAME_POSITION(tf_r7),
	_FRAME_POSITION(tf_r8),
	_FRAME_POSITION(tf_r9),
	_FRAME_POSITION(tf_r10),
	_FRAME_POSITION(tf_r11),
	_FRAME_POSITION(tf_r12),
	_FRAME_POSITION(tf_r13),
	_FRAME_POSITION(tf_r14),
	_FRAME_POSITION(tf_r15),
	_FRAME_POSITION(tf_r16),
	_FRAME_POSITION(tf_r17),
	_FRAME_POSITION(tf_r18),
	_FRAME_POSITION(tf_t4),		/* r19 */
	_FRAME_POSITION(tf_t3),		/* r20 */
	_FRAME_POSITION(tf_t2),		/* r21 */
	_FRAME_POSITION(tf_t1),		/* r22 */
	_FRAME_POSITION(tf_arg3),	/* r23 */
	_FRAME_POSITION(tf_arg2),	/* r24 */
	_FRAME_POSITION(tf_arg1),	/* r25 */
	_FRAME_POSITION(tf_arg0),	/* r26 */
	_FRAME_POSITION(tf_dp),		/* r27 */
	_FRAME_POSITION(tf_ret0),	/* r28 */
	_FRAME_POSITION(tf_ret1),	/* r29 */
	_FRAME_POSITION(tf_sp),		/* r30 */
	_FRAME_POSITION(tf_r31),
};
#endif /* FPEMUL */
142
143 /*
144 * Bootstraps the FPU.
145 */
146 void
147 hppa_fpu_bootstrap(u_int ccr_enable)
148 {
149 u_int32_t junk[2];
150 u_int32_t vers[2];
151 extern u_int hppa_fpu_nop0;
152 extern u_int hppa_fpu_nop1;
153
154 /* See if we have a present and functioning hardware FPU. */
155 fpu_present = (ccr_enable & HPPA_FPUS) == HPPA_FPUS;
156
157 /* Initialize the FPU and get its version. */
158 if (fpu_present) {
159
160 /*
161 * To somewhat optimize the emulation
162 * assist trap handling and context
163 * switching (to save them from having
164 * to always load and check fpu_present),
165 * there are two instructions in locore.S
166 * that are replaced with nops when
167 * there is a hardware FPU.
168 */
169 hppa_fpu_nop0 = OPCODE_NOP;
170 hppa_fpu_nop1 = OPCODE_NOP;
171 fcacheall();
172
173 /*
174 * We track what process has the FPU,
175 * and how many times we have to swap
176 * in and out.
177 */
178
179 /*
180 * The PA-RISC 1.1 Architecture manual is
181 * pretty clear that the copr,0,0 must be
182 * wrapped in double word stores of fr0,
183 * otherwise its operation is undefined.
184 */
185 __asm __volatile(
186 " ldo %0, %%r22 \n"
187 " fstds %%fr0, 0(%%r22) \n"
188 " ldo %1, %%r22 \n"
189 " copr,0,0 \n"
190 " fstds %%fr0, 0(%%r22) \n"
191 : "=m" (junk), "=m" (vers) : : "r22");
192
193 /*
194 * Now mark that no process has the FPU,
195 * and disable it, so the first time it
196 * gets used the process' state gets
197 * swapped in.
198 */
199 fpu_csw = 0;
200 fpu_cur_uspace = 0;
201 mtctl(ccr_enable & (CCR_MASK ^ HPPA_FPUS), CR_CCR);
202 }
203 #ifdef FPEMUL
204 else
205 /*
206 * XXX This is a hack - to avoid
207 * having to set up the emulator so
208 * it can work for one instruction for
209 * proc0, we dispatch the copr,0,0 opcode
210 * into the emulator directly.
211 */
212 decode_0c(OPCODE_COPR_0_0, 0, 0, vers);
213 #endif /* FPEMUL */
214 fpu_version = vers[0];
215 }
216
217 /*
218 * If the given LWP has its state in the FPU,
219 * flush that state out into the LWP's PCB.
220 */
221 void
222 hppa_fpu_flush(struct lwp *l)
223 {
224 struct trapframe *tf = l->l_md.md_regs;
225
226 /*
227 * If we have a hardware FPU, and this process'
228 * state is currently in it, swap it out.
229 */
230
231 if (!fpu_present || fpu_cur_uspace == 0 ||
232 fpu_cur_uspace != tf->tf_cr30) {
233 return;
234 }
235
236 hppa_fpu_swap(&l->l_addr->u_pcb, NULL);
237 fpu_cur_uspace = 0;
238 }
239
240 #ifdef FPEMUL
241
242 /*
243 * This emulates a coprocessor load/store instruction.
244 */
245 static int hppa_fpu_ls(struct trapframe *, struct lwp *);
246 static int
247 hppa_fpu_ls(struct trapframe *frame, struct lwp *l)
248 {
249 u_int inst, inst_b, inst_x, inst_s, inst_t;
250 int log2size;
251 u_int *base;
252 u_int offset, index, im5;
253 void *fpreg;
254 u_int r0 = 0;
255 int error;
256
257 /*
258 * Get the instruction that we're emulating,
259 * and break it down. Using HP bit notation,
260 * b is a five-bit field starting at bit 10,
261 * x is a five-bit field starting at bit 15,
262 * s is a two-bit field starting at bit 17,
263 * and t is a five-bit field starting at bit 31.
264 */
265 inst = frame->tf_iir;
266 __asm __volatile(
267 " extru %4, 10, 5, %1 \n"
268 " extru %4, 15, 5, %2 \n"
269 " extru %4, 17, 2, %3 \n"
270 " extru %4, 31, 5, %4 \n"
271 : "=r" (inst_b), "=r" (inst_x), "=r" (inst_s), "=r" (inst_t)
272 : "r" (inst));
273
274 /*
275 * The space must be the user's space, else we
276 * segfault.
277 */
278 if (inst_s != l->l_addr->u_pcb.pcb_space)
279 return (EFAULT);
280
281 /* See whether or not this is a doubleword load/store. */
282 log2size = (inst & OPCODE_DOUBLE) ? 3 : 2;
283
284 /* Get the floating point register. */
285 fpreg = ((caddr_t)l->l_addr->u_pcb.pcb_fpregs) + (inst_t << log2size);
286
287 /* Get the base register. */
288 base = FRAME_REG(frame, inst_b, r0);
289
290 /* Dispatch on whether or not this is an indexed load/store. */
291 if (inst & OPCODE_INDEXED) {
292
293 /* Get the index register value. */
294 index = *FRAME_REG(frame, inst_x, r0);
295
296 /* Dispatch on the completer. */
297 switch (inst & OPCODE_CMPLT) {
298 case OPCODE_CMPLT_S:
299 offset = *base + (index << log2size);
300 break;
301 case OPCODE_CMPLT_M:
302 offset = *base;
303 *base = *base + index;
304 break;
305 case OPCODE_CMPLT_SM:
306 offset = *base;
307 *base = *base + (index << log2size);
308 break;
309 default:
310 offset = *base + index;
311 break;
312 }
313 } else {
314
315 /* Do a low_sign_ext(x, 5). */
316 im5 = inst_x >> 1;
317 if (inst_x & 1)
318 im5 |= 0xfffffff0;
319
320 /* Dispatch on the completer. */
321 switch (inst & OPCODE_CMPLT) {
322 case OPCODE_CMPLT_MB:
323 offset = *base + im5;
324 *base = *base + im5;
325 break;
326 case OPCODE_CMPLT_MA:
327 offset = *base;
328 *base = *base + im5;
329 break;
330 default:
331 offset = *base + im5;
332 break;
333 }
334 }
335
336 /*
337 * The offset we calculated must be the same as the
338 * offset in the IOR.
339 */
340 KASSERT(offset == frame->tf_ior);
341
342 /* Perform the load or store. */
343 error = (inst & OPCODE_STORE) ?
344 copyout(fpreg, (void *) offset, 1 << log2size) :
345 copyin((const void *) offset, fpreg, 1 << log2size);
346 fdcache(HPPA_SID_KERNEL, (vaddr_t)fpreg,
347 sizeof(l->l_addr->u_pcb.pcb_fpregs));
348 return error;
349 }
350
351 /*
352 * This is called to emulate an instruction.
353 */
354 void
355 hppa_fpu_emulate(struct trapframe *frame, struct lwp *l, u_int inst)
356 {
357 u_int opcode, class, sub;
358 u_int *fpregs;
359 int exception;
360 ksiginfo_t ksi;
361
362 /*
363 * If the process' state is in any hardware FPU,
364 * flush it out - we need to operate on it.
365 */
366 hppa_fpu_flush(l);
367
368 /*
369 * Get the instruction that we're emulating,
370 * and break it down. Using HP bit notation,
371 * the class is a two-bit field starting at
372 * bit 22, the opcode is a 6-bit field starting
373 * at bit 5, and sub for a class 1 instruction
374 * is a two bit field starting at bit 16, else
375 * it is a three bit field starting at bit 18.
376 */
377 #if 0
378 __asm __volatile(
379 " extru %3, 22, 2, %1 \n"
380 " extru %3, 5, 6, %0 \n"
381 " extru %3, 18, 3, %2 \n"
382 " comib,<> 1, %1, 0 \n"
383 " extru %3, 16, 2, %2 \n"
384 : "=r" (opcode), "=r" (class), "=r" (sub)
385 : "r" (inst));
386 #else
387 opcode = (inst >> (31 - 5)) & 0x3f;
388 class = (inst >> (31 - 22)) & 0x3;
389 if (class == 1) {
390 sub = (inst >> (31 - 16)) & 3;
391 } else {
392 sub = (inst >> (31 - 18)) & 7;
393 }
394 #endif
395
396 /* Get this LWP's FPU registers. */
397 fpregs = (u_int *) l->l_addr->u_pcb.pcb_fpregs;
398
399 /* Dispatch on the opcode. */
400 switch (opcode) {
401 case 0x09:
402 case 0x0b:
403 if (hppa_fpu_ls(frame, l) != 0) {
404 KSI_INIT_TRAP(&ksi);
405 ksi.ksi_signo = SIGSEGV;
406 ksi.ksi_code = SEGV_MAPERR;
407 ksi.ksi_trap = T_DTLBMISS;
408 ksi.ksi_addr = (void *)frame->tf_iioq_head;
409 trapsignal(l, &ksi);
410 }
411 return;
412 case 0x0c:
413 exception = decode_0c(inst, class, sub, fpregs);
414 break;
415 case 0x0e:
416 exception = decode_0e(inst, class, sub, fpregs);
417 break;
418 case 0x06:
419 exception = decode_06(inst, fpregs);
420 break;
421 case 0x26:
422 exception = decode_26(inst, fpregs);
423 break;
424 default:
425 exception = UNIMPLEMENTEDEXCEPTION;
426 break;
427 }
428
429 if (exception) {
430 KSI_INIT_TRAP(&ksi);
431 if (exception & UNIMPLEMENTEDEXCEPTION) {
432 ksi.ksi_signo = SIGILL;
433 ksi.ksi_code = ILL_COPROC;
434 } else {
435 ksi.ksi_signo = SIGFPE;
436 if (exception & INVALIDEXCEPTION) {
437 ksi.ksi_code = FPE_FLTINV;
438 } else if (exception & DIVISIONBYZEROEXCEPTION) {
439 ksi.ksi_code = FPE_FLTDIV;
440 } else if (exception & OVERFLOWEXCEPTION) {
441 ksi.ksi_code = FPE_FLTOVF;
442 } else if (exception & UNDERFLOWEXCEPTION) {
443 ksi.ksi_code = FPE_FLTUND;
444 } else if (exception & INEXACTEXCEPTION) {
445 ksi.ksi_code = FPE_FLTRES;
446 }
447 }
448 ksi.ksi_trap = T_EMULATION;
449 ksi.ksi_addr = (void *)frame->tf_iioq_head;
450 trapsignal(l, &ksi);
451 }
452 fdcache(HPPA_SID_KERNEL, (vaddr_t)fpregs,
453 sizeof(l->l_addr->u_pcb.pcb_fpregs));
454 }
455
456 #endif /* FPEMUL */
457