Home | History | Annotate | Line # | Download | only in hppa
fpu.c revision 1.24
      1 /*	$NetBSD: fpu.c,v 1.24 2012/04/06 12:21:59 skrll Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 2002 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Matthew Fredette.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * FPU handling for NetBSD/hppa.
     34  */
     35 
     36 #include <sys/cdefs.h>
     37 __KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.24 2012/04/06 12:21:59 skrll Exp $");
     38 
     39 #include <sys/param.h>
     40 #include <sys/systm.h>
     41 #include <sys/proc.h>
     42 #include <sys/signalvar.h>
     43 
     44 #include <uvm/uvm_extern.h>
     45 
     46 #include <machine/cpufunc.h>
     47 #include <machine/frame.h>
     48 #include <machine/reg.h>
     49 #include <machine/pcb.h>
     50 #include <machine/pmap.h>
     51 
     52 #include <hppa/hppa/machdep.h>
     53 
     54 #include "../spmath/float.h"
     55 #include "../spmath/fpudispatch.h"
     56 
/* Some macros representing opcodes. */
#define OPCODE_NOP	0x08000240
#define OPCODE_COPR_0_0	0x30000000

/*
 * Some macros representing fields in load/store opcodes.
 *
 * OPCODE_CMPLT_S (0x2000) and OPCODE_CMPLT_M (0x20) are the two
 * completer bits.  In the indexed forms they are the "scale index"
 * and "modify base" bits.  In the short-displacement forms the
 * 0x2000 bit is instead the "a" bit, which selects modify-before
 * (a = 1) versus modify-after (a = 0) when the m bit is set:
 *
 *	,ma	a = 0, m = 1	use base, then base += displacement
 *	,mb	a = 1, m = 1	base += displacement, then use base
 */
#define	OPCODE_CMPLT_S	0x00002000
#define	OPCODE_CMPLT_M	0x00000020
#define	OPCODE_CMPLT_SM	(OPCODE_CMPLT_S | OPCODE_CMPLT_M)
#define	OPCODE_CMPLT_MA	OPCODE_CMPLT_M
#define	OPCODE_CMPLT_MB	(OPCODE_CMPLT_S | OPCODE_CMPLT_M)
#define	OPCODE_CMPLT	(OPCODE_CMPLT_S | OPCODE_CMPLT_M)
#define	OPCODE_DOUBLE	0x08000000
#define	OPCODE_STORE	0x00000200
#define OPCODE_INDEXED	0x00001000
     71 
/*
 * This is nonzero iff we're using a hardware FPU.
 * Set by hppa_fpu_bootstrap() from the coprocessor enable bits.
 */
int fpu_present;

/*
 * If we have any FPU, this is its version, as read back from
 * fr0 by hppa_fpu_bootstrap().
 */
u_int fpu_version;

/*
 * The number of times we have had to switch the FPU context.
 * This file only resets it to zero, in hppa_fpu_bootstrap().
 */
u_int fpu_csw;

/* In locore.S, these swap states in and out of the FPU. */
void hppa_fpu_swapout(struct pcb *);
void hppa_fpu_swap(struct fpreg *, struct fpreg *);

/* Emulates a coprocessor load/store; defined later in this file. */
static int hppa_fpu_ls(struct trapframe *, struct lwp *);
     86 
/*
 * Given a trapframe and a general register number, the
 * FRAME_REG macro returns a pointer to that general
 * register.  The _frame_reg_positions array is a lookup
 * table, since the general registers aren't in order
 * in a trapframe.
 *
 * Register 0 has no slot in the trapframe, so FRAME_REG
 * returns the address of the caller-supplied r0 variable
 * for it and never consults the table's r0 entry.
 *
 * _FRAME_POSITION(f) is the distance of member f from
 * tf_t1, in units of the member type, computed by pointer
 * subtraction on a trapframe placed at address zero.
 *
 * NB: this more or less assumes that all members of
 * struct trapframe are u_ints.
 */
#define FRAME_REG(f, reg, r0)	\
	((reg) == 0 ? (&r0) : ((&(f)->tf_t1) + _frame_reg_positions[reg]))
#define _FRAME_POSITION(f)	\
	((&((struct trapframe *) 0)->f) - (&((struct trapframe *) 0)->tf_t1))
/* Indexed by general register number (0..31). */
const int _frame_reg_positions[32] = {
	-1,				/* r0: placeholder, never used */
	_FRAME_POSITION(tf_r1),
	_FRAME_POSITION(tf_rp),		/* r2 */
	_FRAME_POSITION(tf_r3),
	_FRAME_POSITION(tf_r4),
	_FRAME_POSITION(tf_r5),
	_FRAME_POSITION(tf_r6),
	_FRAME_POSITION(tf_r7),
	_FRAME_POSITION(tf_r8),
	_FRAME_POSITION(tf_r9),
	_FRAME_POSITION(tf_r10),
	_FRAME_POSITION(tf_r11),
	_FRAME_POSITION(tf_r12),
	_FRAME_POSITION(tf_r13),
	_FRAME_POSITION(tf_r14),
	_FRAME_POSITION(tf_r15),
	_FRAME_POSITION(tf_r16),
	_FRAME_POSITION(tf_r17),
	_FRAME_POSITION(tf_r18),
	_FRAME_POSITION(tf_t4),		/* r19 */
	_FRAME_POSITION(tf_t3),		/* r20 */
	_FRAME_POSITION(tf_t2),		/* r21 */
	_FRAME_POSITION(tf_t1),		/* r22 */
	_FRAME_POSITION(tf_arg3),	/* r23 */
	_FRAME_POSITION(tf_arg2),	/* r24 */
	_FRAME_POSITION(tf_arg1),	/* r25 */
	_FRAME_POSITION(tf_arg0),	/* r26 */
	_FRAME_POSITION(tf_dp),		/* r27 */
	_FRAME_POSITION(tf_ret0),	/* r28 */
	_FRAME_POSITION(tf_ret1),	/* r29 */
	_FRAME_POSITION(tf_sp),		/* r30 */
	_FRAME_POSITION(tf_r31),
};
    135 
/*
 * Bootstraps the FPU.
 *
 * ccr_enable is the coprocessor enable mask; the FPU counts as
 * present only if all of the HPPA_FPUS bits are set in it.  On
 * return fpu_present and fpu_version are set, fpu_csw is zero,
 * this CPU's ci_fpu_state is zero, and the CCR has been written
 * with the HPPA_FPUS bits cleared.
 */
void
hppa_fpu_bootstrap(u_int ccr_enable)
{
	/* Scratch target for the first store of fr0; never read. */
	uint32_t junk[2];
	/* Receives fr0 after the copr,0,0; word 0 is the version. */
	uint32_t vers[2];

	/* See if we have a present and functioning hardware FPU. */
	fpu_present = (ccr_enable & HPPA_FPUS) == HPPA_FPUS;

	/* A hardware FPU is required from here on. */
	KASSERT(fpu_present);

	/*
	 * Initialize the FPU and get its version.
	 *
	 * The PA-RISC 1.1 Architecture manual is
	 * pretty clear that the copr,0,0 must be
	 * wrapped in double word stores of fr0,
	 * otherwise its operation is undefined.
	 *
	 * r22 is used as the address register for
	 * both stores and is declared clobbered.
	 */
	__asm volatile(
		"	ldo	%0, %%r22	\n"
		"	fstds	%%fr0, 0(%%r22)	\n"
		"	ldo	%1, %%r22	\n"
		"	copr,0,0		\n"
		"	fstds	%%fr0, 0(%%r22)	\n"
		: "=m" (junk), "=m" (vers) : : "r22");

	/*
	 * Now mark that no process has the FPU,
	 * and disable it, so the first time it
	 * gets used the process' state gets
	 * swapped in.
	 */
	fpu_csw = 0;
	curcpu()->ci_fpu_state = 0;
	mtctl(ccr_enable & (CCR_MASK ^ HPPA_FPUS), CR_CCR);

	/* The first word stored after the copr,0,0 is the version. */
	fpu_version = vers[0];
}
    183 
    184 /*
    185  * If the given LWP has its state in the FPU,
    186  * flush that state out into the LWP's PCB.
    187  */
    188 void
    189 hppa_fpu_flush(struct lwp *l)
    190 {
    191 	struct trapframe *tf = l->l_md.md_regs;
    192 	struct pcb *pcb = lwp_getpcb(l);
    193 	struct cpu_info *ci = curcpu();
    194 
    195 	KASSERT(fpu_present);
    196 
    197 	/*
    198 	 * If this process' state is currently in hardware, swap it out.
    199 	 */
    200 
    201 	if (ci->ci_fpu_state == 0 ||
    202 	    ci->ci_fpu_state != tf->tf_cr30) {
    203 		return;
    204 	}
    205 
    206 	hppa_fpu_swapout(pcb);
    207 	ci->ci_fpu_state = 0;
    208 }
    209 
    210 /*
    211  * This emulates a coprocessor load/store instruction.
    212  */
    213 static int
    214 hppa_fpu_ls(struct trapframe *frame, struct lwp *l)
    215 {
    216 	struct pcb *pcb = lwp_getpcb(l);
    217 	u_int inst, inst_b, inst_x, inst_s, inst_t;
    218 	int log2size;
    219 	u_int *base;
    220 	u_int offset, index, im5;
    221 	void *fpreg;
    222 	u_int r0 = 0;
    223 	int error;
    224 
    225 	/*
    226 	 * Get the instruction that we're emulating,
    227 	 * and break it down.  Using HP bit notation,
    228 	 * b is a five-bit field starting at bit 10,
    229 	 * x is a five-bit field starting at bit 15,
    230 	 * s is a two-bit field starting at bit 17,
    231 	 * and t is a five-bit field starting at bit 31.
    232 	 */
    233 	inst = frame->tf_iir;
    234 	__asm volatile(
    235 		"	extru %4, 10, 5, %1	\n"
    236 		"	extru %4, 15, 5, %2	\n"
    237 		"	extru %4, 17, 2, %3	\n"
    238 		"	extru %4, 31, 5, %4	\n"
    239 		: "=r" (inst_b), "=r" (inst_x), "=r" (inst_s), "=r" (inst_t)
    240 		: "r" (inst));
    241 
    242 	/*
    243 	 * The space must be the user's space, else we
    244 	 * segfault.
    245 	 */
    246 	if (inst_s != pcb->pcb_space)
    247 		return EFAULT;
    248 
    249 	/* See whether or not this is a doubleword load/store. */
    250 	log2size = (inst & OPCODE_DOUBLE) ? 3 : 2;
    251 
    252 	/* Get the floating point register. */
    253 	fpreg = ((char *)pcb->pcb_fpregs) + (inst_t << log2size);
    254 
    255 	/* Get the base register. */
    256 	base = FRAME_REG(frame, inst_b, r0);
    257 
    258 	/* Dispatch on whether or not this is an indexed load/store. */
    259 	if (inst & OPCODE_INDEXED) {
    260 
    261 		/* Get the index register value. */
    262 		index = *FRAME_REG(frame, inst_x, r0);
    263 
    264 		/* Dispatch on the completer. */
    265 		switch (inst & OPCODE_CMPLT) {
    266 		case OPCODE_CMPLT_S:
    267 			offset = *base + (index << log2size);
    268 			break;
    269 		case OPCODE_CMPLT_M:
    270 			offset = *base;
    271 			*base = *base + index;
    272 			break;
    273 		case OPCODE_CMPLT_SM:
    274 			offset = *base;
    275 			*base = *base + (index << log2size);
    276 			break;
    277 		default:
    278 			offset = *base + index;
    279 			break;
    280 		}
    281 	} else {
    282 
    283 		/* Do a low_sign_ext(x, 5). */
    284 		im5 = inst_x >> 1;
    285 		if (inst_x & 1)
    286 			im5 |= 0xfffffff0;
    287 
    288 		/* Dispatch on the completer. */
    289 		switch (inst & OPCODE_CMPLT) {
    290 		case OPCODE_CMPLT_MB:
    291 			offset = *base + im5;
    292 			*base = *base + im5;
    293 			break;
    294 		case OPCODE_CMPLT_MA:
    295 			offset = *base;
    296 			*base = *base + im5;
    297 			break;
    298 		default:
    299 			offset = *base + im5;
    300 			break;
    301 		}
    302 	}
    303 
    304 	/*
    305 	 * The offset we calculated must be the same as the
    306 	 * offset in the IOR.
    307 	 */
    308 	KASSERT(offset == frame->tf_ior);
    309 
    310 	/* Perform the load or store. */
    311 	error = (inst & OPCODE_STORE) ?
    312 		copyout(fpreg, (void *) offset, 1 << log2size) :
    313 		copyin((const void *) offset, fpreg, 1 << log2size);
    314 	return error;
    315 }
    316 
    317 /*
    318  * This is called to emulate an instruction.
    319  */
    320 void
    321 hppa_fpu_emulate(struct trapframe *frame, struct lwp *l, u_int inst)
    322 {
    323 	struct pcb *pcb = lwp_getpcb(l);
    324 	u_int opcode, class, sub;
    325 	u_int *fpregs;
    326 	int exception;
    327 	ksiginfo_t ksi;
    328 
    329 	/*
    330 	 * If the process' state is in any hardware FPU,
    331 	 * flush it out - we need to operate on it.
    332 	 */
    333 	hppa_fpu_flush(l);
    334 
    335 	/*
    336 	 * Get the instruction that we're emulating,
    337 	 * and break it down.  Using HP bit notation,
    338 	 * the class is a two-bit field starting at
    339 	 * bit 22, the opcode is a 6-bit field starting
    340 	 * at bit 5, and sub for a class 1 instruction
    341 	 * is a two bit field starting at bit 16, else
    342 	 * it is a three bit field starting at bit 18.
    343 	 */
    344 #if 0
    345 	__asm volatile(
    346 		"	extru %3, 22, 2, %1	\n"
    347 		"	extru %3, 5, 6, %0	\n"
    348 		"	extru %3, 18, 3, %2	\n"
    349 		"	comib,<> 1, %1, 0	\n"
    350 		"	extru %3, 16, 2, %2	\n"
    351 		: "=r" (opcode), "=r" (class), "=r" (sub)
    352 		: "r" (inst));
    353 #else
    354 	opcode = (inst >> (31 - 5)) & 0x3f;
    355 	class = (inst >> (31 - 22)) & 0x3;
    356 	if (class == 1) {
    357 		sub = (inst >> (31 - 16)) & 3;
    358 	} else {
    359 		sub = (inst >> (31 - 18)) & 7;
    360 	}
    361 #endif
    362 
    363 	/* Get this LWP's FPU registers. */
    364 	fpregs = (u_int *)pcb->pcb_fpregs;
    365 
    366 	/* Dispatch on the opcode. */
    367 	switch (opcode) {
    368 	case 0x09:
    369 	case 0x0b:
    370 		if (hppa_fpu_ls(frame, l) != 0) {
    371 			KSI_INIT_TRAP(&ksi);
    372 			ksi.ksi_signo = SIGSEGV;
    373 			ksi.ksi_code = SEGV_MAPERR;
    374 			ksi.ksi_trap = T_DTLBMISS;
    375 			ksi.ksi_addr = (void *)frame->tf_iioq_head;
    376 			trapsignal(l, &ksi);
    377 		}
    378 		return;
    379 	case 0x0c:
    380 		exception = decode_0c(inst, class, sub, fpregs);
    381 		break;
    382 	case 0x0e:
    383 		exception = decode_0e(inst, class, sub, fpregs);
    384 		break;
    385 	case 0x06:
    386 		exception = decode_06(inst, fpregs);
    387 		break;
    388 	case 0x26:
    389 		exception = decode_26(inst, fpregs);
    390 		break;
    391 	default:
    392 		exception = UNIMPLEMENTEDEXCEPTION;
    393 		break;
    394         }
    395 
    396 	if (exception) {
    397 		KSI_INIT_TRAP(&ksi);
    398 		if (exception & UNIMPLEMENTEDEXCEPTION) {
    399 			ksi.ksi_signo = SIGILL;
    400 			ksi.ksi_code = ILL_COPROC;
    401 		} else {
    402 			ksi.ksi_signo = SIGFPE;
    403 			if (exception & INVALIDEXCEPTION) {
    404 				ksi.ksi_code = FPE_FLTINV;
    405 			} else if (exception & DIVISIONBYZEROEXCEPTION) {
    406 				ksi.ksi_code = FPE_FLTDIV;
    407 			} else if (exception & OVERFLOWEXCEPTION) {
    408 				ksi.ksi_code = FPE_FLTOVF;
    409 			} else if (exception & UNDERFLOWEXCEPTION) {
    410 				ksi.ksi_code = FPE_FLTUND;
    411 			} else if (exception & INEXACTEXCEPTION) {
    412 				ksi.ksi_code = FPE_FLTRES;
    413 			}
    414 		}
    415 		ksi.ksi_trap = T_EMULATION;
    416 		ksi.ksi_addr = (void *)frame->tf_iioq_head;
    417 		trapsignal(l, &ksi);
    418 	}
    419 }
    420