Home | History | Annotate | Line # | Download | only in fpu
      1 /*	$NetBSD: fpu_emu.c,v 1.60 2022/09/20 12:25:01 rin Exp $ */
      2 
      3 /*
      4  * Copyright 2001 Wasabi Systems, Inc.
      5  * All rights reserved.
      6  *
      7  * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. All advertising materials mentioning features or use of this software
     18  *    must display the following acknowledgement:
     19  *      This product includes software developed for the NetBSD Project by
     20  *      Wasabi Systems, Inc.
     21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
     22  *    or promote products derived from this software without specific prior
     23  *    written permission.
     24  *
     25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
     26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
     29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     35  * POSSIBILITY OF SUCH DAMAGE.
     36  */
     37 
     38 /*
     39  * Copyright (c) 1992, 1993
     40  *	The Regents of the University of California.  All rights reserved.
     41  *
     42  * This software was developed by the Computer Systems Engineering group
     43  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
     44  * contributed to Berkeley.
     45  *
     46  * All advertising materials mentioning features or use of this software
     47  * must display the following acknowledgement:
     48  *	This product includes software developed by the University of
     49  *	California, Lawrence Berkeley Laboratory.
     50  *
     51  * Redistribution and use in source and binary forms, with or without
     52  * modification, are permitted provided that the following conditions
     53  * are met:
     54  * 1. Redistributions of source code must retain the above copyright
     55  *    notice, this list of conditions and the following disclaimer.
     56  * 2. Redistributions in binary form must reproduce the above copyright
     57  *    notice, this list of conditions and the following disclaimer in the
     58  *    documentation and/or other materials provided with the distribution.
     59  * 3. Neither the name of the University nor the names of its contributors
     60  *    may be used to endorse or promote products derived from this software
     61  *    without specific prior written permission.
     62  *
     63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     73  * SUCH DAMAGE.
     74  *
     75  *	@(#)fpu.c	8.1 (Berkeley) 6/11/93
     76  */
     77 
     78 #include <sys/cdefs.h>
     79 __KERNEL_RCSID(0, "$NetBSD: fpu_emu.c,v 1.60 2022/09/20 12:25:01 rin Exp $");
     80 
     81 #ifdef _KERNEL_OPT
     82 #include "opt_ddb.h"
     83 #endif
     84 
     85 #include <sys/param.h>
     86 #include <sys/systm.h>
     87 #include <sys/evcnt.h>
     88 #include <sys/proc.h>
     89 #include <sys/siginfo.h>
     90 #include <sys/signal.h>
     91 #include <sys/signalvar.h>
     92 #include <sys/syslog.h>
     93 
     94 #include <powerpc/instr.h>
     95 #include <powerpc/psl.h>
     96 
     97 #include <machine/fpu.h>
     98 #include <machine/reg.h>
     99 #include <machine/trap.h>
    100 
    101 #include <powerpc/fpu/fpu_emu.h>
    102 #include <powerpc/fpu/fpu_extern.h>
    103 
/*
 * Per-instruction event counters.
 *
 * FPU_EMU_EVCNT_DECL(name) defines a file-local `struct evcnt' called
 * fpu_emu_ev_<name>, initialized with type EVCNT_TYPE_TRAP, group
 * "fpemu" and the stringified name, and hands it to
 * EVCNT_ATTACH_STATIC().
 */
#define	FPU_EMU_EVCNT_DECL(name)					\
static struct evcnt fpu_emu_ev_##name =					\
    EVCNT_INITIALIZER(EVCNT_TYPE_TRAP, NULL, "fpemu", #name);		\
EVCNT_ATTACH_STATIC(fpu_emu_ev_##name)

/* Bump the counter declared by FPU_EMU_EVCNT_DECL(name). */
#define	FPU_EMU_EVCNT_INCR(name)					\
    fpu_emu_ev_##name.ev_count++

/* One counter per instruction (or instruction group) handled below. */
FPU_EMU_EVCNT_DECL(stfiwx);
FPU_EMU_EVCNT_DECL(fpstore);
FPU_EMU_EVCNT_DECL(fpload);
FPU_EMU_EVCNT_DECL(fcmpu);
FPU_EMU_EVCNT_DECL(frsp);
FPU_EMU_EVCNT_DECL(fctiw);
FPU_EMU_EVCNT_DECL(fcmpo);
FPU_EMU_EVCNT_DECL(mtfsb1);
FPU_EMU_EVCNT_DECL(fnegabs);
FPU_EMU_EVCNT_DECL(mcrfs);
FPU_EMU_EVCNT_DECL(mtfsb0);
FPU_EMU_EVCNT_DECL(fmr);
FPU_EMU_EVCNT_DECL(mtfsfi);
FPU_EMU_EVCNT_DECL(fnabs);
FPU_EMU_EVCNT_DECL(fabs);
FPU_EMU_EVCNT_DECL(mffs);
FPU_EMU_EVCNT_DECL(mtfsf);
FPU_EMU_EVCNT_DECL(fctid);
FPU_EMU_EVCNT_DECL(fcfid);
FPU_EMU_EVCNT_DECL(fdiv);
FPU_EMU_EVCNT_DECL(fsub);
FPU_EMU_EVCNT_DECL(fadd);
FPU_EMU_EVCNT_DECL(fsqrt);
FPU_EMU_EVCNT_DECL(fsel);
FPU_EMU_EVCNT_DECL(fpres);
FPU_EMU_EVCNT_DECL(fmul);
FPU_EMU_EVCNT_DECL(frsqrte);
FPU_EMU_EVCNT_DECL(fmsub);
FPU_EMU_EVCNT_DECL(fmadd);
FPU_EMU_EVCNT_DECL(fnmsub);
FPU_EMU_EVCNT_DECL(fnmadd);
    143 
/*
 * FPSCR bit groups used by fpu_execute().
 *
 * FPSR_EX_MSK:	exception status bits; when any of them differs between
 *		the old and the new FPSCR value, FPSCR_FX is set.
 * FPSR_EX:	exception enable bits; shifted up to line them up with
 *		the matching status bits when FPSCR_FEX is computed.
 * FPSR_INV:	the individual invalid-operation cause bits; any of them
 *		being set causes the summary bit FPSCR_VX to be set.
 * MCRFS_MASK:	the bits that mcrfs is allowed to clear after copying a
 *		field to the condition register.
 */
#define FPSR_EX_MSK	(FPSCR_VX|FPSCR_OX|FPSCR_UX|FPSCR_ZX|		\
			FPSCR_XX|FPSCR_VXSNAN|FPSCR_VXISI|FPSCR_VXIDI|	\
			FPSCR_VXZDZ|FPSCR_VXIMZ|FPSCR_VXVC|FPSCR_VXSOFT|\
			FPSCR_VXSQRT|FPSCR_VXCVI)
#define	FPSR_EX		(FPSCR_VE|FPSCR_OE|FPSCR_UE|FPSCR_ZE|FPSCR_XE)
#define	FPSR_INV	(FPSCR_VXSNAN|FPSCR_VXISI|FPSCR_VXIDI|		\
			FPSCR_VXZDZ|FPSCR_VXIMZ|FPSCR_VXVC|FPSCR_VXSOFT|\
			FPSCR_VXSQRT|FPSCR_VXCVI)
#define	MCRFS_MASK							\
    (									\
	FPSCR_FX     | FPSCR_OX     |					\
	FPSCR_UX     | FPSCR_ZX     | FPSCR_XX    | FPSCR_VXSNAN |	\
	FPSCR_VXISI  | FPSCR_VXIDI  | FPSCR_VXZDZ | FPSCR_VXIMZ  |	\
	FPSCR_VXVC   |							\
	FPSCR_VXSOFT | FPSCR_VXSQRT | FPSCR_VXCVI			\
    )

/*
 * Shorthand for floating-point register `reg'.  Expands to a reference
 * to a variable named `fs', which must be in scope at the point of use.
 */
#define	FR(reg)	(fs->fpreg[reg])

/* Debug flag word; tested against FPE_EX before printing diagnostics. */
int fpe_debug = 0;
    165 
    166 #ifdef DDB
    167 extern vaddr_t opc_disasm(vaddr_t loc, int opcode);
    168 #endif
    169 
    170 static int fpu_execute(struct trapframe *, struct fpemu *, union instr *);
    171 
    172 #ifdef DEBUG
    173 /*
    174  * Dump a `fpn' structure.
    175  */
    176 void
    177 fpu_dumpfpn(struct fpn *fp)
    178 {
    179 	static const char *class[] = {
    180 		"SNAN", "QNAN", "ZERO", "NUM", "INF"
    181 	};
    182 
    183 	KASSERT(fp != NULL);
    184 
    185 	printf("%s %c.%x %x %x %xE%d\n", class[fp->fp_class + 2],
    186 		fp->fp_sign ? '-' : ' ',
    187 		fp->fp_mant[0],	fp->fp_mant[1],
    188 		fp->fp_mant[2], fp->fp_mant[3],
    189 		fp->fp_exp);
    190 }
    191 #endif
    192 
/*
 * fpu_execute returns the following error numbers (0 = no error):
 */
#define	FPE		1	/* take a floating point exception */
#define	NOTFPU		2	/* not an FPU instruction */
#define	FAULT		3	/* copyin/copyout failed; see fe_addr */
    199 
    200 
    201 /*
    202  * Emulate a floating-point instruction.
    203  * Return true if insn is consumed anyway.
    204  * Otherwise, the caller must take care of it.
    205  */
    206 bool
    207 fpu_emulate(struct trapframe *tf, struct fpreg *fpf, ksiginfo_t *ksi)
    208 {
    209 	struct pcb *pcb;
    210 	union instr insn;
    211 	struct fpemu fe;
    212 
    213 	KSI_INIT_TRAP(ksi);
    214 	ksi->ksi_signo = 0;
    215 	ksi->ksi_addr = (void *)tf->tf_srr0;
    216 
    217 	/* initialize insn.is_datasize to tell it is *not* initialized */
    218 	fe.fe_fpstate = fpf;
    219 	fe.fe_cx = 0;
    220 
    221 	/* always set this (to avoid a warning) */
    222 
    223 	if (copyin((void *) (tf->tf_srr0), &insn.i_int, sizeof (insn.i_int))) {
    224 #ifdef DEBUG
    225 		printf("fpu_emulate: fault reading opcode\n");
    226 #endif
    227 		ksi->ksi_signo = SIGSEGV;
    228 		ksi->ksi_trap = EXC_ISI;
    229 		ksi->ksi_code = SEGV_MAPERR;
    230 		return true;
    231 	}
    232 
    233 	DPRINTF(FPE_EX, ("fpu_emulate: emulating insn %x at %p\n",
    234 	    insn.i_int, (void *)tf->tf_srr0));
    235 
    236 	if ((insn.i_any.i_opcd == OPC_TWI) ||
    237 	    ((insn.i_any.i_opcd == OPC_integer_31) &&
    238 	    (insn.i_x.i_xo == OPC31_TW))) {
    239 		/* Check for the two trap insns. */
    240 		DPRINTF(FPE_EX, ("fpu_emulate: SIGTRAP\n"));
    241 		ksi->ksi_signo = SIGTRAP;
    242 		ksi->ksi_trap = EXC_PGM;
    243 		ksi->ksi_code = TRAP_BRKPT;
    244 		return true;
    245 	}
    246 	switch (fpu_execute(tf, &fe, &insn)) {
    247 	case 0:
    248 success:
    249 		DPRINTF(FPE_EX, ("fpu_emulate: success\n"));
    250 		tf->tf_srr0 += 4;
    251 		return true;
    252 
    253 	case FPE:
    254 		pcb = lwp_getpcb(curlwp);
    255 		if ((pcb->pcb_flags & PSL_FE_PREC) == 0)
    256 			goto success;
    257 		DPRINTF(FPE_EX, ("fpu_emulate: SIGFPE\n"));
    258 		ksi->ksi_signo = SIGFPE;
    259 		ksi->ksi_trap = EXC_PGM;
    260 		ksi->ksi_code = fpu_get_fault_code();
    261 		return true;
    262 
    263 	case FAULT:
    264 		DPRINTF(FPE_EX, ("fpu_emulate: SIGSEGV\n"));
    265 		ksi->ksi_signo = SIGSEGV;
    266 		ksi->ksi_trap = EXC_DSI;
    267 		ksi->ksi_code = SEGV_MAPERR;
    268 		ksi->ksi_addr = (void *)fe.fe_addr;
    269 		return true;
    270 
    271 	case NOTFPU:
    272 	default:
    273 		DPRINTF(FPE_EX, ("fpu_emulate: SIGILL\n"));
    274 #if defined(DDB) && defined(DEBUG)
    275 		if (fpe_debug & FPE_EX) {
    276 			printf("fpu_emulate:  illegal insn %x at %p:",
    277 			insn.i_int, (void *) (tf->tf_srr0));
    278 			opc_disasm((vaddr_t)(tf->tf_srr0), insn.i_int);
    279 		}
    280 #endif
    281 		return false;
    282 	}
    283 }
    284 
    285 /*
    286  * fpu_to_single(): Helper function for stfs{,u}{,x}.
    287  *
    288  * Single-precision (float) data is internally represented in
    289  * double-precision (double) format in floating-point registers (FRs).
    290  * Even though double value cannot be translated into float format in
    291  * general, Power ISA (2.0.3--3.1) specify conversion algorithm when
    292  * stored to memory (see Sec. 4.6.3):
    293  *
    294  *  - Extra fraction bits are truncated regardless of rounding mode.
    295  *  - When magnitude is larger than the maximum number in float format,
    296  *    bits 63--62 and 58--29 are mechanically copied into bits 31--0.
    297  *  - When magnitude is representable as denormalized number in float
    298  *    format, it is stored as normalized double value in FRs;
    299  *    denormalization is required in this case.
    300  *  - When magnitude is smaller than the minimum denormalized number in
    301  *    float format, the result is undefined. For G5 (970MP Rev 1.1),
    302  *    (sign | 0) seems to be stored. For G4 and prior, some ``random''
    303  *    garbage is stored in exponent. We mimic G5 for now.
    304  */
static uint32_t
fpu_to_single(uint64_t reg)
{
	/* Sign moves from bit 63 of the double to bit 31 of the float. */
	const uint32_t signbit = (reg & __BIT(63)) >> 32;
	/* Unbiased exponent of the double-format value. */
	const int e = __SHIFTOUT(reg, __BITS(62, 52)) - 1023;
	uint32_t body;

	if ((reg & ~__BIT(63)) == 0 || e >= -126) {
		/*
		 * Zero, normalized, inf, NaN, or too large for float:
		 * nothing to denormalize.  The exponent MSB (bit 62)
		 * lands in bit 30, and the 7 exponent LSBs together with
		 * the top 23 fraction bits (bits 58..29) are copied as
		 * they are into bits 29..0.
		 */
		body = ((reg & __BIT(62)) >> 32) |
		    __SHIFTOUT(reg, __BITS(58, 29));
	} else if (e >= -149) {
		/*
		 * Representable only as a float denormal: make the
		 * implicit leading one explicit and shift the
		 * significand right by 1..23 extra places.  The float
		 * exponent field stays zero.
		 */
		const int drop = -126 - e;

		body = __SHIFTOUT(reg | __BIT(52), __BITS(52, 29 + drop));
	} else {
		/* Below the smallest float denormal: undefined, mimic G5. */
		body = 0;
	}

	return signbit | body;
}
    334 
    335 /*
    336  * Execute an FPU instruction (one that runs entirely in the FPU; not
    337  * FBfcc or STF, for instance).  On return, fe->fe_fs->fs_fsr will be
    338  * modified to reflect the setting the hardware would have left.
    339  *
    340  * Note that we do not catch all illegal opcodes, so you can, for instance,
    341  * multiply two integers this way.
    342  */
    343 static int
    344 fpu_execute(struct trapframe *tf, struct fpemu *fe, union instr *insn)
    345 {
    346 	struct fpn *fp;
    347 	union instr instr = *insn;
    348 	int *a;
    349 	int ra, rb, rc, rt, type, mask, fsr, cx, bf, setcr, cond;
    350 	u_int bits;
    351 	struct fpreg *fs;
    352 	int i;
    353 
    354 	/* Setup work. */
    355 	fp = NULL;
    356 	fs = fe->fe_fpstate;
    357 	fe->fe_fpscr = ((int *)&fs->fpscr)[1];
    358 
    359 	/*
    360 	 * On PowerPC all floating point values are stored in registers
    361 	 * as doubles, even when used for single precision operations.
    362 	 */
    363 	type = FTYPE_DBL;
    364 	cond = instr.i_any.i_rc;
    365 	setcr = 0;
    366 	bf = 0;	/* XXX gcc */
    367 
    368 #if defined(DDB) && defined(DEBUG)
    369 	if (fpe_debug & FPE_EX) {
    370 		vaddr_t loc = tf->tf_srr0;
    371 
    372 		printf("Trying to emulate: %p ", (void *)loc);
    373 		opc_disasm(loc, instr.i_int);
    374 	}
    375 #endif
    376 
    377 	/*
    378 	 * `Decode' and execute instruction.
    379 	 */
    380 
    381 	if ((instr.i_any.i_opcd >= OPC_LFS && instr.i_any.i_opcd <= OPC_STFDU) ||
    382 	    instr.i_any.i_opcd == OPC_integer_31) {
    383 		/*
    384 		 * Handle load/store insns:
    385 		 *
    386 		 * Convert to/from single if needed, calculate addr,
    387 		 * and update index reg if needed.
    388 		 */
    389 		vaddr_t addr;
    390 		size_t size = sizeof(double);
    391 		int store, update;
    392 
    393 		cond = 0; /* ld/st never set condition codes */
    394 
    395 
    396 		if (instr.i_any.i_opcd == OPC_integer_31) {
    397 			if (instr.i_x.i_xo == OPC31_STFIWX) {
    398 				FPU_EMU_EVCNT_INCR(stfiwx);
    399 
    400 				/* Store as integer */
    401 				ra = instr.i_x.i_ra;
    402 				rb = instr.i_x.i_rb;
    403 				DPRINTF(FPE_INSN, ("reg %d has %lx reg %d has %lx\n",
    404 					ra, tf->tf_fixreg[ra], rb, tf->tf_fixreg[rb]));
    405 
    406 				addr = tf->tf_fixreg[rb];
    407 				if (ra != 0)
    408 					addr += tf->tf_fixreg[ra];
    409 				rt = instr.i_x.i_rt;
    410 				a = (int *)&fs->fpreg[rt];
    411 				DPRINTF(FPE_INSN,
    412 					("fpu_execute: Store INT %x at %p\n",
    413 						a[1], (void *)addr));
    414 				if (copyout(&a[1], (void *)addr, sizeof(int))) {
    415 					fe->fe_addr = addr;
    416 					return (FAULT);
    417 				}
    418 				return (0);
    419 			}
    420 
    421 			if ((instr.i_x.i_xo & OPC31_FPMASK) != OPC31_FPOP)
    422 				/* Not an indexed FP load/store op */
    423 				return (NOTFPU);
    424 
    425 			store = (instr.i_x.i_xo & 0x80);
    426 			if ((instr.i_x.i_xo & 0x40) == 0) {
    427 				type = FTYPE_SNG;
    428 				size = sizeof(float);
    429 			}
    430 			update = (instr.i_x.i_xo & 0x20);
    431 
    432 			/* calculate EA of load/store */
    433 			ra = instr.i_x.i_ra;
    434 			rb = instr.i_x.i_rb;
    435 			DPRINTF(FPE_INSN, ("reg %d has %lx reg %d has %lx\n",
    436 				ra, tf->tf_fixreg[ra], rb, tf->tf_fixreg[rb]));
    437 			addr = tf->tf_fixreg[rb];
    438 			if (ra != 0)
    439 				addr += tf->tf_fixreg[ra];
    440 			rt = instr.i_x.i_rt;
    441 		} else {
    442 			store = instr.i_d.i_opcd & 0x4;
    443 			if ((instr.i_d.i_opcd & 0x2) == 0) {
    444 				type = FTYPE_SNG;
    445 				size = sizeof(float);
    446 			}
    447 			update = instr.i_d.i_opcd & 0x1;
    448 
    449 			/* calculate EA of load/store */
    450 			ra = instr.i_d.i_ra;
    451 			addr = instr.i_d.i_d;
    452 			DPRINTF(FPE_INSN, ("reg %d has %lx displ %lx\n",
    453 				ra, tf->tf_fixreg[ra], addr));
    454 			if (ra != 0)
    455 				addr += tf->tf_fixreg[ra];
    456 			rt = instr.i_d.i_rt;
    457 		}
    458 
    459 		if (update && ra == 0)
    460 			return (NOTFPU);
    461 
    462 		if (store) {
    463 			/* Store */
    464 			uint32_t word;
    465 			const void *kaddr;
    466 
    467 			FPU_EMU_EVCNT_INCR(fpstore);
    468 			if (type != FTYPE_DBL) {
    469 				/*
    470 				 * As Power ISA specifies conversion algorithm
    471 				 * for store floating-point single insns, we
    472 				 * cannot use fpu_explode() and _implode() here.
    473 				 * See fpu_to_single() and comment therein for
    474 				 * more details.
    475 				 */
    476 				DPRINTF(FPE_INSN,
    477 					("fpu_execute: Store SNG at %p\n",
    478 						(void *)addr));
    479 				word = fpu_to_single(FR(rt));
    480 				kaddr = &word;
    481 			} else {
    482 				DPRINTF(FPE_INSN,
    483 					("fpu_execute: Store DBL at %p\n",
    484 						(void *)addr));
    485 				kaddr = &FR(rt);
    486 			}
    487 			if (copyout(kaddr, (void *)addr, size)) {
    488 				fe->fe_addr = addr;
    489 				return (FAULT);
    490 			}
    491 		} else {
    492 			/* Load */
    493 			FPU_EMU_EVCNT_INCR(fpload);
    494 			DPRINTF(FPE_INSN, ("fpu_execute: Load from %p\n",
    495 				(void *)addr));
    496 			if (copyin((const void *)addr, &FR(rt), size)) {
    497 				fe->fe_addr = addr;
    498 				return (FAULT);
    499 			}
    500 			if (type != FTYPE_DBL) {
    501 				fpu_explode(fe, fp = &fe->fe_f1, type, FR(rt));
    502 				fpu_implode(fe, fp, FTYPE_DBL, &FR(rt));
    503 			}
    504 		}
    505 		if (update)
    506 			tf->tf_fixreg[ra] = addr;
    507 		/* Complete. */
    508 		return (0);
    509 	} else if (instr.i_any.i_opcd == OPC_sp_fp_59 ||
    510 		instr.i_any.i_opcd == OPC_dp_fp_63) {
    511 
    512 
    513 		if (instr.i_any.i_opcd == OPC_dp_fp_63 &&
    514 		    !(instr.i_a.i_xo & OPC63M_MASK)) {
    515 			/* Format X */
    516 			rt = instr.i_x.i_rt;
    517 			ra = instr.i_x.i_ra;
    518 			rb = instr.i_x.i_rb;
    519 
    520 
    521 			/* One of the special opcodes.... */
    522 			switch (instr.i_x.i_xo) {
    523 			case	OPC63_FCMPU:
    524 				FPU_EMU_EVCNT_INCR(fcmpu);
    525 				DPRINTF(FPE_INSN, ("fpu_execute: FCMPU\n"));
    526 				rt >>= 2;
    527 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
    528 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
    529 				fpu_compare(fe, 0);
    530 				/* Make sure we do the condition regs. */
    531 				cond = 0;
    532 				/* N.B.: i_rs is already left shifted by two. */
    533 				bf = instr.i_x.i_rs & 0xfc;
    534 				setcr = 1;
    535 				break;
    536 
    537 			case	OPC63_FRSP:
    538 				/*
    539 				 * Convert to single:
    540 				 *
    541 				 * PowerPC uses this to round a double
    542 				 * precision value to single precision,
    543 				 * but values in registers are always
    544 				 * stored in double precision format.
    545 				 */
    546 				FPU_EMU_EVCNT_INCR(frsp);
    547 				DPRINTF(FPE_INSN, ("fpu_execute: FRSP\n"));
    548 				fpu_explode(fe, fp = &fe->fe_f1, FTYPE_DBL,
    549 				    FR(rb));
    550 				fpu_implode(fe, fp, FTYPE_SNG, &FR(rt));
    551 				fpu_explode(fe, fp = &fe->fe_f1, FTYPE_SNG,
    552 				    FR(rt));
    553 				type = FTYPE_DBL | FTYPE_FPSCR;
    554 				break;
    555 			case	OPC63_FCTIW:
    556 			case	OPC63_FCTIWZ:
    557 				FPU_EMU_EVCNT_INCR(fctiw);
    558 				DPRINTF(FPE_INSN, ("fpu_execute: FCTIW\n"));
    559 				fpu_explode(fe, fp = &fe->fe_f1, type, FR(rb));
    560 				type = FTYPE_INT | FTYPE_FPSCR;
    561 				if (instr.i_x.i_xo == OPC63_FCTIWZ)
    562 					type |= FTYPE_RD_RZ;
    563 				break;
    564 			case	OPC63_FCMPO:
    565 				FPU_EMU_EVCNT_INCR(fcmpo);
    566 				DPRINTF(FPE_INSN, ("fpu_execute: FCMPO\n"));
    567 				rt >>= 2;
    568 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
    569 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
    570 				fpu_compare(fe, 1);
    571 				/* Make sure we do the condition regs. */
    572 				cond = 0;
    573 				/* N.B.: i_rs is already left shifted by two. */
    574 				bf = instr.i_x.i_rs & 0xfc;
    575 				setcr = 1;
    576 				break;
    577 			case	OPC63_MTFSB1:
    578 				FPU_EMU_EVCNT_INCR(mtfsb1);
    579 				DPRINTF(FPE_INSN, ("fpu_execute: MTFSB1\n"));
    580 				fe->fe_cx = (1 << (31 - rt)) &
    581 				    ~(FPSCR_FEX | FPSCR_VX);
    582 				break;
    583 			case	OPC63_FNEG:
    584 				FPU_EMU_EVCNT_INCR(fnegabs);
    585 				DPRINTF(FPE_INSN, ("fpu_execute: FNEGABS\n"));
    586 				memcpy(&fs->fpreg[rt], &fs->fpreg[rb],
    587 					sizeof(double));
    588 				a = (int *)&fs->fpreg[rt];
    589 				*a ^= (1 << 31);
    590 				break;
    591 			case	OPC63_MCRFS:
    592 				FPU_EMU_EVCNT_INCR(mcrfs);
    593 				DPRINTF(FPE_INSN, ("fpu_execute: MCRFS\n"));
    594 				cond = 0;
    595 				rt &= 0x1c;
    596 				ra &= 0x1c;
    597 				/* Extract the bits we want */
    598 				bits = (fe->fe_fpscr >> (28 - ra)) & 0xf;
    599 				/* Clear the bits we copied. */
    600 				mask = (0xf << (28 - ra)) & MCRFS_MASK;
    601 				fe->fe_fpscr &= ~mask;
    602 				/* Now shove them in the right part of cr */
    603 				tf->tf_cr &= ~(0xf << (28 - rt));
    604 				tf->tf_cr |= bits << (28 - rt);
    605 				break;
    606 			case	OPC63_MTFSB0:
    607 				FPU_EMU_EVCNT_INCR(mtfsb0);
    608 				DPRINTF(FPE_INSN, ("fpu_execute: MTFSB0\n"));
    609 				fe->fe_fpscr &= ~(1 << (31 - rt)) |
    610 				    (FPSCR_FEX | FPSCR_VX);
    611 				break;
    612 			case	OPC63_FMR:
    613 				FPU_EMU_EVCNT_INCR(fmr);
    614 				DPRINTF(FPE_INSN, ("fpu_execute: FMR\n"));
    615 				memcpy(&fs->fpreg[rt], &fs->fpreg[rb],
    616 					sizeof(double));
    617 				break;
    618 			case	OPC63_MTFSFI:
    619 				FPU_EMU_EVCNT_INCR(mtfsfi);
    620 				DPRINTF(FPE_INSN, ("fpu_execute: MTFSFI\n"));
    621 				rb >>= 1;
    622 				rt &= 0x1c; /* Already left-shifted 4 */
    623 				bits = rb << (28 - rt);
    624 				mask = 0xf << (28 - rt);
    625 				fe->fe_fpscr = (fe->fe_fpscr & ~mask) | bits;
    626 				break;
    627 			case	OPC63_FNABS:
    628 				FPU_EMU_EVCNT_INCR(fnabs);
    629 				DPRINTF(FPE_INSN, ("fpu_execute: FABS\n"));
    630 				memcpy(&fs->fpreg[rt], &fs->fpreg[rb],
    631 					sizeof(double));
    632 				a = (int *)&fs->fpreg[rt];
    633 				*a |= (1 << 31);
    634 				break;
    635 			case	OPC63_FABS:
    636 				FPU_EMU_EVCNT_INCR(fabs);
    637 				DPRINTF(FPE_INSN, ("fpu_execute: FABS\n"));
    638 				memcpy(&fs->fpreg[rt], &fs->fpreg[rb],
    639 					sizeof(double));
    640 				a = (int *)&fs->fpreg[rt];
    641 				*a &= ~(1 << 31);
    642 				break;
    643 			case	OPC63_MFFS:
    644 				FPU_EMU_EVCNT_INCR(mffs);
    645 				DPRINTF(FPE_INSN, ("fpu_execute: MFFS\n"));
    646 				memcpy(&fs->fpreg[rt], &fs->fpscr,
    647 					sizeof(fs->fpscr));
    648 				break;
    649 			case	OPC63_MTFSF:
    650 				FPU_EMU_EVCNT_INCR(mtfsf);
    651 				DPRINTF(FPE_INSN, ("fpu_execute: MTFSF\n"));
    652 				if ((rt = instr.i_xfl.i_flm) == -1) {
    653 					mask = -1;
    654 				} else {
    655 					mask = 0;
    656 					/* Convert 1 bit -> 4 bits */
    657 					for (i = 0; i < 8; i++)
    658 						if (rt & (1 << i))
    659 							mask |=
    660 							    (0xf << (4 * i));
    661 				}
    662 				a = (int *)&fs->fpreg[rb];
    663 				bits = a[1] & mask;
    664 				fe->fe_fpscr = (fe->fe_fpscr & ~mask) | bits;
    665 				break;
    666 			case	OPC63_FCTID:
    667 			case	OPC63_FCTIDZ:
    668 				FPU_EMU_EVCNT_INCR(fctid);
    669 				DPRINTF(FPE_INSN, ("fpu_execute: FCTID\n"));
    670 				fpu_explode(fe, fp = &fe->fe_f1, type, FR(rb));
    671 				type = FTYPE_LNG | FTYPE_FPSCR;
    672 				if (instr.i_x.i_xo == OPC63_FCTIDZ)
    673 					type |= FTYPE_RD_RZ;
    674 				break;
    675 			case	OPC63_FCFID:
    676 				FPU_EMU_EVCNT_INCR(fcfid);
    677 				DPRINTF(FPE_INSN, ("fpu_execute: FCFID\n"));
    678 				fpu_explode(fe, fp = &fe->fe_f1, FTYPE_LNG,
    679 				    FR(rb));
    680 				type = FTYPE_DBL | FTYPE_FPSCR;
    681 				break;
    682 			default:
    683 				return (NOTFPU);
    684 				break;
    685 			}
    686 		} else {
    687 			/* Format A */
    688 			rt = instr.i_a.i_frt;
    689 			ra = instr.i_a.i_fra;
    690 			rb = instr.i_a.i_frb;
    691 			rc = instr.i_a.i_frc;
    692 
    693 			/*
    694 			 * All arithmetic operations work on registers, which
    695 			 * are stored as doubles.
    696 			 */
    697 			type = FTYPE_DBL;
    698 			switch ((unsigned int)instr.i_a.i_xo) {
    699 			case	OPC59_FDIVS:
    700 				FPU_EMU_EVCNT_INCR(fdiv);
    701 				DPRINTF(FPE_INSN, ("fpu_execute: FDIV\n"));
    702 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
    703 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
    704 				fp = fpu_div(fe);
    705 				break;
    706 			case	OPC59_FSUBS:
    707 				FPU_EMU_EVCNT_INCR(fsub);
    708 				DPRINTF(FPE_INSN, ("fpu_execute: FSUB\n"));
    709 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
    710 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
    711 				fp = fpu_sub(fe);
    712 				break;
    713 			case	OPC59_FADDS:
    714 				FPU_EMU_EVCNT_INCR(fadd);
    715 				DPRINTF(FPE_INSN, ("fpu_execute: FADD\n"));
    716 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
    717 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
    718 				fp = fpu_add(fe);
    719 				break;
    720 			case	OPC59_FSQRTS:
    721 				FPU_EMU_EVCNT_INCR(fsqrt);
    722 				DPRINTF(FPE_INSN, ("fpu_execute: FSQRT\n"));
    723 				fpu_explode(fe, &fe->fe_f1, type, FR(rb));
    724 				fp = fpu_sqrt(fe);
    725 				break;
    726 			case	OPC63M_FSEL:
    727 				FPU_EMU_EVCNT_INCR(fsel);
    728 				DPRINTF(FPE_INSN, ("fpu_execute: FSEL\n"));
    729 				a = (int *)&fe->fe_fpstate->fpreg[ra];
    730 				if ((( a[0] & 0x80000000) &&
    731 				     ((a[0] & 0x7fffffff) | a[1])) ||
    732 				    (( a[0] & 0x7ff00000) &&
    733 				     ((a[0] & 0x000fffff) | a[1]))) {
    734 					/* negative/NaN or NaN */
    735 					rc = rb;
    736 				}
    737 				DPRINTF(FPE_INSN, ("f%d => f%d\n", rc, rt));
    738 				memcpy(&fs->fpreg[rt], &fs->fpreg[rc],
    739 					sizeof(double));
    740 				break;
    741 			case	OPC59_FRES:
    742 				FPU_EMU_EVCNT_INCR(fpres);
    743 				DPRINTF(FPE_INSN, ("fpu_execute: FPRES\n"));
    744 				fpu_explode(fe, &fe->fe_f1, FTYPE_INT, 1);
    745 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
    746 				fp = fpu_div(fe);
    747 				break;
    748 			case	OPC59_FMULS:
    749 				FPU_EMU_EVCNT_INCR(fmul);
    750 				DPRINTF(FPE_INSN, ("fpu_execute: FMUL\n"));
    751 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
    752 				fpu_explode(fe, &fe->fe_f2, type, FR(rc));
    753 				fp = fpu_mul(fe);
    754 				break;
    755 			case	OPC63M_FRSQRTE:
    756 				/* Reciprocal sqrt() estimate */
    757 				FPU_EMU_EVCNT_INCR(frsqrte);
    758 				DPRINTF(FPE_INSN, ("fpu_execute: FRSQRTE\n"));
    759 				fpu_explode(fe, &fe->fe_f1, type, FR(rb));
    760 				fp = fpu_sqrt(fe);
    761 				fe->fe_f2 = *fp;
    762 				fpu_explode(fe, &fe->fe_f1, FTYPE_INT, 1);
    763 				fp = fpu_div(fe);
    764 				break;
    765 			case	OPC59_FMSUBS:
    766 				FPU_EMU_EVCNT_INCR(fmsub);
    767 				DPRINTF(FPE_INSN, ("fpu_execute: FMSUB\n"));
    768 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
    769 				fpu_explode(fe, &fe->fe_f2, type, FR(rc));
    770 				fp = fpu_mul(fe);
    771 				fe->fe_f1 = *fp;
    772 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
    773 				fp = fpu_sub(fe);
    774 				break;
    775 			case	OPC59_FMADDS:
    776 				FPU_EMU_EVCNT_INCR(fmadd);
    777 				DPRINTF(FPE_INSN, ("fpu_execute: FMADD\n"));
    778 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
    779 				fpu_explode(fe, &fe->fe_f2, type, FR(rc));
    780 				fp = fpu_mul(fe);
    781 				fe->fe_f1 = *fp;
    782 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
    783 				fp = fpu_add(fe);
    784 				break;
    785 			case	OPC59_FNMSUBS:
    786 				FPU_EMU_EVCNT_INCR(fnmsub);
    787 				DPRINTF(FPE_INSN, ("fpu_execute: FNMSUB\n"));
    788 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
    789 				fpu_explode(fe, &fe->fe_f2, type, FR(rc));
    790 				fp = fpu_mul(fe);
    791 				fe->fe_f1 = *fp;
    792 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
    793 				fp = fpu_sub(fe);
    794 				/* Negate */
    795 				if (!ISNAN(fp))
    796 					fp->fp_sign ^= 1;
    797 				break;
    798 			case	OPC59_FNMADDS:
    799 				FPU_EMU_EVCNT_INCR(fnmadd);
    800 				DPRINTF(FPE_INSN, ("fpu_execute: FNMADD\n"));
    801 				fpu_explode(fe, &fe->fe_f1, type, FR(ra));
    802 				fpu_explode(fe, &fe->fe_f2, type, FR(rc));
    803 				fp = fpu_mul(fe);
    804 				fe->fe_f1 = *fp;
    805 				fpu_explode(fe, &fe->fe_f2, type, FR(rb));
    806 				fp = fpu_add(fe);
    807 				/* Negate */
    808 				if (!ISNAN(fp))
    809 					fp->fp_sign ^= 1;
    810 				break;
    811 			default:
    812 				return (NOTFPU);
    813 				break;
    814 			}
    815 
    816 			/* If the instruction was single precision, round */
    817 			if (!(instr.i_any.i_opcd & 0x4)) {
    818 				fpu_implode(fe, fp, FTYPE_SNG | FTYPE_FPSCR,
    819 				    &FR(rt));
    820 				fpu_explode(fe, fp = &fe->fe_f1, FTYPE_SNG,
    821 				    FR(rt));
    822 			} else
    823 				type |= FTYPE_FPSCR;
    824 		}
    825 	} else {
    826 		return (NOTFPU);
    827 	}
    828 
    829 	/*
    830 	 * ALU operation is complete.  Collapse the result and then check
    831 	 * for exceptions.  If we got any, and they are enabled, do not
    832 	 * alter the destination register, just stop with an exception.
    833 	 * Otherwise set new current exceptions and accrue.
    834 	 */
    835 	if (fp)
    836 		fpu_implode(fe, fp, type, &FR(rt));
    837 	cx = fe->fe_cx;
    838 	fsr = fe->fe_fpscr & ~(FPSCR_FEX|FPSCR_VX);
    839 	if (cx != 0) {
    840 		fsr |= cx;
    841 		DPRINTF(FPE_INSN, ("fpu_execute: cx %x, fsr %x\n", cx, fsr));
    842 	}
    843 	if (fsr & FPSR_INV)
    844 		fsr |= FPSCR_VX;
    845 	mask = (fsr & FPSR_EX) << (25 - 3);
    846 	if (fsr & mask)
    847 		fsr |= FPSCR_FEX;
    848 	if ((fsr ^ fe->fe_fpscr) & FPSR_EX_MSK)
    849 		fsr |= FPSCR_FX;
    850 
    851 	if (cond) {
    852 		bits = fsr & 0xf0000000;
    853 		/* Isolate condition codes */
    854 		bits >>= 28;
    855 		/* Move fpu condition codes to cr[1] */
    856 		tf->tf_cr &= ~(0x0f000000);
    857 		tf->tf_cr |= (bits << 24);
    858 		DPRINTF(FPE_INSN, ("fpu_execute: cr[1] <= %x\n", bits));
    859 	}
    860 
    861 	if (setcr) {
    862 		bits = fsr & FPSCR_FPCC;
    863 		/* Isolate condition codes */
    864 		bits <<= 16;
    865 		/* Move fpu condition codes to cr[bf/4] */
    866 		tf->tf_cr &= ~(0xf0000000>>bf);
    867 		tf->tf_cr |= (bits >> bf);
    868 		DPRINTF(FPE_INSN, ("fpu_execute: cr[%d] (cr=%x) <= %x\n", bf/4, tf->tf_cr, bits));
    869 	}
    870 
    871 	((int *)&fs->fpscr)[1] = fsr;
    872 	if (fsr & FPSCR_FEX)
    873 		return(FPE);
    874 	return (0);	/* success */
    875 }
    876