/* (code-browser navigation artifact: Home | History | Annotate | Download) */
      1 /* $NetBSD: fp_complete.c,v 1.33 2025/03/16 22:34:36 thorpej Exp $ */
      2 
      3 /*-
      4  * Copyright (c) 2001 Ross Harvey
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *	This product includes software developed by the NetBSD
     18  *	Foundation, Inc. and its contributors.
     19  * 4. Neither the name of The NetBSD Foundation nor the names of its
     20  *    contributors may be used to endorse or promote products derived
     21  *    from this software without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     33  * POSSIBILITY OF SUCH DAMAGE.
     34  */
     35 
     36 #include "opt_ddb.h"
     37 
     38 #include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */
     39 
     40 __KERNEL_RCSID(0, "$NetBSD: fp_complete.c,v 1.33 2025/03/16 22:34:36 thorpej Exp $");
     41 
     42 #include <sys/param.h>
     43 #include <sys/systm.h>
     44 #include <sys/proc.h>
     45 #include <sys/atomic.h>
     46 #include <sys/evcnt.h>
     47 
     48 #include <machine/cpu.h>
     49 #include <machine/fpu.h>
     50 #include <machine/reg.h>
     51 #include <machine/alpha.h>
     52 #include <machine/alpha_instruction.h>
     53 
     54 #include <lib/libkern/softfloat.h>
     55 
     56 /*
     57  * Validate our assumptions about bit positions.
     58  */
     59 __CTASSERT(ALPHA_AESR_INV == (FP_X_INV << 1));
     60 __CTASSERT(ALPHA_AESR_DZE == (FP_X_DZ  << 1));
     61 __CTASSERT(ALPHA_AESR_OVF == (FP_X_OFL << 1));
     62 __CTASSERT(ALPHA_AESR_UNF == (FP_X_UFL << 1));
     63 __CTASSERT(ALPHA_AESR_INE == (FP_X_IMP << 1));
     64 __CTASSERT(ALPHA_AESR_IOV == (FP_X_IOV << 1));
     65 
     66 __CTASSERT(IEEE_TRAP_ENABLE_INV == (FP_X_INV << 1));
     67 __CTASSERT(IEEE_TRAP_ENABLE_DZE == (FP_X_DZ  << 1));
     68 __CTASSERT(IEEE_TRAP_ENABLE_OVF == (FP_X_OFL << 1));
     69 __CTASSERT(IEEE_TRAP_ENABLE_UNF == (FP_X_UFL << 1));
     70 __CTASSERT(IEEE_TRAP_ENABLE_INE == (FP_X_IMP << 1));
     71 
     72 __CTASSERT((uint64_t)FP_X_IMP << (61 - 3) == FPCR_INED);
     73 __CTASSERT((uint64_t)FP_X_UFL << (61 - 3) == FPCR_UNFD);
     74 __CTASSERT((uint64_t)FP_X_OFL << (49 - 0) == FPCR_OVFD);
     75 __CTASSERT((uint64_t)FP_X_DZ  << (49 - 0) == FPCR_DZED);
     76 __CTASSERT((uint64_t)FP_X_INV << (49 - 0) == FPCR_INVD);
     77 
     78 __CTASSERT(FP_C_ALLBITS == MDLWP_FP_C);
     79 
     80 #define	TSWINSIZE 4	/* size of trap shadow window in uint32_t units */
     81 
     82 /*	Set Name		Opcodes			AARM C.* Symbols  */
     83 
     84 #define	CPUREG_CLASS		(0xfUL << 0x10)		/* INT[ALSM]	  */
     85 #define	FPUREG_CLASS		(0xfUL << 0x14)		/* ITFP, FLT[ILV] */
     86 #define	CHECKFUNCTIONCODE	(1UL << 0x18)		/* MISC		  */
     87 #define	TRAPSHADOWBOUNDARY	(1UL << 0x00 |		/* PAL		  */\
     88 				 1UL << 0x19 |		/* \PAL\	  */\
     89 				 1UL << 0x1a |		/* JSR		  */\
     90 				 1UL << 0x1b |		/* \PAL\	  */\
     91 				 1UL << 0x1d |		/* \PAL\	  */\
     92 				 1UL << 0x1e |		/* \PAL\	  */\
     93 				 1UL << 0x1f |		/* \PAL\	  */\
     94 				 0xffffUL << 0x30 | 	/* branch ops	  */\
     95 				 CHECKFUNCTIONCODE)
     96 
     97 #define	MAKE_FLOATXX(width, expwidth, sign, exp, msb, rest_of_frac) \
     98 	(u_int ## width ## _t)(sign) << ((width) - 1)			|\
     99 	(u_int ## width ## _t)(exp)  << ((width) - 1 - (expwidth))	|\
    100 	(u_int ## width ## _t)(msb)  << ((width) - 1 - (expwidth) - 1)	|\
    101 	(u_int ## width ## _t)(rest_of_frac)
    102 
    103 #define	FLOAT32QNAN MAKE_FLOATXX(32, 8, 0, 0xff, 1, 0)
    104 #define	FLOAT64QNAN MAKE_FLOATXX(64, 11, 0, 0x7ff, 1, 0)
    105 
    106 #define IS_SUBNORMAL(v)	((v)->exp == 0 && (v)->frac != 0)
    107 
    108 #define	PREFILTER_SUBNORMAL(l,v) if ((l)->l_md.md_flags & IEEE_MAP_DMZ	\
    109 				     && IS_SUBNORMAL(v))		\
    110 					 (v)->frac = 0; else
    111 
    112 #define	POSTFILTER_SUBNORMAL(l,v) if ((l)->l_md.md_flags & IEEE_MAP_UMZ	\
    113 				      && IS_SUBNORMAL(v))		\
    114 					  (v)->frac = 0; else
    115 
    116 	/* Alpha returns 2.0 for true, all zeroes for false. */
    117 
    118 #define CMP_RESULT(flag) ((flag) ? 4UL << 60 : 0L)
    119 
    120 	/* Move bits from sw fp_c to hw fpcr. */
    121 
    122 #define	CRBLIT(sw, hw, m, offs) (((sw) & ~(m)) | ((hw) >> (offs) & (m)))
    123 
    124 static struct evcnt fpevent_use;
    125 static struct evcnt fpevent_reuse;
    126 
    127 /*
    128  * Temporary trap shadow instrumentation. The [un]resolved counters
    129  * could be kept permanently, as they provide information on whether
    130  * user code has met AARM trap shadow generation requirements.
    131  */
    132 
    133 static struct evcnt ts_scans;      /* trap shadow scans */
    134 static struct evcnt ts_insns;      /* total scanned insns */
    135 static struct evcnt ts_insns_max;  /* per-scan high water mark */
    136 static struct evcnt ts_resolved;   /* cases trigger pc found */
    137 static struct evcnt ts_unresolved; /* cases it wasn't, code problems? */
    138 
    139 static struct evcnt fp_ill_opc;    /* unexpected op codes */
    140 static struct evcnt fp_ill_func;   /* unexpected function codes */
    141 static struct evcnt fp_ill_anyop;  /* this "cannot happen" */
    142 
    143 static struct evcnt fp_vax;        /* traps from VAX FP insns */
    144 
    145 struct alpha_shadow {
    146 	uint64_t uop;		/* bit mask of unexpected opcodes */
    147 	uint32_t ufunc;		/* bit mask of unexpected functions */
    148 } alpha_shadow;
    149 
    150 static float64 float64_unk(float64, float64);
    151 static float64 compare_un(float64, float64);
    152 static float64 compare_eq(float64, float64);
    153 static float64 compare_lt(float64, float64);
    154 static float64 compare_le(float64, float64);
    155 static void cvt_qs_ts_st_gf_qf(uint32_t, struct lwp *);
    156 static void cvt_gd(uint32_t, struct lwp *);
    157 static void cvt_qt_dg_qg(uint32_t, struct lwp *);
    158 static void cvt_tq_gq(uint32_t, struct lwp *);
    159 
    160 static float32 (*swfp_s[])(float32, float32) = {
    161 	float32_add, float32_sub, float32_mul, float32_div,
    162 };
    163 
    164 static float64 (*swfp_t[])(float64, float64) = {
    165 	float64_add, float64_sub, float64_mul, float64_div,
    166 	compare_un,    compare_eq,    compare_lt,    compare_le,
    167 	float64_unk, float64_unk, float64_unk, float64_unk
    168 };
    169 
    170 static void (*swfp_cvt[])(uint32_t, struct lwp *) = {
    171 	cvt_qs_ts_st_gf_qf, cvt_gd, cvt_qt_dg_qg, cvt_tq_gq
    172 };
    173 
    174 static void
    175 this_cannot_happen(int what_cannot_happen, int64_t bits)
    176 {
    177 	static int total;
    178 	alpha_instruction inst;
    179 	static uint64_t reported;
    180 
    181 	inst.bits = bits;
    182 	atomic_inc_ulong(&fp_ill_func.ev_count);
    183 	if (bits != -1)
    184 		alpha_shadow.uop |= 1UL << inst.generic_format.opcode;
    185 	if (1UL << what_cannot_happen & reported)
    186 		return;
    187 	reported |= 1UL << what_cannot_happen;
    188 	if (total >= 1000)
    189 		return;	/* right now, this return "cannot happen" */
    190 	++total;
    191 	if (bits)
    192 		printf("FP instruction %x\n", (unsigned int)bits);
    193 	printf("FP event %d/%lx/%lx\n", what_cannot_happen, reported,
    194 	    alpha_shadow.uop);
    195 	printf("Please report this to port-alpha-maintainer (at) NetBSD.org\n");
    196 }
    197 
    198 static inline void
    199 sts(unsigned int rn, s_float *v, struct lwp *l)
    200 {
    201 	alpha_sts(rn, v);
    202 	PREFILTER_SUBNORMAL(l, v);
    203 }
    204 
    205 static inline void
    206 stt(unsigned int rn, t_float *v, struct lwp *l)
    207 {
    208 	alpha_stt(rn, v);
    209 	PREFILTER_SUBNORMAL(l, v);
    210 }
    211 
    212 static inline void
    213 lds(unsigned int rn, s_float *v, struct lwp *l)
    214 {
    215 	POSTFILTER_SUBNORMAL(l, v);
    216 	alpha_lds(rn, v);
    217 }
    218 
    219 static inline void
    220 ldt(unsigned int rn, t_float *v, struct lwp *l)
    221 {
    222 	POSTFILTER_SUBNORMAL(l, v);
    223 	alpha_ldt(rn, v);
    224 }
    225 
    226 static float64
    227 compare_lt(float64 a, float64 b)
    228 {
    229 	return CMP_RESULT(float64_lt_quiet(a, b));
    230 }
    231 
    232 static float64
    233 compare_le(float64 a, float64 b)
    234 {
    235 	return CMP_RESULT(float64_le_quiet(a, b));
    236 }
    237 
    238 static float64
    239 compare_un(float64 a, float64 b)
    240 {
    241 	if (float64_is_nan(a) | float64_is_nan(b)) {
    242 		if (float64_is_signaling_nan(a) | float64_is_signaling_nan(b))
    243 			float_set_invalid();
    244 		return CMP_RESULT(1);
    245 	}
    246 	return CMP_RESULT(0);
    247 }
    248 
    249 static float64
    250 compare_eq(float64 a, float64 b)
    251 {
    252 	return CMP_RESULT(float64_eq(a, b));
    253 }
    254 /*
    255  * A note regarding the VAX FP ops.
    256  *
    257  * The AARM gives us complete leeway to set or not set status flags on VAX
    258  * ops, but we do any subnorm, NaN and dirty zero fixups anyway, and we set
    259  * flags by IEEE rules.  Many ops are common to d/f/g and s/t source types.
    260  * For the purely vax ones, it's hard to imagine ever running them.
    261  * (Generated VAX fp ops with completion flags? Hmm.)  We are careful never
    262  * to panic, assert, or print unlimited output based on a path through the
    263  * decoder, so weird cases don't become security issues.
    264  */
    265 static void
    266 cvt_qs_ts_st_gf_qf(uint32_t inst_bits, struct lwp *l)
    267 {
    268 	t_float tfb, tfc;
    269 	s_float sfb, sfc;
    270 	alpha_instruction inst;
    271 
    272 	inst.bits = inst_bits;
    273 	/*
    274 	 * cvtst and cvtts have the same opcode, function, and source.  The
    275 	 * distinction for cvtst is hidden in the illegal modifier combinations.
    276 	 * We decode even the non-/s modifier, so that the fix-up-always mode
    277 	 * works on ev6 and later. The rounding bits are unused and fixed for
    278 	 * cvtst, so we check those too.
    279 	 */
    280 	switch(inst.float_format.function) {
    281 	case op_cvtst:
    282 	case op_cvtst_u:
    283 		sts(inst.float_detail.fb, &sfb, l);
    284 		tfc.i = float32_to_float64(sfb.i);
    285 		ldt(inst.float_detail.fc, &tfc, l);
    286 		return;
    287 	}
    288 	if(inst.float_detail.src == 2) {
    289 		stt(inst.float_detail.fb, &tfb, l);
    290 		sfc.i = float64_to_float32(tfb.i);
    291 		lds(inst.float_detail.fc, &sfc, l);
    292 		return;
    293 	}
    294 	/* 0: S/F */
    295 	/* 1:  /D */
    296 	/* 3: Q/Q */
    297 	this_cannot_happen(5, inst.generic_format.opcode);
    298 	tfc.i = FLOAT64QNAN;
    299 	ldt(inst.float_detail.fc, &tfc, l);
    300 	return;
    301 }
    302 
    303 static void
    304 cvt_gd(uint32_t inst_bits, struct lwp *l)
    305 {
    306 	t_float tfb, tfc;
    307 	alpha_instruction inst;
    308 
    309 	inst.bits = inst_bits;
    310 	stt(inst.float_detail.fb, &tfb, l);
    311 	(void) float64_to_float32(tfb.i);
    312 	l->l_md.md_flags &= ~NETBSD_FLAG_TO_FP_C(FP_X_IMP);
    313 	tfc.i = float64_add(tfb.i, (float64)0);
    314 	ldt(inst.float_detail.fc, &tfc, l);
    315 }
    316 
    317 static void
    318 cvt_qt_dg_qg(uint32_t inst_bits, struct lwp *l)
    319 {
    320 	t_float tfb, tfc;
    321 	alpha_instruction inst;
    322 
    323 	inst.bits = inst_bits;
    324 	switch(inst.float_detail.src) {
    325 	case 0:	/* S/F */
    326 		this_cannot_happen(3, inst.bits);
    327 		/* fall thru */
    328 	case 1: /* D */
    329 		/* VAX dirty 0's and reserved ops => UNPREDICTABLE */
    330 		/* We've done what's important by just not trapping */
    331 		tfc.i = 0;
    332 		break;
    333 	case 2: /* T/G */
    334 		this_cannot_happen(4, inst.bits);
    335 		tfc.i = 0;
    336 		break;
    337 	case 3:	/* Q/Q */
    338 		stt(inst.float_detail.fb, &tfb, l);
    339 		tfc.i = int64_to_float64(tfb.i);
    340 		break;
    341 	}
    342 	alpha_ldt(inst.float_detail.fc, &tfc);
    343 }
    344 /*
    345  * XXX: AARM and 754 seem to disagree here, also, beware of softfloat's
    346  *      unfortunate habit of always returning the nontrapping result.
    347  * XXX: there are several apparent AARM/AAH disagreements, as well as
    348  *      the issue of trap handler pc and trapping results.
    349  */
    350 static void
    351 cvt_tq_gq(uint32_t inst_bits, struct lwp *l)
    352 {
    353 	t_float tfb, tfc;
    354 	alpha_instruction inst;
    355 
    356 	inst.bits = inst_bits;
    357 	stt(inst.float_detail.fb, &tfb, l);
    358 	tfc.i = tfb.sign ? float64_to_int64(tfb.i) : float64_to_uint64(tfb.i);
    359 	alpha_ldt(inst.float_detail.fc, &tfc);	/* yes, ldt */
    360 }
    361 
/*
 * fp_c_to_fpcr_1 --
 *	Build a new FPCR image from the software FP_C word.  Only the
 *	dynamic rounding mode bits are preserved from the "fpcr" argument;
 *	the disable/mirror/summary bits are regenerated from "fp_c".
 *	Returns the value to program into the hardware FPCR.
 */
static uint64_t
fp_c_to_fpcr_1(uint64_t fpcr, uint64_t fp_c)
{
	uint64_t disables;

	/*
	 * It's hard to arrange for conforming bit fields, because the FP_C
	 * and the FPCR are both architected, with specified (and relatively
	 * scrambled) bit numbers. Defining an internal unscrambled FP_C
	 * wouldn't help much, because every user exception requires the
	 * architected bit order in the sigcontext.
	 *
	 * Programs that fiddle with the fpcr exception bits (instead of fp_c)
	 * will lose, because those bits can be and usually are subsetted;
	 * the official home is in the fp_c. Furthermore, the kernel puts
	 * phony enables (it lies :-) in the fpcr in order to get control when
	 * it is necessary to initially set a sticky bit.
	 */

	fpcr &= FPCR_DYN_RM;

	/*
	 * enable traps = case where flag bit is clear AND program wants a trap
	 *
	 * enables = ~flags & mask
	 * disables = ~(~flags | mask)
	 * disables = flags & ~mask. Thank you, Augustus De Morgan (1806-1871)
	 */
	disables = FP_C_TO_NETBSD_FLAG(fp_c) & ~FP_C_TO_NETBSD_MASK(fp_c);

	/*
	 * Scatter the disable bits into their architected FPCR positions;
	 * the shift amounts are validated by the __CTASSERTs at the top of
	 * this file.
	 */
	fpcr |= (disables & (FP_X_IMP | FP_X_UFL)) << (61 - 3);
	fpcr |= (disables & (FP_X_OFL | FP_X_DZ | FP_X_INV)) << (49 - 0);

	/*
	 * NOTE(review): '<<' binds tighter than '&', so the expression below
	 * shifts the FP_C_MIRRORED mask (not fp_c) before masking -- i.e. it
	 * selects fp_c bits already at FPCR positions.  Confirm this is the
	 * intended layout rather than a missing parenthesis.
	 */
	fpcr |= fp_c & FP_C_MIRRORED << (FPCR_MIR_START - FP_C_MIR_START);
	fpcr |= (fp_c & IEEE_MAP_DMZ) << 36;
	if (fp_c & FP_C_MIRRORED)
		fpcr |= FPCR_SUM;	/* summary: some exception has occurred */
	if (fp_c & IEEE_MAP_UMZ)
		fpcr |= FPCR_UNDZ | FPCR_UNFD;	/* underflow-to-zero mode */
	fpcr |= (~fp_c & IEEE_TRAP_ENABLE_DNO) << 41;
	return fpcr;
}
    404 
    405 static void
    406 fp_c_to_fpcr(struct lwp *l)
    407 {
    408 	alpha_write_fpcr(fp_c_to_fpcr_1(alpha_read_fpcr(), l->l_md.md_flags));
    409 }
    410 
    411 void
    412 alpha_write_fp_c(struct lwp *l, uint64_t fp_c)
    413 {
    414 	uint64_t md_flags;
    415 
    416 	fp_c &= MDLWP_FP_C;
    417 	md_flags = l->l_md.md_flags;
    418 	if ((md_flags & MDLWP_FP_C) == fp_c)
    419 		return;
    420 	l->l_md.md_flags = (md_flags & ~MDLWP_FP_C) | fp_c;
    421 	kpreempt_disable();
    422 	if (md_flags & MDLWP_FPACTIVE) {
    423 		alpha_pal_wrfen(1);
    424 		fp_c_to_fpcr(l);
    425 		alpha_pal_wrfen(0);
    426 	} else {
    427 		struct pcb *pcb = l->l_addr;
    428 
    429 		pcb->pcb_fp.fpr_cr =
    430 		    fp_c_to_fpcr_1(pcb->pcb_fp.fpr_cr, l->l_md.md_flags);
    431 	}
    432 	kpreempt_enable();
    433 }
    434 
    435 uint64_t
    436 alpha_read_fp_c(struct lwp *l)
    437 {
    438 	/*
    439 	 * A possibly-desirable EV6-specific optimization would deviate from
    440 	 * the Alpha Architecture spec and keep some FP_C bits in the FPCR,
    441 	 * but in a transparent way. Some of the code for that would need to
    442 	 * go right here.
    443 	 */
    444 	return l->l_md.md_flags & MDLWP_FP_C;
    445 }
    446 
    447 static float64
    448 float64_unk(float64 a, float64 b)
    449 {
    450 	return 0;
    451 }
    452 
    453 /*
    454  * The real function field encodings for IEEE and VAX FP instructions.
    455  *
    456  * Since there is only one operand type field, the cvtXX instructions
    457  * require a variety of special cases, and these have to be analyzed as
    458  * they don't always fit into the field descriptions in AARM section I.
    459  *
    460  * Lots of staring at bits in the appendix shows what's really going on.
    461  *
    462  *	   |	       |
    463  * 15 14 13|12 11 10 09|08 07 06 05
    464  * --------======------============
    465  *  TRAP   : RND : SRC : FUNCTION  :
    466  *  0  0  0:. . .:. . . . . . . . . . . . Imprecise
    467  *  0  0  1|. . .:. . . . . . . . . . . ./U underflow enable (if FP output)
    468  *	   |				 /V overfloat enable (if int output)
    469  *  0  1  0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST
    470  *  0  1  1|. . .:. . . . . . . . . . . . Unsupported
    471  *  1  0  0:. . .:. . . . . . . . . . . ./S software completion (VAX only)
    472  *  1  0  1|. . .:. . . . . . . . . . . ./SU
    473  *	   |				 /SV
    474  *  1  1  0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST/S
    475  *  1  1  1|. . .:. . . . . . . . . . . ./SUI (if FP output)	(IEEE only)
    476  *	   |				 /SVI (if int output)   (IEEE only)
    477  *  S  I  UV: In other words: bits 15:13 are S:I:UV, except that _usually_
    478  *	   |  not all combinations are valid.
    479  *	   |	       |
    480  * 15 14 13|12 11 10 09|08 07 06 05
    481  * --------======------============
    482  *  TRAP   : RND : SRC : FUNCTION  :
    483  *	   | 0	0 . . . . . . . . . . . ./C Chopped
    484  *	   : 0	1 . . . . . . . . . . . ./M Minus Infinity
    485  *	   | 1	0 . . . . . . . . . . . .   Normal
    486  *	   : 1	1 . . . . . . . . . . . ./D Dynamic (in FPCR: Plus Infinity)
    487  *	   |	       |
    488  * 15 14 13|12 11 10 09|08 07 06 05
    489  * --------======------============
    490  *  TRAP   : RND : SRC : FUNCTION  :
    491  *		   0 0. . . . . . . . . . S/F
    492  *		   0 1. . . . . . . . . . -/D
    493  *		   1 0. . . . . . . . . . T/G
    494  *		   1 1. . . . . . . . . . Q/Q
    495  *	   |	       |
    496  * 15 14 13|12 11 10 09|08 07 06 05
    497  * --------======------============
    498  *  TRAP   : RND : SRC : FUNCTION  :
    499  *			 0  0  0  0 . . . addX
    500  *			 0  0  0  1 . . . subX
    501  *			 0  0  1  0 . . . mulX
    502  *			 0  0  1  1 . . . divX
    503  *			 0  1  0  0 . . . cmpXun
    504  *			 0  1  0  1 . . . cmpXeq
    505  *			 0  1  1  0 . . . cmpXlt
    506  *			 0  1  1  1 . . . cmpXle
    507  *			 1  0  0  0 . . . reserved
    508  *			 1  0  0  1 . . . reserved
    509  *			 1  0  1  0 . . . sqrt[fg] (op_fix, not exactly "vax")
    510  *			 1  0  1  1 . . . sqrt[st] (op_fix, not exactly "ieee")
    511  *			 1  1  0  0 . . . cvtXs/f (cvt[qt]s, cvtst(!), cvt[gq]f)
    512  *			 1  1  0  1 . . . cvtXd   (vax only)
    513  *			 1  1  1  0 . . . cvtXt/g (cvtqt, cvt[dq]g only)
    514  *			 1  1  1  1 . . . cvtXq/q (cvttq, cvtgq)
    515  *	   |	       |
    516  * 15 14 13|12 11 10 09|08 07 06 05	  the twilight zone
    517  * --------======------============
    518  *  TRAP   : RND : SRC : FUNCTION  :
    519  * /s /i /u  x  x  1  0  1  1  0  0 . . . cvtts, /siu only 0, 1, 5, 7
    520  *  0  1  0  1  0  1  0  1  1  0  0 . . . cvtst   (src == T (!)) 2ac NOT /S
    521  *  1  1  0  1  0  1  0  1  1  0  0 . . . cvtst/s (src == T (!)) 6ac
    522  *  x  0  x  x  x  x  0	 1  1  1  1 . . . cvttq/_ (src == T)
    523  */
    524 
/*
 * print_fp_instruction --
 *	Debug helper: print the FP instruction "bits" found at user pc
 *	"pc" for lwp "l".  With DDB configured, use the DDB disassembler;
 *	otherwise dump the raw decoded instruction fields.
 */
static void
print_fp_instruction(unsigned long pc, struct lwp *l, uint32_t bits)
{
#if defined(DDB)
	char buf[32];
	struct alpha_print_instruction_context ctx = {
		.insn.bits = bits,
		.pc = pc,
		.buf = buf,
		.bufsize = sizeof(buf),
	};

	/* Result deliberately ignored; this is best-effort debug output. */
	(void) alpha_print_instruction(&ctx);

	printf("INSN [%s:%d] @0x%lx -> %s\n",
	    l->l_proc->p_comm, l->l_proc->p_pid, ctx.pc, ctx.buf);
#else
	alpha_instruction insn = {
		.bits = bits,
	};
	printf("INSN [%s:%d] @0x%lx -> opc=0x%x func=0x%x fa=%d fb=%d fc=%d\n",
	    l->l_proc->p_comm, l->l_proc->p_pid, (unsigned long)pc,
	    insn.float_format.opcode, insn.float_format.function,
	    insn.float_format.fa, insn.float_format.fb, insn.float_format.fc);
	printf("INSN [%s:%d] @0x%lx -> trp=0x%x rnd=0x%x src=0x%x fn=0x%x\n",
	    l->l_proc->p_comm, l->l_proc->p_pid, (unsigned long)pc,
	    insn.float_detail.trp, insn.float_detail.rnd,
	    insn.float_detail.src, insn.float_detail.opclass);
#endif /* DDB */
}
    555 
/*
 * alpha_fp_interpret --
 *	Emulate in software the single FP instruction "bits" (fetched from
 *	user pc "pc") on behalf of lwp "l", dispatching to the SoftFloat
 *	routines so that IEEE exception flags are raised as sticky state.
 */
static void
alpha_fp_interpret(unsigned long pc, struct lwp *l, uint32_t bits)
{
	s_float sfa, sfb, sfc;
	t_float tfa, tfb, tfc;
	alpha_instruction inst;

	if (alpha_fp_complete_debug) {
		print_fp_instruction(pc, l, bits);
	}

	inst.bits = bits;
	switch(inst.generic_format.opcode) {
	default:
		/* this "cannot happen" */
		atomic_inc_ulong(&fp_ill_opc.ev_count);
		this_cannot_happen(2, inst.bits);
		return;
	case op_any_float:
		if (inst.float_format.function == op_cvtql_sv ||
		    inst.float_format.function == op_cvtql_v) {
			/* Overflowing cvtql: saturate and raise invalid. */
			alpha_stt(inst.float_detail.fb, &tfb);
			sfc.i = (int64_t)tfb.i >= 0L ? INT_MAX : INT_MIN;
			alpha_lds(inst.float_detail.fc, &sfc);
			float_raise(FP_X_INV);
		} else {
			atomic_inc_ulong(&fp_ill_anyop.ev_count);
			this_cannot_happen(3, inst.bits);
		}
		break;
	case op_vax_float:
		atomic_inc_ulong(&fp_vax.ev_count);
		/* FALLTHROUGH */		/* XXX */
	case op_ieee_float:
	case op_fix_float:
		switch(inst.float_detail.src) {
		case op_src_sf:
			/* Single-precision (float32) operate group. */
			sts(inst.float_detail.fb, &sfb, l);
			if (inst.float_detail.opclass == 11)
				sfc.i = float32_sqrt(sfb.i);
			else if (inst.float_detail.opclass & ~3) {
				/* Only opclasses 0-3 and 11 are valid here. */
				this_cannot_happen(1, inst.bits);
				sfc.i = FLOAT32QNAN;
			} else {
				sts(inst.float_detail.fa, &sfa, l);
				sfc.i = (*swfp_s[inst.float_detail.opclass])(
				    sfa.i, sfb.i);
			}
			lds(inst.float_detail.fc, &sfc, l);
			break;
		case op_src_xd:
		case op_src_tg:
			/* Double-precision group, incl. converts (>= 12). */
			if (inst.float_detail.opclass >= 12)
				(*swfp_cvt[inst.float_detail.opclass - 12])(
				    inst.bits, l);
			else {
				stt(inst.float_detail.fb, &tfb, l);
				if (inst.float_detail.opclass == 11)
					tfc.i = float64_sqrt(tfb.i);
				else {
					stt(inst.float_detail.fa, &tfa, l);
					tfc.i = (*swfp_t[inst.float_detail
					    .opclass])(tfa.i, tfb.i);
				}
				ldt(inst.float_detail.fc, &tfc, l);
			}
			break;
		case op_src_qq:
			float_raise(FP_X_IMP);
			break;
		}
	}
}
    629 
/*
 * alpha_fp_complete_at --
 *	Software-complete the FP instruction at user address "trigger_pc"
 *	for lwp "l".  Returns 0 on success, SIGSEGV if the instruction
 *	cannot be fetched, or SIGFPE with *ucode set to the newly-raised
 *	and trap-enabled exception bits.
 */
int
alpha_fp_complete_at(unsigned long trigger_pc, struct lwp *l, uint64_t *ucode)
{
	int needsig;
	alpha_instruction inst;
	uint64_t rm, fpcr, orig_fpcr;
	uint64_t orig_flags, new_flags, changed_flags, md_flags;

	if (__predict_false(ufetch_32((void *)trigger_pc, &inst.bits))) {
		this_cannot_happen(6, -1);
		return SIGSEGV;
	}
	/* Make the FPU ours while we fiddle the FPCR. */
	kpreempt_disable();
	if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) {
		fpu_load();
	}
	alpha_pal_wrfen(1);
	/*
	 * Alpha FLOAT instructions can override the rounding mode on a
	 * per-instruction basis.  If necessary, lie about the dynamic
	 * rounding mode so emulation software need go to only one place
	 * for it, and so we don't have to lock any memory locations or
	 * pass a third parameter to every SoftFloat entry point.
	 *
	 * N.B. the rounding mode field of the FLOAT format instructions
	 * matches that of the FPCR *except* for the value 3, which means
	 * "dynamic" rounding mode (i.e. what is programmed into the FPCR).
	 */
	orig_fpcr = fpcr = alpha_read_fpcr();
	rm = inst.float_detail.rnd;
	if (__predict_false(rm != 3 /* dynamic */ &&
			    rm != __SHIFTOUT(fpcr, FPCR_DYN_RM))) {
		fpcr = (fpcr & ~FPCR_DYN_RM) | __SHIFTIN(rm, FPCR_DYN_RM);
		alpha_write_fpcr(fpcr);
	}
	orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags);

	alpha_fp_interpret(trigger_pc, l, inst.bits);

	md_flags = l->l_md.md_flags;

	/* Flags are sticky: emulation may only have set new ones. */
	new_flags = FP_C_TO_NETBSD_FLAG(md_flags);
	changed_flags = orig_flags ^ new_flags;
	KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */
	alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, md_flags));
	/* Signal only for newly-set flags whose trap is enabled. */
	needsig = changed_flags & FP_C_TO_NETBSD_MASK(md_flags);
	alpha_pal_wrfen(0);
	kpreempt_enable();
	if (__predict_false(needsig)) {
		*ucode = needsig;
		return SIGFPE;
	}
	return 0;
}
    684 
/*
 * alpha_fp_complete --
 *	Main entry point for FP software completion.  "a0" is the PALcode
 *	exception summary (AESR bits), "a1" the write mask of registers
 *	touched by trapping instructions.  Locate the trigger instruction
 *	(directly on PAT hardware, otherwise by scanning backwards through
 *	the trap shadow), emulate it, and return 0 or a signal number with
 *	*ucode filled in.
 */
int
alpha_fp_complete(u_long a0, u_long a1, struct lwp *l, uint64_t *ucode)
{
	uint64_t op_class;
	alpha_instruction inst;
	/* "trigger_pc" is Compaq's term for the earliest faulting op */
	alpha_instruction *trigger_pc, *usertrap_pc;
	alpha_instruction *pc, *win_begin, tsw[TSWINSIZE];
	long insn_count = 0;
	int sig;

	if (alpha_fp_complete_debug) {
		printf("%s: [%s:%d] a0[AESR]=0x%lx a1[regmask]=0x%lx "
		       "FPCR=0x%lx FP_C=0x%lx\n",
		    __func__, l->l_proc->p_comm, l->l_proc->p_pid,
		    a0, a1, alpha_read_fpcr(),
		    l->l_md.md_flags & (MDLWP_FP_C|MDLWP_FPACTIVE));
	}

	pc = (alpha_instruction *)l->l_md.md_tf->tf_regs[FRAME_PC];
	trigger_pc = pc - 1;	/* for ALPHA_AMASK_PAT case */

	/*
	 * Start out with the code mirroring the exception flags
	 * (FP_X_*).  Shift right 1 bit to discard SWC to achieve
	 * this.
	 */
	*ucode = a0 >> 1;

	if (cpu_amask & ALPHA_AMASK_PAT) {
		/* Precise-arithmetic-trap hardware: trigger is pc - 1. */
		if ((a0 & (ALPHA_AESR_SWC | ALPHA_AESR_INV)) != 0 ||
		    alpha_fp_sync_complete) {
			sig = alpha_fp_complete_at((u_long)trigger_pc, l,
			    ucode);
			goto resolved;
		}
	}
	if ((a0 & (ALPHA_AESR_SWC | ALPHA_AESR_INV)) == 0)
		goto unresolved;
/*
 * At this point we are somewhere in the trap shadow of one or more instruc-
 * tions that have trapped with software completion specified.  We have a mask
 * of the registers written by trapping instructions.
 *
 * Now step backwards through the trap shadow, clearing bits in the
 * destination write mask until the trigger instruction is found, and
 * interpret this one instruction in SW. If a SIGFPE is not required, back up
 * the PC until just after this instruction and restart. This will execute all
 * trap shadow instructions between the trigger pc and the trap pc twice.
 */
	trigger_pc = 0;
	win_begin = pc;
	atomic_inc_ulong(&ts_scans.ev_count);
	for (--pc; a1; --pc) {
		insn_count++;
		if (pc < win_begin) {
			/* Refill the TSWINSIZE-instruction fetch window. */
			win_begin = pc - TSWINSIZE + 1;
			if (copyin(win_begin, tsw, sizeof tsw)) {
				/* sigh, try to get just one */
				win_begin = pc;
				if (copyin(win_begin, tsw, 4)) {
					/*
					 * We're off the rails here; don't
					 * bother updating the FP_C.
					 */
					return SIGSEGV;
				}
			}
		}
		assert(win_begin <= pc && !((long)pc  & 3));
		inst = tsw[pc - win_begin];
		op_class = 1UL << inst.generic_format.opcode;
		if (op_class & FPUREG_CLASS) {
			a1 &= ~(1UL << (inst.operate_generic_format.rc + 32));
			trigger_pc = pc;
		} else if (op_class & CPUREG_CLASS) {
			a1 &= ~(1UL << inst.operate_generic_format.rc);
			trigger_pc = pc;
		} else if (op_class & TRAPSHADOWBOUNDARY) {
			if (op_class & CHECKFUNCTIONCODE) {
				if (inst.mem_format.displacement == op_trapb ||
				    inst.mem_format.displacement == op_excb)
					break;	/* code breaks AARM rules */
			} else
				break; /* code breaks AARM rules */
		}
		/* Some shadow-safe op, probably load, store, or FPTI class */
	}
	/* Record trap-shadow scan statistics. */
	if (insn_count > atomic_load_relaxed(&ts_insns_max.ev_count)) {
		atomic_store_relaxed(&ts_insns_max.ev_count, insn_count);
	}
	atomic_add_long(&ts_insns.ev_count, insn_count);
	if (__predict_true(trigger_pc != 0 && a1 == 0)) {
		atomic_inc_ulong(&ts_resolved.ev_count);
		sig = alpha_fp_complete_at((u_long)trigger_pc, l, ucode);
		goto resolved;
	} else {
		atomic_inc_ulong(&ts_unresolved.ev_count);
	}

 unresolved: /* obligatory statement */;
	/*
	 * *ucode contains the exception bits (FP_X_*).  We need to
	 * update the FP_C and FPCR, and send a signal for any new
	 * trap that is enabled.
	 */
	uint64_t orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags);
	uint64_t new_flags = orig_flags | *ucode;
	uint64_t changed_flags = orig_flags ^ new_flags;
	KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */

	l->l_md.md_flags |= NETBSD_FLAG_TO_FP_C(new_flags);

	kpreempt_disable();
	if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) {
		fpu_load();
	}
	alpha_pal_wrfen(1);
	uint64_t orig_fpcr = alpha_read_fpcr();
	alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, l->l_md.md_flags));
	uint64_t needsig =
	    changed_flags & FP_C_TO_NETBSD_MASK(l->l_md.md_flags);
	alpha_pal_wrfen(0);
	kpreempt_enable();

	if (__predict_false(needsig)) {
		*ucode = needsig;
		return SIGFPE;
	}
	return 0;

 resolved:
	/* On a signal, report the pc just past the trigger instruction. */
	if (sig) {
		usertrap_pc = trigger_pc + 1;
		l->l_md.md_tf->tf_regs[FRAME_PC] = (unsigned long)usertrap_pc;
	}
	return sig;
}
    823 
    824 /*
    825  * Initialize FP handling.
    826  */
    827 void
    828 alpha_fp_init(void)
    829 {
    830 	evcnt_attach_dynamic_nozero(&fpevent_use, EVCNT_TYPE_MISC, NULL,
    831 	    "FP", "proc use");
    832 	evcnt_attach_dynamic_nozero(&fpevent_reuse, EVCNT_TYPE_MISC, NULL,
    833 	    "FP", "proc re-use");
    834 
    835 	evcnt_attach_dynamic_nozero(&ts_scans, EVCNT_TYPE_MISC, NULL,
    836 	    "FP", "TS scans");
    837 	evcnt_attach_dynamic_nozero(&ts_insns, EVCNT_TYPE_MISC, NULL,
    838 	    "FP", "TS total insns");
    839 	evcnt_attach_dynamic_nozero(&ts_insns_max, EVCNT_TYPE_MISC, NULL,
    840 	    "FP", "TS max single-scan insns");
    841 	evcnt_attach_dynamic_nozero(&ts_resolved, EVCNT_TYPE_MISC, NULL,
    842 	    "FP", "TS resolved");
    843 	evcnt_attach_dynamic_nozero(&ts_unresolved, EVCNT_TYPE_MISC, NULL,
    844 	    "FP", "TS unresolved");
    845 
    846 	evcnt_attach_dynamic_nozero(&fp_ill_opc, EVCNT_TYPE_MISC, NULL,
    847 	    "FP", "illegal op code");
    848 	evcnt_attach_dynamic_nozero(&fp_ill_func, EVCNT_TYPE_MISC, NULL,
    849 	    "FP", "illegal function code");
    850 	evcnt_attach_dynamic_nozero(&fp_ill_anyop, EVCNT_TYPE_MISC, NULL,
    851 	    "FP", "illegal any_float function code");
    852 }
    853 
    854 /*
    855  * Load the float-point context for the current lwp.
    856  */
    857 void
    858 fpu_state_load(struct lwp *l, u_int flags)
    859 {
    860 	struct pcb * const pcb = lwp_getpcb(l);
    861 	KASSERT(l == curlwp);
    862 
    863 #ifdef MULTIPROCESSOR
    864 	/*
    865 	 * If the LWP got switched to another CPU, pcu_switchpoint would have
    866 	 * called state_release to clear MDLWP_FPACTIVE.  Now that we are back
    867 	 * on the CPU that has our FP context, set MDLWP_FPACTIVE again.
    868 	 */
    869 	if (flags & PCU_REENABLE) {
    870 		KASSERT(flags & PCU_VALID);
    871 		l->l_md.md_flags |= MDLWP_FPACTIVE;
    872 		return;
    873 	}
    874 #else
    875 	KASSERT((flags & PCU_REENABLE) == 0);
    876 #endif
    877 
    878 	/*
    879 	 * Instrument FP usage -- if a process had not previously
    880 	 * used FP, mark it as having used FP for the first time,
    881 	 * and count this event.
    882 	 *
    883 	 * If a process has used FP, count a "used FP, and took
    884 	 * a trap to use it again" event.
    885 	 */
    886 	if ((flags & PCU_VALID) == 0) {
    887 		atomic_inc_ulong(&fpevent_use.ev_count);
    888 	} else {
    889 		atomic_inc_ulong(&fpevent_reuse.ev_count);
    890 	}
    891 
    892 	if (alpha_fp_complete_debug) {
    893 		printf("%s: [%s:%d] loading FPCR=0x%lx\n",
    894 		    __func__, l->l_proc->p_comm, l->l_proc->p_pid,
    895 		    pcb->pcb_fp.fpr_cr);
    896 	}
    897 	alpha_pal_wrfen(1);
    898 	restorefpstate(&pcb->pcb_fp);
    899 	alpha_pal_wrfen(0);
    900 
    901 	l->l_md.md_flags |= MDLWP_FPACTIVE;
    902 }
    903 
    904 /*
    905  * Save the FPU state.
    906  */
    907 
    908 void
    909 fpu_state_save(struct lwp *l)
    910 {
    911 	struct pcb * const pcb = lwp_getpcb(l);
    912 
    913 	alpha_pal_wrfen(1);
    914 	savefpstate(&pcb->pcb_fp);
    915 	alpha_pal_wrfen(0);
    916 	if (alpha_fp_complete_debug) {
    917 		printf("%s: [%s:%d] saved FPCR=0x%lx\n",
    918 		    __func__, l->l_proc->p_comm, l->l_proc->p_pid,
    919 		    pcb->pcb_fp.fpr_cr);
    920 	}
    921 }
    922 
    923 /*
    924  * Release the FPU.
    925  */
void
fpu_state_release(struct lwp *l)
{
	/*
	 * Mark the FP context as no longer live on this CPU.  Only the
	 * software flag is cleared here; presumably the FEN hardware
	 * enable is managed by the PCU framework / trap path elsewhere.
	 */
	l->l_md.md_flags &= ~MDLWP_FPACTIVE;
}
    931