1 /* $NetBSD: fp_complete.c,v 1.33 2025/03/16 22:34:36 thorpej Exp $ */ 2 3 /*- 4 * Copyright (c) 2001 Ross Harvey 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by the NetBSD 18 * Foundation, Inc. and its contributors. 19 * 4. Neither the name of The NetBSD Foundation nor the names of its 20 * contributors may be used to endorse or promote products derived 21 * from this software without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 * POSSIBILITY OF SUCH DAMAGE. 34 */ 35 36 #include "opt_ddb.h" 37 38 #include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ 39 40 __KERNEL_RCSID(0, "$NetBSD: fp_complete.c,v 1.33 2025/03/16 22:34:36 thorpej Exp $"); 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/proc.h> 45 #include <sys/atomic.h> 46 #include <sys/evcnt.h> 47 48 #include <machine/cpu.h> 49 #include <machine/fpu.h> 50 #include <machine/reg.h> 51 #include <machine/alpha.h> 52 #include <machine/alpha_instruction.h> 53 54 #include <lib/libkern/softfloat.h> 55 56 /* 57 * Validate our assumptions about bit positions. 58 */ 59 __CTASSERT(ALPHA_AESR_INV == (FP_X_INV << 1)); 60 __CTASSERT(ALPHA_AESR_DZE == (FP_X_DZ << 1)); 61 __CTASSERT(ALPHA_AESR_OVF == (FP_X_OFL << 1)); 62 __CTASSERT(ALPHA_AESR_UNF == (FP_X_UFL << 1)); 63 __CTASSERT(ALPHA_AESR_INE == (FP_X_IMP << 1)); 64 __CTASSERT(ALPHA_AESR_IOV == (FP_X_IOV << 1)); 65 66 __CTASSERT(IEEE_TRAP_ENABLE_INV == (FP_X_INV << 1)); 67 __CTASSERT(IEEE_TRAP_ENABLE_DZE == (FP_X_DZ << 1)); 68 __CTASSERT(IEEE_TRAP_ENABLE_OVF == (FP_X_OFL << 1)); 69 __CTASSERT(IEEE_TRAP_ENABLE_UNF == (FP_X_UFL << 1)); 70 __CTASSERT(IEEE_TRAP_ENABLE_INE == (FP_X_IMP << 1)); 71 72 __CTASSERT((uint64_t)FP_X_IMP << (61 - 3) == FPCR_INED); 73 __CTASSERT((uint64_t)FP_X_UFL << (61 - 3) == FPCR_UNFD); 74 __CTASSERT((uint64_t)FP_X_OFL << (49 - 0) == FPCR_OVFD); 75 __CTASSERT((uint64_t)FP_X_DZ << (49 - 0) == FPCR_DZED); 76 __CTASSERT((uint64_t)FP_X_INV << (49 - 0) == FPCR_INVD); 77 78 __CTASSERT(FP_C_ALLBITS == MDLWP_FP_C); 79 80 #define TSWINSIZE 4 /* size of trap shadow window in uint32_t units */ 81 82 /* Set Name Opcodes AARM C.* Symbols */ 83 84 #define CPUREG_CLASS (0xfUL << 0x10) /* INT[ALSM] */ 85 #define FPUREG_CLASS (0xfUL << 0x14) /* ITFP, FLT[ILV] */ 86 #define CHECKFUNCTIONCODE (1UL << 0x18) /* MISC */ 87 #define TRAPSHADOWBOUNDARY (1UL << 0x00 | /* PAL */\ 88 1UL << 0x19 | /* \PAL\ */\ 89 1UL << 0x1a | /* JSR */\ 90 1UL << 0x1b | /* \PAL\ */\ 91 1UL << 0x1d | /* \PAL\ */\ 92 1UL << 0x1e | /* \PAL\ */\ 93 1UL << 0x1f | /* \PAL\ */\ 94 0xffffUL << 0x30 | /* branch ops */\ 95 CHECKFUNCTIONCODE) 96 97 #define MAKE_FLOATXX(width, expwidth, sign, exp, msb, rest_of_frac) \ 98 (u_int ## width ## _t)(sign) << ((width) - 1) |\ 99 (u_int ## width ## _t)(exp) << ((width) - 1 - (expwidth)) |\ 100 (u_int ## width ## _t)(msb) << ((width) - 1 - (expwidth) - 1) |\ 101 (u_int ## width ## _t)(rest_of_frac) 102 103 #define FLOAT32QNAN MAKE_FLOATXX(32, 8, 0, 0xff, 1, 0) 104 #define FLOAT64QNAN MAKE_FLOATXX(64, 11, 0, 0x7ff, 1, 0) 105 106 #define IS_SUBNORMAL(v) ((v)->exp == 0 && (v)->frac != 0) 107 108 #define PREFILTER_SUBNORMAL(l,v) if ((l)->l_md.md_flags & IEEE_MAP_DMZ \ 109 && IS_SUBNORMAL(v)) \ 110 (v)->frac = 0; else 111 112 #define POSTFILTER_SUBNORMAL(l,v) if ((l)->l_md.md_flags & IEEE_MAP_UMZ \ 113 && IS_SUBNORMAL(v)) \ 114 (v)->frac = 0; else 115 116 /* Alpha returns 2.0 for true, all zeroes for false. */ 117 118 #define CMP_RESULT(flag) ((flag) ? 4UL << 60 : 0L) 119 120 /* Move bits from sw fp_c to hw fpcr. */ 121 122 #define CRBLIT(sw, hw, m, offs) (((sw) & ~(m)) | ((hw) >> (offs) & (m))) 123 124 static struct evcnt fpevent_use; 125 static struct evcnt fpevent_reuse; 126 127 /* 128 * Temporary trap shadow instrumentation. The [un]resolved counters 129 * could be kept permanently, as they provide information on whether 130 * user code has met AARM trap shadow generation requirements. 131 */ 132 133 static struct evcnt ts_scans; /* trap shadow scans */ 134 static struct evcnt ts_insns; /* total scanned insns */ 135 static struct evcnt ts_insns_max; /* per-scan high water mark */ 136 static struct evcnt ts_resolved; /* cases trigger pc found */ 137 static struct evcnt ts_unresolved; /* cases it wasn't, code problems? */ 138 139 static struct evcnt fp_ill_opc; /* unexpected op codes */ 140 static struct evcnt fp_ill_func; /* unexpected function codes */ 141 static struct evcnt fp_ill_anyop; /* this "cannot happen" */ 142 143 static struct evcnt fp_vax; /* traps from VAX FP insns */ 144 145 struct alpha_shadow { 146 uint64_t uop; /* bit mask of unexpected opcodes */ 147 uint32_t ufunc; /* bit mask of unexpected functions */ 148 } alpha_shadow; 149 150 static float64 float64_unk(float64, float64); 151 static float64 compare_un(float64, float64); 152 static float64 compare_eq(float64, float64); 153 static float64 compare_lt(float64, float64); 154 static float64 compare_le(float64, float64); 155 static void cvt_qs_ts_st_gf_qf(uint32_t, struct lwp *); 156 static void cvt_gd(uint32_t, struct lwp *); 157 static void cvt_qt_dg_qg(uint32_t, struct lwp *); 158 static void cvt_tq_gq(uint32_t, struct lwp *); 159 160 static float32 (*swfp_s[])(float32, float32) = { 161 float32_add, float32_sub, float32_mul, float32_div, 162 }; 163 164 static float64 (*swfp_t[])(float64, float64) = { 165 float64_add, float64_sub, float64_mul, float64_div, 166 compare_un, compare_eq, compare_lt, compare_le, 167 float64_unk, float64_unk, float64_unk, float64_unk 168 }; 169 170 static void (*swfp_cvt[])(uint32_t, struct lwp *) = { 171 cvt_qs_ts_st_gf_qf, cvt_gd, cvt_qt_dg_qg, cvt_tq_gq 172 }; 173 174 static void 175 this_cannot_happen(int what_cannot_happen, int64_t bits) 176 { 177 static int total; 178 alpha_instruction inst; 179 static uint64_t reported; 180 181 inst.bits = bits; 182 atomic_inc_ulong(&fp_ill_func.ev_count); 183 if (bits != -1) 184 alpha_shadow.uop |= 1UL << inst.generic_format.opcode; 185 if (1UL << what_cannot_happen & reported) 186 return; 187 reported |= 1UL << what_cannot_happen; 188 if (total >= 1000) 189 return; /* right now, this return "cannot happen" */ 190 ++total; 191 if (bits) 192 printf("FP instruction %x\n", (unsigned int)bits); 193 printf("FP event %d/%lx/%lx\n", what_cannot_happen, reported, 194 alpha_shadow.uop); 195 printf("Please report this to port-alpha-maintainer (at) NetBSD.org\n"); 196 } 197 198 static inline void 199 sts(unsigned int rn, s_float *v, struct lwp *l) 200 { 201 alpha_sts(rn, v); 202 PREFILTER_SUBNORMAL(l, v); 203 } 204 205 static inline void 206 stt(unsigned int rn, t_float *v, struct lwp *l) 207 { 208 alpha_stt(rn, v); 209 PREFILTER_SUBNORMAL(l, v); 210 } 211 212 static inline void 213 lds(unsigned int rn, s_float *v, struct lwp *l) 214 { 215 POSTFILTER_SUBNORMAL(l, v); 216 alpha_lds(rn, v); 217 } 218 219 static inline void 220 ldt(unsigned int rn, t_float *v, struct lwp *l) 221 { 222 POSTFILTER_SUBNORMAL(l, v); 223 alpha_ldt(rn, v); 224 } 225 226 static float64 227 compare_lt(float64 a, float64 b) 228 { 229 return CMP_RESULT(float64_lt_quiet(a, b)); 230 } 231 232 static float64 233 compare_le(float64 a, float64 b) 234 { 235 return CMP_RESULT(float64_le_quiet(a, b)); 236 } 237 238 static float64 239 compare_un(float64 a, float64 b) 240 { 241 if (float64_is_nan(a) | float64_is_nan(b)) { 242 if (float64_is_signaling_nan(a) | float64_is_signaling_nan(b)) 243 float_set_invalid(); 244 return CMP_RESULT(1); 245 } 246 return CMP_RESULT(0); 247 } 248 249 static float64 250 compare_eq(float64 a, float64 b) 251 { 252 return CMP_RESULT(float64_eq(a, b)); 253 } 254 /* 255 * A note regarding the VAX FP ops. 256 * 257 * The AARM gives us complete leeway to set or not set status flags on VAX 258 * ops, but we do any subnorm, NaN and dirty zero fixups anyway, and we set 259 * flags by IEEE rules. Many ops are common to d/f/g and s/t source types. 260 * For the purely vax ones, it's hard to imagine ever running them. 261 * (Generated VAX fp ops with completion flags? Hmm.) We are careful never 262 * to panic, assert, or print unlimited output based on a path through the 263 * decoder, so weird cases don't become security issues. 264 */ 265 static void 266 cvt_qs_ts_st_gf_qf(uint32_t inst_bits, struct lwp *l) 267 { 268 t_float tfb, tfc; 269 s_float sfb, sfc; 270 alpha_instruction inst; 271 272 inst.bits = inst_bits; 273 /* 274 * cvtst and cvtts have the same opcode, function, and source. The 275 * distinction for cvtst is hidden in the illegal modifier combinations. 276 * We decode even the non-/s modifier, so that the fix-up-always mode 277 * works on ev6 and later. The rounding bits are unused and fixed for 278 * cvtst, so we check those too. 279 */ 280 switch(inst.float_format.function) { 281 case op_cvtst: 282 case op_cvtst_u: 283 sts(inst.float_detail.fb, &sfb, l); 284 tfc.i = float32_to_float64(sfb.i); 285 ldt(inst.float_detail.fc, &tfc, l); 286 return; 287 } 288 if(inst.float_detail.src == 2) { 289 stt(inst.float_detail.fb, &tfb, l); 290 sfc.i = float64_to_float32(tfb.i); 291 lds(inst.float_detail.fc, &sfc, l); 292 return; 293 } 294 /* 0: S/F */ 295 /* 1: /D */ 296 /* 3: Q/Q */ 297 this_cannot_happen(5, inst.generic_format.opcode); 298 tfc.i = FLOAT64QNAN; 299 ldt(inst.float_detail.fc, &tfc, l); 300 return; 301 } 302 303 static void 304 cvt_gd(uint32_t inst_bits, struct lwp *l) 305 { 306 t_float tfb, tfc; 307 alpha_instruction inst; 308 309 inst.bits = inst_bits; 310 stt(inst.float_detail.fb, &tfb, l); 311 (void) float64_to_float32(tfb.i); 312 l->l_md.md_flags &= ~NETBSD_FLAG_TO_FP_C(FP_X_IMP); 313 tfc.i = float64_add(tfb.i, (float64)0); 314 ldt(inst.float_detail.fc, &tfc, l); 315 } 316 317 static void 318 cvt_qt_dg_qg(uint32_t inst_bits, struct lwp *l) 319 { 320 t_float tfb, tfc; 321 alpha_instruction inst; 322 323 inst.bits = inst_bits; 324 switch(inst.float_detail.src) { 325 case 0: /* S/F */ 326 this_cannot_happen(3, inst.bits); 327 /* fall thru */ 328 case 1: /* D */ 329 /* VAX dirty 0's and reserved ops => UNPREDICTABLE */ 330 /* We've done what's important by just not trapping */ 331 tfc.i = 0; 332 break; 333 case 2: /* T/G */ 334 this_cannot_happen(4, inst.bits); 335 tfc.i = 0; 336 break; 337 case 3: /* Q/Q */ 338 stt(inst.float_detail.fb, &tfb, l); 339 tfc.i = int64_to_float64(tfb.i); 340 break; 341 } 342 alpha_ldt(inst.float_detail.fc, &tfc); 343 } 344 /* 345 * XXX: AARM and 754 seem to disagree here, also, beware of softfloat's 346 * unfortunate habit of always returning the nontrapping result. 347 * XXX: there are several apparent AARM/AAH disagreements, as well as 348 * the issue of trap handler pc and trapping results. 349 */ 350 static void 351 cvt_tq_gq(uint32_t inst_bits, struct lwp *l) 352 { 353 t_float tfb, tfc; 354 alpha_instruction inst; 355 356 inst.bits = inst_bits; 357 stt(inst.float_detail.fb, &tfb, l); 358 tfc.i = tfb.sign ? float64_to_int64(tfb.i) : float64_to_uint64(tfb.i); 359 alpha_ldt(inst.float_detail.fc, &tfc); /* yes, ldt */ 360 } 361 362 static uint64_t 363 fp_c_to_fpcr_1(uint64_t fpcr, uint64_t fp_c) 364 { 365 uint64_t disables; 366 367 /* 368 * It's hard to arrange for conforming bit fields, because the FP_C 369 * and the FPCR are both architected, with specified (and relatively 370 * scrambled) bit numbers. Defining an internal unscrambled FP_C 371 * wouldn't help much, because every user exception requires the 372 * architected bit order in the sigcontext. 373 * 374 * Programs that fiddle with the fpcr exception bits (instead of fp_c) 375 * will lose, because those bits can be and usually are subsetted; 376 * the official home is in the fp_c. Furthermore, the kernel puts 377 * phony enables (it lies :-) in the fpcr in order to get control when 378 * it is necessary to initially set a sticky bit. 379 */ 380 381 fpcr &= FPCR_DYN_RM; 382 383 /* 384 * enable traps = case where flag bit is clear AND program wants a trap 385 * 386 * enables = ~flags & mask 387 * disables = ~(~flags | mask) 388 * disables = flags & ~mask. Thank you, Augustus De Morgan (1806-1871) 389 */ 390 disables = FP_C_TO_NETBSD_FLAG(fp_c) & ~FP_C_TO_NETBSD_MASK(fp_c); 391 392 fpcr |= (disables & (FP_X_IMP | FP_X_UFL)) << (61 - 3); 393 fpcr |= (disables & (FP_X_OFL | FP_X_DZ | FP_X_INV)) << (49 - 0); 394 395 fpcr |= fp_c & FP_C_MIRRORED << (FPCR_MIR_START - FP_C_MIR_START); 396 fpcr |= (fp_c & IEEE_MAP_DMZ) << 36; 397 if (fp_c & FP_C_MIRRORED) 398 fpcr |= FPCR_SUM; 399 if (fp_c & IEEE_MAP_UMZ) 400 fpcr |= FPCR_UNDZ | FPCR_UNFD; 401 fpcr |= (~fp_c & IEEE_TRAP_ENABLE_DNO) << 41; 402 return fpcr; 403 } 404 405 static void 406 fp_c_to_fpcr(struct lwp *l) 407 { 408 alpha_write_fpcr(fp_c_to_fpcr_1(alpha_read_fpcr(), l->l_md.md_flags)); 409 } 410 411 void 412 alpha_write_fp_c(struct lwp *l, uint64_t fp_c) 413 { 414 uint64_t md_flags; 415 416 fp_c &= MDLWP_FP_C; 417 md_flags = l->l_md.md_flags; 418 if ((md_flags & MDLWP_FP_C) == fp_c) 419 return; 420 l->l_md.md_flags = (md_flags & ~MDLWP_FP_C) | fp_c; 421 kpreempt_disable(); 422 if (md_flags & MDLWP_FPACTIVE) { 423 alpha_pal_wrfen(1); 424 fp_c_to_fpcr(l); 425 alpha_pal_wrfen(0); 426 } else { 427 struct pcb *pcb = l->l_addr; 428 429 pcb->pcb_fp.fpr_cr = 430 fp_c_to_fpcr_1(pcb->pcb_fp.fpr_cr, l->l_md.md_flags); 431 } 432 kpreempt_enable(); 433 } 434 435 uint64_t 436 alpha_read_fp_c(struct lwp *l) 437 { 438 /* 439 * A possibly-desirable EV6-specific optimization would deviate from 440 * the Alpha Architecture spec and keep some FP_C bits in the FPCR, 441 * but in a transparent way. Some of the code for that would need to 442 * go right here. 443 */ 444 return l->l_md.md_flags & MDLWP_FP_C; 445 } 446 447 static float64 448 float64_unk(float64 a, float64 b) 449 { 450 return 0; 451 } 452 453 /* 454 * The real function field encodings for IEEE and VAX FP instructions. 455 * 456 * Since there is only one operand type field, the cvtXX instructions 457 * require a variety of special cases, and these have to be analyzed as 458 * they don't always fit into the field descriptions in AARM section I. 459 * 460 * Lots of staring at bits in the appendix shows what's really going on. 461 * 462 * | | 463 * 15 14 13|12 11 10 09|08 07 06 05 464 * --------======------============ 465 * TRAP : RND : SRC : FUNCTION : 466 * 0 0 0:. . .:. . . . . . . . . . . . Imprecise 467 * 0 0 1|. . .:. . . . . . . . . . . ./U underflow enable (if FP output) 468 * | /V overfloat enable (if int output) 469 * 0 1 0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST 470 * 0 1 1|. . .:. . . . . . . . . . . . Unsupported 471 * 1 0 0:. . .:. . . . . . . . . . . ./S software completion (VAX only) 472 * 1 0 1|. . .:. . . . . . . . . . . ./SU 473 * | /SV 474 * 1 1 0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST/S 475 * 1 1 1|. . .:. . . . . . . . . . . ./SUI (if FP output) (IEEE only) 476 * | /SVI (if int output) (IEEE only) 477 * S I UV: In other words: bits 15:13 are S:I:UV, except that _usually_ 478 * | not all combinations are valid. 479 * | | 480 * 15 14 13|12 11 10 09|08 07 06 05 481 * --------======------============ 482 * TRAP : RND : SRC : FUNCTION : 483 * | 0 0 . . . . . . . . . . . ./C Chopped 484 * : 0 1 . . . . . . . . . . . ./M Minus Infinity 485 * | 1 0 . . . . . . . . . . . . Normal 486 * : 1 1 . . . . . . . . . . . ./D Dynamic (in FPCR: Plus Infinity) 487 * | | 488 * 15 14 13|12 11 10 09|08 07 06 05 489 * --------======------============ 490 * TRAP : RND : SRC : FUNCTION : 491 * 0 0. . . . . . . . . . S/F 492 * 0 1. . . . . . . . . . -/D 493 * 1 0. . . . . . . . . . T/G 494 * 1 1. . . . . . . . . . Q/Q 495 * | | 496 * 15 14 13|12 11 10 09|08 07 06 05 497 * --------======------============ 498 * TRAP : RND : SRC : FUNCTION : 499 * 0 0 0 0 . . . addX 500 * 0 0 0 1 . . . subX 501 * 0 0 1 0 . . . mulX 502 * 0 0 1 1 . . . divX 503 * 0 1 0 0 . . . cmpXun 504 * 0 1 0 1 . . . cmpXeq 505 * 0 1 1 0 . . . cmpXlt 506 * 0 1 1 1 . . . cmpXle 507 * 1 0 0 0 . . . reserved 508 * 1 0 0 1 . . . reserved 509 * 1 0 1 0 . . . sqrt[fg] (op_fix, not exactly "vax") 510 * 1 0 1 1 . . . sqrt[st] (op_fix, not exactly "ieee") 511 * 1 1 0 0 . . . cvtXs/f (cvt[qt]s, cvtst(!), cvt[gq]f) 512 * 1 1 0 1 . . . cvtXd (vax only) 513 * 1 1 1 0 . . . cvtXt/g (cvtqt, cvt[dq]g only) 514 * 1 1 1 1 . . . cvtXq/q (cvttq, cvtgq) 515 * | | 516 * 15 14 13|12 11 10 09|08 07 06 05 the twilight zone 517 * --------======------============ 518 * TRAP : RND : SRC : FUNCTION : 519 * /s /i /u x x 1 0 1 1 0 0 . . . cvtts, /siu only 0, 1, 5, 7 520 * 0 1 0 1 0 1 0 1 1 0 0 . . . cvtst (src == T (!)) 2ac NOT /S 521 * 1 1 0 1 0 1 0 1 1 0 0 . . . cvtst/s (src == T (!)) 6ac 522 * x 0 x x x x 0 1 1 1 1 . . . cvttq/_ (src == T) 523 */ 524 525 static void 526 print_fp_instruction(unsigned long pc, struct lwp *l, uint32_t bits) 527 { 528 #if defined(DDB) 529 char buf[32]; 530 struct alpha_print_instruction_context ctx = { 531 .insn.bits = bits, 532 .pc = pc, 533 .buf = buf, 534 .bufsize = sizeof(buf), 535 }; 536 537 (void) alpha_print_instruction(&ctx); 538 539 printf("INSN [%s:%d] @0x%lx -> %s\n", 540 l->l_proc->p_comm, l->l_proc->p_pid, ctx.pc, ctx.buf); 541 #else 542 alpha_instruction insn = { 543 .bits = bits, 544 }; 545 printf("INSN [%s:%d] @0x%lx -> opc=0x%x func=0x%x fa=%d fb=%d fc=%d\n", 546 l->l_proc->p_comm, l->l_proc->p_pid, (unsigned long)pc, 547 insn.float_format.opcode, insn.float_format.function, 548 insn.float_format.fa, insn.float_format.fb, insn.float_format.fc); 549 printf("INSN [%s:%d] @0x%lx -> trp=0x%x rnd=0x%x src=0x%x fn=0x%x\n", 550 l->l_proc->p_comm, l->l_proc->p_pid, (unsigned long)pc, 551 insn.float_detail.trp, insn.float_detail.rnd, 552 insn.float_detail.src, insn.float_detail.opclass); 553 #endif /* DDB */ 554 } 555 556 static void 557 alpha_fp_interpret(unsigned long pc, struct lwp *l, uint32_t bits) 558 { 559 s_float sfa, sfb, sfc; 560 t_float tfa, tfb, tfc; 561 alpha_instruction inst; 562 563 if (alpha_fp_complete_debug) { 564 print_fp_instruction(pc, l, bits); 565 } 566 567 inst.bits = bits; 568 switch(inst.generic_format.opcode) { 569 default: 570 /* this "cannot happen" */ 571 atomic_inc_ulong(&fp_ill_opc.ev_count); 572 this_cannot_happen(2, inst.bits); 573 return; 574 case op_any_float: 575 if (inst.float_format.function == op_cvtql_sv || 576 inst.float_format.function == op_cvtql_v) { 577 alpha_stt(inst.float_detail.fb, &tfb); 578 sfc.i = (int64_t)tfb.i >= 0L ? INT_MAX : INT_MIN; 579 alpha_lds(inst.float_detail.fc, &sfc); 580 float_raise(FP_X_INV); 581 } else { 582 atomic_inc_ulong(&fp_ill_anyop.ev_count); 583 this_cannot_happen(3, inst.bits); 584 } 585 break; 586 case op_vax_float: 587 atomic_inc_ulong(&fp_vax.ev_count); 588 /* FALLTHROUGH */ /* XXX */ 589 case op_ieee_float: 590 case op_fix_float: 591 switch(inst.float_detail.src) { 592 case op_src_sf: 593 sts(inst.float_detail.fb, &sfb, l); 594 if (inst.float_detail.opclass == 11) 595 sfc.i = float32_sqrt(sfb.i); 596 else if (inst.float_detail.opclass & ~3) { 597 this_cannot_happen(1, inst.bits); 598 sfc.i = FLOAT32QNAN; 599 } else { 600 sts(inst.float_detail.fa, &sfa, l); 601 sfc.i = (*swfp_s[inst.float_detail.opclass])( 602 sfa.i, sfb.i); 603 } 604 lds(inst.float_detail.fc, &sfc, l); 605 break; 606 case op_src_xd: 607 case op_src_tg: 608 if (inst.float_detail.opclass >= 12) 609 (*swfp_cvt[inst.float_detail.opclass - 12])( 610 inst.bits, l); 611 else { 612 stt(inst.float_detail.fb, &tfb, l); 613 if (inst.float_detail.opclass == 11) 614 tfc.i = float64_sqrt(tfb.i); 615 else { 616 stt(inst.float_detail.fa, &tfa, l); 617 tfc.i = (*swfp_t[inst.float_detail 618 .opclass])(tfa.i, tfb.i); 619 } 620 ldt(inst.float_detail.fc, &tfc, l); 621 } 622 break; 623 case op_src_qq: 624 float_raise(FP_X_IMP); 625 break; 626 } 627 } 628 } 629 630 int 631 alpha_fp_complete_at(unsigned long trigger_pc, struct lwp *l, uint64_t *ucode) 632 { 633 int needsig; 634 alpha_instruction inst; 635 uint64_t rm, fpcr, orig_fpcr; 636 uint64_t orig_flags, new_flags, changed_flags, md_flags; 637 638 if (__predict_false(ufetch_32((void *)trigger_pc, &inst.bits))) { 639 this_cannot_happen(6, -1); 640 return SIGSEGV; 641 } 642 kpreempt_disable(); 643 if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) { 644 fpu_load(); 645 } 646 alpha_pal_wrfen(1); 647 /* 648 * Alpha FLOAT instructions can override the rounding mode on a 649 * per-instruction basis. If necessary, lie about the dynamic 650 * rounding mode so emulation software need go to only one place 651 * for it, and so we don't have to lock any memory locations or 652 * pass a third parameter to every SoftFloat entry point. 653 * 654 * N.B. the rounding mode field of the FLOAT format instructions 655 * matches that of the FPCR *except* for the value 3, which means 656 * "dynamic" rounding mode (i.e. what is programmed into the FPCR). 657 */ 658 orig_fpcr = fpcr = alpha_read_fpcr(); 659 rm = inst.float_detail.rnd; 660 if (__predict_false(rm != 3 /* dynamic */ && 661 rm != __SHIFTOUT(fpcr, FPCR_DYN_RM))) { 662 fpcr = (fpcr & ~FPCR_DYN_RM) | __SHIFTIN(rm, FPCR_DYN_RM); 663 alpha_write_fpcr(fpcr); 664 } 665 orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags); 666 667 alpha_fp_interpret(trigger_pc, l, inst.bits); 668 669 md_flags = l->l_md.md_flags; 670 671 new_flags = FP_C_TO_NETBSD_FLAG(md_flags); 672 changed_flags = orig_flags ^ new_flags; 673 KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */ 674 alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, md_flags)); 675 needsig = changed_flags & FP_C_TO_NETBSD_MASK(md_flags); 676 alpha_pal_wrfen(0); 677 kpreempt_enable(); 678 if (__predict_false(needsig)) { 679 *ucode = needsig; 680 return SIGFPE; 681 } 682 return 0; 683 } 684 685 int 686 alpha_fp_complete(u_long a0, u_long a1, struct lwp *l, uint64_t *ucode) 687 { 688 uint64_t op_class; 689 alpha_instruction inst; 690 /* "trigger_pc" is Compaq's term for the earliest faulting op */ 691 alpha_instruction *trigger_pc, *usertrap_pc; 692 alpha_instruction *pc, *win_begin, tsw[TSWINSIZE]; 693 long insn_count = 0; 694 int sig; 695 696 if (alpha_fp_complete_debug) { 697 printf("%s: [%s:%d] a0[AESR]=0x%lx a1[regmask]=0x%lx " 698 "FPCR=0x%lx FP_C=0x%lx\n", 699 __func__, l->l_proc->p_comm, l->l_proc->p_pid, 700 a0, a1, alpha_read_fpcr(), 701 l->l_md.md_flags & (MDLWP_FP_C|MDLWP_FPACTIVE)); 702 } 703 704 pc = (alpha_instruction *)l->l_md.md_tf->tf_regs[FRAME_PC]; 705 trigger_pc = pc - 1; /* for ALPHA_AMASK_PAT case */ 706 707 /* 708 * Start out with the code mirroring the exception flags 709 * (FP_X_*). Shift right 1 bit to discard SWC to achieve 710 * this. 711 */ 712 *ucode = a0 >> 1; 713 714 if (cpu_amask & ALPHA_AMASK_PAT) { 715 if ((a0 & (ALPHA_AESR_SWC | ALPHA_AESR_INV)) != 0 || 716 alpha_fp_sync_complete) { 717 sig = alpha_fp_complete_at((u_long)trigger_pc, l, 718 ucode); 719 goto resolved; 720 } 721 } 722 if ((a0 & (ALPHA_AESR_SWC | ALPHA_AESR_INV)) == 0) 723 goto unresolved; 724 /* 725 * At this point we are somewhere in the trap shadow of one or more instruc- 726 * tions that have trapped with software completion specified. We have a mask 727 * of the registers written by trapping instructions. 728 * 729 * Now step backwards through the trap shadow, clearing bits in the 730 * destination write mask until the trigger instruction is found, and 731 * interpret this one instruction in SW. If a SIGFPE is not required, back up 732 * the PC until just after this instruction and restart. This will execute all 733 * trap shadow instructions between the trigger pc and the trap pc twice. 734 */ 735 trigger_pc = 0; 736 win_begin = pc; 737 atomic_inc_ulong(&ts_scans.ev_count); 738 for (--pc; a1; --pc) { 739 insn_count++; 740 if (pc < win_begin) { 741 win_begin = pc - TSWINSIZE + 1; 742 if (copyin(win_begin, tsw, sizeof tsw)) { 743 /* sigh, try to get just one */ 744 win_begin = pc; 745 if (copyin(win_begin, tsw, 4)) { 746 /* 747 * We're off the rails here; don't 748 * bother updating the FP_C. 749 */ 750 return SIGSEGV; 751 } 752 } 753 } 754 assert(win_begin <= pc && !((long)pc & 3)); 755 inst = tsw[pc - win_begin]; 756 op_class = 1UL << inst.generic_format.opcode; 757 if (op_class & FPUREG_CLASS) { 758 a1 &= ~(1UL << (inst.operate_generic_format.rc + 32)); 759 trigger_pc = pc; 760 } else if (op_class & CPUREG_CLASS) { 761 a1 &= ~(1UL << inst.operate_generic_format.rc); 762 trigger_pc = pc; 763 } else if (op_class & TRAPSHADOWBOUNDARY) { 764 if (op_class & CHECKFUNCTIONCODE) { 765 if (inst.mem_format.displacement == op_trapb || 766 inst.mem_format.displacement == op_excb) 767 break; /* code breaks AARM rules */ 768 } else 769 break; /* code breaks AARM rules */ 770 } 771 /* Some shadow-safe op, probably load, store, or FPTI class */ 772 } 773 if (insn_count > atomic_load_relaxed(&ts_insns_max.ev_count)) { 774 atomic_store_relaxed(&ts_insns_max.ev_count, insn_count); 775 } 776 atomic_add_long(&ts_insns.ev_count, insn_count); 777 if (__predict_true(trigger_pc != 0 && a1 == 0)) { 778 atomic_inc_ulong(&ts_resolved.ev_count); 779 sig = alpha_fp_complete_at((u_long)trigger_pc, l, ucode); 780 goto resolved; 781 } else { 782 atomic_inc_ulong(&ts_unresolved.ev_count); 783 } 784 785 unresolved: /* obligatory statement */; 786 /* 787 * *ucode contains the exception bits (FP_X_*). We need to 788 * update the FP_C and FPCR, and send a signal for any new 789 * trap that is enabled. 790 */ 791 uint64_t orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags); 792 uint64_t new_flags = orig_flags | *ucode; 793 uint64_t changed_flags = orig_flags ^ new_flags; 794 KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */ 795 796 l->l_md.md_flags |= NETBSD_FLAG_TO_FP_C(new_flags); 797 798 kpreempt_disable(); 799 if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) { 800 fpu_load(); 801 } 802 alpha_pal_wrfen(1); 803 uint64_t orig_fpcr = alpha_read_fpcr(); 804 alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, l->l_md.md_flags)); 805 uint64_t needsig = 806 changed_flags & FP_C_TO_NETBSD_MASK(l->l_md.md_flags); 807 alpha_pal_wrfen(0); 808 kpreempt_enable(); 809 810 if (__predict_false(needsig)) { 811 *ucode = needsig; 812 return SIGFPE; 813 } 814 return 0; 815 816 resolved: 817 if (sig) { 818 usertrap_pc = trigger_pc + 1; 819 l->l_md.md_tf->tf_regs[FRAME_PC] = (unsigned long)usertrap_pc; 820 } 821 return sig; 822 } 823 824 /* 825 * Initialize FP handling. 826 */ 827 void 828 alpha_fp_init(void) 829 { 830 evcnt_attach_dynamic_nozero(&fpevent_use, EVCNT_TYPE_MISC, NULL, 831 "FP", "proc use"); 832 evcnt_attach_dynamic_nozero(&fpevent_reuse, EVCNT_TYPE_MISC, NULL, 833 "FP", "proc re-use"); 834 835 evcnt_attach_dynamic_nozero(&ts_scans, EVCNT_TYPE_MISC, NULL, 836 "FP", "TS scans"); 837 evcnt_attach_dynamic_nozero(&ts_insns, EVCNT_TYPE_MISC, NULL, 838 "FP", "TS total insns"); 839 evcnt_attach_dynamic_nozero(&ts_insns_max, EVCNT_TYPE_MISC, NULL, 840 "FP", "TS max single-scan insns"); 841 evcnt_attach_dynamic_nozero(&ts_resolved, EVCNT_TYPE_MISC, NULL, 842 "FP", "TS resolved"); 843 evcnt_attach_dynamic_nozero(&ts_unresolved, EVCNT_TYPE_MISC, NULL, 844 "FP", "TS unresolved"); 845 846 evcnt_attach_dynamic_nozero(&fp_ill_opc, EVCNT_TYPE_MISC, NULL, 847 "FP", "illegal op code"); 848 evcnt_attach_dynamic_nozero(&fp_ill_func, EVCNT_TYPE_MISC, NULL, 849 "FP", "illegal function code"); 850 evcnt_attach_dynamic_nozero(&fp_ill_anyop, EVCNT_TYPE_MISC, NULL, 851 "FP", "illegal any_float function code"); 852 } 853 854 /* 855 * Load the float-point context for the current lwp. 856 */ 857 void 858 fpu_state_load(struct lwp *l, u_int flags) 859 { 860 struct pcb * const pcb = lwp_getpcb(l); 861 KASSERT(l == curlwp); 862 863 #ifdef MULTIPROCESSOR 864 /* 865 * If the LWP got switched to another CPU, pcu_switchpoint would have 866 * called state_release to clear MDLWP_FPACTIVE. Now that we are back 867 * on the CPU that has our FP context, set MDLWP_FPACTIVE again. 868 */ 869 if (flags & PCU_REENABLE) { 870 KASSERT(flags & PCU_VALID); 871 l->l_md.md_flags |= MDLWP_FPACTIVE; 872 return; 873 } 874 #else 875 KASSERT((flags & PCU_REENABLE) == 0); 876 #endif 877 878 /* 879 * Instrument FP usage -- if a process had not previously 880 * used FP, mark it as having used FP for the first time, 881 * and count this event. 882 * 883 * If a process has used FP, count a "used FP, and took 884 * a trap to use it again" event. 885 */ 886 if ((flags & PCU_VALID) == 0) { 887 atomic_inc_ulong(&fpevent_use.ev_count); 888 } else { 889 atomic_inc_ulong(&fpevent_reuse.ev_count); 890 } 891 892 if (alpha_fp_complete_debug) { 893 printf("%s: [%s:%d] loading FPCR=0x%lx\n", 894 __func__, l->l_proc->p_comm, l->l_proc->p_pid, 895 pcb->pcb_fp.fpr_cr); 896 } 897 alpha_pal_wrfen(1); 898 restorefpstate(&pcb->pcb_fp); 899 alpha_pal_wrfen(0); 900 901 l->l_md.md_flags |= MDLWP_FPACTIVE; 902 } 903 904 /* 905 * Save the FPU state. 906 */ 907 908 void 909 fpu_state_save(struct lwp *l) 910 { 911 struct pcb * const pcb = lwp_getpcb(l); 912 913 alpha_pal_wrfen(1); 914 savefpstate(&pcb->pcb_fp); 915 alpha_pal_wrfen(0); 916 if (alpha_fp_complete_debug) { 917 printf("%s: [%s:%d] saved FPCR=0x%lx\n", 918 __func__, l->l_proc->p_comm, l->l_proc->p_pid, 919 pcb->pcb_fp.fpr_cr); 920 } 921 } 922 923 /* 924 * Release the FPU. 925 */ 926 void 927 fpu_state_release(struct lwp *l) 928 { 929 l->l_md.md_flags &= ~MDLWP_FPACTIVE; 930 } 931