/*	$NetBSD: fpu.c,v 1.93 2025/05/14 23:39:54 riastradh Exp $	*/

/*
 * Copyright (c) 2008, 2019 The NetBSD Foundation, Inc.  All
 * rights reserved.
 *
 * This code is derived from software developed for The NetBSD Foundation
 * by Andrew Doran and Maxime Villard.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)npx.c	7.2 (Berkeley) 5/12/91
 */

/*
 * Copyright (c) 1994, 1995, 1998 Charles M. Hannum.  All rights reserved.
 * Copyright (c) 1990 William Jolitz.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)npx.c	7.2 (Berkeley) 5/12/91
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: fpu.c,v 1.93 2025/05/14 23:39:54 riastradh Exp $");

#include "opt_ddb.h"
#include "opt_multiprocessor.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/cpu.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/xcall.h>

#include <machine/cpu.h>
#include <machine/cpuvar.h>
#include <machine/cputypes.h>
#include <machine/intr.h>
#include <machine/cpufunc.h>
#include <machine/pcb.h>
#include <machine/trap.h>
#include <machine/specialreg.h>
#include <x86/cpu.h>
#include <x86/fpu.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#ifdef XENPV
#define clts() HYPERVISOR_fpu_taskswitch(0)
#define stts() HYPERVISOR_fpu_taskswitch(1)
#endif

void fpu_handle_deferred(void);
void fpu_switch(struct lwp *, struct lwp *);

uint32_t x86_fpu_mxcsr_mask __read_mostly = 0;

static const union savefpu safe_fpu_storage __aligned(64) = {
	.sv_xmm = {
		.fx_mxcsr = __SAFE_MXCSR__,
	},
};
static const union savefpu zero_fpu_storage __aligned(64);

static const void *safe_fpu __read_mostly = &safe_fpu_storage;
static const void *zero_fpu __read_mostly = &zero_fpu_storage;

/*
 * x86_fpu_save_separate_p()
 *
 *	True if we allocate the FPU save space separately, outside the
 *	struct pcb itself, because it doesn't fit in a single page.
 */
bool
x86_fpu_save_separate_p(void)
{

	return x86_fpu_save_size >
	    PAGE_SIZE - offsetof(struct pcb, pcb_savefpusmall);
}
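
/*
 * Worked example (illustrative numbers): with a 4096-byte page, a
 * traditional 512-byte FXSAVE image, or an 832-byte XSAVE image with
 * AVX, leaves ample room next to the rest of the pcb, so this returns
 * false.  A CPU advertising Intel AMX, whose TILEDATA component alone
 * is 8192 bytes, pushes x86_fpu_save_size past what a single page can
 * hold and this returns true, forcing a separate allocation.
 */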

static inline union savefpu *
fpu_lwp_area(struct lwp *l)
{
	struct pcb *pcb = lwp_getpcb(l);
	union savefpu *area = pcb->pcb_savefpu;

	KASSERT((l->l_flag & LW_SYSTEM) == 0);
	if (l == curlwp) {
		fpu_save();
	}
	KASSERT(!(l->l_md.md_flags & MDL_FPU_IN_CPU));

	return area;
}

static inline void
fpu_save_lwp(struct lwp *l)
{
	struct pcb *pcb = lwp_getpcb(l);
	union savefpu *area = pcb->pcb_savefpu;
	int s;

	s = splvm();
	if (l->l_md.md_flags & MDL_FPU_IN_CPU) {
		KASSERT((l->l_flag & LW_SYSTEM) == 0);
		fpu_area_save(area, x86_xsave_features,
		    !(l->l_proc->p_flag & PK_32));
		l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
	}
	splx(s);
}

/*
 * Bring curlwp's FPU state into memory.  It will get installed back in the
 * CPU when returning to userland.
 */
void
fpu_save(void)
{
	fpu_save_lwp(curlwp);
}

void
fpuinit(struct cpu_info *ci)
{
	/*
	 * This might not be strictly necessary since it will be initialized
	 * for each process.  However, it does no harm.
	 */
	clts();
	fninit();
	stts();
}

/*
 * fpuinit_mxcsr_mask()
 *
 *	Called once by cpu_init on the primary CPU.  Initializes
 *	x86_fpu_mxcsr_mask based on the initial FPU state, and
 *	initializes safe_fpu and zero_fpu if necessary when the
 *	hardware's FPU save size is larger than union savefpu.
 *
 *	XXX Rename this function!
 */
void
fpuinit_mxcsr_mask(void)
{
	/*
	 * If the CPU's x86 fpu save size is larger than union savefpu,
	 * we have to allocate larger buffers for the safe and zero FPU
	 * states used here and by fpu_kern_enter/leave.
	 *
	 * Note: This is NOT the same as x86_fpu_save_separate_p(),
	 * which may have a little more space than union savefpu.
	 */
	const bool allocfpusave = x86_fpu_save_size > sizeof(union savefpu);
	vaddr_t va;

#if defined XENPV
	if (x86_fpu_save_separate_p()) {
		/*
		 * XXX Temporary workaround for PR kern/59371 until we
		 * work out the implications.
		 */
		panic("NetBSD/xen does not support fpu save size %u",
		    x86_fpu_save_size);
	}
#elif defined __i386__
	if (x86_fpu_save_separate_p()) {
		/*
		 * XXX Need to teach cpu_uarea_alloc/free to allocate a
		 * separate fpu save space, and make pcb_savefpu a
		 * pointer indirection -- currently only done on amd64,
		 * not on i386.
		 *
		 * But the primary motivation on amd64 is the 8192-byte
		 * TILEDATA state for Intel AMX (Advanced Matrix
		 * Extensions), which doesn't work in 32-bit mode
		 * anyway, so on such machines we ought to just disable
		 * it in the first place and keep x86_fpu_save_size
		 * down:
		 *
		 *	While Intel AMX instructions can be executed
		 *	only in 64-bit mode, instructions of the XSAVE
		 *	feature set can operate on TILECFG and TILEDATA
		 *	in any mode.  It is recommended that only
		 *	64-bit operating systems enable Intel AMX by
		 *	setting XCR0[18:17].
		 *
		 * --Intel 64 and IA-32 Architectures Software
		 *   Developer's Manual, Volume 1: Basic
		 *   Architecture, Order Number: 253665-087US, March
		 *   2025, Sec. 13.3 `Enabling the XSAVE feature set
		 *   and XSAVE-enabled features', p. 13-6.
		 *   https://cdrdv2.intel.com/v1/dl/getContent/671436
		 *   https://web.archive.org/web/20250404141850/https://cdrdv2-public.intel.com/851056/253665-087-sdm-vol-1.pdf
		 *   https://web.archive.org/web/20250404141850if_/https://cdrdv2-public.intel.com/851056/253665-087-sdm-vol-1.pdf#page=324
		 */
		panic("NetBSD/i386 does not support fpu save size %u",
		    x86_fpu_save_size);
	}
#endif
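
	/*
	 * Layout note for the probe below: FXSAVE stores MXCSR_MASK at
	 * byte offset 28 of its 512-byte image, and CPUs that predate
	 * the field leave it as zero, in which case the architectural
	 * default mask applies -- which is what __INITIAL_MXCSR_MASK__
	 * is expected to supply.
	 */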

#ifndef XENPV
	struct fxsave fpusave __aligned(64);
	u_long psl;

	memset(&fpusave, 0, sizeof(fpusave));

	/* Disable interrupts, and enable FPU */
	psl = x86_read_psl();
	x86_disable_intr();
	clts();

	/* Fill in the FPU area */
	fxsave(&fpusave);

	/* Restore previous state */
	stts();
	x86_write_psl(psl);

	if (fpusave.fx_mxcsr_mask == 0) {
		x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
	} else {
		x86_fpu_mxcsr_mask = fpusave.fx_mxcsr_mask;
	}
#else
	/*
	 * XXX XXX XXX: On Xen the FXSAVE above faults.  That's because
	 * &fpusave is not 16-byte aligned.  Stack alignment problem
	 * somewhere, it seems.
	 */
	x86_fpu_mxcsr_mask = __INITIAL_MXCSR_MASK__;
#endif

	/*
	 * If necessary, allocate FPU save spaces for safe or zero FPU
	 * state, for fpu_kern_enter/leave.
	 */
	if (allocfpusave) {
		__CTASSERT(PAGE_SIZE >= 64);

		va = uvm_km_alloc(kernel_map, x86_fpu_save_size, PAGE_SIZE,
		    UVM_KMF_WIRED|UVM_KMF_ZERO|UVM_KMF_WAITVA);
		memcpy((void *)va, &safe_fpu_storage,
		    sizeof(safe_fpu_storage));
		uvm_km_protect(kernel_map, va, x86_fpu_save_size,
		    VM_PROT_READ);
		safe_fpu = (void *)va;

		va = uvm_km_alloc(kernel_map, x86_fpu_save_size, PAGE_SIZE,
		    UVM_KMF_WIRED|UVM_KMF_ZERO|UVM_KMF_WAITVA);
		/*
		 * No initialization -- just want zeroes!  In fact we
		 * could share this with other all-zero pages.
		 */
		uvm_km_protect(kernel_map, va, x86_fpu_save_size,
		    VM_PROT_READ);
		zero_fpu = (void *)va;
	}
}

static inline void
fpu_errata_amd(void)
{
	uint16_t sw;

	/*
	 * AMD FPUs do not restore FIP, FDP, and FOP on fxrstor and xrstor
	 * when FSW.ES=0, leaking other threads' execution history.
	 *
	 * Clear them manually by loading a zero (fldummy).  We do this
	 * unconditionally, regardless of FSW.ES.
	 *
	 * Before that, clear the ES bit in the x87 status word if it is
	 * currently set, in order to avoid causing a fault in the
	 * upcoming load.
	 *
	 * Newer generations of AMD CPUs have CPUID_Fn80000008_EBX[2],
	 * which indicates that FIP/FDP/FOP are restored (same behavior
	 * as Intel).  We're not using it though.
	 */
	fnstsw(&sw);
	if (sw & 0x80)
		fnclex();
	fldummy();
}

#ifdef __x86_64__
#define XS64(x) (is_64bit ? x##64 : x)
#else
#define XS64(x) x
#endif
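
/*
 * For example (illustrative expansion only): on amd64,
 * XS64(fxsave)(area) evaluates to
 *
 *	(is_64bit ? fxsave64 : fxsave)(area)
 *
 * so a 64-bit process gets the 64-bit form of the save/restore
 * instruction (full 64-bit FIP/FDP), while a 32-bit process, and all
 * of i386, use the plain form.
 */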

void
fpu_area_save(void *area, uint64_t xsave_features, bool is_64bit)
{
	switch (x86_fpu_save) {
	case FPU_SAVE_FSAVE:
		fnsave(area);
		break;
	case FPU_SAVE_FXSAVE:
		XS64(fxsave)(area);
		break;
	case FPU_SAVE_XSAVE:
		XS64(xsave)(area, xsave_features);
		break;
	case FPU_SAVE_XSAVEOPT:
		XS64(xsaveopt)(area, xsave_features);
		break;
	}

	stts();
}

void
fpu_area_restore(const void *area, uint64_t xsave_features, bool is_64bit)
{
	clts();

	switch (x86_fpu_save) {
	case FPU_SAVE_FSAVE:
		frstor(area);
		break;
	case FPU_SAVE_FXSAVE:
		if (cpu_vendor == CPUVENDOR_AMD)
			fpu_errata_amd();
		XS64(fxrstor)(area);
		break;
	case FPU_SAVE_XSAVE:
	case FPU_SAVE_XSAVEOPT:
		if (cpu_vendor == CPUVENDOR_AMD)
			fpu_errata_amd();
		XS64(xrstor)(area, xsave_features);
		break;
	}
}

void
fpu_handle_deferred(void)
{
	struct pcb *pcb = lwp_getpcb(curlwp);
	fpu_area_restore(pcb->pcb_savefpu, x86_xsave_features,
	    !(curlwp->l_proc->p_flag & PK_32));
}

void
fpu_switch(struct lwp *oldlwp, struct lwp *newlwp)
{
	struct cpu_info *ci __diagused = curcpu();
	struct pcb *pcb;

	KASSERTMSG(ci->ci_ilevel >= IPL_SCHED, "cpu%d ilevel=%d",
	    cpu_index(ci), ci->ci_ilevel);

	if (oldlwp->l_md.md_flags & MDL_FPU_IN_CPU) {
		KASSERT(!(oldlwp->l_flag & LW_SYSTEM));
		pcb = lwp_getpcb(oldlwp);
		fpu_area_save(pcb->pcb_savefpu, x86_xsave_features,
		    !(oldlwp->l_proc->p_flag & PK_32));
		oldlwp->l_md.md_flags &= ~MDL_FPU_IN_CPU;
	}
	KASSERT(!(newlwp->l_md.md_flags & MDL_FPU_IN_CPU));
}

void
fpu_lwp_fork(struct lwp *l1, struct lwp *l2)
{
	struct pcb *pcb2 = lwp_getpcb(l2);
	union savefpu *fpu_save;

	/* Kernel threads have no FPU. */
	if (__predict_false(l2->l_flag & LW_SYSTEM)) {
		return;
	}

	/* For init(8). */
	if (__predict_false(l1->l_flag & LW_SYSTEM)) {
		memset(pcb2->pcb_savefpu, 0, x86_fpu_save_size);
		return;
	}

	fpu_save = fpu_lwp_area(l1);
	memcpy(pcb2->pcb_savefpu, fpu_save, x86_fpu_save_size);
	l2->l_md.md_flags &= ~MDL_FPU_IN_CPU;
}

void
fpu_lwp_abandon(struct lwp *l)
{
	int s;

	KASSERT(l == curlwp);
	s = splvm();
	l->l_md.md_flags &= ~MDL_FPU_IN_CPU;
	stts();
	splx(s);
}
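
/*
 * Note on the flag used above (a summary of what this file implements,
 * not additional machinery): MDL_FPU_IN_CPU set on an lwp means its
 * FPU state currently lives in the CPU registers and pcb_savefpu is
 * stale; clear means pcb_savefpu holds the authoritative copy.  For
 * curlwp, whenever the flag is cleared -- and outside an
 * fpu_kern_enter/leave section -- CR0_TS is left set, so a stray FPU
 * instruction traps to fpudna().
 */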

/* -------------------------------------------------------------------------- */

/*
 * fpu_kern_enter()
 *
 *	Begin using the FPU.  Raises to splvm, disabling most
 *	interrupts and rendering the thread non-preemptible; caller
 *	should not use this for long periods of time, and must call
 *	fpu_kern_leave() afterward.  Non-recursive -- you cannot call
 *	fpu_kern_enter() again without calling fpu_kern_leave() first.
 *
 *	Must be used only at IPL_VM or below -- never in IPL_SCHED or
 *	IPL_HIGH interrupt handlers.
 */
void
fpu_kern_enter(void)
{
	struct lwp *l = curlwp;
	struct cpu_info *ci;
	int s;

	s = splvm();

	ci = curcpu();
#if 0
	/*
	 * Can't assert this because if the caller holds a spin lock at
	 * IPL_VM, and previously held and released a spin lock at
	 * higher IPL, the IPL remains raised above IPL_VM.
	 */
	KASSERTMSG(ci->ci_ilevel <= IPL_VM || cold, "ilevel=%d",
	    ci->ci_ilevel);
#endif
	KASSERT(ci->ci_kfpu_spl == -1);
	ci->ci_kfpu_spl = s;

	/*
	 * If we are in a softint and have a pinned lwp, the FPU state is
	 * that of the pinned lwp, so save that lwp's state.
	 */
	while ((l->l_pflag & LP_INTR) && (l->l_switchto != NULL))
		l = l->l_switchto;
	fpu_save_lwp(l);

	/*
	 * Clear CR0_TS, which fpu_save_lwp set if it saved anything --
	 * otherwise the CPU will trap if we try to use the FPU under
	 * the false impression that there has been a task switch since
	 * the last FPU usage requiring that we save the FPU state.
	 */
	clts();

	/*
	 * Zero the FPU registers and install safe control words.
	 */
	fpu_area_restore(safe_fpu, x86_xsave_features, /*is_64bit*/false);
}

/*
 * fpu_kern_leave()
 *
 *	End using the FPU after fpu_kern_enter().
 */
void
fpu_kern_leave(void)
{
	struct cpu_info *ci = curcpu();
	int s;

#if 0
	/*
	 * Can't assert this because if the caller holds a spin lock at
	 * IPL_VM, and previously held and released a spin lock at
	 * higher IPL, the IPL remains raised above IPL_VM.
	 */
	KASSERT(ci->ci_ilevel == IPL_VM || cold);
#endif
	KASSERT(ci->ci_kfpu_spl != -1);

	/*
	 * Zero the fpu registers; otherwise we might leak secrets
	 * through Spectre-class attacks to userland, even if there are
	 * no bugs in fpu state management.
	 */
	fpu_area_restore(zero_fpu, x86_xsave_features, /*is_64bit*/false);

	/*
	 * Set CR0_TS again so that the kernel can't accidentally use
	 * the FPU.
	 */
	stts();

	s = ci->ci_kfpu_spl;
	ci->ci_kfpu_spl = -1;
	splx(s);
}
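
/*
 * Usage sketch (illustrative; the function name is made up):
 *
 *	fpu_kern_enter();
 *	chacha_core_sse2(out, in, key, nonce);	(any SSE/AVX-using code)
 *	fpu_kern_leave();
 *
 * Keep the region short: the thread runs at splvm and cannot be
 * preempted between enter and leave, and nesting is not allowed.
 */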

/* -------------------------------------------------------------------------- */

/*
 * The following table is used to ensure that the FPE_... value
 * that is passed as a trapcode to the signal handler of the user
 * process does not have more than one bit set.
 *
 * Multiple bits may be set if SSE simd instructions generate errors
 * on more than one value or if the user process modifies the control
 * word while a status word bit is already set (which is a sign of
 * bad coding).
 * We have no choice but to narrow them down to one bit, since we must
 * not send a trapcode that is not exactly one of the FPE_ macros.
 *
 * The mechanism has a static table with 128 entries.  Each combination
 * of the 7 FPU status word exception bits directly translates to a
 * position in this table, where a single FPE_... value is stored.
 * This FPE_... value stored there is considered the "most important"
 * of the exception bits and will be sent as the signal code.  The
 * precedence of the bits is based upon Intel Document "Numerical
 * Applications", Chapter "Special Computational Situations".
 *
 * The code to choose one of these values does these steps:
 * 1) Throw away status word bits that cannot be masked.
 * 2) Throw away the bits currently masked in the control word,
 *    assuming the user isn't interested in them anymore.
 * 3) Reinsert status word bit 7 (stack fault) if it is set, which
 *    cannot be masked but must be preserved.
 *    'Stack fault' is a sub-class of 'invalid operation'.
 * 4) Use the remaining bits to point into the trapcode table.
 *
 * The 6 maskable bits in order of their preference, as stated in the
 * above referenced Intel manual:
 *   1  Invalid operation (FP_X_INV)
 *     1a  Stack underflow
 *     1b  Stack overflow
 *     1c  Operand of unsupported format
 *     1d  SNaN operand.
 *   2  QNaN operand (not an exception, irrelevant here)
 *   3  Any other invalid-operation not mentioned above or zero divide
 *        (FP_X_INV, FP_X_DZ)
 *   4  Denormal operand (FP_X_DNML)
 *   5  Numeric over/underflow (FP_X_OFL, FP_X_UFL)
 *   6  Inexact result (FP_X_IMP)
 *
 * NB: the above seems to mix up the mxcsr error bits and the x87 ones.
 * They are in the same order, but there is no EN_SW_STACK_FAULT in the
 * SSE (MXCSR) status.
 *
 * The table is nearly, but not quite, in bit order (ZERODIV and DENORM
 * are swapped).
 *
 * This table assumes that any stack fault is cleared - so that an INVOP
 * fault will only be reported as FLTSUB once.
 * This might not happen if the mask is being changed.
 */
#define FPE_xxx1(f) (f & EN_SW_INVOP \
		? (f & EN_SW_STACK_FAULT ? FPE_FLTSUB : FPE_FLTINV) \
	: f & EN_SW_ZERODIV ? FPE_FLTDIV \
	: f & EN_SW_DENORM ? FPE_FLTUND \
	: f & EN_SW_OVERFLOW ? FPE_FLTOVF \
	: f & EN_SW_UNDERFLOW ? FPE_FLTUND \
	: f & EN_SW_PRECLOSS ? FPE_FLTRES \
	: f & EN_SW_STACK_FAULT ? FPE_FLTSUB : 0)
#define	FPE_xxx2(f)	FPE_xxx1(f),	FPE_xxx1((f + 1))
#define	FPE_xxx4(f)	FPE_xxx2(f),	FPE_xxx2((f + 2))
#define	FPE_xxx8(f)	FPE_xxx4(f),	FPE_xxx4((f + 4))
#define	FPE_xxx16(f)	FPE_xxx8(f),	FPE_xxx8((f + 8))
#define	FPE_xxx32(f)	FPE_xxx16(f),	FPE_xxx16((f + 16))
static const uint8_t fpetable[128] = {
	FPE_xxx32(0), FPE_xxx32(32), FPE_xxx32(64), FPE_xxx32(96)
};
#undef FPE_xxx1
#undef FPE_xxx2
#undef FPE_xxx4
#undef FPE_xxx8
#undef FPE_xxx16
#undef FPE_xxx32
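
/*
 * Worked example (illustrative): a process unmasks divide-by-zero and
 * inexact, and one SSE instruction raises both.  fputrap() strips the
 * masked and non-status bits, leaving an index with EN_SW_ZERODIV and
 * EN_SW_PRECLOSS set, and the table entry generated by FPE_xxx1()
 * resolves to FPE_FLTDIV, because zero divide outranks inexact in the
 * precedence list above.
 */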

/*
 * This is a synchronous trap on either an x87 instruction (due to an unmasked
 * error on the previous x87 instruction) or on an SSE/SSE2/etc instruction due
 * to an error on the instruction itself.
 *
 * If the trap actually generates a signal, then the FPU state is saved and
 * copied onto the lwp's user stack, and then recovered from there when the
 * signal returns.
 *
 * All this code needs to do is save the reason for the trap.  For x87 traps
 * the status word bits need clearing to stop the trap from recurring.  For
 * SSE traps the mxcsr bits are 'sticky' and need clearing so they don't
 * confuse a later trap.
 *
 * We come here with interrupts disabled.
 */
void
fputrap(struct trapframe *frame)
{
	uint32_t statbits;
	ksiginfo_t ksi;

	if (__predict_false(!USERMODE(frame->tf_cs))) {
		register_t ip = X86_TF_RIP(frame);
		char where[128];

#ifdef DDB
		db_symstr(where, sizeof(where), (db_expr_t)ip, DB_STGY_PROC);
#else
		snprintf(where, sizeof(where), "%p", (void *)ip);
#endif
		panic("fpu trap from kernel at %s, trapframe %p\n", where,
		    frame);
	}

	KASSERT(curlwp->l_md.md_flags & MDL_FPU_IN_CPU);

	if (frame->tf_trapno == T_XMM) {
		uint32_t mxcsr;
		x86_stmxcsr(&mxcsr);
		statbits = mxcsr;
		/* Clear the sticky status bits */
		mxcsr &= ~0x3f;
		x86_ldmxcsr(&mxcsr);

		/*
		 * Remove masked exceptions and non-status bits; the
		 * MXCSR mask bits sit 7 bits above the flag bits.
		 */
		statbits &= ~(statbits >> 7) & 0x3f;
		/* Mark this as an XMM status */
		statbits |= 0x10000;
	} else {
		uint16_t cw, sw;
		/* Get current control and status words */
		fnstcw(&cw);
		fnstsw(&sw);
		/* Clear any pending exceptions from status word */
		fnclex();

		/* Remove masked exceptions */
		statbits = sw & ~(cw & 0x3f);
	}

	/* Doesn't matter now if we get preempted */
	x86_enable_intr();

	KSI_INIT_TRAP(&ksi);
	ksi.ksi_signo = SIGFPE;
	ksi.ksi_addr = (void *)X86_TF_RIP(frame);
	ksi.ksi_code = fpetable[statbits & 0x7f];
	ksi.ksi_trap = statbits;
	(*curlwp->l_proc->p_emul->e_trapsignal)(curlwp, &ksi);
}

void
fpudna(struct trapframe *frame)
{
#ifdef XENPV
	/*
	 * Xen produces spurious fpudna traps, just do nothing.
	 */
	if (USERMODE(frame->tf_cs)) {
		clts();
		return;
	}
#endif
	panic("fpudna from %s, ip %p, trapframe %p",
	    USERMODE(frame->tf_cs) ? "userland" : "kernel",
	    (void *)X86_TF_RIP(frame), frame);
}

/* -------------------------------------------------------------------------- */

static inline void
fpu_xstate_reload(union savefpu *fpu_save, uint64_t xstate)
{
	/*
	 * Force a reload of the given xstate during the next XRSTOR.
	 */
	if (x86_fpu_save >= FPU_SAVE_XSAVE) {
		fpu_save->sv_xsave_hdr.xsh_xstate_bv |= xstate;
	}
}
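
/*
 * Background for the helper above (standard XSAVE semantics, not extra
 * logic in this file): XRSTOR only loads a component from memory when
 * its bit is set in the header's xstate_bv; a clear bit makes XRSTOR
 * put that component back into its initial configuration.  So after
 * one of the routines below rewrites, say, the x87 or SSE area in
 * pcb_savefpu, the corresponding bit must be turned on or the new
 * values would be ignored by the next restore.
 */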

void
fpu_set_default_cw(struct lwp *l, unsigned int x87_cw)
{
	union savefpu *fpu_save = fpu_lwp_area(l);
	struct pcb *pcb = lwp_getpcb(l);

	if (i386_use_fxsave) {
		fpu_save->sv_xmm.fx_cw = x87_cw;
		if (x87_cw != __INITIAL_NPXCW__) {
			fpu_xstate_reload(fpu_save, XCR0_X87);
		}
	} else {
		fpu_save->sv_87.s87_cw = x87_cw;
	}
	pcb->pcb_fpu_dflt_cw = x87_cw;
}

void
fpu_clear(struct lwp *l, unsigned int x87_cw)
{
	union savefpu *fpu_save;
	struct pcb *pcb;

	KASSERT(l == curlwp);
	fpu_save = fpu_lwp_area(l);

	switch (x86_fpu_save) {
	case FPU_SAVE_FSAVE:
		memset(&fpu_save->sv_87, 0, x86_fpu_save_size);
		fpu_save->sv_87.s87_tw = 0xffff;
		fpu_save->sv_87.s87_cw = x87_cw;
		break;
	case FPU_SAVE_FXSAVE:
		memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
		fpu_save->sv_xmm.fx_cw = x87_cw;
		break;
	case FPU_SAVE_XSAVE:
	case FPU_SAVE_XSAVEOPT:
		memset(&fpu_save->sv_xmm, 0, x86_fpu_save_size);
		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
		fpu_save->sv_xmm.fx_cw = x87_cw;
		if (__predict_false(x87_cw != __INITIAL_NPXCW__)) {
			fpu_xstate_reload(fpu_save, XCR0_X87);
		}
		break;
	}

	pcb = lwp_getpcb(l);
	pcb->pcb_fpu_dflt_cw = x87_cw;
}

void
fpu_sigreset(struct lwp *l)
{
	union savefpu *fpu_save = fpu_lwp_area(l);
	struct pcb *pcb = lwp_getpcb(l);

	/*
	 * For signal handlers the register values don't matter.  Just reset
	 * a few fields.
	 */
	if (i386_use_fxsave) {
		fpu_save->sv_xmm.fx_mxcsr = __INITIAL_MXCSR__;
		fpu_save->sv_xmm.fx_mxcsr_mask = x86_fpu_mxcsr_mask;
		fpu_save->sv_xmm.fx_tw = 0;
		fpu_save->sv_xmm.fx_cw = pcb->pcb_fpu_dflt_cw;
	} else {
		fpu_save->sv_87.s87_tw = 0xffff;
		fpu_save->sv_87.s87_cw = pcb->pcb_fpu_dflt_cw;
	}
}
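
/*
 * The process_* accessors below are the MI hooks used to read and
 * write another lwp's FPU state in its pcb -- presumably on behalf of
 * ptrace(2) and core dumps; the exact consumers live outside this
 * file.  Writers sanitize MXCSR against x86_fpu_mxcsr_mask because
 * restoring reserved MXCSR bits faults.
 */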

void
process_write_fpregs_xmm(struct lwp *l, const struct fxsave *fpregs)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	if (i386_use_fxsave) {
		memcpy(&fpu_save->sv_xmm, fpregs, sizeof(fpu_save->sv_xmm));

		/*
		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
		 */
		fpu_save->sv_xmm.fx_mxcsr_mask &= x86_fpu_mxcsr_mask;
		fpu_save->sv_xmm.fx_mxcsr &= fpu_save->sv_xmm.fx_mxcsr_mask;

		fpu_xstate_reload(fpu_save, XCR0_X87 | XCR0_SSE);
	} else {
		process_xmm_to_s87(fpregs, &fpu_save->sv_87);
	}
}

void
process_write_fpregs_s87(struct lwp *l, const struct save87 *fpregs)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	if (i386_use_fxsave) {
		process_s87_to_xmm(fpregs, &fpu_save->sv_xmm);
		fpu_xstate_reload(fpu_save, XCR0_X87 | XCR0_SSE);
	} else {
		memcpy(&fpu_save->sv_87, fpregs, sizeof(fpu_save->sv_87));
	}
}

void
process_read_fpregs_xmm(struct lwp *l, struct fxsave *fpregs)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	if (i386_use_fxsave) {
		memcpy(fpregs, &fpu_save->sv_xmm, sizeof(fpu_save->sv_xmm));
	} else {
		memset(fpregs, 0, sizeof(*fpregs));
		process_s87_to_xmm(&fpu_save->sv_87, fpregs);
	}
}

void
process_read_fpregs_s87(struct lwp *l, struct save87 *fpregs)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	if (i386_use_fxsave) {
		memset(fpregs, 0, sizeof(*fpregs));
		process_xmm_to_s87(&fpu_save->sv_xmm, fpregs);
	} else {
		memcpy(fpregs, &fpu_save->sv_87, sizeof(fpu_save->sv_87));
	}
}
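
/*
 * Layout reminder for the xstate accessors below: an XSAVE image is
 * the 512-byte legacy fxsave area, then the 64-byte xsave header
 * (which holds xstate_bv), then the extended components at the
 * offsets the CPU advertises -- which is what x86_xsave_offsets[] and
 * x86_xsave_sizes[] are expected to describe.
 */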

int
process_read_xstate(struct lwp *l, struct xstate *xstate)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	if (x86_fpu_save == FPU_SAVE_FSAVE) {
		/* Convert from legacy FSAVE format. */
		memset(&xstate->xs_fxsave, 0, sizeof(xstate->xs_fxsave));
		process_s87_to_xmm(&fpu_save->sv_87, &xstate->xs_fxsave);

		/* We only got x87 data. */
		xstate->xs_rfbm = XCR0_X87;
		xstate->xs_xstate_bv = XCR0_X87;
		return 0;
	}

	/* Copy the legacy area. */
	memcpy(&xstate->xs_fxsave, fpu_save->sv_xsave_hdr.xsh_fxsave,
	    sizeof(xstate->xs_fxsave));

	if (x86_fpu_save == FPU_SAVE_FXSAVE) {
		/* FXSAVE means we've got x87 + SSE data. */
		xstate->xs_rfbm = XCR0_X87 | XCR0_SSE;
		xstate->xs_xstate_bv = XCR0_X87 | XCR0_SSE;
		return 0;
	}

	/* Copy the bitmap indicating which states are available. */
	xstate->xs_rfbm = x86_xsave_features & XCR0_FPU;
	xstate->xs_xstate_bv = fpu_save->sv_xsave_hdr.xsh_xstate_bv;
	KASSERT(!(xstate->xs_xstate_bv & ~xstate->xs_rfbm));

#define COPY_COMPONENT(xcr0_val, xsave_val, field)			\
	if (xstate->xs_xstate_bv & xcr0_val) {				\
		KASSERT(x86_xsave_offsets[xsave_val]			\
		    >= sizeof(struct xsave_header));			\
		KASSERT(x86_xsave_sizes[xsave_val]			\
		    >= sizeof(xstate->field));				\
		memcpy(&xstate->field,					\
		    (char *)fpu_save + x86_xsave_offsets[xsave_val],	\
		    sizeof(xstate->field));				\
	}

	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
	COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
	COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
	COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);

#undef COPY_COMPONENT

	return 0;
}

int
process_verify_xstate(const struct xstate *xstate)
{
	/* xstate_bv must be a subset of RFBM */
	if (xstate->xs_xstate_bv & ~xstate->xs_rfbm)
		return EINVAL;

	switch (x86_fpu_save) {
	case FPU_SAVE_FSAVE:
		if ((xstate->xs_rfbm & ~XCR0_X87))
			return EINVAL;
		break;
	case FPU_SAVE_FXSAVE:
		if ((xstate->xs_rfbm & ~(XCR0_X87 | XCR0_SSE)))
			return EINVAL;
		break;
	default:
		/* Verify that no unsupported features are enabled */
		if ((xstate->xs_rfbm & ~(x86_xsave_features & XCR0_FPU)) != 0)
			return EINVAL;
	}

	return 0;
}
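
/*
 * Example of the RFBM/xstate_bv contract (illustrative values): a
 * debugger that wants to update only the SSE registers and leave
 * everything else alone passes xs_rfbm = XCR0_SSE and
 * xs_xstate_bv = XCR0_SSE.  Components outside xs_rfbm (e.g. the AVX
 * high halves) are left untouched; a component inside xs_rfbm but
 * absent from xs_xstate_bv gets its xstate_bv bit cleared, so the
 * next XRSTOR resets it to its initial configuration.
 */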

int
process_write_xstate(struct lwp *l, const struct xstate *xstate)
{
	union savefpu *fpu_save = fpu_lwp_area(l);

	/* Convert data into legacy FSAVE format. */
	if (x86_fpu_save == FPU_SAVE_FSAVE) {
		if (xstate->xs_xstate_bv & XCR0_X87)
			process_xmm_to_s87(&xstate->xs_fxsave,
			    &fpu_save->sv_87);
		return 0;
	}

	/* If XSAVE is supported, make sure that xstate_bv is set correctly. */
	if (x86_fpu_save >= FPU_SAVE_XSAVE) {
		/*
		 * Bit-wise "xstate->xs_rfbm ? xstate->xs_xstate_bv :
		 * fpu_save->sv_xsave_hdr.xsh_xstate_bv"
		 */
		fpu_save->sv_xsave_hdr.xsh_xstate_bv =
		    (fpu_save->sv_xsave_hdr.xsh_xstate_bv & ~xstate->xs_rfbm) |
		    xstate->xs_xstate_bv;
	}

	if (xstate->xs_xstate_bv & XCR0_X87) {
		/*
		 * X87 state is split into two areas, interspersed with SSE
		 * data.
		 */
		memcpy(&fpu_save->sv_xmm, &xstate->xs_fxsave, 24);
		memcpy(fpu_save->sv_xmm.fx_87_ac, xstate->xs_fxsave.fx_87_ac,
		    sizeof(xstate->xs_fxsave.fx_87_ac));
	}

	/*
	 * Copy MXCSR if either SSE or AVX state is requested, to match the
	 * XSAVE behavior for those flags.
	 */
	if (xstate->xs_xstate_bv & (XCR0_SSE|XCR0_YMM_Hi128)) {
		/*
		 * Invalid bits in mxcsr or mxcsr_mask will cause faults.
		 */
		fpu_save->sv_xmm.fx_mxcsr_mask =
		    xstate->xs_fxsave.fx_mxcsr_mask & x86_fpu_mxcsr_mask;
		fpu_save->sv_xmm.fx_mxcsr = xstate->xs_fxsave.fx_mxcsr &
		    fpu_save->sv_xmm.fx_mxcsr_mask;
	}

	if (xstate->xs_xstate_bv & XCR0_SSE) {
		memcpy(&fpu_save->sv_xsave_hdr.xsh_fxsave[160],
		    xstate->xs_fxsave.fx_xmm,
		    sizeof(xstate->xs_fxsave.fx_xmm));
	}

#define COPY_COMPONENT(xcr0_val, xsave_val, field)			\
	if (xstate->xs_xstate_bv & xcr0_val) {				\
		KASSERT(x86_xsave_offsets[xsave_val]			\
		    >= sizeof(struct xsave_header));			\
		KASSERT(x86_xsave_sizes[xsave_val]			\
		    >= sizeof(xstate->field));				\
		memcpy((char *)fpu_save + x86_xsave_offsets[xsave_val],	\
		    &xstate->field, sizeof(xstate->field));		\
	}

	COPY_COMPONENT(XCR0_YMM_Hi128, XSAVE_YMM_Hi128, xs_ymm_hi128);
	COPY_COMPONENT(XCR0_Opmask, XSAVE_Opmask, xs_opmask);
	COPY_COMPONENT(XCR0_ZMM_Hi256, XSAVE_ZMM_Hi256, xs_zmm_hi256);
	COPY_COMPONENT(XCR0_Hi16_ZMM, XSAVE_Hi16_ZMM, xs_hi16_zmm);

#undef COPY_COMPONENT

	return 0;
}